diff --git a/Documentation/ABI/testing/sysfs-device-mali b/Documentation/ABI/testing/sysfs-device-mali index 1ec265c5add4..12a1667feeb2 100644 --- a/Documentation/ABI/testing/sysfs-device-mali +++ b/Documentation/ABI/testing/sysfs-device-mali @@ -341,8 +341,7 @@ Description: device-driver that supports a CSF GPU. Used to enable firmware logs, logging levels valid values - are indicated using 'min and 'max' attribute values - values that are read-only. + are indicated using 'min' and 'max' attributes, which are read-only. Log level can be set using the 'cur' read, write attribute, we can use a valid log level value from min and max range values diff --git a/Documentation/ABI/testing/sysfs-device-mali-coresight-source b/Documentation/ABI/testing/sysfs-device-mali-coresight-source index 0f31a6acaa87..58d9085b8bb6 100644 --- a/Documentation/ABI/testing/sysfs-device-mali-coresight-source +++ b/Documentation/ABI/testing/sysfs-device-mali-coresight-source @@ -19,7 +19,7 @@ Description: What: /sys/bus/coresight/devices/mali-source-etm/is_enabled Description: - Attribute used to check if Coresight Source ITM is enabled. + Attribute used to check if Coresight Source ETM is enabled. What: /sys/bus/coresight/devices/mali-source-etm/trcconfigr Description: diff --git a/Documentation/devicetree/bindings/arm/arm,coresight-mali-source.yaml b/Documentation/devicetree/bindings/arm/arm,coresight-mali-source.yaml new file mode 100644 index 000000000000..d844ad10932c --- /dev/null +++ b/Documentation/devicetree/bindings/arm/arm,coresight-mali-source.yaml @@ -0,0 +1,163 @@ +# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +# +# (C) COPYRIGHT 2022-2024 ARM Limited. All rights reserved. +# +# This program is free software and is provided to you under the terms of the +# GNU General Public License version 2 as published by the Free Software +# Foundation, and any use by you of this program is subject to the terms +# of such GNU license. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, you can access it online at +# http://www.gnu.org/licenses/gpl-2.0.html. +# +# +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/arm/arm,coresight-mali-source.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: ARM CoreSight Mali Source integration + +maintainers: + - ARM Ltd. + +description: | + See Documentation/trace/coresight/coresight.rst for detailed information + about Coresight. + + This document covers the Mali-specific devicetree integration. + + References to Sink ports are given as examples. Access to Sink is specific + to an implementation and would require dedicated kernel modules. + + Arm Mali GPUs support three different sources: ITM, ETM and ELA. + + ELA source configuration via SysFS entries: + + The register values used by CoreSight for ELA can be configured using SysFS + interfaces. This implicitly includes configuring the ELA for independent or + shared JCN request and response channels. + +properties: + compatible: + enum: + - arm,coresight-mali-source-itm + - arm,coresight-mali-source-etm + - arm,coresight-mali-source-ela + + gpu: + minItems: 1 + maxItems: 1 + description: + Phandle to a Mali GPU definition + + port: + description: + Output connection to CoreSight Sink Trace bus. + + Legacy binding between Coresight Sources and CoreSight Sink. + For Linux kernel < v4.20. + $ref: /schemas/graph.yaml#/properties/port + + out-ports: + description: + Binding between Coresight Sources and CoreSight Sink. + For Linux kernel >= v4.20. + $ref: /schemas/graph.yaml#/properties/ports + + properties: + port: + description: Output connection to CoreSight Sink Trace bus.
+ $ref: /schemas/graph.yaml#/properties/port + +required: + - compatible + - gpu +# At least one of the legacy 'port' or the 'out-ports' connection must be present. +anyOf: [{ required: [port] }, { required: [out-ports] }] + +additionalProperties: false + +examples: + +# Source nodes without legacy CoreSight connections + - | + mali-source-itm { + compatible = "arm,coresight-mali-source-itm"; + gpu = <&gpu>; + + out-ports { + port { + mali_source_itm_out_port0: endpoint { + remote-endpoint = <&mali_sink_in_port0>; + }; + }; + }; + }; + + mali-source-ela { + compatible = "arm,coresight-mali-source-ela"; + gpu = <&gpu>; + + out-ports { + port { + mali_source_ela_out_port0: endpoint { + remote-endpoint = <&mali_sink_in_port1>; + }; + }; + }; + }; + + mali-source-etm { + compatible = "arm,coresight-mali-source-etm"; + gpu = <&gpu>; + + out-ports { + port { + mali_source_etm_out_port0: endpoint { + remote-endpoint = <&mali_sink_in_port2>; + }; + }; + }; + }; + +# Source nodes with legacy CoreSight connections + - | + mali-source-itm { + compatible = "arm,coresight-mali-source-itm"; + gpu = <&gpu>; + + port { + mali_source_itm_out_port0: endpoint { + remote-endpoint = <&mali_sink_in_port0>; + }; + }; + }; + + mali-source-etm { + compatible = "arm,coresight-mali-source-etm"; + gpu = <&gpu>; + + port { + mali_source_etm_out_port0: endpoint { + remote-endpoint = <&mali_sink_in_port1>; + }; + }; + }; + + mali-source-ela { + compatible = "arm,coresight-mali-source-ela"; + gpu = <&gpu>; + + port { + mali_source_ela_out_port0: endpoint { + remote-endpoint = <&mali_sink_in_port2>; + }; + }; + }; diff --git a/Documentation/devicetree/bindings/arm/mali-bifrost.txt b/Documentation/devicetree/bindings/arm/mali-bifrost.txt index 85672c6c6258..8ada052ebe56 100644 --- a/Documentation/devicetree/bindings/arm/mali-bifrost.txt +++ b/Documentation/devicetree/bindings/arm/mali-bifrost.txt @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note # -# (C) COPYRIGHT 2013-2023 ARM Limited. All rights reserved. +# (C) COPYRIGHT 2013-2024 ARM Limited. All rights reserved.
# # This program is free software and is provided to you under the terms of the # GNU General Public License version 2 as published by the Free Software @@ -111,7 +111,10 @@ for details. - idvs-group-size : Override the IDVS group size value. Tasks are sent to cores in groups of N + 1, so i.e. 0xF means 16 tasks. Valid values are between 0 to 0x3F (including). -- l2-size : Override L2 cache size on GPU that supports it +- l2-size : Override L2 cache size on GPU that supports it. The size must lie between the minimum + of 1KiB and the maximum size. The maximum size depends on the hardware integration. + The value passed is log2 of the cache size in bytes. + For example, for a cache size of 1KiB, 0xa should be passed. - l2-hash : Override L2 hash function on GPU that supports it - l2-hash-values : Override L2 hash function using provided hash values, on GPUs that supports it. It is mutually exclusive with 'l2-hash'. Only one or the other must be @@ -129,6 +132,10 @@ for details. set and the setting coresponding to the SYSC_ALLOC register. - propagate-bits: Used to write to L2_CONFIG.PBHA_HWU. This bitset establishes which PBHA bits are propagated on the AXI bus. +- mma-wa-id: Sets the PBHA ID to be used for the PBHA override based MMA violation workaround. + The read and write allocation override bits for the PBHA are set to NONCACHEABLE + and the driver encodes the PBHA ID in the PTEs where this workaround is to be applied. + Valid values are from 1 to 15. Example for a Mali GPU with 1 clock and 1 regulator: @@ -237,7 +244,8 @@ gpu@0xfc010000 { ... pbha { int-id-override = <2 0x32>, <9 0x05>, <16 0x32>; - propagate-bits = /bits/ 4 <0x03>; + propagate-bits = /bits/ 8 <0x03>; + mma-wa-id = <2>; }; ...
}; diff --git a/drivers/base/arm/Kconfig b/drivers/base/arm/Kconfig index e8bb8a40d2c5..c24a377723ca 100644 --- a/drivers/base/arm/Kconfig +++ b/drivers/base/arm/Kconfig @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note # -# (C) COPYRIGHT 2021-2023 ARM Limited. All rights reserved. +# (C) COPYRIGHT 2021-2024 ARM Limited. All rights reserved. # # This program is free software and is provided to you under the terms of the # GNU General Public License version 2 as published by the Free Software diff --git a/drivers/base/arm/Makefile b/drivers/base/arm/Makefile index 4aa68f89d3d9..42071f769729 100644 --- a/drivers/base/arm/Makefile +++ b/drivers/base/arm/Makefile @@ -125,6 +125,8 @@ CFLAGS_MODULE += -Wno-sign-compare CFLAGS_MODULE += -Wno-shift-negative-value # This flag is needed to avoid build errors on older kernels CFLAGS_MODULE += $(call cc-option, -Wno-cast-function-type) +# The following ensures the stack frame does not get larger than a page +CFLAGS_MODULE += -Wframe-larger-than=4096 KBUILD_CPPFLAGS += -DKBUILD_EXTRA_WARN1 diff --git a/drivers/base/arm/memory_group_manager/memory_group_manager.c b/drivers/base/arm/memory_group_manager/memory_group_manager.c index 389b0f051f3a..da4a0c39e63a 100644 --- a/drivers/base/arm/memory_group_manager/memory_group_manager.c +++ b/drivers/base/arm/memory_group_manager/memory_group_manager.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2024 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -51,10 +51,6 @@ static inline vm_fault_t vmf_insert_pfn_prot(struct vm_area_struct *vma, unsigne } #endif -#define PTE_PBHA_SHIFT (59) -#define PTE_PBHA_MASK ((uint64_t)0xf << PTE_PBHA_SHIFT) -#define PTE_RES_BIT_MULTI_AS_SHIFT (63) - #define IMPORTED_MEMORY_ID (MEMORY_GROUP_MANAGER_NR_GROUPS - 1) /** @@ -263,7 +259,7 @@ static struct page *example_mgm_alloc_page(struct memory_group_manager_device *m } else { struct mgm_groups *data = mgm_dev->data; - dev_err(data->dev, "alloc_pages failed\n"); + dev_dbg(data->dev, "alloc_pages failed\n"); } return p; @@ -303,7 +299,8 @@ static int example_mgm_get_import_memory_id(struct memory_group_manager_device * } static u64 example_mgm_update_gpu_pte(struct memory_group_manager_device *const mgm_dev, - unsigned int const group_id, int const mmu_level, u64 pte) + unsigned int const group_id, unsigned int const pbha_id, + unsigned int pte_flags, int const mmu_level, u64 pte) { struct mgm_groups *const data = mgm_dev->data; @@ -313,7 +310,10 @@ static u64 example_mgm_update_gpu_pte(struct memory_group_manager_device *const if (WARN_ON(group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS)) return pte; - pte |= ((u64)group_id << PTE_PBHA_SHIFT) & PTE_PBHA_MASK; + if (pte_flags & BIT(MMA_VIOLATION)) { + pr_warn_once("MMA violation! 
Applying PBHA override workaround to PTE\n"); + pte |= ((u64)pbha_id << PTE_PBHA_SHIFT) & PTE_PBHA_MASK; + } /* Address could be translated into a different bus address here */ pte |= ((u64)1 << PTE_RES_BIT_MULTI_AS_SHIFT); @@ -366,6 +366,16 @@ static vm_fault_t example_mgm_vmf_insert_pfn_prot(struct memory_group_manager_de return fault; } +static bool example_mgm_get_import_memory_cached_access_permitted( + struct memory_group_manager_device *mgm_dev, + struct memory_group_manager_import_data *import_data) +{ + CSTD_UNUSED(mgm_dev); + CSTD_UNUSED(import_data); + + return true; +} + static int mgm_initialize_data(struct mgm_groups *mgm_data) { int i; @@ -412,6 +422,8 @@ static int memory_group_manager_probe(struct platform_device *pdev) mgm_dev->ops.mgm_vmf_insert_pfn_prot = example_mgm_vmf_insert_pfn_prot; mgm_dev->ops.mgm_update_gpu_pte = example_mgm_update_gpu_pte; mgm_dev->ops.mgm_pte_to_original_pte = example_mgm_pte_to_original_pte; + mgm_dev->ops.mgm_get_import_memory_cached_access_permitted = + example_mgm_get_import_memory_cached_access_permitted; mgm_data = kzalloc(sizeof(*mgm_data), GFP_KERNEL); if (!mgm_data) { diff --git a/drivers/gpu/arm/bifrost/Kbuild b/drivers/gpu/arm/bifrost/Kbuild index b35fcee88baa..d64c439fbabc 100644 --- a/drivers/gpu/arm/bifrost/Kbuild +++ b/drivers/gpu/arm/bifrost/Kbuild @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note # -# (C) COPYRIGHT 2012-2023 ARM Limited. All rights reserved. +# (C) COPYRIGHT 2012-2024 ARM Limited. All rights reserved. 
# # This program is free software and is provided to you under the terms of the # GNU General Public License version 2 as published by the Free Software @@ -69,7 +69,7 @@ endif # # Driver version string which is returned to userspace via an ioctl -MALI_RELEASE_NAME ?= '"g22p0-01eac0"' +MALI_RELEASE_NAME ?= '"g25p0-00eac0"' # Set up defaults if not defined by build system ifeq ($(CONFIG_MALI_BIFROST_DEBUG), y) MALI_UNIT_TEST = 1 @@ -104,7 +104,6 @@ endif # # Experimental features must default to disabled, e.g.: # MALI_EXPERIMENTAL_FEATURE ?= 0 -MALI_INCREMENTAL_RENDERING_JM ?= 0 # # ccflags @@ -117,7 +116,6 @@ ccflags-y = \ -DMALI_COVERAGE=$(MALI_COVERAGE) \ -DMALI_RELEASE_NAME=$(MALI_RELEASE_NAME) \ -DMALI_JIT_PRESSURE_LIMIT_BASE=$(MALI_JIT_PRESSURE_LIMIT_BASE) \ - -DMALI_INCREMENTAL_RENDERING_JM=$(MALI_INCREMENTAL_RENDERING_JM) \ -DMALI_PLATFORM_DIR=$(MALI_PLATFORM_DIR) @@ -212,6 +210,7 @@ endif INCLUDE_SUBDIR = \ + $(src)/arbiter/Kbuild \ $(src)/context/Kbuild \ $(src)/debug/Kbuild \ $(src)/device/Kbuild \ @@ -228,9 +227,6 @@ ifeq ($(CONFIG_MALI_CSF_SUPPORT),y) INCLUDE_SUBDIR += $(src)/csf/Kbuild endif -ifeq ($(CONFIG_MALI_ARBITER_SUPPORT),y) - INCLUDE_SUBDIR += $(src)/arbiter/Kbuild -endif ifeq ($(CONFIG_MALI_BIFROST_DEVFREQ),y) ifeq ($(CONFIG_DEVFREQ_THERMAL),y) diff --git a/drivers/gpu/arm/bifrost/Kconfig b/drivers/gpu/arm/bifrost/Kconfig index 22fdfe80405a..b8ceff10e250 100644 --- a/drivers/gpu/arm/bifrost/Kconfig +++ b/drivers/gpu/arm/bifrost/Kconfig @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note # -# (C) COPYRIGHT 2012-2023 ARM Limited. All rights reserved. +# (C) COPYRIGHT 2012-2024 ARM Limited. All rights reserved. # # This program is free software and is provided to you under the terms of the # GNU General Public License version 2 as published by the Free Software @@ -63,6 +63,8 @@ config MALI_BIFROST_NO_MALI All calls to the simulated hardware will complete immediately as if the hardware completed the task. 
+endchoice + config MALI_NO_MALI_DEFAULT_GPU string "Default GPU for No Mali" depends on MALI_BIFROST_NO_MALI @@ -70,8 +72,12 @@ config MALI_NO_MALI_DEFAULT_GPU help This option sets the default GPU to identify as for No Mali builds. - -endchoice +config MALI_IS_FPGA + bool "Enable build of Mali kernel driver for FPGA" + depends on MALI_BIFROST + default n + help + Select this option to build the Mali kernel driver for an FPGA platform. menu "Platform specific options" source "$(MALI_KCONFIG_EXT_PREFIX)drivers/gpu/arm/bifrost/platform/Kconfig" @@ -214,16 +220,6 @@ config MALI_CORESTACK If unsure, say N. -comment "Platform options" - depends on MALI_BIFROST && MALI_BIFROST_EXPERT - -config MALI_BIFROST_ERROR_INJECT - bool "Enable No Mali error injection" - depends on MALI_BIFROST && MALI_BIFROST_EXPERT && MALI_BIFROST_NO_MALI - default n - help - Enables insertion of errors to test module failure and recovery mechanisms. - comment "Debug options" depends on MALI_BIFROST && MALI_BIFROST_EXPERT @@ -304,7 +300,7 @@ endchoice config MALI_PRFCNT_SET_SELECT_VIA_DEBUG_FS bool "Enable runtime selection of performance counters set via debugfs" - depends on MALI_BIFROST && MALI_BIFROST_EXPERT && DEBUG_FS + depends on MALI_BIFROST && MALI_BIFROST_EXPERT && DEBUG_FS && !MALI_CSF_SUPPORT default n help Select this option to make the secondary set of performance counters @@ -351,7 +347,7 @@ config MALI_PWRSOFT_765 changes have been backported say Y to avoid compilation errors. config MALI_HW_ERRATA_1485982_NOT_AFFECTED - bool "Disable workaround for BASE_HW_ISSUE_GPU2017_1336" + bool "Disable workaround for KBASE_HW_ISSUE_GPU2017_1336" depends on MALI_BIFROST && MALI_BIFROST_EXPERT default n help @@ -363,7 +359,7 @@ config MALI_HW_ERRATA_1485982_NOT_AFFECTED coherency mode requires the L2 to be turned off.
config MALI_HW_ERRATA_1485982_USE_CLOCK_ALTERNATIVE - bool "Use alternative workaround for BASE_HW_ISSUE_GPU2017_1336" + bool "Use alternative workaround for KBASE_HW_ISSUE_GPU2017_1336" depends on MALI_BIFROST && MALI_BIFROST_EXPERT && !MALI_HW_ERRATA_1485982_NOT_AFFECTED default n help diff --git a/drivers/gpu/arm/bifrost/Makefile b/drivers/gpu/arm/bifrost/Makefile index 69dbe3750a10..e10033aabc57 100644 --- a/drivers/gpu/arm/bifrost/Makefile +++ b/drivers/gpu/arm/bifrost/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note # -# (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. +# (C) COPYRIGHT 2010-2024 ARM Limited. All rights reserved. # # This program is free software and is provided to you under the terms of the # GNU General Public License version 2 as published by the Free Software @@ -41,11 +41,12 @@ ifeq ($(MALI_KCONFIG_EXT_PREFIX),) CONFIG_MALI_BIFROST_GATOR_SUPPORT ?= y CONFIG_MALI_ARBITRATION ?= n CONFIG_MALI_PARTITION_MANAGER ?= n - CONFIG_MALI_64BIT_HW_ACCESS ?= n + ifneq ($(CONFIG_MALI_BIFROST_NO_MALI),y) - # Prevent misuse when CONFIG_MALI_BIFROST_NO_MALI=y + # Prevent misuse when CONFIG_MALI_BIFROST_NO_MALI!=y CONFIG_MALI_REAL_HW ?= y + else CONFIG_MALI_CORESIGHT = n endif @@ -76,7 +77,6 @@ ifeq ($(MALI_KCONFIG_EXT_PREFIX),) else # Prevent misuse when CONFIG_MALI_BIFROST_NO_MALI=n CONFIG_MALI_REAL_HW = y - CONFIG_MALI_BIFROST_ERROR_INJECT = n endif @@ -108,7 +108,6 @@ ifeq ($(MALI_KCONFIG_EXT_PREFIX),) CONFIG_MALI_JOB_DUMP = n CONFIG_MALI_BIFROST_NO_MALI = n CONFIG_MALI_REAL_HW = y - CONFIG_MALI_BIFROST_ERROR_INJECT = n CONFIG_MALI_HW_ERRATA_1485982_NOT_AFFECTED = n CONFIG_MALI_HW_ERRATA_1485982_USE_CLOCK_ALTERNATIVE = n CONFIG_MALI_PRFCNT_SET_SELECT_VIA_DEBUG_FS = n @@ -157,7 +156,6 @@ ifeq ($(MALI_KCONFIG_EXT_PREFIX),) CONFIG_MALI_BIFROST \ CONFIG_MALI_CSF_SUPPORT \ CONFIG_MALI_BIFROST_GATOR_SUPPORT \ - CONFIG_MALI_ARBITER_SUPPORT \ CONFIG_MALI_ARBITRATION \ CONFIG_MALI_PARTITION_MANAGER \ 
CONFIG_MALI_REAL_HW \ @@ -171,7 +169,7 @@ ifeq ($(MALI_KCONFIG_EXT_PREFIX),) CONFIG_MALI_PWRSOFT_765 \ CONFIG_MALI_JOB_DUMP \ CONFIG_MALI_BIFROST_NO_MALI \ - CONFIG_MALI_BIFROST_ERROR_INJECT \ + CONFIG_MALI_IS_FPGA \ CONFIG_MALI_HW_ERRATA_1485982_NOT_AFFECTED \ CONFIG_MALI_HW_ERRATA_1485982_USE_CLOCK_ALTERNATIVE \ CONFIG_MALI_PRFCNT_SET_PRIMARY \ @@ -272,6 +270,8 @@ CFLAGS_MODULE += -Wmissing-field-initializers CFLAGS_MODULE += -Wno-type-limits CFLAGS_MODULE += $(call cc-option, -Wmaybe-uninitialized) CFLAGS_MODULE += $(call cc-option, -Wunused-macros) +# The following ensures the stack frame does not get larger than a page +CFLAGS_MODULE += -Wframe-larger-than=4096 KBUILD_CPPFLAGS += -DKBUILD_EXTRA_WARN2 diff --git a/drivers/gpu/arm/bifrost/arbiter/Kbuild b/drivers/gpu/arm/bifrost/arbiter/Kbuild index 2e6b111441ca..de339ccae394 100644 --- a/drivers/gpu/arm/bifrost/arbiter/Kbuild +++ b/drivers/gpu/arm/bifrost/arbiter/Kbuild @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note # -# (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. +# (C) COPYRIGHT 2019-2024 ARM Limited. All rights reserved. # # This program is free software and is provided to you under the terms of the # GNU General Public License version 2 as published by the Free Software @@ -21,3 +21,4 @@ bifrost_kbase-y += \ arbiter/mali_kbase_arbif.o \ arbiter/mali_kbase_arbiter_pm.o + diff --git a/drivers/gpu/arm/bifrost/arbiter/mali_kbase_arbif.c b/drivers/gpu/arm/bifrost/arbiter/mali_kbase_arbif.c index c290dd6b086f..49b42a6ec2c0 100644 --- a/drivers/gpu/arm/bifrost/arbiter/mali_kbase_arbif.c +++ b/drivers/gpu/arm/bifrost/arbiter/mali_kbase_arbif.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2024 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -108,6 +108,7 @@ static void on_gpu_stop(struct device *dev) } KBASE_TLSTREAM_TL_ARBITER_STOP_REQUESTED(kbdev, kbdev); + KBASE_KTRACE_ADD(kbdev, ARB_GPU_STOP_REQUESTED, NULL, 0); kbase_arbiter_pm_vm_event(kbdev, KBASE_VM_GPU_STOP_EVT); } @@ -133,6 +134,7 @@ static void on_gpu_granted(struct device *dev) } KBASE_TLSTREAM_TL_ARBITER_GRANTED(kbdev, kbdev); + KBASE_KTRACE_ADD(kbdev, ARB_GPU_GRANTED, NULL, 0); kbase_arbiter_pm_vm_event(kbdev, KBASE_VM_GPU_GRANTED_EVT); } @@ -156,10 +158,93 @@ static void on_gpu_lost(struct device *dev) dev_err(dev, "%s(): kbdev is NULL", __func__); return; } - + KBASE_TLSTREAM_TL_ARBITER_LOST(kbdev, kbdev); + KBASE_KTRACE_ADD(kbdev, ARB_GPU_LOST, NULL, 0); kbase_arbiter_pm_vm_event(kbdev, KBASE_VM_GPU_LOST_EVT); } +/** + * kbase_arbif_of_init() - Find the arbiter interface through the devicetree + * @kbdev: The kbase device structure for the device + * + * Resolves the "arbiter-if" phandle (falling back to "arbiter_if"), takes a + * reference on the arbiter platform device and on its driver module, and + * stores the device and its driver data in @kbdev->arb. + * + * Return: 0 on success, -ENODEV if CONFIG_OF is disabled or no arbiter phandle + * is present, -EPROBE_DEFER if the arbiter device or its driver is not ready. + */ +static int kbase_arbif_of_init(struct kbase_device *kbdev) +{ + struct arbiter_if_dev *arb_if; + struct device_node *arbiter_if_node; + struct platform_device *pdev; + + if (!IS_ENABLED(CONFIG_OF)) { + /* + * Return -ENODEV in the event CONFIG_OF is not available and let the + * internal AW check for suitability for arbitration.
+ */ + return -ENODEV; + } + + arbiter_if_node = of_parse_phandle(kbdev->dev->of_node, "arbiter-if", 0); + if (!arbiter_if_node) + arbiter_if_node = of_parse_phandle(kbdev->dev->of_node, "arbiter_if", 0); + if (!arbiter_if_node) { + dev_dbg(kbdev->dev, "No arbiter_if in Device Tree"); + /* no arbiter interface defined in device tree */ + kbdev->arb.arb_dev = NULL; + kbdev->arb.arb_if = NULL; + return -ENODEV; + } + + pdev = of_find_device_by_node(arbiter_if_node); + /* The node reference taken by of_parse_phandle() is no longer needed */ + of_node_put(arbiter_if_node); + if (!pdev) { + dev_err(kbdev->dev, "Failed to find arbiter_if device"); + return -EPROBE_DEFER; + } + + if (!pdev->dev.driver || !try_module_get(pdev->dev.driver->owner)) { + dev_err(kbdev->dev, "arbiter_if driver not available"); + put_device(&pdev->dev); + return -EPROBE_DEFER; + } + arb_if = platform_get_drvdata(pdev); + if (!arb_if) { + dev_err(kbdev->dev, "arbiter_if driver not ready"); + module_put(pdev->dev.driver->owner); + put_device(&pdev->dev); + return -EPROBE_DEFER; + } + + /* Publish the device only now, so a failure never leaves a stale pointer */ + kbdev->arb.arb_dev = &pdev->dev; + kbdev->arb.arb_if = arb_if; + return 0; +} + +/** + * kbase_arbif_of_term() - Release what kbase_arbif_of_init() acquired + * @kbdev: The kbase device structure for the device + * + * Drops the driver module and device references and clears the device pointer. + */ +static void kbase_arbif_of_term(struct kbase_device *kbdev) +{ + if (!IS_ENABLED(CONFIG_OF)) + return; + + if (kbdev->arb.arb_dev) { + module_put(kbdev->arb.arb_dev->driver->owner); + put_device(kbdev->arb.arb_dev); + } + kbdev->arb.arb_dev = NULL; +} + + /** * kbase_arbif_init() - Kbase Arbiter interface initialisation.
* @kbdev: The kbase device structure for the device (must be a valid pointer) @@ -174,47 +239,21 @@ static void on_gpu_lost(struct device *dev) */ int kbase_arbif_init(struct kbase_device *kbdev) { -#if IS_ENABLED(CONFIG_OF) struct arbiter_if_arb_vm_ops ops; struct arbiter_if_dev *arb_if; - struct device_node *arbiter_if_node; - struct platform_device *pdev; - int err; + int err = 0; - dev_dbg(kbdev->dev, "%s\n", __func__); + /* Tries to init with 'arbiter-if' if present in devicetree */ + err = kbase_arbif_of_init(kbdev); - arbiter_if_node = of_parse_phandle(kbdev->dev->of_node, "arbiter-if", 0); - if (!arbiter_if_node) - arbiter_if_node = of_parse_phandle(kbdev->dev->of_node, "arbiter_if", 0); - if (!arbiter_if_node) { - dev_dbg(kbdev->dev, "No arbiter_if in Device Tree\n"); - /* no arbiter interface defined in device tree */ - kbdev->arb.arb_dev = NULL; - kbdev->arb.arb_if = NULL; - return 0; + if (err == -ENODEV) { + /* devicetree does not support arbitration */ + return -EPERM; } - pdev = of_find_device_by_node(arbiter_if_node); - if (!pdev) { - dev_err(kbdev->dev, "Failed to find arbiter_if device\n"); - return -EPROBE_DEFER; - } + if (err) + return err; - if (!pdev->dev.driver || !try_module_get(pdev->dev.driver->owner)) { - dev_err(kbdev->dev, "arbiter_if driver not available\n"); - put_device(&pdev->dev); - return -EPROBE_DEFER; - } - kbdev->arb.arb_dev = &pdev->dev; - arb_if = platform_get_drvdata(pdev); - if (!arb_if) { - dev_err(kbdev->dev, "arbiter_if driver not ready\n"); - module_put(pdev->dev.driver->owner); - put_device(&pdev->dev); - return -EPROBE_DEFER; - } - - kbdev->arb.arb_if = arb_if; ops.arb_vm_gpu_stop = on_gpu_stop; ops.arb_vm_gpu_granted = on_gpu_granted; ops.arb_vm_gpu_lost = on_gpu_lost; @@ -225,25 +264,35 @@ int kbase_arbif_init(struct kbase_device *kbdev) kbdev->arb.arb_freq.freq_updated = false; mutex_init(&kbdev->arb.arb_freq.arb_freq_lock); - /* register kbase arbiter_if callbacks */ - if (arb_if->vm_ops.vm_arb_register_dev) { - 
err = arb_if->vm_ops.vm_arb_register_dev(arb_if, kbdev->dev, &ops); - if (err) { - dev_err(&pdev->dev, "Failed to register with arbiter. (err = %d)\n", err); - module_put(pdev->dev.driver->owner); - put_device(&pdev->dev); - if (err != -EPROBE_DEFER) - err = -EFAULT; - return err; - } + arb_if = kbdev->arb.arb_if; + + if (arb_if == NULL) { + dev_err(kbdev->dev, "No arbiter interface present"); + goto failure_term; + } + + if (!arb_if->vm_ops.vm_arb_register_dev) { + dev_err(kbdev->dev, "arbiter_if registration callback not present"); + goto failure_term; + } + + /* register kbase arbiter_if callbacks */ + err = arb_if->vm_ops.vm_arb_register_dev(arb_if, kbdev->dev, &ops); + if (err) { + dev_err(kbdev->dev, "Failed to register with arbiter. (err = %d)", err); + goto failure_term; } -#else /* CONFIG_OF */ - dev_dbg(kbdev->dev, "No arbiter without Device Tree support\n"); - kbdev->arb.arb_dev = NULL; - kbdev->arb.arb_if = NULL; -#endif return 0; + +failure_term: + { + kbase_arbif_of_term(kbdev); + } + + if (err != -EPROBE_DEFER) + err = -EFAULT; + return err; } /** @@ -256,16 +305,13 @@ void kbase_arbif_destroy(struct kbase_device *kbdev) { struct arbiter_if_dev *arb_if = kbdev->arb.arb_if; - if (arb_if && arb_if->vm_ops.vm_arb_unregister_dev) { - dev_dbg(kbdev->dev, "%s\n", __func__); + if (arb_if && arb_if->vm_ops.vm_arb_unregister_dev) arb_if->vm_ops.vm_arb_unregister_dev(kbdev->arb.arb_if); + + { + kbase_arbif_of_term(kbdev); } kbdev->arb.arb_if = NULL; - if (kbdev->arb.arb_dev) { - module_put(kbdev->arb.arb_dev->driver->owner); - put_device(kbdev->arb.arb_dev); - } - kbdev->arb.arb_dev = NULL; } /** @@ -278,10 +324,8 @@ void kbase_arbif_get_max_config(struct kbase_device *kbdev) { struct arbiter_if_dev *arb_if = kbdev->arb.arb_if; - if (arb_if && arb_if->vm_ops.vm_arb_get_max_config) { - dev_dbg(kbdev->dev, "%s\n", __func__); + if (arb_if && arb_if->vm_ops.vm_arb_get_max_config) arb_if->vm_ops.vm_arb_get_max_config(arb_if); - } } /** @@ -295,8 +339,8 @@ void 
kbase_arbif_gpu_request(struct kbase_device *kbdev) struct arbiter_if_dev *arb_if = kbdev->arb.arb_if; if (arb_if && arb_if->vm_ops.vm_arb_gpu_request) { - dev_dbg(kbdev->dev, "%s\n", __func__); KBASE_TLSTREAM_TL_ARBITER_REQUESTED(kbdev, kbdev); + KBASE_KTRACE_ADD(kbdev, ARB_GPU_REQUESTED, NULL, 0); arb_if->vm_ops.vm_arb_gpu_request(arb_if); } } @@ -312,10 +356,12 @@ void kbase_arbif_gpu_stopped(struct kbase_device *kbdev, u8 gpu_required) struct arbiter_if_dev *arb_if = kbdev->arb.arb_if; if (arb_if && arb_if->vm_ops.vm_arb_gpu_stopped) { - dev_dbg(kbdev->dev, "%s\n", __func__); KBASE_TLSTREAM_TL_ARBITER_STOPPED(kbdev, kbdev); - if (gpu_required) + KBASE_KTRACE_ADD(kbdev, ARB_GPU_STOPPED, NULL, 0); + if (gpu_required) { KBASE_TLSTREAM_TL_ARBITER_REQUESTED(kbdev, kbdev); + KBASE_KTRACE_ADD(kbdev, ARB_GPU_REQUESTED, NULL, 0); + } arb_if->vm_ops.vm_arb_gpu_stopped(arb_if, gpu_required); } } @@ -330,10 +376,8 @@ void kbase_arbif_gpu_active(struct kbase_device *kbdev) { struct arbiter_if_dev *arb_if = kbdev->arb.arb_if; - if (arb_if && arb_if->vm_ops.vm_arb_gpu_active) { - dev_dbg(kbdev->dev, "%s\n", __func__); + if (arb_if && arb_if->vm_ops.vm_arb_gpu_active) arb_if->vm_ops.vm_arb_gpu_active(arb_if); - } } /** @@ -346,8 +390,6 @@ void kbase_arbif_gpu_idle(struct kbase_device *kbdev) { struct arbiter_if_dev *arb_if = kbdev->arb.arb_if; - if (arb_if && arb_if->vm_ops.vm_arb_gpu_idle) { - dev_dbg(kbdev->dev, "vm_arb_gpu_idle\n"); + if (arb_if && arb_if->vm_ops.vm_arb_gpu_idle) arb_if->vm_ops.vm_arb_gpu_idle(arb_if); - } } diff --git a/drivers/gpu/arm/bifrost/arbiter/mali_kbase_arbif.h b/drivers/gpu/arm/bifrost/arbiter/mali_kbase_arbif.h index 701ffd42f6f7..c77792115e4d 100644 --- a/drivers/gpu/arm/bifrost/arbiter/mali_kbase_arbif.h +++ b/drivers/gpu/arm/bifrost/arbiter/mali_kbase_arbif.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2024 ARM Limited. 
All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -50,6 +50,7 @@ enum kbase_arbif_evt { KBASE_VM_OS_RESUME_EVENT, }; + /** * kbase_arbif_init() - Initialize the arbiter interface functionality. * @kbdev: The kbase device structure for the device (must be a valid pointer) diff --git a/drivers/gpu/arm/bifrost/arbiter/mali_kbase_arbiter_pm.c b/drivers/gpu/arm/bifrost/arbiter/mali_kbase_arbiter_pm.c index 616b0a78cbe5..9b8551609dc7 100644 --- a/drivers/gpu/arm/bifrost/arbiter/mali_kbase_arbiter_pm.c +++ b/drivers/gpu/arm/bifrost/arbiter/mali_kbase_arbiter_pm.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2024 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -48,7 +48,7 @@ MODULE_PARM_DESC( "On a virtualized platform, if the GPU is not granted within this time(ms) kbase will defer the probe"); static void kbase_arbiter_pm_vm_wait_gpu_assignment(struct kbase_device *kbdev); -static inline bool kbase_arbiter_pm_vm_gpu_assigned_lockheld(struct kbase_device *kbdev); +static inline bool kbase_arbiter_pm_vm_gpu_assigned_locked(struct kbase_device *kbdev); /** * kbase_arbiter_pm_vm_state_str() - Helper function to get string @@ -85,7 +85,6 @@ static inline const char *kbase_arbiter_pm_vm_state_str(enum kbase_vm_state stat case KBASE_VM_STATE_SUSPEND_WAIT_FOR_GRANT: return "KBASE_VM_STATE_SUSPEND_WAIT_FOR_GRANT"; default: - KBASE_DEBUG_ASSERT(false); return "[UnknownState]"; } } @@ -117,14 +116,13 @@ static inline const char *kbase_arbiter_pm_vm_event_str(enum kbase_arbif_evt evt case KBASE_VM_REF_EVENT: return "KBASE_VM_REF_EVENT"; default: - KBASE_DEBUG_ASSERT(false); return "[UnknownEvent]"; } 
} /** * kbase_arbiter_pm_vm_set_state() - Sets new kbase_arbiter_vm_state - * @kbdev: The kbase device structure for the device (must be a valid pointer) + * @kbdev: The kbase device structure for the device * @new_state: kbase VM new state * * This function sets the new state for the VM @@ -201,6 +199,7 @@ static void kbase_arbiter_pm_resume_wq(struct work_struct *data) arb_vm_state->vm_arb_starting = false; mutex_unlock(&arb_vm_state->vm_state_lock); KBASE_TLSTREAM_TL_ARBITER_STARTED(kbdev, kbdev); + KBASE_KTRACE_ADD(kbdev, ARB_GPU_STARTED, NULL, 0); dev_dbg(kbdev->dev, "<%s\n", __func__); } @@ -229,7 +228,7 @@ static enum hrtimer_restart request_timer_callback(struct hrtimer *timer) /** * start_request_timer() - Start a timer after requesting GPU - * @kbdev: The kbase device structure for the device (must be a valid pointer) + * @kbdev: The kbase device structure for the device * * Start a timer to track when kbase is waiting for the GPU from the * Arbiter. If the timer expires before GPU is granted, a warning in @@ -245,7 +244,7 @@ static void start_request_timer(struct kbase_device *kbdev) /** * cancel_request_timer() - Stop the request timer - * @kbdev: The kbase device structure for the device (must be a valid pointer) + * @kbdev: The kbase device structure for the device * * Stops the request timer once GPU has been granted. Safe to call * even if timer is no longer running. @@ -260,7 +259,7 @@ static void cancel_request_timer(struct kbase_device *kbdev) /** * kbase_arbiter_pm_early_init() - Initialize arbiter for VM * Paravirtualized use. - * @kbdev: The kbase device structure for the device (must be a valid pointer) + * @kbdev: The kbase device structure for the device * * Initialize the arbiter and other required resources during the runtime * and request the GPU for the VM for the first time. 
@@ -272,7 +271,7 @@ int kbase_arbiter_pm_early_init(struct kbase_device *kbdev) int err; struct kbase_arbiter_vm_state *arb_vm_state = NULL; - arb_vm_state = kmalloc(sizeof(struct kbase_arbiter_vm_state), GFP_KERNEL); + arb_vm_state = kzalloc(sizeof(struct kbase_arbiter_vm_state), GFP_KERNEL); if (arb_vm_state == NULL) return -ENOMEM; @@ -297,11 +296,13 @@ int kbase_arbiter_pm_early_init(struct kbase_device *kbdev) err = kbase_arbif_init(kbdev); if (err) { - dev_err(kbdev->dev, "Failed to initialise arbif module. (err = %d)\n", err); + if (err != -EPERM) + dev_err(kbdev->dev, "Failed to initialise arbif module. (err = %d)", err); + goto arbif_init_fail; } - if (kbdev->arb.arb_if) { + if (kbase_has_arbiter(kbdev)) { kbase_arbif_gpu_request(kbdev); dev_dbg(kbdev->dev, "Waiting for initial GPU assignment...\n"); @@ -311,7 +312,7 @@ int kbase_arbiter_pm_early_init(struct kbase_device *kbdev) msecs_to_jiffies((unsigned int)gpu_req_timeout)); if (!err) { - dev_dbg(kbdev->dev, + dev_err(kbdev->dev, "Kbase probe Deferred after waiting %d ms to receive GPU_GRANT\n", gpu_req_timeout); @@ -336,7 +337,7 @@ arbif_init_fail: /** * kbase_arbiter_pm_early_term() - Shutdown arbiter and free resources - * @kbdev: The kbase device structure for the device (must be a valid pointer) + * @kbdev: The kbase device structure for the device * * Clean up all the resources */ @@ -344,6 +345,14 @@ void kbase_arbiter_pm_early_term(struct kbase_device *kbdev) { struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state; + if (arb_vm_state == NULL) + return; + + if (!kbase_has_arbiter(kbdev)) + return; + + kbase_arbiter_pm_release_interrupts(kbdev); + cancel_request_timer(kbdev); mutex_lock(&arb_vm_state->vm_state_lock); if (arb_vm_state->vm_state > KBASE_VM_STATE_STOPPED_GPU_REQUESTED) { @@ -358,12 +367,6 @@ void kbase_arbiter_pm_early_term(struct kbase_device *kbdev) kbdev->pm.arb_vm_state = NULL; } -/** - * kbase_arbiter_pm_release_interrupts() - Release the GPU interrupts - * 
@kbdev: The kbase device structure for the device (must be a valid pointer) - * - * Releases interrupts and set the interrupt flag to false - */ void kbase_arbiter_pm_release_interrupts(struct kbase_device *kbdev) { struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state; @@ -376,29 +379,25 @@ void kbase_arbiter_pm_release_interrupts(struct kbase_device *kbdev) mutex_unlock(&arb_vm_state->vm_state_lock); } -/** - * kbase_arbiter_pm_install_interrupts() - Install the GPU interrupts - * @kbdev: The kbase device structure for the device (must be a valid pointer) - * - * Install interrupts and set the interrupt_install flag to true. - * - * Return: 0 if success, or a Linux error code - */ int kbase_arbiter_pm_install_interrupts(struct kbase_device *kbdev) { struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state; - int err; + int err = 0; mutex_lock(&arb_vm_state->vm_state_lock); - arb_vm_state->interrupts_installed = true; - err = kbase_install_interrupts(kbdev); + if (arb_vm_state->interrupts_installed == false) { + arb_vm_state->interrupts_installed = true; + err = kbase_install_interrupts(kbdev); + } else { + dev_dbg(kbdev->dev, "%s: interrupts installed already", __func__); + } mutex_unlock(&arb_vm_state->vm_state_lock); return err; } /** * kbase_arbiter_pm_vm_stopped() - Handle stop state for the VM - * @kbdev: The kbase device structure for the device (must be a valid pointer) + * @kbdev: The kbase device structure for the device * * Handles a stop state for the VM */ @@ -416,7 +415,13 @@ void kbase_arbiter_pm_vm_stopped(struct kbase_device *kbdev) dev_dbg(kbdev->dev, "%s %s\n", __func__, kbase_arbiter_pm_vm_state_str(arb_vm_state->vm_state)); - if (arb_vm_state->interrupts_installed) { + /* + * Release the interrupts on external arb_if to address Xen requirements. + * Interrupts are not released with internal arb_if as the IRQs are required + * to handle messaging to/from Arbiter/Resource Group. 
+ */ + if (arb_vm_state->interrupts_installed + ) { arb_vm_state->interrupts_installed = false; kbase_release_interrupts(kbdev); } @@ -476,6 +481,12 @@ int kbase_arbiter_pm_gpu_assigned(struct kbase_device *kbdev) if (!kbdev) return result; + /* If there is no Arbiter, then there is no virtualization + * and current VM always has access to GPU. + */ + if (!kbase_has_arbiter(kbdev)) + return 1; + /* First check the GPU_LOST state */ kbase_pm_lock(kbdev); if (kbase_pm_is_gpu_lost(kbdev)) { @@ -507,7 +518,7 @@ int kbase_arbiter_pm_gpu_assigned(struct kbase_device *kbdev) /** * kbase_arbiter_pm_vm_gpu_start() - Handles the start state of the VM - * @kbdev: The kbase device structure for the device (must be a valid pointer) + * @kbdev: The kbase device structure for the device * * Handles the start state of the VM */ @@ -532,7 +543,15 @@ static void kbase_arbiter_pm_vm_gpu_start(struct kbase_device *kbdev) case KBASE_VM_STATE_STOPPED_GPU_REQUESTED: kbase_arbiter_pm_vm_set_state(kbdev, KBASE_VM_STATE_STARTING); arb_vm_state->interrupts_installed = true; - kbase_install_interrupts(kbdev); + /* + * Re-install interrupts that were released for external arb_if to + * address Xen requirements. Interrupts are not released with internal + * arb_if as the IRQs are required to handle messaging to/from + * Arbiter/Resource Group. + */ + { + kbase_install_interrupts(kbdev); + } /* * GPU GRANTED received while in stop can be a result of a * repartitioning. 
@@ -561,7 +580,7 @@ static void kbase_arbiter_pm_vm_gpu_start(struct kbase_device *kbdev) /** * kbase_arbiter_pm_vm_gpu_stop() - Handles the stop state of the VM - * @kbdev: The kbase device structure for the device (must be a valid pointer) + * @kbdev: The kbase device structure for the device * * Handles the start state of the VM */ @@ -603,7 +622,7 @@ static void kbase_arbiter_pm_vm_gpu_stop(struct kbase_device *kbdev) /** * kbase_gpu_lost() - Kbase signals GPU is lost on a lost event signal - * @kbdev: The kbase device structure for the device (must be a valid pointer) + * @kbdev: The kbase device structure for the device * * On GPU lost event signals GPU_LOST to the aribiter */ @@ -658,7 +677,7 @@ static void kbase_gpu_lost(struct kbase_device *kbdev) /** * kbase_arbiter_pm_vm_os_suspend_ready_state() - checks if VM is ready * to be moved to suspended state. - * @kbdev: The kbase device structure for the device (must be a valid pointer) + * @kbdev: The kbase device structure for the device * * Return: True if its ready to be suspended else False. */ @@ -678,10 +697,10 @@ static inline bool kbase_arbiter_pm_vm_os_suspend_ready_state(struct kbase_devic /** * kbase_arbiter_pm_vm_os_prepare_suspend() - Prepare OS to be in suspend state * until it receives the grant message from arbiter - * @kbdev: The kbase device structure for the device (must be a valid pointer) + * @kbdev: The kbase device structure for the device * * Prepares OS to be in suspend state until it receives GRANT message - * from Arbiter asynchronously. + * from Arbiter asynchronously. This function assumes there is an active Arbiter. 
*/ static void kbase_arbiter_pm_vm_os_prepare_suspend(struct kbase_device *kbdev) { @@ -689,10 +708,8 @@ static void kbase_arbiter_pm_vm_os_prepare_suspend(struct kbase_device *kbdev) enum kbase_vm_state prev_state; lockdep_assert_held(&arb_vm_state->vm_state_lock); - if (kbdev->arb.arb_if) { - if (kbdev->pm.arb_vm_state->vm_state == KBASE_VM_STATE_SUSPENDED) - return; - } + if (kbdev->pm.arb_vm_state->vm_state == KBASE_VM_STATE_SUSPENDED) + return; /* Block suspend OS function until we are in a stable state * with vm_state_lock */ @@ -745,7 +762,7 @@ static void kbase_arbiter_pm_vm_os_prepare_suspend(struct kbase_device *kbdev) /** * kbase_arbiter_pm_vm_os_resume() - Resume OS function once it receives * a grant message from arbiter - * @kbdev: The kbase device structure for the device (must be a valid pointer) + * @kbdev: The kbase device structure for the device * * Resume OS function once it receives GRANT message * from Arbiter asynchronously. @@ -774,7 +791,7 @@ static void kbase_arbiter_pm_vm_os_resume(struct kbase_device *kbdev) /** * kbase_arbiter_pm_vm_event() - Dispatch VM event to the state machine. - * @kbdev: The kbase device structure for the device (must be a valid pointer) + * @kbdev: The kbase device structure for the device * @evt: VM event * * The state machine function. 
Receives events and transitions states @@ -784,7 +801,7 @@ void kbase_arbiter_pm_vm_event(struct kbase_device *kbdev, enum kbase_arbif_evt { struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state; - if (!kbdev->arb.arb_if) + if (!kbase_has_arbiter(kbdev)) return; mutex_lock(&arb_vm_state->vm_state_lock); @@ -853,7 +870,7 @@ void kbase_arbiter_pm_vm_event(struct kbase_device *kbdev, enum kbase_arbif_evt break; default: - dev_alert(kbdev->dev, "Got Unknown Event!"); + dev_err(kbdev->dev, "Got Unknown Event!"); break; } mutex_unlock(&arb_vm_state->vm_state_lock); @@ -863,7 +880,7 @@ KBASE_EXPORT_TEST_API(kbase_arbiter_pm_vm_event); /** * kbase_arbiter_pm_vm_wait_gpu_assignment() - VM wait for a GPU assignment. - * @kbdev: The kbase device structure for the device (must be a valid pointer) + * @kbdev: The kbase device structure for the device * * VM waits for a GPU assignment. */ @@ -879,14 +896,14 @@ static void kbase_arbiter_pm_vm_wait_gpu_assignment(struct kbase_device *kbdev) } /** - * kbase_arbiter_pm_vm_gpu_assigned_lockheld() - Check if VM holds VM state lock - * @kbdev: The kbase device structure for the device (must be a valid pointer) + * kbase_arbiter_pm_vm_gpu_assigned_locked() - Check if VM holds VM state lock + * @kbdev: The kbase device structure for the device * * Checks if the virtual machine holds VM state lock. * * Return: true if GPU is assigned, else false. 
*/ -static inline bool kbase_arbiter_pm_vm_gpu_assigned_lockheld(struct kbase_device *kbdev) +static inline bool kbase_arbiter_pm_vm_gpu_assigned_locked(struct kbase_device *kbdev) { struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state; @@ -898,13 +915,14 @@ static inline bool kbase_arbiter_pm_vm_gpu_assigned_lockheld(struct kbase_device /** * kbase_arbiter_pm_ctx_active_handle_suspend() - Handle suspend operation for * arbitration mode - * @kbdev: The kbase device structure for the device (must be a valid pointer) + * @kbdev: The kbase device structure for the device * @suspend_handler: The handler code for how to handle a suspend * that might occur * * This function handles a suspend event from the driver, * communicating with the arbiter and waiting synchronously for the GPU - * to be granted again depending on the VM state. + * to be granted again depending on the VM state. Returns immediately + * with success if there is no Arbiter. * * Return: 0 on success else 1 suspend handler isn not possible. 
*/ @@ -914,58 +932,58 @@ int kbase_arbiter_pm_ctx_active_handle_suspend(struct kbase_device *kbdev, struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state; int res = 0; - if (kbdev->arb.arb_if) { - mutex_lock(&arb_vm_state->vm_state_lock); - while (!kbase_arbiter_pm_vm_gpu_assigned_lockheld(kbdev)) { - /* Update VM state since we have GPU work to do */ - if (arb_vm_state->vm_state == KBASE_VM_STATE_STOPPING_IDLE) - kbase_arbiter_pm_vm_set_state(kbdev, - KBASE_VM_STATE_STOPPING_ACTIVE); - else if (arb_vm_state->vm_state == KBASE_VM_STATE_STOPPED) { - kbase_arbiter_pm_vm_set_state(kbdev, - KBASE_VM_STATE_STOPPED_GPU_REQUESTED); - kbase_arbif_gpu_request(kbdev); - start_request_timer(kbdev); - } else if (arb_vm_state->vm_state == KBASE_VM_STATE_INITIALIZING_WITH_GPU) + if (!kbase_has_arbiter(kbdev)) + return res; + + mutex_lock(&arb_vm_state->vm_state_lock); + while (!kbase_arbiter_pm_vm_gpu_assigned_locked(kbdev)) { + /* Update VM state since we have GPU work to do */ + if (arb_vm_state->vm_state == KBASE_VM_STATE_STOPPING_IDLE) + kbase_arbiter_pm_vm_set_state(kbdev, KBASE_VM_STATE_STOPPING_ACTIVE); + else if (arb_vm_state->vm_state == KBASE_VM_STATE_STOPPED) { + kbase_arbiter_pm_vm_set_state(kbdev, KBASE_VM_STATE_STOPPED_GPU_REQUESTED); + kbase_arbif_gpu_request(kbdev); + start_request_timer(kbdev); + } else if (arb_vm_state->vm_state == KBASE_VM_STATE_INITIALIZING_WITH_GPU) + break; + + if (suspend_handler != KBASE_PM_SUSPEND_HANDLER_NOT_POSSIBLE) { + /* In case of GPU lost, even if + * active_count > 0, we no longer have GPU + * access + */ + if (kbase_pm_is_gpu_lost(kbdev)) + res = 1; + + switch (suspend_handler) { + case KBASE_PM_SUSPEND_HANDLER_DONT_INCREASE: + res = 1; break; - - if (suspend_handler != KBASE_PM_SUSPEND_HANDLER_NOT_POSSIBLE) { - /* In case of GPU lost, even if - * active_count > 0, we no longer have GPU - * access - */ - if (kbase_pm_is_gpu_lost(kbdev)) + case KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE: + if (kbdev->pm.active_count 
== 0) res = 1; - - switch (suspend_handler) { - case KBASE_PM_SUSPEND_HANDLER_DONT_INCREASE: - res = 1; - break; - case KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE: - if (kbdev->pm.active_count == 0) - res = 1; - break; - case KBASE_PM_SUSPEND_HANDLER_VM_GPU_GRANTED: - break; - default: - WARN(1, "Unknown suspend_handler\n"); - res = 1; - break; - } + break; + case KBASE_PM_SUSPEND_HANDLER_VM_GPU_GRANTED: + break; + default: + WARN(1, "Unknown suspend_handler\n"); + res = 1; break; } - - /* Need to synchronously wait for GPU assignment */ - atomic_inc(&kbdev->pm.gpu_users_waiting); - mutex_unlock(&arb_vm_state->vm_state_lock); - kbase_pm_unlock(kbdev); - kbase_arbiter_pm_vm_wait_gpu_assignment(kbdev); - kbase_pm_lock(kbdev); - mutex_lock(&arb_vm_state->vm_state_lock); - atomic_dec(&kbdev->pm.gpu_users_waiting); + break; } + + /* Need to synchronously wait for GPU assignment */ + atomic_inc(&kbdev->pm.gpu_users_waiting); mutex_unlock(&arb_vm_state->vm_state_lock); + kbase_pm_unlock(kbdev); + kbase_arbiter_pm_vm_wait_gpu_assignment(kbdev); + kbase_pm_lock(kbdev); + mutex_lock(&arb_vm_state->vm_state_lock); + atomic_dec(&kbdev->pm.gpu_users_waiting); } + mutex_unlock(&arb_vm_state->vm_state_lock); + return res; } diff --git a/drivers/gpu/arm/bifrost/arbiter/mali_kbase_arbiter_pm.h b/drivers/gpu/arm/bifrost/arbiter/mali_kbase_arbiter_pm.h index 3734d32b6e2b..649f488d4f67 100644 --- a/drivers/gpu/arm/bifrost/arbiter/mali_kbase_arbiter_pm.h +++ b/drivers/gpu/arm/bifrost/arbiter/mali_kbase_arbiter_pm.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2024 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -102,7 +102,7 @@ void kbase_arbiter_pm_release_interrupts(struct kbase_device *kbdev); * * Install interrupts and set the interrupt_install flag to true. * - * Return: 0 if success, or a Linux error code + * Return: 0 if success or already installed. Otherwise a Linux error code */ int kbase_arbiter_pm_install_interrupts(struct kbase_device *kbdev); diff --git a/drivers/gpu/arm/bifrost/backend/gpu/Kbuild b/drivers/gpu/arm/bifrost/backend/gpu/Kbuild index c3db14217c6d..ffec0417aa5c 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/Kbuild +++ b/drivers/gpu/arm/bifrost/backend/gpu/Kbuild @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note # -# (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved. +# (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved. # # This program is free software and is provided to you under the terms of the # GNU General Public License version 2 as published by the Free Software @@ -47,12 +47,7 @@ endif bifrost_kbase-$(CONFIG_MALI_BIFROST_DEVFREQ) += \ backend/gpu/mali_kbase_devfreq.o -ifneq ($(CONFIG_MALI_REAL_HW),y) - bifrost_kbase-y += backend/gpu/mali_kbase_model_linux.o -endif +bifrost_kbase-$(CONFIG_MALI_BIFROST_NO_MALI) += backend/gpu/mali_kbase_model_linux.o # NO_MALI Dummy model interface bifrost_kbase-$(CONFIG_MALI_BIFROST_NO_MALI) += backend/gpu/mali_kbase_model_dummy.o -# HW error simulation -bifrost_kbase-$(CONFIG_MALI_BIFROST_NO_MALI) += backend/gpu/mali_kbase_model_error_generator.o - diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_clk_rate_trace_mgr.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_clk_rate_trace_mgr.c index e47dd440bff2..851e6feafd30 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_clk_rate_trace_mgr.c +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_clk_rate_trace_mgr.c @@ -1,7 +1,7 @@ // 
SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2020-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2020-2024 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -50,14 +50,22 @@ static struct kbase_clk_rate_trace_op_conf * get_clk_rate_trace_callbacks(__maybe_unused struct kbase_device *kbdev) { /* base case */ + const void *arbiter_if_node; struct kbase_clk_rate_trace_op_conf *callbacks = (struct kbase_clk_rate_trace_op_conf *)CLK_RATE_TRACE_OPS; -#if defined(CONFIG_MALI_ARBITER_SUPPORT) && defined(CONFIG_OF) - const void *arbiter_if_node; + + /* Nothing left to do here if there is no Arbiter/virtualization or if + * CONFIG_OF is not enabled. + */ + if (!IS_ENABLED(CONFIG_OF)) + return callbacks; if (WARN_ON(!kbdev) || WARN_ON(!kbdev->dev)) return callbacks; + if (!kbase_has_arbiter(kbdev)) + return callbacks; + arbiter_if_node = of_get_property(kbdev->dev->of_node, "arbiter-if", NULL); if (!arbiter_if_node) arbiter_if_node = of_get_property(kbdev->dev->of_node, "arbiter_if", NULL); @@ -69,8 +77,6 @@ get_clk_rate_trace_callbacks(__maybe_unused struct kbase_device *kbdev) dev_dbg(kbdev->dev, "Arbitration supported but disabled by platform. Leaving clk rate callbacks as default.\n"); -#endif - return callbacks; } diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_devfreq.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_devfreq.c index 2649f1815e9f..e223535d01f7 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_devfreq.c +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_devfreq.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2024 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -366,7 +366,7 @@ static int kbase_devfreq_init_core_mask_table(struct kbase_device *kbdev) err = of_property_read_u64(node, "opp-hz-real", real_freqs); #endif if (err < 0) { - dev_warn(kbdev->dev, "Failed to read opp-hz-real property with error %d\n", + dev_warn(kbdev->dev, "Failed to read opp-hz-real property with error %d", err); continue; } @@ -374,8 +374,8 @@ static int kbase_devfreq_init_core_mask_table(struct kbase_device *kbdev) err = of_property_read_u32_array(node, "opp-microvolt", opp_volts, kbdev->nr_regulators); if (err < 0) { - dev_warn(kbdev->dev, - "Failed to read opp-microvolt property with error %d\n", err); + dev_warn(kbdev->dev, "Failed to read opp-microvolt property with error %d", + err); continue; } #endif @@ -386,11 +386,12 @@ static int kbase_devfreq_init_core_mask_table(struct kbase_device *kbdev) dev_warn( kbdev->dev, - "Ignoring OPP %llu - Dynamic Core Scaling not supported on this GPU\n", + "Ignoring OPP %llu - Dynamic Core Scaling not supported on this GPU", opp_freq); continue; } + core_count_p = of_get_property(node, "opp-core-count", NULL); if (core_count_p) { u64 remaining_core_mask = kbdev->gpu_props.shader_present; diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_gpuprops_backend.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_gpuprops_backend.c index 414ad546811a..a9b629ad7ea5 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_gpuprops_backend.c +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_gpuprops_backend.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2024 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -48,7 +48,7 @@ int kbase_backend_gpuprops_get(struct kbase_device *kbdev, struct kbasep_gpuprop /* Not a valid register on TMIX */ /* TGOx specific register */ - if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_THREAD_TLS_ALLOC)) + if (kbase_hw_has_feature(kbdev, KBASE_HW_FEATURE_THREAD_TLS_ALLOC)) regdump->thread_tls_alloc = kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(THREAD_TLS_ALLOC)); #endif /* !MALI_USE_CSF */ @@ -64,7 +64,7 @@ int kbase_backend_gpuprops_get(struct kbase_device *kbdev, struct kbasep_gpuprop /* AMBA_FEATURES enum is mapped to COHERENCY_FEATURES enum */ regdump->coherency_features = KBASE_REG_READ(kbdev, GPU_CONTROL_ENUM(COHERENCY_FEATURES)); - if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_CORE_FEATURES)) + if (kbase_hw_has_feature(kbdev, KBASE_HW_FEATURE_CORE_FEATURES)) regdump->core_features = KBASE_REG_READ(kbdev, GPU_CONTROL_ENUM(CORE_FEATURES)); #if MALI_USE_CSF @@ -116,7 +116,7 @@ int kbase_backend_gpuprops_get_curr_config(struct kbase_device *kbdev, int kbase_backend_gpuprops_get_l2_features(struct kbase_device *kbdev, struct kbasep_gpuprops_regdump *regdump) { - if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_L2_CONFIG)) { + if (kbase_hw_has_feature(kbdev, KBASE_HW_FEATURE_L2_CONFIG)) { regdump->l2_features = KBASE_REG_READ(kbdev, GPU_CONTROL_ENUM(L2_FEATURES)); regdump->l2_config = kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(L2_CONFIG)); diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_instr_backend.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_instr_backend.c index 131cfe32df9f..07960713f75a 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_instr_backend.c +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_instr_backend.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2014-2023 ARM Limited. 
All rights reserved. + * (C) COPYRIGHT 2014-2024 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -29,6 +29,8 @@ #include #include +#define WAIT_FOR_DUMP_TIMEOUT_MS 5000 + static int wait_prfcnt_ready(struct kbase_device *kbdev) { u32 val; @@ -163,6 +165,7 @@ int kbase_instr_hwcnt_disable_internal(struct kbase_context *kctx) { unsigned long flags, pm_flags; struct kbase_device *kbdev = kctx->kbdev; + const unsigned long timeout = msecs_to_jiffies(WAIT_FOR_DUMP_TIMEOUT_MS); while (1) { spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags); @@ -199,7 +202,8 @@ int kbase_instr_hwcnt_disable_internal(struct kbase_context *kctx) spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags); /* Ongoing dump/setup - wait for its completion */ - wait_event(kbdev->hwcnt.backend.wait, kbdev->hwcnt.backend.triggered != 0); + wait_event_timeout(kbdev->hwcnt.backend.wait, kbdev->hwcnt.backend.triggered != 0, + timeout); } kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_DISABLED; @@ -319,8 +323,19 @@ int kbase_instr_hwcnt_wait_for_dump(struct kbase_context *kctx) unsigned long flags; int err; + unsigned long remaining; + const unsigned long timeout = msecs_to_jiffies(WAIT_FOR_DUMP_TIMEOUT_MS); + /* Wait for dump & cache clean to complete */ - wait_event(kbdev->hwcnt.backend.wait, kbdev->hwcnt.backend.triggered != 0); + remaining = wait_event_timeout(kbdev->hwcnt.backend.wait, + kbdev->hwcnt.backend.triggered != 0, timeout); + if (remaining == 0) { + err = -ETIME; + /* Set the backend state so it's clear things have gone bad (could be a HW issue) + */ + kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_UNRECOVERABLE_ERROR; + goto timed_out; + } spin_lock_irqsave(&kbdev->hwcnt.lock, flags); @@ -336,7 +351,7 @@ int kbase_instr_hwcnt_wait_for_dump(struct kbase_context *kctx) } spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); - +timed_out: return err; } 
diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_irq_internal.h b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_irq_internal.h index 34e8178d1d76..feb76757f955 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_irq_internal.h +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_irq_internal.h @@ -74,7 +74,7 @@ void kbase_synchronize_irqs(struct kbase_device *kbdev); * Return: 0 on success. Error code (negative) on failure. */ int kbase_validate_interrupts(struct kbase_device *const kbdev); -#endif /* CONFIG_MALI_REAL_HW */ +#endif /* IS_ENABLED(CONFIG_MALI_REAL_HW) */ #endif /* CONFIG_MALI_BIFROST_DEBUG */ /** diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_irq_linux.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_irq_linux.c index 9cb367508dde..152b140b5381 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_irq_linux.c +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_irq_linux.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2024 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -23,6 +23,7 @@ #include #include + #include #if IS_ENABLED(CONFIG_MALI_REAL_HW) @@ -163,13 +164,9 @@ static irqreturn_t kbase_gpu_irq_handler(int irq, void *data) static irqreturn_t kbase_combined_irq_handler(int irq, void *data) { irqreturn_t irq_state = IRQ_NONE; - - if (kbase_job_irq_handler(irq, data) == IRQ_HANDLED) - irq_state = IRQ_HANDLED; - if (kbase_mmu_irq_handler(irq, data) == IRQ_HANDLED) - irq_state = IRQ_HANDLED; - if (kbase_gpu_irq_handler(irq, data) == IRQ_HANDLED) - irq_state = IRQ_HANDLED; + irq_state |= kbase_job_irq_handler(irq, data); + irq_state |= kbase_mmu_irq_handler(irq, data); + irq_state |= kbase_gpu_irq_handler(irq, data); return irq_state; } @@ -212,8 +209,7 @@ int kbase_set_custom_irq_handler(struct kbase_device *kbdev, irq_handler_t custo if (!handler) handler = kbase_get_interrupt_handler(kbdev, irq_tag); - if (request_irq(kbdev->irqs[irq].irq, handler, - kbdev->irqs[irq].flags | ((kbdev->nr_irqs == 1) ? 0 : IRQF_SHARED), + if (request_irq(kbdev->irqs[irq].irq, handler, kbdev->irqs[irq].flags | IRQF_SHARED, dev_name(kbdev->dev), kbase_tag(kbdev, irq)) != 0) { result = -EINVAL; dev_err(kbdev->dev, "Can't request interrupt %u (index %u)\n", kbdev->irqs[irq].irq, @@ -396,8 +392,8 @@ static int validate_interrupt(struct kbase_device *const kbdev, u32 tag) /* restore original interrupt */ if (request_irq(kbdev->irqs[irq].irq, kbase_get_interrupt_handler(kbdev, tag), - kbdev->irqs[irq].flags | ((kbdev->nr_irqs == 1) ? 
0 : IRQF_SHARED), - dev_name(kbdev->dev), kbase_tag(kbdev, irq))) { + kbdev->irqs[irq].flags | IRQF_SHARED, dev_name(kbdev->dev), + kbase_tag(kbdev, irq))) { dev_err(kbdev->dev, "Can't restore original interrupt %u (index %u)\n", kbdev->irqs[irq].irq, tag); err = -EINVAL; @@ -449,10 +445,10 @@ int kbase_install_interrupts(struct kbase_device *kbdev) u32 i; for (i = 0; i < kbdev->nr_irqs; i++) { - const int result = request_irq( - kbdev->irqs[i].irq, kbase_get_interrupt_handler(kbdev, i), - kbdev->irqs[i].flags | ((kbdev->nr_irqs == 1) ? 0 : IRQF_SHARED), - dev_name(kbdev->dev), kbase_tag(kbdev, i)); + const int result = request_irq(kbdev->irqs[i].irq, + kbase_get_interrupt_handler(kbdev, i), + kbdev->irqs[i].flags | IRQF_SHARED, + dev_name(kbdev->dev), kbase_tag(kbdev, i)); if (result) { dev_err(kbdev->dev, "Can't request interrupt %u (index %u)\n", kbdev->irqs[i].irq, i); diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_hw.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_hw.c index e822dc59977b..b251de4fc23e 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_hw.c +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_hw.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2024 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -98,82 +98,6 @@ static u64 kbase_job_write_affinity(struct kbase_device *kbdev, base_jd_core_req return affinity; } -/** - * select_job_chain() - Select which job chain to submit to the GPU - * @katom: Pointer to the atom about to be submitted to the GPU - * - * Selects one of the fragment job chains attached to the special atom at the - * end of a renderpass, or returns the address of the single job chain attached - * to any other type of atom. 
- * - * Which job chain is selected depends upon whether the tiling phase of the - * renderpass completed normally or was soft-stopped because it used too - * much memory. It also depends upon whether one of the fragment job chains - * has already been run as part of the same renderpass. - * - * Return: GPU virtual address of the selected job chain - */ -static u64 select_job_chain(struct kbase_jd_atom *katom) -{ - struct kbase_context *const kctx = katom->kctx; - u64 jc = katom->jc; - struct kbase_jd_renderpass *rp; - - lockdep_assert_held(&kctx->kbdev->hwaccess_lock); - - if (!(katom->core_req & BASE_JD_REQ_END_RENDERPASS)) - return jc; - - compiletime_assert((1ull << (sizeof(katom->renderpass_id) * 8)) <= - ARRAY_SIZE(kctx->jctx.renderpasses), - "Should check invalid access to renderpasses"); - - rp = &kctx->jctx.renderpasses[katom->renderpass_id]; - /* We can read a subset of renderpass state without holding - * higher-level locks (but not end_katom, for example). - * If the end-of-renderpass atom is running with as-yet indeterminate - * OOM state then assume that the start atom was not soft-stopped. - */ - switch (rp->state) { - case KBASE_JD_RP_OOM: - /* Tiling ran out of memory. - * Start of incremental rendering, used once. - */ - jc = katom->jc_fragment.norm_read_forced_write; - break; - case KBASE_JD_RP_START: - case KBASE_JD_RP_PEND_OOM: - /* Tiling completed successfully first time. - * Single-iteration rendering, used once. - */ - jc = katom->jc_fragment.norm_read_norm_write; - break; - case KBASE_JD_RP_RETRY_OOM: - /* Tiling ran out of memory again. - * Continuation of incremental rendering, used as - * many times as required. - */ - jc = katom->jc_fragment.forced_read_forced_write; - break; - case KBASE_JD_RP_RETRY: - case KBASE_JD_RP_RETRY_PEND_OOM: - /* Tiling completed successfully this time. - * End of incremental rendering, used once. 
- */ - jc = katom->jc_fragment.forced_read_norm_write; - break; - default: - WARN_ON(1); - break; - } - - dev_dbg(kctx->kbdev->dev, "Selected job chain 0x%llx for end atom %pK in state %d\n", jc, - (void *)katom, (int)rp->state); - - katom->jc = jc; - return jc; -} - static inline bool kbasep_jm_wait_js_free(struct kbase_device *kbdev, unsigned int js, struct kbase_context *kctx) { @@ -196,7 +120,7 @@ int kbase_job_hw_submit(struct kbase_device *kbdev, struct kbase_jd_atom *katom, { struct kbase_context *kctx; u32 cfg; - u64 const jc_head = select_job_chain(katom); + u64 jc_head = katom->jc; u64 affinity; struct slot_rb *ptr_slot_rb = &kbdev->hwaccess.backend.slot_rb[js]; @@ -220,21 +144,21 @@ int kbase_job_hw_submit(struct kbase_device *kbdev, struct kbase_jd_atom *katom, */ cfg = (u32)kctx->as_nr; - if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_FLUSH_REDUCTION) && + if (kbase_hw_has_feature(kbdev, KBASE_HW_FEATURE_FLUSH_REDUCTION) && !(kbdev->serialize_jobs & KBASE_SERIALIZE_RESET)) cfg |= JS_CONFIG_ENABLE_FLUSH_REDUCTION; if (0 != (katom->core_req & BASE_JD_REQ_SKIP_CACHE_START)) { /* Force a cache maintenance operation if the newly submitted * katom to the slot is from a different kctx. For a JM GPU - * that has the feature BASE_HW_FEATURE_FLUSH_INV_SHADER_OTHER, + * that has the feature KBASE_HW_FEATURE_FLUSH_INV_SHADER_OTHER, * applies a FLUSH_INV_SHADER_OTHER. Otherwise, do a * FLUSH_CLEAN_INVALIDATE. 
*/ u64 tagged_kctx = ptr_slot_rb->last_kctx_tagged; if (tagged_kctx != SLOT_RB_NULL_TAG_VAL && tagged_kctx != SLOT_RB_TAG_KCTX(kctx)) { - if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_FLUSH_INV_SHADER_OTHER)) + if (kbase_hw_has_feature(kbdev, KBASE_HW_FEATURE_FLUSH_INV_SHADER_OTHER)) cfg |= JS_CONFIG_START_FLUSH_INV_SHADER_OTHER; else cfg |= JS_CONFIG_START_FLUSH_CLEAN_INVALIDATE; @@ -246,15 +170,14 @@ int kbase_job_hw_submit(struct kbase_device *kbdev, struct kbase_jd_atom *katom, if (0 != (katom->core_req & BASE_JD_REQ_SKIP_CACHE_END) && !(kbdev->serialize_jobs & KBASE_SERIALIZE_RESET)) cfg |= JS_CONFIG_END_FLUSH_NO_ACTION; - else if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_CLEAN_ONLY_SAFE)) + else if (kbase_hw_has_feature(kbdev, KBASE_HW_FEATURE_CLEAN_ONLY_SAFE)) cfg |= JS_CONFIG_END_FLUSH_CLEAN; else cfg |= JS_CONFIG_END_FLUSH_CLEAN_INVALIDATE; cfg |= JS_CONFIG_THREAD_PRI(8); - if ((katom->atom_flags & KBASE_KATOM_FLAG_PROTECTED) || - (katom->core_req & BASE_JD_REQ_END_RENDERPASS)) + if (katom->atom_flags & KBASE_KATOM_FLAG_PROTECTED) cfg |= JS_CONFIG_DISABLE_DESCRIPTOR_WR_BK; if (!ptr_slot_rb->job_chain_flag) { @@ -268,7 +191,7 @@ int kbase_job_hw_submit(struct kbase_device *kbdev, struct kbase_jd_atom *katom, kbase_reg_write32(kbdev, JOB_SLOT_OFFSET(js, CONFIG_NEXT), cfg); - if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_FLUSH_REDUCTION)) + if (kbase_hw_has_feature(kbdev, KBASE_HW_FEATURE_FLUSH_REDUCTION)) kbase_reg_write32(kbdev, JOB_SLOT_OFFSET(js, FLUSH_ID_NEXT), katom->flush_id); /* Write an approximate start timestamp. @@ -440,7 +363,7 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done) * jobs to hang. Reset GPU before allowing * any other jobs on the slot to continue. 
*/ - if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TTRX_3076)) { + if (kbase_hw_has_issue(kbdev, KBASE_HW_ISSUE_TTRX_3076)) { if (completion_code == BASE_JD_EVENT_JOB_BUS_FAULT) { if (kbase_prepare_to_reset_gpu_locked( kbdev, RESET_FLAGS_NONE)) @@ -740,66 +663,6 @@ void kbase_job_slot_ctx_priority_check_locked(struct kbase_context *kctx, } } -static int softstop_start_rp_nolock(struct kbase_context *kctx, struct kbase_va_region *reg) -{ - struct kbase_device *const kbdev = kctx->kbdev; - struct kbase_jd_atom *katom; - struct kbase_jd_renderpass *rp; - - lockdep_assert_held(&kbdev->hwaccess_lock); - - katom = kbase_gpu_inspect(kbdev, 1, 0); - - if (!katom) { - dev_dbg(kctx->kbdev->dev, "No atom on job slot\n"); - return -ESRCH; - } - - if (!(katom->core_req & BASE_JD_REQ_START_RENDERPASS)) { - dev_dbg(kctx->kbdev->dev, "Atom %pK on job slot is not start RP\n", (void *)katom); - return -EPERM; - } - - compiletime_assert((1ull << (sizeof(katom->renderpass_id) * 8)) <= - ARRAY_SIZE(kctx->jctx.renderpasses), - "Should check invalid access to renderpasses"); - - rp = &kctx->jctx.renderpasses[katom->renderpass_id]; - if (WARN_ON(rp->state != KBASE_JD_RP_START && rp->state != KBASE_JD_RP_RETRY)) - return -EINVAL; - - dev_dbg(kctx->kbdev->dev, "OOM in state %d with region %pK\n", (int)rp->state, (void *)reg); - - if (WARN_ON(katom != rp->start_katom)) - return -EINVAL; - - dev_dbg(kctx->kbdev->dev, "Adding region %pK to list %pK\n", (void *)reg, - (void *)&rp->oom_reg_list); - list_move_tail(&reg->link, &rp->oom_reg_list); - dev_dbg(kctx->kbdev->dev, "Added region to list\n"); - - rp->state = (rp->state == KBASE_JD_RP_START ?
KBASE_JD_RP_PEND_OOM : - KBASE_JD_RP_RETRY_PEND_OOM); - - kbase_job_slot_softstop(kbdev, 1, katom); - - return 0; -} - -int kbase_job_slot_softstop_start_rp(struct kbase_context *const kctx, - struct kbase_va_region *const reg) -{ - struct kbase_device *const kbdev = kctx->kbdev; - int err; - unsigned long flags; - - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - err = softstop_start_rp_nolock(kctx, reg); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - - return err; -} - void kbase_jm_wait_for_zero_jobs(struct kbase_context *kctx) { struct kbase_device *kbdev = kctx->kbdev; @@ -839,7 +702,7 @@ u32 kbase_backend_get_current_flush_id(struct kbase_device *kbdev) { u32 flush_id = 0; - if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_FLUSH_REDUCTION)) { + if (kbase_hw_has_feature(kbdev, KBASE_HW_FEATURE_FLUSH_REDUCTION)) { mutex_lock(&kbdev->pm.lock); if (kbdev->pm.backend.gpu_powered) flush_id = kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(LATEST_FLUSH)); @@ -1085,7 +948,7 @@ static void kbasep_reset_timeout_worker(struct work_struct *data) /* The flush has completed so reset the active indicator */ kbdev->irq_reset_flush = false; - if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TMIX_8463)) { + if (kbase_hw_has_issue(kbdev, KBASE_HW_ISSUE_TMIX_8463)) { u64 val; const u32 timeout_us = kbase_get_timeout_ms(kbdev, KBASE_CLEAN_CACHE_TIMEOUT) * USEC_PER_MSEC; @@ -1268,14 +1131,12 @@ bool kbase_prepare_to_reset_gpu_locked(struct kbase_device *kbdev, unsigned int { unsigned int i; -#ifdef CONFIG_MALI_ARBITER_SUPPORT if (kbase_pm_is_gpu_lost(kbdev)) { /* GPU access has been removed, reset will be done by * Arbiter instead */ return false; } -#endif if (flags & RESET_FLAGS_HWC_UNRECOVERABLE_ERROR) kbase_instr_hwcnt_on_unrecoverable_error(kbdev); @@ -1328,7 +1189,7 @@ void kbase_reset_gpu(struct kbase_device *kbdev) if (!kbase_is_quick_reset_enabled(kbdev)) dev_err(kbdev->dev, - "Preparing to soft-reset GPU: Waiting (upto %d ms) for all jobs to complete soft-stop\n", + 
"Preparing to soft-reset GPU: Waiting (up to %d ms) for all jobs to complete soft-stop\n", kbdev->reset_timeout_ms); hrtimer_start(&kbdev->hwaccess.backend.reset_timer, @@ -1350,7 +1211,7 @@ void kbase_reset_gpu_locked(struct kbase_device *kbdev) if (!kbase_is_quick_reset_enabled(kbdev)) dev_err(kbdev->dev, - "Preparing to soft-reset GPU: Waiting (upto %d ms) for all jobs to complete soft-stop\n", + "Preparing to soft-reset GPU: Waiting (up to %d ms) for all jobs to complete soft-stop\n", kbdev->reset_timeout_ms); hrtimer_start(&kbdev->hwaccess.backend.reset_timer, HR_TIMER_DELAY_MSEC(kbdev->reset_timeout_ms), HRTIMER_MODE_REL); diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_rb.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_rb.c index 842209f9c049..a4a640a0fb92 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_rb.c +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_rb.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2024 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -425,7 +425,7 @@ static void kbase_gpu_release_atom(struct kbase_device *kbdev, struct kbase_jd_a } } - if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TGOX_R1_1234)) { + if (kbase_hw_has_issue(kbdev, KBASE_HW_ISSUE_TGOX_R1_1234)) { if (katom->atom_flags & KBASE_KATOM_FLAG_HOLDING_L2_REF_PROT) { kbase_pm_protected_l2_override(kbdev, false); katom->atom_flags &= ~KBASE_KATOM_FLAG_HOLDING_L2_REF_PROT; @@ -698,7 +698,7 @@ static int kbase_jm_enter_protected_mode(struct kbase_device *kbdev, struct kbas kbase_pm_protected_entry_override_disable(kbdev); - if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TGOX_R1_1234)) { + if (kbase_hw_has_issue(kbdev, KBASE_HW_ISSUE_TGOX_R1_1234)) { /* * Power on L2 caches; this will also result in the * correct value written to coherency enable register. @@ -714,13 +714,13 @@ static int kbase_jm_enter_protected_mode(struct kbase_device *kbdev, struct kbas katom[idx]->protected_state.enter = KBASE_ATOM_ENTER_PROTECTED_FINISHED; - if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TGOX_R1_1234)) + if (kbase_hw_has_issue(kbdev, KBASE_HW_ISSUE_TGOX_R1_1234)) return -EAGAIN; /* ***TRANSITION TO HIGHER STATE*** */ fallthrough; case KBASE_ATOM_ENTER_PROTECTED_FINISHED: - if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TGOX_R1_1234)) { + if (kbase_hw_has_issue(kbdev, KBASE_HW_ISSUE_TGOX_R1_1234)) { /* * Check that L2 caches are powered and, if so, * enter protected mode. 
@@ -864,11 +864,7 @@ void kbase_backend_slot_update(struct kbase_device *kbdev) lockdep_assert_held(&kbdev->hwaccess_lock); -#ifdef CONFIG_MALI_ARBITER_SUPPORT - if (kbase_reset_gpu_is_active(kbdev) || kbase_is_gpu_removed(kbdev)) -#else - if (kbase_reset_gpu_is_active(kbdev)) -#endif + if (kbase_reset_gpu_is_active(kbdev) || (kbase_is_gpu_removed(kbdev))) return; for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { @@ -896,7 +892,7 @@ void kbase_backend_slot_update(struct kbase_device *kbdev) break; case KBASE_ATOM_GPU_RB_WAITING_BLOCKED: - if (kbase_js_atom_blocked_on_x_dep(katom[idx])) + if (katom[idx]->atom_flags & KBASE_KATOM_FLAG_X_DEP_BLOCKED) break; katom[idx]->gpu_rb_state = @@ -1236,7 +1232,7 @@ void kbase_gpu_complete_hw(struct kbase_device *kbdev, unsigned int js, u32 comp * When a hard-stop is followed close after a soft-stop, the completion * code may be set to STOPPED, even though the job is terminated */ - if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TMIX_8438)) { + if (kbase_hw_has_issue(kbdev, KBASE_HW_ISSUE_TMIX_8438)) { if (completion_code == BASE_JD_EVENT_STOPPED && (katom->atom_flags & KBASE_KATOM_FLAG_BEEN_HARD_STOPPED)) { completion_code = BASE_JD_EVENT_TERMINATED; @@ -1331,6 +1327,9 @@ void kbase_gpu_complete_hw(struct kbase_device *kbdev, unsigned int js, u32 comp dev_dbg(kbdev->dev, "Update job chain address of atom %pK to resume from 0x%llx\n", (void *)katom, job_tail); + /* Some of the job has been executed, so we update the job chain address to where + * we should resume from + */ katom->jc = job_tail; KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_UPDATE_HEAD, katom->kctx, katom, job_tail, js); } @@ -1381,6 +1380,8 @@ void kbase_gpu_complete_hw(struct kbase_device *kbdev, unsigned int js, u32 comp dev_dbg(kbdev->dev, "Cross-slot dependency %pK has become runnable.\n", (void *)katom); + /* Cross-slot dependency has now become runnable. Try to submit it. 
*/ + /* Check if there are lower priority jobs to soft stop */ kbase_job_slot_ctx_priority_check_locked(kctx, katom); @@ -1437,7 +1438,7 @@ void kbase_backend_reset(struct kbase_device *kbdev, ktime_t *end_timestamp) * then leave it in the RB and next time we're kicked * it will be processed again from the starting state. */ - if (keep_in_jm_rb) { + if (!kbase_is_gpu_removed(kbdev) && keep_in_jm_rb) { katom->protected_state.exit = KBASE_ATOM_EXIT_PROTECTED_CHECK; /* As the atom was not removed, increment the * index so that we read the correct atom in the diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_js_backend.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_js_backend.c index 202671b323d5..99037c25bf08 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_js_backend.c +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_js_backend.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2024 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -46,7 +46,7 @@ static inline bool timer_callback_should_run(struct kbase_device *kbdev, int nr_ } #endif /* CONFIG_MALI_BIFROST_DEBUG */ - if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_9435)) { + if (kbase_hw_has_issue(kbdev, KBASE_HW_ISSUE_9435)) { /* Timeouts would have to be 4x longer (due to micro- * architectural design) to support OpenCL conformance tests, so * only run the timer when there's: @@ -100,7 +100,7 @@ static enum hrtimer_restart timer_callback(struct hrtimer *timer) /* The current version of the model doesn't support * Soft-Stop */ - if (!kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_5736)) { + if (!kbase_hw_has_issue(kbdev, KBASE_HW_ISSUE_5736)) { u32 ticks = atom->ticks++; #if !defined(CONFIG_MALI_JOB_DUMP) && !defined(CONFIG_MALI_VECTOR_DUMP) diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_dummy.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_dummy.c index 41b9b37797d3..0f4a8cd096bb 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_dummy.c +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_dummy.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2024 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -25,42 +25,8 @@ * insmod'ing mali_kbase.ko with no arguments after a build with "scons * gpu=tXYZ" will yield the expected GPU ID for tXYZ. This can always be * overridden by passing the 'no_mali_gpu' argument to insmod. - * - * - if CONFIG_MALI_BIFROST_ERROR_INJECT is defined the error injection system is - * activated. 
*/ -/* Implementation of failure injection system: - * - * Error conditions are generated by gpu_generate_error(). - * According to CONFIG_MALI_BIFROST_ERROR_INJECT definition gpu_generate_error() either - * generates an error HW condition randomly (CONFIG_MALI_ERROR_INJECT_RANDOM) or - * checks if there is (in error_track_list) an error configuration to be set for - * the current job chain (CONFIG_MALI_ERROR_INJECT_RANDOM not defined). - * Each error condition will trigger a specific "state" for a certain set of - * registers as per Midgard Architecture Specifications doc. - * - * According to Midgard Architecture Specifications doc the following registers - * are always affected by error conditions: - * - * JOB Exception: - * JOB_IRQ_RAWSTAT - * JOB STATUS AREA - * - * MMU Exception: - * MMU_IRQ_RAWSTAT - * AS_FAULTSTATUS - * AS_FAULTADDRESS - * - * GPU Exception: - * GPU_IRQ_RAWSTAT - * GPU_FAULTSTATUS - * GPU_FAULTADDRESS - * - * For further clarification on the model behaviour upon specific error - * conditions the user may refer to the Midgard Architecture Specification - * document - */ #include #include #include @@ -126,7 +92,7 @@ struct error_status_t hw_error_status; */ struct control_reg_values_t { const char *name; - u32 gpu_id; + u64 gpu_id; u32 as_present; u32 thread_max_threads; u32 thread_max_workgroup_size; @@ -524,7 +490,7 @@ MODULE_PARM_DESC(no_mali_gpu, "GPU to identify as"); static u32 gpu_model_get_prfcnt_value(enum kbase_ipa_core_type core_type, u32 cnt_idx, bool is_low_word) { - u64 *counters_data; + u64 *counters_data = NULL; u32 core_count = 0; u32 event_index; u64 value = 0; @@ -580,6 +546,9 @@ static u32 gpu_model_get_prfcnt_value(enum kbase_ipa_core_type core_type, u32 cn break; } + if (unlikely(counters_data == NULL)) + return 0; + for (core = 0; core < core_count; core++) { value += counters_data[event_index]; event_index += KBASE_DUMMY_MODEL_COUNTER_PER_CORE; @@ -1172,9 +1141,6 @@ static void midgard_model_update(void *h) /*this job 
is done assert IRQ lines */ signal_int(dummy, i); -#ifdef CONFIG_MALI_BIFROST_ERROR_INJECT - midgard_set_error(i); -#endif /* CONFIG_MALI_BIFROST_ERROR_INJECT */ update_register_statuses(dummy, i); /*if this job slot returned failures we cannot use it */ if (hw_error_status.job_irq_rawstat & (1u << (i + 16))) { @@ -1564,6 +1530,7 @@ void midgard_model_write_reg(void *h, u32 addr, u32 value) case L2_PWROFF_HI: case PWR_KEY: case PWR_OVERRIDE0: + case PWR_OVERRIDE1: #if MALI_USE_CSF case SHADER_PWRFEATURES: case CSF_CONFIG: @@ -1607,8 +1574,7 @@ void midgard_model_read_reg(void *h, u32 addr, u32 *const value) #else /* !MALI_USE_CSF */ if (addr == GPU_CONTROL_REG(GPU_ID)) { #endif /* !MALI_USE_CSF */ - - *value = dummy->control_reg_values->gpu_id; + *value = dummy->control_reg_values->gpu_id & U32_MAX; } else if (addr == JOB_CONTROL_REG(JOB_IRQ_RAWSTAT)) { *value = hw_error_status.job_irq_rawstat; pr_debug("%s", "JS_IRQ_RAWSTAT being read"); @@ -1987,7 +1953,8 @@ void midgard_model_read_reg(void *h, u32 addr, u32 *const value) *value = dummy->control_reg_values->gpu_features_lo; } else if (addr == GPU_CONTROL_REG(GPU_FEATURES_HI)) { *value = dummy->control_reg_values->gpu_features_hi; - } else { + } + else { model_error_log( KBASE_CORE, "Dummy model register access: Reading unsupported register 0x%x. 
Returning 0\n", @@ -2166,9 +2133,3 @@ int gpu_model_control(void *model, struct kbase_model_control_params *params) return 0; } - -u64 midgard_model_arch_timer_get_cntfrq(void *h) -{ - CSTD_UNUSED(h); - return arch_timer_get_cntfrq(); -} diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_error_generator.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_error_generator.c deleted file mode 100644 index 86d4e26bd6b4..000000000000 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_error_generator.c +++ /dev/null @@ -1,172 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note -/* - * - * (C) COPYRIGHT 2014-2015, 2018-2023 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU license. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. 
- * - */ - -#include -#include -#include "backend/gpu/mali_kbase_model_linux.h" - -static struct kbase_error_atom *error_track_list; - -#ifdef CONFIG_MALI_ERROR_INJECT_RANDOM - -/** Kernel 6.1.0 has dropped prandom_u32(), use get_random_u32() */ -#if (KERNEL_VERSION(6, 1, 0) <= LINUX_VERSION_CODE) -#define prandom_u32 get_random_u32 -#endif - -/*following error probability are set quite high in order to stress the driver*/ -static unsigned int error_probability = 50; /* to be set between 0 and 100 */ -/* probability to have multiple error give that there is an error */ -static unsigned int multiple_error_probability = 50; - -/* all the error conditions supported by the model */ -#define TOTAL_FAULTS 27 -/* maximum number of levels in the MMU translation table tree */ -#define MAX_MMU_TABLE_LEVEL 4 -/* worst case scenario is <1 MMU fault + 1 job fault + 2 GPU faults> */ -#define MAX_CONCURRENT_FAULTS 3 - -/** - * gpu_generate_error - Generate GPU error - */ -static void gpu_generate_error(void) -{ - unsigned int errors_num = 0; - - /*is there at least one error? */ - if ((prandom_u32() % 100) < error_probability) { - /* pick up a faulty mmu address space */ - hw_error_status.faulty_mmu_as = prandom_u32() % NUM_MMU_AS; - /* pick up an mmu table level */ - hw_error_status.mmu_table_level = 1 + (prandom_u32() % MAX_MMU_TABLE_LEVEL); - hw_error_status.errors_mask = (u32)(1 << (prandom_u32() % TOTAL_FAULTS)); - - /*is there also one or more errors? 
*/ - if ((prandom_u32() % 100) < multiple_error_probability) { - errors_num = 1 + (prandom_u32() % (MAX_CONCURRENT_FAULTS - 1)); - while (errors_num-- > 0) { - u32 temp_mask; - - temp_mask = (u32)(1 << (prandom_u32() % TOTAL_FAULTS)); - /* below we check that no bit of the same error - * type is set again in the error mask - */ - if ((temp_mask & IS_A_JOB_ERROR) && - (hw_error_status.errors_mask & IS_A_JOB_ERROR)) { - errors_num++; - continue; - } - if ((temp_mask & IS_A_MMU_ERROR) && - (hw_error_status.errors_mask & IS_A_MMU_ERROR)) { - errors_num++; - continue; - } - if ((temp_mask & IS_A_GPU_ERROR) && - (hw_error_status.errors_mask & IS_A_GPU_ERROR)) { - errors_num++; - continue; - } - /* this error mask is already set */ - if ((hw_error_status.errors_mask | temp_mask) == - hw_error_status.errors_mask) { - errors_num++; - continue; - } - hw_error_status.errors_mask |= temp_mask; - } - } - } -} -#endif - -int job_atom_inject_error(struct kbase_error_params *params) -{ - struct kbase_error_atom *new_elem; - - KBASE_DEBUG_ASSERT(params); - - new_elem = kzalloc(sizeof(*new_elem), GFP_KERNEL); - - if (!new_elem) { - model_error_log(KBASE_CORE, - "\njob_atom_inject_error: kzalloc failed for new_elem\n"); - return -ENOMEM; - } - new_elem->params.jc = params->jc; - new_elem->params.errors_mask = params->errors_mask; - new_elem->params.mmu_table_level = params->mmu_table_level; - new_elem->params.faulty_mmu_as = params->faulty_mmu_as; - - /*circular list below */ - if (error_track_list == NULL) { /*no elements */ - error_track_list = new_elem; - new_elem->next = error_track_list; - } else { - struct kbase_error_atom *walker = error_track_list; - - while (walker->next != error_track_list) - walker = walker->next; - - new_elem->next = error_track_list; - walker->next = new_elem; - } - return 0; -} - -void midgard_set_error(u32 job_slot) -{ -#ifdef CONFIG_MALI_ERROR_INJECT_RANDOM - gpu_generate_error(); -#else - struct kbase_error_atom *walker, *auxiliar; - - if 
(error_track_list != NULL) { - walker = error_track_list->next; - auxiliar = error_track_list; - do { - if (walker->params.jc == hw_error_status.current_jc) { - /* found a faulty atom matching with the - * current one - */ - hw_error_status.errors_mask = walker->params.errors_mask; - hw_error_status.mmu_table_level = walker->params.mmu_table_level; - hw_error_status.faulty_mmu_as = walker->params.faulty_mmu_as; - hw_error_status.current_job_slot = job_slot; - - if (walker->next == walker) { - /* only one element */ - kfree(error_track_list); - error_track_list = NULL; - } else { - auxiliar->next = walker->next; - if (walker == error_track_list) - error_track_list = walker->next; - - kfree(walker); - } - break; - } - auxiliar = walker; - walker = walker->next; - } while (auxiliar->next != error_track_list); - } -#endif /* CONFIG_MALI_ERROR_INJECT_RANDOM */ -} diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_linux.h b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_linux.h index 77e089ef45c8..d38bb8891be1 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_linux.h +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_linux.h @@ -48,12 +48,8 @@ /* * Include Model definitions */ - -#if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) #include -#endif /* IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) */ -#if !IS_ENABLED(CONFIG_MALI_REAL_HW) /** * kbase_gpu_device_create() - Generic create function. * @@ -116,15 +112,6 @@ void midgard_model_write_reg(void *h, u32 addr, u32 value); */ void midgard_model_read_reg(void *h, u32 addr, u32 *const value); -/** - * midgard_model_arch_timer_get_cntfrq - Get Model specific System Timer Frequency - * - * @h: Model handle. - * - * Return: Frequency in Hz - */ -u64 midgard_model_arch_timer_get_cntfrq(void *h); - /** * gpu_device_raise_irq() - Private IRQ raise function. * @@ -155,6 +142,5 @@ void gpu_device_set_data(void *model, void *data); * Return: Pointer to the data carried by model. 
*/ void *gpu_device_get_data(void *model); -#endif /* !IS_ENABLED(CONFIG_MALI_REAL_HW) */ #endif /* _KBASE_MODEL_LINUX_H_ */ diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_backend.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_backend.c index ca4e73d3fbb7..e1941d50133a 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_backend.c +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_backend.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2024 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -36,6 +36,7 @@ #include #include #include +#include #endif /* !MALI_USE_CSF */ #include #include @@ -97,10 +98,8 @@ void kbase_pm_register_access_enable(struct kbase_device *kbdev) if (callbacks) callbacks->power_on_callback(kbdev); -#ifdef CONFIG_MALI_ARBITER_SUPPORT if (WARN_ON(kbase_pm_is_gpu_lost(kbdev))) dev_err(kbdev->dev, "Attempting to power on while GPU lost\n"); -#endif kbdev->pm.backend.gpu_powered = true; } @@ -133,9 +132,7 @@ int kbase_hwaccess_pm_init(struct kbase_device *kbdev) INIT_WORK(&kbdev->pm.backend.gpu_poweroff_wait_work, kbase_pm_gpu_poweroff_wait_wq); kbdev->pm.backend.ca_cores_enabled = ~0ull; -#ifdef CONFIG_MALI_ARBITER_SUPPORT kbase_pm_set_gpu_lost(kbdev, false); -#endif init_waitqueue_head(&kbdev->pm.backend.gpu_in_desired_state_wait); #if !MALI_USE_CSF @@ -177,15 +174,18 @@ int kbase_hwaccess_pm_init(struct kbase_device *kbdev) kbase_hwcnt_context_disable(kbdev->hwcnt_gpu_ctx); #if MALI_USE_CSF && defined(KBASE_PM_RUNTIME) - kbdev->pm.backend.gpu_sleep_supported = - kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_GPU_SLEEP) && - !kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TURSEHW_1997) && - kbdev->pm.backend.callback_power_runtime_gpu_active && - 
kbdev->pm.backend.callback_power_runtime_gpu_idle; + kbdev->pm.backend.gpu_sleep_allowed = 0; + if (kbase_hw_has_feature(kbdev, KBASE_HW_FEATURE_GPU_SLEEP) && + !kbase_hw_has_issue(kbdev, KBASE_HW_ISSUE_TURSEHW_1997) && + kbdev->pm.backend.callback_power_runtime_gpu_active && + kbdev->pm.backend.callback_power_runtime_gpu_idle) + set_bit(KBASE_GPU_SUPPORTS_GPU_SLEEP, &kbdev->pm.backend.gpu_sleep_allowed); kbdev->pm.backend.apply_hw_issue_TITANHW_2938_wa = - kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TITANHW_2938) && - kbdev->pm.backend.gpu_sleep_supported; + kbase_hw_has_issue(kbdev, KBASE_HW_ISSUE_TITANHW_2938) && + test_bit(KBASE_GPU_SUPPORTS_GPU_SLEEP, &kbdev->pm.backend.gpu_sleep_allowed); + + /* FW Sleep-on-Idle feature is kept disabled */ #endif if (IS_ENABLED(CONFIG_MALI_HW_ERRATA_1485982_NOT_AFFECTED)) @@ -193,14 +193,14 @@ int kbase_hwaccess_pm_init(struct kbase_device *kbdev) /* WA1: L2 always_on for GPUs being affected by GPU2017-1336 */ if (!IS_ENABLED(CONFIG_MALI_HW_ERRATA_1485982_USE_CLOCK_ALTERNATIVE)) { - if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_GPU2017_1336)) + if (kbase_hw_has_issue(kbdev, KBASE_HW_ISSUE_GPU2017_1336)) kbdev->pm.backend.l2_always_on = true; return 0; } /* WA3: Clock slow down for GPUs being affected by GPU2017-1336 */ - if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_GPU2017_1336)) { + if (kbase_hw_has_issue(kbdev, KBASE_HW_ISSUE_GPU2017_1336)) { kbdev->pm.backend.gpu_clock_slow_down_wa = true; kbdev->pm.backend.gpu_clock_slow_down_desired = true; INIT_WORK(&kbdev->pm.backend.gpu_clock_control_work, @@ -345,13 +345,11 @@ static void pm_handle_power_off(struct kbase_device *kbdev) */ wait_for_mmu_fault_handling_in_gpu_poweroff_wait_wq(kbdev); -#ifdef CONFIG_MALI_ARBITER_SUPPORT /* poweron_required may have changed while pm lock * was released. */ if (kbase_pm_is_gpu_lost(kbdev)) backend->poweron_required = false; -#endif /* Turn off clock now that fault have been handled.
We * dropped locks so poweron_required may have changed - @@ -393,7 +391,7 @@ static void kbase_pm_gpu_poweroff_wait_wq(struct work_struct *data) backend->poweron_required = false; kbdev->pm.backend.l2_desired = true; #if MALI_USE_CSF - kbdev->pm.backend.mcu_desired = true; + kbdev->pm.backend.mcu_desired = kbdev->pm.backend.mcu_poweron_required; #endif kbase_pm_update_state(kbdev); kbase_pm_update_cores_state_nolock(kbdev); @@ -860,9 +858,11 @@ void kbase_pm_set_debug_core_mask(struct kbase_device *kbdev, u64 new_core_mask) } KBASE_EXPORT_TEST_API(kbase_pm_set_debug_core_mask); #else -void kbase_pm_set_debug_core_mask(struct kbase_device *kbdev, u64 new_core_mask_js0, - u64 new_core_mask_js1, u64 new_core_mask_js2) +void kbase_pm_set_debug_core_mask(struct kbase_device *kbdev, u64 *new_core_mask, + size_t new_core_mask_size) { + size_t i; + lockdep_assert_held(&kbdev->hwaccess_lock); lockdep_assert_held(&kbdev->pm.lock); @@ -870,13 +870,14 @@ void kbase_pm_set_debug_core_mask(struct kbase_device *kbdev, u64 new_core_mask_ dev_warn_once( kbdev->dev, "Change of core mask not supported for slot 0 as dummy job WA is enabled"); - new_core_mask_js0 = kbdev->pm.debug_core_mask[0]; + new_core_mask[0] = kbdev->pm.debug_core_mask[0]; } - kbdev->pm.debug_core_mask[0] = new_core_mask_js0; - kbdev->pm.debug_core_mask[1] = new_core_mask_js1; - kbdev->pm.debug_core_mask[2] = new_core_mask_js2; - kbdev->pm.debug_core_mask_all = new_core_mask_js0 | new_core_mask_js1 | new_core_mask_js2; + kbdev->pm.debug_core_mask_all = 0; + for (i = 0; i < new_core_mask_size; i++) { + kbdev->pm.debug_core_mask[i] = new_core_mask[i]; + kbdev->pm.debug_core_mask_all |= new_core_mask[i]; + } kbase_pm_update_dynamic_cores_onoff(kbdev); } @@ -942,13 +943,11 @@ void kbase_hwaccess_pm_resume(struct kbase_device *kbdev) /* System resume callback has begun */ kbdev->pm.resuming = true; kbdev->pm.suspending = false; -#ifdef CONFIG_MALI_ARBITER_SUPPORT if (kbase_pm_is_gpu_lost(kbdev)) { dev_dbg(kbdev->dev, 
"%s: GPU lost in progress\n", __func__); kbase_pm_unlock(kbdev); return; } -#endif kbase_pm_do_poweron(kbdev, true); #if !MALI_USE_CSF @@ -958,17 +957,20 @@ void kbase_hwaccess_pm_resume(struct kbase_device *kbdev) kbase_pm_unlock(kbdev); } -#ifdef CONFIG_MALI_ARBITER_SUPPORT void kbase_pm_handle_gpu_lost(struct kbase_device *kbdev) { unsigned long flags; -#if !MALI_USE_CSF +#if MALI_USE_CSF + unsigned long flags_sched; +#else ktime_t end_timestamp = ktime_get_raw(); #endif struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state; - if (!kbdev->arb.arb_if) + if (!kbase_has_arbiter(kbdev)) { + dev_warn(kbdev->dev, "%s called with no active arbiter!\n", __func__); return; + } mutex_lock(&kbdev->pm.lock); mutex_lock(&arb_vm_state->vm_state_lock); @@ -981,24 +983,45 @@ void kbase_pm_handle_gpu_lost(struct kbase_device *kbdev) */ WARN(!kbase_is_gpu_removed(kbdev), "GPU is still available after GPU lost event\n"); - /* Full GPU reset will have been done by hypervisor, so - * cancel - */ +#if MALI_USE_CSF + /* Full GPU reset will have been done by hypervisor, so cancel */ + if (kbase_reset_gpu_prevent_and_wait(kbdev)) + dev_warn(kbdev->dev, "Failed to prevent GPU reset."); + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbase_csf_scheduler_spin_lock(kbdev, &flags_sched); + atomic_set(&kbdev->hwaccess.backend.reset_gpu, KBASE_RESET_GPU_NOT_PENDING); + kbase_csf_scheduler_spin_unlock(kbdev, flags_sched); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + kbase_synchronize_irqs(kbdev); + + /* Scheduler reset happens outside of spinlock due to the mutex it acquires */ + kbase_csf_scheduler_reset(kbdev); + + /* Update kbase status */ + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbdev->protected_mode = false; + kbase_pm_update_state(kbdev); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + /* Cancel any pending HWC dumps */ + kbase_hwcnt_backend_csf_on_unrecoverable_error(&kbdev->hwcnt_gpu_iface); +#else + /* Full GPU reset will 
have been done by hypervisor, so cancel */ atomic_set(&kbdev->hwaccess.backend.reset_gpu, KBASE_RESET_GPU_NOT_PENDING); hrtimer_cancel(&kbdev->hwaccess.backend.reset_timer); + kbase_synchronize_irqs(kbdev); /* Clear all jobs running on the GPU */ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); kbdev->protected_mode = false; -#if !MALI_USE_CSF kbase_backend_reset(kbdev, &end_timestamp); kbase_pm_metrics_update(kbdev, NULL); -#endif kbase_pm_update_state(kbdev); spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -#if !MALI_USE_CSF /* Cancel any pending HWC dumps */ spin_lock_irqsave(&kbdev->hwcnt.lock, flags); if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_DUMPING || @@ -1008,14 +1031,12 @@ void kbase_pm_handle_gpu_lost(struct kbase_device *kbdev) wake_up(&kbdev->hwcnt.backend.wait); } spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); -#endif +#endif /* MALI_USE_CSF */ } mutex_unlock(&arb_vm_state->vm_state_lock); mutex_unlock(&kbdev->pm.lock); } -#endif /* CONFIG_MALI_ARBITER_SUPPORT */ - #if MALI_USE_CSF && defined(KBASE_PM_RUNTIME) int kbase_pm_force_mcu_wakeup_after_sleep(struct kbase_device *kbdev) { @@ -1063,26 +1084,15 @@ static int pm_handle_mcu_sleep_on_runtime_suspend(struct kbase_device *kbdev) } /* Check if a Doorbell mirror interrupt occurred meanwhile. - * Also check if GPU idle work item is pending. If FW had sent the GPU idle notification - * after the wake up of MCU then it can be assumed that Userspace submission didn't make - * GPU non-idle, so runtime suspend doesn't need to be aborted. 
*/ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - if (kbdev->pm.backend.gpu_sleep_mode_active && kbdev->pm.backend.exit_gpu_sleep_mode && - !work_pending(&kbdev->csf.scheduler.gpu_idle_work)) { - u32 glb_req = - kbase_csf_firmware_global_input_read(&kbdev->csf.global_iface, GLB_REQ); - u32 glb_ack = kbase_csf_firmware_global_output(&kbdev->csf.global_iface, GLB_ACK); - - /* Only abort the runtime suspend if GPU idle event is not pending */ - if (!((glb_req ^ glb_ack) & GLB_REQ_IDLE_EVENT_MASK)) { - dev_dbg(kbdev->dev, - "DB mirror interrupt occurred during runtime suspend after L2 power up"); - kbdev->pm.backend.gpu_wakeup_override = false; - kbdev->pm.backend.runtime_suspend_abort_reason = ABORT_REASON_DB_MIRROR_IRQ; - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - return -EBUSY; - } + if (kbdev->pm.backend.gpu_sleep_mode_active && kbdev->pm.backend.exit_gpu_sleep_mode) { + dev_dbg(kbdev->dev, + "DB mirror interrupt occurred during runtime suspend after L2 power up"); + kbdev->pm.backend.gpu_wakeup_override = false; + kbdev->pm.backend.runtime_suspend_abort_reason = ABORT_REASON_DB_MIRROR_IRQ; + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + return -EBUSY; } spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); /* Need to release the kbdev->pm.lock to avoid lock ordering issue @@ -1237,4 +1247,5 @@ out: return ret; } + #endif diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_ca.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_ca.c index 8daef13388a3..6522e5ca66e9 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_ca.c +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_ca.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2013-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2013-2024 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -55,11 +55,18 @@ void kbase_devfreq_set_core_mask(struct kbase_device *kbdev, u64 core_mask) unsigned long flags; #if MALI_USE_CSF u64 old_core_mask = 0; -#endif + bool mmu_sync_needed = false; + if (!IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) && + kbase_hw_has_issue(kbdev, KBASE_HW_ISSUE_GPU2019_3901)) { + mmu_sync_needed = true; + down_write(&kbdev->csf.mmu_sync_sem); + } +#endif spin_lock_irqsave(&kbdev->hwaccess_lock, flags); #if MALI_USE_CSF + if (!(core_mask & kbdev->pm.debug_core_mask)) { dev_err(kbdev->dev, "OPP core mask 0x%llX does not intersect with debug mask 0x%llX\n", @@ -98,6 +105,9 @@ void kbase_devfreq_set_core_mask(struct kbase_device *kbdev, u64 core_mask) old_core_mask, core_mask); } } + + if (mmu_sync_needed) + up_write(&kbdev->csf.mmu_sync_sem); #endif dev_dbg(kbdev->dev, "Devfreq policy : new core mask=%llX\n", pm_backend->ca_cores_enabled); @@ -105,6 +115,10 @@ void kbase_devfreq_set_core_mask(struct kbase_device *kbdev, u64 core_mask) return; unlock: spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); +#if MALI_USE_CSF + if (mmu_sync_needed) + up_write(&kbdev->csf.mmu_sync_sem); +#endif } KBASE_EXPORT_TEST_API(kbase_devfreq_set_core_mask); #endif diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_defs.h b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_defs.h index a0b8b9500077..a25fe6bdc912 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_defs.h +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_defs.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2024 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -114,6 +114,27 @@ enum kbase_pm_runtime_suspend_abort_reason { ABORT_REASON_NON_IDLE_CGS }; +/* The following indices point to the corresponding bits stored in + * &kbase_pm_backend_data.gpu_sleep_allowed. They denote the conditions that + * would be checked against to determine the level of support for GPU sleep + * and firmware sleep-on-idle. + */ +#define KBASE_GPU_SUPPORTS_GPU_SLEEP ((uint8_t)0) +#define KBASE_GPU_SUPPORTS_FW_SLEEP_ON_IDLE ((uint8_t)1) +#define KBASE_GPU_PERF_COUNTERS_COLLECTION_ENABLED ((uint8_t)2) +#define KBASE_GPU_IGNORE_IDLE_EVENT ((uint8_t)3) +#define KBASE_GPU_NON_IDLE_OFF_SLOT_GROUPS_AVAILABLE ((uint8_t)4) + +/* FW sleep-on-idle could be enabled if + * &kbase_pm_backend_data.gpu_sleep_allowed is equal to this value. + */ +#define KBASE_GPU_FW_SLEEP_ON_IDLE_ALLOWED \ + ((uint8_t)((1 << KBASE_GPU_SUPPORTS_GPU_SLEEP) | \ + (1 << KBASE_GPU_SUPPORTS_FW_SLEEP_ON_IDLE) | \ + (0 << KBASE_GPU_PERF_COUNTERS_COLLECTION_ENABLED) | \ + (0 << KBASE_GPU_IGNORE_IDLE_EVENT) | \ + (0 << KBASE_GPU_NON_IDLE_OFF_SLOT_GROUPS_AVAILABLE))) + /** * struct kbasep_pm_metrics - Metrics data collected for use by the power * management framework. @@ -304,7 +325,7 @@ union kbase_pm_policy_data { * called previously. * See &struct kbase_pm_callback_conf. * @ca_cores_enabled: Cores that are currently available - * @apply_hw_issue_TITANHW_2938_wa: Indicates if the workaround for BASE_HW_ISSUE_TITANHW_2938 + * @apply_hw_issue_TITANHW_2938_wa: Indicates if the workaround for KBASE_HW_ISSUE_TITANHW_2938 * needs to be applied when unmapping memory from GPU. 
* @mcu_state: The current state of the micro-control unit, only applicable * to GPUs that have such a component @@ -332,7 +353,11 @@ union kbase_pm_policy_data { * cores may be different, but there should be transitions in * progress that will eventually achieve this state (assuming * that the policy doesn't change its mind in the mean time). - * @mcu_desired: True if the micro-control unit should be powered on + * @mcu_desired: True if the micro-control unit should be powered on by the MCU state + * machine. Updated as per the value of @mcu_poweron_required. + * @mcu_poweron_required: Boolean flag updated mainly by the CSF Scheduler code, + * before updating the PM active count, to indicate to the + * PM code that micro-control unit needs to be powered up/down. * @policy_change_clamp_state_to_off: Signaling the backend is in PM policy * change transition, needs the mcu/L2 to be brought back to the * off state and remain in that state until the flag is cleared. @@ -346,10 +371,9 @@ union kbase_pm_policy_data { * @core_idle_work: Work item used to wait for undesired cores to become inactive. * The work item is enqueued when Host controls the power for * shader cores and down scaling of cores is performed. - * @gpu_sleep_supported: Flag to indicate that if GPU sleep feature can be - * supported by the kernel driver or not. If this - * flag is not set, then HW state is directly saved - * when GPU idle notification is received. + * @gpu_sleep_allowed: Bitmask to indicate the conditions that would be + * used to determine what support for GPU sleep is + * available. * @gpu_sleep_mode_active: Flag to indicate that the GPU needs to be in sleep * mode. 
It is set when the GPU idle notification is * received and is cleared when HW state has been @@ -485,6 +509,7 @@ struct kbase_pm_backend_data { u64 shaders_desired_mask; #if MALI_USE_CSF bool mcu_desired; + bool mcu_poweron_required; bool policy_change_clamp_state_to_off; unsigned int csf_pm_sched_flags; struct mutex policy_change_lock; @@ -492,7 +517,7 @@ struct kbase_pm_backend_data { struct work_struct core_idle_work; #ifdef KBASE_PM_RUNTIME - bool gpu_sleep_supported; + unsigned long gpu_sleep_allowed; bool gpu_sleep_mode_active; bool exit_gpu_sleep_mode; bool gpu_idled; diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_driver.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_driver.c index 506e168f86d2..c6b6f3a8668a 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_driver.c +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_driver.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2024 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -47,9 +47,7 @@ #include #include #include -#ifdef CONFIG_MALI_ARBITER_SUPPORT #include -#endif /* CONFIG_MALI_ARBITER_SUPPORT */ #if MALI_USE_CSF #include @@ -70,6 +68,19 @@ MODULE_PARM_DESC(corestack_driver_control, "to the Mali GPU is known to be problematic."); KBASE_EXPORT_TEST_API(corestack_driver_control); +/** + * enum kbase_gpu_state - The state of data in the GPU. + * + * @GPU_STATE_INTACT: The GPU state is intact + * @GPU_STATE_LOST: The GPU state is lost + * @GPU_STATE_IN_RESET: The GPU is in reset state + * + * This enumeration is private to the file. It is used as + * the return values of platform specific PM + * callback (*power_on_callback). 
+ */ +enum kbase_gpu_state { GPU_STATE_INTACT = 0, GPU_STATE_LOST, GPU_STATE_IN_RESET }; + /** * enum kbasep_pm_action - Actions that can be performed on a core. * @@ -110,7 +121,15 @@ bool kbase_pm_is_mcu_desired(struct kbase_device *kbdev) if (kbdev->pm.backend.l2_force_off_after_mcu_halt) return false; - if (kbdev->csf.scheduler.pm_active_count && kbdev->pm.backend.mcu_desired) + /* Check if policy changing transition needs MCU to be off. */ + if (unlikely(kbdev->pm.backend.policy_change_clamp_state_to_off)) + return false; + + if (kbdev->pm.backend.mcu_desired) + return true; + + /* For always_on policy, the MCU needs to be kept on */ + if (kbase_pm_no_mcu_core_pwroff(kbdev)) return true; #ifdef KBASE_PM_RUNTIME @@ -119,13 +138,7 @@ bool kbase_pm_is_mcu_desired(struct kbase_device *kbdev) return true; #endif - /* MCU is supposed to be ON, only when scheduler.pm_active_count is - * non zero. But for always_on policy, the MCU needs to be kept on, - * unless policy changing transition needs it off. 
- */ - - return (kbdev->pm.backend.mcu_desired && kbase_pm_no_mcu_core_pwroff(kbdev) && - !kbdev->pm.backend.policy_change_clamp_state_to_off); + return false; } #endif @@ -600,11 +613,11 @@ static void kbase_pm_l2_config_override(struct kbase_device *kbdev) /* * Skip if it is not supported */ - if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_L2_CONFIG)) + if (!kbase_hw_has_feature(kbdev, KBASE_HW_FEATURE_L2_CONFIG)) return; #if MALI_USE_CSF - if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PBHA_HWU)) { + if (kbase_hw_has_feature(kbdev, KBASE_HW_FEATURE_PBHA_HWU)) { val = kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(L2_CONFIG)); kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(L2_CONFIG), L2_CONFIG_PBHA_HWU_SET(val, kbdev->pbha_propagate_bits)); @@ -728,16 +741,8 @@ bool kbase_pm_is_mcu_inactive(struct kbase_device *kbdev, enum kbase_mcu_state s } #ifdef KBASE_PM_RUNTIME -/** - * kbase_pm_enable_mcu_db_notification - Enable the Doorbell notification on - * MCU side - * - * @kbdev: Pointer to the device. - * - * This function is called to re-enable the Doorbell notification on MCU side - * when MCU needs to beome active again. 
- */ -static void kbase_pm_enable_mcu_db_notification(struct kbase_device *kbdev) + +void kbase_pm_enable_mcu_db_notification(struct kbase_device *kbdev) { u32 val = kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(MCU_CONTROL)); @@ -763,7 +768,7 @@ static void wait_mcu_as_inactive(struct kbase_device *kbdev) kbase_get_timeout_ms(kbdev, KBASE_AS_INACTIVE_TIMEOUT) * USEC_PER_MSEC; lockdep_assert_held(&kbdev->hwaccess_lock); - if (!kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TURSEHW_2716)) + if (!kbase_hw_has_issue(kbdev, KBASE_HW_ISSUE_TURSEHW_2716)) return; /* Wait for the AS_ACTIVE_INT bit to become 0 for the AS used by MCU FW */ @@ -912,6 +917,18 @@ static int kbase_pm_mcu_update_state(struct kbase_device *kbdev) if (kbase_pm_is_mcu_desired(kbdev) && !backend->policy_change_clamp_state_to_off && backend->l2_state == KBASE_L2_ON) { + kbdev->csf.mcu_halted = false; + + /* Ensure that FW would not go to sleep immediately after + * resumption. + */ + kbase_csf_firmware_global_input_mask(&kbdev->csf.global_iface, + GLB_REQ, + GLB_REQ_REQ_IDLE_DISABLE, + GLB_REQ_IDLE_DISABLE_MASK); + atomic_set(&kbdev->csf.scheduler.gpu_idle_timer_enabled, false); + atomic_set(&kbdev->csf.scheduler.fw_soi_enabled, false); + kbase_csf_firmware_trigger_reload(kbdev); backend->mcu_state = KBASE_MCU_PEND_ON_RELOAD; } @@ -979,8 +996,8 @@ static int kbase_pm_mcu_update_state(struct kbase_device *kbdev) kbase_hwcnt_backend_csf_set_hw_availability( &kbdev->hwcnt_gpu_iface, kbdev->gpu_props.curr_config.l2_slices, - kbdev->gpu_props.curr_config.shader_present & - kbdev->pm.debug_core_mask); + kbdev->gpu_props.curr_config.shader_present, + kbdev->pm.debug_core_mask); kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx); kbase_csf_scheduler_spin_unlock(kbdev, flags); backend->hwcnt_disabled = false; @@ -990,7 +1007,6 @@ static int kbase_pm_mcu_update_state(struct kbase_device *kbdev) case KBASE_MCU_ON: backend->shaders_desired_mask = kbase_pm_ca_get_core_mask(kbdev); - if (!kbase_pm_is_mcu_desired(kbdev)) 
backend->mcu_state = KBASE_MCU_ON_HWCNT_DISABLE; else if (kbdev->csf.firmware_hctl_core_pwr) { @@ -1170,7 +1186,7 @@ static int kbase_pm_mcu_update_state(struct kbase_device *kbdev) break; case KBASE_MCU_POWER_DOWN: - if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TITANHW_2922)) { + if (kbase_hw_has_issue(kbdev, KBASE_HW_ISSUE_TITANHW_2922)) { if (!kbdev->csf.firmware_hctl_core_pwr) kbasep_pm_toggle_power_interrupt(kbdev, true); backend->mcu_state = KBASE_MCU_OFF; @@ -1191,7 +1207,20 @@ static int kbase_pm_mcu_update_state(struct kbase_device *kbdev) #ifdef KBASE_PM_RUNTIME case KBASE_MCU_ON_SLEEP_INITIATE: if (!kbase_pm_is_mcu_desired(kbdev)) { - kbase_csf_firmware_trigger_mcu_sleep(kbdev); + bool db_notif_disabled = false; + + if (likely(test_bit(KBASE_GPU_SUPPORTS_FW_SLEEP_ON_IDLE, + &kbdev->pm.backend.gpu_sleep_allowed))) + db_notif_disabled = + kbase_reg_read32(kbdev, + GPU_CONTROL_ENUM(MCU_CONTROL)) & + MCU_CNTRL_DOORBELL_DISABLE_MASK; + + /* If DB notification is enabled on FW side then send a sleep + * request to FW. + */ + if (!db_notif_disabled) + kbase_csf_firmware_trigger_mcu_sleep(kbdev); backend->mcu_state = KBASE_MCU_ON_PEND_SLEEP; } else backend->mcu_state = KBASE_MCU_ON_HWCNT_ENABLE; @@ -1225,6 +1254,16 @@ static int kbase_pm_mcu_update_state(struct kbase_device *kbdev) case KBASE_MCU_IN_SLEEP: if (kbase_pm_is_mcu_desired(kbdev) && backend->l2_state == KBASE_L2_ON) { wait_mcu_as_inactive(kbdev); + /* Ensure that FW would not go to sleep immediately after + * resumption. 
+ */ + kbase_csf_firmware_global_input_mask(&kbdev->csf.global_iface, + GLB_REQ, + GLB_REQ_REQ_IDLE_DISABLE, + GLB_REQ_IDLE_DISABLE_MASK); + atomic_set(&kbdev->csf.scheduler.gpu_idle_timer_enabled, false); + atomic_set(&kbdev->csf.scheduler.fw_soi_enabled, false); + KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_REQUEST_WAKEUP( kbdev, kbase_backend_get_cycle_cnt(kbdev)); kbase_pm_enable_mcu_db_notification(kbdev); @@ -1342,6 +1381,8 @@ static void kbase_pm_l2_clear_backend_slot_submit_kctx(struct kbase_device *kbde static bool can_power_down_l2(struct kbase_device *kbdev) { + lockdep_assert_held(&kbdev->hwaccess_lock); + /* Defer the power-down if MMU is in process of page migration. */ return !kbdev->mmu_page_migrate_in_progress; } @@ -1367,20 +1408,6 @@ static bool need_tiler_control(struct kbase_device *kbdev) #endif } -/** - * hctl_l2_power_down - Initiate power down of L2 cache - * - * @kbdev: The kbase device structure for the device. - * - * This function initiates the power down of L2 cache when Host controls the power - * for Tiler block. The function expects that power down of Tiler to already have - * been initiated and it triggers the L2 power down only after the power down for - * Tiler is complete. - * The function shall be called only if L2 is in ready state. 
- */ -static void hctl_l2_power_down(struct kbase_device *kbdev) -{ -} /** * hctl_tiler_power_up_done - Check and/or initiate power up of Tiler @@ -1427,7 +1454,6 @@ static int kbase_pm_l2_update_state(struct kbase_device *kbdev) u64 l2_trans = kbase_pm_get_trans_cores(kbdev, KBASE_PM_CORE_L2); u64 l2_ready = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_L2); -#ifdef CONFIG_MALI_ARBITER_SUPPORT /* * kbase_pm_get_ready_cores and kbase_pm_get_trans_cores * are vulnerable to corruption if gpu is lost @@ -1456,7 +1482,6 @@ static int kbase_pm_l2_update_state(struct kbase_device *kbdev) } break; } -#endif /* mask off ready from trans in case transitions finished * between the register reads @@ -1557,7 +1582,7 @@ static int kbase_pm_l2_update_state(struct kbase_device *kbdev) case KBASE_L2_RESTORE_CLOCKS: /* We always assume only GPUs being affected by - * BASE_HW_ISSUE_GPU2017_1336 fall into this state + * KBASE_HW_ISSUE_GPU2017_1336 fall into this state */ WARN_ON_ONCE(!kbdev->pm.backend.gpu_clock_slow_down_wa); @@ -1659,7 +1684,7 @@ static int kbase_pm_l2_update_state(struct kbase_device *kbdev) case KBASE_L2_SLOW_DOWN_CLOCKS: /* We always assume only GPUs being affected by - * BASE_HW_ISSUE_GPU2017_1336 fall into this state + * KBASE_HW_ISSUE_GPU2017_1336 fall into this state */ WARN_ON_ONCE(!kbdev->pm.backend.gpu_clock_slow_down_wa); @@ -1708,11 +1733,6 @@ static int kbase_pm_l2_update_state(struct kbase_device *kbdev) case KBASE_L2_PEND_OFF: if (likely(!backend->l2_always_on)) { - if (need_tiler_control(kbdev) && l2_ready) { - hctl_l2_power_down(kbdev); - break; - } - if (l2_trans || l2_ready) break; } else if (kbdev->cache_clean_in_progress) @@ -1727,11 +1747,10 @@ static int kbase_pm_l2_update_state(struct kbase_device *kbdev) } #endif /* Disabling MCU after L2 cache power down is to address - * BASE_HW_ISSUE_TITANHW_2922 hardware issue. + * KBASE_HW_ISSUE_TITANHW_2922 hardware issue. 
*/ if (backend->l2_force_off_after_mcu_halt) { - kbase_csf_firmware_disable_mcu(kbdev); - kbase_csf_firmware_disable_mcu_wait(kbdev); + kbase_csf_stop_firmware_and_wait(kbdev); WARN_ON_ONCE(backend->mcu_state != KBASE_MCU_OFF); backend->l2_force_off_after_mcu_halt = false; } @@ -1878,12 +1897,7 @@ static int kbase_pm_shaders_update_state(struct kbase_device *kbdev) * kbase_pm_get_ready_cores and kbase_pm_get_trans_cores * are vulnerable to corruption if gpu is lost */ - if (kbase_is_gpu_removed(kbdev) -#ifdef CONFIG_MALI_ARBITER_SUPPORT - || kbase_pm_is_gpu_lost(kbdev)) { -#else - ) { -#endif + if (kbase_is_gpu_removed(kbdev) || kbase_pm_is_gpu_lost(kbdev)) { backend->shaders_state = KBASE_SHADERS_OFF_CORESTACK_OFF; dev_dbg(kbdev->dev, "GPU lost has occurred - shaders off\n"); break; @@ -1988,9 +2002,8 @@ static int kbase_pm_shaders_update_state(struct kbase_device *kbdev) kbdev, KBASE_PM_POLICY_EVENT_IDLE); if (kbdev->pm.backend.protected_transition_override || -#ifdef CONFIG_MALI_ARBITER_SUPPORT - kbase_pm_is_suspending(kbdev) || kbase_pm_is_gpu_lost(kbdev) || -#endif /* CONFIG_MALI_ARBITER_SUPPORT */ + (kbase_has_arbiter(kbdev) && (kbase_pm_is_suspending(kbdev) || + kbase_pm_is_gpu_lost(kbdev))) || !stt->configured_ticks || WARN_ON(stt->cancel_queued)) { backend->shaders_state = KBASE_SHADERS_WAIT_FINISHED_CORESTACK_ON; @@ -2057,10 +2070,9 @@ static int kbase_pm_shaders_update_state(struct kbase_device *kbdev) kbdev, KBASE_PM_POLICY_EVENT_TIMER_MISS); backend->shaders_state = KBASE_SHADERS_WAIT_FINISHED_CORESTACK_ON; -#ifdef CONFIG_MALI_ARBITER_SUPPORT - } else if (kbase_pm_is_suspending(kbdev) || kbase_pm_is_gpu_lost(kbdev)) { + } else if (kbase_has_arbiter(kbdev) && + (kbase_pm_is_suspending(kbdev) || kbase_pm_is_gpu_lost(kbdev))) { backend->shaders_state = KBASE_SHADERS_WAIT_FINISHED_CORESTACK_ON; -#endif /* CONFIG_MALI_ARBITER_SUPPORT */ } break; @@ -2079,7 +2091,7 @@ static int kbase_pm_shaders_update_state(struct kbase_device *kbdev) if 
(!backend->partial_shaderoff) shader_poweroff_timer_queue_cancel(kbdev); - if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TTRX_921)) { + if (kbase_hw_has_issue(kbdev, KBASE_HW_ISSUE_TTRX_921)) { kbase_gpu_start_cache_clean_nolock(kbdev, GPU_COMMAND_CACHE_CLN_INV_L2); backend->shaders_state = KBASE_SHADERS_L2_FLUSHING_CORESTACK_ON; @@ -2429,6 +2441,9 @@ void kbase_pm_reset_complete(struct kbase_device *kbdev) backend->in_reset = false; #if MALI_USE_CSF && defined(KBASE_PM_RUNTIME) backend->gpu_wakeup_override = false; + backend->db_mirror_interrupt_enabled = false; + backend->gpu_sleep_mode_active = false; + backend->exit_gpu_sleep_mode = false; #endif kbase_pm_update_state(kbdev); @@ -2653,12 +2668,9 @@ static int pm_wait_for_poweroff_work_complete(struct kbase_device *kbdev, bool k const long timeout = kbase_csf_timeout_in_jiffies( kbase_get_timeout_ms(kbdev, CSF_PM_TIMEOUT) + extra_wait_time_ms); #else -#ifdef CONFIG_MALI_ARBITER_SUPPORT /* Handling of timeout error isn't supported for arbiter builds */ - const long timeout = MAX_SCHEDULE_TIMEOUT; -#else - const long timeout = (long)msecs_to_jiffies(PM_TIMEOUT_MS); -#endif + const long timeout = kbase_has_arbiter(kbdev) ? MAX_SCHEDULE_TIMEOUT : + (long)msecs_to_jiffies(PM_TIMEOUT_MS); #endif int err = 0; @@ -2779,7 +2791,8 @@ static void update_user_reg_page_mapping(struct kbase_device *kbdev) * when the context (user process) needs to access to the page. 
*/ unmap_mapping_range(kbdev->csf.user_reg.filp->f_inode->i_mapping, - kctx->csf.user_reg.file_offset << PAGE_SHIFT, PAGE_SIZE, 1); + (loff_t)kctx->csf.user_reg.file_offset << PAGE_SHIFT, PAGE_SIZE, + 1); list_del_init(&kctx->csf.user_reg.link); dev_dbg(kbdev->dev, "Updated USER Reg page mapping of ctx %d_%d", kctx->tgid, kctx->id); @@ -2797,7 +2810,7 @@ static void update_user_reg_page_mapping(struct kbase_device *kbdev) void kbase_pm_clock_on(struct kbase_device *kbdev, bool is_resume) { struct kbase_pm_backend_data *backend = &kbdev->pm.backend; - bool reset_required = is_resume; + int ret = is_resume; unsigned long flags; KBASE_DEBUG_ASSERT(kbdev != NULL); @@ -2806,12 +2819,10 @@ void kbase_pm_clock_on(struct kbase_device *kbdev, bool is_resume) #endif /* !MALI_USE_CSF */ lockdep_assert_held(&kbdev->pm.lock); -#ifdef CONFIG_MALI_ARBITER_SUPPORT if (WARN_ON(kbase_pm_is_gpu_lost(kbdev))) { dev_err(kbdev->dev, "%s: Cannot power up while GPU lost", __func__); return; } -#endif if (backend->gpu_powered) { #if MALI_USE_CSF && defined(KBASE_PM_RUNTIME) @@ -2836,7 +2847,7 @@ void kbase_pm_clock_on(struct kbase_device *kbdev, bool is_resume) backend->callback_power_resume(kbdev); return; } else if (backend->callback_power_on) { - reset_required = backend->callback_power_on(kbdev); + ret = backend->callback_power_on(kbdev); } spin_lock_irqsave(&kbdev->hwaccess_lock, flags); @@ -2849,15 +2860,18 @@ void kbase_pm_clock_on(struct kbase_device *kbdev, bool is_resume) #endif - if (reset_required) { + if (ret == GPU_STATE_IN_RESET) { + /* GPU is already in reset state after power on and no + * soft-reset needed. Just reconfiguration is needed. 
+ */ + kbase_pm_init_hw(kbdev, PM_ENABLE_IRQS | PM_NO_RESET); + } else if (ret == GPU_STATE_LOST) { /* GPU state was lost, reset GPU to ensure it is in a * consistent state */ kbase_pm_init_hw(kbdev, PM_ENABLE_IRQS); - } -#ifdef CONFIG_MALI_ARBITER_SUPPORT - else { - if (kbdev->arb.arb_if) { + } else { + if (kbase_has_arbiter(kbdev)) { struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state; /* In the case that the GPU has just been granted by @@ -2873,8 +2887,8 @@ void kbase_pm_clock_on(struct kbase_device *kbdev, bool is_resume) * that a repartitioning occurred. In this case the current config * should be read again. */ - kbase_gpuprops_get_curr_config_props(kbdev, &kbdev->gpu_props.curr_config); -#endif /* CONFIG_MALI_ARBITER_SUPPORT */ + if (kbase_has_arbiter(kbdev)) + kbase_gpuprops_get_curr_config_props(kbdev, &kbdev->gpu_props.curr_config); mutex_lock(&kbdev->mmu_hw_mutex); spin_lock_irqsave(&kbdev->hwaccess_lock, flags); @@ -2898,7 +2912,7 @@ void kbase_pm_clock_on(struct kbase_device *kbdev, bool is_resume) backend->l2_desired = true; #if MALI_USE_CSF { - if (reset_required) { + if (ret != GPU_STATE_INTACT) { /* GPU reset was done after the power on, so send the post * reset event instead. This is okay as GPU power off event * is same as pre GPU reset event. 
@@ -2966,12 +2980,7 @@ bool kbase_pm_clock_off(struct kbase_device *kbdev) } #endif - if (kbase_is_gpu_removed(kbdev) -#ifdef CONFIG_MALI_ARBITER_SUPPORT - || kbase_pm_is_gpu_lost(kbdev)) { -#else - ) { -#endif + if (kbase_is_gpu_removed(kbdev) || kbase_pm_is_gpu_lost(kbdev)) { /* Ensure we unblock any threads that are stuck waiting * for the GPU */ @@ -2989,10 +2998,7 @@ bool kbase_pm_clock_off(struct kbase_device *kbdev) /* GPU is about to be turned off, switch to dummy page */ update_user_reg_page_mapping(kbdev); #endif - -#ifdef CONFIG_MALI_ARBITER_SUPPORT kbase_arbiter_pm_vm_event(kbdev, KBASE_VM_GPU_IDLE_EVENT); -#endif /* CONFIG_MALI_ARBITER_SUPPORT */ if (kbdev->pm.backend.callback_power_off) kbdev->pm.backend.callback_power_off(kbdev); @@ -3046,6 +3052,7 @@ static enum hrtimer_restart kbasep_reset_timeout(struct hrtimer *timer) return HRTIMER_NORESTART; } + static int kbase_set_gpu_quirks(struct kbase_device *kbdev) { #if MALI_USE_CSF @@ -3075,7 +3082,7 @@ static int kbase_set_gpu_quirks(struct kbase_device *kbdev) kbdev->hw_quirks_gpu = hw_quirks_gpu; #endif /* !MALI_USE_CSF */ - if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_IDVS_GROUP_SIZE)) { + if (kbase_hw_has_feature(kbdev, KBASE_HW_FEATURE_IDVS_GROUP_SIZE)) { u32 default_idvs_group_size = 0xF; u32 group_size = 0; @@ -3109,10 +3116,10 @@ static int kbase_set_sc_quirks(struct kbase_device *kbdev) if (kbase_is_gpu_removed(kbdev)) return -EIO; - if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TTRX_2968_TTRX_3162)) + if (kbase_hw_has_issue(kbdev, KBASE_HW_ISSUE_TTRX_2968_TTRX_3162)) hw_quirks_sc |= SC_VAR_ALGORITHM; - if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_TLS_HASHING)) + if (kbase_hw_has_feature(kbdev, KBASE_HW_FEATURE_TLS_HASHING)) hw_quirks_sc |= SC_TLS_HASH_ENABLE; kbdev->hw_quirks_sc = hw_quirks_sc; @@ -3131,7 +3138,7 @@ static int kbase_set_tiler_quirks(struct kbase_device *kbdev) return -EIO; /* Set tiler clock gate override if required */ - if (kbase_hw_has_issue(kbdev, 
BASE_HW_ISSUE_T76X_3953)) + if (kbase_hw_has_issue(kbdev, KBASE_HW_ISSUE_T76X_3953)) hw_quirks_tiler |= TC_CLOCK_GATE_OVERRIDE; kbdev->hw_quirks_tiler = hw_quirks_tiler; @@ -3139,6 +3146,7 @@ static int kbase_set_tiler_quirks(struct kbase_device *kbdev) return 0; } + static int kbase_pm_hw_issues_detect(struct kbase_device *kbdev) { struct device_node *np = kbdev->dev->of_node; @@ -3191,6 +3199,7 @@ static int kbase_pm_hw_issues_detect(struct kbase_device *kbdev) error = kbase_set_mmu_quirks(kbdev); } + return error; } @@ -3210,6 +3219,7 @@ static void kbase_pm_hw_issues_apply(struct kbase_device *kbdev) #else kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(JM_CONFIG), kbdev->hw_quirks_gpu); #endif + } void kbase_pm_cache_snoop_enable(struct kbase_device *kbdev) @@ -3257,16 +3267,10 @@ static void reenable_protected_mode_hwcnt(struct kbase_device *kbdev) } #endif -static int kbase_pm_do_reset(struct kbase_device *kbdev) +static int kbase_pm_do_reset_soft(struct kbase_device *kbdev) { - struct kbasep_reset_timeout_data rtdata; - u32 reg_offset, reg_val; int ret; - KBASE_KTRACE_ADD(kbdev, CORE_GPU_SOFT_RESET, NULL, 0); - - KBASE_TLSTREAM_JD_GPU_SOFT_RESET(kbdev, kbdev); - if (kbdev->pm.backend.callback_soft_reset) { ret = kbdev->pm.backend.callback_soft_reset(kbdev); if (ret < 0) @@ -3279,12 +3283,30 @@ static int kbase_pm_do_reset(struct kbase_device *kbdev) GPU_COMMAND_SOFT_RESET); } } + return 0; +} - reg_offset = GPU_CONTROL_ENUM(GPU_IRQ_MASK); - reg_val = RESET_COMPLETED; +static int kbase_pm_do_reset(struct kbase_device *kbdev) +{ + struct kbasep_reset_timeout_data rtdata; + u32 reg_offset, reg_val; + int ret; - /* Unmask the reset complete interrupt only */ - kbase_reg_write32(kbdev, reg_offset, reg_val); + KBASE_KTRACE_ADD(kbdev, CORE_GPU_SOFT_RESET, NULL, 0); + + KBASE_TLSTREAM_JD_GPU_SOFT_RESET(kbdev, kbdev); + + { + ret = kbase_pm_do_reset_soft(kbdev); + if (ret) + return ret; + + reg_offset = GPU_CONTROL_ENUM(GPU_IRQ_MASK); + reg_val = RESET_COMPLETED; + + /* 
Unmask the reset complete interrupt only */ + kbase_reg_write32(kbdev, reg_offset, reg_val); + } /* Initialize a structure for tracking the status of the reset */ rtdata.kbdev = kbdev; @@ -3333,9 +3355,8 @@ static int kbase_pm_do_reset(struct kbase_device *kbdev) /* The GPU doesn't seem to be responding to the reset so try a hard * reset, but only when NOT in arbitration mode. */ -#ifdef CONFIG_MALI_ARBITER_SUPPORT - if (!kbdev->arb.arb_if) { -#endif /* CONFIG_MALI_ARBITER_SUPPORT */ + + if (!kbase_has_arbiter(kbdev)) { dev_err(kbdev->dev, "Failed to soft-reset GPU (timed out after %d ms), now attempting a hard reset\n", RESET_TIMEOUT); @@ -3365,9 +3386,7 @@ static int kbase_pm_do_reset(struct kbase_device *kbdev) dev_err(kbdev->dev, "Failed to hard-reset the GPU (timed out after %d ms)\n", RESET_TIMEOUT); -#ifdef CONFIG_MALI_ARBITER_SUPPORT } -#endif /* CONFIG_MALI_ARBITER_SUPPORT */ return -EINVAL; } @@ -3418,9 +3437,7 @@ int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags) spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags); /* Soft reset the GPU */ -#ifdef CONFIG_MALI_ARBITER_SUPPORT if (!(flags & PM_NO_RESET)) -#endif /* CONFIG_MALI_ARBITER_SUPPORT */ err = kbdev->protected_ops->protected_mode_disable(kbdev->protected_dev); spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags); @@ -3441,7 +3458,6 @@ int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags) if (err) goto exit; - if (flags & PM_HW_ISSUES_DETECT) { err = kbase_pm_hw_issues_detect(kbdev); if (err) @@ -3451,6 +3467,10 @@ int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags) kbase_pm_hw_issues_apply(kbdev); kbase_cache_set_coherency_mode(kbdev, kbdev->system_coherency); kbase_amba_set_shareable_cache_support(kbdev); +#if MALI_USE_CSF + kbase_backend_update_gpu_timestamp_offset(kbdev); + kbdev->csf.compute_progress_timeout_cc = 0; +#endif /* Sanity check protected mode was left after reset */ WARN_ON(kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(GPU_STATUS)) & 
diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_internal.h b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_internal.h index 033c80a7c6b4..a7fa191b89d1 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_internal.h +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_internal.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2024 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -821,6 +821,21 @@ bool kbase_pm_is_mcu_desired(struct kbase_device *kbdev); */ bool kbase_pm_is_mcu_inactive(struct kbase_device *kbdev, enum kbase_mcu_state state); +#ifdef KBASE_PM_RUNTIME + +/** + * kbase_pm_enable_mcu_db_notification - Enable the Doorbell notification on + * MCU side + * + * @kbdev: Pointer to the device. + * + * This function is called to re-enable the Doorbell notification on MCU side + * when MCU needs to beome active again. + */ +void kbase_pm_enable_mcu_db_notification(struct kbase_device *kbdev); + +#endif /* KBASE_PM_RUNTIME */ + /** * kbase_pm_idle_groups_sched_suspendable - Check whether the scheduler can be * suspended to low power state when all @@ -963,11 +978,29 @@ static inline bool kbase_pm_gpu_sleep_allowed(struct kbase_device *kbdev) * A high positive value of autosuspend_delay can be used to keep the * GPU in sleep state for a long time. 
*/ - if (unlikely(!kbdev->dev->power.autosuspend_delay || - (kbdev->dev->power.autosuspend_delay < 0))) + if (unlikely(kbdev->dev->power.autosuspend_delay <= 0)) return false; - return kbdev->pm.backend.gpu_sleep_supported; + return test_bit(KBASE_GPU_SUPPORTS_GPU_SLEEP, &kbdev->pm.backend.gpu_sleep_allowed); +} + +/** + * kbase_pm_fw_sleep_on_idle_allowed - Check if FW sleep-on-idle could be enabled + * + * @kbdev: Device pointer + * + * This function should be called whenever the conditions that impact + * FW sleep-on-idle support change so that it could be enabled/disabled + * accordingly. + * + * Return: true if FW sleep-on-idle is allowed + */ +static inline bool kbase_pm_fw_sleep_on_idle_allowed(struct kbase_device *kbdev) +{ + if (unlikely(kbdev->dev->power.autosuspend_delay <= 0)) + return false; + + return kbdev->pm.backend.gpu_sleep_allowed == KBASE_GPU_FW_SLEEP_ON_IDLE_ALLOWED; } /** diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_policy.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_policy.c index 23e447b15767..457e91a0a978 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_policy.c +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_policy.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2024 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -77,7 +77,16 @@ void kbase_pm_policy_init(struct kbase_device *kbdev) spin_lock_irqsave(&kbdev->hwaccess_lock, flags); kbdev->pm.backend.pm_current_policy = default_policy; kbdev->pm.backend.csf_pm_sched_flags = default_policy->pm_sched_flags; + +#ifdef KBASE_PM_RUNTIME + if (kbase_pm_idle_groups_sched_suspendable(kbdev)) + clear_bit(KBASE_GPU_IGNORE_IDLE_EVENT, &kbdev->pm.backend.gpu_sleep_allowed); + else + set_bit(KBASE_GPU_IGNORE_IDLE_EVENT, &kbdev->pm.backend.gpu_sleep_allowed); +#endif /* KBASE_PM_RUNTIME */ + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + #else CSTD_UNUSED(flags); kbdev->pm.backend.pm_current_policy = default_policy; @@ -127,7 +136,7 @@ void kbase_pm_update_active(struct kbase_device *kbdev) pm->backend.poweroff_wait_in_progress = false; pm->backend.l2_desired = true; #if MALI_USE_CSF - pm->backend.mcu_desired = true; + pm->backend.mcu_desired = pm->backend.mcu_poweron_required; #endif spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); @@ -400,6 +409,13 @@ void kbase_pm_set_policy(struct kbase_device *kbdev, const struct kbase_pm_polic /* New policy in place, release the clamping on mcu/L2 off state */ kbdev->pm.backend.policy_change_clamp_state_to_off = false; kbase_pm_update_state(kbdev); + +#ifdef KBASE_PM_RUNTIME + if (kbase_pm_idle_groups_sched_suspendable(kbdev)) + clear_bit(KBASE_GPU_IGNORE_IDLE_EVENT, &kbdev->pm.backend.gpu_sleep_allowed); + else + set_bit(KBASE_GPU_IGNORE_IDLE_EVENT, &kbdev->pm.backend.gpu_sleep_allowed); +#endif /* KBASE_PM_RUNTIME */ #endif spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_time.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_time.c index 0bf0f5a062d3..d3715d97d23c 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_time.c +++ 
b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_time.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2024 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -30,10 +30,7 @@ #include #include #include - -#if !IS_ENABLED(CONFIG_MALI_REAL_HW) -#include -#endif +#include struct kbase_timeout_info { char *selector_str; @@ -41,12 +38,15 @@ struct kbase_timeout_info { }; #if MALI_USE_CSF + +#define GPU_TIMESTAMP_OFFSET_INVALID S64_MAX + static struct kbase_timeout_info timeout_info[KBASE_TIMEOUT_SELECTOR_COUNT] = { [CSF_FIRMWARE_TIMEOUT] = { "CSF_FIRMWARE_TIMEOUT", MIN(CSF_FIRMWARE_TIMEOUT_CYCLES, CSF_FIRMWARE_PING_TIMEOUT_CYCLES) }, [CSF_PM_TIMEOUT] = { "CSF_PM_TIMEOUT", CSF_PM_TIMEOUT_CYCLES }, [CSF_GPU_RESET_TIMEOUT] = { "CSF_GPU_RESET_TIMEOUT", CSF_GPU_RESET_TIMEOUT_CYCLES }, - [CSF_CSG_SUSPEND_TIMEOUT] = { "CSF_CSG_SUSPEND_TIMEOUT", CSF_CSG_SUSPEND_TIMEOUT_CYCLES }, + [CSF_CSG_TERM_TIMEOUT] = { "CSF_CSG_TERM_TIMEOUT", CSF_CSG_TERM_TIMEOUT_CYCLES }, [CSF_FIRMWARE_BOOT_TIMEOUT] = { "CSF_FIRMWARE_BOOT_TIMEOUT", CSF_FIRMWARE_BOOT_TIMEOUT_CYCLES }, [CSF_FIRMWARE_PING_TIMEOUT] = { "CSF_FIRMWARE_PING_TIMEOUT", @@ -82,6 +82,68 @@ static struct kbase_timeout_info timeout_info[KBASE_TIMEOUT_SELECTOR_COUNT] = { }; #endif +#if MALI_USE_CSF +void kbase_backend_invalidate_gpu_timestamp_offset(struct kbase_device *kbdev) +{ + kbdev->backend_time.gpu_timestamp_offset = GPU_TIMESTAMP_OFFSET_INVALID; +} +KBASE_EXPORT_TEST_API(kbase_backend_invalidate_gpu_timestamp_offset); + +/** + * kbase_backend_compute_gpu_ts_offset() - Compute GPU TS offset. + * + * @kbdev: Kbase device. 
+ * + * This function computes the value of GPU and CPU TS offset: + * - set to zero current TIMESTAMP_OFFSET register + * - read CPU TS and convert it to ticks + * - read GPU TS + * - calculate diff between CPU and GPU ticks + * - cache the diff as the GPU TS offset + * + * To reduce delays, preemption must be disabled during reads of both CPU and GPU TS. + * This function requires access to GPU registers to be enabled. + */ +static inline void kbase_backend_compute_gpu_ts_offset(struct kbase_device *kbdev) +{ + s64 cpu_ts_ticks = 0; + s64 gpu_ts_ticks = 0; + + if (kbdev->backend_time.gpu_timestamp_offset != GPU_TIMESTAMP_OFFSET_INVALID) + return; + + kbase_reg_write64(kbdev, GPU_CONTROL_ENUM(TIMESTAMP_OFFSET), 0); + + gpu_ts_ticks = kbase_reg_read64_coherent(kbdev, GPU_CONTROL_ENUM(TIMESTAMP)); + cpu_ts_ticks = ktime_get_raw_ns(); + cpu_ts_ticks = div64_u64(cpu_ts_ticks * kbdev->backend_time.divisor, + kbdev->backend_time.multiplier); + kbdev->backend_time.gpu_timestamp_offset = cpu_ts_ticks - gpu_ts_ticks; +} + +void kbase_backend_update_gpu_timestamp_offset(struct kbase_device *kbdev) +{ + lockdep_assert_held(&kbdev->pm.lock); + + kbase_backend_compute_gpu_ts_offset(kbdev); + + dev_dbg(kbdev->dev, "Setting GPU timestamp offset register to %lld (%lld ns)", + kbdev->backend_time.gpu_timestamp_offset, + div64_s64(kbdev->backend_time.gpu_timestamp_offset * + (s64)kbdev->backend_time.multiplier, + (s64)kbdev->backend_time.divisor)); + kbase_reg_write64(kbdev, GPU_CONTROL_ENUM(TIMESTAMP_OFFSET), + kbdev->backend_time.gpu_timestamp_offset); +} +#if MALI_UNIT_TEST +u64 kbase_backend_read_gpu_timestamp_offset_reg(struct kbase_device *kbdev) +{ + return kbase_reg_read64_coherent(kbdev, GPU_CONTROL_ENUM(TIMESTAMP_OFFSET)); +} +KBASE_EXPORT_TEST_API(kbase_backend_read_gpu_timestamp_offset_reg); +#endif +#endif + void kbase_backend_get_gpu_time_norequest(struct kbase_device *kbdev, u64 *cycle_counter, u64 *system_time, struct timespec64 *ts) { @@ -100,6 +162,7 @@ void 
kbase_backend_get_gpu_time_norequest(struct kbase_device *kbdev, u64 *cycle ktime_get_raw_ts64(ts); #endif } +KBASE_EXPORT_TEST_API(kbase_backend_get_gpu_time_norequest); #if !MALI_USE_CSF /** @@ -143,6 +206,7 @@ void kbase_backend_get_gpu_time(struct kbase_device *kbdev, u64 *cycle_counter, kbase_pm_release_gpu_cycle_counter(kbdev); #endif } +KBASE_EXPORT_TEST_API(kbase_backend_get_gpu_time); static u64 kbase_device_get_scaling_frequency(struct kbase_device *kbdev) { @@ -171,6 +235,15 @@ void kbase_device_set_timeout_ms(struct kbase_device *kbdev, enum kbase_timeout_ } selector_str = timeout_info[selector].selector_str; +#if MALI_USE_CSF + if (IS_ENABLED(CONFIG_MALI_REAL_HW) && !IS_ENABLED(CONFIG_MALI_IS_FPGA) && + unlikely(timeout_ms >= MAX_TIMEOUT_MS)) { + dev_warn(kbdev->dev, "%s is capped from %dms to %dms\n", + timeout_info[selector].selector_str, timeout_ms, MAX_TIMEOUT_MS); + timeout_ms = MAX_TIMEOUT_MS; + } +#endif + kbdev->backend_time.device_scaled_timeouts[selector] = timeout_ms; dev_dbg(kbdev->dev, "\t%-35s: %ums\n", selector_str, timeout_ms); } @@ -282,36 +355,14 @@ u64 __maybe_unused kbase_backend_time_convert_gpu_to_cpu(struct kbase_device *kb if (WARN_ON(!kbdev)) return 0; - return div64_u64(gpu_ts * kbdev->backend_time.multiplier, kbdev->backend_time.divisor) + - kbdev->backend_time.offset; -} - -/** - * get_cpu_gpu_time() - Get current CPU and GPU timestamps. - * - * @kbdev: Kbase device. - * @cpu_ts: Output CPU timestamp. - * @gpu_ts: Output GPU timestamp. - * @gpu_cycle: Output GPU cycle counts. 
- */ -static void get_cpu_gpu_time(struct kbase_device *kbdev, u64 *cpu_ts, u64 *gpu_ts, u64 *gpu_cycle) -{ - struct timespec64 ts; - - kbase_backend_get_gpu_time(kbdev, gpu_cycle, gpu_ts, &ts); - - if (cpu_ts) - *cpu_ts = (u64)(ts.tv_sec * NSEC_PER_SEC + ts.tv_nsec); + return div64_u64(gpu_ts * kbdev->backend_time.multiplier, kbdev->backend_time.divisor); } +KBASE_EXPORT_TEST_API(kbase_backend_time_convert_gpu_to_cpu); #endif u64 kbase_arch_timer_get_cntfrq(struct kbase_device *kbdev) { - u64 freq = arch_timer_get_cntfrq(); - -#if !IS_ENABLED(CONFIG_MALI_REAL_HW) - freq = midgard_model_arch_timer_get_cntfrq(kbdev->model); -#endif + u64 freq = mali_arch_timer_get_cntfrq(); dev_dbg(kbdev->dev, "System Timer Freq = %lluHz", freq); @@ -322,13 +373,10 @@ int kbase_backend_time_init(struct kbase_device *kbdev) { int err = 0; #if MALI_USE_CSF - u64 cpu_ts = 0; - u64 gpu_ts = 0; u64 freq; u64 common_factor; kbase_pm_register_access_enable(kbdev); - get_cpu_gpu_time(kbdev, &cpu_ts, &gpu_ts, NULL); freq = kbase_arch_timer_get_cntfrq(kbdev); if (!freq) { @@ -348,9 +396,8 @@ int kbase_backend_time_init(struct kbase_device *kbdev) goto disable_registers; } - kbdev->backend_time.offset = - (s64)(cpu_ts - div64_u64(gpu_ts * kbdev->backend_time.multiplier, - kbdev->backend_time.divisor)); + kbase_backend_invalidate_gpu_timestamp_offset( + kbdev); /* force computation of GPU Timestamp offset */ #endif if (kbase_timeout_scaling_init(kbdev)) { diff --git a/drivers/gpu/arm/bifrost/build.bp b/drivers/gpu/arm/bifrost/build.bp index 9ee968af8de5..2ae771f5b546 100644 --- a/drivers/gpu/arm/bifrost/build.bp +++ b/drivers/gpu/arm/bifrost/build.bp @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2017-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2017-2024 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -71,18 +71,6 @@ bob_defaults { mali_real_hw: { kbuild_options: ["CONFIG_MALI_REAL_HW=y"], }, - mali_error_inject_none: { - kbuild_options: ["CONFIG_MALI_ERROR_INJECT_NONE=y"], - }, - mali_error_inject_track_list: { - kbuild_options: ["CONFIG_MALI_ERROR_INJECT_TRACK_LIST=y"], - }, - mali_error_inject_random: { - kbuild_options: ["CONFIG_MALI_ERROR_INJECT_RANDOM=y"], - }, - mali_error_inject: { - kbuild_options: ["CONFIG_MALI_BIFROST_ERROR_INJECT=y"], - }, mali_debug: { kbuild_options: [ "CONFIG_MALI_BIFROST_DEBUG=y", @@ -125,7 +113,7 @@ bob_defaults { mali_hw_errata_1485982_use_clock_alternative: { kbuild_options: ["CONFIG_MALI_HW_ERRATA_1485982_USE_CLOCK_ALTERNATIVE=y"], }, - platform_is_fpga: { + mali_is_fpga: { kbuild_options: ["CONFIG_MALI_IS_FPGA=y"], }, mali_coresight: { @@ -160,7 +148,6 @@ bob_defaults { // is an umbrella feature that would be open for inappropriate use // (catch-all for experimental CS code without separating it into // different features). 
- "MALI_INCREMENTAL_RENDERING_JM={{.incremental_rendering_jm}}", "MALI_BASE_CSF_PERFORMANCE_TESTS={{.base_csf_performance_tests}}", ], } @@ -174,6 +161,9 @@ bob_kernel_module { "*.c", "*.h", "Kbuild", + "arbiter/*.c", + "arbiter/*.h", + "arbiter/Kbuild", "backend/gpu/*.c", "backend/gpu/*.h", "backend/gpu/Kbuild", @@ -239,6 +229,7 @@ bob_kernel_module { "jm/*.h", "tl/backend/*_jm.c", "mmu/backend/*_jm.c", + "mmu/backend/*_jm.h", "ipa/backend/*_jm.c", "ipa/backend/*_jm.h", ], @@ -263,17 +254,11 @@ bob_kernel_module { "hwcnt/backend/*_csf_*.h", "tl/backend/*_csf.c", "mmu/backend/*_csf.c", + "mmu/backend/*_csf.h", "ipa/backend/*_csf.c", "ipa/backend/*_csf.h", ], }, - mali_arbiter_support: { - srcs: [ - "arbiter/*.c", - "arbiter/*.h", - "arbiter/Kbuild", - ], - }, kbuild_options: [ "CONFIG_MALI_BIFROST=m", "CONFIG_MALI_KUTF=n", diff --git a/drivers/gpu/arm/bifrost/context/backend/mali_kbase_context_csf.c b/drivers/gpu/arm/bifrost/context/backend/mali_kbase_context_csf.c index 8b1410886b05..fe1dbfaca872 100644 --- a/drivers/gpu/arm/bifrost/context/backend/mali_kbase_context_csf.c +++ b/drivers/gpu/arm/bifrost/context/backend/mali_kbase_context_csf.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2024 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -116,8 +116,7 @@ static void kbase_context_term_partial(struct kbase_context *kctx, unsigned int struct kbase_context *kbase_create_context(struct kbase_device *kbdev, bool is_compat, base_context_create_flags const flags, - unsigned long const api_version, - struct kbase_file *const kfile) + unsigned long const api_version, struct file *const filp) { struct kbase_context *kctx; unsigned int i = 0; @@ -136,7 +135,7 @@ struct kbase_context *kbase_create_context(struct kbase_device *kbdev, bool is_c kctx->kbdev = kbdev; kctx->api_version = api_version; - kctx->kfile = kfile; + kctx->filp = filp; kctx->create_flags = flags; memcpy(kctx->comm, current->comm, sizeof(current->comm)); @@ -187,11 +186,15 @@ void kbase_destroy_context(struct kbase_context *kctx) * Customer side that a hang could occur if context termination is * not blocked until the resume of GPU device. */ + if (kbase_has_arbiter(kbdev)) + atomic_inc(&kbdev->pm.gpu_users_waiting); while (kbase_pm_context_active_handle_suspend(kbdev, KBASE_PM_SUSPEND_HANDLER_DONT_INCREASE)) { - dev_info(kbdev->dev, "Suspend in progress when destroying context"); + dev_dbg(kbdev->dev, "Suspend in progress when destroying context"); wait_event(kbdev->pm.resume_wait, !kbase_pm_is_suspending(kbdev)); } + if (kbase_has_arbiter(kbdev)) + atomic_dec(&kbdev->pm.gpu_users_waiting); /* Have synchronized against the System suspend and incremented the * pm.active_count. 
So any subsequent invocation of System suspend diff --git a/drivers/gpu/arm/bifrost/context/backend/mali_kbase_context_jm.c b/drivers/gpu/arm/bifrost/context/backend/mali_kbase_context_jm.c index f2eefe9ddcd0..ef474f625f63 100644 --- a/drivers/gpu/arm/bifrost/context/backend/mali_kbase_context_jm.c +++ b/drivers/gpu/arm/bifrost/context/backend/mali_kbase_context_jm.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2024 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -168,8 +168,7 @@ static void kbase_context_term_partial(struct kbase_context *kctx, unsigned int struct kbase_context *kbase_create_context(struct kbase_device *kbdev, bool is_compat, base_context_create_flags const flags, - unsigned long const api_version, - struct kbase_file *const kfile) + unsigned long const api_version, struct file *const filp) { struct kbase_context *kctx; unsigned int i = 0; @@ -188,7 +187,7 @@ struct kbase_context *kbase_create_context(struct kbase_device *kbdev, bool is_c kctx->kbdev = kbdev; kctx->api_version = api_version; - kctx->kfile = kfile; + kctx->filp = filp; kctx->create_flags = flags; if (is_compat) @@ -232,14 +231,13 @@ void kbase_destroy_context(struct kbase_context *kctx) if (WARN_ON(!kbdev)) return; - /* Context termination could happen whilst the system suspend of + /* Context termination could happen whilst the system suspend of * the GPU device is ongoing or has completed. It has been seen on * Customer side that a hang could occur if context termination is * not blocked until the resume of GPU device. 
*/ -#ifdef CONFIG_MALI_ARBITER_SUPPORT - atomic_inc(&kbdev->pm.gpu_users_waiting); -#endif /* CONFIG_MALI_ARBITER_SUPPORT */ + if (kbase_has_arbiter(kbdev)) + atomic_inc(&kbdev->pm.gpu_users_waiting); while (kbase_pm_context_active_handle_suspend(kbdev, KBASE_PM_SUSPEND_HANDLER_DONT_INCREASE)) { dev_dbg(kbdev->dev, "Suspend in progress when destroying context"); @@ -256,9 +254,8 @@ void kbase_destroy_context(struct kbase_context *kctx) */ wait_event(kbdev->pm.resume_wait, !kbase_pm_is_resuming(kbdev)); -#ifdef CONFIG_MALI_ARBITER_SUPPORT - atomic_dec(&kbdev->pm.gpu_users_waiting); -#endif /* CONFIG_MALI_ARBITER_SUPPORT */ + if (kbase_has_arbiter(kbdev)) + atomic_dec(&kbdev->pm.gpu_users_waiting); kbase_mem_pool_group_mark_dying(&kctx->mem_pools); diff --git a/drivers/gpu/arm/bifrost/context/mali_kbase_context.c b/drivers/gpu/arm/bifrost/context/mali_kbase_context.c index 36cfde3cdab1..2c7417bd6506 100644 --- a/drivers/gpu/arm/bifrost/context/mali_kbase_context.c +++ b/drivers/gpu/arm/bifrost/context/mali_kbase_context.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2024 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -141,7 +141,7 @@ int kbase_context_common_init(struct kbase_context *kctx) kctx->pid = task_pid_vnr(current); /* Check if this is a Userspace created context */ - if (likely(kctx->kfile)) { + if (likely(kctx->filp)) { struct pid *pid_struct; rcu_read_lock(); @@ -184,6 +184,8 @@ int kbase_context_common_init(struct kbase_context *kctx) spin_lock_init(&kctx->waiting_soft_jobs_lock); INIT_LIST_HEAD(&kctx->waiting_soft_jobs); + init_waitqueue_head(&kctx->event_queue); + kbase_gpu_vm_lock(kctx); bitmap_copy(kctx->cookies, &cookies_mask, BITS_PER_LONG); kbase_gpu_vm_unlock(kctx); @@ -195,7 +197,7 @@ int kbase_context_common_init(struct kbase_context *kctx) mutex_unlock(&kctx->kbdev->kctx_list_lock); if (err) { dev_err(kctx->kbdev->dev, "(err:%d) failed to insert kctx to kbase_process", err); - if (likely(kctx->kfile)) { + if (likely(kctx->filp)) { mmdrop(kctx->process_mm); put_task_struct(kctx->task); } @@ -284,7 +286,7 @@ void kbase_context_common_term(struct kbase_context *kctx) kbase_remove_kctx_from_process(kctx); mutex_unlock(&kctx->kbdev->kctx_list_lock); - if (likely(kctx->kfile)) { + if (likely(kctx->filp)) { mmdrop(kctx->process_mm); put_task_struct(kctx->task); } diff --git a/drivers/gpu/arm/bifrost/context/mali_kbase_context.h b/drivers/gpu/arm/bifrost/context/mali_kbase_context.h index e2295d020292..07c235fab11e 100644 --- a/drivers/gpu/arm/bifrost/context/mali_kbase_context.h +++ b/drivers/gpu/arm/bifrost/context/mali_kbase_context.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2011-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2024 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -56,9 +56,9 @@ void kbase_context_debugfs_term(struct kbase_context *const kctx); * BASEP_CONTEXT_CREATE_KERNEL_FLAGS. * @api_version: Application program interface version, as encoded in * a single integer by the KBASE_API_VERSION macro. - * @kfile: Pointer to the object representing the /dev/malixx device - * file instance. Shall be passed as NULL for internally created - * contexts. + * @filp: Pointer to the struct file corresponding to device file + * /dev/malixx instance, passed to the file's open method. + * Shall be passed as NULL for internally created contexts. * * Up to one context can be created for each client that opens the device file * /dev/malixx. Context creation is deferred until a special ioctl() system call @@ -68,8 +68,7 @@ void kbase_context_debugfs_term(struct kbase_context *const kctx); */ struct kbase_context *kbase_create_context(struct kbase_device *kbdev, bool is_compat, base_context_create_flags const flags, - unsigned long api_version, - struct kbase_file *const kfile); + unsigned long api_version, struct file *filp); /** * kbase_destroy_context - Destroy a kernel base context. diff --git a/drivers/gpu/arm/bifrost/csf/Kbuild b/drivers/gpu/arm/bifrost/csf/Kbuild index 5df35864efc7..8159bc9d10e8 100644 --- a/drivers/gpu/arm/bifrost/csf/Kbuild +++ b/drivers/gpu/arm/bifrost/csf/Kbuild @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note # -# (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved. +# (C) COPYRIGHT 2018-2024 ARM Limited. All rights reserved. 
# # This program is free software and is provided to you under the terms of the # GNU General Public License version 2 as published by the Free Software @@ -48,8 +48,10 @@ bifrost_kbase-y += \ ifeq ($(CONFIG_MALI_BIFROST_NO_MALI),y) bifrost_kbase-y += csf/mali_kbase_csf_firmware_no_mali.o +bifrost_kbase-y += csf/mali_kbase_csf_fw_io_no_mali.o else bifrost_kbase-y += csf/mali_kbase_csf_firmware.o +bifrost_kbase-y += csf/mali_kbase_csf_fw_io.o endif bifrost_kbase-$(CONFIG_DEBUG_FS) += csf/mali_kbase_debug_csf_fault.o diff --git a/drivers/gpu/arm/bifrost/csf/ipa_control/mali_kbase_csf_ipa_control.c b/drivers/gpu/arm/bifrost/csf/ipa_control/mali_kbase_csf_ipa_control.c index 61a4be9ccc94..ec47b88fac53 100644 --- a/drivers/gpu/arm/bifrost/csf/ipa_control/mali_kbase_csf_ipa_control.c +++ b/drivers/gpu/arm/bifrost/csf/ipa_control/mali_kbase_csf_ipa_control.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2020-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2020-2024 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -943,6 +943,8 @@ void kbase_ipa_control_protm_entered(struct kbase_device *kbdev) struct kbase_ipa_control *ipa_ctrl = &kbdev->csf.ipa_control; lockdep_assert_held(&kbdev->hwaccess_lock); + + ipa_ctrl->protm_start = ktime_get_raw_ns(); } @@ -955,6 +957,7 @@ void kbase_ipa_control_protm_exited(struct kbase_device *kbdev) lockdep_assert_held(&kbdev->hwaccess_lock); + for (i = 0; i < KBASE_IPA_CONTROL_MAX_SESSIONS; i++) { struct kbase_ipa_control_session *session = &ipa_ctrl->sessions[i]; diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf.c index 9dffe34f095b..d3300ea8dcde 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf.c +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2018-2024 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -45,6 +45,9 @@ #define CS_RING_BUFFER_MAX_SIZE ((uint32_t)(1 << 31)) /* 2GiB */ #define CS_RING_BUFFER_MIN_SIZE ((uint32_t)4096) +/* 0.2 second assuming 600 MHz GPU clock, which is double of iterator disabling timeout */ +#define MAX_PROGRESS_TIMEOUT_EVENT_DELAY ((u32)120000000) + #define PROTM_ALLOC_MAX_RETRIES ((u8)5) const u8 kbasep_csf_queue_group_priority_to_relative[BASE_QUEUE_GROUP_PRIORITY_COUNT] = { @@ -539,6 +542,8 @@ static int csf_queue_register_internal(struct kbase_context *kctx, queue->blocked_reason = CS_STATUS_BLOCKED_REASON_REASON_UNBLOCKED; + queue->clear_faults = true; + INIT_LIST_HEAD(&queue->link); atomic_set(&queue->pending_kick, 0); INIT_LIST_HEAD(&queue->pending_kick_link); @@ -589,11 +594,19 @@ int kbase_csf_queue_register_ex(struct kbase_context *kctx, u32 const glb_version = iface->version; u32 instr = iface->instr_features; u8 max_size = GLB_INSTR_FEATURES_EVENT_SIZE_MAX_GET(instr); - u32 min_buf_size = - (1u << reg->ex_event_size) * GLB_INSTR_FEATURES_OFFSET_UPDATE_RATE_GET(instr); + const u8 event_size = reg->ex_event_size; + u64 min_buf_size; /* If cs_trace_command not supported, the call fails */ if (glb_version < kbase_csf_interface_version(1, 1, 0)) + return -EPERM; + + /* Sanity check to avoid shift-out-of-bounds */ + if (event_size >= 32) + return -EINVAL; + + min_buf_size = ((u64)1 << event_size) * GLB_INSTR_FEATURES_OFFSET_UPDATE_RATE_GET(instr); + if (min_buf_size > UINT32_MAX) return -EINVAL; /* Validate the ring buffer configuration parameters */ @@ -605,8 +618,8 @@ int kbase_csf_queue_register_ex(struct kbase_context *kctx, /* Validate the cs_trace configuration parameters */ if (reg->ex_buffer_size && - ((reg->ex_event_size > max_size) || (reg->ex_buffer_size & (reg->ex_buffer_size - 1)) || - (reg->ex_buffer_size < min_buf_size))) + ((event_size > max_size) || 
(reg->ex_buffer_size & (reg->ex_buffer_size - 1)) || + (reg->ex_buffer_size < (u32)min_buf_size))) return -EINVAL; return csf_queue_register_internal(kctx, NULL, reg); @@ -734,7 +747,7 @@ out: } /** - * get_bound_queue_group - Get the group to which a queue was bound + * get_bound_queue_group() - Get the group to which a queue was bound * * @queue: Pointer to the queue for this group * @@ -847,6 +860,47 @@ void kbase_csf_ring_cs_kernel_doorbell(struct kbase_device *kbdev, int csi_index kbase_csf_ring_csg_doorbell(kbdev, csg_nr); } +int kbase_csf_queue_group_clear_faults(struct kbase_context *kctx, + struct kbase_ioctl_queue_group_clear_faults *faults) +{ + void __user *user_bufs = u64_to_user_ptr(faults->addr); + u32 i; + struct kbase_device *kbdev = kctx->kbdev; + const u32 nr_queues = faults->nr_queues; + + if (unlikely(nr_queues > kbdev->csf.global_iface.groups[0].stream_num)) { + dev_warn(kbdev->dev, "Invalid nr_queues %u", nr_queues); + return -EINVAL; + } + + for (i = 0; i < nr_queues; ++i) { + u64 buf_gpu_addr; + struct kbase_va_region *region; + + if (copy_from_user(&buf_gpu_addr, user_bufs, sizeof(buf_gpu_addr))) + return -EFAULT; + mutex_lock(&kctx->csf.lock); + kbase_gpu_vm_lock(kctx); + region = kbase_region_tracker_find_region_enclosing_address(kctx, buf_gpu_addr); + if (likely(!kbase_is_region_invalid_or_free(region))) { + struct kbase_queue *queue = region->user_data; + + queue->clear_faults = true; + } else { + dev_warn(kbdev->dev, "GPU queue %u without a valid command buffer region", + i); + kbase_gpu_vm_unlock(kctx); + mutex_unlock(&kctx->csf.lock); + return -EFAULT; + } + kbase_gpu_vm_unlock(kctx); + mutex_unlock(&kctx->csf.lock); + user_bufs = (void __user *)((uintptr_t)user_bufs + sizeof(buf_gpu_addr)); + } + + return 0; +} + int kbase_csf_queue_kick(struct kbase_context *kctx, struct kbase_ioctl_cs_queue_kick *kick) { struct kbase_device *kbdev = kctx->kbdev; @@ -868,7 +922,7 @@ int kbase_csf_queue_kick(struct kbase_context *kctx, struct 
kbase_ioctl_cs_queue struct kbase_queue *queue = region->user_data; if (queue && (queue->bind_state == KBASE_CSF_QUEUE_BOUND)) { - spin_lock(&kbdev->csf.pending_gpuq_kicks_lock); + spin_lock(&kbdev->csf.pending_gpuq_kick_queues_lock); if (list_empty(&queue->pending_kick_link)) { /* Queue termination shall block until this * kick has been handled. @@ -876,10 +930,12 @@ int kbase_csf_queue_kick(struct kbase_context *kctx, struct kbase_ioctl_cs_queue atomic_inc(&queue->pending_kick); list_add_tail( &queue->pending_kick_link, - &kbdev->csf.pending_gpuq_kicks[queue->group_priority]); - complete(&kbdev->csf.scheduler.kthread_signal); + &kbdev->csf.pending_gpuq_kick_queues[queue->group_priority]); + if (atomic_cmpxchg(&kbdev->csf.pending_gpuq_kicks, false, true) == + false) + complete(&kbdev->csf.scheduler.kthread_signal); } - spin_unlock(&kbdev->csf.pending_gpuq_kicks_lock); + spin_unlock(&kbdev->csf.pending_gpuq_kick_queues_lock); } } else { dev_dbg(kbdev->dev, @@ -1095,12 +1151,11 @@ static int create_normal_suspend_buffer(struct kbase_context *const kctx, } static void timer_event_worker(struct work_struct *data); -static void protm_event_worker(struct work_struct *data); static void term_normal_suspend_buffer(struct kbase_context *const kctx, struct kbase_normal_suspend_buffer *s_buf); /** - * create_suspend_buffers - Setup normal and protected mode + * create_suspend_buffers() - Setup normal and protected mode * suspend buffers. 
* * @kctx: Address of the kbase context within which the queue group @@ -1199,6 +1254,8 @@ static int create_queue_group(struct kbase_context *const kctx, group->deschedule_deferred_cnt = 0; #endif + group->cs_fault_report_enable = create->in.cs_fault_report_enable; + group->group_uid = generate_group_uid(); create->out.group_uid = group->group_uid; @@ -1206,7 +1263,9 @@ static int create_queue_group(struct kbase_context *const kctx, INIT_LIST_HEAD(&group->link_to_schedule); INIT_LIST_HEAD(&group->error_fatal.link); INIT_WORK(&group->timer_event_work, timer_event_worker); - INIT_WORK(&group->protm_event_work, protm_event_worker); + INIT_LIST_HEAD(&group->protm_event_work); + group->progress_timer_state = 0; + atomic_set(&group->pending_protm_event_work, 0); bitmap_zero(group->protm_pending_bitmap, MAX_SUPPORTED_STREAMS_PER_GROUP); group->run_state = KBASE_CSF_GROUP_INACTIVE; @@ -1251,14 +1310,6 @@ int kbase_csf_queue_group_create(struct kbase_context *const kctx, const u32 tiler_count = hweight64(create->in.tiler_mask); const u32 fragment_count = hweight64(create->in.fragment_mask); const u32 compute_count = hweight64(create->in.compute_mask); - size_t i; - - for (i = 0; i < ARRAY_SIZE(create->in.padding); i++) { - if (create->in.padding[i] != 0) { - dev_warn(kctx->kbdev->dev, "Invalid padding not 0 in queue group create\n"); - return -EINVAL; - } - } mutex_lock(&kctx->csf.lock); @@ -1379,7 +1430,7 @@ void kbase_csf_term_descheduled_queue_group(struct kbase_queue_group *group) } /** - * term_queue_group - Terminate a GPU command queue group. + * term_queue_group() - Terminate a GPU command queue group. * * @group: Pointer to GPU command queue group data. * @@ -1407,8 +1458,8 @@ static void term_queue_group(struct kbase_queue_group *group) } /** - * wait_group_deferred_deschedule_completion - Wait for refcount of the group to - * become 0 that was taken when the group deschedule had to be deferred. 
+ * wait_group_deferred_deschedule_completion() - Wait for refcount of the group + * to become 0 that was taken when the group deschedule had to be deferred. * * @group: Pointer to GPU command queue group that is being deleted. * @@ -1437,7 +1488,10 @@ static void wait_group_deferred_deschedule_completion(struct kbase_queue_group * static void cancel_queue_group_events(struct kbase_queue_group *group) { cancel_work_sync(&group->timer_event_work); - cancel_work_sync(&group->protm_event_work); + + /* Drain a pending protected mode request if any */ + kbase_csf_scheduler_wait_for_kthread_pending_work(group->kctx->kbdev, + &group->pending_protm_event_work); } static void remove_pending_group_fatal_error(struct kbase_queue_group *group) @@ -1592,6 +1646,7 @@ int kbase_csf_ctx_init(struct kbase_context *kctx) INIT_LIST_HEAD(&kctx->csf.queue_list); INIT_LIST_HEAD(&kctx->csf.link); + atomic_set(&kctx->csf.pending_sync_update, 0); kbase_csf_event_init(kctx); @@ -1827,7 +1882,7 @@ void kbase_csf_ctx_term(struct kbase_context *kctx) } /** - * handle_oom_event - Handle the OoM event generated by the firmware for the + * handle_oom_event() - Handle the OoM event generated by the firmware for the * CSI. * * @group: Pointer to the CSG group the oom-event belongs to. @@ -1902,7 +1957,7 @@ static int handle_oom_event(struct kbase_queue_group *const group, } /** - * report_tiler_oom_error - Report a CSG error due to a tiler heap OOM event + * report_tiler_oom_error() - Report a CSG error due to a tiler heap OOM event * * @group: Pointer to the GPU command queue group that encountered the error */ @@ -1945,7 +2000,7 @@ static void flush_gpu_cache_on_fatal_error(struct kbase_device *kbdev) } /** - * kbase_queue_oom_event - Handle tiler out-of-memory for a GPU command queue. + * kbase_queue_oom_event() - Handle tiler out-of-memory for a GPU command queue. * * @queue: Pointer to queue for which out-of-memory event was received. 
* @@ -2033,7 +2088,7 @@ unlock: } /** - * oom_event_worker - Tiler out-of-memory handler called from a workqueue. + * oom_event_worker() - Tiler out-of-memory handler called from a workqueue. * * @data: Pointer to a work_struct embedded in GPU command queue data. * @@ -2061,7 +2116,8 @@ static void oom_event_worker(struct work_struct *data) } /** - * report_group_timeout_error - Report the timeout error for the group to userspace. + * report_group_timeout_error() - Report the timeout error for the group to + * userspace. * * @group: Pointer to the group for which timeout error occurred */ @@ -2085,7 +2141,7 @@ static void report_group_timeout_error(struct kbase_queue_group *const group) } /** - * timer_event_worker - Handle the progress timeout error for the group + * timer_event_worker() - Handle the progress timeout error for the group * * @data: Pointer to a work_struct embedded in GPU command queue group data. * @@ -2120,19 +2176,74 @@ static void timer_event_worker(struct work_struct *data) } /** - * handle_progress_timer_event - Progress timer timeout event handler. + * handle_progress_timer_events() - Progress timer timeout events handler. * - * @group: Pointer to GPU queue group for which the timeout event is received. + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + * @slot_mask: Bitmap reflecting the slots on which progress timer timeouts happen. * * Notify a waiting user space client of the timeout. * Enqueue a work item to terminate the group and notify the event notification * thread of progress timeout fault for the GPU command queue group. + * Ignore fragment timeout if it is following a compute timeout. 
*/ -static void handle_progress_timer_event(struct kbase_queue_group *const group) +static void handle_progress_timer_events(struct kbase_device *const kbdev, unsigned long *slot_mask) { - kbase_debug_csf_fault_notify(group->kctx->kbdev, group->kctx, DF_PROGRESS_TIMER_TIMEOUT); + u32 max_csg_slots = kbdev->csf.global_iface.group_num; + u32 csg_nr; + struct kbase_queue_group *group = NULL; + struct kbase_csf_cmd_stream_group_info *ginfo; - queue_work(group->kctx->csf.wq, &group->timer_event_work); + kbase_csf_scheduler_spin_lock_assert_held(kbdev); + if (likely(bitmap_empty(slot_mask, MAX_SUPPORTED_CSGS))) + return; + + /* Log each timeout and Update timestamp of compute progress timeout */ + for_each_set_bit(csg_nr, slot_mask, max_csg_slots) { + group = kbdev->csf.scheduler.csg_slots[csg_nr].resident_group; + ginfo = &kbdev->csf.global_iface.groups[csg_nr]; + group->progress_timer_state = + kbase_csf_firmware_csg_output(ginfo, CSG_PROGRESS_TIMER_STATE); + + dev_info( + kbdev->dev, + "[%llu] Iterator PROGRESS_TIMER timeout notification received for group %u of ctx %d_%d on slot %u with state %x", + kbase_backend_get_cycle_cnt(kbdev), group->handle, group->kctx->tgid, + group->kctx->id, csg_nr, group->progress_timer_state); + + if (CSG_PROGRESS_TIMER_STATE_GET(group->progress_timer_state) == + CSG_PROGRESS_TIMER_STATE_COMPUTE) + kbdev->csf.compute_progress_timeout_cc = kbase_backend_get_cycle_cnt(kbdev); + } + + /* Ignore fragment timeout if it is following a compute timeout. + * Otherwise, terminate the command stream group. + */ + for_each_set_bit(csg_nr, slot_mask, max_csg_slots) { + group = kbdev->csf.scheduler.csg_slots[csg_nr].resident_group; + + /* Check if it is a fragment timeout right after another compute timeout. 
+ * In such case, kill compute CSG and give fragment CSG a second chance + */ + if (CSG_PROGRESS_TIMER_STATE_GET(group->progress_timer_state) == + CSG_PROGRESS_TIMER_STATE_FRAGMENT) { + u64 cycle_counter = kbase_backend_get_cycle_cnt(kbdev); + u64 compute_progress_timeout_cc = kbdev->csf.compute_progress_timeout_cc; + + if (compute_progress_timeout_cc <= cycle_counter && + cycle_counter <= compute_progress_timeout_cc + + MAX_PROGRESS_TIMEOUT_EVENT_DELAY) { + dev_info( + kbdev->dev, + "Ignored Fragment iterator timeout for group %d on slot %d", + group->handle, group->csg_nr); + continue; + } + } + + kbase_debug_csf_fault_notify(group->kctx->kbdev, group->kctx, + DF_PROGRESS_TIMER_TIMEOUT); + queue_work(group->kctx->csf.wq, &group->timer_event_work); + } } /** @@ -2211,41 +2322,7 @@ static void report_group_fatal_error(struct kbase_queue_group *const group) } /** - * protm_event_worker - Protected mode switch request event handler - * called from a workqueue. - * - * @data: Pointer to a work_struct embedded in GPU command queue group data. - * - * Request to switch to protected mode. 
- */ -static void protm_event_worker(struct work_struct *data) -{ - struct kbase_queue_group *const group = - container_of(data, struct kbase_queue_group, protm_event_work); - struct kbase_protected_suspend_buffer *sbuf = &group->protected_suspend_buf; - int err = 0; - - KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, PROTM_EVENT_WORKER_START, group, 0u); - - err = alloc_grp_protected_suspend_buffer_pages(group); - if (!err) { - kbase_csf_scheduler_group_protm_enter(group); - } else if (err == -ENOMEM && sbuf->alloc_retries <= PROTM_ALLOC_MAX_RETRIES) { - sbuf->alloc_retries++; - /* try again to allocate pages */ - queue_work(group->kctx->csf.wq, &group->protm_event_work); - } else if (sbuf->alloc_retries >= PROTM_ALLOC_MAX_RETRIES || err != -ENOMEM) { - dev_err(group->kctx->kbdev->dev, - "Failed to allocate physical pages for Protected mode suspend buffer for the group %d of context %d_%d", - group->handle, group->kctx->tgid, group->kctx->id); - report_group_fatal_error(group); - } - - KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, PROTM_EVENT_WORKER_END, group, 0u); -} - -/** - * handle_fault_event - Handler for CS fault. + * handle_fault_event() - Handler for CS fault. * * @queue: Pointer to queue for which fault event was received. 
* @cs_ack: Value of the CS_ACK register in the CS kernel input page used for @@ -2267,14 +2344,14 @@ static void handle_fault_event(struct kbase_queue *const queue, const u32 cs_ack const u8 cs_fault_exception_type = CS_FAULT_EXCEPTION_TYPE_GET(cs_fault); const u32 cs_fault_exception_data = CS_FAULT_EXCEPTION_DATA_GET(cs_fault); const u64 cs_fault_info_exception_data = CS_FAULT_INFO_EXCEPTION_DATA_GET(cs_fault_info); - bool use_old_log_format = true; + bool has_trace_info = false; bool skip_fault_report = kbase_ctx_flag(queue->kctx, KCTX_PAGE_FAULT_REPORT_SKIP); kbase_csf_scheduler_spin_lock_assert_held(kbdev); - if (use_old_log_format && !skip_fault_report) + if (!has_trace_info && !skip_fault_report) dev_warn(kbdev->dev, "Ctx %d_%d Group %d CSG %d CSI: %d\n" "CS_FAULT.EXCEPTION_TYPE: 0x%x (%s)\n" @@ -2286,47 +2363,32 @@ static void handle_fault_event(struct kbase_queue *const queue, const u32 cs_ack cs_fault_info_exception_data); -#if IS_ENABLED(CONFIG_DEBUG_FS) - /* CS_RESOURCE_TERMINATED type fault event can be ignored from the - * standpoint of dump on error. It is used to report fault for the CSIs - * that are associated with the same CSG as the CSI for which the actual - * fault was reported by the Iterator. - * Dumping would be triggered when the actual fault is reported. + /* If dump-on-fault daemon is waiting for a fault, wake up the daemon. + * Acknowledging the fault is deferred to the bottom-half until the wait + * of the dump completion is done. * - * CS_INHERIT_FAULT can also be ignored. It could happen due to the error - * in other types of queues (cpu/kcpu). If a fault had occurred in some - * other GPU queue then the dump would have been performed anyways when - * that fault was reported. + * Otherwise acknowledge the fault and ring the doorbell for the faulty queue + * to enter into recoverable state. 
*/ - if ((cs_fault_exception_type != CS_FAULT_EXCEPTION_TYPE_CS_INHERIT_FAULT) && - (cs_fault_exception_type != CS_FAULT_EXCEPTION_TYPE_CS_RESOURCE_TERMINATED)) { - if (unlikely(kbase_debug_csf_fault_notify(kbdev, queue->kctx, DF_CS_FAULT))) { - queue->cs_error = cs_fault; - queue->cs_error_info = cs_fault_info; - queue->cs_error_fatal = false; - queue_work(queue->kctx->csf.wq, &queue->cs_error_work); - return; - } - } -#endif + if (likely(!kbase_debug_csf_fault_notify(kbdev, queue->kctx, DF_CS_FAULT))) { + kbase_csf_firmware_cs_input_mask(stream, CS_REQ, cs_ack, CS_REQ_FAULT_MASK); + kbase_csf_ring_cs_kernel_doorbell(kbdev, queue->csi_index, queue->group->csg_nr, + true); + queue->cs_error_acked = true; + } else + queue->cs_error_acked = false; - kbase_csf_firmware_cs_input_mask(stream, CS_REQ, cs_ack, CS_REQ_FAULT_MASK); - kbase_csf_ring_cs_kernel_doorbell(kbdev, queue->csi_index, queue->group->csg_nr, true); + queue->cs_error = cs_fault; + queue->cs_error_info = cs_fault_info; + queue->cs_error_fatal = false; + if (!queue_work(queue->kctx->csf.wq, &queue->cs_error_work)) + dev_warn(kbdev->dev, "%s: failed to enqueue a work", __func__); } -static void report_queue_fatal_error(struct kbase_queue *const queue, u32 cs_fatal, - u64 cs_fatal_info, struct kbase_queue_group *group) +static void report_queue_error(struct kbase_queue *const queue, u32 cs_error, u64 cs_error_info, + struct kbase_queue_group *group, bool fatal) { - struct base_csf_notification - error = { .type = BASE_CSF_NOTIFICATION_GPU_QUEUE_GROUP_ERROR, - .payload = { - .csg_error = { - .error = { .error_type = - BASE_GPU_QUEUE_GROUP_QUEUE_ERROR_FATAL, - .payload = { .fatal_queue = { - .sideband = cs_fatal_info, - .status = cs_fatal, - } } } } } }; + struct base_csf_notification error = { .type = BASE_CSF_NOTIFICATION_GPU_QUEUE_GROUP_ERROR }; if (!queue) return; @@ -2335,17 +2397,30 @@ static void report_queue_fatal_error(struct kbase_queue *const queue, u32 cs_fat return; error.payload.csg_error.handle 
= group->handle; - error.payload.csg_error.error.payload.fatal_queue.csi_index = (__u8)queue->csi_index; + if (fatal) { + error.payload.csg_error.error.error_type = BASE_GPU_QUEUE_GROUP_QUEUE_ERROR_FATAL; + error.payload.csg_error.error.payload.fatal_queue.sideband = cs_error_info; + error.payload.csg_error.error.payload.fatal_queue.status = cs_error; + error.payload.csg_error.error.payload.fatal_queue.csi_index = queue->csi_index; + } else { + error.payload.csg_error.error.error_type = BASE_GPU_QUEUE_GROUP_QUEUE_ERROR_FAULT; + error.payload.csg_error.error.payload.fault_queue.sideband = cs_error_info; + error.payload.csg_error.error.payload.fault_queue.status = cs_error; + error.payload.csg_error.error.payload.fault_queue.csi_index = queue->csi_index; + } kbase_csf_event_add_error(queue->kctx, &group->error_fatal, &error); kbase_event_wakeup(queue->kctx); + + if (!fatal) + queue->clear_faults = false; } /** - * cs_error_worker - Handle the CS_FATAL/CS_FAULT error for the GPU queue + * cs_error_worker() - Handle the CS_FATAL/CS_FAULT error for the GPU queue * * @data: Pointer to a work_struct embedded in GPU command queue. * - * Terminate the CSG and report the error to userspace. + * Terminate the CSG for CS_FATAL and report the error to userspace. 
*/ static void cs_error_worker(struct work_struct *const data) { @@ -2356,6 +2431,7 @@ static void cs_error_worker(struct work_struct *const data) struct kbase_queue_group *group; bool reset_prevented = false; int err; + const bool cs_fatal = queue->cs_error_fatal; kbase_debug_csf_fault_wait_completion(kbdev); err = kbase_reset_gpu_prevent_and_wait(kbdev); @@ -2371,45 +2447,57 @@ static void cs_error_worker(struct work_struct *const data) group = get_bound_queue_group(queue); if (!group) { - dev_warn(kbdev->dev, "queue not bound when handling fatal event"); + dev_warn(kbdev->dev, "queue not bound when handling an error event"); goto unlock; } -#if IS_ENABLED(CONFIG_DEBUG_FS) - if (!queue->cs_error_fatal) { - unsigned long flags; - int slot_num; + if (!cs_fatal) { + if (group->cs_fault_report_enable && queue->clear_faults) + report_queue_error(queue, queue->cs_error, queue->cs_error_info, group, + false); + if (unlikely(!queue->cs_error_acked)) { + unsigned long flags; + int slot_num; - kbase_csf_scheduler_spin_lock(kbdev, &flags); - slot_num = kbase_csf_scheduler_group_get_slot_locked(group); - if (slot_num >= 0) { - struct kbase_csf_cmd_stream_group_info const *ginfo = - &kbdev->csf.global_iface.groups[slot_num]; - struct kbase_csf_cmd_stream_info const *stream = - &ginfo->streams[queue->csi_index]; - u32 const cs_ack = kbase_csf_firmware_cs_output(stream, CS_ACK); + kbase_csf_scheduler_spin_lock(kbdev, &flags); + slot_num = kbase_csf_scheduler_group_get_slot_locked(group); + if (likely(slot_num >= 0)) { + struct kbase_csf_cmd_stream_group_info const *ginfo = + &kbdev->csf.global_iface.groups[slot_num]; + struct kbase_csf_cmd_stream_info const *stream = + &ginfo->streams[queue->csi_index]; + u32 const cs_ack = kbase_csf_firmware_cs_output(stream, CS_ACK); + u32 const cs_req = kbase_csf_firmware_cs_input_read(stream, CS_REQ); - kbase_csf_firmware_cs_input_mask(stream, CS_REQ, cs_ack, CS_REQ_FAULT_MASK); - kbase_csf_ring_cs_kernel_doorbell(kbdev, queue->csi_index, 
slot_num, true); + /* Acknowledge the fault and ring the doorbell for the queue + * if that has not been done yet. + */ + if ((cs_ack & CS_ACK_FAULT_MASK) != (cs_req & CS_REQ_FAULT_MASK)) { + kbase_csf_firmware_cs_input_mask(stream, CS_REQ, cs_ack, + CS_REQ_FAULT_MASK); + kbase_csf_ring_cs_kernel_doorbell(kbdev, queue->csi_index, + slot_num, true); + } + } + kbase_csf_scheduler_spin_unlock(kbdev, flags); } - kbase_csf_scheduler_spin_unlock(kbdev, flags); - goto unlock; - } -#endif - - term_queue_group(group); - flush_gpu_cache_on_fatal_error(kbdev); - /* For an invalid GPU page fault, CS_BUS_FAULT fatal error is expected after the - * page fault handler disables the AS of faulty context. Need to skip reporting the - * CS_BUS_FAULT fatal error to the Userspace as it doesn't have the full fault info. - * Page fault handler will report the fatal error with full page fault info. - */ - if ((cs_fatal_exception_type == CS_FATAL_EXCEPTION_TYPE_CS_BUS_FAULT) && group->faulted) { - dev_dbg(kbdev->dev, - "Skipped reporting CS_BUS_FAULT for queue %d of group %d of ctx %d_%d", - queue->csi_index, group->handle, kctx->tgid, kctx->id); } else { - report_queue_fatal_error(queue, queue->cs_error, queue->cs_error_info, group); + term_queue_group(group); + flush_gpu_cache_on_fatal_error(kbdev); + /* For an invalid GPU page fault, CS_BUS_FAULT fatal error is expected after the + * page fault handler disables the AS of faulty context. Need to skip reporting the + * CS_BUS_FAULT fatal error to the Userspace as it doesn't have the full fault info. + * Page fault handler will report the fatal error with full page fault info.
+ */ + if ((cs_fatal_exception_type == CS_FATAL_EXCEPTION_TYPE_CS_BUS_FAULT) && + group->faulted) { + dev_dbg(kbdev->dev, + "Skipped reporting CS_BUS_FAULT for queue %d of group %d of ctx %d_%d", + queue->csi_index, group->handle, kctx->tgid, kctx->id); + } else { + report_queue_error(queue, queue->cs_error, queue->cs_error_info, group, + true); + } } unlock: @@ -2419,7 +2507,7 @@ unlock: } /** - * handle_fatal_event - Handler for CS fatal. + * handle_fatal_event() - Handler for CS fatal. * * @queue: Pointer to queue for which fatal event was received. * @stream: Pointer to the structure containing info provided by the @@ -2443,13 +2531,13 @@ static void handle_fatal_event(struct kbase_queue *const queue, const u32 cs_fatal_exception_type = CS_FATAL_EXCEPTION_TYPE_GET(cs_fatal); const u32 cs_fatal_exception_data = CS_FATAL_EXCEPTION_DATA_GET(cs_fatal); const u64 cs_fatal_info_exception_data = CS_FATAL_INFO_EXCEPTION_DATA_GET(cs_fatal_info); - bool use_old_log_format = true; + bool has_trace_info = false; bool skip_fault_report = kbase_ctx_flag(queue->kctx, KCTX_PAGE_FAULT_REPORT_SKIP); kbase_csf_scheduler_spin_lock_assert_held(kbdev); - if (use_old_log_format && !skip_fault_report) + if (!has_trace_info && !skip_fault_report) dev_warn(kbdev->dev, "Ctx %d_%d Group %d CSG %d CSI: %d\n" "CS_FATAL.EXCEPTION_TYPE: 0x%x (%s)\n" @@ -2481,7 +2569,7 @@ static void handle_fatal_event(struct kbase_queue *const queue, } /** - * process_cs_interrupts - Process interrupts for a CS. + * process_cs_interrupts() - Process interrupts for a CS. * * @group: Pointer to GPU command queue group data. * @ginfo: The CSG interface provided by the firmware. 
@@ -2595,7 +2683,7 @@ static void process_cs_interrupts(struct kbase_queue_group *const group, } if (!group->protected_suspend_buf.pma) - queue_work(group->kctx->csf.wq, &group->protm_event_work); + kbase_csf_scheduler_enqueue_protm_event_work(group); if (test_bit(group->csg_nr, scheduler->csg_slots_idle_mask)) { clear_bit(group->csg_nr, scheduler->csg_slots_idle_mask); @@ -2608,12 +2696,14 @@ static void process_cs_interrupts(struct kbase_queue_group *const group, } /** - * process_csg_interrupts - Process interrupts for a CSG. + * process_csg_interrupts() - Process interrupts for a CSG. * * @kbdev: Instance of a GPU platform device that implements a CSF interface. * @csg_nr: CSG number. * @track: Pointer that tracks the highest idle CSG and the newly possible viable * protected mode requesting group, in current IRQ context. + * @progress_timeout_slot_mask: slot mask to indicate on which slot progress timeout + * happens. * * Handles interrupts for a CSG and for CSs within it. * @@ -2625,7 +2715,8 @@ static void process_cs_interrupts(struct kbase_queue_group *const group, * See process_cs_interrupts() for details of per-stream interrupt handling. 
*/ static void process_csg_interrupts(struct kbase_device *const kbdev, u32 const csg_nr, - struct irq_idle_and_protm_track *track) + struct irq_idle_and_protm_track *track, + unsigned long *progress_timeout_slot_mask) { struct kbase_csf_cmd_stream_group_info *ginfo; struct kbase_queue_group *group = NULL; @@ -2712,13 +2803,9 @@ static void process_csg_interrupts(struct kbase_device *const kbdev, u32 const c KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_INTERRUPT_PROGRESS_TIMER_EVENT, group, req ^ ack); - dev_info( - kbdev->dev, - "[%llu] Iterator PROGRESS_TIMER timeout notification received for group %u of ctx %d_%d on slot %u\n", - kbase_backend_get_cycle_cnt(kbdev), group->handle, group->kctx->tgid, - group->kctx->id, csg_nr); - handle_progress_timer_event(group); + set_bit(csg_nr, progress_timeout_slot_mask); + } process_cs_interrupts(group, ginfo, irqreq, irqack, track); @@ -2728,7 +2815,7 @@ static void process_csg_interrupts(struct kbase_device *const kbdev, u32 const c } /** - * process_prfcnt_interrupts - Process performance counter interrupts. + * process_prfcnt_interrupts() - Process performance counter interrupts. * * @kbdev: Instance of a GPU platform device that implements a CSF interface. * @glb_req: Global request register value. @@ -2800,7 +2887,7 @@ static void process_prfcnt_interrupts(struct kbase_device *kbdev, u32 glb_req, u } /** - * check_protm_enter_req_complete - Check if PROTM_ENTER request completed + * check_protm_enter_req_complete() - Check if PROTM_ENTER request completed * * @kbdev: Instance of a GPU platform device that implements a CSF interface. * @glb_req: Global request register value. 
@@ -2828,13 +2915,14 @@ static inline void check_protm_enter_req_complete(struct kbase_device *kbdev, u3 dev_dbg(kbdev->dev, "Protected mode entry interrupt received"); kbdev->protected_mode = true; + kbase_ipa_protection_mode_switch_event(kbdev); kbase_ipa_control_protm_entered(kbdev); kbase_hwcnt_backend_csf_protm_entered(&kbdev->hwcnt_gpu_iface); } /** - * process_protm_exit - Handle the protected mode exit interrupt + * process_protm_exit() - Handle the protected mode exit interrupt * * @kbdev: Instance of a GPU platform device that implements a CSF interface. * @glb_ack: Global acknowledge register value. @@ -2923,7 +3011,7 @@ static inline void process_tracked_info_for_protm(struct kbase_device *kbdev, if (!tock_triggered) { dev_dbg(kbdev->dev, "Group-%d on slot-%d start protm work\n", group->handle, group->csg_nr); - queue_work(group->kctx->csf.wq, &group->protm_event_work); + kbase_csf_scheduler_enqueue_protm_event_work(group); } } } @@ -2952,6 +3040,46 @@ static void order_job_irq_clear_with_iface_mem_read(void) dmb(osh); } +static const char *const glb_fatal_status_errors[GLB_FATAL_STATUS_VALUE_COUNT] = { + [GLB_FATAL_STATUS_VALUE_OK] = "OK", + [GLB_FATAL_STATUS_VALUE_ASSERT] = "Firmware assert triggered", + [GLB_FATAL_STATUS_VALUE_UNEXPECTED_EXCEPTION] = + "Hardware raised an exception firmware did not expect", + [GLB_FATAL_STATUS_VALUE_HANG] = "Firmware hangs and watchdog timer expired", +}; + +/** + * handle_glb_fatal_event() - Handle the GLB fatal event + * + * @kbdev: Instance of GPU device. 
+ * @global_iface: CSF global interface + */ +static void handle_glb_fatal_event(struct kbase_device *kbdev, + const struct kbase_csf_global_iface *const global_iface) +{ + const char *error_string = NULL; + const u32 fatal_status = kbase_csf_firmware_global_output(global_iface, GLB_FATAL_STATUS); + + lockdep_assert_held(&kbdev->hwaccess_lock); + kbase_csf_scheduler_spin_lock_assert_held(kbdev); + dev_warn(kbdev->dev, "MCU encountered unrecoverable error"); + + if (fatal_status < GLB_FATAL_STATUS_VALUE_COUNT) + error_string = glb_fatal_status_errors[fatal_status]; + else { + dev_err(kbdev->dev, "Invalid GLB_FATAL_STATUS (%u)", fatal_status); + return; + } + + if (fatal_status == GLB_FATAL_STATUS_VALUE_OK) + dev_err(kbdev->dev, "GLB_FATAL_STATUS(OK) must be set with proper reason"); + else { + dev_warn(kbdev->dev, "GLB_FATAL_STATUS: %s", error_string); + if (kbase_prepare_to_reset_gpu_locked(kbdev, RESET_FLAGS_NONE)) + kbase_reset_gpu_locked(kbdev); + } +} + void kbase_csf_interrupt(struct kbase_device *kbdev, u32 val) { bool deferred_handling_glb_idle_irq = false; @@ -2972,18 +3100,25 @@ void kbase_csf_interrupt(struct kbase_device *kbdev, u32 val) struct irq_idle_and_protm_track track = { .protm_grp = NULL, .idle_seq = U32_MAX, .idle_slot = S8_MAX }; + DECLARE_BITMAP(progress_timeout_csgs, MAX_SUPPORTED_CSGS) = { 0 }; kbase_csf_scheduler_spin_lock(kbdev, &flags); - /* Looping through and track the highest idle and protm groups */ + /* Looping through and track the highest idle and protm groups. + * Also track the groups for which progress timer timeout happened. 
+ */ while (csg_interrupts != 0) { u32 const csg_nr = (u32)ffs((int)csg_interrupts) - 1; - process_csg_interrupts(kbdev, csg_nr, &track); + process_csg_interrupts(kbdev, csg_nr, &track, + progress_timeout_csgs); csg_interrupts &= ~(1U << csg_nr); } /* Handle protm from the tracked information */ process_tracked_info_for_protm(kbdev, &track); + /* Handle pending progress timeout(s) */ + handle_progress_timer_events(kbdev, progress_timeout_csgs); + kbase_csf_scheduler_spin_unlock(kbdev, flags); } @@ -3012,11 +3147,28 @@ void kbase_csf_interrupt(struct kbase_device *kbdev, u32 val) /* Handle IDLE Hysteresis notification event */ if ((glb_req ^ glb_ack) & GLB_REQ_IDLE_EVENT_MASK) { + u32 const glb_idle_timer_cfg = + kbase_csf_firmware_global_input_read( + global_iface, GLB_IDLE_TIMER_CONFIG); + dev_dbg(kbdev->dev, "Idle-hysteresis event flagged"); kbase_csf_firmware_global_input_mask( global_iface, GLB_REQ, glb_ack, GLB_REQ_IDLE_EVENT_MASK); + if (glb_idle_timer_cfg & + GLB_IDLE_TIMER_CONFIG_SLEEP_ON_IDLE_MASK) { + /* The FW is going to sleep, we shall: + * - Enable fast GPU idle handling to avoid + * confirming CSGs status in gpu_idle_worker(). + * - Enable doorbell mirroring to minimise the + * chance of KBase raising kernel doorbells which + * would cause the FW to be woken up. 
+ */ + kbdev->csf.scheduler.fast_gpu_idle_handling = true; + kbase_pm_enable_db_mirror_interrupt(kbdev); + } + glb_idle_irq_received = true; /* Defer handling this IRQ to account for a race condition * where the idle worker could be executed before we have @@ -3026,6 +3178,9 @@ void kbase_csf_interrupt(struct kbase_device *kbdev, u32 val) deferred_handling_glb_idle_irq = true; } + if (glb_ack & GLB_ACK_FATAL_MASK) + handle_glb_fatal_event(kbdev, global_iface); + process_prfcnt_interrupts(kbdev, glb_req, glb_ack); kbase_csf_scheduler_spin_unlock(kbdev, flags); @@ -3050,13 +3205,10 @@ void kbase_csf_interrupt(struct kbase_device *kbdev, u32 val) if (deferred_handling_glb_idle_irq) { unsigned long flags; - bool invoke_pm_state_machine; kbase_csf_scheduler_spin_lock(kbdev, &flags); - invoke_pm_state_machine = kbase_csf_scheduler_process_gpu_idle_event(kbdev); + kbase_csf_scheduler_process_gpu_idle_event(kbdev); kbase_csf_scheduler_spin_unlock(kbdev, flags); - if (unlikely(invoke_pm_state_machine)) - kbase_pm_update_state(kbdev); } wake_up_all(&kbdev->csf.event_wait); @@ -3087,6 +3239,11 @@ void kbase_csf_doorbell_mapping_term(struct kbase_device *kbdev) if (kbdev->csf.db_filp) { struct page *page = as_page(kbdev->csf.dummy_db_page); + /* This is a shared dummy sink page for avoiding potential segmentation fault + * to user-side library when a csi is off slot. Additionally, the call is on + * module unload path, so the page can be left uncleared before returning it + * back to kbdev memory pool. 
+ */ kbase_mem_pool_free(&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], page, false); fput(kbdev->csf.db_filp); @@ -3118,26 +3275,27 @@ int kbase_csf_doorbell_mapping_init(struct kbase_device *kbdev) return 0; } -void kbase_csf_pending_gpuq_kicks_init(struct kbase_device *kbdev) +void kbase_csf_pending_gpuq_kick_queues_init(struct kbase_device *kbdev) { size_t i; - for (i = 0; i != ARRAY_SIZE(kbdev->csf.pending_gpuq_kicks); ++i) - INIT_LIST_HEAD(&kbdev->csf.pending_gpuq_kicks[i]); - spin_lock_init(&kbdev->csf.pending_gpuq_kicks_lock); + atomic_set(&kbdev->csf.pending_gpuq_kicks, false); + for (i = 0; i != ARRAY_SIZE(kbdev->csf.pending_gpuq_kick_queues); ++i) + INIT_LIST_HEAD(&kbdev->csf.pending_gpuq_kick_queues[i]); + spin_lock_init(&kbdev->csf.pending_gpuq_kick_queues_lock); } -void kbase_csf_pending_gpuq_kicks_term(struct kbase_device *kbdev) +void kbase_csf_pending_gpuq_kick_queues_term(struct kbase_device *kbdev) { size_t i; - spin_lock(&kbdev->csf.pending_gpuq_kicks_lock); - for (i = 0; i != ARRAY_SIZE(kbdev->csf.pending_gpuq_kicks); ++i) { - if (!list_empty(&kbdev->csf.pending_gpuq_kicks[i])) + spin_lock(&kbdev->csf.pending_gpuq_kick_queues_lock); + for (i = 0; i != ARRAY_SIZE(kbdev->csf.pending_gpuq_kick_queues); ++i) { + if (!list_empty(&kbdev->csf.pending_gpuq_kick_queues[i])) dev_warn(kbdev->dev, "Some GPU queue kicks for priority %zu were not handled", i); } - spin_unlock(&kbdev->csf.pending_gpuq_kicks_lock); + spin_unlock(&kbdev->csf.pending_gpuq_kick_queues_lock); } void kbase_csf_free_dummy_user_reg_page(struct kbase_device *kbdev) @@ -3145,6 +3303,11 @@ void kbase_csf_free_dummy_user_reg_page(struct kbase_device *kbdev) if (kbdev->csf.user_reg.filp) { struct page *page = as_page(kbdev->csf.user_reg.dummy_page); + /* This is a shared dummy page in place of the real USER Register page just + * before the GPU is powered down. 
Additionally, the call is on module unload + * path, so the page can be left uncleared before returning it back to kbdev + * memory pool. + */ kbase_mem_pool_free(&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], page, false); fput(kbdev->csf.user_reg.filp); } @@ -3227,17 +3390,17 @@ void kbase_csf_process_queue_kick(struct kbase_queue *queue) if (err == -EBUSY) { retry_kick = true; - spin_lock(&kbdev->csf.pending_gpuq_kicks_lock); + spin_lock(&kbdev->csf.pending_gpuq_kick_queues_lock); if (list_empty(&queue->pending_kick_link)) { /* A failed queue kick shall be pushed to the * back of the queue to avoid potential abuse. */ list_add_tail( &queue->pending_kick_link, - &kbdev->csf.pending_gpuq_kicks[queue->group_priority]); - spin_unlock(&kbdev->csf.pending_gpuq_kicks_lock); + &kbdev->csf.pending_gpuq_kick_queues[queue->group_priority]); + spin_unlock(&kbdev->csf.pending_gpuq_kick_queues_lock); } else { - spin_unlock(&kbdev->csf.pending_gpuq_kicks_lock); + spin_unlock(&kbdev->csf.pending_gpuq_kick_queues_lock); WARN_ON(atomic_read(&queue->pending_kick) == 0); } @@ -3260,3 +3423,27 @@ out_release_queue: WARN_ON(atomic_read(&queue->pending_kick) == 0); atomic_dec(&queue->pending_kick); } + +void kbase_csf_process_protm_event_request(struct kbase_queue_group *group) +{ + struct kbase_protected_suspend_buffer *sbuf = &group->protected_suspend_buf; + int err = 0; + + KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, PROTM_EVENT_WORKER_START, group, 0u); + + err = alloc_grp_protected_suspend_buffer_pages(group); + if (!err) { + kbase_csf_scheduler_group_protm_enter(group); + } else if (err == -ENOMEM && sbuf->alloc_retries <= PROTM_ALLOC_MAX_RETRIES) { + sbuf->alloc_retries++; + /* try again to allocate pages */ + kbase_csf_scheduler_enqueue_protm_event_work(group); + } else if (sbuf->alloc_retries >= PROTM_ALLOC_MAX_RETRIES || err != -ENOMEM) { + dev_err(group->kctx->kbdev->dev, + "Failed to allocate physical pages for Protected mode suspend buffer for the group %d of context 
%d_%d", + group->handle, group->kctx->tgid, group->kctx->id); + report_group_fatal_error(group); + } + + KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, PROTM_EVENT_WORKER_END, group, 0u); +} diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf.h index b2f6ab2c4a27..566136342a06 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf.h +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf.h @@ -243,6 +243,19 @@ struct kbase_queue_group *kbase_csf_find_queue_group(struct kbase_context *kctx, */ int kbase_csf_queue_group_handle_is_valid(struct kbase_context *kctx, u8 group_handle); +/** + * kbase_csf_queue_group_clear_faults - Re-enable CS Fault reporting. + * + * @kctx: Pointer to the kbase context within which the + * CS Faults for the queues have to be re-enabled. + * @clear_faults: Pointer to the structure which contains details of the + * queues for which the CS Fault reporting has to be re-enabled. + * + * Return: 0 on success, or negative on failure. + */ +int kbase_csf_queue_group_clear_faults(struct kbase_context *kctx, + struct kbase_ioctl_queue_group_clear_faults *clear_faults); + /** * kbase_csf_queue_group_create - Create a GPU command queue group. * @@ -379,20 +392,20 @@ int kbase_csf_setup_dummy_user_reg_page(struct kbase_device *kbdev); void kbase_csf_free_dummy_user_reg_page(struct kbase_device *kbdev); /** - * kbase_csf_pending_gpuq_kicks_init - Initialize the data used for handling - * GPU queue kicks. + * kbase_csf_pending_gpuq_kick_queues_init - Initialize the data used for handling + * GPU queue kicks. * * @kbdev: Instance of a GPU platform device that implements a CSF interface. */ -void kbase_csf_pending_gpuq_kicks_init(struct kbase_device *kbdev); +void kbase_csf_pending_gpuq_kick_queues_init(struct kbase_device *kbdev); /** - * kbase_csf_pending_gpuq_kicks_term - De-initialize the data used for handling - * GPU queue kicks.
+ * kbase_csf_pending_gpuq_kick_queues_term - De-initialize the data used for handling + * GPU queue kicks. * * @kbdev: Instance of a GPU platform device that implements a CSF interface. */ -void kbase_csf_pending_gpuq_kicks_term(struct kbase_device *kbdev); +void kbase_csf_pending_gpuq_kick_queues_term(struct kbase_device *kbdev); /** * kbase_csf_ring_csg_doorbell - ring the doorbell for a CSG interface. @@ -546,4 +559,13 @@ static inline u64 kbase_csf_ktrace_gpu_cycle_cnt(struct kbase_device *kbdev) */ void kbase_csf_process_queue_kick(struct kbase_queue *queue); +/** + * kbase_csf_process_protm_event_request - Handle protected mode switch request + * + * @group: The group to handle protected mode request + * + * Request to switch to protected mode. + */ +void kbase_csf_process_protm_event_request(struct kbase_queue_group *group); + #endif /* _KBASE_CSF_H_ */ diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_csg_debugfs.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_csg_debugfs.c index c885845bc62e..32f33a58a6f7 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_csg_debugfs.c +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_csg_debugfs.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2024 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -28,6 +28,8 @@ #include #include #include +#include +#include #define MAX_SCHED_STATE_STRING_LEN (16) /** @@ -268,6 +270,87 @@ static const struct file_operations kbasep_csf_debugfs_scheduler_state_fops = { .open = simple_open, .llseek = default_llseek, }; +static int kbasep_csf_debugfs_eviction_timeout_get(void *data, u64 *val) +{ + struct kbase_device *const kbdev = data; + unsigned long flags; + + kbase_csf_scheduler_spin_lock(kbdev, &flags); + *val = kbdev->csf.csg_suspend_timeout_ms - CSG_SUSPEND_TIMEOUT_HOST_ADDED_MS; + kbase_csf_scheduler_spin_unlock(kbdev, flags); + + return 0; +} + +static int kbasep_csf_debugfs_eviction_timeout_set(void *data, u64 val) +{ + struct kbase_device *const kbdev = data; + unsigned long flags_schd, flags_hw; + u64 dur_ms = val; + int ret = 0; + + if (unlikely(dur_ms < CSG_SUSPEND_TIMEOUT_FIRMWARE_MS_MIN || + dur_ms > CSG_SUSPEND_TIMEOUT_FIRMWARE_MS_MAX)) { + dev_err(kbdev->dev, "Invalid CSG suspend timeout input (%llu)", dur_ms); + return -EFAULT; + } + dur_ms = dur_ms + CSG_SUSPEND_TIMEOUT_HOST_ADDED_MS; + + /* The 'fw_load_lock' is taken to synchronize against the deferred + * loading of FW, update will take effect after firmware gets loaded. + */ + mutex_lock(&kbdev->fw_load_lock); + if (unlikely(!kbdev->csf.firmware_inited)) { + kbase_csf_scheduler_spin_lock(kbdev, &flags_schd); + kbdev->csf.csg_suspend_timeout_ms = (unsigned int)dur_ms; + kbase_csf_scheduler_spin_unlock(kbdev, flags_schd); + mutex_unlock(&kbdev->fw_load_lock); + dev_info(kbdev->dev, "CSF set csg suspend timeout deferred till fw is loaded"); + goto end; + } + mutex_unlock(&kbdev->fw_load_lock); + + /* Firmware reloading is triggered by silent reset, and then update will take effect. 
+ */ + kbase_csf_scheduler_pm_active(kbdev); + if (kbase_csf_scheduler_killable_wait_mcu_active(kbdev)) { + dev_err(kbdev->dev, + "Unable to activate the MCU, the csg suspend timeout value shall remain unchanged"); + kbase_csf_scheduler_pm_idle(kbdev); + ret = -EFAULT; + goto exit; + } + spin_lock_irqsave(&kbdev->hwaccess_lock, flags_hw); + if (kbase_reset_gpu_silent(kbdev)) { + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags_hw); + dev_err(kbdev->dev, "CSF set csg suspend timeout pending reset, try again"); + kbase_csf_scheduler_pm_idle(kbdev); + ret = -EFAULT; + goto exit; + } + /* GPU reset is placed and it will take place only after hwaccess_lock is released, + * update on host side should be done after GPU reset is placed and before it takes place. + */ + kbase_csf_scheduler_spin_lock(kbdev, &flags_schd); + kbdev->csf.csg_suspend_timeout_ms = (unsigned int)dur_ms; + kbase_csf_scheduler_spin_unlock(kbdev, flags_schd); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags_hw); + /* Keep PM active until reset finished to allow FW reloading to take place, + * and then update request will be sent to FW during initialization. 
+ */ + kbase_reset_gpu_wait(kbdev); + kbase_csf_scheduler_pm_idle(kbdev); + +end: + dev_info(kbdev->dev, "CSF set csg suspend timeout: %u ms", (unsigned int)dur_ms); + +exit: + return ret; +} + +DEFINE_DEBUGFS_ATTRIBUTE(kbasep_csf_debugfs_eviction_timeout_fops, + &kbasep_csf_debugfs_eviction_timeout_get, + &kbasep_csf_debugfs_eviction_timeout_set, "%llu\n"); void kbase_csf_debugfs_init(struct kbase_device *kbdev) { @@ -280,6 +363,8 @@ void kbase_csf_debugfs_init(struct kbase_device *kbdev) &kbasep_csf_debugfs_scheduling_timer_kick_fops); debugfs_create_file("scheduler_state", 0644, kbdev->mali_debugfs_directory, kbdev, &kbasep_csf_debugfs_scheduler_state_fops); + debugfs_create_file("eviction_timeout_ms", 0644, kbdev->mali_debugfs_directory, kbdev, + &kbasep_csf_debugfs_eviction_timeout_fops); kbase_csf_tl_reader_debugfs_init(kbdev); } diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_defs.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_defs.h index 8d7c896e1051..38e7cb940d97 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_defs.h +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_defs.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2018-2024 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -30,6 +30,7 @@ #include "mali_kbase_csf_firmware.h" #include "mali_kbase_csf_event.h" #include +#include "mali_kbase_csf_fw_io.h" #include @@ -267,7 +268,7 @@ enum kbase_queue_group_priority { * @CSF_PM_TIMEOUT: Timeout for GPU Power Management to reach the desired * Shader, L2 and MCU state. * @CSF_GPU_RESET_TIMEOUT: Waiting timeout for GPU reset to complete. - * @CSF_CSG_SUSPEND_TIMEOUT: Timeout given for a CSG to be suspended. + * @CSF_CSG_TERM_TIMEOUT: Timeout given for a CSG to be terminated. 
* @CSF_FIRMWARE_BOOT_TIMEOUT: Maximum time to wait for firmware to boot. * @CSF_FIRMWARE_PING_TIMEOUT: Maximum time to wait for firmware to respond * to a ping from KBase. @@ -289,7 +290,7 @@ enum kbase_timeout_selector { CSF_FIRMWARE_TIMEOUT, CSF_PM_TIMEOUT, CSF_GPU_RESET_TIMEOUT, - CSF_CSG_SUSPEND_TIMEOUT, + CSF_CSG_TERM_TIMEOUT, CSF_FIRMWARE_BOOT_TIMEOUT, CSF_FIRMWARE_PING_TIMEOUT, CSF_SCHED_PROTM_PROGRESS_TIMEOUT, @@ -398,6 +399,10 @@ struct kbase_csf_notification { * @cs_error: Records information about the CS fatal event or * about CS fault event if dump on fault is enabled. * @cs_error_fatal: Flag to track if the CS fault or CS fatal event occurred. + * @cs_error_acked: Flag to indicate that acknowledging the fault has been done + * at top-half of fault handler. + * @clear_faults: Flag to track if the CS fault reporting is enabled for this queue. + * It's protected by &kbase_context.csf.lock. * @extract_ofs: The current EXTRACT offset, this is only updated when handling * the GLB IDLE IRQ if the idle timeout value is non-0 in order * to help detect a queue's true idle status. @@ -441,6 +446,8 @@ struct kbase_queue { u64 cs_error_info; u32 cs_error; bool cs_error_fatal; + bool cs_error_acked; + bool clear_faults; u64 extract_ofs; u64 saved_cmd_ptr; }; @@ -501,6 +508,8 @@ struct kbase_protected_suspend_buffer { * @compute_max: Maximum number of compute endpoints the group is * allowed to use. * @csi_handlers: Requested CSI exception handler flags for the group. + * @cs_fault_report_enable: Indicates whether reporting of CS_FAULTs to + * userspace is enabled. * @tiler_mask: Mask of tiler endpoints the group is allowed to use. * @fragment_mask: Mask of fragment endpoints the group is allowed to use. * @compute_mask: Mask of compute endpoints the group is allowed to use. @@ -531,14 +540,21 @@ struct kbase_protected_suspend_buffer { * @bound_queues: Array of registered queues bound to this queue group.
* @doorbell_nr: Index of the hardware doorbell page assigned to the * group. - * @protm_event_work: Work item corresponding to the protected mode entry - * event for this queue. + * @protm_event_work: List item corresponding to the protected mode entry + * event for this queue. This would be handled by + * kbase_csf_scheduler_kthread(). + * @pending_protm_event_work: Indicates that kbase_csf_scheduler_kthread() should + * handle PROTM request for this group. This would + * be set to false when the work is done. This is used + * mainly for synchronisation with group termination. * @protm_pending_bitmap: Bit array to keep a track of CSs that * have pending protected mode entry requests. * @error_fatal: An error of type BASE_GPU_QUEUE_GROUP_ERROR_FATAL to be * returned to userspace if such an error has occurred. * @timer_event_work: Work item to handle the progress timeout fatal event * for the group. + * @progress_timer_state: Value of CSG_PROGRESS_TIMER_STATE register when progress + * timer timeout is reported for the group. * @deschedule_deferred_cnt: Counter keeping a track of the number of threads * that tried to deschedule the group and had to defer * the descheduling due to the dump on fault. @@ -569,7 +585,7 @@ struct kbase_queue_group { u8 compute_max; u8 csi_handlers; - + __u8 cs_fault_report_enable; u64 tiler_mask; u64 fragment_mask; u64 compute_mask; @@ -588,12 +604,14 @@ struct kbase_queue_group { struct kbase_queue *bound_queues[MAX_SUPPORTED_STREAMS_PER_GROUP]; int doorbell_nr; - struct work_struct protm_event_work; + struct list_head protm_event_work; + atomic_t pending_protm_event_work; DECLARE_BITMAP(protm_pending_bitmap, MAX_SUPPORTED_STREAMS_PER_GROUP); struct kbase_csf_notification error_fatal; struct work_struct timer_event_work; + u32 progress_timer_state; /** * @dvs_buf: Address and size of scratch memory. 
@@ -625,6 +643,9 @@ struct kbase_queue_group { * @cmd_seq_num: The sequence number assigned to an enqueued command, * in incrementing order (older commands shall have a * smaller number). + * @kcpu_wq: Work queue to process KCPU commands for all queues in this + * context. This would be used if the context is not prioritised, + * otherwise it would be handled by kbase_csf_scheduler_kthread(). * @jit_lock: Lock to serialise JIT operations. * @jit_cmds_head: A list of the just-in-time memory commands, both * allocate & free, in submission order, protected @@ -640,6 +661,8 @@ struct kbase_csf_kcpu_queue_context { DECLARE_BITMAP(in_use, KBASEP_MAX_KCPU_QUEUES); atomic64_t cmd_seq_num; + struct workqueue_struct *kcpu_wq; + struct mutex jit_lock; struct list_head jit_cmds_head; struct list_head jit_blocked_queues; @@ -747,15 +770,7 @@ struct kbase_csf_ctx_heap_reclaim_info { * GPU command queues are idle and at least one of them * is blocked on a sync wait operation. * @num_idle_wait_grps: Length of the @idle_wait_groups list. - * @sync_update_wq_high_prio: high-priority work queue to process the - * SYNC_UPDATE events by sync_set / sync_add - * instruction execution on command streams bound to - * groups of @idle_wait_groups list. This WQ would - * be used if the context is prioritised. - * @sync_update_wq_normal_prio: similar to sync_update_wq_high_prio, but this - * WQ would be used if the context is not - * prioritised. - * @sync_update_work: Work item to process the SYNC_UPDATE events. + * @sync_update_work: List item to process the SYNC_UPDATE event. * @ngrp_to_schedule: Number of groups added for the context to the * 'groups_to_schedule' list of scheduler instance. * @heap_info: Heap reclaim information data of the kctx. 
As the @@ -768,9 +783,7 @@ struct kbase_csf_scheduler_context { u32 num_runnable_grps; struct list_head idle_wait_groups; u32 num_idle_wait_grps; - struct workqueue_struct *sync_update_wq_high_prio; - struct workqueue_struct *sync_update_wq_normal_prio; - struct work_struct sync_update_work; + struct list_head sync_update_work; u32 ngrp_to_schedule; struct kbase_csf_ctx_heap_reclaim_info heap_info; }; @@ -865,17 +878,16 @@ struct kbase_csf_user_reg_context { * @wq: Dedicated workqueue to process work items corresponding * to the OoM events raised for chunked tiler heaps being * used by GPU command queues, and progress timeout events. - * @kcpu_wq_high_prio: High-priority work queue to process KCPU commands for - * all queues in this context. This WQ would be used if - * the context is prioritised. - * @kcpu_wq_normal_prio: Similar to kcpu_wq_high_prio, but this WQ would be - * used if the context is not prioritised. * @link: Link to this csf context in the 'runnable_kctxs' list of * the scheduler instance * @sched: Object representing the scheduler's context * @cpu_queue: CPU queue information. Only be available when DEBUG_FS * is enabled. * @user_reg: Collective information to support mapping to USER Register page. + * @pending_sync_update: Indicates that kbase_csf_scheduler_kthread() should + * handle SYNC_UPDATE event for this context. This would + * be set to false when the work is done. This is used + * mainly for synchronisation with context termination. 
*/ struct kbase_csf_context { struct list_head event_pages_head; @@ -888,12 +900,11 @@ struct kbase_csf_context { struct kbase_csf_event event; struct kbase_csf_tiler_heap_context tiler_heaps; struct workqueue_struct *wq; - struct workqueue_struct *kcpu_wq_high_prio; - struct workqueue_struct *kcpu_wq_normal_prio; struct list_head link; struct kbase_csf_scheduler_context sched; struct kbase_csf_cpu_queue_context cpu_queue; struct kbase_csf_user_reg_context user_reg; + atomic_t pending_sync_update; }; /** @@ -922,13 +933,11 @@ struct kbase_csf_reset_gpu { * of CSG slots. * @resident_group: pointer to the queue group that is resident on the CSG slot. * @state: state of the slot as per enum @kbase_csf_csg_slot_state. - * @trigger_jiffies: value of jiffies when change in slot state is recorded. * @priority: dynamic priority assigned to CSG slot. */ struct kbase_csf_csg_slot { struct kbase_queue_group *resident_group; atomic_t state; - unsigned long trigger_jiffies; u8 priority; }; @@ -936,14 +945,15 @@ struct kbase_csf_csg_slot { * struct kbase_csf_sched_heap_reclaim_mgr - Object for managing tiler heap reclaim * kctx lists inside the CSF device's scheduler. * - * @heap_reclaim: Tiler heap reclaim shrinker object. + * @heap_reclaim: Defines Tiler heap reclaim shrinker object. * @ctx_lists: Array of kctx lists, size matching CSG defined priorities. The * lists track the kctxs attached to the reclaim manager. * @unused_pages: Estimated number of unused pages from the @ctxlist array. The * number is indicative for use with reclaim shrinker's count method. */ struct kbase_csf_sched_heap_reclaim_mgr { - struct shrinker heap_reclaim; + DEFINE_KBASE_SHRINKER heap_reclaim; + struct list_head ctx_lists[KBASE_QUEUE_GROUP_PRIORITY_COUNT]; atomic_t unused_pages; }; @@ -1042,10 +1052,29 @@ struct kbase_csf_mcu_shared_regions { * workqueue items (kernel-provided delayed_work * items do not use hrtimer and for some reason do * not provide sufficiently reliable periodicity). 
- * @pending_tick_work: Indicates that kbase_csf_scheduler_kthread() should perform - * a scheduling tick. - * @pending_tock_work: Indicates that kbase_csf_scheduler_kthread() should perform - * a scheduling tock. + * @pending_sync_update_works: Indicates that kbase_csf_scheduler_kthread() + * should handle SYNC_UPDATE events. + * @sync_update_work_ctxs_lock: Lock protecting the list of contexts that + * require handling SYNC_UPDATE events. + * @sync_update_work_ctxs: The list of contexts that require handling + * SYNC_UPDATE events. + * @pending_protm_event_works: Indicates that kbase_csf_scheduler_kthread() + * should handle PROTM requests. + * @protm_event_work_grps_lock: Lock protecting the list of groups that + * have requested protected mode. + * @protm_event_work_grps: The list of groups that have requested + * protected mode. + * @pending_kcpuq_works: Indicates that kbase_csf_scheduler_kthread() + * should process pending KCPU queue works. + * @kcpuq_work_queues_lock: Lock protecting the list of KCPU queues that + * need to be processed. + * @kcpuq_work_queues: The list of KCPU queues that need to be processed. + * @pending_tick_work: Indicates that kbase_csf_scheduler_kthread() should + * perform a scheduling tick. + * @pending_tock_work: Indicates that kbase_csf_scheduler_kthread() should + * perform a scheduling tock. + * @pending_gpu_idle_work: Indicates that kbase_csf_scheduler_kthread() should + * handle the GPU IDLE event. * @ping_work: Work item that would ping the firmware at regular * intervals, only if there is a single active CSG * slot, to check if firmware is alive and would @@ -1063,10 +1092,6 @@ struct kbase_csf_mcu_shared_regions { * This pointer being set doesn't necessarily indicates * that GPU is in protected mode, kbdev->protected_mode * needs to be checked for that. - * @idle_wq: Workqueue for executing GPU idle notification - * handler.
- * @gpu_idle_work: Work item for facilitating the scheduler to bring - * the GPU to a low-power mode on becoming idle. * @fast_gpu_idle_handling: Indicates whether to relax many of the checks * normally done in the GPU idle worker. This is * set to true when handling the GLB IDLE IRQ if the @@ -1109,8 +1134,11 @@ struct kbase_csf_mcu_shared_regions { * thread when a queue needs attention. * @kthread_running: Whether the GPU queue submission thread should keep * executing. - * @gpuq_kthread: High-priority thread used to handle GPU queue + * @gpuq_kthread: Dedicated thread primarily used to handle + * latency-sensitive tasks such as GPU queue * submissions. + * @gpu_idle_timer_enabled: Tracks whether the GPU idle timer is enabled or disabled. + * @fw_soi_enabled: True if FW Sleep-on-Idle is currently enabled. */ struct kbase_csf_scheduler { struct mutex lock; @@ -1134,14 +1162,22 @@ struct kbase_csf_scheduler { unsigned long last_schedule; atomic_t timer_enabled; struct hrtimer tick_timer; + atomic_t pending_sync_update_works; + spinlock_t sync_update_work_ctxs_lock; + struct list_head sync_update_work_ctxs; + atomic_t pending_protm_event_works; + spinlock_t protm_event_work_grps_lock; + struct list_head protm_event_work_grps; + atomic_t pending_kcpuq_works; + spinlock_t kcpuq_work_queues_lock; + struct list_head kcpuq_work_queues; atomic_t pending_tick_work; atomic_t pending_tock_work; + atomic_t pending_gpu_idle_work; struct delayed_work ping_work; struct kbase_context *top_kctx; struct kbase_queue_group *top_grp; struct kbase_queue_group *active_protm_grp; - struct workqueue_struct *idle_wq; - struct work_struct gpu_idle_work; bool fast_gpu_idle_handling; atomic_t gpu_no_longer_idle; atomic_t non_idle_offslot_grps; @@ -1180,6 +1216,8 @@ struct kbase_csf_scheduler { */ spinlock_t gpu_metrics_lock; #endif /* CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD */ + atomic_t gpu_idle_timer_enabled; + atomic_t fw_soi_enabled; }; /* @@ -1643,6 +1681,7 @@ struct kbase_csf_user_reg 
{ * @gpu_idle_dur_count_no_modifier: Update csffw_glb_req_idle_enable to make the shr(10) * modifier conditional on the new flag * in GLB_IDLE_TIMER_CONFIG. + * @csg_suspend_timeout_ms: Timeout given for a CSG to be suspended. * for any request sent to the firmware. * @hwcnt: Contain members required for handling the dump of * HW counters. @@ -1653,12 +1692,29 @@ struct kbase_csf_user_reg { * @dof: Structure for dump on fault. * @user_reg: Collective information to support the mapping to * USER Register page for user processes. - * @pending_gpuq_kicks: Lists of GPU queue that have been kicked but not - * yet processed, categorised by queue group's priority. - * @pending_gpuq_kicks_lock: Protect @pending_gpu_kicks and - * kbase_queue.pending_kick_link. + * @pending_gpuq_kicks: Indicates that kbase_csf_scheduler_kthread() + * should handle GPU queue kicks. + * @pending_gpuq_kick_queues: Lists of GPU queues that have been kicked but not + * yet processed, categorised by queue group's priority. + * @pending_gpuq_kick_queues_lock: Protect @pending_gpuq_kick_queues and + * kbase_queue.pending_kick_link. * @quirks_ext: Pointer to an allocated buffer containing the firmware * workarounds configuration. + * @mmu_sync_sem: RW Semaphore to defer MMU operations till the P.Mode entrance + * or DCS request has been completed. + * @pmode_sync_sem: RW Semaphore to prevent MMU operations during P.Mode entrance. + * @page_fault_cnt_ptr_address: GPU VA of the location in FW data memory, extracted from the + * FW image header, that will store the GPU VA of FW visible + * memory location where the @page_fault_cnt value will be written to. + * @page_fault_cnt_ptr: CPU VA of the FW visible memory location where the @page_fault_cnt + * value will be written to. + * @page_fault_cnt: Counter that is incremented on every GPU page fault, just before the + * MMU is unblocked to retry the memory transaction that caused the GPU + * page fault. The access to counter is serialized appropriately.
+ * @mcu_halted: Flag to inform MCU FSM that the MCU has already halted. + * @fw_io: Firmware I/O interface. + * @compute_progress_timeout_cc: Value of GPU cycle count register when progress + * timer timeout is reported for the compute iterator. */ struct kbase_csf_device { struct kbase_mmu_table mcu_mmu; @@ -1696,6 +1752,7 @@ struct kbase_csf_device { u64 gpu_idle_hysteresis_ns; u32 gpu_idle_dur_count; u32 gpu_idle_dur_count_no_modifier; + u32 csg_suspend_timeout_ms; struct kbase_csf_hwcnt hwcnt; struct kbase_csf_mcu_fw fw; struct kbase_csf_firmware_log fw_log; @@ -1710,9 +1767,18 @@ struct kbase_csf_device { struct kbase_debug_coresight_device coresight; #endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */ struct kbase_csf_user_reg user_reg; - struct list_head pending_gpuq_kicks[KBASE_QUEUE_GROUP_PRIORITY_COUNT]; - spinlock_t pending_gpuq_kicks_lock; + atomic_t pending_gpuq_kicks; + struct list_head pending_gpuq_kick_queues[KBASE_QUEUE_GROUP_PRIORITY_COUNT]; + spinlock_t pending_gpuq_kick_queues_lock; u32 *quirks_ext; + struct rw_semaphore mmu_sync_sem; + struct rw_semaphore pmode_sync_sem; + u32 page_fault_cnt_ptr_address; + u32 *page_fault_cnt_ptr; + u32 page_fault_cnt; + bool mcu_halted; + struct kbase_csf_fw_io fw_io; + u64 compute_progress_timeout_cc; }; /** diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware.c index 952a9b9cdd94..2d8f96641181 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware.c +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2018-2024 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -40,6 +40,7 @@ #include "backend/gpu/mali_kbase_clk_rate_trace_mgr.h" #include #include +#include #include #include #include @@ -55,6 +56,7 @@ #include #include +#include #define MALI_MAX_DEFAULT_FIRMWARE_NAME_LEN ((size_t)64) #define DEFAULT_FW_NAME MALI_RELEASE_NAME".mali_csffw.bin" @@ -68,6 +70,7 @@ static unsigned int csf_firmware_boot_timeout_ms; module_param(csf_firmware_boot_timeout_ms, uint, 0444); MODULE_PARM_DESC(csf_firmware_boot_timeout_ms, "Maximum time to wait for firmware to boot."); +static bool kbase_iter_trace_enable; #ifdef CONFIG_MALI_BIFROST_DEBUG /* Makes Driver wait indefinitely for an acknowledgment for the different @@ -97,6 +100,7 @@ MODULE_PARM_DESC(fw_debug, "Enables effective use of a debugger for debugging fi #define CSF_FIRMWARE_ENTRY_TYPE_TIMELINE_METADATA (4) #define CSF_FIRMWARE_ENTRY_TYPE_BUILD_INFO_METADATA (6) #define CSF_FIRMWARE_ENTRY_TYPE_FUNC_CALL_LIST (7) +#define CSF_FIRMWARE_ENTRY_TYPE_PAGE_FAULT_CNT (8) #define CSF_FIRMWARE_ENTRY_TYPE_CORE_DUMP (9) #define CSF_FIRMWARE_CACHE_MODE_NONE (0ul << 3) @@ -115,7 +119,8 @@ MODULE_PARM_DESC(fw_debug, "Enables effective use of a debugger for debugging fi #define CSF_GLB_REQ_CFG_MASK \ (GLB_REQ_CFG_ALLOC_EN_MASK | GLB_REQ_CFG_PROGRESS_TIMER_MASK | \ - GLB_REQ_CFG_PWROFF_TIMER_MASK | GLB_REQ_IDLE_ENABLE_MASK) + GLB_REQ_CFG_PWROFF_TIMER_MASK | GLB_REQ_IDLE_ENABLE_MASK | \ + GLB_REQ_CFG_EVICTION_TIMER_MASK | GLB_REQ_ITER_TRACE_ENABLE_MASK) static inline u32 input_page_read(const u32 *const input, const u32 offset) { @@ -179,6 +184,92 @@ struct firmware_timeline_metadata { size_t size; }; +static void reinit_page_fault_cnt_firmware_memory(struct kbase_device *kbdev) +{ + if (!kbdev->csf.page_fault_cnt_ptr) + return; + + /* Store the GPU address of shared memory location, where the page fault counter + * value will be written, inside 
the FW data memory. + */ + kbase_csf_update_firmware_memory( + kbdev, kbdev->csf.page_fault_cnt_ptr_address, + (u32)((kbdev->csf.firmware_trace_buffers.mcu_rw.va_reg->start_pfn << PAGE_SHIFT) + + PAGE_SIZE - sizeof(u32))); + + *kbdev->csf.page_fault_cnt_ptr = kbdev->csf.page_fault_cnt = 0; +} + +static void init_page_fault_cnt_firmware_memory(struct kbase_device *kbdev) +{ + if (!kbdev->csf.page_fault_cnt_ptr_address) + return; + + if (WARN_ON_ONCE(!kbdev->csf.firmware_trace_buffers.mcu_rw.va_reg)) + return; + + /* Save the CPU address of shared memory location where the page fault counter + * value will be written. + * The shared memory location comes from the last 4 bytes of the page that + * is allocated to maintain the extract offset value for different trace + * buffers. Only the first 4 bytes of every cacheline are used for the extract offset + * value. + */ + kbdev->csf.page_fault_cnt_ptr = + (u32 *)((u8 *)kbdev->csf.firmware_trace_buffers.mcu_rw.cpu_addr + PAGE_SIZE - + sizeof(u32)); + reinit_page_fault_cnt_firmware_memory(kbdev); +} + +/** + * set_iterator_trace_enable - Set the value for 'kbase_iter_trace_enable' global variable + * according to the value of GLB_FEATURES.ITER_TRACE_SUPPORTED bit, + * and the corresponding device tree entry.
+ * @kbdev: Kernel base device pointer + */ +static void set_iterator_trace_enable(struct kbase_device *kbdev) +{ + const struct kbase_csf_global_iface *iface = &kbdev->csf.global_iface; + bool dev_support_iter_trace = iface->features & GLB_FEATURES_ITER_TRACE_SUPPORTED_MASK; + const void *dt_iter_trace_param; + unsigned int val; + + if (!dev_support_iter_trace) { + kbase_iter_trace_enable = false; + return; + } + + + /* check device tree for iterator trace enable property and + * fallback to "iter_trace_enable" if not found and try again + */ + dt_iter_trace_param = of_get_property(kbdev->dev->of_node, "iter-trace-enable", NULL); + + if (!dt_iter_trace_param) + dt_iter_trace_param = + of_get_property(kbdev->dev->of_node, "iter_trace_enable", NULL); + + val = (dt_iter_trace_param) ? be32_to_cpup(dt_iter_trace_param) : 0; + dev_dbg(kbdev->dev, "Iterator trace enable device-tree config value: %u", val); + + kbase_iter_trace_enable = val ? true : false; +} + +static void iterator_trace_reinit(struct kbase_device *kbdev) +{ + if (kbase_iter_trace_enable) { + kbase_csf_firmware_global_input_mask(&kbdev->csf.global_iface, GLB_REQ, + GLB_REQ_ITER_TRACE_ENABLE_MASK, + GLB_REQ_ITER_TRACE_ENABLE_MASK); + } +} + +static void iterator_trace_init(struct kbase_device *kbdev) +{ + set_iterator_trace_enable(kbdev); + iterator_trace_reinit(kbdev); +} + /* The shared interface area, used for communicating with firmware, is managed * like a virtual memory zone. Reserve the virtual space from that zone * corresponding to shared interface entry parsed from the firmware image. 
@@ -217,7 +308,7 @@ void kbase_csf_firmware_disable_mcu(struct kbase_device *kbdev) kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(MCU_CONTROL), MCU_CONTROL_REQ_DISABLE); } -static void wait_for_firmware_stop(struct kbase_device *kbdev) +void kbase_csf_firmware_disable_mcu_wait(struct kbase_device *kbdev) { u32 val; const u32 timeout_us = @@ -232,17 +323,12 @@ static void wait_for_firmware_stop(struct kbase_device *kbdev) KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_OFF(kbdev, kbase_backend_get_cycle_cnt(kbdev)); } -void kbase_csf_firmware_disable_mcu_wait(struct kbase_device *kbdev) -{ - wait_for_firmware_stop(kbdev); -} - -static void stop_csf_firmware(struct kbase_device *kbdev) +void kbase_csf_stop_firmware_and_wait(struct kbase_device *kbdev) { /* Stop the MCU firmware */ kbase_csf_firmware_disable_mcu(kbdev); - wait_for_firmware_stop(kbdev); + kbase_csf_firmware_disable_mcu_wait(kbdev); } static void wait_for_firmware_boot(struct kbase_device *kbdev) @@ -261,7 +347,6 @@ static void wait_for_firmware_boot(struct kbase_device *kbdev) */ remaining = wait_event_timeout(kbdev->csf.event_wait, kbdev->csf.interrupt_received == true, wait_timeout); - if (!remaining) dev_err(kbdev->dev, "Timed out waiting for fw boot completion"); @@ -485,6 +570,8 @@ static int reload_fw_image(struct kbase_device *kbdev) kbdev->csf.firmware_full_reload_needed = false; kbase_csf_firmware_reload_trace_buffers_data(kbdev); + reinit_page_fault_cnt_firmware_memory(kbdev); + iterator_trace_reinit(kbdev); out: return ret; } @@ -1043,6 +1130,14 @@ static int load_firmware_entry(struct kbase_device *kbdev, const struct kbase_cs } kbase_csf_firmware_log_parse_logging_call_list_entry(kbdev, entry); return 0; + case CSF_FIRMWARE_ENTRY_TYPE_PAGE_FAULT_CNT: + /* Entry about the location of page fault counter */ + if (size < sizeof(*entry)) { + dev_err(kbdev->dev, "Page fault counter entry too short (size=%u)", size); + return -EINVAL; + } + kbdev->csf.page_fault_cnt_ptr_address = *entry; + return 0; case 
CSF_FIRMWARE_ENTRY_TYPE_CORE_DUMP: /* Core Dump section */ if (size < CORE_DUMP_ENTRY_START_ADDR_OFFSET + sizeof(*entry)) { @@ -1552,7 +1647,6 @@ static bool global_request_complete(struct kbase_device *const kbdev, u32 const unsigned long flags; kbase_csf_scheduler_spin_lock(kbdev, &flags); - if ((kbase_csf_firmware_global_output(global_iface, GLB_ACK) & req_mask) == (kbase_csf_firmware_global_input_read(global_iface, GLB_REQ) & req_mask)) complete = true; @@ -1644,9 +1738,27 @@ static void set_timeout_global(const struct kbase_csf_global_iface *const global set_global_request(global_iface, GLB_REQ_CFG_PROGRESS_TIMER_MASK); } +static inline void set_gpu_idle_timer_glb_req(struct kbase_device *const kbdev, bool set) +{ + struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface; + + kbase_csf_scheduler_spin_lock_assert_held(kbdev); + + if (set) { + kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ, GLB_REQ_REQ_IDLE_ENABLE, + GLB_REQ_IDLE_ENABLE_MASK); + } else { + kbase_csf_firmware_global_input_mask( + global_iface, GLB_REQ, GLB_REQ_REQ_IDLE_DISABLE, GLB_REQ_IDLE_DISABLE_MASK); + } + + atomic_set(&kbdev->csf.scheduler.gpu_idle_timer_enabled, set); +} + static void enable_gpu_idle_timer(struct kbase_device *const kbdev) { struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface; + bool const fw_soi_allowed = kbase_pm_fw_sleep_on_idle_allowed(kbdev); kbase_csf_scheduler_spin_lock_assert_held(kbdev); @@ -1654,15 +1766,114 @@ static void enable_gpu_idle_timer(struct kbase_device *const kbdev) kbdev->csf.gpu_idle_dur_count); kbase_csf_firmware_global_input_mask(global_iface, GLB_IDLE_TIMER_CONFIG, - kbdev->csf.gpu_idle_dur_count_no_modifier, + kbdev->csf.gpu_idle_dur_count_no_modifier + << GLB_IDLE_TIMER_CONFIG_NO_MODIFIER_SHIFT, GLB_IDLE_TIMER_CONFIG_NO_MODIFIER_MASK); + kbase_csf_firmware_global_input_mask(global_iface, GLB_IDLE_TIMER_CONFIG, + fw_soi_allowed + << GLB_IDLE_TIMER_CONFIG_SLEEP_ON_IDLE_SHIFT, + 
GLB_IDLE_TIMER_CONFIG_SLEEP_ON_IDLE_MASK); - kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ, GLB_REQ_REQ_IDLE_ENABLE, - GLB_REQ_IDLE_ENABLE_MASK); + set_gpu_idle_timer_glb_req(kbdev, true); + atomic_set(&kbdev->csf.scheduler.fw_soi_enabled, fw_soi_allowed); dev_dbg(kbdev->dev, "Enabling GPU idle timer with count-value: 0x%.8x", kbdev->csf.gpu_idle_dur_count); } +/** + * convert_dur_to_suspend_count() - Convert CSG suspend timeout from ms to cycle count + * @kbdev: Instance of a GPU platform device that implements a CSF interface + * @dur_ms: Timeout value in ms + * @no_modifier: Indicate whether bit-shift is applied, 0 when applied, 1 otherwise + * + * Convert CSG suspend timeout from ms to cycle count, then generate a register value + * combining cycle count and timer source + * + * Return: Register value which will be stored into register GLB_EVICTION_TIMER. + */ +static u32 convert_dur_to_suspend_count(struct kbase_device *kbdev, const u64 dur_ms, + u32 *no_modifier) +{ + /* Get the cntfreq_el0 value, which drives the SYSTEM_TIMESTAMP */ + u64 freq = kbase_arch_timer_get_cntfrq(kbdev); + u64 dur_val = dur_ms; + u32 cnt_val_u32, reg_val_u32; + const bool src_system_timestamp = freq > 0; + const u8 SUSPEND_VAL_UNIT_SHIFT = 10; + + if (!src_system_timestamp) { + /* Get the cycle_counter source alternative */ + spin_lock(&kbdev->pm.clk_rtm.lock); + if (kbdev->pm.clk_rtm.clks[0]) + freq = kbdev->pm.clk_rtm.clks[0]->clock_val; + else + dev_err(kbdev->dev, "No GPU clock, unexpected intregration issue!"); + spin_unlock(&kbdev->pm.clk_rtm.lock); + + dev_info(kbdev->dev, + "No timestamp frequency, use cycle counter for csg suspend timeout!"); + } + + /* Formula for dur_val = (dur/1e3) * freq_HZ) */ + dur_val = dur_val * freq; + dur_val = div_u64(dur_val, MSEC_PER_SEC); + if (dur_val < S32_MAX) { + *no_modifier = 1; + } else { + dur_val = dur_val >> SUSPEND_VAL_UNIT_SHIFT; + *no_modifier = 0; + } + + /* Interface limits the value field to S32_MAX */ + 
cnt_val_u32 = (dur_val > S32_MAX) ? S32_MAX : (u32)dur_val; + + reg_val_u32 = GLB_EVICTION_TIMER_TIMEOUT_SET(0, cnt_val_u32); + /* add the source flag */ + reg_val_u32 = GLB_EVICTION_TIMER_TIMER_SOURCE_SET( + reg_val_u32, + (src_system_timestamp ? GLB_EVICTION_TIMER_TIMER_SOURCE_SYSTEM_TIMESTAMP : + GLB_EVICTION_TIMER_TIMER_SOURCE_GPU_COUNTER)); + + return reg_val_u32; +} + +/** + * set_csg_suspend_timeout() - Update CSG suspend timeout setting on FW side + * + * @kbdev: Instance of a GPU platform device that implements a CSF interface + */ +static void set_csg_suspend_timeout(struct kbase_device *const kbdev) +{ + u32 dur_ms, dur_val; + u32 no_modifier = 0; + struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface; + + kbase_csf_scheduler_spin_lock_assert_held(kbdev); + + dur_ms = kbdev->csf.csg_suspend_timeout_ms; + if (unlikely(dur_ms < CSG_SUSPEND_TIMEOUT_FIRMWARE_MS_MIN + + CSG_SUSPEND_TIMEOUT_HOST_ADDED_MS || + dur_ms > CSG_SUSPEND_TIMEOUT_FIRMWARE_MS_MAX + + CSG_SUSPEND_TIMEOUT_HOST_ADDED_MS)) { + dev_err(kbdev->dev, "Unexpected CSG suspend timeout: %ums, default to: %ums", + dur_ms, CSG_SUSPEND_TIMEOUT_MS); + kbdev->csf.csg_suspend_timeout_ms = CSG_SUSPEND_TIMEOUT_MS; + dur_ms = CSG_SUSPEND_TIMEOUT_MS; + } + dur_ms = dur_ms - CSG_SUSPEND_TIMEOUT_HOST_ADDED_MS; + + dur_val = convert_dur_to_suspend_count(kbdev, dur_ms, &no_modifier); + + kbase_csf_firmware_global_input(global_iface, GLB_EVICTION_TIMER, dur_val); + + kbase_csf_firmware_global_input_mask(global_iface, GLB_EVICTION_TIMER_CONFIG, no_modifier, + GLB_EVICTION_TIMER_CONFIG_NO_MODIFIER_MASK); + + set_global_request(global_iface, GLB_REQ_CFG_EVICTION_TIMER_MASK); + + dev_dbg(kbdev->dev, "Updating CSG suspend timeout with count-value: 0x%.8x", dur_val); +} + static bool global_debug_request_complete(struct kbase_device *const kbdev, u32 const req_mask) { struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface; @@ -1751,7 +1962,8 @@ static void global_init(struct 
kbase_device *const kbdev, u64 core_mask) GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_MASK | GLB_ACK_IRQ_MASK_PROTM_ENTER_MASK | GLB_ACK_IRQ_MASK_PROTM_EXIT_MASK | GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_MASK | GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_MASK | GLB_ACK_IRQ_MASK_IDLE_EVENT_MASK | - GLB_REQ_DEBUG_CSF_REQ_MASK | GLB_ACK_IRQ_MASK_IDLE_ENABLE_MASK; + GLB_REQ_DEBUG_CSF_REQ_MASK | GLB_ACK_IRQ_MASK_IDLE_ENABLE_MASK | + GLB_ACK_IRQ_MASK_CFG_EVICTION_TIMER_MASK | GLB_ACK_IRQ_MASK_ITER_TRACE_ENABLE_MASK; const struct kbase_csf_global_iface *const global_iface = &kbdev->csf.global_iface; unsigned long flags; @@ -1766,11 +1978,10 @@ static void global_init(struct kbase_device *const kbdev, u64 core_mask) set_timeout_global(global_iface, kbase_csf_timeout_get(kbdev)); - /* The GPU idle timer is always enabled for simplicity. Checks will be - * done before scheduling the GPU idle worker to see if it is - * appropriate for the current power policy. + /* The csg suspend timeout is always enabled so customer has the flexibility to update it + * at any time. 
*/ - enable_gpu_idle_timer(kbdev); + set_csg_suspend_timeout(kbdev); /* Unmask the interrupts */ kbase_csf_firmware_global_input(global_iface, GLB_ACK_IRQ_MASK, ack_irq_mask); @@ -1890,6 +2101,7 @@ static void kbase_csf_firmware_reload_worker(struct work_struct *work) { struct kbase_device *kbdev = container_of(work, struct kbase_device, csf.firmware_reload_work); + unsigned long flags; int err; dev_info(kbdev->dev, "reloading firmware"); @@ -1908,7 +2120,9 @@ static void kbase_csf_firmware_reload_worker(struct work_struct *work) return; /* Reboot the firmware */ + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); kbase_csf_firmware_enable_mcu(kbdev); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); } void kbase_csf_firmware_trigger_reload(struct kbase_device *kbdev) @@ -1945,6 +2159,7 @@ void kbase_csf_firmware_reload_completed(struct kbase_device *kbdev) KBASE_KTRACE_ADD(kbdev, CSF_FIRMWARE_REBOOT, NULL, 0u); + /* Tell MCU state machine to transit to next state */ kbdev->csf.firmware_reloaded = true; kbase_pm_update_state(kbdev); @@ -2045,29 +2260,44 @@ u32 kbase_csf_firmware_set_gpu_idle_hysteresis_time(struct kbase_device *kbdev, return kbdev->csf.gpu_idle_dur_count; } - /* The 'reg_lock' is also taken and is held till the update is not + /* The scheduler lock is also taken and is held until the update is * complete, to ensure the update of idle timer value by multiple Users * gets serialized. */ - mutex_lock(&kbdev->csf.reg_lock); - /* The firmware only reads the new idle timer value when the timer is - * disabled. 
- */ + kbase_csf_scheduler_lock(kbdev); + while (atomic_read(&kbdev->csf.scheduler.pending_gpu_idle_work) > 0) { + kbase_csf_scheduler_unlock(kbdev); + kbase_csf_scheduler_wait_for_kthread_pending_work( + kbdev, &kbdev->csf.scheduler.pending_gpu_idle_work); + kbase_csf_scheduler_lock(kbdev); + } kbase_csf_scheduler_spin_lock(kbdev, &flags); - kbase_csf_firmware_disable_gpu_idle_timer(kbdev); - kbase_csf_scheduler_spin_unlock(kbdev, flags); - /* Ensure that the request has taken effect */ - wait_for_global_request(kbdev, GLB_REQ_IDLE_DISABLE_MASK); - kbase_csf_scheduler_spin_lock(kbdev, &flags); kbdev->csf.gpu_idle_hysteresis_ns = dur_ns; kbdev->csf.gpu_idle_dur_count = hysteresis_val; kbdev->csf.gpu_idle_dur_count_no_modifier = no_modifier; - kbase_csf_firmware_enable_gpu_idle_timer(kbdev); - kbase_csf_scheduler_spin_unlock(kbdev, flags); - wait_for_global_request(kbdev, GLB_REQ_IDLE_ENABLE_MASK); - mutex_unlock(&kbdev->csf.reg_lock); + if (atomic_read(&kbdev->csf.scheduler.gpu_idle_timer_enabled)) { + /* Timer is already enabled. Disable the timer as FW only reads + * the new idle timer value when timer is re-enabled. 
+ */ + kbase_csf_firmware_disable_gpu_idle_timer(kbdev); + kbase_csf_scheduler_spin_unlock(kbdev, flags); + /* Ensure that the request has taken effect */ + if (wait_for_global_request(kbdev, GLB_REQ_IDLE_DISABLE_MASK)) + dev_err(kbdev->dev, + "Failed to disable GLB_IDLE timer when setting a new idle hysteresis timeout"); + kbase_csf_scheduler_spin_lock(kbdev, &flags); + kbase_csf_firmware_enable_gpu_idle_timer(kbdev); + kbase_csf_scheduler_spin_unlock(kbdev, flags); + if (wait_for_global_request(kbdev, GLB_REQ_IDLE_ENABLE_MASK)) + dev_err(kbdev->dev, + "Failed to re-enable GLB_IDLE timer when setting a new idle hysteresis timeout"); + } else { + kbase_csf_scheduler_spin_unlock(kbdev, flags); + } + + kbase_csf_scheduler_unlock(kbdev); kbase_csf_scheduler_pm_idle(kbdev); kbase_reset_gpu_allow(kbdev); end: @@ -2168,78 +2398,6 @@ u32 kbase_csf_firmware_reset_mcu_core_pwroff_time(struct kbase_device *kbdev) return kbase_csf_firmware_set_mcu_core_pwroff_time(kbdev, DEFAULT_GLB_PWROFF_TIMEOUT_NS); } -/** - * kbase_csf_get_iterator_trace_enable - Parsing the iterator_trace enable firstly from - * the module parameter, and then from device-tree. - * @kbdev: Kernel base device pointer - * - * Return: true on enabled, otherwise false. - */ -static bool kbase_csf_get_iterator_trace_enable(struct kbase_device *kbdev) -{ - const void *dt_iter_trace_param; - unsigned int val; - - - /* check device tree for iterator trace enable property and - * fallback to "iter_trace_enable" if not found and try again - */ - dt_iter_trace_param = of_get_property(kbdev->dev->of_node, "iter-trace-enable", NULL); - - if (!dt_iter_trace_param) - dt_iter_trace_param = - of_get_property(kbdev->dev->of_node, "iter_trace_enable", NULL); - - val = (dt_iter_trace_param) ? 
be32_to_cpup(dt_iter_trace_param) : 0; - dev_dbg(kbdev->dev, "Iterator trace enable device-tree config value: %u", val); - - return (val != 0); -} - -/** - * kbase_device_csf_iterator_trace_init - Send request to enable iterator - * trace port. - * @kbdev: Kernel base device pointer - * - * Return: 0 on success (or if enable request is not sent), or error - * code -EINVAL on failure of GPU to acknowledge enable request. - */ -static int kbase_device_csf_iterator_trace_init(struct kbase_device *kbdev) -{ - /* Enable the iterator trace port if supported by the GPU and is - * configured to do so. The FW must advertise this feature in GLB_FEATURES. - */ - if (kbdev->pm.backend.gpu_powered) { - const struct kbase_csf_global_iface *iface = &kbdev->csf.global_iface; - bool dev_support_iter_trace = iface->features & - GLB_FEATURES_ITER_TRACE_SUPPORTED_MASK; - - dev_dbg(kbdev->dev, "Device supporting iterator trace: %s\n", - dev_support_iter_trace ? "true" : "false"); - if (dev_support_iter_trace && kbase_csf_get_iterator_trace_enable(kbdev)) { - long ack_timeout = kbase_csf_timeout_in_jiffies( - kbase_get_timeout_ms(kbdev, CSF_FIRMWARE_TIMEOUT)); - - /* write enable request to global input */ - kbase_csf_firmware_global_input_mask(iface, GLB_REQ, - GLB_REQ_ITER_TRACE_ENABLE_MASK, - GLB_REQ_ITER_TRACE_ENABLE_MASK); - /* Ring global doorbell */ - kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); - - ack_timeout = wait_event_timeout( - kbdev->csf.event_wait, - !((kbase_csf_firmware_global_input_read(iface, GLB_REQ) ^ - kbase_csf_firmware_global_output(iface, GLB_ACK)) & - GLB_REQ_ITER_TRACE_ENABLE_MASK), - ack_timeout); - - return ack_timeout ? 
0 : -EINVAL; - } - } - return 0; -} - int kbase_csf_firmware_early_init(struct kbase_device *kbdev) { init_waitqueue_head(&kbdev->csf.event_wait); @@ -2253,10 +2411,9 @@ int kbase_csf_firmware_early_init(struct kbase_device *kbdev) INIT_WORK(&kbdev->csf.firmware_reload_work, kbase_csf_firmware_reload_worker); INIT_WORK(&kbdev->csf.fw_error_work, firmware_error_worker); - kbdev->csf.glb_init_request_pending = true; - + init_rwsem(&kbdev->csf.mmu_sync_sem); mutex_init(&kbdev->csf.reg_lock); - kbase_csf_pending_gpuq_kicks_init(kbdev); + kbase_csf_pending_gpuq_kick_queues_init(kbdev); kbdev->csf.fw = (struct kbase_csf_mcu_fw){ .data = NULL }; @@ -2265,7 +2422,7 @@ int kbase_csf_firmware_early_init(struct kbase_device *kbdev) void kbase_csf_firmware_early_term(struct kbase_device *kbdev) { - kbase_csf_pending_gpuq_kicks_term(kbdev); + kbase_csf_pending_gpuq_kick_queues_term(kbdev); mutex_destroy(&kbdev->csf.reg_lock); } @@ -2284,6 +2441,8 @@ int kbase_csf_firmware_late_init(struct kbase_device *kbdev) convert_dur_to_idle_count(kbdev, kbdev->csf.gpu_idle_hysteresis_ns, &no_modifier); kbdev->csf.gpu_idle_dur_count_no_modifier = no_modifier; + kbdev->csf.csg_suspend_timeout_ms = CSG_SUSPEND_TIMEOUT_MS; + return 0; } @@ -2472,6 +2631,8 @@ int kbase_csf_firmware_load_init(struct kbase_device *kbdev) goto err_out; } + init_page_fault_cnt_firmware_memory(kbdev); + ret = kbase_csf_firmware_cfg_fw_wa_init(kbdev); if (ret != 0) { dev_err(kbdev->dev, "Failed to initialize firmware workarounds"); @@ -2492,6 +2653,8 @@ int kbase_csf_firmware_load_init(struct kbase_device *kbdev) if (ret != 0) goto err_out; + iterator_trace_init(kbdev); + ret = kbase_csf_doorbell_mapping_init(kbdev); if (ret != 0) goto err_out; @@ -2522,10 +2685,6 @@ int kbase_csf_firmware_load_init(struct kbase_device *kbdev) if (ret != 0) goto err_out; - ret = kbase_device_csf_iterator_trace_init(kbdev); - if (ret != 0) - goto err_out; - if (kbdev->csf.fw_core_dump.available) 
kbase_csf_firmware_core_dump_init(kbdev); @@ -2575,7 +2734,7 @@ void kbase_csf_firmware_unload_term(struct kbase_device *kbdev) kbdev->csf.firmware_inited = false; if (WARN_ON(kbdev->pm.backend.mcu_state != KBASE_MCU_OFF)) { kbdev->pm.backend.mcu_state = KBASE_MCU_OFF; - stop_csf_firmware(kbdev); + kbase_csf_stop_firmware_and_wait(kbdev); } spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); @@ -2731,7 +2890,7 @@ int kbase_csf_firmware_mcu_register_poll(struct kbase_device *const kbdev, u32 c unsigned long remaining = kbase_csf_timeout_in_jiffies(kbase_get_timeout_ms(kbdev, CSF_FIRMWARE_TIMEOUT)) + jiffies; - u32 read_val; + u32 read_val = 0; dev_dbg(kbdev->dev, "p: reg %08x val %08x mask %08x", reg_addr, reg_val, val_mask); @@ -2778,12 +2937,10 @@ void kbase_csf_firmware_enable_gpu_idle_timer(struct kbase_device *kbdev) void kbase_csf_firmware_disable_gpu_idle_timer(struct kbase_device *kbdev) { - struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface; - kbase_csf_scheduler_spin_lock_assert_held(kbdev); - kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ, GLB_REQ_REQ_IDLE_DISABLE, - GLB_REQ_IDLE_DISABLE_MASK); + set_gpu_idle_timer_glb_req(kbdev, false); + atomic_set(&kbdev->csf.scheduler.fw_soi_enabled, false); dev_dbg(kbdev->dev, "Sending request to disable gpu idle timer"); kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); @@ -2807,6 +2964,7 @@ int kbase_csf_firmware_ping_wait(struct kbase_device *const kbdev, unsigned int return wait_for_global_request_with_timeout(kbdev, GLB_REQ_PING_MASK, wait_timeout_ms); } + int kbase_csf_firmware_set_timeout(struct kbase_device *const kbdev, u64 const timeout) { const struct kbase_csf_global_iface *const global_iface = &kbdev->csf.global_iface; @@ -2845,8 +3003,6 @@ int kbase_csf_wait_protected_mode_enter(struct kbase_device *kbdev) { int err; - lockdep_assert_held(&kbdev->mmu_hw_mutex); - err = wait_for_global_request(kbdev, GLB_REQ_PROTM_ENTER_MASK); if (!err) { @@ -2912,6 +3068,7 @@ void 
kbase_csf_firmware_enable_mcu(struct kbase_device *kbdev) { struct kbase_csf_global_iface *iface = &kbdev->csf.global_iface; + lockdep_assert_held(&kbdev->hwaccess_lock); /* Clear the HALT bit before triggering the boot of MCU firmware */ kbase_csf_firmware_global_input_mask(iface, GLB_REQ, 0, GLB_REQ_HALT_MASK); @@ -2935,11 +3092,23 @@ void kbase_csf_firmware_trigger_mcu_sleep(struct kbase_device *kbdev) bool kbase_csf_firmware_is_mcu_in_sleep(struct kbase_device *kbdev) { + bool db_notif_disabled; + lockdep_assert_held(&kbdev->hwaccess_lock); - return (global_request_complete(kbdev, GLB_REQ_SLEEP_MASK) && - kbase_csf_firmware_mcu_halted(kbdev)); + db_notif_disabled = kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(MCU_CONTROL)) & + MCU_CNTRL_DOORBELL_DISABLE_MASK; + + if (!db_notif_disabled || !kbase_csf_firmware_mcu_halted(kbdev)) + return false; + + if (global_request_complete(kbdev, GLB_REQ_SLEEP_MASK)) + return true; + + kbase_pm_enable_mcu_db_notification(kbdev); + dev_dbg(kbdev->dev, "Enabled DB notification"); + return false; } #endif @@ -3191,6 +3360,9 @@ void kbase_csf_firmware_mcu_shared_mapping_term(struct kbase_device *kbdev, } if (csf_mapping->phys) { + /* This is on module unload path, so the pages can be left uncleared before + * returning them back to kbdev memory pool. 
+ */ kbase_mem_pool_free_pages(&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], csf_mapping->num_pages, csf_mapping->phys, false, false); } @@ -3198,3 +3370,127 @@ void kbase_csf_firmware_mcu_shared_mapping_term(struct kbase_device *kbdev, vunmap(csf_mapping->cpu_addr); kfree(csf_mapping->phys); } + +#ifdef KBASE_PM_RUNTIME + +void kbase_csf_firmware_soi_update(struct kbase_device *kbdev) +{ + struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; + unsigned long flags; + + /* There are 3 possibilities: + * - Sleep-on-Idle allowed + * - Sleep-on-Idle not allowed, GLB_IDLE timer disabled + * - Sleep-on-Idle not allowed, GLB_IDLE timer enabled + */ + if (kbase_pm_fw_sleep_on_idle_allowed(kbdev)) { + if (likely(atomic_read(&kbdev->csf.scheduler.fw_soi_enabled))) + return; + } else { + if (test_bit(KBASE_GPU_NON_IDLE_OFF_SLOT_GROUPS_AVAILABLE, + &kbdev->pm.backend.gpu_sleep_allowed)) { + if (likely(!atomic_read(&kbdev->csf.scheduler.gpu_idle_timer_enabled))) + return; + } else if (likely(atomic_read(&kbdev->csf.scheduler.gpu_idle_timer_enabled))) { + return; + } + } + + if (kbase_reset_gpu_try_prevent(kbdev)) + return; + + kbase_csf_scheduler_lock(kbdev); + + if (atomic_read(&scheduler->pending_gpu_idle_work) > 0) + goto out_unlock_scheduler_lock; + + if ((scheduler->state == SCHED_SUSPENDED) || (scheduler->state == SCHED_SLEEPING)) + goto out_unlock_scheduler_lock; + + if (kbdev->pm.backend.mcu_state != KBASE_MCU_ON) + goto out_unlock_scheduler_lock; + + /* Ensure that an existing DISABLE request is completed before + * proceeding. Such requests may be issued without waiting for + * completion, e.g. when enabling the MCU. 
+ */ + if (wait_for_global_request(kbdev, GLB_REQ_IDLE_DISABLE_MASK)) { + dev_err(kbdev->dev, + "Existing GLB_IDLE timer config change failed to complete in time (gpu_sleep_allowed:%lx)", + kbdev->pm.backend.gpu_sleep_allowed); + goto out_unlock_scheduler_lock; + } + + /* Disable the GLB IDLE timer if it's currently enabled */ + if (atomic_read(&kbdev->csf.scheduler.gpu_idle_timer_enabled)) { + kbase_csf_scheduler_spin_lock(kbdev, &flags); + kbase_csf_firmware_disable_gpu_idle_timer(kbdev); + kbase_csf_scheduler_spin_unlock(kbdev, flags); + if (wait_for_global_request(kbdev, GLB_REQ_IDLE_DISABLE_MASK)) { + dev_err(kbdev->dev, + "Failed to disable GLB_IDLE timer following FW Sleep-on-Idle config change (gpu_sleep_allowed:%lx)", + kbdev->pm.backend.gpu_sleep_allowed); + goto out_unlock_scheduler_lock; + } + } + + /* The GLB IDLE timer and, consequently, FW Sleep-on-Idle could remain + * disabled in certain cases. Otherwise, we shall re-enable GLB IDLE + * timer with the new FW Sleep-on-Idle configuration. 
+ */ + if (!test_bit(KBASE_GPU_NON_IDLE_OFF_SLOT_GROUPS_AVAILABLE, + &kbdev->pm.backend.gpu_sleep_allowed)) { + kbase_csf_scheduler_spin_lock(kbdev, &flags); + kbase_csf_firmware_enable_gpu_idle_timer(kbdev); + kbase_csf_scheduler_spin_unlock(kbdev, flags); + if (wait_for_global_request(kbdev, GLB_REQ_IDLE_ENABLE_MASK)) { + dev_err(kbdev->dev, + "Failed to re-enable GLB_IDLE timer following FW Sleep-on-Idle config change (gpu_sleep_allowed:%lx)", + kbdev->pm.backend.gpu_sleep_allowed); + goto out_unlock_scheduler_lock; + } + } + + if (atomic_read(&scheduler->fw_soi_enabled)) { + dev_dbg(kbdev->dev, "FW Sleep-on-Idle was enabled"); + KBASE_KTRACE_ADD(kbdev, FIRMWARE_SLEEP_ON_IDLE_CHANGED, NULL, true); + } else { + dev_dbg(kbdev->dev, "FW Sleep-on-Idle was disabled"); + KBASE_KTRACE_ADD(kbdev, FIRMWARE_SLEEP_ON_IDLE_CHANGED, NULL, false); + } + +out_unlock_scheduler_lock: + kbase_csf_scheduler_unlock(kbdev); + kbase_reset_gpu_allow(kbdev); +} + +int kbase_csf_firmware_soi_disable_on_scheduler_suspend(struct kbase_device *kbdev) +{ + struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; + unsigned long flags; + + lockdep_assert_held(&scheduler->lock); + + if (WARN_ON_ONCE(scheduler->state != SCHED_INACTIVE)) + return 0; + + if (!atomic_read(&kbdev->csf.scheduler.fw_soi_enabled)) + return 0; + + kbase_csf_scheduler_spin_lock(kbdev, &flags); + if (atomic_read(&scheduler->fw_soi_enabled)) { + kbase_csf_firmware_disable_gpu_idle_timer(kbdev); + kbase_csf_scheduler_spin_unlock(kbdev, flags); + if (wait_for_global_request(kbdev, GLB_REQ_IDLE_DISABLE_MASK)) { + dev_err(kbdev->dev, "Failed to disable Sleep-on-Idle config"); + return -ETIMEDOUT; + } + KBASE_KTRACE_ADD(kbdev, FIRMWARE_SLEEP_ON_IDLE_CHANGED, NULL, false); + } else { + kbase_csf_scheduler_spin_unlock(kbdev, flags); + } + + return 0; +} + +#endif /* KBASE_PM_RUNTIME */ diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware.h index 
a2948a98e9a7..20cb03991bbe 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware.h +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2018-2024 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -591,13 +591,20 @@ void kbase_csf_firmware_enable_mcu(struct kbase_device *kbdev); void kbase_csf_firmware_disable_mcu(struct kbase_device *kbdev); /** - * kbase_csf_firmware_disable_mcu_wait - Wait for the MCU to reach disabled - * status. + * kbase_csf_firmware_disable_mcu_wait - Wait for the MCU to reach disabled status. * * @kbdev: Instance of a GPU platform device that implements a CSF interface. */ void kbase_csf_firmware_disable_mcu_wait(struct kbase_device *kbdev); +/** + * kbase_csf_stop_firmware_and_wait - Disable firmware and wait for the MCU to reach + * disabled status. + * + * @kbdev: Instance of a GPU platform device that implements a CSF interface. 
+ */ +void kbase_csf_stop_firmware_and_wait(struct kbase_device *kbdev); + #ifdef KBASE_PM_RUNTIME /** * kbase_csf_firmware_trigger_mcu_sleep - Send the command to put MCU in sleep @@ -618,6 +625,7 @@ void kbase_csf_firmware_trigger_mcu_sleep(struct kbase_device *kbdev); bool kbase_csf_firmware_is_mcu_in_sleep(struct kbase_device *kbdev); #endif + /** * kbase_csf_firmware_trigger_reload() - Trigger the reboot of MCU firmware, for * the cold boot case firmware image would @@ -926,4 +934,27 @@ int kbase_csf_trigger_firmware_config_update(struct kbase_device *kbdev); */ int kbase_csf_firmware_req_core_dump(struct kbase_device *const kbdev); +#ifdef KBASE_PM_RUNTIME + +/** + * kbase_csf_firmware_soi_update - Update FW Sleep-on-Idle config + * + * @kbdev: Device pointer + * + * This function reconfigures the FW Sleep-on-Idle configuration if necessary. + */ +void kbase_csf_firmware_soi_update(struct kbase_device *kbdev); + +/** + * kbase_csf_firmware_soi_disable_on_scheduler_suspend - Disable FW Sleep-on-Idle config + * on scheduler suspension + * + * @kbdev: Device pointer + * + * Return: 0 on success, otherwise failure + */ +int kbase_csf_firmware_soi_disable_on_scheduler_suspend(struct kbase_device *kbdev); + +#endif /* KBASE_PM_RUNTIME */ + #endif diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_cfg.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_cfg.c index d08686f5829b..030a1ebf0ac6 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_cfg.c +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_cfg.c @@ -367,10 +367,10 @@ int kbase_csf_firmware_cfg_fw_wa_init(struct kbase_device *kbdev) */ entry_count = of_property_count_u32_elems(kbdev->dev->of_node, "quirks-ext"); - if (entry_count == -EINVAL) + if (entry_count < 0) entry_count = of_property_count_u32_elems(kbdev->dev->of_node, "quirks_ext"); - if (entry_count == -EINVAL || entry_count == -ENODATA) + if (entry_count < 0) return 0; entry_bytes = (size_t)entry_count * 
sizeof(u32); diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_no_mali.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_no_mali.c index 90568f6fa09f..a206ed3da210 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_no_mali.c +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_no_mali.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2018-2024 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -670,6 +670,23 @@ static void set_timeout_global(const struct kbase_csf_global_iface *const global set_global_request(global_iface, GLB_REQ_CFG_PROGRESS_TIMER_MASK); } +static inline void set_gpu_idle_timer_glb_req(struct kbase_device *const kbdev, bool set) +{ + struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface; + + kbase_csf_scheduler_spin_lock_assert_held(kbdev); + + if (set) { + kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ, GLB_REQ_REQ_IDLE_ENABLE, + GLB_REQ_IDLE_ENABLE_MASK); + } else { + kbase_csf_firmware_global_input_mask( + global_iface, GLB_REQ, GLB_REQ_REQ_IDLE_DISABLE, GLB_REQ_IDLE_DISABLE_MASK); + } + + atomic_set(&kbdev->csf.scheduler.gpu_idle_timer_enabled, set); +} + static void enable_gpu_idle_timer(struct kbase_device *const kbdev) { struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface; @@ -678,8 +695,11 @@ static void enable_gpu_idle_timer(struct kbase_device *const kbdev) kbase_csf_firmware_global_input(global_iface, GLB_IDLE_TIMER, kbdev->csf.gpu_idle_dur_count); - kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ, GLB_REQ_REQ_IDLE_ENABLE, - GLB_REQ_IDLE_ENABLE_MASK); + kbase_csf_firmware_global_input_mask(global_iface, GLB_IDLE_TIMER_CONFIG, + kbdev->csf.gpu_idle_dur_count_no_modifier, + 
GLB_IDLE_TIMER_CONFIG_NO_MODIFIER_MASK); + + set_gpu_idle_timer_glb_req(kbdev, true); dev_dbg(kbdev->dev, "Enabling GPU idle timer with count-value: 0x%.8x", kbdev->csf.gpu_idle_dur_count); } @@ -768,12 +788,6 @@ static void global_init(struct kbase_device *const kbdev, u64 core_mask) set_timeout_global(global_iface, kbase_csf_timeout_get(kbdev)); - /* The GPU idle timer is always enabled for simplicity. Checks will be - * done before scheduling the GPU idle worker to see if it is - * appropriate for the current power policy. - */ - enable_gpu_idle_timer(kbdev); - /* Unmask the interrupts */ kbase_csf_firmware_global_input(global_iface, GLB_ACK_IRQ_MASK, ack_irq_mask); @@ -857,11 +871,11 @@ static void kbase_csf_firmware_reload_worker(struct work_struct *work) container_of(work, struct kbase_device, csf.firmware_reload_work); unsigned long flags; + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); /* Reboot the firmware */ kbase_csf_firmware_enable_mcu(kbdev); /* Tell MCU state machine to transit to next state */ - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); kbdev->csf.firmware_reloaded = true; kbase_pm_update_state(kbdev); spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); @@ -881,6 +895,7 @@ void kbase_csf_firmware_trigger_reload(struct kbase_device *kbdev) kbdev->csf.firmware_reloaded = true; } } +KBASE_EXPORT_TEST_API(kbase_csf_firmware_trigger_reload); void kbase_csf_firmware_reload_completed(struct kbase_device *kbdev) { @@ -889,6 +904,7 @@ void kbase_csf_firmware_reload_completed(struct kbase_device *kbdev) if (unlikely(!kbdev->csf.firmware_inited)) return; + /* Tell MCU state machine to transit to next state */ kbdev->csf.firmware_reloaded = true; kbase_pm_update_state(kbdev); @@ -900,7 +916,7 @@ static u32 convert_dur_to_idle_count(struct kbase_device *kbdev, const u32 dur_n /* Get the cntfreq_el0 value, which drives the SYSTEM_TIMESTAMP */ u64 freq = kbase_arch_timer_get_cntfrq(kbdev); u64 dur_val = dur_ns; - u32 cnt_val_u32, reg_val_u32; + u32 
cnt_val_u32, reg_val_u32, timer_src; bool src_system_timestamp = freq > 0; if (!src_system_timestamp) { @@ -932,9 +948,9 @@ static u32 convert_dur_to_idle_count(struct kbase_device *kbdev, const u32 dur_n reg_val_u32 = GLB_IDLE_TIMER_TIMEOUT_SET(0, cnt_val_u32); /* add the source flag */ - reg_val_u32 = GLB_IDLE_TIMER_TIMER_SOURCE_SET( - reg_val_u32, (src_system_timestamp ? GLB_IDLE_TIMER_TIMER_SOURCE_SYSTEM_TIMESTAMP : - GLB_IDLE_TIMER_TIMER_SOURCE_GPU_COUNTER)); + timer_src = src_system_timestamp ? GLB_IDLE_TIMER_TIMER_SOURCE_SYSTEM_TIMESTAMP : + GLB_IDLE_TIMER_TIMER_SOURCE_GPU_COUNTER; + reg_val_u32 = GLB_IDLE_TIMER_TIMER_SOURCE_SET(reg_val_u32, timer_src); return reg_val_u32; } @@ -989,29 +1005,33 @@ u32 kbase_csf_firmware_set_gpu_idle_hysteresis_time(struct kbase_device *kbdev, return kbdev->csf.gpu_idle_dur_count; } - /* The 'reg_lock' is also taken and is held till the update is not + /* The scheduler lock is also taken and is held till the update is not * complete, to ensure the update of idle timer value by multiple Users * gets serialized. */ - mutex_lock(&kbdev->csf.reg_lock); - /* The firmware only reads the new idle timer value when the timer is - * disabled. - */ - kbase_csf_scheduler_spin_lock(kbdev, &flags); - kbase_csf_firmware_disable_gpu_idle_timer(kbdev); - kbase_csf_scheduler_spin_unlock(kbdev, flags); - /* Ensure that the request has taken effect */ - wait_for_global_request(kbdev, GLB_REQ_IDLE_DISABLE_MASK); - + kbase_csf_scheduler_lock(kbdev); kbase_csf_scheduler_spin_lock(kbdev, &flags); kbdev->csf.gpu_idle_hysteresis_ns = dur_ns; kbdev->csf.gpu_idle_dur_count = hysteresis_val; kbdev->csf.gpu_idle_dur_count_no_modifier = no_modifier; - kbase_csf_firmware_enable_gpu_idle_timer(kbdev); - kbase_csf_scheduler_spin_unlock(kbdev, flags); - wait_for_global_request(kbdev, GLB_REQ_IDLE_ENABLE_MASK); - mutex_unlock(&kbdev->csf.reg_lock); + if (atomic_read(&kbdev->csf.scheduler.gpu_idle_timer_enabled)) { + /* Timer is already enabled. 
Disable the timer as FW only reads + * the new idle timer value when timer is re-enabled. + */ + kbase_csf_firmware_disable_gpu_idle_timer(kbdev); + kbase_csf_scheduler_spin_unlock(kbdev, flags); + /* Ensure that the request has taken effect */ + wait_for_global_request(kbdev, GLB_REQ_IDLE_DISABLE_MASK); + kbase_csf_scheduler_spin_lock(kbdev, &flags); + kbase_csf_firmware_enable_gpu_idle_timer(kbdev); + kbase_csf_scheduler_spin_unlock(kbdev, flags); + wait_for_global_request(kbdev, GLB_REQ_IDLE_ENABLE_MASK); + } else { + kbase_csf_scheduler_spin_unlock(kbdev, flags); + } + + kbase_csf_scheduler_unlock(kbdev); kbase_csf_scheduler_pm_idle(kbdev); kbase_reset_gpu_allow(kbdev); end: @@ -1118,15 +1138,16 @@ int kbase_csf_firmware_early_init(struct kbase_device *kbdev) INIT_WORK(&kbdev->csf.firmware_reload_work, kbase_csf_firmware_reload_worker); INIT_WORK(&kbdev->csf.fw_error_work, firmware_error_worker); + init_rwsem(&kbdev->csf.mmu_sync_sem); mutex_init(&kbdev->csf.reg_lock); - kbase_csf_pending_gpuq_kicks_init(kbdev); + kbase_csf_pending_gpuq_kick_queues_init(kbdev); return 0; } void kbase_csf_firmware_early_term(struct kbase_device *kbdev) { - kbase_csf_pending_gpuq_kicks_term(kbdev); + kbase_csf_pending_gpuq_kick_queues_term(kbdev); mutex_destroy(&kbdev->csf.reg_lock); } @@ -1185,6 +1206,7 @@ int kbase_csf_firmware_load_init(struct kbase_device *kbdev) /* NO_MALI: Don't load the MMU tables or boot CSF firmware */ + ret = invent_capabilities(kbdev); if (ret != 0) goto error; @@ -1278,13 +1300,9 @@ void kbase_csf_firmware_enable_gpu_idle_timer(struct kbase_device *kbdev) void kbase_csf_firmware_disable_gpu_idle_timer(struct kbase_device *kbdev) { - struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface; - kbase_csf_scheduler_spin_lock_assert_held(kbdev); - kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ, GLB_REQ_REQ_IDLE_DISABLE, - GLB_REQ_IDLE_DISABLE_MASK); - + set_gpu_idle_timer_glb_req(kbdev, false); dev_dbg(kbdev->dev, "Sending request 
to disable gpu idle timer"); kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); @@ -1308,6 +1326,7 @@ int kbase_csf_firmware_ping_wait(struct kbase_device *const kbdev, unsigned int return wait_for_global_request(kbdev, GLB_REQ_PING_MASK); } + int kbase_csf_firmware_set_timeout(struct kbase_device *const kbdev, u64 const timeout) { const struct kbase_csf_global_iface *const global_iface = &kbdev->csf.global_iface; @@ -1370,6 +1389,8 @@ void kbase_csf_firmware_trigger_mcu_halt(struct kbase_device *kbdev) void kbase_csf_firmware_enable_mcu(struct kbase_device *kbdev) { + lockdep_assert_held(&kbdev->hwaccess_lock); + /* Trigger the boot of MCU firmware, Use the AUTO mode as * otherwise on fast reset, to exit protected mode, MCU will * not reboot by itself to enter normal mode. @@ -1384,6 +1405,7 @@ void kbase_csf_firmware_trigger_mcu_sleep(struct kbase_device *kbdev) unsigned long flags; kbase_csf_scheduler_spin_lock(kbdev, &flags); + set_gpu_idle_timer_glb_req(kbdev, false); set_global_request(global_iface, GLB_REQ_SLEEP_MASK); dev_dbg(kbdev->dev, "Sending sleep request to MCU"); kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); @@ -1515,6 +1537,12 @@ void kbase_csf_firmware_disable_mcu(struct kbase_device *kbdev) kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(MCU_CONTROL), MCU_CONTROL_REQ_DISABLE); } +void kbase_csf_stop_firmware_and_wait(struct kbase_device *kbdev) +{ + /* Stop the MCU firmware, no wait required on NO_MALI instance */ + kbase_csf_firmware_disable_mcu(kbdev); +} + void kbase_csf_firmware_disable_mcu_wait(struct kbase_device *kbdev) { /* NO_MALI: Nothing to do here */ @@ -1637,3 +1665,16 @@ void kbase_csf_firmware_mcu_shared_mapping_term(struct kbase_device *kbdev, vunmap(csf_mapping->cpu_addr); kfree(csf_mapping->phys); } + +#ifdef KBASE_PM_RUNTIME + +void kbase_csf_firmware_soi_update(struct kbase_device *kbdev) +{ +} + +int kbase_csf_firmware_soi_disable_on_scheduler_suspend(struct kbase_device *kbdev) +{ + return 0; +} + +#endif /* 
KBASE_PM_RUNTIME */ diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_fw_io.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_fw_io.c new file mode 100644 index 000000000000..c65f837a9f72 --- /dev/null +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_fw_io.c @@ -0,0 +1,251 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * + * (C) COPYRIGHT 2024 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + */ + +#include "mali_kbase.h" +#include "mali_kbase_csf_fw_io.h" +#include + +#include + +static inline u32 input_page_read(const u32 *const input, const u32 offset) +{ + WARN_ON(offset % sizeof(u32)); + + return input[offset / sizeof(u32)]; +} + +static inline void input_page_write(u32 *const input, const u32 offset, const u32 value) +{ + WARN_ON(offset % sizeof(u32)); + + input[offset / sizeof(u32)] = value; +} + +static inline void input_page_partial_write(u32 *const input, const u32 offset, u32 value, u32 mask) +{ + WARN_ON(offset % sizeof(u32)); + + input[offset / sizeof(u32)] = (input_page_read(input, offset) & ~mask) | (value & mask); +} + +static inline u32 output_page_read(const u32 *const output, const u32 offset) +{ + WARN_ON(offset % sizeof(u32)); + + return output[offset / sizeof(u32)]; +} + +void kbase_csf_fw_io_init(struct kbase_csf_fw_io *fw_io) +{ + spin_lock_init(&fw_io->lock); + bitmap_zero(fw_io->status, KBASE_FW_IO_STATUS_NUM_BITS); +} +KBASE_EXPORT_TEST_API(kbase_csf_fw_io_init); + +void kbase_csf_fw_io_term(struct kbase_csf_fw_io *fw_io) +{ + /* Nothing to do. 
*/ +} +KBASE_EXPORT_TEST_API(kbase_csf_fw_io_term); + +void kbase_csf_fw_io_global_write(struct kbase_csf_fw_io *fw_io, + const struct kbase_csf_global_iface *iface, u32 offset, u32 value) +{ + const struct kbase_device *const kbdev = iface->kbdev; + + lockdep_assert_held(&fw_io->lock); + + dev_dbg(kbdev->dev, "glob input w: reg %08x val %08x\n", offset, value); + input_page_write(iface->input, offset, value); +} +KBASE_EXPORT_TEST_API(kbase_csf_fw_io_global_write); + +void kbase_csf_fw_io_global_write_mask(struct kbase_csf_fw_io *fw_io, + const struct kbase_csf_global_iface *iface, u32 offset, + u32 value, u32 mask) +{ + const struct kbase_device *const kbdev = iface->kbdev; + + lockdep_assert_held(&fw_io->lock); + + dev_dbg(kbdev->dev, "glob input w: reg %08x val %08x mask %08x\n", offset, value, mask); + input_page_partial_write(iface->input, offset, value, mask); +} +KBASE_EXPORT_TEST_API(kbase_csf_fw_io_global_write_mask); + +u32 kbase_csf_fw_io_global_input_read(struct kbase_csf_fw_io *fw_io, + const struct kbase_csf_global_iface *iface, u32 offset) +{ + const struct kbase_device *const kbdev = iface->kbdev; + u32 val; + + lockdep_assert_held(&fw_io->lock); + + val = input_page_read(iface->input, offset); + dev_dbg(kbdev->dev, "glob input r: reg %08x val %08x\n", offset, val); + + return val; +} +KBASE_EXPORT_TEST_API(kbase_csf_fw_io_global_input_read); + +u32 kbase_csf_fw_io_global_read(struct kbase_csf_fw_io *fw_io, + const struct kbase_csf_global_iface *iface, u32 offset) +{ + const struct kbase_device *const kbdev = iface->kbdev; + u32 val; + + lockdep_assert_held(&fw_io->lock); + + val = output_page_read(iface->output, offset); + dev_dbg(kbdev->dev, "glob output r: reg %08x val %08x\n", offset, val); + + return val; +} + +void kbase_csf_fw_io_group_write(struct kbase_csf_fw_io *fw_io, + const struct kbase_csf_cmd_stream_group_info *info, u32 offset, + u32 value) +{ + const struct kbase_device *const kbdev = info->kbdev; + + 
lockdep_assert_held(&fw_io->lock); + + dev_dbg(kbdev->dev, "csg input w: reg %08x val %08x\n", offset, value); + input_page_write(info->input, offset, value); +} +KBASE_EXPORT_TEST_API(kbase_csf_fw_io_group_write); + +void kbase_csf_fw_io_group_write_mask(struct kbase_csf_fw_io *fw_io, + const struct kbase_csf_cmd_stream_group_info *info, + u32 offset, u32 value, u32 mask) +{ + const struct kbase_device *const kbdev = info->kbdev; + + lockdep_assert_held(&fw_io->lock); + + dev_dbg(kbdev->dev, "csg input w: reg %08x val %08x mask %08x\n", offset, value, mask); + input_page_partial_write(info->input, offset, value, mask); +} +KBASE_EXPORT_TEST_API(kbase_csf_fw_io_group_write_mask); + +u32 kbase_csf_fw_io_group_input_read(struct kbase_csf_fw_io *fw_io, + const struct kbase_csf_cmd_stream_group_info *info, u32 offset) +{ + const struct kbase_device *const kbdev = info->kbdev; + u32 val; + + lockdep_assert_held(&fw_io->lock); + + val = input_page_read(info->input, offset); + dev_dbg(kbdev->dev, "csg input r: reg %08x val %08x\n", offset, val); + + return val; +} +KBASE_EXPORT_TEST_API(kbase_csf_fw_io_group_input_read); + +u32 kbase_csf_fw_io_group_read(struct kbase_csf_fw_io *fw_io, + const struct kbase_csf_cmd_stream_group_info *info, u32 offset) +{ + const struct kbase_device *const kbdev = info->kbdev; + u32 val; + + lockdep_assert_held(&fw_io->lock); + + val = output_page_read(info->output, offset); + dev_dbg(kbdev->dev, "csg output r: reg %08x val %08x\n", offset, val); + + return val; +} + +void kbase_csf_fw_io_stream_write(struct kbase_csf_fw_io *fw_io, + const struct kbase_csf_cmd_stream_info *info, u32 offset, + u32 value) +{ + const struct kbase_device *const kbdev = info->kbdev; + + lockdep_assert_held(&fw_io->lock); + + dev_dbg(kbdev->dev, "cs input w: reg %08x val %08x\n", offset, value); + input_page_write(info->input, offset, value); +} +KBASE_EXPORT_TEST_API(kbase_csf_fw_io_stream_write); + +void kbase_csf_fw_io_stream_write_mask(struct kbase_csf_fw_io 
*fw_io, + const struct kbase_csf_cmd_stream_info *info, u32 offset, + u32 value, u32 mask) +{ + const struct kbase_device *const kbdev = info->kbdev; + + lockdep_assert_held(&fw_io->lock); + + dev_dbg(kbdev->dev, "cs input w: reg %08x val %08x mask %08x\n", offset, value, mask); + input_page_partial_write(info->input, offset, value, mask); +} +KBASE_EXPORT_TEST_API(kbase_csf_fw_io_stream_write_mask); + +u32 kbase_csf_fw_io_stream_input_read(struct kbase_csf_fw_io *fw_io, + const struct kbase_csf_cmd_stream_info *info, u32 offset) +{ + const struct kbase_device *const kbdev = info->kbdev; + u32 val; + + lockdep_assert_held(&fw_io->lock); + + val = input_page_read(info->input, offset); + dev_dbg(kbdev->dev, "cs input r: reg %08x val %08x\n", offset, val); + + return val; +} +KBASE_EXPORT_TEST_API(kbase_csf_fw_io_stream_input_read); + +u32 kbase_csf_fw_io_stream_read(struct kbase_csf_fw_io *fw_io, + const struct kbase_csf_cmd_stream_info *info, u32 offset) +{ + const struct kbase_device *const kbdev = info->kbdev; + u32 val; + + lockdep_assert_held(&fw_io->lock); + + val = output_page_read(info->output, offset); + dev_dbg(kbdev->dev, "cs output r: reg %08x val %08x\n", offset, val); + + return val; +} + +void kbase_csf_fw_io_set_status(struct kbase_csf_fw_io *fw_io, + enum kbase_csf_fw_io_status_bits status_bit) +{ + set_bit(status_bit, fw_io->status); +} +KBASE_EXPORT_TEST_API(kbase_csf_fw_io_set_status); + +void kbase_csf_fw_io_clear_status(struct kbase_csf_fw_io *fw_io, + enum kbase_csf_fw_io_status_bits status_bit) +{ + clear_bit(status_bit, fw_io->status); +} + +bool kbase_csf_fw_io_test_status(struct kbase_csf_fw_io *fw_io, + enum kbase_csf_fw_io_status_bits status_bit) +{ + return test_bit(status_bit, fw_io->status); +} +KBASE_EXPORT_TEST_API(kbase_csf_fw_io_test_status); diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_fw_io.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_fw_io.h new file mode 100644 index 000000000000..a8eb1ab51fbc --- /dev/null +++ 
b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_fw_io.h @@ -0,0 +1,362 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2024 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +#ifndef _KBASE_CSF_FW_IO_H_ +#define _KBASE_CSF_FW_IO_H_ + +#include +#include +#include +#include + +/** The wait completed because the GPU was lost. */ +#define KBASE_CSF_FW_IO_WAIT_GPU_LOST 1 + +/** The wait was aborted because of an unexpected event. */ +#define KBASE_CSF_FW_IO_WAIT_UNSUPPORTED 255 + +/** + * enum kbase_csf_fw_io_status_bits - Status bits for firmware I/O interface. + * + * @KBASE_FW_IO_STATUS_GPU_SUSPENDED: The GPU is suspended. + * @KBASE_FW_IO_STATUS_NUM_BITS: Number of bits used to encode the state. + */ +enum kbase_csf_fw_io_status_bits { + KBASE_FW_IO_STATUS_GPU_SUSPENDED = 0, + KBASE_FW_IO_STATUS_NUM_BITS, +}; + +/** + * struct kbase_csf_fw_io - Manager of firmware input/output interface. + * + * @lock: Spinlock to serialize access to the interface. + * @status: Internal status of the MCU interface. 
+ */ +struct kbase_csf_fw_io { + spinlock_t lock; + DECLARE_BITMAP(status, KBASE_FW_IO_STATUS_NUM_BITS); +}; + +struct kbase_csf_global_iface; +struct kbase_csf_cmd_stream_group_info; +struct kbase_csf_cmd_stream_info; + +/** + * kbase_csf_fw_io_init() - Initialize manager of firmware input/output interface. + * + * @fw_io: Firmware I/O interface to initialize. + */ +void kbase_csf_fw_io_init(struct kbase_csf_fw_io *fw_io); + +/** + * kbase_csf_fw_io_term() - Terminate manager of firmware input/output interface. + * + * @fw_io: Firmware I/O interface to terminate. + */ +void kbase_csf_fw_io_term(struct kbase_csf_fw_io *fw_io); + +/** + * kbase_csf_fw_io_open() - Start a transaction with the firmware input/output interface. + * + * @fw_io: Firmware I/O interface to open. + * + * Return: 0 on success, otherwise an error code reflecting the status of the + * interface. + */ +static inline int kbase_csf_fw_io_open(struct kbase_csf_fw_io *fw_io) +{ + if (test_bit(KBASE_FW_IO_STATUS_GPU_SUSPENDED, fw_io->status)) + return -KBASE_CSF_FW_IO_WAIT_GPU_LOST; + + spin_lock(&fw_io->lock); + + return 0; +} + +/** + * kbase_csf_fw_io_open_force() - Force a transaction with the firmware input/output interface. + * + * @fw_io: Firmware I/O interface to open. + * + * This function forces the start of a transaction regardless of the status + * of the interface. + */ +static inline void kbase_csf_fw_io_open_force(struct kbase_csf_fw_io *fw_io) +{ + spin_lock(&fw_io->lock); +} + +/** + * kbase_csf_fw_io_close() - End a transaction with the firmware input/output interface. + * + * @fw_io: Firmware I/O interface to close. + */ +static inline void kbase_csf_fw_io_close(struct kbase_csf_fw_io *fw_io) +{ + spin_unlock(&fw_io->lock); +} + +/** + * kbase_csf_fw_io_assert_opened() - Assert if a transaction with the firmware input/output + * interface has started. + * + * @fw_io: Firmware I/O interface. 
+ */ +static inline void kbase_csf_fw_io_assert_opened(struct kbase_csf_fw_io *fw_io) +{ + lockdep_assert_held(&fw_io->lock); +} + +/** + * kbase_csf_fw_io_global_write() - Write a word in the global input page. + * + * @fw_io: Firmware I/O manager. + * @iface: CSF interface provided by the firmware. + * @offset: Offset of the word to write, in bytes. + * @value: Value to be written. + */ +void kbase_csf_fw_io_global_write(struct kbase_csf_fw_io *fw_io, + const struct kbase_csf_global_iface *iface, u32 offset, + u32 value); + +/** + * kbase_csf_fw_io_global_write_mask() - Write part of a word in the global input page. + * + * @fw_io: Firmware I/O manager. + * @iface: CSF interface provided by the firmware. + * @offset: Offset of the word to write, in bytes. + * @value: Value to be written. + * @mask: Bitmask with the bits to be modified set. + */ +void kbase_csf_fw_io_global_write_mask(struct kbase_csf_fw_io *fw_io, + const struct kbase_csf_global_iface *iface, u32 offset, + u32 value, u32 mask); + +/** + * kbase_csf_fw_io_global_input_read() - Read a word in the global input page. + * + * @fw_io: Firmware I/O manager. + * @iface: CSF interface provided by the firmware. + * @offset: Offset of the word to be read, in bytes. + * + * Return: Value of the word read from the global input page. + */ +u32 kbase_csf_fw_io_global_input_read(struct kbase_csf_fw_io *fw_io, + const struct kbase_csf_global_iface *iface, u32 offset); + +/** + * kbase_csf_fw_io_global_read() - Read a word in the global output page. + * + * @fw_io: Firmware I/O manager. + * @iface: CSF interface provided by the firmware. + * @offset: Offset of the word to be read, in bytes. + * + * Return: Value of the word read from the global output page. + */ +u32 kbase_csf_fw_io_global_read(struct kbase_csf_fw_io *fw_io, + const struct kbase_csf_global_iface *iface, u32 offset); + +/** + * kbase_csf_fw_io_group_write() - Write a word in a CSG's input page. + * + * @fw_io: Firmware I/O manager. 
+ * @info: CSG interface provided by the firmware. + * @offset: Offset of the word to write, in bytes. + * @value: Value to be written. + */ +void kbase_csf_fw_io_group_write(struct kbase_csf_fw_io *fw_io, + const struct kbase_csf_cmd_stream_group_info *info, u32 offset, + u32 value); + +/** + * kbase_csf_fw_io_group_write_mask() - Write part of a word in a CSG's input page. + * + * @fw_io: Firmware I/O manager. + * @info: CSG interface provided by the firmware. + * @offset: Offset of the word to write, in bytes. + * @value: Value to be written. + * @mask: Bitmask with the bits to be modified set. + */ +void kbase_csf_fw_io_group_write_mask(struct kbase_csf_fw_io *fw_io, + const struct kbase_csf_cmd_stream_group_info *info, + u32 offset, u32 value, u32 mask); + +/** + * kbase_csf_fw_io_group_input_read() - Read a word in a CSG's input page. + * + * @fw_io: Firmware I/O manager. + * @info: CSG interface provided by the firmware. + * @offset: Offset of the word to be read, in bytes. + * + * Return: Value of the word read from a CSG's input page. + */ +u32 kbase_csf_fw_io_group_input_read(struct kbase_csf_fw_io *fw_io, + const struct kbase_csf_cmd_stream_group_info *info, + u32 offset); + +/** + * kbase_csf_fw_io_group_read() - Read a word in a CSG's output page. + * + * @fw_io: Firmware I/O manager. + * @info: CSG interface provided by the firmware. + * @offset: Offset of the word to be read, in bytes. + * + * Return: Value of the word read from the CSG's output page. + */ +u32 kbase_csf_fw_io_group_read(struct kbase_csf_fw_io *fw_io, + const struct kbase_csf_cmd_stream_group_info *info, u32 offset); + +/** + * kbase_csf_fw_io_stream_write() - Write a word in a CS's input page. + * + * @fw_io: Firmware I/O manager. + * @info: CSI interface provided by the firmware. + * @offset: Offset of the word to write, in bytes. + * @value: Value to be written. 
+ */ +void kbase_csf_fw_io_stream_write(struct kbase_csf_fw_io *fw_io, + const struct kbase_csf_cmd_stream_info *info, u32 offset, + u32 value); + +/** + * kbase_csf_fw_io_stream_write_mask() - Write part of a word in a CS's input page. + * + * @fw_io: Firmware I/O manager. + * @info: CSI interface provided by the firmware. + * @offset: Offset of the word to write, in bytes. + * @value: Value to be written. + * @mask: Bitmask with the bits to be modified set. + */ +void kbase_csf_fw_io_stream_write_mask(struct kbase_csf_fw_io *fw_io, + const struct kbase_csf_cmd_stream_info *info, u32 offset, + u32 value, u32 mask); + +/** + * kbase_csf_fw_io_stream_input_read() - Read a word in a CS's input page. + * + * @fw_io: Firmware I/O manager. + * @info: CSI interface provided by the firmware. + * @offset: Offset of the word to be read, in bytes. + * + * Return: Value of the word read from a CS's input page. + */ +u32 kbase_csf_fw_io_stream_input_read(struct kbase_csf_fw_io *fw_io, + const struct kbase_csf_cmd_stream_info *info, u32 offset); + +/** + * kbase_csf_fw_io_stream_read() - Read a word in a CS's output page. + * + * @fw_io: Firmware I/O manager. + * @info: CSI interface provided by the firmware. + * @offset: Offset of the word to be read, in bytes. + * + * Return: Value of the word read from the CS's output page. + */ +u32 kbase_csf_fw_io_stream_read(struct kbase_csf_fw_io *fw_io, + const struct kbase_csf_cmd_stream_info *info, u32 offset); + +/** + * kbase_csf_fw_io_set_status() - Set a FW I/O status bit. + * + * @fw_io: Firmware I/O manager. + * @status_bit: Status bit to set. + */ +void kbase_csf_fw_io_set_status(struct kbase_csf_fw_io *fw_io, + enum kbase_csf_fw_io_status_bits status_bit); + +/** + * kbase_csf_fw_io_clear_status() - Clear a FW I/O status bit. + * + * @fw_io: Firmware I/O manager. + * @status_bit: Status bit to clear. 
+ */ +void kbase_csf_fw_io_clear_status(struct kbase_csf_fw_io *fw_io, + enum kbase_csf_fw_io_status_bits status_bit); + +/** + * kbase_csf_fw_io_test_status() - Test a FW I/O status bit. + * + * @fw_io: Firmware I/O manager. + * @status_bit: Status bit to test. + * + * Return: Value of the tested status bit. + */ +bool kbase_csf_fw_io_test_status(struct kbase_csf_fw_io *fw_io, + enum kbase_csf_fw_io_status_bits status_bit); + +/** + * kbase_csf_fw_io_wait_event_timeout() - Wait until condition becomes true, a timeout + * occurs or a FW I/O status bit is set. Otherwise the behaviour is identical + * to wait_event_timeout(). + * + * @fw_io: Firmware I/O manager. + * @wq_head: The waitqueue to wait on. + * @condition: C expression for the event to wait for (parenthesized on expansion) + * @timeout: Timeout, in jiffies + * + * Return: Remaining jiffies (at least 1) on success, + * 0 on timeout, + * negative KBASE_CSF_FW_IO_WAIT_* error codes otherwise. + */ +#define kbase_csf_fw_io_wait_event_timeout(fw_io, wq_head, condition, timeout) \ + ({ \ + int __ret; \ + int __wait_remaining = wait_event_timeout( \ + wq_head, (condition) || kbasep_csf_fw_io_check_status(fw_io), timeout); \ + __ret = kbasep_csf_fw_io_handle_wait_result(fw_io, __wait_remaining); \ + __ret; \ + }) + +/** + * kbasep_csf_fw_io_check_status() - Private function to check if any FW I/O status bit is set. + * + * @fw_io: Firmware I/O manager. + * + * Return: True if any FW I/O status bit is set, false otherwise. + */ +static inline bool kbasep_csf_fw_io_check_status(struct kbase_csf_fw_io *fw_io) +{ + return !bitmap_empty(fw_io->status, KBASE_FW_IO_STATUS_NUM_BITS); +} + +/** + * kbasep_csf_fw_io_handle_wait_result() - Private function to handle the wait_event_timeout() + * result. + * + * @fw_io: Firmware I/O manager + * @wait_remaining: Remaining jiffies returned by wait_event_timeout() + * + * Return: Remaining jiffies (at least 1) on success, + * 0 on timeout, + * negative KBASE_CSF_FW_IO_WAIT_* error codes otherwise. 
+ */ +static inline int kbasep_csf_fw_io_handle_wait_result(struct kbase_csf_fw_io *fw_io, + int wait_remaining) +{ + /* Check for any FW IO status bit set */ + if (!bitmap_empty(fw_io->status, KBASE_FW_IO_STATUS_NUM_BITS)) + return (test_bit(KBASE_FW_IO_STATUS_GPU_SUSPENDED, fw_io->status)) ? + -KBASE_CSF_FW_IO_WAIT_GPU_LOST : + -KBASE_CSF_FW_IO_WAIT_UNSUPPORTED; + + return wait_remaining; +} +#endif diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_fw_io_no_mali.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_fw_io_no_mali.c new file mode 100644 index 000000000000..0cffc8475654 --- /dev/null +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_fw_io_no_mali.c @@ -0,0 +1,294 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * + * (C) COPYRIGHT 2024 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + */ + +#include "mali_kbase.h" +#include "mali_kbase_csf_fw_io.h" +#include + +#include + +static inline u32 input_page_read(const u32 *const input, const u32 offset) +{ + WARN_ON(offset % sizeof(u32)); + + return input[offset / sizeof(u32)]; +} + +static inline void input_page_write(u32 *const input, const u32 offset, const u32 value) +{ + WARN_ON(offset % sizeof(u32)); + + input[offset / sizeof(u32)] = value; +} + +static inline void input_page_partial_write(u32 *const input, const u32 offset, u32 value, u32 mask) +{ + WARN_ON(offset % sizeof(u32)); + + input[offset / sizeof(u32)] = (input_page_read(input, offset) & ~mask) | (value & mask); +} + +static inline u32 output_page_read(const u32 *const output, const u32 offset) +{ + WARN_ON(offset % sizeof(u32)); + + return output[offset / sizeof(u32)]; +} + +static inline void output_page_write(u32 *const output, const u32 offset, const u32 value) +{ + WARN_ON(offset % sizeof(u32)); + + output[offset / sizeof(u32)] = value; +} + +void kbase_csf_fw_io_init(struct kbase_csf_fw_io *fw_io) +{ + spin_lock_init(&fw_io->lock); + bitmap_zero(fw_io->status, KBASE_FW_IO_STATUS_NUM_BITS); +} +KBASE_EXPORT_TEST_API(kbase_csf_fw_io_init); + +void kbase_csf_fw_io_term(struct kbase_csf_fw_io *fw_io) +{ + /* Nothing to do. */ +} +KBASE_EXPORT_TEST_API(kbase_csf_fw_io_term); + +void kbase_csf_fw_io_global_write(struct kbase_csf_fw_io *fw_io, + const struct kbase_csf_global_iface *iface, u32 offset, u32 value) +{ + const struct kbase_device *const kbdev = iface->kbdev; + + lockdep_assert_held(&fw_io->lock); + + dev_dbg(kbdev->dev, "glob input w: reg %08x val %08x\n", offset, value); + input_page_write(iface->input, offset, value); + + if (offset == GLB_REQ) { + /* NO_MALI: Immediately acknowledge requests - except for PRFCNT_ENABLE + * and PRFCNT_SAMPLE. 
These will be processed along with the + * corresponding performance counter registers when the global doorbell + * is rung in order to emulate the performance counter sampling behavior + * of the real firmware. + */ + const u32 ack = output_page_read(iface->output, GLB_ACK); + const u32 req_mask = ~(GLB_REQ_PRFCNT_ENABLE_MASK | GLB_REQ_PRFCNT_SAMPLE_MASK); + const u32 toggled = (value ^ ack) & req_mask; + + output_page_write(iface->output, GLB_ACK, ack ^ toggled); + } +} +KBASE_EXPORT_TEST_API(kbase_csf_fw_io_global_write); + +void kbase_csf_fw_io_global_write_mask(struct kbase_csf_fw_io *fw_io, + const struct kbase_csf_global_iface *iface, u32 offset, + u32 value, u32 mask) +{ + const struct kbase_device *const kbdev = iface->kbdev; + + lockdep_assert_held(&fw_io->lock); + + dev_dbg(kbdev->dev, "glob input w: reg %08x val %08x mask %08x\n", offset, value, mask); + + /* NO_MALI: Go through existing function to capture writes */ + kbase_csf_fw_io_global_write(fw_io, iface, offset, + (input_page_read(iface->input, offset) & ~mask) | + (value & mask)); +} +KBASE_EXPORT_TEST_API(kbase_csf_fw_io_global_write_mask); + +u32 kbase_csf_fw_io_global_input_read(struct kbase_csf_fw_io *fw_io, + const struct kbase_csf_global_iface *iface, u32 offset) +{ + const struct kbase_device *const kbdev = iface->kbdev; + u32 val; + + lockdep_assert_held(&fw_io->lock); + + val = input_page_read(iface->input, offset); + dev_dbg(kbdev->dev, "glob input r: reg %08x val %08x\n", offset, val); + + return val; +} +KBASE_EXPORT_TEST_API(kbase_csf_fw_io_global_input_read); + +u32 kbase_csf_fw_io_global_read(struct kbase_csf_fw_io *fw_io, + const struct kbase_csf_global_iface *iface, u32 offset) +{ + const struct kbase_device *const kbdev = iface->kbdev; + u32 val; + + lockdep_assert_held(&fw_io->lock); + + val = output_page_read(iface->output, offset); + dev_dbg(kbdev->dev, "glob output r: reg %08x val %08x\n", offset, val); + + return val; +} + +void kbase_csf_fw_io_group_write(struct 
kbase_csf_fw_io *fw_io, + const struct kbase_csf_cmd_stream_group_info *info, u32 offset, + u32 value) +{ + const struct kbase_device *const kbdev = info->kbdev; + + lockdep_assert_held(&fw_io->lock); + + dev_dbg(kbdev->dev, "csg input w: reg %08x val %08x\n", offset, value); + input_page_write(info->input, offset, value); + + if (offset == CSG_REQ) { + /* NO_MALI: Immediately acknowledge requests */ + output_page_write(info->output, CSG_ACK, value); + } +} +KBASE_EXPORT_TEST_API(kbase_csf_fw_io_group_write); + +void kbase_csf_fw_io_group_write_mask(struct kbase_csf_fw_io *fw_io, + const struct kbase_csf_cmd_stream_group_info *info, + u32 offset, u32 value, u32 mask) +{ + const struct kbase_device *const kbdev = info->kbdev; + + lockdep_assert_held(&fw_io->lock); + + dev_dbg(kbdev->dev, "csg input w: reg %08x val %08x mask %08x\n", offset, value, mask); + + /* NO_MALI: Go through existing function to capture writes */ + kbase_csf_fw_io_group_write(fw_io, info, offset, + (input_page_read(info->input, offset) & ~mask) | + (value & mask)); +} +KBASE_EXPORT_TEST_API(kbase_csf_fw_io_group_write_mask); + +u32 kbase_csf_fw_io_group_input_read(struct kbase_csf_fw_io *fw_io, + const struct kbase_csf_cmd_stream_group_info *info, u32 offset) +{ + const struct kbase_device *const kbdev = info->kbdev; + u32 val; + + lockdep_assert_held(&fw_io->lock); + + val = input_page_read(info->input, offset); + dev_dbg(kbdev->dev, "csg input r: reg %08x val %08x\n", offset, val); + + return val; +} +KBASE_EXPORT_TEST_API(kbase_csf_fw_io_group_input_read); + +u32 kbase_csf_fw_io_group_read(struct kbase_csf_fw_io *fw_io, + const struct kbase_csf_cmd_stream_group_info *info, u32 offset) +{ + const struct kbase_device *const kbdev = info->kbdev; + u32 val; + + lockdep_assert_held(&fw_io->lock); + + val = output_page_read(info->output, offset); + dev_dbg(kbdev->dev, "csg output r: reg %08x val %08x\n", offset, val); + + return val; +} + +void kbase_csf_fw_io_stream_write(struct kbase_csf_fw_io 
*fw_io, + const struct kbase_csf_cmd_stream_info *info, u32 offset, + u32 value) +{ + const struct kbase_device *const kbdev = info->kbdev; + + lockdep_assert_held(&fw_io->lock); + + dev_dbg(kbdev->dev, "cs input w: reg %08x val %08x\n", offset, value); + input_page_write(info->input, offset, value); + + if (offset == CS_REQ) { + /* NO_MALI: Immediately acknowledge requests */ + output_page_write(info->output, CS_ACK, value); + } +} +KBASE_EXPORT_TEST_API(kbase_csf_fw_io_stream_write); + +void kbase_csf_fw_io_stream_write_mask(struct kbase_csf_fw_io *fw_io, + const struct kbase_csf_cmd_stream_info *info, u32 offset, + u32 value, u32 mask) +{ + const struct kbase_device *const kbdev = info->kbdev; + + lockdep_assert_held(&fw_io->lock); + + dev_dbg(kbdev->dev, "cs input w: reg %08x val %08x mask %08x\n", offset, value, mask); + + /* NO_MALI: Go through existing function to capture writes */ + kbase_csf_fw_io_stream_write(fw_io, info, offset, + (input_page_read(info->input, offset) & ~mask) | + (value & mask)); +} +KBASE_EXPORT_TEST_API(kbase_csf_fw_io_stream_write_mask); + +u32 kbase_csf_fw_io_stream_input_read(struct kbase_csf_fw_io *fw_io, + const struct kbase_csf_cmd_stream_info *info, u32 offset) +{ + const struct kbase_device *const kbdev = info->kbdev; + u32 val; + + lockdep_assert_held(&fw_io->lock); + + val = input_page_read(info->input, offset); + dev_dbg(kbdev->dev, "cs input r: reg %08x val %08x\n", offset, val); + + return val; +} +KBASE_EXPORT_TEST_API(kbase_csf_fw_io_stream_input_read); + +u32 kbase_csf_fw_io_stream_read(struct kbase_csf_fw_io *fw_io, + const struct kbase_csf_cmd_stream_info *info, u32 offset) +{ + const struct kbase_device *const kbdev = info->kbdev; + u32 val; + + lockdep_assert_held(&fw_io->lock); + + val = output_page_read(info->output, offset); + dev_dbg(kbdev->dev, "cs output r: reg %08x val %08x\n", offset, val); + + return val; +} + +void kbase_csf_fw_io_set_status(struct kbase_csf_fw_io *fw_io, + enum 
kbase_csf_fw_io_status_bits status_bit) +{ + set_bit(status_bit, fw_io->status); +} +KBASE_EXPORT_TEST_API(kbase_csf_fw_io_set_status); + +void kbase_csf_fw_io_clear_status(struct kbase_csf_fw_io *fw_io, + enum kbase_csf_fw_io_status_bits status_bit) +{ + clear_bit(status_bit, fw_io->status); +} + +bool kbase_csf_fw_io_test_status(struct kbase_csf_fw_io *fw_io, + enum kbase_csf_fw_io_status_bits status_bit) +{ + return test_bit(status_bit, fw_io->status); +} +KBASE_EXPORT_TEST_API(kbase_csf_fw_io_test_status); diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_heap_context_alloc.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_heap_context_alloc.c index 12a79b4852fb..fb181026719f 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_heap_context_alloc.c +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_heap_context_alloc.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2024 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -180,8 +180,9 @@ void kbase_csf_heap_context_allocator_term(struct kbase_csf_heap_context_allocat u64 kbase_csf_heap_context_allocator_alloc(struct kbase_csf_heap_context_allocator *const ctx_alloc) { struct kbase_context *const kctx = ctx_alloc->kctx; - u64 flags = BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR | BASE_MEM_PROT_CPU_WR | - BASEP_MEM_NO_USER_FREE | BASE_MEM_PROT_CPU_RD; + base_mem_alloc_flags flags = BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR | + BASE_MEM_PROT_CPU_WR | BASEP_MEM_NO_USER_FREE | + BASE_MEM_PROT_CPU_RD; u64 nr_pages = PFN_UP(MAX_TILER_HEAPS * ctx_alloc->heap_context_size_aligned); u64 heap_gpu_va = 0; diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_kcpu.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_kcpu.c index 76e42e847fc3..09c92f0bed4e 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_kcpu.c +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_kcpu.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2018-2024 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -39,13 +39,7 @@ static DEFINE_SPINLOCK(kbase_csf_fence_lock); #endif -#ifdef CONFIG_MALI_BIFROST_FENCE_DEBUG #define FENCE_WAIT_TIMEOUT_MS 3000 -#endif - -static void kcpu_queue_process(struct kbase_kcpu_command_queue *kcpu_queue, bool drain_queue); - -static void kcpu_queue_process_worker(struct work_struct *data); static int kbase_kcpu_map_import_prepare(struct kbase_kcpu_command_queue *kcpu_queue, struct base_kcpu_command_import_info *import_info, @@ -377,7 +371,7 @@ static int kbase_kcpu_jit_allocate_prepare(struct kbase_kcpu_command_queue *kcpu goto out; } - if (copy_from_user(info, data, sizeof(*info) * count) != 0) { + if (copy_from_user(info, data, size_mul(sizeof(*info), count)) != 0) { ret = -EINVAL; goto out_free; } @@ -445,6 +439,16 @@ static void kbase_kcpu_jit_allocate_finish(struct kbase_kcpu_command_queue *queu kfree(cmd->info.jit_alloc.info); } +static void enqueue_kcpuq_work(struct kbase_kcpu_command_queue *queue) +{ + struct kbase_context *const kctx = queue->kctx; + + if (!atomic_read(&kctx->prioritized)) + queue_work(kctx->csf.kcpu_queues.kcpu_wq, &queue->work); + else + kbase_csf_scheduler_enqueue_kcpuq_work(queue); +} + /** * kbase_kcpu_jit_retry_pending_allocs() - Retry blocked JIT_ALLOC commands * @@ -464,9 +468,7 @@ static void kbase_kcpu_jit_retry_pending_allocs(struct kbase_context *kctx) * kbase_csf_kcpu_queue_context.jit_lock . */ list_for_each_entry(blocked_queue, &kctx->csf.kcpu_queues.jit_blocked_queues, jit_blocked) - queue_work(atomic_read(&kctx->prioritized) ? 
kctx->csf.kcpu_wq_high_prio : - kctx->csf.kcpu_wq_normal_prio, - &blocked_queue->work); + enqueue_kcpuq_work(blocked_queue); } static int kbase_kcpu_jit_free_process(struct kbase_kcpu_command_queue *queue, @@ -561,7 +563,7 @@ static int kbase_kcpu_jit_free_prepare(struct kbase_kcpu_command_queue *kcpu_que goto out_free; } - if (copy_from_user(ids, data, sizeof(*ids) * count)) { + if (copy_from_user(ids, data, size_mul(sizeof(*ids), count))) { ret = -EINVAL; goto out_free; } @@ -717,11 +719,8 @@ static int kbase_csf_queue_group_suspend_process(struct kbase_context *kctx, static enum kbase_csf_event_callback_action event_cqs_callback(void *param) { struct kbase_kcpu_command_queue *kcpu_queue = (struct kbase_kcpu_command_queue *)param; - struct kbase_context *kctx = kcpu_queue->kctx; - queue_work(atomic_read(&kctx->prioritized) ? kctx->csf.kcpu_wq_high_prio : - kctx->csf.kcpu_wq_normal_prio, - &kcpu_queue->work); + enqueue_kcpuq_work(kcpu_queue); return KBASE_CSF_EVENT_CALLBACK_KEEP; } @@ -853,7 +852,8 @@ static int kbase_kcpu_cqs_wait_prepare(struct kbase_kcpu_command_queue *queue, if (!objs) return -ENOMEM; - if (copy_from_user(objs, u64_to_user_ptr(cqs_wait_info->objs), nr_objs * sizeof(*objs))) { + if (copy_from_user(objs, u64_to_user_ptr(cqs_wait_info->objs), + size_mul(nr_objs, sizeof(*objs)))) { kfree(objs); return -ENOMEM; } @@ -958,7 +958,8 @@ static int kbase_kcpu_cqs_set_prepare(struct kbase_kcpu_command_queue *kcpu_queu if (!objs) return -ENOMEM; - if (copy_from_user(objs, u64_to_user_ptr(cqs_set_info->objs), nr_objs * sizeof(*objs))) { + if (copy_from_user(objs, u64_to_user_ptr(cqs_set_info->objs), + size_mul(nr_objs, sizeof(*objs)))) { kfree(objs); return -ENOMEM; } @@ -1116,7 +1117,7 @@ static int kbase_kcpu_cqs_wait_operation_prepare( return -ENOMEM; if (copy_from_user(objs, u64_to_user_ptr(cqs_wait_operation_info->objs), - nr_objs * sizeof(*objs))) { + size_mul(nr_objs, sizeof(*objs)))) { kfree(objs); return -ENOMEM; } @@ -1281,7 +1282,7 @@ static int 
kbase_kcpu_cqs_set_operation_prepare( return -ENOMEM; if (copy_from_user(objs, u64_to_user_ptr(cqs_set_operation_info->objs), - nr_objs * sizeof(*objs))) { + size_mul(nr_objs, sizeof(*objs)))) { kfree(objs); return -ENOMEM; } @@ -1322,9 +1323,7 @@ static void kbase_csf_fence_wait_callback(struct dma_fence *fence, struct dma_fe fence->seqno); /* Resume kcpu command queue processing. */ - queue_work(atomic_read(&kctx->prioritized) ? kctx->csf.kcpu_wq_high_prio : - kctx->csf.kcpu_wq_normal_prio, - &kcpu_queue->work); + enqueue_kcpuq_work(kcpu_queue); } static void kbasep_kcpu_fence_wait_cancel(struct kbase_kcpu_command_queue *kcpu_queue, @@ -1360,7 +1359,6 @@ static void kbasep_kcpu_fence_wait_cancel(struct kbase_kcpu_command_queue *kcpu_ fence_info->fence = NULL; } -#ifdef CONFIG_MALI_BIFROST_FENCE_DEBUG /** * fence_timeout_callback() - Timeout callback function for fence-wait * @@ -1399,9 +1397,7 @@ static void fence_timeout_callback(struct timer_list *timer) kbase_sync_fence_info_get(fence, &info); if (info.status == 1) { - queue_work(atomic_read(&kctx->prioritized) ? 
kctx->csf.kcpu_wq_high_prio : - kctx->csf.kcpu_wq_normal_prio, - &kcpu_queue->work); + enqueue_kcpuq_work(kcpu_queue); } else if (info.status == 0) { dev_warn(kctx->kbdev->dev, "fence has not yet signalled in %ums", FENCE_WAIT_TIMEOUT_MS); @@ -1430,7 +1426,6 @@ static void fence_wait_timeout_start(struct kbase_kcpu_command_queue *cmd) { mod_timer(&cmd->fence_timeout, jiffies + msecs_to_jiffies(FENCE_WAIT_TIMEOUT_MS)); } -#endif /** * kbase_kcpu_fence_wait_process() - Process the kcpu fence wait command @@ -1469,9 +1464,8 @@ static int kbase_kcpu_fence_wait_process(struct kbase_kcpu_command_queue *kcpu_q fence_status = cb_err; if (cb_err == 0) { kcpu_queue->fence_wait_processed = true; -#ifdef CONFIG_MALI_BIFROST_FENCE_DEBUG - fence_wait_timeout_start(kcpu_queue); -#endif + if (IS_ENABLED(CONFIG_MALI_BIFROST_FENCE_DEBUG)) + fence_wait_timeout_start(kcpu_queue); } else if (cb_err == -ENOENT) { fence_status = dma_fence_get_status(fence); if (!fence_status) { @@ -1692,9 +1686,7 @@ static void fence_signal_timeout_cb(struct timer_list *timer) if (atomic_read(&kcpu_queue->fence_signal_pending_cnt) > 1) fence_signal_timeout_start(kcpu_queue); - queue_work(atomic_read(&kctx->prioritized) ? kctx->csf.kcpu_wq_high_prio : - kctx->csf.kcpu_wq_normal_prio, - &kcpu_queue->timeout_work); + queue_work(kctx->csf.kcpu_queues.kcpu_wq, &kcpu_queue->timeout_work); } } @@ -1973,7 +1965,7 @@ static void kcpu_queue_process_worker(struct work_struct *data) container_of(data, struct kbase_kcpu_command_queue, work); mutex_lock(&queue->lock); - kcpu_queue_process(queue, false); + kbase_csf_kcpu_queue_process(queue, false); mutex_unlock(&queue->lock); } @@ -2006,7 +1998,7 @@ static int delete_queue(struct kbase_context *kctx, u32 id) /* Drain the remaining work for this queue first and go past * all the waits. 
*/ - kcpu_queue_process(queue, true); + kbase_csf_kcpu_queue_process(queue, true); /* All commands should have been processed */ WARN_ON(queue->num_pending_cmds); @@ -2022,11 +2014,20 @@ static int delete_queue(struct kbase_context *kctx, u32 id) mutex_unlock(&queue->lock); cancel_work_sync(&queue->timeout_work); + + /* + * Drain a pending request to process this queue in + * kbase_csf_scheduler_kthread() if any. By this point the + * queue would be empty so this would be a no-op. + */ + kbase_csf_scheduler_wait_for_kthread_pending_work(kctx->kbdev, + &queue->pending_kick); + cancel_work_sync(&queue->work); mutex_destroy(&queue->lock); - kfree(queue); + vfree(queue); } else { dev_dbg(kctx->kbdev->dev, "Attempt to delete a non-existent KCPU queue"); mutex_unlock(&kctx->csf.kcpu_queues.lock); @@ -2079,7 +2080,7 @@ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_FREE_END(struct kbase_device *kbde KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_FREE_END(kbdev, queue); } -static void kcpu_queue_process(struct kbase_kcpu_command_queue *queue, bool drain_queue) +void kbase_csf_kcpu_queue_process(struct kbase_kcpu_command_queue *queue, bool drain_queue) { struct kbase_device *kbdev = queue->kctx->kbdev; bool process_next = true; @@ -2199,10 +2200,10 @@ static void kcpu_queue_process(struct kbase_kcpu_command_queue *queue, bool drai KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_START(kbdev, queue); - kbase_gpu_vm_lock(queue->kctx); + kbase_gpu_vm_lock_with_pmode_sync(queue->kctx); meta = kbase_sticky_resource_acquire(queue->kctx, - cmd->info.import.gpu_va); - kbase_gpu_vm_unlock(queue->kctx); + cmd->info.import.gpu_va, NULL); + kbase_gpu_vm_unlock_with_pmode_sync(queue->kctx); if (meta == NULL) { queue->has_error = true; @@ -2219,10 +2220,10 @@ static void kcpu_queue_process(struct kbase_kcpu_command_queue *queue, bool drai KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_START(kbdev, queue); - kbase_gpu_vm_lock(queue->kctx); + 
kbase_gpu_vm_lock_with_pmode_sync(queue->kctx); ret = kbase_sticky_resource_release(queue->kctx, NULL, cmd->info.import.gpu_va); - kbase_gpu_vm_unlock(queue->kctx); + kbase_gpu_vm_unlock_with_pmode_sync(queue->kctx); if (!ret) { queue->has_error = true; @@ -2240,10 +2241,10 @@ static void kcpu_queue_process(struct kbase_kcpu_command_queue *queue, bool drai KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_FORCE_START(kbdev, queue); - kbase_gpu_vm_lock(queue->kctx); + kbase_gpu_vm_lock_with_pmode_sync(queue->kctx); ret = kbase_sticky_resource_release_force(queue->kctx, NULL, cmd->info.import.gpu_va); - kbase_gpu_vm_unlock(queue->kctx); + kbase_gpu_vm_unlock_with_pmode_sync(queue->kctx); if (!ret) { queue->has_error = true; @@ -2642,7 +2643,7 @@ int kbase_csf_kcpu_queue_enqueue(struct kbase_context *kctx, } queue->num_pending_cmds += enq->nr_commands; - kcpu_queue_process(queue, false); + kbase_csf_kcpu_queue_process(queue, false); } out: @@ -2653,23 +2654,14 @@ out: int kbase_csf_kcpu_queue_context_init(struct kbase_context *kctx) { - kctx->csf.kcpu_wq_high_prio = alloc_workqueue("mali_kcpu_wq_%i_high_prio", - WQ_UNBOUND | WQ_HIGHPRI, 0, kctx->tgid); - if (kctx->csf.kcpu_wq_high_prio == NULL) { + kctx->csf.kcpu_queues.kcpu_wq = + alloc_workqueue("mali_kcpu_wq_%i_%i", 0, 0, kctx->tgid, kctx->id); + if (kctx->csf.kcpu_queues.kcpu_wq == NULL) { dev_err(kctx->kbdev->dev, "Failed to initialize KCPU queue high-priority workqueue"); return -ENOMEM; } - kctx->csf.kcpu_wq_normal_prio = - alloc_workqueue("mali_kcpu_wq_%i_normal_prio", 0, 0, kctx->tgid); - if (kctx->csf.kcpu_wq_normal_prio == NULL) { - dev_err(kctx->kbdev->dev, - "Failed to initialize KCPU queue normal-priority workqueue"); - destroy_workqueue(kctx->csf.kcpu_wq_high_prio); - return -ENOMEM; - } - mutex_init(&kctx->csf.kcpu_queues.lock); return 0; @@ -2688,8 +2680,7 @@ void kbase_csf_kcpu_queue_context_term(struct kbase_context *kctx) mutex_destroy(&kctx->csf.kcpu_queues.lock); - 
destroy_workqueue(kctx->csf.kcpu_wq_normal_prio); - destroy_workqueue(kctx->csf.kcpu_wq_high_prio); + destroy_workqueue(kctx->csf.kcpu_queues.kcpu_wq); } KBASE_EXPORT_TEST_API(kbase_csf_kcpu_queue_context_term); @@ -2699,15 +2690,42 @@ int kbase_csf_kcpu_queue_delete(struct kbase_context *kctx, return delete_queue(kctx, (u32)del->id); } +static struct kbase_kcpu_dma_fence_meta * +kbase_csf_kcpu_queue_metadata_new(struct kbase_context *kctx, u64 fence_context) +{ + int n; + struct kbase_kcpu_dma_fence_meta *metadata = kzalloc(sizeof(*metadata), GFP_KERNEL); + + if (!metadata) + goto early_ret; + + *metadata = (struct kbase_kcpu_dma_fence_meta){ + .kbdev = kctx->kbdev, + .kctx_id = kctx->id, + }; + + /* Please update MAX_TIMELINE_NAME macro when making changes to the string. */ + n = scnprintf(metadata->timeline_name, MAX_TIMELINE_NAME, "%u-%d_%u-%llu-kcpu", + kctx->kbdev->id, kctx->tgid, kctx->id, fence_context); + if (WARN_ON(n >= MAX_TIMELINE_NAME)) { + kfree(metadata); + metadata = NULL; + goto early_ret; + } + + kbase_refcount_set(&metadata->refcount, 1); + +early_ret: + return metadata; +} +KBASE_ALLOW_ERROR_INJECTION_TEST_API(kbase_csf_kcpu_queue_metadata_new, ERRNO_NULL); + int kbase_csf_kcpu_queue_new(struct kbase_context *kctx, struct kbase_ioctl_kcpu_queue_new *newq) { struct kbase_kcpu_command_queue *queue; - int idx; - int n; - int ret = 0; -#if IS_ENABLED(CONFIG_SYNC_FILE) struct kbase_kcpu_dma_fence_meta *metadata; -#endif + int idx; + int ret = 0; /* The queue id is of u8 type and we use the index of the kcpu_queues * array as an id, so the number of elements in the array can't be * more than 256. 
@@ -2727,54 +2745,48 @@ int kbase_csf_kcpu_queue_new(struct kbase_context *kctx, struct kbase_ioctl_kcpu goto out; } - queue = kzalloc(sizeof(*queue), GFP_KERNEL); - + queue = vzalloc(sizeof(*queue)); if (!queue) { ret = -ENOMEM; goto out; } + *queue = (struct kbase_kcpu_command_queue) + { + .kctx = kctx, .start_offset = 0, .num_pending_cmds = 0, .enqueue_failed = false, + .command_started = false, .has_error = false, .id = idx, +#if IS_ENABLED(CONFIG_SYNC_FILE) + .fence_context = dma_fence_context_alloc(1), .fence_seqno = 0, + .fence_wait_processed = false, +#endif /* IS_ENABLED(CONFIG_SYNC_FILE) */ + }; + + mutex_init(&queue->lock); + INIT_WORK(&queue->work, kcpu_queue_process_worker); + INIT_LIST_HEAD(&queue->high_prio_work); + atomic_set(&queue->pending_kick, 0); + INIT_WORK(&queue->timeout_work, kcpu_queue_timeout_worker); + INIT_LIST_HEAD(&queue->jit_blocked); + + if (IS_ENABLED(CONFIG_SYNC_FILE)) { + metadata = kbase_csf_kcpu_queue_metadata_new(kctx, queue->fence_context); + if (!metadata) { + vfree(queue); + ret = -ENOMEM; + goto out; + } + + queue->metadata = metadata; + atomic_inc(&kctx->kbdev->live_fence_metadata); + atomic_set(&queue->fence_signal_pending_cnt, 0); + kbase_timer_setup(&queue->fence_signal_timeout, fence_signal_timeout_cb); + } + + if (IS_ENABLED(CONFIG_MALI_BIFROST_FENCE_DEBUG)) + kbase_timer_setup(&queue->fence_timeout, fence_timeout_callback); + bitmap_set(kctx->csf.kcpu_queues.in_use, (unsigned int)idx, 1); kctx->csf.kcpu_queues.array[idx] = queue; - mutex_init(&queue->lock); - queue->kctx = kctx; - queue->start_offset = 0; - queue->num_pending_cmds = 0; -#if IS_ENABLED(CONFIG_SYNC_FILE) - queue->fence_context = dma_fence_context_alloc(1); - queue->fence_seqno = 0; - queue->fence_wait_processed = false; - - metadata = kzalloc(sizeof(*metadata), GFP_KERNEL); - if (!metadata) { - kfree(queue); - ret = -ENOMEM; - goto out; - } - - metadata->kbdev = kctx->kbdev; - metadata->kctx_id = kctx->id; - n = snprintf(metadata->timeline_name, 
MAX_TIMELINE_NAME, "%u-%d_%u-%llu-kcpu", - kctx->kbdev->id, kctx->tgid, kctx->id, queue->fence_context); - if (WARN_ON(n >= MAX_TIMELINE_NAME)) { - kfree(queue); - kfree(metadata); - ret = -EINVAL; - goto out; - } - - kbase_refcount_set(&metadata->refcount, 1); - queue->metadata = metadata; - atomic_inc(&kctx->kbdev->live_fence_metadata); -#endif /* CONFIG_SYNC_FILE */ - queue->enqueue_failed = false; - queue->command_started = false; - INIT_LIST_HEAD(&queue->jit_blocked); - queue->has_error = false; - INIT_WORK(&queue->work, kcpu_queue_process_worker); - INIT_WORK(&queue->timeout_work, kcpu_queue_timeout_worker); - queue->id = idx; - newq->id = idx; /* Fire the tracepoint with the mutex held to enforce correct ordering @@ -2784,14 +2796,6 @@ int kbase_csf_kcpu_queue_new(struct kbase_context *kctx, struct kbase_ioctl_kcpu queue->num_pending_cmds); KBASE_KTRACE_ADD_CSF_KCPU(kctx->kbdev, KCPU_QUEUE_CREATE, queue, queue->fence_context, 0); -#ifdef CONFIG_MALI_BIFROST_FENCE_DEBUG - kbase_timer_setup(&queue->fence_timeout, fence_timeout_callback); -#endif - -#if IS_ENABLED(CONFIG_SYNC_FILE) - atomic_set(&queue->fence_signal_pending_cnt, 0); - kbase_timer_setup(&queue->fence_signal_timeout, fence_signal_timeout_cb); -#endif out: mutex_unlock(&kctx->csf.kcpu_queues.lock); diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_kcpu.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_kcpu.h index d1f18ed5caca..291509bef5a6 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_kcpu.h +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_kcpu.h @@ -243,7 +243,19 @@ struct kbase_kcpu_command { * @work: struct work_struct which contains a pointer to * the function which handles processing of kcpu * commands enqueued into a kcpu command queue; - * part of kernel API for processing workqueues + * part of kernel API for processing workqueues. + * This would be used if the context is not + * prioritised, otherwise it would be handled by + * kbase_csf_scheduler_kthread(). 
+ * @high_prio_work: A counterpart to @work, this queue would be + * added to a list to be processed by + * kbase_csf_scheduler_kthread() if it is + * prioritised. + * @pending_kick: Indicates that kbase_csf_scheduler_kthread() + * should re-evaluate pending commands for this + * queue. This would be set to false when the work + * is done. This is used mainly for + * synchronisation with queue termination. * @timeout_work: struct work_struct which contains a pointer to the * function which handles post-timeout actions * queue when a fence signal timeout occurs. @@ -287,6 +299,8 @@ struct kbase_kcpu_command_queue { struct kbase_context *kctx; struct kbase_kcpu_command commands[KBASEP_KCPU_QUEUE_SIZE]; struct work_struct work; + struct list_head high_prio_work; + atomic_t pending_kick; struct work_struct timeout_work; u8 start_offset; u8 id; @@ -299,9 +313,7 @@ struct kbase_kcpu_command_queue { bool command_started; struct list_head jit_blocked; bool has_error; -#ifdef CONFIG_MALI_BIFROST_FENCE_DEBUG struct timer_list fence_timeout; -#endif /* CONFIG_MALI_BIFROST_FENCE_DEBUG */ #if IS_ENABLED(CONFIG_SYNC_FILE) struct kbase_kcpu_dma_fence_meta *metadata; #endif /* CONFIG_SYNC_FILE */ @@ -334,6 +346,18 @@ int kbase_csf_kcpu_queue_new(struct kbase_context *kctx, struct kbase_ioctl_kcpu int kbase_csf_kcpu_queue_delete(struct kbase_context *kctx, struct kbase_ioctl_kcpu_queue_delete *del); +/** + * kbase_csf_kcpu_queue_process - Process pending KCPU queue commands + * + * @queue: The queue to process pending commands for + * @drain_queue: Whether to skip all blocking commands in the queue. + * This is expected to be set to true on queue + * termination. + * + * This function does not return a value. + */ +void kbase_csf_kcpu_queue_process(struct kbase_kcpu_command_queue *queue, bool drain_queue); + /** * kbase_csf_kcpu_queue_enqueue - Enqueue a KCPU command into a KCPU command * queue.
diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_registers.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_registers.h index d01f3070cf5b..7e96a9d01fc7 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_registers.h +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_registers.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2018-2024 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -172,6 +172,11 @@ #define CSG_STATUS_EP_CURRENT 0x0010 /* () Endpoint allocation status register */ #define CSG_STATUS_EP_REQ 0x0014 /* () Endpoint request status register */ #define CSG_RESOURCE_DEP 0x001C /* () Current resource dependencies */ +/* TODO: GPUCORE-xxxx: Remove after spec alignment, use 0x1C as CSG_RESOURCE_DEP is deprecated*/ +/* CSG_OUTPUT_BLOCK register offsets */ +#ifndef CSG_PROGRESS_TIMER_STATE +#define CSG_PROGRESS_TIMER_STATE 0x001C /* () Current resource status */ +#endif /* GLB_CONTROL_BLOCK register offsets */ #define GLB_VERSION 0x0000 /* () Global interface version */ @@ -250,7 +255,7 @@ #define GLB_ACK 0x0000 /* () Global acknowledge */ #define GLB_DB_ACK 0x0008 /* () Global doorbell acknowledge */ -#define GLB_HALT_STATUS 0x0010 /* () Global halt status */ +#define GLB_FATAL_STATUS 0x0010 /* () Global fatal error status */ #define GLB_PRFCNT_STATUS 0x0014 /* () Performance counter status */ #define GLB_PRFCNT_INSERT 0x0018 /* () Performance counter buffer insert index */ #define GLB_DEBUG_FWUTF_RESULT GLB_DEBUG_ARG_OUT0 /* () Firmware debug test result */ @@ -1243,6 +1248,21 @@ CSG_STATUS_EP_REQ_EXCLUSIVE_FRAGMENT_MASK)) +/* CSG_PROGRESS_TIMER_STATE register */ +#ifndef CSG_PROGRESS_TIMER_STATE_GET +#define CSG_PROGRESS_TIMER_STATE_SHIFT 0 +#define CSG_PROGRESS_TIMER_STATE_MASK ((u32)0xFFFFFFFF << 
CSG_PROGRESS_TIMER_STATE_SHIFT) +#define CSG_PROGRESS_TIMER_STATE_GET(reg_val) \ + (((reg_val)&CSG_PROGRESS_TIMER_STATE_MASK) >> CSG_PROGRESS_TIMER_STATE_SHIFT) +#define CSG_PROGRESS_TIMER_STATE_SET(reg_val, value) \ + (((reg_val) & ~CSG_PROGRESS_TIMER_STATE_MASK) | \ + (((value) << CSG_PROGRESS_TIMER_STATE_SHIFT) & CSG_PROGRESS_TIMER_STATE_MASK)) +/* CSG_PROGRESS_TIMER_STATE values */ +#define CSG_PROGRESS_TIMER_STATE_COMPUTE 0x0 +#define CSG_PROGRESS_TIMER_STATE_FRAGMENT 0x1 +#define CSG_PROGRESS_TIMER_STATE_TILER 0x2 +#define CSG_PROGRESS_TIMER_STATE_NEURAL 0x3 +#endif /* End of CSG_OUTPUT_BLOCK register set definitions */ /* STREAM_CONTROL_BLOCK register set definitions */ @@ -1380,6 +1400,13 @@ #define GLB_REQ_SLEEP_SET(reg_val, value) \ (((reg_val) & ~GLB_REQ_SLEEP_MASK) | \ (((value) << GLB_REQ_SLEEP_SHIFT) & GLB_REQ_SLEEP_MASK)) +#define GLB_REQ_CFG_EVICTION_TIMER_SHIFT 16 +#define GLB_REQ_CFG_EVICTION_TIMER_MASK (0x1 << GLB_REQ_CFG_EVICTION_TIMER_SHIFT) +#define GLB_REQ_CFG_EVICTION_TIMER_GET(reg_val) \ + (((reg_val)&GLB_REQ_CFG_EVICTION_TIMER_MASK) >> GLB_REQ_CFG_EVICTION_TIMER_SHIFT) +#define GLB_REQ_CFG_EVICTION_TIMER_SET(reg_val, value) \ + (((reg_val) & ~GLB_REQ_CFG_EVICTION_TIMER_MASK) | \ + (((value) << GLB_REQ_CFG_EVICTION_TIMER_SHIFT) & GLB_REQ_CFG_EVICTION_TIMER_MASK)) #define GLB_REQ_INACTIVE_COMPUTE_SHIFT 20 #define GLB_REQ_INACTIVE_COMPUTE_MASK (0x1 << GLB_REQ_INACTIVE_COMPUTE_SHIFT) #define GLB_REQ_INACTIVE_COMPUTE_GET(reg_val) \ @@ -1422,6 +1449,12 @@ #define GLB_REQ_PRFCNT_OVERFLOW_SET(reg_val, value) \ (((reg_val) & ~GLB_REQ_PRFCNT_OVERFLOW_MASK) | \ (((value) << GLB_REQ_PRFCNT_OVERFLOW_SHIFT) & GLB_REQ_PRFCNT_OVERFLOW_MASK)) +#define GLB_ACK_FATAL_SHIFT GPU_U(27) +#define GLB_ACK_FATAL_MASK (GPU_U(0x1) << GLB_ACK_FATAL_SHIFT) +#define GLB_ACK_FATAL_GET(reg_val) (((reg_val)&GLB_ACK_FATAL_MASK) >> GLB_ACK_FATAL_SHIFT) +#define GLB_ACK_FATAL_SET(reg_val, value) \ + (~(~(reg_val) | GLB_ACK_FATAL_MASK) | \ + (((value) << GLB_ACK_FATAL_SHIFT) & 
GLB_ACK_FATAL_MASK)) #define GLB_REQ_DEBUG_CSF_REQ_SHIFT 30 #define GLB_REQ_DEBUG_CSF_REQ_MASK (0x1 << GLB_REQ_DEBUG_CSF_REQ_SHIFT) #define GLB_REQ_DEBUG_CSF_REQ_GET(reg_val) \ @@ -1518,6 +1551,17 @@ (((reg_val) & ~GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_MASK) | \ (((value) << GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_SHIFT) & \ GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_MASK)) +#define GLB_ACK_IRQ_MASK_ITER_TRACE_ENABLE_SHIFT 11 +#define GLB_ACK_IRQ_MASK_ITER_TRACE_ENABLE_MASK (0x1 << GLB_ACK_IRQ_MASK_ITER_TRACE_ENABLE_SHIFT) +#define GLB_ACK_IRQ_MASK_CFG_EVICTION_TIMER_SHIFT 16 +#define GLB_ACK_IRQ_MASK_CFG_EVICTION_TIMER_MASK (0x1 << GLB_ACK_IRQ_MASK_CFG_EVICTION_TIMER_SHIFT) +#define GLB_ACK_IRQ_MASK_CFG_EVICTION_TIMER_GET(reg_val) \ + (((reg_val)&GLB_ACK_IRQ_MASK_CFG_EVICTION_TIMER_MASK) >> \ + GLB_ACK_IRQ_MASK_CFG_EVICTION_TIMER_SHIFT) +#define GLB_ACK_IRQ_MASK_CFG_EVICTION_TIMER_SET(reg_val, value) \ + (((reg_val) & ~GLB_ACK_IRQ_MASK_CFG_EVICTION_TIMER_MASK) | \ + (((value) << GLB_ACK_IRQ_MASK_CFG_EVICTION_TIMER_SHIFT) & \ + GLB_ACK_IRQ_MASK_CFG_EVICTION_TIMER_MASK)) #define GLB_ACK_IRQ_MASK_INACTIVE_COMPUTE_SHIFT 20 #define GLB_ACK_IRQ_MASK_INACTIVE_COMPUTE_MASK (0x1 << GLB_ACK_IRQ_MASK_INACTIVE_COMPUTE_SHIFT) #define GLB_ACK_IRQ_MASK_INACTIVE_COMPUTE_GET(reg_val) \ @@ -1629,6 +1673,45 @@ GLB_PWROFF_TIMER_CONFIG_NO_MODIFIER_MASK)) #endif /* End of GLB_PWROFF_TIMER_CONFIG values */ +/* GLB_EVICTION_TIMER register */ +#ifndef GLB_EVICTION_TIMER +#define GLB_EVICTION_TIMER 0x0090 +#define GLB_EVICTION_TIMER_TIMEOUT_SHIFT (0) +#define GLB_EVICTION_TIMER_TIMEOUT_MASK ((0x7FFFFFFF) << GLB_EVICTION_TIMER_TIMEOUT_SHIFT) +#define GLB_EVICTION_TIMER_TIMEOUT_GET(reg_val) \ + (((reg_val)&GLB_EVICTION_TIMER_TIMEOUT_MASK) >> GLB_EVICTION_TIMER_TIMEOUT_SHIFT) +#define GLB_EVICTION_TIMER_TIMEOUT_SET(reg_val, value) \ + (((reg_val) & ~GLB_EVICTION_TIMER_TIMEOUT_MASK) | \ + (((value) << GLB_EVICTION_TIMER_TIMEOUT_SHIFT) & GLB_EVICTION_TIMER_TIMEOUT_MASK)) +#define 
GLB_EVICTION_TIMER_TIMER_SOURCE_SHIFT (31) +#define GLB_EVICTION_TIMER_TIMER_SOURCE_MASK ((0x1) << GLB_EVICTION_TIMER_TIMER_SOURCE_SHIFT) +#define GLB_EVICTION_TIMER_TIMER_SOURCE_GET(reg_val) \ + (((reg_val)&GLB_EVICTION_TIMER_TIMER_SOURCE_MASK) >> GLB_EVICTION_TIMER_TIMER_SOURCE_SHIFT) +#define GLB_EVICTION_TIMER_TIMER_SOURCE_SET(reg_val, value) \ + (((reg_val) & ~GLB_EVICTION_TIMER_TIMER_SOURCE_MASK) | \ + (((value) << GLB_EVICTION_TIMER_TIMER_SOURCE_SHIFT) & \ + GLB_EVICTION_TIMER_TIMER_SOURCE_MASK)) +/* GLB_EVICTION_TIMER_TIMER_SOURCE values */ +#define GLB_EVICTION_TIMER_TIMER_SOURCE_SYSTEM_TIMESTAMP 0x0U +#define GLB_EVICTION_TIMER_TIMER_SOURCE_GPU_COUNTER 0x1U +/* End of GLB_EVICTION_TIMER_TIMER_SOURCE values */ +#endif /* End of GLB_EVICTION_TIMER */ + +/* GLB_EVICTION_TIMER_CONFIG register */ +#ifndef GLB_EVICTION_TIMER_CONFIG +#define GLB_EVICTION_TIMER_CONFIG 0x0094 /* () Configuration fields for GLB_EVICTION_TIMER */ +#define GLB_EVICTION_TIMER_CONFIG_NO_MODIFIER_SHIFT 0 +#define GLB_EVICTION_TIMER_CONFIG_NO_MODIFIER_MASK \ + (0x1 << GLB_EVICTION_TIMER_CONFIG_NO_MODIFIER_SHIFT) +#define GLB_EVICTION_TIMER_CONFIG_NO_MODIFIER_GET(reg_val) \ + (((reg_val)&GLB_EVICTION_TIMER_CONFIG_NO_MODIFIER_MASK) >> \ + GLB_EVICTION_TIMER_CONFIG_NO_MODIFIER_SHIFT) +#define GLB_EVICTION_TIMER_CONFIG_NO_MODIFIER_SET(reg_val, value) \ + (((reg_val) & ~GLB_EVICTION_TIMER_CONFIG_NO_MODIFIER_MASK) | \ + (((value) << GLB_EVICTION_TIMER_CONFIG_NO_MODIFIER_SHIFT) & \ + GLB_EVICTION_TIMER_CONFIG_NO_MODIFIER_MASK)) +#endif /* End of GLB_EVICTION_TIMER_CONFIG values */ + /* GLB_ALLOC_EN register */ #define GLB_ALLOC_EN_MASK_SHIFT 0 #define GLB_ALLOC_EN_MASK_MASK (GPU_ULL(0xFFFFFFFFFFFFFFFF) << GLB_ALLOC_EN_MASK_SHIFT) @@ -1711,6 +1794,15 @@ (((reg_val) & ~GLB_IDLE_TIMER_CONFIG_NO_MODIFIER_MASK) | \ (((value) << GLB_IDLE_TIMER_CONFIG_NO_MODIFIER_SHIFT) & \ GLB_IDLE_TIMER_CONFIG_NO_MODIFIER_MASK)) +#define GLB_IDLE_TIMER_CONFIG_SLEEP_ON_IDLE_SHIFT 9 +#define 
GLB_IDLE_TIMER_CONFIG_SLEEP_ON_IDLE_MASK (0x1 << GLB_IDLE_TIMER_CONFIG_SLEEP_ON_IDLE_SHIFT) +#define GLB_IDLE_TIMER_CONFIG_SLEEP_ON_IDLE_GET(reg_val) \ + (((reg_val)&GLB_IDLE_TIMER_CONFIG_SLEEP_ON_IDLE_MASK) >> \ + GLB_IDLE_TIMER_CONFIG_SLEEP_ON_IDLE_SHIFT) +#define GLB_IDLE_TIMER_CONFIG_SLEEP_ON_IDLE_SET(reg_val, value) \ + (((reg_val) & ~GLB_IDLE_TIMER_CONFIG_SLEEP_ON_IDLE_MASK) | \ + (((value) << GLB_IDLE_TIMER_CONFIG_SLEEP_ON_IDLE_SHIFT) & \ + GLB_IDLE_TIMER_CONFIG_SLEEP_ON_IDLE_MASK)) #endif /* End of GLB_IDLE_TIMER_CONFIG values */ /* GLB_INSTR_FEATURES register */ @@ -1822,6 +1914,20 @@ (((reg_val) & ~GLB_DEBUG_REQ_RUN_MODE_MASK) | \ (((value) << GLB_DEBUG_REQ_RUN_MODE_SHIFT) & GLB_DEBUG_REQ_RUN_MODE_MASK)) +/* GLB_FATAL_STATUS register */ +#define GLB_FATAL_STATUS_VALUE_SHIFT GPU_U(0) +#define GLB_FATAL_STATUS_VALUE_MASK (GPU_U(0xFFFFFFFF) << GLB_FATAL_STATUS_VALUE_SHIFT) +#define GLB_FATAL_STATUS_VALUE_GET(reg_val) \ + (((reg_val)&GLB_FATAL_STATUS_VALUE_MASK) >> GLB_FATAL_STATUS_VALUE_SHIFT) + +enum glb_fatal_status { + GLB_FATAL_STATUS_VALUE_OK, + GLB_FATAL_STATUS_VALUE_ASSERT, + GLB_FATAL_STATUS_VALUE_UNEXPECTED_EXCEPTION, + GLB_FATAL_STATUS_VALUE_HANG, + GLB_FATAL_STATUS_VALUE_COUNT +}; + /* GLB_DEBUG_ACK register */ #define GLB_DEBUG_ACK_DEBUG_RUN_SHIFT GPU_U(23) #define GLB_DEBUG_ACK_DEBUG_RUN_MASK (GPU_U(0x1) << GLB_DEBUG_ACK_DEBUG_RUN_SHIFT) diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_reset_gpu.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_reset_gpu.c index 240397ebc16d..ffd27318cba3 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_reset_gpu.c +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_reset_gpu.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2024 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -224,8 +224,11 @@ static void kbase_csf_reset_end_hw_access(struct kbase_device *kbdev, int err_du static void kbase_csf_debug_dump_registers(struct kbase_device *kbdev) { + unsigned long flags; + kbase_io_history_dump(kbdev); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); dev_err(kbdev->dev, "Register state:"); dev_err(kbdev->dev, " GPU_IRQ_RAWSTAT=0x%08x GPU_STATUS=0x%08x MCU_STATUS=0x%08x", kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(GPU_IRQ_RAWSTAT)), @@ -251,6 +254,7 @@ static void kbase_csf_debug_dump_registers(struct kbase_device *kbdev) kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(TILER_CONFIG))); } + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); } /** @@ -396,6 +400,7 @@ static int kbase_csf_reset_gpu_now(struct kbase_device *kbdev, bool firmware_ini */ if (likely(firmware_inited)) kbase_csf_scheduler_reset(kbdev); + cancel_work_sync(&kbdev->csf.firmware_reload_work); dev_dbg(kbdev->dev, "Disable GPU hardware counters.\n"); @@ -403,6 +408,7 @@ static int kbase_csf_reset_gpu_now(struct kbase_device *kbdev, bool firmware_ini kbase_hwcnt_context_disable(kbdev->hwcnt_gpu_ctx); ret = kbase_csf_reset_gpu_once(kbdev, firmware_inited, silent); + if (ret == SOFT_RESET_FAILED) { dev_err(kbdev->dev, "Soft-reset failed"); goto err; @@ -490,6 +496,11 @@ static void kbase_csf_reset_gpu_worker(struct work_struct *data) bool kbase_prepare_to_reset_gpu(struct kbase_device *kbdev, unsigned int flags) { + if (kbase_pm_is_gpu_lost(kbdev)) { + /* GPU access has been removed, reset will be done by Arbiter instead */ + return false; + } + if (flags & RESET_FLAGS_HWC_UNRECOVERABLE_ERROR) kbase_hwcnt_backend_csf_on_unrecoverable_error(&kbdev->hwcnt_gpu_iface); diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_scheduler.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_scheduler.c index 81ddeb667d06..cd6abd62f6c5 100644 
--- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_scheduler.c +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_scheduler.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2018-2024 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -36,11 +36,13 @@ #include "mali_kbase_csf_tiler_heap_reclaim.h" #include "mali_kbase_csf_mcu_shared_reg.h" #include +#include #if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) #include #include #endif /* CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD */ + /* Value to indicate that a queue group is not groups_to_schedule list */ #define KBASEP_GROUP_PREPARED_SEQ_NUM_INVALID (U32_MAX) @@ -84,13 +86,19 @@ scheduler_get_protm_enter_async_group(struct kbase_device *const kbdev, struct kbase_queue_group *const group); static struct kbase_queue_group *get_tock_top_group(struct kbase_csf_scheduler *const scheduler); static void scheduler_enable_tick_timer_nolock(struct kbase_device *kbdev); -static int suspend_active_queue_groups(struct kbase_device *kbdev, unsigned long *slot_mask); +static int suspend_active_queue_groups(struct kbase_device *kbdev, unsigned long *slot_mask, + bool reset); static int suspend_active_groups_on_powerdown(struct kbase_device *kbdev, bool system_suspend); static void schedule_in_cycle(struct kbase_queue_group *group, bool force); static bool queue_group_scheduled_locked(struct kbase_queue_group *group); #define kctx_as_enabled(kctx) (!kbase_ctx_flag(kctx, KCTX_AS_DISABLED_ON_FAULT)) +bool is_gpu_level_suspend_supported(struct kbase_device *const kbdev) +{ + return false; +} + #if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) /** * gpu_metrics_ctx_init() - Take a reference on GPU metrics context if it exists, @@ -119,7 +127,7 @@ static inline int 
gpu_metrics_ctx_init(struct kbase_context *kctx) put_cred(cred); /* Return early if this is not a Userspace created context */ - if (unlikely(!kctx->kfile)) + if (unlikely(!kctx->filp)) return 0; /* Serialize against the other threads trying to create/destroy Kbase contexts. */ @@ -156,7 +164,7 @@ static inline int gpu_metrics_ctx_init(struct kbase_context *kctx) static inline void gpu_metrics_ctx_term(struct kbase_context *kctx) { /* Return early if this is not a Userspace created context */ - if (unlikely(!kctx->kfile)) + if (unlikely(!kctx->filp)) return; /* Serialize against the other threads trying to create/destroy Kbase contexts. */ @@ -458,11 +466,14 @@ static void wait_for_dump_complete_on_group_deschedule(struct kbase_queue_group * * This function notifies the Userspace client waiting for the faults and wait * for the Client to complete the dumping. - * The function is called only from Scheduling tick/tock when a request sent by - * the Scheduler to FW times out or from the protm event work item of the group - * when the protected mode entry request times out. - * In the latter case there is no wait done as scheduler lock would be released - * immediately. In the former case the function waits and releases the scheduler + * The function is mainly called from Scheduling tick/tock when a request sent by + * the Scheduler to FW times out. It can be called outside the tick/tock when timeout + * happens in the following 3 cases :- + * - Entry to protected mode is initiated from protm event work item. + * - Forced exit from protected mode is triggered when GPU queue of an on-slot group is kicked. + * - CSG termination request is sent when Userspace tries to delete the queue group. + * In the latter 3 cases there is no wait done as scheduler lock would be released + * immediately. In the tick/tock case the function waits and releases the scheduler * lock before the wait. 
It has been ensured that the Scheduler view of the groups * won't change meanwhile, so no group can enter/exit the Scheduler, become * runnable or go off slot. @@ -478,10 +489,9 @@ static void schedule_actions_trigger_df(struct kbase_device *kbdev, struct kbase if (!kbase_debug_csf_fault_notify(kbdev, kctx, error)) return; - if (unlikely(scheduler->state != SCHED_BUSY)) { - WARN_ON(error != DF_PROTECTED_MODE_ENTRY_FAILURE); + /* Return early if the function was called outside the tick/tock */ + if (unlikely(scheduler->state != SCHED_BUSY)) return; - } mutex_unlock(&scheduler->lock); kbase_debug_csf_fault_wait_completion(kbdev); @@ -788,19 +798,20 @@ static void update_on_slot_queues_offsets(struct kbase_device *kbdev) static void enqueue_gpu_idle_work(struct kbase_csf_scheduler *const scheduler) { atomic_set(&scheduler->gpu_no_longer_idle, false); - queue_work(scheduler->idle_wq, &scheduler->gpu_idle_work); + atomic_inc(&scheduler->pending_gpu_idle_work); + complete(&scheduler->kthread_signal); } -bool kbase_csf_scheduler_process_gpu_idle_event(struct kbase_device *kbdev) +void kbase_csf_scheduler_process_gpu_idle_event(struct kbase_device *kbdev) { struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; bool can_suspend_on_idle; - bool invoke_pm_state_machine = false; lockdep_assert_held(&kbdev->hwaccess_lock); lockdep_assert_held(&scheduler->interrupt_lock); - can_suspend_on_idle = kbase_pm_idle_groups_sched_suspendable(kbdev); + can_suspend_on_idle = kbase_pm_idle_groups_sched_suspendable(kbdev) && + !kbase_pm_is_mcu_inactive(kbdev, kbdev->pm.backend.mcu_state); KBASE_KTRACE_ADD(kbdev, SCHEDULER_GPU_IDLE_EVENT_CAN_SUSPEND, NULL, (((u64)can_suspend_on_idle) << 32)); @@ -812,23 +823,6 @@ bool kbase_csf_scheduler_process_gpu_idle_event(struct kbase_device *kbdev) scheduler->fast_gpu_idle_handling = (kbdev->csf.gpu_idle_hysteresis_ns == 0) || !kbase_csf_scheduler_all_csgs_idle(kbdev); - /* If GPU idle event occurred after the runtime suspend was aborted due 
to - * DB_MIRROR irq then it suggests that Userspace submission didn't make GPU - * non-idle. So the planned resumption of scheduling can be cancelled and - * MCU can be put back to sleep state to re-trigger the runtime suspend. - */ - if (unlikely(kbdev->pm.backend.exit_gpu_sleep_mode && - kbdev->pm.backend.runtime_suspend_abort_reason == - ABORT_REASON_DB_MIRROR_IRQ)) { - /* Cancel the planned resumption of scheduling */ - kbdev->pm.backend.exit_gpu_sleep_mode = false; - kbdev->pm.backend.runtime_suspend_abort_reason = ABORT_REASON_NONE; - /* PM state machine can be invoked to put MCU back to the sleep - * state right away and thereby re-trigger the runtime suspend. - */ - invoke_pm_state_machine = true; - } - /* The GPU idle worker relies on update_on_slot_queues_offsets() to have * finished. It's queued before to reduce the time it takes till execution * but it'll eventually be blocked by the scheduler->interrupt_lock. @@ -839,8 +833,6 @@ bool kbase_csf_scheduler_process_gpu_idle_event(struct kbase_device *kbdev) /* The extract offsets are unused in fast GPU idle handling */ if (!scheduler->fast_gpu_idle_handling) update_on_slot_queues_offsets(kbdev); - - return invoke_pm_state_machine; } u32 kbase_csf_scheduler_get_nr_active_csgs_locked(struct kbase_device *kbdev) @@ -1027,6 +1019,8 @@ static void scheduler_force_protm_exit(struct kbase_device *kbdev) * * @kbdev: Pointer to the device * @suspend_handler: Handler code for how to handle a suspend that might occur. + * @active_after_sleep: Flag to indicate that Scheduler is being activated from + * the sleeping state. * * This function is usually called when Scheduler needs to be activated. * The PM reference count is acquired for the Scheduler and the power on @@ -1035,7 +1029,8 @@ static void scheduler_force_protm_exit(struct kbase_device *kbdev) * Return: 0 if successful or a negative error code on failure. 
*/ static int scheduler_pm_active_handle_suspend(struct kbase_device *kbdev, - enum kbase_pm_suspend_handler suspend_handler) + enum kbase_pm_suspend_handler suspend_handler, + bool active_after_sleep) { unsigned long flags; u32 prev_count; @@ -1043,24 +1038,35 @@ static int scheduler_pm_active_handle_suspend(struct kbase_device *kbdev, lockdep_assert_held(&kbdev->csf.scheduler.lock); - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); prev_count = kbdev->csf.scheduler.pm_active_count; if (!WARN_ON(prev_count == U32_MAX)) kbdev->csf.scheduler.pm_active_count++; - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); /* On 0 => 1, make a pm_ctx_active request */ if (!prev_count) { - ret = kbase_pm_context_active_handle_suspend(kbdev, suspend_handler); - /* Invoke the PM state machines again as the change in MCU - * desired status, due to the update of scheduler.pm_active_count, - * may be missed by the thread that called pm_wait_for_desired_state() - */ - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - if (ret) + kbase_pm_lock(kbdev); + kbdev->pm.backend.mcu_poweron_required = true; + ret = kbase_pm_context_active_handle_suspend_locked(kbdev, suspend_handler); + if (ret) { kbdev->csf.scheduler.pm_active_count--; - kbase_pm_update_state(kbdev); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + kbdev->pm.backend.mcu_poweron_required = false; + } else { + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + if (active_after_sleep) { + kbdev->pm.backend.runtime_suspend_abort_reason = ABORT_REASON_NONE; + kbdev->pm.backend.gpu_sleep_mode_active = false; + } + /* Check if the GPU is already active */ + if (kbdev->pm.active_count > 1) { + /* GPU is already active, so need to invoke the PM state machines + * explicitly to turn on the MCU. 
+ */ + kbdev->pm.backend.mcu_desired = true; + kbase_pm_update_state(kbdev); + } + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + } + kbase_pm_unlock(kbdev); } return ret; @@ -1072,8 +1078,6 @@ static int scheduler_pm_active_handle_suspend(struct kbase_device *kbdev, * Scheduler * * @kbdev: Pointer to the device - * @flags: Pointer to the flags variable containing the interrupt state - * when hwaccess lock was acquired. * * This function is called when Scheduler needs to be activated from the * sleeping state. @@ -1081,42 +1085,15 @@ static int scheduler_pm_active_handle_suspend(struct kbase_device *kbdev, * MCU is initiated. It resets the flag that indicates to the MCU state * machine that MCU needs to be put in sleep state. * - * Note: This function shall be called with hwaccess lock held and it may - * release that lock and reacquire it. - * * Return: zero when the PM reference was taken and non-zero when the * system is being suspending/suspended. */ -static int scheduler_pm_active_after_sleep(struct kbase_device *kbdev, unsigned long *flags) +static int scheduler_pm_active_after_sleep(struct kbase_device *kbdev) { - u32 prev_count; - int ret = 0; - lockdep_assert_held(&kbdev->csf.scheduler.lock); - lockdep_assert_held(&kbdev->hwaccess_lock); - prev_count = kbdev->csf.scheduler.pm_active_count; - if (!WARN_ON(prev_count == U32_MAX)) - kbdev->csf.scheduler.pm_active_count++; - - kbdev->pm.backend.runtime_suspend_abort_reason = ABORT_REASON_NONE; - - /* On 0 => 1, make a pm_ctx_active request */ - if (!prev_count) { - spin_unlock_irqrestore(&kbdev->hwaccess_lock, *flags); - - ret = kbase_pm_context_active_handle_suspend( - kbdev, KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE); - - spin_lock_irqsave(&kbdev->hwaccess_lock, *flags); - if (ret) - kbdev->csf.scheduler.pm_active_count--; - else - kbdev->pm.backend.gpu_sleep_mode_active = false; - kbase_pm_update_state(kbdev); - } - - return ret; + return scheduler_pm_active_handle_suspend(kbdev, 
KBASE_PM_SUSPEND_HANDLER_NOT_POSSIBLE, + true); } #endif @@ -1136,28 +1113,32 @@ static void scheduler_pm_idle(struct kbase_device *kbdev) lockdep_assert_held(&kbdev->csf.scheduler.lock); - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); prev_count = kbdev->csf.scheduler.pm_active_count; if (!WARN_ON(prev_count == 0)) kbdev->csf.scheduler.pm_active_count--; - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); if (prev_count == 1) { - kbase_pm_context_idle(kbdev); - /* Invoke the PM state machines again as the change in MCU - * desired status, due to the update of scheduler.pm_active_count, - * may be missed by the thread that called pm_wait_for_desired_state() - */ - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - kbase_pm_update_state(kbdev); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + kbase_pm_lock(kbdev); + kbdev->pm.backend.mcu_poweron_required = false; + kbase_pm_context_idle_locked(kbdev); + /* Check if GPU is still active */ + if (kbdev->pm.active_count) { + /* GPU is still active, so need to invoke the PM state machines + * explicitly to turn off the MCU. + */ + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbdev->pm.backend.mcu_desired = false; + kbase_pm_update_state(kbdev); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + } + kbase_pm_unlock(kbdev); } } #ifdef KBASE_PM_RUNTIME /** * scheduler_pm_idle_before_sleep() - Release the PM reference count and - * trigger the tranistion to sleep state. + * trigger the transition to sleep state. 
* * @kbdev: Pointer to the device * @@ -1168,28 +1149,15 @@ static void scheduler_pm_idle(struct kbase_device *kbdev) static void scheduler_pm_idle_before_sleep(struct kbase_device *kbdev) { unsigned long flags; - u32 prev_count; lockdep_assert_held(&kbdev->csf.scheduler.lock); spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - prev_count = kbdev->csf.scheduler.pm_active_count; - if (!WARN_ON(prev_count == 0)) - kbdev->csf.scheduler.pm_active_count--; kbdev->pm.backend.gpu_sleep_mode_active = true; kbdev->pm.backend.exit_gpu_sleep_mode = false; spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - if (prev_count == 1) { - kbase_pm_context_idle(kbdev); - /* Invoke the PM state machines again as the change in MCU - * desired status, due to the update of scheduler.pm_active_count, - * may be missed by the thread that called pm_wait_for_desired_state() - */ - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - kbase_pm_update_state(kbdev); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - } + scheduler_pm_idle(kbdev); } #endif @@ -1205,8 +1173,8 @@ static void scheduler_wakeup(struct kbase_device *kbdev, bool kick) if (scheduler->state == SCHED_SUSPENDED) { dev_dbg(kbdev->dev, "Re-activating the Scheduler after suspend"); - ret = scheduler_pm_active_handle_suspend(kbdev, - KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE); + ret = scheduler_pm_active_handle_suspend( + kbdev, KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE, false); #if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) hrtimer_start(&scheduler->gpu_metrics_timer, HR_TIMER_DELAY_NSEC(kbase_gpu_metrics_get_tp_emit_interval()), @@ -1214,13 +1182,8 @@ static void scheduler_wakeup(struct kbase_device *kbdev, bool kick) #endif } else { #ifdef KBASE_PM_RUNTIME - unsigned long flags; - dev_dbg(kbdev->dev, "Re-activating the Scheduler out of sleep"); - - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - ret = scheduler_pm_active_after_sleep(kbdev, &flags); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + 
ret = scheduler_pm_active_after_sleep(kbdev); #endif } @@ -1228,7 +1191,7 @@ static void scheduler_wakeup(struct kbase_device *kbdev, bool kick) /* GPUCORE-29850 would add the handling for the case where * Scheduler could not be activated due to system suspend. */ - dev_info(kbdev->dev, "Couldn't wakeup Scheduler due to system suspend"); + dev_dbg(kbdev->dev, "Couldn't wakeup Scheduler due to system suspend"); return; } @@ -1239,13 +1202,20 @@ static void scheduler_wakeup(struct kbase_device *kbdev, bool kick) scheduler_enable_tick_timer_nolock(kbdev); } -static void scheduler_suspend(struct kbase_device *kbdev) +static int scheduler_suspend(struct kbase_device *kbdev) { struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; lockdep_assert_held(&scheduler->lock); if (!WARN_ON(scheduler->state == SCHED_SUSPENDED)) { +#if KBASE_PM_RUNTIME + int ret; + + ret = kbase_csf_firmware_soi_disable_on_scheduler_suspend(kbdev); + if (ret) + return ret; +#endif /* KBASE_PM_RUNTIME */ dev_dbg(kbdev->dev, "Suspending the Scheduler"); scheduler_pm_idle(kbdev); scheduler->state = SCHED_SUSPENDED; @@ -1254,6 +1224,8 @@ static void scheduler_suspend(struct kbase_device *kbdev) #endif KBASE_KTRACE_ADD(kbdev, SCHED_SUSPENDED, NULL, scheduler->state); } + + return 0; } /** @@ -1505,7 +1477,7 @@ static int sched_halt_stream(struct kbase_queue *queue) long remaining; int slot; int err = 0; - const u32 group_schedule_timeout = kbase_get_timeout_ms(kbdev, CSF_CSG_SUSPEND_TIMEOUT); + const u32 group_schedule_timeout = kbdev->csf.csg_suspend_timeout_ms; const u32 fw_timeout_ms = kbase_get_timeout_ms(kbdev, CSF_FIRMWARE_TIMEOUT); if (WARN_ON(!group)) @@ -1985,7 +1957,6 @@ static enum kbase_csf_csg_slot_state update_csg_slot_status(struct kbase_device if ((state == CSG_ACK_STATE_START) || (state == CSG_ACK_STATE_RESUME)) { slot_state = CSG_SLOT_RUNNING; atomic_set(&csg_slot->state, slot_state); - csg_slot->trigger_jiffies = jiffies; KBASE_KTRACE_ADD_CSF_GRP(kbdev, 
CSG_SLOT_RUNNING, csg_slot->resident_group, state); dev_dbg(kbdev->dev, "Group %u running on slot %d\n", @@ -1996,7 +1967,6 @@ static enum kbase_csf_csg_slot_state update_csg_slot_status(struct kbase_device if ((state == CSG_ACK_STATE_SUSPEND) || (state == CSG_ACK_STATE_TERMINATE)) { slot_state = CSG_SLOT_STOPPED; atomic_set(&csg_slot->state, slot_state); - csg_slot->trigger_jiffies = jiffies; KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_STOPPED, csg_slot->resident_group, state); dev_dbg(kbdev->dev, "Group %u stopped on slot %d\n", @@ -2095,7 +2065,6 @@ static void halt_csg_slot(struct kbase_queue_group *group, bool suspend) kbase_csf_ring_csg_doorbell(kbdev, slot); spin_unlock_irqrestore(&kbdev->csf.scheduler.interrupt_lock, flags); atomic_set(&csg_slot[slot].state, CSG_SLOT_DOWN2STOP); - csg_slot[slot].trigger_jiffies = jiffies; KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_STOP_REQ, group, halt_cmd); KBASE_TLSTREAM_TL_KBASE_DEVICE_HALTING_CSG(kbdev, kbdev->id, (u32)slot, suspend); @@ -2814,7 +2783,6 @@ static bool cleanup_csg_slot(struct kbase_queue_group *group) spin_unlock_bh(&kbdev->csf.scheduler.gpu_metrics_lock); #endif - csg_slot->trigger_jiffies = jiffies; atomic_set(&csg_slot->state, CSG_SLOT_READY); KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_CLEANED, group, (u64)slot); @@ -3028,7 +2996,6 @@ static void program_csg_slot(struct kbase_queue_group *group, s8 slot, u8 prio) /* Update status before rings the door-bell, marking ready => run */ atomic_set(&csg_slot->state, CSG_SLOT_READY2RUN); - csg_slot->trigger_jiffies = jiffies; csg_slot->priority = prio; /* Trace the programming of the CSG on the slot */ @@ -3131,8 +3098,9 @@ static void sched_evict_group(struct kbase_queue_group *group, bool fault, static int term_group_sync(struct kbase_queue_group *group) { struct kbase_device *kbdev = group->kctx->kbdev; - const unsigned int fw_timeout_ms = kbase_get_timeout_ms(kbdev, CSF_FIRMWARE_TIMEOUT); - long remaining = kbase_csf_timeout_in_jiffies(fw_timeout_ms); + const 
unsigned int group_term_timeout_ms = + kbase_get_timeout_ms(kbdev, CSF_CSG_TERM_TIMEOUT); + long remaining = kbase_csf_timeout_in_jiffies(group_term_timeout_ms); int err = 0; term_csg_slot(group); @@ -3148,11 +3116,11 @@ static int term_group_sync(struct kbase_queue_group *group) dev_warn( kbdev->dev, "[%llu] term request timeout (%d ms) for group %d of context %d_%d on slot %d", - kbase_backend_get_cycle_cnt(kbdev), fw_timeout_ms, group->handle, + kbase_backend_get_cycle_cnt(kbdev), group_term_timeout_ms, group->handle, group->kctx->tgid, group->kctx->id, group->csg_nr); if (kbase_csf_firmware_ping_wait(kbdev, FW_PING_AFTER_ERROR_TIMEOUT_MS)) error_type = DF_PING_REQUEST_TIMEOUT; - kbase_debug_csf_fault_notify(kbdev, group->kctx, error_type); + schedule_actions_trigger_df(kbdev, group->kctx, error_type); if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE)) kbase_reset_gpu(kbdev); @@ -3593,8 +3561,7 @@ static void program_suspending_csg_slots(struct kbase_device *kbdev) while (!bitmap_empty(slot_mask, MAX_SUPPORTED_CSGS)) { DECLARE_BITMAP(changed, MAX_SUPPORTED_CSGS); - long remaining = kbase_csf_timeout_in_jiffies( - kbase_get_timeout_ms(kbdev, CSF_CSG_SUSPEND_TIMEOUT)); + long remaining = kbase_csf_timeout_in_jiffies(kbdev->csf.csg_suspend_timeout_ms); bitmap_copy(changed, slot_mask, MAX_SUPPORTED_CSGS); @@ -4138,7 +4105,7 @@ static void scheduler_group_check_protm_enter(struct kbase_device *const kbdev, * entry to protected mode happens with a memory region being locked and * the same region is then accessed by the GPU in protected mode. 
*/ - mutex_lock(&kbdev->mmu_hw_mutex); + down_write(&kbdev->csf.mmu_sync_sem); spin_lock_irqsave(&scheduler->interrupt_lock, flags); /* Check if the previous transition to enter & exit the protected @@ -4204,7 +4171,7 @@ static void scheduler_group_check_protm_enter(struct kbase_device *const kbdev, spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); err = kbase_csf_wait_protected_mode_enter(kbdev); - mutex_unlock(&kbdev->mmu_hw_mutex); + up_write(&kbdev->csf.mmu_sync_sem); if (err) schedule_actions_trigger_df( @@ -4219,7 +4186,7 @@ static void scheduler_group_check_protm_enter(struct kbase_device *const kbdev, } spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); - mutex_unlock(&kbdev->mmu_hw_mutex); + up_write(&kbdev->csf.mmu_sync_sem); } /** @@ -4798,25 +4765,27 @@ static int suspend_active_groups_on_powerdown(struct kbase_device *kbdev, bool s struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; DECLARE_BITMAP(slot_mask, MAX_SUPPORTED_CSGS) = { 0 }; - int ret = suspend_active_queue_groups(kbdev, slot_mask); + if (unlikely(suspend_active_queue_groups(kbdev, slot_mask, false))) { + if (!is_gpu_level_suspend_supported(kbdev)) { + const int csg_nr = ffs(slot_mask[0]) - 1; + struct kbase_queue_group *group; + enum dumpfault_error_type error_type = DF_CSG_SUSPEND_TIMEOUT; - if (unlikely(ret)) { - const int csg_nr = ffs(slot_mask[0]) - 1; - struct kbase_queue_group *group = scheduler->csg_slots[csg_nr].resident_group; - enum dumpfault_error_type error_type = DF_CSG_SUSPEND_TIMEOUT; + group = scheduler->csg_slots[csg_nr].resident_group; - /* The suspend of CSGs failed, - * trigger the GPU reset to be in a deterministic state. 
- */ - dev_warn( - kbdev->dev, - "[%llu] Timeout (%d ms) waiting for CSG slots to suspend on power down, slot_mask: 0x%*pb\n", - kbase_backend_get_cycle_cnt(kbdev), - kbase_get_timeout_ms(kbdev, CSF_FIRMWARE_TIMEOUT), - kbdev->csf.global_iface.group_num, slot_mask); - if (kbase_csf_firmware_ping_wait(kbdev, FW_PING_AFTER_ERROR_TIMEOUT_MS)) - error_type = DF_PING_REQUEST_TIMEOUT; - schedule_actions_trigger_df(kbdev, group->kctx, error_type); + /* The suspend of CSGs failed, + * trigger the GPU reset to be in a deterministic state. + */ + dev_warn( + kbdev->dev, + "[%llu] Timeout (%d ms) waiting for CSG slots to suspend on power down, slot_mask: 0x%*pb\n", + kbase_backend_get_cycle_cnt(kbdev), + kbase_get_timeout_ms(kbdev, CSF_FIRMWARE_TIMEOUT), + kbdev->csf.global_iface.group_num, slot_mask); + if (kbase_csf_firmware_ping_wait(kbdev, FW_PING_AFTER_ERROR_TIMEOUT_MS)) + error_type = DF_PING_REQUEST_TIMEOUT; + schedule_actions_trigger_df(kbdev, group->kctx, error_type); + } if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE)) kbase_reset_gpu(kbdev); @@ -4824,6 +4793,8 @@ static int suspend_active_groups_on_powerdown(struct kbase_device *kbdev, bool s return -1; } + kbdev->csf.mcu_halted = false; + /* Check if the groups became active whilst the suspend was ongoing, * but only for the case where the system suspend is not in progress */ @@ -4841,7 +4812,7 @@ static int suspend_active_groups_on_powerdown(struct kbase_device *kbdev, bool s * Returns false if any of the queues inside any of the groups that have been * assigned a physical CSG slot have work to execute, or have executed work * since having received a GPU idle notification. This function is used to - * handle a rance condition between firmware reporting GPU idle and userspace + * handle a race condition between firmware reporting GPU idle and userspace * submitting more work by directly ringing a doorbell. 
* * Return: false if any queue inside any resident group has work to be processed @@ -4983,19 +4954,23 @@ static bool scheduler_suspend_on_idle(struct kbase_device *kbdev) } dev_dbg(kbdev->dev, "Scheduler to be suspended on GPU becoming idle"); - scheduler_suspend(kbdev); - cancel_tick_work(scheduler); - return true; + ret = scheduler_suspend(kbdev); + if (!ret) { + cancel_tick_work(scheduler); + return true; + } + + return false; } -static void gpu_idle_worker(struct work_struct *work) +static void gpu_idle_worker(struct kbase_device *kbdev) { - struct kbase_device *kbdev = - container_of(work, struct kbase_device, csf.scheduler.gpu_idle_work); struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; bool scheduler_is_idle_suspendable = false; bool all_groups_suspended = false; + WARN_ON_ONCE(atomic_read(&scheduler->pending_gpu_idle_work) == 0); + KBASE_KTRACE_ADD(kbdev, SCHEDULER_GPU_IDLE_WORKER_START, NULL, 0u); #define __ENCODE_KTRACE_INFO(reset, idle, all_suspend) \ @@ -5005,7 +4980,7 @@ static void gpu_idle_worker(struct work_struct *work) dev_warn(kbdev->dev, "Quit idle for failing to prevent gpu reset.\n"); KBASE_KTRACE_ADD(kbdev, SCHEDULER_GPU_IDLE_WORKER_END, NULL, __ENCODE_KTRACE_INFO(true, false, false)); - return; + goto exit; } kbase_debug_csf_fault_wait_completion(kbdev); mutex_lock(&scheduler->lock); @@ -5014,7 +4989,7 @@ static void gpu_idle_worker(struct work_struct *work) if (unlikely(scheduler->state == SCHED_BUSY)) { mutex_unlock(&scheduler->lock); kbase_reset_gpu_allow(kbdev); - return; + goto exit; } #endif @@ -5039,6 +5014,9 @@ static void gpu_idle_worker(struct work_struct *work) __ENCODE_KTRACE_INFO(false, scheduler_is_idle_suspendable, all_groups_suspended)); #undef __ENCODE_KTRACE_INFO + +exit: + atomic_dec(&scheduler->pending_gpu_idle_work); } static int scheduler_prepare(struct kbase_device *kbdev) @@ -5226,8 +5204,7 @@ static int wait_csg_slots_suspend(struct kbase_device *kbdev, unsigned long *slo 
bitmap_copy(slot_mask_local, slot_mask, MAX_SUPPORTED_CSGS); while (!bitmap_empty(slot_mask_local, MAX_SUPPORTED_CSGS)) { - long remaining = kbase_csf_timeout_in_jiffies( - kbase_get_timeout_ms(kbdev, CSF_CSG_SUSPEND_TIMEOUT)); + long remaining = kbase_csf_timeout_in_jiffies(kbdev->csf.csg_suspend_timeout_ms); DECLARE_BITMAP(changed, MAX_SUPPORTED_CSGS); bitmap_copy(changed, slot_mask_local, MAX_SUPPORTED_CSGS); @@ -5417,8 +5394,9 @@ static void schedule_actions(struct kbase_device *kbdev, bool is_tick) * steps and thus extending the previous tick's arrangement, * in particular, no alterations to on-slot CSGs. */ - if (keep_lru_on_slots(kbdev)) + if (keep_lru_on_slots(kbdev)) { return; + } } if (is_tick) @@ -5511,6 +5489,15 @@ redo_local_tock: } evict_lru_or_blocked_csg(kbdev); + +#ifdef KBASE_PM_RUNTIME + if (atomic_read(&scheduler->non_idle_offslot_grps)) + set_bit(KBASE_GPU_NON_IDLE_OFF_SLOT_GROUPS_AVAILABLE, + &kbdev->pm.backend.gpu_sleep_allowed); + else + clear_bit(KBASE_GPU_NON_IDLE_OFF_SLOT_GROUPS_AVAILABLE, + &kbdev->pm.backend.gpu_sleep_allowed); +#endif /* KBASE_PM_RUNTIME */ } /** @@ -5544,9 +5531,10 @@ static bool can_skip_scheduling(struct kbase_device *kbdev) spin_lock_irqsave(&kbdev->hwaccess_lock, flags); if (kbdev->pm.backend.exit_gpu_sleep_mode) { - int ret = scheduler_pm_active_after_sleep(kbdev, &flags); + int ret; spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + ret = scheduler_pm_active_after_sleep(kbdev); if (!ret) { scheduler->state = SCHED_INACTIVE; KBASE_KTRACE_ADD(kbdev, SCHED_INACTIVE, NULL, scheduler->state); @@ -5662,25 +5650,28 @@ exit_no_schedule_unlock: kbase_reset_gpu_allow(kbdev); } -static int suspend_active_queue_groups(struct kbase_device *kbdev, unsigned long *slot_mask) + +static int suspend_active_queue_groups(struct kbase_device *kbdev, unsigned long *slot_mask, + bool reset) { struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; u32 num_groups = kbdev->csf.global_iface.group_num; + struct 
kbase_queue_group *group; u32 slot_num; int ret; lockdep_assert_held(&scheduler->lock); - for (slot_num = 0; slot_num < num_groups; slot_num++) { - struct kbase_queue_group *group = scheduler->csg_slots[slot_num].resident_group; + for (slot_num = 0; slot_num < num_groups; slot_num++) { + group = scheduler->csg_slots[slot_num].resident_group; - if (group) { - suspend_queue_group(group); - set_bit(slot_num, slot_mask); + if (group) { + suspend_queue_group(group); + set_bit(slot_num, slot_mask); + } } - } - ret = wait_csg_slots_suspend(kbdev, slot_mask); + ret = wait_csg_slots_suspend(kbdev, slot_mask); return ret; } @@ -5693,7 +5684,7 @@ static int suspend_active_queue_groups_on_reset(struct kbase_device *kbdev) mutex_lock(&scheduler->lock); - ret = suspend_active_queue_groups(kbdev, slot_mask); + ret = suspend_active_queue_groups(kbdev, slot_mask, true); if (ret) { dev_warn( @@ -5830,9 +5821,12 @@ static void scheduler_inner_reset(struct kbase_device *kbdev) WARN_ON(kbase_csf_scheduler_get_nr_active_csgs(kbdev)); /* Cancel any potential queued delayed work(s) */ - cancel_work_sync(&kbdev->csf.scheduler.gpu_idle_work); cancel_tick_work(scheduler); cancel_tock_work(scheduler); + /* gpu_idle_worker() might already be running at this point, which + * could decrement the pending_gpu_idle_work counter to below 0. + * It'd be safer to let it run if one has already been scheduled.
+ */ cancel_delayed_work_sync(&scheduler->ping_work); mutex_lock(&scheduler->lock); @@ -5850,22 +5844,35 @@ static void scheduler_inner_reset(struct kbase_device *kbdev) scheduler->top_kctx = NULL; scheduler->top_grp = NULL; + atomic_set(&scheduler->gpu_idle_timer_enabled, false); + atomic_set(&scheduler->fw_soi_enabled, false); + KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_TOP_GRP, scheduler->top_grp, scheduler->num_active_address_spaces | (((u64)scheduler->total_runnable_grps) << 32)); +#ifdef KBASE_PM_RUNTIME + if (scheduler->state == SCHED_SLEEPING) { +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) + hrtimer_cancel(&scheduler->gpu_metrics_timer); +#endif + scheduler->state = SCHED_SUSPENDED; + KBASE_KTRACE_ADD(kbdev, SCHED_SUSPENDED, NULL, scheduler->state); + } +#endif mutex_unlock(&scheduler->lock); } void kbase_csf_scheduler_reset(struct kbase_device *kbdev) { struct kbase_context *kctx; - WARN_ON(!kbase_reset_gpu_is_active(kbdev)); KBASE_KTRACE_ADD(kbdev, SCHEDULER_RESET_START, NULL, 0u); - kbase_debug_csf_fault_wait_completion(kbdev); + if (kbase_reset_gpu_is_active(kbdev)) + kbase_debug_csf_fault_wait_completion(kbdev); + if (scheduler_handle_reset_in_protected_mode(kbdev) && !suspend_active_queue_groups_on_reset(kbdev)) { @@ -6453,8 +6460,8 @@ static void check_sync_update_in_sleep_mode(struct kbase_device *kbdev) * check_group_sync_update_worker() - Check the sync wait condition for all the * blocked queue groups * - * @work: Pointer to the context-specific work item for evaluating the wait - * condition for all the queue groups in idle_wait_groups list. + * @kctx: The context to evaluate the wait condition for all the queue groups + * in idle_wait_groups list. 
* * This function checks the gpu queues of all the groups present in both * idle_wait_groups list of a context and all on slot idle groups (if GPU @@ -6464,27 +6471,14 @@ static void check_sync_update_in_sleep_mode(struct kbase_device *kbdev) * runnable groups so that Scheduler can consider scheduling the group * in next tick or exit protected mode. */ -static void check_group_sync_update_worker(struct work_struct *work) +static void check_group_sync_update_worker(struct kbase_context *kctx) { - struct kbase_context *const kctx = - container_of(work, struct kbase_context, csf.sched.sync_update_work); struct kbase_device *const kbdev = kctx->kbdev; struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; bool sync_updated = false; mutex_lock(&scheduler->lock); -#if IS_ENABLED(CONFIG_DEBUG_FS) - if (unlikely(scheduler->state == SCHED_BUSY)) { - queue_work(atomic_read(&kctx->prioritized) ? - kctx->csf.sched.sync_update_wq_high_prio : - kctx->csf.sched.sync_update_wq_normal_prio, - &kctx->csf.sched.sync_update_work); - mutex_unlock(&scheduler->lock); - return; - } -#endif - KBASE_KTRACE_ADD(kbdev, SCHEDULER_GROUP_SYNC_UPDATE_WORKER_START, kctx, 0u); if (kctx->csf.sched.num_idle_wait_grps != 0) { struct kbase_queue_group *group, *temp; @@ -6522,13 +6516,10 @@ static void check_group_sync_update_worker(struct work_struct *work) static enum kbase_csf_event_callback_action check_group_sync_update_cb(void *param) { struct kbase_context *const kctx = param; - struct workqueue_struct *wq = atomic_read(&kctx->prioritized) ? 
- kctx->csf.sched.sync_update_wq_high_prio : - kctx->csf.sched.sync_update_wq_normal_prio; KBASE_KTRACE_ADD(kctx->kbdev, SCHEDULER_GROUP_SYNC_UPDATE_EVENT, kctx, 0u); - queue_work(wq, &kctx->csf.sched.sync_update_work); + kbase_csf_scheduler_enqueue_sync_update_work(kctx); return KBASE_CSF_EVENT_CALLBACK_KEEP; } @@ -6539,6 +6530,8 @@ int kbase_csf_scheduler_context_init(struct kbase_context *kctx) int err; struct kbase_device *kbdev = kctx->kbdev; + WARN_ON_ONCE(!kbdev->csf.scheduler.kthread_running); + #if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) err = gpu_metrics_ctx_init(kctx); if (err) @@ -6551,25 +6544,7 @@ int kbase_csf_scheduler_context_init(struct kbase_context *kctx) INIT_LIST_HEAD(&kctx->csf.sched.idle_wait_groups); - kctx->csf.sched.sync_update_wq_high_prio = alloc_ordered_workqueue( - "mali_sync_wq_%i_high_prio", WQ_UNBOUND | WQ_HIGHPRI, kctx->tgid); - if (kctx->csf.sched.sync_update_wq_high_prio == NULL) { - dev_err(kbdev->dev, - "Failed to initialize scheduler context high-priority workqueue"); - err = -ENOMEM; - goto alloc_high_prio_wq_failed; - } - - kctx->csf.sched.sync_update_wq_normal_prio = - alloc_ordered_workqueue("mali_sync_wq_%i_normal_prio", 0, kctx->tgid); - if (kctx->csf.sched.sync_update_wq_normal_prio == NULL) { - dev_err(kbdev->dev, - "Failed to initialize scheduler context normal-priority workqueue"); - err = -ENOMEM; - goto alloc_normal_prio_wq_failed; - } - - INIT_WORK(&kctx->csf.sched.sync_update_work, check_group_sync_update_worker); + INIT_LIST_HEAD(&kctx->csf.sched.sync_update_work); kbase_csf_tiler_heap_reclaim_ctx_init(kctx); @@ -6583,10 +6558,6 @@ int kbase_csf_scheduler_context_init(struct kbase_context *kctx) return err; event_wait_add_failed: - destroy_workqueue(kctx->csf.sched.sync_update_wq_normal_prio); -alloc_normal_prio_wq_failed: - destroy_workqueue(kctx->csf.sched.sync_update_wq_high_prio); -alloc_high_prio_wq_failed: kbase_ctx_sched_remove_ctx(kctx); #if 
IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) gpu_metrics_ctx_term(kctx); @@ -6597,9 +6568,10 @@ alloc_high_prio_wq_failed: void kbase_csf_scheduler_context_term(struct kbase_context *kctx) { kbase_csf_event_wait_remove(kctx, check_group_sync_update_cb, kctx); - cancel_work_sync(&kctx->csf.sched.sync_update_work); - destroy_workqueue(kctx->csf.sched.sync_update_wq_normal_prio); - destroy_workqueue(kctx->csf.sched.sync_update_wq_high_prio); + + /* Drain a pending SYNC_UPDATE work if any */ + kbase_csf_scheduler_wait_for_kthread_pending_work(kctx->kbdev, + &kctx->csf.pending_sync_update); kbase_ctx_sched_remove_ctx(kctx); #if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) @@ -6607,53 +6579,157 @@ void kbase_csf_scheduler_context_term(struct kbase_context *kctx) #endif /* CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD */ } +static void handle_pending_sync_update_works(struct kbase_csf_scheduler *scheduler) +{ + struct kbase_context *sync_update_ctx; + + if (atomic_cmpxchg(&scheduler->pending_sync_update_works, true, false) == false) + return; + + do { + unsigned long flags; + + spin_lock_irqsave(&scheduler->sync_update_work_ctxs_lock, flags); + sync_update_ctx = NULL; + if (!list_empty(&scheduler->sync_update_work_ctxs)) { + sync_update_ctx = list_first_entry(&scheduler->sync_update_work_ctxs, + struct kbase_context, + csf.sched.sync_update_work); + list_del_init(&sync_update_ctx->csf.sched.sync_update_work); + } + spin_unlock_irqrestore(&scheduler->sync_update_work_ctxs_lock, flags); + + if (sync_update_ctx != NULL) { + WARN_ON_ONCE(atomic_read(&sync_update_ctx->csf.pending_sync_update) == 0); + check_group_sync_update_worker(sync_update_ctx); + atomic_dec(&sync_update_ctx->csf.pending_sync_update); + } + } while (sync_update_ctx != NULL); +} + +static void handle_pending_protm_requests(struct kbase_csf_scheduler *scheduler) +{ + struct kbase_queue_group *protm_grp; + + if (atomic_cmpxchg(&scheduler->pending_protm_event_works, true, false) == false) + return; + + 
do { + unsigned long flags; + + spin_lock_irqsave(&scheduler->protm_event_work_grps_lock, flags); + protm_grp = NULL; + if (!list_empty(&scheduler->protm_event_work_grps)) { + protm_grp = list_first_entry(&scheduler->protm_event_work_grps, + struct kbase_queue_group, protm_event_work); + list_del_init(&protm_grp->protm_event_work); + } + spin_unlock_irqrestore(&scheduler->protm_event_work_grps_lock, flags); + + if (protm_grp != NULL) { + WARN_ON_ONCE(atomic_read(&protm_grp->pending_protm_event_work) == 0); + kbase_csf_process_protm_event_request(protm_grp); + atomic_dec(&protm_grp->pending_protm_event_work); + } + } while (protm_grp != NULL); +} + +static void handle_pending_kcpuq_commands(struct kbase_csf_scheduler *scheduler) +{ + struct kbase_kcpu_command_queue *kcpuq; + + if (atomic_cmpxchg(&scheduler->pending_kcpuq_works, true, false) == false) + return; + + do { + unsigned long flags; + + spin_lock_irqsave(&scheduler->kcpuq_work_queues_lock, flags); + kcpuq = NULL; + if (!list_empty(&scheduler->kcpuq_work_queues)) { + kcpuq = list_first_entry(&scheduler->kcpuq_work_queues, + struct kbase_kcpu_command_queue, high_prio_work); + list_del_init(&kcpuq->high_prio_work); + } + spin_unlock_irqrestore(&scheduler->kcpuq_work_queues_lock, flags); + + if (kcpuq != NULL) { + WARN_ON_ONCE(atomic_read(&kcpuq->pending_kick) == 0); + + mutex_lock(&kcpuq->lock); + kbase_csf_kcpu_queue_process(kcpuq, false); + mutex_unlock(&kcpuq->lock); + + atomic_dec(&kcpuq->pending_kick); + } + } while (kcpuq != NULL); +} + +static void handle_pending_queue_kicks(struct kbase_device *kbdev) +{ + struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; + struct kbase_queue *queue; + + if (atomic_cmpxchg(&kbdev->csf.pending_gpuq_kicks, true, false) == false) + return; + + do { + u8 prio; + + spin_lock(&kbdev->csf.pending_gpuq_kick_queues_lock); + queue = NULL; + for (prio = 0; prio != KBASE_QUEUE_GROUP_PRIORITY_COUNT; ++prio) { + if 
(!list_empty(&kbdev->csf.pending_gpuq_kick_queues[prio])) { + queue = list_first_entry(&kbdev->csf.pending_gpuq_kick_queues[prio], + struct kbase_queue, pending_kick_link); + list_del_init(&queue->pending_kick_link); + break; + } + } + spin_unlock(&kbdev->csf.pending_gpuq_kick_queues_lock); + + if (queue != NULL) { + WARN_ONCE( + prio != queue->group_priority, + "Queue %pK has priority %u but instead its kick was handled at priority %u", + (void *)queue, queue->group_priority, prio); + WARN_ON_ONCE(atomic_read(&queue->pending_kick) == 0); + + kbase_csf_process_queue_kick(queue); + + /* Perform a scheduling tock for high-priority queue groups if + * required. + */ + BUILD_BUG_ON(KBASE_QUEUE_GROUP_PRIORITY_REALTIME != 0); + BUILD_BUG_ON(KBASE_QUEUE_GROUP_PRIORITY_HIGH != 1); + if ((prio <= KBASE_QUEUE_GROUP_PRIORITY_HIGH) && + atomic_read(&scheduler->pending_tock_work)) + schedule_on_tock(kbdev); + } + } while (queue != NULL); +} + static int kbase_csf_scheduler_kthread(void *data) { struct kbase_device *const kbdev = data; struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; while (scheduler->kthread_running) { - struct kbase_queue *queue; - if (wait_for_completion_interruptible(&scheduler->kthread_signal) != 0) continue; reinit_completion(&scheduler->kthread_signal); - /* Iterate through queues with pending kicks */ - do { - u8 prio; + /* + * The order in which these requests are handled is based on + * how they would influence each other's decisions. As a + * result, the tick & tock requests must be handled after all + * other requests, but before the GPU IDLE work. 
+ */ - spin_lock(&kbdev->csf.pending_gpuq_kicks_lock); - queue = NULL; - for (prio = 0; prio != KBASE_QUEUE_GROUP_PRIORITY_COUNT; ++prio) { - if (!list_empty(&kbdev->csf.pending_gpuq_kicks[prio])) { - queue = list_first_entry( - &kbdev->csf.pending_gpuq_kicks[prio], - struct kbase_queue, pending_kick_link); - list_del_init(&queue->pending_kick_link); - break; - } - } - spin_unlock(&kbdev->csf.pending_gpuq_kicks_lock); - - if (queue != NULL) { - WARN_ONCE( - prio != queue->group_priority, - "Queue %pK has priority %hhu but instead its kick was handled at priority %hhu", - (void *)queue, queue->group_priority, prio); - - kbase_csf_process_queue_kick(queue); - - /* Perform a scheduling tock for high-priority queue groups if - * required. - */ - BUILD_BUG_ON(KBASE_QUEUE_GROUP_PRIORITY_REALTIME != 0); - BUILD_BUG_ON(KBASE_QUEUE_GROUP_PRIORITY_HIGH != 1); - if ((prio <= KBASE_QUEUE_GROUP_PRIORITY_HIGH) && - atomic_read(&scheduler->pending_tock_work)) - schedule_on_tock(kbdev); - } - } while (queue != NULL); + handle_pending_sync_update_works(scheduler); + handle_pending_protm_requests(scheduler); + handle_pending_kcpuq_commands(scheduler); + handle_pending_queue_kicks(kbdev); /* Check if we need to perform a scheduling tick/tock. A tick * event shall override a tock event but not vice-versa. @@ -6665,6 +6741,15 @@ static int kbase_csf_scheduler_kthread(void *data) schedule_on_tock(kbdev); } + /* Drain pending GPU idle works */ + while (atomic_read(&scheduler->pending_gpu_idle_work) > 0) + gpu_idle_worker(kbdev); + + /* Update GLB_IDLE timer/FW Sleep-on-Idle config (which might + * have been disabled during FW boot etc.).
+ */ + kbase_csf_firmware_soi_update(kbdev); + dev_dbg(kbdev->dev, "Waking up for event after a scheduling iteration."); wake_up_all(&kbdev->csf.event_wait); } @@ -6694,7 +6779,7 @@ int kbase_csf_scheduler_init(struct kbase_device *kbdev) scheduler->kthread_running = true; scheduler->gpuq_kthread = kthread_run(&kbase_csf_scheduler_kthread, kbdev, "mali-gpuq-kthread"); - if (!scheduler->gpuq_kthread) { + if (IS_ERR_OR_NULL(scheduler->gpuq_kthread)) { kfree(scheduler->csg_slots); scheduler->csg_slots = NULL; @@ -6725,6 +6810,9 @@ int kbase_csf_scheduler_init(struct kbase_device *kbdev) scheduler->gpu_metrics_timer.function = gpu_metrics_timer_callback; #endif /* CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD */ + atomic_set(&scheduler->gpu_idle_timer_enabled, false); + atomic_set(&scheduler->fw_soi_enabled, false); + return kbase_csf_mcu_shared_regs_data_init(kbdev); } @@ -6734,12 +6822,6 @@ int kbase_csf_scheduler_early_init(struct kbase_device *kbdev) atomic_set(&scheduler->timer_enabled, true); - scheduler->idle_wq = alloc_ordered_workqueue("csf_scheduler_gpu_idle_wq", WQ_HIGHPRI); - if (!scheduler->idle_wq) { - dev_err(kbdev->dev, "Failed to allocate GPU idle scheduler workqueue\n"); - return -ENOMEM; - } - INIT_DEFERRABLE_WORK(&scheduler->ping_work, firmware_aliveness_monitor); mutex_init(&scheduler->lock); @@ -6757,20 +6839,30 @@ int kbase_csf_scheduler_early_init(struct kbase_device *kbdev) KBASE_KTRACE_ADD(kbdev, SCHED_SUSPENDED, NULL, scheduler->state); scheduler->csg_scheduling_period_ms = CSF_SCHEDULER_TIME_TICK_MS; scheduler_doorbell_init(kbdev); - INIT_WORK(&scheduler->gpu_idle_work, gpu_idle_worker); hrtimer_init(&scheduler->tick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); scheduler->tick_timer.function = tick_timer_callback; - kbase_csf_tiler_heap_reclaim_mgr_init(kbdev); + atomic_set(&scheduler->pending_sync_update_works, false); + spin_lock_init(&scheduler->sync_update_work_ctxs_lock); + INIT_LIST_HEAD(&scheduler->sync_update_work_ctxs); + 
atomic_set(&scheduler->pending_protm_event_works, false); + spin_lock_init(&scheduler->protm_event_work_grps_lock); + INIT_LIST_HEAD(&scheduler->protm_event_work_grps); + atomic_set(&scheduler->pending_kcpuq_works, false); + spin_lock_init(&scheduler->kcpuq_work_queues_lock); + INIT_LIST_HEAD(&scheduler->kcpuq_work_queues); + atomic_set(&scheduler->pending_tick_work, false); + atomic_set(&scheduler->pending_tock_work, false); + atomic_set(&scheduler->pending_gpu_idle_work, 0); - return 0; + return kbase_csf_tiler_heap_reclaim_mgr_init(kbdev); } void kbase_csf_scheduler_term(struct kbase_device *kbdev) { struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; - if (scheduler->gpuq_kthread) { + if (!IS_ERR_OR_NULL(scheduler->gpuq_kthread)) { scheduler->kthread_running = false; complete(&scheduler->kthread_signal); kthread_stop(scheduler->gpuq_kthread); @@ -6784,7 +6876,6 @@ void kbase_csf_scheduler_term(struct kbase_device *kbdev) * to be active at the time of Driver unload. */ WARN_ON(kbase_csf_scheduler_get_nr_active_csgs(kbdev)); - flush_work(&kbdev->csf.scheduler.gpu_idle_work); mutex_lock(&kbdev->csf.scheduler.lock); if (kbdev->csf.scheduler.state != SCHED_SUSPENDED) { @@ -6811,9 +6902,6 @@ void kbase_csf_scheduler_term(struct kbase_device *kbdev) void kbase_csf_scheduler_early_term(struct kbase_device *kbdev) { - if (kbdev->csf.scheduler.idle_wq) - destroy_workqueue(kbdev->csf.scheduler.idle_wq); - kbase_csf_tiler_heap_reclaim_mgr_term(kbdev); mutex_destroy(&kbdev->csf.scheduler.lock); } @@ -6926,9 +7014,10 @@ int kbase_csf_scheduler_pm_suspend_no_lock(struct kbase_device *kbdev) dev_warn(kbdev->dev, "failed to suspend active groups"); goto exit; } else { - dev_info(kbdev->dev, "Scheduler PM suspend"); - scheduler_suspend(kbdev); - cancel_tick_work(scheduler); + dev_dbg(kbdev->dev, "Scheduler PM suspend"); + result = scheduler_suspend(kbdev); + if (!result) + cancel_tick_work(scheduler); } } @@ -6968,7 +7057,7 @@ void 
kbase_csf_scheduler_pm_resume_no_lock(struct kbase_device *kbdev) lockdep_assert_held(&scheduler->lock); if ((scheduler->total_runnable_grps > 0) && (scheduler->state == SCHED_SUSPENDED)) { - dev_info(kbdev->dev, "Scheduler PM resume"); + dev_dbg(kbdev->dev, "Scheduler PM resume"); scheduler_wakeup(kbdev, true); } } @@ -6989,7 +7078,7 @@ void kbase_csf_scheduler_pm_active(struct kbase_device *kbdev) * the CSGs before powering down the GPU. */ mutex_lock(&kbdev->csf.scheduler.lock); - scheduler_pm_active_handle_suspend(kbdev, KBASE_PM_SUSPEND_HANDLER_NOT_POSSIBLE); + scheduler_pm_active_handle_suspend(kbdev, KBASE_PM_SUSPEND_HANDLER_NOT_POSSIBLE, false); mutex_unlock(&kbdev->csf.scheduler.lock); } KBASE_EXPORT_TEST_API(kbase_csf_scheduler_pm_active); @@ -7013,9 +7102,7 @@ static int scheduler_wait_mcu_active(struct kbase_device *kbdev, bool killable_w kbase_pm_lock(kbdev); WARN_ON(!kbdev->pm.active_count); - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); WARN_ON(!scheduler->pm_active_count); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); kbase_pm_unlock(kbdev); if (killable_wait) @@ -7096,6 +7183,65 @@ int kbase_csf_scheduler_handle_runtime_suspend(struct kbase_device *kbdev) return 0; } +void kbase_csf_scheduler_enqueue_sync_update_work(struct kbase_context *kctx) +{ + struct kbase_csf_scheduler *const scheduler = &kctx->kbdev->csf.scheduler; + unsigned long flags; + + spin_lock_irqsave(&scheduler->sync_update_work_ctxs_lock, flags); + if (list_empty(&kctx->csf.sched.sync_update_work)) { + list_add_tail(&kctx->csf.sched.sync_update_work, &scheduler->sync_update_work_ctxs); + atomic_inc(&kctx->csf.pending_sync_update); + if (atomic_cmpxchg(&scheduler->pending_sync_update_works, false, true) == false) + complete(&scheduler->kthread_signal); + } + spin_unlock_irqrestore(&scheduler->sync_update_work_ctxs_lock, flags); +} + +void kbase_csf_scheduler_enqueue_protm_event_work(struct kbase_queue_group *group) +{ + struct kbase_context *const kctx = group->kctx; 
+ struct kbase_csf_scheduler *const scheduler = &kctx->kbdev->csf.scheduler; + unsigned long flags; + + spin_lock_irqsave(&scheduler->protm_event_work_grps_lock, flags); + if (list_empty(&group->protm_event_work)) { + list_add_tail(&group->protm_event_work, &scheduler->protm_event_work_grps); + atomic_inc(&group->pending_protm_event_work); + if (atomic_cmpxchg(&scheduler->pending_protm_event_works, false, true) == false) + complete(&scheduler->kthread_signal); + } + spin_unlock_irqrestore(&scheduler->protm_event_work_grps_lock, flags); +} + +void kbase_csf_scheduler_enqueue_kcpuq_work(struct kbase_kcpu_command_queue *queue) +{ + struct kbase_csf_scheduler *const scheduler = &queue->kctx->kbdev->csf.scheduler; + unsigned long flags; + + spin_lock_irqsave(&scheduler->kcpuq_work_queues_lock, flags); + if (list_empty(&queue->high_prio_work)) { + list_add_tail(&queue->high_prio_work, &scheduler->kcpuq_work_queues); + atomic_inc(&queue->pending_kick); + if (atomic_cmpxchg(&scheduler->pending_kcpuq_works, false, true) == false) + complete(&scheduler->kthread_signal); + } + spin_unlock_irqrestore(&scheduler->kcpuq_work_queues_lock, flags); +} + +void kbase_csf_scheduler_wait_for_kthread_pending_work(struct kbase_device *kbdev, + atomic_t *pending) +{ + /* + * Signal kbase_csf_scheduler_kthread() to allow for the + * eventual completion of the current iteration. Once the work is + * done, the event_wait wait queue shall be signalled. 
+ */ + + complete(&kbdev->csf.scheduler.kthread_signal); + wait_event(kbdev->csf.event_wait, atomic_read(pending) == 0); +} + void kbase_csf_scheduler_reval_idleness_post_sleep(struct kbase_device *kbdev) { u32 csg_nr; diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_scheduler.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_scheduler.h index 5047092d6650..915945bb495e 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_scheduler.h +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_scheduler.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2024 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -235,7 +235,8 @@ void kbase_csf_scheduler_early_term(struct kbase_device *kbdev); * No explicit re-initialization is done for CSG & CS interface I/O pages; * instead, that happens implicitly on firmware reload. * - * Should be called only after initiating the GPU reset. + * Should be called either after initiating the GPU reset or when an MCU reset is + * expected to follow, such as in the GPU_LOST case. */ void kbase_csf_scheduler_reset(struct kbase_device *kbdev); @@ -487,6 +488,48 @@ static inline bool kbase_csf_scheduler_all_csgs_idle(struct kbase_device *kbdev) kbdev->csf.global_iface.group_num); } +/** + * kbase_csf_scheduler_enqueue_sync_update_work() - Add a context to the list + * of contexts to handle + * SYNC_UPDATE events. + * + * @kctx: The context to handle SYNC_UPDATE event + * + * This function wakes up kbase_csf_scheduler_kthread() to handle pending + * SYNC_UPDATE events for all contexts. + */ +void kbase_csf_scheduler_enqueue_sync_update_work(struct kbase_context *kctx); + +/** + * kbase_csf_scheduler_enqueue_protm_event_work() - Add a group to the list + * of groups to handle + * PROTM requests.
+ * + * @group: The group to handle protected mode request + * + * This function wakes up kbase_csf_scheduler_kthread() to handle pending + * protected mode requests for all groups. + */ +void kbase_csf_scheduler_enqueue_protm_event_work(struct kbase_queue_group *group); + +/** + * kbase_csf_scheduler_enqueue_kcpuq_work() - Wake up kbase_csf_scheduler_kthread() to process + * pending commands for a KCPU queue. + * + * @queue: The queue to process pending commands for + */ +void kbase_csf_scheduler_enqueue_kcpuq_work(struct kbase_kcpu_command_queue *queue); + +/** + * kbase_csf_scheduler_wait_for_kthread_pending_work - Wait until a pending work has completed in + * kbase_csf_scheduler_kthread(). + * + * @kbdev: Instance of a GPU platform device that implements a CSF interface + * @pending: The work to wait for + */ +void kbase_csf_scheduler_wait_for_kthread_pending_work(struct kbase_device *kbdev, + atomic_t *pending); + /** * kbase_csf_scheduler_invoke_tick() - Invoke the scheduling tick * @@ -591,11 +634,8 @@ int kbase_csf_scheduler_handle_runtime_suspend(struct kbase_device *kbdev); * @kbdev: Pointer to the device * * This function is called when a GPU idle IRQ has been raised. - * - * Return: true if the PM state machine needs to be invoked after the processing - * of GPU idle irq, otherwise false. 
*/ -bool kbase_csf_scheduler_process_gpu_idle_event(struct kbase_device *kbdev); +void kbase_csf_scheduler_process_gpu_idle_event(struct kbase_device *kbdev); /** * kbase_csf_scheduler_get_nr_active_csgs() - Get the number of active CSGs @@ -653,4 +693,6 @@ void kbase_csf_scheduler_force_wakeup(struct kbase_device *kbdev); void kbase_csf_scheduler_force_sleep(struct kbase_device *kbdev); #endif +bool is_gpu_level_suspend_supported(struct kbase_device *const kbdev); + #endif /* _KBASE_CSF_SCHEDULER_H_ */ diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_sync.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_sync.c index aa88b5f59d3b..27b792500bdf 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_sync.c +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_sync.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2023-2024 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -117,13 +117,13 @@ static void kbasep_csf_sync_print_kcpu_fence_wait_or_signal(char *buffer, int *l timeline_name = fence->ops->get_timeline_name(fence); is_signaled = info.status > 0; - *length += snprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length, - "cmd:%s obj:0x%pK live_value:0x%.8x | ", cmd_name, fence, is_signaled); + *length += scnprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length, + "cmd:%s obj:0x%pK live_value:0x%.8x | ", cmd_name, fence, is_signaled); /* Note: fence->seqno was u32 until 5.1 kernel, then u64 */ - *length += snprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length, - "timeline_name:%s timeline_context:0x%.16llx fence_seqno:0x%.16llx", - timeline_name, fence->context, (u64)fence->seqno); + *length += scnprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length, + "timeline_name:%s timeline_context:0x%.16llx 
fence_seqno:0x%.16llx", + timeline_name, fence->context, (u64)fence->seqno); kbase_fence_put(fence); } @@ -149,19 +149,19 @@ static void kbasep_csf_sync_print_kcpu_cqs_wait(struct kbase_context *kctx, char int ret = kbasep_csf_sync_get_cqs_live_u32(kctx, cqs_obj->addr, &live_val); bool live_val_valid = (ret >= 0); - *length += - snprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length, - "cmd:CQS_WAIT_OPERATION obj:0x%.16llx live_value:", cqs_obj->addr); + *length += scnprintf( + buffer + *length, CSF_SYNC_DUMP_SIZE - *length, + "cmd:CQS_WAIT_OPERATION obj:0x%.16llx live_value:", cqs_obj->addr); if (live_val_valid) - *length += snprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length, - "0x%.16llx", (u64)live_val); + *length += scnprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length, + "0x%.16llx", (u64)live_val); else - *length += snprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length, - CQS_UNREADABLE_LIVE_VALUE); + *length += scnprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length, + CQS_UNREADABLE_LIVE_VALUE); - *length += snprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length, - " | op:gt arg_value:0x%.8x", cqs_obj->val); + *length += scnprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length, + " | op:gt arg_value:0x%.8x", cqs_obj->val); } } @@ -187,18 +187,18 @@ static void kbasep_csf_sync_print_kcpu_cqs_set(struct kbase_context *kctx, char bool live_val_valid = (ret >= 0); *length += - snprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length, - "cmd:CQS_SET_OPERATION obj:0x%.16llx live_value:", cqs_obj->addr); + scnprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length, + "cmd:CQS_SET_OPERATION obj:0x%.16llx live_value:", cqs_obj->addr); if (live_val_valid) - *length += snprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length, - "0x%.16llx", (u64)live_val); + *length += scnprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length, + "0x%.16llx", (u64)live_val); else - *length += snprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length, - CQS_UNREADABLE_LIVE_VALUE); 
+ *length += scnprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length, + CQS_UNREADABLE_LIVE_VALUE); - *length += snprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length, - " | op:add arg_value:0x%.8x", 1); + *length += scnprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length, + " | op:add arg_value:0x%.8x", 1); } } @@ -277,19 +277,19 @@ static void kbasep_csf_sync_print_kcpu_cqs_wait_op(struct kbase_context *kctx, c bool live_val_valid = (ret >= 0); - *length += - snprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length, - "cmd:CQS_WAIT_OPERATION obj:0x%.16llx live_value:", wait_op->addr); + *length += scnprintf( + buffer + *length, CSF_SYNC_DUMP_SIZE - *length, + "cmd:CQS_WAIT_OPERATION obj:0x%.16llx live_value:", wait_op->addr); if (live_val_valid) - *length += snprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length, - "0x%.16llx", live_val); + *length += scnprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length, + "0x%.16llx", live_val); else - *length += snprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length, - CQS_UNREADABLE_LIVE_VALUE); + *length += scnprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length, + CQS_UNREADABLE_LIVE_VALUE); - *length += snprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length, - " | op:%s arg_value:0x%.16llx", op_name, wait_op->val); + *length += scnprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length, + " | op:%s arg_value:0x%.16llx", op_name, wait_op->val); } } @@ -319,18 +319,18 @@ static void kbasep_csf_sync_print_kcpu_cqs_set_op(struct kbase_context *kctx, ch bool live_val_valid = (ret >= 0); *length += - snprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length, - "cmd:CQS_SET_OPERATION obj:0x%.16llx live_value:", set_op->addr); + scnprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length, + "cmd:CQS_SET_OPERATION obj:0x%.16llx live_value:", set_op->addr); if (live_val_valid) - *length += snprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length, - "0x%.16llx", live_val); + *length += scnprintf(buffer + *length, CSF_SYNC_DUMP_SIZE 
- *length, + "0x%.16llx", live_val); else - *length += snprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length, - CQS_UNREADABLE_LIVE_VALUE); + *length += scnprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length, + CQS_UNREADABLE_LIVE_VALUE); - *length += snprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length, - " | op:%s arg_value:0x%.16llx", op_name, set_op->val); + *length += scnprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length, + " | op:%s arg_value:0x%.16llx", op_name, set_op->val); } } @@ -360,8 +360,8 @@ static void kbasep_csf_sync_kcpu_print_queue(struct kbase_context *kctx, int length = 0; started_or_pending = ((i == 0) && queue->command_started) ? 'S' : 'P'; - length += snprintf(buffer, CSF_SYNC_DUMP_SIZE, "queue:KCPU-%d-%d exec:%c ", - kctx->id, queue->id, started_or_pending); + length += scnprintf(buffer, CSF_SYNC_DUMP_SIZE, "queue:KCPU-%d-%d exec:%c ", + kctx->id, queue->id, started_or_pending); cmd = &queue->commands[(u8)(queue->start_offset + i)]; switch (cmd->type) { @@ -388,12 +388,12 @@ static void kbasep_csf_sync_kcpu_print_queue(struct kbase_context *kctx, kbasep_csf_sync_print_kcpu_cqs_set_op(kctx, buffer, &length, cmd); break; default: - length += snprintf(buffer + length, CSF_SYNC_DUMP_SIZE - length, - ", U, Unknown blocking command"); + length += scnprintf(buffer + length, CSF_SYNC_DUMP_SIZE - length, + ", U, Unknown blocking command"); break; } - length += snprintf(buffer + length, CSF_SYNC_DUMP_SIZE - length, "\n"); + length += scnprintf(buffer + length, CSF_SYNC_DUMP_SIZE - length, "\n"); kbasep_print(kbpr, buffer); } diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap.c index 2d148eea025e..5a5a4c315396 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap.c +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2019-2023 ARM Limited. 
All rights reserved. + * (C) COPYRIGHT 2019-2024 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -218,7 +218,7 @@ static void remove_unlinked_chunk(struct kbase_context *kctx, if (WARN_ON(!list_empty(&chunk->link))) return; - kbase_gpu_vm_lock(kctx); + kbase_gpu_vm_lock_with_pmode_sync(kctx); kbase_vunmap(kctx, &chunk->map); /* KBASE_REG_DONT_NEED regions will be confused with ephemeral regions (inc freed JIT * regions), and so we must clear that flag too before freeing. @@ -231,7 +231,7 @@ static void remove_unlinked_chunk(struct kbase_context *kctx, chunk->region->flags &= ~KBASE_REG_DONT_NEED; #endif kbase_mem_free_region(kctx, chunk->region); - kbase_gpu_vm_unlock(kctx); + kbase_gpu_vm_unlock_with_pmode_sync(kctx); kfree(chunk); } @@ -260,8 +260,9 @@ static struct kbase_csf_tiler_heap_chunk *alloc_new_chunk(struct kbase_context * u64 chunk_size) { u64 nr_pages = PFN_UP(chunk_size); - u64 flags = BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR | BASE_MEM_PROT_CPU_WR | - BASEP_MEM_NO_USER_FREE | BASE_MEM_COHERENT_LOCAL | BASE_MEM_PROT_CPU_RD; + base_mem_alloc_flags flags = BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR | + BASE_MEM_PROT_CPU_WR | BASEP_MEM_NO_USER_FREE | + BASE_MEM_COHERENT_LOCAL | BASE_MEM_PROT_CPU_RD; struct kbase_csf_tiler_heap_chunk *chunk = NULL; /* The chunk kernel mapping needs to be large enough to: * - initially zero the CHUNK_HDR_SIZE area @@ -350,13 +351,14 @@ static struct kbase_csf_tiler_heap_chunk *alloc_new_chunk(struct kbase_context * } remove_external_chunk_mappings(kctx, chunk); - kbase_gpu_vm_unlock(kctx); /* If page migration is enabled, we don't want to migrate tiler heap pages. * This does not change if the constituent pages are already marked as isolated. 
*/ if (kbase_is_page_migration_enabled()) - kbase_set_phy_alloc_page_status(chunk->region->gpu_alloc, NOT_MOVABLE); + kbase_set_phy_alloc_page_status(kctx, chunk->region->gpu_alloc, NOT_MOVABLE); + + kbase_gpu_vm_unlock(kctx); return chunk; @@ -640,7 +642,7 @@ static bool kbasep_is_buffer_descriptor_region_suitable(struct kbase_context *co if (!(reg->flags & KBASE_REG_CPU_RD) || kbase_is_region_shrinkable(reg) || (reg->flags & KBASE_REG_PF_GROW)) { - dev_err(kctx->kbdev->dev, "Region has invalid flags: 0x%lX!\n", reg->flags); + dev_err(kctx->kbdev->dev, "Region has invalid flags: 0x%llX!\n", reg->flags); return false; } @@ -737,7 +739,7 @@ int kbase_csf_tiler_heap_init(struct kbase_context *const kctx, u32 const chunk_ KBASE_VMAP_FLAG_PERMANENT_MAP_ACCOUNTING); if (kbase_is_page_migration_enabled()) - kbase_set_phy_alloc_page_status(buf_desc_reg->gpu_alloc, NOT_MOVABLE); + kbase_set_phy_alloc_page_status(kctx, buf_desc_reg->gpu_alloc, NOT_MOVABLE); kbase_gpu_vm_unlock(kctx); @@ -1058,6 +1060,7 @@ static bool delete_chunk_physical_pages(struct kbase_csf_tiler_heap *heap, u64 c struct kbase_csf_tiler_heap_chunk *chunk = NULL; lockdep_assert_held(&heap->kctx->csf.tiler_heaps.lock); + lockdep_assert_held(&kctx->kbdev->csf.scheduler.lock); chunk = find_chunk(heap, chunk_gpu_va); if (unlikely(!chunk)) { diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap_reclaim.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap_reclaim.c index a2bb49422e98..df4feb77f0cd 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap_reclaim.c +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap_reclaim.c @@ -331,8 +331,8 @@ static unsigned long kbase_csf_tiler_heap_reclaim_scan_free_pages(struct kbase_d static unsigned long kbase_csf_tiler_heap_reclaim_count_objects(struct shrinker *s, struct shrink_control *sc) { - struct kbase_device *kbdev = - container_of(s, struct kbase_device, csf.scheduler.reclaim_mgr.heap_reclaim); + struct kbase_device *kbdev = 
KBASE_GET_KBASE_DATA_FROM_SHRINKER( + s, struct kbase_device, csf.scheduler.reclaim_mgr.heap_reclaim); return kbase_csf_tiler_heap_reclaim_count_free_pages(kbdev, sc); } @@ -340,8 +340,8 @@ static unsigned long kbase_csf_tiler_heap_reclaim_count_objects(struct shrinker static unsigned long kbase_csf_tiler_heap_reclaim_scan_objects(struct shrinker *s, struct shrink_control *sc) { - struct kbase_device *kbdev = - container_of(s, struct kbase_device, csf.scheduler.reclaim_mgr.heap_reclaim); + struct kbase_device *kbdev = KBASE_GET_KBASE_DATA_FROM_SHRINKER( + s, struct kbase_device, csf.scheduler.reclaim_mgr.heap_reclaim); return kbase_csf_tiler_heap_reclaim_scan_free_pages(kbdev, sc); } @@ -352,11 +352,17 @@ void kbase_csf_tiler_heap_reclaim_ctx_init(struct kbase_context *kctx) INIT_LIST_HEAD(&kctx->csf.sched.heap_info.mgr_link); } -void kbase_csf_tiler_heap_reclaim_mgr_init(struct kbase_device *kbdev) +int kbase_csf_tiler_heap_reclaim_mgr_init(struct kbase_device *kbdev) { struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; - struct shrinker *reclaim = &scheduler->reclaim_mgr.heap_reclaim; u8 prio; + struct shrinker *reclaim; + + reclaim = + KBASE_INIT_RECLAIM(&(scheduler->reclaim_mgr), heap_reclaim, "mali-csf-tiler-heap"); + if (!reclaim) + return -ENOMEM; + KBASE_SET_RECLAIM(&(scheduler->reclaim_mgr), heap_reclaim, reclaim); for (prio = KBASE_QUEUE_GROUP_PRIORITY_REALTIME; prio < KBASE_QUEUE_GROUP_PRIORITY_COUNT; prio++) @@ -366,6 +372,11 @@ void kbase_csf_tiler_heap_reclaim_mgr_init(struct kbase_device *kbdev) reclaim->scan_objects = kbase_csf_tiler_heap_reclaim_scan_objects; reclaim->seeks = HEAP_SHRINKER_SEEKS; reclaim->batch = HEAP_SHRINKER_BATCH; + + if (!IS_ENABLED(CONFIG_MALI_VECTOR_DUMP)) + KBASE_REGISTER_SHRINKER(reclaim, "mali-csf-tiler-heap", kbdev); + + return 0; } void kbase_csf_tiler_heap_reclaim_mgr_term(struct kbase_device *kbdev) @@ -373,6 +384,9 @@ void kbase_csf_tiler_heap_reclaim_mgr_term(struct kbase_device *kbdev) struct 
kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; u8 prio; + if (!IS_ENABLED(CONFIG_MALI_VECTOR_DUMP)) + KBASE_UNREGISTER_SHRINKER(scheduler->reclaim_mgr.heap_reclaim); + for (prio = KBASE_QUEUE_GROUP_PRIORITY_REALTIME; prio < KBASE_QUEUE_GROUP_PRIORITY_COUNT; prio++) WARN_ON(!list_empty(&scheduler->reclaim_mgr.ctx_lists[prio])); diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap_reclaim.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap_reclaim.h index 7880de04c84f..d41b7baabd02 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap_reclaim.h +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap_reclaim.h @@ -66,8 +66,10 @@ void kbase_csf_tiler_heap_reclaim_ctx_init(struct kbase_context *kctx); * @kbdev: Pointer to the device. * * This function must be called only when a kbase device is initialized. + * + * Return: 0 if issuing reclaim_mgr init was successful, otherwise an error code. */ -void kbase_csf_tiler_heap_reclaim_mgr_init(struct kbase_device *kbdev); +int kbase_csf_tiler_heap_reclaim_mgr_init(struct kbase_device *kbdev); /** * kbase_csf_tiler_heap_reclaim_mgr_term - Termination call for the tiler heap reclaim manger. diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tl_reader.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tl_reader.c index 54054661f7a9..eb5c8a40b8c9 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tl_reader.c +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tl_reader.c @@ -151,13 +151,22 @@ static bool tl_reader_overflow_check(struct kbase_csf_tl_reader *self, u16 event * * Reset the reader to the default state, i.e. set all the * mutable fields to zero. + * + * NOTE: this function expects the irq spinlock to be held. 
*/ static void tl_reader_reset(struct kbase_csf_tl_reader *self) { + lockdep_assert_held(&self->read_lock); + self->got_first_event = false; self->is_active = false; self->expected_event_id = 0; self->tl_header.btc = 0; + + /* There might be data left in the trace buffer from the previous + * tracing session. We don't want it to leak into this session. + */ + kbase_csf_firmware_trace_buffer_discard_all(self->trace_buffer); } int kbase_csf_tl_reader_flush_buffer(struct kbase_csf_tl_reader *self) @@ -324,21 +333,16 @@ static int tl_reader_update_enable_bit(struct kbase_csf_tl_reader *self, bool va void kbase_csf_tl_reader_init(struct kbase_csf_tl_reader *self, struct kbase_tlstream *stream) { - self->timer_interval = KBASE_CSF_TL_READ_INTERVAL_DEFAULT; + *self = (struct kbase_csf_tl_reader){ + .timer_interval = KBASE_CSF_TL_READ_INTERVAL_DEFAULT, + .stream = stream, + .kbdev = NULL, /* This will be initialized by tl_reader_init_late() */ + .is_active = false, + }; kbase_timer_setup(&self->read_timer, kbasep_csf_tl_reader_read_callback); - self->stream = stream; - - /* This will be initialized by tl_reader_init_late() */ - self->kbdev = NULL; - self->trace_buffer = NULL; - self->tl_header.data = NULL; - self->tl_header.size = 0; - spin_lock_init(&self->read_lock); - - tl_reader_reset(self); } void kbase_csf_tl_reader_term(struct kbase_csf_tl_reader *self) @@ -348,13 +352,19 @@ void kbase_csf_tl_reader_term(struct kbase_csf_tl_reader *self) int kbase_csf_tl_reader_start(struct kbase_csf_tl_reader *self, struct kbase_device *kbdev) { + unsigned long flags; int rcode; + spin_lock_irqsave(&self->read_lock, flags); + /* If already running, early exit. 
*/ - if (self->is_active) + if (self->is_active) { + spin_unlock_irqrestore(&self->read_lock, flags); return 0; + } if (tl_reader_init_late(self, kbdev)) { + spin_unlock_irqrestore(&self->read_lock, flags); #if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) dev_warn(kbdev->dev, "CSFFW timeline is not available for MALI_BIFROST_NO_MALI builds!"); return 0; @@ -366,6 +376,9 @@ int kbase_csf_tl_reader_start(struct kbase_csf_tl_reader *self, struct kbase_dev tl_reader_reset(self); self->is_active = true; + + spin_unlock_irqrestore(&self->read_lock, flags); + /* Set bytes to copy to the header size. This is to trigger copying * of the header to the user space. */ diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_trace_buffer.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_trace_buffer.c index 8ed7c91553a6..a9469c5949b4 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_trace_buffer.c +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_trace_buffer.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2018-2024 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -469,14 +469,15 @@ unsigned int kbase_csf_firmware_trace_buffer_read_data(struct firmware_trace_buf } else { unsigned int bytes_copied_head, bytes_copied_tail; - bytes_copied_tail = min_t(unsigned int, num_bytes, (buffer_size - extract_offset)); + bytes_copied_tail = + min_t(unsigned int, num_bytes, size_sub(buffer_size, extract_offset)); memcpy(data, &data_cpu_va[extract_offset], bytes_copied_tail); bytes_copied_head = min_t(unsigned int, (num_bytes - bytes_copied_tail), insert_offset); memcpy(&data[bytes_copied_tail], data_cpu_va, bytes_copied_head); - bytes_copied = bytes_copied_head + bytes_copied_tail; + bytes_copied = size_add(bytes_copied_head, bytes_copied_tail); extract_offset += bytes_copied; if (extract_offset >= buffer_size) extract_offset = bytes_copied_head; @@ -519,6 +520,14 @@ void kbase_csf_firmware_trace_buffer_discard(struct firmware_trace_buffer *trace } EXPORT_SYMBOL(kbase_csf_firmware_trace_buffer_discard); +void kbase_csf_firmware_trace_buffer_discard_all(struct firmware_trace_buffer *trace_buffer) +{ + if (WARN_ON(!trace_buffer)) + return; + + *(trace_buffer->cpu_va.extract_cpu_va) = *(trace_buffer->cpu_va.insert_cpu_va); +} + static void update_trace_buffer_active_mask64(struct firmware_trace_buffer *tb, u64 mask) { unsigned int i; diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_trace_buffer.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_trace_buffer.h index 90dfcb2699bc..35988eaf8f5a 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_trace_buffer.h +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_trace_buffer.h @@ -179,6 +179,15 @@ unsigned int kbase_csf_firmware_trace_buffer_read_data(struct firmware_trace_buf */ void kbase_csf_firmware_trace_buffer_discard(struct firmware_trace_buffer *trace_buffer); +/** + * kbase_csf_firmware_trace_buffer_discard_all - Discard all data 
from a trace buffer + * + * @trace_buffer: Trace buffer handle + * + * Discard all the data in the trace buffer to make it empty. + */ +void kbase_csf_firmware_trace_buffer_discard_all(struct firmware_trace_buffer *trace_buffer); + /** * kbase_csf_firmware_trace_buffer_get_active_mask64 - Get trace buffer active mask * diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_util.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_util.c index 7dc32a11bb29..5f13672e70b8 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_util.c +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_util.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2023-2024 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -115,7 +115,7 @@ struct kbasep_printer *kbasep_printer_buffer_init(struct kbase_device *kbdev, if (kbpr) { if (kfifo_alloc(&kbpr->fifo, KBASEP_PRINTER_BUFFER_MAX_SIZE, GFP_KERNEL)) { - kfree(kbpr); + vfree(kbpr); return NULL; } kbpr->kbdev = kbdev; @@ -224,7 +224,7 @@ __attribute__((format(__printf__, 2, 3))) void kbasep_print(struct kbasep_printe va_list arglist; va_start(arglist, fmt); - len = vsnprintf(buffer, KBASEP_PRINT_FORMAT_BUFFER_MAX_SIZE, fmt, arglist); + len = vscnprintf(buffer, KBASEP_PRINT_FORMAT_BUFFER_MAX_SIZE, fmt, arglist); if (len <= 0) { pr_err("message write to the buffer failed"); goto exit; diff --git a/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_coresight_csf.c b/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_coresight_csf.c index fe8201f7f7e6..da56d71f473f 100644 --- a/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_coresight_csf.c +++ b/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_coresight_csf.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - 
* (C) COPYRIGHT 2022-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2022-2024 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -443,7 +443,7 @@ kbase_debug_coresight_csf_config_create(void *client_data, } config = kzalloc(sizeof(struct kbase_debug_coresight_csf_config), GFP_KERNEL); - if (WARN_ON(!client)) + if (WARN_ON(!config)) return NULL; config->client = client; diff --git a/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_codes_csf.h b/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_codes_csf.h index 18520db15502..04da9c8b9057 100644 --- a/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_codes_csf.h +++ b/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_codes_csf.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2020-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2020-2024 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -212,6 +212,9 @@ KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_EVICT_CTX_SLOTS_START), KBASE_KTRACE_CODE_MAKE_CODE(SCHED_BUSY), KBASE_KTRACE_CODE_MAKE_CODE(SCHED_INACTIVE), KBASE_KTRACE_CODE_MAKE_CODE(SCHED_SUSPENDED), KBASE_KTRACE_CODE_MAKE_CODE(SCHED_SLEEPING), + /* info_val == true if FW Sleep-on-Idle is enabled, false otherwise */ + KBASE_KTRACE_CODE_MAKE_CODE(FIRMWARE_SLEEP_ON_IDLE_CHANGED), + /* info_val = mcu state */ #define KBASEP_MCU_STATE(n) KBASE_KTRACE_CODE_MAKE_CODE(PM_MCU_##n), #include "backend/gpu/mali_kbase_pm_mcu_states.h" diff --git a/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_csf.c b/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_csf.c index ec5ca10e135b..b14ffc69c54c 100644 --- a/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_csf.c +++ b/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_csf.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2020-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2020-2024 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -27,8 +27,8 @@ void kbasep_ktrace_backend_format_header(char *buffer, int sz, s32 *written) { - *written += MAX(snprintf(buffer + *written, (size_t)MAX(sz - *written, 0), - "group,slot,prio,csi,kcpu"), + *written += MAX(scnprintf(buffer + *written, (size_t)MAX(sz - *written, 0), + "group,slot,prio,csi,kcpu"), 0); } @@ -44,38 +44,39 @@ void kbasep_ktrace_backend_format_msg(struct kbase_ktrace_msg *trace_msg, char * if (be_msg->gpu.flags & KBASE_KTRACE_FLAG_CSF_GROUP) { const s8 slot = be_msg->gpu.csg_nr; /* group,slot, */ - *written += MAX(snprintf(buffer + *written, (size_t)MAX(sz - *written, 0), "%u,%d,", - be_msg->gpu.group_handle, slot), + *written += MAX(scnprintf(buffer + *written, (size_t)MAX(sz - *written, 0), + "%u,%d,", be_msg->gpu.group_handle, slot), 0); /* prio */ if (slot >= 0) - *written += MAX(snprintf(buffer + *written, (size_t)MAX(sz - *written, 0), - "%u", be_msg->gpu.slot_prio), + *written += MAX(scnprintf(buffer + *written, (size_t)MAX(sz - *written, 0), + "%u", be_msg->gpu.slot_prio), 0); /* , */ - *written += MAX(snprintf(buffer + *written, (size_t)MAX(sz - *written, 0), ","), 0); + *written += + MAX(scnprintf(buffer + *written, (size_t)MAX(sz - *written, 0), ","), 0); } else { /* No group,slot,prio fields, but ensure ending with "," */ *written += - MAX(snprintf(buffer + *written, (size_t)MAX(sz - *written, 0), ",,,"), 0); + MAX(scnprintf(buffer + *written, (size_t)MAX(sz - *written, 0), ",,,"), 0); } /* queue parts: csi */ if (trace_msg->backend.gpu.flags & KBASE_KTRACE_FLAG_CSF_QUEUE) - *written += MAX(snprintf(buffer + *written, (size_t)MAX(sz - *written, 0), "%d", - be_msg->gpu.csi_index), + *written += MAX(scnprintf(buffer + *written, (size_t)MAX(sz - *written, 0), "%d", + be_msg->gpu.csi_index), 0); /* , */ - *written += MAX(snprintf(buffer + *written, (size_t)MAX(sz - *written, 0), 
","), 0); + *written += MAX(scnprintf(buffer + *written, (size_t)MAX(sz - *written, 0), ","), 0); if (be_msg->gpu.flags & KBASE_KTRACE_FLAG_CSF_KCPU) { /* kcpu data */ - *written += MAX(snprintf(buffer + *written, (size_t)MAX(sz - *written, 0), - "kcpu %d (0x%llx)", be_msg->kcpu.id, - be_msg->kcpu.extra_info_val), + *written += MAX(scnprintf(buffer + *written, (size_t)MAX(sz - *written, 0), + "kcpu %d (0x%llx)", be_msg->kcpu.id, + be_msg->kcpu.extra_info_val), 0); } diff --git a/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_jm.c b/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_jm.c index beac074f2035..39306e7d45e3 100644 --- a/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_jm.c +++ b/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_jm.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2020-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2020-2024 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -27,8 +27,8 @@ void kbasep_ktrace_backend_format_header(char *buffer, int sz, s32 *written) { - *written += MAX(snprintf(buffer + *written, (size_t)MAX(sz - *written, 0), - "katom,gpu_addr,jobslot,refcount"), + *written += MAX(scnprintf(buffer + *written, (size_t)MAX(sz - *written, 0), + "katom,gpu_addr,jobslot,refcount"), 0); } @@ -37,34 +37,34 @@ void kbasep_ktrace_backend_format_msg(struct kbase_ktrace_msg *trace_msg, char * { /* katom */ if (trace_msg->backend.gpu.flags & KBASE_KTRACE_FLAG_JM_ATOM) - *written += MAX(snprintf(buffer + *written, (size_t)MAX(sz - *written, 0), - "atom %u (ud: 0x%llx 0x%llx)", - trace_msg->backend.gpu.atom_number, - trace_msg->backend.gpu.atom_udata[0], - trace_msg->backend.gpu.atom_udata[1]), + *written += MAX(scnprintf(buffer + *written, (size_t)MAX(sz - *written, 0), + "atom %u (ud: 0x%llx 0x%llx)", + trace_msg->backend.gpu.atom_number, + trace_msg->backend.gpu.atom_udata[0], + trace_msg->backend.gpu.atom_udata[1]), 0); /* gpu_addr */ if (trace_msg->backend.gpu.flags & KBASE_KTRACE_FLAG_BACKEND) - *written += MAX(snprintf(buffer + *written, (size_t)MAX(sz - *written, 0), - ",%.8llx,", trace_msg->backend.gpu.gpu_addr), + *written += MAX(scnprintf(buffer + *written, (size_t)MAX(sz - *written, 0), + ",%.8llx,", trace_msg->backend.gpu.gpu_addr), 0); else *written += - MAX(snprintf(buffer + *written, (size_t)MAX(sz - *written, 0), ",,"), 0); + MAX(scnprintf(buffer + *written, (size_t)MAX(sz - *written, 0), ",,"), 0); /* jobslot */ if (trace_msg->backend.gpu.flags & KBASE_KTRACE_FLAG_JM_JOBSLOT) - *written += MAX(snprintf(buffer + *written, (size_t)MAX(sz - *written, 0), "%d", - trace_msg->backend.gpu.jobslot), + *written += MAX(scnprintf(buffer + *written, (size_t)MAX(sz - *written, 0), "%d", + trace_msg->backend.gpu.jobslot), 0); - *written += MAX(snprintf(buffer + *written, 
(size_t)MAX(sz - *written, 0), ","), 0); + *written += MAX(scnprintf(buffer + *written, (size_t)MAX(sz - *written, 0), ","), 0); /* refcount */ if (trace_msg->backend.gpu.flags & KBASE_KTRACE_FLAG_JM_REFCOUNT) - *written += MAX(snprintf(buffer + *written, (size_t)MAX(sz - *written, 0), "%d", - trace_msg->backend.gpu.refcount), + *written += MAX(scnprintf(buffer + *written, (size_t)MAX(sz - *written, 0), "%d", + trace_msg->backend.gpu.refcount), 0); } diff --git a/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_linux_ktrace_csf.h b/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_linux_ktrace_csf.h index 0b0de2385f85..7c40f472a78b 100644 --- a/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_linux_ktrace_csf.h +++ b/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_linux_ktrace_csf.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2020-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2020-2024 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -69,6 +69,7 @@ DEFINE_MALI_ADD_EVENT(SCHED_BUSY); DEFINE_MALI_ADD_EVENT(SCHED_INACTIVE); DEFINE_MALI_ADD_EVENT(SCHED_SUSPENDED); DEFINE_MALI_ADD_EVENT(SCHED_SLEEPING); +DEFINE_MALI_ADD_EVENT(FIRMWARE_SLEEP_ON_IDLE_CHANGED); #define KBASEP_MCU_STATE(n) DEFINE_MALI_ADD_EVENT(PM_MCU_##n); #include "backend/gpu/mali_kbase_pm_mcu_states.h" #undef KBASEP_MCU_STATE diff --git a/drivers/gpu/arm/bifrost/debug/mali_kbase_debug_ktrace.c b/drivers/gpu/arm/bifrost/debug/mali_kbase_debug_ktrace.c index 0842460bc08a..036d1f5968f6 100644 --- a/drivers/gpu/arm/bifrost/debug/mali_kbase_debug_ktrace.c +++ b/drivers/gpu/arm/bifrost/debug/mali_kbase_debug_ktrace.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2020-2023 ARM Limited. All rights reserved. 
+ * (C) COPYRIGHT 2020-2024 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -71,15 +71,15 @@ static const char *const kbasep_ktrace_code_string[] = { static void kbasep_ktrace_format_header(char *buffer, int sz, s32 written) { - written += MAX(snprintf(buffer + written, (size_t)MAX(sz - written, 0), - "secs,thread_id,cpu,code,kctx,"), + written += MAX(scnprintf(buffer + written, (size_t)MAX(sz - written, 0), + "secs,thread_id,cpu,code,kctx,"), 0); kbasep_ktrace_backend_format_header(buffer, sz, &written); - written += MAX(snprintf(buffer + written, (size_t)MAX(sz - written, 0), - ",info_val,ktrace_version=%u.%u", KBASE_KTRACE_VERSION_MAJOR, - KBASE_KTRACE_VERSION_MINOR), + written += MAX(scnprintf(buffer + written, (size_t)MAX(sz - written, 0), + ",info_val,ktrace_version=%u.%u", KBASE_KTRACE_VERSION_MAJOR, + KBASE_KTRACE_VERSION_MINOR), 0); buffer[sz - 1] = 0; @@ -93,21 +93,21 @@ static void kbasep_ktrace_format_msg(struct kbase_ktrace_msg *trace_msg, char *b * * secs,thread_id,cpu,code, */ - written += MAX(snprintf(buffer + written, (size_t)MAX(sz - written, 0), "%d.%.6d,%d,%d,%s,", - (int)trace_msg->timestamp.tv_sec, - (int)(trace_msg->timestamp.tv_nsec / 1000), trace_msg->thread_id, - trace_msg->cpu, - kbasep_ktrace_code_string[trace_msg->backend.gpu.code]), + written += MAX(scnprintf(buffer + written, (size_t)MAX(sz - written, 0), + "%d.%.6d,%d,%d,%s,", (int)trace_msg->timestamp.tv_sec, + (int)(trace_msg->timestamp.tv_nsec / 1000), trace_msg->thread_id, + trace_msg->cpu, + kbasep_ktrace_code_string[trace_msg->backend.gpu.code]), 0); /* kctx part: */ if (trace_msg->kctx_tgid) { - written += MAX(snprintf(buffer + written, (size_t)MAX(sz - written, 0), "%d_%u", - trace_msg->kctx_tgid, trace_msg->kctx_id), + written += MAX(scnprintf(buffer + written, (size_t)MAX(sz - written, 0), "%d_%u", + trace_msg->kctx_tgid, 
trace_msg->kctx_id), 0); } /* Trailing comma */ - written += MAX(snprintf(buffer + written, (size_t)MAX(sz - written, 0), ","), 0); + written += MAX(scnprintf(buffer + written, (size_t)MAX(sz - written, 0), ","), 0); /* Backend parts */ kbasep_ktrace_backend_format_msg(trace_msg, buffer, sz, &written); @@ -119,8 +119,8 @@ static void kbasep_ktrace_format_msg(struct kbase_ktrace_msg *trace_msg, char *b * Note that the last column is empty, it's simply to hold the ktrace * version in the header */ - written += MAX(snprintf(buffer + written, (size_t)MAX(sz - written, 0), ",0x%.16llx", - (unsigned long long)trace_msg->info_val), + written += MAX(scnprintf(buffer + written, (size_t)MAX(sz - written, 0), ",0x%.16llx", + (unsigned long long)trace_msg->info_val), 0); buffer[sz - 1] = 0; } diff --git a/drivers/gpu/arm/bifrost/debug/mali_kbase_debug_ktrace_codes.h b/drivers/gpu/arm/bifrost/debug/mali_kbase_debug_ktrace_codes.h index 991f70fe8540..d40eec013cb5 100644 --- a/drivers/gpu/arm/bifrost/debug/mali_kbase_debug_ktrace_codes.h +++ b/drivers/gpu/arm/bifrost/debug/mali_kbase_debug_ktrace_codes.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2011-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2024 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -149,13 +149,17 @@ KBASE_KTRACE_CODE_MAKE_CODE(CORE_CTX_DESTROY), KBASE_KTRACE_CODE_MAKE_CODE(SCHED_RETAIN_CTX_NOLOCK), /* info_val == kctx->refcount */ KBASE_KTRACE_CODE_MAKE_CODE(SCHED_RELEASE_CTX), -#ifdef CONFIG_MALI_ARBITER_SUPPORT /* * Arbitration events */ - KBASE_KTRACE_CODE_MAKE_CODE(ARB_GPU_LOST), KBASE_KTRACE_CODE_MAKE_CODE(ARB_VM_STATE), + KBASE_KTRACE_CODE_MAKE_CODE(ARB_VM_STATE), KBASE_KTRACE_CODE_MAKE_CODE(ARB_VM_EVT), -#endif + KBASE_KTRACE_CODE_MAKE_CODE(ARB_GPU_GRANTED), + KBASE_KTRACE_CODE_MAKE_CODE(ARB_GPU_LOST), + KBASE_KTRACE_CODE_MAKE_CODE(ARB_GPU_STARTED), + KBASE_KTRACE_CODE_MAKE_CODE(ARB_GPU_STOP_REQUESTED), + KBASE_KTRACE_CODE_MAKE_CODE(ARB_GPU_STOPPED), + KBASE_KTRACE_CODE_MAKE_CODE(ARB_GPU_REQUESTED), #if MALI_USE_CSF #include "debug/backend/mali_kbase_debug_ktrace_codes_csf.h" diff --git a/drivers/gpu/arm/bifrost/debug/mali_kbase_debug_linux_ktrace.h b/drivers/gpu/arm/bifrost/debug/mali_kbase_debug_linux_ktrace.h index 1ebddfa3f44f..acc78eb5b0b2 100644 --- a/drivers/gpu/arm/bifrost/debug/mali_kbase_debug_linux_ktrace.h +++ b/drivers/gpu/arm/bifrost/debug/mali_kbase_debug_linux_ktrace.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2024 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -95,13 +95,16 @@ DEFINE_MALI_ADD_EVENT(PM_RUNTIME_RESUME_CALLBACK); #undef KBASEP_L2_STATE DEFINE_MALI_ADD_EVENT(SCHED_RETAIN_CTX_NOLOCK); DEFINE_MALI_ADD_EVENT(SCHED_RELEASE_CTX); -#ifdef CONFIG_MALI_ARBITER_SUPPORT -DEFINE_MALI_ADD_EVENT(ARB_GPU_LOST); DEFINE_MALI_ADD_EVENT(ARB_VM_STATE); DEFINE_MALI_ADD_EVENT(ARB_VM_EVT); +DEFINE_MALI_ADD_EVENT(ARB_GPU_GRANTED); +DEFINE_MALI_ADD_EVENT(ARB_GPU_LOST); +DEFINE_MALI_ADD_EVENT(ARB_GPU_STARTED); +DEFINE_MALI_ADD_EVENT(ARB_GPU_STOP_REQUESTED); +DEFINE_MALI_ADD_EVENT(ARB_GPU_STOPPED); +DEFINE_MALI_ADD_EVENT(ARB_GPU_REQUESTED); -#endif #if MALI_USE_CSF #include "backend/mali_kbase_debug_linux_ktrace_csf.h" #else diff --git a/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_csf.c b/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_csf.c index 52aa63330afe..218022ac3186 100644 --- a/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_csf.c +++ b/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_csf.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2024 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -132,11 +132,15 @@ static int kbase_backend_late_init(struct kbase_device *kbdev) fail_update_l2_features: kbase_backend_devfreq_term(kbdev); -fail_devfreq_init: - kbasep_pm_metrics_term(kbdev); -fail_pm_metrics_init: - kbase_ipa_control_term(kbdev); +fail_devfreq_init: + { + kbasep_pm_metrics_term(kbdev); + } +fail_pm_metrics_init: + { + kbase_ipa_control_term(kbdev); + } #ifdef CONFIG_MALI_BIFROST_DEBUG #if IS_ENABLED(CONFIG_MALI_REAL_HW) fail_interrupt_test: @@ -159,9 +163,11 @@ fail_reset_gpu_init: */ static void kbase_backend_late_term(struct kbase_device *kbdev) { - kbase_backend_devfreq_term(kbdev); - kbasep_pm_metrics_term(kbdev); - kbase_ipa_control_term(kbdev); + { + kbase_backend_devfreq_term(kbdev); + kbasep_pm_metrics_term(kbdev); + kbase_ipa_control_term(kbdev); + } kbase_hwaccess_pm_halt(kbdev); kbase_reset_gpu_term(kbdev); kbase_hwaccess_pm_term(kbdev); @@ -279,10 +285,8 @@ static const struct kbase_device_init dev_init[] = { { kbase_gpu_device_create, kbase_gpu_device_destroy, "Dummy model initialization failed" }, #else /* !IS_ENABLED(CONFIG_MALI_REAL_HW) */ { kbase_get_irqs, NULL, "IRQ search failed" }, -#endif /* !IS_ENABLED(CONFIG_MALI_REAL_HW) */ -#if !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) { registers_map, registers_unmap, "Register map failed" }, -#endif /* !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) */ +#endif /* !IS_ENABLED(CONFIG_MALI_REAL_HW) */ #if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) { kbase_gpu_metrics_init, kbase_gpu_metrics_term, "GPU metrics initialization failed" }, #endif /* IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) */ diff --git a/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_hw_csf.c b/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_hw_csf.c index ab9df01610ab..3b27b87657a5 100644 --- 
a/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_hw_csf.c +++ b/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_hw_csf.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2020-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2020-2024 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -28,6 +28,16 @@ #include #include #include +#include + +bool kbase_is_gpu_removed(struct kbase_device *kbdev) +{ + if (!kbase_has_arbiter(kbdev)) + return false; + + + return (KBASE_REG_READ(kbdev, GPU_CONTROL_ENUM(GPU_ID)) == 0); +} /** * kbase_report_gpu_fault - Report a GPU fault of the device. @@ -78,6 +88,7 @@ static void kbase_gpu_fault_interrupt(struct kbase_device *kbdev) void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val) { u32 power_changed_mask = (POWER_CHANGED_ALL | MCU_STATUS_GPU_IRQ); + struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; KBASE_KTRACE_ADD(kbdev, CORE_GPU_IRQ, NULL, val); @@ -85,7 +96,6 @@ void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val) kbase_gpu_fault_interrupt(kbdev); if (val & GPU_PROTECTED_FAULT) { - struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; unsigned long flags; dev_err_ratelimited(kbdev->dev, "GPU fault in protected mode"); @@ -139,10 +149,33 @@ void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val) unsigned long flags; dev_dbg(kbdev->dev, "Doorbell mirror interrupt received"); + + /* Assume that the doorbell comes from userspace which + * presents new work in order to invalidate a possible GPU + * idle event. + * If the doorbell was raised by KBase then the FW would handle + * the pending doorbell then raise a 2nd GLB_IDLE IRQ which + * would allow us to put the GPU to sleep.
+ */ + atomic_set(&scheduler->gpu_no_longer_idle, true); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); kbase_pm_disable_db_mirror_interrupt(kbdev); - kbdev->pm.backend.exit_gpu_sleep_mode = true; - kbase_csf_scheduler_invoke_tick(kbdev); + + if (likely(kbdev->pm.backend.mcu_state == KBASE_MCU_IN_SLEEP)) { + kbdev->pm.backend.exit_gpu_sleep_mode = true; + kbase_csf_scheduler_invoke_tick(kbdev); + } else if (likely(test_bit(KBASE_GPU_SUPPORTS_FW_SLEEP_ON_IDLE, + &kbdev->pm.backend.gpu_sleep_allowed)) && + (kbdev->pm.backend.mcu_state != KBASE_MCU_ON_PEND_SLEEP)) { + /* The firmware is going to sleep on its own but new + * doorbells were rung before we manage to handle + * the GLB_IDLE IRQ in the bottom half. We shall enable + * DB notification to allow the DB to be handled by FW. + */ + dev_dbg(kbdev->dev, "Re-enabling MCU immediately following DB_MIRROR IRQ"); + kbase_pm_enable_mcu_db_notification(kbdev); + } spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); } #endif @@ -169,10 +202,13 @@ void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val) * cores. */ if (kbdev->pm.backend.l2_always_on || - kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TTRX_921)) + kbase_hw_has_issue(kbdev, KBASE_HW_ISSUE_TTRX_921)) kbase_pm_power_changed(kbdev); } + if (val & MCU_STATUS_GPU_IRQ) + wake_up_all(&kbdev->csf.event_wait); + KBASE_KTRACE_ADD(kbdev, CORE_GPU_IRQ_DONE, NULL, val); } KBASE_EXPORT_TEST_API(kbase_gpu_interrupt); diff --git a/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_hw_jm.c b/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_hw_jm.c index f971b3b939df..4dd9a228aa11 100644 --- a/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_hw_jm.c +++ b/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_hw_jm.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2020-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2020-2024 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -28,6 +28,14 @@ #include #include +bool kbase_is_gpu_removed(struct kbase_device *kbdev) +{ + if (!kbase_has_arbiter(kbdev)) + return false; + + return (KBASE_REG_READ(kbdev, GPU_CONTROL_ENUM(GPU_ID)) == 0); +} + /** * kbase_report_gpu_fault - Report a GPU fault. * @kbdev: Kbase device pointer @@ -95,7 +103,7 @@ void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val) * cores. */ if (kbdev->pm.backend.l2_always_on || - kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TTRX_921)) + kbase_hw_has_issue(kbdev, KBASE_HW_ISSUE_TTRX_921)) kbase_pm_power_changed(kbdev); } diff --git a/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_jm.c b/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_jm.c index ab46f858a542..8cdf26e28ac6 100644 --- a/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_jm.c +++ b/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_jm.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2024 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -32,10 +32,6 @@ #include #include -#ifdef CONFIG_MALI_ARBITER_SUPPORT -#include -#endif - #include #include #include @@ -217,16 +213,14 @@ static const struct kbase_device_init dev_init[] = { { kbase_gpu_device_create, kbase_gpu_device_destroy, "Dummy model initialization failed" }, #else /* !IS_ENABLED(CONFIG_MALI_REAL_HW) */ { kbase_get_irqs, NULL, "IRQ search failed" }, -#endif /* !IS_ENABLED(CONFIG_MALI_REAL_HW) */ -#if !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) { registers_map, registers_unmap, "Register map failed" }, -#endif /* !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) */ +#endif /* !IS_ENABLED(CONFIG_MALI_REAL_HW) */ #if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) { kbase_gpu_metrics_init, kbase_gpu_metrics_term, "GPU metrics initialization failed" }, #endif /* IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) */ + { power_control_init, power_control_term, "Power control initialization failed" }, { kbase_device_io_history_init, kbase_device_io_history_term, "Register access history initialization failed" }, - { kbase_device_pm_init, kbase_device_pm_term, "Power management initialization failed" }, { kbase_device_early_init, kbase_device_early_term, "Early device initialization failed" }, { kbase_backend_time_init, NULL, "Time backend initialization failed" }, { kbase_device_misc_init, kbase_device_misc_term, diff --git a/drivers/gpu/arm/bifrost/device/mali_kbase_device.c b/drivers/gpu/arm/bifrost/device/mali_kbase_device.c index b191c758c62f..e5bed33d1129 100644 --- a/drivers/gpu/arm/bifrost/device/mali_kbase_device.c +++ b/drivers/gpu/arm/bifrost/device/mali_kbase_device.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2024 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -51,10 +51,7 @@ #include "backend/gpu/mali_kbase_irq_internal.h" #include "mali_kbase_regs_history_debugfs.h" #include "mali_kbase_pbha.h" - -#ifdef CONFIG_MALI_ARBITER_SUPPORT #include "arbiter/mali_kbase_arbiter_pm.h" -#endif /* CONFIG_MALI_ARBITER_SUPPORT */ #if defined(CONFIG_DEBUG_FS) && !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) @@ -69,6 +66,22 @@ static DEFINE_MUTEX(kbase_dev_list_lock); static LIST_HEAD(kbase_dev_list); static unsigned int kbase_dev_nr; +static unsigned int mma_wa_id; + +static int set_mma_wa_id(const char *val, const struct kernel_param *kp) +{ + return kbase_param_set_uint_minmax(val, kp, 1, 15); +} + +static const struct kernel_param_ops mma_wa_id_ops = { + .set = set_mma_wa_id, + .get = param_get_uint, +}; + +module_param_cb(mma_wa_id, &mma_wa_id_ops, &mma_wa_id, 0444); +__MODULE_PARM_TYPE(mma_wa_id, "uint"); +MODULE_PARM_DESC(mma_wa_id, "PBHA ID for MMA workaround. 
Valid range is from 1 to 15."); + struct kbase_device *kbase_device_alloc(void) { return vzalloc(sizeof(struct kbase_device)); @@ -320,6 +333,10 @@ int kbase_device_misc_init(struct kbase_device *const kbdev) if (err) goto dma_set_mask_failed; + /* Set mma_wa_id if it has been passed in as a module parameter */ + if ((kbdev->gpu_props.gpu_id.arch_id >= GPU_ID_ARCH_MAKE(14, 8, 0)) && mma_wa_id != 0) + kbdev->mma_wa_id = mma_wa_id; + err = kbase_pbha_read_dtb(kbdev); if (err) goto term_as; @@ -556,14 +573,27 @@ int kbase_device_early_init(struct kbase_device *kbdev) /* Ensure we can access the GPU registers */ kbase_pm_register_access_enable(kbdev); - /* Initialize GPU_ID props */ - kbase_gpuprops_parse_gpu_id(&kbdev->gpu_props.gpu_id, kbase_reg_get_gpu_id(kbdev)); - - /* Initialize register mapping LUTs */ - err = kbase_regmap_init(kbdev); - if (err) + /* + * If -EPERM is returned, it means the device backend is not supported, but + * device initialization can continue. + */ + err = kbase_device_backend_init(kbdev); + if (err != 0 && err != -EPERM) goto pm_runtime_term; + /* + * Initialize register mapping LUTs. This would have been initialized on HW + * Arbitration but not on PV or non-arbitration devices. + */ + if (!kbase_reg_is_init(kbdev)) { + /* Initialize GPU_ID props */ + kbase_gpuprops_parse_gpu_id(&kbdev->gpu_props.gpu_id, kbase_reg_get_gpu_id(kbdev)); + + err = kbase_regmap_init(kbdev); + if (err) + goto backend_term; + } + /* Set the list of features available on the current HW * (identified by the GPU_ID register) */ @@ -572,7 +602,7 @@ int kbase_device_early_init(struct kbase_device *kbdev) /* Find out GPU properties based on the GPU feature registers. 
*/ err = kbase_gpuprops_init(kbdev); if (err) - goto regmap_term; + goto backend_term; /* Get the list of workarounds for issues on the current HW * (identified by the GPU_ID register and impl_tech in THREAD_FEATURES) @@ -584,14 +614,12 @@ int kbase_device_early_init(struct kbase_device *kbdev) /* We're done accessing the GPU registers for now. */ kbase_pm_register_access_disable(kbdev); -#ifdef CONFIG_MALI_ARBITER_SUPPORT - if (kbdev->arb.arb_if) - err = kbase_arbiter_pm_install_interrupts(kbdev); - else + if (kbase_has_arbiter(kbdev)) { + if (kbdev->pm.arb_vm_state) + err = kbase_arbiter_pm_install_interrupts(kbdev); + } else { err = kbase_install_interrupts(kbdev); -#else - err = kbase_install_interrupts(kbdev); -#endif + } if (err) goto gpuprops_term; @@ -599,9 +627,13 @@ int kbase_device_early_init(struct kbase_device *kbdev) gpuprops_term: kbase_gpuprops_term(kbdev); -regmap_term: +backend_term: + kbase_device_backend_term(kbdev); kbase_regmap_term(kbdev); pm_runtime_term: + if (kbdev->pm.backend.gpu_powered) + kbase_pm_register_access_disable(kbdev); + kbase_pm_runtime_term(kbdev); platform_device_term: kbasep_platform_device_term(kbdev); @@ -613,15 +645,13 @@ ktrace_term: void kbase_device_early_term(struct kbase_device *kbdev) { -#ifdef CONFIG_MALI_ARBITER_SUPPORT - if (kbdev->arb.arb_if) + if (kbase_has_arbiter(kbdev)) kbase_arbiter_pm_release_interrupts(kbdev); else kbase_release_interrupts(kbdev); -#else - kbase_release_interrupts(kbdev); -#endif /* CONFIG_MALI_ARBITER_SUPPORT */ kbase_gpuprops_term(kbdev); + kbase_device_backend_term(kbdev); + kbase_regmap_term(kbdev); kbase_pm_runtime_term(kbdev); kbasep_platform_device_term(kbdev); kbase_ktrace_term(kbdev); diff --git a/drivers/gpu/arm/bifrost/device/mali_kbase_device.h b/drivers/gpu/arm/bifrost/device/mali_kbase_device.h index 9cca6aff4554..1b15ff059194 100644 --- a/drivers/gpu/arm/bifrost/device/mali_kbase_device.h +++ b/drivers/gpu/arm/bifrost/device/mali_kbase_device.h @@ -1,7 +1,7 @@ /* 
SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2024 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -58,6 +58,9 @@ void kbase_increment_device_id(void); * When a device file is opened for the first time, * load firmware and initialize hardware counter components. * + * It is safe for this function to be called multiple times without ill + * effects. Only the first call would be effective. + * * Return: 0 on success. An error code on failure. */ int kbase_device_firmware_init_once(struct kbase_device *kbdev); diff --git a/drivers/gpu/arm/bifrost/device/mali_kbase_device_hw.c b/drivers/gpu/arm/bifrost/device/mali_kbase_device_hw.c index da597af9c46e..91379ac6429d 100644 --- a/drivers/gpu/arm/bifrost/device/mali_kbase_device_hw.c +++ b/drivers/gpu/arm/bifrost/device/mali_kbase_device_hw.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2024 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -27,14 +27,6 @@ #include #include -bool kbase_is_gpu_removed(struct kbase_device *kbdev) -{ - if (!IS_ENABLED(CONFIG_MALI_ARBITER_SUPPORT)) - return false; - - return (kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(GPU_ID)) == 0); -} - /** * busy_wait_cache_operation - Wait for a pending cache flush to complete * diff --git a/drivers/gpu/arm/bifrost/hw_access/backend/mali_kbase_hw_access_model_linux.c b/drivers/gpu/arm/bifrost/hw_access/backend/mali_kbase_hw_access_model_linux.c index ca1ccbfb3dbe..9993b787ed21 100644 --- a/drivers/gpu/arm/bifrost/hw_access/backend/mali_kbase_hw_access_model_linux.c +++ b/drivers/gpu/arm/bifrost/hw_access/backend/mali_kbase_hw_access_model_linux.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2023-2024 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -46,7 +46,7 @@ u32 kbase_reg_read32(struct kbase_device *kbdev, u32 reg_enum) u32 val = 0; u32 offset; - if (WARN_ON(!kbdev->pm.backend.gpu_powered)) + if (WARN_ON(!kbase_reg_is_powered_access_allowed(kbdev, reg_enum))) return 0; if (unlikely(!kbase_reg_is_accessible(kbdev, reg_enum, KBASE_REGMAP_PERM_READ | KBASE_REGMAP_WIDTH_32_BIT))) @@ -68,7 +68,7 @@ u64 kbase_reg_read64(struct kbase_device *kbdev, u32 reg_enum) u32 val32[2] = { 0 }; u32 offset; - if (WARN_ON(!kbdev->pm.backend.gpu_powered)) + if (WARN_ON(!kbase_reg_is_powered_access_allowed(kbdev, reg_enum))) return 0; if (unlikely(!kbase_reg_is_accessible(kbdev, reg_enum, KBASE_REGMAP_PERM_READ | KBASE_REGMAP_WIDTH_64_BIT))) @@ -91,7 +91,7 @@ u64 kbase_reg_read64_coherent(struct kbase_device *kbdev, u32 reg_enum) u32 hi1 = 0, hi2 = 0, lo = 0; u32 offset; - if (WARN_ON(!kbdev->pm.backend.gpu_powered)) + if (WARN_ON(!kbase_reg_is_powered_access_allowed(kbdev, reg_enum))) return 0; if (unlikely(!kbase_reg_is_accessible(kbdev, reg_enum, KBASE_REGMAP_PERM_READ | KBASE_REGMAP_WIDTH_64_BIT))) @@ -116,7 +116,7 @@ void kbase_reg_write32(struct kbase_device *kbdev, u32 reg_enum, u32 value) unsigned long flags; u32 offset; - if (WARN_ON(!kbdev->pm.backend.gpu_powered)) + if (WARN_ON(!kbase_reg_is_powered_access_allowed(kbdev, reg_enum))) return; if (unlikely(!kbase_reg_is_accessible(kbdev, reg_enum, KBASE_REGMAP_PERM_WRITE | KBASE_REGMAP_WIDTH_32_BIT))) @@ -135,7 +135,7 @@ void kbase_reg_write64(struct kbase_device *kbdev, u32 reg_enum, u64 value) unsigned long flags; u32 offset; - if (WARN_ON(!kbdev->pm.backend.gpu_powered)) + if (WARN_ON(!kbase_reg_is_powered_access_allowed(kbdev, reg_enum))) return; if (unlikely(!kbase_reg_is_accessible(kbdev, reg_enum, KBASE_REGMAP_PERM_WRITE | KBASE_REGMAP_WIDTH_64_BIT))) diff --git 
a/drivers/gpu/arm/bifrost/hw_access/backend/mali_kbase_hw_access_real_hw.c b/drivers/gpu/arm/bifrost/hw_access/backend/mali_kbase_hw_access_real_hw.c index f4afbf55e312..ecf58cb45d15 100644 --- a/drivers/gpu/arm/bifrost/hw_access/backend/mali_kbase_hw_access_real_hw.c +++ b/drivers/gpu/arm/bifrost/hw_access/backend/mali_kbase_hw_access_real_hw.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2023-2024 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -24,12 +24,13 @@ #include #include +#include u64 kbase_reg_get_gpu_id(struct kbase_device *kbdev) { u32 val[2] = { 0 }; - val[0] = readl(kbdev->reg); + val[0] = mali_readl(kbdev->reg); return (u64)val[0] | ((u64)val[1] << 32); @@ -39,13 +40,13 @@ u32 kbase_reg_read32(struct kbase_device *kbdev, u32 reg_enum) { u32 val; - if (WARN_ON(!kbdev->pm.backend.gpu_powered)) + if (WARN_ON(!kbase_reg_is_powered_access_allowed(kbdev, reg_enum))) return 0; if (unlikely(!kbase_reg_is_accessible(kbdev, reg_enum, KBASE_REGMAP_PERM_READ | KBASE_REGMAP_WIDTH_32_BIT))) return 0; - val = readl(kbdev->regmap.regs[reg_enum]); + val = mali_readl(kbdev->regmap.regs[reg_enum]); #if IS_ENABLED(CONFIG_DEBUG_FS) if (unlikely(kbdev->io_history.enabled)) @@ -63,14 +64,13 @@ u64 kbase_reg_read64(struct kbase_device *kbdev, u32 reg_enum) { u64 val; - if (WARN_ON(!kbdev->pm.backend.gpu_powered)) + if (WARN_ON(!kbase_reg_is_powered_access_allowed(kbdev, reg_enum))) return 0; if (unlikely(!kbase_reg_is_accessible(kbdev, reg_enum, KBASE_REGMAP_PERM_READ | KBASE_REGMAP_WIDTH_64_BIT))) return 0; - val = (u64)readl(kbdev->regmap.regs[reg_enum]) | - ((u64)readl(kbdev->regmap.regs[reg_enum] + 4) << 32); + val = mali_readq(kbdev->regmap.regs[reg_enum]); #if IS_ENABLED(CONFIG_DEBUG_FS) if 
(unlikely(kbdev->io_history.enabled)) { @@ -90,23 +90,14 @@ KBASE_EXPORT_TEST_API(kbase_reg_read64); u64 kbase_reg_read64_coherent(struct kbase_device *kbdev, u32 reg_enum) { u64 val; -#if !IS_ENABLED(CONFIG_MALI_64BIT_HW_ACCESS) - u32 hi1, hi2, lo; -#endif - if (WARN_ON(!kbdev->pm.backend.gpu_powered)) + if (WARN_ON(!kbase_reg_is_powered_access_allowed(kbdev, reg_enum))) return 0; if (unlikely(!kbase_reg_is_accessible(kbdev, reg_enum, KBASE_REGMAP_PERM_READ | KBASE_REGMAP_WIDTH_64_BIT))) return 0; - do { - hi1 = readl(kbdev->regmap.regs[reg_enum] + 4); - lo = readl(kbdev->regmap.regs[reg_enum]); - hi2 = readl(kbdev->regmap.regs[reg_enum] + 4); - } while (hi1 != hi2); - - val = lo | (((u64)hi1) << 32); + val = mali_readq_coherent(kbdev->regmap.regs[reg_enum]); #if IS_ENABLED(CONFIG_DEBUG_FS) if (unlikely(kbdev->io_history.enabled)) { @@ -125,13 +116,13 @@ KBASE_EXPORT_TEST_API(kbase_reg_read64_coherent); void kbase_reg_write32(struct kbase_device *kbdev, u32 reg_enum, u32 value) { - if (WARN_ON(!kbdev->pm.backend.gpu_powered)) + if (WARN_ON(!kbase_reg_is_powered_access_allowed(kbdev, reg_enum))) return; if (unlikely(!kbase_reg_is_accessible(kbdev, reg_enum, KBASE_REGMAP_PERM_WRITE | KBASE_REGMAP_WIDTH_32_BIT))) return; - writel(value, kbdev->regmap.regs[reg_enum]); + mali_writel(value, kbdev->regmap.regs[reg_enum]); #if IS_ENABLED(CONFIG_DEBUG_FS) if (unlikely(kbdev->io_history.enabled)) @@ -145,14 +136,13 @@ KBASE_EXPORT_TEST_API(kbase_reg_write32); void kbase_reg_write64(struct kbase_device *kbdev, u32 reg_enum, u64 value) { - if (WARN_ON(!kbdev->pm.backend.gpu_powered)) + if (WARN_ON(!kbase_reg_is_powered_access_allowed(kbdev, reg_enum))) return; if (unlikely(!kbase_reg_is_accessible(kbdev, reg_enum, KBASE_REGMAP_PERM_WRITE | KBASE_REGMAP_WIDTH_64_BIT))) return; - writel(value & 0xFFFFFFFF, kbdev->regmap.regs[reg_enum]); - writel(value >> 32, kbdev->regmap.regs[reg_enum] + 4); + mali_writeq(value, kbdev->regmap.regs[reg_enum]); #if IS_ENABLED(CONFIG_DEBUG_FS) if 
(unlikely(kbdev->io_history.enabled)) { diff --git a/drivers/gpu/arm/bifrost/hw_access/mali_kbase_hw_access.c b/drivers/gpu/arm/bifrost/hw_access/mali_kbase_hw_access.c index 16a27c780d3b..d7dd6200d497 100644 --- a/drivers/gpu/arm/bifrost/hw_access/mali_kbase_hw_access.c +++ b/drivers/gpu/arm/bifrost/hw_access/mali_kbase_hw_access.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2023-2024 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -24,9 +24,56 @@ #include #include "mali_kbase_hw_access.h" +#include "mali_kbase_hw_access_regmap.h" #include +#define KBASE_REGMAP_ACCESS_ALWAYS_POWERED (1U << 16) + +static u32 always_powered_regs[] = { + +#if !MALI_USE_CSF + PTM_AW_IRQ_CLEAR, + PTM_AW_IRQ_INJECTION, + PTM_AW_IRQ_MASK, + PTM_AW_IRQ_RAWSTAT, + PTM_AW_IRQ_STATUS, + PTM_AW_MESSAGE__PTM_INCOMING_MESSAGE0, + PTM_AW_MESSAGE__PTM_INCOMING_MESSAGE1, + PTM_AW_MESSAGE__PTM_OUTGOING_MESSAGE0, + PTM_AW_MESSAGE__PTM_OUTGOING_MESSAGE1, + PTM_AW_MESSAGE__PTM_OUTGOING_MESSAGE_STATUS, + PTM_ID, +#endif /* !MALI_USE_CSF */ +}; + +static void kbasep_reg_setup_always_powered_registers(struct kbase_device *kbdev) +{ + u32 i; + + +#if !MALI_USE_CSF + if (kbdev->gpu_props.gpu_id.arch_id < GPU_ID_ARCH_MAKE(9, 14, 0)) + return; +#endif /* !MALI_USE_CSF */ + + for (i = 0; i < ARRAY_SIZE(always_powered_regs); i++) { + u32 reg_enum = always_powered_regs[i]; + + if (!kbase_reg_is_valid(kbdev, reg_enum)) + continue; + + kbdev->regmap.flags[reg_enum] |= KBASE_REGMAP_ACCESS_ALWAYS_POWERED; + } +} + +bool kbase_reg_is_powered_access_allowed(struct kbase_device *kbdev, u32 reg_enum) +{ + if (kbdev->regmap.flags[reg_enum] & KBASE_REGMAP_ACCESS_ALWAYS_POWERED) + return true; + return kbdev->pm.backend.gpu_powered; +} + bool kbase_reg_is_size64(struct
kbase_device *kbdev, u32 reg_enum) { if (WARN_ON(reg_enum >= kbdev->regmap.size)) @@ -67,6 +114,11 @@ bool kbase_reg_is_accessible(struct kbase_device *kbdev, u32 reg_enum, u32 flags return true; } +bool kbase_reg_is_init(struct kbase_device *kbdev) +{ + return (kbdev->regmap.regs != NULL) && (kbdev->regmap.flags != NULL); +} + int kbase_reg_get_offset(struct kbase_device *kbdev, u32 reg_enum, u32 *offset) { if (unlikely(!kbase_reg_is_accessible(kbdev, reg_enum, 0))) @@ -108,12 +160,12 @@ int kbase_regmap_init(struct kbase_device *kbdev) return -ENOMEM; } + kbasep_reg_setup_always_powered_registers(kbdev); + dev_info(kbdev->dev, "Register LUT %08x initialized for GPU arch 0x%08x\n", lut_arch_id, kbdev->gpu_props.gpu_id.arch_id); -#if IS_ENABLED(CONFIG_MALI_64BIT_HW_ACCESS) && IS_ENABLED(CONFIG_MALI_REAL_HW) - dev_info(kbdev->dev, "64-bit HW access enabled\n"); -#endif + return 0; } diff --git a/drivers/gpu/arm/bifrost/hw_access/mali_kbase_hw_access.h b/drivers/gpu/arm/bifrost/hw_access/mali_kbase_hw_access.h index 40356596163d..654fb685fa06 100644 --- a/drivers/gpu/arm/bifrost/hw_access/mali_kbase_hw_access.h +++ b/drivers/gpu/arm/bifrost/hw_access/mali_kbase_hw_access.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2023-2024 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -128,6 +128,25 @@ bool kbase_reg_is_valid(struct kbase_device *kbdev, u32 reg_enum); */ bool kbase_reg_is_accessible(struct kbase_device *kbdev, u32 reg_enum, u32 flags); +/** + * kbase_reg_is_powered_access_allowed - check if register is accessible given + * current power state + * + * @kbdev: Kbase device pointer + * @reg_enum: Register enum + * + * Return: boolean if register is accessible + */ +bool kbase_reg_is_powered_access_allowed(struct kbase_device *kbdev, u32 reg_enum); + +/** + * kbase_reg_is_init - check if regmap is initialized + * + * @kbdev: Kbase device pointer + * Return: boolean if regmap is initialized + */ +bool kbase_reg_is_init(struct kbase_device *kbdev); + /** * kbase_reg_get_offset - get register offset from enum * @kbdev: Kbase device pointer diff --git a/drivers/gpu/arm/bifrost/hw_access/mali_kbase_hw_access_regmap.h b/drivers/gpu/arm/bifrost/hw_access/mali_kbase_hw_access_regmap.h index 97adb1322a35..591391c6a8a1 100644 --- a/drivers/gpu/arm/bifrost/hw_access/mali_kbase_hw_access_regmap.h +++ b/drivers/gpu/arm/bifrost/hw_access/mali_kbase_hw_access_regmap.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2023-2024 ARM Limited. All rights reserved.
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -308,6 +308,16 @@ #define TC_CLOCK_GATE_OVERRIDE (1ul << 0) /* End TILER_CONFIG register */ +/* L2_FEATURES register */ +#define L2_FEATURES_CACHE_SIZE_SHIFT GPU_U(16) +#define L2_FEATURES_CACHE_SIZE_MASK (GPU_U(0xFF) << L2_FEATURES_CACHE_SIZE_SHIFT) +#define L2_FEATURES_CACHE_SIZE_GET(reg_val) \ + (((reg_val)&L2_FEATURES_CACHE_SIZE_MASK) >> L2_FEATURES_CACHE_SIZE_SHIFT) +#define L2_FEATURES_CACHE_SIZE_SET(reg_val, value) \ + (~(~(reg_val) | L2_FEATURES_CACHE_SIZE_MASK) | \ + (((value) << L2_FEATURES_CACHE_SIZE_SHIFT) & L2_FEATURES_CACHE_SIZE_MASK)) +/* End L2_FEATURES register */ + /* L2_CONFIG register */ #define L2_CONFIG_SIZE_SHIFT 16 #define L2_CONFIG_SIZE_MASK (0xFFul << L2_CONFIG_SIZE_SHIFT) diff --git a/drivers/gpu/arm/bifrost/hw_access/mali_kbase_hw_access_regmap_legacy.h b/drivers/gpu/arm/bifrost/hw_access/mali_kbase_hw_access_regmap_legacy.h index a62d1707ebb7..9392d44f684b 100644 --- a/drivers/gpu/arm/bifrost/hw_access/mali_kbase_hw_access_regmap_legacy.h +++ b/drivers/gpu/arm/bifrost/hw_access/mali_kbase_hw_access_regmap_legacy.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2023-2024 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software diff --git a/drivers/gpu/arm/bifrost/hw_access/regmap/mali_kbase_regmap_csf_macros.h b/drivers/gpu/arm/bifrost/hw_access/regmap/mali_kbase_regmap_csf_macros.h index c3d12ad04c4e..e0568d8f8c6d 100644 --- a/drivers/gpu/arm/bifrost/hw_access/regmap/mali_kbase_regmap_csf_macros.h +++ b/drivers/gpu/arm/bifrost/hw_access/regmap/mali_kbase_regmap_csf_macros.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2023-2024 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -185,6 +185,7 @@ */ #define AS_MEMATTR_ATTRIBUTE0_MEMORY_TYPE_SHARED 0x0 + /* CSF_CONFIG register */ #define CSF_CONFIG_FORCE_COHERENCY_FEATURES_SHIFT 2 diff --git a/drivers/gpu/arm/bifrost/hw_access/regmap/mali_kbase_regmap_jm.c b/drivers/gpu/arm/bifrost/hw_access/regmap/mali_kbase_regmap_jm.c index 178d45501916..4f41693ff3c2 100644 --- a/drivers/gpu/arm/bifrost/hw_access/regmap/mali_kbase_regmap_jm.c +++ b/drivers/gpu/arm/bifrost/hw_access/regmap/mali_kbase_regmap_jm.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2023-2024 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -2240,6 +2240,56 @@ static void kbase_regmap_v9_2_init(struct kbase_device *kbdev) kbdev->regmap.regs[GPU_CONTROL__L2_CONFIG] = kbdev->reg + 0x48; } +static void kbase_regmap_v9_14_init(struct kbase_device *kbdev) +{ + if (kbdev->regmap.regs == NULL && kbdev->regmap.flags == NULL) { + kbdev->regmap.size = NR_V9_14_REGS; + kbdev->regmap.regs = + kcalloc(kbdev->regmap.size, sizeof(void __iomem *), GFP_KERNEL); + kbdev->regmap.flags = kcalloc(kbdev->regmap.size, sizeof(u32), GFP_KERNEL); + } + + if (WARN_ON(kbdev->regmap.regs == NULL)) + return; + if (WARN_ON(kbdev->regmap.flags == NULL)) + return; + + kbase_regmap_v9_2_init(kbdev); + + kbdev->regmap.flags[PTM_AW_IRQ_CLEAR] = KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | + KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[PTM_AW_IRQ_INJECTION] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[PTM_AW_IRQ_MASK] = KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | + KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[PTM_AW_IRQ_RAWSTAT] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[PTM_AW_IRQ_STATUS] = KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[PTM_AW_MESSAGE__PTM_INCOMING_MESSAGE0] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[PTM_AW_MESSAGE__PTM_INCOMING_MESSAGE1] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[PTM_AW_MESSAGE__PTM_OUTGOING_MESSAGE0] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[PTM_AW_MESSAGE__PTM_OUTGOING_MESSAGE1] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[PTM_AW_MESSAGE__PTM_OUTGOING_MESSAGE_STATUS] = + KBASE_REGMAP_WIDTH_32_BIT | 
KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[PTM_ID] = KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ; + + kbdev->regmap.regs[PTM_AW_IRQ_CLEAR] = kbdev->reg + 0x1ffc8; + kbdev->regmap.regs[PTM_AW_IRQ_INJECTION] = kbdev->reg + 0x1ffd4; + kbdev->regmap.regs[PTM_AW_IRQ_MASK] = kbdev->reg + 0x1ffcc; + kbdev->regmap.regs[PTM_AW_IRQ_RAWSTAT] = kbdev->reg + 0x1ffc4; + kbdev->regmap.regs[PTM_AW_IRQ_STATUS] = kbdev->reg + 0x1ffd0; + kbdev->regmap.regs[PTM_AW_MESSAGE__PTM_INCOMING_MESSAGE0] = kbdev->reg + 0x1ffd8; + kbdev->regmap.regs[PTM_AW_MESSAGE__PTM_INCOMING_MESSAGE1] = kbdev->reg + 0x1ffdc; + kbdev->regmap.regs[PTM_AW_MESSAGE__PTM_OUTGOING_MESSAGE0] = kbdev->reg + 0x1ffe4; + kbdev->regmap.regs[PTM_AW_MESSAGE__PTM_OUTGOING_MESSAGE1] = kbdev->reg + 0x1ffe8; + kbdev->regmap.regs[PTM_AW_MESSAGE__PTM_OUTGOING_MESSAGE_STATUS] = kbdev->reg + 0x1ffe0; + kbdev->regmap.regs[PTM_ID] = kbdev->reg + 0x1ffc0; +} + u32 kbase_regmap_backend_init(struct kbase_device *kbdev) { int i = 0; @@ -2254,6 +2304,7 @@ u32 kbase_regmap_backend_init(struct kbase_device *kbdev) { GPU_ID_ARCH_MAKE(7, 2, 0), kbase_regmap_v7_2_init }, { GPU_ID_ARCH_MAKE(9, 0, 0), kbase_regmap_v9_0_init }, { GPU_ID_ARCH_MAKE(9, 2, 0), kbase_regmap_v9_2_init }, + { GPU_ID_ARCH_MAKE(9, 14, 0), kbase_regmap_v9_14_init }, }; for (i = 0; i < ARRAY_SIZE(init_array) - 1; i++) { @@ -2967,6 +3018,18 @@ static char *enum_strings[] = { [GPU_CONTROL__CORE_FEATURES] = "GPU_CONTROL__CORE_FEATURES", [GPU_CONTROL__THREAD_TLS_ALLOC] = "GPU_CONTROL__THREAD_TLS_ALLOC", [GPU_CONTROL__L2_CONFIG] = "GPU_CONTROL__L2_CONFIG", + [PTM_AW_IRQ_CLEAR] = "PTM_AW_IRQ_CLEAR", + [PTM_AW_IRQ_INJECTION] = "PTM_AW_IRQ_INJECTION", + [PTM_AW_IRQ_MASK] = "PTM_AW_IRQ_MASK", + [PTM_AW_IRQ_RAWSTAT] = "PTM_AW_IRQ_RAWSTAT", + [PTM_AW_IRQ_STATUS] = "PTM_AW_IRQ_STATUS", + [PTM_AW_MESSAGE__PTM_INCOMING_MESSAGE0] = "PTM_AW_MESSAGE__PTM_INCOMING_MESSAGE0", + [PTM_AW_MESSAGE__PTM_INCOMING_MESSAGE1] = "PTM_AW_MESSAGE__PTM_INCOMING_MESSAGE1", + 
[PTM_AW_MESSAGE__PTM_OUTGOING_MESSAGE0] = "PTM_AW_MESSAGE__PTM_OUTGOING_MESSAGE0", + [PTM_AW_MESSAGE__PTM_OUTGOING_MESSAGE1] = "PTM_AW_MESSAGE__PTM_OUTGOING_MESSAGE1", + [PTM_AW_MESSAGE__PTM_OUTGOING_MESSAGE_STATUS] = + "PTM_AW_MESSAGE__PTM_OUTGOING_MESSAGE_STATUS", + [PTM_ID] = "PTM_ID", }; const char *kbase_reg_get_enum_string(u32 reg_enum) diff --git a/drivers/gpu/arm/bifrost/hw_access/regmap/mali_kbase_regmap_jm_enums.h b/drivers/gpu/arm/bifrost/hw_access/regmap/mali_kbase_regmap_jm_enums.h index f5618c4794db..59d8745eaf4a 100644 --- a/drivers/gpu/arm/bifrost/hw_access/regmap/mali_kbase_regmap_jm_enums.h +++ b/drivers/gpu/arm/bifrost/hw_access/regmap/mali_kbase_regmap_jm_enums.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2023-2024 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -759,4 +759,19 @@ enum kbase_regmap_enum_v9_2 { NR_V9_2_REGS, }; +enum kbase_regmap_enum_v9_14 { + PTM_AW_IRQ_CLEAR = NR_V9_2_REGS, /* (RW) 32-bit 0x1FFC8 */ + PTM_AW_IRQ_INJECTION, /* (RW) 32-bit 0x1FFD4 */ + PTM_AW_IRQ_MASK, /* (RW) 32-bit 0x1FFCC */ + PTM_AW_IRQ_RAWSTAT, /* (RO) 32-bit 0x1FFC4 */ + PTM_AW_IRQ_STATUS, /* (RO) 32-bit 0x1FFD0 */ + PTM_AW_MESSAGE__PTM_INCOMING_MESSAGE0, /* (RO) 32-bit 0x1FFD8 */ + PTM_AW_MESSAGE__PTM_INCOMING_MESSAGE1, /* (RO) 32-bit 0x1FFDC */ + PTM_AW_MESSAGE__PTM_OUTGOING_MESSAGE0, /* (RW) 32-bit 0x1FFE4 */ + PTM_AW_MESSAGE__PTM_OUTGOING_MESSAGE1, /* (RW) 32-bit 0x1FFE8 */ + PTM_AW_MESSAGE__PTM_OUTGOING_MESSAGE_STATUS, /* (RO) 32-bit 0x1FFE0 */ + PTM_ID, /* (RO) 32-bit 0x1FFC0 */ + NR_V9_14_REGS, +}; + #endif /* _MALI_KBASE_REGMAP_JM_ENUMS_H_ */ diff --git a/drivers/gpu/arm/bifrost/hw_access/regmap/mali_kbase_regmap_jm_macros.h b/drivers/gpu/arm/bifrost/hw_access/regmap/mali_kbase_regmap_jm_macros.h 
index c3bc0f3e9924..650ed9b31eea 100644 --- a/drivers/gpu/arm/bifrost/hw_access/regmap/mali_kbase_regmap_jm_macros.h +++ b/drivers/gpu/arm/bifrost/hw_access/regmap/mali_kbase_regmap_jm_macros.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2023-2024 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -47,6 +47,8 @@ #define MMU_AS_OFFSET(n, regname) ENUM_OFFSET(n, MMU_AS_ENUM(0, regname), MMU_AS_ENUM(1, regname)) #define MMU_AS_BASE_OFFSET(n) MMU_AS_OFFSET(n, TRANSTAB) +#define PTM_AW_MESSAGE_ENUM(regname) PTM_AW_MESSAGE__##regname + /* register value macros */ /* GPU_STATUS values */ #define GPU_STATUS_PRFCNT_ACTIVE (1 << 2) /* Set if the performance counters are active. */ @@ -295,4 +297,11 @@ (GPU_FAULT | MULTIPLE_GPU_FAULTS | RESET_COMPLETED | POWER_CHANGED_ALL | \ PRFCNT_SAMPLE_COMPLETED) +#define WINDOW_IRQ_MESSAGE (1U << 0) +#define WINDOW_IRQ_INVALID_ACCESS (1U << 1) +#define WINDOW_IRQ_GPU (1U << 2) +#define WINDOW_IRQ_JOB (1U << 3) +#define WINDOW_IRQ_MMU (1U << 4) +#define WINDOW_IRQ_EVENT (1U << 5) + #endif /* _MALI_KBASE_REGMAP_JM_MACROS_H_ */ diff --git a/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend.h b/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend.h index cc3ba98ab6fe..a6d418b8e82c 100644 --- a/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend.h +++ b/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2018-2024 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -78,6 +78,18 @@ typedef int kbase_hwcnt_backend_init_fn(const struct kbase_hwcnt_backend_info *i */ typedef void kbase_hwcnt_backend_term_fn(struct kbase_hwcnt_backend *backend); +/** + * typedef kbase_hwcnt_backend_acquire_fn - Enable counter collection. + * @backend: Non-NULL pointer to backend interface. + */ +typedef void kbase_hwcnt_backend_acquire_fn(const struct kbase_hwcnt_backend *backend); + +/** + * typedef kbase_hwcnt_backend_release_fn - Disable counter collection. + * @backend: Non-NULL pointer to backend interface. + */ +typedef void kbase_hwcnt_backend_release_fn(const struct kbase_hwcnt_backend *backend); + /** * typedef kbase_hwcnt_backend_timestamp_ns_fn - Get the current backend * timestamp. @@ -206,6 +218,10 @@ typedef int kbase_hwcnt_backend_dump_get_fn(struct kbase_hwcnt_backend *backend, * metadata. * @init: Function ptr to initialise an instance of the backend. * @term: Function ptr to terminate an instance of the backend. + * @acquire: Callback to indicate that counter collection has + * been enabled. + * @release: Callback to indicate that counter collection has + * been disabled. * @timestamp_ns: Function ptr to get the current backend timestamp. * @dump_enable: Function ptr to enable dumping. 
* @dump_enable_nolock: Function ptr to enable dumping while the @@ -222,6 +238,8 @@ struct kbase_hwcnt_backend_interface { kbase_hwcnt_backend_metadata_fn *metadata; kbase_hwcnt_backend_init_fn *init; kbase_hwcnt_backend_term_fn *term; + kbase_hwcnt_backend_acquire_fn *acquire; + kbase_hwcnt_backend_release_fn *release; kbase_hwcnt_backend_timestamp_ns_fn *timestamp_ns; kbase_hwcnt_backend_dump_enable_fn *dump_enable; kbase_hwcnt_backend_dump_enable_nolock_fn *dump_enable_nolock; diff --git a/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_csf.c b/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_csf.c index d605253752ca..b937c047a94a 100644 --- a/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_csf.c +++ b/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_csf.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2021-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2021-2024 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -21,7 +21,6 @@ #include "hwcnt/backend/mali_kbase_hwcnt_backend_csf.h" #include "hwcnt/mali_kbase_hwcnt_gpu.h" -#include "hwcnt/mali_kbase_hwcnt_types.h" #include #include @@ -31,6 +30,7 @@ #include #include #include +#include #ifndef BASE_MAX_NR_CLOCKS_REGULATORS #define BASE_MAX_NR_CLOCKS_REGULATORS 4 @@ -255,7 +255,8 @@ struct kbase_hwcnt_csf_physical_layout { * @hwc_threshold_work: Worker for consuming available samples when * threshold interrupt raised. * @num_l2_slices: Current number of L2 slices allocated to the GPU. - * @shader_present_bitmap: Current shader-present bitmap that is allocated to the GPU. + * @powered_shader_core_mask: The common mask between the debug_core_mask + * and the shader_present_bitmap. 
*/ struct kbase_hwcnt_backend_csf { struct kbase_hwcnt_backend_csf_info *info; @@ -283,7 +284,7 @@ struct kbase_hwcnt_backend_csf { struct work_struct hwc_dump_work; struct work_struct hwc_threshold_work; size_t num_l2_slices; - u64 shader_present_bitmap; + u64 powered_shader_core_mask; }; static bool kbasep_hwcnt_backend_csf_backend_exists(struct kbase_hwcnt_backend_csf_info *csf_info) @@ -296,9 +297,11 @@ static bool kbasep_hwcnt_backend_csf_backend_exists(struct kbase_hwcnt_backend_c } void kbase_hwcnt_backend_csf_set_hw_availability(struct kbase_hwcnt_backend_interface *iface, - size_t num_l2_slices, u64 shader_present_bitmap) + size_t num_l2_slices, u64 shader_present, + u64 power_core_mask) { struct kbase_hwcnt_backend_csf_info *csf_info; + u64 norm_shader_present = power_core_mask & shader_present; if (!iface) return; @@ -309,16 +312,17 @@ void kbase_hwcnt_backend_csf_set_hw_availability(struct kbase_hwcnt_backend_inte if (!csf_info || !csf_info->backend) return; + if (WARN_ON(csf_info->backend->enable_state != KBASE_HWCNT_BACKEND_CSF_DISABLED)) return; if (WARN_ON(num_l2_slices > csf_info->backend->phys_layout.mmu_l2_cnt) || - WARN_ON((shader_present_bitmap & csf_info->backend->phys_layout.shader_avail_mask) != - shader_present_bitmap)) + WARN_ON((norm_shader_present & csf_info->backend->phys_layout.shader_avail_mask) != + norm_shader_present)) return; csf_info->backend->num_l2_slices = num_l2_slices; - csf_info->backend->shader_present_bitmap = shader_present_bitmap; + csf_info->backend->powered_shader_core_mask = norm_shader_present; } /** @@ -424,7 +428,7 @@ static void kbasep_hwcnt_backend_csf_init_layout( WARN_ON(!prfcnt_info); WARN_ON(!phys_layout); - shader_core_cnt = (size_t)fls64(prfcnt_info->core_mask); + shader_core_cnt = (size_t)fls64(prfcnt_info->sc_core_mask); values_per_block = prfcnt_info->prfcnt_block_size / KBASE_HWCNT_VALUE_HW_BYTES; fw_block_cnt = div_u64(prfcnt_info->prfcnt_fw_size, prfcnt_info->prfcnt_block_size); hw_block_cnt = 
div_u64(prfcnt_info->prfcnt_hw_size, prfcnt_info->prfcnt_block_size); @@ -445,7 +449,7 @@ static void kbasep_hwcnt_backend_csf_init_layout( .fw_block_cnt = fw_block_cnt, .hw_block_cnt = hw_block_cnt, .block_cnt = fw_block_cnt + hw_block_cnt, - .shader_avail_mask = prfcnt_info->core_mask, + .shader_avail_mask = prfcnt_info->sc_core_mask, .headers_per_block = KBASE_HWCNT_V5_HEADERS_PER_BLOCK, .values_per_block = values_per_block, .counters_per_block = values_per_block - KBASE_HWCNT_V5_HEADERS_PER_BLOCK, @@ -454,17 +458,20 @@ static void kbasep_hwcnt_backend_csf_init_layout( } static void -kbasep_hwcnt_backend_csf_reset_internal_buffers(struct kbase_hwcnt_backend_csf *backend_csf) +kbasep_hwcnt_backend_csf_reset_internal_buffers(struct kbase_hwcnt_backend_csf *backend_csf, + bool user_bufs) { size_t user_buf_bytes = backend_csf->info->metadata->dump_buf_bytes; size_t block_state_bytes = backend_csf->phys_layout.block_cnt * KBASE_HWCNT_BLOCK_STATE_BYTES * KBASE_HWCNT_BLOCK_STATE_STRIDE; - memset(backend_csf->to_user_buf, 0, user_buf_bytes); memset(backend_csf->accum_buf, 0, user_buf_bytes); memset(backend_csf->old_sample_buf, 0, backend_csf->info->prfcnt_info.dump_bytes); memset(backend_csf->block_states, 0, block_state_bytes); - memset(backend_csf->to_user_block_states, 0, block_state_bytes); + if (user_bufs) { + memset(backend_csf->to_user_buf, 0, user_buf_bytes); + memset(backend_csf->to_user_block_states, 0, block_state_bytes); + } } static void @@ -517,34 +524,21 @@ static void kbasep_hwcnt_backend_csf_update_user_sample(struct kbase_hwcnt_backe memset(backend_csf->block_states, 0, block_state_bytes); } -/** - * kbasep_hwcnt_backend_csf_update_block_state - Update block state of a block instance with - * information from a sample. - * @phys_layout: Physical memory layout information of HWC - * sample buffer. - * @enable_mask: Counter enable mask for the block whose state is being updated. - * @enable_state: The CSF backend internal enabled state. 
- * @exiting_protm: Whether or not the sample is taken when the GPU is exiting - * protected mode. - * @block_idx: Index of block within the ringbuffer. - * @block_state: Pointer to existing block state of the block whose state is being - * updated. - * @fw_in_protected_mode: Whether or not GPU is in protected mode during sampling. - */ -static void kbasep_hwcnt_backend_csf_update_block_state( - const struct kbase_hwcnt_csf_physical_layout *phys_layout, const u32 enable_mask, - enum kbase_hwcnt_backend_csf_enable_state enable_state, bool exiting_protm, - size_t block_idx, blk_stt_t *const block_state, bool fw_in_protected_mode) +void kbasep_hwcnt_backend_csf_update_block_state(struct kbase_hwcnt_backend_csf *backend, + const u32 enable_mask, bool exiting_protm, + size_t block_idx, blk_stt_t *const block_state, + bool fw_in_protected_mode) { + const struct kbase_hwcnt_csf_physical_layout *phys_layout = &backend->phys_layout; /* Offset of shader core blocks from the start of the HW blocks in the sample */ size_t shader_core_block_offset = - (size_t)(phys_layout->hw_block_cnt - phys_layout->shader_cnt); + (size_t)(phys_layout->block_cnt - phys_layout->shader_cnt); bool is_shader_core_block; - is_shader_core_block = block_idx >= shader_core_block_offset; + is_shader_core_block = (block_idx >= shader_core_block_offset); /* Set power bits for the block state for the block, for the sample */ - switch (enable_state) { + switch (backend->enable_state) { /* Disabled states */ case KBASE_HWCNT_BACKEND_CSF_DISABLED: case KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_ENABLED: @@ -592,21 +586,45 @@ static void kbasep_hwcnt_backend_csf_update_block_state( KBASE_HWCNT_STATE_NORMAL); else kbase_hwcnt_block_state_append(block_state, KBASE_HWCNT_STATE_NORMAL); + + /* powered_shader_core_mask stored in the backend is a combination of + * the shader present and the debug core mask, so explicit checking of the + * core mask is not required here. 
+ */ + if (is_shader_core_block) { + u64 current_shader_core = 1ULL << (block_idx - shader_core_block_offset); + + WARN_ON_ONCE(backend->phys_layout.shader_cnt > 64); + + if (current_shader_core & backend->info->backend->powered_shader_core_mask) + kbase_hwcnt_block_state_append(block_state, KBASE_HWCNT_STATE_AVAILABLE); + else if (current_shader_core & ~backend->info->backend->powered_shader_core_mask) + kbase_hwcnt_block_state_append(block_state, KBASE_HWCNT_STATE_UNAVAILABLE); + else + WARN_ON_ONCE(true); + } + else + kbase_hwcnt_block_state_append(block_state, KBASE_HWCNT_STATE_AVAILABLE); } -static void kbasep_hwcnt_backend_csf_accumulate_sample( - const struct kbase_hwcnt_csf_physical_layout *phys_layout, size_t dump_bytes, - u64 *accum_buf, const u32 *old_sample_buf, const u32 *new_sample_buf, - blk_stt_t *const block_states, bool clearing_samples, - enum kbase_hwcnt_backend_csf_enable_state enable_state, bool fw_in_protected_mode) +static void kbasep_hwcnt_backend_csf_accumulate_sample(struct kbase_hwcnt_backend_csf *backend, + const u32 *old_sample_buf, + const u32 *new_sample_buf) { + const struct kbase_hwcnt_csf_physical_layout *phys_layout = &backend->phys_layout; + const size_t dump_bytes = backend->info->prfcnt_info.dump_bytes; + const size_t values_per_block = phys_layout->values_per_block; + blk_stt_t *const block_states = backend->block_states; + const bool fw_in_protected_mode = backend->info->fw_in_protected_mode; + const bool clearing_samples = backend->info->prfcnt_info.clearing_samples; + u64 *accum_buf = backend->accum_buf; + size_t block_idx; const u32 *old_block = old_sample_buf; const u32 *new_block = new_sample_buf; u64 *acc_block = accum_buf; /* Flag to indicate whether current sample is exiting protected mode. */ bool exiting_protm = false; - const size_t values_per_block = phys_layout->values_per_block; /* The block pointers now point to the first HW block, which is always a CSHW/front-end * block. 
The counter enable mask for this block can be checked to determine whether this @@ -620,9 +638,8 @@ static void kbasep_hwcnt_backend_csf_accumulate_sample( const u32 old_enable_mask = old_block[phys_layout->enable_mask_offset]; const u32 new_enable_mask = new_block[phys_layout->enable_mask_offset]; /* Update block state with information of the current sample */ - kbasep_hwcnt_backend_csf_update_block_state(phys_layout, new_enable_mask, - enable_state, exiting_protm, block_idx, - &block_states[block_idx], + kbasep_hwcnt_backend_csf_update_block_state(backend, new_enable_mask, exiting_protm, + block_idx, &block_states[block_idx], fw_in_protected_mode); if (!(new_enable_mask & HWCNT_BLOCK_EMPTY_SAMPLE)) { @@ -706,7 +723,6 @@ static void kbasep_hwcnt_backend_csf_accumulate_samples(struct kbase_hwcnt_backe u8 *cpu_dump_base = (u8 *)backend_csf->ring_buf_cpu_base; const size_t ring_buf_cnt = backend_csf->info->ring_buf_cnt; const size_t buf_dump_bytes = backend_csf->info->prfcnt_info.dump_bytes; - bool clearing_samples = backend_csf->info->prfcnt_info.clearing_samples; u32 *old_sample_buf = backend_csf->old_sample_buf; u32 *new_sample_buf = old_sample_buf; const struct kbase_hwcnt_csf_physical_layout *phys_layout = &backend_csf->phys_layout; @@ -740,10 +756,8 @@ static void kbasep_hwcnt_backend_csf_accumulate_samples(struct kbase_hwcnt_backe const u32 buf_idx = raw_idx & (ring_buf_cnt - 1); new_sample_buf = (u32 *)&cpu_dump_base[buf_idx * buf_dump_bytes]; - kbasep_hwcnt_backend_csf_accumulate_sample( - phys_layout, buf_dump_bytes, backend_csf->accum_buf, old_sample_buf, - new_sample_buf, backend_csf->block_states, clearing_samples, - backend_csf->enable_state, backend_csf->info->fw_in_protected_mode); + kbasep_hwcnt_backend_csf_accumulate_sample(backend_csf, old_sample_buf, + new_sample_buf); old_sample_buf = new_sample_buf; } @@ -1215,11 +1229,6 @@ static void kbasep_hwcnt_backend_csf_dump_disable(struct kbase_hwcnt_backend *ba backend_csf->ring_buf, 0, 
backend_csf->info->ring_buf_cnt, false); - /* Reset accumulator, old_sample_buf and user_sample to all-0 to prepare - * for next enable. - */ - kbasep_hwcnt_backend_csf_reset_internal_buffers(backend_csf); - /* Disabling HWCNT is an indication that blocks have been powered off. This is important to * know for L2, CSHW, and Tiler blocks, as this is currently the only way a backend can * know if they are being powered off. @@ -1255,6 +1264,12 @@ static void kbasep_hwcnt_backend_csf_dump_disable(struct kbase_hwcnt_backend *ba kbase_hwcnt_block_state_set(&backend_csf->accum_all_blk_stt, KBASE_HWCNT_STATE_UNKNOWN); } + + /* Reset accumulator, old_sample_buf and block_states to all-0 to prepare for next enable. + * Reset user buffers if ownership is transferred to the caller (i.e. dump_buffer + * is provided). + */ + kbasep_hwcnt_backend_csf_reset_internal_buffers(backend_csf, dump_buffer); } /* CSF backend implementation of kbase_hwcnt_backend_dump_request_fn */ @@ -1279,6 +1294,11 @@ static int kbasep_hwcnt_backend_csf_dump_request(struct kbase_hwcnt_backend *bac backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED; *dump_time_ns = kbasep_hwcnt_backend_csf_timestamp_ns(backend); kbasep_hwcnt_backend_csf_cc_update(backend_csf); + /* There is a possibility that the transition to enabled state will remain + * during multiple dumps, hence append the OFF state. 
+ */ + kbase_hwcnt_block_state_append(&backend_csf->accum_all_blk_stt, + KBASE_HWCNT_STATE_OFF); backend_csf->user_requested = true; backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags); return 0; @@ -1457,7 +1477,7 @@ static int kbasep_hwcnt_backend_csf_dump_get(struct kbase_hwcnt_backend *backend ret = kbase_hwcnt_csf_dump_get(dst, backend_csf->to_user_buf, backend_csf->to_user_block_states, dst_enable_map, backend_csf->num_l2_slices, - backend_csf->shader_present_bitmap, accumulate); + backend_csf->powered_shader_core_mask, accumulate); /* If no error occurred (zero ret value), then update block state for all blocks in the * accumulation with the current sample's block state. @@ -1469,6 +1489,12 @@ static int kbasep_hwcnt_backend_csf_dump_get(struct kbase_hwcnt_backend *backend KBASE_HWCNT_STATE_UNKNOWN); } + /* Clear consumed user buffers. */ + memset(backend_csf->to_user_buf, 0, backend_csf->info->metadata->dump_buf_bytes); + memset(backend_csf->to_user_block_states, 0, + backend_csf->phys_layout.block_cnt * KBASE_HWCNT_BLOCK_STATE_BYTES * + KBASE_HWCNT_BLOCK_STATE_STRIDE); + return ret; } @@ -1684,6 +1710,22 @@ static void kbasep_hwcnt_backend_csf_term(struct kbase_hwcnt_backend *backend) kbasep_hwcnt_backend_csf_destroy(backend_csf); } +static void kbasep_hwcnt_backend_csf_acquire(const struct kbase_hwcnt_backend *backend) +{ + struct kbase_hwcnt_backend_csf *backend_csf = (struct kbase_hwcnt_backend_csf *)backend; + struct kbase_hwcnt_backend_csf_info *csf_info = backend_csf->info; + + csf_info->csf_if->acquire(csf_info->csf_if->ctx); +} + +static void kbasep_hwcnt_backend_csf_release(const struct kbase_hwcnt_backend *backend) +{ + struct kbase_hwcnt_backend_csf *backend_csf = (struct kbase_hwcnt_backend_csf *)backend; + struct kbase_hwcnt_backend_csf_info *csf_info = backend_csf->info; + + csf_info->csf_if->release(csf_info->csf_if->ctx); +} + /** * kbasep_hwcnt_backend_csf_info_destroy() - Destroy a CSF backend info. 
* @info: Pointer to info to destroy. @@ -2098,7 +2140,7 @@ int kbase_hwcnt_backend_csf_metadata_init(struct kbase_hwcnt_backend_interface * gpu_info.has_fw_counters = csf_info->prfcnt_info.prfcnt_fw_size > 0; gpu_info.l2_count = csf_info->prfcnt_info.l2_count; gpu_info.csg_cnt = csf_info->prfcnt_info.csg_count; - gpu_info.core_mask = csf_info->prfcnt_info.core_mask; + gpu_info.sc_core_mask = csf_info->prfcnt_info.sc_core_mask; gpu_info.clk_cnt = csf_info->prfcnt_info.clk_cnt; gpu_info.prfcnt_values_per_block = csf_info->prfcnt_info.prfcnt_block_size / KBASE_HWCNT_VALUE_HW_BYTES; @@ -2115,7 +2157,7 @@ void kbase_hwcnt_backend_csf_metadata_term(struct kbase_hwcnt_backend_interface csf_info = (struct kbase_hwcnt_backend_csf_info *)iface->info; if (csf_info->metadata) { - kbase_hwcnt_csf_metadata_destroy(csf_info->metadata); + kbase_hwcnt_metadata_destroy(csf_info->metadata); csf_info->metadata = NULL; } } @@ -2142,6 +2184,8 @@ int kbase_hwcnt_backend_csf_create(struct kbase_hwcnt_backend_csf_if *csf_if, u3 iface->metadata = kbasep_hwcnt_backend_csf_metadata; iface->init = kbasep_hwcnt_backend_csf_init; iface->term = kbasep_hwcnt_backend_csf_term; + iface->acquire = kbasep_hwcnt_backend_csf_acquire; + iface->release = kbasep_hwcnt_backend_csf_release; iface->timestamp_ns = kbasep_hwcnt_backend_csf_timestamp_ns; iface->dump_enable = kbasep_hwcnt_backend_csf_dump_enable; iface->dump_enable_nolock = kbasep_hwcnt_backend_csf_dump_enable_nolock; diff --git a/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_csf.h b/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_csf.h index 2487db272a35..104f9c77a945 100644 --- a/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_csf.h +++ b/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_csf.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2021-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2021-2024 ARM Limited. 
All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -30,8 +30,10 @@ #include "hwcnt/backend/mali_kbase_hwcnt_backend.h" #include "hwcnt/backend/mali_kbase_hwcnt_backend_csf_if.h" #include "hwcnt/mali_kbase_hwcnt_watchdog_if.h" +#include "hwcnt/mali_kbase_hwcnt_types.h" struct kbase_hwcnt_physical_enable_map; +struct kbase_hwcnt_backend_csf; /** * kbase_hwcnt_backend_csf_create() - Create a CSF hardware counter backend @@ -123,11 +125,12 @@ void kbase_hwcnt_backend_csf_on_before_reset(struct kbase_hwcnt_backend_interfac * this function is called. * @iface: Non-NULL pointer to HWC backend interface. * @num_l2_slices: Current number of L2 slices allocated to the GPU. - * @shader_present_bitmap: Current shader-present bitmap that is allocated to the GPU. + * @shader_present: Shader_present of the current configuration. + * @power_core_mask: Mask containing changed shader core power state. */ void kbase_hwcnt_backend_csf_set_hw_availability(struct kbase_hwcnt_backend_interface *iface, - size_t num_l2_slices, - uint64_t shader_present_bitmap); + size_t num_l2_slices, u64 shader_present, + u64 power_core_mask); /** kbasep_hwcnt_backend_csf_process_enable_map() - Process the enable_map to * guarantee headers are @@ -174,4 +177,21 @@ void kbase_hwcnt_backend_csf_on_prfcnt_enable(struct kbase_hwcnt_backend_interfa */ void kbase_hwcnt_backend_csf_on_prfcnt_disable(struct kbase_hwcnt_backend_interface *iface); +/** + * kbasep_hwcnt_backend_csf_update_block_state - Update block state of a block instance with + * information from a sample. + * @backend: CSF hardware counter backend. + * @enable_mask: Counter enable mask for the block whose state is being updated. + * @exiting_protm: Whether or not the sample is taken when the GPU is exiting + * protected mode. + * @block_idx: Index of block within the ringbuffer. 
+ * @block_state: Pointer to existing block state of the block whose state is being + * updated. + * @fw_in_protected_mode: Whether or not GPU is in protected mode during sampling. + */ +void kbasep_hwcnt_backend_csf_update_block_state(struct kbase_hwcnt_backend_csf *backend, + const u32 enable_mask, bool exiting_protm, + size_t block_idx, blk_stt_t *const block_state, + bool fw_in_protected_mode); + #endif /* _KBASE_HWCNT_BACKEND_CSF_H_ */ diff --git a/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if.h b/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if.h index 65bb965bcf9c..81f809fdc83a 100644 --- a/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if.h +++ b/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2021-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2021-2024 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -68,7 +68,7 @@ struct kbase_hwcnt_backend_csf_if_enable { * @prfcnt_block_size: Bytes of each performance counter block. * @l2_count: The MMU L2 cache count. * @csg_count: The total number of CSGs in the system - * @core_mask: Shader core mask. + * @sc_core_mask: Shader core mask. * @clk_cnt: Clock domain count in the system. * @clearing_samples: Indicates whether counters are cleared after each sample * is taken. 
@@ -80,7 +80,7 @@ struct kbase_hwcnt_backend_csf_if_prfcnt_info { size_t prfcnt_block_size; size_t l2_count; u32 csg_count; - u64 core_mask; + u64 sc_core_mask; u8 clk_cnt; bool clearing_samples; }; @@ -114,6 +114,20 @@ typedef void (*kbase_hwcnt_backend_csf_if_lock_fn)(struct kbase_hwcnt_backend_cs typedef void (*kbase_hwcnt_backend_csf_if_unlock_fn)(struct kbase_hwcnt_backend_csf_if_ctx *ctx, unsigned long flags); +/** + * typedef kbase_hwcnt_backend_csf_if_acquire_fn - Enable counter collection. + * + * @ctx: Non-NULL pointer to a CSF context. + */ +typedef void (*kbase_hwcnt_backend_csf_if_acquire_fn)(struct kbase_hwcnt_backend_csf_if_ctx *ctx); + +/** + * typedef kbase_hwcnt_backend_csf_if_release_fn - Disable counter collection. + * + * @ctx: Non-NULL pointer to a CSF context. + */ +typedef void (*kbase_hwcnt_backend_csf_if_release_fn)(struct kbase_hwcnt_backend_csf_if_ctx *ctx); + /** * typedef kbase_hwcnt_backend_csf_if_get_prfcnt_info_fn - Get performance * counter information. @@ -272,6 +286,10 @@ typedef void (*kbase_hwcnt_backend_csf_if_get_gpu_cycle_count_fn)( * @assert_lock_held: Function ptr to assert backend spinlock is held. * @lock: Function ptr to acquire backend spinlock. * @unlock: Function ptr to release backend spinlock. + * @acquire: Callback to indicate that counter collection has + * been enabled. + * @release: Callback to indicate that counter collection has + * been disabled. * @get_prfcnt_info: Function ptr to get performance counter related * information. * @ring_buf_alloc: Function ptr to allocate ring buffer for CSF HWC. 
@@ -292,6 +310,8 @@ struct kbase_hwcnt_backend_csf_if { kbase_hwcnt_backend_csf_if_assert_lock_held_fn assert_lock_held; kbase_hwcnt_backend_csf_if_lock_fn lock; kbase_hwcnt_backend_csf_if_unlock_fn unlock; + kbase_hwcnt_backend_csf_if_acquire_fn acquire; + kbase_hwcnt_backend_csf_if_release_fn release; kbase_hwcnt_backend_csf_if_get_prfcnt_info_fn get_prfcnt_info; kbase_hwcnt_backend_csf_if_ring_buf_alloc_fn ring_buf_alloc; kbase_hwcnt_backend_csf_if_ring_buf_sync_fn ring_buf_sync; diff --git a/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.c b/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.c index d79a99e5e89f..29f8a2a8838d 100644 --- a/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.c +++ b/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2021-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2021-2024 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -131,6 +131,26 @@ static void kbasep_hwcnt_backend_csf_if_fw_unlock(struct kbase_hwcnt_backend_csf kbase_csf_scheduler_spin_unlock(kbdev, flags); } +static void kbasep_hwcnt_backend_csf_if_fw_acquire(struct kbase_hwcnt_backend_csf_if_ctx *ctx) +{ + struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx = + (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx; + + /* Mark performance counters collection as enabled */ + set_bit(KBASE_GPU_PERF_COUNTERS_COLLECTION_ENABLED, + &fw_ctx->kbdev->pm.backend.gpu_sleep_allowed); +} + +static void kbasep_hwcnt_backend_csf_if_fw_release(struct kbase_hwcnt_backend_csf_if_ctx *ctx) +{ + struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx = + (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx; + + /* Mark performance counters collection as disabled */ + clear_bit(KBASE_GPU_PERF_COUNTERS_COLLECTION_ENABLED, + &fw_ctx->kbdev->pm.backend.gpu_sleep_allowed); +} + /** * kbasep_hwcnt_backend_csf_if_fw_on_freq_change() - On freq change callback * @@ -229,7 +249,7 @@ static void kbasep_hwcnt_backend_csf_if_fw_get_prfcnt_info( *prfcnt_info = (struct kbase_hwcnt_backend_csf_if_prfcnt_info){ .l2_count = KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS, - .core_mask = (1ull << KBASE_DUMMY_MODEL_MAX_SHADER_CORES) - 1, + .sc_core_mask = (1ull << KBASE_DUMMY_MODEL_MAX_SHADER_CORES) - 1, .prfcnt_hw_size = KBASE_DUMMY_MODEL_MAX_NUM_HARDWARE_BLOCKS * KBASE_DUMMY_MODEL_BLOCK_SIZE, .prfcnt_fw_size = @@ -290,12 +310,13 @@ static void kbasep_hwcnt_backend_csf_if_fw_get_prfcnt_info( .dump_bytes = fw_ctx->buf_bytes, .prfcnt_block_size = prfcnt_block_size, .l2_count = kbdev->gpu_props.num_l2_slices, - .core_mask = kbasep_hwcnt_backend_csf_core_mask(&kbdev->gpu_props), + .sc_core_mask = kbasep_hwcnt_backend_csf_core_mask(&kbdev->gpu_props), .csg_count = fw_block_count > 1 ? 
csg_count : 0, .clk_cnt = fw_ctx->clk_cnt, .clearing_samples = true, }; + /* Block size must be multiple of counter size. */ WARN_ON((prfcnt_info->prfcnt_block_size % KBASE_HWCNT_VALUE_HW_BYTES) != 0); /* Total size must be multiple of block size. */ @@ -513,10 +534,15 @@ kbasep_hwcnt_backend_csf_if_fw_ring_buf_free(struct kbase_hwcnt_backend_csf_if_c fw_ring_buf->phys, fw_ring_buf->num_pages, fw_ring_buf->num_pages, MCU_AS_NR)); + /* Clear the dump ring_buf content to zeros */ + memset(fw_ring_buf->cpu_dump_base, 0, fw_ring_buf->num_pages * PAGE_SIZE); vunmap(fw_ring_buf->cpu_dump_base); + /* After zeroing, the ring_buf pages are dirty so need to pass the 'dirty' flag + * as true when freeing the pages to the Global pool. + */ kbase_mem_pool_free_pages(&fw_ctx->kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], - fw_ring_buf->num_pages, fw_ring_buf->phys, false, false); + fw_ring_buf->num_pages, fw_ring_buf->phys, true, false); kfree(fw_ring_buf->phys); @@ -807,6 +833,8 @@ int kbase_hwcnt_backend_csf_if_fw_create(struct kbase_device *kbdev, if_fw->assert_lock_held = kbasep_hwcnt_backend_csf_if_fw_assert_lock_held; if_fw->lock = kbasep_hwcnt_backend_csf_if_fw_lock; if_fw->unlock = kbasep_hwcnt_backend_csf_if_fw_unlock; + if_fw->acquire = kbasep_hwcnt_backend_csf_if_fw_acquire; + if_fw->release = kbasep_hwcnt_backend_csf_if_fw_release; if_fw->get_prfcnt_info = kbasep_hwcnt_backend_csf_if_fw_get_prfcnt_info; if_fw->ring_buf_alloc = kbasep_hwcnt_backend_csf_if_fw_ring_buf_alloc; if_fw->ring_buf_sync = kbasep_hwcnt_backend_csf_if_fw_ring_buf_sync; diff --git a/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_jm.c b/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_jm.c index 7fbef163976a..c3f2bcdbf256 100644 --- a/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_jm.c +++ b/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_jm.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) 
COPYRIGHT 2018-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2018-2024 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -165,7 +165,7 @@ static int kbasep_hwcnt_backend_jm_gpu_info_init(struct kbase_device *kbdev, #endif info->l2_count = l2_count; - info->core_mask = core_mask; + info->sc_core_mask = core_mask; info->prfcnt_values_per_block = KBASE_HWCNT_V5_DEFAULT_VALUES_PER_BLOCK; /* Determine the number of available clock domains. */ @@ -186,7 +186,7 @@ static void kbasep_hwcnt_backend_jm_init_layout(const struct kbase_hwcnt_gpu_inf WARN_ON(!gpu_info); WARN_ON(!phys_layout); - shader_core_cnt = fls64(gpu_info->core_mask); + shader_core_cnt = fls64(gpu_info->sc_core_mask); *phys_layout = (struct kbase_hwcnt_jm_physical_layout){ .fe_cnt = KBASE_HWCNT_V5_FE_BLOCK_COUNT, @@ -195,7 +195,7 @@ static void kbasep_hwcnt_backend_jm_init_layout(const struct kbase_hwcnt_gpu_inf .shader_cnt = shader_core_cnt, .block_cnt = KBASE_HWCNT_V5_FE_BLOCK_COUNT + KBASE_HWCNT_V5_TILER_BLOCK_COUNT + gpu_info->l2_count + shader_core_cnt, - .shader_avail_mask = gpu_info->core_mask, + .shader_avail_mask = gpu_info->sc_core_mask, .headers_per_block = KBASE_HWCNT_V5_HEADERS_PER_BLOCK, .values_per_block = gpu_info->prfcnt_values_per_block, .counters_per_block = @@ -384,14 +384,12 @@ kbasep_hwcnt_backend_jm_dump_enable_nolock(struct kbase_hwcnt_backend *backend, enable = (struct kbase_instr_hwcnt_enable) { - .fe_bm = phys_enable_map.fe_bm, - .shader_bm = phys_enable_map.shader_bm, - .tiler_bm = phys_enable_map.tiler_bm, - .mmu_l2_bm = phys_enable_map.mmu_l2_bm, + .fe_bm = phys_enable_map.fe_bm, .shader_bm = phys_enable_map.shader_bm, + .tiler_bm = phys_enable_map.tiler_bm, .mmu_l2_bm = phys_enable_map.mmu_l2_bm, .counter_set = phys_counter_set, #if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) /* The dummy model needs the CPU mapping. 
*/ - .dump_buffer = (uintptr_t)backend_jm->cpu_dump_va, + .dump_buffer = (uintptr_t)backend_jm->cpu_dump_va, #else .dump_buffer = backend_jm->gpu_dump_va, #endif /* CONFIG_MALI_BIFROST_NO_MALI */ @@ -411,7 +409,7 @@ kbasep_hwcnt_backend_jm_dump_enable_nolock(struct kbase_hwcnt_backend *backend, backend_jm->debug_core_mask = kbase_pm_ca_get_debug_core_mask(kbdev); backend_jm->max_l2_slices = backend_jm->info->hwcnt_gpu_info.l2_count; - backend_jm->max_core_mask = backend_jm->info->hwcnt_gpu_info.core_mask; + backend_jm->max_core_mask = backend_jm->info->hwcnt_gpu_info.sc_core_mask; backend_jm->pm_core_mask = kbase_pm_ca_get_instr_core_mask(kbdev); @@ -660,8 +658,8 @@ static int kbasep_hwcnt_backend_jm_dump_get(struct kbase_hwcnt_backend *backend, #endif /* CONFIG_MALI_BIFROST_NO_MALI */ errcode = kbase_hwcnt_jm_dump_get(dst, backend_jm->to_user_buf, dst_enable_map, backend_jm->pm_core_mask, backend_jm->debug_core_mask, - backend_jm->max_core_mask, backend_jm->max_l2_slices, - &backend_jm->curr_config, accumulate); + backend_jm->max_l2_slices, &backend_jm->curr_config, + accumulate); if (errcode) return errcode; @@ -685,7 +683,7 @@ static int kbasep_hwcnt_backend_jm_dump_alloc(const struct kbase_hwcnt_backend_j struct kbase_context *kctx, u64 *gpu_dump_va) { struct kbase_va_region *reg; - u64 flags; + base_mem_alloc_flags flags; u64 nr_pages; /* Calls to this function are inherently asynchronous, with respect to @@ -853,6 +851,14 @@ static void kbasep_hwcnt_backend_jm_term(struct kbase_hwcnt_backend *backend) kbasep_hwcnt_backend_jm_destroy((struct kbase_hwcnt_backend_jm *)backend); } +static void kbasep_hwcnt_backend_jm_acquire(const struct kbase_hwcnt_backend *backend) +{ +} + +static void kbasep_hwcnt_backend_jm_release(const struct kbase_hwcnt_backend *backend) +{ +} + /** * kbasep_hwcnt_backend_jm_info_destroy() - Destroy a JM backend info. * @info: Pointer to info to destroy. 
@@ -864,7 +870,7 @@ static void kbasep_hwcnt_backend_jm_info_destroy(const struct kbase_hwcnt_backen if (!info) return; - kbase_hwcnt_jm_metadata_destroy(info->metadata); + kbase_hwcnt_metadata_destroy(info->metadata); kfree(info); } @@ -934,6 +940,8 @@ int kbase_hwcnt_backend_jm_create(struct kbase_device *kbdev, iface->metadata = kbasep_hwcnt_backend_jm_metadata; iface->init = kbasep_hwcnt_backend_jm_init; iface->term = kbasep_hwcnt_backend_jm_term; + iface->acquire = kbasep_hwcnt_backend_jm_acquire; + iface->release = kbasep_hwcnt_backend_jm_release; iface->timestamp_ns = kbasep_hwcnt_backend_jm_timestamp_ns; iface->dump_enable = kbasep_hwcnt_backend_jm_dump_enable; iface->dump_enable_nolock = kbasep_hwcnt_backend_jm_dump_enable_nolock; diff --git a/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_jm_watchdog.c b/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_jm_watchdog.c index cf2a2e65bc25..88917e72ac58 100644 --- a/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_jm_watchdog.c +++ b/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_jm_watchdog.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2021-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2021-2024 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -317,6 +317,14 @@ kbasep_hwcnt_backend_jm_watchdog_term_partial(struct kbase_hwcnt_backend_jm_watc kfree(wd_backend); } +static void kbasep_hwcnt_backend_jm_watchdog_acquire(const struct kbase_hwcnt_backend *backend) +{ +} + +static void kbasep_hwcnt_backend_jm_watchdog_release(const struct kbase_hwcnt_backend *backend) +{ +} + /* Job manager watchdog backend, implementation of kbase_hwcnt_backend_term_fn * Calling term does *not* destroy the interface */ @@ -807,6 +815,8 @@ int kbase_hwcnt_backend_jm_watchdog_create(struct kbase_hwcnt_backend_interface .metadata = kbasep_hwcnt_backend_jm_watchdog_metadata, .init = kbasep_hwcnt_backend_jm_watchdog_init, .term = kbasep_hwcnt_backend_jm_watchdog_term, + .acquire = kbasep_hwcnt_backend_jm_watchdog_acquire, + .release = kbasep_hwcnt_backend_jm_watchdog_release, .timestamp_ns = kbasep_hwcnt_backend_jm_watchdog_timestamp_ns, .dump_enable = kbasep_hwcnt_backend_jm_watchdog_dump_enable, .dump_enable_nolock = kbasep_hwcnt_backend_jm_watchdog_dump_enable_nolock, diff --git a/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt.c b/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt.c index 8b1de2e1cdaf..8d308f1138a7 100644 --- a/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt.c +++ b/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2018-2024 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -599,6 +599,9 @@ int kbase_hwcnt_accumulator_acquire(struct kbase_hwcnt_context *hctx, return errcode; } + /* Inform the backend that counter collection has been enabled. 
*/ + hctx->iface->acquire(hctx->accum.backend); + spin_lock_irqsave(&hctx->state_lock, flags); WARN_ON(hctx->disable_count == 0); @@ -646,6 +649,9 @@ void kbase_hwcnt_accumulator_release(struct kbase_hwcnt_accumulator *accum) mutex_unlock(&hctx->accum_lock); + /* Inform the backend that counter collection has been disabled. */ + hctx->iface->release(hctx->accum.backend); + kbasep_hwcnt_accumulator_term(hctx); mutex_lock(&hctx->accum_lock); diff --git a/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_gpu.c b/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_gpu.c index 5da564546608..7cd16a0de4ce 100644 --- a/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_gpu.c +++ b/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_gpu.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2018-2024 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -169,7 +169,7 @@ static int kbasep_hwcnt_backend_gpu_metadata_create(const struct kbase_hwcnt_gpu /* Calculate number of block instances that aren't cores */ non_core_block_count = 2 + gpu_info->l2_count; /* Calculate number of block instances that are shader cores */ - sc_block_count = (size_t)fls64(gpu_info->core_mask); + sc_block_count = (size_t)fls64(gpu_info->sc_core_mask); /* Determine the total number of cores */ core_block_count = sc_block_count; @@ -277,7 +277,7 @@ static int kbasep_hwcnt_backend_gpu_metadata_create(const struct kbase_hwcnt_gpu kbase_hwcnt_set_avail_mask(&desc.avail_mask, 0, 0); kbase_hwcnt_set_avail_mask_bits(&desc.avail_mask, 0, non_core_block_count, U64_MAX); kbase_hwcnt_set_avail_mask_bits(&desc.avail_mask, non_core_block_count, sc_block_count, - gpu_info->core_mask); + gpu_info->sc_core_mask); return kbase_hwcnt_metadata_create(&desc, metadata); @@ -294,7 +294,7 @@ static 
size_t kbasep_hwcnt_backend_jm_dump_bytes(const struct kbase_hwcnt_gpu_in { WARN_ON(!gpu_info); - return (2 + gpu_info->l2_count + (size_t)fls64(gpu_info->core_mask)) * + return (2 + gpu_info->l2_count + (size_t)fls64(gpu_info->sc_core_mask)) * gpu_info->prfcnt_values_per_block * KBASE_HWCNT_VALUE_HW_BYTES; } @@ -338,14 +338,6 @@ int kbase_hwcnt_jm_metadata_create(const struct kbase_hwcnt_gpu_info *gpu_info, return 0; } -void kbase_hwcnt_jm_metadata_destroy(const struct kbase_hwcnt_metadata *metadata) -{ - if (!metadata) - return; - - kbase_hwcnt_metadata_destroy(metadata); -} - int kbase_hwcnt_csf_metadata_create(const struct kbase_hwcnt_gpu_info *gpu_info, enum kbase_hwcnt_set counter_set, const struct kbase_hwcnt_metadata **out_metadata) @@ -365,14 +357,6 @@ int kbase_hwcnt_csf_metadata_create(const struct kbase_hwcnt_gpu_info *gpu_info, return 0; } -void kbase_hwcnt_csf_metadata_destroy(const struct kbase_hwcnt_metadata *metadata) -{ - if (!metadata) - return; - - kbase_hwcnt_metadata_destroy(metadata); -} - bool kbase_hwcnt_is_block_type_shader(const enum kbase_hwcnt_gpu_v5_block_type blk_type) { if (blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC || @@ -384,6 +368,7 @@ bool kbase_hwcnt_is_block_type_shader(const enum kbase_hwcnt_gpu_v5_block_type b return false; } + bool kbase_hwcnt_is_block_type_memsys(const enum kbase_hwcnt_gpu_v5_block_type blk_type) { if (blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS || @@ -416,7 +401,7 @@ bool kbase_hwcnt_is_block_type_fe(const enum kbase_hwcnt_gpu_v5_block_type blk_t int kbase_hwcnt_jm_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src, const struct kbase_hwcnt_enable_map *dst_enable_map, u64 pm_core_mask, - u64 debug_core_mask, u64 max_core_mask, size_t max_l2_slices, + u64 debug_core_mask, size_t max_l2_slices, const struct kbase_hwcnt_curr_config *curr_config, bool accumulate) { const struct kbase_hwcnt_metadata *metadata; @@ -466,9 +451,7 @@ int kbase_hwcnt_jm_dump_get(struct kbase_hwcnt_dump_buffer 
*dst, u64 *src, else hw_res_available = true; - /* - * Skip block if no values in the destination block are enabled. - */ + /* Skip block if no values in the destination block are enabled. */ if (kbase_hwcnt_enable_map_block_enabled(dst_enable_map, blk, blk_inst)) { u64 *dst_blk = kbase_hwcnt_dump_buffer_block_instance(dst, blk, blk_inst); const u64 *src_blk = dump_src + src_offset; @@ -581,7 +564,6 @@ int kbase_hwcnt_jm_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src, /* Shift each core mask right by 1 */ core_mask >>= 1; debug_core_mask >>= 1; - max_core_mask >>= 1; shader_present >>= 1; } } @@ -592,7 +574,7 @@ int kbase_hwcnt_jm_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src, int kbase_hwcnt_csf_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src, blk_stt_t *src_block_stt, const struct kbase_hwcnt_enable_map *dst_enable_map, - size_t num_l2_slices, u64 shader_present_bitmap, bool accumulate) + size_t num_l2_slices, u64 powered_shader_core_mask, bool accumulate) { const struct kbase_hwcnt_metadata *metadata; const u64 *dump_src = src; @@ -614,9 +596,7 @@ int kbase_hwcnt_csf_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src, blk_stt_t *dst_blk_stt = kbase_hwcnt_dump_buffer_block_state_instance(dst, blk, blk_inst); - /* - * Skip block if no values in the destination block are enabled. - */ + /* Skip block if no values in the destination block are enabled. 
*/ if (kbase_hwcnt_enable_map_block_enabled(dst_enable_map, blk, blk_inst)) { u64 *dst_blk = kbase_hwcnt_dump_buffer_block_instance(dst, blk, blk_inst); const u64 *src_blk = dump_src + src_offset; diff --git a/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_gpu.h b/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_gpu.h index 4339fddd64e2..896f1389eb37 100644 --- a/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_gpu.h +++ b/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_gpu.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2018-2024 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -169,7 +169,7 @@ enum kbase_hwcnt_physical_set { /** * struct kbase_hwcnt_gpu_info - Information about hwcnt blocks on the GPUs. * @l2_count: L2 cache count. - * @core_mask: Shader core mask. May be sparse. + * @sc_core_mask: Shader core mask. May be sparse. * @clk_cnt: Number of clock domains available. * @csg_cnt: Number of CSGs available. * @prfcnt_values_per_block: Total entries (header + counters) of performance @@ -178,7 +178,7 @@ enum kbase_hwcnt_physical_set { */ struct kbase_hwcnt_gpu_info { size_t l2_count; - u64 core_mask; + u64 sc_core_mask; u8 clk_cnt; u8 csg_cnt; size_t prfcnt_values_per_block; @@ -261,13 +261,6 @@ int kbase_hwcnt_jm_metadata_create(const struct kbase_hwcnt_gpu_info *info, const struct kbase_hwcnt_metadata **out_metadata, size_t *out_dump_bytes); -/** - * kbase_hwcnt_jm_metadata_destroy() - Destroy JM GPU hardware counter metadata. - * - * @metadata: Pointer to metadata to destroy. - */ -void kbase_hwcnt_jm_metadata_destroy(const struct kbase_hwcnt_metadata *metadata); - /** * kbase_hwcnt_csf_metadata_create() - Create hardware counter metadata for the * CSF GPUs. 
@@ -282,13 +275,6 @@ int kbase_hwcnt_csf_metadata_create(const struct kbase_hwcnt_gpu_info *info, enum kbase_hwcnt_set counter_set, const struct kbase_hwcnt_metadata **out_metadata); -/** - * kbase_hwcnt_csf_metadata_destroy() - Destroy CSF GPU hardware counter - * metadata. - * @metadata: Pointer to metadata to destroy. - */ -void kbase_hwcnt_csf_metadata_destroy(const struct kbase_hwcnt_metadata *metadata); - /** * kbase_hwcnt_jm_dump_get() - Copy or accumulate enabled counters from the raw * dump buffer in src into the dump buffer @@ -300,9 +286,6 @@ void kbase_hwcnt_csf_metadata_destroy(const struct kbase_hwcnt_metadata *metadat * @dst_enable_map: Non-NULL pointer to enable map specifying enabled values. * @pm_core_mask: PM state synchronized shaders core mask with the dump. * @debug_core_mask: User-set mask of cores to be used by the GPU. - * @max_core_mask: Core mask of all cores allocated to the GPU (non - * virtualized platforms) or resource group (virtualized - * platforms). * @max_l2_slices: Maximum number of L2 slices allocated to the GPU (non * virtualised platforms) or resource group (virtualized * platforms). @@ -319,23 +302,23 @@ void kbase_hwcnt_csf_metadata_destroy(const struct kbase_hwcnt_metadata *metadat */ int kbase_hwcnt_jm_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src, const struct kbase_hwcnt_enable_map *dst_enable_map, - const u64 pm_core_mask, u64 debug_core_mask, u64 max_core_mask, - size_t max_l2_slices, const struct kbase_hwcnt_curr_config *curr_config, - bool accumulate); + const u64 pm_core_mask, u64 debug_core_mask, size_t max_l2_slices, + const struct kbase_hwcnt_curr_config *curr_config, bool accumulate); /** * kbase_hwcnt_csf_dump_get() - Copy or accumulate enabled counters from the raw * dump buffer in src into the dump buffer * abstraction in dst. - * @dst: Non-NULL pointer to destination dump buffer. 
- * @src: Non-NULL pointer to source raw dump buffer, of same length - * as dump_buf_bytes in the metadata of dst dump buffer. - * @src_block_stt: Non-NULL pointer to source block state buffer. - * @dst_enable_map: Non-NULL pointer to enable map specifying enabled values. - * @num_l2_slices: Current number of L2 slices allocated to the GPU. - * @shader_present_bitmap: Current shader-present bitmap that is allocated to the GPU. - * @accumulate: True if counters in src should be accumulated into - * destination, rather than copied. + * @dst: Non-NULL pointer to destination dump buffer. + * @src: Non-NULL pointer to source raw dump buffer, of same length + * as dump_buf_bytes in the metadata of dst dump buffer. + * @src_block_stt: Non-NULL pointer to source block state buffer. + * @dst_enable_map: Non-NULL pointer to enable map specifying enabled values. + * @num_l2_slices: Current number of L2 slices allocated to the GPU. + * @powered_shader_core_mask: The common mask between the debug_core_mask + * and the shader_present_bitmap. + * @accumulate: True if counters in src should be accumulated into + * destination, rather than copied. 
* * The dst and dst_enable_map MUST have been created from the same metadata as * returned from the call to kbase_hwcnt_csf_metadata_create as was used to get @@ -346,7 +329,7 @@ int kbase_hwcnt_jm_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src, int kbase_hwcnt_csf_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src, blk_stt_t *src_block_stt, const struct kbase_hwcnt_enable_map *dst_enable_map, - size_t num_l2_slices, u64 shader_present_bitmap, bool accumulate); + size_t num_l2_slices, u64 powered_shader_core_mask, bool accumulate); /** * kbase_hwcnt_backend_gpu_block_map_to_physical() - Convert from a block @@ -453,6 +436,7 @@ bool kbase_hwcnt_is_block_type_memsys(const enum kbase_hwcnt_gpu_v5_block_type b bool kbase_hwcnt_is_block_type_tiler(const enum kbase_hwcnt_gpu_v5_block_type blk_type); bool kbase_hwcnt_is_block_type_fe(const enum kbase_hwcnt_gpu_v5_block_type blk_type); + /** * kbase_hwcnt_gpu_enable_map_from_cm() - Builds enable map abstraction from * counter selection bitmasks. diff --git a/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_types.c b/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_types.c index 3d0ad5af7263..3d2fd5e088da 100644 --- a/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_types.c +++ b/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_types.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2018-2024 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -125,6 +125,9 @@ int kbase_hwcnt_metadata_create(const struct kbase_hwcnt_description *desc, void kbase_hwcnt_metadata_destroy(const struct kbase_hwcnt_metadata *metadata) { + if (!metadata) + return; + kfree(metadata); } diff --git a/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_types.h b/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_types.h index c7afe173d426..45f67f7c9a1b 100644 --- a/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_types.h +++ b/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_types.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2018-2024 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -19,65 +19,6 @@ * */ -/* - * Hardware counter types. - * Contains structures for describing the physical layout of hardware counter - * dump buffers and enable maps within a system. - * - * Also contains helper functions for manipulation of these dump buffers and - * enable maps. - * - * Through use of these structures and functions, hardware counters can be - * enabled, copied, accumulated, and generally manipulated in a generic way, - * regardless of the physical counter dump layout. - * - * Terminology: - * - * Hardware Counter System: - * A collection of hardware counter blocks, making a full hardware counter - * system. - * Hardware Counter Block: - * A block of hardware counters (e.g. shader block, tiler block). - * Hardware Counter Block Instance: - * An instance of a Hardware Counter Block (e.g. an MP4 GPU might have - * 4 shader block instances). - * - * Block Header: - * A header value inside a counter block. 
Headers don't count anything, - * so it is only valid to copy or zero them. Headers are always the first - * values in the block. - * Block Counter: - * A counter value inside a counter block. Counters can be zeroed, copied, - * or accumulated. Counters are always immediately after the headers in the - * block. - * Block Value: - * A catch-all term for block headers and block counters. - * - * Enable Map: - * An array of u64 bitfields, where each bit either enables exactly one - * block value, or is unused (padding). Note that this is derived from - * the client configuration, and is not obtained from the hardware. - * Dump Buffer: - * An array of u64 values, where each u64 corresponds either to one block - * value, or is unused (padding). - * Block State Buffer: - * An array of blk_stt_t values, where each blk_stt_t corresponds to one block - * instance and is used to track the on/off power state transitions, as well has - * hardware resource availability, and whether the block was operating - * in normal or protected mode. - * Availability Mask: - * A bitfield, where each bit corresponds to whether a block instance is - * physically available (e.g. an MP3 GPU may have a sparse core mask of - * 0b1011, meaning it only has 3 cores but for hardware counter dumps has the - * same dump buffer layout as an MP4 GPU with a core mask of 0b1111. In this - * case, the availability mask might be 0b1011111 (the exact layout will - * depend on the specific hardware architecture), with the 3 extra early bits - * corresponding to other block instances in the hardware counter system). - * Metadata: - * Structure describing the physical layout of the enable map and dump buffers - * for a specific hardware counter system. 
- */ - #ifndef _KBASE_HWCNT_TYPES_H_ #define _KBASE_HWCNT_TYPES_H_ diff --git a/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa_debugfs.c b/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa_debugfs.c index e4138580de20..9305747ff472 100644 --- a/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa_debugfs.c +++ b/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa_debugfs.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2017-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2017-2024 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -115,7 +115,7 @@ static ssize_t param_string_set(struct file *file, const char __user *user_buf, goto end; } - buf_size = min(param->size - 1, count); + buf_size = min(size_sub(param->size, 1), count); if (copy_from_user(param->addr.str, user_buf, buf_size)) { ret = -EFAULT; goto end; diff --git a/drivers/gpu/arm/bifrost/jm/mali_kbase_jm_defs.h b/drivers/gpu/arm/bifrost/jm/mali_kbase_jm_defs.h index 373b9b1b73b3..23e919314333 100644 --- a/drivers/gpu/arm/bifrost/jm/mali_kbase_jm_defs.h +++ b/drivers/gpu/arm/bifrost/jm/mali_kbase_jm_defs.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2024 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -308,11 +308,11 @@ enum kbase_atom_gpu_rb_state { * powered down and GPU shall come out of fully * coherent mode before entering protected mode. 
* @KBASE_ATOM_ENTER_PROTECTED_SET_COHERENCY: Prepare coherency change; - * for BASE_HW_ISSUE_TGOX_R1_1234 also request L2 power on + * for KBASE_HW_ISSUE_TGOX_R1_1234 also request L2 power on * so that coherency register contains correct value when * GPU enters protected mode. * @KBASE_ATOM_ENTER_PROTECTED_FINISHED: End state; for - * BASE_HW_ISSUE_TGOX_R1_1234 check + * KBASE_HW_ISSUE_TGOX_R1_1234 check * that L2 is powered up and switch GPU to protected mode. */ enum kbase_atom_enter_protected_state { @@ -500,10 +500,6 @@ enum kbase_atom_exit_protected_state { * is snapshot of the age_count counter in kbase * context. * @jobslot: Job slot to use when BASE_JD_REQ_JOB_SLOT is specified. - * @renderpass_id:Renderpass identifier used to associate an atom that has - * BASE_JD_REQ_START_RENDERPASS set in its core requirements - * with an atom that has BASE_JD_REQ_END_RENDERPASS set. - * @jc_fragment: Set of GPU fragment job chains */ struct kbase_jd_atom { struct work_struct work; @@ -564,8 +560,6 @@ struct kbase_jd_atom { enum base_jd_event_code event_code; base_jd_core_req core_req; u8 jobslot; - u8 renderpass_id; - struct base_jd_fragment jc_fragment; u32 ticks; int sched_priority; @@ -676,71 +670,6 @@ static inline bool kbase_jd_atom_is_earlier(const struct kbase_jd_atom *katom_a, #define KBASE_JD_DEP_QUEUE_SIZE 256 -/** - * enum kbase_jd_renderpass_state - State of a renderpass - * @KBASE_JD_RP_COMPLETE: Unused or completed renderpass. Can only transition to - * START. - * @KBASE_JD_RP_START: Renderpass making a first attempt at tiling. - * Can transition to PEND_OOM or COMPLETE. - * @KBASE_JD_RP_PEND_OOM: Renderpass whose first attempt at tiling used too much - * memory and has a soft-stop pending. Can transition to - * OOM or COMPLETE. - * @KBASE_JD_RP_OOM: Renderpass whose first attempt at tiling used too much - * memory and therefore switched to incremental - * rendering. The fragment job chain is forced to run. - * Can only transition to RETRY. 
- * @KBASE_JD_RP_RETRY: Renderpass making a second or subsequent attempt at - * tiling. Can transition to RETRY_PEND_OOM or COMPLETE. - * @KBASE_JD_RP_RETRY_PEND_OOM: Renderpass whose second or subsequent attempt at - * tiling used too much memory again and has a - * soft-stop pending. Can transition to RETRY_OOM - * or COMPLETE. - * @KBASE_JD_RP_RETRY_OOM: Renderpass whose second or subsequent attempt at - * tiling used too much memory again. The fragment job - * chain is forced to run. Can only transition to RETRY. - * - * A state machine is used to control incremental rendering. - */ -enum kbase_jd_renderpass_state { - KBASE_JD_RP_COMPLETE, /* COMPLETE => START */ - KBASE_JD_RP_START, /* START => PEND_OOM or COMPLETE */ - KBASE_JD_RP_PEND_OOM, /* PEND_OOM => OOM or COMPLETE */ - KBASE_JD_RP_OOM, /* OOM => RETRY */ - KBASE_JD_RP_RETRY, /* RETRY => RETRY_PEND_OOM or COMPLETE */ - KBASE_JD_RP_RETRY_PEND_OOM, /* RETRY_PEND_OOM => RETRY_OOM or COMPLETE */ - KBASE_JD_RP_RETRY_OOM /* RETRY_OOM => RETRY */ -}; - -/** - * struct kbase_jd_renderpass - Data for a renderpass - * @state: Current state of the renderpass. If KBASE_JD_RP_COMPLETE then - * all other members are invalid. - * Both the job dispatcher context and hwaccess_lock must be - * locked to modify this so that it can be read with either - * (or both) locked. - * @start_katom: Address of the atom that is the start of a renderpass. - * Both the job dispatcher context and hwaccess_lock must be - * locked to modify this so that it can be read with either - * (or both) locked. - * @end_katom: Address of the atom that is the end of a renderpass, or NULL - * if that atom hasn't been added to the job scheduler yet. - * The job dispatcher context and hwaccess_lock must be - * locked to modify this so that it can be read with either - * (or both) locked. - * @oom_reg_list: A list of region structures which triggered out-of-memory. - * The hwaccess_lock must be locked to access this. 
- * - * Atoms tagged with BASE_JD_REQ_START_RENDERPASS or BASE_JD_REQ_END_RENDERPASS - * are associated with an object of this type, which is created and maintained - * by kbase to keep track of each renderpass. - */ -struct kbase_jd_renderpass { - enum kbase_jd_renderpass_state state; - struct kbase_jd_atom *start_katom; - struct kbase_jd_atom *end_katom; - struct list_head oom_reg_list; -}; - /** * struct kbase_jd_context - per context object encapsulating all the * Job dispatcher related state. @@ -751,9 +680,6 @@ struct kbase_jd_renderpass { * @atoms: Array of the objects representing atoms, * containing the complete state and attributes * of an atom. - * @renderpasses: Array of renderpass state for incremental - * rendering, indexed by user-specified renderpass - * ID. * @job_nr: Tracks the number of atoms being processed by the * kbase. This includes atoms that are not tracked by * scheduler: 'not ready to run' & 'dependency-only' @@ -803,7 +729,6 @@ struct kbase_jd_context { struct mutex lock; struct kbasep_js_kctx_info sched_info; struct kbase_jd_atom atoms[BASE_JD_ATOM_COUNT]; - struct kbase_jd_renderpass renderpasses[BASE_JD_RP_COUNT]; struct workqueue_struct *job_done_wq; wait_queue_head_t zero_jobs_wait; diff --git a/drivers/gpu/arm/bifrost/jm/mali_kbase_jm_js.h b/drivers/gpu/arm/bifrost/jm/mali_kbase_jm_js.h index 333ad2d2b150..65b54c68d8c7 100644 --- a/drivers/gpu/arm/bifrost/jm/mali_kbase_jm_js.h +++ b/drivers/gpu/arm/bifrost/jm/mali_kbase_jm_js.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2020-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2020-2024 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -567,22 +567,6 @@ bool kbase_js_complete_atom_wq(struct kbase_context *kctx, struct kbase_jd_atom */ struct kbase_jd_atom *kbase_js_complete_atom(struct kbase_jd_atom *katom, ktime_t *end_timestamp); -/** - * kbase_js_atom_blocked_on_x_dep - Decide whether to ignore a cross-slot - * dependency - * @katom: Pointer to an atom in the slot ringbuffer - * - * A cross-slot dependency is ignored if necessary to unblock incremental - * rendering. If the atom at the start of a renderpass used too much memory - * and was soft-stopped then the atom at the end of a renderpass is submitted - * to hardware regardless of its dependency on the start-of-renderpass atom. - * This can happen multiple times for the same pair of atoms. - * - * Return: true to block the atom or false to allow it to be submitted to - * hardware. - */ -bool kbase_js_atom_blocked_on_x_dep(struct kbase_jd_atom *katom); - /** * kbase_js_sched - Submit atoms from all available contexts. * @@ -809,8 +793,7 @@ static inline bool kbasep_js_has_atom_finished(const struct kbasep_js_atom_retained_state *katom_retained_state) { return (bool)(katom_retained_state->event_code != BASE_JD_EVENT_STOPPED && - katom_retained_state->event_code != BASE_JD_EVENT_REMOVED_FROM_NEXT && - katom_retained_state->event_code != BASE_JD_EVENT_END_RP_DONE); + katom_retained_state->event_code != BASE_JD_EVENT_REMOVED_FROM_NEXT); } /** diff --git a/drivers/gpu/arm/bifrost/mali_base_hwconfig_features.h b/drivers/gpu/arm/bifrost/mali_base_hwconfig_features.h deleted file mode 100644 index 1f32fc9dd553..000000000000 --- a/drivers/gpu/arm/bifrost/mali_base_hwconfig_features.h +++ /dev/null @@ -1,161 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -/* - * - * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved. 
- * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU license. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - */ - -/* AUTOMATICALLY GENERATED FILE. If you want to amend the issues/features, - * please update base/tools/hwconfig_generator/hwc_{issues,features}.py - * For more information see base/tools/docs/hwconfig_generator.md - */ - -#ifndef _BASE_HWCONFIG_FEATURES_H_ -#define _BASE_HWCONFIG_FEATURES_H_ - -enum base_hw_feature { - BASE_HW_FEATURE_FLUSH_REDUCTION, - BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, - BASE_HW_FEATURE_TLS_HASHING, - BASE_HW_FEATURE_THREAD_GROUP_SPLIT, - BASE_HW_FEATURE_CLEAN_ONLY_SAFE, - BASE_HW_FEATURE_IDVS_GROUP_SIZE, - BASE_HW_FEATURE_L2_CONFIG, - BASE_HW_FEATURE_L2_SLICE_HASH, - BASE_HW_FEATURE_GPU_SLEEP, - BASE_HW_FEATURE_FLUSH_INV_SHADER_OTHER, - BASE_HW_FEATURE_CORE_FEATURES, - BASE_HW_FEATURE_PBHA_HWU, - BASE_HW_FEATURE_LARGE_PAGE_ALLOC, - BASE_HW_FEATURE_THREAD_TLS_ALLOC, - BASE_HW_FEATURE_END -}; - -__attribute__((unused)) static const enum base_hw_feature base_hw_features_generic[] = { - BASE_HW_FEATURE_END -}; - -__attribute__((unused)) static const enum base_hw_feature base_hw_features_tMIx[] = { - BASE_HW_FEATURE_THREAD_GROUP_SPLIT, BASE_HW_FEATURE_FLUSH_REDUCTION, BASE_HW_FEATURE_END -}; - -__attribute__((unused)) static const enum base_hw_feature base_hw_features_tHEx[] = { - BASE_HW_FEATURE_THREAD_GROUP_SPLIT, BASE_HW_FEATURE_FLUSH_REDUCTION, - 
BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, BASE_HW_FEATURE_END -}; - -__attribute__((unused)) static const enum base_hw_feature base_hw_features_tSIx[] = { - BASE_HW_FEATURE_THREAD_GROUP_SPLIT, BASE_HW_FEATURE_FLUSH_REDUCTION, - BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, BASE_HW_FEATURE_END -}; - -__attribute__((unused)) static const enum base_hw_feature base_hw_features_tDVx[] = { - BASE_HW_FEATURE_THREAD_GROUP_SPLIT, BASE_HW_FEATURE_FLUSH_REDUCTION, - BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, BASE_HW_FEATURE_END -}; - -__attribute__((unused)) static const enum base_hw_feature base_hw_features_tNOx[] = { - BASE_HW_FEATURE_THREAD_GROUP_SPLIT, BASE_HW_FEATURE_FLUSH_REDUCTION, - BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, BASE_HW_FEATURE_TLS_HASHING, - BASE_HW_FEATURE_IDVS_GROUP_SIZE, BASE_HW_FEATURE_END -}; - -__attribute__((unused)) static const enum base_hw_feature base_hw_features_tGOx[] = { - BASE_HW_FEATURE_THREAD_GROUP_SPLIT, BASE_HW_FEATURE_FLUSH_REDUCTION, - BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, BASE_HW_FEATURE_TLS_HASHING, - BASE_HW_FEATURE_IDVS_GROUP_SIZE, BASE_HW_FEATURE_CORE_FEATURES, - BASE_HW_FEATURE_THREAD_TLS_ALLOC, BASE_HW_FEATURE_END -}; - -__attribute__((unused)) static const enum base_hw_feature base_hw_features_tTRx[] = { - BASE_HW_FEATURE_FLUSH_REDUCTION, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, - BASE_HW_FEATURE_IDVS_GROUP_SIZE, BASE_HW_FEATURE_CLEAN_ONLY_SAFE, - BASE_HW_FEATURE_FLUSH_INV_SHADER_OTHER, BASE_HW_FEATURE_END -}; - -__attribute__((unused)) static const enum base_hw_feature base_hw_features_tNAx[] = { - BASE_HW_FEATURE_FLUSH_REDUCTION, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, - BASE_HW_FEATURE_IDVS_GROUP_SIZE, BASE_HW_FEATURE_CLEAN_ONLY_SAFE, - BASE_HW_FEATURE_FLUSH_INV_SHADER_OTHER, BASE_HW_FEATURE_END -}; - -__attribute__((unused)) static const enum base_hw_feature base_hw_features_tBEx[] = { - BASE_HW_FEATURE_FLUSH_REDUCTION, - BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, - BASE_HW_FEATURE_IDVS_GROUP_SIZE, - BASE_HW_FEATURE_L2_CONFIG, - 
BASE_HW_FEATURE_CLEAN_ONLY_SAFE, - BASE_HW_FEATURE_FLUSH_INV_SHADER_OTHER, - BASE_HW_FEATURE_END -}; - -__attribute__((unused)) static const enum base_hw_feature base_hw_features_tBAx[] = { - BASE_HW_FEATURE_FLUSH_REDUCTION, - BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, - BASE_HW_FEATURE_IDVS_GROUP_SIZE, - BASE_HW_FEATURE_L2_CONFIG, - BASE_HW_FEATURE_CLEAN_ONLY_SAFE, - BASE_HW_FEATURE_FLUSH_INV_SHADER_OTHER, - BASE_HW_FEATURE_END -}; - -__attribute__((unused)) static const enum base_hw_feature base_hw_features_tODx[] = { - BASE_HW_FEATURE_FLUSH_REDUCTION, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, - BASE_HW_FEATURE_L2_CONFIG, BASE_HW_FEATURE_CLEAN_ONLY_SAFE, BASE_HW_FEATURE_END -}; - -__attribute__((unused)) static const enum base_hw_feature base_hw_features_tGRx[] = { - BASE_HW_FEATURE_FLUSH_REDUCTION, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, - BASE_HW_FEATURE_L2_CONFIG, BASE_HW_FEATURE_CLEAN_ONLY_SAFE, - BASE_HW_FEATURE_CORE_FEATURES, BASE_HW_FEATURE_END -}; - -__attribute__((unused)) static const enum base_hw_feature base_hw_features_tVAx[] = { - BASE_HW_FEATURE_FLUSH_REDUCTION, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, - BASE_HW_FEATURE_L2_CONFIG, BASE_HW_FEATURE_CLEAN_ONLY_SAFE, - BASE_HW_FEATURE_CORE_FEATURES, BASE_HW_FEATURE_END -}; - -__attribute__((unused)) static const enum base_hw_feature base_hw_features_tTUx[] = { - BASE_HW_FEATURE_FLUSH_REDUCTION, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, - BASE_HW_FEATURE_L2_CONFIG, BASE_HW_FEATURE_CLEAN_ONLY_SAFE, - BASE_HW_FEATURE_L2_SLICE_HASH, BASE_HW_FEATURE_GPU_SLEEP, - BASE_HW_FEATURE_CORE_FEATURES, BASE_HW_FEATURE_END -}; - -__attribute__((unused)) static const enum base_hw_feature base_hw_features_tTIx[] = { - BASE_HW_FEATURE_FLUSH_REDUCTION, - BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, - BASE_HW_FEATURE_L2_CONFIG, - BASE_HW_FEATURE_CLEAN_ONLY_SAFE, - BASE_HW_FEATURE_L2_SLICE_HASH, - BASE_HW_FEATURE_GPU_SLEEP, - BASE_HW_FEATURE_CORE_FEATURES, - BASE_HW_FEATURE_PBHA_HWU, - BASE_HW_FEATURE_END -}; - -__attribute__((unused)) static const 
enum base_hw_feature base_hw_features_tKRx[] = { - BASE_HW_FEATURE_FLUSH_REDUCTION, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, - BASE_HW_FEATURE_L2_CONFIG, BASE_HW_FEATURE_CLEAN_ONLY_SAFE, - BASE_HW_FEATURE_L2_SLICE_HASH, BASE_HW_FEATURE_GPU_SLEEP, - BASE_HW_FEATURE_CORE_FEATURES, BASE_HW_FEATURE_PBHA_HWU, - BASE_HW_FEATURE_LARGE_PAGE_ALLOC, BASE_HW_FEATURE_END -}; - - -#endif /* _BASE_HWCONFIG_FEATURES_H_ */ diff --git a/drivers/gpu/arm/bifrost/mali_base_hwconfig_issues.h b/drivers/gpu/arm/bifrost/mali_base_hwconfig_issues.h deleted file mode 100644 index 4426bd743b4e..000000000000 --- a/drivers/gpu/arm/bifrost/mali_base_hwconfig_issues.h +++ /dev/null @@ -1,618 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -/* - * - * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU license. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - */ - -/* AUTOMATICALLY GENERATED FILE. 
If you want to amend the issues/features, - * please update base/tools/hwconfig_generator/hwc_{issues,features}.py - * For more information see base/tools/docs/hwconfig_generator.md - */ - -#ifndef _BASE_HWCONFIG_ISSUES_H_ -#define _BASE_HWCONFIG_ISSUES_H_ - -enum base_hw_issue { - BASE_HW_ISSUE_5736, - BASE_HW_ISSUE_9435, - BASE_HW_ISSUE_10682, - BASE_HW_ISSUE_11054, - BASE_HW_ISSUE_T76X_3953, - BASE_HW_ISSUE_TMIX_7891, - BASE_HW_ISSUE_TMIX_7940, - BASE_HW_ISSUE_TMIX_8042, - BASE_HW_ISSUE_TMIX_8133, - BASE_HW_ISSUE_TMIX_8138, - BASE_HW_ISSUE_TMIX_8206, - BASE_HW_ISSUE_TMIX_8343, - BASE_HW_ISSUE_TMIX_8463, - BASE_HW_ISSUE_TMIX_8456, - BASE_HW_ISSUE_TSIX_1116, - BASE_HW_ISSUE_TSIX_2033, - BASE_HW_ISSUE_TMIX_8438, - BASE_HW_ISSUE_TNOX_1194, - BASE_HW_ISSUE_TGOX_R1_1234, - BASE_HW_ISSUE_TTRX_1337, - BASE_HW_ISSUE_TSIX_1792, - BASE_HW_ISSUE_TTRX_2968_TTRX_3162, - BASE_HW_ISSUE_TTRX_3076, - BASE_HW_ISSUE_TTRX_921, - BASE_HW_ISSUE_TTRX_3414, - BASE_HW_ISSUE_GPU2017_1336, - BASE_HW_ISSUE_TTRX_3083, - BASE_HW_ISSUE_TTRX_3470, - BASE_HW_ISSUE_TTRX_3464, - BASE_HW_ISSUE_TTRX_3485, - BASE_HW_ISSUE_GPU2019_3212, - BASE_HW_ISSUE_TURSEHW_1997, - BASE_HW_ISSUE_GPU2019_3878, - BASE_HW_ISSUE_TURSEHW_2716, - BASE_HW_ISSUE_GPU2019_3901, - BASE_HW_ISSUE_GPU2021PRO_290, - BASE_HW_ISSUE_TITANHW_2710, - BASE_HW_ISSUE_TITANHW_2679, - BASE_HW_ISSUE_GPU2022PRO_148, - BASE_HW_ISSUE_TITANHW_2922, - BASE_HW_ISSUE_TITANHW_2952, - BASE_HW_ISSUE_KRAKEHW_2151, - BASE_HW_ISSUE_TITANHW_2938, - BASE_HW_ISSUE_KRAKEHW_2269, - BASE_HW_ISSUE_END -}; - -__attribute__(( - unused)) static const enum base_hw_issue base_hw_issues_generic[] = { BASE_HW_ISSUE_END }; - -__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tMIx_r0p0_05dev0[] = { - BASE_HW_ISSUE_9435, BASE_HW_ISSUE_10682, BASE_HW_ISSUE_11054, - BASE_HW_ISSUE_T76X_3953, BASE_HW_ISSUE_TMIX_7891, BASE_HW_ISSUE_TMIX_8042, - BASE_HW_ISSUE_TMIX_8133, BASE_HW_ISSUE_TMIX_8138, BASE_HW_ISSUE_TMIX_8206, - BASE_HW_ISSUE_TMIX_8343, 
BASE_HW_ISSUE_TMIX_8463, BASE_HW_ISSUE_TMIX_8456, - BASE_HW_ISSUE_TMIX_8438, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_921, - BASE_HW_ISSUE_GPU2017_1336, BASE_HW_ISSUE_TITANHW_2710, BASE_HW_ISSUE_GPU2022PRO_148, - BASE_HW_ISSUE_TITANHW_2938, BASE_HW_ISSUE_END -}; - -__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tMIx_r0p0[] = { - BASE_HW_ISSUE_9435, BASE_HW_ISSUE_10682, BASE_HW_ISSUE_11054, - BASE_HW_ISSUE_TMIX_7891, BASE_HW_ISSUE_TMIX_7940, BASE_HW_ISSUE_TMIX_8042, - BASE_HW_ISSUE_TMIX_8133, BASE_HW_ISSUE_TMIX_8138, BASE_HW_ISSUE_TMIX_8206, - BASE_HW_ISSUE_TMIX_8343, BASE_HW_ISSUE_TMIX_8463, BASE_HW_ISSUE_TMIX_8456, - BASE_HW_ISSUE_TMIX_8438, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_921, - BASE_HW_ISSUE_GPU2017_1336, BASE_HW_ISSUE_TITANHW_2710, BASE_HW_ISSUE_GPU2022PRO_148, - BASE_HW_ISSUE_TITANHW_2938, BASE_HW_ISSUE_END -}; - -__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tMIx_r0p1[] = { - BASE_HW_ISSUE_9435, BASE_HW_ISSUE_10682, BASE_HW_ISSUE_11054, - BASE_HW_ISSUE_TMIX_7891, BASE_HW_ISSUE_TMIX_7940, BASE_HW_ISSUE_TMIX_8042, - BASE_HW_ISSUE_TMIX_8133, BASE_HW_ISSUE_TMIX_8138, BASE_HW_ISSUE_TMIX_8206, - BASE_HW_ISSUE_TMIX_8343, BASE_HW_ISSUE_TMIX_8463, BASE_HW_ISSUE_TMIX_8456, - BASE_HW_ISSUE_TMIX_8438, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_921, - BASE_HW_ISSUE_GPU2017_1336, BASE_HW_ISSUE_TITANHW_2710, BASE_HW_ISSUE_GPU2022PRO_148, - BASE_HW_ISSUE_TITANHW_2938, BASE_HW_ISSUE_END -}; - -__attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tMIx[] = { - BASE_HW_ISSUE_5736, BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TMIX_7891, - BASE_HW_ISSUE_TMIX_7940, BASE_HW_ISSUE_TMIX_8042, BASE_HW_ISSUE_TMIX_8133, - BASE_HW_ISSUE_TMIX_8138, BASE_HW_ISSUE_TMIX_8206, BASE_HW_ISSUE_TMIX_8343, - BASE_HW_ISSUE_TMIX_8456, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TITANHW_2710, - BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_TITANHW_2938, BASE_HW_ISSUE_END -}; - -__attribute__((unused)) static const enum 
base_hw_issue base_hw_issues_tHEx_r0p0[] = { - BASE_HW_ISSUE_9435, BASE_HW_ISSUE_10682, BASE_HW_ISSUE_11054, - BASE_HW_ISSUE_TMIX_7891, BASE_HW_ISSUE_TMIX_8042, BASE_HW_ISSUE_TMIX_8133, - BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_921, BASE_HW_ISSUE_GPU2017_1336, - BASE_HW_ISSUE_TITANHW_2710, BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_TITANHW_2938, - BASE_HW_ISSUE_END -}; - -__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tHEx_r0p1[] = { - BASE_HW_ISSUE_9435, BASE_HW_ISSUE_10682, BASE_HW_ISSUE_11054, - BASE_HW_ISSUE_TMIX_7891, BASE_HW_ISSUE_TMIX_8042, BASE_HW_ISSUE_TMIX_8133, - BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_921, BASE_HW_ISSUE_GPU2017_1336, - BASE_HW_ISSUE_TITANHW_2710, BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_TITANHW_2938, - BASE_HW_ISSUE_END -}; - -__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tHEx_r0p2[] = { - BASE_HW_ISSUE_9435, BASE_HW_ISSUE_10682, BASE_HW_ISSUE_11054, - BASE_HW_ISSUE_TMIX_7891, BASE_HW_ISSUE_TMIX_8042, BASE_HW_ISSUE_TMIX_8133, - BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_921, BASE_HW_ISSUE_GPU2017_1336, - BASE_HW_ISSUE_TITANHW_2710, BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_TITANHW_2938, - BASE_HW_ISSUE_END -}; - -__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tHEx_r0p3[] = { - BASE_HW_ISSUE_9435, BASE_HW_ISSUE_10682, BASE_HW_ISSUE_TMIX_7891, - BASE_HW_ISSUE_TMIX_8042, BASE_HW_ISSUE_TMIX_8133, BASE_HW_ISSUE_TSIX_2033, - BASE_HW_ISSUE_TTRX_921, BASE_HW_ISSUE_GPU2017_1336, BASE_HW_ISSUE_TITANHW_2710, - BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_TITANHW_2938, BASE_HW_ISSUE_END -}; - -__attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tHEx[] = { - BASE_HW_ISSUE_5736, BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TMIX_7891, - BASE_HW_ISSUE_TMIX_8042, BASE_HW_ISSUE_TMIX_8133, BASE_HW_ISSUE_TSIX_2033, - BASE_HW_ISSUE_TITANHW_2710, BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_TITANHW_2938, - BASE_HW_ISSUE_END -}; - -__attribute__((unused)) 
static const enum base_hw_issue base_hw_issues_tSIx_r0p0[] = { - BASE_HW_ISSUE_9435, BASE_HW_ISSUE_11054, BASE_HW_ISSUE_TMIX_8133, - BASE_HW_ISSUE_TSIX_1116, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TSIX_1792, - BASE_HW_ISSUE_TTRX_921, BASE_HW_ISSUE_GPU2017_1336, BASE_HW_ISSUE_TTRX_3464, - BASE_HW_ISSUE_TITANHW_2710, BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_TITANHW_2938, - BASE_HW_ISSUE_END -}; - -__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tSIx_r0p1[] = { - BASE_HW_ISSUE_9435, BASE_HW_ISSUE_11054, BASE_HW_ISSUE_TMIX_8133, - BASE_HW_ISSUE_TSIX_1116, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TSIX_1792, - BASE_HW_ISSUE_TTRX_921, BASE_HW_ISSUE_GPU2017_1336, BASE_HW_ISSUE_TTRX_3464, - BASE_HW_ISSUE_TITANHW_2710, BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_TITANHW_2938, - BASE_HW_ISSUE_END -}; - -__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tSIx_r1p0[] = { - BASE_HW_ISSUE_9435, BASE_HW_ISSUE_11054, BASE_HW_ISSUE_TMIX_8133, - BASE_HW_ISSUE_TSIX_1116, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_921, - BASE_HW_ISSUE_GPU2017_1336, BASE_HW_ISSUE_TTRX_3464, BASE_HW_ISSUE_TITANHW_2710, - BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_TITANHW_2938, BASE_HW_ISSUE_END -}; - -__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tSIx_r1p1[] = { - BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TMIX_8133, BASE_HW_ISSUE_TSIX_1116, - BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_921, BASE_HW_ISSUE_GPU2017_1336, - BASE_HW_ISSUE_TTRX_3464, BASE_HW_ISSUE_TITANHW_2710, BASE_HW_ISSUE_GPU2022PRO_148, - BASE_HW_ISSUE_TITANHW_2938, BASE_HW_ISSUE_END -}; - -__attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tSIx[] = { - BASE_HW_ISSUE_5736, BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TMIX_8133, - BASE_HW_ISSUE_TSIX_1116, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_3464, - BASE_HW_ISSUE_TITANHW_2710, BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_TITANHW_2938, - BASE_HW_ISSUE_END -}; - -__attribute__((unused)) static const enum 
base_hw_issue base_hw_issues_tDVx_r0p0[] = { - BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TMIX_8133, BASE_HW_ISSUE_TSIX_1116, - BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_921, BASE_HW_ISSUE_GPU2017_1336, - BASE_HW_ISSUE_TTRX_3464, BASE_HW_ISSUE_TITANHW_2710, BASE_HW_ISSUE_GPU2022PRO_148, - BASE_HW_ISSUE_TITANHW_2938, BASE_HW_ISSUE_END -}; - -__attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tDVx[] = { - BASE_HW_ISSUE_5736, BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TMIX_8133, - BASE_HW_ISSUE_TSIX_1116, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_3464, - BASE_HW_ISSUE_TITANHW_2710, BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_TITANHW_2938, - BASE_HW_ISSUE_END -}; - -__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tNOx_r0p0[] = { - BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TMIX_8133, BASE_HW_ISSUE_TSIX_1116, - BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TNOX_1194, BASE_HW_ISSUE_TTRX_921, - BASE_HW_ISSUE_GPU2017_1336, BASE_HW_ISSUE_TTRX_3464, BASE_HW_ISSUE_TITANHW_2710, - BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_TITANHW_2938, BASE_HW_ISSUE_END -}; - -__attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tNOx[] = { - BASE_HW_ISSUE_5736, BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TMIX_8133, - BASE_HW_ISSUE_TSIX_1116, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_3464, - BASE_HW_ISSUE_TITANHW_2710, BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_TITANHW_2938, - BASE_HW_ISSUE_END -}; - -__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tGOx_r0p0[] = { - BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TMIX_8133, BASE_HW_ISSUE_TSIX_1116, - BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TNOX_1194, BASE_HW_ISSUE_TTRX_921, - BASE_HW_ISSUE_GPU2017_1336, BASE_HW_ISSUE_TTRX_3464, BASE_HW_ISSUE_TITANHW_2710, - BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_TITANHW_2938, BASE_HW_ISSUE_END -}; - -__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tGOx_r1p0[] = { - BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TMIX_8133, BASE_HW_ISSUE_TSIX_1116, - 
BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TGOX_R1_1234, BASE_HW_ISSUE_TTRX_921, - BASE_HW_ISSUE_GPU2017_1336, BASE_HW_ISSUE_TTRX_3464, BASE_HW_ISSUE_TITANHW_2710, - BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_TITANHW_2938, BASE_HW_ISSUE_END -}; - -__attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tGOx[] = { - BASE_HW_ISSUE_5736, BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TMIX_8133, - BASE_HW_ISSUE_TSIX_1116, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_3464, - BASE_HW_ISSUE_TITANHW_2710, BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_TITANHW_2938, - BASE_HW_ISSUE_END -}; - -__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTRx_r0p0[] = { - BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, - BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_TTRX_2968_TTRX_3162, - BASE_HW_ISSUE_TTRX_3076, BASE_HW_ISSUE_TTRX_921, - BASE_HW_ISSUE_TTRX_3414, BASE_HW_ISSUE_GPU2017_1336, - BASE_HW_ISSUE_TTRX_3083, BASE_HW_ISSUE_TTRX_3470, - BASE_HW_ISSUE_TTRX_3464, BASE_HW_ISSUE_TTRX_3485, - BASE_HW_ISSUE_TITANHW_2710, BASE_HW_ISSUE_GPU2022PRO_148, - BASE_HW_ISSUE_TITANHW_2938, BASE_HW_ISSUE_END -}; - -__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTRx_r0p1[] = { - BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, - BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_TTRX_2968_TTRX_3162, - BASE_HW_ISSUE_TTRX_3076, BASE_HW_ISSUE_TTRX_921, - BASE_HW_ISSUE_TTRX_3414, BASE_HW_ISSUE_GPU2017_1336, - BASE_HW_ISSUE_TTRX_3083, BASE_HW_ISSUE_TTRX_3470, - BASE_HW_ISSUE_TTRX_3464, BASE_HW_ISSUE_TTRX_3485, - BASE_HW_ISSUE_TITANHW_2710, BASE_HW_ISSUE_GPU2022PRO_148, - BASE_HW_ISSUE_TITANHW_2938, BASE_HW_ISSUE_END -}; - -__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTRx_r0p2[] = { - BASE_HW_ISSUE_9435, - BASE_HW_ISSUE_TSIX_2033, - BASE_HW_ISSUE_TTRX_1337, - BASE_HW_ISSUE_TTRX_2968_TTRX_3162, - BASE_HW_ISSUE_TTRX_3076, - BASE_HW_ISSUE_TTRX_921, - BASE_HW_ISSUE_TTRX_3414, - BASE_HW_ISSUE_GPU2017_1336, - BASE_HW_ISSUE_TTRX_3083, - BASE_HW_ISSUE_TTRX_3470, - 
BASE_HW_ISSUE_TTRX_3464, - BASE_HW_ISSUE_TITANHW_2710, - BASE_HW_ISSUE_GPU2022PRO_148, - BASE_HW_ISSUE_TITANHW_2938, - BASE_HW_ISSUE_END -}; - -__attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tTRx[] = { - BASE_HW_ISSUE_5736, BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, - BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_TTRX_3414, BASE_HW_ISSUE_TTRX_3083, - BASE_HW_ISSUE_TTRX_3470, BASE_HW_ISSUE_TTRX_3464, BASE_HW_ISSUE_TITANHW_2710, - BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_TITANHW_2938, BASE_HW_ISSUE_END -}; - -__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tNAx_r0p0[] = { - BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, - BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_TTRX_2968_TTRX_3162, - BASE_HW_ISSUE_TTRX_3076, BASE_HW_ISSUE_TTRX_921, - BASE_HW_ISSUE_TTRX_3414, BASE_HW_ISSUE_GPU2017_1336, - BASE_HW_ISSUE_TTRX_3083, BASE_HW_ISSUE_TTRX_3470, - BASE_HW_ISSUE_TTRX_3464, BASE_HW_ISSUE_TTRX_3485, - BASE_HW_ISSUE_TITANHW_2710, BASE_HW_ISSUE_GPU2022PRO_148, - BASE_HW_ISSUE_TITANHW_2938, BASE_HW_ISSUE_END -}; - -__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tNAx_r0p1[] = { - BASE_HW_ISSUE_9435, - BASE_HW_ISSUE_TSIX_2033, - BASE_HW_ISSUE_TTRX_1337, - BASE_HW_ISSUE_TTRX_2968_TTRX_3162, - BASE_HW_ISSUE_TTRX_3076, - BASE_HW_ISSUE_TTRX_921, - BASE_HW_ISSUE_TTRX_3414, - BASE_HW_ISSUE_GPU2017_1336, - BASE_HW_ISSUE_TTRX_3083, - BASE_HW_ISSUE_TTRX_3470, - BASE_HW_ISSUE_TTRX_3464, - BASE_HW_ISSUE_TITANHW_2710, - BASE_HW_ISSUE_GPU2022PRO_148, - BASE_HW_ISSUE_TITANHW_2938, - BASE_HW_ISSUE_END -}; - -__attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tNAx[] = { - BASE_HW_ISSUE_5736, BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, - BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_TTRX_3414, BASE_HW_ISSUE_TTRX_3083, - BASE_HW_ISSUE_TTRX_3470, BASE_HW_ISSUE_TTRX_3464, BASE_HW_ISSUE_TITANHW_2710, - BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_TITANHW_2938, BASE_HW_ISSUE_END -}; - -__attribute__((unused)) static 
const enum base_hw_issue base_hw_issues_tBEx_r0p0[] = { - BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, - BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_TTRX_2968_TTRX_3162, - BASE_HW_ISSUE_TTRX_921, BASE_HW_ISSUE_TTRX_3414, - BASE_HW_ISSUE_TTRX_3083, BASE_HW_ISSUE_TTRX_3470, - BASE_HW_ISSUE_TTRX_3464, BASE_HW_ISSUE_TTRX_3485, - BASE_HW_ISSUE_TITANHW_2710, BASE_HW_ISSUE_GPU2022PRO_148, - BASE_HW_ISSUE_TITANHW_2938, BASE_HW_ISSUE_END -}; - -__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tBEx_r0p1[] = { - BASE_HW_ISSUE_9435, - BASE_HW_ISSUE_TSIX_2033, - BASE_HW_ISSUE_TTRX_1337, - BASE_HW_ISSUE_TTRX_2968_TTRX_3162, - BASE_HW_ISSUE_TTRX_921, - BASE_HW_ISSUE_TTRX_3414, - BASE_HW_ISSUE_TTRX_3083, - BASE_HW_ISSUE_TTRX_3470, - BASE_HW_ISSUE_TTRX_3464, - BASE_HW_ISSUE_TITANHW_2710, - BASE_HW_ISSUE_GPU2022PRO_148, - BASE_HW_ISSUE_TITANHW_2938, - BASE_HW_ISSUE_END -}; - -__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tBEx_r1p0[] = { - BASE_HW_ISSUE_9435, - BASE_HW_ISSUE_TSIX_2033, - BASE_HW_ISSUE_TTRX_1337, - BASE_HW_ISSUE_TTRX_2968_TTRX_3162, - BASE_HW_ISSUE_TTRX_921, - BASE_HW_ISSUE_TTRX_3414, - BASE_HW_ISSUE_TTRX_3083, - BASE_HW_ISSUE_TTRX_3470, - BASE_HW_ISSUE_TTRX_3464, - BASE_HW_ISSUE_TITANHW_2710, - BASE_HW_ISSUE_GPU2022PRO_148, - BASE_HW_ISSUE_TITANHW_2938, - BASE_HW_ISSUE_END -}; - -__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tBEx_r1p1[] = { - BASE_HW_ISSUE_9435, - BASE_HW_ISSUE_TSIX_2033, - BASE_HW_ISSUE_TTRX_1337, - BASE_HW_ISSUE_TTRX_2968_TTRX_3162, - BASE_HW_ISSUE_TTRX_921, - BASE_HW_ISSUE_TTRX_3414, - BASE_HW_ISSUE_TTRX_3083, - BASE_HW_ISSUE_TTRX_3470, - BASE_HW_ISSUE_TTRX_3464, - BASE_HW_ISSUE_TITANHW_2710, - BASE_HW_ISSUE_GPU2022PRO_148, - BASE_HW_ISSUE_TITANHW_2938, - BASE_HW_ISSUE_END -}; - -__attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tBEx[] = { - BASE_HW_ISSUE_5736, BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, - BASE_HW_ISSUE_TTRX_1337, 
BASE_HW_ISSUE_TTRX_3414, BASE_HW_ISSUE_TTRX_3083, - BASE_HW_ISSUE_TTRX_3470, BASE_HW_ISSUE_TTRX_3464, BASE_HW_ISSUE_TITANHW_2710, - BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_TITANHW_2938, BASE_HW_ISSUE_END -}; - -__attribute__((unused)) static const enum base_hw_issue base_hw_issues_lBEx_r1p0[] = { - BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, - BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_TTRX_2968_TTRX_3162, - BASE_HW_ISSUE_TTRX_921, BASE_HW_ISSUE_TTRX_3414, - BASE_HW_ISSUE_TTRX_3083, BASE_HW_ISSUE_TTRX_3470, - BASE_HW_ISSUE_TTRX_3464, BASE_HW_ISSUE_TTRX_3485, - BASE_HW_ISSUE_TITANHW_2710, BASE_HW_ISSUE_GPU2022PRO_148, - BASE_HW_ISSUE_TITANHW_2938, BASE_HW_ISSUE_END -}; - -__attribute__((unused)) static const enum base_hw_issue base_hw_issues_lBEx_r1p1[] = { - BASE_HW_ISSUE_9435, - BASE_HW_ISSUE_TSIX_2033, - BASE_HW_ISSUE_TTRX_1337, - BASE_HW_ISSUE_TTRX_2968_TTRX_3162, - BASE_HW_ISSUE_TTRX_921, - BASE_HW_ISSUE_TTRX_3414, - BASE_HW_ISSUE_TTRX_3083, - BASE_HW_ISSUE_TTRX_3470, - BASE_HW_ISSUE_TTRX_3464, - BASE_HW_ISSUE_TITANHW_2710, - BASE_HW_ISSUE_GPU2022PRO_148, - BASE_HW_ISSUE_TITANHW_2938, - BASE_HW_ISSUE_END -}; - -__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tBAx_r0p0[] = { - BASE_HW_ISSUE_9435, - BASE_HW_ISSUE_TSIX_2033, - BASE_HW_ISSUE_TTRX_1337, - BASE_HW_ISSUE_TTRX_2968_TTRX_3162, - BASE_HW_ISSUE_TTRX_921, - BASE_HW_ISSUE_TTRX_3414, - BASE_HW_ISSUE_TTRX_3083, - BASE_HW_ISSUE_TTRX_3470, - BASE_HW_ISSUE_TTRX_3464, - BASE_HW_ISSUE_TITANHW_2710, - BASE_HW_ISSUE_GPU2022PRO_148, - BASE_HW_ISSUE_TITANHW_2938, - BASE_HW_ISSUE_END -}; - -__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tBAx_r0p1[] = { - BASE_HW_ISSUE_9435, - BASE_HW_ISSUE_TSIX_2033, - BASE_HW_ISSUE_TTRX_1337, - BASE_HW_ISSUE_TTRX_2968_TTRX_3162, - BASE_HW_ISSUE_TTRX_921, - BASE_HW_ISSUE_TTRX_3414, - BASE_HW_ISSUE_TTRX_3083, - BASE_HW_ISSUE_TTRX_3470, - BASE_HW_ISSUE_TTRX_3464, - BASE_HW_ISSUE_TITANHW_2710, - BASE_HW_ISSUE_GPU2022PRO_148, - 
BASE_HW_ISSUE_TITANHW_2938, - BASE_HW_ISSUE_END -}; - -__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tBAx_r0p2[] = { - BASE_HW_ISSUE_9435, - BASE_HW_ISSUE_TSIX_2033, - BASE_HW_ISSUE_TTRX_1337, - BASE_HW_ISSUE_TTRX_2968_TTRX_3162, - BASE_HW_ISSUE_TTRX_921, - BASE_HW_ISSUE_TTRX_3414, - BASE_HW_ISSUE_TTRX_3083, - BASE_HW_ISSUE_TTRX_3470, - BASE_HW_ISSUE_TTRX_3464, - BASE_HW_ISSUE_TITANHW_2710, - BASE_HW_ISSUE_GPU2022PRO_148, - BASE_HW_ISSUE_TITANHW_2938, - BASE_HW_ISSUE_END -}; - -__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tBAx_r1p0[] = { - BASE_HW_ISSUE_9435, - BASE_HW_ISSUE_TSIX_2033, - BASE_HW_ISSUE_TTRX_1337, - BASE_HW_ISSUE_TTRX_2968_TTRX_3162, - BASE_HW_ISSUE_TTRX_921, - BASE_HW_ISSUE_TTRX_3414, - BASE_HW_ISSUE_TTRX_3083, - BASE_HW_ISSUE_TTRX_3470, - BASE_HW_ISSUE_TTRX_3464, - BASE_HW_ISSUE_TITANHW_2710, - BASE_HW_ISSUE_GPU2022PRO_148, - BASE_HW_ISSUE_TITANHW_2938, - BASE_HW_ISSUE_END -}; - -__attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tBAx[] = { - BASE_HW_ISSUE_5736, BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, - BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_TTRX_3414, BASE_HW_ISSUE_TTRX_3083, - BASE_HW_ISSUE_TTRX_3470, BASE_HW_ISSUE_TTRX_3464, BASE_HW_ISSUE_TITANHW_2710, - BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_TITANHW_2938, BASE_HW_ISSUE_END -}; - -__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tODx_r0p0[] = { - BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_GPU2019_3212, - BASE_HW_ISSUE_GPU2019_3878, BASE_HW_ISSUE_GPU2019_3901, BASE_HW_ISSUE_TITANHW_2710, - BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_TITANHW_2938, BASE_HW_ISSUE_END -}; - -__attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tODx[] = { - BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_GPU2019_3212, - BASE_HW_ISSUE_GPU2019_3878, BASE_HW_ISSUE_GPU2019_3901, BASE_HW_ISSUE_TITANHW_2710, - BASE_HW_ISSUE_GPU2022PRO_148, 
BASE_HW_ISSUE_TITANHW_2938, BASE_HW_ISSUE_END -}; - -__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tGRx_r0p0[] = { - BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_GPU2019_3878, - BASE_HW_ISSUE_GPU2019_3901, BASE_HW_ISSUE_TITANHW_2710, BASE_HW_ISSUE_GPU2022PRO_148, - BASE_HW_ISSUE_TITANHW_2938, BASE_HW_ISSUE_END -}; - -__attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tGRx[] = { - BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_GPU2019_3878, - BASE_HW_ISSUE_GPU2019_3901, BASE_HW_ISSUE_TITANHW_2710, BASE_HW_ISSUE_GPU2022PRO_148, - BASE_HW_ISSUE_TITANHW_2938, BASE_HW_ISSUE_END -}; - -__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tVAx_r0p0[] = { - BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_GPU2019_3878, - BASE_HW_ISSUE_GPU2019_3901, BASE_HW_ISSUE_TITANHW_2710, BASE_HW_ISSUE_GPU2022PRO_148, - BASE_HW_ISSUE_TITANHW_2938, BASE_HW_ISSUE_TITANHW_2938, BASE_HW_ISSUE_END -}; - -__attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tVAx[] = { - BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_GPU2019_3878, - BASE_HW_ISSUE_GPU2019_3901, BASE_HW_ISSUE_TITANHW_2710, BASE_HW_ISSUE_GPU2022PRO_148, - BASE_HW_ISSUE_TITANHW_2938, BASE_HW_ISSUE_END -}; - -__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTUx_r0p0[] = { - BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_TURSEHW_1997, - BASE_HW_ISSUE_GPU2019_3878, BASE_HW_ISSUE_TURSEHW_2716, BASE_HW_ISSUE_GPU2019_3901, - BASE_HW_ISSUE_GPU2021PRO_290, BASE_HW_ISSUE_TITANHW_2710, BASE_HW_ISSUE_TITANHW_2679, - BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_TITANHW_2938, BASE_HW_ISSUE_END -}; - -__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTUx_r0p1[] = { - BASE_HW_ISSUE_TSIX_2033, - BASE_HW_ISSUE_TTRX_1337, - BASE_HW_ISSUE_TURSEHW_1997, - BASE_HW_ISSUE_GPU2019_3878, - BASE_HW_ISSUE_TURSEHW_2716, - 
BASE_HW_ISSUE_GPU2019_3901, - BASE_HW_ISSUE_GPU2021PRO_290, - BASE_HW_ISSUE_TITANHW_2710, - BASE_HW_ISSUE_TITANHW_2679, - BASE_HW_ISSUE_GPU2022PRO_148, - BASE_HW_ISSUE_TITANHW_2922, - BASE_HW_ISSUE_TITANHW_2938, - BASE_HW_ISSUE_END -}; - -__attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tTUx[] = { - BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_GPU2019_3878, - BASE_HW_ISSUE_TURSEHW_2716, BASE_HW_ISSUE_GPU2019_3901, BASE_HW_ISSUE_GPU2021PRO_290, - BASE_HW_ISSUE_TITANHW_2710, BASE_HW_ISSUE_TITANHW_2679, BASE_HW_ISSUE_GPU2022PRO_148, - BASE_HW_ISSUE_TITANHW_2922, BASE_HW_ISSUE_TITANHW_2938, BASE_HW_ISSUE_END -}; - -__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTUx_r1p0[] = { - BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_GPU2019_3878, - BASE_HW_ISSUE_TURSEHW_2716, BASE_HW_ISSUE_GPU2019_3901, BASE_HW_ISSUE_GPU2021PRO_290, - BASE_HW_ISSUE_TITANHW_2710, BASE_HW_ISSUE_TITANHW_2679, BASE_HW_ISSUE_GPU2022PRO_148, - BASE_HW_ISSUE_TITANHW_2922, BASE_HW_ISSUE_TITANHW_2938, BASE_HW_ISSUE_END -}; - -__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTUx_r1p1[] = { - BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_GPU2019_3878, - BASE_HW_ISSUE_TURSEHW_2716, BASE_HW_ISSUE_GPU2019_3901, BASE_HW_ISSUE_GPU2021PRO_290, - BASE_HW_ISSUE_TITANHW_2710, BASE_HW_ISSUE_TITANHW_2679, BASE_HW_ISSUE_GPU2022PRO_148, - BASE_HW_ISSUE_TITANHW_2922, BASE_HW_ISSUE_TITANHW_2938, BASE_HW_ISSUE_END -}; - -__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTUx_r1p2[] = { - BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_GPU2019_3878, - BASE_HW_ISSUE_TURSEHW_2716, BASE_HW_ISSUE_GPU2019_3901, BASE_HW_ISSUE_GPU2021PRO_290, - BASE_HW_ISSUE_TITANHW_2710, BASE_HW_ISSUE_TITANHW_2679, BASE_HW_ISSUE_GPU2022PRO_148, - BASE_HW_ISSUE_TITANHW_2922, BASE_HW_ISSUE_TITANHW_2938, BASE_HW_ISSUE_END -}; - -__attribute__((unused)) static const enum base_hw_issue 
base_hw_issues_tTUx_r1p3[] = { - BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_GPU2019_3878, - BASE_HW_ISSUE_TURSEHW_2716, BASE_HW_ISSUE_GPU2019_3901, BASE_HW_ISSUE_GPU2021PRO_290, - BASE_HW_ISSUE_TITANHW_2710, BASE_HW_ISSUE_TITANHW_2679, BASE_HW_ISSUE_GPU2022PRO_148, - BASE_HW_ISSUE_TITANHW_2922, BASE_HW_ISSUE_TITANHW_2938, BASE_HW_ISSUE_END -}; - -__attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tTIx[] = { - BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_TURSEHW_2716, - BASE_HW_ISSUE_GPU2021PRO_290, BASE_HW_ISSUE_TITANHW_2710, BASE_HW_ISSUE_TITANHW_2679, - BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_TITANHW_2922, BASE_HW_ISSUE_TITANHW_2952, - BASE_HW_ISSUE_TITANHW_2938, BASE_HW_ISSUE_END -}; - -__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTIx_r0p0[] = { - BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_TURSEHW_2716, - BASE_HW_ISSUE_GPU2021PRO_290, BASE_HW_ISSUE_TITANHW_2710, BASE_HW_ISSUE_TITANHW_2679, - BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_TITANHW_2922, BASE_HW_ISSUE_TITANHW_2952, - BASE_HW_ISSUE_TITANHW_2938, BASE_HW_ISSUE_END -}; - -__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTIx_r0p1[] = { - BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_TURSEHW_2716, - BASE_HW_ISSUE_GPU2021PRO_290, BASE_HW_ISSUE_TITANHW_2710, BASE_HW_ISSUE_TITANHW_2679, - BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_TITANHW_2938, BASE_HW_ISSUE_END -}; - -__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tKRx_r0p0[] = { - BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_TURSEHW_2716, BASE_HW_ISSUE_GPU2022PRO_148, - BASE_HW_ISSUE_KRAKEHW_2151, BASE_HW_ISSUE_KRAKEHW_2269, BASE_HW_ISSUE_END -}; - -__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tKRx_r0p1[] = { - BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_TURSEHW_2716, BASE_HW_ISSUE_GPU2022PRO_148, - BASE_HW_ISSUE_KRAKEHW_2269, BASE_HW_ISSUE_END -}; - 
-__attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tKRx[] = { - BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_TURSEHW_2716, BASE_HW_ISSUE_GPU2022PRO_148, - BASE_HW_ISSUE_KRAKEHW_2151, BASE_HW_ISSUE_KRAKEHW_2269, BASE_HW_ISSUE_END -}; - - -#endif /* _BASE_HWCONFIG_ISSUES_H_ */ diff --git a/drivers/gpu/arm/bifrost/mali_csffw.bin b/drivers/gpu/arm/bifrost/mali_csffw.bin index 1f8413ba14d7..4319d6b74e3b 100644 Binary files a/drivers/gpu/arm/bifrost/mali_csffw.bin and b/drivers/gpu/arm/bifrost/mali_csffw.bin differ diff --git a/drivers/gpu/arm/bifrost/mali_kbase.h b/drivers/gpu/arm/bifrost/mali_kbase.h index 498d53f15f9e..4d845ea08adb 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase.h +++ b/drivers/gpu/arm/bifrost/mali_kbase.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2024 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -204,22 +204,24 @@ int kbase_protected_mode_init(struct kbase_device *kbdev); void kbase_protected_mode_term(struct kbase_device *kbdev); /** - * kbase_device_pm_init() - Performs power management initialization and - * Verifies device tree configurations. + * kbase_device_backend_init() - Performs backend initialization and performs + * devicetree validation. * @kbdev: The kbase device structure for the device (must be a valid pointer) * * Return: 0 if successful, otherwise a standard Linux error code + * If -EPERM is returned, it means the device backend is not supported, but + * device initialization can continue. */ -int kbase_device_pm_init(struct kbase_device *kbdev); +int kbase_device_backend_init(struct kbase_device *kbdev); /** - * kbase_device_pm_term() - Performs power management deinitialization and - * Free resources. 
+ * kbase_device_backend_term() - Performs backend deinitialization and free + * resources. * @kbdev: The kbase device structure for the device (must be a valid pointer) * * Clean up all the resources */ -void kbase_device_pm_term(struct kbase_device *kbdev); +void kbase_device_backend_term(struct kbase_device *kbdev); int power_control_init(struct kbase_device *kbdev); void power_control_term(struct kbase_device *kbdev); @@ -343,21 +345,6 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done); void kbase_job_slot_ctx_priority_check_locked(struct kbase_context *kctx, struct kbase_jd_atom *katom); -/** - * kbase_job_slot_softstop_start_rp() - Soft-stop the atom at the start - * of a renderpass. - * @kctx: Pointer to a kernel base context. - * @reg: Reference of a growable GPU memory region in the same context. - * Takes ownership of the reference if successful. - * - * Used to switch to incremental rendering if we have nearly run out of - * virtual address space in a growable memory region and the atom currently - * executing on a job slot is the tiler job chain at the start of a renderpass. - * - * Return: 0 if successful, otherwise a negative error code. - */ -int kbase_job_slot_softstop_start_rp(struct kbase_context *kctx, struct kbase_va_region *reg); - /** * kbase_job_slot_softstop - Soft-stop the specified job slot * @@ -494,9 +481,7 @@ void kbasep_as_do_poke(struct work_struct *work); * or a dmb was executed recently (to ensure the value is most up-to-date). * However, without a lock the value could change afterwards. 
* - * Return: - * * false if a suspend is not in progress - * * !=false otherwise + * Return: False if a suspend is not in progress, true otherwise. */ static inline bool kbase_pm_is_suspending(struct kbase_device *kbdev) { @@ -519,21 +504,20 @@ static inline bool kbase_pm_is_resuming(struct kbase_device *kbdev) return kbdev->pm.resuming; } -#ifdef CONFIG_MALI_ARBITER_SUPPORT /* * Check whether a gpu lost is in progress * * @kbdev: The kbase device structure for the device (must be a valid pointer) * * Indicates whether a gpu lost has been received and jobs are no longer - * being scheduled + * being scheduled. * - * Return: false if gpu is lost - * Return: != false otherwise + * Return: true if an Arbiter is present and the GPU is lost, false otherwise (with + * no Arbiter the GPU is always present). */ static inline bool kbase_pm_is_gpu_lost(struct kbase_device *kbdev) { - return (atomic_read(&kbdev->pm.gpu_lost) == 0 ? false : true); + return (kbdev->arb.arb_if && ((bool)atomic_read(&kbdev->pm.gpu_lost))); } /* @@ -554,7 +538,6 @@ static inline void kbase_pm_set_gpu_lost(struct kbase_device *kbdev, bool gpu_lo if (new_val != cur_val) KBASE_KTRACE_ADD(kbdev, ARB_GPU_LOST, NULL, (u64)new_val); } -#endif /** * kbase_pm_is_active - Determine whether the GPU is active @@ -812,108 +795,8 @@ bool kbasep_adjust_prioritized_process(struct kbase_device *kbdev, bool add, uin #define UINT64_MAX ((uint64_t)0xFFFFFFFFFFFFFFFFULL) #endif -/** - * kbase_file_fops_count() - Get the kfile::fops_count value - * - * @kfile: Pointer to the object representing the mali device file. - * - * The value is read with kfile::lock held. - * - * Return: sampled value of kfile::fops_count. 
- */ -static inline int kbase_file_fops_count(struct kbase_file *kfile) -{ - int fops_count; - - spin_lock(&kfile->lock); - fops_count = kfile->fops_count; - spin_unlock(&kfile->lock); - - return fops_count; -} - -/** - * kbase_file_inc_fops_count_unless_closed() - Increment the kfile::fops_count value if the - * kfile::owner is still set. - * - * @kfile: Pointer to the object representing the /dev/malixx device file instance. - * - * Return: true if the increment was done otherwise false. - */ -static inline bool kbase_file_inc_fops_count_unless_closed(struct kbase_file *kfile) -{ - bool count_incremented = false; - - spin_lock(&kfile->lock); - if (kfile->owner) { - kfile->fops_count++; - count_incremented = true; - } - spin_unlock(&kfile->lock); - - return count_incremented; -} - -/** - * kbase_file_dec_fops_count() - Decrement the kfile::fops_count value - * - * @kfile: Pointer to the object representing the /dev/malixx device file instance. - * - * This function shall only be called to decrement kfile::fops_count if a successful call - * to kbase_file_inc_fops_count_unless_closed() was made previously by the current thread. - * - * The function would enqueue the kfile::destroy_kctx_work if the process that originally - * created the file instance has closed its copy and no Kbase handled file operations are - * in progress and no memory mappings are present for the file instance. - */ -static inline void kbase_file_dec_fops_count(struct kbase_file *kfile) -{ - spin_lock(&kfile->lock); - WARN_ON_ONCE(kfile->fops_count <= 0); - kfile->fops_count--; - if (unlikely(!kfile->fops_count && !kfile->owner && !kfile->map_count)) { - queue_work(system_wq, &kfile->destroy_kctx_work); -#if IS_ENABLED(CONFIG_DEBUG_FS) - wake_up(&kfile->zero_fops_count_wait); +#if !defined(UINT32_MAX) +#define UINT32_MAX ((uint32_t)0xFFFFFFFFU) #endif - } - spin_unlock(&kfile->lock); -} - -/** - * kbase_file_inc_cpu_mapping_count() - Increment the kfile::map_count value. 
- * - * @kfile: Pointer to the object representing the /dev/malixx device file instance. - * - * This function shall be called when the memory mapping on /dev/malixx device file - * instance is created. The kbase_file::setup_state shall be KBASE_FILE_COMPLETE. - */ -static inline void kbase_file_inc_cpu_mapping_count(struct kbase_file *kfile) -{ - spin_lock(&kfile->lock); - kfile->map_count++; - spin_unlock(&kfile->lock); -} - -/** - * kbase_file_dec_cpu_mapping_count() - Decrement the kfile::map_count value - * - * @kfile: Pointer to the object representing the /dev/malixx device file instance. - * - * This function is called to decrement kfile::map_count value when the memory mapping - * on /dev/malixx device file is closed. - * The function would enqueue the kfile::destroy_kctx_work if the process that originally - * created the file instance has closed its copy and there are no mappings present and no - * Kbase handled file operations are in progress for the file instance. - */ -static inline void kbase_file_dec_cpu_mapping_count(struct kbase_file *kfile) -{ - spin_lock(&kfile->lock); - WARN_ON_ONCE(kfile->map_count <= 0); - kfile->map_count--; - if (unlikely(!kfile->map_count && !kfile->owner && !kfile->fops_count)) - queue_work(system_wq, &kfile->destroy_kctx_work); - spin_unlock(&kfile->lock); -} #endif diff --git a/drivers/gpu/arm/bifrost/mali_kbase_caps.h b/drivers/gpu/arm/bifrost/mali_kbase_caps.h index a92569d31f06..000e30e1ed84 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_caps.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_caps.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2020-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2020-2024 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -33,15 +33,40 @@ * * @MALI_KBASE_CAP_SYSTEM_MONITOR: System Monitor * @MALI_KBASE_CAP_JIT_PRESSURE_LIMIT: JIT Pressure limit - * @MALI_KBASE_CAP_MEM_GROW_ON_GPF: Memory grow on page fault - * @MALI_KBASE_CAP_MEM_PROTECTED: Protected memory + * @MALI_KBASE_CAP_QUERY_MEM_DONT_NEED: BASE_MEM_DONT_NEED is queryable + * @MALI_KBASE_CAP_QUERY_MEM_GROW_ON_GPF: BASE_MEM_GROW_ON_GPF is queryable + * @MALI_KBASE_CAP_QUERY_MEM_PROTECTED: BASE_MEM_PROTECTED is queryable + * @MALI_KBASE_CAP_QUERY_MEM_IMPORT_SYNC_ON_MAP_UNMAP: BASE_MEM_IMPORT_SYNC_ON_MAP_UNMAP is + * queryable + * @MALI_KBASE_CAP_QUERY_MEM_KERNEL_SYNC: BASE_MEM_KERNEL_SYNC is queryable + * @MALI_KBASE_CAP_QUERY_MEM_SAME_VA: BASE_MEM_SAME_VA is queryable + * @MALI_KBASE_CAP_REJECT_ALLOC_MEM_DONT_NEED: BASE_MEM_DONT_NEED is not allocatable + * @MALI_KBASE_CAP_REJECT_ALLOC_MEM_PROTECTED_IN_UNPROTECTED_ALLOCS: BASE_MEM_PROTECTED is not + * allocatable in functions other + * than base_mem_protected + * @MALI_KBASE_CAP_REJECT_ALLOC_MEM_UNUSED_BIT_8: BASE_MEM_UNUSED_BIT_8 is not allocatable + * @MALI_KBASE_CAP_REJECT_ALLOC_MEM_UNUSED_BIT_19: BASE_MEM_UNUSED_BIT_19 is not allocatable + * @MALI_KBASE_CAP_REJECT_ALLOC_MEM_UNUSED_BIT_20: BASE_MEM_UNUSED_BIT_20 is not allocatable + * @MALI_KBASE_CAP_REJECT_ALLOC_MEM_UNUSED_BIT_27: BASE_MEM_UNUSED_BIT_27 is not allocatable * @MALI_KBASE_NUM_CAPS: Delimiter + * + * New enumerators must be non-negative and smaller than @MALI_KBASE_NUM_CAPS. 
*/ enum mali_kbase_cap { MALI_KBASE_CAP_SYSTEM_MONITOR = 0, MALI_KBASE_CAP_JIT_PRESSURE_LIMIT, - MALI_KBASE_CAP_MEM_GROW_ON_GPF, - MALI_KBASE_CAP_MEM_PROTECTED, + MALI_KBASE_CAP_QUERY_MEM_DONT_NEED, + MALI_KBASE_CAP_QUERY_MEM_GROW_ON_GPF, + MALI_KBASE_CAP_QUERY_MEM_PROTECTED, + MALI_KBASE_CAP_QUERY_MEM_IMPORT_SYNC_ON_MAP_UNMAP, + MALI_KBASE_CAP_QUERY_MEM_KERNEL_SYNC, + MALI_KBASE_CAP_QUERY_MEM_SAME_VA, + MALI_KBASE_CAP_REJECT_ALLOC_MEM_DONT_NEED, + MALI_KBASE_CAP_REJECT_ALLOC_MEM_PROTECTED_IN_UNPROTECTED_ALLOCS, + MALI_KBASE_CAP_REJECT_ALLOC_MEM_UNUSED_BIT_8, + MALI_KBASE_CAP_REJECT_ALLOC_MEM_UNUSED_BIT_19, + MALI_KBASE_CAP_REJECT_ALLOC_MEM_UNUSED_BIT_20, + MALI_KBASE_CAP_REJECT_ALLOC_MEM_UNUSED_BIT_27, MALI_KBASE_NUM_CAPS }; @@ -57,14 +82,67 @@ static inline bool mali_kbase_supports_jit_pressure_limit(unsigned long api_vers return mali_kbase_supports_cap(api_version, MALI_KBASE_CAP_JIT_PRESSURE_LIMIT); } -static inline bool mali_kbase_supports_mem_grow_on_gpf(unsigned long api_version) +static inline bool mali_kbase_supports_query_mem_dont_need(unsigned long api_version) { - return mali_kbase_supports_cap(api_version, MALI_KBASE_CAP_MEM_GROW_ON_GPF); + return mali_kbase_supports_cap(api_version, MALI_KBASE_CAP_QUERY_MEM_DONT_NEED); } -static inline bool mali_kbase_supports_mem_protected(unsigned long api_version) +static inline bool mali_kbase_supports_query_mem_grow_on_gpf(unsigned long api_version) { - return mali_kbase_supports_cap(api_version, MALI_KBASE_CAP_MEM_PROTECTED); + return mali_kbase_supports_cap(api_version, MALI_KBASE_CAP_QUERY_MEM_GROW_ON_GPF); +} + +static inline bool mali_kbase_supports_query_mem_protected(unsigned long api_version) +{ + return mali_kbase_supports_cap(api_version, MALI_KBASE_CAP_QUERY_MEM_PROTECTED); +} + +static inline bool mali_kbase_supports_query_mem_import_sync_on_map_unmap(unsigned long api_version) +{ + return mali_kbase_supports_cap(api_version, + MALI_KBASE_CAP_QUERY_MEM_IMPORT_SYNC_ON_MAP_UNMAP); +} + +static inline 
bool mali_kbase_supports_query_mem_kernel_sync(unsigned long api_version) +{ + return mali_kbase_supports_cap(api_version, MALI_KBASE_CAP_QUERY_MEM_KERNEL_SYNC); +} + +static inline bool mali_kbase_supports_query_mem_same_va(unsigned long api_version) +{ + return mali_kbase_supports_cap(api_version, MALI_KBASE_CAP_QUERY_MEM_SAME_VA); +} + +static inline bool mali_kbase_supports_reject_alloc_mem_dont_need(unsigned long api_version) +{ + return mali_kbase_supports_cap(api_version, MALI_KBASE_CAP_REJECT_ALLOC_MEM_DONT_NEED); +} + +static inline bool +mali_kbase_supports_reject_alloc_mem_protected_in_unprotected_allocs(unsigned long api_version) +{ + return mali_kbase_supports_cap( + api_version, MALI_KBASE_CAP_REJECT_ALLOC_MEM_PROTECTED_IN_UNPROTECTED_ALLOCS); +} + +static inline bool mali_kbase_supports_reject_alloc_mem_unused_bit_8(unsigned long api_version) +{ + return mali_kbase_supports_cap(api_version, MALI_KBASE_CAP_REJECT_ALLOC_MEM_UNUSED_BIT_8); +} + +static inline bool mali_kbase_supports_reject_alloc_mem_unused_bit_19(unsigned long api_version) +{ + return mali_kbase_supports_cap(api_version, MALI_KBASE_CAP_REJECT_ALLOC_MEM_UNUSED_BIT_19); +} + +static inline bool mali_kbase_supports_reject_alloc_mem_unused_bit_20(unsigned long api_version) +{ + return mali_kbase_supports_cap(api_version, MALI_KBASE_CAP_REJECT_ALLOC_MEM_UNUSED_BIT_20); +} + +static inline bool mali_kbase_supports_reject_alloc_mem_unused_bit_27(unsigned long api_version) +{ + return mali_kbase_supports_cap(api_version, MALI_KBASE_CAP_REJECT_ALLOC_MEM_UNUSED_BIT_27); } #endif /* __KBASE_CAPS_H_ */ diff --git a/drivers/gpu/arm/bifrost/mali_kbase_config.h b/drivers/gpu/arm/bifrost/mali_kbase_config.h index 7233e2dd3920..2f9e28aaec9a 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_config.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_config.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. 
+ * (C) COPYRIGHT 2010-2024 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -166,8 +166,9 @@ struct kbase_pm_callback_conf { * * The system integrator can decide whether to either do nothing, just switch off * the clocks to the GPU, or to completely power down the GPU. - * The platform specific private pointer kbase_device::platform_context can be accessed and modified in here. It is the - * platform \em callbacks responsibility to initialize and terminate this pointer if used (see @ref kbase_platform_funcs_conf). + * The platform specific private pointer kbase_device::platform_context can be + * accessed and modified in here. It is the platform \em callbacks responsibility + * to initialize and terminate this pointer if used (see @ref kbase_platform_funcs_conf). * * If runtime PM is enabled and @power_runtime_gpu_idle_callback is used * then this callback should power off the GPU (or switch off the clocks @@ -179,15 +180,18 @@ struct kbase_pm_callback_conf { /** Callback for when the GPU is about to become active and power must be supplied. * - * This function must not return until the GPU is powered and clocked sufficiently for register access to - * succeed. The return value specifies whether the GPU was powered down since the call to power_off_callback. - * If the GPU state has been lost then this function must return 1, otherwise it should return 0. - * The platform specific private pointer kbase_device::platform_context can be accessed and modified in here. It is the - * platform \em callbacks responsibility to initialize and terminate this pointer if used (see @ref kbase_platform_funcs_conf). + * This function must not return until the GPU is powered and clocked sufficiently + * for register access to succeed. The return value specifies whether the GPU was + * powered down since the call to power_off_callback. 
+ * If the GPU is in reset state it should return 2, if the GPU state has been lost + * then this function must return 1, otherwise it should return 0. + * The platform specific private pointer kbase_device::platform_context can be + * accessed and modified in here. It is the platform \em callbacks responsibility + * to initialize and terminate this pointer if used (see @ref kbase_platform_funcs_conf). * * The return value of the first call to this function is ignored. * - * @return 1 if the GPU state may have been lost, 0 otherwise. + * @return 2 if GPU in reset state, 1 if the GPU state may have been lost, 0 otherwise. */ int (*power_on_callback)(struct kbase_device *kbdev); @@ -223,9 +227,11 @@ struct kbase_pm_callback_conf { /** Callback for handling runtime power management initialization. * - * The runtime power management callbacks @ref power_runtime_off_callback and @ref power_runtime_on_callback - * will become active from calls made to the OS from within this function. - * The runtime calls can be triggered by calls from @ref power_off_callback and @ref power_on_callback. + * The runtime power management callbacks @ref power_runtime_off_callback + * and @ref power_runtime_on_callback will become active from calls made + * to the OS from within this function. + * The runtime calls can be triggered by calls from @ref power_off_callback + * and @ref power_on_callback. * Note: for linux the kernel must have CONFIG_PM_RUNTIME enabled to use this feature. * * @return 0 on success, else int error code. @@ -234,8 +240,9 @@ struct kbase_pm_callback_conf { /** Callback for handling runtime power management termination. * - * The runtime power management callbacks @ref power_runtime_off_callback and @ref power_runtime_on_callback - * should no longer be called by the OS on completion of this function. 
+ * The runtime power management callbacks @ref power_runtime_off_callback + * and @ref power_runtime_on_callback should no longer be called by the + * OS on completion of this function. * Note: for linux the kernel must have CONFIG_PM_RUNTIME enabled to use this feature. */ void (*power_runtime_term_callback)(struct kbase_device *kbdev); diff --git a/drivers/gpu/arm/bifrost/mali_kbase_config_defaults.h b/drivers/gpu/arm/bifrost/mali_kbase_config_defaults.h index 20003c852863..7657c25d565c 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_config_defaults.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_config_defaults.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2013-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2013-2024 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -194,9 +194,22 @@ enum { */ #define CSF_CSG_SUSPEND_TIMEOUT_CYCLES (3100000000ull) +/* Waiting timeout in clock cycles for GPU suspend to complete. */ +#define CSF_GPU_SUSPEND_TIMEOUT_CYCLES (CSF_CSG_SUSPEND_TIMEOUT_CYCLES) + /* Waiting timeout in clock cycles for GPU reset to complete. */ #define CSF_GPU_RESET_TIMEOUT_CYCLES (CSF_CSG_SUSPEND_TIMEOUT_CYCLES * 2) +/* Waiting timeout in clock cycles for a CSG to be terminated. + * + * Based on 0.6s timeout at 100MHz, scaled from 0.1s at 600MHz GPU frequency + * which is the timeout defined in FW to wait for iterator to complete the + * transitioning to DISABLED state. + * More cycles (0.4s @ 100MHz = 40000000) are added up to ensure that + * host timeout is always bigger than FW timeout. + */ +#define CSF_CSG_TERM_TIMEOUT_CYCLES (100000000) + /* Waiting timeout in clock cycles for GPU firmware to boot. * * Based on 250ms timeout at 100MHz, scaled from a 50MHz GPU system.
@@ -213,7 +226,10 @@ enum { * * Based on 10s timeout at 100MHz, scaled from a 50MHz GPU system. */ -#if IS_ENABLED(CONFIG_MALI_IS_FPGA) +#if IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) +/* Set a large value to avoid timing out while vector dumping */ +#define KCPU_FENCE_SIGNAL_TIMEOUT_CYCLES (250000000000ull) +#elif IS_ENABLED(CONFIG_MALI_IS_FPGA) #define KCPU_FENCE_SIGNAL_TIMEOUT_CYCLES (2500000000ull) #else #define KCPU_FENCE_SIGNAL_TIMEOUT_CYCLES (1000000000ull) @@ -239,6 +255,42 @@ enum { */ #define DEFAULT_PROGRESS_TIMEOUT_CYCLES (2500000000ull) +/* MIN value of iterators' suspend timeout*/ +#define CSG_SUSPEND_TIMEOUT_FIRMWARE_MS_MIN (200) +#if CSG_SUSPEND_TIMEOUT_FIRMWARE_MS_MIN <= 0 +#error "CSG_SUSPEND_TIMEOUT_FIRMWARE_MS_MIN should be larger than 0" +#endif + +/* MAX value of iterators' suspend timeout*/ +#define CSG_SUSPEND_TIMEOUT_FIRMWARE_MS_MAX (60000) +#if CSG_SUSPEND_TIMEOUT_FIRMWARE_MS_MAX >= (0xFFFFFFFF) +#error "CSG_SUSPEND_TIMEOUT_FIRMWARE_MS_MAX should be less than U32_MAX" +#endif + +/* Firmware iterators' suspend timeout, default 4000ms. 
Customer can update this by + * using debugfs -- csg_suspend_timeout + */ +#if IS_ENABLED(CONFIG_MALI_REAL_HW) && !IS_ENABLED(CONFIG_MALI_IS_FPGA) +#define CSG_SUSPEND_TIMEOUT_FIRMWARE_MS (4000) +#else +#define CSG_SUSPEND_TIMEOUT_FIRMWARE_MS (31000) +#endif +#if (CSG_SUSPEND_TIMEOUT_FIRMWARE_MS < CSG_SUSPEND_TIMEOUT_FIRMWARE_MS_MIN) || \ + (CSG_SUSPEND_TIMEOUT_FIRMWARE_MS > CSG_SUSPEND_TIMEOUT_FIRMWARE_MS_MAX) +#error "CSG_SUSPEND_TIMEOUT_FIRMWARE_MS is out of range" +#endif + +/* Additional time in milliseconds added to the firmware iterators' suspend timeout, + * default 100ms + */ +#define CSG_SUSPEND_TIMEOUT_HOST_ADDED_MS (100) + +/* Host side CSG suspend timeout */ +#define CSG_SUSPEND_TIMEOUT_MS (CSG_SUSPEND_TIMEOUT_FIRMWARE_MS + CSG_SUSPEND_TIMEOUT_HOST_ADDED_MS) + +/* MAX allowed timeout value(ms) on host side, should be less than ANR timeout */ +#define MAX_TIMEOUT_MS (4500) + #else /* MALI_USE_CSF */ /* A default timeout in clock cycles to be used when an invalid timeout @@ -311,14 +363,6 @@ enum { */ #define DEFAULT_PROGRESS_TIMEOUT ((u64)5 * 500 * 1024 * 1024) -/* Default threshold at which to switch to incremental rendering - * - * Fraction of the maximum size of an allocation that grows on GPU page fault - * that can be used up before the driver switches to incremental rendering, - * in 256ths. 0 means disable incremental rendering. - */ -#define DEFAULT_IR_THRESHOLD (192) - /* Waiting time in clock cycles for the completion of a MMU operation. * * Ideally 1.6M GPU cycles required for the L2 cache (512KiB slice) flush. diff --git a/drivers/gpu/arm/bifrost/mali_kbase_core_linux.c b/drivers/gpu/arm/bifrost/mali_kbase_core_linux.c index 237a3b829be9..9f88d4f3fbc6 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_core_linux.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_core_linux.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. 
+ * (C) COPYRIGHT 2010-2024 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -62,9 +62,7 @@ #include "csf/mali_kbase_csf_cpu_queue.h" #include "csf/mali_kbase_csf_event.h" #endif -#ifdef CONFIG_MALI_ARBITER_SUPPORT #include "arbiter/mali_kbase_arbiter_pm.h" -#endif #include "mali_kbase_cs_experimental.h" @@ -76,6 +74,7 @@ #if IS_ENABLED(CONFIG_DEBUG_FS) #include "mali_kbase_pbha_debugfs.h" #endif +#include "mali_kbase_ioctl_helpers.h" #include #include @@ -106,6 +105,7 @@ #include #include #include +#include #include @@ -152,13 +152,29 @@ static const struct mali_kbase_capability_def kbase_caps_table[MALI_KBASE_NUM_CA #if MALI_USE_CSF { 1, 0 }, /* SYSTEM_MONITOR */ { 1, 0 }, /* JIT_PRESSURE_LIMIT */ - { 1, 0 }, /* MEM_GROW_ON_GPF */ - { 1, 0 } /* MEM_PROTECTED */ + { 1, 22 }, /* QUERY_MEM_DONT_NEED */ + { 1, 0 }, /* QUERY_MEM_GROW_ON_GPF */ + { 1, 0 }, /* QUERY_MEM_PROTECTED */ + { 1, 26 }, /* QUERY_MEM_IMPORT_SYNC_ON_MAP_UNMAP */ + { 1, 26 }, /* QUERY_MEM_KERNEL_SYNC */ + { 1, 28 }, /* QUERY_MEM_SAME_VA */ + { 1, 31 }, /* REJECT_ALLOC_MEM_DONT_NEED */ + { 1, 31 }, /* REJECT_ALLOC_MEM_PROTECTED_IN_UNPROTECTED_ALLOCS */ + { 1, 31 }, /* REJECT_ALLOC_MEM_UNUSED_BIT_20 */ + { 1, 31 } /* REJECT_ALLOC_MEM_UNUSED_BIT_27 */ #else { 11, 15 }, /* SYSTEM_MONITOR */ { 11, 25 }, /* JIT_PRESSURE_LIMIT */ - { 11, 2 }, /* MEM_GROW_ON_GPF */ - { 11, 2 } /* MEM_PROTECTED */ + { 11, 40 }, /* QUERY_MEM_DONT_NEED */ + { 11, 2 }, /* QUERY_MEM_GROW_ON_GPF */ + { 11, 2 }, /* QUERY_MEM_PROTECTED */ + { 11, 43 }, /* QUERY_MEM_IMPORT_SYNC_ON_MAP_UNMAP */ + { 11, 43 }, /* QUERY_MEM_KERNEL_SYNC */ + { 11, 44 }, /* QUERY_MEM_SAME_VA */ + { 11, 46 }, /* REJECT_ALLOC_MEM_DONT_NEED */ + { 11, 46 }, /* REJECT_ALLOC_MEM_PROTECTED_IN_UNPROTECTED_ALLOCS */ + { 11, 46 }, /* REJECT_ALLOC_MEM_UNUSED_BIT_8 */ + { 11, 46 } /* REJECT_ALLOC_MEM_UNUSED_BIT_19 */ #endif 
}; @@ -167,13 +183,11 @@ static const struct mali_kbase_capability_def kbase_caps_table[MALI_KBASE_NUM_CA static struct mutex kbase_probe_mutex; #endif -static void kbase_file_destroy_kctx_worker(struct work_struct *work); - /** * mali_kbase_supports_cap - Query whether a kbase capability is supported * * @api_version: API version to convert - * @cap: Capability to query for - see mali_kbase_caps.h + * @cap: Capability to query for - see mali_kbase_caps.h. Shouldn't be negative. * * Return: true if the capability is supported */ @@ -184,13 +198,10 @@ bool mali_kbase_supports_cap(unsigned long api_version, enum mali_kbase_cap cap) struct mali_kbase_capability_def const *cap_def; - if (WARN_ON(cap < 0)) - return false; - if (WARN_ON(cap >= MALI_KBASE_NUM_CAPS)) return false; - cap_def = &kbase_caps_table[(int)cap]; + cap_def = &kbase_caps_table[cap]; required_ver = KBASE_API_VERSION(cap_def->required_major, cap_def->required_minor); supported = (api_version >= required_ver); @@ -212,7 +223,7 @@ bool mali_kbase_supports_cap(unsigned long api_version, enum mali_kbase_cap cap) * Return: Address of an object representing a simulated device file, or NULL * on failure. * - * Note: This function always gets called in Userspace context. + * Note: This function shall always be called in Userspace context. */ static struct kbase_file *kbase_file_new(struct kbase_device *const kbdev, struct file *const filp) { @@ -224,17 +235,6 @@ static struct kbase_file *kbase_file_new(struct kbase_device *const kbdev, struc kfile->kctx = NULL; kfile->api_version = 0; atomic_set(&kfile->setup_state, KBASE_FILE_NEED_VSN); - /* Store the pointer to the file table structure of current process. 
*/ - kfile->owner = current->files; - INIT_WORK(&kfile->destroy_kctx_work, kbase_file_destroy_kctx_worker); - spin_lock_init(&kfile->lock); - kfile->fops_count = 0; - kfile->map_count = 0; - typecheck(typeof(kfile->map_count), typeof(current->mm->map_count)); -#if IS_ENABLED(CONFIG_DEBUG_FS) - init_waitqueue_head(&kfile->zero_fops_count_wait); -#endif - init_waitqueue_head(&kfile->event_queue); } return kfile; } @@ -313,33 +313,6 @@ static unsigned long kbase_file_get_api_version(struct kbase_file *const kfile) */ static int kbase_file_create_kctx(struct kbase_file *kfile, base_context_create_flags flags); -/** - * kbase_file_inc_fops_count_if_allowed - Increment the kfile::fops_count value if the file - * operation is allowed for the current process. - * - * @kfile: Pointer to the object representing the /dev/malixx device file instance. - * - * The function shall be called at the beginning of certain file operation methods - * implemented for @kbase_fops, like ioctl, poll, read and mmap. - * - * kbase_file_dec_fops_count() shall be called if the increment was done. - * - * Return: true if the increment was done otherwise false. - * - * Note: This function shall always be called in Userspace context. - */ -static bool kbase_file_inc_fops_count_if_allowed(struct kbase_file *const kfile) -{ - /* Disallow file operations from the other process that shares the instance - * of /dev/malixx file i.e. 'kfile' or disallow file operations if parent - * process has closed the file instance. - */ - if (unlikely(kfile->owner != current->files)) - return false; - - return kbase_file_inc_fops_count_unless_closed(kfile); -} - /** * kbase_file_get_kctx_if_setup_complete - Get a kernel base context * pointer from a device file @@ -352,8 +325,6 @@ static bool kbase_file_inc_fops_count_if_allowed(struct kbase_file *const kfile) * * Return: Address of the kernel base context associated with the @kfile, or * NULL if no context exists. 
- * - * Note: This function shall always be called in Userspace context. */ static struct kbase_context *kbase_file_get_kctx_if_setup_complete(struct kbase_file *const kfile) { @@ -364,103 +335,38 @@ static struct kbase_context *kbase_file_get_kctx_if_setup_complete(struct kbase_ return kfile->kctx; } -/** - * kbase_file_destroy_kctx - Destroy the Kbase context created for @kfile. - * - * @kfile: A device file created by kbase_file_new() - */ -static void kbase_file_destroy_kctx(struct kbase_file *const kfile) -{ - if (atomic_cmpxchg(&kfile->setup_state, KBASE_FILE_COMPLETE, KBASE_FILE_DESTROY_CTX) != - KBASE_FILE_COMPLETE) - return; - -#if IS_ENABLED(CONFIG_DEBUG_FS) - kbasep_mem_profile_debugfs_remove(kfile->kctx); - kbase_context_debugfs_term(kfile->kctx); -#endif - - kbase_destroy_context(kfile->kctx); - dev_dbg(kfile->kbdev->dev, "Deleted kbase context"); -} - -/** - * kbase_file_destroy_kctx_worker - Work item to destroy the Kbase context. - * - * @work: Pointer to the kfile::destroy_kctx_work. - * - * The work item shall only be enqueued if the context termination could not - * be done from @kbase_flush(). - */ -static void kbase_file_destroy_kctx_worker(struct work_struct *work) -{ - struct kbase_file *kfile = container_of(work, struct kbase_file, destroy_kctx_work); - - WARN_ON_ONCE(kfile->owner); - WARN_ON_ONCE(kfile->map_count); - WARN_ON_ONCE(kfile->fops_count); - - kbase_file_destroy_kctx(kfile); -} - -/** - * kbase_file_destroy_kctx_on_flush - Try destroy the Kbase context from the flush() - * method of @kbase_fops. 
- * - * @kfile: A device file created by kbase_file_new() - */ -static void kbase_file_destroy_kctx_on_flush(struct kbase_file *const kfile) -{ - bool can_destroy_context = false; - - spin_lock(&kfile->lock); - kfile->owner = NULL; - /* To destroy the context from flush() method, unlike the release() - * method, need to synchronize manually against the other threads in - * the current process that could be operating on the /dev/malixx file. - * - * Only destroy the context if all the memory mappings on the - * /dev/malixx file instance have been closed. If there are mappings - * present then the context would be destroyed later when the last - * mapping is closed. - * Also, only destroy the context if no file operations are in progress. - */ - can_destroy_context = !kfile->map_count && !kfile->fops_count; - spin_unlock(&kfile->lock); - - if (likely(can_destroy_context)) { - WARN_ON_ONCE(work_pending(&kfile->destroy_kctx_work)); - kbase_file_destroy_kctx(kfile); - } -} - /** * kbase_file_delete - Destroy an object representing a device file * * @kfile: A device file created by kbase_file_new() * - * If any context was created for the @kfile and is still alive, then it is destroyed. + * If any context was created for the @kfile then it is destroyed. */ static void kbase_file_delete(struct kbase_file *const kfile) { + struct kbase_device *kbdev = NULL; + if (WARN_ON(!kfile)) return; - /* All the CPU mappings on the device file should have been closed */ - WARN_ON_ONCE(kfile->map_count); -#if IS_ENABLED(CONFIG_DEBUG_FS) - /* There could still be file operations due to the debugfs file (mem_view) */ - wait_event(kfile->zero_fops_count_wait, !kbase_file_fops_count(kfile)); -#else - /* There shall not be any file operations in progress on the device file */ - WARN_ON_ONCE(kfile->fops_count); -#endif - kfile->filp->private_data = NULL; - cancel_work_sync(&kfile->destroy_kctx_work); - /* Destroy the context if it wasn't done earlier from the flush() method. 
*/ - kbase_file_destroy_kctx(kfile); - kbase_release_device(kfile->kbdev); + kbdev = kfile->kbdev; + + if (atomic_read(&kfile->setup_state) == KBASE_FILE_COMPLETE) { + struct kbase_context *kctx = kfile->kctx; + +#if IS_ENABLED(CONFIG_DEBUG_FS) + kbasep_mem_profile_debugfs_remove(kctx); +#endif + kbase_context_debugfs_term(kctx); + + kbase_destroy_context(kctx); + + dev_dbg(kbdev->dev, "deleted base context\n"); + } + + kbase_release_device(kbdev); + kfree(kfile); } @@ -585,6 +491,9 @@ int kbase_get_irqs(struct kbase_device *kbdev) kbdev->nr_irqs = 0; result = get_irqs(kbdev, pdev); + if (!result) + return result; + if (result) dev_err(kbdev->dev, "Invalid or No interrupt resources"); @@ -736,7 +645,8 @@ static int kbase_file_create_kctx(struct kbase_file *const kfile, kbdev = kfile->kbdev; - kctx = kbase_create_context(kbdev, in_compat_syscall(), flags, kfile->api_version, kfile); + kctx = kbase_create_context(kbdev, in_compat_syscall(), flags, kfile->api_version, + kfile->filp); /* if bad flags, will stay stuck in setup mode */ if (!kctx) @@ -823,36 +733,6 @@ static int kbase_release(struct inode *inode, struct file *filp) return 0; } -/** - * kbase_flush - Function implementing the flush() method of @kbase_fops. - * - * @filp: Pointer to the /dev/malixx device file instance. - * @id: Pointer to the file table structure of current process. - * If @filp is being shared by multiple processes then @id can differ - * from kfile::owner. - * - * This function is called everytime the copy of @filp is closed. So if 3 processes - * are sharing the @filp then this function would be called 3 times and only after - * that kbase_release() would get called. - * - * Return: 0 if successful, otherwise a negative error code. - * - * Note: This function always gets called in Userspace context when the - * file is closed. 
- */ -static int kbase_flush(struct file *filp, fl_owner_t id) -{ - struct kbase_file *const kfile = filp->private_data; - - /* Try to destroy the context if the flush() method has been called for the - * process that created the instance of /dev/malixx file i.e. 'kfile'. - */ - if (kfile->owner == id) - kbase_file_destroy_kctx_on_flush(kfile); - - return 0; -} - static int kbase_api_set_flags(struct kbase_file *kfile, struct kbase_ioctl_set_flags *flags) { int err = 0; @@ -946,7 +826,7 @@ static int kbase_api_mem_alloc_ex(struct kbase_context *kctx, union kbase_ioctl_mem_alloc_ex *alloc_ex) { struct kbase_va_region *reg; - u64 flags = alloc_ex->in.flags; + base_mem_alloc_flags flags = alloc_ex->in.flags; u64 gpu_va; /* Calls to this function are inherently asynchronous, with respect to @@ -1056,7 +936,7 @@ static int kbase_api_mem_alloc(struct kbase_context *kctx, union kbase_ioctl_mem static int kbase_api_mem_alloc(struct kbase_context *kctx, union kbase_ioctl_mem_alloc *alloc) { struct kbase_va_region *reg; - u64 flags = alloc->in.flags; + base_mem_alloc_flags flags = alloc->in.flags; u64 gpu_va; /* Calls to this function are inherently asynchronous, with respect to @@ -1182,16 +1062,6 @@ static int kbase_api_get_ddk_version(struct kbase_context *kctx, static int kbase_api_mem_jit_init(struct kbase_context *kctx, struct kbase_ioctl_mem_jit_init *jit_init) { - size_t i; - - for (i = 0; i < sizeof(jit_init->padding); i++) { - /* Ensure all padding bytes are 0 for potential future - * extension - */ - if (jit_init->padding[i]) - return -EINVAL; - } - return kbase_region_tracker_init_jit(kctx, jit_init->va_pages, jit_init->max_allocations, jit_init->trim_level, jit_init->group_id, jit_init->phys_pages); @@ -1257,7 +1127,7 @@ static int kbase_api_mem_commit(struct kbase_context *kctx, struct kbase_ioctl_m static int kbase_api_mem_alias(struct kbase_context *kctx, union kbase_ioctl_mem_alias *alias) { struct base_mem_aliasing_info *ai; - u64 flags; + 
base_mem_alloc_flags flags; int err; if (alias->in.nents == 0 || alias->in.nents > BASE_MEM_ALIAS_MAX_ENTS) @@ -1268,7 +1138,7 @@ static int kbase_api_mem_alias(struct kbase_context *kctx, union kbase_ioctl_mem return -ENOMEM; err = copy_from_user(ai, u64_to_user_ptr(alias->in.aliasing_info), - sizeof(*ai) * alias->in.nents); + size_mul(sizeof(*ai), alias->in.nents)); if (err) { vfree(ai); return -EFAULT; @@ -1296,7 +1166,7 @@ static int kbase_api_mem_alias(struct kbase_context *kctx, union kbase_ioctl_mem static int kbase_api_mem_import(struct kbase_context *kctx, union kbase_ioctl_mem_import *import) { int ret; - u64 flags = import->in.flags; + base_mem_alloc_flags flags = import->in.flags; if (flags & BASEP_MEM_FLAGS_KERNEL_ONLY) return -ENOMEM; @@ -1408,15 +1278,17 @@ static int kbase_api_sticky_resource_map(struct kbase_context *kctx, if (!map->count || map->count > BASE_EXT_RES_COUNT_MAX) return -EOVERFLOW; - ret = copy_from_user(gpu_addr, u64_to_user_ptr(map->address), sizeof(u64) * map->count); + ret = copy_from_user(gpu_addr, u64_to_user_ptr(map->address), + size_mul(sizeof(u64), map->count)); if (ret != 0) return -EFAULT; - kbase_gpu_vm_lock(kctx); + down_read(kbase_mem_get_process_mmap_lock()); + kbase_gpu_vm_lock_with_pmode_sync(kctx); for (i = 0; i < map->count; i++) { - if (!kbase_sticky_resource_acquire(kctx, gpu_addr[i])) { + if (!kbase_sticky_resource_acquire(kctx, gpu_addr[i], current->mm)) { /* Invalid resource */ ret = -EINVAL; break; @@ -1430,7 +1302,8 @@ static int kbase_api_sticky_resource_map(struct kbase_context *kctx, } } - kbase_gpu_vm_unlock(kctx); + kbase_gpu_vm_unlock_with_pmode_sync(kctx); + up_read(kbase_mem_get_process_mmap_lock()); return ret; } @@ -1445,12 +1318,13 @@ static int kbase_api_sticky_resource_unmap(struct kbase_context *kctx, if (!unmap->count || unmap->count > BASE_EXT_RES_COUNT_MAX) return -EOVERFLOW; - ret = copy_from_user(gpu_addr, u64_to_user_ptr(unmap->address), sizeof(u64) * unmap->count); + ret = 
copy_from_user(gpu_addr, u64_to_user_ptr(unmap->address), + size_mul(sizeof(u64), unmap->count)); if (ret != 0) return -EFAULT; - kbase_gpu_vm_lock(kctx); + kbase_gpu_vm_lock_with_pmode_sync(kctx); for (i = 0; i < unmap->count; i++) { if (!kbase_sticky_resource_release_force(kctx, NULL, gpu_addr[i])) { @@ -1459,7 +1333,7 @@ static int kbase_api_sticky_resource_unmap(struct kbase_context *kctx, } } - kbase_gpu_vm_unlock(kctx); + kbase_gpu_vm_unlock_with_pmode_sync(kctx); return ret; } @@ -1517,11 +1391,16 @@ static int kbasep_cs_queue_kick(struct kbase_context *kctx, struct kbase_ioctl_c return kbase_csf_queue_kick(kctx, kick); } +static int kbasep_queue_group_clear_faults(struct kbase_context *kctx, + struct kbase_ioctl_queue_group_clear_faults *faults) +{ + return kbase_csf_queue_group_clear_faults(kctx, faults); +} + static int kbasep_cs_queue_group_create_1_6(struct kbase_context *kctx, union kbase_ioctl_cs_queue_group_create_1_6 *create) { int ret; - size_t i; union kbase_ioctl_cs_queue_group_create new_create = { .in = { .tiler_mask = create->in.tiler_mask, @@ -1534,15 +1413,7 @@ static int kbasep_cs_queue_group_create_1_6(struct kbase_context *kctx, .compute_max = create->in.compute_max, } }; - for (i = 0; i < ARRAY_SIZE(create->in.padding); i++) { - if (create->in.padding[i] != 0) { - dev_warn(kctx->kbdev->dev, "Invalid padding not 0 in queue group create\n"); - return -EINVAL; - } - } - ret = kbase_csf_queue_group_create(kctx, &new_create); - create->out.group_handle = new_create.out.group_handle; create->out.group_uid = new_create.out.group_uid; @@ -1553,7 +1424,6 @@ static int kbasep_cs_queue_group_create_1_18(struct kbase_context *kctx, union kbase_ioctl_cs_queue_group_create_1_18 *create) { int ret; - size_t i; union kbase_ioctl_cs_queue_group_create new_create = { .in = { .tiler_mask = create->in.tiler_mask, @@ -1568,15 +1438,7 @@ static int kbasep_cs_queue_group_create_1_18(struct kbase_context *kctx, .dvs_buf = create->in.dvs_buf, } }; - for (i = 0; 
i < ARRAY_SIZE(create->in.padding); i++) { - if (create->in.padding[i] != 0) { - dev_warn(kctx->kbdev->dev, "Invalid padding not 0 in queue group create\n"); - return -EINVAL; - } - } - ret = kbase_csf_queue_group_create(kctx, &new_create); - create->out.group_handle = new_create.out.group_handle; create->out.group_uid = new_create.out.group_uid; @@ -1586,6 +1448,8 @@ static int kbasep_cs_queue_group_create_1_18(struct kbase_context *kctx, static int kbasep_cs_queue_group_create(struct kbase_context *kctx, union kbase_ioctl_cs_queue_group_create *create) { + /* create->in.reserved only present pre-TDRX configuration. */ + if (create->in.reserved != 0) { dev_warn(kctx->kbdev->dev, "Invalid reserved field not 0 in queue group create\n"); return -EINVAL; @@ -1701,14 +1565,15 @@ static int kbase_ioctl_cs_get_glb_iface(struct kbase_context *kctx, ¶m->out.prfcnt_size, ¶m->out.instr_features); if (copy_to_user(user_groups, group_data, - MIN(max_group_num, param->out.group_num) * sizeof(*group_data))) + size_mul(MIN(max_group_num, param->out.group_num), + sizeof(*group_data)))) err = -EFAULT; } if (!err) if (copy_to_user(user_streams, stream_data, - MIN(max_total_stream_num, param->out.total_stream_num) * - sizeof(*stream_data))) + size_mul(MIN(max_total_stream_num, param->out.total_stream_num), + sizeof(*stream_data)))) err = -EFAULT; kfree(group_data); @@ -1732,10 +1597,6 @@ static int kbase_ioctl_read_user_page(struct kbase_context *kctx, if (unlikely(user_page->in.offset != LATEST_FLUSH)) return -EINVAL; - /* Validating padding that must be zero */ - if (unlikely(user_page->in.padding != 0)) - return -EINVAL; - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); if (!kbdev->pm.backend.gpu_powered) user_page->out.val_lo = POWER_DOWN_LATEST_FLUSH_VALUE; @@ -1762,83 +1623,33 @@ kbasep_ioctl_context_priority_check(struct kbase_context *kctx, return 0; } -#define KBASE_HANDLE_IOCTL(cmd, function, arg) \ - do { \ - int ret; \ - BUILD_BUG_ON(_IOC_DIR(cmd) != _IOC_NONE); \ - 
dev_dbg(arg->kbdev->dev, "Enter ioctl %s\n", #function); \ - ret = function(arg); \ - dev_dbg(arg->kbdev->dev, "Return %d from ioctl %s\n", ret, #function); \ - return ret; \ - } while (0) - -#define KBASE_HANDLE_IOCTL_IN(cmd, function, type, arg) \ - do { \ - type param; \ - int ret, err; \ - dev_dbg(arg->kbdev->dev, "Enter ioctl %s\n", #function); \ - BUILD_BUG_ON(_IOC_DIR(cmd) != _IOC_WRITE); \ - BUILD_BUG_ON(sizeof(param) != _IOC_SIZE(cmd)); \ - err = copy_from_user(¶m, uarg, sizeof(param)); \ - if (err) \ - return -EFAULT; \ - ret = function(arg, ¶m); \ - dev_dbg(arg->kbdev->dev, "Return %d from ioctl %s\n", ret, #function); \ - return ret; \ - } while (0) - -#define KBASE_HANDLE_IOCTL_OUT(cmd, function, type, arg) \ - do { \ - type param; \ - int ret, err; \ - dev_dbg(arg->kbdev->dev, "Enter ioctl %s\n", #function); \ - BUILD_BUG_ON(_IOC_DIR(cmd) != _IOC_READ); \ - BUILD_BUG_ON(sizeof(param) != _IOC_SIZE(cmd)); \ - memset(¶m, 0, sizeof(param)); \ - ret = function(arg, ¶m); \ - err = copy_to_user(uarg, ¶m, sizeof(param)); \ - if (err) \ - return -EFAULT; \ - dev_dbg(arg->kbdev->dev, "Return %d from ioctl %s\n", ret, #function); \ - return ret; \ - } while (0) - -#define KBASE_HANDLE_IOCTL_INOUT(cmd, function, type, arg) \ - do { \ - type param; \ - int ret, err; \ - dev_dbg(arg->kbdev->dev, "Enter ioctl %s\n", #function); \ - BUILD_BUG_ON(_IOC_DIR(cmd) != (_IOC_WRITE | _IOC_READ)); \ - BUILD_BUG_ON(sizeof(param) != _IOC_SIZE(cmd)); \ - err = copy_from_user(¶m, uarg, sizeof(param)); \ - if (err) \ - return -EFAULT; \ - ret = function(arg, ¶m); \ - err = copy_to_user(uarg, ¶m, sizeof(param)); \ - if (err) \ - return -EFAULT; \ - dev_dbg(arg->kbdev->dev, "Return %d from ioctl %s\n", ret, #function); \ - return ret; \ - } while (0) - static int kbasep_ioctl_set_limited_core_count( struct kbase_context *kctx, struct kbase_ioctl_set_limited_core_count *set_limited_core_count) { const u64 shader_core_mask = kbase_pm_get_present_cores(kctx->kbdev, 
KBASE_PM_CORE_SHADER); - const u64 limited_core_mask = ((u64)1 << (set_limited_core_count->max_core_count)) - 1; + const u8 max_core_count = set_limited_core_count->max_core_count; + u64 limited_core_mask = 0; - if ((shader_core_mask & limited_core_mask) == 0) { - /* At least one shader core must be available after applying the mask */ + /* Sanity check to avoid shift-out-of-bounds */ + if (max_core_count > 64) + return -EINVAL; + else if (max_core_count == 64) + limited_core_mask = UINT64_MAX; + else + limited_core_mask = ((u64)1 << max_core_count) - 1; + + /* At least one shader core must be available after applying the mask */ + if ((shader_core_mask & limited_core_mask) == 0) return -EINVAL; - } kctx->limited_core_mask = limited_core_mask; return 0; } -static long kbase_kfile_ioctl(struct kbase_file *kfile, unsigned int cmd, unsigned long arg) +static long kbase_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { + struct kbase_file *const kfile = filp->private_data; struct kbase_context *kctx = NULL; struct kbase_device *kbdev = kfile->kbdev; void __user *uarg = (void __user *)arg; @@ -2087,6 +1898,11 @@ static long kbase_kfile_ioctl(struct kbase_file *kfile, unsigned int cmd, unsign KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_KCPU_QUEUE_ENQUEUE, kbasep_kcpu_queue_enqueue, struct kbase_ioctl_kcpu_queue_enqueue, kctx); break; + case KBASE_IOCTL_QUEUE_GROUP_CLEAR_FAULTS: + KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_QUEUE_GROUP_CLEAR_FAULTS, + kbasep_queue_group_clear_faults, + struct kbase_ioctl_queue_group_clear_faults, kctx); + break; case KBASE_IOCTL_CS_TILER_HEAP_INIT: KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_CS_TILER_HEAP_INIT, kbasep_cs_tiler_heap_init, union kbase_ioctl_cs_tiler_heap_init, kctx); @@ -2137,45 +1953,22 @@ static long kbase_kfile_ioctl(struct kbase_file *kfile, unsigned int cmd, unsign return -ENOIOCTLCMD; } -static long kbase_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) -{ - struct kbase_file *const kfile = filp->private_data; - long 
ioctl_ret; - - if (unlikely(!kbase_file_inc_fops_count_if_allowed(kfile))) - return -EPERM; - - ioctl_ret = kbase_kfile_ioctl(kfile, cmd, arg); - kbase_file_dec_fops_count(kfile); - - return ioctl_ret; -} - #if MALI_USE_CSF static ssize_t kbase_read(struct file *filp, char __user *buf, size_t count, loff_t *f_pos) { struct kbase_file *const kfile = filp->private_data; - struct kbase_context *kctx; + struct kbase_context *const kctx = kbase_file_get_kctx_if_setup_complete(kfile); struct base_csf_notification event_data = { .type = BASE_CSF_NOTIFICATION_EVENT }; const size_t data_size = sizeof(event_data); bool read_event = false, read_error = false; - ssize_t err = 0; CSTD_UNUSED(f_pos); - if (unlikely(!kbase_file_inc_fops_count_if_allowed(kfile))) + if (unlikely(!kctx)) return -EPERM; - kctx = kbase_file_get_kctx_if_setup_complete(kfile); - if (unlikely(!kctx)) { - err = -EPERM; - goto out; - } - - if (count < data_size) { - err = -ENOBUFS; - goto out; - } + if (count < data_size) + return -ENOBUFS; if (atomic_read(&kctx->event_count)) read_event = true; @@ -2196,41 +1989,29 @@ static ssize_t kbase_read(struct file *filp, char __user *buf, size_t count, lof if (copy_to_user(buf, &event_data, data_size) != 0) { dev_warn(kctx->kbdev->dev, "Failed to copy data\n"); - err = -EFAULT; - goto out; + return -EFAULT; } if (read_event) atomic_set(&kctx->event_count, 0); -out: - kbase_file_dec_fops_count(kfile); - return err ? 
err : (ssize_t)data_size; + return data_size; } #else /* MALI_USE_CSF */ static ssize_t kbase_read(struct file *filp, char __user *buf, size_t count, loff_t *f_pos) { struct kbase_file *const kfile = filp->private_data; - struct kbase_context *kctx; + struct kbase_context *const kctx = kbase_file_get_kctx_if_setup_complete(kfile); struct base_jd_event_v2 uevent; - size_t out_count = 0; - ssize_t err = 0; + int out_count = 0; CSTD_UNUSED(f_pos); - if (unlikely(!kbase_file_inc_fops_count_if_allowed(kfile))) + if (unlikely(!kctx)) return -EPERM; - kctx = kbase_file_get_kctx_if_setup_complete(kfile); - if (unlikely(!kctx)) { - err = -EPERM; - goto out; - } - - if (count < sizeof(uevent)) { - err = -ENOBUFS; - goto out; - } + if (count < sizeof(uevent)) + return -ENOBUFS; memset(&uevent, 0, sizeof(uevent)); @@ -2239,29 +2020,21 @@ static ssize_t kbase_read(struct file *filp, char __user *buf, size_t count, lof if (out_count > 0) goto out; - if (filp->f_flags & O_NONBLOCK) { - err = -EAGAIN; - goto out; - } + if (filp->f_flags & O_NONBLOCK) + return -EAGAIN; - if (wait_event_interruptible(kfile->event_queue, - kbase_event_pending(kctx)) != 0) { - err = -ERESTARTSYS; - goto out; - } + if (wait_event_interruptible(kctx->event_queue, + kbase_event_pending(kctx)) != 0) + return -ERESTARTSYS; } if (uevent.event_code == BASE_JD_EVENT_DRV_TERMINATED) { - if (out_count == 0) { - err = -EPIPE; - goto out; - } + if (out_count == 0) + return -EPIPE; goto out; } - if (copy_to_user(buf, &uevent, sizeof(uevent)) != 0) { - err = -EFAULT; - goto out; - } + if (copy_to_user(buf, &uevent, sizeof(uevent)) != 0) + return -EFAULT; buf += sizeof(uevent); out_count++; @@ -2269,59 +2042,40 @@ static ssize_t kbase_read(struct file *filp, char __user *buf, size_t count, lof } while (count >= sizeof(uevent)); out: - kbase_file_dec_fops_count(kfile); - return err ? 
err : (ssize_t)(out_count * sizeof(uevent)); + return out_count * sizeof(uevent); } #endif /* MALI_USE_CSF */ static __poll_t kbase_poll(struct file *filp, poll_table *wait) { struct kbase_file *const kfile = filp->private_data; - struct kbase_context *kctx; - __poll_t ret = 0; + struct kbase_context *const kctx = kbase_file_get_kctx_if_setup_complete(kfile); - if (unlikely(!kbase_file_inc_fops_count_if_allowed(kfile))) { -#if (KERNEL_VERSION(4, 19, 0) > LINUX_VERSION_CODE) - ret = POLLNVAL; -#else - ret = EPOLLNVAL; -#endif - return ret; - } - - kctx = kbase_file_get_kctx_if_setup_complete(kfile); if (unlikely(!kctx)) { #if (KERNEL_VERSION(4, 19, 0) > LINUX_VERSION_CODE) - ret = POLLERR; + return POLLERR; #else - ret = EPOLLERR; + return EPOLLERR; #endif - goto out; } - poll_wait(filp, &kfile->event_queue, wait); + poll_wait(filp, &kctx->event_queue, wait); if (kbase_event_pending(kctx)) { #if (KERNEL_VERSION(4, 19, 0) > LINUX_VERSION_CODE) - ret = POLLIN | POLLRDNORM; + return POLLIN | POLLRDNORM; #else - ret = EPOLLIN | EPOLLRDNORM; + return EPOLLIN | EPOLLRDNORM; #endif } -out: - kbase_file_dec_fops_count(kfile); - return ret; + return 0; } void kbase_event_wakeup(struct kbase_context *kctx) { KBASE_DEBUG_ASSERT(kctx); dev_dbg(kctx->kbdev->dev, "Waking event queue for context %pK\n", (void *)kctx); -#ifdef CONFIG_MALI_BIFROST_DEBUG - if (WARN_ON_ONCE(!kctx->kfile)) - return; -#endif - wake_up_interruptible(&kctx->kfile->event_queue); + wake_up_interruptible(&kctx->event_queue); } KBASE_EXPORT_TEST_API(kbase_event_wakeup); @@ -2354,20 +2108,12 @@ KBASE_EXPORT_TEST_API(kbase_event_pending); static int kbase_mmap(struct file *const filp, struct vm_area_struct *const vma) { struct kbase_file *const kfile = filp->private_data; - struct kbase_context *kctx; - int ret; + struct kbase_context *const kctx = kbase_file_get_kctx_if_setup_complete(kfile); - if (unlikely(!kbase_file_inc_fops_count_if_allowed(kfile))) + if (unlikely(!kctx)) return -EPERM; - kctx = 
kbase_file_get_kctx_if_setup_complete(kfile); - if (likely(kctx)) - ret = kbase_context_mmap(kctx, vma); - else - ret = -EPERM; - - kbase_file_dec_fops_count(kfile); - return ret; + return kbase_context_mmap(kctx, vma); } static int kbase_check_flags(int flags) @@ -2386,26 +2132,17 @@ static unsigned long kbase_get_unmapped_area(struct file *const filp, const unsi const unsigned long flags) { struct kbase_file *const kfile = filp->private_data; - struct kbase_context *kctx; - unsigned long address; + struct kbase_context *const kctx = kbase_file_get_kctx_if_setup_complete(kfile); - if (unlikely(!kbase_file_inc_fops_count_if_allowed(kfile))) + if (unlikely(!kctx)) return -EPERM; - kctx = kbase_file_get_kctx_if_setup_complete(kfile); - if (likely(kctx)) - address = kbase_context_get_unmapped_area(kctx, addr, len, pgoff, flags); - else - address = -EPERM; - - kbase_file_dec_fops_count(kfile); - return address; + return kbase_context_get_unmapped_area(kctx, addr, len, pgoff, flags); } static const struct file_operations kbase_fops = { .owner = THIS_MODULE, .open = kbase_open, - .flush = kbase_flush, .release = kbase_release, .read = kbase_read, .poll = kbase_poll, @@ -2544,6 +2281,9 @@ static ssize_t core_mask_show(struct device *dev, struct device_attribute *attr, struct kbase_device *kbdev; unsigned long flags; ssize_t ret = 0; +#if !MALI_USE_CSF + size_t i; +#endif CSTD_UNUSED(attr); @@ -2562,22 +2302,173 @@ static ssize_t core_mask_show(struct device *dev, struct device_attribute *attr, ret += scnprintf(buf + ret, (size_t)(PAGE_SIZE - ret), "Current in use core mask : 0x%llX\n", kbdev->pm.backend.shaders_avail); #else - ret += scnprintf(buf + ret, (size_t)(PAGE_SIZE - ret), "Current core mask (JS0) : 0x%llX\n", - kbdev->pm.debug_core_mask[0]); - ret += scnprintf(buf + ret, (size_t)(PAGE_SIZE - ret), "Current core mask (JS1) : 0x%llX\n", - kbdev->pm.debug_core_mask[1]); - ret += scnprintf(buf + ret, (size_t)(PAGE_SIZE - ret), "Current core mask (JS2) : 0x%llX\n", - 
kbdev->pm.debug_core_mask[2]); + for (i = 0; i < BASE_JM_MAX_NR_SLOTS; i++) { + if (PAGE_SIZE < ret) + goto out_unlock; + + ret += scnprintf(buf + ret, (size_t)(PAGE_SIZE - ret), + "Current core mask (JS%zu) : 0x%llX\n", i, + kbdev->pm.debug_core_mask[i]); + } #endif /* MALI_USE_CSF */ ret += scnprintf(buf + ret, (size_t)(PAGE_SIZE - ret), "Available core mask : 0x%llX\n", kbdev->gpu_props.shader_present); - +#if !MALI_USE_CSF +out_unlock: +#endif spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); return ret; } +#if MALI_USE_CSF +struct kbase_core_mask { + u64 new_core_mask; +}; + +static int core_mask_parse(struct kbase_device *const kbdev, const char *const buf, + struct kbase_core_mask *const mask) +{ + int err = kstrtou64(buf, 0, &mask->new_core_mask); + + if (err) + dev_err(kbdev->dev, "Couldn't process core mask write operation.\n"); + + return err; +} + +static int core_mask_set(struct kbase_device *kbdev, struct kbase_core_mask *const new_mask) +{ + u64 new_core_mask = new_mask->new_core_mask; + u64 shader_present; + unsigned long flags; + int ret = 0; + + kbase_csf_scheduler_lock(kbdev); + kbase_pm_lock(kbdev); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + shader_present = kbdev->gpu_props.shader_present; + + if ((new_core_mask & shader_present) != new_core_mask) { + dev_err(kbdev->dev, + "Invalid requested core mask 0x%llX: Includes non-existent cores (present = 0x%llX)", + new_core_mask, shader_present); + ret = -EINVAL; + goto exit; + } else if (!(new_core_mask & shader_present & kbdev->pm.backend.ca_cores_enabled)) { + dev_err(kbdev->dev, + "Invalid requested core mask 0x%llX: No intersection with currently available cores (present = 0x%llX, CA enabled = 0x%llX)", + new_core_mask, kbdev->gpu_props.shader_present, + kbdev->pm.backend.ca_cores_enabled); + ret = -EINVAL; + goto exit; + } + + + if (kbdev->pm.debug_core_mask != new_core_mask) + kbase_pm_set_debug_core_mask(kbdev, new_core_mask); + +exit: + 
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + kbase_pm_unlock(kbdev); + kbase_csf_scheduler_unlock(kbdev); + + return ret; +} +#else +struct kbase_core_mask { + u64 new_core_mask[BASE_JM_MAX_NR_SLOTS]; +}; + +static int core_mask_parse(struct kbase_device *const kbdev, const char *const buf, + struct kbase_core_mask *const mask) +{ + int items; + + items = sscanf(buf, "%llx %llx %llx", &mask->new_core_mask[0], &mask->new_core_mask[1], + &mask->new_core_mask[2]); + + if (items != 1 && items != BASE_JM_MAX_NR_SLOTS) { + dev_err(kbdev->dev, "Couldn't process core mask write operation.\n" + "Use format \n" + "or \n"); + return -EINVAL; + } + + /* If only one value was provided, set all other core masks equal to the value. */ + if (items == 1) { + size_t i; + + for (i = 1; i < BASE_JM_MAX_NR_SLOTS; i++) + mask->new_core_mask[i] = mask->new_core_mask[0]; + } + + return 0; +} + +static int core_mask_set(struct kbase_device *kbdev, struct kbase_core_mask *const new_mask) +{ + u64 shader_present = kbdev->gpu_props.shader_present; + u64 group_core_mask = kbdev->gpu_props.coherency_info.group.core_mask; + u64 *new_core_mask; + unsigned long flags; + int ret = 0; + size_t i; + + kbase_pm_lock(kbdev); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + new_core_mask = &new_mask->new_core_mask[0]; + + for (i = 0; i < BASE_JM_MAX_NR_SLOTS; ++i) { + if ((new_core_mask[i] & shader_present) != new_core_mask[i]) { + dev_err(kbdev->dev, + "Invalid core mask 0x%llX for JS %zu: Includes non-existent cores (present = 0x%llX)", + new_core_mask[i], i, shader_present); + ret = -EINVAL; + goto exit; + + } else if (!(new_core_mask[i] & shader_present & + kbdev->pm.backend.ca_cores_enabled)) { + dev_err(kbdev->dev, + "Invalid core mask 0x%llX for JS %zu: No intersection with currently available cores (present = 0x%llX, CA enabled = 0x%llX)", + new_core_mask[i], i, kbdev->gpu_props.shader_present, + kbdev->pm.backend.ca_cores_enabled); + ret = -EINVAL; + goto exit; + } else if 
(!(new_core_mask[i] & group_core_mask)) { + dev_err(kbdev->dev, + "Invalid core mask 0x%llX for JS %zu: No intersection with group 0 core mask 0x%llX", + new_core_mask[i], i, group_core_mask); + ret = -EINVAL; + goto exit; + } else if (!(new_core_mask[i] & kbdev->gpu_props.curr_config.shader_present)) { + dev_err(kbdev->dev, + "Invalid core mask 0x%llX for JS %zu: No intersection with current core mask 0x%llX", + new_core_mask[i], i, kbdev->gpu_props.curr_config.shader_present); + ret = -EINVAL; + goto exit; + } + } + + for (i = 0; i < BASE_JM_MAX_NR_SLOTS; i++) { + if (kbdev->pm.debug_core_mask[i] != new_core_mask[i]) { + kbase_pm_set_debug_core_mask(kbdev, new_core_mask, BASE_JM_MAX_NR_SLOTS); + break; + } + } + +exit: + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + kbase_pm_unlock(kbdev); + + return ret; +} + +#endif + /** * core_mask_store - Store callback for the core_mask sysfs file. * @@ -2594,18 +2485,9 @@ static ssize_t core_mask_store(struct device *dev, struct device_attribute *attr size_t count) { struct kbase_device *kbdev; -#if MALI_USE_CSF - u64 new_core_mask; -#else - u64 new_core_mask[3]; - u64 group_core_mask; - int i; -#endif /* MALI_USE_CSF */ + struct kbase_core_mask core_mask = {}; - int items; - ssize_t err = (ssize_t)count; - unsigned long flags; - u64 shader_present; + int err; CSTD_UNUSED(attr); @@ -2614,102 +2496,16 @@ static ssize_t core_mask_store(struct device *dev, struct device_attribute *attr if (!kbdev) return -ENODEV; -#if MALI_USE_CSF - items = sscanf(buf, "%llx", &new_core_mask); + err = core_mask_parse(kbdev, buf, &core_mask); + if (err) + return err; - if (items != 1) { - dev_err(kbdev->dev, "Couldn't process core mask write operation.\n" - "Use format \n"); - err = -EINVAL; - goto end; - } -#else - items = sscanf(buf, "%llx %llx %llx", &new_core_mask[0], &new_core_mask[1], - &new_core_mask[2]); + err = core_mask_set(kbdev, &core_mask); - if (items != 1 && items != 3) { - dev_err(kbdev->dev, "Couldn't process core 
mask write operation.\n" - "Use format \n" - "or \n"); - err = -EINVAL; - goto end; - } + if (err) + return err; - if (items == 1) - new_core_mask[1] = new_core_mask[2] = new_core_mask[0]; -#endif - - mutex_lock(&kbdev->pm.lock); - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - - shader_present = kbdev->gpu_props.shader_present; - -#if MALI_USE_CSF - if ((new_core_mask & shader_present) != new_core_mask) { - dev_err(dev, - "Invalid core mask 0x%llX: Includes non-existent cores (present = 0x%llX)", - new_core_mask, shader_present); - err = -EINVAL; - goto unlock; - - } else if (!(new_core_mask & shader_present & kbdev->pm.backend.ca_cores_enabled)) { - dev_err(dev, - "Invalid core mask 0x%llX: No intersection with currently available cores (present = 0x%llX, CA enabled = 0x%llX\n", - new_core_mask, kbdev->gpu_props.shader_present, - kbdev->pm.backend.ca_cores_enabled); - err = -EINVAL; - goto unlock; - } - - if (kbdev->pm.debug_core_mask != new_core_mask) - kbase_pm_set_debug_core_mask(kbdev, new_core_mask); -#else - group_core_mask = kbdev->gpu_props.coherency_info.group.core_mask; - - for (i = 0; i < 3; ++i) { - if ((new_core_mask[i] & shader_present) != new_core_mask[i]) { - dev_err(dev, - "Invalid core mask 0x%llX for JS %d: Includes non-existent cores (present = 0x%llX)", - new_core_mask[i], i, shader_present); - err = -EINVAL; - goto unlock; - - } else if (!(new_core_mask[i] & shader_present & - kbdev->pm.backend.ca_cores_enabled)) { - dev_err(dev, - "Invalid core mask 0x%llX for JS %d: No intersection with currently available cores (present = 0x%llX, CA enabled = 0x%llX\n", - new_core_mask[i], i, kbdev->gpu_props.shader_present, - kbdev->pm.backend.ca_cores_enabled); - err = -EINVAL; - goto unlock; - } else if (!(new_core_mask[i] & group_core_mask)) { - dev_err(dev, - "Invalid core mask 0x%llX for JS %d: No intersection with group 0 core mask 0x%llX\n", - new_core_mask[i], i, group_core_mask); - err = -EINVAL; - goto unlock; - } else if 
(!(new_core_mask[i] & kbdev->gpu_props.curr_config.shader_present)) { - dev_err(dev, - "Invalid core mask 0x%llX for JS %d: No intersection with current core mask 0x%llX\n", - new_core_mask[i], i, kbdev->gpu_props.curr_config.shader_present); - err = -EINVAL; - goto unlock; - } - } - - if (kbdev->pm.debug_core_mask[0] != new_core_mask[0] || - kbdev->pm.debug_core_mask[1] != new_core_mask[1] || - kbdev->pm.debug_core_mask[2] != new_core_mask[2]) { - kbase_pm_set_debug_core_mask(kbdev, new_core_mask[0], new_core_mask[1], - new_core_mask[2]); - } -#endif /* MALI_USE_CSF */ - -unlock: - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - mutex_unlock(&kbdev->pm.lock); -end: - return err; + return count; } /* @@ -3478,12 +3274,8 @@ int kbase_pm_gpu_freq_init(struct kbase_device *kbdev) /* convert found frequency to KHz */ found_freq /= 1000; - /* If lowest frequency in OPP table is still higher - * than the reference, then keep the reference frequency - * as the one to use for scaling . - */ - if (found_freq < lowest_freq_khz) - lowest_freq_khz = found_freq; + /* always use the lowest freqency from opp table */ + lowest_freq_khz = found_freq; } #else dev_err(kbdev->dev, "No operating-points-v2 node or operating-points property in DT"); @@ -4466,7 +4258,7 @@ static int kbase_common_reg_map(struct kbase_device *kbdev) goto out_region; } - kbdev->reg = ioremap(kbdev->reg_start, kbdev->reg_size); + kbdev->reg = mali_ioremap(kbdev->reg_start, kbdev->reg_size); if (!kbdev->reg) { dev_err(kbdev->dev, "Can't remap register window\n"); err = -EINVAL; @@ -4484,7 +4276,7 @@ out_region: static void kbase_common_reg_unmap(struct kbase_device *const kbdev) { if (kbdev->reg) { - iounmap(kbdev->reg); + mali_iounmap(kbdev->reg); release_mem_region(kbdev->reg_start, kbdev->reg_size); kbdev->reg = NULL; kbdev->reg_start = 0; @@ -4535,7 +4327,7 @@ void registers_unmap(struct kbase_device *kbdev) kbase_common_reg_unmap(kbdev); } -#if defined(CONFIG_MALI_ARBITER_SUPPORT) && 
defined(CONFIG_OF) +#if defined(CONFIG_OF) static bool kbase_is_pm_enabled(const struct device_node *gpu_node) { @@ -4562,17 +4354,6 @@ static bool kbase_is_pm_enabled(const struct device_node *gpu_node) return is_pm_enable; } -static bool kbase_is_pv_enabled(const struct device_node *gpu_node) -{ - const void *arbiter_if_node; - - arbiter_if_node = of_get_property(gpu_node, "arbiter-if", NULL); - if (!arbiter_if_node) - arbiter_if_node = of_get_property(gpu_node, "arbiter_if", NULL); - - return arbiter_if_node ? true : false; -} - static bool kbase_is_full_coherency_enabled(const struct device_node *gpu_node) { const void *coherency_dts; @@ -4586,72 +4367,61 @@ static bool kbase_is_full_coherency_enabled(const struct device_node *gpu_node) } return false; } +#endif /* defined(CONFIG_OF) */ -#endif /* CONFIG_MALI_ARBITER_SUPPORT && CONFIG_OF */ - -int kbase_device_pm_init(struct kbase_device *kbdev) +int kbase_device_backend_init(struct kbase_device *kbdev) { int err = 0; -#if defined(CONFIG_MALI_ARBITER_SUPPORT) && defined(CONFIG_OF) - u32 product_model; +#if defined(CONFIG_OF) + /* + * Attempt to initialize arbitration. + * If the platform is not suitable for arbitration, return -EPERM. + * The device initialization should not fail but kbase will + * not support arbitration. 
+ */ + if (kbase_is_pm_enabled(kbdev->dev->of_node)) { + /* Arbitration AND power management invalid */ + dev_dbg(kbdev->dev, "Arbitration not supported with power management"); + return -EPERM; + } - if (kbase_is_pv_enabled(kbdev->dev->of_node)) { - dev_info(kbdev->dev, "Arbitration interface enabled\n"); - if (kbase_is_pm_enabled(kbdev->dev->of_node)) { - /* Arbitration AND power management invalid */ - dev_err(kbdev->dev, - "Invalid combination of arbitration AND power management\n"); - return -EPERM; - } - if (kbase_is_full_coherency_enabled(kbdev->dev->of_node)) { - /* Arbitration AND full coherency invalid */ - dev_err(kbdev->dev, - "Invalid combination of arbitration AND full coherency\n"); - return -EPERM; - } - err = kbase_arbiter_pm_early_init(kbdev); - if (err == 0) { - /* Check if Arbitration is running on - * supported GPU platform - */ - kbase_pm_register_access_enable(kbdev); + if (kbase_is_full_coherency_enabled(kbdev->dev->of_node)) { + /* Arbitration AND full coherency invalid */ + dev_dbg(kbdev->dev, "Arbitration not supported with full coherency"); + return -EPERM; + } + + err = kbase_arbiter_pm_early_init(kbdev); + if (err == 0) { +#if !MALI_USE_CSF + u32 product_model; + + /* + * Attempt to obtain and parse gpu_id in the event an external AW module + * is used for messaging. We should have access to GPU at this point. 
+ */ + if (kbdev->gpu_props.gpu_id.arch_major == 0) kbase_gpuprops_parse_gpu_id(&kbdev->gpu_props.gpu_id, kbase_reg_get_gpu_id(kbdev)); - kbase_pm_register_access_disable(kbdev); - product_model = kbdev->gpu_props.gpu_id.product_model; - if (product_model != GPU_ID_PRODUCT_TGOX && - product_model != GPU_ID_PRODUCT_TNOX && - product_model != GPU_ID_PRODUCT_TBAX) { - kbase_arbiter_pm_early_term(kbdev); - dev_err(kbdev->dev, "GPU platform not suitable for arbitration\n"); - return -EPERM; - } + product_model = kbdev->gpu_props.gpu_id.product_model; + if (product_model != GPU_ID_PRODUCT_TGOX && product_model != GPU_ID_PRODUCT_TNOX && + product_model != GPU_ID_PRODUCT_TBAX) { + kbase_arbiter_pm_early_term(kbdev); + dev_dbg(kbdev->dev, "GPU platform not suitable for arbitration"); + return -EPERM; } - } else { - kbdev->arb.arb_if = NULL; - kbdev->arb.arb_dev = NULL; - err = power_control_init(kbdev); +#endif /* !MALI_USE_CSF */ + dev_info(kbdev->dev, "Arbitration interface enabled"); } -#else - err = power_control_init(kbdev); -#endif /* CONFIG_MALI_ARBITER_SUPPORT && CONFIG_OF */ +#endif /* defined(CONFIG_OF) */ return err; } -void kbase_device_pm_term(struct kbase_device *kbdev) +void kbase_device_backend_term(struct kbase_device *kbdev) { -#ifdef CONFIG_MALI_ARBITER_SUPPORT -#if IS_ENABLED(CONFIG_OF) - if (kbase_is_pv_enabled(kbdev->dev->of_node)) - kbase_arbiter_pm_early_term(kbdev); - else - power_control_term(kbdev); -#endif /* CONFIG_OF */ -#else - power_control_term(kbdev); -#endif + kbase_arbiter_pm_early_term(kbdev); } int power_control_init(struct kbase_device *kbdev) @@ -5064,11 +4834,12 @@ static struct dentry *init_debugfs(struct kbase_device *kbdev) return dentry; } + dentry = debugfs_ctx_defaults_init(kbdev); if (IS_ERR_OR_NULL(dentry)) return dentry; - if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE)) { + if (kbase_hw_has_feature(kbdev, KBASE_HW_FEATURE_PROTECTED_DEBUG_MODE)) { dentry = debugfs_create_file("protected_debug_mode", 
0444, kbdev->mali_debugfs_directory, kbdev, &fops_protected_debug_mode); @@ -5955,11 +5726,11 @@ static int kbase_platform_device_probe(struct platform_device *pdev) #if (KERNEL_VERSION(5, 3, 0) <= LINUX_VERSION_CODE) mutex_unlock(&kbase_probe_mutex); #endif -#ifdef CONFIG_MALI_ARBITER_SUPPORT - mutex_lock(&kbdev->pm.lock); - kbase_arbiter_pm_vm_event(kbdev, KBASE_VM_GPU_INITIALIZED_EVT); - mutex_unlock(&kbdev->pm.lock); -#endif + if (kbase_has_arbiter(kbdev)) { + mutex_lock(&kbdev->pm.lock); + kbase_arbiter_pm_vm_event(kbdev, KBASE_VM_GPU_INITIALIZED_EVT); + mutex_unlock(&kbdev->pm.lock); + } } return err; diff --git a/drivers/gpu/arm/bifrost/mali_kbase_cs_experimental.h b/drivers/gpu/arm/bifrost/mali_kbase_cs_experimental.h index 0c794e2e90bc..e6222979b72c 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_cs_experimental.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_cs_experimental.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2024 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -30,9 +30,6 @@ */ static inline void mali_kbase_print_cs_experimental(void) { -#if MALI_INCREMENTAL_RENDERING_JM - pr_info("mali_kbase: INCREMENTAL_RENDERING_JM (experimental) enabled"); -#endif /* MALI_INCREMENTAL_RENDERING_JM */ } #endif /* _KBASE_CS_EXPERIMENTAL_H_ */ diff --git a/drivers/gpu/arm/bifrost/mali_kbase_debug_mem_allocs.c b/drivers/gpu/arm/bifrost/mali_kbase_debug_mem_allocs.c index c92fb9e0957e..4b7f6a186ac0 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_debug_mem_allocs.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_debug_mem_allocs.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2022-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2022-2024 ARM Limited. 
All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -58,7 +58,7 @@ static void debug_zone_mem_allocs_show(struct kbase_reg_zone *zone, struct seq_f for (p = rb_first(rbtree); p; p = rb_next(p)) { reg = rb_entry(p, struct kbase_va_region, rblink); if (!(reg->flags & KBASE_REG_FREE)) { - seq_printf(sfile, "%16llx, %16zx, %16zx, %8lx, %s\n", + seq_printf(sfile, "%16llx, %16zx, %16zx, %8llx, %s\n", reg->start_pfn << PAGE_SHIFT, reg->nr_pages << PAGE_SHIFT, kbase_reg_current_backed_size(reg) << PAGE_SHIFT, reg->flags, type_names[reg->gpu_alloc->type]); diff --git a/drivers/gpu/arm/bifrost/mali_kbase_debug_mem_view.c b/drivers/gpu/arm/bifrost/mali_kbase_debug_mem_view.c index dd8f8ff6fe79..48469cdcc34e 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_debug_mem_view.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_debug_mem_view.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2013-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2013-2024 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -237,7 +237,11 @@ static int debug_mem_open(struct inode *i, struct file *file) int ret; enum kbase_memory_zone idx; - if (!kbase_file_inc_fops_count_unless_closed(kctx->kfile)) +#if (KERNEL_VERSION(6, 7, 0) > LINUX_VERSION_CODE) + if (get_file_rcu(kctx->filp) == 0) +#else + if (get_file_rcu(&kctx->filp) == 0) +#endif return -ENOENT; /* Check if file was opened in write mode. 
GPU memory contents @@ -297,7 +301,7 @@ out: } seq_release(i, file); open_fail: - kbase_file_dec_fops_count(kctx->kfile); + fput(kctx->filp); return ret; } @@ -327,7 +331,7 @@ static int debug_mem_release(struct inode *inode, struct file *file) kfree(mem_data); } - kbase_file_dec_fops_count(kctx->kfile); + fput(kctx->filp); return 0; } diff --git a/drivers/gpu/arm/bifrost/mali_kbase_defs.h b/drivers/gpu/arm/bifrost/mali_kbase_defs.h index 13a5c30dcb61..b97df15f7a17 100755 --- a/drivers/gpu/arm/bifrost/mali_kbase_defs.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_defs.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2011-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2024 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -28,8 +28,8 @@ #define _KBASE_DEFS_H_ #include -#include -#include +#include +#include #include #include #include @@ -52,10 +52,6 @@ #include -#if IS_ENABLED(CONFIG_DEBUG_FS) -#include -#endif /* CONFIG_DEBUG_FS */ - #ifdef CONFIG_MALI_BIFROST_DEVFREQ #include #endif /* CONFIG_MALI_BIFROST_DEVFREQ */ @@ -64,9 +60,7 @@ #include #endif -#ifdef CONFIG_MALI_ARBITER_SUPPORT #include -#endif /* CONFIG_MALI_ARBITER_SUPPORT */ #include #include @@ -78,6 +72,7 @@ #include #include #include +#include #include /** Number of milliseconds before we time out on a GPU soft/hard reset */ @@ -178,16 +173,11 @@ struct kbase_gpu_metrics { * * @link: Links the object in kbase_device::gpu_metrics::active_list * or kbase_device::gpu_metrics::inactive_list. - * @first_active_start_time: Records the time at which the application first became + * @active_start_time: Records the time at which the application first became * active in the current work period. 
- * @last_active_start_time: Records the time at which the application last became - * active in the current work period. - * @last_active_end_time: Records the time at which the application last became - * inactive in the current work period. - * @total_active: Tracks the time for which application has been active - * in the current work period. - * @prev_wp_active_end_time: Records the time at which the application last became - * inactive in the previous work period. + * @active_end_time: Records the time at which the application last became + * inactive in the current work period, or the time of the end of + * previous work period if the application remained active. * @aid: Unique identifier for an application. * @kctx_count: Counter to keep a track of the number of Kbase contexts * created for an application. There may be multiple Kbase @@ -195,19 +185,14 @@ struct kbase_gpu_metrics { * metrics context. * @active_cnt: Counter that is updated every time the GPU activity starts * and ends in the current work period for an application. - * @flags: Flags to track the state of GPU metrics context. 
*/ struct kbase_gpu_metrics_ctx { struct list_head link; - u64 first_active_start_time; - u64 last_active_start_time; - u64 last_active_end_time; - u64 total_active; - u64 prev_wp_active_end_time; + u64 active_start_time; + u64 active_end_time; unsigned int aid; unsigned int kctx_count; u8 active_cnt; - u8 flags; }; #endif @@ -307,24 +292,33 @@ struct kbase_fault { #define MAX_PAGES_FOR_FREE_PGDS ((size_t)9) /* Maximum number of pointers to free PGDs */ -#define MAX_FREE_PGDS ((PAGE_SIZE / sizeof(struct page *)) * MAX_PAGES_FOR_FREE_PGDS) +#define MAX_FREE_PGDS ((PAGE_SIZE / sizeof(phys_addr_t)) * MAX_PAGES_FOR_FREE_PGDS) /** * struct kbase_mmu_table - object representing a set of GPU page tables - * @mmu_lock: Lock to serialize the accesses made to multi level GPU - * page tables - * @pgd: Physical address of the page allocated for the top - * level page table of the context, this is used for - * MMU HW programming as the address translation will - * start from the top level page table. - * @group_id: A memory group ID to be passed to a platform-specific - * memory group manager. - * Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1). - * @kctx: If this set of MMU tables belongs to a context then - * this is a back-reference to the context, otherwise - * it is NULL. - * @scratch_mem: Scratch memory used for MMU operations, which are - * serialized by the @mmu_lock. + * @mmu_lock: Lock to serialize the accesses made to multi level GPU + * page tables + * @pgd: Physical address of the page allocated for the top + * level page table of the context, this is used for + * MMU HW programming as the address translation will + * start from the top level page table. + * @group_id: A memory group ID to be passed to a platform-specific + * memory group manager. + * Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1). + * @kctx: If this set of MMU tables belongs to a context then + * this is a back-reference to the context, otherwise + * it is NULL. 
+ * @scratch_mem: Scratch memory used for MMU operations, which are + * serialized by the @mmu_lock. + * @pgd_pages_list: List head to link all 16K/64K pages allocated for the PGDs of mmut. + * These pages will be used to allocate 4KB PGD pages for + * the GPU page table. + * Linked with &kbase_page_metadata.data.pt_mapped.pgd_link. + * @last_allocated_pgd_page: Pointer to PGD page from where the last sub page + * was allocated for mmut. + * @last_freed_pgd_page: Pointer to PGD page to which the last freed 4K sub page + * was returned for mmut. + * @num_free_pgd_sub_pages: The total number of free 4K PGD pages in the mmut. */ struct kbase_mmu_table { struct mutex mmu_lock; @@ -342,7 +336,7 @@ struct kbase_mmu_table { * @levels: Array of PGD pages, large enough to copy one PGD * for each level of the MMU table. */ - u64 levels[MIDGARD_MMU_BOTTOMLEVEL][PAGE_SIZE / sizeof(u64)]; + u64 levels[MIDGARD_MMU_BOTTOMLEVEL][GPU_PAGE_SIZE / sizeof(u64)]; } teardown_pages; /** * @free_pgds: Scratch memory used for insertion, update and teardown @@ -351,11 +345,18 @@ struct kbase_mmu_table { */ struct { /** @pgds: Array of pointers to PGDs to free. */ - struct page *pgds[MAX_FREE_PGDS]; + phys_addr_t pgds[MAX_FREE_PGDS]; /** @head_index: Index of first free element in the PGDs array. */ size_t head_index; } free_pgds; } scratch_mem; + +#if GPU_PAGES_PER_CPU_PAGE > 1 + struct list_head pgd_pages_list; + struct page *last_allocated_pgd_page; + struct page *last_freed_pgd_page; + u32 num_free_pgd_sub_pages; +#endif }; #if MALI_USE_CSF @@ -381,14 +382,9 @@ static inline int kbase_as_has_page_fault(struct kbase_as *as, struct kbase_faul * * @used_pages: Tracks usage of OS shared memory. Updated when OS memory is * allocated/freed. - * @ir_threshold: Fraction of the maximum size of an allocation that grows - * on GPU page fault that can be used before the driver - * switches to incremental rendering, in 1/256ths. - * 0 means disabled. 
*/ struct kbasep_mem_device { atomic_t used_pages; - atomic_t ir_threshold; }; struct kbase_clk_rate_listener; @@ -493,9 +489,7 @@ struct kbase_pm_device_data { #if MALI_USE_CSF bool runtime_active; #endif -#ifdef CONFIG_MALI_ARBITER_SUPPORT atomic_t gpu_lost; -#endif /* CONFIG_MALI_ARBITER_SUPPORT */ wait_queue_head_t zero_active_count_wait; wait_queue_head_t resume_wait; @@ -511,10 +505,8 @@ struct kbase_pm_device_data { void (*callback_power_runtime_term)(struct kbase_device *kbdev); u32 dvfs_period; struct kbase_pm_backend_data backend; -#ifdef CONFIG_MALI_ARBITER_SUPPORT struct kbase_arbiter_vm_state *arb_vm_state; atomic_t gpu_users_waiting; -#endif /* CONFIG_MALI_ARBITER_SUPPORT */ struct kbase_clk_rate_trace_manager clk_rtm; }; @@ -555,7 +547,7 @@ struct kbase_mem_pool { u8 group_id; spinlock_t pool_lock; struct list_head page_list; - struct shrinker reclaim; + DEFINE_KBASE_SHRINKER reclaim; atomic_t isolation_in_progress_cnt; struct kbase_mem_pool *next_pool; @@ -847,8 +839,6 @@ struct kbase_mem_migrate { * @as_free: Bitpattern of free/available GPU address spaces. * @mmu_mask_change: Lock to serialize the access to MMU interrupt mask * register used in the handling of Bus & Page faults. - * @pagesize_2mb: Boolean to determine whether 2MiB page sizes are - * supported and used where possible. * @gpu_props: Object containing complete information about the * configuration/properties of GPU HW device in use. * @hw_issues_mask: List of SW workarounds for HW issues @@ -959,7 +949,7 @@ struct kbase_mem_migrate { * @ipa.last_sample_time: Records the time when counters, used for dynamic * energy estimation, were last sampled. * @previous_frequency: Previous frequency of GPU clock used for - * BASE_HW_ISSUE_GPU2017_1336 workaround, This clock is + * KBASE_HW_ISSUE_GPU2017_1336 workaround, This clock is * restored when L2 is powered on. * @job_fault_debug: Flag to control the dumping of debug data for job faults, * set when the 'job_fault' debugfs file is opened. 
@@ -1081,7 +1071,8 @@ struct kbase_mem_migrate { * KCPU queue. These structures may outlive kbase module * itself. Therefore, in such a case, a warning should be * be produced. - * @va_region_slab: kmem_cache (slab) for allocated kbase_va_region structures. + * @va_region_slab: kmem_cache (slab) for allocated @kbase_va_region structures. + * @page_metadata_slab: kmem_cache (slab) for allocated @kbase_page_metadata structures. * @fence_signal_timeout_enabled: Global flag for whether fence signal timeout tracking * is enabled. * @pcm_prioritized_process_nb: Notifier block for the Priority Control Manager @@ -1144,12 +1135,10 @@ struct kbase_device { spinlock_t mmu_mask_change; - bool pagesize_2mb; - struct kbase_gpu_props gpu_props; - unsigned long hw_issues_mask[(BASE_HW_ISSUE_END + BITS_PER_LONG - 1) / BITS_PER_LONG]; - unsigned long hw_features_mask[(BASE_HW_FEATURE_END + BITS_PER_LONG - 1) / BITS_PER_LONG]; + unsigned long hw_issues_mask[(KBASE_HW_ISSUE_END + BITS_PER_LONG - 1) / BITS_PER_LONG]; + unsigned long hw_features_mask[(KBASE_HW_FEATURE_END + BITS_PER_LONG - 1) / BITS_PER_LONG]; struct { atomic_t count; @@ -1165,6 +1154,12 @@ struct kbase_device { */ u8 pbha_propagate_bits; + /** + * @mma_wa_id: The PBHA ID to use for the PBHA OVERRIDE based workaround for MMA violation. 
+ * + */ + u32 mma_wa_id; + #if MALI_USE_CSF struct kbase_hwcnt_backend_csf_if hwcnt_backend_csf_if_fw; #else @@ -1256,7 +1251,6 @@ struct kbase_device { atomic_t job_fault_debug; #endif /* !MALI_USE_CSF */ -#if IS_ENABLED(CONFIG_DEBUG_FS) struct dentry *mali_debugfs_directory; struct dentry *debugfs_ctx_directory; struct dentry *debugfs_instr_directory; @@ -1278,7 +1272,6 @@ struct kbase_device { u32 reg_offset; } regs_dump_debugfs_data; #endif /* !MALI_CUSTOMER_RELEASE */ -#endif /* CONFIG_DEBUG_FS */ atomic_t ctx_num; @@ -1369,9 +1362,7 @@ struct kbase_device { } dummy_job_wa; bool dummy_job_wa_loaded; -#ifdef CONFIG_MALI_ARBITER_SUPPORT struct kbase_arbiter_device arb; -#endif /* Priority Control Manager device */ struct priority_control_manager_device *pcm_dev; @@ -1396,6 +1387,9 @@ struct kbase_device { atomic_t live_fence_metadata; #endif struct kmem_cache *va_region_slab; +#if GPU_PAGES_PER_CPU_PAGE > 1 + struct kmem_cache *page_metadata_slab; +#endif #if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) /** @@ -1424,9 +1418,6 @@ struct kbase_device { * @KBASE_FILE_COMPLETE: Indicates if the setup for context has * completed, i.e. flags have been set for the * context. - * @KBASE_FILE_DESTROY_CTX: Indicates that destroying of context has begun or - * is complete. This state can only be reached after - * @KBASE_FILE_COMPLETE. * * The driver allows only limited interaction with user-space until setup * is complete. @@ -1436,8 +1427,7 @@ enum kbase_file_state { KBASE_FILE_VSN_IN_PROGRESS, KBASE_FILE_NEED_CTX, KBASE_FILE_CTX_IN_PROGRESS, - KBASE_FILE_COMPLETE, - KBASE_FILE_DESTROY_CTX + KBASE_FILE_COMPLETE }; /** @@ -1447,12 +1437,6 @@ enum kbase_file_state { * allocated from the probe method of the Mali driver. * @filp: Pointer to the struct file corresponding to device file * /dev/malixx instance, passed to the file's open method. - * @owner: Pointer to the file table structure of a process that - * created the instance of /dev/malixx device file. 
Set to - * NULL when that process closes the file instance. No more - * file operations would be allowed once set to NULL. - * It would be updated only in the Userspace context, i.e. - * when @kbase_open or @kbase_flush is called. * @kctx: Object representing an entity, among which GPU is * scheduled and which gets its own GPU address space. * Invalid until @setup_state is KBASE_FILE_COMPLETE. @@ -1461,44 +1445,13 @@ enum kbase_file_state { * @setup_state is KBASE_FILE_NEED_CTX. * @setup_state: Initialization state of the file. Values come from * the kbase_file_state enumeration. - * @destroy_kctx_work: Work item for destroying the @kctx, enqueued only when - * @fops_count and @map_count becomes zero after /dev/malixx - * file was previously closed by the @owner. - * @lock: Lock to serialize the access to members like @owner, @fops_count, - * @map_count. - * @fops_count: Counter that is incremented at the beginning of a method - * defined for @kbase_fops and is decremented at the end. - * So the counter keeps a track of the file operations in progress - * for /dev/malixx file, that are being handled by the Kbase. - * The counter is needed to defer the context termination as - * Userspace can close the /dev/malixx file and flush() method - * can get called when some other file operation is in progress. - * @map_count: Counter to keep a track of the memory mappings present on - * /dev/malixx file instance. The counter is needed to defer the - * context termination as Userspace can close the /dev/malixx - * file and flush() method can get called when mappings are still - * present. - * @zero_fops_count_wait: Waitqueue used to wait for the @fops_count to become 0. - * Currently needed only for the "mem_view" debugfs file. - * @event_queue: Wait queue used for blocking the thread, which consumes - * the base_jd_event corresponding to an atom, when there - * are no more posted events. 
*/ struct kbase_file { struct kbase_device *kbdev; struct file *filp; - fl_owner_t owner; struct kbase_context *kctx; unsigned long api_version; atomic_t setup_state; - struct work_struct destroy_kctx_work; - spinlock_t lock; - int fops_count; - int map_count; -#if IS_ENABLED(CONFIG_DEBUG_FS) - wait_queue_head_t zero_fops_count_wait; -#endif - wait_queue_head_t event_queue; }; #if MALI_JIT_PRESSURE_LIMIT_BASE /** @@ -1680,8 +1633,8 @@ struct kbase_sub_alloc { /** * struct kbase_context - Kernel base context * - * @kfile: Pointer to the object representing the /dev/malixx device - * file instance. + * @filp: Pointer to the struct file corresponding to device file + * /dev/malixx instance, passed to the file's open method. * @kbdev: Pointer to the Kbase device for which the context is created. * @kctx_list_link: Node into Kbase device list of contexts. * @mmu: Structure holding details of the MMU tables for this @@ -1734,6 +1687,9 @@ struct kbase_sub_alloc { * used in conjunction with @cookies bitmask mainly for * providing a mechansim to have the same value for CPU & * GPU virtual address. + * @event_queue: Wait queue used for blocking the thread, which consumes + * the base_jd_event corresponding to an atom, when there + * are no more posted events. * @tgid: Thread group ID of the process whose thread created * the context (by calling KBASE_IOCTL_VERSION_CHECK or * KBASE_IOCTL_SET_FLAGS, depending on the @api_version). @@ -1945,7 +1901,7 @@ struct kbase_sub_alloc { * is made on the device file. 
*/ struct kbase_context { - struct kbase_file *kfile; + struct file *filp; struct kbase_device *kbdev; struct list_head kctx_list_link; struct kbase_mmu_table mmu; @@ -1997,6 +1953,7 @@ struct kbase_context { DECLARE_BITMAP(cookies, BITS_PER_LONG); struct kbase_va_region *pending_regions[BITS_PER_LONG]; + wait_queue_head_t event_queue; pid_t tgid; pid_t pid; atomic_t prioritized; @@ -2006,7 +1963,8 @@ struct kbase_context { struct kbase_mem_pool_group mem_pools; - struct shrinker reclaim; + DEFINE_KBASE_SHRINKER reclaim; + struct list_head evict_list; atomic_t evict_nents; @@ -2181,6 +2139,18 @@ static inline u64 kbase_get_lock_region_min_size_log2(struct kbase_gpu_props con return 15; /* 32 kB */ } +/** + * kbase_has_arbiter - Check whether GPU has an arbiter. + * + * @kbdev: KBase device. + * + * Return: True if there is an arbiter, False otherwise. + */ +static inline bool kbase_has_arbiter(struct kbase_device *kbdev) +{ + return (bool)kbdev->arb.arb_if; +} + /* Conversion helpers for setting up high resolution timers */ #define HR_TIMER_DELAY_MSEC(x) (ns_to_ktime(((u64)(x)) * 1000000U)) #define HR_TIMER_DELAY_NSEC(x) (ns_to_ktime(x)) diff --git a/drivers/gpu/arm/bifrost/mali_kbase_dummy_job_wa.c b/drivers/gpu/arm/bifrost/mali_kbase_dummy_job_wa.c index 9c39f0e20f76..7b578c81af60 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_dummy_job_wa.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_dummy_job_wa.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2024 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -218,7 +218,7 @@ static bool wa_blob_load_needed(struct kbase_device *kbdev) if (of_machine_is_compatible("arm,juno")) return false; - if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TTRX_3485)) + if (kbase_hw_has_issue(kbdev, KBASE_HW_ISSUE_TTRX_3485)) return true; return false; @@ -311,7 +311,7 @@ int kbase_dummy_job_wa_load(struct kbase_device *kbdev) while (blob_offset) { const struct wa_blob *blob; size_t nr_pages; - u64 flags; + base_mem_alloc_flags flags; u64 gpu_va; struct kbase_va_region *va_region; diff --git a/drivers/gpu/arm/bifrost/mali_kbase_fence.h b/drivers/gpu/arm/bifrost/mali_kbase_fence.h index 06690d4f17bb..d45a0fec4104 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_fence.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_fence.h @@ -35,8 +35,37 @@ #include #if MALI_USE_CSF +/* Number of digits needed to express the max value of given unsigned type. + * + * Details: The number of digits needed to express the max value of given type is log10(t_max) + 1 + * sizeof(t) == log2(t_max)/8 + * log10(t_max) == log2(t_max) / log2(10) + * log2(t_max) == sizeof(type) * 8 + * 1/log2(10) is approx (1233 >> 12) + * Hence, number of digits for given type == log10(t_max) + 1 == sizeof(type) * 8 * (1233 >> 12) + 1 + */ +#define MAX_DIGITS_FOR_UNSIGNED_TYPE(t) ((((sizeof(t) * BITS_PER_BYTE) * 1233) >> 12) + 1) + +/* Number of digits needed to express the max value of given signed type, + * including the sign character, + */ +#define MAX_DIGITS_FOR_SIGNED_TYPE(t) (MAX_DIGITS_FOR_UNSIGNED_TYPE(t) + 1) + +/* Max number of characters for id member of kbase_device struct. */ +#define MAX_KBDEV_ID_LEN MAX_DIGITS_FOR_UNSIGNED_TYPE(u32) +/* Max number of characters for tgid member of kbase_context struct. 
*/ +#define MAX_KCTX_TGID_LEN MAX_DIGITS_FOR_SIGNED_TYPE(pid_t) +/* Max number of characters for id member of kbase_context struct. */ +#define MAX_KCTX_ID_LEN MAX_DIGITS_FOR_UNSIGNED_TYPE(u32) +/* Max number of characters for fence_context member of kbase_kcpu_command_queue struct. */ +#define MAX_KCTX_QUEUE_FENCE_CTX_LEN MAX_DIGITS_FOR_UNSIGNED_TYPE(u64) +/* Max number of characters for timeline name fixed format, including null character. */ +#define FIXED_FORMAT_LEN (9) + /* Maximum number of characters in DMA fence timeline name. */ -#define MAX_TIMELINE_NAME (32) +#define MAX_TIMELINE_NAME \ + (MAX_KBDEV_ID_LEN + MAX_KCTX_TGID_LEN + MAX_KCTX_ID_LEN + MAX_KCTX_QUEUE_FENCE_CTX_LEN + \ + FIXED_FORMAT_LEN) /** * struct kbase_kcpu_dma_fence_meta - Metadata structure for dma fence objects containing diff --git a/drivers/gpu/arm/bifrost/mali_kbase_gpu_metrics.c b/drivers/gpu/arm/bifrost/mali_kbase_gpu_metrics.c index 3a5b97db7c04..60ad1c272f84 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_gpu_metrics.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_gpu_metrics.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2023-2024 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -29,96 +29,46 @@ #include #include -/** - * enum gpu_metrics_ctx_flags - Flags for the GPU metrics context - * - * @ACTIVE_INTERVAL_IN_WP: Flag set when the application first becomes active in - * the current work period. - * - * @INSIDE_ACTIVE_LIST: Flag to track if object is in kbase_device::gpu_metrics::active_list - * - * All members need to be separate bits. This enum is intended for use in a - * bitmask where multiple values get OR-ed together. 
- */ -enum gpu_metrics_ctx_flags { - ACTIVE_INTERVAL_IN_WP = 1 << 0, - INSIDE_ACTIVE_LIST = 1 << 1, -}; - static unsigned long gpu_metrics_tp_emit_interval_ns = DEFAULT_GPU_METRICS_TP_EMIT_INTERVAL_NS; module_param(gpu_metrics_tp_emit_interval_ns, ulong, 0444); MODULE_PARM_DESC(gpu_metrics_tp_emit_interval_ns, "Time interval in nano seconds at which GPU metrics tracepoints are emitted"); -static inline bool gpu_metrics_ctx_flag(struct kbase_gpu_metrics_ctx *gpu_metrics_ctx, - enum gpu_metrics_ctx_flags flag) -{ - return (gpu_metrics_ctx->flags & flag); -} - -static inline void gpu_metrics_ctx_flag_set(struct kbase_gpu_metrics_ctx *gpu_metrics_ctx, - enum gpu_metrics_ctx_flags flag) -{ - gpu_metrics_ctx->flags |= flag; -} - -static inline void gpu_metrics_ctx_flag_clear(struct kbase_gpu_metrics_ctx *gpu_metrics_ctx, - enum gpu_metrics_ctx_flags flag) -{ - gpu_metrics_ctx->flags &= ~flag; -} - static inline void validate_tracepoint_data(struct kbase_gpu_metrics_ctx *gpu_metrics_ctx, u64 start_time, u64 end_time, u64 total_active) { #if 0 - WARN(total_active > NSEC_PER_SEC, "total_active %llu > 1 second for aid %u active_cnt %u", - total_active, gpu_metrics_ctx->aid, gpu_metrics_ctx->active_cnt); - WARN(start_time >= end_time, "start_time %llu >= end_time %llu for aid %u active_cnt %u", start_time, end_time, gpu_metrics_ctx->aid, gpu_metrics_ctx->active_cnt); WARN(total_active > (end_time - start_time), "total_active %llu > end_time %llu - start_time %llu for aid %u active_cnt %u", total_active, end_time, start_time, gpu_metrics_ctx->aid, gpu_metrics_ctx->active_cnt); - - WARN(gpu_metrics_ctx->prev_wp_active_end_time > start_time, - "prev_wp_active_end_time %llu > start_time %llu for aid %u active_cnt %u", - gpu_metrics_ctx->prev_wp_active_end_time, start_time, gpu_metrics_ctx->aid, - gpu_metrics_ctx->active_cnt); #endif } static void emit_tracepoint_for_active_gpu_metrics_ctx( struct kbase_device *kbdev, struct kbase_gpu_metrics_ctx *gpu_metrics_ctx, u64 
current_time) { - const u64 start_time = gpu_metrics_ctx->first_active_start_time; - u64 total_active = gpu_metrics_ctx->total_active; - u64 end_time; + const u64 start_time = gpu_metrics_ctx->active_start_time; + u64 total_active, end_time = current_time; /* Check if the GPU activity is currently ongoing */ if (gpu_metrics_ctx->active_cnt) { /* The following check is to handle the race on CSF GPUs that can happen between * the draining of trace buffer and FW emitting the ACT=1 event . */ - if (unlikely(current_time == gpu_metrics_ctx->last_active_start_time)) - current_time++; - end_time = current_time; - total_active += end_time - gpu_metrics_ctx->last_active_start_time; - - gpu_metrics_ctx->first_active_start_time = current_time; - gpu_metrics_ctx->last_active_start_time = current_time; - } else { - end_time = gpu_metrics_ctx->last_active_end_time; - gpu_metrics_ctx_flag_clear(gpu_metrics_ctx, ACTIVE_INTERVAL_IN_WP); + if (unlikely(end_time == start_time)) + end_time++; + gpu_metrics_ctx->active_start_time = end_time; } + total_active = end_time - start_time; trace_gpu_work_period(kbdev->id, gpu_metrics_ctx->aid, start_time, end_time, total_active); validate_tracepoint_data(gpu_metrics_ctx, start_time, end_time, total_active); - gpu_metrics_ctx->prev_wp_active_end_time = end_time; - gpu_metrics_ctx->total_active = 0; + gpu_metrics_ctx->active_end_time = end_time; } void kbase_gpu_metrics_ctx_put(struct kbase_device *kbdev, @@ -131,7 +81,8 @@ void kbase_gpu_metrics_ctx_put(struct kbase_device *kbdev, if (gpu_metrics_ctx->kctx_count) return; - if (gpu_metrics_ctx_flag(gpu_metrics_ctx, ACTIVE_INTERVAL_IN_WP)) + /* Generate a tracepoint if there's still activity */ + if (gpu_metrics_ctx->active_cnt) emit_tracepoint_for_active_gpu_metrics_ctx(kbdev, gpu_metrics_ctx, ktime_get_raw_ns()); @@ -166,12 +117,11 @@ struct kbase_gpu_metrics_ctx *kbase_gpu_metrics_ctx_get(struct kbase_device *kbd void kbase_gpu_metrics_ctx_init(struct kbase_device *kbdev, struct 
kbase_gpu_metrics_ctx *gpu_metrics_ctx, unsigned int aid) { + gpu_metrics_ctx->active_start_time = 0; + gpu_metrics_ctx->active_end_time = 0; gpu_metrics_ctx->aid = aid; - gpu_metrics_ctx->total_active = 0; gpu_metrics_ctx->kctx_count = 1; gpu_metrics_ctx->active_cnt = 0; - gpu_metrics_ctx->prev_wp_active_end_time = 0; - gpu_metrics_ctx->flags = 0; list_add_tail(&gpu_metrics_ctx->link, &kbdev->gpu_metrics.inactive_list); } @@ -180,17 +130,9 @@ void kbase_gpu_metrics_ctx_start_activity(struct kbase_context *kctx, u64 timest struct kbase_gpu_metrics_ctx *gpu_metrics_ctx = kctx->gpu_metrics_ctx; gpu_metrics_ctx->active_cnt++; - if (gpu_metrics_ctx->active_cnt == 1) - gpu_metrics_ctx->last_active_start_time = timestamp_ns; - - if (!gpu_metrics_ctx_flag(gpu_metrics_ctx, ACTIVE_INTERVAL_IN_WP)) { - gpu_metrics_ctx->first_active_start_time = timestamp_ns; - gpu_metrics_ctx_flag_set(gpu_metrics_ctx, ACTIVE_INTERVAL_IN_WP); - } - - if (!gpu_metrics_ctx_flag(gpu_metrics_ctx, INSIDE_ACTIVE_LIST)) { + if (gpu_metrics_ctx->active_cnt == 1) { + gpu_metrics_ctx->active_start_time = timestamp_ns; list_move_tail(&gpu_metrics_ctx->link, &kctx->kbdev->gpu_metrics.active_list); - gpu_metrics_ctx_flag_set(gpu_metrics_ctx, INSIDE_ACTIVE_LIST); } } @@ -201,22 +143,22 @@ void kbase_gpu_metrics_ctx_end_activity(struct kbase_context *kctx, u64 timestam if (WARN_ON_ONCE(!gpu_metrics_ctx->active_cnt)) return; + /* Do not emit tracepoint if GPU activity still continues. 
*/ if (--gpu_metrics_ctx->active_cnt) return; - if (likely(timestamp_ns > gpu_metrics_ctx->last_active_start_time)) { - gpu_metrics_ctx->last_active_end_time = timestamp_ns; - gpu_metrics_ctx->total_active += - timestamp_ns - gpu_metrics_ctx->last_active_start_time; + if (likely(timestamp_ns > gpu_metrics_ctx->active_start_time)) { + emit_tracepoint_for_active_gpu_metrics_ctx(kctx->kbdev, gpu_metrics_ctx, + timestamp_ns); return; } /* Due to conversion from system timestamp to CPU timestamp (which involves rounding) * the value for start and end timestamp could come as same on CSF GPUs. */ - if (timestamp_ns == gpu_metrics_ctx->last_active_start_time) { - gpu_metrics_ctx->last_active_end_time = timestamp_ns + 1; - gpu_metrics_ctx->total_active += 1; + if (timestamp_ns == gpu_metrics_ctx->active_start_time) { + emit_tracepoint_for_active_gpu_metrics_ctx(kctx->kbdev, gpu_metrics_ctx, + timestamp_ns + 1); return; } @@ -224,12 +166,9 @@ void kbase_gpu_metrics_ctx_end_activity(struct kbase_context *kctx, u64 timestam * visible to the Kbase even though the system timestamp value sampled by FW was less than * the system timestamp value sampled by Kbase just before the draining of trace buffer. 
*/ - if (gpu_metrics_ctx->last_active_start_time == gpu_metrics_ctx->first_active_start_time && - gpu_metrics_ctx->prev_wp_active_end_time == gpu_metrics_ctx->first_active_start_time) { - WARN_ON_ONCE(gpu_metrics_ctx->total_active); - gpu_metrics_ctx->last_active_end_time = - gpu_metrics_ctx->prev_wp_active_end_time + 1; - gpu_metrics_ctx->total_active = 1; + if (gpu_metrics_ctx->active_end_time == gpu_metrics_ctx->active_start_time) { + emit_tracepoint_for_active_gpu_metrics_ctx(kctx->kbdev, gpu_metrics_ctx, + gpu_metrics_ctx->active_end_time + 1); return; } @@ -242,15 +181,12 @@ void kbase_gpu_metrics_emit_tracepoint(struct kbase_device *kbdev, u64 ts) struct kbase_gpu_metrics_ctx *gpu_metrics_ctx, *tmp; list_for_each_entry_safe(gpu_metrics_ctx, tmp, &gpu_metrics->active_list, link) { - if (!gpu_metrics_ctx_flag(gpu_metrics_ctx, ACTIVE_INTERVAL_IN_WP)) { - WARN_ON(!gpu_metrics_ctx_flag(gpu_metrics_ctx, INSIDE_ACTIVE_LIST)); - WARN_ON(gpu_metrics_ctx->active_cnt); - list_move_tail(&gpu_metrics_ctx->link, &gpu_metrics->inactive_list); - gpu_metrics_ctx_flag_clear(gpu_metrics_ctx, INSIDE_ACTIVE_LIST); + if (gpu_metrics_ctx->active_cnt) { + emit_tracepoint_for_active_gpu_metrics_ctx(kbdev, gpu_metrics_ctx, ts); continue; } - emit_tracepoint_for_active_gpu_metrics_ctx(kbdev, gpu_metrics_ctx, ts); + list_move_tail(&gpu_metrics_ctx->link, &gpu_metrics->inactive_list); } } diff --git a/drivers/gpu/arm/bifrost/mali_kbase_gpu_metrics.h b/drivers/gpu/arm/bifrost/mali_kbase_gpu_metrics.h index c445dff32dc9..658cf1c164c5 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_gpu_metrics.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_gpu_metrics.h @@ -106,7 +106,7 @@ void kbase_gpu_metrics_ctx_init(struct kbase_device *kbdev, * @kctx: Pointer to the Kbase context contributing data to the GPU metrics context. * @timestamp_ns: CPU timestamp at which the GPU activity started. 
* - * The provided timestamp would be later used as the "start_time_ns" for the + * The provided timestamp is used as the "start_time_ns" for the * power/gpu_work_period tracepoint if this is the first GPU activity for the GPU * metrics context in the current work period. * @@ -122,9 +122,9 @@ void kbase_gpu_metrics_ctx_start_activity(struct kbase_context *kctx, u64 timest * @kctx: Pointer to the Kbase context contributing data to the GPU metrics context. * @timestamp_ns: CPU timestamp at which the GPU activity ended. * - * The provided timestamp would be later used as the "end_time_ns" for the - * power/gpu_work_period tracepoint if this is the last GPU activity for the GPU - * metrics context in the current work period. + * The provided timestamp is used as the "end_time_ns" for the power/gpu_work_period + * tracepoint if this is the last GPU activity for the GPU metrics context + * in the current work period. * * Note: The caller must appropriately serialize the call to this function with the * call to other GPU metrics functions declared in this file. @@ -138,8 +138,8 @@ void kbase_gpu_metrics_ctx_end_activity(struct kbase_context *kctx, u64 timestam * @kbdev: Pointer to the GPU device. * @ts: Timestamp at which the tracepoint is being emitted. * - * This function would loop through all the active GPU metrics contexts and emit a - * power/gpu_work_period tracepoint for them. + * This function would loop through all GPU metrics contexts in the active list and + * emit a power/gpu_work_period tracepoint if the GPU work in the context still active. * The GPU metrics context that is found to be inactive since the last tracepoint * was emitted would be moved to the inactive list. 
* The current work period would be considered as over and a new work period would diff --git a/drivers/gpu/arm/bifrost/mali_kbase_gpuprops.c b/drivers/gpu/arm/bifrost/mali_kbase_gpuprops.c index 10b3b506e84e..9719580837cc 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_gpuprops.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_gpuprops.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2011-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2024 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -357,6 +357,7 @@ enum l2_config_override_result { /** * kbase_read_l2_config_from_dt - Read L2 configuration * @kbdev: The kbase device for which to get the L2 configuration. + * @regdump: Pointer to struct kbase_gpuprops_regdump structure. * * Check for L2 configuration overrides in module parameters and device tree. * Override values in module parameters take priority over override values in @@ -366,9 +367,16 @@ enum l2_config_override_result { * overridden, L2_CONFIG_OVERRIDE_NONE if no overrides are provided. * L2_CONFIG_OVERRIDE_FAIL otherwise. */ -static enum l2_config_override_result kbase_read_l2_config_from_dt(struct kbase_device *const kbdev) +static enum l2_config_override_result +kbase_read_l2_config_from_dt(struct kbase_device *const kbdev, + struct kbasep_gpuprops_regdump *regdump) { struct device_node *np = kbdev->dev->of_node; + /* + * CACHE_SIZE bit fields in L2_FEATURES register, default value after the reset/powerup + * holds the maximum size of the cache that can be programmed in L2_CONFIG register. 
+ */ + const u8 l2_size_max = L2_FEATURES_CACHE_SIZE_GET(regdump->l2_features); if (!np) return L2_CONFIG_OVERRIDE_NONE; @@ -378,8 +386,12 @@ static enum l2_config_override_result kbase_read_l2_config_from_dt(struct kbase_ else if (of_property_read_u8(np, "l2-size", &kbdev->l2_size_override)) kbdev->l2_size_override = 0; - if (kbdev->l2_size_override != 0 && kbdev->l2_size_override < OVERRIDE_L2_SIZE_MIN_LOG2) + if (kbdev->l2_size_override != 0 && (kbdev->l2_size_override < OVERRIDE_L2_SIZE_MIN_LOG2 || + kbdev->l2_size_override > l2_size_max)) { + dev_err(kbdev->dev, "Invalid Cache Size in %s", + override_l2_size ? "Module parameters" : "Device tree node"); return L2_CONFIG_OVERRIDE_FAIL; + } /* Check overriding value is supported, if not will result in * undefined behavior. @@ -425,11 +437,11 @@ int kbase_gpuprops_update_l2_features(struct kbase_device *kbdev) { int err = 0; - if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_L2_CONFIG)) { + if (kbase_hw_has_feature(kbdev, KBASE_HW_FEATURE_L2_CONFIG)) { struct kbasep_gpuprops_regdump *regdump = &PRIV_DATA_REGDUMP(kbdev); /* Check for L2 cache size & hash overrides */ - switch (kbase_read_l2_config_from_dt(kbdev)) { + switch (kbase_read_l2_config_from_dt(kbdev, regdump)) { case L2_CONFIG_OVERRIDE_FAIL: err = -EIO; goto exit; @@ -687,7 +699,7 @@ static void kbase_populate_user_data(struct kbase_device *kbdev, struct gpu_prop data->thread_props.max_thread_group_split = THREAD_MTGS_DEFAULT; } - if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_THREAD_GROUP_SPLIT)) + if (!kbase_hw_has_feature(kbdev, KBASE_HW_FEATURE_THREAD_GROUP_SPLIT)) data->thread_props.max_thread_group_split = 0; /* Raw Register Values */ diff --git a/drivers/gpu/arm/bifrost/mali_kbase_gwt.c b/drivers/gpu/arm/bifrost/mali_kbase_gwt.c index c92d54c9e663..99558b82ba7b 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_gwt.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_gwt.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) 
COPYRIGHT 2010-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2024 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -30,9 +30,10 @@ #include static inline void kbase_gpu_gwt_setup_page_permission(struct kbase_context *kctx, - unsigned long flag, struct rb_node *node) + unsigned long flag, + struct kbase_reg_zone *zone) { - struct rb_node *rbnode = node; + struct rb_node *rbnode = rb_first(&zone->reg_rbtree); while (rbnode) { struct kbase_va_region *reg; @@ -55,17 +56,15 @@ static inline void kbase_gpu_gwt_setup_page_permission(struct kbase_context *kct static void kbase_gpu_gwt_setup_pages(struct kbase_context *kctx, unsigned long flag) { - kbase_gpu_gwt_setup_page_permission(kctx, flag, - rb_first(&kctx->reg_zone[SAME_VA_ZONE].reg_rbtree)); - kbase_gpu_gwt_setup_page_permission(kctx, flag, - rb_first(&kctx->reg_zone[CUSTOM_VA_ZONE].reg_rbtree)); + kbase_gpu_gwt_setup_page_permission(kctx, flag, &kctx->reg_zone[SAME_VA_ZONE]); + kbase_gpu_gwt_setup_page_permission(kctx, flag, &kctx->reg_zone[CUSTOM_VA_ZONE]); } int kbase_gpu_gwt_start(struct kbase_context *kctx) { - kbase_gpu_vm_lock(kctx); + kbase_gpu_vm_lock_with_pmode_sync(kctx); if (kctx->gwt_enabled) { - kbase_gpu_vm_unlock(kctx); + kbase_gpu_vm_unlock_with_pmode_sync(kctx); return -EBUSY; } @@ -91,7 +90,7 @@ int kbase_gpu_gwt_start(struct kbase_context *kctx) kbase_gpu_gwt_setup_pages(kctx, ~KBASE_REG_GPU_WR); - kbase_gpu_vm_unlock(kctx); + kbase_gpu_vm_unlock_with_pmode_sync(kctx); return 0; } @@ -179,6 +178,10 @@ int kbase_gpu_gwt_dump(struct kbase_context *kctx, union kbase_ioctl_cinstr_gwt_ __user void *user_addr = (__user void *)(uintptr_t)gwt_dump->in.addr_buffer; __user void *user_sizes = (__user void *)(uintptr_t)gwt_dump->in.size_buffer; + /* We don't have any valid user space buffer to copy the write modified addresses. 
*/ + if (!gwt_dump->in.len || !gwt_dump->in.addr_buffer || !gwt_dump->in.size_buffer) + return -EINVAL; + kbase_gpu_vm_lock(kctx); if (!kctx->gwt_enabled) { @@ -187,14 +190,6 @@ int kbase_gpu_gwt_dump(struct kbase_context *kctx, union kbase_ioctl_cinstr_gwt_ return -EPERM; } - if (!gwt_dump->in.len || !gwt_dump->in.addr_buffer || !gwt_dump->in.size_buffer) { - kbase_gpu_vm_unlock(kctx); - /* We don't have any valid user space buffer to copy the - * write modified addresses. - */ - return -EINVAL; - } - if (list_empty(&kctx->gwt_snapshot_list) && !list_empty(&kctx->gwt_current_list)) { list_replace_init(&kctx->gwt_current_list, &kctx->gwt_snapshot_list); @@ -228,14 +223,14 @@ int kbase_gpu_gwt_dump(struct kbase_context *kctx, union kbase_ioctl_cinstr_gwt_ if (count) { err = copy_to_user((user_addr + (ubuf_count * sizeof(u64))), - (void *)addr_buffer, count * sizeof(u64)); + (void *)addr_buffer, size_mul(count, sizeof(u64))); if (err) { dev_err(kctx->kbdev->dev, "Copy to user failure\n"); kbase_gpu_vm_unlock(kctx); return err; } err = copy_to_user((user_sizes + (ubuf_count * sizeof(u64))), - (void *)num_page_buffer, count * sizeof(u64)); + (void *)num_page_buffer, size_mul(count, sizeof(u64))); if (err) { dev_err(kctx->kbdev->dev, "Copy to user failure\n"); kbase_gpu_vm_unlock(kctx); diff --git a/drivers/gpu/arm/bifrost/mali_kbase_hw.c b/drivers/gpu/arm/bifrost/mali_kbase_hw.c index 7d4200e96fd3..e04aad2422c7 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_hw.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_hw.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2012-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2012-2024 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -23,8 +23,8 @@ * Run-time work-arounds helpers */ -#include -#include +#include +#include #include #include "mali_kbase.h" #include "mali_kbase_hw.h" @@ -92,7 +92,7 @@ void kbase_hw_set_features_mask(struct kbase_device *kbdev) break; } - for (; *features != BASE_HW_FEATURE_END; features++) + for (; *features != KBASE_HW_FEATURE_END; features++) set_bit(*features, &kbdev->hw_features_mask[0]); #if defined(CONFIG_MALI_VECTOR_DUMP) @@ -103,8 +103,8 @@ void kbase_hw_set_features_mask(struct kbase_device *kbdev) * in the implementation of flush reduction optimization due to * unclear or ambiguous ARCH spec. */ - if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_CLEAN_ONLY_SAFE)) - clear_bit(BASE_HW_FEATURE_FLUSH_REDUCTION, &kbdev->hw_features_mask[0]); + if (kbase_hw_has_feature(kbdev, KBASE_HW_FEATURE_CLEAN_ONLY_SAFE)) + clear_bit(KBASE_HW_FEATURE_FLUSH_REDUCTION, &kbdev->hw_features_mask[0]); #endif } @@ -113,7 +113,7 @@ void kbase_hw_set_features_mask(struct kbase_device *kbdev) * @kbdev: Device pointer * * Return: pointer to an array of hardware issues, terminated by - * BASE_HW_ISSUE_END. + * KBASE_HW_ISSUE_END. 
* * In debugging versions of the driver, unknown versions of a known GPU will * be treated as the most recent known version not later than the actual @@ -225,6 +225,8 @@ static const enum base_hw_issue *kbase_hw_get_issues_for_new_id(struct kbase_dev { GPU_ID_PRODUCT_TVAX, { { GPU_ID_VERSION_MAKE(0, 0, 0), base_hw_issues_tVAx_r0p0 }, + { GPU_ID_VERSION_MAKE(0, 0, 5), base_hw_issues_tVAx_r0p0 }, + { GPU_ID_VERSION_MAKE(0, 1, 0), base_hw_issues_tVAx_r0p1 }, { U32_MAX, NULL } } }, { GPU_ID_PRODUCT_TTUX, @@ -334,6 +336,8 @@ static const enum base_hw_issue *kbase_hw_get_issues_for_new_id(struct kbase_dev gpu_id->version_id = fallback_version; } } + + return issues; } @@ -420,7 +424,7 @@ int kbase_hw_set_issues_mask(struct kbase_device *kbdev) gpu_id->product_major, gpu_id->arch_major, gpu_id->arch_minor, gpu_id->arch_rev, gpu_id->version_major, gpu_id->version_minor, gpu_id->version_status); - for (; *issues != BASE_HW_ISSUE_END; issues++) + for (; *issues != KBASE_HW_ISSUE_END; issues++) set_bit(*issues, &kbdev->hw_issues_mask[0]); return 0; diff --git a/drivers/gpu/arm/bifrost/mali_kbase_hw.h b/drivers/gpu/arm/bifrost/mali_kbase_hw.h index 44e1ee4a4a50..f14e5fb6d9ab 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_hw.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_hw.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2012-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2012-2024 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -23,8 +23,8 @@ * DOC: Run-time work-arounds helpers */ -#ifndef _KBASE_HW_H_ -#define _KBASE_HW_H_ +#ifndef _MALI_KBASE_HW_H_ +#define _MALI_KBASE_HW_H_ #include "mali_kbase_defs.h" @@ -47,7 +47,7 @@ * @kbdev: Device pointer */ #define kbase_hw_has_l2_slice_hash_feature(kbdev) \ - test_bit(BASE_HW_FEATURE_L2_SLICE_HASH, &(kbdev)->hw_features_mask[0]) + test_bit(KBASE_HW_FEATURE_L2_SLICE_HASH, &(kbdev)->hw_features_mask[0]) /** * kbase_hw_set_issues_mask - Set the hardware issues mask based on the GPU ID @@ -73,4 +73,4 @@ int kbase_hw_set_issues_mask(struct kbase_device *kbdev); */ void kbase_hw_set_features_mask(struct kbase_device *kbdev); -#endif /* _KBASE_HW_H_ */ +#endif /* _MALI_KBASE_HW_H_ */ diff --git a/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_pm.h b/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_pm.h index 7a0ea49099ba..982547d16022 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_pm.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_pm.h @@ -129,14 +129,14 @@ void kbase_pm_set_debug_core_mask(struct kbase_device *kbdev, u64 new_core_mask) * kbase_pm_set_debug_core_mask - Set the debug core mask. * * @kbdev: The kbase device structure for the device (must be a valid pointer) - * @new_core_mask_js0: The core mask to use for job slot 0 - * @new_core_mask_js1: The core mask to use for job slot 1 - * @new_core_mask_js2: The core mask to use for job slot 2 + * @new_core_mask: The core mask to use, as an array where each element refers + * to a job slot. + * @new_core_mask_size: Number of elements in the core mask array. * * This determines which cores the power manager is allowed to use. 
 */ -void kbase_pm_set_debug_core_mask(struct kbase_device *kbdev, u64 new_core_mask_js0, - u64 new_core_mask_js1, u64 new_core_mask_js2); +void kbase_pm_set_debug_core_mask(struct kbase_device *kbdev, u64 *new_core_mask, + size_t new_core_mask_size); #endif /* MALI_USE_CSF */ /** diff --git a/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_time.h b/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_time.h index 0630dfa6db3a..222ff2001e56 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_time.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_time.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2014, 2018-2021, 2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014, 2018-2021, 2024 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -27,7 +27,8 @@ * * @multiplier: Numerator of the converter's fraction. * @divisor: Denominator of the converter's fraction. - * @offset: Converter's offset term. + * @gpu_timestamp_offset: Cached CPU to GPU TS offset computed whenever whole system + * enters into standby mode where CPU Monotonic time is suspended. * @device_scaled_timeouts: Timeouts in milliseconds that were scaled to be * consistent with the minimum MCU frequency. This * array caches the results of all of the conversions @@ -55,7 +56,7 @@ struct kbase_backend_time { #if MALI_USE_CSF u64 multiplier; u64 divisor; - s64 offset; + s64 gpu_timestamp_offset; #endif unsigned int device_scaled_timeouts[KBASE_TIMEOUT_SELECTOR_COUNT]; }; @@ -70,6 +71,40 @@ struct kbase_backend_time { * Return: The CPU timestamp. */ u64 __maybe_unused kbase_backend_time_convert_gpu_to_cpu(struct kbase_device *kbdev, u64 gpu_ts); + +/** + * kbase_backend_update_gpu_timestamp_offset() - Updates GPU timestamp offset register with the + * cached value. 
+ * + * @kbdev: Kbase device pointer + * + * Compute the new cached value for GPU timestamp offset if the previously cached value has been + * invalidated and update the GPU timestamp offset register with the cached value. + */ +void kbase_backend_update_gpu_timestamp_offset(struct kbase_device *kbdev); + +/** + * kbase_backend_invalidate_gpu_timestamp_offset() - Invalidate cached GPU timestamp offset value + * + * @kbdev: Kbase device pointer + * + * This function invalidates cached GPU timestamp offset value whenever system suspend + * is about to happen where CPU TS counter will be stopped. + */ +void kbase_backend_invalidate_gpu_timestamp_offset(struct kbase_device *kbdev); + +#if MALI_UNIT_TEST +/** + * kbase_backend_read_gpu_timestamp_offset_reg() - Read GPU TIMESTAMP OFFSET Register + * + * @kbdev: Kbase device pointer + * + * This function reads the GPU TIMESTAMP OFFSET Register with proper register access + * + * Return: GPU TIMESTAMP OFFSET Register value, as unsigned 64 bit value + */ +u64 kbase_backend_read_gpu_timestamp_offset_reg(struct kbase_device *kbdev); +#endif #endif /** diff --git a/drivers/gpu/arm/bifrost/mali_kbase_hwconfig_features.h b/drivers/gpu/arm/bifrost/mali_kbase_hwconfig_features.h new file mode 100644 index 000000000000..265cb9585cc6 --- /dev/null +++ b/drivers/gpu/arm/bifrost/mali_kbase_hwconfig_features.h @@ -0,0 +1,158 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2014-2024 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +#ifndef _KBASE_HWCONFIG_FEATURES_H_ +#define _KBASE_HWCONFIG_FEATURES_H_ + +#include + +enum base_hw_feature { + KBASE_HW_FEATURE_FLUSH_REDUCTION, + KBASE_HW_FEATURE_PROTECTED_DEBUG_MODE, + KBASE_HW_FEATURE_TLS_HASHING, + KBASE_HW_FEATURE_THREAD_GROUP_SPLIT, + KBASE_HW_FEATURE_CLEAN_ONLY_SAFE, + KBASE_HW_FEATURE_IDVS_GROUP_SIZE, + KBASE_HW_FEATURE_L2_CONFIG, + KBASE_HW_FEATURE_L2_SLICE_HASH, + KBASE_HW_FEATURE_GPU_SLEEP, + KBASE_HW_FEATURE_FLUSH_INV_SHADER_OTHER, + KBASE_HW_FEATURE_CORE_FEATURES, + KBASE_HW_FEATURE_PBHA_HWU, + KBASE_HW_FEATURE_LARGE_PAGE_ALLOC, + KBASE_HW_FEATURE_THREAD_TLS_ALLOC, + KBASE_HW_FEATURE_END +}; + +__maybe_unused static const enum base_hw_feature base_hw_features_generic[] = { + KBASE_HW_FEATURE_END +}; + +__maybe_unused static const enum base_hw_feature base_hw_features_tMIx[] = { + KBASE_HW_FEATURE_THREAD_GROUP_SPLIT, KBASE_HW_FEATURE_FLUSH_REDUCTION, KBASE_HW_FEATURE_END +}; + +__maybe_unused static const enum base_hw_feature base_hw_features_tHEx[] = { + KBASE_HW_FEATURE_THREAD_GROUP_SPLIT, KBASE_HW_FEATURE_FLUSH_REDUCTION, + KBASE_HW_FEATURE_PROTECTED_DEBUG_MODE, KBASE_HW_FEATURE_END +}; + +__maybe_unused static const enum base_hw_feature base_hw_features_tSIx[] = { + KBASE_HW_FEATURE_THREAD_GROUP_SPLIT, KBASE_HW_FEATURE_FLUSH_REDUCTION, + KBASE_HW_FEATURE_PROTECTED_DEBUG_MODE, KBASE_HW_FEATURE_END +}; + +__maybe_unused static const enum base_hw_feature base_hw_features_tDVx[] = { + KBASE_HW_FEATURE_THREAD_GROUP_SPLIT, KBASE_HW_FEATURE_FLUSH_REDUCTION, + KBASE_HW_FEATURE_PROTECTED_DEBUG_MODE, KBASE_HW_FEATURE_END +}; + +__maybe_unused static const enum base_hw_feature base_hw_features_tNOx[] = { + KBASE_HW_FEATURE_THREAD_GROUP_SPLIT, KBASE_HW_FEATURE_FLUSH_REDUCTION, + 
KBASE_HW_FEATURE_PROTECTED_DEBUG_MODE, KBASE_HW_FEATURE_TLS_HASHING, + KBASE_HW_FEATURE_IDVS_GROUP_SIZE, KBASE_HW_FEATURE_END +}; + +__maybe_unused static const enum base_hw_feature base_hw_features_tGOx[] = { + KBASE_HW_FEATURE_THREAD_GROUP_SPLIT, KBASE_HW_FEATURE_FLUSH_REDUCTION, + KBASE_HW_FEATURE_PROTECTED_DEBUG_MODE, KBASE_HW_FEATURE_TLS_HASHING, + KBASE_HW_FEATURE_IDVS_GROUP_SIZE, KBASE_HW_FEATURE_CORE_FEATURES, + KBASE_HW_FEATURE_THREAD_TLS_ALLOC, KBASE_HW_FEATURE_END +}; + +__maybe_unused static const enum base_hw_feature base_hw_features_tTRx[] = { + KBASE_HW_FEATURE_FLUSH_REDUCTION, KBASE_HW_FEATURE_PROTECTED_DEBUG_MODE, + KBASE_HW_FEATURE_IDVS_GROUP_SIZE, KBASE_HW_FEATURE_CLEAN_ONLY_SAFE, + KBASE_HW_FEATURE_FLUSH_INV_SHADER_OTHER, KBASE_HW_FEATURE_END +}; + +__maybe_unused static const enum base_hw_feature base_hw_features_tNAx[] = { + KBASE_HW_FEATURE_FLUSH_REDUCTION, KBASE_HW_FEATURE_PROTECTED_DEBUG_MODE, + KBASE_HW_FEATURE_IDVS_GROUP_SIZE, KBASE_HW_FEATURE_CLEAN_ONLY_SAFE, + KBASE_HW_FEATURE_FLUSH_INV_SHADER_OTHER, KBASE_HW_FEATURE_END +}; + +__maybe_unused static const enum base_hw_feature base_hw_features_tBEx[] = { + KBASE_HW_FEATURE_FLUSH_REDUCTION, + KBASE_HW_FEATURE_PROTECTED_DEBUG_MODE, + KBASE_HW_FEATURE_IDVS_GROUP_SIZE, + KBASE_HW_FEATURE_L2_CONFIG, + KBASE_HW_FEATURE_CLEAN_ONLY_SAFE, + KBASE_HW_FEATURE_FLUSH_INV_SHADER_OTHER, + KBASE_HW_FEATURE_END +}; + +__maybe_unused static const enum base_hw_feature base_hw_features_tBAx[] = { + KBASE_HW_FEATURE_FLUSH_REDUCTION, + KBASE_HW_FEATURE_PROTECTED_DEBUG_MODE, + KBASE_HW_FEATURE_IDVS_GROUP_SIZE, + KBASE_HW_FEATURE_L2_CONFIG, + KBASE_HW_FEATURE_CLEAN_ONLY_SAFE, + KBASE_HW_FEATURE_FLUSH_INV_SHADER_OTHER, + KBASE_HW_FEATURE_END +}; + +__maybe_unused static const enum base_hw_feature base_hw_features_tODx[] = { + KBASE_HW_FEATURE_FLUSH_REDUCTION, KBASE_HW_FEATURE_PROTECTED_DEBUG_MODE, + KBASE_HW_FEATURE_L2_CONFIG, KBASE_HW_FEATURE_CLEAN_ONLY_SAFE, KBASE_HW_FEATURE_END +}; + +__maybe_unused static 
const enum base_hw_feature base_hw_features_tGRx[] = { + KBASE_HW_FEATURE_FLUSH_REDUCTION, KBASE_HW_FEATURE_PROTECTED_DEBUG_MODE, + KBASE_HW_FEATURE_L2_CONFIG, KBASE_HW_FEATURE_CLEAN_ONLY_SAFE, + KBASE_HW_FEATURE_CORE_FEATURES, KBASE_HW_FEATURE_END +}; + +__maybe_unused static const enum base_hw_feature base_hw_features_tVAx[] = { + KBASE_HW_FEATURE_FLUSH_REDUCTION, KBASE_HW_FEATURE_PROTECTED_DEBUG_MODE, + KBASE_HW_FEATURE_L2_CONFIG, KBASE_HW_FEATURE_CLEAN_ONLY_SAFE, + KBASE_HW_FEATURE_CORE_FEATURES, KBASE_HW_FEATURE_END +}; + +__maybe_unused static const enum base_hw_feature base_hw_features_tTUx[] = { + KBASE_HW_FEATURE_FLUSH_REDUCTION, KBASE_HW_FEATURE_PROTECTED_DEBUG_MODE, + KBASE_HW_FEATURE_L2_CONFIG, KBASE_HW_FEATURE_CLEAN_ONLY_SAFE, + KBASE_HW_FEATURE_L2_SLICE_HASH, KBASE_HW_FEATURE_GPU_SLEEP, + KBASE_HW_FEATURE_CORE_FEATURES, KBASE_HW_FEATURE_END +}; + +__maybe_unused static const enum base_hw_feature base_hw_features_tTIx[] = { + KBASE_HW_FEATURE_FLUSH_REDUCTION, + KBASE_HW_FEATURE_PROTECTED_DEBUG_MODE, + KBASE_HW_FEATURE_L2_CONFIG, + KBASE_HW_FEATURE_CLEAN_ONLY_SAFE, + KBASE_HW_FEATURE_L2_SLICE_HASH, + KBASE_HW_FEATURE_GPU_SLEEP, + KBASE_HW_FEATURE_CORE_FEATURES, + KBASE_HW_FEATURE_PBHA_HWU, + KBASE_HW_FEATURE_END +}; + +__maybe_unused static const enum base_hw_feature base_hw_features_tKRx[] = { + KBASE_HW_FEATURE_FLUSH_REDUCTION, KBASE_HW_FEATURE_PROTECTED_DEBUG_MODE, + KBASE_HW_FEATURE_L2_CONFIG, KBASE_HW_FEATURE_CLEAN_ONLY_SAFE, + KBASE_HW_FEATURE_L2_SLICE_HASH, KBASE_HW_FEATURE_GPU_SLEEP, + KBASE_HW_FEATURE_CORE_FEATURES, KBASE_HW_FEATURE_PBHA_HWU, + KBASE_HW_FEATURE_LARGE_PAGE_ALLOC, KBASE_HW_FEATURE_END +}; + + +#endif /* _KBASE_HWCONFIG_FEATURES_H_ */ diff --git a/drivers/gpu/arm/bifrost/mali_kbase_hwconfig_issues.h b/drivers/gpu/arm/bifrost/mali_kbase_hwconfig_issues.h new file mode 100644 index 000000000000..b1a3a41b232b --- /dev/null +++ b/drivers/gpu/arm/bifrost/mali_kbase_hwconfig_issues.h @@ -0,0 +1,609 @@ +/* SPDX-License-Identifier: 
GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2014-2024 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +#ifndef _KBASE_HWCONFIG_ISSUES_H_ +#define _KBASE_HWCONFIG_ISSUES_H_ + +#include + +enum base_hw_issue { + KBASE_HW_ISSUE_5736, + KBASE_HW_ISSUE_9435, + KBASE_HW_ISSUE_10682, + KBASE_HW_ISSUE_11054, + KBASE_HW_ISSUE_T76X_3953, + KBASE_HW_ISSUE_TMIX_7891, + KBASE_HW_ISSUE_TMIX_7940, + KBASE_HW_ISSUE_TMIX_8042, + KBASE_HW_ISSUE_TMIX_8133, + KBASE_HW_ISSUE_TMIX_8138, + KBASE_HW_ISSUE_TMIX_8206, + KBASE_HW_ISSUE_TMIX_8343, + KBASE_HW_ISSUE_TMIX_8463, + KBASE_HW_ISSUE_TMIX_8456, + KBASE_HW_ISSUE_TSIX_1116, + KBASE_HW_ISSUE_TSIX_2033, + KBASE_HW_ISSUE_TMIX_8438, + KBASE_HW_ISSUE_TNOX_1194, + KBASE_HW_ISSUE_TGOX_R1_1234, + KBASE_HW_ISSUE_TTRX_1337, + KBASE_HW_ISSUE_TSIX_1792, + KBASE_HW_ISSUE_TTRX_2968_TTRX_3162, + KBASE_HW_ISSUE_TTRX_3076, + KBASE_HW_ISSUE_TTRX_921, + KBASE_HW_ISSUE_TTRX_3414, + KBASE_HW_ISSUE_GPU2017_1336, + KBASE_HW_ISSUE_TTRX_3083, + KBASE_HW_ISSUE_TTRX_3470, + KBASE_HW_ISSUE_TTRX_3464, + KBASE_HW_ISSUE_TTRX_3485, + KBASE_HW_ISSUE_GPU2019_3212, + KBASE_HW_ISSUE_TURSEHW_1997, + KBASE_HW_ISSUE_GPU2019_3878, + KBASE_HW_ISSUE_TURSEHW_2716, + KBASE_HW_ISSUE_GPU2019_3901, + KBASE_HW_ISSUE_GPU2021PRO_290, + KBASE_HW_ISSUE_TITANHW_2710, + 
KBASE_HW_ISSUE_TITANHW_2679, + KBASE_HW_ISSUE_GPU2022PRO_148, + KBASE_HW_ISSUE_TITANHW_2922, + KBASE_HW_ISSUE_TITANHW_2952, + KBASE_HW_ISSUE_KRAKEHW_2151, + KBASE_HW_ISSUE_TITANHW_2938, + KBASE_HW_ISSUE_KRAKEHW_2269, + KBASE_HW_ISSUE_TURSEHW_2934, + KBASE_HW_ISSUE_KRAKEHW_2321, + KBASE_HW_ISSUE_END +}; + +__maybe_unused static const enum base_hw_issue base_hw_issues_generic[] = { KBASE_HW_ISSUE_END }; + +__maybe_unused static const enum base_hw_issue base_hw_issues_tMIx_r0p0_05dev0[] = { + KBASE_HW_ISSUE_9435, KBASE_HW_ISSUE_10682, KBASE_HW_ISSUE_11054, + KBASE_HW_ISSUE_T76X_3953, KBASE_HW_ISSUE_TMIX_7891, KBASE_HW_ISSUE_TMIX_8042, + KBASE_HW_ISSUE_TMIX_8133, KBASE_HW_ISSUE_TMIX_8138, KBASE_HW_ISSUE_TMIX_8206, + KBASE_HW_ISSUE_TMIX_8343, KBASE_HW_ISSUE_TMIX_8463, KBASE_HW_ISSUE_TMIX_8456, + KBASE_HW_ISSUE_TMIX_8438, KBASE_HW_ISSUE_TSIX_2033, KBASE_HW_ISSUE_TTRX_921, + KBASE_HW_ISSUE_GPU2017_1336, KBASE_HW_ISSUE_TITANHW_2710, KBASE_HW_ISSUE_GPU2022PRO_148, + KBASE_HW_ISSUE_TITANHW_2938, KBASE_HW_ISSUE_END +}; + +__maybe_unused static const enum base_hw_issue base_hw_issues_tMIx_r0p0[] = { + KBASE_HW_ISSUE_9435, KBASE_HW_ISSUE_10682, KBASE_HW_ISSUE_11054, + KBASE_HW_ISSUE_TMIX_7891, KBASE_HW_ISSUE_TMIX_7940, KBASE_HW_ISSUE_TMIX_8042, + KBASE_HW_ISSUE_TMIX_8133, KBASE_HW_ISSUE_TMIX_8138, KBASE_HW_ISSUE_TMIX_8206, + KBASE_HW_ISSUE_TMIX_8343, KBASE_HW_ISSUE_TMIX_8463, KBASE_HW_ISSUE_TMIX_8456, + KBASE_HW_ISSUE_TMIX_8438, KBASE_HW_ISSUE_TSIX_2033, KBASE_HW_ISSUE_TTRX_921, + KBASE_HW_ISSUE_GPU2017_1336, KBASE_HW_ISSUE_TITANHW_2710, KBASE_HW_ISSUE_GPU2022PRO_148, + KBASE_HW_ISSUE_TITANHW_2938, KBASE_HW_ISSUE_END +}; + +__maybe_unused static const enum base_hw_issue base_hw_issues_tMIx_r0p1[] = { + KBASE_HW_ISSUE_9435, KBASE_HW_ISSUE_10682, KBASE_HW_ISSUE_11054, + KBASE_HW_ISSUE_TMIX_7891, KBASE_HW_ISSUE_TMIX_7940, KBASE_HW_ISSUE_TMIX_8042, + KBASE_HW_ISSUE_TMIX_8133, KBASE_HW_ISSUE_TMIX_8138, KBASE_HW_ISSUE_TMIX_8206, + KBASE_HW_ISSUE_TMIX_8343, KBASE_HW_ISSUE_TMIX_8463, 
KBASE_HW_ISSUE_TMIX_8456, + KBASE_HW_ISSUE_TMIX_8438, KBASE_HW_ISSUE_TSIX_2033, KBASE_HW_ISSUE_TTRX_921, + KBASE_HW_ISSUE_GPU2017_1336, KBASE_HW_ISSUE_TITANHW_2710, KBASE_HW_ISSUE_GPU2022PRO_148, + KBASE_HW_ISSUE_TITANHW_2938, KBASE_HW_ISSUE_END +}; + +__maybe_unused static const enum base_hw_issue base_hw_issues_model_tMIx[] = { + KBASE_HW_ISSUE_5736, KBASE_HW_ISSUE_9435, KBASE_HW_ISSUE_TMIX_7891, + KBASE_HW_ISSUE_TMIX_7940, KBASE_HW_ISSUE_TMIX_8042, KBASE_HW_ISSUE_TMIX_8133, + KBASE_HW_ISSUE_TMIX_8138, KBASE_HW_ISSUE_TMIX_8206, KBASE_HW_ISSUE_TMIX_8343, + KBASE_HW_ISSUE_TMIX_8456, KBASE_HW_ISSUE_TSIX_2033, KBASE_HW_ISSUE_TITANHW_2710, + KBASE_HW_ISSUE_GPU2022PRO_148, KBASE_HW_ISSUE_TITANHW_2938, KBASE_HW_ISSUE_END +}; + +__maybe_unused static const enum base_hw_issue base_hw_issues_tHEx_r0p0[] = { + KBASE_HW_ISSUE_9435, KBASE_HW_ISSUE_10682, KBASE_HW_ISSUE_11054, + KBASE_HW_ISSUE_TMIX_7891, KBASE_HW_ISSUE_TMIX_8042, KBASE_HW_ISSUE_TMIX_8133, + KBASE_HW_ISSUE_TSIX_2033, KBASE_HW_ISSUE_TTRX_921, KBASE_HW_ISSUE_GPU2017_1336, + KBASE_HW_ISSUE_TITANHW_2710, KBASE_HW_ISSUE_GPU2022PRO_148, KBASE_HW_ISSUE_TITANHW_2938, + KBASE_HW_ISSUE_END +}; + +__maybe_unused static const enum base_hw_issue base_hw_issues_tHEx_r0p1[] = { + KBASE_HW_ISSUE_9435, KBASE_HW_ISSUE_10682, KBASE_HW_ISSUE_11054, + KBASE_HW_ISSUE_TMIX_7891, KBASE_HW_ISSUE_TMIX_8042, KBASE_HW_ISSUE_TMIX_8133, + KBASE_HW_ISSUE_TSIX_2033, KBASE_HW_ISSUE_TTRX_921, KBASE_HW_ISSUE_GPU2017_1336, + KBASE_HW_ISSUE_TITANHW_2710, KBASE_HW_ISSUE_GPU2022PRO_148, KBASE_HW_ISSUE_TITANHW_2938, + KBASE_HW_ISSUE_END +}; + +__maybe_unused static const enum base_hw_issue base_hw_issues_tHEx_r0p2[] = { + KBASE_HW_ISSUE_9435, KBASE_HW_ISSUE_10682, KBASE_HW_ISSUE_11054, + KBASE_HW_ISSUE_TMIX_7891, KBASE_HW_ISSUE_TMIX_8042, KBASE_HW_ISSUE_TMIX_8133, + KBASE_HW_ISSUE_TSIX_2033, KBASE_HW_ISSUE_TTRX_921, KBASE_HW_ISSUE_GPU2017_1336, + KBASE_HW_ISSUE_TITANHW_2710, KBASE_HW_ISSUE_GPU2022PRO_148, KBASE_HW_ISSUE_TITANHW_2938, + 
KBASE_HW_ISSUE_END +}; + +__maybe_unused static const enum base_hw_issue base_hw_issues_tHEx_r0p3[] = { + KBASE_HW_ISSUE_9435, KBASE_HW_ISSUE_10682, KBASE_HW_ISSUE_TMIX_7891, + KBASE_HW_ISSUE_TMIX_8042, KBASE_HW_ISSUE_TMIX_8133, KBASE_HW_ISSUE_TSIX_2033, + KBASE_HW_ISSUE_TTRX_921, KBASE_HW_ISSUE_GPU2017_1336, KBASE_HW_ISSUE_TITANHW_2710, + KBASE_HW_ISSUE_GPU2022PRO_148, KBASE_HW_ISSUE_TITANHW_2938, KBASE_HW_ISSUE_END +}; + +__maybe_unused static const enum base_hw_issue base_hw_issues_model_tHEx[] = { + KBASE_HW_ISSUE_5736, KBASE_HW_ISSUE_9435, KBASE_HW_ISSUE_TMIX_7891, + KBASE_HW_ISSUE_TMIX_8042, KBASE_HW_ISSUE_TMIX_8133, KBASE_HW_ISSUE_TSIX_2033, + KBASE_HW_ISSUE_TITANHW_2710, KBASE_HW_ISSUE_GPU2022PRO_148, KBASE_HW_ISSUE_TITANHW_2938, + KBASE_HW_ISSUE_END +}; + +__maybe_unused static const enum base_hw_issue base_hw_issues_tSIx_r0p0[] = { + KBASE_HW_ISSUE_9435, KBASE_HW_ISSUE_11054, KBASE_HW_ISSUE_TMIX_8133, + KBASE_HW_ISSUE_TSIX_1116, KBASE_HW_ISSUE_TSIX_2033, KBASE_HW_ISSUE_TSIX_1792, + KBASE_HW_ISSUE_TTRX_921, KBASE_HW_ISSUE_GPU2017_1336, KBASE_HW_ISSUE_TTRX_3464, + KBASE_HW_ISSUE_TITANHW_2710, KBASE_HW_ISSUE_GPU2022PRO_148, KBASE_HW_ISSUE_TITANHW_2938, + KBASE_HW_ISSUE_END +}; + +__maybe_unused static const enum base_hw_issue base_hw_issues_tSIx_r0p1[] = { + KBASE_HW_ISSUE_9435, KBASE_HW_ISSUE_11054, KBASE_HW_ISSUE_TMIX_8133, + KBASE_HW_ISSUE_TSIX_1116, KBASE_HW_ISSUE_TSIX_2033, KBASE_HW_ISSUE_TSIX_1792, + KBASE_HW_ISSUE_TTRX_921, KBASE_HW_ISSUE_GPU2017_1336, KBASE_HW_ISSUE_TTRX_3464, + KBASE_HW_ISSUE_TITANHW_2710, KBASE_HW_ISSUE_GPU2022PRO_148, KBASE_HW_ISSUE_TITANHW_2938, + KBASE_HW_ISSUE_END +}; + +__maybe_unused static const enum base_hw_issue base_hw_issues_tSIx_r1p0[] = { + KBASE_HW_ISSUE_9435, KBASE_HW_ISSUE_11054, KBASE_HW_ISSUE_TMIX_8133, + KBASE_HW_ISSUE_TSIX_1116, KBASE_HW_ISSUE_TSIX_2033, KBASE_HW_ISSUE_TTRX_921, + KBASE_HW_ISSUE_GPU2017_1336, KBASE_HW_ISSUE_TTRX_3464, KBASE_HW_ISSUE_TITANHW_2710, + KBASE_HW_ISSUE_GPU2022PRO_148, 
KBASE_HW_ISSUE_TITANHW_2938, KBASE_HW_ISSUE_END +}; + +__maybe_unused static const enum base_hw_issue base_hw_issues_tSIx_r1p1[] = { + KBASE_HW_ISSUE_9435, KBASE_HW_ISSUE_TMIX_8133, KBASE_HW_ISSUE_TSIX_1116, + KBASE_HW_ISSUE_TSIX_2033, KBASE_HW_ISSUE_TTRX_921, KBASE_HW_ISSUE_GPU2017_1336, + KBASE_HW_ISSUE_TTRX_3464, KBASE_HW_ISSUE_TITANHW_2710, KBASE_HW_ISSUE_GPU2022PRO_148, + KBASE_HW_ISSUE_TITANHW_2938, KBASE_HW_ISSUE_END +}; + +__maybe_unused static const enum base_hw_issue base_hw_issues_model_tSIx[] = { + KBASE_HW_ISSUE_5736, KBASE_HW_ISSUE_9435, KBASE_HW_ISSUE_TMIX_8133, + KBASE_HW_ISSUE_TSIX_1116, KBASE_HW_ISSUE_TSIX_2033, KBASE_HW_ISSUE_TTRX_3464, + KBASE_HW_ISSUE_TITANHW_2710, KBASE_HW_ISSUE_GPU2022PRO_148, KBASE_HW_ISSUE_TITANHW_2938, + KBASE_HW_ISSUE_END +}; + +__maybe_unused static const enum base_hw_issue base_hw_issues_tDVx_r0p0[] = { + KBASE_HW_ISSUE_9435, KBASE_HW_ISSUE_TMIX_8133, KBASE_HW_ISSUE_TSIX_1116, + KBASE_HW_ISSUE_TSIX_2033, KBASE_HW_ISSUE_TTRX_921, KBASE_HW_ISSUE_GPU2017_1336, + KBASE_HW_ISSUE_TTRX_3464, KBASE_HW_ISSUE_TITANHW_2710, KBASE_HW_ISSUE_GPU2022PRO_148, + KBASE_HW_ISSUE_TITANHW_2938, KBASE_HW_ISSUE_END +}; + +__maybe_unused static const enum base_hw_issue base_hw_issues_model_tDVx[] = { + KBASE_HW_ISSUE_5736, KBASE_HW_ISSUE_9435, KBASE_HW_ISSUE_TMIX_8133, + KBASE_HW_ISSUE_TSIX_1116, KBASE_HW_ISSUE_TSIX_2033, KBASE_HW_ISSUE_TTRX_3464, + KBASE_HW_ISSUE_TITANHW_2710, KBASE_HW_ISSUE_GPU2022PRO_148, KBASE_HW_ISSUE_TITANHW_2938, + KBASE_HW_ISSUE_END +}; + +__maybe_unused static const enum base_hw_issue base_hw_issues_tNOx_r0p0[] = { + KBASE_HW_ISSUE_9435, KBASE_HW_ISSUE_TMIX_8133, + KBASE_HW_ISSUE_TSIX_1116, KBASE_HW_ISSUE_TSIX_2033, + KBASE_HW_ISSUE_TNOX_1194, KBASE_HW_ISSUE_TTRX_921, + KBASE_HW_ISSUE_GPU2017_1336, KBASE_HW_ISSUE_TTRX_3464, + KBASE_HW_ISSUE_TITANHW_2710, KBASE_HW_ISSUE_GPU2022PRO_148, + KBASE_HW_ISSUE_TITANHW_2938, KBASE_HW_ISSUE_KRAKEHW_2321, + KBASE_HW_ISSUE_END +}; + +__maybe_unused static const enum base_hw_issue 
base_hw_issues_model_tNOx[] = { + KBASE_HW_ISSUE_5736, KBASE_HW_ISSUE_9435, + KBASE_HW_ISSUE_TMIX_8133, KBASE_HW_ISSUE_TSIX_1116, + KBASE_HW_ISSUE_TSIX_2033, KBASE_HW_ISSUE_TTRX_3464, + KBASE_HW_ISSUE_TITANHW_2710, KBASE_HW_ISSUE_GPU2022PRO_148, + KBASE_HW_ISSUE_TITANHW_2938, KBASE_HW_ISSUE_KRAKEHW_2321, + KBASE_HW_ISSUE_END +}; + +__maybe_unused static const enum base_hw_issue base_hw_issues_tGOx_r0p0[] = { + KBASE_HW_ISSUE_9435, KBASE_HW_ISSUE_TMIX_8133, + KBASE_HW_ISSUE_TSIX_1116, KBASE_HW_ISSUE_TSIX_2033, + KBASE_HW_ISSUE_TNOX_1194, KBASE_HW_ISSUE_TTRX_921, + KBASE_HW_ISSUE_GPU2017_1336, KBASE_HW_ISSUE_TTRX_3464, + KBASE_HW_ISSUE_TITANHW_2710, KBASE_HW_ISSUE_GPU2022PRO_148, + KBASE_HW_ISSUE_TITANHW_2938, KBASE_HW_ISSUE_KRAKEHW_2321, + KBASE_HW_ISSUE_END +}; + +__maybe_unused static const enum base_hw_issue base_hw_issues_tGOx_r1p0[] = { + KBASE_HW_ISSUE_9435, KBASE_HW_ISSUE_TMIX_8133, + KBASE_HW_ISSUE_TSIX_1116, KBASE_HW_ISSUE_TSIX_2033, + KBASE_HW_ISSUE_TGOX_R1_1234, KBASE_HW_ISSUE_TTRX_921, + KBASE_HW_ISSUE_GPU2017_1336, KBASE_HW_ISSUE_TTRX_3464, + KBASE_HW_ISSUE_TITANHW_2710, KBASE_HW_ISSUE_GPU2022PRO_148, + KBASE_HW_ISSUE_TITANHW_2938, KBASE_HW_ISSUE_KRAKEHW_2321, + KBASE_HW_ISSUE_END +}; + +__maybe_unused static const enum base_hw_issue base_hw_issues_model_tGOx[] = { + KBASE_HW_ISSUE_5736, KBASE_HW_ISSUE_9435, + KBASE_HW_ISSUE_TMIX_8133, KBASE_HW_ISSUE_TSIX_1116, + KBASE_HW_ISSUE_TSIX_2033, KBASE_HW_ISSUE_TTRX_3464, + KBASE_HW_ISSUE_TITANHW_2710, KBASE_HW_ISSUE_GPU2022PRO_148, + KBASE_HW_ISSUE_TITANHW_2938, KBASE_HW_ISSUE_KRAKEHW_2321, + KBASE_HW_ISSUE_END +}; + +__maybe_unused static const enum base_hw_issue base_hw_issues_tTRx_r0p0[] = { + KBASE_HW_ISSUE_9435, KBASE_HW_ISSUE_TSIX_2033, + KBASE_HW_ISSUE_TTRX_1337, KBASE_HW_ISSUE_TTRX_2968_TTRX_3162, + KBASE_HW_ISSUE_TTRX_3076, KBASE_HW_ISSUE_TTRX_921, + KBASE_HW_ISSUE_TTRX_3414, KBASE_HW_ISSUE_GPU2017_1336, + KBASE_HW_ISSUE_TTRX_3083, KBASE_HW_ISSUE_TTRX_3470, + KBASE_HW_ISSUE_TTRX_3464, 
KBASE_HW_ISSUE_TTRX_3485, + KBASE_HW_ISSUE_TITANHW_2710, KBASE_HW_ISSUE_GPU2022PRO_148, + KBASE_HW_ISSUE_TITANHW_2938, KBASE_HW_ISSUE_END +}; + +__maybe_unused static const enum base_hw_issue base_hw_issues_tTRx_r0p1[] = { + KBASE_HW_ISSUE_9435, KBASE_HW_ISSUE_TSIX_2033, + KBASE_HW_ISSUE_TTRX_1337, KBASE_HW_ISSUE_TTRX_2968_TTRX_3162, + KBASE_HW_ISSUE_TTRX_3076, KBASE_HW_ISSUE_TTRX_921, + KBASE_HW_ISSUE_TTRX_3414, KBASE_HW_ISSUE_GPU2017_1336, + KBASE_HW_ISSUE_TTRX_3083, KBASE_HW_ISSUE_TTRX_3470, + KBASE_HW_ISSUE_TTRX_3464, KBASE_HW_ISSUE_TTRX_3485, + KBASE_HW_ISSUE_TITANHW_2710, KBASE_HW_ISSUE_GPU2022PRO_148, + KBASE_HW_ISSUE_TITANHW_2938, KBASE_HW_ISSUE_END +}; + +__maybe_unused static const enum base_hw_issue base_hw_issues_tTRx_r0p2[] = { + KBASE_HW_ISSUE_9435, + KBASE_HW_ISSUE_TSIX_2033, + KBASE_HW_ISSUE_TTRX_1337, + KBASE_HW_ISSUE_TTRX_2968_TTRX_3162, + KBASE_HW_ISSUE_TTRX_3076, + KBASE_HW_ISSUE_TTRX_921, + KBASE_HW_ISSUE_TTRX_3414, + KBASE_HW_ISSUE_GPU2017_1336, + KBASE_HW_ISSUE_TTRX_3083, + KBASE_HW_ISSUE_TTRX_3470, + KBASE_HW_ISSUE_TTRX_3464, + KBASE_HW_ISSUE_TITANHW_2710, + KBASE_HW_ISSUE_GPU2022PRO_148, + KBASE_HW_ISSUE_TITANHW_2938, + KBASE_HW_ISSUE_END +}; + +__maybe_unused static const enum base_hw_issue base_hw_issues_model_tTRx[] = { + KBASE_HW_ISSUE_5736, KBASE_HW_ISSUE_9435, KBASE_HW_ISSUE_TSIX_2033, + KBASE_HW_ISSUE_TTRX_1337, KBASE_HW_ISSUE_TTRX_3414, KBASE_HW_ISSUE_TTRX_3083, + KBASE_HW_ISSUE_TTRX_3470, KBASE_HW_ISSUE_TTRX_3464, KBASE_HW_ISSUE_TITANHW_2710, + KBASE_HW_ISSUE_GPU2022PRO_148, KBASE_HW_ISSUE_TITANHW_2938, KBASE_HW_ISSUE_END +}; + +__maybe_unused static const enum base_hw_issue base_hw_issues_tNAx_r0p0[] = { + KBASE_HW_ISSUE_9435, KBASE_HW_ISSUE_TSIX_2033, + KBASE_HW_ISSUE_TTRX_1337, KBASE_HW_ISSUE_TTRX_2968_TTRX_3162, + KBASE_HW_ISSUE_TTRX_3076, KBASE_HW_ISSUE_TTRX_921, + KBASE_HW_ISSUE_TTRX_3414, KBASE_HW_ISSUE_GPU2017_1336, + KBASE_HW_ISSUE_TTRX_3083, KBASE_HW_ISSUE_TTRX_3470, + KBASE_HW_ISSUE_TTRX_3464, KBASE_HW_ISSUE_TTRX_3485, + 
KBASE_HW_ISSUE_TITANHW_2710, KBASE_HW_ISSUE_GPU2022PRO_148, + KBASE_HW_ISSUE_TITANHW_2938, KBASE_HW_ISSUE_END +}; + +__maybe_unused static const enum base_hw_issue base_hw_issues_tNAx_r0p1[] = { + KBASE_HW_ISSUE_9435, + KBASE_HW_ISSUE_TSIX_2033, + KBASE_HW_ISSUE_TTRX_1337, + KBASE_HW_ISSUE_TTRX_2968_TTRX_3162, + KBASE_HW_ISSUE_TTRX_3076, + KBASE_HW_ISSUE_TTRX_921, + KBASE_HW_ISSUE_TTRX_3414, + KBASE_HW_ISSUE_GPU2017_1336, + KBASE_HW_ISSUE_TTRX_3083, + KBASE_HW_ISSUE_TTRX_3470, + KBASE_HW_ISSUE_TTRX_3464, + KBASE_HW_ISSUE_TITANHW_2710, + KBASE_HW_ISSUE_GPU2022PRO_148, + KBASE_HW_ISSUE_TITANHW_2938, + KBASE_HW_ISSUE_END +}; + +__maybe_unused static const enum base_hw_issue base_hw_issues_model_tNAx[] = { + KBASE_HW_ISSUE_5736, KBASE_HW_ISSUE_9435, KBASE_HW_ISSUE_TSIX_2033, + KBASE_HW_ISSUE_TTRX_1337, KBASE_HW_ISSUE_TTRX_3414, KBASE_HW_ISSUE_TTRX_3083, + KBASE_HW_ISSUE_TTRX_3470, KBASE_HW_ISSUE_TTRX_3464, KBASE_HW_ISSUE_TITANHW_2710, + KBASE_HW_ISSUE_GPU2022PRO_148, KBASE_HW_ISSUE_TITANHW_2938, KBASE_HW_ISSUE_END +}; + +__maybe_unused static const enum base_hw_issue base_hw_issues_tBEx_r0p0[] = { + KBASE_HW_ISSUE_9435, KBASE_HW_ISSUE_TSIX_2033, + KBASE_HW_ISSUE_TTRX_1337, KBASE_HW_ISSUE_TTRX_2968_TTRX_3162, + KBASE_HW_ISSUE_TTRX_921, KBASE_HW_ISSUE_TTRX_3414, + KBASE_HW_ISSUE_TTRX_3083, KBASE_HW_ISSUE_TTRX_3470, + KBASE_HW_ISSUE_TTRX_3464, KBASE_HW_ISSUE_TTRX_3485, + KBASE_HW_ISSUE_TITANHW_2710, KBASE_HW_ISSUE_GPU2022PRO_148, + KBASE_HW_ISSUE_TITANHW_2938, KBASE_HW_ISSUE_KRAKEHW_2321, + KBASE_HW_ISSUE_END +}; + +__maybe_unused static const enum base_hw_issue base_hw_issues_tBEx_r0p1[] = { + KBASE_HW_ISSUE_9435, KBASE_HW_ISSUE_TSIX_2033, + KBASE_HW_ISSUE_TTRX_1337, KBASE_HW_ISSUE_TTRX_2968_TTRX_3162, + KBASE_HW_ISSUE_TTRX_921, KBASE_HW_ISSUE_TTRX_3414, + KBASE_HW_ISSUE_TTRX_3083, KBASE_HW_ISSUE_TTRX_3470, + KBASE_HW_ISSUE_TTRX_3464, KBASE_HW_ISSUE_TITANHW_2710, + KBASE_HW_ISSUE_GPU2022PRO_148, KBASE_HW_ISSUE_TITANHW_2938, + KBASE_HW_ISSUE_KRAKEHW_2321, 
KBASE_HW_ISSUE_END +}; + +__maybe_unused static const enum base_hw_issue base_hw_issues_tBEx_r1p0[] = { + KBASE_HW_ISSUE_9435, KBASE_HW_ISSUE_TSIX_2033, + KBASE_HW_ISSUE_TTRX_1337, KBASE_HW_ISSUE_TTRX_2968_TTRX_3162, + KBASE_HW_ISSUE_TTRX_921, KBASE_HW_ISSUE_TTRX_3414, + KBASE_HW_ISSUE_TTRX_3083, KBASE_HW_ISSUE_TTRX_3470, + KBASE_HW_ISSUE_TTRX_3464, KBASE_HW_ISSUE_TITANHW_2710, + KBASE_HW_ISSUE_GPU2022PRO_148, KBASE_HW_ISSUE_TITANHW_2938, + KBASE_HW_ISSUE_KRAKEHW_2321, KBASE_HW_ISSUE_END +}; + +__maybe_unused static const enum base_hw_issue base_hw_issues_tBEx_r1p1[] = { + KBASE_HW_ISSUE_9435, KBASE_HW_ISSUE_TSIX_2033, + KBASE_HW_ISSUE_TTRX_1337, KBASE_HW_ISSUE_TTRX_2968_TTRX_3162, + KBASE_HW_ISSUE_TTRX_921, KBASE_HW_ISSUE_TTRX_3414, + KBASE_HW_ISSUE_TTRX_3083, KBASE_HW_ISSUE_TTRX_3470, + KBASE_HW_ISSUE_TTRX_3464, KBASE_HW_ISSUE_TITANHW_2710, + KBASE_HW_ISSUE_GPU2022PRO_148, KBASE_HW_ISSUE_TITANHW_2938, + KBASE_HW_ISSUE_KRAKEHW_2321, KBASE_HW_ISSUE_END +}; + +__maybe_unused static const enum base_hw_issue base_hw_issues_model_tBEx[] = { + KBASE_HW_ISSUE_5736, KBASE_HW_ISSUE_9435, + KBASE_HW_ISSUE_TSIX_2033, KBASE_HW_ISSUE_TTRX_1337, + KBASE_HW_ISSUE_TTRX_3414, KBASE_HW_ISSUE_TTRX_3083, + KBASE_HW_ISSUE_TTRX_3470, KBASE_HW_ISSUE_TTRX_3464, + KBASE_HW_ISSUE_TITANHW_2710, KBASE_HW_ISSUE_GPU2022PRO_148, + KBASE_HW_ISSUE_TITANHW_2938, KBASE_HW_ISSUE_KRAKEHW_2321, + KBASE_HW_ISSUE_END +}; + +__maybe_unused static const enum base_hw_issue base_hw_issues_lBEx_r1p0[] = { + KBASE_HW_ISSUE_9435, KBASE_HW_ISSUE_TSIX_2033, + KBASE_HW_ISSUE_TTRX_1337, KBASE_HW_ISSUE_TTRX_2968_TTRX_3162, + KBASE_HW_ISSUE_TTRX_921, KBASE_HW_ISSUE_TTRX_3414, + KBASE_HW_ISSUE_TTRX_3083, KBASE_HW_ISSUE_TTRX_3470, + KBASE_HW_ISSUE_TTRX_3464, KBASE_HW_ISSUE_TTRX_3485, + KBASE_HW_ISSUE_TITANHW_2710, KBASE_HW_ISSUE_GPU2022PRO_148, + KBASE_HW_ISSUE_TITANHW_2938, KBASE_HW_ISSUE_KRAKEHW_2321, + KBASE_HW_ISSUE_END +}; + +__maybe_unused static const enum base_hw_issue base_hw_issues_lBEx_r1p1[] = { + 
KBASE_HW_ISSUE_9435, KBASE_HW_ISSUE_TSIX_2033, + KBASE_HW_ISSUE_TTRX_1337, KBASE_HW_ISSUE_TTRX_2968_TTRX_3162, + KBASE_HW_ISSUE_TTRX_921, KBASE_HW_ISSUE_TTRX_3414, + KBASE_HW_ISSUE_TTRX_3083, KBASE_HW_ISSUE_TTRX_3470, + KBASE_HW_ISSUE_TTRX_3464, KBASE_HW_ISSUE_TITANHW_2710, + KBASE_HW_ISSUE_GPU2022PRO_148, KBASE_HW_ISSUE_TITANHW_2938, + KBASE_HW_ISSUE_KRAKEHW_2321, KBASE_HW_ISSUE_END +}; + +__maybe_unused static const enum base_hw_issue base_hw_issues_tBAx_r0p0[] = { + KBASE_HW_ISSUE_9435, KBASE_HW_ISSUE_TSIX_2033, + KBASE_HW_ISSUE_TTRX_1337, KBASE_HW_ISSUE_TTRX_2968_TTRX_3162, + KBASE_HW_ISSUE_TTRX_921, KBASE_HW_ISSUE_TTRX_3414, + KBASE_HW_ISSUE_TTRX_3083, KBASE_HW_ISSUE_TTRX_3470, + KBASE_HW_ISSUE_TTRX_3464, KBASE_HW_ISSUE_TITANHW_2710, + KBASE_HW_ISSUE_GPU2022PRO_148, KBASE_HW_ISSUE_TITANHW_2938, + KBASE_HW_ISSUE_KRAKEHW_2321, KBASE_HW_ISSUE_END +}; + +__maybe_unused static const enum base_hw_issue base_hw_issues_tBAx_r0p1[] = { + KBASE_HW_ISSUE_9435, KBASE_HW_ISSUE_TSIX_2033, + KBASE_HW_ISSUE_TTRX_1337, KBASE_HW_ISSUE_TTRX_2968_TTRX_3162, + KBASE_HW_ISSUE_TTRX_921, KBASE_HW_ISSUE_TTRX_3414, + KBASE_HW_ISSUE_TTRX_3083, KBASE_HW_ISSUE_TTRX_3470, + KBASE_HW_ISSUE_TTRX_3464, KBASE_HW_ISSUE_TITANHW_2710, + KBASE_HW_ISSUE_GPU2022PRO_148, KBASE_HW_ISSUE_TITANHW_2938, + KBASE_HW_ISSUE_KRAKEHW_2321, KBASE_HW_ISSUE_END +}; + +__maybe_unused static const enum base_hw_issue base_hw_issues_tBAx_r0p2[] = { + KBASE_HW_ISSUE_9435, KBASE_HW_ISSUE_TSIX_2033, + KBASE_HW_ISSUE_TTRX_1337, KBASE_HW_ISSUE_TTRX_2968_TTRX_3162, + KBASE_HW_ISSUE_TTRX_921, KBASE_HW_ISSUE_TTRX_3414, + KBASE_HW_ISSUE_TTRX_3083, KBASE_HW_ISSUE_TTRX_3470, + KBASE_HW_ISSUE_TTRX_3464, KBASE_HW_ISSUE_TITANHW_2710, + KBASE_HW_ISSUE_GPU2022PRO_148, KBASE_HW_ISSUE_TITANHW_2938, + KBASE_HW_ISSUE_KRAKEHW_2321, KBASE_HW_ISSUE_END +}; + +__maybe_unused static const enum base_hw_issue base_hw_issues_model_tBAx[] = { + KBASE_HW_ISSUE_5736, KBASE_HW_ISSUE_9435, + KBASE_HW_ISSUE_TSIX_2033, KBASE_HW_ISSUE_TTRX_1337, + 
KBASE_HW_ISSUE_TTRX_3414, KBASE_HW_ISSUE_TTRX_3083, + KBASE_HW_ISSUE_TTRX_3470, KBASE_HW_ISSUE_TTRX_3464, + KBASE_HW_ISSUE_TITANHW_2710, KBASE_HW_ISSUE_GPU2022PRO_148, + KBASE_HW_ISSUE_TITANHW_2938, KBASE_HW_ISSUE_KRAKEHW_2321, + KBASE_HW_ISSUE_END +}; + +__maybe_unused static const enum base_hw_issue base_hw_issues_tODx_r0p0[] = { + KBASE_HW_ISSUE_TSIX_2033, KBASE_HW_ISSUE_TTRX_1337, + KBASE_HW_ISSUE_GPU2019_3212, KBASE_HW_ISSUE_GPU2019_3878, + KBASE_HW_ISSUE_GPU2019_3901, KBASE_HW_ISSUE_TITANHW_2710, + KBASE_HW_ISSUE_GPU2022PRO_148, KBASE_HW_ISSUE_TITANHW_2938, + KBASE_HW_ISSUE_KRAKEHW_2321, KBASE_HW_ISSUE_END +}; + +__maybe_unused static const enum base_hw_issue base_hw_issues_model_tODx[] = { + KBASE_HW_ISSUE_TSIX_2033, KBASE_HW_ISSUE_TTRX_1337, + KBASE_HW_ISSUE_GPU2019_3212, KBASE_HW_ISSUE_GPU2019_3878, + KBASE_HW_ISSUE_GPU2019_3901, KBASE_HW_ISSUE_TITANHW_2710, + KBASE_HW_ISSUE_GPU2022PRO_148, KBASE_HW_ISSUE_TITANHW_2938, + KBASE_HW_ISSUE_KRAKEHW_2321, KBASE_HW_ISSUE_END +}; + +__maybe_unused static const enum base_hw_issue base_hw_issues_tGRx_r0p0[] = { + KBASE_HW_ISSUE_TSIX_2033, KBASE_HW_ISSUE_TTRX_1337, KBASE_HW_ISSUE_GPU2019_3878, + KBASE_HW_ISSUE_GPU2019_3901, KBASE_HW_ISSUE_TITANHW_2710, KBASE_HW_ISSUE_GPU2022PRO_148, + KBASE_HW_ISSUE_TITANHW_2938, KBASE_HW_ISSUE_KRAKEHW_2321, KBASE_HW_ISSUE_END +}; + +__maybe_unused static const enum base_hw_issue base_hw_issues_model_tGRx[] = { + KBASE_HW_ISSUE_TSIX_2033, KBASE_HW_ISSUE_TTRX_1337, KBASE_HW_ISSUE_GPU2019_3878, + KBASE_HW_ISSUE_GPU2019_3901, KBASE_HW_ISSUE_TITANHW_2710, KBASE_HW_ISSUE_GPU2022PRO_148, + KBASE_HW_ISSUE_TITANHW_2938, KBASE_HW_ISSUE_KRAKEHW_2321, KBASE_HW_ISSUE_END +}; + +__maybe_unused static const enum base_hw_issue base_hw_issues_tVAx_r0p0[] = { + KBASE_HW_ISSUE_TSIX_2033, KBASE_HW_ISSUE_TTRX_1337, KBASE_HW_ISSUE_GPU2019_3878, + KBASE_HW_ISSUE_GPU2019_3901, KBASE_HW_ISSUE_TITANHW_2710, KBASE_HW_ISSUE_GPU2022PRO_148, + KBASE_HW_ISSUE_TITANHW_2938, KBASE_HW_ISSUE_KRAKEHW_2321, 
KBASE_HW_ISSUE_END +}; + +__maybe_unused static const enum base_hw_issue base_hw_issues_tVAx_r0p1[] = { + KBASE_HW_ISSUE_TSIX_2033, KBASE_HW_ISSUE_TTRX_1337, KBASE_HW_ISSUE_GPU2019_3878, + KBASE_HW_ISSUE_GPU2019_3901, KBASE_HW_ISSUE_TITANHW_2710, KBASE_HW_ISSUE_GPU2022PRO_148, + KBASE_HW_ISSUE_TITANHW_2938, KBASE_HW_ISSUE_KRAKEHW_2321, KBASE_HW_ISSUE_END +}; + +__maybe_unused static const enum base_hw_issue base_hw_issues_model_tVAx[] = { + KBASE_HW_ISSUE_TSIX_2033, KBASE_HW_ISSUE_TTRX_1337, KBASE_HW_ISSUE_GPU2019_3878, + KBASE_HW_ISSUE_GPU2019_3901, KBASE_HW_ISSUE_TITANHW_2710, KBASE_HW_ISSUE_GPU2022PRO_148, + KBASE_HW_ISSUE_TITANHW_2938, KBASE_HW_ISSUE_KRAKEHW_2321, KBASE_HW_ISSUE_END +}; + +__maybe_unused static const enum base_hw_issue base_hw_issues_tTUx_r0p0[] = { + KBASE_HW_ISSUE_TSIX_2033, KBASE_HW_ISSUE_TTRX_1337, KBASE_HW_ISSUE_TURSEHW_1997, + KBASE_HW_ISSUE_GPU2019_3878, KBASE_HW_ISSUE_TURSEHW_2716, KBASE_HW_ISSUE_GPU2019_3901, + KBASE_HW_ISSUE_GPU2021PRO_290, KBASE_HW_ISSUE_TITANHW_2710, KBASE_HW_ISSUE_TITANHW_2679, + KBASE_HW_ISSUE_GPU2022PRO_148, KBASE_HW_ISSUE_TITANHW_2938, KBASE_HW_ISSUE_TURSEHW_2934, + KBASE_HW_ISSUE_KRAKEHW_2321, KBASE_HW_ISSUE_END +}; + +__maybe_unused static const enum base_hw_issue base_hw_issues_tTUx_r0p1[] = { + KBASE_HW_ISSUE_TSIX_2033, KBASE_HW_ISSUE_TTRX_1337, KBASE_HW_ISSUE_TURSEHW_1997, + KBASE_HW_ISSUE_GPU2019_3878, KBASE_HW_ISSUE_TURSEHW_2716, KBASE_HW_ISSUE_GPU2019_3901, + KBASE_HW_ISSUE_GPU2021PRO_290, KBASE_HW_ISSUE_TITANHW_2710, KBASE_HW_ISSUE_TITANHW_2679, + KBASE_HW_ISSUE_GPU2022PRO_148, KBASE_HW_ISSUE_TITANHW_2922, KBASE_HW_ISSUE_TITANHW_2938, + KBASE_HW_ISSUE_TURSEHW_2934, KBASE_HW_ISSUE_KRAKEHW_2321, KBASE_HW_ISSUE_END +}; + +__maybe_unused static const enum base_hw_issue base_hw_issues_model_tTUx[] = { + KBASE_HW_ISSUE_TSIX_2033, KBASE_HW_ISSUE_TTRX_1337, KBASE_HW_ISSUE_GPU2019_3878, + KBASE_HW_ISSUE_TURSEHW_2716, KBASE_HW_ISSUE_GPU2019_3901, KBASE_HW_ISSUE_GPU2021PRO_290, + KBASE_HW_ISSUE_TITANHW_2710, 
KBASE_HW_ISSUE_TITANHW_2679, KBASE_HW_ISSUE_GPU2022PRO_148, + KBASE_HW_ISSUE_TITANHW_2922, KBASE_HW_ISSUE_TITANHW_2938, KBASE_HW_ISSUE_TURSEHW_2934, + KBASE_HW_ISSUE_KRAKEHW_2321, KBASE_HW_ISSUE_END +}; + +__maybe_unused static const enum base_hw_issue base_hw_issues_tTUx_r1p0[] = { + KBASE_HW_ISSUE_TSIX_2033, KBASE_HW_ISSUE_TTRX_1337, KBASE_HW_ISSUE_GPU2019_3878, + KBASE_HW_ISSUE_TURSEHW_2716, KBASE_HW_ISSUE_GPU2019_3901, KBASE_HW_ISSUE_GPU2021PRO_290, + KBASE_HW_ISSUE_TITANHW_2710, KBASE_HW_ISSUE_TITANHW_2679, KBASE_HW_ISSUE_GPU2022PRO_148, + KBASE_HW_ISSUE_TITANHW_2922, KBASE_HW_ISSUE_TITANHW_2938, KBASE_HW_ISSUE_TURSEHW_2934, + KBASE_HW_ISSUE_KRAKEHW_2321, KBASE_HW_ISSUE_END +}; + +__maybe_unused static const enum base_hw_issue base_hw_issues_tTUx_r1p1[] = { + KBASE_HW_ISSUE_TSIX_2033, KBASE_HW_ISSUE_TTRX_1337, KBASE_HW_ISSUE_GPU2019_3878, + KBASE_HW_ISSUE_TURSEHW_2716, KBASE_HW_ISSUE_GPU2019_3901, KBASE_HW_ISSUE_GPU2021PRO_290, + KBASE_HW_ISSUE_TITANHW_2710, KBASE_HW_ISSUE_TITANHW_2679, KBASE_HW_ISSUE_GPU2022PRO_148, + KBASE_HW_ISSUE_TITANHW_2922, KBASE_HW_ISSUE_TITANHW_2938, KBASE_HW_ISSUE_TURSEHW_2934, + KBASE_HW_ISSUE_KRAKEHW_2321, KBASE_HW_ISSUE_END +}; + +__maybe_unused static const enum base_hw_issue base_hw_issues_tTUx_r1p2[] = { + KBASE_HW_ISSUE_TSIX_2033, KBASE_HW_ISSUE_TTRX_1337, KBASE_HW_ISSUE_GPU2019_3878, + KBASE_HW_ISSUE_TURSEHW_2716, KBASE_HW_ISSUE_GPU2019_3901, KBASE_HW_ISSUE_GPU2021PRO_290, + KBASE_HW_ISSUE_TITANHW_2710, KBASE_HW_ISSUE_TITANHW_2679, KBASE_HW_ISSUE_GPU2022PRO_148, + KBASE_HW_ISSUE_TITANHW_2922, KBASE_HW_ISSUE_TITANHW_2938, KBASE_HW_ISSUE_TURSEHW_2934, + KBASE_HW_ISSUE_KRAKEHW_2321, KBASE_HW_ISSUE_END +}; + +__maybe_unused static const enum base_hw_issue base_hw_issues_tTUx_r1p3[] = { + KBASE_HW_ISSUE_TSIX_2033, KBASE_HW_ISSUE_TTRX_1337, KBASE_HW_ISSUE_GPU2019_3878, + KBASE_HW_ISSUE_TURSEHW_2716, KBASE_HW_ISSUE_GPU2019_3901, KBASE_HW_ISSUE_GPU2021PRO_290, + KBASE_HW_ISSUE_TITANHW_2710, KBASE_HW_ISSUE_TITANHW_2679, 
KBASE_HW_ISSUE_GPU2022PRO_148, + KBASE_HW_ISSUE_TITANHW_2922, KBASE_HW_ISSUE_TITANHW_2938, KBASE_HW_ISSUE_TURSEHW_2934, + KBASE_HW_ISSUE_KRAKEHW_2321, KBASE_HW_ISSUE_END +}; + +__maybe_unused static const enum base_hw_issue base_hw_issues_model_tTIx[] = { + KBASE_HW_ISSUE_TSIX_2033, + KBASE_HW_ISSUE_TTRX_1337, + KBASE_HW_ISSUE_TURSEHW_2716, + KBASE_HW_ISSUE_GPU2021PRO_290, + KBASE_HW_ISSUE_TITANHW_2710, + KBASE_HW_ISSUE_TITANHW_2679, + KBASE_HW_ISSUE_GPU2022PRO_148, + KBASE_HW_ISSUE_TITANHW_2922, + KBASE_HW_ISSUE_TITANHW_2952, + KBASE_HW_ISSUE_TITANHW_2938, + KBASE_HW_ISSUE_TURSEHW_2934, + KBASE_HW_ISSUE_KRAKEHW_2321, + KBASE_HW_ISSUE_END +}; + +__maybe_unused static const enum base_hw_issue base_hw_issues_tTIx_r0p0[] = { + KBASE_HW_ISSUE_TSIX_2033, + KBASE_HW_ISSUE_TTRX_1337, + KBASE_HW_ISSUE_TURSEHW_2716, + KBASE_HW_ISSUE_GPU2021PRO_290, + KBASE_HW_ISSUE_TITANHW_2710, + KBASE_HW_ISSUE_TITANHW_2679, + KBASE_HW_ISSUE_GPU2022PRO_148, + KBASE_HW_ISSUE_TITANHW_2922, + KBASE_HW_ISSUE_TITANHW_2952, + KBASE_HW_ISSUE_TITANHW_2938, + KBASE_HW_ISSUE_TURSEHW_2934, + KBASE_HW_ISSUE_KRAKEHW_2321, + KBASE_HW_ISSUE_END +}; + +__maybe_unused static const enum base_hw_issue base_hw_issues_tTIx_r0p1[] = { + KBASE_HW_ISSUE_TSIX_2033, KBASE_HW_ISSUE_TTRX_1337, KBASE_HW_ISSUE_TURSEHW_2716, + KBASE_HW_ISSUE_GPU2021PRO_290, KBASE_HW_ISSUE_TITANHW_2710, KBASE_HW_ISSUE_TITANHW_2679, + KBASE_HW_ISSUE_GPU2022PRO_148, KBASE_HW_ISSUE_TITANHW_2938, KBASE_HW_ISSUE_TURSEHW_2934, + KBASE_HW_ISSUE_KRAKEHW_2321, KBASE_HW_ISSUE_END +}; + +__maybe_unused static const enum base_hw_issue base_hw_issues_tKRx_r0p0[] = { + KBASE_HW_ISSUE_TTRX_1337, KBASE_HW_ISSUE_TURSEHW_2716, KBASE_HW_ISSUE_GPU2022PRO_148, + KBASE_HW_ISSUE_KRAKEHW_2151, KBASE_HW_ISSUE_KRAKEHW_2269, KBASE_HW_ISSUE_TITANHW_2922, + KBASE_HW_ISSUE_TURSEHW_2934, KBASE_HW_ISSUE_KRAKEHW_2321, KBASE_HW_ISSUE_END +}; + +__maybe_unused static const enum base_hw_issue base_hw_issues_tKRx_r0p1[] = { + KBASE_HW_ISSUE_TTRX_1337, 
KBASE_HW_ISSUE_TURSEHW_2716, KBASE_HW_ISSUE_GPU2022PRO_148, + KBASE_HW_ISSUE_KRAKEHW_2269, KBASE_HW_ISSUE_TURSEHW_2934, KBASE_HW_ISSUE_KRAKEHW_2321, + KBASE_HW_ISSUE_END +}; + +__maybe_unused static const enum base_hw_issue base_hw_issues_model_tKRx[] = { + KBASE_HW_ISSUE_TTRX_1337, KBASE_HW_ISSUE_TURSEHW_2716, KBASE_HW_ISSUE_GPU2022PRO_148, + KBASE_HW_ISSUE_KRAKEHW_2151, KBASE_HW_ISSUE_KRAKEHW_2269, KBASE_HW_ISSUE_TURSEHW_2934, + KBASE_HW_ISSUE_KRAKEHW_2321, KBASE_HW_ISSUE_END +}; + + +#endif /* _KBASE_HWCONFIG_ISSUES_H_ */ diff --git a/drivers/gpu/arm/bifrost/mali_kbase_ioctl_helpers.h b/drivers/gpu/arm/bifrost/mali_kbase_ioctl_helpers.h new file mode 100644 index 000000000000..e87925bab9b0 --- /dev/null +++ b/drivers/gpu/arm/bifrost/mali_kbase_ioctl_helpers.h @@ -0,0 +1,542 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2024 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + */ + +#ifndef _KBASE_IOCTL_HELPERS_H_ +#define _KBASE_IOCTL_HELPERS_H_ + +#include + +/* Macro for IOCTLs that don't have IOCTL struct */ +#define KBASE_HANDLE_IOCTL(cmd, function, arg) \ + do { \ + int ret; \ + BUILD_BUG_ON(_IOC_DIR(cmd) != _IOC_NONE); \ + dev_dbg(arg->kbdev->dev, "Enter ioctl %s\n", #function); \ + ret = function(arg); \ + dev_dbg(arg->kbdev->dev, "Return %d from ioctl %s\n", ret, #function); \ + return ret; \ + } while (0) + +/* Macro for IOCTLs that have input IOCTL struct */ +#define KBASE_HANDLE_IOCTL_IN(cmd, function, type, arg) \ + do { \ + type param; \ + int ret, err; \ + dev_dbg(arg->kbdev->dev, "Enter ioctl %s\n", #function); \ + BUILD_BUG_ON(_IOC_DIR(cmd) != _IOC_WRITE); \ + BUILD_BUG_ON(sizeof(param) != _IOC_SIZE(cmd)); \ + err = copy_from_user(&param, uarg, sizeof(param)); \ + if (unlikely(err)) \ + return -EFAULT; \ + err = check_padding_##cmd(&param); \ + if (unlikely(err)) \ + return -EINVAL; \ + ret = function(arg, &param); \ + dev_dbg(arg->kbdev->dev, "Return %d from ioctl %s\n", ret, #function); \ + return ret; \ + } while (0) + +/* Macro for IOCTLs that have output IOCTL struct */ +#define KBASE_HANDLE_IOCTL_OUT(cmd, function, type, arg) \ + do { \ + type param; \ + int ret, err; \ + dev_dbg(arg->kbdev->dev, "Enter ioctl %s\n", #function); \ + BUILD_BUG_ON(_IOC_DIR(cmd) != _IOC_READ); \ + BUILD_BUG_ON(sizeof(param) != _IOC_SIZE(cmd)); \ + memset(&param, 0, sizeof(param)); \ + ret = function(arg, &param); \ + err = copy_to_user(uarg, &param, sizeof(param)); \ + if (unlikely(err)) \ + return -EFAULT; \ + dev_dbg(arg->kbdev->dev, "Return %d from ioctl %s\n", ret, #function); \ + return ret; \ + } while (0) + +/* Macro for IOCTLs that have input and output IOCTL struct */ +#define KBASE_HANDLE_IOCTL_INOUT(cmd, function, type, arg) \ + do { \ + type param; \ + int ret, err; \ + dev_dbg(arg->kbdev->dev, "Enter ioctl %s\n", #function); \ + BUILD_BUG_ON(_IOC_DIR(cmd) != (_IOC_WRITE | _IOC_READ)); \ + BUILD_BUG_ON(sizeof(param) != _IOC_SIZE(cmd)); \ +
err = copy_from_user(&param, uarg, sizeof(param)); \ + if (unlikely(err)) \ + return -EFAULT; \ + err = check_padding_##cmd(&param); \ + if (unlikely(err)) \ + return -EINVAL; \ + ret = function(arg, &param); \ + err = copy_to_user(uarg, &param, sizeof(param)); \ + if (unlikely(err)) \ + return -EFAULT; \ + dev_dbg(arg->kbdev->dev, "Return %d from ioctl %s\n", ret, #function); \ + return ret; \ + } while (0) + +/* Inline functions to check padding bytes in the input IOCTL struct. + * Return 0 if all padding bytes are zero, non-zero otherwise. + */ +static inline int check_padding_KBASE_IOCTL_VERSION_CHECK(struct kbase_ioctl_version_check *p) +{ + return 0; +} + +static inline int +check_padding_KBASE_IOCTL_VERSION_CHECK_RESERVED(struct kbase_ioctl_version_check *p) +{ + return 0; +} + +static inline int check_padding_KBASE_IOCTL_SET_FLAGS(struct kbase_ioctl_set_flags *p) +{ + return 0; +} + +static inline int check_padding_KBASE_IOCTL_GET_GPUPROPS(struct kbase_ioctl_get_gpuprops *p) +{ + return 0; +} + +static inline int check_padding_KBASE_IOCTL_MEM_ALLOC(union kbase_ioctl_mem_alloc *p) +{ + return 0; +} + +static inline int check_padding_KBASE_IOCTL_MEM_QUERY(union kbase_ioctl_mem_query *p) +{ + return 0; +} + +static inline int check_padding_KBASE_IOCTL_MEM_FREE(struct kbase_ioctl_mem_free *p) +{ + return 0; +} + +static inline int +check_padding_KBASE_IOCTL_HWCNT_READER_SETUP(struct kbase_ioctl_hwcnt_reader_setup *p) +{ + return 0; +} + +static inline int check_padding_KBASE_IOCTL_HWCNT_SET(struct kbase_ioctl_hwcnt_values *p) +{ + return p->padding; +} + +static inline int check_padding_KBASE_IOCTL_GET_DDK_VERSION(struct kbase_ioctl_get_ddk_version *p) +{ + return p->padding; +} + +static inline int check_padding_KBASE_IOCTL_MEM_JIT_INIT(struct kbase_ioctl_mem_jit_init *p) +{ + size_t i; + + for (i = 0; i < ARRAY_SIZE(p->padding); i++) { + if (p->padding[i]) + return -1; + } + + return 0; +} + +static inline int check_padding_KBASE_IOCTL_MEM_SYNC(struct kbase_ioctl_mem_sync
*p) +{ + size_t i; + + /* + * Checking p->padding is deferred till the support window for backward-compatibility ends. + * GPUCORE-42000 will add the checking. + * + * To avoid the situation with old version of base which might not set padding bytes as 0, + * padding bytes are set as zero here on behalf on user space. + */ + for (i = 0; i < ARRAY_SIZE(p->padding); i++) + p->padding[i] = 0; + + return 0; +} + +static inline int +check_padding_KBASE_IOCTL_MEM_FIND_CPU_OFFSET(union kbase_ioctl_mem_find_cpu_offset *p) +{ + return 0; +} + +static inline int check_padding_KBASE_IOCTL_TLSTREAM_ACQUIRE(struct kbase_ioctl_tlstream_acquire *p) +{ + return 0; +} + +static inline int check_padding_KBASE_IOCTL_MEM_COMMIT(struct kbase_ioctl_mem_commit *p) +{ + return 0; +} + +static inline int check_padding_KBASE_IOCTL_MEM_ALIAS(union kbase_ioctl_mem_alias *p) +{ + return 0; +} + +static inline int check_padding_KBASE_IOCTL_MEM_IMPORT(union kbase_ioctl_mem_import *p) +{ + return 0; +} + +static inline int check_padding_KBASE_IOCTL_MEM_FLAGS_CHANGE(struct kbase_ioctl_mem_flags_change *p) +{ + return 0; +} + +static inline int check_padding_KBASE_IOCTL_STREAM_CREATE(struct kbase_ioctl_stream_create *p) +{ + return 0; +} + +static inline int check_padding_KBASE_IOCTL_FENCE_VALIDATE(struct kbase_ioctl_fence_validate *p) +{ + return 0; +} + +static inline int check_padding_KBASE_IOCTL_MEM_PROFILE_ADD(struct kbase_ioctl_mem_profile_add *p) +{ + return p->padding; +} + +static inline int +check_padding_KBASE_IOCTL_STICKY_RESOURCE_MAP(struct kbase_ioctl_sticky_resource_map *p) +{ + return 0; +} + +static inline int +check_padding_KBASE_IOCTL_STICKY_RESOURCE_UNMAP(struct kbase_ioctl_sticky_resource_unmap *p) +{ + return 0; +} + +static inline int check_padding_KBASE_IOCTL_MEM_FIND_GPU_START_AND_OFFSET( + union kbase_ioctl_mem_find_gpu_start_and_offset *p) +{ + return 0; +} + +static inline int check_padding_KBASE_IOCTL_CINSTR_GWT_DUMP(union kbase_ioctl_cinstr_gwt_dump *p) +{ + /* + * 
Checking p->padding is deferred till the support window for backward-compatibility ends. + * GPUCORE-42000 will add the checking. + * + * To avoid the situation with old version of base which might not set padding bytes as 0, + * padding bytes are set as zero here on behalf on user space. + */ + p->in.padding = 0; + return 0; +} + +static inline int check_padding_KBASE_IOCTL_MEM_EXEC_INIT(struct kbase_ioctl_mem_exec_init *p) +{ + return 0; +} + +static inline int +check_padding_KBASE_IOCTL_GET_CPU_GPU_TIMEINFO(union kbase_ioctl_get_cpu_gpu_timeinfo *p) +{ + size_t i; + + for (i = 0; i < ARRAY_SIZE(p->in.paddings); i++) { + if (p->in.paddings[i]) + return -1; + } + + return 0; +} + +static inline int +check_padding_KBASE_IOCTL_CONTEXT_PRIORITY_CHECK(struct kbase_ioctl_context_priority_check *p) +{ + return 0; +} + +static inline int +check_padding_KBASE_IOCTL_SET_LIMITED_CORE_COUNT(struct kbase_ioctl_set_limited_core_count *p) +{ + return 0; +} + +static inline int +check_padding_KBASE_IOCTL_KINSTR_PRFCNT_ENUM_INFO(struct kbase_ioctl_kinstr_prfcnt_enum_info *p) +{ + return 0; +} + +static inline int +check_padding_KBASE_IOCTL_KINSTR_PRFCNT_SETUP(union kbase_ioctl_kinstr_prfcnt_setup *p) +{ + return 0; +} + +#if MALI_UNIT_TEST +#endif /* MALI_UNIT_TEST */ + +#if MALI_USE_CSF + +static inline int +check_padding_KBASE_IOCTL_CS_QUEUE_REGISTER(struct kbase_ioctl_cs_queue_register *p) +{ + size_t i; + + for (i = 0; i < ARRAY_SIZE(p->padding); i++) { + if (p->padding[i]) + return -1; + } + + return 0; +} + +static inline int check_padding_KBASE_IOCTL_CS_QUEUE_KICK(struct kbase_ioctl_cs_queue_kick *p) +{ + return 0; +} + +static inline int check_padding_KBASE_IOCTL_CS_QUEUE_BIND(union kbase_ioctl_cs_queue_bind *p) +{ + size_t i; + + /* + * Checking p->padding is deferred till the support window for backward-compatibility ends. + * GPUCORE-42000 will add the checking. 
+ * + * To avoid the situation with old version of base which might not set padding bytes as 0, + * padding bytes are set as zero here on behalf on user space. + */ + for (i = 0; i < ARRAY_SIZE(p->in.padding); i++) + p->in.padding[i] = 0; + + return 0; +} + +static inline int +check_padding_KBASE_IOCTL_CS_QUEUE_REGISTER_EX(struct kbase_ioctl_cs_queue_register_ex *p) +{ + size_t i; + + for (i = 0; i < ARRAY_SIZE(p->padding); i++) { + if (p->padding[i]) + return -1; + } + + for (i = 0; i < ARRAY_SIZE(p->ex_padding); i++) { + if (p->ex_padding[i]) + return -1; + } + + return 0; +} + +static inline int +check_padding_KBASE_IOCTL_CS_QUEUE_TERMINATE(struct kbase_ioctl_cs_queue_terminate *p) +{ + return 0; +} + +static inline int +check_padding_KBASE_IOCTL_CS_QUEUE_GROUP_CREATE_1_6(union kbase_ioctl_cs_queue_group_create_1_6 *p) +{ + size_t i; + + for (i = 0; i < ARRAY_SIZE(p->in.padding); i++) { + if (p->in.padding[i]) + return -1; + } + + return 0; +} + +static inline int check_padding_KBASE_IOCTL_CS_QUEUE_GROUP_CREATE_1_18( + union kbase_ioctl_cs_queue_group_create_1_18 *p) +{ + size_t i; + + for (i = 0; i < ARRAY_SIZE(p->in.padding); i++) { + if (p->in.padding[i]) + return -1; + } + + return 0; +} + +static inline int +check_padding_KBASE_IOCTL_CS_QUEUE_GROUP_CREATE(union kbase_ioctl_cs_queue_group_create *p) +{ + size_t i; + + for (i = 0; i < ARRAY_SIZE(p->in.padding); i++) { + if (p->in.padding[i]) + return -1; + } + + return 0; +} + +static inline int +check_padding_KBASE_IOCTL_CS_QUEUE_GROUP_TERMINATE(struct kbase_ioctl_cs_queue_group_term *p) +{ + size_t i; + + for (i = 0; i < ARRAY_SIZE(p->padding); i++) { + if (p->padding[i]) + return -1; + } + + return 0; +} + +static inline int +check_padding_KBASE_IOCTL_KCPU_QUEUE_DELETE(struct kbase_ioctl_kcpu_queue_delete *p) +{ + size_t i; + + /* + * Checking p->padding is deferred till the support window for backward-compatibility ends. + * GPUCORE-42000 will add the checking. 
+ * + * To avoid the situation with old version of base which might not set padding bytes as 0, + * padding bytes are set as zero here on behalf on user space. + */ + for (i = 0; i < ARRAY_SIZE(p->padding); i++) + p->padding[i] = 0; + + return 0; +} + +static inline int +check_padding_KBASE_IOCTL_KCPU_QUEUE_ENQUEUE(struct kbase_ioctl_kcpu_queue_enqueue *p) +{ + size_t i; + + /* + * Checking p->padding is deferred till the support window for backward-compatibility ends. + * GPUCORE-42000 will add the checking. + * + * To avoid the situation with old version of base which might not set padding bytes as 0, + * padding bytes are set as zero here on behalf on user space. + */ + for (i = 0; i < ARRAY_SIZE(p->padding); i++) + p->padding[i] = 0; + + return 0; +} + +static inline int +check_padding_KBASE_IOCTL_CS_TILER_HEAP_INIT(union kbase_ioctl_cs_tiler_heap_init *p) +{ + return p->in.padding; +} + +static inline int +check_padding_KBASE_IOCTL_CS_TILER_HEAP_INIT_1_13(union kbase_ioctl_cs_tiler_heap_init_1_13 *p) +{ + return p->in.padding; +} + +static inline int +check_padding_KBASE_IOCTL_CS_TILER_HEAP_TERM(struct kbase_ioctl_cs_tiler_heap_term *p) +{ + return 0; +} + +static inline int check_padding_KBASE_IOCTL_CS_GET_GLB_IFACE(union kbase_ioctl_cs_get_glb_iface *p) +{ + return 0; +} + +static inline int +check_padding_KBASE_IOCTL_CS_CPU_QUEUE_DUMP(struct kbase_ioctl_cs_cpu_queue_info *p) +{ + return 0; +} + +static inline int check_padding_KBASE_IOCTL_MEM_ALLOC_EX(union kbase_ioctl_mem_alloc_ex *p) +{ + size_t i; + + for (i = 0; i < ARRAY_SIZE(p->in.extra); i++) { + if (p->in.extra[i]) + return -1; + } + + return 0; +} + +static inline int check_padding_KBASE_IOCTL_READ_USER_PAGE(union kbase_ioctl_read_user_page *p) +{ + return p->in.padding; +} + +static inline int +check_padding_KBASE_IOCTL_QUEUE_GROUP_CLEAR_FAULTS(struct kbase_ioctl_queue_group_clear_faults *p) +{ + size_t i; + + /* + * Checking p->padding is deferred till the support window for 
backward-compatibility ends. + * GPUCORE-42000 will add the checking. + * + * To avoid the situation with old version of base which might not set padding bytes as 0, + * padding bytes are set as zero here on behalf on user space. + */ + for (i = 0; i < ARRAY_SIZE(p->padding); i++) + p->padding[i] = 0; + + return 0; +} + +#else /* MALI_USE_CSF */ + +static inline int check_padding_KBASE_IOCTL_JOB_SUBMIT(struct kbase_ioctl_job_submit *p) +{ + return 0; +} + +static inline int +check_padding_KBASE_IOCTL_SOFT_EVENT_UPDATE(struct kbase_ioctl_soft_event_update *p) +{ + return 0; +} + +static inline int check_padding_KBASE_IOCTL_KINSTR_JM_FD(union kbase_kinstr_jm_fd *p) +{ + size_t i; + + for (i = 0; i < ARRAY_SIZE(p->in.padding); i++) { + if (p->in.padding[i]) + return -1; + } + + return 0; +} + +#endif /* !MALI_USE_CSF */ + +#endif /* _KBASE_IOCTL_HELPERS_H_ */ diff --git a/drivers/gpu/arm/bifrost/mali_kbase_jd.c b/drivers/gpu/arm/bifrost/mali_kbase_jd.c index 418a1913b241..4da7fa377bd7 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_jd.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_jd.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2024 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -39,9 +39,6 @@ #include #include #include - -#include - #include /* Return whether katom will run on the GPU or not. 
Currently only soft jobs and @@ -209,7 +206,7 @@ static int kbase_jd_pre_external_resources(struct kbase_jd_atom *katom, } if (copy_from_user(input_extres, get_compat_pointer(katom->kctx, user_atom->extres_list), - sizeof(*input_extres) * katom->nr_extres) != 0) { + size_mul(sizeof(*input_extres), katom->nr_extres)) != 0) { err = -EINVAL; goto failed_input_copy; } @@ -697,7 +694,6 @@ static void jd_trace_atom_submit(struct kbase_context *const kctx, static bool jd_submit_atom(struct kbase_context *const kctx, const struct base_jd_atom *const user_atom, - const struct base_jd_fragment *const user_jc_incr, struct kbase_jd_atom *const katom) { struct kbase_device *kbdev = kctx->kbdev; @@ -755,8 +751,6 @@ static bool jd_submit_atom(struct kbase_context *const kctx, } #endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ - katom->renderpass_id = user_atom->renderpass_id; - /* Implicitly sets katom->protected_state.enter as well. */ katom->protected_state.exit = KBASE_ATOM_EXIT_PROTECTED_CHECK; @@ -875,20 +869,7 @@ static bool jd_submit_atom(struct kbase_context *const kctx, /* Create a new atom. 
*/ jd_trace_atom_submit(kctx, katom, &katom->sched_priority); -#if !MALI_INCREMENTAL_RENDERING_JM - /* Reject atoms for incremental rendering if not supported */ - if (katom->core_req & (BASE_JD_REQ_START_RENDERPASS | BASE_JD_REQ_END_RENDERPASS)) { - dev_err(kctx->kbdev->dev, "Rejecting atom with unsupported core_req 0x%x\n", - katom->core_req); - katom->event_code = BASE_JD_EVENT_JOB_INVALID; - return kbase_jd_done_nolock(katom, true); - } -#endif /* !MALI_INCREMENTAL_RENDERING_JM */ - - if (katom->core_req & BASE_JD_REQ_END_RENDERPASS) { - WARN_ON(katom->jc != 0); - katom->jc_fragment = *user_jc_incr; - } else if (!katom->jc && (katom->core_req & BASE_JD_REQ_ATOM_TYPE) != BASE_JD_REQ_DEP) { + if (!katom->jc && (katom->core_req & BASE_JD_REQ_ATOM_TYPE) != BASE_JD_REQ_DEP) { /* Reject atoms with job chain = NULL, as these cause issues * with soft-stop */ @@ -1018,8 +999,7 @@ int kbase_jd_submit(struct kbase_context *kctx, void __user *user_addr, u32 nr_a struct kbase_device *kbdev; u32 latest_flush; - bool jd_atom_is_v2 = (stride == sizeof(struct base_jd_atom_v2) || - stride == offsetof(struct base_jd_atom_v2, renderpass_id)); + bool jd_atom_is_v2 = (stride == sizeof(struct base_jd_atom_v2)); CSTD_UNUSED(uk6_atom); @@ -1035,10 +1015,7 @@ int kbase_jd_submit(struct kbase_context *kctx, void __user *user_addr, u32 nr_a return -EINVAL; } - if (stride != offsetof(struct base_jd_atom_v2, renderpass_id) && - stride != sizeof(struct base_jd_atom_v2) && - stride != offsetof(struct base_jd_atom, renderpass_id) && - stride != sizeof(struct base_jd_atom)) { + if (stride != sizeof(struct base_jd_atom_v2) && stride != sizeof(struct base_jd_atom)) { dev_err(kbdev->dev, "Stride %u passed to job_submit isn't supported by the kernel\n", stride); return -EINVAL; @@ -1057,7 +1034,6 @@ int kbase_jd_submit(struct kbase_context *kctx, void __user *user_addr, u32 nr_a struct base_jd_atom user_atom = { .seq_nr = 0, }; - struct base_jd_fragment user_jc_incr; struct kbase_jd_atom *katom; if 
(unlikely(jd_atom_is_v2)) { @@ -1082,44 +1058,6 @@ int kbase_jd_submit(struct kbase_context *kctx, void __user *user_addr, u32 nr_a } } - if (stride == offsetof(struct base_jd_atom_v2, renderpass_id)) { - dev_dbg(kbdev->dev, "No renderpass ID: use 0\n"); - user_atom.renderpass_id = 0; - } else { - /* Ensure all padding bytes are 0 for potential future - * extension - */ - size_t j; - - dev_dbg(kbdev->dev, "Renderpass ID is %d\n", user_atom.renderpass_id); - for (j = 0; j < sizeof(user_atom.padding); j++) { - if (user_atom.padding[j]) { - dev_err(kbdev->dev, "Bad padding byte %zu: %d\n", j, - user_atom.padding[j]); - err = -EINVAL; - break; - } - } - if (err) - break; - } - - /* In this case 'jc' is the CPU address of a struct - * instead of a GPU address of a job chain. - */ - if (user_atom.core_req & BASE_JD_REQ_END_RENDERPASS) { - if (copy_from_user(&user_jc_incr, u64_to_user_ptr(user_atom.jc), - sizeof(user_jc_incr))) { - dev_err(kbdev->dev, - "Invalid jc address 0x%llx passed to job_submit\n", - user_atom.jc); - err = -EFAULT; - break; - } - dev_dbg(kbdev->dev, "Copied IR jobchain addresses\n"); - user_atom.jc = 0; - } - user_addr = (void __user *)((uintptr_t)user_addr + stride); mutex_lock(&jctx->lock); @@ -1172,8 +1110,7 @@ int kbase_jd_submit(struct kbase_context *kctx, void __user *user_addr, u32 nr_a mutex_lock(&jctx->lock); } KBASE_TLSTREAM_TL_JD_SUBMIT_ATOM_START(kbdev, katom); - need_to_try_schedule_context |= - jd_submit_atom(kctx, &user_atom, &user_jc_incr, katom); + need_to_try_schedule_context |= jd_submit_atom(kctx, &user_atom, katom); KBASE_TLSTREAM_TL_JD_SUBMIT_ATOM_END(kbdev, katom); /* Register a completed job as a disjoint event when the GPU is in a disjoint state * (ie. being reset). 
@@ -1579,9 +1516,6 @@ int kbase_jd_init(struct kbase_context *kctx) #endif } - for (i = 0; i < BASE_JD_RP_COUNT; i++) - kctx->jctx.renderpasses[i].state = KBASE_JD_RP_COMPLETE; - mutex_init(&kctx->jctx.lock); init_waitqueue_head(&kctx->jctx.zero_jobs_wait); diff --git a/drivers/gpu/arm/bifrost/mali_kbase_js.c b/drivers/gpu/arm/bifrost/mali_kbase_js.c index 55c1f4be25d5..d42fde37db2a 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_js.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_js.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2011-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2024 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -161,7 +161,7 @@ static inline int gpu_metrics_ctx_init(struct kbase_context *kctx) put_cred(cred); /* Return early if this is not a Userspace created context */ - if (unlikely(!kctx->kfile)) + if (unlikely(!kctx->filp)) return 0; /* Serialize against the other threads trying to create/destroy Kbase contexts. */ @@ -200,7 +200,7 @@ static inline void gpu_metrics_ctx_term(struct kbase_context *kctx) unsigned long flags; /* Return early if this is not a Userspace created context */ - if (unlikely(!kctx->kfile)) + if (unlikely(!kctx->filp)) return; /* Serialize against the other threads trying to create/destroy Kbase contexts. */ @@ -333,19 +333,6 @@ static void jsctx_queue_foreach_prio(struct kbase_context *kctx, unsigned int js rb_erase(node, &queue->runnable_tree); callback(kctx->kbdev, entry); - - /* Runnable end-of-renderpass atoms can also be in the linked - * list of atoms blocked on cross-slot dependencies. Remove them - * to avoid calling the callback twice. 
- */ - if (entry->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST) { - WARN_ON(!(entry->core_req & BASE_JD_REQ_END_RENDERPASS)); - dev_dbg(kctx->kbdev->dev, "Del runnable atom %pK from X_DEP list\n", - (void *)entry); - - list_del(&entry->queue); - entry->atom_flags &= ~KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST; - } } while (!list_empty(&queue->x_dep_head)) { @@ -1230,7 +1217,7 @@ static bool kbase_js_ctx_pullable(struct kbase_context *kctx, unsigned int js, b dev_dbg(kbdev->dev, "JS: Atom %pK is blocked in js_ctx_pullable\n", (void *)katom); return false; /* next atom blocked */ } - if (kbase_js_atom_blocked_on_x_dep(katom)) { + if (katom->atom_flags & KBASE_KATOM_FLAG_X_DEP_BLOCKED) { if (katom->x_pre_dep->gpu_rb_state == KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB || katom->x_pre_dep->will_fail_event_code) { dev_dbg(kbdev->dev, @@ -1371,9 +1358,6 @@ static bool kbase_js_dep_validate(struct kbase_context *kctx, struct kbase_jd_at (dep_atom->status != KBASE_JD_ATOM_STATE_UNUSED)) { katom->atom_flags |= KBASE_KATOM_FLAG_X_DEP_BLOCKED; - dev_dbg(kbdev->dev, "Set X_DEP flag on atom %pK\n", - (void *)katom); - katom->x_pre_dep = dep_atom; dep_atom->x_post_dep = katom; if (kbase_jd_katom_dep_type(&katom->dep[i]) == @@ -1447,110 +1431,12 @@ void kbase_js_update_ctx_priority(struct kbase_context *kctx) } KBASE_EXPORT_TEST_API(kbase_js_update_ctx_priority); -/** - * js_add_start_rp() - Add an atom that starts a renderpass to the job scheduler - * @start_katom: Pointer to the atom to be added. - * Return: 0 if successful or a negative value on failure. 
- */ -static int js_add_start_rp(struct kbase_jd_atom *const start_katom) -{ - struct kbase_context *const kctx = start_katom->kctx; - struct kbase_jd_renderpass *rp; - struct kbase_device *const kbdev = kctx->kbdev; - unsigned long flags; - - lockdep_assert_held(&kctx->jctx.lock); - - if (WARN_ON(!(start_katom->core_req & BASE_JD_REQ_START_RENDERPASS))) - return -EINVAL; - - if (start_katom->core_req & BASE_JD_REQ_END_RENDERPASS) - return -EINVAL; - - compiletime_assert((1ull << (sizeof(start_katom->renderpass_id) * 8)) <= - ARRAY_SIZE(kctx->jctx.renderpasses), - "Should check invalid access to renderpasses"); - - rp = &kctx->jctx.renderpasses[start_katom->renderpass_id]; - - if (rp->state != KBASE_JD_RP_COMPLETE) - return -EINVAL; - - dev_dbg(kctx->kbdev->dev, "JS add start atom %pK of RP %d\n", (void *)start_katom, - start_katom->renderpass_id); - - /* The following members are read when updating the job slot - * ringbuffer/fifo therefore they require additional locking. - */ - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - - rp->state = KBASE_JD_RP_START; - rp->start_katom = start_katom; - rp->end_katom = NULL; - INIT_LIST_HEAD(&rp->oom_reg_list); - - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - - return 0; -} - -/** - * js_add_end_rp() - Add an atom that ends a renderpass to the job scheduler - * @end_katom: Pointer to the atom to be added. - * Return: 0 if successful or a negative value on failure. 
- */ -static int js_add_end_rp(struct kbase_jd_atom *const end_katom) -{ - struct kbase_context *const kctx = end_katom->kctx; - struct kbase_jd_renderpass *rp; - struct kbase_device *const kbdev = kctx->kbdev; - - lockdep_assert_held(&kctx->jctx.lock); - - if (WARN_ON(!(end_katom->core_req & BASE_JD_REQ_END_RENDERPASS))) - return -EINVAL; - - if (end_katom->core_req & BASE_JD_REQ_START_RENDERPASS) - return -EINVAL; - - compiletime_assert((1ull << (sizeof(end_katom->renderpass_id) * 8)) <= - ARRAY_SIZE(kctx->jctx.renderpasses), - "Should check invalid access to renderpasses"); - - rp = &kctx->jctx.renderpasses[end_katom->renderpass_id]; - - dev_dbg(kbdev->dev, "JS add end atom %pK in state %d of RP %d\n", (void *)end_katom, - (int)rp->state, end_katom->renderpass_id); - - if (rp->state == KBASE_JD_RP_COMPLETE) - return -EINVAL; - - if (rp->end_katom == NULL) { - /* We can't be in a retry state until the fragment job chain - * has completed. - */ - unsigned long flags; - - WARN_ON(rp->state == KBASE_JD_RP_RETRY); - WARN_ON(rp->state == KBASE_JD_RP_RETRY_PEND_OOM); - WARN_ON(rp->state == KBASE_JD_RP_RETRY_OOM); - - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - rp->end_katom = end_katom; - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - } else - WARN_ON(rp->end_katom != end_katom); - - return 0; -} - bool kbasep_js_add_job(struct kbase_context *kctx, struct kbase_jd_atom *atom) { unsigned long flags; struct kbasep_js_kctx_info *js_kctx_info; struct kbase_device *kbdev; struct kbasep_js_device_data *js_devdata; - int err = 0; bool enqueue_required = false; bool timer_sync = false; @@ -1566,17 +1452,6 @@ bool kbasep_js_add_job(struct kbase_context *kctx, struct kbase_jd_atom *atom) mutex_lock(&js_devdata->queue_mutex); mutex_lock(&js_kctx_info->ctx.jsctx_mutex); - if (atom->core_req & BASE_JD_REQ_START_RENDERPASS) - err = js_add_start_rp(atom); - else if (atom->core_req & BASE_JD_REQ_END_RENDERPASS) - err = js_add_end_rp(atom); - - if (err < 0) { - 
atom->event_code = BASE_JD_EVENT_JOB_INVALID; - atom->status = KBASE_JD_ATOM_STATE_COMPLETED; - goto out_unlock; - } - /* * Begin Runpool transaction */ @@ -1860,10 +1735,7 @@ kbasep_js_runpool_release_ctx_internal(struct kbase_device *kbdev, struct kbase_ kbasep_js_ctx_attr_ctx_release_atom(kbdev, kctx, katom_retained_state); if (new_ref_count == 2 && kbase_ctx_flag(kctx, KCTX_PRIVILEGED) && -#ifdef CONFIG_MALI_ARBITER_SUPPORT - !kbase_pm_is_gpu_lost(kbdev) && -#endif - !kbase_pm_is_suspending(kbdev)) { + !kbase_pm_is_gpu_lost(kbdev) && !kbase_pm_is_suspending(kbdev)) { /* Context is kept scheduled into an address space even when * there are no jobs, in this case we have to handle the * situation where all jobs have been evicted from the GPU and @@ -1880,10 +1752,7 @@ kbasep_js_runpool_release_ctx_internal(struct kbase_device *kbdev, struct kbase_ * which was previously acquired by kbasep_js_schedule_ctx(). */ if (new_ref_count == 1 && (!kbasep_js_is_submit_allowed(js_devdata, kctx) || -#ifdef CONFIG_MALI_ARBITER_SUPPORT - kbase_pm_is_gpu_lost(kbdev) || -#endif - kbase_pm_is_suspending(kbdev))) { + kbase_pm_is_gpu_lost(kbdev) || kbase_pm_is_suspending(kbdev))) { int num_slots = kbdev->gpu_props.num_job_slots; unsigned int slot; @@ -2189,11 +2058,7 @@ static bool kbasep_js_schedule_ctx(struct kbase_device *kbdev, struct kbase_cont * of it being called strictly after the suspend flag is set, and will * wait for this lock to drop) */ -#ifdef CONFIG_MALI_ARBITER_SUPPORT if (kbase_pm_is_suspending(kbdev) || kbase_pm_is_gpu_lost(kbdev)) { -#else - if (kbase_pm_is_suspending(kbdev)) { -#endif /* Cause it to leave at some later point */ bool retained; CSTD_UNUSED(retained); @@ -2267,7 +2132,6 @@ void kbasep_js_schedule_privileged_ctx(struct kbase_device *kbdev, struct kbase_ js_devdata = &kbdev->js_data; js_kctx_info = &kctx->jctx.sched_info; -#ifdef CONFIG_MALI_ARBITER_SUPPORT /* This should only happen in response to a system call * from a user-space thread. 
* In a non-arbitrated environment this can never happen @@ -2279,18 +2143,10 @@ void kbasep_js_schedule_privileged_ctx(struct kbase_device *kbdev, struct kbase_ * the wait event for KCTX_SCHEDULED, since no context * can be scheduled until we have the GPU again. */ - if (kbdev->arb.arb_if == NULL) + if (!kbase_has_arbiter(kbdev)) { if (WARN_ON(kbase_pm_is_suspending(kbdev))) return; -#else - /* This should only happen in response to a system call - * from a user-space thread. - * In a non-arbitrated environment this can never happen - * whilst suspending. - */ - if (WARN_ON(kbase_pm_is_suspending(kbdev))) - return; -#endif + } mutex_lock(&js_devdata->queue_mutex); mutex_lock(&js_kctx_info->ctx.jsctx_mutex); @@ -2416,63 +2272,63 @@ void kbasep_js_resume(struct kbase_device *kbdev) struct kbase_context *kctx, *n; unsigned long flags; -#ifndef CONFIG_MALI_ARBITER_SUPPORT - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + if (kbase_has_arbiter(kbdev)) { + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - list_for_each_entry_safe(kctx, n, - &kbdev->js_data.ctx_list_unpullable[js][prio], - jctx.sched_info.ctx.ctx_list_entry[js]) { - struct kbasep_js_kctx_info *js_kctx_info; + list_for_each_entry_safe( + kctx, n, &kbdev->js_data.ctx_list_unpullable[js][prio], + jctx.sched_info.ctx.ctx_list_entry[js]) { + struct kbasep_js_kctx_info *js_kctx_info; + bool timer_sync = false; + + /* Drop lock so we can take kctx mutexes */ + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + js_kctx_info = &kctx->jctx.sched_info; + + mutex_lock(&js_kctx_info->ctx.jsctx_mutex); + mutex_lock(&js_devdata->runpool_mutex); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + if (!kbase_ctx_flag(kctx, KCTX_SCHEDULED) && + kbase_js_ctx_pullable(kctx, js, false)) + timer_sync = kbase_js_ctx_list_add_pullable_nolock( + kbdev, kctx, js); + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + if (timer_sync) + kbase_backend_ctx_count_changed(kbdev); + + 
mutex_unlock(&js_devdata->runpool_mutex); + mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); + + /* Take lock before accessing list again */ + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + } + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + } else { bool timer_sync = false; - /* Drop lock so we can take kctx mutexes */ - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - - js_kctx_info = &kctx->jctx.sched_info; - - mutex_lock(&js_kctx_info->ctx.jsctx_mutex); - mutex_lock(&js_devdata->runpool_mutex); spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - if (!kbase_ctx_flag(kctx, KCTX_SCHEDULED) && - kbase_js_ctx_pullable(kctx, js, false)) - timer_sync = kbase_js_ctx_list_add_pullable_nolock( - kbdev, kctx, js); + list_for_each_entry_safe( + kctx, n, &kbdev->js_data.ctx_list_unpullable[js][prio], + jctx.sched_info.ctx.ctx_list_entry[js]) { + if (!kbase_ctx_flag(kctx, KCTX_SCHEDULED) && + kbase_js_ctx_pullable(kctx, js, false)) + timer_sync |= kbase_js_ctx_list_add_pullable_nolock( + kbdev, kctx, js); + } spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - if (timer_sync) + if (timer_sync) { + mutex_lock(&js_devdata->runpool_mutex); kbase_backend_ctx_count_changed(kbdev); - - mutex_unlock(&js_devdata->runpool_mutex); - mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); - - /* Take lock before accessing list again */ - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + mutex_unlock(&js_devdata->runpool_mutex); + } } - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -#else - bool timer_sync = false; - - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - - list_for_each_entry_safe(kctx, n, - &kbdev->js_data.ctx_list_unpullable[js][prio], - jctx.sched_info.ctx.ctx_list_entry[js]) { - if (!kbase_ctx_flag(kctx, KCTX_SCHEDULED) && - kbase_js_ctx_pullable(kctx, js, false)) - timer_sync |= kbase_js_ctx_list_add_pullable_nolock( - kbdev, kctx, js); - } - - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - - if (timer_sync) { - 
mutex_lock(&js_devdata->runpool_mutex); - kbase_backend_ctx_count_changed(kbdev); - mutex_unlock(&js_devdata->runpool_mutex); - } -#endif } } mutex_unlock(&js_devdata->queue_mutex); @@ -2515,7 +2371,7 @@ static unsigned int kbase_js_get_slot(struct kbase_device *kbdev, struct kbase_j bool kbase_js_dep_resolved_submit(struct kbase_context *kctx, struct kbase_jd_atom *katom) { - bool enqueue_required, add_required = true; + bool enqueue_required; katom->slot_nr = kbase_js_get_slot(kctx->kbdev, katom); @@ -2525,10 +2381,7 @@ bool kbase_js_dep_resolved_submit(struct kbase_context *kctx, struct kbase_jd_at /* If slot will transition from unpullable to pullable then add to * pullable list */ - if (jsctx_rb_none_to_pull(kctx, katom->slot_nr)) - enqueue_required = true; - else - enqueue_required = false; + enqueue_required = jsctx_rb_none_to_pull(kctx, katom->slot_nr); if ((katom->atom_flags & KBASE_KATOM_FLAG_X_DEP_BLOCKED) || (katom->pre_dep && @@ -2541,15 +2394,9 @@ bool kbase_js_dep_resolved_submit(struct kbase_context *kctx, struct kbase_jd_at list_add_tail(&katom->queue, &queue->x_dep_head); katom->atom_flags |= KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST; - if (kbase_js_atom_blocked_on_x_dep(katom)) { - enqueue_required = false; - add_required = false; - } + enqueue_required = false; } else { dev_dbg(kctx->kbdev->dev, "Atom %pK not added to X_DEP list\n", (void *)katom); - } - - if (add_required) { /* Check if there are lower priority jobs to soft stop */ kbase_job_slot_ctx_priority_check_locked(kctx, katom); @@ -2575,30 +2422,22 @@ bool kbase_js_dep_resolved_submit(struct kbase_context *kctx, struct kbase_jd_at */ static void kbase_js_move_to_tree(struct kbase_jd_atom *katom) { - struct kbase_context *const kctx = katom->kctx; - - lockdep_assert_held(&kctx->kbdev->hwaccess_lock); + lockdep_assert_held(&katom->kctx->kbdev->hwaccess_lock); while (katom) { WARN_ON(!(katom->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST)); - if (!kbase_js_atom_blocked_on_x_dep(katom)) { - 
dev_dbg(kctx->kbdev->dev, + if (!(katom->atom_flags & KBASE_KATOM_FLAG_X_DEP_BLOCKED)) { + dev_dbg(katom->kctx->kbdev->dev, "Del atom %pK from X_DEP list in js_move_to_tree\n", (void *)katom); list_del(&katom->queue); katom->atom_flags &= ~KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST; - /* For incremental rendering, an end-of-renderpass atom - * may have had its dependency on start-of-renderpass - * ignored and may therefore already be in the tree. - */ - if (!(katom->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_TREE)) { - jsctx_tree_add(kctx, katom); - katom->atom_flags |= KBASE_KATOM_FLAG_JSCTX_IN_TREE; - } + jsctx_tree_add(katom->kctx, katom); + katom->atom_flags |= KBASE_KATOM_FLAG_JSCTX_IN_TREE; } else { - dev_dbg(kctx->kbdev->dev, "Atom %pK blocked on x-dep in js_move_to_tree\n", - (void *)katom); + dev_dbg(katom->kctx->kbdev->dev, + "Atom %pK blocked on x-dep in js_move_to_tree\n", (void *)katom); break; } @@ -2615,7 +2454,7 @@ static void kbase_js_move_to_tree(struct kbase_jd_atom *katom) * * Remove all post dependencies of an atom from the context ringbuffers. * - * The original atom's event_code will be propogated to all dependent atoms. + * The original atom's event_code will be propagated to all dependent atoms. 
* * Context: Caller must hold the HW access lock */ @@ -2671,11 +2510,7 @@ struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, unsigned int js) dev_dbg(kbdev->dev, "JS: No submit allowed for kctx %pK\n", (void *)kctx); return NULL; } -#ifdef CONFIG_MALI_ARBITER_SUPPORT if (kbase_pm_is_suspending(kbdev) || kbase_pm_is_gpu_lost(kbdev)) -#else - if (kbase_pm_is_suspending(kbdev)) -#endif return NULL; katom = jsctx_rb_peek(kctx, js); @@ -2705,7 +2540,7 @@ struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, unsigned int js) return NULL; } - if (kbase_js_atom_blocked_on_x_dep(katom)) { + if (katom->atom_flags & KBASE_KATOM_FLAG_X_DEP_BLOCKED) { if (katom->x_pre_dep->gpu_rb_state == KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB || katom->x_pre_dep->will_fail_event_code) { dev_dbg(kbdev->dev, @@ -2745,190 +2580,6 @@ struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, unsigned int js) return katom; } -/** - * js_return_of_start_rp() - Handle soft-stop of an atom that starts a - * renderpass - * @start_katom: Pointer to the start-of-renderpass atom that was soft-stopped - * - * This function is called to switch to incremental rendering if the tiler job - * chain at the start of a renderpass has used too much memory. It prevents the - * tiler job being pulled for execution in the job scheduler again until the - * next phase of incremental rendering is complete. - * - * If the end-of-renderpass atom is already in the job scheduler (because a - * previous attempt at tiling used too much memory during the same renderpass) - * then it is unblocked; otherwise, it is run by handing it to the scheduler. 
- */ -static void js_return_of_start_rp(struct kbase_jd_atom *const start_katom) -{ - struct kbase_context *const kctx = start_katom->kctx; - struct kbase_device *const kbdev = kctx->kbdev; - struct kbase_jd_renderpass *rp; - struct kbase_jd_atom *end_katom; - unsigned long flags; - - lockdep_assert_held(&kctx->jctx.lock); - - if (WARN_ON(!(start_katom->core_req & BASE_JD_REQ_START_RENDERPASS))) - return; - - compiletime_assert((1ull << (sizeof(start_katom->renderpass_id) * 8)) <= - ARRAY_SIZE(kctx->jctx.renderpasses), - "Should check invalid access to renderpasses"); - - rp = &kctx->jctx.renderpasses[start_katom->renderpass_id]; - - if (WARN_ON(rp->start_katom != start_katom)) - return; - - dev_dbg(kctx->kbdev->dev, "JS return start atom %pK in state %d of RP %d\n", - (void *)start_katom, (int)rp->state, start_katom->renderpass_id); - - if (WARN_ON(rp->state == KBASE_JD_RP_COMPLETE)) - return; - - /* The tiler job might have been soft-stopped for some reason other - * than running out of memory. - */ - if (rp->state == KBASE_JD_RP_START || rp->state == KBASE_JD_RP_RETRY) { - dev_dbg(kctx->kbdev->dev, "JS return isn't OOM in state %d of RP %d\n", - (int)rp->state, start_katom->renderpass_id); - return; - } - - dev_dbg(kctx->kbdev->dev, "JS return confirm OOM in state %d of RP %d\n", (int)rp->state, - start_katom->renderpass_id); - - if (WARN_ON(rp->state != KBASE_JD_RP_PEND_OOM && rp->state != KBASE_JD_RP_RETRY_PEND_OOM)) - return; - - /* Prevent the tiler job being pulled for execution in the - * job scheduler again. - */ - dev_dbg(kbdev->dev, "Blocking start atom %pK\n", (void *)start_katom); - atomic_inc(&start_katom->blocked); - - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - - rp->state = (rp->state == KBASE_JD_RP_PEND_OOM) ? KBASE_JD_RP_OOM : KBASE_JD_RP_RETRY_OOM; - - /* Was the fragment job chain submitted to kbase yet? 
*/ - end_katom = rp->end_katom; - if (end_katom) { - dev_dbg(kctx->kbdev->dev, "JS return add end atom %pK\n", (void *)end_katom); - - if (rp->state == KBASE_JD_RP_RETRY_OOM) { - /* Allow the end of the renderpass to be pulled for - * execution again to continue incremental rendering. - */ - dev_dbg(kbdev->dev, "Unblocking end atom %pK\n", (void *)end_katom); - atomic_dec(&end_katom->blocked); - WARN_ON(!(end_katom->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_TREE)); - WARN_ON(end_katom->status != KBASE_JD_ATOM_STATE_IN_JS); - - kbase_js_ctx_list_add_pullable_nolock(kbdev, kctx, end_katom->slot_nr); - - /* Expect the fragment job chain to be scheduled without - * further action because this function is called when - * returning an atom to the job scheduler ringbuffer. - */ - end_katom = NULL; - } else { - WARN_ON(end_katom->status != KBASE_JD_ATOM_STATE_QUEUED && - end_katom->status != KBASE_JD_ATOM_STATE_IN_JS); - } - } - - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - - if (end_katom) - kbase_jd_dep_clear_locked(end_katom); -} - -/** - * js_return_of_end_rp() - Handle completion of an atom that ends a renderpass - * @end_katom: Pointer to the end-of-renderpass atom that was completed - * - * This function is called to continue incremental rendering if the tiler job - * chain at the start of a renderpass used too much memory. It resets the - * mechanism for detecting excessive memory usage then allows the soft-stopped - * tiler job chain to be pulled for execution again. - * - * The start-of-renderpass atom must already been submitted to kbase. 
- */ -static void js_return_of_end_rp(struct kbase_jd_atom *const end_katom) -{ - struct kbase_context *const kctx = end_katom->kctx; - struct kbase_device *const kbdev = kctx->kbdev; - struct kbase_jd_renderpass *rp; - struct kbase_jd_atom *start_katom; - unsigned long flags; - - lockdep_assert_held(&kctx->jctx.lock); - - if (WARN_ON(!(end_katom->core_req & BASE_JD_REQ_END_RENDERPASS))) - return; - - compiletime_assert((1ull << (sizeof(end_katom->renderpass_id) * 8)) <= - ARRAY_SIZE(kctx->jctx.renderpasses), - "Should check invalid access to renderpasses"); - - rp = &kctx->jctx.renderpasses[end_katom->renderpass_id]; - - if (WARN_ON(rp->end_katom != end_katom)) - return; - - dev_dbg(kctx->kbdev->dev, "JS return end atom %pK in state %d of RP %d\n", - (void *)end_katom, (int)rp->state, end_katom->renderpass_id); - - if (WARN_ON(rp->state != KBASE_JD_RP_OOM && rp->state != KBASE_JD_RP_RETRY_OOM)) - return; - - /* Reduce the number of mapped pages in the memory regions that - * triggered out-of-memory last time so that we can detect excessive - * memory usage again. 
- */ - kbase_gpu_vm_lock(kctx); - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - - while (!list_empty(&rp->oom_reg_list)) { - struct kbase_va_region *reg = - list_first_entry(&rp->oom_reg_list, struct kbase_va_region, link); - - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - - dev_dbg(kbdev->dev, "Reset backing to %zu pages for region %pK\n", - reg->threshold_pages, (void *)reg); - - if (!WARN_ON(reg->flags & KBASE_REG_VA_FREED)) - kbase_mem_shrink(kctx, reg, reg->threshold_pages); - - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - dev_dbg(kbdev->dev, "Deleting region %pK from list\n", (void *)reg); - list_del_init(®->link); - kbase_va_region_alloc_put(kctx, reg); - } - - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - kbase_gpu_vm_unlock(kctx); - - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - rp->state = KBASE_JD_RP_RETRY; - dev_dbg(kbdev->dev, "Changed state to %d for retry\n", rp->state); - - /* Allow the start of the renderpass to be pulled for execution again - * to begin/continue incremental rendering. 
- */ - start_katom = rp->start_katom; - if (!WARN_ON(!start_katom)) { - dev_dbg(kbdev->dev, "Unblocking start atom %pK\n", (void *)start_katom); - atomic_dec(&start_katom->blocked); - (void)kbase_js_ctx_list_add_pullable_head_nolock(kbdev, kctx, start_katom->slot_nr); - } - - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -} - static void js_return_worker(struct work_struct *data) { struct kbase_jd_atom *katom = container_of(data, struct kbase_jd_atom, work); @@ -2949,9 +2600,7 @@ static void js_return_worker(struct work_struct *data) katom->event_code); KBASE_KTRACE_ADD_JM(kbdev, JS_RETURN_WORKER, kctx, katom, katom->jc, 0); - - if (katom->event_code != BASE_JD_EVENT_END_RP_DONE) - KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_EX(kbdev, katom); + KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_EX(kbdev, katom); kbase_backend_complete_wq(kbdev, katom); @@ -2960,8 +2609,7 @@ static void js_return_worker(struct work_struct *data) mutex_lock(&js_devdata->queue_mutex); mutex_lock(&js_kctx_info->ctx.jsctx_mutex); - if (katom->event_code != BASE_JD_EVENT_END_RP_DONE) - atomic_dec(&katom->blocked); + atomic_dec(&katom->blocked); spin_lock_irqsave(&kbdev->hwaccess_lock, flags); @@ -3026,16 +2674,6 @@ static void js_return_worker(struct work_struct *data) mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); mutex_unlock(&js_devdata->queue_mutex); - if (katom->core_req & BASE_JD_REQ_START_RENDERPASS) { - mutex_lock(&kctx->jctx.lock); - js_return_of_start_rp(katom); - mutex_unlock(&kctx->jctx.lock); - } else if (katom->event_code == BASE_JD_EVENT_END_RP_DONE) { - mutex_lock(&kctx->jctx.lock); - js_return_of_end_rp(katom); - mutex_unlock(&kctx->jctx.lock); - } - dev_dbg(kbdev->dev, "JS: retained state %s finished", kbasep_js_has_atom_finished(&retained_state) ? 
"has" : "hasn't"); @@ -3071,144 +2709,6 @@ void kbase_js_unpull(struct kbase_context *kctx, struct kbase_jd_atom *katom) queue_work(kctx->jctx.job_done_wq, &katom->work); } -/** - * js_complete_start_rp() - Handle completion of atom that starts a renderpass - * @kctx: Context pointer - * @start_katom: Pointer to the atom that completed - * - * Put any references to virtual memory regions that might have been added by - * kbase_job_slot_softstop_start_rp() because the tiler job chain completed - * despite any pending soft-stop request. - * - * If the atom that just completed was soft-stopped during a previous attempt to - * run it then there should be a blocked end-of-renderpass atom waiting for it, - * which we must unblock to process the output of the tiler job chain. - * - * Return: true if caller should call kbase_backend_ctx_count_changed() - */ -static bool js_complete_start_rp(struct kbase_context *kctx, - struct kbase_jd_atom *const start_katom) -{ - struct kbase_device *const kbdev = kctx->kbdev; - struct kbase_jd_renderpass *rp; - bool timer_sync = false; - - lockdep_assert_held(&kctx->jctx.lock); - - if (WARN_ON(!(start_katom->core_req & BASE_JD_REQ_START_RENDERPASS))) - return false; - - compiletime_assert((1ull << (sizeof(start_katom->renderpass_id) * 8)) <= - ARRAY_SIZE(kctx->jctx.renderpasses), - "Should check invalid access to renderpasses"); - - rp = &kctx->jctx.renderpasses[start_katom->renderpass_id]; - - if (WARN_ON(rp->start_katom != start_katom)) - return false; - - dev_dbg(kctx->kbdev->dev, "Start atom %pK is done in state %d of RP %d\n", - (void *)start_katom, (int)rp->state, start_katom->renderpass_id); - - if (WARN_ON(rp->state == KBASE_JD_RP_COMPLETE)) - return false; - - if (rp->state == KBASE_JD_RP_PEND_OOM || rp->state == KBASE_JD_RP_RETRY_PEND_OOM) { - unsigned long flags; - - dev_dbg(kctx->kbdev->dev, "Start atom %pK completed before soft-stop\n", - (void *)start_katom); - - kbase_gpu_vm_lock(kctx); - 
spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - - while (!list_empty(&rp->oom_reg_list)) { - struct kbase_va_region *reg = - list_first_entry(&rp->oom_reg_list, struct kbase_va_region, link); - - WARN_ON(reg->flags & KBASE_REG_VA_FREED); - dev_dbg(kctx->kbdev->dev, "Deleting region %pK from list\n", (void *)reg); - list_del_init(®->link); - kbase_va_region_alloc_put(kctx, reg); - } - - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - kbase_gpu_vm_unlock(kctx); - } else { - dev_dbg(kctx->kbdev->dev, "Start atom %pK did not exceed memory threshold\n", - (void *)start_katom); - - WARN_ON(rp->state != KBASE_JD_RP_START && rp->state != KBASE_JD_RP_RETRY); - } - - if (rp->state == KBASE_JD_RP_RETRY || rp->state == KBASE_JD_RP_RETRY_PEND_OOM) { - struct kbase_jd_atom *const end_katom = rp->end_katom; - - if (!WARN_ON(!end_katom)) { - unsigned long flags; - - /* Allow the end of the renderpass to be pulled for - * execution again to continue incremental rendering. - */ - dev_dbg(kbdev->dev, "Unblocking end atom %pK!\n", (void *)end_katom); - atomic_dec(&end_katom->blocked); - - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - timer_sync = kbase_js_ctx_list_add_pullable_nolock(kbdev, kctx, - end_katom->slot_nr); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - } - } - - return timer_sync; -} - -/** - * js_complete_end_rp() - Handle final completion of atom that ends a renderpass - * @kctx: Context pointer - * @end_katom: Pointer to the atom that completed for the last time - * - * This function must only be called if the renderpass actually completed - * without the tiler job chain at the start using too much memory; otherwise - * completion of the end-of-renderpass atom is handled similarly to a soft-stop. 
- */ -static void js_complete_end_rp(struct kbase_context *kctx, struct kbase_jd_atom *const end_katom) -{ - struct kbase_device *const kbdev = kctx->kbdev; - unsigned long flags; - struct kbase_jd_renderpass *rp; - - lockdep_assert_held(&kctx->jctx.lock); - - if (WARN_ON(!(end_katom->core_req & BASE_JD_REQ_END_RENDERPASS))) - return; - - compiletime_assert((1ull << (sizeof(end_katom->renderpass_id) * 8)) <= - ARRAY_SIZE(kctx->jctx.renderpasses), - "Should check invalid access to renderpasses"); - - rp = &kctx->jctx.renderpasses[end_katom->renderpass_id]; - - if (WARN_ON(rp->end_katom != end_katom)) - return; - - dev_dbg(kbdev->dev, "End atom %pK is done in state %d of RP %d\n", (void *)end_katom, - (int)rp->state, end_katom->renderpass_id); - - if (WARN_ON(rp->state == KBASE_JD_RP_COMPLETE) || WARN_ON(rp->state == KBASE_JD_RP_OOM) || - WARN_ON(rp->state == KBASE_JD_RP_RETRY_OOM)) - return; - - /* Rendering completed without running out of memory. - */ - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - WARN_ON(!list_empty(&rp->oom_reg_list)); - rp->state = KBASE_JD_RP_COMPLETE; - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - - dev_dbg(kbdev->dev, "Renderpass %d is complete\n", end_katom->renderpass_id); -} - bool kbase_js_complete_atom_wq(struct kbase_context *kctx, struct kbase_jd_atom *katom) { struct kbasep_js_kctx_info *js_kctx_info; @@ -3225,13 +2725,6 @@ bool kbase_js_complete_atom_wq(struct kbase_context *kctx, struct kbase_jd_atom dev_dbg(kbdev->dev, "%s for atom %pK (s:%u)\n", __func__, (void *)katom, atom_slot); - /* Update the incremental rendering state machine. 
- */ - if (katom->core_req & BASE_JD_REQ_START_RENDERPASS) - timer_sync |= js_complete_start_rp(kctx, katom); - else if (katom->core_req & BASE_JD_REQ_END_RENDERPASS) - js_complete_end_rp(kctx, katom); - js_kctx_info = &kctx->jctx.sched_info; js_devdata = &kbdev->js_data; @@ -3320,61 +2813,6 @@ bool kbase_js_complete_atom_wq(struct kbase_context *kctx, struct kbase_jd_atom return context_idle; } -/** - * js_end_rp_is_complete() - Check whether an atom that ends a renderpass has - * completed for the last time. - * - * @end_katom: Pointer to the atom that completed on the hardware. - * - * An atom that ends a renderpass may be run on the hardware several times - * before notifying userspace or allowing dependent atoms to be executed. - * - * This function is used to decide whether or not to allow end-of-renderpass - * atom completion. It only returns false if the atom at the start of the - * renderpass was soft-stopped because it used too much memory during the most - * recent attempt at tiling. - * - * Return: True if the atom completed for the last time. 
- */ -static bool js_end_rp_is_complete(struct kbase_jd_atom *const end_katom) -{ - struct kbase_context *const kctx = end_katom->kctx; - struct kbase_device *const kbdev = kctx->kbdev; - struct kbase_jd_renderpass *rp; - - lockdep_assert_held(&kctx->kbdev->hwaccess_lock); - - if (WARN_ON(!(end_katom->core_req & BASE_JD_REQ_END_RENDERPASS))) - return true; - - compiletime_assert((1ull << (sizeof(end_katom->renderpass_id) * 8)) <= - ARRAY_SIZE(kctx->jctx.renderpasses), - "Should check invalid access to renderpasses"); - - rp = &kctx->jctx.renderpasses[end_katom->renderpass_id]; - - if (WARN_ON(rp->end_katom != end_katom)) - return true; - - dev_dbg(kbdev->dev, "JS complete end atom %pK in state %d of RP %d\n", (void *)end_katom, - (int)rp->state, end_katom->renderpass_id); - - if (WARN_ON(rp->state == KBASE_JD_RP_COMPLETE)) - return true; - - /* Failure of end-of-renderpass atoms must not return to the - * start of the renderpass. - */ - if (end_katom->event_code != BASE_JD_EVENT_DONE) - return true; - - if (rp->state != KBASE_JD_RP_OOM && rp->state != KBASE_JD_RP_RETRY_OOM) - return true; - - dev_dbg(kbdev->dev, "Suppressing end atom completion\n"); - return false; -} - struct kbase_jd_atom *kbase_js_complete_atom(struct kbase_jd_atom *katom, ktime_t *end_timestamp) { struct kbase_device *kbdev; @@ -3387,12 +2825,6 @@ struct kbase_jd_atom *kbase_js_complete_atom(struct kbase_jd_atom *katom, ktime_ lockdep_assert_held(&kctx->kbdev->hwaccess_lock); - if ((katom->core_req & BASE_JD_REQ_END_RENDERPASS) && !js_end_rp_is_complete(katom)) { - katom->event_code = BASE_JD_EVENT_END_RP_DONE; - kbase_js_unpull(kctx, katom); - return NULL; - } - if (katom->will_fail_event_code) katom->event_code = katom->will_fail_event_code; @@ -3442,70 +2874,6 @@ struct kbase_jd_atom *kbase_js_complete_atom(struct kbase_jd_atom *katom, ktime_ return NULL; } -/** - * kbase_js_atom_blocked_on_x_dep - Decide whether to ignore a cross-slot - * dependency - * @katom: Pointer to an atom in the 
slot ringbuffer - * - * A cross-slot dependency is ignored if necessary to unblock incremental - * rendering. If the atom at the start of a renderpass used too much memory - * and was soft-stopped then the atom at the end of a renderpass is submitted - * to hardware regardless of its dependency on the start-of-renderpass atom. - * This can happen multiple times for the same pair of atoms. - * - * Return: true to block the atom or false to allow it to be submitted to - * hardware - */ -bool kbase_js_atom_blocked_on_x_dep(struct kbase_jd_atom *const katom) -{ - struct kbase_context *const kctx = katom->kctx; - struct kbase_device *kbdev = kctx->kbdev; - struct kbase_jd_renderpass *rp; - - lockdep_assert_held(&kbdev->hwaccess_lock); - - if (!(katom->atom_flags & KBASE_KATOM_FLAG_X_DEP_BLOCKED)) { - dev_dbg(kbdev->dev, "Atom %pK is not blocked on a cross-slot dependency", - (void *)katom); - return false; - } - - if (!(katom->core_req & BASE_JD_REQ_END_RENDERPASS)) { - dev_dbg(kbdev->dev, "Atom %pK is blocked on a cross-slot dependency", - (void *)katom); - return true; - } - - compiletime_assert((1ull << (sizeof(katom->renderpass_id) * 8)) <= - ARRAY_SIZE(kctx->jctx.renderpasses), - "Should check invalid access to renderpasses"); - - rp = &kctx->jctx.renderpasses[katom->renderpass_id]; - /* We can read a subset of renderpass state without holding - * higher-level locks (but not end_katom, for example). - */ - - WARN_ON(rp->state == KBASE_JD_RP_COMPLETE); - - dev_dbg(kbdev->dev, "End atom has cross-slot dep in state %d\n", (int)rp->state); - - if (rp->state != KBASE_JD_RP_OOM && rp->state != KBASE_JD_RP_RETRY_OOM) - return true; - - /* Tiler ran out of memory so allow the fragment job chain to run - * if it only depends on the tiler job chain. 
- */ - if (katom->x_pre_dep != rp->start_katom) { - dev_dbg(kbdev->dev, "Dependency is on %pK not start atom %pK\n", - (void *)katom->x_pre_dep, (void *)rp->start_katom); - return true; - } - - dev_dbg(kbdev->dev, "Ignoring cross-slot dep on atom %pK\n", (void *)katom->x_pre_dep); - - return false; -} - void kbase_js_sched(struct kbase_device *kbdev, unsigned int js_mask) { struct kbasep_js_device_data *js_devdata; diff --git a/drivers/gpu/arm/bifrost/mali_kbase_kinstr_jm.c b/drivers/gpu/arm/bifrost/mali_kbase_kinstr_jm.c index aae4df83e98d..87085912bd6c 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_kinstr_jm.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_kinstr_jm.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2024 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -516,7 +516,8 @@ static ssize_t reader_changes_copy_to_user(struct reader_changes *const changes, do { changes_tail = changes->tail; changes_count = reader_changes_count_locked(changes); - read_size = min(changes_count * entry_size, buffer_size & ~(entry_size - 1)); + read_size = + min(size_mul(changes_count, entry_size), buffer_size & ~(entry_size - 1)); if (!read_size) break; @@ -743,7 +744,6 @@ int kbase_kinstr_jm_get_fd(struct kbase_kinstr_jm *const ctx, union kbase_kinstr size_t const change_size = sizeof(struct kbase_kinstr_jm_atom_state_change); int status; int fd; - size_t i; if (!ctx || !jm_fd_arg) return -EINVAL; @@ -753,10 +753,6 @@ int kbase_kinstr_jm_get_fd(struct kbase_kinstr_jm *const ctx, union kbase_kinstr if (!is_power_of_2(in->count)) return -EINVAL; - for (i = 0; i < sizeof(in->padding); ++i) - if (in->padding[i]) - return -EINVAL; - status = reader_init(&reader, ctx, in->count); if (status < 0) return status; diff --git 
a/drivers/gpu/arm/bifrost/mali_kbase_linux.h b/drivers/gpu/arm/bifrost/mali_kbase_linux.h index 9195be347e2b..cb55d4b417c4 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_linux.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_linux.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2024 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -35,8 +35,13 @@ #if IS_ENABLED(MALI_KERNEL_TEST_API) #define KBASE_EXPORT_TEST_API(func) EXPORT_SYMBOL(func) +/* Note: due to the 2-layer macro translation, using the NULL _etype does not + * compile, and one workaround is to use ERRNO_NULL instead. + */ +#define KBASE_ALLOW_ERROR_INJECTION_TEST_API(func, etype) ALLOW_ERROR_INJECTION(func, etype) #else #define KBASE_EXPORT_TEST_API(func) +#define KBASE_ALLOW_ERROR_INJECTION_TEST_API(func, etype) #endif #define KBASE_EXPORT_SYMBOL(func) EXPORT_SYMBOL(func) diff --git a/drivers/gpu/arm/bifrost/mali_kbase_mem.c b/drivers/gpu/arm/bifrost/mali_kbase_mem.c index ddf6ea352e72..1436d8290ebc 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_mem.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_mem.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2024 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -35,6 +35,7 @@ #include #include #include +#include #include #include #include @@ -42,13 +43,20 @@ #include #include #include -#include #include #include +/* Static key used to determine if large pages are enabled or not */ +static DEFINE_STATIC_KEY_FALSE(large_pages_static_key); + #define VA_REGION_SLAB_NAME_PREFIX "va-region-slab-" #define VA_REGION_SLAB_NAME_SIZE (DEVNAME_SIZE + sizeof(VA_REGION_SLAB_NAME_PREFIX) + 1) +#if GPU_PAGES_PER_CPU_PAGE > 1 +#define PAGE_METADATA_SLAB_NAME_PREFIX "page-metadata-slab-" +#define PAGE_METADATA_SLAB_NAME_SIZE (DEVNAME_SIZE + sizeof(PAGE_METADATA_SLAB_NAME_PREFIX) + 1) +#endif + #if MALI_JIT_PRESSURE_LIMIT_BASE /* @@ -143,21 +151,21 @@ MODULE_PARM_DESC(large_page_conf, "User override for large page usage on support static void kbasep_mem_page_size_init(struct kbase_device *kbdev) { if (!IS_ENABLED(CONFIG_LARGE_PAGE_SUPPORT)) { - kbdev->pagesize_2mb = false; dev_info(kbdev->dev, "Large page support was disabled at compile-time!"); return; } switch (large_page_conf) { case LARGE_PAGE_AUTO: { - kbdev->pagesize_2mb = kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_LARGE_PAGE_ALLOC); + if (kbase_hw_has_feature(kbdev, KBASE_HW_FEATURE_LARGE_PAGE_ALLOC)) + static_branch_inc(&large_pages_static_key); dev_info(kbdev->dev, "Large page allocation set to %s after hardware feature check", - kbdev->pagesize_2mb ? "true" : "false"); + static_branch_unlikely(&large_pages_static_key) ? 
"true" : "false"); break; } case LARGE_PAGE_ON: { - kbdev->pagesize_2mb = true; - if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_LARGE_PAGE_ALLOC)) + static_branch_inc(&large_pages_static_key); + if (!kbase_hw_has_feature(kbdev, KBASE_HW_FEATURE_LARGE_PAGE_ALLOC)) dev_warn(kbdev->dev, "Enabling large page allocations on unsupporting GPU!"); else @@ -165,12 +173,10 @@ static void kbasep_mem_page_size_init(struct kbase_device *kbdev) break; } case LARGE_PAGE_OFF: { - kbdev->pagesize_2mb = false; dev_info(kbdev->dev, "Large page allocation override: turned off\n"); break; } default: { - kbdev->pagesize_2mb = false; dev_info(kbdev->dev, "Invalid large page override, turning off large pages\n"); break; } @@ -180,25 +186,31 @@ static void kbasep_mem_page_size_init(struct kbase_device *kbdev) * so that userspace could read it to figure out the state of the configuration * if necessary. */ - if (kbdev->pagesize_2mb) + if (static_branch_unlikely(&large_pages_static_key)) large_page_conf = LARGE_PAGE_ON; else large_page_conf = LARGE_PAGE_OFF; } +inline bool kbase_is_large_pages_enabled(void) +{ + return static_branch_unlikely(&large_pages_static_key); +} +KBASE_EXPORT_TEST_API(kbase_is_large_pages_enabled); + int kbase_mem_init(struct kbase_device *kbdev) { int err = 0; - struct kbasep_mem_device *memdev; char va_region_slab_name[VA_REGION_SLAB_NAME_SIZE]; +#if GPU_PAGES_PER_CPU_PAGE > 1 + char page_metadata_slab_name[PAGE_METADATA_SLAB_NAME_SIZE]; +#endif #if IS_ENABLED(CONFIG_OF) struct device_node *mgm_node = NULL; #endif KBASE_DEBUG_ASSERT(kbdev); - memdev = &kbdev->memdev; - kbasep_mem_page_size_init(kbdev); scnprintf(va_region_slab_name, VA_REGION_SLAB_NAME_SIZE, VA_REGION_SLAB_NAME_PREFIX "%s", @@ -212,6 +224,17 @@ int kbase_mem_init(struct kbase_device *kbdev) return -ENOMEM; } +#if GPU_PAGES_PER_CPU_PAGE > 1 + scnprintf(page_metadata_slab_name, PAGE_METADATA_SLAB_NAME_SIZE, + PAGE_METADATA_SLAB_NAME_PREFIX "%s", kbdev->devname); + kbdev->page_metadata_slab = 
kmem_cache_create( + page_metadata_slab_name, sizeof(struct kbase_page_metadata), 0, 0, NULL); + if (kbdev->page_metadata_slab == NULL) { + dev_err(kbdev->dev, "Failed to create page_metadata_slab"); + return -ENOMEM; + } +#endif + kbase_mem_migrate_init(kbdev); kbase_mem_pool_group_config_set_max_size(&kbdev->mem_pool_defaults, KBASE_MEM_POOL_MAX_SIZE_KCTX); @@ -221,12 +244,6 @@ int kbase_mem_init(struct kbase_device *kbdev) kbdev->dma_buf_root = RB_ROOT; mutex_init(&kbdev->dma_buf_lock); -#ifdef IR_THRESHOLD - atomic_set(&memdev->ir_threshold, IR_THRESHOLD); -#else - atomic_set(&memdev->ir_threshold, DEFAULT_IR_THRESHOLD); -#endif - kbdev->mgm_dev = &kbase_native_mgm_dev; #if IS_ENABLED(CONFIG_OF) @@ -292,6 +309,10 @@ void kbase_mem_term(struct kbase_device *kbdev) kbase_mem_migrate_term(kbdev); +#if GPU_PAGES_PER_CPU_PAGE > 1 + kmem_cache_destroy(kbdev->page_metadata_slab); + kbdev->page_metadata_slab = NULL; +#endif kmem_cache_destroy(kbdev->va_region_slab); kbdev->va_region_slab = NULL; @@ -524,15 +545,20 @@ int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg) switch (alloc->imported.user_buf.state) { case KBASE_USER_BUF_STATE_GPU_MAPPED: { alloc->imported.user_buf.current_mapping_usage_count = 0; - kbase_user_buf_from_gpu_mapped_to_empty(kctx, reg); + kbase_mem_phy_alloc_ref_read(alloc) ? + kbase_user_buf_from_gpu_mapped_to_pinned(kctx, reg) : + kbase_user_buf_from_gpu_mapped_to_empty(kctx, reg); break; } case KBASE_USER_BUF_STATE_DMA_MAPPED: { - kbase_user_buf_from_dma_mapped_to_empty(kctx, reg); + kbase_mem_phy_alloc_ref_read(alloc) ? 
+ kbase_user_buf_from_dma_mapped_to_pinned(kctx, reg) : + kbase_user_buf_from_dma_mapped_to_empty(kctx, reg); break; } case KBASE_USER_BUF_STATE_PINNED: { - kbase_user_buf_from_pinned_to_empty(kctx, reg); + if (!kbase_mem_phy_alloc_ref_read(alloc)) + kbase_user_buf_from_pinned_to_empty(kctx, reg); break; } case KBASE_USER_BUF_STATE_EMPTY: { @@ -672,7 +698,9 @@ void kbase_sync_single(struct kbase_context *kctx, struct tagged_addr t_cpu_pa, dma_addr_t dma_addr; WARN_ON(!cpu_page); - WARN_ON((size_t)offset + size > PAGE_SIZE); + + if ((size_t)offset + size > PAGE_SIZE) + dev_warn(kctx->kbdev->dev, "Size and offset exceed page size"); dma_addr = kbase_dma_addr_from_tagged(t_cpu_pa) + (dma_addr_t)offset; @@ -713,19 +741,105 @@ void kbase_sync_single(struct kbase_context *kctx, struct tagged_addr t_cpu_pa, } } +static int kbase_get_sync_scope_params(struct kbase_context *kctx, unsigned long start, size_t size, + u64 *page_off, u64 *page_cnt, u64 *offset) +{ + u64 tmp_off; + struct kbase_cpu_mapping *map = + kbasep_find_enclosing_cpu_mapping(kctx, start, size, &tmp_off); + + if (!map) { + dev_dbg(kctx->kbdev->dev, "%s: Can't find CPU mapping 0x%016lX", __func__, start); + return -EINVAL; + } + + *page_off = tmp_off >> PAGE_SHIFT; + tmp_off &= ~PAGE_MASK; + *page_cnt = (size + tmp_off + (PAGE_SIZE - 1)) >> PAGE_SHIFT; + *offset = tmp_off; + + return 0; +} + +static int kbase_sync_imported_user_buf(struct kbase_context *kctx, struct kbase_va_region *reg, + struct basep_syncset *sset, enum kbase_sync_type sync_fn) +{ + unsigned long start = (uintptr_t)sset->user_addr; + size_t size = (size_t)sset->size; + dma_addr_t *dma_addr = reg->gpu_alloc->imported.user_buf.dma_addrs; + u64 page_off = 0, page_count = 0, offset = 0; + u64 i; + size_t sz; + int err; + + lockdep_assert_held(&kctx->reg_lock); + + if (sync_fn != KBASE_SYNC_TO_CPU && sync_fn != KBASE_SYNC_TO_DEVICE) { + dev_dbg(kctx->kbdev->dev, "%s: Unknown kbase sync_fn type!", __func__); + return -EINVAL; + } + + /* Early 
return if the imported user_buffer is not yet mapped to GPU */ + if (reg->gpu_alloc->imported.user_buf.state != KBASE_USER_BUF_STATE_GPU_MAPPED) + return -EINVAL; + + err = kbase_get_sync_scope_params(kctx, start, size, &page_off, &page_count, &offset); + if (err) + return err; + + /* Check the sync is inside the imported range */ + if ((page_off >= reg->gpu_alloc->nents) || + ((page_off + page_count) > reg->gpu_alloc->nents)) + return -EINVAL; + + dma_addr = reg->gpu_alloc->imported.user_buf.dma_addrs; + /* Sync first page */ + sz = MIN(((size_t)PAGE_SIZE - offset), size); + if (sync_fn == KBASE_SYNC_TO_CPU) + dma_sync_single_for_cpu(kctx->kbdev->dev, dma_addr[page_off] + offset, sz, + DMA_BIDIRECTIONAL); + else + dma_sync_single_for_device(kctx->kbdev->dev, dma_addr[page_off] + offset, sz, + DMA_BIDIRECTIONAL); + + /* Calculate the size for last page */ + sz = ((start + size - 1) & ~PAGE_MASK) + 1; + + /* Sync middle pages (if any) */ + for (i = 1; page_count > 2 && i < page_count - 1; i++) { + if (sync_fn == KBASE_SYNC_TO_CPU) + dma_sync_single_for_cpu(kctx->kbdev->dev, dma_addr[page_off + i], PAGE_SIZE, + DMA_BIDIRECTIONAL); + else + dma_sync_single_for_device(kctx->kbdev->dev, dma_addr[page_off + i], + PAGE_SIZE, DMA_BIDIRECTIONAL); + } + + /* Sync last page (if any) */ + if (page_count > 1) { + i = page_off + page_count - 1; + if (sync_fn == KBASE_SYNC_TO_CPU) + dma_sync_single_for_cpu(kctx->kbdev->dev, dma_addr[i], sz, + DMA_BIDIRECTIONAL); + else + dma_sync_single_for_device(kctx->kbdev->dev, dma_addr[i], sz, + DMA_BIDIRECTIONAL); + } + + return 0; +} + static int kbase_do_syncset(struct kbase_context *kctx, struct basep_syncset *sset, enum kbase_sync_type sync_fn) { int err = 0; struct kbase_va_region *reg; - struct kbase_cpu_mapping *map; unsigned long start; size_t size; struct tagged_addr *cpu_pa; struct tagged_addr *gpu_pa; - u64 page_off, page_count; + u64 page_off = 0, page_count = 0, offset = 0; u64 i; - u64 offset; size_t sz; 
kbase_os_mem_map_lock(kctx); @@ -748,7 +862,10 @@ static int kbase_do_syncset(struct kbase_context *kctx, struct basep_syncset *ss * memory may be cached. */ if (kbase_mem_is_imported(reg->gpu_alloc->type)) { - err = kbase_mem_do_sync_imported(kctx, reg, sync_fn); + if (reg->gpu_alloc->type == KBASE_MEM_TYPE_IMPORTED_USER_BUF) + err = kbase_sync_imported_user_buf(kctx, reg, sset, sync_fn); + else + err = kbase_sync_imported_umm(kctx, reg, sync_fn); goto out_unlock; } @@ -758,17 +875,10 @@ static int kbase_do_syncset(struct kbase_context *kctx, struct basep_syncset *ss start = (uintptr_t)sset->user_addr; size = (size_t)sset->size; - map = kbasep_find_enclosing_cpu_mapping(kctx, start, size, &offset); - if (!map) { - dev_warn(kctx->kbdev->dev, "Can't find CPU mapping 0x%016lX for VA 0x%016llX", - start, sset->mem_handle.basep.handle); - err = -EINVAL; + err = kbase_get_sync_scope_params(kctx, start, size, &page_off, &page_count, &offset); + if (err) goto out_unlock; - } - page_off = offset >> PAGE_SHIFT; - offset &= ~PAGE_MASK; - page_count = (size + offset + (PAGE_SIZE - 1)) >> PAGE_SHIFT; cpu_pa = kbase_get_cpu_phy_pages(reg); gpu_pa = kbase_get_gpu_phy_pages(reg); @@ -777,7 +887,6 @@ static int kbase_do_syncset(struct kbase_context *kctx, struct basep_syncset *ss err = -EINVAL; goto out_unlock; } - if (page_off >= reg->gpu_alloc->nents) { /* Start of sync range is outside the physically backed region * so nothing to do @@ -942,7 +1051,7 @@ int kbase_mem_free(struct kbase_context *kctx, u64 gpu_addr) __func__); return -EINVAL; } - kbase_gpu_vm_lock(kctx); + kbase_gpu_vm_lock_with_pmode_sync(kctx); if (gpu_addr >= BASE_MEM_COOKIE_BASE && gpu_addr < BASE_MEM_FIRST_FREE_ADDRESS) { unsigned int cookie = PFN_DOWN(gpu_addr - BASE_MEM_COOKIE_BASE); @@ -981,7 +1090,7 @@ int kbase_mem_free(struct kbase_context *kctx, u64 gpu_addr) } out_unlock: - kbase_gpu_vm_unlock(kctx); + kbase_gpu_vm_unlock_with_pmode_sync(kctx); return err; } @@ -1126,6 +1235,7 @@ int 
kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc, size_t nr_pa * to satisfy the memory allocation request. */ size_t nr_pages_to_account = 0; + size_t nr_pages_from_partials = 0; if (WARN_ON(alloc->type != KBASE_MEM_TYPE_NATIVE) || WARN_ON(alloc->imported.native.kctx == NULL) || @@ -1156,7 +1266,7 @@ int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc, size_t nr_pa /* Check if we have enough pages requested so we can allocate a large * page (512 * 4KB = 2MB ) */ - if (kbdev->pagesize_2mb && nr_left >= NUM_PAGES_IN_2MB_LARGE_PAGE) { + if (kbase_is_large_pages_enabled() && nr_left >= NUM_PAGES_IN_2MB_LARGE_PAGE) { size_t nr_lp = nr_left / NUM_PAGES_IN_2MB_LARGE_PAGE; res = kbase_mem_pool_alloc_pages(&kctx->mem_pools.large[alloc->group_id], @@ -1184,6 +1294,7 @@ int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc, size_t nr_pa *tp++ = as_tagged_tag(page_to_phys(sa->page + pidx), FROM_PARTIAL); nr_left--; + nr_pages_from_partials++; if (bitmap_full(sa->sub_pages, NUM_PAGES_IN_2MB_LARGE_PAGE)) { @@ -1291,6 +1402,13 @@ alloc_failed: alloc->nents += nr_pages_to_free; kbase_free_phy_pages_helper(alloc, nr_pages_to_free); + + /* Notice that the sub-pages from "partials" are not subtracted + * from the counter by the free pages helper, because they just go + * back to the "partials" they belong to, therefore they must be + * subtracted from the counter here. 
+ */ + nr_left += nr_pages_from_partials; } /* Undo the preliminary memory accounting that was done early on @@ -1307,6 +1425,7 @@ alloc_failed: invalid_request: return -ENOMEM; } +KBASE_EXPORT_TEST_API(kbase_alloc_phy_pages_helper); static size_t free_partial_locked(struct kbase_context *kctx, struct kbase_mem_pool *pool, struct tagged_addr tp) @@ -1363,7 +1482,7 @@ struct tagged_addr *kbase_alloc_phy_pages_helper_locked(struct kbase_mem_phy_all kctx = alloc->imported.native.kctx; kbdev = kctx->kbdev; - if (!kbdev->pagesize_2mb) + if (!kbase_is_large_pages_enabled()) WARN_ON(pool->order); if (alloc->reg) { @@ -1386,7 +1505,7 @@ struct tagged_addr *kbase_alloc_phy_pages_helper_locked(struct kbase_mem_phy_all tp = alloc->pages + alloc->nents; new_pages = tp; - if (kbdev->pagesize_2mb && pool->order) { + if (kbase_is_large_pages_enabled() && pool->order) { size_t nr_lp = nr_left / NUM_PAGES_IN_2MB_LARGE_PAGE; res = kbase_mem_pool_alloc_pages_locked(pool, nr_lp * NUM_PAGES_IN_2MB_LARGE_PAGE, @@ -1503,7 +1622,7 @@ alloc_failed: struct tagged_addr *start_free = alloc->pages + alloc->nents; - if (kbdev->pagesize_2mb && pool->order) { + if (kbase_is_large_pages_enabled() && pool->order) { while (nr_pages_to_free) { if (is_huge_head(*start_free)) { kbase_mem_pool_free_pages_locked( @@ -1659,6 +1778,7 @@ int kbase_free_phy_pages_helper(struct kbase_mem_phy_alloc *alloc, size_t nr_pag return 0; } +KBASE_EXPORT_TEST_API(kbase_free_phy_pages_helper); void kbase_free_phy_pages_helper_locked(struct kbase_mem_phy_alloc *alloc, struct kbase_mem_pool *pool, struct tagged_addr *pages, @@ -1897,11 +2017,13 @@ out_term: } KBASE_EXPORT_TEST_API(kbase_alloc_phy_pages); -void kbase_set_phy_alloc_page_status(struct kbase_mem_phy_alloc *alloc, +void kbase_set_phy_alloc_page_status(struct kbase_context *kctx, struct kbase_mem_phy_alloc *alloc, enum kbase_page_status status) { u32 i = 0; + lockdep_assert_held(&kctx->reg_lock); + for (; i < alloc->nents; i++) { struct tagged_addr phys = 
alloc->pages[i]; struct kbase_page_metadata *page_md = kbase_page_private(as_page(phys)); @@ -1921,7 +2043,7 @@ void kbase_set_phy_alloc_page_status(struct kbase_mem_phy_alloc *alloc, } } -bool kbase_check_alloc_flags(unsigned long flags) +bool kbase_check_alloc_flags(struct kbase_context *kctx, unsigned long flags) { /* Only known input flags should be set. */ if (flags & ~BASE_MEM_FLAGS_INPUT_MASK) @@ -1997,6 +2119,36 @@ bool kbase_check_alloc_flags(unsigned long flags) return false; #endif + /* Cannot be set only allocation, only with base_mem_set */ + if ((flags & BASE_MEM_DONT_NEED) && + (mali_kbase_supports_reject_alloc_mem_dont_need(kctx->api_version))) + return false; + + /* Cannot directly allocate protected memory, it is imported instead */ + if ((flags & BASE_MEM_PROTECTED) && + (mali_kbase_supports_reject_alloc_mem_protected_in_unprotected_allocs( + kctx->api_version))) + return false; + +/* No unused bits are valid for allocations */ +#if MALI_USE_CSF + if ((flags & BASE_MEM_UNUSED_BIT_20) && + (mali_kbase_supports_reject_alloc_mem_unused_bit_20(kctx->api_version))) + return false; + + if ((flags & BASE_MEM_UNUSED_BIT_27) && + (mali_kbase_supports_reject_alloc_mem_unused_bit_27(kctx->api_version))) + return false; +#else /* MALI_USE_CSF */ + if ((flags & BASE_MEM_UNUSED_BIT_8) && + (mali_kbase_supports_reject_alloc_mem_unused_bit_8(kctx->api_version))) + return false; + + if ((flags & BASE_MEM_UNUSED_BIT_19) && + (mali_kbase_supports_reject_alloc_mem_unused_bit_19(kctx->api_version))) + return false; +#endif /* MALI_USE_CSF */ + return true; } @@ -2156,17 +2308,31 @@ void kbase_gpu_vm_lock(struct kbase_context *kctx) KBASE_DEBUG_ASSERT(kctx != NULL); mutex_lock(&kctx->reg_lock); } - KBASE_EXPORT_TEST_API(kbase_gpu_vm_lock); +void kbase_gpu_vm_lock_with_pmode_sync(struct kbase_context *kctx) +{ +#if MALI_USE_CSF + down_read(&kctx->kbdev->csf.mmu_sync_sem); +#endif + kbase_gpu_vm_lock(kctx); +} + void kbase_gpu_vm_unlock(struct kbase_context *kctx) { 
KBASE_DEBUG_ASSERT(kctx != NULL); mutex_unlock(&kctx->reg_lock); } - KBASE_EXPORT_TEST_API(kbase_gpu_vm_unlock); +void kbase_gpu_vm_unlock_with_pmode_sync(struct kbase_context *kctx) +{ + kbase_gpu_vm_unlock(kctx); +#if MALI_USE_CSF + up_read(&kctx->kbdev->csf.mmu_sync_sem); +#endif +} + #if IS_ENABLED(CONFIG_DEBUG_FS) struct kbase_jit_debugfs_data { int (*func)(struct kbase_jit_debugfs_data *data); @@ -2708,7 +2874,7 @@ static int kbase_jit_grow(struct kbase_context *kctx, const struct base_jit_allo delta = info->commit_pages - reg->gpu_alloc->nents; pages_required = delta; - if (kctx->kbdev->pagesize_2mb && pages_required >= NUM_PAGES_IN_2MB_LARGE_PAGE) { + if (kbase_is_large_pages_enabled() && pages_required >= NUM_PAGES_IN_2MB_LARGE_PAGE) { pool = &kctx->mem_pools.large[kctx->jit_group_id]; /* Round up to number of 2 MB pages required */ pages_required += (NUM_PAGES_IN_2MB_LARGE_PAGE - 1); @@ -2746,10 +2912,10 @@ static int kbase_jit_grow(struct kbase_context *kctx, const struct base_jit_allo kbase_mem_pool_lock(pool); } - if (reg->gpu_alloc->nents > info->commit_pages) { + if (reg->gpu_alloc->nents >= info->commit_pages) { kbase_mem_pool_unlock(pool); spin_unlock(&kctx->mem_partials_lock); - dev_warn( + dev_info( kctx->kbdev->dev, "JIT alloc grown beyond the required number of initially required pages, this grow no longer needed."); goto done; @@ -2999,7 +3165,7 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx, if (!jit_allow_allocate(kctx, info, ignore_pressure_limit)) return NULL; - if (kctx->kbdev->pagesize_2mb) { + if (kbase_is_large_pages_enabled()) { /* Preallocate memory for the sub-allocation structs */ for (i = 0; i != ARRAY_SIZE(prealloc_sas); ++i) { prealloc_sas[i] = kmalloc(sizeof(*prealloc_sas[i]), GFP_KERNEL); @@ -3008,7 +3174,7 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx, } } - kbase_gpu_vm_lock(kctx); + kbase_gpu_vm_lock_with_pmode_sync(kctx); mutex_lock(&kctx->jit_evict_lock); /* @@ -3086,7 
+3252,7 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx, kbase_jit_done_phys_increase(kctx, needed_pages); #endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ - kbase_gpu_vm_unlock(kctx); + kbase_gpu_vm_unlock_with_pmode_sync(kctx); if (ret) { /* @@ -3119,15 +3285,17 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx, if (kbase_is_page_migration_enabled()) { kbase_gpu_vm_lock(kctx); mutex_lock(&kctx->jit_evict_lock); - kbase_set_phy_alloc_page_status(reg->gpu_alloc, ALLOCATED_MAPPED); + kbase_set_phy_alloc_page_status(kctx, reg->gpu_alloc, + ALLOCATED_MAPPED); mutex_unlock(&kctx->jit_evict_lock); kbase_gpu_vm_unlock(kctx); } } } else { /* No suitable JIT allocation was found so create a new one */ - u64 flags = BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR | - BASE_MEM_GROW_ON_GPF | BASE_MEM_COHERENT_LOCAL | BASEP_MEM_NO_USER_FREE; + base_mem_alloc_flags flags = BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_GPU_RD | + BASE_MEM_PROT_GPU_WR | BASE_MEM_GROW_ON_GPF | + BASE_MEM_COHERENT_LOCAL | BASEP_MEM_NO_USER_FREE; u64 gpu_addr; #if !MALI_USE_CSF @@ -3147,7 +3315,7 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx, #endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ mutex_unlock(&kctx->jit_evict_lock); - kbase_gpu_vm_unlock(kctx); + kbase_gpu_vm_unlock_with_pmode_sync(kctx); reg = kbase_mem_alloc(kctx, info->va_pages, info->commit_pages, info->extension, &flags, &gpu_addr, mmu_sync_info); @@ -3224,6 +3392,7 @@ end: return reg; } +KBASE_EXPORT_TEST_API(kbase_jit_allocate); void kbase_jit_free(struct kbase_context *kctx, struct kbase_va_region *reg) { @@ -3249,9 +3418,9 @@ void kbase_jit_free(struct kbase_context *kctx, struct kbase_va_region *reg) u64 delta = old_pages - new_size; if (delta) { - mutex_lock(&kctx->reg_lock); + kbase_gpu_vm_lock_with_pmode_sync(kctx); kbase_mem_shrink(kctx, reg, old_pages - delta); - mutex_unlock(&kctx->reg_lock); + kbase_gpu_vm_unlock_with_pmode_sync(kctx); } } @@ -3265,13 
+3434,30 @@ void kbase_jit_free(struct kbase_context *kctx, struct kbase_va_region *reg) trace_jit_stats(kctx, reg->jit_bin_id, UINT_MAX); + kbase_gpu_vm_lock_with_pmode_sync(kctx); + if (unlikely(atomic_read(&reg->cpu_alloc->kernel_mappings))) { + WARN_ON(atomic64_read(&reg->no_user_free_count) > 1); + kbase_va_region_no_user_free_dec(reg); + mutex_lock(&kctx->jit_evict_lock); + list_del(&reg->jit_node); + mutex_unlock(&kctx->jit_evict_lock); + kbase_mem_free_region(kctx, reg); + kbase_gpu_vm_unlock_with_pmode_sync(kctx); + return; + } kbase_mem_evictable_mark_reclaim(reg->gpu_alloc); - - kbase_gpu_vm_lock(kctx); reg->flags |= KBASE_REG_DONT_NEED; reg->flags &= ~KBASE_REG_ACTIVE_JIT_ALLOC; kbase_mem_shrink_cpu_mapping(kctx, reg, 0, reg->gpu_alloc->nents); - kbase_gpu_vm_unlock(kctx); + + /* Inactive JIT regions should be freed by the shrinker and not impacted + * by page migration. Once freed, they will enter into the page migration + * state machine via the mempools. + */ + if (kbase_is_page_migration_enabled()) + kbase_set_phy_alloc_page_status(kctx, reg->gpu_alloc, NOT_MOVABLE); + + kbase_gpu_vm_unlock_with_pmode_sync(kctx); /* * Add the allocation to the eviction list and the jit pool, after this @@ -3286,14 +3472,9 @@ void kbase_jit_free(struct kbase_context *kctx, struct kbase_va_region *reg) list_move(&reg->jit_node, &kctx->jit_pool_head); - /* Inactive JIT regions should be freed by the shrinker and not impacted - * by page migration. Once freed, they will enter into the page migration - * state machine via the mempools. 
- */ - if (kbase_is_page_migration_enabled()) - kbase_set_phy_alloc_page_status(reg->gpu_alloc, NOT_MOVABLE); mutex_unlock(&kctx->jit_evict_lock); } +KBASE_EXPORT_TEST_API(kbase_jit_free); void kbase_jit_backing_lost(struct kbase_va_region *reg) { @@ -3356,8 +3537,7 @@ void kbase_jit_term(struct kbase_context *kctx) struct kbase_va_region *walker; /* Free all allocations for this context */ - - kbase_gpu_vm_lock(kctx); + kbase_gpu_vm_lock_with_pmode_sync(kctx); mutex_lock(&kctx->jit_evict_lock); /* Free all allocations from the pool */ while (!list_empty(&kctx->jit_pool_head)) { @@ -3398,7 +3578,7 @@ void kbase_jit_term(struct kbase_context *kctx) WARN_ON(kctx->jit_phys_pages_to_be_allocated); #endif mutex_unlock(&kctx->jit_evict_lock); - kbase_gpu_vm_unlock(kctx); + kbase_gpu_vm_unlock_with_pmode_sync(kctx); /* * Flush the freeing of allocations whose backing has been freed @@ -3916,9 +4096,6 @@ int kbase_map_external_resource(struct kbase_context *kctx, struct kbase_va_regi case KBASE_MEM_TYPE_IMPORTED_USER_BUF: { user_buf_original_state = reg->gpu_alloc->imported.user_buf.state; - if ((reg->gpu_alloc->imported.user_buf.mm != locked_mm) && (!reg->gpu_alloc->nents)) - return -EINVAL; - /* This function is reachable through many code paths, and the imported * memory handle could be in any of the possible states: consider all * of them as a valid starting point, and progress through all stages @@ -3928,19 +4105,31 @@ int kbase_map_external_resource(struct kbase_context *kctx, struct kbase_va_regi * Error recovery restores the original state and goes no further. 
*/ switch (user_buf_original_state) { - case KBASE_USER_BUF_STATE_EMPTY: - case KBASE_USER_BUF_STATE_PINNED: + case KBASE_USER_BUF_STATE_EMPTY: { + if (reg->gpu_alloc->imported.user_buf.mm != locked_mm) + return -EINVAL; + err = kbase_user_buf_from_empty_to_gpu_mapped(kctx, reg); + break; + } + case KBASE_USER_BUF_STATE_PINNED: { + if (!reg->gpu_alloc->nents) + return -EINVAL; + err = kbase_user_buf_from_pinned_to_gpu_mapped(kctx, reg); + break; + } case KBASE_USER_BUF_STATE_DMA_MAPPED: { - if (user_buf_original_state == KBASE_USER_BUF_STATE_EMPTY) - err = kbase_user_buf_from_empty_to_gpu_mapped(kctx, reg); - else if (user_buf_original_state == KBASE_USER_BUF_STATE_PINNED) - err = kbase_user_buf_from_pinned_to_gpu_mapped(kctx, reg); - else - err = kbase_user_buf_from_dma_mapped_to_gpu_mapped(kctx, reg); - - if (err) - return err; - + /* If the imported handle has not pinned any physical pages yet: + * this function can only be called within the context of a user + * process, which must be the same process as the one that + * originally created the memory handle. + * + * In all other transitions: make sure that the imported handle + * has already pinned physical pages before proceeding to mapping + * operations. 
+ */ + if (!reg->gpu_alloc->nents) + return -EINVAL; + err = kbase_user_buf_from_dma_mapped_to_gpu_mapped(kctx, reg); break; } case KBASE_USER_BUF_STATE_GPU_MAPPED: { @@ -3954,6 +4143,8 @@ int kbase_map_external_resource(struct kbase_context *kctx, struct kbase_va_regi reg->gpu_alloc->imported.user_buf.state); return -EINVAL; } + if (err) + return err; /* If the state was valid and the transition is happening, then the handle * must be in GPU_MAPPED state now and the reference counter of GPU mappings @@ -4021,13 +4212,8 @@ void kbase_unmap_external_resource(struct kbase_context *kctx, struct kbase_va_r kbase_va_region_alloc_put(kctx, reg); } -static inline u64 kbasep_get_va_gpu_addr(struct kbase_va_region *reg) -{ - return reg->start_pfn << PAGE_SHIFT; -} - -struct kbase_ctx_ext_res_meta *kbase_sticky_resource_acquire(struct kbase_context *kctx, - u64 gpu_addr) +struct kbase_ctx_ext_res_meta * +kbase_sticky_resource_acquire(struct kbase_context *kctx, u64 gpu_addr, struct mm_struct *locked_mm) { struct kbase_ctx_ext_res_meta *meta = NULL; struct kbase_ctx_ext_res_meta *walker; @@ -4066,7 +4252,7 @@ struct kbase_ctx_ext_res_meta *kbase_sticky_resource_acquire(struct kbase_contex /* Map the external resource to the GPU allocation of the region * and acquire the reference to the VA region */ - if (kbase_map_external_resource(kctx, meta->reg, NULL)) + if (kbase_map_external_resource(kctx, meta->reg, locked_mm)) goto fail_map; meta->ref = 1; diff --git a/drivers/gpu/arm/bifrost/mali_kbase_mem.h b/drivers/gpu/arm/bifrost/mali_kbase_mem.h index e4a7d6bd0a30..880b8525ae37 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_mem.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_mem.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2024 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -104,8 +104,8 @@ static inline void kbase_process_page_usage_inc(struct kbase_context *kctx, int /* Index of chosen MEMATTR for this region (0..7) */ #define KBASE_REG_MEMATTR_MASK (7ul << 16) -#define KBASE_REG_MEMATTR_INDEX(x) (((x)&7) << 16) -#define KBASE_REG_MEMATTR_VALUE(x) (((x)&KBASE_REG_MEMATTR_MASK) >> 16) +#define KBASE_REG_MEMATTR_INDEX(x) (((x) & 7) << 16) +#define KBASE_REG_MEMATTR_VALUE(x) (((x) & KBASE_REG_MEMATTR_MASK) >> 16) /* AS_MEMATTR values from MMU_MEMATTR_STAGE1: */ /* Use GPU implementation-defined caching policy. */ @@ -482,6 +482,26 @@ struct kbase_page_metadata { struct kbase_mmu_table *mmut; /* GPU virtual page frame number info is in GPU_PAGE_SIZE units */ u64 pgd_vpfn_level; +#if GPU_PAGES_PER_CPU_PAGE > 1 + /** + * @pgd_link: Link to the &kbase_mmu_table.pgd_pages_list + */ + struct list_head pgd_link; + /** + * @pgd_page: Back pointer to the PGD page that the metadata is + * associated with + */ + struct page *pgd_page; + /** + * @allocated_sub_pages: Bitmap representing the allocation status + * of sub pages in the @pgd_page + */ + DECLARE_BITMAP(allocated_sub_pages, GPU_PAGES_PER_CPU_PAGE); + /** + * @num_allocated_sub_pages: The number of allocated sub pages in @pgd_page + */ + s8 num_allocated_sub_pages; +#endif } pt_mapped; struct { struct kbase_device *kbdev; @@ -510,6 +530,7 @@ enum kbase_jit_report_flags { KBASE_JIT_REPORT_ON_ALLOC_OR_FREE = (1u << 0) }; /** * kbase_set_phy_alloc_page_status - Set the page migration status of the underlying * physical allocation. + * @kctx: Pointer to Kbase context. * @alloc: the physical allocation containing the pages whose metadata is going * to be modified * @status: the status the pages should end up in @@ -518,7 +539,7 @@ enum kbase_jit_report_flags { KBASE_JIT_REPORT_ON_ALLOC_OR_FREE = (1u << 0) }; * proper states are set. 
Instead, it is only used when we change the allocation * to NOT_MOVABLE or from NOT_MOVABLE to ALLOCATED_MAPPED */ -void kbase_set_phy_alloc_page_status(struct kbase_mem_phy_alloc *alloc, +void kbase_set_phy_alloc_page_status(struct kbase_context *kctx, struct kbase_mem_phy_alloc *alloc, enum kbase_page_status status); static inline void kbase_mem_phy_alloc_gpu_mapped(struct kbase_mem_phy_alloc *alloc) @@ -589,6 +610,11 @@ int kbase_mem_init(struct kbase_device *kbdev); void kbase_mem_halt(struct kbase_device *kbdev); void kbase_mem_term(struct kbase_device *kbdev); +static inline unsigned int kbase_mem_phy_alloc_ref_read(struct kbase_mem_phy_alloc *alloc) +{ + return kref_read(&alloc->kref); +} + static inline struct kbase_mem_phy_alloc *kbase_mem_phy_alloc_get(struct kbase_mem_phy_alloc *alloc) { kref_get(&alloc->kref); @@ -615,9 +641,6 @@ static inline struct kbase_mem_phy_alloc *kbase_mem_phy_alloc_put(struct kbase_m * @nr_pages: The size of the region in pages. * @initial_commit: Initial commit, for aligning the start address and * correctly growing KBASE_REG_TILER_ALIGN_TOP regions. - * @threshold_pages: If non-zero and the amount of memory committed to a region - * that can grow on page fault exceeds this number of pages - * then the driver switches to incremental rendering. * @flags: Flags * @extension: Number of pages allocated on page fault. * @cpu_alloc: The physical memory we mmap to the CPU when mapping this region. 
@@ -654,8 +677,7 @@ struct kbase_va_region { void *user_data; size_t nr_pages; size_t initial_commit; - size_t threshold_pages; - unsigned long flags; + base_mem_alloc_flags flags; size_t extension; struct kbase_mem_phy_alloc *cpu_alloc; struct kbase_mem_phy_alloc *gpu_alloc; @@ -909,10 +931,12 @@ static inline struct kbase_mem_phy_alloc *kbase_alloc_create(struct kbase_contex atomic_set(&alloc->gpu_mappings, 0); atomic_set(&alloc->kernel_mappings, 0); alloc->nents = 0; - alloc->pages = (void *)(alloc + 1); - /* fill pages with invalid address value */ - for (i = 0; i < nr_pages; i++) - alloc->pages[i] = as_tagged(KBASE_INVALID_PHYSICAL_ADDRESS); + if (type != KBASE_MEM_TYPE_ALIAS) { + alloc->pages = (void *)(alloc + 1); + /* fill pages with invalid address value */ + for (i = 0; i < nr_pages; i++) + alloc->pages[i] = as_tagged(KBASE_INVALID_PHYSICAL_ADDRESS); + } INIT_LIST_HEAD(&alloc->mappings); alloc->type = type; alloc->group_id = group_id; @@ -1302,7 +1326,7 @@ struct page *kbase_mem_alloc_page(struct kbase_mem_pool *pool, const bool alloc_ */ void kbase_mem_pool_free_page(struct kbase_mem_pool *pool, struct page *p); -bool kbase_check_alloc_flags(unsigned long flags); +bool kbase_check_alloc_flags(struct kbase_context *kctx, unsigned long flags); bool kbase_check_import_flags(unsigned long flags); static inline bool kbase_import_size_is_valid(struct kbase_device *kbdev, u64 va_pages) @@ -1408,12 +1432,30 @@ int kbase_update_region_flags(struct kbase_context *kctx, struct kbase_va_region */ void kbase_gpu_vm_lock(struct kbase_context *kctx); +/** + * kbase_gpu_vm_lock_with_pmode_sync() - Wrapper of kbase_gpu_vm_lock. + * @kctx: KBase context + * + * Same as kbase_gpu_vm_lock for JM GPU. + * Additionally acquire P.mode read-write semaphore for CSF GPU. 
+ */ +void kbase_gpu_vm_lock_with_pmode_sync(struct kbase_context *kctx); + /** * kbase_gpu_vm_unlock() - Release the per-context region list lock * @kctx: KBase context */ void kbase_gpu_vm_unlock(struct kbase_context *kctx); +/** + * kbase_gpu_vm_unlock_with_pmode_sync() - Wrapper of kbase_gpu_vm_unlock. + * @kctx: KBase context + * + * Same as kbase_gpu_vm_unlock for JM GPU. + * Additionally release P.mode read-write semaphore for CSF GPU. + */ +void kbase_gpu_vm_unlock_with_pmode_sync(struct kbase_context *kctx); + int kbase_alloc_phy_pages(struct kbase_va_region *reg, size_t vsize, size_t size); /** @@ -1651,7 +1693,7 @@ int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc, size_t nr_pa * * @prealloc_sa: Information about the partial allocation if the amount of memory requested * is not a multiple of 2MB. One instance of struct kbase_sub_alloc must be - * allocated by the caller if kbdev->pagesize_2mb is enabled. + * allocated by the caller if large pages are enabled. * * Allocates @nr_pages_requested and updates the alloc object. This function does not allocate new * pages from the kernel, and therefore will never trigger the OoM killer. Therefore, it can be @@ -1679,9 +1721,9 @@ int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc, size_t nr_pa * This ensures that the pool can be grown to the required size and that the allocation can * complete without another thread using the newly grown pages. * - * If kbdev->pagesize_2mb is enabled and the allocation is >= 2MB, then @pool must be one of the - * pools from alloc->imported.native.kctx->mem_pools.large[]. Otherwise it must be one of the - * mempools from alloc->imported.native.kctx->mem_pools.small[]. + * If large (2MiB) pages are enabled and the allocation is >= 2MiB, then @pool + * must be one of the pools from alloc->imported.native.kctx->mem_pools.large[]. Otherwise it + * must be one of the mempools from alloc->imported.native.kctx->mem_pools.small[]. 
* * @prealloc_sa is used to manage the non-2MB sub-allocation. It has to be pre-allocated because we * must not sleep (due to the usage of kmalloc()) whilst holding pool->pool_lock. @prealloc_sa @@ -1776,8 +1818,8 @@ static inline dma_addr_t kbase_dma_addr_from_tagged(struct tagged_addr tagged_pa phys_addr_t pa = as_phys_addr_t(tagged_pa); struct page *page = pfn_to_page(PFN_DOWN(pa)); dma_addr_t dma_addr = (is_huge(tagged_pa) || is_partial(tagged_pa)) ? - kbase_dma_addr_as_priv(page) : - kbase_dma_addr(page); + kbase_dma_addr_as_priv(page) : + kbase_dma_addr(page); return dma_addr; } @@ -2070,7 +2112,8 @@ bool kbase_has_exec_va_zone(struct kbase_context *kctx); * kbase_map_external_resource - Map an external resource to the GPU. * @kctx: kbase context. * @reg: External resource to map. - * @locked_mm: The mm_struct which has been locked for this operation. + * @locked_mm: The mm_struct which has been locked for this operation, + * or NULL if none is available. * * On successful mapping, the VA region and the gpu_alloc refcounts will be * increased, making it safe to use and store both values directly. @@ -2335,12 +2378,15 @@ int kbase_sticky_resource_init(struct kbase_context *kctx); * kbase_sticky_resource_acquire - Acquire a reference on a sticky resource. * @kctx: kbase context. * @gpu_addr: The GPU address of the external resource. + * @locked_mm: The mm_struct which has been locked for this operation, + * or NULL if none is available. * * Return: The metadata object which represents the binding between the * external resource and the kbase context on success or NULL on failure. */ struct kbase_ctx_ext_res_meta *kbase_sticky_resource_acquire(struct kbase_context *kctx, - u64 gpu_addr); + u64 gpu_addr, + struct mm_struct *locked_mm); /** * kbase_sticky_resource_release - Release a reference on a sticky resource. 
@@ -2494,19 +2540,19 @@ void kbase_mem_umm_unmap(struct kbase_context *kctx, struct kbase_va_region *reg struct kbase_mem_phy_alloc *alloc); /** - * kbase_mem_do_sync_imported - Sync caches for imported memory + * kbase_sync_imported_umm - Sync caches for imported UMM memory * @kctx: Pointer to the kbase context * @reg: Pointer to the region with imported memory to sync * @sync_fn: The type of sync operation to perform * - * Sync CPU caches for supported (currently only dma-buf (UMM)) memory. + * Sync CPU caches for supported dma-buf (UMM) memory. * Attempting to sync unsupported imported memory types will result in an error * code, -EINVAL. * * Return: 0 on success, or a negative error code. */ -int kbase_mem_do_sync_imported(struct kbase_context *kctx, struct kbase_va_region *reg, - enum kbase_sync_type sync_fn); +int kbase_sync_imported_umm(struct kbase_context *kctx, struct kbase_va_region *reg, + enum kbase_sync_type sync_fn); /** * kbase_mem_copy_to_pinned_user_pages - Memcpy from source input page to @@ -2595,4 +2641,7 @@ static inline base_mem_alloc_flags kbase_mem_group_id_set(int id) { return BASE_MEM_GROUP_ID_SET(id); } + +bool kbase_is_large_pages_enabled(void); + #endif /* _KBASE_MEM_H_ */ diff --git a/drivers/gpu/arm/bifrost/mali_kbase_mem_linux.c b/drivers/gpu/arm/bifrost/mali_kbase_mem_linux.c index 34d8f990f65c..a32da2645077 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_mem_linux.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_mem_linux.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2024 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -46,6 +46,7 @@ #include #include #include +#include #if ((KERNEL_VERSION(5, 3, 0) <= LINUX_VERSION_CODE) || \ (KERNEL_VERSION(5, 0, 0) > LINUX_VERSION_CODE)) @@ -82,8 +83,6 @@ #define KBASE_MEM_ION_SYNC_WORKAROUND #endif -#define IR_THRESHOLD_STEPS (256u) - /* * fully_backed_gpf_memory - enable full physical backing of all grow-on-GPU-page-fault * allocations in the kernel. @@ -294,7 +293,7 @@ void kbase_phy_alloc_mapping_put(struct kbase_context *kctx, struct kbase_vmap_s } struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, u64 va_pages, u64 commit_pages, - u64 extension, u64 *flags, u64 *gpu_va, + u64 extension, base_mem_alloc_flags *flags, u64 *gpu_va, enum kbase_caller_mmu_sync_info mmu_sync_info) { struct kbase_va_region *reg; @@ -319,9 +318,8 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, u64 va_pages else dev_dbg(dev, "Keeping requested GPU VA of 0x%llx\n", (unsigned long long)*gpu_va); - if (!kbase_check_alloc_flags(*flags)) { - dev_warn(dev, "%s called with bad flags (%llx)", __func__, - (unsigned long long)*flags); + if (!kbase_check_alloc_flags(kctx, *flags)) { + dev_warn(dev, "%s called with bad flags (%llx)", __func__, *flags); goto bad_flags; } @@ -334,6 +332,12 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, u64 va_pages } #endif + /* Ensure GPU cached if CPU cached */ + if ((*flags & BASE_MEM_CACHED_CPU) != 0) { + dev_warn_once(dev, "Clearing BASE_MEM_UNCACHED_GPU flag to avoid MMA violation\n"); + *flags &= ~BASE_MEM_UNCACHED_GPU; + } + if ((*flags & BASE_MEM_UNCACHED_GPU) != 0 && (*flags & BASE_MEM_COHERENT_SYSTEM_REQUIRED) != 0) { /* Remove COHERENT_SYSTEM_REQUIRED flag if uncached GPU mapping is requested */ @@ -405,17 +409,8 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, u64 va_pages *flags &= 
~BASE_MEM_CACHED_CPU; if (*flags & BASE_MEM_GROW_ON_GPF) { - unsigned int const ir_threshold = - (unsigned int)atomic_read(&kctx->kbdev->memdev.ir_threshold); - - reg->threshold_pages = - ((va_pages * ir_threshold) + (IR_THRESHOLD_STEPS / 2)) / IR_THRESHOLD_STEPS; - } else - reg->threshold_pages = 0; - - if (*flags & BASE_MEM_GROW_ON_GPF) { - /* kbase_check_alloc_sizes() already checks extension is valid for - * assigning to reg->extension + /* kbase_check_alloc_sizes() already checks extension is valid for assigning to + * reg->extension. */ reg->extension = extension; #if !MALI_USE_CSF @@ -433,7 +428,7 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, u64 va_pages } reg->initial_commit = commit_pages; - kbase_gpu_vm_lock(kctx); + kbase_gpu_vm_lock_with_pmode_sync(kctx); if (reg->flags & KBASE_REG_PERMANENT_KERNEL_MAPPING) { /* Permanent kernel mappings must happen as soon as @@ -443,7 +438,7 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, u64 va_pages */ int err = kbase_phy_alloc_mapping_init(kctx, reg, va_pages, commit_pages); if (err < 0) { - kbase_gpu_vm_unlock(kctx); + kbase_gpu_vm_unlock_with_pmode_sync(kctx); goto no_kern_mapping; } } @@ -455,7 +450,7 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, u64 va_pages /* Bind to a cookie */ if (bitmap_empty(kctx->cookies, BITS_PER_LONG)) { dev_err(dev, "No cookies available for allocation!"); - kbase_gpu_vm_unlock(kctx); + kbase_gpu_vm_unlock_with_pmode_sync(kctx); goto no_cookie; } /* return a cookie */ @@ -472,7 +467,7 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, u64 va_pages } else /* we control the VA */ { size_t align = 1; - if (kctx->kbdev->pagesize_2mb) { + if (kbase_is_large_pages_enabled()) { /* If there's enough (> 33 bits) of GPU VA space, align to 2MB * boundaries. The similar condition is used for mapping from * the SAME_VA zone inside kbase_context_get_unmapped_area(). 
@@ -490,7 +485,7 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, u64 va_pages } if (kbase_gpu_mmap(kctx, reg, *gpu_va, va_pages, align, mmu_sync_info) != 0) { dev_warn(dev, "Failed to map memory on GPU"); - kbase_gpu_vm_unlock(kctx); + kbase_gpu_vm_unlock_with_pmode_sync(kctx); goto no_mmap; } /* return real GPU VA */ @@ -508,7 +503,7 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, u64 va_pages } #endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ - kbase_gpu_vm_unlock(kctx); + kbase_gpu_vm_unlock_with_pmode_sync(kctx); #if MALI_USE_CSF if (*flags & BASE_MEM_FIXABLE) @@ -596,9 +591,11 @@ int kbase_mem_query(struct kbase_context *kctx, u64 gpu_addr, u64 query, u64 *co *out |= BASE_MEM_COHERENT_SYSTEM; if (KBASE_REG_SHARE_IN & reg->flags) *out |= BASE_MEM_COHERENT_LOCAL; - if (KBASE_REG_DONT_NEED & reg->flags) - *out |= BASE_MEM_DONT_NEED; - if (mali_kbase_supports_mem_grow_on_gpf(kctx->api_version)) { + if (mali_kbase_supports_query_mem_dont_need(kctx->api_version)) { + if (KBASE_REG_DONT_NEED & reg->flags) + *out |= BASE_MEM_DONT_NEED; + } + if (mali_kbase_supports_query_mem_grow_on_gpf(kctx->api_version)) { /* Prior to this version, this was known about by * user-side but we did not return them. Returning * it caused certain clients that were not expecting @@ -608,7 +605,7 @@ int kbase_mem_query(struct kbase_context *kctx, u64 gpu_addr, u64 query, u64 *co if (KBASE_REG_PF_GROW & reg->flags) *out |= BASE_MEM_GROW_ON_GPF; } - if (mali_kbase_supports_mem_protected(kctx->api_version)) { + if (mali_kbase_supports_query_mem_protected(kctx->api_version)) { /* Prior to this version, this was known about by * user-side but we did not return them. 
Returning * it caused certain clients that were not expecting @@ -634,9 +631,30 @@ int kbase_mem_query(struct kbase_context *kctx, u64 gpu_addr, u64 query, u64 *co else *out |= BASE_MEM_FIXABLE; } -#endif +#endif /* MALI_USE_CSF */ if (KBASE_REG_GPU_VA_SAME_4GB_PAGE & reg->flags) *out |= BASE_MEM_GPU_VA_SAME_4GB_PAGE; + if (mali_kbase_supports_query_mem_import_sync_on_map_unmap(kctx->api_version)) { + if (reg->gpu_alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM) { + if (reg->gpu_alloc->imported.umm.need_sync) + *out |= BASE_MEM_IMPORT_SYNC_ON_MAP_UNMAP; + } + } + if (mali_kbase_supports_query_mem_kernel_sync(kctx->api_version)) { + if (unlikely(reg->cpu_alloc != reg->gpu_alloc)) + *out |= BASE_MEM_KERNEL_SYNC; + } + if (mali_kbase_supports_query_mem_same_va(kctx->api_version)) { + if (kbase_bits_to_zone(reg->flags) == SAME_VA_ZONE) { + /* Imported memory is an edge case, where declaring it SAME_VA + * would be ambiguous. + */ + if (reg->gpu_alloc->type != KBASE_MEM_TYPE_IMPORTED_UMM && + reg->gpu_alloc->type != KBASE_MEM_TYPE_IMPORTED_USER_BUF) { + *out |= BASE_MEM_SAME_VA; + } + } + } *out |= kbase_mem_group_id_set(reg->cpu_alloc->group_id); @@ -667,7 +685,9 @@ out_unlock: static unsigned long kbase_mem_evictable_reclaim_count_objects(struct shrinker *s, struct shrink_control *sc) { - struct kbase_context *kctx = container_of(s, struct kbase_context, reclaim); + struct kbase_context *kctx = + KBASE_GET_KBASE_DATA_FROM_SHRINKER(s, struct kbase_context, reclaim); + int evict_nents = atomic_read(&kctx->evict_nents); unsigned long nr_freeable_items; @@ -717,8 +737,15 @@ static unsigned long kbase_mem_evictable_reclaim_scan_objects(struct shrinker *s struct kbase_mem_phy_alloc *tmp; unsigned long freed = 0; - kctx = container_of(s, struct kbase_context, reclaim); + kctx = KBASE_GET_KBASE_DATA_FROM_SHRINKER(s, struct kbase_context, reclaim); +#if MALI_USE_CSF + if (!down_read_trylock(&kctx->kbdev->csf.mmu_sync_sem)) { + dev_warn(kctx->kbdev->dev, + "Can't shrink GPU memory 
when P.Mode entrance is in progress"); + return 0; + } +#endif mutex_lock(&kctx->jit_evict_lock); list_for_each_entry_safe(alloc, tmp, &kctx->evict_list, evict_node) { @@ -757,32 +784,36 @@ static unsigned long kbase_mem_evictable_reclaim_scan_objects(struct shrinker *s } mutex_unlock(&kctx->jit_evict_lock); - +#if MALI_USE_CSF + up_read(&kctx->kbdev->csf.mmu_sync_sem); +#endif return freed; } int kbase_mem_evictable_init(struct kbase_context *kctx) { + struct shrinker *reclaim; + INIT_LIST_HEAD(&kctx->evict_list); mutex_init(&kctx->jit_evict_lock); - kctx->reclaim.count_objects = kbase_mem_evictable_reclaim_count_objects; - kctx->reclaim.scan_objects = kbase_mem_evictable_reclaim_scan_objects; - kctx->reclaim.seeks = DEFAULT_SEEKS; - /* Kernel versions prior to 3.1 : - * struct shrinker does not define batch - */ -#if KERNEL_VERSION(6, 0, 0) > LINUX_VERSION_CODE - register_shrinker(&kctx->reclaim); -#else - register_shrinker(&kctx->reclaim, "mali-mem"); -#endif + reclaim = KBASE_INIT_RECLAIM(kctx, reclaim, "mali-mem"); + if (!reclaim) + return -ENOMEM; + KBASE_SET_RECLAIM(kctx, reclaim, reclaim); + + reclaim->count_objects = kbase_mem_evictable_reclaim_count_objects; + reclaim->scan_objects = kbase_mem_evictable_reclaim_scan_objects; + reclaim->seeks = DEFAULT_SEEKS; + + KBASE_REGISTER_SHRINKER(reclaim, "mali-mem", kctx); + return 0; } void kbase_mem_evictable_deinit(struct kbase_context *kctx) { - unregister_shrinker(&kctx->reclaim); + KBASE_UNREGISTER_SHRINKER(kctx->reclaim); } /** @@ -849,7 +880,7 @@ void kbase_mem_evictable_make(struct kbase_mem_phy_alloc *gpu_alloc) /* Indicate to page migration that the memory can be reclaimed by the shrinker. 
*/ if (kbase_is_page_migration_enabled()) - kbase_set_phy_alloc_page_status(gpu_alloc, NOT_MOVABLE); + kbase_set_phy_alloc_page_status(kctx, gpu_alloc, NOT_MOVABLE); mutex_unlock(&kctx->jit_evict_lock); kbase_mem_evictable_mark_reclaim(gpu_alloc); @@ -907,7 +938,7 @@ bool kbase_mem_evictable_unmake(struct kbase_mem_phy_alloc *gpu_alloc) * from. */ if (kbase_is_page_migration_enabled()) - kbase_set_phy_alloc_page_status(gpu_alloc, ALLOCATED_MAPPED); + kbase_set_phy_alloc_page_status(kctx, gpu_alloc, ALLOCATED_MAPPED); } } @@ -925,7 +956,8 @@ bool kbase_mem_evictable_unmake(struct kbase_mem_phy_alloc *gpu_alloc) * * Return: 0 on success, error code otherwise. */ -static int kbase_mem_flags_change_imported_umm(struct kbase_context *kctx, unsigned int flags, +static int kbase_mem_flags_change_imported_umm(struct kbase_context *kctx, + base_mem_alloc_flags flags, struct kbase_va_region *reg) { unsigned int real_flags = 0; @@ -1008,7 +1040,7 @@ static int kbase_mem_flags_change_imported_umm(struct kbase_context *kctx, unsig * * Return: 0 on success, error code otherwise. 
*/ -static int kbase_mem_flags_change_native(struct kbase_context *kctx, unsigned int flags, +static int kbase_mem_flags_change_native(struct kbase_context *kctx, base_mem_alloc_flags flags, struct kbase_va_region *reg) { bool kbase_reg_dont_need_flag = (KBASE_REG_DONT_NEED & reg->flags); @@ -1040,8 +1072,8 @@ static int kbase_mem_flags_change_native(struct kbase_context *kctx, unsigned in return ret; } -int kbase_mem_flags_change(struct kbase_context *kctx, u64 gpu_addr, unsigned int flags, - unsigned int mask) +int kbase_mem_flags_change(struct kbase_context *kctx, u64 gpu_addr, base_mem_alloc_flags flags, + base_mem_alloc_flags mask) { struct kbase_va_region *reg; int ret = -EINVAL; @@ -1058,7 +1090,7 @@ int kbase_mem_flags_change(struct kbase_context *kctx, u64 gpu_addr, unsigned in /* Lock down the context, and find the region */ down_write(kbase_mem_get_process_mmap_lock()); - kbase_gpu_vm_lock(kctx); + kbase_gpu_vm_lock_with_pmode_sync(kctx); /* Validate the region */ reg = kbase_region_tracker_find_region_base_address(kctx, gpu_addr); @@ -1110,7 +1142,7 @@ int kbase_mem_flags_change(struct kbase_context *kctx, u64 gpu_addr, unsigned in } out_unlock: - kbase_gpu_vm_unlock(kctx); + kbase_gpu_vm_unlock_with_pmode_sync(kctx); up_write(kbase_mem_get_process_mmap_lock()); return ret; @@ -1118,8 +1150,8 @@ out_unlock: #define KBASE_MEM_IMPORT_HAVE_PAGES (1UL << BASE_MEM_FLAGS_NR_BITS) -int kbase_mem_do_sync_imported(struct kbase_context *kctx, struct kbase_va_region *reg, - enum kbase_sync_type sync_fn) +int kbase_sync_imported_umm(struct kbase_context *kctx, struct kbase_va_region *reg, + enum kbase_sync_type sync_fn) { int ret = -EINVAL; struct dma_buf __maybe_unused *dma_buf; @@ -1317,7 +1349,7 @@ int kbase_mem_umm_map(struct kbase_context *kctx, struct kbase_va_region *reg) if (IS_ENABLED(CONFIG_MALI_DMA_BUF_LEGACY_COMPAT) || alloc->imported.umm.need_sync) { if (!kbase_is_region_invalid_or_free(reg)) { - err = kbase_mem_do_sync_imported(kctx, reg, 
KBASE_SYNC_TO_DEVICE); + err = kbase_sync_imported_umm(kctx, reg, KBASE_SYNC_TO_DEVICE); WARN_ON_ONCE(err); } } @@ -1379,7 +1411,7 @@ void kbase_mem_umm_unmap(struct kbase_context *kctx, struct kbase_va_region *reg if (IS_ENABLED(CONFIG_MALI_DMA_BUF_LEGACY_COMPAT) || alloc->imported.umm.need_sync) { if (!kbase_is_region_invalid_or_free(reg)) { - int err = kbase_mem_do_sync_imported(kctx, reg, KBASE_SYNC_TO_CPU); + int err = kbase_sync_imported_umm(kctx, reg, KBASE_SYNC_TO_CPU); WARN_ON_ONCE(err); } } @@ -1431,7 +1463,7 @@ static int get_umm_memory_group_id(struct kbase_context *kctx, struct dma_buf *d * object that wraps the dma-buf. */ static struct kbase_va_region *kbase_mem_from_umm(struct kbase_context *kctx, int fd, u64 *va_pages, - u64 *flags, u32 padding) + base_mem_alloc_flags *flags, u32 padding) { struct kbase_va_region *reg; struct dma_buf *dma_buf; @@ -1577,7 +1609,8 @@ u32 kbase_get_cache_line_alignment(struct kbase_device *kbdev) static struct kbase_va_region *kbase_mem_from_user_buffer(struct kbase_context *kctx, unsigned long address, unsigned long size, - u64 *va_pages, u64 *flags) + u64 *va_pages, + base_mem_alloc_flags *flags) { struct kbase_va_region *reg; enum kbase_memory_zone zone = CUSTOM_VA_ZONE; @@ -1709,7 +1742,7 @@ bad_size: return NULL; } -u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride, u64 nents, +u64 kbase_mem_alias(struct kbase_context *kctx, base_mem_alloc_flags *flags, u64 stride, u64 nents, struct base_mem_aliasing_info *ai, u64 *num_pages) { struct kbase_va_region *reg; @@ -1794,7 +1827,7 @@ u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride, u64 nent if (!reg->gpu_alloc->imported.alias.aliased) goto no_aliased_array; - kbase_gpu_vm_lock(kctx); + kbase_gpu_vm_lock_with_pmode_sync(kctx); /* validate and add src handles */ for (i = 0; i < nents; i++) { @@ -1904,7 +1937,7 @@ u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride, u64 nent reg->flags &= ~KBASE_REG_FREE; 
reg->flags &= ~KBASE_REG_GROWABLE; - kbase_gpu_vm_unlock(kctx); + kbase_gpu_vm_unlock_with_pmode_sync(kctx); return gpu_va; @@ -1915,7 +1948,7 @@ bad_handle: * them is handled by putting reg's allocs, so no rollback of those * actions is done here. */ - kbase_gpu_vm_unlock(kctx); + kbase_gpu_vm_unlock_with_pmode_sync(kctx); no_aliased_array: invalid_flags: kbase_mem_phy_alloc_put(reg->cpu_alloc); @@ -1931,7 +1964,8 @@ bad_flags: } int kbase_mem_import(struct kbase_context *kctx, enum base_mem_import_type type, - void __user *phandle, u32 padding, u64 *gpu_va, u64 *va_pages, u64 *flags) + void __user *phandle, u32 padding, u64 *gpu_va, u64 *va_pages, + base_mem_alloc_flags *flags) { struct kbase_va_region *reg; @@ -2016,7 +2050,7 @@ int kbase_mem_import(struct kbase_context *kctx, enum base_mem_import_type type, if (!reg) goto no_reg; - kbase_gpu_vm_lock(kctx); + kbase_gpu_vm_lock_with_pmode_sync(kctx); /* mmap needed to setup VA? */ if (*flags & (BASE_MEM_SAME_VA | BASE_MEM_NEED_MMAP)) { @@ -2050,13 +2084,13 @@ int kbase_mem_import(struct kbase_context *kctx, enum base_mem_import_type type, /* clear out private flags */ *flags &= ((1UL << BASE_MEM_FLAGS_NR_BITS) - 1); - kbase_gpu_vm_unlock(kctx); + kbase_gpu_vm_unlock_with_pmode_sync(kctx); return 0; no_gpu_va: no_cookie: - kbase_gpu_vm_unlock(kctx); + kbase_gpu_vm_unlock_with_pmode_sync(kctx); kbase_mem_phy_alloc_put(reg->cpu_alloc); kbase_mem_phy_alloc_put(reg->gpu_alloc); kfree(reg); @@ -2096,7 +2130,7 @@ void kbase_mem_shrink_cpu_mapping(struct kbase_context *kctx, struct kbase_va_re /* Nothing to do */ return; - unmap_mapping_range(kctx->kfile->filp->f_inode->i_mapping, + unmap_mapping_range(kctx->filp->f_inode->i_mapping, (loff_t)(gpu_va_start + new_pages) << PAGE_SHIFT, (loff_t)(old_pages - new_pages) << PAGE_SHIFT, 1); } @@ -2142,7 +2176,7 @@ int kbase_mem_commit(struct kbase_context *kctx, u64 gpu_addr, u64 new_pages) } down_write(kbase_mem_get_process_mmap_lock()); - kbase_gpu_vm_lock(kctx); + 
kbase_gpu_vm_lock_with_pmode_sync(kctx); /* Validate the region */ reg = kbase_region_tracker_find_region_base_address(kctx, gpu_addr); @@ -2250,7 +2284,7 @@ int kbase_mem_commit(struct kbase_context *kctx, u64 gpu_addr, u64 new_pages) } out_unlock: - kbase_gpu_vm_unlock(kctx); + kbase_gpu_vm_unlock_with_pmode_sync(kctx); if (read_locked) up_read(kbase_mem_get_process_mmap_lock()); else @@ -2274,11 +2308,16 @@ int kbase_mem_shrink(struct kbase_context *const kctx, struct kbase_va_region *c return -EINVAL; old_pages = kbase_reg_current_backed_size(reg); - if (WARN_ON(old_pages < new_pages)) + if (old_pages < new_pages) { + dev_warn( + kctx->kbdev->dev, + "Requested number of pages (%llu) is larger than the current number of pages (%llu)", + new_pages, old_pages); return -EINVAL; + } delta = old_pages - new_pages; - if (kctx->kbdev->pagesize_2mb) { + if (kbase_is_large_pages_enabled()) { struct tagged_addr *start_free = reg->gpu_alloc->pages + new_pages; /* Move the end of new commited range to a valid location. 
@@ -2332,7 +2371,7 @@ static void kbase_cpu_vm_close(struct vm_area_struct *vma) KBASE_DEBUG_ASSERT(map->kctx); KBASE_DEBUG_ASSERT(map->alloc); - kbase_gpu_vm_lock(map->kctx); + kbase_gpu_vm_lock_with_pmode_sync(map->kctx); if (map->free_on_close) { KBASE_DEBUG_ASSERT(kbase_bits_to_zone(map->region->flags) == SAME_VA_ZONE); @@ -2346,10 +2385,9 @@ static void kbase_cpu_vm_close(struct vm_area_struct *vma) list_del(&map->mappings_list); kbase_va_region_alloc_put(map->kctx, map->region); - kbase_gpu_vm_unlock(map->kctx); + kbase_gpu_vm_unlock_with_pmode_sync(map->kctx); kbase_mem_phy_alloc_put(map->alloc); - kbase_file_dec_cpu_mapping_count(map->kctx->kfile); kfree(map); } @@ -2549,7 +2587,6 @@ static int kbase_cpu_mmap(struct kbase_context *kctx, struct kbase_va_region *re map->alloc->properties |= KBASE_MEM_PHY_ALLOC_ACCESSED_CACHED; list_add(&map->mappings_list, &map->alloc->mappings); - kbase_file_inc_cpu_mapping_count(kctx->kfile); out: return err; @@ -2749,7 +2786,7 @@ int kbase_context_mmap(struct kbase_context *const kctx, struct vm_area_struct * goto out; } - kbase_gpu_vm_lock(kctx); + kbase_gpu_vm_lock_with_pmode_sync(kctx); if (vma->vm_pgoff == PFN_DOWN(BASE_MEM_MAP_TRACKING_HANDLE)) { /* The non-mapped tracking helper page */ @@ -2784,11 +2821,11 @@ int kbase_context_mmap(struct kbase_context *const kctx, struct vm_area_struct * #endif /* defined(CONFIG_MALI_VECTOR_DUMP) */ #if MALI_USE_CSF case PFN_DOWN(BASEP_MEM_CSF_USER_REG_PAGE_HANDLE): - kbase_gpu_vm_unlock(kctx); + kbase_gpu_vm_unlock_with_pmode_sync(kctx); err = kbase_csf_cpu_mmap_user_reg_page(kctx, vma); goto out; case PFN_DOWN(BASEP_MEM_CSF_USER_IO_PAGES_HANDLE)... 
PFN_DOWN(BASE_MEM_COOKIE_BASE) - 1: { - kbase_gpu_vm_unlock(kctx); + kbase_gpu_vm_unlock_with_pmode_sync(kctx); mutex_lock(&kctx->csf.lock); err = kbase_csf_cpu_mmap_user_io_pages(kctx, vma); mutex_unlock(&kctx->csf.lock); @@ -2882,7 +2919,7 @@ int kbase_context_mmap(struct kbase_context *const kctx, struct vm_area_struct * } #endif /* defined(CONFIG_MALI_VECTOR_DUMP) */ out_unlock: - kbase_gpu_vm_unlock(kctx); + kbase_gpu_vm_unlock_with_pmode_sync(kctx); out: if (err) dev_err(dev, "mmap failed %d\n", err); @@ -3067,7 +3104,7 @@ static int kbase_vmap_phy_pages(struct kbase_context *kctx, struct kbase_va_regi return -ENOMEM; } - if (reg->flags & KBASE_REG_DONT_NEED) + if (kbase_is_region_shrinkable(reg)) return -EINVAL; prot = PAGE_KERNEL; @@ -3276,25 +3313,6 @@ void kbasep_os_process_page_usage_update(struct kbase_context *kctx, int pages) #endif } -static void kbase_special_vm_open(struct vm_area_struct *vma) -{ - struct kbase_context *kctx = vma->vm_private_data; - - kbase_file_inc_cpu_mapping_count(kctx->kfile); -} - -static void kbase_special_vm_close(struct vm_area_struct *vma) -{ - struct kbase_context *kctx = vma->vm_private_data; - - kbase_file_dec_cpu_mapping_count(kctx->kfile); -} - -static const struct vm_operations_struct kbase_vm_special_ops = { - .open = kbase_special_vm_open, - .close = kbase_special_vm_close, -}; - static int kbase_tracking_page_setup(struct kbase_context *kctx, struct vm_area_struct *vma) { if (vma_pages(vma) != 1) @@ -3303,10 +3321,7 @@ static int kbase_tracking_page_setup(struct kbase_context *kctx, struct vm_area_ /* no real access */ vm_flags_clear(vma, VM_READ | VM_MAYREAD | VM_WRITE | VM_MAYWRITE | VM_EXEC | VM_MAYEXEC); vm_flags_set(vma, VM_DONTCOPY | VM_DONTEXPAND | VM_DONTDUMP | VM_IO); - vma->vm_ops = &kbase_vm_special_ops; - vma->vm_private_data = kctx; - kbase_file_inc_cpu_mapping_count(kctx->kfile); return 0; } @@ -3367,7 +3382,6 @@ static void kbase_csf_user_io_pages_vm_close(struct vm_area_struct *vma) struct 
kbase_device *kbdev; int err; bool reset_prevented = false; - struct kbase_file *kfile; if (!queue) { pr_debug("Close method called for the new User IO pages mapping vma\n"); @@ -3376,7 +3390,6 @@ static void kbase_csf_user_io_pages_vm_close(struct vm_area_struct *vma) kctx = queue->kctx; kbdev = kctx->kbdev; - kfile = kctx->kfile; err = kbase_reset_gpu_prevent_and_wait(kbdev); if (err) @@ -3394,9 +3407,8 @@ static void kbase_csf_user_io_pages_vm_close(struct vm_area_struct *vma) if (reset_prevented) kbase_reset_gpu_allow(kbdev); - kbase_file_dec_cpu_mapping_count(kfile); /* Now as the vma is closed, drop the reference on mali device file */ - fput(kfile->filp); + fput(kctx->filp); } #if (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE) @@ -3546,7 +3558,6 @@ static int kbase_csf_cpu_mmap_user_io_pages(struct kbase_context *kctx, struct v /* Also adjust the vm_pgoff */ vma->vm_pgoff = queue->db_file_offset; - kbase_file_inc_cpu_mapping_count(kctx->kfile); return 0; map_failed: @@ -3586,7 +3597,6 @@ static void kbase_csf_user_reg_vm_close(struct vm_area_struct *vma) { struct kbase_context *kctx = vma->vm_private_data; struct kbase_device *kbdev; - struct kbase_file *kfile; if (unlikely(!kctx)) { pr_debug("Close function called for the unexpected mapping"); @@ -3594,7 +3604,6 @@ static void kbase_csf_user_reg_vm_close(struct vm_area_struct *vma) } kbdev = kctx->kbdev; - kfile = kctx->kfile; if (unlikely(!kctx->csf.user_reg.vma)) dev_warn(kbdev->dev, "user_reg VMA pointer unexpectedly NULL for ctx %d_%d", @@ -3606,9 +3615,8 @@ static void kbase_csf_user_reg_vm_close(struct vm_area_struct *vma) kctx->csf.user_reg.vma = NULL; - kbase_file_dec_cpu_mapping_count(kfile); /* Now as the VMA is closed, drop the reference on mali device file */ - fput(kfile->filp); + fput(kctx->filp); } /** @@ -3738,7 +3746,6 @@ static int kbase_csf_cpu_mmap_user_reg_page(struct kbase_context *kctx, struct v vma->vm_ops = &kbase_csf_user_reg_vm_ops; vma->vm_private_data = kctx; - 
kbase_file_inc_cpu_mapping_count(kctx->kfile); return 0; } diff --git a/drivers/gpu/arm/bifrost/mali_kbase_mem_linux.h b/drivers/gpu/arm/bifrost/mali_kbase_mem_linux.h index 28666037d8c6..a4b3db7fdf89 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_mem_linux.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_mem_linux.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2024 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -51,7 +51,7 @@ struct kbase_hwc_dma_mapping { * Return: 0 on success or error code */ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, u64 va_pages, u64 commit_pages, - u64 extension, u64 *flags, u64 *gpu_va, + u64 extension, base_mem_alloc_flags *flags, u64 *gpu_va, enum kbase_caller_mmu_sync_info mmu_sync_info); /** @@ -84,7 +84,8 @@ int kbase_mem_query(struct kbase_context *kctx, u64 gpu_addr, u64 query, u64 *co * Return: 0 on success or error code */ int kbase_mem_import(struct kbase_context *kctx, enum base_mem_import_type type, - void __user *phandle, u32 padding, u64 *gpu_va, u64 *va_pages, u64 *flags); + void __user *phandle, u32 padding, u64 *gpu_va, u64 *va_pages, + base_mem_alloc_flags *flags); /** * kbase_mem_alias - Create a new allocation for GPU, aliasing one or more @@ -99,7 +100,7 @@ int kbase_mem_import(struct kbase_context *kctx, enum base_mem_import_type type, * * Return: 0 on failure or otherwise the GPU VA for the alias */ -u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride, u64 nents, +u64 kbase_mem_alias(struct kbase_context *kctx, base_mem_alloc_flags *flags, u64 stride, u64 nents, struct base_mem_aliasing_info *ai, u64 *num_pages); /** @@ -112,8 +113,8 @@ u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride, u64 nent * 
* Return: 0 on success or error code */ -int kbase_mem_flags_change(struct kbase_context *kctx, u64 gpu_addr, unsigned int flags, - unsigned int mask); +int kbase_mem_flags_change(struct kbase_context *kctx, u64 gpu_addr, base_mem_alloc_flags flags, + base_mem_alloc_flags mask); /** * kbase_mem_commit - Change the physical backing size of a region diff --git a/drivers/gpu/arm/bifrost/mali_kbase_mem_migrate.c b/drivers/gpu/arm/bifrost/mali_kbase_mem_migrate.c index 93a07e7db4fa..eecab323f59f 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_mem_migrate.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_mem_migrate.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2022-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2022-2024 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -28,6 +28,9 @@ #include #include +/* Static key used to determine if page migration is enabled or not */ +static DEFINE_STATIC_KEY_FALSE(page_migration_static_key); + /* Global integer used to determine if module parameter value has been * provided and if page migration feature is enabled. * Feature is disabled on all platforms by default. 
@@ -50,15 +53,6 @@ MODULE_PARM_DESC(kbase_page_migration_enabled, KBASE_EXPORT_TEST_API(kbase_page_migration_enabled); -bool kbase_is_page_migration_enabled(void) -{ - /* Handle uninitialised int case */ - if (kbase_page_migration_enabled < 0) - return false; - return IS_ENABLED(CONFIG_PAGE_MIGRATION_SUPPORT) && kbase_page_migration_enabled; -} -KBASE_EXPORT_SYMBOL(kbase_is_page_migration_enabled); - #if (KERNEL_VERSION(6, 0, 0) <= LINUX_VERSION_CODE) static const struct movable_operations movable_ops; #endif @@ -74,6 +68,12 @@ bool kbase_alloc_page_metadata(struct kbase_device *kbdev, struct page *p, dma_a if (!IS_ENABLED(CONFIG_PAGE_MIGRATION_SUPPORT)) return false; + /* Composite large-page is excluded from migration, trigger a warn if a development + * wrongly leads to it. + */ + if (is_huge_head(as_tagged(page_to_phys(p))) || is_partial(as_tagged(page_to_phys(p)))) + dev_WARN(kbdev->dev, "%s: migration-metadata attempted on large-page.", __func__); + page_md = kzalloc(sizeof(struct kbase_page_metadata), GFP_KERNEL); if (!page_md) return false; @@ -225,7 +225,7 @@ static int kbasep_migrate_page_pt_mapped(struct page *old_page, struct page *new * This blocks the CPU page fault handler from remapping pages. * Only MCU's mmut is device wide, i.e. no corresponding kctx. */ - kbase_gpu_vm_lock(kctx); + kbase_gpu_vm_lock_with_pmode_sync(kctx); ret = kbase_mmu_migrate_page( as_tagged(page_to_phys(old_page)), as_tagged(page_to_phys(new_page)), old_dma_addr, @@ -254,7 +254,7 @@ static int kbasep_migrate_page_pt_mapped(struct page *old_page, struct page *new dma_unmap_page(kbdev->dev, new_dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); /* Page fault handler for CPU mapping unblocked. */ - kbase_gpu_vm_unlock(kctx); + kbase_gpu_vm_unlock_with_pmode_sync(kctx); return ret; } @@ -293,10 +293,10 @@ static int kbasep_migrate_page_allocated_mapped(struct page *old_page, struct pa /* Lock context to protect access to array of pages in physical allocation. 
* This blocks the CPU page fault handler from remapping pages. */ - kbase_gpu_vm_lock(kctx); + kbase_gpu_vm_lock_with_pmode_sync(kctx); /* Unmap the old physical range. */ - unmap_mapping_range(kctx->kfile->filp->f_inode->i_mapping, + unmap_mapping_range(kctx->filp->f_inode->i_mapping, (loff_t)(page_md->data.mapped.vpfn / GPU_PAGES_PER_CPU_PAGE) << PAGE_SHIFT, PAGE_SIZE, 1); @@ -332,7 +332,7 @@ static int kbasep_migrate_page_allocated_mapped(struct page *old_page, struct pa dma_unmap_page(kctx->kbdev->dev, new_dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); /* Page fault handler for CPU mapping unblocked. */ - kbase_gpu_vm_unlock(kctx); + kbase_gpu_vm_unlock_with_pmode_sync(kctx); return ret; } @@ -685,11 +685,15 @@ void kbase_mem_migrate_init(struct kbase_device *kbdev) * integer for a negative value to see if insmod parameter was * passed in at all (it will override the default negative value). */ - if (kbase_page_migration_enabled < 0) - kbase_page_migration_enabled = kbdev->pagesize_2mb ? 1 : 0; - else + if (kbase_page_migration_enabled < 0) { + if (kbase_is_large_pages_enabled()) + static_branch_inc(&page_migration_static_key); + } else { dev_info(kbdev->dev, "Page migration support explicitly %s at insmod.", kbase_page_migration_enabled ? 
"enabled" : "disabled"); + if (kbase_page_migration_enabled) + static_branch_inc(&page_migration_static_key); + } spin_lock_init(&mem_migrate->free_pages_lock); INIT_LIST_HEAD(&mem_migrate->free_pages_list); @@ -714,3 +718,9 @@ void kbase_mem_migrate_term(struct kbase_device *kbdev) iput(mem_migrate->inode); #endif } + +bool kbase_is_page_migration_enabled(void) +{ + return static_branch_unlikely(&page_migration_static_key); +} +KBASE_EXPORT_TEST_API(kbase_is_page_migration_enabled); diff --git a/drivers/gpu/arm/bifrost/mali_kbase_mem_migrate.h b/drivers/gpu/arm/bifrost/mali_kbase_mem_migrate.h index ece8734de792..70c3135a7829 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_mem_migrate.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_mem_migrate.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2022-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2022-2024 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software diff --git a/drivers/gpu/arm/bifrost/mali_kbase_mem_pool.c b/drivers/gpu/arm/bifrost/mali_kbase_mem_pool.c index cb862d5b029c..5984730c337c 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_mem_pool.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_mem_pool.c @@ -480,7 +480,7 @@ static unsigned long kbase_mem_pool_reclaim_count_objects(struct shrinker *s, CSTD_UNUSED(sc); - pool = container_of(s, struct kbase_mem_pool, reclaim); + pool = KBASE_GET_KBASE_DATA_FROM_SHRINKER(s, struct kbase_mem_pool, reclaim); kbase_mem_pool_lock(pool); if (pool->dont_reclaim && !pool->dying) { @@ -502,7 +502,7 @@ static unsigned long kbase_mem_pool_reclaim_scan_objects(struct shrinker *s, struct kbase_mem_pool *pool; unsigned long freed; - pool = container_of(s, struct kbase_mem_pool, reclaim); + pool = KBASE_GET_KBASE_DATA_FROM_SHRINKER(s, struct kbase_mem_pool, reclaim); kbase_mem_pool_lock(pool); 
if (pool->dont_reclaim && !pool->dying) { @@ -528,6 +528,8 @@ int kbase_mem_pool_init(struct kbase_mem_pool *pool, const struct kbase_mem_pool unsigned int order, int group_id, struct kbase_device *kbdev, struct kbase_mem_pool *next_pool) { + struct shrinker *reclaim; + if (WARN_ON(group_id < 0) || WARN_ON(group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS)) { return -EINVAL; } @@ -544,18 +546,17 @@ int kbase_mem_pool_init(struct kbase_mem_pool *pool, const struct kbase_mem_pool spin_lock_init(&pool->pool_lock); INIT_LIST_HEAD(&pool->page_list); - pool->reclaim.count_objects = kbase_mem_pool_reclaim_count_objects; - pool->reclaim.scan_objects = kbase_mem_pool_reclaim_scan_objects; - pool->reclaim.seeks = DEFAULT_SEEKS; - /* Kernel versions prior to 3.1 : - * struct shrinker does not define batch - */ - pool->reclaim.batch = 0; -#if KERNEL_VERSION(6, 0, 0) > LINUX_VERSION_CODE - register_shrinker(&pool->reclaim); -#else - register_shrinker(&pool->reclaim, "mali-mem-pool"); -#endif + reclaim = KBASE_INIT_RECLAIM(pool, reclaim, "mali-mem-pool"); + if (!reclaim) + return -ENOMEM; + KBASE_SET_RECLAIM(pool, reclaim, reclaim); + + reclaim->count_objects = kbase_mem_pool_reclaim_count_objects; + reclaim->scan_objects = kbase_mem_pool_reclaim_scan_objects; + reclaim->seeks = DEFAULT_SEEKS; + reclaim->batch = 0; + + KBASE_REGISTER_SHRINKER(reclaim, "mali-mem-pool", pool); pool_dbg(pool, "initialized\n"); @@ -581,7 +582,7 @@ void kbase_mem_pool_term(struct kbase_mem_pool *pool) pool_dbg(pool, "terminate()\n"); - unregister_shrinker(&pool->reclaim); + KBASE_UNREGISTER_SHRINKER(pool->reclaim); kbase_mem_pool_lock(pool); pool->max_size = 0; diff --git a/drivers/gpu/arm/bifrost/mali_kbase_native_mgm.c b/drivers/gpu/arm/bifrost/mali_kbase_native_mgm.c index 5e3d1eeb6d28..f9a3788a2ecf 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_native_mgm.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_native_mgm.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) 
COPYRIGHT 2019-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2024 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -121,44 +121,24 @@ static vm_fault_t kbase_native_mgm_vmf_insert_pfn_prot(struct memory_group_manag return vmf_insert_pfn_prot(vma, addr, pfn, pgprot); } -/** - * kbase_native_mgm_update_gpu_pte - Native method to modify a GPU page table - * entry - * - * @mgm_dev: The memory group manager the request is being made through. - * @group_id: A physical memory group ID, which must be valid but is not used. - * Its valid range is 0 .. MEMORY_GROUP_MANAGER_NR_GROUPS-1. - * @mmu_level: The level of the MMU page table where the page is getting mapped. - * @pte: The prepared page table entry. - * - * This function simply returns the @pte without modification. - * - * Return: A GPU page table entry to be stored in a page table. - */ static u64 kbase_native_mgm_update_gpu_pte(struct memory_group_manager_device *mgm_dev, - unsigned int group_id, int mmu_level, u64 pte) + unsigned int group_id, unsigned int pbha_id, + unsigned int pte_flags, int mmu_level, u64 pte) { - CSTD_UNUSED(mgm_dev); - CSTD_UNUSED(group_id); - CSTD_UNUSED(mmu_level); + if (WARN_ON(group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS)) + return pte; + + if ((pte_flags & BIT(MMA_VIOLATION)) && pbha_id) { + pr_warn_once("MMA violation! Applying PBHA override workaround to PTE\n"); + pte |= ((u64)pbha_id << PTE_PBHA_SHIFT) & PTE_PBHA_MASK; + } + + /* Address could be translated into a different bus address here */ + pte |= ((u64)1 << PTE_RES_BIT_MULTI_AS_SHIFT); return pte; } -/** - * kbase_native_mgm_pte_to_original_pte - Native method to undo changes done in - * kbase_native_mgm_update_gpu_pte() - * - * @mgm_dev: The memory group manager the request is being made through. 
- * @group_id: A physical memory group ID, which must be valid but is not used. - * Its valid range is 0 .. MEMORY_GROUP_MANAGER_NR_GROUPS-1. - * @mmu_level: The level of the MMU page table where the page is getting mapped. - * @pte: The prepared page table entry. - * - * This function simply returns the @pte without modification. - * - * Return: A GPU page table entry to be stored in a page table. - */ static u64 kbase_native_mgm_pte_to_original_pte(struct memory_group_manager_device *mgm_dev, unsigned int group_id, int mmu_level, u64 pte) { @@ -166,17 +146,32 @@ static u64 kbase_native_mgm_pte_to_original_pte(struct memory_group_manager_devi CSTD_UNUSED(group_id); CSTD_UNUSED(mmu_level); + /* Undo the group ID modification */ + pte &= ~PTE_PBHA_MASK; + /* Undo the bit set */ + pte &= ~((u64)1 << PTE_RES_BIT_MULTI_AS_SHIFT); + return pte; } +static bool kbase_native_mgm_get_import_memory_cached_access_permitted( + struct memory_group_manager_device *mgm_dev, + struct memory_group_manager_import_data *import_data) +{ + CSTD_UNUSED(mgm_dev); + CSTD_UNUSED(import_data); + + return true; +} + struct memory_group_manager_device kbase_native_mgm_dev = { - .ops = { - .mgm_alloc_page = kbase_native_mgm_alloc, - .mgm_free_page = kbase_native_mgm_free, - .mgm_get_import_memory_id = NULL, - .mgm_vmf_insert_pfn_prot = kbase_native_mgm_vmf_insert_pfn_prot, - .mgm_update_gpu_pte = kbase_native_mgm_update_gpu_pte, - .mgm_pte_to_original_pte = kbase_native_mgm_pte_to_original_pte, - }, + .ops = { .mgm_alloc_page = kbase_native_mgm_alloc, + .mgm_free_page = kbase_native_mgm_free, + .mgm_get_import_memory_id = NULL, + .mgm_vmf_insert_pfn_prot = kbase_native_mgm_vmf_insert_pfn_prot, + .mgm_update_gpu_pte = kbase_native_mgm_update_gpu_pte, + .mgm_pte_to_original_pte = kbase_native_mgm_pte_to_original_pte, + .mgm_get_import_memory_cached_access_permitted = + kbase_native_mgm_get_import_memory_cached_access_permitted }, .data = NULL }; diff --git 
a/drivers/gpu/arm/bifrost/mali_kbase_pbha.c b/drivers/gpu/arm/bifrost/mali_kbase_pbha.c index 341ea901e2e1..ea79811ea293 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_pbha.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_pbha.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2021-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2021-2024 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -220,6 +220,24 @@ void kbase_pbha_write_settings(struct kbase_device *kbdev) for (i = 0; i < GPU_SYSC_ALLOC_COUNT; ++i) kbase_reg_write32(kbdev, GPU_SYSC_ALLOC_OFFSET(i), kbdev->sysc_alloc[i]); } + + if (kbdev->mma_wa_id) { + /* PBHA OVERRIDE register index (0-3) */ + uint reg_index = kbdev->mma_wa_id >> 2; + /* PBHA index within a PBHA OVERRIDE register (0-3) */ + uint pbha_index = kbdev->mma_wa_id & 0x3; + /* 4 bits of read attributes + 4 bits of write attributes for each PBHA */ + uint pbha_shift = pbha_index * 8; + /* Noncacheable read = noncacheable write = b0001*/ + uint pbha_override_rw_noncacheable = 0x01 | 0x10; + + u32 pbha_override_val = + kbase_reg_read32(kbdev, GPU_SYSC_PBHA_OVERRIDE_OFFSET(reg_index)); + pbha_override_val &= ~((u32)0xFF << pbha_shift); + pbha_override_val |= ((u32)pbha_override_rw_noncacheable << pbha_shift); + kbase_reg_write32(kbdev, GPU_SYSC_PBHA_OVERRIDE_OFFSET(reg_index), + pbha_override_val); + } #else CSTD_UNUSED(kbdev); #endif /* MALI_USE_CSF */ @@ -277,16 +295,16 @@ static int kbase_pbha_read_int_id_override_property(struct kbase_device *kbdev, static int kbase_pbha_read_propagate_bits_property(struct kbase_device *kbdev, const struct device_node *pbha_node) { - u32 bits = 0; + u8 bits = 0; int err; - if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PBHA_HWU)) + if (!kbase_hw_has_feature(kbdev, KBASE_HW_FEATURE_PBHA_HWU)) return 0; - err = 
of_property_read_u32(pbha_node, "propagate-bits", &bits); + err = of_property_read_u8(pbha_node, "propagate-bits", &bits); if (err == -EINVAL) { - err = of_property_read_u32(pbha_node, "propagate_bits", &bits); + err = of_property_read_u8(pbha_node, "propagate_bits", &bits); } if (err < 0) { @@ -310,6 +328,43 @@ static int kbase_pbha_read_propagate_bits_property(struct kbase_device *kbdev, kbdev->pbha_propagate_bits = bits; return 0; } + +static int kbase_pbha_read_mma_wa_id_property(struct kbase_device *kbdev, + const struct device_node *pbha_node) +{ + u32 mma_wa_id = 0; + int err; + + /* Skip if kbdev->mma_wa_id has already been set via the module parameter */ + if ((kbdev->gpu_props.gpu_id.arch_id < GPU_ID_ARCH_MAKE(14, 8, 0)) || kbdev->mma_wa_id != 0) + return 0; + + err = of_property_read_u32(pbha_node, "mma-wa-id", &mma_wa_id); + + /* Property does not exist. This is not a mandatory property, ignore this error */ + if (err == -EINVAL) + return 0; + + if (err == -ENODATA) { + dev_err(kbdev->dev, "DTB property mma-wa-id has no value\n"); + return err; + } + + if (err == -EOVERFLOW) { + dev_err(kbdev->dev, "DTB value for mma-wa-id is out of range\n"); + return err; + } + + if (mma_wa_id == 0 || mma_wa_id > 15) { + dev_err(kbdev->dev, + "Invalid DTB value for mma-wa-id: %u. 
Valid range is between 1 and 15.\n", + mma_wa_id); + return -EINVAL; + } + + kbdev->mma_wa_id = mma_wa_id; + return 0; +} #endif /* MALI_USE_CSF */ int kbase_pbha_read_dtb(struct kbase_device *kbdev) @@ -331,6 +386,12 @@ int kbase_pbha_read_dtb(struct kbase_device *kbdev) return err; err = kbase_pbha_read_propagate_bits_property(kbdev, pbha_node); + + if (err < 0) + return err; + + err = kbase_pbha_read_mma_wa_id_property(kbdev, pbha_node); + return err; #else return 0; diff --git a/drivers/gpu/arm/bifrost/mali_kbase_pbha_debugfs.c b/drivers/gpu/arm/bifrost/mali_kbase_pbha_debugfs.c index f1d2794dd86a..81f2df5ea977 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_pbha_debugfs.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_pbha_debugfs.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2021-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2021-2024 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -50,8 +50,8 @@ static int int_id_overrides_show(struct seq_file *sfile, void *data) #endif /* MALI_USE_CSF */ for (j = 0; j < sizeof(u32); ++j) { - u8 r_val; - u8 w_val; + u8 r_val = 0; + u8 w_val = 0; switch (j) { case 0: @@ -234,7 +234,7 @@ void kbase_pbha_debugfs_init(struct kbase_device *kbdev) debugfs_create_file("int_id_overrides", mode, debugfs_pbha_dir, kbdev, &pbha_int_id_overrides_fops); #if MALI_USE_CSF - if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PBHA_HWU)) + if (kbase_hw_has_feature(kbdev, KBASE_HW_FEATURE_PBHA_HWU)) debugfs_create_file("propagate_bits", mode, debugfs_pbha_dir, kbdev, &pbha_propagate_bits_fops); #endif /* MALI_USE_CSF */ diff --git a/drivers/gpu/arm/bifrost/mali_kbase_pm.c b/drivers/gpu/arm/bifrost/mali_kbase_pm.c index ff71524eeaaa..6719a120c1f3 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_pm.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_pm.c @@ 
-1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2024 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -31,9 +31,7 @@ #include #include -#ifdef CONFIG_MALI_ARBITER_SUPPORT #include -#endif /* CONFIG_MALI_ARBITER_SUPPORT */ #include @@ -52,22 +50,21 @@ void kbase_pm_context_active(struct kbase_device *kbdev) (void)kbase_pm_context_active_handle_suspend(kbdev, KBASE_PM_SUSPEND_HANDLER_NOT_POSSIBLE); } -int kbase_pm_context_active_handle_suspend(struct kbase_device *kbdev, - enum kbase_pm_suspend_handler suspend_handler) +int kbase_pm_context_active_handle_suspend_locked(struct kbase_device *kbdev, + enum kbase_pm_suspend_handler suspend_handler) { int c; KBASE_DEBUG_ASSERT(kbdev != NULL); dev_dbg(kbdev->dev, "%s - reason = %d, pid = %d\n", __func__, suspend_handler, current->pid); - kbase_pm_lock(kbdev); + lockdep_assert_held(&kbdev->pm.lock); -#ifdef CONFIG_MALI_ARBITER_SUPPORT - if (kbase_arbiter_pm_ctx_active_handle_suspend(kbdev, suspend_handler)) { - kbase_pm_unlock(kbdev); + /* If there is an Arbiter, wait for Arbiter to grant GPU back to KBase + * so suspend request can be handled. 
+ */ + if (kbase_arbiter_pm_ctx_active_handle_suspend(kbdev, suspend_handler)) return 1; - } -#endif /* CONFIG_MALI_ARBITER_SUPPORT */ if (kbase_pm_is_suspending(kbdev)) { switch (suspend_handler) { @@ -76,7 +73,6 @@ int kbase_pm_context_active_handle_suspend(struct kbase_device *kbdev, break; fallthrough; case KBASE_PM_SUSPEND_HANDLER_DONT_INCREASE: - kbase_pm_unlock(kbdev); return 1; case KBASE_PM_SUSPEND_HANDLER_NOT_POSSIBLE: @@ -94,27 +90,35 @@ int kbase_pm_context_active_handle_suspend(struct kbase_device *kbdev, * any cores requested by the policy */ kbase_hwaccess_pm_gpu_active(kbdev); -#ifdef CONFIG_MALI_ARBITER_SUPPORT kbase_arbiter_pm_vm_event(kbdev, KBASE_VM_REF_EVENT); -#endif /* CONFIG_MALI_ARBITER_SUPPORT */ kbase_clk_rate_trace_manager_gpu_active(kbdev); } - kbase_pm_unlock(kbdev); dev_dbg(kbdev->dev, "%s %d\n", __func__, kbdev->pm.active_count); return 0; } +int kbase_pm_context_active_handle_suspend(struct kbase_device *kbdev, + enum kbase_pm_suspend_handler suspend_handler) +{ + int ret; + + kbase_pm_lock(kbdev); + ret = kbase_pm_context_active_handle_suspend_locked(kbdev, suspend_handler); + kbase_pm_unlock(kbdev); + + return ret; +} + KBASE_EXPORT_TEST_API(kbase_pm_context_active); -void kbase_pm_context_idle(struct kbase_device *kbdev) +void kbase_pm_context_idle_locked(struct kbase_device *kbdev) { int c; KBASE_DEBUG_ASSERT(kbdev != NULL); - - kbase_pm_lock(kbdev); + lockdep_assert_held(&kbdev->pm.lock); c = --kbdev->pm.active_count; KBASE_KTRACE_ADD(kbdev, PM_CONTEXT_IDLE, NULL, (u64)c); @@ -133,10 +137,16 @@ void kbase_pm_context_idle(struct kbase_device *kbdev) wake_up(&kbdev->pm.zero_active_count_wait); } - kbase_pm_unlock(kbdev); dev_dbg(kbdev->dev, "%s %d (pid = %d)\n", __func__, kbdev->pm.active_count, current->pid); } +void kbase_pm_context_idle(struct kbase_device *kbdev) +{ + kbase_pm_lock(kbdev); + kbase_pm_context_idle_locked(kbdev); + kbase_pm_unlock(kbdev); +} + KBASE_EXPORT_TEST_API(kbase_pm_context_idle); static void 
reenable_hwcnt_on_resume(struct kbase_device *kbdev) @@ -155,7 +165,12 @@ static void reenable_hwcnt_on_resume(struct kbase_device *kbdev) #endif /* Resume HW counters intermediaries. */ - kbase_kinstr_prfcnt_resume(kbdev->kinstr_prfcnt_ctx); +#if MALI_USE_CSF + if (kbdev->csf.firmware_inited) +#endif + { + kbase_kinstr_prfcnt_resume(kbdev->kinstr_prfcnt_ctx); + } } static void resume_job_scheduling(struct kbase_device *kbdev) @@ -183,7 +198,12 @@ int kbase_pm_driver_suspend(struct kbase_device *kbdev) /* Suspend HW counter intermediaries. This blocks until workers and timers * are no longer running. */ - kbase_kinstr_prfcnt_suspend(kbdev->kinstr_prfcnt_ctx); +#if MALI_USE_CSF + if (kbdev->csf.firmware_inited) +#endif + { + kbase_kinstr_prfcnt_suspend(kbdev->kinstr_prfcnt_ctx); + } /* Disable GPU hardware counters. * This call will block until counters are disabled. @@ -199,21 +219,24 @@ int kbase_pm_driver_suspend(struct kbase_device *kbdev) kbdev->pm.suspending = true; mutex_unlock(&kbdev->pm.lock); -#ifdef CONFIG_MALI_ARBITER_SUPPORT -#if !MALI_USE_CSF - if (kbdev->arb.arb_if) { - unsigned int i; + if (kbase_has_arbiter(kbdev)) { unsigned long flags; +#if MALI_USE_CSF + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbase_disjoint_state_up(kbdev); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); +#else + unsigned int i; + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); kbdev->js_data.runpool_irq.submit_allowed = 0; kbase_disjoint_state_up(kbdev); for (i = 0; i < kbdev->gpu_props.num_job_slots; i++) kbase_job_slot_softstop(kbdev, i, NULL); spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); +#endif } -#endif /* !MALI_USE_CSF */ -#endif /* CONFIG_MALI_ARBITER_SUPPORT */ /* From now on, the active count will drop towards zero. Sometimes, * it'll go up briefly before going down again. 
However, once @@ -259,19 +282,21 @@ int kbase_pm_driver_suspend(struct kbase_device *kbdev) */ if (kbase_hwaccess_pm_suspend(kbdev)) { /* No early return yet */ - if (IS_ENABLED(CONFIG_MALI_ARBITER_SUPPORT)) + if (kbase_has_arbiter(kbdev)) WARN_ON_ONCE(1); else goto exit; } -#ifdef CONFIG_MALI_ARBITER_SUPPORT - if (kbdev->arb.arb_if) { + if (kbase_has_arbiter(kbdev)) { mutex_lock(&kbdev->pm.arb_vm_state->vm_state_lock); kbase_arbiter_pm_vm_stopped(kbdev); mutex_unlock(&kbdev->pm.arb_vm_state->vm_state_lock); } -#endif /* CONFIG_MALI_ARBITER_SUPPORT */ + +#if MALI_USE_CSF + kbase_backend_invalidate_gpu_timestamp_offset(kbdev); +#endif return 0; @@ -307,14 +332,13 @@ void kbase_pm_driver_resume(struct kbase_device *kbdev, bool arb_gpu_start) kbase_hwaccess_pm_resume(kbdev); /* Initial active call, to power on the GPU/cores if needed */ -#ifdef CONFIG_MALI_ARBITER_SUPPORT - if (kbase_pm_context_active_handle_suspend( - kbdev, (arb_gpu_start ? KBASE_PM_SUSPEND_HANDLER_VM_GPU_GRANTED : - KBASE_PM_SUSPEND_HANDLER_NOT_POSSIBLE))) - return; -#else - kbase_pm_context_active(kbdev); -#endif + if (kbase_has_arbiter(kbdev)) { + if (kbase_pm_context_active_handle_suspend( + kbdev, (arb_gpu_start ? 
KBASE_PM_SUSPEND_HANDLER_VM_GPU_GRANTED : + KBASE_PM_SUSPEND_HANDLER_NOT_POSSIBLE))) + return; + } else + kbase_pm_context_active(kbdev); resume_job_scheduling(kbdev); @@ -338,26 +362,19 @@ void kbase_pm_driver_resume(struct kbase_device *kbdev, bool arb_gpu_start) int kbase_pm_suspend(struct kbase_device *kbdev) { int result = 0; -#ifdef CONFIG_MALI_ARBITER_SUPPORT - if (kbdev->arb.arb_if) + + if (kbase_has_arbiter(kbdev)) kbase_arbiter_pm_vm_event(kbdev, KBASE_VM_OS_SUSPEND_EVENT); else result = kbase_pm_driver_suspend(kbdev); -#else - result = kbase_pm_driver_suspend(kbdev); -#endif /* CONFIG_MALI_ARBITER_SUPPORT */ return result; } void kbase_pm_resume(struct kbase_device *kbdev) { -#ifdef CONFIG_MALI_ARBITER_SUPPORT - if (kbdev->arb.arb_if) + if (kbase_has_arbiter(kbdev)) kbase_arbiter_pm_vm_event(kbdev, KBASE_VM_OS_RESUME_EVENT); else kbase_pm_driver_resume(kbdev, false); -#else - kbase_pm_driver_resume(kbdev, false); -#endif /* CONFIG_MALI_ARBITER_SUPPORT */ } diff --git a/drivers/gpu/arm/bifrost/mali_kbase_pm.h b/drivers/gpu/arm/bifrost/mali_kbase_pm.h index 46db4db5ffe0..25e4732a8d08 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_pm.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_pm.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2024 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -33,13 +33,12 @@ struct kbase_device; #define PM_ENABLE_IRQS 0x01 #define PM_HW_ISSUES_DETECT 0x02 -#ifdef CONFIG_MALI_ARBITER_SUPPORT -/* In the case that the GPU was granted by the Arbiter, it will have +/* Case 1: the GPU was granted by the Arbiter, it will have * already been reset. The following flag ensures it is not reset * twice. + * Case 2: GPU already in reset state after power on, then no soft-reset is needed. 
*/ #define PM_NO_RESET 0x04 -#endif /** * kbase_pm_init - Initialize the power management framework. @@ -121,12 +120,10 @@ enum kbase_pm_suspend_handler { * (e.g. guarantee it's going to be idled very soon after) */ KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE, -#ifdef CONFIG_MALI_ARBITER_SUPPORT /** Special case when Arbiter has notified we can use GPU. * Active count should always start at 0 in this case. */ KBASE_PM_SUSPEND_HANDLER_VM_GPU_GRANTED, -#endif /* CONFIG_MALI_ARBITER_SUPPORT */ }; /** @@ -148,6 +145,18 @@ enum kbase_pm_suspend_handler { int kbase_pm_context_active_handle_suspend(struct kbase_device *kbdev, enum kbase_pm_suspend_handler suspend_handler); +/** + * kbase_pm_context_active_handle_suspend_locked - Same as kbase_pm_context_active_handle_suspend(), + * except that pm.lock is held by the caller. + * + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * @suspend_handler: The handler code for how to handle a suspend that might occur + * + * Return: 0 on success, non-zero otherwise. + */ +int kbase_pm_context_active_handle_suspend_locked(struct kbase_device *kbdev, + enum kbase_pm_suspend_handler suspend_handler); + /** * kbase_pm_context_idle - Decrement the reference count of active contexts. * @@ -159,6 +168,14 @@ int kbase_pm_context_active_handle_suspend(struct kbase_device *kbdev, */ void kbase_pm_context_idle(struct kbase_device *kbdev); +/** + * kbase_pm_context_idle_locked - Same as kbase_pm_context_idle(), except that + * pm.lock is held by the caller. + * + * @kbdev: The kbase device structure for the device (must be a valid pointer) + */ +void kbase_pm_context_idle_locked(struct kbase_device *kbdev); + /* NOTE: kbase_pm_is_active() is in mali_kbase.h, because it is an inline * function */ @@ -215,7 +232,7 @@ void kbase_pm_vsync_callback(int buffer_updated, void *data); * kbase components to complete the suspend.
* * Despite kbase_pm_suspend(), it will ignore to update Arbiter - * status if MALI_ARBITER_SUPPORT is enabled. + * status if there is one. * * @note the mechanisms used here rely on all user-space threads being frozen * by the OS before we suspend. Otherwise, an IOCTL could occur that powers up @@ -239,11 +256,10 @@ int kbase_pm_driver_suspend(struct kbase_device *kbdev); * Also called when using VM arbiter, when GPU access has been granted. * * Despite kbase_pm_resume(), it will ignore to update Arbiter - * status if MALI_ARBITER_SUPPORT is enabled. + * status if there is one. */ void kbase_pm_driver_resume(struct kbase_device *kbdev, bool arb_gpu_start); -#ifdef CONFIG_MALI_ARBITER_SUPPORT /** * kbase_pm_handle_gpu_lost() - Handle GPU Lost for the VM * @kbdev: Device pointer @@ -254,6 +270,5 @@ void kbase_pm_driver_resume(struct kbase_device *kbdev, bool arb_gpu_start); * Kill any running tasks and put the driver into a GPU powered-off state. */ void kbase_pm_handle_gpu_lost(struct kbase_device *kbdev); -#endif /* CONFIG_MALI_ARBITER_SUPPORT */ #endif /* _KBASE_PM_H_ */ diff --git a/drivers/gpu/arm/bifrost/mali_kbase_reg_track.c b/drivers/gpu/arm/bifrost/mali_kbase_reg_track.c index 3128292a9a30..e490a2a3d179 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_reg_track.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_reg_track.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2023-2024 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -648,7 +648,7 @@ int kbase_add_va_region_rbtree(struct kbase_device *kbdev, struct kbase_va_regio } else if (!kbase_is_region_free(tmp)) { dev_warn( dev, - "!(tmp->flags & KBASE_REG_FREE): tmp->start_pfn=0x%llx tmp->flags=0x%lx tmp->nr_pages=0x%zx gpu_pfn=0x%llx nr_pages=0x%zx\n", + "!(tmp->flags & KBASE_REG_FREE): tmp->start_pfn=0x%llx tmp->flags=0x%llx tmp->nr_pages=0x%zx gpu_pfn=0x%llx nr_pages=0x%zx\n", tmp->start_pfn, tmp->flags, tmp->nr_pages, gpu_pfn, nr_pages); err = -ENOMEM; goto exit; diff --git a/drivers/gpu/arm/bifrost/mali_kbase_softjobs.c b/drivers/gpu/arm/bifrost/mali_kbase_softjobs.c index 0cee2f0e6fd5..bae1630c94a9 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_softjobs.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_softjobs.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2011-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2024 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -143,9 +143,8 @@ static int kbase_dump_cpu_gpu_time(struct kbase_jd_atom *katom) * delay suspend until we process the atom (which may be at the end of a * long chain of dependencies */ -#ifdef CONFIG_MALI_ARBITER_SUPPORT - atomic_inc(&kctx->kbdev->pm.gpu_users_waiting); -#endif /* CONFIG_MALI_ARBITER_SUPPORT */ + if (kbase_has_arbiter(kctx->kbdev)) + atomic_inc(&kctx->kbdev->pm.gpu_users_waiting); pm_active_err = kbase_pm_context_active_handle_suspend( kctx->kbdev, KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE); if (pm_active_err) { @@ -163,11 +162,8 @@ static int kbase_dump_cpu_gpu_time(struct kbase_jd_atom *katom) kbasep_add_waiting_soft_job(katom); return pm_active_err; - } -#ifdef CONFIG_MALI_ARBITER_SUPPORT - else + } else if (kbase_has_arbiter(kctx->kbdev)) atomic_dec(&kctx->kbdev->pm.gpu_users_waiting); -#endif /* CONFIG_MALI_ARBITER_SUPPORT */ kbase_backend_get_gpu_time(kctx->kbdev, &cycle_counter, &system_time, &ts); @@ -553,7 +549,7 @@ static int kbase_debug_copy_prepare(struct kbase_jd_atom *katom) goto out_cleanup; } - ret = copy_from_user(user_buffers, user_structs, sizeof(*user_buffers) * nr); + ret = copy_from_user(user_buffers, user_structs, size_mul(sizeof(*user_buffers), nr)); if (ret) { ret = -EFAULT; goto out_cleanup; @@ -1235,7 +1231,7 @@ static int kbase_jit_free_prepare(struct kbase_jd_atom *katom) goto free_info; } - if (copy_from_user(ids, data, sizeof(*ids) * count) != 0) { + if (copy_from_user(ids, data, size_mul(sizeof(*ids), count)) != 0) { ret = -EINVAL; goto free_info; } @@ -1408,7 +1404,7 @@ static void kbase_ext_res_process(struct kbase_jd_atom *katom, bool map) gpu_addr = ext_res->ext_res[i].ext_resource & ~(__u64)BASE_EXT_RES_ACCESS_EXCLUSIVE; if (map) { - if (!kbase_sticky_resource_acquire(katom->kctx, gpu_addr)) + if (!kbase_sticky_resource_acquire(katom->kctx, gpu_addr, NULL)) 
goto failed_loop; } else { if (!kbase_sticky_resource_release_force(katom->kctx, NULL, gpu_addr)) @@ -1688,9 +1684,8 @@ void kbase_resume_suspended_soft_jobs(struct kbase_device *kbdev) if (kbase_process_soft_job(katom_iter) == 0) { kbase_finish_soft_job(katom_iter); resched |= kbase_jd_done_nolock(katom_iter, true); -#ifdef CONFIG_MALI_ARBITER_SUPPORT - atomic_dec(&kbdev->pm.gpu_users_waiting); -#endif /* CONFIG_MALI_ARBITER_SUPPORT */ + if (kbase_has_arbiter(kctx->kbdev)) + atomic_dec(&kbdev->pm.gpu_users_waiting); } mutex_unlock(&kctx->jctx.lock); } diff --git a/drivers/gpu/arm/bifrost/mmu/Kbuild b/drivers/gpu/arm/bifrost/mmu/Kbuild index 416432397b5c..3c3defdb88e9 100644 --- a/drivers/gpu/arm/bifrost/mmu/Kbuild +++ b/drivers/gpu/arm/bifrost/mmu/Kbuild @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note # -# (C) COPYRIGHT 2021 ARM Limited. All rights reserved. +# (C) COPYRIGHT 2021-2024 ARM Limited. All rights reserved. # # This program is free software and is provided to you under the terms of the # GNU General Public License version 2 as published by the Free Software @@ -21,10 +21,15 @@ bifrost_kbase-y += \ mmu/mali_kbase_mmu.o \ mmu/mali_kbase_mmu_hw_direct.o \ + mmu/mali_kbase_mmu_faults_decoder_luts.o \ + mmu/mali_kbase_mmu_faults_decoder.o \ mmu/mali_kbase_mmu_mode_aarch64.o ifeq ($(CONFIG_MALI_CSF_SUPPORT),y) - bifrost_kbase-y += mmu/backend/mali_kbase_mmu_csf.o + bifrost_kbase-y += mmu/backend/mali_kbase_mmu_csf.o \ + mmu/backend/mali_kbase_mmu_faults_decoder_luts_csf.o else - bifrost_kbase-y += mmu/backend/mali_kbase_mmu_jm.o + bifrost_kbase-y += mmu/backend/mali_kbase_mmu_jm.o \ + mmu/backend/mali_kbase_mmu_faults_decoder_luts_jm.o + endif diff --git a/drivers/gpu/arm/bifrost/mmu/backend/mali_kbase_mmu_csf.c b/drivers/gpu/arm/bifrost/mmu/backend/mali_kbase_mmu_csf.c index df027c727a2c..196d481d6827 100644 --- a/drivers/gpu/arm/bifrost/mmu/backend/mali_kbase_mmu_csf.c +++ b/drivers/gpu/arm/bifrost/mmu/backend/mali_kbase_mmu_csf.c 
@@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2024 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -29,6 +29,7 @@ #include #include #include +#include void kbase_mmu_get_as_setup(struct kbase_mmu_table *mmut, struct kbase_mmu_setup *const setup) { @@ -99,15 +100,22 @@ void kbase_mmu_report_mcu_as_fault_and_reset(struct kbase_device *kbdev, struct u32 as_no; /* terminal fault, print info about the fault */ - dev_err(kbdev->dev, - "Unexpected Page fault in firmware address space at VA 0x%016llX\n" - "raw fault status: 0x%X\n" - "exception type 0x%X: %s\n" - "access type 0x%X: %s\n" - "source id 0x%X\n", - fault->addr, fault->status, exception_type, - kbase_gpu_exception_name(exception_type), access_type, - kbase_gpu_access_type_name(fault->status), source_id); + if (kbdev->gpu_props.gpu_id.product_model < GPU_ID_MODEL_MAKE(14, 0)) { + dev_err(kbdev->dev, + "Unexpected Page fault in firmware address space at VA 0x%016llX\n" + "raw fault status: 0x%X\n" + "exception type 0x%X: %s\n" + "access type 0x%X: %s\n" + "source id 0x%X (core_id:utlb:IR 0x%X:0x%X:0x%X): %s, %s\n", + fault->addr, fault->status, exception_type, + kbase_gpu_exception_name(exception_type), access_type, + kbase_gpu_access_type_name(fault->status), source_id, + FAULT_SOURCE_ID_CORE_ID_GET(source_id), + FAULT_SOURCE_ID_UTLB_ID_GET(source_id), + fault_source_id_internal_requester_get(kbdev, source_id), + fault_source_id_core_type_description_get(kbdev, source_id), + fault_source_id_internal_requester_get_str(kbdev, source_id, access_type)); + } kbase_debug_csf_fault_notify(kbdev, NULL, DF_GPU_PAGE_FAULT); @@ -139,17 +147,25 @@ void kbase_gpu_report_bus_fault_and_kill(struct kbase_context *kctx, struct kbas const uintptr_t fault_addr = 
fault->addr; /* terminal fault, print info about the fault */ - dev_err(kbdev->dev, - "GPU bus fault in AS%u at PA %pK\n" - "PA_VALID: %s\n" - "raw fault status: 0x%X\n" - "exception type 0x%X: %s\n" - "access type 0x%X: %s\n" - "source id 0x%X\n" - "pid: %d\n", - as_no, (void *)fault_addr, addr_valid, status, exception_type, - kbase_gpu_exception_name(exception_type), access_type, - kbase_gpu_access_type_name(access_type), source_id, kctx->pid); + if (kbdev->gpu_props.gpu_id.product_model < GPU_ID_MODEL_MAKE(14, 0)) { + dev_err(kbdev->dev, + "GPU bus fault in AS%u at PA %pK\n" + "PA_VALID: %s\n" + "raw fault status: 0x%X\n" + "exception type 0x%X: %s\n" + "access type 0x%X: %s\n" + "source id 0x%X (core_id:utlb:IR 0x%X:0x%X:0x%X): %s, %s\n" + "pid: %d\n", + as_no, (void *)fault_addr, addr_valid, status, exception_type, + kbase_gpu_exception_name(exception_type), access_type, + kbase_gpu_access_type_name(access_type), source_id, + FAULT_SOURCE_ID_CORE_ID_GET(source_id), + FAULT_SOURCE_ID_UTLB_ID_GET(source_id), + fault_source_id_internal_requester_get(kbdev, source_id), + fault_source_id_core_type_description_get(kbdev, source_id), + fault_source_id_internal_requester_get_str(kbdev, source_id, access_type), + kctx->pid); + } /* AS transaction begin */ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); @@ -195,17 +211,26 @@ void kbase_mmu_report_fault_and_kill(struct kbase_context *kctx, struct kbase_as unsigned int as_no = as->number; /* terminal fault, print info about the fault */ - dev_err(kbdev->dev, - "Unhandled Page fault in AS%u at VA 0x%016llX\n" - "Reason: %s\n" - "raw fault status: 0x%X\n" - "exception type 0x%X: %s\n" - "access type 0x%X: %s\n" - "source id 0x%X\n" - "pid: %d\n", - as_no, fault->addr, reason_str, status, exception_type, - kbase_gpu_exception_name(exception_type), access_type, - kbase_gpu_access_type_name(status), source_id, kctx->pid); + if (kbdev->gpu_props.gpu_id.product_model < GPU_ID_MODEL_MAKE(14, 0)) { + dev_err(kbdev->dev, + 
"Unhandled Page fault in AS%u at VA 0x%016llX\n" + "Reason: %s\n" + "raw fault status: 0x%X\n" + "exception type 0x%X: %s\n" + "access type 0x%X: %s\n" + "source id 0x%X (core_id:utlb:IR 0x%X:0x%X:0x%X): %s, %s\n" + "pid: %d\n", + as_no, fault->addr, reason_str, status, exception_type, + kbase_gpu_exception_name(exception_type), access_type, + kbase_gpu_access_type_name(status), source_id, + FAULT_SOURCE_ID_CORE_ID_GET(source_id), + FAULT_SOURCE_ID_UTLB_ID_GET(source_id), + fault_source_id_internal_requester_get(kbdev, source_id), + fault_source_id_core_type_description_get(kbdev, source_id), + fault_source_id_internal_requester_get_str(kbdev, source_id, + access_type), + kctx->pid); + } } /* AS transaction begin */ @@ -214,6 +239,14 @@ void kbase_mmu_report_fault_and_kill(struct kbase_context *kctx, struct kbase_as * will abort all jobs and stop any hw counter dumping */ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + /* Update the page fault counter value in firmware visible memory, just before disabling + * the MMU which would in turn unblock the MCU firmware. + */ + if (kbdev->csf.page_fault_cnt_ptr) { + spin_lock(&kbdev->mmu_mask_change); + *kbdev->csf.page_fault_cnt_ptr = ++kbdev->csf.page_fault_cnt; + spin_unlock(&kbdev->mmu_mask_change); + } kbase_mmu_disable(kctx); kbase_ctx_flag_set(kctx, KCTX_AS_DISABLED_ON_FAULT); kbase_debug_csf_fault_notify(kbdev, kctx, DF_GPU_PAGE_FAULT); @@ -407,15 +440,6 @@ void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat) spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags); } -int kbase_mmu_switch_to_ir(struct kbase_context *const kctx, struct kbase_va_region *const reg) -{ - CSTD_UNUSED(kctx); - CSTD_UNUSED(reg); - - /* Can't soft-stop the provoking job */ - return -EPERM; -} - /** * kbase_mmu_gpu_fault_worker() - Process a GPU fault for the device. 
* diff --git a/drivers/gpu/arm/bifrost/mmu/backend/mali_kbase_mmu_faults_decoder_luts_csf.c b/drivers/gpu/arm/bifrost/mmu/backend/mali_kbase_mmu_faults_decoder_luts_csf.c new file mode 100644 index 000000000000..d8eec91ba887 --- /dev/null +++ b/drivers/gpu/arm/bifrost/mmu/backend/mali_kbase_mmu_faults_decoder_luts_csf.c @@ -0,0 +1,139 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * + * (C) COPYRIGHT 2019-2024 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +/** + * DOC: Base kernel MMU faults decoder for CSF GPUs. 
+ */ + +#include + +#define GPU_ID_ARCH_ID_MAJOR_GET(gpu_id) ((gpu_id >> 16) & 0xFF) +#define GPU_ID_ARCH_ID_MINOR_AND_REV_GET(gpu_id) (gpu_id & 0xFFFF) +#define NELEMS(s) (sizeof(s) / sizeof((s)[0])) + +struct decode_lut_element { + u16 arch_minor_rev; + u16 key; + const char *text; +}; + +static const char *decode_lut_element_lookup(u16 arch_minor_rev, u16 key, + struct decode_lut_element *decode_element_lut, + unsigned int lut_len) +{ + struct decode_lut_element *p; + + for (p = decode_element_lut; p < decode_element_lut + lut_len; p++) { + if (p->key == key && + (p->arch_minor_rev == 0xffff || p->arch_minor_rev == arch_minor_rev)) + break; + } + if (p < decode_element_lut + lut_len) + return p->text; + else + return "unknown"; +} + +/* Auto-generated code: DO NOT MODIFY! */ + +static struct decode_lut_element lut_fault_source_csf_r_t_major_10[] = { + { 0xFFFF, 0, "pref0" }, + { 0xFFFF, 4, "iter0" }, + { 0xFFFF, 12, "lsu" }, + { 0xFFFF, 13, "mcu" }, +}; + +static struct decode_lut_element lut_fault_source_csf_r_t_major_11[] = { + { 0xFFFF, 0, "pref0" }, + { 0xFFFF, 4, "iter0" }, + { 0xFFFF, 12, "lsu" }, + { 0xFFFF, 13, "mcu" }, +}; + +static struct decode_lut_element lut_fault_source_csf_r_t_major_12[] = { + { 0xFFFF, 0, "pref0" }, + { 0xFFFF, 4, "iter0" }, + { 0xFFFF, 12, "lsu" }, + { 0xFFFF, 13, "mcu" }, +}; + +static struct decode_lut_element lut_fault_source_csf_w_t_major_10[] = { + { 0xFFFF, 8, "pcb0" }, + { 0xFFFF, 12, "lsu" }, + { 0xFFFF, 13, "mcu" }, +}; + +static struct decode_lut_element lut_fault_source_csf_w_t_major_11[] = { + { 0xFFFF, 8, "pcb0" }, + { 0xFFFF, 12, "lsu" }, + { 0xFFFF, 13, "mcu" }, +}; + +static struct decode_lut_element lut_fault_source_csf_w_t_major_12[] = { + { 0xFFFF, 8, "pcb0" }, + { 0xFFFF, 12, "lsu" }, + { 0xFFFF, 13, "mcu" }, +}; + + +const char *decode_fault_source_csf_r_t(u16 idx, u32 gpu_id) +{ + u16 min_rev = GPU_ID_ARCH_ID_MINOR_AND_REV_GET(gpu_id); + const char *ret = "unknown"; + + switch 
(GPU_ID_ARCH_ID_MAJOR_GET(gpu_id)) { + case 10: + ret = decode_lut_element_lookup(min_rev, idx, lut_fault_source_csf_r_t_major_10, + NELEMS(lut_fault_source_csf_r_t_major_10)); + break; + case 11: + ret = decode_lut_element_lookup(min_rev, idx, lut_fault_source_csf_r_t_major_11, + NELEMS(lut_fault_source_csf_r_t_major_11)); + break; + case 12: + ret = decode_lut_element_lookup(min_rev, idx, lut_fault_source_csf_r_t_major_12, + NELEMS(lut_fault_source_csf_r_t_major_12)); + break; + } + return ret; +} + +const char *decode_fault_source_csf_w_t(u16 idx, u32 gpu_id) +{ + u16 min_rev = GPU_ID_ARCH_ID_MINOR_AND_REV_GET(gpu_id); + const char *ret = "unknown"; + + switch (GPU_ID_ARCH_ID_MAJOR_GET(gpu_id)) { + case 10: + ret = decode_lut_element_lookup(min_rev, idx, lut_fault_source_csf_w_t_major_10, + NELEMS(lut_fault_source_csf_w_t_major_10)); + break; + case 11: + ret = decode_lut_element_lookup(min_rev, idx, lut_fault_source_csf_w_t_major_11, + NELEMS(lut_fault_source_csf_w_t_major_11)); + break; + case 12: + ret = decode_lut_element_lookup(min_rev, idx, lut_fault_source_csf_w_t_major_12, + NELEMS(lut_fault_source_csf_w_t_major_12)); + break; + } + return ret; +} diff --git a/drivers/gpu/arm/bifrost/mmu/backend/mali_kbase_mmu_faults_decoder_luts_csf.h b/drivers/gpu/arm/bifrost/mmu/backend/mali_kbase_mmu_faults_decoder_luts_csf.h new file mode 100644 index 000000000000..04f5c02ccc3d --- /dev/null +++ b/drivers/gpu/arm/bifrost/mmu/backend/mali_kbase_mmu_faults_decoder_luts_csf.h @@ -0,0 +1,50 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2024 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +#ifndef _MALI_KBASE_MMU_FAULTS_DECODER_LUTS_CSF_H_ +#define _MALI_KBASE_MMU_FAULTS_DECODER_LUTS_CSF_H_ +#include + +/** + * decode_fault_source_csf_r_t() - Get internal requester of a + * fault in a human readable format. + * + * @idx: Internal requester part of SOURCE_ID field of the fault. + * @gpu_id: GPU id composed of arch_major << 16 | arch_minor << 8 | arch_rev. + * + * Return: Internal requester of a fault in a human readable format for a read + * operation on a CSF core. + */ +const char *decode_fault_source_csf_r_t(u16 idx, u32 gpu_id); + +/** + * decode_fault_source_csf_w_t() - Get internal requester of a + * fault in a human readable format. + * + * @idx: Internal requester part of SOURCE_ID field of the fault. + * @gpu_id: GPU id composed of arch_major << 16 | arch_minor << 8 | arch_rev. + * + * Return: Internal requester of a fault in a human readable format for a write + * operation on a CSF core. + */ +const char *decode_fault_source_csf_w_t(u16 idx, u32 gpu_id); + +#endif /* _MALI_KBASE_MMU_FAULTS_DECODER_LUTS_CSF_H_ */ diff --git a/drivers/gpu/arm/bifrost/mmu/backend/mali_kbase_mmu_faults_decoder_luts_jm.c b/drivers/gpu/arm/bifrost/mmu/backend/mali_kbase_mmu_faults_decoder_luts_jm.c new file mode 100644 index 000000000000..a053a93978b5 --- /dev/null +++ b/drivers/gpu/arm/bifrost/mmu/backend/mali_kbase_mmu_faults_decoder_luts_jm.c @@ -0,0 +1,74 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * + * (C) COPYRIGHT 2019-2024 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +/** + * DOC: Base kernel MMU faults decoder for Job Manager GPUs. + */ + +#include + +#define GPU_ID_ARCH_ID_MAJOR_GET(gpu_id) ((gpu_id >> 16) & 0xFF) +#define GPU_ID_ARCH_ID_MINOR_AND_REV_GET(gpu_id) (gpu_id & 0xFFFF) +#define NELEMS(s) (sizeof(s) / sizeof((s)[0])) + +struct decode_lut_element { + u16 arch_minor_rev; + u16 key; + const char *text; +}; + +static const char *decode_lut_element_lookup(u16 arch_minor_rev, u16 key, + struct decode_lut_element *decode_element_lut, + unsigned int lut_len) +{ + struct decode_lut_element *p; + + for (p = decode_element_lut; p < decode_element_lut + lut_len; p++) { + if (p->key == key && + (p->arch_minor_rev == 0xffff || p->arch_minor_rev == arch_minor_rev)) + break; + } + if (p < decode_element_lut + lut_len) + return p->text; + else + return "unknown"; +} + +/* Auto-generated code: DO NOT MODIFY! 
*/ + +static struct decode_lut_element lut_fault_source_jm_t_major_9[] = { + { 0xFFFF, 0, "js" }, + { 0xFFFF, 1, "pcm" }, +}; + +const char *decode_fault_source_jm_t(u16 idx, u32 gpu_id) +{ + u16 min_rev = GPU_ID_ARCH_ID_MINOR_AND_REV_GET(gpu_id); + const char *ret = "unknown"; + + switch (GPU_ID_ARCH_ID_MAJOR_GET(gpu_id)) { + case 9: + ret = decode_lut_element_lookup(min_rev, idx, lut_fault_source_jm_t_major_9, + NELEMS(lut_fault_source_jm_t_major_9)); + break; + } + return ret; +} diff --git a/drivers/gpu/arm/bifrost/mmu/backend/mali_kbase_mmu_faults_decoder_luts_jm.h b/drivers/gpu/arm/bifrost/mmu/backend/mali_kbase_mmu_faults_decoder_luts_jm.h new file mode 100644 index 000000000000..f686e555d86a --- /dev/null +++ b/drivers/gpu/arm/bifrost/mmu/backend/mali_kbase_mmu_faults_decoder_luts_jm.h @@ -0,0 +1,37 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2024 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +#ifndef _MALI_KBASE_MMU_FAULTS_DECODER_LUTS_JM_H_ +#define _MALI_KBASE_MMU_FAULTS_DECODER_LUTS_JM_H_ +#include + +/** + * decode_fault_source_jm_t() - Get internal requester of a + * fault in a human readable format. + * + * @idx: Internal requester part of SOURCE_ID field of the fault. 
+ * @gpu_id: GPU id composed of arch_major << 16 | arch_minor << 8 | arch_rev. + * + * Return: Internal requester of a fault in a human readable format for a JM core. + */ +const char *decode_fault_source_jm_t(u16 idx, u32 gpu_id); + +#endif /* _MALI_KBASE_MMU_FAULTS_DECODER_LUTS_JM_H_ */ diff --git a/drivers/gpu/arm/bifrost/mmu/backend/mali_kbase_mmu_jm.c b/drivers/gpu/arm/bifrost/mmu/backend/mali_kbase_mmu_jm.c index 1b2df11f3c3c..a7f3f40ef325 100644 --- a/drivers/gpu/arm/bifrost/mmu/backend/mali_kbase_mmu_jm.c +++ b/drivers/gpu/arm/bifrost/mmu/backend/mali_kbase_mmu_jm.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2024 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -29,6 +29,7 @@ #include #include #include +#include void kbase_mmu_get_as_setup(struct kbase_mmu_table *mmut, struct kbase_mmu_setup *const setup) { @@ -52,9 +53,10 @@ void kbase_gpu_report_bus_fault_and_kill(struct kbase_context *kctx, struct kbas struct kbase_fault *fault) { struct kbase_device *const kbdev = kctx->kbdev; - u32 const status = fault->status; - u32 const exception_type = (status & 0xFF); - u32 const exception_data = (status >> 8) & 0xFFFFFF; + const u32 status = fault->status; + const u32 exception_type = AS_FAULTSTATUS_EXCEPTION_TYPE_GET(status); + const u32 access_type = AS_FAULTSTATUS_ACCESS_TYPE_GET(status); + const u32 source_id = AS_FAULTSTATUS_SOURCE_ID_GET(status); unsigned int const as_no = as->number; unsigned long flags; const uintptr_t fault_addr = fault->addr; @@ -64,10 +66,17 @@ void kbase_gpu_report_bus_fault_and_kill(struct kbase_context *kctx, struct kbas "GPU bus fault in AS%u at PA %pK\n" "raw fault status: 0x%X\n" "exception type 0x%X: %s\n" - "exception data 0x%X\n" + "access type 0x%X: %s\n" + "source 
id 0x%X (core_id:utlb:IR 0x%X:0x%X:0x%X): %s, %s\n" "pid: %d\n", as_no, (void *)fault_addr, status, exception_type, - kbase_gpu_exception_name(exception_type), exception_data, kctx->pid); + kbase_gpu_exception_name(exception_type), access_type, + kbase_gpu_access_type_name(access_type), source_id, + FAULT_SOURCE_ID_CORE_ID_GET(source_id), FAULT_SOURCE_ID_UTLB_ID_GET(source_id), + fault_source_id_internal_requester_get(kbdev, source_id), + fault_source_id_core_type_description_get(kbdev, source_id), + fault_source_id_internal_requester_get_str(kbdev, source_id, access_type), + kctx->pid); /* switch to UNMAPPED mode, will abort all jobs and stop any hw counter * dumping AS transaction begin @@ -105,22 +114,42 @@ void kbase_mmu_report_fault_and_kill(struct kbase_context *kctx, struct kbase_as if (!kbase_ctx_flag(kctx, KCTX_PAGE_FAULT_REPORT_SKIP)) { /* decode the fault status */ - u32 exception_type = fault->status & 0xFF; - u32 access_type = (fault->status >> 8) & 0x3; - u32 source_id = (fault->status >> 16); - + const u32 status = fault->status; + const u32 exception_type = AS_FAULTSTATUS_EXCEPTION_TYPE_GET(status); + const u32 access_type = AS_FAULTSTATUS_ACCESS_TYPE_GET(status); + const u32 source_id = AS_FAULTSTATUS_SOURCE_ID_GET(status); /* terminal fault, print info about the fault */ - dev_err(kbdev->dev, - "Unhandled Page fault in AS%u at VA 0x%016llX\n" - "Reason: %s\n" - "raw fault status: 0x%X\n" - "exception type 0x%X: %s\n" - "access type 0x%X: %s\n" - "source id 0x%X\n" - "pid: %d\n", - as_no, fault->addr, reason_str, fault->status, exception_type, - kbase_gpu_exception_name(exception_type), access_type, - kbase_gpu_access_type_name(fault->status), source_id, kctx->pid); + if (kbdev->gpu_props.gpu_id.product_model < GPU_ID_MODEL_MAKE(9, 0)) { + dev_err(kbdev->dev, + "Unhandled Page fault in AS%u at VA 0x%016llX\n" + "Reason: %s\n" + "raw fault status: 0x%X\n" + "exception type 0x%X: %s\n" + "access type 0x%X: %s\n" + "pid: %d\n", + as_no, fault->addr, 
reason_str, status, exception_type, + kbase_gpu_exception_name(exception_type), access_type, + kbase_gpu_access_type_name(status), kctx->pid); + } else { + dev_err(kbdev->dev, + "Unhandled Page fault in AS%u at VA 0x%016llX\n" + "Reason: %s\n" + "raw fault status: 0x%X\n" + "exception type 0x%X: %s\n" + "access type 0x%X: %s\n" + "source id 0x%X (core_id:utlb:IR 0x%X:0x%X:0x%X): %s, %s\n" + "pid: %d\n", + as_no, fault->addr, reason_str, status, exception_type, + kbase_gpu_exception_name(exception_type), access_type, + kbase_gpu_access_type_name(status), source_id, + FAULT_SOURCE_ID_CORE_ID_GET(source_id), + FAULT_SOURCE_ID_UTLB_ID_GET(source_id), + fault_source_id_internal_requester_get(kbdev, source_id), + fault_source_id_core_type_description_get(kbdev, source_id), + fault_source_id_internal_requester_get_str(kbdev, source_id, + access_type), + kctx->pid); + } } /* hardware counters dump fault handling */ @@ -256,7 +285,7 @@ static void validate_protected_page_fault(struct kbase_device *kbdev) */ u32 protected_debug_mode = 0; - if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE)) { + if (kbase_hw_has_feature(kbdev, KBASE_HW_FEATURE_PROTECTED_DEBUG_MODE)) { protected_debug_mode = kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(GPU_STATUS)) & GPU_STATUS_GPU_DBG_ENABLED; } @@ -372,13 +401,6 @@ void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat) dev_dbg(kbdev->dev, "Leaving %s irq_stat %u\n", __func__, irq_stat); } -int kbase_mmu_switch_to_ir(struct kbase_context *const kctx, struct kbase_va_region *const reg) -{ - dev_dbg(kctx->kbdev->dev, "Switching to incremental rendering for region %pK\n", - (void *)reg); - return kbase_job_slot_softstop_start_rp(kctx, reg); -} - int kbase_mmu_as_init(struct kbase_device *kbdev, unsigned int i) { kbdev->as[i].number = i; diff --git a/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu.c b/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu.c index becbb02aa15a..4963d990054f 100644 --- 
a/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu.c +++ b/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2024 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -40,7 +40,6 @@ #include #include #include -#include #include #include #if !MALI_USE_CSF @@ -58,6 +57,257 @@ /* Macro to convert updated PDGs to flags indicating levels skip in flush */ #define pgd_level_to_skip_flush(dirty_pgds) (~(dirty_pgds)&0xF) +/** + * kmap_pgd() - Map a PGD page and return the address of it + * + * @p: Pointer to the PGD page to be mapped. + * @pgd: The physical address of the PGD. May not be PAGE_SIZE aligned but shall be + * GPU_PAGE_SIZE aligned. + * + * Return: The mapped address of the @pgd, adjusted by the offset of @pgd from the start of page. + */ +static inline void *kmap_pgd(struct page *p, phys_addr_t pgd) +{ +#if GPU_PAGES_PER_CPU_PAGE > 1 + return kbase_kmap(p) + (pgd & ~PAGE_MASK); +#else + CSTD_UNUSED(pgd); + return kbase_kmap(p); +#endif +} + +/** + * kmap_atomic_pgd() - Variant of kmap_pgd for atomic mapping + * + * @p: Pointer to the PGD page to be mapped. + * @pgd: The physical address of the PGD. May not be PAGE_SIZE aligned but shall be + * GPU_PAGE_SIZE aligned. + * + * Return: The mapped address of the @pgd. + */ +static inline void *kmap_atomic_pgd(struct page *p, phys_addr_t pgd) +{ +#if GPU_PAGES_PER_CPU_PAGE > 1 + return kbase_kmap_atomic(p) + (pgd & ~PAGE_MASK); +#else + CSTD_UNUSED(pgd); + return kbase_kmap_atomic(p); +#endif +} + +/** + * kunmap_pgd() - Unmap a PGD page + * + * @p: Pointer to the PGD page to be unmapped. + * @pgd_address: The address of the PGD. May not be PAGE_SIZE aligned but shall be + * GPU_PAGE_SIZE aligned. 
+ */ +static inline void kunmap_pgd(struct page *p, void *pgd_address) +{ + /* It is okay to not align pgd_address to PAGE_SIZE boundary */ + kbase_kunmap(p, pgd_address); +} + +/** + * kunmap_atomic_pgd() - Variant of kunmap_pgd for atomic unmapping + * + * @pgd_address: The address of the PGD. May not be PAGE_SIZE aligned but shall be + * GPU_PAGE_SIZE aligned. + */ +static inline void kunmap_atomic_pgd(void *pgd_address) +{ + /* It is okay to not align pgd_address to PAGE_SIZE boundary */ + kbase_kunmap_atomic(pgd_address); +} + +/** + * pgd_dma_addr() - Return dma addr of a PGD + * + * @p: Pointer to the PGD page. + * @pgd: The physical address of the PGD. + * + * Return: DMA address of the PGD + */ +static inline dma_addr_t pgd_dma_addr(struct page *p, phys_addr_t pgd) +{ +#if GPU_PAGES_PER_CPU_PAGE > 1 + return kbase_page_private(p)->dma_addr + (pgd & ~PAGE_MASK); +#else + CSTD_UNUSED(pgd); + return kbase_dma_addr(p); +#endif +} + +/** + * get_pgd_sub_page_index() - Return the index of a sub PGD page in the PGD page. + * + * @pgd: The physical address of the PGD. + * + * Return: The index value ranging from 0 to (GPU_PAGES_PER_CPU_PAGE - 1) + */ +static inline u32 get_pgd_sub_page_index(phys_addr_t pgd) +{ + return (pgd & ~PAGE_MASK) / GPU_PAGE_SIZE; +} + +#if GPU_PAGES_PER_CPU_PAGE > 1 +/** + * alloc_pgd_page_metadata() - Allocate page metadata for a PGD. + * + * @kbdev: Pointer to the instance of a kbase device. + * @mmut: Structure holding details of the MMU table for a kcontext. + * @p: PGD page. + * + * The PGD page, @p is linked to &kbase_mmu_table.pgd_pages_list for allocating + * sub PGD pages from the list. + * + * Return: True on success. 
+ */ +static bool alloc_pgd_page_metadata(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, + struct page *p) +{ + struct kbase_page_metadata *page_md; + + if (!kbase_is_page_migration_enabled()) { + page_md = kmem_cache_zalloc(kbdev->page_metadata_slab, GFP_KERNEL); + if (!page_md) + return false; + + page_md->dma_addr = kbase_dma_addr_as_priv(p); + set_page_private(p, (unsigned long)page_md); + } else { + page_md = kbase_page_private(p); + } + + page_md->data.pt_mapped.num_allocated_sub_pages = 1; + set_bit(0, page_md->data.pt_mapped.allocated_sub_pages); + page_md->data.pt_mapped.pgd_page = p; + list_add(&page_md->data.pt_mapped.pgd_link, &mmut->pgd_pages_list); + + return true; +} + +/** + * free_pgd_page_metadata() - Free page metadata for a PGD. + * + * @kbdev: Pointer to the instance of a kbase device. + * @p: PGD page where the metadata belongs to. + * + * The PGD page, @p is removed from &kbase_mmu_table.pgd_pages_list. + */ +static void free_pgd_page_metadata(struct kbase_device *kbdev, struct page *p) +{ + struct kbase_page_metadata *page_md = kbase_page_private(p); + + WARN_ON_ONCE(page_md->data.pt_mapped.num_allocated_sub_pages); + page_md->data.pt_mapped.pgd_page = NULL; + list_del_init(&page_md->data.pt_mapped.pgd_link); + + if (kbase_is_page_migration_enabled()) + return; + + set_page_private(p, (unsigned long)page_md->dma_addr); + kmem_cache_free(kbdev->page_metadata_slab, page_md); +} + +/** + * allocate_pgd_sub_page() - Allocate a PGD sub page + * + * @page_md: Page metadata of a PGD page where a sub page is allocated from. + * + * Return: Physical address of allocated PGD sub page on success. + * KBASE_INVALID_PHYSICAL_ADDRESS on failure. 
+ */ +static inline phys_addr_t allocate_pgd_sub_page(struct kbase_page_metadata *page_md) +{ + unsigned long sub_page_index; + + if (page_md->data.pt_mapped.num_allocated_sub_pages == GPU_PAGES_PER_CPU_PAGE) + return KBASE_INVALID_PHYSICAL_ADDRESS; + sub_page_index = find_first_zero_bit(page_md->data.pt_mapped.allocated_sub_pages, + GPU_PAGES_PER_CPU_PAGE); + +#ifdef CONFIG_MALI_BIFROST_DEBUG + if (WARN_ON_ONCE(sub_page_index >= GPU_PAGES_PER_CPU_PAGE)) + return KBASE_INVALID_PHYSICAL_ADDRESS; + if (WARN_ON_ONCE(page_md->data.pt_mapped.num_allocated_sub_pages > GPU_PAGES_PER_CPU_PAGE)) + return KBASE_INVALID_PHYSICAL_ADDRESS; +#endif + set_bit(sub_page_index, page_md->data.pt_mapped.allocated_sub_pages); + page_md->data.pt_mapped.num_allocated_sub_pages++; + + return (page_to_phys(page_md->data.pt_mapped.pgd_page) + (sub_page_index * GPU_PAGE_SIZE)); +} + +/** + * free_pgd_sub_page() - Free a PGD sub page + * + * @pgd: Sub PGD to be freed. + * + * Return: The number of remaining allocated sub pages in the PGD. + */ +static int free_pgd_sub_page(phys_addr_t pgd) +{ + struct page *p = pfn_to_page(PFN_DOWN(pgd)); + struct kbase_page_metadata *page_md = kbase_page_private(p); + const u32 sub_page_index = get_pgd_sub_page_index(pgd); + +#ifdef CONFIG_MALI_BIFROST_DEBUG + if (WARN_ON_ONCE(!test_bit(sub_page_index, page_md->data.pt_mapped.allocated_sub_pages))) + return page_md->data.pt_mapped.num_allocated_sub_pages; +#endif + clear_bit(sub_page_index, page_md->data.pt_mapped.allocated_sub_pages); + if (!WARN_ON_ONCE(page_md->data.pt_mapped.num_allocated_sub_pages <= 0)) + page_md->data.pt_mapped.num_allocated_sub_pages--; + + return page_md->data.pt_mapped.num_allocated_sub_pages; +} + +/** + * allocate_from_pgd_pages_list() - Allocate a PGD from the PGD pages list + * + * @mmut: Structure holding details of the MMU table for a kcontext. + * + * Return: Physical address of the allocated PGD. 
+ */ +static inline phys_addr_t allocate_from_pgd_pages_list(struct kbase_mmu_table *mmut) +{ + struct list_head *entry; + phys_addr_t pgd; + + lockdep_assert_held(&mmut->mmu_lock); + + if (unlikely(!mmut->num_free_pgd_sub_pages)) + return KBASE_INVALID_PHYSICAL_ADDRESS; + + if (mmut->last_allocated_pgd_page) { + pgd = allocate_pgd_sub_page(kbase_page_private(mmut->last_allocated_pgd_page)); + if (pgd != KBASE_INVALID_PHYSICAL_ADDRESS) + goto success; + } + + if (mmut->last_freed_pgd_page) { + pgd = allocate_pgd_sub_page(kbase_page_private(mmut->last_freed_pgd_page)); + if (pgd != KBASE_INVALID_PHYSICAL_ADDRESS) + goto success; + } + + list_for_each(entry, &mmut->pgd_pages_list) { + struct kbase_page_metadata *page_md = + list_entry(entry, struct kbase_page_metadata, data.pt_mapped.pgd_link); + + pgd = allocate_pgd_sub_page(page_md); + if (pgd != KBASE_INVALID_PHYSICAL_ADDRESS) + goto success; + } + + return KBASE_INVALID_PHYSICAL_ADDRESS; +success: + mmut->num_free_pgd_sub_pages--; + return pgd; +} +#endif + static int mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, const u64 start_vpfn, struct tagged_addr *phys, size_t nr, unsigned long flags, int const group_id, u64 *dirty_pgds, @@ -151,6 +401,44 @@ static void mmu_invalidate(struct kbase_device *kbdev, struct kbase_context *kct spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); } +/** + * mmu_invalidate_on_teardown() - Perform an invalidate operation on MMU caches on page + * table teardown. + * @kbdev: The Kbase device. + * @kctx: The Kbase context. + * @vpfn: The virtual page frame number at which teardown is done. + * @num_pages: The number of entries that were invalidated in top most level PGD, that + * was affected by the teardown operation. + * @level: The top most PGD level that was touched on teardown. + * @as_nr: GPU address space number for which invalidate is required. 
+ * + * Perform an MMU invalidate operation after the teardown of top most level PGD on a + * particular address space by issuing a UNLOCK command. + */ +static inline void mmu_invalidate_on_teardown(struct kbase_device *kbdev, + struct kbase_context *kctx, u64 vpfn, + size_t num_pages, int level, int as_nr) +{ + u32 invalidate_range_num_pages = num_pages; + u64 invalidate_range_start_vpfn = vpfn; + struct kbase_mmu_hw_op_param op_param; + + if (level != MIDGARD_MMU_BOTTOMLEVEL) { + invalidate_range_num_pages = 1 << ((3 - level) * 9); + invalidate_range_start_vpfn = vpfn - (vpfn & (invalidate_range_num_pages - 1)); + } + + op_param = (struct kbase_mmu_hw_op_param){ + .vpfn = invalidate_range_start_vpfn, + .nr = invalidate_range_num_pages, + .mmu_sync_info = CALLER_MMU_ASYNC, + .kctx_id = kctx ? kctx->id : 0xFFFFFFFF, + .flush_skip_levels = (1ULL << level) - 1, + }; + + mmu_invalidate(kbdev, kctx, as_nr, &op_param); +} + /* Perform a flush/invalidate on a particular address space */ static void mmu_flush_invalidate_as(struct kbase_device *kbdev, struct kbase_as *as, @@ -318,14 +606,16 @@ static int kbase_mmu_update_pages_no_flush(struct kbase_device *kbdev, struct kb * @mmut: GPU MMU page table. * @pgds: Physical addresses of page directories to be freed. * @vpfn: The virtual page frame number. - * @level: The level of MMU page table. + * @level: The level of MMU page table that needs to be updated. * @flush_op: The type of MMU flush operation to perform. * @dirty_pgds: Flags to track every level where a PGD has been updated. + * @as_nr: GPU address space number for which invalidate is required. 
*/ static void kbase_mmu_update_and_free_parent_pgds(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, phys_addr_t *pgds, u64 vpfn, int level, - enum kbase_mmu_op_type flush_op, u64 *dirty_pgds); + enum kbase_mmu_op_type flush_op, u64 *dirty_pgds, + int as_nr); static void kbase_mmu_account_freed_pgd(struct kbase_device *kbdev, struct kbase_mmu_table *mmut) { @@ -377,7 +667,7 @@ static bool kbase_mmu_handle_isolated_pgd_page(struct kbase_device *kbdev, * PGD page, which is done inside kbase_mmu_free_pgd() for the * PGD page that did not get isolated. */ - dma_sync_single_for_device(kbdev->dev, kbase_dma_addr(p), PAGE_SIZE, + dma_sync_single_for_device(kbdev->dev, pgd_dma_addr(p, page_to_phys(p)), PAGE_SIZE, DMA_BIDIRECTIONAL); kbase_mmu_account_freed_pgd(kbdev, mmut); } @@ -404,6 +694,20 @@ static void kbase_mmu_free_pgd(struct kbase_device *kbdev, struct kbase_mmu_tabl lockdep_assert_held(&mmut->mmu_lock); p = pfn_to_page(PFN_DOWN(pgd)); +#if GPU_PAGES_PER_CPU_PAGE > 1 + if (free_pgd_sub_page(pgd)) { + mmut->num_free_pgd_sub_pages++; + mmut->last_freed_pgd_page = p; + return; + } + + mmut->num_free_pgd_sub_pages -= (GPU_PAGES_PER_CPU_PAGE - 1); + if (p == mmut->last_freed_pgd_page) + mmut->last_freed_pgd_page = NULL; + if (p == mmut->last_allocated_pgd_page) + mmut->last_allocated_pgd_page = NULL; + free_pgd_page_metadata(kbdev, p); +#endif page_is_isolated = kbase_mmu_handle_isolated_pgd_page(kbdev, mmut, p); if (likely(!page_is_isolated)) { @@ -433,19 +737,19 @@ static void kbase_mmu_free_pgds_list(struct kbase_device *kbdev, struct kbase_mm lockdep_assert_held(&mmut->mmu_lock); for (i = 0; i < mmut->scratch_mem.free_pgds.head_index; i++) - kbase_mmu_free_pgd(kbdev, mmut, page_to_phys(mmut->scratch_mem.free_pgds.pgds[i])); + kbase_mmu_free_pgd(kbdev, mmut, mmut->scratch_mem.free_pgds.pgds[i]); mmut->scratch_mem.free_pgds.head_index = 0; } -static void kbase_mmu_add_to_free_pgds_list(struct kbase_mmu_table *mmut, struct page *p) +static void 
kbase_mmu_add_to_free_pgds_list(struct kbase_mmu_table *mmut, phys_addr_t pgd) { lockdep_assert_held(&mmut->mmu_lock); if (WARN_ON_ONCE(mmut->scratch_mem.free_pgds.head_index > (MAX_FREE_PGDS - 1))) return; - mmut->scratch_mem.free_pgds.pgds[mmut->scratch_mem.free_pgds.head_index++] = p; + mmut->scratch_mem.free_pgds.pgds[mmut->scratch_mem.free_pgds.head_index++] = pgd; } static inline void kbase_mmu_reset_free_pgds_list(struct kbase_mmu_table *mmut) @@ -626,6 +930,14 @@ static void kbase_gpu_mmu_handle_write_fault(struct kbase_context *kctx, return; } + if (unlikely(region->gpu_alloc->type == KBASE_MEM_TYPE_ALIAS)) { + kbase_gpu_vm_unlock(kctx); + kbase_mmu_report_fault_and_kill( + kctx, faulting_as, "Unexpected write permission fault on an alias region", + &faulting_as->pf_data); + return; + } + pfn_offset = fault_pfn - region->start_pfn; fault_phys_addr = &kbase_get_gpu_phy_pages(region)[pfn_offset]; @@ -741,6 +1053,7 @@ static size_t estimate_pool_space_required(struct kbase_mem_pool *pool, const si * either small or 2 MiB pages, depending on the number of pages requested. * @grow_2mb_pool: Pointer to variable to store which pool needs to grow - true for 2 MiB, false for * pool of small pages. 
+ * @fallback_to_small: Whether to fall back to small pages or not * @prealloc_sas: Pointer to kbase_sub_alloc structures * * This function will try to allocate as many pages as possible from the context pool, then if @@ -758,7 +1071,7 @@ static size_t estimate_pool_space_required(struct kbase_mem_pool *pool, const si */ static bool page_fault_try_alloc(struct kbase_context *kctx, struct kbase_va_region *region, size_t new_pages, size_t *pages_to_grow, bool *grow_2mb_pool, - struct kbase_sub_alloc **prealloc_sas) + bool fallback_to_small, struct kbase_sub_alloc **prealloc_sas) { size_t total_gpu_pages_alloced = 0; size_t total_cpu_pages_alloced = 0; @@ -776,7 +1089,8 @@ static bool page_fault_try_alloc(struct kbase_context *kctx, struct kbase_va_reg return false; } - if (kctx->kbdev->pagesize_2mb && new_pages >= NUM_PAGES_IN_2MB_LARGE_PAGE) { + if (kbase_is_large_pages_enabled() && new_pages >= NUM_PAGES_IN_2MB_LARGE_PAGE && + !fallback_to_small) { root_pool = &kctx->mem_pools.large[region->gpu_alloc->group_id]; *grow_2mb_pool = true; } else { @@ -923,7 +1237,8 @@ void kbase_mmu_page_fault_worker(struct work_struct *data) int err; bool grown = false; size_t pages_to_grow; - bool grow_2mb_pool; + bool grow_2mb_pool = false; + bool fallback_to_small = false; struct kbase_sub_alloc *prealloc_sas[2] = { NULL, NULL }; int i; size_t current_backed_size; @@ -964,13 +1279,11 @@ void kbase_mmu_page_fault_worker(struct work_struct *data) #endif #endif -#ifdef CONFIG_MALI_ARBITER_SUPPORT /* check if we still have GPU */ if (unlikely(kbase_is_gpu_removed(kbdev))) { dev_dbg(kbdev->dev, "%s: GPU has been removed", __func__); goto fault_done; } -#endif if (unlikely(fault->protected_mode)) { kbase_mmu_report_fault_and_kill(kctx, faulting_as, "Protected mode fault", fault); @@ -1093,7 +1406,7 @@ void kbase_mmu_page_fault_worker(struct work_struct *data) } page_fault_retry: - if (kbdev->pagesize_2mb) { + if (kbase_is_large_pages_enabled() && !fallback_to_small) { /* Preallocate (or
re-allocate) memory for the sub-allocation structs if necessary */ for (i = 0; i != ARRAY_SIZE(prealloc_sas); ++i) { if (!prealloc_sas[i]) { @@ -1130,6 +1443,14 @@ page_fault_retry: goto fault_done; } + if (unlikely(region->gpu_alloc->type == KBASE_MEM_TYPE_ALIAS)) { + kbase_gpu_vm_unlock(kctx); + kbase_mmu_report_fault_and_kill(kctx, faulting_as, + "Unexpected page fault on an alias region", + &faulting_as->pf_data); + goto fault_done; + } + if (region->gpu_alloc->group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS) { kbase_gpu_vm_unlock(kctx); kbase_mmu_report_fault_and_kill(kctx, faulting_as, "Bad physical memory group ID", @@ -1180,10 +1501,14 @@ page_fault_retry: */ op_param.mmu_sync_info = mmu_sync_info; op_param.kctx_id = kctx->id; - /* Can safely skip the invalidate for all levels in case - * of duplicate page faults. + /* Usually it is safe to skip the MMU cache invalidate for all levels + * in case of duplicate page faults. But for the pathological scenario + * where the faulty VA gets mapped by the time page fault worker runs it + * becomes imperative to invalidate MMU cache for all levels, otherwise + * there is a possibility of repeated page faults on GPUs which support + * fine grained MMU cache invalidation. */ - op_param.flush_skip_levels = 0xF; + op_param.flush_skip_levels = 0x0; op_param.vpfn = fault_pfn; op_param.nr = 1; spin_lock_irqsave(&kbdev->hwaccess_lock, hwaccess_flags); @@ -1217,10 +1542,14 @@ page_fault_retry: /* See comment [1] about UNLOCK usage */ op_param.mmu_sync_info = mmu_sync_info; op_param.kctx_id = kctx->id; - /* Can safely skip the invalidate for all levels in case - * of duplicate page faults. + /* Usually it is safe to skip the MMU cache invalidate for all levels + * in case of duplicate page faults.
But for the pathological scenario + * where the faulty VA gets mapped by the time page fault worker runs it + * becomes imperative to invalidate MMU cache for all levels, otherwise + * there is a possibility of repeated page faults on GPUs which support + * fine grained MMU cache invalidation. */ - op_param.flush_skip_levels = 0xF; + op_param.flush_skip_levels = 0x0; op_param.vpfn = fault_pfn; op_param.nr = 1; spin_lock_irqsave(&kbdev->hwaccess_lock, hwaccess_flags); @@ -1249,7 +1578,7 @@ page_fault_retry: spin_lock(&kctx->mem_partials_lock); grown = page_fault_try_alloc(kctx, region, new_pages, &pages_to_grow, &grow_2mb_pool, - prealloc_sas); + fallback_to_small, prealloc_sas); spin_unlock(&kctx->mem_partials_lock); if (grown) { @@ -1293,22 +1622,6 @@ page_fault_retry: else trace_mali_mmu_page_fault_grow(region, fault, new_pages); -#if MALI_INCREMENTAL_RENDERING_JM - /* Switch to incremental rendering if we have nearly run out of - * memory in a JIT memory allocation. - */ - if (region->threshold_pages && - kbase_reg_current_backed_size(region) > region->threshold_pages) { - dev_dbg(kctx->kbdev->dev, "%zu pages exceeded IR threshold %zu", - new_pages + current_backed_size, region->threshold_pages); - - if (kbase_mmu_switch_to_ir(kctx, region) >= 0) { - dev_dbg(kctx->kbdev->dev, "Get region %pK for IR", (void *)region); - kbase_va_region_alloc_get(kctx, region); - } - } -#endif - /* AS transaction begin */ /* clear MMU interrupt - this needs to be done after updating @@ -1382,7 +1695,7 @@ page_fault_retry: * Otherwise fail the allocation.
*/ if (pages_to_grow > 0) { - if (kbdev->pagesize_2mb && grow_2mb_pool) { + if (kbase_is_large_pages_enabled() && grow_2mb_pool) { /* Round page requirement up to nearest 2 MB */ struct kbase_mem_pool *const lp_mem_pool = &kctx->mem_pools.large[group_id]; @@ -1392,6 +1705,15 @@ page_fault_retry: lp_mem_pool->order; ret = kbase_mem_pool_grow(lp_mem_pool, pages_to_grow, kctx->task); + /* Retry handling the fault with small pages if required + * number of 2MB pages couldn't be allocated. + */ + if (ret < 0) { + fallback_to_small = true; + dev_dbg(kbdev->dev, + "No room for 2MB pages, fallback to small pages"); + goto page_fault_retry; + } } else { struct kbase_mem_pool *const mem_pool = &kctx->mem_pools.small[group_id]; @@ -1436,12 +1758,32 @@ fault_done: dev_dbg(kbdev->dev, "Leaving page_fault_worker %pK", (void *)data); } +/** + * kbase_mmu_alloc_pgd() - Allocate a PGD + * + * @kbdev: Pointer to the instance of a kbase device. + * @mmut: Structure holding details of the MMU table for a kcontext. + * + * A 4KB sized PGD page is allocated for the PGD from the memory pool if PAGE_SIZE is 4KB. + * Otherwise PGD is sub-allocated from a page that is allocated from the memory pool or + * from one of the pages earlier allocated for the PGD of @mmut. + * + * Return: Physical address of the allocated PGD. 
+ */ static phys_addr_t kbase_mmu_alloc_pgd(struct kbase_device *kbdev, struct kbase_mmu_table *mmut) { u64 *page; struct page *p; phys_addr_t pgd; + lockdep_assert_held(&mmut->mmu_lock); + +#if GPU_PAGES_PER_CPU_PAGE > 1 + pgd = allocate_from_pgd_pages_list(mmut); + if (pgd != KBASE_INVALID_PHYSICAL_ADDRESS) + return pgd; +#endif + p = kbase_mem_pool_alloc(&kbdev->mem_pools.small[mmut->group_id]); if (!p) return KBASE_INVALID_PHYSICAL_ADDRESS; @@ -1451,6 +1793,15 @@ static phys_addr_t kbase_mmu_alloc_pgd(struct kbase_device *kbdev, struct kbase_ if (page == NULL) goto alloc_free; +#if GPU_PAGES_PER_CPU_PAGE > 1 + if (!alloc_pgd_page_metadata(kbdev, mmut, p)) { + kbase_kunmap(p, page); + goto alloc_free; + } + mmut->num_free_pgd_sub_pages += (GPU_PAGES_PER_CPU_PAGE - 1); + mmut->last_allocated_pgd_page = p; +#endif + pgd = page_to_phys(p); /* If the MMU tables belong to a context then account the memory usage @@ -1469,12 +1820,12 @@ static phys_addr_t kbase_mmu_alloc_pgd(struct kbase_device *kbdev, struct kbase_ kbase_trace_gpu_mem_usage_inc(kbdev, mmut->kctx, 1); - kbdev->mmu_mode->entries_invalidate(page, KBASE_MMU_PAGE_ENTRIES); + kbdev->mmu_mode->entries_invalidate(page, KBASE_MMU_PAGE_ENTRIES * GPU_PAGES_PER_CPU_PAGE); /* As this page is newly created, therefore there is no content to * clean or invalidate in the GPU caches. 
*/ - kbase_mmu_sync_pgd_cpu(kbdev, kbase_dma_addr(p), PAGE_SIZE); + kbase_mmu_sync_pgd_cpu(kbdev, pgd_dma_addr(p, pgd), PAGE_SIZE); kbase_kunmap(p, page); return pgd; @@ -1516,7 +1867,7 @@ static int mmu_get_next_pgd(struct kbase_device *kbdev, struct kbase_mmu_table * vpfn &= 0x1FF; p = pfn_to_page(PFN_DOWN(*pgd)); - page = kbase_kmap(p); + page = kmap_pgd(p, *pgd); if (page == NULL) { dev_err(kbdev->dev, "%s: kmap failure", __func__); return -EINVAL; @@ -1525,7 +1876,7 @@ static int mmu_get_next_pgd(struct kbase_device *kbdev, struct kbase_mmu_table * if (!kbdev->mmu_mode->pte_is_valid(page[vpfn], level)) { dev_dbg(kbdev->dev, "%s: invalid PTE at level %d vpfn 0x%llx", __func__, level, vpfn); - kbase_kunmap(p, page); + kunmap_pgd(p, page); return -EFAULT; } else { target_pgd = kbdev->mmu_mode->pte_to_phy_addr( @@ -1533,7 +1884,7 @@ static int mmu_get_next_pgd(struct kbase_device *kbdev, struct kbase_mmu_table * kbdev->mgm_dev, MGM_DEFAULT_PTE_GROUP, level, page[vpfn])); } - kbase_kunmap(p, page); + kunmap_pgd(p, page); *pgd = target_pgd; return 0; @@ -1595,6 +1946,7 @@ static int mmu_get_lowest_valid_pgd(struct kbase_device *kbdev, struct kbase_mmu return err; } +KBASE_ALLOW_ERROR_INJECTION_TEST_API(mmu_get_lowest_valid_pgd, ERRNO); /* * On success, sets out_pgd to the PGD for the specified level of translation @@ -1664,10 +2016,10 @@ static void mmu_insert_pages_failure_recovery(struct kbase_device *kbdev, for (level = MIDGARD_MMU_TOPLEVEL; level <= MIDGARD_MMU_BOTTOMLEVEL; level++) { idx = (vpfn >> ((3 - level) * 9)) & 0x1FF; pgds[level] = pgd; - page = kbase_kmap(p); + page = kmap_pgd(p, pgd); if (mmu_mode->ate_is_valid(page[idx], level)) break; /* keep the mapping */ - kbase_kunmap(p, page); + kunmap_pgd(p, page); pgd = mmu_mode->pte_to_phy_addr(kbdev->mgm_dev->ops.mgm_pte_to_original_pte( kbdev->mgm_dev, MGM_DEFAULT_PTE_GROUP, level, page[idx])); p = phys_to_page(pgd); @@ -1700,12 +2052,21 @@ static void mmu_insert_pages_failure_recovery(struct kbase_device 
*kbdev, mmu_mode->entries_invalidate(&page[idx], pcount); if (!num_of_valid_entries) { - kbase_kunmap(p, page); + mmu_mode->set_num_valid_entries(page, 0); - kbase_mmu_add_to_free_pgds_list(mmut, p); + kunmap_pgd(p, page); + + kbase_mmu_update_and_free_parent_pgds(kbdev, mmut, pgds, vpfn, level - 1, + KBASE_MMU_OP_NONE, dirty_pgds, 0); + + /* No CPU and GPU cache maintenance is done here as caller would do the + * complete flush of GPU cache and invalidation of TLB before the PGD + * page is freed. CPU cache flush would be done when the PGD page is + * returned to the memory pool. + */ + + kbase_mmu_add_to_free_pgds_list(mmut, pgd); - kbase_mmu_update_and_free_parent_pgds(kbdev, mmut, pgds, vpfn, level, - KBASE_MMU_OP_NONE, dirty_pgds); vpfn += count; continue; } @@ -1716,9 +2077,9 @@ static void mmu_insert_pages_failure_recovery(struct kbase_device *kbdev, * going to be done by the caller */ kbase_mmu_sync_pgd(kbdev, mmut->kctx, pgd + (idx * sizeof(u64)), - kbase_dma_addr(p) + sizeof(u64) * idx, sizeof(u64) * pcount, + pgd_dma_addr(p, pgd) + sizeof(u64) * idx, sizeof(u64) * pcount, KBASE_MMU_OP_NONE); - kbase_kunmap(p, page); + kunmap_pgd(p, page); next: vpfn += count; } @@ -1728,7 +2089,8 @@ next: * going to happen to these pages at this stage. They might return * movable once they are returned to a memory pool. 
*/ - if (kbase_is_page_migration_enabled() && !ignore_page_migration && phys) { + if (kbase_is_page_migration_enabled() && !ignore_page_migration && phys && + !is_huge(*phys) && !is_partial(*phys)) { const u64 num_pages = (to_vpfn - from_vpfn) / GPU_PAGES_PER_CPU_PAGE; u64 i; @@ -1836,7 +2198,7 @@ static int update_parent_pgds(struct kbase_device *kbdev, struct kbase_mmu_table goto failure_recovery; } - parent_page_va = kbase_kmap(parent_page); + parent_page_va = kmap_pgd(parent_page, parent_pgd); if (unlikely(parent_page_va == NULL)) { dev_err(kbdev->dev, "%s: kmap failure", __func__); @@ -1848,15 +2210,17 @@ static int update_parent_pgds(struct kbase_device *kbdev, struct kbase_mmu_table kbdev->mmu_mode->entry_set_pte(&pte, target_pgd); parent_page_va[parent_vpfn] = kbdev->mgm_dev->ops.mgm_update_gpu_pte( - kbdev->mgm_dev, MGM_DEFAULT_PTE_GROUP, parent_index, pte); + kbdev->mgm_dev, MGM_DEFAULT_PTE_GROUP, PBHA_ID_DEFAULT, PTE_FLAGS_NONE, + parent_index, pte); kbdev->mmu_mode->set_num_valid_entries(parent_page_va, current_valid_entries + 1); - kbase_kunmap(parent_page, parent_page_va); + kunmap_pgd(parent_page, parent_page_va); if (parent_index != insert_level) { /* Newly allocated PGDs */ - kbase_mmu_sync_pgd_cpu( - kbdev, kbase_dma_addr(parent_page) + (parent_vpfn * sizeof(u64)), - sizeof(u64)); + kbase_mmu_sync_pgd_cpu(kbdev, + pgd_dma_addr(parent_page, parent_pgd) + + (parent_vpfn * sizeof(u64)), + sizeof(u64)); } else { /* A new valid entry is added to an existing PGD. 
Perform the * invalidate operation for GPU cache as it could be having a @@ -1864,7 +2228,7 @@ static int update_parent_pgds(struct kbase_device *kbdev, struct kbase_mmu_table */ kbase_mmu_sync_pgd( kbdev, mmut->kctx, parent_pgd + (parent_vpfn * sizeof(u64)), - kbase_dma_addr(parent_page) + (parent_vpfn * sizeof(u64)), + pgd_dma_addr(parent_page, parent_pgd) + (parent_vpfn * sizeof(u64)), sizeof(u64), KBASE_MMU_OP_FLUSH_PT); } @@ -1875,6 +2239,9 @@ static int update_parent_pgds(struct kbase_device *kbdev, struct kbase_mmu_table spin_lock(&page_md->migrate_lock); +#if GPU_PAGES_PER_CPU_PAGE > 1 + page_md->status = PAGE_STATUS_SET(page_md->status, NOT_MOVABLE); +#else WARN_ON_ONCE(PAGE_STATUS_GET(page_md->status) != ALLOCATE_IN_PROGRESS || IS_PAGE_ISOLATED(page_md->status)); @@ -1886,6 +2253,7 @@ static int update_parent_pgds(struct kbase_device *kbdev, struct kbase_mmu_table } else { page_md->status = PAGE_STATUS_SET(page_md->status, NOT_MOVABLE); } +#endif spin_unlock(&page_md->migrate_lock); } @@ -1898,11 +2266,11 @@ failure_recovery: for (; pgd_index < cur_level; pgd_index++) { phys_addr_t pgd = pgds_to_insert[pgd_index]; struct page *pgd_page = pfn_to_page(PFN_DOWN(pgd)); - u64 *pgd_page_va = kbase_kmap(pgd_page); + u64 *pgd_page_va = kmap_pgd(pgd_page, pgd); u64 vpfn = (insert_vpfn >> ((3 - pgd_index) * 9)) & 0x1FF; kbdev->mmu_mode->entries_invalidate(&pgd_page_va[vpfn], 1); - kbase_kunmap(pgd_page, pgd_page_va); + kunmap_pgd(pgd_page, pgd_page_va); } return err; @@ -1918,6 +2286,8 @@ failure_recovery: * @level_high: The higher bound for the levels for which the PGD allocs are required * @new_pgds: Ptr to an array (size MIDGARD_MMU_BOTTOMLEVEL+1) to write the * newly allocated PGD addresses to. + * @pool_grown: True if new PGDs required the memory pool to grow to allocate more pages, + * or false otherwise * * Numerically, level_low < level_high, not to be confused with top level and * bottom level concepts for MMU PGDs. 
They are only used as low and high bounds @@ -1928,19 +2298,22 @@ failure_recovery: * * -ENOMEM - allocation failed for a PGD. */ static int mmu_insert_alloc_pgds(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, - phys_addr_t *new_pgds, int level_low, int level_high) + phys_addr_t *new_pgds, int level_low, int level_high, + bool *pool_grown) { int err = 0; int i; lockdep_assert_held(&mmut->mmu_lock); + *pool_grown = false; for (i = level_low; i <= level_high; i++) { + if (new_pgds[i] != KBASE_INVALID_PHYSICAL_ADDRESS) + continue; do { new_pgds[i] = kbase_mmu_alloc_pgd(kbdev, mmut); if (new_pgds[i] != KBASE_INVALID_PHYSICAL_ADDRESS) break; - mutex_unlock(&mmut->mmu_lock); err = kbase_mem_pool_grow(&kbdev->mem_pools.small[mmut->group_id], (size_t)level_high, NULL); @@ -1948,17 +2321,9 @@ static int mmu_insert_alloc_pgds(struct kbase_device *kbdev, struct kbase_mmu_ta if (err) { dev_err(kbdev->dev, "%s: kbase_mem_pool_grow() returned error %d", __func__, err); - - /* Free all PGDs allocated in previous successful iterations - * from (i-1) to level_low - */ - for (i = (i - 1); i >= level_low; i--) { - if (new_pgds[i] != KBASE_INVALID_PHYSICAL_ADDRESS) - kbase_mmu_free_pgd(kbdev, mmut, new_pgds[i]); - } - return err; } + *pool_grown = true; } while (1); } @@ -1988,6 +2353,8 @@ static int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 start_vp if (WARN_ON(kctx == NULL)) return -EINVAL; + lockdep_assert_held(&kctx->reg_lock); + /* 64-bit address range is the max */ KBASE_DEBUG_ASSERT(start_vpfn <= (U64_MAX / PAGE_SIZE)); @@ -2023,6 +2390,7 @@ static int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 start_vp struct page *p; register unsigned int num_of_valid_entries; bool newly_created_pgd = false; + bool pool_grown; if (count > remain) count = remain; @@ -2030,6 +2398,10 @@ static int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 start_vp cur_level = MIDGARD_MMU_BOTTOMLEVEL; insert_level = cur_level; + for (l = 
MIDGARD_MMU_TOPLEVEL + 1; l <= cur_level; l++) + new_pgds[l] = KBASE_INVALID_PHYSICAL_ADDRESS; + +repeat_page_table_walk: /* * Repeatedly calling mmu_get_lowest_valid_pgd() is clearly * suboptimal. We don't have to re-parse the whole tree @@ -2044,7 +2416,7 @@ static int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 start_vp if (err) { dev_err(kbdev->dev, "%s: mmu_get_lowest_valid_pgd() returned error %d", __func__, err); - goto fail_unlock; + goto fail_unlock_free_pgds; } /* No valid pgd at cur_level */ @@ -2053,9 +2425,12 @@ static int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 start_vp * down to the lowest valid pgd at insert_level */ err = mmu_insert_alloc_pgds(kbdev, mmut, new_pgds, (insert_level + 1), - cur_level); + cur_level, &pool_grown); if (err) - goto fail_unlock; + goto fail_unlock_free_pgds; + + if (pool_grown) + goto repeat_page_table_walk; newly_created_pgd = true; @@ -2070,7 +2445,7 @@ static int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 start_vp p = pfn_to_page(PFN_DOWN(pgd)); - pgd_page = kbase_kmap(p); + pgd_page = kmap_pgd(p, pgd); if (!pgd_page) { dev_err(kbdev->dev, "%s: kmap failure", __func__); err = -ENOMEM; @@ -2109,8 +2484,8 @@ static int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 start_vp flush_op = newly_created_pgd ? 
KBASE_MMU_OP_NONE : KBASE_MMU_OP_FLUSH_PT; kbase_mmu_sync_pgd(kbdev, kctx, pgd + (vindex * sizeof(u64)), - kbase_dma_addr(p) + (vindex * sizeof(u64)), count * sizeof(u64), - flush_op); + pgd_dma_addr(p, pgd) + (vindex * sizeof(u64)), + count * sizeof(u64), flush_op); if (newly_created_pgd) { err = update_parent_pgds(kbdev, mmut, cur_level, insert_level, insert_vpfn, @@ -2121,14 +2496,14 @@ static int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 start_vp kbdev->mmu_mode->entries_invalidate(&pgd_page[vindex], count); - kbase_kunmap(p, pgd_page); + kunmap_pgd(p, pgd_page); goto fail_unlock_free_pgds; } } insert_vpfn += count; remain -= count; - kbase_kunmap(p, pgd_page); + kunmap_pgd(p, pgd_page); } mutex_unlock(&mmut->mmu_lock); @@ -2141,9 +2516,9 @@ static int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 start_vp fail_unlock_free_pgds: /* Free the pgds allocated by us from insert_level+1 to bottom level */ for (l = cur_level; l > insert_level; l--) - kbase_mmu_free_pgd(kbdev, mmut, new_pgds[l]); + if (new_pgds[l] != KBASE_INVALID_PHYSICAL_ADDRESS) + kbase_mmu_free_pgd(kbdev, mmut, new_pgds[l]); -fail_unlock: if (insert_vpfn != (start_vpfn * GPU_PAGES_PER_CPU_PAGE)) { /* Invalidate the pages we have partially completed */ mmu_insert_pages_failure_recovery(kbdev, mmut, start_vpfn * GPU_PAGES_PER_CPU_PAGE, @@ -2267,10 +2642,15 @@ u64 kbase_mmu_create_ate(struct kbase_device *const kbdev, struct tagged_addr co unsigned long const flags, int const level, int const group_id) { u64 entry; + unsigned int pte_flags = 0; kbdev->mmu_mode->entry_set_ate(&entry, phy, flags, level); - return kbdev->mgm_dev->ops.mgm_update_gpu_pte(kbdev->mgm_dev, (unsigned int)group_id, level, - entry); + + if ((flags & KBASE_REG_GPU_CACHED) && !(flags & KBASE_REG_CPU_CACHED)) + pte_flags |= BIT(MMA_VIOLATION); + + return kbdev->mgm_dev->ops.mgm_update_gpu_pte(kbdev->mgm_dev, (unsigned int)group_id, + kbdev->mma_wa_id, pte_flags, level, entry); } static int 
mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, @@ -2289,6 +2669,9 @@ static int mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mm int l, cur_level, insert_level; struct tagged_addr *start_phys = phys; + if (mmut->kctx) + lockdep_assert_held(&mmut->kctx->reg_lock); + /* Note that 0 is a valid start_vpfn */ /* 64-bit address range is the max */ KBASE_DEBUG_ASSERT(start_vpfn <= (U64_MAX / PAGE_SIZE)); @@ -2311,17 +2694,30 @@ static int mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mm register unsigned int num_of_valid_entries; bool newly_created_pgd = false; enum kbase_mmu_op_type flush_op; + bool pool_grown; if (count > remain) count = remain; - if (!vindex && is_huge_head(*phys)) + /* There are 3 conditions to satisfy in order to create a level 2 ATE: + * + * - The GPU VA is aligned to 2 MB. + * - The physical address is tagged as the head of a 2 MB region, + * which guarantees a contiguous physical address range. + * - There are actually 2 MB of virtual and physical pages to map, + * i.e. 512 entries for the MMU page table. + */ + if (!vindex && is_huge_head(*phys) && (count == KBASE_MMU_PAGE_ENTRIES)) cur_level = MIDGARD_MMU_LEVEL(2); else cur_level = MIDGARD_MMU_BOTTOMLEVEL; insert_level = cur_level; + for (l = MIDGARD_MMU_TOPLEVEL + 1; l <= cur_level; l++) + new_pgds[l] = KBASE_INVALID_PHYSICAL_ADDRESS; + +repeat_page_table_walk: /* * Repeatedly calling mmu_get_lowest_valid_pgd() is clearly * suboptimal. 
We don't have to re-parse the whole tree @@ -2336,7 +2732,7 @@ static int mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mm if (err) { dev_err(kbdev->dev, "%s: mmu_get_lowest_valid_pgd() returned error %d", __func__, err); - goto fail_unlock; + goto fail_unlock_free_pgds; } /* No valid pgd at cur_level */ @@ -2345,9 +2741,12 @@ static int mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mm * down to the lowest valid pgd at insert_level */ err = mmu_insert_alloc_pgds(kbdev, mmut, new_pgds, (insert_level + 1), - cur_level); + cur_level, &pool_grown); if (err) - goto fail_unlock; + goto fail_unlock_free_pgds; + + if (pool_grown) + goto repeat_page_table_walk; newly_created_pgd = true; @@ -2361,7 +2760,7 @@ static int mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mm } p = pfn_to_page(PFN_DOWN(pgd)); - pgd_page = kbase_kmap(p); + pgd_page = kmap_pgd(p, pgd); if (!pgd_page) { dev_err(kbdev->dev, "%s: kmap failure", __func__); @@ -2431,8 +2830,8 @@ static int mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mm flush_op = newly_created_pgd ? 
KBASE_MMU_OP_NONE : KBASE_MMU_OP_FLUSH_PT; kbase_mmu_sync_pgd(kbdev, mmut->kctx, pgd + (vindex * sizeof(u64)), - kbase_dma_addr(p) + (vindex * sizeof(u64)), count * sizeof(u64), - flush_op); + pgd_dma_addr(p, pgd) + (vindex * sizeof(u64)), + count * sizeof(u64), flush_op); if (newly_created_pgd) { err = update_parent_pgds(kbdev, mmut, cur_level, insert_level, insert_vpfn, @@ -2443,7 +2842,7 @@ static int mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mm kbdev->mmu_mode->entries_invalidate(&pgd_page[vindex], count); - kbase_kunmap(p, pgd_page); + kunmap_pgd(p, pgd_page); goto fail_unlock_free_pgds; } } @@ -2451,7 +2850,7 @@ static int mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mm phys += (count / GPU_PAGES_PER_CPU_PAGE); insert_vpfn += count; remain -= count; - kbase_kunmap(p, pgd_page); + kunmap_pgd(p, pgd_page); } mutex_unlock(&mmut->mmu_lock); @@ -2461,9 +2860,9 @@ static int mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mm fail_unlock_free_pgds: /* Free the pgds allocated by us from insert_level+1 to bottom level */ for (l = cur_level; l > insert_level; l--) - kbase_mmu_free_pgd(kbdev, mmut, new_pgds[l]); + if (new_pgds[l] != KBASE_INVALID_PHYSICAL_ADDRESS) + kbase_mmu_free_pgd(kbdev, mmut, new_pgds[l]); -fail_unlock: if (insert_vpfn != (start_vpfn * GPU_PAGES_PER_CPU_PAGE)) { /* Invalidate the pages we have partially completed */ mmu_insert_pages_failure_recovery(kbdev, mmut, start_vpfn * GPU_PAGES_PER_CPU_PAGE, @@ -2525,6 +2924,7 @@ int kbase_mmu_insert_pages(struct kbase_device *kbdev, struct kbase_mmu_table *m } KBASE_EXPORT_TEST_API(kbase_mmu_insert_pages); +KBASE_ALLOW_ERROR_INJECTION_TEST_API(kbase_mmu_insert_pages, ERRNO); int kbase_mmu_insert_pages_skip_status_update(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, u64 vpfn, @@ -2582,6 +2982,7 @@ int kbase_mmu_insert_aliased_pages(struct kbase_device *kbdev, struct kbase_mmu_ return 0; } 
+KBASE_ALLOW_ERROR_INJECTION_TEST_API(kbase_mmu_insert_aliased_pages, ERRNO); void kbase_mmu_update(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, int as_nr) { @@ -2699,50 +3100,66 @@ KBASE_EXPORT_TEST_API(kbase_mmu_disable); static void kbase_mmu_update_and_free_parent_pgds(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, phys_addr_t *pgds, u64 vpfn, int level, - enum kbase_mmu_op_type flush_op, u64 *dirty_pgds) + enum kbase_mmu_op_type flush_op, u64 *dirty_pgds, + int as_nr) { - int current_level; + phys_addr_t current_pgd = pgds[level]; + struct page *p = phys_to_page(current_pgd); + u64 *current_page = kmap_pgd(p, current_pgd); + unsigned int current_valid_entries = kbdev->mmu_mode->get_num_valid_entries(current_page); + unsigned int index = (vpfn >> ((3 - level) * 9)) & 0x1FFU; lockdep_assert_held(&mmut->mmu_lock); - for (current_level = level - 1; current_level >= MIDGARD_MMU_LEVEL(0); current_level--) { - phys_addr_t current_pgd = pgds[current_level]; - struct page *p = phys_to_page(current_pgd); + /* We need to track every level that needs updating */ + if (dirty_pgds) + *dirty_pgds |= 1ULL << level; - u64 *current_page = kbase_kmap(p); - unsigned int current_valid_entries = - kbdev->mmu_mode->get_num_valid_entries(current_page); - unsigned int index = (vpfn >> ((3 - current_level) * 9)) & 0x1FFU; + kbdev->mmu_mode->entries_invalidate(&current_page[index], 1); + if (current_valid_entries == 1 && level != MIDGARD_MMU_LEVEL(0)) { + kbdev->mmu_mode->set_num_valid_entries(current_page, 0); - /* We need to track every level that needs updating */ - if (dirty_pgds) - *dirty_pgds |= 1ULL << current_level; + kunmap_pgd(p, current_page); - kbdev->mmu_mode->entries_invalidate(&current_page[index], 1); - if (current_valid_entries == 1 && current_level != MIDGARD_MMU_LEVEL(0)) { - kbase_kunmap(p, current_page); + kbase_mmu_update_and_free_parent_pgds(kbdev, mmut, pgds, vpfn, level - 1, flush_op, + dirty_pgds, as_nr); - /* Ensure the cacheline containing the
last valid entry - * of PGD is invalidated from the GPU cache, before the - * PGD page is freed. + /* Check if fine grained GPU cache maintenance is being used */ + if (flush_op == KBASE_MMU_OP_FLUSH_PT) { + /* Ensure the invalidated PTE is visible in memory right away */ + kbase_mmu_sync_pgd_cpu(kbdev, + pgd_dma_addr(p, current_pgd) + (index * sizeof(u64)), + sizeof(u64)); + /* Invalidate the GPU cache for the whole PGD page and not just for + * the cacheline containing the invalidated PTE, as the PGD page is + * going to be freed. There is an extremely remote possibility that + * other cachelines (containing all invalid PTEs) of PGD page are + * also present in the GPU cache. */ - kbase_mmu_sync_pgd_gpu(kbdev, mmut->kctx, - current_pgd + (index * sizeof(u64)), sizeof(u64), - flush_op); - - kbase_mmu_add_to_free_pgds_list(mmut, p); - } else { - current_valid_entries--; - - kbdev->mmu_mode->set_num_valid_entries(current_page, current_valid_entries); - - kbase_kunmap(p, current_page); - - kbase_mmu_sync_pgd(kbdev, mmut->kctx, current_pgd + (index * sizeof(u64)), - kbase_dma_addr(p) + (index * sizeof(u64)), sizeof(u64), - flush_op); - break; + kbase_mmu_sync_pgd_gpu(kbdev, mmut->kctx, current_pgd, 512 * sizeof(u64), + KBASE_MMU_OP_FLUSH_PT); } + + kbase_mmu_add_to_free_pgds_list(mmut, current_pgd); + } else { + current_valid_entries--; + + kbdev->mmu_mode->set_num_valid_entries(current_page, current_valid_entries); + + kunmap_pgd(p, current_page); + + kbase_mmu_sync_pgd(kbdev, mmut->kctx, current_pgd + (index * sizeof(u64)), + pgd_dma_addr(p, current_pgd) + (index * sizeof(u64)), + sizeof(u64), flush_op); + + /* When fine grained GPU cache maintenance is used then invalidate the MMU caches + * now as the top most level PGD entry, affected by the teardown operation, has + * been invalidated (both in memory as well as in GPU L2 cache). This is to avoid + * the possibility of invalid ATEs being reloaded into the GPU L2 cache whilst the + * teardown is happening. 
+ */ + if (flush_op == KBASE_MMU_OP_FLUSH_PT) + mmu_invalidate_on_teardown(kbdev, mmut->kctx, vpfn, 1, level, as_nr); } } @@ -2783,13 +3200,11 @@ static void mmu_flush_invalidate_teardown_pages(struct kbase_device *kbdev, } #if MALI_USE_CSF else { - /* Partial GPU cache flush with MMU cache invalidation */ + /* Partial GPU cache flush of the pages that were unmapped */ unsigned long irq_flags; unsigned int i; bool flush_done = false; - mmu_invalidate(kbdev, kctx, as_nr, op_param); - for (i = 0; !flush_done && i < phys_page_nr; i++) { spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags); if (kbdev->pm.backend.gpu_ready && (!kctx || kctx->as_nr >= 0)) @@ -2809,7 +3224,7 @@ static void mmu_flush_invalidate_teardown_pages(struct kbase_device *kbdev, static int kbase_mmu_teardown_pgd_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, u64 vpfn, size_t nr, u64 *dirty_pgds, struct list_head *free_pgds_list, - enum kbase_mmu_op_type flush_op) + enum kbase_mmu_op_type flush_op, int as_nr) { struct kbase_mmu_mode const *mmu_mode = kbdev->mmu_mode; @@ -2832,41 +3247,29 @@ static int kbase_mmu_teardown_pgd_pages(struct kbase_device *kbdev, struct kbase phys_addr_t pgd = mmut->pgd; struct page *p = phys_to_page(pgd); - if (count > nr) - count = nr; + count = MIN(nr, count); /* need to check if this is a 2MB page or a small page */ for (level = MIDGARD_MMU_TOPLEVEL; level <= MIDGARD_MMU_BOTTOMLEVEL; level++) { phys_addr_t next_pgd; index = (vpfn >> ((3 - level) * 9)) & 0x1FF; - page = kbase_kmap(p); + page = kmap_pgd(p, pgd); if (mmu_mode->ate_is_valid(page[index], level)) break; /* keep the mapping */ else if (!mmu_mode->pte_is_valid(page[index], level)) { - /* nothing here, advance */ - switch (level) { - case MIDGARD_MMU_LEVEL(0): - count = 134217728; - break; - case MIDGARD_MMU_LEVEL(1): - count = 262144; - break; - case MIDGARD_MMU_LEVEL(2): - count = 512; - break; - case MIDGARD_MMU_LEVEL(3): - count = 1; - break; - } - if (count > nr) - count = nr; + 
dev_warn(kbdev->dev, "Invalid PTE found @ level %d for VA %llx", + level, vpfn << PAGE_SHIFT); + /* nothing here, advance to the next PTE of the current level */ + count = (1 << ((3 - level) * 9)); + count -= (vpfn & (count - 1)); + count = MIN(nr, count); goto next; } next_pgd = mmu_mode->pte_to_phy_addr( kbdev->mgm_dev->ops.mgm_pte_to_original_pte( kbdev->mgm_dev, MGM_DEFAULT_PTE_GROUP, level, page[index])); - kbase_kunmap(p, page); + kunmap_pgd(p, page); pgds[level] = pgd; pgd = next_pgd; p = phys_to_page(pgd); @@ -2877,7 +3280,7 @@ static int kbase_mmu_teardown_pgd_pages(struct kbase_device *kbdev, struct kbase case MIDGARD_MMU_LEVEL(1): dev_warn(kbdev->dev, "%s: No support for ATEs at level %d", __func__, level); - kbase_kunmap(p, page); + kunmap_pgd(p, page); goto out; case MIDGARD_MMU_LEVEL(2): /* can only teardown if count >= 512 */ @@ -2915,19 +3318,36 @@ static int kbase_mmu_teardown_pgd_pages(struct kbase_device *kbdev, struct kbase mmu_mode->entries_invalidate(&page[index], pcount); if (!num_of_valid_entries) { - kbase_kunmap(p, page); + mmu_mode->set_num_valid_entries(page, 0); - /* Ensure the cacheline(s) containing the last valid entries - * of PGD is invalidated from the GPU cache, before the - * PGD page is freed. + kunmap_pgd(p, page); + + /* To avoid the invalid ATEs from the PGD page (that is going to be freed) + * from getting reloaded into the GPU L2 cache whilst the teardown is + * happening, the fine grained GPU L2 cache maintenance is done in the top + * to bottom level PGD order. MMU cache invalidation is done after + * invalidating the entry of top most level PGD, affected by the teardown. 
*/ - kbase_mmu_sync_pgd_gpu(kbdev, mmut->kctx, pgd + (index * sizeof(u64)), - pcount * sizeof(u64), flush_op); + kbase_mmu_update_and_free_parent_pgds(kbdev, mmut, pgds, vpfn, level - 1, + flush_op, dirty_pgds, as_nr); - kbase_mmu_add_to_free_pgds_list(mmut, p); + /* Check if fine grained GPU cache maintenance is being used */ + if (flush_op == KBASE_MMU_OP_FLUSH_PT) { + /* Ensure the invalidated ATEs are visible in memory right away */ + kbase_mmu_sync_pgd_cpu(kbdev, + pgd_dma_addr(p, pgd) + (index * sizeof(u64)), + pcount * sizeof(u64)); + /* Invalidate the GPU cache for the whole PGD page and not just for + * the cachelines containing the invalidated ATEs, as the PGD page + * is going to be freed. There is an extremely remote possibility + * that other cachelines (containing all invalid ATEs) of PGD page + * are also present in the GPU cache. + */ + kbase_mmu_sync_pgd_gpu(kbdev, mmut->kctx, pgd, 512 * sizeof(u64), + KBASE_MMU_OP_FLUSH_PT); + } - kbase_mmu_update_and_free_parent_pgds(kbdev, mmut, pgds, vpfn, level, - flush_op, dirty_pgds); + kbase_mmu_add_to_free_pgds_list(mmut, pgd); vpfn += count; nr -= count; @@ -2937,10 +3357,16 @@ static int kbase_mmu_teardown_pgd_pages(struct kbase_device *kbdev, struct kbase mmu_mode->set_num_valid_entries(page, num_of_valid_entries); kbase_mmu_sync_pgd(kbdev, mmut->kctx, pgd + (index * sizeof(u64)), - kbase_dma_addr(p) + (index * sizeof(u64)), pcount * sizeof(u64), - flush_op); + pgd_dma_addr(p, pgd) + (index * sizeof(u64)), + pcount * sizeof(u64), flush_op); + + /* When fine grained GPU cache maintenance is used then invalidation of MMU cache + * is done inline for every bottom level PGD touched in the teardown. 
+ */ + if (flush_op == KBASE_MMU_OP_FLUSH_PT) + mmu_invalidate_on_teardown(kbdev, mmut->kctx, vpfn, pcount, level, as_nr); next: - kbase_kunmap(p, page); + kunmap_pgd(p, page); vpfn += count; nr -= count; } @@ -3032,7 +3458,7 @@ static int mmu_teardown_pages(struct kbase_device *kbdev, struct kbase_mmu_table mutex_lock(&mmut->mmu_lock); err = kbase_mmu_teardown_pgd_pages(kbdev, mmut, vpfn, nr_virt_pages, &dirty_pgds, - &free_pgds_list, flush_op); + &free_pgds_list, flush_op, as_nr); /* Set up MMU operation parameters. See above about MMU cache flush strategy. */ op_param = (struct kbase_mmu_hw_op_param){ @@ -3069,6 +3495,7 @@ int kbase_mmu_teardown_pages(struct kbase_device *kbdev, struct kbase_mmu_table return mmu_teardown_pages(kbdev, mmut, vpfn, phys, nr_phys_pages, nr_virt_pages, as_nr, false); } +KBASE_EXPORT_TEST_API(kbase_mmu_teardown_pages); int kbase_mmu_teardown_imported_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, u64 vpfn, struct tagged_addr *phys, size_t nr_phys_pages, @@ -3144,7 +3571,7 @@ static int kbase_mmu_update_pages_no_flush(struct kbase_device *kbdev, struct kb goto fail_unlock; p = pfn_to_page(PFN_DOWN(pgd)); - pgd_page = kbase_kmap(p); + pgd_page = kmap_pgd(p, pgd); if (!pgd_page) { dev_warn(kbdev->dev, "kmap failure on update_pages"); err = -ENOMEM; @@ -3164,7 +3591,7 @@ static int kbase_mmu_update_pages_no_flush(struct kbase_device *kbdev, struct kb pgd_page[level_index] = kbase_mmu_create_ate( kbdev, *target_phys, flags, MIDGARD_MMU_LEVEL(2), group_id); kbase_mmu_sync_pgd(kbdev, mmut->kctx, pgd + (level_index * sizeof(u64)), - kbase_dma_addr(p) + (level_index * sizeof(u64)), + pgd_dma_addr(p, pgd) + (level_index * sizeof(u64)), sizeof(u64), KBASE_MMU_OP_NONE); } else { for (i = 0; i < count; i += GPU_PAGES_PER_CPU_PAGE) { @@ -3189,7 +3616,7 @@ static int kbase_mmu_update_pages_no_flush(struct kbase_device *kbdev, struct kb * will be done by the caller. 
*/ kbase_mmu_sync_pgd(kbdev, mmut->kctx, pgd + (index * sizeof(u64)), - kbase_dma_addr(p) + (index * sizeof(u64)), + pgd_dma_addr(p, pgd) + (index * sizeof(u64)), count * sizeof(u64), KBASE_MMU_OP_NONE); } @@ -3202,7 +3629,7 @@ static int kbase_mmu_update_pages_no_flush(struct kbase_device *kbdev, struct kb vpfn += count; nr -= count; - kbase_kunmap(p, pgd_page); + kunmap_pgd(p, pgd_page); } mutex_unlock(&mmut->mmu_lock); @@ -3438,7 +3865,7 @@ int kbase_mmu_migrate_page(struct tagged_addr old_phys, struct tagged_addr new_p goto get_pgd_at_level_error; } - pgd_page = kbase_kmap(phys_to_page(pgd)); + pgd_page = kmap_pgd(phys_to_page(pgd), pgd); if (!pgd_page) { dev_warn(kbdev->dev, "%s: kmap failure for PGD page.", __func__); ret = -EINVAL; @@ -3547,8 +3974,10 @@ int kbase_mmu_migrate_page(struct tagged_addr old_phys, struct tagged_addr new_p kbdev->mgm_dev, MGM_DEFAULT_PTE_GROUP, level, pgd_page[index]))); #endif kbdev->mmu_mode->entry_set_pte(&managed_pte, as_phys_addr_t(new_phys)); - *target = kbdev->mgm_dev->ops.mgm_update_gpu_pte( - kbdev->mgm_dev, MGM_DEFAULT_PTE_GROUP, level, managed_pte); + *target = kbdev->mgm_dev->ops.mgm_update_gpu_pte(kbdev->mgm_dev, + MGM_DEFAULT_PTE_GROUP, + PBHA_ID_DEFAULT, PTE_FLAGS_NONE, + level, managed_pte); } kbdev->mmu_mode->set_num_valid_entries(pgd_page, num_of_valid_entries); @@ -3559,55 +3988,64 @@ int kbase_mmu_migrate_page(struct tagged_addr old_phys, struct tagged_addr new_p * maintenance is necessary. */ kbase_mmu_sync_pgd(kbdev, mmut->kctx, pgd + (index * sizeof(u64)), - kbase_dma_addr(phys_to_page(pgd)) + (index * sizeof(u64)), + pgd_dma_addr(phys_to_page(pgd), pgd) + (index * sizeof(u64)), pgd_entries_to_sync * sizeof(u64), KBASE_MMU_OP_FLUSH_PT); /* Unlock MMU region. * - * Notice that GPUs which don't issue flush commands via GPU control - * still need an additional GPU cache flush here, this time only - * for the page table, because the function call above to sync PGDs - * won't have any effect on them. 
+ * For GPUs without FLUSH_PA_RANGE support, the GPU caches were completely + * cleaned and invalidated after locking the virtual address range affected + * by the migration. As long as the lock is in place, GPU access to the + * locked range would remain blocked. So there is no need to clean and + * invalidate the GPU caches again after copying the page contents + * of old page and updating the page table entry to point to new page. + * + * For GPUs with FLUSH_PA_RANGE support, the contents of old page would + * have been evicted from the GPU caches after locking the virtual address + * range. The page table entry contents also would have been invalidated + * from the GPU's L2 cache by kbase_mmu_sync_pgd() after the page table + * update. + * + * If kbase_mmu_hw_do_unlock_no_addr() fails, GPU reset will be triggered which + * would remove the MMU lock and so there is no need to roll back page migration + * and the failure can be ignored. */ spin_lock_irqsave(&kbdev->hwaccess_lock, hwaccess_flags); if (kbdev->pm.backend.gpu_ready && mmut->kctx->as_nr >= 0) { int as_nr = mmut->kctx->as_nr; struct kbase_as *as = &kbdev->as[as_nr]; + int local_ret = kbase_mmu_hw_do_unlock_no_addr(kbdev, as, &op_param); - if (mmu_flush_cache_on_gpu_ctrl(kbdev)) { - ret = kbase_mmu_hw_do_unlock(kbdev, as, &op_param); - } else { - ret = kbase_gpu_cache_flush_and_busy_wait(kbdev, - GPU_COMMAND_CACHE_CLN_INV_L2); - if (!ret) - ret = kbase_mmu_hw_do_unlock_no_addr(kbdev, as, &op_param); - } + CSTD_UNUSED(local_ret); } + + /* Release the transition prevention in L2 by ending the transaction */ + mmu_page_migration_transaction_end(kbdev); spin_unlock_irqrestore(&kbdev->hwaccess_lock, hwaccess_flags); /* Releasing locks before checking the migration transaction error state */ mutex_unlock(&kbdev->mmu_hw_mutex); - spin_lock_irqsave(&kbdev->hwaccess_lock, hwaccess_flags); - /* Release the transition prevention in L2 by ending the transaction */ - mmu_page_migration_transaction_end(kbdev); - 
spin_unlock_irqrestore(&kbdev->hwaccess_lock, hwaccess_flags); - - /* Checking the final migration transaction error state */ - if (ret < 0) { - dev_err(kbdev->dev, "%s: failed to unlock MMU region.", __func__); - goto undo_mappings; - } - /* Undertaking metadata transfer, while we are holding the mmu_lock */ spin_lock(&page_md->migrate_lock); if (level == MIDGARD_MMU_BOTTOMLEVEL) { - size_t page_array_index = (page_md->data.mapped.vpfn / GPU_PAGES_PER_CPU_PAGE) - - page_md->data.mapped.reg->start_pfn; + enum kbase_page_status page_status = PAGE_STATUS_GET(page_md->status); - WARN_ON(PAGE_STATUS_GET(page_md->status) != ALLOCATED_MAPPED); + if (page_status == ALLOCATED_MAPPED) { + /* Replace page in array of pages of the physical allocation. */ + size_t page_array_index = + div_u64(page_md->data.mapped.vpfn, GPU_PAGES_PER_CPU_PAGE) - + page_md->data.mapped.reg->start_pfn; - /* Replace page in array of pages of the physical allocation. */ - page_md->data.mapped.reg->gpu_alloc->pages[page_array_index] = new_phys; + page_md->data.mapped.reg->gpu_alloc->pages[page_array_index] = new_phys; + } else if (page_status == NOT_MOVABLE) { + dev_dbg(kbdev->dev, + "%s: migration completed and page has become NOT_MOVABLE.", + __func__); + } else { + dev_WARN(kbdev->dev, + "%s: migration completed but page has moved to status %d.", + __func__, page_status); + } } /* Update the new page dma_addr with the transferred metadata from the old_page */ page_md->dma_addr = new_dma_addr; @@ -3618,7 +4056,7 @@ int kbase_mmu_migrate_page(struct tagged_addr old_phys, struct tagged_addr new_p set_page_private(as_page(old_phys), 0); l2_state_defer_out: - kbase_kunmap(phys_to_page(pgd), pgd_page); + kunmap_pgd(phys_to_page(pgd), pgd_page); pgd_page_map_error: get_pgd_at_level_error: page_state_change_out: @@ -3633,7 +4071,7 @@ old_page_map_error: undo_mappings: /* Unlock the MMU table and undo mappings. 
*/ mutex_unlock(&mmut->mmu_lock); - kbase_kunmap(phys_to_page(pgd), pgd_page); + kunmap_pgd(phys_to_page(pgd), pgd_page); kbase_kunmap(as_page(new_phys), new_page); kbase_kunmap(as_page(old_phys), old_page); @@ -3652,7 +4090,7 @@ static void mmu_teardown_level(struct kbase_device *kbdev, struct kbase_mmu_tabl lockdep_assert_held(&mmut->mmu_lock); - pgd_page = kbase_kmap_atomic(p); + pgd_page = kmap_atomic_pgd(p, pgd); /* kmap_atomic should NEVER fail. */ if (WARN_ON_ONCE(pgd_page == NULL)) return; @@ -3661,7 +4099,7 @@ static void mmu_teardown_level(struct kbase_device *kbdev, struct kbase_mmu_tabl * kmap_atomic usage */ pgd_page_buffer = mmut->scratch_mem.teardown_pages.levels[level]; - memcpy(pgd_page_buffer, pgd_page, PAGE_SIZE); + memcpy(pgd_page_buffer, pgd_page, GPU_PAGE_SIZE); } /* When page migration is enabled, kbase_region_tracker_term() would ensure @@ -3672,7 +4110,7 @@ static void mmu_teardown_level(struct kbase_device *kbdev, struct kbase_mmu_tabl WARN_ON_ONCE(kbdev->mmu_mode->get_num_valid_entries(pgd_page)); /* Invalidate page after copying */ mmu_mode->entries_invalidate(pgd_page, KBASE_MMU_PAGE_ENTRIES); - kbase_kunmap_atomic(pgd_page); + kunmap_atomic_pgd(pgd_page); pgd_page = pgd_page_buffer; if (level < MIDGARD_MMU_BOTTOMLEVEL) { @@ -3691,13 +4129,20 @@ static void mmu_teardown_level(struct kbase_device *kbdev, struct kbase_mmu_tabl kbase_mmu_free_pgd(kbdev, mmut, pgd); } -static void kbase_mmu_mark_non_movable(struct page *page) +static void kbase_mmu_mark_non_movable(struct kbase_device *const kbdev, struct page *page) { struct kbase_page_metadata *page_md; if (!kbase_is_page_migration_enabled()) return; + /* Composite large-page is excluded from migration, trigger a warn if a development + * wrongly leads to it. 
+ */ + if (is_huge_head(as_tagged(page_to_phys(page))) || + is_partial(as_tagged(page_to_phys(page)))) + dev_WARN(kbdev->dev, "%s: migration on large-page attempted.", __func__); + page_md = kbase_page_private(page); spin_lock(&page_md->migrate_lock); @@ -3725,6 +4170,10 @@ int kbase_mmu_init(struct kbase_device *const kbdev, struct kbase_mmu_table *con mmut->kctx = kctx; mmut->pgd = KBASE_INVALID_PHYSICAL_ADDRESS; +#if GPU_PAGES_PER_CPU_PAGE > 1 + INIT_LIST_HEAD(&mmut->pgd_pages_list); +#endif + /* We allocate pages into the kbdev memory pool, then * kbase_mmu_alloc_pgd will allocate out of that pool. This is done to * avoid allocations from the kernel happening with the lock held. @@ -3739,10 +4188,12 @@ int kbase_mmu_init(struct kbase_device *const kbdev, struct kbase_mmu_table *con return -ENOMEM; } + mutex_lock(&mmut->mmu_lock); mmut->pgd = kbase_mmu_alloc_pgd(kbdev, mmut); + mutex_unlock(&mmut->mmu_lock); } - kbase_mmu_mark_non_movable(pfn_to_page(PFN_DOWN(mmut->pgd))); + kbase_mmu_mark_non_movable(kbdev, pfn_to_page(PFN_DOWN(mmut->pgd))); return 0; } @@ -3800,6 +4251,7 @@ static size_t kbasep_mmu_dump_level(struct kbase_context *kctx, phys_addr_t pgd, size_t dump_size; struct kbase_device *kbdev; struct kbase_mmu_mode const *mmu_mode; + struct page *p; if (WARN_ON(kctx == NULL)) return 0; @@ -3808,7 +4260,8 @@ static size_t kbasep_mmu_dump_level(struct kbase_context *kctx, phys_addr_t pgd, kbdev = kctx->kbdev; mmu_mode = kbdev->mmu_mode; - pgd_page = kbase_kmap(pfn_to_page(PFN_DOWN(pgd))); + p = pfn_to_page(PFN_DOWN(pgd)); + pgd_page = kmap_pgd(p, pgd); if (!pgd_page) { dev_warn(kbdev->dev, "%s: kmap failure", __func__); return 0; @@ -3842,7 +4295,7 @@ static size_t kbasep_mmu_dump_level(struct kbase_context *kctx, phys_addr_t pgd, dump_size = kbasep_mmu_dump_level(kctx, target_pgd, level + 1, buffer, size_left); if (!dump_size) { - kbase_kunmap(pfn_to_page(PFN_DOWN(pgd)), pgd_page); + kunmap_pgd(p, pgd_page); return 0; } size += dump_size; @@ -3850,7 
+4303,7 @@ static size_t kbasep_mmu_dump_level(struct kbase_context *kctx, phys_addr_t pgd, } } - kbase_kunmap(pfn_to_page(PFN_DOWN(pgd)), pgd_page); + kunmap_pgd(p, pgd_page); return size; } @@ -3956,7 +4409,6 @@ void kbase_mmu_bus_fault_worker(struct work_struct *data) return; } -#ifdef CONFIG_MALI_ARBITER_SUPPORT /* check if we still have GPU */ if (unlikely(kbase_is_gpu_removed(kbdev))) { dev_dbg(kbdev->dev, "%s: GPU has been removed", __func__); @@ -3964,7 +4416,6 @@ void kbase_mmu_bus_fault_worker(struct work_struct *data) atomic_dec(&kbdev->faults_pending); return; } -#endif if (unlikely(fault->protected_mode)) { kbase_mmu_report_fault_and_kill(kctx, faulting_as, "Permission failure", fault); diff --git a/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_faults_decoder.c b/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_faults_decoder.c new file mode 100644 index 000000000000..548d88cf216e --- /dev/null +++ b/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_faults_decoder.c @@ -0,0 +1,124 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * + * (C) COPYRIGHT 2024 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +/** + * DOC: Base kernel MMU faults decoder. 
+ */ + +#include +#include +#if MALI_USE_CSF +#include +#else +#include +#endif + +#include +#include + +unsigned int fault_source_id_internal_requester_get(struct kbase_device *kbdev, + unsigned int source_id) +{ + if (kbdev->gpu_props.gpu_id.product_model < GPU_ID_MODEL_MAKE(14, 0)) + return ((source_id >> 4) & 0xF); + else + return (source_id & 0x3F); +} + +static inline const char *source_id_enc_core_type_get_str(struct kbase_device *kbdev, + unsigned int source_id) +{ + if (kbdev->gpu_props.gpu_id.product_model < GPU_ID_MODEL_MAKE(14, 0)) + return decode_fault_source_core_id_t_core_type( + FAULT_SOURCE_ID_CORE_ID_GET(source_id), kbdev->gpu_props.gpu_id.arch_id); + else + return decode_fault_source_core_type_t_name( + FAULT_SOURCE_ID_CORE_TYPE_GET(source_id), kbdev->gpu_props.gpu_id.arch_id); +} +const char *fault_source_id_internal_requester_get_str(struct kbase_device *kbdev, + unsigned int source_id, + unsigned int access_type) +{ + unsigned int ir = fault_source_id_internal_requester_get(kbdev, source_id); + bool older_source_id_fmt = + (kbdev->gpu_props.gpu_id.product_model < GPU_ID_MODEL_MAKE(14, 0)); + unsigned int utlb_id = 0; + + if (older_source_id_fmt) + utlb_id = FAULT_SOURCE_ID_UTLB_ID_GET(source_id); + + if (!strcmp(source_id_enc_core_type_get_str(kbdev, source_id), "shader")) { + if (utlb_id == 0) { + if (access_type == AS_FAULTSTATUS_ACCESS_TYPE_READ) + return decode_fault_source_shader_r_t( + ir, kbdev->gpu_props.gpu_id.arch_id); + else + return decode_fault_source_shader_w_t( + ir, kbdev->gpu_props.gpu_id.arch_id); + } else + return "Load/store cache"; + } else if (!strcmp(source_id_enc_core_type_get_str(kbdev, source_id), "tiler")) { +#if MALI_USE_CSF + if (utlb_id == 0) { + if (access_type == AS_FAULTSTATUS_ACCESS_TYPE_READ) + return decode_fault_source_tiler_r_t( + ir, kbdev->gpu_props.gpu_id.arch_id); + else + return decode_fault_source_tiler_w_t( + ir, kbdev->gpu_props.gpu_id.arch_id); + } else + return "The polygon list writer. 
No further details."; +#else + return (utlb_id == 0) ? "Anything other than the polygon list writer" : + "The polygon list writer"; +#endif + } +#if MALI_USE_CSF + else if (!strcmp(source_id_enc_core_type_get_str(kbdev, source_id), "csf")) { + if (access_type == AS_FAULTSTATUS_ACCESS_TYPE_READ) + return decode_fault_source_csf_r_t(ir, kbdev->gpu_props.gpu_id.arch_id); + else + return decode_fault_source_csf_w_t(ir, kbdev->gpu_props.gpu_id.arch_id); + } +#else + else if (!strcmp(source_id_enc_core_type_get_str(kbdev, source_id), "jm")) + return decode_fault_source_jm_t(ir, kbdev->gpu_props.gpu_id.arch_id); +#endif + else if (!strcmp(source_id_enc_core_type_get_str(kbdev, source_id), "l2c") || + !strcmp(source_id_enc_core_type_get_str(kbdev, source_id), "memsys") || + !strcmp(source_id_enc_core_type_get_str(kbdev, source_id), "mmu")) { + return "Not used"; + } + + return "unknown"; +} + +const char *fault_source_id_core_type_description_get(struct kbase_device *kbdev, + unsigned int source_id) +{ + if (kbdev->gpu_props.gpu_id.product_model < GPU_ID_MODEL_MAKE(14, 0)) { + return decode_fault_source_core_id_t_desc(FAULT_SOURCE_ID_CORE_ID_GET(source_id), + kbdev->gpu_props.gpu_id.arch_id); + } else { + return decode_fault_source_core_type_t_desc( + FAULT_SOURCE_ID_CORE_TYPE_GET(source_id), kbdev->gpu_props.gpu_id.arch_id); + } +} diff --git a/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_faults_decoder.h b/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_faults_decoder.h new file mode 100644 index 000000000000..da5610ec94b0 --- /dev/null +++ b/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_faults_decoder.h @@ -0,0 +1,143 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2024 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ +#ifndef _MALI_KBASE_MMU_FAULTS_DECODER_H_ +#define _MALI_KBASE_MMU_FAULTS_DECODER_H_ + +#include +#include + +/* FAULTSTATUS.SOURCE_ID encoding */ +#define SOURCE_ID_CORE_ID_SHIFT (9) +#define SOURCE_ID_CORE_ID_MASK (0x7F << SOURCE_ID_CORE_ID_SHIFT) +#define SOURCE_ID_UTLB_ID_SHIFT (8) +#define SOURCE_ID_UTLB_ID_MASK (0x01 << SOURCE_ID_UTLB_ID_SHIFT) +#define SOURCE_ID_CORE_TYPE_SHIFT (12) +#define SOURCE_ID_CORE_TYPE_MASK (0x0F << SOURCE_ID_CORE_TYPE_SHIFT) +#define SOURCE_ID_CORE_INDEX_SHIFT (6) +#define SOURCE_ID_CORE_INDEX_MASK (0x3F << SOURCE_ID_CORE_INDEX_SHIFT) + +/** + * FAULT_SOURCE_ID_CORE_ID_GET() - Get core ID of a fault. + * + * @source_id: SOURCE_ID field of FAULTSTATUS (MMU) or GPU_FAULTSTATUS (GPU) + * registers. + * + * Get core ID part of SOURCE_ID field of FAULTSTATUS (MMU) or + * GPU_FAULTSTATUS (GPU) registers. + * + * Return: core ID of the fault. + */ +#define FAULT_SOURCE_ID_CORE_ID_GET(source_id) \ + ((source_id & SOURCE_ID_CORE_ID_MASK) >> SOURCE_ID_CORE_ID_SHIFT) + +/** + * FAULT_SOURCE_ID_UTLB_ID_GET() - Get UTLB ID of a fault. + * + * @source_id: SOURCE_ID field of FAULTSTATUS (MMU) or GPU_FAULTSTATUS (GPU) + * registers. + * + * Get UTLB(micro-TLB) ID part of SOURCE_ID field of FAULTSTATUS (MMU) or + * GPU_FAULTSTATUS (GPU) registers. 
+ * + * Return: UTLB ID of the fault. + */ +#define FAULT_SOURCE_ID_UTLB_ID_GET(source_id) \ + ((source_id & SOURCE_ID_UTLB_ID_MASK) >> SOURCE_ID_UTLB_ID_SHIFT) + +/** + * FAULT_SOURCE_ID_CORE_TYPE_GET() - Get core type of a fault. + * + * @source_id: SOURCE_ID field of FAULTSTATUS (MMU) or GPU_FAULTSTATUS (GPU) + * registers. + * + * Get core type part of SOURCE_ID field of FAULTSTATUS (MMU) or + * GPU_FAULTSTATUS (GPU) registers. + * + * Return: core type code of the fault. + */ +#define FAULT_SOURCE_ID_CORE_TYPE_GET(source_id) \ + ((source_id & SOURCE_ID_CORE_TYPE_MASK) >> SOURCE_ID_CORE_TYPE_SHIFT) + +/** + * FAULT_SOURCE_ID_CORE_INDEX_GET() - Get core index of a fault. + * + * @source_id: SOURCE_ID field of FAULTSTATUS (MMU) or GPU_FAULTSTATUS (GPU) + * registers. + * + * Get core index part of SOURCE_ID field of FAULTSTATUS (MMU) or + * GPU_FAULTSTATUS (GPU) registers. + * + * Return: core index of the fault. + */ +#define FAULT_SOURCE_ID_CORE_INDEX_GET(source_id) \ + ((source_id & SOURCE_ID_CORE_INDEX_MASK) >> SOURCE_ID_CORE_INDEX_SHIFT) + +/** + * fault_source_id_internal_requester_get() - Get internal_requester of a fault. + * + * @kbdev: The kbase device structure for the device (must be a valid pointer). + * @source_id: SOURCE_ID field of FAULTSTATUS (MMU) or GPU_FAULTSTATUS (GPU) + * registers. + * + * Get internal_requester part of SOURCE_ID field of FAULTSTATUS (MMU) or + * GPU_FAULTSTATUS (GPU) registers. + * + * Return: Internal requester code of the fault. + */ +unsigned int fault_source_id_internal_requester_get(struct kbase_device *kbdev, + unsigned int source_id); + +/** + * fault_source_id_internal_requester_get_str() - Get internal_requester of a + * fault in a human readable format. + * + * @kbdev: The kbase device structure for the device (must be a valid pointer). + * @source_id: SOURCE_ID field of FAULTSTATUS (MMU) or GPU_FAULTSTATUS (GPU) + * registers. 
+ * @access_type: the direction of data transfer that caused the fault (atomic, + * execute, read, write) + * + * Get the human readable decoding of internal_requester part of SOURCE_ID field + * of FAULTSTATUS (MMU) or GPU_FAULTSTATUS (GPU) registers. + * + * Return: Internal requester of the fault in human readable format. + */ +const char *fault_source_id_internal_requester_get_str(struct kbase_device *kbdev, + unsigned int source_id, + unsigned int access_type); + +/** + * fault_source_id_core_type_description_get() - Get the core type of + * a fault in a human readable format. + * + * @kbdev: The kbase device structure for the device (must be a valid pointer). + * @source_id: SOURCE_ID field of FAULTSTATUS (MMU) or GPU_FAULTSTATUS (GPU) + * registers. + * + * Get the human readable decoding of core type part of SOURCE_ID field + * of FAULTSTATUS (MMU) or GPU_FAULTSTATUS (GPU) registers. + * + * Return: core type of the fault in human readable format. + */ +const char *fault_source_id_core_type_description_get(struct kbase_device *kbdev, + unsigned int source_id); + +#endif /* _MALI_KBASE_MMU_FAULTS_DECODER_H_ */ diff --git a/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_faults_decoder_luts.c b/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_faults_decoder_luts.c new file mode 100644 index 000000000000..8e90cacb4efa --- /dev/null +++ b/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_faults_decoder_luts.c @@ -0,0 +1,660 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * + * (C) COPYRIGHT 2019-2024 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +/** + * DOC: Base kernel MMU faults decoder. + */ + +#include + +#define GPU_ID_ARCH_ID_MAJOR_GET(gpu_id) ((gpu_id >> 16) & 0xFF) +#define GPU_ID_ARCH_ID_MINOR_AND_REV_GET(gpu_id) (gpu_id & 0xFFFF) +#define NELEMS(s) (sizeof(s) / sizeof((s)[0])) + +struct decode_lut_element { + u16 arch_minor_rev; + u16 key; + const char *text; +}; + +static const char *decode_lut_element_lookup(u16 arch_minor_rev, u16 key, + struct decode_lut_element *decode_element_lut, + unsigned int lut_len) +{ + struct decode_lut_element *p; + + for (p = decode_element_lut; p < decode_element_lut + lut_len; p++) { + if (p->key == key && + (p->arch_minor_rev == 0xffff || p->arch_minor_rev == arch_minor_rev)) + break; + } + if (p < decode_element_lut + lut_len) + return p->text; + else + return "unknown"; +} + +/* Auto-generated code: DO NOT MODIFY! 
*/ + +static struct decode_lut_element lut_fault_source_core_type_t_name_major_9[] = { + { 0xFFFF, 0, "shader" }, { 0xFFFF, 1, "l2c" }, { 0xFFFF, 2, "tiler" }, + { 0xFFFF, 3, "mmu" }, { 0xFFFF, 4, "jm" }, { 0xFFFF, 5, "pmb" }, +}; + +static struct decode_lut_element lut_fault_source_core_type_t_desc_major_9[] = { + { 0xFFFF, 0, "Shader core" }, { 0xFFFF, 1, "Level 2 cache" }, + { 0xFFFF, 2, "Tiler" }, { 0xFFFF, 3, "MMU" }, + { 0xFFFF, 4, "Job Manager" }, { 0xFFFF, 5, "Performance Monitor Block" }, +}; + +static struct decode_lut_element lut_fault_source_core_type_t_name_major_10[] = { + { 0xFFFF, 0, "shader" }, { 0xFFFF, 1, "l2c" }, { 0xFFFF, 2, "tiler" }, + { 0xFFFF, 3, "mmu" }, { 0xFFFF, 4, "csf" }, { 0xFFFF, 5, "memsys" }, +}; + +static struct decode_lut_element lut_fault_source_core_type_t_desc_major_10[] = { + { 0xFFFF, 0, "Shader core" }, { 0xFFFF, 1, "Level 2 cache" }, + { 0xFFFF, 2, "Tiler" }, { 0xFFFF, 3, "MMU" }, + { 0xFFFF, 4, "CSF" }, { 0xFFFF, 5, "Memory system" }, +}; + +static struct decode_lut_element lut_fault_source_core_type_t_name_major_11[] = { + { 0xFFFF, 0, "shader" }, { 0xFFFF, 1, "l2c" }, { 0xFFFF, 2, "tiler" }, + { 0xFFFF, 3, "mmu" }, { 0xFFFF, 4, "csf" }, { 0xFFFF, 5, "memsys" }, +}; + +static struct decode_lut_element lut_fault_source_core_type_t_desc_major_11[] = { + { 0xFFFF, 0, "Shader core" }, { 0xFFFF, 1, "Level 2 cache" }, + { 0xFFFF, 2, "Tiler" }, { 0xFFFF, 3, "MMU" }, + { 0xFFFF, 4, "CSF" }, { 0xFFFF, 5, "Memory system" }, +}; + +static struct decode_lut_element lut_fault_source_core_type_t_name_major_12[] = { + { 0xFFFF, 0, "shader" }, { 0xFFFF, 1, "l2c" }, { 0xFFFF, 2, "tiler" }, + { 0xFFFF, 3, "mmu" }, { 0xFFFF, 4, "csf" }, { 0xFFFF, 5, "memsys" }, +}; + +static struct decode_lut_element lut_fault_source_core_type_t_desc_major_12[] = { + { 0xFFFF, 0, "Shader core" }, { 0xFFFF, 1, "Level 2 cache" }, + { 0xFFFF, 2, "Tiler" }, { 0xFFFF, 3, "MMU" }, + { 0xFFFF, 4, "CSF" }, { 0xFFFF, 5, "Memory system" }, +}; + +static struct 
decode_lut_element lut_fault_source_core_id_t_desc_major_9[] = { + { 0xFFFF, 0, "Shader core 0" }, + { 0xFFFF, 1, "Shader core 1" }, + { 0xFFFF, 2, "Shader core 2" }, + { 0xFFFF, 3, "Shader core 3" }, + { 0xFFFF, 4, "Shader core 4" }, + { 0xFFFF, 5, "Shader core 5" }, + { 0xFFFF, 6, "Shader core 6" }, + { 0xFFFF, 7, "Shader core 7" }, + { 0xFFFF, 8, "Shader core 8" }, + { 0xFFFF, 9, "Shader core 9" }, + { 0xFFFF, 10, "Shader core 10" }, + { 0xFFFF, 11, "Shader core 11" }, + { 0xFFFF, 12, "Shader core 12" }, + { 0xFFFF, 13, "Shader core 13" }, + { 0xFFFF, 14, "Shader core 14" }, + { 0xFFFF, 15, "Shader core 15" }, + { 0xFFFF, 16, "Shader core 16" }, + { 0xFFFF, 17, "Shader core 17" }, + { 0xFFFF, 18, "Shader core 18" }, + { 0xFFFF, 19, "Shader core 19" }, + { 0xFFFF, 20, "Shader core 20" }, + { 0xFFFF, 21, "Shader core 21" }, + { 0xFFFF, 22, "Shader core 22" }, + { 0xFFFF, 23, "Shader core 23" }, + { 0xFFFF, 24, "Shader core 24" }, + { 0xFFFF, 25, "Shader core 25" }, + { 0xFFFF, 26, "Shader core 26" }, + { 0xFFFF, 27, "Shader core 27" }, + { 0xFFFF, 28, "Shader core 28" }, + { 0xFFFF, 29, "Shader core 29" }, + { 0xFFFF, 30, "Shader core 30" }, + { 0xFFFF, 31, "Shader core 31" }, + { 0xFFFF, 41, "L2 Slice 3" }, + { 0xFFFF, 43, "L2 Slice 2" }, + { 0xFFFF, 45, "L2 Slice 1" }, + { 0xFFFF, 46, "PMB" }, + { 0xFFFF, 47, "L2 Slice 0" }, + { 0xFFFF, 51, "Tiler" }, + { 0xFFFF, 55, "MMU" }, + { 0xFFFF, 62, "Job Manager" }, +}; + +static struct decode_lut_element lut_fault_source_core_id_t_core_type_major_9[] = { + { 0xFFFF, 0, "shader" }, { 0xFFFF, 1, "shader" }, { 0xFFFF, 2, "shader" }, + { 0xFFFF, 3, "shader" }, { 0xFFFF, 4, "shader" }, { 0xFFFF, 5, "shader" }, + { 0xFFFF, 6, "shader" }, { 0xFFFF, 7, "shader" }, { 0xFFFF, 8, "shader" }, + { 0xFFFF, 9, "shader" }, { 0xFFFF, 10, "shader" }, { 0xFFFF, 11, "shader" }, + { 0xFFFF, 12, "shader" }, { 0xFFFF, 13, "shader" }, { 0xFFFF, 14, "shader" }, + { 0xFFFF, 15, "shader" }, { 0xFFFF, 16, "shader" }, { 0xFFFF, 17, "shader" }, + { 
0xFFFF, 18, "shader" }, { 0xFFFF, 19, "shader" }, { 0xFFFF, 20, "shader" }, + { 0xFFFF, 21, "shader" }, { 0xFFFF, 22, "shader" }, { 0xFFFF, 23, "shader" }, + { 0xFFFF, 24, "shader" }, { 0xFFFF, 25, "shader" }, { 0xFFFF, 26, "shader" }, + { 0xFFFF, 27, "shader" }, { 0xFFFF, 28, "shader" }, { 0xFFFF, 29, "shader" }, + { 0xFFFF, 30, "shader" }, { 0xFFFF, 31, "shader" }, { 0xFFFF, 41, "l2c" }, + { 0xFFFF, 43, "l2c" }, { 0xFFFF, 45, "l2c" }, { 0xFFFF, 46, "pmb" }, + { 0xFFFF, 47, "l2c" }, { 0xFFFF, 51, "tiler" }, { 0xFFFF, 55, "mmu" }, + { 0xFFFF, 62, "jm" }, +}; + +static struct decode_lut_element lut_fault_source_core_id_t_desc_major_10[] = { + { 0xFFFF, 0, "Shader core 0" }, + { 0xFFFF, 1, "Shader core 1" }, + { 0xFFFF, 2, "Shader core 2" }, + { 0xFFFF, 3, "Shader core 3" }, + { 0xFFFF, 4, "Shader core 4" }, + { 0xFFFF, 5, "Shader core 5" }, + { 0xFFFF, 6, "Shader core 6" }, + { 0xFFFF, 7, "Shader core 7" }, + { 0xFFFF, 8, "Shader core 8" }, + { 0xFFFF, 9, "Shader core 9" }, + { 0xFFFF, 10, "Shader core 10" }, + { 0xFFFF, 11, "Shader core 11" }, + { 0xFFFF, 12, "Shader core 12" }, + { 0xFFFF, 13, "Shader core 13" }, + { 0xFFFF, 14, "Shader core 14" }, + { 0xFFFF, 15, "Shader core 15" }, + { 0xFFFF, 16, "Shader core 16" }, + { 0xFFFF, 17, "Shader core 17" }, + { 0xFFFF, 18, "Shader core 18" }, + { 0xFFFF, 19, "Shader core 19" }, + { 0xFFFF, 20, "Shader core 20" }, + { 0xFFFF, 21, "Shader core 21" }, + { 0xFFFF, 22, "Shader core 22" }, + { 0xFFFF, 23, "Shader core 23" }, + { 0xFFFF, 24, "Shader core 24" }, + { 0xFFFF, 25, "Shader core 25" }, + { 0xFFFF, 26, "Shader core 26" }, + { 0xFFFF, 27, "Shader core 27" }, + { 0xFFFF, 28, "Shader core 28" }, + { 0xFFFF, 29, "Shader core 29" }, + { 0xFFFF, 30, "Shader core 30" }, + { 0xFFFF, 31, "Shader core 31" }, + { 0xFFFF, 41, "L2 Slice 3" }, + { 0xFFFF, 43, "L2 Slice 2" }, + { 0xFFFF, 45, "L2 Slice 1" }, + { 0xFFFF, 47, "L2 Slice 0" }, + { 0xFFFF, 51, "Tiler" }, + { 0xFFFF, 55, "MMU" }, + { 0xFFFF, 33, "L2 Slice 7" }, + { 
0xFFFF, 35, "L2 Slice 6" }, + { 0xFFFF, 37, "L2 Slice 5" }, + { 0xFFFF, 39, "L2 Slice 4" }, + { 0xFFFF, 48, "Memory system, undefined" }, + { 0xFFFF, 62, "Command Stream Frontend" }, +}; + +static struct decode_lut_element lut_fault_source_core_id_t_core_type_major_10[] = { + { 0xFFFF, 0, "shader" }, { 0xFFFF, 1, "shader" }, { 0xFFFF, 2, "shader" }, + { 0xFFFF, 3, "shader" }, { 0xFFFF, 4, "shader" }, { 0xFFFF, 5, "shader" }, + { 0xFFFF, 6, "shader" }, { 0xFFFF, 7, "shader" }, { 0xFFFF, 8, "shader" }, + { 0xFFFF, 9, "shader" }, { 0xFFFF, 10, "shader" }, { 0xFFFF, 11, "shader" }, + { 0xFFFF, 12, "shader" }, { 0xFFFF, 13, "shader" }, { 0xFFFF, 14, "shader" }, + { 0xFFFF, 15, "shader" }, { 0xFFFF, 16, "shader" }, { 0xFFFF, 17, "shader" }, + { 0xFFFF, 18, "shader" }, { 0xFFFF, 19, "shader" }, { 0xFFFF, 20, "shader" }, + { 0xFFFF, 21, "shader" }, { 0xFFFF, 22, "shader" }, { 0xFFFF, 23, "shader" }, + { 0xFFFF, 24, "shader" }, { 0xFFFF, 25, "shader" }, { 0xFFFF, 26, "shader" }, + { 0xFFFF, 27, "shader" }, { 0xFFFF, 28, "shader" }, { 0xFFFF, 29, "shader" }, + { 0xFFFF, 30, "shader" }, { 0xFFFF, 31, "shader" }, { 0xFFFF, 41, "l2c" }, + { 0xFFFF, 43, "l2c" }, { 0xFFFF, 45, "l2c" }, { 0xFFFF, 47, "l2c" }, + { 0xFFFF, 51, "tiler" }, { 0xFFFF, 55, "mmu" }, { 0xFFFF, 33, "l2c" }, + { 0xFFFF, 35, "l2c" }, { 0xFFFF, 37, "l2c" }, { 0xFFFF, 39, "l2c" }, + { 0xFFFF, 48, "memsys" }, { 0xFFFF, 62, "csf" }, +}; + +static struct decode_lut_element lut_fault_source_core_id_t_desc_major_11[] = { + { 0xFFFF, 0, "Shader core 0" }, + { 0xFFFF, 1, "Shader core 1" }, + { 0xFFFF, 2, "Shader core 2" }, + { 0xFFFF, 3, "Shader core 3" }, + { 0xFFFF, 4, "Shader core 4" }, + { 0xFFFF, 5, "Shader core 5" }, + { 0xFFFF, 6, "Shader core 6" }, + { 0xFFFF, 7, "Shader core 7" }, + { 0xFFFF, 8, "Shader core 8" }, + { 0xFFFF, 9, "Shader core 9" }, + { 0xFFFF, 10, "Shader core 10" }, + { 0xFFFF, 11, "Shader core 11" }, + { 0xFFFF, 12, "Shader core 12" }, + { 0xFFFF, 13, "Shader core 13" }, + { 0xFFFF, 14, 
"Shader core 14" }, + { 0xFFFF, 15, "Shader core 15" }, + { 0xFFFF, 16, "Shader core 16" }, + { 0xFFFF, 17, "Shader core 17" }, + { 0xFFFF, 18, "Shader core 18" }, + { 0xFFFF, 19, "Shader core 19" }, + { 0xFFFF, 20, "Shader core 20" }, + { 0xFFFF, 21, "Shader core 21" }, + { 0xFFFF, 22, "Shader core 22" }, + { 0xFFFF, 23, "Shader core 23" }, + { 0xFFFF, 24, "Shader core 24" }, + { 0xFFFF, 25, "Shader core 25" }, + { 0xFFFF, 26, "Shader core 26" }, + { 0xFFFF, 27, "Shader core 27" }, + { 0xFFFF, 28, "Shader core 28" }, + { 0xFFFF, 29, "Shader core 29" }, + { 0xFFFF, 30, "Shader core 30" }, + { 0xFFFF, 31, "Shader core 31" }, + { 0xFFFF, 41, "L2 Slice 3" }, + { 0xFFFF, 43, "L2 Slice 2" }, + { 0xFFFF, 45, "L2 Slice 1" }, + { 0xFFFF, 47, "L2 Slice 0" }, + { 0xFFFF, 51, "Tiler" }, + { 0xFFFF, 55, "MMU" }, + { 0xFFFF, 33, "L2 Slice 7" }, + { 0xFFFF, 35, "L2 Slice 6" }, + { 0xFFFF, 37, "L2 Slice 5" }, + { 0xFFFF, 39, "L2 Slice 4" }, + { 0xFFFF, 48, "Memory system, undefined" }, + { 0xFFFF, 62, "Command Stream Frontend" }, +}; + +static struct decode_lut_element lut_fault_source_core_id_t_core_type_major_11[] = { + { 0xFFFF, 0, "shader" }, { 0xFFFF, 1, "shader" }, { 0xFFFF, 2, "shader" }, + { 0xFFFF, 3, "shader" }, { 0xFFFF, 4, "shader" }, { 0xFFFF, 5, "shader" }, + { 0xFFFF, 6, "shader" }, { 0xFFFF, 7, "shader" }, { 0xFFFF, 8, "shader" }, + { 0xFFFF, 9, "shader" }, { 0xFFFF, 10, "shader" }, { 0xFFFF, 11, "shader" }, + { 0xFFFF, 12, "shader" }, { 0xFFFF, 13, "shader" }, { 0xFFFF, 14, "shader" }, + { 0xFFFF, 15, "shader" }, { 0xFFFF, 16, "shader" }, { 0xFFFF, 17, "shader" }, + { 0xFFFF, 18, "shader" }, { 0xFFFF, 19, "shader" }, { 0xFFFF, 20, "shader" }, + { 0xFFFF, 21, "shader" }, { 0xFFFF, 22, "shader" }, { 0xFFFF, 23, "shader" }, + { 0xFFFF, 24, "shader" }, { 0xFFFF, 25, "shader" }, { 0xFFFF, 26, "shader" }, + { 0xFFFF, 27, "shader" }, { 0xFFFF, 28, "shader" }, { 0xFFFF, 29, "shader" }, + { 0xFFFF, 30, "shader" }, { 0xFFFF, 31, "shader" }, { 0xFFFF, 41, "l2c" }, + { 
0xFFFF, 43, "l2c" }, { 0xFFFF, 45, "l2c" }, { 0xFFFF, 47, "l2c" }, + { 0xFFFF, 51, "tiler" }, { 0xFFFF, 55, "mmu" }, { 0xFFFF, 33, "l2c" }, + { 0xFFFF, 35, "l2c" }, { 0xFFFF, 37, "l2c" }, { 0xFFFF, 39, "l2c" }, + { 0xFFFF, 48, "memsys" }, { 0xFFFF, 62, "csf" }, +}; + +static struct decode_lut_element lut_fault_source_core_id_t_desc_major_12[] = { + { 0xFFFF, 0, "Shader core 0" }, + { 0xFFFF, 1, "Shader core 1" }, + { 0xFFFF, 2, "Shader core 2" }, + { 0xFFFF, 3, "Shader core 3" }, + { 0xFFFF, 4, "Shader core 4" }, + { 0xFFFF, 5, "Shader core 5" }, + { 0xFFFF, 6, "Shader core 6" }, + { 0xFFFF, 7, "Shader core 7" }, + { 0xFFFF, 8, "Shader core 8" }, + { 0xFFFF, 9, "Shader core 9" }, + { 0xFFFF, 10, "Shader core 10" }, + { 0xFFFF, 11, "Shader core 11" }, + { 0xFFFF, 12, "Shader core 12" }, + { 0xFFFF, 13, "Shader core 13" }, + { 0xFFFF, 14, "Shader core 14" }, + { 0xFFFF, 15, "Shader core 15" }, + { 0xFFFF, 16, "Shader core 16" }, + { 0xFFFF, 17, "Shader core 17" }, + { 0xFFFF, 18, "Shader core 18" }, + { 0xFFFF, 19, "Shader core 19" }, + { 0xFFFF, 20, "Shader core 20" }, + { 0xFFFF, 21, "Shader core 21" }, + { 0xFFFF, 22, "Shader core 22" }, + { 0xFFFF, 23, "Shader core 23" }, + { 0xFFFF, 24, "Shader core 24" }, + { 0xFFFF, 25, "Shader core 25" }, + { 0xFFFF, 26, "Shader core 26" }, + { 0xFFFF, 27, "Shader core 27" }, + { 0xFFFF, 28, "Shader core 28" }, + { 0xFFFF, 29, "Shader core 29" }, + { 0xFFFF, 30, "Shader core 30" }, + { 0xFFFF, 31, "Shader core 31" }, + { 0xFFFF, 41, "L2 Slice 3" }, + { 0xFFFF, 43, "L2 Slice 2" }, + { 0xFFFF, 45, "L2 Slice 1" }, + { 0xFFFF, 47, "L2 Slice 0" }, + { 0xFFFF, 51, "Tiler" }, + { 0xFFFF, 55, "MMU" }, + { 0xFFFF, 33, "L2 Slice 7" }, + { 0xFFFF, 35, "L2 Slice 6" }, + { 0xFFFF, 37, "L2 Slice 5" }, + { 0xFFFF, 39, "L2 Slice 4" }, + { 0xFFFF, 48, "Memory system, undefined" }, + { 0xFFFF, 62, "Command Stream Frontend" }, +}; + +static struct decode_lut_element lut_fault_source_core_id_t_core_type_major_12[] = { + { 0xFFFF, 0, "shader" }, 
{ 0xFFFF, 1, "shader" }, { 0xFFFF, 2, "shader" }, + { 0xFFFF, 3, "shader" }, { 0xFFFF, 4, "shader" }, { 0xFFFF, 5, "shader" }, + { 0xFFFF, 6, "shader" }, { 0xFFFF, 7, "shader" }, { 0xFFFF, 8, "shader" }, + { 0xFFFF, 9, "shader" }, { 0xFFFF, 10, "shader" }, { 0xFFFF, 11, "shader" }, + { 0xFFFF, 12, "shader" }, { 0xFFFF, 13, "shader" }, { 0xFFFF, 14, "shader" }, + { 0xFFFF, 15, "shader" }, { 0xFFFF, 16, "shader" }, { 0xFFFF, 17, "shader" }, + { 0xFFFF, 18, "shader" }, { 0xFFFF, 19, "shader" }, { 0xFFFF, 20, "shader" }, + { 0xFFFF, 21, "shader" }, { 0xFFFF, 22, "shader" }, { 0xFFFF, 23, "shader" }, + { 0xFFFF, 24, "shader" }, { 0xFFFF, 25, "shader" }, { 0xFFFF, 26, "shader" }, + { 0xFFFF, 27, "shader" }, { 0xFFFF, 28, "shader" }, { 0xFFFF, 29, "shader" }, + { 0xFFFF, 30, "shader" }, { 0xFFFF, 31, "shader" }, { 0xFFFF, 41, "l2c" }, + { 0xFFFF, 43, "l2c" }, { 0xFFFF, 45, "l2c" }, { 0xFFFF, 47, "l2c" }, + { 0xFFFF, 51, "tiler" }, { 0xFFFF, 55, "mmu" }, { 0xFFFF, 33, "l2c" }, + { 0xFFFF, 35, "l2c" }, { 0xFFFF, 37, "l2c" }, { 0xFFFF, 39, "l2c" }, + { 0xFFFF, 48, "memsys" }, { 0xFFFF, 62, "csf" }, +}; + +static struct decode_lut_element lut_fault_source_shader_r_t_major_9[] = { + { 0xFFFF, 0, "ic" }, { 0xFFFF, 1, "adc" }, { 0xFFFF, 4, "scm" }, + { 0xFFFF, 5, "vl" }, { 0xFFFF, 6, "plr" }, { 0xFFFF, 7, "fsdc" }, + { 0xFFFF, 8, "lsc" }, { 0xFFFF, 9, "cse" }, { 0xFFFF, 10, "tb" }, + { 0xFFFF, 11, "tmdi" }, { 0xFFFF, 12, "tmu0" }, { 0xFFFF, 13, "tmu1" }, + { 0xFFFF, 14, "tma0" }, { 0xFFFF, 15, "tma1" }, +}; + +static struct decode_lut_element lut_fault_source_shader_r_t_major_10[] = { + { 0xFFFF, 4, "scm" }, { 0xFFFF, 5, "vl" }, { 0xFFFF, 6, "plr" }, + { 0xFFFF, 7, "fsdc" }, { 0xFFFF, 8, "lsc" }, { 0xFFFF, 9, "cse" }, + { 0xFFFF, 10, "tb" }, { 0xFFFF, 11, "tmdi" }, { 0xFFFF, 12, "tmu0" }, + { 0xFFFF, 13, "tmu1" }, { 0xFFFF, 14, "tma0" }, { 0xFFFF, 15, "tma1" }, + { 0xFFFF, 0, "ic0" }, { 0xFFFF, 1, "ic1" }, { 0xFFFF, 2, "adc" }, +}; + +static struct decode_lut_element 
lut_fault_source_shader_r_t_major_11[] = { + { 0xFFFF, 4, "scm" }, { 0xFFFF, 5, "vl" }, { 0xFFFF, 6, "plr" }, + { 0xFFFF, 7, "fsdc" }, { 0xFFFF, 8, "lsc" }, { 0xFFFF, 9, "cse" }, + { 0xFFFF, 10, "tb" }, { 0xFFFF, 11, "tmdi" }, { 0xFFFF, 12, "tmu0" }, + { 0xFFFF, 13, "tmu1" }, { 0xFFFF, 14, "tma0" }, { 0xFFFF, 15, "tma1" }, + { 0xFFFF, 0, "ic0" }, { 0xFFFF, 1, "ic1" }, { 0xFFFF, 2, "adc" }, +}; + +static struct decode_lut_element lut_fault_source_shader_r_t_major_12[] = { + { 0xFFFF, 4, "scm" }, { 0xFFFF, 6, "plr" }, { 0xFFFF, 7, "fsdc" }, + { 0xFFFF, 8, "lsc" }, { 0xFFFF, 9, "cse" }, { 0xFFFF, 10, "tb" }, + { 0xFFFF, 11, "tmdi" }, { 0xFFFF, 12, "tmu0" }, { 0xFFFF, 13, "tmu1" }, + { 0xFFFF, 14, "tma0" }, { 0xFFFF, 15, "tma1" }, { 0xFFFF, 0, "ic0" }, + { 0xFFFF, 1, "ic1" }, { 0xFFFF, 2, "adc" }, { 0xFFFF, 3, "rtas" }, +}; + +static struct decode_lut_element lut_fault_source_shader_w_t_major_9[] = { + { 0xFFFF, 0, "pcb" }, + { 0xFFFF, 8, "lsc" }, + { 0xFFFF, 10, "tb" }, +}; + +static struct decode_lut_element lut_fault_source_shader_w_t_major_10[] = { + { 0xFFFF, 0, "pcb" }, { 0xFFFF, 8, "lsc" }, { 0xFFFF, 12, "tb0" }, + { 0xFFFF, 13, "tb1" }, { 0xFFFF, 14, "tb2" }, { 0xFFFF, 15, "tb3" }, +}; + +static struct decode_lut_element lut_fault_source_shader_w_t_major_11[] = { + { 0xFFFF, 0, "pcb" }, { 0xFFFF, 8, "lsc" }, { 0xFFFF, 12, "tb0" }, + { 0xFFFF, 13, "tb1" }, { 0xFFFF, 14, "tb2" }, { 0xFFFF, 15, "tb3" }, +}; + +static struct decode_lut_element lut_fault_source_shader_w_t_major_12[] = { + { 0xFFFF, 0, "pcb" }, { 0xFFFF, 8, "lsc" }, { 0xFFFF, 12, "tb0" }, + { 0xFFFF, 13, "tb1" }, { 0xFFFF, 14, "tb2" }, { 0xFFFF, 15, "tb3" }, +}; + +static struct decode_lut_element lut_fault_source_tiler_r_t_major_10[] = { + { 0xFFFF, 0, "pf" }, + { 0xFFFF, 1, "pcache" }, + { 0xFFFF, 2, "tcu" }, + { 0xFFFF, 3, "idx" }, +}; + +static struct decode_lut_element lut_fault_source_tiler_r_t_major_11[] = { + { 0xFFFF, 0, "pf" }, + { 0xFFFF, 1, "pcache" }, + { 0xFFFF, 2, "tcu" }, + { 0xFFFF, 
3, "idx" }, +}; + +static struct decode_lut_element lut_fault_source_tiler_r_t_major_12[] = { + { 0xFFFF, 0, "pf" }, + { 0xFFFF, 1, "pcache" }, + { 0xFFFF, 2, "tcu" }, + { 0xFFFF, 3, "idx" }, +}; + +static struct decode_lut_element lut_fault_source_tiler_w_t_major_10[] = { + { 0xFFFF, 1, "pcache_wb" }, + { 0xFFFF, 2, "tcu_pcb" }, +}; + +static struct decode_lut_element lut_fault_source_tiler_w_t_major_11[] = { + { 0xFFFF, 1, "pcache_wb" }, + { 0xFFFF, 2, "tcu_pcb" }, +}; + +static struct decode_lut_element lut_fault_source_tiler_w_t_major_12[] = { + { 0xFFFF, 1, "pcache_wb" }, + { 0xFFFF, 2, "tcu_pcb" }, +}; + + +const char *decode_fault_source_core_type_t_name(u16 idx, u32 gpu_id) +{ + u16 min_rev = GPU_ID_ARCH_ID_MINOR_AND_REV_GET(gpu_id); + const char *ret = "unknown"; + + switch (GPU_ID_ARCH_ID_MAJOR_GET(gpu_id)) { + case 9: + ret = decode_lut_element_lookup(min_rev, idx, + lut_fault_source_core_type_t_name_major_9, + NELEMS(lut_fault_source_core_type_t_name_major_9)); + break; + case 10: + ret = decode_lut_element_lookup(min_rev, idx, + lut_fault_source_core_type_t_name_major_10, + NELEMS(lut_fault_source_core_type_t_name_major_10)); + break; + case 11: + ret = decode_lut_element_lookup(min_rev, idx, + lut_fault_source_core_type_t_name_major_11, + NELEMS(lut_fault_source_core_type_t_name_major_11)); + break; + case 12: + ret = decode_lut_element_lookup(min_rev, idx, + lut_fault_source_core_type_t_name_major_12, + NELEMS(lut_fault_source_core_type_t_name_major_12)); + break; + } + return ret; +} + +const char *decode_fault_source_core_type_t_desc(u16 idx, u32 gpu_id) +{ + u16 min_rev = GPU_ID_ARCH_ID_MINOR_AND_REV_GET(gpu_id); + const char *ret = "unknown"; + + switch (GPU_ID_ARCH_ID_MAJOR_GET(gpu_id)) { + case 9: + ret = decode_lut_element_lookup(min_rev, idx, + lut_fault_source_core_type_t_desc_major_9, + NELEMS(lut_fault_source_core_type_t_desc_major_9)); + break; + case 10: + ret = decode_lut_element_lookup(min_rev, idx, + 
lut_fault_source_core_type_t_desc_major_10, + NELEMS(lut_fault_source_core_type_t_desc_major_10)); + break; + case 11: + ret = decode_lut_element_lookup(min_rev, idx, + lut_fault_source_core_type_t_desc_major_11, + NELEMS(lut_fault_source_core_type_t_desc_major_11)); + break; + case 12: + ret = decode_lut_element_lookup(min_rev, idx, + lut_fault_source_core_type_t_desc_major_12, + NELEMS(lut_fault_source_core_type_t_desc_major_12)); + break; + } + return ret; +} + +const char *decode_fault_source_core_id_t_desc(u16 idx, u32 gpu_id) +{ + u16 min_rev = GPU_ID_ARCH_ID_MINOR_AND_REV_GET(gpu_id); + const char *ret = "unknown"; + + switch (GPU_ID_ARCH_ID_MAJOR_GET(gpu_id)) { + case 9: + ret = decode_lut_element_lookup(min_rev, idx, + lut_fault_source_core_id_t_desc_major_9, + NELEMS(lut_fault_source_core_id_t_desc_major_9)); + break; + case 10: + ret = decode_lut_element_lookup(min_rev, idx, + lut_fault_source_core_id_t_desc_major_10, + NELEMS(lut_fault_source_core_id_t_desc_major_10)); + break; + case 11: + ret = decode_lut_element_lookup(min_rev, idx, + lut_fault_source_core_id_t_desc_major_11, + NELEMS(lut_fault_source_core_id_t_desc_major_11)); + break; + case 12: + ret = decode_lut_element_lookup(min_rev, idx, + lut_fault_source_core_id_t_desc_major_12, + NELEMS(lut_fault_source_core_id_t_desc_major_12)); + break; + } + return ret; +} + +const char *decode_fault_source_core_id_t_core_type(u16 idx, u32 gpu_id) +{ + u16 min_rev = GPU_ID_ARCH_ID_MINOR_AND_REV_GET(gpu_id); + const char *ret = "unknown"; + + switch (GPU_ID_ARCH_ID_MAJOR_GET(gpu_id)) { + case 9: + ret = decode_lut_element_lookup( + min_rev, idx, lut_fault_source_core_id_t_core_type_major_9, + NELEMS(lut_fault_source_core_id_t_core_type_major_9)); + break; + case 10: + ret = decode_lut_element_lookup( + min_rev, idx, lut_fault_source_core_id_t_core_type_major_10, + NELEMS(lut_fault_source_core_id_t_core_type_major_10)); + break; + case 11: + ret = decode_lut_element_lookup( + min_rev, idx, 
lut_fault_source_core_id_t_core_type_major_11, + NELEMS(lut_fault_source_core_id_t_core_type_major_11)); + break; + case 12: + ret = decode_lut_element_lookup( + min_rev, idx, lut_fault_source_core_id_t_core_type_major_12, + NELEMS(lut_fault_source_core_id_t_core_type_major_12)); + break; + } + return ret; +} + +const char *decode_fault_source_shader_r_t(u16 idx, u32 gpu_id) +{ + u16 min_rev = GPU_ID_ARCH_ID_MINOR_AND_REV_GET(gpu_id); + const char *ret = "unknown"; + + switch (GPU_ID_ARCH_ID_MAJOR_GET(gpu_id)) { + case 9: + ret = decode_lut_element_lookup(min_rev, idx, lut_fault_source_shader_r_t_major_9, + NELEMS(lut_fault_source_shader_r_t_major_9)); + break; + case 10: + ret = decode_lut_element_lookup(min_rev, idx, lut_fault_source_shader_r_t_major_10, + NELEMS(lut_fault_source_shader_r_t_major_10)); + break; + case 11: + ret = decode_lut_element_lookup(min_rev, idx, lut_fault_source_shader_r_t_major_11, + NELEMS(lut_fault_source_shader_r_t_major_11)); + break; + case 12: + ret = decode_lut_element_lookup(min_rev, idx, lut_fault_source_shader_r_t_major_12, + NELEMS(lut_fault_source_shader_r_t_major_12)); + break; + } + return ret; +} + +const char *decode_fault_source_shader_w_t(u16 idx, u32 gpu_id) +{ + u16 min_rev = GPU_ID_ARCH_ID_MINOR_AND_REV_GET(gpu_id); + const char *ret = "unknown"; + + switch (GPU_ID_ARCH_ID_MAJOR_GET(gpu_id)) { + case 9: + ret = decode_lut_element_lookup(min_rev, idx, lut_fault_source_shader_w_t_major_9, + NELEMS(lut_fault_source_shader_w_t_major_9)); + break; + case 10: + ret = decode_lut_element_lookup(min_rev, idx, lut_fault_source_shader_w_t_major_10, + NELEMS(lut_fault_source_shader_w_t_major_10)); + break; + case 11: + ret = decode_lut_element_lookup(min_rev, idx, lut_fault_source_shader_w_t_major_11, + NELEMS(lut_fault_source_shader_w_t_major_11)); + break; + case 12: + ret = decode_lut_element_lookup(min_rev, idx, lut_fault_source_shader_w_t_major_12, + NELEMS(lut_fault_source_shader_w_t_major_12)); + break; + } + return ret; 
+} + +const char *decode_fault_source_tiler_r_t(u16 idx, u32 gpu_id) +{ + u16 min_rev = GPU_ID_ARCH_ID_MINOR_AND_REV_GET(gpu_id); + const char *ret = "unknown"; + + switch (GPU_ID_ARCH_ID_MAJOR_GET(gpu_id)) { + case 10: + ret = decode_lut_element_lookup(min_rev, idx, lut_fault_source_tiler_r_t_major_10, + NELEMS(lut_fault_source_tiler_r_t_major_10)); + break; + case 11: + ret = decode_lut_element_lookup(min_rev, idx, lut_fault_source_tiler_r_t_major_11, + NELEMS(lut_fault_source_tiler_r_t_major_11)); + break; + case 12: + ret = decode_lut_element_lookup(min_rev, idx, lut_fault_source_tiler_r_t_major_12, + NELEMS(lut_fault_source_tiler_r_t_major_12)); + break; + } + return ret; +} + +const char *decode_fault_source_tiler_w_t(u16 idx, u32 gpu_id) +{ + u16 min_rev = GPU_ID_ARCH_ID_MINOR_AND_REV_GET(gpu_id); + const char *ret = "unknown"; + + switch (GPU_ID_ARCH_ID_MAJOR_GET(gpu_id)) { + case 10: + ret = decode_lut_element_lookup(min_rev, idx, lut_fault_source_tiler_w_t_major_10, + NELEMS(lut_fault_source_tiler_w_t_major_10)); + break; + case 11: + ret = decode_lut_element_lookup(min_rev, idx, lut_fault_source_tiler_w_t_major_11, + NELEMS(lut_fault_source_tiler_w_t_major_11)); + break; + case 12: + ret = decode_lut_element_lookup(min_rev, idx, lut_fault_source_tiler_w_t_major_12, + NELEMS(lut_fault_source_tiler_w_t_major_12)); + break; + } + return ret; +} diff --git a/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_faults_decoder_luts.h b/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_faults_decoder_luts.h new file mode 100644 index 000000000000..2b0ca5659a6c --- /dev/null +++ b/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_faults_decoder_luts.h @@ -0,0 +1,119 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2024 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +#ifndef _MALI_KBASE_MMU_FAULTS_DECODER_LUTS_H_ +#define _MALI_KBASE_MMU_FAULTS_DECODER_LUTS_H_ + +#include <linux/types.h> + +/** + * decode_fault_source_core_id_t_desc() - Get core description of a + * fault in a human readable format. + * + * @idx: Core ID part of SOURCE_ID field of the fault. + * @gpu_id: GPU id composed of arch_major << 16 | arch_minor << 8 | arch_rev. + * + * Return: core ID of the fault in human readable format. + */ +const char *decode_fault_source_core_id_t_desc(u16 idx, u32 gpu_id); + +/** + * decode_fault_source_core_id_t_core_type() - Get core type of a + * fault in a human readable format. + * + * @idx: Core ID part of SOURCE_ID field of the fault. + * @gpu_id: GPU id composed of arch_major << 16 | arch_minor << 8 | arch_rev. + * + * Return: core type of the fault in human readable format. + */ +const char *decode_fault_source_core_id_t_core_type(u16 idx, u32 gpu_id); + +/** + * decode_fault_source_core_type_t_name() - Get core type name of a + * fault. + * + * @idx: Core type part of SOURCE_ID field of the fault. + * @gpu_id: GPU id composed of arch_major << 16 | arch_minor << 8 | arch_rev. + * + * Return: core type short name of the fault.
+ */ +const char *decode_fault_source_core_type_t_name(u16 idx, u32 gpu_id); + +/** + * decode_fault_source_core_type_t_desc() - Get core type description of a + * fault. + * + * @idx: Core type part of SOURCE_ID field of the fault. + * @gpu_id: GPU id composed of arch_major << 16 | arch_minor << 8 | arch_rev. + * + * Return: core type description of the fault. + */ +const char *decode_fault_source_core_type_t_desc(u16 idx, u32 gpu_id); + +/** + * decode_fault_source_shader_r_t() - Get internal requester of a + * fault in a human readable format. + * + * @idx: Internal requester part of SOURCE_ID field of the fault. + * @gpu_id: GPU id composed of arch_major << 16 | arch_minor << 8 | arch_rev. + * + * Return: Internal requester of a fault in a human readable format for read + * operations on a shader core. + */ +const char *decode_fault_source_shader_r_t(u16 idx, u32 gpu_id); + +/** + * decode_fault_source_shader_w_t() - Get internal requester of a + * fault in a human readable format. + * + * @idx: Internal requester part of SOURCE_ID field of the fault. + * @gpu_id: GPU id composed of arch_major << 16 | arch_minor << 8 | arch_rev. + * + * Return: Internal requester of a fault in a human readable format for write + * operations on a shader core. + */ +const char *decode_fault_source_shader_w_t(u16 idx, u32 gpu_id); + +/** + * decode_fault_source_tiler_r_t() - Get internal requester of a + * fault in a human readable format. + * + * @idx: Internal requester part of SOURCE_ID field of the fault. + * @gpu_id: GPU id composed of arch_major << 16 | arch_minor << 8 | arch_rev. + * + * Return: Internal requester of a fault in a human readable format for read + * operations on a tiler core. + */ +const char *decode_fault_source_tiler_r_t(u16 idx, u32 gpu_id); + +/** + * decode_fault_source_tiler_w_t() - Get internal requester of a + * fault in a human readable format. + * + * @idx: Internal requester part of SOURCE_ID field of the fault. 
+ * @gpu_id: GPU id composed of arch_major << 16 | arch_minor << 8 | arch_rev. + * + * Return: Internal requester of a fault in a human readable format for write + * operations on a tiler core. + */ +const char *decode_fault_source_tiler_w_t(u16 idx, u32 gpu_id); + +#endif /* _MALI_KBASE_MMU_FAULTS_DECODER_LUTS_H_ */ diff --git a/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_hw.h b/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_hw.h index c2b377de54a9..560baceafe8a 100644 --- a/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_hw.h +++ b/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_hw.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2024 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -42,7 +42,7 @@ struct kbase_context; * enum kbase_mmu_fault_type - MMU fault type descriptor. * @KBASE_MMU_FAULT_TYPE_UNKNOWN: unknown fault * @KBASE_MMU_FAULT_TYPE_PAGE: page fault - * @KBASE_MMU_FAULT_TYPE_BUS: nus fault + * @KBASE_MMU_FAULT_TYPE_BUS: bus fault * @KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED: page_unexpected fault * @KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED: bus_unexpected fault */ diff --git a/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_hw_direct.c b/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_hw_direct.c index ba67ae0e01e9..46c04f2b1fc1 100644 --- a/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_hw_direct.c +++ b/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_hw_direct.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2024 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -529,8 +529,8 @@ int kbase_mmu_hw_do_flush(struct kbase_device *kbdev, struct kbase_as *as, return ret; #if MALI_USE_CSF && !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) - /* WA for the BASE_HW_ISSUE_GPU2019_3901. */ - if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_GPU2019_3901) && + /* WA for the KBASE_HW_ISSUE_GPU2019_3901. */ + if (kbase_hw_has_issue(kbdev, KBASE_HW_ISSUE_GPU2019_3901) && mmu_cmd == AS_COMMAND_COMMAND_FLUSH_MEM) { ret = apply_hw_issue_GPU2019_3901_wa(kbdev, &mmu_cmd, as->number); if (ret) { @@ -635,6 +635,15 @@ void kbase_mmu_hw_clear_fault(struct kbase_device *kbdev, struct kbase_as *as, #endif kbase_reg_write32(kbdev, MMU_CONTROL_ENUM(IRQ_CLEAR), pf_bf_mask); +#if MALI_USE_CSF + /* For valid page faults, this function is called just before unblocking the MMU (which + * would in turn unblock the MCU firmware) and so this is an opportune location to + * update the page fault counter value in firmware visible memory. + */ + if (likely(type == KBASE_MMU_FAULT_TYPE_PAGE) && kbdev->csf.page_fault_cnt_ptr) + *kbdev->csf.page_fault_cnt_ptr = ++kbdev->csf.page_fault_cnt; +#endif + unlock: spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags); } diff --git a/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_internal.h b/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_internal.h index 4c2c1a64ca41..8b68791e4c77 100644 --- a/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_internal.h +++ b/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_internal.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2024 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -38,19 +38,6 @@ void kbase_gpu_report_bus_fault_and_kill(struct kbase_context *kctx, struct kbas void kbase_mmu_report_fault_and_kill(struct kbase_context *kctx, struct kbase_as *as, const char *reason_str, struct kbase_fault *fault); -/** - * kbase_mmu_switch_to_ir() - Switch to incremental rendering if possible - * @kctx: kbase_context for the faulting address space. - * @reg: of a growable GPU memory region in the same context. - * Takes ownership of the reference if successful. - * - * Used to switch to incremental rendering if we have nearly run out of - * virtual address space in a growable memory region. - * - * Return: 0 if successful, otherwise a negative error code. - */ -int kbase_mmu_switch_to_ir(struct kbase_context *kctx, struct kbase_va_region *reg); - /** * kbase_mmu_page_fault_worker() - Process a page fault. * diff --git a/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_mode_aarch64.c b/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_mode_aarch64.c index d19579da2f5d..7aace473011f 100644 --- a/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_mode_aarch64.c +++ b/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_mode_aarch64.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2024 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -32,7 +32,7 @@ */ #define ENTRY_IS_ATE_L3 3ULL #define ENTRY_IS_ATE_L02 1ULL -#define ENTRY_IS_INVAL 2ULL +#define ENTRY_IS_INVAL 0ULL #define ENTRY_IS_PTE 3ULL #define ENTRY_ACCESS_RW (1ULL << 6) /* bits 6:7 */ @@ -179,7 +179,7 @@ static void set_num_valid_entries(u64 *pgd, unsigned int num_of_valid_entries) static void entry_set_pte(u64 *entry, phys_addr_t phy) { - page_table_entry_set(entry, (phy & PAGE_MASK) | ENTRY_ACCESS_BIT | ENTRY_IS_PTE); + page_table_entry_set(entry, (phy & GPU_PAGE_MASK) | ENTRY_ACCESS_BIT | ENTRY_IS_PTE); } static void entries_invalidate(u64 *entry, u32 count) diff --git a/drivers/gpu/arm/bifrost/platform/devicetree/mali_kbase_runtime_pm.c b/drivers/gpu/arm/bifrost/platform/devicetree/mali_kbase_runtime_pm.c index 2a5030745586..d0342af60fb3 100644 --- a/drivers/gpu/arm/bifrost/platform/devicetree/mali_kbase_runtime_pm.c +++ b/drivers/gpu/arm/bifrost/platform/devicetree/mali_kbase_runtime_pm.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2015-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2015-2024 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software diff --git a/drivers/gpu/arm/bifrost/tests/Kbuild b/drivers/gpu/arm/bifrost/tests/Kbuild index 72ca70ac8779..479b91532ed7 100644 --- a/drivers/gpu/arm/bifrost/tests/Kbuild +++ b/drivers/gpu/arm/bifrost/tests/Kbuild @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note # -# (C) COPYRIGHT 2017-2023 ARM Limited. All rights reserved. +# (C) COPYRIGHT 2017-2024 ARM Limited. All rights reserved. 
# # This program is free software and is provided to you under the terms of the # GNU General Public License version 2 as published by the Free Software diff --git a/drivers/gpu/arm/bifrost/tests/Kconfig b/drivers/gpu/arm/bifrost/tests/Kconfig index aa011bac8990..88a4194c5cd7 100644 --- a/drivers/gpu/arm/bifrost/tests/Kconfig +++ b/drivers/gpu/arm/bifrost/tests/Kconfig @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note # -# (C) COPYRIGHT 2017, 2020-2023 ARM Limited. All rights reserved. +# (C) COPYRIGHT 2017, 2020-2024 ARM Limited. All rights reserved. # # This program is free software and is provided to you under the terms of the # GNU General Public License version 2 as published by the Free Software diff --git a/drivers/gpu/arm/bifrost/tests/include/kutf/kutf_kprobe.h b/drivers/gpu/arm/bifrost/tests/include/kutf/kutf_kprobe.h index f75cd776c60e..d8c3ca88166b 100644 --- a/drivers/gpu/arm/bifrost/tests/include/kutf/kutf_kprobe.h +++ b/drivers/gpu/arm/bifrost/tests/include/kutf/kutf_kprobe.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2023-2024 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -22,6 +22,8 @@ #ifndef _KUTF_KPROBE_H_ #define _KUTF_KPROBE_H_ +struct dentry; + int kutf_kprobe_init(struct dentry *base_dir); void kutf_kprobe_exit(void); @@ -30,4 +32,6 @@ typedef void (*kutf_kp_handler)(int argc, char **argv); void kutf_kp_sample_handler(int argc, char **argv); void kutf_kp_sample_kernel_function(void); +void kutf_kp_delay_handler(int argc, char **argv); + #endif /* _KUTF_KPROBE_H_ */ diff --git a/drivers/gpu/arm/bifrost/tests/kutf/kutf_kprobe.c b/drivers/gpu/arm/bifrost/tests/kutf/kutf_kprobe.c index f118692c43a1..232809e1ed58 100644 --- a/drivers/gpu/arm/bifrost/tests/kutf/kutf_kprobe.c +++ b/drivers/gpu/arm/bifrost/tests/kutf/kutf_kprobe.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2023-2024 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -29,6 +29,7 @@ #include #include #include +#include #include #define KUTF_KP_REG_MIN_ARGS 3 @@ -86,6 +87,19 @@ const struct file_operations kutf_kp_unreg_debugfs_fops = { struct kprobe kutf_kallsym_kp = { .symbol_name = "kallsyms_lookup_name" }; +void kutf_kp_delay_handler(int argc, char **argv) +{ + long delay; + + if ((!argv) || (!argv[0])) + return; + + if (kstrtol(argv[0], 0, &delay)) + return; + + mdelay(delay); +} + void kutf_kp_sample_kernel_function(void) { pr_debug("%s called\n", __func__); @@ -150,11 +164,9 @@ static ssize_t kutf_kp_reg_debugfs_write(struct file *file, const char __user *u if (count >= KUTF_KP_WRITE_BUFSIZE) return -EINVAL; - kbuf = memdup_user(user_buf, count); - if (IS_ERR(kbuf)) { + kbuf = memdup_user_nul(user_buf, count); + if (IS_ERR(kbuf)) return -ENOMEM; - } - kbuf[count - 1] = '\0'; argv = argv_split(GFP_KERNEL, kbuf, &argc); if (!argv) { @@ -245,11 +257,9 @@ static ssize_t kutf_kp_unreg_debugfs_write(struct file *file, const char __user if (count >= KUTF_KP_WRITE_BUFSIZE) return -EINVAL; - kbuf = memdup_user(user_buf, count); - if (IS_ERR(kbuf)) { + kbuf = memdup_user_nul(user_buf, count); + if (IS_ERR(kbuf)) return -ENOMEM; - } - kbuf[count - 1] = '\0'; argv = argv_split(GFP_KERNEL, kbuf, &argc); if (!argv) { diff --git a/drivers/gpu/arm/bifrost/tests/mali_kutf_clk_rate_trace/kernel/mali_kutf_clk_rate_trace_test.c b/drivers/gpu/arm/bifrost/tests/mali_kutf_clk_rate_trace/kernel/mali_kutf_clk_rate_trace_test.c index 6c343cf9f73b..0598d4397e2a 100644 --- a/drivers/gpu/arm/bifrost/tests/mali_kutf_clk_rate_trace/kernel/mali_kutf_clk_rate_trace_test.c +++ b/drivers/gpu/arm/bifrost/tests/mali_kutf_clk_rate_trace/kernel/mali_kutf_clk_rate_trace_test.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2020-2023 ARM Limited. 
All rights reserved. + * (C) COPYRIGHT 2020-2024 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -425,7 +425,7 @@ static const char *kutf_clk_trace_do_get_platform(struct kutf_context *context, const void *arbiter_if_node = NULL; const void *power_node = NULL; const char *platform = "GPU"; -#if defined(CONFIG_MALI_ARBITER_SUPPORT) && defined(CONFIG_OF) +#if defined(CONFIG_OF) struct kutf_clk_rate_trace_fixture_data *data = context->fixture; arbiter_if_node = of_get_property(data->kbdev->dev->of_node, "arbiter-if", NULL); diff --git a/drivers/gpu/arm/bifrost/tests/mali_kutf_mgm_integration_test/mali_kutf_mgm_integration_test_main.c b/drivers/gpu/arm/bifrost/tests/mali_kutf_mgm_integration_test/mali_kutf_mgm_integration_test_main.c index 8937d69f182f..f341a411324e 100644 --- a/drivers/gpu/arm/bifrost/tests/mali_kutf_mgm_integration_test/mali_kutf_mgm_integration_test_main.c +++ b/drivers/gpu/arm/bifrost/tests/mali_kutf_mgm_integration_test/mali_kutf_mgm_integration_test_main.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2022-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2022-2024 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -95,7 +95,9 @@ static void mali_kutf_mgm_pte_translation_test(struct kutf_context *context) data->group_id, mmu_level, original_pte); translated_pte = mgm_dev->ops.mgm_update_gpu_pte(mgm_dev, data->group_id, - mmu_level, original_pte); + PBHA_ID_DEFAULT, + PTE_FLAGS_NONE, mmu_level, + original_pte); if (translated_pte == original_pte) { snprintf( msg_buf, sizeof(msg_buf), diff --git a/drivers/gpu/arm/bifrost/thirdparty/mali_kbase_mmap.c b/drivers/gpu/arm/bifrost/thirdparty/mali_kbase_mmap.c index 1592eab806ac..cfb347affa2e 100644 --- a/drivers/gpu/arm/bifrost/thirdparty/mali_kbase_mmap.c +++ b/drivers/gpu/arm/bifrost/thirdparty/mali_kbase_mmap.c @@ -20,18 +20,169 @@ * kbase_context_get_unmapped_area() interface. */ +#if (KERNEL_VERSION(6, 1, 0) <= LINUX_VERSION_CODE) +/** + * move_mt_gap() - Search the maple tree for an existing gap of a particular size + * immediately before another pre-identified gap. + * @gap_start: Pre-identified gap starting address. + * @gap_end: Pre-identified gap ending address. + * @size: Size of the new gap needed before gap_start. + * + * This function will search the calling process' maple tree + * for another gap, one that is immediately preceding the pre-identified + * gap, for a specific size, and upon success it will decrement gap_end + * by the specified size, and replace gap_start with the new gap_start of + * the newly identified gap. + * + * Return: true if large enough preceding gap is found, false otherwise. + */ +static bool move_mt_gap(unsigned long *gap_start, unsigned long *gap_end, unsigned long size) +{ + unsigned long new_gap_start, new_gap_end; + + MA_STATE(mas, &current->mm->mm_mt, 0, 0); + + if (*gap_end < size) + return false; + + /* Calculate the gap end for the new, resultant gap */ + new_gap_end = *gap_end - size; + + /* If the new gap_end (i.e. 
new VA start address) is larger than gap_start, then the + * pre-identified gap already has space to shrink to accommodate the decrease in + * gap_end. + */ + if (new_gap_end >= *gap_start) { + /* Pre-identified gap already has space - just patch gap_end to new + * lower value and exit. + */ + *gap_end = new_gap_end; + return true; + } + + /* Since the new VA start address (new_gap_end) is below the start of the pre-identified + * gap in the maple tree, see if there is a free gap directly before the existing gap, of + * the same size as the alignment shift, such that the effective gap found is "extended". + * This may be larger than needed but leaves the same distance between gap_end and gap_start + * that currently exists. + */ + new_gap_start = *gap_start - size; + if (mas_empty_area_rev(&mas, new_gap_start, *gap_start - 1, size)) { + /* There's no gap between the new start address needed and the + * current start address - so return false to find a new + * gap from the maple tree. + */ + return false; + } + /* Suitable gap found - replace gap_start and gap_end with new values. gap_start takes the + * value of the start of new gap found, which now correctly precedes gap_end, and gap_end + * takes on the new aligned value that has now been decremented by the requested size. + */ + *gap_start = mas.index; + *gap_end = new_gap_end; + return true; +} + /** * align_and_check() - Align the specified pointer to the provided alignment and - * check that it is still in range. 
- * @gap_end: Highest possible start address for allocation (end of gap in - * address space) - * @gap_start: Start address of current memory area / gap in address space - * @info: vm_unmapped_area_info structure passed to caller, containing - * alignment, length and limits for the allocation - * @is_shader_code: True if the allocation is for shader code (which has - * additional alignment requirements) - * @is_same_4gb_page: True if the allocation needs to reside completely within - * a 4GB chunk + * check that it is still in range. On kernel 6.1 onwards + * this function does not require that the initial requested + * gap is extended with the maximum size needed to guarantee + * an alignment. + * @gap_end: Highest possible start address for allocation (end of gap in + * address space) + * @gap_start: Start address of current memory area / gap in address space + * @info: vm_unmapped_area_info structure passed to caller, containing + * alignment, length and limits for the allocation + * @is_shader_code: True if the allocation is for shader code (which has + * additional alignment requirements) + * @is_same_4gb_page: True if the allocation needs to reside completely within + * a 4GB chunk + * + * Return: true if gap_end is now aligned correctly and is still in range, + * false otherwise + */ +static bool align_and_check(unsigned long *gap_end, unsigned long gap_start, + struct vm_unmapped_area_info *info, bool is_shader_code, + bool is_same_4gb_page) +{ + unsigned long alignment_shift; + + /* Compute highest gap address at the desired alignment */ + *gap_end -= info->length; + alignment_shift = (*gap_end - info->align_offset) & info->align_mask; + + /* Align desired start VA (gap_end) by calculated alignment shift amount */ + if (!move_mt_gap(&gap_start, gap_end, alignment_shift)) + return false; + /* Alignment is done so far - check for further alignment requirements */ + + if (is_shader_code) { + /* Shader code allocations must not start or end on a 4GB boundary 
*/ + alignment_shift = info->align_offset ? info->align_offset : info->length; + if (0 == (*gap_end & BASE_MEM_MASK_4GB)) { + if (!move_mt_gap(&gap_start, gap_end, alignment_shift)) + return false; + } + if (0 == ((*gap_end + info->length) & BASE_MEM_MASK_4GB)) { + if (!move_mt_gap(&gap_start, gap_end, alignment_shift)) + return false; + } + + if (!(*gap_end & BASE_MEM_MASK_4GB) || + !((*gap_end + info->length) & BASE_MEM_MASK_4GB)) + return false; + } else if (is_same_4gb_page) { + unsigned long start = *gap_end; + unsigned long end = *gap_end + info->length; + unsigned long mask = ~((unsigned long)U32_MAX); + + /* Check if 4GB boundary is straddled */ + if ((start & mask) != ((end - 1) & mask)) { + unsigned long offset = end - (end & mask); + /* This is to ensure that alignment doesn't get + * disturbed in an attempt to prevent straddling at + * 4GB boundary. The GPU VA is aligned to 2MB when the + * allocation size is > 2MB and there is enough CPU & + * GPU virtual space. + */ + unsigned long rounded_offset = ALIGN(offset, info->align_mask + 1); + + if (!move_mt_gap(&gap_start, gap_end, rounded_offset)) + return false; + /* Re-calculate start and end values */ + start = *gap_end; + end = *gap_end + info->length; + + /* The preceding 4GB boundary shall not get straddled, + * even after accounting for the alignment, as the + * size of allocation is limited to 4GB and the initial + * start location was already aligned. + */ + WARN_ON((start & mask) != ((end - 1) & mask)); + } + } + + if ((*gap_end < info->low_limit) || (*gap_end < gap_start)) + return false; + + return true; +} +#else +/** + * align_and_check() - Align the specified pointer to the provided alignment and + * check that it is still in range. For Kernel versions below + * 6.1, it requires that the length of the alignment is already + * extended by a worst-case alignment mask. 
+ * @gap_end: Highest possible start address for allocation (end of gap in + * address space) + * @gap_start: Start address of current memory area / gap in address space + * @info: vm_unmapped_area_info structure passed to caller, containing + * alignment, length and limits for the allocation + * @is_shader_code: True if the allocation is for shader code (which has + * additional alignment requirements) + * @is_same_4gb_page: True if the allocation needs to reside completely within + * a 4GB chunk * * Return: true if gap_end is now aligned correctly and is still in range, * false otherwise @@ -41,8 +192,8 @@ static bool align_and_check(unsigned long *gap_end, unsigned long gap_start, bool is_same_4gb_page) { /* Compute highest gap address at the desired alignment */ - (*gap_end) -= info->length; - (*gap_end) -= (*gap_end - info->align_offset) & info->align_mask; + *gap_end -= info->length; + *gap_end -= (*gap_end - info->align_offset) & info->align_mask; if (is_shader_code) { /* Check for 4GB boundary */ @@ -73,6 +224,7 @@ static bool align_and_check(unsigned long *gap_end, unsigned long gap_start, start -= rounded_offset; end -= rounded_offset; + /* Patch gap_end to use new starting address for VA region */ *gap_end = start; /* The preceding 4GB boundary shall not get straddled, @@ -89,6 +241,7 @@ static bool align_and_check(unsigned long *gap_end, unsigned long gap_start, return true; } +#endif /** * kbase_unmapped_area_topdown() - allocates new areas top-down from @@ -218,31 +371,27 @@ check_current: } } #else - unsigned long length, high_limit, gap_start, gap_end; + unsigned long high_limit, gap_start, gap_end; MA_STATE(mas, &current->mm->mm_mt, 0, 0); - /* Adjust search length to account for worst case alignment overhead */ - length = info->length + info->align_mask; - if (length < info->length) - return -ENOMEM; /* * Adjust search limits by the desired length. * See implementation comment at top of unmapped_area(). 
*/ gap_end = info->high_limit; - if (gap_end < length) + if (gap_end < info->length) return -ENOMEM; - high_limit = gap_end - length; + high_limit = gap_end - info->length; if (info->low_limit > high_limit) return -ENOMEM; while (true) { - if (mas_empty_area_rev(&mas, info->low_limit, info->high_limit - 1, length)) + if (mas_empty_area_rev(&mas, info->low_limit, info->high_limit - 1, info->length)) return -ENOMEM; gap_end = mas.last + 1; - gap_start = mas.min; + gap_start = mas.index; if (align_and_check(&gap_end, gap_start, info, is_shader_code, is_same_4gb_page)) return gap_end; @@ -368,7 +517,7 @@ unsigned long kbase_context_get_unmapped_area(struct kbase_context *const kctx, kbase_gpu_vm_unlock(kctx); #ifndef CONFIG_64BIT } else { - return current->mm->get_unmapped_area(kctx->kfile->filp, addr, len, pgoff, flags); + return current->mm->get_unmapped_area(kctx->filp, addr, len, pgoff, flags); #endif } diff --git a/drivers/gpu/arm/bifrost/tl/backend/mali_kbase_timeline_csf.c b/drivers/gpu/arm/bifrost/tl/backend/mali_kbase_timeline_csf.c index a91278dd3bef..f254aa84dc20 100644 --- a/drivers/gpu/arm/bifrost/tl/backend/mali_kbase_timeline_csf.c +++ b/drivers/gpu/arm/bifrost/tl/backend/mali_kbase_timeline_csf.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2024 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -35,7 +35,7 @@ void kbase_create_timeline_objects(struct kbase_device *kbdev) u32 const num_sb_entries = kbdev->gpu_props.gpu_id.arch_major >= 11 ? 
16 : 8; u32 const supports_gpu_sleep = #ifdef KBASE_PM_RUNTIME - kbdev->pm.backend.gpu_sleep_supported; + test_bit(KBASE_GPU_SUPPORTS_GPU_SLEEP, &kbdev->pm.backend.gpu_sleep_allowed); #else false; #endif /* KBASE_PM_RUNTIME */ diff --git a/drivers/gpu/arm/bifrost/tl/mali_kbase_timeline_io.c b/drivers/gpu/arm/bifrost/tl/mali_kbase_timeline_io.c index d98e22880419..719e26124409 100644 --- a/drivers/gpu/arm/bifrost/tl/mali_kbase_timeline_io.c +++ b/drivers/gpu/arm/bifrost/tl/mali_kbase_timeline_io.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2024 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -29,6 +29,7 @@ #include #include #include +#include /* Explicitly include epoll header for old kernels. Not required from 4.16. */ #if KERNEL_VERSION(4, 16, 0) > LINUX_VERSION_CODE @@ -169,7 +170,7 @@ static inline int copy_stream_header(char __user *buffer, size_t size, ssize_t * const char *hdr, size_t hdr_size, size_t *hdr_btc) { const size_t offset = hdr_size - *hdr_btc; - const size_t copy_size = MIN((size_t)((ssize_t)size - *copy_len), *hdr_btc); + const size_t copy_size = MIN(size_sub((ssize_t)size, *copy_len), *hdr_btc); if (!*hdr_btc) return 0; diff --git a/drivers/gpu/arm/bifrost/tl/mali_kbase_tracepoints.c b/drivers/gpu/arm/bifrost/tl/mali_kbase_tracepoints.c index 742735846d49..d4465c44addb 100644 --- a/drivers/gpu/arm/bifrost/tl/mali_kbase_tracepoints.c +++ b/drivers/gpu/arm/bifrost/tl/mali_kbase_tracepoints.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2024 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -62,6 +62,7 @@ enum tl_msg_id_obj { KBASE_TL_EVENT_ATOM_SOFTJOB_START, KBASE_TL_EVENT_ATOM_SOFTJOB_END, KBASE_TL_ARBITER_GRANTED, + KBASE_TL_ARBITER_LOST, KBASE_TL_ARBITER_STARTED, KBASE_TL_ARBITER_STOP_REQUESTED, KBASE_TL_ARBITER_STOPPED, @@ -272,6 +273,10 @@ enum tl_msg_id_obj { "Arbiter has granted gpu access", \ "@p", \ "gpu") \ + TRACEPOINT_DESC(KBASE_TL_ARBITER_LOST, \ + "Received a gpu lost event from the arbiter", \ + "@p", \ + "gpu") \ TRACEPOINT_DESC(KBASE_TL_ARBITER_STARTED, \ "Driver is running again and able to process jobs", \ "@p", \ @@ -1546,6 +1551,29 @@ void __kbase_tlstream_tl_arbiter_granted( kbase_tlstream_msgbuf_release(stream, acq_flags); } +void __kbase_tlstream_tl_arbiter_lost( + struct kbase_tlstream *stream, + const void *gpu +) +{ + const u32 msg_id = KBASE_TL_ARBITER_LOST; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(gpu) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &gpu, sizeof(gpu)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + void __kbase_tlstream_tl_arbiter_started( struct kbase_tlstream *stream, const void *gpu diff --git a/drivers/gpu/arm/bifrost/tl/mali_kbase_tracepoints.h b/drivers/gpu/arm/bifrost/tl/mali_kbase_tracepoints.h index b2cbfe6e528d..6dd4b44ea6b2 100644 --- a/drivers/gpu/arm/bifrost/tl/mali_kbase_tracepoints.h +++ b/drivers/gpu/arm/bifrost/tl/mali_kbase_tracepoints.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2024 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -271,6 +271,11 @@ void __kbase_tlstream_tl_arbiter_granted( const void *gpu ); +void __kbase_tlstream_tl_arbiter_lost( + struct kbase_tlstream *stream, + const void *gpu +); + void __kbase_tlstream_tl_arbiter_started( struct kbase_tlstream *stream, const void *gpu @@ -1550,6 +1555,25 @@ struct kbase_tlstream; ); \ } while (0) +/** + * KBASE_TLSTREAM_TL_ARBITER_LOST - Received a gpu lost event from the arbiter + * + * @kbdev: Kbase device + * @gpu: Name of the GPU object + */ +#define KBASE_TLSTREAM_TL_ARBITER_LOST( \ + kbdev, \ + gpu \ + ) \ + do { \ + u32 enabled = (u32)atomic_read(&kbdev->timeline_flags); \ + if (enabled & TLSTREAM_ENABLED) \ + __kbase_tlstream_tl_arbiter_lost( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + gpu \ + ); \ + } while (0) + /** * KBASE_TLSTREAM_TL_ARBITER_STARTED - Driver is running again and able to process jobs * diff --git a/drivers/hwtracing/coresight/mali/Makefile b/drivers/hwtracing/coresight/mali/Makefile index 923cb0c910d9..d8186bee6e64 100644 --- a/drivers/hwtracing/coresight/mali/Makefile +++ b/drivers/hwtracing/coresight/mali/Makefile @@ -79,9 +79,9 @@ ifeq ($(MALI_KCONFIG_EXT_PREFIX),) endif EXTRA_SYMBOLS += \ - $(M)/../../../base/arm/Module.symvers \ $(GPU_SYMBOLS) + # The following were added to align with W=1 in scripts/Makefile.extrawarn # from the Linux source tree CFLAGS_MODULE += -Wall -Werror @@ -99,6 +99,8 @@ CFLAGS_MODULE += $(call cc-option, -Wstringop-truncation) CFLAGS_MODULE += -Wno-missing-field-initializers CFLAGS_MODULE += -Wno-sign-compare CFLAGS_MODULE += -Wno-type-limits +# The following ensures the stack frame does not get larger than a page +CFLAGS_MODULE += -Wframe-larger-than=4096 KBUILD_CPPFLAGS += -DKBUILD_EXTRA_WARN1 diff --git a/drivers/hwtracing/coresight/mali/build.bp b/drivers/hwtracing/coresight/mali/build.bp index 33dcd22fa364..d69148c8cb70 
100644 --- a/drivers/hwtracing/coresight/mali/build.bp +++ b/drivers/hwtracing/coresight/mali/build.bp @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2022-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2022-2024 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software diff --git a/drivers/hwtracing/coresight/mali/sources/coresight_mali_sources.c b/drivers/hwtracing/coresight/mali/sources/coresight_mali_sources.c index e6d2dc71096b..247a8b47f05b 100644 --- a/drivers/hwtracing/coresight/mali/sources/coresight_mali_sources.c +++ b/drivers/hwtracing/coresight/mali/sources/coresight_mali_sources.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2022-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -50,7 +50,11 @@ static void coresight_mali_disable_source(struct coresight_device *csdev, struct } static const struct coresight_ops_source coresight_mali_source_ops = { +#if KERNEL_VERSION(6, 3, 0) <= LINUX_VERSION_CODE + .cpu_id = coresight_mali_source_trace_id, +#else .trace_id = coresight_mali_source_trace_id, +#endif .enable = coresight_mali_enable_source, .disable = coresight_mali_disable_source }; diff --git a/drivers/hwtracing/coresight/mali/sources/itm/coresight_mali_source_itm_core.c b/drivers/hwtracing/coresight/mali/sources/itm/coresight_mali_source_itm_core.c index 59d5cd314c2f..727e5c7a552a 100644 --- a/drivers/hwtracing/coresight/mali/sources/itm/coresight_mali_source_itm_core.c +++ b/drivers/hwtracing/coresight/mali/sources/itm/coresight_mali_source_itm_core.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH 
Linux-syscall-note /* * - * (C) COPYRIGHT 2022-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2022-2024 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -85,14 +85,14 @@ static struct kbase_debug_coresight_csf_op dwt_itm_enable_ops[] = { }; static struct kbase_debug_coresight_csf_op dwt_itm_disable_ops[] = { - // Disable ITM/DWT functionality via DEMCR register - WRITE_IMM_OP(CS_SCS_BASE_ADDR + SCS_DEMCR, 0x00000000), // Unlock ITM configuration WRITE_IMM_OP(CS_ITM_BASE_ADDR + CORESIGHT_LAR, CS_MALI_UNLOCK_COMPONENT), // Check ITM is disabled POLL_OP(CS_ITM_BASE_ADDR + ITM_TCR, ITM_TCR_BUSY_BIT, 0x0), // Lock WRITE_IMM_OP(CS_ITM_BASE_ADDR + CORESIGHT_LAR, 0x00000000), + // Disable ITM/DWT functionality via DEMCR register + WRITE_IMM_OP(CS_SCS_BASE_ADDR + SCS_DEMCR, 0x00000000), // Set enabled bit off at the end of sequence BIT_AND_OP(&itm_state.enabled, 0x0), }; diff --git a/drivers/xen/arm/Makefile b/drivers/xen/arm/Makefile index b2ee53723428..27bee59ac787 100644 --- a/drivers/xen/arm/Makefile +++ b/drivers/xen/arm/Makefile @@ -78,6 +78,8 @@ CFLAGS_MODULE += $(call cc-option, -Wstringop-truncation) CFLAGS_MODULE += -Wno-missing-field-initializers CFLAGS_MODULE += -Wno-sign-compare CFLAGS_MODULE += -Wno-type-limits +# The following ensures the stack frame does not get larger than a page +CFLAGS_MODULE += -Wframe-larger-than=4096 KBUILD_CPPFLAGS += -DKBUILD_EXTRA_WARN1 diff --git a/include/linux/mali_arbiter_interface.h b/include/linux/mali_arbiter_interface.h index b4162f86ebb4..ae44e82ae6dd 100644 --- a/include/linux/mali_arbiter_interface.h +++ b/include/linux/mali_arbiter_interface.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2024 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -26,6 +26,8 @@ #ifndef _MALI_KBASE_ARBITER_INTERFACE_H_ #define _MALI_KBASE_ARBITER_INTERFACE_H_ +#include + /** * DOC: Mali arbiter interface version * diff --git a/include/linux/mali_hw_access.h b/include/linux/mali_hw_access.h new file mode 100644 index 000000000000..106393fc3372 --- /dev/null +++ b/include/linux/mali_hw_access.h @@ -0,0 +1,62 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2023-2024 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + */ + +#ifndef _MALI_HW_ACCESS_H_ +#define _MALI_HW_ACCESS_H_ + +#include +#include + + +#define mali_readl(addr) readl(addr) + +#define mali_writel(val, addr) writel(val, addr) + +#define mali_ioremap(addr, size) ioremap(addr, size) + +#define mali_iounmap(addr) iounmap(addr) + +#define mali_arch_timer_get_cntfrq() arch_timer_get_cntfrq() + + +#define mali_readq(addr) ((u64)mali_readl(addr) | ((u64)mali_readl(addr + 4) << 32)) + +static inline u64 mali_readq_coherent(const void __iomem *addr) +{ + u32 hi1, hi2, lo; + + do { + hi1 = mali_readl(addr + 4); + lo = mali_readl(addr); + hi2 = mali_readl(addr + 4); + } while (hi1 != hi2); + + return lo | (((u64)hi1) << 32); +} + +#define mali_writeq(val, addr) \ + do { \ + u64 __val = (u64)val; \ + mali_writel((u32)(__val & 0xFFFFFFFF), addr); \ + mali_writel((u32)(__val >> 32), addr + 4); \ + } while (0) + +#endif /* _MALI_HW_ACCESS_H_ */ diff --git a/include/linux/memory_group_manager.h b/include/linux/memory_group_manager.h index 3820f1bff86b..ec55d74f56ad 100644 --- a/include/linux/memory_group_manager.h +++ b/include/linux/memory_group_manager.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2024 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -32,9 +32,19 @@ typedef int vm_fault_t; #define MEMORY_GROUP_MANAGER_NR_GROUPS (16) +#define PTE_PBHA_SHIFT (59) +#define PTE_PBHA_MASK ((uint64_t)0xf << PTE_PBHA_SHIFT) +#define PTE_RES_BIT_MULTI_AS_SHIFT (63) +#define PTE_FLAGS_NONE (0) +#define PBHA_ID_DEFAULT (0) + struct memory_group_manager_device; struct memory_group_manager_import_data; +enum mgm_pte_flags { + MMA_VIOLATION = 0, +}; + /** * struct memory_group_manager_ops - Callbacks for memory group manager * operations @@ -46,6 +56,8 @@ struct memory_group_manager_import_data; * @mgm_pte_to_original_pte: Callback to get the original PTE entry as given * to mgm_update_gpu_pte * @mgm_vmf_insert_pfn_prot: Callback to map a physical memory page for the CPU + * @mgm_get_import_memory_cached_access_permitted: Callback to query if a given imported + * memory is allowed to be accessed as cached or not by the GPU */ struct memory_group_manager_ops { /* @@ -115,6 +127,11 @@ struct memory_group_manager_ops { * @group_id: A physical memory group ID. The meaning of this is * defined by the systems integrator. Its valid range is * 0 .. MEMORY_GROUP_MANAGER_NR_GROUPS-1. + * + * @pbha_id: PBHA Override ID to encode into the PTE + * @pte_flags: PTE related flags, defined in enum mgm_pte_flags + * + * * @mmu_level: The level of the page table entry in @ate. * @pte: The page table entry to modify, in LPAE or AArch64 format * (depending on the driver's configuration). This should be @@ -124,13 +141,14 @@ struct memory_group_manager_ops { * This function allows the memory group manager to modify a GPU page * table entry before it is stored by the kbase module (controller * driver). 
It may set certain bits in the page table entry attributes - * or modify the physical address, based on the physical memory group ID - * and/or additional data in struct memory_group_manager_device. + * or modify the physical address, based on the physical memory group ID, + * PBHA ID, PTE flags and/or additional data in struct memory_group_manager_device. * * Return: A modified GPU page table entry to be stored in a page table. */ u64 (*mgm_update_gpu_pte)(struct memory_group_manager_device *mgm_dev, - unsigned int group_id, int mmu_level, u64 pte); + unsigned int group_id, unsigned int pbha_id, + unsigned int pte_flags, int mmu_level, u64 pte); /* * mgm_pte_to_original_pte - Undo any modification done during mgm_update_gpu_pte() @@ -178,6 +196,20 @@ struct memory_group_manager_ops { unsigned int group_id, struct vm_area_struct *vma, unsigned long addr, unsigned long pfn, pgprot_t pgprot); + + /* + * mgm_get_import_memory_cached_access_permitted - Check if a given imported memory + * is allowed to be accessed as cached or not by the GPU + * + * @mgm_dev: The memory group manager through which the request + * is being made. + * @import_data: Pointer to the data which describes imported memory. + * + * Return: true if cached access is permitted, false otherwise + */ + bool (*mgm_get_import_memory_cached_access_permitted)( + struct memory_group_manager_device *mgm_dev, + struct memory_group_manager_import_data *import_data); }; /** diff --git a/include/linux/version_compat_defs.h b/include/linux/version_compat_defs.h index 9144b719b08d..3a5b5fe405ee 100644 --- a/include/linux/version_compat_defs.h +++ b/include/linux/version_compat_defs.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2022-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2022-2024 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -26,6 +26,9 @@ #include #include #include +#include +#include +#include #if (KERNEL_VERSION(4, 4, 267) < LINUX_VERSION_CODE) #include @@ -176,6 +179,7 @@ static inline void kbase_kunmap_atomic(void *address) */ #define check_mul_overflow(a, b, d) __builtin_mul_overflow(a, b, d) #define check_add_overflow(a, b, d) __builtin_add_overflow(a, b, d) +#define check_sub_overflow(a, b, d) __builtin_sub_overflow(a, b, d) #endif /* @@ -337,4 +341,173 @@ static inline long kbase_pin_user_pages_remote(struct task_struct *tsk, struct m #endif /* (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE) */ +#if (KERNEL_VERSION(4, 16, 0) > LINUX_VERSION_CODE) +/* Null definition */ +#define ALLOW_ERROR_INJECTION(fname, err_type) +#endif /* (KERNEL_VERSION(4, 16, 0) > LINUX_VERSION_CODE) */ + +#if KERNEL_VERSION(6, 0, 0) > LINUX_VERSION_CODE +#define KBASE_REGISTER_SHRINKER(reclaim, name, priv_data) register_shrinker(reclaim) + +/* clang-format off */ +#elif ((KERNEL_VERSION(6, 7, 0) > LINUX_VERSION_CODE) && \ + !(defined(__ANDROID_COMMON_KERNEL__) && (KERNEL_VERSION(6, 6, 0) == LINUX_VERSION_CODE))) +/* clang-format on */ +#define KBASE_REGISTER_SHRINKER(reclaim, name, priv_data) register_shrinker(reclaim, name) + +#else +#define KBASE_REGISTER_SHRINKER(reclaim, name, priv_data) \ + do { \ + reclaim->private_data = priv_data; \ + shrinker_register(reclaim); \ + } while (0) + +#endif /* KERNEL_VERSION(6, 0, 0) > LINUX_VERSION_CODE */ +/* clang-format off */ +#if ((KERNEL_VERSION(6, 7, 0) > LINUX_VERSION_CODE) && \ + !(defined(__ANDROID_COMMON_KERNEL__) && (KERNEL_VERSION(6, 6, 0) == LINUX_VERSION_CODE))) +/* clang-format on */ +#define KBASE_UNREGISTER_SHRINKER(reclaim) unregister_shrinker(&reclaim) +#define KBASE_GET_KBASE_DATA_FROM_SHRINKER(s, type, var) container_of(s, type, var) +#define DEFINE_KBASE_SHRINKER struct shrinker +#define 
KBASE_INIT_RECLAIM(var, attr, name) (&((var)->attr)) +#define KBASE_SET_RECLAIM(var, attr, reclaim) ((var)->attr = (*reclaim)) + +#else +#define KBASE_UNREGISTER_SHRINKER(reclaim) shrinker_free(reclaim) +#define KBASE_GET_KBASE_DATA_FROM_SHRINKER(s, type, var) s->private_data +#define DEFINE_KBASE_SHRINKER struct shrinker * +#define KBASE_SHRINKER_ALLOC(name) shrinker_alloc(0, name) +#define KBASE_INIT_RECLAIM(var, attr, name) (KBASE_SHRINKER_ALLOC(name)) +#define KBASE_SET_RECLAIM(var, attr, reclaim) ((var)->attr = reclaim) + +#endif + +static inline int kbase_param_set_uint_minmax(const char *val, const struct kernel_param *kp, + unsigned int min, unsigned int max) +{ +#if (KERNEL_VERSION(5, 15, 0) > LINUX_VERSION_CODE) + uint uint_val; + int ret; + + if (!val) + return -EINVAL; + + ret = kstrtouint(val, 0, &uint_val); + + if (ret == 0) { + if (uint_val < min || uint_val > max) + return -EINVAL; + + *((uint *)kp->arg) = uint_val; + } + + return ret; +#else + return param_set_uint_minmax(val, kp, min, max); +#endif +} + +#if (KERNEL_VERSION(4, 20, 0) <= LINUX_VERSION_CODE) +#include +#endif +#ifndef __maybe_unused +#define __maybe_unused __attribute__((unused)) +#endif + +#if KERNEL_VERSION(5, 4, 103) <= LINUX_VERSION_CODE +#define mali_sysfs_emit(buf, fmt, ...) sysfs_emit(buf, fmt, __VA_ARGS__) +#else +#define mali_sysfs_emit(buf, fmt, ...) 
scnprintf(buf, PAGE_SIZE, fmt, __VA_ARGS__) +#endif + +#if KERNEL_VERSION(5, 10, 0) > LINUX_VERSION_CODE +#include +#include + +static inline struct devfreq *devfreq_get_devfreq_by_node(struct device_node *node) +{ + struct platform_device *pdev = of_find_device_by_node(node); + + if (!pdev || !node) + return NULL; + + return devfreq_get_devfreq_by_phandle(&pdev->dev, 0); +} +#endif + +#if (KERNEL_VERSION(5, 16, 0) <= LINUX_VERSION_CODE && \ + KERNEL_VERSION(5, 18, 0) > LINUX_VERSION_CODE) || \ + (KERNEL_VERSION(5, 11, 0) <= LINUX_VERSION_CODE && \ + KERNEL_VERSION(5, 15, 85) >= LINUX_VERSION_CODE) || \ + (KERNEL_VERSION(5, 10, 200) >= LINUX_VERSION_CODE) +/* + * Kernel revisions + * - up to 5.10.200 + * - between 5.11.0 and 5.15.85 inclusive + * - between 5.16.0 and 5.17.15 inclusive + * do not provide an implementation of + * size_add, size_sub and size_mul. + * The implementations below provides + * backward compatibility implementations of these functions. + */ + +static inline size_t __must_check size_mul(size_t factor1, size_t factor2) +{ + size_t ret_val; + + if (check_mul_overflow(factor1, factor2, &ret_val)) + return SIZE_MAX; + return ret_val; +} + +static inline size_t __must_check size_add(size_t addend1, size_t addend2) +{ + size_t ret_val; + + if (check_add_overflow(addend1, addend2, &ret_val)) + return SIZE_MAX; + return ret_val; +} + +static inline size_t __must_check size_sub(size_t minuend, size_t subtrahend) +{ + size_t ret_val; + + if (minuend == SIZE_MAX || subtrahend == SIZE_MAX || + check_sub_overflow(minuend, subtrahend, &ret_val)) + return SIZE_MAX; + return ret_val; +} +#endif + +#if KERNEL_VERSION(5, 5, 0) > LINUX_VERSION_CODE +static inline unsigned long bitmap_get_value8(const unsigned long *map, unsigned long start) +{ + const size_t index = BIT_WORD(start); + const unsigned long offset = start % BITS_PER_LONG; + + return (map[index] >> offset) & 0xFF; +} + +static inline unsigned long find_next_clump8(unsigned long *clump, const 
unsigned long *addr, + unsigned long size, unsigned long offset) +{ + offset = find_next_bit(addr, size, offset); + if (offset == size) + return size; + + offset = round_down(offset, 8); + *clump = bitmap_get_value8(addr, offset); + + return offset; +} + +#define find_first_clump8(clump, bits, size) find_next_clump8((clump), (bits), (size), 0) + +#define for_each_set_clump8(start, clump, bits, size) \ + for ((start) = find_first_clump8(&(clump), (bits), (size)); (start) < (size); \ + (start) = find_next_clump8(&(clump), (bits), (size), (start) + 8)) +#endif + #endif /* _VERSION_COMPAT_DEFS_H_ */ diff --git a/include/uapi/gpu/arm/bifrost/backend/gpu/mali_kbase_model_dummy.h b/include/uapi/gpu/arm/bifrost/backend/gpu/mali_kbase_model_dummy.h index 564f477e57d1..b80817f04255 100644 --- a/include/uapi/gpu/arm/bifrost/backend/gpu/mali_kbase_model_dummy.h +++ b/include/uapi/gpu/arm/bifrost/backend/gpu/mali_kbase_model_dummy.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2021-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2021-2024 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software diff --git a/include/uapi/gpu/arm/bifrost/csf/mali_base_csf_kernel.h b/include/uapi/gpu/arm/bifrost/csf/mali_base_csf_kernel.h index 0fb824267184..2b2fd1dd7bc8 100644 --- a/include/uapi/gpu/arm/bifrost/csf/mali_base_csf_kernel.h +++ b/include/uapi/gpu/arm/bifrost/csf/mali_base_csf_kernel.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2020-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2020-2024 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -46,7 +46,11 @@ */ #define BASE_MEM_CSF_EVENT ((base_mem_alloc_flags)1 << 19) -#define BASE_MEM_RESERVED_BIT_20 ((base_mem_alloc_flags)1 << 20) +/* Unused bit for CSF, only used in JM for BASE_MEM_TILER_ALIGN_TOP */ +#define BASE_MEM_UNUSED_BIT_20 ((base_mem_alloc_flags)1 << 20) + +/* Unused bit for CSF, only used in JM for BASE_MEM_FLAG_MAP_FIXED */ +#define BASE_MEM_UNUSED_BIT_27 ((base_mem_alloc_flags)1 << 27) /* Must be FIXABLE memory: its GPU VA will be determined at a later point, * at which time it will be at a fixed GPU VA. @@ -57,14 +61,21 @@ * must be less than BASE_MEM_FLAGS_NR_BITS !!! */ -/* A mask of all the flags which are only valid for allocations within kbase, - * and may not be passed from user space. +/* A mask of all the flags which are only valid within kbase, + * and may not be passed to/from user space. */ #define BASEP_MEM_FLAGS_KERNEL_ONLY (BASEP_MEM_PERMANENT_KERNEL_MAPPING | BASEP_MEM_NO_USER_FREE) -/* A mask of all currently reserved flags +/* A mask of flags that, when provided, cause other flags to be + * enabled but are not enabled themselves */ -#define BASE_MEM_FLAGS_RESERVED BASE_MEM_RESERVED_BIT_20 +#define BASE_MEM_FLAGS_ACTION_MODIFIERS (BASE_MEM_COHERENT_SYSTEM_REQUIRED | BASE_MEM_IMPORT_SHARED) + +/* A mask of all currently reserved flags */ +#define BASE_MEM_FLAGS_RESERVED ((base_mem_alloc_flags)0) + +/* A mask of all bits that are not used by a flag on CSF */ +#define BASE_MEM_FLAGS_UNUSED (BASE_MEM_UNUSED_BIT_20 | BASE_MEM_UNUSED_BIT_27) /* Special base mem handles specific to CSF. */ @@ -474,7 +485,26 @@ struct base_gpu_queue_error_fatal_payload { }; /** - * enum base_gpu_queue_group_error_type - GPU Fatal error type. + * struct base_gpu_queue_error_fault_payload - Recoverable fault + * error information related to GPU command queue. 
+ * + * @sideband: Additional information about this recoverable fault. + * @status: Recoverable fault information. + * This consists of exception type (least significant byte) and + * data (remaining bytes). One example of exception type is + * INSTR_INVALID_PC (0x50). + * @csi_index: Index of the CSF interface the queue is bound to. + * @padding: Padding to make multiple of 64bits + */ +struct base_gpu_queue_error_fault_payload { + __u64 sideband; + __u32 status; + __u8 csi_index; + __u8 padding[3]; +}; + +/** + * enum base_gpu_queue_group_error_type - GPU error type. * * @BASE_GPU_QUEUE_GROUP_ERROR_FATAL: Fatal error associated with GPU * command queue group. @@ -484,7 +514,9 @@ struct base_gpu_queue_error_fatal_payload { * progress timeout. * @BASE_GPU_QUEUE_GROUP_ERROR_TILER_HEAP_OOM: Fatal error due to running out * of tiler heap memory. - * @BASE_GPU_QUEUE_GROUP_ERROR_FATAL_COUNT: The number of fatal error types + * @BASE_GPU_QUEUE_GROUP_QUEUE_ERROR_FAULT: Fault error associated with GPU + * command queue. + * @BASE_GPU_QUEUE_GROUP_ERROR_FATAL_COUNT: The number of GPU error types * * This type is used for &struct_base_gpu_queue_group_error.error_type. 
*/ @@ -493,6 +525,7 @@ enum base_gpu_queue_group_error_type { BASE_GPU_QUEUE_GROUP_QUEUE_ERROR_FATAL, BASE_GPU_QUEUE_GROUP_ERROR_TIMEOUT, BASE_GPU_QUEUE_GROUP_ERROR_TILER_HEAP_OOM, + BASE_GPU_QUEUE_GROUP_QUEUE_ERROR_FAULT, BASE_GPU_QUEUE_GROUP_ERROR_FATAL_COUNT }; @@ -512,6 +545,7 @@ struct base_gpu_queue_group_error { union { struct base_gpu_queue_group_error_fatal_payload fatal_group; struct base_gpu_queue_error_fatal_payload fatal_queue; + struct base_gpu_queue_error_fault_payload fault_queue; } payload; }; @@ -519,8 +553,7 @@ struct base_gpu_queue_group_error { * enum base_csf_notification_type - Notification type * * @BASE_CSF_NOTIFICATION_EVENT: Notification with kernel event - * @BASE_CSF_NOTIFICATION_GPU_QUEUE_GROUP_ERROR: Notification with GPU fatal - * error + * @BASE_CSF_NOTIFICATION_GPU_QUEUE_GROUP_ERROR: Notification with GPU error * @BASE_CSF_NOTIFICATION_CPU_QUEUE_DUMP: Notification with dumping cpu * queue * @BASE_CSF_NOTIFICATION_COUNT: The number of notification type diff --git a/include/uapi/gpu/arm/bifrost/csf/mali_kbase_csf_ioctl.h b/include/uapi/gpu/arm/bifrost/csf/mali_kbase_csf_ioctl.h index 537c90d6efa5..2b5b8b25fc2c 100644 --- a/include/uapi/gpu/arm/bifrost/csf/mali_kbase_csf_ioctl.h +++ b/include/uapi/gpu/arm/bifrost/csf/mali_kbase_csf_ioctl.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2020-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2020-2024 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -95,15 +95,35 @@ * 1.22: * - Add comp_pri_threshold and comp_pri_ratio attributes to * kbase_ioctl_cs_queue_group_create. + * - Made the BASE_MEM_DONT_NEED memory flag queryable. * 1.23: * - Disallows changing the sharability on the GPU of imported dma-bufs to * BASE_MEM_COHERENT_SYSTEM using KBASE_IOCTL_MEM_FLAGS_CHANGE. 
* 1.24: * - Implement full block state support for hardware counters. + * 1.25: + * - Add support for CS_FAULT reporting to userspace + * 1.26: + * - Made the BASE_MEM_IMPORT_SYNC_ON_MAP_UNMAP and BASE_MEM_KERNEL_SYNC memory + * flags queryable. + * 1.27: + * - Implement support for HWC block state availability. + * 1.28: + * - Made the SAME_VA memory flag queryable. + * 1.29: + * - Re-allow child process to do supported file operations (like mmap, ioctl + * read, poll) on the file descriptor of mali device that was inherited + * from the parent process. + * 1.30: + * - Implement support for setting GPU Timestamp Offset register. + * 1.31: + * - Reject non-protected allocations containing the BASE_MEM_PROTECTED memory flag. + * - Reject allocations containing the BASE_MEM_DONT_NEED memory flag (it is only settable). + * - Reject allocations containing the BASE_MEM_UNUSED_BIT_xx memory flags. */ #define BASE_UK_VERSION_MAJOR 1 -#define BASE_UK_VERSION_MINOR 24 +#define BASE_UK_VERSION_MINOR 31 /** * struct kbase_ioctl_version_check - Check version compatibility between @@ -340,6 +360,8 @@ union kbase_ioctl_cs_queue_group_create_1_18 { * @in.csi_handlers: Flags to signal that the application intends to use CSI * exception handlers in some linear buffers to deal with * the given exception types. + * @in.cs_fault_report_enable: Flag to indicate reporting of CS_FAULTs + * to userspace. * @in.padding: Currently unused, must be zero * @out: Output parameters * @out.group_handle: Handle of a newly created queue group. @@ -360,7 +382,8 @@ union kbase_ioctl_cs_queue_group_create { /** * @in.reserved: Reserved, currently unused, must be zero. 
*/ - __u16 reserved; + __u8 reserved; + __u8 cs_fault_report_enable; /** * @in.dvs_buf: buffer for deferred vertex shader */ @@ -480,7 +503,7 @@ union kbase_ioctl_cs_tiler_heap_init { /** * union kbase_ioctl_cs_tiler_heap_init_1_13 - Initialize chunked tiler memory heap, - * earlier version upto 1.13 + * earlier version up to 1.13 * @in: Input parameters * @in.chunk_size: Size of each chunk. * @in.initial_chunks: Initial number of chunks that heap will be created with. @@ -637,6 +660,22 @@ union kbase_ioctl_read_user_page { #define KBASE_IOCTL_READ_USER_PAGE _IOWR(KBASE_IOCTL_TYPE, 60, union kbase_ioctl_read_user_page) +/** + * struct kbase_ioctl_queue_group_clear_faults - Re-enable CS FAULT reporting for the GPU queues + * + * @addr: CPU VA to an array of GPU VAs of the buffers backing the queues + * @nr_queues: Number of queues in the array + * @padding: Padding to round up to a multiple of 8 bytes, must be zero + */ +struct kbase_ioctl_queue_group_clear_faults { + __u64 addr; + __u32 nr_queues; + __u8 padding[4]; +}; + +#define KBASE_IOCTL_QUEUE_GROUP_CLEAR_FAULTS \ + _IOW(KBASE_IOCTL_TYPE, 61, struct kbase_ioctl_queue_group_clear_faults) + /*************** * test ioctls * ***************/ diff --git a/include/uapi/gpu/arm/bifrost/gpu/mali_kbase_gpu_id.h b/include/uapi/gpu/arm/bifrost/gpu/mali_kbase_gpu_id.h index d3478546e244..d4d12aed780d 100644 --- a/include/uapi/gpu/arm/bifrost/gpu/mali_kbase_gpu_id.h +++ b/include/uapi/gpu/arm/bifrost/gpu/mali_kbase_gpu_id.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2015-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2015-2024 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software diff --git a/include/uapi/gpu/arm/bifrost/jm/mali_base_jm_kernel.h b/include/uapi/gpu/arm/bifrost/jm/mali_base_jm_kernel.h index 9478334ce667..fad61299b1c1 100644 --- a/include/uapi/gpu/arm/bifrost/jm/mali_base_jm_kernel.h +++ b/include/uapi/gpu/arm/bifrost/jm/mali_base_jm_kernel.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2024 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -30,15 +30,11 @@ * See base_mem_alloc_flags. */ -/* Used as BASE_MEM_FIXED in other backends */ -#define BASE_MEM_RESERVED_BIT_8 ((base_mem_alloc_flags)1 << 8) +/* Unused bit for JM, only used in CSF for BASE_MEM_FIXED */ +#define BASE_MEM_UNUSED_BIT_8 ((base_mem_alloc_flags)1 << 8) -/** - * BASE_MEM_RESERVED_BIT_19 - Bit 19 is reserved. - * - * Do not remove, use the next unreserved bit for new flags - */ -#define BASE_MEM_RESERVED_BIT_19 ((base_mem_alloc_flags)1 << 19) +/* Unused bit for JM, only used in CSF for BASE_MEM_CSF_EVENT */ +#define BASE_MEM_UNUSED_BIT_19 ((base_mem_alloc_flags)1 << 19) /** * BASE_MEM_TILER_ALIGN_TOP - Memory starting from the end of the initial commit is aligned @@ -57,16 +53,23 @@ * must be less than BASE_MEM_FLAGS_NR_BITS !!! */ -/* A mask of all the flags which are only valid for allocations within kbase, - * and may not be passed from user space. +/* A mask of all the flags which are only valid within kbase, + * and may not be passed to/from user space. 
*/ #define BASEP_MEM_FLAGS_KERNEL_ONLY \ (BASEP_MEM_PERMANENT_KERNEL_MAPPING | BASEP_MEM_NO_USER_FREE | BASE_MEM_FLAG_MAP_FIXED | \ BASEP_MEM_PERFORM_JIT_TRIM) -/* A mask of all currently reserved flags +/* A mask of flags that, when provided, cause other flags to be + * enabled but are not enabled themselves */ -#define BASE_MEM_FLAGS_RESERVED (BASE_MEM_RESERVED_BIT_8 | BASE_MEM_RESERVED_BIT_19) +#define BASE_MEM_FLAGS_ACTION_MODIFIERS (BASE_MEM_COHERENT_SYSTEM_REQUIRED | BASE_MEM_IMPORT_SHARED) + +/* A mask of all currently reserved flags */ +#define BASE_MEM_FLAGS_RESERVED ((base_mem_alloc_flags)0) + +/* A mask of all bits that are not used by a flag on JM */ +#define BASE_MEM_FLAGS_UNUSED (BASE_MEM_UNUSED_BIT_8 | BASE_MEM_UNUSED_BIT_19) /* Similar to BASE_MEM_TILER_ALIGN_TOP, memory starting from the end of the * initial commit is aligned to 'extension' pages, where 'extension' must be a power @@ -119,10 +122,6 @@ */ #define BASE_JD_ATOM_COUNT 256 -/* Maximum number of concurrent render passes. - */ -#define BASE_JD_RP_COUNT (256) - /* Set/reset values for a software event */ #define BASE_JD_SOFT_EVENT_SET ((unsigned char)1) #define BASE_JD_SOFT_EVENT_RESET ((unsigned char)0) @@ -362,40 +361,6 @@ typedef __u32 base_jd_core_req; */ #define BASE_JD_REQ_JOB_SLOT ((base_jd_core_req)1 << 17) -/* SW-only requirement: The atom is the start of a renderpass. - * - * If this bit is set then the job chain will be soft-stopped if it causes the - * GPU to write beyond the end of the physical pages backing the tiler heap, and - * committing more memory to the heap would exceed an internal threshold. It may - * be resumed after running one of the job chains attached to an atom with - * BASE_JD_REQ_END_RENDERPASS set and the same renderpass ID. It may be - * resumed multiple times until it completes without memory usage exceeding the - * threshold. - * - * Usually used with BASE_JD_REQ_T. 
- */ -#define BASE_JD_REQ_START_RENDERPASS ((base_jd_core_req)1 << 18) - -/* SW-only requirement: The atom is the end of a renderpass. - * - * If this bit is set then the atom incorporates the CPU address of a - * base_jd_fragment object instead of the GPU address of a job chain. - * - * Which job chain is run depends upon whether the atom with the same renderpass - * ID and the BASE_JD_REQ_START_RENDERPASS bit set completed normally or - * was soft-stopped when it exceeded an upper threshold for tiler heap memory - * usage. - * - * It also depends upon whether one of the job chains attached to the atom has - * already been run as part of the same renderpass (in which case it would have - * written unresolved multisampled and otherwise-discarded output to temporary - * buffers that need to be read back). The job chain for doing a forced read and - * forced write (from/to temporary buffers) is run as many times as necessary. - * - * Usually used with BASE_JD_REQ_FS. - */ -#define BASE_JD_REQ_END_RENDERPASS ((base_jd_core_req)1 << 19) - /* SW-only requirement: The atom needs to run on a limited core mask affinity. * * If this bit is set then the kbase_context.limited_core_mask will be applied @@ -411,7 +376,6 @@ typedef __u32 base_jd_core_req; BASE_JD_REQ_EVENT_COALESCE | BASE_JD_REQ_COHERENT_GROUP | \ BASE_JD_REQ_SPECIFIC_COHERENT_GROUP | BASE_JD_REQ_FS_AFBC | BASE_JD_REQ_PERMON | \ BASE_JD_REQ_SKIP_CACHE_START | BASE_JD_REQ_SKIP_CACHE_END | BASE_JD_REQ_JOB_SLOT | \ - BASE_JD_REQ_START_RENDERPASS | BASE_JD_REQ_END_RENDERPASS | \ BASE_JD_REQ_LIMITED_CORE_MASK)) /* Mask of all bits in base_jd_core_req that control the type of the atom. @@ -470,62 +434,6 @@ struct base_dependency { base_jd_dep_type dependency_type; }; -/** - * struct base_jd_fragment - Set of GPU fragment job chains used for rendering. - * - * @norm_read_norm_write: Job chain for full rendering. 
- * GPU address of a fragment job chain to render in the - * circumstance where the tiler job chain did not exceed - * its memory usage threshold and no fragment job chain - * was previously run for the same renderpass. - * It is used no more than once per renderpass. - * @norm_read_forced_write: Job chain for starting incremental - * rendering. - * GPU address of a fragment job chain to render in - * the circumstance where the tiler job chain exceeded - * its memory usage threshold for the first time and - * no fragment job chain was previously run for the - * same renderpass. - * Writes unresolved multisampled and normally- - * discarded output to temporary buffers that must be - * read back by a subsequent forced_read job chain - * before the renderpass is complete. - * It is used no more than once per renderpass. - * @forced_read_forced_write: Job chain for continuing incremental - * rendering. - * GPU address of a fragment job chain to render in - * the circumstance where the tiler job chain - * exceeded its memory usage threshold again - * and a fragment job chain was previously run for - * the same renderpass. - * Reads unresolved multisampled and - * normally-discarded output from temporary buffers - * written by a previous forced_write job chain and - * writes the same to temporary buffers again. - * It is used as many times as required until - * rendering completes. - * @forced_read_norm_write: Job chain for ending incremental rendering. - * GPU address of a fragment job chain to render in the - * circumstance where the tiler job chain did not - * exceed its memory usage threshold this time and a - * fragment job chain was previously run for the same - * renderpass. - * Reads unresolved multisampled and normally-discarded - * output from temporary buffers written by a previous - * forced_write job chain in order to complete a - * renderpass. - * It is used no more than once per renderpass. 
- * - * This structure is referenced by the main atom structure if - * BASE_JD_REQ_END_RENDERPASS is set in the base_jd_core_req. - */ -struct base_jd_fragment { - __u64 norm_read_norm_write; - __u64 norm_read_forced_write; - __u64 forced_read_forced_write; - __u64 forced_read_norm_write; -}; - /** * typedef base_jd_prio - Base Atom priority. * @@ -590,9 +498,7 @@ typedef __u8 base_jd_prio; * struct base_jd_atom_v2 - Node of a dependency graph used to submit a * GPU job chain or soft-job to the kernel driver. * - * @jc: GPU address of a job chain or (if BASE_JD_REQ_END_RENDERPASS - * is set in the base_jd_core_req) the CPU address of a - * base_jd_fragment object. + * @jc: GPU address of a job chain. * @udata: User data. * @extres_list: List of external resources. * @nr_extres: Number of external resources or JIT allocations. @@ -611,9 +517,6 @@ typedef __u8 base_jd_prio; * specified. * @jobslot: Job slot to use when BASE_JD_REQ_JOB_SLOT is specified. * @core_req: Core requirements. - * @renderpass_id: Renderpass identifier used to associate an atom that has - * BASE_JD_REQ_START_RENDERPASS set in its core requirements - * with an atom that has BASE_JD_REQ_END_RENDERPASS set. * @padding: Unused. Must be zero. * * This structure has changed since UK 10.2 for which base_jd_core_req was a @@ -641,8 +544,7 @@ struct base_jd_atom_v2 { __u8 device_nr; __u8 jobslot; base_jd_core_req core_req; - __u8 renderpass_id; - __u8 padding[7]; + __u8 padding[8]; }; /** @@ -650,9 +552,7 @@ struct base_jd_atom_v2 { * at the beginning. * * @seq_nr: Sequence number of logical grouping of atoms. - * @jc: GPU address of a job chain or (if BASE_JD_REQ_END_RENDERPASS - * is set in the base_jd_core_req) the CPU address of a - * base_jd_fragment object. + * @jc: GPU address of a job chain. * @udata: User data. * @extres_list: List of external resources. * @nr_extres: Number of external resources or JIT allocations. 
@@ -834,11 +734,6 @@ enum { * @BASE_JD_EVENT_REMOVED_FROM_NEXT: raised when an atom that was configured in * the GPU has to be retried (but it has not * started) due to e.g., GPU reset - * @BASE_JD_EVENT_END_RP_DONE: this is used for incremental rendering to signal - * the completion of a renderpass. This value - * shouldn't be returned to userspace but I haven't - * seen where it is reset back to JD_EVENT_DONE. - * * HW and low-level SW events are represented by event codes. * The status of jobs which succeeded are also represented by * an event code (see @BASE_JD_EVENT_DONE). @@ -937,8 +832,6 @@ enum base_jd_event_code { BASE_JD_EVENT_RANGE_KERNEL_ONLY_START = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_KERNEL | 0x000, BASE_JD_EVENT_REMOVED_FROM_NEXT = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_KERNEL | BASE_JD_SW_EVENT_JOB | 0x000, - BASE_JD_EVENT_END_RP_DONE = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_KERNEL | - BASE_JD_SW_EVENT_JOB | 0x001, BASE_JD_EVENT_RANGE_KERNEL_ONLY_END = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_KERNEL | BASE_JD_SW_EVENT_RESERVED | 0x3FF diff --git a/include/uapi/gpu/arm/bifrost/jm/mali_kbase_jm_ioctl.h b/include/uapi/gpu/arm/bifrost/jm/mali_kbase_jm_ioctl.h index 2a7a06a995be..34da87860ee2 100644 --- a/include/uapi/gpu/arm/bifrost/jm/mali_kbase_jm_ioctl.h +++ b/include/uapi/gpu/arm/bifrost/jm/mali_kbase_jm_ioctl.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2020-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2020-2024 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -149,15 +149,31 @@ * from the parent process. * 11.40: * - Remove KBASE_IOCTL_HWCNT_READER_SETUP and KBASE_HWCNT_READER_* ioctls. + * - Made the BASE_MEM_DONT_NEED memory flag queryable. 
* 11.41: * - Disallows changing the sharability on the GPU of imported dma-bufs to * BASE_MEM_COHERENT_SYSTEM using KBASE_IOCTL_MEM_FLAGS_CHANGE. * 11.42: * - Implement full block state support for hardware counters. - */ + * 11.43: + * - Made the BASE_MEM_IMPORT_SYNC_ON_MAP_UNMAP and BASE_MEM_KERNEL_SYNC memory + * flags queryable. + * 11.44: + * - Made the SAME_VA memory flag queryable. + * 11.45: + * - Re-allow child process to do supported file operations (like mmap, ioctl + * read, poll) on the file descriptor of mali device that was inherited + * from the parent process. + * 11.46: + * - Remove renderpass_id from base_jd_atom_v2 to deprecate support for JM Incremental Rendering + * 11.47: + * - Reject non-protected allocations containing the BASE_MEM_PROTECTED memory flag. + * - Reject allocations containing the BASE_MEM_DONT_NEED memory flag (it is only settable). + * - Reject allocations containing the BASE_MEM_UNUSED_BIT_xx memory flags. + */ #define BASE_UK_VERSION_MAJOR 11 -#define BASE_UK_VERSION_MINOR 42 +#define BASE_UK_VERSION_MINOR 47 /** * struct kbase_ioctl_version_check - Check version compatibility between diff --git a/include/uapi/gpu/arm/bifrost/mali_base_common_kernel.h b/include/uapi/gpu/arm/bifrost/mali_base_common_kernel.h index c009d5ddd494..bbbee900415e 100644 --- a/include/uapi/gpu/arm/bifrost/mali_base_common_kernel.h +++ b/include/uapi/gpu/arm/bifrost/mali_base_common_kernel.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2022-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2022-2024 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -146,7 +146,6 @@ struct base_mem_handle { */ #define BASE_MEM_IMPORT_SYNC_ON_MAP_UNMAP ((base_mem_alloc_flags)1 << 26) -/* OUT */ /* Kernel side cache sync ops required */ #define BASE_MEM_KERNEL_SYNC ((base_mem_alloc_flags)1 << 28) @@ -156,12 +155,10 @@ struct base_mem_handle { */ #define BASE_MEM_FLAGS_NR_BITS 30 -/* A mask for all output bits, excluding IN/OUT bits. - */ +/* A mask for all bits that are output from kbase, but never input. */ #define BASE_MEM_FLAGS_OUTPUT_MASK BASE_MEM_NEED_MMAP -/* A mask for all input bits, including IN/OUT bits. - */ +/* A mask for all bits that can be input to kbase. */ #define BASE_MEM_FLAGS_INPUT_MASK \ (((1 << BASE_MEM_FLAGS_NR_BITS) - 1) & ~BASE_MEM_FLAGS_OUTPUT_MASK) diff --git a/include/uapi/gpu/arm/bifrost/mali_base_kernel.h b/include/uapi/gpu/arm/bifrost/mali_base_kernel.h index cb1a1e8dd550..9e7294970efb 100644 --- a/include/uapi/gpu/arm/bifrost/mali_base_kernel.h +++ b/include/uapi/gpu/arm/bifrost/mali_base_kernel.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2024 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -74,7 +74,7 @@ * More flags can be added to this list, as long as they don't clash * (see BASE_MEM_FLAGS_NR_BITS for the number of the first free bit). */ -typedef __u32 base_mem_alloc_flags; +typedef __u64 base_mem_alloc_flags; #define BASE_MEM_FLAGS_MODIFIABLE_NATIVE (BASE_MEM_DONT_NEED) @@ -89,10 +89,10 @@ typedef __u32 base_mem_alloc_flags; /* A mask of all the flags that can be returned via the base_mem_get_flags() * interface. 
*/ -#define BASE_MEM_FLAGS_QUERYABLE \ - (BASE_MEM_FLAGS_INPUT_MASK & \ - ~(BASE_MEM_SAME_VA | BASE_MEM_COHERENT_SYSTEM_REQUIRED | BASE_MEM_IMPORT_SHARED | \ - BASE_MEM_FLAGS_RESERVED | BASEP_MEM_FLAGS_KERNEL_ONLY)) +#define BASE_MEM_FLAGS_QUERYABLE \ + (BASE_MEM_FLAGS_INPUT_MASK & \ + ~(BASE_MEM_FLAGS_RESERVED | BASE_MEM_FLAGS_UNUSED | BASE_MEM_FLAGS_ACTION_MODIFIERS | \ + BASEP_MEM_FLAGS_KERNEL_ONLY)) /** * enum base_mem_import_type - Memory types supported by @a base_mem_import @@ -619,15 +619,15 @@ struct base_gpu_props { #define BASE_TIMEINFO_TIMESTAMP_FLAG (1U << 1) /* For GPU cycle counter */ #define BASE_TIMEINFO_CYCLE_COUNTER_FLAG (1U << 2) -/* Specify kernel GPU register timestamp */ -#define BASE_TIMEINFO_KERNEL_SOURCE_FLAG (1U << 30) -/* Specify userspace cntvct_el0 timestamp source */ -#define BASE_TIMEINFO_USER_SOURCE_FLAG (1U << 31) -#define BASE_TIMEREQUEST_ALLOWED_FLAGS \ - (BASE_TIMEINFO_MONOTONIC_FLAG | BASE_TIMEINFO_TIMESTAMP_FLAG | \ - BASE_TIMEINFO_CYCLE_COUNTER_FLAG | BASE_TIMEINFO_KERNEL_SOURCE_FLAG | \ - BASE_TIMEINFO_USER_SOURCE_FLAG) +/* Specify TimeRequest flags allowed if time source is cpu/gpu register */ +#define BASE_TIMEREQUEST_CPU_GPU_SRC_ALLOWED_FLAGS \ + (BASE_TIMEINFO_MONOTONIC_FLAG | BASE_TIMEINFO_TIMESTAMP_FLAG | \ + BASE_TIMEINFO_CYCLE_COUNTER_FLAG) + +/* Specify TimeRequest flags allowed if time source is system(user) space */ +#define BASE_TIMEREQUEST_SYSTEM_SRC_ALLOWED_FLAGS \ + (BASE_TIMEINFO_MONOTONIC_FLAG | BASE_TIMEINFO_TIMESTAMP_FLAG) /* Maximum number of source allocations allowed to create an alias allocation. 
* This needs to be 4096 * 6 to allow cube map arrays with up to 4096 array diff --git a/include/uapi/gpu/arm/bifrost/mali_kbase_ioctl.h b/include/uapi/gpu/arm/bifrost/mali_kbase_ioctl.h index d60745f564b0..163637c62297 100644 --- a/include/uapi/gpu/arm/bifrost/mali_kbase_ioctl.h +++ b/include/uapi/gpu/arm/bifrost/mali_kbase_ioctl.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2017-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2017-2024 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -169,7 +169,7 @@ struct kbase_ioctl_hwcnt_reader_setup { * struct kbase_ioctl_hwcnt_values - Values to set dummy the dummy counters to. * @data: Counter samples for the dummy model. * @size: Size of the counter sample data. - * @padding: Padding. + * @padding: Currently unused, must be zero */ struct kbase_ioctl_hwcnt_values { __u64 data; @@ -193,7 +193,7 @@ struct kbase_ioctl_disjoint_query { * struct kbase_ioctl_get_ddk_version - Query the kernel version * @version_buffer: Buffer to receive the kernel version string * @size: Size of the buffer - * @padding: Padding + * @padding: Currently unused, must be zero * * The ioctl will return the number of bytes written into version_buffer * (which includes a NULL byte) or a negative error code diff --git a/include/uapi/gpu/arm/bifrost/mali_kbase_mem_profile_debugfs_buf_size.h b/include/uapi/gpu/arm/bifrost/mali_kbase_mem_profile_debugfs_buf_size.h index 11c51d9c2993..648c166b1e3d 100644 --- a/include/uapi/gpu/arm/bifrost/mali_kbase_mem_profile_debugfs_buf_size.h +++ b/include/uapi/gpu/arm/bifrost/mali_kbase_mem_profile_debugfs_buf_size.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2024 ARM Limited. 
All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -30,6 +30,6 @@ * KBASE_MEM_PROFILE_MAX_BUF_SIZE - The size of the buffer to accumulate the histogram report text * in @see @ref CCTXP_HIST_BUF_SIZE_MAX_LENGTH_REPORT */ -#define KBASE_MEM_PROFILE_MAX_BUF_SIZE ((size_t)(64 + ((80 + (56 * 64)) * 57) + 56)) +#define KBASE_MEM_PROFILE_MAX_BUF_SIZE ((size_t)(64 + ((80 + (56 * 64)) * 69) + 56)) #endif /*_UAPI_KBASE_MEM_PROFILE_DEBUGFS_BUF_SIZE_H_*/