diff --git a/Documentation/ABI/testing/sysfs-device-mali b/Documentation/ABI/testing/sysfs-device-mali index 1ec265c5add4..12a1667feeb2 100644 --- a/Documentation/ABI/testing/sysfs-device-mali +++ b/Documentation/ABI/testing/sysfs-device-mali @@ -341,8 +341,7 @@ Description: device-driver that supports a CSF GPU. Used to enable firmware logs, logging levels valid values - are indicated using 'min and 'max' attribute values - values that are read-only. + are indicated using 'min' and 'max' attributes, which are read-only. Log level can be set using the 'cur' read, write attribute, we can use a valid log level value from min and max range values diff --git a/Documentation/ABI/testing/sysfs-device-mali-coresight-source b/Documentation/ABI/testing/sysfs-device-mali-coresight-source index 0f31a6acaa87..58d9085b8bb6 100644 --- a/Documentation/ABI/testing/sysfs-device-mali-coresight-source +++ b/Documentation/ABI/testing/sysfs-device-mali-coresight-source @@ -19,7 +19,7 @@ Description: What: /sys/bus/coresight/devices/mali-source-etm/is_enabled Description: - Attribute used to check if Coresight Source ITM is enabled. + Attribute used to check if Coresight Source ETM is enabled. What: /sys/bus/coresight/devices/mali-source-etm/trcconfigr Description: diff --git a/Documentation/devicetree/bindings/arm/mali-bifrost.txt b/Documentation/devicetree/bindings/arm/mali-bifrost.txt index 85672c6c6258..3f80d97b0064 100644 --- a/Documentation/devicetree/bindings/arm/mali-bifrost.txt +++ b/Documentation/devicetree/bindings/arm/mali-bifrost.txt @@ -111,7 +111,10 @@ for details. - idvs-group-size : Override the IDVS group size value. Tasks are sent to cores in groups of N + 1, so i.e. 0xF means 16 tasks. Valid values are between 0 to 0x3F (including). -- l2-size : Override L2 cache size on GPU that supports it +- l2-size : Override L2 cache size on GPU that supports it. Value should be larger than the minimum + size 1KiB and smaller than the maximum size. 
Maximum size is Hardware integration dependent. + The value passed should be log2(cache size in bytes). + For example, for a cache size of 1KiB, 0xa should be passed. - l2-hash : Override L2 hash function on GPU that supports it - l2-hash-values : Override L2 hash function using provided hash values, on GPUs that supports it. It is mutually exclusive with 'l2-hash'. Only one or the other must be @@ -237,7 +240,7 @@ gpu@0xfc010000 { ... pbha { int-id-override = <2 0x32>, <9 0x05>, <16 0x32>; - propagate-bits = /bits/ 4 <0x03>; + propagate-bits = /bits/ 8 <0x03>; }; ... }; diff --git a/drivers/base/arm/Makefile b/drivers/base/arm/Makefile index 4aa68f89d3d9..42071f769729 100644 --- a/drivers/base/arm/Makefile +++ b/drivers/base/arm/Makefile @@ -125,6 +125,8 @@ CFLAGS_MODULE += -Wno-sign-compare CFLAGS_MODULE += -Wno-shift-negative-value # This flag is needed to avoid build errors on older kernels CFLAGS_MODULE += $(call cc-option, -Wno-cast-function-type) +# The following ensures the stack frame does not get larger than a page +CFLAGS_MODULE += -Wframe-larger-than=4096 KBUILD_CPPFLAGS += -DKBUILD_EXTRA_WARN1 diff --git a/drivers/base/arm/memory_group_manager/memory_group_manager.c b/drivers/base/arm/memory_group_manager/memory_group_manager.c index 389b0f051f3a..c5fba5f1b522 100644 --- a/drivers/base/arm/memory_group_manager/memory_group_manager.c +++ b/drivers/base/arm/memory_group_manager/memory_group_manager.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2024 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -51,10 +51,6 @@ static inline vm_fault_t vmf_insert_pfn_prot(struct vm_area_struct *vma, unsigne } #endif -#define PTE_PBHA_SHIFT (59) -#define PTE_PBHA_MASK ((uint64_t)0xf << PTE_PBHA_SHIFT) -#define PTE_RES_BIT_MULTI_AS_SHIFT (63) - #define IMPORTED_MEMORY_ID (MEMORY_GROUP_MANAGER_NR_GROUPS - 1) /** @@ -263,7 +259,7 @@ static struct page *example_mgm_alloc_page(struct memory_group_manager_device *m } else { struct mgm_groups *data = mgm_dev->data; - dev_err(data->dev, "alloc_pages failed\n"); + dev_dbg(data->dev, "alloc_pages failed\n"); } return p; diff --git a/drivers/gpu/arm/bifrost/Kbuild b/drivers/gpu/arm/bifrost/Kbuild index b35fcee88baa..b19e4abf3e2b 100644 --- a/drivers/gpu/arm/bifrost/Kbuild +++ b/drivers/gpu/arm/bifrost/Kbuild @@ -69,7 +69,7 @@ endif # # Driver version string which is returned to userspace via an ioctl -MALI_RELEASE_NAME ?= '"g22p0-01eac0"' +MALI_RELEASE_NAME ?= '"g24p0-00eac0"' # Set up defaults if not defined by build system ifeq ($(CONFIG_MALI_BIFROST_DEBUG), y) MALI_UNIT_TEST = 1 diff --git a/drivers/gpu/arm/bifrost/Kconfig b/drivers/gpu/arm/bifrost/Kconfig index 22fdfe80405a..685ce4f423ad 100644 --- a/drivers/gpu/arm/bifrost/Kconfig +++ b/drivers/gpu/arm/bifrost/Kconfig @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note # -# (C) COPYRIGHT 2012-2023 ARM Limited. All rights reserved. +# (C) COPYRIGHT 2012-2024 ARM Limited. All rights reserved. # # This program is free software and is provided to you under the terms of the # GNU General Public License version 2 as published by the Free Software @@ -70,7 +70,6 @@ config MALI_NO_MALI_DEFAULT_GPU help This option sets the default GPU to identify as for No Mali builds. - endchoice menu "Platform specific options" @@ -214,16 +213,6 @@ config MALI_CORESTACK If unsure, say N. 
-comment "Platform options" - depends on MALI_BIFROST && MALI_BIFROST_EXPERT - -config MALI_BIFROST_ERROR_INJECT - bool "Enable No Mali error injection" - depends on MALI_BIFROST && MALI_BIFROST_EXPERT && MALI_BIFROST_NO_MALI - default n - help - Enables insertion of errors to test module failure and recovery mechanisms. - comment "Debug options" depends on MALI_BIFROST && MALI_BIFROST_EXPERT @@ -304,7 +293,7 @@ endchoice config MALI_PRFCNT_SET_SELECT_VIA_DEBUG_FS bool "Enable runtime selection of performance counters set via debugfs" - depends on MALI_BIFROST && MALI_BIFROST_EXPERT && DEBUG_FS + depends on MALI_BIFROST && MALI_BIFROST_EXPERT && DEBUG_FS && !MALI_CSF_SUPPORT default n help Select this option to make the secondary set of performance counters diff --git a/drivers/gpu/arm/bifrost/Makefile b/drivers/gpu/arm/bifrost/Makefile index 69dbe3750a10..9b636f58c6bc 100644 --- a/drivers/gpu/arm/bifrost/Makefile +++ b/drivers/gpu/arm/bifrost/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note # -# (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. +# (C) COPYRIGHT 2010-2024 ARM Limited. All rights reserved. 
# # This program is free software and is provided to you under the terms of the # GNU General Public License version 2 as published by the Free Software @@ -41,11 +41,12 @@ ifeq ($(MALI_KCONFIG_EXT_PREFIX),) CONFIG_MALI_BIFROST_GATOR_SUPPORT ?= y CONFIG_MALI_ARBITRATION ?= n CONFIG_MALI_PARTITION_MANAGER ?= n - CONFIG_MALI_64BIT_HW_ACCESS ?= n + ifneq ($(CONFIG_MALI_BIFROST_NO_MALI),y) - # Prevent misuse when CONFIG_MALI_BIFROST_NO_MALI=y + # Prevent misuse when CONFIG_MALI_BIFROST_NO_MALI!=y CONFIG_MALI_REAL_HW ?= y + else CONFIG_MALI_CORESIGHT = n endif @@ -76,7 +77,6 @@ ifeq ($(MALI_KCONFIG_EXT_PREFIX),) else # Prevent misuse when CONFIG_MALI_BIFROST_NO_MALI=n CONFIG_MALI_REAL_HW = y - CONFIG_MALI_BIFROST_ERROR_INJECT = n endif @@ -108,7 +108,6 @@ ifeq ($(MALI_KCONFIG_EXT_PREFIX),) CONFIG_MALI_JOB_DUMP = n CONFIG_MALI_BIFROST_NO_MALI = n CONFIG_MALI_REAL_HW = y - CONFIG_MALI_BIFROST_ERROR_INJECT = n CONFIG_MALI_HW_ERRATA_1485982_NOT_AFFECTED = n CONFIG_MALI_HW_ERRATA_1485982_USE_CLOCK_ALTERNATIVE = n CONFIG_MALI_PRFCNT_SET_SELECT_VIA_DEBUG_FS = n @@ -171,7 +170,6 @@ ifeq ($(MALI_KCONFIG_EXT_PREFIX),) CONFIG_MALI_PWRSOFT_765 \ CONFIG_MALI_JOB_DUMP \ CONFIG_MALI_BIFROST_NO_MALI \ - CONFIG_MALI_BIFROST_ERROR_INJECT \ CONFIG_MALI_HW_ERRATA_1485982_NOT_AFFECTED \ CONFIG_MALI_HW_ERRATA_1485982_USE_CLOCK_ALTERNATIVE \ CONFIG_MALI_PRFCNT_SET_PRIMARY \ @@ -272,6 +270,8 @@ CFLAGS_MODULE += -Wmissing-field-initializers CFLAGS_MODULE += -Wno-type-limits CFLAGS_MODULE += $(call cc-option, -Wmaybe-uninitialized) CFLAGS_MODULE += $(call cc-option, -Wunused-macros) +# The following ensures the stack frame does not get larger than a page +CFLAGS_MODULE += -Wframe-larger-than=4096 KBUILD_CPPFLAGS += -DKBUILD_EXTRA_WARN2 diff --git a/drivers/gpu/arm/bifrost/arbiter/Kbuild b/drivers/gpu/arm/bifrost/arbiter/Kbuild index 2e6b111441ca..de339ccae394 100644 --- a/drivers/gpu/arm/bifrost/arbiter/Kbuild +++ b/drivers/gpu/arm/bifrost/arbiter/Kbuild @@ -1,6 +1,6 @@ # 
SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note # -# (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. +# (C) COPYRIGHT 2019-2024 ARM Limited. All rights reserved. # # This program is free software and is provided to you under the terms of the # GNU General Public License version 2 as published by the Free Software @@ -21,3 +21,4 @@ bifrost_kbase-y += \ arbiter/mali_kbase_arbif.o \ arbiter/mali_kbase_arbiter_pm.o + diff --git a/drivers/gpu/arm/bifrost/arbiter/mali_kbase_arbif.c b/drivers/gpu/arm/bifrost/arbiter/mali_kbase_arbif.c index c290dd6b086f..8cdae33cf919 100644 --- a/drivers/gpu/arm/bifrost/arbiter/mali_kbase_arbif.c +++ b/drivers/gpu/arm/bifrost/arbiter/mali_kbase_arbif.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2024 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -160,28 +160,19 @@ static void on_gpu_lost(struct device *dev) kbase_arbiter_pm_vm_event(kbdev, KBASE_VM_GPU_LOST_EVT); } -/** - * kbase_arbif_init() - Kbase Arbiter interface initialisation. - * @kbdev: The kbase device structure for the device (must be a valid pointer) - * - * Initialise Kbase Arbiter interface and assign callback functions. - * - * Return: - * * 0 - the interface was initialized or was not specified - * * in the device tree. - * * -EFAULT - the interface was specified but failed to initialize. - * * -EPROBE_DEFER - module dependencies are not yet available. 
- */ -int kbase_arbif_init(struct kbase_device *kbdev) +static int kbase_arbif_of_init(struct kbase_device *kbdev) { -#if IS_ENABLED(CONFIG_OF) - struct arbiter_if_arb_vm_ops ops; struct arbiter_if_dev *arb_if; struct device_node *arbiter_if_node; struct platform_device *pdev; - int err; - dev_dbg(kbdev->dev, "%s\n", __func__); + if (!IS_ENABLED(CONFIG_OF)) { + /* + * Return -ENODEV in the event CONFIG_OF is not available and let the + * internal AW check for suitability for arbitration. + */ + return -ENODEV; + } arbiter_if_node = of_parse_phandle(kbdev->dev->of_node, "arbiter-if", 0); if (!arbiter_if_node) @@ -191,7 +182,7 @@ int kbase_arbif_init(struct kbase_device *kbdev) /* no arbiter interface defined in device tree */ kbdev->arb.arb_dev = NULL; kbdev->arb.arb_if = NULL; - return 0; + return -ENODEV; } pdev = of_find_device_by_node(arbiter_if_node); @@ -215,6 +206,47 @@ int kbase_arbif_init(struct kbase_device *kbdev) } kbdev->arb.arb_if = arb_if; + return 0; +} + +static void kbase_arbif_of_term(struct kbase_device *kbdev) +{ + if (!IS_ENABLED(CONFIG_OF)) + return; + + if (kbdev->arb.arb_dev) { + module_put(kbdev->arb.arb_dev->driver->owner); + put_device(kbdev->arb.arb_dev); + } + kbdev->arb.arb_dev = NULL; +} + + +/** + * kbase_arbif_init() - Kbase Arbiter interface initialisation. + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * Initialise Kbase Arbiter interface and assign callback functions. + * + * Return: + * * 0 - the interface was initialized. + * * -ENODEV - no arbiter interface specified in the device tree, or CONFIG_OF is disabled. + * * -EFAULT - the interface was specified but failed to initialize. + * * -EPROBE_DEFER - module dependencies are not yet available. 
+ */ +int kbase_arbif_init(struct kbase_device *kbdev) +{ + struct arbiter_if_arb_vm_ops ops; + struct arbiter_if_dev *arb_if; + int err = 0; + + /* Tries to init with 'arbiter-if' if present in devicetree */ + err = kbase_arbif_of_init(kbdev); + + + if (err) + return err; + ops.arb_vm_gpu_stop = on_gpu_stop; ops.arb_vm_gpu_granted = on_gpu_granted; ops.arb_vm_gpu_lost = on_gpu_lost; @@ -225,25 +257,35 @@ int kbase_arbif_init(struct kbase_device *kbdev) kbdev->arb.arb_freq.freq_updated = false; mutex_init(&kbdev->arb.arb_freq.arb_freq_lock); - /* register kbase arbiter_if callbacks */ - if (arb_if->vm_ops.vm_arb_register_dev) { - err = arb_if->vm_ops.vm_arb_register_dev(arb_if, kbdev->dev, &ops); - if (err) { - dev_err(&pdev->dev, "Failed to register with arbiter. (err = %d)\n", err); - module_put(pdev->dev.driver->owner); - put_device(&pdev->dev); - if (err != -EPROBE_DEFER) - err = -EFAULT; - return err; - } + arb_if = kbdev->arb.arb_if; + + if (arb_if == NULL) { + dev_err(kbdev->dev, "No arbiter interface present\n"); + goto failure_term; + } + + if (!arb_if->vm_ops.vm_arb_register_dev) { + dev_err(kbdev->dev, "arbiter_if registration callback not present\n"); + goto failure_term; + } + + /* register kbase arbiter_if callbacks */ + err = arb_if->vm_ops.vm_arb_register_dev(arb_if, kbdev->dev, &ops); + if (err) { + dev_err(kbdev->dev, "Failed to register with arbiter. 
(err = %d)\n", err); + goto failure_term; } -#else /* CONFIG_OF */ - dev_dbg(kbdev->dev, "No arbiter without Device Tree support\n"); - kbdev->arb.arb_dev = NULL; - kbdev->arb.arb_if = NULL; -#endif return 0; + +failure_term: + { + kbase_arbif_of_term(kbdev); + } + + if (err != -EPROBE_DEFER) + err = -EFAULT; + return err; } /** @@ -256,16 +298,13 @@ void kbase_arbif_destroy(struct kbase_device *kbdev) { struct arbiter_if_dev *arb_if = kbdev->arb.arb_if; - if (arb_if && arb_if->vm_ops.vm_arb_unregister_dev) { - dev_dbg(kbdev->dev, "%s\n", __func__); + if (arb_if && arb_if->vm_ops.vm_arb_unregister_dev) arb_if->vm_ops.vm_arb_unregister_dev(kbdev->arb.arb_if); + + { + kbase_arbif_of_term(kbdev); } kbdev->arb.arb_if = NULL; - if (kbdev->arb.arb_dev) { - module_put(kbdev->arb.arb_dev->driver->owner); - put_device(kbdev->arb.arb_dev); - } - kbdev->arb.arb_dev = NULL; } /** @@ -278,10 +317,8 @@ void kbase_arbif_get_max_config(struct kbase_device *kbdev) { struct arbiter_if_dev *arb_if = kbdev->arb.arb_if; - if (arb_if && arb_if->vm_ops.vm_arb_get_max_config) { - dev_dbg(kbdev->dev, "%s\n", __func__); + if (arb_if && arb_if->vm_ops.vm_arb_get_max_config) arb_if->vm_ops.vm_arb_get_max_config(arb_if); - } } /** @@ -295,7 +332,6 @@ void kbase_arbif_gpu_request(struct kbase_device *kbdev) struct arbiter_if_dev *arb_if = kbdev->arb.arb_if; if (arb_if && arb_if->vm_ops.vm_arb_gpu_request) { - dev_dbg(kbdev->dev, "%s\n", __func__); KBASE_TLSTREAM_TL_ARBITER_REQUESTED(kbdev, kbdev); arb_if->vm_ops.vm_arb_gpu_request(arb_if); } @@ -312,7 +348,6 @@ void kbase_arbif_gpu_stopped(struct kbase_device *kbdev, u8 gpu_required) struct arbiter_if_dev *arb_if = kbdev->arb.arb_if; if (arb_if && arb_if->vm_ops.vm_arb_gpu_stopped) { - dev_dbg(kbdev->dev, "%s\n", __func__); KBASE_TLSTREAM_TL_ARBITER_STOPPED(kbdev, kbdev); if (gpu_required) KBASE_TLSTREAM_TL_ARBITER_REQUESTED(kbdev, kbdev); @@ -330,10 +365,8 @@ void kbase_arbif_gpu_active(struct kbase_device *kbdev) { struct arbiter_if_dev 
*arb_if = kbdev->arb.arb_if; - if (arb_if && arb_if->vm_ops.vm_arb_gpu_active) { - dev_dbg(kbdev->dev, "%s\n", __func__); + if (arb_if && arb_if->vm_ops.vm_arb_gpu_active) arb_if->vm_ops.vm_arb_gpu_active(arb_if); - } } /** @@ -346,8 +379,6 @@ void kbase_arbif_gpu_idle(struct kbase_device *kbdev) { struct arbiter_if_dev *arb_if = kbdev->arb.arb_if; - if (arb_if && arb_if->vm_ops.vm_arb_gpu_idle) { - dev_dbg(kbdev->dev, "vm_arb_gpu_idle\n"); + if (arb_if && arb_if->vm_ops.vm_arb_gpu_idle) arb_if->vm_ops.vm_arb_gpu_idle(arb_if); - } } diff --git a/drivers/gpu/arm/bifrost/arbiter/mali_kbase_arbif.h b/drivers/gpu/arm/bifrost/arbiter/mali_kbase_arbif.h index 701ffd42f6f7..c77792115e4d 100644 --- a/drivers/gpu/arm/bifrost/arbiter/mali_kbase_arbif.h +++ b/drivers/gpu/arm/bifrost/arbiter/mali_kbase_arbif.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2024 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -50,6 +50,7 @@ enum kbase_arbif_evt { KBASE_VM_OS_RESUME_EVENT, }; + /** * kbase_arbif_init() - Initialize the arbiter interface functionality. * @kbdev: The kbase device structure for the device (must be a valid pointer) diff --git a/drivers/gpu/arm/bifrost/arbiter/mali_kbase_arbiter_pm.c b/drivers/gpu/arm/bifrost/arbiter/mali_kbase_arbiter_pm.c index 616b0a78cbe5..a27085d0f4f4 100644 --- a/drivers/gpu/arm/bifrost/arbiter/mali_kbase_arbiter_pm.c +++ b/drivers/gpu/arm/bifrost/arbiter/mali_kbase_arbiter_pm.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2024 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -48,7 +48,7 @@ MODULE_PARM_DESC( "On a virtualized platform, if the GPU is not granted within this time(ms) kbase will defer the probe"); static void kbase_arbiter_pm_vm_wait_gpu_assignment(struct kbase_device *kbdev); -static inline bool kbase_arbiter_pm_vm_gpu_assigned_lockheld(struct kbase_device *kbdev); +static inline bool kbase_arbiter_pm_vm_gpu_assigned_locked(struct kbase_device *kbdev); /** * kbase_arbiter_pm_vm_state_str() - Helper function to get string @@ -85,7 +85,6 @@ static inline const char *kbase_arbiter_pm_vm_state_str(enum kbase_vm_state stat case KBASE_VM_STATE_SUSPEND_WAIT_FOR_GRANT: return "KBASE_VM_STATE_SUSPEND_WAIT_FOR_GRANT"; default: - KBASE_DEBUG_ASSERT(false); return "[UnknownState]"; } } @@ -117,14 +116,13 @@ static inline const char *kbase_arbiter_pm_vm_event_str(enum kbase_arbif_evt evt case KBASE_VM_REF_EVENT: return "KBASE_VM_REF_EVENT"; default: - KBASE_DEBUG_ASSERT(false); return "[UnknownEvent]"; } } /** * kbase_arbiter_pm_vm_set_state() - Sets new kbase_arbiter_vm_state - * @kbdev: The kbase device structure for the device (must be a valid pointer) + * @kbdev: The kbase device structure for the device * @new_state: kbase VM new state * * This function sets the new state for the VM @@ -229,7 +227,7 @@ static enum hrtimer_restart request_timer_callback(struct hrtimer *timer) /** * start_request_timer() - Start a timer after requesting GPU - * @kbdev: The kbase device structure for the device (must be a valid pointer) + * @kbdev: The kbase device structure for the device * * Start a timer to track when kbase is waiting for the GPU from the * Arbiter. 
If the timer expires before GPU is granted, a warning in @@ -245,7 +243,7 @@ static void start_request_timer(struct kbase_device *kbdev) /** * cancel_request_timer() - Stop the request timer - * @kbdev: The kbase device structure for the device (must be a valid pointer) + * @kbdev: The kbase device structure for the device * * Stops the request timer once GPU has been granted. Safe to call * even if timer is no longer running. @@ -260,7 +258,7 @@ static void cancel_request_timer(struct kbase_device *kbdev) /** * kbase_arbiter_pm_early_init() - Initialize arbiter for VM * Paravirtualized use. - * @kbdev: The kbase device structure for the device (must be a valid pointer) + * @kbdev: The kbase device structure for the device * * Initialize the arbiter and other required resources during the runtime * and request the GPU for the VM for the first time. @@ -272,7 +270,7 @@ int kbase_arbiter_pm_early_init(struct kbase_device *kbdev) int err; struct kbase_arbiter_vm_state *arb_vm_state = NULL; - arb_vm_state = kmalloc(sizeof(struct kbase_arbiter_vm_state), GFP_KERNEL); + arb_vm_state = kzalloc(sizeof(struct kbase_arbiter_vm_state), GFP_KERNEL); if (arb_vm_state == NULL) return -ENOMEM; @@ -311,7 +309,7 @@ int kbase_arbiter_pm_early_init(struct kbase_device *kbdev) msecs_to_jiffies((unsigned int)gpu_req_timeout)); if (!err) { - dev_dbg(kbdev->dev, + dev_err(kbdev->dev, "Kbase probe Deferred after waiting %d ms to receive GPU_GRANT\n", gpu_req_timeout); @@ -336,7 +334,7 @@ arbif_init_fail: /** * kbase_arbiter_pm_early_term() - Shutdown arbiter and free resources - * @kbdev: The kbase device structure for the device (must be a valid pointer) + * @kbdev: The kbase device structure for the device * * Clean up all the resources */ @@ -344,6 +342,11 @@ void kbase_arbiter_pm_early_term(struct kbase_device *kbdev) { struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state; + if (arb_vm_state == NULL) + return; + + kbase_arbiter_pm_release_interrupts(kbdev); + 
cancel_request_timer(kbdev); mutex_lock(&arb_vm_state->vm_state_lock); if (arb_vm_state->vm_state > KBASE_VM_STATE_STOPPED_GPU_REQUESTED) { @@ -358,12 +361,6 @@ void kbase_arbiter_pm_early_term(struct kbase_device *kbdev) kbdev->pm.arb_vm_state = NULL; } -/** - * kbase_arbiter_pm_release_interrupts() - Release the GPU interrupts - * @kbdev: The kbase device structure for the device (must be a valid pointer) - * - * Releases interrupts and set the interrupt flag to false - */ void kbase_arbiter_pm_release_interrupts(struct kbase_device *kbdev) { struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state; @@ -376,29 +373,25 @@ void kbase_arbiter_pm_release_interrupts(struct kbase_device *kbdev) mutex_unlock(&arb_vm_state->vm_state_lock); } -/** - * kbase_arbiter_pm_install_interrupts() - Install the GPU interrupts - * @kbdev: The kbase device structure for the device (must be a valid pointer) - * - * Install interrupts and set the interrupt_install flag to true. - * - * Return: 0 if success, or a Linux error code - */ int kbase_arbiter_pm_install_interrupts(struct kbase_device *kbdev) { struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state; - int err; + int err = 0; mutex_lock(&arb_vm_state->vm_state_lock); - arb_vm_state->interrupts_installed = true; - err = kbase_install_interrupts(kbdev); + if (arb_vm_state->interrupts_installed == false) { + arb_vm_state->interrupts_installed = true; + err = kbase_install_interrupts(kbdev); + } else { + dev_dbg(kbdev->dev, "%s: interrupts installed already", __func__); + } mutex_unlock(&arb_vm_state->vm_state_lock); return err; } /** * kbase_arbiter_pm_vm_stopped() - Handle stop state for the VM - * @kbdev: The kbase device structure for the device (must be a valid pointer) + * @kbdev: The kbase device structure for the device * * Handles a stop state for the VM */ @@ -416,7 +409,13 @@ void kbase_arbiter_pm_vm_stopped(struct kbase_device *kbdev) dev_dbg(kbdev->dev, "%s %s\n", __func__, 
kbase_arbiter_pm_vm_state_str(arb_vm_state->vm_state)); - if (arb_vm_state->interrupts_installed) { + /* + * Release the interrupts on external arb_if to address Xen requirements. + * Interrupts are not released with internal arb_if as the IRQs are required + * to handle messaging to/from Arbiter/Resource Group. + */ + if (arb_vm_state->interrupts_installed + ) { arb_vm_state->interrupts_installed = false; kbase_release_interrupts(kbdev); } @@ -507,7 +506,7 @@ int kbase_arbiter_pm_gpu_assigned(struct kbase_device *kbdev) /** * kbase_arbiter_pm_vm_gpu_start() - Handles the start state of the VM - * @kbdev: The kbase device structure for the device (must be a valid pointer) + * @kbdev: The kbase device structure for the device * * Handles the start state of the VM */ @@ -532,7 +531,15 @@ static void kbase_arbiter_pm_vm_gpu_start(struct kbase_device *kbdev) case KBASE_VM_STATE_STOPPED_GPU_REQUESTED: kbase_arbiter_pm_vm_set_state(kbdev, KBASE_VM_STATE_STARTING); arb_vm_state->interrupts_installed = true; - kbase_install_interrupts(kbdev); + /* + * Re-install interrupts that were released for external arb_if to + * address Xen requirements. Interrupts are not released with internal + * arb_if as the IRQs are required to handle messaging to/from + * Arbiter/Resource Group. + */ + { + kbase_install_interrupts(kbdev); + } /* * GPU GRANTED received while in stop can be a result of a * repartitioning. 
@@ -561,7 +568,7 @@ static void kbase_arbiter_pm_vm_gpu_start(struct kbase_device *kbdev) /** * kbase_arbiter_pm_vm_gpu_stop() - Handles the stop state of the VM - * @kbdev: The kbase device structure for the device (must be a valid pointer) + * @kbdev: The kbase device structure for the device * * Handles the start state of the VM */ @@ -603,7 +610,7 @@ static void kbase_arbiter_pm_vm_gpu_stop(struct kbase_device *kbdev) /** * kbase_gpu_lost() - Kbase signals GPU is lost on a lost event signal - * @kbdev: The kbase device structure for the device (must be a valid pointer) + * @kbdev: The kbase device structure for the device * * On GPU lost event signals GPU_LOST to the aribiter */ @@ -658,7 +665,7 @@ static void kbase_gpu_lost(struct kbase_device *kbdev) /** * kbase_arbiter_pm_vm_os_suspend_ready_state() - checks if VM is ready * to be moved to suspended state. - * @kbdev: The kbase device structure for the device (must be a valid pointer) + * @kbdev: The kbase device structure for the device * * Return: True if its ready to be suspended else False. */ @@ -678,7 +685,7 @@ static inline bool kbase_arbiter_pm_vm_os_suspend_ready_state(struct kbase_devic /** * kbase_arbiter_pm_vm_os_prepare_suspend() - Prepare OS to be in suspend state * until it receives the grant message from arbiter - * @kbdev: The kbase device structure for the device (must be a valid pointer) + * @kbdev: The kbase device structure for the device * * Prepares OS to be in suspend state until it receives GRANT message * from Arbiter asynchronously. @@ -745,7 +752,7 @@ static void kbase_arbiter_pm_vm_os_prepare_suspend(struct kbase_device *kbdev) /** * kbase_arbiter_pm_vm_os_resume() - Resume OS function once it receives * a grant message from arbiter - * @kbdev: The kbase device structure for the device (must be a valid pointer) + * @kbdev: The kbase device structure for the device * * Resume OS function once it receives GRANT message * from Arbiter asynchronously. 
@@ -774,7 +781,7 @@ static void kbase_arbiter_pm_vm_os_resume(struct kbase_device *kbdev) /** * kbase_arbiter_pm_vm_event() - Dispatch VM event to the state machine. - * @kbdev: The kbase device structure for the device (must be a valid pointer) + * @kbdev: The kbase device structure for the device * @evt: VM event * * The state machine function. Receives events and transitions states @@ -853,7 +860,7 @@ void kbase_arbiter_pm_vm_event(struct kbase_device *kbdev, enum kbase_arbif_evt break; default: - dev_alert(kbdev->dev, "Got Unknown Event!"); + dev_err(kbdev->dev, "Got Unknown Event!"); break; } mutex_unlock(&arb_vm_state->vm_state_lock); @@ -863,7 +870,7 @@ KBASE_EXPORT_TEST_API(kbase_arbiter_pm_vm_event); /** * kbase_arbiter_pm_vm_wait_gpu_assignment() - VM wait for a GPU assignment. - * @kbdev: The kbase device structure for the device (must be a valid pointer) + * @kbdev: The kbase device structure for the device * * VM waits for a GPU assignment. */ @@ -879,14 +886,14 @@ static void kbase_arbiter_pm_vm_wait_gpu_assignment(struct kbase_device *kbdev) } /** - * kbase_arbiter_pm_vm_gpu_assigned_lockheld() - Check if VM holds VM state lock - * @kbdev: The kbase device structure for the device (must be a valid pointer) + * kbase_arbiter_pm_vm_gpu_assigned_locked() - Check if VM holds VM state lock + * @kbdev: The kbase device structure for the device * * Checks if the virtual machine holds VM state lock. * * Return: true if GPU is assigned, else false. 
*/ -static inline bool kbase_arbiter_pm_vm_gpu_assigned_lockheld(struct kbase_device *kbdev) +static inline bool kbase_arbiter_pm_vm_gpu_assigned_locked(struct kbase_device *kbdev) { struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state; @@ -898,7 +905,7 @@ static inline bool kbase_arbiter_pm_vm_gpu_assigned_lockheld(struct kbase_device /** * kbase_arbiter_pm_ctx_active_handle_suspend() - Handle suspend operation for * arbitration mode - * @kbdev: The kbase device structure for the device (must be a valid pointer) + * @kbdev: The kbase device structure for the device * @suspend_handler: The handler code for how to handle a suspend * that might occur * @@ -916,7 +923,7 @@ int kbase_arbiter_pm_ctx_active_handle_suspend(struct kbase_device *kbdev, if (kbdev->arb.arb_if) { mutex_lock(&arb_vm_state->vm_state_lock); - while (!kbase_arbiter_pm_vm_gpu_assigned_lockheld(kbdev)) { + while (!kbase_arbiter_pm_vm_gpu_assigned_locked(kbdev)) { /* Update VM state since we have GPU work to do */ if (arb_vm_state->vm_state == KBASE_VM_STATE_STOPPING_IDLE) kbase_arbiter_pm_vm_set_state(kbdev, diff --git a/drivers/gpu/arm/bifrost/arbiter/mali_kbase_arbiter_pm.h b/drivers/gpu/arm/bifrost/arbiter/mali_kbase_arbiter_pm.h index 3734d32b6e2b..649f488d4f67 100644 --- a/drivers/gpu/arm/bifrost/arbiter/mali_kbase_arbiter_pm.h +++ b/drivers/gpu/arm/bifrost/arbiter/mali_kbase_arbiter_pm.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2024 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -102,7 +102,7 @@ void kbase_arbiter_pm_release_interrupts(struct kbase_device *kbdev); * * Install interrupts and set the interrupt_install flag to true. 
* - * Return: 0 if success, or a Linux error code + * Return: 0 if success or already installed. Otherwise a Linux error code */ int kbase_arbiter_pm_install_interrupts(struct kbase_device *kbdev); diff --git a/drivers/gpu/arm/bifrost/backend/gpu/Kbuild b/drivers/gpu/arm/bifrost/backend/gpu/Kbuild index c3db14217c6d..ffec0417aa5c 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/Kbuild +++ b/drivers/gpu/arm/bifrost/backend/gpu/Kbuild @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note # -# (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved. +# (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved. # # This program is free software and is provided to you under the terms of the # GNU General Public License version 2 as published by the Free Software @@ -47,12 +47,7 @@ endif bifrost_kbase-$(CONFIG_MALI_BIFROST_DEVFREQ) += \ backend/gpu/mali_kbase_devfreq.o -ifneq ($(CONFIG_MALI_REAL_HW),y) - bifrost_kbase-y += backend/gpu/mali_kbase_model_linux.o -endif +bifrost_kbase-$(CONFIG_MALI_BIFROST_NO_MALI) += backend/gpu/mali_kbase_model_linux.o # NO_MALI Dummy model interface bifrost_kbase-$(CONFIG_MALI_BIFROST_NO_MALI) += backend/gpu/mali_kbase_model_dummy.o -# HW error simulation -bifrost_kbase-$(CONFIG_MALI_BIFROST_NO_MALI) += backend/gpu/mali_kbase_model_error_generator.o - diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_devfreq.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_devfreq.c index 2649f1815e9f..e223535d01f7 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_devfreq.c +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_devfreq.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2024 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -366,7 +366,7 @@ static int kbase_devfreq_init_core_mask_table(struct kbase_device *kbdev) err = of_property_read_u64(node, "opp-hz-real", real_freqs); #endif if (err < 0) { - dev_warn(kbdev->dev, "Failed to read opp-hz-real property with error %d\n", + dev_warn(kbdev->dev, "Failed to read opp-hz-real property with error %d", err); continue; } @@ -374,8 +374,8 @@ static int kbase_devfreq_init_core_mask_table(struct kbase_device *kbdev) err = of_property_read_u32_array(node, "opp-microvolt", opp_volts, kbdev->nr_regulators); if (err < 0) { - dev_warn(kbdev->dev, - "Failed to read opp-microvolt property with error %d\n", err); + dev_warn(kbdev->dev, "Failed to read opp-microvolt property with error %d", + err); continue; } #endif @@ -386,11 +386,12 @@ static int kbase_devfreq_init_core_mask_table(struct kbase_device *kbdev) dev_warn( kbdev->dev, - "Ignoring OPP %llu - Dynamic Core Scaling not supported on this GPU\n", + "Ignoring OPP %llu - Dynamic Core Scaling not supported on this GPU", opp_freq); continue; } + core_count_p = of_get_property(node, "opp-core-count", NULL); if (core_count_p) { u64 remaining_core_mask = kbdev->gpu_props.shader_present; diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_instr_backend.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_instr_backend.c index 131cfe32df9f..07960713f75a 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_instr_backend.c +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_instr_backend.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2024 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -29,6 +29,8 @@ #include #include +#define WAIT_FOR_DUMP_TIMEOUT_MS 5000 + static int wait_prfcnt_ready(struct kbase_device *kbdev) { u32 val; @@ -163,6 +165,7 @@ int kbase_instr_hwcnt_disable_internal(struct kbase_context *kctx) { unsigned long flags, pm_flags; struct kbase_device *kbdev = kctx->kbdev; + const unsigned long timeout = msecs_to_jiffies(WAIT_FOR_DUMP_TIMEOUT_MS); while (1) { spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags); @@ -199,7 +202,8 @@ int kbase_instr_hwcnt_disable_internal(struct kbase_context *kctx) spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags); /* Ongoing dump/setup - wait for its completion */ - wait_event(kbdev->hwcnt.backend.wait, kbdev->hwcnt.backend.triggered != 0); + wait_event_timeout(kbdev->hwcnt.backend.wait, kbdev->hwcnt.backend.triggered != 0, + timeout); } kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_DISABLED; @@ -319,8 +323,19 @@ int kbase_instr_hwcnt_wait_for_dump(struct kbase_context *kctx) unsigned long flags; int err; + unsigned long remaining; + const unsigned long timeout = msecs_to_jiffies(WAIT_FOR_DUMP_TIMEOUT_MS); + /* Wait for dump & cache clean to complete */ - wait_event(kbdev->hwcnt.backend.wait, kbdev->hwcnt.backend.triggered != 0); + remaining = wait_event_timeout(kbdev->hwcnt.backend.wait, + kbdev->hwcnt.backend.triggered != 0, timeout); + if (remaining == 0) { + err = -ETIME; + /* Set the backend state so it's clear things have gone bad (could be a HW issue) + */ + kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_UNRECOVERABLE_ERROR; + goto timed_out; + } spin_lock_irqsave(&kbdev->hwcnt.lock, flags); @@ -336,7 +351,7 @@ int kbase_instr_hwcnt_wait_for_dump(struct kbase_context *kctx) } spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); - +timed_out: return err; } diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_irq_internal.h 
b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_irq_internal.h index 34e8178d1d76..feb76757f955 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_irq_internal.h +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_irq_internal.h @@ -74,7 +74,7 @@ void kbase_synchronize_irqs(struct kbase_device *kbdev); * Return: 0 on success. Error code (negative) on failure. */ int kbase_validate_interrupts(struct kbase_device *const kbdev); -#endif /* CONFIG_MALI_REAL_HW */ +#endif /* IS_ENABLED(CONFIG_MALI_REAL_HW) */ #endif /* CONFIG_MALI_BIFROST_DEBUG */ /** diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_irq_linux.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_irq_linux.c index 9cb367508dde..152b140b5381 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_irq_linux.c +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_irq_linux.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2024 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -23,6 +23,7 @@ #include #include + #include #if IS_ENABLED(CONFIG_MALI_REAL_HW) @@ -163,13 +164,9 @@ static irqreturn_t kbase_gpu_irq_handler(int irq, void *data) static irqreturn_t kbase_combined_irq_handler(int irq, void *data) { irqreturn_t irq_state = IRQ_NONE; - - if (kbase_job_irq_handler(irq, data) == IRQ_HANDLED) - irq_state = IRQ_HANDLED; - if (kbase_mmu_irq_handler(irq, data) == IRQ_HANDLED) - irq_state = IRQ_HANDLED; - if (kbase_gpu_irq_handler(irq, data) == IRQ_HANDLED) - irq_state = IRQ_HANDLED; + irq_state |= kbase_job_irq_handler(irq, data); + irq_state |= kbase_mmu_irq_handler(irq, data); + irq_state |= kbase_gpu_irq_handler(irq, data); return irq_state; } @@ -212,8 +209,7 @@ int kbase_set_custom_irq_handler(struct kbase_device *kbdev, irq_handler_t custo if (!handler) handler = kbase_get_interrupt_handler(kbdev, irq_tag); - if (request_irq(kbdev->irqs[irq].irq, handler, - kbdev->irqs[irq].flags | ((kbdev->nr_irqs == 1) ? 0 : IRQF_SHARED), + if (request_irq(kbdev->irqs[irq].irq, handler, kbdev->irqs[irq].flags | IRQF_SHARED, dev_name(kbdev->dev), kbase_tag(kbdev, irq)) != 0) { result = -EINVAL; dev_err(kbdev->dev, "Can't request interrupt %u (index %u)\n", kbdev->irqs[irq].irq, @@ -396,8 +392,8 @@ static int validate_interrupt(struct kbase_device *const kbdev, u32 tag) /* restore original interrupt */ if (request_irq(kbdev->irqs[irq].irq, kbase_get_interrupt_handler(kbdev, tag), - kbdev->irqs[irq].flags | ((kbdev->nr_irqs == 1) ? 
0 : IRQF_SHARED), - dev_name(kbdev->dev), kbase_tag(kbdev, irq))) { + kbdev->irqs[irq].flags | IRQF_SHARED, dev_name(kbdev->dev), + kbase_tag(kbdev, irq))) { dev_err(kbdev->dev, "Can't restore original interrupt %u (index %u)\n", kbdev->irqs[irq].irq, tag); err = -EINVAL; @@ -449,10 +445,10 @@ int kbase_install_interrupts(struct kbase_device *kbdev) u32 i; for (i = 0; i < kbdev->nr_irqs; i++) { - const int result = request_irq( - kbdev->irqs[i].irq, kbase_get_interrupt_handler(kbdev, i), - kbdev->irqs[i].flags | ((kbdev->nr_irqs == 1) ? 0 : IRQF_SHARED), - dev_name(kbdev->dev), kbase_tag(kbdev, i)); + const int result = request_irq(kbdev->irqs[i].irq, + kbase_get_interrupt_handler(kbdev, i), + kbdev->irqs[i].flags | IRQF_SHARED, + dev_name(kbdev->dev), kbase_tag(kbdev, i)); if (result) { dev_err(kbdev->dev, "Can't request interrupt %u (index %u)\n", kbdev->irqs[i].irq, i); diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_hw.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_hw.c index e822dc59977b..cc8a0ff7fa42 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_hw.c +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_hw.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2024 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -1328,7 +1328,7 @@ void kbase_reset_gpu(struct kbase_device *kbdev) if (!kbase_is_quick_reset_enabled(kbdev)) dev_err(kbdev->dev, - "Preparing to soft-reset GPU: Waiting (upto %d ms) for all jobs to complete soft-stop\n", + "Preparing to soft-reset GPU: Waiting (up to %d ms) for all jobs to complete soft-stop\n", kbdev->reset_timeout_ms); hrtimer_start(&kbdev->hwaccess.backend.reset_timer, @@ -1350,7 +1350,7 @@ void kbase_reset_gpu_locked(struct kbase_device *kbdev) if (!kbase_is_quick_reset_enabled(kbdev)) dev_err(kbdev->dev, - "Preparing to soft-reset GPU: Waiting (upto %d ms) for all jobs to complete soft-stop\n", + "Preparing to soft-reset GPU: Waiting (up to %d ms) for all jobs to complete soft-stop\n", kbdev->reset_timeout_ms); hrtimer_start(&kbdev->hwaccess.backend.reset_timer, HR_TIMER_DELAY_MSEC(kbdev->reset_timeout_ms), HRTIMER_MODE_REL); diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_rb.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_rb.c index 842209f9c049..e1105bf90899 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_rb.c +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_rb.c @@ -1437,7 +1437,7 @@ void kbase_backend_reset(struct kbase_device *kbdev, ktime_t *end_timestamp) * then leave it in the RB and next time we're kicked * it will be processed again from the starting state. 
*/ - if (keep_in_jm_rb) { + if (!kbase_is_gpu_removed(kbdev) && keep_in_jm_rb) { katom->protected_state.exit = KBASE_ATOM_EXIT_PROTECTED_CHECK; /* As the atom was not removed, increment the * index so that we read the correct atom in the diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_dummy.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_dummy.c index 41b9b37797d3..b034ffef0ceb 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_dummy.c +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_dummy.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2024 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -25,42 +25,8 @@ * insmod'ing mali_kbase.ko with no arguments after a build with "scons * gpu=tXYZ" will yield the expected GPU ID for tXYZ. This can always be * overridden by passing the 'no_mali_gpu' argument to insmod. - * - * - if CONFIG_MALI_BIFROST_ERROR_INJECT is defined the error injection system is - * activated. */ -/* Implementation of failure injection system: - * - * Error conditions are generated by gpu_generate_error(). - * According to CONFIG_MALI_BIFROST_ERROR_INJECT definition gpu_generate_error() either - * generates an error HW condition randomly (CONFIG_MALI_ERROR_INJECT_RANDOM) or - * checks if there is (in error_track_list) an error configuration to be set for - * the current job chain (CONFIG_MALI_ERROR_INJECT_RANDOM not defined). - * Each error condition will trigger a specific "state" for a certain set of - * registers as per Midgard Architecture Specifications doc. 
- * - * According to Midgard Architecture Specifications doc the following registers - * are always affected by error conditions: - * - * JOB Exception: - * JOB_IRQ_RAWSTAT - * JOB STATUS AREA - * - * MMU Exception: - * MMU_IRQ_RAWSTAT - * AS_FAULTSTATUS - * AS_FAULTADDRESS - * - * GPU Exception: - * GPU_IRQ_RAWSTAT - * GPU_FAULTSTATUS - * GPU_FAULTADDRESS - * - * For further clarification on the model behaviour upon specific error - * conditions the user may refer to the Midgard Architecture Specification - * document - */ #include #include #include @@ -126,7 +92,7 @@ struct error_status_t hw_error_status; */ struct control_reg_values_t { const char *name; - u32 gpu_id; + u64 gpu_id; u32 as_present; u32 thread_max_threads; u32 thread_max_workgroup_size; @@ -524,7 +490,7 @@ MODULE_PARM_DESC(no_mali_gpu, "GPU to identify as"); static u32 gpu_model_get_prfcnt_value(enum kbase_ipa_core_type core_type, u32 cnt_idx, bool is_low_word) { - u64 *counters_data; + u64 *counters_data = NULL; u32 core_count = 0; u32 event_index; u64 value = 0; @@ -580,6 +546,9 @@ static u32 gpu_model_get_prfcnt_value(enum kbase_ipa_core_type core_type, u32 cn break; } + if (unlikely(counters_data == NULL)) + return 0; + for (core = 0; core < core_count; core++) { value += counters_data[event_index]; event_index += KBASE_DUMMY_MODEL_COUNTER_PER_CORE; @@ -1172,9 +1141,6 @@ static void midgard_model_update(void *h) /*this job is done assert IRQ lines */ signal_int(dummy, i); -#ifdef CONFIG_MALI_BIFROST_ERROR_INJECT - midgard_set_error(i); -#endif /* CONFIG_MALI_BIFROST_ERROR_INJECT */ update_register_statuses(dummy, i); /*if this job slot returned failures we cannot use it */ if (hw_error_status.job_irq_rawstat & (1u << (i + 16))) { @@ -1564,6 +1530,7 @@ void midgard_model_write_reg(void *h, u32 addr, u32 value) case L2_PWROFF_HI: case PWR_KEY: case PWR_OVERRIDE0: + case PWR_OVERRIDE1: #if MALI_USE_CSF case SHADER_PWRFEATURES: case CSF_CONFIG: @@ -1607,8 +1574,7 @@ void 
midgard_model_read_reg(void *h, u32 addr, u32 *const value) #else /* !MALI_USE_CSF */ if (addr == GPU_CONTROL_REG(GPU_ID)) { #endif /* !MALI_USE_CSF */ - - *value = dummy->control_reg_values->gpu_id; + *value = dummy->control_reg_values->gpu_id & U32_MAX; } else if (addr == JOB_CONTROL_REG(JOB_IRQ_RAWSTAT)) { *value = hw_error_status.job_irq_rawstat; pr_debug("%s", "JS_IRQ_RAWSTAT being read"); @@ -2166,9 +2132,3 @@ int gpu_model_control(void *model, struct kbase_model_control_params *params) return 0; } - -u64 midgard_model_arch_timer_get_cntfrq(void *h) -{ - CSTD_UNUSED(h); - return arch_timer_get_cntfrq(); -} diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_error_generator.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_error_generator.c deleted file mode 100644 index 86d4e26bd6b4..000000000000 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_error_generator.c +++ /dev/null @@ -1,172 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note -/* - * - * (C) COPYRIGHT 2014-2015, 2018-2023 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU license. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. 
- * - */ - -#include -#include -#include "backend/gpu/mali_kbase_model_linux.h" - -static struct kbase_error_atom *error_track_list; - -#ifdef CONFIG_MALI_ERROR_INJECT_RANDOM - -/** Kernel 6.1.0 has dropped prandom_u32(), use get_random_u32() */ -#if (KERNEL_VERSION(6, 1, 0) <= LINUX_VERSION_CODE) -#define prandom_u32 get_random_u32 -#endif - -/*following error probability are set quite high in order to stress the driver*/ -static unsigned int error_probability = 50; /* to be set between 0 and 100 */ -/* probability to have multiple error give that there is an error */ -static unsigned int multiple_error_probability = 50; - -/* all the error conditions supported by the model */ -#define TOTAL_FAULTS 27 -/* maximum number of levels in the MMU translation table tree */ -#define MAX_MMU_TABLE_LEVEL 4 -/* worst case scenario is <1 MMU fault + 1 job fault + 2 GPU faults> */ -#define MAX_CONCURRENT_FAULTS 3 - -/** - * gpu_generate_error - Generate GPU error - */ -static void gpu_generate_error(void) -{ - unsigned int errors_num = 0; - - /*is there at least one error? */ - if ((prandom_u32() % 100) < error_probability) { - /* pick up a faulty mmu address space */ - hw_error_status.faulty_mmu_as = prandom_u32() % NUM_MMU_AS; - /* pick up an mmu table level */ - hw_error_status.mmu_table_level = 1 + (prandom_u32() % MAX_MMU_TABLE_LEVEL); - hw_error_status.errors_mask = (u32)(1 << (prandom_u32() % TOTAL_FAULTS)); - - /*is there also one or more errors? 
*/ - if ((prandom_u32() % 100) < multiple_error_probability) { - errors_num = 1 + (prandom_u32() % (MAX_CONCURRENT_FAULTS - 1)); - while (errors_num-- > 0) { - u32 temp_mask; - - temp_mask = (u32)(1 << (prandom_u32() % TOTAL_FAULTS)); - /* below we check that no bit of the same error - * type is set again in the error mask - */ - if ((temp_mask & IS_A_JOB_ERROR) && - (hw_error_status.errors_mask & IS_A_JOB_ERROR)) { - errors_num++; - continue; - } - if ((temp_mask & IS_A_MMU_ERROR) && - (hw_error_status.errors_mask & IS_A_MMU_ERROR)) { - errors_num++; - continue; - } - if ((temp_mask & IS_A_GPU_ERROR) && - (hw_error_status.errors_mask & IS_A_GPU_ERROR)) { - errors_num++; - continue; - } - /* this error mask is already set */ - if ((hw_error_status.errors_mask | temp_mask) == - hw_error_status.errors_mask) { - errors_num++; - continue; - } - hw_error_status.errors_mask |= temp_mask; - } - } - } -} -#endif - -int job_atom_inject_error(struct kbase_error_params *params) -{ - struct kbase_error_atom *new_elem; - - KBASE_DEBUG_ASSERT(params); - - new_elem = kzalloc(sizeof(*new_elem), GFP_KERNEL); - - if (!new_elem) { - model_error_log(KBASE_CORE, - "\njob_atom_inject_error: kzalloc failed for new_elem\n"); - return -ENOMEM; - } - new_elem->params.jc = params->jc; - new_elem->params.errors_mask = params->errors_mask; - new_elem->params.mmu_table_level = params->mmu_table_level; - new_elem->params.faulty_mmu_as = params->faulty_mmu_as; - - /*circular list below */ - if (error_track_list == NULL) { /*no elements */ - error_track_list = new_elem; - new_elem->next = error_track_list; - } else { - struct kbase_error_atom *walker = error_track_list; - - while (walker->next != error_track_list) - walker = walker->next; - - new_elem->next = error_track_list; - walker->next = new_elem; - } - return 0; -} - -void midgard_set_error(u32 job_slot) -{ -#ifdef CONFIG_MALI_ERROR_INJECT_RANDOM - gpu_generate_error(); -#else - struct kbase_error_atom *walker, *auxiliar; - - if 
(error_track_list != NULL) { - walker = error_track_list->next; - auxiliar = error_track_list; - do { - if (walker->params.jc == hw_error_status.current_jc) { - /* found a faulty atom matching with the - * current one - */ - hw_error_status.errors_mask = walker->params.errors_mask; - hw_error_status.mmu_table_level = walker->params.mmu_table_level; - hw_error_status.faulty_mmu_as = walker->params.faulty_mmu_as; - hw_error_status.current_job_slot = job_slot; - - if (walker->next == walker) { - /* only one element */ - kfree(error_track_list); - error_track_list = NULL; - } else { - auxiliar->next = walker->next; - if (walker == error_track_list) - error_track_list = walker->next; - - kfree(walker); - } - break; - } - auxiliar = walker; - walker = walker->next; - } while (auxiliar->next != error_track_list); - } -#endif /* CONFIG_MALI_ERROR_INJECT_RANDOM */ -} diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_linux.h b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_linux.h index 77e089ef45c8..d38bb8891be1 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_linux.h +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_linux.h @@ -48,12 +48,8 @@ /* * Include Model definitions */ - -#if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) #include -#endif /* IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) */ -#if !IS_ENABLED(CONFIG_MALI_REAL_HW) /** * kbase_gpu_device_create() - Generic create function. * @@ -116,15 +112,6 @@ void midgard_model_write_reg(void *h, u32 addr, u32 value); */ void midgard_model_read_reg(void *h, u32 addr, u32 *const value); -/** - * midgard_model_arch_timer_get_cntfrq - Get Model specific System Timer Frequency - * - * @h: Model handle. - * - * Return: Frequency in Hz - */ -u64 midgard_model_arch_timer_get_cntfrq(void *h); - /** * gpu_device_raise_irq() - Private IRQ raise function. * @@ -155,6 +142,5 @@ void gpu_device_set_data(void *model, void *data); * Return: Pointer to the data carried by model. 
*/ void *gpu_device_get_data(void *model); -#endif /* !IS_ENABLED(CONFIG_MALI_REAL_HW) */ #endif /* _KBASE_MODEL_LINUX_H_ */ diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_backend.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_backend.c index ca4e73d3fbb7..6db242af0578 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_backend.c +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_backend.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2024 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -36,6 +36,9 @@ #include #include #include +#ifdef CONFIG_MALI_ARBITER_SUPPORT +#include +#endif /* !CONFIG_MALI_ARBITER_SUPPORT */ #endif /* !MALI_USE_CSF */ #include #include @@ -393,7 +396,7 @@ static void kbase_pm_gpu_poweroff_wait_wq(struct work_struct *data) backend->poweron_required = false; kbdev->pm.backend.l2_desired = true; #if MALI_USE_CSF - kbdev->pm.backend.mcu_desired = true; + kbdev->pm.backend.mcu_desired = kbdev->pm.backend.mcu_poweron_required; #endif kbase_pm_update_state(kbdev); kbase_pm_update_cores_state_nolock(kbdev); @@ -860,9 +863,11 @@ void kbase_pm_set_debug_core_mask(struct kbase_device *kbdev, u64 new_core_mask) } KBASE_EXPORT_TEST_API(kbase_pm_set_debug_core_mask); #else -void kbase_pm_set_debug_core_mask(struct kbase_device *kbdev, u64 new_core_mask_js0, - u64 new_core_mask_js1, u64 new_core_mask_js2) +void kbase_pm_set_debug_core_mask(struct kbase_device *kbdev, u64 *new_core_mask, + size_t new_core_mask_size) { + size_t i; + lockdep_assert_held(&kbdev->hwaccess_lock); lockdep_assert_held(&kbdev->pm.lock); @@ -870,13 +875,14 @@ void kbase_pm_set_debug_core_mask(struct kbase_device *kbdev, u64 new_core_mask_ dev_warn_once( kbdev->dev, "Change of core mask 
not supported for slot 0 as dummy job WA is enabled"); - new_core_mask_js0 = kbdev->pm.debug_core_mask[0]; + new_core_mask[0] = kbdev->pm.debug_core_mask[0]; } - kbdev->pm.debug_core_mask[0] = new_core_mask_js0; - kbdev->pm.debug_core_mask[1] = new_core_mask_js1; - kbdev->pm.debug_core_mask[2] = new_core_mask_js2; - kbdev->pm.debug_core_mask_all = new_core_mask_js0 | new_core_mask_js1 | new_core_mask_js2; + kbdev->pm.debug_core_mask_all = 0; + for (i = 0; i < new_core_mask_size; i++) { + kbdev->pm.debug_core_mask[i] = new_core_mask[i]; + kbdev->pm.debug_core_mask_all |= new_core_mask[i]; + } kbase_pm_update_dynamic_cores_onoff(kbdev); } @@ -962,7 +968,9 @@ void kbase_hwaccess_pm_resume(struct kbase_device *kbdev) void kbase_pm_handle_gpu_lost(struct kbase_device *kbdev) { unsigned long flags; -#if !MALI_USE_CSF +#if MALI_USE_CSF + unsigned long flags_sched; +#else ktime_t end_timestamp = ktime_get_raw(); #endif struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state; @@ -981,24 +989,44 @@ void kbase_pm_handle_gpu_lost(struct kbase_device *kbdev) */ WARN(!kbase_is_gpu_removed(kbdev), "GPU is still available after GPU lost event\n"); - /* Full GPU reset will have been done by hypervisor, so - * cancel - */ +#if MALI_USE_CSF + /* Full GPU reset will have been done by hypervisor, so cancel */ + kbase_reset_gpu_prevent_and_wait(kbdev); + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbase_csf_scheduler_spin_lock(kbdev, &flags_sched); + atomic_set(&kbdev->hwaccess.backend.reset_gpu, KBASE_RESET_GPU_NOT_PENDING); + kbase_csf_scheduler_spin_unlock(kbdev, flags_sched); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + kbase_synchronize_irqs(kbdev); + + /* Scheduler reset happens outside of spinlock due to the mutex it acquires */ + kbase_csf_scheduler_reset(kbdev); + + /* Update kbase status */ + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbdev->protected_mode = false; + kbase_pm_update_state(kbdev); + 
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + /* Cancel any pending HWC dumps */ + kbase_hwcnt_backend_csf_on_unrecoverable_error(&kbdev->hwcnt_gpu_iface); +#else + /* Full GPU reset will have been done by hypervisor, so cancel */ atomic_set(&kbdev->hwaccess.backend.reset_gpu, KBASE_RESET_GPU_NOT_PENDING); hrtimer_cancel(&kbdev->hwaccess.backend.reset_timer); + kbase_synchronize_irqs(kbdev); /* Clear all jobs running on the GPU */ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); kbdev->protected_mode = false; -#if !MALI_USE_CSF kbase_backend_reset(kbdev, &end_timestamp); kbase_pm_metrics_update(kbdev, NULL); -#endif kbase_pm_update_state(kbdev); spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -#if !MALI_USE_CSF /* Cancel any pending HWC dumps */ spin_lock_irqsave(&kbdev->hwcnt.lock, flags); if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_DUMPING || @@ -1008,12 +1036,11 @@ void kbase_pm_handle_gpu_lost(struct kbase_device *kbdev) wake_up(&kbdev->hwcnt.backend.wait); } spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); -#endif +#endif /* MALI_USE_CSF */ } mutex_unlock(&arb_vm_state->vm_state_lock); mutex_unlock(&kbdev->pm.lock); } - #endif /* CONFIG_MALI_ARBITER_SUPPORT */ #if MALI_USE_CSF && defined(KBASE_PM_RUNTIME) @@ -1063,26 +1090,15 @@ static int pm_handle_mcu_sleep_on_runtime_suspend(struct kbase_device *kbdev) } /* Check if a Doorbell mirror interrupt occurred meanwhile. - * Also check if GPU idle work item is pending. If FW had sent the GPU idle notification - * after the wake up of MCU then it can be assumed that Userspace submission didn't make - * GPU non-idle, so runtime suspend doesn't need to be aborted. 
*/ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - if (kbdev->pm.backend.gpu_sleep_mode_active && kbdev->pm.backend.exit_gpu_sleep_mode && - !work_pending(&kbdev->csf.scheduler.gpu_idle_work)) { - u32 glb_req = - kbase_csf_firmware_global_input_read(&kbdev->csf.global_iface, GLB_REQ); - u32 glb_ack = kbase_csf_firmware_global_output(&kbdev->csf.global_iface, GLB_ACK); - - /* Only abort the runtime suspend if GPU idle event is not pending */ - if (!((glb_req ^ glb_ack) & GLB_REQ_IDLE_EVENT_MASK)) { - dev_dbg(kbdev->dev, - "DB mirror interrupt occurred during runtime suspend after L2 power up"); - kbdev->pm.backend.gpu_wakeup_override = false; - kbdev->pm.backend.runtime_suspend_abort_reason = ABORT_REASON_DB_MIRROR_IRQ; - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - return -EBUSY; - } + if (kbdev->pm.backend.gpu_sleep_mode_active && kbdev->pm.backend.exit_gpu_sleep_mode) { + dev_dbg(kbdev->dev, + "DB mirror interrupt occurred during runtime suspend after L2 power up"); + kbdev->pm.backend.gpu_wakeup_override = false; + kbdev->pm.backend.runtime_suspend_abort_reason = ABORT_REASON_DB_MIRROR_IRQ; + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + return -EBUSY; } spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); /* Need to release the kbdev->pm.lock to avoid lock ordering issue diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_defs.h b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_defs.h index a0b8b9500077..34c34df7f82f 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_defs.h +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_defs.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2024 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -332,7 +332,11 @@ union kbase_pm_policy_data { * cores may be different, but there should be transitions in * progress that will eventually achieve this state (assuming * that the policy doesn't change its mind in the mean time). - * @mcu_desired: True if the micro-control unit should be powered on + * @mcu_desired: True if the micro-control unit should be powered on by the MCU state + * machine. Updated as per the value of @mcu_poweron_required. + * @mcu_poweron_required: Boolean flag updated mainly by the CSF Scheduler code, + * before updating the PM active count, to indicate to the + * PM code that micro-control unit needs to be powered up/down. * @policy_change_clamp_state_to_off: Signaling the backend is in PM policy * change transition, needs the mcu/L2 to be brought back to the * off state and remain in that state until the flag is cleared. @@ -485,6 +489,7 @@ struct kbase_pm_backend_data { u64 shaders_desired_mask; #if MALI_USE_CSF bool mcu_desired; + bool mcu_poweron_required; bool policy_change_clamp_state_to_off; unsigned int csf_pm_sched_flags; struct mutex policy_change_lock; diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_driver.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_driver.c index 506e168f86d2..9e85cf5589f4 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_driver.c +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_driver.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2024 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -70,6 +70,19 @@ MODULE_PARM_DESC(corestack_driver_control, "to the Mali GPU is known to be problematic."); KBASE_EXPORT_TEST_API(corestack_driver_control); +/** + * enum kbase_gpu_state - The state of data in the GPU. + * + * @GPU_STATE_INTACT: The GPU state is intact + * @GPU_STATE_LOST: The GPU state is lost + * @GPU_STATE_IN_RESET: The GPU is in reset state + * + * This enumeration is private to the file. It is used as + * the return values of platform specific PM + * callback (*power_on_callback). + */ +enum kbase_gpu_state { GPU_STATE_INTACT = 0, GPU_STATE_LOST, GPU_STATE_IN_RESET }; + /** * enum kbasep_pm_action - Actions that can be performed on a core. * @@ -110,7 +123,15 @@ bool kbase_pm_is_mcu_desired(struct kbase_device *kbdev) if (kbdev->pm.backend.l2_force_off_after_mcu_halt) return false; - if (kbdev->csf.scheduler.pm_active_count && kbdev->pm.backend.mcu_desired) + /* Check if policy changing transition needs MCU to be off. */ + if (unlikely(kbdev->pm.backend.policy_change_clamp_state_to_off)) + return false; + + if (kbdev->pm.backend.mcu_desired) + return true; + + /* For always_on policy, the MCU needs to be kept on */ + if (kbase_pm_no_mcu_core_pwroff(kbdev)) return true; #ifdef KBASE_PM_RUNTIME @@ -119,13 +140,7 @@ bool kbase_pm_is_mcu_desired(struct kbase_device *kbdev) return true; #endif - /* MCU is supposed to be ON, only when scheduler.pm_active_count is - * non zero. But for always_on policy, the MCU needs to be kept on, - * unless policy changing transition needs it off. 
- */ - - return (kbdev->pm.backend.mcu_desired && kbase_pm_no_mcu_core_pwroff(kbdev) && - !kbdev->pm.backend.policy_change_clamp_state_to_off); + return false; } #endif @@ -979,8 +994,8 @@ static int kbase_pm_mcu_update_state(struct kbase_device *kbdev) kbase_hwcnt_backend_csf_set_hw_availability( &kbdev->hwcnt_gpu_iface, kbdev->gpu_props.curr_config.l2_slices, - kbdev->gpu_props.curr_config.shader_present & - kbdev->pm.debug_core_mask); + kbdev->gpu_props.curr_config.shader_present, + kbdev->pm.debug_core_mask); kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx); kbase_csf_scheduler_spin_unlock(kbdev, flags); backend->hwcnt_disabled = false; @@ -1342,6 +1357,8 @@ static void kbase_pm_l2_clear_backend_slot_submit_kctx(struct kbase_device *kbde static bool can_power_down_l2(struct kbase_device *kbdev) { + lockdep_assert_held(&kbdev->hwaccess_lock); + /* Defer the power-down if MMU is in process of page migration. */ return !kbdev->mmu_page_migrate_in_progress; } @@ -2797,7 +2814,7 @@ static void update_user_reg_page_mapping(struct kbase_device *kbdev) void kbase_pm_clock_on(struct kbase_device *kbdev, bool is_resume) { struct kbase_pm_backend_data *backend = &kbdev->pm.backend; - bool reset_required = is_resume; + int ret = is_resume; unsigned long flags; KBASE_DEBUG_ASSERT(kbdev != NULL); @@ -2836,7 +2853,7 @@ void kbase_pm_clock_on(struct kbase_device *kbdev, bool is_resume) backend->callback_power_resume(kbdev); return; } else if (backend->callback_power_on) { - reset_required = backend->callback_power_on(kbdev); + ret = backend->callback_power_on(kbdev); } spin_lock_irqsave(&kbdev->hwaccess_lock, flags); @@ -2849,7 +2866,12 @@ void kbase_pm_clock_on(struct kbase_device *kbdev, bool is_resume) #endif - if (reset_required) { + if (ret == GPU_STATE_IN_RESET) { + /* GPU is already in reset state after power on and no + * soft-reset needed. Just reconfiguration is needed. 
+ */ + kbase_pm_init_hw(kbdev, PM_ENABLE_IRQS | PM_NO_RESET); + } else if (ret == GPU_STATE_LOST) { /* GPU state was lost, reset GPU to ensure it is in a * consistent state */ @@ -2898,7 +2920,7 @@ void kbase_pm_clock_on(struct kbase_device *kbdev, bool is_resume) backend->l2_desired = true; #if MALI_USE_CSF { - if (reset_required) { + if (ret != GPU_STATE_INTACT) { /* GPU reset was done after the power on, so send the post * reset event instead. This is okay as GPU power off event * is same as pre GPU reset event. @@ -3139,6 +3161,7 @@ static int kbase_set_tiler_quirks(struct kbase_device *kbdev) return 0; } + static int kbase_pm_hw_issues_detect(struct kbase_device *kbdev) { struct device_node *np = kbdev->dev->of_node; @@ -3191,6 +3214,7 @@ static int kbase_pm_hw_issues_detect(struct kbase_device *kbdev) error = kbase_set_mmu_quirks(kbdev); } + return error; } @@ -3210,6 +3234,7 @@ static void kbase_pm_hw_issues_apply(struct kbase_device *kbdev) #else kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(JM_CONFIG), kbdev->hw_quirks_gpu); #endif + } void kbase_pm_cache_snoop_enable(struct kbase_device *kbdev) @@ -3257,16 +3282,10 @@ static void reenable_protected_mode_hwcnt(struct kbase_device *kbdev) } #endif -static int kbase_pm_do_reset(struct kbase_device *kbdev) +static int kbase_pm_do_reset_soft(struct kbase_device *kbdev) { - struct kbasep_reset_timeout_data rtdata; - u32 reg_offset, reg_val; int ret; - KBASE_KTRACE_ADD(kbdev, CORE_GPU_SOFT_RESET, NULL, 0); - - KBASE_TLSTREAM_JD_GPU_SOFT_RESET(kbdev, kbdev); - if (kbdev->pm.backend.callback_soft_reset) { ret = kbdev->pm.backend.callback_soft_reset(kbdev); if (ret < 0) @@ -3279,12 +3298,30 @@ static int kbase_pm_do_reset(struct kbase_device *kbdev) GPU_COMMAND_SOFT_RESET); } } + return 0; +} - reg_offset = GPU_CONTROL_ENUM(GPU_IRQ_MASK); - reg_val = RESET_COMPLETED; +static int kbase_pm_do_reset(struct kbase_device *kbdev) +{ + struct kbasep_reset_timeout_data rtdata; + u32 reg_offset, reg_val; + int ret; - /* Unmask 
the reset complete interrupt only */ - kbase_reg_write32(kbdev, reg_offset, reg_val); + KBASE_KTRACE_ADD(kbdev, CORE_GPU_SOFT_RESET, NULL, 0); + + KBASE_TLSTREAM_JD_GPU_SOFT_RESET(kbdev, kbdev); + + { + ret = kbase_pm_do_reset_soft(kbdev); + if (ret) + return ret; + + reg_offset = GPU_CONTROL_ENUM(GPU_IRQ_MASK); + reg_val = RESET_COMPLETED; + + /* Unmask the reset complete interrupt only */ + kbase_reg_write32(kbdev, reg_offset, reg_val); + } /* Initialize a structure for tracking the status of the reset */ rtdata.kbdev = kbdev; @@ -3335,7 +3372,7 @@ static int kbase_pm_do_reset(struct kbase_device *kbdev) */ #ifdef CONFIG_MALI_ARBITER_SUPPORT if (!kbdev->arb.arb_if) { -#endif /* CONFIG_MALI_ARBITER_SUPPORT */ +#endif dev_err(kbdev->dev, "Failed to soft-reset GPU (timed out after %d ms), now attempting a hard reset\n", RESET_TIMEOUT); @@ -3367,7 +3404,7 @@ static int kbase_pm_do_reset(struct kbase_device *kbdev) RESET_TIMEOUT); #ifdef CONFIG_MALI_ARBITER_SUPPORT } -#endif /* CONFIG_MALI_ARBITER_SUPPORT */ +#endif return -EINVAL; } @@ -3418,9 +3455,7 @@ int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags) spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags); /* Soft reset the GPU */ -#ifdef CONFIG_MALI_ARBITER_SUPPORT if (!(flags & PM_NO_RESET)) -#endif /* CONFIG_MALI_ARBITER_SUPPORT */ err = kbdev->protected_ops->protected_mode_disable(kbdev->protected_dev); spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags); @@ -3441,7 +3476,6 @@ int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags) if (err) goto exit; - if (flags & PM_HW_ISSUES_DETECT) { err = kbase_pm_hw_issues_detect(kbdev); if (err) @@ -3451,6 +3485,9 @@ int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags) kbase_pm_hw_issues_apply(kbdev); kbase_cache_set_coherency_mode(kbdev, kbdev->system_coherency); kbase_amba_set_shareable_cache_support(kbdev); +#if MALI_USE_CSF + kbase_backend_update_gpu_timestamp_offset(kbdev); +#endif /* Sanity check protected mode was 
left after reset */ WARN_ON(kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(GPU_STATUS)) & diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_policy.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_policy.c index 23e447b15767..c8e3f40335d6 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_policy.c +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_policy.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2024 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -127,7 +127,7 @@ void kbase_pm_update_active(struct kbase_device *kbdev) pm->backend.poweroff_wait_in_progress = false; pm->backend.l2_desired = true; #if MALI_USE_CSF - pm->backend.mcu_desired = true; + pm->backend.mcu_desired = pm->backend.mcu_poweron_required; #endif spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_time.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_time.c index 0bf0f5a062d3..331c26c6a310 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_time.c +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_time.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2024 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -30,10 +30,7 @@ #include #include #include - -#if !IS_ENABLED(CONFIG_MALI_REAL_HW) -#include -#endif +#include struct kbase_timeout_info { char *selector_str; @@ -41,12 +38,16 @@ struct kbase_timeout_info { }; #if MALI_USE_CSF + +#define GPU_TIMESTAMP_OFFSET_INVALID S64_MAX + static struct kbase_timeout_info timeout_info[KBASE_TIMEOUT_SELECTOR_COUNT] = { [CSF_FIRMWARE_TIMEOUT] = { "CSF_FIRMWARE_TIMEOUT", MIN(CSF_FIRMWARE_TIMEOUT_CYCLES, CSF_FIRMWARE_PING_TIMEOUT_CYCLES) }, [CSF_PM_TIMEOUT] = { "CSF_PM_TIMEOUT", CSF_PM_TIMEOUT_CYCLES }, [CSF_GPU_RESET_TIMEOUT] = { "CSF_GPU_RESET_TIMEOUT", CSF_GPU_RESET_TIMEOUT_CYCLES }, [CSF_CSG_SUSPEND_TIMEOUT] = { "CSF_CSG_SUSPEND_TIMEOUT", CSF_CSG_SUSPEND_TIMEOUT_CYCLES }, + [CSF_CSG_TERM_TIMEOUT] = { "CSF_CSG_TERM_TIMEOUT", CSF_CSG_TERM_TIMEOUT_CYCLES }, [CSF_FIRMWARE_BOOT_TIMEOUT] = { "CSF_FIRMWARE_BOOT_TIMEOUT", CSF_FIRMWARE_BOOT_TIMEOUT_CYCLES }, [CSF_FIRMWARE_PING_TIMEOUT] = { "CSF_FIRMWARE_PING_TIMEOUT", @@ -82,6 +83,68 @@ static struct kbase_timeout_info timeout_info[KBASE_TIMEOUT_SELECTOR_COUNT] = { }; #endif +#if MALI_USE_CSF +void kbase_backend_invalidate_gpu_timestamp_offset(struct kbase_device *kbdev) +{ + kbdev->backend_time.gpu_timestamp_offset = GPU_TIMESTAMP_OFFSET_INVALID; +} +KBASE_EXPORT_TEST_API(kbase_backend_invalidate_gpu_timestamp_offset); + +/** + * kbase_backend_compute_gpu_ts_offset() - Compute GPU TS offset. + * + * @kbdev: Kbase device. 
+ * + * This function compute the value of GPU and CPU TS offset: + * - set to zero current TIMESTAMP_OFFSET register + * - read CPU TS and convert it to ticks + * - read GPU TS + * - calculate diff between CPU and GPU ticks + * - cache the diff as the GPU TS offset + * + * To reduce delays, preemption must be disabled during reads of both CPU and GPU TS + * this function require access to GPU register to be enabled + */ +static inline void kbase_backend_compute_gpu_ts_offset(struct kbase_device *kbdev) +{ + s64 cpu_ts_ticks = 0; + s64 gpu_ts_ticks = 0; + + if (kbdev->backend_time.gpu_timestamp_offset != GPU_TIMESTAMP_OFFSET_INVALID) + return; + + kbase_reg_write64(kbdev, GPU_CONTROL_ENUM(TIMESTAMP_OFFSET), 0); + + gpu_ts_ticks = kbase_reg_read64_coherent(kbdev, GPU_CONTROL_ENUM(TIMESTAMP)); + cpu_ts_ticks = ktime_get_raw_ns(); + cpu_ts_ticks = div64_u64(cpu_ts_ticks * kbdev->backend_time.divisor, + kbdev->backend_time.multiplier); + kbdev->backend_time.gpu_timestamp_offset = cpu_ts_ticks - gpu_ts_ticks; +} + +void kbase_backend_update_gpu_timestamp_offset(struct kbase_device *kbdev) +{ + lockdep_assert_held(&kbdev->pm.lock); + + kbase_backend_compute_gpu_ts_offset(kbdev); + + dev_dbg(kbdev->dev, "Setting GPU timestamp offset register to %lld (%lld ns)", + kbdev->backend_time.gpu_timestamp_offset, + div64_s64(kbdev->backend_time.gpu_timestamp_offset * + (s64)kbdev->backend_time.multiplier, + (s64)kbdev->backend_time.divisor)); + kbase_reg_write64(kbdev, GPU_CONTROL_ENUM(TIMESTAMP_OFFSET), + kbdev->backend_time.gpu_timestamp_offset); +} +#if MALI_UNIT_TEST +u64 kbase_backend_read_gpu_timestamp_offset_reg(struct kbase_device *kbdev) +{ + return kbase_reg_read64_coherent(kbdev, GPU_CONTROL_ENUM(TIMESTAMP_OFFSET)); +} +KBASE_EXPORT_TEST_API(kbase_backend_read_gpu_timestamp_offset_reg); +#endif +#endif + void kbase_backend_get_gpu_time_norequest(struct kbase_device *kbdev, u64 *cycle_counter, u64 *system_time, struct timespec64 *ts) { @@ -100,6 +163,7 @@ void 
kbase_backend_get_gpu_time_norequest(struct kbase_device *kbdev, u64 *cycle ktime_get_raw_ts64(ts); #endif } +KBASE_EXPORT_TEST_API(kbase_backend_get_gpu_time_norequest); #if !MALI_USE_CSF /** @@ -143,6 +207,7 @@ void kbase_backend_get_gpu_time(struct kbase_device *kbdev, u64 *cycle_counter, kbase_pm_release_gpu_cycle_counter(kbdev); #endif } +KBASE_EXPORT_TEST_API(kbase_backend_get_gpu_time); static u64 kbase_device_get_scaling_frequency(struct kbase_device *kbdev) { @@ -282,36 +347,14 @@ u64 __maybe_unused kbase_backend_time_convert_gpu_to_cpu(struct kbase_device *kb if (WARN_ON(!kbdev)) return 0; - return div64_u64(gpu_ts * kbdev->backend_time.multiplier, kbdev->backend_time.divisor) + - kbdev->backend_time.offset; -} - -/** - * get_cpu_gpu_time() - Get current CPU and GPU timestamps. - * - * @kbdev: Kbase device. - * @cpu_ts: Output CPU timestamp. - * @gpu_ts: Output GPU timestamp. - * @gpu_cycle: Output GPU cycle counts. - */ -static void get_cpu_gpu_time(struct kbase_device *kbdev, u64 *cpu_ts, u64 *gpu_ts, u64 *gpu_cycle) -{ - struct timespec64 ts; - - kbase_backend_get_gpu_time(kbdev, gpu_cycle, gpu_ts, &ts); - - if (cpu_ts) - *cpu_ts = (u64)(ts.tv_sec * NSEC_PER_SEC + ts.tv_nsec); + return div64_u64(gpu_ts * kbdev->backend_time.multiplier, kbdev->backend_time.divisor); } +KBASE_EXPORT_TEST_API(kbase_backend_time_convert_gpu_to_cpu); #endif u64 kbase_arch_timer_get_cntfrq(struct kbase_device *kbdev) { - u64 freq = arch_timer_get_cntfrq(); - -#if !IS_ENABLED(CONFIG_MALI_REAL_HW) - freq = midgard_model_arch_timer_get_cntfrq(kbdev->model); -#endif + u64 freq = mali_arch_timer_get_cntfrq(); dev_dbg(kbdev->dev, "System Timer Freq = %lluHz", freq); @@ -322,13 +365,10 @@ int kbase_backend_time_init(struct kbase_device *kbdev) { int err = 0; #if MALI_USE_CSF - u64 cpu_ts = 0; - u64 gpu_ts = 0; u64 freq; u64 common_factor; kbase_pm_register_access_enable(kbdev); - get_cpu_gpu_time(kbdev, &cpu_ts, &gpu_ts, NULL); freq = kbase_arch_timer_get_cntfrq(kbdev); if (!freq) 
{ @@ -348,9 +388,8 @@ int kbase_backend_time_init(struct kbase_device *kbdev) goto disable_registers; } - kbdev->backend_time.offset = - (s64)(cpu_ts - div64_u64(gpu_ts * kbdev->backend_time.multiplier, - kbdev->backend_time.divisor)); + kbase_backend_invalidate_gpu_timestamp_offset( + kbdev); /* force computation of GPU Timestamp offset */ #endif if (kbase_timeout_scaling_init(kbdev)) { diff --git a/drivers/gpu/arm/bifrost/build.bp b/drivers/gpu/arm/bifrost/build.bp index 9ee968af8de5..861282bd4fab 100644 --- a/drivers/gpu/arm/bifrost/build.bp +++ b/drivers/gpu/arm/bifrost/build.bp @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2017-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2017-2024 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -71,18 +71,6 @@ bob_defaults { mali_real_hw: { kbuild_options: ["CONFIG_MALI_REAL_HW=y"], }, - mali_error_inject_none: { - kbuild_options: ["CONFIG_MALI_ERROR_INJECT_NONE=y"], - }, - mali_error_inject_track_list: { - kbuild_options: ["CONFIG_MALI_ERROR_INJECT_TRACK_LIST=y"], - }, - mali_error_inject_random: { - kbuild_options: ["CONFIG_MALI_ERROR_INJECT_RANDOM=y"], - }, - mali_error_inject: { - kbuild_options: ["CONFIG_MALI_BIFROST_ERROR_INJECT=y"], - }, mali_debug: { kbuild_options: [ "CONFIG_MALI_BIFROST_DEBUG=y", @@ -239,6 +227,7 @@ bob_kernel_module { "jm/*.h", "tl/backend/*_jm.c", "mmu/backend/*_jm.c", + "mmu/backend/*_jm.h", "ipa/backend/*_jm.c", "ipa/backend/*_jm.h", ], @@ -263,6 +252,7 @@ bob_kernel_module { "hwcnt/backend/*_csf_*.h", "tl/backend/*_csf.c", "mmu/backend/*_csf.c", + "mmu/backend/*_csf.h", "ipa/backend/*_csf.c", "ipa/backend/*_csf.h", ], diff --git a/drivers/gpu/arm/bifrost/context/backend/mali_kbase_context_csf.c b/drivers/gpu/arm/bifrost/context/backend/mali_kbase_context_csf.c index 
8b1410886b05..f973d39ebb22 100644 --- a/drivers/gpu/arm/bifrost/context/backend/mali_kbase_context_csf.c +++ b/drivers/gpu/arm/bifrost/context/backend/mali_kbase_context_csf.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2024 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -116,8 +116,7 @@ static void kbase_context_term_partial(struct kbase_context *kctx, unsigned int struct kbase_context *kbase_create_context(struct kbase_device *kbdev, bool is_compat, base_context_create_flags const flags, - unsigned long const api_version, - struct kbase_file *const kfile) + unsigned long const api_version, struct file *const filp) { struct kbase_context *kctx; unsigned int i = 0; @@ -136,7 +135,7 @@ struct kbase_context *kbase_create_context(struct kbase_device *kbdev, bool is_c kctx->kbdev = kbdev; kctx->api_version = api_version; - kctx->kfile = kfile; + kctx->filp = filp; kctx->create_flags = flags; memcpy(kctx->comm, current->comm, sizeof(current->comm)); @@ -187,11 +186,17 @@ void kbase_destroy_context(struct kbase_context *kctx) * Customer side that a hang could occur if context termination is * not blocked until the resume of GPU device. 
*/ +#ifdef CONFIG_MALI_ARBITER_SUPPORT + atomic_inc(&kbdev->pm.gpu_users_waiting); +#endif /* CONFIG_MALI_ARBITER_SUPPORT */ while (kbase_pm_context_active_handle_suspend(kbdev, KBASE_PM_SUSPEND_HANDLER_DONT_INCREASE)) { - dev_info(kbdev->dev, "Suspend in progress when destroying context"); + dev_dbg(kbdev->dev, "Suspend in progress when destroying context"); wait_event(kbdev->pm.resume_wait, !kbase_pm_is_suspending(kbdev)); } +#ifdef CONFIG_MALI_ARBITER_SUPPORT + atomic_dec(&kbdev->pm.gpu_users_waiting); +#endif /* CONFIG_MALI_ARBITER_SUPPORT */ /* Have synchronized against the System suspend and incremented the * pm.active_count. So any subsequent invocation of System suspend diff --git a/drivers/gpu/arm/bifrost/context/backend/mali_kbase_context_jm.c b/drivers/gpu/arm/bifrost/context/backend/mali_kbase_context_jm.c index f2eefe9ddcd0..06c2ed813de3 100644 --- a/drivers/gpu/arm/bifrost/context/backend/mali_kbase_context_jm.c +++ b/drivers/gpu/arm/bifrost/context/backend/mali_kbase_context_jm.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2024 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -168,8 +168,7 @@ static void kbase_context_term_partial(struct kbase_context *kctx, unsigned int struct kbase_context *kbase_create_context(struct kbase_device *kbdev, bool is_compat, base_context_create_flags const flags, - unsigned long const api_version, - struct kbase_file *const kfile) + unsigned long const api_version, struct file *const filp) { struct kbase_context *kctx; unsigned int i = 0; @@ -188,7 +187,7 @@ struct kbase_context *kbase_create_context(struct kbase_device *kbdev, bool is_c kctx->kbdev = kbdev; kctx->api_version = api_version; - kctx->kfile = kfile; + kctx->filp = filp; kctx->create_flags = flags; if (is_compat) diff --git a/drivers/gpu/arm/bifrost/context/mali_kbase_context.c b/drivers/gpu/arm/bifrost/context/mali_kbase_context.c index 36cfde3cdab1..2c7417bd6506 100644 --- a/drivers/gpu/arm/bifrost/context/mali_kbase_context.c +++ b/drivers/gpu/arm/bifrost/context/mali_kbase_context.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2024 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -141,7 +141,7 @@ int kbase_context_common_init(struct kbase_context *kctx) kctx->pid = task_pid_vnr(current); /* Check if this is a Userspace created context */ - if (likely(kctx->kfile)) { + if (likely(kctx->filp)) { struct pid *pid_struct; rcu_read_lock(); @@ -184,6 +184,8 @@ int kbase_context_common_init(struct kbase_context *kctx) spin_lock_init(&kctx->waiting_soft_jobs_lock); INIT_LIST_HEAD(&kctx->waiting_soft_jobs); + init_waitqueue_head(&kctx->event_queue); + kbase_gpu_vm_lock(kctx); bitmap_copy(kctx->cookies, &cookies_mask, BITS_PER_LONG); kbase_gpu_vm_unlock(kctx); @@ -195,7 +197,7 @@ int kbase_context_common_init(struct kbase_context *kctx) mutex_unlock(&kctx->kbdev->kctx_list_lock); if (err) { dev_err(kctx->kbdev->dev, "(err:%d) failed to insert kctx to kbase_process", err); - if (likely(kctx->kfile)) { + if (likely(kctx->filp)) { mmdrop(kctx->process_mm); put_task_struct(kctx->task); } @@ -284,7 +286,7 @@ void kbase_context_common_term(struct kbase_context *kctx) kbase_remove_kctx_from_process(kctx); mutex_unlock(&kctx->kbdev->kctx_list_lock); - if (likely(kctx->kfile)) { + if (likely(kctx->filp)) { mmdrop(kctx->process_mm); put_task_struct(kctx->task); } diff --git a/drivers/gpu/arm/bifrost/context/mali_kbase_context.h b/drivers/gpu/arm/bifrost/context/mali_kbase_context.h index e2295d020292..07c235fab11e 100644 --- a/drivers/gpu/arm/bifrost/context/mali_kbase_context.h +++ b/drivers/gpu/arm/bifrost/context/mali_kbase_context.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2011-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2024 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -56,9 +56,9 @@ void kbase_context_debugfs_term(struct kbase_context *const kctx); * BASEP_CONTEXT_CREATE_KERNEL_FLAGS. * @api_version: Application program interface version, as encoded in * a single integer by the KBASE_API_VERSION macro. - * @kfile: Pointer to the object representing the /dev/malixx device - * file instance. Shall be passed as NULL for internally created - * contexts. + * @filp: Pointer to the struct file corresponding to device file + * /dev/malixx instance, passed to the file's open method. + * Shall be passed as NULL for internally created contexts. * * Up to one context can be created for each client that opens the device file * /dev/malixx. Context creation is deferred until a special ioctl() system call @@ -68,8 +68,7 @@ void kbase_context_debugfs_term(struct kbase_context *const kctx); */ struct kbase_context *kbase_create_context(struct kbase_device *kbdev, bool is_compat, base_context_create_flags const flags, - unsigned long api_version, - struct kbase_file *const kfile); + unsigned long api_version, struct file *filp); /** * kbase_destroy_context - Destroy a kernel base context. diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf.c index 9dffe34f095b..9a33169ec554 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf.c +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2018-2024 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -539,6 +539,8 @@ static int csf_queue_register_internal(struct kbase_context *kctx, queue->blocked_reason = CS_STATUS_BLOCKED_REASON_REASON_UNBLOCKED; + queue->clear_faults = true; + INIT_LIST_HEAD(&queue->link); atomic_set(&queue->pending_kick, 0); INIT_LIST_HEAD(&queue->pending_kick_link); @@ -589,11 +591,19 @@ int kbase_csf_queue_register_ex(struct kbase_context *kctx, u32 const glb_version = iface->version; u32 instr = iface->instr_features; u8 max_size = GLB_INSTR_FEATURES_EVENT_SIZE_MAX_GET(instr); - u32 min_buf_size = - (1u << reg->ex_event_size) * GLB_INSTR_FEATURES_OFFSET_UPDATE_RATE_GET(instr); + const u8 event_size = reg->ex_event_size; + u64 min_buf_size; /* If cs_trace_command not supported, the call fails */ if (glb_version < kbase_csf_interface_version(1, 1, 0)) + return -EPERM; + + /* Sanity check to avoid shift-out-of-bounds */ + if (event_size >= 32) + return -EINVAL; + + min_buf_size = ((u64)1 << event_size) * GLB_INSTR_FEATURES_OFFSET_UPDATE_RATE_GET(instr); + if (min_buf_size > UINT32_MAX) return -EINVAL; /* Validate the ring buffer configuration parameters */ @@ -605,8 +615,8 @@ int kbase_csf_queue_register_ex(struct kbase_context *kctx, /* Validate the cs_trace configuration parameters */ if (reg->ex_buffer_size && - ((reg->ex_event_size > max_size) || (reg->ex_buffer_size & (reg->ex_buffer_size - 1)) || - (reg->ex_buffer_size < min_buf_size))) + ((event_size > max_size) || (reg->ex_buffer_size & (reg->ex_buffer_size - 1)) || + (reg->ex_buffer_size < (u32)min_buf_size))) return -EINVAL; return csf_queue_register_internal(kctx, NULL, reg); @@ -734,7 +744,7 @@ out: } /** - * get_bound_queue_group - Get the group to which a queue was bound + * get_bound_queue_group() - Get the group to which a queue was bound * * @queue: Pointer to the queue for this group * @@ -847,6 +857,47 @@ void 
kbase_csf_ring_cs_kernel_doorbell(struct kbase_device *kbdev, int csi_index kbase_csf_ring_csg_doorbell(kbdev, csg_nr); } +int kbase_csf_queue_group_clear_faults(struct kbase_context *kctx, + struct kbase_ioctl_queue_group_clear_faults *faults) +{ + void __user *user_bufs = u64_to_user_ptr(faults->addr); + u32 i; + struct kbase_device *kbdev = kctx->kbdev; + const u32 nr_queues = faults->nr_queues; + + if (unlikely(nr_queues > kbdev->csf.global_iface.groups[0].stream_num)) { + dev_warn(kbdev->dev, "Invalid nr_queues %u", nr_queues); + return -EINVAL; + } + + for (i = 0; i < nr_queues; ++i) { + u64 buf_gpu_addr; + struct kbase_va_region *region; + + if (copy_from_user(&buf_gpu_addr, user_bufs, sizeof(buf_gpu_addr))) + return -EFAULT; + mutex_lock(&kctx->csf.lock); + kbase_gpu_vm_lock(kctx); + region = kbase_region_tracker_find_region_enclosing_address(kctx, buf_gpu_addr); + if (likely(!kbase_is_region_invalid_or_free(region))) { + struct kbase_queue *queue = region->user_data; + + queue->clear_faults = true; + } else { + dev_warn(kbdev->dev, "GPU queue %u without a valid command buffer region", + i); + kbase_gpu_vm_unlock(kctx); + mutex_unlock(&kctx->csf.lock); + return -EFAULT; + } + kbase_gpu_vm_unlock(kctx); + mutex_unlock(&kctx->csf.lock); + user_bufs = (void __user *)((uintptr_t)user_bufs + sizeof(buf_gpu_addr)); + } + + return 0; +} + int kbase_csf_queue_kick(struct kbase_context *kctx, struct kbase_ioctl_cs_queue_kick *kick) { struct kbase_device *kbdev = kctx->kbdev; @@ -868,7 +919,7 @@ int kbase_csf_queue_kick(struct kbase_context *kctx, struct kbase_ioctl_cs_queue struct kbase_queue *queue = region->user_data; if (queue && (queue->bind_state == KBASE_CSF_QUEUE_BOUND)) { - spin_lock(&kbdev->csf.pending_gpuq_kicks_lock); + spin_lock(&kbdev->csf.pending_gpuq_kick_queues_lock); if (list_empty(&queue->pending_kick_link)) { /* Queue termination shall block until this * kick has been handled. 
@@ -876,10 +927,12 @@ int kbase_csf_queue_kick(struct kbase_context *kctx, struct kbase_ioctl_cs_queue atomic_inc(&queue->pending_kick); list_add_tail( &queue->pending_kick_link, - &kbdev->csf.pending_gpuq_kicks[queue->group_priority]); - complete(&kbdev->csf.scheduler.kthread_signal); + &kbdev->csf.pending_gpuq_kick_queues[queue->group_priority]); + if (atomic_cmpxchg(&kbdev->csf.pending_gpuq_kicks, false, true) == + false) + complete(&kbdev->csf.scheduler.kthread_signal); } - spin_unlock(&kbdev->csf.pending_gpuq_kicks_lock); + spin_unlock(&kbdev->csf.pending_gpuq_kick_queues_lock); } } else { dev_dbg(kbdev->dev, @@ -1095,12 +1148,11 @@ static int create_normal_suspend_buffer(struct kbase_context *const kctx, } static void timer_event_worker(struct work_struct *data); -static void protm_event_worker(struct work_struct *data); static void term_normal_suspend_buffer(struct kbase_context *const kctx, struct kbase_normal_suspend_buffer *s_buf); /** - * create_suspend_buffers - Setup normal and protected mode + * create_suspend_buffers() - Setup normal and protected mode * suspend buffers. 
* * @kctx: Address of the kbase context within which the queue group @@ -1199,6 +1251,8 @@ static int create_queue_group(struct kbase_context *const kctx, group->deschedule_deferred_cnt = 0; #endif + group->cs_fault_report_enable = create->in.cs_fault_report_enable; + group->group_uid = generate_group_uid(); create->out.group_uid = group->group_uid; @@ -1206,7 +1260,8 @@ static int create_queue_group(struct kbase_context *const kctx, INIT_LIST_HEAD(&group->link_to_schedule); INIT_LIST_HEAD(&group->error_fatal.link); INIT_WORK(&group->timer_event_work, timer_event_worker); - INIT_WORK(&group->protm_event_work, protm_event_worker); + INIT_LIST_HEAD(&group->protm_event_work); + atomic_set(&group->pending_protm_event_work, 0); bitmap_zero(group->protm_pending_bitmap, MAX_SUPPORTED_STREAMS_PER_GROUP); group->run_state = KBASE_CSF_GROUP_INACTIVE; @@ -1254,10 +1309,8 @@ int kbase_csf_queue_group_create(struct kbase_context *const kctx, size_t i; for (i = 0; i < ARRAY_SIZE(create->in.padding); i++) { - if (create->in.padding[i] != 0) { - dev_warn(kctx->kbdev->dev, "Invalid padding not 0 in queue group create\n"); + if (create->in.padding[i] != 0) return -EINVAL; - } } mutex_lock(&kctx->csf.lock); @@ -1379,7 +1432,7 @@ void kbase_csf_term_descheduled_queue_group(struct kbase_queue_group *group) } /** - * term_queue_group - Terminate a GPU command queue group. + * term_queue_group() - Terminate a GPU command queue group. * * @group: Pointer to GPU command queue group data. * @@ -1407,8 +1460,8 @@ static void term_queue_group(struct kbase_queue_group *group) } /** - * wait_group_deferred_deschedule_completion - Wait for refcount of the group to - * become 0 that was taken when the group deschedule had to be deferred. + * wait_group_deferred_deschedule_completion() - Wait for refcount of the group + * to become 0 that was taken when the group deschedule had to be deferred. * * @group: Pointer to GPU command queue group that is being deleted. 
* @@ -1437,7 +1490,10 @@ static void wait_group_deferred_deschedule_completion(struct kbase_queue_group * static void cancel_queue_group_events(struct kbase_queue_group *group) { cancel_work_sync(&group->timer_event_work); - cancel_work_sync(&group->protm_event_work); + + /* Drain a pending protected mode request if any */ + kbase_csf_scheduler_wait_for_kthread_pending_work(group->kctx->kbdev, + &group->pending_protm_event_work); } static void remove_pending_group_fatal_error(struct kbase_queue_group *group) @@ -1592,6 +1648,7 @@ int kbase_csf_ctx_init(struct kbase_context *kctx) INIT_LIST_HEAD(&kctx->csf.queue_list); INIT_LIST_HEAD(&kctx->csf.link); + atomic_set(&kctx->csf.pending_sync_update, 0); kbase_csf_event_init(kctx); @@ -1827,7 +1884,7 @@ void kbase_csf_ctx_term(struct kbase_context *kctx) } /** - * handle_oom_event - Handle the OoM event generated by the firmware for the + * handle_oom_event() - Handle the OoM event generated by the firmware for the * CSI. * * @group: Pointer to the CSG group the oom-event belongs to. @@ -1902,7 +1959,7 @@ static int handle_oom_event(struct kbase_queue_group *const group, } /** - * report_tiler_oom_error - Report a CSG error due to a tiler heap OOM event + * report_tiler_oom_error() - Report a CSG error due to a tiler heap OOM event * * @group: Pointer to the GPU command queue group that encountered the error */ @@ -1945,7 +2002,7 @@ static void flush_gpu_cache_on_fatal_error(struct kbase_device *kbdev) } /** - * kbase_queue_oom_event - Handle tiler out-of-memory for a GPU command queue. + * kbase_queue_oom_event() - Handle tiler out-of-memory for a GPU command queue. * * @queue: Pointer to queue for which out-of-memory event was received. * @@ -2033,7 +2090,7 @@ unlock: } /** - * oom_event_worker - Tiler out-of-memory handler called from a workqueue. + * oom_event_worker() - Tiler out-of-memory handler called from a workqueue. * * @data: Pointer to a work_struct embedded in GPU command queue data. 
* @@ -2061,7 +2118,8 @@ static void oom_event_worker(struct work_struct *data) } /** - * report_group_timeout_error - Report the timeout error for the group to userspace. + * report_group_timeout_error() - Report the timeout error for the group to + * userspace. * * @group: Pointer to the group for which timeout error occurred */ @@ -2085,7 +2143,7 @@ static void report_group_timeout_error(struct kbase_queue_group *const group) } /** - * timer_event_worker - Handle the progress timeout error for the group + * timer_event_worker() - Handle the progress timeout error for the group * * @data: Pointer to a work_struct embedded in GPU command queue group data. * @@ -2120,7 +2178,7 @@ static void timer_event_worker(struct work_struct *data) } /** - * handle_progress_timer_event - Progress timer timeout event handler. + * handle_progress_timer_event() - Progress timer timeout event handler. * * @group: Pointer to GPU queue group for which the timeout event is received. * @@ -2211,41 +2269,7 @@ static void report_group_fatal_error(struct kbase_queue_group *const group) } /** - * protm_event_worker - Protected mode switch request event handler - * called from a workqueue. - * - * @data: Pointer to a work_struct embedded in GPU command queue group data. - * - * Request to switch to protected mode. 
- */ -static void protm_event_worker(struct work_struct *data) -{ - struct kbase_queue_group *const group = - container_of(data, struct kbase_queue_group, protm_event_work); - struct kbase_protected_suspend_buffer *sbuf = &group->protected_suspend_buf; - int err = 0; - - KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, PROTM_EVENT_WORKER_START, group, 0u); - - err = alloc_grp_protected_suspend_buffer_pages(group); - if (!err) { - kbase_csf_scheduler_group_protm_enter(group); - } else if (err == -ENOMEM && sbuf->alloc_retries <= PROTM_ALLOC_MAX_RETRIES) { - sbuf->alloc_retries++; - /* try again to allocate pages */ - queue_work(group->kctx->csf.wq, &group->protm_event_work); - } else if (sbuf->alloc_retries >= PROTM_ALLOC_MAX_RETRIES || err != -ENOMEM) { - dev_err(group->kctx->kbdev->dev, - "Failed to allocate physical pages for Protected mode suspend buffer for the group %d of context %d_%d", - group->handle, group->kctx->tgid, group->kctx->id); - report_group_fatal_error(group); - } - - KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, PROTM_EVENT_WORKER_END, group, 0u); -} - -/** - * handle_fault_event - Handler for CS fault. + * handle_fault_event() - Handler for CS fault. * * @queue: Pointer to queue for which fault event was received. * @cs_ack: Value of the CS_ACK register in the CS kernel input page used for @@ -2286,47 +2310,32 @@ static void handle_fault_event(struct kbase_queue *const queue, const u32 cs_ack cs_fault_info_exception_data); -#if IS_ENABLED(CONFIG_DEBUG_FS) - /* CS_RESOURCE_TERMINATED type fault event can be ignored from the - * standpoint of dump on error. It is used to report fault for the CSIs - * that are associated with the same CSG as the CSI for which the actual - * fault was reported by the Iterator. - * Dumping would be triggered when the actual fault is reported. + /* If dump-on-fault daemon is waiting for a fault, wake up the daemon. 
+ * Acknowledging the fault is deferred to the bottom-half until the wait + * of the dump completion is done. * - * CS_INHERIT_FAULT can also be ignored. It could happen due to the error - * in other types of queues (cpu/kcpu). If a fault had occurred in some - * other GPU queue then the dump would have been performed anyways when - * that fault was reported. + * Otherwise acknowledge the fault and ring the doorbell for the faulty queue + * to enter into recoverable state. */ - if ((cs_fault_exception_type != CS_FAULT_EXCEPTION_TYPE_CS_INHERIT_FAULT) && - (cs_fault_exception_type != CS_FAULT_EXCEPTION_TYPE_CS_RESOURCE_TERMINATED)) { - if (unlikely(kbase_debug_csf_fault_notify(kbdev, queue->kctx, DF_CS_FAULT))) { - queue->cs_error = cs_fault; - queue->cs_error_info = cs_fault_info; - queue->cs_error_fatal = false; - queue_work(queue->kctx->csf.wq, &queue->cs_error_work); - return; - } - } -#endif + if (likely(!kbase_debug_csf_fault_notify(kbdev, queue->kctx, DF_CS_FAULT))) { + kbase_csf_firmware_cs_input_mask(stream, CS_REQ, cs_ack, CS_REQ_FAULT_MASK); + kbase_csf_ring_cs_kernel_doorbell(kbdev, queue->csi_index, queue->group->csg_nr, + true); + queue->cs_error_acked = true; + } else + queue->cs_error_acked = false; - kbase_csf_firmware_cs_input_mask(stream, CS_REQ, cs_ack, CS_REQ_FAULT_MASK); - kbase_csf_ring_cs_kernel_doorbell(kbdev, queue->csi_index, queue->group->csg_nr, true); + queue->cs_error = cs_fault; + queue->cs_error_info = cs_fault_info; + queue->cs_error_fatal = false; + if (!queue_work(queue->kctx->csf.wq, &queue->cs_error_work)) + dev_warn(kbdev->dev, "%s: failed to enqueue a work", __func__); } -static void report_queue_fatal_error(struct kbase_queue *const queue, u32 cs_fatal, - u64 cs_fatal_info, struct kbase_queue_group *group) +static void report_queue_error(struct kbase_queue *const queue, u32 cs_error, u64 cs_error_info, + struct kbase_queue_group *group, bool fatal) { - struct base_csf_notification - error = { .type = 
BASE_CSF_NOTIFICATION_GPU_QUEUE_GROUP_ERROR, - .payload = { - .csg_error = { - .error = { .error_type = - BASE_GPU_QUEUE_GROUP_QUEUE_ERROR_FATAL, - .payload = { .fatal_queue = { - .sideband = cs_fatal_info, - .status = cs_fatal, - } } } } } }; + struct base_csf_notification error = { .type = BASE_CSF_NOTIFICATION_GPU_QUEUE_GROUP_ERROR }; if (!queue) return; @@ -2335,17 +2344,30 @@ static void report_queue_fatal_error(struct kbase_queue *const queue, u32 cs_fat return; error.payload.csg_error.handle = group->handle; - error.payload.csg_error.error.payload.fatal_queue.csi_index = (__u8)queue->csi_index; + if (fatal) { + error.payload.csg_error.error.error_type = BASE_GPU_QUEUE_GROUP_QUEUE_ERROR_FATAL; + error.payload.csg_error.error.payload.fatal_queue.sideband = cs_error_info; + error.payload.csg_error.error.payload.fatal_queue.status = cs_error; + error.payload.csg_error.error.payload.fatal_queue.csi_index = queue->csi_index; + } else { + error.payload.csg_error.error.error_type = BASE_GPU_QUEUE_GROUP_QUEUE_ERROR_FAULT; + error.payload.csg_error.error.payload.fault_queue.sideband = cs_error_info; + error.payload.csg_error.error.payload.fault_queue.status = cs_error; + error.payload.csg_error.error.payload.fault_queue.csi_index = queue->csi_index; + } kbase_csf_event_add_error(queue->kctx, &group->error_fatal, &error); kbase_event_wakeup(queue->kctx); + + if (!fatal) + queue->clear_faults = false; } /** - * cs_error_worker - Handle the CS_FATAL/CS_FAULT error for the GPU queue + * cs_error_worker() - Handle the CS_FATAL/CS_FAULT error for the GPU queue * * @data: Pointer to a work_struct embedded in GPU command queue. * - * Terminate the CSG and report the error to userspace. + * Terminate the CSG for CS_FATAL and report the error to userspace. 
*/ static void cs_error_worker(struct work_struct *const data) { @@ -2356,6 +2378,7 @@ static void cs_error_worker(struct work_struct *const data) struct kbase_queue_group *group; bool reset_prevented = false; int err; + const bool cs_fatal = queue->cs_error_fatal; kbase_debug_csf_fault_wait_completion(kbdev); err = kbase_reset_gpu_prevent_and_wait(kbdev); @@ -2371,45 +2394,57 @@ static void cs_error_worker(struct work_struct *const data) group = get_bound_queue_group(queue); if (!group) { - dev_warn(kbdev->dev, "queue not bound when handling fatal event"); + dev_warn(kbdev->dev, "queue not bound when handling an error event"); goto unlock; } -#if IS_ENABLED(CONFIG_DEBUG_FS) - if (!queue->cs_error_fatal) { - unsigned long flags; - int slot_num; + if (!cs_fatal) { + if (group->cs_fault_report_enable && queue->clear_faults) + report_queue_error(queue, queue->cs_error, queue->cs_error_info, group, + false); + if (unlikely(!queue->cs_error_acked)) { + unsigned long flags; + int slot_num; - kbase_csf_scheduler_spin_lock(kbdev, &flags); - slot_num = kbase_csf_scheduler_group_get_slot_locked(group); - if (slot_num >= 0) { - struct kbase_csf_cmd_stream_group_info const *ginfo = - &kbdev->csf.global_iface.groups[slot_num]; - struct kbase_csf_cmd_stream_info const *stream = - &ginfo->streams[queue->csi_index]; - u32 const cs_ack = kbase_csf_firmware_cs_output(stream, CS_ACK); + kbase_csf_scheduler_spin_lock(kbdev, &flags); + slot_num = kbase_csf_scheduler_group_get_slot_locked(group); + if (likely(slot_num >= 0)) { + struct kbase_csf_cmd_stream_group_info const *ginfo = + &kbdev->csf.global_iface.groups[slot_num]; + struct kbase_csf_cmd_stream_info const *stream = + &ginfo->streams[queue->csi_index]; + u32 const cs_ack = kbase_csf_firmware_cs_output(stream, CS_ACK); + u32 const cs_req = kbase_csf_firmware_cs_input_read(stream, CS_REQ); - kbase_csf_firmware_cs_input_mask(stream, CS_REQ, cs_ack, CS_REQ_FAULT_MASK); - kbase_csf_ring_cs_kernel_doorbell(kbdev, queue->csi_index, 
slot_num, true); + /* Acknowledge the fault and ring the doorbell for the queue + * if it hasn't yet done. + */ + if ((cs_ack & CS_ACK_FAULT_MASK) != (cs_req & CS_REQ_FAULT_MASK)) { + kbase_csf_firmware_cs_input_mask(stream, CS_REQ, cs_ack, + CS_REQ_FAULT_MASK); + kbase_csf_ring_cs_kernel_doorbell(kbdev, queue->csi_index, + slot_num, true); + } + } + kbase_csf_scheduler_spin_unlock(kbdev, flags); } - kbase_csf_scheduler_spin_unlock(kbdev, flags); - goto unlock; - } -#endif - - term_queue_group(group); - flush_gpu_cache_on_fatal_error(kbdev); - /* For an invalid GPU page fault, CS_BUS_FAULT fatal error is expected after the - * page fault handler disables the AS of faulty context. Need to skip reporting the - * CS_BUS_FAULT fatal error to the Userspace as it doesn't have the full fault info. - * Page fault handler will report the fatal error with full page fault info. - */ - if ((cs_fatal_exception_type == CS_FATAL_EXCEPTION_TYPE_CS_BUS_FAULT) && group->faulted) { - dev_dbg(kbdev->dev, - "Skipped reporting CS_BUS_FAULT for queue %d of group %d of ctx %d_%d", - queue->csi_index, group->handle, kctx->tgid, kctx->id); } else { - report_queue_fatal_error(queue, queue->cs_error, queue->cs_error_info, group); + term_queue_group(group); + flush_gpu_cache_on_fatal_error(kbdev); + /* For an invalid GPU page fault, CS_BUS_FAULT fatal error is expected after the + * page fault handler disables the AS of faulty context. Need to skip reporting the + * CS_BUS_FAULT fatal error to the Userspace as it doesn't have the full fault info. + * Page fault handler will report the fatal error with full page fault info. 
+ */ + if ((cs_fatal_exception_type == CS_FATAL_EXCEPTION_TYPE_CS_BUS_FAULT) && + group->faulted) { + dev_dbg(kbdev->dev, + "Skipped reporting CS_BUS_FAULT for queue %d of group %d of ctx %d_%d", + queue->csi_index, group->handle, kctx->tgid, kctx->id); + } else { + report_queue_error(queue, queue->cs_error, queue->cs_error_info, group, + true); + } } unlock: @@ -2419,7 +2454,7 @@ unlock: } /** - * handle_fatal_event - Handler for CS fatal. + * handle_fatal_event() - Handler for CS fatal. * * @queue: Pointer to queue for which fatal event was received. * @stream: Pointer to the structure containing info provided by the @@ -2481,7 +2516,7 @@ static void handle_fatal_event(struct kbase_queue *const queue, } /** - * process_cs_interrupts - Process interrupts for a CS. + * process_cs_interrupts() - Process interrupts for a CS. * * @group: Pointer to GPU command queue group data. * @ginfo: The CSG interface provided by the firmware. @@ -2595,7 +2630,7 @@ static void process_cs_interrupts(struct kbase_queue_group *const group, } if (!group->protected_suspend_buf.pma) - queue_work(group->kctx->csf.wq, &group->protm_event_work); + kbase_csf_scheduler_enqueue_protm_event_work(group); if (test_bit(group->csg_nr, scheduler->csg_slots_idle_mask)) { clear_bit(group->csg_nr, scheduler->csg_slots_idle_mask); @@ -2608,7 +2643,7 @@ static void process_cs_interrupts(struct kbase_queue_group *const group, } /** - * process_csg_interrupts - Process interrupts for a CSG. + * process_csg_interrupts() - Process interrupts for a CSG. * * @kbdev: Instance of a GPU platform device that implements a CSF interface. * @csg_nr: CSG number. @@ -2728,7 +2763,7 @@ static void process_csg_interrupts(struct kbase_device *const kbdev, u32 const c } /** - * process_prfcnt_interrupts - Process performance counter interrupts. + * process_prfcnt_interrupts() - Process performance counter interrupts. * * @kbdev: Instance of a GPU platform device that implements a CSF interface. 
* @glb_req: Global request register value. @@ -2800,7 +2835,7 @@ static void process_prfcnt_interrupts(struct kbase_device *kbdev, u32 glb_req, u } /** - * check_protm_enter_req_complete - Check if PROTM_ENTER request completed + * check_protm_enter_req_complete() - Check if PROTM_ENTER request completed * * @kbdev: Instance of a GPU platform device that implements a CSF interface. * @glb_req: Global request register value. @@ -2834,7 +2869,7 @@ static inline void check_protm_enter_req_complete(struct kbase_device *kbdev, u3 } /** - * process_protm_exit - Handle the protected mode exit interrupt + * process_protm_exit() - Handle the protected mode exit interrupt * * @kbdev: Instance of a GPU platform device that implements a CSF interface. * @glb_ack: Global acknowledge register value. @@ -2923,7 +2958,7 @@ static inline void process_tracked_info_for_protm(struct kbase_device *kbdev, if (!tock_triggered) { dev_dbg(kbdev->dev, "Group-%d on slot-%d start protm work\n", group->handle, group->csg_nr); - queue_work(group->kctx->csf.wq, &group->protm_event_work); + kbase_csf_scheduler_enqueue_protm_event_work(group); } } } @@ -2952,6 +2987,46 @@ static void order_job_irq_clear_with_iface_mem_read(void) dmb(osh); } +static const char *const glb_fatal_status_errors[GLB_FATAL_STATUS_VALUE_COUNT] = { + [GLB_FATAL_STATUS_VALUE_OK] = "OK", + [GLB_FATAL_STATUS_VALUE_ASSERT] = "Firmware assert triggered", + [GLB_FATAL_STATUS_VALUE_UNEXPECTED_EXCEPTION] = + "Hardware raised an exception firmware did not expect", + [GLB_FATAL_STATUS_VALUE_HANG] = "Firmware hangs and watchdog timer expired", +}; + +/** + * handle_glb_fatal_event() - Handle the GLB fatal event + * + * @kbdev: Instance of GPU device. 
+ * @global_iface: CSF global interface + */ +static void handle_glb_fatal_event(struct kbase_device *kbdev, + const struct kbase_csf_global_iface *const global_iface) +{ + const char *error_string = NULL; + const u32 fatal_status = kbase_csf_firmware_global_output(global_iface, GLB_FATAL_STATUS); + + lockdep_assert_held(&kbdev->hwaccess_lock); + kbase_csf_scheduler_spin_lock_assert_held(kbdev); + dev_warn(kbdev->dev, "MCU encountered unrecoverable error"); + + if (fatal_status < GLB_FATAL_STATUS_VALUE_COUNT) + error_string = glb_fatal_status_errors[fatal_status]; + else { + dev_err(kbdev->dev, "Invalid GLB_FATAL_STATUS (%u)", fatal_status); + return; + } + + if (fatal_status == GLB_FATAL_STATUS_VALUE_OK) + dev_err(kbdev->dev, "GLB_FATAL_STATUS(OK) must be set with proper reason"); + else { + dev_warn(kbdev->dev, "GLB_FATAL_STATUS: %s", error_string); + if (kbase_prepare_to_reset_gpu_locked(kbdev, RESET_FLAGS_NONE)) + kbase_reset_gpu_locked(kbdev); + } +} + void kbase_csf_interrupt(struct kbase_device *kbdev, u32 val) { bool deferred_handling_glb_idle_irq = false; @@ -3026,6 +3101,9 @@ void kbase_csf_interrupt(struct kbase_device *kbdev, u32 val) deferred_handling_glb_idle_irq = true; } + if (glb_ack & GLB_ACK_FATAL_MASK) + handle_glb_fatal_event(kbdev, global_iface); + process_prfcnt_interrupts(kbdev, glb_req, glb_ack); kbase_csf_scheduler_spin_unlock(kbdev, flags); @@ -3050,13 +3128,10 @@ void kbase_csf_interrupt(struct kbase_device *kbdev, u32 val) if (deferred_handling_glb_idle_irq) { unsigned long flags; - bool invoke_pm_state_machine; kbase_csf_scheduler_spin_lock(kbdev, &flags); - invoke_pm_state_machine = kbase_csf_scheduler_process_gpu_idle_event(kbdev); + kbase_csf_scheduler_process_gpu_idle_event(kbdev); kbase_csf_scheduler_spin_unlock(kbdev, flags); - if (unlikely(invoke_pm_state_machine)) - kbase_pm_update_state(kbdev); } wake_up_all(&kbdev->csf.event_wait); @@ -3087,6 +3162,11 @@ void kbase_csf_doorbell_mapping_term(struct kbase_device *kbdev) if 
(kbdev->csf.db_filp) { struct page *page = as_page(kbdev->csf.dummy_db_page); + /* This is a shared dummy sink page for avoiding potential segmentation fault + * to user-side library when a csi is off slot. Additionally, the call is on + * module unload path, so the page can be left uncleared before returning it + * back to kbdev memory pool. + */ kbase_mem_pool_free(&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], page, false); fput(kbdev->csf.db_filp); @@ -3118,26 +3198,27 @@ int kbase_csf_doorbell_mapping_init(struct kbase_device *kbdev) return 0; } -void kbase_csf_pending_gpuq_kicks_init(struct kbase_device *kbdev) +void kbase_csf_pending_gpuq_kick_queues_init(struct kbase_device *kbdev) { size_t i; - for (i = 0; i != ARRAY_SIZE(kbdev->csf.pending_gpuq_kicks); ++i) - INIT_LIST_HEAD(&kbdev->csf.pending_gpuq_kicks[i]); - spin_lock_init(&kbdev->csf.pending_gpuq_kicks_lock); + atomic_set(&kbdev->csf.pending_gpuq_kicks, false); + for (i = 0; i != ARRAY_SIZE(kbdev->csf.pending_gpuq_kick_queues); ++i) + INIT_LIST_HEAD(&kbdev->csf.pending_gpuq_kick_queues[i]); + spin_lock_init(&kbdev->csf.pending_gpuq_kick_queues_lock); } -void kbase_csf_pending_gpuq_kicks_term(struct kbase_device *kbdev) +void kbase_csf_pending_gpuq_kick_queues_term(struct kbase_device *kbdev) { size_t i; - spin_lock(&kbdev->csf.pending_gpuq_kicks_lock); - for (i = 0; i != ARRAY_SIZE(kbdev->csf.pending_gpuq_kicks); ++i) { - if (!list_empty(&kbdev->csf.pending_gpuq_kicks[i])) + spin_lock(&kbdev->csf.pending_gpuq_kick_queues_lock); + for (i = 0; i != ARRAY_SIZE(kbdev->csf.pending_gpuq_kick_queues); ++i) { + if (!list_empty(&kbdev->csf.pending_gpuq_kick_queues[i])) dev_warn(kbdev->dev, "Some GPU queue kicks for priority %zu were not handled", i); } - spin_unlock(&kbdev->csf.pending_gpuq_kicks_lock); + spin_unlock(&kbdev->csf.pending_gpuq_kick_queues_lock); } void kbase_csf_free_dummy_user_reg_page(struct kbase_device *kbdev) @@ -3145,6 +3226,11 @@ void kbase_csf_free_dummy_user_reg_page(struct 
kbase_device *kbdev) if (kbdev->csf.user_reg.filp) { struct page *page = as_page(kbdev->csf.user_reg.dummy_page); + /* This is a shared dummy page in place of the real USER Register page just + * before the GPU is powered down. Additionally, the call is on module unload + * path, so the page can be left uncleared before returning it back to kbdev + * memory pool. + */ kbase_mem_pool_free(&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], page, false); fput(kbdev->csf.user_reg.filp); } @@ -3227,17 +3313,17 @@ void kbase_csf_process_queue_kick(struct kbase_queue *queue) if (err == -EBUSY) { retry_kick = true; - spin_lock(&kbdev->csf.pending_gpuq_kicks_lock); + spin_lock(&kbdev->csf.pending_gpuq_kick_queues_lock); if (list_empty(&queue->pending_kick_link)) { /* A failed queue kick shall be pushed to the * back of the queue to avoid potential abuse. */ list_add_tail( &queue->pending_kick_link, - &kbdev->csf.pending_gpuq_kicks[queue->group_priority]); - spin_unlock(&kbdev->csf.pending_gpuq_kicks_lock); + &kbdev->csf.pending_gpuq_kick_queues[queue->group_priority]); + spin_unlock(&kbdev->csf.pending_gpuq_kick_queues_lock); } else { - spin_unlock(&kbdev->csf.pending_gpuq_kicks_lock); + spin_unlock(&kbdev->csf.pending_gpuq_kick_queues_lock); WARN_ON(atomic_read(&queue->pending_kick) == 0); } @@ -3260,3 +3346,27 @@ out_release_queue: WARN_ON(atomic_read(&queue->pending_kick) == 0); atomic_dec(&queue->pending_kick); } + +void kbase_csf_process_protm_event_request(struct kbase_queue_group *group) +{ + struct kbase_protected_suspend_buffer *sbuf = &group->protected_suspend_buf; + int err = 0; + + KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, PROTM_EVENT_WORKER_START, group, 0u); + + err = alloc_grp_protected_suspend_buffer_pages(group); + if (!err) { + kbase_csf_scheduler_group_protm_enter(group); + } else if (err == -ENOMEM && sbuf->alloc_retries <= PROTM_ALLOC_MAX_RETRIES) { + sbuf->alloc_retries++; + /* try again to allocate pages */ + 
kbase_csf_scheduler_enqueue_protm_event_work(group); + } else if (sbuf->alloc_retries >= PROTM_ALLOC_MAX_RETRIES || err != -ENOMEM) { + dev_err(group->kctx->kbdev->dev, + "Failed to allocate physical pages for Protected mode suspend buffer for the group %d of context %d_%d", + group->handle, group->kctx->tgid, group->kctx->id); + report_group_fatal_error(group); + } + + KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, PROTM_EVENT_WORKER_END, group, 0u); +} diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf.h index b2f6ab2c4a27..566136342a06 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf.h +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf.h @@ -243,6 +243,19 @@ struct kbase_queue_group *kbase_csf_find_queue_group(struct kbase_context *kctx, */ int kbase_csf_queue_group_handle_is_valid(struct kbase_context *kctx, u8 group_handle); +/** + * kbase_csf_queue_group_clear_faults - Re-enable CS Fault reporting. + * + * @kctx: Pointer to the kbase context within which the + * CS Faults for the queues has to be re-enabled. + * @clear_faults: Pointer to the structure which contains details of the + * queues for which the CS Fault reporting has to be re-enabled. + * + * Return: 0 on success, or negative on failure. + */ +int kbase_csf_queue_group_clear_faults(struct kbase_context *kctx, + struct kbase_ioctl_queue_group_clear_faults *clear_faults); + /** * kbase_csf_queue_group_create - Create a GPU command queue group. * @@ -379,20 +392,20 @@ int kbase_csf_setup_dummy_user_reg_page(struct kbase_device *kbdev); void kbase_csf_free_dummy_user_reg_page(struct kbase_device *kbdev); /** - * kbase_csf_pending_gpuq_kicks_init - Initialize the data used for handling - * GPU queue kicks. + * kbase_csf_pending_gpuq_kick_queues_init - Initialize the data used for handling + * GPU queue kicks. * * @kbdev: Instance of a GPU platform device that implements a CSF interface. 
*/ -void kbase_csf_pending_gpuq_kicks_init(struct kbase_device *kbdev); +void kbase_csf_pending_gpuq_kick_queues_init(struct kbase_device *kbdev); /** - * kbase_csf_pending_gpuq_kicks_term - De-initialize the data used for handling - * GPU queue kicks. + * kbase_csf_pending_gpuq_kick_queues_term - De-initialize the data used for handling + * GPU queue kicks. * * @kbdev: Instance of a GPU platform device that implements a CSF interface. */ -void kbase_csf_pending_gpuq_kicks_term(struct kbase_device *kbdev); +void kbase_csf_pending_gpuq_kick_queues_term(struct kbase_device *kbdev); /** * kbase_csf_ring_csg_doorbell - ring the doorbell for a CSG interface. @@ -546,4 +559,13 @@ static inline u64 kbase_csf_ktrace_gpu_cycle_cnt(struct kbase_device *kbdev) */ void kbase_csf_process_queue_kick(struct kbase_queue *queue); +/** + * kbase_csf_process_protm_event_request - Handle protected mode switch request + * + * @group: The group to handle protected mode request + * + * Request to switch to protected mode. + */ +void kbase_csf_process_protm_event_request(struct kbase_queue_group *group); + #endif /* _KBASE_CSF_H_ */ diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_defs.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_defs.h index 8d7c896e1051..155c20aaa356 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_defs.h +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_defs.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2018-2024 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -268,6 +268,7 @@ enum kbase_queue_group_priority { * Shader, L2 and MCU state. * @CSF_GPU_RESET_TIMEOUT: Waiting timeout for GPU reset to complete. * @CSF_CSG_SUSPEND_TIMEOUT: Timeout given for a CSG to be suspended. 
+ * @CSF_CSG_TERM_TIMEOUT: Timeout given for a CSG to be terminated. * @CSF_FIRMWARE_BOOT_TIMEOUT: Maximum time to wait for firmware to boot. * @CSF_FIRMWARE_PING_TIMEOUT: Maximum time to wait for firmware to respond * to a ping from KBase. @@ -290,6 +291,7 @@ enum kbase_timeout_selector { CSF_PM_TIMEOUT, CSF_GPU_RESET_TIMEOUT, CSF_CSG_SUSPEND_TIMEOUT, + CSF_CSG_TERM_TIMEOUT, CSF_FIRMWARE_BOOT_TIMEOUT, CSF_FIRMWARE_PING_TIMEOUT, CSF_SCHED_PROTM_PROGRESS_TIMEOUT, @@ -398,6 +400,10 @@ struct kbase_csf_notification { * @cs_error: Records information about the CS fatal event or * about CS fault event if dump on fault is enabled. * @cs_error_fatal: Flag to track if the CS fault or CS fatal event occurred. + * @cs_error_acked: Flag to indicate that acknowledging the fault has been done + * at top-half of fault handler. + * @clear_faults: Flag to track if the CS fault reporting is enabled for this queue. + * It's protected by &kbase_context.csf.lock. * @extract_ofs: The current EXTRACT offset, this is only updated when handling * the GLB IDLE IRQ if the idle timeout value is non-0 in order * to help detect a queue's true idle status. @@ -441,6 +447,8 @@ struct kbase_queue { u64 cs_error_info; u32 cs_error; bool cs_error_fatal; + bool cs_error_acked; + bool clear_faults; u64 extract_ofs; u64 saved_cmd_ptr; }; @@ -501,6 +509,8 @@ struct kbase_protected_suspend_buffer { * @compute_max: Maximum number of compute endpoints the group is * allowed to use. * @csi_handlers: Requested CSI exception handler flags for the group. + * @cs_fault_report_enable: Indicates whether reporting of CS_FAULTs to + * userspace is enabled. * @tiler_mask: Mask of tiler endpoints the group is allowed to use. * @fragment_mask: Mask of fragment endpoints the group is allowed to use. * @compute_mask: Mask of compute endpoints the group is allowed to use. @@ -531,8 +541,13 @@ struct kbase_protected_suspend_buffer { * @bound_queues: Array of registered queues bound to this queue group.
* @doorbell_nr: Index of the hardware doorbell page assigned to the * group. - * @protm_event_work: Work item corresponding to the protected mode entry - * event for this queue. + * @protm_event_work: List item corresponding to the protected mode entry + * event for this queue. This would be handled by + * kbase_csf_scheduler_kthread(). + * @pending_protm_event_work: Indicates that kbase_csf_scheduler_kthread() should + * handle PROTM request for this group. This would + * be set to false when the work is done. This is used + * mainly for synchronisation with group termination. * @protm_pending_bitmap: Bit array to keep a track of CSs that * have pending protected mode entry requests. * @error_fatal: An error of type BASE_GPU_QUEUE_GROUP_ERROR_FATAL to be @@ -569,7 +584,7 @@ struct kbase_queue_group { u8 compute_max; u8 csi_handlers; - + __u8 cs_fault_report_enable; u64 tiler_mask; u64 fragment_mask; u64 compute_mask; @@ -588,7 +603,8 @@ struct kbase_queue_group { struct kbase_queue *bound_queues[MAX_SUPPORTED_STREAMS_PER_GROUP]; int doorbell_nr; - struct work_struct protm_event_work; + struct list_head protm_event_work; + atomic_t pending_protm_event_work; DECLARE_BITMAP(protm_pending_bitmap, MAX_SUPPORTED_STREAMS_PER_GROUP); struct kbase_csf_notification error_fatal; @@ -625,6 +641,9 @@ struct kbase_queue_group { * @cmd_seq_num: The sequence number assigned to an enqueued command, * in incrementing order (older commands shall have a * smaller number). + * @kcpu_wq: Work queue to process KCPU commands for all queues in this + * context. This would be used if the context is not prioritised, + * otherwise it would be handled by kbase_csf_scheduler_kthread(). * @jit_lock: Lock to serialise JIT operations. 
* @jit_cmds_head: A list of the just-in-time memory commands, both * allocate & free, in submission order, protected @@ -640,6 +659,8 @@ struct kbase_csf_kcpu_queue_context { DECLARE_BITMAP(in_use, KBASEP_MAX_KCPU_QUEUES); atomic64_t cmd_seq_num; + struct workqueue_struct *kcpu_wq; + struct mutex jit_lock; struct list_head jit_cmds_head; struct list_head jit_blocked_queues; @@ -747,15 +768,7 @@ struct kbase_csf_ctx_heap_reclaim_info { * GPU command queues are idle and at least one of them * is blocked on a sync wait operation. * @num_idle_wait_grps: Length of the @idle_wait_groups list. - * @sync_update_wq_high_prio: high-priority work queue to process the - * SYNC_UPDATE events by sync_set / sync_add - * instruction execution on command streams bound to - * groups of @idle_wait_groups list. This WQ would - * be used if the context is prioritised. - * @sync_update_wq_normal_prio: similar to sync_update_wq_high_prio, but this - * WQ would be used if the context is not - * prioritised. - * @sync_update_work: Work item to process the SYNC_UPDATE events. + * @sync_update_work: List item to process the SYNC_UPDATE event. * @ngrp_to_schedule: Number of groups added for the context to the * 'groups_to_schedule' list of scheduler instance. * @heap_info: Heap reclaim information data of the kctx. As the @@ -768,9 +781,7 @@ struct kbase_csf_scheduler_context { u32 num_runnable_grps; struct list_head idle_wait_groups; u32 num_idle_wait_grps; - struct workqueue_struct *sync_update_wq_high_prio; - struct workqueue_struct *sync_update_wq_normal_prio; - struct work_struct sync_update_work; + struct list_head sync_update_work; u32 ngrp_to_schedule; struct kbase_csf_ctx_heap_reclaim_info heap_info; }; @@ -865,17 +876,16 @@ struct kbase_csf_user_reg_context { * @wq: Dedicated workqueue to process work items corresponding * to the OoM events raised for chunked tiler heaps being * used by GPU command queues, and progress timeout events. 
- * @kcpu_wq_high_prio: High-priority work queue to process KCPU commands for - * all queues in this context. This WQ would be used if - * the context is prioritised. - * @kcpu_wq_normal_prio: Similar to kcpu_wq_high_prio, but this WQ would be - * used if the context is not prioritised. * @link: Link to this csf context in the 'runnable_kctxs' list of * the scheduler instance * @sched: Object representing the scheduler's context * @cpu_queue: CPU queue information. Only be available when DEBUG_FS * is enabled. * @user_reg: Collective information to support mapping to USER Register page. + * @pending_sync_update: Indicates that kbase_csf_scheduler_kthread() should + * handle SYNC_UPDATE event for this context. This would + * be set to false when the work is done. This is used + * mainly for synchronisation with context termination. */ struct kbase_csf_context { struct list_head event_pages_head; @@ -888,12 +898,11 @@ struct kbase_csf_context { struct kbase_csf_event event; struct kbase_csf_tiler_heap_context tiler_heaps; struct workqueue_struct *wq; - struct workqueue_struct *kcpu_wq_high_prio; - struct workqueue_struct *kcpu_wq_normal_prio; struct list_head link; struct kbase_csf_scheduler_context sched; struct kbase_csf_cpu_queue_context cpu_queue; struct kbase_csf_user_reg_context user_reg; + atomic_t pending_sync_update; }; /** @@ -936,14 +945,15 @@ struct kbase_csf_csg_slot { * struct kbase_csf_sched_heap_reclaim_mgr - Object for managing tiler heap reclaim * kctx lists inside the CSF device's scheduler. * - * @heap_reclaim: Tiler heap reclaim shrinker object. + * @heap_reclaim: Defines Tiler heap reclaim shrinker object. * @ctx_lists: Array of kctx lists, size matching CSG defined priorities. The * lists track the kctxs attached to the reclaim manager. * @unused_pages: Estimated number of unused pages from the @ctxlist array. The * number is indicative for use with reclaim shrinker's count method. 
*/ struct kbase_csf_sched_heap_reclaim_mgr { - struct shrinker heap_reclaim; + DEFINE_KBASE_SHRINKER heap_reclaim; + struct list_head ctx_lists[KBASE_QUEUE_GROUP_PRIORITY_COUNT]; atomic_t unused_pages; }; @@ -1042,10 +1052,29 @@ struct kbase_csf_mcu_shared_regions { * workqueue items (kernel-provided delayed_work * items do not use hrtimer and for some reason do * not provide sufficiently reliable periodicity). - * @pending_tick_work: Indicates that kbase_csf_scheduler_kthread() should perform - * a scheduling tick. - * @pending_tock_work: Indicates that kbase_csf_scheduler_kthread() should perform - * a scheduling tock. + * @pending_sync_update_works: Indicates that kbase_csf_scheduler_kthread() + * should handle SYNC_UPDATE events. + * @sync_update_work_ctxs_lock: Lock protecting the list of contexts that + * require handling SYNC_UPDATE events. + * @sync_update_work_ctxs: The list of contexts that require handling + * SYNC_UPDATE events. + * @pending_protm_event_works: Indicates that kbase_csf_scheduler_kthread() + * should handle PROTM requests. + * @protm_event_work_grps_lock: Lock protecting the list of groups that + * have requested protected mode. + * @protm_event_work_grps: The list of groups that have requested + * protected mode. + * @pending_kcpuq_works: Indicates that kbase_csf_scheduler_kthread() + * should process pending KCPU queue works. + * @kcpuq_work_queues_lock: Lock protecting the list of KCPU queues that + * need to be processed. + * @kcpuq_work_queues: The list of KCPU queues that need to be processed. + * @pending_tick_work: Indicates that kbase_csf_scheduler_kthread() should + * perform a scheduling tick. + * @pending_tock_work: Indicates that kbase_csf_scheduler_kthread() should + * perform a scheduling tock. + * @pending_gpu_idle_work: Indicates that kbase_csf_scheduler_kthread() should + * handle the GPU IDLE event.
* @ping_work: Work item that would ping the firmware at regular * intervals, only if there is a single active CSG * slot, to check if firmware is alive and would @@ -1063,10 +1092,6 @@ struct kbase_csf_mcu_shared_regions { * This pointer being set doesn't necessarily indicates * that GPU is in protected mode, kbdev->protected_mode * needs to be checked for that. - * @idle_wq: Workqueue for executing GPU idle notification - * handler. - * @gpu_idle_work: Work item for facilitating the scheduler to bring - * the GPU to a low-power mode on becoming idle. * @fast_gpu_idle_handling: Indicates whether to relax many of the checks * normally done in the GPU idle worker. This is * set to true when handling the GLB IDLE IRQ if the @@ -1109,7 +1134,8 @@ struct kbase_csf_mcu_shared_regions { * thread when a queue needs attention. * @kthread_running: Whether the GPU queue submission thread should keep * executing. - * @gpuq_kthread: High-priority thread used to handle GPU queue + * @gpuq_kthread: Dedicated thread primarily used to handle + * latency-sensitive tasks such as GPU queue * submissions. 
*/ struct kbase_csf_scheduler { @@ -1134,14 +1160,22 @@ struct kbase_csf_scheduler { unsigned long last_schedule; atomic_t timer_enabled; struct hrtimer tick_timer; + atomic_t pending_sync_update_works; + spinlock_t sync_update_work_ctxs_lock; + struct list_head sync_update_work_ctxs; + atomic_t pending_protm_event_works; + spinlock_t protm_event_work_grps_lock; + struct list_head protm_event_work_grps; + atomic_t pending_kcpuq_works; + spinlock_t kcpuq_work_queues_lock; + struct list_head kcpuq_work_queues; atomic_t pending_tick_work; atomic_t pending_tock_work; + atomic_t pending_gpu_idle_work; struct delayed_work ping_work; struct kbase_context *top_kctx; struct kbase_queue_group *top_grp; struct kbase_queue_group *active_protm_grp; - struct workqueue_struct *idle_wq; - struct work_struct gpu_idle_work; bool fast_gpu_idle_handling; atomic_t gpu_no_longer_idle; atomic_t non_idle_offslot_grps; @@ -1653,12 +1687,16 @@ struct kbase_csf_user_reg { * @dof: Structure for dump on fault. * @user_reg: Collective information to support the mapping to * USER Register page for user processes. - * @pending_gpuq_kicks: Lists of GPU queue that have been kicked but not - * yet processed, categorised by queue group's priority. - * @pending_gpuq_kicks_lock: Protect @pending_gpu_kicks and - * kbase_queue.pending_kick_link. + * @pending_gpuq_kicks: Indicates that kbase_csf_scheduler_kthread() + * should handle GPU queue kicks. + * @pending_gpuq_kick_queues: Lists of GPU queues that have been kicked but not + * yet processed, categorised by queue group's priority. + * @pending_gpuq_kick_queues_lock: Protect @pending_gpuq_kick_queues and + * kbase_queue.pending_kick_link. * @quirks_ext: Pointer to an allocated buffer containing the firmware * workarounds configuration. + * @pmode_sync_sem: RW Semaphore to prevent MMU operations during P.Mode entrance. + * @gpu_idle_timer_enabled: Tracks whether the GPU idle timer is enabled or disabled.
*/ struct kbase_csf_device { struct kbase_mmu_table mcu_mmu; @@ -1710,9 +1748,12 @@ struct kbase_csf_device { struct kbase_debug_coresight_device coresight; #endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */ struct kbase_csf_user_reg user_reg; - struct list_head pending_gpuq_kicks[KBASE_QUEUE_GROUP_PRIORITY_COUNT]; - spinlock_t pending_gpuq_kicks_lock; + atomic_t pending_gpuq_kicks; + struct list_head pending_gpuq_kick_queues[KBASE_QUEUE_GROUP_PRIORITY_COUNT]; + spinlock_t pending_gpuq_kick_queues_lock; u32 *quirks_ext; + struct rw_semaphore pmode_sync_sem; + bool gpu_idle_timer_enabled; }; /** diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware.c index 952a9b9cdd94..35f09028098c 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware.c +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2018-2024 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -1552,7 +1552,6 @@ static bool global_request_complete(struct kbase_device *const kbdev, u32 const unsigned long flags; kbase_csf_scheduler_spin_lock(kbdev, &flags); - if ((kbase_csf_firmware_global_output(global_iface, GLB_ACK) & req_mask) == (kbase_csf_firmware_global_input_read(global_iface, GLB_REQ) & req_mask)) complete = true; @@ -1644,6 +1643,23 @@ static void set_timeout_global(const struct kbase_csf_global_iface *const global set_global_request(global_iface, GLB_REQ_CFG_PROGRESS_TIMER_MASK); } +static inline void set_gpu_idle_timer_glb_req(struct kbase_device *const kbdev, bool set) +{ + struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface; + + kbase_csf_scheduler_spin_lock_assert_held(kbdev); + + if (set) { + kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ, GLB_REQ_REQ_IDLE_ENABLE, + GLB_REQ_IDLE_ENABLE_MASK); + } else { + kbase_csf_firmware_global_input_mask( + global_iface, GLB_REQ, GLB_REQ_REQ_IDLE_DISABLE, GLB_REQ_IDLE_DISABLE_MASK); + } + + kbdev->csf.gpu_idle_timer_enabled = set; +} + static void enable_gpu_idle_timer(struct kbase_device *const kbdev) { struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface; @@ -1657,8 +1673,7 @@ static void enable_gpu_idle_timer(struct kbase_device *const kbdev) kbdev->csf.gpu_idle_dur_count_no_modifier, GLB_IDLE_TIMER_CONFIG_NO_MODIFIER_MASK); - kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ, GLB_REQ_REQ_IDLE_ENABLE, - GLB_REQ_IDLE_ENABLE_MASK); + set_gpu_idle_timer_glb_req(kbdev, true); dev_dbg(kbdev->dev, "Enabling GPU idle timer with count-value: 0x%.8x", kbdev->csf.gpu_idle_dur_count); } @@ -1890,6 +1905,7 @@ static void kbase_csf_firmware_reload_worker(struct work_struct *work) { struct kbase_device *kbdev = container_of(work, struct kbase_device, csf.firmware_reload_work); + unsigned long flags; int 
err; dev_info(kbdev->dev, "reloading firmware"); @@ -1908,7 +1924,9 @@ static void kbase_csf_firmware_reload_worker(struct work_struct *work) return; /* Reboot the firmware */ + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); kbase_csf_firmware_enable_mcu(kbdev); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); } void kbase_csf_firmware_trigger_reload(struct kbase_device *kbdev) @@ -2045,29 +2063,33 @@ u32 kbase_csf_firmware_set_gpu_idle_hysteresis_time(struct kbase_device *kbdev, return kbdev->csf.gpu_idle_dur_count; } - /* The 'reg_lock' is also taken and is held till the update is not + /* The scheduler lock is also taken and is held until the update is * complete, to ensure the update of idle timer value by multiple Users * gets serialized. */ - mutex_lock(&kbdev->csf.reg_lock); - /* The firmware only reads the new idle timer value when the timer is - * disabled. - */ - kbase_csf_scheduler_spin_lock(kbdev, &flags); - kbase_csf_firmware_disable_gpu_idle_timer(kbdev); - kbase_csf_scheduler_spin_unlock(kbdev, flags); - /* Ensure that the request has taken effect */ - wait_for_global_request(kbdev, GLB_REQ_IDLE_DISABLE_MASK); - + kbase_csf_scheduler_lock(kbdev); kbase_csf_scheduler_spin_lock(kbdev, &flags); kbdev->csf.gpu_idle_hysteresis_ns = dur_ns; kbdev->csf.gpu_idle_dur_count = hysteresis_val; kbdev->csf.gpu_idle_dur_count_no_modifier = no_modifier; - kbase_csf_firmware_enable_gpu_idle_timer(kbdev); - kbase_csf_scheduler_spin_unlock(kbdev, flags); - wait_for_global_request(kbdev, GLB_REQ_IDLE_ENABLE_MASK); - mutex_unlock(&kbdev->csf.reg_lock); + if (kbdev->csf.gpu_idle_timer_enabled) { + /* Timer is already enabled. Disable the timer as FW only reads + * the new idle timer value when timer is re-enabled. 
+ */ + kbase_csf_firmware_disable_gpu_idle_timer(kbdev); + kbase_csf_scheduler_spin_unlock(kbdev, flags); + /* Ensure that the request has taken effect */ + wait_for_global_request(kbdev, GLB_REQ_IDLE_DISABLE_MASK); + kbase_csf_scheduler_spin_lock(kbdev, &flags); + kbase_csf_firmware_enable_gpu_idle_timer(kbdev); + kbase_csf_scheduler_spin_unlock(kbdev, flags); + wait_for_global_request(kbdev, GLB_REQ_IDLE_ENABLE_MASK); + } else { + kbase_csf_scheduler_spin_unlock(kbdev, flags); + } + + kbase_csf_scheduler_unlock(kbdev); kbase_csf_scheduler_pm_idle(kbdev); kbase_reset_gpu_allow(kbdev); end: @@ -2255,8 +2277,9 @@ int kbase_csf_firmware_early_init(struct kbase_device *kbdev) kbdev->csf.glb_init_request_pending = true; + init_rwsem(&kbdev->csf.pmode_sync_sem); mutex_init(&kbdev->csf.reg_lock); - kbase_csf_pending_gpuq_kicks_init(kbdev); + kbase_csf_pending_gpuq_kick_queues_init(kbdev); kbdev->csf.fw = (struct kbase_csf_mcu_fw){ .data = NULL }; @@ -2265,7 +2288,7 @@ int kbase_csf_firmware_early_init(struct kbase_device *kbdev) void kbase_csf_firmware_early_term(struct kbase_device *kbdev) { - kbase_csf_pending_gpuq_kicks_term(kbdev); + kbase_csf_pending_gpuq_kick_queues_term(kbdev); mutex_destroy(&kbdev->csf.reg_lock); } @@ -2731,7 +2754,7 @@ int kbase_csf_firmware_mcu_register_poll(struct kbase_device *const kbdev, u32 c unsigned long remaining = kbase_csf_timeout_in_jiffies(kbase_get_timeout_ms(kbdev, CSF_FIRMWARE_TIMEOUT)) + jiffies; - u32 read_val; + u32 read_val = 0; dev_dbg(kbdev->dev, "p: reg %08x val %08x mask %08x", reg_addr, reg_val, val_mask); @@ -2778,12 +2801,9 @@ void kbase_csf_firmware_enable_gpu_idle_timer(struct kbase_device *kbdev) void kbase_csf_firmware_disable_gpu_idle_timer(struct kbase_device *kbdev) { - struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface; - kbase_csf_scheduler_spin_lock_assert_held(kbdev); - kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ, GLB_REQ_REQ_IDLE_DISABLE, - GLB_REQ_IDLE_DISABLE_MASK); + 
set_gpu_idle_timer_glb_req(kbdev, false); dev_dbg(kbdev->dev, "Sending request to disable gpu idle timer"); kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); @@ -2807,6 +2827,7 @@ int kbase_csf_firmware_ping_wait(struct kbase_device *const kbdev, unsigned int return wait_for_global_request_with_timeout(kbdev, GLB_REQ_PING_MASK, wait_timeout_ms); } + int kbase_csf_firmware_set_timeout(struct kbase_device *const kbdev, u64 const timeout) { const struct kbase_csf_global_iface *const global_iface = &kbdev->csf.global_iface; @@ -2845,8 +2866,6 @@ int kbase_csf_wait_protected_mode_enter(struct kbase_device *kbdev) { int err; - lockdep_assert_held(&kbdev->mmu_hw_mutex); - err = wait_for_global_request(kbdev, GLB_REQ_PROTM_ENTER_MASK); if (!err) { @@ -2912,6 +2931,7 @@ void kbase_csf_firmware_enable_mcu(struct kbase_device *kbdev) { struct kbase_csf_global_iface *iface = &kbdev->csf.global_iface; + lockdep_assert_held(&kbdev->hwaccess_lock); /* Clear the HALT bit before triggering the boot of MCU firmware */ kbase_csf_firmware_global_input_mask(iface, GLB_REQ, 0, GLB_REQ_HALT_MASK); @@ -2927,6 +2947,7 @@ void kbase_csf_firmware_trigger_mcu_sleep(struct kbase_device *kbdev) KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_REQUEST_SLEEP(kbdev, kbase_backend_get_cycle_cnt(kbdev)); kbase_csf_scheduler_spin_lock(kbdev, &flags); + set_gpu_idle_timer_glb_req(kbdev, false); set_global_request(global_iface, GLB_REQ_SLEEP_MASK); dev_dbg(kbdev->dev, "Sending sleep request to MCU"); kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); @@ -3191,6 +3212,9 @@ void kbase_csf_firmware_mcu_shared_mapping_term(struct kbase_device *kbdev, } if (csf_mapping->phys) { + /* This is on module unload path, so the pages can be left uncleared before + * returning them back to kbdev memory pool. 
+ */ kbase_mem_pool_free_pages(&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], csf_mapping->num_pages, csf_mapping->phys, false, false); } diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware.h index a2948a98e9a7..f7a9c07dd6a9 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware.h +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2018-2024 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -618,6 +618,7 @@ void kbase_csf_firmware_trigger_mcu_sleep(struct kbase_device *kbdev); bool kbase_csf_firmware_is_mcu_in_sleep(struct kbase_device *kbdev); #endif + /** * kbase_csf_firmware_trigger_reload() - Trigger the reboot of MCU firmware, for * the cold boot case firmware image would diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_cfg.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_cfg.c index d08686f5829b..030a1ebf0ac6 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_cfg.c +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_cfg.c @@ -367,10 +367,10 @@ int kbase_csf_firmware_cfg_fw_wa_init(struct kbase_device *kbdev) */ entry_count = of_property_count_u32_elems(kbdev->dev->of_node, "quirks-ext"); - if (entry_count == -EINVAL) + if (entry_count < 0) entry_count = of_property_count_u32_elems(kbdev->dev->of_node, "quirks_ext"); - if (entry_count == -EINVAL || entry_count == -ENODATA) + if (entry_count < 0) return 0; entry_bytes = (size_t)entry_count * sizeof(u32); diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_no_mali.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_no_mali.c index 90568f6fa09f..0af560fd4260 100644 --- 
a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_no_mali.c +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_no_mali.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2018-2024 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -670,6 +670,23 @@ static void set_timeout_global(const struct kbase_csf_global_iface *const global set_global_request(global_iface, GLB_REQ_CFG_PROGRESS_TIMER_MASK); } +static inline void set_gpu_idle_timer_glb_req(struct kbase_device *const kbdev, bool set) +{ + struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface; + + kbase_csf_scheduler_spin_lock_assert_held(kbdev); + + if (set) { + kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ, GLB_REQ_REQ_IDLE_ENABLE, + GLB_REQ_IDLE_ENABLE_MASK); + } else { + kbase_csf_firmware_global_input_mask( + global_iface, GLB_REQ, GLB_REQ_REQ_IDLE_DISABLE, GLB_REQ_IDLE_DISABLE_MASK); + } + + kbdev->csf.gpu_idle_timer_enabled = set; +} + static void enable_gpu_idle_timer(struct kbase_device *const kbdev) { struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface; @@ -678,8 +695,11 @@ static void enable_gpu_idle_timer(struct kbase_device *const kbdev) kbase_csf_firmware_global_input(global_iface, GLB_IDLE_TIMER, kbdev->csf.gpu_idle_dur_count); - kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ, GLB_REQ_REQ_IDLE_ENABLE, - GLB_REQ_IDLE_ENABLE_MASK); + kbase_csf_firmware_global_input_mask(global_iface, GLB_IDLE_TIMER_CONFIG, + kbdev->csf.gpu_idle_dur_count_no_modifier, + GLB_IDLE_TIMER_CONFIG_NO_MODIFIER_MASK); + + set_gpu_idle_timer_glb_req(kbdev, true); dev_dbg(kbdev->dev, "Enabling GPU idle timer with count-value: 0x%.8x", kbdev->csf.gpu_idle_dur_count); } @@ -857,11 +877,11 @@ static void 
kbase_csf_firmware_reload_worker(struct work_struct *work) container_of(work, struct kbase_device, csf.firmware_reload_work); unsigned long flags; + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); /* Reboot the firmware */ kbase_csf_firmware_enable_mcu(kbdev); /* Tell MCU state machine to transit to next state */ - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); kbdev->csf.firmware_reloaded = true; kbase_pm_update_state(kbdev); spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); @@ -900,7 +920,7 @@ static u32 convert_dur_to_idle_count(struct kbase_device *kbdev, const u32 dur_n /* Get the cntfreq_el0 value, which drives the SYSTEM_TIMESTAMP */ u64 freq = kbase_arch_timer_get_cntfrq(kbdev); u64 dur_val = dur_ns; - u32 cnt_val_u32, reg_val_u32; + u32 cnt_val_u32, reg_val_u32, timer_src; bool src_system_timestamp = freq > 0; if (!src_system_timestamp) { @@ -932,9 +952,9 @@ static u32 convert_dur_to_idle_count(struct kbase_device *kbdev, const u32 dur_n reg_val_u32 = GLB_IDLE_TIMER_TIMEOUT_SET(0, cnt_val_u32); /* add the source flag */ - reg_val_u32 = GLB_IDLE_TIMER_TIMER_SOURCE_SET( - reg_val_u32, (src_system_timestamp ? GLB_IDLE_TIMER_TIMER_SOURCE_SYSTEM_TIMESTAMP : - GLB_IDLE_TIMER_TIMER_SOURCE_GPU_COUNTER)); + timer_src = src_system_timestamp ? GLB_IDLE_TIMER_TIMER_SOURCE_SYSTEM_TIMESTAMP : + GLB_IDLE_TIMER_TIMER_SOURCE_GPU_COUNTER; + reg_val_u32 = GLB_IDLE_TIMER_TIMER_SOURCE_SET(reg_val_u32, timer_src); return reg_val_u32; } @@ -989,29 +1009,33 @@ u32 kbase_csf_firmware_set_gpu_idle_hysteresis_time(struct kbase_device *kbdev, return kbdev->csf.gpu_idle_dur_count; } - /* The 'reg_lock' is also taken and is held till the update is not + /* The scheduler lock is also taken and is held until the update is * complete, to ensure the update of idle timer value by multiple Users * gets serialized. */ - mutex_lock(&kbdev->csf.reg_lock); - /* The firmware only reads the new idle timer value when the timer is - * disabled. 
- */ - kbase_csf_scheduler_spin_lock(kbdev, &flags); - kbase_csf_firmware_disable_gpu_idle_timer(kbdev); - kbase_csf_scheduler_spin_unlock(kbdev, flags); - /* Ensure that the request has taken effect */ - wait_for_global_request(kbdev, GLB_REQ_IDLE_DISABLE_MASK); - + kbase_csf_scheduler_lock(kbdev); kbase_csf_scheduler_spin_lock(kbdev, &flags); kbdev->csf.gpu_idle_hysteresis_ns = dur_ns; kbdev->csf.gpu_idle_dur_count = hysteresis_val; kbdev->csf.gpu_idle_dur_count_no_modifier = no_modifier; - kbase_csf_firmware_enable_gpu_idle_timer(kbdev); - kbase_csf_scheduler_spin_unlock(kbdev, flags); - wait_for_global_request(kbdev, GLB_REQ_IDLE_ENABLE_MASK); - mutex_unlock(&kbdev->csf.reg_lock); + if (kbdev->csf.gpu_idle_timer_enabled) { + /* Timer is already enabled. Disable the timer as FW only reads + * the new idle timer value when timer is re-enabled. + */ + kbase_csf_firmware_disable_gpu_idle_timer(kbdev); + kbase_csf_scheduler_spin_unlock(kbdev, flags); + /* Ensure that the request has taken effect */ + wait_for_global_request(kbdev, GLB_REQ_IDLE_DISABLE_MASK); + kbase_csf_scheduler_spin_lock(kbdev, &flags); + kbase_csf_firmware_enable_gpu_idle_timer(kbdev); + kbase_csf_scheduler_spin_unlock(kbdev, flags); + wait_for_global_request(kbdev, GLB_REQ_IDLE_ENABLE_MASK); + } else { + kbase_csf_scheduler_spin_unlock(kbdev, flags); + } + + kbase_csf_scheduler_unlock(kbdev); kbase_csf_scheduler_pm_idle(kbdev); kbase_reset_gpu_allow(kbdev); end: @@ -1118,15 +1142,16 @@ int kbase_csf_firmware_early_init(struct kbase_device *kbdev) INIT_WORK(&kbdev->csf.firmware_reload_work, kbase_csf_firmware_reload_worker); INIT_WORK(&kbdev->csf.fw_error_work, firmware_error_worker); + init_rwsem(&kbdev->csf.pmode_sync_sem); mutex_init(&kbdev->csf.reg_lock); - kbase_csf_pending_gpuq_kicks_init(kbdev); + kbase_csf_pending_gpuq_kick_queues_init(kbdev); return 0; } void kbase_csf_firmware_early_term(struct kbase_device *kbdev) { - kbase_csf_pending_gpuq_kicks_term(kbdev); + 
kbase_csf_pending_gpuq_kick_queues_term(kbdev); mutex_destroy(&kbdev->csf.reg_lock); } @@ -1278,13 +1303,9 @@ void kbase_csf_firmware_enable_gpu_idle_timer(struct kbase_device *kbdev) void kbase_csf_firmware_disable_gpu_idle_timer(struct kbase_device *kbdev) { - struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface; - kbase_csf_scheduler_spin_lock_assert_held(kbdev); - kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ, GLB_REQ_REQ_IDLE_DISABLE, - GLB_REQ_IDLE_DISABLE_MASK); - + set_gpu_idle_timer_glb_req(kbdev, false); dev_dbg(kbdev->dev, "Sending request to disable gpu idle timer"); kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); @@ -1308,6 +1329,7 @@ int kbase_csf_firmware_ping_wait(struct kbase_device *const kbdev, unsigned int return wait_for_global_request(kbdev, GLB_REQ_PING_MASK); } + int kbase_csf_firmware_set_timeout(struct kbase_device *const kbdev, u64 const timeout) { const struct kbase_csf_global_iface *const global_iface = &kbdev->csf.global_iface; @@ -1370,6 +1392,8 @@ void kbase_csf_firmware_trigger_mcu_halt(struct kbase_device *kbdev) void kbase_csf_firmware_enable_mcu(struct kbase_device *kbdev) { + lockdep_assert_held(&kbdev->hwaccess_lock); + /* Trigger the boot of MCU firmware, Use the AUTO mode as * otherwise on fast reset, to exit protected mode, MCU will * not reboot by itself to enter normal mode. 
@@ -1384,6 +1408,7 @@ void kbase_csf_firmware_trigger_mcu_sleep(struct kbase_device *kbdev) unsigned long flags; kbase_csf_scheduler_spin_lock(kbdev, &flags); + set_gpu_idle_timer_glb_req(kbdev, false); set_global_request(global_iface, GLB_REQ_SLEEP_MASK); dev_dbg(kbdev->dev, "Sending sleep request to MCU"); kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_kcpu.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_kcpu.c index 76e42e847fc3..ba47b7190395 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_kcpu.c +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_kcpu.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2018-2024 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -39,13 +39,7 @@ static DEFINE_SPINLOCK(kbase_csf_fence_lock); #endif -#ifdef CONFIG_MALI_BIFROST_FENCE_DEBUG #define FENCE_WAIT_TIMEOUT_MS 3000 -#endif - -static void kcpu_queue_process(struct kbase_kcpu_command_queue *kcpu_queue, bool drain_queue); - -static void kcpu_queue_process_worker(struct work_struct *data); static int kbase_kcpu_map_import_prepare(struct kbase_kcpu_command_queue *kcpu_queue, struct base_kcpu_command_import_info *import_info, @@ -445,6 +439,16 @@ static void kbase_kcpu_jit_allocate_finish(struct kbase_kcpu_command_queue *queu kfree(cmd->info.jit_alloc.info); } +static void enqueue_kcpuq_work(struct kbase_kcpu_command_queue *queue) +{ + struct kbase_context *const kctx = queue->kctx; + + if (!atomic_read(&kctx->prioritized)) + queue_work(kctx->csf.kcpu_queues.kcpu_wq, &queue->work); + else + kbase_csf_scheduler_enqueue_kcpuq_work(queue); +} + /** * kbase_kcpu_jit_retry_pending_allocs() - Retry blocked JIT_ALLOC commands * @@ -464,9 +468,7 @@ static void 
kbase_kcpu_jit_retry_pending_allocs(struct kbase_context *kctx) * kbase_csf_kcpu_queue_context.jit_lock . */ list_for_each_entry(blocked_queue, &kctx->csf.kcpu_queues.jit_blocked_queues, jit_blocked) - queue_work(atomic_read(&kctx->prioritized) ? kctx->csf.kcpu_wq_high_prio : - kctx->csf.kcpu_wq_normal_prio, - &blocked_queue->work); + enqueue_kcpuq_work(blocked_queue); } static int kbase_kcpu_jit_free_process(struct kbase_kcpu_command_queue *queue, @@ -717,11 +719,8 @@ static int kbase_csf_queue_group_suspend_process(struct kbase_context *kctx, static enum kbase_csf_event_callback_action event_cqs_callback(void *param) { struct kbase_kcpu_command_queue *kcpu_queue = (struct kbase_kcpu_command_queue *)param; - struct kbase_context *kctx = kcpu_queue->kctx; - queue_work(atomic_read(&kctx->prioritized) ? kctx->csf.kcpu_wq_high_prio : - kctx->csf.kcpu_wq_normal_prio, - &kcpu_queue->work); + enqueue_kcpuq_work(kcpu_queue); return KBASE_CSF_EVENT_CALLBACK_KEEP; } @@ -1322,9 +1321,7 @@ static void kbase_csf_fence_wait_callback(struct dma_fence *fence, struct dma_fe fence->seqno); /* Resume kcpu command queue processing. */ - queue_work(atomic_read(&kctx->prioritized) ? kctx->csf.kcpu_wq_high_prio : - kctx->csf.kcpu_wq_normal_prio, - &kcpu_queue->work); + enqueue_kcpuq_work(kcpu_queue); } static void kbasep_kcpu_fence_wait_cancel(struct kbase_kcpu_command_queue *kcpu_queue, @@ -1360,7 +1357,6 @@ static void kbasep_kcpu_fence_wait_cancel(struct kbase_kcpu_command_queue *kcpu_ fence_info->fence = NULL; } -#ifdef CONFIG_MALI_BIFROST_FENCE_DEBUG /** * fence_timeout_callback() - Timeout callback function for fence-wait * @@ -1399,9 +1395,7 @@ static void fence_timeout_callback(struct timer_list *timer) kbase_sync_fence_info_get(fence, &info); if (info.status == 1) { - queue_work(atomic_read(&kctx->prioritized) ? 
kctx->csf.kcpu_wq_high_prio : - kctx->csf.kcpu_wq_normal_prio, - &kcpu_queue->work); + enqueue_kcpuq_work(kcpu_queue); } else if (info.status == 0) { dev_warn(kctx->kbdev->dev, "fence has not yet signalled in %ums", FENCE_WAIT_TIMEOUT_MS); @@ -1430,7 +1424,6 @@ static void fence_wait_timeout_start(struct kbase_kcpu_command_queue *cmd) { mod_timer(&cmd->fence_timeout, jiffies + msecs_to_jiffies(FENCE_WAIT_TIMEOUT_MS)); } -#endif /** * kbase_kcpu_fence_wait_process() - Process the kcpu fence wait command @@ -1469,9 +1462,8 @@ static int kbase_kcpu_fence_wait_process(struct kbase_kcpu_command_queue *kcpu_q fence_status = cb_err; if (cb_err == 0) { kcpu_queue->fence_wait_processed = true; -#ifdef CONFIG_MALI_BIFROST_FENCE_DEBUG - fence_wait_timeout_start(kcpu_queue); -#endif + if (IS_ENABLED(CONFIG_MALI_BIFROST_FENCE_DEBUG)) + fence_wait_timeout_start(kcpu_queue); } else if (cb_err == -ENOENT) { fence_status = dma_fence_get_status(fence); if (!fence_status) { @@ -1692,9 +1684,7 @@ static void fence_signal_timeout_cb(struct timer_list *timer) if (atomic_read(&kcpu_queue->fence_signal_pending_cnt) > 1) fence_signal_timeout_start(kcpu_queue); - queue_work(atomic_read(&kctx->prioritized) ? kctx->csf.kcpu_wq_high_prio : - kctx->csf.kcpu_wq_normal_prio, - &kcpu_queue->timeout_work); + queue_work(kctx->csf.kcpu_queues.kcpu_wq, &kcpu_queue->timeout_work); } } @@ -1973,7 +1963,7 @@ static void kcpu_queue_process_worker(struct work_struct *data) container_of(data, struct kbase_kcpu_command_queue, work); mutex_lock(&queue->lock); - kcpu_queue_process(queue, false); + kbase_csf_kcpu_queue_process(queue, false); mutex_unlock(&queue->lock); } @@ -2006,7 +1996,7 @@ static int delete_queue(struct kbase_context *kctx, u32 id) /* Drain the remaining work for this queue first and go past * all the waits. 
*/ - kcpu_queue_process(queue, true); + kbase_csf_kcpu_queue_process(queue, true); /* All commands should have been processed */ WARN_ON(queue->num_pending_cmds); @@ -2022,11 +2012,20 @@ static int delete_queue(struct kbase_context *kctx, u32 id) mutex_unlock(&queue->lock); cancel_work_sync(&queue->timeout_work); + + /* + * Drain a pending request to process this queue in + * kbase_csf_scheduler_kthread() if any. By this point the + * queue would be empty so this would be a no-op. + */ + kbase_csf_scheduler_wait_for_kthread_pending_work(kctx->kbdev, + &queue->pending_kick); + cancel_work_sync(&queue->work); mutex_destroy(&queue->lock); - kfree(queue); + vfree(queue); } else { dev_dbg(kctx->kbdev->dev, "Attempt to delete a non-existent KCPU queue"); mutex_unlock(&kctx->csf.kcpu_queues.lock); @@ -2079,7 +2078,7 @@ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_FREE_END(struct kbase_device *kbde KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_FREE_END(kbdev, queue); } -static void kcpu_queue_process(struct kbase_kcpu_command_queue *queue, bool drain_queue) +void kbase_csf_kcpu_queue_process(struct kbase_kcpu_command_queue *queue, bool drain_queue) { struct kbase_device *kbdev = queue->kctx->kbdev; bool process_next = true; @@ -2199,10 +2198,10 @@ static void kcpu_queue_process(struct kbase_kcpu_command_queue *queue, bool drai KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_START(kbdev, queue); - kbase_gpu_vm_lock(queue->kctx); + kbase_gpu_vm_lock_with_pmode_sync(queue->kctx); meta = kbase_sticky_resource_acquire(queue->kctx, - cmd->info.import.gpu_va); - kbase_gpu_vm_unlock(queue->kctx); + cmd->info.import.gpu_va, NULL); + kbase_gpu_vm_unlock_with_pmode_sync(queue->kctx); if (meta == NULL) { queue->has_error = true; @@ -2219,10 +2218,10 @@ static void kcpu_queue_process(struct kbase_kcpu_command_queue *queue, bool drai KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_START(kbdev, queue); - kbase_gpu_vm_lock(queue->kctx); + 
kbase_gpu_vm_lock_with_pmode_sync(queue->kctx); ret = kbase_sticky_resource_release(queue->kctx, NULL, cmd->info.import.gpu_va); - kbase_gpu_vm_unlock(queue->kctx); + kbase_gpu_vm_unlock_with_pmode_sync(queue->kctx); if (!ret) { queue->has_error = true; @@ -2240,10 +2239,10 @@ static void kcpu_queue_process(struct kbase_kcpu_command_queue *queue, bool drai KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_FORCE_START(kbdev, queue); - kbase_gpu_vm_lock(queue->kctx); + kbase_gpu_vm_lock_with_pmode_sync(queue->kctx); ret = kbase_sticky_resource_release_force(queue->kctx, NULL, cmd->info.import.gpu_va); - kbase_gpu_vm_unlock(queue->kctx); + kbase_gpu_vm_unlock_with_pmode_sync(queue->kctx); if (!ret) { queue->has_error = true; @@ -2642,7 +2641,7 @@ int kbase_csf_kcpu_queue_enqueue(struct kbase_context *kctx, } queue->num_pending_cmds += enq->nr_commands; - kcpu_queue_process(queue, false); + kbase_csf_kcpu_queue_process(queue, false); } out: @@ -2653,23 +2652,14 @@ out: int kbase_csf_kcpu_queue_context_init(struct kbase_context *kctx) { - kctx->csf.kcpu_wq_high_prio = alloc_workqueue("mali_kcpu_wq_%i_high_prio", - WQ_UNBOUND | WQ_HIGHPRI, 0, kctx->tgid); - if (kctx->csf.kcpu_wq_high_prio == NULL) { + kctx->csf.kcpu_queues.kcpu_wq = + alloc_workqueue("mali_kcpu_wq_%i_%i", 0, 0, kctx->tgid, kctx->id); + if (kctx->csf.kcpu_queues.kcpu_wq == NULL) { dev_err(kctx->kbdev->dev, "Failed to initialize KCPU queue high-priority workqueue"); return -ENOMEM; } - kctx->csf.kcpu_wq_normal_prio = - alloc_workqueue("mali_kcpu_wq_%i_normal_prio", 0, 0, kctx->tgid); - if (kctx->csf.kcpu_wq_normal_prio == NULL) { - dev_err(kctx->kbdev->dev, - "Failed to initialize KCPU queue normal-priority workqueue"); - destroy_workqueue(kctx->csf.kcpu_wq_high_prio); - return -ENOMEM; - } - mutex_init(&kctx->csf.kcpu_queues.lock); return 0; @@ -2688,8 +2678,7 @@ void kbase_csf_kcpu_queue_context_term(struct kbase_context *kctx) mutex_destroy(&kctx->csf.kcpu_queues.lock); - 
destroy_workqueue(kctx->csf.kcpu_wq_normal_prio); - destroy_workqueue(kctx->csf.kcpu_wq_high_prio); + destroy_workqueue(kctx->csf.kcpu_queues.kcpu_wq); } KBASE_EXPORT_TEST_API(kbase_csf_kcpu_queue_context_term); @@ -2699,15 +2688,42 @@ int kbase_csf_kcpu_queue_delete(struct kbase_context *kctx, return delete_queue(kctx, (u32)del->id); } +static struct kbase_kcpu_dma_fence_meta * +kbase_csf_kcpu_queue_metadata_new(struct kbase_context *kctx, u64 fence_context) +{ + int n; + struct kbase_kcpu_dma_fence_meta *metadata = kzalloc(sizeof(*metadata), GFP_KERNEL); + + if (!metadata) + goto early_ret; + + *metadata = (struct kbase_kcpu_dma_fence_meta){ + .kbdev = kctx->kbdev, + .kctx_id = kctx->id, + }; + + /* Please update MAX_TIMELINE_NAME macro when making changes to the string. */ + n = scnprintf(metadata->timeline_name, MAX_TIMELINE_NAME, "%u-%d_%u-%llu-kcpu", + kctx->kbdev->id, kctx->tgid, kctx->id, fence_context); + if (WARN_ON(n >= MAX_TIMELINE_NAME)) { + kfree(metadata); + metadata = NULL; + goto early_ret; + } + + kbase_refcount_set(&metadata->refcount, 1); + +early_ret: + return metadata; +} +KBASE_ALLOW_ERROR_INJECTION_TEST_API(kbase_csf_kcpu_queue_metadata_new, ERRNO_NULL); + int kbase_csf_kcpu_queue_new(struct kbase_context *kctx, struct kbase_ioctl_kcpu_queue_new *newq) { struct kbase_kcpu_command_queue *queue; - int idx; - int n; - int ret = 0; -#if IS_ENABLED(CONFIG_SYNC_FILE) struct kbase_kcpu_dma_fence_meta *metadata; -#endif + int idx; + int ret = 0; /* The queue id is of u8 type and we use the index of the kcpu_queues * array as an id, so the number of elements in the array can't be * more than 256. 
@@ -2727,54 +2743,48 @@ int kbase_csf_kcpu_queue_new(struct kbase_context *kctx, struct kbase_ioctl_kcpu goto out; } - queue = kzalloc(sizeof(*queue), GFP_KERNEL); - + queue = vzalloc(sizeof(*queue)); if (!queue) { ret = -ENOMEM; goto out; } + *queue = (struct kbase_kcpu_command_queue) + { + .kctx = kctx, .start_offset = 0, .num_pending_cmds = 0, .enqueue_failed = false, + .command_started = false, .has_error = false, .id = idx, +#if IS_ENABLED(CONFIG_SYNC_FILE) + .fence_context = dma_fence_context_alloc(1), .fence_seqno = 0, + .fence_wait_processed = false, +#endif /* IS_ENABLED(CONFIG_SYNC_FILE) */ + }; + + mutex_init(&queue->lock); + INIT_WORK(&queue->work, kcpu_queue_process_worker); + INIT_LIST_HEAD(&queue->high_prio_work); + atomic_set(&queue->pending_kick, 0); + INIT_WORK(&queue->timeout_work, kcpu_queue_timeout_worker); + INIT_LIST_HEAD(&queue->jit_blocked); + + if (IS_ENABLED(CONFIG_SYNC_FILE)) { + metadata = kbase_csf_kcpu_queue_metadata_new(kctx, queue->fence_context); + if (!metadata) { + vfree(queue); + ret = -ENOMEM; + goto out; + } + + queue->metadata = metadata; + atomic_inc(&kctx->kbdev->live_fence_metadata); + atomic_set(&queue->fence_signal_pending_cnt, 0); + kbase_timer_setup(&queue->fence_signal_timeout, fence_signal_timeout_cb); + } + + if (IS_ENABLED(CONFIG_MALI_BIFROST_FENCE_DEBUG)) + kbase_timer_setup(&queue->fence_timeout, fence_timeout_callback); + bitmap_set(kctx->csf.kcpu_queues.in_use, (unsigned int)idx, 1); kctx->csf.kcpu_queues.array[idx] = queue; - mutex_init(&queue->lock); - queue->kctx = kctx; - queue->start_offset = 0; - queue->num_pending_cmds = 0; -#if IS_ENABLED(CONFIG_SYNC_FILE) - queue->fence_context = dma_fence_context_alloc(1); - queue->fence_seqno = 0; - queue->fence_wait_processed = false; - - metadata = kzalloc(sizeof(*metadata), GFP_KERNEL); - if (!metadata) { - kfree(queue); - ret = -ENOMEM; - goto out; - } - - metadata->kbdev = kctx->kbdev; - metadata->kctx_id = kctx->id; - n = snprintf(metadata->timeline_name, 
MAX_TIMELINE_NAME, "%u-%d_%u-%llu-kcpu", - kctx->kbdev->id, kctx->tgid, kctx->id, queue->fence_context); - if (WARN_ON(n >= MAX_TIMELINE_NAME)) { - kfree(queue); - kfree(metadata); - ret = -EINVAL; - goto out; - } - - kbase_refcount_set(&metadata->refcount, 1); - queue->metadata = metadata; - atomic_inc(&kctx->kbdev->live_fence_metadata); -#endif /* CONFIG_SYNC_FILE */ - queue->enqueue_failed = false; - queue->command_started = false; - INIT_LIST_HEAD(&queue->jit_blocked); - queue->has_error = false; - INIT_WORK(&queue->work, kcpu_queue_process_worker); - INIT_WORK(&queue->timeout_work, kcpu_queue_timeout_worker); - queue->id = idx; - newq->id = idx; /* Fire the tracepoint with the mutex held to enforce correct ordering @@ -2784,14 +2794,6 @@ int kbase_csf_kcpu_queue_new(struct kbase_context *kctx, struct kbase_ioctl_kcpu queue->num_pending_cmds); KBASE_KTRACE_ADD_CSF_KCPU(kctx->kbdev, KCPU_QUEUE_CREATE, queue, queue->fence_context, 0); -#ifdef CONFIG_MALI_BIFROST_FENCE_DEBUG - kbase_timer_setup(&queue->fence_timeout, fence_timeout_callback); -#endif - -#if IS_ENABLED(CONFIG_SYNC_FILE) - atomic_set(&queue->fence_signal_pending_cnt, 0); - kbase_timer_setup(&queue->fence_signal_timeout, fence_signal_timeout_cb); -#endif out: mutex_unlock(&kctx->csf.kcpu_queues.lock); diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_kcpu.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_kcpu.h index d1f18ed5caca..291509bef5a6 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_kcpu.h +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_kcpu.h @@ -243,7 +243,19 @@ struct kbase_kcpu_command { * @work: struct work_struct which contains a pointer to * the function which handles processing of kcpu * commands enqueued into a kcpu command queue; - * part of kernel API for processing workqueues + * part of kernel API for processing workqueues. + * This would be used if the context is not + * prioritised, otherwise it would be handled by + * kbase_csf_scheduler_kthread(). 
+ * @high_prio_work: A counterpart to @work, this queue would be + * added to a list to be processed by + * kbase_csf_scheduler_kthread() if it is + * prioritised. + * @pending_kick: Indicates that kbase_csf_scheduler_kthread() + * should re-evaluate pending commands for this + * queue. This would be set to false when the work + * is done. This is used mainly for + * synchronisation with queue termination. * @timeout_work: struct work_struct which contains a pointer to the * function which handles post-timeout actions * queue when a fence signal timeout occurs. @@ -287,6 +299,8 @@ struct kbase_kcpu_command_queue { struct kbase_context *kctx; struct kbase_kcpu_command commands[KBASEP_KCPU_QUEUE_SIZE]; struct work_struct work; + struct list_head high_prio_work; + atomic_t pending_kick; struct work_struct timeout_work; u8 start_offset; u8 id; @@ -299,9 +313,7 @@ struct kbase_kcpu_command_queue { bool command_started; struct list_head jit_blocked; bool has_error; -#ifdef CONFIG_MALI_BIFROST_FENCE_DEBUG struct timer_list fence_timeout; -#endif /* CONFIG_MALI_BIFROST_FENCE_DEBUG */ #if IS_ENABLED(CONFIG_SYNC_FILE) struct kbase_kcpu_dma_fence_meta *metadata; #endif /* CONFIG_SYNC_FILE */ @@ -334,6 +346,18 @@ int kbase_csf_kcpu_queue_new(struct kbase_context *kctx, struct kbase_ioctl_kcpu int kbase_csf_kcpu_queue_delete(struct kbase_context *kctx, struct kbase_ioctl_kcpu_queue_delete *del); +/** + * kbase_csf_kcpu_queue_process - Process pending KCPU queue commands + * + * @queue: The queue to process pending commands for + * @drain_queue: Whether to skip all blocking commands in the queue. + * This is expected to be set to true on queue + * termination. + * + * This function does not return a value. + */ +void kbase_csf_kcpu_queue_process(struct kbase_kcpu_command_queue *queue, bool drain_queue); + /** * kbase_csf_kcpu_queue_enqueue - Enqueue a KCPU command into a KCPU command * queue. 
diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_registers.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_registers.h index d01f3070cf5b..9a7c6e451f66 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_registers.h +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_registers.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2018-2024 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -250,7 +250,7 @@ #define GLB_ACK 0x0000 /* () Global acknowledge */ #define GLB_DB_ACK 0x0008 /* () Global doorbell acknowledge */ -#define GLB_HALT_STATUS 0x0010 /* () Global halt status */ +#define GLB_FATAL_STATUS 0x0010 /* () Global fatal error status */ #define GLB_PRFCNT_STATUS 0x0014 /* () Performance counter status */ #define GLB_PRFCNT_INSERT 0x0018 /* () Performance counter buffer insert index */ #define GLB_DEBUG_FWUTF_RESULT GLB_DEBUG_ARG_OUT0 /* () Firmware debug test result */ @@ -1422,6 +1422,12 @@ #define GLB_REQ_PRFCNT_OVERFLOW_SET(reg_val, value) \ (((reg_val) & ~GLB_REQ_PRFCNT_OVERFLOW_MASK) | \ (((value) << GLB_REQ_PRFCNT_OVERFLOW_SHIFT) & GLB_REQ_PRFCNT_OVERFLOW_MASK)) +#define GLB_ACK_FATAL_SHIFT GPU_U(27) +#define GLB_ACK_FATAL_MASK (GPU_U(0x1) << GLB_ACK_FATAL_SHIFT) +#define GLB_ACK_FATAL_GET(reg_val) (((reg_val)&GLB_ACK_FATAL_MASK) >> GLB_ACK_FATAL_SHIFT) +#define GLB_ACK_FATAL_SET(reg_val, value) \ + (~(~(reg_val) | GLB_ACK_FATAL_MASK) | \ + (((value) << GLB_ACK_FATAL_SHIFT) & GLB_ACK_FATAL_MASK)) #define GLB_REQ_DEBUG_CSF_REQ_SHIFT 30 #define GLB_REQ_DEBUG_CSF_REQ_MASK (0x1 << GLB_REQ_DEBUG_CSF_REQ_SHIFT) #define GLB_REQ_DEBUG_CSF_REQ_GET(reg_val) \ @@ -1822,6 +1828,20 @@ (((reg_val) & ~GLB_DEBUG_REQ_RUN_MODE_MASK) | \ (((value) << GLB_DEBUG_REQ_RUN_MODE_SHIFT) & GLB_DEBUG_REQ_RUN_MODE_MASK)) +/* 
GLB_FATAL_STATUS register */ +#define GLB_FATAL_STATUS_VALUE_SHIFT GPU_U(0) +#define GLB_FATAL_STATUS_VALUE_MASK (GPU_U(0xFFFFFFFF) << GLB_FATAL_STATUS_VALUE_SHIFT) +#define GLB_FATAL_STATUS_VALUE_GET(reg_val) \ + (((reg_val)&GLB_FATAL_STATUS_VALUE_MASK) >> GLB_FATAL_STATUS_VALUE_SHIFT) + +enum glb_fatal_status { + GLB_FATAL_STATUS_VALUE_OK, + GLB_FATAL_STATUS_VALUE_ASSERT, + GLB_FATAL_STATUS_VALUE_UNEXPECTED_EXCEPTION, + GLB_FATAL_STATUS_VALUE_HANG, + GLB_FATAL_STATUS_VALUE_COUNT +}; + /* GLB_DEBUG_ACK register */ #define GLB_DEBUG_ACK_DEBUG_RUN_SHIFT GPU_U(23) #define GLB_DEBUG_ACK_DEBUG_RUN_MASK (GPU_U(0x1) << GLB_DEBUG_ACK_DEBUG_RUN_SHIFT) diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_reset_gpu.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_reset_gpu.c index 240397ebc16d..b07cc9600a04 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_reset_gpu.c +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_reset_gpu.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2024 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -224,8 +224,11 @@ static void kbase_csf_reset_end_hw_access(struct kbase_device *kbdev, int err_du static void kbase_csf_debug_dump_registers(struct kbase_device *kbdev) { + unsigned long flags; + kbase_io_history_dump(kbdev); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); dev_err(kbdev->dev, "Register state:"); dev_err(kbdev->dev, " GPU_IRQ_RAWSTAT=0x%08x GPU_STATUS=0x%08x MCU_STATUS=0x%08x", kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(GPU_IRQ_RAWSTAT)), @@ -251,6 +254,7 @@ static void kbase_csf_debug_dump_registers(struct kbase_device *kbdev) kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(TILER_CONFIG))); } + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); } /** @@ -396,6 +400,7 @@ static int kbase_csf_reset_gpu_now(struct kbase_device *kbdev, bool firmware_ini */ if (likely(firmware_inited)) kbase_csf_scheduler_reset(kbdev); + cancel_work_sync(&kbdev->csf.firmware_reload_work); dev_dbg(kbdev->dev, "Disable GPU hardware counters.\n"); @@ -403,6 +408,7 @@ static int kbase_csf_reset_gpu_now(struct kbase_device *kbdev, bool firmware_ini kbase_hwcnt_context_disable(kbdev->hwcnt_gpu_ctx); ret = kbase_csf_reset_gpu_once(kbdev, firmware_inited, silent); + if (ret == SOFT_RESET_FAILED) { dev_err(kbdev->dev, "Soft-reset failed"); goto err; @@ -490,6 +496,13 @@ static void kbase_csf_reset_gpu_worker(struct work_struct *data) bool kbase_prepare_to_reset_gpu(struct kbase_device *kbdev, unsigned int flags) { +#ifdef CONFIG_MALI_ARBITER_SUPPORT + if (kbase_pm_is_gpu_lost(kbdev)) { + /* GPU access has been removed, reset will be done by Arbiter instead */ + return false; + } +#endif + if (flags & RESET_FLAGS_HWC_UNRECOVERABLE_ERROR) kbase_hwcnt_backend_csf_on_unrecoverable_error(&kbdev->hwcnt_gpu_iface); diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_scheduler.c 
b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_scheduler.c index 81ddeb667d06..642531c1033c 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_scheduler.c +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_scheduler.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2018-2024 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -36,6 +36,7 @@ #include "mali_kbase_csf_tiler_heap_reclaim.h" #include "mali_kbase_csf_mcu_shared_reg.h" #include +#include #if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) #include #include @@ -84,7 +85,8 @@ scheduler_get_protm_enter_async_group(struct kbase_device *const kbdev, struct kbase_queue_group *const group); static struct kbase_queue_group *get_tock_top_group(struct kbase_csf_scheduler *const scheduler); static void scheduler_enable_tick_timer_nolock(struct kbase_device *kbdev); -static int suspend_active_queue_groups(struct kbase_device *kbdev, unsigned long *slot_mask); +static int suspend_active_queue_groups(struct kbase_device *kbdev, unsigned long *slot_mask, + bool reset); static int suspend_active_groups_on_powerdown(struct kbase_device *kbdev, bool system_suspend); static void schedule_in_cycle(struct kbase_queue_group *group, bool force); static bool queue_group_scheduled_locked(struct kbase_queue_group *group); @@ -119,7 +121,7 @@ static inline int gpu_metrics_ctx_init(struct kbase_context *kctx) put_cred(cred); /* Return early if this is not a Userspace created context */ - if (unlikely(!kctx->kfile)) + if (unlikely(!kctx->filp)) return 0; /* Serialize against the other threads trying to create/destroy Kbase contexts. 
*/ @@ -156,7 +158,7 @@ static inline int gpu_metrics_ctx_init(struct kbase_context *kctx) static inline void gpu_metrics_ctx_term(struct kbase_context *kctx) { /* Return early if this is not a Userspace created context */ - if (unlikely(!kctx->kfile)) + if (unlikely(!kctx->filp)) return; /* Serialize against the other threads trying to create/destroy Kbase contexts. */ @@ -458,11 +460,14 @@ static void wait_for_dump_complete_on_group_deschedule(struct kbase_queue_group * * This function notifies the Userspace client waiting for the faults and wait * for the Client to complete the dumping. - * The function is called only from Scheduling tick/tock when a request sent by - * the Scheduler to FW times out or from the protm event work item of the group - * when the protected mode entry request times out. - * In the latter case there is no wait done as scheduler lock would be released - * immediately. In the former case the function waits and releases the scheduler + * The function is mainly called from Scheduling tick/tock when a request sent by + * the Scheduler to FW times out. It can be called outside the tick/tock when timeout + * happens in the following 3 cases :- + * - Entry to protected mode is initiated from protm event work item. + * - Forced exit from protected mode is triggered when GPU queue of an on-slot group is kicked. + * - CSG termination request is sent when Userspace tries to delete the queue group. + * In the latter 3 cases there is no wait done as scheduler lock would be released + * immediately. In the tick/tock case the function waits and releases the scheduler * lock before the wait. It has been ensured that the Scheduler view of the groups * won't change meanwhile, so no group can enter/exit the Scheduler, become * runnable or go off slot. 
@@ -478,10 +483,9 @@ static void schedule_actions_trigger_df(struct kbase_device *kbdev, struct kbase if (!kbase_debug_csf_fault_notify(kbdev, kctx, error)) return; - if (unlikely(scheduler->state != SCHED_BUSY)) { - WARN_ON(error != DF_PROTECTED_MODE_ENTRY_FAILURE); + /* Return early if the function was called outside the tick/tock */ + if (unlikely(scheduler->state != SCHED_BUSY)) return; - } mutex_unlock(&scheduler->lock); kbase_debug_csf_fault_wait_completion(kbdev); @@ -788,19 +792,20 @@ static void update_on_slot_queues_offsets(struct kbase_device *kbdev) static void enqueue_gpu_idle_work(struct kbase_csf_scheduler *const scheduler) { atomic_set(&scheduler->gpu_no_longer_idle, false); - queue_work(scheduler->idle_wq, &scheduler->gpu_idle_work); + atomic_inc(&scheduler->pending_gpu_idle_work); + complete(&scheduler->kthread_signal); } -bool kbase_csf_scheduler_process_gpu_idle_event(struct kbase_device *kbdev) +void kbase_csf_scheduler_process_gpu_idle_event(struct kbase_device *kbdev) { struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; bool can_suspend_on_idle; - bool invoke_pm_state_machine = false; lockdep_assert_held(&kbdev->hwaccess_lock); lockdep_assert_held(&scheduler->interrupt_lock); - can_suspend_on_idle = kbase_pm_idle_groups_sched_suspendable(kbdev); + can_suspend_on_idle = kbase_pm_idle_groups_sched_suspendable(kbdev) && + !kbase_pm_is_mcu_inactive(kbdev, kbdev->pm.backend.mcu_state); KBASE_KTRACE_ADD(kbdev, SCHEDULER_GPU_IDLE_EVENT_CAN_SUSPEND, NULL, (((u64)can_suspend_on_idle) << 32)); @@ -812,23 +817,6 @@ bool kbase_csf_scheduler_process_gpu_idle_event(struct kbase_device *kbdev) scheduler->fast_gpu_idle_handling = (kbdev->csf.gpu_idle_hysteresis_ns == 0) || !kbase_csf_scheduler_all_csgs_idle(kbdev); - /* If GPU idle event occurred after the runtime suspend was aborted due to - * DB_MIRROR irq then it suggests that Userspace submission didn't make GPU - * non-idle. 
So the planned resumption of scheduling can be cancelled and - * MCU can be put back to sleep state to re-trigger the runtime suspend. - */ - if (unlikely(kbdev->pm.backend.exit_gpu_sleep_mode && - kbdev->pm.backend.runtime_suspend_abort_reason == - ABORT_REASON_DB_MIRROR_IRQ)) { - /* Cancel the planned resumption of scheduling */ - kbdev->pm.backend.exit_gpu_sleep_mode = false; - kbdev->pm.backend.runtime_suspend_abort_reason = ABORT_REASON_NONE; - /* PM state machine can be invoked to put MCU back to the sleep - * state right away and thereby re-trigger the runtime suspend. - */ - invoke_pm_state_machine = true; - } - /* The GPU idle worker relies on update_on_slot_queues_offsets() to have * finished. It's queued before to reduce the time it takes till execution * but it'll eventually be blocked by the scheduler->interrupt_lock. @@ -839,8 +827,6 @@ bool kbase_csf_scheduler_process_gpu_idle_event(struct kbase_device *kbdev) /* The extract offsets are unused in fast GPU idle handling */ if (!scheduler->fast_gpu_idle_handling) update_on_slot_queues_offsets(kbdev); - - return invoke_pm_state_machine; } u32 kbase_csf_scheduler_get_nr_active_csgs_locked(struct kbase_device *kbdev) @@ -1027,6 +1013,8 @@ static void scheduler_force_protm_exit(struct kbase_device *kbdev) * * @kbdev: Pointer to the device * @suspend_handler: Handler code for how to handle a suspend that might occur. + * @active_after_sleep: Flag to indicate that Scheduler is being activated from + * the sleeping state. * * This function is usually called when Scheduler needs to be activated. * The PM reference count is acquired for the Scheduler and the power on @@ -1035,7 +1023,8 @@ static void scheduler_force_protm_exit(struct kbase_device *kbdev) * Return: 0 if successful or a negative error code on failure. 
*/ static int scheduler_pm_active_handle_suspend(struct kbase_device *kbdev, - enum kbase_pm_suspend_handler suspend_handler) + enum kbase_pm_suspend_handler suspend_handler, + bool active_after_sleep) { unsigned long flags; u32 prev_count; @@ -1043,24 +1032,35 @@ static int scheduler_pm_active_handle_suspend(struct kbase_device *kbdev, lockdep_assert_held(&kbdev->csf.scheduler.lock); - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); prev_count = kbdev->csf.scheduler.pm_active_count; if (!WARN_ON(prev_count == U32_MAX)) kbdev->csf.scheduler.pm_active_count++; - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); /* On 0 => 1, make a pm_ctx_active request */ if (!prev_count) { - ret = kbase_pm_context_active_handle_suspend(kbdev, suspend_handler); - /* Invoke the PM state machines again as the change in MCU - * desired status, due to the update of scheduler.pm_active_count, - * may be missed by the thread that called pm_wait_for_desired_state() - */ - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - if (ret) + kbase_pm_lock(kbdev); + kbdev->pm.backend.mcu_poweron_required = true; + ret = kbase_pm_context_active_handle_suspend_locked(kbdev, suspend_handler); + if (ret) { kbdev->csf.scheduler.pm_active_count--; - kbase_pm_update_state(kbdev); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + kbdev->pm.backend.mcu_poweron_required = false; + } else { + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + if (active_after_sleep) { + kbdev->pm.backend.runtime_suspend_abort_reason = ABORT_REASON_NONE; + kbdev->pm.backend.gpu_sleep_mode_active = false; + } + /* Check if the GPU is already active */ + if (kbdev->pm.active_count > 1) { + /* GPU is already active, so need to invoke the PM state machines + * explicitly to turn on the MCU. 
+ */ + kbdev->pm.backend.mcu_desired = true; + kbase_pm_update_state(kbdev); + } + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + } + kbase_pm_unlock(kbdev); } return ret; @@ -1072,8 +1072,6 @@ static int scheduler_pm_active_handle_suspend(struct kbase_device *kbdev, * Scheduler * * @kbdev: Pointer to the device - * @flags: Pointer to the flags variable containing the interrupt state - * when hwaccess lock was acquired. * * This function is called when Scheduler needs to be activated from the * sleeping state. @@ -1081,42 +1079,15 @@ static int scheduler_pm_active_handle_suspend(struct kbase_device *kbdev, * MCU is initiated. It resets the flag that indicates to the MCU state * machine that MCU needs to be put in sleep state. * - * Note: This function shall be called with hwaccess lock held and it may - * release that lock and reacquire it. - * * Return: zero when the PM reference was taken and non-zero when the * system is being suspending/suspended. */ -static int scheduler_pm_active_after_sleep(struct kbase_device *kbdev, unsigned long *flags) +static int scheduler_pm_active_after_sleep(struct kbase_device *kbdev) { - u32 prev_count; - int ret = 0; - lockdep_assert_held(&kbdev->csf.scheduler.lock); - lockdep_assert_held(&kbdev->hwaccess_lock); - prev_count = kbdev->csf.scheduler.pm_active_count; - if (!WARN_ON(prev_count == U32_MAX)) - kbdev->csf.scheduler.pm_active_count++; - - kbdev->pm.backend.runtime_suspend_abort_reason = ABORT_REASON_NONE; - - /* On 0 => 1, make a pm_ctx_active request */ - if (!prev_count) { - spin_unlock_irqrestore(&kbdev->hwaccess_lock, *flags); - - ret = kbase_pm_context_active_handle_suspend( - kbdev, KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE); - - spin_lock_irqsave(&kbdev->hwaccess_lock, *flags); - if (ret) - kbdev->csf.scheduler.pm_active_count--; - else - kbdev->pm.backend.gpu_sleep_mode_active = false; - kbase_pm_update_state(kbdev); - } - - return ret; + return scheduler_pm_active_handle_suspend(kbdev, 
KBASE_PM_SUSPEND_HANDLER_NOT_POSSIBLE, + true); } #endif @@ -1136,28 +1107,32 @@ static void scheduler_pm_idle(struct kbase_device *kbdev) lockdep_assert_held(&kbdev->csf.scheduler.lock); - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); prev_count = kbdev->csf.scheduler.pm_active_count; if (!WARN_ON(prev_count == 0)) kbdev->csf.scheduler.pm_active_count--; - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); if (prev_count == 1) { - kbase_pm_context_idle(kbdev); - /* Invoke the PM state machines again as the change in MCU - * desired status, due to the update of scheduler.pm_active_count, - * may be missed by the thread that called pm_wait_for_desired_state() - */ - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - kbase_pm_update_state(kbdev); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + kbase_pm_lock(kbdev); + kbdev->pm.backend.mcu_poweron_required = false; + kbase_pm_context_idle_locked(kbdev); + /* Check if GPU is still active */ + if (kbdev->pm.active_count) { + /* GPU is still active, so need to invoke the PM state machines + * explicitly to turn off the MCU. + */ + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbdev->pm.backend.mcu_desired = false; + kbase_pm_update_state(kbdev); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + } + kbase_pm_unlock(kbdev); } } #ifdef KBASE_PM_RUNTIME /** * scheduler_pm_idle_before_sleep() - Release the PM reference count and - * trigger the tranistion to sleep state. + * trigger the transition to sleep state. 
* * @kbdev: Pointer to the device * @@ -1168,28 +1143,15 @@ static void scheduler_pm_idle(struct kbase_device *kbdev) static void scheduler_pm_idle_before_sleep(struct kbase_device *kbdev) { unsigned long flags; - u32 prev_count; lockdep_assert_held(&kbdev->csf.scheduler.lock); spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - prev_count = kbdev->csf.scheduler.pm_active_count; - if (!WARN_ON(prev_count == 0)) - kbdev->csf.scheduler.pm_active_count--; kbdev->pm.backend.gpu_sleep_mode_active = true; kbdev->pm.backend.exit_gpu_sleep_mode = false; spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - if (prev_count == 1) { - kbase_pm_context_idle(kbdev); - /* Invoke the PM state machines again as the change in MCU - * desired status, due to the update of scheduler.pm_active_count, - * may be missed by the thread that called pm_wait_for_desired_state() - */ - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - kbase_pm_update_state(kbdev); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - } + scheduler_pm_idle(kbdev); } #endif @@ -1205,8 +1167,8 @@ static void scheduler_wakeup(struct kbase_device *kbdev, bool kick) if (scheduler->state == SCHED_SUSPENDED) { dev_dbg(kbdev->dev, "Re-activating the Scheduler after suspend"); - ret = scheduler_pm_active_handle_suspend(kbdev, - KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE); + ret = scheduler_pm_active_handle_suspend( + kbdev, KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE, false); #if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) hrtimer_start(&scheduler->gpu_metrics_timer, HR_TIMER_DELAY_NSEC(kbase_gpu_metrics_get_tp_emit_interval()), @@ -1214,13 +1176,8 @@ static void scheduler_wakeup(struct kbase_device *kbdev, bool kick) #endif } else { #ifdef KBASE_PM_RUNTIME - unsigned long flags; - dev_dbg(kbdev->dev, "Re-activating the Scheduler out of sleep"); - - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - ret = scheduler_pm_active_after_sleep(kbdev, &flags); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + 
ret = scheduler_pm_active_after_sleep(kbdev); #endif } @@ -1228,7 +1185,7 @@ static void scheduler_wakeup(struct kbase_device *kbdev, bool kick) /* GPUCORE-29850 would add the handling for the case where * Scheduler could not be activated due to system suspend. */ - dev_info(kbdev->dev, "Couldn't wakeup Scheduler due to system suspend"); + dev_dbg(kbdev->dev, "Couldn't wakeup Scheduler due to system suspend"); return; } @@ -2410,6 +2367,11 @@ static void cancel_tock_work(struct kbase_csf_scheduler *const scheduler) atomic_set(&scheduler->pending_tock_work, false); } +static void cancel_gpu_idle_work(struct kbase_csf_scheduler *const scheduler) +{ + atomic_set(&scheduler->pending_gpu_idle_work, false); +} + static void remove_group_from_runnable(struct kbase_csf_scheduler *const scheduler, struct kbase_queue_group *group, enum kbase_csf_group_state run_state) @@ -3131,8 +3093,9 @@ static void sched_evict_group(struct kbase_queue_group *group, bool fault, static int term_group_sync(struct kbase_queue_group *group) { struct kbase_device *kbdev = group->kctx->kbdev; - const unsigned int fw_timeout_ms = kbase_get_timeout_ms(kbdev, CSF_FIRMWARE_TIMEOUT); - long remaining = kbase_csf_timeout_in_jiffies(fw_timeout_ms); + const unsigned int group_term_timeout_ms = + kbase_get_timeout_ms(kbdev, CSF_CSG_TERM_TIMEOUT); + long remaining = kbase_csf_timeout_in_jiffies(group_term_timeout_ms); int err = 0; term_csg_slot(group); @@ -3148,11 +3111,11 @@ static int term_group_sync(struct kbase_queue_group *group) dev_warn( kbdev->dev, "[%llu] term request timeout (%d ms) for group %d of context %d_%d on slot %d", - kbase_backend_get_cycle_cnt(kbdev), fw_timeout_ms, group->handle, + kbase_backend_get_cycle_cnt(kbdev), group_term_timeout_ms, group->handle, group->kctx->tgid, group->kctx->id, group->csg_nr); if (kbase_csf_firmware_ping_wait(kbdev, FW_PING_AFTER_ERROR_TIMEOUT_MS)) error_type = DF_PING_REQUEST_TIMEOUT; - kbase_debug_csf_fault_notify(kbdev, group->kctx, error_type); + 
schedule_actions_trigger_df(kbdev, group->kctx, error_type); if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE)) kbase_reset_gpu(kbdev); @@ -4138,7 +4101,7 @@ static void scheduler_group_check_protm_enter(struct kbase_device *const kbdev, * entry to protected mode happens with a memory region being locked and * the same region is then accessed by the GPU in protected mode. */ - mutex_lock(&kbdev->mmu_hw_mutex); + down_write(&kbdev->csf.pmode_sync_sem); spin_lock_irqsave(&scheduler->interrupt_lock, flags); /* Check if the previous transition to enter & exit the protected @@ -4204,7 +4167,7 @@ static void scheduler_group_check_protm_enter(struct kbase_device *const kbdev, spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); err = kbase_csf_wait_protected_mode_enter(kbdev); - mutex_unlock(&kbdev->mmu_hw_mutex); + up_write(&kbdev->csf.pmode_sync_sem); if (err) schedule_actions_trigger_df( @@ -4219,7 +4182,7 @@ static void scheduler_group_check_protm_enter(struct kbase_device *const kbdev, } spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); - mutex_unlock(&kbdev->mmu_hw_mutex); + up_write(&kbdev->csf.pmode_sync_sem); } /** @@ -4797,8 +4760,9 @@ static int suspend_active_groups_on_powerdown(struct kbase_device *kbdev, bool s { struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; DECLARE_BITMAP(slot_mask, MAX_SUPPORTED_CSGS) = { 0 }; + int ret; - int ret = suspend_active_queue_groups(kbdev, slot_mask); + ret = suspend_active_queue_groups(kbdev, slot_mask, false); if (unlikely(ret)) { const int csg_nr = ffs(slot_mask[0]) - 1; @@ -4841,7 +4805,7 @@ static int suspend_active_groups_on_powerdown(struct kbase_device *kbdev, bool s * Returns false if any of the queues inside any of the groups that have been * assigned a physical CSG slot have work to execute, or have executed work * since having received a GPU idle notification. 
This function is used to - * handle a rance condition between firmware reporting GPU idle and userspace + * handle a race condition between firmware reporting GPU idle and userspace * submitting more work by directly ringing a doorbell. * * Return: false if any queue inside any resident group has work to be processed @@ -4988,14 +4952,14 @@ static bool scheduler_suspend_on_idle(struct kbase_device *kbdev) return true; } -static void gpu_idle_worker(struct work_struct *work) +static void gpu_idle_worker(struct kbase_device *kbdev) { - struct kbase_device *kbdev = - container_of(work, struct kbase_device, csf.scheduler.gpu_idle_work); struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; bool scheduler_is_idle_suspendable = false; bool all_groups_suspended = false; + WARN_ON_ONCE(atomic_read(&scheduler->pending_gpu_idle_work) == 0); + KBASE_KTRACE_ADD(kbdev, SCHEDULER_GPU_IDLE_WORKER_START, NULL, 0u); #define __ENCODE_KTRACE_INFO(reset, idle, all_suspend) \ @@ -5005,7 +4969,7 @@ static void gpu_idle_worker(struct work_struct *work) dev_warn(kbdev->dev, "Quit idle for failing to prevent gpu reset.\n"); KBASE_KTRACE_ADD(kbdev, SCHEDULER_GPU_IDLE_WORKER_END, NULL, __ENCODE_KTRACE_INFO(true, false, false)); - return; + goto exit; } kbase_debug_csf_fault_wait_completion(kbdev); mutex_lock(&scheduler->lock); @@ -5014,7 +4978,7 @@ static void gpu_idle_worker(struct work_struct *work) if (unlikely(scheduler->state == SCHED_BUSY)) { mutex_unlock(&scheduler->lock); kbase_reset_gpu_allow(kbdev); - return; + goto exit; } #endif @@ -5039,6 +5003,9 @@ static void gpu_idle_worker(struct work_struct *work) __ENCODE_KTRACE_INFO(false, scheduler_is_idle_suspendable, all_groups_suspended)); #undef __ENCODE_KTRACE_INFO + +exit: + atomic_dec(&scheduler->pending_gpu_idle_work); } static int scheduler_prepare(struct kbase_device *kbdev) @@ -5376,6 +5343,20 @@ static void evict_lru_or_blocked_csg(struct kbase_device *kbdev) } } +static void 
scheduler_enable_gpu_idle_timer(struct kbase_device *kbdev) +{ + struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; + unsigned long flags; + + lockdep_assert_held(&scheduler->lock); + + if (!kbdev->csf.gpu_idle_timer_enabled) { + spin_lock_irqsave(&scheduler->interrupt_lock, flags); + kbase_csf_firmware_enable_gpu_idle_timer(kbdev); + spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); + } +} + static void schedule_actions(struct kbase_device *kbdev, bool is_tick) { struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; @@ -5417,8 +5398,10 @@ static void schedule_actions(struct kbase_device *kbdev, bool is_tick) * steps and thus extending the previous tick's arrangement, * in particular, no alterations to on-slot CSGs. */ - if (keep_lru_on_slots(kbdev)) + if (keep_lru_on_slots(kbdev)) { + scheduler_enable_gpu_idle_timer(kbdev); return; + } } if (is_tick) @@ -5489,6 +5472,7 @@ redo_local_tock: wait_csg_slots_start(kbdev); wait_csg_slots_finish_prio_update(kbdev); + scheduler_enable_gpu_idle_timer(kbdev); if (new_protm_top_grp) { scheduler_group_check_protm_enter(kbdev, scheduler->top_grp); @@ -5544,9 +5528,10 @@ static bool can_skip_scheduling(struct kbase_device *kbdev) spin_lock_irqsave(&kbdev->hwaccess_lock, flags); if (kbdev->pm.backend.exit_gpu_sleep_mode) { - int ret = scheduler_pm_active_after_sleep(kbdev, &flags); + int ret; spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + ret = scheduler_pm_active_after_sleep(kbdev); if (!ret) { scheduler->state = SCHED_INACTIVE; KBASE_KTRACE_ADD(kbdev, SCHED_INACTIVE, NULL, scheduler->state); @@ -5662,7 +5647,9 @@ exit_no_schedule_unlock: kbase_reset_gpu_allow(kbdev); } -static int suspend_active_queue_groups(struct kbase_device *kbdev, unsigned long *slot_mask) + +static int suspend_active_queue_groups(struct kbase_device *kbdev, unsigned long *slot_mask, + bool reset) { struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; u32 num_groups = 
kbdev->csf.global_iface.group_num; @@ -5675,12 +5662,12 @@ static int suspend_active_queue_groups(struct kbase_device *kbdev, unsigned long struct kbase_queue_group *group = scheduler->csg_slots[slot_num].resident_group; if (group) { - suspend_queue_group(group); + suspend_queue_group(group); set_bit(slot_num, slot_mask); } } - ret = wait_csg_slots_suspend(kbdev, slot_mask); + ret = wait_csg_slots_suspend(kbdev, slot_mask); return ret; } @@ -5693,7 +5680,7 @@ static int suspend_active_queue_groups_on_reset(struct kbase_device *kbdev) mutex_lock(&scheduler->lock); - ret = suspend_active_queue_groups(kbdev, slot_mask); + ret = suspend_active_queue_groups(kbdev, slot_mask, true); if (ret) { dev_warn( @@ -5830,9 +5817,9 @@ static void scheduler_inner_reset(struct kbase_device *kbdev) WARN_ON(kbase_csf_scheduler_get_nr_active_csgs(kbdev)); /* Cancel any potential queued delayed work(s) */ - cancel_work_sync(&kbdev->csf.scheduler.gpu_idle_work); cancel_tick_work(scheduler); cancel_tock_work(scheduler); + cancel_gpu_idle_work(scheduler); cancel_delayed_work_sync(&scheduler->ping_work); mutex_lock(&scheduler->lock); @@ -5860,12 +5847,13 @@ static void scheduler_inner_reset(struct kbase_device *kbdev) void kbase_csf_scheduler_reset(struct kbase_device *kbdev) { struct kbase_context *kctx; - WARN_ON(!kbase_reset_gpu_is_active(kbdev)); KBASE_KTRACE_ADD(kbdev, SCHEDULER_RESET_START, NULL, 0u); - kbase_debug_csf_fault_wait_completion(kbdev); + if (kbase_reset_gpu_is_active(kbdev)) + kbase_debug_csf_fault_wait_completion(kbdev); + if (scheduler_handle_reset_in_protected_mode(kbdev) && !suspend_active_queue_groups_on_reset(kbdev)) { @@ -6453,8 +6441,8 @@ static void check_sync_update_in_sleep_mode(struct kbase_device *kbdev) * check_group_sync_update_worker() - Check the sync wait condition for all the * blocked queue groups * - * @work: Pointer to the context-specific work item for evaluating the wait - * condition for all the queue groups in idle_wait_groups list. 
+ * @kctx: The context to evaluate the wait condition for all the queue groups + * in idle_wait_groups list. * * This function checks the gpu queues of all the groups present in both * idle_wait_groups list of a context and all on slot idle groups (if GPU @@ -6464,27 +6452,14 @@ static void check_sync_update_in_sleep_mode(struct kbase_device *kbdev) * runnable groups so that Scheduler can consider scheduling the group * in next tick or exit protected mode. */ -static void check_group_sync_update_worker(struct work_struct *work) +static void check_group_sync_update_worker(struct kbase_context *kctx) { - struct kbase_context *const kctx = - container_of(work, struct kbase_context, csf.sched.sync_update_work); struct kbase_device *const kbdev = kctx->kbdev; struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; bool sync_updated = false; mutex_lock(&scheduler->lock); -#if IS_ENABLED(CONFIG_DEBUG_FS) - if (unlikely(scheduler->state == SCHED_BUSY)) { - queue_work(atomic_read(&kctx->prioritized) ? - kctx->csf.sched.sync_update_wq_high_prio : - kctx->csf.sched.sync_update_wq_normal_prio, - &kctx->csf.sched.sync_update_work); - mutex_unlock(&scheduler->lock); - return; - } -#endif - KBASE_KTRACE_ADD(kbdev, SCHEDULER_GROUP_SYNC_UPDATE_WORKER_START, kctx, 0u); if (kctx->csf.sched.num_idle_wait_grps != 0) { struct kbase_queue_group *group, *temp; @@ -6522,13 +6497,10 @@ static void check_group_sync_update_worker(struct work_struct *work) static enum kbase_csf_event_callback_action check_group_sync_update_cb(void *param) { struct kbase_context *const kctx = param; - struct workqueue_struct *wq = atomic_read(&kctx->prioritized) ? 
- kctx->csf.sched.sync_update_wq_high_prio : - kctx->csf.sched.sync_update_wq_normal_prio; KBASE_KTRACE_ADD(kctx->kbdev, SCHEDULER_GROUP_SYNC_UPDATE_EVENT, kctx, 0u); - queue_work(wq, &kctx->csf.sched.sync_update_work); + kbase_csf_scheduler_enqueue_sync_update_work(kctx); return KBASE_CSF_EVENT_CALLBACK_KEEP; } @@ -6539,6 +6511,8 @@ int kbase_csf_scheduler_context_init(struct kbase_context *kctx) int err; struct kbase_device *kbdev = kctx->kbdev; + WARN_ON_ONCE(!kbdev->csf.scheduler.kthread_running); + #if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) err = gpu_metrics_ctx_init(kctx); if (err) @@ -6551,25 +6525,7 @@ int kbase_csf_scheduler_context_init(struct kbase_context *kctx) INIT_LIST_HEAD(&kctx->csf.sched.idle_wait_groups); - kctx->csf.sched.sync_update_wq_high_prio = alloc_ordered_workqueue( - "mali_sync_wq_%i_high_prio", WQ_UNBOUND | WQ_HIGHPRI, kctx->tgid); - if (kctx->csf.sched.sync_update_wq_high_prio == NULL) { - dev_err(kbdev->dev, - "Failed to initialize scheduler context high-priority workqueue"); - err = -ENOMEM; - goto alloc_high_prio_wq_failed; - } - - kctx->csf.sched.sync_update_wq_normal_prio = - alloc_ordered_workqueue("mali_sync_wq_%i_normal_prio", 0, kctx->tgid); - if (kctx->csf.sched.sync_update_wq_normal_prio == NULL) { - dev_err(kbdev->dev, - "Failed to initialize scheduler context normal-priority workqueue"); - err = -ENOMEM; - goto alloc_normal_prio_wq_failed; - } - - INIT_WORK(&kctx->csf.sched.sync_update_work, check_group_sync_update_worker); + INIT_LIST_HEAD(&kctx->csf.sched.sync_update_work); kbase_csf_tiler_heap_reclaim_ctx_init(kctx); @@ -6583,10 +6539,6 @@ int kbase_csf_scheduler_context_init(struct kbase_context *kctx) return err; event_wait_add_failed: - destroy_workqueue(kctx->csf.sched.sync_update_wq_normal_prio); -alloc_normal_prio_wq_failed: - destroy_workqueue(kctx->csf.sched.sync_update_wq_high_prio); -alloc_high_prio_wq_failed: kbase_ctx_sched_remove_ctx(kctx); #if 
IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) gpu_metrics_ctx_term(kctx); @@ -6597,9 +6549,10 @@ alloc_high_prio_wq_failed: void kbase_csf_scheduler_context_term(struct kbase_context *kctx) { kbase_csf_event_wait_remove(kctx, check_group_sync_update_cb, kctx); - cancel_work_sync(&kctx->csf.sched.sync_update_work); - destroy_workqueue(kctx->csf.sched.sync_update_wq_normal_prio); - destroy_workqueue(kctx->csf.sched.sync_update_wq_high_prio); + + /* Drain a pending SYNC_UPDATE work if any */ + kbase_csf_scheduler_wait_for_kthread_pending_work(kctx->kbdev, + &kctx->csf.pending_sync_update); kbase_ctx_sched_remove_ctx(kctx); #if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) @@ -6607,53 +6560,157 @@ void kbase_csf_scheduler_context_term(struct kbase_context *kctx) #endif /* CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD */ } +static void handle_pending_sync_update_works(struct kbase_csf_scheduler *scheduler) +{ + struct kbase_context *sync_update_ctx; + + if (atomic_cmpxchg(&scheduler->pending_sync_update_works, true, false) == false) + return; + + do { + unsigned long flags; + + spin_lock_irqsave(&scheduler->sync_update_work_ctxs_lock, flags); + sync_update_ctx = NULL; + if (!list_empty(&scheduler->sync_update_work_ctxs)) { + sync_update_ctx = list_first_entry(&scheduler->sync_update_work_ctxs, + struct kbase_context, + csf.sched.sync_update_work); + list_del_init(&sync_update_ctx->csf.sched.sync_update_work); + } + spin_unlock_irqrestore(&scheduler->sync_update_work_ctxs_lock, flags); + + if (sync_update_ctx != NULL) { + WARN_ON_ONCE(atomic_read(&sync_update_ctx->csf.pending_sync_update) == 0); + check_group_sync_update_worker(sync_update_ctx); + atomic_dec(&sync_update_ctx->csf.pending_sync_update); + } + } while (sync_update_ctx != NULL); +} + +static void handle_pending_protm_requests(struct kbase_csf_scheduler *scheduler) +{ + struct kbase_queue_group *protm_grp; + + if (atomic_cmpxchg(&scheduler->pending_protm_event_works, true, false) == false) + return; + + 
do { + unsigned long flags; + + spin_lock_irqsave(&scheduler->protm_event_work_grps_lock, flags); + protm_grp = NULL; + if (!list_empty(&scheduler->protm_event_work_grps)) { + protm_grp = list_first_entry(&scheduler->protm_event_work_grps, + struct kbase_queue_group, protm_event_work); + list_del_init(&protm_grp->protm_event_work); + } + spin_unlock_irqrestore(&scheduler->protm_event_work_grps_lock, flags); + + if (protm_grp != NULL) { + WARN_ON_ONCE(atomic_read(&protm_grp->pending_protm_event_work) == 0); + kbase_csf_process_protm_event_request(protm_grp); + atomic_dec(&protm_grp->pending_protm_event_work); + } + } while (protm_grp != NULL); +} + +static void handle_pending_kcpuq_commands(struct kbase_csf_scheduler *scheduler) +{ + struct kbase_kcpu_command_queue *kcpuq; + + if (atomic_cmpxchg(&scheduler->pending_kcpuq_works, true, false) == false) + return; + + do { + unsigned long flags; + + spin_lock_irqsave(&scheduler->kcpuq_work_queues_lock, flags); + kcpuq = NULL; + if (!list_empty(&scheduler->kcpuq_work_queues)) { + kcpuq = list_first_entry(&scheduler->kcpuq_work_queues, + struct kbase_kcpu_command_queue, high_prio_work); + list_del_init(&kcpuq->high_prio_work); + } + spin_unlock_irqrestore(&scheduler->kcpuq_work_queues_lock, flags); + + if (kcpuq != NULL) { + WARN_ON_ONCE(atomic_read(&kcpuq->pending_kick) == 0); + + mutex_lock(&kcpuq->lock); + kbase_csf_kcpu_queue_process(kcpuq, false); + mutex_unlock(&kcpuq->lock); + + atomic_dec(&kcpuq->pending_kick); + } + } while (kcpuq != NULL); +} + +static void handle_pending_queue_kicks(struct kbase_device *kbdev) +{ + struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; + struct kbase_queue *queue; + + if (atomic_cmpxchg(&kbdev->csf.pending_gpuq_kicks, true, false) == false) + return; + + do { + u8 prio; + + spin_lock(&kbdev->csf.pending_gpuq_kick_queues_lock); + queue = NULL; + for (prio = 0; prio != KBASE_QUEUE_GROUP_PRIORITY_COUNT; ++prio) { + if 
(!list_empty(&kbdev->csf.pending_gpuq_kick_queues[prio])) { + queue = list_first_entry(&kbdev->csf.pending_gpuq_kick_queues[prio], + struct kbase_queue, pending_kick_link); + list_del_init(&queue->pending_kick_link); + break; + } + } + spin_unlock(&kbdev->csf.pending_gpuq_kick_queues_lock); + + if (queue != NULL) { + WARN_ONCE( + prio != queue->group_priority, + "Queue %pK has priority %u but instead its kick was handled at priority %u", + (void *)queue, queue->group_priority, prio); + WARN_ON_ONCE(atomic_read(&queue->pending_kick) == 0); + + kbase_csf_process_queue_kick(queue); + + /* Perform a scheduling tock for high-priority queue groups if + * required. + */ + BUILD_BUG_ON(KBASE_QUEUE_GROUP_PRIORITY_REALTIME != 0); + BUILD_BUG_ON(KBASE_QUEUE_GROUP_PRIORITY_HIGH != 1); + if ((prio <= KBASE_QUEUE_GROUP_PRIORITY_HIGH) && + atomic_read(&scheduler->pending_tock_work)) + schedule_on_tock(kbdev); + } + } while (queue != NULL); +} + static int kbase_csf_scheduler_kthread(void *data) { struct kbase_device *const kbdev = data; struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; while (scheduler->kthread_running) { - struct kbase_queue *queue; - if (wait_for_completion_interruptible(&scheduler->kthread_signal) != 0) continue; reinit_completion(&scheduler->kthread_signal); - /* Iterate through queues with pending kicks */ - do { - u8 prio; + /* + * The order in which these requests are handled is based on + * how they would influence each other's decisions. As a + * result, the tick & tock requests must be handled after all + * other requests, but before the GPU IDLE work. 
+ */ - spin_lock(&kbdev->csf.pending_gpuq_kicks_lock); - queue = NULL; - for (prio = 0; prio != KBASE_QUEUE_GROUP_PRIORITY_COUNT; ++prio) { - if (!list_empty(&kbdev->csf.pending_gpuq_kicks[prio])) { - queue = list_first_entry( - &kbdev->csf.pending_gpuq_kicks[prio], - struct kbase_queue, pending_kick_link); - list_del_init(&queue->pending_kick_link); - break; - } - } - spin_unlock(&kbdev->csf.pending_gpuq_kicks_lock); - - if (queue != NULL) { - WARN_ONCE( - prio != queue->group_priority, - "Queue %pK has priority %hhu but instead its kick was handled at priority %hhu", - (void *)queue, queue->group_priority, prio); - - kbase_csf_process_queue_kick(queue); - - /* Perform a scheduling tock for high-priority queue groups if - * required. - */ - BUILD_BUG_ON(KBASE_QUEUE_GROUP_PRIORITY_REALTIME != 0); - BUILD_BUG_ON(KBASE_QUEUE_GROUP_PRIORITY_HIGH != 1); - if ((prio <= KBASE_QUEUE_GROUP_PRIORITY_HIGH) && - atomic_read(&scheduler->pending_tock_work)) - schedule_on_tock(kbdev); - } - } while (queue != NULL); + handle_pending_sync_update_works(scheduler); + handle_pending_protm_requests(scheduler); + handle_pending_kcpuq_commands(scheduler); + handle_pending_queue_kicks(kbdev); /* Check if we need to perform a scheduling tick/tock. A tick * event shall override a tock event but not vice-versa. 
@@ -6665,6 +6722,10 @@ static int kbase_csf_scheduler_kthread(void *data) schedule_on_tock(kbdev); } + /* Drain pending GPU idle works */ + while (atomic_read(&scheduler->pending_gpu_idle_work) > 0) + gpu_idle_worker(kbdev); + dev_dbg(kbdev->dev, "Waking up for event after a scheduling iteration."); wake_up_all(&kbdev->csf.event_wait); } @@ -6694,7 +6755,7 @@ int kbase_csf_scheduler_init(struct kbase_device *kbdev) scheduler->kthread_running = true; scheduler->gpuq_kthread = kthread_run(&kbase_csf_scheduler_kthread, kbdev, "mali-gpuq-kthread"); - if (!scheduler->gpuq_kthread) { + if (IS_ERR_OR_NULL(scheduler->gpuq_kthread)) { kfree(scheduler->csg_slots); scheduler->csg_slots = NULL; @@ -6734,12 +6795,6 @@ int kbase_csf_scheduler_early_init(struct kbase_device *kbdev) atomic_set(&scheduler->timer_enabled, true); - scheduler->idle_wq = alloc_ordered_workqueue("csf_scheduler_gpu_idle_wq", WQ_HIGHPRI); - if (!scheduler->idle_wq) { - dev_err(kbdev->dev, "Failed to allocate GPU idle scheduler workqueue\n"); - return -ENOMEM; - } - INIT_DEFERRABLE_WORK(&scheduler->ping_work, firmware_aliveness_monitor); mutex_init(&scheduler->lock); @@ -6757,20 +6812,30 @@ int kbase_csf_scheduler_early_init(struct kbase_device *kbdev) KBASE_KTRACE_ADD(kbdev, SCHED_SUSPENDED, NULL, scheduler->state); scheduler->csg_scheduling_period_ms = CSF_SCHEDULER_TIME_TICK_MS; scheduler_doorbell_init(kbdev); - INIT_WORK(&scheduler->gpu_idle_work, gpu_idle_worker); hrtimer_init(&scheduler->tick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); scheduler->tick_timer.function = tick_timer_callback; - kbase_csf_tiler_heap_reclaim_mgr_init(kbdev); + atomic_set(&scheduler->pending_sync_update_works, false); + spin_lock_init(&scheduler->sync_update_work_ctxs_lock); + INIT_LIST_HEAD(&scheduler->sync_update_work_ctxs); + atomic_set(&scheduler->pending_protm_event_works, false); + spin_lock_init(&scheduler->protm_event_work_grps_lock); + INIT_LIST_HEAD(&scheduler->protm_event_work_grps); + 
atomic_set(&scheduler->pending_kcpuq_works, false); + spin_lock_init(&scheduler->kcpuq_work_queues_lock); + INIT_LIST_HEAD(&scheduler->kcpuq_work_queues); + atomic_set(&scheduler->pending_tick_work, false); + atomic_set(&scheduler->pending_tock_work, false); + atomic_set(&scheduler->pending_gpu_idle_work, 0); - return 0; + return kbase_csf_tiler_heap_reclaim_mgr_init(kbdev); } void kbase_csf_scheduler_term(struct kbase_device *kbdev) { struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; - if (scheduler->gpuq_kthread) { + if (!IS_ERR_OR_NULL(scheduler->gpuq_kthread)) { scheduler->kthread_running = false; complete(&scheduler->kthread_signal); kthread_stop(scheduler->gpuq_kthread); @@ -6784,7 +6849,6 @@ void kbase_csf_scheduler_term(struct kbase_device *kbdev) * to be active at the time of Driver unload. */ WARN_ON(kbase_csf_scheduler_get_nr_active_csgs(kbdev)); - flush_work(&kbdev->csf.scheduler.gpu_idle_work); mutex_lock(&kbdev->csf.scheduler.lock); if (kbdev->csf.scheduler.state != SCHED_SUSPENDED) { @@ -6811,9 +6875,6 @@ void kbase_csf_scheduler_term(struct kbase_device *kbdev) void kbase_csf_scheduler_early_term(struct kbase_device *kbdev) { - if (kbdev->csf.scheduler.idle_wq) - destroy_workqueue(kbdev->csf.scheduler.idle_wq); - kbase_csf_tiler_heap_reclaim_mgr_term(kbdev); mutex_destroy(&kbdev->csf.scheduler.lock); } @@ -6926,7 +6987,7 @@ int kbase_csf_scheduler_pm_suspend_no_lock(struct kbase_device *kbdev) dev_warn(kbdev->dev, "failed to suspend active groups"); goto exit; } else { - dev_info(kbdev->dev, "Scheduler PM suspend"); + dev_dbg(kbdev->dev, "Scheduler PM suspend"); scheduler_suspend(kbdev); cancel_tick_work(scheduler); } @@ -6968,7 +7029,7 @@ void kbase_csf_scheduler_pm_resume_no_lock(struct kbase_device *kbdev) lockdep_assert_held(&scheduler->lock); if ((scheduler->total_runnable_grps > 0) && (scheduler->state == SCHED_SUSPENDED)) { - dev_info(kbdev->dev, "Scheduler PM resume"); + dev_dbg(kbdev->dev, "Scheduler PM resume"); 
scheduler_wakeup(kbdev, true); } } @@ -6989,7 +7050,7 @@ void kbase_csf_scheduler_pm_active(struct kbase_device *kbdev) * the CSGs before powering down the GPU. */ mutex_lock(&kbdev->csf.scheduler.lock); - scheduler_pm_active_handle_suspend(kbdev, KBASE_PM_SUSPEND_HANDLER_NOT_POSSIBLE); + scheduler_pm_active_handle_suspend(kbdev, KBASE_PM_SUSPEND_HANDLER_NOT_POSSIBLE, false); mutex_unlock(&kbdev->csf.scheduler.lock); } KBASE_EXPORT_TEST_API(kbase_csf_scheduler_pm_active); @@ -7013,9 +7074,7 @@ static int scheduler_wait_mcu_active(struct kbase_device *kbdev, bool killable_w kbase_pm_lock(kbdev); WARN_ON(!kbdev->pm.active_count); - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); WARN_ON(!scheduler->pm_active_count); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); kbase_pm_unlock(kbdev); if (killable_wait) @@ -7096,6 +7155,65 @@ int kbase_csf_scheduler_handle_runtime_suspend(struct kbase_device *kbdev) return 0; } +void kbase_csf_scheduler_enqueue_sync_update_work(struct kbase_context *kctx) +{ + struct kbase_csf_scheduler *const scheduler = &kctx->kbdev->csf.scheduler; + unsigned long flags; + + spin_lock_irqsave(&scheduler->sync_update_work_ctxs_lock, flags); + if (list_empty(&kctx->csf.sched.sync_update_work)) { + list_add_tail(&kctx->csf.sched.sync_update_work, &scheduler->sync_update_work_ctxs); + atomic_inc(&kctx->csf.pending_sync_update); + if (atomic_cmpxchg(&scheduler->pending_sync_update_works, false, true) == false) + complete(&scheduler->kthread_signal); + } + spin_unlock_irqrestore(&scheduler->sync_update_work_ctxs_lock, flags); +} + +void kbase_csf_scheduler_enqueue_protm_event_work(struct kbase_queue_group *group) +{ + struct kbase_context *const kctx = group->kctx; + struct kbase_csf_scheduler *const scheduler = &kctx->kbdev->csf.scheduler; + unsigned long flags; + + spin_lock_irqsave(&scheduler->protm_event_work_grps_lock, flags); + if (list_empty(&group->protm_event_work)) { + list_add_tail(&group->protm_event_work, 
&scheduler->protm_event_work_grps); + atomic_inc(&group->pending_protm_event_work); + if (atomic_cmpxchg(&scheduler->pending_protm_event_works, false, true) == false) + complete(&scheduler->kthread_signal); + } + spin_unlock_irqrestore(&scheduler->protm_event_work_grps_lock, flags); +} + +void kbase_csf_scheduler_enqueue_kcpuq_work(struct kbase_kcpu_command_queue *queue) +{ + struct kbase_csf_scheduler *const scheduler = &queue->kctx->kbdev->csf.scheduler; + unsigned long flags; + + spin_lock_irqsave(&scheduler->kcpuq_work_queues_lock, flags); + if (list_empty(&queue->high_prio_work)) { + list_add_tail(&queue->high_prio_work, &scheduler->kcpuq_work_queues); + atomic_inc(&queue->pending_kick); + if (atomic_cmpxchg(&scheduler->pending_kcpuq_works, false, true) == false) + complete(&scheduler->kthread_signal); + } + spin_unlock_irqrestore(&scheduler->kcpuq_work_queues_lock, flags); +} + +void kbase_csf_scheduler_wait_for_kthread_pending_work(struct kbase_device *kbdev, + atomic_t *pending) +{ + /* + * Signal kbase_csf_scheduler_kthread() to allow for the + * eventual completion of the current iteration. Once the work is + * done, the event_wait wait queue shall be signalled. + */ + + complete(&kbdev->csf.scheduler.kthread_signal); + wait_event(kbdev->csf.event_wait, atomic_read(pending) == 0); +} + void kbase_csf_scheduler_reval_idleness_post_sleep(struct kbase_device *kbdev) { u32 csg_nr; diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_scheduler.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_scheduler.h index 5047092d6650..e84994600809 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_scheduler.h +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_scheduler.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2024 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -235,7 +235,8 @@ void kbase_csf_scheduler_early_term(struct kbase_device *kbdev); * No explicit re-initialization is done for CSG & CS interface I/O pages; * instead, that happens implicitly on firmware reload. * - * Should be called only after initiating the GPU reset. + * Should be called either after initiating the GPU reset or when MCU reset is + * expected to follow such as GPU_LOST case. */ void kbase_csf_scheduler_reset(struct kbase_device *kbdev); @@ -487,6 +488,48 @@ static inline bool kbase_csf_scheduler_all_csgs_idle(struct kbase_device *kbdev) kbdev->csf.global_iface.group_num); } +/** + * kbase_csf_scheduler_enqueue_sync_update_work() - Add a context to the list + * of contexts to handle + * SYNC_UPDATE events. + * + * @kctx: The context to handle SYNC_UPDATE event + * + * This function wakes up kbase_csf_scheduler_kthread() to handle pending + * SYNC_UPDATE events for all contexts. + */ +void kbase_csf_scheduler_enqueue_sync_update_work(struct kbase_context *kctx); + +/** + * kbase_csf_scheduler_enqueue_protm_event_work() - Add a group to the list + * of groups to handle + * PROTM requests. + * + * @group: The group to handle protected mode request + * + * This function wakes up kbase_csf_scheduler_kthread() to handle pending + * protected mode requests for all groups. + */ +void kbase_csf_scheduler_enqueue_protm_event_work(struct kbase_queue_group *group); + +/** + * kbase_csf_scheduler_enqueue_kcpuq_work() - Wake up kbase_csf_scheduler_kthread() to process + * pending commands for a KCPU queue. + * + * @queue: The queue to process pending commands for + */ +void kbase_csf_scheduler_enqueue_kcpuq_work(struct kbase_kcpu_command_queue *queue); + +/** + * kbase_csf_scheduler_wait_for_kthread_pending_work - Wait until a pending work has completed in + * kbase_csf_scheduler_kthread(). 
+ * + * @kbdev: Instance of a GPU platform device that implements a CSF interface + * @pending: The work to wait for + */ +void kbase_csf_scheduler_wait_for_kthread_pending_work(struct kbase_device *kbdev, + atomic_t *pending); + /** * kbase_csf_scheduler_invoke_tick() - Invoke the scheduling tick * @@ -591,11 +634,8 @@ int kbase_csf_scheduler_handle_runtime_suspend(struct kbase_device *kbdev); * @kbdev: Pointer to the device * * This function is called when a GPU idle IRQ has been raised. - * - * Return: true if the PM state machine needs to be invoked after the processing - * of GPU idle irq, otherwise false. */ -bool kbase_csf_scheduler_process_gpu_idle_event(struct kbase_device *kbdev); +void kbase_csf_scheduler_process_gpu_idle_event(struct kbase_device *kbdev); /** * kbase_csf_scheduler_get_nr_active_csgs() - Get the number of active CSGs diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_sync.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_sync.c index aa88b5f59d3b..27b792500bdf 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_sync.c +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_sync.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2023-2024 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -117,13 +117,13 @@ static void kbasep_csf_sync_print_kcpu_fence_wait_or_signal(char *buffer, int *l timeline_name = fence->ops->get_timeline_name(fence); is_signaled = info.status > 0; - *length += snprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length, - "cmd:%s obj:0x%pK live_value:0x%.8x | ", cmd_name, fence, is_signaled); + *length += scnprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length, + "cmd:%s obj:0x%pK live_value:0x%.8x | ", cmd_name, fence, is_signaled); /* Note: fence->seqno was u32 until 5.1 kernel, then u64 */ - *length += snprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length, - "timeline_name:%s timeline_context:0x%.16llx fence_seqno:0x%.16llx", - timeline_name, fence->context, (u64)fence->seqno); + *length += scnprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length, + "timeline_name:%s timeline_context:0x%.16llx fence_seqno:0x%.16llx", + timeline_name, fence->context, (u64)fence->seqno); kbase_fence_put(fence); } @@ -149,19 +149,19 @@ static void kbasep_csf_sync_print_kcpu_cqs_wait(struct kbase_context *kctx, char int ret = kbasep_csf_sync_get_cqs_live_u32(kctx, cqs_obj->addr, &live_val); bool live_val_valid = (ret >= 0); - *length += - snprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length, - "cmd:CQS_WAIT_OPERATION obj:0x%.16llx live_value:", cqs_obj->addr); + *length += scnprintf( + buffer + *length, CSF_SYNC_DUMP_SIZE - *length, + "cmd:CQS_WAIT_OPERATION obj:0x%.16llx live_value:", cqs_obj->addr); if (live_val_valid) - *length += snprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length, - "0x%.16llx", (u64)live_val); + *length += scnprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length, + "0x%.16llx", (u64)live_val); else - *length += snprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length, - CQS_UNREADABLE_LIVE_VALUE); + *length += scnprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - 
*length, + CQS_UNREADABLE_LIVE_VALUE); - *length += snprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length, - " | op:gt arg_value:0x%.8x", cqs_obj->val); + *length += scnprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length, + " | op:gt arg_value:0x%.8x", cqs_obj->val); } } @@ -187,18 +187,18 @@ static void kbasep_csf_sync_print_kcpu_cqs_set(struct kbase_context *kctx, char bool live_val_valid = (ret >= 0); *length += - snprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length, - "cmd:CQS_SET_OPERATION obj:0x%.16llx live_value:", cqs_obj->addr); + scnprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length, + "cmd:CQS_SET_OPERATION obj:0x%.16llx live_value:", cqs_obj->addr); if (live_val_valid) - *length += snprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length, - "0x%.16llx", (u64)live_val); + *length += scnprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length, + "0x%.16llx", (u64)live_val); else - *length += snprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length, - CQS_UNREADABLE_LIVE_VALUE); + *length += scnprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length, + CQS_UNREADABLE_LIVE_VALUE); - *length += snprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length, - " | op:add arg_value:0x%.8x", 1); + *length += scnprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length, + " | op:add arg_value:0x%.8x", 1); } } @@ -277,19 +277,19 @@ static void kbasep_csf_sync_print_kcpu_cqs_wait_op(struct kbase_context *kctx, c bool live_val_valid = (ret >= 0); - *length += - snprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length, - "cmd:CQS_WAIT_OPERATION obj:0x%.16llx live_value:", wait_op->addr); + *length += scnprintf( + buffer + *length, CSF_SYNC_DUMP_SIZE - *length, + "cmd:CQS_WAIT_OPERATION obj:0x%.16llx live_value:", wait_op->addr); if (live_val_valid) - *length += snprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length, - "0x%.16llx", live_val); + *length += scnprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length, + "0x%.16llx", live_val); else - *length += snprintf(buffer + 
*length, CSF_SYNC_DUMP_SIZE - *length, - CQS_UNREADABLE_LIVE_VALUE); + *length += scnprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length, + CQS_UNREADABLE_LIVE_VALUE); - *length += snprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length, - " | op:%s arg_value:0x%.16llx", op_name, wait_op->val); + *length += scnprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length, + " | op:%s arg_value:0x%.16llx", op_name, wait_op->val); } } @@ -319,18 +319,18 @@ static void kbasep_csf_sync_print_kcpu_cqs_set_op(struct kbase_context *kctx, ch bool live_val_valid = (ret >= 0); *length += - snprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length, - "cmd:CQS_SET_OPERATION obj:0x%.16llx live_value:", set_op->addr); + scnprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length, + "cmd:CQS_SET_OPERATION obj:0x%.16llx live_value:", set_op->addr); if (live_val_valid) - *length += snprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length, - "0x%.16llx", live_val); + *length += scnprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length, + "0x%.16llx", live_val); else - *length += snprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length, - CQS_UNREADABLE_LIVE_VALUE); + *length += scnprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length, + CQS_UNREADABLE_LIVE_VALUE); - *length += snprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length, - " | op:%s arg_value:0x%.16llx", op_name, set_op->val); + *length += scnprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length, + " | op:%s arg_value:0x%.16llx", op_name, set_op->val); } } @@ -360,8 +360,8 @@ static void kbasep_csf_sync_kcpu_print_queue(struct kbase_context *kctx, int length = 0; started_or_pending = ((i == 0) && queue->command_started) ? 
'S' : 'P'; - length += snprintf(buffer, CSF_SYNC_DUMP_SIZE, "queue:KCPU-%d-%d exec:%c ", - kctx->id, queue->id, started_or_pending); + length += scnprintf(buffer, CSF_SYNC_DUMP_SIZE, "queue:KCPU-%d-%d exec:%c ", + kctx->id, queue->id, started_or_pending); cmd = &queue->commands[(u8)(queue->start_offset + i)]; switch (cmd->type) { @@ -388,12 +388,12 @@ static void kbasep_csf_sync_kcpu_print_queue(struct kbase_context *kctx, kbasep_csf_sync_print_kcpu_cqs_set_op(kctx, buffer, &length, cmd); break; default: - length += snprintf(buffer + length, CSF_SYNC_DUMP_SIZE - length, - ", U, Unknown blocking command"); + length += scnprintf(buffer + length, CSF_SYNC_DUMP_SIZE - length, + ", U, Unknown blocking command"); break; } - length += snprintf(buffer + length, CSF_SYNC_DUMP_SIZE - length, "\n"); + length += scnprintf(buffer + length, CSF_SYNC_DUMP_SIZE - length, "\n"); kbasep_print(kbpr, buffer); } diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap.c index 2d148eea025e..51d665f23970 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap.c +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap.c @@ -218,7 +218,7 @@ static void remove_unlinked_chunk(struct kbase_context *kctx, if (WARN_ON(!list_empty(&chunk->link))) return; - kbase_gpu_vm_lock(kctx); + kbase_gpu_vm_lock_with_pmode_sync(kctx); kbase_vunmap(kctx, &chunk->map); /* KBASE_REG_DONT_NEED regions will be confused with ephemeral regions (inc freed JIT * regions), and so we must clear that flag too before freeing. 
@@ -231,7 +231,7 @@ static void remove_unlinked_chunk(struct kbase_context *kctx, chunk->region->flags &= ~KBASE_REG_DONT_NEED; #endif kbase_mem_free_region(kctx, chunk->region); - kbase_gpu_vm_unlock(kctx); + kbase_gpu_vm_unlock_with_pmode_sync(kctx); kfree(chunk); } @@ -1058,6 +1058,7 @@ static bool delete_chunk_physical_pages(struct kbase_csf_tiler_heap *heap, u64 c struct kbase_csf_tiler_heap_chunk *chunk = NULL; lockdep_assert_held(&heap->kctx->csf.tiler_heaps.lock); + lockdep_assert_held(&kctx->kbdev->csf.scheduler.lock); chunk = find_chunk(heap, chunk_gpu_va); if (unlikely(!chunk)) { diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap_reclaim.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap_reclaim.c index a2bb49422e98..df4feb77f0cd 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap_reclaim.c +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap_reclaim.c @@ -331,8 +331,8 @@ static unsigned long kbase_csf_tiler_heap_reclaim_scan_free_pages(struct kbase_d static unsigned long kbase_csf_tiler_heap_reclaim_count_objects(struct shrinker *s, struct shrink_control *sc) { - struct kbase_device *kbdev = - container_of(s, struct kbase_device, csf.scheduler.reclaim_mgr.heap_reclaim); + struct kbase_device *kbdev = KBASE_GET_KBASE_DATA_FROM_SHRINKER( + s, struct kbase_device, csf.scheduler.reclaim_mgr.heap_reclaim); return kbase_csf_tiler_heap_reclaim_count_free_pages(kbdev, sc); } @@ -340,8 +340,8 @@ static unsigned long kbase_csf_tiler_heap_reclaim_count_objects(struct shrinker static unsigned long kbase_csf_tiler_heap_reclaim_scan_objects(struct shrinker *s, struct shrink_control *sc) { - struct kbase_device *kbdev = - container_of(s, struct kbase_device, csf.scheduler.reclaim_mgr.heap_reclaim); + struct kbase_device *kbdev = KBASE_GET_KBASE_DATA_FROM_SHRINKER( + s, struct kbase_device, csf.scheduler.reclaim_mgr.heap_reclaim); return kbase_csf_tiler_heap_reclaim_scan_free_pages(kbdev, sc); } @@ -352,11 +352,17 @@ void 
kbase_csf_tiler_heap_reclaim_ctx_init(struct kbase_context *kctx) INIT_LIST_HEAD(&kctx->csf.sched.heap_info.mgr_link); } -void kbase_csf_tiler_heap_reclaim_mgr_init(struct kbase_device *kbdev) +int kbase_csf_tiler_heap_reclaim_mgr_init(struct kbase_device *kbdev) { struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; - struct shrinker *reclaim = &scheduler->reclaim_mgr.heap_reclaim; u8 prio; + struct shrinker *reclaim; + + reclaim = + KBASE_INIT_RECLAIM(&(scheduler->reclaim_mgr), heap_reclaim, "mali-csf-tiler-heap"); + if (!reclaim) + return -ENOMEM; + KBASE_SET_RECLAIM(&(scheduler->reclaim_mgr), heap_reclaim, reclaim); for (prio = KBASE_QUEUE_GROUP_PRIORITY_REALTIME; prio < KBASE_QUEUE_GROUP_PRIORITY_COUNT; prio++) @@ -366,6 +372,11 @@ void kbase_csf_tiler_heap_reclaim_mgr_init(struct kbase_device *kbdev) reclaim->scan_objects = kbase_csf_tiler_heap_reclaim_scan_objects; reclaim->seeks = HEAP_SHRINKER_SEEKS; reclaim->batch = HEAP_SHRINKER_BATCH; + + if (!IS_ENABLED(CONFIG_MALI_VECTOR_DUMP)) + KBASE_REGISTER_SHRINKER(reclaim, "mali-csf-tiler-heap", kbdev); + + return 0; } void kbase_csf_tiler_heap_reclaim_mgr_term(struct kbase_device *kbdev) @@ -373,6 +384,9 @@ void kbase_csf_tiler_heap_reclaim_mgr_term(struct kbase_device *kbdev) struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; u8 prio; + if (!IS_ENABLED(CONFIG_MALI_VECTOR_DUMP)) + KBASE_UNREGISTER_SHRINKER(scheduler->reclaim_mgr.heap_reclaim); + for (prio = KBASE_QUEUE_GROUP_PRIORITY_REALTIME; prio < KBASE_QUEUE_GROUP_PRIORITY_COUNT; prio++) WARN_ON(!list_empty(&scheduler->reclaim_mgr.ctx_lists[prio])); diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap_reclaim.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap_reclaim.h index 7880de04c84f..d41b7baabd02 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap_reclaim.h +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap_reclaim.h @@ -66,8 +66,10 @@ void kbase_csf_tiler_heap_reclaim_ctx_init(struct 
kbase_context *kctx); * @kbdev: Pointer to the device. * * This function must be called only when a kbase device is initialized. + * + * Return: 0 if issuing reclaim_mgr init was successful, otherwise an error code. */ -void kbase_csf_tiler_heap_reclaim_mgr_init(struct kbase_device *kbdev); +int kbase_csf_tiler_heap_reclaim_mgr_init(struct kbase_device *kbdev); /** * kbase_csf_tiler_heap_reclaim_mgr_term - Termination call for the tiler heap reclaim manger. diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tl_reader.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tl_reader.c index 54054661f7a9..eb5c8a40b8c9 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tl_reader.c +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tl_reader.c @@ -151,13 +151,22 @@ static bool tl_reader_overflow_check(struct kbase_csf_tl_reader *self, u16 event * * Reset the reader to the default state, i.e. set all the * mutable fields to zero. + * + * NOTE: this function expects the irq spinlock to be held. */ static void tl_reader_reset(struct kbase_csf_tl_reader *self) { + lockdep_assert_held(&self->read_lock); + self->got_first_event = false; self->is_active = false; self->expected_event_id = 0; self->tl_header.btc = 0; + + /* There might be data left in the trace buffer from the previous + * tracing session. We don't want it to leak into this session. 
+ */ + kbase_csf_firmware_trace_buffer_discard_all(self->trace_buffer); } int kbase_csf_tl_reader_flush_buffer(struct kbase_csf_tl_reader *self) @@ -324,21 +333,16 @@ static int tl_reader_update_enable_bit(struct kbase_csf_tl_reader *self, bool va void kbase_csf_tl_reader_init(struct kbase_csf_tl_reader *self, struct kbase_tlstream *stream) { - self->timer_interval = KBASE_CSF_TL_READ_INTERVAL_DEFAULT; + *self = (struct kbase_csf_tl_reader){ + .timer_interval = KBASE_CSF_TL_READ_INTERVAL_DEFAULT, + .stream = stream, + .kbdev = NULL, /* This will be initialized by tl_reader_init_late() */ + .is_active = false, + }; kbase_timer_setup(&self->read_timer, kbasep_csf_tl_reader_read_callback); - self->stream = stream; - - /* This will be initialized by tl_reader_init_late() */ - self->kbdev = NULL; - self->trace_buffer = NULL; - self->tl_header.data = NULL; - self->tl_header.size = 0; - spin_lock_init(&self->read_lock); - - tl_reader_reset(self); } void kbase_csf_tl_reader_term(struct kbase_csf_tl_reader *self) @@ -348,13 +352,19 @@ void kbase_csf_tl_reader_term(struct kbase_csf_tl_reader *self) int kbase_csf_tl_reader_start(struct kbase_csf_tl_reader *self, struct kbase_device *kbdev) { + unsigned long flags; int rcode; + spin_lock_irqsave(&self->read_lock, flags); + /* If already running, early exit. */ - if (self->is_active) + if (self->is_active) { + spin_unlock_irqrestore(&self->read_lock, flags); return 0; + } if (tl_reader_init_late(self, kbdev)) { + spin_unlock_irqrestore(&self->read_lock, flags); #if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) dev_warn(kbdev->dev, "CSFFW timeline is not available for MALI_BIFROST_NO_MALI builds!"); return 0; @@ -366,6 +376,9 @@ int kbase_csf_tl_reader_start(struct kbase_csf_tl_reader *self, struct kbase_dev tl_reader_reset(self); self->is_active = true; + + spin_unlock_irqrestore(&self->read_lock, flags); + /* Set bytes to copy to the header size. This is to trigger copying * of the header to the user space. 
*/ diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_trace_buffer.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_trace_buffer.c index 8ed7c91553a6..cdab5a17f70c 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_trace_buffer.c +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_trace_buffer.c @@ -519,6 +519,14 @@ void kbase_csf_firmware_trace_buffer_discard(struct firmware_trace_buffer *trace } EXPORT_SYMBOL(kbase_csf_firmware_trace_buffer_discard); +void kbase_csf_firmware_trace_buffer_discard_all(struct firmware_trace_buffer *trace_buffer) +{ + if (WARN_ON(!trace_buffer)) + return; + + *(trace_buffer->cpu_va.extract_cpu_va) = *(trace_buffer->cpu_va.insert_cpu_va); +} + static void update_trace_buffer_active_mask64(struct firmware_trace_buffer *tb, u64 mask) { unsigned int i; diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_trace_buffer.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_trace_buffer.h index 90dfcb2699bc..35988eaf8f5a 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_trace_buffer.h +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_trace_buffer.h @@ -179,6 +179,15 @@ unsigned int kbase_csf_firmware_trace_buffer_read_data(struct firmware_trace_buf */ void kbase_csf_firmware_trace_buffer_discard(struct firmware_trace_buffer *trace_buffer); +/** + * kbase_csf_firmware_trace_buffer_discard_all - Discard all data from a trace buffer + * + * @trace_buffer: Trace buffer handle + * + * Discard all the data in the trace buffer to make it empty. 
+ */ +void kbase_csf_firmware_trace_buffer_discard_all(struct firmware_trace_buffer *trace_buffer); + /** * kbase_csf_firmware_trace_buffer_get_active_mask64 - Get trace buffer active mask * diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_util.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_util.c index 7dc32a11bb29..5f13672e70b8 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_util.c +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_util.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2023-2024 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -115,7 +115,7 @@ struct kbasep_printer *kbasep_printer_buffer_init(struct kbase_device *kbdev, if (kbpr) { if (kfifo_alloc(&kbpr->fifo, KBASEP_PRINTER_BUFFER_MAX_SIZE, GFP_KERNEL)) { - kfree(kbpr); + vfree(kbpr); return NULL; } kbpr->kbdev = kbdev; @@ -224,7 +224,7 @@ __attribute__((format(__printf__, 2, 3))) void kbasep_print(struct kbasep_printe va_list arglist; va_start(arglist, fmt); - len = vsnprintf(buffer, KBASEP_PRINT_FORMAT_BUFFER_MAX_SIZE, fmt, arglist); + len = vscnprintf(buffer, KBASEP_PRINT_FORMAT_BUFFER_MAX_SIZE, fmt, arglist); if (len <= 0) { pr_err("message write to the buffer failed"); goto exit; diff --git a/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_csf.c b/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_csf.c index ec5ca10e135b..b14ffc69c54c 100644 --- a/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_csf.c +++ b/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_csf.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2020-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2020-2024 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -27,8 +27,8 @@ void kbasep_ktrace_backend_format_header(char *buffer, int sz, s32 *written) { - *written += MAX(snprintf(buffer + *written, (size_t)MAX(sz - *written, 0), - "group,slot,prio,csi,kcpu"), + *written += MAX(scnprintf(buffer + *written, (size_t)MAX(sz - *written, 0), + "group,slot,prio,csi,kcpu"), 0); } @@ -44,38 +44,39 @@ void kbasep_ktrace_backend_format_msg(struct kbase_ktrace_msg *trace_msg, char * if (be_msg->gpu.flags & KBASE_KTRACE_FLAG_CSF_GROUP) { const s8 slot = be_msg->gpu.csg_nr; /* group,slot, */ - *written += MAX(snprintf(buffer + *written, (size_t)MAX(sz - *written, 0), "%u,%d,", - be_msg->gpu.group_handle, slot), + *written += MAX(scnprintf(buffer + *written, (size_t)MAX(sz - *written, 0), + "%u,%d,", be_msg->gpu.group_handle, slot), 0); /* prio */ if (slot >= 0) - *written += MAX(snprintf(buffer + *written, (size_t)MAX(sz - *written, 0), - "%u", be_msg->gpu.slot_prio), + *written += MAX(scnprintf(buffer + *written, (size_t)MAX(sz - *written, 0), + "%u", be_msg->gpu.slot_prio), 0); /* , */ - *written += MAX(snprintf(buffer + *written, (size_t)MAX(sz - *written, 0), ","), 0); + *written += + MAX(scnprintf(buffer + *written, (size_t)MAX(sz - *written, 0), ","), 0); } else { /* No group,slot,prio fields, but ensure ending with "," */ *written += - MAX(snprintf(buffer + *written, (size_t)MAX(sz - *written, 0), ",,,"), 0); + MAX(scnprintf(buffer + *written, (size_t)MAX(sz - *written, 0), ",,,"), 0); } /* queue parts: csi */ if (trace_msg->backend.gpu.flags & KBASE_KTRACE_FLAG_CSF_QUEUE) - *written += MAX(snprintf(buffer + *written, (size_t)MAX(sz - *written, 0), "%d", - be_msg->gpu.csi_index), + *written += MAX(scnprintf(buffer + *written, (size_t)MAX(sz - *written, 0), "%d", + be_msg->gpu.csi_index), 0); /* , */ - *written += MAX(snprintf(buffer + *written, (size_t)MAX(sz - *written, 0), 
","), 0); + *written += MAX(scnprintf(buffer + *written, (size_t)MAX(sz - *written, 0), ","), 0); if (be_msg->gpu.flags & KBASE_KTRACE_FLAG_CSF_KCPU) { /* kcpu data */ - *written += MAX(snprintf(buffer + *written, (size_t)MAX(sz - *written, 0), - "kcpu %d (0x%llx)", be_msg->kcpu.id, - be_msg->kcpu.extra_info_val), + *written += MAX(scnprintf(buffer + *written, (size_t)MAX(sz - *written, 0), + "kcpu %d (0x%llx)", be_msg->kcpu.id, + be_msg->kcpu.extra_info_val), 0); } diff --git a/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_jm.c b/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_jm.c index beac074f2035..39306e7d45e3 100644 --- a/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_jm.c +++ b/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_jm.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2020-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2020-2024 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -27,8 +27,8 @@ void kbasep_ktrace_backend_format_header(char *buffer, int sz, s32 *written) { - *written += MAX(snprintf(buffer + *written, (size_t)MAX(sz - *written, 0), - "katom,gpu_addr,jobslot,refcount"), + *written += MAX(scnprintf(buffer + *written, (size_t)MAX(sz - *written, 0), + "katom,gpu_addr,jobslot,refcount"), 0); } @@ -37,34 +37,34 @@ void kbasep_ktrace_backend_format_msg(struct kbase_ktrace_msg *trace_msg, char * { /* katom */ if (trace_msg->backend.gpu.flags & KBASE_KTRACE_FLAG_JM_ATOM) - *written += MAX(snprintf(buffer + *written, (size_t)MAX(sz - *written, 0), - "atom %u (ud: 0x%llx 0x%llx)", - trace_msg->backend.gpu.atom_number, - trace_msg->backend.gpu.atom_udata[0], - trace_msg->backend.gpu.atom_udata[1]), + *written += MAX(scnprintf(buffer + *written, (size_t)MAX(sz - *written, 0), + "atom %u (ud: 0x%llx 0x%llx)", + trace_msg->backend.gpu.atom_number, + trace_msg->backend.gpu.atom_udata[0], + trace_msg->backend.gpu.atom_udata[1]), 0); /* gpu_addr */ if (trace_msg->backend.gpu.flags & KBASE_KTRACE_FLAG_BACKEND) - *written += MAX(snprintf(buffer + *written, (size_t)MAX(sz - *written, 0), - ",%.8llx,", trace_msg->backend.gpu.gpu_addr), + *written += MAX(scnprintf(buffer + *written, (size_t)MAX(sz - *written, 0), + ",%.8llx,", trace_msg->backend.gpu.gpu_addr), 0); else *written += - MAX(snprintf(buffer + *written, (size_t)MAX(sz - *written, 0), ",,"), 0); + MAX(scnprintf(buffer + *written, (size_t)MAX(sz - *written, 0), ",,"), 0); /* jobslot */ if (trace_msg->backend.gpu.flags & KBASE_KTRACE_FLAG_JM_JOBSLOT) - *written += MAX(snprintf(buffer + *written, (size_t)MAX(sz - *written, 0), "%d", - trace_msg->backend.gpu.jobslot), + *written += MAX(scnprintf(buffer + *written, (size_t)MAX(sz - *written, 0), "%d", + trace_msg->backend.gpu.jobslot), 0); - *written += MAX(snprintf(buffer + *written, 
(size_t)MAX(sz - *written, 0), ","), 0); + *written += MAX(scnprintf(buffer + *written, (size_t)MAX(sz - *written, 0), ","), 0); /* refcount */ if (trace_msg->backend.gpu.flags & KBASE_KTRACE_FLAG_JM_REFCOUNT) - *written += MAX(snprintf(buffer + *written, (size_t)MAX(sz - *written, 0), "%d", - trace_msg->backend.gpu.refcount), + *written += MAX(scnprintf(buffer + *written, (size_t)MAX(sz - *written, 0), "%d", + trace_msg->backend.gpu.refcount), 0); } diff --git a/drivers/gpu/arm/bifrost/debug/mali_kbase_debug_ktrace.c b/drivers/gpu/arm/bifrost/debug/mali_kbase_debug_ktrace.c index 0842460bc08a..036d1f5968f6 100644 --- a/drivers/gpu/arm/bifrost/debug/mali_kbase_debug_ktrace.c +++ b/drivers/gpu/arm/bifrost/debug/mali_kbase_debug_ktrace.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2020-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2020-2024 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -71,15 +71,15 @@ static const char *const kbasep_ktrace_code_string[] = { static void kbasep_ktrace_format_header(char *buffer, int sz, s32 written) { - written += MAX(snprintf(buffer + written, (size_t)MAX(sz - written, 0), - "secs,thread_id,cpu,code,kctx,"), + written += MAX(scnprintf(buffer + written, (size_t)MAX(sz - written, 0), + "secs,thread_id,cpu,code,kctx,"), 0); kbasep_ktrace_backend_format_header(buffer, sz, &written); - written += MAX(snprintf(buffer + written, (size_t)MAX(sz - written, 0), - ",info_val,ktrace_version=%u.%u", KBASE_KTRACE_VERSION_MAJOR, - KBASE_KTRACE_VERSION_MINOR), + written += MAX(scnprintf(buffer + written, (size_t)MAX(sz - written, 0), + ",info_val,ktrace_version=%u.%u", KBASE_KTRACE_VERSION_MAJOR, + KBASE_KTRACE_VERSION_MINOR), 0); buffer[sz - 1] = 0; @@ -93,21 +93,21 @@ static void kbasep_ktrace_format_msg(struct kbase_ktrace_msg *trace_msg, 
char *b * * secs,thread_id,cpu,code, */ - written += MAX(snprintf(buffer + written, (size_t)MAX(sz - written, 0), "%d.%.6d,%d,%d,%s,", - (int)trace_msg->timestamp.tv_sec, - (int)(trace_msg->timestamp.tv_nsec / 1000), trace_msg->thread_id, - trace_msg->cpu, - kbasep_ktrace_code_string[trace_msg->backend.gpu.code]), + written += MAX(scnprintf(buffer + written, (size_t)MAX(sz - written, 0), + "%d.%.6d,%d,%d,%s,", (int)trace_msg->timestamp.tv_sec, + (int)(trace_msg->timestamp.tv_nsec / 1000), trace_msg->thread_id, + trace_msg->cpu, + kbasep_ktrace_code_string[trace_msg->backend.gpu.code]), 0); /* kctx part: */ if (trace_msg->kctx_tgid) { - written += MAX(snprintf(buffer + written, (size_t)MAX(sz - written, 0), "%d_%u", - trace_msg->kctx_tgid, trace_msg->kctx_id), + written += MAX(scnprintf(buffer + written, (size_t)MAX(sz - written, 0), "%d_%u", + trace_msg->kctx_tgid, trace_msg->kctx_id), 0); } /* Trailing comma */ - written += MAX(snprintf(buffer + written, (size_t)MAX(sz - written, 0), ","), 0); + written += MAX(scnprintf(buffer + written, (size_t)MAX(sz - written, 0), ","), 0); /* Backend parts */ kbasep_ktrace_backend_format_msg(trace_msg, buffer, sz, &written); @@ -119,8 +119,8 @@ static void kbasep_ktrace_format_msg(struct kbase_ktrace_msg *trace_msg, char *b * Note that the last column is empty, it's simply to hold the ktrace * version in the header */ - written += MAX(snprintf(buffer + written, (size_t)MAX(sz - written, 0), ",0x%.16llx", - (unsigned long long)trace_msg->info_val), + written += MAX(scnprintf(buffer + written, (size_t)MAX(sz - written, 0), ",0x%.16llx", + (unsigned long long)trace_msg->info_val), 0); buffer[sz - 1] = 0; } diff --git a/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_csf.c b/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_csf.c index 52aa63330afe..6fae88665d43 100644 --- a/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_csf.c +++ b/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_csf.c @@ -1,7 
+1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2024 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -159,9 +159,11 @@ fail_reset_gpu_init: */ static void kbase_backend_late_term(struct kbase_device *kbdev) { - kbase_backend_devfreq_term(kbdev); - kbasep_pm_metrics_term(kbdev); - kbase_ipa_control_term(kbdev); + { + kbase_backend_devfreq_term(kbdev); + kbasep_pm_metrics_term(kbdev); + kbase_ipa_control_term(kbdev); + } kbase_hwaccess_pm_halt(kbdev); kbase_reset_gpu_term(kbdev); kbase_hwaccess_pm_term(kbdev); @@ -279,10 +281,8 @@ static const struct kbase_device_init dev_init[] = { { kbase_gpu_device_create, kbase_gpu_device_destroy, "Dummy model initialization failed" }, #else /* !IS_ENABLED(CONFIG_MALI_REAL_HW) */ { kbase_get_irqs, NULL, "IRQ search failed" }, -#endif /* !IS_ENABLED(CONFIG_MALI_REAL_HW) */ -#if !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) { registers_map, registers_unmap, "Register map failed" }, -#endif /* !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) */ +#endif /* !IS_ENABLED(CONFIG_MALI_REAL_HW) */ #if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) { kbase_gpu_metrics_init, kbase_gpu_metrics_term, "GPU metrics initialization failed" }, #endif /* IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) */ diff --git a/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_hw_csf.c b/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_hw_csf.c index ab9df01610ab..4706a4f15b65 100644 --- a/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_hw_csf.c +++ b/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_hw_csf.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2020-2023 ARM Limited. All rights reserved. 
+ * (C) COPYRIGHT 2020-2024 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -28,6 +28,16 @@ #include #include #include +#include + +bool kbase_is_gpu_removed(struct kbase_device *kbdev) +{ + if (!IS_ENABLED(CONFIG_MALI_ARBITER_SUPPORT)) + return false; + + + return (KBASE_REG_READ(kbdev, GPU_CONTROL_ENUM(GPU_ID)) == 0); +} /** * kbase_report_gpu_fault - Report a GPU fault of the device. @@ -173,6 +183,9 @@ void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val) kbase_pm_power_changed(kbdev); } + if (val & MCU_STATUS_GPU_IRQ) + wake_up_all(&kbdev->csf.event_wait); + KBASE_KTRACE_ADD(kbdev, CORE_GPU_IRQ_DONE, NULL, val); } KBASE_EXPORT_TEST_API(kbase_gpu_interrupt); diff --git a/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_hw_jm.c b/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_hw_jm.c index f971b3b939df..c0cb835de69c 100644 --- a/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_hw_jm.c +++ b/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_hw_jm.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2020-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2020-2024 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -28,6 +28,14 @@ #include #include +bool kbase_is_gpu_removed(struct kbase_device *kbdev) +{ + if (!IS_ENABLED(CONFIG_MALI_ARBITER_SUPPORT)) + return false; + + return (KBASE_REG_READ(kbdev, GPU_CONTROL_ENUM(GPU_ID)) == 0); +} + /** * kbase_report_gpu_fault - Report a GPU fault. 
* @kbdev: Kbase device pointer diff --git a/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_jm.c b/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_jm.c index ab46f858a542..0fe76918296a 100644 --- a/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_jm.c +++ b/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_jm.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2024 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -217,16 +217,14 @@ static const struct kbase_device_init dev_init[] = { { kbase_gpu_device_create, kbase_gpu_device_destroy, "Dummy model initialization failed" }, #else /* !IS_ENABLED(CONFIG_MALI_REAL_HW) */ { kbase_get_irqs, NULL, "IRQ search failed" }, -#endif /* !IS_ENABLED(CONFIG_MALI_REAL_HW) */ -#if !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) { registers_map, registers_unmap, "Register map failed" }, -#endif /* !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) */ +#endif /* !IS_ENABLED(CONFIG_MALI_REAL_HW) */ #if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) { kbase_gpu_metrics_init, kbase_gpu_metrics_term, "GPU metrics initialization failed" }, #endif /* IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) */ + { power_control_init, power_control_term, "Power control initialization failed" }, { kbase_device_io_history_init, kbase_device_io_history_term, "Register access history initialization failed" }, - { kbase_device_pm_init, kbase_device_pm_term, "Power management initialization failed" }, { kbase_device_early_init, kbase_device_early_term, "Early device initialization failed" }, { kbase_backend_time_init, NULL, "Time backend initialization failed" }, { kbase_device_misc_init, kbase_device_misc_term, diff --git a/drivers/gpu/arm/bifrost/device/mali_kbase_device.c 
b/drivers/gpu/arm/bifrost/device/mali_kbase_device.c index b191c758c62f..ccb62c2a5cb5 100644 --- a/drivers/gpu/arm/bifrost/device/mali_kbase_device.c +++ b/drivers/gpu/arm/bifrost/device/mali_kbase_device.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2024 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -556,14 +556,27 @@ int kbase_device_early_init(struct kbase_device *kbdev) /* Ensure we can access the GPU registers */ kbase_pm_register_access_enable(kbdev); - /* Initialize GPU_ID props */ - kbase_gpuprops_parse_gpu_id(&kbdev->gpu_props.gpu_id, kbase_reg_get_gpu_id(kbdev)); - - /* Initialize register mapping LUTs */ - err = kbase_regmap_init(kbdev); - if (err) + /* + * If -EPERM is returned, it means the device backend is not supported, but + * device initialization can continue. + */ + err = kbase_device_backend_init(kbdev); + if (err != 0 && err != -EPERM) goto pm_runtime_term; + /* + * Initialize register mapping LUTs. This would have been initialized on HW + * Arbitration but not on PV or non-arbitration devices. + */ + if (!kbase_reg_is_init(kbdev)) { + /* Initialize GPU_ID props */ + kbase_gpuprops_parse_gpu_id(&kbdev->gpu_props.gpu_id, kbase_reg_get_gpu_id(kbdev)); + + err = kbase_regmap_init(kbdev); + if (err) + goto backend_term; + } + /* Set the list of features available on the current HW * (identified by the GPU_ID register) */ @@ -572,7 +585,7 @@ int kbase_device_early_init(struct kbase_device *kbdev) /* Find out GPU properties based on the GPU feature registers. 
*/ err = kbase_gpuprops_init(kbdev); if (err) - goto regmap_term; + goto backend_term; /* Get the list of workarounds for issues on the current HW * (identified by the GPU_ID register and impl_tech in THREAD_FEATURES) @@ -585,13 +598,16 @@ int kbase_device_early_init(struct kbase_device *kbdev) kbase_pm_register_access_disable(kbdev); #ifdef CONFIG_MALI_ARBITER_SUPPORT - if (kbdev->arb.arb_if) - err = kbase_arbiter_pm_install_interrupts(kbdev); - else + if (kbdev->arb.arb_if) { + if (kbdev->pm.arb_vm_state) + err = kbase_arbiter_pm_install_interrupts(kbdev); + } else { err = kbase_install_interrupts(kbdev); + } #else err = kbase_install_interrupts(kbdev); #endif + if (err) goto gpuprops_term; @@ -599,9 +615,13 @@ int kbase_device_early_init(struct kbase_device *kbdev) gpuprops_term: kbase_gpuprops_term(kbdev); -regmap_term: +backend_term: + kbase_device_backend_term(kbdev); kbase_regmap_term(kbdev); pm_runtime_term: + if (kbdev->pm.backend.gpu_powered) + kbase_pm_register_access_disable(kbdev); + kbase_pm_runtime_term(kbdev); platform_device_term: kbasep_platform_device_term(kbdev); @@ -620,8 +640,11 @@ void kbase_device_early_term(struct kbase_device *kbdev) kbase_release_interrupts(kbdev); #else kbase_release_interrupts(kbdev); -#endif /* CONFIG_MALI_ARBITER_SUPPORT */ +#endif + kbase_gpuprops_term(kbdev); + kbase_device_backend_term(kbdev); + kbase_regmap_term(kbdev); kbase_pm_runtime_term(kbdev); kbasep_platform_device_term(kbdev); kbase_ktrace_term(kbdev); diff --git a/drivers/gpu/arm/bifrost/device/mali_kbase_device.h b/drivers/gpu/arm/bifrost/device/mali_kbase_device.h index 9cca6aff4554..1b15ff059194 100644 --- a/drivers/gpu/arm/bifrost/device/mali_kbase_device.h +++ b/drivers/gpu/arm/bifrost/device/mali_kbase_device.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2024 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -58,6 +58,9 @@ void kbase_increment_device_id(void); * When a device file is opened for the first time, * load firmware and initialize hardware counter components. * + * It is safe for this function to be called multiple times without ill + * effects. Only the first call would be effective. + * * Return: 0 on success. An error code on failure. */ int kbase_device_firmware_init_once(struct kbase_device *kbdev); diff --git a/drivers/gpu/arm/bifrost/device/mali_kbase_device_hw.c b/drivers/gpu/arm/bifrost/device/mali_kbase_device_hw.c index da597af9c46e..91379ac6429d 100644 --- a/drivers/gpu/arm/bifrost/device/mali_kbase_device_hw.c +++ b/drivers/gpu/arm/bifrost/device/mali_kbase_device_hw.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2024 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -27,14 +27,6 @@ #include #include -bool kbase_is_gpu_removed(struct kbase_device *kbdev) -{ - if (!IS_ENABLED(CONFIG_MALI_ARBITER_SUPPORT)) - return false; - - return (kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(GPU_ID)) == 0); -} - /** * busy_wait_cache_operation - Wait for a pending cache flush to complete * diff --git a/drivers/gpu/arm/bifrost/hw_access/backend/mali_kbase_hw_access_model_linux.c b/drivers/gpu/arm/bifrost/hw_access/backend/mali_kbase_hw_access_model_linux.c index ca1ccbfb3dbe..9993b787ed21 100644 --- a/drivers/gpu/arm/bifrost/hw_access/backend/mali_kbase_hw_access_model_linux.c +++ b/drivers/gpu/arm/bifrost/hw_access/backend/mali_kbase_hw_access_model_linux.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2023-2024 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -46,7 +46,7 @@ u32 kbase_reg_read32(struct kbase_device *kbdev, u32 reg_enum) u32 val = 0; u32 offset; - if (WARN_ON(!kbdev->pm.backend.gpu_powered)) + if (WARN_ON(!kbase_reg_is_powered_access_allowed(kbdev, reg_enum))) return 0; if (unlikely(!kbase_reg_is_accessible(kbdev, reg_enum, KBASE_REGMAP_PERM_READ | KBASE_REGMAP_WIDTH_32_BIT))) @@ -68,7 +68,7 @@ u64 kbase_reg_read64(struct kbase_device *kbdev, u32 reg_enum) u32 val32[2] = { 0 }; u32 offset; - if (WARN_ON(!kbdev->pm.backend.gpu_powered)) + if (WARN_ON(!kbase_reg_is_powered_access_allowed(kbdev, reg_enum))) return 0; if (unlikely(!kbase_reg_is_accessible(kbdev, reg_enum, KBASE_REGMAP_PERM_READ | KBASE_REGMAP_WIDTH_64_BIT))) @@ -91,7 +91,7 @@ u64 kbase_reg_read64_coherent(struct kbase_device *kbdev, u32 reg_enum) u32 hi1 = 0, hi2 = 0, lo = 0; u32 offset; - if (WARN_ON(!kbdev->pm.backend.gpu_powered)) + if (WARN_ON(!kbase_reg_is_powered_access_allowed(kbdev, reg_enum))) return 0; if (unlikely(!kbase_reg_is_accessible(kbdev, reg_enum, KBASE_REGMAP_PERM_READ | KBASE_REGMAP_WIDTH_64_BIT))) @@ -116,7 +116,7 @@ void kbase_reg_write32(struct kbase_device *kbdev, u32 reg_enum, u32 value) unsigned long flags; u32 offset; - if (WARN_ON(!kbdev->pm.backend.gpu_powered)) + if (WARN_ON(!kbase_reg_is_powered_access_allowed(kbdev, reg_enum))) return; if (unlikely(!kbase_reg_is_accessible(kbdev, reg_enum, KBASE_REGMAP_PERM_WRITE | KBASE_REGMAP_WIDTH_32_BIT))) @@ -135,7 +135,7 @@ void kbase_reg_write64(struct kbase_device *kbdev, u32 reg_enum, u64 value) unsigned long flags; u32 offset; - if (WARN_ON(!kbdev->pm.backend.gpu_powered)) + if (WARN_ON(!kbase_reg_is_powered_access_allowed(kbdev, reg_enum))) return; if (unlikely(!kbase_reg_is_accessible(kbdev, reg_enum, KBASE_REGMAP_PERM_WRITE | KBASE_REGMAP_WIDTH_64_BIT))) diff --git 
a/drivers/gpu/arm/bifrost/hw_access/backend/mali_kbase_hw_access_real_hw.c b/drivers/gpu/arm/bifrost/hw_access/backend/mali_kbase_hw_access_real_hw.c index f4afbf55e312..ecf58cb45d15 100644 --- a/drivers/gpu/arm/bifrost/hw_access/backend/mali_kbase_hw_access_real_hw.c +++ b/drivers/gpu/arm/bifrost/hw_access/backend/mali_kbase_hw_access_real_hw.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2023-2024 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -24,12 +24,13 @@ #include #include +#include u64 kbase_reg_get_gpu_id(struct kbase_device *kbdev) { u32 val[2] = { 0 }; - val[0] = readl(kbdev->reg); + val[0] = mali_readl(kbdev->reg); return (u64)val[0] | ((u64)val[1] << 32); @@ -39,13 +40,13 @@ u32 kbase_reg_read32(struct kbase_device *kbdev, u32 reg_enum) { u32 val; - if (WARN_ON(!kbdev->pm.backend.gpu_powered)) + if (WARN_ON(!kbase_reg_is_powered_access_allowed(kbdev, reg_enum))) return 0; if (unlikely(!kbase_reg_is_accessible(kbdev, reg_enum, KBASE_REGMAP_PERM_READ | KBASE_REGMAP_WIDTH_32_BIT))) return 0; - val = readl(kbdev->regmap.regs[reg_enum]); + val = mali_readl(kbdev->regmap.regs[reg_enum]); #if IS_ENABLED(CONFIG_DEBUG_FS) if (unlikely(kbdev->io_history.enabled)) @@ -63,14 +64,13 @@ u64 kbase_reg_read64(struct kbase_device *kbdev, u32 reg_enum) { u64 val; - if (WARN_ON(!kbdev->pm.backend.gpu_powered)) + if (WARN_ON(!kbase_reg_is_powered_access_allowed(kbdev, reg_enum))) return 0; if (unlikely(!kbase_reg_is_accessible(kbdev, reg_enum, KBASE_REGMAP_PERM_READ | KBASE_REGMAP_WIDTH_64_BIT))) return 0; - val = (u64)readl(kbdev->regmap.regs[reg_enum]) | - ((u64)readl(kbdev->regmap.regs[reg_enum] + 4) << 32); + val = mali_readq(kbdev->regmap.regs[reg_enum]); #if IS_ENABLED(CONFIG_DEBUG_FS) if 
(unlikely(kbdev->io_history.enabled)) { @@ -90,23 +90,14 @@ KBASE_EXPORT_TEST_API(kbase_reg_read64); u64 kbase_reg_read64_coherent(struct kbase_device *kbdev, u32 reg_enum) { u64 val; -#if !IS_ENABLED(CONFIG_MALI_64BIT_HW_ACCESS) - u32 hi1, hi2, lo; -#endif - if (WARN_ON(!kbdev->pm.backend.gpu_powered)) + if (WARN_ON(!kbase_reg_is_powered_access_allowed(kbdev, reg_enum))) return 0; if (unlikely(!kbase_reg_is_accessible(kbdev, reg_enum, KBASE_REGMAP_PERM_READ | KBASE_REGMAP_WIDTH_64_BIT))) return 0; - do { - hi1 = readl(kbdev->regmap.regs[reg_enum] + 4); - lo = readl(kbdev->regmap.regs[reg_enum]); - hi2 = readl(kbdev->regmap.regs[reg_enum] + 4); - } while (hi1 != hi2); - - val = lo | (((u64)hi1) << 32); + val = mali_readq_coherent(kbdev->regmap.regs[reg_enum]); #if IS_ENABLED(CONFIG_DEBUG_FS) if (unlikely(kbdev->io_history.enabled)) { @@ -125,13 +116,13 @@ KBASE_EXPORT_TEST_API(kbase_reg_read64_coherent); void kbase_reg_write32(struct kbase_device *kbdev, u32 reg_enum, u32 value) { - if (WARN_ON(!kbdev->pm.backend.gpu_powered)) + if (WARN_ON(!kbase_reg_is_powered_access_allowed(kbdev, reg_enum))) return; if (unlikely(!kbase_reg_is_accessible(kbdev, reg_enum, KBASE_REGMAP_PERM_WRITE | KBASE_REGMAP_WIDTH_32_BIT))) return; - writel(value, kbdev->regmap.regs[reg_enum]); + mali_writel(value, kbdev->regmap.regs[reg_enum]); #if IS_ENABLED(CONFIG_DEBUG_FS) if (unlikely(kbdev->io_history.enabled)) @@ -145,14 +136,13 @@ KBASE_EXPORT_TEST_API(kbase_reg_write32); void kbase_reg_write64(struct kbase_device *kbdev, u32 reg_enum, u64 value) { - if (WARN_ON(!kbdev->pm.backend.gpu_powered)) + if (WARN_ON(!kbase_reg_is_powered_access_allowed(kbdev, reg_enum))) return; if (unlikely(!kbase_reg_is_accessible(kbdev, reg_enum, KBASE_REGMAP_PERM_WRITE | KBASE_REGMAP_WIDTH_64_BIT))) return; - writel(value & 0xFFFFFFFF, kbdev->regmap.regs[reg_enum]); - writel(value >> 32, kbdev->regmap.regs[reg_enum] + 4); + mali_writeq(value, kbdev->regmap.regs[reg_enum]); #if IS_ENABLED(CONFIG_DEBUG_FS) if 
(unlikely(kbdev->io_history.enabled)) { diff --git a/drivers/gpu/arm/bifrost/hw_access/mali_kbase_hw_access.c b/drivers/gpu/arm/bifrost/hw_access/mali_kbase_hw_access.c index 16a27c780d3b..2cce391606c3 100644 --- a/drivers/gpu/arm/bifrost/hw_access/mali_kbase_hw_access.c +++ b/drivers/gpu/arm/bifrost/hw_access/mali_kbase_hw_access.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2023-2024 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -24,9 +24,50 @@ #include #include "mali_kbase_hw_access.h" +#include "mali_kbase_hw_access_regmap.h" #include +#define KBASE_REGMAP_ACCESS_ALWAYS_POWERED (1U << 16) + +static u32 always_powered_regs[] = { +#if MALI_USE_CSF +#else /* MALI_USE_CSF */ + PTM_AW_IRQ_CLEAR, + PTM_AW_IRQ_INJECTION, + PTM_AW_IRQ_MASK, + PTM_AW_IRQ_RAWSTAT, + PTM_AW_IRQ_STATUS, + PTM_AW_MESSAGE__PTM_INCOMING_MESSAGE0, + PTM_AW_MESSAGE__PTM_INCOMING_MESSAGE1, + PTM_AW_MESSAGE__PTM_OUTGOING_MESSAGE0, + PTM_AW_MESSAGE__PTM_OUTGOING_MESSAGE1, + PTM_AW_MESSAGE__PTM_OUTGOING_MESSAGE_STATUS, + PTM_ID, +#endif /* MALI_USE_CSF */ +}; + +static void kbasep_reg_setup_always_powered_registers(struct kbase_device *kbdev) +{ + u32 i; + + for (i = 0; i < ARRAY_SIZE(always_powered_regs); i++) { + u32 reg_enum = always_powered_regs[i]; + + if (!kbase_reg_is_valid(kbdev, reg_enum)) + continue; + + kbdev->regmap.flags[reg_enum] |= KBASE_REGMAP_ACCESS_ALWAYS_POWERED; + } +} + +bool kbase_reg_is_powered_access_allowed(struct kbase_device *kbdev, u32 reg_enum) +{ + if (kbdev->regmap.flags[reg_enum] & KBASE_REGMAP_ACCESS_ALWAYS_POWERED) + return true; + return kbdev->pm.backend.gpu_powered; +} + bool kbase_reg_is_size64(struct kbase_device *kbdev, u32 reg_enum) { if (WARN_ON(reg_enum >= kbdev->regmap.size)) @@ -67,6 +108,11 @@ bool 
kbase_reg_is_accessible(struct kbase_device *kbdev, u32 reg_enum, u32 flags return true; } +bool kbase_reg_is_init(struct kbase_device *kbdev) +{ + return (kbdev->regmap.regs != NULL) && (kbdev->regmap.flags != NULL); +} + int kbase_reg_get_offset(struct kbase_device *kbdev, u32 reg_enum, u32 *offset) { if (unlikely(!kbase_reg_is_accessible(kbdev, reg_enum, 0))) @@ -108,12 +154,12 @@ int kbase_regmap_init(struct kbase_device *kbdev) return -ENOMEM; } + kbasep_reg_setup_always_powered_registers(kbdev); + dev_info(kbdev->dev, "Register LUT %08x initialized for GPU arch 0x%08x\n", lut_arch_id, kbdev->gpu_props.gpu_id.arch_id); -#if IS_ENABLED(CONFIG_MALI_64BIT_HW_ACCESS) && IS_ENABLED(CONFIG_MALI_REAL_HW) - dev_info(kbdev->dev, "64-bit HW access enabled\n"); -#endif + return 0; } diff --git a/drivers/gpu/arm/bifrost/hw_access/mali_kbase_hw_access.h b/drivers/gpu/arm/bifrost/hw_access/mali_kbase_hw_access.h index 40356596163d..654fb685fa06 100644 --- a/drivers/gpu/arm/bifrost/hw_access/mali_kbase_hw_access.h +++ b/drivers/gpu/arm/bifrost/hw_access/mali_kbase_hw_access.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2023-2024 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -128,6 +128,25 @@ bool kbase_reg_is_valid(struct kbase_device *kbdev, u32 reg_enum); */ bool kbase_reg_is_accessible(struct kbase_device *kbdev, u32 reg_enum, u32 flags); +/** + * kbase_reg_is_powered_access_allowed - check if register is accessible given + * current power state + * + * @kbdev: Kbase device pointer + * @reg_enum: Register enum + * + * Return: true if register is accessible, false otherwise + */ +bool kbase_reg_is_powered_access_allowed(struct kbase_device *kbdev, u32 reg_enum); + +/** + * kbase_reg_is_init - check if regmap is initialized + * + * @kbdev: Kbase device pointer + * Return: true if regmap is initialized, false otherwise + */ +bool kbase_reg_is_init(struct kbase_device *kbdev); + /** * kbase_reg_get_offset - get register offset from enum * @kbdev: Kbase device pointer diff --git a/drivers/gpu/arm/bifrost/hw_access/mali_kbase_hw_access_regmap.h b/drivers/gpu/arm/bifrost/hw_access/mali_kbase_hw_access_regmap.h index 97adb1322a35..a8708fafc638 100644 --- a/drivers/gpu/arm/bifrost/hw_access/mali_kbase_hw_access_regmap.h +++ b/drivers/gpu/arm/bifrost/hw_access/mali_kbase_hw_access_regmap.h @@ -308,6 +308,16 @@ #define TC_CLOCK_GATE_OVERRIDE (1ul << 0) /* End TILER_CONFIG register */ +/* L2_FEATURES register */ +#define L2_FEATURES_CACHE_SIZE_SHIFT GPU_U(16) +#define L2_FEATURES_CACHE_SIZE_MASK (GPU_U(0xFF) << L2_FEATURES_CACHE_SIZE_SHIFT) +#define L2_FEATURES_CACHE_SIZE_GET(reg_val) \ + (((reg_val)&L2_FEATURES_CACHE_SIZE_MASK) >> L2_FEATURES_CACHE_SIZE_SHIFT) +#define L2_FEATURES_CACHE_SIZE_SET(reg_val, value) \ + (~(~(reg_val) | L2_FEATURES_CACHE_SIZE_MASK) | \ + (((value) << L2_FEATURES_CACHE_SIZE_SHIFT) & L2_FEATURES_CACHE_SIZE_MASK)) +/* End L2_FEATURES register */ + /* L2_CONFIG register */ #define L2_CONFIG_SIZE_SHIFT 16 #define L2_CONFIG_SIZE_MASK (0xFFul << L2_CONFIG_SIZE_SHIFT) diff --git
a/drivers/gpu/arm/bifrost/hw_access/regmap/mali_kbase_regmap_csf_macros.h b/drivers/gpu/arm/bifrost/hw_access/regmap/mali_kbase_regmap_csf_macros.h index c3d12ad04c4e..56ec5e015d11 100644 --- a/drivers/gpu/arm/bifrost/hw_access/regmap/mali_kbase_regmap_csf_macros.h +++ b/drivers/gpu/arm/bifrost/hw_access/regmap/mali_kbase_regmap_csf_macros.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2023-2024 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software diff --git a/drivers/gpu/arm/bifrost/hw_access/regmap/mali_kbase_regmap_jm.c b/drivers/gpu/arm/bifrost/hw_access/regmap/mali_kbase_regmap_jm.c index 178d45501916..4f41693ff3c2 100644 --- a/drivers/gpu/arm/bifrost/hw_access/regmap/mali_kbase_regmap_jm.c +++ b/drivers/gpu/arm/bifrost/hw_access/regmap/mali_kbase_regmap_jm.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2023-2024 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -2240,6 +2240,56 @@ static void kbase_regmap_v9_2_init(struct kbase_device *kbdev) kbdev->regmap.regs[GPU_CONTROL__L2_CONFIG] = kbdev->reg + 0x48; } +static void kbase_regmap_v9_14_init(struct kbase_device *kbdev) +{ + if (kbdev->regmap.regs == NULL && kbdev->regmap.flags == NULL) { + kbdev->regmap.size = NR_V9_14_REGS; + kbdev->regmap.regs = + kcalloc(kbdev->regmap.size, sizeof(void __iomem *), GFP_KERNEL); + kbdev->regmap.flags = kcalloc(kbdev->regmap.size, sizeof(u32), GFP_KERNEL); + } + + if (WARN_ON(kbdev->regmap.regs == NULL)) + return; + if (WARN_ON(kbdev->regmap.flags == NULL)) + return; + + kbase_regmap_v9_2_init(kbdev); + + kbdev->regmap.flags[PTM_AW_IRQ_CLEAR] = KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | + KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[PTM_AW_IRQ_INJECTION] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[PTM_AW_IRQ_MASK] = KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | + KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[PTM_AW_IRQ_RAWSTAT] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[PTM_AW_IRQ_STATUS] = KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[PTM_AW_MESSAGE__PTM_INCOMING_MESSAGE0] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[PTM_AW_MESSAGE__PTM_INCOMING_MESSAGE1] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[PTM_AW_MESSAGE__PTM_OUTGOING_MESSAGE0] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[PTM_AW_MESSAGE__PTM_OUTGOING_MESSAGE1] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[PTM_AW_MESSAGE__PTM_OUTGOING_MESSAGE_STATUS] = + KBASE_REGMAP_WIDTH_32_BIT | 
KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[PTM_ID] = KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ; + + kbdev->regmap.regs[PTM_AW_IRQ_CLEAR] = kbdev->reg + 0x1ffc8; + kbdev->regmap.regs[PTM_AW_IRQ_INJECTION] = kbdev->reg + 0x1ffd4; + kbdev->regmap.regs[PTM_AW_IRQ_MASK] = kbdev->reg + 0x1ffcc; + kbdev->regmap.regs[PTM_AW_IRQ_RAWSTAT] = kbdev->reg + 0x1ffc4; + kbdev->regmap.regs[PTM_AW_IRQ_STATUS] = kbdev->reg + 0x1ffd0; + kbdev->regmap.regs[PTM_AW_MESSAGE__PTM_INCOMING_MESSAGE0] = kbdev->reg + 0x1ffd8; + kbdev->regmap.regs[PTM_AW_MESSAGE__PTM_INCOMING_MESSAGE1] = kbdev->reg + 0x1ffdc; + kbdev->regmap.regs[PTM_AW_MESSAGE__PTM_OUTGOING_MESSAGE0] = kbdev->reg + 0x1ffe4; + kbdev->regmap.regs[PTM_AW_MESSAGE__PTM_OUTGOING_MESSAGE1] = kbdev->reg + 0x1ffe8; + kbdev->regmap.regs[PTM_AW_MESSAGE__PTM_OUTGOING_MESSAGE_STATUS] = kbdev->reg + 0x1ffe0; + kbdev->regmap.regs[PTM_ID] = kbdev->reg + 0x1ffc0; +} + u32 kbase_regmap_backend_init(struct kbase_device *kbdev) { int i = 0; @@ -2254,6 +2304,7 @@ u32 kbase_regmap_backend_init(struct kbase_device *kbdev) { GPU_ID_ARCH_MAKE(7, 2, 0), kbase_regmap_v7_2_init }, { GPU_ID_ARCH_MAKE(9, 0, 0), kbase_regmap_v9_0_init }, { GPU_ID_ARCH_MAKE(9, 2, 0), kbase_regmap_v9_2_init }, + { GPU_ID_ARCH_MAKE(9, 14, 0), kbase_regmap_v9_14_init }, }; for (i = 0; i < ARRAY_SIZE(init_array) - 1; i++) { @@ -2967,6 +3018,18 @@ static char *enum_strings[] = { [GPU_CONTROL__CORE_FEATURES] = "GPU_CONTROL__CORE_FEATURES", [GPU_CONTROL__THREAD_TLS_ALLOC] = "GPU_CONTROL__THREAD_TLS_ALLOC", [GPU_CONTROL__L2_CONFIG] = "GPU_CONTROL__L2_CONFIG", + [PTM_AW_IRQ_CLEAR] = "PTM_AW_IRQ_CLEAR", + [PTM_AW_IRQ_INJECTION] = "PTM_AW_IRQ_INJECTION", + [PTM_AW_IRQ_MASK] = "PTM_AW_IRQ_MASK", + [PTM_AW_IRQ_RAWSTAT] = "PTM_AW_IRQ_RAWSTAT", + [PTM_AW_IRQ_STATUS] = "PTM_AW_IRQ_STATUS", + [PTM_AW_MESSAGE__PTM_INCOMING_MESSAGE0] = "PTM_AW_MESSAGE__PTM_INCOMING_MESSAGE0", + [PTM_AW_MESSAGE__PTM_INCOMING_MESSAGE1] = "PTM_AW_MESSAGE__PTM_INCOMING_MESSAGE1", + 
[PTM_AW_MESSAGE__PTM_OUTGOING_MESSAGE0] = "PTM_AW_MESSAGE__PTM_OUTGOING_MESSAGE0", + [PTM_AW_MESSAGE__PTM_OUTGOING_MESSAGE1] = "PTM_AW_MESSAGE__PTM_OUTGOING_MESSAGE1", + [PTM_AW_MESSAGE__PTM_OUTGOING_MESSAGE_STATUS] = + "PTM_AW_MESSAGE__PTM_OUTGOING_MESSAGE_STATUS", + [PTM_ID] = "PTM_ID", }; const char *kbase_reg_get_enum_string(u32 reg_enum) diff --git a/drivers/gpu/arm/bifrost/hw_access/regmap/mali_kbase_regmap_jm_enums.h b/drivers/gpu/arm/bifrost/hw_access/regmap/mali_kbase_regmap_jm_enums.h index f5618c4794db..59d8745eaf4a 100644 --- a/drivers/gpu/arm/bifrost/hw_access/regmap/mali_kbase_regmap_jm_enums.h +++ b/drivers/gpu/arm/bifrost/hw_access/regmap/mali_kbase_regmap_jm_enums.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2023-2024 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -759,4 +759,19 @@ enum kbase_regmap_enum_v9_2 { NR_V9_2_REGS, }; +enum kbase_regmap_enum_v9_14 { + PTM_AW_IRQ_CLEAR = NR_V9_2_REGS, /* (RW) 32-bit 0x1FFC8 */ + PTM_AW_IRQ_INJECTION, /* (RW) 32-bit 0x1FFD4 */ + PTM_AW_IRQ_MASK, /* (RW) 32-bit 0x1FFCC */ + PTM_AW_IRQ_RAWSTAT, /* (RO) 32-bit 0x1FFC4 */ + PTM_AW_IRQ_STATUS, /* (RO) 32-bit 0x1FFD0 */ + PTM_AW_MESSAGE__PTM_INCOMING_MESSAGE0, /* (RO) 32-bit 0x1FFD8 */ + PTM_AW_MESSAGE__PTM_INCOMING_MESSAGE1, /* (RO) 32-bit 0x1FFDC */ + PTM_AW_MESSAGE__PTM_OUTGOING_MESSAGE0, /* (RW) 32-bit 0x1FFE4 */ + PTM_AW_MESSAGE__PTM_OUTGOING_MESSAGE1, /* (RW) 32-bit 0x1FFE8 */ + PTM_AW_MESSAGE__PTM_OUTGOING_MESSAGE_STATUS, /* (RO) 32-bit 0x1FFE0 */ + PTM_ID, /* (RO) 32-bit 0x1FFC0 */ + NR_V9_14_REGS, +}; + #endif /* _MALI_KBASE_REGMAP_JM_ENUMS_H_ */ diff --git a/drivers/gpu/arm/bifrost/hw_access/regmap/mali_kbase_regmap_jm_macros.h b/drivers/gpu/arm/bifrost/hw_access/regmap/mali_kbase_regmap_jm_macros.h 
index c3bc0f3e9924..650ed9b31eea 100644 --- a/drivers/gpu/arm/bifrost/hw_access/regmap/mali_kbase_regmap_jm_macros.h +++ b/drivers/gpu/arm/bifrost/hw_access/regmap/mali_kbase_regmap_jm_macros.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2023-2024 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -47,6 +47,8 @@ #define MMU_AS_OFFSET(n, regname) ENUM_OFFSET(n, MMU_AS_ENUM(0, regname), MMU_AS_ENUM(1, regname)) #define MMU_AS_BASE_OFFSET(n) MMU_AS_OFFSET(n, TRANSTAB) +#define PTM_AW_MESSAGE_ENUM(regname) PTM_AW_MESSAGE__##regname + /* register value macros */ /* GPU_STATUS values */ #define GPU_STATUS_PRFCNT_ACTIVE (1 << 2) /* Set if the performance counters are active. */ @@ -295,4 +297,11 @@ (GPU_FAULT | MULTIPLE_GPU_FAULTS | RESET_COMPLETED | POWER_CHANGED_ALL | \ PRFCNT_SAMPLE_COMPLETED) +#define WINDOW_IRQ_MESSAGE (1U << 0) +#define WINDOW_IRQ_INVALID_ACCESS (1U << 1) +#define WINDOW_IRQ_GPU (1U << 2) +#define WINDOW_IRQ_JOB (1U << 3) +#define WINDOW_IRQ_MMU (1U << 4) +#define WINDOW_IRQ_EVENT (1U << 5) + #endif /* _MALI_KBASE_REGMAP_JM_MACROS_H_ */ diff --git a/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_csf.c b/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_csf.c index d605253752ca..8bdfc9a0bfda 100644 --- a/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_csf.c +++ b/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_csf.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2021-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2021-2024 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -21,7 +21,6 @@ #include "hwcnt/backend/mali_kbase_hwcnt_backend_csf.h" #include "hwcnt/mali_kbase_hwcnt_gpu.h" -#include "hwcnt/mali_kbase_hwcnt_types.h" #include #include @@ -31,6 +30,7 @@ #include #include #include +#include #ifndef BASE_MAX_NR_CLOCKS_REGULATORS #define BASE_MAX_NR_CLOCKS_REGULATORS 4 @@ -255,7 +255,8 @@ struct kbase_hwcnt_csf_physical_layout { * @hwc_threshold_work: Worker for consuming available samples when * threshold interrupt raised. * @num_l2_slices: Current number of L2 slices allocated to the GPU. - * @shader_present_bitmap: Current shader-present bitmap that is allocated to the GPU. + * @powered_shader_core_mask: The common mask between the debug_core_mask + * and the shader_present_bitmap. */ struct kbase_hwcnt_backend_csf { struct kbase_hwcnt_backend_csf_info *info; @@ -283,7 +284,7 @@ struct kbase_hwcnt_backend_csf { struct work_struct hwc_dump_work; struct work_struct hwc_threshold_work; size_t num_l2_slices; - u64 shader_present_bitmap; + u64 powered_shader_core_mask; }; static bool kbasep_hwcnt_backend_csf_backend_exists(struct kbase_hwcnt_backend_csf_info *csf_info) @@ -296,9 +297,11 @@ static bool kbasep_hwcnt_backend_csf_backend_exists(struct kbase_hwcnt_backend_c } void kbase_hwcnt_backend_csf_set_hw_availability(struct kbase_hwcnt_backend_interface *iface, - size_t num_l2_slices, u64 shader_present_bitmap) + size_t num_l2_slices, u64 shader_present, + u64 power_core_mask) { struct kbase_hwcnt_backend_csf_info *csf_info; + u64 norm_shader_present = power_core_mask & shader_present; if (!iface) return; @@ -309,16 +312,17 @@ void kbase_hwcnt_backend_csf_set_hw_availability(struct kbase_hwcnt_backend_inte if (!csf_info || !csf_info->backend) return; + if (WARN_ON(csf_info->backend->enable_state != KBASE_HWCNT_BACKEND_CSF_DISABLED)) return; if (WARN_ON(num_l2_slices > 
csf_info->backend->phys_layout.mmu_l2_cnt) || - WARN_ON((shader_present_bitmap & csf_info->backend->phys_layout.shader_avail_mask) != - shader_present_bitmap)) + WARN_ON((norm_shader_present & csf_info->backend->phys_layout.shader_avail_mask) != + norm_shader_present)) return; csf_info->backend->num_l2_slices = num_l2_slices; - csf_info->backend->shader_present_bitmap = shader_present_bitmap; + csf_info->backend->powered_shader_core_mask = norm_shader_present; } /** @@ -424,7 +428,7 @@ static void kbasep_hwcnt_backend_csf_init_layout( WARN_ON(!prfcnt_info); WARN_ON(!phys_layout); - shader_core_cnt = (size_t)fls64(prfcnt_info->core_mask); + shader_core_cnt = (size_t)fls64(prfcnt_info->sc_core_mask); values_per_block = prfcnt_info->prfcnt_block_size / KBASE_HWCNT_VALUE_HW_BYTES; fw_block_cnt = div_u64(prfcnt_info->prfcnt_fw_size, prfcnt_info->prfcnt_block_size); hw_block_cnt = div_u64(prfcnt_info->prfcnt_hw_size, prfcnt_info->prfcnt_block_size); @@ -445,7 +449,7 @@ static void kbasep_hwcnt_backend_csf_init_layout( .fw_block_cnt = fw_block_cnt, .hw_block_cnt = hw_block_cnt, .block_cnt = fw_block_cnt + hw_block_cnt, - .shader_avail_mask = prfcnt_info->core_mask, + .shader_avail_mask = prfcnt_info->sc_core_mask, .headers_per_block = KBASE_HWCNT_V5_HEADERS_PER_BLOCK, .values_per_block = values_per_block, .counters_per_block = values_per_block - KBASE_HWCNT_V5_HEADERS_PER_BLOCK, @@ -454,17 +458,20 @@ static void kbasep_hwcnt_backend_csf_init_layout( } static void -kbasep_hwcnt_backend_csf_reset_internal_buffers(struct kbase_hwcnt_backend_csf *backend_csf) +kbasep_hwcnt_backend_csf_reset_internal_buffers(struct kbase_hwcnt_backend_csf *backend_csf, + bool user_bufs) { size_t user_buf_bytes = backend_csf->info->metadata->dump_buf_bytes; size_t block_state_bytes = backend_csf->phys_layout.block_cnt * KBASE_HWCNT_BLOCK_STATE_BYTES * KBASE_HWCNT_BLOCK_STATE_STRIDE; - memset(backend_csf->to_user_buf, 0, user_buf_bytes); memset(backend_csf->accum_buf, 0, user_buf_bytes); 
memset(backend_csf->old_sample_buf, 0, backend_csf->info->prfcnt_info.dump_bytes); memset(backend_csf->block_states, 0, block_state_bytes); - memset(backend_csf->to_user_block_states, 0, block_state_bytes); + if (user_bufs) { + memset(backend_csf->to_user_buf, 0, user_buf_bytes); + memset(backend_csf->to_user_block_states, 0, block_state_bytes); + } } static void @@ -517,34 +524,21 @@ static void kbasep_hwcnt_backend_csf_update_user_sample(struct kbase_hwcnt_backe memset(backend_csf->block_states, 0, block_state_bytes); } -/** - * kbasep_hwcnt_backend_csf_update_block_state - Update block state of a block instance with - * information from a sample. - * @phys_layout: Physical memory layout information of HWC - * sample buffer. - * @enable_mask: Counter enable mask for the block whose state is being updated. - * @enable_state: The CSF backend internal enabled state. - * @exiting_protm: Whether or not the sample is taken when the GPU is exiting - * protected mode. - * @block_idx: Index of block within the ringbuffer. - * @block_state: Pointer to existing block state of the block whose state is being - * updated. - * @fw_in_protected_mode: Whether or not GPU is in protected mode during sampling. 
- */ -static void kbasep_hwcnt_backend_csf_update_block_state( - const struct kbase_hwcnt_csf_physical_layout *phys_layout, const u32 enable_mask, - enum kbase_hwcnt_backend_csf_enable_state enable_state, bool exiting_protm, - size_t block_idx, blk_stt_t *const block_state, bool fw_in_protected_mode) +void kbasep_hwcnt_backend_csf_update_block_state(struct kbase_hwcnt_backend_csf *backend, + const u32 enable_mask, bool exiting_protm, + size_t block_idx, blk_stt_t *const block_state, + bool fw_in_protected_mode) { + const struct kbase_hwcnt_csf_physical_layout *phys_layout = &backend->phys_layout; /* Offset of shader core blocks from the start of the HW blocks in the sample */ size_t shader_core_block_offset = - (size_t)(phys_layout->hw_block_cnt - phys_layout->shader_cnt); + (size_t)(phys_layout->block_cnt - phys_layout->shader_cnt); bool is_shader_core_block; - is_shader_core_block = block_idx >= shader_core_block_offset; + is_shader_core_block = (block_idx >= shader_core_block_offset); /* Set power bits for the block state for the block, for the sample */ - switch (enable_state) { + switch (backend->enable_state) { /* Disabled states */ case KBASE_HWCNT_BACKEND_CSF_DISABLED: case KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_ENABLED: @@ -592,21 +586,45 @@ static void kbasep_hwcnt_backend_csf_update_block_state( KBASE_HWCNT_STATE_NORMAL); else kbase_hwcnt_block_state_append(block_state, KBASE_HWCNT_STATE_NORMAL); + + /* powered_shader_core_mask stored in the backend is a combination of + * the shader present and the debug core mask, so explicit checking of the + * core mask is not required here. 
+ */ + if (is_shader_core_block) { + u64 current_shader_core = 1ULL << (block_idx - shader_core_block_offset); + + WARN_ON_ONCE(backend->phys_layout.shader_cnt > 64); + + if (current_shader_core & backend->info->backend->powered_shader_core_mask) + kbase_hwcnt_block_state_append(block_state, KBASE_HWCNT_STATE_AVAILABLE); + else if (current_shader_core & ~backend->info->backend->powered_shader_core_mask) + kbase_hwcnt_block_state_append(block_state, KBASE_HWCNT_STATE_UNAVAILABLE); + else + WARN_ON_ONCE(true); + } + else + kbase_hwcnt_block_state_append(block_state, KBASE_HWCNT_STATE_AVAILABLE); } -static void kbasep_hwcnt_backend_csf_accumulate_sample( - const struct kbase_hwcnt_csf_physical_layout *phys_layout, size_t dump_bytes, - u64 *accum_buf, const u32 *old_sample_buf, const u32 *new_sample_buf, - blk_stt_t *const block_states, bool clearing_samples, - enum kbase_hwcnt_backend_csf_enable_state enable_state, bool fw_in_protected_mode) +static void kbasep_hwcnt_backend_csf_accumulate_sample(struct kbase_hwcnt_backend_csf *backend, + const u32 *old_sample_buf, + const u32 *new_sample_buf) { + const struct kbase_hwcnt_csf_physical_layout *phys_layout = &backend->phys_layout; + const size_t dump_bytes = backend->info->prfcnt_info.dump_bytes; + const size_t values_per_block = phys_layout->values_per_block; + blk_stt_t *const block_states = backend->block_states; + const bool fw_in_protected_mode = backend->info->fw_in_protected_mode; + const bool clearing_samples = backend->info->prfcnt_info.clearing_samples; + u64 *accum_buf = backend->accum_buf; + size_t block_idx; const u32 *old_block = old_sample_buf; const u32 *new_block = new_sample_buf; u64 *acc_block = accum_buf; /* Flag to indicate whether current sample is exiting protected mode. */ bool exiting_protm = false; - const size_t values_per_block = phys_layout->values_per_block; /* The block pointers now point to the first HW block, which is always a CSHW/front-end * block. 
The counter enable mask for this block can be checked to determine whether this @@ -620,9 +638,8 @@ static void kbasep_hwcnt_backend_csf_accumulate_sample( const u32 old_enable_mask = old_block[phys_layout->enable_mask_offset]; const u32 new_enable_mask = new_block[phys_layout->enable_mask_offset]; /* Update block state with information of the current sample */ - kbasep_hwcnt_backend_csf_update_block_state(phys_layout, new_enable_mask, - enable_state, exiting_protm, block_idx, - &block_states[block_idx], + kbasep_hwcnt_backend_csf_update_block_state(backend, new_enable_mask, exiting_protm, + block_idx, &block_states[block_idx], fw_in_protected_mode); if (!(new_enable_mask & HWCNT_BLOCK_EMPTY_SAMPLE)) { @@ -706,7 +723,6 @@ static void kbasep_hwcnt_backend_csf_accumulate_samples(struct kbase_hwcnt_backe u8 *cpu_dump_base = (u8 *)backend_csf->ring_buf_cpu_base; const size_t ring_buf_cnt = backend_csf->info->ring_buf_cnt; const size_t buf_dump_bytes = backend_csf->info->prfcnt_info.dump_bytes; - bool clearing_samples = backend_csf->info->prfcnt_info.clearing_samples; u32 *old_sample_buf = backend_csf->old_sample_buf; u32 *new_sample_buf = old_sample_buf; const struct kbase_hwcnt_csf_physical_layout *phys_layout = &backend_csf->phys_layout; @@ -740,10 +756,8 @@ static void kbasep_hwcnt_backend_csf_accumulate_samples(struct kbase_hwcnt_backe const u32 buf_idx = raw_idx & (ring_buf_cnt - 1); new_sample_buf = (u32 *)&cpu_dump_base[buf_idx * buf_dump_bytes]; - kbasep_hwcnt_backend_csf_accumulate_sample( - phys_layout, buf_dump_bytes, backend_csf->accum_buf, old_sample_buf, - new_sample_buf, backend_csf->block_states, clearing_samples, - backend_csf->enable_state, backend_csf->info->fw_in_protected_mode); + kbasep_hwcnt_backend_csf_accumulate_sample(backend_csf, old_sample_buf, + new_sample_buf); old_sample_buf = new_sample_buf; } @@ -1215,11 +1229,6 @@ static void kbasep_hwcnt_backend_csf_dump_disable(struct kbase_hwcnt_backend *ba backend_csf->ring_buf, 0, 
backend_csf->info->ring_buf_cnt, false); - /* Reset accumulator, old_sample_buf and user_sample to all-0 to prepare - * for next enable. - */ - kbasep_hwcnt_backend_csf_reset_internal_buffers(backend_csf); - /* Disabling HWCNT is an indication that blocks have been powered off. This is important to * know for L2, CSHW, and Tiler blocks, as this is currently the only way a backend can * know if they are being powered off. @@ -1255,6 +1264,12 @@ static void kbasep_hwcnt_backend_csf_dump_disable(struct kbase_hwcnt_backend *ba kbase_hwcnt_block_state_set(&backend_csf->accum_all_blk_stt, KBASE_HWCNT_STATE_UNKNOWN); } + + /* Reset accumulator, old_sample_buf and block_states to all-0 to prepare for next enable. + * Reset user buffers if ownership is transferred to the caller (i.e. dump_buffer + * is provided). + */ + kbasep_hwcnt_backend_csf_reset_internal_buffers(backend_csf, dump_buffer); } /* CSF backend implementation of kbase_hwcnt_backend_dump_request_fn */ @@ -1279,6 +1294,11 @@ static int kbasep_hwcnt_backend_csf_dump_request(struct kbase_hwcnt_backend *bac backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED; *dump_time_ns = kbasep_hwcnt_backend_csf_timestamp_ns(backend); kbasep_hwcnt_backend_csf_cc_update(backend_csf); + /* There is a possibility that the transition to enabled state will remain + * during multiple dumps, hence append the OFF state. 
+ */ + kbase_hwcnt_block_state_append(&backend_csf->accum_all_blk_stt, + KBASE_HWCNT_STATE_OFF); backend_csf->user_requested = true; backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags); return 0; @@ -1457,7 +1477,7 @@ static int kbasep_hwcnt_backend_csf_dump_get(struct kbase_hwcnt_backend *backend ret = kbase_hwcnt_csf_dump_get(dst, backend_csf->to_user_buf, backend_csf->to_user_block_states, dst_enable_map, backend_csf->num_l2_slices, - backend_csf->shader_present_bitmap, accumulate); + backend_csf->powered_shader_core_mask, accumulate); /* If no error occurred (zero ret value), then update block state for all blocks in the * accumulation with the current sample's block state. @@ -1469,6 +1489,12 @@ static int kbasep_hwcnt_backend_csf_dump_get(struct kbase_hwcnt_backend *backend KBASE_HWCNT_STATE_UNKNOWN); } + /* Clear consumed user buffers. */ + memset(backend_csf->to_user_buf, 0, backend_csf->info->metadata->dump_buf_bytes); + memset(backend_csf->to_user_block_states, 0, + backend_csf->phys_layout.block_cnt * KBASE_HWCNT_BLOCK_STATE_BYTES * + KBASE_HWCNT_BLOCK_STATE_STRIDE); + return ret; } @@ -2098,7 +2124,7 @@ int kbase_hwcnt_backend_csf_metadata_init(struct kbase_hwcnt_backend_interface * gpu_info.has_fw_counters = csf_info->prfcnt_info.prfcnt_fw_size > 0; gpu_info.l2_count = csf_info->prfcnt_info.l2_count; gpu_info.csg_cnt = csf_info->prfcnt_info.csg_count; - gpu_info.core_mask = csf_info->prfcnt_info.core_mask; + gpu_info.sc_core_mask = csf_info->prfcnt_info.sc_core_mask; gpu_info.clk_cnt = csf_info->prfcnt_info.clk_cnt; gpu_info.prfcnt_values_per_block = csf_info->prfcnt_info.prfcnt_block_size / KBASE_HWCNT_VALUE_HW_BYTES; @@ -2115,7 +2141,7 @@ void kbase_hwcnt_backend_csf_metadata_term(struct kbase_hwcnt_backend_interface csf_info = (struct kbase_hwcnt_backend_csf_info *)iface->info; if (csf_info->metadata) { - kbase_hwcnt_csf_metadata_destroy(csf_info->metadata); + kbase_hwcnt_metadata_destroy(csf_info->metadata); csf_info->metadata 
= NULL; } } diff --git a/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_csf.h b/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_csf.h index 2487db272a35..104f9c77a945 100644 --- a/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_csf.h +++ b/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_csf.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2021-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2021-2024 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -30,8 +30,10 @@ #include "hwcnt/backend/mali_kbase_hwcnt_backend.h" #include "hwcnt/backend/mali_kbase_hwcnt_backend_csf_if.h" #include "hwcnt/mali_kbase_hwcnt_watchdog_if.h" +#include "hwcnt/mali_kbase_hwcnt_types.h" struct kbase_hwcnt_physical_enable_map; +struct kbase_hwcnt_backend_csf; /** * kbase_hwcnt_backend_csf_create() - Create a CSF hardware counter backend @@ -123,11 +125,12 @@ void kbase_hwcnt_backend_csf_on_before_reset(struct kbase_hwcnt_backend_interfac * this function is called. * @iface: Non-NULL pointer to HWC backend interface. * @num_l2_slices: Current number of L2 slices allocated to the GPU. - * @shader_present_bitmap: Current shader-present bitmap that is allocated to the GPU. + * @shader_present: Shader_present of the current configuration. + * @power_core_mask: Mask containing changed shader core power state. 
*/ void kbase_hwcnt_backend_csf_set_hw_availability(struct kbase_hwcnt_backend_interface *iface, - size_t num_l2_slices, - uint64_t shader_present_bitmap); + size_t num_l2_slices, u64 shader_present, + u64 power_core_mask); /** kbasep_hwcnt_backend_csf_process_enable_map() - Process the enable_map to * guarantee headers are @@ -174,4 +177,21 @@ void kbase_hwcnt_backend_csf_on_prfcnt_enable(struct kbase_hwcnt_backend_interfa */ void kbase_hwcnt_backend_csf_on_prfcnt_disable(struct kbase_hwcnt_backend_interface *iface); +/** + * kbasep_hwcnt_backend_csf_update_block_state - Update block state of a block instance with + * information from a sample. + * @backend: CSF hardware counter backend. + * @enable_mask: Counter enable mask for the block whose state is being updated. + * @exiting_protm: Whether or not the sample is taken when the GPU is exiting + * protected mode. + * @block_idx: Index of block within the ringbuffer. + * @block_state: Pointer to existing block state of the block whose state is being + * updated. + * @fw_in_protected_mode: Whether or not GPU is in protected mode during sampling. + */ +void kbasep_hwcnt_backend_csf_update_block_state(struct kbase_hwcnt_backend_csf *backend, + const u32 enable_mask, bool exiting_protm, + size_t block_idx, blk_stt_t *const block_state, + bool fw_in_protected_mode); + #endif /* _KBASE_HWCNT_BACKEND_CSF_H_ */ diff --git a/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if.h b/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if.h index 65bb965bcf9c..c982900cf755 100644 --- a/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if.h +++ b/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2021-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2021-2024 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -68,7 +68,7 @@ struct kbase_hwcnt_backend_csf_if_enable { * @prfcnt_block_size: Bytes of each performance counter block. * @l2_count: The MMU L2 cache count. * @csg_count: The total number of CSGs in the system - * @core_mask: Shader core mask. + * @sc_core_mask: Shader core mask. * @clk_cnt: Clock domain count in the system. * @clearing_samples: Indicates whether counters are cleared after each sample * is taken. @@ -80,7 +80,7 @@ struct kbase_hwcnt_backend_csf_if_prfcnt_info { size_t prfcnt_block_size; size_t l2_count; u32 csg_count; - u64 core_mask; + u64 sc_core_mask; u8 clk_cnt; bool clearing_samples; }; diff --git a/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.c b/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.c index d79a99e5e89f..3cf039f70056 100644 --- a/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.c +++ b/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2021-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2021-2024 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -229,7 +229,7 @@ static void kbasep_hwcnt_backend_csf_if_fw_get_prfcnt_info( *prfcnt_info = (struct kbase_hwcnt_backend_csf_if_prfcnt_info){ .l2_count = KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS, - .core_mask = (1ull << KBASE_DUMMY_MODEL_MAX_SHADER_CORES) - 1, + .sc_core_mask = (1ull << KBASE_DUMMY_MODEL_MAX_SHADER_CORES) - 1, .prfcnt_hw_size = KBASE_DUMMY_MODEL_MAX_NUM_HARDWARE_BLOCKS * KBASE_DUMMY_MODEL_BLOCK_SIZE, .prfcnt_fw_size = @@ -290,12 +290,13 @@ static void kbasep_hwcnt_backend_csf_if_fw_get_prfcnt_info( .dump_bytes = fw_ctx->buf_bytes, .prfcnt_block_size = prfcnt_block_size, .l2_count = kbdev->gpu_props.num_l2_slices, - .core_mask = kbasep_hwcnt_backend_csf_core_mask(&kbdev->gpu_props), + .sc_core_mask = kbasep_hwcnt_backend_csf_core_mask(&kbdev->gpu_props), .csg_count = fw_block_count > 1 ? csg_count : 0, .clk_cnt = fw_ctx->clk_cnt, .clearing_samples = true, }; + /* Block size must be multiple of counter size. */ WARN_ON((prfcnt_info->prfcnt_block_size % KBASE_HWCNT_VALUE_HW_BYTES) != 0); /* Total size must be multiple of block size. */ @@ -513,10 +514,15 @@ kbasep_hwcnt_backend_csf_if_fw_ring_buf_free(struct kbase_hwcnt_backend_csf_if_c fw_ring_buf->phys, fw_ring_buf->num_pages, fw_ring_buf->num_pages, MCU_AS_NR)); + /* Clear the dump ring_buf content to zeros */ + memset(fw_ring_buf->cpu_dump_base, 0, fw_ring_buf->num_pages * PAGE_SIZE); vunmap(fw_ring_buf->cpu_dump_base); + /* After zeroing, the ring_buf pages are dirty so need to pass the 'dirty' flag + * as true when freeing the pages to the Global pool. 
+ */ kbase_mem_pool_free_pages(&fw_ctx->kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], - fw_ring_buf->num_pages, fw_ring_buf->phys, false, false); + fw_ring_buf->num_pages, fw_ring_buf->phys, true, false); kfree(fw_ring_buf->phys); diff --git a/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_jm.c b/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_jm.c index 7fbef163976a..1ee0e3b823ad 100644 --- a/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_jm.c +++ b/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_jm.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2018-2024 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -165,7 +165,7 @@ static int kbasep_hwcnt_backend_jm_gpu_info_init(struct kbase_device *kbdev, #endif info->l2_count = l2_count; - info->core_mask = core_mask; + info->sc_core_mask = core_mask; info->prfcnt_values_per_block = KBASE_HWCNT_V5_DEFAULT_VALUES_PER_BLOCK; /* Determine the number of available clock domains. 
*/ @@ -186,7 +186,7 @@ static void kbasep_hwcnt_backend_jm_init_layout(const struct kbase_hwcnt_gpu_inf WARN_ON(!gpu_info); WARN_ON(!phys_layout); - shader_core_cnt = fls64(gpu_info->core_mask); + shader_core_cnt = fls64(gpu_info->sc_core_mask); *phys_layout = (struct kbase_hwcnt_jm_physical_layout){ .fe_cnt = KBASE_HWCNT_V5_FE_BLOCK_COUNT, @@ -195,7 +195,7 @@ static void kbasep_hwcnt_backend_jm_init_layout(const struct kbase_hwcnt_gpu_inf .shader_cnt = shader_core_cnt, .block_cnt = KBASE_HWCNT_V5_FE_BLOCK_COUNT + KBASE_HWCNT_V5_TILER_BLOCK_COUNT + gpu_info->l2_count + shader_core_cnt, - .shader_avail_mask = gpu_info->core_mask, + .shader_avail_mask = gpu_info->sc_core_mask, .headers_per_block = KBASE_HWCNT_V5_HEADERS_PER_BLOCK, .values_per_block = gpu_info->prfcnt_values_per_block, .counters_per_block = @@ -384,14 +384,12 @@ kbasep_hwcnt_backend_jm_dump_enable_nolock(struct kbase_hwcnt_backend *backend, enable = (struct kbase_instr_hwcnt_enable) { - .fe_bm = phys_enable_map.fe_bm, - .shader_bm = phys_enable_map.shader_bm, - .tiler_bm = phys_enable_map.tiler_bm, - .mmu_l2_bm = phys_enable_map.mmu_l2_bm, + .fe_bm = phys_enable_map.fe_bm, .shader_bm = phys_enable_map.shader_bm, + .tiler_bm = phys_enable_map.tiler_bm, .mmu_l2_bm = phys_enable_map.mmu_l2_bm, .counter_set = phys_counter_set, #if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) /* The dummy model needs the CPU mapping. 
*/ - .dump_buffer = (uintptr_t)backend_jm->cpu_dump_va, + .dump_buffer = (uintptr_t)backend_jm->cpu_dump_va, #else .dump_buffer = backend_jm->gpu_dump_va, #endif /* CONFIG_MALI_BIFROST_NO_MALI */ @@ -411,7 +409,7 @@ kbasep_hwcnt_backend_jm_dump_enable_nolock(struct kbase_hwcnt_backend *backend, backend_jm->debug_core_mask = kbase_pm_ca_get_debug_core_mask(kbdev); backend_jm->max_l2_slices = backend_jm->info->hwcnt_gpu_info.l2_count; - backend_jm->max_core_mask = backend_jm->info->hwcnt_gpu_info.core_mask; + backend_jm->max_core_mask = backend_jm->info->hwcnt_gpu_info.sc_core_mask; backend_jm->pm_core_mask = kbase_pm_ca_get_instr_core_mask(kbdev); @@ -660,8 +658,8 @@ static int kbasep_hwcnt_backend_jm_dump_get(struct kbase_hwcnt_backend *backend, #endif /* CONFIG_MALI_BIFROST_NO_MALI */ errcode = kbase_hwcnt_jm_dump_get(dst, backend_jm->to_user_buf, dst_enable_map, backend_jm->pm_core_mask, backend_jm->debug_core_mask, - backend_jm->max_core_mask, backend_jm->max_l2_slices, - &backend_jm->curr_config, accumulate); + backend_jm->max_l2_slices, &backend_jm->curr_config, + accumulate); if (errcode) return errcode; @@ -864,7 +862,7 @@ static void kbasep_hwcnt_backend_jm_info_destroy(const struct kbase_hwcnt_backen if (!info) return; - kbase_hwcnt_jm_metadata_destroy(info->metadata); + kbase_hwcnt_metadata_destroy(info->metadata); kfree(info); } diff --git a/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_gpu.c b/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_gpu.c index 5da564546608..7cd16a0de4ce 100644 --- a/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_gpu.c +++ b/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_gpu.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2018-2024 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -169,7 +169,7 @@ static int kbasep_hwcnt_backend_gpu_metadata_create(const struct kbase_hwcnt_gpu /* Calculate number of block instances that aren't cores */ non_core_block_count = 2 + gpu_info->l2_count; /* Calculate number of block instances that are shader cores */ - sc_block_count = (size_t)fls64(gpu_info->core_mask); + sc_block_count = (size_t)fls64(gpu_info->sc_core_mask); /* Determine the total number of cores */ core_block_count = sc_block_count; @@ -277,7 +277,7 @@ static int kbasep_hwcnt_backend_gpu_metadata_create(const struct kbase_hwcnt_gpu kbase_hwcnt_set_avail_mask(&desc.avail_mask, 0, 0); kbase_hwcnt_set_avail_mask_bits(&desc.avail_mask, 0, non_core_block_count, U64_MAX); kbase_hwcnt_set_avail_mask_bits(&desc.avail_mask, non_core_block_count, sc_block_count, - gpu_info->core_mask); + gpu_info->sc_core_mask); return kbase_hwcnt_metadata_create(&desc, metadata); @@ -294,7 +294,7 @@ static size_t kbasep_hwcnt_backend_jm_dump_bytes(const struct kbase_hwcnt_gpu_in { WARN_ON(!gpu_info); - return (2 + gpu_info->l2_count + (size_t)fls64(gpu_info->core_mask)) * + return (2 + gpu_info->l2_count + (size_t)fls64(gpu_info->sc_core_mask)) * gpu_info->prfcnt_values_per_block * KBASE_HWCNT_VALUE_HW_BYTES; } @@ -338,14 +338,6 @@ int kbase_hwcnt_jm_metadata_create(const struct kbase_hwcnt_gpu_info *gpu_info, return 0; } -void kbase_hwcnt_jm_metadata_destroy(const struct kbase_hwcnt_metadata *metadata) -{ - if (!metadata) - return; - - kbase_hwcnt_metadata_destroy(metadata); -} - int kbase_hwcnt_csf_metadata_create(const struct kbase_hwcnt_gpu_info *gpu_info, enum kbase_hwcnt_set counter_set, const struct kbase_hwcnt_metadata **out_metadata) @@ -365,14 +357,6 @@ int kbase_hwcnt_csf_metadata_create(const struct kbase_hwcnt_gpu_info *gpu_info, return 0; } -void kbase_hwcnt_csf_metadata_destroy(const struct 
kbase_hwcnt_metadata *metadata) -{ - if (!metadata) - return; - - kbase_hwcnt_metadata_destroy(metadata); -} - bool kbase_hwcnt_is_block_type_shader(const enum kbase_hwcnt_gpu_v5_block_type blk_type) { if (blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC || @@ -384,6 +368,7 @@ bool kbase_hwcnt_is_block_type_shader(const enum kbase_hwcnt_gpu_v5_block_type b return false; } + bool kbase_hwcnt_is_block_type_memsys(const enum kbase_hwcnt_gpu_v5_block_type blk_type) { if (blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS || @@ -416,7 +401,7 @@ bool kbase_hwcnt_is_block_type_fe(const enum kbase_hwcnt_gpu_v5_block_type blk_t int kbase_hwcnt_jm_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src, const struct kbase_hwcnt_enable_map *dst_enable_map, u64 pm_core_mask, - u64 debug_core_mask, u64 max_core_mask, size_t max_l2_slices, + u64 debug_core_mask, size_t max_l2_slices, const struct kbase_hwcnt_curr_config *curr_config, bool accumulate) { const struct kbase_hwcnt_metadata *metadata; @@ -466,9 +451,7 @@ int kbase_hwcnt_jm_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src, else hw_res_available = true; - /* - * Skip block if no values in the destination block are enabled. - */ + /* Skip block if no values in the destination block are enabled. 
*/ if (kbase_hwcnt_enable_map_block_enabled(dst_enable_map, blk, blk_inst)) { u64 *dst_blk = kbase_hwcnt_dump_buffer_block_instance(dst, blk, blk_inst); const u64 *src_blk = dump_src + src_offset; @@ -581,7 +564,6 @@ int kbase_hwcnt_jm_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src, /* Shift each core mask right by 1 */ core_mask >>= 1; debug_core_mask >>= 1; - max_core_mask >>= 1; shader_present >>= 1; } } @@ -592,7 +574,7 @@ int kbase_hwcnt_jm_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src, int kbase_hwcnt_csf_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src, blk_stt_t *src_block_stt, const struct kbase_hwcnt_enable_map *dst_enable_map, - size_t num_l2_slices, u64 shader_present_bitmap, bool accumulate) + size_t num_l2_slices, u64 powered_shader_core_mask, bool accumulate) { const struct kbase_hwcnt_metadata *metadata; const u64 *dump_src = src; @@ -614,9 +596,7 @@ int kbase_hwcnt_csf_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src, blk_stt_t *dst_blk_stt = kbase_hwcnt_dump_buffer_block_state_instance(dst, blk, blk_inst); - /* - * Skip block if no values in the destination block are enabled. - */ + /* Skip block if no values in the destination block are enabled. */ if (kbase_hwcnt_enable_map_block_enabled(dst_enable_map, blk, blk_inst)) { u64 *dst_blk = kbase_hwcnt_dump_buffer_block_instance(dst, blk, blk_inst); const u64 *src_blk = dump_src + src_offset; diff --git a/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_gpu.h b/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_gpu.h index 4339fddd64e2..1f25282d378a 100644 --- a/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_gpu.h +++ b/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_gpu.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2018-2024 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -169,7 +169,7 @@ enum kbase_hwcnt_physical_set { /** * struct kbase_hwcnt_gpu_info - Information about hwcnt blocks on the GPUs. * @l2_count: L2 cache count. - * @core_mask: Shader core mask. May be sparse. + * @sc_core_mask: Shader core mask. May be sparse. * @clk_cnt: Number of clock domains available. * @csg_cnt: Number of CSGs available. * @prfcnt_values_per_block: Total entries (header + counters) of performance @@ -178,7 +178,7 @@ enum kbase_hwcnt_physical_set { */ struct kbase_hwcnt_gpu_info { size_t l2_count; - u64 core_mask; + u64 sc_core_mask; u8 clk_cnt; u8 csg_cnt; size_t prfcnt_values_per_block; @@ -261,13 +261,6 @@ int kbase_hwcnt_jm_metadata_create(const struct kbase_hwcnt_gpu_info *info, const struct kbase_hwcnt_metadata **out_metadata, size_t *out_dump_bytes); -/** - * kbase_hwcnt_jm_metadata_destroy() - Destroy JM GPU hardware counter metadata. - * - * @metadata: Pointer to metadata to destroy. - */ -void kbase_hwcnt_jm_metadata_destroy(const struct kbase_hwcnt_metadata *metadata); - /** * kbase_hwcnt_csf_metadata_create() - Create hardware counter metadata for the * CSF GPUs. @@ -282,13 +275,6 @@ int kbase_hwcnt_csf_metadata_create(const struct kbase_hwcnt_gpu_info *info, enum kbase_hwcnt_set counter_set, const struct kbase_hwcnt_metadata **out_metadata); -/** - * kbase_hwcnt_csf_metadata_destroy() - Destroy CSF GPU hardware counter - * metadata. - * @metadata: Pointer to metadata to destroy. - */ -void kbase_hwcnt_csf_metadata_destroy(const struct kbase_hwcnt_metadata *metadata); - /** * kbase_hwcnt_jm_dump_get() - Copy or accumulate enabled counters from the raw * dump buffer in src into the dump buffer @@ -300,9 +286,6 @@ void kbase_hwcnt_csf_metadata_destroy(const struct kbase_hwcnt_metadata *metadat * @dst_enable_map: Non-NULL pointer to enable map specifying enabled values. 
* @pm_core_mask: PM state synchronized shaders core mask with the dump. * @debug_core_mask: User-set mask of cores to be used by the GPU. - * @max_core_mask: Core mask of all cores allocated to the GPU (non - * virtualized platforms) or resource group (virtualized - * platforms). * @max_l2_slices: Maximum number of L2 slices allocated to the GPU (non * virtualised platforms) or resource group (virtualized * platforms). @@ -319,23 +302,23 @@ void kbase_hwcnt_csf_metadata_destroy(const struct kbase_hwcnt_metadata *metadat */ int kbase_hwcnt_jm_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src, const struct kbase_hwcnt_enable_map *dst_enable_map, - const u64 pm_core_mask, u64 debug_core_mask, u64 max_core_mask, - size_t max_l2_slices, const struct kbase_hwcnt_curr_config *curr_config, - bool accumulate); + const u64 pm_core_mask, u64 debug_core_mask, size_t max_l2_slices, + const struct kbase_hwcnt_curr_config *curr_config, bool accumulate); /** * kbase_hwcnt_csf_dump_get() - Copy or accumulate enabled counters from the raw * dump buffer in src into the dump buffer * abstraction in dst. - * @dst: Non-NULL pointer to destination dump buffer. - * @src: Non-NULL pointer to source raw dump buffer, of same length - * as dump_buf_bytes in the metadata of dst dump buffer. - * @src_block_stt: Non-NULL pointer to source block state buffer. - * @dst_enable_map: Non-NULL pointer to enable map specifying enabled values. - * @num_l2_slices: Current number of L2 slices allocated to the GPU. - * @shader_present_bitmap: Current shader-present bitmap that is allocated to the GPU. - * @accumulate: True if counters in src should be accumulated into - * destination, rather than copied. + * @dst: Non-NULL pointer to destination dump buffer. + * @src: Non-NULL pointer to source raw dump buffer, of same length + * as dump_buf_bytes in the metadata of dst dump buffer. + * @src_block_stt: Non-NULL pointer to source block state buffer. 
+ * @dst_enable_map: Non-NULL pointer to enable map specifying enabled values. + * @num_l2_slices: Current number of L2 slices allocated to the GPU. + * @powered_shader_core_mask: The common mask between the debug_core_mask + * and the shader_present_bitmap. + * @accumulate: True if counters in src should be accumulated into + * destination, rather than copied. * * The dst and dst_enable_map MUST have been created from the same metadata as * returned from the call to kbase_hwcnt_csf_metadata_create as was used to get @@ -346,7 +329,7 @@ int kbase_hwcnt_jm_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src, int kbase_hwcnt_csf_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src, blk_stt_t *src_block_stt, const struct kbase_hwcnt_enable_map *dst_enable_map, - size_t num_l2_slices, u64 shader_present_bitmap, bool accumulate); + size_t num_l2_slices, u64 powered_shader_core_mask, bool accumulate); /** * kbase_hwcnt_backend_gpu_block_map_to_physical() - Convert from a block @@ -453,6 +436,7 @@ bool kbase_hwcnt_is_block_type_memsys(const enum kbase_hwcnt_gpu_v5_block_type b bool kbase_hwcnt_is_block_type_tiler(const enum kbase_hwcnt_gpu_v5_block_type blk_type); bool kbase_hwcnt_is_block_type_fe(const enum kbase_hwcnt_gpu_v5_block_type blk_type); + /** * kbase_hwcnt_gpu_enable_map_from_cm() - Builds enable map abstraction from * counter selection bitmasks. diff --git a/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_types.c b/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_types.c index 3d0ad5af7263..3d2fd5e088da 100644 --- a/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_types.c +++ b/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_types.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2018-2024 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -125,6 +125,9 @@ int kbase_hwcnt_metadata_create(const struct kbase_hwcnt_description *desc, void kbase_hwcnt_metadata_destroy(const struct kbase_hwcnt_metadata *metadata) { + if (!metadata) + return; + kfree(metadata); } diff --git a/drivers/gpu/arm/bifrost/mali_base_hwconfig_features.h b/drivers/gpu/arm/bifrost/mali_base_hwconfig_features.h index 1f32fc9dd553..843094076d69 100644 --- a/drivers/gpu/arm/bifrost/mali_base_hwconfig_features.h +++ b/drivers/gpu/arm/bifrost/mali_base_hwconfig_features.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2024 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -27,6 +27,8 @@ #ifndef _BASE_HWCONFIG_FEATURES_H_ #define _BASE_HWCONFIG_FEATURES_H_ +#include + enum base_hw_feature { BASE_HW_FEATURE_FLUSH_REDUCTION, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, @@ -45,55 +47,55 @@ enum base_hw_feature { BASE_HW_FEATURE_END }; -__attribute__((unused)) static const enum base_hw_feature base_hw_features_generic[] = { +__maybe_unused static const enum base_hw_feature base_hw_features_generic[] = { BASE_HW_FEATURE_END }; -__attribute__((unused)) static const enum base_hw_feature base_hw_features_tMIx[] = { +__maybe_unused static const enum base_hw_feature base_hw_features_tMIx[] = { BASE_HW_FEATURE_THREAD_GROUP_SPLIT, BASE_HW_FEATURE_FLUSH_REDUCTION, BASE_HW_FEATURE_END }; -__attribute__((unused)) static const enum base_hw_feature base_hw_features_tHEx[] = { +__maybe_unused static const enum base_hw_feature base_hw_features_tHEx[] = { BASE_HW_FEATURE_THREAD_GROUP_SPLIT, BASE_HW_FEATURE_FLUSH_REDUCTION, 
BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, BASE_HW_FEATURE_END }; -__attribute__((unused)) static const enum base_hw_feature base_hw_features_tSIx[] = { +__maybe_unused static const enum base_hw_feature base_hw_features_tSIx[] = { BASE_HW_FEATURE_THREAD_GROUP_SPLIT, BASE_HW_FEATURE_FLUSH_REDUCTION, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, BASE_HW_FEATURE_END }; -__attribute__((unused)) static const enum base_hw_feature base_hw_features_tDVx[] = { +__maybe_unused static const enum base_hw_feature base_hw_features_tDVx[] = { BASE_HW_FEATURE_THREAD_GROUP_SPLIT, BASE_HW_FEATURE_FLUSH_REDUCTION, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, BASE_HW_FEATURE_END }; -__attribute__((unused)) static const enum base_hw_feature base_hw_features_tNOx[] = { +__maybe_unused static const enum base_hw_feature base_hw_features_tNOx[] = { BASE_HW_FEATURE_THREAD_GROUP_SPLIT, BASE_HW_FEATURE_FLUSH_REDUCTION, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, BASE_HW_FEATURE_TLS_HASHING, BASE_HW_FEATURE_IDVS_GROUP_SIZE, BASE_HW_FEATURE_END }; -__attribute__((unused)) static const enum base_hw_feature base_hw_features_tGOx[] = { +__maybe_unused static const enum base_hw_feature base_hw_features_tGOx[] = { BASE_HW_FEATURE_THREAD_GROUP_SPLIT, BASE_HW_FEATURE_FLUSH_REDUCTION, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, BASE_HW_FEATURE_TLS_HASHING, BASE_HW_FEATURE_IDVS_GROUP_SIZE, BASE_HW_FEATURE_CORE_FEATURES, BASE_HW_FEATURE_THREAD_TLS_ALLOC, BASE_HW_FEATURE_END }; -__attribute__((unused)) static const enum base_hw_feature base_hw_features_tTRx[] = { +__maybe_unused static const enum base_hw_feature base_hw_features_tTRx[] = { BASE_HW_FEATURE_FLUSH_REDUCTION, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, BASE_HW_FEATURE_IDVS_GROUP_SIZE, BASE_HW_FEATURE_CLEAN_ONLY_SAFE, BASE_HW_FEATURE_FLUSH_INV_SHADER_OTHER, BASE_HW_FEATURE_END }; -__attribute__((unused)) static const enum base_hw_feature base_hw_features_tNAx[] = { +__maybe_unused static const enum base_hw_feature base_hw_features_tNAx[] = { BASE_HW_FEATURE_FLUSH_REDUCTION, 
BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, BASE_HW_FEATURE_IDVS_GROUP_SIZE, BASE_HW_FEATURE_CLEAN_ONLY_SAFE, BASE_HW_FEATURE_FLUSH_INV_SHADER_OTHER, BASE_HW_FEATURE_END }; -__attribute__((unused)) static const enum base_hw_feature base_hw_features_tBEx[] = { +__maybe_unused static const enum base_hw_feature base_hw_features_tBEx[] = { BASE_HW_FEATURE_FLUSH_REDUCTION, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, BASE_HW_FEATURE_IDVS_GROUP_SIZE, @@ -103,7 +105,7 @@ __attribute__((unused)) static const enum base_hw_feature base_hw_features_tBEx[ BASE_HW_FEATURE_END }; -__attribute__((unused)) static const enum base_hw_feature base_hw_features_tBAx[] = { +__maybe_unused static const enum base_hw_feature base_hw_features_tBAx[] = { BASE_HW_FEATURE_FLUSH_REDUCTION, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, BASE_HW_FEATURE_IDVS_GROUP_SIZE, @@ -113,31 +115,31 @@ __attribute__((unused)) static const enum base_hw_feature base_hw_features_tBAx[ BASE_HW_FEATURE_END }; -__attribute__((unused)) static const enum base_hw_feature base_hw_features_tODx[] = { +__maybe_unused static const enum base_hw_feature base_hw_features_tODx[] = { BASE_HW_FEATURE_FLUSH_REDUCTION, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, BASE_HW_FEATURE_L2_CONFIG, BASE_HW_FEATURE_CLEAN_ONLY_SAFE, BASE_HW_FEATURE_END }; -__attribute__((unused)) static const enum base_hw_feature base_hw_features_tGRx[] = { +__maybe_unused static const enum base_hw_feature base_hw_features_tGRx[] = { BASE_HW_FEATURE_FLUSH_REDUCTION, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, BASE_HW_FEATURE_L2_CONFIG, BASE_HW_FEATURE_CLEAN_ONLY_SAFE, BASE_HW_FEATURE_CORE_FEATURES, BASE_HW_FEATURE_END }; -__attribute__((unused)) static const enum base_hw_feature base_hw_features_tVAx[] = { +__maybe_unused static const enum base_hw_feature base_hw_features_tVAx[] = { BASE_HW_FEATURE_FLUSH_REDUCTION, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, BASE_HW_FEATURE_L2_CONFIG, BASE_HW_FEATURE_CLEAN_ONLY_SAFE, BASE_HW_FEATURE_CORE_FEATURES, BASE_HW_FEATURE_END }; -__attribute__((unused)) 
static const enum base_hw_feature base_hw_features_tTUx[] = { +__maybe_unused static const enum base_hw_feature base_hw_features_tTUx[] = { BASE_HW_FEATURE_FLUSH_REDUCTION, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, BASE_HW_FEATURE_L2_CONFIG, BASE_HW_FEATURE_CLEAN_ONLY_SAFE, BASE_HW_FEATURE_L2_SLICE_HASH, BASE_HW_FEATURE_GPU_SLEEP, BASE_HW_FEATURE_CORE_FEATURES, BASE_HW_FEATURE_END }; -__attribute__((unused)) static const enum base_hw_feature base_hw_features_tTIx[] = { +__maybe_unused static const enum base_hw_feature base_hw_features_tTIx[] = { BASE_HW_FEATURE_FLUSH_REDUCTION, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, BASE_HW_FEATURE_L2_CONFIG, @@ -149,7 +151,7 @@ __attribute__((unused)) static const enum base_hw_feature base_hw_features_tTIx[ BASE_HW_FEATURE_END }; -__attribute__((unused)) static const enum base_hw_feature base_hw_features_tKRx[] = { +__maybe_unused static const enum base_hw_feature base_hw_features_tKRx[] = { BASE_HW_FEATURE_FLUSH_REDUCTION, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, BASE_HW_FEATURE_L2_CONFIG, BASE_HW_FEATURE_CLEAN_ONLY_SAFE, BASE_HW_FEATURE_L2_SLICE_HASH, BASE_HW_FEATURE_GPU_SLEEP, diff --git a/drivers/gpu/arm/bifrost/mali_base_hwconfig_issues.h b/drivers/gpu/arm/bifrost/mali_base_hwconfig_issues.h index 4426bd743b4e..409e2e8bedcf 100644 --- a/drivers/gpu/arm/bifrost/mali_base_hwconfig_issues.h +++ b/drivers/gpu/arm/bifrost/mali_base_hwconfig_issues.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2024 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -27,6 +27,8 @@ #ifndef _BASE_HWCONFIG_ISSUES_H_ #define _BASE_HWCONFIG_ISSUES_H_ +#include + enum base_hw_issue { BASE_HW_ISSUE_5736, BASE_HW_ISSUE_9435, @@ -72,13 +74,14 @@ enum base_hw_issue { BASE_HW_ISSUE_KRAKEHW_2151, BASE_HW_ISSUE_TITANHW_2938, BASE_HW_ISSUE_KRAKEHW_2269, + BASE_HW_ISSUE_TURSEHW_2934, BASE_HW_ISSUE_END }; __attribute__(( unused)) static const enum base_hw_issue base_hw_issues_generic[] = { BASE_HW_ISSUE_END }; -__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tMIx_r0p0_05dev0[] = { +__maybe_unused static const enum base_hw_issue base_hw_issues_tMIx_r0p0_05dev0[] = { BASE_HW_ISSUE_9435, BASE_HW_ISSUE_10682, BASE_HW_ISSUE_11054, BASE_HW_ISSUE_T76X_3953, BASE_HW_ISSUE_TMIX_7891, BASE_HW_ISSUE_TMIX_8042, BASE_HW_ISSUE_TMIX_8133, BASE_HW_ISSUE_TMIX_8138, BASE_HW_ISSUE_TMIX_8206, @@ -88,7 +91,7 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tMIx_r0p0 BASE_HW_ISSUE_TITANHW_2938, BASE_HW_ISSUE_END }; -__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tMIx_r0p0[] = { +__maybe_unused static const enum base_hw_issue base_hw_issues_tMIx_r0p0[] = { BASE_HW_ISSUE_9435, BASE_HW_ISSUE_10682, BASE_HW_ISSUE_11054, BASE_HW_ISSUE_TMIX_7891, BASE_HW_ISSUE_TMIX_7940, BASE_HW_ISSUE_TMIX_8042, BASE_HW_ISSUE_TMIX_8133, BASE_HW_ISSUE_TMIX_8138, BASE_HW_ISSUE_TMIX_8206, @@ -98,7 +101,7 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tMIx_r0p0 BASE_HW_ISSUE_TITANHW_2938, BASE_HW_ISSUE_END }; -__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tMIx_r0p1[] = { +__maybe_unused static const enum base_hw_issue base_hw_issues_tMIx_r0p1[] = { BASE_HW_ISSUE_9435, BASE_HW_ISSUE_10682, BASE_HW_ISSUE_11054, BASE_HW_ISSUE_TMIX_7891, BASE_HW_ISSUE_TMIX_7940, BASE_HW_ISSUE_TMIX_8042, BASE_HW_ISSUE_TMIX_8133, 
BASE_HW_ISSUE_TMIX_8138, BASE_HW_ISSUE_TMIX_8206, @@ -108,7 +111,7 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tMIx_r0p1 BASE_HW_ISSUE_TITANHW_2938, BASE_HW_ISSUE_END }; -__attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tMIx[] = { +__maybe_unused static const enum base_hw_issue base_hw_issues_model_tMIx[] = { BASE_HW_ISSUE_5736, BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TMIX_7891, BASE_HW_ISSUE_TMIX_7940, BASE_HW_ISSUE_TMIX_8042, BASE_HW_ISSUE_TMIX_8133, BASE_HW_ISSUE_TMIX_8138, BASE_HW_ISSUE_TMIX_8206, BASE_HW_ISSUE_TMIX_8343, @@ -116,7 +119,7 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tMI BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_TITANHW_2938, BASE_HW_ISSUE_END }; -__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tHEx_r0p0[] = { +__maybe_unused static const enum base_hw_issue base_hw_issues_tHEx_r0p0[] = { BASE_HW_ISSUE_9435, BASE_HW_ISSUE_10682, BASE_HW_ISSUE_11054, BASE_HW_ISSUE_TMIX_7891, BASE_HW_ISSUE_TMIX_8042, BASE_HW_ISSUE_TMIX_8133, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_921, BASE_HW_ISSUE_GPU2017_1336, @@ -124,7 +127,7 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tHEx_r0p0 BASE_HW_ISSUE_END }; -__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tHEx_r0p1[] = { +__maybe_unused static const enum base_hw_issue base_hw_issues_tHEx_r0p1[] = { BASE_HW_ISSUE_9435, BASE_HW_ISSUE_10682, BASE_HW_ISSUE_11054, BASE_HW_ISSUE_TMIX_7891, BASE_HW_ISSUE_TMIX_8042, BASE_HW_ISSUE_TMIX_8133, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_921, BASE_HW_ISSUE_GPU2017_1336, @@ -132,7 +135,7 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tHEx_r0p1 BASE_HW_ISSUE_END }; -__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tHEx_r0p2[] = { +__maybe_unused static const enum base_hw_issue base_hw_issues_tHEx_r0p2[] = { BASE_HW_ISSUE_9435, BASE_HW_ISSUE_10682, BASE_HW_ISSUE_11054, 
BASE_HW_ISSUE_TMIX_7891, BASE_HW_ISSUE_TMIX_8042, BASE_HW_ISSUE_TMIX_8133, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_921, BASE_HW_ISSUE_GPU2017_1336, @@ -140,21 +143,21 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tHEx_r0p2 BASE_HW_ISSUE_END }; -__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tHEx_r0p3[] = { +__maybe_unused static const enum base_hw_issue base_hw_issues_tHEx_r0p3[] = { BASE_HW_ISSUE_9435, BASE_HW_ISSUE_10682, BASE_HW_ISSUE_TMIX_7891, BASE_HW_ISSUE_TMIX_8042, BASE_HW_ISSUE_TMIX_8133, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_921, BASE_HW_ISSUE_GPU2017_1336, BASE_HW_ISSUE_TITANHW_2710, BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_TITANHW_2938, BASE_HW_ISSUE_END }; -__attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tHEx[] = { +__maybe_unused static const enum base_hw_issue base_hw_issues_model_tHEx[] = { BASE_HW_ISSUE_5736, BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TMIX_7891, BASE_HW_ISSUE_TMIX_8042, BASE_HW_ISSUE_TMIX_8133, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TITANHW_2710, BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_TITANHW_2938, BASE_HW_ISSUE_END }; -__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tSIx_r0p0[] = { +__maybe_unused static const enum base_hw_issue base_hw_issues_tSIx_r0p0[] = { BASE_HW_ISSUE_9435, BASE_HW_ISSUE_11054, BASE_HW_ISSUE_TMIX_8133, BASE_HW_ISSUE_TSIX_1116, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TSIX_1792, BASE_HW_ISSUE_TTRX_921, BASE_HW_ISSUE_GPU2017_1336, BASE_HW_ISSUE_TTRX_3464, @@ -162,7 +165,7 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tSIx_r0p0 BASE_HW_ISSUE_END }; -__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tSIx_r0p1[] = { +__maybe_unused static const enum base_hw_issue base_hw_issues_tSIx_r0p1[] = { BASE_HW_ISSUE_9435, BASE_HW_ISSUE_11054, BASE_HW_ISSUE_TMIX_8133, BASE_HW_ISSUE_TSIX_1116, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TSIX_1792, BASE_HW_ISSUE_TTRX_921, 
BASE_HW_ISSUE_GPU2017_1336, BASE_HW_ISSUE_TTRX_3464, @@ -170,77 +173,77 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tSIx_r0p1 BASE_HW_ISSUE_END }; -__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tSIx_r1p0[] = { +__maybe_unused static const enum base_hw_issue base_hw_issues_tSIx_r1p0[] = { BASE_HW_ISSUE_9435, BASE_HW_ISSUE_11054, BASE_HW_ISSUE_TMIX_8133, BASE_HW_ISSUE_TSIX_1116, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_921, BASE_HW_ISSUE_GPU2017_1336, BASE_HW_ISSUE_TTRX_3464, BASE_HW_ISSUE_TITANHW_2710, BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_TITANHW_2938, BASE_HW_ISSUE_END }; -__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tSIx_r1p1[] = { +__maybe_unused static const enum base_hw_issue base_hw_issues_tSIx_r1p1[] = { BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TMIX_8133, BASE_HW_ISSUE_TSIX_1116, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_921, BASE_HW_ISSUE_GPU2017_1336, BASE_HW_ISSUE_TTRX_3464, BASE_HW_ISSUE_TITANHW_2710, BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_TITANHW_2938, BASE_HW_ISSUE_END }; -__attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tSIx[] = { +__maybe_unused static const enum base_hw_issue base_hw_issues_model_tSIx[] = { BASE_HW_ISSUE_5736, BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TMIX_8133, BASE_HW_ISSUE_TSIX_1116, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_3464, BASE_HW_ISSUE_TITANHW_2710, BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_TITANHW_2938, BASE_HW_ISSUE_END }; -__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tDVx_r0p0[] = { +__maybe_unused static const enum base_hw_issue base_hw_issues_tDVx_r0p0[] = { BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TMIX_8133, BASE_HW_ISSUE_TSIX_1116, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_921, BASE_HW_ISSUE_GPU2017_1336, BASE_HW_ISSUE_TTRX_3464, BASE_HW_ISSUE_TITANHW_2710, BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_TITANHW_2938, BASE_HW_ISSUE_END }; -__attribute__((unused)) static const enum 
base_hw_issue base_hw_issues_model_tDVx[] = { +__maybe_unused static const enum base_hw_issue base_hw_issues_model_tDVx[] = { BASE_HW_ISSUE_5736, BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TMIX_8133, BASE_HW_ISSUE_TSIX_1116, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_3464, BASE_HW_ISSUE_TITANHW_2710, BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_TITANHW_2938, BASE_HW_ISSUE_END }; -__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tNOx_r0p0[] = { +__maybe_unused static const enum base_hw_issue base_hw_issues_tNOx_r0p0[] = { BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TMIX_8133, BASE_HW_ISSUE_TSIX_1116, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TNOX_1194, BASE_HW_ISSUE_TTRX_921, BASE_HW_ISSUE_GPU2017_1336, BASE_HW_ISSUE_TTRX_3464, BASE_HW_ISSUE_TITANHW_2710, BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_TITANHW_2938, BASE_HW_ISSUE_END }; -__attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tNOx[] = { +__maybe_unused static const enum base_hw_issue base_hw_issues_model_tNOx[] = { BASE_HW_ISSUE_5736, BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TMIX_8133, BASE_HW_ISSUE_TSIX_1116, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_3464, BASE_HW_ISSUE_TITANHW_2710, BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_TITANHW_2938, BASE_HW_ISSUE_END }; -__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tGOx_r0p0[] = { +__maybe_unused static const enum base_hw_issue base_hw_issues_tGOx_r0p0[] = { BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TMIX_8133, BASE_HW_ISSUE_TSIX_1116, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TNOX_1194, BASE_HW_ISSUE_TTRX_921, BASE_HW_ISSUE_GPU2017_1336, BASE_HW_ISSUE_TTRX_3464, BASE_HW_ISSUE_TITANHW_2710, BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_TITANHW_2938, BASE_HW_ISSUE_END }; -__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tGOx_r1p0[] = { +__maybe_unused static const enum base_hw_issue base_hw_issues_tGOx_r1p0[] = { BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TMIX_8133, BASE_HW_ISSUE_TSIX_1116, BASE_HW_ISSUE_TSIX_2033, 
BASE_HW_ISSUE_TGOX_R1_1234, BASE_HW_ISSUE_TTRX_921, BASE_HW_ISSUE_GPU2017_1336, BASE_HW_ISSUE_TTRX_3464, BASE_HW_ISSUE_TITANHW_2710, BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_TITANHW_2938, BASE_HW_ISSUE_END }; -__attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tGOx[] = { +__maybe_unused static const enum base_hw_issue base_hw_issues_model_tGOx[] = { BASE_HW_ISSUE_5736, BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TMIX_8133, BASE_HW_ISSUE_TSIX_1116, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_3464, BASE_HW_ISSUE_TITANHW_2710, BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_TITANHW_2938, BASE_HW_ISSUE_END }; -__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTRx_r0p0[] = { +__maybe_unused static const enum base_hw_issue base_hw_issues_tTRx_r0p0[] = { BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_TTRX_2968_TTRX_3162, BASE_HW_ISSUE_TTRX_3076, BASE_HW_ISSUE_TTRX_921, @@ -251,7 +254,7 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTRx_r0p0 BASE_HW_ISSUE_TITANHW_2938, BASE_HW_ISSUE_END }; -__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTRx_r0p1[] = { +__maybe_unused static const enum base_hw_issue base_hw_issues_tTRx_r0p1[] = { BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_TTRX_2968_TTRX_3162, BASE_HW_ISSUE_TTRX_3076, BASE_HW_ISSUE_TTRX_921, @@ -262,7 +265,7 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTRx_r0p1 BASE_HW_ISSUE_TITANHW_2938, BASE_HW_ISSUE_END }; -__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTRx_r0p2[] = { +__maybe_unused static const enum base_hw_issue base_hw_issues_tTRx_r0p2[] = { BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, @@ -280,14 +283,14 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTRx_r0p2 BASE_HW_ISSUE_END }; -__attribute__((unused)) static const enum base_hw_issue 
base_hw_issues_model_tTRx[] = { +__maybe_unused static const enum base_hw_issue base_hw_issues_model_tTRx[] = { BASE_HW_ISSUE_5736, BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_TTRX_3414, BASE_HW_ISSUE_TTRX_3083, BASE_HW_ISSUE_TTRX_3470, BASE_HW_ISSUE_TTRX_3464, BASE_HW_ISSUE_TITANHW_2710, BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_TITANHW_2938, BASE_HW_ISSUE_END }; -__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tNAx_r0p0[] = { +__maybe_unused static const enum base_hw_issue base_hw_issues_tNAx_r0p0[] = { BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_TTRX_2968_TTRX_3162, BASE_HW_ISSUE_TTRX_3076, BASE_HW_ISSUE_TTRX_921, @@ -298,7 +301,7 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tNAx_r0p0 BASE_HW_ISSUE_TITANHW_2938, BASE_HW_ISSUE_END }; -__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tNAx_r0p1[] = { +__maybe_unused static const enum base_hw_issue base_hw_issues_tNAx_r0p1[] = { BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, @@ -316,14 +319,14 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tNAx_r0p1 BASE_HW_ISSUE_END }; -__attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tNAx[] = { +__maybe_unused static const enum base_hw_issue base_hw_issues_model_tNAx[] = { BASE_HW_ISSUE_5736, BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_TTRX_3414, BASE_HW_ISSUE_TTRX_3083, BASE_HW_ISSUE_TTRX_3470, BASE_HW_ISSUE_TTRX_3464, BASE_HW_ISSUE_TITANHW_2710, BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_TITANHW_2938, BASE_HW_ISSUE_END }; -__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tBEx_r0p0[] = { +__maybe_unused static const enum base_hw_issue base_hw_issues_tBEx_r0p0[] = { BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_TTRX_2968_TTRX_3162, 
BASE_HW_ISSUE_TTRX_921, BASE_HW_ISSUE_TTRX_3414, @@ -333,7 +336,7 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tBEx_r0p0 BASE_HW_ISSUE_TITANHW_2938, BASE_HW_ISSUE_END }; -__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tBEx_r0p1[] = { +__maybe_unused static const enum base_hw_issue base_hw_issues_tBEx_r0p1[] = { BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, @@ -349,7 +352,7 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tBEx_r0p1 BASE_HW_ISSUE_END }; -__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tBEx_r1p0[] = { +__maybe_unused static const enum base_hw_issue base_hw_issues_tBEx_r1p0[] = { BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, @@ -365,7 +368,7 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tBEx_r1p0 BASE_HW_ISSUE_END }; -__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tBEx_r1p1[] = { +__maybe_unused static const enum base_hw_issue base_hw_issues_tBEx_r1p1[] = { BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, @@ -381,14 +384,14 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tBEx_r1p1 BASE_HW_ISSUE_END }; -__attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tBEx[] = { +__maybe_unused static const enum base_hw_issue base_hw_issues_model_tBEx[] = { BASE_HW_ISSUE_5736, BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_TTRX_3414, BASE_HW_ISSUE_TTRX_3083, BASE_HW_ISSUE_TTRX_3470, BASE_HW_ISSUE_TTRX_3464, BASE_HW_ISSUE_TITANHW_2710, BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_TITANHW_2938, BASE_HW_ISSUE_END }; -__attribute__((unused)) static const enum base_hw_issue base_hw_issues_lBEx_r1p0[] = { +__maybe_unused static const enum base_hw_issue base_hw_issues_lBEx_r1p0[] = { BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, 
BASE_HW_ISSUE_TTRX_2968_TTRX_3162, BASE_HW_ISSUE_TTRX_921, BASE_HW_ISSUE_TTRX_3414, @@ -398,7 +401,7 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_lBEx_r1p0 BASE_HW_ISSUE_TITANHW_2938, BASE_HW_ISSUE_END }; -__attribute__((unused)) static const enum base_hw_issue base_hw_issues_lBEx_r1p1[] = { +__maybe_unused static const enum base_hw_issue base_hw_issues_lBEx_r1p1[] = { BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, @@ -414,7 +417,7 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_lBEx_r1p1 BASE_HW_ISSUE_END }; -__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tBAx_r0p0[] = { +__maybe_unused static const enum base_hw_issue base_hw_issues_tBAx_r0p0[] = { BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, @@ -430,7 +433,7 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tBAx_r0p0 BASE_HW_ISSUE_END }; -__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tBAx_r0p1[] = { +__maybe_unused static const enum base_hw_issue base_hw_issues_tBAx_r0p1[] = { BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, @@ -446,7 +449,7 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tBAx_r0p1 BASE_HW_ISSUE_END }; -__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tBAx_r0p2[] = { +__maybe_unused static const enum base_hw_issue base_hw_issues_tBAx_r0p2[] = { BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, @@ -462,73 +465,56 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tBAx_r0p2 BASE_HW_ISSUE_END }; -__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tBAx_r1p0[] = { - BASE_HW_ISSUE_9435, - BASE_HW_ISSUE_TSIX_2033, - BASE_HW_ISSUE_TTRX_1337, - BASE_HW_ISSUE_TTRX_2968_TTRX_3162, - BASE_HW_ISSUE_TTRX_921, - BASE_HW_ISSUE_TTRX_3414, - BASE_HW_ISSUE_TTRX_3083, - BASE_HW_ISSUE_TTRX_3470, - 
BASE_HW_ISSUE_TTRX_3464, - BASE_HW_ISSUE_TITANHW_2710, - BASE_HW_ISSUE_GPU2022PRO_148, - BASE_HW_ISSUE_TITANHW_2938, - BASE_HW_ISSUE_END -}; - -__attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tBAx[] = { +__maybe_unused static const enum base_hw_issue base_hw_issues_model_tBAx[] = { BASE_HW_ISSUE_5736, BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_TTRX_3414, BASE_HW_ISSUE_TTRX_3083, BASE_HW_ISSUE_TTRX_3470, BASE_HW_ISSUE_TTRX_3464, BASE_HW_ISSUE_TITANHW_2710, BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_TITANHW_2938, BASE_HW_ISSUE_END }; -__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tODx_r0p0[] = { +__maybe_unused static const enum base_hw_issue base_hw_issues_tODx_r0p0[] = { BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_GPU2019_3212, BASE_HW_ISSUE_GPU2019_3878, BASE_HW_ISSUE_GPU2019_3901, BASE_HW_ISSUE_TITANHW_2710, BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_TITANHW_2938, BASE_HW_ISSUE_END }; -__attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tODx[] = { +__maybe_unused static const enum base_hw_issue base_hw_issues_model_tODx[] = { BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_GPU2019_3212, BASE_HW_ISSUE_GPU2019_3878, BASE_HW_ISSUE_GPU2019_3901, BASE_HW_ISSUE_TITANHW_2710, BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_TITANHW_2938, BASE_HW_ISSUE_END }; -__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tGRx_r0p0[] = { +__maybe_unused static const enum base_hw_issue base_hw_issues_tGRx_r0p0[] = { BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_GPU2019_3878, BASE_HW_ISSUE_GPU2019_3901, BASE_HW_ISSUE_TITANHW_2710, BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_TITANHW_2938, BASE_HW_ISSUE_END }; -__attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tGRx[] = { +__maybe_unused static const enum base_hw_issue base_hw_issues_model_tGRx[] = { 
BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_GPU2019_3878, BASE_HW_ISSUE_GPU2019_3901, BASE_HW_ISSUE_TITANHW_2710, BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_TITANHW_2938, BASE_HW_ISSUE_END }; -__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tVAx_r0p0[] = { - BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_GPU2019_3878, - BASE_HW_ISSUE_GPU2019_3901, BASE_HW_ISSUE_TITANHW_2710, BASE_HW_ISSUE_GPU2022PRO_148, - BASE_HW_ISSUE_TITANHW_2938, BASE_HW_ISSUE_TITANHW_2938, BASE_HW_ISSUE_END -}; - -__attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tVAx[] = { +__maybe_unused static const enum base_hw_issue base_hw_issues_tVAx_r0p0[] = { BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_GPU2019_3878, BASE_HW_ISSUE_GPU2019_3901, BASE_HW_ISSUE_TITANHW_2710, BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_TITANHW_2938, BASE_HW_ISSUE_END }; -__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTUx_r0p0[] = { - BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_TURSEHW_1997, - BASE_HW_ISSUE_GPU2019_3878, BASE_HW_ISSUE_TURSEHW_2716, BASE_HW_ISSUE_GPU2019_3901, - BASE_HW_ISSUE_GPU2021PRO_290, BASE_HW_ISSUE_TITANHW_2710, BASE_HW_ISSUE_TITANHW_2679, - BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_TITANHW_2938, BASE_HW_ISSUE_END +__maybe_unused static const enum base_hw_issue base_hw_issues_tVAx_r0p1[] = { + BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_GPU2019_3878, + BASE_HW_ISSUE_GPU2019_3901, BASE_HW_ISSUE_TITANHW_2710, BASE_HW_ISSUE_GPU2022PRO_148, + BASE_HW_ISSUE_TITANHW_2938, BASE_HW_ISSUE_END }; -__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTUx_r0p1[] = { +__maybe_unused static const enum base_hw_issue base_hw_issues_model_tVAx[] = { + BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_GPU2019_3878, + BASE_HW_ISSUE_GPU2019_3901, BASE_HW_ISSUE_TITANHW_2710, BASE_HW_ISSUE_GPU2022PRO_148, + 
BASE_HW_ISSUE_TITANHW_2938, BASE_HW_ISSUE_END +}; + +__maybe_unused static const enum base_hw_issue base_hw_issues_tTUx_r0p0[] = { BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_TURSEHW_1997, @@ -539,79 +525,96 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTUx_r0p1 BASE_HW_ISSUE_TITANHW_2710, BASE_HW_ISSUE_TITANHW_2679, BASE_HW_ISSUE_GPU2022PRO_148, - BASE_HW_ISSUE_TITANHW_2922, BASE_HW_ISSUE_TITANHW_2938, + BASE_HW_ISSUE_TURSEHW_2934, BASE_HW_ISSUE_END }; -__attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tTUx[] = { +__maybe_unused static const enum base_hw_issue base_hw_issues_tTUx_r0p1[] = { + BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_TURSEHW_1997, + BASE_HW_ISSUE_GPU2019_3878, BASE_HW_ISSUE_TURSEHW_2716, BASE_HW_ISSUE_GPU2019_3901, + BASE_HW_ISSUE_GPU2021PRO_290, BASE_HW_ISSUE_TITANHW_2710, BASE_HW_ISSUE_TITANHW_2679, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_TITANHW_2922, BASE_HW_ISSUE_TITANHW_2938, + BASE_HW_ISSUE_TURSEHW_2934, BASE_HW_ISSUE_END +}; + +__maybe_unused static const enum base_hw_issue base_hw_issues_model_tTUx[] = { BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_GPU2019_3878, BASE_HW_ISSUE_TURSEHW_2716, BASE_HW_ISSUE_GPU2019_3901, BASE_HW_ISSUE_GPU2021PRO_290, BASE_HW_ISSUE_TITANHW_2710, BASE_HW_ISSUE_TITANHW_2679, BASE_HW_ISSUE_GPU2022PRO_148, - BASE_HW_ISSUE_TITANHW_2922, BASE_HW_ISSUE_TITANHW_2938, BASE_HW_ISSUE_END + BASE_HW_ISSUE_TITANHW_2922, BASE_HW_ISSUE_TITANHW_2938, BASE_HW_ISSUE_TURSEHW_2934, + BASE_HW_ISSUE_END }; -__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTUx_r1p0[] = { +__maybe_unused static const enum base_hw_issue base_hw_issues_tTUx_r1p0[] = { BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_GPU2019_3878, BASE_HW_ISSUE_TURSEHW_2716, BASE_HW_ISSUE_GPU2019_3901, BASE_HW_ISSUE_GPU2021PRO_290, BASE_HW_ISSUE_TITANHW_2710, BASE_HW_ISSUE_TITANHW_2679, 
BASE_HW_ISSUE_GPU2022PRO_148, - BASE_HW_ISSUE_TITANHW_2922, BASE_HW_ISSUE_TITANHW_2938, BASE_HW_ISSUE_END + BASE_HW_ISSUE_TITANHW_2922, BASE_HW_ISSUE_TITANHW_2938, BASE_HW_ISSUE_TURSEHW_2934, + BASE_HW_ISSUE_END }; -__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTUx_r1p1[] = { +__maybe_unused static const enum base_hw_issue base_hw_issues_tTUx_r1p1[] = { BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_GPU2019_3878, BASE_HW_ISSUE_TURSEHW_2716, BASE_HW_ISSUE_GPU2019_3901, BASE_HW_ISSUE_GPU2021PRO_290, BASE_HW_ISSUE_TITANHW_2710, BASE_HW_ISSUE_TITANHW_2679, BASE_HW_ISSUE_GPU2022PRO_148, - BASE_HW_ISSUE_TITANHW_2922, BASE_HW_ISSUE_TITANHW_2938, BASE_HW_ISSUE_END + BASE_HW_ISSUE_TITANHW_2922, BASE_HW_ISSUE_TITANHW_2938, BASE_HW_ISSUE_TURSEHW_2934, + BASE_HW_ISSUE_END }; -__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTUx_r1p2[] = { +__maybe_unused static const enum base_hw_issue base_hw_issues_tTUx_r1p2[] = { BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_GPU2019_3878, BASE_HW_ISSUE_TURSEHW_2716, BASE_HW_ISSUE_GPU2019_3901, BASE_HW_ISSUE_GPU2021PRO_290, BASE_HW_ISSUE_TITANHW_2710, BASE_HW_ISSUE_TITANHW_2679, BASE_HW_ISSUE_GPU2022PRO_148, - BASE_HW_ISSUE_TITANHW_2922, BASE_HW_ISSUE_TITANHW_2938, BASE_HW_ISSUE_END + BASE_HW_ISSUE_TITANHW_2922, BASE_HW_ISSUE_TITANHW_2938, BASE_HW_ISSUE_TURSEHW_2934, + BASE_HW_ISSUE_END }; -__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTUx_r1p3[] = { +__maybe_unused static const enum base_hw_issue base_hw_issues_tTUx_r1p3[] = { BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_GPU2019_3878, BASE_HW_ISSUE_TURSEHW_2716, BASE_HW_ISSUE_GPU2019_3901, BASE_HW_ISSUE_GPU2021PRO_290, BASE_HW_ISSUE_TITANHW_2710, BASE_HW_ISSUE_TITANHW_2679, BASE_HW_ISSUE_GPU2022PRO_148, - BASE_HW_ISSUE_TITANHW_2922, BASE_HW_ISSUE_TITANHW_2938, BASE_HW_ISSUE_END + BASE_HW_ISSUE_TITANHW_2922, BASE_HW_ISSUE_TITANHW_2938, BASE_HW_ISSUE_TURSEHW_2934, + 
BASE_HW_ISSUE_END }; -__attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tTIx[] = { +__maybe_unused static const enum base_hw_issue base_hw_issues_model_tTIx[] = { BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_TURSEHW_2716, BASE_HW_ISSUE_GPU2021PRO_290, BASE_HW_ISSUE_TITANHW_2710, BASE_HW_ISSUE_TITANHW_2679, BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_TITANHW_2922, BASE_HW_ISSUE_TITANHW_2952, - BASE_HW_ISSUE_TITANHW_2938, BASE_HW_ISSUE_END + BASE_HW_ISSUE_TITANHW_2938, BASE_HW_ISSUE_TURSEHW_2934, BASE_HW_ISSUE_END }; -__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTIx_r0p0[] = { +__maybe_unused static const enum base_hw_issue base_hw_issues_tTIx_r0p0[] = { BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_TURSEHW_2716, BASE_HW_ISSUE_GPU2021PRO_290, BASE_HW_ISSUE_TITANHW_2710, BASE_HW_ISSUE_TITANHW_2679, BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_TITANHW_2922, BASE_HW_ISSUE_TITANHW_2952, - BASE_HW_ISSUE_TITANHW_2938, BASE_HW_ISSUE_END + BASE_HW_ISSUE_TITANHW_2938, BASE_HW_ISSUE_TURSEHW_2934, BASE_HW_ISSUE_END }; -__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTIx_r0p1[] = { - BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_TURSEHW_2716, - BASE_HW_ISSUE_GPU2021PRO_290, BASE_HW_ISSUE_TITANHW_2710, BASE_HW_ISSUE_TITANHW_2679, - BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_TITANHW_2938, BASE_HW_ISSUE_END +__maybe_unused static const enum base_hw_issue base_hw_issues_tTIx_r0p1[] = { + BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, + BASE_HW_ISSUE_TURSEHW_2716, BASE_HW_ISSUE_GPU2021PRO_290, + BASE_HW_ISSUE_TITANHW_2710, BASE_HW_ISSUE_TITANHW_2679, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_TITANHW_2938, + BASE_HW_ISSUE_TURSEHW_2934, BASE_HW_ISSUE_END }; -__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tKRx_r0p0[] = { +__maybe_unused static const enum base_hw_issue base_hw_issues_tKRx_r0p0[] = { BASE_HW_ISSUE_TTRX_1337, 
BASE_HW_ISSUE_TURSEHW_2716, BASE_HW_ISSUE_GPU2022PRO_148, - BASE_HW_ISSUE_KRAKEHW_2151, BASE_HW_ISSUE_KRAKEHW_2269, BASE_HW_ISSUE_END + BASE_HW_ISSUE_KRAKEHW_2151, BASE_HW_ISSUE_KRAKEHW_2269, BASE_HW_ISSUE_TITANHW_2922, + BASE_HW_ISSUE_TURSEHW_2934, BASE_HW_ISSUE_END }; -__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tKRx_r0p1[] = { - BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_TURSEHW_2716, BASE_HW_ISSUE_GPU2022PRO_148, - BASE_HW_ISSUE_KRAKEHW_2269, BASE_HW_ISSUE_END -}; - -__attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tKRx[] = { +__maybe_unused static const enum base_hw_issue base_hw_issues_tKRx_r0p1[] = { BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_TURSEHW_2716, BASE_HW_ISSUE_GPU2022PRO_148, - BASE_HW_ISSUE_KRAKEHW_2151, BASE_HW_ISSUE_KRAKEHW_2269, BASE_HW_ISSUE_END + BASE_HW_ISSUE_KRAKEHW_2269, BASE_HW_ISSUE_TURSEHW_2934, BASE_HW_ISSUE_END +}; + +__maybe_unused static const enum base_hw_issue base_hw_issues_model_tKRx[] = { + BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_TURSEHW_2716, BASE_HW_ISSUE_GPU2022PRO_148, + BASE_HW_ISSUE_KRAKEHW_2151, BASE_HW_ISSUE_KRAKEHW_2269, BASE_HW_ISSUE_TURSEHW_2934, + BASE_HW_ISSUE_END }; diff --git a/drivers/gpu/arm/bifrost/mali_csffw.bin b/drivers/gpu/arm/bifrost/mali_csffw.bin index 1f8413ba14d7..a1168ffdc743 100644 Binary files a/drivers/gpu/arm/bifrost/mali_csffw.bin and b/drivers/gpu/arm/bifrost/mali_csffw.bin differ diff --git a/drivers/gpu/arm/bifrost/mali_kbase.h b/drivers/gpu/arm/bifrost/mali_kbase.h index 498d53f15f9e..ee78a1237d56 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase.h +++ b/drivers/gpu/arm/bifrost/mali_kbase.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2024 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -204,22 +204,24 @@ int kbase_protected_mode_init(struct kbase_device *kbdev); void kbase_protected_mode_term(struct kbase_device *kbdev); /** - * kbase_device_pm_init() - Performs power management initialization and - * Verifies device tree configurations. + * kbase_device_backend_init() - Performs backend initialization and performs + * devicetree validation. * @kbdev: The kbase device structure for the device (must be a valid pointer) * * Return: 0 if successful, otherwise a standard Linux error code + * If -EPERM is returned, it means the device backend is not supported, but + * device initialization can continue. */ -int kbase_device_pm_init(struct kbase_device *kbdev); +int kbase_device_backend_init(struct kbase_device *kbdev); /** - * kbase_device_pm_term() - Performs power management deinitialization and - * Free resources. + * kbase_device_backend_term() - Performs backend deinitialization and frees + * resources. * @kbdev: The kbase device structure for the device (must be a valid pointer) * * Clean up all the resources */ -void kbase_device_pm_term(struct kbase_device *kbdev); +void kbase_device_backend_term(struct kbase_device *kbdev); int power_control_init(struct kbase_device *kbdev); void power_control_term(struct kbase_device *kbdev); @@ -812,108 +814,8 @@ bool kbasep_adjust_prioritized_process(struct kbase_device *kbdev, bool add, uin #define UINT64_MAX ((uint64_t)0xFFFFFFFFFFFFFFFFULL) #endif -/** - * kbase_file_fops_count() - Get the kfile::fops_count value - * - * @kfile: Pointer to the object representing the mali device file. - * - * The value is read with kfile::lock held. - * - * Return: sampled value of kfile::fops_count.
- */ -static inline int kbase_file_fops_count(struct kbase_file *kfile) -{ - int fops_count; - - spin_lock(&kfile->lock); - fops_count = kfile->fops_count; - spin_unlock(&kfile->lock); - - return fops_count; -} - -/** - * kbase_file_inc_fops_count_unless_closed() - Increment the kfile::fops_count value if the - * kfile::owner is still set. - * - * @kfile: Pointer to the object representing the /dev/malixx device file instance. - * - * Return: true if the increment was done otherwise false. - */ -static inline bool kbase_file_inc_fops_count_unless_closed(struct kbase_file *kfile) -{ - bool count_incremented = false; - - spin_lock(&kfile->lock); - if (kfile->owner) { - kfile->fops_count++; - count_incremented = true; - } - spin_unlock(&kfile->lock); - - return count_incremented; -} - -/** - * kbase_file_dec_fops_count() - Decrement the kfile::fops_count value - * - * @kfile: Pointer to the object representing the /dev/malixx device file instance. - * - * This function shall only be called to decrement kfile::fops_count if a successful call - * to kbase_file_inc_fops_count_unless_closed() was made previously by the current thread. - * - * The function would enqueue the kfile::destroy_kctx_work if the process that originally - * created the file instance has closed its copy and no Kbase handled file operations are - * in progress and no memory mappings are present for the file instance. - */ -static inline void kbase_file_dec_fops_count(struct kbase_file *kfile) -{ - spin_lock(&kfile->lock); - WARN_ON_ONCE(kfile->fops_count <= 0); - kfile->fops_count--; - if (unlikely(!kfile->fops_count && !kfile->owner && !kfile->map_count)) { - queue_work(system_wq, &kfile->destroy_kctx_work); -#if IS_ENABLED(CONFIG_DEBUG_FS) - wake_up(&kfile->zero_fops_count_wait); +#if !defined(UINT32_MAX) +#define UINT32_MAX ((uint32_t)0xFFFFFFFFU) #endif - } - spin_unlock(&kfile->lock); -} - -/** - * kbase_file_inc_cpu_mapping_count() - Increment the kfile::map_count value. 
- * - * @kfile: Pointer to the object representing the /dev/malixx device file instance. - * - * This function shall be called when the memory mapping on /dev/malixx device file - * instance is created. The kbase_file::setup_state shall be KBASE_FILE_COMPLETE. - */ -static inline void kbase_file_inc_cpu_mapping_count(struct kbase_file *kfile) -{ - spin_lock(&kfile->lock); - kfile->map_count++; - spin_unlock(&kfile->lock); -} - -/** - * kbase_file_dec_cpu_mapping_count() - Decrement the kfile::map_count value - * - * @kfile: Pointer to the object representing the /dev/malixx device file instance. - * - * This function is called to decrement kfile::map_count value when the memory mapping - * on /dev/malixx device file is closed. - * The function would enqueue the kfile::destroy_kctx_work if the process that originally - * created the file instance has closed its copy and there are no mappings present and no - * Kbase handled file operations are in progress for the file instance. - */ -static inline void kbase_file_dec_cpu_mapping_count(struct kbase_file *kfile) -{ - spin_lock(&kfile->lock); - WARN_ON_ONCE(kfile->map_count <= 0); - kfile->map_count--; - if (unlikely(!kfile->map_count && !kfile->owner && !kfile->fops_count)) - queue_work(system_wq, &kfile->destroy_kctx_work); - spin_unlock(&kfile->lock); -} #endif diff --git a/drivers/gpu/arm/bifrost/mali_kbase_caps.h b/drivers/gpu/arm/bifrost/mali_kbase_caps.h index a92569d31f06..f6bcdd06e1aa 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_caps.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_caps.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2020-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2020-2024 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -33,15 +33,26 @@ * * @MALI_KBASE_CAP_SYSTEM_MONITOR: System Monitor * @MALI_KBASE_CAP_JIT_PRESSURE_LIMIT: JIT Pressure limit + * @MALI_KBASE_CAP_MEM_DONT_NEED: Not needed physical memory * @MALI_KBASE_CAP_MEM_GROW_ON_GPF: Memory grow on page fault * @MALI_KBASE_CAP_MEM_PROTECTED: Protected memory + * @MALI_KBASE_CAP_MEM_IMPORT_SYNC_ON_MAP_UNMAP: CPU cache maintenance required when + * imported GPU memory is mapped/unmapped + * @MALI_KBASE_CAP_MEM_KERNEL_SYNC: Kernel side cache sync ops required + * @MALI_KBASE_CAP_MEM_SAME_VA: Same VA on CPU and GPU * @MALI_KBASE_NUM_CAPS: Delimiter + * + * New enumerators must be non-negative and smaller than @MALI_KBASE_NUM_CAPS. */ enum mali_kbase_cap { MALI_KBASE_CAP_SYSTEM_MONITOR = 0, MALI_KBASE_CAP_JIT_PRESSURE_LIMIT, + MALI_KBASE_CAP_MEM_DONT_NEED, MALI_KBASE_CAP_MEM_GROW_ON_GPF, MALI_KBASE_CAP_MEM_PROTECTED, + MALI_KBASE_CAP_MEM_IMPORT_SYNC_ON_MAP_UNMAP, + MALI_KBASE_CAP_MEM_KERNEL_SYNC, + MALI_KBASE_CAP_MEM_SAME_VA, MALI_KBASE_NUM_CAPS }; @@ -57,6 +68,11 @@ static inline bool mali_kbase_supports_jit_pressure_limit(unsigned long api_vers return mali_kbase_supports_cap(api_version, MALI_KBASE_CAP_JIT_PRESSURE_LIMIT); } +static inline bool mali_kbase_supports_mem_dont_need(unsigned long api_version) +{ + return mali_kbase_supports_cap(api_version, MALI_KBASE_CAP_MEM_DONT_NEED); +} + static inline bool mali_kbase_supports_mem_grow_on_gpf(unsigned long api_version) { return mali_kbase_supports_cap(api_version, MALI_KBASE_CAP_MEM_GROW_ON_GPF); @@ -67,4 +83,19 @@ static inline bool mali_kbase_supports_mem_protected(unsigned long api_version) return mali_kbase_supports_cap(api_version, MALI_KBASE_CAP_MEM_PROTECTED); } +static inline bool mali_kbase_supports_mem_import_sync_on_map_unmap(unsigned long api_version) +{ + return mali_kbase_supports_cap(api_version,
MALI_KBASE_CAP_MEM_IMPORT_SYNC_ON_MAP_UNMAP); +} + +static inline bool mali_kbase_supports_mem_kernel_sync(unsigned long api_version) +{ + return mali_kbase_supports_cap(api_version, MALI_KBASE_CAP_MEM_KERNEL_SYNC); +} + +static inline bool mali_kbase_supports_mem_same_va(unsigned long api_version) +{ + return mali_kbase_supports_cap(api_version, MALI_KBASE_CAP_MEM_SAME_VA); +} + #endif /* __KBASE_CAPS_H_ */ diff --git a/drivers/gpu/arm/bifrost/mali_kbase_config.h b/drivers/gpu/arm/bifrost/mali_kbase_config.h index 7233e2dd3920..2f9e28aaec9a 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_config.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_config.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2024 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -166,8 +166,9 @@ struct kbase_pm_callback_conf { * * The system integrator can decide whether to either do nothing, just switch off * the clocks to the GPU, or to completely power down the GPU. - * The platform specific private pointer kbase_device::platform_context can be accessed and modified in here. It is the - * platform \em callbacks responsibility to initialize and terminate this pointer if used (see @ref kbase_platform_funcs_conf). + * The platform specific private pointer kbase_device::platform_context can be + * accessed and modified in here. It is the platform \em callbacks responsibility + * to initialize and terminate this pointer if used (see @ref kbase_platform_funcs_conf). 
* * If runtime PM is enabled and @power_runtime_gpu_idle_callback is used * then this callback should power off the GPU (or switch off the clocks @@ -179,15 +180,18 @@ struct kbase_pm_callback_conf { /** Callback for when the GPU is about to become active and power must be supplied. * - * This function must not return until the GPU is powered and clocked sufficiently for register access to - * succeed. The return value specifies whether the GPU was powered down since the call to power_off_callback. - * If the GPU state has been lost then this function must return 1, otherwise it should return 0. - * The platform specific private pointer kbase_device::platform_context can be accessed and modified in here. It is the - * platform \em callbacks responsibility to initialize and terminate this pointer if used (see @ref kbase_platform_funcs_conf). + * This function must not return until the GPU is powered and clocked sufficiently + * for register access to succeed. The return value specifies whether the GPU was + * powered down since the call to power_off_callback. + * If the GPU is in reset state it should return 2, if the GPU state has been lost + * then this function must return 1, otherwise it should return 0. + * The platform specific private pointer kbase_device::platform_context can be + * accessed and modified in here. It is the platform \em callbacks responsibility + * to initialize and terminate this pointer if used (see @ref kbase_platform_funcs_conf). * * The return value of the first call to this function is ignored. * - * @return 1 if the GPU state may have been lost, 0 otherwise. + * @return 2 if GPU in reset state, 1 if the GPU state may have been lost, 0 otherwise. */ int (*power_on_callback)(struct kbase_device *kbdev); @@ -223,9 +227,11 @@ struct kbase_pm_callback_conf { /** Callback for handling runtime power management initialization. 
* - * The runtime power management callbacks @ref power_runtime_off_callback and @ref power_runtime_on_callback - * will become active from calls made to the OS from within this function. - * The runtime calls can be triggered by calls from @ref power_off_callback and @ref power_on_callback. + * The runtime power management callbacks @ref power_runtime_off_callback + * and @ref power_runtime_on_callback will become active from calls made + * to the OS from within this function. + * The runtime calls can be triggered by calls from @ref power_off_callback + * and @ref power_on_callback. * Note: for linux the kernel must have CONFIG_PM_RUNTIME enabled to use this feature. * * @return 0 on success, else int error code. @@ -234,8 +240,9 @@ struct kbase_pm_callback_conf { /** Callback for handling runtime power management termination. * - * The runtime power management callbacks @ref power_runtime_off_callback and @ref power_runtime_on_callback - * should no longer be called by the OS on completion of this function. + * The runtime power management callbacks @ref power_runtime_off_callback + * and @ref power_runtime_on_callback should no longer be called by the + * OS on completion of this function. * Note: for linux the kernel must have CONFIG_PM_RUNTIME enabled to use this feature. */ void (*power_runtime_term_callback)(struct kbase_device *kbdev); diff --git a/drivers/gpu/arm/bifrost/mali_kbase_config_defaults.h b/drivers/gpu/arm/bifrost/mali_kbase_config_defaults.h index 20003c852863..baca78679f0b 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_config_defaults.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_config_defaults.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2013-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2013-2024 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -194,9 +194,22 @@ enum { */ #define CSF_CSG_SUSPEND_TIMEOUT_CYCLES (3100000000ull) +/* Waiting timeout in clock cycles for GPU suspend to complete. */ +#define CSF_GPU_SUSPEND_TIMEOUT_CYCLES (CSF_CSG_SUSPEND_TIMEOUT_CYCLES) + /* Waiting timeout in clock cycles for GPU reset to complete. */ #define CSF_GPU_RESET_TIMEOUT_CYCLES (CSF_CSG_SUSPEND_TIMEOUT_CYCLES * 2) +/* Waiting timeout in clock cycles for a CSG to be terminated. + * + * Based on 0.6s timeout at 100MHz, scaled from 0.1s at 600MHz GPU frequency + * which is the timeout defined in FW to wait for iterator to complete the + * transitioning to DISABLED state. + * More cycles (0.4s @ 100MHz = 40000000) are added to ensure that + * host timeout is always bigger than FW timeout. + */ +#define CSF_CSG_TERM_TIMEOUT_CYCLES (100000000) + /* Waiting timeout in clock cycles for GPU firmware to boot. * * Based on 250ms timeout at 100MHz, scaled from a 50MHz GPU system. @@ -213,7 +226,10 @@ enum { * * Based on 10s timeout at 100MHz, scaled from a 50MHz GPU system. */ -#if IS_ENABLED(CONFIG_MALI_IS_FPGA) +#if IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) +/* Set a large value to avoid timing out while vector dumping */ +#define KCPU_FENCE_SIGNAL_TIMEOUT_CYCLES (250000000000ull) +#elif IS_ENABLED(CONFIG_MALI_IS_FPGA) #define KCPU_FENCE_SIGNAL_TIMEOUT_CYCLES (2500000000ull) #else #define KCPU_FENCE_SIGNAL_TIMEOUT_CYCLES (1000000000ull) diff --git a/drivers/gpu/arm/bifrost/mali_kbase_core_linux.c b/drivers/gpu/arm/bifrost/mali_kbase_core_linux.c index 237a3b829be9..1e7e823f44c4 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_core_linux.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_core_linux.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2024 ARM Limited.
All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -106,6 +106,7 @@ #include #include #include +#include #include @@ -152,13 +153,21 @@ static const struct mali_kbase_capability_def kbase_caps_table[MALI_KBASE_NUM_CA #if MALI_USE_CSF { 1, 0 }, /* SYSTEM_MONITOR */ { 1, 0 }, /* JIT_PRESSURE_LIMIT */ + { 1, 22 }, /* MEM_DONT_NEED */ { 1, 0 }, /* MEM_GROW_ON_GPF */ - { 1, 0 } /* MEM_PROTECTED */ + { 1, 0 }, /* MEM_PROTECTED */ + { 1, 26 }, /* MEM_IMPORT_SYNC_ON_MAP_UNMAP */ + { 1, 26 }, /* MEM_KERNEL_SYNC */ + { 1, 28 } /* MEM_SAME_VA */ #else { 11, 15 }, /* SYSTEM_MONITOR */ { 11, 25 }, /* JIT_PRESSURE_LIMIT */ + { 11, 40 }, /* MEM_DONT_NEED */ { 11, 2 }, /* MEM_GROW_ON_GPF */ - { 11, 2 } /* MEM_PROTECTED */ + { 11, 2 }, /* MEM_PROTECTED */ + { 11, 43 }, /* MEM_IMPORT_SYNC_ON_MAP_UNMAP */ + { 11, 43 }, /* MEM_KERNEL_SYNC */ + { 11, 44 } /* MEM_SAME_VA */ #endif }; @@ -167,13 +176,11 @@ static const struct mali_kbase_capability_def kbase_caps_table[MALI_KBASE_NUM_CA static struct mutex kbase_probe_mutex; #endif -static void kbase_file_destroy_kctx_worker(struct work_struct *work); - /** * mali_kbase_supports_cap - Query whether a kbase capability is supported * * @api_version: API version to convert - * @cap: Capability to query for - see mali_kbase_caps.h + * @cap: Capability to query for - see mali_kbase_caps.h. Shouldn't be negative. 
* * Return: true if the capability is supported */ @@ -184,13 +191,10 @@ bool mali_kbase_supports_cap(unsigned long api_version, enum mali_kbase_cap cap) struct mali_kbase_capability_def const *cap_def; - if (WARN_ON(cap < 0)) - return false; - if (WARN_ON(cap >= MALI_KBASE_NUM_CAPS)) return false; - cap_def = &kbase_caps_table[(int)cap]; + cap_def = &kbase_caps_table[cap]; required_ver = KBASE_API_VERSION(cap_def->required_major, cap_def->required_minor); supported = (api_version >= required_ver); @@ -212,7 +216,7 @@ bool mali_kbase_supports_cap(unsigned long api_version, enum mali_kbase_cap cap) * Return: Address of an object representing a simulated device file, or NULL * on failure. * - * Note: This function always gets called in Userspace context. + * Note: This function shall always be called in Userspace context. */ static struct kbase_file *kbase_file_new(struct kbase_device *const kbdev, struct file *const filp) { @@ -224,17 +228,6 @@ static struct kbase_file *kbase_file_new(struct kbase_device *const kbdev, struc kfile->kctx = NULL; kfile->api_version = 0; atomic_set(&kfile->setup_state, KBASE_FILE_NEED_VSN); - /* Store the pointer to the file table structure of current process. */ - kfile->owner = current->files; - INIT_WORK(&kfile->destroy_kctx_work, kbase_file_destroy_kctx_worker); - spin_lock_init(&kfile->lock); - kfile->fops_count = 0; - kfile->map_count = 0; - typecheck(typeof(kfile->map_count), typeof(current->mm->map_count)); -#if IS_ENABLED(CONFIG_DEBUG_FS) - init_waitqueue_head(&kfile->zero_fops_count_wait); -#endif - init_waitqueue_head(&kfile->event_queue); } return kfile; } @@ -313,33 +306,6 @@ static unsigned long kbase_file_get_api_version(struct kbase_file *const kfile) */ static int kbase_file_create_kctx(struct kbase_file *kfile, base_context_create_flags flags); -/** - * kbase_file_inc_fops_count_if_allowed - Increment the kfile::fops_count value if the file - * operation is allowed for the current process. 
- * - * @kfile: Pointer to the object representing the /dev/malixx device file instance. - * - * The function shall be called at the beginning of certain file operation methods - * implemented for @kbase_fops, like ioctl, poll, read and mmap. - * - * kbase_file_dec_fops_count() shall be called if the increment was done. - * - * Return: true if the increment was done otherwise false. - * - * Note: This function shall always be called in Userspace context. - */ -static bool kbase_file_inc_fops_count_if_allowed(struct kbase_file *const kfile) -{ - /* Disallow file operations from the other process that shares the instance - * of /dev/malixx file i.e. 'kfile' or disallow file operations if parent - * process has closed the file instance. - */ - if (unlikely(kfile->owner != current->files)) - return false; - - return kbase_file_inc_fops_count_unless_closed(kfile); -} - /** * kbase_file_get_kctx_if_setup_complete - Get a kernel base context * pointer from a device file @@ -352,8 +318,6 @@ static bool kbase_file_inc_fops_count_if_allowed(struct kbase_file *const kfile) * * Return: Address of the kernel base context associated with the @kfile, or * NULL if no context exists. - * - * Note: This function shall always be called in Userspace context. */ static struct kbase_context *kbase_file_get_kctx_if_setup_complete(struct kbase_file *const kfile) { @@ -364,103 +328,38 @@ static struct kbase_context *kbase_file_get_kctx_if_setup_complete(struct kbase_ return kfile->kctx; } -/** - * kbase_file_destroy_kctx - Destroy the Kbase context created for @kfile. 
- * - * @kfile: A device file created by kbase_file_new() - */ -static void kbase_file_destroy_kctx(struct kbase_file *const kfile) -{ - if (atomic_cmpxchg(&kfile->setup_state, KBASE_FILE_COMPLETE, KBASE_FILE_DESTROY_CTX) != - KBASE_FILE_COMPLETE) - return; - -#if IS_ENABLED(CONFIG_DEBUG_FS) - kbasep_mem_profile_debugfs_remove(kfile->kctx); - kbase_context_debugfs_term(kfile->kctx); -#endif - - kbase_destroy_context(kfile->kctx); - dev_dbg(kfile->kbdev->dev, "Deleted kbase context"); -} - -/** - * kbase_file_destroy_kctx_worker - Work item to destroy the Kbase context. - * - * @work: Pointer to the kfile::destroy_kctx_work. - * - * The work item shall only be enqueued if the context termination could not - * be done from @kbase_flush(). - */ -static void kbase_file_destroy_kctx_worker(struct work_struct *work) -{ - struct kbase_file *kfile = container_of(work, struct kbase_file, destroy_kctx_work); - - WARN_ON_ONCE(kfile->owner); - WARN_ON_ONCE(kfile->map_count); - WARN_ON_ONCE(kfile->fops_count); - - kbase_file_destroy_kctx(kfile); -} - -/** - * kbase_file_destroy_kctx_on_flush - Try destroy the Kbase context from the flush() - * method of @kbase_fops. - * - * @kfile: A device file created by kbase_file_new() - */ -static void kbase_file_destroy_kctx_on_flush(struct kbase_file *const kfile) -{ - bool can_destroy_context = false; - - spin_lock(&kfile->lock); - kfile->owner = NULL; - /* To destroy the context from flush() method, unlike the release() - * method, need to synchronize manually against the other threads in - * the current process that could be operating on the /dev/malixx file. - * - * Only destroy the context if all the memory mappings on the - * /dev/malixx file instance have been closed. If there are mappings - * present then the context would be destroyed later when the last - * mapping is closed. - * Also, only destroy the context if no file operations are in progress. 
- */ - can_destroy_context = !kfile->map_count && !kfile->fops_count; - spin_unlock(&kfile->lock); - - if (likely(can_destroy_context)) { - WARN_ON_ONCE(work_pending(&kfile->destroy_kctx_work)); - kbase_file_destroy_kctx(kfile); - } -} - /** * kbase_file_delete - Destroy an object representing a device file * * @kfile: A device file created by kbase_file_new() * - * If any context was created for the @kfile and is still alive, then it is destroyed. + * If any context was created for the @kfile then it is destroyed. */ static void kbase_file_delete(struct kbase_file *const kfile) { + struct kbase_device *kbdev = NULL; + if (WARN_ON(!kfile)) return; - /* All the CPU mappings on the device file should have been closed */ - WARN_ON_ONCE(kfile->map_count); -#if IS_ENABLED(CONFIG_DEBUG_FS) - /* There could still be file operations due to the debugfs file (mem_view) */ - wait_event(kfile->zero_fops_count_wait, !kbase_file_fops_count(kfile)); -#else - /* There shall not be any file operations in progress on the device file */ - WARN_ON_ONCE(kfile->fops_count); -#endif - kfile->filp->private_data = NULL; - cancel_work_sync(&kfile->destroy_kctx_work); - /* Destroy the context if it wasn't done earlier from the flush() method. 
*/ - kbase_file_destroy_kctx(kfile); - kbase_release_device(kfile->kbdev); + kbdev = kfile->kbdev; + + if (atomic_read(&kfile->setup_state) == KBASE_FILE_COMPLETE) { + struct kbase_context *kctx = kfile->kctx; + +#if IS_ENABLED(CONFIG_DEBUG_FS) + kbasep_mem_profile_debugfs_remove(kctx); +#endif + kbase_context_debugfs_term(kctx); + + kbase_destroy_context(kctx); + + dev_dbg(kbdev->dev, "deleted base context\n"); + } + + kbase_release_device(kbdev); + kfree(kfile); } @@ -585,6 +484,9 @@ int kbase_get_irqs(struct kbase_device *kbdev) kbdev->nr_irqs = 0; result = get_irqs(kbdev, pdev); + if (!result) + return result; + if (result) dev_err(kbdev->dev, "Invalid or No interrupt resources"); @@ -736,7 +638,8 @@ static int kbase_file_create_kctx(struct kbase_file *const kfile, kbdev = kfile->kbdev; - kctx = kbase_create_context(kbdev, in_compat_syscall(), flags, kfile->api_version, kfile); + kctx = kbase_create_context(kbdev, in_compat_syscall(), flags, kfile->api_version, + kfile->filp); /* if bad flags, will stay stuck in setup mode */ if (!kctx) @@ -823,36 +726,6 @@ static int kbase_release(struct inode *inode, struct file *filp) return 0; } -/** - * kbase_flush - Function implementing the flush() method of @kbase_fops. - * - * @filp: Pointer to the /dev/malixx device file instance. - * @id: Pointer to the file table structure of current process. - * If @filp is being shared by multiple processes then @id can differ - * from kfile::owner. - * - * This function is called everytime the copy of @filp is closed. So if 3 processes - * are sharing the @filp then this function would be called 3 times and only after - * that kbase_release() would get called. - * - * Return: 0 if successful, otherwise a negative error code. - * - * Note: This function always gets called in Userspace context when the - * file is closed. 
- */ -static int kbase_flush(struct file *filp, fl_owner_t id) -{ - struct kbase_file *const kfile = filp->private_data; - - /* Try to destroy the context if the flush() method has been called for the - * process that created the instance of /dev/malixx file i.e. 'kfile'. - */ - if (kfile->owner == id) - kbase_file_destroy_kctx_on_flush(kfile); - - return 0; -} - static int kbase_api_set_flags(struct kbase_file *kfile, struct kbase_ioctl_set_flags *flags) { int err = 0; @@ -1413,10 +1286,11 @@ static int kbase_api_sticky_resource_map(struct kbase_context *kctx, if (ret != 0) return -EFAULT; - kbase_gpu_vm_lock(kctx); + down_read(kbase_mem_get_process_mmap_lock()); + kbase_gpu_vm_lock_with_pmode_sync(kctx); for (i = 0; i < map->count; i++) { - if (!kbase_sticky_resource_acquire(kctx, gpu_addr[i])) { + if (!kbase_sticky_resource_acquire(kctx, gpu_addr[i], current->mm)) { /* Invalid resource */ ret = -EINVAL; break; @@ -1430,7 +1304,8 @@ static int kbase_api_sticky_resource_map(struct kbase_context *kctx, } } - kbase_gpu_vm_unlock(kctx); + kbase_gpu_vm_unlock_with_pmode_sync(kctx); + up_read(kbase_mem_get_process_mmap_lock()); return ret; } @@ -1450,7 +1325,7 @@ static int kbase_api_sticky_resource_unmap(struct kbase_context *kctx, if (ret != 0) return -EFAULT; - kbase_gpu_vm_lock(kctx); + kbase_gpu_vm_lock_with_pmode_sync(kctx); for (i = 0; i < unmap->count; i++) { if (!kbase_sticky_resource_release_force(kctx, NULL, gpu_addr[i])) { @@ -1459,7 +1334,7 @@ static int kbase_api_sticky_resource_unmap(struct kbase_context *kctx, } } - kbase_gpu_vm_unlock(kctx); + kbase_gpu_vm_unlock_with_pmode_sync(kctx); return ret; } @@ -1517,6 +1392,12 @@ static int kbasep_cs_queue_kick(struct kbase_context *kctx, struct kbase_ioctl_c return kbase_csf_queue_kick(kctx, kick); } +static int kbasep_queue_group_clear_faults(struct kbase_context *kctx, + struct kbase_ioctl_queue_group_clear_faults *faults) +{ + return kbase_csf_queue_group_clear_faults(kctx, faults); +} + static int 
kbasep_cs_queue_group_create_1_6(struct kbase_context *kctx, union kbase_ioctl_cs_queue_group_create_1_6 *create) { @@ -1535,10 +1416,8 @@ static int kbasep_cs_queue_group_create_1_6(struct kbase_context *kctx, } }; for (i = 0; i < ARRAY_SIZE(create->in.padding); i++) { - if (create->in.padding[i] != 0) { - dev_warn(kctx->kbdev->dev, "Invalid padding not 0 in queue group create\n"); + if (create->in.padding[i] != 0) return -EINVAL; - } } ret = kbase_csf_queue_group_create(kctx, &new_create); @@ -1569,10 +1448,8 @@ static int kbasep_cs_queue_group_create_1_18(struct kbase_context *kctx, } }; for (i = 0; i < ARRAY_SIZE(create->in.padding); i++) { - if (create->in.padding[i] != 0) { - dev_warn(kctx->kbdev->dev, "Invalid padding not 0 in queue group create\n"); + if (create->in.padding[i] != 0) return -EINVAL; - } } ret = kbase_csf_queue_group_create(kctx, &new_create); @@ -1586,6 +1463,8 @@ static int kbasep_cs_queue_group_create_1_18(struct kbase_context *kctx, static int kbasep_cs_queue_group_create(struct kbase_context *kctx, union kbase_ioctl_cs_queue_group_create *create) { + /* create->in.reserved only present pre-TDRX configuration. 
*/ + if (create->in.reserved != 0) { dev_warn(kctx->kbdev->dev, "Invalid reserved field not 0 in queue group create\n"); return -EINVAL; @@ -1826,19 +1705,28 @@ static int kbasep_ioctl_set_limited_core_count( struct kbase_ioctl_set_limited_core_count *set_limited_core_count) { const u64 shader_core_mask = kbase_pm_get_present_cores(kctx->kbdev, KBASE_PM_CORE_SHADER); - const u64 limited_core_mask = ((u64)1 << (set_limited_core_count->max_core_count)) - 1; + const u8 max_core_count = set_limited_core_count->max_core_count; + u64 limited_core_mask = 0; - if ((shader_core_mask & limited_core_mask) == 0) { - /* At least one shader core must be available after applying the mask */ + /* Sanity check to avoid shift-out-of-bounds */ + if (max_core_count > 64) + return -EINVAL; + else if (max_core_count == 64) + limited_core_mask = UINT64_MAX; + else + limited_core_mask = ((u64)1 << max_core_count) - 1; + + /* At least one shader core must be available after applying the mask */ + if ((shader_core_mask & limited_core_mask) == 0) return -EINVAL; - } kctx->limited_core_mask = limited_core_mask; return 0; } -static long kbase_kfile_ioctl(struct kbase_file *kfile, unsigned int cmd, unsigned long arg) +static long kbase_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { + struct kbase_file *const kfile = filp->private_data; struct kbase_context *kctx = NULL; struct kbase_device *kbdev = kfile->kbdev; void __user *uarg = (void __user *)arg; @@ -2087,6 +1975,11 @@ static long kbase_kfile_ioctl(struct kbase_file *kfile, unsigned int cmd, unsign KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_KCPU_QUEUE_ENQUEUE, kbasep_kcpu_queue_enqueue, struct kbase_ioctl_kcpu_queue_enqueue, kctx); break; + case KBASE_IOCTL_QUEUE_GROUP_CLEAR_FAULTS: + KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_QUEUE_GROUP_CLEAR_FAULTS, + kbasep_queue_group_clear_faults, + struct kbase_ioctl_queue_group_clear_faults, kctx); + break; case KBASE_IOCTL_CS_TILER_HEAP_INIT: KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_CS_TILER_HEAP_INIT, 
kbasep_cs_tiler_heap_init, union kbase_ioctl_cs_tiler_heap_init, kctx); @@ -2137,45 +2030,22 @@ static long kbase_kfile_ioctl(struct kbase_file *kfile, unsigned int cmd, unsign return -ENOIOCTLCMD; } -static long kbase_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) -{ - struct kbase_file *const kfile = filp->private_data; - long ioctl_ret; - - if (unlikely(!kbase_file_inc_fops_count_if_allowed(kfile))) - return -EPERM; - - ioctl_ret = kbase_kfile_ioctl(kfile, cmd, arg); - kbase_file_dec_fops_count(kfile); - - return ioctl_ret; -} - #if MALI_USE_CSF static ssize_t kbase_read(struct file *filp, char __user *buf, size_t count, loff_t *f_pos) { struct kbase_file *const kfile = filp->private_data; - struct kbase_context *kctx; + struct kbase_context *const kctx = kbase_file_get_kctx_if_setup_complete(kfile); struct base_csf_notification event_data = { .type = BASE_CSF_NOTIFICATION_EVENT }; const size_t data_size = sizeof(event_data); bool read_event = false, read_error = false; - ssize_t err = 0; CSTD_UNUSED(f_pos); - if (unlikely(!kbase_file_inc_fops_count_if_allowed(kfile))) + if (unlikely(!kctx)) return -EPERM; - kctx = kbase_file_get_kctx_if_setup_complete(kfile); - if (unlikely(!kctx)) { - err = -EPERM; - goto out; - } - - if (count < data_size) { - err = -ENOBUFS; - goto out; - } + if (count < data_size) + return -ENOBUFS; if (atomic_read(&kctx->event_count)) read_event = true; @@ -2196,41 +2066,29 @@ static ssize_t kbase_read(struct file *filp, char __user *buf, size_t count, lof if (copy_to_user(buf, &event_data, data_size) != 0) { dev_warn(kctx->kbdev->dev, "Failed to copy data\n"); - err = -EFAULT; - goto out; + return -EFAULT; } if (read_event) atomic_set(&kctx->event_count, 0); -out: - kbase_file_dec_fops_count(kfile); - return err ? 
err : (ssize_t)data_size; + return data_size; } #else /* MALI_USE_CSF */ static ssize_t kbase_read(struct file *filp, char __user *buf, size_t count, loff_t *f_pos) { struct kbase_file *const kfile = filp->private_data; - struct kbase_context *kctx; + struct kbase_context *const kctx = kbase_file_get_kctx_if_setup_complete(kfile); struct base_jd_event_v2 uevent; - size_t out_count = 0; - ssize_t err = 0; + int out_count = 0; CSTD_UNUSED(f_pos); - if (unlikely(!kbase_file_inc_fops_count_if_allowed(kfile))) + if (unlikely(!kctx)) return -EPERM; - kctx = kbase_file_get_kctx_if_setup_complete(kfile); - if (unlikely(!kctx)) { - err = -EPERM; - goto out; - } - - if (count < sizeof(uevent)) { - err = -ENOBUFS; - goto out; - } + if (count < sizeof(uevent)) + return -ENOBUFS; memset(&uevent, 0, sizeof(uevent)); @@ -2239,29 +2097,21 @@ static ssize_t kbase_read(struct file *filp, char __user *buf, size_t count, lof if (out_count > 0) goto out; - if (filp->f_flags & O_NONBLOCK) { - err = -EAGAIN; - goto out; - } + if (filp->f_flags & O_NONBLOCK) + return -EAGAIN; - if (wait_event_interruptible(kfile->event_queue, - kbase_event_pending(kctx)) != 0) { - err = -ERESTARTSYS; - goto out; - } + if (wait_event_interruptible(kctx->event_queue, + kbase_event_pending(kctx)) != 0) + return -ERESTARTSYS; } if (uevent.event_code == BASE_JD_EVENT_DRV_TERMINATED) { - if (out_count == 0) { - err = -EPIPE; - goto out; - } + if (out_count == 0) + return -EPIPE; goto out; } - if (copy_to_user(buf, &uevent, sizeof(uevent)) != 0) { - err = -EFAULT; - goto out; - } + if (copy_to_user(buf, &uevent, sizeof(uevent)) != 0) + return -EFAULT; buf += sizeof(uevent); out_count++; @@ -2269,59 +2119,40 @@ static ssize_t kbase_read(struct file *filp, char __user *buf, size_t count, lof } while (count >= sizeof(uevent)); out: - kbase_file_dec_fops_count(kfile); - return err ? 
err : (ssize_t)(out_count * sizeof(uevent)); + return out_count * sizeof(uevent); } #endif /* MALI_USE_CSF */ static __poll_t kbase_poll(struct file *filp, poll_table *wait) { struct kbase_file *const kfile = filp->private_data; - struct kbase_context *kctx; - __poll_t ret = 0; + struct kbase_context *const kctx = kbase_file_get_kctx_if_setup_complete(kfile); - if (unlikely(!kbase_file_inc_fops_count_if_allowed(kfile))) { -#if (KERNEL_VERSION(4, 19, 0) > LINUX_VERSION_CODE) - ret = POLLNVAL; -#else - ret = EPOLLNVAL; -#endif - return ret; - } - - kctx = kbase_file_get_kctx_if_setup_complete(kfile); if (unlikely(!kctx)) { #if (KERNEL_VERSION(4, 19, 0) > LINUX_VERSION_CODE) - ret = POLLERR; + return POLLERR; #else - ret = EPOLLERR; + return EPOLLERR; #endif - goto out; } - poll_wait(filp, &kfile->event_queue, wait); + poll_wait(filp, &kctx->event_queue, wait); if (kbase_event_pending(kctx)) { #if (KERNEL_VERSION(4, 19, 0) > LINUX_VERSION_CODE) - ret = POLLIN | POLLRDNORM; + return POLLIN | POLLRDNORM; #else - ret = EPOLLIN | EPOLLRDNORM; + return EPOLLIN | EPOLLRDNORM; #endif } -out: - kbase_file_dec_fops_count(kfile); - return ret; + return 0; } void kbase_event_wakeup(struct kbase_context *kctx) { KBASE_DEBUG_ASSERT(kctx); dev_dbg(kctx->kbdev->dev, "Waking event queue for context %pK\n", (void *)kctx); -#ifdef CONFIG_MALI_BIFROST_DEBUG - if (WARN_ON_ONCE(!kctx->kfile)) - return; -#endif - wake_up_interruptible(&kctx->kfile->event_queue); + wake_up_interruptible(&kctx->event_queue); } KBASE_EXPORT_TEST_API(kbase_event_wakeup); @@ -2354,20 +2185,12 @@ KBASE_EXPORT_TEST_API(kbase_event_pending); static int kbase_mmap(struct file *const filp, struct vm_area_struct *const vma) { struct kbase_file *const kfile = filp->private_data; - struct kbase_context *kctx; - int ret; + struct kbase_context *const kctx = kbase_file_get_kctx_if_setup_complete(kfile); - if (unlikely(!kbase_file_inc_fops_count_if_allowed(kfile))) + if (unlikely(!kctx)) return -EPERM; - kctx = 
kbase_file_get_kctx_if_setup_complete(kfile); - if (likely(kctx)) - ret = kbase_context_mmap(kctx, vma); - else - ret = -EPERM; - - kbase_file_dec_fops_count(kfile); - return ret; + return kbase_context_mmap(kctx, vma); } static int kbase_check_flags(int flags) @@ -2386,26 +2209,17 @@ static unsigned long kbase_get_unmapped_area(struct file *const filp, const unsi const unsigned long flags) { struct kbase_file *const kfile = filp->private_data; - struct kbase_context *kctx; - unsigned long address; + struct kbase_context *const kctx = kbase_file_get_kctx_if_setup_complete(kfile); - if (unlikely(!kbase_file_inc_fops_count_if_allowed(kfile))) + if (unlikely(!kctx)) return -EPERM; - kctx = kbase_file_get_kctx_if_setup_complete(kfile); - if (likely(kctx)) - address = kbase_context_get_unmapped_area(kctx, addr, len, pgoff, flags); - else - address = -EPERM; - - kbase_file_dec_fops_count(kfile); - return address; + return kbase_context_get_unmapped_area(kctx, addr, len, pgoff, flags); } static const struct file_operations kbase_fops = { .owner = THIS_MODULE, .open = kbase_open, - .flush = kbase_flush, .release = kbase_release, .read = kbase_read, .poll = kbase_poll, @@ -2544,6 +2358,9 @@ static ssize_t core_mask_show(struct device *dev, struct device_attribute *attr, struct kbase_device *kbdev; unsigned long flags; ssize_t ret = 0; +#if !MALI_USE_CSF + size_t i; +#endif CSTD_UNUSED(attr); @@ -2562,22 +2379,147 @@ static ssize_t core_mask_show(struct device *dev, struct device_attribute *attr, ret += scnprintf(buf + ret, (size_t)(PAGE_SIZE - ret), "Current in use core mask : 0x%llX\n", kbdev->pm.backend.shaders_avail); #else - ret += scnprintf(buf + ret, (size_t)(PAGE_SIZE - ret), "Current core mask (JS0) : 0x%llX\n", - kbdev->pm.debug_core_mask[0]); - ret += scnprintf(buf + ret, (size_t)(PAGE_SIZE - ret), "Current core mask (JS1) : 0x%llX\n", - kbdev->pm.debug_core_mask[1]); - ret += scnprintf(buf + ret, (size_t)(PAGE_SIZE - ret), "Current core mask (JS2) : 0x%llX\n", - 
kbdev->pm.debug_core_mask[2]); + for (i = 0; i < BASE_JM_MAX_NR_SLOTS; i++) { + if (PAGE_SIZE < ret) + goto out_unlock; + + ret += scnprintf(buf + ret, (size_t)(PAGE_SIZE - ret), + "Current core mask (JS%zu) : 0x%llX\n", i, + kbdev->pm.debug_core_mask[i]); + } #endif /* MALI_USE_CSF */ ret += scnprintf(buf + ret, (size_t)(PAGE_SIZE - ret), "Available core mask : 0x%llX\n", kbdev->gpu_props.shader_present); - +#if !MALI_USE_CSF +out_unlock: +#endif spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); return ret; } +#if MALI_USE_CSF +struct kbase_core_mask { + u64 new_core_mask; +}; + +static int core_mask_parse(struct kbase_device *const kbdev, const char *const buf, + struct kbase_core_mask *const mask) +{ + int err = kstrtou64(buf, 0, &mask->new_core_mask); + + if (err) + dev_err(kbdev->dev, "Couldn't process core mask write operation.\n"); + + return err; +} + +static int core_mask_set(struct kbase_device *kbdev, struct kbase_core_mask *const new_mask) +{ + u64 new_core_mask = new_mask->new_core_mask; + u64 shader_present = kbdev->gpu_props.shader_present; + + lockdep_assert_held(&kbdev->pm.lock); + lockdep_assert_held(&kbdev->hwaccess_lock); + + if ((new_core_mask & shader_present) != new_core_mask) { + dev_err(kbdev->dev, + "Invalid core mask 0x%llX: Includes non-existent cores (present = 0x%llX)", + new_core_mask, shader_present); + return -EINVAL; + + } else if (!(new_core_mask & shader_present & kbdev->pm.backend.ca_cores_enabled)) { + dev_err(kbdev->dev, + "Invalid core mask 0x%llX: No intersection with currently available cores (present = 0x%llX, CA enabled = 0x%llX)", + new_core_mask, kbdev->gpu_props.shader_present, + kbdev->pm.backend.ca_cores_enabled); + return -EINVAL; + } + + + if (kbdev->pm.debug_core_mask != new_core_mask) + kbase_pm_set_debug_core_mask(kbdev, new_core_mask); + + return 0; +} +#else +struct kbase_core_mask { + u64 new_core_mask[BASE_JM_MAX_NR_SLOTS]; +}; + +static int core_mask_parse(struct kbase_device *const kbdev, const char 
*const buf, + struct kbase_core_mask *const mask) +{ + int items; + + items = sscanf(buf, "%llx %llx %llx", &mask->new_core_mask[0], &mask->new_core_mask[1], + &mask->new_core_mask[2]); + + if (items != 1 && items != BASE_JM_MAX_NR_SLOTS) { + dev_err(kbdev->dev, "Couldn't process core mask write operation.\n" + "Use format \n" + "or \n"); + return -EINVAL; + } + + /* If only one value was provided, set all other core masks equal to the value. */ + if (items == 1) { + size_t i; + + for (i = 1; i < BASE_JM_MAX_NR_SLOTS; i++) + mask->new_core_mask[i] = mask->new_core_mask[0]; + } + + return 0; +} + +static int core_mask_set(struct kbase_device *kbdev, struct kbase_core_mask *const new_mask) +{ + u64 shader_present = kbdev->gpu_props.shader_present; + u64 group_core_mask = kbdev->gpu_props.coherency_info.group.core_mask; + u64 *new_core_mask = &new_mask->new_core_mask[0]; + size_t i; + + for (i = 0; i < BASE_JM_MAX_NR_SLOTS; ++i) { + if ((new_core_mask[i] & shader_present) != new_core_mask[i]) { + dev_err(kbdev->dev, + "Invalid core mask 0x%llX for JS %zu: Includes non-existent cores (present = 0x%llX)", + new_core_mask[i], i, shader_present); + return -EINVAL; + + } else if (!(new_core_mask[i] & shader_present & + kbdev->pm.backend.ca_cores_enabled)) { + dev_err(kbdev->dev, + "Invalid core mask 0x%llX for JS %zu: No intersection with currently available cores (present = 0x%llX, CA enabled = 0x%llX)", + new_core_mask[i], i, kbdev->gpu_props.shader_present, + kbdev->pm.backend.ca_cores_enabled); + return -EINVAL; + } else if (!(new_core_mask[i] & group_core_mask)) { + dev_err(kbdev->dev, + "Invalid core mask 0x%llX for JS %zu: No intersection with group 0 core mask 0x%llX", + new_core_mask[i], i, group_core_mask); + return -EINVAL; + } else if (!(new_core_mask[i] & kbdev->gpu_props.curr_config.shader_present)) { + dev_err(kbdev->dev, + "Invalid core mask 0x%llX for JS %zu: No intersection with current core mask 0x%llX", + new_core_mask[i], i, 
kbdev->gpu_props.curr_config.shader_present); + return -EINVAL; + } + } + + for (i = 0; i < BASE_JM_MAX_NR_SLOTS; i++) { + if (kbdev->pm.debug_core_mask[i] != new_core_mask[i]) { + kbase_pm_set_debug_core_mask(kbdev, new_core_mask, BASE_JM_MAX_NR_SLOTS); + break; + } + } + + return 0; +} + +#endif + /** * core_mask_store - Store callback for the core_mask sysfs file. * @@ -2594,18 +2536,10 @@ static ssize_t core_mask_store(struct device *dev, struct device_attribute *attr size_t count) { struct kbase_device *kbdev; -#if MALI_USE_CSF - u64 new_core_mask; -#else - u64 new_core_mask[3]; - u64 group_core_mask; - int i; -#endif /* MALI_USE_CSF */ + struct kbase_core_mask core_mask = {}; - int items; - ssize_t err = (ssize_t)count; + int err; unsigned long flags; - u64 shader_present; CSTD_UNUSED(attr); @@ -2614,102 +2548,22 @@ static ssize_t core_mask_store(struct device *dev, struct device_attribute *attr if (!kbdev) return -ENODEV; -#if MALI_USE_CSF - items = sscanf(buf, "%llx", &new_core_mask); - - if (items != 1) { - dev_err(kbdev->dev, "Couldn't process core mask write operation.\n" - "Use format \n"); - err = -EINVAL; - goto end; - } -#else - items = sscanf(buf, "%llx %llx %llx", &new_core_mask[0], &new_core_mask[1], - &new_core_mask[2]); - - if (items != 1 && items != 3) { - dev_err(kbdev->dev, "Couldn't process core mask write operation.\n" - "Use format \n" - "or \n"); - err = -EINVAL; - goto end; - } - - if (items == 1) - new_core_mask[1] = new_core_mask[2] = new_core_mask[0]; -#endif + err = core_mask_parse(kbdev, buf, &core_mask); + if (err) + return err; mutex_lock(&kbdev->pm.lock); spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - shader_present = kbdev->gpu_props.shader_present; + err = core_mask_set(kbdev, &core_mask); -#if MALI_USE_CSF - if ((new_core_mask & shader_present) != new_core_mask) { - dev_err(dev, - "Invalid core mask 0x%llX: Includes non-existent cores (present = 0x%llX)", - new_core_mask, shader_present); - err = -EINVAL; - goto unlock; - 
- } else if (!(new_core_mask & shader_present & kbdev->pm.backend.ca_cores_enabled)) { - dev_err(dev, - "Invalid core mask 0x%llX: No intersection with currently available cores (present = 0x%llX, CA enabled = 0x%llX\n", - new_core_mask, kbdev->gpu_props.shader_present, - kbdev->pm.backend.ca_cores_enabled); - err = -EINVAL; - goto unlock; - } - - if (kbdev->pm.debug_core_mask != new_core_mask) - kbase_pm_set_debug_core_mask(kbdev, new_core_mask); -#else - group_core_mask = kbdev->gpu_props.coherency_info.group.core_mask; - - for (i = 0; i < 3; ++i) { - if ((new_core_mask[i] & shader_present) != new_core_mask[i]) { - dev_err(dev, - "Invalid core mask 0x%llX for JS %d: Includes non-existent cores (present = 0x%llX)", - new_core_mask[i], i, shader_present); - err = -EINVAL; - goto unlock; - - } else if (!(new_core_mask[i] & shader_present & - kbdev->pm.backend.ca_cores_enabled)) { - dev_err(dev, - "Invalid core mask 0x%llX for JS %d: No intersection with currently available cores (present = 0x%llX, CA enabled = 0x%llX\n", - new_core_mask[i], i, kbdev->gpu_props.shader_present, - kbdev->pm.backend.ca_cores_enabled); - err = -EINVAL; - goto unlock; - } else if (!(new_core_mask[i] & group_core_mask)) { - dev_err(dev, - "Invalid core mask 0x%llX for JS %d: No intersection with group 0 core mask 0x%llX\n", - new_core_mask[i], i, group_core_mask); - err = -EINVAL; - goto unlock; - } else if (!(new_core_mask[i] & kbdev->gpu_props.curr_config.shader_present)) { - dev_err(dev, - "Invalid core mask 0x%llX for JS %d: No intersection with current core mask 0x%llX\n", - new_core_mask[i], i, kbdev->gpu_props.curr_config.shader_present); - err = -EINVAL; - goto unlock; - } - } - - if (kbdev->pm.debug_core_mask[0] != new_core_mask[0] || - kbdev->pm.debug_core_mask[1] != new_core_mask[1] || - kbdev->pm.debug_core_mask[2] != new_core_mask[2]) { - kbase_pm_set_debug_core_mask(kbdev, new_core_mask[0], new_core_mask[1], - new_core_mask[2]); - } -#endif /* MALI_USE_CSF */ - -unlock: 
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); mutex_unlock(&kbdev->pm.lock); -end: - return err; + + if (err) + return err; + + return count; } /* @@ -3478,12 +3332,8 @@ int kbase_pm_gpu_freq_init(struct kbase_device *kbdev) /* convert found frequency to KHz */ found_freq /= 1000; - /* If lowest frequency in OPP table is still higher - * than the reference, then keep the reference frequency - * as the one to use for scaling . - */ - if (found_freq < lowest_freq_khz) - lowest_freq_khz = found_freq; + /* always use the lowest frequency from opp table */ + lowest_freq_khz = found_freq; } #else dev_err(kbdev->dev, "No operating-points-v2 node or operating-points property in DT"); @@ -4466,7 +4316,7 @@ static int kbase_common_reg_map(struct kbase_device *kbdev) goto out_region; } - kbdev->reg = ioremap(kbdev->reg_start, kbdev->reg_size); + kbdev->reg = mali_ioremap(kbdev->reg_start, kbdev->reg_size); if (!kbdev->reg) { dev_err(kbdev->dev, "Can't remap register window\n"); err = -EINVAL; @@ -4484,7 +4334,7 @@ out_region: static void kbase_common_reg_unmap(struct kbase_device *const kbdev) { if (kbdev->reg) { - iounmap(kbdev->reg); + mali_iounmap(kbdev->reg); release_mem_region(kbdev->reg_start, kbdev->reg_size); kbdev->reg = NULL; kbdev->reg_start = 0; @@ -4536,7 +4386,6 @@ void registers_unmap(struct kbase_device *kbdev) } #if defined(CONFIG_MALI_ARBITER_SUPPORT) && defined(CONFIG_OF) - static bool kbase_is_pm_enabled(const struct device_node *gpu_node) { const struct device_node *power_model_node; @@ -4562,17 +4411,6 @@ static bool kbase_is_pm_enabled(const struct device_node *gpu_node) return is_pm_enable; } -static bool kbase_is_pv_enabled(const struct device_node *gpu_node) -{ - const void *arbiter_if_node; - - arbiter_if_node = of_get_property(gpu_node, "arbiter-if", NULL); - if (!arbiter_if_node) - arbiter_if_node = of_get_property(gpu_node, "arbiter_if", NULL); - - return arbiter_if_node ? 
true : false; -} - static bool kbase_is_full_coherency_enabled(const struct device_node *gpu_node) { const void *coherency_dts; @@ -4586,71 +4424,62 @@ static bool kbase_is_full_coherency_enabled(const struct device_node *gpu_node) } return false; } +#endif /* defined(CONFIG_MALI_ARBITER_SUPPORT) && defined(CONFIG_OF) */ -#endif /* CONFIG_MALI_ARBITER_SUPPORT && CONFIG_OF */ - -int kbase_device_pm_init(struct kbase_device *kbdev) +int kbase_device_backend_init(struct kbase_device *kbdev) { int err = 0; #if defined(CONFIG_MALI_ARBITER_SUPPORT) && defined(CONFIG_OF) - u32 product_model; + /* + * Attempt to initialize arbitration. + * If the platform is not suitable for arbitration, return -EPERM. + * The device initialization should not fail but kbase will + * not support arbitration. + */ + if (kbase_is_pm_enabled(kbdev->dev->of_node)) { + /* Arbitration AND power management invalid */ + dev_err(kbdev->dev, "Invalid combination of arbitration AND power management\n"); + return -EPERM; + } - if (kbase_is_pv_enabled(kbdev->dev->of_node)) { - dev_info(kbdev->dev, "Arbitration interface enabled\n"); - if (kbase_is_pm_enabled(kbdev->dev->of_node)) { - /* Arbitration AND power management invalid */ - dev_err(kbdev->dev, - "Invalid combination of arbitration AND power management\n"); - return -EPERM; - } - if (kbase_is_full_coherency_enabled(kbdev->dev->of_node)) { - /* Arbitration AND full coherency invalid */ - dev_err(kbdev->dev, - "Invalid combination of arbitration AND full coherency\n"); - return -EPERM; - } - err = kbase_arbiter_pm_early_init(kbdev); - if (err == 0) { - /* Check if Arbitration is running on - * supported GPU platform - */ - kbase_pm_register_access_enable(kbdev); + if (kbase_is_full_coherency_enabled(kbdev->dev->of_node)) { + /* Arbitration AND full coherency invalid */ + dev_err(kbdev->dev, "Invalid combination of arbitration AND full coherency\n"); + return -EPERM; + } + + err = kbase_arbiter_pm_early_init(kbdev); + if (err == 0) { +#if 
!MALI_USE_CSF + u32 product_model; + + /* + * Attempt to obtain and parse gpu_id in the event an external AW module + * is used for messaging. We should have access to GPU at this point. + */ + if (kbdev->gpu_props.gpu_id.arch_major == 0) kbase_gpuprops_parse_gpu_id(&kbdev->gpu_props.gpu_id, kbase_reg_get_gpu_id(kbdev)); - kbase_pm_register_access_disable(kbdev); - product_model = kbdev->gpu_props.gpu_id.product_model; - if (product_model != GPU_ID_PRODUCT_TGOX && - product_model != GPU_ID_PRODUCT_TNOX && - product_model != GPU_ID_PRODUCT_TBAX) { - kbase_arbiter_pm_early_term(kbdev); - dev_err(kbdev->dev, "GPU platform not suitable for arbitration\n"); - return -EPERM; - } + product_model = kbdev->gpu_props.gpu_id.product_model; + if (product_model != GPU_ID_PRODUCT_TGOX && product_model != GPU_ID_PRODUCT_TNOX && + product_model != GPU_ID_PRODUCT_TBAX) { + kbase_arbiter_pm_early_term(kbdev); + dev_err(kbdev->dev, "GPU platform not suitable for arbitration\n"); + return -EPERM; } - } else { - kbdev->arb.arb_if = NULL; - kbdev->arb.arb_dev = NULL; - err = power_control_init(kbdev); +#endif /* !MALI_USE_CSF */ + dev_info(kbdev->dev, "Arbitration interface enabled\n"); } -#else - err = power_control_init(kbdev); -#endif /* CONFIG_MALI_ARBITER_SUPPORT && CONFIG_OF */ +#endif /* defined(CONFIG_MALI_ARBITER_SUPPORT) && defined(CONFIG_OF) */ return err; } -void kbase_device_pm_term(struct kbase_device *kbdev) +void kbase_device_backend_term(struct kbase_device *kbdev) { #ifdef CONFIG_MALI_ARBITER_SUPPORT -#if IS_ENABLED(CONFIG_OF) - if (kbase_is_pv_enabled(kbdev->dev->of_node)) - kbase_arbiter_pm_early_term(kbdev); - else - power_control_term(kbdev); -#endif /* CONFIG_OF */ -#else - power_control_term(kbdev); + kbase_arbiter_pm_early_term(kbdev); #endif } @@ -5064,6 +4893,7 @@ static struct dentry *init_debugfs(struct kbase_device *kbdev) return dentry; } + dentry = debugfs_ctx_defaults_init(kbdev); if (IS_ERR_OR_NULL(dentry)) return dentry; diff --git 
a/drivers/gpu/arm/bifrost/mali_kbase_debug_mem_view.c b/drivers/gpu/arm/bifrost/mali_kbase_debug_mem_view.c index dd8f8ff6fe79..48469cdcc34e 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_debug_mem_view.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_debug_mem_view.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2013-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2013-2024 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -237,7 +237,11 @@ static int debug_mem_open(struct inode *i, struct file *file) int ret; enum kbase_memory_zone idx; - if (!kbase_file_inc_fops_count_unless_closed(kctx->kfile)) +#if (KERNEL_VERSION(6, 7, 0) > LINUX_VERSION_CODE) + if (get_file_rcu(kctx->filp) == 0) +#else + if (get_file_rcu(&kctx->filp) == 0) +#endif return -ENOENT; /* Check if file was opened in write mode. GPU memory contents @@ -297,7 +301,7 @@ out: } seq_release(i, file); open_fail: - kbase_file_dec_fops_count(kctx->kfile); + fput(kctx->filp); return ret; } @@ -327,7 +331,7 @@ static int debug_mem_release(struct inode *inode, struct file *file) kfree(mem_data); } - kbase_file_dec_fops_count(kctx->kfile); + fput(kctx->filp); return 0; } diff --git a/drivers/gpu/arm/bifrost/mali_kbase_defs.h b/drivers/gpu/arm/bifrost/mali_kbase_defs.h index 13a5c30dcb61..2335e0b8e449 100755 --- a/drivers/gpu/arm/bifrost/mali_kbase_defs.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_defs.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2011-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2024 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -178,16 +178,11 @@ struct kbase_gpu_metrics { * * @link: Links the object in kbase_device::gpu_metrics::active_list * or kbase_device::gpu_metrics::inactive_list. - * @first_active_start_time: Records the time at which the application first became + * @active_start_time: Records the time at which the application first became * active in the current work period. - * @last_active_start_time: Records the time at which the application last became - * active in the current work period. - * @last_active_end_time: Records the time at which the application last became - * inactive in the current work period. - * @total_active: Tracks the time for which application has been active - * in the current work period. - * @prev_wp_active_end_time: Records the time at which the application last became - * inactive in the previous work period. + * @active_end_time: Records the time at which the application last became + * inactive in the current work period, or the time of the end of + * previous work period if the application remained active. * @aid: Unique identifier for an application. * @kctx_count: Counter to keep a track of the number of Kbase contexts * created for an application. There may be multiple Kbase @@ -195,19 +190,14 @@ struct kbase_gpu_metrics { * metrics context. * @active_cnt: Counter that is updated every time the GPU activity starts * and ends in the current work period for an application. - * @flags: Flags to track the state of GPU metrics context. 
*/ struct kbase_gpu_metrics_ctx { struct list_head link; - u64 first_active_start_time; - u64 last_active_start_time; - u64 last_active_end_time; - u64 total_active; - u64 prev_wp_active_end_time; + u64 active_start_time; + u64 active_end_time; unsigned int aid; unsigned int kctx_count; u8 active_cnt; - u8 flags; }; #endif @@ -555,7 +545,7 @@ struct kbase_mem_pool { u8 group_id; spinlock_t pool_lock; struct list_head page_list; - struct shrinker reclaim; + DEFINE_KBASE_SHRINKER reclaim; atomic_t isolation_in_progress_cnt; struct kbase_mem_pool *next_pool; @@ -847,8 +837,6 @@ struct kbase_mem_migrate { * @as_free: Bitpattern of free/available GPU address spaces. * @mmu_mask_change: Lock to serialize the access to MMU interrupt mask * register used in the handling of Bus & Page faults. - * @pagesize_2mb: Boolean to determine whether 2MiB page sizes are - * supported and used where possible. * @gpu_props: Object containing complete information about the * configuration/properties of GPU HW device in use. * @hw_issues_mask: List of SW workarounds for HW issues @@ -1144,8 +1132,6 @@ struct kbase_device { spinlock_t mmu_mask_change; - bool pagesize_2mb; - struct kbase_gpu_props gpu_props; unsigned long hw_issues_mask[(BASE_HW_ISSUE_END + BITS_PER_LONG - 1) / BITS_PER_LONG]; @@ -1424,9 +1410,6 @@ struct kbase_device { * @KBASE_FILE_COMPLETE: Indicates if the setup for context has * completed, i.e. flags have been set for the * context. - * @KBASE_FILE_DESTROY_CTX: Indicates that destroying of context has begun or - * is complete. This state can only be reached after - * @KBASE_FILE_COMPLETE. * * The driver allows only limited interaction with user-space until setup * is complete. 
@@ -1436,8 +1419,7 @@ enum kbase_file_state { KBASE_FILE_VSN_IN_PROGRESS, KBASE_FILE_NEED_CTX, KBASE_FILE_CTX_IN_PROGRESS, - KBASE_FILE_COMPLETE, - KBASE_FILE_DESTROY_CTX + KBASE_FILE_COMPLETE }; /** @@ -1447,12 +1429,6 @@ enum kbase_file_state { * allocated from the probe method of the Mali driver. * @filp: Pointer to the struct file corresponding to device file * /dev/malixx instance, passed to the file's open method. - * @owner: Pointer to the file table structure of a process that - * created the instance of /dev/malixx device file. Set to - * NULL when that process closes the file instance. No more - * file operations would be allowed once set to NULL. - * It would be updated only in the Userspace context, i.e. - * when @kbase_open or @kbase_flush is called. * @kctx: Object representing an entity, among which GPU is * scheduled and which gets its own GPU address space. * Invalid until @setup_state is KBASE_FILE_COMPLETE. @@ -1461,44 +1437,13 @@ enum kbase_file_state { * @setup_state is KBASE_FILE_NEED_CTX. * @setup_state: Initialization state of the file. Values come from * the kbase_file_state enumeration. - * @destroy_kctx_work: Work item for destroying the @kctx, enqueued only when - * @fops_count and @map_count becomes zero after /dev/malixx - * file was previously closed by the @owner. - * @lock: Lock to serialize the access to members like @owner, @fops_count, - * @map_count. - * @fops_count: Counter that is incremented at the beginning of a method - * defined for @kbase_fops and is decremented at the end. - * So the counter keeps a track of the file operations in progress - * for /dev/malixx file, that are being handled by the Kbase. - * The counter is needed to defer the context termination as - * Userspace can close the /dev/malixx file and flush() method - * can get called when some other file operation is in progress. - * @map_count: Counter to keep a track of the memory mappings present on - * /dev/malixx file instance. 
The counter is needed to defer the - * context termination as Userspace can close the /dev/malixx - * file and flush() method can get called when mappings are still - * present. - * @zero_fops_count_wait: Waitqueue used to wait for the @fops_count to become 0. - * Currently needed only for the "mem_view" debugfs file. - * @event_queue: Wait queue used for blocking the thread, which consumes - * the base_jd_event corresponding to an atom, when there - * are no more posted events. */ struct kbase_file { struct kbase_device *kbdev; struct file *filp; - fl_owner_t owner; struct kbase_context *kctx; unsigned long api_version; atomic_t setup_state; - struct work_struct destroy_kctx_work; - spinlock_t lock; - int fops_count; - int map_count; -#if IS_ENABLED(CONFIG_DEBUG_FS) - wait_queue_head_t zero_fops_count_wait; -#endif - wait_queue_head_t event_queue; }; #if MALI_JIT_PRESSURE_LIMIT_BASE /** @@ -1680,8 +1625,8 @@ struct kbase_sub_alloc { /** * struct kbase_context - Kernel base context * - * @kfile: Pointer to the object representing the /dev/malixx device - * file instance. + * @filp: Pointer to the struct file corresponding to device file + * /dev/malixx instance, passed to the file's open method. * @kbdev: Pointer to the Kbase device for which the context is created. * @kctx_list_link: Node into Kbase device list of contexts. * @mmu: Structure holding details of the MMU tables for this @@ -1734,6 +1679,9 @@ struct kbase_sub_alloc { * used in conjunction with @cookies bitmask mainly for * providing a mechansim to have the same value for CPU & * GPU virtual address. + * @event_queue: Wait queue used for blocking the thread, which consumes + * the base_jd_event corresponding to an atom, when there + * are no more posted events. * @tgid: Thread group ID of the process whose thread created * the context (by calling KBASE_IOCTL_VERSION_CHECK or * KBASE_IOCTL_SET_FLAGS, depending on the @api_version). 
@@ -1945,7 +1893,7 @@ struct kbase_sub_alloc { * is made on the device file. */ struct kbase_context { - struct kbase_file *kfile; + struct file *filp; struct kbase_device *kbdev; struct list_head kctx_list_link; struct kbase_mmu_table mmu; @@ -1997,6 +1945,7 @@ struct kbase_context { DECLARE_BITMAP(cookies, BITS_PER_LONG); struct kbase_va_region *pending_regions[BITS_PER_LONG]; + wait_queue_head_t event_queue; pid_t tgid; pid_t pid; atomic_t prioritized; @@ -2006,7 +1955,8 @@ struct kbase_context { struct kbase_mem_pool_group mem_pools; - struct shrinker reclaim; + DEFINE_KBASE_SHRINKER reclaim; + struct list_head evict_list; atomic_t evict_nents; diff --git a/drivers/gpu/arm/bifrost/mali_kbase_fence.h b/drivers/gpu/arm/bifrost/mali_kbase_fence.h index 06690d4f17bb..d45a0fec4104 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_fence.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_fence.h @@ -35,8 +35,37 @@ #include #if MALI_USE_CSF +/* Number of digits needed to express the max value of given unsigned type. + * + * Details: The number of digits needed to express the max value of given type is log10(t_max) + 1 + * sizeof(t) == log2(t_max)/8 + * log10(t_max) == log2(t_max) / log2(10) + * log2(t_max) == sizeof(type) * 8 + * 1/log2(10) is approx (1233 >> 12) + * Hence, number of digits for given type == log10(t_max) + 1 == sizeof(type) * 8 * (1233 >> 12) + 1 + */ +#define MAX_DIGITS_FOR_UNSIGNED_TYPE(t) ((((sizeof(t) * BITS_PER_BYTE) * 1233) >> 12) + 1) + +/* Number of digits needed to express the max value of given signed type, + * including the sign character, + */ +#define MAX_DIGITS_FOR_SIGNED_TYPE(t) (MAX_DIGITS_FOR_UNSIGNED_TYPE(t) + 1) + +/* Max number of characters for id member of kbase_device struct. */ +#define MAX_KBDEV_ID_LEN MAX_DIGITS_FOR_UNSIGNED_TYPE(u32) +/* Max number of characters for tgid member of kbase_context struct. */ +#define MAX_KCTX_TGID_LEN MAX_DIGITS_FOR_SIGNED_TYPE(pid_t) +/* Max number of characters for id member of kbase_context struct. 
*/ +#define MAX_KCTX_ID_LEN MAX_DIGITS_FOR_UNSIGNED_TYPE(u32) +/* Max number of characters for fence_context member of kbase_kcpu_command_queue struct. */ +#define MAX_KCTX_QUEUE_FENCE_CTX_LEN MAX_DIGITS_FOR_UNSIGNED_TYPE(u64) +/* Max number of characters for timeline name fixed format, including null character. */ +#define FIXED_FORMAT_LEN (9) + /* Maximum number of characters in DMA fence timeline name. */ -#define MAX_TIMELINE_NAME (32) +#define MAX_TIMELINE_NAME \ + (MAX_KBDEV_ID_LEN + MAX_KCTX_TGID_LEN + MAX_KCTX_ID_LEN + MAX_KCTX_QUEUE_FENCE_CTX_LEN + \ + FIXED_FORMAT_LEN) /** * struct kbase_kcpu_dma_fence_meta - Metadata structure for dma fence objects containing diff --git a/drivers/gpu/arm/bifrost/mali_kbase_gpu_metrics.c b/drivers/gpu/arm/bifrost/mali_kbase_gpu_metrics.c index 3a5b97db7c04..8a2e13f03683 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_gpu_metrics.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_gpu_metrics.c @@ -29,46 +29,12 @@ #include #include -/** - * enum gpu_metrics_ctx_flags - Flags for the GPU metrics context - * - * @ACTIVE_INTERVAL_IN_WP: Flag set when the application first becomes active in - * the current work period. - * - * @INSIDE_ACTIVE_LIST: Flag to track if object is in kbase_device::gpu_metrics::active_list - * - * All members need to be separate bits. This enum is intended for use in a - * bitmask where multiple values get OR-ed together. 
- */ -enum gpu_metrics_ctx_flags { - ACTIVE_INTERVAL_IN_WP = 1 << 0, - INSIDE_ACTIVE_LIST = 1 << 1, -}; - static unsigned long gpu_metrics_tp_emit_interval_ns = DEFAULT_GPU_METRICS_TP_EMIT_INTERVAL_NS; module_param(gpu_metrics_tp_emit_interval_ns, ulong, 0444); MODULE_PARM_DESC(gpu_metrics_tp_emit_interval_ns, "Time interval in nano seconds at which GPU metrics tracepoints are emitted"); -static inline bool gpu_metrics_ctx_flag(struct kbase_gpu_metrics_ctx *gpu_metrics_ctx, - enum gpu_metrics_ctx_flags flag) -{ - return (gpu_metrics_ctx->flags & flag); -} - -static inline void gpu_metrics_ctx_flag_set(struct kbase_gpu_metrics_ctx *gpu_metrics_ctx, - enum gpu_metrics_ctx_flags flag) -{ - gpu_metrics_ctx->flags |= flag; -} - -static inline void gpu_metrics_ctx_flag_clear(struct kbase_gpu_metrics_ctx *gpu_metrics_ctx, - enum gpu_metrics_ctx_flags flag) -{ - gpu_metrics_ctx->flags &= ~flag; -} - static inline void validate_tracepoint_data(struct kbase_gpu_metrics_ctx *gpu_metrics_ctx, u64 start_time, u64 end_time, u64 total_active) { @@ -82,43 +48,30 @@ static inline void validate_tracepoint_data(struct kbase_gpu_metrics_ctx *gpu_me WARN(total_active > (end_time - start_time), "total_active %llu > end_time %llu - start_time %llu for aid %u active_cnt %u", total_active, end_time, start_time, gpu_metrics_ctx->aid, gpu_metrics_ctx->active_cnt); - - WARN(gpu_metrics_ctx->prev_wp_active_end_time > start_time, - "prev_wp_active_end_time %llu > start_time %llu for aid %u active_cnt %u", - gpu_metrics_ctx->prev_wp_active_end_time, start_time, gpu_metrics_ctx->aid, - gpu_metrics_ctx->active_cnt); #endif } static void emit_tracepoint_for_active_gpu_metrics_ctx( struct kbase_device *kbdev, struct kbase_gpu_metrics_ctx *gpu_metrics_ctx, u64 current_time) { - const u64 start_time = gpu_metrics_ctx->first_active_start_time; - u64 total_active = gpu_metrics_ctx->total_active; - u64 end_time; + const u64 start_time = gpu_metrics_ctx->active_start_time; + u64 total_active, end_time = 
current_time; /* Check if the GPU activity is currently ongoing */ if (gpu_metrics_ctx->active_cnt) { /* The following check is to handle the race on CSF GPUs that can happen between * the draining of trace buffer and FW emitting the ACT=1 event . */ - if (unlikely(current_time == gpu_metrics_ctx->last_active_start_time)) - current_time++; - end_time = current_time; - total_active += end_time - gpu_metrics_ctx->last_active_start_time; - - gpu_metrics_ctx->first_active_start_time = current_time; - gpu_metrics_ctx->last_active_start_time = current_time; - } else { - end_time = gpu_metrics_ctx->last_active_end_time; - gpu_metrics_ctx_flag_clear(gpu_metrics_ctx, ACTIVE_INTERVAL_IN_WP); + if (unlikely(end_time == start_time)) + end_time++; + gpu_metrics_ctx->active_start_time = end_time; } + total_active = end_time - start_time; trace_gpu_work_period(kbdev->id, gpu_metrics_ctx->aid, start_time, end_time, total_active); validate_tracepoint_data(gpu_metrics_ctx, start_time, end_time, total_active); - gpu_metrics_ctx->prev_wp_active_end_time = end_time; - gpu_metrics_ctx->total_active = 0; + gpu_metrics_ctx->active_end_time = end_time; } void kbase_gpu_metrics_ctx_put(struct kbase_device *kbdev, @@ -131,7 +84,8 @@ void kbase_gpu_metrics_ctx_put(struct kbase_device *kbdev, if (gpu_metrics_ctx->kctx_count) return; - if (gpu_metrics_ctx_flag(gpu_metrics_ctx, ACTIVE_INTERVAL_IN_WP)) + /* Generate a tracepoint if there's still activity */ + if (gpu_metrics_ctx->active_cnt) emit_tracepoint_for_active_gpu_metrics_ctx(kbdev, gpu_metrics_ctx, ktime_get_raw_ns()); @@ -166,12 +120,11 @@ struct kbase_gpu_metrics_ctx *kbase_gpu_metrics_ctx_get(struct kbase_device *kbd void kbase_gpu_metrics_ctx_init(struct kbase_device *kbdev, struct kbase_gpu_metrics_ctx *gpu_metrics_ctx, unsigned int aid) { + gpu_metrics_ctx->active_start_time = 0; + gpu_metrics_ctx->active_end_time = 0; gpu_metrics_ctx->aid = aid; - gpu_metrics_ctx->total_active = 0; gpu_metrics_ctx->kctx_count = 1; 
gpu_metrics_ctx->active_cnt = 0; - gpu_metrics_ctx->prev_wp_active_end_time = 0; - gpu_metrics_ctx->flags = 0; list_add_tail(&gpu_metrics_ctx->link, &kbdev->gpu_metrics.inactive_list); } @@ -180,17 +133,9 @@ void kbase_gpu_metrics_ctx_start_activity(struct kbase_context *kctx, u64 timest struct kbase_gpu_metrics_ctx *gpu_metrics_ctx = kctx->gpu_metrics_ctx; gpu_metrics_ctx->active_cnt++; - if (gpu_metrics_ctx->active_cnt == 1) - gpu_metrics_ctx->last_active_start_time = timestamp_ns; - - if (!gpu_metrics_ctx_flag(gpu_metrics_ctx, ACTIVE_INTERVAL_IN_WP)) { - gpu_metrics_ctx->first_active_start_time = timestamp_ns; - gpu_metrics_ctx_flag_set(gpu_metrics_ctx, ACTIVE_INTERVAL_IN_WP); - } - - if (!gpu_metrics_ctx_flag(gpu_metrics_ctx, INSIDE_ACTIVE_LIST)) { + if (gpu_metrics_ctx->active_cnt == 1) { + gpu_metrics_ctx->active_start_time = timestamp_ns; list_move_tail(&gpu_metrics_ctx->link, &kctx->kbdev->gpu_metrics.active_list); - gpu_metrics_ctx_flag_set(gpu_metrics_ctx, INSIDE_ACTIVE_LIST); } } @@ -201,22 +146,22 @@ void kbase_gpu_metrics_ctx_end_activity(struct kbase_context *kctx, u64 timestam if (WARN_ON_ONCE(!gpu_metrics_ctx->active_cnt)) return; + /* Do not emit tracepoint if GPU activity still continues. */ if (--gpu_metrics_ctx->active_cnt) return; - if (likely(timestamp_ns > gpu_metrics_ctx->last_active_start_time)) { - gpu_metrics_ctx->last_active_end_time = timestamp_ns; - gpu_metrics_ctx->total_active += - timestamp_ns - gpu_metrics_ctx->last_active_start_time; + if (likely(timestamp_ns > gpu_metrics_ctx->active_start_time)) { + emit_tracepoint_for_active_gpu_metrics_ctx(kctx->kbdev, gpu_metrics_ctx, + timestamp_ns); return; } /* Due to conversion from system timestamp to CPU timestamp (which involves rounding) * the value for start and end timestamp could come as same on CSF GPUs. 
*/ - if (timestamp_ns == gpu_metrics_ctx->last_active_start_time) { - gpu_metrics_ctx->last_active_end_time = timestamp_ns + 1; - gpu_metrics_ctx->total_active += 1; + if (timestamp_ns == gpu_metrics_ctx->active_start_time) { + emit_tracepoint_for_active_gpu_metrics_ctx(kctx->kbdev, gpu_metrics_ctx, + timestamp_ns + 1); return; } @@ -224,12 +169,9 @@ void kbase_gpu_metrics_ctx_end_activity(struct kbase_context *kctx, u64 timestam * visible to the Kbase even though the system timestamp value sampled by FW was less than * the system timestamp value sampled by Kbase just before the draining of trace buffer. */ - if (gpu_metrics_ctx->last_active_start_time == gpu_metrics_ctx->first_active_start_time && - gpu_metrics_ctx->prev_wp_active_end_time == gpu_metrics_ctx->first_active_start_time) { - WARN_ON_ONCE(gpu_metrics_ctx->total_active); - gpu_metrics_ctx->last_active_end_time = - gpu_metrics_ctx->prev_wp_active_end_time + 1; - gpu_metrics_ctx->total_active = 1; + if (gpu_metrics_ctx->active_end_time == gpu_metrics_ctx->active_start_time) { + emit_tracepoint_for_active_gpu_metrics_ctx(kctx->kbdev, gpu_metrics_ctx, + gpu_metrics_ctx->active_end_time + 1); return; } @@ -242,15 +184,12 @@ void kbase_gpu_metrics_emit_tracepoint(struct kbase_device *kbdev, u64 ts) struct kbase_gpu_metrics_ctx *gpu_metrics_ctx, *tmp; list_for_each_entry_safe(gpu_metrics_ctx, tmp, &gpu_metrics->active_list, link) { - if (!gpu_metrics_ctx_flag(gpu_metrics_ctx, ACTIVE_INTERVAL_IN_WP)) { - WARN_ON(!gpu_metrics_ctx_flag(gpu_metrics_ctx, INSIDE_ACTIVE_LIST)); - WARN_ON(gpu_metrics_ctx->active_cnt); - list_move_tail(&gpu_metrics_ctx->link, &gpu_metrics->inactive_list); - gpu_metrics_ctx_flag_clear(gpu_metrics_ctx, INSIDE_ACTIVE_LIST); + if (gpu_metrics_ctx->active_cnt) { + emit_tracepoint_for_active_gpu_metrics_ctx(kbdev, gpu_metrics_ctx, ts); continue; } - emit_tracepoint_for_active_gpu_metrics_ctx(kbdev, gpu_metrics_ctx, ts); + list_move_tail(&gpu_metrics_ctx->link, &gpu_metrics->inactive_list); } 
} diff --git a/drivers/gpu/arm/bifrost/mali_kbase_gpu_metrics.h b/drivers/gpu/arm/bifrost/mali_kbase_gpu_metrics.h index c445dff32dc9..658cf1c164c5 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_gpu_metrics.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_gpu_metrics.h @@ -106,7 +106,7 @@ void kbase_gpu_metrics_ctx_init(struct kbase_device *kbdev, * @kctx: Pointer to the Kbase context contributing data to the GPU metrics context. * @timestamp_ns: CPU timestamp at which the GPU activity started. * - * The provided timestamp would be later used as the "start_time_ns" for the + * The provided timestamp is used as the "start_time_ns" for the * power/gpu_work_period tracepoint if this is the first GPU activity for the GPU * metrics context in the current work period. * @@ -122,9 +122,9 @@ void kbase_gpu_metrics_ctx_start_activity(struct kbase_context *kctx, u64 timest * @kctx: Pointer to the Kbase context contributing data to the GPU metrics context. * @timestamp_ns: CPU timestamp at which the GPU activity ended. * - * The provided timestamp would be later used as the "end_time_ns" for the - * power/gpu_work_period tracepoint if this is the last GPU activity for the GPU - * metrics context in the current work period. + * The provided timestamp is used as the "end_time_ns" for the power/gpu_work_period + * tracepoint if this is the last GPU activity for the GPU metrics context + * in the current work period. * * Note: The caller must appropriately serialize the call to this function with the * call to other GPU metrics functions declared in this file. @@ -138,8 +138,8 @@ void kbase_gpu_metrics_ctx_end_activity(struct kbase_context *kctx, u64 timestam * @kbdev: Pointer to the GPU device. * @ts: Timestamp at which the tracepoint is being emitted. * - * This function would loop through all the active GPU metrics contexts and emit a - * power/gpu_work_period tracepoint for them. 
+ * This function would loop through all GPU metrics contexts in the active list and + * emit a power/gpu_work_period tracepoint if the GPU work in the context is still active. * The GPU metrics context that is found to be inactive since the last tracepoint * was emitted would be moved to the inactive list. * The current work period would be considered as over and a new work period would diff --git a/drivers/gpu/arm/bifrost/mali_kbase_gpuprops.c b/drivers/gpu/arm/bifrost/mali_kbase_gpuprops.c index 10b3b506e84e..25ee8c1042a2 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_gpuprops.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_gpuprops.c @@ -357,6 +357,7 @@ enum l2_config_override_result { /** * kbase_read_l2_config_from_dt - Read L2 configuration * @kbdev: The kbase device for which to get the L2 configuration. + * @regdump: Pointer to struct kbasep_gpuprops_regdump structure. * * Check for L2 configuration overrides in module parameters and device tree. * Override values in module parameters take priority over override values in @@ -366,9 +367,16 @@ enum l2_config_override_result { * overridden, L2_CONFIG_OVERRIDE_NONE if no overrides are provided. * L2_CONFIG_OVERRIDE_FAIL otherwise. */ -static enum l2_config_override_result kbase_read_l2_config_from_dt(struct kbase_device *const kbdev) +static enum l2_config_override_result +kbase_read_l2_config_from_dt(struct kbase_device *const kbdev, + struct kbasep_gpuprops_regdump *regdump) { struct device_node *np = kbdev->dev->of_node; + /* + * CACHE_SIZE bit fields in L2_FEATURES register, default value after the reset/powerup + * holds the maximum size of the cache that can be programmed in L2_CONFIG register.
+ */ + const u8 l2_size_max = L2_FEATURES_CACHE_SIZE_GET(regdump->l2_features); if (!np) return L2_CONFIG_OVERRIDE_NONE; @@ -378,8 +386,12 @@ static enum l2_config_override_result kbase_read_l2_config_from_dt(struct kbase_ else if (of_property_read_u8(np, "l2-size", &kbdev->l2_size_override)) kbdev->l2_size_override = 0; - if (kbdev->l2_size_override != 0 && kbdev->l2_size_override < OVERRIDE_L2_SIZE_MIN_LOG2) + if (kbdev->l2_size_override != 0 && (kbdev->l2_size_override < OVERRIDE_L2_SIZE_MIN_LOG2 || + kbdev->l2_size_override > l2_size_max)) { + dev_err(kbdev->dev, "Invalid Cache Size in %s", + override_l2_size ? "Module parameters" : "Device tree node"); return L2_CONFIG_OVERRIDE_FAIL; + } /* Check overriding value is supported, if not will result in * undefined behavior. @@ -429,7 +441,7 @@ int kbase_gpuprops_update_l2_features(struct kbase_device *kbdev) struct kbasep_gpuprops_regdump *regdump = &PRIV_DATA_REGDUMP(kbdev); /* Check for L2 cache size & hash overrides */ - switch (kbase_read_l2_config_from_dt(kbdev)) { + switch (kbase_read_l2_config_from_dt(kbdev, regdump)) { case L2_CONFIG_OVERRIDE_FAIL: err = -EIO; goto exit; diff --git a/drivers/gpu/arm/bifrost/mali_kbase_gwt.c b/drivers/gpu/arm/bifrost/mali_kbase_gwt.c index c92d54c9e663..5e59bf60aa38 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_gwt.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_gwt.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2024 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -30,9 +30,10 @@ #include static inline void kbase_gpu_gwt_setup_page_permission(struct kbase_context *kctx, - unsigned long flag, struct rb_node *node) + unsigned long flag, + struct kbase_reg_zone *zone) { - struct rb_node *rbnode = node; + struct rb_node *rbnode = rb_first(&zone->reg_rbtree); while (rbnode) { struct kbase_va_region *reg; @@ -55,17 +56,15 @@ static inline void kbase_gpu_gwt_setup_page_permission(struct kbase_context *kct static void kbase_gpu_gwt_setup_pages(struct kbase_context *kctx, unsigned long flag) { - kbase_gpu_gwt_setup_page_permission(kctx, flag, - rb_first(&kctx->reg_zone[SAME_VA_ZONE].reg_rbtree)); - kbase_gpu_gwt_setup_page_permission(kctx, flag, - rb_first(&kctx->reg_zone[CUSTOM_VA_ZONE].reg_rbtree)); + kbase_gpu_gwt_setup_page_permission(kctx, flag, &kctx->reg_zone[SAME_VA_ZONE]); + kbase_gpu_gwt_setup_page_permission(kctx, flag, &kctx->reg_zone[CUSTOM_VA_ZONE]); } int kbase_gpu_gwt_start(struct kbase_context *kctx) { - kbase_gpu_vm_lock(kctx); + kbase_gpu_vm_lock_with_pmode_sync(kctx); if (kctx->gwt_enabled) { - kbase_gpu_vm_unlock(kctx); + kbase_gpu_vm_unlock_with_pmode_sync(kctx); return -EBUSY; } @@ -91,7 +90,7 @@ int kbase_gpu_gwt_start(struct kbase_context *kctx) kbase_gpu_gwt_setup_pages(kctx, ~KBASE_REG_GPU_WR); - kbase_gpu_vm_unlock(kctx); + kbase_gpu_vm_unlock_with_pmode_sync(kctx); return 0; } diff --git a/drivers/gpu/arm/bifrost/mali_kbase_hw.c b/drivers/gpu/arm/bifrost/mali_kbase_hw.c index 7d4200e96fd3..1fde75b996c4 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_hw.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_hw.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2012-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2012-2024 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -225,6 +225,8 @@ static const enum base_hw_issue *kbase_hw_get_issues_for_new_id(struct kbase_dev { GPU_ID_PRODUCT_TVAX, { { GPU_ID_VERSION_MAKE(0, 0, 0), base_hw_issues_tVAx_r0p0 }, + { GPU_ID_VERSION_MAKE(0, 0, 5), base_hw_issues_tVAx_r0p0 }, + { GPU_ID_VERSION_MAKE(0, 1, 0), base_hw_issues_tVAx_r0p1 }, { U32_MAX, NULL } } }, { GPU_ID_PRODUCT_TTUX, @@ -334,6 +336,8 @@ static const enum base_hw_issue *kbase_hw_get_issues_for_new_id(struct kbase_dev gpu_id->version_id = fallback_version; } } + + return issues; } diff --git a/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_pm.h b/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_pm.h index 7a0ea49099ba..982547d16022 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_pm.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_pm.h @@ -129,14 +129,14 @@ void kbase_pm_set_debug_core_mask(struct kbase_device *kbdev, u64 new_core_mask) * kbase_pm_set_debug_core_mask - Set the debug core mask. * * @kbdev: The kbase device structure for the device (must be a valid pointer) - * @new_core_mask_js0: The core mask to use for job slot 0 - * @new_core_mask_js1: The core mask to use for job slot 1 - * @new_core_mask_js2: The core mask to use for job slot 2 + * @new_core_mask: The core mask to use, as an array where each element refers + * to a job slot. + * @new_core_mask_size: Number of elements in the core mask array. * * This determines which cores the power manager is allowed to use. 
*/ -void kbase_pm_set_debug_core_mask(struct kbase_device *kbdev, u64 new_core_mask_js0, - u64 new_core_mask_js1, u64 new_core_mask_js2); +void kbase_pm_set_debug_core_mask(struct kbase_device *kbdev, u64 *new_core_mask, + size_t new_core_mask_size); #endif /* MALI_USE_CSF */ /** diff --git a/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_time.h b/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_time.h index 0630dfa6db3a..222ff2001e56 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_time.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_time.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2014, 2018-2021, 2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014, 2018-2021, 2024 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -27,7 +27,8 @@ * * @multiplier: Numerator of the converter's fraction. * @divisor: Denominator of the converter's fraction. - * @offset: Converter's offset term. + * @gpu_timestamp_offset: Cached CPU to GPU TS offset computed whenever whole system + * enters into standby mode where CPU Monotonic time is suspended. * @device_scaled_timeouts: Timeouts in milliseconds that were scaled to be * consistent with the minimum MCU frequency. This * array caches the results of all of the conversions @@ -55,7 +56,7 @@ struct kbase_backend_time { #if MALI_USE_CSF u64 multiplier; u64 divisor; - s64 offset; + s64 gpu_timestamp_offset; #endif unsigned int device_scaled_timeouts[KBASE_TIMEOUT_SELECTOR_COUNT]; }; @@ -70,6 +71,40 @@ struct kbase_backend_time { * Return: The CPU timestamp. */ u64 __maybe_unused kbase_backend_time_convert_gpu_to_cpu(struct kbase_device *kbdev, u64 gpu_ts); + +/** + * kbase_backend_update_gpu_timestamp_offset() - Updates GPU timestamp offset register with the + * cached value.
+ * + * @kbdev: Kbase device pointer + * + * Compute the new cached value for GPU timestamp offset if the previously cached value has been + * invalidated and update the GPU timestamp offset register with the cached value. + */ +void kbase_backend_update_gpu_timestamp_offset(struct kbase_device *kbdev); + +/** + * kbase_backend_invalidate_gpu_timestamp_offset() - Invalidate cached GPU timestamp offset value + * + * @kbdev: Kbase device pointer + * + * This function invalidates cached GPU timestamp offset value whenever system suspend + * is about to happen where CPU TS counter will be stopped. + */ +void kbase_backend_invalidate_gpu_timestamp_offset(struct kbase_device *kbdev); + +#if MALI_UNIT_TEST +/** + * kbase_backend_read_gpu_timestamp_offset_reg() - Read GPU TIMESTAMP OFFSET Register + * + * @kbdev: Kbase device pointer + * + * This function reads GPU TIMESTAMP OFFSET Register with proper register access. + * + * Return: GPU TIMESTAMP OFFSET Register value, as unsigned 64 bit value + */ +u64 kbase_backend_read_gpu_timestamp_offset_reg(struct kbase_device *kbdev); +#endif #endif /** diff --git a/drivers/gpu/arm/bifrost/mali_kbase_js.c b/drivers/gpu/arm/bifrost/mali_kbase_js.c index 55c1f4be25d5..3e6b6b5eb066 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_js.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_js.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2011-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2024 ARM Limited. All rights reserved.
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -161,7 +161,7 @@ static inline int gpu_metrics_ctx_init(struct kbase_context *kctx) put_cred(cred); /* Return early if this is not a Userspace created context */ - if (unlikely(!kctx->kfile)) + if (unlikely(!kctx->filp)) return 0; /* Serialize against the other threads trying to create/destroy Kbase contexts. */ @@ -200,7 +200,7 @@ static inline void gpu_metrics_ctx_term(struct kbase_context *kctx) unsigned long flags; /* Return early if this is not a Userspace created context */ - if (unlikely(!kctx->kfile)) + if (unlikely(!kctx->filp)) return; /* Serialize against the other threads trying to create/destroy Kbase contexts. */ @@ -2615,7 +2615,7 @@ static void kbase_js_move_to_tree(struct kbase_jd_atom *katom) * * Remove all post dependencies of an atom from the context ringbuffers. * - * The original atom's event_code will be propogated to all dependent atoms. + * The original atom's event_code will be propagated to all dependent atoms. * * Context: Caller must hold the HW access lock */ diff --git a/drivers/gpu/arm/bifrost/mali_kbase_linux.h b/drivers/gpu/arm/bifrost/mali_kbase_linux.h index 9195be347e2b..cb55d4b417c4 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_linux.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_linux.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2024 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -35,8 +35,13 @@ #if IS_ENABLED(MALI_KERNEL_TEST_API) #define KBASE_EXPORT_TEST_API(func) EXPORT_SYMBOL(func) +/* Note: due to the 2-layer macro translation, using the NULL _etype does not + * compile, and one workaround is to use ERRNO_NULL instead. + */ +#define KBASE_ALLOW_ERROR_INJECTION_TEST_API(func, etype) ALLOW_ERROR_INJECTION(func, etype) #else #define KBASE_EXPORT_TEST_API(func) +#define KBASE_ALLOW_ERROR_INJECTION_TEST_API(func, etype) #endif #define KBASE_EXPORT_SYMBOL(func) EXPORT_SYMBOL(func) diff --git a/drivers/gpu/arm/bifrost/mali_kbase_mem.c b/drivers/gpu/arm/bifrost/mali_kbase_mem.c index ddf6ea352e72..7470a94a44ad 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_mem.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_mem.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2024 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -46,6 +46,9 @@ #include #include +/* Static key used to determine if large pages are enabled or not */ +static DEFINE_STATIC_KEY_FALSE(large_pages_static_key); + #define VA_REGION_SLAB_NAME_PREFIX "va-region-slab-" #define VA_REGION_SLAB_NAME_SIZE (DEVNAME_SIZE + sizeof(VA_REGION_SLAB_NAME_PREFIX) + 1) @@ -143,20 +146,20 @@ MODULE_PARM_DESC(large_page_conf, "User override for large page usage on support static void kbasep_mem_page_size_init(struct kbase_device *kbdev) { if (!IS_ENABLED(CONFIG_LARGE_PAGE_SUPPORT)) { - kbdev->pagesize_2mb = false; dev_info(kbdev->dev, "Large page support was disabled at compile-time!"); return; } switch (large_page_conf) { case LARGE_PAGE_AUTO: { - kbdev->pagesize_2mb = kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_LARGE_PAGE_ALLOC); + if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_LARGE_PAGE_ALLOC)) + static_branch_inc(&large_pages_static_key); dev_info(kbdev->dev, "Large page allocation set to %s after hardware feature check", - kbdev->pagesize_2mb ? "true" : "false"); + static_branch_unlikely(&large_pages_static_key) ? 
"true" : "false"); break; } case LARGE_PAGE_ON: { - kbdev->pagesize_2mb = true; + static_branch_inc(&large_pages_static_key); if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_LARGE_PAGE_ALLOC)) dev_warn(kbdev->dev, "Enabling large page allocations on unsupporting GPU!"); @@ -165,12 +168,10 @@ static void kbasep_mem_page_size_init(struct kbase_device *kbdev) break; } case LARGE_PAGE_OFF: { - kbdev->pagesize_2mb = false; dev_info(kbdev->dev, "Large page allocation override: turned off\n"); break; } default: { - kbdev->pagesize_2mb = false; dev_info(kbdev->dev, "Invalid large page override, turning off large pages\n"); break; } @@ -180,12 +181,18 @@ static void kbasep_mem_page_size_init(struct kbase_device *kbdev) * so that userspace could read it to figure out the state of the configuration * if necessary. */ - if (kbdev->pagesize_2mb) + if (static_branch_unlikely(&large_pages_static_key)) large_page_conf = LARGE_PAGE_ON; else large_page_conf = LARGE_PAGE_OFF; } +inline bool kbase_is_large_pages_enabled(void) +{ + return static_branch_unlikely(&large_pages_static_key); +} +KBASE_EXPORT_TEST_API(kbase_is_large_pages_enabled); + int kbase_mem_init(struct kbase_device *kbdev) { int err = 0; @@ -524,15 +531,20 @@ int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg) switch (alloc->imported.user_buf.state) { case KBASE_USER_BUF_STATE_GPU_MAPPED: { alloc->imported.user_buf.current_mapping_usage_count = 0; - kbase_user_buf_from_gpu_mapped_to_empty(kctx, reg); + kbase_mem_phy_alloc_ref_read(alloc) ? + kbase_user_buf_from_gpu_mapped_to_pinned(kctx, reg) : + kbase_user_buf_from_gpu_mapped_to_empty(kctx, reg); break; } case KBASE_USER_BUF_STATE_DMA_MAPPED: { - kbase_user_buf_from_dma_mapped_to_empty(kctx, reg); + kbase_mem_phy_alloc_ref_read(alloc) ? 
+ kbase_user_buf_from_dma_mapped_to_pinned(kctx, reg) : + kbase_user_buf_from_dma_mapped_to_empty(kctx, reg); break; } case KBASE_USER_BUF_STATE_PINNED: { - kbase_user_buf_from_pinned_to_empty(kctx, reg); + if (!kbase_mem_phy_alloc_ref_read(alloc)) + kbase_user_buf_from_pinned_to_empty(kctx, reg); break; } case KBASE_USER_BUF_STATE_EMPTY: { @@ -672,7 +684,9 @@ void kbase_sync_single(struct kbase_context *kctx, struct tagged_addr t_cpu_pa, dma_addr_t dma_addr; WARN_ON(!cpu_page); - WARN_ON((size_t)offset + size > PAGE_SIZE); + + if ((size_t)offset + size > PAGE_SIZE) + dev_warn(kctx->kbdev->dev, "Size and offset exceed page size"); dma_addr = kbase_dma_addr_from_tagged(t_cpu_pa) + (dma_addr_t)offset; @@ -713,19 +727,105 @@ void kbase_sync_single(struct kbase_context *kctx, struct tagged_addr t_cpu_pa, } } +static int kbase_get_sync_scope_params(struct kbase_context *kctx, unsigned long start, size_t size, + u64 *page_off, u64 *page_cnt, u64 *offset) +{ + u64 tmp_off; + struct kbase_cpu_mapping *map = + kbasep_find_enclosing_cpu_mapping(kctx, start, size, &tmp_off); + + if (!map) { + dev_dbg(kctx->kbdev->dev, "%s: Can't find CPU mapping 0x%016lX", __func__, start); + return -EINVAL; + } + + *page_off = tmp_off >> PAGE_SHIFT; + tmp_off &= ~PAGE_MASK; + *page_cnt = (size + tmp_off + (PAGE_SIZE - 1)) >> PAGE_SHIFT; + *offset = tmp_off; + + return 0; +} + +static int kbase_sync_imported_user_buf(struct kbase_context *kctx, struct kbase_va_region *reg, + struct basep_syncset *sset, enum kbase_sync_type sync_fn) +{ + unsigned long start = (uintptr_t)sset->user_addr; + size_t size = (size_t)sset->size; + dma_addr_t *dma_addr = reg->gpu_alloc->imported.user_buf.dma_addrs; + u64 page_off = 0, page_count = 0, offset = 0; + u64 i; + size_t sz; + int err; + + lockdep_assert_held(&kctx->reg_lock); + + if (sync_fn != KBASE_SYNC_TO_CPU && sync_fn != KBASE_SYNC_TO_DEVICE) { + dev_dbg(kctx->kbdev->dev, "%s: Unknown kbase sync_fn type!", __func__); + return -EINVAL; + } + + /* Early 
return if the imported user_buffer is not yet mapped to GPU */ + if (reg->gpu_alloc->imported.user_buf.state != KBASE_USER_BUF_STATE_GPU_MAPPED) + return -EINVAL; + + err = kbase_get_sync_scope_params(kctx, start, size, &page_off, &page_count, &offset); + if (err) + return err; + + /* Check the sync is inside the imported range */ + if ((page_off >= reg->gpu_alloc->nents) || + ((page_off + page_count) > reg->gpu_alloc->nents)) + return -EINVAL; + + dma_addr = reg->gpu_alloc->imported.user_buf.dma_addrs; + /* Sync first page */ + sz = MIN(((size_t)PAGE_SIZE - offset), size); + if (sync_fn == KBASE_SYNC_TO_CPU) + dma_sync_single_for_cpu(kctx->kbdev->dev, dma_addr[page_off] + offset, sz, + DMA_BIDIRECTIONAL); + else + dma_sync_single_for_device(kctx->kbdev->dev, dma_addr[page_off] + offset, sz, + DMA_BIDIRECTIONAL); + + /* Calculate the size for last page */ + sz = ((start + size - 1) & ~PAGE_MASK) + 1; + + /* Sync middle pages (if any) */ + for (i = 1; page_count > 2 && i < page_count - 1; i++) { + if (sync_fn == KBASE_SYNC_TO_CPU) + dma_sync_single_for_cpu(kctx->kbdev->dev, dma_addr[page_off + i], PAGE_SIZE, + DMA_BIDIRECTIONAL); + else + dma_sync_single_for_device(kctx->kbdev->dev, dma_addr[page_off + i], + PAGE_SIZE, DMA_BIDIRECTIONAL); + } + + /* Sync last page (if any) */ + if (page_count > 1) { + i = page_off + page_count - 1; + if (sync_fn == KBASE_SYNC_TO_CPU) + dma_sync_single_for_cpu(kctx->kbdev->dev, dma_addr[i], sz, + DMA_BIDIRECTIONAL); + else + dma_sync_single_for_device(kctx->kbdev->dev, dma_addr[i], sz, + DMA_BIDIRECTIONAL); + } + + return 0; +} + static int kbase_do_syncset(struct kbase_context *kctx, struct basep_syncset *sset, enum kbase_sync_type sync_fn) { int err = 0; struct kbase_va_region *reg; - struct kbase_cpu_mapping *map; unsigned long start; size_t size; struct tagged_addr *cpu_pa; struct tagged_addr *gpu_pa; - u64 page_off, page_count; + u64 page_off = 0, page_count = 0, offset = 0; u64 i; - u64 offset; size_t sz; 
kbase_os_mem_map_lock(kctx); @@ -748,7 +848,10 @@ static int kbase_do_syncset(struct kbase_context *kctx, struct basep_syncset *ss * memory may be cached. */ if (kbase_mem_is_imported(reg->gpu_alloc->type)) { - err = kbase_mem_do_sync_imported(kctx, reg, sync_fn); + if (reg->gpu_alloc->type == KBASE_MEM_TYPE_IMPORTED_USER_BUF) + err = kbase_sync_imported_user_buf(kctx, reg, sset, sync_fn); + else + err = kbase_sync_imported_umm(kctx, reg, sync_fn); goto out_unlock; } @@ -758,17 +861,10 @@ static int kbase_do_syncset(struct kbase_context *kctx, struct basep_syncset *ss start = (uintptr_t)sset->user_addr; size = (size_t)sset->size; - map = kbasep_find_enclosing_cpu_mapping(kctx, start, size, &offset); - if (!map) { - dev_warn(kctx->kbdev->dev, "Can't find CPU mapping 0x%016lX for VA 0x%016llX", - start, sset->mem_handle.basep.handle); - err = -EINVAL; + err = kbase_get_sync_scope_params(kctx, start, size, &page_off, &page_count, &offset); + if (err) goto out_unlock; - } - page_off = offset >> PAGE_SHIFT; - offset &= ~PAGE_MASK; - page_count = (size + offset + (PAGE_SIZE - 1)) >> PAGE_SHIFT; cpu_pa = kbase_get_cpu_phy_pages(reg); gpu_pa = kbase_get_gpu_phy_pages(reg); @@ -777,7 +873,6 @@ static int kbase_do_syncset(struct kbase_context *kctx, struct basep_syncset *ss err = -EINVAL; goto out_unlock; } - if (page_off >= reg->gpu_alloc->nents) { /* Start of sync range is outside the physically backed region * so nothing to do @@ -942,7 +1037,7 @@ int kbase_mem_free(struct kbase_context *kctx, u64 gpu_addr) __func__); return -EINVAL; } - kbase_gpu_vm_lock(kctx); + kbase_gpu_vm_lock_with_pmode_sync(kctx); if (gpu_addr >= BASE_MEM_COOKIE_BASE && gpu_addr < BASE_MEM_FIRST_FREE_ADDRESS) { unsigned int cookie = PFN_DOWN(gpu_addr - BASE_MEM_COOKIE_BASE); @@ -981,7 +1076,7 @@ int kbase_mem_free(struct kbase_context *kctx, u64 gpu_addr) } out_unlock: - kbase_gpu_vm_unlock(kctx); + kbase_gpu_vm_unlock_with_pmode_sync(kctx); return err; } @@ -1156,7 +1251,7 @@ int 
kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc, size_t nr_pa /* Check if we have enough pages requested so we can allocate a large * page (512 * 4KB = 2MB ) */ - if (kbdev->pagesize_2mb && nr_left >= NUM_PAGES_IN_2MB_LARGE_PAGE) { + if (kbase_is_large_pages_enabled() && nr_left >= NUM_PAGES_IN_2MB_LARGE_PAGE) { size_t nr_lp = nr_left / NUM_PAGES_IN_2MB_LARGE_PAGE; res = kbase_mem_pool_alloc_pages(&kctx->mem_pools.large[alloc->group_id], @@ -1307,6 +1402,7 @@ alloc_failed: invalid_request: return -ENOMEM; } +KBASE_EXPORT_TEST_API(kbase_alloc_phy_pages_helper); static size_t free_partial_locked(struct kbase_context *kctx, struct kbase_mem_pool *pool, struct tagged_addr tp) @@ -1363,7 +1459,7 @@ struct tagged_addr *kbase_alloc_phy_pages_helper_locked(struct kbase_mem_phy_all kctx = alloc->imported.native.kctx; kbdev = kctx->kbdev; - if (!kbdev->pagesize_2mb) + if (!kbase_is_large_pages_enabled()) WARN_ON(pool->order); if (alloc->reg) { @@ -1386,7 +1482,7 @@ struct tagged_addr *kbase_alloc_phy_pages_helper_locked(struct kbase_mem_phy_all tp = alloc->pages + alloc->nents; new_pages = tp; - if (kbdev->pagesize_2mb && pool->order) { + if (kbase_is_large_pages_enabled() && pool->order) { size_t nr_lp = nr_left / NUM_PAGES_IN_2MB_LARGE_PAGE; res = kbase_mem_pool_alloc_pages_locked(pool, nr_lp * NUM_PAGES_IN_2MB_LARGE_PAGE, @@ -1503,7 +1599,7 @@ alloc_failed: struct tagged_addr *start_free = alloc->pages + alloc->nents; - if (kbdev->pagesize_2mb && pool->order) { + if (kbase_is_large_pages_enabled() && pool->order) { while (nr_pages_to_free) { if (is_huge_head(*start_free)) { kbase_mem_pool_free_pages_locked( @@ -1659,6 +1755,7 @@ int kbase_free_phy_pages_helper(struct kbase_mem_phy_alloc *alloc, size_t nr_pag return 0; } +KBASE_EXPORT_TEST_API(kbase_free_phy_pages_helper); void kbase_free_phy_pages_helper_locked(struct kbase_mem_phy_alloc *alloc, struct kbase_mem_pool *pool, struct tagged_addr *pages, @@ -2156,17 +2253,31 @@ void kbase_gpu_vm_lock(struct 
kbase_context *kctx) KBASE_DEBUG_ASSERT(kctx != NULL); mutex_lock(&kctx->reg_lock); } - KBASE_EXPORT_TEST_API(kbase_gpu_vm_lock); +void kbase_gpu_vm_lock_with_pmode_sync(struct kbase_context *kctx) +{ +#if MALI_USE_CSF + down_read(&kctx->kbdev->csf.pmode_sync_sem); +#endif + kbase_gpu_vm_lock(kctx); +} + void kbase_gpu_vm_unlock(struct kbase_context *kctx) { KBASE_DEBUG_ASSERT(kctx != NULL); mutex_unlock(&kctx->reg_lock); } - KBASE_EXPORT_TEST_API(kbase_gpu_vm_unlock); +void kbase_gpu_vm_unlock_with_pmode_sync(struct kbase_context *kctx) +{ + kbase_gpu_vm_unlock(kctx); +#if MALI_USE_CSF + up_read(&kctx->kbdev->csf.pmode_sync_sem); +#endif +} + #if IS_ENABLED(CONFIG_DEBUG_FS) struct kbase_jit_debugfs_data { int (*func)(struct kbase_jit_debugfs_data *data); @@ -2708,7 +2819,7 @@ static int kbase_jit_grow(struct kbase_context *kctx, const struct base_jit_allo delta = info->commit_pages - reg->gpu_alloc->nents; pages_required = delta; - if (kctx->kbdev->pagesize_2mb && pages_required >= NUM_PAGES_IN_2MB_LARGE_PAGE) { + if (kbase_is_large_pages_enabled() && pages_required >= NUM_PAGES_IN_2MB_LARGE_PAGE) { pool = &kctx->mem_pools.large[kctx->jit_group_id]; /* Round up to number of 2 MB pages required */ pages_required += (NUM_PAGES_IN_2MB_LARGE_PAGE - 1); @@ -2746,10 +2857,10 @@ static int kbase_jit_grow(struct kbase_context *kctx, const struct base_jit_allo kbase_mem_pool_lock(pool); } - if (reg->gpu_alloc->nents > info->commit_pages) { + if (reg->gpu_alloc->nents >= info->commit_pages) { kbase_mem_pool_unlock(pool); spin_unlock(&kctx->mem_partials_lock); - dev_warn( + dev_info( kctx->kbdev->dev, "JIT alloc grown beyond the required number of initially required pages, this grow no longer needed."); goto done; @@ -2999,7 +3110,7 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx, if (!jit_allow_allocate(kctx, info, ignore_pressure_limit)) return NULL; - if (kctx->kbdev->pagesize_2mb) { + if (kbase_is_large_pages_enabled()) { /* Preallocate memory 
for the sub-allocation structs */ for (i = 0; i != ARRAY_SIZE(prealloc_sas); ++i) { prealloc_sas[i] = kmalloc(sizeof(*prealloc_sas[i]), GFP_KERNEL); @@ -3008,7 +3119,7 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx, } } - kbase_gpu_vm_lock(kctx); + kbase_gpu_vm_lock_with_pmode_sync(kctx); mutex_lock(&kctx->jit_evict_lock); /* @@ -3086,7 +3197,7 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx, kbase_jit_done_phys_increase(kctx, needed_pages); #endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ - kbase_gpu_vm_unlock(kctx); + kbase_gpu_vm_unlock_with_pmode_sync(kctx); if (ret) { /* @@ -3147,7 +3258,7 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx, #endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ mutex_unlock(&kctx->jit_evict_lock); - kbase_gpu_vm_unlock(kctx); + kbase_gpu_vm_unlock_with_pmode_sync(kctx); reg = kbase_mem_alloc(kctx, info->va_pages, info->commit_pages, info->extension, &flags, &gpu_addr, mmu_sync_info); @@ -3249,9 +3360,9 @@ void kbase_jit_free(struct kbase_context *kctx, struct kbase_va_region *reg) u64 delta = old_pages - new_size; if (delta) { - mutex_lock(&kctx->reg_lock); + kbase_gpu_vm_lock_with_pmode_sync(kctx); kbase_mem_shrink(kctx, reg, old_pages - delta); - mutex_unlock(&kctx->reg_lock); + kbase_gpu_vm_unlock_with_pmode_sync(kctx); } } @@ -3356,8 +3467,7 @@ void kbase_jit_term(struct kbase_context *kctx) struct kbase_va_region *walker; /* Free all allocations for this context */ - - kbase_gpu_vm_lock(kctx); + kbase_gpu_vm_lock_with_pmode_sync(kctx); mutex_lock(&kctx->jit_evict_lock); /* Free all allocations from the pool */ while (!list_empty(&kctx->jit_pool_head)) { @@ -3398,7 +3508,7 @@ void kbase_jit_term(struct kbase_context *kctx) WARN_ON(kctx->jit_phys_pages_to_be_allocated); #endif mutex_unlock(&kctx->jit_evict_lock); - kbase_gpu_vm_unlock(kctx); + kbase_gpu_vm_unlock_with_pmode_sync(kctx); /* * Flush the freeing of allocations whose backing has been freed @@ -3916,9 +4026,6 
@@ int kbase_map_external_resource(struct kbase_context *kctx, struct kbase_va_regi case KBASE_MEM_TYPE_IMPORTED_USER_BUF: { user_buf_original_state = reg->gpu_alloc->imported.user_buf.state; - if ((reg->gpu_alloc->imported.user_buf.mm != locked_mm) && (!reg->gpu_alloc->nents)) - return -EINVAL; - /* This function is reachable through many code paths, and the imported * memory handle could be in any of the possible states: consider all * of them as a valid starting point, and progress through all stages @@ -3928,19 +4035,31 @@ int kbase_map_external_resource(struct kbase_context *kctx, struct kbase_va_regi * Error recovery restores the original state and goes no further. */ switch (user_buf_original_state) { - case KBASE_USER_BUF_STATE_EMPTY: - case KBASE_USER_BUF_STATE_PINNED: + case KBASE_USER_BUF_STATE_EMPTY: { + if (reg->gpu_alloc->imported.user_buf.mm != locked_mm) + return -EINVAL; + err = kbase_user_buf_from_empty_to_gpu_mapped(kctx, reg); + break; + } + case KBASE_USER_BUF_STATE_PINNED: { + if (!reg->gpu_alloc->nents) + return -EINVAL; + err = kbase_user_buf_from_pinned_to_gpu_mapped(kctx, reg); + break; + } case KBASE_USER_BUF_STATE_DMA_MAPPED: { - if (user_buf_original_state == KBASE_USER_BUF_STATE_EMPTY) - err = kbase_user_buf_from_empty_to_gpu_mapped(kctx, reg); - else if (user_buf_original_state == KBASE_USER_BUF_STATE_PINNED) - err = kbase_user_buf_from_pinned_to_gpu_mapped(kctx, reg); - else - err = kbase_user_buf_from_dma_mapped_to_gpu_mapped(kctx, reg); - - if (err) - return err; - + /* If the imported handle has not pinned any physical pages yet: + * this function can only be called within the context of a user + * process, which must be the same process as the one that + * originally created the memory handle. + * + * In all other transitions: make sure that the imported handle + * has already pinned physical pages before proceeding to mapping + * operations. 
+ */ + if (!reg->gpu_alloc->nents) + return -EINVAL; + err = kbase_user_buf_from_dma_mapped_to_gpu_mapped(kctx, reg); break; } case KBASE_USER_BUF_STATE_GPU_MAPPED: { @@ -3954,6 +4073,8 @@ int kbase_map_external_resource(struct kbase_context *kctx, struct kbase_va_regi reg->gpu_alloc->imported.user_buf.state); return -EINVAL; } + if (err) + return err; /* If the state was valid and the transition is happening, then the handle * must be in GPU_MAPPED state now and the reference counter of GPU mappings @@ -4021,13 +4142,8 @@ void kbase_unmap_external_resource(struct kbase_context *kctx, struct kbase_va_r kbase_va_region_alloc_put(kctx, reg); } -static inline u64 kbasep_get_va_gpu_addr(struct kbase_va_region *reg) -{ - return reg->start_pfn << PAGE_SHIFT; -} - -struct kbase_ctx_ext_res_meta *kbase_sticky_resource_acquire(struct kbase_context *kctx, - u64 gpu_addr) +struct kbase_ctx_ext_res_meta * +kbase_sticky_resource_acquire(struct kbase_context *kctx, u64 gpu_addr, struct mm_struct *locked_mm) { struct kbase_ctx_ext_res_meta *meta = NULL; struct kbase_ctx_ext_res_meta *walker; @@ -4066,7 +4182,7 @@ struct kbase_ctx_ext_res_meta *kbase_sticky_resource_acquire(struct kbase_contex /* Map the external resource to the GPU allocation of the region * and acquire the reference to the VA region */ - if (kbase_map_external_resource(kctx, meta->reg, NULL)) + if (kbase_map_external_resource(kctx, meta->reg, locked_mm)) goto fail_map; meta->ref = 1; diff --git a/drivers/gpu/arm/bifrost/mali_kbase_mem.h b/drivers/gpu/arm/bifrost/mali_kbase_mem.h index e4a7d6bd0a30..19b50f5bf08b 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_mem.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_mem.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2024 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -589,6 +589,11 @@ int kbase_mem_init(struct kbase_device *kbdev); void kbase_mem_halt(struct kbase_device *kbdev); void kbase_mem_term(struct kbase_device *kbdev); +static inline unsigned int kbase_mem_phy_alloc_ref_read(struct kbase_mem_phy_alloc *alloc) +{ + return kref_read(&alloc->kref); +} + static inline struct kbase_mem_phy_alloc *kbase_mem_phy_alloc_get(struct kbase_mem_phy_alloc *alloc) { kref_get(&alloc->kref); @@ -1408,12 +1413,30 @@ int kbase_update_region_flags(struct kbase_context *kctx, struct kbase_va_region */ void kbase_gpu_vm_lock(struct kbase_context *kctx); +/** + * kbase_gpu_vm_lock_with_pmode_sync() - Wrapper of kbase_gpu_vm_lock. + * @kctx: KBase context + * + * Same as kbase_gpu_vm_lock for JM GPU. + * For CSF GPU, first acquires the P.mode read-write semaphore for reading. + */ +void kbase_gpu_vm_lock_with_pmode_sync(struct kbase_context *kctx); + /** * kbase_gpu_vm_unlock() - Release the per-context region list lock * @kctx: KBase context */ void kbase_gpu_vm_unlock(struct kbase_context *kctx); +/** + * kbase_gpu_vm_unlock_with_pmode_sync() - Wrapper of kbase_gpu_vm_unlock. + * @kctx: KBase context + * + * Same as kbase_gpu_vm_unlock for JM GPU. + * For CSF GPU, then releases the read hold on the P.mode read-write semaphore. + */ +void kbase_gpu_vm_unlock_with_pmode_sync(struct kbase_context *kctx); + int kbase_alloc_phy_pages(struct kbase_va_region *reg, size_t vsize, size_t size); /** @@ -1651,7 +1674,7 @@ int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc, size_t nr_pa * * @prealloc_sa: Information about the partial allocation if the amount of memory requested * is not a multiple of 2MB. One instance of struct kbase_sub_alloc must be - * allocated by the caller if kbdev->pagesize_2mb is enabled. + * allocated by the caller if large pages are enabled.
* * Allocates @nr_pages_requested and updates the alloc object. This function does not allocate new * pages from the kernel, and therefore will never trigger the OoM killer. Therefore, it can be @@ -1679,9 +1702,9 @@ int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc, size_t nr_pa * This ensures that the pool can be grown to the required size and that the allocation can * complete without another thread using the newly grown pages. * - * If kbdev->pagesize_2mb is enabled and the allocation is >= 2MB, then @pool must be one of the - * pools from alloc->imported.native.kctx->mem_pools.large[]. Otherwise it must be one of the - * mempools from alloc->imported.native.kctx->mem_pools.small[]. + * If large (2MiB) pages are enabled and the allocation is >= 2MiB, then @pool + * must be one of the pools from alloc->imported.native.kctx->mem_pools.large[]. Otherwise it + * must be one of the mempools from alloc->imported.native.kctx->mem_pools.small[]. * * @prealloc_sa is used to manage the non-2MB sub-allocation. It has to be pre-allocated because we * must not sleep (due to the usage of kmalloc()) whilst holding pool->pool_lock. @prealloc_sa @@ -2070,7 +2093,8 @@ bool kbase_has_exec_va_zone(struct kbase_context *kctx); * kbase_map_external_resource - Map an external resource to the GPU. * @kctx: kbase context. * @reg: External resource to map. - * @locked_mm: The mm_struct which has been locked for this operation. + * @locked_mm: The mm_struct which has been locked for this operation, + * or NULL if none is available. * * On successful mapping, the VA region and the gpu_alloc refcounts will be * increased, making it safe to use and store both values directly. @@ -2335,12 +2359,15 @@ int kbase_sticky_resource_init(struct kbase_context *kctx); * kbase_sticky_resource_acquire - Acquire a reference on a sticky resource. * @kctx: kbase context. * @gpu_addr: The GPU address of the external resource. 
+ * @locked_mm: The mm_struct which has been locked for this operation, + * or NULL if none is available. * * Return: The metadata object which represents the binding between the * external resource and the kbase context on success or NULL on failure. */ struct kbase_ctx_ext_res_meta *kbase_sticky_resource_acquire(struct kbase_context *kctx, - u64 gpu_addr); + u64 gpu_addr, + struct mm_struct *locked_mm); /** * kbase_sticky_resource_release - Release a reference on a sticky resource. @@ -2494,19 +2521,19 @@ void kbase_mem_umm_unmap(struct kbase_context *kctx, struct kbase_va_region *reg struct kbase_mem_phy_alloc *alloc); /** - * kbase_mem_do_sync_imported - Sync caches for imported memory + * kbase_sync_imported_umm - Sync caches for imported UMM memory * @kctx: Pointer to the kbase context * @reg: Pointer to the region with imported memory to sync * @sync_fn: The type of sync operation to perform * - * Sync CPU caches for supported (currently only dma-buf (UMM)) memory. + * Sync CPU caches for supported dma-buf (UMM) memory. * Attempting to sync unsupported imported memory types will result in an error * code, -EINVAL. * * Return: 0 on success, or a negative error code. 
*/ -int kbase_mem_do_sync_imported(struct kbase_context *kctx, struct kbase_va_region *reg, - enum kbase_sync_type sync_fn); +int kbase_sync_imported_umm(struct kbase_context *kctx, struct kbase_va_region *reg, + enum kbase_sync_type sync_fn); /** * kbase_mem_copy_to_pinned_user_pages - Memcpy from source input page to @@ -2595,4 +2622,7 @@ static inline base_mem_alloc_flags kbase_mem_group_id_set(int id) { return BASE_MEM_GROUP_ID_SET(id); } + +bool kbase_is_large_pages_enabled(void); + #endif /* _KBASE_MEM_H_ */ diff --git a/drivers/gpu/arm/bifrost/mali_kbase_mem_linux.c b/drivers/gpu/arm/bifrost/mali_kbase_mem_linux.c index 9a30001634f1..7c319abf381b 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_mem_linux.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_mem_linux.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2024 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -46,6 +46,7 @@ #include #include #include +#include #if ((KERNEL_VERSION(5, 3, 0) <= LINUX_VERSION_CODE) || \ (KERNEL_VERSION(5, 0, 0) > LINUX_VERSION_CODE)) @@ -433,7 +434,7 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, u64 va_pages } reg->initial_commit = commit_pages; - kbase_gpu_vm_lock(kctx); + kbase_gpu_vm_lock_with_pmode_sync(kctx); if (reg->flags & KBASE_REG_PERMANENT_KERNEL_MAPPING) { /* Permanent kernel mappings must happen as soon as @@ -443,7 +444,7 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, u64 va_pages */ int err = kbase_phy_alloc_mapping_init(kctx, reg, va_pages, commit_pages); if (err < 0) { - kbase_gpu_vm_unlock(kctx); + kbase_gpu_vm_unlock_with_pmode_sync(kctx); goto no_kern_mapping; } } @@ -455,7 +456,7 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, u64 
va_pages /* Bind to a cookie */ if (bitmap_empty(kctx->cookies, BITS_PER_LONG)) { dev_err(dev, "No cookies available for allocation!"); - kbase_gpu_vm_unlock(kctx); + kbase_gpu_vm_unlock_with_pmode_sync(kctx); goto no_cookie; } /* return a cookie */ @@ -472,7 +473,7 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, u64 va_pages } else /* we control the VA */ { size_t align = 1; - if (kctx->kbdev->pagesize_2mb) { + if (kbase_is_large_pages_enabled()) { /* If there's enough (> 33 bits) of GPU VA space, align to 2MB * boundaries. The similar condition is used for mapping from * the SAME_VA zone inside kbase_context_get_unmapped_area(). @@ -490,7 +491,7 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, u64 va_pages } if (kbase_gpu_mmap(kctx, reg, *gpu_va, va_pages, align, mmu_sync_info) != 0) { dev_warn(dev, "Failed to map memory on GPU"); - kbase_gpu_vm_unlock(kctx); + kbase_gpu_vm_unlock_with_pmode_sync(kctx); goto no_mmap; } /* return real GPU VA */ @@ -508,7 +509,7 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, u64 va_pages } #endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ - kbase_gpu_vm_unlock(kctx); + kbase_gpu_vm_unlock_with_pmode_sync(kctx); #if MALI_USE_CSF if (*flags & BASE_MEM_FIXABLE) @@ -596,8 +597,10 @@ int kbase_mem_query(struct kbase_context *kctx, u64 gpu_addr, u64 query, u64 *co *out |= BASE_MEM_COHERENT_SYSTEM; if (KBASE_REG_SHARE_IN & reg->flags) *out |= BASE_MEM_COHERENT_LOCAL; - if (KBASE_REG_DONT_NEED & reg->flags) - *out |= BASE_MEM_DONT_NEED; + if (mali_kbase_supports_mem_dont_need(kctx->api_version)) { + if (KBASE_REG_DONT_NEED & reg->flags) + *out |= BASE_MEM_DONT_NEED; + } if (mali_kbase_supports_mem_grow_on_gpf(kctx->api_version)) { /* Prior to this version, this was known about by * user-side but we did not return them. 
Returning @@ -634,9 +637,30 @@ int kbase_mem_query(struct kbase_context *kctx, u64 gpu_addr, u64 query, u64 *co else *out |= BASE_MEM_FIXABLE; } -#endif +#endif /* MALI_USE_CSF */ if (KBASE_REG_GPU_VA_SAME_4GB_PAGE & reg->flags) *out |= BASE_MEM_GPU_VA_SAME_4GB_PAGE; + if (mali_kbase_supports_mem_import_sync_on_map_unmap(kctx->api_version)) { + if (reg->gpu_alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM) { + if (reg->gpu_alloc->imported.umm.need_sync) + *out |= BASE_MEM_IMPORT_SYNC_ON_MAP_UNMAP; + } + } + if (mali_kbase_supports_mem_kernel_sync(kctx->api_version)) { + if (unlikely(reg->cpu_alloc != reg->gpu_alloc)) + *out |= BASE_MEM_KERNEL_SYNC; + } + if (mali_kbase_supports_mem_same_va(kctx->api_version)) { + if (kbase_bits_to_zone(reg->flags) == SAME_VA_ZONE) { + /* Imported memory is an edge case, where declaring it SAME_VA + * would be ambiguous. + */ + if (reg->gpu_alloc->type != KBASE_MEM_TYPE_IMPORTED_UMM && + reg->gpu_alloc->type != KBASE_MEM_TYPE_IMPORTED_USER_BUF) { + *out |= BASE_MEM_SAME_VA; + } + } + } *out |= kbase_mem_group_id_set(reg->cpu_alloc->group_id); @@ -667,7 +691,9 @@ out_unlock: static unsigned long kbase_mem_evictable_reclaim_count_objects(struct shrinker *s, struct shrink_control *sc) { - struct kbase_context *kctx = container_of(s, struct kbase_context, reclaim); + struct kbase_context *kctx = + KBASE_GET_KBASE_DATA_FROM_SHRINKER(s, struct kbase_context, reclaim); + int evict_nents = atomic_read(&kctx->evict_nents); unsigned long nr_freeable_items; @@ -717,8 +743,15 @@ static unsigned long kbase_mem_evictable_reclaim_scan_objects(struct shrinker *s struct kbase_mem_phy_alloc *tmp; unsigned long freed = 0; - kctx = container_of(s, struct kbase_context, reclaim); + kctx = KBASE_GET_KBASE_DATA_FROM_SHRINKER(s, struct kbase_context, reclaim); +#if MALI_USE_CSF + if (!down_read_trylock(&kctx->kbdev->csf.pmode_sync_sem)) { + dev_warn(kctx->kbdev->dev, + "Can't shrink GPU memory when P.Mode entrance is in progress"); + return 0; + } +#endif 
mutex_lock(&kctx->jit_evict_lock); list_for_each_entry_safe(alloc, tmp, &kctx->evict_list, evict_node) { @@ -757,32 +790,36 @@ static unsigned long kbase_mem_evictable_reclaim_scan_objects(struct shrinker *s } mutex_unlock(&kctx->jit_evict_lock); - +#if MALI_USE_CSF + up_read(&kctx->kbdev->csf.pmode_sync_sem); +#endif return freed; } int kbase_mem_evictable_init(struct kbase_context *kctx) { + struct shrinker *reclaim; + INIT_LIST_HEAD(&kctx->evict_list); mutex_init(&kctx->jit_evict_lock); - kctx->reclaim.count_objects = kbase_mem_evictable_reclaim_count_objects; - kctx->reclaim.scan_objects = kbase_mem_evictable_reclaim_scan_objects; - kctx->reclaim.seeks = DEFAULT_SEEKS; - /* Kernel versions prior to 3.1 : - * struct shrinker does not define batch - */ -#if KERNEL_VERSION(6, 0, 0) > LINUX_VERSION_CODE - register_shrinker(&kctx->reclaim); -#else - register_shrinker(&kctx->reclaim, "mali-mem"); -#endif + reclaim = KBASE_INIT_RECLAIM(kctx, reclaim, "mali-mem"); + if (!reclaim) + return -ENOMEM; + KBASE_SET_RECLAIM(kctx, reclaim, reclaim); + + reclaim->count_objects = kbase_mem_evictable_reclaim_count_objects; + reclaim->scan_objects = kbase_mem_evictable_reclaim_scan_objects; + reclaim->seeks = DEFAULT_SEEKS; + + KBASE_REGISTER_SHRINKER(reclaim, "mali-mem", kctx); + return 0; } void kbase_mem_evictable_deinit(struct kbase_context *kctx) { - unregister_shrinker(&kctx->reclaim); + KBASE_UNREGISTER_SHRINKER(kctx->reclaim); } /** @@ -1058,7 +1095,7 @@ int kbase_mem_flags_change(struct kbase_context *kctx, u64 gpu_addr, unsigned in /* Lock down the context, and find the region */ down_write(kbase_mem_get_process_mmap_lock()); - kbase_gpu_vm_lock(kctx); + kbase_gpu_vm_lock_with_pmode_sync(kctx); /* Validate the region */ reg = kbase_region_tracker_find_region_base_address(kctx, gpu_addr); @@ -1110,7 +1147,7 @@ int kbase_mem_flags_change(struct kbase_context *kctx, u64 gpu_addr, unsigned in } out_unlock: - kbase_gpu_vm_unlock(kctx); + 
kbase_gpu_vm_unlock_with_pmode_sync(kctx); up_write(kbase_mem_get_process_mmap_lock()); return ret; @@ -1118,8 +1155,8 @@ out_unlock: #define KBASE_MEM_IMPORT_HAVE_PAGES (1UL << BASE_MEM_FLAGS_NR_BITS) -int kbase_mem_do_sync_imported(struct kbase_context *kctx, struct kbase_va_region *reg, - enum kbase_sync_type sync_fn) +int kbase_sync_imported_umm(struct kbase_context *kctx, struct kbase_va_region *reg, + enum kbase_sync_type sync_fn) { int ret = -EINVAL; struct dma_buf __maybe_unused *dma_buf; @@ -1317,7 +1354,7 @@ int kbase_mem_umm_map(struct kbase_context *kctx, struct kbase_va_region *reg) if (IS_ENABLED(CONFIG_MALI_DMA_BUF_LEGACY_COMPAT) || alloc->imported.umm.need_sync) { if (!kbase_is_region_invalid_or_free(reg)) { - err = kbase_mem_do_sync_imported(kctx, reg, KBASE_SYNC_TO_DEVICE); + err = kbase_sync_imported_umm(kctx, reg, KBASE_SYNC_TO_DEVICE); WARN_ON_ONCE(err); } } @@ -1379,7 +1416,7 @@ void kbase_mem_umm_unmap(struct kbase_context *kctx, struct kbase_va_region *reg if (IS_ENABLED(CONFIG_MALI_DMA_BUF_LEGACY_COMPAT) || alloc->imported.umm.need_sync) { if (!kbase_is_region_invalid_or_free(reg)) { - int err = kbase_mem_do_sync_imported(kctx, reg, KBASE_SYNC_TO_CPU); + int err = kbase_sync_imported_umm(kctx, reg, KBASE_SYNC_TO_CPU); WARN_ON_ONCE(err); } } @@ -1794,7 +1831,7 @@ u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride, u64 nent if (!reg->gpu_alloc->imported.alias.aliased) goto no_aliased_array; - kbase_gpu_vm_lock(kctx); + kbase_gpu_vm_lock_with_pmode_sync(kctx); /* validate and add src handles */ for (i = 0; i < nents; i++) { @@ -1904,7 +1941,7 @@ u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride, u64 nent reg->flags &= ~KBASE_REG_FREE; reg->flags &= ~KBASE_REG_GROWABLE; - kbase_gpu_vm_unlock(kctx); + kbase_gpu_vm_unlock_with_pmode_sync(kctx); return gpu_va; @@ -1915,7 +1952,7 @@ bad_handle: * them is handled by putting reg's allocs, so no rollback of those * actions is done here. 
*/ - kbase_gpu_vm_unlock(kctx); + kbase_gpu_vm_unlock_with_pmode_sync(kctx); no_aliased_array: invalid_flags: kbase_mem_phy_alloc_put(reg->cpu_alloc); @@ -2016,7 +2053,7 @@ int kbase_mem_import(struct kbase_context *kctx, enum base_mem_import_type type, if (!reg) goto no_reg; - kbase_gpu_vm_lock(kctx); + kbase_gpu_vm_lock_with_pmode_sync(kctx); /* mmap needed to setup VA? */ if (*flags & (BASE_MEM_SAME_VA | BASE_MEM_NEED_MMAP)) { @@ -2050,13 +2087,13 @@ int kbase_mem_import(struct kbase_context *kctx, enum base_mem_import_type type, /* clear out private flags */ *flags &= ((1UL << BASE_MEM_FLAGS_NR_BITS) - 1); - kbase_gpu_vm_unlock(kctx); + kbase_gpu_vm_unlock_with_pmode_sync(kctx); return 0; no_gpu_va: no_cookie: - kbase_gpu_vm_unlock(kctx); + kbase_gpu_vm_unlock_with_pmode_sync(kctx); kbase_mem_phy_alloc_put(reg->cpu_alloc); kbase_mem_phy_alloc_put(reg->gpu_alloc); kfree(reg); @@ -2096,7 +2133,7 @@ void kbase_mem_shrink_cpu_mapping(struct kbase_context *kctx, struct kbase_va_re /* Nothing to do */ return; - unmap_mapping_range(kctx->kfile->filp->f_inode->i_mapping, + unmap_mapping_range(kctx->filp->f_inode->i_mapping, (loff_t)(gpu_va_start + new_pages) << PAGE_SHIFT, (loff_t)(old_pages - new_pages) << PAGE_SHIFT, 1); } @@ -2142,7 +2179,7 @@ int kbase_mem_commit(struct kbase_context *kctx, u64 gpu_addr, u64 new_pages) } down_write(kbase_mem_get_process_mmap_lock()); - kbase_gpu_vm_lock(kctx); + kbase_gpu_vm_lock_with_pmode_sync(kctx); /* Validate the region */ reg = kbase_region_tracker_find_region_base_address(kctx, gpu_addr); @@ -2250,7 +2287,7 @@ int kbase_mem_commit(struct kbase_context *kctx, u64 gpu_addr, u64 new_pages) } out_unlock: - kbase_gpu_vm_unlock(kctx); + kbase_gpu_vm_unlock_with_pmode_sync(kctx); if (read_locked) up_read(kbase_mem_get_process_mmap_lock()); else @@ -2274,11 +2311,16 @@ int kbase_mem_shrink(struct kbase_context *const kctx, struct kbase_va_region *c return -EINVAL; old_pages = kbase_reg_current_backed_size(reg); - if 
(WARN_ON(old_pages < new_pages)) + if (old_pages < new_pages) { + dev_warn( + kctx->kbdev->dev, + "Requested number of pages (%llu) is larger than the current number of pages (%llu)", + new_pages, old_pages); return -EINVAL; + } delta = old_pages - new_pages; - if (kctx->kbdev->pagesize_2mb) { + if (kbase_is_large_pages_enabled()) { struct tagged_addr *start_free = reg->gpu_alloc->pages + new_pages; /* Move the end of new commited range to a valid location. @@ -2332,7 +2374,7 @@ static void kbase_cpu_vm_close(struct vm_area_struct *vma) KBASE_DEBUG_ASSERT(map->kctx); KBASE_DEBUG_ASSERT(map->alloc); - kbase_gpu_vm_lock(map->kctx); + kbase_gpu_vm_lock_with_pmode_sync(map->kctx); if (map->free_on_close) { KBASE_DEBUG_ASSERT(kbase_bits_to_zone(map->region->flags) == SAME_VA_ZONE); @@ -2346,10 +2388,9 @@ static void kbase_cpu_vm_close(struct vm_area_struct *vma) list_del(&map->mappings_list); kbase_va_region_alloc_put(map->kctx, map->region); - kbase_gpu_vm_unlock(map->kctx); + kbase_gpu_vm_unlock_with_pmode_sync(map->kctx); kbase_mem_phy_alloc_put(map->alloc); - kbase_file_dec_cpu_mapping_count(map->kctx->kfile); kfree(map); } @@ -2549,7 +2590,6 @@ static int kbase_cpu_mmap(struct kbase_context *kctx, struct kbase_va_region *re map->alloc->properties |= KBASE_MEM_PHY_ALLOC_ACCESSED_CACHED; list_add(&map->mappings_list, &map->alloc->mappings); - kbase_file_inc_cpu_mapping_count(kctx->kfile); out: return err; @@ -2749,7 +2789,7 @@ int kbase_context_mmap(struct kbase_context *const kctx, struct vm_area_struct * goto out; } - kbase_gpu_vm_lock(kctx); + kbase_gpu_vm_lock_with_pmode_sync(kctx); if (vma->vm_pgoff == PFN_DOWN(BASE_MEM_MAP_TRACKING_HANDLE)) { /* The non-mapped tracking helper page */ @@ -2784,11 +2824,11 @@ int kbase_context_mmap(struct kbase_context *const kctx, struct vm_area_struct * #endif /* defined(CONFIG_MALI_VECTOR_DUMP) */ #if MALI_USE_CSF case PFN_DOWN(BASEP_MEM_CSF_USER_REG_PAGE_HANDLE): - kbase_gpu_vm_unlock(kctx); + 
kbase_gpu_vm_unlock_with_pmode_sync(kctx); err = kbase_csf_cpu_mmap_user_reg_page(kctx, vma); goto out; case PFN_DOWN(BASEP_MEM_CSF_USER_IO_PAGES_HANDLE)... PFN_DOWN(BASE_MEM_COOKIE_BASE) - 1: { - kbase_gpu_vm_unlock(kctx); + kbase_gpu_vm_unlock_with_pmode_sync(kctx); mutex_lock(&kctx->csf.lock); err = kbase_csf_cpu_mmap_user_io_pages(kctx, vma); mutex_unlock(&kctx->csf.lock); @@ -2882,7 +2922,7 @@ int kbase_context_mmap(struct kbase_context *const kctx, struct vm_area_struct * } #endif /* defined(CONFIG_MALI_VECTOR_DUMP) */ out_unlock: - kbase_gpu_vm_unlock(kctx); + kbase_gpu_vm_unlock_with_pmode_sync(kctx); out: if (err) dev_err(dev, "mmap failed %d\n", err); @@ -3276,25 +3316,6 @@ void kbasep_os_process_page_usage_update(struct kbase_context *kctx, int pages) #endif } -static void kbase_special_vm_open(struct vm_area_struct *vma) -{ - struct kbase_context *kctx = vma->vm_private_data; - - kbase_file_inc_cpu_mapping_count(kctx->kfile); -} - -static void kbase_special_vm_close(struct vm_area_struct *vma) -{ - struct kbase_context *kctx = vma->vm_private_data; - - kbase_file_dec_cpu_mapping_count(kctx->kfile); -} - -static const struct vm_operations_struct kbase_vm_special_ops = { - .open = kbase_special_vm_open, - .close = kbase_special_vm_close, -}; - static int kbase_tracking_page_setup(struct kbase_context *kctx, struct vm_area_struct *vma) { if (vma_pages(vma) != 1) @@ -3303,10 +3324,7 @@ static int kbase_tracking_page_setup(struct kbase_context *kctx, struct vm_area_ /* no real access */ vma->vm_flags &= ~(VM_READ | VM_MAYREAD | VM_WRITE | VM_MAYWRITE | VM_EXEC | VM_MAYEXEC); vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_DONTDUMP | VM_IO; - vma->vm_ops = &kbase_vm_special_ops; - vma->vm_private_data = kctx; - kbase_file_inc_cpu_mapping_count(kctx->kfile); return 0; } @@ -3367,7 +3385,6 @@ static void kbase_csf_user_io_pages_vm_close(struct vm_area_struct *vma) struct kbase_device *kbdev; int err; bool reset_prevented = false; - struct kbase_file *kfile; 
if (!queue) { pr_debug("Close method called for the new User IO pages mapping vma\n"); @@ -3376,7 +3393,6 @@ static void kbase_csf_user_io_pages_vm_close(struct vm_area_struct *vma) kctx = queue->kctx; kbdev = kctx->kbdev; - kfile = kctx->kfile; err = kbase_reset_gpu_prevent_and_wait(kbdev); if (err) @@ -3394,9 +3410,8 @@ static void kbase_csf_user_io_pages_vm_close(struct vm_area_struct *vma) if (reset_prevented) kbase_reset_gpu_allow(kbdev); - kbase_file_dec_cpu_mapping_count(kfile); /* Now as the vma is closed, drop the reference on mali device file */ - fput(kfile->filp); + fput(kctx->filp); } #if (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE) @@ -3546,7 +3561,6 @@ static int kbase_csf_cpu_mmap_user_io_pages(struct kbase_context *kctx, struct v /* Also adjust the vm_pgoff */ vma->vm_pgoff = queue->db_file_offset; - kbase_file_inc_cpu_mapping_count(kctx->kfile); return 0; map_failed: @@ -3586,7 +3600,6 @@ static void kbase_csf_user_reg_vm_close(struct vm_area_struct *vma) { struct kbase_context *kctx = vma->vm_private_data; struct kbase_device *kbdev; - struct kbase_file *kfile; if (unlikely(!kctx)) { pr_debug("Close function called for the unexpected mapping"); @@ -3594,7 +3607,6 @@ static void kbase_csf_user_reg_vm_close(struct vm_area_struct *vma) } kbdev = kctx->kbdev; - kfile = kctx->kfile; if (unlikely(!kctx->csf.user_reg.vma)) dev_warn(kbdev->dev, "user_reg VMA pointer unexpectedly NULL for ctx %d_%d", @@ -3606,9 +3618,8 @@ static void kbase_csf_user_reg_vm_close(struct vm_area_struct *vma) kctx->csf.user_reg.vma = NULL; - kbase_file_dec_cpu_mapping_count(kfile); /* Now as the VMA is closed, drop the reference on mali device file */ - fput(kfile->filp); + fput(kctx->filp); } /** @@ -3738,7 +3749,6 @@ static int kbase_csf_cpu_mmap_user_reg_page(struct kbase_context *kctx, struct v vma->vm_ops = &kbase_csf_user_reg_vm_ops; vma->vm_private_data = kctx; - kbase_file_inc_cpu_mapping_count(kctx->kfile); return 0; } diff --git 
a/drivers/gpu/arm/bifrost/mali_kbase_mem_migrate.c b/drivers/gpu/arm/bifrost/mali_kbase_mem_migrate.c index 93a07e7db4fa..f0ce0cf4e56c 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_mem_migrate.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_mem_migrate.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2022-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2022-2024 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -28,6 +28,9 @@ #include #include +/* Static key used to determine if page migration is enabled or not */ +static DEFINE_STATIC_KEY_FALSE(page_migration_static_key); + /* Global integer used to determine if module parameter value has been * provided and if page migration feature is enabled. * Feature is disabled on all platforms by default. @@ -50,15 +53,6 @@ MODULE_PARM_DESC(kbase_page_migration_enabled, KBASE_EXPORT_TEST_API(kbase_page_migration_enabled); -bool kbase_is_page_migration_enabled(void) -{ - /* Handle uninitialised int case */ - if (kbase_page_migration_enabled < 0) - return false; - return IS_ENABLED(CONFIG_PAGE_MIGRATION_SUPPORT) && kbase_page_migration_enabled; -} -KBASE_EXPORT_SYMBOL(kbase_is_page_migration_enabled); - #if (KERNEL_VERSION(6, 0, 0) <= LINUX_VERSION_CODE) static const struct movable_operations movable_ops; #endif @@ -225,7 +219,7 @@ static int kbasep_migrate_page_pt_mapped(struct page *old_page, struct page *new * This blocks the CPU page fault handler from remapping pages. * Only MCU's mmut is device wide, i.e. no corresponding kctx. 
*/ - kbase_gpu_vm_lock(kctx); + kbase_gpu_vm_lock_with_pmode_sync(kctx); ret = kbase_mmu_migrate_page( as_tagged(page_to_phys(old_page)), as_tagged(page_to_phys(new_page)), old_dma_addr, @@ -254,7 +248,7 @@ static int kbasep_migrate_page_pt_mapped(struct page *old_page, struct page *new dma_unmap_page(kbdev->dev, new_dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); /* Page fault handler for CPU mapping unblocked. */ - kbase_gpu_vm_unlock(kctx); + kbase_gpu_vm_unlock_with_pmode_sync(kctx); return ret; } @@ -293,10 +287,10 @@ static int kbasep_migrate_page_allocated_mapped(struct page *old_page, struct pa /* Lock context to protect access to array of pages in physical allocation. * This blocks the CPU page fault handler from remapping pages. */ - kbase_gpu_vm_lock(kctx); + kbase_gpu_vm_lock_with_pmode_sync(kctx); /* Unmap the old physical range. */ - unmap_mapping_range(kctx->kfile->filp->f_inode->i_mapping, + unmap_mapping_range(kctx->filp->f_inode->i_mapping, (loff_t)(page_md->data.mapped.vpfn / GPU_PAGES_PER_CPU_PAGE) << PAGE_SHIFT, PAGE_SIZE, 1); @@ -332,7 +326,7 @@ static int kbasep_migrate_page_allocated_mapped(struct page *old_page, struct pa dma_unmap_page(kctx->kbdev->dev, new_dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); /* Page fault handler for CPU mapping unblocked. */ - kbase_gpu_vm_unlock(kctx); + kbase_gpu_vm_unlock_with_pmode_sync(kctx); return ret; } @@ -685,11 +679,15 @@ void kbase_mem_migrate_init(struct kbase_device *kbdev) * integer for a negative value to see if insmod parameter was * passed in at all (it will override the default negative value). */ - if (kbase_page_migration_enabled < 0) - kbase_page_migration_enabled = kbdev->pagesize_2mb ? 1 : 0; - else + if (kbase_page_migration_enabled < 0) { + if (kbase_is_large_pages_enabled()) + static_branch_inc(&page_migration_static_key); + } else { dev_info(kbdev->dev, "Page migration support explicitly %s at insmod.", kbase_page_migration_enabled ? 
"enabled" : "disabled"); + if (kbase_page_migration_enabled) + static_branch_inc(&page_migration_static_key); + } spin_lock_init(&mem_migrate->free_pages_lock); INIT_LIST_HEAD(&mem_migrate->free_pages_list); @@ -714,3 +712,9 @@ void kbase_mem_migrate_term(struct kbase_device *kbdev) iput(mem_migrate->inode); #endif } + +bool kbase_is_page_migration_enabled(void) +{ + return static_branch_unlikely(&page_migration_static_key); +} +KBASE_EXPORT_TEST_API(kbase_is_page_migration_enabled); diff --git a/drivers/gpu/arm/bifrost/mali_kbase_mem_migrate.h b/drivers/gpu/arm/bifrost/mali_kbase_mem_migrate.h index ece8734de792..70c3135a7829 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_mem_migrate.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_mem_migrate.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2022-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2022-2024 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software diff --git a/drivers/gpu/arm/bifrost/mali_kbase_mem_pool.c b/drivers/gpu/arm/bifrost/mali_kbase_mem_pool.c index cb862d5b029c..5984730c337c 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_mem_pool.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_mem_pool.c @@ -480,7 +480,7 @@ static unsigned long kbase_mem_pool_reclaim_count_objects(struct shrinker *s, CSTD_UNUSED(sc); - pool = container_of(s, struct kbase_mem_pool, reclaim); + pool = KBASE_GET_KBASE_DATA_FROM_SHRINKER(s, struct kbase_mem_pool, reclaim); kbase_mem_pool_lock(pool); if (pool->dont_reclaim && !pool->dying) { @@ -502,7 +502,7 @@ static unsigned long kbase_mem_pool_reclaim_scan_objects(struct shrinker *s, struct kbase_mem_pool *pool; unsigned long freed; - pool = container_of(s, struct kbase_mem_pool, reclaim); + pool = KBASE_GET_KBASE_DATA_FROM_SHRINKER(s, struct kbase_mem_pool, reclaim); kbase_mem_pool_lock(pool); 
if (pool->dont_reclaim && !pool->dying) { @@ -528,6 +528,8 @@ int kbase_mem_pool_init(struct kbase_mem_pool *pool, const struct kbase_mem_pool unsigned int order, int group_id, struct kbase_device *kbdev, struct kbase_mem_pool *next_pool) { + struct shrinker *reclaim; + if (WARN_ON(group_id < 0) || WARN_ON(group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS)) { return -EINVAL; } @@ -544,18 +546,17 @@ int kbase_mem_pool_init(struct kbase_mem_pool *pool, const struct kbase_mem_pool spin_lock_init(&pool->pool_lock); INIT_LIST_HEAD(&pool->page_list); - pool->reclaim.count_objects = kbase_mem_pool_reclaim_count_objects; - pool->reclaim.scan_objects = kbase_mem_pool_reclaim_scan_objects; - pool->reclaim.seeks = DEFAULT_SEEKS; - /* Kernel versions prior to 3.1 : - * struct shrinker does not define batch - */ - pool->reclaim.batch = 0; -#if KERNEL_VERSION(6, 0, 0) > LINUX_VERSION_CODE - register_shrinker(&pool->reclaim); -#else - register_shrinker(&pool->reclaim, "mali-mem-pool"); -#endif + reclaim = KBASE_INIT_RECLAIM(pool, reclaim, "mali-mem-pool"); + if (!reclaim) + return -ENOMEM; + KBASE_SET_RECLAIM(pool, reclaim, reclaim); + + reclaim->count_objects = kbase_mem_pool_reclaim_count_objects; + reclaim->scan_objects = kbase_mem_pool_reclaim_scan_objects; + reclaim->seeks = DEFAULT_SEEKS; + reclaim->batch = 0; + + KBASE_REGISTER_SHRINKER(reclaim, "mali-mem-pool", pool); pool_dbg(pool, "initialized\n"); @@ -581,7 +582,7 @@ void kbase_mem_pool_term(struct kbase_mem_pool *pool) pool_dbg(pool, "terminate()\n"); - unregister_shrinker(&pool->reclaim); + KBASE_UNREGISTER_SHRINKER(pool->reclaim); kbase_mem_pool_lock(pool); pool->max_size = 0; diff --git a/drivers/gpu/arm/bifrost/mali_kbase_native_mgm.c b/drivers/gpu/arm/bifrost/mali_kbase_native_mgm.c index 5e3d1eeb6d28..d688509cee03 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_native_mgm.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_native_mgm.c @@ -121,44 +121,20 @@ static vm_fault_t kbase_native_mgm_vmf_insert_pfn_prot(struct 
memory_group_manag return vmf_insert_pfn_prot(vma, addr, pfn, pgprot); } -/** - * kbase_native_mgm_update_gpu_pte - Native method to modify a GPU page table - * entry - * - * @mgm_dev: The memory group manager the request is being made through. - * @group_id: A physical memory group ID, which must be valid but is not used. - * Its valid range is 0 .. MEMORY_GROUP_MANAGER_NR_GROUPS-1. - * @mmu_level: The level of the MMU page table where the page is getting mapped. - * @pte: The prepared page table entry. - * - * This function simply returns the @pte without modification. - * - * Return: A GPU page table entry to be stored in a page table. - */ static u64 kbase_native_mgm_update_gpu_pte(struct memory_group_manager_device *mgm_dev, unsigned int group_id, int mmu_level, u64 pte) { - CSTD_UNUSED(mgm_dev); - CSTD_UNUSED(group_id); - CSTD_UNUSED(mmu_level); + if (WARN_ON(group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS)) + return pte; + + pte |= ((u64)group_id << PTE_PBHA_SHIFT) & PTE_PBHA_MASK; + + /* Address could be translated into a different bus address here */ + pte |= ((u64)1 << PTE_RES_BIT_MULTI_AS_SHIFT); return pte; } -/** - * kbase_native_mgm_pte_to_original_pte - Native method to undo changes done in - * kbase_native_mgm_update_gpu_pte() - * - * @mgm_dev: The memory group manager the request is being made through. - * @group_id: A physical memory group ID, which must be valid but is not used. - * Its valid range is 0 .. MEMORY_GROUP_MANAGER_NR_GROUPS-1. - * @mmu_level: The level of the MMU page table where the page is getting mapped. - * @pte: The prepared page table entry. - * - * This function simply returns the @pte without modification. - * - * Return: A GPU page table entry to be stored in a page table. 
- */ static u64 kbase_native_mgm_pte_to_original_pte(struct memory_group_manager_device *mgm_dev, unsigned int group_id, int mmu_level, u64 pte) { @@ -166,6 +142,11 @@ static u64 kbase_native_mgm_pte_to_original_pte(struct memory_group_manager_devi CSTD_UNUSED(group_id); CSTD_UNUSED(mmu_level); + /* Undo the group ID modification */ + pte &= ~PTE_PBHA_MASK; + /* Undo the bit set */ + pte &= ~((u64)1 << PTE_RES_BIT_MULTI_AS_SHIFT); + return pte; } diff --git a/drivers/gpu/arm/bifrost/mali_kbase_pbha.c b/drivers/gpu/arm/bifrost/mali_kbase_pbha.c index 341ea901e2e1..c5b6fada2451 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_pbha.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_pbha.c @@ -277,16 +277,16 @@ static int kbase_pbha_read_int_id_override_property(struct kbase_device *kbdev, static int kbase_pbha_read_propagate_bits_property(struct kbase_device *kbdev, const struct device_node *pbha_node) { - u32 bits = 0; + u8 bits = 0; int err; if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PBHA_HWU)) return 0; - err = of_property_read_u32(pbha_node, "propagate-bits", &bits); + err = of_property_read_u8(pbha_node, "propagate-bits", &bits); if (err == -EINVAL) { - err = of_property_read_u32(pbha_node, "propagate_bits", &bits); + err = of_property_read_u8(pbha_node, "propagate_bits", &bits); } if (err < 0) { diff --git a/drivers/gpu/arm/bifrost/mali_kbase_pbha_debugfs.c b/drivers/gpu/arm/bifrost/mali_kbase_pbha_debugfs.c index f1d2794dd86a..8ab0d1823165 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_pbha_debugfs.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_pbha_debugfs.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2021-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2021-2024 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -50,8 +50,8 @@ static int int_id_overrides_show(struct seq_file *sfile, void *data) #endif /* MALI_USE_CSF */ for (j = 0; j < sizeof(u32); ++j) { - u8 r_val; - u8 w_val; + u8 r_val = 0; + u8 w_val = 0; switch (j) { case 0: diff --git a/drivers/gpu/arm/bifrost/mali_kbase_pm.c b/drivers/gpu/arm/bifrost/mali_kbase_pm.c index ff71524eeaaa..17c34f334aad 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_pm.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_pm.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2024 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -52,21 +52,19 @@ void kbase_pm_context_active(struct kbase_device *kbdev) (void)kbase_pm_context_active_handle_suspend(kbdev, KBASE_PM_SUSPEND_HANDLER_NOT_POSSIBLE); } -int kbase_pm_context_active_handle_suspend(struct kbase_device *kbdev, - enum kbase_pm_suspend_handler suspend_handler) +int kbase_pm_context_active_handle_suspend_locked(struct kbase_device *kbdev, + enum kbase_pm_suspend_handler suspend_handler) { int c; KBASE_DEBUG_ASSERT(kbdev != NULL); dev_dbg(kbdev->dev, "%s - reason = %d, pid = %d\n", __func__, suspend_handler, current->pid); - kbase_pm_lock(kbdev); + lockdep_assert_held(&kbdev->pm.lock); #ifdef CONFIG_MALI_ARBITER_SUPPORT - if (kbase_arbiter_pm_ctx_active_handle_suspend(kbdev, suspend_handler)) { - kbase_pm_unlock(kbdev); + if (kbase_arbiter_pm_ctx_active_handle_suspend(kbdev, suspend_handler)) return 1; - } #endif /* CONFIG_MALI_ARBITER_SUPPORT */ if (kbase_pm_is_suspending(kbdev)) { @@ -76,7 +74,6 @@ int kbase_pm_context_active_handle_suspend(struct kbase_device *kbdev, break; fallthrough; 
case KBASE_PM_SUSPEND_HANDLER_DONT_INCREASE: - kbase_pm_unlock(kbdev); return 1; case KBASE_PM_SUSPEND_HANDLER_NOT_POSSIBLE: @@ -100,21 +97,31 @@ int kbase_pm_context_active_handle_suspend(struct kbase_device *kbdev, kbase_clk_rate_trace_manager_gpu_active(kbdev); } - kbase_pm_unlock(kbdev); dev_dbg(kbdev->dev, "%s %d\n", __func__, kbdev->pm.active_count); return 0; } +int kbase_pm_context_active_handle_suspend(struct kbase_device *kbdev, + enum kbase_pm_suspend_handler suspend_handler) +{ + int ret; + + kbase_pm_lock(kbdev); + ret = kbase_pm_context_active_handle_suspend_locked(kbdev, suspend_handler); + kbase_pm_unlock(kbdev); + + return ret; +} + KBASE_EXPORT_TEST_API(kbase_pm_context_active); -void kbase_pm_context_idle(struct kbase_device *kbdev) +void kbase_pm_context_idle_locked(struct kbase_device *kbdev) { int c; KBASE_DEBUG_ASSERT(kbdev != NULL); - - kbase_pm_lock(kbdev); + lockdep_assert_held(&kbdev->pm.lock); c = --kbdev->pm.active_count; KBASE_KTRACE_ADD(kbdev, PM_CONTEXT_IDLE, NULL, (u64)c); @@ -133,10 +140,16 @@ void kbase_pm_context_idle(struct kbase_device *kbdev) wake_up(&kbdev->pm.zero_active_count_wait); } - kbase_pm_unlock(kbdev); dev_dbg(kbdev->dev, "%s %d (pid = %d)\n", __func__, kbdev->pm.active_count, current->pid); } +void kbase_pm_context_idle(struct kbase_device *kbdev) +{ + kbase_pm_lock(kbdev); + kbase_pm_context_idle_locked(kbdev); + kbase_pm_unlock(kbdev); +} + KBASE_EXPORT_TEST_API(kbase_pm_context_idle); static void reenable_hwcnt_on_resume(struct kbase_device *kbdev) @@ -155,7 +168,12 @@ static void reenable_hwcnt_on_resume(struct kbase_device *kbdev) #endif /* Resume HW counters intermediaries. 
*/ - kbase_kinstr_prfcnt_resume(kbdev->kinstr_prfcnt_ctx); +#if MALI_USE_CSF + if (kbdev->csf.firmware_inited) +#endif + { + kbase_kinstr_prfcnt_resume(kbdev->kinstr_prfcnt_ctx); + } } static void resume_job_scheduling(struct kbase_device *kbdev) @@ -183,7 +201,12 @@ int kbase_pm_driver_suspend(struct kbase_device *kbdev) /* Suspend HW counter intermediaries. This blocks until workers and timers * are no longer running. */ - kbase_kinstr_prfcnt_suspend(kbdev->kinstr_prfcnt_ctx); +#if MALI_USE_CSF + if (kbdev->csf.firmware_inited) +#endif + { + kbase_kinstr_prfcnt_suspend(kbdev->kinstr_prfcnt_ctx); + } /* Disable GPU hardware counters. * This call will block until counters are disabled. @@ -200,19 +223,24 @@ int kbase_pm_driver_suspend(struct kbase_device *kbdev) mutex_unlock(&kbdev->pm.lock); #ifdef CONFIG_MALI_ARBITER_SUPPORT -#if !MALI_USE_CSF if (kbdev->arb.arb_if) { - unsigned int i; unsigned long flags; +#if MALI_USE_CSF + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbase_disjoint_state_up(kbdev); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); +#else + unsigned int i; + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); kbdev->js_data.runpool_irq.submit_allowed = 0; kbase_disjoint_state_up(kbdev); for (i = 0; i < kbdev->gpu_props.num_job_slots; i++) kbase_job_slot_softstop(kbdev, i, NULL); spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); +#endif } -#endif /* !MALI_USE_CSF */ #endif /* CONFIG_MALI_ARBITER_SUPPORT */ /* From now on, the active count will drop towards zero. 
Sometimes, @@ -273,6 +301,10 @@ int kbase_pm_driver_suspend(struct kbase_device *kbdev) } #endif /* CONFIG_MALI_ARBITER_SUPPORT */ +#if MALI_USE_CSF + kbase_backend_invalidate_gpu_timestamp_offset(kbdev); +#endif + return 0; exit: @@ -338,6 +370,7 @@ void kbase_pm_driver_resume(struct kbase_device *kbdev, bool arb_gpu_start) int kbase_pm_suspend(struct kbase_device *kbdev) { int result = 0; + #ifdef CONFIG_MALI_ARBITER_SUPPORT if (kbdev->arb.arb_if) kbase_arbiter_pm_vm_event(kbdev, KBASE_VM_OS_SUSPEND_EVENT); diff --git a/drivers/gpu/arm/bifrost/mali_kbase_pm.h b/drivers/gpu/arm/bifrost/mali_kbase_pm.h index 46db4db5ffe0..0c3575bfd54a 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_pm.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_pm.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2024 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -33,13 +33,12 @@ struct kbase_device; #define PM_ENABLE_IRQS 0x01 #define PM_HW_ISSUES_DETECT 0x02 -#ifdef CONFIG_MALI_ARBITER_SUPPORT -/* In the case that the GPU was granted by the Arbiter, it will have +/* Case 1: the GPU was granted by the Arbiter, it will have * already been reset. The following flag ensures it is not reset * twice. + * Case 2: GPU already in reset state after power on, then no soft-reset is needed. */ #define PM_NO_RESET 0x04 -#endif /** * kbase_pm_init - Initialize the power management framework. @@ -148,6 +147,18 @@ enum kbase_pm_suspend_handler { int kbase_pm_context_active_handle_suspend(struct kbase_device *kbdev, enum kbase_pm_suspend_handler suspend_handler); +/** + * kbase_pm_context_active_handle_suspend_locked - Same as kbase_pm_context_active_handle_suspend(), + * except that pm.lock is held by the caller. 
+ * + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * @suspend_handler: The handler code for how to handle a suspend that might occur + * + * Return: 0 on success, non-zero otherwise. + */ +int kbase_pm_context_active_handle_suspend_locked(struct kbase_device *kbdev, + enum kbase_pm_suspend_handler suspend_handler); + /** * kbase_pm_context_idle - Decrement the reference count of active contexts. * @@ -159,6 +170,14 @@ int kbase_pm_context_active_handle_suspend(struct kbase_device *kbdev, */ void kbase_pm_context_idle(struct kbase_device *kbdev); +/** + * kbase_pm_context_idle_locked - Same as kbase_pm_context_idle(), except that + * pm.lock is held by the caller. + * + * @kbdev: The kbase device structure for the device (must be a valid pointer) + */ +void kbase_pm_context_idle_locked(struct kbase_device *kbdev); + /* NOTE: kbase_pm_is_active() is in mali_kbase.h, because it is an inline * function */ diff --git a/drivers/gpu/arm/bifrost/mali_kbase_softjobs.c b/drivers/gpu/arm/bifrost/mali_kbase_softjobs.c index 0cee2f0e6fd5..c5c16b497369 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_softjobs.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_softjobs.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2011-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2024 ARM Limited. All rights reserved.
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -1408,7 +1408,7 @@ static void kbase_ext_res_process(struct kbase_jd_atom *katom, bool map) gpu_addr = ext_res->ext_res[i].ext_resource & ~(__u64)BASE_EXT_RES_ACCESS_EXCLUSIVE; if (map) { - if (!kbase_sticky_resource_acquire(katom->kctx, gpu_addr)) + if (!kbase_sticky_resource_acquire(katom->kctx, gpu_addr, NULL)) goto failed_loop; } else { if (!kbase_sticky_resource_release_force(katom->kctx, NULL, gpu_addr)) diff --git a/drivers/gpu/arm/bifrost/mmu/Kbuild b/drivers/gpu/arm/bifrost/mmu/Kbuild index 416432397b5c..3c3defdb88e9 100644 --- a/drivers/gpu/arm/bifrost/mmu/Kbuild +++ b/drivers/gpu/arm/bifrost/mmu/Kbuild @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note # -# (C) COPYRIGHT 2021 ARM Limited. All rights reserved. +# (C) COPYRIGHT 2021-2024 ARM Limited. All rights reserved. # # This program is free software and is provided to you under the terms of the # GNU General Public License version 2 as published by the Free Software @@ -21,10 +21,15 @@ bifrost_kbase-y += \ mmu/mali_kbase_mmu.o \ mmu/mali_kbase_mmu_hw_direct.o \ + mmu/mali_kbase_mmu_faults_decoder_luts.o \ + mmu/mali_kbase_mmu_faults_decoder.o \ mmu/mali_kbase_mmu_mode_aarch64.o ifeq ($(CONFIG_MALI_CSF_SUPPORT),y) - bifrost_kbase-y += mmu/backend/mali_kbase_mmu_csf.o + bifrost_kbase-y += mmu/backend/mali_kbase_mmu_csf.o \ + mmu/backend/mali_kbase_mmu_faults_decoder_luts_csf.o else - bifrost_kbase-y += mmu/backend/mali_kbase_mmu_jm.o + bifrost_kbase-y += mmu/backend/mali_kbase_mmu_jm.o \ + mmu/backend/mali_kbase_mmu_faults_decoder_luts_jm.o + endif diff --git a/drivers/gpu/arm/bifrost/mmu/backend/mali_kbase_mmu_csf.c b/drivers/gpu/arm/bifrost/mmu/backend/mali_kbase_mmu_csf.c index df027c727a2c..bd5f3914b8c7 100644 --- a/drivers/gpu/arm/bifrost/mmu/backend/mali_kbase_mmu_csf.c +++ 
b/drivers/gpu/arm/bifrost/mmu/backend/mali_kbase_mmu_csf.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2024 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -29,6 +29,7 @@ #include #include #include +#include void kbase_mmu_get_as_setup(struct kbase_mmu_table *mmut, struct kbase_mmu_setup *const setup) { @@ -99,15 +100,38 @@ void kbase_mmu_report_mcu_as_fault_and_reset(struct kbase_device *kbdev, struct u32 as_no; /* terminal fault, print info about the fault */ - dev_err(kbdev->dev, - "Unexpected Page fault in firmware address space at VA 0x%016llX\n" - "raw fault status: 0x%X\n" - "exception type 0x%X: %s\n" - "access type 0x%X: %s\n" - "source id 0x%X\n", - fault->addr, fault->status, exception_type, - kbase_gpu_exception_name(exception_type), access_type, - kbase_gpu_access_type_name(fault->status), source_id); + if (kbdev->gpu_props.gpu_id.product_model <= GPU_ID_MODEL_MAKE(13, 0)) { + dev_err(kbdev->dev, + "Unexpected Page fault in firmware address space at VA 0x%016llX\n" + "raw fault status: 0x%X\n" + "exception type 0x%X: %s\n" + "access type 0x%X: %s\n" + "source id 0x%X (core_id:utlb:IR 0x%X:0x%X:0x%X): %s, %s\n", + fault->addr, fault->status, exception_type, + kbase_gpu_exception_name(exception_type), access_type, + kbase_gpu_access_type_name(fault->status), source_id, + FAULT_SOURCE_ID_CORE_ID_GET(source_id), + FAULT_SOURCE_ID_UTLB_ID_GET(source_id), + fault_source_id_internal_requester_get(kbdev, source_id), + fault_source_id_core_type_description_get(kbdev, source_id), + fault_source_id_internal_requester_get_str(kbdev, source_id, access_type)); + } else { + dev_err(kbdev->dev, + "Unexpected Page fault in firmware address space at VA 0x%016llX\n" + "raw fault status: 0x%X\n" + "exception type 
0x%X: %s\n" + "access type 0x%X: %s\n" + "source id 0x%X (type:idx:IR 0x%X:0x%X:0x%X): %s %u, %s\n", + fault->addr, fault->status, exception_type, + kbase_gpu_exception_name(exception_type), access_type, + kbase_gpu_access_type_name(fault->status), source_id, + FAULT_SOURCE_ID_CORE_TYPE_GET(source_id), + FAULT_SOURCE_ID_CORE_INDEX_GET(source_id), + fault_source_id_internal_requester_get(kbdev, source_id), + fault_source_id_core_type_description_get(kbdev, source_id), + FAULT_SOURCE_ID_CORE_INDEX_GET(source_id), + fault_source_id_internal_requester_get_str(kbdev, source_id, access_type)); + } kbase_debug_csf_fault_notify(kbdev, NULL, DF_GPU_PAGE_FAULT); @@ -139,17 +163,44 @@ void kbase_gpu_report_bus_fault_and_kill(struct kbase_context *kctx, struct kbas const uintptr_t fault_addr = fault->addr; /* terminal fault, print info about the fault */ - dev_err(kbdev->dev, - "GPU bus fault in AS%u at PA %pK\n" - "PA_VALID: %s\n" - "raw fault status: 0x%X\n" - "exception type 0x%X: %s\n" - "access type 0x%X: %s\n" - "source id 0x%X\n" - "pid: %d\n", - as_no, (void *)fault_addr, addr_valid, status, exception_type, - kbase_gpu_exception_name(exception_type), access_type, - kbase_gpu_access_type_name(access_type), source_id, kctx->pid); + if (kbdev->gpu_props.gpu_id.product_model <= GPU_ID_MODEL_MAKE(13, 0)) { + dev_err(kbdev->dev, + "GPU bus fault in AS%u at PA %pK\n" + "PA_VALID: %s\n" + "raw fault status: 0x%X\n" + "exception type 0x%X: %s\n" + "access type 0x%X: %s\n" + "source id 0x%X (core_id:utlb:IR 0x%X:0x%X:0x%X): %s, %s\n" + "pid: %d\n", + as_no, (void *)fault_addr, addr_valid, status, exception_type, + kbase_gpu_exception_name(exception_type), access_type, + kbase_gpu_access_type_name(access_type), source_id, + FAULT_SOURCE_ID_CORE_ID_GET(source_id), + FAULT_SOURCE_ID_UTLB_ID_GET(source_id), + fault_source_id_internal_requester_get(kbdev, source_id), + fault_source_id_core_type_description_get(kbdev, source_id), + fault_source_id_internal_requester_get_str(kbdev, 
source_id, access_type), + kctx->pid); + } else { + dev_err(kbdev->dev, + "GPU bus fault in AS%u at PA %pK\n" + "PA_VALID: %s\n" + "raw fault status: 0x%X\n" + "exception type 0x%X: %s\n" + "access type 0x%X: %s\n" + "source id 0x%X (type:idx:IR 0x%X:0x%X:0x%X): %s %u, %s\n" + "pid: %d\n", + as_no, (void *)fault_addr, addr_valid, status, exception_type, + kbase_gpu_exception_name(exception_type), access_type, + kbase_gpu_access_type_name(access_type), source_id, + FAULT_SOURCE_ID_CORE_TYPE_GET(source_id), + FAULT_SOURCE_ID_CORE_INDEX_GET(source_id), + fault_source_id_internal_requester_get(kbdev, source_id), + fault_source_id_core_type_description_get(kbdev, source_id), + FAULT_SOURCE_ID_CORE_INDEX_GET(source_id), + fault_source_id_internal_requester_get_str(kbdev, source_id, access_type), + kctx->pid); + } /* AS transaction begin */ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); @@ -195,17 +246,46 @@ void kbase_mmu_report_fault_and_kill(struct kbase_context *kctx, struct kbase_as unsigned int as_no = as->number; /* terminal fault, print info about the fault */ - dev_err(kbdev->dev, - "Unhandled Page fault in AS%u at VA 0x%016llX\n" - "Reason: %s\n" - "raw fault status: 0x%X\n" - "exception type 0x%X: %s\n" - "access type 0x%X: %s\n" - "source id 0x%X\n" - "pid: %d\n", - as_no, fault->addr, reason_str, status, exception_type, - kbase_gpu_exception_name(exception_type), access_type, - kbase_gpu_access_type_name(status), source_id, kctx->pid); + if (kbdev->gpu_props.gpu_id.product_model <= GPU_ID_MODEL_MAKE(13, 0)) { + dev_err(kbdev->dev, + "Unhandled Page fault in AS%u at VA 0x%016llX\n" + "Reason: %s\n" + "raw fault status: 0x%X\n" + "exception type 0x%X: %s\n" + "access type 0x%X: %s\n" + "source id 0x%X (core_id:utlb:IR 0x%X:0x%X:0x%X): %s, %s\n" + "pid: %d\n", + as_no, fault->addr, reason_str, status, exception_type, + kbase_gpu_exception_name(exception_type), access_type, + kbase_gpu_access_type_name(status), source_id, + 
FAULT_SOURCE_ID_CORE_ID_GET(source_id), + FAULT_SOURCE_ID_UTLB_ID_GET(source_id), + fault_source_id_internal_requester_get(kbdev, source_id), + fault_source_id_core_type_description_get(kbdev, source_id), + fault_source_id_internal_requester_get_str(kbdev, source_id, + access_type), + kctx->pid); + } else { + dev_err(kbdev->dev, + "Unhandled Page fault in AS%u at VA 0x%016llX\n" + "Reason: %s\n" + "raw fault status: 0x%X\n" + "exception type 0x%X: %s\n" + "access type 0x%X: %s\n" + "source id 0x%X (type:idx:IR 0x%X:0x%X:0x%X): %s %u, %s\n" + "pid: %d\n", + as_no, fault->addr, reason_str, status, exception_type, + kbase_gpu_exception_name(exception_type), access_type, + kbase_gpu_access_type_name(status), source_id, + FAULT_SOURCE_ID_CORE_TYPE_GET(source_id), + FAULT_SOURCE_ID_CORE_INDEX_GET(source_id), + fault_source_id_internal_requester_get(kbdev, source_id), + fault_source_id_core_type_description_get(kbdev, source_id), + FAULT_SOURCE_ID_CORE_INDEX_GET(source_id), + fault_source_id_internal_requester_get_str(kbdev, source_id, + access_type), + kctx->pid); + } } /* AS transaction begin */ diff --git a/drivers/gpu/arm/bifrost/mmu/backend/mali_kbase_mmu_faults_decoder_luts_csf.c b/drivers/gpu/arm/bifrost/mmu/backend/mali_kbase_mmu_faults_decoder_luts_csf.c new file mode 100644 index 000000000000..d8eec91ba887 --- /dev/null +++ b/drivers/gpu/arm/bifrost/mmu/backend/mali_kbase_mmu_faults_decoder_luts_csf.c @@ -0,0 +1,139 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * + * (C) COPYRIGHT 2019-2024 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +/** + * DOC: Base kernel MMU faults decoder for CSF GPUs. + */ + +#include + +#define GPU_ID_ARCH_ID_MAJOR_GET(gpu_id) ((gpu_id >> 16) & 0xFF) +#define GPU_ID_ARCH_ID_MINOR_AND_REV_GET(gpu_id) (gpu_id & 0xFFFF) +#define NELEMS(s) (sizeof(s) / sizeof((s)[0])) + +struct decode_lut_element { + u16 arch_minor_rev; + u16 key; + const char *text; +}; + +static const char *decode_lut_element_lookup(u16 arch_minor_rev, u16 key, + struct decode_lut_element *decode_element_lut, + unsigned int lut_len) +{ + struct decode_lut_element *p; + + for (p = decode_element_lut; p < decode_element_lut + lut_len; p++) { + if (p->key == key && + (p->arch_minor_rev == 0xffff || p->arch_minor_rev == arch_minor_rev)) + break; + } + if (p < decode_element_lut + lut_len) + return p->text; + else + return "unknown"; +} + +/* Auto-generated code: DO NOT MODIFY! 
*/ + +static struct decode_lut_element lut_fault_source_csf_r_t_major_10[] = { + { 0xFFFF, 0, "pref0" }, + { 0xFFFF, 4, "iter0" }, + { 0xFFFF, 12, "lsu" }, + { 0xFFFF, 13, "mcu" }, +}; + +static struct decode_lut_element lut_fault_source_csf_r_t_major_11[] = { + { 0xFFFF, 0, "pref0" }, + { 0xFFFF, 4, "iter0" }, + { 0xFFFF, 12, "lsu" }, + { 0xFFFF, 13, "mcu" }, +}; + +static struct decode_lut_element lut_fault_source_csf_r_t_major_12[] = { + { 0xFFFF, 0, "pref0" }, + { 0xFFFF, 4, "iter0" }, + { 0xFFFF, 12, "lsu" }, + { 0xFFFF, 13, "mcu" }, +}; + +static struct decode_lut_element lut_fault_source_csf_w_t_major_10[] = { + { 0xFFFF, 8, "pcb0" }, + { 0xFFFF, 12, "lsu" }, + { 0xFFFF, 13, "mcu" }, +}; + +static struct decode_lut_element lut_fault_source_csf_w_t_major_11[] = { + { 0xFFFF, 8, "pcb0" }, + { 0xFFFF, 12, "lsu" }, + { 0xFFFF, 13, "mcu" }, +}; + +static struct decode_lut_element lut_fault_source_csf_w_t_major_12[] = { + { 0xFFFF, 8, "pcb0" }, + { 0xFFFF, 12, "lsu" }, + { 0xFFFF, 13, "mcu" }, +}; + + +const char *decode_fault_source_csf_r_t(u16 idx, u32 gpu_id) +{ + u16 min_rev = GPU_ID_ARCH_ID_MINOR_AND_REV_GET(gpu_id); + const char *ret = "unknown"; + + switch (GPU_ID_ARCH_ID_MAJOR_GET(gpu_id)) { + case 10: + ret = decode_lut_element_lookup(min_rev, idx, lut_fault_source_csf_r_t_major_10, + NELEMS(lut_fault_source_csf_r_t_major_10)); + break; + case 11: + ret = decode_lut_element_lookup(min_rev, idx, lut_fault_source_csf_r_t_major_11, + NELEMS(lut_fault_source_csf_r_t_major_11)); + break; + case 12: + ret = decode_lut_element_lookup(min_rev, idx, lut_fault_source_csf_r_t_major_12, + NELEMS(lut_fault_source_csf_r_t_major_12)); + break; + } + return ret; +} + +const char *decode_fault_source_csf_w_t(u16 idx, u32 gpu_id) +{ + u16 min_rev = GPU_ID_ARCH_ID_MINOR_AND_REV_GET(gpu_id); + const char *ret = "unknown"; + + switch (GPU_ID_ARCH_ID_MAJOR_GET(gpu_id)) { + case 10: + ret = decode_lut_element_lookup(min_rev, idx, lut_fault_source_csf_w_t_major_10, + 
NELEMS(lut_fault_source_csf_w_t_major_10)); + break; + case 11: + ret = decode_lut_element_lookup(min_rev, idx, lut_fault_source_csf_w_t_major_11, + NELEMS(lut_fault_source_csf_w_t_major_11)); + break; + case 12: + ret = decode_lut_element_lookup(min_rev, idx, lut_fault_source_csf_w_t_major_12, + NELEMS(lut_fault_source_csf_w_t_major_12)); + break; + } + return ret; +} diff --git a/drivers/gpu/arm/bifrost/mmu/backend/mali_kbase_mmu_faults_decoder_luts_csf.h b/drivers/gpu/arm/bifrost/mmu/backend/mali_kbase_mmu_faults_decoder_luts_csf.h new file mode 100644 index 000000000000..04f5c02ccc3d --- /dev/null +++ b/drivers/gpu/arm/bifrost/mmu/backend/mali_kbase_mmu_faults_decoder_luts_csf.h @@ -0,0 +1,50 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2024 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +#ifndef _MALI_KBASE_MMU_FAULTS_DECODER_LUTS_CSF_H_ +#define _MALI_KBASE_MMU_FAULTS_DECODER_LUTS_CSF_H_ +#include + +/** + * decode_fault_source_csf_r_t() - Get internal requester of a + * fault in a human readable format. + * + * @idx: Internal requester part of SOURCE_ID field of the fault. + * @gpu_id: GPU id composed of arch_major << 16 | arch_minor << 8 | arch_rev. 
+ * + * Return: Internal requester of a fault in a human readable format for a read + * operation on a CSF core. + */ +const char *decode_fault_source_csf_r_t(u16 idx, u32 gpu_id); + +/** + * decode_fault_source_csf_w_t() - Get internal requester of a + * fault in a human readable format. + * + * @idx: Internal requester part of SOURCE_ID field of the fault. + * @gpu_id: GPU id composed of arch_major << 16 | arch_minor << 8 | arch_rev. + * + * Return: Internal requester of a fault in a human readable format for a write + * operation on a CSF core. + */ +const char *decode_fault_source_csf_w_t(u16 idx, u32 gpu_id); + +#endif /* _MALI_KBASE_MMU_FAULTS_DECODER_LUTS_CSF_H_ */ diff --git a/drivers/gpu/arm/bifrost/mmu/backend/mali_kbase_mmu_faults_decoder_luts_jm.c b/drivers/gpu/arm/bifrost/mmu/backend/mali_kbase_mmu_faults_decoder_luts_jm.c new file mode 100644 index 000000000000..a053a93978b5 --- /dev/null +++ b/drivers/gpu/arm/bifrost/mmu/backend/mali_kbase_mmu_faults_decoder_luts_jm.c @@ -0,0 +1,74 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * + * (C) COPYRIGHT 2019-2024 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +/** + * DOC: Base kernel MMU faults decoder for Job Manager GPUs. 
+ */ + +#include + +#define GPU_ID_ARCH_ID_MAJOR_GET(gpu_id) ((gpu_id >> 16) & 0xFF) +#define GPU_ID_ARCH_ID_MINOR_AND_REV_GET(gpu_id) (gpu_id & 0xFFFF) +#define NELEMS(s) (sizeof(s) / sizeof((s)[0])) + +struct decode_lut_element { + u16 arch_minor_rev; + u16 key; + const char *text; +}; + +static const char *decode_lut_element_lookup(u16 arch_minor_rev, u16 key, + struct decode_lut_element *decode_element_lut, + unsigned int lut_len) +{ + struct decode_lut_element *p; + + for (p = decode_element_lut; p < decode_element_lut + lut_len; p++) { + if (p->key == key && + (p->arch_minor_rev == 0xffff || p->arch_minor_rev == arch_minor_rev)) + break; + } + if (p < decode_element_lut + lut_len) + return p->text; + else + return "unknown"; +} + +/* Auto-generated code: DO NOT MODIFY! */ + +static struct decode_lut_element lut_fault_source_jm_t_major_9[] = { + { 0xFFFF, 0, "js" }, + { 0xFFFF, 1, "pcm" }, +}; + +const char *decode_fault_source_jm_t(u16 idx, u32 gpu_id) +{ + u16 min_rev = GPU_ID_ARCH_ID_MINOR_AND_REV_GET(gpu_id); + const char *ret = "unknown"; + + switch (GPU_ID_ARCH_ID_MAJOR_GET(gpu_id)) { + case 9: + ret = decode_lut_element_lookup(min_rev, idx, lut_fault_source_jm_t_major_9, + NELEMS(lut_fault_source_jm_t_major_9)); + break; + } + return ret; +} diff --git a/drivers/gpu/arm/bifrost/mmu/backend/mali_kbase_mmu_faults_decoder_luts_jm.h b/drivers/gpu/arm/bifrost/mmu/backend/mali_kbase_mmu_faults_decoder_luts_jm.h new file mode 100644 index 000000000000..f686e555d86a --- /dev/null +++ b/drivers/gpu/arm/bifrost/mmu/backend/mali_kbase_mmu_faults_decoder_luts_jm.h @@ -0,0 +1,37 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2024 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +#ifndef _MALI_KBASE_MMU_FAULTS_DECODER_LUTS_JM_H_ +#define _MALI_KBASE_MMU_FAULTS_DECODER_LUTS_JM_H_ +#include + +/** + * decode_fault_source_jm_t() - Get internal requester of a + * fault in a human readable format. + * + * @idx: Internal requester part of SOURCE_ID field of the fault. + * @gpu_id: GPU id composed of arch_major << 16 | arch_minor << 8 | arch_rev. + * + * Return: Internal requester of a fault in a human readable format for a JM core. + */ +const char *decode_fault_source_jm_t(u16 idx, u32 gpu_id); + +#endif /* _MALI_KBASE_MMU_FAULTS_DECODER_LUTS_JM_H_ */ diff --git a/drivers/gpu/arm/bifrost/mmu/backend/mali_kbase_mmu_jm.c b/drivers/gpu/arm/bifrost/mmu/backend/mali_kbase_mmu_jm.c index 1b2df11f3c3c..e8e136117fd8 100644 --- a/drivers/gpu/arm/bifrost/mmu/backend/mali_kbase_mmu_jm.c +++ b/drivers/gpu/arm/bifrost/mmu/backend/mali_kbase_mmu_jm.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2024 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -29,6 +29,7 @@ #include #include #include +#include void kbase_mmu_get_as_setup(struct kbase_mmu_table *mmut, struct kbase_mmu_setup *const setup) { @@ -52,9 +53,10 @@ void kbase_gpu_report_bus_fault_and_kill(struct kbase_context *kctx, struct kbas struct kbase_fault *fault) { struct kbase_device *const kbdev = kctx->kbdev; - u32 const status = fault->status; - u32 const exception_type = (status & 0xFF); - u32 const exception_data = (status >> 8) & 0xFFFFFF; + const u32 status = fault->status; + const u32 exception_type = AS_FAULTSTATUS_EXCEPTION_TYPE_GET(status); + const u32 access_type = AS_FAULTSTATUS_ACCESS_TYPE_GET(status); + const u32 source_id = AS_FAULTSTATUS_SOURCE_ID_GET(status); unsigned int const as_no = as->number; unsigned long flags; const uintptr_t fault_addr = fault->addr; @@ -64,10 +66,17 @@ void kbase_gpu_report_bus_fault_and_kill(struct kbase_context *kctx, struct kbas "GPU bus fault in AS%u at PA %pK\n" "raw fault status: 0x%X\n" "exception type 0x%X: %s\n" - "exception data 0x%X\n" + "access type 0x%X: %s\n" + "source id 0x%X (core_id:utlb:IR 0x%X:0x%X:0x%X): %s, %s\n" "pid: %d\n", as_no, (void *)fault_addr, status, exception_type, - kbase_gpu_exception_name(exception_type), exception_data, kctx->pid); + kbase_gpu_exception_name(exception_type), access_type, + kbase_gpu_access_type_name(access_type), source_id, + FAULT_SOURCE_ID_CORE_ID_GET(source_id), FAULT_SOURCE_ID_UTLB_ID_GET(source_id), + fault_source_id_internal_requester_get(kbdev, source_id), + fault_source_id_core_type_description_get(kbdev, source_id), + fault_source_id_internal_requester_get_str(kbdev, source_id, access_type), + kctx->pid); /* switch to UNMAPPED mode, will abort all jobs and stop any hw counter * dumping AS transaction begin @@ -105,10 +114,10 @@ void kbase_mmu_report_fault_and_kill(struct kbase_context *kctx, 
struct kbase_as if (!kbase_ctx_flag(kctx, KCTX_PAGE_FAULT_REPORT_SKIP)) { /* decode the fault status */ - u32 exception_type = fault->status & 0xFF; - u32 access_type = (fault->status >> 8) & 0x3; - u32 source_id = (fault->status >> 16); - + const u32 status = fault->status; + const u32 exception_type = AS_FAULTSTATUS_EXCEPTION_TYPE_GET(status); + const u32 access_type = AS_FAULTSTATUS_ACCESS_TYPE_GET(status); + const u32 source_id = AS_FAULTSTATUS_SOURCE_ID_GET(status); /* terminal fault, print info about the fault */ dev_err(kbdev->dev, "Unhandled Page fault in AS%u at VA 0x%016llX\n" @@ -116,11 +125,17 @@ void kbase_mmu_report_fault_and_kill(struct kbase_context *kctx, struct kbase_as "raw fault status: 0x%X\n" "exception type 0x%X: %s\n" "access type 0x%X: %s\n" - "source id 0x%X\n" + "source id 0x%X (core_id:utlb:IR 0x%X:0x%X:0x%X): %s, %s\n" "pid: %d\n", - as_no, fault->addr, reason_str, fault->status, exception_type, + as_no, fault->addr, reason_str, status, exception_type, kbase_gpu_exception_name(exception_type), access_type, - kbase_gpu_access_type_name(fault->status), source_id, kctx->pid); + kbase_gpu_access_type_name(status), source_id, + FAULT_SOURCE_ID_CORE_ID_GET(source_id), + FAULT_SOURCE_ID_UTLB_ID_GET(source_id), + fault_source_id_internal_requester_get(kbdev, source_id), + fault_source_id_core_type_description_get(kbdev, source_id), + fault_source_id_internal_requester_get_str(kbdev, source_id, access_type), + kctx->pid); } /* hardware counters dump fault handling */ diff --git a/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu.c b/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu.c index becbb02aa15a..8d52e90f9579 100644 --- a/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu.c +++ b/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2024 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -151,6 +151,44 @@ static void mmu_invalidate(struct kbase_device *kbdev, struct kbase_context *kct spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); } +/** + * mmu_invalidate_on_teardown() - Perform an invalidate operation on MMU caches on page + * table teardown. + * @kbdev: The Kbase device. + * @kctx: The Kbase context. + * @vpfn: The virtual page frame number at which teardown is done. + * @num_pages: The number of entries that were invalidated in top most level PGD, that + * was affected by the teardown operation. + * @level: The top most PGD level that was touched on teardown. + * @as_nr: GPU address space number for which invalidate is required. + * + * Perform an MMU invalidate operation after the teardown of top most level PGD on a + * particular address space by issuing a UNLOCK command. + */ +static inline void mmu_invalidate_on_teardown(struct kbase_device *kbdev, + struct kbase_context *kctx, u64 vpfn, + size_t num_pages, int level, int as_nr) +{ + u32 invalidate_range_num_pages = num_pages; + u64 invalidate_range_start_vpfn = vpfn; + struct kbase_mmu_hw_op_param op_param; + + if (level != MIDGARD_MMU_BOTTOMLEVEL) { + invalidate_range_num_pages = 1 << ((3 - level) * 9); + invalidate_range_start_vpfn = vpfn - (vpfn & (invalidate_range_num_pages - 1)); + } + + op_param = (struct kbase_mmu_hw_op_param){ + .vpfn = invalidate_range_start_vpfn, + .nr = invalidate_range_num_pages, + .mmu_sync_info = CALLER_MMU_ASYNC, + .kctx_id = kctx ? 
kctx->id : 0xFFFFFFFF, + .flush_skip_levels = (1ULL << level) - 1, + }; + + mmu_invalidate(kbdev, kctx, as_nr, &op_param); +} + /* Perform a flush/invalidate on a particular address space */ static void mmu_flush_invalidate_as(struct kbase_device *kbdev, struct kbase_as *as, @@ -318,14 +356,16 @@ static int kbase_mmu_update_pages_no_flush(struct kbase_device *kbdev, struct kb * @mmut: GPU MMU page table. * @pgds: Physical addresses of page directories to be freed. * @vpfn: The virtual page frame number. - * @level: The level of MMU page table. + * @level: The level of MMU page table that needs to be updated. * @flush_op: The type of MMU flush operation to perform. * @dirty_pgds: Flags to track every level where a PGD has been updated. + * @as_nr: GPU address space number for which invalidate is required. */ static void kbase_mmu_update_and_free_parent_pgds(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, phys_addr_t *pgds, u64 vpfn, int level, - enum kbase_mmu_op_type flush_op, u64 *dirty_pgds); + enum kbase_mmu_op_type flush_op, u64 *dirty_pgds, + int as_nr); static void kbase_mmu_account_freed_pgd(struct kbase_device *kbdev, struct kbase_mmu_table *mmut) { @@ -776,7 +816,7 @@ static bool page_fault_try_alloc(struct kbase_context *kctx, struct kbase_va_reg return false; } - if (kctx->kbdev->pagesize_2mb && new_pages >= NUM_PAGES_IN_2MB_LARGE_PAGE) { + if (kbase_is_large_pages_enabled() && new_pages >= NUM_PAGES_IN_2MB_LARGE_PAGE) { root_pool = &kctx->mem_pools.large[region->gpu_alloc->group_id]; *grow_2mb_pool = true; } else { @@ -923,7 +963,7 @@ void kbase_mmu_page_fault_worker(struct work_struct *data) int err; bool grown = false; size_t pages_to_grow; - bool grow_2mb_pool; + bool grow_2mb_pool = false; struct kbase_sub_alloc *prealloc_sas[2] = { NULL, NULL }; int i; size_t current_backed_size; @@ -1093,7 +1133,7 @@ void kbase_mmu_page_fault_worker(struct work_struct *data) } page_fault_retry: - if (kbdev->pagesize_2mb) { + if 
(kbase_is_large_pages_enabled()) { /* Preallocate (or re-allocate) memory for the sub-allocation structs if necessary */ for (i = 0; i != ARRAY_SIZE(prealloc_sas); ++i) { if (!prealloc_sas[i]) { @@ -1180,10 +1220,14 @@ page_fault_retry: */ op_param.mmu_sync_info = mmu_sync_info; op_param.kctx_id = kctx->id; - /* Can safely skip the invalidate for all levels in case - * of duplicate page faults. + /* Usually it is safe to skip the MMU cache invalidate for all levels + * in case of duplicate page faults. But for the pathological scenario + * where the faulty VA gets mapped by the time page fault worker runs it + * becomes imperative to invalidate MMU cache for all levels, otherwise + * there is a possibility of repeated page faults on GPUs which supports + * fine grained MMU cache invalidation. */ - op_param.flush_skip_levels = 0xF; + op_param.flush_skip_levels = 0x0; op_param.vpfn = fault_pfn; op_param.nr = 1; spin_lock_irqsave(&kbdev->hwaccess_lock, hwaccess_flags); @@ -1217,10 +1261,14 @@ page_fault_retry: /* See comment [1] about UNLOCK usage */ op_param.mmu_sync_info = mmu_sync_info; op_param.kctx_id = kctx->id; - /* Can safely skip the invalidate for all levels in case - * of duplicate page faults. + /* Usually it is safe to skip the MMU cache invalidate for all levels + * in case of duplicate page faults. But for the pathological scenario + * where the faulty VA gets mapped by the time page fault worker runs it + * becomes imperative to invalidate MMU cache for all levels, otherwise + * there is a possibility of repeated page faults on GPUs which supports + * fine grained MMU cache invalidation. */ - op_param.flush_skip_levels = 0xF; + op_param.flush_skip_levels = 0x0; op_param.vpfn = fault_pfn; op_param.nr = 1; spin_lock_irqsave(&kbdev->hwaccess_lock, hwaccess_flags); @@ -1382,7 +1430,7 @@ page_fault_retry: * Otherwise fail the allocation. 
*/ if (pages_to_grow > 0) { - if (kbdev->pagesize_2mb && grow_2mb_pool) { + if (kbase_is_large_pages_enabled() && grow_2mb_pool) { /* Round page requirement up to nearest 2 MB */ struct kbase_mem_pool *const lp_mem_pool = &kctx->mem_pools.large[group_id]; @@ -1595,6 +1643,7 @@ static int mmu_get_lowest_valid_pgd(struct kbase_device *kbdev, struct kbase_mmu return err; } +KBASE_ALLOW_ERROR_INJECTION_TEST_API(mmu_get_lowest_valid_pgd, ERRNO); /* * On success, sets out_pgd to the PGD for the specified level of translation @@ -1700,12 +1749,21 @@ static void mmu_insert_pages_failure_recovery(struct kbase_device *kbdev, mmu_mode->entries_invalidate(&page[idx], pcount); if (!num_of_valid_entries) { + mmu_mode->set_num_valid_entries(page, 0); + kbase_kunmap(p, page); + kbase_mmu_update_and_free_parent_pgds(kbdev, mmut, pgds, vpfn, level - 1, + KBASE_MMU_OP_NONE, dirty_pgds, 0); + + /* No CPU and GPU cache maintenance is done here as caller would do the + * complete flush of GPU cache and invalidation of TLB before the PGD + * page is freed. CPU cache flush would be done when the PGD page is + * returned to the memory pool. + */ + kbase_mmu_add_to_free_pgds_list(mmut, p); - kbase_mmu_update_and_free_parent_pgds(kbdev, mmut, pgds, vpfn, level, - KBASE_MMU_OP_NONE, dirty_pgds); vpfn += count; continue; } @@ -1728,7 +1786,8 @@ next: * going to happen to these pages at this stage. They might return * movable once they are returned to a memory pool. 
*/ - if (kbase_is_page_migration_enabled() && !ignore_page_migration && phys) { + if (kbase_is_page_migration_enabled() && !ignore_page_migration && phys && + !is_huge(*phys) && !is_partial(*phys)) { const u64 num_pages = (to_vpfn - from_vpfn) / GPU_PAGES_PER_CPU_PAGE; u64 i; @@ -2315,7 +2374,15 @@ static int mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mm if (count > remain) count = remain; - if (!vindex && is_huge_head(*phys)) + /* There are 3 conditions to satisfy in order to create a level 2 ATE: + * + * - The GPU VA is aligned to 2 MB. + * - The physical address is tagged as the head of a 2 MB region, + * which guarantees a contiguous physical address range. + * - There are actually 2 MB of virtual and physical pages to map, + * i.e. 512 entries for the MMU page table. + */ + if (!vindex && is_huge_head(*phys) && (count == KBASE_MMU_PAGE_ENTRIES)) cur_level = MIDGARD_MMU_LEVEL(2); else cur_level = MIDGARD_MMU_BOTTOMLEVEL; @@ -2525,6 +2592,7 @@ int kbase_mmu_insert_pages(struct kbase_device *kbdev, struct kbase_mmu_table *m } KBASE_EXPORT_TEST_API(kbase_mmu_insert_pages); +KBASE_ALLOW_ERROR_INJECTION_TEST_API(kbase_mmu_insert_pages, ERRNO); int kbase_mmu_insert_pages_skip_status_update(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, u64 vpfn, @@ -2582,6 +2650,7 @@ int kbase_mmu_insert_aliased_pages(struct kbase_device *kbdev, struct kbase_mmu_ return 0; } +KBASE_ALLOW_ERROR_INJECTION_TEST_API(kbase_mmu_insert_aliased_pages, ERRNO); void kbase_mmu_update(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, int as_nr) { @@ -2699,50 +2768,65 @@ KBASE_EXPORT_TEST_API(kbase_mmu_disable); static void kbase_mmu_update_and_free_parent_pgds(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, phys_addr_t *pgds, u64 vpfn, int level, - enum kbase_mmu_op_type flush_op, u64 *dirty_pgds) + enum kbase_mmu_op_type flush_op, u64 *dirty_pgds, + int as_nr) { - int current_level; + phys_addr_t current_pgd = pgds[level]; + struct page *p = 
phys_to_page(current_pgd); + u64 *current_page = kbase_kmap(p); + unsigned int current_valid_entries = kbdev->mmu_mode->get_num_valid_entries(current_page); + unsigned int index = (vpfn >> ((3 - level) * 9)) & 0x1FFU; lockdep_assert_held(&mmut->mmu_lock); - for (current_level = level - 1; current_level >= MIDGARD_MMU_LEVEL(0); current_level--) { - phys_addr_t current_pgd = pgds[current_level]; - struct page *p = phys_to_page(current_pgd); + /* We need to track every level that needs updating */ + if (dirty_pgds) + *dirty_pgds |= 1ULL << level; - u64 *current_page = kbase_kmap(p); - unsigned int current_valid_entries = - kbdev->mmu_mode->get_num_valid_entries(current_page); - unsigned int index = (vpfn >> ((3 - current_level) * 9)) & 0x1FFU; + kbdev->mmu_mode->entries_invalidate(¤t_page[index], 1); + if (current_valid_entries == 1 && level != MIDGARD_MMU_LEVEL(0)) { + kbdev->mmu_mode->set_num_valid_entries(current_page, 0); - /* We need to track every level that needs updating */ - if (dirty_pgds) - *dirty_pgds |= 1ULL << current_level; + kbase_kunmap(p, current_page); - kbdev->mmu_mode->entries_invalidate(¤t_page[index], 1); - if (current_valid_entries == 1 && current_level != MIDGARD_MMU_LEVEL(0)) { - kbase_kunmap(p, current_page); + kbase_mmu_update_and_free_parent_pgds(kbdev, mmut, pgds, vpfn, level - 1, flush_op, + dirty_pgds, as_nr); - /* Ensure the cacheline containing the last valid entry - * of PGD is invalidated from the GPU cache, before the - * PGD page is freed. + /* Check if fine grained GPU cache maintenance is being used */ + if (flush_op == KBASE_MMU_OP_FLUSH_PT) { + /* Ensure the invalidated PTE is visible in memory right away */ + kbase_mmu_sync_pgd_cpu(kbdev, kbase_dma_addr(p) + (index * sizeof(u64)), + sizeof(u64)); + /* Invalidate the GPU cache for the whole PGD page and not just for + * the cacheline containing the invalidated PTE, as the PGD page is + * going to be freed. 
There is an extremely remote possibility that + * other cachelines (containing all invalid PTEs) of PGD page are + * also present in the GPU cache. */ - kbase_mmu_sync_pgd_gpu(kbdev, mmut->kctx, - current_pgd + (index * sizeof(u64)), sizeof(u64), - flush_op); - - kbase_mmu_add_to_free_pgds_list(mmut, p); - } else { - current_valid_entries--; - - kbdev->mmu_mode->set_num_valid_entries(current_page, current_valid_entries); - - kbase_kunmap(p, current_page); - - kbase_mmu_sync_pgd(kbdev, mmut->kctx, current_pgd + (index * sizeof(u64)), - kbase_dma_addr(p) + (index * sizeof(u64)), sizeof(u64), - flush_op); - break; + kbase_mmu_sync_pgd_gpu(kbdev, mmut->kctx, current_pgd, 512 * sizeof(u64), + KBASE_MMU_OP_FLUSH_PT); } + + kbase_mmu_add_to_free_pgds_list(mmut, p); + } else { + current_valid_entries--; + + kbdev->mmu_mode->set_num_valid_entries(current_page, current_valid_entries); + + kbase_kunmap(p, current_page); + + kbase_mmu_sync_pgd(kbdev, mmut->kctx, current_pgd + (index * sizeof(u64)), + kbase_dma_addr(p) + (index * sizeof(u64)), sizeof(u64), + flush_op); + + /* When fine grained GPU cache maintenance is used then invalidate the MMU caches + * now as the top most level PGD entry, affected by the teardown operation, has + * been invalidated (both in memory as well as in GPU L2 cache). This is to avoid + * the possibility of invalid ATEs being reloaded into the GPU L2 cache whilst the + * teardown is happening. 
+ */ + if (flush_op == KBASE_MMU_OP_FLUSH_PT) + mmu_invalidate_on_teardown(kbdev, mmut->kctx, vpfn, 1, level, as_nr); } } @@ -2783,13 +2867,11 @@ static void mmu_flush_invalidate_teardown_pages(struct kbase_device *kbdev, } #if MALI_USE_CSF else { - /* Partial GPU cache flush with MMU cache invalidation */ + /* Partial GPU cache flush of the pages that were unmapped */ unsigned long irq_flags; unsigned int i; bool flush_done = false; - mmu_invalidate(kbdev, kctx, as_nr, op_param); - for (i = 0; !flush_done && i < phys_page_nr; i++) { spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags); if (kbdev->pm.backend.gpu_ready && (!kctx || kctx->as_nr >= 0)) @@ -2809,7 +2891,7 @@ static void mmu_flush_invalidate_teardown_pages(struct kbase_device *kbdev, static int kbase_mmu_teardown_pgd_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, u64 vpfn, size_t nr, u64 *dirty_pgds, struct list_head *free_pgds_list, - enum kbase_mmu_op_type flush_op) + enum kbase_mmu_op_type flush_op, int as_nr) { struct kbase_mmu_mode const *mmu_mode = kbdev->mmu_mode; @@ -2832,8 +2914,7 @@ static int kbase_mmu_teardown_pgd_pages(struct kbase_device *kbdev, struct kbase phys_addr_t pgd = mmut->pgd; struct page *p = phys_to_page(pgd); - if (count > nr) - count = nr; + count = MIN(nr, count); /* need to check if this is a 2MB page or a small page */ for (level = MIDGARD_MMU_TOPLEVEL; level <= MIDGARD_MMU_BOTTOMLEVEL; level++) { @@ -2844,23 +2925,12 @@ static int kbase_mmu_teardown_pgd_pages(struct kbase_device *kbdev, struct kbase if (mmu_mode->ate_is_valid(page[index], level)) break; /* keep the mapping */ else if (!mmu_mode->pte_is_valid(page[index], level)) { - /* nothing here, advance */ - switch (level) { - case MIDGARD_MMU_LEVEL(0): - count = 134217728; - break; - case MIDGARD_MMU_LEVEL(1): - count = 262144; - break; - case MIDGARD_MMU_LEVEL(2): - count = 512; - break; - case MIDGARD_MMU_LEVEL(3): - count = 1; - break; - } - if (count > nr) - count = nr; + dev_warn(kbdev->dev, 
"Invalid PTE found @ level %d for VA %llx", + level, vpfn << PAGE_SHIFT); + /* nothing here, advance to the next PTE of the current level */ + count = (1 << ((3 - level) * 9)); + count -= (vpfn & (count - 1)); + count = MIN(nr, count); goto next; } next_pgd = mmu_mode->pte_to_phy_addr( @@ -2915,20 +2985,37 @@ static int kbase_mmu_teardown_pgd_pages(struct kbase_device *kbdev, struct kbase mmu_mode->entries_invalidate(&page[index], pcount); if (!num_of_valid_entries) { + mmu_mode->set_num_valid_entries(page, 0); + kbase_kunmap(p, page); - /* Ensure the cacheline(s) containing the last valid entries - * of PGD is invalidated from the GPU cache, before the - * PGD page is freed. + /* To avoid the invalid ATEs from the PGD page (that is going to be freed) + * from getting reloaded into the GPU L2 cache whilst the teardown is + * happening, the fine grained GPU L2 cache maintenance is done in the top + * to bottom level PGD order. MMU cache invalidation is done after + * invalidating the entry of top most level PGD, affected by the teardown. */ - kbase_mmu_sync_pgd_gpu(kbdev, mmut->kctx, pgd + (index * sizeof(u64)), - pcount * sizeof(u64), flush_op); + kbase_mmu_update_and_free_parent_pgds(kbdev, mmut, pgds, vpfn, level - 1, + flush_op, dirty_pgds, as_nr); + + /* Check if fine grained GPU cache maintenance is being used */ + if (flush_op == KBASE_MMU_OP_FLUSH_PT) { + /* Ensure the invalidated ATEs are visible in memory right away */ + kbase_mmu_sync_pgd_cpu(kbdev, + kbase_dma_addr(p) + (index * sizeof(u64)), + pcount * sizeof(u64)); + /* Invalidate the GPU cache for the whole PGD page and not just for + * the cachelines containing the invalidated ATEs, as the PGD page + * is going to be freed. There is an extremely remote possibility + * that other cachelines (containing all invalid ATEs) of PGD page + * are also present in the GPU cache. 
+ */ + kbase_mmu_sync_pgd_gpu(kbdev, mmut->kctx, pgd, 512 * sizeof(u64), + KBASE_MMU_OP_FLUSH_PT); + } kbase_mmu_add_to_free_pgds_list(mmut, p); - kbase_mmu_update_and_free_parent_pgds(kbdev, mmut, pgds, vpfn, level, - flush_op, dirty_pgds); - vpfn += count; nr -= count; continue; @@ -2939,6 +3026,12 @@ static int kbase_mmu_teardown_pgd_pages(struct kbase_device *kbdev, struct kbase kbase_mmu_sync_pgd(kbdev, mmut->kctx, pgd + (index * sizeof(u64)), kbase_dma_addr(p) + (index * sizeof(u64)), pcount * sizeof(u64), flush_op); + + /* When fine grained GPU cache maintenance is used then invalidation of MMU cache + * is done inline for every bottom level PGD touched in the teardown. + */ + if (flush_op == KBASE_MMU_OP_FLUSH_PT) + mmu_invalidate_on_teardown(kbdev, mmut->kctx, vpfn, pcount, level, as_nr); next: kbase_kunmap(p, page); vpfn += count; @@ -3032,7 +3125,7 @@ static int mmu_teardown_pages(struct kbase_device *kbdev, struct kbase_mmu_table mutex_lock(&mmut->mmu_lock); err = kbase_mmu_teardown_pgd_pages(kbdev, mmut, vpfn, nr_virt_pages, &dirty_pgds, - &free_pgds_list, flush_op); + &free_pgds_list, flush_op, as_nr); /* Set up MMU operation parameters. See above about MMU cache flush strategy. 
*/ op_param = (struct kbase_mmu_hw_op_param){ @@ -3069,6 +3162,7 @@ int kbase_mmu_teardown_pages(struct kbase_device *kbdev, struct kbase_mmu_table return mmu_teardown_pages(kbdev, mmut, vpfn, phys, nr_phys_pages, nr_virt_pages, as_nr, false); } +KBASE_EXPORT_TEST_API(kbase_mmu_teardown_pages); int kbase_mmu_teardown_imported_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, u64 vpfn, struct tagged_addr *phys, size_t nr_phys_pages, @@ -3583,14 +3677,12 @@ int kbase_mmu_migrate_page(struct tagged_addr old_phys, struct tagged_addr new_p ret = kbase_mmu_hw_do_unlock_no_addr(kbdev, as, &op_param); } } - spin_unlock_irqrestore(&kbdev->hwaccess_lock, hwaccess_flags); - /* Releasing locks before checking the migration transaction error state */ - mutex_unlock(&kbdev->mmu_hw_mutex); - spin_lock_irqsave(&kbdev->hwaccess_lock, hwaccess_flags); /* Release the transition prevention in L2 by ending the transaction */ mmu_page_migration_transaction_end(kbdev); spin_unlock_irqrestore(&kbdev->hwaccess_lock, hwaccess_flags); + /* Releasing locks before checking the migration transaction error state */ + mutex_unlock(&kbdev->mmu_hw_mutex); /* Checking the final migration transaction error state */ if (ret < 0) { diff --git a/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_faults_decoder.c b/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_faults_decoder.c new file mode 100644 index 000000000000..e1b72ed24321 --- /dev/null +++ b/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_faults_decoder.c @@ -0,0 +1,124 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * + * (C) COPYRIGHT 2024 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. 
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+/**
+ * DOC: Base kernel MMU faults decoder.
+ */
+
+#include
+#include
+#if MALI_USE_CSF
+#include
+#else
+#include
+#endif
+
+#include
+#include
+
+unsigned int fault_source_id_internal_requester_get(struct kbase_device *kbdev,
+						    unsigned int source_id)
+{
+	if (kbdev->gpu_props.gpu_id.product_model < GPU_ID_MODEL_MAKE(14, 0))
+		return ((source_id >> 4) & 0xF);
+	else
+		return (source_id & 0x3F);
+}
+/* Core-type name of @source_id: via CORE_ID before GPU_ID_MODEL_MAKE(14, 0), else CORE_TYPE. */
+static inline const char *source_id_enc_core_type_get_str(struct kbase_device *kbdev,
+							  unsigned int source_id)
+{
+	if (kbdev->gpu_props.gpu_id.product_model < GPU_ID_MODEL_MAKE(14, 0))
+		return decode_fault_source_core_id_t_core_type(
+			FAULT_SOURCE_ID_CORE_ID_GET(source_id), kbdev->gpu_props.gpu_id.arch_id);
+	else
+		return decode_fault_source_core_type_t_name(
+			FAULT_SOURCE_ID_CORE_TYPE_GET(source_id), kbdev->gpu_props.gpu_id.arch_id);
+}
+const char *fault_source_id_internal_requester_get_str(struct kbase_device *kbdev,
+						       unsigned int source_id,
+						       unsigned int access_type)
+{
+	unsigned int ir = fault_source_id_internal_requester_get(kbdev, source_id);
+	bool older_source_id_fmt =
+		(kbdev->gpu_props.gpu_id.product_model < GPU_ID_MODEL_MAKE(14, 0));
+	unsigned int utlb_id = 0;
+
+	if (older_source_id_fmt)
+		utlb_id = FAULT_SOURCE_ID_UTLB_ID_GET(source_id);
+	/* strcmp() returns 0 on a match, so each core-type test must compare the result with 0. */
+	if (strcmp(source_id_enc_core_type_get_str(kbdev, source_id), "shader") == 0) {
+		if (utlb_id == 0) {
+			if (access_type == AS_FAULTSTATUS_ACCESS_TYPE_READ)
+				return decode_fault_source_shader_r_t(
+					ir, kbdev->gpu_props.gpu_id.arch_id);
+			else
+				return decode_fault_source_shader_w_t(
+					ir, kbdev->gpu_props.gpu_id.arch_id);
+		} else
+			return "Load/store cache";
+	} else if (strcmp(source_id_enc_core_type_get_str(kbdev, source_id), "tiler") == 0) {
+#if MALI_USE_CSF
+		if (utlb_id == 0) {
+			if (access_type == AS_FAULTSTATUS_ACCESS_TYPE_READ)
+				return decode_fault_source_tiler_r_t(
+					ir, kbdev->gpu_props.gpu_id.arch_id);
+			else
+				return decode_fault_source_tiler_w_t(
+					ir, kbdev->gpu_props.gpu_id.arch_id);
+		} else
+			return "The polygon list writer. No further details.";
+#else
+		return (utlb_id == 0) ? "Anything other than the polygon list writer" :
+					"The polygon list writer";
+#endif
+	}
+#if MALI_USE_CSF
+	else if (strcmp(source_id_enc_core_type_get_str(kbdev, source_id), "csf") == 0) {
+		if (access_type == AS_FAULTSTATUS_ACCESS_TYPE_READ)
+			return decode_fault_source_csf_r_t(ir, kbdev->gpu_props.gpu_id.arch_id);
+		else
+			return decode_fault_source_csf_w_t(ir, kbdev->gpu_props.gpu_id.arch_id);
+	}
+#else
+	else if (strcmp(source_id_enc_core_type_get_str(kbdev, source_id), "jm") == 0)
+		return decode_fault_source_jm_t(ir, kbdev->gpu_props.gpu_id.arch_id);
+#endif
+	else if (!strcmp(source_id_enc_core_type_get_str(kbdev, source_id), "l2c") ||
+		 !strcmp(source_id_enc_core_type_get_str(kbdev, source_id), "memsys") ||
+		 !strcmp(source_id_enc_core_type_get_str(kbdev, source_id), "mmu")) {
+		return "Not used";
+	}
+
+	return "unknown";
+}
+
+const char *fault_source_id_core_type_description_get(struct kbase_device *kbdev,
+						      unsigned int source_id)
+{
+	if (kbdev->gpu_props.gpu_id.product_model < GPU_ID_MODEL_MAKE(14, 0)) {
+		return decode_fault_source_core_id_t_desc(FAULT_SOURCE_ID_CORE_ID_GET(source_id),
+							  kbdev->gpu_props.gpu_id.arch_id);
+	} else {
+		return decode_fault_source_core_type_t_desc(
+			FAULT_SOURCE_ID_CORE_TYPE_GET(source_id), kbdev->gpu_props.gpu_id.arch_id);
+	}
+}
diff --git a/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_faults_decoder.h b/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_faults_decoder.h
new file mode 100644
index 000000000000..da5610ec94b0
--- /dev/null
+++ b/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_faults_decoder.h
@@ -0,0 +1,143 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *
+ * (C) COPYRIGHT 2024 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+#ifndef _MALI_KBASE_MMU_FAULTS_DECODER_H_
+#define _MALI_KBASE_MMU_FAULTS_DECODER_H_
+
+#include
+#include
+
+/* FAULTSTATUS.SOURCE_ID encoding */
+#define SOURCE_ID_CORE_ID_SHIFT (9)
+#define SOURCE_ID_CORE_ID_MASK (0x7F << SOURCE_ID_CORE_ID_SHIFT)
+#define SOURCE_ID_UTLB_ID_SHIFT (8)
+#define SOURCE_ID_UTLB_ID_MASK (0x01 << SOURCE_ID_UTLB_ID_SHIFT)
+#define SOURCE_ID_CORE_TYPE_SHIFT (12)
+#define SOURCE_ID_CORE_TYPE_MASK (0x0F << SOURCE_ID_CORE_TYPE_SHIFT)
+#define SOURCE_ID_CORE_INDEX_SHIFT (6)
+#define SOURCE_ID_CORE_INDEX_MASK (0x3F << SOURCE_ID_CORE_INDEX_SHIFT)
+
+/**
+ * FAULT_SOURCE_ID_CORE_ID_GET() - Get core ID of a fault.
+ *
+ * @source_id: SOURCE_ID field of FAULTSTATUS (MMU) or GPU_FAULTSTATUS (GPU)
+ *             registers.
+ *
+ * Get core ID part of SOURCE_ID field of FAULTSTATUS (MMU) or
+ * GPU_FAULTSTATUS (GPU) registers.
+ *
+ * Return: core ID of the fault.
+ */
+#define FAULT_SOURCE_ID_CORE_ID_GET(source_id) \
+	(((source_id) & SOURCE_ID_CORE_ID_MASK) >> SOURCE_ID_CORE_ID_SHIFT)
+
+/**
+ * FAULT_SOURCE_ID_UTLB_ID_GET() - Get UTLB ID of a fault.
+ *
+ * @source_id: SOURCE_ID field of FAULTSTATUS (MMU) or GPU_FAULTSTATUS (GPU)
+ *             registers.
+ *
+ * Get UTLB(micro-TLB) ID part of SOURCE_ID field of FAULTSTATUS (MMU) or
+ * GPU_FAULTSTATUS (GPU) registers.
+ *
+ * Return: UTLB ID of the fault.
+ */
+#define FAULT_SOURCE_ID_UTLB_ID_GET(source_id) \
+	(((source_id) & SOURCE_ID_UTLB_ID_MASK) >> SOURCE_ID_UTLB_ID_SHIFT)
+
+/**
+ * FAULT_SOURCE_ID_CORE_TYPE_GET() - Get core type of a fault.
+ *
+ * @source_id: SOURCE_ID field of FAULTSTATUS (MMU) or GPU_FAULTSTATUS (GPU)
+ *             registers.
+ *
+ * Get core type part of SOURCE_ID field of FAULTSTATUS (MMU) or
+ * GPU_FAULTSTATUS (GPU) registers.
+ *
+ * Return: core type code of the fault.
+ */
+#define FAULT_SOURCE_ID_CORE_TYPE_GET(source_id) \
+	(((source_id) & SOURCE_ID_CORE_TYPE_MASK) >> SOURCE_ID_CORE_TYPE_SHIFT)
+
+/**
+ * FAULT_SOURCE_ID_CORE_INDEX_GET() - Get core index of a fault.
+ *
+ * @source_id: SOURCE_ID field of FAULTSTATUS (MMU) or GPU_FAULTSTATUS (GPU)
+ *             registers.
+ *
+ * Get core index part of SOURCE_ID field of FAULTSTATUS (MMU) or
+ * GPU_FAULTSTATUS (GPU) registers.
+ *
+ * Return: core index of the fault.
+ */
+#define FAULT_SOURCE_ID_CORE_INDEX_GET(source_id) \
+	(((source_id) & SOURCE_ID_CORE_INDEX_MASK) >> SOURCE_ID_CORE_INDEX_SHIFT)
+
+/**
+ * fault_source_id_internal_requester_get() - Get internal_requester of a fault.
+ *
+ * @kbdev:     The kbase device structure for the device (must be a valid pointer).
+ * @source_id: SOURCE_ID field of FAULTSTATUS (MMU) or GPU_FAULTSTATUS (GPU)
+ *             registers.
+ *
+ * Get internal_requester part of SOURCE_ID field of FAULTSTATUS (MMU) or
+ * GPU_FAULTSTATUS (GPU) registers.
+ *
+ * Return: Internal requester code of the fault.
+ */
+unsigned int fault_source_id_internal_requester_get(struct kbase_device *kbdev,
+						    unsigned int source_id);
+
+/**
+ * fault_source_id_internal_requester_get_str() - Get internal_requester of a
+ * fault in a human readable format.
+ *
+ * @kbdev:       The kbase device structure for the device (must be a valid pointer).
+ * @source_id:   SOURCE_ID field of FAULTSTATUS (MMU) or GPU_FAULTSTATUS (GPU)
+ *               registers.
+ * @access_type: the direction of data transfer that caused the fault (atomic,
+ *               execute, read, write)
+ *
+ * Get the human readable decoding of internal_requester part of SOURCE_ID field
+ * of FAULTSTATUS (MMU) or GPU_FAULTSTATUS (GPU) registers.
+ *
+ * Return: Internal requester of the fault in human readable format.
+ */
+const char *fault_source_id_internal_requester_get_str(struct kbase_device *kbdev,
+						       unsigned int source_id,
+						       unsigned int access_type);
+
+/**
+ * fault_source_id_core_type_description_get() - Get the core type of
+ * a fault in a human readable format.
+ *
+ * @kbdev:     The kbase device structure for the device (must be a valid pointer).
+ * @source_id: SOURCE_ID field of FAULTSTATUS (MMU) or GPU_FAULTSTATUS (GPU)
+ *             registers.
+ *
+ * Get the human readable decoding of core type part of SOURCE_ID field
+ * of FAULTSTATUS (MMU) or GPU_FAULTSTATUS (GPU) registers.
+ *
+ * Return: core type of the fault in human readable format.
+ */
+const char *fault_source_id_core_type_description_get(struct kbase_device *kbdev,
+						      unsigned int source_id);
+
+#endif /* _MALI_KBASE_MMU_FAULTS_DECODER_H_ */
diff --git a/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_faults_decoder_luts.c b/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_faults_decoder_luts.c
new file mode 100644
index 000000000000..8e90cacb4efa
--- /dev/null
+++ b/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_faults_decoder_luts.c
@@ -0,0 +1,660 @@
+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
+/*
+ *
+ * (C) COPYRIGHT 2019-2024 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+/**
+ * DOC: Base kernel MMU faults decoder.
+ */
+
+#include
+
+#define GPU_ID_ARCH_ID_MAJOR_GET(gpu_id) (((gpu_id) >> 16) & 0xFF)
+#define GPU_ID_ARCH_ID_MINOR_AND_REV_GET(gpu_id) ((gpu_id) & 0xFFFF)
+#define NELEMS(s) (sizeof(s) / sizeof((s)[0]))
+
+struct decode_lut_element {
+	u16 arch_minor_rev;
+	u16 key;
+	const char *text;
+};
+/* First entry matching @key with wildcard (0xffff) or equal arch_minor_rev; else "unknown". */
+static const char *decode_lut_element_lookup(u16 arch_minor_rev, u16 key,
+					     const struct decode_lut_element *decode_element_lut,
+					     unsigned int lut_len)
+{
+	const struct decode_lut_element *p;
+
+	for (p = decode_element_lut; p < decode_element_lut + lut_len; p++) {
+		if (p->key == key &&
+		    (p->arch_minor_rev == 0xffff || p->arch_minor_rev == arch_minor_rev))
+			break;
+	}
+	if (p < decode_element_lut + lut_len)
+		return p->text;
+	else
+		return "unknown";
+}
+
+/* Auto-generated code: DO NOT MODIFY!
*/ + +static struct decode_lut_element lut_fault_source_core_type_t_name_major_9[] = { + { 0xFFFF, 0, "shader" }, { 0xFFFF, 1, "l2c" }, { 0xFFFF, 2, "tiler" }, + { 0xFFFF, 3, "mmu" }, { 0xFFFF, 4, "jm" }, { 0xFFFF, 5, "pmb" }, +}; + +static struct decode_lut_element lut_fault_source_core_type_t_desc_major_9[] = { + { 0xFFFF, 0, "Shader core" }, { 0xFFFF, 1, "Level 2 cache" }, + { 0xFFFF, 2, "Tiler" }, { 0xFFFF, 3, "MMU" }, + { 0xFFFF, 4, "Job Manager" }, { 0xFFFF, 5, "Performance Monitor Block" }, +}; + +static struct decode_lut_element lut_fault_source_core_type_t_name_major_10[] = { + { 0xFFFF, 0, "shader" }, { 0xFFFF, 1, "l2c" }, { 0xFFFF, 2, "tiler" }, + { 0xFFFF, 3, "mmu" }, { 0xFFFF, 4, "csf" }, { 0xFFFF, 5, "memsys" }, +}; + +static struct decode_lut_element lut_fault_source_core_type_t_desc_major_10[] = { + { 0xFFFF, 0, "Shader core" }, { 0xFFFF, 1, "Level 2 cache" }, + { 0xFFFF, 2, "Tiler" }, { 0xFFFF, 3, "MMU" }, + { 0xFFFF, 4, "CSF" }, { 0xFFFF, 5, "Memory system" }, +}; + +static struct decode_lut_element lut_fault_source_core_type_t_name_major_11[] = { + { 0xFFFF, 0, "shader" }, { 0xFFFF, 1, "l2c" }, { 0xFFFF, 2, "tiler" }, + { 0xFFFF, 3, "mmu" }, { 0xFFFF, 4, "csf" }, { 0xFFFF, 5, "memsys" }, +}; + +static struct decode_lut_element lut_fault_source_core_type_t_desc_major_11[] = { + { 0xFFFF, 0, "Shader core" }, { 0xFFFF, 1, "Level 2 cache" }, + { 0xFFFF, 2, "Tiler" }, { 0xFFFF, 3, "MMU" }, + { 0xFFFF, 4, "CSF" }, { 0xFFFF, 5, "Memory system" }, +}; + +static struct decode_lut_element lut_fault_source_core_type_t_name_major_12[] = { + { 0xFFFF, 0, "shader" }, { 0xFFFF, 1, "l2c" }, { 0xFFFF, 2, "tiler" }, + { 0xFFFF, 3, "mmu" }, { 0xFFFF, 4, "csf" }, { 0xFFFF, 5, "memsys" }, +}; + +static struct decode_lut_element lut_fault_source_core_type_t_desc_major_12[] = { + { 0xFFFF, 0, "Shader core" }, { 0xFFFF, 1, "Level 2 cache" }, + { 0xFFFF, 2, "Tiler" }, { 0xFFFF, 3, "MMU" }, + { 0xFFFF, 4, "CSF" }, { 0xFFFF, 5, "Memory system" }, +}; + +static struct 
decode_lut_element lut_fault_source_core_id_t_desc_major_9[] = { + { 0xFFFF, 0, "Shader core 0" }, + { 0xFFFF, 1, "Shader core 1" }, + { 0xFFFF, 2, "Shader core 2" }, + { 0xFFFF, 3, "Shader core 3" }, + { 0xFFFF, 4, "Shader core 4" }, + { 0xFFFF, 5, "Shader core 5" }, + { 0xFFFF, 6, "Shader core 6" }, + { 0xFFFF, 7, "Shader core 7" }, + { 0xFFFF, 8, "Shader core 8" }, + { 0xFFFF, 9, "Shader core 9" }, + { 0xFFFF, 10, "Shader core 10" }, + { 0xFFFF, 11, "Shader core 11" }, + { 0xFFFF, 12, "Shader core 12" }, + { 0xFFFF, 13, "Shader core 13" }, + { 0xFFFF, 14, "Shader core 14" }, + { 0xFFFF, 15, "Shader core 15" }, + { 0xFFFF, 16, "Shader core 16" }, + { 0xFFFF, 17, "Shader core 17" }, + { 0xFFFF, 18, "Shader core 18" }, + { 0xFFFF, 19, "Shader core 19" }, + { 0xFFFF, 20, "Shader core 20" }, + { 0xFFFF, 21, "Shader core 21" }, + { 0xFFFF, 22, "Shader core 22" }, + { 0xFFFF, 23, "Shader core 23" }, + { 0xFFFF, 24, "Shader core 24" }, + { 0xFFFF, 25, "Shader core 25" }, + { 0xFFFF, 26, "Shader core 26" }, + { 0xFFFF, 27, "Shader core 27" }, + { 0xFFFF, 28, "Shader core 28" }, + { 0xFFFF, 29, "Shader core 29" }, + { 0xFFFF, 30, "Shader core 30" }, + { 0xFFFF, 31, "Shader core 31" }, + { 0xFFFF, 41, "L2 Slice 3" }, + { 0xFFFF, 43, "L2 Slice 2" }, + { 0xFFFF, 45, "L2 Slice 1" }, + { 0xFFFF, 46, "PMB" }, + { 0xFFFF, 47, "L2 Slice 0" }, + { 0xFFFF, 51, "Tiler" }, + { 0xFFFF, 55, "MMU" }, + { 0xFFFF, 62, "Job Manager" }, +}; + +static struct decode_lut_element lut_fault_source_core_id_t_core_type_major_9[] = { + { 0xFFFF, 0, "shader" }, { 0xFFFF, 1, "shader" }, { 0xFFFF, 2, "shader" }, + { 0xFFFF, 3, "shader" }, { 0xFFFF, 4, "shader" }, { 0xFFFF, 5, "shader" }, + { 0xFFFF, 6, "shader" }, { 0xFFFF, 7, "shader" }, { 0xFFFF, 8, "shader" }, + { 0xFFFF, 9, "shader" }, { 0xFFFF, 10, "shader" }, { 0xFFFF, 11, "shader" }, + { 0xFFFF, 12, "shader" }, { 0xFFFF, 13, "shader" }, { 0xFFFF, 14, "shader" }, + { 0xFFFF, 15, "shader" }, { 0xFFFF, 16, "shader" }, { 0xFFFF, 17, "shader" }, + { 
0xFFFF, 18, "shader" }, { 0xFFFF, 19, "shader" }, { 0xFFFF, 20, "shader" }, + { 0xFFFF, 21, "shader" }, { 0xFFFF, 22, "shader" }, { 0xFFFF, 23, "shader" }, + { 0xFFFF, 24, "shader" }, { 0xFFFF, 25, "shader" }, { 0xFFFF, 26, "shader" }, + { 0xFFFF, 27, "shader" }, { 0xFFFF, 28, "shader" }, { 0xFFFF, 29, "shader" }, + { 0xFFFF, 30, "shader" }, { 0xFFFF, 31, "shader" }, { 0xFFFF, 41, "l2c" }, + { 0xFFFF, 43, "l2c" }, { 0xFFFF, 45, "l2c" }, { 0xFFFF, 46, "pmb" }, + { 0xFFFF, 47, "l2c" }, { 0xFFFF, 51, "tiler" }, { 0xFFFF, 55, "mmu" }, + { 0xFFFF, 62, "jm" }, +}; + +static struct decode_lut_element lut_fault_source_core_id_t_desc_major_10[] = { + { 0xFFFF, 0, "Shader core 0" }, + { 0xFFFF, 1, "Shader core 1" }, + { 0xFFFF, 2, "Shader core 2" }, + { 0xFFFF, 3, "Shader core 3" }, + { 0xFFFF, 4, "Shader core 4" }, + { 0xFFFF, 5, "Shader core 5" }, + { 0xFFFF, 6, "Shader core 6" }, + { 0xFFFF, 7, "Shader core 7" }, + { 0xFFFF, 8, "Shader core 8" }, + { 0xFFFF, 9, "Shader core 9" }, + { 0xFFFF, 10, "Shader core 10" }, + { 0xFFFF, 11, "Shader core 11" }, + { 0xFFFF, 12, "Shader core 12" }, + { 0xFFFF, 13, "Shader core 13" }, + { 0xFFFF, 14, "Shader core 14" }, + { 0xFFFF, 15, "Shader core 15" }, + { 0xFFFF, 16, "Shader core 16" }, + { 0xFFFF, 17, "Shader core 17" }, + { 0xFFFF, 18, "Shader core 18" }, + { 0xFFFF, 19, "Shader core 19" }, + { 0xFFFF, 20, "Shader core 20" }, + { 0xFFFF, 21, "Shader core 21" }, + { 0xFFFF, 22, "Shader core 22" }, + { 0xFFFF, 23, "Shader core 23" }, + { 0xFFFF, 24, "Shader core 24" }, + { 0xFFFF, 25, "Shader core 25" }, + { 0xFFFF, 26, "Shader core 26" }, + { 0xFFFF, 27, "Shader core 27" }, + { 0xFFFF, 28, "Shader core 28" }, + { 0xFFFF, 29, "Shader core 29" }, + { 0xFFFF, 30, "Shader core 30" }, + { 0xFFFF, 31, "Shader core 31" }, + { 0xFFFF, 41, "L2 Slice 3" }, + { 0xFFFF, 43, "L2 Slice 2" }, + { 0xFFFF, 45, "L2 Slice 1" }, + { 0xFFFF, 47, "L2 Slice 0" }, + { 0xFFFF, 51, "Tiler" }, + { 0xFFFF, 55, "MMU" }, + { 0xFFFF, 33, "L2 Slice 7" }, + { 
0xFFFF, 35, "L2 Slice 6" }, + { 0xFFFF, 37, "L2 Slice 5" }, + { 0xFFFF, 39, "L2 Slice 4" }, + { 0xFFFF, 48, "Memory system, undefined" }, + { 0xFFFF, 62, "Command Stream Frontend" }, +}; + +static struct decode_lut_element lut_fault_source_core_id_t_core_type_major_10[] = { + { 0xFFFF, 0, "shader" }, { 0xFFFF, 1, "shader" }, { 0xFFFF, 2, "shader" }, + { 0xFFFF, 3, "shader" }, { 0xFFFF, 4, "shader" }, { 0xFFFF, 5, "shader" }, + { 0xFFFF, 6, "shader" }, { 0xFFFF, 7, "shader" }, { 0xFFFF, 8, "shader" }, + { 0xFFFF, 9, "shader" }, { 0xFFFF, 10, "shader" }, { 0xFFFF, 11, "shader" }, + { 0xFFFF, 12, "shader" }, { 0xFFFF, 13, "shader" }, { 0xFFFF, 14, "shader" }, + { 0xFFFF, 15, "shader" }, { 0xFFFF, 16, "shader" }, { 0xFFFF, 17, "shader" }, + { 0xFFFF, 18, "shader" }, { 0xFFFF, 19, "shader" }, { 0xFFFF, 20, "shader" }, + { 0xFFFF, 21, "shader" }, { 0xFFFF, 22, "shader" }, { 0xFFFF, 23, "shader" }, + { 0xFFFF, 24, "shader" }, { 0xFFFF, 25, "shader" }, { 0xFFFF, 26, "shader" }, + { 0xFFFF, 27, "shader" }, { 0xFFFF, 28, "shader" }, { 0xFFFF, 29, "shader" }, + { 0xFFFF, 30, "shader" }, { 0xFFFF, 31, "shader" }, { 0xFFFF, 41, "l2c" }, + { 0xFFFF, 43, "l2c" }, { 0xFFFF, 45, "l2c" }, { 0xFFFF, 47, "l2c" }, + { 0xFFFF, 51, "tiler" }, { 0xFFFF, 55, "mmu" }, { 0xFFFF, 33, "l2c" }, + { 0xFFFF, 35, "l2c" }, { 0xFFFF, 37, "l2c" }, { 0xFFFF, 39, "l2c" }, + { 0xFFFF, 48, "memsys" }, { 0xFFFF, 62, "csf" }, +}; + +static struct decode_lut_element lut_fault_source_core_id_t_desc_major_11[] = { + { 0xFFFF, 0, "Shader core 0" }, + { 0xFFFF, 1, "Shader core 1" }, + { 0xFFFF, 2, "Shader core 2" }, + { 0xFFFF, 3, "Shader core 3" }, + { 0xFFFF, 4, "Shader core 4" }, + { 0xFFFF, 5, "Shader core 5" }, + { 0xFFFF, 6, "Shader core 6" }, + { 0xFFFF, 7, "Shader core 7" }, + { 0xFFFF, 8, "Shader core 8" }, + { 0xFFFF, 9, "Shader core 9" }, + { 0xFFFF, 10, "Shader core 10" }, + { 0xFFFF, 11, "Shader core 11" }, + { 0xFFFF, 12, "Shader core 12" }, + { 0xFFFF, 13, "Shader core 13" }, + { 0xFFFF, 14, 
"Shader core 14" }, + { 0xFFFF, 15, "Shader core 15" }, + { 0xFFFF, 16, "Shader core 16" }, + { 0xFFFF, 17, "Shader core 17" }, + { 0xFFFF, 18, "Shader core 18" }, + { 0xFFFF, 19, "Shader core 19" }, + { 0xFFFF, 20, "Shader core 20" }, + { 0xFFFF, 21, "Shader core 21" }, + { 0xFFFF, 22, "Shader core 22" }, + { 0xFFFF, 23, "Shader core 23" }, + { 0xFFFF, 24, "Shader core 24" }, + { 0xFFFF, 25, "Shader core 25" }, + { 0xFFFF, 26, "Shader core 26" }, + { 0xFFFF, 27, "Shader core 27" }, + { 0xFFFF, 28, "Shader core 28" }, + { 0xFFFF, 29, "Shader core 29" }, + { 0xFFFF, 30, "Shader core 30" }, + { 0xFFFF, 31, "Shader core 31" }, + { 0xFFFF, 41, "L2 Slice 3" }, + { 0xFFFF, 43, "L2 Slice 2" }, + { 0xFFFF, 45, "L2 Slice 1" }, + { 0xFFFF, 47, "L2 Slice 0" }, + { 0xFFFF, 51, "Tiler" }, + { 0xFFFF, 55, "MMU" }, + { 0xFFFF, 33, "L2 Slice 7" }, + { 0xFFFF, 35, "L2 Slice 6" }, + { 0xFFFF, 37, "L2 Slice 5" }, + { 0xFFFF, 39, "L2 Slice 4" }, + { 0xFFFF, 48, "Memory system, undefined" }, + { 0xFFFF, 62, "Command Stream Frontend" }, +}; + +static struct decode_lut_element lut_fault_source_core_id_t_core_type_major_11[] = { + { 0xFFFF, 0, "shader" }, { 0xFFFF, 1, "shader" }, { 0xFFFF, 2, "shader" }, + { 0xFFFF, 3, "shader" }, { 0xFFFF, 4, "shader" }, { 0xFFFF, 5, "shader" }, + { 0xFFFF, 6, "shader" }, { 0xFFFF, 7, "shader" }, { 0xFFFF, 8, "shader" }, + { 0xFFFF, 9, "shader" }, { 0xFFFF, 10, "shader" }, { 0xFFFF, 11, "shader" }, + { 0xFFFF, 12, "shader" }, { 0xFFFF, 13, "shader" }, { 0xFFFF, 14, "shader" }, + { 0xFFFF, 15, "shader" }, { 0xFFFF, 16, "shader" }, { 0xFFFF, 17, "shader" }, + { 0xFFFF, 18, "shader" }, { 0xFFFF, 19, "shader" }, { 0xFFFF, 20, "shader" }, + { 0xFFFF, 21, "shader" }, { 0xFFFF, 22, "shader" }, { 0xFFFF, 23, "shader" }, + { 0xFFFF, 24, "shader" }, { 0xFFFF, 25, "shader" }, { 0xFFFF, 26, "shader" }, + { 0xFFFF, 27, "shader" }, { 0xFFFF, 28, "shader" }, { 0xFFFF, 29, "shader" }, + { 0xFFFF, 30, "shader" }, { 0xFFFF, 31, "shader" }, { 0xFFFF, 41, "l2c" }, + { 
0xFFFF, 43, "l2c" }, { 0xFFFF, 45, "l2c" }, { 0xFFFF, 47, "l2c" }, + { 0xFFFF, 51, "tiler" }, { 0xFFFF, 55, "mmu" }, { 0xFFFF, 33, "l2c" }, + { 0xFFFF, 35, "l2c" }, { 0xFFFF, 37, "l2c" }, { 0xFFFF, 39, "l2c" }, + { 0xFFFF, 48, "memsys" }, { 0xFFFF, 62, "csf" }, +}; + +static struct decode_lut_element lut_fault_source_core_id_t_desc_major_12[] = { + { 0xFFFF, 0, "Shader core 0" }, + { 0xFFFF, 1, "Shader core 1" }, + { 0xFFFF, 2, "Shader core 2" }, + { 0xFFFF, 3, "Shader core 3" }, + { 0xFFFF, 4, "Shader core 4" }, + { 0xFFFF, 5, "Shader core 5" }, + { 0xFFFF, 6, "Shader core 6" }, + { 0xFFFF, 7, "Shader core 7" }, + { 0xFFFF, 8, "Shader core 8" }, + { 0xFFFF, 9, "Shader core 9" }, + { 0xFFFF, 10, "Shader core 10" }, + { 0xFFFF, 11, "Shader core 11" }, + { 0xFFFF, 12, "Shader core 12" }, + { 0xFFFF, 13, "Shader core 13" }, + { 0xFFFF, 14, "Shader core 14" }, + { 0xFFFF, 15, "Shader core 15" }, + { 0xFFFF, 16, "Shader core 16" }, + { 0xFFFF, 17, "Shader core 17" }, + { 0xFFFF, 18, "Shader core 18" }, + { 0xFFFF, 19, "Shader core 19" }, + { 0xFFFF, 20, "Shader core 20" }, + { 0xFFFF, 21, "Shader core 21" }, + { 0xFFFF, 22, "Shader core 22" }, + { 0xFFFF, 23, "Shader core 23" }, + { 0xFFFF, 24, "Shader core 24" }, + { 0xFFFF, 25, "Shader core 25" }, + { 0xFFFF, 26, "Shader core 26" }, + { 0xFFFF, 27, "Shader core 27" }, + { 0xFFFF, 28, "Shader core 28" }, + { 0xFFFF, 29, "Shader core 29" }, + { 0xFFFF, 30, "Shader core 30" }, + { 0xFFFF, 31, "Shader core 31" }, + { 0xFFFF, 41, "L2 Slice 3" }, + { 0xFFFF, 43, "L2 Slice 2" }, + { 0xFFFF, 45, "L2 Slice 1" }, + { 0xFFFF, 47, "L2 Slice 0" }, + { 0xFFFF, 51, "Tiler" }, + { 0xFFFF, 55, "MMU" }, + { 0xFFFF, 33, "L2 Slice 7" }, + { 0xFFFF, 35, "L2 Slice 6" }, + { 0xFFFF, 37, "L2 Slice 5" }, + { 0xFFFF, 39, "L2 Slice 4" }, + { 0xFFFF, 48, "Memory system, undefined" }, + { 0xFFFF, 62, "Command Stream Frontend" }, +}; + +static struct decode_lut_element lut_fault_source_core_id_t_core_type_major_12[] = { + { 0xFFFF, 0, "shader" }, 
{ 0xFFFF, 1, "shader" }, { 0xFFFF, 2, "shader" }, + { 0xFFFF, 3, "shader" }, { 0xFFFF, 4, "shader" }, { 0xFFFF, 5, "shader" }, + { 0xFFFF, 6, "shader" }, { 0xFFFF, 7, "shader" }, { 0xFFFF, 8, "shader" }, + { 0xFFFF, 9, "shader" }, { 0xFFFF, 10, "shader" }, { 0xFFFF, 11, "shader" }, + { 0xFFFF, 12, "shader" }, { 0xFFFF, 13, "shader" }, { 0xFFFF, 14, "shader" }, + { 0xFFFF, 15, "shader" }, { 0xFFFF, 16, "shader" }, { 0xFFFF, 17, "shader" }, + { 0xFFFF, 18, "shader" }, { 0xFFFF, 19, "shader" }, { 0xFFFF, 20, "shader" }, + { 0xFFFF, 21, "shader" }, { 0xFFFF, 22, "shader" }, { 0xFFFF, 23, "shader" }, + { 0xFFFF, 24, "shader" }, { 0xFFFF, 25, "shader" }, { 0xFFFF, 26, "shader" }, + { 0xFFFF, 27, "shader" }, { 0xFFFF, 28, "shader" }, { 0xFFFF, 29, "shader" }, + { 0xFFFF, 30, "shader" }, { 0xFFFF, 31, "shader" }, { 0xFFFF, 41, "l2c" }, + { 0xFFFF, 43, "l2c" }, { 0xFFFF, 45, "l2c" }, { 0xFFFF, 47, "l2c" }, + { 0xFFFF, 51, "tiler" }, { 0xFFFF, 55, "mmu" }, { 0xFFFF, 33, "l2c" }, + { 0xFFFF, 35, "l2c" }, { 0xFFFF, 37, "l2c" }, { 0xFFFF, 39, "l2c" }, + { 0xFFFF, 48, "memsys" }, { 0xFFFF, 62, "csf" }, +}; + +static struct decode_lut_element lut_fault_source_shader_r_t_major_9[] = { + { 0xFFFF, 0, "ic" }, { 0xFFFF, 1, "adc" }, { 0xFFFF, 4, "scm" }, + { 0xFFFF, 5, "vl" }, { 0xFFFF, 6, "plr" }, { 0xFFFF, 7, "fsdc" }, + { 0xFFFF, 8, "lsc" }, { 0xFFFF, 9, "cse" }, { 0xFFFF, 10, "tb" }, + { 0xFFFF, 11, "tmdi" }, { 0xFFFF, 12, "tmu0" }, { 0xFFFF, 13, "tmu1" }, + { 0xFFFF, 14, "tma0" }, { 0xFFFF, 15, "tma1" }, +}; + +static struct decode_lut_element lut_fault_source_shader_r_t_major_10[] = { + { 0xFFFF, 4, "scm" }, { 0xFFFF, 5, "vl" }, { 0xFFFF, 6, "plr" }, + { 0xFFFF, 7, "fsdc" }, { 0xFFFF, 8, "lsc" }, { 0xFFFF, 9, "cse" }, + { 0xFFFF, 10, "tb" }, { 0xFFFF, 11, "tmdi" }, { 0xFFFF, 12, "tmu0" }, + { 0xFFFF, 13, "tmu1" }, { 0xFFFF, 14, "tma0" }, { 0xFFFF, 15, "tma1" }, + { 0xFFFF, 0, "ic0" }, { 0xFFFF, 1, "ic1" }, { 0xFFFF, 2, "adc" }, +}; + +static struct decode_lut_element 
lut_fault_source_shader_r_t_major_11[] = { + { 0xFFFF, 4, "scm" }, { 0xFFFF, 5, "vl" }, { 0xFFFF, 6, "plr" }, + { 0xFFFF, 7, "fsdc" }, { 0xFFFF, 8, "lsc" }, { 0xFFFF, 9, "cse" }, + { 0xFFFF, 10, "tb" }, { 0xFFFF, 11, "tmdi" }, { 0xFFFF, 12, "tmu0" }, + { 0xFFFF, 13, "tmu1" }, { 0xFFFF, 14, "tma0" }, { 0xFFFF, 15, "tma1" }, + { 0xFFFF, 0, "ic0" }, { 0xFFFF, 1, "ic1" }, { 0xFFFF, 2, "adc" }, +}; + +static struct decode_lut_element lut_fault_source_shader_r_t_major_12[] = { + { 0xFFFF, 4, "scm" }, { 0xFFFF, 6, "plr" }, { 0xFFFF, 7, "fsdc" }, + { 0xFFFF, 8, "lsc" }, { 0xFFFF, 9, "cse" }, { 0xFFFF, 10, "tb" }, + { 0xFFFF, 11, "tmdi" }, { 0xFFFF, 12, "tmu0" }, { 0xFFFF, 13, "tmu1" }, + { 0xFFFF, 14, "tma0" }, { 0xFFFF, 15, "tma1" }, { 0xFFFF, 0, "ic0" }, + { 0xFFFF, 1, "ic1" }, { 0xFFFF, 2, "adc" }, { 0xFFFF, 3, "rtas" }, +}; + +static struct decode_lut_element lut_fault_source_shader_w_t_major_9[] = { + { 0xFFFF, 0, "pcb" }, + { 0xFFFF, 8, "lsc" }, + { 0xFFFF, 10, "tb" }, +}; + +static struct decode_lut_element lut_fault_source_shader_w_t_major_10[] = { + { 0xFFFF, 0, "pcb" }, { 0xFFFF, 8, "lsc" }, { 0xFFFF, 12, "tb0" }, + { 0xFFFF, 13, "tb1" }, { 0xFFFF, 14, "tb2" }, { 0xFFFF, 15, "tb3" }, +}; + +static struct decode_lut_element lut_fault_source_shader_w_t_major_11[] = { + { 0xFFFF, 0, "pcb" }, { 0xFFFF, 8, "lsc" }, { 0xFFFF, 12, "tb0" }, + { 0xFFFF, 13, "tb1" }, { 0xFFFF, 14, "tb2" }, { 0xFFFF, 15, "tb3" }, +}; + +static struct decode_lut_element lut_fault_source_shader_w_t_major_12[] = { + { 0xFFFF, 0, "pcb" }, { 0xFFFF, 8, "lsc" }, { 0xFFFF, 12, "tb0" }, + { 0xFFFF, 13, "tb1" }, { 0xFFFF, 14, "tb2" }, { 0xFFFF, 15, "tb3" }, +}; + +static struct decode_lut_element lut_fault_source_tiler_r_t_major_10[] = { + { 0xFFFF, 0, "pf" }, + { 0xFFFF, 1, "pcache" }, + { 0xFFFF, 2, "tcu" }, + { 0xFFFF, 3, "idx" }, +}; + +static struct decode_lut_element lut_fault_source_tiler_r_t_major_11[] = { + { 0xFFFF, 0, "pf" }, + { 0xFFFF, 1, "pcache" }, + { 0xFFFF, 2, "tcu" }, + { 0xFFFF, 
3, "idx" }, +}; + +static struct decode_lut_element lut_fault_source_tiler_r_t_major_12[] = { + { 0xFFFF, 0, "pf" }, + { 0xFFFF, 1, "pcache" }, + { 0xFFFF, 2, "tcu" }, + { 0xFFFF, 3, "idx" }, +}; + +static struct decode_lut_element lut_fault_source_tiler_w_t_major_10[] = { + { 0xFFFF, 1, "pcache_wb" }, + { 0xFFFF, 2, "tcu_pcb" }, +}; + +static struct decode_lut_element lut_fault_source_tiler_w_t_major_11[] = { + { 0xFFFF, 1, "pcache_wb" }, + { 0xFFFF, 2, "tcu_pcb" }, +}; + +static struct decode_lut_element lut_fault_source_tiler_w_t_major_12[] = { + { 0xFFFF, 1, "pcache_wb" }, + { 0xFFFF, 2, "tcu_pcb" }, +}; + + +const char *decode_fault_source_core_type_t_name(u16 idx, u32 gpu_id) +{ + u16 min_rev = GPU_ID_ARCH_ID_MINOR_AND_REV_GET(gpu_id); + const char *ret = "unknown"; + + switch (GPU_ID_ARCH_ID_MAJOR_GET(gpu_id)) { + case 9: + ret = decode_lut_element_lookup(min_rev, idx, + lut_fault_source_core_type_t_name_major_9, + NELEMS(lut_fault_source_core_type_t_name_major_9)); + break; + case 10: + ret = decode_lut_element_lookup(min_rev, idx, + lut_fault_source_core_type_t_name_major_10, + NELEMS(lut_fault_source_core_type_t_name_major_10)); + break; + case 11: + ret = decode_lut_element_lookup(min_rev, idx, + lut_fault_source_core_type_t_name_major_11, + NELEMS(lut_fault_source_core_type_t_name_major_11)); + break; + case 12: + ret = decode_lut_element_lookup(min_rev, idx, + lut_fault_source_core_type_t_name_major_12, + NELEMS(lut_fault_source_core_type_t_name_major_12)); + break; + } + return ret; +} + +const char *decode_fault_source_core_type_t_desc(u16 idx, u32 gpu_id) +{ + u16 min_rev = GPU_ID_ARCH_ID_MINOR_AND_REV_GET(gpu_id); + const char *ret = "unknown"; + + switch (GPU_ID_ARCH_ID_MAJOR_GET(gpu_id)) { + case 9: + ret = decode_lut_element_lookup(min_rev, idx, + lut_fault_source_core_type_t_desc_major_9, + NELEMS(lut_fault_source_core_type_t_desc_major_9)); + break; + case 10: + ret = decode_lut_element_lookup(min_rev, idx, + 
lut_fault_source_core_type_t_desc_major_10, + NELEMS(lut_fault_source_core_type_t_desc_major_10)); + break; + case 11: + ret = decode_lut_element_lookup(min_rev, idx, + lut_fault_source_core_type_t_desc_major_11, + NELEMS(lut_fault_source_core_type_t_desc_major_11)); + break; + case 12: + ret = decode_lut_element_lookup(min_rev, idx, + lut_fault_source_core_type_t_desc_major_12, + NELEMS(lut_fault_source_core_type_t_desc_major_12)); + break; + } + return ret; +} + +const char *decode_fault_source_core_id_t_desc(u16 idx, u32 gpu_id) +{ + u16 min_rev = GPU_ID_ARCH_ID_MINOR_AND_REV_GET(gpu_id); + const char *ret = "unknown"; + + switch (GPU_ID_ARCH_ID_MAJOR_GET(gpu_id)) { + case 9: + ret = decode_lut_element_lookup(min_rev, idx, + lut_fault_source_core_id_t_desc_major_9, + NELEMS(lut_fault_source_core_id_t_desc_major_9)); + break; + case 10: + ret = decode_lut_element_lookup(min_rev, idx, + lut_fault_source_core_id_t_desc_major_10, + NELEMS(lut_fault_source_core_id_t_desc_major_10)); + break; + case 11: + ret = decode_lut_element_lookup(min_rev, idx, + lut_fault_source_core_id_t_desc_major_11, + NELEMS(lut_fault_source_core_id_t_desc_major_11)); + break; + case 12: + ret = decode_lut_element_lookup(min_rev, idx, + lut_fault_source_core_id_t_desc_major_12, + NELEMS(lut_fault_source_core_id_t_desc_major_12)); + break; + } + return ret; +} + +const char *decode_fault_source_core_id_t_core_type(u16 idx, u32 gpu_id) +{ + u16 min_rev = GPU_ID_ARCH_ID_MINOR_AND_REV_GET(gpu_id); + const char *ret = "unknown"; + + switch (GPU_ID_ARCH_ID_MAJOR_GET(gpu_id)) { + case 9: + ret = decode_lut_element_lookup( + min_rev, idx, lut_fault_source_core_id_t_core_type_major_9, + NELEMS(lut_fault_source_core_id_t_core_type_major_9)); + break; + case 10: + ret = decode_lut_element_lookup( + min_rev, idx, lut_fault_source_core_id_t_core_type_major_10, + NELEMS(lut_fault_source_core_id_t_core_type_major_10)); + break; + case 11: + ret = decode_lut_element_lookup( + min_rev, idx, 
lut_fault_source_core_id_t_core_type_major_11, + NELEMS(lut_fault_source_core_id_t_core_type_major_11)); + break; + case 12: + ret = decode_lut_element_lookup( + min_rev, idx, lut_fault_source_core_id_t_core_type_major_12, + NELEMS(lut_fault_source_core_id_t_core_type_major_12)); + break; + } + return ret; +} + +const char *decode_fault_source_shader_r_t(u16 idx, u32 gpu_id) +{ + u16 min_rev = GPU_ID_ARCH_ID_MINOR_AND_REV_GET(gpu_id); + const char *ret = "unknown"; + + switch (GPU_ID_ARCH_ID_MAJOR_GET(gpu_id)) { + case 9: + ret = decode_lut_element_lookup(min_rev, idx, lut_fault_source_shader_r_t_major_9, + NELEMS(lut_fault_source_shader_r_t_major_9)); + break; + case 10: + ret = decode_lut_element_lookup(min_rev, idx, lut_fault_source_shader_r_t_major_10, + NELEMS(lut_fault_source_shader_r_t_major_10)); + break; + case 11: + ret = decode_lut_element_lookup(min_rev, idx, lut_fault_source_shader_r_t_major_11, + NELEMS(lut_fault_source_shader_r_t_major_11)); + break; + case 12: + ret = decode_lut_element_lookup(min_rev, idx, lut_fault_source_shader_r_t_major_12, + NELEMS(lut_fault_source_shader_r_t_major_12)); + break; + } + return ret; +} + +const char *decode_fault_source_shader_w_t(u16 idx, u32 gpu_id) +{ + u16 min_rev = GPU_ID_ARCH_ID_MINOR_AND_REV_GET(gpu_id); + const char *ret = "unknown"; + + switch (GPU_ID_ARCH_ID_MAJOR_GET(gpu_id)) { + case 9: + ret = decode_lut_element_lookup(min_rev, idx, lut_fault_source_shader_w_t_major_9, + NELEMS(lut_fault_source_shader_w_t_major_9)); + break; + case 10: + ret = decode_lut_element_lookup(min_rev, idx, lut_fault_source_shader_w_t_major_10, + NELEMS(lut_fault_source_shader_w_t_major_10)); + break; + case 11: + ret = decode_lut_element_lookup(min_rev, idx, lut_fault_source_shader_w_t_major_11, + NELEMS(lut_fault_source_shader_w_t_major_11)); + break; + case 12: + ret = decode_lut_element_lookup(min_rev, idx, lut_fault_source_shader_w_t_major_12, + NELEMS(lut_fault_source_shader_w_t_major_12)); + break; + } + return ret; 
+} + +const char *decode_fault_source_tiler_r_t(u16 idx, u32 gpu_id) +{ + u16 min_rev = GPU_ID_ARCH_ID_MINOR_AND_REV_GET(gpu_id); + const char *ret = "unknown"; + + switch (GPU_ID_ARCH_ID_MAJOR_GET(gpu_id)) { + case 10: + ret = decode_lut_element_lookup(min_rev, idx, lut_fault_source_tiler_r_t_major_10, + NELEMS(lut_fault_source_tiler_r_t_major_10)); + break; + case 11: + ret = decode_lut_element_lookup(min_rev, idx, lut_fault_source_tiler_r_t_major_11, + NELEMS(lut_fault_source_tiler_r_t_major_11)); + break; + case 12: + ret = decode_lut_element_lookup(min_rev, idx, lut_fault_source_tiler_r_t_major_12, + NELEMS(lut_fault_source_tiler_r_t_major_12)); + break; + } + return ret; +} + +const char *decode_fault_source_tiler_w_t(u16 idx, u32 gpu_id) +{ + u16 min_rev = GPU_ID_ARCH_ID_MINOR_AND_REV_GET(gpu_id); + const char *ret = "unknown"; + + switch (GPU_ID_ARCH_ID_MAJOR_GET(gpu_id)) { + case 10: + ret = decode_lut_element_lookup(min_rev, idx, lut_fault_source_tiler_w_t_major_10, + NELEMS(lut_fault_source_tiler_w_t_major_10)); + break; + case 11: + ret = decode_lut_element_lookup(min_rev, idx, lut_fault_source_tiler_w_t_major_11, + NELEMS(lut_fault_source_tiler_w_t_major_11)); + break; + case 12: + ret = decode_lut_element_lookup(min_rev, idx, lut_fault_source_tiler_w_t_major_12, + NELEMS(lut_fault_source_tiler_w_t_major_12)); + break; + } + return ret; +} diff --git a/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_faults_decoder_luts.h b/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_faults_decoder_luts.h new file mode 100644 index 000000000000..2b0ca5659a6c --- /dev/null +++ b/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_faults_decoder_luts.h @@ -0,0 +1,119 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2024 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +#ifndef _MALI_KBASE_MMU_FAULTS_DECODER_LUTS_H_ +#define _MALI_KBASE_MMU_FAULTS_DECODER_LUTS_H_ + +#include + +/** + * decode_fault_source_core_id_t_desc() - Get core description of a + * fault in a human readable format. + * + * @idx: Core ID part of SOURCE_ID field of the fault. + * @gpu_id: GPU id composed of arch_major << 16 | arch_minor << 8 | arch_rev. + * + * Return: core ID of the fault in human readable format. + */ +const char *decode_fault_source_core_id_t_desc(u16 idx, u32 gpu_id); + +/** + * decode_fault_source_core_id_t_core_type() - Get core type of a + * fault in a human readable format. + * + * @idx: Core ID part of SOURCE_ID field of the fault. + * @gpu_id: GPU id composed of arch_major << 16 | arch_minor << 8 | arch_rev. + * + * Return: core type of the fault in human readable format. + */ +const char *decode_fault_source_core_id_t_core_type(u16 idx, u32 gpu_id); + +/** + * decode_fault_source_core_type_t_name() - Get core type name of a + * fault. + * + * @idx: Core type part of SOURCE_ID field of the fault. + * @gpu_id: GPU id composed of arch_major << 16 | arch_minor << 8 | arch_rev. + * + * Return: core type short name of the fault. 
+ */ +const char *decode_fault_source_core_type_t_name(u16 idx, u32 gpu_id); + +/** + * decode_fault_source_core_type_t_desc() - Get core type description of a + * fault. + * + * @idx: Core type part of SOURCE_ID field of the fault. + * @gpu_id: GPU id composed of arch_major << 16 | arch_minor << 8 | arch_rev. + * + * Return: core type description of the fault. + */ +const char *decode_fault_source_core_type_t_desc(u16 idx, u32 gpu_id); + +/** + * decode_fault_source_shader_r_t() - Get internal requester of a + * fault in a human readable format. + * + * @idx: Internal requester part of SOURCE_ID field of the fault. + * @gpu_id: GPU id composed of arch_major << 16 | arch_minor << 8 | arch_rev. + * + * Return: Internal requester of a fault in a human readable format for read + * operations on a shader core. + */ +const char *decode_fault_source_shader_r_t(u16 idx, u32 gpu_id); + +/** + * decode_fault_source_shader_w_t() - Get internal requester of a + * fault in a human readable format. + * + * @idx: Internal requester part of SOURCE_ID field of the fault. + * @gpu_id: GPU id composed of arch_major << 16 | arch_minor << 8 | arch_rev. + * + * Return: Internal requester of a fault in a human readable format for write + * operations on a shader core. + */ +const char *decode_fault_source_shader_w_t(u16 idx, u32 gpu_id); + +/** + * decode_fault_source_tiler_r_t() - Get internal requester of a + * fault in a human readable format. + * + * @idx: Internal requester part of SOURCE_ID field of the fault. + * @gpu_id: GPU id composed of arch_major << 16 | arch_minor << 8 | arch_rev. + * + * Return: Internal requester of a fault in a human readable format for read + * operations on a tiler core. + */ +const char *decode_fault_source_tiler_r_t(u16 idx, u32 gpu_id); + +/** + * decode_fault_source_tiler_w_t() - Get internal requester of a + * fault in a human readable format. + * + * @idx: Internal requester part of SOURCE_ID field of the fault. 
+ * @gpu_id: GPU id composed of arch_major << 16 | arch_minor << 8 | arch_rev. + * + * Return: Internal requester of a fault in a human readable format for write + * operations on a tiler core. + */ +const char *decode_fault_source_tiler_w_t(u16 idx, u32 gpu_id); + +#endif /* _MALI_KBASE_MMU_FAULTS_DECODER_LUTS_H_ */ diff --git a/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_mode_aarch64.c b/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_mode_aarch64.c index d19579da2f5d..e3ad78daed59 100644 --- a/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_mode_aarch64.c +++ b/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_mode_aarch64.c @@ -32,7 +32,7 @@ */ #define ENTRY_IS_ATE_L3 3ULL #define ENTRY_IS_ATE_L02 1ULL -#define ENTRY_IS_INVAL 2ULL +#define ENTRY_IS_INVAL 0ULL #define ENTRY_IS_PTE 3ULL #define ENTRY_ACCESS_RW (1ULL << 6) /* bits 6:7 */ diff --git a/drivers/gpu/arm/bifrost/platform/devicetree/mali_kbase_runtime_pm.c b/drivers/gpu/arm/bifrost/platform/devicetree/mali_kbase_runtime_pm.c index 2a5030745586..d0342af60fb3 100644 --- a/drivers/gpu/arm/bifrost/platform/devicetree/mali_kbase_runtime_pm.c +++ b/drivers/gpu/arm/bifrost/platform/devicetree/mali_kbase_runtime_pm.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2015-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2015-2024 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software diff --git a/drivers/gpu/arm/bifrost/tests/Kbuild b/drivers/gpu/arm/bifrost/tests/Kbuild index 72ca70ac8779..479b91532ed7 100644 --- a/drivers/gpu/arm/bifrost/tests/Kbuild +++ b/drivers/gpu/arm/bifrost/tests/Kbuild @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note # -# (C) COPYRIGHT 2017-2023 ARM Limited. All rights reserved. +# (C) COPYRIGHT 2017-2024 ARM Limited. All rights reserved. 
# # This program is free software and is provided to you under the terms of the # GNU General Public License version 2 as published by the Free Software diff --git a/drivers/gpu/arm/bifrost/tests/include/kutf/kutf_kprobe.h b/drivers/gpu/arm/bifrost/tests/include/kutf/kutf_kprobe.h index f75cd776c60e..d8c3ca88166b 100644 --- a/drivers/gpu/arm/bifrost/tests/include/kutf/kutf_kprobe.h +++ b/drivers/gpu/arm/bifrost/tests/include/kutf/kutf_kprobe.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2023-2024 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -22,6 +22,8 @@ #ifndef _KUTF_KPROBE_H_ #define _KUTF_KPROBE_H_ +struct dentry; + int kutf_kprobe_init(struct dentry *base_dir); void kutf_kprobe_exit(void); @@ -30,4 +32,6 @@ typedef void (*kutf_kp_handler)(int argc, char **argv); void kutf_kp_sample_handler(int argc, char **argv); void kutf_kp_sample_kernel_function(void); +void kutf_kp_delay_handler(int argc, char **argv); + #endif /* _KUTF_KPROBE_H_ */ diff --git a/drivers/gpu/arm/bifrost/tests/kutf/kutf_kprobe.c b/drivers/gpu/arm/bifrost/tests/kutf/kutf_kprobe.c index f118692c43a1..232809e1ed58 100644 --- a/drivers/gpu/arm/bifrost/tests/kutf/kutf_kprobe.c +++ b/drivers/gpu/arm/bifrost/tests/kutf/kutf_kprobe.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2023-2024 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -29,6 +29,7 @@ #include #include #include +#include #include #define KUTF_KP_REG_MIN_ARGS 3 @@ -86,6 +87,19 @@ const struct file_operations kutf_kp_unreg_debugfs_fops = { struct kprobe kutf_kallsym_kp = { .symbol_name = "kallsyms_lookup_name" }; +void kutf_kp_delay_handler(int argc, char **argv) +{ + unsigned long delay; /* milliseconds; kstrtoul() rejects negative input */ + + if ((!argv) || (!argv[0])) + return; + + if (kstrtoul(argv[0], 0, &delay)) + return; + + mdelay(delay); +} + void kutf_kp_sample_kernel_function(void) { pr_debug("%s called\n", __func__); @@ -150,11 +164,9 @@ static ssize_t kutf_kp_reg_debugfs_write(struct file *file, const char __user *u if (count >= KUTF_KP_WRITE_BUFSIZE) return -EINVAL; - kbuf = memdup_user(user_buf, count); - if (IS_ERR(kbuf)) { + kbuf = memdup_user_nul(user_buf, count); + if (IS_ERR(kbuf)) return -ENOMEM; - } - kbuf[count - 1] = '\0'; argv = argv_split(GFP_KERNEL, kbuf, &argc); if (!argv) { @@ -245,11 +257,9 @@ static ssize_t kutf_kp_unreg_debugfs_write(struct file *file, const char __user if (count >= KUTF_KP_WRITE_BUFSIZE) return -EINVAL; - kbuf = memdup_user(user_buf, count); - if (IS_ERR(kbuf)) { + kbuf = memdup_user_nul(user_buf, count); + if (IS_ERR(kbuf)) return -ENOMEM; - } - kbuf[count - 1] = '\0'; argv = argv_split(GFP_KERNEL, kbuf, &argc); if (!argv) { diff --git a/drivers/gpu/arm/bifrost/thirdparty/mali_kbase_mmap.c b/drivers/gpu/arm/bifrost/thirdparty/mali_kbase_mmap.c index 1592eab806ac..cfb347affa2e 100644 --- a/drivers/gpu/arm/bifrost/thirdparty/mali_kbase_mmap.c +++ b/drivers/gpu/arm/bifrost/thirdparty/mali_kbase_mmap.c @@ -20,18 +20,169 @@ * kbase_context_get_unmapped_area() interface. */ +#if (KERNEL_VERSION(6, 1, 0) <= LINUX_VERSION_CODE) +/** + * move_mt_gap() - Search the maple tree for an existing gap of a particular size + * immediately before another pre-identified gap.
+ * @gap_start: Pre-identified gap starting address. + * @gap_end: Pre-identified gap ending address. + * @size: Size of the new gap needed before gap_start. + * + * This function will search the calling process' maple tree + * for another gap, one that is immediately preceding the pre-identified + * gap, for a specific size, and upon success it will decrement gap_end + * by the specified size, and replace gap_start with the new gap_start of + * the newly identified gap. + * + * Return: true if large enough preceding gap is found, false otherwise. + */ +static bool move_mt_gap(unsigned long *gap_start, unsigned long *gap_end, unsigned long size) +{ + unsigned long new_gap_start, new_gap_end; + + MA_STATE(mas, &current->mm->mm_mt, 0, 0); + + if (*gap_end < size) + return false; + + /* Calculate the gap end for the new, resultant gap */ + new_gap_end = *gap_end - size; + + /* If the new gap_end (i.e. new VA start address) is larger than gap_start, then the + * pre-identified gap already has space to shrink to accommodate the decrease in + * gap_end. + */ + if (new_gap_end >= *gap_start) { + /* Pre-identified gap already has space - just patch gap_end to new + * lower value and exit. + */ + *gap_end = new_gap_end; + return true; + } + + /* Since the new VA start address (new_gap_end) is below the start of the pre-identified + * gap in the maple tree, see if there is a free gap directly before the existing gap, of + * the same size as the alignment shift, such that the effective gap found is "extended". + * This may be larger than needed but leaves the same distance between gap_end and gap_start + * that currently exists. + */ + new_gap_start = *gap_start - size; + if (mas_empty_area_rev(&mas, new_gap_start, *gap_start - 1, size)) { + /* There's no gap between the new start address needed and the + * current start address - so return false to find a new + * gap from the maple tree.
+ */ + return false; + } + /* Suitable gap found - replace gap_start and gap_end with new values. gap_start takes the + * value of the start of new gap found, which now correctly precedes gap_end, and gap_end + * takes on the new aligned value that has now been decremented by the requested size. + */ + *gap_start = mas.index; + *gap_end = new_gap_end; + return true; +} + /** * align_and_check() - Align the specified pointer to the provided alignment and - * check that it is still in range. - * @gap_end: Highest possible start address for allocation (end of gap in - * address space) - * @gap_start: Start address of current memory area / gap in address space - * @info: vm_unmapped_area_info structure passed to caller, containing - * alignment, length and limits for the allocation - * @is_shader_code: True if the allocation is for shader code (which has - * additional alignment requirements) - * @is_same_4gb_page: True if the allocation needs to reside completely within - * a 4GB chunk + * check that it is still in range. On kernel 6.1 onwards + * this function does not require that the initial requested + * gap is extended with the maximum size needed to guarantee + * an alignment. 
+ * @gap_end: Highest possible start address for allocation (end of gap in + * address space) + * @gap_start: Start address of current memory area / gap in address space + * @info: vm_unmapped_area_info structure passed to caller, containing + * alignment, length and limits for the allocation + * @is_shader_code: True if the allocation is for shader code (which has + * additional alignment requirements) + * @is_same_4gb_page: True if the allocation needs to reside completely within + * a 4GB chunk + * + * Return: true if gap_end is now aligned correctly and is still in range, + * false otherwise + */ +static bool align_and_check(unsigned long *gap_end, unsigned long gap_start, + struct vm_unmapped_area_info *info, bool is_shader_code, + bool is_same_4gb_page) +{ + unsigned long alignment_shift; + + /* Compute highest gap address at the desired alignment */ + *gap_end -= info->length; + alignment_shift = (*gap_end - info->align_offset) & info->align_mask; + + /* Align desired start VA (gap_end) by calculated alignment shift amount */ + if (!move_mt_gap(&gap_start, gap_end, alignment_shift)) + return false; + /* Alignment is done so far - check for further alignment requirements */ + + if (is_shader_code) { + /* Shader code allocations must not start or end on a 4GB boundary */ + alignment_shift = info->align_offset ? 
info->align_offset : info->length; + if (0 == (*gap_end & BASE_MEM_MASK_4GB)) { + if (!move_mt_gap(&gap_start, gap_end, alignment_shift)) + return false; + } + if (0 == ((*gap_end + info->length) & BASE_MEM_MASK_4GB)) { + if (!move_mt_gap(&gap_start, gap_end, alignment_shift)) + return false; + } + + if (!(*gap_end & BASE_MEM_MASK_4GB) || + !((*gap_end + info->length) & BASE_MEM_MASK_4GB)) + return false; + } else if (is_same_4gb_page) { + unsigned long start = *gap_end; + unsigned long end = *gap_end + info->length; + unsigned long mask = ~((unsigned long)U32_MAX); + + /* Check if 4GB boundary is straddled */ + if ((start & mask) != ((end - 1) & mask)) { + unsigned long offset = end - (end & mask); + /* This is to ensure that alignment doesn't get + * disturbed in an attempt to prevent straddling at + * 4GB boundary. The GPU VA is aligned to 2MB when the + * allocation size is > 2MB and there is enough CPU & + * GPU virtual space. + */ + unsigned long rounded_offset = ALIGN(offset, info->align_mask + 1); + + if (!move_mt_gap(&gap_start, gap_end, rounded_offset)) + return false; + /* Re-calculate start and end values */ + start = *gap_end; + end = *gap_end + info->length; + + /* The preceding 4GB boundary shall not get straddled, + * even after accounting for the alignment, as the + * size of allocation is limited to 4GB and the initial + * start location was already aligned. + */ + WARN_ON((start & mask) != ((end - 1) & mask)); + } + } + + if ((*gap_end < info->low_limit) || (*gap_end < gap_start)) + return false; + + return true; +} +#else +/** + * align_and_check() - Align the specified pointer to the provided alignment and + * check that it is still in range. For Kernel versions below + * 6.1, it requires that the length of the alignment is already + * extended by a worst-case alignment mask. 
+ * @gap_end: Highest possible start address for allocation (end of gap in + * address space) + * @gap_start: Start address of current memory area / gap in address space + * @info: vm_unmapped_area_info structure passed to caller, containing + * alignment, length and limits for the allocation + * @is_shader_code: True if the allocation is for shader code (which has + * additional alignment requirements) + * @is_same_4gb_page: True if the allocation needs to reside completely within + * a 4GB chunk * * Return: true if gap_end is now aligned correctly and is still in range, * false otherwise @@ -41,8 +192,8 @@ static bool align_and_check(unsigned long *gap_end, unsigned long gap_start, bool is_same_4gb_page) { /* Compute highest gap address at the desired alignment */ - (*gap_end) -= info->length; - (*gap_end) -= (*gap_end - info->align_offset) & info->align_mask; + *gap_end -= info->length; + *gap_end -= (*gap_end - info->align_offset) & info->align_mask; if (is_shader_code) { /* Check for 4GB boundary */ @@ -73,6 +224,7 @@ static bool align_and_check(unsigned long *gap_end, unsigned long gap_start, start -= rounded_offset; end -= rounded_offset; + /* Patch gap_end to use new starting address for VA region */ *gap_end = start; /* The preceding 4GB boundary shall not get straddled, @@ -89,6 +241,7 @@ static bool align_and_check(unsigned long *gap_end, unsigned long gap_start, return true; } +#endif /** * kbase_unmapped_area_topdown() - allocates new areas top-down from @@ -218,31 +371,27 @@ check_current: } } #else - unsigned long length, high_limit, gap_start, gap_end; + unsigned long high_limit, gap_start, gap_end; MA_STATE(mas, &current->mm->mm_mt, 0, 0); - /* Adjust search length to account for worst case alignment overhead */ - length = info->length + info->align_mask; - if (length < info->length) - return -ENOMEM; /* * Adjust search limits by the desired length. * See implementation comment at top of unmapped_area().
*/ gap_end = info->high_limit; - if (gap_end < length) + if (gap_end < info->length) return -ENOMEM; - high_limit = gap_end - length; + high_limit = gap_end - info->length; if (info->low_limit > high_limit) return -ENOMEM; while (true) { - if (mas_empty_area_rev(&mas, info->low_limit, info->high_limit - 1, length)) + if (mas_empty_area_rev(&mas, info->low_limit, info->high_limit - 1, info->length)) return -ENOMEM; gap_end = mas.last + 1; - gap_start = mas.min; + gap_start = mas.index; if (align_and_check(&gap_end, gap_start, info, is_shader_code, is_same_4gb_page)) return gap_end; @@ -368,7 +517,7 @@ unsigned long kbase_context_get_unmapped_area(struct kbase_context *const kctx, kbase_gpu_vm_unlock(kctx); #ifndef CONFIG_64BIT } else { - return current->mm->get_unmapped_area(kctx->kfile->filp, addr, len, pgoff, flags); + return current->mm->get_unmapped_area(kctx->filp, addr, len, pgoff, flags); #endif } diff --git a/drivers/gpu/arm/bifrost/tl/mali_kbase_tracepoints.c b/drivers/gpu/arm/bifrost/tl/mali_kbase_tracepoints.c index 742735846d49..34cabbd6e535 100644 --- a/drivers/gpu/arm/bifrost/tl/mali_kbase_tracepoints.c +++ b/drivers/gpu/arm/bifrost/tl/mali_kbase_tracepoints.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2024 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software diff --git a/drivers/gpu/arm/bifrost/tl/mali_kbase_tracepoints.h b/drivers/gpu/arm/bifrost/tl/mali_kbase_tracepoints.h index b2cbfe6e528d..987d4d3dbf27 100644 --- a/drivers/gpu/arm/bifrost/tl/mali_kbase_tracepoints.h +++ b/drivers/gpu/arm/bifrost/tl/mali_kbase_tracepoints.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. 
+ * (C) COPYRIGHT 2010-2024 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software diff --git a/drivers/hwtracing/coresight/mali/Makefile b/drivers/hwtracing/coresight/mali/Makefile index 923cb0c910d9..d8186bee6e64 100644 --- a/drivers/hwtracing/coresight/mali/Makefile +++ b/drivers/hwtracing/coresight/mali/Makefile @@ -79,9 +79,9 @@ ifeq ($(MALI_KCONFIG_EXT_PREFIX),) endif EXTRA_SYMBOLS += \ - $(M)/../../../base/arm/Module.symvers \ $(GPU_SYMBOLS) + # The following were added to align with W=1 in scripts/Makefile.extrawarn # from the Linux source tree CFLAGS_MODULE += -Wall -Werror @@ -99,6 +99,8 @@ CFLAGS_MODULE += $(call cc-option, -Wstringop-truncation) CFLAGS_MODULE += -Wno-missing-field-initializers CFLAGS_MODULE += -Wno-sign-compare CFLAGS_MODULE += -Wno-type-limits +# The following ensures the stack frame does not get larger than a page +CFLAGS_MODULE += -Wframe-larger-than=4096 KBUILD_CPPFLAGS += -DKBUILD_EXTRA_WARN1 diff --git a/drivers/hwtracing/coresight/mali/sources/coresight_mali_sources.c b/drivers/hwtracing/coresight/mali/sources/coresight_mali_sources.c index e6d2dc71096b..247a8b47f05b 100644 --- a/drivers/hwtracing/coresight/mali/sources/coresight_mali_sources.c +++ b/drivers/hwtracing/coresight/mali/sources/coresight_mali_sources.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2022-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -50,7 +50,11 @@ static void coresight_mali_disable_source(struct coresight_device *csdev, struct } static const struct coresight_ops_source coresight_mali_source_ops = { +#if KERNEL_VERSION(6, 3, 0) <= LINUX_VERSION_CODE + .cpu_id = coresight_mali_source_trace_id, +#else .trace_id = coresight_mali_source_trace_id, +#endif .enable = coresight_mali_enable_source, .disable = coresight_mali_disable_source }; diff --git a/drivers/xen/arm/Makefile b/drivers/xen/arm/Makefile index b2ee53723428..27bee59ac787 100644 --- a/drivers/xen/arm/Makefile +++ b/drivers/xen/arm/Makefile @@ -78,6 +78,8 @@ CFLAGS_MODULE += $(call cc-option, -Wstringop-truncation) CFLAGS_MODULE += -Wno-missing-field-initializers CFLAGS_MODULE += -Wno-sign-compare CFLAGS_MODULE += -Wno-type-limits +# The following ensures the stack frame does not get larger than a page +CFLAGS_MODULE += -Wframe-larger-than=4096 KBUILD_CPPFLAGS += -DKBUILD_EXTRA_WARN1 diff --git a/include/linux/mali_arbiter_interface.h b/include/linux/mali_arbiter_interface.h index b4162f86ebb4..ae44e82ae6dd 100644 --- a/include/linux/mali_arbiter_interface.h +++ b/include/linux/mali_arbiter_interface.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2024 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -26,6 +26,8 @@ #ifndef _MALI_KBASE_ARBITER_INTERFACE_H_ #define _MALI_KBASE_ARBITER_INTERFACE_H_ +#include + /** * DOC: Mali arbiter interface version * diff --git a/include/linux/mali_hw_access.h b/include/linux/mali_hw_access.h new file mode 100644 index 000000000000..4ed9da994e57 --- /dev/null +++ b/include/linux/mali_hw_access.h @@ -0,0 +1,61 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2023-2024 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + */ + +#ifndef _MALI_HW_ACCESS_H_ +#define _MALI_HW_ACCESS_H_ + +#include +#include + + +#define mali_readl(addr) readl(addr) + +#define mali_writel(val, addr) writel(val, addr) + +#define mali_readq(addr) ((u64)mali_readl(addr) | ((u64)mali_readl((addr) + 4) << 32)) + +static inline u64 mali_readq_coherent(const void __iomem *addr) +{ + u32 hi1, hi2, lo; + + do { + hi1 = mali_readl(addr + 4); + lo = mali_readl(addr); + hi2 = mali_readl(addr + 4); + } while (hi1 != hi2); + + return lo | (((u64)hi1) << 32); +} + +#define mali_writeq(val, addr) \ + do { \ + mali_writel((val) & 0xFFFFFFFF, addr); \ + mali_writel((val) >> 32, (addr) + 4); \ + } while (0) + +#define mali_ioremap(addr, size) ioremap(addr, size) + +#define mali_iounmap(addr) iounmap(addr) + +#define mali_arch_timer_get_cntfrq() arch_timer_get_cntfrq() + + +#endif /* _MALI_HW_ACCESS_H_ */ diff --git a/include/linux/memory_group_manager.h b/include/linux/memory_group_manager.h index 3820f1bff86b..e92d3dea0178 100644 --- a/include/linux/memory_group_manager.h +++ b/include/linux/memory_group_manager.h @@ -32,6 +32,10 @@ typedef int vm_fault_t; #define MEMORY_GROUP_MANAGER_NR_GROUPS (16) +#define PTE_PBHA_SHIFT (59) +#define PTE_PBHA_MASK ((uint64_t)0xf << PTE_PBHA_SHIFT) +#define PTE_RES_BIT_MULTI_AS_SHIFT (63) + struct memory_group_manager_device; struct memory_group_manager_import_data; diff --git a/include/linux/version_compat_defs.h b/include/linux/version_compat_defs.h index 366b50c4e3ec..46f227e108f6 100644 --- a/include/linux/version_compat_defs.h +++ b/include/linux/version_compat_defs.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2022-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2022-2024 ARM Limited. All rights reserved.
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -348,4 +348,50 @@ static inline long kbase_pin_user_pages_remote(struct task_struct *tsk, struct m #endif /* (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE) */ +#if (KERNEL_VERSION(4, 16, 0) > LINUX_VERSION_CODE) +/* Null definition */ +#define ALLOW_ERROR_INJECTION(fname, err_type) +#endif /* (KERNEL_VERSION(4, 16, 0) > LINUX_VERSION_CODE) */ + +#if KERNEL_VERSION(6, 0, 0) > LINUX_VERSION_CODE +#define KBASE_REGISTER_SHRINKER(reclaim, name, priv_data) register_shrinker(reclaim) + +#elif ((KERNEL_VERSION(6, 7, 0) > LINUX_VERSION_CODE) && \ + !(defined(__ANDROID_COMMON_KERNEL__) && (KERNEL_VERSION(6, 6, 0) == LINUX_VERSION_CODE))) +#define KBASE_REGISTER_SHRINKER(reclaim, name, priv_data) register_shrinker(reclaim, name) + +#else +#define KBASE_REGISTER_SHRINKER(reclaim, name, priv_data) \ + do { \ + reclaim->private_data = priv_data; \ + shrinker_register(reclaim); \ + } while (0) + +#endif /* KERNEL_VERSION(6, 0, 0) > LINUX_VERSION_CODE */ + +#if ((KERNEL_VERSION(6, 7, 0) > LINUX_VERSION_CODE) && \ + !(defined(__ANDROID_COMMON_KERNEL__) && (KERNEL_VERSION(6, 6, 0) == LINUX_VERSION_CODE))) +#define KBASE_UNREGISTER_SHRINKER(reclaim) unregister_shrinker(&reclaim) +#define KBASE_GET_KBASE_DATA_FROM_SHRINKER(s, type, var) container_of(s, type, var) +#define DEFINE_KBASE_SHRINKER struct shrinker +#define KBASE_INIT_RECLAIM(var, attr, name) (&((var)->attr)) +#define KBASE_SET_RECLAIM(var, attr, reclaim) ((var)->attr = (*reclaim)) + +#else +#define KBASE_UNREGISTER_SHRINKER(reclaim) shrinker_free(reclaim) +#define KBASE_GET_KBASE_DATA_FROM_SHRINKER(s, type, var) s->private_data +#define DEFINE_KBASE_SHRINKER struct shrinker * +#define KBASE_SHRINKER_ALLOC(name) shrinker_alloc(0, name) +#define KBASE_INIT_RECLAIM(var, attr, name) (KBASE_SHRINKER_ALLOC(name)) +#define KBASE_SET_RECLAIM(var, attr, reclaim) ((var)->attr = 
reclaim) + +#endif + +#if (KERNEL_VERSION(4, 20, 0) <= LINUX_VERSION_CODE) +#include +#endif +#ifndef __maybe_unused +#define __maybe_unused __attribute__((unused)) +#endif + #endif /* _VERSION_COMPAT_DEFS_H_ */ diff --git a/include/uapi/gpu/arm/bifrost/backend/gpu/mali_kbase_model_dummy.h b/include/uapi/gpu/arm/bifrost/backend/gpu/mali_kbase_model_dummy.h index 564f477e57d1..b80817f04255 100644 --- a/include/uapi/gpu/arm/bifrost/backend/gpu/mali_kbase_model_dummy.h +++ b/include/uapi/gpu/arm/bifrost/backend/gpu/mali_kbase_model_dummy.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2021-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2021-2024 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software diff --git a/include/uapi/gpu/arm/bifrost/csf/mali_base_csf_kernel.h b/include/uapi/gpu/arm/bifrost/csf/mali_base_csf_kernel.h index 0fb824267184..b4b38f242bd2 100644 --- a/include/uapi/gpu/arm/bifrost/csf/mali_base_csf_kernel.h +++ b/include/uapi/gpu/arm/bifrost/csf/mali_base_csf_kernel.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2020-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2020-2024 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -46,7 +46,11 @@ */ #define BASE_MEM_CSF_EVENT ((base_mem_alloc_flags)1 << 19) -#define BASE_MEM_RESERVED_BIT_20 ((base_mem_alloc_flags)1 << 20) +/* Unused bit for CSF, only used in JM for BASE_MEM_TILER_ALIGN_TOP */ +#define BASE_MEM_UNUSED_BIT_20 ((base_mem_alloc_flags)1 << 20) + +/* Unused bit for CSF, only used in JM for BASE_MEM_FLAG_MAP_FIXED */ +#define BASE_MEM_UNUSED_BIT_27 ((base_mem_alloc_flags)1 << 27) /* Must be FIXABLE memory: its GPU VA will be determined at a later point, * at which time it will be at a fixed GPU VA. @@ -62,9 +66,14 @@ */ #define BASEP_MEM_FLAGS_KERNEL_ONLY (BASEP_MEM_PERMANENT_KERNEL_MAPPING | BASEP_MEM_NO_USER_FREE) -/* A mask of all currently reserved flags - */ -#define BASE_MEM_FLAGS_RESERVED BASE_MEM_RESERVED_BIT_20 +/* A mask of all flags that should not be queried */ +#define BASE_MEM_DONT_QUERY (BASE_MEM_COHERENT_SYSTEM_REQUIRED | BASE_MEM_IMPORT_SHARED) + +/* A mask of all currently reserved flags */ +#define BASE_MEM_FLAGS_RESERVED ((base_mem_alloc_flags)0) + +/* A mask of all bits that are not used by a flag on CSF */ +#define BASE_MEM_FLAGS_UNUSED (BASE_MEM_UNUSED_BIT_20 | BASE_MEM_UNUSED_BIT_27) /* Special base mem handles specific to CSF. */ @@ -474,7 +483,26 @@ struct base_gpu_queue_error_fatal_payload { }; /** - * enum base_gpu_queue_group_error_type - GPU Fatal error type. + * struct base_gpu_queue_error_fault_payload - Recoverable fault + * error information related to GPU command queue. + * + * @sideband: Additional information about this recoverable fault. + * @status: Recoverable fault information. + * This consists of exception type (least significant byte) and + * data (remaining bytes). One example of exception type is + * INSTR_INVALID_PC (0x50). + * @csi_index: Index of the CSF interface the queue is bound to. 
+ * @padding: Padding to make multiple of 64bits + */ +struct base_gpu_queue_error_fault_payload { + __u64 sideband; + __u32 status; + __u8 csi_index; + __u8 padding[3]; +}; + +/** + * enum base_gpu_queue_group_error_type - GPU error type. * * @BASE_GPU_QUEUE_GROUP_ERROR_FATAL: Fatal error associated with GPU * command queue group. @@ -484,7 +512,9 @@ struct base_gpu_queue_error_fatal_payload { * progress timeout. * @BASE_GPU_QUEUE_GROUP_ERROR_TILER_HEAP_OOM: Fatal error due to running out * of tiler heap memory. - * @BASE_GPU_QUEUE_GROUP_ERROR_FATAL_COUNT: The number of fatal error types + * @BASE_GPU_QUEUE_GROUP_QUEUE_ERROR_FAULT: Fault error associated with GPU + * command queue. + * @BASE_GPU_QUEUE_GROUP_ERROR_FATAL_COUNT: The number of GPU error types * * This type is used for &struct_base_gpu_queue_group_error.error_type. */ @@ -493,6 +523,7 @@ enum base_gpu_queue_group_error_type { BASE_GPU_QUEUE_GROUP_QUEUE_ERROR_FATAL, BASE_GPU_QUEUE_GROUP_ERROR_TIMEOUT, BASE_GPU_QUEUE_GROUP_ERROR_TILER_HEAP_OOM, + BASE_GPU_QUEUE_GROUP_QUEUE_ERROR_FAULT, BASE_GPU_QUEUE_GROUP_ERROR_FATAL_COUNT }; @@ -512,6 +543,7 @@ struct base_gpu_queue_group_error { union { struct base_gpu_queue_group_error_fatal_payload fatal_group; struct base_gpu_queue_error_fatal_payload fatal_queue; + struct base_gpu_queue_error_fault_payload fault_queue; } payload; }; @@ -519,8 +551,7 @@ struct base_gpu_queue_group_error { * enum base_csf_notification_type - Notification type * * @BASE_CSF_NOTIFICATION_EVENT: Notification with kernel event - * @BASE_CSF_NOTIFICATION_GPU_QUEUE_GROUP_ERROR: Notification with GPU fatal - * error + * @BASE_CSF_NOTIFICATION_GPU_QUEUE_GROUP_ERROR: Notification with GPU error * @BASE_CSF_NOTIFICATION_CPU_QUEUE_DUMP: Notification with dumping cpu * queue * @BASE_CSF_NOTIFICATION_COUNT: The number of notification type diff --git a/include/uapi/gpu/arm/bifrost/csf/mali_kbase_csf_ioctl.h b/include/uapi/gpu/arm/bifrost/csf/mali_kbase_csf_ioctl.h index 537c90d6efa5..28e7db49f676 
100644 --- a/include/uapi/gpu/arm/bifrost/csf/mali_kbase_csf_ioctl.h +++ b/include/uapi/gpu/arm/bifrost/csf/mali_kbase_csf_ioctl.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2020-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2020-2024 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -95,15 +95,31 @@ * 1.22: * - Add comp_pri_threshold and comp_pri_ratio attributes to * kbase_ioctl_cs_queue_group_create. + * - Made the BASE_MEM_DONT_NEED memory flag queryable. * 1.23: * - Disallows changing the sharability on the GPU of imported dma-bufs to * BASE_MEM_COHERENT_SYSTEM using KBASE_IOCTL_MEM_FLAGS_CHANGE. * 1.24: * - Implement full block state support for hardware counters. + * 1.25: + * - Add support for CS_FAULT reporting to userspace + * 1.26: + * - Made the BASE_MEM_IMPORT_SYNC_ON_MAP_UNMAP and BASE_MEM_KERNEL_SYNC memory + * flags queryable. + * 1.27: + * - Implement support for HWC block state availability. + * 1.28: + * - Made the SAME_VA memory flag queryable. + * 1.29: + * - Re-allow child process to do supported file operations (like mmap, ioctl + * read, poll) on the file descriptor of mali device that was inherited + * from the parent process. + * 1.30: + * - Implement support for setting GPU Timestamp Offset register. */ #define BASE_UK_VERSION_MAJOR 1 -#define BASE_UK_VERSION_MINOR 24 +#define BASE_UK_VERSION_MINOR 30 /** * struct kbase_ioctl_version_check - Check version compatibility between @@ -340,6 +356,8 @@ union kbase_ioctl_cs_queue_group_create_1_18 { * @in.csi_handlers: Flags to signal that the application intends to use CSI * exception handlers in some linear buffers to deal with * the given exception types. + * @in.cs_fault_report_enable: Flag to indicate reporting of CS_FAULTs + * to userspace. 
* @in.padding: Currently unused, must be zero * @out: Output parameters * @out.group_handle: Handle of a newly created queue group. @@ -360,7 +378,8 @@ union kbase_ioctl_cs_queue_group_create { /** * @in.reserved: Reserved, currently unused, must be zero. */ - __u16 reserved; + __u8 reserved; + __u8 cs_fault_report_enable; /** * @in.dvs_buf: buffer for deferred vertex shader */ @@ -480,7 +499,7 @@ union kbase_ioctl_cs_tiler_heap_init { /** * union kbase_ioctl_cs_tiler_heap_init_1_13 - Initialize chunked tiler memory heap, - * earlier version upto 1.13 + * earlier version up to 1.13 * @in: Input parameters * @in.chunk_size: Size of each chunk. * @in.initial_chunks: Initial number of chunks that heap will be created with. @@ -637,6 +656,22 @@ union kbase_ioctl_read_user_page { #define KBASE_IOCTL_READ_USER_PAGE _IOWR(KBASE_IOCTL_TYPE, 60, union kbase_ioctl_read_user_page) +/** + * struct kbase_ioctl_queue_group_clear_faults - Re-enable CS FAULT reporting for the GPU queues + * + * @addr: CPU VA to an array of GPU VAs of the buffers backing the queues + * @nr_queues: Number of queues in the array + * @padding: Padding to round up to a multiple of 8 bytes, must be zero + */ +struct kbase_ioctl_queue_group_clear_faults { + __u64 addr; + __u32 nr_queues; + __u8 padding[4]; +}; + +#define KBASE_IOCTL_QUEUE_GROUP_CLEAR_FAULTS \ + _IOW(KBASE_IOCTL_TYPE, 61, struct kbase_ioctl_queue_group_clear_faults) + /*************** * test ioctls * ***************/ diff --git a/include/uapi/gpu/arm/bifrost/gpu/mali_kbase_gpu_id.h b/include/uapi/gpu/arm/bifrost/gpu/mali_kbase_gpu_id.h index d3478546e244..d4d12aed780d 100644 --- a/include/uapi/gpu/arm/bifrost/gpu/mali_kbase_gpu_id.h +++ b/include/uapi/gpu/arm/bifrost/gpu/mali_kbase_gpu_id.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2015-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2015-2024 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software diff --git a/include/uapi/gpu/arm/bifrost/jm/mali_base_jm_kernel.h b/include/uapi/gpu/arm/bifrost/jm/mali_base_jm_kernel.h index 9478334ce667..b5351c50aa30 100644 --- a/include/uapi/gpu/arm/bifrost/jm/mali_base_jm_kernel.h +++ b/include/uapi/gpu/arm/bifrost/jm/mali_base_jm_kernel.h @@ -30,15 +30,11 @@ * See base_mem_alloc_flags. */ -/* Used as BASE_MEM_FIXED in other backends */ -#define BASE_MEM_RESERVED_BIT_8 ((base_mem_alloc_flags)1 << 8) +/* Unused bit for JM, only used in CSF for BASE_MEM_FIXED */ +#define BASE_MEM_UNUSED_BIT_8 ((base_mem_alloc_flags)1 << 8) -/** - * BASE_MEM_RESERVED_BIT_19 - Bit 19 is reserved. - * - * Do not remove, use the next unreserved bit for new flags - */ -#define BASE_MEM_RESERVED_BIT_19 ((base_mem_alloc_flags)1 << 19) +/* Unused bit for JM, only used in CSF for BASE_MEM_CSF_EVENT */ +#define BASE_MEM_UNUSED_BIT_19 ((base_mem_alloc_flags)1 << 19) /** * BASE_MEM_TILER_ALIGN_TOP - Memory starting from the end of the initial commit is aligned @@ -64,9 +60,14 @@ (BASEP_MEM_PERMANENT_KERNEL_MAPPING | BASEP_MEM_NO_USER_FREE | BASE_MEM_FLAG_MAP_FIXED | \ BASEP_MEM_PERFORM_JIT_TRIM) -/* A mask of all currently reserved flags - */ -#define BASE_MEM_FLAGS_RESERVED (BASE_MEM_RESERVED_BIT_8 | BASE_MEM_RESERVED_BIT_19) +/* A mask of all flags that should not be queried */ +#define BASE_MEM_DONT_QUERY (BASE_MEM_COHERENT_SYSTEM_REQUIRED | BASE_MEM_IMPORT_SHARED) + +/* A mask of all currently reserved flags */ +#define BASE_MEM_FLAGS_RESERVED ((base_mem_alloc_flags)0) + +/* A mask of all bits that are not used by a flag on JM */ +#define BASE_MEM_FLAGS_UNUSED (BASE_MEM_UNUSED_BIT_8 | BASE_MEM_UNUSED_BIT_19) /* Similar to BASE_MEM_TILER_ALIGN_TOP, memory starting from the end of the * initial commit is aligned to 'extension' pages, where 'extension' must be a power diff --git
a/include/uapi/gpu/arm/bifrost/jm/mali_kbase_jm_ioctl.h b/include/uapi/gpu/arm/bifrost/jm/mali_kbase_jm_ioctl.h index 2a7a06a995be..1c115adb8172 100644 --- a/include/uapi/gpu/arm/bifrost/jm/mali_kbase_jm_ioctl.h +++ b/include/uapi/gpu/arm/bifrost/jm/mali_kbase_jm_ioctl.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2020-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2020-2024 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -149,15 +149,25 @@ * from the parent process. * 11.40: * - Remove KBASE_IOCTL_HWCNT_READER_SETUP and KBASE_HWCNT_READER_* ioctls. + * - Made the BASE_MEM_DONT_NEED memory flag queryable. * 11.41: * - Disallows changing the sharability on the GPU of imported dma-bufs to * BASE_MEM_COHERENT_SYSTEM using KBASE_IOCTL_MEM_FLAGS_CHANGE. * 11.42: * - Implement full block state support for hardware counters. + * 11.43: + * - Made the BASE_MEM_IMPORT_SYNC_ON_MAP_UNMAP and BASE_MEM_KERNEL_SYNC memory + * flags queryable. + * 11.44: + * - Made the SAME_VA memory flag queryable. + * 11.45: + * - Re-allow child process to do supported file operations (like mmap, ioctl + * read, poll) on the file descriptor of mali device that was inherited + * from the parent process. */ #define BASE_UK_VERSION_MAJOR 11 -#define BASE_UK_VERSION_MINOR 42 +#define BASE_UK_VERSION_MINOR 45 /** * struct kbase_ioctl_version_check - Check version compatibility between diff --git a/include/uapi/gpu/arm/bifrost/mali_base_kernel.h b/include/uapi/gpu/arm/bifrost/mali_base_kernel.h index cb1a1e8dd550..198d6b8a3942 100644 --- a/include/uapi/gpu/arm/bifrost/mali_base_kernel.h +++ b/include/uapi/gpu/arm/bifrost/mali_base_kernel.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. 
+ * (C) COPYRIGHT 2010-2024 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -89,10 +89,9 @@ typedef __u32 base_mem_alloc_flags; /* A mask of all the flags that can be returned via the base_mem_get_flags() * interface. */ -#define BASE_MEM_FLAGS_QUERYABLE \ - (BASE_MEM_FLAGS_INPUT_MASK & \ - ~(BASE_MEM_SAME_VA | BASE_MEM_COHERENT_SYSTEM_REQUIRED | BASE_MEM_IMPORT_SHARED | \ - BASE_MEM_FLAGS_RESERVED | BASEP_MEM_FLAGS_KERNEL_ONLY)) +#define BASE_MEM_FLAGS_QUERYABLE \ + (BASE_MEM_FLAGS_INPUT_MASK & ~(BASE_MEM_DONT_QUERY | BASE_MEM_FLAGS_RESERVED | \ + BASE_MEM_FLAGS_UNUSED | BASEP_MEM_FLAGS_KERNEL_ONLY)) /** * enum base_mem_import_type - Memory types supported by @a base_mem_import @@ -619,15 +618,15 @@ struct base_gpu_props { #define BASE_TIMEINFO_TIMESTAMP_FLAG (1U << 1) /* For GPU cycle counter */ #define BASE_TIMEINFO_CYCLE_COUNTER_FLAG (1U << 2) -/* Specify kernel GPU register timestamp */ -#define BASE_TIMEINFO_KERNEL_SOURCE_FLAG (1U << 30) -/* Specify userspace cntvct_el0 timestamp source */ -#define BASE_TIMEINFO_USER_SOURCE_FLAG (1U << 31) -#define BASE_TIMEREQUEST_ALLOWED_FLAGS \ - (BASE_TIMEINFO_MONOTONIC_FLAG | BASE_TIMEINFO_TIMESTAMP_FLAG | \ - BASE_TIMEINFO_CYCLE_COUNTER_FLAG | BASE_TIMEINFO_KERNEL_SOURCE_FLAG | \ - BASE_TIMEINFO_USER_SOURCE_FLAG) +/* Specify TimeRequest flags allowed if time source is CPU/GPU register */ +#define BASE_TIMEREQUEST_CPU_GPU_SRC_ALLOWED_FLAGS \ + (BASE_TIMEINFO_MONOTONIC_FLAG | BASE_TIMEINFO_TIMESTAMP_FLAG | \ + BASE_TIMEINFO_CYCLE_COUNTER_FLAG) + +/* Specify TimeRequest flags allowed if time source is system (user) space */ +#define BASE_TIMEREQUEST_SYSTEM_SRC_ALLOWED_FLAGS \ + (BASE_TIMEINFO_MONOTONIC_FLAG | BASE_TIMEINFO_TIMESTAMP_FLAG) /* Maximum number of source allocations allowed to create an alias allocation.
* This needs to be 4096 * 6 to allow cube map arrays with up to 4096 array