MALI: rockchip: Add separate src directory for Valhall driver from DDK g28p0-00eac0

Previously, Valhall and Bifrost GPUs shared a single driver source directory (drivers/gpu/arm/bifrost). However, starting from DDK r52 (g27), Bifrost GPUs are no longer supported. As a result, the Valhall GPU driver from DDK r53 (g28) must use a separate source directory (drivers/gpu/arm/valhall). There are also modifications in some header files outside of drivers/gpu/arm/. In addition, the configs related to Bifrost and Valhall GPUs have been removed from the defconfig file like rockchip_linux_defconfig, which does not reflect the current SoC. Instead, these configs have been migrated to the .config files such as rk3576.config, whose file names can reflect the current SoC. Therefore, for some SoCs, the kernel compilation command line needs to be adjusted. Change-Id: I0c4384212b4b679a728401f7f89ae839530f002b Signed-off-by: Zhen Chen <chenzhen@rock-chips.com>
2026-06-05 18:41:58 +09:00 · 2024-09-24 17:19:46 +08:00
parent 74328f507f
commit e54469c723
486 changed files with 187883 additions and 186 deletions
--- a/include/uapi/gpu/arm/valhall/backend/gpu/mali_kbase_model_dummy.h
+++ b/include/uapi/gpu/arm/valhall/backend/gpu/mali_kbase_model_dummy.h
@@ -0,0 +1,79 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *
+ * (C) COPYRIGHT 2021-2024 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+/*
+ * Dummy Model interface
+ */
+
+#ifndef _UAPI_KBASE_MODEL_DUMMY_H_
+#define _UAPI_KBASE_MODEL_DUMMY_H_
+
+#include <linux/types.h>
+
+#define KBASE_DUMMY_MODEL_COUNTER_HEADER_DWORDS (4)
+#if MALI_USE_CSF
+#define KBASE_DUMMY_MODEL_COUNTER_PER_CORE (65)
+#else /* MALI_USE_CSF */
+#define KBASE_DUMMY_MODEL_COUNTER_PER_CORE (60)
+#endif /* MALI_USE_CSF */
+#define KBASE_DUMMY_MODEL_COUNTERS_PER_BIT (4)
+#define KBASE_DUMMY_MODEL_COUNTER_ENABLED(enable_mask, ctr_idx) \
+	(enable_mask & (1U << (ctr_idx / KBASE_DUMMY_MODEL_COUNTERS_PER_BIT)))
+
+#define KBASE_DUMMY_MODEL_HEADERS_PER_BLOCK 4
+#define KBASE_DUMMY_MODEL_COUNTERS_PER_BLOCK KBASE_DUMMY_MODEL_COUNTER_PER_CORE
+#define KBASE_DUMMY_MODEL_VALUES_PER_BLOCK \
+	(KBASE_DUMMY_MODEL_COUNTERS_PER_BLOCK + KBASE_DUMMY_MODEL_HEADERS_PER_BLOCK)
+#define KBASE_DUMMY_MODEL_BLOCK_SIZE (KBASE_DUMMY_MODEL_VALUES_PER_BLOCK * sizeof(__u32))
+#define KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS 8
+#define KBASE_DUMMY_MODEL_MAX_SHADER_CORES 32
+#define KBASE_DUMMY_MODEL_MAX_FIRMWARE_BLOCKS 0
+#define KBASE_DUMMY_MODEL_MAX_NUM_HARDWARE_BLOCKS \
+	(1 + 1 + KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS + KBASE_DUMMY_MODEL_MAX_SHADER_CORES)
+#define KBASE_DUMMY_MODEL_MAX_NUM_PERF_BLOCKS \
+	(KBASE_DUMMY_MODEL_MAX_NUM_HARDWARE_BLOCKS + KBASE_DUMMY_MODEL_MAX_FIRMWARE_BLOCKS)
+#define KBASE_DUMMY_MODEL_COUNTER_TOTAL \
+	(KBASE_DUMMY_MODEL_MAX_NUM_PERF_BLOCKS * KBASE_DUMMY_MODEL_COUNTER_PER_CORE)
+#define KBASE_DUMMY_MODEL_MAX_VALUES_PER_SAMPLE \
+	(KBASE_DUMMY_MODEL_MAX_NUM_PERF_BLOCKS * KBASE_DUMMY_MODEL_VALUES_PER_BLOCK)
+#define KBASE_DUMMY_MODEL_MAX_SAMPLE_SIZE \
+	(KBASE_DUMMY_MODEL_MAX_NUM_PERF_BLOCKS * KBASE_DUMMY_MODEL_BLOCK_SIZE)
+
+/*
+ * Bit mask - no. bits set is no. cores
+ * Values obtained from talking to HW team
+ * Example: tODx has 10 cores, 0b11 1111 1111 -> 0x3FF
+ */
+#define DUMMY_IMPLEMENTATION_SHADER_PRESENT (0xFull)
+#define DUMMY_IMPLEMENTATION_SHADER_PRESENT_TBEX (0x7FFFull)
+#define DUMMY_IMPLEMENTATION_SHADER_PRESENT_TODX (0x3FFull)
+#define DUMMY_IMPLEMENTATION_SHADER_PRESENT_TTUX (0x7FFull)
+#define DUMMY_IMPLEMENTATION_SHADER_PRESENT_TTIX (0xFFFull)
+#define DUMMY_IMPLEMENTATION_SHADER_PRESENT_TKRX (0x1FFFull)
+#define DUMMY_IMPLEMENTATION_SHADER_PRESENT_TDRX (0x3FFFull)
+#define DUMMY_IMPLEMENTATION_L2_PRESENT (0x1ull)
+#define DUMMY_IMPLEMENTATION_TILER_PRESENT (0x1ull)
+#define DUMMY_IMPLEMENTATION_STACK_PRESENT (0xFull)
+
+#define DUMMY_IMPLEMENTATION_BASE_PRESENT (0x3FFFull)
+#define DUMMY_IMPLEMENTATION_NEURAL_PRESENT DUMMY_IMPLEMENTATION_BASE_PRESENT
+
+#endif /* _UAPI_KBASE_MODEL_DUMMY_H_ */
--- a/include/uapi/gpu/arm/valhall/backend/gpu/mali_kbase_model_linux.h
+++ b/include/uapi/gpu/arm/valhall/backend/gpu/mali_kbase_model_linux.h
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *
+ * (C) COPYRIGHT 2022-2024 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+/*
+ * Dummy Model interface
+ */
+
+#ifndef _UAPI_KBASE_MODEL_LINUX_H_
+#define _UAPI_KBASE_MODEL_LINUX_H_
+
+/* Generic model IRQs */
+#define MODEL_LINUX_JOB_IRQ (0x1 << 0)
+#define MODEL_LINUX_GPU_IRQ (0x1 << 1)
+#define MODEL_LINUX_MMU_IRQ (0x1 << 2)
+#define MODEL_LINUX_IRQAW_IRQ (0x1 << 3)
+
+#define MODEL_LINUX_IRQ_MASK \
+	(MODEL_LINUX_JOB_IRQ | MODEL_LINUX_GPU_IRQ | MODEL_LINUX_MMU_IRQ | MODEL_LINUX_IRQAW_IRQ)
+
+#endif /* _UAPI_KBASE_MODEL_LINUX_H_ */
--- a/include/uapi/gpu/arm/valhall/csf/mali_base_csf_kernel.h
+++ b/include/uapi/gpu/arm/valhall/csf/mali_base_csf_kernel.h
@@ -0,0 +1,610 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *
+ * (C) COPYRIGHT 2020-2024 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#ifndef _UAPI_BASE_CSF_KERNEL_H_
+#define _UAPI_BASE_CSF_KERNEL_H_
+
+#include <linux/types.h>
+#include "../mali_base_common_kernel.h"
+
+/* Valid set of just-in-time memory allocation flags */
+#define BASE_JIT_ALLOC_VALID_FLAGS ((__u8)0)
+
+/* flags for base context specific to CSF */
+
+/* Base context creates a CSF event notification thread.
+ *
+ * The creation of a CSF event notification thread is conditional but
+ * mandatory for the handling of CSF events.
+ */
+#define BASE_CONTEXT_CSF_EVENT_THREAD ((base_context_create_flags)1 << 2)
+
+/* Bitpattern describing the ::base_context_create_flags that can be
+ * passed to base_context_init()
+ */
+#define BASEP_CONTEXT_CREATE_ALLOWED_FLAGS                            \
+	(BASE_CONTEXT_CCTX_EMBEDDED | BASE_CONTEXT_CSF_EVENT_THREAD | \
+	 BASEP_CONTEXT_CREATE_KERNEL_FLAGS)
+
+/* Flags for base tracepoint specific to CSF */
+
+/* Enable KBase tracepoints for CSF builds */
+#define BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS (1U << 2)
+
+/* Enable additional CSF Firmware side tracepoints */
+#define BASE_TLSTREAM_ENABLE_CSFFW_TRACEPOINTS (1U << 3)
+
+#define BASE_TLSTREAM_FLAGS_MASK                                                        \
+	(BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS | BASE_TLSTREAM_JOB_DUMPING_ENABLED | \
+	 BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS | BASE_TLSTREAM_ENABLE_CSFFW_TRACEPOINTS)
+
+/* Number of pages mapped into the process address space for a bound GPU
+ * command queue. A pair of input/output pages and a Hw doorbell page
+ * are mapped to enable direct submission of commands to Hw.
+ */
+#define BASEP_QUEUE_NR_MMAP_USER_PAGES ((size_t)3)
+
+#define BASE_QUEUE_MAX_PRIORITY (15U)
+
+/* Sync32 object fields definition */
+#define BASEP_EVENT32_VAL_OFFSET (0U)
+#define BASEP_EVENT32_ERR_OFFSET (4U)
+#define BASEP_EVENT32_SIZE_BYTES (8U)
+
+/* Sync64 object fields definition */
+#define BASEP_EVENT64_VAL_OFFSET (0U)
+#define BASEP_EVENT64_ERR_OFFSET (8U)
+#define BASEP_EVENT64_SIZE_BYTES (16U)
+
+/* Sync32 object alignment, equal to its size */
+#define BASEP_EVENT32_ALIGN_BYTES (8U)
+
+/* Sync64 object alignment, equal to its size */
+#define BASEP_EVENT64_ALIGN_BYTES (16U)
+
+/* The upper limit for number of objects that could be waited/set per command.
+ * This limit is now enforced as internally the error inherit inputs are
+ * converted to 32-bit flags in a __u32 variable occupying a previously padding
+ * field.
+ */
+#define BASEP_KCPU_CQS_MAX_NUM_OBJS ((size_t)32)
+
+/* Minimum number of queue group supported by the GPU */
+#define BASEP_QUEUE_GROUP_MIN 1
+/* Maximum number of queue group supported by the GPU */
+#define BASEP_QUEUE_GROUP_MAX 31
+/* Minimum number of GPU queues per queue groups supported by the driver */
+#define BASEP_GPU_QUEUE_PER_QUEUE_GROUP_MIN 8
+/* Maximum number of GPU queues per queue groups supported by the driver */
+#define BASEP_GPU_QUEUE_PER_QUEUE_GROUP_MAX 32
+
+/* CSF CSI EXCEPTION_HANDLER_FLAGS */
+#define BASE_CSF_TILER_OOM_EXCEPTION_FLAG (1u << 0)
+#define BASE_CSF_EXCEPTION_HANDLER_FLAGS_MASK (BASE_CSF_TILER_OOM_EXCEPTION_FLAG)
+
+/* Initial value for LATEST_FLUSH register */
+#define POWER_DOWN_LATEST_FLUSH_VALUE ((__u32)1)
+
+/**
+ * enum base_kcpu_command_type - Kernel CPU queue command type.
+ * @BASE_KCPU_COMMAND_TYPE_FENCE_SIGNAL:       fence_signal,
+ * @BASE_KCPU_COMMAND_TYPE_FENCE_WAIT:         fence_wait,
+ * @BASE_KCPU_COMMAND_TYPE_CQS_WAIT:           cqs_wait,
+ * @BASE_KCPU_COMMAND_TYPE_CQS_SET:            cqs_set,
+ * @BASE_KCPU_COMMAND_TYPE_CQS_WAIT_OPERATION: cqs_wait_operation,
+ * @BASE_KCPU_COMMAND_TYPE_CQS_SET_OPERATION:  cqs_set_operation,
+ * @BASE_KCPU_COMMAND_TYPE_MAP_IMPORT:         map_import,
+ * @BASE_KCPU_COMMAND_TYPE_UNMAP_IMPORT:       unmap_import,
+ * @BASE_KCPU_COMMAND_TYPE_UNMAP_IMPORT_FORCE: unmap_import_force,
+ * @BASE_KCPU_COMMAND_TYPE_JIT_ALLOC:          jit_alloc,
+ * @BASE_KCPU_COMMAND_TYPE_JIT_FREE:           jit_free,
+ * @BASE_KCPU_COMMAND_TYPE_GROUP_SUSPEND:      group_suspend,
+ * @BASE_KCPU_COMMAND_TYPE_ERROR_BARRIER:      error_barrier
+ */
+enum base_kcpu_command_type {
+	BASE_KCPU_COMMAND_TYPE_FENCE_SIGNAL,
+	BASE_KCPU_COMMAND_TYPE_FENCE_WAIT,
+	BASE_KCPU_COMMAND_TYPE_CQS_WAIT,
+	BASE_KCPU_COMMAND_TYPE_CQS_SET,
+	BASE_KCPU_COMMAND_TYPE_CQS_WAIT_OPERATION,
+	BASE_KCPU_COMMAND_TYPE_CQS_SET_OPERATION,
+	BASE_KCPU_COMMAND_TYPE_MAP_IMPORT,
+	BASE_KCPU_COMMAND_TYPE_UNMAP_IMPORT,
+	BASE_KCPU_COMMAND_TYPE_UNMAP_IMPORT_FORCE,
+	BASE_KCPU_COMMAND_TYPE_JIT_ALLOC,
+	BASE_KCPU_COMMAND_TYPE_JIT_FREE,
+	BASE_KCPU_COMMAND_TYPE_GROUP_SUSPEND,
+	BASE_KCPU_COMMAND_TYPE_ERROR_BARRIER
+};
+
+/**
+ * enum base_queue_group_priority - Priority of a GPU Command Queue Group.
+ * @BASE_QUEUE_GROUP_PRIORITY_HIGH:     GPU Command Queue Group is of high
+ *                                      priority.
+ * @BASE_QUEUE_GROUP_PRIORITY_MEDIUM:   GPU Command Queue Group is of medium
+ *                                      priority.
+ * @BASE_QUEUE_GROUP_PRIORITY_LOW:      GPU Command Queue Group is of low
+ *                                      priority.
+ * @BASE_QUEUE_GROUP_PRIORITY_REALTIME: GPU Command Queue Group is of real-time
+ *                                      priority.
+ * @BASE_QUEUE_GROUP_PRIORITY_COUNT:    Number of GPU Command Queue Group
+ *                                      priority levels.
+ *
+ * Currently this is in order of highest to lowest, but if new levels are added
+ * then those new levels may be out of order to preserve the ABI compatibility
+ * with previous releases. At that point, ensure assignment to
+ * the 'priority' member in &kbase_queue_group is updated to ensure it remains
+ * a linear ordering.
+ *
+ * There should be no gaps in the enum, otherwise use of
+ * BASE_QUEUE_GROUP_PRIORITY_COUNT in kbase must be updated.
+ */
+enum base_queue_group_priority {
+	BASE_QUEUE_GROUP_PRIORITY_HIGH = 0,
+	BASE_QUEUE_GROUP_PRIORITY_MEDIUM,
+	BASE_QUEUE_GROUP_PRIORITY_LOW,
+	BASE_QUEUE_GROUP_PRIORITY_REALTIME,
+	BASE_QUEUE_GROUP_PRIORITY_COUNT
+};
+
+struct base_kcpu_command_fence_info {
+	__u64 fence;
+};
+
+struct base_cqs_wait_info {
+	__u64 addr;
+	__u32 val;
+	__u32 padding;
+};
+
+struct base_kcpu_command_cqs_wait_info {
+	__u64 objs;
+	__u32 nr_objs;
+	__u32 inherit_err_flags;
+};
+
+struct base_cqs_set {
+	__u64 addr;
+};
+
+struct base_kcpu_command_cqs_set_info {
+	__u64 objs;
+	__u32 nr_objs;
+	__u32 padding;
+};
+
+/**
+ * typedef basep_cqs_data_type - Enumeration of CQS Data Types
+ *
+ * @BASEP_CQS_DATA_TYPE_U32: The Data Type of a CQS Object's value
+ *                           is an unsigned 32-bit integer
+ * @BASEP_CQS_DATA_TYPE_U64: The Data Type of a CQS Object's value
+ *                           is an unsigned 64-bit integer
+ */
+typedef enum PACKED {
+	BASEP_CQS_DATA_TYPE_U32 = 0,
+	BASEP_CQS_DATA_TYPE_U64 = 1,
+} basep_cqs_data_type;
+
+/**
+ * typedef basep_cqs_wait_operation_op - Enumeration of CQS Object Wait
+ *                                Operation conditions
+ *
+ * @BASEP_CQS_WAIT_OPERATION_LE: CQS Wait Operation indicating that a
+ *                                wait will be satisfied when a CQS Object's
+ *                                value is Less than or Equal to
+ *                                the Wait Operation value
+ * @BASEP_CQS_WAIT_OPERATION_GT: CQS Wait Operation indicating that a
+ *                                wait will be satisfied when a CQS Object's
+ *                                value is Greater than the Wait Operation value
+ */
+typedef enum {
+	BASEP_CQS_WAIT_OPERATION_LE = 0,
+	BASEP_CQS_WAIT_OPERATION_GT = 1,
+} basep_cqs_wait_operation_op;
+
+struct base_cqs_wait_operation_info {
+	__u64 addr;
+	__u64 val;
+	__u8 operation;
+	__u8 data_type;
+	__u8 padding[6];
+};
+
+/**
+ * struct base_kcpu_command_cqs_wait_operation_info - structure which contains information
+ *		about the Timeline CQS wait objects
+ *
+ * @objs:              An array of Timeline CQS waits.
+ * @nr_objs:           Number of Timeline CQS waits in the array.
+ * @inherit_err_flags: Bit-pattern for the CQSs in the array who's error field
+ *                     to be served as the source for importing into the
+ *                     queue's error-state.
+ */
+struct base_kcpu_command_cqs_wait_operation_info {
+	__u64 objs;
+	__u32 nr_objs;
+	__u32 inherit_err_flags;
+};
+
+/**
+ * typedef basep_cqs_set_operation_op - Enumeration of CQS Set Operations
+ *
+ * @BASEP_CQS_SET_OPERATION_ADD: CQS Set operation for adding a value
+ *                                to a synchronization object
+ * @BASEP_CQS_SET_OPERATION_SET: CQS Set operation for setting the value
+ *                                of a synchronization object
+ */
+typedef enum {
+	BASEP_CQS_SET_OPERATION_ADD = 0,
+	BASEP_CQS_SET_OPERATION_SET = 1,
+} basep_cqs_set_operation_op;
+
+struct base_cqs_set_operation_info {
+	__u64 addr;
+	__u64 val;
+	__u8 operation;
+	__u8 data_type;
+	__u8 padding[6];
+};
+
+/**
+ * struct base_kcpu_command_cqs_set_operation_info - structure which contains information
+ *		about the Timeline CQS set objects
+ *
+ * @objs:    An array of Timeline CQS sets.
+ * @nr_objs: Number of Timeline CQS sets in the array.
+ * @padding: Structure padding, unused bytes.
+ */
+struct base_kcpu_command_cqs_set_operation_info {
+	__u64 objs;
+	__u32 nr_objs;
+	__u32 padding;
+};
+
+/**
+ * struct base_kcpu_command_import_info - structure which contains information
+ *		about the imported buffer.
+ *
+ * @handle:	Address of imported user buffer.
+ */
+struct base_kcpu_command_import_info {
+	__u64 handle;
+};
+
+/**
+ * struct base_kcpu_command_jit_alloc_info - structure which contains
+ *		information about jit memory allocation.
+ *
+ * @info:	An array of elements of the
+ *		struct base_jit_alloc_info type.
+ * @count:	The number of elements in the info array.
+ * @padding:	Padding to a multiple of 64 bits.
+ */
+struct base_kcpu_command_jit_alloc_info {
+	__u64 info;
+	__u8 count;
+	__u8 padding[7];
+};
+
+/**
+ * struct base_kcpu_command_jit_free_info - structure which contains
+ *		information about jit memory which is to be freed.
+ *
+ * @ids:	An array containing the JIT IDs to free.
+ * @count:	The number of elements in the ids array.
+ * @padding:	Padding to a multiple of 64 bits.
+ */
+struct base_kcpu_command_jit_free_info {
+	__u64 ids;
+	__u8 count;
+	__u8 padding[7];
+};
+
+/**
+ * struct base_kcpu_command_group_suspend_info - structure which contains
+ *		suspend buffer data captured for a suspended queue group.
+ *
+ * @buffer:		Pointer to an array of elements of the type char.
+ * @size:		Number of elements in the @buffer array.
+ * @group_handle:	Handle to the mapping of CSG.
+ * @padding:		padding to a multiple of 64 bits.
+ */
+struct base_kcpu_command_group_suspend_info {
+	__u64 buffer;
+	__u32 size;
+	__u8 group_handle;
+	__u8 padding[3];
+};
+
+
+/**
+ * struct base_kcpu_command - kcpu command.
+ * @type:	type of the kcpu command, one enum base_kcpu_command_type
+ * @padding:	padding to a multiple of 64 bits
+ * @info:	structure which contains information about the kcpu command;
+ *		actual type is determined by @p type
+ * @info.fence:              Fence
+ * @info.cqs_wait:           CQS wait
+ * @info.cqs_set:            CQS set
+ * @info.cqs_wait_operation: CQS wait operation
+ * @info.cqs_set_operation:  CQS set operation
+ * @info.import:             import
+ * @info.jit_alloc:          JIT allocation
+ * @info.jit_free:           JIT deallocation
+ * @info.suspend_buf_copy:   suspend buffer copy
+ * @info.sample_time:        sample time
+ * @info.padding:            padding
+ */
+struct base_kcpu_command {
+	__u8 type;
+	__u8 padding[sizeof(__u64) - sizeof(__u8)];
+	union {
+		struct base_kcpu_command_fence_info fence;
+		struct base_kcpu_command_cqs_wait_info cqs_wait;
+		struct base_kcpu_command_cqs_set_info cqs_set;
+		struct base_kcpu_command_cqs_wait_operation_info cqs_wait_operation;
+		struct base_kcpu_command_cqs_set_operation_info cqs_set_operation;
+		struct base_kcpu_command_import_info import;
+		struct base_kcpu_command_jit_alloc_info jit_alloc;
+		struct base_kcpu_command_jit_free_info jit_free;
+		struct base_kcpu_command_group_suspend_info suspend_buf_copy;
+		__u64 padding[2]; /* No sub-struct should be larger */
+	} info;
+};
+
+/**
+ * struct basep_cs_stream_control - CSI capabilities.
+ *
+ * @features: Features of this stream
+ * @padding:  Padding to a multiple of 64 bits.
+ */
+struct basep_cs_stream_control {
+	__u32 features;
+	__u32 padding;
+};
+
+/**
+ * struct basep_cs_group_control - CSG interface capabilities.
+ *
+ * @features:     Features of this group
+ * @stream_num:   Number of streams in this group
+ * @suspend_size: Size in bytes of the suspend buffer for this group
+ * @padding:      Padding to a multiple of 64 bits.
+ */
+struct basep_cs_group_control {
+	__u32 features;
+	__u32 stream_num;
+	__u32 suspend_size;
+	__u32 padding;
+};
+
+/**
+ * struct base_gpu_queue_group_error_fatal_payload - Unrecoverable fault
+ *        error information associated with GPU command queue group.
+ *
+ * @sideband:     Additional information of the unrecoverable fault.
+ * @status:       Unrecoverable fault information.
+ *                This consists of exception type (least significant byte) and
+ *                data (remaining bytes). One example of exception type is
+ *                CS_INVALID_INSTRUCTION (0x49).
+ * @padding:      Padding to make multiple of 64bits
+ */
+struct base_gpu_queue_group_error_fatal_payload {
+	__u64 sideband;
+	__u32 status;
+	__u8 padding[20];
+};
+
+/**
+ * struct base_gpu_queue_error_fatal_payload - Unrecoverable fault
+ *        error information related to GPU command queue.
+ *
+ * @sideband:     Additional information about this unrecoverable fault.
+ * @status:       Unrecoverable fault information.
+ *                This consists of exception type (least significant byte) and
+ *                data (remaining bytes). One example of exception type is
+ *                CS_INVALID_INSTRUCTION (0x49).
+ * @csi_index:    Index of the CSF interface the queue is bound to.
+ * @padding:      Padding to make multiple of 64bits
+ */
+struct base_gpu_queue_error_fatal_payload {
+	__u64 sideband;
+	__u32 status;
+	__u8 csi_index;
+	__u8 padding[6];
+	/**
+	 * @has_extra: Set to 0x1 (true) when the extra trace data is filled,
+	 * otherwise 0 (false)
+	 */
+	__u8 has_extra;
+	/** @trace_id0: The extra EXCEPTION_TRACE_ID0 value */
+	__u32 trace_id0;
+	/** @trace_id1: The extra EXCEPTION_TRACE_ID1 value */
+	__u32 trace_id1;
+	/** @trace_task: The extra EXCEPTION_TRACE_TASK value */
+	__u32 trace_task;
+};
+
+/**
+ * struct base_gpu_queue_error_fault_payload - Recoverable fault
+ *        error information related to GPU command queue.
+ *
+ * @sideband:     Additional information about this recoverable fault.
+ * @status:       Recoverable fault information.
+ *                This consists of exception type (least significant byte) and
+ *                data (remaining bytes). One example of exception type is
+ *                INSTR_INVALID_PC (0x50).
+ * @csi_index:    Index of the CSF interface the queue is bound to.
+ * @padding:      Padding to make multiple of 64bits
+ * @has_extra:    Set to 0x1 (true) when the extra trace data is filled,
+ *                otherwise 0 (false)
+ * @trace_id0:    The extra EXCEPTION_TRACE_ID0 value.
+ * @trace_id1:    The extra EXCEPTION_TRACE_ID1 value.
+ * @trace_task:   The extra EXCEPTION_TRACE_TASK value.
+ */
+struct base_gpu_queue_error_fault_payload {
+	__u64 sideband;
+	__u32 status;
+	__u8 csi_index;
+	__u8 padding[6];
+	__u8 has_extra;
+	__u32 trace_id0;
+	__u32 trace_id1;
+	__u32 trace_task;
+};
+
+/**
+ * enum base_gpu_queue_group_error_type - GPU error type.
+ *
+ * @BASE_GPU_QUEUE_GROUP_ERROR_FATAL:       Fatal error associated with GPU
+ *                                          command queue group.
+ * @BASE_GPU_QUEUE_GROUP_QUEUE_ERROR_FATAL: Fatal error associated with GPU
+ *                                          command queue.
+ * @BASE_GPU_QUEUE_GROUP_ERROR_TIMEOUT:     Fatal error associated with
+ *                                          progress timeout.
+ * @BASE_GPU_QUEUE_GROUP_ERROR_TILER_HEAP_OOM: Fatal error due to running out
+ *                                             of tiler heap memory.
+ * @BASE_GPU_QUEUE_GROUP_QUEUE_ERROR_FAULT: Fault error associated with GPU
+ *                                          command queue.
+ * @BASE_GPU_QUEUE_GROUP_ERROR_FATAL_COUNT: The number of GPU error types
+ *
+ * This type is used for &struct_base_gpu_queue_group_error.error_type.
+ */
+enum base_gpu_queue_group_error_type {
+	BASE_GPU_QUEUE_GROUP_ERROR_FATAL = 0,
+	BASE_GPU_QUEUE_GROUP_QUEUE_ERROR_FATAL,
+	BASE_GPU_QUEUE_GROUP_ERROR_TIMEOUT,
+	BASE_GPU_QUEUE_GROUP_ERROR_TILER_HEAP_OOM,
+	BASE_GPU_QUEUE_GROUP_QUEUE_ERROR_FAULT,
+	BASE_GPU_QUEUE_GROUP_ERROR_FATAL_COUNT
+};
+
+/**
+ * struct base_gpu_queue_group_error - Unrecoverable fault information
+ * @error_type:          Error type of @base_gpu_queue_group_error_type
+ *                       indicating which field in union payload is filled
+ * @padding:             Unused bytes for 64bit boundary
+ * @payload:             Input Payload
+ * @payload.fatal_group: Unrecoverable fault error associated with
+ *                       GPU command queue group
+ * @payload.fatal_queue: Unrecoverable fault error associated with command queue
+ */
+struct base_gpu_queue_group_error {
+	__u8 error_type;
+	__u8 padding[7];
+	union {
+		struct base_gpu_queue_group_error_fatal_payload fatal_group;
+		struct base_gpu_queue_error_fatal_payload fatal_queue;
+		struct base_gpu_queue_error_fault_payload fault_queue;
+	} payload;
+};
+
+/**
+ * enum base_csf_notification_type - Notification type
+ *
+ * @BASE_CSF_NOTIFICATION_EVENT:                 Notification with kernel event
+ * @BASE_CSF_NOTIFICATION_GPU_QUEUE_GROUP_ERROR: Notification with GPU error
+ * @BASE_CSF_NOTIFICATION_CPU_QUEUE_DUMP:        Notification with dumping cpu
+ *                                               queue
+ * @BASE_CSF_NOTIFICATION_COUNT:                 The number of notification type
+ *
+ * This type is used for &struct_base_csf_notification.type.
+ */
+enum base_csf_notification_type {
+	BASE_CSF_NOTIFICATION_EVENT = 0,
+	BASE_CSF_NOTIFICATION_GPU_QUEUE_GROUP_ERROR,
+	BASE_CSF_NOTIFICATION_CPU_QUEUE_DUMP,
+	BASE_CSF_NOTIFICATION_COUNT
+};
+
+/**
+ * struct base_csf_notification - Event or error notification
+ *
+ * @type:                      Notification type of @base_csf_notification_type
+ * @padding:                   Padding for 64bit boundary
+ * @payload:                   Input Payload
+ * @payload.align:             To fit the struct into a 64-byte cache line
+ * @payload.csg_error:         CSG error
+ * @payload.csg_error.handle:  Handle of GPU command queue group associated with
+ *                             fatal error
+ * @payload.csg_error.padding: Padding
+ * @payload.csg_error.error:   Unrecoverable fault error
+ *
+ */
+struct base_csf_notification {
+	__u8 type;
+	__u8 padding[7];
+	union {
+		struct {
+			__u8 handle;
+			__u8 padding[7];
+			struct base_gpu_queue_group_error error;
+		} csg_error;
+
+		__u8 align[56];
+	} payload;
+};
+
+/**
+ * struct mali_base_gpu_core_props - GPU core props info
+ *
+ * @product_id: Pro specific value.
+ * @version_status: Status of the GPU release. No defined values, but starts at
+ *   0 and increases by one for each release status (alpha, beta, EAC, etc.).
+ *   4 bit values (0-15).
+ * @minor_revision: Minor release number of the GPU. "P" part of an "RnPn"
+ *   release number.
+ *   8 bit values (0-255).
+ * @major_revision: Major release number of the GPU. "R" part of an "RnPn"
+ *   release number.
+ *   4 bit values (0-15).
+ * @padding: padding to align to 8-byte
+ * @gpu_freq_khz_max: The maximum GPU frequency. Reported to applications by
+ *   clGetDeviceInfo()
+ * @log2_program_counter_size: Size of the shader program counter, in bits.
+ * @texture_features: TEXTURE_FEATURES_x registers, as exposed by the GPU. This
+ *   is a bitpattern where a set bit indicates that the format is supported.
+ *   Before using a texture format, it is recommended that the corresponding
+ *   bit be checked.
+ * @paddings: Padding bytes.
+ * @gpu_available_memory_size: Theoretical maximum memory available to the GPU.
+ *   It is unlikely that a client will be able to allocate all of this memory
+ *   for their own purposes, but this at least provides an upper bound on the
+ *   memory available to the GPU.
+ *   This is required for OpenCL's clGetDeviceInfo() call when
+ *   CL_DEVICE_GLOBAL_MEM_SIZE is requested, for OpenCL GPU devices. The
+ *   client will not be expecting to allocate anywhere near this value.
+ */
+struct mali_base_gpu_core_props {
+	__u32 product_id;
+	__u16 version_status;
+	__u16 minor_revision;
+	__u16 major_revision;
+	__u16 padding;
+	__u32 gpu_freq_khz_max;
+	__u32 log2_program_counter_size;
+	__u32 texture_features[BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS];
+	__u8 paddings[4];
+	__u64 gpu_available_memory_size;
+};
+
+#endif /* _UAPI_BASE_CSF_KERNEL_H_ */
--- a/include/uapi/gpu/arm/valhall/csf/mali_kbase_csf_errors_dumpfault.h
+++ b/include/uapi/gpu/arm/valhall/csf/mali_kbase_csf_errors_dumpfault.h
@@ -0,0 +1,81 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *
+ * (C) COPYRIGHT 2022 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#ifndef _UAPI_KBASE_CSF_ERRORS_DUMPFAULT_H_
+#define _UAPI_KBASE_CSF_ERRORS_DUMPFAULT_H_
+
+/**
+ * enum dumpfault_error_type - Enumeration to define errors to be dumped
+ *
+ * @DF_NO_ERROR:                       No pending error
+ * @DF_CSG_SUSPEND_TIMEOUT:            CSG suspension timeout
+ * @DF_CSG_TERMINATE_TIMEOUT:          CSG group termination timeout
+ * @DF_CSG_START_TIMEOUT:              CSG start timeout
+ * @DF_CSG_RESUME_TIMEOUT:             CSG resume timeout
+ * @DF_CSG_EP_CFG_TIMEOUT:             CSG end point configuration timeout
+ * @DF_CSG_STATUS_UPDATE_TIMEOUT:      CSG status update timeout
+ * @DF_PROGRESS_TIMER_TIMEOUT:         Progress timer timeout
+ * @DF_FW_INTERNAL_ERROR:              Firmware internal error
+ * @DF_CS_FATAL:                       CS fatal error
+ * @DF_CS_FAULT:                       CS fault error
+ * @DF_FENCE_WAIT_TIMEOUT:             Fence wait timeout
+ * @DF_PROTECTED_MODE_EXIT_TIMEOUT:    P.mode exit timeout
+ * @DF_PROTECTED_MODE_ENTRY_FAILURE:   P.mode entrance failure
+ * @DF_PING_REQUEST_TIMEOUT:           Ping request timeout
+ * @DF_CORE_DOWNSCALE_REQUEST_TIMEOUT: DCS downscale request timeout
+ * @DF_TILER_OOM:                      Tiler Out-of-memory error
+ * @DF_GPU_PAGE_FAULT:                 GPU page fault
+ * @DF_BUS_FAULT:                      MMU BUS Fault
+ * @DF_GPU_PROTECTED_FAULT:            GPU P.mode fault
+ * @DF_AS_ACTIVE_STUCK:                AS active stuck
+ * @DF_GPU_SOFT_RESET_FAILURE:         GPU soft reset falure
+ *
+ * This is used for kbase to notify error type of an event whereby
+ * user space client will dump relevant debugging information via debugfs.
+ * @DF_NO_ERROR is used to indicate no pending fault, thus the client will
+ * be blocked on reading debugfs file till a fault happens.
+ */
+enum dumpfault_error_type {
+	DF_NO_ERROR = 0,
+	DF_CSG_SUSPEND_TIMEOUT,
+	DF_CSG_TERMINATE_TIMEOUT,
+	DF_CSG_START_TIMEOUT,
+	DF_CSG_RESUME_TIMEOUT,
+	DF_CSG_EP_CFG_TIMEOUT,
+	DF_CSG_STATUS_UPDATE_TIMEOUT,
+	DF_PROGRESS_TIMER_TIMEOUT,
+	DF_FW_INTERNAL_ERROR,
+	DF_CS_FATAL,
+	DF_CS_FAULT,
+	DF_FENCE_WAIT_TIMEOUT,
+	DF_PROTECTED_MODE_EXIT_TIMEOUT,
+	DF_PROTECTED_MODE_ENTRY_FAILURE,
+	DF_PING_REQUEST_TIMEOUT,
+	DF_CORE_DOWNSCALE_REQUEST_TIMEOUT,
+	DF_TILER_OOM,
+	DF_GPU_PAGE_FAULT,
+	DF_BUS_FAULT,
+	DF_GPU_PROTECTED_FAULT,
+	DF_AS_ACTIVE_STUCK,
+	DF_GPU_SOFT_RESET_FAILURE,
+};
+
+#endif /* _UAPI_KBASE_CSF_ERRORS_DUMPFAULT_H_ */
--- a/include/uapi/gpu/arm/valhall/csf/mali_kbase_csf_ioctl.h
+++ b/include/uapi/gpu/arm/valhall/csf/mali_kbase_csf_ioctl.h
@@ -0,0 +1,825 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *
+ * (C) COPYRIGHT 2020-2024 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#ifndef _UAPI_KBASE_CSF_IOCTL_H_
+#define _UAPI_KBASE_CSF_IOCTL_H_
+
+#include <asm-generic/ioctl.h>
+#include <linux/types.h>
+
+/*
+ * 1.0:
+ * - CSF IOCTL header separated from JM
+ * 1.1:
+ * - Add a new priority level BASE_QUEUE_GROUP_PRIORITY_REALTIME
+ * - Add ioctl 54: This controls the priority setting.
+ * 1.2:
+ * - Add new CSF GPU_FEATURES register into the property structure
+ *   returned by KBASE_IOCTL_GET_GPUPROPS
+ * 1.3:
+ * - Add __u32 group_uid member to
+ *   &struct_kbase_ioctl_cs_queue_group_create.out
+ * 1.4:
+ * - Replace padding in kbase_ioctl_cs_get_glb_iface with
+ *   instr_features member of same size
+ * 1.5:
+ * - Add ioctl 40: kbase_ioctl_cs_queue_register_ex, this is a new
+ *   queue registration call with extended format for supporting CS
+ *   trace configurations with CSF trace_command.
+ * 1.6:
+ * - Added new HW performance counters interface to all GPUs.
+ * 1.7:
+ * - Added reserved field to QUEUE_GROUP_CREATE ioctl for future use
+ * 1.8:
+ * - Removed Kernel legacy HWC interface
+ * 1.9:
+ * - Reorganization of GPU-VA memory zones, including addition of
+ *   FIXED_VA zone and auto-initialization of EXEC_VA zone.
+ * - Added new Base memory allocation interface
+ * 1.10:
+ * - First release of new HW performance counters interface.
+ * 1.11:
+ * - Dummy model (no mali) backend will now clear HWC values after each sample
+ * 1.12:
+ * - Added support for incremental rendering flag in CSG create call
+ * 1.13:
+ * - Added ioctl to query a register of USER page.
+ * 1.14:
+ * - Added support for passing down the buffer descriptor VA in tiler heap init
+ * 1.15:
+ * - Enable new sync_wait GE condition
+ * 1.16:
+ * - Remove legacy definitions:
+ *   - base_jit_alloc_info_10_2
+ *   - base_jit_alloc_info_11_5
+ *   - kbase_ioctl_mem_jit_init_10_2
+ *   - kbase_ioctl_mem_jit_init_11_5
+ * 1.17:
+ * - Fix kinstr_prfcnt issues:
+ *   - Missing implicit sample for CMD_STOP when HWCNT buffer is full.
+ *   - Race condition when stopping periodic sampling.
+ *   - prfcnt_block_metadata::block_idx gaps.
+ *   - PRFCNT_CONTROL_CMD_SAMPLE_ASYNC is removed.
+ * 1.18:
+ * - Relax the requirement to create a mapping with BASE_MEM_MAP_TRACKING_HANDLE
+ *   before allocating GPU memory for the context.
+ * - CPU mappings of USER_BUFFER imported memory handles must be cached.
+ * 1.19:
+ * - Add NE support in queue_group_create IOCTL fields
+ * - Previous version retained as KBASE_IOCTL_CS_QUEUE_GROUP_CREATE_1_18 for
+ *     backward compatibility.
+ * 1.20:
+ * - Restrict child process from doing supported file operations (like mmap, ioctl,
+ *   read, poll) on the file descriptor of mali device file that was inherited
+ *   from the parent process.
+ * 1.21:
+ * - Remove KBASE_IOCTL_HWCNT_READER_SETUP and KBASE_HWCNT_READER_* ioctls.
+ * 1.22:
+ * - Add comp_pri_threshold and comp_pri_ratio attributes to
+ *   kbase_ioctl_cs_queue_group_create.
+ * - Made the BASE_MEM_DONT_NEED memory flag queryable.
+ * 1.23:
+ * - Disallows changing the sharability on the GPU of imported dma-bufs to
+ *   BASE_MEM_COHERENT_SYSTEM using KBASE_IOCTL_MEM_FLAGS_CHANGE.
+ * 1.24:
+ * - Implement full block state support for hardware counters.
+ * 1.25:
+ * - Add support for CS_FAULT reporting to userspace
+ * 1.26:
+ * - Made the BASE_MEM_IMPORT_SYNC_ON_MAP_UNMAP and BASE_MEM_KERNEL_SYNC memory
+ *   flags queryable.
+ * 1.27:
+ * - Implement support for HWC block state availability.
+ * 1.28:
+ * - Made the SAME_VA memory flag queryable.
+ * 1.29:
+ * - Re-allow child process to do supported file operations (like mmap, ioctl
+ *   read, poll) on the file descriptor of mali device that was inherited
+ *   from the parent process.
+ * 1.30:
+ * - Implement support for setting GPU Timestamp Offset register.
+ * 1.31:
+ * - Reject non-protected allocations containing the BASE_MEM_PROTECTED memory flag.
+ * - Reject allocations containing the BASE_MEM_DONT_NEED memory flag (it is only settable).
+ * - Reject allocations containing the BASE_MEM_UNUSED_BIT_xx memory flags.
+ * 1.32:
+ * - Add UNUSED_BIT_5 and UNUSED_BIT_7 previously occupied by kernel-only flags
+ *   to kbase cap table.
+ * 1.33:
+ * - Increased KBASE_MEM_PROFILE_MAX_BUF_SIZE for more cctx memory classes.
+ * 1.34:
+ * - Added ioctl to query tiler heap size and peak size.
+ * 1.35:
+ * - Move CS_USER IO allocation from CS binding to CSG creation.
+ * 1.36:
+ * - Add reserved field to QUEUE_GROUP_CREATE ioctl for future use.
+ * - Previous version retained as KBASE_IOCTL_CS_QUEUE_GROUP_CREATE_1_35
+ *   for backwards compatibility.
+ */
+
+#define BASE_UK_VERSION_MAJOR 1
+#define BASE_UK_VERSION_MINOR 36
+
+/**
+ * struct kbase_ioctl_version_check - Check version compatibility between
+ * kernel and userspace
+ *
+ * @major: Major version number
+ * @minor: Minor version number
+ */
+struct kbase_ioctl_version_check {
+	__u16 major;
+	__u16 minor;
+};
+
+#define KBASE_IOCTL_VERSION_CHECK_RESERVED \
+	_IOWR(KBASE_IOCTL_TYPE, 0, struct kbase_ioctl_version_check)
+
+/**
+ * struct kbase_ioctl_cs_queue_register - Register a GPU command queue with the
+ *                                        base back-end
+ *
+ * @buffer_gpu_addr: GPU address of the buffer backing the queue
+ * @buffer_size: Size of the buffer in bytes
+ * @priority: Priority of the queue within a group when run within a process
+ * @padding: Currently unused, must be zero
+ *
+ * Note: There is an identical sub-section in kbase_ioctl_cs_queue_register_ex.
+ *        Any change of this struct should also be mirrored to the latter.
+ */
+struct kbase_ioctl_cs_queue_register {
+	__u64 buffer_gpu_addr;
+	__u32 buffer_size;
+	__u8 priority;
+	__u8 padding[3];
+};
+
+#define KBASE_IOCTL_CS_QUEUE_REGISTER \
+	_IOW(KBASE_IOCTL_TYPE, 36, struct kbase_ioctl_cs_queue_register)
+
+/**
+ * struct kbase_ioctl_cs_queue_kick - Kick the GPU command queue group scheduler
+ *                                    to notify that a queue has been updated
+ *
+ * @buffer_gpu_addr: GPU address of the buffer backing the queue
+ */
+struct kbase_ioctl_cs_queue_kick {
+	__u64 buffer_gpu_addr;
+};
+
+#define KBASE_IOCTL_CS_QUEUE_KICK _IOW(KBASE_IOCTL_TYPE, 37, struct kbase_ioctl_cs_queue_kick)
+
+/**
+ * union kbase_ioctl_cs_queue_bind - Bind a GPU command queue to a group
+ *
+ * @in:                 Input parameters
+ * @in.buffer_gpu_addr: GPU address of the buffer backing the queue
+ * @in.group_handle:    Handle of the group to which the queue should be bound
+ * @in.csi_index:       Index of the CSF interface the queue should be bound to
+ * @in.padding:         Currently unused, must be zero
+ * @out:                Output parameters
+ * @out.mmap_handle:    Handle to be used for creating the mapping of CS
+ *                      input/output pages
+ */
+union kbase_ioctl_cs_queue_bind {
+	struct {
+		__u64 buffer_gpu_addr;
+		__u8 group_handle;
+		__u8 csi_index;
+		__u8 padding[6];
+	} in;
+	struct {
+		__u64 mmap_handle;
+	} out;
+};
+
+#define KBASE_IOCTL_CS_QUEUE_BIND _IOWR(KBASE_IOCTL_TYPE, 39, union kbase_ioctl_cs_queue_bind)
+
+/**
+ * struct kbase_ioctl_cs_queue_register_ex - Register a GPU command queue with the
+ *                                           base back-end in extended format,
+ *                                           involving trace buffer configuration
+ *
+ * @buffer_gpu_addr: GPU address of the buffer backing the queue
+ * @buffer_size: Size of the buffer in bytes
+ * @priority: Priority of the queue within a group when run within a process
+ * @padding: Currently unused, must be zero
+ * @ex_offset_var_addr: GPU address of the trace buffer write offset variable
+ * @ex_buffer_base: Trace buffer GPU base address for the queue
+ * @ex_buffer_size: Size of the trace buffer in bytes
+ * @ex_event_size: Trace event write size, in log2 designation
+ * @ex_event_state: Trace event states configuration
+ * @ex_padding: Currently unused, must be zero
+ *
+ * Note: There is an identical sub-section at the start of this struct to that
+ *        of @ref kbase_ioctl_cs_queue_register. Any change of this sub-section
+ *        must also be mirrored to the latter. Following the said sub-section,
+ *        the remaining fields forms the extension, marked with ex_*.
+ */
+struct kbase_ioctl_cs_queue_register_ex {
+	__u64 buffer_gpu_addr;
+	__u32 buffer_size;
+	__u8 priority;
+	__u8 padding[3];
+	__u64 ex_offset_var_addr;
+	__u64 ex_buffer_base;
+	__u32 ex_buffer_size;
+	__u8 ex_event_size;
+	__u8 ex_event_state;
+	__u8 ex_padding[2];
+};
+
+#define KBASE_IOCTL_CS_QUEUE_REGISTER_EX \
+	_IOW(KBASE_IOCTL_TYPE, 40, struct kbase_ioctl_cs_queue_register_ex)
+
+/**
+ * struct kbase_ioctl_cs_queue_terminate - Terminate a GPU command queue
+ *
+ * @buffer_gpu_addr: GPU address of the buffer backing the queue
+ */
+struct kbase_ioctl_cs_queue_terminate {
+	__u64 buffer_gpu_addr;
+};
+
+#define KBASE_IOCTL_CS_QUEUE_TERMINATE \
+	_IOW(KBASE_IOCTL_TYPE, 41, struct kbase_ioctl_cs_queue_terminate)
+
+/**
+ * union kbase_ioctl_cs_queue_group_create_1_6 - Create a GPU command queue
+ *                                               group
+ * @in:               Input parameters
+ * @in.tiler_mask:    Mask of tiler endpoints the group is allowed to use.
+ * @in.fragment_mask: Mask of fragment endpoints the group is allowed to use.
+ * @in.compute_mask:  Mask of compute endpoints the group is allowed to use.
+ * @in.cs_min:        Minimum number of CSs required.
+ * @in.priority:      Queue group's priority within a process.
+ * @in.tiler_max:     Maximum number of tiler endpoints the group is allowed
+ *                    to use.
+ * @in.fragment_max:  Maximum number of fragment endpoints the group is
+ *                    allowed to use.
+ * @in.compute_max:   Maximum number of compute endpoints the group is allowed
+ *                    to use.
+ * @in.padding:       Currently unused, must be zero
+ * @out:              Output parameters
+ * @out.group_handle: Handle of a newly created queue group.
+ * @out.padding:      Currently unused, must be zero
+ * @out.group_uid:    UID of the queue group available to base.
+ */
+union kbase_ioctl_cs_queue_group_create_1_6 {
+	struct {
+		__u64 tiler_mask;
+		__u64 fragment_mask;
+		__u64 compute_mask;
+		__u8 cs_min;
+		__u8 priority;
+		__u8 tiler_max;
+		__u8 fragment_max;
+		__u8 compute_max;
+		__u8 padding[3];
+
+	} in;
+	struct {
+		__u8 group_handle;
+		__u8 padding[3];
+		__u32 group_uid;
+	} out;
+};
+
+#define KBASE_IOCTL_CS_QUEUE_GROUP_CREATE_1_6 \
+	_IOWR(KBASE_IOCTL_TYPE, 42, union kbase_ioctl_cs_queue_group_create_1_6)
+
+/**
+ * union kbase_ioctl_cs_queue_group_create_1_18 - Create a GPU command queue group
+ * @in:               Input parameters
+ * @in.tiler_mask:    Mask of tiler endpoints the group is allowed to use.
+ * @in.fragment_mask: Mask of fragment endpoints the group is allowed to use.
+ * @in.compute_mask:  Mask of compute endpoints the group is allowed to use.
+ * @in.cs_min:        Minimum number of CSs required.
+ * @in.priority:      Queue group's priority within a process.
+ * @in.tiler_max:     Maximum number of tiler endpoints the group is allowed
+ *                    to use.
+ * @in.fragment_max:  Maximum number of fragment endpoints the group is
+ *                    allowed to use.
+ * @in.compute_max:   Maximum number of compute endpoints the group is allowed
+ *                    to use.
+ * @in.csi_handlers:  Flags to signal that the application intends to use CSI
+ *                    exception handlers in some linear buffers to deal with
+ *                    the given exception types.
+ * @in.padding:       Currently unused, must be zero
+ * @out:              Output parameters
+ * @out.group_handle: Handle of a newly created queue group.
+ * @out.padding:      Currently unused, must be zero
+ * @out.group_uid:    UID of the queue group available to base.
+ */
+union kbase_ioctl_cs_queue_group_create_1_18 {
+	struct {
+		__u64 tiler_mask;
+		__u64 fragment_mask;
+		__u64 compute_mask;
+		__u8 cs_min;
+		__u8 priority;
+		__u8 tiler_max;
+		__u8 fragment_max;
+		__u8 compute_max;
+		__u8 csi_handlers;
+		__u8 padding[2];
+		/**
+		 * @in.dvs_buf: buffer for deferred vertex shader
+		 */
+		__u64 dvs_buf;
+	} in;
+	struct {
+		__u8 group_handle;
+		__u8 padding[3];
+		__u32 group_uid;
+	} out;
+};
+
+#define KBASE_IOCTL_CS_QUEUE_GROUP_CREATE_1_18 \
+	_IOWR(KBASE_IOCTL_TYPE, 58, union kbase_ioctl_cs_queue_group_create_1_18)
+
+/**
+ * union kbase_ioctl_cs_queue_group_create_1_35 - Create a GPU command queue group
+ * @in:               Input parameters
+ * @in.tiler_mask:    Mask of tiler endpoints the group is allowed to use.
+ * @in.fragment_mask: Mask of fragment endpoints the group is allowed to use.
+ * @in.compute_mask:  Mask of compute endpoints the group is allowed to use.
+ * @in.cs_min:        Minimum number of CSs required.
+ * @in.priority:      Queue group's priority within a process.
+ * @in.tiler_max:     Maximum number of tiler endpoints the group is allowed
+ *                    to use.
+ * @in.fragment_max:  Maximum number of fragment endpoints the group is
+ *                    allowed to use.
+ * @in.compute_max:   Maximum number of compute endpoints the group is allowed
+ *                    to use.
+ * @in.csi_handlers:  Flags to signal that the application intends to use CSI
+ *                    exception handlers in some linear buffers to deal with
+ *                    the given exception types.
+ * @in.neural_max:    Maximum number of neural endpoints the group is
+ *                    allowed to use.
+ * @in.cs_fault_report_enable:  Flag to indicate reporting of CS_FAULTs
+ *                    to userspace.
+ * @in.dvs_buf: buffer for deferred vertex shader
+ * @in.neural_mask:   Mask of neural endpoints the group is allowed to use.
+ * @in.comp_pri_threshold: The number of compute endpoints required to be
+ *                         allocated to the GPU queue group before compute
+ *                         endpoints are prioritized for compute iterator.
+ * @in.comp_pri_ratio: The ratio of the cores after comp_pri_threshold
+ *                     has been reached which are prioritized for compute
+ *                     iterator tasks.
+ * @in.padding:       Currently unused, must be zero
+ * @out:              Output parameters
+ * @out.group_handle: Handle of a newly created queue group.
+ * @out.padding:      Currently unused, must be zero
+ * @out.group_uid:    UID of the queue group available to base.
+ */
+union kbase_ioctl_cs_queue_group_create_1_35 {
+	struct {
+		__u64 tiler_mask;
+		__u64 fragment_mask;
+		__u64 compute_mask;
+		__u8 cs_min;
+		__u8 priority;
+		__u8 tiler_max;
+		__u8 fragment_max;
+		__u8 compute_max;
+		__u8 csi_handlers;
+		__u8 neural_max;
+		__u8 cs_fault_report_enable;
+		__u64 dvs_buf;
+		__u64 neural_mask;
+		__u8 comp_pri_threshold;
+		__u8 comp_pri_ratio;
+		__u8 padding[62];
+	} in;
+	struct {
+		__u8 group_handle;
+		__u8 padding[3];
+		__u32 group_uid;
+	} out;
+};
+
+#define KBASE_IOCTL_CS_QUEUE_GROUP_CREATE_1_35 \
+	_IOWR(KBASE_IOCTL_TYPE, 58, union kbase_ioctl_cs_queue_group_create_1_35)
+
+/**
+ * struct kbase_ioctl_cs_queue_group_term - Terminate a GPU command queue group
+ *
+ * @group_handle: Handle of the queue group to be terminated
+ * @padding: Padding to round up to a multiple of 8 bytes, must be zero
+ */
+struct kbase_ioctl_cs_queue_group_term {
+	__u8 group_handle;
+	__u8 padding[7];
+};
+
+#define KBASE_IOCTL_CS_QUEUE_GROUP_TERMINATE \
+	_IOW(KBASE_IOCTL_TYPE, 43, struct kbase_ioctl_cs_queue_group_term)
+
+#define KBASE_IOCTL_CS_EVENT_SIGNAL _IO(KBASE_IOCTL_TYPE, 44)
+
+typedef __u8 base_kcpu_queue_id; /* We support up to 256 active KCPU queues */
+
+/**
+ * struct kbase_ioctl_kcpu_queue_new - Create a KCPU command queue
+ *
+ * @id: ID of the new command queue returned by the kernel
+ * @padding: Padding to round up to a multiple of 8 bytes, must be zero
+ */
+struct kbase_ioctl_kcpu_queue_new {
+	base_kcpu_queue_id id;
+	__u8 padding[7];
+};
+
+#define KBASE_IOCTL_KCPU_QUEUE_CREATE _IOR(KBASE_IOCTL_TYPE, 45, struct kbase_ioctl_kcpu_queue_new)
+
+/**
+ * struct kbase_ioctl_kcpu_queue_delete - Destroy a KCPU command queue
+ *
+ * @id: ID of the command queue to be destroyed
+ * @padding: Padding to round up to a multiple of 8 bytes, must be zero
+ */
+struct kbase_ioctl_kcpu_queue_delete {
+	base_kcpu_queue_id id;
+	__u8 padding[7];
+};
+
+#define KBASE_IOCTL_KCPU_QUEUE_DELETE \
+	_IOW(KBASE_IOCTL_TYPE, 46, struct kbase_ioctl_kcpu_queue_delete)
+
+/**
+ * struct kbase_ioctl_kcpu_queue_enqueue - Enqueue commands into the KCPU queue
+ *
+ * @addr: Memory address of an array of struct base_kcpu_queue_command
+ * @nr_commands: Number of commands in the array
+ * @id: kcpu queue identifier, returned by KBASE_IOCTL_KCPU_QUEUE_CREATE ioctl
+ * @padding: Padding to round up to a multiple of 8 bytes, must be zero
+ */
+struct kbase_ioctl_kcpu_queue_enqueue {
+	__u64 addr;
+	__u32 nr_commands;
+	base_kcpu_queue_id id;
+	__u8 padding[3];
+};
+
+#define KBASE_IOCTL_KCPU_QUEUE_ENQUEUE \
+	_IOW(KBASE_IOCTL_TYPE, 47, struct kbase_ioctl_kcpu_queue_enqueue)
+
+/**
+ * union kbase_ioctl_cs_tiler_heap_init - Initialize chunked tiler memory heap
+ * @in:                Input parameters
+ * @in.chunk_size:     Size of each chunk.
+ * @in.initial_chunks: Initial number of chunks that heap will be created with.
+ * @in.max_chunks:     Maximum number of chunks that the heap is allowed to use.
+ * @in.target_in_flight: Number of render-passes that the driver should attempt to
+ *                     keep in flight for which allocation of new chunks is
+ *                     allowed.
+ * @in.group_id:       Group ID to be used for physical allocations.
+ * @in.padding:        Padding
+ * @in.buf_desc_va:    Buffer descriptor GPU VA for tiler heap reclaims.
+ * @out:               Output parameters
+ * @out.gpu_heap_va:   GPU VA (virtual address) of Heap context that was set up
+ *                     for the heap.
+ * @out.first_chunk_va: GPU VA of the first chunk allocated for the heap,
+ *                     actually points to the header of heap chunk and not to
+ *                     the low address of free memory in the chunk.
+ */
+union kbase_ioctl_cs_tiler_heap_init {
+	struct {
+		__u32 chunk_size;
+		__u32 initial_chunks;
+		__u32 max_chunks;
+		__u16 target_in_flight;
+		__u8 group_id;
+		__u8 padding;
+		__u64 buf_desc_va;
+	} in;
+	struct {
+		__u64 gpu_heap_va;
+		__u64 first_chunk_va;
+	} out;
+};
+
+#define KBASE_IOCTL_CS_TILER_HEAP_INIT \
+	_IOWR(KBASE_IOCTL_TYPE, 48, union kbase_ioctl_cs_tiler_heap_init)
+
+/**
+ * union kbase_ioctl_cs_tiler_heap_init_1_13 - Initialize chunked tiler memory heap,
+ *                                             earlier version up to 1.13
+ * @in:                Input parameters
+ * @in.chunk_size:     Size of each chunk.
+ * @in.initial_chunks: Initial number of chunks that heap will be created with.
+ * @in.max_chunks:     Maximum number of chunks that the heap is allowed to use.
+ * @in.target_in_flight: Number of render-passes that the driver should attempt to
+ *                     keep in flight for which allocation of new chunks is
+ *                     allowed.
+ * @in.group_id:       Group ID to be used for physical allocations.
+ * @in.padding:        Padding
+ * @out:               Output parameters
+ * @out.gpu_heap_va:   GPU VA (virtual address) of Heap context that was set up
+ *                     for the heap.
+ * @out.first_chunk_va: GPU VA of the first chunk allocated for the heap,
+ *                     actually points to the header of heap chunk and not to
+ *                     the low address of free memory in the chunk.
+ */
+union kbase_ioctl_cs_tiler_heap_init_1_13 {
+	struct {
+		__u32 chunk_size;
+		__u32 initial_chunks;
+		__u32 max_chunks;
+		__u16 target_in_flight;
+		__u8 group_id;
+		__u8 padding;
+	} in;
+	struct {
+		__u64 gpu_heap_va;
+		__u64 first_chunk_va;
+	} out;
+};
+
+#define KBASE_IOCTL_CS_TILER_HEAP_INIT_1_13 \
+	_IOWR(KBASE_IOCTL_TYPE, 48, union kbase_ioctl_cs_tiler_heap_init_1_13)
+
+/**
+ * struct kbase_ioctl_cs_tiler_heap_term - Terminate a chunked tiler heap
+ *                                         instance
+ *
+ * @gpu_heap_va: GPU VA of Heap context that was set up for the heap.
+ */
+struct kbase_ioctl_cs_tiler_heap_term {
+	__u64 gpu_heap_va;
+};
+
+#define KBASE_IOCTL_CS_TILER_HEAP_TERM \
+	_IOW(KBASE_IOCTL_TYPE, 49, struct kbase_ioctl_cs_tiler_heap_term)
+
+/**
+ * union kbase_ioctl_cs_get_glb_iface - Request the global control block
+ *                                        of CSF interface capabilities
+ *
+ * @in:                    Input parameters
+ * @in.max_group_num:      The maximum number of groups to be read. Can be 0, in
+ *                         which case groups_ptr is unused.
+ * @in.max_total_stream_num: The maximum number of CSs to be read. Can be 0, in
+ *                         which case streams_ptr is unused.
+ * @in.groups_ptr:         Pointer where to store all the group data (sequentially).
+ * @in.streams_ptr:        Pointer where to store all the CS data (sequentially).
+ * @out:                   Output parameters
+ * @out.glb_version:       Global interface version.
+ * @out.features:          Bit mask of features (e.g. whether certain types of job
+ *                         can be suspended).
+ * @out.group_num:         Number of CSGs supported.
+ * @out.prfcnt_size:       Size of CSF performance counters, in bytes. Bits 31:16
+ *                         hold the size of firmware performance counter data
+ *                         and 15:0 hold the size of hardware performance counter
+ *                         data.
+ * @out.total_stream_num:  Total number of CSs, summed across all groups.
+ * @out.instr_features:    Instrumentation features. Bits 7:4 hold the maximum
+ *                         size of events. Bits 3:0 hold the offset update rate.
+ *                         (csf >= 1.1.0)
+ *
+ */
+union kbase_ioctl_cs_get_glb_iface {
+	struct {
+		__u32 max_group_num;
+		__u32 max_total_stream_num;
+		__u64 groups_ptr;
+		__u64 streams_ptr;
+	} in;
+	struct {
+		__u32 glb_version;
+		__u32 features;
+		__u32 group_num;
+		__u32 prfcnt_size;
+		__u32 total_stream_num;
+		__u32 instr_features;
+	} out;
+};
+
+#define KBASE_IOCTL_CS_GET_GLB_IFACE _IOWR(KBASE_IOCTL_TYPE, 51, union kbase_ioctl_cs_get_glb_iface)
+
+struct kbase_ioctl_cs_cpu_queue_info {
+	__u64 buffer;
+	__u64 size;
+};
+
+#define KBASE_IOCTL_VERSION_CHECK _IOWR(KBASE_IOCTL_TYPE, 52, struct kbase_ioctl_version_check)
+
+#define KBASE_IOCTL_CS_CPU_QUEUE_DUMP \
+	_IOW(KBASE_IOCTL_TYPE, 53, struct kbase_ioctl_cs_cpu_queue_info)
+
+/**
+ * union kbase_ioctl_mem_alloc_ex - Allocate memory on the GPU
+ * @in: Input parameters
+ * @in.va_pages: The number of pages of virtual address space to reserve
+ * @in.commit_pages: The number of physical pages to allocate
+ * @in.extension: The number of extra pages to allocate on each GPU fault which grows the region
+ * @in.flags: Flags
+ * @in.fixed_address: The GPU virtual address requested for the allocation,
+ *                    if the allocation is using the BASE_MEM_FIXED flag.
+ * @in.extra: Space for extra parameters that may be added in the future.
+ * @out: Output parameters
+ * @out.flags: Flags
+ * @out.gpu_va: The GPU virtual address which is allocated
+ */
+union kbase_ioctl_mem_alloc_ex {
+	struct {
+		__u64 va_pages;
+		__u64 commit_pages;
+		__u64 extension;
+		__u64 flags;
+		__u64 fixed_address;
+		__u64 extra[3];
+	} in;
+	struct {
+		__u64 flags;
+		__u64 gpu_va;
+	} out;
+};
+
+#define KBASE_IOCTL_MEM_ALLOC_EX _IOWR(KBASE_IOCTL_TYPE, 59, union kbase_ioctl_mem_alloc_ex)
+
+/**
+ * union kbase_ioctl_read_user_page - Read a register of USER page
+ *
+ * @in:               Input parameters.
+ * @in.offset:        Register offset in USER page.
+ * @in.padding:       Padding to round up to a multiple of 8 bytes, must be zero.
+ * @out:              Output parameters.
+ * @out.val_lo:       Value of 32bit register or the 1st half of 64bit register to be read.
+ * @out.val_hi:       Value of the 2nd half of 64bit register to be read.
+ */
+union kbase_ioctl_read_user_page {
+	struct {
+		__u32 offset;
+		__u32 padding;
+	} in;
+	struct {
+		__u32 val_lo;
+		__u32 val_hi;
+	} out;
+};
+
+#define KBASE_IOCTL_READ_USER_PAGE _IOWR(KBASE_IOCTL_TYPE, 60, union kbase_ioctl_read_user_page)
+
+/**
+ * struct kbase_ioctl_queue_group_clear_faults - Re-enable CS FAULT reporting for the GPU queues
+ *
+ * @addr: CPU VA to an array of GPU VAs of the buffers backing the queues
+ * @nr_queues: Number of queues in the array
+ * @padding: Padding to round up to a multiple of 8 bytes, must be zero
+ */
+struct kbase_ioctl_queue_group_clear_faults {
+	__u64 addr;
+	__u32 nr_queues;
+	__u8 padding[4];
+};
+
+#define KBASE_IOCTL_QUEUE_GROUP_CLEAR_FAULTS \
+	_IOW(KBASE_IOCTL_TYPE, 61, struct kbase_ioctl_queue_group_clear_faults)
+
+/**
+ * union kbase_ioctl_cs_tiler_heap_size - Query size information from a tiler heap.
+ *
+ * @in:               Input parameters.
+ * @in.heap_ptr:      GPU virtual address of the tiler heap context.
+ * @out:              Output parameters.
+ * @out.size:         Current size of the tiler heap.
+ * @out.peak_size:    Peak size of the tiler heap.
+ */
+union kbase_ioctl_cs_tiler_heap_size {
+	struct {
+		__u64 heap_ptr;
+	} in;
+	struct {
+		__u64 size;
+		__u64 peak_size;
+	} out;
+};
+#define KBASE_IOCTL_CS_TILER_HEAP_SIZE \
+	_IOWR(KBASE_IOCTL_TYPE, 62, union kbase_ioctl_cs_tiler_heap_size)
+
+/**
+ * union kbase_ioctl_cs_queue_group_create - Create a GPU command queue group
+ * @in:               Input parameters
+ * @in.tiler_mask:    Mask of tiler endpoints the group is allowed to use.
+ * @in.fragment_mask: Mask of fragment endpoints the group is allowed to use.
+ * @in.compute_mask:  Mask of compute endpoints the group is allowed to use.
+ * @in.cs_min:        Minimum number of CSs required.
+ * @in.priority:      Queue group's priority within a process.
+ * @in.tiler_max:     Maximum number of tiler endpoints the group is allowed
+ *                    to use.
+ * @in.fragment_max:  Maximum number of fragment endpoints the group is
+ *                    allowed to use.
+ * @in.compute_max:   Maximum number of compute endpoints the group is allowed
+ *                    to use.
+ * @in.csi_handlers:  Flags to signal that the application intends to use CSI
+ *                    exception handlers in some linear buffers to deal with
+ *                    the given exception types.
+ * @in.neural_max:    Maximum number of neural endpoints the group is
+ *                    allowed to use.
+ * @in.cs_fault_report_enable:  Flag to indicate reporting of CS_FAULTs
+ *                    to userspace.
+ * @in.dvs_buf: buffer for deferred vertex shader
+ * @in.neural_mask:   Mask of neural endpoints the group is allowed to use.
+ * @in.comp_pri_threshold: The number of compute endpoints required to be
+ *                         allocated to the GPU queue group before compute
+ *                         endpoints are prioritized for compute iterator.
+ * @in.comp_pri_ratio: The ratio of the cores after comp_pri_threshold
+ *                     has been reached which are prioritized for compute
+ *                     iterator tasks.
+ * @in.padding:       Currently unused, must be zero
+ * @out:              Output parameters
+ * @out.group_handle: Handle of a newly created queue group.
+ * @out.padding:      Currently unused, must be zero
+ * @out.group_uid:    UID of the queue group available to base.
+ */
+union kbase_ioctl_cs_queue_group_create {
+	struct {
+		__u64 tiler_mask;
+		__u64 fragment_mask;
+		__u64 compute_mask;
+		__u8 cs_min;
+		__u8 priority;
+		__u8 tiler_max;
+		__u8 fragment_max;
+		__u8 compute_max;
+		__u8 csi_handlers;
+		__u8 neural_max;
+		__u8 cs_fault_report_enable;
+		__u64 dvs_buf;
+		__u64 neural_mask;
+		__u8 comp_pri_threshold;
+		__u8 comp_pri_ratio;
+		__u8 padding[62];
+		__u64 reserved;
+	} in;
+	struct {
+		__u8 group_handle;
+		__u8 padding[3];
+		__u32 group_uid;
+	} out;
+};
+
+#define KBASE_IOCTL_CS_QUEUE_GROUP_CREATE \
+	_IOWR(KBASE_IOCTL_TYPE, 63, union kbase_ioctl_cs_queue_group_create)
+
+/***************
+ * test ioctls *
+ ***************/
+#if MALI_UNIT_TEST
+/* These ioctls are purely for test purposes and are not used in the production
+ * driver, they therefore may change without notice
+ */
+
+/**
+ * struct kbase_ioctl_cs_event_memory_write - Write an event memory address
+ * @cpu_addr: Memory address to write
+ * @value: Value to write
+ * @padding: Currently unused, must be zero
+ */
+struct kbase_ioctl_cs_event_memory_write {
+	__u64 cpu_addr;
+	__u8 value;
+	__u8 padding[7];
+};
+
+/**
+ * union kbase_ioctl_cs_event_memory_read - Read an event memory address
+ * @in: Input parameters
+ * @in.cpu_addr: Memory address to read
+ * @out: Output parameters
+ * @out.value: Value read
+ * @out.padding: Currently unused, must be zero
+ */
+union kbase_ioctl_cs_event_memory_read {
+	struct {
+		__u64 cpu_addr;
+	} in;
+	struct {
+		__u8 value;
+		__u8 padding[7];
+	} out;
+};
+
+#endif /* MALI_UNIT_TEST */
+
+#endif /* _UAPI_KBASE_CSF_IOCTL_H_ */
--- a/include/uapi/gpu/arm/valhall/csf/mali_kbase_csf_mem_flags.h
+++ b/include/uapi/gpu/arm/valhall/csf/mali_kbase_csf_mem_flags.h
@@ -0,0 +1,81 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *
+ * (C) COPYRIGHT 2024 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#ifndef _UAPI_KBASE_CSF_MEM_FLAGS_H_
+#define _UAPI_KBASE_CSF_MEM_FLAGS_H_
+
+/* Memory allocation, access/hint flags & mask specific to CSF GPU.
+ *
+ * See base_mem_alloc_flags.
+ */
+
+/* Must be FIXED memory. */
+#define BASE_MEM_FIXED ((base_mem_alloc_flags)1 << 8)
+
+/* CSF event memory
+ *
+ * If Outer shareable coherence is not specified or not available, then on
+ * allocation kbase will automatically use the uncached GPU mapping.
+ * There is no need for the client to specify BASE_MEM_UNCACHED_GPU
+ * themselves when allocating memory with the BASE_MEM_CSF_EVENT flag.
+ *
+ * This memory requires a permanent mapping
+ *
+ * See also kbase_reg_needs_kernel_mapping()
+ */
+#define BASE_MEM_CSF_EVENT ((base_mem_alloc_flags)1 << 19)
+
+/* Unused bit for CSF, only used in JM for BASE_MEM_TILER_ALIGN_TOP */
+#define BASE_MEM_UNUSED_BIT_20 ((base_mem_alloc_flags)1 << 20)
+
+/* Must be FIXABLE memory: its GPU VA will be determined at a later point,
+ * at which time it will be at a fixed GPU VA.
+ */
+#define BASE_MEM_FIXABLE ((base_mem_alloc_flags)1 << 29)
+
+/* A mask of flags that, when provided, cause other flags to be
+ * enabled but are not enabled themselves
+ */
+#define BASE_MEM_FLAGS_ACTION_MODIFIERS (BASE_MEM_COHERENT_SYSTEM_REQUIRED | BASE_MEM_IMPORT_SHARED)
+
+/* A mask of all currently reserved flags */
+#define BASE_MEM_FLAGS_RESERVED ((base_mem_alloc_flags)0)
+
+/* Number of bits used as flags for base memory management from kernel-side
+ * only (ie BASEP_MEM_* flags), located from 63 bit downwards:
+ *   < 63 .. (64 - BASEP_MEM_FLAGS_NR_BITS) >
+ */
+#define BASEP_MEM_FLAGS_NR_BITS (4)
+
+/* A mask of all bits that are not used by a flag on CSF */
+#define BASE_MEM_FLAGS_UNUSED                                                     \
+	(BASE_MEM_UNUSED_BIT_5 | BASE_MEM_UNUSED_BIT_7 | BASE_MEM_UNUSED_BIT_20 | \
+	 BASE_MEM_UNUSED_BIT_27)
+
+/* Special base mem handles specific to CSF.
+ */
+#define BASEP_MEM_CSF_USER_REG_PAGE_HANDLE (47ul << LOCAL_PAGE_SHIFT)
+#define BASEP_MEM_CSF_USER_IO_PAGES_HANDLE (48ul << LOCAL_PAGE_SHIFT)
+
+#define KBASE_CSF_NUM_USER_IO_PAGES_HANDLE \
+	((BASE_MEM_COOKIE_BASE - BASEP_MEM_CSF_USER_IO_PAGES_HANDLE) >> LOCAL_PAGE_SHIFT)
+
+#endif /* _UAPI_KBASE_CSF_MEM_FLAGS_H_ */
--- a/include/uapi/gpu/arm/valhall/gpu/backend/mali_kbase_gpu_regmap_csf.h
+++ b/include/uapi/gpu/arm/valhall/gpu/backend/mali_kbase_gpu_regmap_csf.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *
+ * (C) COPYRIGHT 2021-2023 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#ifndef _UAPI_KBASE_GPU_REGMAP_CSF_H_
+#define _UAPI_KBASE_GPU_REGMAP_CSF_H_
+
+/* USER base address */
+#define USER_BASE 0x0010000
+#define USER_REG(r) (USER_BASE + (r))
+
+/* USER register offsets */
+#define LATEST_FLUSH 0x0000 /* () Flush ID of latest clean-and-invalidate operation */
+
+/* DOORBELLS base address */
+#define DOORBELLS_BASE 0x0080000
+#define DOORBELLS_REG(r) (DOORBELLS_BASE + (r))
+
+#endif /* _UAPI_KBASE_GPU_REGMAP_CSF_H_ */
--- a/include/uapi/gpu/arm/valhall/gpu/backend/mali_kbase_gpu_regmap_jm.h
+++ b/include/uapi/gpu/arm/valhall/gpu/backend/mali_kbase_gpu_regmap_jm.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *
+ * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#ifndef _UAPI_KBASE_GPU_REGMAP_JM_H_
+#define _UAPI_KBASE_GPU_REGMAP_JM_H_
+
+#endif /* _UAPI_KBASE_GPU_REGMAP_JM_H_ */
--- a/include/uapi/gpu/arm/valhall/gpu/mali_kbase_gpu_coherency.h
+++ b/include/uapi/gpu/arm/valhall/gpu/mali_kbase_gpu_coherency.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *
+ * (C) COPYRIGHT 2015-2023 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#ifndef _UAPI_KBASE_GPU_COHERENCY_H_
+#define _UAPI_KBASE_GPU_COHERENCY_H_
+
+#define COHERENCY_ACE_LITE 0U
+#define COHERENCY_ACE 1U
+#define COHERENCY_NONE 31U
+#define COHERENCY_FEATURE_BIT(x) (1U << (x))
+
+#endif /* _UAPI_KBASE_GPU_COHERENCY_H_ */
--- a/include/uapi/gpu/arm/valhall/gpu/mali_kbase_gpu_id.h
+++ b/include/uapi/gpu/arm/valhall/gpu/mali_kbase_gpu_id.h
@@ -0,0 +1,183 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *
+ * (C) COPYRIGHT 2015-2024 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#ifndef _UAPI_KBASE_GPU_ID_H_
+#define _UAPI_KBASE_GPU_ID_H_
+
+#if defined(__linux)
+#include <linux/types.h>
+#endif
+
+#define GPU_ID2_VERSION_STATUS_SHIFT 0
+#define GPU_ID2_VERSION_MINOR_SHIFT 4
+#define GPU_ID2_VERSION_MAJOR_SHIFT 12
+#define GPU_ID2_PRODUCT_MAJOR_SHIFT 16
+#define GPU_ID2_ARCH_REV_SHIFT 20
+#define GPU_ID2_ARCH_MINOR_SHIFT 24
+#define GPU_ID2_ARCH_MAJOR_SHIFT 28
+#define GPU_ID2_VERSION_STATUS (0xFu << GPU_ID2_VERSION_STATUS_SHIFT)
+#define GPU_ID2_VERSION_MINOR (0xFFu << GPU_ID2_VERSION_MINOR_SHIFT)
+#define GPU_ID2_VERSION_MAJOR (0xFu << GPU_ID2_VERSION_MAJOR_SHIFT)
+#define GPU_ID2_PRODUCT_MAJOR (0xFu << GPU_ID2_PRODUCT_MAJOR_SHIFT)
+#define GPU_ID2_ARCH_REV (0xFu << GPU_ID2_ARCH_REV_SHIFT)
+#define GPU_ID2_ARCH_MINOR (0xFu << GPU_ID2_ARCH_MINOR_SHIFT)
+#define GPU_ID2_ARCH_MAJOR (0xFu << GPU_ID2_ARCH_MAJOR_SHIFT)
+#define GPU_ID2_PRODUCT_MODEL (GPU_ID2_ARCH_MAJOR | GPU_ID2_PRODUCT_MAJOR)
+#define GPU_ID2_VERSION (GPU_ID2_VERSION_MAJOR | GPU_ID2_VERSION_MINOR | GPU_ID2_VERSION_STATUS)
+
+#define GPU_ID2_ARCH_REV_GET(gpu_id) \
+	((((__u32)gpu_id) & GPU_ID2_ARCH_REV) >> GPU_ID2_ARCH_REV_SHIFT)
+#define GPU_ID2_ARCH_MINOR_GET(gpu_id) \
+	((((__u32)gpu_id) & GPU_ID2_ARCH_MINOR) >> GPU_ID2_ARCH_MINOR_SHIFT)
+#define GPU_ID2_ARCH_MAJOR_GET(gpu_id) \
+	((((__u32)gpu_id) & GPU_ID2_ARCH_MAJOR) >> GPU_ID2_ARCH_MAJOR_SHIFT)
+#define GPU_ID2_VERSION_MINOR_GET(gpu_id) \
+	((((__u32)gpu_id) & GPU_ID2_VERSION_MINOR) >> GPU_ID2_VERSION_MINOR_SHIFT)
+#define GPU_ID2_VERSION_MAJOR_GET(gpu_id) \
+	((((__u32)gpu_id) & GPU_ID2_VERSION_MAJOR) >> GPU_ID2_VERSION_MAJOR_SHIFT)
+#define GPU_ID2_PRODUCT_MAJOR_GET(gpu_id) \
+	((((__u32)gpu_id) & GPU_ID2_PRODUCT_MAJOR) >> GPU_ID2_PRODUCT_MAJOR_SHIFT)
+/* Helper macro to construct a value consisting of arch major and revision
+ * using the value of gpu_id.
+ */
+#define GPU_ID2_ARCH_MAJOR_REV_REG(gpu_id) \
+	((((__u32)gpu_id) & GPU_ID2_ARCH_MAJOR) | (((__u32)gpu_id) & GPU_ID2_ARCH_REV))
+
+/* Helper macro to create a partial GPU_ID (new format) that defines
+ * a arch major and revision.
+ */
+#define GPU_ID2_ARCH_MAJOR_REV_MAKE(arch_major, arch_rev)    \
+	((((__u32)arch_major) << GPU_ID2_ARCH_MAJOR_SHIFT) | \
+	 (((__u32)arch_rev) << GPU_ID2_ARCH_REV_SHIFT))
+
+/* Helper macro to create a partial GPU_ID (new format) that defines
+ * a product ignoring its version.
+ */
+#define GPU_ID2_PRODUCT_MAKE(arch_major, arch_minor, arch_rev, product_major) \
+	((((__u32)arch_major) << GPU_ID2_ARCH_MAJOR_SHIFT) |                  \
+	 (((__u32)arch_minor) << GPU_ID2_ARCH_MINOR_SHIFT) |                  \
+	 (((__u32)arch_rev) << GPU_ID2_ARCH_REV_SHIFT) |                      \
+	 (((__u32)product_major) << GPU_ID2_PRODUCT_MAJOR_SHIFT))
+
+/* Helper macro to create a partial GPU_ID (new format) that specifies the
+ * revision (major, minor, status) of a product
+ */
+#define GPU_ID2_VERSION_MAKE(version_major, version_minor, version_status) \
+	((((__u32)version_major) << GPU_ID2_VERSION_MAJOR_SHIFT) |         \
+	 (((__u32)version_minor) << GPU_ID2_VERSION_MINOR_SHIFT) |         \
+	 (((__u32)version_status) << GPU_ID2_VERSION_STATUS_SHIFT))
+
+/* Helper macro to create a complete GPU_ID (new format) */
+#define GPU_ID2_MAKE(arch_major, arch_minor, arch_rev, product_major, version_major, \
+		     version_minor, version_status)                                  \
+	(GPU_ID2_PRODUCT_MAKE(arch_major, arch_minor, arch_rev, product_major) |     \
+	 GPU_ID2_VERSION_MAKE(version_major, version_minor, version_status))
+
+/* Helper macro to create a partial GPU_ID (new format) that identifies
+ * a particular GPU model by its arch_major and product_major.
+ */
+#define GPU_ID2_MODEL_MAKE(arch_major, product_major)        \
+	((((__u32)arch_major) << GPU_ID2_ARCH_MAJOR_SHIFT) | \
+	 (((__u32)product_major) << GPU_ID2_PRODUCT_MAJOR_SHIFT))
+
+/* Strip off the non-relevant bits from a product_id value and make it suitable
+ * for comparison against the GPU_ID2_PRODUCT_xxx values which identify a GPU
+ * model.
+ */
+#define GPU_ID2_MODEL_MATCH_VALUE(product_id) \
+	((((__u32)product_id) << GPU_ID2_PRODUCT_MAJOR_SHIFT) & GPU_ID2_PRODUCT_MODEL)
+
+#define GPU_ID2_PRODUCT_TMIX GPU_ID2_MODEL_MAKE(6, 0)
+#define GPU_ID2_PRODUCT_THEX GPU_ID2_MODEL_MAKE(6, 1)
+#define GPU_ID2_PRODUCT_TSIX GPU_ID2_MODEL_MAKE(7, 0)
+#define GPU_ID2_PRODUCT_TDVX GPU_ID2_MODEL_MAKE(7, 3)
+#define GPU_ID2_PRODUCT_TNOX GPU_ID2_MODEL_MAKE(7, 1)
+#define GPU_ID2_PRODUCT_TGOX GPU_ID2_MODEL_MAKE(7, 2)
+#define GPU_ID2_PRODUCT_TTRX GPU_ID2_MODEL_MAKE(9, 0)
+#define GPU_ID2_PRODUCT_TNAX GPU_ID2_MODEL_MAKE(9, 1)
+#define GPU_ID2_PRODUCT_TBEX GPU_ID2_MODEL_MAKE(9, 2)
+#define GPU_ID2_PRODUCT_LBEX GPU_ID2_MODEL_MAKE(9, 4)
+#define GPU_ID2_PRODUCT_TBAX GPU_ID2_MODEL_MAKE(9, 5)
+#define GPU_ID2_PRODUCT_TODX GPU_ID2_MODEL_MAKE(10, 2)
+#define GPU_ID2_PRODUCT_TGRX GPU_ID2_MODEL_MAKE(10, 3)
+#define GPU_ID2_PRODUCT_TVAX GPU_ID2_MODEL_MAKE(10, 4)
+#define GPU_ID2_PRODUCT_LODX GPU_ID2_MODEL_MAKE(10, 7)
+#define GPU_ID2_PRODUCT_TTUX GPU_ID2_MODEL_MAKE(11, 2)
+#define GPU_ID2_PRODUCT_LTUX GPU_ID2_MODEL_MAKE(11, 3)
+#define GPU_ID2_PRODUCT_TTIX GPU_ID2_MODEL_MAKE(12, 0)
+#define GPU_ID2_PRODUCT_LTIX GPU_ID2_MODEL_MAKE(12, 1)
+#define GPU_ID2_PRODUCT_TKRX GPU_ID2_MODEL_MAKE(13, 0)
+#define GPU_ID2_PRODUCT_LKRX GPU_ID2_MODEL_MAKE(13, 1)
+/* Do not increase the size of this macros any more.
+ * ID2 macros are deprecated but keeping them for backward compatibility.
+ * New GPU_ID_PRODUCT_XXXX macros in current file replace these macros.
+ */
+
+
+#define GPU_ID_U8_COMP(val3, val2, val1, val0) \
+	((((__u32)val3) << 24U) | (((__u32)val2) << 16U) | (((__u32)val1) << 8U) | ((__u32)val0))
+#define GPU_ID_U8_COMP_SHIFT(comp, idx) (((__u32)comp) >> (idx * 8U))
+#define GPU_ID_U8_COMP_GET(comp, idx) (GPU_ID_U8_COMP_SHIFT(comp, idx) & 0xFF)
+
+#define GPU_ID_PRODUCT_ID_MAKE(arch_major, arch_minor, arch_rev, product_major) \
+	GPU_ID_U8_COMP(arch_major, arch_minor, arch_rev, product_major)
+#define GPU_ID_MODEL_MAKE(arch_major, product_major) GPU_ID_U8_COMP(arch_major, 0, 0, product_major)
+#define GPU_ID_VERSION_MAKE(version_major, version_minor, version_status) \
+	GPU_ID_U8_COMP(0, version_major, version_minor, version_status)
+#define GPU_ID_ARCH_MAKE(arch_major, arch_minor, arch_rev) \
+	GPU_ID_U8_COMP(0, arch_major, arch_minor, arch_rev)
+
+/* Convert ID created from GPU_ID_PRODUCT_ID_MAKE() to match the format of
+ * GPU_ID_MODEL_MAKE()
+ */
+#define GPU_ID_MODEL_MATCH_VALUE(product_id) (((__u32)product_id) & GPU_ID_MODEL_MAKE(0xFF, 0xFF))
+
+#define GPU_ID_VERSION_ID_MAJOR_MINOR_GET(version_id) GPU_ID_U8_COMP_SHIFT(version_id, 1)
+#define GPU_ID_VERSION_ID_STATUS_GET(version_id) GPU_ID_U8_COMP_GET(version_id, 0)
+#define GPU_ID_VERSION_ID_MINOR_GET(version_id) GPU_ID_U8_COMP_GET(version_id, 1)
+#define GPU_ID_VERSION_ID_MAJOR_GET(version_id) GPU_ID_U8_COMP_GET(version_id, 2)
+
+#define GPU_ID_PRODUCT_TMIX GPU_ID_MODEL_MAKE(6, 0)
+#define GPU_ID_PRODUCT_THEX GPU_ID_MODEL_MAKE(6, 1)
+#define GPU_ID_PRODUCT_TSIX GPU_ID_MODEL_MAKE(7, 0)
+#define GPU_ID_PRODUCT_TDVX GPU_ID_MODEL_MAKE(7, 3)
+#define GPU_ID_PRODUCT_TNOX GPU_ID_MODEL_MAKE(7, 1)
+#define GPU_ID_PRODUCT_TGOX GPU_ID_MODEL_MAKE(7, 2)
+#define GPU_ID_PRODUCT_TTRX GPU_ID_MODEL_MAKE(9, 0)
+#define GPU_ID_PRODUCT_TNAX GPU_ID_MODEL_MAKE(9, 1)
+#define GPU_ID_PRODUCT_TBEX GPU_ID_MODEL_MAKE(9, 2)
+#define GPU_ID_PRODUCT_LBEX GPU_ID_MODEL_MAKE(9, 4)
+#define GPU_ID_PRODUCT_TBAX GPU_ID_MODEL_MAKE(9, 5)
+#define GPU_ID_PRODUCT_TODX GPU_ID_MODEL_MAKE(10, 2)
+#define GPU_ID_PRODUCT_TGRX GPU_ID_MODEL_MAKE(10, 3)
+#define GPU_ID_PRODUCT_TVAX GPU_ID_MODEL_MAKE(10, 4)
+#define GPU_ID_PRODUCT_LODX GPU_ID_MODEL_MAKE(10, 7)
+#define GPU_ID_PRODUCT_TTUX GPU_ID_MODEL_MAKE(11, 2)
+#define GPU_ID_PRODUCT_LTUX GPU_ID_MODEL_MAKE(11, 3)
+#define GPU_ID_PRODUCT_TTIX GPU_ID_MODEL_MAKE(12, 0)
+#define GPU_ID_PRODUCT_LTIX GPU_ID_MODEL_MAKE(12, 1)
+#define GPU_ID_PRODUCT_TKRX GPU_ID_MODEL_MAKE(13, 0)
+#define GPU_ID_PRODUCT_LKRX GPU_ID_MODEL_MAKE(13, 1)
+#define GPU_ID_PRODUCT_IDRX GPU_ID_MODEL_MAKE(14, 0)
+#define GPU_ID_PRODUCT_TDRX GPU_ID_MODEL_MAKE(14, 1)
+#define GPU_ID_PRODUCT_LDRX GPU_ID_MODEL_MAKE(14, 3)
+
+#endif /* _UAPI_KBASE_GPU_ID_H_ */
--- a/include/uapi/gpu/arm/valhall/gpu/mali_kbase_gpu_regmap.h
+++ b/include/uapi/gpu/arm/valhall/gpu/mali_kbase_gpu_regmap.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *
+ * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#ifndef _UAPI_KBASE_GPU_REGMAP_H_
+#define _UAPI_KBASE_GPU_REGMAP_H_
+
+#if MALI_USE_CSF
+#include "backend/mali_kbase_gpu_regmap_csf.h"
+#else
+#include "backend/mali_kbase_gpu_regmap_jm.h"
+#endif /* !MALI_USE_CSF */
+
+#endif /* _UAPI_KBASE_GPU_REGMAP_H_ */
--- a/include/uapi/gpu/arm/valhall/jm/mali_base_jm_kernel.h
+++ b/include/uapi/gpu/arm/valhall/jm/mali_base_jm_kernel.h
@@ -0,0 +1,892 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *
+ * (C) COPYRIGHT 2019-2024 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#ifndef _UAPI_BASE_JM_KERNEL_H_
+#define _UAPI_BASE_JM_KERNEL_H_
+
+#include <linux/types.h>
+#include "../mali_base_common_kernel.h"
+
+/* Similar to BASE_MEM_TILER_ALIGN_TOP, memory starting from the end of the
+ * initial commit is aligned to 'extension' pages, where 'extension' must be a power
+ * of 2 and no more than BASE_MEM_TILER_ALIGN_TOP_EXTENSION_MAX_PAGES
+ */
+#define BASE_JIT_ALLOC_MEM_TILER_ALIGN_TOP (1 << 0)
+
+/**
+ * BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE - If set, the heap info address points
+ * to a __u32 holding the used size in bytes;
+ * otherwise it points to a __u64 holding the lowest address of unused memory.
+ */
+#define BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE (1 << 1)
+
+/**
+ * BASE_JIT_ALLOC_VALID_FLAGS - Valid set of just-in-time memory allocation flags
+ *
+ * Note: BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE cannot be set if heap_info_gpu_addr
+ * in %base_jit_alloc_info is 0 (atom with BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE set
+ * and heap_info_gpu_addr being 0 will be rejected).
+ */
+#define BASE_JIT_ALLOC_VALID_FLAGS \
+	(BASE_JIT_ALLOC_MEM_TILER_ALIGN_TOP | BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE)
+
+/* Bitpattern describing the ::base_context_create_flags that can be
+ * passed to base_context_init()
+ */
+#define BASEP_CONTEXT_CREATE_ALLOWED_FLAGS \
+	(BASE_CONTEXT_CCTX_EMBEDDED | BASEP_CONTEXT_CREATE_KERNEL_FLAGS)
+
+/*
+ * Private flags used on the base context
+ *
+ * These start at bit 31, and run down to zero.
+ *
+ * They share the same space as base_context_create_flags, and so must
+ * not collide with them.
+ */
+
+/* Private flag tracking whether job descriptor dumping is disabled */
+#define BASEP_CONTEXT_FLAG_JOB_DUMP_DISABLED ((base_context_create_flags)(1 << 31))
+
+/* Flags for base tracepoint specific to JM */
+#define BASE_TLSTREAM_FLAGS_MASK \
+	(BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS | BASE_TLSTREAM_JOB_DUMPING_ENABLED)
+/*
+ * Dependency stuff, keep it private for now. May want to expose it if
+ * we decide to make the number of semaphores a configurable
+ * option.
+ */
+#define BASE_JD_ATOM_COUNT 256
+
+/* Set/reset values for a software event */
+#define BASE_JD_SOFT_EVENT_SET ((unsigned char)1)
+#define BASE_JD_SOFT_EVENT_RESET ((unsigned char)0)
+
+/**
+ * struct base_jd_udata - Per-job data
+ *
+ * @blob: per-job data array
+ *
+ * This structure is used to store per-job data, and is completely unused
+ * by the Base driver. It can be used to store things such as callback
+ * function pointer, data to handle job completion. It is guaranteed to be
+ * untouched by the Base driver.
+ */
+struct base_jd_udata {
+	__u64 blob[2];
+};
+
+/**
+ * typedef base_jd_dep_type - Job dependency type.
+ *
+ * A flags field will be inserted into the atom structure to specify whether a
+ * dependency is a data or ordering dependency (by putting it before/after
+ * 'core_req' in the structure it should be possible to add without changing
+ * the structure size).
+ * When the flag is set for a particular dependency to signal that it is an
+ * ordering only dependency then errors will not be propagated.
+ */
+typedef __u8 base_jd_dep_type;
+
+#define BASE_JD_DEP_TYPE_INVALID (0) /**< Invalid dependency */
+#define BASE_JD_DEP_TYPE_DATA (1U << 0) /**< Data dependency */
+#define BASE_JD_DEP_TYPE_ORDER (1U << 1) /**< Order dependency */
+
+/**
+ * typedef base_jd_core_req - Job chain hardware requirements.
+ *
+ * A job chain must specify what GPU features it needs to allow the
+ * driver to schedule the job correctly.  By not specifying the
+ * correct settings can/will cause an early job termination.  Multiple
+ * values can be ORed together to specify multiple requirements.
+ * Special case is ::BASE_JD_REQ_DEP, which is used to express complex
+ * dependencies, and that doesn't execute anything on the hardware.
+ */
+typedef __u32 base_jd_core_req;
+
+/* Requirements that come from the HW */
+
+/* No requirement, dependency only
+ */
+#define BASE_JD_REQ_DEP ((base_jd_core_req)0)
+
+/* Requires fragment shaders
+ */
+#define BASE_JD_REQ_FS ((base_jd_core_req)1 << 0)
+
+/* Requires compute shaders
+ *
+ * This covers any of the following GPU job types:
+ * - Vertex Shader Job
+ * - Geometry Shader Job
+ * - An actual Compute Shader Job
+ *
+ * Compare this with BASE_JD_REQ_ONLY_COMPUTE, which specifies that the
+ * job is specifically just the "Compute Shader" job type, and not the "Vertex
+ * Shader" nor the "Geometry Shader" job type.
+ */
+#define BASE_JD_REQ_CS ((base_jd_core_req)1 << 1)
+
+/* Requires tiling */
+#define BASE_JD_REQ_T ((base_jd_core_req)1 << 2)
+
+/* Requires cache flushes */
+#define BASE_JD_REQ_CF ((base_jd_core_req)1 << 3)
+
+/* Requires value writeback */
+#define BASE_JD_REQ_V ((base_jd_core_req)1 << 4)
+
+/* SW-only requirements - the HW does not expose these as part of the job slot
+ * capabilities
+ */
+
+/* Requires fragment job with AFBC encoding */
+#define BASE_JD_REQ_FS_AFBC ((base_jd_core_req)1 << 13)
+
+/* SW-only requirement: coalesce completion events.
+ * If this bit is set then completion of this atom will not cause an event to
+ * be sent to userspace, whether successful or not; completion events will be
+ * deferred until an atom completes which does not have this bit set.
+ *
+ * This bit may not be used in combination with BASE_JD_REQ_EXTERNAL_RESOURCES.
+ */
+#define BASE_JD_REQ_EVENT_COALESCE ((base_jd_core_req)1 << 5)
+
+/* SW Only requirement: the job chain requires a coherent core group. We don't
+ * mind which coherent core group is used.
+ */
+#define BASE_JD_REQ_COHERENT_GROUP ((base_jd_core_req)1 << 6)
+
+/* SW Only requirement: The performance counters should be enabled only when
+ * they are needed, to reduce power consumption.
+ */
+#define BASE_JD_REQ_PERMON ((base_jd_core_req)1 << 7)
+
+/* SW Only requirement: External resources are referenced by this atom.
+ *
+ * This bit may not be used in combination with BASE_JD_REQ_EVENT_COALESCE and
+ * BASE_JD_REQ_SOFT_EVENT_WAIT.
+ */
+#define BASE_JD_REQ_EXTERNAL_RESOURCES ((base_jd_core_req)1 << 8)
+
+/* SW Only requirement: Software defined job. Jobs with this bit set will not be
+ * submitted to the hardware but will cause some action to happen within the
+ * driver
+ */
+#define BASE_JD_REQ_SOFT_JOB ((base_jd_core_req)1 << 9)
+
+#define BASE_JD_REQ_SOFT_DUMP_CPU_GPU_TIME (BASE_JD_REQ_SOFT_JOB | 0x1)
+#define BASE_JD_REQ_SOFT_FENCE_TRIGGER (BASE_JD_REQ_SOFT_JOB | 0x2)
+#define BASE_JD_REQ_SOFT_FENCE_WAIT (BASE_JD_REQ_SOFT_JOB | 0x3)
+
+/* 0x4 RESERVED for now */
+
+/* SW only requirement: event wait/trigger job.
+ *
+ * - BASE_JD_REQ_SOFT_EVENT_WAIT: this job will block until the event is set.
+ * - BASE_JD_REQ_SOFT_EVENT_SET: this job sets the event, thus unblocks the
+ *   other waiting jobs. It completes immediately.
+ * - BASE_JD_REQ_SOFT_EVENT_RESET: this job resets the event, making it
+ *   possible for other jobs to wait upon. It completes immediately.
+ */
+#define BASE_JD_REQ_SOFT_EVENT_WAIT (BASE_JD_REQ_SOFT_JOB | 0x5)
+#define BASE_JD_REQ_SOFT_EVENT_SET (BASE_JD_REQ_SOFT_JOB | 0x6)
+#define BASE_JD_REQ_SOFT_EVENT_RESET (BASE_JD_REQ_SOFT_JOB | 0x7)
+
+#define BASE_JD_REQ_SOFT_DEBUG_COPY (BASE_JD_REQ_SOFT_JOB | 0x8)
+
+/* SW only requirement: Just In Time allocation
+ *
+ * This job requests a single or multiple just-in-time allocations through a
+ * list of base_jit_alloc_info structure which is passed via the jc element of
+ * the atom. The number of base_jit_alloc_info structures present in the
+ * list is passed via the nr_extres element of the atom
+ *
+ * It should be noted that the id entry in base_jit_alloc_info must not
+ * be reused until it has been released via BASE_JD_REQ_SOFT_JIT_FREE.
+ *
+ * Should this soft job fail it is expected that a BASE_JD_REQ_SOFT_JIT_FREE
+ * soft job to free the JIT allocation is still made.
+ *
+ * The job will complete immediately.
+ */
+#define BASE_JD_REQ_SOFT_JIT_ALLOC (BASE_JD_REQ_SOFT_JOB | 0x9)
+
+/* SW only requirement: Just In Time free
+ *
+ * This job requests a single or multiple just-in-time allocations created by
+ * BASE_JD_REQ_SOFT_JIT_ALLOC to be freed. The ID list of the just-in-time
+ * allocations is passed via the jc element of the atom.
+ *
+ * The job will complete immediately.
+ */
+#define BASE_JD_REQ_SOFT_JIT_FREE (BASE_JD_REQ_SOFT_JOB | 0xa)
+
+/* SW only requirement: Map external resource
+ *
+ * This job requests external resource(s) are mapped once the dependencies
+ * of the job have been satisfied. The list of external resources are
+ * passed via the jc element of the atom which is a pointer to a
+ * base_external_resource_list.
+ */
+#define BASE_JD_REQ_SOFT_EXT_RES_MAP (BASE_JD_REQ_SOFT_JOB | 0xb)
+
+/* SW only requirement: Unmap external resource
+ *
+ * This job requests external resource(s) are unmapped once the dependencies
+ * of the job has been satisfied. The list of external resources are
+ * passed via the jc element of the atom which is a pointer to a
+ * base_external_resource_list.
+ */
+#define BASE_JD_REQ_SOFT_EXT_RES_UNMAP (BASE_JD_REQ_SOFT_JOB | 0xc)
+
+/* HW Requirement: Requires Compute shaders (but not Vertex or Geometry Shaders)
+ *
+ * This indicates that the Job Chain contains GPU jobs of the 'Compute
+ * Shaders' type.
+ *
+ * In contrast to BASE_JD_REQ_CS, this does not indicate that the Job
+ * Chain contains 'Geometry Shader' or 'Vertex Shader' jobs.
+ */
+#define BASE_JD_REQ_ONLY_COMPUTE ((base_jd_core_req)1 << 10)
+
+/* HW Requirement: Use the base_jd_atom::device_nr field to specify a
+ * particular core group
+ *
+ * If both BASE_JD_REQ_COHERENT_GROUP and this flag are set, this flag
+ * takes priority
+ *
+ * This is only guaranteed to work for BASE_JD_REQ_ONLY_COMPUTE atoms.
+ */
+#define BASE_JD_REQ_SPECIFIC_COHERENT_GROUP ((base_jd_core_req)1 << 11)
+
+/* SW Flag: If this bit is set then the successful completion of this atom
+ * will not cause an event to be sent to userspace
+ */
+#define BASE_JD_REQ_EVENT_ONLY_ON_FAILURE ((base_jd_core_req)1 << 12)
+
+/* SW Flag: If this bit is set then completion of this atom will not cause an
+ * event to be sent to userspace, whether successful or not.
+ */
+#define BASEP_JD_REQ_EVENT_NEVER ((base_jd_core_req)1 << 14)
+
+/* SW Flag: Skip GPU cache clean and invalidation before starting a GPU job.
+ *
+ * If this bit is set then the GPU's cache will not be cleaned and invalidated
+ * until a GPU job starts which does not have this bit set or a job completes
+ * which does not have the BASE_JD_REQ_SKIP_CACHE_END bit set. Do not use
+ * if the CPU may have written to memory addressed by the job since the last job
+ * without this bit set was submitted.
+ */
+#define BASE_JD_REQ_SKIP_CACHE_START ((base_jd_core_req)1 << 15)
+
+/* SW Flag: Skip GPU cache clean and invalidation after a GPU job completes.
+ *
+ * If this bit is set then the GPU's cache will not be cleaned and invalidated
+ * until a GPU job completes which does not have this bit set or a job starts
+ * which does not have the BASE_JD_REQ_SKIP_CACHE_START bit set. Do not use
+ * if the CPU may read from or partially overwrite memory addressed by the job
+ * before the next job without this bit set completes.
+ */
+#define BASE_JD_REQ_SKIP_CACHE_END ((base_jd_core_req)1 << 16)
+
+/* Request the atom be executed on a specific job slot.
+ *
+ * When this flag is specified, it takes precedence over any existing job slot
+ * selection logic.
+ */
+#define BASE_JD_REQ_JOB_SLOT ((base_jd_core_req)1 << 17)
+
+/* SW-only requirement: The atom needs to run on a limited core mask affinity.
+ *
+ * If this bit is set then the kbase_context.limited_core_mask will be applied
+ * to the affinity.
+ */
+#define BASE_JD_REQ_LIMITED_CORE_MASK ((base_jd_core_req)1 << 20)
+
+/* These requirement bits are currently unused in base_jd_core_req
+ */
+#define BASEP_JD_REQ_RESERVED                                                                 \
+	(~(BASE_JD_REQ_ATOM_TYPE | BASE_JD_REQ_EXTERNAL_RESOURCES |                           \
+	   BASE_JD_REQ_EVENT_ONLY_ON_FAILURE | BASEP_JD_REQ_EVENT_NEVER |                     \
+	   BASE_JD_REQ_EVENT_COALESCE | BASE_JD_REQ_COHERENT_GROUP |                          \
+	   BASE_JD_REQ_SPECIFIC_COHERENT_GROUP | BASE_JD_REQ_FS_AFBC | BASE_JD_REQ_PERMON |   \
+	   BASE_JD_REQ_SKIP_CACHE_START | BASE_JD_REQ_SKIP_CACHE_END | BASE_JD_REQ_JOB_SLOT | \
+	   BASE_JD_REQ_LIMITED_CORE_MASK))
+
+/* Mask of all bits in base_jd_core_req that control the type of the atom.
+ *
+ * This allows dependency only atoms to have flags set
+ */
+#define BASE_JD_REQ_ATOM_TYPE                                                               \
+	(BASE_JD_REQ_FS | BASE_JD_REQ_CS | BASE_JD_REQ_T | BASE_JD_REQ_CF | BASE_JD_REQ_V | \
+	 BASE_JD_REQ_SOFT_JOB | BASE_JD_REQ_ONLY_COMPUTE)
+
+/**
+ * BASE_JD_REQ_SOFT_JOB_TYPE - Mask of all bits in base_jd_core_req that
+ * controls the type of a soft job.
+ */
+#define BASE_JD_REQ_SOFT_JOB_TYPE (BASE_JD_REQ_SOFT_JOB | 0x1f)
+
+/* Returns non-zero value if core requirements passed define a soft job or
+ * a dependency only job.
+ */
+#define BASE_JD_REQ_SOFT_JOB_OR_DEP(core_req) \
+	(((core_req)&BASE_JD_REQ_SOFT_JOB) || ((core_req)&BASE_JD_REQ_ATOM_TYPE) == BASE_JD_REQ_DEP)
+
+/**
+ * enum kbase_jd_atom_state - Atom states
+ *
+ * @KBASE_JD_ATOM_STATE_UNUSED: Atom is not used.
+ * @KBASE_JD_ATOM_STATE_QUEUED: Atom is queued in JD.
+ * @KBASE_JD_ATOM_STATE_IN_JS:  Atom has been given to JS (is runnable/running).
+ * @KBASE_JD_ATOM_STATE_HW_COMPLETED: Atom has been completed, but not yet
+ *                                    handed back to job dispatcher for
+ *                                    dependency resolution.
+ * @KBASE_JD_ATOM_STATE_COMPLETED: Atom has been completed, but not yet handed
+ *                                 back to userspace.
+ */
+enum kbase_jd_atom_state {
+	KBASE_JD_ATOM_STATE_UNUSED,
+	KBASE_JD_ATOM_STATE_QUEUED,
+	KBASE_JD_ATOM_STATE_IN_JS,
+	KBASE_JD_ATOM_STATE_HW_COMPLETED,
+	KBASE_JD_ATOM_STATE_COMPLETED
+};
+
+/**
+ * typedef base_atom_id - Type big enough to store an atom number in.
+ */
+typedef __u8 base_atom_id;
+
+/**
+ * struct base_dependency - base dependency
+ *
+ * @atom_id:         An atom number
+ * @dependency_type: Dependency type
+ */
+struct base_dependency {
+	base_atom_id atom_id;
+	base_jd_dep_type dependency_type;
+};
+
+/**
+ * typedef base_jd_prio - Base Atom priority.
+ *
+ * Only certain priority levels are actually implemented, as specified by the
+ * BASE_JD_PRIO_<...> definitions below. It is undefined to use a priority
+ * level that is not one of those defined below.
+ *
+ * Priority levels only affect scheduling after the atoms have had dependencies
+ * resolved. For example, a low priority atom that has had its dependencies
+ * resolved might run before a higher priority atom that has not had its
+ * dependencies resolved.
+ *
+ * In general, fragment atoms do not affect non-fragment atoms with
+ * lower priorities, and vice versa. One exception is that there is only one
+ * priority value for each context. So a high-priority (e.g.) fragment atom
+ * could increase its context priority, causing its non-fragment atoms to also
+ * be scheduled sooner.
+ *
+ * The atoms are scheduled as follows with respect to their priorities:
+ * * Let atoms 'X' and 'Y' be for the same job slot who have dependencies
+ *   resolved, and atom 'X' has a higher priority than atom 'Y'
+ * * If atom 'Y' is currently running on the HW, then it is interrupted to
+ *   allow atom 'X' to run soon after
+ * * If instead neither atom 'Y' nor atom 'X' are running, then when choosing
+ *   the next atom to run, atom 'X' will always be chosen instead of atom 'Y'
+ * * Any two atoms that have the same priority could run in any order with
+ *   respect to each other. That is, there is no ordering constraint between
+ *   atoms of the same priority.
+ *
+ * The sysfs file 'js_ctx_scheduling_mode' is used to control how atoms are
+ * scheduled between contexts. The default value, 0, will cause higher-priority
+ * atoms to be scheduled first, regardless of their context. The value 1 will
+ * use a round-robin algorithm when deciding which context's atoms to schedule
+ * next, so higher-priority atoms can only preempt lower priority atoms within
+ * the same context. See KBASE_JS_SYSTEM_PRIORITY_MODE and
+ * KBASE_JS_PROCESS_LOCAL_PRIORITY_MODE for more details.
+ */
+typedef __u8 base_jd_prio;
+
+/* Medium atom priority. This is a priority higher than BASE_JD_PRIO_LOW */
+#define BASE_JD_PRIO_MEDIUM ((base_jd_prio)0)
+/* High atom priority. This is a priority higher than BASE_JD_PRIO_MEDIUM and
+ * BASE_JD_PRIO_LOW
+ */
+#define BASE_JD_PRIO_HIGH ((base_jd_prio)1)
+/* Low atom priority. */
+#define BASE_JD_PRIO_LOW ((base_jd_prio)2)
+/* Real-Time atom priority. This is a priority higher than BASE_JD_PRIO_HIGH,
+ * BASE_JD_PRIO_MEDIUM, and BASE_JD_PRIO_LOW
+ */
+#define BASE_JD_PRIO_REALTIME ((base_jd_prio)3)
+
+/* Invalid atom priority (max uint8_t value) */
+#define BASE_JD_PRIO_INVALID ((base_jd_prio)255)
+
+/* Count of the number of priority levels. This itself is not a valid
+ * base_jd_prio setting
+ */
+#define BASE_JD_NR_PRIO_LEVELS 4
+
+/**
+ * struct base_jd_atom_v2 - Node of a dependency graph used to submit a
+ *                          GPU job chain or soft-job to the kernel driver.
+ *
+ * @jc:            GPU address of a job chain.
+ * @udata:         User data.
+ * @extres_list:   List of external resources.
+ * @nr_extres:     Number of external resources or JIT allocations.
+ * @jit_id:        Zero-terminated array of IDs of just-in-time memory
+ *                 allocations written to by the atom. When the atom
+ *                 completes, the value stored at the
+ *                 &struct_base_jit_alloc_info.heap_info_gpu_addr of
+ *                 each allocation is read in order to enforce an
+ *                 overall physical memory usage limit.
+ * @pre_dep:       Pre-dependencies. One need to use SETTER function to assign
+ *                 this field; this is done in order to reduce possibility of
+ *                 improper assignment of a dependency field.
+ * @atom_number:   Unique number to identify the atom.
+ * @prio:          Atom priority. Refer to base_jd_prio for more details.
+ * @device_nr:     Core group when BASE_JD_REQ_SPECIFIC_COHERENT_GROUP
+ *                 specified.
+ * @jobslot:       Job slot to use when BASE_JD_REQ_JOB_SLOT is specified.
+ * @core_req:      Core requirements.
+ * @padding:       Unused. Must be zero.
+ *
+ * This structure has changed since UK 10.2 for which base_jd_core_req was a
+ * __u16 value.
+ *
+ * In UK 10.3 a core_req field of a __u32 type was added to the end of the
+ * structure, and the place in the structure previously occupied by __u16
+ * core_req was kept but renamed to compat_core_req.
+ *
+ * From UK 11.20 - compat_core_req is now occupied by __u8 jit_id[2].
+ * Compatibility with UK 10.x from UK 11.y is not handled because
+ * the major version increase prevents this.
+ *
+ * For UK 11.20 jit_id[2] must be initialized to zero.
+ */
+struct base_jd_atom_v2 {
+	__u64 jc;
+	struct base_jd_udata udata;
+	__u64 extres_list;
+	__u16 nr_extres;
+	__u8 jit_id[2];
+	struct base_dependency pre_dep[2];
+	base_atom_id atom_number;
+	base_jd_prio prio;
+	__u8 device_nr;
+	__u8 jobslot;
+	base_jd_core_req core_req;
+	__u8 padding[8];
+};
+
+/**
+ * struct base_jd_atom - Same as base_jd_atom_v2, but has an extra seq_nr
+ *                          at the beginning.
+ *
+ * @seq_nr:        Sequence number of logical grouping of atoms.
+ * @jc:            GPU address of a job chain.
+ * @udata:         User data.
+ * @extres_list:   List of external resources.
+ * @nr_extres:     Number of external resources or JIT allocations.
+ * @jit_id:        Zero-terminated array of IDs of just-in-time memory
+ *                 allocations written to by the atom. When the atom
+ *                 completes, the value stored at the
+ *                 &struct_base_jit_alloc_info.heap_info_gpu_addr of
+ *                 each allocation is read in order to enforce an
+ *                 overall physical memory usage limit.
+ * @pre_dep:       Pre-dependencies. One need to use SETTER function to assign
+ *                 this field; this is done in order to reduce possibility of
+ *                 improper assignment of a dependency field.
+ * @atom_number:   Unique number to identify the atom.
+ * @prio:          Atom priority. Refer to base_jd_prio for more details.
+ * @device_nr:     Core group when BASE_JD_REQ_SPECIFIC_COHERENT_GROUP
+ *                 specified.
+ * @jobslot:       Job slot to use when BASE_JD_REQ_JOB_SLOT is specified.
+ * @core_req:      Core requirements.
+ * @renderpass_id: Renderpass identifier used to associate an atom that has
+ *                 BASE_JD_REQ_START_RENDERPASS set in its core requirements
+ *                 with an atom that has BASE_JD_REQ_END_RENDERPASS set.
+ * @padding:       Unused. Must be zero.
+ */
+typedef struct base_jd_atom {
+	__u64 seq_nr;
+	__u64 jc;
+	struct base_jd_udata udata;
+	__u64 extres_list;
+	__u16 nr_extres;
+	__u8 jit_id[2];
+	struct base_dependency pre_dep[2];
+	base_atom_id atom_number;
+	base_jd_prio prio;
+	__u8 device_nr;
+	__u8 jobslot;
+	base_jd_core_req core_req;
+	__u8 renderpass_id;
+	__u8 padding[7];
+} base_jd_atom;
+
+/* Job chain event code bits
+ * Defines the bits used to create ::base_jd_event_code
+ */
+enum {
+	BASE_JD_SW_EVENT_KERNEL = (1u << 15), /* Kernel side event */
+	BASE_JD_SW_EVENT = (1u << 14), /* SW defined event */
+	/* Event indicates success (SW events only) */
+	BASE_JD_SW_EVENT_SUCCESS = (1u << 13),
+	BASE_JD_SW_EVENT_JOB = (0u << 11), /* Job related event */
+	BASE_JD_SW_EVENT_BAG = (1u << 11), /* Bag related event */
+	BASE_JD_SW_EVENT_INFO = (2u << 11), /* Misc/info event */
+	BASE_JD_SW_EVENT_RESERVED = (3u << 11), /* Reserved event type */
+	/* Mask to extract the type from an event code */
+	BASE_JD_SW_EVENT_TYPE_MASK = (3u << 11)
+};
+
+/**
+ * enum base_jd_event_code - Job chain event codes
+ *
+ * @BASE_JD_EVENT_RANGE_HW_NONFAULT_START: Start of hardware non-fault status
+ *                                         codes.
+ *                                         Obscurely, BASE_JD_EVENT_TERMINATED
+ *                                         indicates a real fault, because the
+ *                                         job was hard-stopped.
+ * @BASE_JD_EVENT_NOT_STARTED: Can't be seen by userspace, treated as
+ *                             'previous job done'.
+ * @BASE_JD_EVENT_STOPPED:     Can't be seen by userspace, becomes
+ *                             TERMINATED, DONE or JOB_CANCELLED.
+ * @BASE_JD_EVENT_TERMINATED:  This is actually a fault status code - the job
+ *                             was hard stopped.
+ * @BASE_JD_EVENT_ACTIVE: Can't be seen by userspace, jobs only returned on
+ *                        complete/fail/cancel.
+ * @BASE_JD_EVENT_RANGE_HW_NONFAULT_END: End of hardware non-fault status codes.
+ *                                       Obscurely, BASE_JD_EVENT_TERMINATED
+ *                                       indicates a real fault,
+ *                                       because the job was hard-stopped.
+ * @BASE_JD_EVENT_RANGE_HW_FAULT_OR_SW_ERROR_START: Start of hardware fault and
+ *                                                  software error status codes.
+ * @BASE_JD_EVENT_RANGE_HW_FAULT_OR_SW_ERROR_END: End of hardware fault and
+ *                                                software error status codes.
+ * @BASE_JD_EVENT_RANGE_SW_SUCCESS_START: Start of software success status
+ *                                        codes.
+ * @BASE_JD_EVENT_RANGE_SW_SUCCESS_END: End of software success status codes.
+ * @BASE_JD_EVENT_RANGE_KERNEL_ONLY_START: Start of kernel-only status codes.
+ *                                         Such codes are never returned to
+ *                                         user-space.
+ * @BASE_JD_EVENT_RANGE_KERNEL_ONLY_END: End of kernel-only status codes.
+ * @BASE_JD_EVENT_DONE: atom has completed successfull
+ * @BASE_JD_EVENT_JOB_CONFIG_FAULT: Atom dependencies configuration error which
+ *                                  shall result in a failed atom
+ * @BASE_JD_EVENT_JOB_POWER_FAULT:  The job could not be executed because the
+ *                                  part of the memory system required to access
+ *                                  job descriptors was not powered on
+ * @BASE_JD_EVENT_JOB_READ_FAULT:   Reading a job descriptor into the Job
+ *                                  manager failed
+ * @BASE_JD_EVENT_JOB_WRITE_FAULT:  Writing a job descriptor from the Job
+ *                                  manager failed
+ * @BASE_JD_EVENT_JOB_AFFINITY_FAULT: The job could not be executed because the
+ *                                    specified affinity mask does not intersect
+ *                                    any available cores
+ * @BASE_JD_EVENT_JOB_BUS_FAULT:    A bus access failed while executing a job
+ * @BASE_JD_EVENT_INSTR_INVALID_PC: A shader instruction with an illegal program
+ *                                  counter was executed.
+ * @BASE_JD_EVENT_INSTR_INVALID_ENC: A shader instruction with an illegal
+ *                                  encoding was executed.
+ * @BASE_JD_EVENT_INSTR_TYPE_MISMATCH: A shader instruction was executed where
+ *                                  the instruction encoding did not match the
+ *                                  instruction type encoded in the program
+ *                                  counter.
+ * @BASE_JD_EVENT_INSTR_OPERAND_FAULT: A shader instruction was executed that
+ *                                  contained invalid combinations of operands.
+ * @BASE_JD_EVENT_INSTR_TLS_FAULT:  A shader instruction was executed that tried
+ *                                  to access the thread local storage section
+ *                                  of another thread.
+ * @BASE_JD_EVENT_INSTR_ALIGN_FAULT: A shader instruction was executed that
+ *                                  tried to do an unsupported unaligned memory
+ *                                  access.
+ * @BASE_JD_EVENT_INSTR_BARRIER_FAULT: A shader instruction was executed that
+ *                                  failed to complete an instruction barrier.
+ * @BASE_JD_EVENT_DATA_INVALID_FAULT: Any data structure read as part of the job
+ *                                  contains invalid combinations of data.
+ * @BASE_JD_EVENT_TILE_RANGE_FAULT: Tile or fragment shading was asked to
+ *                                  process a tile that is entirely outside the
+ *                                  bounding box of the frame.
+ * @BASE_JD_EVENT_STATE_FAULT:      Matches ADDR_RANGE_FAULT. A virtual address
+ *                                  has been found that exceeds the virtual
+ *                                  address range.
+ * @BASE_JD_EVENT_OUT_OF_MEMORY:    The tiler ran out of memory when executing a job.
+ * @BASE_JD_EVENT_UNKNOWN:          If multiple jobs in a job chain fail, only
+ *                                  the first one the reports an error will set
+ *                                  and return full error information.
+ *                                  Subsequent failing jobs will not update the
+ *                                  error status registers, and may write an
+ *                                  error status of UNKNOWN.
+ * @BASE_JD_EVENT_DELAYED_BUS_FAULT: The GPU received a bus fault for access to
+ *                                  physical memory where the original virtual
+ *                                  address is no longer available.
+ * @BASE_JD_EVENT_SHAREABILITY_FAULT: Matches GPU_SHAREABILITY_FAULT. A cache
+ *                                  has detected that the same line has been
+ *                                  accessed as both shareable and non-shareable
+ *                                  memory from inside the GPU.
+ * @BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL1: A memory access hit an invalid table
+ *                                  entry at level 1 of the translation table.
+ * @BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL2: A memory access hit an invalid table
+ *                                  entry at level 2 of the translation table.
+ * @BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL3: A memory access hit an invalid table
+ *                                  entry at level 3 of the translation table.
+ * @BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL4: A memory access hit an invalid table
+ *                                  entry at level 4 of the translation table.
+ * @BASE_JD_EVENT_PERMISSION_FAULT: A memory access could not be allowed due to
+ *                                  the permission flags set in translation
+ *                                  table
+ * @BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL1: A bus fault occurred while reading
+ *                                  level 0 of the translation tables.
+ * @BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL2: A bus fault occurred while reading
+ *                                  level 1 of the translation tables.
+ * @BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL3: A bus fault occurred while reading
+ *                                  level 2 of the translation tables.
+ * @BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL4: A bus fault occurred while reading
+ *                                  level 3 of the translation tables.
+ * @BASE_JD_EVENT_ACCESS_FLAG:      Matches ACCESS_FLAG_0. A memory access hit a
+ *                                  translation table entry with the ACCESS_FLAG
+ *                                  bit set to zero in level 0 of the
+ *                                  page table, and the DISABLE_AF_FAULT flag
+ *                                  was not set.
+ * @BASE_JD_EVENT_MEM_GROWTH_FAILED: raised for JIT_ALLOC atoms that failed to
+ *                                   grow memory on demand
+ * @BASE_JD_EVENT_JOB_CANCELLED: raised when this atom was hard-stopped or its
+ *                               dependencies failed
+ * @BASE_JD_EVENT_JOB_INVALID: raised for many reasons, including invalid data
+ *                             in the atom which overlaps with
+ *                             BASE_JD_EVENT_JOB_CONFIG_FAULT, or if the
+ *                             platform doesn't support the feature specified in
+ *                             the atom.
+ * @BASE_JD_EVENT_DRV_TERMINATED: this is a special event generated to indicate
+ *                                to userspace that the KBase context has been
+ *                                destroyed and Base should stop listening for
+ *                                further events
+ * @BASE_JD_EVENT_REMOVED_FROM_NEXT: raised when an atom that was configured in
+ *                                   the GPU has to be retried (but it has not
+ *                                   started) due to e.g., GPU reset
+ * HW and low-level SW events are represented by event codes.
+ * The status of jobs which succeeded are also represented by
+ * an event code (see @BASE_JD_EVENT_DONE).
+ * Events are usually reported as part of a &struct base_jd_event.
+ *
+ * The event codes are encoded in the following way:
+ * * 10:0  - subtype
+ * * 12:11 - type
+ * * 13    - SW success (only valid if the SW bit is set)
+ * * 14    - SW event (HW event if not set)
+ * * 15    - Kernel event (should never be seen in userspace)
+ *
+ * Events are split up into ranges as follows:
+ * * BASE_JD_EVENT_RANGE_<description>_START
+ * * BASE_JD_EVENT_RANGE_<description>_END
+ *
+ * code is in <description>'s range when:
+ * BASE_JD_EVENT_RANGE_<description>_START <= code <
+ *   BASE_JD_EVENT_RANGE_<description>_END
+ *
+ * Ranges can be asserted for adjacency by testing that the END of the previous
+ * is equal to the START of the next. This is useful for optimizing some tests
+ * for range.
+ *
+ * A limitation is that the last member of this enum must explicitly be handled
+ * (with an assert-unreachable statement) in switch statements that use
+ * variables of this type. Otherwise, the compiler warns that we have not
+ * handled that enum value.
+ */
+enum base_jd_event_code {
+	/* HW defined exceptions */
+	BASE_JD_EVENT_RANGE_HW_NONFAULT_START = 0,
+
+	/* non-fatal exceptions */
+	BASE_JD_EVENT_NOT_STARTED = 0x00,
+	BASE_JD_EVENT_DONE = 0x01,
+	BASE_JD_EVENT_STOPPED = 0x03,
+	BASE_JD_EVENT_TERMINATED = 0x04,
+	BASE_JD_EVENT_ACTIVE = 0x08,
+
+	BASE_JD_EVENT_RANGE_HW_NONFAULT_END = 0x40,
+	BASE_JD_EVENT_RANGE_HW_FAULT_OR_SW_ERROR_START = 0x40,
+
+	/* job exceptions */
+	BASE_JD_EVENT_JOB_CONFIG_FAULT = 0x40,
+	BASE_JD_EVENT_JOB_POWER_FAULT = 0x41,
+	BASE_JD_EVENT_JOB_READ_FAULT = 0x42,
+	BASE_JD_EVENT_JOB_WRITE_FAULT = 0x43,
+	BASE_JD_EVENT_JOB_AFFINITY_FAULT = 0x44,
+	BASE_JD_EVENT_JOB_BUS_FAULT = 0x48,
+	BASE_JD_EVENT_INSTR_INVALID_PC = 0x50,
+	BASE_JD_EVENT_INSTR_INVALID_ENC = 0x51,
+	BASE_JD_EVENT_INSTR_TYPE_MISMATCH = 0x52,
+	BASE_JD_EVENT_INSTR_OPERAND_FAULT = 0x53,
+	BASE_JD_EVENT_INSTR_TLS_FAULT = 0x54,
+	BASE_JD_EVENT_INSTR_BARRIER_FAULT = 0x55,
+	BASE_JD_EVENT_INSTR_ALIGN_FAULT = 0x56,
+	BASE_JD_EVENT_DATA_INVALID_FAULT = 0x58,
+	BASE_JD_EVENT_TILE_RANGE_FAULT = 0x59,
+	BASE_JD_EVENT_STATE_FAULT = 0x5A,
+	BASE_JD_EVENT_OUT_OF_MEMORY = 0x60,
+	BASE_JD_EVENT_UNKNOWN = 0x7F,
+
+	/* GPU exceptions */
+	BASE_JD_EVENT_DELAYED_BUS_FAULT = 0x80,
+	BASE_JD_EVENT_SHAREABILITY_FAULT = 0x88,
+
+	/* MMU exceptions */
+	BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL1 = 0xC1,
+	BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL2 = 0xC2,
+	BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL3 = 0xC3,
+	BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL4 = 0xC4,
+	BASE_JD_EVENT_PERMISSION_FAULT = 0xC8,
+	BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL1 = 0xD1,
+	BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL2 = 0xD2,
+	BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL3 = 0xD3,
+	BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL4 = 0xD4,
+	BASE_JD_EVENT_ACCESS_FLAG = 0xD8,
+
+	/* SW defined exceptions */
+	BASE_JD_EVENT_MEM_GROWTH_FAILED = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x000,
+	BASE_JD_EVENT_JOB_CANCELLED = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x002,
+	BASE_JD_EVENT_JOB_INVALID = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x003,
+
+	BASE_JD_EVENT_RANGE_HW_FAULT_OR_SW_ERROR_END = BASE_JD_SW_EVENT |
+						       BASE_JD_SW_EVENT_RESERVED | 0x3FF,
+
+	BASE_JD_EVENT_RANGE_SW_SUCCESS_START = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_SUCCESS | 0x000,
+
+	BASE_JD_EVENT_DRV_TERMINATED = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_SUCCESS |
+				       BASE_JD_SW_EVENT_INFO | 0x000,
+
+	BASE_JD_EVENT_RANGE_SW_SUCCESS_END = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_SUCCESS |
+					     BASE_JD_SW_EVENT_RESERVED | 0x3FF,
+
+	BASE_JD_EVENT_RANGE_KERNEL_ONLY_START = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_KERNEL | 0x000,
+	BASE_JD_EVENT_REMOVED_FROM_NEXT = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_KERNEL |
+					  BASE_JD_SW_EVENT_JOB | 0x000,
+
+	BASE_JD_EVENT_RANGE_KERNEL_ONLY_END = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_KERNEL |
+					      BASE_JD_SW_EVENT_RESERVED | 0x3FF
+};
+
+/**
+ * struct base_jd_event_v2 - Event reporting structure
+ *
+ * @event_code:  event code of type @ref base_jd_event_code.
+ * @atom_number: the atom number that has completed.
+ * @padding:     padding.
+ * @udata:       user data.
+ *
+ * This structure is used by the kernel driver to report information
+ * about GPU events. They can either be HW-specific events or low-level
+ * SW events, such as job-chain completion.
+ *
+ * The event code contains an event type field which can be extracted
+ * by ANDing with BASE_JD_SW_EVENT_TYPE_MASK.
+ */
+struct base_jd_event_v2 {
+	__u32 event_code;
+	base_atom_id atom_number;
+	__u8 padding[3];
+	struct base_jd_udata udata;
+};
+
+/**
+ * struct base_dump_cpu_gpu_counters - Structure for
+ *                                     BASE_JD_REQ_SOFT_DUMP_CPU_GPU_COUNTERS
+ *                                     jobs.
+ * @system_time:   gpu timestamp
+ * @cycle_counter: gpu cycle count
+ * @sec:           cpu time(sec)
+ * @usec:          cpu time(usec)
+ * @padding:       padding
+ *
+ * This structure is stored into the memory pointed to by the @jc field
+ * of &struct base_jd_atom.
+ *
+ * It must not occupy the same CPU cache line(s) as any neighboring data.
+ * This is to avoid cases where access to pages containing the structure
+ * is shared between cached and un-cached memory regions, which would
+ * cause memory corruption.
+ */
+
+struct base_dump_cpu_gpu_counters {
+	__u64 system_time;
+	__u64 cycle_counter;
+	__u64 sec;
+	__u32 usec;
+	__u8 padding[36];
+};
+
+/**
+ * struct mali_base_gpu_core_props - GPU core props info
+ *
+ * @product_id: Pro specific value.
+ * @version_status: Status of the GPU release. No defined values, but starts at
+ *   0 and increases by one for each release status (alpha, beta, EAC, etc.).
+ *   4 bit values (0-15).
+ * @minor_revision: Minor release number of the GPU. "P" part of an "RnPn"
+ *   release number.
+ *   8 bit values (0-255).
+ * @major_revision: Major release number of the GPU. "R" part of an "RnPn"
+ *   release number.
+ *   4 bit values (0-15).
+ * @padding: padding to align to 8-byte
+ * @gpu_freq_khz_max: The maximum GPU frequency. Reported to applications by
+ *   clGetDeviceInfo()
+ * @log2_program_counter_size: Size of the shader program counter, in bits.
+ * @texture_features: TEXTURE_FEATURES_x registers, as exposed by the GPU. This
+ *   is a bitpattern where a set bit indicates that the format is supported.
+ *   Before using a texture format, it is recommended that the corresponding
+ *   bit be checked.
+ * @paddings_1: Padding bytes.
+ * @gpu_available_memory_size: Theoretical maximum memory available to the GPU.
+ *   It is unlikely that a client will be able to allocate all of this memory
+ *   for their own purposes, but this at least provides an upper bound on the
+ *   memory available to the GPU.
+ *   This is required for OpenCL's clGetDeviceInfo() call when
+ *   CL_DEVICE_GLOBAL_MEM_SIZE is requested, for OpenCL GPU devices. The
+ *   client will not be expecting to allocate anywhere near this value.
+ * @num_exec_engines: The number of execution engines. Only valid for tGOX
+ *   (Bifrost) GPUs, where GPU_HAS_REG_CORE_FEATURES is defined. Otherwise,
+ *   this is always 0.
+ * @paddings_2: Padding bytes.
+ */
+struct mali_base_gpu_core_props {
+	__u32 product_id;
+	__u16 version_status;
+	__u16 minor_revision;
+	__u16 major_revision;
+	__u16 padding;
+	__u32 gpu_freq_khz_max;
+	__u32 log2_program_counter_size;
+	__u32 texture_features[BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS];
+	__u8 paddings_1[4];
+	__u64 gpu_available_memory_size;
+	__u8 num_exec_engines;
+	__u8 paddings_2[7];
+};
+
+#endif /* _UAPI_BASE_JM_KERNEL_H_ */
--- a/include/uapi/gpu/arm/valhall/jm/mali_kbase_jm_ioctl.h
+++ b/include/uapi/gpu/arm/valhall/jm/mali_kbase_jm_ioctl.h
@@ -0,0 +1,274 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *
+ * (C) COPYRIGHT 2020-2024 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#ifndef _UAPI_KBASE_JM_IOCTL_H_
+#define _UAPI_KBASE_JM_IOCTL_H_
+
+#include <asm-generic/ioctl.h>
+#include <linux/types.h>
+
+/*
+ * 11.1:
+ * - Add BASE_MEM_TILER_ALIGN_TOP under base_mem_alloc_flags
+ * 11.2:
+ * - KBASE_MEM_QUERY_FLAGS can return KBASE_REG_PF_GROW and KBASE_REG_PROTECTED,
+ *   which some user-side clients prior to 11.2 might fault if they received
+ *   them
+ * 11.3:
+ * - New ioctls KBASE_IOCTL_STICKY_RESOURCE_MAP and
+ *   KBASE_IOCTL_STICKY_RESOURCE_UNMAP
+ * 11.4:
+ * - New ioctl KBASE_IOCTL_MEM_FIND_GPU_START_AND_OFFSET
+ * 11.5:
+ * - New ioctl: KBASE_IOCTL_MEM_JIT_INIT (old ioctl renamed to _OLD)
+ * 11.6:
+ * - Added flags field to base_jit_alloc_info structure, which can be used to
+ *   specify pseudo chunked tiler alignment for JIT allocations.
+ * 11.7:
+ * - Removed UMP support
+ * 11.8:
+ * - Added BASE_MEM_UNCACHED_GPU under base_mem_alloc_flags
+ * 11.9:
+ * - Added BASE_MEM_PERMANENT_KERNEL_MAPPING and BASE_MEM_FLAGS_KERNEL_ONLY
+ *   under base_mem_alloc_flags
+ * 11.10:
+ * - Enabled the use of nr_extres field of base_jd_atom_v2 structure for
+ *   JIT_ALLOC and JIT_FREE type softjobs to enable multiple JIT allocations
+ *   with one softjob.
+ * 11.11:
+ * - Added BASE_MEM_GPU_VA_SAME_4GB_PAGE under base_mem_alloc_flags
+ * 11.12:
+ * - Removed ioctl: KBASE_IOCTL_GET_PROFILING_CONTROLS
+ * 11.13:
+ * - New ioctl: KBASE_IOCTL_MEM_EXEC_INIT
+ * 11.14:
+ * - Add BASE_MEM_GROUP_ID_MASK, base_mem_group_id_get, base_mem_group_id_set
+ *   under base_mem_alloc_flags
+ * 11.15:
+ * - Added BASEP_CONTEXT_MMU_GROUP_ID_MASK under base_context_create_flags.
+ * - Require KBASE_IOCTL_SET_FLAGS before BASE_MEM_MAP_TRACKING_HANDLE can be
+ *   passed to mmap().
+ * 11.16:
+ * - Extended ioctl KBASE_IOCTL_MEM_SYNC to accept imported dma-buf.
+ * - Modified (backwards compatible) ioctl KBASE_IOCTL_MEM_IMPORT behavior for
+ *   dma-buf. Now, buffers are mapped on GPU when first imported, no longer
+ *   requiring external resource or sticky resource tracking. UNLESS,
+ *   CONFIG_MALI_DMA_BUF_MAP_ON_DEMAND is enabled.
+ * 11.17:
+ * - Added BASE_JD_REQ_JOB_SLOT.
+ * - Reused padding field in base_jd_atom_v2 to pass job slot number.
+ * - New ioctl: KBASE_IOCTL_GET_CPU_GPU_TIMEINFO
+ * 11.18:
+ * - Added BASE_MEM_IMPORT_SYNC_ON_MAP_UNMAP under base_mem_alloc_flags
+ * 11.19:
+ * - Extended base_jd_atom_v2 to allow a renderpass ID to be specified.
+ * 11.20:
+ * - Added new phys_pages member to kbase_ioctl_mem_jit_init for
+ *   KBASE_IOCTL_MEM_JIT_INIT, previous variants of this renamed to use _10_2
+ *   (replacing '_OLD') and _11_5 suffixes
+ * - Replaced compat_core_req (deprecated in 10.3) with jit_id[2] in
+ *   base_jd_atom_v2. It must currently be initialized to zero.
+ * - Added heap_info_gpu_addr to base_jit_alloc_info, and
+ *   BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE allowable in base_jit_alloc_info's
+ *   flags member. Previous variants of this structure are kept and given _10_2
+ *   and _11_5 suffixes.
+ * - The above changes are checked for safe values in usual builds
+ * 11.21:
+ * - v2.0 of mali_trace debugfs file, which now versions the file separately
+ * 11.22:
+ * - Added base_jd_atom (v3), which is seq_nr + base_jd_atom_v2.
+ *   KBASE_IOCTL_JOB_SUBMIT supports both in parallel.
+ * 11.23:
+ * - Modified KBASE_IOCTL_MEM_COMMIT behavior to reject requests to modify
+ *   the physical memory backing of JIT allocations. This was not supposed
+ *   to be a valid use case, but it was allowed by the previous implementation.
+ * 11.24:
+ * - Added a sysfs file 'serialize_jobs' inside a new sub-directory
+ *   'scheduling'.
+ * 11.25:
+ * - Enabled JIT pressure limit in base/kbase by default
+ * 11.26
+ * - Added kinstr_jm API
+ * 11.27
+ * - Backwards compatible extension to HWC ioctl.
+ * 11.28:
+ * - Added kernel side cache ops needed hint
+ * 11.29:
+ * - Reserve ioctl 52
+ * 11.30:
+ * - Add a new priority level BASE_JD_PRIO_REALTIME
+ * - Add ioctl 54: This controls the priority setting.
+ * 11.31:
+ * - Added BASE_JD_REQ_LIMITED_CORE_MASK.
+ * - Added ioctl 55: set_limited_core_count.
+ * 11.32:
+ * - Added new HW performance counters interface to all GPUs.
+ * 11.33:
+ * - Removed Kernel legacy HWC interface
+ * 11.34:
+ * - First release of new HW performance counters interface.
+ * 11.35:
+ * - Dummy model (no mali) backend will now clear HWC values after each sample
+ * 11.36:
+ * - Remove legacy definitions:
+ *   - base_jit_alloc_info_10_2
+ *   - base_jit_alloc_info_11_5
+ *   - kbase_ioctl_mem_jit_init_10_2
+ *   - kbase_ioctl_mem_jit_init_11_5
+ * 11.37:
+ * - Fix kinstr_prfcnt issues:
+ *   - Missing implicit sample for CMD_STOP when HWCNT buffer is full.
+ *   - Race condition when stopping periodic sampling.
+ *   - prfcnt_block_metadata::block_idx gaps.
+ *   - PRFCNT_CONTROL_CMD_SAMPLE_ASYNC is removed.
+ * 11.38:
+ * - Relax the requirement to create a mapping with BASE_MEM_MAP_TRACKING_HANDLE
+ *   before allocating GPU memory for the context.
+ * - CPU mappings of USER_BUFFER imported memory handles must be cached.
+ * 11.39:
+ * - Restrict child process from doing supported file operations (like mmap, ioctl,
+ *   read, poll) on the file descriptor of mali device file that was inherited
+ *   from the parent process.
+ * 11.40:
+ * - Remove KBASE_IOCTL_HWCNT_READER_SETUP and KBASE_HWCNT_READER_* ioctls.
+ * - Made the BASE_MEM_DONT_NEED memory flag queryable.
+ * 11.41:
+ * - Disallows changing the sharability on the GPU of imported dma-bufs to
+ *   BASE_MEM_COHERENT_SYSTEM using KBASE_IOCTL_MEM_FLAGS_CHANGE.
+ * 11.42:
+ * - Implement full block state support for hardware counters.
+ * 11.43:
+ * - Made the BASE_MEM_IMPORT_SYNC_ON_MAP_UNMAP and BASE_MEM_KERNEL_SYNC memory
+ *   flags queryable.
+ * 11.44:
+ * - Made the SAME_VA memory flag queryable.
+ * 11.45:
+ * - Re-allow child process to do supported file operations (like mmap, ioctl
+ *   read, poll) on the file descriptor of mali device that was inherited
+ *   from the parent process.
+ * 11.46:
+ * - Remove renderpass_id from base_jd_atom_v2 to deprecate support for JM Incremental Rendering
+ * 11.47:
+ * - Reject non-protected allocations containing the BASE_MEM_PROTECTED memory flag.
+ * - Reject allocations containing the BASE_MEM_DONT_NEED memory flag (it is only settable).
+ * - Reject allocations containing the BASE_MEM_UNUSED_BIT_xx memory flags.
+ * 11.48:
+ * - Add UNUSED_BIT_5, UNUSED_BIT_7, UNUSED_BIT_27 and UNUSED_BIT_29 previously occupied by
+ *   kernel-only flags to kbase cap table.
+ * 11.49:
+ * - Increased KBASE_MEM_PROFILE_MAX_BUF_SIZE for more cctx memory classes.
+  */
+
+#define BASE_UK_VERSION_MAJOR 11
+#define BASE_UK_VERSION_MINOR 49
+
+/**
+ * struct kbase_ioctl_version_check - Check version compatibility between
+ * kernel and userspace
+ *
+ * @major: Major version number
+ * @minor: Minor version number
+ */
+struct kbase_ioctl_version_check {
+	__u16 major;
+	__u16 minor;
+};
+
+#define KBASE_IOCTL_VERSION_CHECK _IOWR(KBASE_IOCTL_TYPE, 0, struct kbase_ioctl_version_check)
+
+/**
+ * struct kbase_ioctl_job_submit - Submit jobs/atoms to the kernel
+ *
+ * @addr: Memory address of an array of struct base_jd_atom_v2 or v3
+ * @nr_atoms: Number of entries in the array
+ * @stride: sizeof(struct base_jd_atom_v2) or sizeof(struct base_jd_atom)
+ */
+struct kbase_ioctl_job_submit {
+	__u64 addr;
+	__u32 nr_atoms;
+	__u32 stride;
+};
+
+#define KBASE_IOCTL_JOB_SUBMIT _IOW(KBASE_IOCTL_TYPE, 2, struct kbase_ioctl_job_submit)
+
+#define KBASE_IOCTL_POST_TERM _IO(KBASE_IOCTL_TYPE, 4)
+
+/**
+ * struct kbase_ioctl_soft_event_update - Update the status of a soft-event
+ * @event: GPU address of the event which has been updated
+ * @new_status: The new status to set
+ * @flags: Flags for future expansion
+ */
+struct kbase_ioctl_soft_event_update {
+	__u64 event;
+	__u32 new_status;
+	__u32 flags;
+};
+
+#define KBASE_IOCTL_SOFT_EVENT_UPDATE \
+	_IOW(KBASE_IOCTL_TYPE, 28, struct kbase_ioctl_soft_event_update)
+
+/**
+ * struct kbase_kinstr_jm_fd_out - Explains the compatibility information for
+ * the `struct kbase_kinstr_jm_atom_state_change` structure returned from the
+ * kernel
+ *
+ * @size:    The size of the `struct kbase_kinstr_jm_atom_state_change`
+ * @version: Represents a breaking change in the
+ *           `struct kbase_kinstr_jm_atom_state_change`
+ * @padding: Explicit padding to get the structure up to 64bits. See
+ * https://www.kernel.org/doc/Documentation/ioctl/botching-up-ioctls.rst
+ *
+ * The `struct kbase_kinstr_jm_atom_state_change` may have extra members at the
+ * end of the structure that older user space might not understand. If the
+ * `version` is the same, the structure is still compatible with newer kernels.
+ * The `size` can be used to cast the opaque memory returned from the kernel.
+ */
+struct kbase_kinstr_jm_fd_out {
+	__u16 size;
+	__u8 version;
+	__u8 padding[5];
+};
+
+/**
+ * struct kbase_kinstr_jm_fd_in - Options when creating the file descriptor
+ *
+ * @count: Number of atom states that can be stored in the kernel circular
+ *         buffer. Must be a power of two
+ * @padding: Explicit padding to get the structure up to 64bits. See
+ * https://www.kernel.org/doc/Documentation/ioctl/botching-up-ioctls.rst
+ */
+struct kbase_kinstr_jm_fd_in {
+	__u16 count;
+	__u8 padding[6];
+};
+
+union kbase_kinstr_jm_fd {
+	struct kbase_kinstr_jm_fd_in in;
+	struct kbase_kinstr_jm_fd_out out;
+};
+
+#define KBASE_IOCTL_KINSTR_JM_FD _IOWR(KBASE_IOCTL_TYPE, 51, union kbase_kinstr_jm_fd)
+
+#define KBASE_IOCTL_VERSION_CHECK_RESERVED \
+	_IOWR(KBASE_IOCTL_TYPE, 52, struct kbase_ioctl_version_check)
+
+#endif /* _UAPI_KBASE_JM_IOCTL_H_ */
--- a/include/uapi/gpu/arm/valhall/jm/mali_kbase_jm_mem_flags.h
+++ b/include/uapi/gpu/arm/valhall/jm/mali_kbase_jm_mem_flags.h
@@ -0,0 +1,65 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *
+ * (C) COPYRIGHT 2024 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#ifndef _UAPI_KBASE_JM_MEM_FLAGS_H_
+#define _UAPI_KBASE_JM_MEM_FLAGS_H_
+
+/* Memory allocation, access/hint flags & mask specific to JM GPU.
+ *
+ * See base_mem_alloc_flags.
+ */
+
+/* Unused bit for JM, only used in CSF for BASE_MEM_FIXED */
+#define BASE_MEM_UNUSED_BIT_8 ((base_mem_alloc_flags)1 << 8)
+
+/* Unused bit for JM, only used in CSF for BASE_CSF_EVENT */
+#define BASE_MEM_UNUSED_BIT_19 ((base_mem_alloc_flags)1 << 19)
+
+/**
+ * BASE_MEM_TILER_ALIGN_TOP - Memory starting from the end of the initial commit is aligned
+ * to 'extension' pages, where 'extension' must be a power of 2 and no more than
+ * BASE_MEM_TILER_ALIGN_TOP_EXTENSION_MAX_PAGES
+ */
+#define BASE_MEM_TILER_ALIGN_TOP ((base_mem_alloc_flags)1 << 20)
+
+/* Previously BASEP_MEM_PERFORM_JIT_TRIM, can be reused in the future */
+#define BASE_MEM_UNUSED_BIT_29 ((base_mem_alloc_flags)1 << 29)
+
+/* A mask of flags that, when provided, cause other flags to be
+ * enabled but are not enabled themselves
+ */
+#define BASE_MEM_FLAGS_ACTION_MODIFIERS (BASE_MEM_COHERENT_SYSTEM_REQUIRED | BASE_MEM_IMPORT_SHARED)
+
+/* A mask of all currently reserved flags */
+#define BASE_MEM_FLAGS_RESERVED ((base_mem_alloc_flags)0)
+
+/* Number of bits used as flags for base memory management from kernel-side
+ * only (ie BASEP_MEM_* flags), located from 63 bit downwards:
+ *   < 63 .. (64 - BASEP_MEM_FLAGS_NR_BITS) >
+ */
+#define BASEP_MEM_FLAGS_NR_BITS (6)
+
+/* A mask of all bits that are not used by a flag on JM */
+#define BASE_MEM_FLAGS_UNUSED                                                    \
+	(BASE_MEM_UNUSED_BIT_5 | BASE_MEM_UNUSED_BIT_7 | BASE_MEM_UNUSED_BIT_8 | \
+	 BASE_MEM_UNUSED_BIT_19 | BASE_MEM_UNUSED_BIT_27 | BASE_MEM_UNUSED_BIT_29)
+
+#endif /* _UAPI_KBASE_JM_MEM_FLAGS_H_ */
--- a/include/uapi/gpu/arm/valhall/mali_base_common_kernel.h
+++ b/include/uapi/gpu/arm/valhall/mali_base_common_kernel.h
@@ -0,0 +1,88 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *
+ * (C) COPYRIGHT 2022-2024 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#ifndef _UAPI_BASE_COMMON_KERNEL_H_
+#define _UAPI_BASE_COMMON_KERNEL_H_
+
+#include <linux/types.h>
+#include "mali_kbase_mem_flags.h"
+
+struct base_mem_handle {
+	struct {
+		__u64 handle;
+	} basep;
+};
+
+#define BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS 4
+
+/* Flags to pass to ::base_context_init.
+ * Flags can be ORed together to enable multiple things.
+ *
+ * These share the same space as BASEP_CONTEXT_FLAG_*, and so must
+ * not collide with them.
+ */
+typedef __u32 base_context_create_flags;
+
+/* Flags for base context */
+
+/* No flags set */
+#define BASE_CONTEXT_CREATE_FLAG_NONE ((base_context_create_flags)0)
+
+/* Base context is embedded in a cctx object (flag used for CINSTR
+ * software counter macros)
+ */
+#define BASE_CONTEXT_CCTX_EMBEDDED ((base_context_create_flags)1 << 0)
+
+/* Base context is a 'System Monitor' context for Hardware counters.
+ *
+ * One important side effect of this is that job submission is disabled.
+ */
+#define BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED ((base_context_create_flags)1 << 1)
+
+/* Bit-shift used to encode a memory group ID in base_context_create_flags
+ */
+#define BASEP_CONTEXT_MMU_GROUP_ID_SHIFT (3)
+
+/* Bitmask used to encode a memory group ID in base_context_create_flags
+ */
+#define BASEP_CONTEXT_MMU_GROUP_ID_MASK \
+	((base_context_create_flags)0xF << BASEP_CONTEXT_MMU_GROUP_ID_SHIFT)
+
+/* Bitpattern describing the base_context_create_flags that can be
+ * passed to the kernel
+ */
+#define BASEP_CONTEXT_CREATE_KERNEL_FLAGS \
+	(BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED | BASEP_CONTEXT_MMU_GROUP_ID_MASK)
+
+/* Flags for base tracepoint
+ */
+
+/* Enable additional tracepoints for latency measurements (TL_ATOM_READY,
+ * TL_ATOM_DONE, TL_ATOM_PRIO_CHANGE, TL_ATOM_EVENT_POST)
+ */
+#define BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS (1U << 0)
+
+/* Indicate that job dumping is enabled. This could affect certain timers
+ * to account for the performance impact.
+ */
+#define BASE_TLSTREAM_JOB_DUMPING_ENABLED (1U << 1)
+
+#endif /* _UAPI_BASE_COMMON_KERNEL_H_ */
--- a/include/uapi/gpu/arm/valhall/mali_base_kernel.h
+++ b/include/uapi/gpu/arm/valhall/mali_base_kernel.h
@@ -0,0 +1,636 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *
+ * (C) COPYRIGHT 2010-2024 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+/*
+ * Base structures shared with the kernel.
+ */
+
+#ifndef _UAPI_BASE_KERNEL_H_
+#define _UAPI_BASE_KERNEL_H_
+
+#include <linux/types.h>
+#include "mali_gpu_props.h"
+#include "mali_base_common_kernel.h"
+#include "mali_base_mem_priv.h"
+#include "gpu/mali_kbase_gpu_id.h"
+#include "gpu/mali_kbase_gpu_coherency.h"
+
+#ifdef __KERNEL__
+#include <linux/mm.h>
+
+#if defined(PAGE_MASK) && defined(PAGE_SHIFT)
+#define LOCAL_PAGE_SHIFT PAGE_SHIFT
+#define LOCAL_PAGE_LSB ~PAGE_MASK
+#else
+#error "Missing kernel definitions: PAGE_MASK, PAGE_SHIFT"
+#endif
+
+#else
+
+#define LOCAL_PAGE_SHIFT (__builtin_ctz((unsigned int)sysconf(_SC_PAGESIZE)))
+
+#define LOCAL_PAGE_LSB ((1ul << LOCAL_PAGE_SHIFT) - 1)
+
+#endif
+
+/* Physical memory group ID for normal usage.
+ */
+#define BASE_MEM_GROUP_DEFAULT (0)
+
+/* Number of physical memory groups.
+ */
+#define BASE_MEM_GROUP_COUNT (16)
+
+/**
+ * typedef base_mem_alloc_flags - Memory allocation, access/hint flags.
+ *
+ * A combination of MEM_PROT/MEM_HINT flags must be passed to each allocator
+ * in order to determine the best cache policy. Some combinations are
+ * of course invalid (e.g. MEM_PROT_CPU_WR | MEM_HINT_CPU_RD),
+ * which defines a write-only region on the CPU side, which is
+ * heavily read by the CPU...
+ * Other flags are only meaningful to a particular allocator.
+ * More flags can be added to this list, as long as they don't clash
+ * (see BASE_MEM_FLAGS_NR_BITS for the number of the first free bit).
+ */
+typedef __u64 base_mem_alloc_flags;
+
+#define BASE_MEM_FLAGS_MODIFIABLE_NATIVE (BASE_MEM_DONT_NEED)
+
+#define BASE_MEM_FLAGS_MODIFIABLE_IMPORTED_UMM (BASE_MEM_COHERENT_SYSTEM | BASE_MEM_COHERENT_LOCAL)
+
+/* A mask for all the flags which are modifiable via the base_mem_set_flags
+ * interface.
+ */
+#define BASE_MEM_FLAGS_MODIFIABLE \
+	(BASE_MEM_FLAGS_MODIFIABLE_NATIVE | BASE_MEM_FLAGS_MODIFIABLE_IMPORTED_UMM)
+
+/* A mask of all the flags that can be returned via the base_mem_get_flags()
+ * interface.
+ */
+#define BASE_MEM_FLAGS_QUERYABLE                                                               \
+	(BASE_MEM_FLAGS_INPUT_MASK &                                                           \
+	 ~(BASE_MEM_FLAGS_RESERVED | BASE_MEM_FLAGS_UNUSED | BASE_MEM_FLAGS_ACTION_MODIFIERS | \
+	   BASE_MEM_FLAGS_KERNEL_ONLY))
+
+/**
+ * enum base_mem_import_type - Memory types supported by @a base_mem_import
+ *
+ * @BASE_MEM_IMPORT_TYPE_INVALID: Invalid type
+ * @BASE_MEM_IMPORT_TYPE_UMM: UMM import. Handle type is a file descriptor (int)
+ * @BASE_MEM_IMPORT_TYPE_USER_BUFFER: User buffer import. Handle is a
+ * base_mem_import_user_buffer
+ *
+ * Each type defines what the supported handle type is.
+ *
+ * If any new type is added here ARM must be contacted
+ * to allocate a numeric value for it.
+ * Do not just add a new type without synchronizing with ARM
+ * as future releases from ARM might include other new types
+ * which could clash with your custom types.
+ */
+enum base_mem_import_type {
+	BASE_MEM_IMPORT_TYPE_INVALID = 0,
+	/*
+	 * Import type with value 1 is deprecated.
+	 */
+	BASE_MEM_IMPORT_TYPE_UMM = 2,
+	BASE_MEM_IMPORT_TYPE_USER_BUFFER = 3
+};
+
+/**
+ * struct base_mem_import_user_buffer - Handle of an imported user buffer
+ *
+ * @ptr:	address of imported user buffer
+ * @length:	length of imported user buffer in bytes
+ *
+ * This structure is used to represent a handle of an imported user buffer.
+ */
+
+struct base_mem_import_user_buffer {
+	__u64 ptr;
+	__u64 length;
+};
+
+/* Mask to detect 4GB boundary alignment */
+#define BASE_MEM_MASK_4GB 0xfffff000UL
+/* Mask to detect 4GB boundary (in page units) alignment */
+#define BASE_MEM_PFN_MASK_4GB (BASE_MEM_MASK_4GB >> LOCAL_PAGE_SHIFT)
+
+/* Limit on the 'extension' parameter for an allocation with the
+ * BASE_MEM_TILER_ALIGN_TOP flag set
+ *
+ * This is the same as the maximum limit for a Buffer Descriptor's chunk size
+ */
+#define BASE_MEM_TILER_ALIGN_TOP_EXTENSION_MAX_PAGES_LOG2 (21u - (LOCAL_PAGE_SHIFT))
+#define BASE_MEM_TILER_ALIGN_TOP_EXTENSION_MAX_PAGES \
+	(1ull << (BASE_MEM_TILER_ALIGN_TOP_EXTENSION_MAX_PAGES_LOG2))
+
+/* Bit mask of cookies used for memory allocation setup */
+#define KBASE_COOKIE_MASK ~1UL /* bit 0 is reserved */
+
+/* Maximum size allowed in a single KBASE_IOCTL_MEM_ALLOC call */
+#define KBASE_MEM_ALLOC_MAX_SIZE ((8ull << 30) >> PAGE_SHIFT) /* 8 GB */
+
+/*
+ * struct base_fence - Cross-device synchronisation fence.
+ *
+ * A fence is used to signal when the GPU has finished accessing a resource that
+ * may be shared with other devices, and also to delay work done asynchronously
+ * by the GPU until other devices have finished accessing a shared resource.
+ */
+struct base_fence {
+	struct {
+		int fd;
+		int stream_fd;
+	} basep;
+};
+
+/**
+ * struct base_mem_aliasing_info - Memory aliasing info
+ *
+ * @handle: Handle to alias, can be BASE_MEM_WRITE_ALLOC_PAGES_HANDLE
+ * @offset: Offset within the handle to start aliasing from, in pages.
+ *          Not used with BASE_MEM_WRITE_ALLOC_PAGES_HANDLE.
+ * @length: Length to alias, in pages. For BASE_MEM_WRITE_ALLOC_PAGES_HANDLE
+ *          specifies the number of times the special page is needed.
+ *
+ * Describes a memory handle to be aliased.
+ * A subset of the handle can be chosen for aliasing, given an offset and a
+ * length.
+ * A special handle BASE_MEM_WRITE_ALLOC_PAGES_HANDLE is used to represent a
+ * region where a special page is mapped with a write-alloc cache setup,
+ * typically used when the write result of the GPU isn't needed, but the GPU
+ * must write anyway.
+ *
+ * Offset and length are specified in pages.
+ * Offset must be within the size of the handle.
+ * Offset+length must not overrun the size of the handle.
+ */
+struct base_mem_aliasing_info {
+	struct base_mem_handle handle;
+	__u64 offset;
+	__u64 length;
+};
+
+/* Maximum percentage of just-in-time memory allocation trimming to perform
+ * on free.
+ */
+#define BASE_JIT_MAX_TRIM_LEVEL (100)
+
+/* Maximum number of concurrent just-in-time memory allocations.
+ */
+#define BASE_JIT_ALLOC_COUNT (255)
+
+/**
+ * struct base_jit_alloc_info - Structure which describes a JIT allocation
+ *                              request.
+ * @gpu_alloc_addr:             The GPU virtual address to write the JIT
+ *                              allocated GPU virtual address to.
+ * @va_pages:                   The minimum number of virtual pages required.
+ * @commit_pages:               The minimum number of physical pages which
+ *                              should back the allocation.
+ * @extension:                     Granularity of physical pages to grow the
+ *                              allocation by during a fault.
+ * @id:                         Unique ID provided by the caller, this is used
+ *                              to pair allocation and free requests.
+ *                              Zero is not a valid value.
+ * @bin_id:                     The JIT allocation bin, used in conjunction with
+ *                              @max_allocations to limit the number of each
+ *                              type of JIT allocation.
+ * @max_allocations:            The maximum number of allocations allowed within
+ *                              the bin specified by @bin_id. Should be the same
+ *                              for all allocations within the same bin.
+ * @flags:                      flags specifying the special requirements for
+ *                              the JIT allocation, see
+ *                              %BASE_JIT_ALLOC_VALID_FLAGS
+ * @padding:                    Expansion space - should be initialised to zero
+ * @usage_id:                   A hint about which allocation should be reused.
+ *                              The kernel should attempt to use a previous
+ *                              allocation with the same usage_id
+ * @heap_info_gpu_addr:         Pointer to an object in GPU memory describing
+ *                              the actual usage of the region.
+ *
+ * Kbase version history:
+ * 11.20: added @heap_info_gpu_addr
+ */
+struct base_jit_alloc_info {
+	__u64 gpu_alloc_addr;
+	__u64 va_pages;
+	__u64 commit_pages;
+	__u64 extension;
+	__u8 id;
+	__u8 bin_id;
+	__u8 max_allocations;
+	__u8 flags;
+	__u8 padding[2];
+	__u16 usage_id;
+	__u64 heap_info_gpu_addr;
+};
+
+enum base_external_resource_access { BASE_EXT_RES_ACCESS_SHARED, BASE_EXT_RES_ACCESS_EXCLUSIVE };
+
+struct base_external_resource {
+	__u64 ext_resource;
+};
+
+/**
+ * BASE_EXT_RES_COUNT_MAX - The maximum number of external resources
+ * which can be mapped/unmapped in a single request.
+ */
+#define BASE_EXT_RES_COUNT_MAX 10
+
+/**
+ * struct base_external_resource_list - Structure which describes a list of
+ *                                      external resources.
+ * @count:                              The number of resources.
+ * @ext_res:                            Array of external resources which is
+ *                                      sized at allocation time.
+ */
+struct base_external_resource_list {
+	__u64 count;
+	struct base_external_resource ext_res[1];
+};
+
+struct base_jd_debug_copy_buffer {
+	__u64 address;
+	__u64 size;
+	struct base_external_resource extres;
+};
+
+/**
+ * DOC: User-side Base GPU Property Queries
+ *
+ * The User-side Base GPU Property Query interface encapsulates two
+ * sub-modules:
+ *
+ * - "Dynamic GPU Properties"
+ * - "Base Platform Config GPU Properties"
+ *
+ * Base only deals with properties that vary between different GPU
+ * implementations - the Dynamic GPU properties and the Platform Config
+ * properties.
+ *
+ * For properties that are constant for the GPU Architecture, refer to the
+ * GPU module. However, we will discuss their relevance here just to
+ * provide background information.
+ *
+ * About the GPU Properties in Base and GPU modules
+ *
+ * The compile-time properties (Platform Config, GPU Compile-time
+ * properties) are exposed as pre-processor macros.
+ *
+ * Complementing the compile-time properties are the Dynamic GPU
+ * Properties, which act as a conduit for the GPU Configuration
+ * Discovery.
+ *
+ * In general, the dynamic properties are present to verify that the platform
+ * has been configured correctly with the right set of Platform Config
+ * Compile-time Properties.
+ *
+ * As a consistent guide across the entire DDK, the choice for dynamic or
+ * compile-time should consider the following, in order:
+ * 1. Can the code be written so that it doesn't need to know the
+ * implementation limits at all?
+ * 2. If you need the limits, get the information from the Dynamic Property
+ * lookup. This should be done once as you fetch the context, and then cached
+ * as part of the context data structure, so it's cheap to access.
+ * 3. If there's a clear and arguable inefficiency in using Dynamic Properties,
+ * then use a Compile-Time Property (Platform Config, or GPU Compile-time
+ * property). Examples of where this might be sensible follow:
+ *  - Part of a critical inner-loop
+ *  - Frequent re-use throughout the driver, causing significant extra load
+ * instructions or control flow that would be worthwhile optimizing out.
+ *
+ * We cannot provide an exhaustive set of examples, neither can we provide a
+ * rule for every possible situation. Use common sense, and think about: what
+ * the rest of the driver will be doing; how the compiler might represent the
+ * value if it is a compile-time constant; whether an OEM shipping multiple
+ * devices would benefit much more from a single DDK binary, instead of
+ * insignificant micro-optimizations.
+ *
+ * Dynamic GPU Properties
+ *
+ * Dynamic GPU properties are presented in two sets:
+ * 1. the commonly used properties in @ref base_gpu_props, which have been
+ * unpacked from GPU register bitfields.
+ * 2. The full set of raw, unprocessed properties in gpu_raw_gpu_props
+ * (also a member of base_gpu_props). All of these are presented in
+ * the packed form, as presented by the GPU  registers themselves.
+ *
+ * The raw properties in gpu_raw_gpu_props are necessary to
+ * allow a user of the Mali Tools (e.g. PAT) to determine "Why is this device
+ * behaving differently?". In this case, all information about the
+ * configuration is potentially useful, but it does not need to be processed
+ * by the driver. Instead, the raw registers can be processed by the Mali
+ * Tools software on the host PC.
+ *
+ * The properties returned extend the GPU Configuration Discovery
+ * registers. For example, GPU clock speed is not specified in the GPU
+ * Architecture, but is necessary for OpenCL's clGetDeviceInfo() function.
+ *
+ * The GPU properties are obtained by a call to
+ * base_get_gpu_props(). This simply returns a pointer to a const
+ * base_gpu_props structure. It is constant for the life of a base
+ * context. Multiple calls to base_get_gpu_props() to a base context
+ * return the same pointer to a constant structure. This avoids cache pollution
+ * of the common data.
+ *
+ * This pointer must not be freed, because it does not point to the start of a
+ * region allocated by the memory allocator; instead, just close the @ref
+ * base_context.
+ *
+ *
+ * Kernel Operation
+ *
+ * During Base Context Create time, user-side makes a single kernel call:
+ * - A call to fill user memory with GPU information structures
+ *
+ * The kernel-side will fill the provided the entire processed base_gpu_props
+ * structure, because this information is required in both
+ * user and kernel side; it does not make sense to decode it twice.
+ *
+ * Coherency groups must be derived from the bitmasks, but this can be done
+ * kernel side, and just once at kernel startup: Coherency groups must already
+ * be known kernel-side, to support chains that specify a 'Only Coherent Group'
+ * SW requirement, or 'Only Coherent Group with Tiler' SW requirement.
+ *
+ * Coherency Group calculation
+ *
+ * Creation of the coherent group data is done at device-driver startup, and so
+ * is one-time. This will most likely involve a loop with CLZ, shifting, and
+ * bit clearing on the L2_PRESENT mask, depending on whether the
+ * system is L2 Coherent. The number of shader cores is done by a
+ * population count, since faulty cores may be disabled during production,
+ * producing a non-contiguous mask.
+ *
+ * The memory requirements for this algorithm can be determined either by a __u64
+ * population count on the L2_PRESENT mask (a LUT helper already is
+ * required for the above), or simple assumption that there can be no more than
+ * 16 coherent groups, since core groups are typically 4 cores.
+ */
+
+/*
+ * More information is possible - but associativity and bus width are not
+ * required by upper-level apis.
+ */
+struct mali_base_gpu_l2_cache_props {
+	__u8 log2_line_size;
+	__u8 log2_cache_size;
+	__u8 num_l2_slices; /* Number of L2C slices. 1 or higher */
+	__u8 padding[5];
+};
+
+struct mali_base_gpu_tiler_props {
+	__u32 bin_size_bytes; /* Max is 4*2^15 */
+	__u32 max_active_levels; /* Max is 2^15 */
+};
+
+/**
+ * struct mali_base_gpu_thread_props - GPU threading system details.
+ * @max_threads: Max. number of threads per core
+ * @max_workgroup_size:     Max. number of threads per workgroup
+ * @max_barrier_size:       Max. number of threads that can synchronize on a
+ *                          simple barrier
+ * @max_registers:          Total size [1..65535] of the register file available
+ *                          per core.
+ * @max_task_queue:         Max. tasks [1..255] which may be sent to a core
+ *                          before it becomes blocked.
+ * @max_thread_group_split: Max. allowed value [1..15] of the Thread Group Split
+ *                          field.
+ * @impl_tech:              0 = Not specified, 1 = Silicon, 2 = FPGA,
+ *                          3 = SW Model/Emulation,
+ *                          U8_MAX (255) = NO_MALI, not in spec
+ * @padding:                padding to align to 8-byte
+ * @tls_alloc:              Number of threads per core that TLS must be
+ *                          allocated for
+ */
+struct mali_base_gpu_thread_props {
+	__u32 max_threads;
+	__u32 max_workgroup_size;
+	__u32 max_barrier_size;
+	__u32 max_registers;
+	__u8 max_task_queue;
+	__u8 max_thread_group_split;
+	__u8 impl_tech;
+	__u8 padding;
+	__u32 tls_alloc;
+};
+
+/**
+ * struct mali_base_gpu_coherent_group - descriptor for a coherent group
+ * @core_mask: Core restriction mask required for the group
+ * @num_cores: Number of cores in the group
+ * @padding:   padding to align to 8-byte
+ *
+ * \c core_mask exposes all cores in that coherent group, and \c num_cores
+ * provides a cached population-count for that mask.
+ *
+ * @note Whilst all cores are exposed in the mask, not all may be available to
+ *       the application, depending on the Kernel Power policy.
+ *
+ * @note if u64s must be 8-byte aligned, then this structure has 32-bits of
+ *       wastage.
+ */
+struct mali_base_gpu_coherent_group {
+	__u64 core_mask;
+	__u16 num_cores;
+	__u16 padding[3];
+};
+
+/**
+ * struct mali_base_gpu_coherent_group_info - Coherency group information
+ * @num_groups: Number of coherent groups in the GPU.
+ * @num_core_groups: Number of core groups (coherent or not) in the GPU.
+ *                   Equivalent to the number of L2 Caches.
+ *                   The GPU Counter dumping writes 2048 bytes per core group,
+ *                   regardless of whether the core groups are coherent or not.
+ *                   Hence this member is needed to calculate how much memory
+ *                   is required for dumping.
+ *                   @note Do not use it to work out how many valid elements
+ *                         are in the group[] member. Use num_groups instead.
+ * @coherency: Coherency features of the memory, accessed by gpu_mem_features
+ *             methods
+ * @padding: padding to align to 8-byte
+ * @group: Descriptors of coherent groups
+ *
+ * Note that the sizes of the members could be reduced. However, the \c group
+ * member might be 8-byte aligned to ensure the __u64 core_mask is 8-byte
+ * aligned, thus leading to wastage if the other members sizes were reduced.
+ *
+ * The groups are sorted by core mask. The core masks are non-repeating and do
+ * not intersect.
+ */
+struct mali_base_gpu_coherent_group_info {
+	__u32 num_groups;
+	__u32 num_core_groups;
+	__u32 coherency;
+	__u32 padding;
+	struct mali_base_gpu_coherent_group group[BASE_MAX_COHERENT_GROUPS];
+};
+
+#if MALI_USE_CSF
+#include "csf/mali_base_csf_kernel.h"
+#else
+#include "jm/mali_base_jm_kernel.h"
+#endif
+
+/**
+ * struct gpu_raw_gpu_props - A complete description of the GPU's Hardware
+ *                            Configuration Discovery registers.
+ * @shader_present: Shader core present bitmap
+ * @tiler_present: Tiler core present bitmap
+ * @l2_present: Level 2 cache present bitmap
+ * @stack_present: Core stack present bitmap
+ * @l2_features: L2 features
+ * @core_features: Core features
+ * @mem_features: Mem features
+ * @mmu_features: Mmu features
+ * @as_present: Bitmap of address spaces present
+ * @js_present: Job slots present
+ * @js_features: Array of job slot features.
+ * @tiler_features: Tiler features
+ * @texture_features: TEXTURE_FEATURES_x registers, as exposed by the GPU
+ * @gpu_id: GPU and revision identifier
+ * @thread_max_threads: Maximum number of threads per core
+ * @thread_max_workgroup_size: Maximum number of threads per workgroup
+ * @thread_max_barrier_size: Maximum number of threads per barrier
+ * @thread_features: Thread features
+ * @coherency_mode: Note: This is the _selected_ coherency mode rather than the
+ *                  available modes as exposed in the coherency_features register
+ * @thread_tls_alloc: Number of threads per core that TLS must be allocated for
+ * @gpu_features: GPU features
+ * @neural_present: Neural engine present bitmap
+ * @base_present: Shader core base present bitmap
+ *
+ * The information is presented inefficiently for access. For frequent access,
+ * the values should be better expressed in an unpacked form in the
+ * base_gpu_props structure.
+ *
+ * The raw properties in gpu_raw_gpu_props are necessary to
+ * allow a user of the Mali Tools (e.g. PAT) to determine "Why is this device
+ * behaving differently?". In this case, all information about the
+ * configuration is potentially useful, but it does not need to be processed
+ * by the driver. Instead, the raw registers can be processed by the Mali
+ * Tools software on the host PC.
+ *
+ */
+struct gpu_raw_gpu_props {
+	__u64 shader_present;
+	__u64 tiler_present;
+	__u64 l2_present;
+	__u64 stack_present;
+	__u32 l2_features;
+	__u32 core_features;
+	__u32 mem_features;
+	__u32 mmu_features;
+	__u32 as_present;
+	__u32 js_present;
+	__u32 js_features[GPU_MAX_JOB_SLOTS];
+	__u32 tiler_features;
+	__u32 texture_features[BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS];
+	__u32 gpu_id;
+	__u32 thread_max_threads;
+	__u32 thread_max_workgroup_size;
+	__u32 thread_max_barrier_size;
+	__u32 thread_features;
+
+	/*
+	 * Note: This is the _selected_ coherency mode rather than the
+	 * available modes as exposed in the coherency_features register.
+	 */
+	__u32 coherency_mode;
+
+	__u32 thread_tls_alloc;
+	__u64 gpu_features;
+	__u64 neural_present;
+	__u64 base_present;
+};
+
+/**
+ * struct base_gpu_props - Return structure for base_get_gpu_props().
+ * @core_props:     Core props.
+ * @l2_props:       L2 props.
+ * @unused_1:       Keep for backwards compatibility.
+ * @tiler_props:    Tiler props.
+ * @thread_props:   Thread props.
+ * @raw_props:      This member is large, likely to be 128 bytes.
+ * @coherency_info: This must be last member of the structure.
+ *
+ * NOTE: the raw_props member in this data structure contains the register
+ * values from which the value of the other members are derived. The derived
+ * members exist to allow for efficient access and/or shielding the details
+ * of the layout of the registers.
+ */
+struct base_gpu_props {
+	struct mali_base_gpu_core_props core_props;
+	struct mali_base_gpu_l2_cache_props l2_props;
+	__u64 unused_1;
+	struct mali_base_gpu_tiler_props tiler_props;
+	struct mali_base_gpu_thread_props thread_props;
+	struct gpu_raw_gpu_props raw_props;
+	struct mali_base_gpu_coherent_group_info coherency_info;
+};
+
+#define BASE_MEM_GROUP_ID_GET(flags) ((flags & BASE_MEM_GROUP_ID_MASK) >> BASEP_MEM_GROUP_ID_SHIFT)
+
+#define BASE_MEM_GROUP_ID_SET(id)                                                                  \
+	(((base_mem_alloc_flags)((id < 0 || id >= BASE_MEM_GROUP_COUNT) ? BASE_MEM_GROUP_DEFAULT : \
+										id)                      \
+	  << BASEP_MEM_GROUP_ID_SHIFT) &                                                           \
+	 BASE_MEM_GROUP_ID_MASK)
+
+#define BASE_CONTEXT_MMU_GROUP_ID_SET(group_id) \
+	(BASEP_CONTEXT_MMU_GROUP_ID_MASK &      \
+	 ((base_context_create_flags)(group_id) << BASEP_CONTEXT_MMU_GROUP_ID_SHIFT))
+
+#define BASE_CONTEXT_MMU_GROUP_ID_GET(flags) \
+	((flags & BASEP_CONTEXT_MMU_GROUP_ID_MASK) >> BASEP_CONTEXT_MMU_GROUP_ID_SHIFT)
+
+/*
+ * A number of bit flags are defined for requesting cpu_gpu_timeinfo. These
+ * flags are also used, where applicable, for specifying which fields
+ * are valid following the request operation.
+ */
+
+/* For monotonic (counter) timefield */
+#define BASE_TIMEINFO_MONOTONIC_FLAG (1U << 0)
+/* For system wide timestamp */
+#define BASE_TIMEINFO_TIMESTAMP_FLAG (1U << 1)
+/* For GPU cycle counter */
+#define BASE_TIMEINFO_CYCLE_COUNTER_FLAG (1U << 2)
+
+/* Specify TimeReques flags allowed if time source is cpu/gpu register */
+#define BASE_TIMEREQUEST_CPU_GPU_SRC_ALLOWED_FLAGS                     \
+	(BASE_TIMEINFO_MONOTONIC_FLAG | BASE_TIMEINFO_TIMESTAMP_FLAG | \
+	 BASE_TIMEINFO_CYCLE_COUNTER_FLAG)
+
+/* Specify TimeReques flags allowed if time source is system(user) space */
+#define BASE_TIMEREQUEST_SYSTEM_SRC_ALLOWED_FLAGS \
+	(BASE_TIMEINFO_MONOTONIC_FLAG | BASE_TIMEINFO_TIMESTAMP_FLAG)
+
+/* Maximum number of source allocations allowed to create an alias allocation.
+ * This needs to be 4096 * 6 to allow cube map arrays with up to 4096 array
+ * layers, since each cube map in the array will have 6 faces.
+ */
+#define BASE_MEM_ALIAS_MAX_ENTS ((size_t)24576)
+
+#endif /* _UAPI_BASE_KERNEL_H_ */
--- a/include/uapi/gpu/arm/valhall/mali_base_mem_priv.h
+++ b/include/uapi/gpu/arm/valhall/mali_base_mem_priv.h
@@ -0,0 +1,57 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *
+ * (C) COPYRIGHT 2010-2024 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#ifndef _UAPI_BASE_MEM_PRIV_H_
+#define _UAPI_BASE_MEM_PRIV_H_
+
+#include <linux/types.h>
+#include "mali_base_common_kernel.h"
+
+#define BASE_SYNCSET_OP_MSYNC (1U << 0)
+#define BASE_SYNCSET_OP_CSYNC (1U << 1)
+
+/*
+ * This structure describe a basic memory coherency operation.
+ * It can either be:
+ * @li a sync from CPU to Memory:
+ *	- type = ::BASE_SYNCSET_OP_MSYNC
+ *	- mem_handle = a handle to the memory object on which the operation
+ *	  is taking place
+ *	- user_addr = the address of the range to be synced
+ *	- size = the amount of data to be synced, in bytes
+ *	- offset is ignored.
+ * @li a sync from Memory to CPU:
+ *	- type = ::BASE_SYNCSET_OP_CSYNC
+ *	- mem_handle = a handle to the memory object on which the operation
+ *	  is taking place
+ *	- user_addr = the address of the range to be synced
+ *	- size = the amount of data to be synced, in bytes.
+ *	- offset is ignored.
+ */
+struct basep_syncset {
+	struct base_mem_handle mem_handle;
+	__u64 user_addr;
+	__u64 size;
+	__u8 type;
+	__u8 padding[7];
+};
+
+#endif /* _UAPI_BASE_MEM_PRIV_H_ */
--- a/include/uapi/gpu/arm/valhall/mali_gpu_props.h
+++ b/include/uapi/gpu/arm/valhall/mali_gpu_props.h
@@ -0,0 +1,113 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *
+ * (C) COPYRIGHT 2023-2024 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#ifndef _UAPI_MALI_GPUPROPS_H_
+#define _UAPI_MALI_GPUPROPS_H_
+
+#include <linux/types.h>
+#include "mali_base_common_kernel.h"
+
+#define BASE_MAX_COHERENT_GROUPS 16
+#define GPU_MAX_JOB_SLOTS 16
+
+/**
+ * struct gpu_props_user_data - structure for gpu props user buffer.
+ * @core_props:     Core props.
+ * @l2_props:       L2 props.
+ * @tiler_props:    Tiler props.
+ * @thread_props:   Thread props.
+ * @raw_props:      Raw register values kept for backwards compatibility. Kbase
+ *                  and base should never reference values within this struct.
+ * @coherency_info: Coherency information.
+ *
+ * This structure is used solely for the encoding and decoding of the prop_buffer
+ * returned by kbase.
+ */
+struct gpu_props_user_data {
+	struct {
+		__u32 product_id;
+		__u16 version_status;
+		__u16 minor_revision;
+		__u16 major_revision;
+		__u32 gpu_freq_khz_max;
+		__u32 log2_program_counter_size;
+		__u32 texture_features[BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS];
+		__u64 gpu_available_memory_size;
+		__u8 num_exec_engines;
+	} core_props;
+	struct {
+		__u8 log2_line_size;
+		__u8 log2_cache_size;
+		__u8 num_l2_slices;
+	} l2_props;
+	struct {
+		__u32 bin_size_bytes;
+		__u32 max_active_levels;
+	} tiler_props;
+	struct {
+		__u32 max_threads;
+		__u32 max_workgroup_size;
+		__u32 max_barrier_size;
+		__u32 max_registers;
+		__u8 max_task_queue;
+		__u8 max_thread_group_split;
+		__u8 impl_tech;
+		__u32 tls_alloc;
+	} thread_props;
+
+	/* kept for backward compatibility, should not be used in the future. */
+	struct {
+		__u64 shader_present;
+		__u64 tiler_present;
+		__u64 l2_present;
+		__u64 stack_present;
+		__u64 l2_features;
+		__u64 core_features;
+		__u64 mem_features;
+		__u64 mmu_features;
+		__u32 as_present;
+		__u32 js_present;
+		__u32 js_features[GPU_MAX_JOB_SLOTS];
+		__u64 tiler_features;
+		__u32 texture_features[BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS];
+		__u64 gpu_id;
+		__u32 thread_max_threads;
+		__u32 thread_max_workgroup_size;
+		__u32 thread_max_barrier_size;
+		__u32 thread_features;
+		__u32 coherency_mode;
+		__u32 thread_tls_alloc;
+		__u64 gpu_features;
+		__u64 base_present;
+		__u64 neural_present;
+	} raw_props;
+	struct {
+		__u32 num_groups;
+		__u32 num_core_groups;
+		__u32 coherency;
+		struct {
+			__u64 core_mask;
+			__u32 num_cores;
+		} group[BASE_MAX_COHERENT_GROUPS];
+	} coherency_info;
+};
+
+#endif /* _UAPI_MALI_GPUPROPS_H_ */
--- a/include/uapi/gpu/arm/valhall/mali_kbase_hwcnt_reader.h
+++ b/include/uapi/gpu/arm/valhall/mali_kbase_hwcnt_reader.h
@@ -0,0 +1,506 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *
+ * (C) COPYRIGHT 2015-2024 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#ifndef _UAPI_KBASE_HWCNT_READER_H_
+#define _UAPI_KBASE_HWCNT_READER_H_
+
+#include <linux/stddef.h>
+#include <linux/types.h>
+
+/* The ids of ioctl commands. */
+#define KBASE_HWCNT_READER 0xBE
+#define KBASE_HWCNT_READER_GET_HWVER _IOR(KBASE_HWCNT_READER, 0x00, __u32)
+#define KBASE_HWCNT_READER_GET_BUFFER_SIZE _IOR(KBASE_HWCNT_READER, 0x01, __u32)
+#define KBASE_HWCNT_READER_DUMP _IOW(KBASE_HWCNT_READER, 0x10, __u32)
+#define KBASE_HWCNT_READER_CLEAR _IOW(KBASE_HWCNT_READER, 0x11, __u32)
+#define KBASE_HWCNT_READER_GET_BUFFER             \
+	_IOC(_IOC_READ, KBASE_HWCNT_READER, 0x20, \
+	     offsetof(struct kbase_hwcnt_reader_metadata, cycles))
+#define KBASE_HWCNT_READER_GET_BUFFER_WITH_CYCLES \
+	_IOR(KBASE_HWCNT_READER, 0x20, struct kbase_hwcnt_reader_metadata)
+#define KBASE_HWCNT_READER_PUT_BUFFER              \
+	_IOC(_IOC_WRITE, KBASE_HWCNT_READER, 0x21, \
+	     offsetof(struct kbase_hwcnt_reader_metadata, cycles))
+#define KBASE_HWCNT_READER_PUT_BUFFER_WITH_CYCLES \
+	_IOW(KBASE_HWCNT_READER, 0x21, struct kbase_hwcnt_reader_metadata)
+#define KBASE_HWCNT_READER_SET_INTERVAL _IOW(KBASE_HWCNT_READER, 0x30, __u32)
+#define KBASE_HWCNT_READER_ENABLE_EVENT _IOW(KBASE_HWCNT_READER, 0x40, __u32)
+#define KBASE_HWCNT_READER_DISABLE_EVENT _IOW(KBASE_HWCNT_READER, 0x41, __u32)
+#define KBASE_HWCNT_READER_GET_API_VERSION _IOW(KBASE_HWCNT_READER, 0xFF, __u32)
+#define KBASE_HWCNT_READER_GET_API_VERSION_WITH_FEATURES \
+	_IOW(KBASE_HWCNT_READER, 0xFF, struct kbase_hwcnt_reader_api_version)
+
+/**
+ * struct kbase_hwcnt_reader_metadata_cycles - GPU clock cycles
+ * @top:           the number of cycles associated with the main clock for the
+ *                 GPU
+ * @shader_cores:  the cycles that have elapsed on the GPU shader cores
+ */
+struct kbase_hwcnt_reader_metadata_cycles {
+	__u64 top;
+	__u64 shader_cores;
+};
+
+/**
+ * struct kbase_hwcnt_reader_metadata - hwcnt reader sample buffer metadata
+ * @timestamp:  time when sample was collected
+ * @event_id:   id of an event that triggered sample collection
+ * @buffer_idx: position in sampling area where sample buffer was stored
+ * @cycles:     the GPU cycles that occurred since the last sample
+ */
+struct kbase_hwcnt_reader_metadata {
+	__u64 timestamp;
+	__u32 event_id;
+	__u32 buffer_idx;
+	struct kbase_hwcnt_reader_metadata_cycles cycles;
+};
+
+/**
+ * enum base_hwcnt_reader_event - hwcnt dumping events
+ * @BASE_HWCNT_READER_EVENT_MANUAL:   manual request for dump
+ * @BASE_HWCNT_READER_EVENT_PERIODIC: periodic dump
+ * @BASE_HWCNT_READER_EVENT_PREJOB:   prejob dump request
+ * @BASE_HWCNT_READER_EVENT_POSTJOB:  postjob dump request
+ * @BASE_HWCNT_READER_EVENT_COUNT:    number of supported events
+ */
+enum base_hwcnt_reader_event {
+	BASE_HWCNT_READER_EVENT_MANUAL,
+	BASE_HWCNT_READER_EVENT_PERIODIC,
+	BASE_HWCNT_READER_EVENT_PREJOB,
+	BASE_HWCNT_READER_EVENT_POSTJOB,
+	BASE_HWCNT_READER_EVENT_COUNT
+};
+
+#define KBASE_HWCNT_READER_API_VERSION_NO_FEATURE (0)
+#define KBASE_HWCNT_READER_API_VERSION_FEATURE_CYCLES_TOP (1 << 0)
+#define KBASE_HWCNT_READER_API_VERSION_FEATURE_CYCLES_SHADER_CORES (1 << 1)
+
+/**
+ * struct kbase_hwcnt_reader_api_version - hwcnt reader API version
+ * @version:  API version
+ * @features: available features in this API version
+ */
+struct kbase_hwcnt_reader_api_version {
+	__u32 version;
+	__u32 features;
+};
+
+/** Hardware counters reader API version */
+#define PRFCNT_READER_API_VERSION (0)
+
+/**
+ * enum prfcnt_list_type - Type of list item
+ * @PRFCNT_LIST_TYPE_ENUM:        Enumeration of performance counters.
+ * @PRFCNT_LIST_TYPE_REQUEST:     Request for configuration setup.
+ * @PRFCNT_LIST_TYPE_SAMPLE_META: Sample metadata.
+ */
+enum prfcnt_list_type {
+	PRFCNT_LIST_TYPE_ENUM,
+	PRFCNT_LIST_TYPE_REQUEST,
+	PRFCNT_LIST_TYPE_SAMPLE_META,
+};
+
+#define FLEX_LIST_TYPE(type, subtype) ((__u16)(((type & 0xf) << 12) | (subtype & 0xfff)))
+#define FLEX_LIST_TYPE_NONE FLEX_LIST_TYPE(0, 0)
+
+#define PRFCNT_ENUM_TYPE_BLOCK FLEX_LIST_TYPE(PRFCNT_LIST_TYPE_ENUM, 0)
+#define PRFCNT_ENUM_TYPE_REQUEST FLEX_LIST_TYPE(PRFCNT_LIST_TYPE_ENUM, 1)
+#define PRFCNT_ENUM_TYPE_SAMPLE_INFO FLEX_LIST_TYPE(PRFCNT_LIST_TYPE_ENUM, 2)
+
+#define PRFCNT_REQUEST_TYPE_MODE FLEX_LIST_TYPE(PRFCNT_LIST_TYPE_REQUEST, 0)
+#define PRFCNT_REQUEST_TYPE_ENABLE FLEX_LIST_TYPE(PRFCNT_LIST_TYPE_REQUEST, 1)
+#define PRFCNT_REQUEST_TYPE_SCOPE FLEX_LIST_TYPE(PRFCNT_LIST_TYPE_REQUEST, 2)
+
+#define PRFCNT_SAMPLE_META_TYPE_SAMPLE FLEX_LIST_TYPE(PRFCNT_LIST_TYPE_SAMPLE_META, 0)
+#define PRFCNT_SAMPLE_META_TYPE_CLOCK FLEX_LIST_TYPE(PRFCNT_LIST_TYPE_SAMPLE_META, 1)
+#define PRFCNT_SAMPLE_META_TYPE_BLOCK FLEX_LIST_TYPE(PRFCNT_LIST_TYPE_SAMPLE_META, 2)
+
+/**
+ * struct prfcnt_item_header - Header for an item of the list.
+ * @item_type:    Type of item.
+ * @item_version: Protocol version.
+ */
+struct prfcnt_item_header {
+	__u16 item_type;
+	__u16 item_version;
+};
+
+/**
+ * enum prfcnt_block_type - Type of performance counter block.
+ * @PRFCNT_BLOCK_TYPE_FE:          Front End.
+ * @PRFCNT_BLOCK_TYPE_TILER:       Tiler.
+ * @PRFCNT_BLOCK_TYPE_MEMORY:      Memory System.
+ * @PRFCNT_BLOCK_TYPE_SHADER_CORE: Shader Core.
+ * @PRFCNT_BLOCK_TYPE_FW:          Firmware.
+ * @PRFCNT_BLOCK_TYPE_CSG:         CSG.
+ * @PRFCNT_BLOCK_TYPE_RESERVED:    Reserved.
+ * @PRFCNT_BLOCK_TYPE_NE:          Neural Engine.
+ */
+enum prfcnt_block_type {
+	PRFCNT_BLOCK_TYPE_FE,
+	PRFCNT_BLOCK_TYPE_TILER,
+	PRFCNT_BLOCK_TYPE_MEMORY,
+	PRFCNT_BLOCK_TYPE_SHADER_CORE,
+	PRFCNT_BLOCK_TYPE_FW,
+	PRFCNT_BLOCK_TYPE_CSG,
+	PRFCNT_BLOCK_TYPE_NE,
+	PRFCNT_BLOCK_TYPE_RESERVED = 255,
+};
+
+/**
+ * enum prfcnt_set - Type of performance counter block set.
+ * @PRFCNT_SET_PRIMARY:   Primary.
+ * @PRFCNT_SET_SECONDARY: Secondary.
+ * @PRFCNT_SET_TERTIARY:  Tertiary.
+ * @PRFCNT_SET_RESERVED:  Reserved.
+ */
+enum prfcnt_set {
+	PRFCNT_SET_PRIMARY,
+	PRFCNT_SET_SECONDARY,
+	PRFCNT_SET_TERTIARY,
+	PRFCNT_SET_RESERVED = 255,
+};
+
+/**
+ * struct prfcnt_enum_block_counter - Performance counter block descriptor.
+ * @block_type:    Type of performance counter block.
+ * @set:           Which SET this represents: primary, secondary or tertiary.
+ * @pad:           Padding bytes.
+ * @num_instances: How many instances of this block type exist in the hardware.
+ * @num_values:    How many entries in the values array there are for samples
+ *                 from this block.
+ * @counter_mask:  Bitmask that indicates counter availability in this block.
+ *                 A '0' indicates that a counter is not available at that
+ *                 index and will always return zeroes if requested.
+ */
+struct prfcnt_enum_block_counter {
+	__u8 block_type;
+	__u8 set;
+	__u8 pad[2];
+	__u16 num_instances;
+	__u16 num_values;
+	__u64 counter_mask[2];
+};
+
+/**
+ * struct prfcnt_enum_request - Request descriptor.
+ * @request_item_type:       Type of request.
+ * @pad:                     Padding bytes.
+ * @versions_mask: Bitmask of versions that support this request.
+ */
+struct prfcnt_enum_request {
+	__u16 request_item_type;
+	__u16 pad;
+	__u32 versions_mask;
+};
+
+/**
+ * struct prfcnt_enum_sample_info - Sample information descriptor.
+ * @num_clock_domains:       Number of clock domains of the GPU.
+ * @pad:                     Padding bytes.
+ */
+struct prfcnt_enum_sample_info {
+	__u32 num_clock_domains;
+	__u32 pad;
+};
+
+/**
+ * struct prfcnt_enum_item - Performance counter enumeration item.
+ * @padding:         Padding bytes.
+ * @hdr:             Header describing the type of item in the list.
+ * @u:               Structure containing discriptor for enumeration item type.
+ * @u.block_counter: Performance counter block descriptor.
+ * @u.request:       Request descriptor.
+ * @u.sample_info:   Performance counter sample information descriptor.
+ */
+struct prfcnt_enum_item {
+	struct prfcnt_item_header hdr;
+	__u8 padding[4];
+	/** union u - union of block_counter and request */
+	union {
+		struct prfcnt_enum_block_counter block_counter;
+		struct prfcnt_enum_request request;
+		struct prfcnt_enum_sample_info sample_info;
+	} u;
+};
+
+/**
+ * enum prfcnt_mode - Capture mode for counter sampling.
+ * @PRFCNT_MODE_MANUAL:   Manual sampling mode.
+ * @PRFCNT_MODE_PERIODIC: Periodic sampling mode.
+ * @PRFCNT_MODE_RESERVED: Reserved.
+ */
+enum prfcnt_mode {
+	PRFCNT_MODE_MANUAL,
+	PRFCNT_MODE_PERIODIC,
+	PRFCNT_MODE_RESERVED = 255,
+};
+
+/**
+ * struct prfcnt_request_mode - Mode request descriptor.
+ * @mode:                           Capture mode for the session, either manual or periodic.
+ * @pad:                            Padding bytes.
+ * @mode_config:                    Structure containing configuration for periodic mode.
+ * @mode_config.periodic:           Periodic config.
+ * @mode_config.periodic.period_ns: Period in nanoseconds, for periodic mode.
+ */
+struct prfcnt_request_mode {
+	__u8 mode;
+	__u8 pad[7];
+	/** union mode_config - request mode configuration*/
+	union {
+		struct {
+			__u64 period_ns;
+		} periodic;
+	} mode_config;
+};
+
+/**
+ * struct prfcnt_request_enable - Enable request descriptor.
+ * @block_type:  Type of performance counter block.
+ * @set:         Which SET to use: primary, secondary or tertiary.
+ * @pad:         Padding bytes.
+ * @enable_mask: Bitmask that indicates which performance counters to enable.
+ *               Unavailable counters will be ignored.
+ */
+struct prfcnt_request_enable {
+	__u8 block_type;
+	__u8 set;
+	__u8 pad[6];
+	__u64 enable_mask[2];
+};
+
+/**
+ * enum prfcnt_scope - Scope of performance counters.
+ * @PRFCNT_SCOPE_GLOBAL:   Global scope.
+ * @PRFCNT_SCOPE_RESERVED: Reserved.
+ */
+enum prfcnt_scope {
+	PRFCNT_SCOPE_GLOBAL,
+	PRFCNT_SCOPE_RESERVED = 255,
+};
+
+/**
+ * struct prfcnt_request_scope - Scope request descriptor.
+ * @scope: Scope of the performance counters to capture.
+ * @pad:   Padding bytes.
+ */
+struct prfcnt_request_scope {
+	__u8 scope;
+	__u8 pad[7];
+};
+
+/**
+ * struct prfcnt_request_item - Performance counter request item.
+ * @padding:      Padding bytes.
+ * @hdr:          Header describing the type of item in the list.
+ * @u:            Structure containing descriptor for request type.
+ * @u.req_mode:   Mode request descriptor.
+ * @u.req_enable: Enable request descriptor.
+ * @u.req_scope:  Scope request descriptor.
+ */
+struct prfcnt_request_item {
+	struct prfcnt_item_header hdr;
+	__u8 padding[4];
+	/** union u - union on req_mode and req_enable */
+	union {
+		struct prfcnt_request_mode req_mode;
+		struct prfcnt_request_enable req_enable;
+		struct prfcnt_request_scope req_scope;
+	} u;
+};
+
+/**
+ * enum prfcnt_request_type - Type of request descriptor.
+ * @PRFCNT_REQUEST_MODE:   Specify the capture mode to be used for the session.
+ * @PRFCNT_REQUEST_ENABLE: Specify which performance counters to capture.
+ * @PRFCNT_REQUEST_SCOPE:  Specify the scope of the performance counters.
+ */
+enum prfcnt_request_type {
+	PRFCNT_REQUEST_MODE,
+	PRFCNT_REQUEST_ENABLE,
+	PRFCNT_REQUEST_SCOPE,
+};
+
+/* This sample contains overflows from dump duration stretch because the sample buffer was full */
+#define SAMPLE_FLAG_OVERFLOW (1u << 0)
+/* This sample has had an error condition for sample duration */
+#define SAMPLE_FLAG_ERROR (1u << 30)
+
+/**
+ * struct prfcnt_sample_metadata - Metadata for counter sample data.
+ * @timestamp_start: Earliest timestamp that values in this sample represent.
+ * @timestamp_end:   Latest timestamp that values in this sample represent.
+ * @seq:             Sequence number of this sample. Must match the value from
+ *                   GET_SAMPLE.
+ * @user_data:       User data provided to HWC_CMD_START or HWC_CMD_SAMPLE_*
+ * @flags:           Property flags.
+ * @pad:             Padding bytes.
+ */
+struct prfcnt_sample_metadata {
+	__u64 timestamp_start;
+	__u64 timestamp_end;
+	__u64 seq;
+	__u64 user_data;
+	__u32 flags;
+	__u32 pad;
+};
+
+/* Maximum number of domains a metadata for clock cycles can refer to */
+#define MAX_REPORTED_DOMAINS (4)
+
+/**
+ * struct prfcnt_clock_metadata - Metadata for clock cycles.
+ * @num_domains: Number of domains this metadata refers to.
+ * @pad:         Padding bytes.
+ * @cycles:      Number of cycles elapsed in each counter domain between
+ *               timestamp_start and timestamp_end. Valid only for the
+ *               first @p num_domains.
+ */
+struct prfcnt_clock_metadata {
+	__u32 num_domains;
+	__u32 pad;
+	__u64 cycles[MAX_REPORTED_DOMAINS];
+};
+
+/* This block state is unknown */
+#define BLOCK_STATE_UNKNOWN (0)
+/* This block was powered on for at least some portion of the sample */
+#define BLOCK_STATE_ON (1 << 0)
+/* This block was powered off for at least some portion of the sample */
+#define BLOCK_STATE_OFF (1 << 1)
+/* This block was available to this VM for at least some portion of the sample */
+#define BLOCK_STATE_AVAILABLE (1 << 2)
+/* This block was not available to this VM for at least some portion of the sample
+ *  Note that no data is collected when the block is not available to the VM.
+ */
+#define BLOCK_STATE_UNAVAILABLE (1 << 3)
+/* This block was operating in "normal" (non-protected) mode for at least some portion of the sample */
+#define BLOCK_STATE_NORMAL (1 << 4)
+/* This block was operating in "protected" mode for at least some portion of the sample.
+ * Note that no data is collected when the block is in protected mode.
+ */
+#define BLOCK_STATE_PROTECTED (1 << 5)
+
+/**
+ * struct prfcnt_block_metadata - Metadata for counter block.
+ * @block_type:    Type of performance counter block.
+ * @block_idx:     Index of performance counter block.
+ * @set:           Set of performance counter block.
+ * @pad_u8:        Padding bytes.
+ * @block_state:   Bits set indicate the states which the block is known
+ *                 to have operated in during this sample.
+ * @values_offset: Offset from the start of the mmapped region, to the values
+ *                 for this block. The values themselves are an array of __u64.
+ * @pad_u32:       Padding bytes.
+ */
+struct prfcnt_block_metadata {
+	__u8 block_type;
+	__u8 block_idx;
+	__u8 set;
+	__u8 pad_u8;
+	__u32 block_state;
+	__u32 values_offset;
+	__u32 pad_u32;
+};
+
+/**
+ * struct prfcnt_metadata - Performance counter metadata item.
+ * @padding:     Padding bytes.
+ * @hdr:         Header describing the type of item in the list.
+ * @u:           Structure containing descriptor for metadata type.
+ * @u.sample_md: Counter sample data metadata descriptor.
+ * @u.clock_md:  Clock cycles metadata descriptor.
+ * @u.block_md:  Counter block metadata descriptor.
+ */
+struct prfcnt_metadata {
+	struct prfcnt_item_header hdr;
+	__u8 padding[4];
+	union {
+		struct prfcnt_sample_metadata sample_md;
+		struct prfcnt_clock_metadata clock_md;
+		struct prfcnt_block_metadata block_md;
+	} u;
+};
+
+/**
+ * enum prfcnt_control_cmd_code - Control command code for client session.
+ * @PRFCNT_CONTROL_CMD_START:        Start the counter data dump run for
+ *                                   the calling client session.
+ * @PRFCNT_CONTROL_CMD_STOP:         Stop the counter data dump run for the
+ *                                   calling client session.
+ * @PRFCNT_CONTROL_CMD_SAMPLE_SYNC:  Trigger a synchronous manual sample.
+ * @PRFCNT_CONTROL_CMD_RESERVED:     Previously SAMPLE_ASYNC not supported any more.
+ * @PRFCNT_CONTROL_CMD_DISCARD:      Discard all samples which have not yet
+ *                                   been consumed by userspace. Note that
+ *                                   this can race with new samples if
+ *                                   HWC_CMD_STOP is not called first.
+ */
+enum prfcnt_control_cmd_code {
+	PRFCNT_CONTROL_CMD_START = 1,
+	PRFCNT_CONTROL_CMD_STOP,
+	PRFCNT_CONTROL_CMD_SAMPLE_SYNC,
+	PRFCNT_CONTROL_CMD_RESERVED,
+	PRFCNT_CONTROL_CMD_DISCARD,
+};
+
+/** struct prfcnt_control_cmd - Control command
+ * @cmd:       Control command for the session.
+ * @pad:       Padding bytes.
+ * @user_data: Pointer to user data, which will be returned as part of
+ *             sample metadata. It only affects a single sample if used
+ *             with CMD_SAMPLE_SYNC or CMD_SAMPLE_ASYNC. It affects all
+ *             samples between CMD_START and CMD_STOP if used with the
+ *             periodic sampling.
+ */
+struct prfcnt_control_cmd {
+	__u16 cmd;
+	__u16 pad[3];
+	__u64 user_data;
+};
+
+/** struct prfcnt_sample_access - Metadata to access a sample.
+ * @sequence:            Sequence number for the sample.
+ *                       For GET_SAMPLE, it will be set by the kernel.
+ *                       For PUT_SAMPLE, it shall be equal to the same value
+ *                       provided by the kernel for GET_SAMPLE.
+ * @sample_offset_bytes: Offset from the start of the mapped area to the first
+ *                       entry in the metadata list (sample_metadata) for this
+ *                       sample.
+ */
+struct prfcnt_sample_access {
+	__u64 sequence;
+	__u64 sample_offset_bytes;
+};
+
+/* The ids of ioctl commands, on a reader file descriptor, magic number */
+#define KBASE_KINSTR_PRFCNT_READER 0xBF
+/* Ioctl ID for issuing a session operational command */
+#define KBASE_IOCTL_KINSTR_PRFCNT_CMD \
+	_IOW(KBASE_KINSTR_PRFCNT_READER, 0x00, struct prfcnt_control_cmd)
+/* Ioctl ID for fetching a dumpped sample */
+#define KBASE_IOCTL_KINSTR_PRFCNT_GET_SAMPLE \
+	_IOR(KBASE_KINSTR_PRFCNT_READER, 0x01, struct prfcnt_sample_access)
+/* Ioctl ID for release internal buffer of the previously fetched sample */
+#define KBASE_IOCTL_KINSTR_PRFCNT_PUT_SAMPLE \
+	_IOW(KBASE_KINSTR_PRFCNT_READER, 0x10, struct prfcnt_sample_access)
+
+#endif /* _UAPI_KBASE_HWCNT_READER_H_ */
--- a/include/uapi/gpu/arm/valhall/mali_kbase_ioctl.h
+++ b/include/uapi/gpu/arm/valhall/mali_kbase_ioctl.h
@@ -0,0 +1,755 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *
+ * (C) COPYRIGHT 2017-2024 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#ifndef _UAPI_KBASE_IOCTL_H_
+#define _UAPI_KBASE_IOCTL_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <asm-generic/ioctl.h>
+#include <linux/types.h>
+
+#if MALI_USE_CSF
+#include "csf/mali_kbase_csf_ioctl.h"
+#else
+#include "jm/mali_kbase_jm_ioctl.h"
+#endif /* MALI_USE_CSF */
+
+#define KBASE_IOCTL_TYPE 0x80
+
+/**
+ * struct kbase_ioctl_set_flags - Set kernel context creation flags
+ *
+ * @create_flags: Flags - see base_context_create_flags
+ */
+struct kbase_ioctl_set_flags {
+	__u32 create_flags;
+};
+
+#define KBASE_IOCTL_SET_FLAGS _IOW(KBASE_IOCTL_TYPE, 1, struct kbase_ioctl_set_flags)
+
+/**
+ * struct kbase_ioctl_get_gpuprops - Read GPU properties from the kernel
+ *
+ * @buffer: Pointer to the buffer to store properties into
+ * @size: Size of the buffer
+ * @flags: Flags - must be zero for now
+ *
+ * The ioctl will return the number of bytes stored into @buffer or an error
+ * on failure (e.g. @size is too small). If @size is specified as 0 then no
+ * data will be written but the return value will be the number of bytes needed
+ * for all the properties.
+ *
+ * @flags may be used in the future to request a different format for the
+ * buffer. With @flags == 0 the following format is used.
+ *
+ * The buffer will be filled with pairs of values, a __u32 key identifying the
+ * property followed by the value. The size of the value is identified using
+ * the bottom bits of the key. The value then immediately followed the key and
+ * is tightly packed (there is no padding). All keys and values are
+ * little-endian.
+ *
+ * 00 = __u8
+ * 01 = __u16
+ * 10 = __u32
+ * 11 = __u64
+ */
+struct kbase_ioctl_get_gpuprops {
+	__u64 buffer;
+	__u32 size;
+	__u32 flags;
+};
+
+#define KBASE_IOCTL_GET_GPUPROPS _IOW(KBASE_IOCTL_TYPE, 3, struct kbase_ioctl_get_gpuprops)
+
+/**
+ * union kbase_ioctl_mem_alloc - Allocate memory on the GPU
+ * @in: Input parameters
+ * @in.va_pages: The number of pages of virtual address space to reserve
+ * @in.commit_pages: The number of physical pages to allocate
+ * @in.extension: The number of extra pages to allocate on each GPU fault which grows the region
+ * @in.flags: Flags
+ * @out: Output parameters
+ * @out.flags: Flags
+ * @out.gpu_va: The GPU virtual address which is allocated
+ */
+union kbase_ioctl_mem_alloc {
+	struct {
+		__u64 va_pages;
+		__u64 commit_pages;
+		__u64 extension;
+		__u64 flags;
+	} in;
+	struct {
+		__u64 flags;
+		__u64 gpu_va;
+	} out;
+};
+
+#define KBASE_IOCTL_MEM_ALLOC _IOWR(KBASE_IOCTL_TYPE, 5, union kbase_ioctl_mem_alloc)
+
+/**
+ * struct kbase_ioctl_mem_query - Query properties of a GPU memory region
+ * @in: Input parameters
+ * @in.gpu_addr: A GPU address contained within the region
+ * @in.query: The type of query
+ * @out: Output parameters
+ * @out.value: The result of the query
+ *
+ * Use a %KBASE_MEM_QUERY_xxx flag as input for @query.
+ */
+union kbase_ioctl_mem_query {
+	struct {
+		__u64 gpu_addr;
+		__u64 query;
+	} in;
+	struct {
+		__u64 value;
+	} out;
+};
+
+#define KBASE_IOCTL_MEM_QUERY _IOWR(KBASE_IOCTL_TYPE, 6, union kbase_ioctl_mem_query)
+
+#define KBASE_MEM_QUERY_COMMIT_SIZE ((__u64)1)
+#define KBASE_MEM_QUERY_VA_SIZE ((__u64)2)
+#define KBASE_MEM_QUERY_FLAGS ((__u64)3)
+
+/**
+ * struct kbase_ioctl_mem_free - Free a memory region
+ * @gpu_addr: Handle to the region to free
+ */
+struct kbase_ioctl_mem_free {
+	__u64 gpu_addr;
+};
+
+#define KBASE_IOCTL_MEM_FREE _IOW(KBASE_IOCTL_TYPE, 7, struct kbase_ioctl_mem_free)
+
+/**
+ * struct kbase_ioctl_hwcnt_reader_setup - Setup HWC dumper/reader
+ * @buffer_count: requested number of dumping buffers
+ * @fe_bm:        counters selection bitmask (Front end)
+ * @shader_bm:    counters selection bitmask (Shader)
+ * @tiler_bm:     counters selection bitmask (Tiler)
+ * @mmu_l2_bm:    counters selection bitmask (MMU_L2)
+ *
+ * A fd is returned from the ioctl if successful, or a negative value on error
+ */
+struct kbase_ioctl_hwcnt_reader_setup {
+	__u32 buffer_count;
+	__u32 fe_bm;
+	__u32 shader_bm;
+	__u32 tiler_bm;
+	__u32 mmu_l2_bm;
+};
+
+#define KBASE_IOCTL_HWCNT_READER_SETUP \
+	_IOW(KBASE_IOCTL_TYPE, 8, struct kbase_ioctl_hwcnt_reader_setup)
+
+/**
+ * struct kbase_ioctl_hwcnt_values - Values to set dummy the dummy counters to.
+ * @data:    Counter samples for the dummy model.
+ * @size:    Size of the counter sample data.
+ * @padding: Currently unused, must be zero
+ */
+struct kbase_ioctl_hwcnt_values {
+	__u64 data;
+	__u32 size;
+	__u32 padding;
+};
+
+#define KBASE_IOCTL_HWCNT_SET _IOW(KBASE_IOCTL_TYPE, 32, struct kbase_ioctl_hwcnt_values)
+
+/**
+ * struct kbase_ioctl_disjoint_query - Query the disjoint counter
+ * @counter:   A counter of disjoint events in the kernel
+ */
+struct kbase_ioctl_disjoint_query {
+	__u32 counter;
+};
+
+#define KBASE_IOCTL_DISJOINT_QUERY _IOR(KBASE_IOCTL_TYPE, 12, struct kbase_ioctl_disjoint_query)
+
+/**
+ * struct kbase_ioctl_get_ddk_version - Query the kernel version
+ * @version_buffer: Buffer to receive the kernel version string
+ * @size: Size of the buffer
+ * @padding: Currently unused, must be zero
+ *
+ * The ioctl will return the number of bytes written into version_buffer
+ * (which includes a NULL byte) or a negative error code
+ *
+ * The ioctl request code has to be _IOW because the data in ioctl struct is
+ * being copied to the kernel, even though the kernel then writes out the
+ * version info to the buffer specified in the ioctl.
+ */
+struct kbase_ioctl_get_ddk_version {
+	__u64 version_buffer;
+	__u32 size;
+	__u32 padding;
+};
+
+#define KBASE_IOCTL_GET_DDK_VERSION _IOW(KBASE_IOCTL_TYPE, 13, struct kbase_ioctl_get_ddk_version)
+
+/**
+ * struct kbase_ioctl_mem_jit_init - Initialize the just-in-time memory
+ *                                   allocator
+ * @va_pages: Number of GPU virtual address pages to reserve for just-in-time
+ *            memory allocations
+ * @max_allocations: Maximum number of concurrent allocations
+ * @trim_level: Level of JIT allocation trimming to perform on free (0 - 100%)
+ * @group_id: Group ID to be used for physical allocations
+ * @padding: Currently unused, must be zero
+ * @phys_pages: Maximum number of physical pages to allocate just-in-time
+ *
+ * Note that depending on the VA size of the application and GPU, the value
+ * specified in @va_pages may be ignored.
+ */
+struct kbase_ioctl_mem_jit_init {
+	__u64 va_pages;
+	__u8 max_allocations;
+	__u8 trim_level;
+	__u8 group_id;
+	__u8 padding[5];
+	__u64 phys_pages;
+};
+
+#define KBASE_IOCTL_MEM_JIT_INIT _IOW(KBASE_IOCTL_TYPE, 14, struct kbase_ioctl_mem_jit_init)
+
+/**
+ * struct kbase_ioctl_mem_sync - Perform cache maintenance on memory
+ *
+ * @handle: GPU memory handle (GPU VA)
+ * @user_addr: The address where it is mapped in user space
+ * @size: The number of bytes to synchronise
+ * @type: The direction to synchronise: 0 is sync to memory (clean),
+ * 1 is sync from memory (invalidate). Use the BASE_SYNCSET_OP_xxx constants.
+ * @padding: Padding to round up to a multiple of 8 bytes, must be zero
+ */
+struct kbase_ioctl_mem_sync {
+	__u64 handle;
+	__u64 user_addr;
+	__u64 size;
+	__u8 type;
+	__u8 padding[7];
+};
+
+#define KBASE_IOCTL_MEM_SYNC _IOW(KBASE_IOCTL_TYPE, 15, struct kbase_ioctl_mem_sync)
+
+/**
+ * union kbase_ioctl_mem_find_cpu_offset - Find the offset of a CPU pointer
+ *
+ * @in: Input parameters
+ * @in.gpu_addr: The GPU address of the memory region
+ * @in.cpu_addr: The CPU address to locate
+ * @in.size: A size in bytes to validate is contained within the region
+ * @out: Output parameters
+ * @out.offset: The offset from the start of the memory region to @cpu_addr
+ */
+union kbase_ioctl_mem_find_cpu_offset {
+	struct {
+		__u64 gpu_addr;
+		__u64 cpu_addr;
+		__u64 size;
+	} in;
+	struct {
+		__u64 offset;
+	} out;
+};
+
+#define KBASE_IOCTL_MEM_FIND_CPU_OFFSET \
+	_IOWR(KBASE_IOCTL_TYPE, 16, union kbase_ioctl_mem_find_cpu_offset)
+
+/**
+ * struct kbase_ioctl_get_context_id - Get the kernel context ID
+ *
+ * @id: The kernel context ID
+ */
+struct kbase_ioctl_get_context_id {
+	__u32 id;
+};
+
+#define KBASE_IOCTL_GET_CONTEXT_ID _IOR(KBASE_IOCTL_TYPE, 17, struct kbase_ioctl_get_context_id)
+
+/**
+ * struct kbase_ioctl_tlstream_acquire - Acquire a tlstream fd
+ *
+ * @flags: Flags
+ *
+ * The ioctl returns a file descriptor when successful
+ */
+struct kbase_ioctl_tlstream_acquire {
+	__u32 flags;
+};
+
+#define KBASE_IOCTL_TLSTREAM_ACQUIRE _IOW(KBASE_IOCTL_TYPE, 18, struct kbase_ioctl_tlstream_acquire)
+
+#define KBASE_IOCTL_TLSTREAM_FLUSH _IO(KBASE_IOCTL_TYPE, 19)
+
+/**
+ * struct kbase_ioctl_mem_commit - Change the amount of memory backing a region
+ *
+ * @gpu_addr: The memory region to modify
+ * @pages:    The number of physical pages that should be present
+ *
+ * The ioctl may return on the following error codes or 0 for success:
+ *   -ENOMEM: Out of memory
+ *   -EINVAL: Invalid arguments
+ */
+struct kbase_ioctl_mem_commit {
+	__u64 gpu_addr;
+	__u64 pages;
+};
+
+#define KBASE_IOCTL_MEM_COMMIT _IOW(KBASE_IOCTL_TYPE, 20, struct kbase_ioctl_mem_commit)
+
+/**
+ * union kbase_ioctl_mem_alias - Create an alias of memory regions
+ * @in: Input parameters
+ * @in.flags: Flags, see BASE_MEM_xxx
+ * @in.stride: Bytes between start of each memory region
+ * @in.nents: The number of regions to pack together into the alias
+ * @in.aliasing_info: Pointer to an array of struct base_mem_aliasing_info
+ * @out: Output parameters
+ * @out.flags: Flags, see BASE_MEM_xxx
+ * @out.gpu_va: Address of the new alias
+ * @out.va_pages: Size of the new alias
+ */
+union kbase_ioctl_mem_alias {
+	struct {
+		__u64 flags;
+		__u64 stride;
+		__u64 nents;
+		__u64 aliasing_info;
+	} in;
+	struct {
+		__u64 flags;
+		__u64 gpu_va;
+		__u64 va_pages;
+	} out;
+};
+
+#define KBASE_IOCTL_MEM_ALIAS _IOWR(KBASE_IOCTL_TYPE, 21, union kbase_ioctl_mem_alias)
+
+/**
+ * union kbase_ioctl_mem_import - Import memory for use by the GPU
+ * @in: Input parameters
+ * @in.flags: Flags, see BASE_MEM_xxx
+ * @in.phandle: Handle to the external memory
+ * @in.type: Type of external memory, see base_mem_import_type
+ * @in.padding: Amount of extra VA pages to append to the imported buffer
+ * @out: Output parameters
+ * @out.flags: Flags, see BASE_MEM_xxx
+ * @out.gpu_va: Address of the new alias
+ * @out.va_pages: Size of the new alias
+ */
+union kbase_ioctl_mem_import {
+	struct {
+		__u64 flags;
+		__u64 phandle;
+		__u32 type;
+		__u32 padding;
+	} in;
+	struct {
+		__u64 flags;
+		__u64 gpu_va;
+		__u64 va_pages;
+	} out;
+};
+
+#define KBASE_IOCTL_MEM_IMPORT _IOWR(KBASE_IOCTL_TYPE, 22, union kbase_ioctl_mem_import)
+
+/**
+ * struct kbase_ioctl_mem_flags_change - Change the flags for a memory region
+ * @gpu_va: The GPU region to modify
+ * @flags: The new flags to set
+ * @mask: Mask of the flags to modify
+ */
+struct kbase_ioctl_mem_flags_change {
+	__u64 gpu_va;
+	__u64 flags;
+	__u64 mask;
+};
+
+#define KBASE_IOCTL_MEM_FLAGS_CHANGE _IOW(KBASE_IOCTL_TYPE, 23, struct kbase_ioctl_mem_flags_change)
+
+/**
+ * struct kbase_ioctl_stream_create - Create a synchronisation stream
+ * @name: A name to identify this stream. Must be NULL-terminated.
+ *
+ * Note that this is also called a "timeline", but is named stream to avoid
+ * confusion with other uses of the word.
+ *
+ * Unused bytes in @name (after the first NULL byte) must be also be NULL bytes.
+ *
+ * The ioctl returns a file descriptor.
+ */
+struct kbase_ioctl_stream_create {
+	char name[32];
+};
+
+#define KBASE_IOCTL_STREAM_CREATE _IOW(KBASE_IOCTL_TYPE, 24, struct kbase_ioctl_stream_create)
+
+/**
+ * struct kbase_ioctl_fence_validate - Validate a fd refers to a fence
+ * @fd: The file descriptor to validate
+ */
+struct kbase_ioctl_fence_validate {
+	int fd;
+};
+
+#define KBASE_IOCTL_FENCE_VALIDATE _IOW(KBASE_IOCTL_TYPE, 25, struct kbase_ioctl_fence_validate)
+
+/**
+ * struct kbase_ioctl_mem_profile_add - Provide profiling information to kernel
+ * @buffer: Pointer to the information
+ * @len: Length
+ * @padding: Padding
+ *
+ * The data provided is accessible through a debugfs file
+ */
+struct kbase_ioctl_mem_profile_add {
+	__u64 buffer;
+	__u32 len;
+	__u32 padding;
+};
+
+#define KBASE_IOCTL_MEM_PROFILE_ADD _IOW(KBASE_IOCTL_TYPE, 27, struct kbase_ioctl_mem_profile_add)
+
+/**
+ * struct kbase_ioctl_sticky_resource_map - Permanently map an external resource
+ * @count: Number of resources
+ * @address: Array of __u64 GPU addresses of the external resources to map
+ */
+struct kbase_ioctl_sticky_resource_map {
+	__u64 count;
+	__u64 address;
+};
+
+#define KBASE_IOCTL_STICKY_RESOURCE_MAP \
+	_IOW(KBASE_IOCTL_TYPE, 29, struct kbase_ioctl_sticky_resource_map)
+
+/**
+ * struct kbase_ioctl_sticky_resource_unmap - Unmap a resource mapped which was
+ *                                          previously permanently mapped
+ * @count: Number of resources
+ * @address: Array of __u64 GPU addresses of the external resources to unmap
+ */
+struct kbase_ioctl_sticky_resource_unmap {
+	__u64 count;
+	__u64 address;
+};
+
+#define KBASE_IOCTL_STICKY_RESOURCE_UNMAP \
+	_IOW(KBASE_IOCTL_TYPE, 30, struct kbase_ioctl_sticky_resource_unmap)
+
+/**
+ * union kbase_ioctl_mem_find_gpu_start_and_offset - Find the start address of
+ *                                                   the GPU memory region for
+ *                                                   the given gpu address and
+ *                                                   the offset of that address
+ *                                                   into the region
+ * @in: Input parameters
+ * @in.gpu_addr: GPU virtual address
+ * @in.size: Size in bytes within the region
+ * @out: Output parameters
+ * @out.start: Address of the beginning of the memory region enclosing @gpu_addr
+ *             for the length of @offset bytes
+ * @out.offset: The offset from the start of the memory region to @gpu_addr
+ */
+union kbase_ioctl_mem_find_gpu_start_and_offset {
+	struct {
+		__u64 gpu_addr;
+		__u64 size;
+	} in;
+	struct {
+		__u64 start;
+		__u64 offset;
+	} out;
+};
+
+#define KBASE_IOCTL_MEM_FIND_GPU_START_AND_OFFSET \
+	_IOWR(KBASE_IOCTL_TYPE, 31, union kbase_ioctl_mem_find_gpu_start_and_offset)
+
+/**
+ * struct kbase_ioctl_mem_exec_init - Initialise the EXEC_VA memory zone
+ *
+ * @va_pages: Number of VA pages to reserve for EXEC_VA
+ */
+struct kbase_ioctl_mem_exec_init {
+	__u64 va_pages;
+};
+
+#define KBASE_IOCTL_MEM_EXEC_INIT _IOW(KBASE_IOCTL_TYPE, 38, struct kbase_ioctl_mem_exec_init)
+
+/**
+ * union kbase_ioctl_get_cpu_gpu_timeinfo - Request zero or more types of
+ *                                          cpu/gpu time (counter values)
+ * @in: Input parameters
+ * @in.request_flags: Bit-flags indicating the requested types.
+ * @in.paddings:      Unused, size alignment matching the out.
+ * @out: Output parameters
+ * @out.sec:           Integer field of the monotonic time, unit in seconds.
+ * @out.nsec:          Fractional sec of the monotonic time, in nano-seconds.
+ * @out.padding:       Unused, for __u64 alignment
+ * @out.timestamp:     System wide timestamp (counter) value.
+ * @out.cycle_counter: GPU cycle counter value.
+ */
+union kbase_ioctl_get_cpu_gpu_timeinfo {
+	struct {
+		__u32 request_flags;
+		__u32 paddings[7];
+	} in;
+	struct {
+		__u64 sec;
+		__u32 nsec;
+		__u32 padding;
+		__u64 timestamp;
+		__u64 cycle_counter;
+	} out;
+};
+
+#define KBASE_IOCTL_GET_CPU_GPU_TIMEINFO \
+	_IOWR(KBASE_IOCTL_TYPE, 50, union kbase_ioctl_get_cpu_gpu_timeinfo)
+
+/**
+ * struct kbase_ioctl_context_priority_check - Check the max possible priority
+ * @priority: Input priority & output priority
+ */
+
+struct kbase_ioctl_context_priority_check {
+	__u8 priority;
+};
+
+#define KBASE_IOCTL_CONTEXT_PRIORITY_CHECK \
+	_IOWR(KBASE_IOCTL_TYPE, 54, struct kbase_ioctl_context_priority_check)
+
+/**
+ * struct kbase_ioctl_set_limited_core_count - Set the limited core count.
+ *
+ * @max_core_count: Maximum core count
+ */
+struct kbase_ioctl_set_limited_core_count {
+	__u8 max_core_count;
+};
+
+#define KBASE_IOCTL_SET_LIMITED_CORE_COUNT \
+	_IOW(KBASE_IOCTL_TYPE, 55, struct kbase_ioctl_set_limited_core_count)
+
+/**
+ * struct kbase_ioctl_kinstr_prfcnt_enum_info - Enum Performance counter
+ *                                              information
+ * @info_item_size:  Performance counter item size in bytes.
+ * @info_item_count: Performance counter item count in the info_list_ptr.
+ * @info_list_ptr:   Performance counter item list pointer which points to a
+ *                   list with info_item_count of items.
+ *
+ * On success: returns info_item_size and info_item_count if info_list_ptr is
+ * NULL, returns performance counter information if info_list_ptr is not NULL.
+ * On error: returns a negative error code.
+ */
+struct kbase_ioctl_kinstr_prfcnt_enum_info {
+	__u32 info_item_size;
+	__u32 info_item_count;
+	__u64 info_list_ptr;
+};
+
+#define KBASE_IOCTL_KINSTR_PRFCNT_ENUM_INFO \
+	_IOWR(KBASE_IOCTL_TYPE, 56, struct kbase_ioctl_kinstr_prfcnt_enum_info)
+
+/**
+ * struct kbase_ioctl_kinstr_prfcnt_setup - Setup HWC dumper/reader
+ * @in: input parameters.
+ * @in.request_item_count: Number of requests in the requests array.
+ * @in.request_item_size:  Size in bytes of each request in the requests array.
+ * @in.requests_ptr:       Pointer to the requests array.
+ * @out: output parameters.
+ * @out.prfcnt_metadata_item_size: Size of each item in the metadata array for
+ *                                 each sample.
+ * @out.prfcnt_mmap_size_bytes:    Size in bytes that user-space should mmap
+ *                                 for reading performance counter samples.
+ *
+ * A fd is returned from the ioctl if successful, or a negative value on error.
+ */
+union kbase_ioctl_kinstr_prfcnt_setup {
+	struct {
+		__u32 request_item_count;
+		__u32 request_item_size;
+		__u64 requests_ptr;
+	} in;
+	struct {
+		__u32 prfcnt_metadata_item_size;
+		__u32 prfcnt_mmap_size_bytes;
+	} out;
+};
+
+#define KBASE_IOCTL_KINSTR_PRFCNT_SETUP \
+	_IOWR(KBASE_IOCTL_TYPE, 57, union kbase_ioctl_kinstr_prfcnt_setup)
+
+/***************
+ * test ioctls *
+ ***************/
+#if MALI_UNIT_TEST
+/* These ioctls are purely for test purposes and are not used in the production
+ * driver, they therefore may change without notice
+ */
+
+#define KBASE_IOCTL_TEST_TYPE (KBASE_IOCTL_TYPE + 1)
+
+
+/**
+ * struct kbase_ioctl_tlstream_stats - Read tlstream stats for test purposes
+ * @bytes_collected: number of bytes read by user
+ * @bytes_generated: number of bytes generated by tracepoints
+ */
+struct kbase_ioctl_tlstream_stats {
+	__u32 bytes_collected;
+	__u32 bytes_generated;
+};
+
+#define KBASE_IOCTL_TLSTREAM_STATS _IOR(KBASE_IOCTL_TEST_TYPE, 2, struct kbase_ioctl_tlstream_stats)
+
+#endif /* MALI_UNIT_TEST */
+
+/* Customer extension range */
+#define KBASE_IOCTL_EXTRA_TYPE (KBASE_IOCTL_TYPE + 2)
+
+/* If the integration needs extra ioctl add them there
+ * like this:
+ *
+ * struct my_ioctl_args {
+ *  ....
+ * }
+ *
+ * #define KBASE_IOCTL_MY_IOCTL \
+ *         _IOWR(KBASE_IOCTL_EXTRA_TYPE, 0, struct my_ioctl_args)
+ */
+
+/**********************************
+ * Definitions for GPU properties *
+ **********************************/
+#define KBASE_GPUPROP_VALUE_SIZE_U8 (0x0)
+#define KBASE_GPUPROP_VALUE_SIZE_U16 (0x1)
+#define KBASE_GPUPROP_VALUE_SIZE_U32 (0x2)
+#define KBASE_GPUPROP_VALUE_SIZE_U64 (0x3)
+
+#define KBASE_GPUPROP_PRODUCT_ID 1
+#define KBASE_GPUPROP_VERSION_STATUS 2
+#define KBASE_GPUPROP_MINOR_REVISION 3
+#define KBASE_GPUPROP_MAJOR_REVISION 4
+/* 5 previously used for GPU speed */
+#define KBASE_GPUPROP_GPU_FREQ_KHZ_MAX 6
+/* 7 previously used for minimum GPU speed */
+#define KBASE_GPUPROP_LOG2_PROGRAM_COUNTER_SIZE 8
+#define KBASE_GPUPROP_TEXTURE_FEATURES_0 9
+#define KBASE_GPUPROP_TEXTURE_FEATURES_1 10
+#define KBASE_GPUPROP_TEXTURE_FEATURES_2 11
+#define KBASE_GPUPROP_GPU_AVAILABLE_MEMORY_SIZE 12
+
+#define KBASE_GPUPROP_L2_LOG2_LINE_SIZE 13
+#define KBASE_GPUPROP_L2_LOG2_CACHE_SIZE 14
+#define KBASE_GPUPROP_L2_NUM_L2_SLICES 15
+
+#define KBASE_GPUPROP_TILER_BIN_SIZE_BYTES 16
+#define KBASE_GPUPROP_TILER_MAX_ACTIVE_LEVELS 17
+
+#define KBASE_GPUPROP_MAX_THREADS 18
+#define KBASE_GPUPROP_MAX_WORKGROUP_SIZE 19
+#define KBASE_GPUPROP_MAX_BARRIER_SIZE 20
+#define KBASE_GPUPROP_MAX_REGISTERS 21
+#define KBASE_GPUPROP_MAX_TASK_QUEUE 22
+#define KBASE_GPUPROP_MAX_THREAD_GROUP_SPLIT 23
+#define KBASE_GPUPROP_IMPL_TECH 24
+
+#define KBASE_GPUPROP_RAW_SHADER_PRESENT 25
+#define KBASE_GPUPROP_RAW_TILER_PRESENT 26
+#define KBASE_GPUPROP_RAW_L2_PRESENT 27
+#define KBASE_GPUPROP_RAW_STACK_PRESENT 28
+#define KBASE_GPUPROP_RAW_L2_FEATURES 29
+#define KBASE_GPUPROP_RAW_CORE_FEATURES 30
+#define KBASE_GPUPROP_RAW_MEM_FEATURES 31
+#define KBASE_GPUPROP_RAW_MMU_FEATURES 32
+#define KBASE_GPUPROP_RAW_AS_PRESENT 33
+#define KBASE_GPUPROP_RAW_JS_PRESENT 34
+#define KBASE_GPUPROP_RAW_JS_FEATURES_0 35
+#define KBASE_GPUPROP_RAW_JS_FEATURES_1 36
+#define KBASE_GPUPROP_RAW_JS_FEATURES_2 37
+#define KBASE_GPUPROP_RAW_JS_FEATURES_3 38
+#define KBASE_GPUPROP_RAW_JS_FEATURES_4 39
+#define KBASE_GPUPROP_RAW_JS_FEATURES_5 40
+#define KBASE_GPUPROP_RAW_JS_FEATURES_6 41
+#define KBASE_GPUPROP_RAW_JS_FEATURES_7 42
+#define KBASE_GPUPROP_RAW_JS_FEATURES_8 43
+#define KBASE_GPUPROP_RAW_JS_FEATURES_9 44
+#define KBASE_GPUPROP_RAW_JS_FEATURES_10 45
+#define KBASE_GPUPROP_RAW_JS_FEATURES_11 46
+#define KBASE_GPUPROP_RAW_JS_FEATURES_12 47
+#define KBASE_GPUPROP_RAW_JS_FEATURES_13 48
+#define KBASE_GPUPROP_RAW_JS_FEATURES_14 49
+#define KBASE_GPUPROP_RAW_JS_FEATURES_15 50
+#define KBASE_GPUPROP_RAW_TILER_FEATURES 51
+#define KBASE_GPUPROP_RAW_TEXTURE_FEATURES_0 52
+#define KBASE_GPUPROP_RAW_TEXTURE_FEATURES_1 53
+#define KBASE_GPUPROP_RAW_TEXTURE_FEATURES_2 54
+#define KBASE_GPUPROP_RAW_GPU_ID 55
+#define KBASE_GPUPROP_RAW_THREAD_MAX_THREADS 56
+#define KBASE_GPUPROP_RAW_THREAD_MAX_WORKGROUP_SIZE 57
+#define KBASE_GPUPROP_RAW_THREAD_MAX_BARRIER_SIZE 58
+#define KBASE_GPUPROP_RAW_THREAD_FEATURES 59
+#define KBASE_GPUPROP_RAW_COHERENCY_MODE 60
+
+#define KBASE_GPUPROP_COHERENCY_NUM_GROUPS 61
+#define KBASE_GPUPROP_COHERENCY_NUM_CORE_GROUPS 62
+#define KBASE_GPUPROP_COHERENCY_COHERENCY 63
+#define KBASE_GPUPROP_COHERENCY_GROUP_0 64
+#define KBASE_GPUPROP_COHERENCY_GROUP_1 65
+#define KBASE_GPUPROP_COHERENCY_GROUP_2 66
+#define KBASE_GPUPROP_COHERENCY_GROUP_3 67
+#define KBASE_GPUPROP_COHERENCY_GROUP_4 68
+#define KBASE_GPUPROP_COHERENCY_GROUP_5 69
+#define KBASE_GPUPROP_COHERENCY_GROUP_6 70
+#define KBASE_GPUPROP_COHERENCY_GROUP_7 71
+#define KBASE_GPUPROP_COHERENCY_GROUP_8 72
+#define KBASE_GPUPROP_COHERENCY_GROUP_9 73
+#define KBASE_GPUPROP_COHERENCY_GROUP_10 74
+#define KBASE_GPUPROP_COHERENCY_GROUP_11 75
+#define KBASE_GPUPROP_COHERENCY_GROUP_12 76
+#define KBASE_GPUPROP_COHERENCY_GROUP_13 77
+#define KBASE_GPUPROP_COHERENCY_GROUP_14 78
+#define KBASE_GPUPROP_COHERENCY_GROUP_15 79
+
+#define KBASE_GPUPROP_TEXTURE_FEATURES_3 80
+#define KBASE_GPUPROP_RAW_TEXTURE_FEATURES_3 81
+
+#define KBASE_GPUPROP_NUM_EXEC_ENGINES 82
+
+#define KBASE_GPUPROP_RAW_THREAD_TLS_ALLOC 83
+#define KBASE_GPUPROP_TLS_ALLOC 84
+#define KBASE_GPUPROP_RAW_GPU_FEATURES 85
+#define KBASE_GPUPROP_RAW_BASE_PRESENT 86
+#define KBASE_GPUPROP_RAW_NEURAL_PRESENT 87
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _UAPI_KBASE_IOCTL_H_ */
--- a/include/uapi/gpu/arm/valhall/mali_kbase_kinstr_jm_reader.h
+++ b/include/uapi/gpu/arm/valhall/mali_kbase_kinstr_jm_reader.h
@@ -0,0 +1,69 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *
+ * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+/*
+ * mali_kbase_kinstr_jm_reader.h
+ * Provides an ioctl API to read kernel atom state changes. The flow of the
+ * API is:
+ *    1. Obtain the file descriptor with ``KBASE_IOCTL_KINSTR_JM_FD``
+ *    2. Determine the buffer structure layout via the above ioctl's returned
+ *       size and version fields in ``struct kbase_kinstr_jm_fd_out``
+ *    4. Poll the file descriptor for ``POLLIN``
+ *    5. Get data with read() on the fd
+ *    6. Use the structure version to understand how to read the data from the
+ *       buffer
+ *    7. Repeat 4-6
+ *    8. Close the file descriptor
+ */
+
+#ifndef _UAPI_KBASE_KINSTR_JM_READER_H_
+#define _UAPI_KBASE_KINSTR_JM_READER_H_
+
+/**
+ * enum kbase_kinstr_jm_reader_atom_state - Determines the work state of an atom
+ * @KBASE_KINSTR_JM_READER_ATOM_STATE_QUEUE:    Signifies that an atom has
+ *                                              entered a hardware queue
+ * @KBASE_KINSTR_JM_READER_ATOM_STATE_START:    Signifies that work has started
+ *                                              on an atom
+ * @KBASE_KINSTR_JM_READER_ATOM_STATE_STOP:     Signifies that work has stopped
+ *                                              on an atom
+ * @KBASE_KINSTR_JM_READER_ATOM_STATE_COMPLETE: Signifies that work has
+ *                                              completed on an atom
+ * @KBASE_KINSTR_JM_READER_ATOM_STATE_COUNT:    The number of state enumerations
+ *
+ * We can add new states to the end of this if they do not break the existing
+ * state machine. Old user mode code can gracefully ignore states they do not
+ * understand.
+ *
+ * If we need to make a breaking change to the state machine, we can do that by
+ * changing the version reported by KBASE_IOCTL_KINSTR_JM_FD. This will
+ * mean that old user mode code will fail to understand the new state field in
+ * the structure and gracefully not use the state change API.
+ */
+enum kbase_kinstr_jm_reader_atom_state {
+	KBASE_KINSTR_JM_READER_ATOM_STATE_QUEUE,
+	KBASE_KINSTR_JM_READER_ATOM_STATE_START,
+	KBASE_KINSTR_JM_READER_ATOM_STATE_STOP,
+	KBASE_KINSTR_JM_READER_ATOM_STATE_COMPLETE,
+	KBASE_KINSTR_JM_READER_ATOM_STATE_COUNT
+};
+
+#endif /* _UAPI_KBASE_KINSTR_JM_READER_H_ */
--- a/include/uapi/gpu/arm/valhall/mali_kbase_mem_flags.h
+++ b/include/uapi/gpu/arm/valhall/mali_kbase_mem_flags.h
@@ -0,0 +1,191 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *
+ * (C) COPYRIGHT 2024 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#ifndef _UAPI_KBASE_MEM_FLAGS_H_
+#define _UAPI_KBASE_MEM_FLAGS_H_
+
+#if MALI_USE_CSF
+#include "csf/mali_kbase_csf_mem_flags.h"
+#else
+#include "jm/mali_kbase_jm_mem_flags.h"
+#endif
+
+/* Memory allocation, access/hint flags & mask.
+ *
+ * See base_mem_alloc_flags.
+ */
+
+/* IN */
+/* Read access CPU side
+ */
+#define BASE_MEM_PROT_CPU_RD ((base_mem_alloc_flags)1 << 0)
+
+/* Write access CPU side
+ */
+#define BASE_MEM_PROT_CPU_WR ((base_mem_alloc_flags)1 << 1)
+
+/* Read access GPU side
+ */
+#define BASE_MEM_PROT_GPU_RD ((base_mem_alloc_flags)1 << 2)
+
+/* Write access GPU side
+ */
+#define BASE_MEM_PROT_GPU_WR ((base_mem_alloc_flags)1 << 3)
+
+/* Execute allowed on the GPU side
+ */
+#define BASE_MEM_PROT_GPU_EX ((base_mem_alloc_flags)1 << 4)
+
+/* Unused bit, previously for BASEP_MEM_PERMANENT_KERNEL_MAPPING
+ */
+#define BASE_MEM_UNUSED_BIT_5 ((base_mem_alloc_flags)1 << 5)
+
+/* The allocation will completely reside within the same 4GB chunk in the GPU
+ * virtual space.
+ * Since this flag is primarily required only for the TLS memory which will
+ * not be used to contain executable code and also not used for Tiler heap,
+ * it can't be used along with BASE_MEM_PROT_GPU_EX and TILER_ALIGN_TOP flags.
+ */
+#define BASE_MEM_GPU_VA_SAME_4GB_PAGE ((base_mem_alloc_flags)1 << 6)
+
+/* Unused bit, previously for BASEP_MEM_NO_USER_FREE
+ */
+#define BASE_MEM_UNUSED_BIT_7 ((base_mem_alloc_flags)1 << 7)
+
+/* Grow backing store on GPU Page Fault
+ */
+#define BASE_MEM_GROW_ON_GPF ((base_mem_alloc_flags)1 << 9)
+
+/* Page coherence Outer shareable, if available
+ */
+#define BASE_MEM_COHERENT_SYSTEM ((base_mem_alloc_flags)1 << 10)
+
+/* Page coherence Inner shareable
+ */
+#define BASE_MEM_COHERENT_LOCAL ((base_mem_alloc_flags)1 << 11)
+
+/* IN/OUT */
+/* Should be cached on the CPU, returned if actually cached
+ */
+#define BASE_MEM_CACHED_CPU ((base_mem_alloc_flags)1 << 12)
+
+/* IN/OUT */
+/* Must have same VA on both the GPU and the CPU
+ */
+#define BASE_MEM_SAME_VA ((base_mem_alloc_flags)1 << 13)
+
+/* OUT */
+/* Must call mmap to acquire a GPU address for the allocation
+ */
+#define BASE_MEM_NEED_MMAP ((base_mem_alloc_flags)1 << 14)
+
+/* IN */
+/* Page coherence Outer shareable, required.
+ */
+#define BASE_MEM_COHERENT_SYSTEM_REQUIRED ((base_mem_alloc_flags)1 << 15)
+
+/* Protected memory
+ */
+#define BASE_MEM_PROTECTED ((base_mem_alloc_flags)1 << 16)
+
+/* Not needed physical memory
+ */
+#define BASE_MEM_DONT_NEED ((base_mem_alloc_flags)1 << 17)
+
+/* Must use shared CPU/GPU zone (SAME_VA zone) but doesn't require the
+ * addresses to be the same
+ */
+#define BASE_MEM_IMPORT_SHARED ((base_mem_alloc_flags)1 << 18)
+
+/* Should be uncached on the GPU, will work only for GPUs using AARCH64 mmu
+ * mode. Some components within the GPU might only be able to access memory
+ * that is GPU cacheable. Refer to the specific GPU implementation for more
+ * details. The 3 shareability flags will be ignored for GPU uncached memory.
+ * If used while importing USER_BUFFER type memory, then the import will fail
+ * if the memory is not aligned to GPU and CPU cache line width.
+ */
+#define BASE_MEM_UNCACHED_GPU ((base_mem_alloc_flags)1 << 21)
+
+/*
+ * Bits [22:25] for group_id (0~15).
+ *
+ * base_mem_group_id_set() should be used to pack a memory group ID into a
+ * base_mem_alloc_flags value instead of accessing the bits directly.
+ * base_mem_group_id_get() should be used to extract the memory group ID from
+ * a base_mem_alloc_flags value.
+ */
+#define BASEP_MEM_GROUP_ID_SHIFT 22
+#define BASE_MEM_GROUP_ID_MASK ((base_mem_alloc_flags)0xF << BASEP_MEM_GROUP_ID_SHIFT)
+
+/* Must do CPU cache maintenance when imported memory is mapped/unmapped
+ * on GPU. Currently applicable to dma-buf type only.
+ */
+#define BASE_MEM_IMPORT_SYNC_ON_MAP_UNMAP ((base_mem_alloc_flags)1 << 26)
+
+/* Unused bit, only previously used in JM for BASEP_MEM_FLAG_MAP_FIXED */
+#define BASE_MEM_UNUSED_BIT_27 ((base_mem_alloc_flags)1 << 27)
+
+/* Kernel side cache sync ops required */
+#define BASE_MEM_KERNEL_SYNC ((base_mem_alloc_flags)1 << 28)
+
+/* Note that the number of bits used for base_mem_alloc_flags
+ * must be less than BASE_MEM_FLAGS_NR_BITS (for both user
+ * and kernel-side usage) or allocated from bit 63 downwards
+ * (for kernel-only usage) which is controlled by BASEP_MEM_FLAGS_NR_BITS
+ */
+
+/* Number of bits used as flags for base memory management from user-side
+ * ie BASE_MEM_* flags.
+ *
+ * Must be kept in sync with the base_mem_alloc_flags flags
+ */
+#define BASE_MEM_FLAGS_NR_BITS 30
+
+/* A mask of all the flags which are only valid within kbase,
+ * and may not be passed to/from user space.
+ */
+#define BASE_MEM_FLAGS_KERNEL_ONLY \
+	(~(((base_mem_alloc_flags)1 << (64 - BASEP_MEM_FLAGS_NR_BITS)) - 1))
+
+/* A mask for all bits that are output from kbase, but never input. */
+#define BASE_MEM_FLAGS_OUTPUT_MASK BASE_MEM_NEED_MMAP
+
+/* A mask for all bits that can be input to kbase. */
+#define BASE_MEM_FLAGS_INPUT_MASK                                     \
+	(((((base_mem_alloc_flags)1 << BASE_MEM_FLAGS_NR_BITS) - 1) | \
+	  BASE_MEM_FLAGS_KERNEL_ONLY) &                               \
+	 ~BASE_MEM_FLAGS_OUTPUT_MASK)
+
+/* A mask for all input and output bits */
+#define BASE_MEM_ALL_FLAGS_MASK (BASE_MEM_FLAGS_INPUT_MASK | BASE_MEM_FLAGS_OUTPUT_MASK)
+
+/* Special base mem handles.
+ */
+#define BASEP_MEM_INVALID_HANDLE (0ul)
+#define BASE_MEM_MMU_DUMP_HANDLE (1ul << LOCAL_PAGE_SHIFT)
+#define BASE_MEM_TRACE_BUFFER_HANDLE (2ul << LOCAL_PAGE_SHIFT)
+#define BASE_MEM_MAP_TRACKING_HANDLE (3ul << LOCAL_PAGE_SHIFT)
+#define BASEP_MEM_WRITE_ALLOC_PAGES_HANDLE (4ul << LOCAL_PAGE_SHIFT)
+/* reserved handles ..-47<<PAGE_SHIFT> for future special handles */
+#define BASE_MEM_COOKIE_BASE (64ul << LOCAL_PAGE_SHIFT)
+#define BASE_MEM_FIRST_FREE_ADDRESS ((BITS_PER_LONG << LOCAL_PAGE_SHIFT) + BASE_MEM_COOKIE_BASE)
+
+#endif /* _UAPI_KBASE_MEM_FLAGS_H_ */
--- a/include/uapi/gpu/arm/valhall/mali_kbase_mem_profile_debugfs_buf_size.h
+++ b/include/uapi/gpu/arm/valhall/mali_kbase_mem_profile_debugfs_buf_size.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *
+ * (C) COPYRIGHT 2014-2024 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+/**
+ * DOC: Header file for the size of the buffer to accumulate the memory report text in
+ */
+
+#ifndef _UAPI_KBASE_MEM_PROFILE_DEBUGFS_BUF_SIZE_H_
+#define _UAPI_KBASE_MEM_PROFILE_DEBUGFS_BUF_SIZE_H_
+
+/**
+ * KBASE_MEM_PROFILE_MAX_BUF_SIZE - The size of the buffer to accumulate the memory report text
+ */
+#define KBASE_MEM_PROFILE_MAX_BUF_SIZE ((size_t)(64 + ((80 + (56 * 64)) * 85) + 56))
+
+#endif /*_UAPI_KBASE_MEM_PROFILE_DEBUGFS_BUF_SIZE_H_*/