driver: gpu: switch to mali vendor driver

2026-06-07 19:30:30 +09:00 · 2024-01-25 13:22:45 -03:00
parent 56cb3cf9e6
commit db3f0eb142
480 changed files with 183004 additions and 17 deletions
--- a/include/uapi/base/arm/dma_buf_test_exporter/dma-buf-test-exporter.h
+++ b/include/uapi/base/arm/dma_buf_test_exporter/dma-buf-test-exporter.h
@@ -0,0 +1,78 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *
+ * (C) COPYRIGHT 2012-2023 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#ifndef _UAPI_DMA_BUF_TEST_EXPORTER_H_
+#define _UAPI_DMA_BUF_TEST_EXPORTER_H_
+
+#include <linux/types.h>
+#include <linux/ioctl.h>
+
+#define DMA_BUF_TE_ENQ 0x642d7465
+#define DMA_BUF_TE_ACK 0x68692100
+
+struct dma_buf_te_ioctl_version {
+	/** Must be set to DMA_BUF_TE_ENQ by client, driver will set it to DMA_BUF_TE_ACK */
+	int op;
+	/** Major version */
+	int major;
+	/** Minor version */
+	int minor;
+};
+
+struct dma_buf_te_ioctl_alloc {
+	__u64 size; /* size of buffer to allocate, in pages */
+};
+
+struct dma_buf_te_ioctl_status {
+	/* in */
+	int fd; /* the dma_buf to query, only dma_buf objects exported by this driver is supported */
+	/* out */
+	int attached_devices; /* number of devices attached (active 'dma_buf_attach's) */
+	int device_mappings; /* number of device mappings (active 'dma_buf_map_attachment's) */
+	int cpu_mappings; /* number of cpu mappings (active 'mmap's) */
+};
+
+struct dma_buf_te_ioctl_set_failing {
+	/* in */
+	int fd; /* the dma_buf to set failure mode for, only dma_buf objects exported by this driver is supported */
+
+	/* zero = no fail injection, non-zero = inject failure */
+	int fail_attach;
+	int fail_map;
+	int fail_mmap;
+};
+
+struct dma_buf_te_ioctl_fill {
+	int fd;
+	int value;
+};
+
+#define DMA_BUF_TE_IOCTL_BASE 'E'
+/* Below all returning 0 if successful or -errcode except DMA_BUF_TE_ALLOC which will return fd or -errcode */
+#define DMA_BUF_TE_VERSION _IOR(DMA_BUF_TE_IOCTL_BASE, 0x00, struct dma_buf_te_ioctl_version)
+#define DMA_BUF_TE_ALLOC _IOR(DMA_BUF_TE_IOCTL_BASE, 0x01, struct dma_buf_te_ioctl_alloc)
+#define DMA_BUF_TE_QUERY _IOR(DMA_BUF_TE_IOCTL_BASE, 0x02, struct dma_buf_te_ioctl_status)
+#define DMA_BUF_TE_SET_FAILING \
+	_IOW(DMA_BUF_TE_IOCTL_BASE, 0x03, struct dma_buf_te_ioctl_set_failing)
+#define DMA_BUF_TE_ALLOC_CONT _IOR(DMA_BUF_TE_IOCTL_BASE, 0x04, struct dma_buf_te_ioctl_alloc)
+#define DMA_BUF_TE_FILL _IOR(DMA_BUF_TE_IOCTL_BASE, 0x05, struct dma_buf_te_ioctl_fill)
+
+#endif /* _UAPI_DMA_BUF_TEST_EXPORTER_H_ */
--- a/include/uapi/gpu/arm/midgard/backend/gpu/mali_kbase_model_dummy.h
+++ b/include/uapi/gpu/arm/midgard/backend/gpu/mali_kbase_model_dummy.h
@@ -0,0 +1,76 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *
+ * (C) COPYRIGHT 2021-2023 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+/*
+ * Dummy Model interface
+ */
+
+#ifndef _UAPI_KBASE_MODEL_DUMMY_H_
+#define _UAPI_KBASE_MODEL_DUMMY_H_
+
+#include <linux/types.h>
+
+#define KBASE_DUMMY_MODEL_COUNTER_HEADER_DWORDS (4)
+#if MALI_USE_CSF
+#define KBASE_DUMMY_MODEL_COUNTER_PER_CORE (65)
+#else /* MALI_USE_CSF */
+#define KBASE_DUMMY_MODEL_COUNTER_PER_CORE (60)
+#endif /* MALI_USE_CSF */
+#define KBASE_DUMMY_MODEL_COUNTERS_PER_BIT (4)
+#define KBASE_DUMMY_MODEL_COUNTER_ENABLED(enable_mask, ctr_idx) \
+	(enable_mask & (1U << (ctr_idx / KBASE_DUMMY_MODEL_COUNTERS_PER_BIT)))
+
+#define KBASE_DUMMY_MODEL_HEADERS_PER_BLOCK 4
+#define KBASE_DUMMY_MODEL_COUNTERS_PER_BLOCK KBASE_DUMMY_MODEL_COUNTER_PER_CORE
+#define KBASE_DUMMY_MODEL_VALUES_PER_BLOCK \
+	(KBASE_DUMMY_MODEL_COUNTERS_PER_BLOCK + KBASE_DUMMY_MODEL_HEADERS_PER_BLOCK)
+#define KBASE_DUMMY_MODEL_BLOCK_SIZE (KBASE_DUMMY_MODEL_VALUES_PER_BLOCK * sizeof(__u32))
+#define KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS 8
+#define KBASE_DUMMY_MODEL_MAX_SHADER_CORES 32
+#define KBASE_DUMMY_MODEL_MAX_FIRMWARE_BLOCKS 0
+#define KBASE_DUMMY_MODEL_MAX_NUM_HARDWARE_BLOCKS \
+	(1 + 1 + KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS + KBASE_DUMMY_MODEL_MAX_SHADER_CORES)
+#define KBASE_DUMMY_MODEL_MAX_NUM_PERF_BLOCKS \
+	(KBASE_DUMMY_MODEL_MAX_NUM_HARDWARE_BLOCKS + KBASE_DUMMY_MODEL_MAX_FIRMWARE_BLOCKS)
+#define KBASE_DUMMY_MODEL_COUNTER_TOTAL \
+	(KBASE_DUMMY_MODEL_MAX_NUM_PERF_BLOCKS * KBASE_DUMMY_MODEL_COUNTER_PER_CORE)
+#define KBASE_DUMMY_MODEL_MAX_VALUES_PER_SAMPLE \
+	(KBASE_DUMMY_MODEL_MAX_NUM_PERF_BLOCKS * KBASE_DUMMY_MODEL_VALUES_PER_BLOCK)
+#define KBASE_DUMMY_MODEL_MAX_SAMPLE_SIZE \
+	(KBASE_DUMMY_MODEL_MAX_NUM_PERF_BLOCKS * KBASE_DUMMY_MODEL_BLOCK_SIZE)
+
+/*
+ * Bit mask - no. bits set is no. cores
+ * Values obtained from talking to HW team
+ * Example: tODx has 10 cores, 0b11 1111 1111 -> 0x3FF
+ */
+#define DUMMY_IMPLEMENTATION_SHADER_PRESENT (0xFull)
+#define DUMMY_IMPLEMENTATION_SHADER_PRESENT_TBEX (0x7FFFull)
+#define DUMMY_IMPLEMENTATION_SHADER_PRESENT_TODX (0x3FFull)
+#define DUMMY_IMPLEMENTATION_SHADER_PRESENT_TTUX (0x7FFull)
+#define DUMMY_IMPLEMENTATION_SHADER_PRESENT_TTIX (0xFFFull)
+#define DUMMY_IMPLEMENTATION_SHADER_PRESENT_TKRX (0x1FFFull)
+#define DUMMY_IMPLEMENTATION_L2_PRESENT (0x1ull)
+#define DUMMY_IMPLEMENTATION_TILER_PRESENT (0x1ull)
+#define DUMMY_IMPLEMENTATION_STACK_PRESENT (0xFull)
+
+
+#endif /* _UAPI_KBASE_MODEL_DUMMY_H_ */
--- a/include/uapi/gpu/arm/midgard/backend/gpu/mali_kbase_model_linux.h
+++ b/include/uapi/gpu/arm/midgard/backend/gpu/mali_kbase_model_linux.h
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *
+ * (C) COPYRIGHT 2022-2023 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+/*
+ * Dummy Model interface
+ */
+
+#ifndef _UAPI_KBASE_MODEL_LINUX_H_
+#define _UAPI_KBASE_MODEL_LINUX_H_
+
+/* Generic model IRQs */
+#define MODEL_LINUX_JOB_IRQ (0x1 << 0)
+#define MODEL_LINUX_GPU_IRQ (0x1 << 1)
+#define MODEL_LINUX_MMU_IRQ (0x1 << 2)
+#define MODEL_LINUX_IRQ_MASK (MODEL_LINUX_JOB_IRQ | MODEL_LINUX_GPU_IRQ | MODEL_LINUX_MMU_IRQ)
+
+#endif /* _UAPI_KBASE_MODEL_LINUX_H_ */
--- a/include/uapi/gpu/arm/midgard/csf/mali_base_csf_kernel.h
+++ b/include/uapi/gpu/arm/midgard/csf/mali_base_csf_kernel.h
@@ -0,0 +1,608 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *
+ * (C) COPYRIGHT 2020-2023 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#ifndef _UAPI_BASE_CSF_KERNEL_H_
+#define _UAPI_BASE_CSF_KERNEL_H_
+
+#include <linux/types.h>
+#include "../mali_base_common_kernel.h"
+
+/* Memory allocation, access/hint flags & mask specific to CSF GPU.
+ *
+ * See base_mem_alloc_flags.
+ */
+
+/* Must be FIXED memory. */
+#define BASE_MEM_FIXED ((base_mem_alloc_flags)1 << 8)
+
+/* CSF event memory
+ *
+ * If Outer shareable coherence is not specified or not available, then on
+ * allocation kbase will automatically use the uncached GPU mapping.
+ * There is no need for the client to specify BASE_MEM_UNCACHED_GPU
+ * themselves when allocating memory with the BASE_MEM_CSF_EVENT flag.
+ *
+ * This memory requires a permanent mapping
+ *
+ * See also kbase_reg_needs_kernel_mapping()
+ */
+#define BASE_MEM_CSF_EVENT ((base_mem_alloc_flags)1 << 19)
+
+#define BASE_MEM_RESERVED_BIT_20 ((base_mem_alloc_flags)1 << 20)
+
+/* Must be FIXABLE memory: its GPU VA will be determined at a later point,
+ * at which time it will be at a fixed GPU VA.
+ */
+#define BASE_MEM_FIXABLE ((base_mem_alloc_flags)1 << 29)
+
+/* Note that the number of bits used for base_mem_alloc_flags
+ * must be less than BASE_MEM_FLAGS_NR_BITS !!!
+ */
+
+/* A mask of all the flags which are only valid for allocations within kbase,
+ * and may not be passed from user space.
+ */
+#define BASEP_MEM_FLAGS_KERNEL_ONLY (BASEP_MEM_PERMANENT_KERNEL_MAPPING | BASEP_MEM_NO_USER_FREE)
+
+/* A mask of all currently reserved flags
+ */
+#define BASE_MEM_FLAGS_RESERVED BASE_MEM_RESERVED_BIT_20
+
+/* Special base mem handles specific to CSF.
+ */
+#define BASEP_MEM_CSF_USER_REG_PAGE_HANDLE (47ul << LOCAL_PAGE_SHIFT)
+#define BASEP_MEM_CSF_USER_IO_PAGES_HANDLE (48ul << LOCAL_PAGE_SHIFT)
+
+#define KBASE_CSF_NUM_USER_IO_PAGES_HANDLE \
+	((BASE_MEM_COOKIE_BASE - BASEP_MEM_CSF_USER_IO_PAGES_HANDLE) >> LOCAL_PAGE_SHIFT)
+
+/* Valid set of just-in-time memory allocation flags */
+#define BASE_JIT_ALLOC_VALID_FLAGS ((__u8)0)
+
+/* flags for base context specific to CSF */
+
+/* Base context creates a CSF event notification thread.
+ *
+ * The creation of a CSF event notification thread is conditional but
+ * mandatory for the handling of CSF events.
+ */
+#define BASE_CONTEXT_CSF_EVENT_THREAD ((base_context_create_flags)1 << 2)
+
+/* Bitpattern describing the ::base_context_create_flags that can be
+ * passed to base_context_init()
+ */
+#define BASEP_CONTEXT_CREATE_ALLOWED_FLAGS                            \
+	(BASE_CONTEXT_CCTX_EMBEDDED | BASE_CONTEXT_CSF_EVENT_THREAD | \
+	 BASEP_CONTEXT_CREATE_KERNEL_FLAGS)
+
+/* Flags for base tracepoint specific to CSF */
+
+/* Enable KBase tracepoints for CSF builds */
+#define BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS (1U << 2)
+
+/* Enable additional CSF Firmware side tracepoints */
+#define BASE_TLSTREAM_ENABLE_CSFFW_TRACEPOINTS (1U << 3)
+
+#define BASE_TLSTREAM_FLAGS_MASK                                                        \
+	(BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS | BASE_TLSTREAM_JOB_DUMPING_ENABLED | \
+	 BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS | BASE_TLSTREAM_ENABLE_CSFFW_TRACEPOINTS)
+
+/* Number of pages mapped into the process address space for a bound GPU
+ * command queue. A pair of input/output pages and a Hw doorbell page
+ * are mapped to enable direct submission of commands to Hw.
+ */
+#define BASEP_QUEUE_NR_MMAP_USER_PAGES ((size_t)3)
+
+#define BASE_QUEUE_MAX_PRIORITY (15U)
+
+/* Sync32 object fields definition */
+#define BASEP_EVENT32_VAL_OFFSET (0U)
+#define BASEP_EVENT32_ERR_OFFSET (4U)
+#define BASEP_EVENT32_SIZE_BYTES (8U)
+
+/* Sync64 object fields definition */
+#define BASEP_EVENT64_VAL_OFFSET (0U)
+#define BASEP_EVENT64_ERR_OFFSET (8U)
+#define BASEP_EVENT64_SIZE_BYTES (16U)
+
+/* Sync32 object alignment, equal to its size */
+#define BASEP_EVENT32_ALIGN_BYTES (8U)
+
+/* Sync64 object alignment, equal to its size */
+#define BASEP_EVENT64_ALIGN_BYTES (16U)
+
+/* The upper limit for number of objects that could be waited/set per command.
+ * This limit is now enforced as internally the error inherit inputs are
+ * converted to 32-bit flags in a __u32 variable occupying a previously padding
+ * field.
+ */
+#define BASEP_KCPU_CQS_MAX_NUM_OBJS ((size_t)32)
+
+/* CSF CSI EXCEPTION_HANDLER_FLAGS */
+#define BASE_CSF_TILER_OOM_EXCEPTION_FLAG (1u << 0)
+#define BASE_CSF_EXCEPTION_HANDLER_FLAGS_MASK (BASE_CSF_TILER_OOM_EXCEPTION_FLAG)
+
+/* Initial value for LATEST_FLUSH register */
+#define POWER_DOWN_LATEST_FLUSH_VALUE ((__u32)1)
+
+/**
+ * enum base_kcpu_command_type - Kernel CPU queue command type.
+ * @BASE_KCPU_COMMAND_TYPE_FENCE_SIGNAL:       fence_signal,
+ * @BASE_KCPU_COMMAND_TYPE_FENCE_WAIT:         fence_wait,
+ * @BASE_KCPU_COMMAND_TYPE_CQS_WAIT:           cqs_wait,
+ * @BASE_KCPU_COMMAND_TYPE_CQS_SET:            cqs_set,
+ * @BASE_KCPU_COMMAND_TYPE_CQS_WAIT_OPERATION: cqs_wait_operation,
+ * @BASE_KCPU_COMMAND_TYPE_CQS_SET_OPERATION:  cqs_set_operation,
+ * @BASE_KCPU_COMMAND_TYPE_MAP_IMPORT:         map_import,
+ * @BASE_KCPU_COMMAND_TYPE_UNMAP_IMPORT:       unmap_import,
+ * @BASE_KCPU_COMMAND_TYPE_UNMAP_IMPORT_FORCE: unmap_import_force,
+ * @BASE_KCPU_COMMAND_TYPE_JIT_ALLOC:          jit_alloc,
+ * @BASE_KCPU_COMMAND_TYPE_JIT_FREE:           jit_free,
+ * @BASE_KCPU_COMMAND_TYPE_GROUP_SUSPEND:      group_suspend,
+ * @BASE_KCPU_COMMAND_TYPE_ERROR_BARRIER:      error_barrier
+ */
+enum base_kcpu_command_type {
+	BASE_KCPU_COMMAND_TYPE_FENCE_SIGNAL,
+	BASE_KCPU_COMMAND_TYPE_FENCE_WAIT,
+	BASE_KCPU_COMMAND_TYPE_CQS_WAIT,
+	BASE_KCPU_COMMAND_TYPE_CQS_SET,
+	BASE_KCPU_COMMAND_TYPE_CQS_WAIT_OPERATION,
+	BASE_KCPU_COMMAND_TYPE_CQS_SET_OPERATION,
+	BASE_KCPU_COMMAND_TYPE_MAP_IMPORT,
+	BASE_KCPU_COMMAND_TYPE_UNMAP_IMPORT,
+	BASE_KCPU_COMMAND_TYPE_UNMAP_IMPORT_FORCE,
+	BASE_KCPU_COMMAND_TYPE_JIT_ALLOC,
+	BASE_KCPU_COMMAND_TYPE_JIT_FREE,
+	BASE_KCPU_COMMAND_TYPE_GROUP_SUSPEND,
+	BASE_KCPU_COMMAND_TYPE_ERROR_BARRIER
+};
+
+/**
+ * enum base_queue_group_priority - Priority of a GPU Command Queue Group.
+ * @BASE_QUEUE_GROUP_PRIORITY_HIGH:     GPU Command Queue Group is of high
+ *                                      priority.
+ * @BASE_QUEUE_GROUP_PRIORITY_MEDIUM:   GPU Command Queue Group is of medium
+ *                                      priority.
+ * @BASE_QUEUE_GROUP_PRIORITY_LOW:      GPU Command Queue Group is of low
+ *                                      priority.
+ * @BASE_QUEUE_GROUP_PRIORITY_REALTIME: GPU Command Queue Group is of real-time
+ *                                      priority.
+ * @BASE_QUEUE_GROUP_PRIORITY_COUNT:    Number of GPU Command Queue Group
+ *                                      priority levels.
+ *
+ * Currently this is in order of highest to lowest, but if new levels are added
+ * then those new levels may be out of order to preserve the ABI compatibility
+ * with previous releases. At that point, ensure assignment to
+ * the 'priority' member in &kbase_queue_group is updated to ensure it remains
+ * a linear ordering.
+ *
+ * There should be no gaps in the enum, otherwise use of
+ * BASE_QUEUE_GROUP_PRIORITY_COUNT in kbase must be updated.
+ */
+enum base_queue_group_priority {
+	BASE_QUEUE_GROUP_PRIORITY_HIGH = 0,
+	BASE_QUEUE_GROUP_PRIORITY_MEDIUM,
+	BASE_QUEUE_GROUP_PRIORITY_LOW,
+	BASE_QUEUE_GROUP_PRIORITY_REALTIME,
+	BASE_QUEUE_GROUP_PRIORITY_COUNT
+};
+
+struct base_kcpu_command_fence_info {
+	__u64 fence;
+};
+
+struct base_cqs_wait_info {
+	__u64 addr;
+	__u32 val;
+	__u32 padding;
+};
+
+struct base_kcpu_command_cqs_wait_info {
+	__u64 objs;
+	__u32 nr_objs;
+	__u32 inherit_err_flags;
+};
+
+struct base_cqs_set {
+	__u64 addr;
+};
+
+struct base_kcpu_command_cqs_set_info {
+	__u64 objs;
+	__u32 nr_objs;
+	__u32 padding;
+};
+
+/**
+ * typedef basep_cqs_data_type - Enumeration of CQS Data Types
+ *
+ * @BASEP_CQS_DATA_TYPE_U32: The Data Type of a CQS Object's value
+ *                           is an unsigned 32-bit integer
+ * @BASEP_CQS_DATA_TYPE_U64: The Data Type of a CQS Object's value
+ *                           is an unsigned 64-bit integer
+ */
+typedef enum PACKED {
+	BASEP_CQS_DATA_TYPE_U32 = 0,
+	BASEP_CQS_DATA_TYPE_U64 = 1,
+} basep_cqs_data_type;
+
+/**
+ * typedef basep_cqs_wait_operation_op - Enumeration of CQS Object Wait
+ *                                Operation conditions
+ *
+ * @BASEP_CQS_WAIT_OPERATION_LE: CQS Wait Operation indicating that a
+ *                                wait will be satisfied when a CQS Object's
+ *                                value is Less than or Equal to
+ *                                the Wait Operation value
+ * @BASEP_CQS_WAIT_OPERATION_GT: CQS Wait Operation indicating that a
+ *                                wait will be satisfied when a CQS Object's
+ *                                value is Greater than the Wait Operation value
+ */
+typedef enum {
+	BASEP_CQS_WAIT_OPERATION_LE = 0,
+	BASEP_CQS_WAIT_OPERATION_GT = 1,
+} basep_cqs_wait_operation_op;
+
+struct base_cqs_wait_operation_info {
+	__u64 addr;
+	__u64 val;
+	__u8 operation;
+	__u8 data_type;
+	__u8 padding[6];
+};
+
+/**
+ * struct base_kcpu_command_cqs_wait_operation_info - structure which contains information
+ *		about the Timeline CQS wait objects
+ *
+ * @objs:              An array of Timeline CQS waits.
+ * @nr_objs:           Number of Timeline CQS waits in the array.
+ * @inherit_err_flags: Bit-pattern for the CQSs in the array who's error field
+ *                     to be served as the source for importing into the
+ *                     queue's error-state.
+ */
+struct base_kcpu_command_cqs_wait_operation_info {
+	__u64 objs;
+	__u32 nr_objs;
+	__u32 inherit_err_flags;
+};
+
+/**
+ * typedef basep_cqs_set_operation_op - Enumeration of CQS Set Operations
+ *
+ * @BASEP_CQS_SET_OPERATION_ADD: CQS Set operation for adding a value
+ *                                to a synchronization object
+ * @BASEP_CQS_SET_OPERATION_SET: CQS Set operation for setting the value
+ *                                of a synchronization object
+ */
+typedef enum {
+	BASEP_CQS_SET_OPERATION_ADD = 0,
+	BASEP_CQS_SET_OPERATION_SET = 1,
+} basep_cqs_set_operation_op;
+
+struct base_cqs_set_operation_info {
+	__u64 addr;
+	__u64 val;
+	__u8 operation;
+	__u8 data_type;
+	__u8 padding[6];
+};
+
+/**
+ * struct base_kcpu_command_cqs_set_operation_info - structure which contains information
+ *		about the Timeline CQS set objects
+ *
+ * @objs:    An array of Timeline CQS sets.
+ * @nr_objs: Number of Timeline CQS sets in the array.
+ * @padding: Structure padding, unused bytes.
+ */
+struct base_kcpu_command_cqs_set_operation_info {
+	__u64 objs;
+	__u32 nr_objs;
+	__u32 padding;
+};
+
+/**
+ * struct base_kcpu_command_import_info - structure which contains information
+ *		about the imported buffer.
+ *
+ * @handle:	Address of imported user buffer.
+ */
+struct base_kcpu_command_import_info {
+	__u64 handle;
+};
+
+/**
+ * struct base_kcpu_command_jit_alloc_info - structure which contains
+ *		information about jit memory allocation.
+ *
+ * @info:	An array of elements of the
+ *		struct base_jit_alloc_info type.
+ * @count:	The number of elements in the info array.
+ * @padding:	Padding to a multiple of 64 bits.
+ */
+struct base_kcpu_command_jit_alloc_info {
+	__u64 info;
+	__u8 count;
+	__u8 padding[7];
+};
+
+/**
+ * struct base_kcpu_command_jit_free_info - structure which contains
+ *		information about jit memory which is to be freed.
+ *
+ * @ids:	An array containing the JIT IDs to free.
+ * @count:	The number of elements in the ids array.
+ * @padding:	Padding to a multiple of 64 bits.
+ */
+struct base_kcpu_command_jit_free_info {
+	__u64 ids;
+	__u8 count;
+	__u8 padding[7];
+};
+
+/**
+ * struct base_kcpu_command_group_suspend_info - structure which contains
+ *		suspend buffer data captured for a suspended queue group.
+ *
+ * @buffer:		Pointer to an array of elements of the type char.
+ * @size:		Number of elements in the @buffer array.
+ * @group_handle:	Handle to the mapping of CSG.
+ * @padding:		padding to a multiple of 64 bits.
+ */
+struct base_kcpu_command_group_suspend_info {
+	__u64 buffer;
+	__u32 size;
+	__u8 group_handle;
+	__u8 padding[3];
+};
+
+
+/**
+ * struct base_kcpu_command - kcpu command.
+ * @type:	type of the kcpu command, one enum base_kcpu_command_type
+ * @padding:	padding to a multiple of 64 bits
+ * @info:	structure which contains information about the kcpu command;
+ *		actual type is determined by @p type
+ * @info.fence:              Fence
+ * @info.cqs_wait:           CQS wait
+ * @info.cqs_set:            CQS set
+ * @info.cqs_wait_operation: CQS wait operation
+ * @info.cqs_set_operation:  CQS set operation
+ * @info.import:             import
+ * @info.jit_alloc:          JIT allocation
+ * @info.jit_free:           JIT deallocation
+ * @info.suspend_buf_copy:   suspend buffer copy
+ * @info.sample_time:        sample time
+ * @info.padding:            padding
+ */
+struct base_kcpu_command {
+	__u8 type;
+	__u8 padding[sizeof(__u64) - sizeof(__u8)];
+	union {
+		struct base_kcpu_command_fence_info fence;
+		struct base_kcpu_command_cqs_wait_info cqs_wait;
+		struct base_kcpu_command_cqs_set_info cqs_set;
+		struct base_kcpu_command_cqs_wait_operation_info cqs_wait_operation;
+		struct base_kcpu_command_cqs_set_operation_info cqs_set_operation;
+		struct base_kcpu_command_import_info import;
+		struct base_kcpu_command_jit_alloc_info jit_alloc;
+		struct base_kcpu_command_jit_free_info jit_free;
+		struct base_kcpu_command_group_suspend_info suspend_buf_copy;
+		__u64 padding[2]; /* No sub-struct should be larger */
+	} info;
+};
+
+/**
+ * struct basep_cs_stream_control - CSI capabilities.
+ *
+ * @features: Features of this stream
+ * @padding:  Padding to a multiple of 64 bits.
+ */
+struct basep_cs_stream_control {
+	__u32 features;
+	__u32 padding;
+};
+
+/**
+ * struct basep_cs_group_control - CSG interface capabilities.
+ *
+ * @features:     Features of this group
+ * @stream_num:   Number of streams in this group
+ * @suspend_size: Size in bytes of the suspend buffer for this group
+ * @padding:      Padding to a multiple of 64 bits.
+ */
+struct basep_cs_group_control {
+	__u32 features;
+	__u32 stream_num;
+	__u32 suspend_size;
+	__u32 padding;
+};
+
+/**
+ * struct base_gpu_queue_group_error_fatal_payload - Unrecoverable fault
+ *        error information associated with GPU command queue group.
+ *
+ * @sideband:     Additional information of the unrecoverable fault.
+ * @status:       Unrecoverable fault information.
+ *                This consists of exception type (least significant byte) and
+ *                data (remaining bytes). One example of exception type is
+ *                CS_INVALID_INSTRUCTION (0x49).
+ * @padding:      Padding to make multiple of 64bits
+ */
+struct base_gpu_queue_group_error_fatal_payload {
+	__u64 sideband;
+	__u32 status;
+	__u32 padding;
+};
+
+/**
+ * struct base_gpu_queue_error_fatal_payload - Unrecoverable fault
+ *        error information related to GPU command queue.
+ *
+ * @sideband:     Additional information about this unrecoverable fault.
+ * @status:       Unrecoverable fault information.
+ *                This consists of exception type (least significant byte) and
+ *                data (remaining bytes). One example of exception type is
+ *                CS_INVALID_INSTRUCTION (0x49).
+ * @csi_index:    Index of the CSF interface the queue is bound to.
+ * @padding:      Padding to make multiple of 64bits
+ */
+struct base_gpu_queue_error_fatal_payload {
+	__u64 sideband;
+	__u32 status;
+	__u8 csi_index;
+	__u8 padding[3];
+};
+
+/**
+ * enum base_gpu_queue_group_error_type - GPU Fatal error type.
+ *
+ * @BASE_GPU_QUEUE_GROUP_ERROR_FATAL:       Fatal error associated with GPU
+ *                                          command queue group.
+ * @BASE_GPU_QUEUE_GROUP_QUEUE_ERROR_FATAL: Fatal error associated with GPU
+ *                                          command queue.
+ * @BASE_GPU_QUEUE_GROUP_ERROR_TIMEOUT:     Fatal error associated with
+ *                                          progress timeout.
+ * @BASE_GPU_QUEUE_GROUP_ERROR_TILER_HEAP_OOM: Fatal error due to running out
+ *                                             of tiler heap memory.
+ * @BASE_GPU_QUEUE_GROUP_ERROR_FATAL_COUNT: The number of fatal error types
+ *
+ * This type is used for &struct_base_gpu_queue_group_error.error_type.
+ */
+enum base_gpu_queue_group_error_type {
+	BASE_GPU_QUEUE_GROUP_ERROR_FATAL = 0,
+	BASE_GPU_QUEUE_GROUP_QUEUE_ERROR_FATAL,
+	BASE_GPU_QUEUE_GROUP_ERROR_TIMEOUT,
+	BASE_GPU_QUEUE_GROUP_ERROR_TILER_HEAP_OOM,
+	BASE_GPU_QUEUE_GROUP_ERROR_FATAL_COUNT
+};
+
+/**
+ * struct base_gpu_queue_group_error - Unrecoverable fault information
+ * @error_type:          Error type of @base_gpu_queue_group_error_type
+ *                       indicating which field in union payload is filled
+ * @padding:             Unused bytes for 64bit boundary
+ * @payload:             Input Payload
+ * @payload.fatal_group: Unrecoverable fault error associated with
+ *                       GPU command queue group
+ * @payload.fatal_queue: Unrecoverable fault error associated with command queue
+ */
+struct base_gpu_queue_group_error {
+	__u8 error_type;
+	__u8 padding[7];
+	union {
+		struct base_gpu_queue_group_error_fatal_payload fatal_group;
+		struct base_gpu_queue_error_fatal_payload fatal_queue;
+	} payload;
+};
+
+/**
+ * enum base_csf_notification_type - Notification type
+ *
+ * @BASE_CSF_NOTIFICATION_EVENT:                 Notification with kernel event
+ * @BASE_CSF_NOTIFICATION_GPU_QUEUE_GROUP_ERROR: Notification with GPU fatal
+ *                                               error
+ * @BASE_CSF_NOTIFICATION_CPU_QUEUE_DUMP:        Notification with dumping cpu
+ *                                               queue
+ * @BASE_CSF_NOTIFICATION_COUNT:                 The number of notification type
+ *
+ * This type is used for &struct_base_csf_notification.type.
+ */
+enum base_csf_notification_type {
+	BASE_CSF_NOTIFICATION_EVENT = 0,
+	BASE_CSF_NOTIFICATION_GPU_QUEUE_GROUP_ERROR,
+	BASE_CSF_NOTIFICATION_CPU_QUEUE_DUMP,
+	BASE_CSF_NOTIFICATION_COUNT
+};
+
+/**
+ * struct base_csf_notification - Event or error notification
+ *
+ * @type:                      Notification type of @base_csf_notification_type
+ * @padding:                   Padding for 64bit boundary
+ * @payload:                   Input Payload
+ * @payload.align:             To fit the struct into a 64-byte cache line
+ * @payload.csg_error:         CSG error
+ * @payload.csg_error.handle:  Handle of GPU command queue group associated with
+ *                             fatal error
+ * @payload.csg_error.padding: Padding
+ * @payload.csg_error.error:   Unrecoverable fault error
+ *
+ */
+struct base_csf_notification {
+	__u8 type;
+	__u8 padding[7];
+	union {
+		struct {
+			__u8 handle;
+			__u8 padding[7];
+			struct base_gpu_queue_group_error error;
+		} csg_error;
+
+		__u8 align[56];
+	} payload;
+};
+
+/**
+ * struct mali_base_gpu_core_props - GPU core props info
+ *
+ * @product_id: Pro specific value.
+ * @version_status: Status of the GPU release. No defined values, but starts at
+ *   0 and increases by one for each release status (alpha, beta, EAC, etc.).
+ *   4 bit values (0-15).
+ * @minor_revision: Minor release number of the GPU. "P" part of an "RnPn"
+ *   release number.
+ *   8 bit values (0-255).
+ * @major_revision: Major release number of the GPU. "R" part of an "RnPn"
+ *   release number.
+ *   4 bit values (0-15).
+ * @padding: padding to align to 8-byte
+ * @gpu_freq_khz_max: The maximum GPU frequency. Reported to applications by
+ *   clGetDeviceInfo()
+ * @log2_program_counter_size: Size of the shader program counter, in bits.
+ * @texture_features: TEXTURE_FEATURES_x registers, as exposed by the GPU. This
+ *   is a bitpattern where a set bit indicates that the format is supported.
+ *   Before using a texture format, it is recommended that the corresponding
+ *   bit be checked.
+ * @paddings: Padding bytes.
+ * @gpu_available_memory_size: Theoretical maximum memory available to the GPU.
+ *   It is unlikely that a client will be able to allocate all of this memory
+ *   for their own purposes, but this at least provides an upper bound on the
+ *   memory available to the GPU.
+ *   This is required for OpenCL's clGetDeviceInfo() call when
+ *   CL_DEVICE_GLOBAL_MEM_SIZE is requested, for OpenCL GPU devices. The
+ *   client will not be expecting to allocate anywhere near this value.
+ */
+struct mali_base_gpu_core_props {
+	__u32 product_id;
+	__u16 version_status;
+	__u16 minor_revision;
+	__u16 major_revision;
+	__u16 padding;
+	__u32 gpu_freq_khz_max;
+	__u32 log2_program_counter_size;
+	__u32 texture_features[BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS];
+	__u8 paddings[4];
+	__u64 gpu_available_memory_size;
+};
+
+#endif /* _UAPI_BASE_CSF_KERNEL_H_ */
--- a/include/uapi/gpu/arm/midgard/csf/mali_kbase_csf_errors_dumpfault.h
+++ b/include/uapi/gpu/arm/midgard/csf/mali_kbase_csf_errors_dumpfault.h
@@ -0,0 +1,81 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *
+ * (C) COPYRIGHT 2022 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#ifndef _UAPI_KBASE_CSF_ERRORS_DUMPFAULT_H_
+#define _UAPI_KBASE_CSF_ERRORS_DUMPFAULT_H_
+
+/**
+ * enum dumpfault_error_type - Enumeration to define errors to be dumped
+ *
+ * @DF_NO_ERROR:                       No pending error
+ * @DF_CSG_SUSPEND_TIMEOUT:            CSG suspension timeout
+ * @DF_CSG_TERMINATE_TIMEOUT:          CSG group termination timeout
+ * @DF_CSG_START_TIMEOUT:              CSG start timeout
+ * @DF_CSG_RESUME_TIMEOUT:             CSG resume timeout
+ * @DF_CSG_EP_CFG_TIMEOUT:             CSG end point configuration timeout
+ * @DF_CSG_STATUS_UPDATE_TIMEOUT:      CSG status update timeout
+ * @DF_PROGRESS_TIMER_TIMEOUT:         Progress timer timeout
+ * @DF_FW_INTERNAL_ERROR:              Firmware internal error
+ * @DF_CS_FATAL:                       CS fatal error
+ * @DF_CS_FAULT:                       CS fault error
+ * @DF_FENCE_WAIT_TIMEOUT:             Fence wait timeout
+ * @DF_PROTECTED_MODE_EXIT_TIMEOUT:    P.mode exit timeout
+ * @DF_PROTECTED_MODE_ENTRY_FAILURE:   P.mode entrance failure
+ * @DF_PING_REQUEST_TIMEOUT:           Ping request timeout
+ * @DF_CORE_DOWNSCALE_REQUEST_TIMEOUT: DCS downscale request timeout
+ * @DF_TILER_OOM:                      Tiler Out-of-memory error
+ * @DF_GPU_PAGE_FAULT:                 GPU page fault
+ * @DF_BUS_FAULT:                      MMU BUS Fault
+ * @DF_GPU_PROTECTED_FAULT:            GPU P.mode fault
+ * @DF_AS_ACTIVE_STUCK:                AS active stuck
+ * @DF_GPU_SOFT_RESET_FAILURE:         GPU soft reset falure
+ *
+ * This is used for kbase to notify error type of an event whereby
+ * user space client will dump relevant debugging information via debugfs.
+ * @DF_NO_ERROR is used to indicate no pending fault, thus the client will
+ * be blocked on reading debugfs file till a fault happens.
+ */
+enum dumpfault_error_type {
+	DF_NO_ERROR = 0,
+	DF_CSG_SUSPEND_TIMEOUT,
+	DF_CSG_TERMINATE_TIMEOUT,
+	DF_CSG_START_TIMEOUT,
+	DF_CSG_RESUME_TIMEOUT,
+	DF_CSG_EP_CFG_TIMEOUT,
+	DF_CSG_STATUS_UPDATE_TIMEOUT,
+	DF_PROGRESS_TIMER_TIMEOUT,
+	DF_FW_INTERNAL_ERROR,
+	DF_CS_FATAL,
+	DF_CS_FAULT,
+	DF_FENCE_WAIT_TIMEOUT,
+	DF_PROTECTED_MODE_EXIT_TIMEOUT,
+	DF_PROTECTED_MODE_ENTRY_FAILURE,
+	DF_PING_REQUEST_TIMEOUT,
+	DF_CORE_DOWNSCALE_REQUEST_TIMEOUT,
+	DF_TILER_OOM,
+	DF_GPU_PAGE_FAULT,
+	DF_BUS_FAULT,
+	DF_GPU_PROTECTED_FAULT,
+	DF_AS_ACTIVE_STUCK,
+	DF_GPU_SOFT_RESET_FAILURE,
+};
+
+#endif /* _UAPI_KBASE_CSF_ERRORS_DUMPFAULT_H_ */
--- a/include/uapi/gpu/arm/midgard/csf/mali_kbase_csf_ioctl.h
+++ b/include/uapi/gpu/arm/midgard/csf/mali_kbase_csf_ioctl.h
@@ -0,0 +1,680 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *
+ * (C) COPYRIGHT 2020-2023 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#ifndef _UAPI_KBASE_CSF_IOCTL_H_
+#define _UAPI_KBASE_CSF_IOCTL_H_
+
+#include <asm-generic/ioctl.h>
+#include <linux/types.h>
+
+/*
+ * 1.0:
+ * - CSF IOCTL header separated from JM
+ * 1.1:
+ * - Add a new priority level BASE_QUEUE_GROUP_PRIORITY_REALTIME
+ * - Add ioctl 54: This controls the priority setting.
+ * 1.2:
+ * - Add new CSF GPU_FEATURES register into the property structure
+ *   returned by KBASE_IOCTL_GET_GPUPROPS
+ * 1.3:
+ * - Add __u32 group_uid member to
+ *   &struct_kbase_ioctl_cs_queue_group_create.out
+ * 1.4:
+ * - Replace padding in kbase_ioctl_cs_get_glb_iface with
+ *   instr_features member of same size
+ * 1.5:
+ * - Add ioctl 40: kbase_ioctl_cs_queue_register_ex, this is a new
+ *   queue registration call with extended format for supporting CS
+ *   trace configurations with CSF trace_command.
+ * 1.6:
+ * - Added new HW performance counters interface to all GPUs.
+ * 1.7:
+ * - Added reserved field to QUEUE_GROUP_CREATE ioctl for future use
+ * 1.8:
+ * - Removed Kernel legacy HWC interface
+ * 1.9:
+ * - Reorganization of GPU-VA memory zones, including addition of
+ *   FIXED_VA zone and auto-initialization of EXEC_VA zone.
+ * - Added new Base memory allocation interface
+ * 1.10:
+ * - First release of new HW performance counters interface.
+ * 1.11:
+ * - Dummy model (no mali) backend will now clear HWC values after each sample
+ * 1.12:
+ * - Added support for incremental rendering flag in CSG create call
+ * 1.13:
+ * - Added ioctl to query a register of USER page.
+ * 1.14:
+ * - Added support for passing down the buffer descriptor VA in tiler heap init
+ * 1.15:
+ * - Enable new sync_wait GE condition
+ * 1.16:
+ * - Remove legacy definitions:
+ *   - base_jit_alloc_info_10_2
+ *   - base_jit_alloc_info_11_5
+ *   - kbase_ioctl_mem_jit_init_10_2
+ *   - kbase_ioctl_mem_jit_init_11_5
+ * 1.17:
+ * - Fix kinstr_prfcnt issues:
+ *   - Missing implicit sample for CMD_STOP when HWCNT buffer is full.
+ *   - Race condition when stopping periodic sampling.
+ *   - prfcnt_block_metadata::block_idx gaps.
+ *   - PRFCNT_CONTROL_CMD_SAMPLE_ASYNC is removed.
+ * 1.18:
+ * - Relax the requirement to create a mapping with BASE_MEM_MAP_TRACKING_HANDLE
+ *   before allocating GPU memory for the context.
+ * - CPU mappings of USER_BUFFER imported memory handles must be cached.
+ * 1.19:
+ * - Add NE support in queue_group_create IOCTL fields
+ * - Previous version retained as KBASE_IOCTL_CS_QUEUE_GROUP_CREATE_1_18 for
+ *     backward compatibility.
+ * 1.20:
+ * - Restrict child process from doing supported file operations (like mmap, ioctl,
+ *   read, poll) on the file descriptor of mali device file that was inherited
+ *   from the parent process.
+ * 1.21:
+ * - Remove KBASE_IOCTL_HWCNT_READER_SETUP and KBASE_HWCNT_READER_* ioctls.
+ * 1.22:
+ * - Add comp_pri_threshold and comp_pri_ratio attributes to
+ *   kbase_ioctl_cs_queue_group_create.
+ * 1.23:
+ * - Disallows changing the sharability on the GPU of imported dma-bufs to
+ *   BASE_MEM_COHERENT_SYSTEM using KBASE_IOCTL_MEM_FLAGS_CHANGE.
+ * 1.24:
+ * - Implement full block state support for hardware counters.
+ */
+
+#define BASE_UK_VERSION_MAJOR 1
+#define BASE_UK_VERSION_MINOR 24
+
+/**
+ * struct kbase_ioctl_version_check - Check version compatibility between
+ * kernel and userspace
+ *
+ * @major: Major version number
+ * @minor: Minor version number
+ */
+struct kbase_ioctl_version_check {
+	__u16 major;
+	__u16 minor;
+};
+
+#define KBASE_IOCTL_VERSION_CHECK_RESERVED \
+	_IOWR(KBASE_IOCTL_TYPE, 0, struct kbase_ioctl_version_check)
+
+/**
+ * struct kbase_ioctl_cs_queue_register - Register a GPU command queue with the
+ *                                        base back-end
+ *
+ * @buffer_gpu_addr: GPU address of the buffer backing the queue
+ * @buffer_size: Size of the buffer in bytes
+ * @priority: Priority of the queue within a group when run within a process
+ * @padding: Currently unused, must be zero
+ *
+ * Note: There is an identical sub-section in kbase_ioctl_cs_queue_register_ex.
+ *        Any change of this struct should also be mirrored to the latter.
+ */
+struct kbase_ioctl_cs_queue_register {
+	__u64 buffer_gpu_addr;
+	__u32 buffer_size;
+	__u8 priority;
+	__u8 padding[3];
+};
+
+#define KBASE_IOCTL_CS_QUEUE_REGISTER \
+	_IOW(KBASE_IOCTL_TYPE, 36, struct kbase_ioctl_cs_queue_register)
+
+/**
+ * struct kbase_ioctl_cs_queue_kick - Kick the GPU command queue group scheduler
+ *                                    to notify that a queue has been updated
+ *
+ * @buffer_gpu_addr: GPU address of the buffer backing the queue
+ */
+struct kbase_ioctl_cs_queue_kick {
+	__u64 buffer_gpu_addr;
+};
+
+#define KBASE_IOCTL_CS_QUEUE_KICK _IOW(KBASE_IOCTL_TYPE, 37, struct kbase_ioctl_cs_queue_kick)
+
+/**
+ * union kbase_ioctl_cs_queue_bind - Bind a GPU command queue to a group
+ *
+ * @in:                 Input parameters
+ * @in.buffer_gpu_addr: GPU address of the buffer backing the queue
+ * @in.group_handle:    Handle of the group to which the queue should be bound
+ * @in.csi_index:       Index of the CSF interface the queue should be bound to
+ * @in.padding:         Currently unused, must be zero
+ * @out:                Output parameters
+ * @out.mmap_handle:    Handle to be used for creating the mapping of CS
+ *                      input/output pages
+ */
+union kbase_ioctl_cs_queue_bind {
+	struct {
+		__u64 buffer_gpu_addr;
+		__u8 group_handle;
+		__u8 csi_index;
+		__u8 padding[6];
+	} in;
+	struct {
+		__u64 mmap_handle;
+	} out;
+};
+
+#define KBASE_IOCTL_CS_QUEUE_BIND _IOWR(KBASE_IOCTL_TYPE, 39, union kbase_ioctl_cs_queue_bind)
+
+/**
+ * struct kbase_ioctl_cs_queue_register_ex - Register a GPU command queue with the
+ *                                           base back-end in extended format,
+ *                                           involving trace buffer configuration
+ *
+ * @buffer_gpu_addr: GPU address of the buffer backing the queue
+ * @buffer_size: Size of the buffer in bytes
+ * @priority: Priority of the queue within a group when run within a process
+ * @padding: Currently unused, must be zero
+ * @ex_offset_var_addr: GPU address of the trace buffer write offset variable
+ * @ex_buffer_base: Trace buffer GPU base address for the queue
+ * @ex_buffer_size: Size of the trace buffer in bytes
+ * @ex_event_size: Trace event write size, in log2 designation
+ * @ex_event_state: Trace event states configuration
+ * @ex_padding: Currently unused, must be zero
+ *
+ * Note: There is an identical sub-section at the start of this struct to that
+ *        of @ref kbase_ioctl_cs_queue_register. Any change of this sub-section
+ *        must also be mirrored to the latter. Following the said sub-section,
+ *        the remaining fields forms the extension, marked with ex_*.
+ */
+struct kbase_ioctl_cs_queue_register_ex {
+	__u64 buffer_gpu_addr;
+	__u32 buffer_size;
+	__u8 priority;
+	__u8 padding[3];
+	__u64 ex_offset_var_addr;
+	__u64 ex_buffer_base;
+	__u32 ex_buffer_size;
+	__u8 ex_event_size;
+	__u8 ex_event_state;
+	__u8 ex_padding[2];
+};
+
+#define KBASE_IOCTL_CS_QUEUE_REGISTER_EX \
+	_IOW(KBASE_IOCTL_TYPE, 40, struct kbase_ioctl_cs_queue_register_ex)
+
+/**
+ * struct kbase_ioctl_cs_queue_terminate - Terminate a GPU command queue
+ *
+ * @buffer_gpu_addr: GPU address of the buffer backing the queue
+ */
+struct kbase_ioctl_cs_queue_terminate {
+	__u64 buffer_gpu_addr;
+};
+
+#define KBASE_IOCTL_CS_QUEUE_TERMINATE \
+	_IOW(KBASE_IOCTL_TYPE, 41, struct kbase_ioctl_cs_queue_terminate)
+
+/**
+ * union kbase_ioctl_cs_queue_group_create_1_6 - Create a GPU command queue
+ *                                               group
+ * @in:               Input parameters
+ * @in.tiler_mask:    Mask of tiler endpoints the group is allowed to use.
+ * @in.fragment_mask: Mask of fragment endpoints the group is allowed to use.
+ * @in.compute_mask:  Mask of compute endpoints the group is allowed to use.
+ * @in.cs_min:        Minimum number of CSs required.
+ * @in.priority:      Queue group's priority within a process.
+ * @in.tiler_max:     Maximum number of tiler endpoints the group is allowed
+ *                    to use.
+ * @in.fragment_max:  Maximum number of fragment endpoints the group is
+ *                    allowed to use.
+ * @in.compute_max:   Maximum number of compute endpoints the group is allowed
+ *                    to use.
+ * @in.padding:       Currently unused, must be zero
+ * @out:              Output parameters
+ * @out.group_handle: Handle of a newly created queue group.
+ * @out.padding:      Currently unused, must be zero
+ * @out.group_uid:    UID of the queue group available to base.
+ */
+union kbase_ioctl_cs_queue_group_create_1_6 {
+	struct {
+		__u64 tiler_mask;
+		__u64 fragment_mask;
+		__u64 compute_mask;
+		__u8 cs_min;
+		__u8 priority;
+		__u8 tiler_max;
+		__u8 fragment_max;
+		__u8 compute_max;
+		__u8 padding[3];
+
+	} in;
+	struct {
+		__u8 group_handle;
+		__u8 padding[3];
+		__u32 group_uid;
+	} out;
+};
+
+#define KBASE_IOCTL_CS_QUEUE_GROUP_CREATE_1_6 \
+	_IOWR(KBASE_IOCTL_TYPE, 42, union kbase_ioctl_cs_queue_group_create_1_6)
+
+/**
+ * union kbase_ioctl_cs_queue_group_create_1_18 - Create a GPU command queue group
+ * @in:               Input parameters
+ * @in.tiler_mask:    Mask of tiler endpoints the group is allowed to use.
+ * @in.fragment_mask: Mask of fragment endpoints the group is allowed to use.
+ * @in.compute_mask:  Mask of compute endpoints the group is allowed to use.
+ * @in.cs_min:        Minimum number of CSs required.
+ * @in.priority:      Queue group's priority within a process.
+ * @in.tiler_max:     Maximum number of tiler endpoints the group is allowed
+ *                    to use.
+ * @in.fragment_max:  Maximum number of fragment endpoints the group is
+ *                    allowed to use.
+ * @in.compute_max:   Maximum number of compute endpoints the group is allowed
+ *                    to use.
+ * @in.csi_handlers:  Flags to signal that the application intends to use CSI
+ *                    exception handlers in some linear buffers to deal with
+ *                    the given exception types.
+ * @in.padding:       Currently unused, must be zero
+ * @out:              Output parameters
+ * @out.group_handle: Handle of a newly created queue group.
+ * @out.padding:      Currently unused, must be zero
+ * @out.group_uid:    UID of the queue group available to base.
+ */
+union kbase_ioctl_cs_queue_group_create_1_18 {
+	struct {
+		__u64 tiler_mask;
+		__u64 fragment_mask;
+		__u64 compute_mask;
+		__u8 cs_min;
+		__u8 priority;
+		__u8 tiler_max;
+		__u8 fragment_max;
+		__u8 compute_max;
+		__u8 csi_handlers;
+		__u8 padding[2];
+		/**
+		 * @in.dvs_buf: buffer for deferred vertex shader
+		 */
+		__u64 dvs_buf;
+	} in;
+	struct {
+		__u8 group_handle;
+		__u8 padding[3];
+		__u32 group_uid;
+	} out;
+};
+
+#define KBASE_IOCTL_CS_QUEUE_GROUP_CREATE_1_18 \
+	_IOWR(KBASE_IOCTL_TYPE, 58, union kbase_ioctl_cs_queue_group_create_1_18)
+
+/**
+ * union kbase_ioctl_cs_queue_group_create - Create a GPU command queue group
+ * @in:               Input parameters
+ * @in.tiler_mask:    Mask of tiler endpoints the group is allowed to use.
+ * @in.fragment_mask: Mask of fragment endpoints the group is allowed to use.
+ * @in.compute_mask:  Mask of compute endpoints the group is allowed to use.
+ * @in.cs_min:        Minimum number of CSs required.
+ * @in.priority:      Queue group's priority within a process.
+ * @in.tiler_max:     Maximum number of tiler endpoints the group is allowed
+ *                    to use.
+ * @in.fragment_max:  Maximum number of fragment endpoints the group is
+ *                    allowed to use.
+ * @in.compute_max:   Maximum number of compute endpoints the group is allowed
+ *                    to use.
+ * @in.csi_handlers:  Flags to signal that the application intends to use CSI
+ *                    exception handlers in some linear buffers to deal with
+ *                    the given exception types.
+ * @in.padding:       Currently unused, must be zero
+ * @out:              Output parameters
+ * @out.group_handle: Handle of a newly created queue group.
+ * @out.padding:      Currently unused, must be zero
+ * @out.group_uid:    UID of the queue group available to base.
+ */
+union kbase_ioctl_cs_queue_group_create {
+	struct {
+		__u64 tiler_mask;
+		__u64 fragment_mask;
+		__u64 compute_mask;
+		__u8 cs_min;
+		__u8 priority;
+		__u8 tiler_max;
+		__u8 fragment_max;
+		__u8 compute_max;
+		__u8 csi_handlers;
+		/**
+		 * @in.reserved:   Reserved, currently unused, must be zero.
+		 */
+		__u16 reserved;
+		/**
+		 * @in.dvs_buf: buffer for deferred vertex shader
+		 */
+		__u64 dvs_buf;
+		__u64 padding[9];
+	} in;
+	struct {
+		__u8 group_handle;
+		__u8 padding[3];
+		__u32 group_uid;
+	} out;
+};
+
+#define KBASE_IOCTL_CS_QUEUE_GROUP_CREATE \
+	_IOWR(KBASE_IOCTL_TYPE, 58, union kbase_ioctl_cs_queue_group_create)
+
+/**
+ * struct kbase_ioctl_cs_queue_group_term - Terminate a GPU command queue group
+ *
+ * @group_handle: Handle of the queue group to be terminated
+ * @padding: Padding to round up to a multiple of 8 bytes, must be zero
+ */
+struct kbase_ioctl_cs_queue_group_term {
+	__u8 group_handle;
+	__u8 padding[7];
+};
+
+#define KBASE_IOCTL_CS_QUEUE_GROUP_TERMINATE \
+	_IOW(KBASE_IOCTL_TYPE, 43, struct kbase_ioctl_cs_queue_group_term)
+
+#define KBASE_IOCTL_CS_EVENT_SIGNAL _IO(KBASE_IOCTL_TYPE, 44)
+
+typedef __u8 base_kcpu_queue_id; /* We support up to 256 active KCPU queues */
+
+/**
+ * struct kbase_ioctl_kcpu_queue_new - Create a KCPU command queue
+ *
+ * @id: ID of the new command queue returned by the kernel
+ * @padding: Padding to round up to a multiple of 8 bytes, must be zero
+ */
+struct kbase_ioctl_kcpu_queue_new {
+	base_kcpu_queue_id id;
+	__u8 padding[7];
+};
+
+#define KBASE_IOCTL_KCPU_QUEUE_CREATE _IOR(KBASE_IOCTL_TYPE, 45, struct kbase_ioctl_kcpu_queue_new)
+
+/**
+ * struct kbase_ioctl_kcpu_queue_delete - Destroy a KCPU command queue
+ *
+ * @id: ID of the command queue to be destroyed
+ * @padding: Padding to round up to a multiple of 8 bytes, must be zero
+ */
+struct kbase_ioctl_kcpu_queue_delete {
+	base_kcpu_queue_id id;
+	__u8 padding[7];
+};
+
+#define KBASE_IOCTL_KCPU_QUEUE_DELETE \
+	_IOW(KBASE_IOCTL_TYPE, 46, struct kbase_ioctl_kcpu_queue_delete)
+
+/**
+ * struct kbase_ioctl_kcpu_queue_enqueue - Enqueue commands into the KCPU queue
+ *
+ * @addr: Memory address of an array of struct base_kcpu_queue_command
+ * @nr_commands: Number of commands in the array
+ * @id: kcpu queue identifier, returned by KBASE_IOCTL_KCPU_QUEUE_CREATE ioctl
+ * @padding: Padding to round up to a multiple of 8 bytes, must be zero
+ */
+struct kbase_ioctl_kcpu_queue_enqueue {
+	__u64 addr;
+	__u32 nr_commands;
+	base_kcpu_queue_id id;
+	__u8 padding[3];
+};
+
+#define KBASE_IOCTL_KCPU_QUEUE_ENQUEUE \
+	_IOW(KBASE_IOCTL_TYPE, 47, struct kbase_ioctl_kcpu_queue_enqueue)
+
+/**
+ * union kbase_ioctl_cs_tiler_heap_init - Initialize chunked tiler memory heap
+ * @in:                Input parameters
+ * @in.chunk_size:     Size of each chunk.
+ * @in.initial_chunks: Initial number of chunks that heap will be created with.
+ * @in.max_chunks:     Maximum number of chunks that the heap is allowed to use.
+ * @in.target_in_flight: Number of render-passes that the driver should attempt to
+ *                     keep in flight for which allocation of new chunks is
+ *                     allowed.
+ * @in.group_id:       Group ID to be used for physical allocations.
+ * @in.padding:        Padding
+ * @in.buf_desc_va:    Buffer descriptor GPU VA for tiler heap reclaims.
+ * @out:               Output parameters
+ * @out.gpu_heap_va:   GPU VA (virtual address) of Heap context that was set up
+ *                     for the heap.
+ * @out.first_chunk_va: GPU VA of the first chunk allocated for the heap,
+ *                     actually points to the header of heap chunk and not to
+ *                     the low address of free memory in the chunk.
+ */
+union kbase_ioctl_cs_tiler_heap_init {
+	struct {
+		__u32 chunk_size;
+		__u32 initial_chunks;
+		__u32 max_chunks;
+		__u16 target_in_flight;
+		__u8 group_id;
+		__u8 padding;
+		__u64 buf_desc_va;
+	} in;
+	struct {
+		__u64 gpu_heap_va;
+		__u64 first_chunk_va;
+	} out;
+};
+
+#define KBASE_IOCTL_CS_TILER_HEAP_INIT \
+	_IOWR(KBASE_IOCTL_TYPE, 48, union kbase_ioctl_cs_tiler_heap_init)
+
+/**
+ * union kbase_ioctl_cs_tiler_heap_init_1_13 - Initialize chunked tiler memory heap,
+ *                                             earlier version upto 1.13
+ * @in:                Input parameters
+ * @in.chunk_size:     Size of each chunk.
+ * @in.initial_chunks: Initial number of chunks that heap will be created with.
+ * @in.max_chunks:     Maximum number of chunks that the heap is allowed to use.
+ * @in.target_in_flight: Number of render-passes that the driver should attempt to
+ *                     keep in flight for which allocation of new chunks is
+ *                     allowed.
+ * @in.group_id:       Group ID to be used for physical allocations.
+ * @in.padding:        Padding
+ * @out:               Output parameters
+ * @out.gpu_heap_va:   GPU VA (virtual address) of Heap context that was set up
+ *                     for the heap.
+ * @out.first_chunk_va: GPU VA of the first chunk allocated for the heap,
+ *                     actually points to the header of heap chunk and not to
+ *                     the low address of free memory in the chunk.
+ */
+union kbase_ioctl_cs_tiler_heap_init_1_13 {
+	struct {
+		__u32 chunk_size;
+		__u32 initial_chunks;
+		__u32 max_chunks;
+		__u16 target_in_flight;
+		__u8 group_id;
+		__u8 padding;
+	} in;
+	struct {
+		__u64 gpu_heap_va;
+		__u64 first_chunk_va;
+	} out;
+};
+
+#define KBASE_IOCTL_CS_TILER_HEAP_INIT_1_13 \
+	_IOWR(KBASE_IOCTL_TYPE, 48, union kbase_ioctl_cs_tiler_heap_init_1_13)
+
+/**
+ * struct kbase_ioctl_cs_tiler_heap_term - Terminate a chunked tiler heap
+ *                                         instance
+ *
+ * @gpu_heap_va: GPU VA of Heap context that was set up for the heap.
+ */
+struct kbase_ioctl_cs_tiler_heap_term {
+	__u64 gpu_heap_va;
+};
+
+#define KBASE_IOCTL_CS_TILER_HEAP_TERM \
+	_IOW(KBASE_IOCTL_TYPE, 49, struct kbase_ioctl_cs_tiler_heap_term)
+
+/**
+ * union kbase_ioctl_cs_get_glb_iface - Request the global control block
+ *                                        of CSF interface capabilities
+ *
+ * @in:                    Input parameters
+ * @in.max_group_num:      The maximum number of groups to be read. Can be 0, in
+ *                         which case groups_ptr is unused.
+ * @in.max_total_stream_num: The maximum number of CSs to be read. Can be 0, in
+ *                         which case streams_ptr is unused.
+ * @in.groups_ptr:         Pointer where to store all the group data (sequentially).
+ * @in.streams_ptr:        Pointer where to store all the CS data (sequentially).
+ * @out:                   Output parameters
+ * @out.glb_version:       Global interface version.
+ * @out.features:          Bit mask of features (e.g. whether certain types of job
+ *                         can be suspended).
+ * @out.group_num:         Number of CSGs supported.
+ * @out.prfcnt_size:       Size of CSF performance counters, in bytes. Bits 31:16
+ *                         hold the size of firmware performance counter data
+ *                         and 15:0 hold the size of hardware performance counter
+ *                         data.
+ * @out.total_stream_num:  Total number of CSs, summed across all groups.
+ * @out.instr_features:    Instrumentation features. Bits 7:4 hold the maximum
+ *                         size of events. Bits 3:0 hold the offset update rate.
+ *                         (csf >= 1.1.0)
+ *
+ */
+union kbase_ioctl_cs_get_glb_iface {
+	struct {
+		__u32 max_group_num;
+		__u32 max_total_stream_num;
+		__u64 groups_ptr;
+		__u64 streams_ptr;
+	} in;
+	struct {
+		__u32 glb_version;
+		__u32 features;
+		__u32 group_num;
+		__u32 prfcnt_size;
+		__u32 total_stream_num;
+		__u32 instr_features;
+	} out;
+};
+
+#define KBASE_IOCTL_CS_GET_GLB_IFACE _IOWR(KBASE_IOCTL_TYPE, 51, union kbase_ioctl_cs_get_glb_iface)
+
+struct kbase_ioctl_cs_cpu_queue_info {
+	__u64 buffer;
+	__u64 size;
+};
+
+#define KBASE_IOCTL_VERSION_CHECK _IOWR(KBASE_IOCTL_TYPE, 52, struct kbase_ioctl_version_check)
+
+#define KBASE_IOCTL_CS_CPU_QUEUE_DUMP \
+	_IOW(KBASE_IOCTL_TYPE, 53, struct kbase_ioctl_cs_cpu_queue_info)
+
+/**
+ * union kbase_ioctl_mem_alloc_ex - Allocate memory on the GPU
+ * @in: Input parameters
+ * @in.va_pages: The number of pages of virtual address space to reserve
+ * @in.commit_pages: The number of physical pages to allocate
+ * @in.extension: The number of extra pages to allocate on each GPU fault which grows the region
+ * @in.flags: Flags
+ * @in.fixed_address: The GPU virtual address requested for the allocation,
+ *                    if the allocation is using the BASE_MEM_FIXED flag.
+ * @in.extra: Space for extra parameters that may be added in the future.
+ * @out: Output parameters
+ * @out.flags: Flags
+ * @out.gpu_va: The GPU virtual address which is allocated
+ */
+union kbase_ioctl_mem_alloc_ex {
+	struct {
+		__u64 va_pages;
+		__u64 commit_pages;
+		__u64 extension;
+		__u64 flags;
+		__u64 fixed_address;
+		__u64 extra[3];
+	} in;
+	struct {
+		__u64 flags;
+		__u64 gpu_va;
+	} out;
+};
+
+#define KBASE_IOCTL_MEM_ALLOC_EX _IOWR(KBASE_IOCTL_TYPE, 59, union kbase_ioctl_mem_alloc_ex)
+
+/**
+ * union kbase_ioctl_read_user_page - Read a register of USER page
+ *
+ * @in:               Input parameters.
+ * @in.offset:        Register offset in USER page.
+ * @in.padding:       Padding to round up to a multiple of 8 bytes, must be zero.
+ * @out:              Output parameters.
+ * @out.val_lo:       Value of 32bit register or the 1st half of 64bit register to be read.
+ * @out.val_hi:       Value of the 2nd half of 64bit register to be read.
+ */
+union kbase_ioctl_read_user_page {
+	struct {
+		__u32 offset;
+		__u32 padding;
+	} in;
+	struct {
+		__u32 val_lo;
+		__u32 val_hi;
+	} out;
+};
+
+#define KBASE_IOCTL_READ_USER_PAGE _IOWR(KBASE_IOCTL_TYPE, 60, union kbase_ioctl_read_user_page)
+
+/***************
+ * test ioctls *
+ ***************/
+#if MALI_UNIT_TEST
+/* These ioctls are purely for test purposes and are not used in the production
+ * driver, they therefore may change without notice
+ */
+
+/**
+ * struct kbase_ioctl_cs_event_memory_write - Write an event memory address
+ * @cpu_addr: Memory address to write
+ * @value: Value to write
+ * @padding: Currently unused, must be zero
+ */
+struct kbase_ioctl_cs_event_memory_write {
+	__u64 cpu_addr;
+	__u8 value;
+	__u8 padding[7];
+};
+
+/**
+ * union kbase_ioctl_cs_event_memory_read - Read an event memory address
+ * @in: Input parameters
+ * @in.cpu_addr: Memory address to read
+ * @out: Output parameters
+ * @out.value: Value read
+ * @out.padding: Currently unused, must be zero
+ */
+union kbase_ioctl_cs_event_memory_read {
+	struct {
+		__u64 cpu_addr;
+	} in;
+	struct {
+		__u8 value;
+		__u8 padding[7];
+	} out;
+};
+
+#endif /* MALI_UNIT_TEST */
+
+#endif /* _UAPI_KBASE_CSF_IOCTL_H_ */
--- a/include/uapi/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_regmap_csf.h
+++ b/include/uapi/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_regmap_csf.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *
+ * (C) COPYRIGHT 2021-2023 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#ifndef _UAPI_KBASE_GPU_REGMAP_CSF_H_
+#define _UAPI_KBASE_GPU_REGMAP_CSF_H_
+
+/* USER base address */
+#define USER_BASE 0x0010000
+#define USER_REG(r) (USER_BASE + (r))
+
+/* USER register offsets */
+#define LATEST_FLUSH 0x0000 /* () Flush ID of latest clean-and-invalidate operation */
+
+/* DOORBELLS base address */
+#define DOORBELLS_BASE 0x0080000
+#define DOORBELLS_REG(r) (DOORBELLS_BASE + (r))
+
+#endif /* _UAPI_KBASE_GPU_REGMAP_CSF_H_ */
--- a/include/uapi/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_regmap_jm.h
+++ b/include/uapi/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_regmap_jm.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *
+ * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#ifndef _UAPI_KBASE_GPU_REGMAP_JM_H_
+#define _UAPI_KBASE_GPU_REGMAP_JM_H_
+
+#endif /* _UAPI_KBASE_GPU_REGMAP_JM_H_ */
--- a/include/uapi/gpu/arm/midgard/gpu/mali_kbase_gpu_coherency.h
+++ b/include/uapi/gpu/arm/midgard/gpu/mali_kbase_gpu_coherency.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *
+ * (C) COPYRIGHT 2015-2023 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#ifndef _UAPI_KBASE_GPU_COHERENCY_H_
+#define _UAPI_KBASE_GPU_COHERENCY_H_
+
+#define COHERENCY_ACE_LITE 0U
+#define COHERENCY_ACE 1U
+#define COHERENCY_NONE 31U
+#define COHERENCY_FEATURE_BIT(x) (1U << (x))
+
+#endif /* _UAPI_KBASE_GPU_COHERENCY_H_ */
--- a/include/uapi/gpu/arm/midgard/gpu/mali_kbase_gpu_id.h
+++ b/include/uapi/gpu/arm/midgard/gpu/mali_kbase_gpu_id.h
@@ -0,0 +1,177 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *
+ * (C) COPYRIGHT 2015-2023 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#ifndef _UAPI_KBASE_GPU_ID_H_
+#define _UAPI_KBASE_GPU_ID_H_
+
+#if defined(__linux)
+#include <linux/types.h>
+#endif
+
+#define GPU_ID2_VERSION_STATUS_SHIFT 0
+#define GPU_ID2_VERSION_MINOR_SHIFT 4
+#define GPU_ID2_VERSION_MAJOR_SHIFT 12
+#define GPU_ID2_PRODUCT_MAJOR_SHIFT 16
+#define GPU_ID2_ARCH_REV_SHIFT 20
+#define GPU_ID2_ARCH_MINOR_SHIFT 24
+#define GPU_ID2_ARCH_MAJOR_SHIFT 28
+#define GPU_ID2_VERSION_STATUS (0xFu << GPU_ID2_VERSION_STATUS_SHIFT)
+#define GPU_ID2_VERSION_MINOR (0xFFu << GPU_ID2_VERSION_MINOR_SHIFT)
+#define GPU_ID2_VERSION_MAJOR (0xFu << GPU_ID2_VERSION_MAJOR_SHIFT)
+#define GPU_ID2_PRODUCT_MAJOR (0xFu << GPU_ID2_PRODUCT_MAJOR_SHIFT)
+#define GPU_ID2_ARCH_REV (0xFu << GPU_ID2_ARCH_REV_SHIFT)
+#define GPU_ID2_ARCH_MINOR (0xFu << GPU_ID2_ARCH_MINOR_SHIFT)
+#define GPU_ID2_ARCH_MAJOR (0xFu << GPU_ID2_ARCH_MAJOR_SHIFT)
+#define GPU_ID2_PRODUCT_MODEL (GPU_ID2_ARCH_MAJOR | GPU_ID2_PRODUCT_MAJOR)
+#define GPU_ID2_VERSION (GPU_ID2_VERSION_MAJOR | GPU_ID2_VERSION_MINOR | GPU_ID2_VERSION_STATUS)
+
+#define GPU_ID2_ARCH_REV_GET(gpu_id) \
+	((((__u32)gpu_id) & GPU_ID2_ARCH_REV) >> GPU_ID2_ARCH_REV_SHIFT)
+#define GPU_ID2_ARCH_MINOR_GET(gpu_id) \
+	((((__u32)gpu_id) & GPU_ID2_ARCH_MINOR) >> GPU_ID2_ARCH_MINOR_SHIFT)
+#define GPU_ID2_ARCH_MAJOR_GET(gpu_id) \
+	((((__u32)gpu_id) & GPU_ID2_ARCH_MAJOR) >> GPU_ID2_ARCH_MAJOR_SHIFT)
+#define GPU_ID2_VERSION_MINOR_GET(gpu_id) \
+	((((__u32)gpu_id) & GPU_ID2_VERSION_MINOR) >> GPU_ID2_VERSION_MINOR_SHIFT)
+#define GPU_ID2_VERSION_MAJOR_GET(gpu_id) \
+	((((__u32)gpu_id) & GPU_ID2_VERSION_MAJOR) >> GPU_ID2_VERSION_MAJOR_SHIFT)
+#define GPU_ID2_PRODUCT_MAJOR_GET(gpu_id) \
+	((((__u32)gpu_id) & GPU_ID2_PRODUCT_MAJOR) >> GPU_ID2_PRODUCT_MAJOR_SHIFT)
+/* Helper macro to construct a value consisting of arch major and revision
+ * using the value of gpu_id.
+ */
+#define GPU_ID2_ARCH_MAJOR_REV_REG(gpu_id) \
+	((((__u32)gpu_id) & GPU_ID2_ARCH_MAJOR) | (((__u32)gpu_id) & GPU_ID2_ARCH_REV))
+
+/* Helper macro to create a partial GPU_ID (new format) that defines
+ * a arch major and revision.
+ */
+#define GPU_ID2_ARCH_MAJOR_REV_MAKE(arch_major, arch_rev)    \
+	((((__u32)arch_major) << GPU_ID2_ARCH_MAJOR_SHIFT) | \
+	 (((__u32)arch_rev) << GPU_ID2_ARCH_REV_SHIFT))
+
+/* Helper macro to create a partial GPU_ID (new format) that defines
+ * a product ignoring its version.
+ */
+#define GPU_ID2_PRODUCT_MAKE(arch_major, arch_minor, arch_rev, product_major) \
+	((((__u32)arch_major) << GPU_ID2_ARCH_MAJOR_SHIFT) |                  \
+	 (((__u32)arch_minor) << GPU_ID2_ARCH_MINOR_SHIFT) |                  \
+	 (((__u32)arch_rev) << GPU_ID2_ARCH_REV_SHIFT) |                      \
+	 (((__u32)product_major) << GPU_ID2_PRODUCT_MAJOR_SHIFT))
+
+/* Helper macro to create a partial GPU_ID (new format) that specifies the
+ * revision (major, minor, status) of a product
+ */
+#define GPU_ID2_VERSION_MAKE(version_major, version_minor, version_status) \
+	((((__u32)version_major) << GPU_ID2_VERSION_MAJOR_SHIFT) |         \
+	 (((__u32)version_minor) << GPU_ID2_VERSION_MINOR_SHIFT) |         \
+	 (((__u32)version_status) << GPU_ID2_VERSION_STATUS_SHIFT))
+
+/* Helper macro to create a complete GPU_ID (new format) */
+#define GPU_ID2_MAKE(arch_major, arch_minor, arch_rev, product_major, version_major, \
+		     version_minor, version_status)                                  \
+	(GPU_ID2_PRODUCT_MAKE(arch_major, arch_minor, arch_rev, product_major) |     \
+	 GPU_ID2_VERSION_MAKE(version_major, version_minor, version_status))
+
+/* Helper macro to create a partial GPU_ID (new format) that identifies
+ * a particular GPU model by its arch_major and product_major.
+ */
+#define GPU_ID2_MODEL_MAKE(arch_major, product_major)        \
+	((((__u32)arch_major) << GPU_ID2_ARCH_MAJOR_SHIFT) | \
+	 (((__u32)product_major) << GPU_ID2_PRODUCT_MAJOR_SHIFT))
+
+/* Strip off the non-relevant bits from a product_id value and make it suitable
+ * for comparison against the GPU_ID2_PRODUCT_xxx values which identify a GPU
+ * model.
+ */
+#define GPU_ID2_MODEL_MATCH_VALUE(product_id) \
+	((((__u32)product_id) << GPU_ID2_PRODUCT_MAJOR_SHIFT) & GPU_ID2_PRODUCT_MODEL)
+
+#define GPU_ID2_PRODUCT_TMIX GPU_ID2_MODEL_MAKE(6, 0)
+#define GPU_ID2_PRODUCT_THEX GPU_ID2_MODEL_MAKE(6, 1)
+#define GPU_ID2_PRODUCT_TSIX GPU_ID2_MODEL_MAKE(7, 0)
+#define GPU_ID2_PRODUCT_TDVX GPU_ID2_MODEL_MAKE(7, 3)
+#define GPU_ID2_PRODUCT_TNOX GPU_ID2_MODEL_MAKE(7, 1)
+#define GPU_ID2_PRODUCT_TGOX GPU_ID2_MODEL_MAKE(7, 2)
+#define GPU_ID2_PRODUCT_TTRX GPU_ID2_MODEL_MAKE(9, 0)
+#define GPU_ID2_PRODUCT_TNAX GPU_ID2_MODEL_MAKE(9, 1)
+#define GPU_ID2_PRODUCT_TBEX GPU_ID2_MODEL_MAKE(9, 2)
+#define GPU_ID2_PRODUCT_LBEX GPU_ID2_MODEL_MAKE(9, 4)
+#define GPU_ID2_PRODUCT_TBAX GPU_ID2_MODEL_MAKE(9, 5)
+#define GPU_ID2_PRODUCT_TODX GPU_ID2_MODEL_MAKE(10, 2)
+#define GPU_ID2_PRODUCT_TGRX GPU_ID2_MODEL_MAKE(10, 3)
+#define GPU_ID2_PRODUCT_TVAX GPU_ID2_MODEL_MAKE(10, 4)
+#define GPU_ID2_PRODUCT_LODX GPU_ID2_MODEL_MAKE(10, 7)
+#define GPU_ID2_PRODUCT_TTUX GPU_ID2_MODEL_MAKE(11, 2)
+#define GPU_ID2_PRODUCT_LTUX GPU_ID2_MODEL_MAKE(11, 3)
+#define GPU_ID2_PRODUCT_TTIX GPU_ID2_MODEL_MAKE(12, 0)
+#define GPU_ID2_PRODUCT_LTIX GPU_ID2_MODEL_MAKE(12, 1)
+#define GPU_ID2_PRODUCT_TKRX GPU_ID2_MODEL_MAKE(13, 0)
+#define GPU_ID2_PRODUCT_LKRX GPU_ID2_MODEL_MAKE(13, 1)
+
+
+
+#define GPU_ID_U8_COMP(val3, val2, val1, val0) \
+	((((__u32)val3) << 24U) | (((__u32)val2) << 16U) | (((__u32)val1) << 8U) | ((__u32)val0))
+#define GPU_ID_U8_COMP_SHIFT(comp, idx) (((__u32)comp) >> (idx * 8U))
+#define GPU_ID_U8_COMP_GET(comp, idx) (GPU_ID_U8_COMP_SHIFT(comp, idx) & 0xFF)
+
+#define GPU_ID_PRODUCT_ID_MAKE(arch_major, arch_minor, arch_rev, product_major) \
+	GPU_ID_U8_COMP(arch_major, arch_minor, arch_rev, product_major)
+#define GPU_ID_MODEL_MAKE(arch_major, product_major) GPU_ID_U8_COMP(arch_major, 0, 0, product_major)
+#define GPU_ID_VERSION_MAKE(version_major, version_minor, version_status) \
+	GPU_ID_U8_COMP(0, version_major, version_minor, version_status)
+#define GPU_ID_ARCH_MAKE(arch_major, arch_minor, arch_rev) \
+	GPU_ID_U8_COMP(0, arch_major, arch_minor, arch_rev)
+
+/* Convert ID created from GPU_ID_PRODUCT_ID_MAKE() to match the format of
+ * GPU_ID_MODEL_MAKE()
+ */
+#define GPU_ID_MODEL_MATCH_VALUE(product_id) (((__u32)product_id) & GPU_ID_MODEL_MAKE(0xFF, 0xFF))
+
+#define GPU_ID_VERSION_ID_MAJOR_MINOR_GET(version_id) GPU_ID_U8_COMP_SHIFT(version_id, 1)
+#define GPU_ID_VERSION_ID_STATUS_GET(version_id) GPU_ID_U8_COMP_GET(version_id, 0)
+#define GPU_ID_VERSION_ID_MINOR_GET(version_id) GPU_ID_U8_COMP_GET(version_id, 1)
+#define GPU_ID_VERSION_ID_MAJOR_GET(version_id) GPU_ID_U8_COMP_GET(version_id, 2)
+
+#define GPU_ID_PRODUCT_TMIX GPU_ID_MODEL_MAKE(6, 0)
+#define GPU_ID_PRODUCT_THEX GPU_ID_MODEL_MAKE(6, 1)
+#define GPU_ID_PRODUCT_TSIX GPU_ID_MODEL_MAKE(7, 0)
+#define GPU_ID_PRODUCT_TDVX GPU_ID_MODEL_MAKE(7, 3)
+#define GPU_ID_PRODUCT_TNOX GPU_ID_MODEL_MAKE(7, 1)
+#define GPU_ID_PRODUCT_TGOX GPU_ID_MODEL_MAKE(7, 2)
+#define GPU_ID_PRODUCT_TTRX GPU_ID_MODEL_MAKE(9, 0)
+#define GPU_ID_PRODUCT_TNAX GPU_ID_MODEL_MAKE(9, 1)
+#define GPU_ID_PRODUCT_TBEX GPU_ID_MODEL_MAKE(9, 2)
+#define GPU_ID_PRODUCT_LBEX GPU_ID_MODEL_MAKE(9, 4)
+#define GPU_ID_PRODUCT_TBAX GPU_ID_MODEL_MAKE(9, 5)
+#define GPU_ID_PRODUCT_TODX GPU_ID_MODEL_MAKE(10, 2)
+#define GPU_ID_PRODUCT_TGRX GPU_ID_MODEL_MAKE(10, 3)
+#define GPU_ID_PRODUCT_TVAX GPU_ID_MODEL_MAKE(10, 4)
+#define GPU_ID_PRODUCT_LODX GPU_ID_MODEL_MAKE(10, 7)
+#define GPU_ID_PRODUCT_TTUX GPU_ID_MODEL_MAKE(11, 2)
+#define GPU_ID_PRODUCT_LTUX GPU_ID_MODEL_MAKE(11, 3)
+#define GPU_ID_PRODUCT_TTIX GPU_ID_MODEL_MAKE(12, 0)
+#define GPU_ID_PRODUCT_LTIX GPU_ID_MODEL_MAKE(12, 1)
+#define GPU_ID_PRODUCT_TKRX GPU_ID_MODEL_MAKE(13, 0)
+#define GPU_ID_PRODUCT_LKRX GPU_ID_MODEL_MAKE(13, 1)
+
+#endif /* _UAPI_KBASE_GPU_ID_H_ */
--- a/include/uapi/gpu/arm/midgard/gpu/mali_kbase_gpu_regmap.h
+++ b/include/uapi/gpu/arm/midgard/gpu/mali_kbase_gpu_regmap.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *
+ * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#ifndef _UAPI_KBASE_GPU_REGMAP_H_
+#define _UAPI_KBASE_GPU_REGMAP_H_
+
+#if MALI_USE_CSF
+#include "backend/mali_kbase_gpu_regmap_csf.h"
+#else
+#include "backend/mali_kbase_gpu_regmap_jm.h"
+#endif /* !MALI_USE_CSF */
+
+#endif /* _UAPI_KBASE_GPU_REGMAP_H_ */
--- a/include/uapi/gpu/arm/midgard/jm/mali_base_jm_kernel.h
+++ b/include/uapi/gpu/arm/midgard/jm/mali_base_jm_kernel.h
--- a/include/uapi/gpu/arm/midgard/jm/mali_kbase_jm_ioctl.h
+++ b/include/uapi/gpu/arm/midgard/jm/mali_kbase_jm_ioctl.h
@@ -0,0 +1,253 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *
+ * (C) COPYRIGHT 2020-2023 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#ifndef _UAPI_KBASE_JM_IOCTL_H_
+#define _UAPI_KBASE_JM_IOCTL_H_
+
+#include <asm-generic/ioctl.h>
+#include <linux/types.h>
+
+/*
+ * 11.1:
+ * - Add BASE_MEM_TILER_ALIGN_TOP under base_mem_alloc_flags
+ * 11.2:
+ * - KBASE_MEM_QUERY_FLAGS can return KBASE_REG_PF_GROW and KBASE_REG_PROTECTED,
+ *   which some user-side clients prior to 11.2 might fault if they received
+ *   them
+ * 11.3:
+ * - New ioctls KBASE_IOCTL_STICKY_RESOURCE_MAP and
+ *   KBASE_IOCTL_STICKY_RESOURCE_UNMAP
+ * 11.4:
+ * - New ioctl KBASE_IOCTL_MEM_FIND_GPU_START_AND_OFFSET
+ * 11.5:
+ * - New ioctl: KBASE_IOCTL_MEM_JIT_INIT (old ioctl renamed to _OLD)
+ * 11.6:
+ * - Added flags field to base_jit_alloc_info structure, which can be used to
+ *   specify pseudo chunked tiler alignment for JIT allocations.
+ * 11.7:
+ * - Removed UMP support
+ * 11.8:
+ * - Added BASE_MEM_UNCACHED_GPU under base_mem_alloc_flags
+ * 11.9:
+ * - Added BASE_MEM_PERMANENT_KERNEL_MAPPING and BASE_MEM_FLAGS_KERNEL_ONLY
+ *   under base_mem_alloc_flags
+ * 11.10:
+ * - Enabled the use of nr_extres field of base_jd_atom_v2 structure for
+ *   JIT_ALLOC and JIT_FREE type softjobs to enable multiple JIT allocations
+ *   with one softjob.
+ * 11.11:
+ * - Added BASE_MEM_GPU_VA_SAME_4GB_PAGE under base_mem_alloc_flags
+ * 11.12:
+ * - Removed ioctl: KBASE_IOCTL_GET_PROFILING_CONTROLS
+ * 11.13:
+ * - New ioctl: KBASE_IOCTL_MEM_EXEC_INIT
+ * 11.14:
+ * - Add BASE_MEM_GROUP_ID_MASK, base_mem_group_id_get, base_mem_group_id_set
+ *   under base_mem_alloc_flags
+ * 11.15:
+ * - Added BASEP_CONTEXT_MMU_GROUP_ID_MASK under base_context_create_flags.
+ * - Require KBASE_IOCTL_SET_FLAGS before BASE_MEM_MAP_TRACKING_HANDLE can be
+ *   passed to mmap().
+ * 11.16:
+ * - Extended ioctl KBASE_IOCTL_MEM_SYNC to accept imported dma-buf.
+ * - Modified (backwards compatible) ioctl KBASE_IOCTL_MEM_IMPORT behavior for
+ *   dma-buf. Now, buffers are mapped on GPU when first imported, no longer
+ *   requiring external resource or sticky resource tracking. UNLESS,
+ *   CONFIG_MALI_DMA_BUF_MAP_ON_DEMAND is enabled.
+ * 11.17:
+ * - Added BASE_JD_REQ_JOB_SLOT.
+ * - Reused padding field in base_jd_atom_v2 to pass job slot number.
+ * - New ioctl: KBASE_IOCTL_GET_CPU_GPU_TIMEINFO
+ * 11.18:
+ * - Added BASE_MEM_IMPORT_SYNC_ON_MAP_UNMAP under base_mem_alloc_flags
+ * 11.19:
+ * - Extended base_jd_atom_v2 to allow a renderpass ID to be specified.
+ * 11.20:
+ * - Added new phys_pages member to kbase_ioctl_mem_jit_init for
+ *   KBASE_IOCTL_MEM_JIT_INIT, previous variants of this renamed to use _10_2
+ *   (replacing '_OLD') and _11_5 suffixes
+ * - Replaced compat_core_req (deprecated in 10.3) with jit_id[2] in
+ *   base_jd_atom_v2. It must currently be initialized to zero.
+ * - Added heap_info_gpu_addr to base_jit_alloc_info, and
+ *   BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE allowable in base_jit_alloc_info's
+ *   flags member. Previous variants of this structure are kept and given _10_2
+ *   and _11_5 suffixes.
+ * - The above changes are checked for safe values in usual builds
+ * 11.21:
+ * - v2.0 of mali_trace debugfs file, which now versions the file separately
+ * 11.22:
+ * - Added base_jd_atom (v3), which is seq_nr + base_jd_atom_v2.
+ *   KBASE_IOCTL_JOB_SUBMIT supports both in parallel.
+ * 11.23:
+ * - Modified KBASE_IOCTL_MEM_COMMIT behavior to reject requests to modify
+ *   the physical memory backing of JIT allocations. This was not supposed
+ *   to be a valid use case, but it was allowed by the previous implementation.
+ * 11.24:
+ * - Added a sysfs file 'serialize_jobs' inside a new sub-directory
+ *   'scheduling'.
+ * 11.25:
+ * - Enabled JIT pressure limit in base/kbase by default
+ * 11.26
+ * - Added kinstr_jm API
+ * 11.27
+ * - Backwards compatible extension to HWC ioctl.
+ * 11.28:
+ * - Added kernel side cache ops needed hint
+ * 11.29:
+ * - Reserve ioctl 52
+ * 11.30:
+ * - Add a new priority level BASE_JD_PRIO_REALTIME
+ * - Add ioctl 54: This controls the priority setting.
+ * 11.31:
+ * - Added BASE_JD_REQ_LIMITED_CORE_MASK.
+ * - Added ioctl 55: set_limited_core_count.
+ * 11.32:
+ * - Added new HW performance counters interface to all GPUs.
+ * 11.33:
+ * - Removed Kernel legacy HWC interface
+ * 11.34:
+ * - First release of new HW performance counters interface.
+ * 11.35:
+ * - Dummy model (no mali) backend will now clear HWC values after each sample
+ * 11.36:
+ * - Remove legacy definitions:
+ *   - base_jit_alloc_info_10_2
+ *   - base_jit_alloc_info_11_5
+ *   - kbase_ioctl_mem_jit_init_10_2
+ *   - kbase_ioctl_mem_jit_init_11_5
+ * 11.37:
+ * - Fix kinstr_prfcnt issues:
+ *   - Missing implicit sample for CMD_STOP when HWCNT buffer is full.
+ *   - Race condition when stopping periodic sampling.
+ *   - prfcnt_block_metadata::block_idx gaps.
+ *   - PRFCNT_CONTROL_CMD_SAMPLE_ASYNC is removed.
+ * 11.38:
+ * - Relax the requirement to create a mapping with BASE_MEM_MAP_TRACKING_HANDLE
+ *   before allocating GPU memory for the context.
+ * - CPU mappings of USER_BUFFER imported memory handles must be cached.
+ * 11.39:
+ * - Restrict child process from doing supported file operations (like mmap, ioctl,
+ *   read, poll) on the file descriptor of mali device file that was inherited
+ *   from the parent process.
+ * 11.40:
+ * - Remove KBASE_IOCTL_HWCNT_READER_SETUP and KBASE_HWCNT_READER_* ioctls.
+ * 11.41:
+ * - Disallows changing the sharability on the GPU of imported dma-bufs to
+ *   BASE_MEM_COHERENT_SYSTEM using KBASE_IOCTL_MEM_FLAGS_CHANGE.
+ * 11.42:
+ * - Implement full block state support for hardware counters.
+ */
+
+#define BASE_UK_VERSION_MAJOR 11
+#define BASE_UK_VERSION_MINOR 42
+
+/**
+ * struct kbase_ioctl_version_check - Check version compatibility between
+ * kernel and userspace
+ *
+ * @major: Major version number
+ * @minor: Minor version number
+ */
+struct kbase_ioctl_version_check {
+	__u16 major;
+	__u16 minor;
+};
+
+#define KBASE_IOCTL_VERSION_CHECK _IOWR(KBASE_IOCTL_TYPE, 0, struct kbase_ioctl_version_check)
+
+/**
+ * struct kbase_ioctl_job_submit - Submit jobs/atoms to the kernel
+ *
+ * @addr: Memory address of an array of struct base_jd_atom_v2 or v3
+ * @nr_atoms: Number of entries in the array
+ * @stride: sizeof(struct base_jd_atom_v2) or sizeof(struct base_jd_atom)
+ */
+struct kbase_ioctl_job_submit {
+	__u64 addr;
+	__u32 nr_atoms;
+	__u32 stride;
+};
+
+#define KBASE_IOCTL_JOB_SUBMIT _IOW(KBASE_IOCTL_TYPE, 2, struct kbase_ioctl_job_submit)
+
+#define KBASE_IOCTL_POST_TERM _IO(KBASE_IOCTL_TYPE, 4)
+
+/**
+ * struct kbase_ioctl_soft_event_update - Update the status of a soft-event
+ * @event: GPU address of the event which has been updated
+ * @new_status: The new status to set
+ * @flags: Flags for future expansion
+ */
+struct kbase_ioctl_soft_event_update {
+	__u64 event;
+	__u32 new_status;
+	__u32 flags;
+};
+
+#define KBASE_IOCTL_SOFT_EVENT_UPDATE \
+	_IOW(KBASE_IOCTL_TYPE, 28, struct kbase_ioctl_soft_event_update)
+
+/**
+ * struct kbase_kinstr_jm_fd_out - Explains the compatibility information for
+ * the `struct kbase_kinstr_jm_atom_state_change` structure returned from the
+ * kernel
+ *
+ * @size:    The size of the `struct kbase_kinstr_jm_atom_state_change`
+ * @version: Represents a breaking change in the
+ *           `struct kbase_kinstr_jm_atom_state_change`
+ * @padding: Explicit padding to get the structure up to 64bits. See
+ * https://www.kernel.org/doc/Documentation/ioctl/botching-up-ioctls.rst
+ *
+ * The `struct kbase_kinstr_jm_atom_state_change` may have extra members at the
+ * end of the structure that older user space might not understand. If the
+ * `version` is the same, the structure is still compatible with newer kernels.
+ * The `size` can be used to cast the opaque memory returned from the kernel.
+ */
+struct kbase_kinstr_jm_fd_out {
+	__u16 size;
+	__u8 version;
+	__u8 padding[5];
+};
+
+/**
+ * struct kbase_kinstr_jm_fd_in - Options when creating the file descriptor
+ *
+ * @count: Number of atom states that can be stored in the kernel circular
+ *         buffer. Must be a power of two
+ * @padding: Explicit padding to get the structure up to 64bits. See
+ * https://www.kernel.org/doc/Documentation/ioctl/botching-up-ioctls.rst
+ */
+struct kbase_kinstr_jm_fd_in {
+	__u16 count;
+	__u8 padding[6];
+};
+
+union kbase_kinstr_jm_fd {
+	struct kbase_kinstr_jm_fd_in in;
+	struct kbase_kinstr_jm_fd_out out;
+};
+
+#define KBASE_IOCTL_KINSTR_JM_FD _IOWR(KBASE_IOCTL_TYPE, 51, union kbase_kinstr_jm_fd)
+
+#define KBASE_IOCTL_VERSION_CHECK_RESERVED \
+	_IOWR(KBASE_IOCTL_TYPE, 52, struct kbase_ioctl_version_check)
+
+#endif /* _UAPI_KBASE_JM_IOCTL_H_ */
--- a/include/uapi/gpu/arm/midgard/mali_base_common_kernel.h
+++ b/include/uapi/gpu/arm/midgard/mali_base_common_kernel.h
@@ -0,0 +1,231 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *
+ * (C) COPYRIGHT 2022-2023 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#ifndef _UAPI_BASE_COMMON_KERNEL_H_
+#define _UAPI_BASE_COMMON_KERNEL_H_
+
+#include <linux/types.h>
+
+struct base_mem_handle {
+	struct {
+		__u64 handle;
+	} basep;
+};
+
+#define BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS 4
+
+/* Memory allocation, access/hint flags & mask.
+ *
+ * See base_mem_alloc_flags.
+ */
+
+/* IN */
+/* Read access CPU side
+ */
+#define BASE_MEM_PROT_CPU_RD ((base_mem_alloc_flags)1 << 0)
+
+/* Write access CPU side
+ */
+#define BASE_MEM_PROT_CPU_WR ((base_mem_alloc_flags)1 << 1)
+
+/* Read access GPU side
+ */
+#define BASE_MEM_PROT_GPU_RD ((base_mem_alloc_flags)1 << 2)
+
+/* Write access GPU side
+ */
+#define BASE_MEM_PROT_GPU_WR ((base_mem_alloc_flags)1 << 3)
+
+/* Execute allowed on the GPU side
+ */
+#define BASE_MEM_PROT_GPU_EX ((base_mem_alloc_flags)1 << 4)
+
+/* Will be permanently mapped in kernel space.
+ * Flag is only allowed on allocations originating from kbase.
+ */
+#define BASEP_MEM_PERMANENT_KERNEL_MAPPING ((base_mem_alloc_flags)1 << 5)
+
+/* The allocation will completely reside within the same 4GB chunk in the GPU
+ * virtual space.
+ * Since this flag is primarily required only for the TLS memory which will
+ * not be used to contain executable code and also not used for Tiler heap,
+ * it can't be used along with BASE_MEM_PROT_GPU_EX and TILER_ALIGN_TOP flags.
+ */
+#define BASE_MEM_GPU_VA_SAME_4GB_PAGE ((base_mem_alloc_flags)1 << 6)
+
+/* Userspace is not allowed to free this memory.
+ * Flag is only allowed on allocations originating from kbase.
+ */
+#define BASEP_MEM_NO_USER_FREE ((base_mem_alloc_flags)1 << 7)
+
+/* Grow backing store on GPU Page Fault
+ */
+#define BASE_MEM_GROW_ON_GPF ((base_mem_alloc_flags)1 << 9)
+
+/* Page coherence Outer shareable, if available
+ */
+#define BASE_MEM_COHERENT_SYSTEM ((base_mem_alloc_flags)1 << 10)
+
+/* Page coherence Inner shareable
+ */
+#define BASE_MEM_COHERENT_LOCAL ((base_mem_alloc_flags)1 << 11)
+
+/* IN/OUT */
+/* Should be cached on the CPU, returned if actually cached
+ */
+#define BASE_MEM_CACHED_CPU ((base_mem_alloc_flags)1 << 12)
+
+/* IN/OUT */
+/* Must have same VA on both the GPU and the CPU
+ */
+#define BASE_MEM_SAME_VA ((base_mem_alloc_flags)1 << 13)
+
+/* OUT */
+/* Must call mmap to acquire a GPU address for the allocation
+ */
+#define BASE_MEM_NEED_MMAP ((base_mem_alloc_flags)1 << 14)
+
+/* IN */
+/* Page coherence Outer shareable, required.
+ */
+#define BASE_MEM_COHERENT_SYSTEM_REQUIRED ((base_mem_alloc_flags)1 << 15)
+
+/* Protected memory
+ */
+#define BASE_MEM_PROTECTED ((base_mem_alloc_flags)1 << 16)
+
+/* Not needed physical memory
+ */
+#define BASE_MEM_DONT_NEED ((base_mem_alloc_flags)1 << 17)
+
+/* Must use shared CPU/GPU zone (SAME_VA zone) but doesn't require the
+ * addresses to be the same
+ */
+#define BASE_MEM_IMPORT_SHARED ((base_mem_alloc_flags)1 << 18)
+
+/* Should be uncached on the GPU, will work only for GPUs using AARCH64 mmu
+ * mode. Some components within the GPU might only be able to access memory
+ * that is GPU cacheable. Refer to the specific GPU implementation for more
+ * details. The 3 shareability flags will be ignored for GPU uncached memory.
+ * If used while importing USER_BUFFER type memory, then the import will fail
+ * if the memory is not aligned to GPU and CPU cache line width.
+ */
+#define BASE_MEM_UNCACHED_GPU ((base_mem_alloc_flags)1 << 21)
+
+/*
+ * Bits [22:25] for group_id (0~15).
+ *
+ * base_mem_group_id_set() should be used to pack a memory group ID into a
+ * base_mem_alloc_flags value instead of accessing the bits directly.
+ * base_mem_group_id_get() should be used to extract the memory group ID from
+ * a base_mem_alloc_flags value.
+ */
+#define BASEP_MEM_GROUP_ID_SHIFT 22
+#define BASE_MEM_GROUP_ID_MASK ((base_mem_alloc_flags)0xF << BASEP_MEM_GROUP_ID_SHIFT)
+
+/* Must do CPU cache maintenance when imported memory is mapped/unmapped
+ * on GPU. Currently applicable to dma-buf type only.
+ */
+#define BASE_MEM_IMPORT_SYNC_ON_MAP_UNMAP ((base_mem_alloc_flags)1 << 26)
+
+/* OUT */
+/* Kernel side cache sync ops required */
+#define BASE_MEM_KERNEL_SYNC ((base_mem_alloc_flags)1 << 28)
+
+/* Number of bits used as flags for base memory management
+ *
+ * Must be kept in sync with the base_mem_alloc_flags flags
+ */
+#define BASE_MEM_FLAGS_NR_BITS 30
+
+/* A mask for all output bits, excluding IN/OUT bits.
+ */
+#define BASE_MEM_FLAGS_OUTPUT_MASK BASE_MEM_NEED_MMAP
+
+/* A mask for all input bits, including IN/OUT bits.
+ */
+#define BASE_MEM_FLAGS_INPUT_MASK \
+	(((1 << BASE_MEM_FLAGS_NR_BITS) - 1) & ~BASE_MEM_FLAGS_OUTPUT_MASK)
+
+/* Special base mem handles.
+ */
+#define BASEP_MEM_INVALID_HANDLE (0ul)
+#define BASE_MEM_MMU_DUMP_HANDLE (1ul << LOCAL_PAGE_SHIFT)
+#define BASE_MEM_TRACE_BUFFER_HANDLE (2ul << LOCAL_PAGE_SHIFT)
+#define BASE_MEM_MAP_TRACKING_HANDLE (3ul << LOCAL_PAGE_SHIFT)
+#define BASEP_MEM_WRITE_ALLOC_PAGES_HANDLE (4ul << LOCAL_PAGE_SHIFT)
+/* reserved handles ..-47<<PAGE_SHIFT> for future special handles */
+#define BASE_MEM_COOKIE_BASE (64ul << LOCAL_PAGE_SHIFT)
+#define BASE_MEM_FIRST_FREE_ADDRESS ((BITS_PER_LONG << LOCAL_PAGE_SHIFT) + BASE_MEM_COOKIE_BASE)
+
+/* Flags to pass to ::base_context_init.
+ * Flags can be ORed together to enable multiple things.
+ *
+ * These share the same space as BASEP_CONTEXT_FLAG_*, and so must
+ * not collide with them.
+ */
+typedef __u32 base_context_create_flags;
+
+/* Flags for base context */
+
+/* No flags set */
+#define BASE_CONTEXT_CREATE_FLAG_NONE ((base_context_create_flags)0)
+
+/* Base context is embedded in a cctx object (flag used for CINSTR
+ * software counter macros)
+ */
+#define BASE_CONTEXT_CCTX_EMBEDDED ((base_context_create_flags)1 << 0)
+
+/* Base context is a 'System Monitor' context for Hardware counters.
+ *
+ * One important side effect of this is that job submission is disabled.
+ */
+#define BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED ((base_context_create_flags)1 << 1)
+
+/* Bit-shift used to encode a memory group ID in base_context_create_flags
+ */
+#define BASEP_CONTEXT_MMU_GROUP_ID_SHIFT (3)
+
+/* Bitmask used to encode a memory group ID in base_context_create_flags
+ */
+#define BASEP_CONTEXT_MMU_GROUP_ID_MASK \
+	((base_context_create_flags)0xF << BASEP_CONTEXT_MMU_GROUP_ID_SHIFT)
+
+/* Bitpattern describing the base_context_create_flags that can be
+ * passed to the kernel
+ */
+#define BASEP_CONTEXT_CREATE_KERNEL_FLAGS \
+	(BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED | BASEP_CONTEXT_MMU_GROUP_ID_MASK)
+
+/* Flags for base tracepoint
+ */
+
+/* Enable additional tracepoints for latency measurements (TL_ATOM_READY,
+ * TL_ATOM_DONE, TL_ATOM_PRIO_CHANGE, TL_ATOM_EVENT_POST)
+ */
+#define BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS (1U << 0)
+
+/* Indicate that job dumping is enabled. This could affect certain timers
+ * to account for the performance impact.
+ */
+#define BASE_TLSTREAM_JOB_DUMPING_ENABLED (1U << 1)
+
+#endif /* _UAPI_BASE_COMMON_KERNEL_H_ */
--- a/include/uapi/gpu/arm/midgard/mali_base_kernel.h
+++ b/include/uapi/gpu/arm/midgard/mali_base_kernel.h
@@ -0,0 +1,638 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *
+ * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+/*
+ * Base structures shared with the kernel.
+ */
+
+#ifndef _UAPI_BASE_KERNEL_H_
+#define _UAPI_BASE_KERNEL_H_
+
+#include <linux/types.h>
+#include "mali_gpu_props.h"
+#include "mali_base_mem_priv.h"
+#include "gpu/mali_kbase_gpu_id.h"
+#include "gpu/mali_kbase_gpu_coherency.h"
+
+#ifdef __KERNEL__
+#include <linux/mm.h>
+
+#if defined(PAGE_MASK) && defined(PAGE_SHIFT)
+#define LOCAL_PAGE_SHIFT PAGE_SHIFT
+#define LOCAL_PAGE_LSB ~PAGE_MASK
+#else
+#error "Missing kernel definitions: PAGE_MASK, PAGE_SHIFT"
+#endif
+
+#else
+
+#if defined(MALI_PAGE_SIZE_AGNOSTIC)
+#define LOCAL_PAGE_SHIFT (__builtin_ctz((unsigned int)sysconf(_SC_PAGESIZE)))
+#else
+#define LOCAL_PAGE_SHIFT 12
+#endif
+
+#define LOCAL_PAGE_LSB ((1ul << LOCAL_PAGE_SHIFT) - 1)
+
+#endif
+
+/* Physical memory group ID for normal usage.
+ */
+#define BASE_MEM_GROUP_DEFAULT (0)
+
+/* Number of physical memory groups.
+ */
+#define BASE_MEM_GROUP_COUNT (16)
+
+/**
+ * typedef base_mem_alloc_flags - Memory allocation, access/hint flags.
+ *
+ * A combination of MEM_PROT/MEM_HINT flags must be passed to each allocator
+ * in order to determine the best cache policy. Some combinations are
+ * of course invalid (e.g. MEM_PROT_CPU_WR | MEM_HINT_CPU_RD),
+ * which defines a write-only region on the CPU side, which is
+ * heavily read by the CPU...
+ * Other flags are only meaningful to a particular allocator.
+ * More flags can be added to this list, as long as they don't clash
+ * (see BASE_MEM_FLAGS_NR_BITS for the number of the first free bit).
+ */
+typedef __u32 base_mem_alloc_flags;
+
+#define BASE_MEM_FLAGS_MODIFIABLE_NATIVE (BASE_MEM_DONT_NEED)
+
+#define BASE_MEM_FLAGS_MODIFIABLE_IMPORTED_UMM (BASE_MEM_COHERENT_SYSTEM | BASE_MEM_COHERENT_LOCAL)
+
+/* A mask for all the flags which are modifiable via the base_mem_set_flags
+ * interface.
+ */
+#define BASE_MEM_FLAGS_MODIFIABLE \
+	(BASE_MEM_FLAGS_MODIFIABLE_NATIVE | BASE_MEM_FLAGS_MODIFIABLE_IMPORTED_UMM)
+
+/* A mask of all the flags that can be returned via the base_mem_get_flags()
+ * interface.
+ */
+#define BASE_MEM_FLAGS_QUERYABLE                                                           \
+	(BASE_MEM_FLAGS_INPUT_MASK &                                                       \
+	 ~(BASE_MEM_SAME_VA | BASE_MEM_COHERENT_SYSTEM_REQUIRED | BASE_MEM_IMPORT_SHARED | \
+	   BASE_MEM_FLAGS_RESERVED | BASEP_MEM_FLAGS_KERNEL_ONLY))
+
+/**
+ * enum base_mem_import_type - Memory types supported by @a base_mem_import
+ *
+ * @BASE_MEM_IMPORT_TYPE_INVALID: Invalid type
+ * @BASE_MEM_IMPORT_TYPE_UMM: UMM import. Handle type is a file descriptor (int)
+ * @BASE_MEM_IMPORT_TYPE_USER_BUFFER: User buffer import. Handle is a
+ * base_mem_import_user_buffer
+ *
+ * Each type defines what the supported handle type is.
+ *
+ * If any new type is added here ARM must be contacted
+ * to allocate a numeric value for it.
+ * Do not just add a new type without synchronizing with ARM
+ * as future releases from ARM might include other new types
+ * which could clash with your custom types.
+ */
+enum base_mem_import_type {
+	BASE_MEM_IMPORT_TYPE_INVALID = 0,
+	/*
+	 * Import type with value 1 is deprecated.
+	 */
+	BASE_MEM_IMPORT_TYPE_UMM = 2,
+	BASE_MEM_IMPORT_TYPE_USER_BUFFER = 3
+};
+
+/**
+ * struct base_mem_import_user_buffer - Handle of an imported user buffer
+ *
+ * @ptr:	address of imported user buffer
+ * @length:	length of imported user buffer in bytes
+ *
+ * This structure is used to represent a handle of an imported user buffer.
+ */
+
+struct base_mem_import_user_buffer {
+	__u64 ptr;
+	__u64 length;
+};
+
+/* Mask to detect 4GB boundary alignment */
+#define BASE_MEM_MASK_4GB 0xfffff000UL
+/* Mask to detect 4GB boundary (in page units) alignment */
+#define BASE_MEM_PFN_MASK_4GB (BASE_MEM_MASK_4GB >> LOCAL_PAGE_SHIFT)
+
+/* Limit on the 'extension' parameter for an allocation with the
+ * BASE_MEM_TILER_ALIGN_TOP flag set
+ *
+ * This is the same as the maximum limit for a Buffer Descriptor's chunk size
+ */
+#define BASE_MEM_TILER_ALIGN_TOP_EXTENSION_MAX_PAGES_LOG2 (21u - (LOCAL_PAGE_SHIFT))
+#define BASE_MEM_TILER_ALIGN_TOP_EXTENSION_MAX_PAGES \
+	(1ull << (BASE_MEM_TILER_ALIGN_TOP_EXTENSION_MAX_PAGES_LOG2))
+
+/* Bit mask of cookies used for memory allocation setup */
+#define KBASE_COOKIE_MASK ~1UL /* bit 0 is reserved */
+
+/* Maximum size allowed in a single KBASE_IOCTL_MEM_ALLOC call */
+#define KBASE_MEM_ALLOC_MAX_SIZE ((8ull << 30) >> PAGE_SHIFT) /* 8 GB */
+
+/*
+ * struct base_fence - Cross-device synchronisation fence.
+ *
+ * A fence is used to signal when the GPU has finished accessing a resource that
+ * may be shared with other devices, and also to delay work done asynchronously
+ * by the GPU until other devices have finished accessing a shared resource.
+ */
+struct base_fence {
+	struct {
+		int fd;
+		int stream_fd;
+	} basep;
+};
+
+/**
+ * struct base_mem_aliasing_info - Memory aliasing info
+ *
+ * @handle: Handle to alias, can be BASE_MEM_WRITE_ALLOC_PAGES_HANDLE
+ * @offset: Offset within the handle to start aliasing from, in pages.
+ *          Not used with BASE_MEM_WRITE_ALLOC_PAGES_HANDLE.
+ * @length: Length to alias, in pages. For BASE_MEM_WRITE_ALLOC_PAGES_HANDLE
+ *          specifies the number of times the special page is needed.
+ *
+ * Describes a memory handle to be aliased.
+ * A subset of the handle can be chosen for aliasing, given an offset and a
+ * length.
+ * A special handle BASE_MEM_WRITE_ALLOC_PAGES_HANDLE is used to represent a
+ * region where a special page is mapped with a write-alloc cache setup,
+ * typically used when the write result of the GPU isn't needed, but the GPU
+ * must write anyway.
+ *
+ * Offset and length are specified in pages.
+ * Offset must be within the size of the handle.
+ * Offset+length must not overrun the size of the handle.
+ */
+struct base_mem_aliasing_info {
+	struct base_mem_handle handle;
+	__u64 offset;
+	__u64 length;
+};
+
+/* Maximum percentage of just-in-time memory allocation trimming to perform
+ * on free.
+ */
+#define BASE_JIT_MAX_TRIM_LEVEL (100)
+
+/* Maximum number of concurrent just-in-time memory allocations.
+ */
+#define BASE_JIT_ALLOC_COUNT (255)
+
+/**
+ * struct base_jit_alloc_info - Structure which describes a JIT allocation
+ *                              request.
+ * @gpu_alloc_addr:             The GPU virtual address to write the JIT
+ *                              allocated GPU virtual address to.
+ * @va_pages:                   The minimum number of virtual pages required.
+ * @commit_pages:               The minimum number of physical pages which
+ *                              should back the allocation.
+ * @extension:                     Granularity of physical pages to grow the
+ *                              allocation by during a fault.
+ * @id:                         Unique ID provided by the caller, this is used
+ *                              to pair allocation and free requests.
+ *                              Zero is not a valid value.
+ * @bin_id:                     The JIT allocation bin, used in conjunction with
+ *                              @max_allocations to limit the number of each
+ *                              type of JIT allocation.
+ * @max_allocations:            The maximum number of allocations allowed within
+ *                              the bin specified by @bin_id. Should be the same
+ *                              for all allocations within the same bin.
+ * @flags:                      flags specifying the special requirements for
+ *                              the JIT allocation, see
+ *                              %BASE_JIT_ALLOC_VALID_FLAGS
+ * @padding:                    Expansion space - should be initialised to zero
+ * @usage_id:                   A hint about which allocation should be reused.
+ *                              The kernel should attempt to use a previous
+ *                              allocation with the same usage_id
+ * @heap_info_gpu_addr:         Pointer to an object in GPU memory describing
+ *                              the actual usage of the region.
+ *
+ * Kbase version history:
+ * 11.20: added @heap_info_gpu_addr
+ */
+struct base_jit_alloc_info {
+	__u64 gpu_alloc_addr;
+	__u64 va_pages;
+	__u64 commit_pages;
+	__u64 extension;
+	__u8 id;
+	__u8 bin_id;
+	__u8 max_allocations;
+	__u8 flags;
+	__u8 padding[2];
+	__u16 usage_id;
+	__u64 heap_info_gpu_addr;
+};
+
+enum base_external_resource_access { BASE_EXT_RES_ACCESS_SHARED, BASE_EXT_RES_ACCESS_EXCLUSIVE };
+
+struct base_external_resource {
+	__u64 ext_resource;
+};
+
+/**
+ * BASE_EXT_RES_COUNT_MAX - The maximum number of external resources
+ * which can be mapped/unmapped in a single request.
+ */
+#define BASE_EXT_RES_COUNT_MAX 10
+
+/**
+ * struct base_external_resource_list - Structure which describes a list of
+ *                                      external resources.
+ * @count:                              The number of resources.
+ * @ext_res:                            Array of external resources which is
+ *                                      sized at allocation time.
+ */
+struct base_external_resource_list {
+	__u64 count;
+	struct base_external_resource ext_res[1];
+};
+
+struct base_jd_debug_copy_buffer {
+	__u64 address;
+	__u64 size;
+	struct base_external_resource extres;
+};
+
+/**
+ * DOC: User-side Base GPU Property Queries
+ *
+ * The User-side Base GPU Property Query interface encapsulates two
+ * sub-modules:
+ *
+ * - "Dynamic GPU Properties"
+ * - "Base Platform Config GPU Properties"
+ *
+ * Base only deals with properties that vary between different GPU
+ * implementations - the Dynamic GPU properties and the Platform Config
+ * properties.
+ *
+ * For properties that are constant for the GPU Architecture, refer to the
+ * GPU module. However, we will discuss their relevance here just to
+ * provide background information.
+ *
+ * About the GPU Properties in Base and GPU modules
+ *
+ * The compile-time properties (Platform Config, GPU Compile-time
+ * properties) are exposed as pre-processor macros.
+ *
+ * Complementing the compile-time properties are the Dynamic GPU
+ * Properties, which act as a conduit for the GPU Configuration
+ * Discovery.
+ *
+ * In general, the dynamic properties are present to verify that the platform
+ * has been configured correctly with the right set of Platform Config
+ * Compile-time Properties.
+ *
+ * As a consistent guide across the entire DDK, the choice for dynamic or
+ * compile-time should consider the following, in order:
+ * 1. Can the code be written so that it doesn't need to know the
+ * implementation limits at all?
+ * 2. If you need the limits, get the information from the Dynamic Property
+ * lookup. This should be done once as you fetch the context, and then cached
+ * as part of the context data structure, so it's cheap to access.
+ * 3. If there's a clear and arguable inefficiency in using Dynamic Properties,
+ * then use a Compile-Time Property (Platform Config, or GPU Compile-time
+ * property). Examples of where this might be sensible follow:
+ *  - Part of a critical inner-loop
+ *  - Frequent re-use throughout the driver, causing significant extra load
+ * instructions or control flow that would be worthwhile optimizing out.
+ *
+ * We cannot provide an exhaustive set of examples, neither can we provide a
+ * rule for every possible situation. Use common sense, and think about: what
+ * the rest of the driver will be doing; how the compiler might represent the
+ * value if it is a compile-time constant; whether an OEM shipping multiple
+ * devices would benefit much more from a single DDK binary, instead of
+ * insignificant micro-optimizations.
+ *
+ * Dynamic GPU Properties
+ *
+ * Dynamic GPU properties are presented in two sets:
+ * 1. the commonly used properties in @ref base_gpu_props, which have been
+ * unpacked from GPU register bitfields.
+ * 2. The full set of raw, unprocessed properties in gpu_raw_gpu_props
+ * (also a member of base_gpu_props). All of these are presented in
+ * the packed form, as presented by the GPU  registers themselves.
+ *
+ * The raw properties in gpu_raw_gpu_props are necessary to
+ * allow a user of the Mali Tools (e.g. PAT) to determine "Why is this device
+ * behaving differently?". In this case, all information about the
+ * configuration is potentially useful, but it does not need to be processed
+ * by the driver. Instead, the raw registers can be processed by the Mali
+ * Tools software on the host PC.
+ *
+ * The properties returned extend the GPU Configuration Discovery
+ * registers. For example, GPU clock speed is not specified in the GPU
+ * Architecture, but is necessary for OpenCL's clGetDeviceInfo() function.
+ *
+ * The GPU properties are obtained by a call to
+ * base_get_gpu_props(). This simply returns a pointer to a const
+ * base_gpu_props structure. It is constant for the life of a base
+ * context. Multiple calls to base_get_gpu_props() to a base context
+ * return the same pointer to a constant structure. This avoids cache pollution
+ * of the common data.
+ *
+ * This pointer must not be freed, because it does not point to the start of a
+ * region allocated by the memory allocator; instead, just close the @ref
+ * base_context.
+ *
+ *
+ * Kernel Operation
+ *
+ * During Base Context Create time, user-side makes a single kernel call:
+ * - A call to fill user memory with GPU information structures
+ *
+ * The kernel-side will fill the provided the entire processed base_gpu_props
+ * structure, because this information is required in both
+ * user and kernel side; it does not make sense to decode it twice.
+ *
+ * Coherency groups must be derived from the bitmasks, but this can be done
+ * kernel side, and just once at kernel startup: Coherency groups must already
+ * be known kernel-side, to support chains that specify a 'Only Coherent Group'
+ * SW requirement, or 'Only Coherent Group with Tiler' SW requirement.
+ *
+ * Coherency Group calculation
+ *
+ * Creation of the coherent group data is done at device-driver startup, and so
+ * is one-time. This will most likely involve a loop with CLZ, shifting, and
+ * bit clearing on the L2_PRESENT mask, depending on whether the
+ * system is L2 Coherent. The number of shader cores is done by a
+ * population count, since faulty cores may be disabled during production,
+ * producing a non-contiguous mask.
+ *
+ * The memory requirements for this algorithm can be determined either by a __u64
+ * population count on the L2_PRESENT mask (a LUT helper already is
+ * required for the above), or simple assumption that there can be no more than
+ * 16 coherent groups, since core groups are typically 4 cores.
+ */
+
+/*
+ * More information is possible - but associativity and bus width are not
+ * required by upper-level apis.
+ */
+struct mali_base_gpu_l2_cache_props {
+	__u8 log2_line_size;
+	__u8 log2_cache_size;
+	__u8 num_l2_slices; /* Number of L2C slices. 1 or higher */
+	__u8 padding[5];
+};
+
+struct mali_base_gpu_tiler_props {
+	__u32 bin_size_bytes; /* Max is 4*2^15 */
+	__u32 max_active_levels; /* Max is 2^15 */
+};
+
+/**
+ * struct mali_base_gpu_thread_props - GPU threading system details.
+ * @max_threads: Max. number of threads per core
+ * @max_workgroup_size:     Max. number of threads per workgroup
+ * @max_barrier_size:       Max. number of threads that can synchronize on a
+ *                          simple barrier
+ * @max_registers:          Total size [1..65535] of the register file available
+ *                          per core.
+ * @max_task_queue:         Max. tasks [1..255] which may be sent to a core
+ *                          before it becomes blocked.
+ * @max_thread_group_split: Max. allowed value [1..15] of the Thread Group Split
+ *                          field.
+ * @impl_tech:              0 = Not specified, 1 = Silicon, 2 = FPGA,
+ *                          3 = SW Model/Emulation
+ * @padding:                padding to align to 8-byte
+ * @tls_alloc:              Number of threads per core that TLS must be
+ *                          allocated for
+ */
+struct mali_base_gpu_thread_props {
+	__u32 max_threads;
+	__u32 max_workgroup_size;
+	__u32 max_barrier_size;
+	__u32 max_registers;
+	__u8 max_task_queue;
+	__u8 max_thread_group_split;
+	__u8 impl_tech;
+	__u8 padding;
+	__u32 tls_alloc;
+};
+
+/**
+ * struct mali_base_gpu_coherent_group - descriptor for a coherent group
+ * @core_mask: Core restriction mask required for the group
+ * @num_cores: Number of cores in the group
+ * @padding:   padding to align to 8-byte
+ *
+ * \c core_mask exposes all cores in that coherent group, and \c num_cores
+ * provides a cached population-count for that mask.
+ *
+ * @note Whilst all cores are exposed in the mask, not all may be available to
+ *       the application, depending on the Kernel Power policy.
+ *
+ * @note if u64s must be 8-byte aligned, then this structure has 32-bits of
+ *       wastage.
+ */
+struct mali_base_gpu_coherent_group {
+	__u64 core_mask;
+	__u16 num_cores;
+	__u16 padding[3];
+};
+
+/**
+ * struct mali_base_gpu_coherent_group_info - Coherency group information
+ * @num_groups: Number of coherent groups in the GPU.
+ * @num_core_groups: Number of core groups (coherent or not) in the GPU.
+ *                   Equivalent to the number of L2 Caches.
+ *                   The GPU Counter dumping writes 2048 bytes per core group,
+ *                   regardless of whether the core groups are coherent or not.
+ *                   Hence this member is needed to calculate how much memory
+ *                   is required for dumping.
+ *                   @note Do not use it to work out how many valid elements
+ *                         are in the group[] member. Use num_groups instead.
+ * @coherency: Coherency features of the memory, accessed by gpu_mem_features
+ *             methods
+ * @padding: padding to align to 8-byte
+ * @group: Descriptors of coherent groups
+ *
+ * Note that the sizes of the members could be reduced. However, the \c group
+ * member might be 8-byte aligned to ensure the __u64 core_mask is 8-byte
+ * aligned, thus leading to wastage if the other members sizes were reduced.
+ *
+ * The groups are sorted by core mask. The core masks are non-repeating and do
+ * not intersect.
+ */
+struct mali_base_gpu_coherent_group_info {
+	__u32 num_groups;
+	__u32 num_core_groups;
+	__u32 coherency;
+	__u32 padding;
+	struct mali_base_gpu_coherent_group group[BASE_MAX_COHERENT_GROUPS];
+};
+
+#if MALI_USE_CSF
+#include "csf/mali_base_csf_kernel.h"
+#else
+#include "jm/mali_base_jm_kernel.h"
+#endif
+
+/**
+ * struct gpu_raw_gpu_props - A complete description of the GPU's Hardware
+ *                            Configuration Discovery registers.
+ * @shader_present: Shader core present bitmap
+ * @tiler_present: Tiler core present bitmap
+ * @l2_present: Level 2 cache present bitmap
+ * @stack_present: Core stack present bitmap
+ * @l2_features: L2 features
+ * @core_features: Core features
+ * @mem_features: Mem features
+ * @mmu_features: Mmu features
+ * @as_present: Bitmap of address spaces present
+ * @js_present: Job slots present
+ * @js_features: Array of job slot features.
+ * @tiler_features: Tiler features
+ * @texture_features: TEXTURE_FEATURES_x registers, as exposed by the GPU
+ * @gpu_id: GPU and revision identifier
+ * @thread_max_threads: Maximum number of threads per core
+ * @thread_max_workgroup_size: Maximum number of threads per workgroup
+ * @thread_max_barrier_size: Maximum number of threads per barrier
+ * @thread_features: Thread features
+ * @coherency_mode: Note: This is the _selected_ coherency mode rather than the
+ *                  available modes as exposed in the coherency_features register
+ * @thread_tls_alloc: Number of threads per core that TLS must be allocated for
+ * @gpu_features: GPU features
+ *
+ * The information is presented inefficiently for access. For frequent access,
+ * the values should be better expressed in an unpacked form in the
+ * base_gpu_props structure.
+ *
+ * The raw properties in gpu_raw_gpu_props are necessary to
+ * allow a user of the Mali Tools (e.g. PAT) to determine "Why is this device
+ * behaving differently?". In this case, all information about the
+ * configuration is potentially useful, but it does not need to be processed
+ * by the driver. Instead, the raw registers can be processed by the Mali
+ * Tools software on the host PC.
+ *
+ */
+struct gpu_raw_gpu_props {
+	__u64 shader_present;
+	__u64 tiler_present;
+	__u64 l2_present;
+	__u64 stack_present;
+	__u32 l2_features;
+	__u32 core_features;
+	__u32 mem_features;
+	__u32 mmu_features;
+
+	__u32 as_present;
+
+	__u32 js_present;
+	__u32 js_features[GPU_MAX_JOB_SLOTS];
+	__u32 tiler_features;
+	__u32 texture_features[BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS];
+
+	__u32 gpu_id;
+
+	__u32 thread_max_threads;
+	__u32 thread_max_workgroup_size;
+	__u32 thread_max_barrier_size;
+	__u32 thread_features;
+
+	/*
+	 * Note: This is the _selected_ coherency mode rather than the
+	 * available modes as exposed in the coherency_features register.
+	 */
+	__u32 coherency_mode;
+
+	__u32 thread_tls_alloc;
+	__u64 gpu_features;
+};
+
+/**
+ * struct base_gpu_props - Return structure for base_get_gpu_props().
+ * @core_props:     Core props.
+ * @l2_props:       L2 props.
+ * @unused_1:       Keep for backwards compatibility.
+ * @tiler_props:    Tiler props.
+ * @thread_props:   Thread props.
+ * @raw_props:      This member is large, likely to be 128 bytes.
+ * @coherency_info: This must be last member of the structure.
+ *
+ * NOTE: the raw_props member in this data structure contains the register
+ * values from which the value of the other members are derived. The derived
+ * members exist to allow for efficient access and/or shielding the details
+ * of the layout of the registers.
+ */
+struct base_gpu_props {
+	struct mali_base_gpu_core_props core_props;
+	struct mali_base_gpu_l2_cache_props l2_props;
+	__u64 unused_1;
+	struct mali_base_gpu_tiler_props tiler_props;
+	struct mali_base_gpu_thread_props thread_props;
+	struct gpu_raw_gpu_props raw_props;
+	struct mali_base_gpu_coherent_group_info coherency_info;
+};
+
+#define BASE_MEM_GROUP_ID_GET(flags) ((flags & BASE_MEM_GROUP_ID_MASK) >> BASEP_MEM_GROUP_ID_SHIFT)
+
+#define BASE_MEM_GROUP_ID_SET(id)                                                                  \
+	(((base_mem_alloc_flags)((id < 0 || id >= BASE_MEM_GROUP_COUNT) ? BASE_MEM_GROUP_DEFAULT : \
+										id)                      \
+	  << BASEP_MEM_GROUP_ID_SHIFT) &                                                           \
+	 BASE_MEM_GROUP_ID_MASK)
+
+#define BASE_CONTEXT_MMU_GROUP_ID_SET(group_id) \
+	(BASEP_CONTEXT_MMU_GROUP_ID_MASK &      \
+	 ((base_context_create_flags)(group_id) << BASEP_CONTEXT_MMU_GROUP_ID_SHIFT))
+
+#define BASE_CONTEXT_MMU_GROUP_ID_GET(flags) \
+	((flags & BASEP_CONTEXT_MMU_GROUP_ID_MASK) >> BASEP_CONTEXT_MMU_GROUP_ID_SHIFT)
+
+/*
+ * A number of bit flags are defined for requesting cpu_gpu_timeinfo. These
+ * flags are also used, where applicable, for specifying which fields
+ * are valid following the request operation.
+ */
+
+/* For monotonic (counter) timefield */
+#define BASE_TIMEINFO_MONOTONIC_FLAG (1U << 0)
+/* For system wide timestamp */
+#define BASE_TIMEINFO_TIMESTAMP_FLAG (1U << 1)
+/* For GPU cycle counter */
+#define BASE_TIMEINFO_CYCLE_COUNTER_FLAG (1U << 2)
+/* Specify kernel GPU register timestamp */
+#define BASE_TIMEINFO_KERNEL_SOURCE_FLAG (1U << 30)
+/* Specify userspace cntvct_el0 timestamp source */
+#define BASE_TIMEINFO_USER_SOURCE_FLAG (1U << 31)
+
+#define BASE_TIMEREQUEST_ALLOWED_FLAGS                                         \
+	(BASE_TIMEINFO_MONOTONIC_FLAG | BASE_TIMEINFO_TIMESTAMP_FLAG |         \
+	 BASE_TIMEINFO_CYCLE_COUNTER_FLAG | BASE_TIMEINFO_KERNEL_SOURCE_FLAG | \
+	 BASE_TIMEINFO_USER_SOURCE_FLAG)
+
+/* Maximum number of source allocations allowed to create an alias allocation.
+ * This needs to be 4096 * 6 to allow cube map arrays with up to 4096 array
+ * layers, since each cube map in the array will have 6 faces.
+ */
+#define BASE_MEM_ALIAS_MAX_ENTS ((size_t)24576)
+
+#endif /* _UAPI_BASE_KERNEL_H_ */
--- a/include/uapi/gpu/arm/midgard/mali_base_mem_priv.h
+++ b/include/uapi/gpu/arm/midgard/mali_base_mem_priv.h
@@ -0,0 +1,57 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *
+ * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#ifndef _UAPI_BASE_MEM_PRIV_H_
+#define _UAPI_BASE_MEM_PRIV_H_
+
+#include <linux/types.h>
+#include "mali_base_common_kernel.h"
+
+#define BASE_SYNCSET_OP_MSYNC (1U << 0)
+#define BASE_SYNCSET_OP_CSYNC (1U << 1)
+
+/*
+ * This structure describe a basic memory coherency operation.
+ * It can either be:
+ * @li a sync from CPU to Memory:
+ *	- type = ::BASE_SYNCSET_OP_MSYNC
+ *	- mem_handle = a handle to the memory object on which the operation
+ *	  is taking place
+ *	- user_addr = the address of the range to be synced
+ *	- size = the amount of data to be synced, in bytes
+ *	- offset is ignored.
+ * @li a sync from Memory to CPU:
+ *	- type = ::BASE_SYNCSET_OP_CSYNC
+ *	- mem_handle = a handle to the memory object on which the operation
+ *	  is taking place
+ *	- user_addr = the address of the range to be synced
+ *	- size = the amount of data to be synced, in bytes.
+ *	- offset is ignored.
+ */
+struct basep_syncset {
+	struct base_mem_handle mem_handle;
+	__u64 user_addr;
+	__u64 size;
+	__u8 type;
+	__u8 padding[7];
+};
+
+#endif /* _UAPI_BASE_MEM_PRIV_H_ */
--- a/include/uapi/gpu/arm/midgard/mali_gpu_props.h
+++ b/include/uapi/gpu/arm/midgard/mali_gpu_props.h
@@ -0,0 +1,111 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *
+ * (C) COPYRIGHT 2023 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#ifndef _UAPI_MALI_GPUPROPS_H_
+#define _UAPI_MALI_GPUPROPS_H_
+
+#include <linux/types.h>
+#include "mali_base_common_kernel.h"
+
+#define BASE_MAX_COHERENT_GROUPS 16
+#define GPU_MAX_JOB_SLOTS 16
+
+/**
+ * struct gpu_props_user_data - structure for gpu props user buffer.
+ * @core_props:     Core props.
+ * @l2_props:       L2 props.
+ * @tiler_props:    Tiler props.
+ * @thread_props:   Thread props.
+ * @raw_props:      Raw register values kept for backwards compatibility. Kbase
+ *                  and base should never reference values within this struct.
+ * @coherency_info: Coherency information.
+ *
+ * This structure is used solely for the encoding and decoding of the prop_buffer
+ * returned by kbase.
+ */
+struct gpu_props_user_data {
+	struct {
+		__u32 product_id;
+		__u16 version_status;
+		__u16 minor_revision;
+		__u16 major_revision;
+		__u32 gpu_freq_khz_max;
+		__u32 log2_program_counter_size;
+		__u32 texture_features[BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS];
+		__u64 gpu_available_memory_size;
+		__u8 num_exec_engines;
+	} core_props;
+	struct {
+		__u8 log2_line_size;
+		__u8 log2_cache_size;
+		__u8 num_l2_slices;
+	} l2_props;
+	struct {
+		__u32 bin_size_bytes;
+		__u32 max_active_levels;
+	} tiler_props;
+	struct {
+		__u32 max_threads;
+		__u32 max_workgroup_size;
+		__u32 max_barrier_size;
+		__u32 max_registers;
+		__u8 max_task_queue;
+		__u8 max_thread_group_split;
+		__u8 impl_tech;
+		__u32 tls_alloc;
+	} thread_props;
+
+	/* kept for backward compatibility, should not be used in the future. */
+	struct {
+		__u64 shader_present;
+		__u64 tiler_present;
+		__u64 l2_present;
+		__u64 stack_present;
+		__u64 l2_features;
+		__u64 core_features;
+		__u64 mem_features;
+		__u64 mmu_features;
+		__u32 as_present;
+		__u32 js_present;
+		__u32 js_features[GPU_MAX_JOB_SLOTS];
+		__u64 tiler_features;
+		__u32 texture_features[BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS];
+		__u64 gpu_id;
+		__u32 thread_max_threads;
+		__u32 thread_max_workgroup_size;
+		__u32 thread_max_barrier_size;
+		__u32 thread_features;
+		__u32 coherency_mode;
+		__u32 thread_tls_alloc;
+		__u64 gpu_features;
+	} raw_props;
+	struct {
+		__u32 num_groups;
+		__u32 num_core_groups;
+		__u32 coherency;
+		struct {
+			__u64 core_mask;
+			__u32 num_cores;
+		} group[BASE_MAX_COHERENT_GROUPS];
+	} coherency_info;
+};
+
+#endif /* _UAPI_MALI_GPUPROPS_H_ */
--- a/include/uapi/gpu/arm/midgard/mali_kbase_hwcnt_reader.h
+++ b/include/uapi/gpu/arm/midgard/mali_kbase_hwcnt_reader.h
@@ -0,0 +1,504 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *
+ * (C) COPYRIGHT 2015-2023 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#ifndef _UAPI_KBASE_HWCNT_READER_H_
+#define _UAPI_KBASE_HWCNT_READER_H_
+
+#include <linux/stddef.h>
+#include <linux/types.h>
+
+/* The ids of ioctl commands. */
+#define KBASE_HWCNT_READER 0xBE
+#define KBASE_HWCNT_READER_GET_HWVER _IOR(KBASE_HWCNT_READER, 0x00, __u32)
+#define KBASE_HWCNT_READER_GET_BUFFER_SIZE _IOR(KBASE_HWCNT_READER, 0x01, __u32)
+#define KBASE_HWCNT_READER_DUMP _IOW(KBASE_HWCNT_READER, 0x10, __u32)
+#define KBASE_HWCNT_READER_CLEAR _IOW(KBASE_HWCNT_READER, 0x11, __u32)
+#define KBASE_HWCNT_READER_GET_BUFFER             \
+	_IOC(_IOC_READ, KBASE_HWCNT_READER, 0x20, \
+	     offsetof(struct kbase_hwcnt_reader_metadata, cycles))
+#define KBASE_HWCNT_READER_GET_BUFFER_WITH_CYCLES \
+	_IOR(KBASE_HWCNT_READER, 0x20, struct kbase_hwcnt_reader_metadata)
+#define KBASE_HWCNT_READER_PUT_BUFFER              \
+	_IOC(_IOC_WRITE, KBASE_HWCNT_READER, 0x21, \
+	     offsetof(struct kbase_hwcnt_reader_metadata, cycles))
+#define KBASE_HWCNT_READER_PUT_BUFFER_WITH_CYCLES \
+	_IOW(KBASE_HWCNT_READER, 0x21, struct kbase_hwcnt_reader_metadata)
+#define KBASE_HWCNT_READER_SET_INTERVAL _IOW(KBASE_HWCNT_READER, 0x30, __u32)
+#define KBASE_HWCNT_READER_ENABLE_EVENT _IOW(KBASE_HWCNT_READER, 0x40, __u32)
+#define KBASE_HWCNT_READER_DISABLE_EVENT _IOW(KBASE_HWCNT_READER, 0x41, __u32)
+#define KBASE_HWCNT_READER_GET_API_VERSION _IOW(KBASE_HWCNT_READER, 0xFF, __u32)
+#define KBASE_HWCNT_READER_GET_API_VERSION_WITH_FEATURES \
+	_IOW(KBASE_HWCNT_READER, 0xFF, struct kbase_hwcnt_reader_api_version)
+
+/**
+ * struct kbase_hwcnt_reader_metadata_cycles - GPU clock cycles
+ * @top:           the number of cycles associated with the main clock for the
+ *                 GPU
+ * @shader_cores:  the cycles that have elapsed on the GPU shader cores
+ */
+struct kbase_hwcnt_reader_metadata_cycles {
+	__u64 top;
+	__u64 shader_cores;
+};
+
+/**
+ * struct kbase_hwcnt_reader_metadata - hwcnt reader sample buffer metadata
+ * @timestamp:  time when sample was collected
+ * @event_id:   id of an event that triggered sample collection
+ * @buffer_idx: position in sampling area where sample buffer was stored
+ * @cycles:     the GPU cycles that occurred since the last sample
+ */
+struct kbase_hwcnt_reader_metadata {
+	__u64 timestamp;
+	__u32 event_id;
+	__u32 buffer_idx;
+	struct kbase_hwcnt_reader_metadata_cycles cycles;
+};
+
+/**
+ * enum base_hwcnt_reader_event - hwcnt dumping events
+ * @BASE_HWCNT_READER_EVENT_MANUAL:   manual request for dump
+ * @BASE_HWCNT_READER_EVENT_PERIODIC: periodic dump
+ * @BASE_HWCNT_READER_EVENT_PREJOB:   prejob dump request
+ * @BASE_HWCNT_READER_EVENT_POSTJOB:  postjob dump request
+ * @BASE_HWCNT_READER_EVENT_COUNT:    number of supported events
+ */
+enum base_hwcnt_reader_event {
+	BASE_HWCNT_READER_EVENT_MANUAL,
+	BASE_HWCNT_READER_EVENT_PERIODIC,
+	BASE_HWCNT_READER_EVENT_PREJOB,
+	BASE_HWCNT_READER_EVENT_POSTJOB,
+	BASE_HWCNT_READER_EVENT_COUNT
+};
+
+#define KBASE_HWCNT_READER_API_VERSION_NO_FEATURE (0)
+#define KBASE_HWCNT_READER_API_VERSION_FEATURE_CYCLES_TOP (1 << 0)
+#define KBASE_HWCNT_READER_API_VERSION_FEATURE_CYCLES_SHADER_CORES (1 << 1)
+
+/**
+ * struct kbase_hwcnt_reader_api_version - hwcnt reader API version
+ * @version:  API version
+ * @features: available features in this API version
+ */
+struct kbase_hwcnt_reader_api_version {
+	__u32 version;
+	__u32 features;
+};
+
+/** Hardware counters reader API version */
+#define PRFCNT_READER_API_VERSION (0)
+
+/**
+ * enum prfcnt_list_type - Type of list item
+ * @PRFCNT_LIST_TYPE_ENUM:        Enumeration of performance counters.
+ * @PRFCNT_LIST_TYPE_REQUEST:     Request for configuration setup.
+ * @PRFCNT_LIST_TYPE_SAMPLE_META: Sample metadata.
+ */
+enum prfcnt_list_type {
+	PRFCNT_LIST_TYPE_ENUM,
+	PRFCNT_LIST_TYPE_REQUEST,
+	PRFCNT_LIST_TYPE_SAMPLE_META,
+};
+
+#define FLEX_LIST_TYPE(type, subtype) ((__u16)(((type & 0xf) << 12) | (subtype & 0xfff)))
+#define FLEX_LIST_TYPE_NONE FLEX_LIST_TYPE(0, 0)
+
+#define PRFCNT_ENUM_TYPE_BLOCK FLEX_LIST_TYPE(PRFCNT_LIST_TYPE_ENUM, 0)
+#define PRFCNT_ENUM_TYPE_REQUEST FLEX_LIST_TYPE(PRFCNT_LIST_TYPE_ENUM, 1)
+#define PRFCNT_ENUM_TYPE_SAMPLE_INFO FLEX_LIST_TYPE(PRFCNT_LIST_TYPE_ENUM, 2)
+
+#define PRFCNT_REQUEST_TYPE_MODE FLEX_LIST_TYPE(PRFCNT_LIST_TYPE_REQUEST, 0)
+#define PRFCNT_REQUEST_TYPE_ENABLE FLEX_LIST_TYPE(PRFCNT_LIST_TYPE_REQUEST, 1)
+#define PRFCNT_REQUEST_TYPE_SCOPE FLEX_LIST_TYPE(PRFCNT_LIST_TYPE_REQUEST, 2)
+
+#define PRFCNT_SAMPLE_META_TYPE_SAMPLE FLEX_LIST_TYPE(PRFCNT_LIST_TYPE_SAMPLE_META, 0)
+#define PRFCNT_SAMPLE_META_TYPE_CLOCK FLEX_LIST_TYPE(PRFCNT_LIST_TYPE_SAMPLE_META, 1)
+#define PRFCNT_SAMPLE_META_TYPE_BLOCK FLEX_LIST_TYPE(PRFCNT_LIST_TYPE_SAMPLE_META, 2)
+
+/**
+ * struct prfcnt_item_header - Header for an item of the list.
+ * @item_type:    Type of item.
+ * @item_version: Protocol version.
+ */
+struct prfcnt_item_header {
+	__u16 item_type;
+	__u16 item_version;
+};
+
+/**
+ * enum prfcnt_block_type - Type of performance counter block.
+ * @PRFCNT_BLOCK_TYPE_FE:          Front End.
+ * @PRFCNT_BLOCK_TYPE_TILER:       Tiler.
+ * @PRFCNT_BLOCK_TYPE_MEMORY:      Memory System.
+ * @PRFCNT_BLOCK_TYPE_SHADER_CORE: Shader Core.
+ * @PRFCNT_BLOCK_TYPE_FW:          Firmware.
+ * @PRFCNT_BLOCK_TYPE_CSG:         CSG.
+ * @PRFCNT_BLOCK_TYPE_RESERVED:    Reserved.
+ */
+enum prfcnt_block_type {
+	PRFCNT_BLOCK_TYPE_FE,
+	PRFCNT_BLOCK_TYPE_TILER,
+	PRFCNT_BLOCK_TYPE_MEMORY,
+	PRFCNT_BLOCK_TYPE_SHADER_CORE,
+	PRFCNT_BLOCK_TYPE_FW,
+	PRFCNT_BLOCK_TYPE_CSG,
+	PRFCNT_BLOCK_TYPE_RESERVED = 255,
+};
+
+/**
+ * enum prfcnt_set - Type of performance counter block set.
+ * @PRFCNT_SET_PRIMARY:   Primary.
+ * @PRFCNT_SET_SECONDARY: Secondary.
+ * @PRFCNT_SET_TERTIARY:  Tertiary.
+ * @PRFCNT_SET_RESERVED:  Reserved.
+ */
+enum prfcnt_set {
+	PRFCNT_SET_PRIMARY,
+	PRFCNT_SET_SECONDARY,
+	PRFCNT_SET_TERTIARY,
+	PRFCNT_SET_RESERVED = 255,
+};
+
+/**
+ * struct prfcnt_enum_block_counter - Performance counter block descriptor.
+ * @block_type:    Type of performance counter block.
+ * @set:           Which SET this represents: primary, secondary or tertiary.
+ * @pad:           Padding bytes.
+ * @num_instances: How many instances of this block type exist in the hardware.
+ * @num_values:    How many entries in the values array there are for samples
+ *                 from this block.
+ * @counter_mask:  Bitmask that indicates counter availability in this block.
+ *                 A '0' indicates that a counter is not available at that
+ *                 index and will always return zeroes if requested.
+ */
+struct prfcnt_enum_block_counter {
+	__u8 block_type;
+	__u8 set;
+	__u8 pad[2];
+	__u16 num_instances;
+	__u16 num_values;
+	__u64 counter_mask[2];
+};
+
+/**
+ * struct prfcnt_enum_request - Request descriptor.
+ * @request_item_type:       Type of request.
+ * @pad:                     Padding bytes.
+ * @versions_mask: Bitmask of versions that support this request.
+ */
+struct prfcnt_enum_request {
+	__u16 request_item_type;
+	__u16 pad;
+	__u32 versions_mask;
+};
+
+/**
+ * struct prfcnt_enum_sample_info - Sample information descriptor.
+ * @num_clock_domains:       Number of clock domains of the GPU.
+ * @pad:                     Padding bytes.
+ */
+struct prfcnt_enum_sample_info {
+	__u32 num_clock_domains;
+	__u32 pad;
+};
+
+/**
+ * struct prfcnt_enum_item - Performance counter enumeration item.
+ * @padding:         Padding bytes.
+ * @hdr:             Header describing the type of item in the list.
+ * @u:               Structure containing discriptor for enumeration item type.
+ * @u.block_counter: Performance counter block descriptor.
+ * @u.request:       Request descriptor.
+ * @u.sample_info:   Performance counter sample information descriptor.
+ */
+struct prfcnt_enum_item {
+	struct prfcnt_item_header hdr;
+	__u8 padding[4];
+	/** union u - union of block_counter and request */
+	union {
+		struct prfcnt_enum_block_counter block_counter;
+		struct prfcnt_enum_request request;
+		struct prfcnt_enum_sample_info sample_info;
+	} u;
+};
+
+/**
+ * enum prfcnt_mode - Capture mode for counter sampling.
+ * @PRFCNT_MODE_MANUAL:   Manual sampling mode.
+ * @PRFCNT_MODE_PERIODIC: Periodic sampling mode.
+ * @PRFCNT_MODE_RESERVED: Reserved.
+ */
+enum prfcnt_mode {
+	PRFCNT_MODE_MANUAL,
+	PRFCNT_MODE_PERIODIC,
+	PRFCNT_MODE_RESERVED = 255,
+};
+
+/**
+ * struct prfcnt_request_mode - Mode request descriptor.
+ * @mode:                           Capture mode for the session, either manual or periodic.
+ * @pad:                            Padding bytes.
+ * @mode_config:                    Structure containing configuration for periodic mode.
+ * @mode_config.periodic:           Periodic config.
+ * @mode_config.periodic.period_ns: Period in nanoseconds, for periodic mode.
+ */
+struct prfcnt_request_mode {
+	__u8 mode;
+	__u8 pad[7];
+	/** union mode_config - request mode configuration*/
+	union {
+		struct {
+			__u64 period_ns;
+		} periodic;
+	} mode_config;
+};
+
+/**
+ * struct prfcnt_request_enable - Enable request descriptor.
+ * @block_type:  Type of performance counter block.
+ * @set:         Which SET to use: primary, secondary or tertiary.
+ * @pad:         Padding bytes.
+ * @enable_mask: Bitmask that indicates which performance counters to enable.
+ *               Unavailable counters will be ignored.
+ */
+struct prfcnt_request_enable {
+	__u8 block_type;
+	__u8 set;
+	__u8 pad[6];
+	__u64 enable_mask[2];
+};
+
+/**
+ * enum prfcnt_scope - Scope of performance counters.
+ * @PRFCNT_SCOPE_GLOBAL:   Global scope.
+ * @PRFCNT_SCOPE_RESERVED: Reserved.
+ */
+enum prfcnt_scope {
+	PRFCNT_SCOPE_GLOBAL,
+	PRFCNT_SCOPE_RESERVED = 255,
+};
+
+/**
+ * struct prfcnt_request_scope - Scope request descriptor.
+ * @scope: Scope of the performance counters to capture.
+ * @pad:   Padding bytes.
+ */
+struct prfcnt_request_scope {
+	__u8 scope;
+	__u8 pad[7];
+};
+
+/**
+ * struct prfcnt_request_item - Performance counter request item.
+ * @padding:      Padding bytes.
+ * @hdr:          Header describing the type of item in the list.
+ * @u:            Structure containing descriptor for request type.
+ * @u.req_mode:   Mode request descriptor.
+ * @u.req_enable: Enable request descriptor.
+ * @u.req_scope:  Scope request descriptor.
+ */
+struct prfcnt_request_item {
+	struct prfcnt_item_header hdr;
+	__u8 padding[4];
+	/** union u - union on req_mode and req_enable */
+	union {
+		struct prfcnt_request_mode req_mode;
+		struct prfcnt_request_enable req_enable;
+		struct prfcnt_request_scope req_scope;
+	} u;
+};
+
+/**
+ * enum prfcnt_request_type - Type of request descriptor.
+ * @PRFCNT_REQUEST_MODE:   Specify the capture mode to be used for the session.
+ * @PRFCNT_REQUEST_ENABLE: Specify which performance counters to capture.
+ * @PRFCNT_REQUEST_SCOPE:  Specify the scope of the performance counters.
+ */
+enum prfcnt_request_type {
+	PRFCNT_REQUEST_MODE,
+	PRFCNT_REQUEST_ENABLE,
+	PRFCNT_REQUEST_SCOPE,
+};
+
+/* This sample contains overflows from dump duration stretch because the sample buffer was full */
+#define SAMPLE_FLAG_OVERFLOW (1u << 0)
+/* This sample has had an error condition for sample duration */
+#define SAMPLE_FLAG_ERROR (1u << 30)
+
+/**
+ * struct prfcnt_sample_metadata - Metadata for counter sample data.
+ * @timestamp_start: Earliest timestamp that values in this sample represent.
+ * @timestamp_end:   Latest timestamp that values in this sample represent.
+ * @seq:             Sequence number of this sample. Must match the value from
+ *                   GET_SAMPLE.
+ * @user_data:       User data provided to HWC_CMD_START or HWC_CMD_SAMPLE_*
+ * @flags:           Property flags.
+ * @pad:             Padding bytes.
+ */
+struct prfcnt_sample_metadata {
+	__u64 timestamp_start;
+	__u64 timestamp_end;
+	__u64 seq;
+	__u64 user_data;
+	__u32 flags;
+	__u32 pad;
+};
+
+/* Maximum number of domains a metadata for clock cycles can refer to */
+#define MAX_REPORTED_DOMAINS (4)
+
+/**
+ * struct prfcnt_clock_metadata - Metadata for clock cycles.
+ * @num_domains: Number of domains this metadata refers to.
+ * @pad:         Padding bytes.
+ * @cycles:      Number of cycles elapsed in each counter domain between
+ *               timestamp_start and timestamp_end. Valid only for the
+ *               first @p num_domains.
+ */
+struct prfcnt_clock_metadata {
+	__u32 num_domains;
+	__u32 pad;
+	__u64 cycles[MAX_REPORTED_DOMAINS];
+};
+
+/* This block state is unknown */
+#define BLOCK_STATE_UNKNOWN (0)
+/* This block was powered on for at least some portion of the sample */
+#define BLOCK_STATE_ON (1 << 0)
+/* This block was powered off for at least some portion of the sample */
+#define BLOCK_STATE_OFF (1 << 1)
+/* This block was available to this VM for at least some portion of the sample */
+#define BLOCK_STATE_AVAILABLE (1 << 2)
+/* This block was not available to this VM for at least some portion of the sample
+ *  Note that no data is collected when the block is not available to the VM.
+ */
+#define BLOCK_STATE_UNAVAILABLE (1 << 3)
+/* This block was operating in "normal" (non-protected) mode for at least some portion of the sample */
+#define BLOCK_STATE_NORMAL (1 << 4)
+/* This block was operating in "protected" mode for at least some portion of the sample.
+ * Note that no data is collected when the block is in protected mode.
+ */
+#define BLOCK_STATE_PROTECTED (1 << 5)
+
+/**
+ * struct prfcnt_block_metadata - Metadata for counter block.
+ * @block_type:    Type of performance counter block.
+ * @block_idx:     Index of performance counter block.
+ * @set:           Set of performance counter block.
+ * @pad_u8:        Padding bytes.
+ * @block_state:   Bits set indicate the states which the block is known
+ *                 to have operated in during this sample.
+ * @values_offset: Offset from the start of the mmapped region, to the values
+ *                 for this block. The values themselves are an array of __u64.
+ * @pad_u32:       Padding bytes.
+ */
+struct prfcnt_block_metadata {
+	__u8 block_type;
+	__u8 block_idx;
+	__u8 set;
+	__u8 pad_u8;
+	__u32 block_state;
+	__u32 values_offset;
+	__u32 pad_u32;
+};
+
+/**
+ * struct prfcnt_metadata - Performance counter metadata item.
+ * @padding:     Padding bytes.
+ * @hdr:         Header describing the type of item in the list.
+ * @u:           Structure containing descriptor for metadata type.
+ * @u.sample_md: Counter sample data metadata descriptor.
+ * @u.clock_md:  Clock cycles metadata descriptor.
+ * @u.block_md:  Counter block metadata descriptor.
+ */
+struct prfcnt_metadata {
+	struct prfcnt_item_header hdr;
+	__u8 padding[4];
+	union {
+		struct prfcnt_sample_metadata sample_md;
+		struct prfcnt_clock_metadata clock_md;
+		struct prfcnt_block_metadata block_md;
+	} u;
+};
+
+/**
+ * enum prfcnt_control_cmd_code - Control command code for client session.
+ * @PRFCNT_CONTROL_CMD_START:        Start the counter data dump run for
+ *                                   the calling client session.
+ * @PRFCNT_CONTROL_CMD_STOP:         Stop the counter data dump run for the
+ *                                   calling client session.
+ * @PRFCNT_CONTROL_CMD_SAMPLE_SYNC:  Trigger a synchronous manual sample.
+ * @PRFCNT_CONTROL_CMD_RESERVED:     Previously SAMPLE_ASYNC not supported any more.
+ * @PRFCNT_CONTROL_CMD_DISCARD:      Discard all samples which have not yet
+ *                                   been consumed by userspace. Note that
+ *                                   this can race with new samples if
+ *                                   HWC_CMD_STOP is not called first.
+ */
+enum prfcnt_control_cmd_code {
+	PRFCNT_CONTROL_CMD_START = 1,
+	PRFCNT_CONTROL_CMD_STOP,
+	PRFCNT_CONTROL_CMD_SAMPLE_SYNC,
+	PRFCNT_CONTROL_CMD_RESERVED,
+	PRFCNT_CONTROL_CMD_DISCARD,
+};
+
+/** struct prfcnt_control_cmd - Control command
+ * @cmd:       Control command for the session.
+ * @pad:       Padding bytes.
+ * @user_data: Pointer to user data, which will be returned as part of
+ *             sample metadata. It only affects a single sample if used
+ *             with CMD_SAMPLE_SYNC or CMD_SAMPLE_ASYNC. It affects all
+ *             samples between CMD_START and CMD_STOP if used with the
+ *             periodic sampling.
+ */
+struct prfcnt_control_cmd {
+	__u16 cmd;
+	__u16 pad[3];
+	__u64 user_data;
+};
+
+/** struct prfcnt_sample_access - Metadata to access a sample.
+ * @sequence:            Sequence number for the sample.
+ *                       For GET_SAMPLE, it will be set by the kernel.
+ *                       For PUT_SAMPLE, it shall be equal to the same value
+ *                       provided by the kernel for GET_SAMPLE.
+ * @sample_offset_bytes: Offset from the start of the mapped area to the first
+ *                       entry in the metadata list (sample_metadata) for this
+ *                       sample.
+ */
+struct prfcnt_sample_access {
+	__u64 sequence;
+	__u64 sample_offset_bytes;
+};
+
+/* The ids of ioctl commands, on a reader file descriptor, magic number */
+#define KBASE_KINSTR_PRFCNT_READER 0xBF
+/* Ioctl ID for issuing a session operational command */
+#define KBASE_IOCTL_KINSTR_PRFCNT_CMD \
+	_IOW(KBASE_KINSTR_PRFCNT_READER, 0x00, struct prfcnt_control_cmd)
+/* Ioctl ID for fetching a dumpped sample */
+#define KBASE_IOCTL_KINSTR_PRFCNT_GET_SAMPLE \
+	_IOR(KBASE_KINSTR_PRFCNT_READER, 0x01, struct prfcnt_sample_access)
+/* Ioctl ID for release internal buffer of the previously fetched sample */
+#define KBASE_IOCTL_KINSTR_PRFCNT_PUT_SAMPLE \
+	_IOW(KBASE_KINSTR_PRFCNT_READER, 0x10, struct prfcnt_sample_access)
+
+#endif /* _UAPI_KBASE_HWCNT_READER_H_ */
--- a/include/uapi/gpu/arm/midgard/mali_kbase_ioctl.h
+++ b/include/uapi/gpu/arm/midgard/mali_kbase_ioctl.h
@@ -0,0 +1,789 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *
+ * (C) COPYRIGHT 2017-2023 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#ifndef _UAPI_KBASE_IOCTL_H_
+#define _UAPI_KBASE_IOCTL_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <asm-generic/ioctl.h>
+#include <linux/types.h>
+
+#if MALI_USE_CSF
+#include "csf/mali_kbase_csf_ioctl.h"
+#else
+#include "jm/mali_kbase_jm_ioctl.h"
+#endif /* MALI_USE_CSF */
+
+#define KBASE_IOCTL_TYPE 0x80
+
+/**
+ * struct kbase_ioctl_set_flags - Set kernel context creation flags
+ *
+ * @create_flags: Flags - see base_context_create_flags
+ */
+struct kbase_ioctl_set_flags {
+	__u32 create_flags;
+};
+
+#define KBASE_IOCTL_SET_FLAGS _IOW(KBASE_IOCTL_TYPE, 1, struct kbase_ioctl_set_flags)
+
+/**
+ * struct kbase_ioctl_get_gpuprops - Read GPU properties from the kernel
+ *
+ * @buffer: Pointer to the buffer to store properties into
+ * @size: Size of the buffer
+ * @flags: Flags - must be zero for now
+ *
+ * The ioctl will return the number of bytes stored into @buffer or an error
+ * on failure (e.g. @size is too small). If @size is specified as 0 then no
+ * data will be written but the return value will be the number of bytes needed
+ * for all the properties.
+ *
+ * @flags may be used in the future to request a different format for the
+ * buffer. With @flags == 0 the following format is used.
+ *
+ * The buffer will be filled with pairs of values, a __u32 key identifying the
+ * property followed by the value. The size of the value is identified using
+ * the bottom bits of the key. The value then immediately followed the key and
+ * is tightly packed (there is no padding). All keys and values are
+ * little-endian.
+ *
+ * 00 = __u8
+ * 01 = __u16
+ * 10 = __u32
+ * 11 = __u64
+ */
+struct kbase_ioctl_get_gpuprops {
+	__u64 buffer;
+	__u32 size;
+	__u32 flags;
+};
+
+#define KBASE_IOCTL_GET_GPUPROPS _IOW(KBASE_IOCTL_TYPE, 3, struct kbase_ioctl_get_gpuprops)
+
+/**
+ * union kbase_ioctl_mem_alloc - Allocate memory on the GPU
+ * @in: Input parameters
+ * @in.va_pages: The number of pages of virtual address space to reserve
+ * @in.commit_pages: The number of physical pages to allocate
+ * @in.extension: The number of extra pages to allocate on each GPU fault which grows the region
+ * @in.flags: Flags
+ * @out: Output parameters
+ * @out.flags: Flags
+ * @out.gpu_va: The GPU virtual address which is allocated
+ */
+union kbase_ioctl_mem_alloc {
+	struct {
+		__u64 va_pages;
+		__u64 commit_pages;
+		__u64 extension;
+		__u64 flags;
+	} in;
+	struct {
+		__u64 flags;
+		__u64 gpu_va;
+	} out;
+};
+
+#define KBASE_IOCTL_MEM_ALLOC _IOWR(KBASE_IOCTL_TYPE, 5, union kbase_ioctl_mem_alloc)
+
+/**
+ * struct kbase_ioctl_mem_query - Query properties of a GPU memory region
+ * @in: Input parameters
+ * @in.gpu_addr: A GPU address contained within the region
+ * @in.query: The type of query
+ * @out: Output parameters
+ * @out.value: The result of the query
+ *
+ * Use a %KBASE_MEM_QUERY_xxx flag as input for @query.
+ */
+union kbase_ioctl_mem_query {
+	struct {
+		__u64 gpu_addr;
+		__u64 query;
+	} in;
+	struct {
+		__u64 value;
+	} out;
+};
+
+#define KBASE_IOCTL_MEM_QUERY _IOWR(KBASE_IOCTL_TYPE, 6, union kbase_ioctl_mem_query)
+
+#define KBASE_MEM_QUERY_COMMIT_SIZE ((__u64)1)
+#define KBASE_MEM_QUERY_VA_SIZE ((__u64)2)
+#define KBASE_MEM_QUERY_FLAGS ((__u64)3)
+
+/**
+ * struct kbase_ioctl_mem_free - Free a memory region
+ * @gpu_addr: Handle to the region to free
+ */
+struct kbase_ioctl_mem_free {
+	__u64 gpu_addr;
+};
+
+#define KBASE_IOCTL_MEM_FREE _IOW(KBASE_IOCTL_TYPE, 7, struct kbase_ioctl_mem_free)
+
+/**
+ * struct kbase_ioctl_hwcnt_reader_setup - Setup HWC dumper/reader
+ * @buffer_count: requested number of dumping buffers
+ * @fe_bm:        counters selection bitmask (Front end)
+ * @shader_bm:    counters selection bitmask (Shader)
+ * @tiler_bm:     counters selection bitmask (Tiler)
+ * @mmu_l2_bm:    counters selection bitmask (MMU_L2)
+ *
+ * A fd is returned from the ioctl if successful, or a negative value on error
+ */
+struct kbase_ioctl_hwcnt_reader_setup {
+	__u32 buffer_count;
+	__u32 fe_bm;
+	__u32 shader_bm;
+	__u32 tiler_bm;
+	__u32 mmu_l2_bm;
+};
+
+#define KBASE_IOCTL_HWCNT_READER_SETUP \
+	_IOW(KBASE_IOCTL_TYPE, 8, struct kbase_ioctl_hwcnt_reader_setup)
+
+/**
+ * struct kbase_ioctl_hwcnt_values - Values to set dummy the dummy counters to.
+ * @data:    Counter samples for the dummy model.
+ * @size:    Size of the counter sample data.
+ * @padding: Padding.
+ */
+struct kbase_ioctl_hwcnt_values {
+	__u64 data;
+	__u32 size;
+	__u32 padding;
+};
+
+#define KBASE_IOCTL_HWCNT_SET _IOW(KBASE_IOCTL_TYPE, 32, struct kbase_ioctl_hwcnt_values)
+
+/**
+ * struct kbase_ioctl_disjoint_query - Query the disjoint counter
+ * @counter:   A counter of disjoint events in the kernel
+ */
+struct kbase_ioctl_disjoint_query {
+	__u32 counter;
+};
+
+#define KBASE_IOCTL_DISJOINT_QUERY _IOR(KBASE_IOCTL_TYPE, 12, struct kbase_ioctl_disjoint_query)
+
+/**
+ * struct kbase_ioctl_get_ddk_version - Query the kernel version
+ * @version_buffer: Buffer to receive the kernel version string
+ * @size: Size of the buffer
+ * @padding: Padding
+ *
+ * The ioctl will return the number of bytes written into version_buffer
+ * (which includes a NULL byte) or a negative error code
+ *
+ * The ioctl request code has to be _IOW because the data in ioctl struct is
+ * being copied to the kernel, even though the kernel then writes out the
+ * version info to the buffer specified in the ioctl.
+ */
+struct kbase_ioctl_get_ddk_version {
+	__u64 version_buffer;
+	__u32 size;
+	__u32 padding;
+};
+
+#define KBASE_IOCTL_GET_DDK_VERSION _IOW(KBASE_IOCTL_TYPE, 13, struct kbase_ioctl_get_ddk_version)
+
+/**
+ * struct kbase_ioctl_mem_jit_init - Initialize the just-in-time memory
+ *                                   allocator
+ * @va_pages: Number of GPU virtual address pages to reserve for just-in-time
+ *            memory allocations
+ * @max_allocations: Maximum number of concurrent allocations
+ * @trim_level: Level of JIT allocation trimming to perform on free (0 - 100%)
+ * @group_id: Group ID to be used for physical allocations
+ * @padding: Currently unused, must be zero
+ * @phys_pages: Maximum number of physical pages to allocate just-in-time
+ *
+ * Note that depending on the VA size of the application and GPU, the value
+ * specified in @va_pages may be ignored.
+ */
+struct kbase_ioctl_mem_jit_init {
+	__u64 va_pages;
+	__u8 max_allocations;
+	__u8 trim_level;
+	__u8 group_id;
+	__u8 padding[5];
+	__u64 phys_pages;
+};
+
+#define KBASE_IOCTL_MEM_JIT_INIT _IOW(KBASE_IOCTL_TYPE, 14, struct kbase_ioctl_mem_jit_init)
+
+/**
+ * struct kbase_ioctl_mem_sync - Perform cache maintenance on memory
+ *
+ * @handle: GPU memory handle (GPU VA)
+ * @user_addr: The address where it is mapped in user space
+ * @size: The number of bytes to synchronise
+ * @type: The direction to synchronise: 0 is sync to memory (clean),
+ * 1 is sync from memory (invalidate). Use the BASE_SYNCSET_OP_xxx constants.
+ * @padding: Padding to round up to a multiple of 8 bytes, must be zero
+ */
+struct kbase_ioctl_mem_sync {
+	__u64 handle;
+	__u64 user_addr;
+	__u64 size;
+	__u8 type;
+	__u8 padding[7];
+};
+
+#define KBASE_IOCTL_MEM_SYNC _IOW(KBASE_IOCTL_TYPE, 15, struct kbase_ioctl_mem_sync)
+
+/**
+ * union kbase_ioctl_mem_find_cpu_offset - Find the offset of a CPU pointer
+ *
+ * @in: Input parameters
+ * @in.gpu_addr: The GPU address of the memory region
+ * @in.cpu_addr: The CPU address to locate
+ * @in.size: A size in bytes to validate is contained within the region
+ * @out: Output parameters
+ * @out.offset: The offset from the start of the memory region to @cpu_addr
+ */
+union kbase_ioctl_mem_find_cpu_offset {
+	struct {
+		__u64 gpu_addr;
+		__u64 cpu_addr;
+		__u64 size;
+	} in;
+	struct {
+		__u64 offset;
+	} out;
+};
+
+#define KBASE_IOCTL_MEM_FIND_CPU_OFFSET \
+	_IOWR(KBASE_IOCTL_TYPE, 16, union kbase_ioctl_mem_find_cpu_offset)
+
+/**
+ * struct kbase_ioctl_get_context_id - Get the kernel context ID
+ *
+ * @id: The kernel context ID
+ */
+struct kbase_ioctl_get_context_id {
+	__u32 id;
+};
+
+#define KBASE_IOCTL_GET_CONTEXT_ID _IOR(KBASE_IOCTL_TYPE, 17, struct kbase_ioctl_get_context_id)
+
+/**
+ * struct kbase_ioctl_tlstream_acquire - Acquire a tlstream fd
+ *
+ * @flags: Flags
+ *
+ * The ioctl returns a file descriptor when successful
+ */
+struct kbase_ioctl_tlstream_acquire {
+	__u32 flags;
+};
+
+#define KBASE_IOCTL_TLSTREAM_ACQUIRE _IOW(KBASE_IOCTL_TYPE, 18, struct kbase_ioctl_tlstream_acquire)
+
+#define KBASE_IOCTL_TLSTREAM_FLUSH _IO(KBASE_IOCTL_TYPE, 19)
+
+/**
+ * struct kbase_ioctl_mem_commit - Change the amount of memory backing a region
+ *
+ * @gpu_addr: The memory region to modify
+ * @pages:    The number of physical pages that should be present
+ *
+ * The ioctl may return on the following error codes or 0 for success:
+ *   -ENOMEM: Out of memory
+ *   -EINVAL: Invalid arguments
+ */
+struct kbase_ioctl_mem_commit {
+	__u64 gpu_addr;
+	__u64 pages;
+};
+
+#define KBASE_IOCTL_MEM_COMMIT _IOW(KBASE_IOCTL_TYPE, 20, struct kbase_ioctl_mem_commit)
+
+/**
+ * union kbase_ioctl_mem_alias - Create an alias of memory regions
+ * @in: Input parameters
+ * @in.flags: Flags, see BASE_MEM_xxx
+ * @in.stride: Bytes between start of each memory region
+ * @in.nents: The number of regions to pack together into the alias
+ * @in.aliasing_info: Pointer to an array of struct base_mem_aliasing_info
+ * @out: Output parameters
+ * @out.flags: Flags, see BASE_MEM_xxx
+ * @out.gpu_va: Address of the new alias
+ * @out.va_pages: Size of the new alias
+ */
+union kbase_ioctl_mem_alias {
+	struct {
+		__u64 flags;
+		__u64 stride;
+		__u64 nents;
+		__u64 aliasing_info;
+	} in;
+	struct {
+		__u64 flags;
+		__u64 gpu_va;
+		__u64 va_pages;
+	} out;
+};
+
+#define KBASE_IOCTL_MEM_ALIAS _IOWR(KBASE_IOCTL_TYPE, 21, union kbase_ioctl_mem_alias)
+
+/**
+ * union kbase_ioctl_mem_import - Import memory for use by the GPU
+ * @in: Input parameters
+ * @in.flags: Flags, see BASE_MEM_xxx
+ * @in.phandle: Handle to the external memory
+ * @in.type: Type of external memory, see base_mem_import_type
+ * @in.padding: Amount of extra VA pages to append to the imported buffer
+ * @out: Output parameters
+ * @out.flags: Flags, see BASE_MEM_xxx
+ * @out.gpu_va: Address of the new alias
+ * @out.va_pages: Size of the new alias
+ */
+union kbase_ioctl_mem_import {
+	struct {
+		__u64 flags;
+		__u64 phandle;
+		__u32 type;
+		__u32 padding;
+	} in;
+	struct {
+		__u64 flags;
+		__u64 gpu_va;
+		__u64 va_pages;
+	} out;
+};
+
+#define KBASE_IOCTL_MEM_IMPORT _IOWR(KBASE_IOCTL_TYPE, 22, union kbase_ioctl_mem_import)
+
+/**
+ * struct kbase_ioctl_mem_flags_change - Change the flags for a memory region
+ * @gpu_va: The GPU region to modify
+ * @flags: The new flags to set
+ * @mask: Mask of the flags to modify
+ */
+struct kbase_ioctl_mem_flags_change {
+	__u64 gpu_va;
+	__u64 flags;
+	__u64 mask;
+};
+
+#define KBASE_IOCTL_MEM_FLAGS_CHANGE _IOW(KBASE_IOCTL_TYPE, 23, struct kbase_ioctl_mem_flags_change)
+
+/**
+ * struct kbase_ioctl_stream_create - Create a synchronisation stream
+ * @name: A name to identify this stream. Must be NULL-terminated.
+ *
+ * Note that this is also called a "timeline", but is named stream to avoid
+ * confusion with other uses of the word.
+ *
+ * Unused bytes in @name (after the first NULL byte) must be also be NULL bytes.
+ *
+ * The ioctl returns a file descriptor.
+ */
+struct kbase_ioctl_stream_create {
+	char name[32];
+};
+
+#define KBASE_IOCTL_STREAM_CREATE _IOW(KBASE_IOCTL_TYPE, 24, struct kbase_ioctl_stream_create)
+
+/**
+ * struct kbase_ioctl_fence_validate - Validate a fd refers to a fence
+ * @fd: The file descriptor to validate
+ */
+struct kbase_ioctl_fence_validate {
+	int fd;
+};
+
+#define KBASE_IOCTL_FENCE_VALIDATE _IOW(KBASE_IOCTL_TYPE, 25, struct kbase_ioctl_fence_validate)
+
+/**
+ * struct kbase_ioctl_mem_profile_add - Provide profiling information to kernel
+ * @buffer: Pointer to the information
+ * @len: Length
+ * @padding: Padding
+ *
+ * The data provided is accessible through a debugfs file
+ */
+struct kbase_ioctl_mem_profile_add {
+	__u64 buffer;
+	__u32 len;
+	__u32 padding;
+};
+
+#define KBASE_IOCTL_MEM_PROFILE_ADD _IOW(KBASE_IOCTL_TYPE, 27, struct kbase_ioctl_mem_profile_add)
+
+/**
+ * struct kbase_ioctl_sticky_resource_map - Permanently map an external resource
+ * @count: Number of resources
+ * @address: Array of __u64 GPU addresses of the external resources to map
+ */
+struct kbase_ioctl_sticky_resource_map {
+	__u64 count;
+	__u64 address;
+};
+
+#define KBASE_IOCTL_STICKY_RESOURCE_MAP \
+	_IOW(KBASE_IOCTL_TYPE, 29, struct kbase_ioctl_sticky_resource_map)
+
+/**
+ * struct kbase_ioctl_sticky_resource_unmap - Unmap a resource mapped which was
+ *                                          previously permanently mapped
+ * @count: Number of resources
+ * @address: Array of __u64 GPU addresses of the external resources to unmap
+ */
+struct kbase_ioctl_sticky_resource_unmap {
+	__u64 count;
+	__u64 address;
+};
+
+#define KBASE_IOCTL_STICKY_RESOURCE_UNMAP \
+	_IOW(KBASE_IOCTL_TYPE, 30, struct kbase_ioctl_sticky_resource_unmap)
+
+/**
+ * union kbase_ioctl_mem_find_gpu_start_and_offset - Find the start address of
+ *                                                   the GPU memory region for
+ *                                                   the given gpu address and
+ *                                                   the offset of that address
+ *                                                   into the region
+ * @in: Input parameters
+ * @in.gpu_addr: GPU virtual address
+ * @in.size: Size in bytes within the region
+ * @out: Output parameters
+ * @out.start: Address of the beginning of the memory region enclosing @gpu_addr
+ *             for the length of @offset bytes
+ * @out.offset: The offset from the start of the memory region to @gpu_addr
+ */
+union kbase_ioctl_mem_find_gpu_start_and_offset {
+	struct {
+		__u64 gpu_addr;
+		__u64 size;
+	} in;
+	struct {
+		__u64 start;
+		__u64 offset;
+	} out;
+};
+
+#define KBASE_IOCTL_MEM_FIND_GPU_START_AND_OFFSET \
+	_IOWR(KBASE_IOCTL_TYPE, 31, union kbase_ioctl_mem_find_gpu_start_and_offset)
+
+#define KBASE_IOCTL_CINSTR_GWT_START _IO(KBASE_IOCTL_TYPE, 33)
+
+#define KBASE_IOCTL_CINSTR_GWT_STOP _IO(KBASE_IOCTL_TYPE, 34)
+
+/**
+ * union kbase_ioctl_cinstr_gwt_dump - Used to collect all GPU write fault
+ *                                     addresses.
+ * @in: Input parameters
+ * @in.addr_buffer: Address of buffer to hold addresses of gpu modified areas.
+ * @in.size_buffer: Address of buffer to hold size of modified areas (in pages)
+ * @in.len: Number of addresses the buffers can hold.
+ * @in.padding: padding
+ * @out: Output parameters
+ * @out.no_of_addr_collected: Number of addresses collected into addr_buffer.
+ * @out.more_data_available: Status indicating if more addresses are available.
+ * @out.padding: padding
+ *
+ * This structure is used when performing a call to dump GPU write fault
+ * addresses.
+ */
+union kbase_ioctl_cinstr_gwt_dump {
+	struct {
+		__u64 addr_buffer;
+		__u64 size_buffer;
+		__u32 len;
+		__u32 padding;
+
+	} in;
+	struct {
+		__u32 no_of_addr_collected;
+		__u8 more_data_available;
+		__u8 padding[27];
+	} out;
+};
+
+#define KBASE_IOCTL_CINSTR_GWT_DUMP _IOWR(KBASE_IOCTL_TYPE, 35, union kbase_ioctl_cinstr_gwt_dump)
+
+/**
+ * struct kbase_ioctl_mem_exec_init - Initialise the EXEC_VA memory zone
+ *
+ * @va_pages: Number of VA pages to reserve for EXEC_VA
+ */
+struct kbase_ioctl_mem_exec_init {
+	__u64 va_pages;
+};
+
+#define KBASE_IOCTL_MEM_EXEC_INIT _IOW(KBASE_IOCTL_TYPE, 38, struct kbase_ioctl_mem_exec_init)
+
+/**
+ * union kbase_ioctl_get_cpu_gpu_timeinfo - Request zero or more types of
+ *                                          cpu/gpu time (counter values)
+ * @in: Input parameters
+ * @in.request_flags: Bit-flags indicating the requested types.
+ * @in.paddings:      Unused, size alignment matching the out.
+ * @out: Output parameters
+ * @out.sec:           Integer field of the monotonic time, unit in seconds.
+ * @out.nsec:          Fractional sec of the monotonic time, in nano-seconds.
+ * @out.padding:       Unused, for __u64 alignment
+ * @out.timestamp:     System wide timestamp (counter) value.
+ * @out.cycle_counter: GPU cycle counter value.
+ */
+union kbase_ioctl_get_cpu_gpu_timeinfo {
+	struct {
+		__u32 request_flags;
+		__u32 paddings[7];
+	} in;
+	struct {
+		__u64 sec;
+		__u32 nsec;
+		__u32 padding;
+		__u64 timestamp;
+		__u64 cycle_counter;
+	} out;
+};
+
+#define KBASE_IOCTL_GET_CPU_GPU_TIMEINFO \
+	_IOWR(KBASE_IOCTL_TYPE, 50, union kbase_ioctl_get_cpu_gpu_timeinfo)
+
+/**
+ * struct kbase_ioctl_context_priority_check - Check the max possible priority
+ * @priority: Input priority & output priority
+ */
+
+struct kbase_ioctl_context_priority_check {
+	__u8 priority;
+};
+
+#define KBASE_IOCTL_CONTEXT_PRIORITY_CHECK \
+	_IOWR(KBASE_IOCTL_TYPE, 54, struct kbase_ioctl_context_priority_check)
+
+/**
+ * struct kbase_ioctl_set_limited_core_count - Set the limited core count.
+ *
+ * @max_core_count: Maximum core count
+ */
+struct kbase_ioctl_set_limited_core_count {
+	__u8 max_core_count;
+};
+
+#define KBASE_IOCTL_SET_LIMITED_CORE_COUNT \
+	_IOW(KBASE_IOCTL_TYPE, 55, struct kbase_ioctl_set_limited_core_count)
+
+/**
+ * struct kbase_ioctl_kinstr_prfcnt_enum_info - Enum Performance counter
+ *                                              information
+ * @info_item_size:  Performance counter item size in bytes.
+ * @info_item_count: Performance counter item count in the info_list_ptr.
+ * @info_list_ptr:   Performance counter item list pointer which points to a
+ *                   list with info_item_count of items.
+ *
+ * On success: returns info_item_size and info_item_count if info_list_ptr is
+ * NULL, returns performance counter information if info_list_ptr is not NULL.
+ * On error: returns a negative error code.
+ */
+struct kbase_ioctl_kinstr_prfcnt_enum_info {
+	__u32 info_item_size;
+	__u32 info_item_count;
+	__u64 info_list_ptr;
+};
+
+#define KBASE_IOCTL_KINSTR_PRFCNT_ENUM_INFO \
+	_IOWR(KBASE_IOCTL_TYPE, 56, struct kbase_ioctl_kinstr_prfcnt_enum_info)
+
+/**
+ * struct kbase_ioctl_kinstr_prfcnt_setup - Setup HWC dumper/reader
+ * @in: input parameters.
+ * @in.request_item_count: Number of requests in the requests array.
+ * @in.request_item_size:  Size in bytes of each request in the requests array.
+ * @in.requests_ptr:       Pointer to the requests array.
+ * @out: output parameters.
+ * @out.prfcnt_metadata_item_size: Size of each item in the metadata array for
+ *                                 each sample.
+ * @out.prfcnt_mmap_size_bytes:    Size in bytes that user-space should mmap
+ *                                 for reading performance counter samples.
+ *
+ * A fd is returned from the ioctl if successful, or a negative value on error.
+ */
+union kbase_ioctl_kinstr_prfcnt_setup {
+	struct {
+		__u32 request_item_count;
+		__u32 request_item_size;
+		__u64 requests_ptr;
+	} in;
+	struct {
+		__u32 prfcnt_metadata_item_size;
+		__u32 prfcnt_mmap_size_bytes;
+	} out;
+};
+
+#define KBASE_IOCTL_KINSTR_PRFCNT_SETUP \
+	_IOWR(KBASE_IOCTL_TYPE, 57, union kbase_ioctl_kinstr_prfcnt_setup)
+
+/***************
+ * test ioctls *
+ ***************/
+#if MALI_UNIT_TEST
+/* These ioctls are purely for test purposes and are not used in the production
+ * driver, they therefore may change without notice
+ */
+
+#define KBASE_IOCTL_TEST_TYPE (KBASE_IOCTL_TYPE + 1)
+
+
+/**
+ * struct kbase_ioctl_tlstream_stats - Read tlstream stats for test purposes
+ * @bytes_collected: number of bytes read by user
+ * @bytes_generated: number of bytes generated by tracepoints
+ */
+struct kbase_ioctl_tlstream_stats {
+	__u32 bytes_collected;
+	__u32 bytes_generated;
+};
+
+#define KBASE_IOCTL_TLSTREAM_STATS _IOR(KBASE_IOCTL_TEST_TYPE, 2, struct kbase_ioctl_tlstream_stats)
+
+#endif /* MALI_UNIT_TEST */
+
+/* Customer extension range */
+#define KBASE_IOCTL_EXTRA_TYPE (KBASE_IOCTL_TYPE + 2)
+
+/* If the integration needs extra ioctl add them there
+ * like this:
+ *
+ * struct my_ioctl_args {
+ *  ....
+ * }
+ *
+ * #define KBASE_IOCTL_MY_IOCTL \
+ *         _IOWR(KBASE_IOCTL_EXTRA_TYPE, 0, struct my_ioctl_args)
+ */
+
+/**********************************
+ * Definitions for GPU properties *
+ **********************************/
+#define KBASE_GPUPROP_VALUE_SIZE_U8 (0x0)
+#define KBASE_GPUPROP_VALUE_SIZE_U16 (0x1)
+#define KBASE_GPUPROP_VALUE_SIZE_U32 (0x2)
+#define KBASE_GPUPROP_VALUE_SIZE_U64 (0x3)
+
+#define KBASE_GPUPROP_PRODUCT_ID 1
+#define KBASE_GPUPROP_VERSION_STATUS 2
+#define KBASE_GPUPROP_MINOR_REVISION 3
+#define KBASE_GPUPROP_MAJOR_REVISION 4
+/* 5 previously used for GPU speed */
+#define KBASE_GPUPROP_GPU_FREQ_KHZ_MAX 6
+/* 7 previously used for minimum GPU speed */
+#define KBASE_GPUPROP_LOG2_PROGRAM_COUNTER_SIZE 8
+#define KBASE_GPUPROP_TEXTURE_FEATURES_0 9
+#define KBASE_GPUPROP_TEXTURE_FEATURES_1 10
+#define KBASE_GPUPROP_TEXTURE_FEATURES_2 11
+#define KBASE_GPUPROP_GPU_AVAILABLE_MEMORY_SIZE 12
+
+#define KBASE_GPUPROP_L2_LOG2_LINE_SIZE 13
+#define KBASE_GPUPROP_L2_LOG2_CACHE_SIZE 14
+#define KBASE_GPUPROP_L2_NUM_L2_SLICES 15
+
+#define KBASE_GPUPROP_TILER_BIN_SIZE_BYTES 16
+#define KBASE_GPUPROP_TILER_MAX_ACTIVE_LEVELS 17
+
+#define KBASE_GPUPROP_MAX_THREADS 18
+#define KBASE_GPUPROP_MAX_WORKGROUP_SIZE 19
+#define KBASE_GPUPROP_MAX_BARRIER_SIZE 20
+#define KBASE_GPUPROP_MAX_REGISTERS 21
+#define KBASE_GPUPROP_MAX_TASK_QUEUE 22
+#define KBASE_GPUPROP_MAX_THREAD_GROUP_SPLIT 23
+#define KBASE_GPUPROP_IMPL_TECH 24
+
+#define KBASE_GPUPROP_RAW_SHADER_PRESENT 25
+#define KBASE_GPUPROP_RAW_TILER_PRESENT 26
+#define KBASE_GPUPROP_RAW_L2_PRESENT 27
+#define KBASE_GPUPROP_RAW_STACK_PRESENT 28
+#define KBASE_GPUPROP_RAW_L2_FEATURES 29
+#define KBASE_GPUPROP_RAW_CORE_FEATURES 30
+#define KBASE_GPUPROP_RAW_MEM_FEATURES 31
+#define KBASE_GPUPROP_RAW_MMU_FEATURES 32
+#define KBASE_GPUPROP_RAW_AS_PRESENT 33
+#define KBASE_GPUPROP_RAW_JS_PRESENT 34
+#define KBASE_GPUPROP_RAW_JS_FEATURES_0 35
+#define KBASE_GPUPROP_RAW_JS_FEATURES_1 36
+#define KBASE_GPUPROP_RAW_JS_FEATURES_2 37
+#define KBASE_GPUPROP_RAW_JS_FEATURES_3 38
+#define KBASE_GPUPROP_RAW_JS_FEATURES_4 39
+#define KBASE_GPUPROP_RAW_JS_FEATURES_5 40
+#define KBASE_GPUPROP_RAW_JS_FEATURES_6 41
+#define KBASE_GPUPROP_RAW_JS_FEATURES_7 42
+#define KBASE_GPUPROP_RAW_JS_FEATURES_8 43
+#define KBASE_GPUPROP_RAW_JS_FEATURES_9 44
+#define KBASE_GPUPROP_RAW_JS_FEATURES_10 45
+#define KBASE_GPUPROP_RAW_JS_FEATURES_11 46
+#define KBASE_GPUPROP_RAW_JS_FEATURES_12 47
+#define KBASE_GPUPROP_RAW_JS_FEATURES_13 48
+#define KBASE_GPUPROP_RAW_JS_FEATURES_14 49
+#define KBASE_GPUPROP_RAW_JS_FEATURES_15 50
+#define KBASE_GPUPROP_RAW_TILER_FEATURES 51
+#define KBASE_GPUPROP_RAW_TEXTURE_FEATURES_0 52
+#define KBASE_GPUPROP_RAW_TEXTURE_FEATURES_1 53
+#define KBASE_GPUPROP_RAW_TEXTURE_FEATURES_2 54
+#define KBASE_GPUPROP_RAW_GPU_ID 55
+#define KBASE_GPUPROP_RAW_THREAD_MAX_THREADS 56
+#define KBASE_GPUPROP_RAW_THREAD_MAX_WORKGROUP_SIZE 57
+#define KBASE_GPUPROP_RAW_THREAD_MAX_BARRIER_SIZE 58
+#define KBASE_GPUPROP_RAW_THREAD_FEATURES 59
+#define KBASE_GPUPROP_RAW_COHERENCY_MODE 60
+
+#define KBASE_GPUPROP_COHERENCY_NUM_GROUPS 61
+#define KBASE_GPUPROP_COHERENCY_NUM_CORE_GROUPS 62
+#define KBASE_GPUPROP_COHERENCY_COHERENCY 63
+#define KBASE_GPUPROP_COHERENCY_GROUP_0 64
+#define KBASE_GPUPROP_COHERENCY_GROUP_1 65
+#define KBASE_GPUPROP_COHERENCY_GROUP_2 66
+#define KBASE_GPUPROP_COHERENCY_GROUP_3 67
+#define KBASE_GPUPROP_COHERENCY_GROUP_4 68
+#define KBASE_GPUPROP_COHERENCY_GROUP_5 69
+#define KBASE_GPUPROP_COHERENCY_GROUP_6 70
+#define KBASE_GPUPROP_COHERENCY_GROUP_7 71
+#define KBASE_GPUPROP_COHERENCY_GROUP_8 72
+#define KBASE_GPUPROP_COHERENCY_GROUP_9 73
+#define KBASE_GPUPROP_COHERENCY_GROUP_10 74
+#define KBASE_GPUPROP_COHERENCY_GROUP_11 75
+#define KBASE_GPUPROP_COHERENCY_GROUP_12 76
+#define KBASE_GPUPROP_COHERENCY_GROUP_13 77
+#define KBASE_GPUPROP_COHERENCY_GROUP_14 78
+#define KBASE_GPUPROP_COHERENCY_GROUP_15 79
+
+#define KBASE_GPUPROP_TEXTURE_FEATURES_3 80
+#define KBASE_GPUPROP_RAW_TEXTURE_FEATURES_3 81
+
+#define KBASE_GPUPROP_NUM_EXEC_ENGINES 82
+
+#define KBASE_GPUPROP_RAW_THREAD_TLS_ALLOC 83
+#define KBASE_GPUPROP_TLS_ALLOC 84
+#define KBASE_GPUPROP_RAW_GPU_FEATURES 85
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _UAPI_KBASE_IOCTL_H_ */
--- a/include/uapi/gpu/arm/midgard/mali_kbase_kinstr_jm_reader.h
+++ b/include/uapi/gpu/arm/midgard/mali_kbase_kinstr_jm_reader.h
@@ -0,0 +1,69 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *
+ * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+/*
+ * mali_kbase_kinstr_jm_reader.h
+ * Provides an ioctl API to read kernel atom state changes. The flow of the
+ * API is:
+ *    1. Obtain the file descriptor with ``KBASE_IOCTL_KINSTR_JM_FD``
+ *    2. Determine the buffer structure layout via the above ioctl's returned
+ *       size and version fields in ``struct kbase_kinstr_jm_fd_out``
+ *    4. Poll the file descriptor for ``POLLIN``
+ *    5. Get data with read() on the fd
+ *    6. Use the structure version to understand how to read the data from the
+ *       buffer
+ *    7. Repeat 4-6
+ *    8. Close the file descriptor
+ */
+
+#ifndef _UAPI_KBASE_KINSTR_JM_READER_H_
+#define _UAPI_KBASE_KINSTR_JM_READER_H_
+
+/**
+ * enum kbase_kinstr_jm_reader_atom_state - Determines the work state of an atom
+ * @KBASE_KINSTR_JM_READER_ATOM_STATE_QUEUE:    Signifies that an atom has
+ *                                              entered a hardware queue
+ * @KBASE_KINSTR_JM_READER_ATOM_STATE_START:    Signifies that work has started
+ *                                              on an atom
+ * @KBASE_KINSTR_JM_READER_ATOM_STATE_STOP:     Signifies that work has stopped
+ *                                              on an atom
+ * @KBASE_KINSTR_JM_READER_ATOM_STATE_COMPLETE: Signifies that work has
+ *                                              completed on an atom
+ * @KBASE_KINSTR_JM_READER_ATOM_STATE_COUNT:    The number of state enumerations
+ *
+ * We can add new states to the end of this if they do not break the existing
+ * state machine. Old user mode code can gracefully ignore states they do not
+ * understand.
+ *
+ * If we need to make a breaking change to the state machine, we can do that by
+ * changing the version reported by KBASE_IOCTL_KINSTR_JM_FD. This will
+ * mean that old user mode code will fail to understand the new state field in
+ * the structure and gracefully not use the state change API.
+ */
+enum kbase_kinstr_jm_reader_atom_state {
+	KBASE_KINSTR_JM_READER_ATOM_STATE_QUEUE,
+	KBASE_KINSTR_JM_READER_ATOM_STATE_START,
+	KBASE_KINSTR_JM_READER_ATOM_STATE_STOP,
+	KBASE_KINSTR_JM_READER_ATOM_STATE_COMPLETE,
+	KBASE_KINSTR_JM_READER_ATOM_STATE_COUNT
+};
+
+#endif /* _UAPI_KBASE_KINSTR_JM_READER_H_ */
--- a/include/uapi/gpu/arm/midgard/mali_kbase_mem_profile_debugfs_buf_size.h
+++ b/include/uapi/gpu/arm/midgard/mali_kbase_mem_profile_debugfs_buf_size.h
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *
+ * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+/**
+ * DOC: Header file for the size of the buffer to accumulate the histogram report text in
+ */
+
+#ifndef _UAPI_KBASE_MEM_PROFILE_DEBUGFS_BUF_SIZE_H_
+#define _UAPI_KBASE_MEM_PROFILE_DEBUGFS_BUF_SIZE_H_
+
+/**
+ * KBASE_MEM_PROFILE_MAX_BUF_SIZE - The size of the buffer to accumulate the histogram report text
+ *                                  in @see @ref CCTXP_HIST_BUF_SIZE_MAX_LENGTH_REPORT
+ */
+#define KBASE_MEM_PROFILE_MAX_BUF_SIZE ((size_t)(64 + ((80 + (56 * 64)) * 57) + 56))
+
+#endif /*_UAPI_KBASE_MEM_PROFILE_DEBUGFS_BUF_SIZE_H_*/