mirror of
https://github.com/hardkernel/linux.git
synced 2026-06-06 10:58:48 +09:00
Mali: bifrost: Using Upstream For MT Unmapped Area Topdown-Search
Comes from 03490297.diff in mail "241108: 21:51: ARM support: Inchara: ".
The original patch contains the following information:
{
Subject: [PATCH] [Official] GPUCORE-43479 Using Upstream For MT Unmapped Area Topdown-Search
This commit addresses the unmapped area search not ending its loop
operation issue. The change applies the upstream vm_unmapped_area()
with Maple-Tree (kernel 6.1.x) for search operations and then uses
a kbase specific adjustment loop for revisions to meet some extra
hardware alignment constraints.
}
The "not ending its loop operation issue" shows up in various RK tests as
an "rcu_sched self-detected stall on CPU" with a call trace inside the Mali driver.
An instance:
[ 1442.379022] rcu: INFO: rcu_sched self-detected stall on CPU
[ 1442.379028] rcu: 4-....: (14999 ticks this GP) idle=a8b4/1/0x4000000000000000 softirq=152230/152230 fqs=7492
[ 1442.379033] (t=15000 jiffies g=391269 q=378 ncpus=8)
[ 1442.379038] CPU: 4 PID: 1456 Comm: glmark2-es2-way Not tainted 6.1.99 #406
[ 1442.379041] Hardware name: Rockchip RK3588 EVB7 V11 Board (DT)
[ 1442.379044] pstate: 80400009 (Nzcv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--)
[ 1442.379047] pc : mas_empty_area_rev+0x150/0x540
[ 1442.379054] lr : mas_empty_area_rev+0x198/0x540
[ 1442.379056] sp : ffffffc00f6ebae0
...
[ 1442.379097] Call trace:
[ 1442.379099] mas_empty_area_rev+0x150/0x540
[ 1442.379102] kbase_unmapped_area_topdown+0x144/0x264
[ 1442.379107] kbase_context_get_unmapped_area+0x244/0x2ec
[ 1442.379111] kbase_get_unmapped_area+0x48/0x70
[ 1442.379116] get_unmapped_area+0x5c/0x100
[ 1442.379121] do_mmap+0xe0/0x450
[ 1442.379124] vm_mmap_pgoff+0xa0/0x150
[ 1442.379128] ksys_mmap_pgoff+0x9c/0xd0
[ 1442.379132] __arm64_sys_mmap+0x34/0x44
[ 1442.379136] invoke_syscall+0x4c/0x114
[ 1442.379141] el0_svc_common.constprop.0+0x54/0x180
[ 1442.379144] do_el0_svc+0x20/0x2c
[ 1442.379148] el0_svc+0x14/0x80
[ 1442.379151] el0t_64_sync_handler+0xb0/0xb4
[ 1442.379155] el0t_64_sync+0x158/0x15c
[ 1442.379158]
[ 1442.379158] PC: 0xffffffc009375950:
...
Change-Id: I6c5b1d37cf7f0853282126c23ce779032b7fabe7
Signed-off-by: Zhen Chen <chenzhen@rock-chips.com>
This commit is contained in:
344
drivers/gpu/arm/bifrost/thirdparty/mali_kbase_mmap.c
vendored
344
drivers/gpu/arm/bifrost/thirdparty/mali_kbase_mmap.c
vendored
@@ -20,155 +20,88 @@
|
||||
* kbase_context_get_unmapped_area() interface.
|
||||
*/
|
||||
|
||||
#if (KERNEL_VERSION(6, 1, 0) <= LINUX_VERSION_CODE)
|
||||
/**
|
||||
* move_mt_gap() - Search the maple tree for an existing gap of a particular size
|
||||
* immediately before another pre-identified gap.
|
||||
* @gap_start: Pre-identified gap starting address.
|
||||
* @gap_end: Pre-identified gap ending address.
|
||||
* @size: Size of the new gap needed before gap_start.
|
||||
* shader_code_align_and_check() - Align the specified pointer according to shader code
|
||||
* requirement.
|
||||
*
|
||||
* This function will search the calling process' maple tree
|
||||
* for another gap, one that is immediately preceding the pre-identified
|
||||
* gap, for a specific size, and upon success it will decrement gap_end
|
||||
* by the specified size, and replace gap_start with the new gap_start of
|
||||
* the newly identified gap.
|
||||
* @gap_end: Highest possible start address for alignment. The caller must ensure
|
||||
* the input has already been properly aligned with info contained fields.
|
||||
* @info: vm_unmapped_area_info structure passed, containing alignment, length
|
||||
* and limits for the allocation
|
||||
* The function only undertakes the shader code alignment adjustment. It's the caller's
|
||||
* responsibility that the input value provided via gap_end has already been properly aligned
|
||||
* in compliance to the fields specified in the info structure. Irrespective the return result,
|
||||
* the value of the variable pointed by the pointer gap_end may have been decreased in
|
||||
* reaching the required alignment, but will not drop below info->low_limit.
|
||||
*
|
||||
* Return: true if large enough preceding gap is found, false otherwise.
|
||||
* Return: true if gap_end is now aligned correctly, false otherwise
|
||||
*/
|
||||
static bool move_mt_gap(unsigned long *gap_start, unsigned long *gap_end, unsigned long size)
|
||||
static bool shader_code_align_and_check(unsigned long *gap_end, struct vm_unmapped_area_info *info)
|
||||
{
|
||||
unsigned long new_gap_start, new_gap_end;
|
||||
unsigned long align_adjust = (info->align_offset ? info->align_offset : info->length);
|
||||
unsigned long align_floor = info->low_limit + align_adjust;
|
||||
|
||||
MA_STATE(mas, &current->mm->mm_mt, 0, 0);
|
||||
/* Check for 4GB address inner high-bit pattern, make adjustment if all zeros */
|
||||
if (0 == (*gap_end & BASE_MEM_MASK_4GB) && *gap_end >= align_floor)
|
||||
(*gap_end) -= align_adjust;
|
||||
if (0 == ((*gap_end + info->length) & BASE_MEM_MASK_4GB) && *gap_end >= align_floor)
|
||||
(*gap_end) -= align_adjust;
|
||||
|
||||
if (*gap_end < size)
|
||||
return false;
|
||||
|
||||
/* Calculate the gap end for the new, resultant gap */
|
||||
new_gap_end = *gap_end - size;
|
||||
|
||||
/* If the new gap_end (i.e. new VA start address) is larger than gap_start, than the
|
||||
* pre-identified gap already has space to shrink to accommodate the decrease in
|
||||
* gap_end.
|
||||
*/
|
||||
if (new_gap_end >= *gap_start) {
|
||||
/* Pre-identified gap already has space - just patch gap_end to new
|
||||
* lower value and exit.
|
||||
*/
|
||||
*gap_end = new_gap_end;
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Since the new VA start address (new_gap_end) is below the start of the pre-identified
|
||||
* gap in the maple tree, see if there is a free gap directly before the existing gap, of
|
||||
* the same size as the alignment shift, such that the effective gap found is "extended".
|
||||
* This may be larger than needed but leaves the same distance between gap_end and gap_start
|
||||
* that currently exists.
|
||||
*/
|
||||
new_gap_start = *gap_start - size;
|
||||
if (mas_empty_area_rev(&mas, new_gap_start, *gap_start - 1, size)) {
|
||||
/* There's no gap between the new start address needed and the
|
||||
* current start address - so return false to find a new
|
||||
* gap from the maple tree.
|
||||
*/
|
||||
return false;
|
||||
}
|
||||
/* Suitable gap found - replace gap_start and gap_end with new values. gap_start takes the
|
||||
* value of the start of new gap found, which now correctly precedes gap_end, and gap_end
|
||||
* takes on the new aligned value that has now been decremented by the requested size.
|
||||
*/
|
||||
*gap_start = mas.index;
|
||||
*gap_end = new_gap_end;
|
||||
return true;
|
||||
return ((*gap_end & BASE_MEM_MASK_4GB) && ((*gap_end + info->length) & BASE_MEM_MASK_4GB));
|
||||
}
|
||||
|
||||
/**
|
||||
* align_and_check() - Align the specified pointer to the provided alignment and
|
||||
* check that it is still in range. On kernel 6.1 onwards
|
||||
* this function does not require that the initial requested
|
||||
* gap is extended with the maximum size needed to guarantee
|
||||
* an alignment.
|
||||
* @gap_end: Highest possible start address for allocation (end of gap in
|
||||
* address space)
|
||||
* @gap_start: Start address of current memory area / gap in address space
|
||||
* @info: vm_unmapped_area_info structure passed to caller, containing
|
||||
* alignment, length and limits for the allocation
|
||||
* @is_shader_code: True if the allocation is for shader code (which has
|
||||
* additional alignment requirements)
|
||||
* @is_same_4gb_page: True if the allocation needs to reside completely within
|
||||
* a 4GB chunk
|
||||
* align_4gb_no_straddle() - Align the specified pointer not to straddle over a 4_GB boundary.
|
||||
*
|
||||
* Return: true if gap_end is now aligned correctly and is still in range,
|
||||
* false otherwise
|
||||
* @gap_end: Highest possible start address for alignment. The caller must ensure
|
||||
* the input has already been properly aligned with info contained fields.
|
||||
* @info: vm_unmapped_area_info structure passed, containing alignment, length
|
||||
* and limits for the allocation
|
||||
*
|
||||
* The function only undertakes the 4GB boundary alignment adjustment. It's the caller's
|
||||
* responsibility that the input value provided via gap_end has already been properly aligned
|
||||
* in compliance to the fields specified in the info structure.
|
||||
*
|
||||
* Return: true is always expected and the gap_end is aligned correctly, false can only
|
||||
* be possible when the code has been wrongly modified.
|
||||
*/
|
||||
static bool align_and_check(unsigned long *gap_end, unsigned long gap_start,
|
||||
struct vm_unmapped_area_info *info, bool is_shader_code,
|
||||
bool is_same_4gb_page)
|
||||
static bool align_4gb_no_straddle(unsigned long *gap_end, struct vm_unmapped_area_info *info)
|
||||
{
|
||||
unsigned long alignment_shift;
|
||||
unsigned long start = *gap_end;
|
||||
unsigned long end = *gap_end + info->length;
|
||||
unsigned long mask = ~((unsigned long)U32_MAX);
|
||||
|
||||
/* Compute highest gap address at the desired alignment */
|
||||
*gap_end -= info->length;
|
||||
alignment_shift = (*gap_end - info->align_offset) & info->align_mask;
|
||||
/* Check if 4GB boundary is straddled */
|
||||
if ((start & mask) != ((end - 1) & mask)) {
|
||||
unsigned long offset = end - (end & mask);
|
||||
/* This is to ensure that alignment doesn't get
|
||||
* disturbed in an attempt to prevent straddling at
|
||||
* 4GB boundary. The GPU VA is aligned to 2MB when the
|
||||
* allocation size is > 2MB and there is enough CPU &
|
||||
* GPU virtual space.
|
||||
*/
|
||||
unsigned long rounded_offset = ALIGN(offset, info->align_mask + 1);
|
||||
|
||||
/* Align desired start VA (gap_end) by calculated alignment shift amount */
|
||||
if (!move_mt_gap(&gap_start, gap_end, alignment_shift))
|
||||
return false;
|
||||
/* Alignment is done so far - check for further alignment requirements */
|
||||
start -= rounded_offset;
|
||||
end -= rounded_offset;
|
||||
|
||||
if (is_shader_code) {
|
||||
/* Shader code allocations must not start or end on a 4GB boundary */
|
||||
alignment_shift = info->align_offset ? info->align_offset : info->length;
|
||||
if (0 == (*gap_end & BASE_MEM_MASK_4GB)) {
|
||||
if (!move_mt_gap(&gap_start, gap_end, alignment_shift))
|
||||
return false;
|
||||
}
|
||||
if (0 == ((*gap_end + info->length) & BASE_MEM_MASK_4GB)) {
|
||||
if (!move_mt_gap(&gap_start, gap_end, alignment_shift))
|
||||
return false;
|
||||
}
|
||||
/* Patch gap_end to use new starting address for VA region */
|
||||
*gap_end = start;
|
||||
|
||||
if (!(*gap_end & BASE_MEM_MASK_4GB) ||
|
||||
!((*gap_end + info->length) & BASE_MEM_MASK_4GB))
|
||||
/* The preceding 4GB boundary shall not get straddled,
|
||||
* even after accounting for the alignment, as the
|
||||
* size of allocation is limited to 4GB and the initial
|
||||
* start location was already aligned.
|
||||
*/
|
||||
if (WARN_ONCE((start & mask) != ((end - 1) & mask),
|
||||
"Alignment unexpected straddles over 4GB boundary!"))
|
||||
return false;
|
||||
} else if (is_same_4gb_page) {
|
||||
unsigned long start = *gap_end;
|
||||
unsigned long end = *gap_end + info->length;
|
||||
unsigned long mask = ~((unsigned long)U32_MAX);
|
||||
|
||||
/* Check if 4GB boundary is straddled */
|
||||
if ((start & mask) != ((end - 1) & mask)) {
|
||||
unsigned long offset = end - (end & mask);
|
||||
/* This is to ensure that alignment doesn't get
|
||||
* disturbed in an attempt to prevent straddling at
|
||||
* 4GB boundary. The GPU VA is aligned to 2MB when the
|
||||
* allocation size is > 2MB and there is enough CPU &
|
||||
* GPU virtual space.
|
||||
*/
|
||||
unsigned long rounded_offset = ALIGN(offset, info->align_mask + 1);
|
||||
|
||||
if (!move_mt_gap(&gap_start, gap_end, rounded_offset))
|
||||
return false;
|
||||
/* Re-calculate start and end values */
|
||||
start = *gap_end;
|
||||
end = *gap_end + info->length;
|
||||
|
||||
/* The preceding 4GB boundary shall not get straddled,
|
||||
* even after accounting for the alignment, as the
|
||||
* size of allocation is limited to 4GB and the initial
|
||||
* start location was already aligned.
|
||||
*/
|
||||
WARN_ON((start & mask) != ((end - 1) & mask));
|
||||
}
|
||||
}
|
||||
|
||||
if ((*gap_end < info->low_limit) || (*gap_end < gap_start))
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
#else
|
||||
|
||||
#if (KERNEL_VERSION(6, 1, 0) > LINUX_VERSION_CODE) || !defined(__ANDROID_COMMON_KERNEL__)
|
||||
/**
|
||||
* align_and_check() - Align the specified pointer to the provided alignment and
|
||||
* check that it is still in range. For Kernel versions below
|
||||
@@ -196,45 +129,11 @@ static bool align_and_check(unsigned long *gap_end, unsigned long gap_start,
|
||||
*gap_end -= (*gap_end - info->align_offset) & info->align_mask;
|
||||
|
||||
if (is_shader_code) {
|
||||
/* Check for 4GB boundary */
|
||||
if (0 == (*gap_end & BASE_MEM_MASK_4GB))
|
||||
(*gap_end) -= (info->align_offset ? info->align_offset : info->length);
|
||||
if (0 == ((*gap_end + info->length) & BASE_MEM_MASK_4GB))
|
||||
(*gap_end) -= (info->align_offset ? info->align_offset : info->length);
|
||||
|
||||
if (!(*gap_end & BASE_MEM_MASK_4GB) ||
|
||||
!((*gap_end + info->length) & BASE_MEM_MASK_4GB))
|
||||
if (!shader_code_align_and_check(gap_end, info))
|
||||
return false;
|
||||
} else if (is_same_4gb_page)
|
||||
if (!align_4gb_no_straddle(gap_end, info))
|
||||
return false;
|
||||
} else if (is_same_4gb_page) {
|
||||
unsigned long start = *gap_end;
|
||||
unsigned long end = *gap_end + info->length;
|
||||
unsigned long mask = ~((unsigned long)U32_MAX);
|
||||
|
||||
/* Check if 4GB boundary is straddled */
|
||||
if ((start & mask) != ((end - 1) & mask)) {
|
||||
unsigned long offset = end - (end & mask);
|
||||
/* This is to ensure that alignment doesn't get
|
||||
* disturbed in an attempt to prevent straddling at
|
||||
* 4GB boundary. The GPU VA is aligned to 2MB when the
|
||||
* allocation size is > 2MB and there is enough CPU &
|
||||
* GPU virtual space.
|
||||
*/
|
||||
unsigned long rounded_offset = ALIGN(offset, info->align_mask + 1);
|
||||
|
||||
start -= rounded_offset;
|
||||
end -= rounded_offset;
|
||||
|
||||
/* Patch gap_end to use new starting address for VA region */
|
||||
*gap_end = start;
|
||||
|
||||
/* The preceding 4GB boundary shall not get straddled,
|
||||
* even after accounting for the alignment, as the
|
||||
* size of allocation is limited to 4GB and the initial
|
||||
* start location was already aligned.
|
||||
*/
|
||||
WARN_ON((start & mask) != ((end - 1) & mask));
|
||||
}
|
||||
}
|
||||
|
||||
if ((*gap_end < info->low_limit) || (*gap_end < gap_start))
|
||||
return false;
|
||||
@@ -370,33 +269,124 @@ check_current:
|
||||
}
|
||||
}
|
||||
}
|
||||
#else
|
||||
unsigned long high_limit, gap_start, gap_end;
|
||||
#else /* KERNEL_VERSION(6, 1, 0) > LINUX_VERSION_CODE */
|
||||
#ifdef __ANDROID_COMMON_KERNEL__
|
||||
struct vm_unmapped_area_info tmp_info = *info;
|
||||
unsigned long length;
|
||||
|
||||
tmp_info.flags |= VM_UNMAPPED_AREA_TOPDOWN;
|
||||
if (!(is_shader_code || is_same_4gb_page))
|
||||
return vm_unmapped_area(&tmp_info);
|
||||
|
||||
length = info->length + info->align_mask;
|
||||
|
||||
/* Due to additional alignment requirement, shader_code or same_4gb_page
|
||||
* needs iterations for alignment search and confirmation check.
|
||||
*/
|
||||
while (true) {
|
||||
unsigned long saved_high_lmt = tmp_info.high_limit;
|
||||
unsigned long gap_end, start, rev_high_limit;
|
||||
|
||||
gap_end = vm_unmapped_area(&tmp_info);
|
||||
if (IS_ERR_VALUE(gap_end))
|
||||
return gap_end;
|
||||
|
||||
start = gap_end;
|
||||
if (is_shader_code) {
|
||||
bool shader_code_aligned;
|
||||
unsigned long align_cmp_ref;
|
||||
|
||||
while (true) {
|
||||
/* Save the start value for progress check. the loop needs
|
||||
* to end if the alignment can't progress any further.
|
||||
* In summary, the loop ends condition here is either:
|
||||
* 1. shader_code_aligned is true; or
|
||||
* 2. align_cmp_ref == gap_end.
|
||||
*/
|
||||
align_cmp_ref = gap_end;
|
||||
|
||||
shader_code_aligned =
|
||||
shader_code_align_and_check(&gap_end, &tmp_info);
|
||||
if (shader_code_aligned || (align_cmp_ref == gap_end))
|
||||
break;
|
||||
}
|
||||
|
||||
if (shader_code_aligned) {
|
||||
if (start == gap_end)
|
||||
return gap_end;
|
||||
|
||||
rev_high_limit = gap_end + length;
|
||||
} else
|
||||
break;
|
||||
} else {
|
||||
/* must be same_4gb_page case */
|
||||
if (likely(align_4gb_no_straddle(&gap_end, &tmp_info))) {
|
||||
if (start == gap_end)
|
||||
return gap_end;
|
||||
|
||||
rev_high_limit = gap_end + length;
|
||||
} else
|
||||
break;
|
||||
}
|
||||
|
||||
if (rev_high_limit < info->low_limit)
|
||||
break;
|
||||
|
||||
if (WARN_ONCE(rev_high_limit >= saved_high_lmt,
|
||||
"Unexpected recurring high_limit in search, %lx => %lx\n"
|
||||
"\tinfo-input: limit=[%lx, %lx], mask=%lx, len=%lx\n",
|
||||
saved_high_lmt, rev_high_limit, info->low_limit, info->high_limit,
|
||||
info->align_mask, info->length))
|
||||
rev_high_limit = saved_high_lmt -
|
||||
(info->align_offset ? info->align_offset : info->length);
|
||||
|
||||
/* Repeat the search with a decreasing rev_high_limit */
|
||||
tmp_info.high_limit = rev_high_limit;
|
||||
}
|
||||
#else /* __ANDROID_COMMON_KERNEL__ */
|
||||
unsigned long length, high_limit;
|
||||
|
||||
MA_STATE(mas, &current->mm->mm_mt, 0, 0);
|
||||
|
||||
/*
|
||||
* Adjust search limits by the desired length.
|
||||
* See implementation comment at top of unmapped_area().
|
||||
*/
|
||||
gap_end = info->high_limit;
|
||||
if (gap_end < info->length)
|
||||
/* Adjust search length to account for worst case alignment overhead */
|
||||
length = info->length + info->align_mask;
|
||||
if (length < info->length)
|
||||
return -ENOMEM;
|
||||
high_limit = gap_end - info->length;
|
||||
|
||||
if (info->low_limit > high_limit)
|
||||
high_limit = info->high_limit;
|
||||
if ((high_limit - info->low_limit) < length)
|
||||
return -ENOMEM;
|
||||
|
||||
while (true) {
|
||||
if (mas_empty_area_rev(&mas, info->low_limit, info->high_limit - 1, info->length))
|
||||
unsigned long gap_start, gap_end;
|
||||
unsigned long saved_high_lmt = high_limit;
|
||||
|
||||
if (mas_empty_area_rev(&mas, info->low_limit, high_limit - 1, length))
|
||||
return -ENOMEM;
|
||||
|
||||
gap_end = mas.last + 1;
|
||||
gap_start = mas.index;
|
||||
|
||||
if (align_and_check(&gap_end, gap_start, info, is_shader_code, is_same_4gb_page))
|
||||
return gap_end;
|
||||
|
||||
if (gap_end < info->low_limit)
|
||||
return -ENOMEM;
|
||||
|
||||
/* Adjust next search high limit */
|
||||
high_limit = gap_end + length;
|
||||
|
||||
if (WARN_ONCE(high_limit >= saved_high_lmt,
|
||||
"Unexpected recurring high_limit in search, %lx => %lx\n"
|
||||
"\tinfo-input: limit=[%lx, %lx], mask=%lx, len=%lx\n",
|
||||
saved_high_lmt, high_limit, info->low_limit, info->high_limit,
|
||||
info->align_mask, info->length))
|
||||
high_limit = saved_high_lmt -
|
||||
(info->align_offset ? info->align_offset : info->length);
|
||||
mas_reset(&mas);
|
||||
}
|
||||
#endif
|
||||
#endif /* __ANDROID_COMMON_KERNEL__ */
|
||||
#endif /* KERNEL_VERSION(6, 1, 0) > LINUX_VERSION_CODE */
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user