Mali: bifrost: Using Upstream For MT Unmapped Area Topdown-Search

Comes from 03490297.diff in mail "241108: 21:51: ARM support: Inchara: ".

The original patch contains the following information:
{
Subject: [PATCH] [Official] GPUCORE-43479 Using Upstream For MT Unmapped Area Topdown-Search

This commit addresses the unmapped area search not ending its loop
operation issue. The change applies the upstream vm_unmapped_area()
with Maple-Tree (kernel 6.1.x) for search operations and then uses
a kbase specific adjustment loop for revisions to meet some extra
hardware alignment constraints.
}

The "not ending its loop operation issue" is reflected in various RK tests as
a "rcu_sched self-detected stall on CPU" with a call trace within Mali driver.
An instance:
[ 1442.379022] rcu: INFO: rcu_sched self-detected stall on CPU
[ 1442.379028] rcu:     4-....: (14999 ticks this GP) idle=a8b4/1/0x4000000000000000 softirq=152230/152230 fqs=7492
[ 1442.379033]  (t=15000 jiffies g=391269 q=378 ncpus=8)
[ 1442.379038] CPU: 4 PID: 1456 Comm: glmark2-es2-way Not tainted 6.1.99 #406
[ 1442.379041] Hardware name: Rockchip RK3588 EVB7 V11 Board (DT)
[ 1442.379044] pstate: 80400009 (Nzcv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--)
[ 1442.379047] pc : mas_empty_area_rev+0x150/0x540
[ 1442.379054] lr : mas_empty_area_rev+0x198/0x540
[ 1442.379056] sp : ffffffc00f6ebae0
...
[ 1442.379097] Call trace:
[ 1442.379099]  mas_empty_area_rev+0x150/0x540
[ 1442.379102]  kbase_unmapped_area_topdown+0x144/0x264
[ 1442.379107]  kbase_context_get_unmapped_area+0x244/0x2ec
[ 1442.379111]  kbase_get_unmapped_area+0x48/0x70
[ 1442.379116]  get_unmapped_area+0x5c/0x100
[ 1442.379121]  do_mmap+0xe0/0x450
[ 1442.379124]  vm_mmap_pgoff+0xa0/0x150
[ 1442.379128]  ksys_mmap_pgoff+0x9c/0xd0
[ 1442.379132]  __arm64_sys_mmap+0x34/0x44
[ 1442.379136]  invoke_syscall+0x4c/0x114
[ 1442.379141]  el0_svc_common.constprop.0+0x54/0x180
[ 1442.379144]  do_el0_svc+0x20/0x2c
[ 1442.379148]  el0_svc+0x14/0x80
[ 1442.379151]  el0t_64_sync_handler+0xb0/0xb4
[ 1442.379155]  el0t_64_sync+0x158/0x15c
[ 1442.379158]
[ 1442.379158] PC: 0xffffffc009375950:
...

Change-Id: I6c5b1d37cf7f0853282126c23ce779032b7fabe7
Signed-off-by: Zhen Chen <chenzhen@rock-chips.com>
This commit is contained in:
Zhen Chen
2024-11-11 10:30:07 +08:00
committed by Tao Huang
parent 6b66ee40d9
commit 0fd102707d

View File

@@ -20,155 +20,88 @@
* kbase_context_get_unmapped_area() interface.
*/
#if (KERNEL_VERSION(6, 1, 0) <= LINUX_VERSION_CODE)
/**
* move_mt_gap() - Search the maple tree for an existing gap of a particular size
* immediately before another pre-identified gap.
* @gap_start: Pre-identified gap starting address.
* @gap_end: Pre-identified gap ending address.
* @size: Size of the new gap needed before gap_start.
* shader_code_align_and_check() - Align the specified pointer according to shader code
* requirement.
*
* This function will search the calling process' maple tree
* for another gap, one that is immediately preceding the pre-identified
* gap, for a specific size, and upon success it will decrement gap_end
* by the specified size, and replace gap_start with the new gap_start of
* the newly identified gap.
* @gap_end: Highest possible start address for alignment. The caller must ensure
* the input has already been properly aligned with info contained fields.
* @info: vm_unmapped_area_info structure passed, containing alignment, length
* and limits for the allocation
* The function only undertakes the shader code alignment adjustment. It's the caller's
* responsibility that the input value provided via gap_end has already been properly aligned
* in compliance to the fields specified in the info structure. Irrespective the return result,
* the value of the variable pointed by the pointer gap_end may have been decreased in
* reaching the required alignment, but will not drop below info->low_limit.
*
* Return: true if large enough preceding gap is found, false otherwise.
* Return: true if gap_end is now aligned correctly, false otherwise
*/
static bool move_mt_gap(unsigned long *gap_start, unsigned long *gap_end, unsigned long size)
static bool shader_code_align_and_check(unsigned long *gap_end, struct vm_unmapped_area_info *info)
{
unsigned long new_gap_start, new_gap_end;
unsigned long align_adjust = (info->align_offset ? info->align_offset : info->length);
unsigned long align_floor = info->low_limit + align_adjust;
MA_STATE(mas, &current->mm->mm_mt, 0, 0);
/* Check for 4GB address inner high-bit pattern, make adjustment if all zeros */
if (0 == (*gap_end & BASE_MEM_MASK_4GB) && *gap_end >= align_floor)
(*gap_end) -= align_adjust;
if (0 == ((*gap_end + info->length) & BASE_MEM_MASK_4GB) && *gap_end >= align_floor)
(*gap_end) -= align_adjust;
if (*gap_end < size)
return false;
/* Calculate the gap end for the new, resultant gap */
new_gap_end = *gap_end - size;
/* If the new gap_end (i.e. new VA start address) is larger than gap_start, than the
* pre-identified gap already has space to shrink to accommodate the decrease in
* gap_end.
*/
if (new_gap_end >= *gap_start) {
/* Pre-identified gap already has space - just patch gap_end to new
* lower value and exit.
*/
*gap_end = new_gap_end;
return true;
}
/* Since the new VA start address (new_gap_end) is below the start of the pre-identified
* gap in the maple tree, see if there is a free gap directly before the existing gap, of
* the same size as the alignment shift, such that the effective gap found is "extended".
* This may be larger than needed but leaves the same distance between gap_end and gap_start
* that currently exists.
*/
new_gap_start = *gap_start - size;
if (mas_empty_area_rev(&mas, new_gap_start, *gap_start - 1, size)) {
/* There's no gap between the new start address needed and the
* current start address - so return false to find a new
* gap from the maple tree.
*/
return false;
}
/* Suitable gap found - replace gap_start and gap_end with new values. gap_start takes the
* value of the start of new gap found, which now correctly precedes gap_end, and gap_end
* takes on the new aligned value that has now been decremented by the requested size.
*/
*gap_start = mas.index;
*gap_end = new_gap_end;
return true;
return ((*gap_end & BASE_MEM_MASK_4GB) && ((*gap_end + info->length) & BASE_MEM_MASK_4GB));
}
/**
* align_and_check() - Align the specified pointer to the provided alignment and
* check that it is still in range. On kernel 6.1 onwards
* this function does not require that the initial requested
* gap is extended with the maximum size needed to guarantee
* an alignment.
* @gap_end: Highest possible start address for allocation (end of gap in
* address space)
* @gap_start: Start address of current memory area / gap in address space
* @info: vm_unmapped_area_info structure passed to caller, containing
* alignment, length and limits for the allocation
* @is_shader_code: True if the allocation is for shader code (which has
* additional alignment requirements)
* @is_same_4gb_page: True if the allocation needs to reside completely within
* a 4GB chunk
* align_4gb_no_straddle() - Align the specified pointer not to straddle over a 4_GB boundary.
*
* Return: true if gap_end is now aligned correctly and is still in range,
* false otherwise
* @gap_end: Highest possible start address for alignment. The caller must ensure
* the input has already been properly aligned with info contained fields.
* @info: vm_unmapped_area_info structure passed, containing alignment, length
* and limits for the allocation
*
* The function only undertakes the 4GB boundary alignment adjustment. It's the caller's
* responsibility that the input value provided via gap_end has already been properly aligned
* in compliance to the fields specified in the info structure.
*
* Return: true is always expected and the gap_end is aligned correctly, false can only
* be possible when the code has been wrongly modified.
*/
static bool align_and_check(unsigned long *gap_end, unsigned long gap_start,
struct vm_unmapped_area_info *info, bool is_shader_code,
bool is_same_4gb_page)
static bool align_4gb_no_straddle(unsigned long *gap_end, struct vm_unmapped_area_info *info)
{
unsigned long alignment_shift;
unsigned long start = *gap_end;
unsigned long end = *gap_end + info->length;
unsigned long mask = ~((unsigned long)U32_MAX);
/* Compute highest gap address at the desired alignment */
*gap_end -= info->length;
alignment_shift = (*gap_end - info->align_offset) & info->align_mask;
/* Check if 4GB boundary is straddled */
if ((start & mask) != ((end - 1) & mask)) {
unsigned long offset = end - (end & mask);
/* This is to ensure that alignment doesn't get
* disturbed in an attempt to prevent straddling at
* 4GB boundary. The GPU VA is aligned to 2MB when the
* allocation size is > 2MB and there is enough CPU &
* GPU virtual space.
*/
unsigned long rounded_offset = ALIGN(offset, info->align_mask + 1);
/* Align desired start VA (gap_end) by calculated alignment shift amount */
if (!move_mt_gap(&gap_start, gap_end, alignment_shift))
return false;
/* Alignment is done so far - check for further alignment requirements */
start -= rounded_offset;
end -= rounded_offset;
if (is_shader_code) {
/* Shader code allocations must not start or end on a 4GB boundary */
alignment_shift = info->align_offset ? info->align_offset : info->length;
if (0 == (*gap_end & BASE_MEM_MASK_4GB)) {
if (!move_mt_gap(&gap_start, gap_end, alignment_shift))
return false;
}
if (0 == ((*gap_end + info->length) & BASE_MEM_MASK_4GB)) {
if (!move_mt_gap(&gap_start, gap_end, alignment_shift))
return false;
}
/* Patch gap_end to use new starting address for VA region */
*gap_end = start;
if (!(*gap_end & BASE_MEM_MASK_4GB) ||
!((*gap_end + info->length) & BASE_MEM_MASK_4GB))
/* The preceding 4GB boundary shall not get straddled,
* even after accounting for the alignment, as the
* size of allocation is limited to 4GB and the initial
* start location was already aligned.
*/
if (WARN_ONCE((start & mask) != ((end - 1) & mask),
"Alignment unexpected straddles over 4GB boundary!"))
return false;
} else if (is_same_4gb_page) {
unsigned long start = *gap_end;
unsigned long end = *gap_end + info->length;
unsigned long mask = ~((unsigned long)U32_MAX);
/* Check if 4GB boundary is straddled */
if ((start & mask) != ((end - 1) & mask)) {
unsigned long offset = end - (end & mask);
/* This is to ensure that alignment doesn't get
* disturbed in an attempt to prevent straddling at
* 4GB boundary. The GPU VA is aligned to 2MB when the
* allocation size is > 2MB and there is enough CPU &
* GPU virtual space.
*/
unsigned long rounded_offset = ALIGN(offset, info->align_mask + 1);
if (!move_mt_gap(&gap_start, gap_end, rounded_offset))
return false;
/* Re-calculate start and end values */
start = *gap_end;
end = *gap_end + info->length;
/* The preceding 4GB boundary shall not get straddled,
* even after accounting for the alignment, as the
* size of allocation is limited to 4GB and the initial
* start location was already aligned.
*/
WARN_ON((start & mask) != ((end - 1) & mask));
}
}
if ((*gap_end < info->low_limit) || (*gap_end < gap_start))
return false;
return true;
}
#else
#if (KERNEL_VERSION(6, 1, 0) > LINUX_VERSION_CODE) || !defined(__ANDROID_COMMON_KERNEL__)
/**
* align_and_check() - Align the specified pointer to the provided alignment and
* check that it is still in range. For Kernel versions below
@@ -196,45 +129,11 @@ static bool align_and_check(unsigned long *gap_end, unsigned long gap_start,
*gap_end -= (*gap_end - info->align_offset) & info->align_mask;
if (is_shader_code) {
/* Check for 4GB boundary */
if (0 == (*gap_end & BASE_MEM_MASK_4GB))
(*gap_end) -= (info->align_offset ? info->align_offset : info->length);
if (0 == ((*gap_end + info->length) & BASE_MEM_MASK_4GB))
(*gap_end) -= (info->align_offset ? info->align_offset : info->length);
if (!(*gap_end & BASE_MEM_MASK_4GB) ||
!((*gap_end + info->length) & BASE_MEM_MASK_4GB))
if (!shader_code_align_and_check(gap_end, info))
return false;
} else if (is_same_4gb_page)
if (!align_4gb_no_straddle(gap_end, info))
return false;
} else if (is_same_4gb_page) {
unsigned long start = *gap_end;
unsigned long end = *gap_end + info->length;
unsigned long mask = ~((unsigned long)U32_MAX);
/* Check if 4GB boundary is straddled */
if ((start & mask) != ((end - 1) & mask)) {
unsigned long offset = end - (end & mask);
/* This is to ensure that alignment doesn't get
* disturbed in an attempt to prevent straddling at
* 4GB boundary. The GPU VA is aligned to 2MB when the
* allocation size is > 2MB and there is enough CPU &
* GPU virtual space.
*/
unsigned long rounded_offset = ALIGN(offset, info->align_mask + 1);
start -= rounded_offset;
end -= rounded_offset;
/* Patch gap_end to use new starting address for VA region */
*gap_end = start;
/* The preceding 4GB boundary shall not get straddled,
* even after accounting for the alignment, as the
* size of allocation is limited to 4GB and the initial
* start location was already aligned.
*/
WARN_ON((start & mask) != ((end - 1) & mask));
}
}
if ((*gap_end < info->low_limit) || (*gap_end < gap_start))
return false;
@@ -370,33 +269,124 @@ check_current:
}
}
}
#else
unsigned long high_limit, gap_start, gap_end;
#else /* KERNEL_VERSION(6, 1, 0) > LINUX_VERSION_CODE */
#ifdef __ANDROID_COMMON_KERNEL__
struct vm_unmapped_area_info tmp_info = *info;
unsigned long length;
tmp_info.flags |= VM_UNMAPPED_AREA_TOPDOWN;
if (!(is_shader_code || is_same_4gb_page))
return vm_unmapped_area(&tmp_info);
length = info->length + info->align_mask;
/* Due to additional alignment requirement, shader_code or same_4gb_page
* needs iterations for alignment search and confirmation check.
*/
while (true) {
unsigned long saved_high_lmt = tmp_info.high_limit;
unsigned long gap_end, start, rev_high_limit;
gap_end = vm_unmapped_area(&tmp_info);
if (IS_ERR_VALUE(gap_end))
return gap_end;
start = gap_end;
if (is_shader_code) {
bool shader_code_aligned;
unsigned long align_cmp_ref;
while (true) {
/* Save the start value for progress check. the loop needs
* to end if the alignment can't progress any further.
* In summary, the loop ends condition here is either:
* 1. shader_code_aligned is true; or
* 2. align_cmp_ref == gap_end.
*/
align_cmp_ref = gap_end;
shader_code_aligned =
shader_code_align_and_check(&gap_end, &tmp_info);
if (shader_code_aligned || (align_cmp_ref == gap_end))
break;
}
if (shader_code_aligned) {
if (start == gap_end)
return gap_end;
rev_high_limit = gap_end + length;
} else
break;
} else {
/* must be same_4gb_page case */
if (likely(align_4gb_no_straddle(&gap_end, &tmp_info))) {
if (start == gap_end)
return gap_end;
rev_high_limit = gap_end + length;
} else
break;
}
if (rev_high_limit < info->low_limit)
break;
if (WARN_ONCE(rev_high_limit >= saved_high_lmt,
"Unexpected recurring high_limit in search, %lx => %lx\n"
"\tinfo-input: limit=[%lx, %lx], mask=%lx, len=%lx\n",
saved_high_lmt, rev_high_limit, info->low_limit, info->high_limit,
info->align_mask, info->length))
rev_high_limit = saved_high_lmt -
(info->align_offset ? info->align_offset : info->length);
/* Repeat the search with a decreasing rev_high_limit */
tmp_info.high_limit = rev_high_limit;
}
#else /* __ANDROID_COMMON_KERNEL__ */
unsigned long length, high_limit;
MA_STATE(mas, &current->mm->mm_mt, 0, 0);
/*
* Adjust search limits by the desired length.
* See implementation comment at top of unmapped_area().
*/
gap_end = info->high_limit;
if (gap_end < info->length)
/* Adjust search length to account for worst case alignment overhead */
length = info->length + info->align_mask;
if (length < info->length)
return -ENOMEM;
high_limit = gap_end - info->length;
if (info->low_limit > high_limit)
high_limit = info->high_limit;
if ((high_limit - info->low_limit) < length)
return -ENOMEM;
while (true) {
if (mas_empty_area_rev(&mas, info->low_limit, info->high_limit - 1, info->length))
unsigned long gap_start, gap_end;
unsigned long saved_high_lmt = high_limit;
if (mas_empty_area_rev(&mas, info->low_limit, high_limit - 1, length))
return -ENOMEM;
gap_end = mas.last + 1;
gap_start = mas.index;
if (align_and_check(&gap_end, gap_start, info, is_shader_code, is_same_4gb_page))
return gap_end;
if (gap_end < info->low_limit)
return -ENOMEM;
/* Adjust next search high limit */
high_limit = gap_end + length;
if (WARN_ONCE(high_limit >= saved_high_lmt,
"Unexpected recurring high_limit in search, %lx => %lx\n"
"\tinfo-input: limit=[%lx, %lx], mask=%lx, len=%lx\n",
saved_high_lmt, high_limit, info->low_limit, info->high_limit,
info->align_mask, info->length))
high_limit = saved_high_lmt -
(info->align_offset ? info->align_offset : info->length);
mas_reset(&mas);
}
#endif
#endif /* __ANDROID_COMMON_KERNEL__ */
#endif /* KERNEL_VERSION(6, 1, 0) > LINUX_VERSION_CODE */
return -ENOMEM;
}