From 0fd102707dd3a6634079863ef8a70c4d578fc037 Mon Sep 17 00:00:00 2001 From: Zhen Chen Date: Mon, 11 Nov 2024 10:30:07 +0800 Subject: [PATCH] Mali: bifrost: Using Upstream For MT Unmapped Area Topdown-Search Comes from 03490297.diff in mail "241108: 21:51: ARM support: Inchara: ". The original patch contains the following information: { Subject: [PATCH] [Official] GPUCORE-43479 Using Upstream For MT Unmapped Area Topdown-Search This commit addresses the unmapped area search not ending its loop operation issue. The change applies the upstream vm_unmapped_area() with Maple-Tree (kernel 6.1.x) for search operations and then uses a kbase specific adjustment loop for revisions to meet some extra hardware alignment constraints. } The "not ending its loop operation issue" is reflected in various RK tests as a "rcu_sched self-detected stall on CPU" with a call trace within Mali driver. An instance: [ 1442.379022] rcu: INFO: rcu_sched self-detected stall on CPU [ 1442.379028] rcu: 4-....: (14999 ticks this GP) idle=a8b4/1/0x4000000000000000 softirq=152230/152230 fqs=7492 [ 1442.379033] (t=15000 jiffies g=391269 q=378 ncpus=8) [ 1442.379038] CPU: 4 PID: 1456 Comm: glmark2-es2-way Not tainted 6.1.99 #406 [ 1442.379041] Hardware name: Rockchip RK3588 EVB7 V11 Board (DT) [ 1442.379044] pstate: 80400009 (Nzcv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) [ 1442.379047] pc : mas_empty_area_rev+0x150/0x540 [ 1442.379054] lr : mas_empty_area_rev+0x198/0x540 [ 1442.379056] sp : ffffffc00f6ebae0 ... [ 1442.379097] Call trace: [ 1442.379099] mas_empty_area_rev+0x150/0x540 [ 1442.379102] kbase_unmapped_area_topdown+0x144/0x264 [ 1442.379107] kbase_context_get_unmapped_area+0x244/0x2ec [ 1442.379111] kbase_get_unmapped_area+0x48/0x70 [ 1442.379116] get_unmapped_area+0x5c/0x100 [ 1442.379121] do_mmap+0xe0/0x450 [ 1442.379124] vm_mmap_pgoff+0xa0/0x150 [ 1442.379128] ksys_mmap_pgoff+0x9c/0xd0 [ 1442.379132] __arm64_sys_mmap+0x34/0x44 [ 1442.379136] invoke_syscall+0x4c/0x114 [ 1442.379141] el0_svc_common.constprop.0+0x54/0x180 [ 1442.379144] do_el0_svc+0x20/0x2c [ 1442.379148] el0_svc+0x14/0x80 [ 1442.379151] el0t_64_sync_handler+0xb0/0xb4 [ 1442.379155] el0t_64_sync+0x158/0x15c [ 1442.379158] [ 1442.379158] PC: 0xffffffc009375950: ... Change-Id: I6c5b1d37cf7f0853282126c23ce779032b7fabe7 Signed-off-by: Zhen Chen --- .../arm/bifrost/thirdparty/mali_kbase_mmap.c | 344 +++++++++--------- 1 file changed, 167 insertions(+), 177 deletions(-) diff --git a/drivers/gpu/arm/bifrost/thirdparty/mali_kbase_mmap.c b/drivers/gpu/arm/bifrost/thirdparty/mali_kbase_mmap.c index cfb347affa2e..97df69ede4b5 100644 --- a/drivers/gpu/arm/bifrost/thirdparty/mali_kbase_mmap.c +++ b/drivers/gpu/arm/bifrost/thirdparty/mali_kbase_mmap.c @@ -20,155 +20,88 @@ * kbase_context_get_unmapped_area() interface. */ -#if (KERNEL_VERSION(6, 1, 0) <= LINUX_VERSION_CODE) /** - * move_mt_gap() - Search the maple tree for an existing gap of a particular size - * immediately before another pre-identified gap. - * @gap_start: Pre-identified gap starting address. - * @gap_end: Pre-identified gap ending address. - * @size: Size of the new gap needed before gap_start. + * shader_code_align_and_check() - Align the specified pointer according to shader code + * requirement. * - * This function will search the calling process' maple tree - * for another gap, one that is immediately preceding the pre-identified - * gap, for a specific size, and upon success it will decrement gap_end - * by the specified size, and replace gap_start with the new gap_start of - * the newly identified gap. + * @gap_end: Highest possible start address for alignment. The caller must ensure + * the input has already been properly aligned with info contained fields. + * @info: vm_unmapped_area_info structure passed, containing alignment, length + * and limits for the allocation + * The function only undertakes the shader code alignment adjustment. It's the caller's + * responsibility that the input value provided via gap_end has already been properly aligned + * in compliance to the fields specified in the info structure. Irrespective the return result, + * the value of the variable pointed by the pointer gap_end may have been decreased in + * reaching the required alignment, but will not drop below info->low_limit. * - * Return: true if large enough preceding gap is found, false otherwise. + * Return: true if gap_end is now aligned correctly, false otherwise */ -static bool move_mt_gap(unsigned long *gap_start, unsigned long *gap_end, unsigned long size) +static bool shader_code_align_and_check(unsigned long *gap_end, struct vm_unmapped_area_info *info) { - unsigned long new_gap_start, new_gap_end; + unsigned long align_adjust = (info->align_offset ? info->align_offset : info->length); + unsigned long align_floor = info->low_limit + align_adjust; - MA_STATE(mas, ¤t->mm->mm_mt, 0, 0); + /* Check for 4GB address inner high-bit pattern, make adjustment if all zeros */ + if (0 == (*gap_end & BASE_MEM_MASK_4GB) && *gap_end >= align_floor) + (*gap_end) -= align_adjust; + if (0 == ((*gap_end + info->length) & BASE_MEM_MASK_4GB) && *gap_end >= align_floor) + (*gap_end) -= align_adjust; - if (*gap_end < size) - return false; - - /* Calculate the gap end for the new, resultant gap */ - new_gap_end = *gap_end - size; - - /* If the new gap_end (i.e. new VA start address) is larger than gap_start, than the - * pre-identified gap already has space to shrink to accommodate the decrease in - * gap_end. - */ - if (new_gap_end >= *gap_start) { - /* Pre-identified gap already has space - just patch gap_end to new - * lower value and exit. - */ - *gap_end = new_gap_end; - return true; - } - - /* Since the new VA start address (new_gap_end) is below the start of the pre-identified - * gap in the maple tree, see if there is a free gap directly before the existing gap, of - * the same size as the alignment shift, such that the effective gap found is "extended". - * This may be larger than needed but leaves the same distance between gap_end and gap_start - * that currently exists. - */ - new_gap_start = *gap_start - size; - if (mas_empty_area_rev(&mas, new_gap_start, *gap_start - 1, size)) { - /* There's no gap between the new start address needed and the - * current start address - so return false to find a new - * gap from the maple tree. - */ - return false; - } - /* Suitable gap found - replace gap_start and gap_end with new values. gap_start takes the - * value of the start of new gap found, which now correctly precedes gap_end, and gap_end - * takes on the new aligned value that has now been decremented by the requested size. - */ - *gap_start = mas.index; - *gap_end = new_gap_end; - return true; + return ((*gap_end & BASE_MEM_MASK_4GB) && ((*gap_end + info->length) & BASE_MEM_MASK_4GB)); } /** - * align_and_check() - Align the specified pointer to the provided alignment and - * check that it is still in range. On kernel 6.1 onwards - * this function does not require that the initial requested - * gap is extended with the maximum size needed to guarantee - * an alignment. - * @gap_end: Highest possible start address for allocation (end of gap in - * address space) - * @gap_start: Start address of current memory area / gap in address space - * @info: vm_unmapped_area_info structure passed to caller, containing - * alignment, length and limits for the allocation - * @is_shader_code: True if the allocation is for shader code (which has - * additional alignment requirements) - * @is_same_4gb_page: True if the allocation needs to reside completely within - * a 4GB chunk + * align_4gb_no_straddle() - Align the specified pointer not to straddle over a 4_GB boundary. * - * Return: true if gap_end is now aligned correctly and is still in range, - * false otherwise + * @gap_end: Highest possible start address for alignment. The caller must ensure + * the input has already been properly aligned with info contained fields. + * @info: vm_unmapped_area_info structure passed, containing alignment, length + * and limits for the allocation + * + * The function only undertakes the 4GB boundary alignment adjustment. It's the caller's + * responsibility that the input value provided via gap_end has already been properly aligned + * in compliance to the fields specified in the info structure. + * + * Return: true is always expected and the gap_end is aligned correctly, false can only + * be possible when the code has been wrongly modified. */ -static bool align_and_check(unsigned long *gap_end, unsigned long gap_start, - struct vm_unmapped_area_info *info, bool is_shader_code, - bool is_same_4gb_page) +static bool align_4gb_no_straddle(unsigned long *gap_end, struct vm_unmapped_area_info *info) { - unsigned long alignment_shift; + unsigned long start = *gap_end; + unsigned long end = *gap_end + info->length; + unsigned long mask = ~((unsigned long)U32_MAX); - /* Compute highest gap address at the desired alignment */ - *gap_end -= info->length; - alignment_shift = (*gap_end - info->align_offset) & info->align_mask; + /* Check if 4GB boundary is straddled */ + if ((start & mask) != ((end - 1) & mask)) { + unsigned long offset = end - (end & mask); + /* This is to ensure that alignment doesn't get + * disturbed in an attempt to prevent straddling at + * 4GB boundary. The GPU VA is aligned to 2MB when the + * allocation size is > 2MB and there is enough CPU & + * GPU virtual space. + */ + unsigned long rounded_offset = ALIGN(offset, info->align_mask + 1); - /* Align desired start VA (gap_end) by calculated alignment shift amount */ - if (!move_mt_gap(&gap_start, gap_end, alignment_shift)) - return false; - /* Alignment is done so far - check for further alignment requirements */ + start -= rounded_offset; + end -= rounded_offset; - if (is_shader_code) { - /* Shader code allocations must not start or end on a 4GB boundary */ - alignment_shift = info->align_offset ? info->align_offset : info->length; - if (0 == (*gap_end & BASE_MEM_MASK_4GB)) { - if (!move_mt_gap(&gap_start, gap_end, alignment_shift)) - return false; - } - if (0 == ((*gap_end + info->length) & BASE_MEM_MASK_4GB)) { - if (!move_mt_gap(&gap_start, gap_end, alignment_shift)) - return false; - } + /* Patch gap_end to use new starting address for VA region */ + *gap_end = start; - if (!(*gap_end & BASE_MEM_MASK_4GB) || - !((*gap_end + info->length) & BASE_MEM_MASK_4GB)) + /* The preceding 4GB boundary shall not get straddled, + * even after accounting for the alignment, as the + * size of allocation is limited to 4GB and the initial + * start location was already aligned. + */ + if (WARN_ONCE((start & mask) != ((end - 1) & mask), + "Alignment unexpected straddles over 4GB boundary!")) return false; - } else if (is_same_4gb_page) { - unsigned long start = *gap_end; - unsigned long end = *gap_end + info->length; - unsigned long mask = ~((unsigned long)U32_MAX); - - /* Check if 4GB boundary is straddled */ - if ((start & mask) != ((end - 1) & mask)) { - unsigned long offset = end - (end & mask); - /* This is to ensure that alignment doesn't get - * disturbed in an attempt to prevent straddling at - * 4GB boundary. The GPU VA is aligned to 2MB when the - * allocation size is > 2MB and there is enough CPU & - * GPU virtual space. - */ - unsigned long rounded_offset = ALIGN(offset, info->align_mask + 1); - - if (!move_mt_gap(&gap_start, gap_end, rounded_offset)) - return false; - /* Re-calculate start and end values */ - start = *gap_end; - end = *gap_end + info->length; - - /* The preceding 4GB boundary shall not get straddled, - * even after accounting for the alignment, as the - * size of allocation is limited to 4GB and the initial - * start location was already aligned. - */ - WARN_ON((start & mask) != ((end - 1) & mask)); - } } - if ((*gap_end < info->low_limit) || (*gap_end < gap_start)) - return false; - return true; } -#else + +#if (KERNEL_VERSION(6, 1, 0) > LINUX_VERSION_CODE) || !defined(__ANDROID_COMMON_KERNEL__) /** * align_and_check() - Align the specified pointer to the provided alignment and * check that it is still in range. For Kernel versions below @@ -196,45 +129,11 @@ static bool align_and_check(unsigned long *gap_end, unsigned long gap_start, *gap_end -= (*gap_end - info->align_offset) & info->align_mask; if (is_shader_code) { - /* Check for 4GB boundary */ - if (0 == (*gap_end & BASE_MEM_MASK_4GB)) - (*gap_end) -= (info->align_offset ? info->align_offset : info->length); - if (0 == ((*gap_end + info->length) & BASE_MEM_MASK_4GB)) - (*gap_end) -= (info->align_offset ? info->align_offset : info->length); - - if (!(*gap_end & BASE_MEM_MASK_4GB) || - !((*gap_end + info->length) & BASE_MEM_MASK_4GB)) + if (!shader_code_align_and_check(gap_end, info)) + return false; + } else if (is_same_4gb_page) + if (!align_4gb_no_straddle(gap_end, info)) return false; - } else if (is_same_4gb_page) { - unsigned long start = *gap_end; - unsigned long end = *gap_end + info->length; - unsigned long mask = ~((unsigned long)U32_MAX); - - /* Check if 4GB boundary is straddled */ - if ((start & mask) != ((end - 1) & mask)) { - unsigned long offset = end - (end & mask); - /* This is to ensure that alignment doesn't get - * disturbed in an attempt to prevent straddling at - * 4GB boundary. The GPU VA is aligned to 2MB when the - * allocation size is > 2MB and there is enough CPU & - * GPU virtual space. - */ - unsigned long rounded_offset = ALIGN(offset, info->align_mask + 1); - - start -= rounded_offset; - end -= rounded_offset; - - /* Patch gap_end to use new starting address for VA region */ - *gap_end = start; - - /* The preceding 4GB boundary shall not get straddled, - * even after accounting for the alignment, as the - * size of allocation is limited to 4GB and the initial - * start location was already aligned. - */ - WARN_ON((start & mask) != ((end - 1) & mask)); - } - } if ((*gap_end < info->low_limit) || (*gap_end < gap_start)) return false; @@ -370,33 +269,124 @@ check_current: } } } -#else - unsigned long high_limit, gap_start, gap_end; +#else /* KERNEL_VERSION(6, 1, 0) > LINUX_VERSION_CODE */ +#ifdef __ANDROID_COMMON_KERNEL__ + struct vm_unmapped_area_info tmp_info = *info; + unsigned long length; + + tmp_info.flags |= VM_UNMAPPED_AREA_TOPDOWN; + if (!(is_shader_code || is_same_4gb_page)) + return vm_unmapped_area(&tmp_info); + + length = info->length + info->align_mask; + + /* Due to additional alignment requirement, shader_code or same_4gb_page + * needs iterations for alignment search and confirmation check. + */ + while (true) { + unsigned long saved_high_lmt = tmp_info.high_limit; + unsigned long gap_end, start, rev_high_limit; + + gap_end = vm_unmapped_area(&tmp_info); + if (IS_ERR_VALUE(gap_end)) + return gap_end; + + start = gap_end; + if (is_shader_code) { + bool shader_code_aligned; + unsigned long align_cmp_ref; + + while (true) { + /* Save the start value for progress check. the loop needs + * to end if the alignment can't progress any further. + * In summary, the loop ends condition here is either: + * 1. shader_code_aligned is true; or + * 2. align_cmp_ref == gap_end. + */ + align_cmp_ref = gap_end; + + shader_code_aligned = + shader_code_align_and_check(&gap_end, &tmp_info); + if (shader_code_aligned || (align_cmp_ref == gap_end)) + break; + } + + if (shader_code_aligned) { + if (start == gap_end) + return gap_end; + + rev_high_limit = gap_end + length; + } else + break; + } else { + /* must be same_4gb_page case */ + if (likely(align_4gb_no_straddle(&gap_end, &tmp_info))) { + if (start == gap_end) + return gap_end; + + rev_high_limit = gap_end + length; + } else + break; + } + + if (rev_high_limit < info->low_limit) + break; + + if (WARN_ONCE(rev_high_limit >= saved_high_lmt, + "Unexpected recurring high_limit in search, %lx => %lx\n" + "\tinfo-input: limit=[%lx, %lx], mask=%lx, len=%lx\n", + saved_high_lmt, rev_high_limit, info->low_limit, info->high_limit, + info->align_mask, info->length)) + rev_high_limit = saved_high_lmt - + (info->align_offset ? info->align_offset : info->length); + + /* Repeat the search with a decreasing rev_high_limit */ + tmp_info.high_limit = rev_high_limit; + } +#else /* __ANDROID_COMMON_KERNEL__ */ + unsigned long length, high_limit; MA_STATE(mas, ¤t->mm->mm_mt, 0, 0); - /* - * Adjust search limits by the desired length. - * See implementation comment at top of unmapped_area(). - */ - gap_end = info->high_limit; - if (gap_end < info->length) + /* Adjust search length to account for worst case alignment overhead */ + length = info->length + info->align_mask; + if (length < info->length) return -ENOMEM; - high_limit = gap_end - info->length; - if (info->low_limit > high_limit) + high_limit = info->high_limit; + if ((high_limit - info->low_limit) < length) return -ENOMEM; while (true) { - if (mas_empty_area_rev(&mas, info->low_limit, info->high_limit - 1, info->length)) + unsigned long gap_start, gap_end; + unsigned long saved_high_lmt = high_limit; + + if (mas_empty_area_rev(&mas, info->low_limit, high_limit - 1, length)) return -ENOMEM; + gap_end = mas.last + 1; gap_start = mas.index; if (align_and_check(&gap_end, gap_start, info, is_shader_code, is_same_4gb_page)) return gap_end; + + if (gap_end < info->low_limit) + return -ENOMEM; + + /* Adjust next search high limit */ + high_limit = gap_end + length; + + if (WARN_ONCE(high_limit >= saved_high_lmt, + "Unexpected recurring high_limit in search, %lx => %lx\n" + "\tinfo-input: limit=[%lx, %lx], mask=%lx, len=%lx\n", + saved_high_lmt, high_limit, info->low_limit, info->high_limit, + info->align_mask, info->length)) + high_limit = saved_high_lmt - + (info->align_offset ? info->align_offset : info->length); + mas_reset(&mas); } -#endif +#endif /* __ANDROID_COMMON_KERNEL__ */ +#endif /* KERNEL_VERSION(6, 1, 0) > LINUX_VERSION_CODE */ return -ENOMEM; }