From 21c9675e110579c371c515d7cb68e69cd8388dd5 Mon Sep 17 00:00:00 2001
From: Nick Desaulniers
Date: Thu, 3 Nov 2022 21:55:24 +0000
Subject: [PATCH] Revert "FROMLIST: x86, mem: move memmove to out of line assembler"

This reverts commit 38bfd3357f2354c7299584c06f188dc45d65c54a.

Reason for revert: FROMGIT available

Bug: 247605214
Change-Id: If2c4ccb05797336593d8622b5f6693f89756d740
Signed-off-by: Nick Desaulniers
---
 arch/x86/lib/Makefile | 1 -
 arch/x86/lib/memcpy_32.c | 187 +++++++++++++++++++++++++++++++++
 arch/x86/lib/memmove_32.S | 215 --------------------------------------
 3 files changed, 187 insertions(+), 216 deletions(-)
 delete mode 100644 arch/x86/lib/memmove_32.S

diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index e02b20aafb6c..c6506c6a7092 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -60,7 +60,6 @@ ifeq ($(CONFIG_X86_32),y)
         lib-y += checksum_32.o
         lib-y += strstr_32.o
         lib-y += string_32.o
-        lib-y += memmove_32.o
 ifneq ($(CONFIG_X86_CMPXCHG64),y)
         lib-y += cmpxchg8b_emu.o atomic64_386_32.o
 endif
diff --git a/arch/x86/lib/memcpy_32.c b/arch/x86/lib/memcpy_32.c
index c69541dbbcc2..e565d1c9019e 100644
--- a/arch/x86/lib/memcpy_32.c
+++ b/arch/x86/lib/memcpy_32.c
@@ -20,3 +20,190 @@ __visible void *memset(void *s, int c, size_t count)
 	return __memset(s, c, count);
 }
 EXPORT_SYMBOL(memset);
+
+__visible void *memmove(void *dest, const void *src, size_t n)
+{
+	int d0,d1,d2,d3,d4,d5;
+	char *ret = dest;
+	/* Operands: %0 = n (ecx), %1 = src (esi), %2 = dest (edi), %3-%5 scratch */
+	__asm__ __volatile__(
+		/* Fewer than 16 bytes: go straight to the tail code at 1: */
+		"cmp $0x10, %0\n\t"
+		"jb 1f\n\t"
+
+		/* src below dest: copy backward (2:), otherwise copy forward */
+		"cmp %2, %1\n\t"
+		"jb 2f\n\t"
+
+		/*
+		 * The movs instruction has a high startup latency,
+		 * so sizes below 680 bytes go through general registers.
+		 */
+		"cmp $680, %0\n\t"
+		"jb 3f\n\t"
+		/*
+		 * movs is only used when src and dest agree in their low 8 bits.
+		 */
+		"mov %1, %3\n\t"
+		"xor %2, %3\n\t"
+		"and $0xff, %3\n\t"
+		"jz 4f\n\t"
+		"3:\n\t"
+		"sub $0x10, %0\n\t"	/* pre-bias n so the loop ends on borrow */
+
+		/*
+		 * Copy 16 bytes per pass; mov and lea keep the flags of sub for jae.
+		 */
+		"3:\n\t"
+		"sub $0x10, %0\n\t"
+		"mov 0*4(%1), %3\n\t"
+		"mov 1*4(%1), %4\n\t"
+		"mov %3, 0*4(%2)\n\t"
+		"mov %4, 1*4(%2)\n\t"
+		"mov 2*4(%1), %3\n\t"
+		"mov 3*4(%1), %4\n\t"
+		"mov %3, 2*4(%2)\n\t"
+		"mov %4, 3*4(%2)\n\t"
+		"lea 0x10(%1), %1\n\t"
+		"lea 0x10(%2), %2\n\t"
+		"jae 3b\n\t"
+		"add $0x10, %0\n\t"
+		"jmp 1f\n\t"
+
+		/*
+		 * Forward rep movsl; %3/%4 carry the last 4 bytes (n may not be 4*k).
+		 */
+		".p2align 4\n\t"
+		"4:\n\t"
+		"mov -4(%1, %0), %3\n\t"
+		"lea -4(%2, %0), %4\n\t"
+		"shr $2, %0\n\t"
+		"rep movsl\n\t"
+		"mov %3, (%4)\n\t"
+		"jmp 11f\n\t"
+		/*
+		 * Backward rep movsl under std; %3/%4 carry the first 4 bytes.
+		 */
+		".p2align 4\n\t"
+		"6:\n\t"
+		"mov (%1), %3\n\t"
+		"mov %2, %4\n\t"
+		"lea -4(%1, %0), %1\n\t"
+		"lea -4(%2, %0), %2\n\t"
+		"shr $2, %0\n\t"
+		"std\n\t"
+		"rep movsl\n\t"
+		"mov %3,(%4)\n\t"
+		"cld\n\t"
+		"jmp 11f\n\t"
+
+		/*
+		 * Backward copy: same size and low-8-bit checks as the forward path.
+		 */
+		".p2align 4\n\t"
+		"2:\n\t"
+		"cmp $680, %0\n\t"
+		"jb 5f\n\t"
+		"mov %1, %3\n\t"
+		"xor %2, %3\n\t"
+		"and $0xff, %3\n\t"
+		"jz 6b\n\t"
+
+		/*
+		 * Point src and dest one past the end of their buffers.
+		 */
+		"5:\n\t"
+		"add %0, %1\n\t"
+		"add %0, %2\n\t"
+		"sub $0x10, %0\n\t"	/* pre-bias n so the loop ends on borrow */
+
+		/*
+		 * Copy 16 bytes per pass, walking down from the end.
+		 */
+		"7:\n\t"
+		"sub $0x10, %0\n\t"
+
+		"mov -1*4(%1), %3\n\t"
+		"mov -2*4(%1), %4\n\t"
+		"mov %3, -1*4(%2)\n\t"
+		"mov %4, -2*4(%2)\n\t"
+		"mov -3*4(%1), %3\n\t"
+		"mov -4*4(%1), %4\n\t"
+		"mov %3, -3*4(%2)\n\t"
+		"mov %4, -4*4(%2)\n\t"
+		"lea -0x10(%1), %1\n\t"
+		"lea -0x10(%2), %2\n\t"
+		"jae 7b\n\t"
+		/*
+		 * Step src and dest back to the start of the bytes still to copy.
+		 */
+		"add $0x10, %0\n\t"
+		"sub %0, %1\n\t"
+		"sub %0, %2\n\t"
+
+		/*
+		 * 8 to 15 bytes left: load the first and last 8 bytes, then store.
+		 */
+		".p2align 4\n\t"
+		"1:\n\t"
+		"cmp $8, %0\n\t"
+		"jb 8f\n\t"
+		"mov 0*4(%1), %3\n\t"
+		"mov 1*4(%1), %4\n\t"
+		"mov -2*4(%1, %0), %5\n\t"
+		"mov -1*4(%1, %0), %1\n\t"
+
+		"mov %3, 0*4(%2)\n\t"
+		"mov %4, 1*4(%2)\n\t"
+		"mov %5, -2*4(%2, %0)\n\t"
+		"mov %1, -1*4(%2, %0)\n\t"
+		"jmp 11f\n\t"
+
+		/*
+		 * 4 to 7 bytes left: load the first and last 4 bytes, then store.
+		 */
+		".p2align 4\n\t"
+		"8:\n\t"
+		"cmp $4, %0\n\t"
+		"jb 9f\n\t"
+		"mov 0*4(%1), %3\n\t"
+		"mov -1*4(%1, %0), %4\n\t"
+		"mov %3, 0*4(%2)\n\t"
+		"mov %4, -1*4(%2, %0)\n\t"
+		"jmp 11f\n\t"
+
+		/*
+		 * 2-3 bytes. NOTE(review): %dx/%bx are hard-coded, not in clobbers.
+		 */
+		".p2align 4\n\t"
+		"9:\n\t"
+		"cmp $2, %0\n\t"
+		"jb 10f\n\t"
+		"movw 0*2(%1), %%dx\n\t"
+		"movw -1*2(%1, %0), %%bx\n\t"
+		"movw %%dx, 0*2(%2)\n\t"
+		"movw %%bx, -1*2(%2, %0)\n\t"
+		"jmp 11f\n\t"
+
+		/*
+		 * 1 byte left; %cl is the low byte of the count operand %0.
+		 */
+		".p2align 4\n\t"
+		"10:\n\t"
+		"cmp $1, %0\n\t"
+		"jb 11f\n\t"
+		"movb (%1), %%cl\n\t"
+		"movb %%cl, (%2)\n\t"
+		".p2align 4\n\t"
+		"11:"
+		: "=&c" (d0), "=&S" (d1), "=&D" (d2),
+		  "=r" (d3),"=r" (d4), "=r"(d5)
+		:"0" (n),
+		 "1" (src),
+		 "2" (dest)
+		:"memory");
+
+	return ret;
+
+}
+EXPORT_SYMBOL(memmove);
diff --git a/arch/x86/lib/memmove_32.S b/arch/x86/lib/memmove_32.S
deleted file mode 100644
index 146664b7eb92..000000000000
--- a/arch/x86/lib/memmove_32.S
+++ /dev/null
@@ -1,215 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-
-#include
-#include
-
-SYM_FUNC_START(memmove)
-/*
- * void *memmove(void *dest, const void *src, size_t n)
- * -mregparm=3 passes these in registers:
- */
-.set dest, %eax
-.set src, %edx
-.set n, %ecx
-
-/*
- * Need 3 scratch registers. These need to be saved+restored. Section 3.2.1
- * Footnote 7 of the System V Application Binary Interface Version 1.0 aka
- * "psABI" notes:
- * Note that in contrast to the Intel386 ABI, %rdi, and %rsi belong to the
- * called function, not the caller.
- * i.e. %edi and %esi are callee saved for i386 (because they belong to the
- * caller).
- */
-.set tmp0, %edi
-.set tmp0w, %di
-.set tmp1, %ebx
-.set tmp1w, %bx
-.set tmp2, %esi
-.set tmp3b, %cl
-
-	pushl %ebp
-	movl %esp, %ebp
-
-	pushl dest
-	pushl tmp0
-	pushl tmp1
-	pushl tmp2
-
-	/* Handle more 16 bytes in loop */
-	cmpl $0x10, n
-	jb .L16_byteswap
-
-	/* Decide forward/backward copy mode */
-	cmpl dest, src
-	jb .Lbackwards_header
-
-	/*
-	 * movs instruction have many startup latency
-	 * so we handle small size by general register.
-	 */
-	cmpl $680, n
-	jb .Ltoo_small_forwards
-	/*
-	 * movs instruction is only good for aligned case.
-	 */
-	movl src, tmp0
-	xorl dest, tmp0
-	andl $0xff, tmp0
-	jz .Lforward_movs
-.Ltoo_small_forwards:
-	subl $0x10, n
-
-	/*
-	 * We gobble 16 bytes forward in each loop.
-	 */
-.L16_byteswap_forwards_loop:
-	subl $0x10, n
-	movl 0*4(src), tmp0
-	movl 1*4(src), tmp1
-	movl tmp0, 0*4(dest)
-	movl tmp1, 1*4(dest)
-	movl 2*4(src), tmp0
-	movl 3*4(src), tmp1
-	movl tmp0, 2*4(dest)
-	movl tmp1, 3*4(dest)
-	leal 0x10(src), src
-	leal 0x10(dest), dest
-	jae .L16_byteswap_forwards_loop
-	addl $0x10, n
-	jmp .L16_byteswap
-
-	/*
-	 * Handle data forward by movs.
-	 */
-.p2align 4
-.Lforward_movs:
-	movl -4(src, n), tmp0
-	leal -4(dest, n), tmp1
-	shrl $2, n
-	rep movsl
-	movl tmp0, (tmp1)
-	jmp .Ldone
-	/*
-	 * Handle data backward by movs.
-	 */
-.p2align 4
-.Lbackwards_movs:
-	movl (src), tmp0
-	movl dest, tmp1
-	leal -4(src, n), src
-	leal -4(dest, n), dest
-	shrl $2, n
-	std
-	rep movsl
-	movl tmp0,(tmp1)
-	cld
-	jmp .Ldone
-
-	/*
-	 * Start to prepare for backward copy.
-	 */
-.p2align 4
-.Lbackwards_header:
-	cmpl $680, n
-	jb .Ltoo_small_backwards
-	movl src, tmp0
-	xorl dest, tmp0
-	andl $0xff, tmp0
-	jz .Lbackwards_movs
-
-	/*
-	 * Calculate copy position to tail.
-	 */
-.Ltoo_small_backwards:
-	addl n, src
-	addl n, dest
-	subl $0x10, n
-
-	/*
-	 * We gobble 16 bytes backward in each loop.
-	 */
-.L16_byteswap_backwards_loop:
-	subl $0x10, n
-
-	movl -1*4(src), tmp0
-	movl -2*4(src), tmp1
-	movl tmp0, -1*4(dest)
-	movl tmp1, -2*4(dest)
-	movl -3*4(src), tmp0
-	movl -4*4(src), tmp1
-	movl tmp0, -3*4(dest)
-	movl tmp1, -4*4(dest)
-	leal -0x10(src), src
-	leal -0x10(dest), dest
-	jae .L16_byteswap_backwards_loop
-	/*
-	 * Calculate copy position to head.
-	 */
-	addl $0x10, n
-	subl n, src
-	subl n, dest
-
-	/*
-	 * Move data from 8 bytes to 15 bytes.
-	 */
-.p2align 4
-.L16_byteswap:
-	cmpl $8, n
-	jb .L8_byteswap
-	movl 0*4(src), tmp0
-	movl 1*4(src), tmp1
-	movl -2*4(src, n), tmp2
-	movl -1*4(src, n), src
-
-	movl tmp0, 0*4(dest)
-	movl tmp1, 1*4(dest)
-	movl tmp2, -2*4(dest, n)
-	movl src, -1*4(dest, n)
-	jmp .Ldone
-
-	/*
-	 * Move data from 4 bytes to 7 bytes.
-	 */
-.p2align 4
-.L8_byteswap:
-	cmpl $4, n
-	jb .L4_byteswap
-	movl 0*4(src), tmp0
-	movl -1*4(src, n), tmp1
-	movl tmp0, 0*4(dest)
-	movl tmp1, -1*4(dest, n)
-	jmp .Ldone
-
-	/*
-	 * Move data from 2 bytes to 3 bytes.
-	 */
-.p2align 4
-.L4_byteswap:
-	cmpl $2, n
-	jb .Lbyteswap
-	movw 0*2(src), tmp0w
-	movw -1*2(src, n), tmp1w
-	movw tmp0w, 0*2(dest)
-	movw tmp1w, -1*2(dest, n)
-	jmp .Ldone
-
-	/*
-	 * Move data for 1 byte.
-	 */
-.p2align 4
-.Lbyteswap:
-	cmpl $1, n
-	jb .Ldone
-	movb (src), tmp3b
-	movb tmp3b, (dest)
-.p2align 4
-.Ldone:
-	popl tmp2
-	popl tmp1
-	popl tmp0
-	popl %eax
-	popl %ebp
-	RET
-SYM_FUNC_END(memmove)
-EXPORT_SYMBOL(memmove)