^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1) // SPDX-License-Identifier: GPL-2.0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2) #include <linux/string.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3) #include <linux/export.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5) #undef memcpy
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6) #undef memset
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8) __visible void *memcpy(void *to, const void *from, size_t n)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10) #if defined(CONFIG_X86_USE_3DNOW) && !defined(CONFIG_FORTIFY_SOURCE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11) return __memcpy3d(to, from, n);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 12) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 13) return __memcpy(to, from, n);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 14) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 15) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 16) EXPORT_SYMBOL(memcpy);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 17)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 18) __visible void *memset(void *s, int c, size_t count)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 19) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 20) return __memset(s, c, count);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 21) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 22) EXPORT_SYMBOL(memset);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 23)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 24) __visible void *memmove(void *dest, const void *src, size_t n)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 25) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 26) int d0,d1,d2,d3,d4,d5;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 27) char *ret = dest;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 28)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 29) __asm__ __volatile__(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 30) /* Handle more 16 bytes in loop */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 31) "cmp $0x10, %0\n\t"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 32) "jb 1f\n\t"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 33)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 34) /* Decide forward/backward copy mode */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 35) "cmp %2, %1\n\t"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 36) "jb 2f\n\t"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 37)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 38) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 39) * movs instruction have many startup latency
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 40) * so we handle small size by general register.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 41) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 42) "cmp $680, %0\n\t"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 43) "jb 3f\n\t"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 44) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 45) * movs instruction is only good for aligned case.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 46) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 47) "mov %1, %3\n\t"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 48) "xor %2, %3\n\t"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 49) "and $0xff, %3\n\t"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 50) "jz 4f\n\t"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 51) "3:\n\t"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 52) "sub $0x10, %0\n\t"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 53)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 54) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 55) * We gobble 16 bytes forward in each loop.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 56) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 57) "3:\n\t"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 58) "sub $0x10, %0\n\t"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 59) "mov 0*4(%1), %3\n\t"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 60) "mov 1*4(%1), %4\n\t"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 61) "mov %3, 0*4(%2)\n\t"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 62) "mov %4, 1*4(%2)\n\t"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 63) "mov 2*4(%1), %3\n\t"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 64) "mov 3*4(%1), %4\n\t"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 65) "mov %3, 2*4(%2)\n\t"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 66) "mov %4, 3*4(%2)\n\t"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 67) "lea 0x10(%1), %1\n\t"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 68) "lea 0x10(%2), %2\n\t"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 69) "jae 3b\n\t"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 70) "add $0x10, %0\n\t"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 71) "jmp 1f\n\t"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 72)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 73) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 74) * Handle data forward by movs.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 75) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 76) ".p2align 4\n\t"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 77) "4:\n\t"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 78) "mov -4(%1, %0), %3\n\t"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 79) "lea -4(%2, %0), %4\n\t"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 80) "shr $2, %0\n\t"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 81) "rep movsl\n\t"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 82) "mov %3, (%4)\n\t"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 83) "jmp 11f\n\t"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 84) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 85) * Handle data backward by movs.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 86) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 87) ".p2align 4\n\t"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 88) "6:\n\t"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 89) "mov (%1), %3\n\t"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 90) "mov %2, %4\n\t"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 91) "lea -4(%1, %0), %1\n\t"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 92) "lea -4(%2, %0), %2\n\t"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 93) "shr $2, %0\n\t"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 94) "std\n\t"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 95) "rep movsl\n\t"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 96) "mov %3,(%4)\n\t"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 97) "cld\n\t"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 98) "jmp 11f\n\t"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 99)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 100) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 101) * Start to prepare for backward copy.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 102) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 103) ".p2align 4\n\t"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 104) "2:\n\t"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 105) "cmp $680, %0\n\t"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 106) "jb 5f\n\t"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 107) "mov %1, %3\n\t"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 108) "xor %2, %3\n\t"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 109) "and $0xff, %3\n\t"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 110) "jz 6b\n\t"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 111)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 112) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 113) * Calculate copy position to tail.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 114) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 115) "5:\n\t"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 116) "add %0, %1\n\t"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 117) "add %0, %2\n\t"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 118) "sub $0x10, %0\n\t"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 119)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 120) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 121) * We gobble 16 bytes backward in each loop.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 122) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 123) "7:\n\t"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 124) "sub $0x10, %0\n\t"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 125)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 126) "mov -1*4(%1), %3\n\t"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 127) "mov -2*4(%1), %4\n\t"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 128) "mov %3, -1*4(%2)\n\t"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 129) "mov %4, -2*4(%2)\n\t"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 130) "mov -3*4(%1), %3\n\t"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 131) "mov -4*4(%1), %4\n\t"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 132) "mov %3, -3*4(%2)\n\t"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 133) "mov %4, -4*4(%2)\n\t"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 134) "lea -0x10(%1), %1\n\t"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 135) "lea -0x10(%2), %2\n\t"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 136) "jae 7b\n\t"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 137) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 138) * Calculate copy position to head.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 139) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 140) "add $0x10, %0\n\t"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 141) "sub %0, %1\n\t"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 142) "sub %0, %2\n\t"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 143)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 144) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 145) * Move data from 8 bytes to 15 bytes.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 146) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 147) ".p2align 4\n\t"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 148) "1:\n\t"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 149) "cmp $8, %0\n\t"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 150) "jb 8f\n\t"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 151) "mov 0*4(%1), %3\n\t"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 152) "mov 1*4(%1), %4\n\t"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 153) "mov -2*4(%1, %0), %5\n\t"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 154) "mov -1*4(%1, %0), %1\n\t"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 155)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 156) "mov %3, 0*4(%2)\n\t"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 157) "mov %4, 1*4(%2)\n\t"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 158) "mov %5, -2*4(%2, %0)\n\t"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 159) "mov %1, -1*4(%2, %0)\n\t"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 160) "jmp 11f\n\t"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 161)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 162) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 163) * Move data from 4 bytes to 7 bytes.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 164) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 165) ".p2align 4\n\t"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 166) "8:\n\t"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 167) "cmp $4, %0\n\t"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 168) "jb 9f\n\t"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 169) "mov 0*4(%1), %3\n\t"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 170) "mov -1*4(%1, %0), %4\n\t"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 171) "mov %3, 0*4(%2)\n\t"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 172) "mov %4, -1*4(%2, %0)\n\t"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 173) "jmp 11f\n\t"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 174)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 175) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 176) * Move data from 2 bytes to 3 bytes.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 177) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 178) ".p2align 4\n\t"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 179) "9:\n\t"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 180) "cmp $2, %0\n\t"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 181) "jb 10f\n\t"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 182) "movw 0*2(%1), %%dx\n\t"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 183) "movw -1*2(%1, %0), %%bx\n\t"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 184) "movw %%dx, 0*2(%2)\n\t"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 185) "movw %%bx, -1*2(%2, %0)\n\t"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 186) "jmp 11f\n\t"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 187)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 188) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 189) * Move data for 1 byte.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 190) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 191) ".p2align 4\n\t"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 192) "10:\n\t"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 193) "cmp $1, %0\n\t"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 194) "jb 11f\n\t"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 195) "movb (%1), %%cl\n\t"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 196) "movb %%cl, (%2)\n\t"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 197) ".p2align 4\n\t"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 198) "11:"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 199) : "=&c" (d0), "=&S" (d1), "=&D" (d2),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 200) "=r" (d3),"=r" (d4), "=r"(d5)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 201) :"0" (n),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 202) "1" (src),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 203) "2" (dest)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 204) :"memory");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 205)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 206) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 207)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 208) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 209) EXPORT_SYMBOL(memmove);