^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1) // SPDX-License-Identifier: GPL-2.0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3) * arch/openrisc/lib/memcpy.c
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5) * Optimized memory copy routines for openrisc. These are mostly copied
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6) * from other sources but slightly extended based on ideas discussed in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7) * #openrisc.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9) * The word unroll implementation is an extension to the arm byte
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10) * unrolled implementation, but using word copies (if things are
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11) * properly aligned)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 12) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 13) * The great arm loop unroll algorithm can be found at:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 14) * arch/arm/boot/compressed/string.c
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 15) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 16)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 17) #include <linux/export.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 18)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 19) #include <linux/string.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 20)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 21) #ifdef CONFIG_OR1K_1200
/*
 * Do memcpy with word copies and loop unrolling. This gives the
 * best performance on the OR1200 and MOR1KX architectures.
 *
 * Copies n bytes from src to dest, walking both buffers from the lowest
 * address upward, and returns dest. Overlapping buffers get no special
 * handling.
 *
 * When both pointers are 4-byte aligned the bulk is moved as 32-bit
 * words (32 bytes per loop iteration); otherwise it is moved as bytes
 * (8 bytes per loop iteration). In both cases the remainder is peeled
 * off by testing the individual low bits of n, largest chunk first.
 */
void *memcpy(void *dest, __const void *src, __kernel_size_t n)
{
	__kernel_size_t i;	/* same width as n, so n >> k never narrows */
	unsigned char *d;
	const unsigned char *s;
	uint32_t *dest_w = dest;
	const uint32_t *src_w = src;

	/* If both source and dest are word aligned copy words */
	if (!(((unsigned long)dest | (unsigned long)src) & 3)) {
		/* Copy 32 bytes per loop */
		for (i = n >> 5; i > 0; i--) {
			*dest_w++ = *src_w++;
			*dest_w++ = *src_w++;
			*dest_w++ = *src_w++;
			*dest_w++ = *src_w++;
			*dest_w++ = *src_w++;
			*dest_w++ = *src_w++;
			*dest_w++ = *src_w++;
			*dest_w++ = *src_w++;
		}

		/* Remaining 16-, 8- and 4-byte chunks, one bit of n each */
		if (n & (1 << 4)) {
			*dest_w++ = *src_w++;
			*dest_w++ = *src_w++;
			*dest_w++ = *src_w++;
			*dest_w++ = *src_w++;
		}

		if (n & (1 << 3)) {
			*dest_w++ = *src_w++;
			*dest_w++ = *src_w++;
		}

		if (n & (1 << 2))
			*dest_w++ = *src_w++;

		/* Continue bytewise from where the word copy stopped */
		d = (unsigned char *)dest_w;
		s = (const unsigned char *)src_w;
	} else {
		d = dest;
		s = src;

		/* Copy 8 bytes per loop */
		for (i = n >> 3; i > 0; i--) {
			*d++ = *s++;
			*d++ = *s++;
			*d++ = *s++;
			*d++ = *s++;
			*d++ = *s++;
			*d++ = *s++;
			*d++ = *s++;
			*d++ = *s++;
		}

		if (n & (1 << 2)) {
			*d++ = *s++;
			*d++ = *s++;
			*d++ = *s++;
			*d++ = *s++;
		}
	}

	/* Final 0-3 bytes, shared by both paths */
	if (n & (1 << 1)) {
		*d++ = *s++;
		*d++ = *s++;
	}

	if (n & 1)
		*d++ = *s++;

	return dest;
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 97) #else
/*
 * Use word copies but no loop unrolling as we cannot assume there
 * will be benefits on the architecture.
 *
 * Copies n bytes from src to dest, walking both buffers from the lowest
 * address upward, and returns dest. Overlapping buffers get no special
 * handling.
 */
void *memcpy(void *dest, __const void *src, __kernel_size_t n)
{
	unsigned char *d;
	const unsigned char *s;
	uint32_t *dest_w = dest;
	const uint32_t *src_w = src;

	/* If both source and dest are word aligned copy words */
	if (!(((unsigned long)dest | (unsigned long)src) & 3)) {
		for (; n >= 4; n -= 4)
			*dest_w++ = *src_w++;
	}

	/*
	 * The word pointers have only advanced if the aligned loop ran, so
	 * this resumes either after the last copied word or at the start.
	 */
	d = (unsigned char *)dest_w;
	s = (const unsigned char *)src_w;

	/* For remaining or if not aligned, copy bytes */
	for (; n >= 1; n -= 1)
		*d++ = *s++;

	return dest;
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 123) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 124)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 125) EXPORT_SYMBOL(memcpy);