// SPDX-License-Identifier: GPL-2.0
/*
 * linux/arch/alpha/lib/memcpy.c
 *
 * Copyright (C) 1995 Linus Torvalds
 */

/*
 * This is a reasonably optimized memcpy() routine.
 */

/*
 * Note that the C code is written to be optimized into good assembly. However,
 * at this point gcc is unable to sanely compile "if (n >= 0)", resulting in an
 * explicit compare against 0 (instead of just using the proper "blt reg, xx" or
 * "bge reg, xx"). I hope alpha-gcc will be fixed to notice this eventually..
 */

#include <linux/types.h>
#include <linux/export.h>

/*
 * This should be done in one go with ldq_u*2/mask/stq_u. Do it
 * with a macro so that we can fix it up later..
 */
#define ALIGN_DEST_TO8_UP(d,s,n) \
	while (d & 7) { \
		if (n <= 0) return; \
		n--; \
		*(char *) d = *(char *) s; \
		d++; s++; \
	}
#define ALIGN_DEST_TO8_DN(d,s,n) \
	while (d & 7) { \
		if (n <= 0) return; \
		n--; \
		d--; s--; \
		*(char *) d = *(char *) s; \
	}

/*
 * This should similarly be done with ldq_u*2/mask/stq. The destination
 * is aligned, but we don't fill in a full quad-word.
 */
#define DO_REST_UP(d,s,n) \
	while (n > 0) { \
		n--; \
		*(char *) d = *(char *) s; \
		d++; s++; \
	}
#define DO_REST_DN(d,s,n) \
	while (n > 0) { \
		n--; \
		d--; s--; \
		*(char *) d = *(char *) s; \
	}

/*
 * This should be done with ldq/mask/stq. The source and destination are
 * aligned, but we don't fill in a full quad-word.
 */
#define DO_REST_ALIGNED_UP(d,s,n) DO_REST_UP(d,s,n)
#define DO_REST_ALIGNED_DN(d,s,n) DO_REST_DN(d,s,n)

/*
 * This does unaligned memory copies. We want to avoid storing to
 * an unaligned address, as that would do a read-modify-write cycle.
 * We also want to avoid double-reading the unaligned reads.
 *
 * Note the ordering to try to avoid load (and address generation) latencies.
 */
static inline void __memcpy_unaligned_up (unsigned long d, unsigned long s,
					   long n)
{
	ALIGN_DEST_TO8_UP(d,s,n);
	n -= 8;			/* to avoid compare against 8 in the loop */
	if (n >= 0) {
		unsigned long low_word, high_word;
		__asm__("ldq_u %0,%1":"=r" (low_word):"m" (*(unsigned long *) s));
		do {
			unsigned long tmp;
			__asm__("ldq_u %0,%1":"=r" (high_word):"m" (*(unsigned long *)(s+8)));
			n -= 8;
			__asm__("extql %1,%2,%0"
				:"=r" (low_word)
				:"r" (low_word), "r" (s));
			__asm__("extqh %1,%2,%0"
				:"=r" (tmp)
				:"r" (high_word), "r" (s));
			s += 8;
			*(unsigned long *) d = low_word | tmp;
			d += 8;
			low_word = high_word;
		} while (n >= 0);
	}
	n += 8;
	DO_REST_UP(d,s,n);
}
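
/*
 * Purely illustrative sketch (not referenced by anything here, and the
 * helper name is made up): roughly what the extql/extqh pair in the loop
 * above computes, written in portable C for a 64-bit unsigned long and a
 * source byte offset "ofs" = s & 7.  The unaligned path only ever runs
 * with ofs != 0, since co-aligned pointers take the aligned path instead.
 */
static inline unsigned long __unaligned_quad_sketch(unsigned long lo,
						    unsigned long hi,
						    unsigned long ofs)
{
	if (!ofs)	/* never happens here; avoids an undefined 64-bit shift */
		return lo;
	/* extql: the upper 8*(8-ofs) bits of the first quadword, moved down */
	/* extqh: the lower 8*ofs bits of the second quadword, moved up */
	return (lo >> (8 * ofs)) | (hi << (64 - 8 * ofs));
}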

static inline void __memcpy_unaligned_dn (unsigned long d, unsigned long s,
					   long n)
{
	/* I don't understand AXP assembler well enough for this. -Tim */
	s += n;
	d += n;
	while (n--)
		* (char *) --d = * (char *) --s;
}

/*
 * Hmm.. Strange. The __asm__ here is there to make gcc use an integer register
 * for the load-store. I don't know why, but using a floating point register
 * for the move seems to slow things down (a very small difference, though).
 *
 * Note the ordering to try to avoid load (and address generation) latencies.
 */
static inline void __memcpy_aligned_up (unsigned long d, unsigned long s,
					 long n)
{
	ALIGN_DEST_TO8_UP(d,s,n);
	n -= 8;
	while (n >= 0) {
		unsigned long tmp;
		__asm__("ldq %0,%1":"=r" (tmp):"m" (*(unsigned long *) s));
		n -= 8;
		s += 8;
		*(unsigned long *) d = tmp;
		d += 8;
	}
	n += 8;
	DO_REST_ALIGNED_UP(d,s,n);
}
static inline void __memcpy_aligned_dn (unsigned long d, unsigned long s,
					 long n)
{
	s += n;
	d += n;
	ALIGN_DEST_TO8_DN(d,s,n);
	n -= 8;
	while (n >= 0) {
		unsigned long tmp;
		s -= 8;
		__asm__("ldq %0,%1":"=r" (tmp):"m" (*(unsigned long *) s));
		n -= 8;
		d -= 8;
		*(unsigned long *) d = tmp;
	}
	n += 8;
	DO_REST_ALIGNED_DN(d,s,n);
}
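
/*
 * Note: the *_dn variants above copy downwards (the pointers are first
 * advanced to the end of the buffers and then walk backwards). Nothing
 * in this file calls them; memcpy() below only needs the upward copies.
 */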

void * memcpy(void * dest, const void *src, size_t n)
{
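	/*
	 * If dest and src have the same offset within a quadword (their low
	 * three address bits match), aligning the destination to 8 bytes also
	 * aligns the source, so whole quadwords can be moved with ldq/stq.
	 * E.g. dest ending in 0x13 and src ending in 0x0b are both at offset
	 * 3: (0x13 ^ 0x0b) & 7 == 0, so the aligned path is taken.
	 */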
	if (!(((unsigned long) dest ^ (unsigned long) src) & 7)) {
		__memcpy_aligned_up ((unsigned long) dest, (unsigned long) src,
				     n);
		return dest;
	}
	__memcpy_unaligned_up ((unsigned long) dest, (unsigned long) src, n);
	return dest;
}
EXPORT_SYMBOL(memcpy);