// SPDX-License-Identifier: GPL-2.0
/*
 * linux/arch/alpha/lib/memcpy.c
 *
 * Copyright (C) 1995 Linus Torvalds
 */

/*
 * This is a reasonably optimized memcpy() routine.
 */

/*
 * Note that the C code is written to be optimized into good assembly. However,
 * at this point gcc is unable to sanely compile "if (n >= 0)", resulting in an
 * explicit compare against 0 (instead of just using the proper "blt reg, xx" or
 * "bge reg, xx"). I hope alpha-gcc will be fixed to notice this eventually..
 */

#include <linux/types.h>
#include <linux/export.h>

/*
 * This should be done in one go with ldq_u*2/mask/stq_u. Do it
 * with a macro so that we can fix it up later..
 */
#define ALIGN_DEST_TO8_UP(d,s,n) \
	while (d & 7) { \
		if (n <= 0) return; \
		n--; \
		*(char *) d = *(char *) s; \
		d++; s++; \
	}
#define ALIGN_DEST_TO8_DN(d,s,n) \
	while (d & 7) { \
		if (n <= 0) return; \
		n--; \
		d--; s--; \
		*(char *) d = *(char *) s; \
	}

/*
 * This should similarly be done with ldq_u*2/mask/stq. The destination
 * is aligned, but we don't fill in a full quad-word.
 */
#define DO_REST_UP(d,s,n) \
	while (n > 0) { \
		n--; \
		*(char *) d = *(char *) s; \
		d++; s++; \
	}
#define DO_REST_DN(d,s,n) \
	while (n > 0) { \
		n--; \
		d--; s--; \
		*(char *) d = *(char *) s; \
	}

/*
 * This should be done with ldq/mask/stq. The source and destination are
 * aligned, but we don't fill in a full quad-word.
 */
#define DO_REST_ALIGNED_UP(d,s,n) DO_REST_UP(d,s,n)
#define DO_REST_ALIGNED_DN(d,s,n) DO_REST_DN(d,s,n)

/*
 * This does unaligned memory copies. We want to avoid storing to
 * an unaligned address, as that would do a read-modify-write cycle.
 * We also want to avoid double-reading the unaligned reads.
 *
 * Note the ordering to try to avoid load (and address generation) latencies.
 */
static inline void __memcpy_unaligned_up (unsigned long d, unsigned long s,
					   long n)
{
	ALIGN_DEST_TO8_UP(d,s,n);
	n -= 8;			/* to avoid compare against 8 in the loop */
	if (n >= 0) {
		unsigned long low_word, high_word;
		__asm__("ldq_u %0,%1":"=r" (low_word):"m" (*(unsigned long *) s));
		do {
			unsigned long tmp;
			__asm__("ldq_u %0,%1":"=r" (high_word):"m" (*(unsigned long *)(s+8)));
			n -= 8;
			__asm__("extql %1,%2,%0"
				:"=r" (low_word)
				:"r" (low_word), "r" (s));
			__asm__("extqh %1,%2,%0"
				:"=r" (tmp)
				:"r" (high_word), "r" (s));
			s += 8;
			*(unsigned long *) d = low_word | tmp;
			d += 8;
			low_word = high_word;
		} while (n >= 0);
	}
	n += 8;
	DO_REST_UP(d,s,n);
}
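
/*
 * Purely illustrative sketch (not referenced by anything here, and the
 * helper name is made up): roughly what the extql/extqh pair in the loop
 * above computes, written in portable C for a 64-bit unsigned long and a
 * source byte offset "ofs" = s & 7.  The unaligned path only ever runs
 * with ofs != 0, since co-aligned pointers take the aligned path instead.
 */
static inline unsigned long __unaligned_quad_sketch(unsigned long lo,
						    unsigned long hi,
						    unsigned long ofs)
{
	if (!ofs)	/* never happens here; avoids an undefined 64-bit shift */
		return lo;
	/* extql: the upper 8*(8-ofs) bits of the first quadword, moved down */
	/* extqh: the lower 8*ofs bits of the second quadword, moved up */
	return (lo >> (8 * ofs)) | (hi << (64 - 8 * ofs));
}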

static inline void __memcpy_unaligned_dn (unsigned long d, unsigned long s,
					   long n)
{
	/* I don't understand AXP assembler well enough for this. -Tim */
	s += n;
	d += n;
	while (n--)
		* (char *) --d = * (char *) --s;
}

/*
 * Hmm.. Strange. The __asm__ here is there to make gcc use an integer register
 * for the load-store. I don't know why, but using a floating point register
 * for the move seems to slow things down (a very small difference, though).
 *
 * Note the ordering to try to avoid load (and address generation) latencies.
 */
static inline void __memcpy_aligned_up (unsigned long d, unsigned long s,
					 long n)
{
	ALIGN_DEST_TO8_UP(d,s,n);
	n -= 8;
	while (n >= 0) {
		unsigned long tmp;
		__asm__("ldq %0,%1":"=r" (tmp):"m" (*(unsigned long *) s));
		n -= 8;
		s += 8;
		*(unsigned long *) d = tmp;
		d += 8;
	}
	n += 8;
	DO_REST_ALIGNED_UP(d,s,n);
}
static inline void __memcpy_aligned_dn (unsigned long d, unsigned long s,
					 long n)
{
	s += n;
	d += n;
	ALIGN_DEST_TO8_DN(d,s,n);
	n -= 8;
	while (n >= 0) {
		unsigned long tmp;
		s -= 8;
		__asm__("ldq %0,%1":"=r" (tmp):"m" (*(unsigned long *) s));
		n -= 8;
		d -= 8;
		*(unsigned long *) d = tmp;
	}
	n += 8;
	DO_REST_ALIGNED_DN(d,s,n);
}
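
/*
 * Note: the *_dn variants above copy downwards (the pointers are first
 * advanced to the end of the buffers and then walk backwards). Nothing
 * in this file calls them; memcpy() below only needs the upward copies.
 */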

void * memcpy(void * dest, const void *src, size_t n)
{
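	/*
	 * If dest and src have the same offset within a quadword (their low
	 * three address bits match), aligning the destination to 8 bytes also
	 * aligns the source, so whole quadwords can be moved with ldq/stq.
	 * E.g. dest ending in 0x13 and src ending in 0x0b are both at offset
	 * 3: (0x13 ^ 0x0b) & 7 == 0, so the aligned path is taken.
	 */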
	if (!(((unsigned long) dest ^ (unsigned long) src) & 7)) {
		__memcpy_aligned_up ((unsigned long) dest, (unsigned long) src,
				     n);
		return dest;
	}
	__memcpy_unaligned_up ((unsigned long) dest, (unsigned long) src, n);
	return dest;
}
EXPORT_SYMBOL(memcpy);