Orange Pi5 kernel

Deprecated Linux kernel 5.10.110 for OrangePi 5/5B/5+ boards

3 Commits   0 Branches   0 Tags
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   1) // SPDX-License-Identifier: GPL-2.0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   2) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   3)  *	MMX 3DNow! library helper functions
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   4)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   5)  *	To do:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   6)  *	We can use MMX just for prefetch in IRQ's. This may be a win.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   7)  *		(reported so on K6-III)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   8)  *	We should use a better code neutral filler for the short jump
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   9)  *		leal ebx, [ebx] is apparently best for K6-2, but Cyrix ??
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  10)  *	We also want to clobber the filler register so we don't get any
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  11)  *		register forwarding stalls on the filler.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  12)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  13)  *	Add *user handling. Checksums are not a win with MMX on any CPU
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  14)  *	tested so far for any MMX solution figured.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  15)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  16)  *	22/09/2000 - Arjan van de Ven
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  17)  *		Improved for non-engineering-sample Athlons
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  18)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  19)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  20) #include <linux/hardirq.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  21) #include <linux/string.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  22) #include <linux/export.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  23) #include <linux/sched.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  24) #include <linux/types.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  25) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  26) #include <asm/fpu/api.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  27) #include <asm/asm.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  28) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  29) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  30)  * Use KFPU_387.  MMX instructions are not affected by MXCSR,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  31)  * but both AMD and Intel documentation states that even integer MMX
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  32)  * operations will result in #MF if an exception is pending in FCW.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  33)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  34)  * EMMS is not needed afterwards because, after calling kernel_fpu_end(),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  35)  * any subsequent user of the 387 stack will reinitialize it using
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  36)  * KFPU_387.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  37)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  38) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  39) void *_mmx_memcpy(void *to, const void *from, size_t len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  40) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  41) 	void *p;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  42) 	int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  43) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  44) 	if (unlikely(in_interrupt()))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  45) 		return __memcpy(to, from, len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  46) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  47) 	p = to;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  48) 	i = len >> 6; /* len/64 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  49) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  50) 	kernel_fpu_begin_mask(KFPU_387);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  51) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  52) 	__asm__ __volatile__ (
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  53) 		"1: prefetch (%0)\n"		/* This set is 28 bytes */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  54) 		"   prefetch 64(%0)\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  55) 		"   prefetch 128(%0)\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  56) 		"   prefetch 192(%0)\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  57) 		"   prefetch 256(%0)\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  58) 		"2:  \n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  59) 		".section .fixup, \"ax\"\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  60) 		"3: movw $0x1AEB, 1b\n"	/* jmp on 26 bytes */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  61) 		"   jmp 2b\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  62) 		".previous\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  63) 			_ASM_EXTABLE(1b, 3b)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  64) 			: : "r" (from));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  65) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  66) 	for ( ; i > 5; i--) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  67) 		__asm__ __volatile__ (
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  68) 		"1:  prefetch 320(%0)\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  69) 		"2:  movq (%0), %%mm0\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  70) 		"  movq 8(%0), %%mm1\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  71) 		"  movq 16(%0), %%mm2\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  72) 		"  movq 24(%0), %%mm3\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  73) 		"  movq %%mm0, (%1)\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  74) 		"  movq %%mm1, 8(%1)\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  75) 		"  movq %%mm2, 16(%1)\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  76) 		"  movq %%mm3, 24(%1)\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  77) 		"  movq 32(%0), %%mm0\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  78) 		"  movq 40(%0), %%mm1\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  79) 		"  movq 48(%0), %%mm2\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  80) 		"  movq 56(%0), %%mm3\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  81) 		"  movq %%mm0, 32(%1)\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  82) 		"  movq %%mm1, 40(%1)\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  83) 		"  movq %%mm2, 48(%1)\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  84) 		"  movq %%mm3, 56(%1)\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  85) 		".section .fixup, \"ax\"\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  86) 		"3: movw $0x05EB, 1b\n"	/* jmp on 5 bytes */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  87) 		"   jmp 2b\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  88) 		".previous\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  89) 			_ASM_EXTABLE(1b, 3b)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  90) 			: : "r" (from), "r" (to) : "memory");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  91) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  92) 		from += 64;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  93) 		to += 64;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  94) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  95) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  96) 	for ( ; i > 0; i--) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  97) 		__asm__ __volatile__ (
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  98) 		"  movq (%0), %%mm0\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  99) 		"  movq 8(%0), %%mm1\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 100) 		"  movq 16(%0), %%mm2\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 101) 		"  movq 24(%0), %%mm3\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 102) 		"  movq %%mm0, (%1)\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 103) 		"  movq %%mm1, 8(%1)\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 104) 		"  movq %%mm2, 16(%1)\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 105) 		"  movq %%mm3, 24(%1)\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 106) 		"  movq 32(%0), %%mm0\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 107) 		"  movq 40(%0), %%mm1\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 108) 		"  movq 48(%0), %%mm2\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 109) 		"  movq 56(%0), %%mm3\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 110) 		"  movq %%mm0, 32(%1)\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 111) 		"  movq %%mm1, 40(%1)\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 112) 		"  movq %%mm2, 48(%1)\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 113) 		"  movq %%mm3, 56(%1)\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 114) 			: : "r" (from), "r" (to) : "memory");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 115) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 116) 		from += 64;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 117) 		to += 64;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 118) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 119) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 120) 	 * Now do the tail of the block:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 121) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 122) 	__memcpy(to, from, len & 63);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 123) 	kernel_fpu_end();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 124) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 125) 	return p;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 126) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 127) EXPORT_SYMBOL(_mmx_memcpy);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 128) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 129) #ifdef CONFIG_MK7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 130) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 131) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 132)  *	The K7 has streaming cache bypass load/store. The Cyrix III, K6 and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 133)  *	other MMX using processors do not.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 134)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 135) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 136) static void fast_clear_page(void *page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 137) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 138) 	int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 139) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 140) 	kernel_fpu_begin_mask(KFPU_387);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 141) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 142) 	__asm__ __volatile__ (
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 143) 		"  pxor %%mm0, %%mm0\n" : :
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 144) 	);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 145) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 146) 	for (i = 0; i < 4096/64; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 147) 		__asm__ __volatile__ (
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 148) 		"  movntq %%mm0, (%0)\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 149) 		"  movntq %%mm0, 8(%0)\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 150) 		"  movntq %%mm0, 16(%0)\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 151) 		"  movntq %%mm0, 24(%0)\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 152) 		"  movntq %%mm0, 32(%0)\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 153) 		"  movntq %%mm0, 40(%0)\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 154) 		"  movntq %%mm0, 48(%0)\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 155) 		"  movntq %%mm0, 56(%0)\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 156) 		: : "r" (page) : "memory");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 157) 		page += 64;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 158) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 159) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 160) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 161) 	 * Since movntq is weakly-ordered, a "sfence" is needed to become
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 162) 	 * ordered again:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 163) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 164) 	__asm__ __volatile__("sfence\n"::);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 165) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 166) 	kernel_fpu_end();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 167) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 168) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 169) static void fast_copy_page(void *to, void *from)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 170) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 171) 	int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 172) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 173) 	kernel_fpu_begin_mask(KFPU_387);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 174) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 175) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 176) 	 * maybe the prefetch stuff can go before the expensive fnsave...
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 177) 	 * but that is for later. -AV
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 178) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 179) 	__asm__ __volatile__(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 180) 		"1: prefetch (%0)\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 181) 		"   prefetch 64(%0)\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 182) 		"   prefetch 128(%0)\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 183) 		"   prefetch 192(%0)\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 184) 		"   prefetch 256(%0)\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 185) 		"2:  \n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 186) 		".section .fixup, \"ax\"\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 187) 		"3: movw $0x1AEB, 1b\n"	/* jmp on 26 bytes */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 188) 		"   jmp 2b\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 189) 		".previous\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 190) 			_ASM_EXTABLE(1b, 3b) : : "r" (from));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 191) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 192) 	for (i = 0; i < (4096-320)/64; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 193) 		__asm__ __volatile__ (
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 194) 		"1: prefetch 320(%0)\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 195) 		"2: movq (%0), %%mm0\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 196) 		"   movntq %%mm0, (%1)\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 197) 		"   movq 8(%0), %%mm1\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 198) 		"   movntq %%mm1, 8(%1)\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 199) 		"   movq 16(%0), %%mm2\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 200) 		"   movntq %%mm2, 16(%1)\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 201) 		"   movq 24(%0), %%mm3\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 202) 		"   movntq %%mm3, 24(%1)\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 203) 		"   movq 32(%0), %%mm4\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 204) 		"   movntq %%mm4, 32(%1)\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 205) 		"   movq 40(%0), %%mm5\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 206) 		"   movntq %%mm5, 40(%1)\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 207) 		"   movq 48(%0), %%mm6\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 208) 		"   movntq %%mm6, 48(%1)\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 209) 		"   movq 56(%0), %%mm7\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 210) 		"   movntq %%mm7, 56(%1)\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 211) 		".section .fixup, \"ax\"\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 212) 		"3: movw $0x05EB, 1b\n"	/* jmp on 5 bytes */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 213) 		"   jmp 2b\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 214) 		".previous\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 215) 		_ASM_EXTABLE(1b, 3b) : : "r" (from), "r" (to) : "memory");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 216) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 217) 		from += 64;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 218) 		to += 64;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 219) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 220) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 221) 	for (i = (4096-320)/64; i < 4096/64; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 222) 		__asm__ __volatile__ (
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 223) 		"2: movq (%0), %%mm0\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 224) 		"   movntq %%mm0, (%1)\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 225) 		"   movq 8(%0), %%mm1\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 226) 		"   movntq %%mm1, 8(%1)\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 227) 		"   movq 16(%0), %%mm2\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 228) 		"   movntq %%mm2, 16(%1)\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 229) 		"   movq 24(%0), %%mm3\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 230) 		"   movntq %%mm3, 24(%1)\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 231) 		"   movq 32(%0), %%mm4\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 232) 		"   movntq %%mm4, 32(%1)\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 233) 		"   movq 40(%0), %%mm5\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 234) 		"   movntq %%mm5, 40(%1)\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 235) 		"   movq 48(%0), %%mm6\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 236) 		"   movntq %%mm6, 48(%1)\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 237) 		"   movq 56(%0), %%mm7\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 238) 		"   movntq %%mm7, 56(%1)\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 239) 			: : "r" (from), "r" (to) : "memory");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 240) 		from += 64;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 241) 		to += 64;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 242) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 243) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 244) 	 * Since movntq is weakly-ordered, a "sfence" is needed to become
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 245) 	 * ordered again:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 246) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 247) 	__asm__ __volatile__("sfence \n"::);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 248) 	kernel_fpu_end();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 249) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 250) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 251) #else /* CONFIG_MK7 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 252) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 253) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 254)  *	Generic MMX implementation without K7 specific streaming
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 255)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 256) static void fast_clear_page(void *page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 257) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 258) 	int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 259) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 260) 	kernel_fpu_begin_mask(KFPU_387);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 261) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 262) 	__asm__ __volatile__ (
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 263) 		"  pxor %%mm0, %%mm0\n" : :
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 264) 	);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 265) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 266) 	for (i = 0; i < 4096/128; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 267) 		__asm__ __volatile__ (
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 268) 		"  movq %%mm0, (%0)\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 269) 		"  movq %%mm0, 8(%0)\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 270) 		"  movq %%mm0, 16(%0)\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 271) 		"  movq %%mm0, 24(%0)\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 272) 		"  movq %%mm0, 32(%0)\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 273) 		"  movq %%mm0, 40(%0)\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 274) 		"  movq %%mm0, 48(%0)\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 275) 		"  movq %%mm0, 56(%0)\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 276) 		"  movq %%mm0, 64(%0)\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 277) 		"  movq %%mm0, 72(%0)\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 278) 		"  movq %%mm0, 80(%0)\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 279) 		"  movq %%mm0, 88(%0)\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 280) 		"  movq %%mm0, 96(%0)\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 281) 		"  movq %%mm0, 104(%0)\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 282) 		"  movq %%mm0, 112(%0)\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 283) 		"  movq %%mm0, 120(%0)\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 284) 			: : "r" (page) : "memory");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 285) 		page += 128;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 286) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 287) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 288) 	kernel_fpu_end();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 289) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 290) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 291) static void fast_copy_page(void *to, void *from)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 292) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 293) 	int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 294) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 295) 	kernel_fpu_begin_mask(KFPU_387);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 296) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 297) 	__asm__ __volatile__ (
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 298) 		"1: prefetch (%0)\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 299) 		"   prefetch 64(%0)\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 300) 		"   prefetch 128(%0)\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 301) 		"   prefetch 192(%0)\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 302) 		"   prefetch 256(%0)\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 303) 		"2:  \n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 304) 		".section .fixup, \"ax\"\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 305) 		"3: movw $0x1AEB, 1b\n"	/* jmp on 26 bytes */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 306) 		"   jmp 2b\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 307) 		".previous\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 308) 			_ASM_EXTABLE(1b, 3b) : : "r" (from));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 309) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 310) 	for (i = 0; i < 4096/64; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 311) 		__asm__ __volatile__ (
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 312) 		"1: prefetch 320(%0)\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 313) 		"2: movq (%0), %%mm0\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 314) 		"   movq 8(%0), %%mm1\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 315) 		"   movq 16(%0), %%mm2\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 316) 		"   movq 24(%0), %%mm3\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 317) 		"   movq %%mm0, (%1)\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 318) 		"   movq %%mm1, 8(%1)\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 319) 		"   movq %%mm2, 16(%1)\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 320) 		"   movq %%mm3, 24(%1)\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 321) 		"   movq 32(%0), %%mm0\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 322) 		"   movq 40(%0), %%mm1\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 323) 		"   movq 48(%0), %%mm2\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 324) 		"   movq 56(%0), %%mm3\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 325) 		"   movq %%mm0, 32(%1)\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 326) 		"   movq %%mm1, 40(%1)\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 327) 		"   movq %%mm2, 48(%1)\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 328) 		"   movq %%mm3, 56(%1)\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 329) 		".section .fixup, \"ax\"\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 330) 		"3: movw $0x05EB, 1b\n"	/* jmp on 5 bytes */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 331) 		"   jmp 2b\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 332) 		".previous\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 333) 			_ASM_EXTABLE(1b, 3b)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 334) 			: : "r" (from), "r" (to) : "memory");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 335) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 336) 		from += 64;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 337) 		to += 64;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 338) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 339) 	kernel_fpu_end();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 340) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 341) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 342) #endif /* !CONFIG_MK7 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 343) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 344) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 345)  * Favour MMX for page clear and copy:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 346)  */
/*
 * Zero 1024 32-bit words (4096 bytes) starting at @page with "rep stosl".
 * Uses no FPU/MMX state.
 */
static void slow_zero_page(void *page)
{
	/* Dummy outputs: tell the compiler the count and destination registers are modified. */
	int count_scratch, dest_scratch;

	__asm__ __volatile__(
		"cld\n\t"
		"rep ; stosl"
		: "=&c" (count_scratch), "=&D" (dest_scratch)
		: "a" (0), "1" (page), "0" (1024)
		: "memory");
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 359) 
/*
 * mmx_clear_page - zero the page at @page.
 *
 * In interrupt context the string-store fallback is used; otherwise the
 * MMX implementation does the work.
 */
void mmx_clear_page(void *page)
{
	if (unlikely(in_interrupt())) {
		slow_zero_page(page);
		return;
	}

	fast_clear_page(page);
}
EXPORT_SYMBOL(mmx_clear_page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 368) 
/*
 * Copy 1024 32-bit words (4096 bytes) from @from to @to with "rep movsl".
 * Uses no FPU/MMX state.
 */
static void slow_copy_page(void *to, void *from)
{
	/* Dummy outputs: tell the compiler the count, destination and source registers are modified. */
	int count_scratch, dest_scratch, src_scratch;

	__asm__ __volatile__(
		"cld\n\t"
		"rep ; movsl"
		: "=&c" (count_scratch), "=&D" (dest_scratch), "=&S" (src_scratch)
		: "0" (1024), "1" ((long) to), "2" ((long) from)
		: "memory");
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 380) 
/*
 * mmx_copy_page - copy the page at @from to the page at @to.
 *
 * In interrupt context the string-move fallback is used; otherwise the
 * MMX implementation does the work.
 */
void mmx_copy_page(void *to, void *from)
{
	if (unlikely(in_interrupt())) {
		slow_copy_page(to, from);
		return;
	}

	fast_copy_page(to, from);
}
EXPORT_SYMBOL(mmx_copy_page);