// SPDX-License-Identifier: GPL-2.0
/*
 * MMX 3DNow! library helper functions
 *
 * To do:
 *	We could use MMX just for prefetching in IRQs; this may be a win
 *	(reported to be so on the K6-III).
 *	We should use a better, code-neutral filler for the short jump:
 *	leal ebx,[ebx] is apparently best for the K6-2, but Cyrix ??
 *	We also want to clobber the filler register so we don't get any
 *	register-forwarding stalls on the filler.
 *
 *	Add *user handling. Checksums are not a win with MMX on any CPU
 *	tested so far, whichever MMX scheme was tried.
 *
 * 22/09/2000 - Arjan van de Ven
 *	Improved for non-engineering-sample Athlons
 *
 */
#include <linux/hardirq.h>
#include <linux/string.h>
#include <linux/export.h>
#include <linux/sched.h>
#include <linux/types.h>

#include <asm/fpu/api.h>
#include <asm/asm.h>

/*
 * Use KFPU_387. MMX instructions are not affected by MXCSR,
 * but both AMD and Intel documentation states that even integer MMX
 * operations will result in #MF if an exception is pending in FCW.
 *
 * EMMS is not needed afterwards because, after calling kernel_fpu_end(),
 * any subsequent user of the 387 stack will reinitialize it using
 * KFPU_387.
 */
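
/*
 * Every helper below brackets its MMX work the same way; a minimal
 * sketch of the pattern (illustrative only, "src" is a placeholder,
 * this block is not extra code in this file):
 *
 *	kernel_fpu_begin_mask(KFPU_387);	// save task FPU state, load a clean FCW
 *	asm volatile("movq (%0), %%mm0" : : "r" (src));	// ...MMX work on mm0-mm7...
 *	kernel_fpu_end();			// no EMMS needed, see above
 */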

void *_mmx_memcpy(void *to, const void *from, size_t len)
{
	void *p;
	int i;

	if (unlikely(in_interrupt()))
		return __memcpy(to, from, len);

	p = to;
	i = len >> 6; /* len/64 */

	kernel_fpu_begin_mask(KFPU_387);

	__asm__ __volatile__ (
		"1: prefetch (%0)\n"		/* This block of five prefetches is 28 bytes */
		"   prefetch 64(%0)\n"
		"   prefetch 128(%0)\n"
		"   prefetch 192(%0)\n"
		"   prefetch 256(%0)\n"
		"2: \n"
		".section .fixup, \"ax\"\n"
		"3: movw $0x1AEB, 1b\n"	/* 0x1AEB = "jmp +26": patch a jump over the faulting 28-byte prefetch block */
		"   jmp 2b\n"
		".previous\n"
			_ASM_EXTABLE(1b, 3b)
			: : "r" (from));

	for ( ; i > 5; i--) {
		__asm__ __volatile__ (
		"1: prefetch 320(%0)\n"
		"2: movq (%0), %%mm0\n"
		"   movq 8(%0), %%mm1\n"
		"   movq 16(%0), %%mm2\n"
		"   movq 24(%0), %%mm3\n"
		"   movq %%mm0, (%1)\n"
		"   movq %%mm1, 8(%1)\n"
		"   movq %%mm2, 16(%1)\n"
		"   movq %%mm3, 24(%1)\n"
		"   movq 32(%0), %%mm0\n"
		"   movq 40(%0), %%mm1\n"
		"   movq 48(%0), %%mm2\n"
		"   movq 56(%0), %%mm3\n"
		"   movq %%mm0, 32(%1)\n"
		"   movq %%mm1, 40(%1)\n"
		"   movq %%mm2, 48(%1)\n"
		"   movq %%mm3, 56(%1)\n"
		".section .fixup, \"ax\"\n"
		"3: movw $0x05EB, 1b\n"	/* 0x05EB = "jmp +5": patch a jump over the faulting prefetch */
		"   jmp 2b\n"
		".previous\n"
			_ASM_EXTABLE(1b, 3b)
			: : "r" (from), "r" (to) : "memory");

		from += 64;
		to += 64;
	}

	for ( ; i > 0; i--) {
		__asm__ __volatile__ (
		"   movq (%0), %%mm0\n"
		"   movq 8(%0), %%mm1\n"
		"   movq 16(%0), %%mm2\n"
		"   movq 24(%0), %%mm3\n"
		"   movq %%mm0, (%1)\n"
		"   movq %%mm1, 8(%1)\n"
		"   movq %%mm2, 16(%1)\n"
		"   movq %%mm3, 24(%1)\n"
		"   movq 32(%0), %%mm0\n"
		"   movq 40(%0), %%mm1\n"
		"   movq 48(%0), %%mm2\n"
		"   movq 56(%0), %%mm3\n"
		"   movq %%mm0, 32(%1)\n"
		"   movq %%mm1, 40(%1)\n"
		"   movq %%mm2, 48(%1)\n"
		"   movq %%mm3, 56(%1)\n"
			: : "r" (from), "r" (to) : "memory");

		from += 64;
		to += 64;
	}
	/*
	 * Now do the tail of the block:
	 */
	__memcpy(to, from, len & 63);
	kernel_fpu_end();

	return p;
}
EXPORT_SYMBOL(_mmx_memcpy);
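
/*
 * Callers are not expected to use _mmx_memcpy() directly: the MMX path
 * only pays off for large copies, so a memcpy() wrapper would normally
 * fall back to the plain routine below some size threshold. A hedged
 * sketch of such a wrapper (the name and the 512-byte cut-off are
 * assumptions, not something this file defines):
 *
 *	static inline void *big_memcpy(void *to, const void *from, size_t len)
 *	{
 *		if (len < 512)
 *			return __memcpy(to, from, len);
 *		return _mmx_memcpy(to, from, len);
 *	}
 */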

#ifdef CONFIG_MK7

/*
 * The K7 has streaming cache-bypass load/store. The Cyrix III, K6 and
 * other MMX-using processors do not.
 */
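
/*
 * In a nutshell (illustrative sketch only, not code in this file):
 * a normal MMX store pulls the destination line into the cache, while
 * a non-temporal store bypasses it and must eventually be followed by
 * an sfence because movntq stores are weakly ordered:
 *
 *	movq   %mm0, (%edi)	// cacheable store, displaces useful data
 *	movntq %mm0, (%edi)	// streaming store, leaves the cache alone
 *	sfence			// order the weakly-ordered movntq stores
 */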

static void fast_clear_page(void *page)
{
	int i;

	kernel_fpu_begin_mask(KFPU_387);

	__asm__ __volatile__ (
		"  pxor %%mm0, %%mm0\n" : :
	);

	for (i = 0; i < 4096/64; i++) {
		__asm__ __volatile__ (
		"  movntq %%mm0, (%0)\n"
		"  movntq %%mm0, 8(%0)\n"
		"  movntq %%mm0, 16(%0)\n"
		"  movntq %%mm0, 24(%0)\n"
		"  movntq %%mm0, 32(%0)\n"
		"  movntq %%mm0, 40(%0)\n"
		"  movntq %%mm0, 48(%0)\n"
		"  movntq %%mm0, 56(%0)\n"
		: : "r" (page) : "memory");
		page += 64;
	}

	/*
	 * Since movntq is weakly-ordered, an "sfence" is needed to make
	 * the stores ordered again:
	 */
	__asm__ __volatile__("sfence\n"::);

	kernel_fpu_end();
}

static void fast_copy_page(void *to, void *from)
{
	int i;

	kernel_fpu_begin_mask(KFPU_387);

	/*
	 * maybe the prefetch stuff can go before the expensive fnsave...
	 * but that is for later. -AV
	 */
	__asm__ __volatile__(
		"1: prefetch (%0)\n"
		"   prefetch 64(%0)\n"
		"   prefetch 128(%0)\n"
		"   prefetch 192(%0)\n"
		"   prefetch 256(%0)\n"
		"2: \n"
		".section .fixup, \"ax\"\n"
		"3: movw $0x1AEB, 1b\n"	/* 0x1AEB = "jmp +26": patch a jump over the faulting 28-byte prefetch block */
		"   jmp 2b\n"
		".previous\n"
			_ASM_EXTABLE(1b, 3b) : : "r" (from));

	for (i = 0; i < (4096-320)/64; i++) {
		__asm__ __volatile__ (
		"1: prefetch 320(%0)\n"
		"2: movq (%0), %%mm0\n"
		"   movntq %%mm0, (%1)\n"
		"   movq 8(%0), %%mm1\n"
		"   movntq %%mm1, 8(%1)\n"
		"   movq 16(%0), %%mm2\n"
		"   movntq %%mm2, 16(%1)\n"
		"   movq 24(%0), %%mm3\n"
		"   movntq %%mm3, 24(%1)\n"
		"   movq 32(%0), %%mm4\n"
		"   movntq %%mm4, 32(%1)\n"
		"   movq 40(%0), %%mm5\n"
		"   movntq %%mm5, 40(%1)\n"
		"   movq 48(%0), %%mm6\n"
		"   movntq %%mm6, 48(%1)\n"
		"   movq 56(%0), %%mm7\n"
		"   movntq %%mm7, 56(%1)\n"
		".section .fixup, \"ax\"\n"
		"3: movw $0x05EB, 1b\n"	/* 0x05EB = "jmp +5": patch a jump over the faulting prefetch */
		"   jmp 2b\n"
		".previous\n"
			_ASM_EXTABLE(1b, 3b) : : "r" (from), "r" (to) : "memory");

		from += 64;
		to += 64;
	}

	/*
	 * The last 320 bytes are copied without the lookahead prefetch,
	 * which would read past the end of the source page:
	 */
	for (i = (4096-320)/64; i < 4096/64; i++) {
		__asm__ __volatile__ (
		"2: movq (%0), %%mm0\n"
		"   movntq %%mm0, (%1)\n"
		"   movq 8(%0), %%mm1\n"
		"   movntq %%mm1, 8(%1)\n"
		"   movq 16(%0), %%mm2\n"
		"   movntq %%mm2, 16(%1)\n"
		"   movq 24(%0), %%mm3\n"
		"   movntq %%mm3, 24(%1)\n"
		"   movq 32(%0), %%mm4\n"
		"   movntq %%mm4, 32(%1)\n"
		"   movq 40(%0), %%mm5\n"
		"   movntq %%mm5, 40(%1)\n"
		"   movq 48(%0), %%mm6\n"
		"   movntq %%mm6, 48(%1)\n"
		"   movq 56(%0), %%mm7\n"
		"   movntq %%mm7, 56(%1)\n"
		: : "r" (from), "r" (to) : "memory");
		from += 64;
		to += 64;
	}
	/*
	 * Since movntq is weakly-ordered, an "sfence" is needed to make
	 * the stores ordered again:
	 */
	__asm__ __volatile__("sfence \n"::);
	kernel_fpu_end();
}

#else /* CONFIG_MK7 */

/*
 * Generic MMX implementation without the K7-specific streaming stores
 */
static void fast_clear_page(void *page)
{
	int i;

	kernel_fpu_begin_mask(KFPU_387);

	__asm__ __volatile__ (
		"  pxor %%mm0, %%mm0\n" : :
	);

	for (i = 0; i < 4096/128; i++) {
		__asm__ __volatile__ (
		"  movq %%mm0, (%0)\n"
		"  movq %%mm0, 8(%0)\n"
		"  movq %%mm0, 16(%0)\n"
		"  movq %%mm0, 24(%0)\n"
		"  movq %%mm0, 32(%0)\n"
		"  movq %%mm0, 40(%0)\n"
		"  movq %%mm0, 48(%0)\n"
		"  movq %%mm0, 56(%0)\n"
		"  movq %%mm0, 64(%0)\n"
		"  movq %%mm0, 72(%0)\n"
		"  movq %%mm0, 80(%0)\n"
		"  movq %%mm0, 88(%0)\n"
		"  movq %%mm0, 96(%0)\n"
		"  movq %%mm0, 104(%0)\n"
		"  movq %%mm0, 112(%0)\n"
		"  movq %%mm0, 120(%0)\n"
		: : "r" (page) : "memory");
		page += 128;
	}

	kernel_fpu_end();
}

static void fast_copy_page(void *to, void *from)
{
	int i;

	kernel_fpu_begin_mask(KFPU_387);

	__asm__ __volatile__ (
		"1: prefetch (%0)\n"
		"   prefetch 64(%0)\n"
		"   prefetch 128(%0)\n"
		"   prefetch 192(%0)\n"
		"   prefetch 256(%0)\n"
		"2: \n"
		".section .fixup, \"ax\"\n"
		"3: movw $0x1AEB, 1b\n"	/* 0x1AEB = "jmp +26": patch a jump over the faulting 28-byte prefetch block */
		"   jmp 2b\n"
		".previous\n"
			_ASM_EXTABLE(1b, 3b) : : "r" (from));

	for (i = 0; i < 4096/64; i++) {
		__asm__ __volatile__ (
		"1: prefetch 320(%0)\n"
		"2: movq (%0), %%mm0\n"
		"   movq 8(%0), %%mm1\n"
		"   movq 16(%0), %%mm2\n"
		"   movq 24(%0), %%mm3\n"
		"   movq %%mm0, (%1)\n"
		"   movq %%mm1, 8(%1)\n"
		"   movq %%mm2, 16(%1)\n"
		"   movq %%mm3, 24(%1)\n"
		"   movq 32(%0), %%mm0\n"
		"   movq 40(%0), %%mm1\n"
		"   movq 48(%0), %%mm2\n"
		"   movq 56(%0), %%mm3\n"
		"   movq %%mm0, 32(%1)\n"
		"   movq %%mm1, 40(%1)\n"
		"   movq %%mm2, 48(%1)\n"
		"   movq %%mm3, 56(%1)\n"
		".section .fixup, \"ax\"\n"
		"3: movw $0x05EB, 1b\n"	/* 0x05EB = "jmp +5": patch a jump over the faulting prefetch */
		"   jmp 2b\n"
		".previous\n"
			_ASM_EXTABLE(1b, 3b)
			: : "r" (from), "r" (to) : "memory");

		from += 64;
		to += 64;
	}
	kernel_fpu_end();
}

#endif /* !CONFIG_MK7 */

/*
 * Favour MMX for page clear and copy; fall back to plain string ops
 * when called from interrupt context:
 */
static void slow_zero_page(void *page)
{
	int d0, d1;

	__asm__ __volatile__(
		"cld\n\t"
		"rep ; stosl"

		: "=&c" (d0), "=&D" (d1)
		: "a" (0), "1" (page), "0" (1024)	/* 1024 dwords = one 4096-byte page */
		: "memory");
}

void mmx_clear_page(void *page)
{
	if (unlikely(in_interrupt()))
		slow_zero_page(page);
	else
		fast_clear_page(page);
}
EXPORT_SYMBOL(mmx_clear_page);

static void slow_copy_page(void *to, void *from)
{
	int d0, d1, d2;

	__asm__ __volatile__(
		"cld\n\t"
		"rep ; movsl"
		: "=&c" (d0), "=&D" (d1), "=&S" (d2)
		: "0" (1024), "1" ((long) to), "2" ((long) from)	/* 1024 dwords = one 4096-byte page */
		: "memory");
}

void mmx_copy_page(void *to, void *from)
{
	if (unlikely(in_interrupt()))
		slow_copy_page(to, from);
	else
		fast_copy_page(to, from);
}
EXPORT_SYMBOL(mmx_copy_page);
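
/*
 * These exports are meant to back the arch clear_page()/copy_page()
 * primitives on CPUs where the MMX path wins. A hedged sketch of how a
 * configuration might wire them up (the Kconfig symbol and the exact
 * macro names are assumptions, not defined here):
 *
 *	#ifdef CONFIG_X86_USE_3DNOW
 *	#define clear_page(page)	mmx_clear_page((void *)(page))
 *	#define copy_page(to, from)	mmx_copy_page(to, from)
 *	#endif
 */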