^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1) /* SPDX-License-Identifier: GPL-2.0 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4) * Optimized version of the standard copy_page() function
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6) * Inputs:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7) * in0: address of target page
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8) * in1: address of source page
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9) * Output:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10) * no return value
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 12) * Copyright (C) 1999, 2001 Hewlett-Packard Co
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 13) * Stephane Eranian <eranian@hpl.hp.com>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 14) * David Mosberger <davidm@hpl.hp.com>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 15) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 16) * 4/06/01 davidm Tuned to make it perform well both for cached and uncached copies.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 17) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 18) #include <asm/asmmacro.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 19) #include <asm/page.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 20) #include <asm/export.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 21)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 22) #define PIPE_DEPTH 3 /* software-pipeline depth: sizes the rotating t1..t8 and p arrays and is loaded into ar.ec */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 23) #define EPI p[PIPE_DEPTH-1] /* last rotating predicate: guards every st8 in the copy loop */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 24) /* Register aliases used by copy_page. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 25) #define lcount r16 /* loop count (PAGE_SIZE/64-1) staged here before it is moved to ar.lc */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 26) #define saved_pr r17 /* copy of pr taken on entry */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 27) #define saved_lc r18 /* copy of ar.lc taken on entry */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 28) #define saved_pfs r19 /* copy of ar.pfs produced by alloc */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 29) #define src1 r20 /* load pointer, starts at in1, advances 16 bytes per ld8 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 30) #define src2 r21 /* load pointer, starts at in1+8, advances 16 bytes per ld8 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 31) #define tgt1 r22 /* store pointer, starts at in0, advances 16 bytes per st8 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 32) #define tgt2 r23 /* store pointer, starts at in0+8, advances 16 bytes per st8 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 33) #define srcf r24 /* source lfetch pointer, starts at in1+512, advances 64 bytes per lfetch */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 34) #define tgtf r25 /* target lfetch pointer, starts at in0+512, advances 64 bytes per lfetch */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 35) #define tgt_last r26 /* in0 + PAGE_SIZE: bound that tgtf is compared against */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 36) /* Nrot: 8 registers (t1..t8) per pipeline stage, rounded up to a multiple of 8. It is passed to alloc as the rotating-region size. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 37) #define Nrot ((8*PIPE_DEPTH+7)&~7)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 38)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 39) GLOBAL_ENTRY(copy_page) // copy PAGE_SIZE bytes from the page at in1 to the page at in0, 64 bytes per loop pass, no return value
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 40) .prologue
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 41) .save ar.pfs, saved_pfs
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 42) alloc saved_pfs=ar.pfs,3,Nrot-3,0,Nrot // frame of 3 inputs + Nrot-3 locals + 0 outputs, all Nrot of them rotating. Only in0 and in1 are read below.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 43) // t1..t8 name eight rotating register arrays and p names the rotating predicates, each PIPE_DEPTH entries deep.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 44) .rotr t1[PIPE_DEPTH], t2[PIPE_DEPTH], t3[PIPE_DEPTH], t4[PIPE_DEPTH], \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 45) t5[PIPE_DEPTH], t6[PIPE_DEPTH], t7[PIPE_DEPTH], t8[PIPE_DEPTH]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 46) .rotp p[PIPE_DEPTH]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 47)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 48) .save ar.lc, saved_lc
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 49) mov saved_lc=ar.lc // keep the incoming loop count, it is put back before returning
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 50) mov ar.ec=PIPE_DEPTH // epilog count: br.ctop keeps looping after ar.lc reaches zero so the stores still in flight drain
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 51)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 52) mov lcount=PAGE_SIZE/64-1 // one pass issues 8 ld8 = 64 bytes, and ar.lc takes the number of passes minus one
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 53) .save pr, saved_pr
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 54) mov saved_pr=pr // keep the incoming predicates, the rotating ones are put back before returning
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 55) mov pr.rot=1<<16 // set only bit 16 (p16, the first rotating predicate, i.e. p[0]) so the load stage starts enabled and the store stage disabled
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 56)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 57) .body
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 58) // Set up pointers in static registers. NOTE(review): in0/in1 appear to share the rotating region with t1..t8, so they must not be read once the loop starts - confirm.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 59) mov src1=in1 // src1 reads the even 8-byte words of the source
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 60) adds src2=8,in1 // src2 reads the odd 8-byte words of the source
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 61) mov tgt_last = PAGE_SIZE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 62) ;;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 63) adds tgt2=8,in0 // tgt2 writes the odd 8-byte words of the target
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 64) add srcf=512,in1 // source prefetch pointer starts 512 bytes ahead of the source page start
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 65) mov ar.lc=lcount
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 66) mov tgt1=in0 // tgt1 writes the even 8-byte words of the target
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 67) add tgtf=512,in0 // target prefetch pointer starts 512 bytes ahead of the target page start
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 68) add tgt_last = tgt_last, in0 // tgt_last = in0 + PAGE_SIZE, the first address past the target page
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 69) ;;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 70) 1: // loop body: four groups, each loading 16 bytes under p[0] and storing 16 bytes under EPI
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 71) (p[0]) ld8 t1[0]=[src1],16 // load into stage 0 of t1, post-increment src1 by 16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 72) (EPI) st8 [tgt1]=t1[PIPE_DEPTH-1],16 // store the value that was loaded into t1[0] PIPE_DEPTH-1 rotations earlier
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 73) (p[0]) ld8 t2[0]=[src2],16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 74) (EPI) st8 [tgt2]=t2[PIPE_DEPTH-1],16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 75) cmp.ltu p6,p0 = tgtf, tgt_last // p6 = (tgtf < tgt_last), unsigned. The complementary result is written to p0.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 76) ;;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 77) (p[0]) ld8 t3[0]=[src1],16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 78) (EPI) st8 [tgt1]=t3[PIPE_DEPTH-1],16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 79) (p[0]) ld8 t4[0]=[src2],16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 80) (EPI) st8 [tgt2]=t4[PIPE_DEPTH-1],16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 81) ;;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 82) (p[0]) ld8 t5[0]=[src1],16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 83) (EPI) st8 [tgt1]=t5[PIPE_DEPTH-1],16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 84) (p[0]) ld8 t6[0]=[src2],16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 85) (EPI) st8 [tgt2]=t6[PIPE_DEPTH-1],16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 86) ;;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 87) (p[0]) ld8 t7[0]=[src1],16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 88) (EPI) st8 [tgt1]=t7[PIPE_DEPTH-1],16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 89) (p[0]) ld8 t8[0]=[src2],16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 90) (EPI) st8 [tgt2]=t8[PIPE_DEPTH-1],16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 91) // Prefetch both pages only while p6 says tgtf is still below tgt_last. srcf is gated by the same test and advances in step with tgtf.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 92) (p6) lfetch [srcf], 64
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 93) (p6) lfetch [tgtf], 64
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 94) br.ctop.sptk.few 1b // counted loop branch driven by ar.lc and then ar.ec. Each taken branch rotates t1..t8 and p by one stage.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 95) ;;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 96) mov pr=saved_pr,0xffffffffffff0000 // restore only the predicates selected by mask bits 16 and up (the rotating ones). p6 is left as the loop set it.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 97) mov ar.pfs=saved_pfs // put back the ar.pfs value captured by alloc
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 98) mov ar.lc=saved_lc // put back the incoming loop count
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 99) br.ret.sptk.many rp
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 100) END(copy_page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 101) EXPORT_SYMBOL(copy_page)