Orange Pi 5 kernel

Deprecated Linux kernel 5.10.110 for OrangePi 5/5B/5+ boards
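Source file: arch/powerpc/lib/memcmp_64.S — the 64-bit PowerPC assembly implementation of memcmp().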

/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * Author: Anton Blanchard <anton@au.ibm.com>
 * Copyright 2015 IBM Corporation.
 */
#include <asm/ppc_asm.h>
#include <asm/export.h>
#include <asm/ppc-opcode.h>

#define off8	r6
#define off16	r7
#define off24	r8

#define rA	r9
#define rB	r10
#define rC	r11
#define rD	r27
#define rE	r28
#define rF	r29
#define rG	r30
#define rH	r31

#ifdef __LITTLE_ENDIAN__
#define LH	lhbrx
#define LW	lwbrx
#define LD	ldbrx
#define LVS	lvsr
#define VPERM(_VRT,_VRA,_VRB,_VRC) \
	vperm _VRT,_VRB,_VRA,_VRC
#else
#define LH	lhzx
#define LW	lwzx
#define LD	ldx
#define LVS	lvsl
#define VPERM(_VRT,_VRA,_VRB,_VRC) \
	vperm _VRT,_VRA,_VRB,_VRC
#endif
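
/*
 * Note on the endian-dependent definitions above: on little-endian,
 * the byte-reversing loads (lhbrx/lwbrx/ldbrx) place the lowest-
 * addressed byte in the most significant position of the register, so
 * an unsigned compare (cmpld) of two loaded doublewords orders them
 * exactly as a byte-by-byte memcmp would; big-endian gets this for
 * free from the plain loads. lvsr plus the swapped vperm operands
 * achieve the equivalent lane ordering for the VMX path.
 */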

#define VMX_THRESH 4096
#define ENTER_VMX_OPS	\
	mflr    r0;	\
	std     r3,-STACKFRAMESIZE+STK_REG(R31)(r1); \
	std     r4,-STACKFRAMESIZE+STK_REG(R30)(r1); \
	std     r5,-STACKFRAMESIZE+STK_REG(R29)(r1); \
	std     r0,16(r1); \
	stdu    r1,-STACKFRAMESIZE(r1); \
	bl      enter_vmx_ops; \
	cmpwi   cr1,r3,0; \
	ld      r0,STACKFRAMESIZE+16(r1); \
	ld      r3,STK_REG(R31)(r1); \
	ld      r4,STK_REG(R30)(r1); \
	ld      r5,STK_REG(R29)(r1); \
	addi	r1,r1,STACKFRAMESIZE; \
	mtlr    r0
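
/*
 * ENTER_VMX_OPS preserves r3-r5 and LR across the call to
 * enter_vmx_ops() (see vmx-helper.c), which returns nonzero when VMX
 * may be used. The cmpwi above latches that result into cr1, so
 * callers test "beq cr1,<novmx path>" to fall back when VMX is
 * unavailable. EXIT_VMX_OPS below is the matching teardown.
 */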

#define EXIT_VMX_OPS \
	mflr    r0; \
	std     r3,-STACKFRAMESIZE+STK_REG(R31)(r1); \
	std     r4,-STACKFRAMESIZE+STK_REG(R30)(r1); \
	std     r5,-STACKFRAMESIZE+STK_REG(R29)(r1); \
	std     r0,16(r1); \
	stdu    r1,-STACKFRAMESIZE(r1); \
	bl      exit_vmx_ops; \
	ld      r0,STACKFRAMESIZE+16(r1); \
	ld      r3,STK_REG(R31)(r1); \
	ld      r4,STK_REG(R30)(r1); \
	ld      r5,STK_REG(R29)(r1); \
	addi	r1,r1,STACKFRAMESIZE; \
	mtlr    r0

/*
 * LD_VSR_CROSS16B loads the second 16 bytes for _vaddr, which is not
 * aligned on a 16-byte boundary, and permutes the result with the
 * first 16 bytes.

 *    |  y y y y y y y y y y y y y 0 1 2 | 3 4 5 6 7 8 9 a b c d e f z z z |
 *    ^                                  ^                                 ^
 * 0xbbbb10                          0xbbbb20                          0xbbbb30
 *                                 ^
 *                                _vaddr
 *
 *
 * _vmask is the mask generated by LVS
 * _v1st_qw is the 1st aligned QW of the current addr, already loaded.
 *   for example: 0xyyyyyyyyyyyyy012 for big endian
 * _v2nd_qw is the 2nd aligned QW of _vaddr, to be loaded.
 *   for example: 0x3456789abcdefzzz for big endian
 * The permute result is saved in _v_res.
 *   for example: 0x0123456789abcdef for big endian.
 */
#define LD_VSR_CROSS16B(_vaddr,_vmask,_v1st_qw,_v2nd_qw,_v_res) \
        lvx     _v2nd_qw,_vaddr,off16; \
        VPERM(_v_res,_v1st_qw,_v2nd_qw,_vmask)
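
/*
 * For example, LD_VSR_CROSS16B(r4,v4,v6,v8,v10) (as used in the
 * diffoffset VMX loop below) expands to an lvx of the next aligned
 * quadword at r4+16 into v8, then a VPERM of v6 and v8 under mask v4,
 * leaving the 16 bytes starting at the unaligned r4 in v10.
 */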

/*
 * There are 2 categories for memcmp:
 * 1) src/dst have the same offset relative to the 8-byte boundary.
 *    The handlers are named .Lsameoffset_xxxx
 * 2) src/dst have different offsets relative to the 8-byte boundary.
 *    The handlers are named .Ldiffoffset_xxxx
 */
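/*
 * Calling convention (standard PPC64 ELF ABI):
 *	r3 = s1, r4 = s2, r5 = n
 * The usual signed memcmp() result (<0, 0, >0) is returned in r3.
 */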
_GLOBAL_TOC(memcmp)
	cmpdi	cr1,r5,0

	/* Use the short loop if the src/dst addresses do not have the
	 * same offset relative to an 8-byte alignment boundary.
	 */
	xor	r6,r3,r4
	andi.	r6,r6,7

	/* Fall back to the short loop when comparing fewer than 8
	 * bytes at aligned addresses.
	 */
	cmpdi   cr6,r5,7

	beq	cr1,.Lzero
	bgt	cr6,.Lno_short

.Lshort:
	mtctr	r5
1:	lbz	rA,0(r3)
	lbz	rB,0(r4)
	subf.	rC,rB,rA
	bne	.Lnon_zero
	bdz	.Lzero

	lbz	rA,1(r3)
	lbz	rB,1(r4)
	subf.	rC,rB,rA
	bne	.Lnon_zero
	bdz	.Lzero

	lbz	rA,2(r3)
	lbz	rB,2(r4)
	subf.	rC,rB,rA
	bne	.Lnon_zero
	bdz	.Lzero

	lbz	rA,3(r3)
	lbz	rB,3(r4)
	subf.	rC,rB,rA
	bne	.Lnon_zero

	addi	r3,r3,4
	addi	r4,r4,4

	bdnz	1b

.Lzero:
	li	r3,0
	blr

.Lno_short:
	dcbt	0,r3
	dcbt	0,r4
	bne	.Ldiffoffset_8bytes_make_align_start


.Lsameoffset_8bytes_make_align_start:
	/* Attempt to compare the leading bytes that are not 8-byte
	 * aligned, so that the rest of the comparison can run on an
	 * 8-byte-aligned basis.
	 */
	andi.   r6,r3,7

	/* Try to compare the first double word, which is not 8-byte
	 * aligned: load the first double word at (src & ~7UL) and
	 * shift left the appropriate number of bits before comparison.
	 */
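	/* Example: if r3 & 7 == 5, then r6 = 40 below; the aligned
	 * loads pick up 5 garbage bytes below src/dst, and shifting
	 * both values left by 40 bits discards them, so only the 3
	 * valid leading bytes take part in the compare.
	 */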
	rlwinm  r6,r3,3,26,28
	beq     .Lsameoffset_8bytes_aligned
	clrrdi	r3,r3,3
	clrrdi	r4,r4,3
	LD	rA,0,r3
	LD	rB,0,r4
	sld	rA,rA,r6
	sld	rB,rB,r6
	cmpld	cr0,rA,rB
	srwi	r6,r6,3
	bne	cr0,.LcmpAB_lightweight
	subfic  r6,r6,8
	subf.	r5,r6,r5
	addi	r3,r3,8
	addi	r4,r4,8
	beq	.Lzero

.Lsameoffset_8bytes_aligned:
	/* Now we are 8-byte aligned. Use the .Llong loop if 32 or more
	 * bytes remain to be compared.
	 */
	cmpdi   cr6,r5,31
	bgt	cr6,.Llong

.Lcmp_lt32bytes:
	/* compare 1 to 31 bytes; at least r3 is 8-byte aligned now */
	cmpdi   cr5,r5,7
	srdi    r0,r5,3
	ble	cr5,.Lcmp_rest_lt8bytes

	/* handle 8 to 31 bytes */
	clrldi  r5,r5,61
	mtctr   r0
2:
	LD	rA,0,r3
	LD	rB,0,r4
	cmpld	cr0,rA,rB
	addi	r3,r3,8
	addi	r4,r4,8
	bne	cr0,.LcmpAB_lightweight
	bdnz	2b

	cmpwi   r5,0
	beq	.Lzero

.Lcmp_rest_lt8bytes:
	/*
	 * Here we have less than 8 bytes to compare. At least s1 is aligned to
	 * 8 bytes, but s2 may not be. We must make sure s2 + 7 doesn't cross a
	 * page boundary, otherwise we might read past the end of the buffer and
	 * trigger a page fault. We use 4K as the conservative minimum page
	 * size. If we detect that case we go to the byte-by-byte loop.
	 *
	 * Otherwise the next double word is loaded from s1 and s2, and shifted
	 * right to compare the appropriate bits.
	 */
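	/*
	 * Worked example of the guard below: 0xff8 == 4096 - 8, the
	 * largest in-page offset at which an 8-byte load still stays
	 * within a 4K page. If (r4 & 0xfff) == 0xff9, the load would
	 * touch r4 + 7 at offset 0x1000, i.e. in the next page, so we
	 * take the byte-by-byte .Lshort loop instead.
	 */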
	clrldi	r6,r4,(64-12)	// r6 = r4 & 0xfff
	cmpdi	r6,0xff8
	bgt	.Lshort

	subfic  r6,r5,8
	slwi	r6,r6,3
	LD	rA,0,r3
	LD	rB,0,r4
	srd	rA,rA,r6
	srd	rB,rB,r6
	cmpld	cr0,rA,rB
	bne	cr0,.LcmpAB_lightweight
	b	.Lzero

.Lnon_zero:
	mr	r3,rC
	blr

.Llong:
#ifdef CONFIG_ALTIVEC
BEGIN_FTR_SECTION
	/* Use the VMX loop if the length is at least 4K (VMX_THRESH). */
	cmpldi  cr6,r5,VMX_THRESH
	bge	cr6,.Lsameoffset_vmx_cmp
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)

.Llong_novmx_cmp:
#endif
	/* At least s1 addr is aligned with 8 bytes */
	li	off8,8
	li	off16,16
	li	off24,24

	std	r31,-8(r1)
	std	r30,-16(r1)
	std	r29,-24(r1)
	std	r28,-32(r1)
	std	r27,-40(r1)

	srdi	r0,r5,5
	mtctr	r0
	andi.	r5,r5,31

	LD	rA,0,r3
	LD	rB,0,r4

	LD	rC,off8,r3
	LD	rD,off8,r4

	LD	rE,off16,r3
	LD	rF,off16,r4

	LD	rG,off24,r3
	LD	rH,off24,r4
	cmpld	cr0,rA,rB

	addi	r3,r3,32
	addi	r4,r4,32

	bdz	.Lfirst32

	LD	rA,0,r3
	LD	rB,0,r4
	cmpld	cr1,rC,rD

	LD	rC,off8,r3
	LD	rD,off8,r4
	cmpld	cr6,rE,rF

	LD	rE,off16,r3
	LD	rF,off16,r4
	cmpld	cr7,rG,rH
	bne	cr0,.LcmpAB

	LD	rG,off24,r3
	LD	rH,off24,r4
	cmpld	cr0,rA,rB
	bne	cr1,.LcmpCD

	addi	r3,r3,32
	addi	r4,r4,32

	bdz	.Lsecond32

	.balign	16

1:	LD	rA,0,r3
	LD	rB,0,r4
	cmpld	cr1,rC,rD
	bne	cr6,.LcmpEF

	LD	rC,off8,r3
	LD	rD,off8,r4
	cmpld	cr6,rE,rF
	bne	cr7,.LcmpGH

	LD	rE,off16,r3
	LD	rF,off16,r4
	cmpld	cr7,rG,rH
	bne	cr0,.LcmpAB

	LD	rG,off24,r3
	LD	rH,off24,r4
	cmpld	cr0,rA,rB
	bne	cr1,.LcmpCD

	addi	r3,r3,32
	addi	r4,r4,32

	bdnz	1b

.Lsecond32:
	cmpld	cr1,rC,rD
	bne	cr6,.LcmpEF

	cmpld	cr6,rE,rF
	bne	cr7,.LcmpGH

	cmpld	cr7,rG,rH
	bne	cr0,.LcmpAB

	bne	cr1,.LcmpCD
	bne	cr6,.LcmpEF
	bne	cr7,.LcmpGH

.Ltail:
	ld	r31,-8(r1)
	ld	r30,-16(r1)
	ld	r29,-24(r1)
	ld	r28,-32(r1)
	ld	r27,-40(r1)

	cmpdi	r5,0
	beq	.Lzero
	b	.Lshort

.Lfirst32:
	cmpld	cr1,rC,rD
	cmpld	cr6,rE,rF
	cmpld	cr7,rG,rH

	bne	cr0,.LcmpAB
	bne	cr1,.LcmpCD
	bne	cr6,.LcmpEF
	bne	cr7,.LcmpGH

	b	.Ltail

.LcmpAB:
	li	r3,1
	bgt	cr0,.Lout
	li	r3,-1
	b	.Lout

.LcmpCD:
	li	r3,1
	bgt	cr1,.Lout
	li	r3,-1
	b	.Lout

.LcmpEF:
	li	r3,1
	bgt	cr6,.Lout
	li	r3,-1
	b	.Lout

.LcmpGH:
	li	r3,1
	bgt	cr7,.Lout
	li	r3,-1

.Lout:
	ld	r31,-8(r1)
	ld	r30,-16(r1)
	ld	r29,-24(r1)
	ld	r28,-32(r1)
	ld	r27,-40(r1)
	blr

.LcmpAB_lightweight:   /* skip NV GPRS restore */
	li	r3,1
	bgtlr
	li	r3,-1
	blr


#ifdef CONFIG_ALTIVEC
.Lsameoffset_vmx_cmp:
	/* Entered with src/dst addresses having the same offset
	 * relative to an 8-byte alignment boundary.
	 *
	 * There is an optimization based on the following fact:
	 * memcmp() tends to fail early, within the first 32 bytes.
	 * Before using VMX instructions (which incur the load/restore
	 * penalty of 32 x 128-bit VMX registers), compare the first
	 * 32 bytes, catching roughly 80% of the failing cases cheaply.
	 */
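	/* The pre-check below runs 4 CTR iterations of one 8-byte
	 * compare each, i.e. exactly 32 bytes; only if all of them
	 * match do we pay for ENTER_VMX_OPS.
	 */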

	li	r0,4
	mtctr	r0
.Lsameoffset_prechk_32B_loop:
	LD	rA,0,r3
	LD	rB,0,r4
	cmpld	cr0,rA,rB
	addi	r3,r3,8
	addi	r4,r4,8
	bne     cr0,.LcmpAB_lightweight
	addi	r5,r5,-8
	bdnz	.Lsameoffset_prechk_32B_loop

	ENTER_VMX_OPS
	beq     cr1,.Llong_novmx_cmp

3:
	/* Check whether r4 has the same offset as r3 relative to the
	 * 16-byte boundary.
	 */
	xor	r0,r3,r4
	andi.	r0,r0,0xf
	bne	.Ldiffoffset_vmx_cmp_start

	/* len is no less than 4KB here. Align further, to 16 bytes. */
	andi.	rA,r3,8
	LD	rA,0,r3
	beq	4f
	LD	rB,0,r4
	cmpld	cr0,rA,rB
	addi	r3,r3,8
	addi	r4,r4,8
	addi	r5,r5,-8

	beq	cr0,4f
	/* save and restore cr0 across EXIT_VMX_OPS */
	mfocrf  r5,128
	EXIT_VMX_OPS
	mtocrf  128,r5
	b	.LcmpAB_lightweight

4:
	/* compare 32 bytes for each loop */
	srdi	r0,r5,5
	mtctr	r0
	clrldi  r5,r5,59
	li	off16,16

.balign 16
5:
	lvx	v0,0,r3
	lvx	v1,0,r4
	VCMPEQUD_RC(v0,v0,v1)
	bnl	cr6,7f
	lvx	v0,off16,r3
	lvx	v1,off16,r4
	VCMPEQUD_RC(v0,v0,v1)
	bnl	cr6,6f
	addi	r3,r3,32
	addi	r4,r4,32
	bdnz	5b

	EXIT_VMX_OPS
	cmpdi	r5,0
	beq	.Lzero
	b	.Lcmp_lt32bytes

6:
	addi	r3,r3,16
	addi	r4,r4,16

7:
	/* The difference lies within the last 16 bytes examined; redo
	 * the compare as two doublewords to compute the result.
	 */
	EXIT_VMX_OPS
	LD	rA,0,r3
	LD	rB,0,r4
	cmpld	cr0,rA,rB
	li	off8,8
	bne	cr0,.LcmpAB_lightweight

	LD	rA,off8,r3
	LD	rB,off8,r4
	cmpld	cr0,rA,rB
	bne	cr0,.LcmpAB_lightweight
	b	.Lzero
#endif

.Ldiffoffset_8bytes_make_align_start:
	/* now try to align s1 with 8 bytes */
	rlwinm  r6,r3,3,26,28
	beq     .Ldiffoffset_align_s1_8bytes

	clrrdi	r3,r3,3
	LD	rA,0,r3
	LD	rB,0,r4  /* unaligned load */
	sld	rA,rA,r6
	srd	rA,rA,r6
	srd	rB,rB,r6
	cmpld	cr0,rA,rB
	srwi	r6,r6,3
	bne	cr0,.LcmpAB_lightweight

	subfic  r6,r6,8
	subf.	r5,r6,r5
	addi	r3,r3,8
	add	r4,r4,r6

	beq	.Lzero

.Ldiffoffset_align_s1_8bytes:
	/* now s1 is aligned with 8 bytes */
#ifdef CONFIG_ALTIVEC
BEGIN_FTR_SECTION
	/* only do VMX ops when the size is at least 4K bytes */
	cmpdi	cr5,r5,VMX_THRESH
	bge	cr5,.Ldiffoffset_vmx_cmp
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)

.Ldiffoffset_novmx_cmp:
#endif


	cmpdi   cr5,r5,31
	ble	cr5,.Lcmp_lt32bytes

#ifdef CONFIG_ALTIVEC
	b	.Llong_novmx_cmp
#else
	b	.Llong
#endif

#ifdef CONFIG_ALTIVEC
.Ldiffoffset_vmx_cmp:
	/* perform a 32-byte pre-check before enabling VMX operations */
	li	r0,4
	mtctr	r0
.Ldiffoffset_prechk_32B_loop:
	LD	rA,0,r3
	LD	rB,0,r4
	cmpld	cr0,rA,rB
	addi	r3,r3,8
	addi	r4,r4,8
	bne     cr0,.LcmpAB_lightweight
	addi	r5,r5,-8
	bdnz	.Ldiffoffset_prechk_32B_loop

	ENTER_VMX_OPS
	beq     cr1,.Ldiffoffset_novmx_cmp

.Ldiffoffset_vmx_cmp_start:
	/* first, try to align r3 to 16 bytes */
	andi.   r6,r3,0xf
	li	off16,16
	beq     .Ldiffoffset_vmx_s1_16bytes_align

	LVS	v3,0,r3
	LVS	v4,0,r4

	lvx     v5,0,r3
	lvx     v6,0,r4
	LD_VSR_CROSS16B(r3,v3,v5,v7,v9)
	LD_VSR_CROSS16B(r4,v4,v6,v8,v10)

	VCMPEQUB_RC(v7,v9,v10)
	bnl	cr6,.Ldiffoffset_vmx_diff_found

	subfic  r6,r6,16
	subf    r5,r6,r5
	add     r3,r3,r6
	add     r4,r4,r6

.Ldiffoffset_vmx_s1_16bytes_align:
	/* now s1 is aligned with 16 bytes */
	lvx     v6,0,r4
	LVS	v4,0,r4
	srdi	r6,r5,5  /* loop for 32 bytes each */
	clrldi  r5,r5,59
	mtctr	r6

.balign	16
.Ldiffoffset_vmx_32bytesloop:
	/* the first qw of r4 was saved in v6 */
	lvx	v9,0,r3
	LD_VSR_CROSS16B(r4,v4,v6,v8,v10)
	VCMPEQUB_RC(v7,v9,v10)
	vor	v6,v8,v8
	bnl	cr6,.Ldiffoffset_vmx_diff_found

	addi	r3,r3,16
	addi	r4,r4,16

	lvx	v9,0,r3
	LD_VSR_CROSS16B(r4,v4,v6,v8,v10)
	VCMPEQUB_RC(v7,v9,v10)
	vor	v6,v8,v8
	bnl	cr6,.Ldiffoffset_vmx_diff_found

	addi	r3,r3,16
	addi	r4,r4,16

	bdnz	.Ldiffoffset_vmx_32bytesloop

	EXIT_VMX_OPS

	cmpdi	r5,0
	beq	.Lzero
	b	.Lcmp_lt32bytes

.Ldiffoffset_vmx_diff_found:
	EXIT_VMX_OPS
	/* either way, the difference lies within the next 16 bytes, so
	 * rerun the scalar path over just those.
	 */
	li	r5,16
	b	.Lcmp_lt32bytes

#endif
EXPORT_SYMBOL(memcmp)