Orange Pi5 kernel

Deprecated Linux kernel 5.10.110 for OrangePi 5/5B/5+ boards

3 Commits   0 Branches   0 Tags
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   1) /* SPDX-License-Identifier: GPL-2.0-or-later */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   2) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   3)  * This file contains assembly-language implementations
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   4)  * of IP-style 1's complement checksum routines.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   5)  *	
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   6)  *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   7)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   8)  * Severely hacked about by Paul Mackerras (paulus@cs.anu.edu.au).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   9)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  10) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  11) #include <linux/sys.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  12) #include <asm/processor.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  13) #include <asm/errno.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  14) #include <asm/ppc_asm.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  15) #include <asm/export.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  16) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  17) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  18)  * Computes the checksum of a memory block at buff, length len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  19)  * and adds in "sum" (32-bit).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  20)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  21)  * __csum_partial(r3=buff, r4=len, r5=sum)
 *
 * The running sum is kept in r0 as a 64-bit value and is accumulated with
 * adde, so the carry left by one addition is consumed by the next one; the
 * last outstanding carry is added by the addze at .Lcsum_finish.
 *
 * Returns in r3 the running sum folded down to 32 bits (it is not
 * inverted here).
 *
 * r6, r7, r9-r12 and CTR are used as scratch. r14-r16 are also used by the
 * unrolled loop, but they are saved in a temporary stack frame and restored
 * before the tail handling.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  22)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  23) _GLOBAL(__csum_partial)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  24) 	addic	r0,r5,0			/* clear carry */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  25) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  26) 	srdi.	r6,r4,3			/* less than 8 bytes? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  27) 	beq	.Lcsum_tail_word		/* yes: skip alignment and both doubleword loops */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  28) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  29) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  30) 	 * If only halfword aligned, align to a double word. Since odd
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  31) 	 * aligned addresses should be rare and they would require more
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  32) 	 * work to calculate the correct checksum, we ignore that case
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  33) 	 * and take the potential slowdown of unaligned loads.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  34) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  35) 	rldicl. r6,r3,64-1,64-2		/* r6 = (r3 >> 1) & 0x3 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  36) 	beq	.Lcsum_aligned		/* address bits 1-2 are already zero */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  37) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  38) 	li	r7,4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  39) 	sub	r6,r7,r6		/* r6 = 4 - r6 = leading halfwords to consume */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  40) 	mtctr	r6
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  41) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  42) 1:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  43) 	lhz	r6,0(r3)		/* align to doubleword */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  44) 	subi	r4,r4,2		/* len -= 2 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  45) 	addi	r3,r3,2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  46) 	adde	r0,r0,r6		/* sum += halfword + carry */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  47) 	bdnz	1b
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  48) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  49) .Lcsum_aligned:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  50) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  51) 	 * We unroll the loop such that each iteration is 64 bytes with an
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  52) 	 * entry and exit limb of 64 bytes, meaning a minimum size of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  53) 	 * 128 bytes.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  54) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  55) 	srdi.	r6,r4,7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  56) 	beq	.Lcsum_tail_doublewords		/* len < 128 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  57) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  58) 	srdi	r6,r4,6		/* r6 = number of whole 64-byte chunks */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  59) 	subi	r6,r6,1		/* the last chunk is summed after the loop */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  60) 	mtctr	r6
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  61) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  62) 	stdu	r1,-STACKFRAMESIZE(r1)	/* open a frame to preserve r14-r16 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  63) 	std	r14,STK_REG(R14)(r1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  64) 	std	r15,STK_REG(R15)(r1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  65) 	std	r16,STK_REG(R16)(r1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  66) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  67) 	ld	r6,0(r3)		/* entry limb: preload the first 32 bytes */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  68) 	ld	r9,8(r3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  69) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  70) 	ld	r10,16(r3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  71) 	ld	r11,24(r3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  72) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  73) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  74) 	 * On POWER6 and POWER7 back to back adde instructions take 2 cycles
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  75) 	 * because of the XER dependency. This means the fastest this loop can
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  76) 	 * go is 16 cycles per iteration. The scheduling of the loop below has
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  77) 	 * been shown to hit this on both POWER6 and POWER7.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  78) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  79) 	.align 5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  80) 2:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  81) 	adde	r0,r0,r6
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  82) 	ld	r12,32(r3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  83) 	ld	r14,40(r3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  84) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  85) 	adde	r0,r0,r9
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  86) 	ld	r15,48(r3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  87) 	ld	r16,56(r3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  88) 	addi	r3,r3,64		/* advance: the loads below read the next chunk */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  89) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  90) 	adde	r0,r0,r10
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  91) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  92) 	adde	r0,r0,r11
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  93) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  94) 	adde	r0,r0,r12
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  95) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  96) 	adde	r0,r0,r14
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  97) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  98) 	adde	r0,r0,r15
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  99) 	ld	r6,0(r3)		/* preload the first 32 bytes of the next chunk */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 100) 	ld	r9,8(r3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 101) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 102) 	adde	r0,r0,r16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 103) 	ld	r10,16(r3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 104) 	ld	r11,24(r3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 105) 	bdnz	2b
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 106) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 107) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 108) 	adde	r0,r0,r6		/* exit limb: sum the final 64-byte chunk */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 109) 	ld	r12,32(r3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 110) 	ld	r14,40(r3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 111) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 112) 	adde	r0,r0,r9
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 113) 	ld	r15,48(r3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 114) 	ld	r16,56(r3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 115) 	addi	r3,r3,64
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 116) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 117) 	adde	r0,r0,r10
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 118) 	adde	r0,r0,r11
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 119) 	adde	r0,r0,r12
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 120) 	adde	r0,r0,r14
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 121) 	adde	r0,r0,r15
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 122) 	adde	r0,r0,r16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 123) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 124) 	ld	r14,STK_REG(R14)(r1)	/* restore r14-r16 and drop the frame */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 125) 	ld	r15,STK_REG(R15)(r1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 126) 	ld	r16,STK_REG(R16)(r1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 127) 	addi	r1,r1,STACKFRAMESIZE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 128) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 129) 	andi.	r4,r4,63		/* len = bytes left after the 64-byte chunks */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 130) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 131) .Lcsum_tail_doublewords:		/* Up to 127 bytes to go */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 132) 	srdi.	r6,r4,3		/* r6 = whole doublewords left */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 133) 	beq	.Lcsum_tail_word
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 134) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 135) 	mtctr	r6
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 136) 3:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 137) 	ld	r6,0(r3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 138) 	addi	r3,r3,8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 139) 	adde	r0,r0,r6
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 140) 	bdnz	3b
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 141) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 142) 	andi.	r4,r4,7		/* len = bytes left after the doublewords */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 143) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 144) .Lcsum_tail_word:			/* Up to 7 bytes to go */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 145) 	srdi.	r6,r4,2		/* at least 4 bytes left? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 146) 	beq	.Lcsum_tail_halfword
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 147) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 148) 	lwz	r6,0(r3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 149) 	addi	r3,r3,4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 150) 	adde	r0,r0,r6
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 151) 	subi	r4,r4,4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 152) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 153) .Lcsum_tail_halfword:			/* Up to 3 bytes to go */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 154) 	srdi.	r6,r4,1		/* at least 2 bytes left? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 155) 	beq	.Lcsum_tail_byte
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 156) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 157) 	lhz	r6,0(r3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 158) 	addi	r3,r3,2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 159) 	adde	r0,r0,r6
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 160) 	subi	r4,r4,2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 161) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 162) .Lcsum_tail_byte:			/* Up to 1 byte to go */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 163) 	andi.	r6,r4,1		/* one odd byte left? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 164) 	beq	.Lcsum_finish
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 165) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 166) 	lbz	r6,0(r3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 167) #ifdef __BIG_ENDIAN__
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 168) 	sldi	r9,r6,8			/* Pad the byte out to 16 bits */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 169) 	adde	r0,r0,r9
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 170) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 171) 	adde	r0,r0,r6
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 172) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 173) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 174) .Lcsum_finish:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 175) 	addze	r0,r0			/* add in final carry */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 176) 	rldicl	r4,r0,32,0		/* fold two 32 bit halves together */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 177) 	add	r3,r4,r0		/* upper word = hi + lo + carry out of the low word */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 178) 	srdi	r3,r3,32		/* return that upper word as the 32-bit sum */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 179) 	blr
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 180) EXPORT_SYMBOL(__csum_partial)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 181) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 182) 
/*
 * Fault-annotation helpers for csum_partial_copy_generic. Each macro is
 * written as a prefix on the same source line as a load or store
 * (e.g. "source; ld r6,0(r3)"): it places a numeric label at that
 * instruction and passes the label together with a handler label to
 * EX_TABLE. EX_TABLE is defined outside this file; presumably it records
 * an exception-table fixup for the labelled instruction - TODO confirm.
 *
 *   source / dest  -> .Lerror    : used inside the unrolled 64-byte loop,
 *                                  while the stack frame holding r14-r16
 *                                  is live
 *   srcnr / dstnr  -> .Lerror_nr : used on all other loads and stores,
 *                                  where there is no frame to undo
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 183) 	.macro srcnr
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 184) 100:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 185) 	EX_TABLE(100b,.Lerror_nr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 186) 	.endm
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 187) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 188) 	.macro source
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 189) 150:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 190) 	EX_TABLE(150b,.Lerror)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 191) 	.endm
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 192) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 193) 	.macro dstnr
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 194) 200:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 195) 	EX_TABLE(200b,.Lerror_nr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 196) 	.endm
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 197) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 198) 	.macro dest
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 199) 250:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 200) 	EX_TABLE(250b,.Lerror)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 201) 	.endm
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 202) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 203) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 204)  * Computes the checksum of a memory block at src, length len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 205)  * and adds in 0xffffffff (32-bit), while copying the block to dst.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 206)  * If an access exception occurs, it returns 0.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 207)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 208)  * csum_partial_copy_generic(r3=src, r4=dst, r5=len)
 *
 * The running sum is kept in r0 and accumulated with adde; the last
 * outstanding carry is added by the addze at .Lcopy_finish. On success
 * r3 holds the sum folded down to 32 bits (it is not inverted here).
 *
 * Every load and store is tagged with one of the source/dest/srcnr/dstnr
 * macros, which name .Lerror or .Lerror_nr as the handler for that
 * instruction. Both handlers end by returning 0 in r3; .Lerror first
 * restores r14-r16 and drops the temporary stack frame that is live only
 * around the unrolled 64-byte loop.
 *
 * r6, r9-r12 and CTR are used as scratch.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 209)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 210) _GLOBAL(csum_partial_copy_generic)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 211) 	li	r6,-1		/* start value: all ones */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 212) 	addic	r0,r6,0			/* clear carry */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 213) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 214) 	srdi.	r6,r5,3			/* less than 8 bytes? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 215) 	beq	.Lcopy_tail_word		/* yes: skip alignment and both doubleword loops */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 216) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 217) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 218) 	 * If only halfword aligned, align to a double word. Since odd
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 219) 	 * aligned addresses should be rare and they would require more
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 220) 	 * work to calculate the correct checksum, we ignore that case
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 221) 	 * and take the potential slowdown of unaligned loads.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 222) 	 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 223) 	 * If the source and destination are relatively unaligned we only
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 224) 	 * align the source. This keeps things simple.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 225) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 226) 	rldicl. r6,r3,64-1,64-2		/* r6 = (r3 >> 1) & 0x3 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 227) 	beq	.Lcopy_aligned		/* source address bits 1-2 are already zero */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 228) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 229) 	li	r9,4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 230) 	sub	r6,r9,r6		/* r6 = 4 - r6 = leading halfwords to copy */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 231) 	mtctr	r6
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 232) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 233) 1:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 234) srcnr;	lhz	r6,0(r3)		/* align to doubleword */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 235) 	subi	r5,r5,2		/* len -= 2 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 236) 	addi	r3,r3,2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 237) 	adde	r0,r0,r6		/* sum += halfword + carry */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 238) dstnr;	sth	r6,0(r4)		/* copy the halfword to dst */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 239) 	addi	r4,r4,2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 240) 	bdnz	1b
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 241) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 242) .Lcopy_aligned:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 243) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 244) 	 * We unroll the loop such that each iteration is 64 bytes with an
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 245) 	 * entry and exit limb of 64 bytes, meaning a minimum size of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 246) 	 * 128 bytes.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 247) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 248) 	srdi.	r6,r5,7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 249) 	beq	.Lcopy_tail_doublewords		/* len < 128 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 250) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 251) 	srdi	r6,r5,6		/* r6 = number of whole 64-byte chunks */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 252) 	subi	r6,r6,1		/* the last chunk is handled after the loop */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 253) 	mtctr	r6
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 254) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 255) 	stdu	r1,-STACKFRAMESIZE(r1)	/* open a frame to preserve r14-r16 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 256) 	std	r14,STK_REG(R14)(r1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 257) 	std	r15,STK_REG(R15)(r1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 258) 	std	r16,STK_REG(R16)(r1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 259) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 260) source;	ld	r6,0(r3)		/* entry limb: preload the first 32 bytes */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 261) source;	ld	r9,8(r3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 262) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 263) source;	ld	r10,16(r3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 264) source;	ld	r11,24(r3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 265) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 266) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 267) 	 * On POWER6 and POWER7 back to back adde instructions take 2 cycles
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 268) 	 * because of the XER dependency. This means the fastest this loop can
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 269) 	 * go is 16 cycles per iteration. The scheduling of the loop below has
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 270) 	 * been shown to hit this on both POWER6 and POWER7.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 271) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 272) 	.align 5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 273) 2:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 274) 	adde	r0,r0,r6
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 275) source;	ld	r12,32(r3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 276) source;	ld	r14,40(r3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 277) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 278) 	adde	r0,r0,r9
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 279) source;	ld	r15,48(r3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 280) source;	ld	r16,56(r3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 281) 	addi	r3,r3,64		/* advance src: later loads read the next chunk */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 282) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 283) 	adde	r0,r0,r10
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 284) dest;	std	r6,0(r4)		/* stores are interleaved with the adde chain */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 285) dest;	std	r9,8(r4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 286) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 287) 	adde	r0,r0,r11
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 288) dest;	std	r10,16(r4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 289) dest;	std	r11,24(r4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 290) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 291) 	adde	r0,r0,r12
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 292) dest;	std	r12,32(r4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 293) dest;	std	r14,40(r4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 294) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 295) 	adde	r0,r0,r14
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 296) dest;	std	r15,48(r4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 297) dest;	std	r16,56(r4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 298) 	addi	r4,r4,64
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 299) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 300) 	adde	r0,r0,r15
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 301) source;	ld	r6,0(r3)		/* preload the first 32 bytes of the next chunk */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 302) source;	ld	r9,8(r3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 303) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 304) 	adde	r0,r0,r16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 305) source;	ld	r10,16(r3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 306) source;	ld	r11,24(r3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 307) 	bdnz	2b
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 308) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 309) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 310) 	adde	r0,r0,r6		/* exit limb: sum and copy the final 64-byte chunk */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 311) source;	ld	r12,32(r3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 312) source;	ld	r14,40(r3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 313) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 314) 	adde	r0,r0,r9
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 315) source;	ld	r15,48(r3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 316) source;	ld	r16,56(r3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 317) 	addi	r3,r3,64
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 318) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 319) 	adde	r0,r0,r10
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 320) dest;	std	r6,0(r4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 321) dest;	std	r9,8(r4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 322) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 323) 	adde	r0,r0,r11
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 324) dest;	std	r10,16(r4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 325) dest;	std	r11,24(r4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 326) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 327) 	adde	r0,r0,r12
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 328) dest;	std	r12,32(r4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 329) dest;	std	r14,40(r4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 330) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 331) 	adde	r0,r0,r14
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 332) dest;	std	r15,48(r4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 333) dest;	std	r16,56(r4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 334) 	addi	r4,r4,64
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 335) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 336) 	adde	r0,r0,r15
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 337) 	adde	r0,r0,r16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 338) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 339) 	ld	r14,STK_REG(R14)(r1)	/* restore r14-r16 and drop the frame */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 340) 	ld	r15,STK_REG(R15)(r1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 341) 	ld	r16,STK_REG(R16)(r1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 342) 	addi	r1,r1,STACKFRAMESIZE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 343) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 344) 	andi.	r5,r5,63		/* len = bytes left after the 64-byte chunks */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 345) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 346) .Lcopy_tail_doublewords:		/* Up to 127 bytes to go */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 347) 	srdi.	r6,r5,3		/* r6 = whole doublewords left */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 348) 	beq	.Lcopy_tail_word
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 349) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 350) 	mtctr	r6
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 351) 3:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 352) srcnr;	ld	r6,0(r3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 353) 	addi	r3,r3,8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 354) 	adde	r0,r0,r6
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 355) dstnr;	std	r6,0(r4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 356) 	addi	r4,r4,8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 357) 	bdnz	3b
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 358) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 359) 	andi.	r5,r5,7		/* len = bytes left after the doublewords */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 360) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 361) .Lcopy_tail_word:			/* Up to 7 bytes to go */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 362) 	srdi.	r6,r5,2		/* at least 4 bytes left? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 363) 	beq	.Lcopy_tail_halfword
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 364) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 365) srcnr;	lwz	r6,0(r3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 366) 	addi	r3,r3,4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 367) 	adde	r0,r0,r6
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 368) dstnr;	stw	r6,0(r4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 369) 	addi	r4,r4,4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 370) 	subi	r5,r5,4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 371) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 372) .Lcopy_tail_halfword:			/* Up to 3 bytes to go */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 373) 	srdi.	r6,r5,1		/* at least 2 bytes left? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 374) 	beq	.Lcopy_tail_byte
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 375) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 376) srcnr;	lhz	r6,0(r3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 377) 	addi	r3,r3,2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 378) 	adde	r0,r0,r6
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 379) dstnr;	sth	r6,0(r4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 380) 	addi	r4,r4,2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 381) 	subi	r5,r5,2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 382) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 383) .Lcopy_tail_byte:			/* Up to 1 byte to go */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 384) 	andi.	r6,r5,1		/* one odd byte left? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 385) 	beq	.Lcopy_finish
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 386) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 387) srcnr;	lbz	r6,0(r3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 388) #ifdef __BIG_ENDIAN__
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 389) 	sldi	r9,r6,8			/* Pad the byte out to 16 bits */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 390) 	adde	r0,r0,r9
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 391) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 392) 	adde	r0,r0,r6
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 393) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 394) dstnr;	stb	r6,0(r4)		/* store the byte as loaded (r6, not the shifted r9) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 395) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 396) .Lcopy_finish:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 397) 	addze	r0,r0			/* add in final carry */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 398) 	rldicl	r4,r0,32,0		/* fold two 32 bit halves together */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 399) 	add	r3,r4,r0		/* upper word = hi + lo + carry out of the low word */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 400) 	srdi	r3,r3,32		/* return that upper word as the 32-bit sum */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 401) 	blr
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 402) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 403) .Lerror:		/* handler named by source/dest: the r14-r16 frame is live */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 404) 	ld	r14,STK_REG(R14)(r1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 405) 	ld	r15,STK_REG(R15)(r1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 406) 	ld	r16,STK_REG(R16)(r1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 407) 	addi	r1,r1,STACKFRAMESIZE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 408) .Lerror_nr:		/* handler named by srcnr/dstnr: nothing to restore */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 409) 	li	r3,0		/* report the fault by returning 0 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 410) 	blr
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 411) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 412) EXPORT_SYMBOL(csum_partial_copy_generic)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 413) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 414) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 415)  * __sum16 csum_ipv6_magic(const struct in6_addr *saddr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 416)  *			   const struct in6_addr *daddr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 417)  *			   __u32 len, __u8 proto, __wsum sum)
 *
 * Sums the 16 bytes at r3, the 16 bytes at r4 and the combined value held
 * in r5 into a 64-bit accumulator (r0) with an addc/adde carry chain, then
 * folds the result to 32 bits, then to 16 bits, inverts it and returns the
 * 16-bit value in r3.
 *
 * NOTE(review): the per-line comments below assume the five C arguments
 * arrive in r3-r7 in prototype order (saddr, daddr, len, proto, sum) -
 * confirm against the calling convention in use.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 418)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 419) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 420) _GLOBAL(csum_ipv6_magic)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 421) 	ld	r8, 0(r3)		/* r8, r9 = the 16 bytes at r3 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 422) 	ld	r9, 8(r3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 423) 	add	r5, r5, r6		/* r5 = len + proto (assumed r5/r6) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 424) 	addc	r0, r8, r9		/* start the carry chain */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 425) 	ld	r10, 0(r4)		/* r10, r11 = the 16 bytes at r4 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 426) 	ld	r11, 8(r4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 427) #ifdef CONFIG_CPU_LITTLE_ENDIAN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 428) 	rotldi	r5, r5, 8		/* NOTE(review): presumably lines r5 up with the byte order of the loaded words - confirm */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 429) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 430) 	adde	r0, r0, r10
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 431) 	add	r5, r5, r7		/* r5 += sum (assumed r7); plain add, carry chain untouched */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 432) 	adde	r0, r0, r11
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 433) 	adde	r0, r0, r5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 434) 	addze	r0, r0		/* add in final carry */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 435) 	rotldi  r3, r0, 32		/* fold two 32 bit halves together */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 436) 	add	r3, r0, r3		/* upper word = hi + lo + carry out of the low word */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 437) 	srdi	r0, r3, 32		/* r0 = that upper word: the 32-bit folded sum */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 438) 	rotlwi	r3, r0, 16		/* fold two 16 bit halves together */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 439) 	add	r3, r0, r3		/* upper halfword of the low word = hi16 + lo16 + carry */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 440) 	not	r3, r3		/* invert the folded sum */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 441) 	rlwinm	r3, r3, 16, 16, 31	/* return that upper halfword as a 16-bit value */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 442) 	blr
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 443) EXPORT_SYMBOL(csum_ipv6_magic)