Orange Pi5 kernel

Deprecated Linux kernel 5.10.110 for OrangePi 5/5B/5+ boards

3 Commits   0 Branches   0 Tags
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   1) /* SPDX-License-Identifier: GPL-2.0 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   2) /* checksum.S: Sparc optimized checksum code.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   3)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   4)  *  Copyright(C) 1995 Linus Torvalds
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   5)  *  Copyright(C) 1995 Miguel de Icaza
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   6)  *  Copyright(C) 1996 David S. Miller
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   7)  *  Copyright(C) 1997 Jakub Jelinek
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   8)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   9)  * derived from:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  10)  *	Linux/Alpha checksum c-code
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  11)  *      Linux/ix86 inline checksum assembly
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  12)  *      RFC1071 Computing the Internet Checksum (esp. Jacobson's m68k code)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  13)  *	David Mosberger-Tang for optimized reference c-code
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  14)  *	BSD4.4 portable checksum routine
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  15)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  16) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  17) #include <asm/errno.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  18) #include <asm/export.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  19) 
/*
 * CSUM_BIGCHUNK - add the 32 bytes at [buf + offset] into sum.
 *
 * Four ldd loads (at 0x00, 0x08, 0x10 and 0x18) fetch eight 32-bit
 * words and each word is accumulated with addxcc, so the carry out of
 * one add is fed into the next.  Only t0, t2 and t4 appear as ldd
 * destinations, yet t1, t3 and t5 are summed as well: ldd fills a
 * register pair, so the odd-named temporaries must be the partners of
 * the even-named ones.  The last load reuses the t0/t1 pair.
 *
 * The first addxcc consumes whatever carry flag is live on entry, and
 * the final carry is left in the condition codes for the user of the
 * macro to fold in.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  20) #define CSUM_BIGCHUNK(buf, offset, sum, t0, t1, t2, t3, t4, t5)	\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  21) 	ldd	[buf + offset + 0x00], t0;			\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  22) 	ldd	[buf + offset + 0x08], t2;			\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  23) 	addxcc	t0, sum, sum;					\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  24) 	addxcc	t1, sum, sum;					\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  25) 	ldd	[buf + offset + 0x10], t4;			\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  26) 	addxcc	t2, sum, sum;					\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  27) 	addxcc	t3, sum, sum;					\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  28) 	ldd	[buf + offset + 0x18], t0;			\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  29) 	addxcc	t4, sum, sum;					\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  30) 	addxcc	t5, sum, sum;					\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  31) 	addxcc	t0, sum, sum;					\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  32) 	addxcc	t1, sum, sum;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  33) 
/*
 * CSUM_LASTCHUNK - add the 16 bytes loaded from [buf - offset - 0x08]
 * and [buf - offset] into sum, again chaining the carry through addxcc.
 *
 * The addresses are negative offsets from buf, so buf is expected to
 * have been advanced past the data being summed.  As with the big
 * chunk macro, t1 and t3 are filled implicitly by the ldd into t0 and
 * t2.
 *
 * Each expansion is exactly six instructions.  The computed jump in
 * csum_partial depends on that size, so do not add or remove
 * instructions here without adjusting it.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  34) #define CSUM_LASTCHUNK(buf, offset, sum, t0, t1, t2, t3)	\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  35) 	ldd	[buf - offset - 0x08], t0;			\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  36) 	ldd	[buf - offset - 0x00], t2;			\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  37) 	addxcc	t0, sum, sum;					\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  38) 	addxcc	t1, sum, sum;					\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  39) 	addxcc	t2, sum, sum;					\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  40) 	addxcc	t3, sum, sum;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  41) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  42) 	/* Do end cruft out of band to get better cache patterns. */
	/*
	 * Sum the final 1..15 bytes of the buffer for csum_partial.
	 *
	 * On entry: %o0 = current buf pointer, %o1 = len, %o2 = running
	 * sum, and the condition codes hold the result of (len & 8), set
	 * in the delay slot of the branch at cpte that got us here.
	 *
	 * Bit 3 of len selects an 8-byte ldd, bit 2 a 4-byte ld, and the
	 * low two bits select a trailing halfword and/or byte.  A halfword
	 * followed by a byte is packed as (hword << 16) | (byte << 8), a
	 * lone byte as (byte << 8), and a lone halfword is added unshifted.
	 * Each conditional step re-tests len after the optional work so
	 * that the following branch sees the right condition codes on
	 * both paths.
	 *
	 * Returns to the caller of csum_partial with the sum plus the
	 * final carry in %o0.
	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  43) csum_partial_end_cruft:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  44) 	be	1f				! caller asks %o1 & 0x8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  45) 	 andcc	%o1, 4, %g0			! nope, check for word remaining
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  46) 	ldd	[%o0], %g2			! load two
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  47) 	addcc	%g2, %o2, %o2			! add first word to sum
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  48) 	addxcc	%g3, %o2, %o2			! add second word as well
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  49) 	add	%o0, 8, %o0			! advance buf ptr
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  50) 	addx	%g0, %o2, %o2			! add in final carry
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  51) 	andcc	%o1, 4, %g0			! check again for word remaining
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  52) 1:	be	1f				! nope, skip this code
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  53) 	 andcc	%o1, 3, %o1			! check for trailing bytes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  54) 	ld	[%o0], %g2			! load it
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  55) 	addcc	%g2, %o2, %o2			! add to sum
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  56) 	add	%o0, 4, %o0			! advance buf ptr
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  57) 	addx	%g0, %o2, %o2			! add in final carry
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  58) 	andcc	%o1, 3, %g0			! check again for trailing bytes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  59) 1:	be	1f				! no trailing bytes, return
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  60) 	 addcc	%o1, -1, %g0			! only one byte remains?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  61) 	bne	2f				! at least two bytes more
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  62) 	 subcc	%o1, 2, %o1			! only two bytes more?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  63) 	b	4f				! only one byte remains
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  64) 	 or	%g0, %g0, %o4			! clear fake hword value
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  65) 2:	lduh	[%o0], %o4			! get hword
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  66) 	be	6f				! jmp if only hword remains
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  67) 	 add	%o0, 2, %o0			! advance buf ptr either way
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  68) 	sll	%o4, 16, %o4			! create upper hword
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  69) 4:	ldub	[%o0], %o5			! get final byte
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  70) 	sll	%o5, 8, %o5			! put into place
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  71) 	or	%o5, %o4, %o4			! coalese with hword (if any)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  72) 6:	addcc	%o4, %o2, %o2			! add to sum
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  73) 1:	retl					! get outta here
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  74) 	 addx	%g0, %o2, %o0			! add final carry into retval
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  75) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  76) 	/* Also do alignment out of band to get better cache patterns. */
	/*
	 * Entered from csum_partial when (buf & 7) is non-zero, with
	 * %o0 = buf, %o1 = len and %o2 = sum.
	 *
	 * If len < 6 (signed compare) control goes to the instruction
	 * just before cpte, which re-tests (len & 0xf), so the whole
	 * buffer is handled as end cruft.  Otherwise a halfword is
	 * consumed when bit 1 of buf is set and a word when bit 2 is set.
	 * Bit 0 of buf is never examined here.
	 *
	 * The halfword is added into the upper 16 bits of the sum and the
	 * carry out of that add is folded back into those upper 16 bits by
	 * the srl/addx/sll/srl/or sequence.  The word step uses a plain
	 * addcc/addx pair.
	 *
	 * Both remaining exits reach cpa with %o3 = len & 0xffffff80 and
	 * the condition codes set by that andcc, which is the same state
	 * csum_partial sets up itself right before cpa.
	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  77) csum_partial_fix_alignment:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  78) 	cmp	%o1, 6
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  79) 	bl	cpte - 0x4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  80) 	 andcc	%o0, 0x2, %g0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  81) 	be	1f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  82) 	 andcc	%o0, 0x4, %g0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  83) 	lduh	[%o0 + 0x00], %g2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  84) 	sub	%o1, 2, %o1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  85) 	add	%o0, 2, %o0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  86) 	sll	%g2, 16, %g2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  87) 	addcc	%g2, %o2, %o2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  88) 	srl	%o2, 16, %g3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  89) 	addx	%g0, %g3, %g2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  90) 	sll	%o2, 16, %o2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  91) 	sll	%g2, 16, %g3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  92) 	srl	%o2, 16, %o2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  93) 	andcc	%o0, 0x4, %g0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  94) 	or	%g3, %o2, %o2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  95) 1:	be	cpa
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  96) 	 andcc	%o1, 0xffffff80, %o3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  97) 	ld	[%o0 + 0x00], %g2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  98) 	sub	%o1, 4, %o1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  99) 	addcc	%g2, %o2, %o2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 100) 	add	%o0, 4, %o0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 101) 	addx	%g0, %o2, %o2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 102) 	b	cpa
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 103) 	 andcc	%o1, 0xffffff80, %o3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 104) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 105) 	/* The common case is to get called with a nicely aligned
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 106) 	 * buffer of size 0x20.  Follow the code path for that case.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 107) 	 */
	/*
	 * csum_partial - add a buffer into a 32-bit checksum accumulator.
	 *
	 * In:  %o0 = buf, %o1 = len, %o2 = sum
	 * Out: %o0 = updated sum
	 * Registers written along the way: %o3-%o5, %g1-%g5, %g7
	 *
	 * Flow:
	 *  - buf not 8-byte aligned: csum_partial_fix_alignment, which
	 *    rejoins at cpa.
	 *  - 5: main loop, 128 bytes per pass (four CSUM_BIGCHUNKs),
	 *    counted down in %o3 = len & 0xffffff80.  The carry is folded
	 *    into the sum with addx after every pass.
	 *  - cptbl: %g1 = len & 0x70 bytes are summed by jumping into the
	 *    middle of seven CSUM_LASTCHUNK expansions.  The target is
	 *    (cpte - 8) - %g1 - %g1/2, because every 16 bytes of data
	 *    corresponds to one 24-byte (six instruction) expansion and
	 *    cpte - 8 is the addx that follows the table.  buf is advanced
	 *    by %g1 in the jmp delay slot because the table addresses its
	 *    data with negative offsets.
	 *  - cpte: if (len & 0xf) is non-zero finish in
	 *    csum_partial_end_cruft, otherwise return the sum.
	 *
	 * Code elsewhere branches to cpte - 0x4 and the table jump is
	 * based on cpte - 8, so the two instructions in front of cpte must
	 * stay exactly where they are.
	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 108) 	.globl	csum_partial
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 109) 	EXPORT_SYMBOL(csum_partial)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 110) csum_partial:			/* %o0=buf, %o1=len, %o2=sum */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 111) 	andcc	%o0, 0x7, %g0				! alignment problems?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 112) 	bne	csum_partial_fix_alignment		! yep, handle it
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 113) 	 sethi	%hi(cpte - 8), %g7			! prepare table jmp ptr
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 114) 	andcc	%o1, 0xffffff80, %o3			! num loop iterations
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 115) cpa:	be	3f					! none to do
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 116) 	 andcc	%o1, 0x70, %g1				! clears carry flag too
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 117) 5:	CSUM_BIGCHUNK(%o0, 0x00, %o2, %o4, %o5, %g2, %g3, %g4, %g5)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 118) 	CSUM_BIGCHUNK(%o0, 0x20, %o2, %o4, %o5, %g2, %g3, %g4, %g5)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 119) 	CSUM_BIGCHUNK(%o0, 0x40, %o2, %o4, %o5, %g2, %g3, %g4, %g5)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 120) 	CSUM_BIGCHUNK(%o0, 0x60, %o2, %o4, %o5, %g2, %g3, %g4, %g5)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 121) 	addx	%g0, %o2, %o2				! sink in final carry
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 122) 	subcc	%o3, 128, %o3				! detract from loop iters
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 123) 	bne	5b					! more to do
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 124) 	 add	%o0, 128, %o0				! advance buf ptr
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 125) 	andcc	%o1, 0x70, %g1				! clears carry flag too
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 126) 3:	be	cpte					! nope
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 127) 	 andcc	%o1, 0xf, %g0				! anything left at all?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 128) 	srl	%g1, 1, %o4				! compute offset
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 129) 	sub	%g7, %g1, %g7				! adjust jmp ptr
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 130) 	sub	%g7, %o4, %g7				! final jmp ptr adjust
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 131) 	jmp	%g7 + %lo(cpte - 8)			! enter the table
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 132) 	 add	%o0, %g1, %o0				! advance buf ptr
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 133) cptbl:	CSUM_LASTCHUNK(%o0, 0x68, %o2, %g2, %g3, %g4, %g5)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 134) 	CSUM_LASTCHUNK(%o0, 0x58, %o2, %g2, %g3, %g4, %g5)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 135) 	CSUM_LASTCHUNK(%o0, 0x48, %o2, %g2, %g3, %g4, %g5)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 136) 	CSUM_LASTCHUNK(%o0, 0x38, %o2, %g2, %g3, %g4, %g5)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 137) 	CSUM_LASTCHUNK(%o0, 0x28, %o2, %g2, %g3, %g4, %g5)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 138) 	CSUM_LASTCHUNK(%o0, 0x18, %o2, %g2, %g3, %g4, %g5)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 139) 	CSUM_LASTCHUNK(%o0, 0x08, %o2, %g2, %g3, %g4, %g5)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 140) 	addx	%g0, %o2, %o2				! fetch final carry
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 141) 	andcc	%o1, 0xf, %g0				! anything left at all?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 142) cpte:	bne	csum_partial_end_cruft			! yep, handle it
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 143) 	 andcc	%o1, 8, %g0				! check how much
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 144) cpout:	retl						! get outta here
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 145) 	 mov	%o2, %o0				! return computed csum
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 146) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 147) /* Work around cpp -rob */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 148) #define ALLOC #alloc
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 149) #define EXECINSTR #execinstr
/*
 * ALLOC and EXECINSTR stand in for the alloc and execinstr section
 * flags so that the flag can be named inside the macro bodies below.
 * NOTE(review): presumably needed because cpp would otherwise treat the
 * hash sign inside a function-like macro as its stringify operator.
 * EXECINSTR is not used in the visible part of this file.
 *
 * EX(x,y) - emit one instruction that may fault and register it in the
 * exception table.
 *
 * The instruction is passed as two macro arguments because it contains
 * a comma, e.g. EX(ld [%o0 + 0x00], %g2) produces the instruction
 * ld [%o0 + 0x00], %g2 at local label 98.  A two-word entry (address
 * of that instruction, cc_fault) is then appended to the __ex_table
 * section before switching back to .text.  cc_fault is not defined in
 * this part of the file.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 150) #define EX(x,y)					\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 151) 98:     x,y;                                    \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 152)         .section __ex_table,ALLOC;		\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 153)         .align  4;                              \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 154)         .word   98b, cc_fault;                   \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 155)         .text;                                  \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 156)         .align  4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 157) 
/*
 * EXT(start,end) - register a range of instructions in the exception
 * table.  Emits a four-word __ex_table entry (start, 0, end, cc_fault)
 * and then returns to .text.  No instruction is emitted by the macro
 * itself, and its users place it right after the code range it
 * describes.
 * NOTE(review): the meaning of the zero second word is decided by the
 * exception table lookup code, which is not part of this file.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 158) #define EXT(start,end)				\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 159)         .section __ex_table,ALLOC;		\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 160)         .align  4;                              \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 161)         .word   start, 0, end, cc_fault;         \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 162)         .text;                                  \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 163)         .align  4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 164) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 165) 	/* This aligned version executes typically in 8.5 superscalar cycles, this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 166) 	 * is the best I can do.  I say 8.5 because the final add will pair with
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 167) 	 * the next ldd in the main unrolled loop.  Thus the pipe is always full.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 168) 	 * If you change these macros (including order of instructions),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 169) 	 * please check the fixup code below as well.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 170) 	 */
	/*
	 * CSUMCOPY_BIGCHUNK_ALIGNED - copy 32 bytes from [src + off] to
	 * [dst + off] and add the eight words into sum.
	 *
	 * Data moves as four doublewords: ldd into the pairs t0/t1,
	 * t2/t3, t4/t5 and t6/t7, then std back out, so both src and dst
	 * are accessed with 8-byte operations.  The odd-named temporaries
	 * are filled implicitly by the ldd into their even partners.
	 *
	 * All eight words are chained through addxcc.  The first one
	 * consumes the incoming carry flag and the last one leaves its
	 * carry in the condition codes for the user of the macro.
	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 171) #define CSUMCOPY_BIGCHUNK_ALIGNED(src, dst, sum, off, t0, t1, t2, t3, t4, t5, t6, t7)	\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 172) 	ldd	[src + off + 0x00], t0;							\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 173) 	ldd	[src + off + 0x08], t2;							\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 174) 	addxcc	t0, sum, sum;								\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 175) 	ldd	[src + off + 0x10], t4;							\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 176) 	addxcc	t1, sum, sum;								\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 177) 	ldd	[src + off + 0x18], t6;							\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 178) 	addxcc	t2, sum, sum;								\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 179) 	std	t0, [dst + off + 0x00];							\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 180) 	addxcc	t3, sum, sum;								\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 181) 	std	t2, [dst + off + 0x08];							\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 182) 	addxcc	t4, sum, sum;								\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 183) 	std	t4, [dst + off + 0x10];							\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 184) 	addxcc	t5, sum, sum;								\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 185) 	std	t6, [dst + off + 0x18];							\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 186) 	addxcc	t6, sum, sum;								\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 187) 	addxcc	t7, sum, sum;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 188) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 189) 	/* 12 superscalar cycles seems to be the limit for this case,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 190) 	 * because of this we thus do all the ldd's together to get
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 191) 	 * Viking MXCC into streaming mode.  Ho hum...
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 192) 	 */
	/*
	 * CSUMCOPY_BIGCHUNK - copy 32 bytes from [src + off] to
	 * [dst + off] and add the eight words into sum.
	 *
	 * The source is still read with four ldd instructions (register
	 * pairs t0/t1 .. t6/t7), but the destination is written with eight
	 * word-sized st instructions instead of std.  The copy routine
	 * further down takes this variant when bit 2 of dst is set and the
	 * ALIGNED variant when it is clear.
	 *
	 * Carry handling is the same as in the other chunk macros: the
	 * first addxcc consumes the incoming carry and the last carry is
	 * left in the condition codes.
	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 193) #define CSUMCOPY_BIGCHUNK(src, dst, sum, off, t0, t1, t2, t3, t4, t5, t6, t7)	\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 194) 	ldd	[src + off + 0x00], t0;						\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 195) 	ldd	[src + off + 0x08], t2;						\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 196) 	ldd	[src + off + 0x10], t4;						\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 197) 	ldd	[src + off + 0x18], t6;						\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 198) 	st	t0, [dst + off + 0x00];						\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 199) 	addxcc	t0, sum, sum;							\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 200) 	st	t1, [dst + off + 0x04];						\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 201) 	addxcc	t1, sum, sum;							\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 202) 	st	t2, [dst + off + 0x08];						\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 203) 	addxcc	t2, sum, sum;							\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 204) 	st	t3, [dst + off + 0x0c];						\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 205) 	addxcc	t3, sum, sum;							\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 206) 	st	t4, [dst + off + 0x10];						\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 207) 	addxcc	t4, sum, sum;							\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 208) 	st	t5, [dst + off + 0x14];						\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 209) 	addxcc	t5, sum, sum;							\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 210) 	st	t6, [dst + off + 0x18];						\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 211) 	addxcc	t6, sum, sum;							\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 212) 	st	t7, [dst + off + 0x1c];						\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 213) 	addxcc	t7, sum, sum;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 214) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 215) 	/* Yuck, 6 superscalar cycles... */
	/*
	 * CSUMCOPY_LASTCHUNK - copy the 16 bytes starting at
	 * [src - off - 0x08] to [dst - off - 0x08] and add the four words
	 * into sum.
	 *
	 * Loads are two ldd instructions, stores are four word-sized st
	 * instructions.  Addresses are negative offsets, so src and dst
	 * are expected to have been advanced past the data first.
	 *
	 * Each expansion is exactly ten instructions.  The computed jump
	 * into cctbl depends on that size, so do not add or remove
	 * instructions here without adjusting it.
	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 216) #define CSUMCOPY_LASTCHUNK(src, dst, sum, off, t0, t1, t2, t3)	\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 217) 	ldd	[src - off - 0x08], t0;				\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 218) 	ldd	[src - off - 0x00], t2;				\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 219) 	addxcc	t0, sum, sum;					\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 220) 	st	t0, [dst - off - 0x08];				\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 221) 	addxcc	t1, sum, sum;					\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 222) 	st	t1, [dst - off - 0x04];				\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 223) 	addxcc	t2, sum, sum;					\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 224) 	st	t2, [dst - off - 0x00];				\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 225) 	addxcc	t3, sum, sum;					\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 226) 	st	t3, [dst - off + 0x04];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 227) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 228) 	/* Handle the end cruft code out of band for better cache patterns. */
	/*
	 * Copy and sum the final 1..15 bytes for
	 * __csum_partial_copy_sparc_generic.
	 *
	 * On entry: %o0 = src, %o1 = dst, %o3 = len & 0xf, %g7 = running
	 * sum, and the condition codes hold the result of (%o3 & 8) from
	 * the delay slot of the branch at ccte.  Every load and store is
	 * wrapped in EX() so that it gets an exception table entry.
	 *
	 * The structure mirrors csum_partial_end_cruft: 8 bytes if bit 3
	 * is set, 4 bytes if bit 2 is set, then an optional halfword and
	 * an optional byte.  A halfword followed by a byte is summed as
	 * (hword << 16) | (byte << 8), a lone byte as (byte << 8), and a
	 * lone halfword is added unshifted.
	 *
	 * Returns with the sum plus the final carry in %o0.
	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 229) cc_end_cruft:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 230) 	be	1f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 231) 	 andcc	%o3, 4, %g0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 232) 	EX(ldd	[%o0 + 0x00], %g2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 233) 	add	%o1, 8, %o1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 234) 	addcc	%g2, %g7, %g7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 235) 	add	%o0, 8, %o0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 236) 	addxcc	%g3, %g7, %g7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 237) 	EX(st	%g2, [%o1 - 0x08])
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 238) 	addx	%g0, %g7, %g7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 239) 	andcc	%o3, 4, %g0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 240) 	EX(st	%g3, [%o1 - 0x04])
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 241) 1:	be	1f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 242) 	 andcc	%o3, 3, %o3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 243) 	EX(ld	[%o0 + 0x00], %g2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 244) 	add	%o1, 4, %o1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 245) 	addcc	%g2, %g7, %g7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 246) 	EX(st	%g2, [%o1 - 0x04])
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 247) 	addx	%g0, %g7, %g7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 248) 	andcc	%o3, 3, %g0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 249) 	add	%o0, 4, %o0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 250) 1:	be	1f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 251) 	 addcc	%o3, -1, %g0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 252) 	bne	2f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 253) 	 subcc	%o3, 2, %o3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 254) 	b	4f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 255) 	 or	%g0, %g0, %o4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 256) 2:	EX(lduh	[%o0 + 0x00], %o4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 257) 	add	%o0, 2, %o0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 258) 	EX(sth	%o4, [%o1 + 0x00])
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 259) 	be	6f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 260) 	 add	%o1, 2, %o1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 261) 	sll	%o4, 16, %o4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 262) 4:	EX(ldub	[%o0 + 0x00], %o5)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 263) 	EX(stb	%o5, [%o1 + 0x00])
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 264) 	sll	%o5, 8, %o5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 265) 	or	%o5, %o4, %o4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 266) 6:	addcc	%o4, %g7, %g7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 267) 1:	retl
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 268) 	 addx	%g0, %g7, %o0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 269) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 270) 	/* Also, handle the alignment code out of band. */
	/*
	 * Entered from __csum_partial_copy_sparc_generic when (src & 7)
	 * is non-zero.  At that point src and dst are already known to
	 * agree in their low two bits.  %o0 = src, %o1 = dst, %g1 = len,
	 * %g7 = sum.
	 *
	 * len < 16: %o3 starts at len >> 1 and is shifted right one bit
	 * per pass of loop 2.  If it reaches zero without ever sharing a
	 * set bit with src, control goes straight to ccte with
	 * %o3 = len & 0xf (computed in the annulled delay slot, which only
	 * executes when that branch is taken).  As soon as (%o3 & src) is
	 * non-zero the code falls into the alignment steps at label 1.
	 * NOTE(review): this looks like a check that src is aligned well
	 * enough for the accesses cc_end_cruft will make - confirm.
	 *
	 * Label 1: an odd src goes to ccslow.  Otherwise a halfword is
	 * copied if bit 1 of src is set (added into the upper 16 bits of
	 * the sum, with the carry folded back into those bits) and a word
	 * is copied if bit 2 is set.  len in %g1 is reduced accordingly.
	 * Both exits branch to label 3 in
	 * __csum_partial_copy_sparc_generic with the condition codes set
	 * from (len & 0xffffff80).
	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 271) cc_dword_align:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 272) 	cmp	%g1, 16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 273) 	bge	1f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 274) 	 srl	%g1, 1, %o3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 275) 2:	cmp	%o3, 0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 276) 	be,a	ccte
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 277) 	 andcc	%g1, 0xf, %o3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 278) 	andcc	%o3, %o0, %g0	! Check %o0 only (%o1 has the same last 2 bits)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 279) 	be,a	2b
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 280) 	 srl	%o3, 1, %o3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 281) 1:	andcc	%o0, 0x1, %g0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 282) 	bne	ccslow
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 283) 	 andcc	%o0, 0x2, %g0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 284) 	be	1f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 285) 	 andcc	%o0, 0x4, %g0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 286) 	EX(lduh	[%o0 + 0x00], %g4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 287) 	sub	%g1, 2, %g1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 288) 	EX(sth	%g4, [%o1 + 0x00])
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 289) 	add	%o0, 2, %o0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 290) 	sll	%g4, 16, %g4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 291) 	addcc	%g4, %g7, %g7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 292) 	add	%o1, 2, %o1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 293) 	srl	%g7, 16, %g3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 294) 	addx	%g0, %g3, %g4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 295) 	sll	%g7, 16, %g7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 296) 	sll	%g4, 16, %g3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 297) 	srl	%g7, 16, %g7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 298) 	andcc	%o0, 0x4, %g0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 299) 	or	%g3, %g7, %g7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 300) 1:	be	3f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 301) 	 andcc	%g1, 0xffffff80, %g0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 302) 	EX(ld	[%o0 + 0x00], %g4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 303) 	sub	%g1, 4, %g1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 304) 	EX(st	%g4, [%o1 + 0x00])
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 305) 	add	%o0, 4, %o0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 306) 	addcc	%g4, %g7, %g7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 307) 	add	%o1, 4, %o1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 308) 	addx	%g0, %g7, %g7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 309) 	b	3f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 310) 	 andcc	%g1, 0xffffff80, %g0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 311) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 312) 	/* Sun, you just can't beat me, you just can't.  Stop trying,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 313) 	 * give up.  I'm serious, I am going to kick the living shit
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 314) 	 * out of you, game over, lights out.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 315) 	 */
	/*
	 * __csum_partial_copy_sparc_generic - copy len bytes from src to
	 * dest while adding them into a checksum.
	 *
	 * In:  %o0 = src, %o1 = dest, %g1 = len, %g7 = sum
	 * Out: %o0 = updated sum
	 * %o2-%o5 and %g2-%g5 are used as temporaries, which is why len
	 * and sum live in %g1 and %g7 here.
	 *
	 * Flow:
	 *  - (src ^ dest) & 3 non-zero: ccslow.
	 *  - src & 7 non-zero: cc_dword_align, which rejoins at label 3.
	 *  - (len & 0xffffff80) non-zero: loop 5 (CSUMCOPY_BIGCHUNK, word
	 *    stores) when bit 2 of dest is set, else loop ccdbl
	 *    (doubleword stores).  The branch goes to ccdbl + 4 because
	 *    its delay slot is the first ldd of loop 5, which is the same
	 *    instruction as the first ldd at ccdbl.
	 *  - ccmerge: %o2 = len & 0x70 bytes are handled by jumping into
	 *    cctbl at 12f - %o2/2 - %o2*2, that is one 40-byte (ten
	 *    instruction) CSUMCOPY_LASTCHUNK per 16 bytes of data.  src
	 *    and dest are advanced by %o2 first because the table uses
	 *    negative offsets.
	 *  - ccte: %o3 = len & 0xf.  Non-zero goes to cc_end_cruft,
	 *    otherwise the sum is returned.
	 *
	 * The EXT() lines register loop 5, cctbl and loop ccdbl as ranges
	 * in the exception table.
	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 316) 	.align	8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 317) 	.globl	__csum_partial_copy_sparc_generic
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 318) 	EXPORT_SYMBOL(__csum_partial_copy_sparc_generic)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 319) __csum_partial_copy_sparc_generic:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 320) 					/* %o0=src, %o1=dest, %g1=len, %g7=sum */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 321) 	xor	%o0, %o1, %o4		! get changing bits
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 322) 	andcc	%o4, 3, %g0		! check for mismatched alignment
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 323) 	bne	ccslow			! better this than unaligned/fixups
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 324) 	 andcc	%o0, 7, %g0		! need to align things?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 325) 	bne	cc_dword_align		! yes, we check for short lengths there
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 326) 	 andcc	%g1, 0xffffff80, %g0	! can we use unrolled loop?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 327) 3:	be	3f			! nope, less than one loop remains
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 328) 	 andcc	%o1, 4, %g0		! dest aligned on 4 or 8 byte boundary?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 329) 	be	ccdbl + 4		! 8 byte aligned, kick ass
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 330) 5:	CSUMCOPY_BIGCHUNK(%o0,%o1,%g7,0x00,%o4,%o5,%g2,%g3,%g4,%g5,%o2,%o3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 331) 	CSUMCOPY_BIGCHUNK(%o0,%o1,%g7,0x20,%o4,%o5,%g2,%g3,%g4,%g5,%o2,%o3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 332) 	CSUMCOPY_BIGCHUNK(%o0,%o1,%g7,0x40,%o4,%o5,%g2,%g3,%g4,%g5,%o2,%o3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 333) 	CSUMCOPY_BIGCHUNK(%o0,%o1,%g7,0x60,%o4,%o5,%g2,%g3,%g4,%g5,%o2,%o3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 334) 10:	EXT(5b, 10b)			! note for exception handling
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 335) 	sub	%g1, 128, %g1		! detract from length
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 336) 	addx	%g0, %g7, %g7		! add in last carry bit
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 337) 	andcc	%g1, 0xffffff80, %g0	! more to csum?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 338) 	add	%o0, 128, %o0		! advance src ptr
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 339) 	bne	5b			! we did not go negative, continue looping
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 340) 	 add	%o1, 128, %o1		! advance dest ptr
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 341) 3:	andcc	%g1, 0x70, %o2		! can use table?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 342) ccmerge:be	ccte			! nope, go and check for end cruft
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 343) 	 andcc	%g1, 0xf, %o3		! get low bits of length (clears carry btw)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 344) 	srl	%o2, 1, %o4		! begin negative offset computation
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 345) 	sethi	%hi(12f), %o5		! set up table ptr end
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 346) 	add	%o0, %o2, %o0		! advance src ptr
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 347) 	sub	%o5, %o4, %o5		! continue table calculation
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 348) 	sll	%o2, 1, %g2		! constant multiplies are fun...
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 349) 	sub	%o5, %g2, %o5		! some more adjustments
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 350) 	jmp	%o5 + %lo(12f)		! jump into it, duff style, wheee...
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 351) 	 add	%o1, %o2, %o1		! advance dest ptr (carry is clear btw)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 352) cctbl:	CSUMCOPY_LASTCHUNK(%o0,%o1,%g7,0x68,%g2,%g3,%g4,%g5)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 353) 	CSUMCOPY_LASTCHUNK(%o0,%o1,%g7,0x58,%g2,%g3,%g4,%g5)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 354) 	CSUMCOPY_LASTCHUNK(%o0,%o1,%g7,0x48,%g2,%g3,%g4,%g5)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 355) 	CSUMCOPY_LASTCHUNK(%o0,%o1,%g7,0x38,%g2,%g3,%g4,%g5)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 356) 	CSUMCOPY_LASTCHUNK(%o0,%o1,%g7,0x28,%g2,%g3,%g4,%g5)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 357) 	CSUMCOPY_LASTCHUNK(%o0,%o1,%g7,0x18,%g2,%g3,%g4,%g5)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 358) 	CSUMCOPY_LASTCHUNK(%o0,%o1,%g7,0x08,%g2,%g3,%g4,%g5)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 359) 12:	EXT(cctbl, 12b)			! note for exception table handling
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 360) 	addx	%g0, %g7, %g7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 361) 	andcc	%o3, 0xf, %g0		! check for low bits set
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 362) ccte:	bne	cc_end_cruft		! something left, handle it out of band
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 363) 	 andcc	%o3, 8, %g0		! begin checks for that code
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 364) 	retl				! return
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 365) 	 mov	%g7, %o0		! give em the computed checksum
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 366) ccdbl:	CSUMCOPY_BIGCHUNK_ALIGNED(%o0,%o1,%g7,0x00,%o4,%o5,%g2,%g3,%g4,%g5,%o2,%o3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 367) 	CSUMCOPY_BIGCHUNK_ALIGNED(%o0,%o1,%g7,0x20,%o4,%o5,%g2,%g3,%g4,%g5,%o2,%o3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 368) 	CSUMCOPY_BIGCHUNK_ALIGNED(%o0,%o1,%g7,0x40,%o4,%o5,%g2,%g3,%g4,%g5,%o2,%o3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 369) 	CSUMCOPY_BIGCHUNK_ALIGNED(%o0,%o1,%g7,0x60,%o4,%o5,%g2,%g3,%g4,%g5,%o2,%o3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 370) 11:	EXT(ccdbl, 11b)			! note for exception table handling
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 371) 	sub	%g1, 128, %g1		! detract from length
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 372) 	addx	%g0, %g7, %g7		! add in last carry bit
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 373) 	andcc	%g1, 0xffffff80, %g0	! more to csum?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 374) 	add	%o0, 128, %o0		! advance src ptr
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 375) 	bne	ccdbl			! we did not go negative, continue looping
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 376) 	 add	%o1, 128, %o1		! advance dest ptr
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 377) 	b	ccmerge			! finish it off, above
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 378) 	 andcc	%g1, 0x70, %o2		! can use table? (clears carry btw)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 379) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 380) ccslow:	cmp	%g1, 0			! slow copy-and-sum path: %o0 = src, %o1 = dest, %g1 = bytes left
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 381) 	mov	0, %g5			! %g5 = partial sum accumulator, starts at 0 (mov leaves the flags alone)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 382) 	bleu	4f			! unsigned <= 0, i.e. zero bytes: go add 0 into %g7 and return
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 383) 	 andcc	%o0, 1, %o5		! delay slot (always runs): %o5 = src & 1, kept for the final byte swap
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 384) 	be,a	1f			! src is even: no leading byte to peel off
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 385) 	 srl	%g1, 1, %g4		! delay slot, annulled unless taken: %g4 = number of halfwords
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 386) 	sub	%g1, 1, %g1	! odd src: account for one leading byte
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 387) 	EX(ldub	[%o0], %g5)	! the leading byte seeds the partial sum
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 388) 	add	%o0, 1, %o0	! src is now halfword aligned
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 389) 	EX(stb	%g5, [%o1])	! copy that byte to dest
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 390) 	srl	%g1, 1, %g4		! %g4 = number of halfwords left
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 391) 	add	%o1, 1, %o1		! advance dest past the byte
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 392) 1:	cmp	%g4, 0		! any whole halfword left?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 393) 	be,a	3f			! none: go straight to the trailing byte check
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 394) 	 andcc	%g1, 1, %g0		! delay slot, annulled unless taken: flags = is there a trailing byte
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 395) 	andcc	%o0, 2, %g0	! is src word aligned?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 396) 	be,a	1f			! yes: skip the leading halfword
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 397) 	 srl	%g4, 1, %g4		! delay slot, annulled unless taken: %g4 = number of words
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 398) 	EX(lduh	[%o0], %o4)	! leading halfword that brings src to word alignment
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 399) 	sub	%g1, 2, %g1	! two bytes consumed
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 400) 	srl	%o4, 8, %g2		! %g2 = high byte of the halfword
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 401) 	sub	%g4, 1, %g4	! one halfword fewer
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 402) 	EX(stb	%g2, [%o1])	! dest is written one byte at a time, high byte first
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 403) 	add	%o4, %g5, %g5		! accumulate the halfword
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 404) 	EX(stb	%o4, [%o1 + 1])	! then the low byte
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 405) 	add	%o0, 2, %o0	! src is now word aligned
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 406) 	srl	%g4, 1, %g4		! %g4 = number of words left
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 407) 	add	%o1, 2, %o1		! advance dest past the halfword
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 408) 1:	cmp	%g4, 0		! any whole word left?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 409) 	be,a	2f			! none: go to the trailing halfword check
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 410) 	 andcc	%g1, 2, %g0		! delay slot, annulled unless taken: flags = is there a trailing halfword
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 411) 	EX(ld	[%o0], %o4)	! first word for the loop below
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 412) 5:	srl	%o4, 24, %g2		! word loop: %g2 = byte 0 (most significant)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 413) 	srl	%o4, 16, %g3		! %g3 low byte = byte 1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 414) 	EX(stb	%g2, [%o1])	! store byte 0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 415) 	srl	%o4, 8, %g2		! %g2 low byte = byte 2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 416) 	EX(stb	%g3, [%o1 + 1])	! store byte 1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 417) 	add	%o0, 4, %o0		! advance src by one word
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 418) 	EX(stb	%g2, [%o1 + 2])	! store byte 2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 419) 	addcc	%o4, %g5, %g5		! accumulate the word, carry goes to the C flag
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 420) 	EX(stb	%o4, [%o1 + 3])	! store byte 3 (stores do not touch the flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 421) 	addx	%g5, %g0, %g5	! add the carry back in. I am now too lazy to optimize this (question it
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 422) 	add	%o1, 4, %o1	! is worthy). Maybe some day - with the sll/srl
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 423) 	subcc	%g4, 1, %g4	! tricks
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 424) 	bne,a	5b			! more words: loop
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 425) 	 EX(ld	[%o0], %o4)	! delay slot, annulled on loop exit so no word is read past the last one
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 426) 	sll	%g5, 16, %g2		! fold 32-bit sum: isolate the low 16 bits ...
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 427) 	srl	%g5, 16, %g5		! ... %g5 = high 16 bits
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 428) 	srl	%g2, 16, %g2		! ... %g2 = low 16 bits
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 429) 	andcc	%g1, 2, %g0		! flags = is there a trailing halfword (consumed at label 2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 430) 	add	%g2, %g5, %g5 		! %g5 = low half + high half (plain add keeps the flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 431) 2:	be,a	3f		! no trailing halfword: go to the trailing byte check
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 432) 	 andcc	%g1, 1, %g0		! delay slot, annulled unless taken: flags = is there a trailing byte
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 433) 	EX(lduh	[%o0], %o4)	! trailing halfword
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 434) 	andcc	%g1, 1, %g0		! flags = is there a trailing byte (consumed at label 3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 435) 	srl	%o4, 8, %g2		! %g2 = high byte of the halfword
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 436) 	add	%o0, 2, %o0	! advance src past the halfword
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 437) 	EX(stb	%g2, [%o1])	! store high byte
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 438) 	add	%g5, %o4, %g5		! accumulate the halfword
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 439) 	EX(stb	%o4, [%o1 + 1])	! store low byte
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 440) 	add	%o1, 2, %o1		! advance dest past the halfword
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 441) 3:	be,a	1f		! no trailing byte: go fold
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 442) 	 sll	%g5, 16, %o4		! delay slot, annulled unless taken: %o4 = sum << 16 for the fold
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 443) 	EX(ldub	[%o0], %g2)	! trailing byte
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 444) 	sll	%g2, 8, %o4	! it counts as the high byte of a final halfword
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 445) 	EX(stb	%g2, [%o1])	! copy it to dest
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 446) 	add	%g5, %o4, %g5		! accumulate it
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 447) 	sll	%g5, 16, %o4		! %o4 = sum << 16 for the fold
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 448) 1:	addcc	%o4, %g5, %g5		! upper 16 bits = low half + high half, overflow lands in C
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 449) 	srl	%g5, 16, %o4		! %o4 = that 16-bit sum (shift keeps the flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 450) 	addx	%g0, %o4, %g5		! %g5 = 16-bit sum plus the carry
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 451) 	orcc	%o5, %g0, %g0		! did src start on an odd address?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 452) 	be	4f			! no: the sum is already in the right byte order
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 453) 	 srl	%g5, 8, %o4		! delay slot (always runs): %o4 = sum >> 8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 454) 	and	%g5, 0xff, %g2		! odd start: swap the two bytes of the 16-bit sum ...
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 455) 	and	%o4, 0xff, %o4		! ... %o4 = old high byte
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 456) 	sll	%g2, 8, %g2		! ... %g2 = old low byte moved up
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 457) 	or	%g2, %o4, %g5		! ... %g5 = byte-swapped sum
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 458) 4:	addcc	%g7, %g5, %g7		! add the partial sum into %g7 (value carried in from earlier code)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 459) 	retl				! leaf return via %o7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 460) 	 addx	%g0, %g7, %o0		! delay slot: result in %o0 = %g7 plus the carry
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 461) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 462) /* Fault handling exists for the csum_*_from_user case only, i.e. we only
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 463)  * bother with faults on loads; the handler just returns 0 in %o0. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 464) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 465) cc_fault:				! fault exit; presumably entered from the exception fixups of the guarded accesses - verify against the macro definition
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 466) 	retl				! must be retl, not ret: the normal exit returns via %o7 with retl, so this code runs without a save and %i7 is not our return address
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 467) 	 clr	%o0			! delay slot: report the fault by returning 0