^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1) /* SPDX-License-Identifier: GPL-2.0 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3) * arch/alpha/lib/ev6-divide.S
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5) * 21264 version contributed by Rick Gorton <rick.gorton@alpha-processor.com>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7) * Alpha division..
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11) * The alpha chip doesn't provide hardware division, so we have to do it
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 12) * by hand. The compiler expects the functions
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 13) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 14) * __divqu: 64-bit unsigned long divide
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 15) * __remqu: 64-bit unsigned long remainder
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 16) * __divq/__remq: signed 64-bit
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 17) * __divlu/__remlu: unsigned 32-bit
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 18) * __divl/__reml: signed 32-bit
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 19) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 20) * These are not normal C functions: instead of the normal
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 21) * calling sequence, these expect their arguments in registers
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 22) * $24 and $25, and return the result in $27. Register $28 may
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 23) * be clobbered (assembly temporary), anything else must be saved.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 24) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 25) * In short: painful.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 26) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 27) * This is a rather simple bit-at-a-time algorithm: it's very good
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 28) * at dividing random 64-bit numbers, but the more usual case where
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 29) * the divisor is small is handled better by the DEC algorithm
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 30) * using lookup tables. This uses much less memory, though, and is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 31) * nicer on the cache.. Besides, I don't know the copyright status
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 32) * of the DEC code.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 33) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 34)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 35) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 36) * My temporaries:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 37) * $0 - current bit
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 38) * $1 - shifted divisor
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 39) * $2 - modulus/quotient; $3, $4 - scratch (tmp1, tmp2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 40) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 41) * $23 - return address
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 42) * $24 - dividend
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 43) * $25 - divisor
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 44) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 45) * $27 - quotient/modulus
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 46) * $28 - compare status
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 47) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 48) * Much of the information about 21264 scheduling/coding comes from:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 49) * Compiler Writer's Guide for the Alpha 21264
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 50) * abbreviated as 'CWG' in other comments here
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 51) * ftp.digital.com/pub/Digital/info/semiconductor/literature/dsc-library.html
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 52) * Scheduling notation:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 53) * E - either cluster
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 54) * U - upper subcluster; U0 - subcluster U0; U1 - subcluster U1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 55) * L - lower subcluster; L0 - subcluster L0; L1 - subcluster L1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 56) * Try not to change the actual algorithm if possible for consistency.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 57) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 58)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 59) #include <asm/export.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 60) #define halt .long 0 /* emits a zero longword; not used in the code visible here */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 61)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 62) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 63) * Select function type and registers
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 64) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 65) #define mask $0 /* single-bit marker for the quotient bit being tried */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 66) #define divisor $1 /* working (shifted) copy of the divisor */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 67) #define compare $28 /* result of the unsigned comparisons */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 68) #define tmp1 $3 /* candidate remainder (modulus - divisor) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 69) #define tmp2 $4 /* candidate quotient (quotient + mask), divide build only */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 70)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 71) #ifdef DIV
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 72) #define DIV_ONLY(x,y...) x,##y /* divide build: emit the wrapped instruction */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 73) #define MOD_ONLY(x,y...) /* divide build: expands to nothing */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 74) #define func(x) __div##x /* entry points are named __div<suffix> */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 75) #define modulus $2 /* running remainder lives in a scratch register */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 76) #define quotient $27 /* quotient accumulates in the result register */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 77) #define GETSIGN(x) xor $24,$25,x /* sign bit set when the operand signs differ */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 78) #define STACK 48 /* frame size in bytes; includes the tmp2 slot at offset 32 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 79) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 80) #define DIV_ONLY(x,y...) /* remainder build: expands to nothing */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 81) #define MOD_ONLY(x,y...) x,##y /* remainder build: emit the wrapped instruction */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 82) #define func(x) __rem##x /* entry points are named __rem<suffix> */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 83) #define modulus $27 /* running remainder accumulates in the result register */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 84) #define quotient $2 /* quotient lives in a scratch register */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 85) #define GETSIGN(x) bis $24,$24,x /* copy of the dividend: result sign follows the dividend */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 86) #define STACK 32 /* frame size in bytes; four 8-byte save slots */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 87) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 88)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 89) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 90) * For 32-bit operations, we need to extend to 64-bit
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 91) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 92) #ifdef INTSIZE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 93) #define ufunction func(lu) /* __divlu or __remlu */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 94) #define sfunction func(l) /* __divl or __reml */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 95) #define LONGIFY(x) zapnot x,15,x /* keep the low four bytes, i.e. zero-extend 32 bits */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 96) #define SLONGIFY(x) addl x,0,x /* sign-extend the low 32 bits */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 97) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 98) #define ufunction func(qu) /* __divqu or __remqu */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 99) #define sfunction func(q) /* __divq or __remq */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 100) #define LONGIFY(x) /* 64-bit build: no extension, expands to nothing */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 101) #define SLONGIFY(x) /* 64-bit build: no extension, expands to nothing */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 102) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 103)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 104) .set noat # $28 is used explicitly below as a scratch register
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 105) .align 4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 106) .globl ufunction
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 107) .ent ufunction
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 108) ufunction: # unsigned entry: $24 = dividend, $25 = divisor, result in $27, return via $23
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 109) subq $30,STACK,$30 # E : allocate the register save area
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 110) .frame $30,STACK,$23
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 111) .prologue 0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 112)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 113) 7: stq $1, 0($30) # L : save $1; the signed entry branches here, with its own frame, when neither operand is negative
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 114) bis $25,$25,divisor # E : working copy of the divisor
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 115) stq $2, 8($30) # L : L U L U -- save $2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 116)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 117) bis $24,$24,modulus # E : running remainder starts out as the dividend
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 118) stq $0,16($30) # L : save $0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 119) bis $31,$31,quotient # E : quotient accumulator starts at zero
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 120) LONGIFY(divisor) # E : U L L U -- 32-bit build: zero-extend; 64-bit build: no instruction
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 121)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 122) stq tmp1,24($30) # L : save $3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 123) LONGIFY(modulus) # E : same treatment for the dividend copy
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 124) bis $31,1,mask # E : current quotient bit starts at bit 0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 125) DIV_ONLY(stq tmp2,32($30)) # L : L U U L -- divide build only: save $4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 126)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 127) beq divisor, 9f /* div by zero: skip the loops, $27 stays as set above (0 for divide, the dividend copy for remainder) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 128) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 129) * In spite of the DIV_ONLY being either a non-instruction
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 130) * or an actual stq, the addition of the .align directive
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 131) * below ensures that label 1 is going to be nicely aligned
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 132) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 133)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 134) .align 4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 135) #ifdef INTSIZE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 136) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 137) * shift divisor left, using 3-bit shifts for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 138) * 32-bit divides as we can't overflow. Three-bit
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 139) * shifts will result in looping three times less
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 140) * here, but can result in two loops more later.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 141) * Thus using a large shift isn't worth it (and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 142) * s8add pairs better than a sll..)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 143) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 144) 1: cmpult divisor,modulus,compare # E : unsigned test made on the value before this round of shifting
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 145) s8addq divisor,$31,divisor # E : shifted divisor times 8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 146) s8addq mask,$31,mask # E : current bit times 8, kept in step
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 147) bne compare,1b # U : U L U L -- repeat while the pre-shift value was still below the remainder
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 148) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 149) 1: cmpult divisor,modulus,compare # E : unsigned test made on the value before this round of shifting
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 150) nop # E :
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 151) nop # E :
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 152) blt divisor, 2f # U : U L U L -- top bit already set: stop before another doubling would drop it
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 153)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 154) addq divisor,divisor,divisor # E : shifted divisor times 2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 155) addq mask,mask,mask # E : current bit times 2, kept in step
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 156) unop # E :
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 157) bne compare,1b # U : U L U L -- repeat while the pre-shift value was still below the remainder
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 158) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 159)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 160) /* ok, start to go right again.. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 161) 2:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 162) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 163) * Keep things nicely bundled... use a nop instead of not
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 164) * having an instruction for DIV_ONLY
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 165) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 166) #ifdef DIV
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 167) DIV_ONLY(addq quotient,mask,tmp2) # E : candidate quotient with the current bit added
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 168) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 169) nop # E :
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 170) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 171) srl mask,1,mask # U : step to the next lower bit, zero once bit 0 has been handled
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 172) cmpule divisor,modulus,compare # E : nonzero when the shifted divisor fits in the remainder
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 173) subq modulus,divisor,tmp1 # E : candidate remainder after subtracting
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 174)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 175) #ifdef DIV
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 176) DIV_ONLY(cmovne compare,tmp2,quotient) # E : Latency 2, extra map slot -- commit the quotient bit if it fits
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 177) nop # E : as part of the cmovne
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 178) srl divisor,1,divisor # U : halve the shifted divisor for the next round
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 179) nop # E : L U L U
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 180)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 181) nop # E :
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 182) cmovne compare,tmp1,modulus # E : Latency 2, extra map slot -- commit the subtraction if it fits
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 183) nop # E : as part of the cmovne
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 184) bne mask,2b # U : U L U L -- loop until the current bit has shifted out
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 185) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 186) srl divisor,1,divisor # U : halve the shifted divisor for the next round
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 187) cmovne compare,tmp1,modulus # E : Latency 2, extra map slot -- commit the subtraction if it fits
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 188) nop # E : as part of the cmovne
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 189) bne mask,2b # U : U L L U -- loop until the current bit has shifted out
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 190) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 191)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 192) 9: ldq $1, 0($30) # L : common exit: restore $1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 193) ldq $2, 8($30) # L : restore $2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 194) nop # E :
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 195) nop # E : U U L L
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 196)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 197) ldq $0,16($30) # L : restore $0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 198) ldq tmp1,24($30) # L : restore $3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 199) nop # E :
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 200) nop # E :
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 201)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 202) #ifdef DIV
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 203) DIV_ONLY(ldq tmp2,32($30)) # L : divide build only: restore $4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 204) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 205) nop # E :
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 206) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 207) addq $30,STACK,$30 # E : release the register save area
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 208) ret $31,($23),1 # L0 : L U U L -- return through $23
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 209) .end ufunction
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 210) EXPORT_SYMBOL(ufunction)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 211)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 212) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 213) * Uhh.. Ugly signed division. I'd rather not have it at all, but
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 214) * it's needed in some circumstances. There are different ways to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 215) * handle this, really. This does:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 216) * -a / b = a / -b = -(a / b)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 217) * -a % b = -(a % b)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 218) * a % -b = a % b
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 219) * which is probably not the best solution, but at least should
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 220) * have the property that (x/y)*y + (x%y) = x.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 221) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 222) .align 4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 223) .globl sfunction
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 224) .ent sfunction
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 225) sfunction: # signed entry: $24 = dividend, $25 = divisor, result in $27, return via $23
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 226) subq $30,STACK,$30 # E : allocate the save area
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 227) .frame $30,STACK,$23
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 228) .prologue 0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 229) bis $24,$25,$28 # E : sign bit of $28 is set if either operand is negative
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 230) SLONGIFY($28) # E : 32-bit build: make bit 31 the sign; 64-bit build: no instruction
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 231) bge $28,7b # U : neither operand negative: run the unsigned body on this frame
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 232)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 233) stq $24,0($30) # L : keep the original dividend for the sign fix-up
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 234) subq $31,$24,$28 # E : $28 = negated dividend
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 235) stq $25,8($30) # L : keep the original divisor for the sign fix-up
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 236) nop # E : U L U L
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 237)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 238) cmovlt $24,$28,$24 /* abs($24) */ # E : Latency 2, extra map slot
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 239) nop # E : as part of the cmov
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 240) stq $23,16($30) # L : save the return address, the bsr below overwrites $23
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 241) subq $31,$25,$28 # E : U L U L -- $28 = negated divisor
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 242)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 243) stq tmp1,24($30) # L : save $3, used for the negated result below
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 244) cmovlt $25,$28,$25 /* abs($25) */ # E : Latency 2, extra map slot
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 245) nop # E :
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 246) bsr $23,ufunction # L0: L U L U -- unsigned operation on the absolute values
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 247)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 248) ldq $24,0($30) # L : reload the original dividend
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 249) ldq $25,8($30) # L : reload the original divisor
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 250) GETSIGN($28) # E : divide build: xor of the operands; remainder build: copy of the dividend
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 251) subq $31,$27,tmp1 # E : U U L L -- negated unsigned result
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 252)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 253) SLONGIFY($28) # E : 32-bit build: make bit 31 the sign; 64-bit build: no instruction
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 254) ldq $23,16($30) # L : restore the return address
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 255) cmovlt $28,tmp1,$27 # E : Latency 2, extra map slot -- take the negated result when the sign word is negative
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 256) nop # E : U L L U : as part of the cmov
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 257)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 258) ldq tmp1,24($30) # L : restore $3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 259) nop # E : as part of the cmov
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 260) addq $30,STACK,$30 # E : release the save area
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 261) ret $31,($23),1 # L0 : L U U L -- return through $23
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 262) .end sfunction
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 263) EXPORT_SYMBOL(sfunction)