^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1) /* SPDX-License-Identifier: GPL-2.0 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3) * arch/alpha/lib/divide.S
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5) * (C) 1995 Linus Torvalds
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7) * Alpha division..
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11) * The alpha chip doesn't provide hardware division, so we have to do it
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 12) * by hand. The compiler expects the functions
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 13) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 14) * __divqu: 64-bit unsigned long divide
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 15) * __remqu: 64-bit unsigned long remainder
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 16) * __divq/__remq: signed 64-bit
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 17) * __divlu/__remlu: unsigned 32-bit
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 18) * __divl/__reml: signed 32-bit
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 19) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 20) * These are not normal C functions: instead of the normal
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 21) * calling sequence, these expect their arguments in registers
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 22) * $24 and $25, and return the result in $27. Register $28 may
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 23) * be clobbered (assembly temporary), anything else must be saved.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 24) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 25) * In short: painful.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 26) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 27) * This is a rather simple bit-at-a-time algorithm: it's very good
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 28) * at dividing random 64-bit numbers, but the more usual case where
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 29) * the divisor is small is handled better by the DEC algorithm
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 30) * using lookup tables. This uses much less memory, though, and is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 31) * nicer on the cache.. Besides, I don't know the copyright status
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 32) * of the DEC code.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 33) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 34)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 35) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 36) * My temporaries:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 37) * $0 - current bit
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 38) * $1 - shifted divisor
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 39) * $2 - modulus/quotient (plus $3/$4 as scratch: tmp1/tmp2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 40) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 41) * $23 - return address
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 42) * $24 - dividend
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 43) * $25 - divisor
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 44) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 45) * $27 - quotient/modulus
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 46) * $28 - compare status
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 47) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 48)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 49) #include <asm/export.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 50) #define halt .long 0 /* emits a zero longword; not referenced in the code visible here */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 51)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 52) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 53) * Select function type and registers
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 54) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 55) #define mask $0 /* quotient bit currently being tried; starts at 1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 56) #define divisor $1 /* working copy of $25, shifted left and then back right */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 57) #define compare $28 /* result of the cmpult/cmpule tests */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 58) #define tmp1 $3 /* candidate modulus-divisor; also the negated result in the signed routine */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 59) #define tmp2 $4 /* candidate quotient+mask; only touched inside DIV_ONLY() */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 60)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 61) #ifdef DIV /* divide build: entry points are named __div<suffix> */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 62) #define DIV_ONLY(x,y...) x,##y /* emit the wrapped statement */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 63) #define MOD_ONLY(x,y...) /* expands to nothing in this build */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 64) #define func(x) __div##x
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 65) #define modulus $2 /* running remainder is kept in a saved scratch register */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 66) #define quotient $27 /* quotient accumulates directly in the result register */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 67) #define GETSIGN(x) xor $24,$25,x /* x is negative iff the operand signs differ */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 68) #define STACK 48 /* frame size; save slots at offsets 0..32 are used */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 69) #else /* remainder build: entry points are named __rem<suffix> */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 70) #define DIV_ONLY(x,y...) /* expands to nothing in this build */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 71) #define MOD_ONLY(x,y...) x,##y /* emit the wrapped statement */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 72) #define func(x) __rem##x
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 73) #define modulus $27 /* remainder accumulates directly in the result register */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 74) #define quotient $2 /* zeroed at entry; only updated inside DIV_ONLY(), so unused here */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 75) #define GETSIGN(x) bis $24,$24,x /* x = dividend: the remainder takes the dividend's sign */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 76) #define STACK 32 /* frame size; save slots at offsets 0..24 are used */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 77) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 78)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 79) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 80) * For 32-bit operations, we need to extend to 64-bit
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 81) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 82) #ifdef INTSIZE /* 32-bit variants */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 83) #define ufunction func(lu) /* __divlu or __remlu */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 84) #define sfunction func(l) /* __divl or __reml */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 85) #define LONGIFY(x) zapnot x,15,x /* keep the low four bytes: zero-extend 32 -> 64 bits */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 86) #define SLONGIFY(x) addl x,0,x /* 32-bit add of zero: sign-extend the low 32 bits */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 87) #else /* 64-bit variants */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 88) #define ufunction func(qu) /* __divqu or __remqu */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 89) #define sfunction func(q) /* __divq or __remq */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 90) #define LONGIFY(x) /* no extension needed: expands to nothing */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 91) #define SLONGIFY(x) /* no extension needed: expands to nothing */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 92) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 93)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 94) .set noat /* $28 is used explicitly below, as 'compare' */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 95) .align 3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 96) .globl ufunction
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 97) .ent ufunction
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 98) ufunction: /* unsigned $24 / $25 (or $24 % $25) -> $27, return via $23; a zero divisor yields quotient 0 / remainder = dividend */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 99) subq $30,STACK,$30 /* allocate the register save area */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 100) .frame $30,STACK,$23
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 101) .prologue 0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 102)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 103) 7: stq $1, 0($30) /* the signed entry branches here, frame already allocated, when neither operand is negative */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 104) bis $25,$25,divisor /* divisor = $25 (after $1 has been saved) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 105) stq $2, 8($30)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 106) bis $24,$24,modulus /* modulus = dividend */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 107) stq $0,16($30)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 108) bis $31,$31,quotient /* quotient = 0 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 109) LONGIFY(divisor) /* 32-bit build: zero-extend; empty otherwise */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 110) stq tmp1,24($30)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 111) LONGIFY(modulus)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 112) bis $31,1,mask /* mask = 1 (after $0 has been saved) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 113) DIV_ONLY(stq tmp2,32($30)) /* tmp2 is only used by the divide build */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 114) beq divisor, 9f /* div by zero: skip both loops, result stays as initialised above */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 115)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 116) #ifdef INTSIZE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 117) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 118) * shift divisor left, using 3-bit shifts for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 119) * 32-bit divides as we can't overflow. Three-bit
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 120) * shifts will result in looping three times less
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 121) * here, but can result in two loops more later.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 122) * Thus using a large shift isn't worth it (and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 123) * s8add pairs better than a sll..)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 124) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 125) 1: cmpult divisor,modulus,compare /* compare = (divisor < modulus), tested before this round's shift */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 126) s8addq divisor,$31,divisor /* divisor *= 8 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 127) s8addq mask,$31,mask /* mask *= 8, kept in step with divisor */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 128) bne compare,1b
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 129) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 130) 1: cmpult divisor,modulus,compare /* compare = (divisor < modulus), tested before this round's shift */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 131) blt divisor, 2f /* top bit already set: another shift would overflow */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 132) addq divisor,divisor,divisor /* divisor <<= 1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 133) addq mask,mask,mask /* mask <<= 1, kept in step with divisor */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 134) bne compare,1b
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 135) unop
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 136) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 137)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 138) /* ok, start to go right again.. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 139) 2: DIV_ONLY(addq quotient,mask,tmp2) /* tmp2 = quotient with the current bit added */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 140) srl mask,1,mask
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 141) cmpule divisor,modulus,compare /* does the shifted divisor fit into what is left? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 142) subq modulus,divisor,tmp1 /* tmp1 = modulus - divisor, computed unconditionally */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 143) DIV_ONLY(cmovne compare,tmp2,quotient) /* commit the quotient bit only if it fit */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 144) srl divisor,1,divisor
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 145) cmovne compare,tmp1,modulus /* commit the subtraction only if it fit */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 146) bne mask,2b /* loop until the mask bit has been shifted out */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 147)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 148) 9: ldq $1, 0($30) /* common exit: restore every register saved at entry */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 149) ldq $2, 8($30)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 150) ldq $0,16($30)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 151) ldq tmp1,24($30)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 152) DIV_ONLY(ldq tmp2,32($30))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 153) addq $30,STACK,$30 /* release the frame */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 154) ret $31,($23),1 /* return through $23, not the usual $26 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 155) .end ufunction
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 156) EXPORT_SYMBOL(ufunction)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 157)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 158) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 159) * Uhh.. Ugly signed division. I'd rather not have it at all, but
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 160) * it's needed in some circumstances. There are different ways to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 161) * handle this, really. This does:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 162) * -a / b = a / -b = -(a / b)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 163) * -a % b = -(a % b)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 164) * a % -b = a % b
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 165) * which is probably not the best solution, but at least should
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 166) * have the property that (x/y)*y + (x%y) = x.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 167) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 168) .align 3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 169) .globl sfunction
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 170) .ent sfunction
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 171) sfunction: /* signed $24 / $25 (or %) -> $27: run the unsigned routine on the absolute values, then fix the sign */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 172) subq $30,STACK,$30 /* allocate the frame */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 173) .frame $30,STACK,$23
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 174) .prologue 0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 175) bis $24,$25,$28 /* $28 = $24 | $25: negative iff either operand is negative */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 176) SLONGIFY($28) /* 32-bit build: make bit 31 the tested sign bit; empty otherwise */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 177) bge $28,7b /* no negative operand: continue in the unsigned body, which reuses this frame */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 178) stq $24,0($30) /* keep the original operands for the sign fix-up below */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 179) subq $31,$24,$28 /* $28 = -$24 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 180) stq $25,8($30)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 181) cmovlt $24,$28,$24 /* abs($24) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 182) stq $23,16($30) /* save our return address: the bsr below overwrites $23 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 183) subq $31,$25,$28 /* $28 = -$25 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 184) stq tmp1,24($30)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 185) cmovlt $25,$28,$25 /* abs($25) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 186) unop
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 187) bsr $23,ufunction /* $27 = unsigned result on the absolute values; callee allocates its own frame */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 188) ldq $24,0($30) /* reload the original (signed) operands */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 189) ldq $25,8($30)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 190) GETSIGN($28) /* divide build: $24 ^ $25; remainder build: $24 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 191) subq $31,$27,tmp1 /* tmp1 = -result */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 192) SLONGIFY($28) /* 32-bit build: make bit 31 the tested sign bit; empty otherwise */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 193) ldq $23,16($30)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 194) cmovlt $28,tmp1,$27 /* negate the result when the sign word is negative */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 195) ldq tmp1,24($30)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 196) addq $30,STACK,$30 /* release the frame */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 197) ret $31,($23),1 /* return through $23, not the usual $26 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 198) .end sfunction
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 199) EXPORT_SYMBOL(sfunction)