^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1) /* SPDX-License-Identifier: GPL-2.0-only */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3) * linux/arch/arm/lib/div64.S
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5) * Optimized computation of 64-bit dividend / 32-bit divisor
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7) * Author: Nicolas Pitre
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8) * Created: Oct 5, 2003
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9) * Copyright: Monta Vista Software, Inc.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 12) #include <linux/linkage.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 13) #include <asm/assembler.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 14) #include <asm/unwind.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 15)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 16) #ifdef __ARMEB__
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 17) #define xh r0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 18) #define xl r1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 19) #define yh r2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 20) #define yl r3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 21) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 22) #define xl r0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 23) #define xh r1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 24) #define yl r2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 25) #define yh r3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 26) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 27)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 28) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 29) * __do_div64: perform a division with 64-bit dividend and 32-bit divisor.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 30) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 31) * Note: Calling convention is totally non standard for optimal code.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 32) * This is meant to be used by do_div() from include/asm/div64.h only.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 33) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 34) * Input parameters:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 35) * xh-xl = dividend (clobbered)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 36) * r4 = divisor (preserved)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 37) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 38) * Output values:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 39) * yh-yl = result
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 40) * xh = remainder
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 41) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 42) * Clobbered regs: xl, ip
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 43) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 44)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 45) ENTRY(__do_div64)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 46) UNWIND(.fnstart)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 47) @ Unsigned 64-by-32 division: quotient goes to yh-yl, remainder to xh.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 48) @ Test for easy paths first.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 49) subs ip, r4, #1 @ ip = divisor - 1, flags feed the bls below
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 50) bls 9f @ divisor is 0 or 1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 51) tst ip, r4 @ (divisor - 1) & divisor is 0 only for a power of 2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 52) beq 8f @ divisor is power of 2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 53)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 54) @ See if we need to handle upper 32-bit result.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 55) cmp xh, r4 @ flags are consumed by the blo below
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 56) mov yh, #0 @ upper quotient word starts at 0 (flags untouched)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 57) blo 3f @ xh < divisor: upper quotient word stays 0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 58)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 59) @ Align divisor with upper part of dividend.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 60) @ The aligned divisor is stored in yl preserving the original.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 61) @ The bit position is stored in ip.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 62)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 63) #if __LINUX_ARM_ARCH__ >= 5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 64)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 65) clz yl, r4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 66) clz ip, xh
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 67) sub yl, yl, ip @ yl = shift lining up the top set bits of r4 and xh
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 68) mov ip, #1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 69) mov ip, ip, lsl yl @ ip = quotient bit matching that shift
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 70) mov yl, r4, lsl yl @ yl = divisor shifted up by the same amount
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 71)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 72) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 73)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 74) mov yl, r4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 75) mov ip, #1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 76) 1: cmp yl, #0x80000000 @ stop once the top bit of yl is set...
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 77) cmpcc yl, xh @ ...or once yl >= xh
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 78) movcc yl, yl, lsl #1 @ otherwise shift divisor copy and bit position up together
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 79) movcc ip, ip, lsl #1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 80) bcc 1b
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 81)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 82) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 83)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 84) @ The division loop for needed upper bit positions.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 85) @ Break out early if dividend reaches 0.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 86) 2: cmp xh, yl
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 87) orrcs yh, yh, ip @ xh >= yl: set this quotient bit...
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 88) subscs xh, xh, yl @ ...and subtract, Z set if xh reached 0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 89) movsne ip, ip, lsr #1 @ next bit position, Z set once ip is exhausted
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 90) mov yl, yl, lsr #1 @ flags untouched, so bne tests the Z from above
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 91) bne 2b
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 92)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 93) @ See if we need to handle lower 32-bit result.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 94) 3: cmp xh, #0 @ never borrows, so lo below can only come from cmpeq
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 95) mov yl, #0 @ lower quotient word starts at 0 (flags untouched)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 96) cmpeq xl, r4 @ only when xh is 0: is the low word below the divisor?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 97) movlo xh, xl @ yes: xl is already the remainder
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 98) retlo lr
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 99)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 100) @ The division loop for lower bit positions.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 101) @ Here we shift remainder bits leftwards rather than moving the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 102) @ divisor for comparisons, considering the carry-out bit as well.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 103) mov ip, #0x80000000 @ start at the top bit of the lower quotient word
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 104) 4: movs xl, xl, lsl #1 @ top bit of xl goes to carry
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 105) adcs xh, xh, xh @ xh = 2 * xh + carry, carry out is the 33rd bit
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 106) beq 6f @ xh became 0: sort out the cases at 6
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 107) cmpcc xh, r4 @ no carry out: carry = (xh >= divisor)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 108) 5: orrcs yl, yl, ip @ carry set: set this quotient bit...
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 109) subcs xh, xh, r4 @ ...and subtract the divisor
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 110) movs ip, ip, lsr #1 @ next bit position, Z set when none is left
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 111) bne 4b
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 112) ret lr
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 113)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 114) @ The top part of remainder became zero. If carry is set
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 115) @ (the 33rd bit) this is a false positive so resume the loop.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 116) @ Otherwise, if lower part is also null then we are done.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 117) 6: bcs 5b @ re-enter at 5 with carry set so the bit is recorded
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 118) cmp xl, #0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 119) reteq lr @ xh and xl both 0: remainder is 0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 120)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 121) @ We still have remainder bits in the low part. Bring them up.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 122)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 123) #if __LINUX_ARM_ARCH__ >= 5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 124)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 125) clz xh, xl @ we know xh is zero here so...
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 126) add xh, xh, #1 @ shift count that pushes the top set bit of xl out
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 127) mov xl, xl, lsl xh
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 128) mov ip, ip, lsr xh @ skip the same number of quotient bit positions
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 129)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 130) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 131)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 132) 7: movs xl, xl, lsl #1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 133) mov ip, ip, lsr #1 @ flags untouched, bcc tests the bit shifted out of xl
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 134) bcc 7b @ loop until a set bit has been shifted out of xl
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 135)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 136) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 137)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 138) @ Current remainder is now 1. It is worthless to compare with
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 139) @ divisor at this point since divisor can not be smaller than 3 here.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 140) @ If possible, branch for another shift in the division loop.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 141) @ If no bit position left then we are done.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 142) movs ip, ip, lsr #1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 143) mov xh, #1 @ flags untouched: bne still tests the movs result
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 144) bne 4b
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 145) ret lr
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 146)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 147) 8: @ Division by a power of 2: determine what that divisor order is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 148) @ then simply shift values around
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 149)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 150) #if __LINUX_ARM_ARCH__ >= 5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 151)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 152) clz ip, r4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 153) rsb ip, ip, #31 @ ip = 31 - clz(divisor) = index of its single set bit
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 154)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 155) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 156)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 157) mov yl, r4 @ yl is scratch here: narrow down the set bit by halves
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 158) cmp r4, #(1 << 16)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 159) mov ip, #0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 160) movhs yl, yl, lsr #16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 161) movhs ip, #16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 162)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 163) cmp yl, #(1 << 8)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 164) movhs yl, yl, lsr #8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 165) addhs ip, ip, #8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 166)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 167) cmp yl, #(1 << 4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 168) movhs yl, yl, lsr #4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 169) addhs ip, ip, #4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 170)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 171) cmp yl, #(1 << 2) @ yl is now 1, 2, 4 or 8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 172) addhi ip, ip, #3 @ yl is 8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 173) addls ip, ip, yl, lsr #1 @ yl is 1, 2 or 4: add 0, 1 or 2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 174)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 175) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 176)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 177) mov yh, xh, lsr ip @ quotient = dividend >> ip, one word at a time
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 178) mov yl, xl, lsr ip
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 179) rsb ip, ip, #32 @ ip = 32 - order, used to move xh bits down into yl
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 180) ARM( orr yl, yl, xh, lsl ip )
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 181) THUMB( lsl xh, xh, ip )
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 182) THUMB( orr yl, yl, xh )
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 183) mov xh, xl, lsl ip @ remainder = low order bits of xl:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 184) mov xh, xh, lsr ip @ shift them to the top, then back down
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 185) ret lr
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 186)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 187) @ eq -> division by 1: obvious enough...
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 188) 9: moveq yl, xl @ flags here still come from the subs at entry
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 189) moveq yh, xh
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 190) moveq xh, #0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 191) reteq lr @ ne (divisor is 0) continues into Ldiv0_64 below
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 192) UNWIND(.fnend)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 193)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 194) UNWIND(.fnstart)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 195) UNWIND(.pad #4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 196) UNWIND(.save {lr})
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 197) Ldiv0_64:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 198) @ Division by 0:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 199) str lr, [sp, #-8]! @ save lr, moving sp down by 8 bytes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 200) bl __div0 @ handler defined outside this file
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 201)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 202) @ as wrong as it could be...
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 203) mov yl, #0 @ quotient and remainder are all reported as 0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 204) mov yh, #0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 205) mov xh, #0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 206) ldr pc, [sp], #8 @ return through the saved lr and release the 8 bytes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 207)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 208) UNWIND(.fnend)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 209) ENDPROC(__do_div64)