/* SPDX-License-Identifier: GPL-2.0-or-later */
/* multi_arith.h: multi-precision integer arithmetic functions, needed
   to do extended-precision floating point.

   (c) 1998 David Huggins-Daines.

   Somewhat based on arch/alpha/math-emu/ieee-math.c, which is (c)
   David Mosberger-Tang.

 */

/* Note:

   These are not general multi-precision math routines.  Rather, they
   implement the subset of integer arithmetic that we need in order to
   multiply, divide, and normalize 128-bit unsigned mantissae. */

#ifndef MULTI_ARITH_H
#define MULTI_ARITH_H
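/* fp_denormalize() shifts the 64-bit mantissa in reg right by cnt bits
   (adjusting the exponent to match) and keeps eight guard bits in
   reg->lowmant; any bits shifted out below the guard byte are OR-ed
   into the sticky bit (bit 0 of lowmant) so that rounding still sees
   them. */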
static inline void fp_denormalize(struct fp_ext *reg, unsigned int cnt)
{
	reg->exp += cnt;

	switch (cnt) {
	case 0 ... 8:
		reg->lowmant = reg->mant.m32[1] << (8 - cnt);
		reg->mant.m32[1] = (reg->mant.m32[1] >> cnt) |
				   (reg->mant.m32[0] << (32 - cnt));
		reg->mant.m32[0] = reg->mant.m32[0] >> cnt;
		break;
	case 9 ... 32:
		reg->lowmant = reg->mant.m32[1] >> (cnt - 8);
		if (reg->mant.m32[1] << (40 - cnt))
			reg->lowmant |= 1;
		reg->mant.m32[1] = (reg->mant.m32[1] >> cnt) |
				   (reg->mant.m32[0] << (32 - cnt));
		reg->mant.m32[0] = reg->mant.m32[0] >> cnt;
		break;
	case 33 ... 39:
		asm volatile ("bfextu %1{%2,#8},%0" : "=d" (reg->lowmant)
			: "m" (reg->mant.m32[0]), "d" (64 - cnt));
		if (reg->mant.m32[1] << (40 - cnt))
			reg->lowmant |= 1;
		reg->mant.m32[1] = reg->mant.m32[0] >> (cnt - 32);
		reg->mant.m32[0] = 0;
		break;
	case 40 ... 71:
		reg->lowmant = reg->mant.m32[0] >> (cnt - 40);
		if ((reg->mant.m32[0] << (72 - cnt)) || reg->mant.m32[1])
			reg->lowmant |= 1;
		reg->mant.m32[1] = reg->mant.m32[0] >> (cnt - 32);
		reg->mant.m32[0] = 0;
		break;
	default:
		reg->lowmant = reg->mant.m32[0] || reg->mant.m32[1];
		reg->mant.m32[0] = 0;
		reg->mant.m32[1] = 0;
		break;
	}
}

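/* fp_overnormalize() normalizes a mantissa whose most significant set
   bit is not yet in bit 63: it locates the first set bit with bfffo
   (bit field find first one), shifts the 64-bit mantissa left so that
   bit becomes the MSB, and returns the shift count so the caller can
   adjust the exponent. */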
static inline int fp_overnormalize(struct fp_ext *reg)
{
	int shift;

	if (reg->mant.m32[0]) {
		asm ("bfffo %1{#0,#32},%0" : "=d" (shift) : "dm" (reg->mant.m32[0]));
		reg->mant.m32[0] = (reg->mant.m32[0] << shift) | (reg->mant.m32[1] >> (32 - shift));
		reg->mant.m32[1] = (reg->mant.m32[1] << shift);
	} else {
		asm ("bfffo %1{#0,#32},%0" : "=d" (shift) : "dm" (reg->mant.m32[1]));
		reg->mant.m32[0] = (reg->mant.m32[1] << shift);
		reg->mant.m32[1] = 0;
		shift += 32;
	}

	return shift;
}

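/* fp_addmant() adds the full 72-bit mantissa of src (8 guard bits plus
   64 mantissa bits) into dest, propagating the carry from the guard
   byte upward with addx, and returns the final carry out of bit 63. */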
static inline int fp_addmant(struct fp_ext *dest, struct fp_ext *src)
{
	int carry;

	/* we assume here that gcc inserts only a move and a clr instruction */
	asm volatile ("add.b %1,%0" : "=d,g" (dest->lowmant)
		: "g,d" (src->lowmant), "0,0" (dest->lowmant));
	asm volatile ("addx.l %1,%0" : "=d" (dest->mant.m32[1])
		: "d" (src->mant.m32[1]), "0" (dest->mant.m32[1]));
	asm volatile ("addx.l %1,%0" : "=d" (dest->mant.m32[0])
		: "d" (src->mant.m32[0]), "0" (dest->mant.m32[0]));
	asm volatile ("addx.l %0,%0" : "=d" (carry) : "0" (0));

	return carry;
}

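/* fp_addcarry() renormalizes after fp_addmant() reported a carry out
   of the mantissa: the exponent is incremented and the mantissa is
   shifted right one bit, with the carry re-inserted as the new MSB.
   If the exponent reaches 0x7fff the result has overflowed, OVFL (and
   INEX2 if nonzero bits were lost) is raised, and 0 is returned. */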
static inline int fp_addcarry(struct fp_ext *reg)
{
	if (++reg->exp == 0x7fff) {
		if (reg->mant.m64)
			fp_set_sr(FPSR_EXC_INEX2);
		reg->mant.m64 = 0;
		fp_set_sr(FPSR_EXC_OVFL);
		return 0;
	}
	reg->lowmant = (reg->mant.m32[1] << 7) | (reg->lowmant ? 1 : 0);
	reg->mant.m32[1] = (reg->mant.m32[1] >> 1) |
			   (reg->mant.m32[0] << 31);
	reg->mant.m32[0] = (reg->mant.m32[0] >> 1) | 0x80000000;

	return 1;
}

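/* fp_submant() computes dest = src1 - src2 over the full 72-bit
   mantissa, borrowing from the guard byte upward with subx. */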
static inline void fp_submant(struct fp_ext *dest, struct fp_ext *src1,
			      struct fp_ext *src2)
{
	/* we assume here that gcc inserts only a move and a clr instruction */
	asm volatile ("sub.b %1,%0" : "=d,g" (dest->lowmant)
		: "g,d" (src2->lowmant), "0,0" (src1->lowmant));
	asm volatile ("subx.l %1,%0" : "=d" (dest->mant.m32[1])
		: "d" (src2->mant.m32[1]), "0" (src1->mant.m32[1]));
	asm volatile ("subx.l %1,%0" : "=d" (dest->mant.m32[0])
		: "d" (src2->mant.m32[0]), "0" (src1->mant.m32[0]));
}

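/* Primitive 64- and 96-bit operations, mapped directly onto the m68k
   mulu.l/divu.l (32x32->64 multiply, 64/32->32 divide) instructions
   and the add/addx, sub/subx carry chains.  fp_sub96c additionally
   latches the final borrow with scs and evaluates to it.  Note that
   fp_addx96 refers to a variable named "temp" (not its src argument)
   in its expansion, so it can only be used where such a local is in
   scope, as in fp_multiplymant() below. */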
#define fp_mul64(desth, destl, src1, src2) ({			\
	asm ("mulu.l %2,%1:%0" : "=d" (destl), "=d" (desth)	\
		: "dm" (src1), "0" (src2));			\
})
#define fp_div64(quot, rem, srch, srcl, div)			\
	asm ("divu.l %2,%1:%0" : "=d" (quot), "=d" (rem)	\
		: "dm" (div), "1" (srch), "0" (srcl))
#define fp_add64(dest1, dest2, src1, src2) ({			\
	asm ("add.l %1,%0" : "=d,dm" (dest2)			\
		: "dm,d" (src2), "0,0" (dest2));		\
	asm ("addx.l %1,%0" : "=d" (dest1)			\
		: "d" (src1), "0" (dest1));			\
})
#define fp_addx96(dest, src) ({					\
	/* we assume here that gcc inserts only a move and a clr instruction */ \
	asm volatile ("add.l %1,%0" : "=d,g" (dest->m32[2])	\
		: "g,d" (temp.m32[1]), "0,0" (dest->m32[2]));	\
	asm volatile ("addx.l %1,%0" : "=d" (dest->m32[1])	\
		: "d" (temp.m32[0]), "0" (dest->m32[1]));	\
	asm volatile ("addx.l %1,%0" : "=d" (dest->m32[0])	\
		: "d" (0), "0" (dest->m32[0]));			\
})
#define fp_sub64(dest, src) ({					\
	asm ("sub.l %1,%0" : "=d,dm" (dest.m32[1])		\
		: "dm,d" (src.m32[1]), "0,0" (dest.m32[1]));	\
	asm ("subx.l %1,%0" : "=d" (dest.m32[0])		\
		: "d" (src.m32[0]), "0" (dest.m32[0]));		\
})
#define fp_sub96c(dest, srch, srcm, srcl) ({			\
	char carry;						\
	asm ("sub.l %1,%0" : "=d,dm" (dest.m32[2])		\
		: "dm,d" (srcl), "0,0" (dest.m32[2]));		\
	asm ("subx.l %1,%0" : "=d" (dest.m32[1])		\
		: "d" (srcm), "0" (dest.m32[1]));		\
	asm ("subx.l %2,%1; scs %0" : "=d" (carry), "=d" (dest.m32[0])	\
		: "d" (srch), "1" (dest.m32[0]));		\
	carry;							\
})

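/* fp_multiplymant() computes the full 128-bit product of two 64-bit
   mantissae by schoolbook multiplication on 32-bit halves:

       (a1*2^32 + a0) * (b1*2^32 + b0)
	   = a1*b1*2^64 + (a1*b0 + a0*b1)*2^32 + a0*b0

   The two outer products fill dest directly; the two cross products
   are accumulated into the middle 96 bits with fp_addx96(). */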
static inline void fp_multiplymant(union fp_mant128 *dest, struct fp_ext *src1,
				   struct fp_ext *src2)
{
	union fp_mant64 temp;

	fp_mul64(dest->m32[0], dest->m32[1], src1->mant.m32[0], src2->mant.m32[0]);
	fp_mul64(dest->m32[2], dest->m32[3], src1->mant.m32[1], src2->mant.m32[1]);

	fp_mul64(temp.m32[0], temp.m32[1], src1->mant.m32[0], src2->mant.m32[1]);
	fp_addx96(dest, temp);

	fp_mul64(temp.m32[0], temp.m32[1], src1->mant.m32[1], src2->mant.m32[0]);
	fp_addx96(dest, temp);
}

static inline void fp_dividemant(union fp_mant128 *dest, struct fp_ext *src,
				 struct fp_ext *div)
{
	union fp_mant128 tmp;
	union fp_mant64 tmp64;
	unsigned long *mantp = dest->m32;
	unsigned long fix, rem, first, dummy;
	int i;

	/* the algorithm below requires the dividend (src) to be smaller
	   than div, with both having the high bit set */
	if (src->mant.m64 >= div->mant.m64) {
		fp_sub64(src->mant, div->mant);
		*mantp = 1;
	} else
		*mantp = 0;
	mantp++;

	/* Basic idea behind this algorithm: we can't divide two 64-bit
	   numbers (AB/CD) directly, but we can compute AB/C0.  That
	   quotient is off by a factor of C0/CD, so we multiply the first
	   result by this correction factor; after that the result is
	   nearly correct and only a few fix-ups are needed. */
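	/* A decimal analogy (illustrative only): to divide by 13, divide
	   by 10 instead and scale by 10/13.  E.g. 4567/13 = 351, while
	   the estimate (4567/10) * (10/13) ~= 456 * 0.769 ~= 350 is off
	   by one, which the fix-up loop below repairs. */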

	/* C0/CD can be precomputed, but that is again a 64-bit division.
	   We can make it easier by first dividing through by C, giving
	   10/1D; then a single shift suffices and the value fits into
	   32 bits. */
	fix = 0x80000000;
	dummy = div->mant.m32[1] / div->mant.m32[0] + 1;
	dummy = (dummy >> 1) | fix;
	fp_div64(fix, dummy, fix, 0, dummy);
	fix--;

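	/* Produce the remaining three 32-bit quotient digits: estimate
	   each digit by dividing by the high word of div, scale the
	   estimate by the precomputed correction factor, then subtract
	   the corresponding multiple of div from the running remainder
	   and bump the digit up by at most a few increments until the
	   remainder is smaller than div. */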
	for (i = 0; i < 3; i++, mantp++) {
		if (src->mant.m32[0] == div->mant.m32[0]) {
			fp_div64(first, rem, 0, src->mant.m32[1], div->mant.m32[0]);

			fp_mul64(*mantp, dummy, first, fix);
			*mantp += fix;
		} else {
			fp_div64(first, rem, src->mant.m32[0], src->mant.m32[1], div->mant.m32[0]);

			fp_mul64(*mantp, dummy, first, fix);
		}

		fp_mul64(tmp.m32[0], tmp.m32[1], div->mant.m32[0], first - *mantp);
		fp_add64(tmp.m32[0], tmp.m32[1], 0, rem);
		tmp.m32[2] = 0;

		fp_mul64(tmp64.m32[0], tmp64.m32[1], *mantp, div->mant.m32[1]);
		fp_sub96c(tmp, 0, tmp64.m32[0], tmp64.m32[1]);

		src->mant.m32[0] = tmp.m32[1];
		src->mant.m32[1] = tmp.m32[2];

		while (!fp_sub96c(tmp, 0, div->mant.m32[0], div->mant.m32[1])) {
			src->mant.m32[0] = tmp.m32[1];
			src->mant.m32[1] = tmp.m32[2];
			*mantp += 1;
		}
	}
}

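/* fp_putmant128() packs a 128-bit intermediate mantissa back into the
   destination register: after shifting left by the given amount (only
   0, 1, 31 and 32 can occur here), the top 64 bits go into mant, the
   next 8 bits into lowmant, and anything remaining below that is
   folded into the sticky bit. */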
static inline void fp_putmant128(struct fp_ext *dest, union fp_mant128 *src,
				 int shift)
{
	unsigned long tmp;

	switch (shift) {
	case 0:
		dest->mant.m64 = src->m64[0];
		dest->lowmant = src->m32[2] >> 24;
		if (src->m32[3] || (src->m32[2] << 8))
			dest->lowmant |= 1;
		break;
	case 1:
		asm volatile ("lsl.l #1,%0"
			: "=d" (tmp) : "0" (src->m32[2]));
		asm volatile ("roxl.l #1,%0"
			: "=d" (dest->mant.m32[1]) : "0" (src->m32[1]));
		asm volatile ("roxl.l #1,%0"
			: "=d" (dest->mant.m32[0]) : "0" (src->m32[0]));
		dest->lowmant = tmp >> 24;
		if (src->m32[3] || (tmp << 8))
			dest->lowmant |= 1;
		break;
	case 31:
		asm volatile ("lsr.l #1,%1; roxr.l #1,%0"
			: "=d" (dest->mant.m32[0])
			: "d" (src->m32[0]), "0" (src->m32[1]));
		asm volatile ("roxr.l #1,%0"
			: "=d" (dest->mant.m32[1]) : "0" (src->m32[2]));
		asm volatile ("roxr.l #1,%0"
			: "=d" (tmp) : "0" (src->m32[3]));
		dest->lowmant = tmp >> 24;
		if (src->m32[3] << 7)
			dest->lowmant |= 1;
		break;
	case 32:
		dest->mant.m32[0] = src->m32[1];
		dest->mant.m32[1] = src->m32[2];
		dest->lowmant = src->m32[3] >> 24;
		if (src->m32[3] << 8)
			dest->lowmant |= 1;
		break;
	}
}

#endif /* MULTI_ARITH_H */