^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1) // SPDX-License-Identifier: GPL-2.0-only
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2) /* IEEE754 floating point arithmetic
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3) * double precision: multiplication (ieee754dp_mul)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6) * MIPS floating point support
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7) * Copyright (C) 1994-2000 Algorithmics Ltd.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10) #include "ieee754dp.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11)
/*
 * ieee754dp_mul() - multiply two double-precision operands.
 * @x: first operand
 * @y: second operand
 *
 * The operand classes (xc, yc) are dispatched through one switch:
 *  - a signaling NaN operand is handed to ieee754dp_nanxcpt() (x takes
 *    priority over y when both are signaling);
 *  - otherwise a quiet NaN operand is returned unchanged (x takes priority
 *    when both are quiet);
 *  - INF * ZERO sets IEEE754_INVALID_OPERATION and returns ieee754dp_indef();
 *  - INF times any other non-NaN operand returns ieee754dp_inf(xs ^ ys);
 *  - ZERO times any finite operand returns ieee754dp_zero(xs ^ ys);
 *  - NORM/DNORM pairs leave the switch and have their mantissas multiplied
 *    as a 64x64 -> 128 bit product built from four 32-bit partial products.
 *
 * Return: the special value chosen by the switch, or the result of
 * ieee754dp_format(rs, re, rm) for finite non-zero operands.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 12) union ieee754dp ieee754dp_mul(union ieee754dp x, union ieee754dp y)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 13) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 14) int re; /* result exponent, starts as xe + ye */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 15) int rs; /* result sign, xs ^ ys */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 16) u64 rm; /* result mantissa passed to ieee754dp_format() */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 17) unsigned int lxm; /* xm truncated to unsigned int (low half) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 18) unsigned int hxm; /* xm >> 32 (high half) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 19) unsigned int lym; /* ym truncated to unsigned int (low half) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 20) unsigned int hym; /* ym >> 32 (high half) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 21) u64 lrm; /* low 64 bits of the mantissa product */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 22) u64 hrm; /* high 64 bits of the mantissa product */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 23) u64 t; /* current cross product: lxm*hym, then hxm*lym */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 24) u64 at; /* lrm + (t << 32), held separately to detect carry */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 25) /* NOTE(review): xc/xs/xe/xm and yc/ys/ye/ym are not declared in this function; presumably the macros below provide and fill them - confirm in ieee754dp.h */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 26) COMPXDP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 27) COMPYDP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 28)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 29) EXPLODEXDP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 30) EXPLODEYDP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 31)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 32) ieee754_clearcx(); /* NOTE(review): presumably clears previously recorded exception causes - confirm */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 33) /* NOTE(review): FLUSHXDP/FLUSHYDP are opaque here; presumably flush-to-zero handling of denormal inputs - confirm */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 34) FLUSHXDP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 35) FLUSHYDP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 36) /* Dispatch on the (xc, yc) class pair; every group except the final NORM/DNORM ones returns directly. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 37) switch (CLPAIR(xc, yc)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 38) case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_SNAN):
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 39) case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_SNAN):
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 40) case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_SNAN):
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 41) case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_SNAN):
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 42) case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_SNAN):
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 43) return ieee754dp_nanxcpt(y); /* y is a signaling NaN and x is not */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 44)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 45) case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_SNAN):
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 46) case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_QNAN):
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 47) case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_ZERO):
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 48) case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_NORM):
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 49) case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_DNORM):
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 50) case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_INF):
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 51) return ieee754dp_nanxcpt(x); /* x is a signaling NaN, whatever y is */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 52)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 53) case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_QNAN):
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 54) case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_QNAN):
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 55) case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_QNAN):
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 56) case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_QNAN):
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 57) return y; /* y is a quiet NaN and x is not a NaN: return y unchanged */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 58)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 59) case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN):
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 60) case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_ZERO):
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 61) case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_NORM):
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 62) case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_DNORM):
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 63) case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_INF):
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 64) return x; /* x is a quiet NaN and y is not signaling: return x unchanged */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 65)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 66)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 67) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 68) * Infinity handling
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 69) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 70) case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_ZERO):
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 71) case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF):
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 72) ieee754_setcx(IEEE754_INVALID_OPERATION);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 73) return ieee754dp_indef(); /* INF * ZERO: invalid operation was flagged on the line above */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 74)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 75) case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_INF):
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 76) case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_INF):
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 77) case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM):
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 78) case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM):
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 79) case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF):
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 80) return ieee754dp_inf(xs ^ ys); /* INF times NORM/DNORM/INF: infinity, sign from xs ^ ys */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 81)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 82) case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO):
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 83) case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_NORM):
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 84) case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_DNORM):
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 85) case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_ZERO):
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 86) case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO):
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 87) return ieee754dp_zero(xs ^ ys); /* ZERO times ZERO/NORM/DNORM: zero, sign from xs ^ ys */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 88)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 89) /* Finite non-zero operands: apply DPDNORMX/DPDNORMY to whichever operand is DNORM, then leave the switch. NOTE(review): presumably these normalise so the hidden-bit asserts below hold - confirm */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 90) case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM):
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 91) DPDNORMX;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 92) fallthrough;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 93) case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM):
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 94) DPDNORMY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 95) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 96)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 97) case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_NORM):
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 98) DPDNORMX;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 99) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 100)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 101) case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_NORM):
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 102) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 103) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 104) /* rm = xm * ym, re = xe+ye basically */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 105) assert(xm & DP_HIDDEN_BIT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 106) assert(ym & DP_HIDDEN_BIT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 107)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 108) re = xe + ye;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 109) rs = xs ^ ys;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 110)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 111) /* shunt to top of word */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 112) xm <<= 64 - (DP_FBITS + 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 113) ym <<= 64 - (DP_FBITS + 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 114) /* NOTE(review): assumes DP_HIDDEN_BIT is bit DP_FBITS, so after the shift it sits in bit 63 of each operand - confirm */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 115) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 116) * Multiply 64 bits xm, ym to give high 64 bits rm with stickiness.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 117) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 118) /* Four partial products of the 32-bit halves; NOTE(review): DPXMULT presumably yields the full 64-bit product of two 32-bit values - confirm */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 119) lxm = xm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 120) hxm = xm >> 32;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 121) lym = ym;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 122) hym = ym >> 32;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 123)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 124) lrm = DPXMULT(lxm, lym);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 125) hrm = DPXMULT(hxm, hym);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 126) /* First cross term lxm*hym: its low 32 bits go into the upper half of lrm, its high 32 bits into hrm */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 127) t = DPXMULT(lxm, hym);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 128)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 129) at = lrm + (t << 32);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 130) hrm += at < lrm; /* unsigned wrap-around means a carry out of the low 64 bits */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 131) lrm = at;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 132)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 133) hrm = hrm + (t >> 32);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 134) /* Second cross term hxm*lym, accumulated the same way */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 135) t = DPXMULT(hxm, lym);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 136)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 137) at = lrm + (t << 32);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 138) hrm += at < lrm; /* carry out of the low 64 bits */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 139) lrm = at;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 140)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 141) hrm = hrm + (t >> 32);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 142) /* Keep the high 64 bits; any non-zero bit in the low 64 is folded into bit 0 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 143) rm = hrm | (lrm != 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 144)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 145) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 146) * Sticky shift down to normal rounding precision.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 147) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 148) if ((s64) rm < 0) { /* bit 63 of rm is set */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 149) rm = (rm >> (64 - (DP_FBITS + 1 + 3))) |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 150) ((rm << (DP_FBITS + 1 + 3)) != 0); /* sticky: 1 if any bit dropped by the right shift was set */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 151) re++; /* this branch shifts one place further than the else branch, so bump the exponent */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 152) } else { /* bit 63 clear: shift right by one place less, exponent unchanged */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 153) rm = (rm >> (64 - (DP_FBITS + 1 + 3 + 1))) |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 154) ((rm << (DP_FBITS + 1 + 3 + 1)) != 0); /* sticky: 1 if any bit dropped by the right shift was set */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 155) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 156) assert(rm & (DP_HIDDEN_BIT << 3)); /* hidden bit must now sit three positions above its normal place */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 157) /* NOTE(review): final rounding/packing presumably happens inside ieee754dp_format() - confirm */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 158) return ieee754dp_format(rs, re, rm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 159) }