^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1) // SPDX-License-Identifier: GPL-2.0-only
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3) * IEEE754 floating point arithmetic
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4) * double precision: MADDF.f (Fused Multiply Add)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5) * MADDF.fmt: FPR[fd] = FPR[fd] + (FPR[fs] x FPR[ft])
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7) * MIPS floating point support
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8) * Copyright (C) 2015 Imagination Technologies, Ltd.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9) * Author: Markos Chandras <markos.chandras@imgtec.com>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 12) #include "ieee754dp.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 13)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 14)
/*
 * 128-bit logical shift right with a "sticky" least significant bit.
 *
 * The 128-bit quantity is held as two 64-bit halves, *hptr (high) and
 * *lptr (low).  It is shifted right by @count bits in place.  If any
 * non-zero bit is shifted out at the bottom, bit 0 of the result is set,
 * so the fact that precision was lost stays visible to later code.
 *
 * @hptr:  high 64 bits, updated in place
 * @lptr:  low 64 bits, updated in place
 * @count: number of bit positions to shift; values <= 0 leave the
 *         operand untouched, values >= 128 collapse it to 0 or 1
 */
static void srl128(u64 *hptr, u64 *lptr, int count)
{
	u64 low;

	/*
	 * Nothing to shift.  This must be handled up front: the general
	 * branch below would otherwise evaluate "x << (64 - 0)", and shifting
	 * a 64-bit value by 64 (or by a negative amount) is undefined in C.
	 */
	if (count <= 0)
		return;

	if (count >= 128) {
		/* Everything is shifted out; only the sticky bit can remain. */
		*lptr = *hptr != 0 || *lptr != 0;
		*hptr = 0;
	} else if (count >= 64) {
		if (count == 64) {
			/* High half becomes the low half; old low half is lost. */
			*lptr = *hptr | (*lptr != 0);
		} else {
			low = *lptr;
			*lptr = *hptr >> (count - 64);
			/* Sticky: bits dropped from the high half, or any low bit. */
			*lptr |= (*hptr << (128 - count)) != 0 || low != 0;
		}
		*hptr = 0;
	} else {
		/* 1 <= count <= 63: bits move from the high into the low half. */
		low = *lptr;
		*lptr = low >> count | *hptr << (64 - count);
		/* Sticky: the bits of the old low half that fell off the end. */
		*lptr |= (low << (64 - count)) != 0;
		*hptr = *hptr >> count;
	}
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 39)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 40) static union ieee754dp _dp_maddf(union ieee754dp z, union ieee754dp x,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 41) union ieee754dp y, enum maddf_flags flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 42) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 43) int re;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 44) int rs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 45) unsigned int lxm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 46) unsigned int hxm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 47) unsigned int lym;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 48) unsigned int hym;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 49) u64 lrm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 50) u64 hrm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 51) u64 lzm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 52) u64 hzm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 53) u64 t;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 54) u64 at;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 55) int s;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 56)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 57) COMPXDP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 58) COMPYDP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 59) COMPZDP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 60)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 61) EXPLODEXDP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 62) EXPLODEYDP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 63) EXPLODEZDP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 64)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 65) FLUSHXDP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 66) FLUSHYDP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 67) FLUSHZDP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 68)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 69) ieee754_clearcx();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 70)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 71) rs = xs ^ ys;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 72) if (flags & MADDF_NEGATE_PRODUCT)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 73) rs ^= 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 74) if (flags & MADDF_NEGATE_ADDITION)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 75) zs ^= 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 76)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 77) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 78) * Handle the cases when at least one of x, y or z is a NaN.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 79) * Order of precedence is sNaN, qNaN and z, x, y.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 80) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 81) if (zc == IEEE754_CLASS_SNAN)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 82) return ieee754dp_nanxcpt(z);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 83) if (xc == IEEE754_CLASS_SNAN)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 84) return ieee754dp_nanxcpt(x);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 85) if (yc == IEEE754_CLASS_SNAN)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 86) return ieee754dp_nanxcpt(y);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 87) if (zc == IEEE754_CLASS_QNAN)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 88) return z;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 89) if (xc == IEEE754_CLASS_QNAN)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 90) return x;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 91) if (yc == IEEE754_CLASS_QNAN)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 92) return y;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 93)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 94) if (zc == IEEE754_CLASS_DNORM)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 95) DPDNORMZ;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 96) /* ZERO z cases are handled separately below */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 97)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 98) switch (CLPAIR(xc, yc)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 99)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 100) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 101) * Infinity handling
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 102) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 103) case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_ZERO):
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 104) case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF):
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 105) ieee754_setcx(IEEE754_INVALID_OPERATION);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 106) return ieee754dp_indef();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 107)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 108) case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_INF):
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 109) case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_INF):
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 110) case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM):
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 111) case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM):
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 112) case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF):
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 113) if ((zc == IEEE754_CLASS_INF) && (zs != rs)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 114) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 115) * Cases of addition of infinities with opposite signs
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 116) * or subtraction of infinities with same signs.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 117) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 118) ieee754_setcx(IEEE754_INVALID_OPERATION);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 119) return ieee754dp_indef();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 120) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 121) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 122) * z is here either not an infinity, or an infinity having the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 123) * same sign as product (x*y). The result must be an infinity,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 124) * and its sign is determined only by the sign of product (x*y).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 125) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 126) return ieee754dp_inf(rs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 127)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 128) case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO):
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 129) case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_NORM):
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 130) case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_DNORM):
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 131) case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_ZERO):
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 132) case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO):
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 133) if (zc == IEEE754_CLASS_INF)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 134) return ieee754dp_inf(zs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 135) if (zc == IEEE754_CLASS_ZERO) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 136) /* Handle cases +0 + (-0) and similar ones. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 137) if (zs == rs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 138) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 139) * Cases of addition of zeros of equal signs
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 140) * or subtraction of zeroes of opposite signs.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 141) * The sign of the resulting zero is in any
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 142) * such case determined only by the sign of z.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 143) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 144) return z;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 145)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 146) return ieee754dp_zero(ieee754_csr.rm == FPU_CSR_RD);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 147) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 148) /* x*y is here 0, and z is not 0, so just return z */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 149) return z;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 150)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 151) case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM):
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 152) DPDNORMX;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 153) fallthrough;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 154) case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM):
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 155) if (zc == IEEE754_CLASS_INF)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 156) return ieee754dp_inf(zs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 157) DPDNORMY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 158) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 159)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 160) case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_NORM):
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 161) if (zc == IEEE754_CLASS_INF)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 162) return ieee754dp_inf(zs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 163) DPDNORMX;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 164) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 165)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 166) case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_NORM):
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 167) if (zc == IEEE754_CLASS_INF)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 168) return ieee754dp_inf(zs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 169) /* continue to real computations */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 170) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 171)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 172) /* Finally get to do some computation */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 173)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 174) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 175) * Do the multiplication bit first
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 176) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 177) * rm = xm * ym, re = xe + ye basically
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 178) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 179) * At this point xm and ym should have been normalized.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 180) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 181) assert(xm & DP_HIDDEN_BIT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 182) assert(ym & DP_HIDDEN_BIT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 183)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 184) re = xe + ye;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 185)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 186) /* shunt to top of word */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 187) xm <<= 64 - (DP_FBITS + 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 188) ym <<= 64 - (DP_FBITS + 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 189)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 190) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 191) * Multiply 64 bits xm and ym to give 128 bits result in hrm:lrm.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 192) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 193)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 194) lxm = xm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 195) hxm = xm >> 32;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 196) lym = ym;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 197) hym = ym >> 32;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 198)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 199) lrm = DPXMULT(lxm, lym);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 200) hrm = DPXMULT(hxm, hym);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 201)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 202) t = DPXMULT(lxm, hym);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 203)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 204) at = lrm + (t << 32);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 205) hrm += at < lrm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 206) lrm = at;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 207)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 208) hrm = hrm + (t >> 32);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 209)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 210) t = DPXMULT(hxm, lym);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 211)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 212) at = lrm + (t << 32);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 213) hrm += at < lrm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 214) lrm = at;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 215)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 216) hrm = hrm + (t >> 32);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 217)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 218) /* Put explicit bit at bit 126 if necessary */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 219) if ((int64_t)hrm < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 220) lrm = (hrm << 63) | (lrm >> 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 221) hrm = hrm >> 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 222) re++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 223) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 224)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 225) assert(hrm & (1 << 62));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 226)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 227) if (zc == IEEE754_CLASS_ZERO) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 228) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 229) * Move explicit bit from bit 126 to bit 55 since the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 230) * ieee754dp_format code expects the mantissa to be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 231) * 56 bits wide (53 + 3 rounding bits).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 232) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 233) srl128(&hrm, &lrm, (126 - 55));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 234) return ieee754dp_format(rs, re, lrm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 235) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 236)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 237) /* Move explicit bit from bit 52 to bit 126 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 238) lzm = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 239) hzm = zm << 10;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 240) assert(hzm & (1 << 62));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 241)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 242) /* Make the exponents the same */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 243) if (ze > re) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 244) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 245) * Have to shift y fraction right to align.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 246) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 247) s = ze - re;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 248) srl128(&hrm, &lrm, s);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 249) re += s;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 250) } else if (re > ze) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 251) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 252) * Have to shift x fraction right to align.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 253) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 254) s = re - ze;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 255) srl128(&hzm, &lzm, s);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 256) ze += s;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 257) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 258) assert(ze == re);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 259) assert(ze <= DP_EMAX);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 260)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 261) /* Do the addition */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 262) if (zs == rs) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 263) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 264) * Generate 128 bit result by adding two 127 bit numbers
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 265) * leaving result in hzm:lzm, zs and ze.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 266) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 267) hzm = hzm + hrm + (lzm > (lzm + lrm));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 268) lzm = lzm + lrm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 269) if ((int64_t)hzm < 0) { /* carry out */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 270) srl128(&hzm, &lzm, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 271) ze++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 272) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 273) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 274) if (hzm > hrm || (hzm == hrm && lzm >= lrm)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 275) hzm = hzm - hrm - (lzm < lrm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 276) lzm = lzm - lrm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 277) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 278) hzm = hrm - hzm - (lrm < lzm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 279) lzm = lrm - lzm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 280) zs = rs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 281) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 282) if (lzm == 0 && hzm == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 283) return ieee754dp_zero(ieee754_csr.rm == FPU_CSR_RD);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 284)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 285) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 286) * Put explicit bit at bit 126 if necessary.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 287) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 288) if (hzm == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 289) /* left shift by 63 or 64 bits */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 290) if ((int64_t)lzm < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 291) /* MSB of lzm is the explicit bit */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 292) hzm = lzm >> 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 293) lzm = lzm << 63;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 294) ze -= 63;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 295) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 296) hzm = lzm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 297) lzm = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 298) ze -= 64;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 299) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 300) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 301)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 302) t = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 303) while ((hzm >> (62 - t)) == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 304) t++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 305)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 306) assert(t <= 62);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 307) if (t) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 308) hzm = hzm << t | lzm >> (64 - t);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 309) lzm = lzm << t;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 310) ze -= t;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 311) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 312) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 313)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 314) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 315) * Move explicit bit from bit 126 to bit 55 since the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 316) * ieee754dp_format code expects the mantissa to be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 317) * 56 bits wide (53 + 3 rounding bits).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 318) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 319) srl128(&hzm, &lzm, (126 - 55));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 320)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 321) return ieee754dp_format(zs, ze, lzm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 322) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 323)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 324) union ieee754dp ieee754dp_maddf(union ieee754dp z, union ieee754dp x,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 325) union ieee754dp y)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 326) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 327) return _dp_maddf(z, x, y, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 328) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 329)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 330) union ieee754dp ieee754dp_msubf(union ieee754dp z, union ieee754dp x,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 331) union ieee754dp y)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 332) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 333) return _dp_maddf(z, x, y, MADDF_NEGATE_PRODUCT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 334) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 335)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 336) union ieee754dp ieee754dp_madd(union ieee754dp z, union ieee754dp x,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 337) union ieee754dp y)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 338) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 339) return _dp_maddf(z, x, y, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 340) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 341)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 342) union ieee754dp ieee754dp_msub(union ieee754dp z, union ieee754dp x,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 343) union ieee754dp y)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 344) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 345) return _dp_maddf(z, x, y, MADDF_NEGATE_ADDITION);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 346) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 347)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 348) union ieee754dp ieee754dp_nmadd(union ieee754dp z, union ieee754dp x,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 349) union ieee754dp y)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 350) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 351) return _dp_maddf(z, x, y, MADDF_NEGATE_PRODUCT|MADDF_NEGATE_ADDITION);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 352) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 353)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 354) union ieee754dp ieee754dp_nmsub(union ieee754dp z, union ieee754dp x,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 355) union ieee754dp y)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 356) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 357) return _dp_maddf(z, x, y, MADDF_NEGATE_PRODUCT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 358) }