^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1) // SPDX-License-Identifier: GPL-2.0-only
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2) /* IEEE754 floating point arithmetic
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3) * single precision square root
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6) * MIPS floating point support
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7) * Copyright (C) 1994-2000 Algorithmics Ltd.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10) #include "ieee754sp.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 12) union ieee754sp ieee754sp_sqrt(union ieee754sp x)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 13) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 14) int ix, s, q, m, t, i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 15) unsigned int r;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 16) COMPXSP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 17)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 18) /* take care of Inf and NaN */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 19)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 20) EXPLODEXSP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 21) ieee754_clearcx();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 22) FLUSHXSP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 23)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 24) /* x == INF or NAN? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 25) switch (xc) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 26) case IEEE754_CLASS_SNAN:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 27) return ieee754sp_nanxcpt(x);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 28)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 29) case IEEE754_CLASS_QNAN:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 30) /* sqrt(Nan) = Nan */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 31) return x;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 32)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 33) case IEEE754_CLASS_ZERO:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 34) /* sqrt(0) = 0 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 35) return x;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 36)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 37) case IEEE754_CLASS_INF:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 38) if (xs) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 39) /* sqrt(-Inf) = Nan */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 40) ieee754_setcx(IEEE754_INVALID_OPERATION);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 41) return ieee754sp_indef();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 42) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 43) /* sqrt(+Inf) = Inf */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 44) return x;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 45)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 46) case IEEE754_CLASS_DNORM:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 47) case IEEE754_CLASS_NORM:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 48) if (xs) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 49) /* sqrt(-x) = Nan */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 50) ieee754_setcx(IEEE754_INVALID_OPERATION);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 51) return ieee754sp_indef();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 52) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 53) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 54) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 55)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 56) ix = x.bits;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 57)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 58) /* normalize x */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 59) m = (ix >> 23);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 60) if (m == 0) { /* subnormal x */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 61) for (i = 0; (ix & 0x00800000) == 0; i++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 62) ix <<= 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 63) m -= i - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 64) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 65) m -= 127; /* unbias exponent */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 66) ix = (ix & 0x007fffff) | 0x00800000;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 67) if (m & 1) /* odd m, double x to make it even */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 68) ix += ix;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 69) m >>= 1; /* m = [m/2] */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 70)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 71) /* generate sqrt(x) bit by bit */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 72) ix += ix;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 73) s = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 74) q = 0; /* q = sqrt(x) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 75) r = 0x01000000; /* r = moving bit from right to left */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 76)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 77) while (r != 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 78) t = s + r;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 79) if (t <= ix) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 80) s = t + r;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 81) ix -= t;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 82) q += r;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 83) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 84) ix += ix;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 85) r >>= 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 86) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 87)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 88) if (ix != 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 89) ieee754_setcx(IEEE754_INEXACT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 90) switch (ieee754_csr.rm) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 91) case FPU_CSR_RU:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 92) q += 2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 93) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 94) case FPU_CSR_RN:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 95) q += (q & 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 96) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 97) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 98) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 99) ix = (q >> 1) + 0x3f000000;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 100) ix += (m << 23);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 101) x.bits = ix;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 102) return x;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 103) }