^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1) // SPDX-License-Identifier: GPL-2.0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2) /*---------------------------------------------------------------------------+
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3) | poly_sin.c |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4) | |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5) | Computation of an approximation of the sin function and the cosine |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6) | function by a polynomial. |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7) | |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8) | Copyright (C) 1992,1993,1994,1997,1999 |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9) | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10) | E-mail billm@melbpc.org.au |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11) | |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 12) | |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 13) +---------------------------------------------------------------------------*/
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 14)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 15) #include "exception.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 16) #include "reg_constant.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 17) #include "fpu_emu.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 18) #include "fpu_system.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 19) #include "control_w.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 20) #include "poly.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 21)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 22) #define N_COEFF_P 4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 23) #define N_COEFF_N 4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 24)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 25) static const unsigned long long pos_terms_l[N_COEFF_P] = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 26) 0xaaaaaaaaaaaaaaabLL,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 27) 0x00d00d00d00cf906LL,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 28) 0x000006b99159a8bbLL,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 29) 0x000000000d7392e6LL
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 30) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 31)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 32) static const unsigned long long neg_terms_l[N_COEFF_N] = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 33) 0x2222222222222167LL,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 34) 0x0002e3bc74aab624LL,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 35) 0x0000000b09229062LL,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 36) 0x00000000000c7973LL
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 37) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 38)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 39) #define N_COEFF_PH 4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 40) #define N_COEFF_NH 4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 41) static const unsigned long long pos_terms_h[N_COEFF_PH] = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 42) 0x0000000000000000LL,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 43) 0x05b05b05b05b0406LL,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 44) 0x000049f93edd91a9LL,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 45) 0x00000000c9c9ed62LL
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 46) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 47)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 48) static const unsigned long long neg_terms_h[N_COEFF_NH] = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 49) 0xaaaaaaaaaaaaaa98LL,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 50) 0x001a01a01a019064LL,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 51) 0x0000008f76c68a77LL,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 52) 0x0000000000d58f5eLL
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 53) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 54)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 55) /*--- poly_sine() -----------------------------------------------------------+
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 56) | |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 57) +---------------------------------------------------------------------------*/
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 58) void poly_sine(FPU_REG *st0_ptr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 59) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 60) int exponent, echange;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 61) Xsig accumulator, argSqrd, argTo4;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 62) unsigned long fix_up, adj;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 63) unsigned long long fixed_arg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 64) FPU_REG result;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 65)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 66) exponent = exponent(st0_ptr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 67)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 68) accumulator.lsw = accumulator.midw = accumulator.msw = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 69)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 70) /* Split into two ranges, for arguments below and above 1.0 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 71) /* The boundary between upper and lower is approx 0.88309101259 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 72) if ((exponent < -1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 73) || ((exponent == -1) && (st0_ptr->sigh <= 0xe21240aa))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 74) /* The argument is <= 0.88309101259 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 75)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 76) argSqrd.msw = st0_ptr->sigh;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 77) argSqrd.midw = st0_ptr->sigl;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 78) argSqrd.lsw = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 79) mul64_Xsig(&argSqrd, &significand(st0_ptr));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 80) shr_Xsig(&argSqrd, 2 * (-1 - exponent));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 81) argTo4.msw = argSqrd.msw;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 82) argTo4.midw = argSqrd.midw;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 83) argTo4.lsw = argSqrd.lsw;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 84) mul_Xsig_Xsig(&argTo4, &argTo4);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 85)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 86) polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_l,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 87) N_COEFF_N - 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 88) mul_Xsig_Xsig(&accumulator, &argSqrd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 89) negate_Xsig(&accumulator);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 90)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 91) polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_l,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 92) N_COEFF_P - 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 93)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 94) shr_Xsig(&accumulator, 2); /* Divide by four */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 95) accumulator.msw |= 0x80000000; /* Add 1.0 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 96)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 97) mul64_Xsig(&accumulator, &significand(st0_ptr));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 98) mul64_Xsig(&accumulator, &significand(st0_ptr));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 99) mul64_Xsig(&accumulator, &significand(st0_ptr));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 100)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 101) /* Divide by four, FPU_REG compatible, etc */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 102) exponent = 3 * exponent;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 103)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 104) /* The minimum exponent difference is 3 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 105) shr_Xsig(&accumulator, exponent(st0_ptr) - exponent);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 106)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 107) negate_Xsig(&accumulator);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 108) XSIG_LL(accumulator) += significand(st0_ptr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 109)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 110) echange = round_Xsig(&accumulator);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 111)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 112) setexponentpos(&result, exponent(st0_ptr) + echange);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 113) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 114) /* The argument is > 0.88309101259 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 115) /* We use sin(st(0)) = cos(pi/2-st(0)) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 116)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 117) fixed_arg = significand(st0_ptr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 118)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 119) if (exponent == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 120) /* The argument is >= 1.0 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 121)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 122) /* Put the binary point at the left. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 123) fixed_arg <<= 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 124) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 125) /* pi/2 in hex is: 1.921fb54442d18469 898CC51701B839A2 52049C1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 126) fixed_arg = 0x921fb54442d18469LL - fixed_arg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 127) /* There is a special case which arises due to rounding, to fix here. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 128) if (fixed_arg == 0xffffffffffffffffLL)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 129) fixed_arg = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 130)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 131) XSIG_LL(argSqrd) = fixed_arg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 132) argSqrd.lsw = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 133) mul64_Xsig(&argSqrd, &fixed_arg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 134)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 135) XSIG_LL(argTo4) = XSIG_LL(argSqrd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 136) argTo4.lsw = argSqrd.lsw;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 137) mul_Xsig_Xsig(&argTo4, &argTo4);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 138)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 139) polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_h,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 140) N_COEFF_NH - 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 141) mul_Xsig_Xsig(&accumulator, &argSqrd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 142) negate_Xsig(&accumulator);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 143)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 144) polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_h,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 145) N_COEFF_PH - 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 146) negate_Xsig(&accumulator);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 147)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 148) mul64_Xsig(&accumulator, &fixed_arg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 149) mul64_Xsig(&accumulator, &fixed_arg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 150)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 151) shr_Xsig(&accumulator, 3);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 152) negate_Xsig(&accumulator);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 153)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 154) add_Xsig_Xsig(&accumulator, &argSqrd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 155)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 156) shr_Xsig(&accumulator, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 157)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 158) accumulator.lsw |= 1; /* A zero accumulator here would cause problems */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 159) negate_Xsig(&accumulator);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 160)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 161) /* The basic computation is complete. Now fix the answer to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 162) compensate for the error due to the approximation used for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 163) pi/2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 164) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 165)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 166) /* This has an exponent of -65 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 167) fix_up = 0x898cc517;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 168) /* The fix-up needs to be improved for larger args */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 169) if (argSqrd.msw & 0xffc00000) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 170) /* Get about 32 bit precision in these: */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 171) fix_up -= mul_32_32(0x898cc517, argSqrd.msw) / 6;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 172) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 173) fix_up = mul_32_32(fix_up, LL_MSW(fixed_arg));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 174)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 175) adj = accumulator.lsw; /* temp save */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 176) accumulator.lsw -= fix_up;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 177) if (accumulator.lsw > adj)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 178) XSIG_LL(accumulator)--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 179)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 180) echange = round_Xsig(&accumulator);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 181)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 182) setexponentpos(&result, echange - 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 183) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 184)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 185) significand(&result) = XSIG_LL(accumulator);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 186) setsign(&result, getsign(st0_ptr));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 187) FPU_copy_to_reg0(&result, TAG_Valid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 188)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 189) #ifdef PARANOID
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 190) if ((exponent(&result) >= 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 191) && (significand(&result) > 0x8000000000000000LL)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 192) EXCEPTION(EX_INTERNAL | 0x150);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 193) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 194) #endif /* PARANOID */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 195)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 196) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 197)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 198) /*--- poly_cos() ------------------------------------------------------------+
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 199) | |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 200) +---------------------------------------------------------------------------*/
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 201) void poly_cos(FPU_REG *st0_ptr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 202) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 203) FPU_REG result;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 204) long int exponent, exp2, echange;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 205) Xsig accumulator, argSqrd, fix_up, argTo4;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 206) unsigned long long fixed_arg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 207)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 208) #ifdef PARANOID
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 209) if ((exponent(st0_ptr) > 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 210) || ((exponent(st0_ptr) == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 211) && (significand(st0_ptr) > 0xc90fdaa22168c234LL))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 212) EXCEPTION(EX_Invalid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 213) FPU_copy_to_reg0(&CONST_QNaN, TAG_Special);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 214) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 215) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 216) #endif /* PARANOID */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 217)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 218) exponent = exponent(st0_ptr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 219)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 220) accumulator.lsw = accumulator.midw = accumulator.msw = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 221)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 222) if ((exponent < -1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 223) || ((exponent == -1) && (st0_ptr->sigh <= 0xb00d6f54))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 224) /* arg is < 0.687705 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 225)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 226) argSqrd.msw = st0_ptr->sigh;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 227) argSqrd.midw = st0_ptr->sigl;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 228) argSqrd.lsw = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 229) mul64_Xsig(&argSqrd, &significand(st0_ptr));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 230)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 231) if (exponent < -1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 232) /* shift the argument right by the required places */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 233) shr_Xsig(&argSqrd, 2 * (-1 - exponent));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 234) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 235)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 236) argTo4.msw = argSqrd.msw;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 237) argTo4.midw = argSqrd.midw;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 238) argTo4.lsw = argSqrd.lsw;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 239) mul_Xsig_Xsig(&argTo4, &argTo4);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 240)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 241) polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_h,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 242) N_COEFF_NH - 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 243) mul_Xsig_Xsig(&accumulator, &argSqrd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 244) negate_Xsig(&accumulator);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 245)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 246) polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_h,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 247) N_COEFF_PH - 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 248) negate_Xsig(&accumulator);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 249)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 250) mul64_Xsig(&accumulator, &significand(st0_ptr));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 251) mul64_Xsig(&accumulator, &significand(st0_ptr));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 252) shr_Xsig(&accumulator, -2 * (1 + exponent));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 253)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 254) shr_Xsig(&accumulator, 3);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 255) negate_Xsig(&accumulator);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 256)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 257) add_Xsig_Xsig(&accumulator, &argSqrd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 258)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 259) shr_Xsig(&accumulator, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 260)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 261) /* It doesn't matter if accumulator is all zero here, the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 262) following code will work ok */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 263) negate_Xsig(&accumulator);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 264)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 265) if (accumulator.lsw & 0x80000000)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 266) XSIG_LL(accumulator)++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 267) if (accumulator.msw == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 268) /* The result is 1.0 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 269) FPU_copy_to_reg0(&CONST_1, TAG_Valid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 270) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 271) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 272) significand(&result) = XSIG_LL(accumulator);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 273)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 274) /* will be a valid positive nr with expon = -1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 275) setexponentpos(&result, -1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 276) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 277) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 278) fixed_arg = significand(st0_ptr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 279)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 280) if (exponent == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 281) /* The argument is >= 1.0 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 282)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 283) /* Put the binary point at the left. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 284) fixed_arg <<= 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 285) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 286) /* pi/2 in hex is: 1.921fb54442d18469 898CC51701B839A2 52049C1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 287) fixed_arg = 0x921fb54442d18469LL - fixed_arg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 288) /* There is a special case which arises due to rounding, to fix here. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 289) if (fixed_arg == 0xffffffffffffffffLL)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 290) fixed_arg = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 291)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 292) exponent = -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 293) exp2 = -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 294)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 295) /* A shift is needed here only for a narrow range of arguments,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 296) i.e. for fixed_arg approx 2^-32, but we pick up more... */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 297) if (!(LL_MSW(fixed_arg) & 0xffff0000)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 298) fixed_arg <<= 16;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 299) exponent -= 16;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 300) exp2 -= 16;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 301) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 302)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 303) XSIG_LL(argSqrd) = fixed_arg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 304) argSqrd.lsw = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 305) mul64_Xsig(&argSqrd, &fixed_arg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 306)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 307) if (exponent < -1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 308) /* shift the argument right by the required places */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 309) shr_Xsig(&argSqrd, 2 * (-1 - exponent));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 310) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 311)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 312) argTo4.msw = argSqrd.msw;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 313) argTo4.midw = argSqrd.midw;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 314) argTo4.lsw = argSqrd.lsw;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 315) mul_Xsig_Xsig(&argTo4, &argTo4);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 316)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 317) polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_l,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 318) N_COEFF_N - 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 319) mul_Xsig_Xsig(&accumulator, &argSqrd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 320) negate_Xsig(&accumulator);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 321)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 322) polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_l,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 323) N_COEFF_P - 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 324)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 325) shr_Xsig(&accumulator, 2); /* Divide by four */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 326) accumulator.msw |= 0x80000000; /* Add 1.0 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 327)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 328) mul64_Xsig(&accumulator, &fixed_arg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 329) mul64_Xsig(&accumulator, &fixed_arg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 330) mul64_Xsig(&accumulator, &fixed_arg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 331)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 332) /* Divide by four, FPU_REG compatible, etc */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 333) exponent = 3 * exponent;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 334)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 335) /* The minimum exponent difference is 3 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 336) shr_Xsig(&accumulator, exp2 - exponent);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 337)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 338) negate_Xsig(&accumulator);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 339) XSIG_LL(accumulator) += fixed_arg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 340)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 341) /* The basic computation is complete. Now fix the answer to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 342) compensate for the error due to the approximation used for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 343) pi/2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 344) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 345)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 346) /* This has an exponent of -65 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 347) XSIG_LL(fix_up) = 0x898cc51701b839a2ll;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 348) fix_up.lsw = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 349)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 350) /* The fix-up needs to be improved for larger args */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 351) if (argSqrd.msw & 0xffc00000) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 352) /* Get about 32 bit precision in these: */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 353) fix_up.msw -= mul_32_32(0x898cc517, argSqrd.msw) / 2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 354) fix_up.msw += mul_32_32(0x898cc517, argTo4.msw) / 24;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 355) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 356)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 357) exp2 += norm_Xsig(&accumulator);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 358) shr_Xsig(&accumulator, 1); /* Prevent overflow */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 359) exp2++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 360) shr_Xsig(&fix_up, 65 + exp2);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 361)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 362) add_Xsig_Xsig(&accumulator, &fix_up);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 363)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 364) echange = round_Xsig(&accumulator);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 365)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 366) setexponentpos(&result, exp2 + echange);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 367) significand(&result) = XSIG_LL(accumulator);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 368) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 369)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 370) FPU_copy_to_reg0(&result, TAG_Valid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 371)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 372) #ifdef PARANOID
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 373) if ((exponent(&result) >= 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 374) && (significand(&result) > 0x8000000000000000LL)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 375) EXCEPTION(EX_INTERNAL | 0x151);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 376) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 377) #endif /* PARANOID */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 378)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 379) }