/* SPDX-License-Identifier: GPL-2.0 */
/*---------------------------------------------------------------------------+
 |  poly.h                                                                    |
 |                                                                            |
 |  Header file for the FPU-emu poly*.c source files.                         |
 |                                                                            |
 | Copyright (C) 1994,1999                                                    |
 |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,       |
 |                       Australia.  E-mail   billm@melbpc.org.au             |
 |                                                                            |
 | Declarations and definitions for functions operating on Xsig (12-byte      |
 | extended-significand) quantities.                                          |
 |                                                                            |
 +----------------------------------------------------------------------------*/

#ifndef _POLY_H
#define _POLY_H

/* This 12-byte structure is used to improve the accuracy of computation
   of transcendental functions.  It is intended to give results better
   than 8-byte computation allows; 9 bytes would probably suffice.
   */
typedef struct {
	unsigned long lsw;
	unsigned long midw;
	unsigned long msw;
} Xsig;
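
/*
 * Illustrative sketch (hypothetical helper, not used elsewhere): the three
 * 32-bit words form one 96-bit significand, lsw least significant and msw
 * most significant.  A normalized 0.75, for example, has msw == 0xc0000000
 * and the lower words zero.
 */
static inline void example_xsig_layout(Xsig *result)
{
	result->msw = 0xc0000000;	/* top 32 bits of the significand */
	result->midw = 0;		/* middle 32 bits */
	result->lsw = 0;		/* bottom 32 bits */
}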

asmlinkage void mul64(unsigned long long const *a, unsigned long long const *b,
		      unsigned long long *result);
asmlinkage void polynomial_Xsig(Xsig *, const unsigned long long *x,
				const unsigned long long terms[], const int n);

asmlinkage void mul32_Xsig(Xsig *, const unsigned long mult);
asmlinkage void mul64_Xsig(Xsig *, const unsigned long long *mult);
asmlinkage void mul_Xsig_Xsig(Xsig *dest, const Xsig *mult);

asmlinkage void shr_Xsig(Xsig *, const int n);
asmlinkage int round_Xsig(Xsig *);
asmlinkage int norm_Xsig(Xsig *);
asmlinkage void div_Xsig(Xsig *x1, const Xsig *x2, const Xsig *dest);

/* Macro to extract the most significant 32 bits from a long long */
#define LL_MSW(x) (((unsigned long *)&x)[1])

/* Macro to initialize an Xsig struct */
#define MK_XSIG(a,b,c) { c, b, a }

/* Macro to access the 8 ms bytes of an Xsig as a long long */
#define XSIG_LL(x) (*(unsigned long long *)&x.midw)
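
/*
 * Illustrative sketch (hypothetical values): MK_XSIG takes the words
 * most-significant first and stores them in lsw/midw/msw order, so the
 * initializer below sets msw to 0x80000000.  XSIG_LL then reads the upper
 * 8 bytes (midw and msw) as one long long, and LL_MSW picks the upper
 * 32 bits of that long long back out.  Both accessor macros need an lvalue.
 */
static inline void example_xsig_macros(void)
{
	Xsig half = MK_XSIG(0x80000000, 0, 0);
	unsigned long long upper = XSIG_LL(half);

	/* Here LL_MSW(upper) == half.msw == 0x80000000. */
	(void)LL_MSW(upper);
}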

/*
   These need to be compiled with gcc optimization enabled to actually
   be inlined.
 */

/* Multiply two fixed-point 32 bit numbers, producing a 32 bit result.
   The answer is the ms word of the product. */
/* Some versions of gcc make it difficult to stop eax from being clobbered.
   Merely specifying that it is used doesn't work...
 */
static inline unsigned long mul_32_32(const unsigned long arg1,
				      const unsigned long arg2)
{
	unsigned long retval;

	asm volatile ("mull %2; movl %%edx,%%eax"
		      : "=a" (retval)
		      : "0" (arg1), "g" (arg2)
		      : "dx");
	return retval;
}
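
/*
 * Illustrative sketch (hypothetical values): with each operand read as a
 * fraction scaled by 2^32, mul_32_32() returns the upper 32 bits of the
 * 64-bit product, i.e. the product in the same scaling.  For 0.5 * 0.5,
 * 0x80000000 * 0x80000000 == 0x4000000000000000, so the ms word returned
 * is 0x40000000 (0.25).
 */
static inline unsigned long example_mul_32_32(void)
{
	return mul_32_32(0x80000000, 0x80000000);	/* == 0x40000000 */
}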

/* Add the 12-byte Xsig x2 to Xsig dest, with no checks for overflow. */
static inline void add_Xsig_Xsig(Xsig *dest, const Xsig *x2)
{
	asm volatile ("movl %1,%%edi; movl %2,%%esi;\n"
		      "movl (%%esi),%%eax; addl %%eax,(%%edi);\n"
		      "movl 4(%%esi),%%eax; adcl %%eax,4(%%edi);\n"
		      "movl 8(%%esi),%%eax; adcl %%eax,8(%%edi);\n"
		      : "=g" (*dest)
		      : "g" (dest), "g" (x2)
		      : "ax", "si", "di");
}
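
/*
 * Illustrative sketch (hypothetical values): add_Xsig_Xsig() is a plain
 * 96-bit add, so a carry out of the low word propagates into the middle
 * word, but a carry out of the top word is silently lost.
 */
static inline void example_add_Xsig_Xsig(void)
{
	Xsig a = MK_XSIG(0, 0, 0xffffffff);	/* msw=0, midw=0, lsw=all ones */
	Xsig b = MK_XSIG(0, 0, 1);		/* lsw=1 */

	add_Xsig_Xsig(&a, &b);
	/* Now a.lsw == 0, a.midw == 1 and a.msw == 0. */
}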

/* Add the 12-byte Xsig x2 to Xsig dest, adjusting exp if overflow occurs. */
/* Note: the constraints in the asm statement didn't always work properly
   with gcc 2.5.8.  Changing from using edi to using ecx got around the
   problem, but keep fingers crossed! */
static inline void add_two_Xsig(Xsig *dest, const Xsig *x2, long int *exp)
{
	asm volatile ("movl %2,%%ecx; movl %3,%%esi;\n"
		      "movl (%%esi),%%eax; addl %%eax,(%%ecx);\n"
		      "movl 4(%%esi),%%eax; adcl %%eax,4(%%ecx);\n"
		      "movl 8(%%esi),%%eax; adcl %%eax,8(%%ecx);\n"
		      "jnc 0f;\n"
		      "rcrl 8(%%ecx); rcrl 4(%%ecx); rcrl (%%ecx)\n"
		      "movl %4,%%ecx; incl (%%ecx)\n"
		      "movl $1,%%eax; jmp 1f;\n"
		      "0: xorl %%eax,%%eax;\n"
		      "1:\n"
		      : "=g" (*exp), "=g" (*dest)
		      : "g" (dest), "g" (x2), "g" (exp)
		      : "cx", "si", "ax");
}
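
/*
 * Illustrative sketch (hypothetical values): when the 96-bit sum overflows,
 * add_two_Xsig() rotates the carry back in as the new top bit and bumps
 * *exp, keeping the result normalized.  Adding 0.75 to 0.75 gives 0.75
 * with the exponent raised by one, i.e. 1.5.
 */
static inline void example_add_two_Xsig(void)
{
	Xsig a = MK_XSIG(0xc0000000, 0, 0);	/* 0.75 */
	Xsig b = MK_XSIG(0xc0000000, 0, 0);	/* 0.75 */
	long exp = 0;

	add_two_Xsig(&a, &b, &exp);
	/* Now a.msw == 0xc0000000 again and exp == 1. */
}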

/* Negate (subtract from 1.0) the 12-byte Xsig */
/* This is faster in a loop on my 386 than using the "neg" instruction. */
static inline void negate_Xsig(Xsig *x)
{
	asm volatile ("movl %1,%%esi;\n"
		      "xorl %%ecx,%%ecx;\n"
		      "movl %%ecx,%%eax; subl (%%esi),%%eax; movl %%eax,(%%esi);\n"
		      "movl %%ecx,%%eax; sbbl 4(%%esi),%%eax; movl %%eax,4(%%esi);\n"
		      "movl %%ecx,%%eax; sbbl 8(%%esi),%%eax; movl %%eax,8(%%esi);\n"
		      : "=g" (*x)
		      : "g" (x)
		      : "si", "ax", "cx");
}
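
/*
 * Illustrative sketch (hypothetical values): reading the Xsig as a fraction
 * in [0,1), the two's-complement negate above amounts to 1.0 - x.
 * Negating 0.25 (msw == 0x40000000) gives 0.75 (msw == 0xc0000000).
 */
static inline void example_negate_Xsig(void)
{
	Xsig x = MK_XSIG(0x40000000, 0, 0);	/* 0.25 */

	negate_Xsig(&x);
	/* Now x.msw == 0xc0000000, i.e. 0.75. */
}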

#endif /* _POLY_H */