^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1) /* SPDX-License-Identifier: GPL-2.0 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2) /*---------------------------------------------------------------------------+
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3) | polynomial_Xsig.S |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4) | |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5) | Fixed point arithmetic polynomial evaluation. |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6) | |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7) | Copyright (C) 1992,1993,1994,1995 |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8) | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9) | Australia. E-mail billm@jacobi.maths.monash.edu.au |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10) | |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11) | Call from C as: |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 12) | void polynomial_Xsig(Xsig *accum, unsigned long long *x, |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 13) | unsigned long long terms[], int n) |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 14) | |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 15) | Computes (terms[n] is the highest-order coefficient; n+1 entries are read): |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 16) | terms[0] + (terms[1] + (terms[2] + ... + (terms[n-1] + terms[n]*x)*x ... )*x)*x |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 17) | and adds the result to the 12 byte Xsig. |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 18) | The terms[] are each 8 bytes, but all computation is performed to 12 byte |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 19) | precision. |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 20) | |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 21) | This function must be used carefully: most overflow of intermediate |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 22) | results is controlled, but overflow of the result is not. |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 23) | |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 24) +---------------------------------------------------------------------------*/
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 25) .file "polynomial_Xsig.S"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 26)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 27) #include "fpu_emu.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 28)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 29)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 30) #define TERM_SIZE $8 /* bytes per terms[] entry, written as an immediate operand */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 31) #define SUM_MS -20(%ebp) /* running sum, ms long */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 32) #define SUM_MIDDLE -24(%ebp) /* running sum, middle long */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 33) #define SUM_LS -28(%ebp) /* running sum, ls long */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 34) #define ACCUM_MS -4(%ebp) /* product scratch (accum), ms long */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 35) #define ACCUM_MIDDLE -8(%ebp) /* product scratch (accum), middle long */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 36) #define ACCUM_LS -12(%ebp) /* product scratch (accum), ls long */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 37) #define OVERFLOWED -16(%ebp) /* byte flag: nonzero when the last sum addition carried out of SUM_MS */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 38)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 39) .text
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 40) SYM_FUNC_START(polynomial_Xsig) /* Horner evaluation: sum = sum*x + term, from terms[n] down to terms[0]; result is added to *accum */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 41) pushl %ebp
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 42) movl %esp,%ebp
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 43) subl $32,%esp /* room for the SUM_*, ACCUM_* and OVERFLOWED locals */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 44) pushl %esi
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 45) pushl %edi
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 46) pushl %ebx /* saved and restored only; %ebx is not otherwise referenced in this routine */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 47) /* PARAM1..PARAM4 are not defined in this file; presumably stack argument slots from fpu_emu.h - confirm */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 48) movl PARAM2,%esi /* x: used as a pointer, (%esi) = ls long, 4(%esi) = ms long */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 49) movl PARAM3,%edi /* terms */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 50) /* Point %edi at terms[n]: %edi += n * TERM_SIZE (mull also overwrites %edx) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 51) movl TERM_SIZE,%eax
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 52) mull PARAM4 /* n */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 53) addl %eax,%edi
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 54) /* Seed the 96 bit sum: terms[n] fills the upper 64 bits, the ls long and the overflow flag start at 0 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 55) movl 4(%edi),%edx /* terms[n] ms long */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 56) movl %edx,SUM_MS
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 57) movl (%edi),%edx /* terms[n] ls long */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 58) movl %edx,SUM_MIDDLE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 59) xor %eax,%eax
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 60) movl %eax,SUM_LS
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 61) movb %al,OVERFLOWED
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 62) /* Step to terms[n-1]; PARAM4 (the stack slot itself) becomes the index of the next term to add */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 63) subl TERM_SIZE,%edi
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 64) decl PARAM4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 65) js L_accum_done /* index went negative: no lower-order terms, skip the loop */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 66)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 67) L_accum_loop: /* accum = upper 96 bits of the 128 bit product (SUM_MS:SUM_MIDDLE) * x; SUM_LS is not multiplied */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 68) xor %eax,%eax
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 69) movl %eax,ACCUM_MS
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 70) movl %eax,ACCUM_MIDDLE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 71) /* sum middle * x ls: lowest partial product */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 72) movl SUM_MIDDLE,%eax
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 73) mull (%esi) /* x ls long */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 74) movl %edx,ACCUM_LS /* only the high long is kept; the low long (%eax) is discarded */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 75) /* sum middle * x ms: added at the ls/middle longs, carry propagated into ms */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 76) movl SUM_MIDDLE,%eax
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 77) mull 4(%esi) /* x ms long */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 78) addl %eax,ACCUM_LS
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 79) adcl %edx,ACCUM_MIDDLE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 80) adcl $0,ACCUM_MS
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 81) /* sum ms * x ls: same weight as the previous partial product */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 82) movl SUM_MS,%eax
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 83) mull (%esi) /* x ls long */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 84) addl %eax,ACCUM_LS
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 85) adcl %edx,ACCUM_MIDDLE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 86) adcl $0,ACCUM_MS
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 87) /* sum ms * x ms: highest partial product, added at the middle/ms longs */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 88) movl SUM_MS,%eax
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 89) mull 4(%esi) /* x ms long */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 90) addl %eax,ACCUM_MIDDLE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 91) adcl %edx,ACCUM_MS
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 92) /* If the previous sum carried out of SUM_MS, the lost bit is worth one extra x in the upper 64 bits of the product */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 93) testb $0xff,OVERFLOWED
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 94) jz L_no_overflow
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 95)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 96) movl (%esi),%eax
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 97) addl %eax,ACCUM_MIDDLE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 98) movl 4(%esi),%eax /* movl leaves the carry from the addl above intact */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 99) adcl %eax,ACCUM_MS /* This could overflow too; that carry is not recorded */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 100)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 101) L_no_overflow:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 102)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 103) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 104) * Now put the sum of next term and the accumulator
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 105) * into the sum register. The interleaved movl's do not alter the carry used by adcl/sbbb.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 106) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 107) movl ACCUM_LS,%eax
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 108) addl (%edi),%eax /* term ls long; NOTE(review): the same long is added again into the middle long below - confirm intended */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 109) movl %eax,SUM_LS
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 110) movl ACCUM_MIDDLE,%eax
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 111) adcl (%edi),%eax /* term ls long, plus carry from the ls addition */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 112) movl %eax,SUM_MIDDLE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 113) movl ACCUM_MS,%eax
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 114) adcl 4(%edi),%eax /* term ms long, plus carry from the middle addition */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 115) movl %eax,SUM_MS
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 116) sbbb %al,%al /* %al = 0xff if the ms addition carried out, else 0 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 117) movb %al,OVERFLOWED /* Used in the next iteration */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 118)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 119) subl TERM_SIZE,%edi /* step down to the next lower-order term */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 120) decl PARAM4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 121) jns L_accum_loop /* loop until the index goes negative, i.e. terms[0] has been added */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 122)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 123) L_accum_done: /* add the 96 bit sum into the three longs at *accum; OVERFLOWED and the final carry are ignored */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 124) movl PARAM1,%edi /* accum */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 125) movl SUM_LS,%eax
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 126) addl %eax,(%edi)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 127) movl SUM_MIDDLE,%eax
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 128) adcl %eax,4(%edi)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 129) movl SUM_MS,%eax
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 130) adcl %eax,8(%edi)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 131)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 132) popl %ebx
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 133) popl %edi
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 134) popl %esi
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 135) leave
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 136) ret
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 137) SYM_FUNC_END(polynomial_Xsig)