/* SPDX-License-Identifier: GPL-2.0 */ /*---------------------------------------------------------------------------+ | polynomial_Xsig.S | | | | Fixed point arithmetic polynomial evaluation. | | | | Copyright (C) 1992,1993,1994,1995 | | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | | Australia. E-mail billm@jacobi.maths.monash.edu.au | | | | Call from C as: | | void polynomial_Xsig(Xsig *accum, unsigned long long x, | | unsigned long long terms[], int n) | | | | Computes: | | terms[0] + (terms[1] + (terms[2] + ... + (terms[n-1]*x)*x)*x)*x) ... )*x | | and adds the result to the 12 byte Xsig. | | The terms[] are each 8 bytes, but all computation is performed to 12 byte | | precision. | | | | This function must be used carefully: most overflow of intermediate | | results is controlled, but overflow of the result is not. | | | +---------------------------------------------------------------------------*/ <------>.file "polynomial_Xsig.S" #include "fpu_emu.h" #define TERM_SIZE $8 #define SUM_MS -20(%ebp) /* sum ms long */ #define SUM_MIDDLE -24(%ebp) /* sum middle long */ #define SUM_LS -28(%ebp) /* sum ls long */ #define ACCUM_MS -4(%ebp) /* accum ms long */ #define ACCUM_MIDDLE -8(%ebp) /* accum middle long */ #define ACCUM_LS -12(%ebp) /* accum ls long */ #define OVERFLOWED -16(%ebp) /* addition overflow flag */ .text SYM_FUNC_START(polynomial_Xsig) <------>pushl %ebp <------>movl %esp,%ebp <------>subl $32,%esp <------>pushl %esi <------>pushl %edi <------>pushl %ebx <------>movl PARAM2,%esi /* x */ <------>movl PARAM3,%edi /* terms */ <------>movl TERM_SIZE,%eax <------>mull PARAM4 /* n */ <------>addl %eax,%edi <------>movl 4(%edi),%edx /* terms[n] */ <------>movl %edx,SUM_MS <------>movl (%edi),%edx /* terms[n] */ <------>movl %edx,SUM_MIDDLE <------>xor %eax,%eax <------>movl %eax,SUM_LS <------>movb %al,OVERFLOWED <------>subl TERM_SIZE,%edi <------>decl PARAM4 <------>js L_accum_done L_accum_loop: <------>xor %eax,%eax <------>movl %eax,ACCUM_MS <------>movl %eax,ACCUM_MIDDLE <------>movl SUM_MIDDLE,%eax <------>mull (%esi) /* x ls long */ <------>movl %edx,ACCUM_LS <------>movl SUM_MIDDLE,%eax <------>mull 4(%esi) /* x ms long */ <------>addl %eax,ACCUM_LS <------>adcl %edx,ACCUM_MIDDLE <------>adcl $0,ACCUM_MS <------>movl SUM_MS,%eax <------>mull (%esi) /* x ls long */ <------>addl %eax,ACCUM_LS <------>adcl %edx,ACCUM_MIDDLE <------>adcl $0,ACCUM_MS <------>movl SUM_MS,%eax <------>mull 4(%esi) /* x ms long */ <------>addl %eax,ACCUM_MIDDLE <------>adcl %edx,ACCUM_MS <------>testb $0xff,OVERFLOWED <------>jz L_no_overflow <------>movl (%esi),%eax <------>addl %eax,ACCUM_MIDDLE <------>movl 4(%esi),%eax <------>adcl %eax,ACCUM_MS /* This could overflow too */ L_no_overflow: /* * Now put the sum of next term and the accumulator * into the sum register */ <------>movl ACCUM_LS,%eax <------>addl (%edi),%eax /* term ls long */ <------>movl %eax,SUM_LS <------>movl ACCUM_MIDDLE,%eax <------>adcl (%edi),%eax /* term ls long */ <------>movl %eax,SUM_MIDDLE <------>movl ACCUM_MS,%eax <------>adcl 4(%edi),%eax /* term ms long */ <------>movl %eax,SUM_MS <------>sbbb %al,%al <------>movb %al,OVERFLOWED /* Used in the next iteration */ <------>subl TERM_SIZE,%edi <------>decl PARAM4 <------>jns L_accum_loop L_accum_done: <------>movl PARAM1,%edi /* accum */ <------>movl SUM_LS,%eax <------>addl %eax,(%edi) <------>movl SUM_MIDDLE,%eax <------>adcl %eax,4(%edi) <------>movl SUM_MS,%eax <------>adcl %eax,8(%edi) <------>popl %ebx <------>popl %edi <------>popl %esi <------>leave <------>ret SYM_FUNC_END(polynomial_Xsig)