/*---------------------------------------------------------------------------+
 |  polynomial_Xsig.S                                                        |
 |                                                                           |
 | Fixed point arithmetic polynomial evaluation.                             |
 |                                                                           |
 | Copyright (C) 1992,1993,1994,1995                                         |
 |       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,                      |
 |       Australia. E-mail billm@jacobi.maths.monash.edu.au                  |
 |                                                                           |
 | Call from C as:                                                           |
 |   void polynomial_Xsig(Xsig *accum, unsigned long long *x,                |
 |                        unsigned long long terms[], int n)                 |
 |                                                                           |
 | Computes:                                                                 |
 |   terms[0] + x*(terms[1] + x*(terms[2] + ... + x*terms[n]) ... )          |
 | and adds the result to the 12 byte Xsig.                                  |
 | The terms[] are each 8 bytes, but all computation is performed to 12 byte |
 | precision.                                                                |
 |                                                                           |
 | This function must be used carefully: most overflow of intermediate       |
 | results is controlled, but overflow of the result is not.                 |
 |                                                                           |
 +---------------------------------------------------------------------------*/

/*
 * Notes on the contract, as implemented by the code below:
 *
 *  - x is read through a pointer: the second argument is loaded into %esi
 *    and the two 32-bit halves of x are fetched from (%esi) and 4(%esi).
 *  - n is the index of the highest-order coefficient.  terms[n] is loaded
 *    first and the loop then runs n times, consuming terms[n-1] down to
 *    terms[0], so n+1 entries of terms[] are read.  With n == 0 the loop is
 *    skipped and terms[0] alone is added to *accum.
 *  - n is not range-checked; a negative n is not handled.
 *  - Each 8-byte term is loaded into the upper two longs (MS:MIDDLE) of the
 *    12-byte running sum.  Only those upper two longs of the running sum are
 *    fed into each multiplication by x; SUM_LS is written but never read as
 *    a multiplicand.
 *  - The stack slot holding n is decremented in place and used as the loop
 *    counter.
 *  - PARAM1..PARAM4 and ENTRY() are expected to come from fpu_emu.h.
 */

	.file	"polynomial_Xsig.S"

#include "fpu_emu.h"


/* Size in bytes of one terms[] entry, written as an immediate operand */
#define	TERM_SIZE	$8

/* Running sum (12 bytes), held in the local frame */
#define	SUM_MS		-20(%ebp)	/* sum ms long */
#define	SUM_MIDDLE	-24(%ebp)	/* sum middle long */
#define	SUM_LS		-28(%ebp)	/* sum ls long */

/* Product of the running sum and x (12 bytes), held in the local frame */
#define	ACCUM_MS	-4(%ebp)	/* accum ms long */
#define	ACCUM_MIDDLE	-8(%ebp)	/* accum middle long */
#define	ACCUM_LS	-12(%ebp)	/* accum ls long */

#define	OVERFLOWED	-16(%ebp)	/* addition overflow flag */

.text
ENTRY(polynomial_Xsig)
	pushl	%ebp
	movl	%esp,%ebp
	subl	$32,%esp		/* frame for the SUM_*, ACCUM_* and OVERFLOWED slots */
	pushl	%esi
	pushl	%edi
	pushl	%ebx			/* saved and restored; not otherwise referenced here */

	movl	PARAM2,%esi		/* x */
	movl	PARAM3,%edi		/* terms */

	/* %edi = terms + n * TERM_SIZE, i.e. the address of terms[n] */
	movl	TERM_SIZE,%eax
	mull	PARAM4			/* n */
	addl	%eax,%edi

	/* sum = terms[n], placed in the upper two longs; ls long cleared */
	movl	4(%edi),%edx		/* terms[n] */
	movl	%edx,SUM_MS
	movl	(%edi),%edx		/* terms[n] */
	movl	%edx,SUM_MIDDLE
	xor	%eax,%eax
	movl	%eax,SUM_LS
	movb	%al,OVERFLOWED		/* no carry pending yet */

	/* Step down to terms[n-1]; if n was 0 there is nothing to multiply */
	subl	TERM_SIZE,%edi
	decl	PARAM4
	js	L_accum_done

L_accum_loop:
	/*
	 * accum = upper 96 bits of the 128-bit product
	 *         (SUM_MS:SUM_MIDDLE) * (x ms long : x ls long)
	 */
	xor	%eax,%eax
	movl	%eax,ACCUM_MS
	movl	%eax,ACCUM_MIDDLE

	/* SUM_MIDDLE * x_ls: only the high half (%edx) is kept */
	movl	SUM_MIDDLE,%eax
	mull	(%esi)			/* x ls long */
	movl	%edx,ACCUM_LS

	/* SUM_MIDDLE * x_ms: added at LS:MIDDLE, carry rippled into MS */
	movl	SUM_MIDDLE,%eax
	mull	4(%esi)			/* x ms long */
	addl	%eax,ACCUM_LS
	adcl	%edx,ACCUM_MIDDLE
	adcl	$0,ACCUM_MS

	/* SUM_MS * x_ls: added at LS:MIDDLE, carry rippled into MS */
	movl	SUM_MS,%eax
	mull	(%esi)			/* x ls long */
	addl	%eax,ACCUM_LS
	adcl	%edx,ACCUM_MIDDLE
	adcl	$0,ACCUM_MS

	/* SUM_MS * x_ms: added at MIDDLE:MS */
	movl	SUM_MS,%eax
	mull	4(%esi)			/* x ms long */
	addl	%eax,ACCUM_MIDDLE
	adcl	%edx,ACCUM_MS

	/*
	 * If the previous addition carried out of SUM_MS, the value that was
	 * multiplied above is missing that carry bit.  Compensate by adding
	 * x once more at MIDDLE:MS.
	 */
	testb	$0xff,OVERFLOWED
	jz	L_no_overflow

	movl	(%esi),%eax
	addl	%eax,ACCUM_MIDDLE
	movl	4(%esi),%eax
	adcl	%eax,ACCUM_MS		/* This could overflow too */

L_no_overflow:

/*
 * Now put the sum of next term and the accumulator
 * into the sum register
 *
 * NOTE(review): the term's ls long, (%edi), is added to both ACCUM_LS and
 * ACCUM_MIDDLE, whereas the initial load of terms[n] above leaves SUM_LS
 * zero.  This looks inconsistent; it is left exactly as found because
 * changing it would alter numerical results -- confirm the intent before
 * touching it.
 */
	movl	ACCUM_LS,%eax
	addl	(%edi),%eax		/* term ls long */
	movl	%eax,SUM_LS
	movl	ACCUM_MIDDLE,%eax
	adcl	(%edi),%eax		/* term ls long */
	movl	%eax,SUM_MIDDLE
	movl	ACCUM_MS,%eax
	adcl	4(%edi),%eax		/* term ms long */
	movl	%eax,SUM_MS
	sbbb	%al,%al			/* %al = 0xff if the add carried out, else 0 */
	movb	%al,OVERFLOWED		/* Used in the next iteration */

	/* Step to the next lower-order term; loop until the count goes negative */
	subl	TERM_SIZE,%edi
	decl	PARAM4
	jns	L_accum_loop

L_accum_done:
	/* Add the 12-byte sum into *accum; a carry out of the ms long is dropped */
	movl	PARAM1,%edi		/* accum */
	movl	SUM_LS,%eax
	addl	%eax,(%edi)
	movl	SUM_MIDDLE,%eax
	adcl	%eax,4(%edi)
	movl	SUM_MS,%eax
	adcl	%eax,8(%edi)

	popl	%ebx
	popl	%edi
	popl	%esi
	leave
	ret