Commit | Line | Data |
---|---|---|
b2441318 | 1 | /* SPDX-License-Identifier: GPL-2.0 */ |
1da177e4 LT |
2 | /*---------------------------------------------------------------------------+ |
3 | | polynomial_Xsig.S | | |
4 | | | | |
5 | | Fixed point arithmetic polynomial evaluation. | | |
6 | | | | |
7 | | Copyright (C) 1992,1993,1994,1995 | | |
8 | | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | | |
9 | | Australia. E-mail billm@jacobi.maths.monash.edu.au | | |
10 | | | | |
11 | | Call from C as: | | |
12 | | void polynomial_Xsig(Xsig *accum, unsigned long long *x, | | |
13 | | unsigned long long terms[], int n) | | |
14 | | | | |
15 | | Computes: | | |
16 | | terms[0] + (terms[1] + (terms[2] + ... + (terms[n-1] + terms[n]*x)*x ... )*x)*x | | |
17 | | and adds the result to the 12 byte Xsig. | | |
18 | | The n+1 terms[] are each 8 bytes, but all computation is performed to 12 byte | | |
19 | | precision. | | |
20 | | | | |
21 | | This function must be used carefully: most overflow of intermediate | | |
22 | | results is controlled, but overflow of the result is not. | | |
23 | | | | |
24 | +---------------------------------------------------------------------------*/ | |
25 | .file "polynomial_Xsig.S" | |
26 | ||
27 | #include "fpu_emu.h" | |
28 | ||
29 | ||
30 | #define TERM_SIZE $8 /* bytes per terms[] entry, written as an immediate operand */ | |
31 | #define SUM_MS -20(%ebp) /* running sum, most significant long */ | |
32 | #define SUM_MIDDLE -24(%ebp) /* running sum, middle long */ | |
33 | #define SUM_LS -28(%ebp) /* running sum, least significant long */ | |
34 | #define ACCUM_MS -4(%ebp) /* product accumulator, most significant long */ | |
35 | #define ACCUM_MIDDLE -8(%ebp) /* product accumulator, middle long */ | |
36 | #define ACCUM_LS -12(%ebp) /* product accumulator, least significant long */ | |
37 | #define OVERFLOWED -16(%ebp) /* byte flag: non-zero when the last sum addition carried out of SUM_MS */ | |
38 | ||
39 | .text | |
40 | ENTRY(polynomial_Xsig) /* evaluates terms[0..n] at x by repeated multiply-and-add, then adds the 12-byte result into accum */ | |
41 | pushl %ebp /* build a frame; the locals live at negative offsets from %ebp */ | |
42 | movl %esp,%ebp | |
43 | subl $32,%esp /* room for the SUM_*, ACCUM_* and OVERFLOWED slots at -4..-28(%ebp) */ | |
44 | pushl %esi /* %esi and %edi are used as pointers below */ | |
45 | pushl %edi | |
46 | pushl %ebx /* saved and restored, although %ebx is not otherwise referenced in this routine */ | |
47 | ||
48 | movl PARAM2,%esi /* x: used below as a pointer, (%esi) = ls long, 4(%esi) = ms long */ | |
49 | movl PARAM3,%edi /* terms: address of the first 8-byte term */ | |
50 | ||
51 | movl TERM_SIZE,%eax /* %eax = bytes per term */ | |
52 | mull PARAM4 /* %eax = TERM_SIZE * n; %edx receives the upper half and is not used */ | |
53 | addl %eax,%edi /* %edi = address of terms[n] */ | |
54 | ||
55 | movl 4(%edi),%edx /* terms[n], ms long */ | |
56 | movl %edx,SUM_MS | |
57 | movl (%edi),%edx /* terms[n], ls long */ | |
58 | movl %edx,SUM_MIDDLE /* the 8-byte term occupies the MS and MIDDLE longs of the 12-byte sum */ | |
59 | xor %eax,%eax | |
60 | movl %eax,SUM_LS /* the LS long of the sum starts at zero */ | |
61 | movb %al,OVERFLOWED /* no carry is pending before the first step */ | |
62 | ||
63 | subl TERM_SIZE,%edi /* %edi = address of terms[n-1] */ | |
64 | decl PARAM4 /* counts n down in place */ | |
65 | js L_accum_done /* count went negative: no multiply steps, the sum is terms[n] alone */ | |
66 | ||
67 | L_accum_loop: /* one pass per remaining term: sum = sum * x + term at (%edi) */ | |
68 | xor %eax,%eax | |
69 | movl %eax,ACCUM_MS | |
70 | movl %eax,ACCUM_MIDDLE /* ACCUM_LS is overwritten below, so only these two longs are cleared */ | |
71 | ||
72 | movl SUM_MIDDLE,%eax | |
73 | mull (%esi) /* SUM_MIDDLE * x ls long */ | |
74 | movl %edx,ACCUM_LS /* only the upper half is kept; the lower half in %eax is dropped */ | |
75 | ||
76 | movl SUM_MIDDLE,%eax | |
77 | mull 4(%esi) /* SUM_MIDDLE * x ms long */ | |
78 | addl %eax,ACCUM_LS | |
79 | adcl %edx,ACCUM_MIDDLE | |
80 | adcl $0,ACCUM_MS /* carry the overflow of the middle long upward */ | |
81 | ||
82 | movl SUM_MS,%eax | |
83 | mull (%esi) /* SUM_MS * x ls long, same weight as the previous partial product */ | |
84 | addl %eax,ACCUM_LS | |
85 | adcl %edx,ACCUM_MIDDLE | |
86 | adcl $0,ACCUM_MS | |
87 | ||
88 | movl SUM_MS,%eax | |
89 | mull 4(%esi) /* SUM_MS * x ms long, added one long higher than the two cross products */ | |
90 | addl %eax,ACCUM_MIDDLE | |
91 | adcl %edx,ACCUM_MS | |
92 | ||
93 | testb $0xff,OVERFLOWED /* did the previous sum addition carry out of SUM_MS? */ | |
94 | jz L_no_overflow | |
95 | ||
96 | movl (%esi),%eax /* compensate for the carried-out bit by adding x itself into ACCUM_MIDDLE and ACCUM_MS */ | |
97 | addl %eax,ACCUM_MIDDLE | |
98 | movl 4(%esi),%eax | |
99 | adcl %eax,ACCUM_MS /* This could overflow too; that carry is not recorded anywhere */ | |
100 | ||
101 | L_no_overflow: | |
102 | ||
103 | /* | |
104 | * Now put the sum of the next term, read through %edi, and the | |
105 | * accumulator into the sum slots, remembering any carry out of the top long | |
106 | */ | |
107 | movl ACCUM_LS,%eax | |
108 | addl (%edi),%eax /* term ls long -- NOTE(review): the same long is added again into MIDDLE below, while the initial load puts a term in MIDDLE and MS only; confirm this LS addition is intended */ | |
109 | movl %eax,SUM_LS | |
110 | movl ACCUM_MIDDLE,%eax | |
111 | adcl (%edi),%eax /* term ls long, plus the carry from the LS addition */ | |
112 | movl %eax,SUM_MIDDLE | |
113 | movl ACCUM_MS,%eax | |
114 | adcl 4(%edi),%eax /* term ms long, plus the carry from the MIDDLE addition */ | |
115 | movl %eax,SUM_MS /* movl does not modify the carry flag */ | |
116 | sbbb %al,%al /* %al = 0xff if the MS addition carried out, otherwise 0 */ | |
117 | movb %al,OVERFLOWED /* Used in the next iteration */ | |
118 | ||
119 | subl TERM_SIZE,%edi /* step back to the previous term */ | |
120 | decl PARAM4 | |
121 | jns L_accum_loop /* repeat until the count goes negative */ | |
122 | ||
123 | L_accum_done: | |
124 | movl PARAM1,%edi /* accum: destination, updated as three longs at offsets 0, 4 and 8 */ | |
125 | movl SUM_LS,%eax | |
126 | addl %eax,(%edi) /* ls long */ | |
127 | movl SUM_MIDDLE,%eax | |
128 | adcl %eax,4(%edi) /* middle long, with carry */ | |
129 | movl SUM_MS,%eax | |
130 | adcl %eax,8(%edi) /* ms long, with carry; a carry out of this long is discarded */ | |
131 | ||
132 | popl %ebx /* restore in reverse order of the pushes */ | |
133 | popl %edi | |
134 | popl %esi | |
135 | leave /* release the locals and restore the caller's %ebp */ | |
136 | ret | |
bd6be579 | 137 | ENDPROC(polynomial_Xsig) |