Commit | Line | Data |
---|---|---|
6dd7a82c AB |
1 | /* |
2 | * Calculate the checksum of data that is 16 byte aligned and a multiple of | |
3 | * 16 bytes. | |
4 | * | |
5 | * The first step is to reduce it to 1024 bits. We do this in 8 parallel | |
6 | * chunks in order to mask the latency of the vpmsum instructions. If we | |
7 | * have more than 32 kB of data to checksum we repeat this step multiple | |
8 | * times, passing in the previous 1024 bits. | |
9 | * | |
10 | * The next step is to reduce the 1024 bits to 64 bits. This step adds | |
11 | * 32 bits of 0s to the end - this matches what a CRC does. We just | |
12 | * calculate constants that land the data in these 32 bits. | |
13 | * | |
14 | * We then use fixed point Barrett reduction to compute a mod n over GF(2) | |
15 | * for n = CRC using POWER8 instructions. We use x = 32. | |
16 | * | |
17 | * http://en.wikipedia.org/wiki/Barrett_reduction | |
18 | * | |
19 | * Copyright (C) 2015 Anton Blanchard <anton@au.ibm.com>, IBM | |
20 | * | |
21 | * This program is free software; you can redistribute it and/or | |
22 | * modify it under the terms of the GNU General Public License | |
23 | * as published by the Free Software Foundation; either version | |
24 | * 2 of the License, or (at your option) any later version. | |
25 | */ | |
26 | #include <asm/ppc_asm.h> | |
27 | #include <asm/ppc-opcode.h> | |
28 | ||
29 | .section .rodata | |
30 | .balign 16 | |
31 | ||
32 | .byteswap_constant: |
33 | /* Byte reverse permute constant: the 16 byte indices 0x0F down to 0x00, written most-significant first. NOTE(review): presumably a vector permute control used to byte-swap loaded data; the consuming code is not visible here - confirm. */ |
34 | .octa 0x0F0E0D0C0B0A09080706050403020100 | |
35 | ||
36 | #define MAX_SIZE 32768 | |
37 | .constants: | |
38 | ||
39 | /* Reduce 262144 bits (MAX_SIZE = 32768 bytes) to 1024 bits */ |
40 | /* x^261120 mod p(x)` << 1, x^261184 mod p(x)` << 1 */ | |
41 | .octa 0x00000000b6ca9e20000000009c37c408 | |
42 | ||
43 | /* x^260096 mod p(x)` << 1, x^260160 mod p(x)` << 1 */ | |
44 | .octa 0x00000000350249a800000001b51df26c | |
45 | ||
46 | /* x^259072 mod p(x)` << 1, x^259136 mod p(x)` << 1 */ | |
47 | .octa 0x00000001862dac54000000000724b9d0 | |
48 | ||
49 | /* x^258048 mod p(x)` << 1, x^258112 mod p(x)` << 1 */ | |
50 | .octa 0x00000001d87fb48c00000001c00532fe | |
51 | ||
52 | /* x^257024 mod p(x)` << 1, x^257088 mod p(x)` << 1 */ | |
53 | .octa 0x00000001f39b699e00000000f05a9362 | |
54 | ||
55 | /* x^256000 mod p(x)` << 1, x^256064 mod p(x)` << 1 */ | |
56 | .octa 0x0000000101da11b400000001e1007970 | |
57 | ||
58 | /* x^254976 mod p(x)` << 1, x^255040 mod p(x)` << 1 */ | |
59 | .octa 0x00000001cab571e000000000a57366ee | |
60 | ||
61 | /* x^253952 mod p(x)` << 1, x^254016 mod p(x)` << 1 */ | |
62 | .octa 0x00000000c7020cfe0000000192011284 | |
63 | ||
64 | /* x^252928 mod p(x)` << 1, x^252992 mod p(x)` << 1 */ | |
65 | .octa 0x00000000cdaed1ae0000000162716d9a | |
66 | ||
67 | /* x^251904 mod p(x)` << 1, x^251968 mod p(x)` << 1 */ | |
68 | .octa 0x00000001e804effc00000000cd97ecde | |
69 | ||
70 | /* x^250880 mod p(x)` << 1, x^250944 mod p(x)` << 1 */ | |
71 | .octa 0x0000000077c3ea3a0000000058812bc0 | |
72 | ||
73 | /* x^249856 mod p(x)` << 1, x^249920 mod p(x)` << 1 */ | |
74 | .octa 0x0000000068df31b40000000088b8c12e | |
75 | ||
76 | /* x^248832 mod p(x)` << 1, x^248896 mod p(x)` << 1 */ | |
77 | .octa 0x00000000b059b6c200000001230b234c | |
78 | ||
79 | /* x^247808 mod p(x)` << 1, x^247872 mod p(x)` << 1 */ | |
80 | .octa 0x0000000145fb8ed800000001120b416e | |
81 | ||
82 | /* x^246784 mod p(x)` << 1, x^246848 mod p(x)` << 1 */ | |
83 | .octa 0x00000000cbc0916800000001974aecb0 | |
84 | ||
85 | /* x^245760 mod p(x)` << 1, x^245824 mod p(x)` << 1 */ | |
86 | .octa 0x000000005ceeedc2000000008ee3f226 | |
87 | ||
88 | /* x^244736 mod p(x)` << 1, x^244800 mod p(x)` << 1 */ | |
89 | .octa 0x0000000047d74e8600000001089aba9a | |
90 | ||
91 | /* x^243712 mod p(x)` << 1, x^243776 mod p(x)` << 1 */ | |
92 | .octa 0x00000001407e9e220000000065113872 | |
93 | ||
94 | /* x^242688 mod p(x)` << 1, x^242752 mod p(x)` << 1 */ | |
95 | .octa 0x00000001da967bda000000005c07ec10 | |
96 | ||
97 | /* x^241664 mod p(x)` << 1, x^241728 mod p(x)` << 1 */ | |
98 | .octa 0x000000006c8983680000000187590924 | |
99 | ||
100 | /* x^240640 mod p(x)` << 1, x^240704 mod p(x)` << 1 */ | |
101 | .octa 0x00000000f2d14c9800000000e35da7c6 | |
102 | ||
103 | /* x^239616 mod p(x)` << 1, x^239680 mod p(x)` << 1 */ | |
104 | .octa 0x00000001993c6ad4000000000415855a | |
105 | ||
106 | /* x^238592 mod p(x)` << 1, x^238656 mod p(x)` << 1 */ | |
107 | .octa 0x000000014683d1ac0000000073617758 | |
108 | ||
109 | /* x^237568 mod p(x)` << 1, x^237632 mod p(x)` << 1 */ | |
110 | .octa 0x00000001a7c93e6c0000000176021d28 | |
111 | ||
112 | /* x^236544 mod p(x)` << 1, x^236608 mod p(x)` << 1 */ | |
113 | .octa 0x000000010211e90a00000001c358fd0a | |
114 | ||
115 | /* x^235520 mod p(x)` << 1, x^235584 mod p(x)` << 1 */ | |
116 | .octa 0x000000001119403e00000001ff7a2c18 | |
117 | ||
118 | /* x^234496 mod p(x)` << 1, x^234560 mod p(x)` << 1 */ | |
119 | .octa 0x000000001c3261aa00000000f2d9f7e4 | |
120 | ||
121 | /* x^233472 mod p(x)` << 1, x^233536 mod p(x)` << 1 */ | |
122 | .octa 0x000000014e37a634000000016cf1f9c8 | |
123 | ||
124 | /* x^232448 mod p(x)` << 1, x^232512 mod p(x)` << 1 */ | |
125 | .octa 0x0000000073786c0c000000010af9279a | |
126 | ||
127 | /* x^231424 mod p(x)` << 1, x^231488 mod p(x)` << 1 */ | |
128 | .octa 0x000000011dc037f80000000004f101e8 | |
129 | ||
130 | /* x^230400 mod p(x)` << 1, x^230464 mod p(x)` << 1 */ | |
131 | .octa 0x0000000031433dfc0000000070bcf184 | |
132 | ||
133 | /* x^229376 mod p(x)` << 1, x^229440 mod p(x)` << 1 */ | |
134 | .octa 0x000000009cde8348000000000a8de642 | |
135 | ||
136 | /* x^228352 mod p(x)` << 1, x^228416 mod p(x)` << 1 */ | |
137 | .octa 0x0000000038d3c2a60000000062ea130c | |
138 | ||
139 | /* x^227328 mod p(x)` << 1, x^227392 mod p(x)` << 1 */ | |
140 | .octa 0x000000011b25f26000000001eb31cbb2 | |
141 | ||
142 | /* x^226304 mod p(x)` << 1, x^226368 mod p(x)` << 1 */ | |
143 | .octa 0x000000001629e6f00000000170783448 | |
144 | ||
145 | /* x^225280 mod p(x)` << 1, x^225344 mod p(x)` << 1 */ | |
146 | .octa 0x0000000160838b4c00000001a684b4c6 | |
147 | ||
148 | /* x^224256 mod p(x)` << 1, x^224320 mod p(x)` << 1 */ | |
149 | .octa 0x000000007a44011c00000000253ca5b4 | |
150 | ||
151 | /* x^223232 mod p(x)` << 1, x^223296 mod p(x)` << 1 */ | |
152 | .octa 0x00000000226f417a0000000057b4b1e2 | |
153 | ||
154 | /* x^222208 mod p(x)` << 1, x^222272 mod p(x)` << 1 */ | |
155 | .octa 0x0000000045eb2eb400000000b6bd084c | |
156 | ||
157 | /* x^221184 mod p(x)` << 1, x^221248 mod p(x)` << 1 */ | |
158 | .octa 0x000000014459d70c0000000123c2d592 | |
159 | ||
160 | /* x^220160 mod p(x)` << 1, x^220224 mod p(x)` << 1 */ | |
161 | .octa 0x00000001d406ed8200000000159dafce | |
162 | ||
163 | /* x^219136 mod p(x)` << 1, x^219200 mod p(x)` << 1 */ | |
164 | .octa 0x0000000160c8e1a80000000127e1a64e | |
165 | ||
166 | /* x^218112 mod p(x)` << 1, x^218176 mod p(x)` << 1 */ | |
167 | .octa 0x0000000027ba80980000000056860754 | |
168 | ||
169 | /* x^217088 mod p(x)` << 1, x^217152 mod p(x)` << 1 */ | |
170 | .octa 0x000000006d92d01800000001e661aae8 | |
171 | ||
172 | /* x^216064 mod p(x)` << 1, x^216128 mod p(x)` << 1 */ | |
173 | .octa 0x000000012ed7e3f200000000f82c6166 | |
174 | ||
175 | /* x^215040 mod p(x)` << 1, x^215104 mod p(x)` << 1 */ | |
176 | .octa 0x000000002dc8778800000000c4f9c7ae | |
177 | ||
178 | /* x^214016 mod p(x)` << 1, x^214080 mod p(x)` << 1 */ | |
179 | .octa 0x0000000018240bb80000000074203d20 | |
180 | ||
181 | /* x^212992 mod p(x)` << 1, x^213056 mod p(x)` << 1 */ | |
182 | .octa 0x000000001ad381580000000198173052 | |
183 | ||
184 | /* x^211968 mod p(x)` << 1, x^212032 mod p(x)` << 1 */ | |
185 | .octa 0x00000001396b78f200000001ce8aba54 | |
186 | ||
187 | /* x^210944 mod p(x)` << 1, x^211008 mod p(x)` << 1 */ | |
188 | .octa 0x000000011a68133400000001850d5d94 | |
189 | ||
190 | /* x^209920 mod p(x)` << 1, x^209984 mod p(x)` << 1 */ | |
191 | .octa 0x000000012104732e00000001d609239c | |
192 | ||
193 | /* x^208896 mod p(x)` << 1, x^208960 mod p(x)` << 1 */ | |
194 | .octa 0x00000000a140d90c000000001595f048 | |
195 | ||
196 | /* x^207872 mod p(x)` << 1, x^207936 mod p(x)` << 1 */ | |
197 | .octa 0x00000001b7215eda0000000042ccee08 | |
198 | ||
199 | /* x^206848 mod p(x)` << 1, x^206912 mod p(x)` << 1 */ | |
200 | .octa 0x00000001aaf1df3c000000010a389d74 | |
201 | ||
202 | /* x^205824 mod p(x)` << 1, x^205888 mod p(x)` << 1 */ | |
203 | .octa 0x0000000029d15b8a000000012a840da6 | |
204 | ||
205 | /* x^204800 mod p(x)` << 1, x^204864 mod p(x)` << 1 */ | |
206 | .octa 0x00000000f1a96922000000001d181c0c | |
207 | ||
208 | /* x^203776 mod p(x)` << 1, x^203840 mod p(x)` << 1 */ | |
209 | .octa 0x00000001ac80d03c0000000068b7d1f6 | |
210 | ||
211 | /* x^202752 mod p(x)` << 1, x^202816 mod p(x)` << 1 */ | |
212 | .octa 0x000000000f11d56a000000005b0f14fc | |
213 | ||
214 | /* x^201728 mod p(x)` << 1, x^201792 mod p(x)` << 1 */ | |
215 | .octa 0x00000001f1c022a20000000179e9e730 | |
216 | ||
217 | /* x^200704 mod p(x)` << 1, x^200768 mod p(x)` << 1 */ | |
218 | .octa 0x0000000173d00ae200000001ce1368d6 | |
219 | ||
220 | /* x^199680 mod p(x)` << 1, x^199744 mod p(x)` << 1 */ | |
221 | .octa 0x00000001d4ffe4ac0000000112c3a84c | |
222 | ||
223 | /* x^198656 mod p(x)` << 1, x^198720 mod p(x)` << 1 */ | |
224 | .octa 0x000000016edc5ae400000000de940fee | |
225 | ||
226 | /* x^197632 mod p(x)` << 1, x^197696 mod p(x)` << 1 */ | |
227 | .octa 0x00000001f1a0214000000000fe896b7e | |
228 | ||
229 | /* x^196608 mod p(x)` << 1, x^196672 mod p(x)` << 1 */ | |
230 | .octa 0x00000000ca0b28a000000001f797431c | |
231 | ||
232 | /* x^195584 mod p(x)` << 1, x^195648 mod p(x)` << 1 */ | |
233 | .octa 0x00000001928e30a20000000053e989ba | |
234 | ||
235 | /* x^194560 mod p(x)` << 1, x^194624 mod p(x)` << 1 */ | |
236 | .octa 0x0000000097b1b002000000003920cd16 | |
237 | ||
238 | /* x^193536 mod p(x)` << 1, x^193600 mod p(x)` << 1 */ | |
239 | .octa 0x00000000b15bf90600000001e6f579b8 | |
240 | ||
241 | /* x^192512 mod p(x)` << 1, x^192576 mod p(x)` << 1 */ | |
242 | .octa 0x00000000411c5d52000000007493cb0a | |
243 | ||
244 | /* x^191488 mod p(x)` << 1, x^191552 mod p(x)` << 1 */ | |
245 | .octa 0x00000001c36f330000000001bdd376d8 | |
246 | ||
247 | /* x^190464 mod p(x)` << 1, x^190528 mod p(x)` << 1 */ | |
248 | .octa 0x00000001119227e0000000016badfee6 | |
249 | ||
250 | /* x^189440 mod p(x)` << 1, x^189504 mod p(x)` << 1 */ | |
251 | .octa 0x00000000114d47020000000071de5c58 | |
252 | ||
253 | /* x^188416 mod p(x)` << 1, x^188480 mod p(x)` << 1 */ | |
254 | .octa 0x00000000458b5b9800000000453f317c | |
255 | ||
256 | /* x^187392 mod p(x)` << 1, x^187456 mod p(x)` << 1 */ | |
257 | .octa 0x000000012e31fb8e0000000121675cce | |
258 | ||
259 | /* x^186368 mod p(x)` << 1, x^186432 mod p(x)` << 1 */ | |
260 | .octa 0x000000005cf619d800000001f409ee92 | |
261 | ||
262 | /* x^185344 mod p(x)` << 1, x^185408 mod p(x)` << 1 */ | |
263 | .octa 0x0000000063f4d8b200000000f36b9c88 | |
264 | ||
265 | /* x^184320 mod p(x)` << 1, x^184384 mod p(x)` << 1 */ | |
266 | .octa 0x000000004138dc8a0000000036b398f4 | |
267 | ||
268 | /* x^183296 mod p(x)` << 1, x^183360 mod p(x)` << 1 */ | |
269 | .octa 0x00000001d29ee8e000000001748f9adc | |
270 | ||
271 | /* x^182272 mod p(x)` << 1, x^182336 mod p(x)` << 1 */ | |
272 | .octa 0x000000006a08ace800000001be94ec00 | |
273 | ||
274 | /* x^181248 mod p(x)` << 1, x^181312 mod p(x)` << 1 */ | |
275 | .octa 0x0000000127d4201000000000b74370d6 | |
276 | ||
277 | /* x^180224 mod p(x)` << 1, x^180288 mod p(x)` << 1 */ | |
278 | .octa 0x0000000019d76b6200000001174d0b98 | |
279 | ||
280 | /* x^179200 mod p(x)` << 1, x^179264 mod p(x)` << 1 */ | |
281 | .octa 0x00000001b1471f6e00000000befc06a4 | |
282 | ||
283 | /* x^178176 mod p(x)` << 1, x^178240 mod p(x)` << 1 */ | |
284 | .octa 0x00000001f64c19cc00000001ae125288 | |
285 | ||
286 | /* x^177152 mod p(x)` << 1, x^177216 mod p(x)` << 1 */ | |
287 | .octa 0x00000000003c0ea00000000095c19b34 | |
288 | ||
289 | /* x^176128 mod p(x)` << 1, x^176192 mod p(x)` << 1 */ | |
290 | .octa 0x000000014d73abf600000001a78496f2 | |
291 | ||
292 | /* x^175104 mod p(x)` << 1, x^175168 mod p(x)` << 1 */ | |
293 | .octa 0x00000001620eb84400000001ac5390a0 | |
294 | ||
295 | /* x^174080 mod p(x)` << 1, x^174144 mod p(x)` << 1 */ | |
296 | .octa 0x0000000147655048000000002a80ed6e | |
297 | ||
298 | /* x^173056 mod p(x)` << 1, x^173120 mod p(x)` << 1 */ | |
299 | .octa 0x0000000067b5077e00000001fa9b0128 | |
300 | ||
301 | /* x^172032 mod p(x)` << 1, x^172096 mod p(x)` << 1 */ | |
302 | .octa 0x0000000010ffe20600000001ea94929e | |
303 | ||
304 | /* x^171008 mod p(x)` << 1, x^171072 mod p(x)` << 1 */ | |
305 | .octa 0x000000000fee8f1e0000000125f4305c | |
306 | ||
307 | /* x^169984 mod p(x)` << 1, x^170048 mod p(x)` << 1 */ | |
308 | .octa 0x00000001da26fbae00000001471e2002 | |
309 | ||
310 | /* x^168960 mod p(x)` << 1, x^169024 mod p(x)` << 1 */ | |
311 | .octa 0x00000001b3a8bd880000000132d2253a | |
312 | ||
313 | /* x^167936 mod p(x)` << 1, x^168000 mod p(x)` << 1 */ | |
314 | .octa 0x00000000e8f3898e00000000f26b3592 | |
315 | ||
316 | /* x^166912 mod p(x)` << 1, x^166976 mod p(x)` << 1 */ | |
317 | .octa 0x00000000b0d0d28c00000000bc8b67b0 | |
318 | ||
319 | /* x^165888 mod p(x)` << 1, x^165952 mod p(x)` << 1 */ | |
320 | .octa 0x0000000030f2a798000000013a826ef2 | |
321 | ||
322 | /* x^164864 mod p(x)` << 1, x^164928 mod p(x)` << 1 */ | |
323 | .octa 0x000000000fba10020000000081482c84 | |
324 | ||
325 | /* x^163840 mod p(x)` << 1, x^163904 mod p(x)` << 1 */ | |
326 | .octa 0x00000000bdb9bd7200000000e77307c2 | |
327 | ||
328 | /* x^162816 mod p(x)` << 1, x^162880 mod p(x)` << 1 */ | |
329 | .octa 0x0000000075d3bf5a00000000d4a07ec8 | |
330 | ||
331 | /* x^161792 mod p(x)` << 1, x^161856 mod p(x)` << 1 */ | |
332 | .octa 0x00000000ef1f98a00000000017102100 | |
333 | ||
334 | /* x^160768 mod p(x)` << 1, x^160832 mod p(x)` << 1 */ | |
335 | .octa 0x00000000689c760200000000db406486 | |
336 | ||
337 | /* x^159744 mod p(x)` << 1, x^159808 mod p(x)` << 1 */ | |
338 | .octa 0x000000016d5fa5fe0000000192db7f88 | |
339 | ||
340 | /* x^158720 mod p(x)` << 1, x^158784 mod p(x)` << 1 */ | |
341 | .octa 0x00000001d0d2b9ca000000018bf67b1e | |
342 | ||
343 | /* x^157696 mod p(x)` << 1, x^157760 mod p(x)` << 1 */ | |
344 | .octa 0x0000000041e7b470000000007c09163e | |
345 | ||
346 | /* x^156672 mod p(x)` << 1, x^156736 mod p(x)` << 1 */ | |
347 | .octa 0x00000001cbb6495e000000000adac060 | |
348 | ||
349 | /* x^155648 mod p(x)` << 1, x^155712 mod p(x)` << 1 */ | |
350 | .octa 0x000000010052a0b000000000bd8316ae | |
351 | ||
352 | /* x^154624 mod p(x)` << 1, x^154688 mod p(x)` << 1 */ | |
353 | .octa 0x00000001d8effb5c000000019f09ab54 | |
354 | ||
355 | /* x^153600 mod p(x)` << 1, x^153664 mod p(x)` << 1 */ | |
356 | .octa 0x00000001d969853c0000000125155542 | |
357 | ||
358 | /* x^152576 mod p(x)` << 1, x^152640 mod p(x)` << 1 */ | |
359 | .octa 0x00000000523ccce2000000018fdb5882 | |
360 | ||
361 | /* x^151552 mod p(x)` << 1, x^151616 mod p(x)` << 1 */ | |
362 | .octa 0x000000001e2436bc00000000e794b3f4 | |
363 | ||
364 | /* x^150528 mod p(x)` << 1, x^150592 mod p(x)` << 1 */ | |
365 | .octa 0x00000000ddd1c3a2000000016f9bb022 | |
366 | ||
367 | /* x^149504 mod p(x)` << 1, x^149568 mod p(x)` << 1 */ | |
368 | .octa 0x0000000019fcfe3800000000290c9978 | |
369 | ||
370 | /* x^148480 mod p(x)` << 1, x^148544 mod p(x)` << 1 */ | |
371 | .octa 0x00000001ce95db640000000083c0f350 | |
372 | ||
373 | /* x^147456 mod p(x)` << 1, x^147520 mod p(x)` << 1 */ | |
374 | .octa 0x00000000af5828060000000173ea6628 | |
375 | ||
376 | /* x^146432 mod p(x)` << 1, x^146496 mod p(x)` << 1 */ | |
377 | .octa 0x00000001006388f600000001c8b4e00a | |
378 | ||
379 | /* x^145408 mod p(x)` << 1, x^145472 mod p(x)` << 1 */ | |
380 | .octa 0x0000000179eca00a00000000de95d6aa | |
381 | ||
382 | /* x^144384 mod p(x)` << 1, x^144448 mod p(x)` << 1 */ | |
383 | .octa 0x0000000122410a6a000000010b7f7248 | |
384 | ||
385 | /* x^143360 mod p(x)` << 1, x^143424 mod p(x)` << 1 */ | |
386 | .octa 0x000000004288e87c00000001326e3a06 | |
387 | ||
388 | /* x^142336 mod p(x)` << 1, x^142400 mod p(x)` << 1 */ | |
389 | .octa 0x000000016c5490da00000000bb62c2e6 | |
390 | ||
391 | /* x^141312 mod p(x)` << 1, x^141376 mod p(x)` << 1 */ | |
392 | .octa 0x00000000d1c71f6e0000000156a4b2c2 | |
393 | ||
394 | /* x^140288 mod p(x)` << 1, x^140352 mod p(x)` << 1 */ | |
395 | .octa 0x00000001b4ce08a6000000011dfe763a | |
396 | ||
397 | /* x^139264 mod p(x)` << 1, x^139328 mod p(x)` << 1 */ | |
398 | .octa 0x00000001466ba60c000000007bcca8e2 | |
399 | ||
400 | /* x^138240 mod p(x)` << 1, x^138304 mod p(x)` << 1 */ | |
401 | .octa 0x00000001f6c488a40000000186118faa | |
402 | ||
403 | /* x^137216 mod p(x)` << 1, x^137280 mod p(x)` << 1 */ | |
404 | .octa 0x000000013bfb06820000000111a65a88 | |
405 | ||
406 | /* x^136192 mod p(x)` << 1, x^136256 mod p(x)` << 1 */ | |
407 | .octa 0x00000000690e9e54000000003565e1c4 | |
408 | ||
409 | /* x^135168 mod p(x)` << 1, x^135232 mod p(x)` << 1 */ | |
410 | .octa 0x00000000281346b6000000012ed02a82 | |
411 | ||
412 | /* x^134144 mod p(x)` << 1, x^134208 mod p(x)` << 1 */ | |
413 | .octa 0x000000015646402400000000c486ecfc | |
414 | ||
415 | /* x^133120 mod p(x)` << 1, x^133184 mod p(x)` << 1 */ | |
416 | .octa 0x000000016063a8dc0000000001b951b2 | |
417 | ||
418 | /* x^132096 mod p(x)` << 1, x^132160 mod p(x)` << 1 */ | |
419 | .octa 0x0000000116a663620000000048143916 | |
420 | ||
421 | /* x^131072 mod p(x)` << 1, x^131136 mod p(x)` << 1 */ | |
422 | .octa 0x000000017e8aa4d200000001dc2ae124 | |
423 | ||
424 | /* x^130048 mod p(x)` << 1, x^130112 mod p(x)` << 1 */ | |
425 | .octa 0x00000001728eb10c00000001416c58d6 | |
426 | ||
427 | /* x^129024 mod p(x)` << 1, x^129088 mod p(x)` << 1 */ | |
428 | .octa 0x00000001b08fd7fa00000000a479744a | |
429 | ||
430 | /* x^128000 mod p(x)` << 1, x^128064 mod p(x)` << 1 */ | |
431 | .octa 0x00000001092a16e80000000096ca3a26 | |
432 | ||
433 | /* x^126976 mod p(x)` << 1, x^127040 mod p(x)` << 1 */ | |
434 | .octa 0x00000000a505637c00000000ff223d4e | |
435 | ||
436 | /* x^125952 mod p(x)` << 1, x^126016 mod p(x)` << 1 */ | |
437 | .octa 0x00000000d94869b2000000010e84da42 | |
438 | ||
439 | /* x^124928 mod p(x)` << 1, x^124992 mod p(x)` << 1 */ | |
440 | .octa 0x00000001c8b203ae00000001b61ba3d0 | |
441 | ||
442 | /* x^123904 mod p(x)` << 1, x^123968 mod p(x)` << 1 */ | |
443 | .octa 0x000000005704aea000000000680f2de8 | |
444 | ||
445 | /* x^122880 mod p(x)` << 1, x^122944 mod p(x)` << 1 */ | |
446 | .octa 0x000000012e295fa2000000008772a9a8 | |
447 | ||
448 | /* x^121856 mod p(x)` << 1, x^121920 mod p(x)` << 1 */ | |
449 | .octa 0x000000011d0908bc0000000155f295bc | |
450 | ||
451 | /* x^120832 mod p(x)` << 1, x^120896 mod p(x)` << 1 */ | |
452 | .octa 0x0000000193ed97ea00000000595f9282 | |
453 | ||
454 | /* x^119808 mod p(x)` << 1, x^119872 mod p(x)` << 1 */ | |
455 | .octa 0x000000013a0f1c520000000164b1c25a | |
456 | ||
457 | /* x^118784 mod p(x)` << 1, x^118848 mod p(x)` << 1 */ | |
458 | .octa 0x000000010c2c40c000000000fbd67c50 | |
459 | ||
460 | /* x^117760 mod p(x)` << 1, x^117824 mod p(x)` << 1 */ | |
461 | .octa 0x00000000ff6fac3e0000000096076268 | |
462 | ||
463 | /* x^116736 mod p(x)` << 1, x^116800 mod p(x)` << 1 */ | |
464 | .octa 0x000000017b3609c000000001d288e4cc | |
465 | ||
466 | /* x^115712 mod p(x)` << 1, x^115776 mod p(x)` << 1 */ | |
467 | .octa 0x0000000088c8c92200000001eaac1bdc | |
468 | ||
469 | /* x^114688 mod p(x)` << 1, x^114752 mod p(x)` << 1 */ | |
470 | .octa 0x00000001751baae600000001f1ea39e2 | |
471 | ||
472 | /* x^113664 mod p(x)` << 1, x^113728 mod p(x)` << 1 */ | |
473 | .octa 0x000000010795297200000001eb6506fc | |
474 | ||
475 | /* x^112640 mod p(x)` << 1, x^112704 mod p(x)` << 1 */ | |
476 | .octa 0x0000000162b00abe000000010f806ffe | |
477 | ||
478 | /* x^111616 mod p(x)` << 1, x^111680 mod p(x)` << 1 */ | |
479 | .octa 0x000000000d7b404c000000010408481e | |
480 | ||
481 | /* x^110592 mod p(x)` << 1, x^110656 mod p(x)` << 1 */ | |
482 | .octa 0x00000000763b13d40000000188260534 | |
483 | ||
484 | /* x^109568 mod p(x)` << 1, x^109632 mod p(x)` << 1 */ | |
485 | .octa 0x00000000f6dc22d80000000058fc73e0 | |
486 | ||
487 | /* x^108544 mod p(x)` << 1, x^108608 mod p(x)` << 1 */ | |
488 | .octa 0x000000007daae06000000000391c59b8 | |
489 | ||
490 | /* x^107520 mod p(x)` << 1, x^107584 mod p(x)` << 1 */ | |
491 | .octa 0x000000013359ab7c000000018b638400 | |
492 | ||
493 | /* x^106496 mod p(x)` << 1, x^106560 mod p(x)` << 1 */ | |
494 | .octa 0x000000008add438a000000011738f5c4 | |
495 | ||
496 | /* x^105472 mod p(x)` << 1, x^105536 mod p(x)` << 1 */ | |
497 | .octa 0x00000001edbefdea000000008cf7c6da | |
498 | ||
499 | /* x^104448 mod p(x)` << 1, x^104512 mod p(x)` << 1 */ | |
500 | .octa 0x000000004104e0f800000001ef97fb16 | |
501 | ||
502 | /* x^103424 mod p(x)` << 1, x^103488 mod p(x)` << 1 */ | |
503 | .octa 0x00000000b48a82220000000102130e20 | |
504 | ||
505 | /* x^102400 mod p(x)` << 1, x^102464 mod p(x)` << 1 */ | |
506 | .octa 0x00000001bcb4684400000000db968898 | |
507 | ||
508 | /* x^101376 mod p(x)` << 1, x^101440 mod p(x)` << 1 */ | |
509 | .octa 0x000000013293ce0a00000000b5047b5e | |
510 | ||
511 | /* x^100352 mod p(x)` << 1, x^100416 mod p(x)` << 1 */ | |
512 | .octa 0x00000001710d0844000000010b90fdb2 | |
513 | ||
514 | /* x^99328 mod p(x)` << 1, x^99392 mod p(x)` << 1 */ | |
515 | .octa 0x0000000117907f6e000000004834a32e | |
516 | ||
517 | /* x^98304 mod p(x)` << 1, x^98368 mod p(x)` << 1 */ | |
518 | .octa 0x0000000087ddf93e0000000059c8f2b0 | |
519 | ||
520 | /* x^97280 mod p(x)` << 1, x^97344 mod p(x)` << 1 */ | |
521 | .octa 0x000000005970e9b00000000122cec508 | |
522 | ||
523 | /* x^96256 mod p(x)` << 1, x^96320 mod p(x)` << 1 */ | |
524 | .octa 0x0000000185b2b7d0000000000a330cda | |
525 | ||
526 | /* x^95232 mod p(x)` << 1, x^95296 mod p(x)` << 1 */ | |
527 | .octa 0x00000001dcee0efc000000014a47148c | |
528 | ||
529 | /* x^94208 mod p(x)` << 1, x^94272 mod p(x)` << 1 */ | |
530 | .octa 0x0000000030da27220000000042c61cb8 | |
531 | ||
532 | /* x^93184 mod p(x)` << 1, x^93248 mod p(x)` << 1 */ | |
533 | .octa 0x000000012f925a180000000012fe6960 | |
534 | ||
535 | /* x^92160 mod p(x)` << 1, x^92224 mod p(x)` << 1 */ | |
536 | .octa 0x00000000dd2e357c00000000dbda2c20 | |
537 | ||
538 | /* x^91136 mod p(x)` << 1, x^91200 mod p(x)` << 1 */ | |
539 | .octa 0x00000000071c80de000000011122410c | |
540 | ||
541 | /* x^90112 mod p(x)` << 1, x^90176 mod p(x)` << 1 */ | |
542 | .octa 0x000000011513140a00000000977b2070 | |
543 | ||
544 | /* x^89088 mod p(x)` << 1, x^89152 mod p(x)` << 1 */ | |
545 | .octa 0x00000001df876e8e000000014050438e | |
546 | ||
547 | /* x^88064 mod p(x)` << 1, x^88128 mod p(x)` << 1 */ | |
548 | .octa 0x000000015f81d6ce0000000147c840e8 | |
549 | ||
550 | /* x^87040 mod p(x)` << 1, x^87104 mod p(x)` << 1 */ | |
551 | .octa 0x000000019dd94dbe00000001cc7c88ce | |
552 | ||
553 | /* x^86016 mod p(x)` << 1, x^86080 mod p(x)` << 1 */ | |
554 | .octa 0x00000001373d206e00000001476b35a4 | |
555 | ||
556 | /* x^84992 mod p(x)` << 1, x^85056 mod p(x)` << 1 */ | |
557 | .octa 0x00000000668ccade000000013d52d508 | |
558 | ||
559 | /* x^83968 mod p(x)` << 1, x^84032 mod p(x)` << 1 */ | |
560 | .octa 0x00000001b192d268000000008e4be32e | |
561 | ||
562 | /* x^82944 mod p(x)` << 1, x^83008 mod p(x)` << 1 */ | |
563 | .octa 0x00000000e30f3a7800000000024120fe | |
564 | ||
565 | /* x^81920 mod p(x)` << 1, x^81984 mod p(x)` << 1 */ | |
566 | .octa 0x000000010ef1f7bc00000000ddecddb4 | |
567 | ||
568 | /* x^80896 mod p(x)` << 1, x^80960 mod p(x)` << 1 */ | |
569 | .octa 0x00000001f5ac738000000000d4d403bc | |
570 | ||
571 | /* x^79872 mod p(x)` << 1, x^79936 mod p(x)` << 1 */ | |
572 | .octa 0x000000011822ea7000000001734b89aa | |
573 | ||
574 | /* x^78848 mod p(x)` << 1, x^78912 mod p(x)` << 1 */ | |
575 | .octa 0x00000000c3a33848000000010e7a58d6 | |
576 | ||
577 | /* x^77824 mod p(x)` << 1, x^77888 mod p(x)` << 1 */ | |
578 | .octa 0x00000001bd151c2400000001f9f04e9c | |
579 | ||
580 | /* x^76800 mod p(x)` << 1, x^76864 mod p(x)` << 1 */ | |
581 | .octa 0x0000000056002d7600000000b692225e | |
582 | ||
583 | /* x^75776 mod p(x)` << 1, x^75840 mod p(x)` << 1 */ | |
584 | .octa 0x000000014657c4f4000000019b8d3f3e | |
585 | ||
586 | /* x^74752 mod p(x)` << 1, x^74816 mod p(x)` << 1 */ | |
587 | .octa 0x0000000113742d7c00000001a874f11e | |
588 | ||
589 | /* x^73728 mod p(x)` << 1, x^73792 mod p(x)` << 1 */ | |
590 | .octa 0x000000019c5920ba000000010d5a4254 | |
591 | ||
592 | /* x^72704 mod p(x)` << 1, x^72768 mod p(x)` << 1 */ | |
593 | .octa 0x000000005216d2d600000000bbb2f5d6 | |
594 | ||
595 | /* x^71680 mod p(x)` << 1, x^71744 mod p(x)` << 1 */ | |
596 | .octa 0x0000000136f5ad8a0000000179cc0e36 | |
597 | ||
598 | /* x^70656 mod p(x)` << 1, x^70720 mod p(x)` << 1 */ | |
599 | .octa 0x000000018b07beb600000001dca1da4a | |
600 | ||
601 | /* x^69632 mod p(x)` << 1, x^69696 mod p(x)` << 1 */ | |
602 | .octa 0x00000000db1e93b000000000feb1a192 | |
603 | ||
604 | /* x^68608 mod p(x)` << 1, x^68672 mod p(x)` << 1 */ | |
605 | .octa 0x000000000b96fa3a00000000d1eeedd6 | |
606 | ||
607 | /* x^67584 mod p(x)` << 1, x^67648 mod p(x)` << 1 */ | |
608 | .octa 0x00000001d9968af0000000008fad9bb4 | |
609 | ||
610 | /* x^66560 mod p(x)` << 1, x^66624 mod p(x)` << 1 */ | |
611 | .octa 0x000000000e4a77a200000001884938e4 | |
612 | ||
613 | /* x^65536 mod p(x)` << 1, x^65600 mod p(x)` << 1 */ | |
614 | .octa 0x00000000508c2ac800000001bc2e9bc0 | |
615 | ||
616 | /* x^64512 mod p(x)` << 1, x^64576 mod p(x)` << 1 */ | |
617 | .octa 0x0000000021572a8000000001f9658a68 | |
618 | ||
619 | /* x^63488 mod p(x)` << 1, x^63552 mod p(x)` << 1 */ | |
620 | .octa 0x00000001b859daf2000000001b9224fc | |
621 | ||
622 | /* x^62464 mod p(x)` << 1, x^62528 mod p(x)` << 1 */ | |
623 | .octa 0x000000016f7884740000000055b2fb84 | |
624 | ||
625 | /* x^61440 mod p(x)` << 1, x^61504 mod p(x)` << 1 */ | |
626 | .octa 0x00000001b438810e000000018b090348 | |
627 | ||
628 | /* x^60416 mod p(x)` << 1, x^60480 mod p(x)` << 1 */ | |
629 | .octa 0x0000000095ddc6f2000000011ccbd5ea | |
630 | ||
631 | /* x^59392 mod p(x)` << 1, x^59456 mod p(x)` << 1 */ | |
632 | .octa 0x00000001d977c20c0000000007ae47f8 | |
633 | ||
634 | /* x^58368 mod p(x)` << 1, x^58432 mod p(x)` << 1 */ | |
635 | .octa 0x00000000ebedb99a0000000172acbec0 | |
636 | ||
637 | /* x^57344 mod p(x)` << 1, x^57408 mod p(x)` << 1 */ | |
638 | .octa 0x00000001df9e9e9200000001c6e3ff20 | |
639 | ||
640 | /* x^56320 mod p(x)` << 1, x^56384 mod p(x)` << 1 */ | |
641 | .octa 0x00000001a4a3f95200000000e1b38744 | |
642 | ||
643 | /* x^55296 mod p(x)` << 1, x^55360 mod p(x)` << 1 */ | |
644 | .octa 0x00000000e2f5122000000000791585b2 | |
645 | ||
646 | /* x^54272 mod p(x)` << 1, x^54336 mod p(x)` << 1 */ | |
647 | .octa 0x000000004aa01f3e00000000ac53b894 | |
648 | ||
649 | /* x^53248 mod p(x)` << 1, x^53312 mod p(x)` << 1 */ | |
650 | .octa 0x00000000b3e90a5800000001ed5f2cf4 | |
651 | ||
652 | /* x^52224 mod p(x)` << 1, x^52288 mod p(x)` << 1 */ | |
653 | .octa 0x000000000c9ca2aa00000001df48b2e0 | |
654 | ||
655 | /* x^51200 mod p(x)` << 1, x^51264 mod p(x)` << 1 */ | |
656 | .octa 0x000000015168231600000000049c1c62 | |
657 | ||
658 | /* x^50176 mod p(x)` << 1, x^50240 mod p(x)` << 1 */ | |
659 | .octa 0x0000000036fce78c000000017c460c12 | |
660 | ||
661 | /* x^49152 mod p(x)` << 1, x^49216 mod p(x)` << 1 */ | |
662 | .octa 0x000000009037dc10000000015be4da7e | |
663 | ||
664 | /* x^48128 mod p(x)` << 1, x^48192 mod p(x)` << 1 */ | |
665 | .octa 0x00000000d3298582000000010f38f668 | |
666 | ||
667 | /* x^47104 mod p(x)` << 1, x^47168 mod p(x)` << 1 */ | |
668 | .octa 0x00000001b42e8ad60000000039f40a00 | |
669 | ||
670 | /* x^46080 mod p(x)` << 1, x^46144 mod p(x)` << 1 */ | |
671 | .octa 0x00000000142a983800000000bd4c10c4 | |
672 | ||
673 | /* x^45056 mod p(x)` << 1, x^45120 mod p(x)` << 1 */ | |
674 | .octa 0x0000000109c7f1900000000042db1d98 | |
675 | ||
676 | /* x^44032 mod p(x)` << 1, x^44096 mod p(x)` << 1 */ | |
677 | .octa 0x0000000056ff931000000001c905bae6 | |
678 | ||
679 | /* x^43008 mod p(x)` << 1, x^43072 mod p(x)` << 1 */ | |
680 | .octa 0x00000001594513aa00000000069d40ea | |
681 | ||
682 | /* x^41984 mod p(x)` << 1, x^42048 mod p(x)` << 1 */ | |
683 | .octa 0x00000001e3b5b1e8000000008e4fbad0 | |
684 | ||
685 | /* x^40960 mod p(x)` << 1, x^41024 mod p(x)` << 1 */ | |
686 | .octa 0x000000011dd5fc080000000047bedd46 | |
687 | ||
688 | /* x^39936 mod p(x)` << 1, x^40000 mod p(x)` << 1 */ | |
689 | .octa 0x00000001675f0cc20000000026396bf8 | |
690 | ||
691 | /* x^38912 mod p(x)` << 1, x^38976 mod p(x)` << 1 */ | |
692 | .octa 0x00000000d1c8dd4400000000379beb92 | |
693 | ||
694 | /* x^37888 mod p(x)` << 1, x^37952 mod p(x)` << 1 */ | |
695 | .octa 0x0000000115ebd3d8000000000abae54a | |
696 | ||
697 | /* x^36864 mod p(x)` << 1, x^36928 mod p(x)` << 1 */ | |
698 | .octa 0x00000001ecbd0dac0000000007e6a128 | |
699 | ||
700 | /* x^35840 mod p(x)` << 1, x^35904 mod p(x)` << 1 */ | |
701 | .octa 0x00000000cdf67af2000000000ade29d2 | |
702 | ||
703 | /* x^34816 mod p(x)` << 1, x^34880 mod p(x)` << 1 */ | |
704 | .octa 0x000000004c01ff4c00000000f974c45c | |
705 | ||
706 | /* x^33792 mod p(x)` << 1, x^33856 mod p(x)` << 1 */ | |
707 | .octa 0x00000000f2d8657e00000000e77ac60a | |
708 | ||
709 | /* x^32768 mod p(x)` << 1, x^32832 mod p(x)` << 1 */ | |
710 | .octa 0x000000006bae74c40000000145895816 | |
711 | ||
712 | /* x^31744 mod p(x)` << 1, x^31808 mod p(x)` << 1 */ | |
713 | .octa 0x0000000152af8aa00000000038e362be | |
714 | ||
715 | /* x^30720 mod p(x)` << 1, x^30784 mod p(x)` << 1 */ | |
716 | .octa 0x0000000004663802000000007f991a64 | |
717 | ||
718 | /* x^29696 mod p(x)` << 1, x^29760 mod p(x)` << 1 */ | |
719 | .octa 0x00000001ab2f5afc00000000fa366d3a | |
720 | ||
721 | /* x^28672 mod p(x)` << 1, x^28736 mod p(x)` << 1 */ | |
722 | .octa 0x0000000074a4ebd400000001a2bb34f0 | |
723 | ||
724 | /* x^27648 mod p(x)` << 1, x^27712 mod p(x)` << 1 */ | |
725 | .octa 0x00000001d7ab3a4c0000000028a9981e | |
726 | ||
727 | /* x^26624 mod p(x)` << 1, x^26688 mod p(x)` << 1 */ | |
728 | .octa 0x00000001a8da60c600000001dbc672be | |
729 | ||
730 | /* x^25600 mod p(x)` << 1, x^25664 mod p(x)` << 1 */ | |
731 | .octa 0x000000013cf6382000000000b04d77f6 | |
732 | ||
733 | /* x^24576 mod p(x)` << 1, x^24640 mod p(x)` << 1 */ | |
734 | .octa 0x00000000bec12e1e0000000124400d96 | |
735 | ||
736 | /* x^23552 mod p(x)` << 1, x^23616 mod p(x)` << 1 */ | |
737 | .octa 0x00000001c6368010000000014ca4b414 | |
738 | ||
739 | /* x^22528 mod p(x)` << 1, x^22592 mod p(x)` << 1 */ | |
740 | .octa 0x00000001e6e78758000000012fe2c938 | |
741 | ||
742 | /* x^21504 mod p(x)` << 1, x^21568 mod p(x)` << 1 */ | |
743 | .octa 0x000000008d7f2b3c00000001faed01e6 | |
744 | ||
745 | /* x^20480 mod p(x)` << 1, x^20544 mod p(x)` << 1 */ | |
746 | .octa 0x000000016b4a156e000000007e80ecfe | |
747 | ||
748 | /* x^19456 mod p(x)` << 1, x^19520 mod p(x)` << 1 */ | |
749 | .octa 0x00000001c63cfeb60000000098daee94 | |
750 | ||
751 | /* x^18432 mod p(x)` << 1, x^18496 mod p(x)` << 1 */ | |
752 | .octa 0x000000015f902670000000010a04edea | |
753 | ||
754 | /* x^17408 mod p(x)` << 1, x^17472 mod p(x)` << 1 */ | |
755 | .octa 0x00000001cd5de11e00000001c00b4524 | |
756 | ||
757 | /* x^16384 mod p(x)` << 1, x^16448 mod p(x)` << 1 */ | |
758 | .octa 0x000000001acaec540000000170296550 | |
759 | ||
760 | /* x^15360 mod p(x)` << 1, x^15424 mod p(x)` << 1 */ | |
761 | .octa 0x000000002bd0ca780000000181afaa48 | |
762 | ||
763 | /* x^14336 mod p(x)` << 1, x^14400 mod p(x)` << 1 */ | |
764 | .octa 0x0000000032d63d5c0000000185a31ffa | |
765 | ||
766 | /* x^13312 mod p(x)` << 1, x^13376 mod p(x)` << 1 */ | |
767 | .octa 0x000000001c6d4e4c000000002469f608 | |
768 | ||
769 | /* x^12288 mod p(x)` << 1, x^12352 mod p(x)` << 1 */ | |
770 | .octa 0x0000000106a60b92000000006980102a | |
771 | ||
772 | /* x^11264 mod p(x)` << 1, x^11328 mod p(x)` << 1 */ | |
773 | .octa 0x00000000d3855e120000000111ea9ca8 | |
774 | ||
775 | /* x^10240 mod p(x)` << 1, x^10304 mod p(x)` << 1 */ | |
776 | .octa 0x00000000e312563600000001bd1d29ce | |
777 | ||
778 | /* x^9216 mod p(x)` << 1, x^9280 mod p(x)` << 1 */ | |
779 | .octa 0x000000009e8f7ea400000001b34b9580 | |
780 | ||
781 | /* x^8192 mod p(x)` << 1, x^8256 mod p(x)` << 1 */ | |
782 | .octa 0x00000001c82e562c000000003076054e | |
783 | ||
784 | /* x^7168 mod p(x)` << 1, x^7232 mod p(x)` << 1 */ | |
785 | .octa 0x00000000ca9f09ce000000012a608ea4 | |
786 | ||
787 | /* x^6144 mod p(x)` << 1, x^6208 mod p(x)` << 1 */ | |
788 | .octa 0x00000000c63764e600000000784d05fe | |
789 | ||
790 | /* x^5120 mod p(x)` << 1, x^5184 mod p(x)` << 1 */ | |
791 | .octa 0x0000000168d2e49e000000016ef0d82a | |
792 | ||
793 | /* x^4096 mod p(x)` << 1, x^4160 mod p(x)` << 1 */ | |
794 | .octa 0x00000000e986c1480000000075bda454 | |
795 | ||
796 | /* x^3072 mod p(x)` << 1, x^3136 mod p(x)` << 1 */ | |
797 | .octa 0x00000000cfb65894000000003dc0a1c4 | |
798 | ||
799 | /* x^2048 mod p(x)` << 1, x^2112 mod p(x)` << 1 */ | |
800 | .octa 0x0000000111cadee400000000e9a5d8be | |
801 | ||
802 | /* x^1024 mod p(x)` << 1, x^1088 mod p(x)` << 1 */ | |
803 | .octa 0x0000000171fb63ce00000001609bc4b4 | |
804 | ||
805 | .short_constants: /* 16 entries of four 32 bit constants each (256 bytes); .Lshort starts (256 - len) bytes into this table */ | |
806 | ||
807 | /* Reduce final 1024-2048 bits to 64 bits, shifting 32 bits to include the trailing 32 bits of zeros */ | |
808 | /* x^1952 mod p(x)`, x^1984 mod p(x)`, x^2016 mod p(x)`, x^2048 mod p(x)` */ | |
809 | .octa 0x7fec2963e5bf80485cf015c388e56f72 | |
810 | ||
811 | /* x^1824 mod p(x)`, x^1856 mod p(x)`, x^1888 mod p(x)`, x^1920 mod p(x)` */ | |
812 | .octa 0x38e888d4844752a9963a18920246e2e6 | |
813 | ||
814 | /* x^1696 mod p(x)`, x^1728 mod p(x)`, x^1760 mod p(x)`, x^1792 mod p(x)` */ | |
815 | .octa 0x42316c00730206ad419a441956993a31 | |
816 | ||
817 | /* x^1568 mod p(x)`, x^1600 mod p(x)`, x^1632 mod p(x)`, x^1664 mod p(x)` */ | |
818 | .octa 0x543d5c543e65ddf9924752ba2b830011 | |
819 | ||
820 | /* x^1440 mod p(x)`, x^1472 mod p(x)`, x^1504 mod p(x)`, x^1536 mod p(x)` */ | |
821 | .octa 0x78e87aaf56767c9255bd7f9518e4a304 | |
822 | ||
823 | /* x^1312 mod p(x)`, x^1344 mod p(x)`, x^1376 mod p(x)`, x^1408 mod p(x)` */ | |
824 | .octa 0x8f68fcec1903da7f6d76739fe0553f1e | |
825 | ||
826 | /* x^1184 mod p(x)`, x^1216 mod p(x)`, x^1248 mod p(x)`, x^1280 mod p(x)` */ | |
827 | .octa 0x3f4840246791d588c133722b1fe0b5c3 | |
828 | ||
829 | /* x^1056 mod p(x)`, x^1088 mod p(x)`, x^1120 mod p(x)`, x^1152 mod p(x)` */ | |
830 | .octa 0x34c96751b04de25a64b67ee0e55ef1f3 | |
831 | ||
832 | /* x^928 mod p(x)`, x^960 mod p(x)`, x^992 mod p(x)`, x^1024 mod p(x)` */ | |
833 | .octa 0x156c8e180b4a395b069db049b8fdb1e7 | |
834 | ||
835 | /* x^800 mod p(x)`, x^832 mod p(x)`, x^864 mod p(x)`, x^896 mod p(x)` */ | |
836 | .octa 0xe0b99ccbe661f7bea11bfaf3c9e90b9e | |
837 | ||
838 | /* x^672 mod p(x)`, x^704 mod p(x)`, x^736 mod p(x)`, x^768 mod p(x)` */ | |
839 | .octa 0x041d37768cd75659817cdc5119b29a35 | |
840 | ||
841 | /* x^544 mod p(x)`, x^576 mod p(x)`, x^608 mod p(x)`, x^640 mod p(x)` */ | |
842 | .octa 0x3a0777818cfaa9651ce9d94b36c41f1c | |
843 | ||
844 | /* x^416 mod p(x)`, x^448 mod p(x)`, x^480 mod p(x)`, x^512 mod p(x)` */ | |
845 | .octa 0x0e148e8252377a554f256efcb82be955 | |
846 | ||
847 | /* x^288 mod p(x)`, x^320 mod p(x)`, x^352 mod p(x)`, x^384 mod p(x)` */ | |
848 | .octa 0x9c25531d19e65ddeec1631edb2dea967 | |
849 | ||
850 | /* x^160 mod p(x)`, x^192 mod p(x)`, x^224 mod p(x)`, x^256 mod p(x)` */ | |
851 | .octa 0x790606ff9957c0a65d27e147510ac59a | |
852 | ||
853 | /* x^32 mod p(x)`, x^64 mod p(x)`, x^96 mod p(x)`, x^128 mod p(x)` */ | |
854 | .octa 0x82f63b786ea2d55ca66805eb18b8ea18 | |
855 | ||
856 | ||
857 | .barrett_constants: /* two 16 byte entries, loaded into const1 (m) and const2 (n) by .Lbarrett_reduction */ | |
858 | /* 33 bit reflected Barrett constant m = (4^32)/n */ | |
859 | .octa 0x000000000000000000000000dea713f1 /* x^64 div p(x)` */ | |
860 | /* 33 bit reflected Barrett constant n */ | |
861 | .octa 0x00000000000000000000000105ec76f1 | |
862 | ||
863 | .text | |
864 | ||
865 | #if defined(__BIG_ENDIAN__) /* big endian builds byte-reverse every 16 byte data load via VPERM */ | |
866 | #define BYTESWAP_DATA | |
867 | #else | |
868 | #undef BYTESWAP_DATA | |
869 | #endif | |
870 | ||
871 | #define off16 r25 /* off16..off112: GPRs loaded with the byte offsets 16..112 in the prologue, used as lvx/stvx index registers */ | |
872 | #define off32 r26 | |
873 | #define off48 r27 | |
874 | #define off64 r28 | |
875 | #define off80 r29 | |
876 | #define off96 r30 | |
877 | #define off112 r31 | |
878 | ||
879 | #define const1 v24 /* const1/const2: vector registers holding the table constants currently being multiplied in */ | |
880 | #define const2 v25 | |
881 | ||
882 | #define byteswap v26 /* permute control loaded from .byteswap_constant (only when BYTESWAP_DATA is defined) */ | |
883 | #define mask_32bit v27 /* all ones in the bottom 32 bits, built with vsldoi in the prologue */ | |
884 | #define mask_64bit v28 /* all ones in the bottom 64 bits, built with vsldoi in the prologue */ | |
885 | #define zeroes v29 /* all-zero vector */ | |
886 | ||
887 | #ifdef BYTESWAP_DATA | |
888 | #define VPERM(A, B, C, D) vperm A, B, C, D | |
889 | #else | |
890 | #define VPERM(A, B, C, D) /* expands to nothing: no byte reversal when BYTESWAP_DATA is undefined */ | |
891 | #endif | |
892 | ||
893 | /* unsigned int __crc32c_vpmsum(unsigned int crc, void *p, unsigned long len): crc arrives in r3, p in r4, len in r5; the result is returned in r3 */ | |
894 | FUNC_START(__crc32c_vpmsum) | |
895 | std r31,-8(r1) /* save r25-r31 (the off16..off112 aliases) below r1; r1 itself is never moved */ | |
896 | std r30,-16(r1) | |
897 | std r29,-24(r1) | |
898 | std r28,-32(r1) | |
899 | std r27,-40(r1) | |
900 | std r26,-48(r1) | |
901 | std r25,-56(r1) | |
902 | ||
903 | li off16,16 | |
904 | li off32,32 | |
905 | li off48,48 | |
906 | li off64,64 | |
907 | li off80,80 | |
908 | li off96,96 | |
909 | li off112,112 | |
910 | li r0,0 /* r0 = 0 for the first pass; set to 1 once v16-v23 carry state from a previous pass */ | |
911 | ||
912 | /* Enough room for saving 10 non volatile VMX registers */ | |
913 | subi r6,r1,56+10*16 /* r6: base of the save slots for v20-v27 */ | |
914 | subi r7,r1,56+2*16 /* r7: base of the save slots for v28-v29 */ | |
915 | ||
916 | stvx v20,0,r6 | |
917 | stvx v21,off16,r6 | |
918 | stvx v22,off32,r6 | |
919 | stvx v23,off48,r6 | |
920 | stvx v24,off64,r6 | |
921 | stvx v25,off80,r6 | |
922 | stvx v26,off96,r6 | |
923 | stvx v27,off112,r6 | |
924 | stvx v28,0,r7 | |
925 | stvx v29,off16,r7 | |
926 | ||
927 | mr r10,r3 /* keep the incoming crc; .Lzero returns it unchanged */ | |
928 | ||
929 | vxor zeroes,zeroes,zeroes | |
930 | vspltisw v0,-1 /* v0 = all ones, used to build the two masks */ | |
931 | ||
932 | vsldoi mask_32bit,zeroes,v0,4 | |
933 | vsldoi mask_64bit,zeroes,v0,8 | |
934 | ||
935 | /* Get the initial value into v8 */ | |
936 | vxor v8,v8,v8 | |
937 | MTVRD(v8, R3) | |
938 | vsldoi v8,zeroes,v8,8 /* shift into bottom 32 bits */ | |
939 | ||
940 | #ifdef BYTESWAP_DATA | |
941 | addis r3,r2,.byteswap_constant@toc@ha | |
942 | addi r3,r3,.byteswap_constant@toc@l | |
943 | ||
944 | lvx byteswap,0,r3 | |
945 | addi r3,r3,16 /* NOTE(review): r3 is recomputed from the TOC before its next use on both the main and .Lshort paths, so this increment looks dead - confirm */ | |
946 | #endif | |
947 | ||
948 | cmpdi r5,256 /* fewer than 256 bytes: take the straight-line .Lshort path */ | |
949 | blt .Lshort | |
950 | ||
951 | rldicr r6,r5,0,56 /* r6 = len with the low 7 bits cleared, i.e. whole 128 byte groups only */ | |
952 | ||
953 | /* Checksum in blocks of MAX_SIZE */ | |
954 | 1: lis r7,MAX_SIZE@h /* start of one pass: r7 = min(r6, MAX_SIZE) bytes to handle now */ | |
955 | ori r7,r7,MAX_SIZE@l | |
956 | mr r9,r7 /* r9 = MAX_SIZE, used for the table offset below */ | |
957 | cmpd r6,r7 | |
958 | bgt 2f | |
959 | mr r7,r6 | |
960 | 2: subf r6,r7,r6 /* r6 = bytes left over for later passes */ | |
961 | ||
962 | /* our main loop does 128 bytes at a time */ | |
963 | srdi r7,r7,7 /* r7 = number of 128 byte groups in this pass */ | |
964 | ||
965 | /* | |
966 | * Work out the offset into the constants table to start at. Each | |
967 | * constant is 16 bytes, and it is used against 128 bytes of input | |
968 | * data - 128 / 16 = 8 | |
969 | */ | |
970 | sldi r8,r7,4 | |
971 | srdi r9,r9,3 | |
972 | subf r8,r8,r9 /* r8 = MAX_SIZE/8 - 16 * groups */ | |
973 | ||
974 | /* We reduce our final 128 bytes in a separate step */ | |
975 | addi r7,r7,-1 | |
976 | mtctr r7 /* ctr = groups - 1 */ | |
977 | ||
978 | addis r3,r2,.constants@toc@ha | |
979 | addi r3,r3,.constants@toc@l | |
980 | ||
981 | /* Find the start of our constants */ | |
982 | add r3,r3,r8 | |
983 | ||
984 | /* zero v0-v7 which will contain our checksums */ | |
985 | vxor v0,v0,v0 | |
986 | vxor v1,v1,v1 | |
987 | vxor v2,v2,v2 | |
988 | vxor v3,v3,v3 | |
989 | vxor v4,v4,v4 | |
990 | vxor v5,v5,v5 | |
991 | vxor v6,v6,v6 | |
992 | vxor v7,v7,v7 | |
993 | ||
994 | lvx const1,0,r3 /* first constant for this pass; r3 is not advanced here */ | |
995 | ||
996 | /* | |
997 | * If we are looping back to consume more data we use the values | |
998 | * already in v16-v23. | |
999 | */ | |
1000 | cmpdi r0,1 | |
1001 | beq 2f | |
1002 | ||
1003 | /* First warm up pass */ | |
1004 | lvx v16,0,r4 | |
1005 | lvx v17,off16,r4 | |
1006 | VPERM(v16,v16,v16,byteswap) | |
1007 | VPERM(v17,v17,v17,byteswap) | |
1008 | lvx v18,off32,r4 | |
1009 | lvx v19,off48,r4 | |
1010 | VPERM(v18,v18,v18,byteswap) | |
1011 | VPERM(v19,v19,v19,byteswap) | |
1012 | lvx v20,off64,r4 | |
1013 | lvx v21,off80,r4 | |
1014 | VPERM(v20,v20,v20,byteswap) | |
1015 | VPERM(v21,v21,v21,byteswap) | |
1016 | lvx v22,off96,r4 | |
1017 | lvx v23,off112,r4 | |
1018 | VPERM(v22,v22,v22,byteswap) | |
1019 | VPERM(v23,v23,v23,byteswap) | |
1020 | addi r4,r4,8*16 | |
1021 | ||
1022 | /* xor in initial value */ | |
1023 | vxor v16,v16,v8 /* v8 still holds the incoming crc placed there in the prologue */ | |
1024 | ||
1025 | 2: bdz .Lfirst_warm_up_done /* ctr ran out: only the multiply of v16-v23 remains before the final 128 bytes */ | |
1026 | ||
1027 | addi r3,r3,16 | |
1028 | lvx const2,0,r3 /* next table entry, used by the first main loop iteration */ | |
1029 | ||
1030 | /* Second warm up pass */ | |
1031 | VPMSUMD(v8,v16,const1) /* v8-v15 = products of the loaded data; v16-v23 are then refilled with the next 128 bytes */ | |
1032 | lvx v16,0,r4 | |
1033 | VPERM(v16,v16,v16,byteswap) | |
1034 | ori r2,r2,0 /* no-op (r2 | 0 -> r2); presumably a dispatch/scheduling hint - TODO confirm */ | |
1035 | ||
1036 | VPMSUMD(v9,v17,const1) | |
1037 | lvx v17,off16,r4 | |
1038 | VPERM(v17,v17,v17,byteswap) | |
1039 | ori r2,r2,0 | |
1040 | ||
1041 | VPMSUMD(v10,v18,const1) | |
1042 | lvx v18,off32,r4 | |
1043 | VPERM(v18,v18,v18,byteswap) | |
1044 | ori r2,r2,0 | |
1045 | ||
1046 | VPMSUMD(v11,v19,const1) | |
1047 | lvx v19,off48,r4 | |
1048 | VPERM(v19,v19,v19,byteswap) | |
1049 | ori r2,r2,0 | |
1050 | ||
1051 | VPMSUMD(v12,v20,const1) | |
1052 | lvx v20,off64,r4 | |
1053 | VPERM(v20,v20,v20,byteswap) | |
1054 | ori r2,r2,0 | |
1055 | ||
1056 | VPMSUMD(v13,v21,const1) | |
1057 | lvx v21,off80,r4 | |
1058 | VPERM(v21,v21,v21,byteswap) | |
1059 | ori r2,r2,0 | |
1060 | ||
1061 | VPMSUMD(v14,v22,const1) | |
1062 | lvx v22,off96,r4 | |
1063 | VPERM(v22,v22,v22,byteswap) | |
1064 | ori r2,r2,0 | |
1065 | ||
1066 | VPMSUMD(v15,v23,const1) | |
1067 | lvx v23,off112,r4 | |
1068 | VPERM(v23,v23,v23,byteswap) | |
1069 | ||
1070 | addi r4,r4,8*16 | |
1071 | ||
1072 | bdz .Lfirst_cool_down /* ctr ran out: skip the main loop and drain the pipeline */ | |
1073 | ||
1074 | /* | |
1075 | * main loop. We modulo schedule it such that it takes three iterations | |
1076 | * to complete - first iteration load, second iteration vpmsum, third | |
1077 | * iteration xor. | |
1078 | */ | |
1079 | .balign 16 | |
1080 | 4: lvx const1,0,r3 /* reloads the entry const2 was last loaded from (r3 has not moved since) */ | |
1081 | addi r3,r3,16 | |
1082 | ori r2,r2,0 /* no-op (r2 | 0 -> r2); presumably a dispatch/scheduling hint - TODO confirm */ | |
1083 | ||
1084 | vxor v0,v0,v8 /* fold the previous iteration's product into accumulator v0 */ | |
1085 | VPMSUMD(v8,v16,const2) /* multiply the data loaded one iteration ago */ | |
1086 | lvx v16,0,r4 /* load data for the next iteration */ | |
1087 | VPERM(v16,v16,v16,byteswap) | |
1088 | ori r2,r2,0 | |
1089 | ||
1090 | vxor v1,v1,v9 | |
1091 | VPMSUMD(v9,v17,const2) | |
1092 | lvx v17,off16,r4 | |
1093 | VPERM(v17,v17,v17,byteswap) | |
1094 | ori r2,r2,0 | |
1095 | ||
1096 | vxor v2,v2,v10 | |
1097 | VPMSUMD(v10,v18,const2) | |
1098 | lvx v18,off32,r4 | |
1099 | VPERM(v18,v18,v18,byteswap) | |
1100 | ori r2,r2,0 | |
1101 | ||
1102 | vxor v3,v3,v11 | |
1103 | VPMSUMD(v11,v19,const2) | |
1104 | lvx v19,off48,r4 | |
1105 | VPERM(v19,v19,v19,byteswap) | |
1106 | lvx const2,0,r3 /* preload the next table entry; the rest of this iteration multiplies by const1 instead */ | |
1107 | ori r2,r2,0 | |
1108 | ||
1109 | vxor v4,v4,v12 | |
1110 | VPMSUMD(v12,v20,const1) | |
1111 | lvx v20,off64,r4 | |
1112 | VPERM(v20,v20,v20,byteswap) | |
1113 | ori r2,r2,0 | |
1114 | ||
1115 | vxor v5,v5,v13 | |
1116 | VPMSUMD(v13,v21,const1) | |
1117 | lvx v21,off80,r4 | |
1118 | VPERM(v21,v21,v21,byteswap) | |
1119 | ori r2,r2,0 | |
1120 | ||
1121 | vxor v6,v6,v14 | |
1122 | VPMSUMD(v14,v22,const1) | |
1123 | lvx v22,off96,r4 | |
1124 | VPERM(v22,v22,v22,byteswap) | |
1125 | ori r2,r2,0 | |
1126 | ||
1127 | vxor v7,v7,v15 | |
1128 | VPMSUMD(v15,v23,const1) | |
1129 | lvx v23,off112,r4 | |
1130 | VPERM(v23,v23,v23,byteswap) | |
1131 | ||
1132 | addi r4,r4,8*16 | |
1133 | ||
1134 | bdnz 4b | |
1135 | ||
1136 | .Lfirst_cool_down: | |
1137 | /* First cool down pass */ | |
1138 | lvx const1,0,r3 | |
1139 | addi r3,r3,16 /* r3 is left one entry past the last constant used in this pass */ | |
1140 | ||
1141 | vxor v0,v0,v8 /* fold the pending products, then multiply the last loaded data; no further loads here */ | |
1142 | VPMSUMD(v8,v16,const1) | |
1143 | ori r2,r2,0 /* no-op (r2 | 0 -> r2); presumably a dispatch/scheduling hint - TODO confirm */ | |
1144 | ||
1145 | vxor v1,v1,v9 | |
1146 | VPMSUMD(v9,v17,const1) | |
1147 | ori r2,r2,0 | |
1148 | ||
1149 | vxor v2,v2,v10 | |
1150 | VPMSUMD(v10,v18,const1) | |
1151 | ori r2,r2,0 | |
1152 | ||
1153 | vxor v3,v3,v11 | |
1154 | VPMSUMD(v11,v19,const1) | |
1155 | ori r2,r2,0 | |
1156 | ||
1157 | vxor v4,v4,v12 | |
1158 | VPMSUMD(v12,v20,const1) | |
1159 | ori r2,r2,0 | |
1160 | ||
1161 | vxor v5,v5,v13 | |
1162 | VPMSUMD(v13,v21,const1) | |
1163 | ori r2,r2,0 | |
1164 | ||
1165 | vxor v6,v6,v14 | |
1166 | VPMSUMD(v14,v22,const1) | |
1167 | ori r2,r2,0 | |
1168 | ||
1169 | vxor v7,v7,v15 | |
1170 | VPMSUMD(v15,v23,const1) | |
1171 | ori r2,r2,0 | |
1172 | ||
1173 | .Lsecond_cool_down: | |
1174 | /* Second cool down pass */ | |
1175 | vxor v0,v0,v8 /* fold the last outstanding products into the accumulators v0-v7 */ | |
1176 | vxor v1,v1,v9 | |
1177 | vxor v2,v2,v10 | |
1178 | vxor v3,v3,v11 | |
1179 | vxor v4,v4,v12 | |
1180 | vxor v5,v5,v13 | |
1181 | vxor v6,v6,v14 | |
1182 | vxor v7,v7,v15 | |
1183 | ||
1184 | /* | |
1185 | * vpmsumd produces a 96 bit result in the least significant bits | |
1186 | * of the register. Since we are bit reflected we have to shift it | |
1187 | * left 32 bits so it occupies the least significant bits in the | |
1188 | * bit reflected domain. | |
1189 | */ | |
1190 | vsldoi v0,v0,zeroes,4 | |
1191 | vsldoi v1,v1,zeroes,4 | |
1192 | vsldoi v2,v2,zeroes,4 | |
1193 | vsldoi v3,v3,zeroes,4 | |
1194 | vsldoi v4,v4,zeroes,4 | |
1195 | vsldoi v5,v5,zeroes,4 | |
1196 | vsldoi v6,v6,zeroes,4 | |
1197 | vsldoi v7,v7,zeroes,4 | |
1198 | ||
1199 | /* xor with last 1024 bits */ | |
1200 | lvx v8,0,r4 | |
1201 | lvx v9,off16,r4 | |
1202 | VPERM(v8,v8,v8,byteswap) | |
1203 | VPERM(v9,v9,v9,byteswap) | |
1204 | lvx v10,off32,r4 | |
1205 | lvx v11,off48,r4 | |
1206 | VPERM(v10,v10,v10,byteswap) | |
1207 | VPERM(v11,v11,v11,byteswap) | |
1208 | lvx v12,off64,r4 | |
1209 | lvx v13,off80,r4 | |
1210 | VPERM(v12,v12,v12,byteswap) | |
1211 | VPERM(v13,v13,v13,byteswap) | |
1212 | lvx v14,off96,r4 | |
1213 | lvx v15,off112,r4 | |
1214 | VPERM(v14,v14,v14,byteswap) | |
1215 | VPERM(v15,v15,v15,byteswap) | |
1216 | ||
1217 | addi r4,r4,8*16 | |
1218 | ||
1219 | vxor v16,v0,v8 /* v16-v23 = this pass's 1024 bit result, which is also the starting state if another pass follows */ | |
1220 | vxor v17,v1,v9 | |
1221 | vxor v18,v2,v10 | |
1222 | vxor v19,v3,v11 | |
1223 | vxor v20,v4,v12 | |
1224 | vxor v21,v5,v13 | |
1225 | vxor v22,v6,v14 | |
1226 | vxor v23,v7,v15 | |
1227 | ||
1228 | li r0,1 /* tell the next pass to reuse v16-v23 instead of loading and xoring in the crc */ | |
1229 | cmpdi r6,0 /* any whole 128 byte groups left for another pass? */ | |
1230 | addi r6,r6,128 /* the 128 bytes now held in v16-v23 count as the next pass's first group; addi leaves the compare result intact */ | |
1231 | bne 1b | |
1232 | ||
1233 | /* Work out how many bytes we have left */ | |
1234 | andi. r5,r5,127 /* r5 = len mod 128 */ | |
1235 | ||
1236 | /* Calculate where in the constant table we need to start */ | |
1237 | subfic r6,r5,128 | |
1238 | add r3,r3,r6 /* NOTE(review): r3 was left just past the last .constants entry used; this appears to rely on .short_constants following .constants directly in .rodata - confirm */ | |
1239 | ||
1240 | /* How many 16 byte chunks are in the tail */ | |
1241 | srdi r7,r5,4 | |
1242 | mtctr r7 | |
1243 | ||
1244 | /* | |
1245 | * Reduce the previously calculated 1024 bits to 64 bits, shifting | |
1246 | * 32 bits to include the trailing 32 bits of zeros | |
1247 | */ | |
1248 | lvx v0,0,r3 | |
1249 | lvx v1,off16,r3 | |
1250 | lvx v2,off32,r3 | |
1251 | lvx v3,off48,r3 | |
1252 | lvx v4,off64,r3 | |
1253 | lvx v5,off80,r3 | |
1254 | lvx v6,off96,r3 | |
1255 | lvx v7,off112,r3 | |
1256 | addi r3,r3,8*16 /* r3 now addresses the constants for the tail chunks */ | |
1257 | ||
1258 | VPMSUMW(v0,v16,v0) | |
1259 | VPMSUMW(v1,v17,v1) | |
1260 | VPMSUMW(v2,v18,v2) | |
1261 | VPMSUMW(v3,v19,v3) | |
1262 | VPMSUMW(v4,v20,v4) | |
1263 | VPMSUMW(v5,v21,v5) | |
1264 | VPMSUMW(v6,v22,v6) | |
1265 | VPMSUMW(v7,v23,v7) | |
1266 | ||
1267 | /* Now reduce the tail (0 - 112 bytes) */ | |
1268 | cmpdi r7,0 /* no tail chunks: go straight to combining the partial results */ | |
1269 | beq 1f | |
1270 | ||
1271 | lvx v16,0,r4 /* each tail chunk: load data and its constant, multiply, fold into v0 */ | |
1272 | lvx v17,0,r3 | |
1273 | VPERM(v16,v16,v16,byteswap) | |
1274 | VPMSUMW(v16,v16,v17) | |
1275 | vxor v0,v0,v16 | |
1276 | bdz 1f | |
1277 | ||
1278 | lvx v16,off16,r4 | |
1279 | lvx v17,off16,r3 | |
1280 | VPERM(v16,v16,v16,byteswap) | |
1281 | VPMSUMW(v16,v16,v17) | |
1282 | vxor v0,v0,v16 | |
1283 | bdz 1f | |
1284 | ||
1285 | lvx v16,off32,r4 | |
1286 | lvx v17,off32,r3 | |
1287 | VPERM(v16,v16,v16,byteswap) | |
1288 | VPMSUMW(v16,v16,v17) | |
1289 | vxor v0,v0,v16 | |
1290 | bdz 1f | |
1291 | ||
1292 | lvx v16,off48,r4 | |
1293 | lvx v17,off48,r3 | |
1294 | VPERM(v16,v16,v16,byteswap) | |
1295 | VPMSUMW(v16,v16,v17) | |
1296 | vxor v0,v0,v16 | |
1297 | bdz 1f | |
1298 | ||
1299 | lvx v16,off64,r4 | |
1300 | lvx v17,off64,r3 | |
1301 | VPERM(v16,v16,v16,byteswap) | |
1302 | VPMSUMW(v16,v16,v17) | |
1303 | vxor v0,v0,v16 | |
1304 | bdz 1f | |
1305 | ||
1306 | lvx v16,off80,r4 | |
1307 | lvx v17,off80,r3 | |
1308 | VPERM(v16,v16,v16,byteswap) | |
1309 | VPMSUMW(v16,v16,v17) | |
1310 | vxor v0,v0,v16 | |
1311 | bdz 1f | |
1312 | ||
1313 | lvx v16,off96,r4 /* 7th and last possible chunk ((len & 127) >> 4 is at most 7), so no bdz follows */ | |
1314 | lvx v17,off96,r3 | |
1315 | VPERM(v16,v16,v16,byteswap) | |
1316 | VPMSUMW(v16,v16,v17) | |
1317 | vxor v0,v0,v16 | |
1318 | ||
1319 | /* Now xor all the parallel chunks together */ | |
1320 | 1: vxor v0,v0,v1 | |
1321 | vxor v2,v2,v3 | |
1322 | vxor v4,v4,v5 | |
1323 | vxor v6,v6,v7 | |
1324 | ||
1325 | vxor v0,v0,v2 | |
1326 | vxor v4,v4,v6 | |
1327 | ||
1328 | vxor v0,v0,v4 | |
1329 | ||
1330 | .Lbarrett_reduction: /* entered with the combined partial result in v0, from both the main and .Lshort paths */ | |
1331 | /* Barrett constants */ | |
1332 | addis r3,r2,.barrett_constants@toc@ha | |
1333 | addi r3,r3,.barrett_constants@toc@l | |
1334 | ||
1335 | lvx const1,0,r3 /* const1 = m */ | |
1336 | lvx const2,off16,r3 /* const2 = n */ | |
1337 | ||
1338 | vsldoi v1,v0,v0,8 | |
1339 | vxor v0,v0,v1 /* xor two 64 bit results together */ | |
1340 | ||
1341 | /* shift left one bit */ | |
1342 | vspltisb v1,1 | |
1343 | vsl v0,v0,v1 | |
1344 | ||
1345 | vand v0,v0,mask_64bit /* keep only the bottom 64 bits */ | |
1346 | ||
1347 | /* | |
1348 | * The reflected version of Barrett reduction. Instead of bit | |
1349 | * reflecting our data (which is expensive to do), we bit reflect our | |
1350 | * constants and our algorithm, which means the intermediate data in | |
1351 | * our vector registers goes from 0-63 instead of 63-0. We can reflect | |
1352 | * the algorithm because we don't carry in mod 2 arithmetic. | |
1353 | */ | |
1354 | vand v1,v0,mask_32bit /* bottom 32 bits of a */ | |
1355 | VPMSUMD(v1,v1,const1) /* ma */ | |
1356 | vand v1,v1,mask_32bit /* bottom 32bits of ma */ | |
1357 | VPMSUMD(v1,v1,const2) /* qn */ | |
1358 | vxor v0,v0,v1 /* a - qn, subtraction is xor in GF(2) */ | |
1359 | ||
1360 | /* | |
1361 | * Since we are bit reflected, the result (ie the low 32 bits) is in | |
1362 | * the high 32 bits. We just need to shift it left 4 bytes | |
1363 | * V0 [ 0 1 X 3 ] | |
1364 | * V0 [ 0 X 2 3 ] | |
1365 | */ | |
1366 | vsldoi v0,v0,zeroes,4 /* shift result into top 64 bits of v0, where MFVRD reads it */ | |
1367 | ||
1368 | /* Get it into r3 */ | |
1369 | MFVRD(R3, v0) | |
1370 | ||
1371 | .Lout: /* common epilogue: restore v20-v29 and r25-r31 from the slots the prologue stored them in, then return with the result in r3 */ | |
1372 | subi r6,r1,56+10*16 /* same save-area addresses as computed in the prologue */ | |
1373 | subi r7,r1,56+2*16 | |
1374 | ||
1375 | lvx v20,0,r6 | |
1376 | lvx v21,off16,r6 | |
1377 | lvx v22,off32,r6 | |
1378 | lvx v23,off48,r6 | |
1379 | lvx v24,off64,r6 | |
1380 | lvx v25,off80,r6 | |
1381 | lvx v26,off96,r6 | |
1382 | lvx v27,off112,r6 | |
1383 | lvx v28,0,r7 | |
1384 | lvx v29,off16,r7 | |
1385 | ||
1386 | ld r31,-8(r1) /* GPRs are restored last because off16..off112 (r25-r31) index the vector reloads above */ | |
1387 | ld r30,-16(r1) | |
1388 | ld r29,-24(r1) | |
1389 | ld r28,-32(r1) | |
1390 | ld r27,-40(r1) | |
1391 | ld r26,-48(r1) | |
1392 | ld r25,-56(r1) | |
1393 | ||
1394 | blr | |
1395 | ||
1396 | .Lfirst_warm_up_done: /* reached from the first bdz when ctr runs out: multiply the one loaded group and join the final xor stage */ | |
1397 | lvx const1,0,r3 | |
1398 | addi r3,r3,16 /* leave r3 one entry past the constant just used, as .Lfirst_cool_down does */ | |
1399 | ||
1400 | VPMSUMD(v8,v16,const1) | |
1401 | VPMSUMD(v9,v17,const1) | |
1402 | VPMSUMD(v10,v18,const1) | |
1403 | VPMSUMD(v11,v19,const1) | |
1404 | VPMSUMD(v12,v20,const1) | |
1405 | VPMSUMD(v13,v21,const1) | |
1406 | VPMSUMD(v14,v22,const1) | |
1407 | VPMSUMD(v15,v23,const1) | |
1408 | ||
1409 | b .Lsecond_cool_down | |
1410 | ||
1411 | .Lshort: /* inputs shorter than 256 bytes: multiply each 16 byte chunk by its own constant, with no loop */ | |
1412 | cmpdi r5,0 | |
1413 | beq .Lzero | |
1414 | ||
1415 | addis r3,r2,.short_constants@toc@ha | |
1416 | addi r3,r3,.short_constants@toc@l | |
1417 | ||
1418 | /* Calculate where in the constant table we need to start */ | |
1419 | subfic r6,r5,256 /* r6 = 256 - len: skip the leading entries this input is too short to need */ | |
1420 | add r3,r3,r6 | |
1421 | ||
1422 | /* How many 16 byte chunks? */ | |
1423 | srdi r7,r5,4 | |
1424 | mtctr r7 /* NOTE(review): a len of 1..15 gives ctr = 0, which the bdz chain below would not stop on - presumably callers only pass multiples of 16 (see file header); confirm */ | |
1425 | ||
1426 | vxor v19,v19,v19 /* v19/v20: two xor accumulators, filled alternately by the .LvN ladder below */ | |
1427 | vxor v20,v20,v20 | |
1428 | ||
1429 | lvx v0,0,r4 | |
1430 | lvx v16,0,r3 | |
1431 | VPERM(v0,v0,v16,byteswap) /* NOTE(review): third operand is the constant register, unlike the main path; the byteswap indices are all below 16, so presumably only the data register is selected - confirm */ | |
1432 | vxor v0,v0,v8 /* xor in initial value */ | |
1433 | VPMSUMW(v0,v0,v16) | |
1434 | bdz .Lv0 /* each bdz .LvN leaves once chunk N was the last one */ | |
1435 | ||
1436 | lvx v1,off16,r4 | |
1437 | lvx v17,off16,r3 | |
1438 | VPERM(v1,v1,v17,byteswap) | |
1439 | VPMSUMW(v1,v1,v17) | |
1440 | bdz .Lv1 | |
1441 | ||
1442 | lvx v2,off32,r4 | |
1443 | lvx v16,off32,r3 | |
1444 | VPERM(v2,v2,v16,byteswap) | |
1445 | VPMSUMW(v2,v2,v16) | |
1446 | bdz .Lv2 | |
1447 | ||
1448 | lvx v3,off48,r4 | |
1449 | lvx v17,off48,r3 | |
1450 | VPERM(v3,v3,v17,byteswap) | |
1451 | VPMSUMW(v3,v3,v17) | |
1452 | bdz .Lv3 | |
1453 | ||
1454 | lvx v4,off64,r4 | |
1455 | lvx v16,off64,r3 | |
1456 | VPERM(v4,v4,v16,byteswap) | |
1457 | VPMSUMW(v4,v4,v16) | |
1458 | bdz .Lv4 | |
1459 | ||
1460 | lvx v5,off80,r4 | |
1461 | lvx v17,off80,r3 | |
1462 | VPERM(v5,v5,v17,byteswap) | |
1463 | VPMSUMW(v5,v5,v17) | |
1464 | bdz .Lv5 | |
1465 | ||
1466 | lvx v6,off96,r4 | |
1467 | lvx v16,off96,r3 | |
1468 | VPERM(v6,v6,v16,byteswap) | |
1469 | VPMSUMW(v6,v6,v16) | |
1470 | bdz .Lv6 | |
1471 | ||
1472 | lvx v7,off112,r4 | |
1473 | lvx v17,off112,r3 | |
1474 | VPERM(v7,v7,v17,byteswap) | |
1475 | VPMSUMW(v7,v7,v17) | |
1476 | bdz .Lv7 | |
1477 | ||
1478 | addi r3,r3,128 /* the offset registers only reach 112, so advance both pointers for chunks 8-15 */ | |
1479 | addi r4,r4,128 | |
1480 | ||
1481 | lvx v8,0,r4 /* v8 is free again: the initial value was already xored into v0 above */ | |
1482 | lvx v16,0,r3 | |
1483 | VPERM(v8,v8,v16,byteswap) | |
1484 | VPMSUMW(v8,v8,v16) | |
1485 | bdz .Lv8 | |
1486 | ||
1487 | lvx v9,off16,r4 | |
1488 | lvx v17,off16,r3 | |
1489 | VPERM(v9,v9,v17,byteswap) | |
1490 | VPMSUMW(v9,v9,v17) | |
1491 | bdz .Lv9 | |
1492 | ||
1493 | lvx v10,off32,r4 | |
1494 | lvx v16,off32,r3 | |
1495 | VPERM(v10,v10,v16,byteswap) | |
1496 | VPMSUMW(v10,v10,v16) | |
1497 | bdz .Lv10 | |
1498 | ||
1499 | lvx v11,off48,r4 | |
1500 | lvx v17,off48,r3 | |
1501 | VPERM(v11,v11,v17,byteswap) | |
1502 | VPMSUMW(v11,v11,v17) | |
1503 | bdz .Lv11 | |
1504 | ||
1505 | lvx v12,off64,r4 | |
1506 | lvx v16,off64,r3 | |
1507 | VPERM(v12,v12,v16,byteswap) | |
1508 | VPMSUMW(v12,v12,v16) | |
1509 | bdz .Lv12 | |
1510 | ||
1511 | lvx v13,off80,r4 | |
1512 | lvx v17,off80,r3 | |
1513 | VPERM(v13,v13,v17,byteswap) | |
1514 | VPMSUMW(v13,v13,v17) | |
1515 | bdz .Lv13 | |
1516 | ||
1517 | lvx v14,off96,r4 | |
1518 | lvx v16,off96,r3 | |
1519 | VPERM(v14,v14,v16,byteswap) | |
1520 | VPMSUMW(v14,v14,v16) | |
1521 | bdz .Lv14 | |
1522 | ||
1523 | lvx v15,off112,r4 | |
1524 | lvx v17,off112,r3 | |
1525 | VPERM(v15,v15,v17,byteswap) | |
1526 | VPMSUMW(v15,v15,v17) | |
1527 | ||
1528 | .Lv15: vxor v19,v19,v15 /* ladder: entering at .LvN folds in vN and falls through every lower-numbered product */ | |
1529 | .Lv14: vxor v20,v20,v14 | |
1530 | .Lv13: vxor v19,v19,v13 | |
1531 | .Lv12: vxor v20,v20,v12 | |
1532 | .Lv11: vxor v19,v19,v11 | |
1533 | .Lv10: vxor v20,v20,v10 | |
1534 | .Lv9: vxor v19,v19,v9 | |
1535 | .Lv8: vxor v20,v20,v8 | |
1536 | .Lv7: vxor v19,v19,v7 | |
1537 | .Lv6: vxor v20,v20,v6 | |
1538 | .Lv5: vxor v19,v19,v5 | |
1539 | .Lv4: vxor v20,v20,v4 | |
1540 | .Lv3: vxor v19,v19,v3 | |
1541 | .Lv2: vxor v20,v20,v2 | |
1542 | .Lv1: vxor v19,v19,v1 | |
1543 | .Lv0: vxor v20,v20,v0 | |
1544 | ||
1545 | vxor v0,v19,v20 /* combine the two accumulators into v0 for the shared Barrett step */ | |
1546 | ||
1547 | b .Lbarrett_reduction | |
1548 | ||
1549 | .Lzero: /* len == 0: nothing to checksum, return the incoming crc that the prologue saved in r10 */ | |
1550 | mr r3,r10 | |
1551 | b .Lout /* shared epilogue restores the saved vector and general registers */ | |
1552 | ||
1553 | FUNC_END(__crc32c_vpmsum) | |