Commit | Line | Data |
---|---|---|
af1a8899 | 1 | /* SPDX-License-Identifier: GPL-2.0-or-later */ |
1da177e4 LT |
2 | /* |
3 | * include/asm-generic/xor.h | |
4 | * | |
5 | * Generic optimized RAID-5 checksumming functions. | |
1da177e4 LT |
6 | */ |
7 | ||
268bb0ce | 8 | #include <linux/prefetch.h> |
1da177e4 LT |
9 | |
/*
 * XOR the buffer at p2 into the buffer at p1, eight longs per pass.
 *
 * bytes is turned into a count of eight-long lines (rounding down).  The
 * count is only tested after a line has been handled, so one line is
 * always processed, even when bytes is smaller than a full line.
 */
static void
xor_8regs_2(unsigned long bytes, unsigned long * __restrict p1,
	    const unsigned long * __restrict p2)
{
	long remaining = bytes / (8 * sizeof(long));

	for (;;) {
		p1[0] = p1[0] ^ p2[0];
		p1[1] = p1[1] ^ p2[1];
		p1[2] = p1[2] ^ p2[2];
		p1[3] = p1[3] ^ p2[3];
		p1[4] = p1[4] ^ p2[4];
		p1[5] = p1[5] ^ p2[5];
		p1[6] = p1[6] ^ p2[6];
		p1[7] = p1[7] ^ p2[7];

		p1 += 8;
		p2 += 8;

		if (--remaining <= 0)
			break;
	}
}
29 | ||
/*
 * XOR the buffers at p2 and p3 into the buffer at p1, eight longs per pass.
 *
 * bytes is turned into a count of eight-long lines (rounding down).  The
 * count is only tested after a line has been handled, so one line is
 * always processed, even when bytes is smaller than a full line.
 */
static void
xor_8regs_3(unsigned long bytes, unsigned long * __restrict p1,
	    const unsigned long * __restrict p2,
	    const unsigned long * __restrict p3)
{
	long remaining = bytes / (8 * sizeof(long));

	for (;;) {
		p1[0] = p1[0] ^ p2[0] ^ p3[0];
		p1[1] = p1[1] ^ p2[1] ^ p3[1];
		p1[2] = p1[2] ^ p2[2] ^ p3[2];
		p1[3] = p1[3] ^ p2[3] ^ p3[3];
		p1[4] = p1[4] ^ p2[4] ^ p3[4];
		p1[5] = p1[5] ^ p2[5] ^ p3[5];
		p1[6] = p1[6] ^ p2[6] ^ p3[6];
		p1[7] = p1[7] ^ p2[7] ^ p3[7];

		p1 += 8;
		p2 += 8;
		p3 += 8;

		if (--remaining <= 0)
			break;
	}
}
51 | ||
/*
 * XOR the buffers at p2, p3 and p4 into the buffer at p1, eight longs per
 * pass.
 *
 * bytes is turned into a count of eight-long lines (rounding down).  The
 * count is only tested after a line has been handled, so one line is
 * always processed, even when bytes is smaller than a full line.
 */
static void
xor_8regs_4(unsigned long bytes, unsigned long * __restrict p1,
	    const unsigned long * __restrict p2,
	    const unsigned long * __restrict p3,
	    const unsigned long * __restrict p4)
{
	long remaining = bytes / (8 * sizeof(long));

	for (;;) {
		p1[0] = p1[0] ^ p2[0] ^ p3[0] ^ p4[0];
		p1[1] = p1[1] ^ p2[1] ^ p3[1] ^ p4[1];
		p1[2] = p1[2] ^ p2[2] ^ p3[2] ^ p4[2];
		p1[3] = p1[3] ^ p2[3] ^ p3[3] ^ p4[3];
		p1[4] = p1[4] ^ p2[4] ^ p3[4] ^ p4[4];
		p1[5] = p1[5] ^ p2[5] ^ p3[5] ^ p4[5];
		p1[6] = p1[6] ^ p2[6] ^ p3[6] ^ p4[6];
		p1[7] = p1[7] ^ p2[7] ^ p3[7] ^ p4[7];

		p1 += 8;
		p2 += 8;
		p3 += 8;
		p4 += 8;

		if (--remaining <= 0)
			break;
	}
}
75 | ||
/*
 * XOR the buffers at p2, p3, p4 and p5 into the buffer at p1, eight longs
 * per pass.
 *
 * bytes is turned into a count of eight-long lines (rounding down).  The
 * count is only tested after a line has been handled, so one line is
 * always processed, even when bytes is smaller than a full line.
 */
static void
xor_8regs_5(unsigned long bytes, unsigned long * __restrict p1,
	    const unsigned long * __restrict p2,
	    const unsigned long * __restrict p3,
	    const unsigned long * __restrict p4,
	    const unsigned long * __restrict p5)
{
	long remaining = bytes / (8 * sizeof(long));

	for (;;) {
		p1[0] = p1[0] ^ p2[0] ^ p3[0] ^ p4[0] ^ p5[0];
		p1[1] = p1[1] ^ p2[1] ^ p3[1] ^ p4[1] ^ p5[1];
		p1[2] = p1[2] ^ p2[2] ^ p3[2] ^ p4[2] ^ p5[2];
		p1[3] = p1[3] ^ p2[3] ^ p3[3] ^ p4[3] ^ p5[3];
		p1[4] = p1[4] ^ p2[4] ^ p3[4] ^ p4[4] ^ p5[4];
		p1[5] = p1[5] ^ p2[5] ^ p3[5] ^ p4[5] ^ p5[5];
		p1[6] = p1[6] ^ p2[6] ^ p3[6] ^ p4[6] ^ p5[6];
		p1[7] = p1[7] ^ p2[7] ^ p3[7] ^ p4[7] ^ p5[7];

		p1 += 8;
		p2 += 8;
		p3 += 8;
		p4 += 8;
		p5 += 8;

		if (--remaining <= 0)
			break;
	}
}
101 | ||
/*
 * XOR the buffer at p2 into the buffer at p1, one line of eight longs at a
 * time: load the whole destination line into locals, fold the source line
 * in, then store the whole line back.
 *
 * The locals are unsigned long, matching the buffer element type, so no
 * value passes through a signed conversion on the way in or out.
 *
 * bytes is turned into a count of eight-long lines (rounding down).  The
 * loop is a do/while, so one line is always processed, even when bytes is
 * smaller than a full line.
 */
static void
xor_32regs_2(unsigned long bytes, unsigned long * __restrict p1,
	     const unsigned long * __restrict p2)
{
	long lines = bytes / (sizeof (long)) / 8;

	do {
		unsigned long d0, d1, d2, d3, d4, d5, d6, d7;

		/* Load the destination line in one burst. */
		d0 = p1[0];
		d1 = p1[1];
		d2 = p1[2];
		d3 = p1[3];
		d4 = p1[4];
		d5 = p1[5];
		d6 = p1[6];
		d7 = p1[7];

		d0 ^= p2[0];
		d1 ^= p2[1];
		d2 ^= p2[2];
		d3 ^= p2[3];
		d4 ^= p2[4];
		d5 ^= p2[5];
		d6 ^= p2[6];
		d7 ^= p2[7];

		/* Store the result in one burst. */
		p1[0] = d0;
		p1[1] = d1;
		p1[2] = d2;
		p1[3] = d3;
		p1[4] = d4;
		p1[5] = d5;
		p1[6] = d6;
		p1[7] = d7;

		p1 += 8;
		p2 += 8;
	} while (--lines > 0);
}
138 | ||
/*
 * XOR the buffers at p2 and p3 into the buffer at p1, one line of eight
 * longs at a time: load the whole destination line into locals, fold each
 * source line in, then store the whole line back.
 *
 * The locals are unsigned long, matching the buffer element type, so no
 * value passes through a signed conversion on the way in or out.
 *
 * bytes is turned into a count of eight-long lines (rounding down).  The
 * loop is a do/while, so one line is always processed, even when bytes is
 * smaller than a full line.
 */
static void
xor_32regs_3(unsigned long bytes, unsigned long * __restrict p1,
	     const unsigned long * __restrict p2,
	     const unsigned long * __restrict p3)
{
	long lines = bytes / (sizeof (long)) / 8;

	do {
		unsigned long d0, d1, d2, d3, d4, d5, d6, d7;

		/* Load the destination line in one burst. */
		d0 = p1[0];
		d1 = p1[1];
		d2 = p1[2];
		d3 = p1[3];
		d4 = p1[4];
		d5 = p1[5];
		d6 = p1[6];
		d7 = p1[7];

		d0 ^= p2[0];
		d1 ^= p2[1];
		d2 ^= p2[2];
		d3 ^= p2[3];
		d4 ^= p2[4];
		d5 ^= p2[5];
		d6 ^= p2[6];
		d7 ^= p2[7];

		d0 ^= p3[0];
		d1 ^= p3[1];
		d2 ^= p3[2];
		d3 ^= p3[3];
		d4 ^= p3[4];
		d5 ^= p3[5];
		d6 ^= p3[6];
		d7 ^= p3[7];

		/* Store the result in one burst. */
		p1[0] = d0;
		p1[1] = d1;
		p1[2] = d2;
		p1[3] = d3;
		p1[4] = d4;
		p1[5] = d5;
		p1[6] = d6;
		p1[7] = d7;

		p1 += 8;
		p2 += 8;
		p3 += 8;
	} while (--lines > 0);
}
185 | ||
/*
 * XOR the buffers at p2, p3 and p4 into the buffer at p1, one line of eight
 * longs at a time: load the whole destination line into locals, fold each
 * source line in, then store the whole line back.
 *
 * The locals are unsigned long, matching the buffer element type, so no
 * value passes through a signed conversion on the way in or out.
 *
 * bytes is turned into a count of eight-long lines (rounding down).  The
 * loop is a do/while, so one line is always processed, even when bytes is
 * smaller than a full line.
 */
static void
xor_32regs_4(unsigned long bytes, unsigned long * __restrict p1,
	     const unsigned long * __restrict p2,
	     const unsigned long * __restrict p3,
	     const unsigned long * __restrict p4)
{
	long lines = bytes / (sizeof (long)) / 8;

	do {
		unsigned long d0, d1, d2, d3, d4, d5, d6, d7;

		/* Load the destination line in one burst. */
		d0 = p1[0];
		d1 = p1[1];
		d2 = p1[2];
		d3 = p1[3];
		d4 = p1[4];
		d5 = p1[5];
		d6 = p1[6];
		d7 = p1[7];

		d0 ^= p2[0];
		d1 ^= p2[1];
		d2 ^= p2[2];
		d3 ^= p2[3];
		d4 ^= p2[4];
		d5 ^= p2[5];
		d6 ^= p2[6];
		d7 ^= p2[7];

		d0 ^= p3[0];
		d1 ^= p3[1];
		d2 ^= p3[2];
		d3 ^= p3[3];
		d4 ^= p3[4];
		d5 ^= p3[5];
		d6 ^= p3[6];
		d7 ^= p3[7];

		d0 ^= p4[0];
		d1 ^= p4[1];
		d2 ^= p4[2];
		d3 ^= p4[3];
		d4 ^= p4[4];
		d5 ^= p4[5];
		d6 ^= p4[6];
		d7 ^= p4[7];

		/* Store the result in one burst. */
		p1[0] = d0;
		p1[1] = d1;
		p1[2] = d2;
		p1[3] = d3;
		p1[4] = d4;
		p1[5] = d5;
		p1[6] = d6;
		p1[7] = d7;

		p1 += 8;
		p2 += 8;
		p3 += 8;
		p4 += 8;
	} while (--lines > 0);
}
242 | ||
/*
 * XOR the buffers at p2, p3, p4 and p5 into the buffer at p1, one line of
 * eight longs at a time: load the whole destination line into locals, fold
 * each source line in, then store the whole line back.
 *
 * The locals are unsigned long, matching the buffer element type, so no
 * value passes through a signed conversion on the way in or out.
 *
 * bytes is turned into a count of eight-long lines (rounding down).  The
 * loop is a do/while, so one line is always processed, even when bytes is
 * smaller than a full line.
 */
static void
xor_32regs_5(unsigned long bytes, unsigned long * __restrict p1,
	     const unsigned long * __restrict p2,
	     const unsigned long * __restrict p3,
	     const unsigned long * __restrict p4,
	     const unsigned long * __restrict p5)
{
	long lines = bytes / (sizeof (long)) / 8;

	do {
		unsigned long d0, d1, d2, d3, d4, d5, d6, d7;

		/* Load the destination line in one burst. */
		d0 = p1[0];
		d1 = p1[1];
		d2 = p1[2];
		d3 = p1[3];
		d4 = p1[4];
		d5 = p1[5];
		d6 = p1[6];
		d7 = p1[7];

		d0 ^= p2[0];
		d1 ^= p2[1];
		d2 ^= p2[2];
		d3 ^= p2[3];
		d4 ^= p2[4];
		d5 ^= p2[5];
		d6 ^= p2[6];
		d7 ^= p2[7];

		d0 ^= p3[0];
		d1 ^= p3[1];
		d2 ^= p3[2];
		d3 ^= p3[3];
		d4 ^= p3[4];
		d5 ^= p3[5];
		d6 ^= p3[6];
		d7 ^= p3[7];

		d0 ^= p4[0];
		d1 ^= p4[1];
		d2 ^= p4[2];
		d3 ^= p4[3];
		d4 ^= p4[4];
		d5 ^= p4[5];
		d6 ^= p4[6];
		d7 ^= p4[7];

		d0 ^= p5[0];
		d1 ^= p5[1];
		d2 ^= p5[2];
		d3 ^= p5[3];
		d4 ^= p5[4];
		d5 ^= p5[5];
		d6 ^= p5[6];
		d7 ^= p5[7];

		/* Store the result in one burst. */
		p1[0] = d0;
		p1[1] = d1;
		p1[2] = d2;
		p1[3] = d3;
		p1[4] = d4;
		p1[5] = d5;
		p1[6] = d6;
		p1[7] = d7;

		p1 += 8;
		p2 += 8;
		p3 += 8;
		p4 += 8;
		p5 += 8;
	} while (--lines > 0);
}
309 | ||
/*
 * XOR the buffer at p2 into the buffer at p1, eight longs per pass, calling
 * prefetchw() on the destination and prefetch() on the source one line
 * ahead of the line being processed.
 *
 * The look-ahead calls are always made before the first line.  After that
 * they are made only while at least one further line follows the next one,
 * so when several lines are processed the last one is handled without a
 * look-ahead.  One line is always processed, whatever bytes is.
 */
static void
xor_8regs_p_2(unsigned long bytes, unsigned long * __restrict p1,
	      const unsigned long * __restrict p2)
{
	/* Number of lines still to do after the current one. */
	long left = bytes / (sizeof (long)) / 8 - 1;

	prefetchw(p1);
	prefetch(p2);

	prefetchw(p1 + 8);
	prefetch(p2 + 8);

	for (;;) {
		p1[0] = p1[0] ^ p2[0];
		p1[1] = p1[1] ^ p2[1];
		p1[2] = p1[2] ^ p2[2];
		p1[3] = p1[3] ^ p2[3];
		p1[4] = p1[4] ^ p2[4];
		p1[5] = p1[5] ^ p2[5];
		p1[6] = p1[6] ^ p2[6];
		p1[7] = p1[7] ^ p2[7];

		p1 += 8;
		p2 += 8;

		if (--left < 0)
			break;
		if (left > 0) {
			prefetchw(p1 + 8);
			prefetch(p2 + 8);
		}
	}
}
336 | ||
/*
 * XOR the buffers at p2 and p3 into the buffer at p1, eight longs per pass,
 * calling prefetchw() on the destination and prefetch() on each source one
 * line ahead of the line being processed.
 *
 * The look-ahead calls are always made before the first line.  After that
 * they are made only while at least one further line follows the next one,
 * so when several lines are processed the last one is handled without a
 * look-ahead.  One line is always processed, whatever bytes is.
 */
static void
xor_8regs_p_3(unsigned long bytes, unsigned long * __restrict p1,
	      const unsigned long * __restrict p2,
	      const unsigned long * __restrict p3)
{
	/* Number of lines still to do after the current one. */
	long left = bytes / (sizeof (long)) / 8 - 1;

	prefetchw(p1);
	prefetch(p2);
	prefetch(p3);

	prefetchw(p1 + 8);
	prefetch(p2 + 8);
	prefetch(p3 + 8);

	for (;;) {
		p1[0] = p1[0] ^ p2[0] ^ p3[0];
		p1[1] = p1[1] ^ p2[1] ^ p3[1];
		p1[2] = p1[2] ^ p2[2] ^ p3[2];
		p1[3] = p1[3] ^ p2[3] ^ p3[3];
		p1[4] = p1[4] ^ p2[4] ^ p3[4];
		p1[5] = p1[5] ^ p2[5] ^ p3[5];
		p1[6] = p1[6] ^ p2[6] ^ p3[6];
		p1[7] = p1[7] ^ p2[7] ^ p3[7];

		p1 += 8;
		p2 += 8;
		p3 += 8;

		if (--left < 0)
			break;
		if (left > 0) {
			prefetchw(p1 + 8);
			prefetch(p2 + 8);
			prefetch(p3 + 8);
		}
	}
}
367 | ||
/*
 * XOR the buffers at p2, p3 and p4 into the buffer at p1, eight longs per
 * pass, calling prefetchw() on the destination and prefetch() on each
 * source one line ahead of the line being processed.
 *
 * The look-ahead calls are always made before the first line.  After that
 * they are made only while at least one further line follows the next one,
 * so when several lines are processed the last one is handled without a
 * look-ahead.  One line is always processed, whatever bytes is.
 */
static void
xor_8regs_p_4(unsigned long bytes, unsigned long * __restrict p1,
	      const unsigned long * __restrict p2,
	      const unsigned long * __restrict p3,
	      const unsigned long * __restrict p4)
{
	/* Number of lines still to do after the current one. */
	long left = bytes / (sizeof (long)) / 8 - 1;

	prefetchw(p1);
	prefetch(p2);
	prefetch(p3);
	prefetch(p4);

	prefetchw(p1 + 8);
	prefetch(p2 + 8);
	prefetch(p3 + 8);
	prefetch(p4 + 8);

	for (;;) {
		p1[0] = p1[0] ^ p2[0] ^ p3[0] ^ p4[0];
		p1[1] = p1[1] ^ p2[1] ^ p3[1] ^ p4[1];
		p1[2] = p1[2] ^ p2[2] ^ p3[2] ^ p4[2];
		p1[3] = p1[3] ^ p2[3] ^ p3[3] ^ p4[3];
		p1[4] = p1[4] ^ p2[4] ^ p3[4] ^ p4[4];
		p1[5] = p1[5] ^ p2[5] ^ p3[5] ^ p4[5];
		p1[6] = p1[6] ^ p2[6] ^ p3[6] ^ p4[6];
		p1[7] = p1[7] ^ p2[7] ^ p3[7] ^ p4[7];

		p1 += 8;
		p2 += 8;
		p3 += 8;
		p4 += 8;

		if (--left < 0)
			break;
		if (left > 0) {
			prefetchw(p1 + 8);
			prefetch(p2 + 8);
			prefetch(p3 + 8);
			prefetch(p4 + 8);
		}
	}
}
403 | ||
/*
 * XOR the buffers at p2, p3, p4 and p5 into the buffer at p1, eight longs
 * per pass, calling prefetchw() on the destination and prefetch() on each
 * source one line ahead of the line being processed.
 *
 * The look-ahead calls are always made before the first line.  After that
 * they are made only while at least one further line follows the next one,
 * so when several lines are processed the last one is handled without a
 * look-ahead.  One line is always processed, whatever bytes is.
 */
static void
xor_8regs_p_5(unsigned long bytes, unsigned long * __restrict p1,
	      const unsigned long * __restrict p2,
	      const unsigned long * __restrict p3,
	      const unsigned long * __restrict p4,
	      const unsigned long * __restrict p5)
{
	/* Number of lines still to do after the current one. */
	long left = bytes / (sizeof (long)) / 8 - 1;

	prefetchw(p1);
	prefetch(p2);
	prefetch(p3);
	prefetch(p4);
	prefetch(p5);

	prefetchw(p1 + 8);
	prefetch(p2 + 8);
	prefetch(p3 + 8);
	prefetch(p4 + 8);
	prefetch(p5 + 8);

	for (;;) {
		p1[0] = p1[0] ^ p2[0] ^ p3[0] ^ p4[0] ^ p5[0];
		p1[1] = p1[1] ^ p2[1] ^ p3[1] ^ p4[1] ^ p5[1];
		p1[2] = p1[2] ^ p2[2] ^ p3[2] ^ p4[2] ^ p5[2];
		p1[3] = p1[3] ^ p2[3] ^ p3[3] ^ p4[3] ^ p5[3];
		p1[4] = p1[4] ^ p2[4] ^ p3[4] ^ p4[4] ^ p5[4];
		p1[5] = p1[5] ^ p2[5] ^ p3[5] ^ p4[5] ^ p5[5];
		p1[6] = p1[6] ^ p2[6] ^ p3[6] ^ p4[6] ^ p5[6];
		p1[7] = p1[7] ^ p2[7] ^ p3[7] ^ p4[7] ^ p5[7];

		p1 += 8;
		p2 += 8;
		p3 += 8;
		p4 += 8;
		p5 += 8;

		if (--left < 0)
			break;
		if (left > 0) {
			prefetchw(p1 + 8);
			prefetch(p2 + 8);
			prefetch(p3 + 8);
			prefetch(p4 + 8);
			prefetch(p5 + 8);
		}
	}
}
443 | ||
/*
 * XOR the buffer at p2 into the buffer at p1, one line of eight longs at a
 * time (load the destination line into locals, fold the source in, store
 * the line back), calling prefetchw() on the destination and prefetch() on
 * the source one line ahead of the line being processed.
 *
 * The locals are unsigned long, matching the buffer element type, so no
 * value passes through a signed conversion on the way in or out.
 *
 * The look-ahead calls are always made before the first line.  After that
 * they are made only while at least one further line follows the next one,
 * so when several lines are processed the last one is handled without a
 * look-ahead.  One line is always processed, whatever bytes is.
 */
static void
xor_32regs_p_2(unsigned long bytes, unsigned long * __restrict p1,
	       const unsigned long * __restrict p2)
{
	unsigned long d0, d1, d2, d3, d4, d5, d6, d7;
	/* Number of lines still to do after the current one. */
	long lines = bytes / (sizeof (long)) / 8 - 1;

	prefetchw(p1);
	prefetch(p2);

	prefetchw(p1 + 8);
	prefetch(p2 + 8);

	for (;;) {
		/* Load the destination line in one burst. */
		d0 = p1[0];
		d1 = p1[1];
		d2 = p1[2];
		d3 = p1[3];
		d4 = p1[4];
		d5 = p1[5];
		d6 = p1[6];
		d7 = p1[7];

		d0 ^= p2[0];
		d1 ^= p2[1];
		d2 ^= p2[2];
		d3 ^= p2[3];
		d4 ^= p2[4];
		d5 ^= p2[5];
		d6 ^= p2[6];
		d7 ^= p2[7];

		/* Store the result in one burst. */
		p1[0] = d0;
		p1[1] = d1;
		p1[2] = d2;
		p1[3] = d3;
		p1[4] = d4;
		p1[5] = d5;
		p1[6] = d6;
		p1[7] = d7;

		p1 += 8;
		p2 += 8;

		if (--lines < 0)
			break;
		if (lines > 0) {
			prefetchw(p1 + 8);
			prefetch(p2 + 8);
		}
	}
}
489 | ||
/*
 * XOR the buffers at p2 and p3 into the buffer at p1, one line of eight
 * longs at a time (load the destination line into locals, fold each source
 * in, store the line back), calling prefetchw() on the destination and
 * prefetch() on each source one line ahead of the line being processed.
 *
 * The locals are unsigned long, matching the buffer element type, so no
 * value passes through a signed conversion on the way in or out.
 *
 * The look-ahead calls are always made before the first line.  After that
 * they are made only while at least one further line follows the next one,
 * so when several lines are processed the last one is handled without a
 * look-ahead.  One line is always processed, whatever bytes is.
 */
static void
xor_32regs_p_3(unsigned long bytes, unsigned long * __restrict p1,
	       const unsigned long * __restrict p2,
	       const unsigned long * __restrict p3)
{
	unsigned long d0, d1, d2, d3, d4, d5, d6, d7;
	/* Number of lines still to do after the current one. */
	long lines = bytes / (sizeof (long)) / 8 - 1;

	prefetchw(p1);
	prefetch(p2);
	prefetch(p3);

	prefetchw(p1 + 8);
	prefetch(p2 + 8);
	prefetch(p3 + 8);

	for (;;) {
		/* Load the destination line in one burst. */
		d0 = p1[0];
		d1 = p1[1];
		d2 = p1[2];
		d3 = p1[3];
		d4 = p1[4];
		d5 = p1[5];
		d6 = p1[6];
		d7 = p1[7];

		d0 ^= p2[0];
		d1 ^= p2[1];
		d2 ^= p2[2];
		d3 ^= p2[3];
		d4 ^= p2[4];
		d5 ^= p2[5];
		d6 ^= p2[6];
		d7 ^= p2[7];

		d0 ^= p3[0];
		d1 ^= p3[1];
		d2 ^= p3[2];
		d3 ^= p3[3];
		d4 ^= p3[4];
		d5 ^= p3[5];
		d6 ^= p3[6];
		d7 ^= p3[7];

		/* Store the result in one burst. */
		p1[0] = d0;
		p1[1] = d1;
		p1[2] = d2;
		p1[3] = d3;
		p1[4] = d4;
		p1[5] = d5;
		p1[6] = d6;
		p1[7] = d7;

		p1 += 8;
		p2 += 8;
		p3 += 8;

		if (--lines < 0)
			break;
		if (lines > 0) {
			prefetchw(p1 + 8);
			prefetch(p2 + 8);
			prefetch(p3 + 8);
		}
	}
}
547 | ||
/*
 * XOR the buffers at p2, p3 and p4 into the buffer at p1, one line of eight
 * longs at a time (load the destination line into locals, fold each source
 * in, store the line back), calling prefetchw() on the destination and
 * prefetch() on each source one line ahead of the line being processed.
 *
 * The locals are unsigned long, matching the buffer element type, so no
 * value passes through a signed conversion on the way in or out.
 *
 * The look-ahead calls are always made before the first line.  After that
 * they are made only while at least one further line follows the next one,
 * so when several lines are processed the last one is handled without a
 * look-ahead.  One line is always processed, whatever bytes is.
 */
static void
xor_32regs_p_4(unsigned long bytes, unsigned long * __restrict p1,
	       const unsigned long * __restrict p2,
	       const unsigned long * __restrict p3,
	       const unsigned long * __restrict p4)
{
	unsigned long d0, d1, d2, d3, d4, d5, d6, d7;
	/* Number of lines still to do after the current one. */
	long lines = bytes / (sizeof (long)) / 8 - 1;

	prefetchw(p1);
	prefetch(p2);
	prefetch(p3);
	prefetch(p4);

	prefetchw(p1 + 8);
	prefetch(p2 + 8);
	prefetch(p3 + 8);
	prefetch(p4 + 8);

	for (;;) {
		/* Load the destination line in one burst. */
		d0 = p1[0];
		d1 = p1[1];
		d2 = p1[2];
		d3 = p1[3];
		d4 = p1[4];
		d5 = p1[5];
		d6 = p1[6];
		d7 = p1[7];

		d0 ^= p2[0];
		d1 ^= p2[1];
		d2 ^= p2[2];
		d3 ^= p2[3];
		d4 ^= p2[4];
		d5 ^= p2[5];
		d6 ^= p2[6];
		d7 ^= p2[7];

		d0 ^= p3[0];
		d1 ^= p3[1];
		d2 ^= p3[2];
		d3 ^= p3[3];
		d4 ^= p3[4];
		d5 ^= p3[5];
		d6 ^= p3[6];
		d7 ^= p3[7];

		d0 ^= p4[0];
		d1 ^= p4[1];
		d2 ^= p4[2];
		d3 ^= p4[3];
		d4 ^= p4[4];
		d5 ^= p4[5];
		d6 ^= p4[6];
		d7 ^= p4[7];

		/* Store the result in one burst. */
		p1[0] = d0;
		p1[1] = d1;
		p1[2] = d2;
		p1[3] = d3;
		p1[4] = d4;
		p1[5] = d5;
		p1[6] = d6;
		p1[7] = d7;

		p1 += 8;
		p2 += 8;
		p3 += 8;
		p4 += 8;

		if (--lines < 0)
			break;
		if (lines > 0) {
			prefetchw(p1 + 8);
			prefetch(p2 + 8);
			prefetch(p3 + 8);
			prefetch(p4 + 8);
		}
	}
}
617 | ||
/*
 * XOR the buffers at p2, p3, p4 and p5 into the buffer at p1, one line of
 * eight longs at a time (load the destination line into locals, fold each
 * source in, store the line back), calling prefetchw() on the destination
 * and prefetch() on each source one line ahead of the line being processed.
 *
 * The locals are unsigned long, matching the buffer element type, so no
 * value passes through a signed conversion on the way in or out.
 *
 * The look-ahead calls are always made before the first line.  After that
 * they are made only while at least one further line follows the next one,
 * so when several lines are processed the last one is handled without a
 * look-ahead.  One line is always processed, whatever bytes is.
 */
static void
xor_32regs_p_5(unsigned long bytes, unsigned long * __restrict p1,
	       const unsigned long * __restrict p2,
	       const unsigned long * __restrict p3,
	       const unsigned long * __restrict p4,
	       const unsigned long * __restrict p5)
{
	unsigned long d0, d1, d2, d3, d4, d5, d6, d7;
	/* Number of lines still to do after the current one. */
	long lines = bytes / (sizeof (long)) / 8 - 1;

	prefetchw(p1);
	prefetch(p2);
	prefetch(p3);
	prefetch(p4);
	prefetch(p5);

	prefetchw(p1 + 8);
	prefetch(p2 + 8);
	prefetch(p3 + 8);
	prefetch(p4 + 8);
	prefetch(p5 + 8);

	for (;;) {
		/* Load the destination line in one burst. */
		d0 = p1[0];
		d1 = p1[1];
		d2 = p1[2];
		d3 = p1[3];
		d4 = p1[4];
		d5 = p1[5];
		d6 = p1[6];
		d7 = p1[7];

		d0 ^= p2[0];
		d1 ^= p2[1];
		d2 ^= p2[2];
		d3 ^= p2[3];
		d4 ^= p2[4];
		d5 ^= p2[5];
		d6 ^= p2[6];
		d7 ^= p2[7];

		d0 ^= p3[0];
		d1 ^= p3[1];
		d2 ^= p3[2];
		d3 ^= p3[3];
		d4 ^= p3[4];
		d5 ^= p3[5];
		d6 ^= p3[6];
		d7 ^= p3[7];

		d0 ^= p4[0];
		d1 ^= p4[1];
		d2 ^= p4[2];
		d3 ^= p4[3];
		d4 ^= p4[4];
		d5 ^= p4[5];
		d6 ^= p4[6];
		d7 ^= p4[7];

		d0 ^= p5[0];
		d1 ^= p5[1];
		d2 ^= p5[2];
		d3 ^= p5[3];
		d4 ^= p5[4];
		d5 ^= p5[5];
		d6 ^= p5[6];
		d7 ^= p5[7];

		/* Store the result in one burst. */
		p1[0] = d0;
		p1[1] = d1;
		p1[2] = d2;
		p1[3] = d3;
		p1[4] = d4;
		p1[5] = d5;
		p1[6] = d6;
		p1[7] = d7;

		p1 += 8;
		p2 += 8;
		p3 += 8;
		p4 += 8;
		p5 += 8;

		if (--lines < 0)
			break;
		if (lines > 0) {
			prefetchw(p1 + 8);
			prefetch(p2 + 8);
			prefetch(p3 + 8);
			prefetch(p4 + 8);
			prefetch(p5 + 8);
		}
	}
}
699 | ||
700 | static struct xor_block_template xor_block_8regs = { | |
701 | .name = "8regs", | |
702 | .do_2 = xor_8regs_2, | |
703 | .do_3 = xor_8regs_3, | |
704 | .do_4 = xor_8regs_4, | |
705 | .do_5 = xor_8regs_5, | |
706 | }; | |
707 | ||
708 | static struct xor_block_template xor_block_32regs = { | |
709 | .name = "32regs", | |
710 | .do_2 = xor_32regs_2, | |
711 | .do_3 = xor_32regs_3, | |
712 | .do_4 = xor_32regs_4, | |
713 | .do_5 = xor_32regs_5, | |
714 | }; | |
715 | ||
720fb197 | 716 | static struct xor_block_template xor_block_8regs_p __maybe_unused = { |
1da177e4 LT |
717 | .name = "8regs_prefetch", |
718 | .do_2 = xor_8regs_p_2, | |
719 | .do_3 = xor_8regs_p_3, | |
720 | .do_4 = xor_8regs_p_4, | |
721 | .do_5 = xor_8regs_p_5, | |
722 | }; | |
723 | ||
720fb197 | 724 | static struct xor_block_template xor_block_32regs_p __maybe_unused = { |
1da177e4 LT |
725 | .name = "32regs_prefetch", |
726 | .do_2 = xor_32regs_p_2, | |
727 | .do_3 = xor_32regs_p_3, | |
728 | .do_4 = xor_32regs_p_4, | |
729 | .do_5 = xor_32regs_p_5, | |
730 | }; | |
731 | ||
/*
 * Hand each of the four generic templates to xor_speed(), in the order
 * 8regs, 8regs_prefetch, 32regs, 32regs_prefetch.  xor_speed() is not
 * defined in this header; it must be in scope where the macro is expanded.
 */
#define XOR_TRY_TEMPLATES				\
	do {						\
		xor_speed(&xor_block_8regs);		\
		xor_speed(&xor_block_8regs_p);		\
		xor_speed(&xor_block_32regs);		\
		xor_speed(&xor_block_32regs_p);		\
	} while (0)