/*
 * Copyright (C) 2016 Intel Corporation
 *
 * Author: Gayatri Kammela <gayatri.kammela@intel.com>
 * Author: Megha Dey <megha.dey@linux.intel.com>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; version 2
 * of the License.
 *
 */

#ifdef CONFIG_AS_AVX512

#include <linux/raid/pq.h>
#include "x86.h"
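
/*
 * AVX-512 accelerated RAID-6 recovery. Both routines below mirror the
 * generic C recovery in recov.c: regenerate the syndrome with the failed
 * pages zeroed out, then turn the resulting P/Q deltas back into data
 * with GF(2^8) table lookups, vectorized across 64-byte zmm registers.
 */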
static int raid6_has_avx512(void)
{
	return boot_cpu_has(X86_FEATURE_AVX2) &&
		boot_cpu_has(X86_FEATURE_AVX) &&
		boot_cpu_has(X86_FEATURE_AVX512F) &&
		boot_cpu_has(X86_FEATURE_AVX512BW) &&
		boot_cpu_has(X86_FEATURE_AVX512VL) &&
		boot_cpu_has(X86_FEATURE_AVX512DQ);
}
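
/*
 * Recover two failed data pages, faila and failb. With dP = P ^ Pxy and
 * dQ = Q ^ Qxy (syndromes recomputed below with the two failed pages
 * zeroed), the usual RAID-6 identities give, per byte:
 *
 *	data[failb] = pbmul[dP] ^ qmul[dQ]
 *	data[faila] = data[failb] ^ dP
 *
 * where pbmul multiplies by (g^(failb-faila) + 1)^-1 and qmul by
 * (g^faila + g^failb)^-1 in GF(2^8), exactly as in the scalar recovery
 * in recov.c.
 */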
static void raid6_2data_recov_avx512(int disks, size_t bytes, int faila,
				     int failb, void **ptrs)
{
	u8 *p, *q, *dp, *dq;
	const u8 *pbmul;	/* P multiplier table for B data */
	const u8 *qmul;		/* Q multiplier table (for both) */
	const u8 x0f = 0x0f;

	p = (u8 *)ptrs[disks-2];
	q = (u8 *)ptrs[disks-1];

	/*
	 * Compute syndrome with zero for the missing data pages
	 * Use the dead data pages as temporary storage for
	 * delta p and delta q
	 */

	dp = (u8 *)ptrs[faila];
	ptrs[faila] = (void *)raid6_empty_zero_page;
	ptrs[disks-2] = dp;
	dq = (u8 *)ptrs[failb];
	ptrs[failb] = (void *)raid6_empty_zero_page;
	ptrs[disks-1] = dq;

	raid6_call.gen_syndrome(disks, bytes, ptrs);

	/* Restore pointer table */
	ptrs[faila] = dp;
	ptrs[failb] = dq;
	ptrs[disks-2] = p;
	ptrs[disks-1] = q;

	/* Now, pick the proper data tables */
	pbmul = raid6_vgfmul[raid6_gfexi[failb-faila]];
	qmul  = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila] ^
					 raid6_gfexp[failb]]];
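
	/*
	 * Each raid6_vgfmul[c] entry is a pair of 16-byte tables: the first
	 * holds c * x for every low nibble x, the second c * (x << 4) for
	 * every high nibble. Broadcasting both halves and combining two
	 * vpshufb lookups with vpxorq multiplies a whole vector by c.
	 */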

	kernel_fpu_begin();

	/* zmm7 = x0f[64] */
	asm volatile("vpbroadcastb %0, %%zmm7" : : "m" (x0f));

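	/*
	 * zmm7 holds 64 copies of 0x0f and is used to split each byte into
	 * low and high nibbles for the table lookups. On x86_64 every loop
	 * iteration handles 128 bytes as two independent 64-byte lanes
	 * (register pairs such as zmm0/zmm8 and zmm1/zmm9).
	 */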
	while (bytes) {
#ifdef CONFIG_X86_64
		asm volatile("vmovdqa64 %0, %%zmm1\n\t"
			     "vmovdqa64 %1, %%zmm9\n\t"
			     "vmovdqa64 %2, %%zmm0\n\t"
			     "vmovdqa64 %3, %%zmm8\n\t"
			     "vpxorq %4, %%zmm1, %%zmm1\n\t"
			     "vpxorq %5, %%zmm9, %%zmm9\n\t"
			     "vpxorq %6, %%zmm0, %%zmm0\n\t"
			     "vpxorq %7, %%zmm8, %%zmm8"
			     :
			     : "m" (q[0]), "m" (q[64]), "m" (p[0]),
			       "m" (p[64]), "m" (dq[0]), "m" (dq[64]),
			       "m" (dp[0]), "m" (dp[64]));

		/*
		 * 1 = dq[0] ^ q[0]
		 * 9 = dq[64] ^ q[64]
		 * 0 = dp[0] ^ p[0]
		 * 8 = dp[64] ^ p[64]
		 */

		asm volatile("vbroadcasti64x2 %0, %%zmm4\n\t"
			     "vbroadcasti64x2 %1, %%zmm5"
			     :
			     : "m" (qmul[0]), "m" (qmul[16]));

		asm volatile("vpsraw $4, %%zmm1, %%zmm3\n\t"
			     "vpsraw $4, %%zmm9, %%zmm12\n\t"
			     "vpandq %%zmm7, %%zmm1, %%zmm1\n\t"
			     "vpandq %%zmm7, %%zmm9, %%zmm9\n\t"
			     "vpandq %%zmm7, %%zmm3, %%zmm3\n\t"
			     "vpandq %%zmm7, %%zmm12, %%zmm12\n\t"
			     "vpshufb %%zmm9, %%zmm4, %%zmm14\n\t"
			     "vpshufb %%zmm1, %%zmm4, %%zmm4\n\t"
			     "vpshufb %%zmm12, %%zmm5, %%zmm15\n\t"
			     "vpshufb %%zmm3, %%zmm5, %%zmm5\n\t"
			     "vpxorq %%zmm14, %%zmm15, %%zmm15\n\t"
			     "vpxorq %%zmm4, %%zmm5, %%zmm5"
			     :
			     : );

		/*
		 * 5 = qx[0]
		 * 15 = qx[64]
		 */

		asm volatile("vbroadcasti64x2 %0, %%zmm4\n\t"
			     "vbroadcasti64x2 %1, %%zmm1\n\t"
			     "vpsraw $4, %%zmm0, %%zmm2\n\t"
			     "vpsraw $4, %%zmm8, %%zmm6\n\t"
			     "vpandq %%zmm7, %%zmm0, %%zmm3\n\t"
			     "vpandq %%zmm7, %%zmm8, %%zmm14\n\t"
			     "vpandq %%zmm7, %%zmm2, %%zmm2\n\t"
			     "vpandq %%zmm7, %%zmm6, %%zmm6\n\t"
			     "vpshufb %%zmm14, %%zmm4, %%zmm12\n\t"
			     "vpshufb %%zmm3, %%zmm4, %%zmm4\n\t"
			     "vpshufb %%zmm6, %%zmm1, %%zmm13\n\t"
			     "vpshufb %%zmm2, %%zmm1, %%zmm1\n\t"
			     "vpxorq %%zmm4, %%zmm1, %%zmm1\n\t"
			     "vpxorq %%zmm12, %%zmm13, %%zmm13"
			     :
			     : "m" (pbmul[0]), "m" (pbmul[16]));

		/*
		 * 1 = pbmul[px[0]]
		 * 13 = pbmul[px[64]]
		 */
		asm volatile("vpxorq %%zmm5, %%zmm1, %%zmm1\n\t"
			     "vpxorq %%zmm15, %%zmm13, %%zmm13"
			     :
			     : );

		/*
		 * 1 = db = DQ
		 * 13 = db[64] = DQ[64]
		 */
		asm volatile("vmovdqa64 %%zmm1, %0\n\t"
			     "vmovdqa64 %%zmm13,%1\n\t"
			     "vpxorq %%zmm1, %%zmm0, %%zmm0\n\t"
			     "vpxorq %%zmm13, %%zmm8, %%zmm8"
			     :
			     : "m" (dq[0]), "m" (dq[64]));

		asm volatile("vmovdqa64 %%zmm0, %0\n\t"
			     "vmovdqa64 %%zmm8, %1"
			     :
			     : "m" (dp[0]), "m" (dp[64]));

		bytes -= 128;
		p += 128;
		q += 128;
		dp += 128;
		dq += 128;
#else
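		/*
		 * 32-bit build: only zmm0-zmm7 are encodable, so work on a
		 * single 64-byte lane per iteration.
		 */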
		asm volatile("vmovdqa64 %0, %%zmm1\n\t"
			     "vmovdqa64 %1, %%zmm0\n\t"
			     "vpxorq %2, %%zmm1, %%zmm1\n\t"
			     "vpxorq %3, %%zmm0, %%zmm0"
			     :
			     : "m" (*q), "m" (*p), "m"(*dq), "m" (*dp));

		/* 1 = dq ^ q; 0 = dp ^ p */

		asm volatile("vbroadcasti64x2 %0, %%zmm4\n\t"
			     "vbroadcasti64x2 %1, %%zmm5"
			     :
			     : "m" (qmul[0]), "m" (qmul[16]));
		/*
		 * 1 = dq ^ q
		 * 3 = (dq ^ q) >> 4
		 */
		asm volatile("vpsraw $4, %%zmm1, %%zmm3\n\t"
			     "vpandq %%zmm7, %%zmm1, %%zmm1\n\t"
			     "vpandq %%zmm7, %%zmm3, %%zmm3\n\t"
			     "vpshufb %%zmm1, %%zmm4, %%zmm4\n\t"
			     "vpshufb %%zmm3, %%zmm5, %%zmm5\n\t"
			     "vpxorq %%zmm4, %%zmm5, %%zmm5"
			     :
			     : );

		/* 5 = qx */

		asm volatile("vbroadcasti64x2 %0, %%zmm4\n\t"
			     "vbroadcasti64x2 %1, %%zmm1"
			     :
			     : "m" (pbmul[0]), "m" (pbmul[16]));

		asm volatile("vpsraw $4, %%zmm0, %%zmm2\n\t"
			     "vpandq %%zmm7, %%zmm0, %%zmm3\n\t"
			     "vpandq %%zmm7, %%zmm2, %%zmm2\n\t"
			     "vpshufb %%zmm3, %%zmm4, %%zmm4\n\t"
			     "vpshufb %%zmm2, %%zmm1, %%zmm1\n\t"
			     "vpxorq %%zmm4, %%zmm1, %%zmm1"
			     :
			     : );

		/* 1 = pbmul[px] */
		asm volatile("vpxorq %%zmm5, %%zmm1, %%zmm1\n\t"
			     /* 1 = db = DQ */
			     "vmovdqa64 %%zmm1, %0\n\t"
			     :
			     : "m" (dq[0]));

		asm volatile("vpxorq %%zmm1, %%zmm0, %%zmm0\n\t"
			     "vmovdqa64 %%zmm0, %0"
			     :
			     : "m" (dp[0]));

		bytes -= 64;
		p += 64;
		q += 64;
		dp += 64;
		dq += 64;
#endif
	}

	kernel_fpu_end();
}
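
/*
 * Recover one failed data page (faila) together with the P page. The Q
 * syndrome is recomputed below with the failed page zeroed, so
 * q ^ dq = g^faila * data[faila]; qmul multiplies by g^-faila to recover
 * the data, and P is rebuilt by xoring that data into the partial P that
 * gen_syndrome() left behind.
 */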
static void raid6_datap_recov_avx512(int disks, size_t bytes, int faila,
				     void **ptrs)
{
	u8 *p, *q, *dq;
	const u8 *qmul;		/* Q multiplier table */
	const u8 x0f = 0x0f;

	p = (u8 *)ptrs[disks-2];
	q = (u8 *)ptrs[disks-1];

	/*
	 * Compute syndrome with zero for the missing data page
	 * Use the dead data page as temporary storage for delta q
	 */

	dq = (u8 *)ptrs[faila];
	ptrs[faila] = (void *)raid6_empty_zero_page;
	ptrs[disks-1] = dq;

	raid6_call.gen_syndrome(disks, bytes, ptrs);

	/* Restore pointer table */
	ptrs[faila] = dq;
	ptrs[disks-1] = q;

	/* Now, pick the proper data tables */
	qmul  = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila]]];

	kernel_fpu_begin();

	asm volatile("vpbroadcastb %0, %%zmm7" : : "m" (x0f));

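	/* zmm7 again holds the 0x0f nibble mask; the loop structure matches
	 * raid6_2data_recov_avx512() above. */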
	while (bytes) {
#ifdef CONFIG_X86_64
		asm volatile("vmovdqa64 %0, %%zmm3\n\t"
			     "vmovdqa64 %1, %%zmm8\n\t"
			     "vpxorq %2, %%zmm3, %%zmm3\n\t"
			     "vpxorq %3, %%zmm8, %%zmm8"
			     :
			     : "m" (dq[0]), "m" (dq[64]), "m" (q[0]),
			       "m" (q[64]));

		/*
		 * 3 = q[0] ^ dq[0]
		 * 8 = q[64] ^ dq[64]
		 */
		asm volatile("vbroadcasti64x2 %0, %%zmm0\n\t"
			     "vmovapd %%zmm0, %%zmm13\n\t"
			     "vbroadcasti64x2 %1, %%zmm1\n\t"
			     "vmovapd %%zmm1, %%zmm14"
			     :
			     : "m" (qmul[0]), "m" (qmul[16]));

		asm volatile("vpsraw $4, %%zmm3, %%zmm6\n\t"
			     "vpsraw $4, %%zmm8, %%zmm12\n\t"
			     "vpandq %%zmm7, %%zmm3, %%zmm3\n\t"
			     "vpandq %%zmm7, %%zmm8, %%zmm8\n\t"
			     "vpandq %%zmm7, %%zmm6, %%zmm6\n\t"
			     "vpandq %%zmm7, %%zmm12, %%zmm12\n\t"
			     "vpshufb %%zmm3, %%zmm0, %%zmm0\n\t"
			     "vpshufb %%zmm8, %%zmm13, %%zmm13\n\t"
			     "vpshufb %%zmm6, %%zmm1, %%zmm1\n\t"
			     "vpshufb %%zmm12, %%zmm14, %%zmm14\n\t"
			     "vpxorq %%zmm0, %%zmm1, %%zmm1\n\t"
			     "vpxorq %%zmm13, %%zmm14, %%zmm14"
			     :
			     : );

		/*
		 * 1 = qmul[q[0] ^ dq[0]]
		 * 14 = qmul[q[64] ^ dq[64]]
		 */
		asm volatile("vmovdqa64 %0, %%zmm2\n\t"
			     "vmovdqa64 %1, %%zmm12\n\t"
			     "vpxorq %%zmm1, %%zmm2, %%zmm2\n\t"
			     "vpxorq %%zmm14, %%zmm12, %%zmm12"
			     :
			     : "m" (p[0]), "m" (p[64]));

		/*
		 * 2 = p[0] ^ qmul[q[0] ^ dq[0]]
		 * 12 = p[64] ^ qmul[q[64] ^ dq[64]]
		 */

		asm volatile("vmovdqa64 %%zmm1, %0\n\t"
			     "vmovdqa64 %%zmm14, %1\n\t"
			     "vmovdqa64 %%zmm2, %2\n\t"
			     "vmovdqa64 %%zmm12,%3"
			     :
			     : "m" (dq[0]), "m" (dq[64]), "m" (p[0]),
			       "m" (p[64]));

		bytes -= 128;
		p += 128;
		q += 128;
		dq += 128;
#else
		asm volatile("vmovdqa64 %0, %%zmm3\n\t"
			     "vpxorq %1, %%zmm3, %%zmm3"
			     :
			     : "m" (dq[0]), "m" (q[0]));

		/* 3 = q ^ dq */

		asm volatile("vbroadcasti64x2 %0, %%zmm0\n\t"
			     "vbroadcasti64x2 %1, %%zmm1"
			     :
			     : "m" (qmul[0]), "m" (qmul[16]));

		asm volatile("vpsraw $4, %%zmm3, %%zmm6\n\t"
			     "vpandq %%zmm7, %%zmm3, %%zmm3\n\t"
			     "vpandq %%zmm7, %%zmm6, %%zmm6\n\t"
			     "vpshufb %%zmm3, %%zmm0, %%zmm0\n\t"
			     "vpshufb %%zmm6, %%zmm1, %%zmm1\n\t"
			     "vpxorq %%zmm0, %%zmm1, %%zmm1"
			     :
			     : );

		/* 1 = qmul[q ^ dq] */

		asm volatile("vmovdqa64 %0, %%zmm2\n\t"
			     "vpxorq %%zmm1, %%zmm2, %%zmm2"
			     :
			     : "m" (p[0]));

		/* 2 = p ^ qmul[q ^ dq] */

		asm volatile("vmovdqa64 %%zmm1, %0\n\t"
			     "vmovdqa64 %%zmm2, %1"
			     :
			     : "m" (dq[0]), "m" (p[0]));

		bytes -= 64;
		p += 64;
		q += 64;
		dq += 64;
#endif
	}

	kernel_fpu_end();
}
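
/*
 * Recovery descriptor registered with the raid6 core; priority 3 places
 * it ahead of the AVX2 (2) and SSSE3 (1) implementations whenever
 * raid6_has_avx512() reports the required CPU features.
 */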
const struct raid6_recov_calls raid6_recov_avx512 = {
	.data2 = raid6_2data_recov_avx512,
	.datap = raid6_datap_recov_avx512,
	.valid = raid6_has_avx512,
#ifdef CONFIG_X86_64
	.name = "avx512x2",
#else
	.name = "avx512x1",
#endif
	.priority = 3,
};

#else
#warning "your version of binutils lacks AVX512 support"
#endif