/* SPDX-License-Identifier: GPL-2.0-or-later */
/* sha1-armv7-neon.S - ARM/NEON accelerated SHA-1 transform function
 *
 * Copyright © 2013-2014 Jussi Kivilinna <jussi.kivilinna@iki.fi>
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

/* Unified ARM/Thumb syntax; allow NEON instructions in this file. */
.syntax unified
.fpu neon

.text


/*
 * Context structure: byte offsets of the five 32-bit chaining words
 * h0..h4, relative to the ctx pointer (RSTATE).
 */

#define state_h0 0
#define state_h1 4
#define state_h2 8
#define state_h3 12
#define state_h4 16


/* Constants */

/* SHA-1 round constants: K1 rounds 0-19, K2 20-39, K3 40-59, K4 60-79. */
#define K1  0x5A827999
#define K2  0x6ED9EBA1
#define K3  0x8F1BBCDC
#define K4  0xCA62C1D6

/*
 * Each constant replicated into all four 32-bit lanes of one 128-bit
 * vector, so that a single vadd.u32 adds K to four schedule words.
 * The table lives in .text because sha1_transform_neon addresses it
 * PC-relative with adr; .align 4 requests 2^4 = 16-byte alignment.
 */
.align 4
.LK_VEC:
.LK1:	.long K1, K1, K1, K1
.LK2:	.long K2, K2, K2, K2
.LK3:	.long K3, K3, K3, K3
.LK4:	.long K4, K4, K4, K4


/* Register macros */

/* Arguments and bookkeeping. */
#define RSTATE r0	/* ctx: address of the chaining words h0..h4 */
#define RDATA r1	/* input pointer, post-incremented by the loads */
#define RNBLKS r2	/* number of 64-byte blocks still to process */
#define ROLDSTACK r3	/* caller sp, restored before returning */
#define RWK lr		/* write cursor into the W+K buffer on the stack */

/* SHA-1 working variables. */
#define _a r4
#define _b r5
#define _c r6
#define _d r7
#define _e r8

/* Scalar temporaries. */
#define RT0 r9
#define RT1 r10
#define RT2 r11
#define RT3 r12

/*
 * Message schedule vectors, four 32-bit words each.  The numbering is
 * deliberately not q0..q7 in order: W0/W7 and W6/W5 are loaded pairwise
 * with "vld1.32 {Wx, Wy}", which needs consecutive registers, hence
 * W0/W7 = q0/q1 and W6/W5 = q5/q6.
 */
#define W0 q0
#define W1 q7
#define W2 q2
#define W3 q3
#define W4 q4
#define W5 q6
#define W6 q5
#define W7 q1

/* Vector temporaries. */
#define tmp0 q8
#define tmp1 q9
#define tmp2 q10
#define tmp3 q11

/* Round constants, one per vector, loaded from .LK_VEC. */
#define qK1 q12
#define qK2 q13
#define qK3 q14
#define qK4 q15

/*
 * ARM_LE(code): emit code only on little-endian builds; expands to
 * nothing when CONFIG_CPU_BIG_ENDIAN is defined.  Used below to wrap the
 * vrev32.8 byte swaps that convert the big-endian message words.
 */
#ifdef CONFIG_CPU_BIG_ENDIAN
#define ARM_LE(code...)
#else
#define ARM_LE(code...)		code
#endif

/* Round function macros. */

/*
 * Byte offset of the W[i]+K word for round i inside the 16-entry
 * (64-byte) circular buffer that starts at sp.
 */
#define WK_offs(i) (((i) & 15) * 4)

/*
 * One SHA-1 round with F1(b,c,d) = (b & c) | (~b & d):
 *
 *	e += rol(a, 5) + F1(b,c,d) + WK[i];	b = rol(b, 30);
 *
 * WK[i] (schedule word plus round constant) is read from the stack
 * buffer.  The two halves of F1 never have a bit in common, so they are
 * accumulated with add instead of being ORed together first.
 * pre1/pre2/pre3 name message-schedule step macros that are interleaved
 * with the scalar instructions; each receives (i16, W, W_m04 .. W_m28).
 * Clobbers RT0, RT1 and RT3.
 */
#define _R_F1(a,b,c,d,e,i,pre1,pre2,pre3,i16,\
	      W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
	ldr RT3, [sp, WK_offs(i)]; \
		pre1(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \
	bic RT0, d, b;			/* RT0 = d & ~b */ \
	add e, e, a, ror #(32 - 5);	/* e += rol(a, 5) */ \
	and RT1, c, b;			/* RT1 = b & c */ \
		pre2(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \
	add RT0, RT0, RT3;		/* RT0 += WK[i] */ \
	add e, e, RT1; \
	ror b, #(32 - 30);		/* b = rol(b, 30) */ \
		pre3(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \
	add e, e, RT0;

/*
 * One SHA-1 round with F2(b,c,d) = b ^ c ^ d:
 *
 *	e += rol(a, 5) + F2(b,c,d) + WK[i];	b = rol(b, 30);
 *
 * Same interface as _R_F1.  Clobbers RT0 and RT3.
 * The last line carries no line continuation, so the definition does
 * not depend on a blank line following it.
 */
#define _R_F2(a,b,c,d,e,i,pre1,pre2,pre3,i16,\
	      W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
	ldr RT3, [sp, WK_offs(i)]; \
		pre1(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \
	eor RT0, d, b; \
	add e, e, a, ror #(32 - 5);	/* e += rol(a, 5) */ \
	eor RT0, RT0, c;		/* RT0 = b ^ c ^ d */ \
		pre2(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \
	add e, e, RT3;			/* e += WK[i] */ \
	ror b, #(32 - 30);		/* b = rol(b, 30) */ \
		pre3(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \
	add e, e, RT0;

/*
 * One SHA-1 round with the majority function, computed as
 * F3(b,c,d) = (b & c) + ((b ^ c) & d):
 *
 *	e += rol(a, 5) + F3(b,c,d) + WK[i];	b = rol(b, 30);
 *
 * The two terms have no bit in common, so adding them equals ORing
 * them.  Same interface as _R_F1.  Clobbers RT0, RT1 and RT3.
 */
#define _R_F3(a,b,c,d,e,i,pre1,pre2,pre3,i16,\
	      W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
	ldr RT3, [sp, WK_offs(i)]; \
		pre1(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \
	eor RT0, b, c; \
	and RT1, b, c;			/* RT1 = b & c */ \
	add e, e, a, ror #(32 - 5);	/* e += rol(a, 5) */ \
		pre2(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \
	and RT0, RT0, d;		/* RT0 = (b ^ c) & d */ \
	add RT1, RT1, RT3;		/* RT1 += WK[i] */ \
	add e, e, RT0; \
	ror b, #(32 - 30);		/* b = rol(b, 30) */ \
		pre3(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \
	add e, e, RT1;

/* F4 is the same parity function as F2, so the F4 round reuses _R_F2. */
#define _R_F4(a,b,c,d,e,i,pre1,pre2,pre3,i16,\
	      W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
	_R_F2(a,b,c,d,e,i,pre1,pre2,pre3,i16,\
	      W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28)

/*
 * Round with interleaved schedule steps: f (F1..F4) is pasted onto _R_
 * to select the round body; every other argument is forwarded as is.
 */
#define _R(a,b,c,d,e,f,i,pre1,pre2,pre3,i16,\
           W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
	_R_##f(a,b,c,d,e,i,pre1,pre2,pre3,i16,\
	       W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28)

/*
 * Plain round without schedule work: all three pre-hooks are dummy,
 * which expands to nothing.  The trailing i16, W, W_m04 .. W_m28 are
 * bare placeholder tokens; they are only ever handed to dummy and so
 * never reach the assembler.
 */
#define R(a,b,c,d,e,f,i) \
	_R_##f(a,b,c,d,e,i,dummy,dummy,dummy,i16,\
	       W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28)

/* No-op hook: accepts any arguments and expands to nothing. */
#define dummy(...)


/* Input expansion macros. */

/********* Precalc macros for rounds 0-15 *************************************/

/*
 * Load one 64-byte block from RDATA (post-incremented by 64) into
 * W0, W7, W6, W5 (words 0-3, 4-7, 8-11, 12-15), byte-swap each word on
 * little-endian builds, add curK and store the sixteen W+K words to the
 * buffer at sp.  Clobbers RWK and tmp0-tmp3.
 */
#define W_PRECALC_00_15() \
	add       RWK, sp, #(WK_offs(0)); \
	\
	vld1.32   {W0, W7}, [RDATA]!; \
 ARM_LE(vrev32.8  W0, W0;	)	/* big => little */ \
	vld1.32   {W6, W5}, [RDATA]!; \
	vadd.u32  tmp0, W0, curK; \
 ARM_LE(vrev32.8  W7, W7;	)	/* big => little */ \
 ARM_LE(vrev32.8  W6, W6;	)	/* big => little */ \
	vadd.u32  tmp1, W7, curK; \
 ARM_LE(vrev32.8  W5, W5;	)	/* big => little */ \
	vadd.u32  tmp2, W6, curK; \
	vst1.32   {tmp0, tmp1}, [RWK]!; \
	vadd.u32  tmp3, W5, curK; \
	vst1.32   {tmp2, tmp3}, [RWK];

/*
 * The same sequence as W_PRECALC_00_15, cut into thirteen single
 * instruction steps so it can be interleaved with rounds 64-79 of the
 * previous block.  The steps must be issued in numeric order.  The
 * arguments exist only to match the pre-hook signature and are unused.
 */
#define WPRECALC_00_15_0(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
	vld1.32   {W0, W7}, [RDATA]!;

#define WPRECALC_00_15_1(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
	add       RWK, sp, #(WK_offs(0));

#define WPRECALC_00_15_2(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
 ARM_LE(vrev32.8  W0, W0;	)	/* big => little */

#define WPRECALC_00_15_3(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
	vld1.32   {W6, W5}, [RDATA]!;

#define WPRECALC_00_15_4(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
	vadd.u32  tmp0, W0, curK;

#define WPRECALC_00_15_5(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
 ARM_LE(vrev32.8  W7, W7;	)	/* big => little */

#define WPRECALC_00_15_6(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
 ARM_LE(vrev32.8  W6, W6;	)	/* big => little */

#define WPRECALC_00_15_7(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
	vadd.u32  tmp1, W7, curK;

#define WPRECALC_00_15_8(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
 ARM_LE(vrev32.8  W5, W5;	)	/* big => little */

#define WPRECALC_00_15_9(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
	vadd.u32  tmp2, W6, curK;

#define WPRECALC_00_15_10(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
	vst1.32   {tmp0, tmp1}, [RWK]!;

#define WPRECALC_00_15_11(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
	vadd.u32  tmp3, W5, curK;

#define WPRECALC_00_15_12(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
	vst1.32   {tmp2, tmp3}, [RWK];


/********* Precalc macros for rounds 16-31 ************************************/

/*
 * Twelve steps, to be issued in numeric order, that produce four new
 * schedule words W[i..i+3] in W using
 *
 *	W[t] = rol(W[t-3] ^ W[t-8] ^ W[t-14] ^ W[t-16], 1)
 *
 * then add curK and store the result at sp + WK_offs(i).
 * W_m04 .. W_m16 hold the words 4 .. 16 positions back.
 *
 * Lane 3 needs W[i], which is produced by lane 0 of the same vector.
 * Steps 0-3 therefore build the XOR with a zero in that slot, and
 * steps 5-9 fold the missing term in afterwards: lane 0 of the
 * un-rotated XOR is moved to lane 3 and rotated left by 2, which is
 * rol(W[i], 1) because W[i] itself is that value rotated left by 1.
 * Clobbers tmp0, tmp1 and RWK.
 */
#define WPRECALC_16_31_0(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
	veor      tmp0, tmp0; \
	vext.8    W, W_m16, W_m12, #8;		/* W[t-14] */

#define WPRECALC_16_31_1(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
	add       RWK, sp, #(WK_offs(i)); \
	vext.8    tmp0, W_m04, tmp0, #4;	/* W[t-3], lane 3 = 0 */

#define WPRECALC_16_31_2(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
	veor      tmp0, tmp0, W_m16; \
	veor.32   W, W, W_m08;

#define WPRECALC_16_31_3(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
	veor      tmp1, tmp1; \
	veor      W, W, tmp0;

#define WPRECALC_16_31_4(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
	vshl.u32  tmp0, W, #1;

#define WPRECALC_16_31_5(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
	vext.8    tmp1, tmp1, W, #(16-12);	/* lane 0 of W -> lane 3 */ \
	vshr.u32  W, W, #31;

#define WPRECALC_16_31_6(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
	vorr      tmp0, tmp0, W;		/* tmp0 = rol(xor, 1) */ \
	vshr.u32  W, tmp1, #30;

#define WPRECALC_16_31_7(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
	vshl.u32  tmp1, tmp1, #2;

#define WPRECALC_16_31_8(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
	veor      tmp0, tmp0, W;

#define WPRECALC_16_31_9(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
	veor      W, tmp0, tmp1;

#define WPRECALC_16_31_10(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
	vadd.u32  tmp0, W, curK;

#define WPRECALC_16_31_11(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
	vst1.32   {tmp0}, [RWK];


/********* Precalc macros for rounds 32-79 ************************************/

/*
 * Ten steps, to be issued in numeric order, that produce four new
 * schedule words W[i..i+3] using the dependency-free form
 *
 *	W[t] = rol(W[t-6] ^ W[t-16] ^ W[t-28] ^ W[t-32], 2)
 *
 * On entry W still holds the words from 32 positions back; it is
 * updated in place.  The result plus curK is stored at
 * sp + WK_offs(i & ~3).  Clobbers tmp0, tmp1 and RWK.
 */
#define WPRECALC_32_79_0(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
	veor      W, W_m28;

#define WPRECALC_32_79_1(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
	vext.8    tmp0, W_m08, W_m04, #8;	/* W[t-6] */

#define WPRECALC_32_79_2(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
	veor      W, W_m16;

#define WPRECALC_32_79_3(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
	veor      W, tmp0;

#define WPRECALC_32_79_4(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
	add       RWK, sp, #(WK_offs(i&~3));

#define WPRECALC_32_79_5(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
	vshl.u32  tmp1, W, #2;

#define WPRECALC_32_79_6(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
	vshr.u32  tmp0, W, #30;

#define WPRECALC_32_79_7(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
	vorr      W, tmp0, tmp1;		/* W = rol(xor, 2) */

#define WPRECALC_32_79_8(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
	vadd.u32  tmp0, W, curK;

#define WPRECALC_32_79_9(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
	vst1.32   {tmp0}, [RWK];


/*
 * Transform nblks*64 bytes (nblks*16 32-bit words) at DATA.
 *
 * void
 * sha1_transform_neon (void *ctx, const unsigned char *data,
 *                      unsigned int nblks)
 *
 * No return value is produced: r0 is never written, so on exit it still
 * holds ctx.  Returns immediately when nblks is 0.
 *
 * The five chaining words at ctx are updated in place after every block.
 * The scalar rounds of one block are interleaved with the NEON message
 * schedule for later rounds; rounds 64-79 are interleaved with loading
 * and pre-adding the next block, except for the final block.
 *
 * curK is redefined along the way so that each group of four schedule
 * words gets the constant of the rounds that will consume it.
 *
 * Stack: r4-r12 and lr are pushed, then a 16-byte aligned 64-byte
 * buffer for the sixteen live W+K words is carved out below them.
 * Clobbers r1-r3, q0-q15 and the flags.
 *
 * NOTE(review): q4-q7 are written (W4, W6, W5, W1) but the vpush/vpop
 * pair is commented out, so they are not preserved here.  Presumably
 * the caller saves the NEON state around this call -- confirm.
 */
.align 3
ENTRY(sha1_transform_neon)
	/* input:
	 *	r0: ctx, CTX
	 *	r1: data (64*nblks bytes)
	 *	r2: nblks
	 */

	cmp RNBLKS, #0;
	beq .Ldo_nothing;

	push {r4-r12, lr};
	/*vpush {q4-q7};*/

	adr RT3, .LK_VEC;

	mov ROLDSTACK, sp;

	/* Align stack: reserve 16 words for W+K, rounded down to 16 bytes. */
	sub RT0, sp, #(16*4);
	and RT0, #(~(16-1));
	mov sp, RT0;

	vld1.32 {qK1-qK2}, [RT3]!; /* Load K1,K2 */

	/* Get the values of the chaining variables. */
	ldm RSTATE, {_a-_e};

	vld1.32 {qK3-qK4}, [RT3]; /* Load K3,K4 */

#undef curK
#define curK qK1
	/* Precalc 0-15. */
	W_PRECALC_00_15();

.Loop:
	/* Transform 0-15 + Precalc 16-31. */
	_R( _a, _b, _c, _d, _e, F1,  0,
	    WPRECALC_16_31_0, WPRECALC_16_31_1, WPRECALC_16_31_2, 16,
	    W4, W5, W6, W7, W0, _, _, _ );
	_R( _e, _a, _b, _c, _d, F1,  1,
	    WPRECALC_16_31_3, WPRECALC_16_31_4, WPRECALC_16_31_5, 16,
	    W4, W5, W6, W7, W0, _, _, _ );
	_R( _d, _e, _a, _b, _c, F1,  2,
	    WPRECALC_16_31_6, WPRECALC_16_31_7, WPRECALC_16_31_8, 16,
	    W4, W5, W6, W7, W0, _, _, _ );
	_R( _c, _d, _e, _a, _b, F1,  3,
	    WPRECALC_16_31_9, WPRECALC_16_31_10,WPRECALC_16_31_11,16,
	    W4, W5, W6, W7, W0, _, _, _ );

	/* Schedule words 20-39 are consumed by the K2 rounds. */
#undef curK
#define curK qK2
	_R( _b, _c, _d, _e, _a, F1,  4,
	    WPRECALC_16_31_0, WPRECALC_16_31_1, WPRECALC_16_31_2, 20,
	    W3, W4, W5, W6, W7, _, _, _ );
	_R( _a, _b, _c, _d, _e, F1,  5,
	    WPRECALC_16_31_3, WPRECALC_16_31_4, WPRECALC_16_31_5, 20,
	    W3, W4, W5, W6, W7, _, _, _ );
	_R( _e, _a, _b, _c, _d, F1,  6,
	    WPRECALC_16_31_6, WPRECALC_16_31_7, WPRECALC_16_31_8, 20,
	    W3, W4, W5, W6, W7, _, _, _ );
	_R( _d, _e, _a, _b, _c, F1,  7,
	    WPRECALC_16_31_9, WPRECALC_16_31_10,WPRECALC_16_31_11,20,
	    W3, W4, W5, W6, W7, _, _, _ );

	_R( _c, _d, _e, _a, _b, F1,  8,
	    WPRECALC_16_31_0, WPRECALC_16_31_1, WPRECALC_16_31_2, 24,
	    W2, W3, W4, W5, W6, _, _, _ );
	_R( _b, _c, _d, _e, _a, F1,  9,
	    WPRECALC_16_31_3, WPRECALC_16_31_4, WPRECALC_16_31_5, 24,
	    W2, W3, W4, W5, W6, _, _, _ );
	_R( _a, _b, _c, _d, _e, F1, 10,
	    WPRECALC_16_31_6, WPRECALC_16_31_7, WPRECALC_16_31_8, 24,
	    W2, W3, W4, W5, W6, _, _, _ );
	_R( _e, _a, _b, _c, _d, F1, 11,
	    WPRECALC_16_31_9, WPRECALC_16_31_10,WPRECALC_16_31_11,24,
	    W2, W3, W4, W5, W6, _, _, _ );

	_R( _d, _e, _a, _b, _c, F1, 12,
	    WPRECALC_16_31_0, WPRECALC_16_31_1, WPRECALC_16_31_2, 28,
	    W1, W2, W3, W4, W5, _, _, _ );
	_R( _c, _d, _e, _a, _b, F1, 13,
	    WPRECALC_16_31_3, WPRECALC_16_31_4, WPRECALC_16_31_5, 28,
	    W1, W2, W3, W4, W5, _, _, _ );
	_R( _b, _c, _d, _e, _a, F1, 14,
	    WPRECALC_16_31_6, WPRECALC_16_31_7, WPRECALC_16_31_8, 28,
	    W1, W2, W3, W4, W5, _, _, _ );
	_R( _a, _b, _c, _d, _e, F1, 15,
	    WPRECALC_16_31_9, WPRECALC_16_31_10,WPRECALC_16_31_11,28,
	    W1, W2, W3, W4, W5, _, _, _ );

	/* Transform 16-63 + Precalc 32-79. */
	_R( _e, _a, _b, _c, _d, F1, 16,
	    WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 32,
	    W0, W1, W2, W3, W4, W5, W6, W7);
	_R( _d, _e, _a, _b, _c, F1, 17,
	    WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 32,
	    W0, W1, W2, W3, W4, W5, W6, W7);
	_R( _c, _d, _e, _a, _b, F1, 18,
	    WPRECALC_32_79_6, dummy,            WPRECALC_32_79_7, 32,
	    W0, W1, W2, W3, W4, W5, W6, W7);
	_R( _b, _c, _d, _e, _a, F1, 19,
	    WPRECALC_32_79_8, dummy,            WPRECALC_32_79_9, 32,
	    W0, W1, W2, W3, W4, W5, W6, W7);

	_R( _a, _b, _c, _d, _e, F2, 20,
	    WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 36,
	    W7, W0, W1, W2, W3, W4, W5, W6);
	_R( _e, _a, _b, _c, _d, F2, 21,
	    WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 36,
	    W7, W0, W1, W2, W3, W4, W5, W6);
	_R( _d, _e, _a, _b, _c, F2, 22,
	    WPRECALC_32_79_6, dummy,            WPRECALC_32_79_7, 36,
	    W7, W0, W1, W2, W3, W4, W5, W6);
	_R( _c, _d, _e, _a, _b, F2, 23,
	    WPRECALC_32_79_8, dummy,            WPRECALC_32_79_9, 36,
	    W7, W0, W1, W2, W3, W4, W5, W6);

	/* Schedule words 40-59 are consumed by the K3 rounds. */
#undef curK
#define curK qK3
	_R( _b, _c, _d, _e, _a, F2, 24,
	    WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 40,
	    W6, W7, W0, W1, W2, W3, W4, W5);
	_R( _a, _b, _c, _d, _e, F2, 25,
	    WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 40,
	    W6, W7, W0, W1, W2, W3, W4, W5);
	_R( _e, _a, _b, _c, _d, F2, 26,
	    WPRECALC_32_79_6, dummy,            WPRECALC_32_79_7, 40,
	    W6, W7, W0, W1, W2, W3, W4, W5);
	_R( _d, _e, _a, _b, _c, F2, 27,
	    WPRECALC_32_79_8, dummy,            WPRECALC_32_79_9, 40,
	    W6, W7, W0, W1, W2, W3, W4, W5);

	_R( _c, _d, _e, _a, _b, F2, 28,
	    WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 44,
	    W5, W6, W7, W0, W1, W2, W3, W4);
	_R( _b, _c, _d, _e, _a, F2, 29,
	    WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 44,
	    W5, W6, W7, W0, W1, W2, W3, W4);
	_R( _a, _b, _c, _d, _e, F2, 30,
	    WPRECALC_32_79_6, dummy,            WPRECALC_32_79_7, 44,
	    W5, W6, W7, W0, W1, W2, W3, W4);
	_R( _e, _a, _b, _c, _d, F2, 31,
	    WPRECALC_32_79_8, dummy,            WPRECALC_32_79_9, 44,
	    W5, W6, W7, W0, W1, W2, W3, W4);

	_R( _d, _e, _a, _b, _c, F2, 32,
	    WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 48,
	    W4, W5, W6, W7, W0, W1, W2, W3);
	_R( _c, _d, _e, _a, _b, F2, 33,
	    WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 48,
	    W4, W5, W6, W7, W0, W1, W2, W3);
	_R( _b, _c, _d, _e, _a, F2, 34,
	    WPRECALC_32_79_6, dummy,            WPRECALC_32_79_7, 48,
	    W4, W5, W6, W7, W0, W1, W2, W3);
	_R( _a, _b, _c, _d, _e, F2, 35,
	    WPRECALC_32_79_8, dummy,            WPRECALC_32_79_9, 48,
	    W4, W5, W6, W7, W0, W1, W2, W3);

	_R( _e, _a, _b, _c, _d, F2, 36,
	    WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 52,
	    W3, W4, W5, W6, W7, W0, W1, W2);
	_R( _d, _e, _a, _b, _c, F2, 37,
	    WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 52,
	    W3, W4, W5, W6, W7, W0, W1, W2);
	_R( _c, _d, _e, _a, _b, F2, 38,
	    WPRECALC_32_79_6, dummy,            WPRECALC_32_79_7, 52,
	    W3, W4, W5, W6, W7, W0, W1, W2);
	_R( _b, _c, _d, _e, _a, F2, 39,
	    WPRECALC_32_79_8, dummy,            WPRECALC_32_79_9, 52,
	    W3, W4, W5, W6, W7, W0, W1, W2);

	_R( _a, _b, _c, _d, _e, F3, 40,
	    WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 56,
	    W2, W3, W4, W5, W6, W7, W0, W1);
	_R( _e, _a, _b, _c, _d, F3, 41,
	    WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 56,
	    W2, W3, W4, W5, W6, W7, W0, W1);
	_R( _d, _e, _a, _b, _c, F3, 42,
	    WPRECALC_32_79_6, dummy,            WPRECALC_32_79_7, 56,
	    W2, W3, W4, W5, W6, W7, W0, W1);
	_R( _c, _d, _e, _a, _b, F3, 43,
	    WPRECALC_32_79_8, dummy,            WPRECALC_32_79_9, 56,
	    W2, W3, W4, W5, W6, W7, W0, W1);

	/* Schedule words 60-79 are consumed by the K4 rounds. */
#undef curK
#define curK qK4
	_R( _b, _c, _d, _e, _a, F3, 44,
	    WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 60,
	    W1, W2, W3, W4, W5, W6, W7, W0);
	_R( _a, _b, _c, _d, _e, F3, 45,
	    WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 60,
	    W1, W2, W3, W4, W5, W6, W7, W0);
	_R( _e, _a, _b, _c, _d, F3, 46,
	    WPRECALC_32_79_6, dummy,            WPRECALC_32_79_7, 60,
	    W1, W2, W3, W4, W5, W6, W7, W0);
	_R( _d, _e, _a, _b, _c, F3, 47,
	    WPRECALC_32_79_8, dummy,            WPRECALC_32_79_9, 60,
	    W1, W2, W3, W4, W5, W6, W7, W0);

	_R( _c, _d, _e, _a, _b, F3, 48,
	    WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 64,
	    W0, W1, W2, W3, W4, W5, W6, W7);
	_R( _b, _c, _d, _e, _a, F3, 49,
	    WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 64,
	    W0, W1, W2, W3, W4, W5, W6, W7);
	_R( _a, _b, _c, _d, _e, F3, 50,
	    WPRECALC_32_79_6, dummy,            WPRECALC_32_79_7, 64,
	    W0, W1, W2, W3, W4, W5, W6, W7);
	_R( _e, _a, _b, _c, _d, F3, 51,
	    WPRECALC_32_79_8, dummy,            WPRECALC_32_79_9, 64,
	    W0, W1, W2, W3, W4, W5, W6, W7);

	_R( _d, _e, _a, _b, _c, F3, 52,
	    WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 68,
	    W7, W0, W1, W2, W3, W4, W5, W6);
	_R( _c, _d, _e, _a, _b, F3, 53,
	    WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 68,
	    W7, W0, W1, W2, W3, W4, W5, W6);
	_R( _b, _c, _d, _e, _a, F3, 54,
	    WPRECALC_32_79_6, dummy,            WPRECALC_32_79_7, 68,
	    W7, W0, W1, W2, W3, W4, W5, W6);
	_R( _a, _b, _c, _d, _e, F3, 55,
	    WPRECALC_32_79_8, dummy,            WPRECALC_32_79_9, 68,
	    W7, W0, W1, W2, W3, W4, W5, W6);

	_R( _e, _a, _b, _c, _d, F3, 56,
	    WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 72,
	    W6, W7, W0, W1, W2, W3, W4, W5);
	_R( _d, _e, _a, _b, _c, F3, 57,
	    WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 72,
	    W6, W7, W0, W1, W2, W3, W4, W5);
	_R( _c, _d, _e, _a, _b, F3, 58,
	    WPRECALC_32_79_6, dummy,            WPRECALC_32_79_7, 72,
	    W6, W7, W0, W1, W2, W3, W4, W5);
	_R( _b, _c, _d, _e, _a, F3, 59,
	    WPRECALC_32_79_8, dummy,            WPRECALC_32_79_9, 72,
	    W6, W7, W0, W1, W2, W3, W4, W5);

	/*
	 * Count the block now; the Z flag survives rounds 60-63 because
	 * nothing in them sets flags, and is tested by the beq below.
	 */
	subs RNBLKS, #1;

	_R( _a, _b, _c, _d, _e, F4, 60,
	    WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 76,
	    W5, W6, W7, W0, W1, W2, W3, W4);
	_R( _e, _a, _b, _c, _d, F4, 61,
	    WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 76,
	    W5, W6, W7, W0, W1, W2, W3, W4);
	_R( _d, _e, _a, _b, _c, F4, 62,
	    WPRECALC_32_79_6, dummy,            WPRECALC_32_79_7, 76,
	    W5, W6, W7, W0, W1, W2, W3, W4);
	_R( _c, _d, _e, _a, _b, F4, 63,
	    WPRECALC_32_79_8, dummy,            WPRECALC_32_79_9, 76,
	    W5, W6, W7, W0, W1, W2, W3, W4);

	beq .Lend;

	/* Transform 64-79 + Precalc 0-15 of next block. */
#undef curK
#define curK qK1
	_R( _b, _c, _d, _e, _a, F4, 64,
	    WPRECALC_00_15_0, dummy, dummy, _, _, _, _, _, _, _, _, _ );
	_R( _a, _b, _c, _d, _e, F4, 65,
	    WPRECALC_00_15_1, dummy, dummy, _, _, _, _, _, _, _, _, _ );
	_R( _e, _a, _b, _c, _d, F4, 66,
	    WPRECALC_00_15_2, dummy, dummy, _, _, _, _, _, _, _, _, _ );
	_R( _d, _e, _a, _b, _c, F4, 67,
	    WPRECALC_00_15_3, dummy, dummy, _, _, _, _, _, _, _, _, _ );

	_R( _c, _d, _e, _a, _b, F4, 68,
	    dummy,            dummy, dummy, _, _, _, _, _, _, _, _, _ );
	_R( _b, _c, _d, _e, _a, F4, 69,
	    dummy,            dummy, dummy, _, _, _, _, _, _, _, _, _ );
	_R( _a, _b, _c, _d, _e, F4, 70,
	    WPRECALC_00_15_4, dummy, dummy, _, _, _, _, _, _, _, _, _ );
	_R( _e, _a, _b, _c, _d, F4, 71,
	    WPRECALC_00_15_5, dummy, dummy, _, _, _, _, _, _, _, _, _ );

	_R( _d, _e, _a, _b, _c, F4, 72,
	    dummy,            dummy, dummy, _, _, _, _, _, _, _, _, _ );
	_R( _c, _d, _e, _a, _b, F4, 73,
	    dummy,            dummy, dummy, _, _, _, _, _, _, _, _, _ );
	_R( _b, _c, _d, _e, _a, F4, 74,
	    WPRECALC_00_15_6, dummy, dummy, _, _, _, _, _, _, _, _, _ );
	_R( _a, _b, _c, _d, _e, F4, 75,
	    WPRECALC_00_15_7, dummy, dummy, _, _, _, _, _, _, _, _, _ );

	_R( _e, _a, _b, _c, _d, F4, 76,
	    WPRECALC_00_15_8, dummy, dummy, _, _, _, _, _, _, _, _, _ );
	_R( _d, _e, _a, _b, _c, F4, 77,
	    WPRECALC_00_15_9, dummy, dummy, _, _, _, _, _, _, _, _, _ );
	/*
	 * The stores of the next block overwrite the W+K buffer, so they
	 * are placed after the loads that still need the old contents:
	 * slots 0-7 are rewritten in round 78, slots 8-15 at the end of
	 * round 79, after that round has read slot 15.
	 */
	_R( _c, _d, _e, _a, _b, F4, 78,
	    WPRECALC_00_15_10, dummy, dummy, _, _, _, _, _, _, _, _, _ );
	_R( _b, _c, _d, _e, _a, F4, 79,
	    WPRECALC_00_15_11, dummy, WPRECALC_00_15_12, _, _, _, _, _, _, _, _, _ );

	/* Update the chaining variables. */
	ldm RSTATE, {RT0-RT3};
	add _a, RT0;
	ldr RT0, [RSTATE, #state_h4];	/* RT0 is free again: reuse for h4 */
	add _b, RT1;
	add _c, RT2;
	add _d, RT3;
	add _e, RT0;
	stm RSTATE, {_a-_e};

	b .Loop;

.Lend:
	/* Transform 64-79 */
	R( _b, _c, _d, _e, _a, F4, 64 );
	R( _a, _b, _c, _d, _e, F4, 65 );
	R( _e, _a, _b, _c, _d, F4, 66 );
	R( _d, _e, _a, _b, _c, F4, 67 );
	R( _c, _d, _e, _a, _b, F4, 68 );
	R( _b, _c, _d, _e, _a, F4, 69 );
	R( _a, _b, _c, _d, _e, F4, 70 );
	R( _e, _a, _b, _c, _d, F4, 71 );
	R( _d, _e, _a, _b, _c, F4, 72 );
	R( _c, _d, _e, _a, _b, F4, 73 );
	R( _b, _c, _d, _e, _a, F4, 74 );
	R( _a, _b, _c, _d, _e, F4, 75 );
	R( _e, _a, _b, _c, _d, F4, 76 );
	R( _d, _e, _a, _b, _c, F4, 77 );
	R( _c, _d, _e, _a, _b, F4, 78 );
	R( _b, _c, _d, _e, _a, F4, 79 );

	/* The W+K buffer is no longer needed: drop it. */
	mov sp, ROLDSTACK;

	/* Update the chaining variables. */
	ldm RSTATE, {RT0-RT3};
	add _a, RT0;
	ldr RT0, [RSTATE, #state_h4];	/* RT0 is free again: reuse for h4 */
	add _b, RT1;
	add _c, RT2;
	add _d, RT3;
	/*vpop {q4-q7};*/
	add _e, RT0;
	stm RSTATE, {_a-_e};

	pop {r4-r12, pc};

.Ldo_nothing:
	bx lr
ENDPROC(sha1_transform_neon)