Commit | Line | Data |
---|---|---|
f0be44f4 | 1 | #define __ARM_ARCH__ __LINUX_ARM_ARCH__ |
c2e415fe AL |
2 | @ SPDX-License-Identifier: GPL-2.0 |
3 | ||
4 | @ This code is taken from the OpenSSL project but the author (Andy Polyakov) | |
5 | @ has relicensed it under the GPLv2. Therefore this program is free software; | |
6 | @ you can redistribute it and/or modify it under the terms of the GNU General | |
7 | @ Public License version 2 as published by the Free Software Foundation. | |
8 | @ | |
9 | @ The original headers, including the original license headers, are | |
10 | @ included below for completeness. | |
11 | ||
f0be44f4 DM |
12 | @ ==================================================================== |
13 | @ Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL | |
14 | @ project. The module is, however, dual licensed under OpenSSL and | |
15 | @ CRYPTOGAMS licenses depending on where you obtain it. For further | |
9332a9e7 | 16 | @ details see https://www.openssl.org/~appro/cryptogams/. |
f0be44f4 DM |
17 | @ ==================================================================== |
18 | ||
19 | @ sha1_block procedure for ARMv4. | |
20 | @ | |
21 | @ January 2007. | |
22 | ||
23 | @ Size/performance trade-off | |
24 | @ ==================================================================== | |
25 | @ impl size in bytes comp cycles[*] measured performance | |
26 | @ ==================================================================== | |
27 | @ thumb 304 3212 4420 | |
28 | @ armv4-small 392/+29% 1958/+64% 2250/+96% | |
29 | @ armv4-compact 740/+89% 1552/+26% 1840/+22% | |
30 | @ armv4-large 1420/+92% 1307/+19% 1370/+34%[***] | |
31 | @ full unroll ~5100/+260% ~1260/+4% ~1300/+5% | |
32 | @ ==================================================================== | |
33 | @ thumb = same as 'small' but in Thumb instructions[**] and | |
34 | @ with recurring code in two private functions; | |
35 | @ small = detached Xload/update, loops are folded; | |
36 | @ compact = detached Xload/update, 5x unroll; | |
37 | @ large = interleaved Xload/update, 5x unroll; | |
38 | @ full unroll = interleaved Xload/update, full unroll, estimated[!]; | |
39 | @ | |
40 | @ [*] Manually counted instructions in "grand" loop body. Measured | |
41 | @ performance is affected by prologue and epilogue overhead, | |
42 | @ i-cache availability, branch penalties, etc. | |
43 | @ [**] While each Thumb instruction is half the size, they are not as | |
44 | @ diverse as ARM ones: e.g., there are only two arithmetic | |
45 | @ instructions with 3 arguments, no [fixed] rotate, addressing | |
46 | @ modes are limited. As a result it takes more instructions to do | |
47 | @ the same job in Thumb, therefore the code is never twice as | |
48 | @ small and always slower. | |
49 | @ [***] which is also ~35% better than compiler generated code. Dual- | |
50 | @ issue Cortex A8 core was measured to process input block in | |
51 | @ ~990 cycles. | |
52 | ||
53 | @ August 2010. | |
54 | @ | |
55 | @ Rescheduling for dual-issue pipeline resulted in 13% improvement on | |
56 | @ Cortex A8 core and in absolute terms ~870 cycles per input block | |
57 | @ [or 13.6 cycles per byte]. | |
58 | ||
59 | @ February 2011. | |
60 | @ | |
61 | @ Profiler-assisted and platform-specific optimization resulted in 10% | |
62 | @ improvement on Cortex A8 core and 12.2 cycles per byte. | |
63 | ||
638591cd | 64 | #include <linux/linkage.h> |
f0be44f4 | 65 | |
638591cd | 66 | .text |
f0be44f4 DM |
67 | |
68 | .align 2 | |
@ sha1_block_data_order -- register usage as visible in this routine:
@   r0     = address of five 32-bit state words (loaded here, updated at .L_done)
@   r1     = input pointer, advanced by 4 bytes for every message word read
@   r2     = scaled by 64 and added to r1 below, so presumably a count of
@            64-byte blocks on entry (confirm against the C caller)
@   r3..r7 = working variables, called A..E in the round comments; which
@            register plays which role rotates from round to round
@ r4-r12 and lr are saved here and restored by the final ldmia of the routine.
638591cd | 69 | ENTRY(sha1_block_data_order) |
f0be44f4 DM |
70 | stmdb sp!,{r4-r12,lr} |
71 | add r2,r1,r2,lsl#6 @ r2 = r1 + r2*64: end-of-input mark, compared with r1 after each block | |
72 | ldmia r0,{r3,r4,r5,r6,r7} | |
@ Per-block setup; .L_done branches back here while r1 != r2.
@   r8  = round constant for rounds 0..19
@   r14 = store pointer for the message schedule X[]; words are pushed below
@         it with pre-decrement (str ...,[r14,#-4]!)
@   sp  = lowered by 15 words so that "r14 == sp" ends the .L_00_15 loop
@ r5, r6 and r7 are rotated right by 30 up front: the rounds always consume
@ them through a ror#2 operand, and 30 + 2 = 32 gives back the plain value.
73 | .Lloop: | |
74 | ldr r8,.LK_00_19 | |
75 | mov r14,sp | |
76 | sub sp,sp,#15*4 | |
77 | mov r5,r5,ror#30 | |
78 | mov r6,r6,ror#30 | |
79 | mov r7,r7,ror#30 @ [6] | |
@ Rounds 0..14, five rounds per pass.  Each round:
@   - reads one message word from [r1] as big-endian (assembled byte by byte
@     when __ARM_ARCH__ < 7; otherwise a single ldr, followed by rev when
@     __ARMEL__ is defined),
@   - pushes it onto the descending X[] area through r14,
@   - adds K (r8), ROR(A,27), X[i] and F_00_19 = ((C ^ D) & B) ^ D into E.
@ The pass repeats until r14 has come down to sp, i.e. 15 words are stored.
80 | .L_00_15: | |
@ -- round with E=r7 A=r3 B=r4 C=r5 D=r6
81 | #if __ARM_ARCH__<7 | |
82 | ldrb r10,[r1,#2] | |
83 | ldrb r9,[r1,#3] | |
84 | ldrb r11,[r1,#1] | |
85 | add r7,r8,r7,ror#2 @ E+=K_00_19 | |
86 | ldrb r12,[r1],#4 | |
87 | orr r9,r9,r10,lsl#8 | |
88 | eor r10,r5,r6 @ F_xx_xx | |
89 | orr r9,r9,r11,lsl#16 | |
90 | add r7,r7,r3,ror#27 @ E+=ROR(A,27) | |
91 | orr r9,r9,r12,lsl#24 | |
92 | #else | |
93 | ldr r9,[r1],#4 @ handles unaligned | |
94 | add r7,r8,r7,ror#2 @ E+=K_00_19 | |
95 | eor r10,r5,r6 @ F_xx_xx | |
96 | add r7,r7,r3,ror#27 @ E+=ROR(A,27) | |
97 | #ifdef __ARMEL__ | |
98 | rev r9,r9 @ byte swap | |
99 | #endif | |
100 | #endif | |
101 | and r10,r4,r10,ror#2 | |
102 | add r7,r7,r9 @ E+=X[i] | |
103 | eor r10,r10,r6,ror#2 @ F_00_19(B,C,D) | |
104 | str r9,[r14,#-4]! | |
105 | add r7,r7,r10 @ E+=F_00_19(B,C,D) | |
@ -- round with E=r6 A=r7 B=r3 C=r4 D=r5
106 | #if __ARM_ARCH__<7 | |
107 | ldrb r10,[r1,#2] | |
108 | ldrb r9,[r1,#3] | |
109 | ldrb r11,[r1,#1] | |
110 | add r6,r8,r6,ror#2 @ E+=K_00_19 | |
111 | ldrb r12,[r1],#4 | |
112 | orr r9,r9,r10,lsl#8 | |
113 | eor r10,r4,r5 @ F_xx_xx | |
114 | orr r9,r9,r11,lsl#16 | |
115 | add r6,r6,r7,ror#27 @ E+=ROR(A,27) | |
116 | orr r9,r9,r12,lsl#24 | |
117 | #else | |
118 | ldr r9,[r1],#4 @ handles unaligned | |
119 | add r6,r8,r6,ror#2 @ E+=K_00_19 | |
120 | eor r10,r4,r5 @ F_xx_xx | |
121 | add r6,r6,r7,ror#27 @ E+=ROR(A,27) | |
122 | #ifdef __ARMEL__ | |
123 | rev r9,r9 @ byte swap | |
124 | #endif | |
125 | #endif | |
126 | and r10,r3,r10,ror#2 | |
127 | add r6,r6,r9 @ E+=X[i] | |
128 | eor r10,r10,r5,ror#2 @ F_00_19(B,C,D) | |
129 | str r9,[r14,#-4]! | |
130 | add r6,r6,r10 @ E+=F_00_19(B,C,D) | |
@ -- round with E=r5 A=r6 B=r7 C=r3 D=r4
131 | #if __ARM_ARCH__<7 | |
132 | ldrb r10,[r1,#2] | |
133 | ldrb r9,[r1,#3] | |
134 | ldrb r11,[r1,#1] | |
135 | add r5,r8,r5,ror#2 @ E+=K_00_19 | |
136 | ldrb r12,[r1],#4 | |
137 | orr r9,r9,r10,lsl#8 | |
138 | eor r10,r3,r4 @ F_xx_xx | |
139 | orr r9,r9,r11,lsl#16 | |
140 | add r5,r5,r6,ror#27 @ E+=ROR(A,27) | |
141 | orr r9,r9,r12,lsl#24 | |
142 | #else | |
143 | ldr r9,[r1],#4 @ handles unaligned | |
144 | add r5,r8,r5,ror#2 @ E+=K_00_19 | |
145 | eor r10,r3,r4 @ F_xx_xx | |
146 | add r5,r5,r6,ror#27 @ E+=ROR(A,27) | |
147 | #ifdef __ARMEL__ | |
148 | rev r9,r9 @ byte swap | |
149 | #endif | |
150 | #endif | |
151 | and r10,r7,r10,ror#2 | |
152 | add r5,r5,r9 @ E+=X[i] | |
153 | eor r10,r10,r4,ror#2 @ F_00_19(B,C,D) | |
154 | str r9,[r14,#-4]! | |
155 | add r5,r5,r10 @ E+=F_00_19(B,C,D) | |
@ -- round with E=r4 A=r5 B=r6 C=r7 D=r3
156 | #if __ARM_ARCH__<7 | |
157 | ldrb r10,[r1,#2] | |
158 | ldrb r9,[r1,#3] | |
159 | ldrb r11,[r1,#1] | |
160 | add r4,r8,r4,ror#2 @ E+=K_00_19 | |
161 | ldrb r12,[r1],#4 | |
162 | orr r9,r9,r10,lsl#8 | |
163 | eor r10,r7,r3 @ F_xx_xx | |
164 | orr r9,r9,r11,lsl#16 | |
165 | add r4,r4,r5,ror#27 @ E+=ROR(A,27) | |
166 | orr r9,r9,r12,lsl#24 | |
167 | #else | |
168 | ldr r9,[r1],#4 @ handles unaligned | |
169 | add r4,r8,r4,ror#2 @ E+=K_00_19 | |
170 | eor r10,r7,r3 @ F_xx_xx | |
171 | add r4,r4,r5,ror#27 @ E+=ROR(A,27) | |
172 | #ifdef __ARMEL__ | |
173 | rev r9,r9 @ byte swap | |
174 | #endif | |
175 | #endif | |
176 | and r10,r6,r10,ror#2 | |
177 | add r4,r4,r9 @ E+=X[i] | |
178 | eor r10,r10,r3,ror#2 @ F_00_19(B,C,D) | |
179 | str r9,[r14,#-4]! | |
180 | add r4,r4,r10 @ E+=F_00_19(B,C,D) | |
@ -- round with E=r3 A=r4 B=r5 C=r6 D=r7
181 | #if __ARM_ARCH__<7 | |
182 | ldrb r10,[r1,#2] | |
183 | ldrb r9,[r1,#3] | |
184 | ldrb r11,[r1,#1] | |
185 | add r3,r8,r3,ror#2 @ E+=K_00_19 | |
186 | ldrb r12,[r1],#4 | |
187 | orr r9,r9,r10,lsl#8 | |
188 | eor r10,r6,r7 @ F_xx_xx | |
189 | orr r9,r9,r11,lsl#16 | |
190 | add r3,r3,r4,ror#27 @ E+=ROR(A,27) | |
191 | orr r9,r9,r12,lsl#24 | |
192 | #else | |
193 | ldr r9,[r1],#4 @ handles unaligned | |
194 | add r3,r8,r3,ror#2 @ E+=K_00_19 | |
195 | eor r10,r6,r7 @ F_xx_xx | |
196 | add r3,r3,r4,ror#27 @ E+=ROR(A,27) | |
197 | #ifdef __ARMEL__ | |
198 | rev r9,r9 @ byte swap | |
199 | #endif | |
200 | #endif | |
201 | and r10,r5,r10,ror#2 | |
202 | add r3,r3,r9 @ E+=X[i] | |
203 | eor r10,r10,r7,ror#2 @ F_00_19(B,C,D) | |
204 | str r9,[r14,#-4]! | |
205 | add r3,r3,r10 @ E+=F_00_19(B,C,D) | |
@ Another pass unless r14 has reached the sp set up at .Lloop.
638591cd | 206 | cmp r14,sp |
f0be44f4 | 207 | bne .L_00_15 @ [((11+4)*5+2)*3] |
@ Rounds 15..19, unrolled once.  sp drops by a further 25 words (40 in total
@ below its value at .Lloop) and becomes the stop mark for the first pass
@ through .L_20_39_or_60_79.
@ Round 15 consumes one more message word from [r1].  Rounds 16..19 instead
@ expand the schedule: with r14 at the newest stored word, the words 15, 13,
@ 7 and 2 slots above it are XORed together and rotated right by 31 (i.e.
@ left by 1), and the result is stored as the next X[] entry.
934fc24d | 208 | sub sp,sp,#25*4 |
f0be44f4 DM |
@ -- round 15: E=r7 A=r3 B=r4 C=r5 D=r6
209 | #if __ARM_ARCH__<7 |
210 | ldrb r10,[r1,#2] | |
211 | ldrb r9,[r1,#3] | |
212 | ldrb r11,[r1,#1] | |
213 | add r7,r8,r7,ror#2 @ E+=K_00_19 | |
214 | ldrb r12,[r1],#4 | |
215 | orr r9,r9,r10,lsl#8 | |
216 | eor r10,r5,r6 @ F_xx_xx | |
217 | orr r9,r9,r11,lsl#16 | |
218 | add r7,r7,r3,ror#27 @ E+=ROR(A,27) | |
219 | orr r9,r9,r12,lsl#24 | |
220 | #else | |
221 | ldr r9,[r1],#4 @ handles unaligned | |
222 | add r7,r8,r7,ror#2 @ E+=K_00_19 | |
223 | eor r10,r5,r6 @ F_xx_xx | |
224 | add r7,r7,r3,ror#27 @ E+=ROR(A,27) | |
225 | #ifdef __ARMEL__ | |
226 | rev r9,r9 @ byte swap | |
227 | #endif | |
228 | #endif | |
229 | and r10,r4,r10,ror#2 | |
230 | add r7,r7,r9 @ E+=X[i] | |
231 | eor r10,r10,r6,ror#2 @ F_00_19(B,C,D) | |
232 | str r9,[r14,#-4]! | |
233 | add r7,r7,r10 @ E+=F_00_19(B,C,D) | |
@ -- round 16: E=r6 A=r7 B=r3 C=r4 D=r5
234 | ldr r9,[r14,#15*4] | |
235 | ldr r10,[r14,#13*4] | |
236 | ldr r11,[r14,#7*4] | |
237 | add r6,r8,r6,ror#2 @ E+=K_xx_xx | |
238 | ldr r12,[r14,#2*4] | |
239 | eor r9,r9,r10 | |
240 | eor r11,r11,r12 @ 1 cycle stall | |
241 | eor r10,r4,r5 @ F_xx_xx | |
242 | mov r9,r9,ror#31 | |
243 | add r6,r6,r7,ror#27 @ E+=ROR(A,27) | |
244 | eor r9,r9,r11,ror#31 | |
245 | str r9,[r14,#-4]! | |
246 | and r10,r3,r10,ror#2 @ F_xx_xx | |
247 | @ F_xx_xx | |
248 | add r6,r6,r9 @ E+=X[i] | |
249 | eor r10,r10,r5,ror#2 @ F_00_19(B,C,D) | |
250 | add r6,r6,r10 @ E+=F_00_19(B,C,D) | |
@ -- round 17: E=r5 A=r6 B=r7 C=r3 D=r4
251 | ldr r9,[r14,#15*4] | |
252 | ldr r10,[r14,#13*4] | |
253 | ldr r11,[r14,#7*4] | |
254 | add r5,r8,r5,ror#2 @ E+=K_xx_xx | |
255 | ldr r12,[r14,#2*4] | |
256 | eor r9,r9,r10 | |
257 | eor r11,r11,r12 @ 1 cycle stall | |
258 | eor r10,r3,r4 @ F_xx_xx | |
259 | mov r9,r9,ror#31 | |
260 | add r5,r5,r6,ror#27 @ E+=ROR(A,27) | |
261 | eor r9,r9,r11,ror#31 | |
262 | str r9,[r14,#-4]! | |
263 | and r10,r7,r10,ror#2 @ F_xx_xx | |
264 | @ F_xx_xx | |
265 | add r5,r5,r9 @ E+=X[i] | |
266 | eor r10,r10,r4,ror#2 @ F_00_19(B,C,D) | |
267 | add r5,r5,r10 @ E+=F_00_19(B,C,D) | |
@ -- round 18: E=r4 A=r5 B=r6 C=r7 D=r3
268 | ldr r9,[r14,#15*4] | |
269 | ldr r10,[r14,#13*4] | |
270 | ldr r11,[r14,#7*4] | |
271 | add r4,r8,r4,ror#2 @ E+=K_xx_xx | |
272 | ldr r12,[r14,#2*4] | |
273 | eor r9,r9,r10 | |
274 | eor r11,r11,r12 @ 1 cycle stall | |
275 | eor r10,r7,r3 @ F_xx_xx | |
276 | mov r9,r9,ror#31 | |
277 | add r4,r4,r5,ror#27 @ E+=ROR(A,27) | |
278 | eor r9,r9,r11,ror#31 | |
279 | str r9,[r14,#-4]! | |
280 | and r10,r6,r10,ror#2 @ F_xx_xx | |
281 | @ F_xx_xx | |
282 | add r4,r4,r9 @ E+=X[i] | |
283 | eor r10,r10,r3,ror#2 @ F_00_19(B,C,D) | |
284 | add r4,r4,r10 @ E+=F_00_19(B,C,D) | |
@ -- round 19: E=r3 A=r4 B=r5 C=r6 D=r7
285 | ldr r9,[r14,#15*4] | |
286 | ldr r10,[r14,#13*4] | |
287 | ldr r11,[r14,#7*4] | |
288 | add r3,r8,r3,ror#2 @ E+=K_xx_xx | |
289 | ldr r12,[r14,#2*4] | |
290 | eor r9,r9,r10 | |
291 | eor r11,r11,r12 @ 1 cycle stall | |
292 | eor r10,r6,r7 @ F_xx_xx | |
293 | mov r9,r9,ror#31 | |
294 | add r3,r3,r4,ror#27 @ E+=ROR(A,27) | |
295 | eor r9,r9,r11,ror#31 | |
296 | str r9,[r14,#-4]! | |
297 | and r10,r5,r10,ror#2 @ F_xx_xx | |
298 | @ F_xx_xx | |
299 | add r3,r3,r9 @ E+=X[i] | |
300 | eor r10,r10,r7,ror#2 @ F_00_19(B,C,D) | |
301 | add r3,r3,r10 @ E+=F_00_19(B,C,D) | |
302 | ||
@ Switch r8 to the constant for rounds 20..39 and enter the shared loop with
@ the carry flag clear.
303 | ldr r8,.LK_20_39 @ [+15+16*4] |
f0be44f4 DM |
304 | cmn sp,#0 @ [+3], clear carry to denote 20_39 |
@ Shared body for rounds 20..39 and 60..79, five rounds per pass.  Only the
@ constant in r8 and the carry flag differ between the two uses.
@ Each round expands one schedule word (XOR of the words 15, 13, 7 and 2
@ slots above r14, rotated right by 31) and adds K, ROR(A,27), X[i] and
@ F = B ^ C ^ D into E.
@ The loop test uses teq so that the carry set up before entry survives:
@ once r14 has reached sp, carry set means the 60..79 pass just finished.
305 | .L_20_39_or_60_79: | |
@ -- round with E=r7 A=r3 B=r4 C=r5 D=r6
306 | ldr r9,[r14,#15*4] | |
307 | ldr r10,[r14,#13*4] | |
308 | ldr r11,[r14,#7*4] | |
309 | add r7,r8,r7,ror#2 @ E+=K_xx_xx | |
310 | ldr r12,[r14,#2*4] | |
311 | eor r9,r9,r10 | |
312 | eor r11,r11,r12 @ 1 cycle stall | |
313 | eor r10,r5,r6 @ F_xx_xx | |
314 | mov r9,r9,ror#31 | |
315 | add r7,r7,r3,ror#27 @ E+=ROR(A,27) | |
316 | eor r9,r9,r11,ror#31 | |
317 | str r9,[r14,#-4]! | |
318 | eor r10,r4,r10,ror#2 @ F_xx_xx | |
319 | @ F_xx_xx | |
320 | add r7,r7,r9 @ E+=X[i] | |
321 | add r7,r7,r10 @ E+=F_20_39(B,C,D) | |
@ -- round with E=r6 A=r7 B=r3 C=r4 D=r5
322 | ldr r9,[r14,#15*4] | |
323 | ldr r10,[r14,#13*4] | |
324 | ldr r11,[r14,#7*4] | |
325 | add r6,r8,r6,ror#2 @ E+=K_xx_xx | |
326 | ldr r12,[r14,#2*4] | |
327 | eor r9,r9,r10 | |
328 | eor r11,r11,r12 @ 1 cycle stall | |
329 | eor r10,r4,r5 @ F_xx_xx | |
330 | mov r9,r9,ror#31 | |
331 | add r6,r6,r7,ror#27 @ E+=ROR(A,27) | |
332 | eor r9,r9,r11,ror#31 | |
333 | str r9,[r14,#-4]! | |
334 | eor r10,r3,r10,ror#2 @ F_xx_xx | |
335 | @ F_xx_xx | |
336 | add r6,r6,r9 @ E+=X[i] | |
337 | add r6,r6,r10 @ E+=F_20_39(B,C,D) | |
@ -- round with E=r5 A=r6 B=r7 C=r3 D=r4
338 | ldr r9,[r14,#15*4] | |
339 | ldr r10,[r14,#13*4] | |
340 | ldr r11,[r14,#7*4] | |
341 | add r5,r8,r5,ror#2 @ E+=K_xx_xx | |
342 | ldr r12,[r14,#2*4] | |
343 | eor r9,r9,r10 | |
344 | eor r11,r11,r12 @ 1 cycle stall | |
345 | eor r10,r3,r4 @ F_xx_xx | |
346 | mov r9,r9,ror#31 | |
347 | add r5,r5,r6,ror#27 @ E+=ROR(A,27) | |
348 | eor r9,r9,r11,ror#31 | |
349 | str r9,[r14,#-4]! | |
350 | eor r10,r7,r10,ror#2 @ F_xx_xx | |
351 | @ F_xx_xx | |
352 | add r5,r5,r9 @ E+=X[i] | |
353 | add r5,r5,r10 @ E+=F_20_39(B,C,D) | |
@ -- round with E=r4 A=r5 B=r6 C=r7 D=r3
354 | ldr r9,[r14,#15*4] | |
355 | ldr r10,[r14,#13*4] | |
356 | ldr r11,[r14,#7*4] | |
357 | add r4,r8,r4,ror#2 @ E+=K_xx_xx | |
358 | ldr r12,[r14,#2*4] | |
359 | eor r9,r9,r10 | |
360 | eor r11,r11,r12 @ 1 cycle stall | |
361 | eor r10,r7,r3 @ F_xx_xx | |
362 | mov r9,r9,ror#31 | |
363 | add r4,r4,r5,ror#27 @ E+=ROR(A,27) | |
364 | eor r9,r9,r11,ror#31 | |
365 | str r9,[r14,#-4]! | |
366 | eor r10,r6,r10,ror#2 @ F_xx_xx | |
367 | @ F_xx_xx | |
368 | add r4,r4,r9 @ E+=X[i] | |
369 | add r4,r4,r10 @ E+=F_20_39(B,C,D) | |
@ -- round with E=r3 A=r4 B=r5 C=r6 D=r7
370 | ldr r9,[r14,#15*4] | |
371 | ldr r10,[r14,#13*4] | |
372 | ldr r11,[r14,#7*4] | |
373 | add r3,r8,r3,ror#2 @ E+=K_xx_xx | |
374 | ldr r12,[r14,#2*4] | |
375 | eor r9,r9,r10 | |
376 | eor r11,r11,r12 @ 1 cycle stall | |
377 | eor r10,r6,r7 @ F_xx_xx | |
378 | mov r9,r9,ror#31 | |
379 | add r3,r3,r4,ror#27 @ E+=ROR(A,27) | |
380 | eor r9,r9,r11,ror#31 | |
381 | str r9,[r14,#-4]! | |
382 | eor r10,r5,r10,ror#2 @ F_xx_xx | |
383 | @ F_xx_xx | |
384 | add r3,r3,r9 @ E+=X[i] | |
385 | add r3,r3,r10 @ E+=F_20_39(B,C,D) | |
@ Loop test.  The THUMB() variant first copies sp into r11 and compares
@ against that instead of using sp as the teq operand directly.
638591cd DM |
386 | ARM( teq r14,sp ) @ preserve carry |
387 | THUMB( mov r11,sp ) | |
388 | THUMB( teq r14,r11 ) @ preserve carry | |
f0be44f4 DM |
389 | bne .L_20_39_or_60_79 @ [+((12+3)*5+2)*4] |
390 | bcs .L_done @ [+((12+3)*5+2)*4], spare 300 bytes | |
391 | ||
@ Rounds 40..59, five rounds per pass.  r8 takes the constant for this group
@ and sp drops 20 more words to mark where the loop stops.
@ Schedule expansion is the same as in the neighbouring rounds.  F is added
@ to E as two separate terms: B & (C ^ D), and C & D (the latter through a
@ ror#2 operand because C and D are held rotated).
392 | ldr r8,.LK_40_59 | |
393 | sub sp,sp,#20*4 @ [+2] | |
394 | .L_40_59: | |
@ -- round with E=r7 A=r3 B=r4 C=r5 D=r6
395 | ldr r9,[r14,#15*4] | |
396 | ldr r10,[r14,#13*4] | |
397 | ldr r11,[r14,#7*4] | |
398 | add r7,r8,r7,ror#2 @ E+=K_xx_xx | |
399 | ldr r12,[r14,#2*4] | |
400 | eor r9,r9,r10 | |
401 | eor r11,r11,r12 @ 1 cycle stall | |
402 | eor r10,r5,r6 @ F_xx_xx | |
403 | mov r9,r9,ror#31 | |
404 | add r7,r7,r3,ror#27 @ E+=ROR(A,27) | |
405 | eor r9,r9,r11,ror#31 | |
406 | str r9,[r14,#-4]! | |
407 | and r10,r4,r10,ror#2 @ F_xx_xx | |
408 | and r11,r5,r6 @ F_xx_xx | |
409 | add r7,r7,r9 @ E+=X[i] | |
410 | add r7,r7,r10 @ E+=F_40_59(B,C,D) | |
411 | add r7,r7,r11,ror#2 | |
@ -- round with E=r6 A=r7 B=r3 C=r4 D=r5
412 | ldr r9,[r14,#15*4] | |
413 | ldr r10,[r14,#13*4] | |
414 | ldr r11,[r14,#7*4] | |
415 | add r6,r8,r6,ror#2 @ E+=K_xx_xx | |
416 | ldr r12,[r14,#2*4] | |
417 | eor r9,r9,r10 | |
418 | eor r11,r11,r12 @ 1 cycle stall | |
419 | eor r10,r4,r5 @ F_xx_xx | |
420 | mov r9,r9,ror#31 | |
421 | add r6,r6,r7,ror#27 @ E+=ROR(A,27) | |
422 | eor r9,r9,r11,ror#31 | |
423 | str r9,[r14,#-4]! | |
424 | and r10,r3,r10,ror#2 @ F_xx_xx | |
425 | and r11,r4,r5 @ F_xx_xx | |
426 | add r6,r6,r9 @ E+=X[i] | |
427 | add r6,r6,r10 @ E+=F_40_59(B,C,D) | |
428 | add r6,r6,r11,ror#2 | |
@ -- round with E=r5 A=r6 B=r7 C=r3 D=r4
429 | ldr r9,[r14,#15*4] | |
430 | ldr r10,[r14,#13*4] | |
431 | ldr r11,[r14,#7*4] | |
432 | add r5,r8,r5,ror#2 @ E+=K_xx_xx | |
433 | ldr r12,[r14,#2*4] | |
434 | eor r9,r9,r10 | |
435 | eor r11,r11,r12 @ 1 cycle stall | |
436 | eor r10,r3,r4 @ F_xx_xx | |
437 | mov r9,r9,ror#31 | |
438 | add r5,r5,r6,ror#27 @ E+=ROR(A,27) | |
439 | eor r9,r9,r11,ror#31 | |
440 | str r9,[r14,#-4]! | |
441 | and r10,r7,r10,ror#2 @ F_xx_xx | |
442 | and r11,r3,r4 @ F_xx_xx | |
443 | add r5,r5,r9 @ E+=X[i] | |
444 | add r5,r5,r10 @ E+=F_40_59(B,C,D) | |
445 | add r5,r5,r11,ror#2 | |
@ -- round with E=r4 A=r5 B=r6 C=r7 D=r3
446 | ldr r9,[r14,#15*4] | |
447 | ldr r10,[r14,#13*4] | |
448 | ldr r11,[r14,#7*4] | |
449 | add r4,r8,r4,ror#2 @ E+=K_xx_xx | |
450 | ldr r12,[r14,#2*4] | |
451 | eor r9,r9,r10 | |
452 | eor r11,r11,r12 @ 1 cycle stall | |
453 | eor r10,r7,r3 @ F_xx_xx | |
454 | mov r9,r9,ror#31 | |
455 | add r4,r4,r5,ror#27 @ E+=ROR(A,27) | |
456 | eor r9,r9,r11,ror#31 | |
457 | str r9,[r14,#-4]! | |
458 | and r10,r6,r10,ror#2 @ F_xx_xx | |
459 | and r11,r7,r3 @ F_xx_xx | |
460 | add r4,r4,r9 @ E+=X[i] | |
461 | add r4,r4,r10 @ E+=F_40_59(B,C,D) | |
462 | add r4,r4,r11,ror#2 | |
@ -- round with E=r3 A=r4 B=r5 C=r6 D=r7
463 | ldr r9,[r14,#15*4] | |
464 | ldr r10,[r14,#13*4] | |
465 | ldr r11,[r14,#7*4] | |
466 | add r3,r8,r3,ror#2 @ E+=K_xx_xx | |
467 | ldr r12,[r14,#2*4] | |
468 | eor r9,r9,r10 | |
469 | eor r11,r11,r12 @ 1 cycle stall | |
470 | eor r10,r6,r7 @ F_xx_xx | |
471 | mov r9,r9,ror#31 | |
472 | add r3,r3,r4,ror#27 @ E+=ROR(A,27) | |
473 | eor r9,r9,r11,ror#31 | |
474 | str r9,[r14,#-4]! | |
475 | and r10,r5,r10,ror#2 @ F_xx_xx | |
476 | and r11,r6,r7 @ F_xx_xx | |
477 | add r3,r3,r9 @ E+=X[i] | |
478 | add r3,r3,r10 @ E+=F_40_59(B,C,D) | |
479 | add r3,r3,r11,ror#2 | |
@ Another pass unless r14 has reached sp.
638591cd | 480 | cmp r14,sp |
f0be44f4 DM |
481 | bne .L_40_59 @ [+((12+5)*5+2)*4] |
482 | ||
@ Rounds 60..79 reuse .L_20_39_or_60_79: new constant in r8, sp lowered by
@ another 20 words, and the carry flag set so that loop exits to .L_done.
483 | ldr r8,.LK_60_79 | |
484 | sub sp,sp,#20*4 | |
485 | cmp sp,#0 @ set carry to denote 60_79 | |
486 | b .L_20_39_or_60_79 @ [+4], spare 300 bytes | |
@ One block finished.  Release the 80-word X[] area (sp was lowered by
@ 15 + 25 + 20 + 20 words along the way), add the working variables to the
@ five state words at [r0] and store the sums back.  r5..r7 still carry the
@ extra rotation and are corrected by the ror#2 operand as they are added.
487 | .L_done: | |
488 | add sp,sp,#80*4 @ "deallocate" stack frame | |
489 | ldmia r0,{r8,r9,r10,r11,r12} | |
490 | add r3,r8,r3 | |
491 | add r4,r9,r4 | |
492 | add r5,r10,r5,ror#2 | |
493 | add r6,r11,r6,ror#2 | |
494 | add r7,r12,r7,ror#2 | |
495 | stmia r0,{r3,r4,r5,r6,r7} | |
@ Process the next block while the input pointer has not reached the end
@ mark in r2; otherwise restore r4-r12 and return by loading the saved lr
@ into pc.
496 | teq r1,r2 | |
497 | bne .Lloop @ [+18], total 1307 | |
498 | ||
f0be44f4 | 499 | ldmia sp!,{r4-r12,pc} |
f0be44f4 DM |
@ Round constants, one per group of 20 rounds.  Each is loaded into r8 by a
@ label-relative ldr in the routine above.
500 | .align 2 |
501 | .LK_00_19: .word 0x5a827999 | |
502 | .LK_20_39: .word 0x6ed9eba1 | |
503 | .LK_40_59: .word 0x8f1bbcdc | |
504 | .LK_60_79: .word 0xca62c1d6 | |
638591cd | 505 | ENDPROC(sha1_block_data_order) |
f0be44f4 DM |
@ Identification string; nothing in the code shown here references it.
506 | .asciz "SHA1 block transform for ARMv4, CRYPTOGAMS by <appro@openssl.org>" |
507 | .align 2 |