1 /* SPDX-License-Identifier: GPL-2.0-only */
3 * linux/arch/arm64/crypto/aes-neon.S - AES cipher for ARMv8 NEON
5 * Copyright (C) 2013 - 2017 Linaro Ltd. <ard.biesheuvel@linaro.org>
8 #include <linux/linkage.h>
9 #include <asm/assembler.h>
/*
 * Wrap ENTRY()/ENDPROC() so that the function name passed in is token-pasted
 * onto a "neon_" prefix: AES_ENTRY(foo) expands to ENTRY(neon_foo).
 * NOTE(review): presumably consumed by the mode code pulled in from
 * aes-modes.S - no user of these two macros is visible in this excerpt.
 */
11 #define AES_ENTRY(func) ENTRY(neon_ ## func)
12 #define AES_ENDPROC(func) ENDPROC(neon_ ## func)
/*
 * xts_reload_mask \tmp
 *
 * Takes a single argument, \tmp.
 * NOTE(review): the macro body is not visible in this excerpt, so what it
 * reloads and how \tmp is used cannot be stated here - confirm against the
 * full file.
 */
18 .macro xts_reload_mask, tmp
22 /* multiply by polynomial 'x' in GF(2^8) */
/*
 * mul_by_x \out, \in, \temp, \const
 *
 * Operands are passed with their arrangement already attached (the visible
 * caller passes e.g. v9.16b).  \temp is scratch and is overwritten.
 * The step shown below keeps only the bits of \temp that are also set in
 * \const.
 * NOTE(review): the instructions that fill \temp and produce \out are not
 * visible in this excerpt; \const is presumably the GF(2^8) reduction
 * constant replicated into every byte - confirm.
 */
23 .macro mul_by_x, out, in, temp, const
26 and \temp, \temp, \const
30 /* multiply by polynomial 'x^2' in GF(2^8) */
/*
 * mul_by_x2 \out, \in, \temp, \const
 *
 * Operands are passed with their arrangement already attached.  \temp is
 * scratch and is overwritten.
 * The step shown below is a polynomial (carry-less) multiply of \temp by
 * \const, lane by lane.
 * NOTE(review): the instructions that fill \temp and produce \out are not
 * visible in this excerpt - confirm against the full file.
 */
31 .macro mul_by_x2, out, in, temp, const
34 pmul \temp, \temp, \const
38 /* preload the entire Sbox */
/*
 * prepare \sbox, \shiftrows, \temp
 *
 * Sets up the vector registers that the round macros in this file read:
 *   v13      <- 16-byte vector at symbol \shiftrows
 *   v14      <- 16-byte vector at symbol .Lror32by8
 *   v16-v31  <- 256 consecutive bytes starting at the address held in \temp
 * \temp is used as scratch for the symbol loads and as the table pointer.
 * NOTE(review): \sbox is not referenced by any visible line; presumably
 * \temp is pointed at \sbox before the ld1 sequence - confirm.
 */
39 .macro prepare, sbox, shiftrows, temp
41 ldr_l q13, \shiftrows, \temp
42 ldr_l q14, .Lror32by8, \temp
/* four 64-byte loads; the first three post-increment \temp by 64 */
44 ld1 {v16.16b-v19.16b}, [\temp], #64
45 ld1 {v20.16b-v23.16b}, [\temp], #64
46 ld1 {v24.16b-v27.16b}, [\temp], #64
47 ld1 {v28.16b-v31.16b}, [\temp]
50 /* do preload for encryption */
/*
 * enc_prepare \ignore0, \ignore1, \temp
 *
 * Invokes prepare with the forward S-box and forward ShiftRows symbols.
 * Only \temp is forwarded; \ignore0 and \ignore1 are not used by the
 * visible line.
 */
51 .macro enc_prepare, ignore0, ignore1, temp
52 prepare .LForward_Sbox, .LForward_ShiftRows, \temp
/*
 * enc_switch_key \ignore0, \ignore1, \temp
 *
 * Same parameter list as enc_prepare.
 * NOTE(review): the macro body is not visible in this excerpt, so its
 * effect cannot be stated here - confirm against the full file.
 */
55 .macro enc_switch_key, ignore0, ignore1, temp
59 /* do preload for decryption */
/*
 * dec_prepare \ignore0, \ignore1, \temp
 *
 * Invokes prepare with the reverse S-box and reverse ShiftRows symbols.
 * Only \temp is forwarded; \ignore0 and \ignore1 are not used by the
 * visible line.
 */
60 .macro dec_prepare, ignore0, ignore1, temp
61 prepare .LReverse_Sbox, .LReverse_ShiftRows, \temp
64 /* apply SubBytes transformation using the preloaded Sbox */
/*
 * Byte-wise table lookup of \in through the table held in v16-v31,
 * done as four 64-entry lookups:
 *   - tbl covers v16-v19 and writes 0 for any index outside that quarter;
 *   - each tbx covers the next four registers, indexed by the original
 *     value with v15 subtracted once more, and leaves bytes whose index
 *     is out of range untouched.
 * Clobbers v9, v10 and v11.
 * NOTE(review): this is a complete 256-entry lookup only if every byte of
 * v15 is 0x40; the instruction that sets v15 is not visible here (v15 is
 * also used for the round key elsewhere in this file) - confirm.
 */
66 sub v9.16b, \in\().16b, v15.16b
67 tbl \in\().16b, {v16.16b-v19.16b}, \in\().16b
68 sub v10.16b, v9.16b, v15.16b
69 tbx \in\().16b, {v20.16b-v23.16b}, v9.16b
70 sub v11.16b, v10.16b, v15.16b
71 tbx \in\().16b, {v24.16b-v27.16b}, v10.16b
72 tbx \in\().16b, {v28.16b-v31.16b}, v11.16b
75 /* apply MixColumns transformation */
/*
 * mix_columns \in, \enc
 *
 * \in is a bare vector register name (arrangements are appended here) and
 * is transformed in place.  Uses v8 and v9 as scratch, v12 as the constant
 * handed to mul_by_x/mul_by_x2 and v14 as a byte-permutation index vector
 * (prepare loads v14 from .Lror32by8).
 * NOTE(review): \enc is not referenced by any visible line and the embedded
 * line numbers skip (77, 81, 83-84), so the conditional around the inverse
 * pre-multiply and at least one instruction between the two eor lines
 * below are presumably missing from this excerpt - as shown, those two
 * eors would cancel each other.  Confirm against the full file.
 */
76 .macro mix_columns, in, enc
78 /* Inverse MixColumns: pre-multiply by { 5, 0, 4, 0 } */
79 mul_by_x2 v8.16b, \in\().16b, v9.16b, v12.16b
80 eor \in\().16b, \in\().16b, v8.16b
82 eor \in\().16b, \in\().16b, v8.16b
/* v9 = \in multiplied by 'x' */
85 mul_by_x v9.16b, \in\().16b, v8.16b, v12.16b
/* v8 = \in with the two 16-bit halves of every 32-bit word swapped */
86 rev32 v8.8h, \in\().8h
87 eor v8.16b, v8.16b, v9.16b
88 eor \in\().16b, \in\().16b, v8.16b
/* permute the bytes of \in as directed by v14, then fold v8 in again */
89 tbl \in\().16b, {\in\().16b}, v14.16b
90 eor \in\().16b, \in\().16b, v8.16b
/*
 * do_block \enc, \in, \rounds, \rk, \rkp, \i
 *
 * Round skeleton for a single AES state held in vector register \in.
 * The visible steps are:
 *   1111: XOR \in with the round key currently in v15
 *         ShiftRows, done as a byte permutation of \in through v13
 *         load the next 16-byte round key into v15 from [\rkp], advancing
 *         \rkp by 16
 *         mix_columns on \in, forwarding \enc
 *   2222: XOR \in with the round key in v15
 * NOTE(review): \rounds, \rk and \i are not referenced by any visible line,
 * and the embedded line numbers skip; the initial key load, the SubBytes
 * step and the loop control that uses the 1111/2222 labels are presumably
 * missing from this excerpt - confirm against the full file.
 */
93 .macro do_block, enc, in, rounds, rk, rkp, i
97 1111: eor \in\().16b, \in\().16b, v15.16b /* ^round key */
99 tbl \in\().16b, {\in\().16b}, v13.16b /* ShiftRows */
102 ld1 {v15.4s}, [\rkp], #16
104 mix_columns \in, \enc
106 2222: eor \in\().16b, \in\().16b, v15.16b /* ^round key */
/*
 * encrypt_block \in, \rounds, \rk, \rkp, \i
 *
 * Thin wrapper: expands do_block with its \enc argument set to 1 and all
 * other arguments forwarded unchanged.
 */
109 .macro encrypt_block, in, rounds, rk, rkp, i
110 do_block 1, \in, \rounds, \rk, \rkp, \i
/*
 * decrypt_block \in, \rounds, \rk, \rkp, \i
 *
 * Thin wrapper: expands do_block with its \enc argument set to 0 and all
 * other arguments forwarded unchanged.
 */
113 .macro decrypt_block, in, rounds, rk, rkp, i
114 do_block 0, \in, \rounds, \rk, \rkp, \i
118 * Interleaved versions: functionally equivalent to the
119 * ones above, but applied to AES states in parallel.
/*
 * sub_bytes_4x \in0, \in1, \in2, \in3
 *
 * Byte-wise lookup of four vectors through the table held in v16-v31,
 * with the four streams interleaved.  Each vector is looked up in four
 * 64-entry quarters: tbl for v16-v19 (out-of-range indexes yield 0), then
 * tbx for each following group of four registers (out-of-range indexes
 * leave the byte untouched), with v15 subtracted from the index before
 * every tbx step.
 * Arguments are bare vector register names, transformed in place.
 * Clobbers v8-v11, which carry the running index of \in0-\in3 respectively.
 * NOTE(review): this is a complete 256-entry lookup only if every byte of
 * v15 is 0x40; the instruction that sets v15 is not visible here - confirm.
 */
122 .macro sub_bytes_4x, in0, in1, in2, in3
/* first quarter (v16-v19); v8-v11 = original index minus v15 */
123 sub v8.16b, \in0\().16b, v15.16b
124 tbl \in0\().16b, {v16.16b-v19.16b}, \in0\().16b
125 sub v9.16b, \in1\().16b, v15.16b
126 tbl \in1\().16b, {v16.16b-v19.16b}, \in1\().16b
127 sub v10.16b, \in2\().16b, v15.16b
128 tbl \in2\().16b, {v16.16b-v19.16b}, \in2\().16b
129 sub v11.16b, \in3\().16b, v15.16b
130 tbl \in3\().16b, {v16.16b-v19.16b}, \in3\().16b
/* second quarter (v20-v23); each index is stepped down once it has been used */
131 tbx \in0\().16b, {v20.16b-v23.16b}, v8.16b
132 tbx \in1\().16b, {v20.16b-v23.16b}, v9.16b
133 sub v8.16b, v8.16b, v15.16b
134 tbx \in2\().16b, {v20.16b-v23.16b}, v10.16b
135 sub v9.16b, v9.16b, v15.16b
136 tbx \in3\().16b, {v20.16b-v23.16b}, v11.16b
137 sub v10.16b, v10.16b, v15.16b
/* third quarter (v24-v27), again stepping each index down after use */
138 tbx \in0\().16b, {v24.16b-v27.16b}, v8.16b
139 sub v11.16b, v11.16b, v15.16b
140 tbx \in1\().16b, {v24.16b-v27.16b}, v9.16b
141 sub v8.16b, v8.16b, v15.16b
142 tbx \in2\().16b, {v24.16b-v27.16b}, v10.16b
143 sub v9.16b, v9.16b, v15.16b
144 tbx \in3\().16b, {v24.16b-v27.16b}, v11.16b
145 sub v10.16b, v10.16b, v15.16b
/* fourth quarter (v28-v31) */
146 tbx \in0\().16b, {v28.16b-v31.16b}, v8.16b
147 sub v11.16b, v11.16b, v15.16b
148 tbx \in1\().16b, {v28.16b-v31.16b}, v9.16b
149 tbx \in2\().16b, {v28.16b-v31.16b}, v10.16b
150 tbx \in3\().16b, {v28.16b-v31.16b}, v11.16b
/*
 * mul_by_x_2x \out0, \out1, \in0, \in1, \tmp0, \tmp1, \const
 *
 * For two vectors at once, per byte lane:
 *     out = (in << 1) ^ ((in >> 7, arithmetic) & const)
 * The arithmetic shift by 7 turns each byte's top bit into an all-ones or
 * all-zeroes mask, so \const is folded in only for bytes whose top bit was
 * set.  The two streams are interleaved instruction by instruction.
 * Arguments are bare vector register names; the .16b arrangement is
 * appended here.  Clobbers \tmp0 and \tmp1.
 * NOTE(review): the visible caller passes v12 as \const, presumably the
 * GF(2^8) reduction constant in every byte - its load is not visible here.
 */
153 .macro mul_by_x_2x, out0, out1, in0, in1, tmp0, tmp1, const
154 sshr \tmp0\().16b, \in0\().16b, #7
155 shl \out0\().16b, \in0\().16b, #1
156 sshr \tmp1\().16b, \in1\().16b, #7
157 and \tmp0\().16b, \tmp0\().16b, \const\().16b
158 shl \out1\().16b, \in1\().16b, #1
159 and \tmp1\().16b, \tmp1\().16b, \const\().16b
160 eor \out0\().16b, \out0\().16b, \tmp0\().16b
161 eor \out1\().16b, \out1\().16b, \tmp1\().16b
/*
 * mul_by_x2_2x \out0, \out1, \in0, \in1, \tmp0, \tmp1, \const
 *
 * For two vectors at once, per byte lane:
 *     out = (in << 2) ^ pmul(in >> 6, const)
 * The logical shift by 6 isolates the two bits that the left shift by 2
 * pushes out of each byte; their polynomial (carry-less) product with
 * \const is then folded back in.  The two streams are interleaved.
 * Arguments are bare vector register names; the .16b arrangement is
 * appended here.  Clobbers \tmp0 and \tmp1.
 * NOTE(review): the visible caller passes v12 as \const, presumably the
 * GF(2^8) reduction constant in every byte - its load is not visible here.
 */
164 .macro mul_by_x2_2x, out0, out1, in0, in1, tmp0, tmp1, const
165 ushr \tmp0\().16b, \in0\().16b, #6
166 shl \out0\().16b, \in0\().16b, #2
167 ushr \tmp1\().16b, \in1\().16b, #6
168 pmul \tmp0\().16b, \tmp0\().16b, \const\().16b
169 shl \out1\().16b, \in1\().16b, #2
170 pmul \tmp1\().16b, \tmp1\().16b, \const\().16b
171 eor \out0\().16b, \out0\().16b, \tmp0\().16b
172 eor \out1\().16b, \out1\().16b, \tmp1\().16b
/*
 * mix_columns_2x \in0, \in1, \enc
 *
 * Two-state counterpart of mix_columns: \in0 and \in1 are bare vector
 * register names and are transformed in place, with the two streams
 * interleaved.  Uses v8-v11 as scratch, v12 as the constant handed to
 * mul_by_x_2x/mul_by_x2_2x and v14 as a byte-permutation index vector.
 * NOTE(review): \enc is not referenced by any visible line and the embedded
 * line numbers skip (176, 180, 182, 185-186), so the conditional around the
 * inverse pre-multiply and the instructions between the paired eor lines
 * are presumably missing from this excerpt - as shown, each pair of eors
 * would cancel.  Confirm against the full file.
 */
175 .macro mix_columns_2x, in0, in1, enc
177 /* Inverse MixColumns: pre-multiply by { 5, 0, 4, 0 } */
178 mul_by_x2_2x v8, v9, \in0, \in1, v10, v11, v12
179 eor \in0\().16b, \in0\().16b, v8.16b
181 eor \in1\().16b, \in1\().16b, v9.16b
183 eor \in0\().16b, \in0\().16b, v8.16b
184 eor \in1\().16b, \in1\().16b, v9.16b
/* v8/v9 = \in0/\in1 multiplied by 'x' */
187 mul_by_x_2x v8, v9, \in0, \in1, v10, v11, v12
/* v10/v11 = inputs with the 16-bit halves of every 32-bit word swapped */
188 rev32 v10.8h, \in0\().8h
189 rev32 v11.8h, \in1\().8h
190 eor v10.16b, v10.16b, v8.16b
191 eor v11.16b, v11.16b, v9.16b
192 eor \in0\().16b, \in0\().16b, v10.16b
193 eor \in1\().16b, \in1\().16b, v11.16b
/* permute the bytes as directed by v14, then fold v10/v11 in again */
194 tbl \in0\().16b, {\in0\().16b}, v14.16b
195 tbl \in1\().16b, {\in1\().16b}, v14.16b
196 eor \in0\().16b, \in0\().16b, v10.16b
197 eor \in1\().16b, \in1\().16b, v11.16b
/*
 * do_block_4x \enc, \in0, \in1, \in2, \in3, \rounds, \rk, \rkp, \i
 *
 * Round skeleton for four AES states processed side by side.  The visible
 * steps are:
 *   1111: XOR all four states with the round key currently in v15
 *         ShiftRows, done as a byte permutation of each state through v13
 *         sub_bytes_4x on the four states
 *         load the next 16-byte round key into v15 from [\rkp], advancing
 *         \rkp by 16
 *         mix_columns_2x on (\in0, \in1) and on (\in2, \in3)
 *   2222: XOR all four states with the round key in v15
 * NOTE(review): \rounds, \rk and \i are not referenced by any visible line,
 * and the embedded line numbers skip; the initial key load, the setup of
 * v15 for sub_bytes_4x and the loop control that uses the 1111/2222 labels
 * are presumably missing from this excerpt - confirm against the full file.
 */
200 .macro do_block_4x, enc, in0, in1, in2, in3, rounds, rk, rkp, i
204 1111: eor \in0\().16b, \in0\().16b, v15.16b /* ^round key */
205 eor \in1\().16b, \in1\().16b, v15.16b /* ^round key */
206 eor \in2\().16b, \in2\().16b, v15.16b /* ^round key */
207 eor \in3\().16b, \in3\().16b, v15.16b /* ^round key */
209 tbl \in0\().16b, {\in0\().16b}, v13.16b /* ShiftRows */
210 tbl \in1\().16b, {\in1\().16b}, v13.16b /* ShiftRows */
211 tbl \in2\().16b, {\in2\().16b}, v13.16b /* ShiftRows */
212 tbl \in3\().16b, {\in3\().16b}, v13.16b /* ShiftRows */
213 sub_bytes_4x \in0, \in1, \in2, \in3
215 ld1 {v15.4s}, [\rkp], #16
217 mix_columns_2x \in0, \in1, \enc
218 mix_columns_2x \in2, \in3, \enc
220 2222: eor \in0\().16b, \in0\().16b, v15.16b /* ^round key */
221 eor \in1\().16b, \in1\().16b, v15.16b /* ^round key */
222 eor \in2\().16b, \in2\().16b, v15.16b /* ^round key */
223 eor \in3\().16b, \in3\().16b, v15.16b /* ^round key */
/*
 * encrypt_block4x \in0, \in1, \in2, \in3, \rounds, \rk, \rkp, \i
 *
 * Thin wrapper: expands do_block_4x with its \enc argument set to 1 and
 * all other arguments forwarded unchanged.
 */
226 .macro encrypt_block4x, in0, in1, in2, in3, rounds, rk, rkp, i
227 do_block_4x 1, \in0, \in1, \in2, \in3, \rounds, \rk, \rkp, \i
/*
 * decrypt_block4x \in0, \in1, \in2, \in3, \rounds, \rk, \rkp, \i
 *
 * Thin wrapper: expands do_block_4x with its \enc argument set to 0 and
 * all other arguments forwarded unchanged.
 */
230 .macro decrypt_block4x, in0, in1, in2, in3, rounds, rk, rkp, i
231 do_block_4x 0, \in0, \in1, \in2, \in3, \rounds, \rk, \rkp, \i
234 #include "aes-modes.S"
/* Constant tables below are placed in the allocatable .rodata section. */
236 .section ".rodata", "a"
/*
 * 256-byte substitution table, 8 entries per line, entry i at byte offset i.
 * The values match the AES forward S-box (FIPS-197), starting 0x63, 0x7c.
 * NOTE(review): presumably this is the table the code refers to as
 * .LForward_Sbox - its label (and any alignment directive) is not visible
 * in this excerpt, confirm.
 */
239 .byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
240 .byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
241 .byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
242 .byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
243 .byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
244 .byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
245 .byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
246 .byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
247 .byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
248 .byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
249 .byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
250 .byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
251 .byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
252 .byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
253 .byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
254 .byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
255 .byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
256 .byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
257 .byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
258 .byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
259 .byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
260 .byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
261 .byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
262 .byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
263 .byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
264 .byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
265 .byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
266 .byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
267 .byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
268 .byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
269 .byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
270 .byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
/*
 * 256-byte substitution table, 8 entries per line, entry i at byte offset i.
 * The values match the AES inverse S-box (FIPS-197), starting 0x52, 0x09.
 * NOTE(review): presumably this is the table the code refers to as
 * .LReverse_Sbox - its label is not visible in this excerpt, confirm.
 */
273 .byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
274 .byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
275 .byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
276 .byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
277 .byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
278 .byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
279 .byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
280 .byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
281 .byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
282 .byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
283 .byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
284 .byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
285 .byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
286 .byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
287 .byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
288 .byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
289 .byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
290 .byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
291 .byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
292 .byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
293 .byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
294 .byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
295 .byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
296 .byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
297 .byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
298 .byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
299 .byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
300 .byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
301 .byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
302 .byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
303 .byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
304 .byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
/*
 * Three 16-byte index vectors; each is a permutation of 0x00-0x0f, of the
 * kind the tbl instructions in this file take through v13 and v14.
 * NOTE(review): presumably these are, in order, .LForward_ShiftRows,
 * .LReverse_ShiftRows and .Lror32by8 - the labels are not visible in this
 * excerpt, confirm against the full file.
 */
/* least-significant byte first: 00 05 0a 0f 04 09 0e 03 08 0d 02 07 0c 01 06 0b */
307 .octa 0x0b06010c07020d08030e09040f0a0500
/* least-significant byte first: 00 0d 0a 07 04 01 0e 0b 08 05 02 0f 0c 09 06 03 */
310 .octa 0x0306090c0f0205080b0e0104070a0d00
/* least-significant byte first: 01 02 03 00 05 06 07 04 09 0a 0b 08 0d 0e 0f 0c */
313 .octa 0x0c0f0e0d080b0a090407060500030201