2 * linux/arch/arm64/crypto/aes-neon.S - AES cipher for ARMv8 NEON
4 * Copyright (C) 2013 - 2017 Linaro Ltd. <ard.biesheuvel@linaro.org>
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
11 #include <linux/linkage.h>
12 #include <asm/assembler.h>
/*
 * Symbol-name wrappers: each expands to ENTRY()/ENDPROC() applied to the
 * given name with a "neon_" prefix token-pasted in front of it.
 * ENTRY and ENDPROC are not defined in this file (presumably they come
 * from <linux/linkage.h> - confirm).
 */
14 #define AES_ENTRY(func) ENTRY(neon_ ## func)
15 #define AES_ENDPROC(func) ENDPROC(neon_ ## func)
/*
 * xts_reload_mask tmp
 * NOTE(review): only the macro header is present in this listing; the
 * body and the closing .endm are not shown, so the behaviour cannot be
 * documented from here.
 */
19 .macro xts_reload_mask, tmp
23 /* multiply by polynomial 'x' in GF(2^8) */
/*
 * mul_by_x out, in, temp, const
 *   out   - destination vector operand
 *   in    - source vector operand
 *   temp  - scratch vector operand (overwritten)
 *   const - vector operand that temp is masked with
 * NOTE(review): only the AND step is present in this listing; the
 * instructions that produce temp and write out, and the closing .endm,
 * are not shown. The two-state variant mul_by_x_2x in this file shows a
 * full sshr/shl/and/eor sequence - presumably this macro follows the same
 * pattern (confirm against the complete file).
 */
24 .macro mul_by_x, out, in, temp, const
/* keep only the bits of temp that are also set in const */
27 and \temp, \temp, \const
31 /* multiply by polynomial 'x^2' in GF(2^8) */
/*
 * mul_by_x2 out, in, temp, const
 *   out   - destination vector operand
 *   in    - source vector operand
 *   temp  - scratch vector operand (overwritten)
 *   const - vector operand that temp is polynomial-multiplied by
 * NOTE(review): only the PMUL step is present in this listing; the
 * instructions that produce temp and write out, and the closing .endm,
 * are not shown. The two-state variant mul_by_x2_2x in this file shows a
 * full ushr/shl/pmul/eor sequence - presumably this macro follows the
 * same pattern (confirm against the complete file).
 */
32 .macro mul_by_x2, out, in, temp, const
/* carry-less (polynomial) multiply of temp by const */
35 pmul \temp, \temp, \const
39 /* preload the entire Sbox */
/*
 * prepare sbox, shiftrows, temp
 *   sbox      - label of the 256-byte lookup table; not referenced by any
 *               line visible here (presumably a missing line points temp
 *               at it before the ld1 sequence - confirm)
 *   shiftrows - label of the 16-byte vector loaded into q13
 *   temp      - scratch register: handed to ldr_l, then used as the
 *               post-incremented load pointer
 *
 * Register state set up by the visible lines:
 *   q13     <- 16 bytes at the shiftrows label
 *   q14     <- 16 bytes at .Lror32by8
 *   v16-v31 <- 256 consecutive bytes starting at [temp]
 * ldr_l is not defined in this file (presumably <asm/assembler.h>).
 * NOTE(review): the original numbering skips lines here (41, 44, 49), so
 * further setup and the closing .endm are not shown in this listing.
 */
40 .macro prepare, sbox, shiftrows, temp
42 ldr_l q13, \shiftrows, \temp
43 ldr_l q14, .Lror32by8, \temp
/* four 64-byte loads; the first three advance temp by 64 each */
45 ld1 {v16.16b-v19.16b}, [\temp], #64
46 ld1 {v20.16b-v23.16b}, [\temp], #64
47 ld1 {v24.16b-v27.16b}, [\temp], #64
/* last load has no post-increment */
48 ld1 {v28.16b-v31.16b}, [\temp]
51 /* do preload for encryption */
/*
 * enc_prepare ignore0, ignore1, temp
 * Expands prepare with the .LForward_Sbox and .LForward_ShiftRows labels.
 * ignore0 and ignore1 are not used by the visible body; temp is passed
 * through as the scratch register.
 * NOTE(review): the closing .endm is not shown in this listing.
 */
52 .macro enc_prepare, ignore0, ignore1, temp
53 prepare .LForward_Sbox, .LForward_ShiftRows, \temp
/*
 * enc_switch_key ignore0, ignore1, temp
 * NOTE(review): only the macro header is present in this listing; the
 * body and the closing .endm are not shown, so the behaviour cannot be
 * documented from here.
 */
56 .macro enc_switch_key, ignore0, ignore1, temp
60 /* do preload for decryption */
/*
 * dec_prepare ignore0, ignore1, temp
 * Expands prepare with the .LReverse_Sbox and .LReverse_ShiftRows labels.
 * ignore0 and ignore1 are not used by the visible body; temp is passed
 * through as the scratch register.
 * NOTE(review): the closing .endm is not shown in this listing.
 */
61 .macro dec_prepare, ignore0, ignore1, temp
62 prepare .LReverse_Sbox, .LReverse_ShiftRows, \temp
65 /* apply SubBytes transformation using the preloaded Sbox */
/*
 * Byte-wise 256-entry table lookup on one 16-byte state, done as four
 * 64-byte lookups against v16-v19, v20-v23, v24-v27 and v28-v31.
 *
 * Each step subtracts v15 from the running index and looks it up in the
 * next quarter of the table. TBL writes zero to lanes whose index is
 * outside the four-register table; TBX leaves such lanes unchanged, so
 * later steps only fill in lanes that earlier steps did not cover.
 * This assumes v15 holds 64 in every byte lane - the instruction that
 * sets it is not visible here (TODO confirm).
 *
 * Clobbers v9, v10, v11. The state register is updated in place.
 * NOTE(review): the .macro header (which supplies the "in" argument) and
 * the closing .endm are not present in this listing.
 */
67 sub v9.16b, \in\().16b, v15.16b
68 tbl \in\().16b, {v16.16b-v19.16b}, \in\().16b
69 sub v10.16b, v9.16b, v15.16b
70 tbx \in\().16b, {v20.16b-v23.16b}, v9.16b
71 sub v11.16b, v10.16b, v15.16b
72 tbx \in\().16b, {v24.16b-v27.16b}, v10.16b
73 tbx \in\().16b, {v28.16b-v31.16b}, v11.16b
76 /* apply MixColumns transformation */
/*
 * mix_columns in, enc
 *   in  - vector register holding one 16-byte state, updated in place
 *   enc - not referenced by any line visible here; presumably selects
 *         whether the inverse pre-multiply step is assembled, via a
 *         conditional that is not shown (TODO confirm)
 *
 * Uses v8 and v9 as scratch, v12 as the constant operand of
 * mul_by_x/mul_by_x2 and v14 as the TBL permutation (prepare loads v14
 * from .Lror32by8).
 * NOTE(review): the original numbering skips 78, 82, 84 and 92, so some
 * lines and the closing .endm are missing from this listing.
 */
77 .macro mix_columns, in, enc
79 /* Inverse MixColumns: pre-multiply by { 5, 0, 4, 0 } */
80 mul_by_x2 v8.16b, \in\().16b, v9.16b, v12.16b
81 eor \in\().16b, \in\().16b, v8.16b
/*
 * NOTE(review): as shown, this EOR repeats the previous one and would
 * cancel it; original line 82 is missing between them and presumably
 * modifies v8 first - confirm against the complete file.
 */
83 eor \in\().16b, \in\().16b, v8.16b
86 mul_by_x v9.16b, \in\().16b, v8.16b, v12.16b
/* swap the two 16-bit halves of every 32-bit word */
87 rev32 v8.8h, \in\().8h
88 eor v8.16b, v8.16b, v9.16b
89 eor \in\().16b, \in\().16b, v8.16b
/* permute the bytes of the state through the v14 index vector */
90 tbl \in\().16b, {\in\().16b}, v14.16b
91 eor \in\().16b, \in\().16b, v8.16b
/*
 * do_block enc, in, rounds, rk, rkp, i
 *   enc    - forwarded to mix_columns
 *   in     - vector register holding the 16-byte state, updated in place
 *   rkp    - pointer register; 16 bytes are loaded from it into v15 and
 *            it is post-incremented by 16
 *   rounds, rk, i - not referenced by any line visible here (presumably
 *            the round count, key schedule base and loop counter used by
 *            lines that are missing - TODO confirm)
 *
 * Visible sequence: at 1111 the state is XORed with v15 and permuted
 * through v13 (which prepare loads from its shiftrows argument), then
 * v15 is reloaded from [rkp] and mix_columns is applied; at 2222 the
 * state is XORed with v15 once more.
 * NOTE(review): the original numbering skips 95-97, 99, 101, 102, 104,
 * 106 and 108, so loop control, any table-lookup step and the closing
 * .endm are missing from this listing.
 */
94 .macro do_block, enc, in, rounds, rk, rkp, i
98 1111: eor \in\().16b, \in\().16b, v15.16b /* ^round key */
100 tbl \in\().16b, {\in\().16b}, v13.16b /* ShiftRows */
103 ld1 {v15.4s}, [\rkp], #16
105 mix_columns \in, \enc
107 2222: eor \in\().16b, \in\().16b, v15.16b /* ^round key */
/*
 * encrypt_block in, rounds, rk, rkp, i
 * Expands do_block with enc = 1; all other arguments are passed through
 * unchanged.
 * NOTE(review): the closing .endm is not shown in this listing.
 */
110 .macro encrypt_block, in, rounds, rk, rkp, i
111 do_block 1, \in, \rounds, \rk, \rkp, \i
/*
 * decrypt_block in, rounds, rk, rkp, i
 * Expands do_block with enc = 0; all other arguments are passed through
 * unchanged.
 * NOTE(review): the closing .endm is not shown in this listing.
 */
114 .macro decrypt_block, in, rounds, rk, rkp, i
115 do_block 0, \in, \rounds, \rk, \rkp, \i
119 * Interleaved versions: functionally equivalent to the
120 * ones above, but applied to 2 or 4 AES states in parallel.
/*
 * sub_bytes_2x in0, in1
 * Byte-wise 256-entry table lookup on two 16-byte states, with the
 * instructions for the two states interleaved. The table is held in
 * v16-v31 as four 64-byte quarters.
 *
 * For each state the index is reduced by v15 before each further quarter
 * is consulted. TBL zeroes lanes whose index is outside the quarter; TBX
 * leaves them unchanged, so later lookups only fill lanes not yet
 * covered. This assumes v15 holds 64 in every byte lane - the
 * instruction that sets it is not visible here (TODO confirm).
 *
 * Clobbers v8-v11. in0 and in1 are updated in place.
 * NOTE(review): the closing .endm is not shown in this listing.
 */
123 .macro sub_bytes_2x, in0, in1
124 sub v8.16b, \in0\().16b, v15.16b
125 tbl \in0\().16b, {v16.16b-v19.16b}, \in0\().16b
126 sub v9.16b, \in1\().16b, v15.16b
127 tbl \in1\().16b, {v16.16b-v19.16b}, \in1\().16b
128 sub v10.16b, v8.16b, v15.16b
129 tbx \in0\().16b, {v20.16b-v23.16b}, v8.16b
130 sub v11.16b, v9.16b, v15.16b
131 tbx \in1\().16b, {v20.16b-v23.16b}, v9.16b
/* v8/v9 are free again here and are reused for the last quarter's index */
132 sub v8.16b, v10.16b, v15.16b
133 tbx \in0\().16b, {v24.16b-v27.16b}, v10.16b
134 sub v9.16b, v11.16b, v15.16b
135 tbx \in1\().16b, {v24.16b-v27.16b}, v11.16b
136 tbx \in0\().16b, {v28.16b-v31.16b}, v8.16b
137 tbx \in1\().16b, {v28.16b-v31.16b}, v9.16b
/*
 * sub_bytes_4x in0, in1, in2, in3
 * Byte-wise 256-entry table lookup on four 16-byte states, with the
 * instructions for the four states interleaved. The table is held in
 * v16-v31 as four 64-byte quarters.
 *
 * v8, v9, v10 and v11 carry the running index for in0, in1, in2 and in3
 * respectively; each is reduced by v15 in place before the next quarter
 * is consulted. TBL zeroes lanes whose index is outside the quarter; TBX
 * leaves them unchanged, so later lookups only fill lanes not yet
 * covered. This assumes v15 holds 64 in every byte lane - the
 * instruction that sets it is not visible here (TODO confirm).
 *
 * Clobbers v8-v11. in0-in3 are updated in place.
 * NOTE(review): the closing .endm is not shown in this listing.
 */
140 .macro sub_bytes_4x, in0, in1, in2, in3
/* first quarter (v16-v19) and initial index reduction for all four states */
141 sub v8.16b, \in0\().16b, v15.16b
142 tbl \in0\().16b, {v16.16b-v19.16b}, \in0\().16b
143 sub v9.16b, \in1\().16b, v15.16b
144 tbl \in1\().16b, {v16.16b-v19.16b}, \in1\().16b
145 sub v10.16b, \in2\().16b, v15.16b
146 tbl \in2\().16b, {v16.16b-v19.16b}, \in2\().16b
147 sub v11.16b, \in3\().16b, v15.16b
148 tbl \in3\().16b, {v16.16b-v19.16b}, \in3\().16b
/* second quarter (v20-v23), interleaved with the next index reduction */
149 tbx \in0\().16b, {v20.16b-v23.16b}, v8.16b
150 tbx \in1\().16b, {v20.16b-v23.16b}, v9.16b
151 sub v8.16b, v8.16b, v15.16b
152 tbx \in2\().16b, {v20.16b-v23.16b}, v10.16b
153 sub v9.16b, v9.16b, v15.16b
154 tbx \in3\().16b, {v20.16b-v23.16b}, v11.16b
155 sub v10.16b, v10.16b, v15.16b
/* third quarter (v24-v27), interleaved with the last index reduction */
156 tbx \in0\().16b, {v24.16b-v27.16b}, v8.16b
157 sub v11.16b, v11.16b, v15.16b
158 tbx \in1\().16b, {v24.16b-v27.16b}, v9.16b
159 sub v8.16b, v8.16b, v15.16b
160 tbx \in2\().16b, {v24.16b-v27.16b}, v10.16b
161 sub v9.16b, v9.16b, v15.16b
162 tbx \in3\().16b, {v24.16b-v27.16b}, v11.16b
163 sub v10.16b, v10.16b, v15.16b
/* fourth quarter (v28-v31) */
164 tbx \in0\().16b, {v28.16b-v31.16b}, v8.16b
165 sub v11.16b, v11.16b, v15.16b
166 tbx \in1\().16b, {v28.16b-v31.16b}, v9.16b
167 tbx \in2\().16b, {v28.16b-v31.16b}, v10.16b
168 tbx \in3\().16b, {v28.16b-v31.16b}, v11.16b
/*
 * mul_by_x_2x out0, out1, in0, in1, tmp0, tmp1, const
 * Two-state version of the "multiply by x" step; the instructions for
 * the two states are interleaved.
 *
 * Per byte lane: out = (in << 1) ^ (const if the top bit of in is set,
 * otherwise 0). The arithmetic shift right by 7 turns the top bit into
 * an all-ones or all-zeroes byte, which then masks const.
 *
 *   out0/out1 - destination vector registers
 *   in0/in1   - source vector registers
 *   tmp0/tmp1 - scratch vector registers (overwritten)
 *   const     - vector register holding the reduction constant
 *               (mix_columns_2x passes v12; its value is set elsewhere)
 * All register arguments are given without an arrangement suffix; the
 * macro appends .16b itself.
 * NOTE(review): the closing .endm is not shown in this listing.
 */
171 .macro mul_by_x_2x, out0, out1, in0, in1, tmp0, tmp1, const
172 sshr \tmp0\().16b, \in0\().16b, #7
173 shl \out0\().16b, \in0\().16b, #1
174 sshr \tmp1\().16b, \in1\().16b, #7
175 and \tmp0\().16b, \tmp0\().16b, \const\().16b
176 shl \out1\().16b, \in1\().16b, #1
177 and \tmp1\().16b, \tmp1\().16b, \const\().16b
178 eor \out0\().16b, \out0\().16b, \tmp0\().16b
179 eor \out1\().16b, \out1\().16b, \tmp1\().16b
/*
 * mul_by_x2_2x out0, out1, in0, in1, tmp0, tmp1, const
 * Two-state version of the "multiply by x^2" step; the instructions for
 * the two states are interleaved.
 *
 * Per byte lane: out = (in << 2) ^ pmul(in >> 6, const). The logical
 * shift right by 6 isolates the two bits that the left shift drops, and
 * the polynomial multiply by const folds them back in.
 *
 *   out0/out1 - destination vector registers
 *   in0/in1   - source vector registers
 *   tmp0/tmp1 - scratch vector registers (overwritten)
 *   const     - vector register holding the reduction constant
 *               (mix_columns_2x passes v12; its value is set elsewhere)
 * All register arguments are given without an arrangement suffix; the
 * macro appends .16b itself.
 * NOTE(review): the closing .endm is not shown in this listing.
 */
182 .macro mul_by_x2_2x, out0, out1, in0, in1, tmp0, tmp1, const
183 ushr \tmp0\().16b, \in0\().16b, #6
184 shl \out0\().16b, \in0\().16b, #2
185 ushr \tmp1\().16b, \in1\().16b, #6
186 pmul \tmp0\().16b, \tmp0\().16b, \const\().16b
187 shl \out1\().16b, \in1\().16b, #2
188 pmul \tmp1\().16b, \tmp1\().16b, \const\().16b
189 eor \out0\().16b, \out0\().16b, \tmp0\().16b
190 eor \out1\().16b, \out1\().16b, \tmp1\().16b
/*
 * mix_columns_2x in0, in1, enc
 * Two-state version of the MixColumns step; the instructions for the two
 * states are interleaved.
 *   in0/in1 - vector registers holding two 16-byte states, updated in
 *             place
 *   enc     - not referenced by any line visible here; presumably
 *             selects whether the inverse pre-multiply step is
 *             assembled, via a conditional that is not shown (TODO
 *             confirm)
 *
 * Uses v8-v11 as scratch, v12 as the constant operand of
 * mul_by_x_2x/mul_by_x2_2x and v14 as the TBL permutation (prepare loads
 * v14 from .Lror32by8).
 * NOTE(review): the original numbering skips 194, 198, 200, 203, 204 and
 * 216, so some lines and the closing .endm are missing from this
 * listing.
 */
193 .macro mix_columns_2x, in0, in1, enc
195 /* Inverse MixColumns: pre-multiply by { 5, 0, 4, 0 } */
196 mul_by_x2_2x v8, v9, \in0, \in1, v10, v11, v12
/*
 * NOTE(review): as shown, each state is XORed with the same v8/v9 value
 * twice, which would cancel; original lines 198 and 200 are missing and
 * presumably modify v8/v9 in between - confirm against the complete
 * file.
 */
197 eor \in0\().16b, \in0\().16b, v8.16b
199 eor \in1\().16b, \in1\().16b, v9.16b
201 eor \in0\().16b, \in0\().16b, v8.16b
202 eor \in1\().16b, \in1\().16b, v9.16b
205 mul_by_x_2x v8, v9, \in0, \in1, v10, v11, v12
/* swap the two 16-bit halves of every 32-bit word */
206 rev32 v10.8h, \in0\().8h
207 rev32 v11.8h, \in1\().8h
208 eor v10.16b, v10.16b, v8.16b
209 eor v11.16b, v11.16b, v9.16b
210 eor \in0\().16b, \in0\().16b, v10.16b
211 eor \in1\().16b, \in1\().16b, v11.16b
/* permute the bytes of each state through the v14 index vector */
212 tbl \in0\().16b, {\in0\().16b}, v14.16b
213 tbl \in1\().16b, {\in1\().16b}, v14.16b
214 eor \in0\().16b, \in0\().16b, v10.16b
215 eor \in1\().16b, \in1\().16b, v11.16b
/*
 * do_block_2x enc, in0, in1, rounds, rk, rkp, i
 * Two-state version of do_block.
 *   enc     - forwarded to mix_columns_2x
 *   in0/in1 - vector registers holding two 16-byte states, updated in
 *             place
 *   rkp     - pointer register; 16 bytes are loaded from it into v15 and
 *             it is post-incremented by 16
 *   rounds, rk, i - not referenced by any line visible here (presumably
 *             the round count, key schedule base and loop counter used
 *             by lines that are missing - TODO confirm)
 *
 * Visible sequence: at 1111 both states are XORed with v15, permuted
 * through v13 and passed to sub_bytes_2x; v15 is then reloaded from
 * [rkp] and mix_columns_2x is applied; at 2222 both states are XORed
 * with v15 once more.
 * NOTE(review): the original numbering skips 219-221, 224, 228, 230, 232
 * and 235, so loop control, any other setup and the closing .endm are
 * missing from this listing.
 */
218 .macro do_block_2x, enc, in0, in1, rounds, rk, rkp, i
222 1111: eor \in0\().16b, \in0\().16b, v15.16b /* ^round key */
223 eor \in1\().16b, \in1\().16b, v15.16b /* ^round key */
225 tbl \in0\().16b, {\in0\().16b}, v13.16b /* ShiftRows */
226 tbl \in1\().16b, {\in1\().16b}, v13.16b /* ShiftRows */
227 sub_bytes_2x \in0, \in1
229 ld1 {v15.4s}, [\rkp], #16
231 mix_columns_2x \in0, \in1, \enc
233 2222: eor \in0\().16b, \in0\().16b, v15.16b /* ^round key */
234 eor \in1\().16b, \in1\().16b, v15.16b /* ^round key */
/*
 * do_block_4x enc, in0, in1, in2, in3, rounds, rk, rkp, i
 * Four-state version of do_block.
 *   enc     - forwarded to mix_columns_2x
 *   in0-in3 - vector registers holding four 16-byte states, updated in
 *             place
 *   rkp     - pointer register; 16 bytes are loaded from it into v15 and
 *             it is post-incremented by 16
 *   rounds, rk, i - not referenced by any line visible here (presumably
 *             the round count, key schedule base and loop counter used
 *             by lines that are missing - TODO confirm)
 *
 * Visible sequence: at 1111 all four states are XORed with v15, permuted
 * through v13 and passed to sub_bytes_4x; v15 is then reloaded from
 * [rkp] and mix_columns_2x is applied to the pairs (in0, in1) and
 * (in2, in3); at 2222 all four states are XORed with v15 once more.
 * NOTE(review): the original numbering skips 238-240, 245, 251, 253, 256
 * and 261, so loop control, any other setup and the closing .endm are
 * missing from this listing.
 */
237 .macro do_block_4x, enc, in0, in1, in2, in3, rounds, rk, rkp, i
241 1111: eor \in0\().16b, \in0\().16b, v15.16b /* ^round key */
242 eor \in1\().16b, \in1\().16b, v15.16b /* ^round key */
243 eor \in2\().16b, \in2\().16b, v15.16b /* ^round key */
244 eor \in3\().16b, \in3\().16b, v15.16b /* ^round key */
246 tbl \in0\().16b, {\in0\().16b}, v13.16b /* ShiftRows */
247 tbl \in1\().16b, {\in1\().16b}, v13.16b /* ShiftRows */
248 tbl \in2\().16b, {\in2\().16b}, v13.16b /* ShiftRows */
249 tbl \in3\().16b, {\in3\().16b}, v13.16b /* ShiftRows */
250 sub_bytes_4x \in0, \in1, \in2, \in3
252 ld1 {v15.4s}, [\rkp], #16
254 mix_columns_2x \in0, \in1, \enc
255 mix_columns_2x \in2, \in3, \enc
257 2222: eor \in0\().16b, \in0\().16b, v15.16b /* ^round key */
258 eor \in1\().16b, \in1\().16b, v15.16b /* ^round key */
259 eor \in2\().16b, \in2\().16b, v15.16b /* ^round key */
260 eor \in3\().16b, \in3\().16b, v15.16b /* ^round key */
/*
 * encrypt_block2x in0, in1, rounds, rk, rkp, i
 * Expands do_block_2x with enc = 1; all other arguments are passed
 * through unchanged.
 * NOTE(review): the closing .endm is not shown in this listing.
 */
263 .macro encrypt_block2x, in0, in1, rounds, rk, rkp, i
264 do_block_2x 1, \in0, \in1, \rounds, \rk, \rkp, \i
/*
 * decrypt_block2x in0, in1, rounds, rk, rkp, i
 * Expands do_block_2x with enc = 0; all other arguments are passed
 * through unchanged.
 * NOTE(review): the closing .endm is not shown in this listing.
 */
267 .macro decrypt_block2x, in0, in1, rounds, rk, rkp, i
268 do_block_2x 0, \in0, \in1, \rounds, \rk, \rkp, \i
/*
 * encrypt_block4x in0, in1, in2, in3, rounds, rk, rkp, i
 * Expands do_block_4x with enc = 1; all other arguments are passed
 * through unchanged.
 * NOTE(review): the closing .endm is not shown in this listing.
 */
271 .macro encrypt_block4x, in0, in1, in2, in3, rounds, rk, rkp, i
272 do_block_4x 1, \in0, \in1, \in2, \in3, \rounds, \rk, \rkp, \i
/*
 * decrypt_block4x in0, in1, in2, in3, rounds, rk, rkp, i
 * Expands do_block_4x with enc = 0; all other arguments are passed
 * through unchanged.
 * NOTE(review): the closing .endm is not shown in this listing.
 */
275 .macro decrypt_block4x, in0, in1, in2, in3, rounds, rk, rkp, i
276 do_block_4x 0, \in0, \in1, \in2, \in3, \rounds, \rk, \rkp, \i
279 #include "aes-modes.S"
/* Read-only data: the lookup tables and permutation vectors follow. */
281 .section ".rodata", "a"
/*
 * 256-byte substitution table, 8 bytes per line, indexed by input byte.
 * The values match the AES S-box published in FIPS 197.
 * NOTE(review): the label for this table is not present in this listing
 * (original lines 282-283 are missing); presumably it is the
 * .LForward_Sbox referenced by enc_prepare - confirm.
 */
284 .byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
285 .byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
286 .byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
287 .byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
288 .byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
289 .byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
290 .byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
291 .byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
292 .byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
293 .byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
294 .byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
295 .byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
296 .byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
297 .byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
298 .byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
299 .byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
300 .byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
301 .byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
302 .byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
303 .byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
304 .byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
305 .byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
306 .byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
307 .byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
308 .byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
309 .byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
310 .byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
311 .byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
312 .byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
313 .byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
314 .byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
315 .byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
/*
 * 256-byte substitution table, 8 bytes per line, indexed by input byte.
 * The values match the AES inverse S-box published in FIPS 197.
 * NOTE(review): the label for this table is not present in this listing
 * (original lines 316-317 are missing); presumably it is the
 * .LReverse_Sbox referenced by dec_prepare - confirm.
 */
318 .byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
319 .byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
320 .byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
321 .byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
322 .byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
323 .byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
324 .byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
325 .byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
326 .byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
327 .byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
328 .byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
329 .byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
330 .byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
331 .byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
332 .byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
333 .byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
334 .byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
335 .byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
336 .byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
337 .byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
338 .byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
339 .byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
340 .byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
341 .byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
342 .byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
343 .byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
344 .byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
345 .byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
346 .byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
347 .byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
348 .byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
349 .byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
/*
 * Three 16-byte byte-index vectors, each emitted as one 128-bit .octa
 * constant. The comments below list the index bytes from least to most
 * significant, i.e. in memory order assuming a little-endian target.
 * NOTE(review): the labels for these vectors are not present in this
 * listing (original lines 350-351, 353-354 and 356-357 are missing).
 * Presumably they are, in order, .LForward_ShiftRows,
 * .LReverse_ShiftRows and .Lror32by8 as referenced by the prepare
 * macros - confirm.
 */
/* indices 0, 5, 10, 15, 4, 9, 14, 3, 8, 13, 2, 7, 12, 1, 6, 11 */
352 .octa 0x0b06010c07020d08030e09040f0a0500
/* indices 0, 13, 10, 7, 4, 1, 14, 11, 8, 5, 2, 15, 12, 9, 6, 3 */
355 .octa 0x0306090c0f0205080b0e0104070a0d00
/* indices 1, 2, 3, 0, 5, 6, 7, 4, ...: rotates the bytes within each 32-bit word */
358 .octa 0x0c0f0e0d080b0a090407060500030201