Merge tag 'linux-kselftest-5.3-rc1' of git://git.kernel.org/pub/scm/linux/kernel...
[sfrench/cifs-2.6.git] / arch / arm64 / crypto / aes-neon.S
1 /* SPDX-License-Identifier: GPL-2.0-only */
2 /*
3  * linux/arch/arm64/crypto/aes-neon.S - AES cipher for ARMv8 NEON
4  *
5  * Copyright (C) 2013 - 2017 Linaro Ltd. <ard.biesheuvel@linaro.org>
6  */
7
8 #include <linux/linkage.h>
9 #include <asm/assembler.h>
10
/*
 * Symbol wrappers: the shared mode code in aes-modes.S (included at the
 * bottom of this file) emits its functions through these, so every entry
 * point gets a "neon_" prefix distinguishing this pure-NEON cipher from
 * the Crypto Extensions variant.
 */
#define AES_ENTRY(func)         ENTRY(neon_ ## func)
#define AES_ENDPROC(func)       ENDPROC(neon_ ## func)

        /* register aliases referenced by the mode code in aes-modes.S */
        xtsmask         .req    v7
        cbciv           .req    v7
        vctr            .req    v4
17
        /*
         * Reload the XTS tweak mask (xtsmask/v7) by re-running
         * xts_load_mask, which is defined elsewhere (presumably in
         * aes-modes.S — not visible in this file).
         */
        .macro          xts_reload_mask, tmp
        xts_load_mask   \tmp
        .endm
21
        /*
         * Multiply each byte lane by the polynomial 'x' in GF(2^8)
         * (i.e. xtime()): shift left by one, then xor in the reduction
         * constant wherever the top bit was set.  \const must hold
         * 0x1b in every byte (the 'prepare' macro sets up v12 this
         * way).  \temp is clobbered.
         */
        .macro          mul_by_x, out, in, temp, const
        sshr            \temp, \in, #7          /* arithmetic shift: 0xff where top bit set */
        shl             \out, \in, #1           /* in << 1 */
        and             \temp, \temp, \const    /* 0x1b in lanes that overflowed */
        eor             \out, \out, \temp       /* reduce mod x^8+x^4+x^3+x+1 */
        .endm
29
        /*
         * Multiply each byte lane by 'x^2' in GF(2^8): shift left by
         * two, then fold the two bits shifted out back in with a
         * carry-less (polynomial) multiply by the reduction constant.
         * \const must hold 0x1b in every byte.  \temp is clobbered.
         */
        .macro          mul_by_x2, out, in, temp, const
        ushr            \temp, \in, #6          /* the two bits shifted out */
        shl             \out, \in, #2           /* in << 2 */
        pmul            \temp, \temp, \const    /* {0..3} * 0x1b, carry-less */
        eor             \out, \out, \temp
        .endm
37
        /*
         * Preload all cipher-wide constants into fixed NEON registers:
         *   v12     - 0x1b in every byte (GF(2^8) reduction constant)
         *   v13     - \shiftrows tbl permutation ((Inv)ShiftRows)
         *   v14     - .Lror32by8 permutation (rotate words right by 8)
         *   v16-v31 - the full 256-byte S-box given by \sbox
         * \temp is clobbered as an address scratch register.
         */
        .macro          prepare, sbox, shiftrows, temp
        movi            v12.16b, #0x1b
        ldr_l           q13, \shiftrows, \temp
        ldr_l           q14, .Lror32by8, \temp
        adr_l           \temp, \sbox
        ld1             {v16.16b-v19.16b}, [\temp], #64
        ld1             {v20.16b-v23.16b}, [\temp], #64
        ld1             {v24.16b-v27.16b}, [\temp], #64
        ld1             {v28.16b-v31.16b}, [\temp]
        .endm
49
        /* do preload for encryption: forward S-box and ShiftRows */
        .macro          enc_prepare, ignore0, ignore1, temp
        prepare         .LForward_Sbox, .LForward_ShiftRows, \temp
        .endm
54
        /*
         * Key-switch hook invoked by the mode code.  This implementation
         * reloads round keys from memory on every block (see do_block),
         * so no per-key register state needs refreshing here.
         */
        .macro          enc_switch_key, ignore0, ignore1, temp
        /* do nothing */
        .endm
58
        /* do preload for decryption: inverse S-box and ShiftRows */
        .macro          dec_prepare, ignore0, ignore1, temp
        prepare         .LReverse_Sbox, .LReverse_ShiftRows, \temp
        .endm
63
        /*
         * Apply the SubBytes transformation using the preloaded S-box.
         * A four-register tbl/tbx only covers indices 0-63, so the
         * 256-byte lookup is split across four register groups: tbl
         * zeroes out-of-range lanes, tbx leaves them untouched, and
         * the index is rebased by 64 for each successive group.
         * Requires v15 == 0x40 in every byte (set by do_block);
         * clobbers v9-v11.
         */
        .macro          sub_bytes, in
        sub             v9.16b, \in\().16b, v15.16b     /* index - 64 */
        tbl             \in\().16b, {v16.16b-v19.16b}, \in\().16b
        sub             v10.16b, v9.16b, v15.16b        /* index - 128 */
        tbx             \in\().16b, {v20.16b-v23.16b}, v9.16b
        sub             v11.16b, v10.16b, v15.16b       /* index - 192 */
        tbx             \in\().16b, {v24.16b-v27.16b}, v10.16b
        tbx             \in\().16b, {v28.16b-v31.16b}, v11.16b
        .endm
74
        /*
         * Apply the (Inv)MixColumns transformation to \in.  Decryption
         * reuses the forward path: InvMixColumns is computed as a
         * pre-multiplication by { 5, 0, 4, 0 } followed by the forward
         * MixColumns.  Requires v12 == 0x1b and v14 == .Lror32by8
         * (both set by 'prepare'); clobbers v8 and v9.
         */
        .macro          mix_columns, in, enc
        .if             \enc == 0
        /* Inverse MixColumns: pre-multiply by { 5, 0, 4, 0 } */
        mul_by_x2       v8.16b, \in\().16b, v9.16b, v12.16b
        eor             \in\().16b, \in\().16b, v8.16b
        rev32           v8.8h, v8.8h                    /* rotate words by 16 bits */
        eor             \in\().16b, \in\().16b, v8.16b
        .endif

        mul_by_x        v9.16b, \in\().16b, v8.16b, v12.16b
        rev32           v8.8h, \in\().8h                /* rotate words by 16 bits */
        eor             v8.16b, v8.16b, v9.16b
        eor             \in\().16b, \in\().16b, v8.16b
        tbl             \in\().16b, {\in\().16b}, v14.16b       /* rotate words by 8 bits */
        eor             \in\().16b, \in\().16b, v8.16b
        .endm
92
        /*
         * Encrypt (\enc == 1) or decrypt (\enc == 0) one 16-byte AES
         * state held in \in.
         *   \rounds - round-loop iteration count
         *   \rk     - pointer to the round key schedule (preserved)
         *   \rkp    - scratch: walking round key pointer
         *   \i      - scratch: round counter
         * v15 does double duty: it carries the current round key into
         * the top of each iteration, then is reloaded with the 0x40
         * constant that sub_bytes requires.  The final iteration skips
         * mix_columns and ends with the last AddRoundKey.
         * Clobbers v8-v11 and v15 (via sub_bytes/mix_columns).
         */
        .macro          do_block, enc, in, rounds, rk, rkp, i
        ld1             {v15.4s}, [\rk]                 /* first round key */
        add             \rkp, \rk, #16
        mov             \i, \rounds
1111:   eor             \in\().16b, \in\().16b, v15.16b         /* ^round key */
        movi            v15.16b, #0x40                  /* constant for sub_bytes */
        tbl             \in\().16b, {\in\().16b}, v13.16b       /* ShiftRows */
        sub_bytes       \in
        subs            \i, \i, #1
        ld1             {v15.4s}, [\rkp], #16           /* next round key */
        beq             2222f                           /* last round: no MixColumns */
        mix_columns     \in, \enc
        b               1111b
2222:   eor             \in\().16b, \in\().16b, v15.16b         /* ^round key */
        .endm
108
        /* encrypt a single block; see do_block for register usage */
        .macro          encrypt_block, in, rounds, rk, rkp, i
        do_block        1, \in, \rounds, \rk, \rkp, \i
        .endm
112
        /* decrypt a single block; see do_block for register usage */
        .macro          decrypt_block, in, rounds, rk, rkp, i
        do_block        0, \in, \rounds, \rk, \rkp, \i
        .endm
116
        /*
         * Interleaved versions: functionally equivalent to the
         * ones above, but applied to AES states in parallel.
         */

        /*
         * 4-way SubBytes: the same rebased tbl/tbx lookup as sub_bytes,
         * applied to four independent states, with the index rebasing
         * (sub) interleaved between the table lookups of the different
         * states to hide tbl/tbx latency.
         * Requires v15 == 0x40 in every byte; clobbers v8-v11.
         */
        .macro          sub_bytes_4x, in0, in1, in2, in3
        sub             v8.16b, \in0\().16b, v15.16b
        tbl             \in0\().16b, {v16.16b-v19.16b}, \in0\().16b
        sub             v9.16b, \in1\().16b, v15.16b
        tbl             \in1\().16b, {v16.16b-v19.16b}, \in1\().16b
        sub             v10.16b, \in2\().16b, v15.16b
        tbl             \in2\().16b, {v16.16b-v19.16b}, \in2\().16b
        sub             v11.16b, \in3\().16b, v15.16b
        tbl             \in3\().16b, {v16.16b-v19.16b}, \in3\().16b
        tbx             \in0\().16b, {v20.16b-v23.16b}, v8.16b
        tbx             \in1\().16b, {v20.16b-v23.16b}, v9.16b
        sub             v8.16b, v8.16b, v15.16b
        tbx             \in2\().16b, {v20.16b-v23.16b}, v10.16b
        sub             v9.16b, v9.16b, v15.16b
        tbx             \in3\().16b, {v20.16b-v23.16b}, v11.16b
        sub             v10.16b, v10.16b, v15.16b
        tbx             \in0\().16b, {v24.16b-v27.16b}, v8.16b
        sub             v11.16b, v11.16b, v15.16b
        tbx             \in1\().16b, {v24.16b-v27.16b}, v9.16b
        sub             v8.16b, v8.16b, v15.16b
        tbx             \in2\().16b, {v24.16b-v27.16b}, v10.16b
        sub             v9.16b, v9.16b, v15.16b
        tbx             \in3\().16b, {v24.16b-v27.16b}, v11.16b
        sub             v10.16b, v10.16b, v15.16b
        tbx             \in0\().16b, {v28.16b-v31.16b}, v8.16b
        sub             v11.16b, v11.16b, v15.16b
        tbx             \in1\().16b, {v28.16b-v31.16b}, v9.16b
        tbx             \in2\().16b, {v28.16b-v31.16b}, v10.16b
        tbx             \in3\().16b, {v28.16b-v31.16b}, v11.16b
        .endm
152
        /*
         * 2-way version of mul_by_x: multiply each byte of two states
         * by 'x' in GF(2^8), with the two dependency chains interleaved.
         * \const must hold 0x1b in every byte; \tmp0/\tmp1 clobbered.
         */
        .macro          mul_by_x_2x, out0, out1, in0, in1, tmp0, tmp1, const
        sshr            \tmp0\().16b, \in0\().16b, #7
        shl             \out0\().16b, \in0\().16b, #1
        sshr            \tmp1\().16b, \in1\().16b, #7
        and             \tmp0\().16b, \tmp0\().16b, \const\().16b
        shl             \out1\().16b, \in1\().16b, #1
        and             \tmp1\().16b, \tmp1\().16b, \const\().16b
        eor             \out0\().16b, \out0\().16b, \tmp0\().16b
        eor             \out1\().16b, \out1\().16b, \tmp1\().16b
        .endm
163
        /*
         * 2-way version of mul_by_x2: multiply each byte of two states
         * by 'x^2' in GF(2^8), interleaved for latency hiding.
         * \const must hold 0x1b in every byte; \tmp0/\tmp1 clobbered.
         */
        .macro          mul_by_x2_2x, out0, out1, in0, in1, tmp0, tmp1, const
        ushr            \tmp0\().16b, \in0\().16b, #6
        shl             \out0\().16b, \in0\().16b, #2
        ushr            \tmp1\().16b, \in1\().16b, #6
        pmul            \tmp0\().16b, \tmp0\().16b, \const\().16b
        shl             \out1\().16b, \in1\().16b, #2
        pmul            \tmp1\().16b, \tmp1\().16b, \const\().16b
        eor             \out0\().16b, \out0\().16b, \tmp0\().16b
        eor             \out1\().16b, \out1\().16b, \tmp1\().16b
        .endm
174
        /*
         * 2-way (Inv)MixColumns: same decomposition as mix_columns
         * (decryption pre-multiplies by { 5, 0, 4, 0 } and then runs
         * the forward transform), applied to two states in parallel.
         * Requires v12 == 0x1b and v14 == .Lror32by8; clobbers v8-v11.
         */
        .macro          mix_columns_2x, in0, in1, enc
        .if             \enc == 0
        /* Inverse MixColumns: pre-multiply by { 5, 0, 4, 0 } */
        mul_by_x2_2x    v8, v9, \in0, \in1, v10, v11, v12
        eor             \in0\().16b, \in0\().16b, v8.16b
        rev32           v8.8h, v8.8h
        eor             \in1\().16b, \in1\().16b, v9.16b
        rev32           v9.8h, v9.8h
        eor             \in0\().16b, \in0\().16b, v8.16b
        eor             \in1\().16b, \in1\().16b, v9.16b
        .endif

        mul_by_x_2x     v8, v9, \in0, \in1, v10, v11, v12
        rev32           v10.8h, \in0\().8h              /* rotate words by 16 bits */
        rev32           v11.8h, \in1\().8h
        eor             v10.16b, v10.16b, v8.16b
        eor             v11.16b, v11.16b, v9.16b
        eor             \in0\().16b, \in0\().16b, v10.16b
        eor             \in1\().16b, \in1\().16b, v11.16b
        tbl             \in0\().16b, {\in0\().16b}, v14.16b     /* rotate words by 8 bits */
        tbl             \in1\().16b, {\in1\().16b}, v14.16b
        eor             \in0\().16b, \in0\().16b, v10.16b
        eor             \in1\().16b, \in1\().16b, v11.16b
        .endm
199
        /*
         * 4-way version of do_block: encrypt (\enc == 1) or decrypt
         * (\enc == 0) four independent 16-byte states \in0-\in3 with
         * the same key schedule.  Register usage matches do_block:
         * \rk is preserved, \rkp and \i are scratch, and v15 holds
         * the current round key / the 0x40 sub_bytes constant in turn.
         * Clobbers v8-v11 and v15.
         */
        .macro          do_block_4x, enc, in0, in1, in2, in3, rounds, rk, rkp, i
        ld1             {v15.4s}, [\rk]                 /* first round key */
        add             \rkp, \rk, #16
        mov             \i, \rounds
1111:   eor             \in0\().16b, \in0\().16b, v15.16b       /* ^round key */
        eor             \in1\().16b, \in1\().16b, v15.16b       /* ^round key */
        eor             \in2\().16b, \in2\().16b, v15.16b       /* ^round key */
        eor             \in3\().16b, \in3\().16b, v15.16b       /* ^round key */
        movi            v15.16b, #0x40                  /* constant for sub_bytes_4x */
        tbl             \in0\().16b, {\in0\().16b}, v13.16b     /* ShiftRows */
        tbl             \in1\().16b, {\in1\().16b}, v13.16b     /* ShiftRows */
        tbl             \in2\().16b, {\in2\().16b}, v13.16b     /* ShiftRows */
        tbl             \in3\().16b, {\in3\().16b}, v13.16b     /* ShiftRows */
        sub_bytes_4x    \in0, \in1, \in2, \in3
        subs            \i, \i, #1
        ld1             {v15.4s}, [\rkp], #16           /* next round key */
        beq             2222f                           /* last round: no MixColumns */
        mix_columns_2x  \in0, \in1, \enc
        mix_columns_2x  \in2, \in3, \enc
        b               1111b
2222:   eor             \in0\().16b, \in0\().16b, v15.16b       /* ^round key */
        eor             \in1\().16b, \in1\().16b, v15.16b       /* ^round key */
        eor             \in2\().16b, \in2\().16b, v15.16b       /* ^round key */
        eor             \in3\().16b, \in3\().16b, v15.16b       /* ^round key */
        .endm
225
        /* encrypt four blocks in parallel; see do_block_4x for register usage */
        .macro          encrypt_block4x, in0, in1, in2, in3, rounds, rk, rkp, i
        do_block_4x     1, \in0, \in1, \in2, \in3, \rounds, \rk, \rkp, \i
        .endm
229
        /* decrypt four blocks in parallel; see do_block_4x for register usage */
        .macro          decrypt_block4x, in0, in1, in2, in3, rounds, rk, rkp, i
        do_block_4x     0, \in0, \in1, \in2, \in3, \rounds, \rk, \rkp, \i
        .endm
233
234 #include "aes-modes.S"
235
        .section        ".rodata", "a"
        .align          6       /* 64-byte aligned: loaded in 64-byte ld1 chunks by 'prepare' */

        /* The 256-byte AES forward S-box, preloaded into v16-v31 */
.LForward_Sbox:
        .byte           0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
        .byte           0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
        .byte           0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
        .byte           0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
        .byte           0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
        .byte           0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
        .byte           0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
        .byte           0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
        .byte           0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
        .byte           0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
        .byte           0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
        .byte           0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
        .byte           0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
        .byte           0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
        .byte           0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
        .byte           0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
        .byte           0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
        .byte           0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
        .byte           0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
        .byte           0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
        .byte           0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
        .byte           0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
        .byte           0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
        .byte           0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
        .byte           0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
        .byte           0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
        .byte           0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
        .byte           0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
        .byte           0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
        .byte           0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
        .byte           0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
        .byte           0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16

        /* The 256-byte AES inverse S-box, preloaded into v16-v31 */
.LReverse_Sbox:
        .byte           0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
        .byte           0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
        .byte           0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
        .byte           0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
        .byte           0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
        .byte           0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
        .byte           0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
        .byte           0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
        .byte           0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
        .byte           0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
        .byte           0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
        .byte           0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
        .byte           0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
        .byte           0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
        .byte           0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
        .byte           0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
        .byte           0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
        .byte           0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
        .byte           0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
        .byte           0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
        .byte           0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
        .byte           0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
        .byte           0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
        .byte           0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
        .byte           0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
        .byte           0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
        .byte           0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
        .byte           0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
        .byte           0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
        .byte           0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
        .byte           0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
        .byte           0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d

        /* tbl permutation implementing ShiftRows (loaded into v13) */
.LForward_ShiftRows:
        .octa           0x0b06010c07020d08030e09040f0a0500

        /* tbl permutation implementing InvShiftRows (loaded into v13) */
.LReverse_ShiftRows:
        .octa           0x0306090c0f0205080b0e0104070a0d00

        /* tbl permutation rotating each 32-bit word right by 8 bits (v14) */
.Lror32by8:
        .octa           0x0c0f0e0d080b0a090407060500030201