caac519d62490d18104826d83e3f6a055bba3265
[sfrench/cifs-2.6.git] / arch / arm / crypto / aes-ce-core.S
1 /* SPDX-License-Identifier: GPL-2.0-only */
2 /*
3  * aes-ce-core.S - AES in CBC/CTR/XTS mode using ARMv8 Crypto Extensions
4  *
5  * Copyright (C) 2015 Linaro Ltd <ard.biesheuvel@linaro.org>
6  */
7
8 #include <linux/linkage.h>
9 #include <asm/assembler.h>
10
11         .text
12         .fpu            crypto-neon-fp-armv8
13         .align          3
14
/*
 * enc_round - one full AES encryption round on a single block.
 *   state : q register holding the block, updated in place
 *   key   : q register holding the round key
 * AESE is issued with the round key, then AESMC is applied to its result.
 */
	.macro		enc_round, state, key
	aese.8		\state, \key
	aesmc.8		\state, \state
	.endm
19
/*
 * dec_round - one full AES decryption round on a single block.
 *   state : q register holding the block, updated in place
 *   key   : q register holding the round key
 * AESD is issued with the round key, then AESIMC is applied to its result.
 */
	.macro		dec_round, state, key
	aesd.8		\state, \key
	aesimc.8	\state, \state
	.endm
24
/*
 * enc_dround - two back-to-back encryption rounds on the block in q0,
 * the first with key1 and the second with key2.
 */
	.macro		enc_dround, key1, key2
	enc_round	q0, \key1
	enc_round	q0, \key2
	.endm
29
/*
 * dec_dround - two back-to-back decryption rounds on the block in q0,
 * the first with key1 and the second with key2.
 */
	.macro		dec_dround, key1, key2
	dec_round	q0, \key1
	dec_round	q0, \key2
	.endm
34
/*
 * enc_fround - closing sequence of an encryption on the block in q0:
 * a full round with key1, then AESE with key2 that is not followed by
 * AESMC, then an XOR with key3.
 */
	.macro		enc_fround, key1, key2, key3
	enc_round	q0, \key1
	aese.8		q0, \key2
	veor		q0, q0, \key3
	.endm
40
/*
 * dec_fround - closing sequence of a decryption on the block in q0:
 * a full round with key1, then AESD with key2 that is not followed by
 * AESIMC, then an XOR with key3.
 */
	.macro		dec_fround, key1, key2, key3
	dec_round	q0, \key1
	aesd.8		q0, \key2
	veor		q0, q0, \key3
	.endm
46
/*
 * enc_dround_3x - two encryption rounds on each of the blocks in q0, q1
 * and q2. All three blocks go through the key1 round before any of them
 * starts the key2 round.
 */
	.macro		enc_dround_3x, key1, key2
	enc_round	q0, \key1
	enc_round	q1, \key1
	enc_round	q2, \key1
	enc_round	q0, \key2
	enc_round	q1, \key2
	enc_round	q2, \key2
	.endm
55
/*
 * dec_dround_3x - two decryption rounds on each of the blocks in q0, q1
 * and q2. All three blocks go through the key1 round before any of them
 * starts the key2 round.
 */
	.macro		dec_dround_3x, key1, key2
	dec_round	q0, \key1
	dec_round	q1, \key1
	dec_round	q2, \key1
	dec_round	q0, \key2
	dec_round	q1, \key2
	dec_round	q2, \key2
	.endm
64
/*
 * enc_fround_3x - closing sequence of an encryption for the three blocks
 * in q0, q1 and q2: a full round with key1, AESE with key2 that is not
 * followed by AESMC, and finally an XOR with key3. Each step is issued
 * for all three blocks before the next step starts.
 */
	.macro		enc_fround_3x, key1, key2, key3
	enc_round	q0, \key1
	enc_round	q1, \key1
	enc_round	q2, \key1
	aese.8		q0, \key2
	aese.8		q1, \key2
	aese.8		q2, \key2
	veor		q0, q0, \key3
	veor		q1, q1, \key3
	veor		q2, q2, \key3
	.endm
76
/*
 * dec_fround_3x - closing sequence of a decryption for the three blocks
 * in q0, q1 and q2: a full round with key1, AESD with key2 that is not
 * followed by AESIMC, and finally an XOR with key3. Each step is issued
 * for all three blocks before the next step starts.
 */
	.macro		dec_fround_3x, key1, key2, key3
	dec_round	q0, \key1
	dec_round	q1, \key1
	dec_round	q2, \key1
	aesd.8		q0, \key2
	aesd.8		q1, \key2
	aesd.8		q2, \key2
	veor		q0, q0, \key3
	veor		q1, q1, \key3
	veor		q2, q2, \key3
	.endm
88
/*
 * do_block - body shared by the internal AES helpers.
 *   dround : name of a macro that performs two rounds, given two keys
 *   fround : name of a macro that performs the closing sequence, given
 *            three keys
 * Expected on entry:
 *   r3     : number of rounds; compared against 12 to select the exit path
 *   ip     : address the remaining round keys are streamed from
 *   q8, q9 : keys for the first double round
 *   q14    : third key handed to the closing sequence
 * Side effects: q10 - q13 are overwritten with round keys, ip is advanced
 * by the post-incrementing loads and the condition flags are set by the
 * cmp. Control leaves through bx lr.
 */
	.macro		do_block, dround, fround
	cmp		r3, #12			@ result is consumed by blo/beq further down
	vld1.8		{q10-q11}, [ip]!	@ fetch the next key pair while ...
	\dround		q8, q9			@ ... the previous pair is being used
	vld1.8		{q12-q13}, [ip]!
	\dround		q10, q11
	vld1.8		{q10-q11}, [ip]!
	\dround		q12, q13
	vld1.8		{q12-q13}, [ip]!
	\dround		q10, q11
	blo		1f			@ rounds < 12 (AES-128: 10 rounds)
	vld1.8		{q10-q11}, [ip]!
	\dround		q12, q13
	beq		2f			@ rounds == 12 (AES-192)
	vld1.8		{q12-q13}, [ip]		@ last pair, no write-back needed
	\dround		q10, q11
1:	\fround		q12, q13, q14
	bx		lr

2:	\fround		q10, q11, q14
	bx		lr
	.endm
111
112         /*
113          * Internal, non-AAPCS compliant functions that implement the core AES
114          * transforms. Only q0 - q2, q10 - q13, ip and the flags are clobbered.
115          * Arguments:
116          *   q0        : first in/output block
117          *   q1        : second in/output block (_3x version only)
118          *   q2        : third in/output block (_3x version only)
119          *   q8        : first round key
120          *   q9        : second round key
121          *   q14       : final round key
122          *   r2        : address of round key array
123          *   r3        : number of rounds
124          */
/*
 * aes_encrypt - encrypt the single block in q0 (internal calling
 * convention: keys in q8/q9/q14, key array in r2, round count in r3).
 * .Laes_encrypt_tweak is a second entry point for callers that have
 * already pointed ip at the third round key of a different key array.
 */
	.align		6
aes_encrypt:
	add		ip, r2, #32		@ skip the two 16-byte keys held in q8/q9
.Laes_encrypt_tweak:
	do_block	enc_dround, enc_fround
ENDPROC(aes_encrypt)
131
/*
 * aes_decrypt - decrypt the single block in q0 (internal calling
 * convention: keys in q8/q9/q14, key array in r2, round count in r3).
 */
	.align		6
aes_decrypt:
	add		ip, r2, #32		@ skip the two 16-byte keys held in q8/q9
	do_block	dec_dround, dec_fround
ENDPROC(aes_decrypt)
137
/*
 * aes_encrypt_3x - encrypt the three blocks in q0, q1 and q2 (internal
 * calling convention: keys in q8/q9/q14, key array in r2, round count
 * in r3).
 */
	.align		6
aes_encrypt_3x:
	add		ip, r2, #32		@ skip the two 16-byte keys held in q8/q9
	do_block	enc_dround_3x, enc_fround_3x
ENDPROC(aes_encrypt_3x)
143
/*
 * aes_decrypt_3x - decrypt the three blocks in q0, q1 and q2 (internal
 * calling convention: keys in q8/q9/q14, key array in r2, round count
 * in r3).
 */
	.align		6
aes_decrypt_3x:
	add		ip, r2, #32		@ skip the two 16-byte keys held in q8/q9
	do_block	dec_dround_3x, dec_fround_3x
ENDPROC(aes_decrypt_3x)
149
/*
 * prepare_key - load the round keys that stay resident in registers.
 *   rk     : register holding the address of the round key array
 *   rounds : register holding the number of rounds
 * q8/q9 receive the first two keys of the array and q14 receives the key
 * located 16 * rounds bytes past its start. ip is used as scratch.
 */
	.macro		prepare_key, rk, rounds
	add		ip, \rk, \rounds, lsl #4	@ ip = address of the final key
	vld1.8		{q8-q9}, [\rk]
	vld1.8		{q14}, [ip]
	.endm
155
156         /*
157          * aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
158          *                 int blocks)
159          * aes_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
160          *                 int blocks)
161          */
/*
 * Register use in ce_aes_ecb_encrypt:
 *   r0 : output pointer, post-incremented as blocks are stored
 *   r1 : input pointer, post-incremented as blocks are loaded
 *   r2 : round key array
 *   r3 : number of rounds
 *   r4 : remaining block count, read from the stack
 */
ENTRY(ce_aes_ecb_encrypt)
	push		{r4, lr}
	prepare_key	r2, r3
	ldr		r4, [sp, #8]		@ blocks: first stack argument, above the 2 pushed words
.Lecb_enc_3x:
	subs		r4, r4, #3
	bmi		.Lecb_enc_tail		@ fewer than 3 blocks remain
	vld1.8		{q0-q1}, [r1]!
	vld1.8		{q2}, [r1]!
	bl		aes_encrypt_3x
	vst1.8		{q0-q1}, [r0]!
	vst1.8		{q2}, [r0]!
	b		.Lecb_enc_3x
.Lecb_enc_tail:
	adds		r4, r4, #3		@ undo the subtraction that went negative
	beq		.Lecb_enc_done
.Lecb_enc_1x:
	vld1.8		{q0}, [r1]!
	bl		aes_encrypt
	vst1.8		{q0}, [r0]!
	subs		r4, r4, #1
	bne		.Lecb_enc_1x
.Lecb_enc_done:
	pop		{r4, pc}
ENDPROC(ce_aes_ecb_encrypt)
187
/*
 * Register use in ce_aes_ecb_decrypt:
 *   r0 : output pointer, post-incremented as blocks are stored
 *   r1 : input pointer, post-incremented as blocks are loaded
 *   r2 : round key array
 *   r3 : number of rounds
 *   r4 : remaining block count, read from the stack
 */
ENTRY(ce_aes_ecb_decrypt)
	push		{r4, lr}
	prepare_key	r2, r3
	ldr		r4, [sp, #8]		@ blocks: first stack argument, above the 2 pushed words
.Lecb_dec_3x:
	subs		r4, r4, #3
	bmi		.Lecb_dec_tail		@ fewer than 3 blocks remain
	vld1.8		{q0-q1}, [r1]!
	vld1.8		{q2}, [r1]!
	bl		aes_decrypt_3x
	vst1.8		{q0-q1}, [r0]!
	vst1.8		{q2}, [r0]!
	b		.Lecb_dec_3x
.Lecb_dec_tail:
	adds		r4, r4, #3		@ undo the subtraction that went negative
	beq		.Lecb_dec_done
.Lecb_dec_1x:
	vld1.8		{q0}, [r1]!
	bl		aes_decrypt
	vst1.8		{q0}, [r0]!
	subs		r4, r4, #1
	bne		.Lecb_dec_1x
.Lecb_dec_done:
	pop		{r4, pc}
ENDPROC(ce_aes_ecb_decrypt)
213
214         /*
215          * aes_cbc_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
216          *                 int blocks, u8 iv[])
217          * aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
218          *                 int blocks, u8 iv[])
219          */
/*
 * Register use in ce_aes_cbc_encrypt:
 *   r0/r1 : output/input pointers, post-incremented per block
 *   r2/r3 : round key array / number of rounds
 *   r4    : remaining block count, r5 : iv pointer (both from the stack)
 *   q0    : chaining value; holds the iv first, then each ciphertext block
 * The block count is only tested after a block has been processed, so at
 * least one block is always handled.
 */
ENTRY(ce_aes_cbc_encrypt)
	push		{r4-r6, lr}
	ldrd		r4, r5, [sp, #16]	@ r4 = blocks, r5 = iv
	prepare_key	r2, r3
	vld1.8		{q0}, [r5]		@ start the chain with the iv
.Lcbc_enc_next:
	vld1.8		{q1}, [r1]!		@ next plaintext block
	veor		q0, q0, q1		@ mix it into the chaining value
	bl		aes_encrypt
	vst1.8		{q0}, [r0]!
	subs		r4, r4, #1
	bne		.Lcbc_enc_next
	vst1.8		{q0}, [r5]		@ last ciphertext block is written back as iv
	pop		{r4-r6, pc}
ENDPROC(ce_aes_cbc_encrypt)
235
/*
 * Register use in ce_aes_cbc_decrypt:
 *   r0/r1 : output/input pointers, post-incremented per block
 *   r2/r3 : round key array / number of rounds
 *   r4    : remaining block count, r5 : iv pointer (both from the stack)
 *   q6    : previous ciphertext block (the iv before the first block)
 *   q3-q5 : copies of the three ciphertext blocks in the 3x path
 *   q15   : copy of the final round key in the single block path
 */
ENTRY(ce_aes_cbc_decrypt)
	push		{r4-r6, lr}
	ldrd		r4, r5, [sp, #16]	@ r4 = blocks, r5 = iv
	prepare_key	r2, r3
	vld1.8		{q6}, [r5]		@ q6 = iv
.Lcbc_dec_3x:
	subs		r4, r4, #3
	bmi		.Lcbc_dec_tail		@ fewer than 3 blocks remain
	vld1.8		{q0-q1}, [r1]!
	vld1.8		{q2}, [r1]!
	vmov		q3, q0			@ keep the ciphertext, it is needed for chaining
	vmov		q4, q1
	vmov		q5, q2
	bl		aes_decrypt_3x
	veor		q0, q0, q6		@ block 0 chains from the previous ciphertext or iv
	veor		q1, q1, q3		@ block 1 chains from ciphertext 0
	veor		q2, q2, q4		@ block 2 chains from ciphertext 1
	vmov		q6, q5			@ ciphertext 2 chains into the next iteration
	vst1.8		{q0-q1}, [r0]!
	vst1.8		{q2}, [r0]!
	b		.Lcbc_dec_3x
.Lcbc_dec_tail:
	adds		r4, r4, #3		@ undo the subtraction that went negative
	beq		.Lcbc_dec_done
	vmov		q15, q14		@ q14 gets rewritten per block, keep the real key
.Lcbc_dec_1x:
	vld1.8		{q0}, [r1]!		@ next ciphertext block
	veor		q14, q15, q6		@ fold the previous ciphertext into the final key
	vmov		q6, q0
	bl		aes_decrypt		@ its closing XOR with q14 also applies the chaining
	vst1.8		{q0}, [r0]!
	subs		r4, r4, #1
	bne		.Lcbc_dec_1x
.Lcbc_dec_done:
	vst1.8		{q6}, [r5]		@ write q6 back through the iv pointer
	pop		{r4-r6, pc}
ENDPROC(ce_aes_cbc_decrypt)
273
/*
 * ce_aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
 *		      int blocks, u8 ctr[])
 *
 * Register use:
 *   r0/r1 : output/input pointers, post-incremented per block
 *   r2/r3 : round key array / number of rounds
 *   r4    : remaining block count, r5 : ctr pointer (both from the stack)
 *   q6    : counter block in its in-memory byte order
 *   r6    : byte-reversed copy of the last 32-bit word of the counter
 * A negative block count in the single block loop selects the tail path,
 * which stores the raw key stream block instead of XORing it with input.
 */
ENTRY(ce_aes_ctr_encrypt)
	push		{r4-r6, lr}
	ldrd		r4, r5, [sp, #16]	@ r4 = blocks, r5 = ctr
	vld1.8		{q6}, [r5]
	prepare_key	r2, r3
	vmov		r6, s27			@ s27 is the last word of q6
	rev		r6, r6
	cmn		r6, r4			@ would r6 + blocks carry out of 32 bits?
	bcs		.Lctr_1x		@ yes: only the 1x loop propagates carries
.Lctr_3x:
	subs		r4, r4, #3
	bmi		.Lctr_tail		@ fewer than 3 blocks remain
	add		r6, r6, #1
	vmov		q0, q6			@ block 0 uses the counter as it stands
	vmov		q1, q6
	rev		ip, r6
	add		r6, r6, #1
	vmov		q2, q6
	vmov		s7, ip			@ last word of q1 = counter + 1
	rev		ip, r6
	add		r6, r6, #1
	vmov		s11, ip			@ last word of q2 = counter + 2
	vld1.8		{q3-q4}, [r1]!
	vld1.8		{q5}, [r1]!
	bl		aes_encrypt_3x
	veor		q0, q0, q3
	veor		q1, q1, q4
	veor		q2, q2, q5
	rev		ip, r6			@ ip was clobbered by the call, rebuild it
	vst1.8		{q0-q1}, [r0]!
	vst1.8		{q2}, [r0]!
	vmov		s27, ip			@ last word of q6 = counter + 3
	b		.Lctr_3x
.Lctr_tail:
	adds		r4, r4, #3		@ undo the subtraction that went negative
	beq		.Lctr_done
.Lctr_1x:
	vmov		q0, q6
	bl		aes_encrypt

	adds		r6, r6, #1		@ bump the low counter word, C = wrapped
	rev		ip, r6
	vmov		s27, ip
	bcs		.Lctr_carry		@ rev and vmov leave the flags alone

.Lctr_carry_done:
	subs		r4, r4, #1
	bmi		.Lctr_tail_block	@ blocks < 0 means tail block
	vld1.8		{q3}, [r1]!
	veor		q3, q0, q3
	vst1.8		{q3}, [r0]!
	bne		.Lctr_1x		@ still testing the flags of the subs

.Lctr_done:
	vst1.8		{q6}, [r5]		@ hand back the next counter value
	pop		{r4-r6, pc}

.Lctr_tail_block:
	vst1.8		{q0}, [r0, :64]		@ key stream only; :64 asserts 8-byte alignment of r0
	b		.Lctr_done

.Lctr_carry:
	.irp		sreg, s26, s25, s24
	vmov		ip, \sreg		@ next more significant counter word
	rev		ip, ip
	adds		ip, ip, #1
	rev		ip, ip
	vmov		\sreg, ip
	bcc		.Lctr_carry_done	@ stop as soon as a word does not wrap
	.endr
	b		.Lctr_carry_done
ENDPROC(ce_aes_ctr_encrypt)
350
351         /*
352          * aes_xts_encrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
353          *                 int blocks, u8 iv[], u8 const rk2[], int first)
354          * aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
355          *                 int blocks, u8 iv[], u8 const rk2[], int first)
356          */
357
/*
 * next_tweak - derive the tweak that follows the one in "in".
 *   out   : q register receiving the new tweak (may be the same as in)
 *   in    : q register holding the current tweak
 *   const : q register holding the pair of 64-bit constants { 1, 0x87 }
 *   tmp   : scratch q register
 * Both 64-bit halves are doubled. The top bit of each half, turned into
 * a mask and ANDed with const, is moved to the opposite half and XORed
 * in: the low half carries 1 into the high half, and the high half
 * feeds 0x87 back into the low half.
 */
	.macro		next_tweak, out, in, const, tmp
	vshr.s64	\tmp, \in, #63		@ all-ones where the top bit is set
	vand		\tmp, \tmp, \const
	vadd.u64	\out, \in, \in		@ doubling; must follow the read of in above
	vext.8		\tmp, \tmp, \tmp, #8	@ swap the two 64-bit halves
	veor		\out, \out, \tmp
	.endm
365
/*
 * ce_aes_xts_init - prologue shared by ce_aes_xts_encrypt/decrypt.
 *
 * Called with bl right after the caller has pushed {r4-r6, lr}, so the
 * stack arguments of the caller sit at sp + 16 and up.
 * Results:
 *   r4 : blocks, r5 : iv pointer
 *   q7 : the { 1, 0x87 } constant pair from .Lxts_mul_x
 *   q0 : the iv; encrypted with the second key only when first == 1
 *   r6 : the value of first when it is not 1, otherwise the rk2 address
 * When the iv gets encrypted, q8/q9/q14 end up holding rk2 keys and the
 * registers clobbered by aes_encrypt are clobbered here as well.
 */
	.align		3
.Lxts_mul_x:
	.quad		1, 0x87

ce_aes_xts_init:
	ldrd		r4, r5, [sp, #16]	@ r4 = blocks, r5 = iv
	ldr		r6, [sp, #28]		@ r6 = first
	vldr		d14, .Lxts_mul_x	@ d14/d15 together form q7
	vldr		d15, .Lxts_mul_x + 8
	vld1.8		{q0}, [r5]
	teq		r6, #1
	bxne		lr			@ first != 1: return with the iv as loaded

	@ first == 1: encrypt the iv in q0 with the second AES key.
	ldr		r6, [sp, #24]		@ r6 = rk2
	prepare_key	r6, r3
	add		ip, r6, #32		@ third round key of rk2
	b		.Laes_encrypt_tweak	@ tail call, returns to our caller
ENDPROC(ce_aes_xts_init)
387
/*
 * Register use in ce_aes_xts_encrypt:
 *   r0/r1 : output/input pointers, post-incremented per block
 *   r2/r3 : first round key array / number of rounds
 *   r4/r5 : block count / iv pointer, set up by ce_aes_xts_init
 *   r6    : as left by ce_aes_xts_init; zero means the tweak loaded from
 *           iv must be advanced before it is used
 *   q3    : current tweak, q4/q5 : the two following tweaks (3x path)
 *   q6    : scratch for next_tweak, q7 : next_tweak constants
 * The tweak stored back through r5 is the one applied to the last block.
 */
ENTRY(ce_aes_xts_encrypt)
	push		{r4-r6, lr}

	bl		ce_aes_xts_init		@ shared prologue, q0 = initial tweak
	prepare_key	r2, r3			@ switch q8/q9/q14 to the data key
	vmov		q3, q0

	teq		r6, #0
	bne		.Lxts_enc_3x		@ non-zero: use the tweak as is

.Lxts_enc_advance:
	next_tweak	q3, q3, q7, q6
.Lxts_enc_3x:
	subs		r4, r4, #3
	bmi		.Lxts_enc_tail		@ fewer than 3 blocks remain
	vld1.8		{q0-q1}, [r1]!		@ three plaintext blocks
	vld1.8		{q2}, [r1]!
	next_tweak	q4, q3, q7, q6
	veor		q0, q0, q3
	next_tweak	q5, q4, q7, q6
	veor		q1, q1, q4
	veor		q2, q2, q5
	bl		aes_encrypt_3x
	veor		q0, q0, q3
	veor		q1, q1, q4
	veor		q2, q2, q5
	vst1.8		{q0-q1}, [r0]!		@ three ciphertext blocks
	vst1.8		{q2}, [r0]!
	vmov		q3, q5			@ last tweak used so far
	teq		r4, #0
	beq		.Lxts_enc_done
	b		.Lxts_enc_advance
.Lxts_enc_tail:
	adds		r4, r4, #3		@ undo the subtraction that went negative
	beq		.Lxts_enc_done
.Lxts_enc_1x:
	vld1.8		{q0}, [r1]!
	veor		q0, q0, q3
	bl		aes_encrypt
	veor		q0, q0, q3
	vst1.8		{q0}, [r0]!
	subs		r4, r4, #1
	beq		.Lxts_enc_done		@ leave before advancing the tweak
	next_tweak	q3, q3, q7, q6
	b		.Lxts_enc_1x
.Lxts_enc_done:
	vst1.8		{q3}, [r5]
	pop		{r4-r6, pc}
ENDPROC(ce_aes_xts_encrypt)
437
438
/*
 * Register use in ce_aes_xts_decrypt:
 *   r0/r1 : output/input pointers, post-incremented per block
 *   r2/r3 : first round key array / number of rounds
 *   r4/r5 : block count / iv pointer, set up by ce_aes_xts_init
 *   r6    : as left by ce_aes_xts_init; zero means the tweak loaded from
 *           iv must be advanced before it is used
 *   q3    : current tweak, q4/q5 : the two following tweaks (3x path)
 *   q6    : scratch for next_tweak, q7 : next_tweak constants
 * The tweak stored back through r5 is the one applied to the last block.
 */
ENTRY(ce_aes_xts_decrypt)
	push		{r4-r6, lr}

	bl		ce_aes_xts_init		@ run shared prologue
	prepare_key	r2, r3			@ switch q8/q9/q14 to the data key
	vmov		q3, q0

	teq		r6, #0			@ zero: advance the tweak first
	bne		.Lxtsdec3x

.Lxtsdecloop3x:
	next_tweak	q3, q3, q7, q6
.Lxtsdec3x:
	subs		r4, r4, #3
	bmi		.Lxtsdec1x
	vld1.8		{q0-q1}, [r1]!		@ get 3 ct blocks
	vld1.8		{q2}, [r1]!
	next_tweak	q4, q3, q7, q6
	veor		q0, q0, q3
	next_tweak	q5, q4, q7, q6
	veor		q1, q1, q4
	veor		q2, q2, q5
	bl		aes_decrypt_3x
	veor		q0, q0, q3
	veor		q1, q1, q4
	veor		q2, q2, q5
	vst1.8		{q0-q1}, [r0]!		@ write 3 pt blocks
	vst1.8		{q2}, [r0]!
	vmov		q3, q5			@ last tweak used so far
	teq		r4, #0
	beq		.Lxtsdecout
	b		.Lxtsdecloop3x
.Lxtsdec1x:
	adds		r4, r4, #3		@ undo the subtraction that went negative
	beq		.Lxtsdecout
.Lxtsdecloop:
	vld1.8		{q0}, [r1]!
	veor		q0, q0, q3
	bl		aes_decrypt		@ sets up ip itself from r2
	veor		q0, q0, q3
	vst1.8		{q0}, [r0]!
	subs		r4, r4, #1
	beq		.Lxtsdecout		@ leave before advancing the tweak
	next_tweak	q3, q3, q7, q6
	b		.Lxtsdecloop
.Lxtsdecout:
	vst1.8		{q3}, [r5]
	pop		{r4-r6, pc}
ENDPROC(ce_aes_xts_decrypt)
489
/*
 * u32 ce_aes_sub(u32 input) - run the bytes of 'input' through the AES
 *                             S-box by means of the aese instruction
 *
 * The input word is replicated into all four lanes of q1 and used as the
 * key operand of AESE on an all-zero state; the low word of the result
 * is returned in r0. Clobbers q0 and q1.
 */
ENTRY(ce_aes_sub)
	veor		q0, q0, q0		@ all-zero state
	vdup.32		q1, r0			@ input in every 32-bit lane
	aese.8		q0, q1
	vmov		r0, s0			@ s0 is the low word of q0
	bx		lr
ENDPROC(ce_aes_sub)
502
/*
 * void ce_aes_invert(u8 *dst, u8 *src) - apply the Inverse MixColumns
 *                                        operation to the round key at
 *                                        *src and store the result at
 *                                        *dst
 *
 * One 16-byte key is read from r1, passed through AESIMC and written to
 * r0. Clobbers q0.
 */
ENTRY(ce_aes_invert)
	vld1.8		{q0}, [r1]
	aesimc.8	q0, q0
	vst1.8		{q0}, [r0]
	bx		lr
ENDPROC(ce_aes_invert)