Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dtor/input
[sfrench/cifs-2.6.git] / arch / arm / crypto / aes-ce-core.S
1 /*
2  * aes-ce-core.S - AES in CBC/CTR/XTS mode using ARMv8 Crypto Extensions
3  *
4  * Copyright (C) 2015 Linaro Ltd <ard.biesheuvel@linaro.org>
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 as
8  * published by the Free Software Foundation.
9  */
10
11 #include <linux/linkage.h>
12 #include <asm/assembler.h>
13
14         .text                                   @ everything below is code
15         .fpu            crypto-neon-fp-armv8    @ allow NEON and the aese/aesd/aesmc/aesimc mnemonics
16         .align          3                       @ 2^3 = 8 byte alignment
17
18         .macro          enc_round, state, key
           @ One full AES encryption round on the q register \state with
           @ round key \key: aese does AddRoundKey + SubBytes + ShiftRows,
           @ aesmc then applies MixColumns in place.
19         aese.8          \state, \key
20         aesmc.8         \state, \state
21         .endm
22
23         .macro          dec_round, state, key
           @ One full AES decryption round on the q register \state with
           @ round key \key: aesd does AddRoundKey + InvShiftRows +
           @ InvSubBytes, aesimc then applies InvMixColumns in place.
24         aesd.8          \state, \key
25         aesimc.8        \state, \state
26         .endm
27
28         .macro          enc_dround, key1, key2
           @ "Double round": two consecutive full encryption rounds on the
           @ single block held in q0, using \key1 and then \key2.
29         enc_round       q0, \key1
30         enc_round       q0, \key2
31         .endm
32
33         .macro          dec_dround, key1, key2
           @ "Double round": two consecutive full decryption rounds on the
           @ single block held in q0, using \key1 and then \key2.
34         dec_round       q0, \key1
35         dec_round       q0, \key2
36         .endm
37
38         .macro          enc_fround, key1, key2, key3
           @ "Final rounds" of an encryption on q0: one full round with
           @ \key1, then the last AES round, which has no MixColumns, done as
           @ a bare aese with \key2 followed by an xor with the last round
           @ key \key3.
39         enc_round       q0, \key1
40         aese.8          q0, \key2               @ last round: no aesmc
41         veor            q0, q0, \key3           @ final AddRoundKey
42         .endm
43
44         .macro          dec_fround, key1, key2, key3
           @ "Final rounds" of a decryption on q0: one full round with
           @ \key1, then the last round, which has no InvMixColumns, done as
           @ a bare aesd with \key2 followed by an xor with the last round
           @ key \key3.
45         dec_round       q0, \key1
46         aesd.8          q0, \key2               @ last round: no aesimc
47         veor            q0, q0, \key3           @ final AddRoundKey
48         .endm
49
50         .macro          enc_dround_3x, key1, key2
           @ Three-block version of enc_dround: two full encryption rounds
           @ on each of q0, q1 and q2. The three independent blocks are
           @ interleaved per round key rather than processed one after the
           @ other.
51         enc_round       q0, \key1
52         enc_round       q1, \key1
53         enc_round       q2, \key1
54         enc_round       q0, \key2
55         enc_round       q1, \key2
56         enc_round       q2, \key2
57         .endm
58
59         .macro          dec_dround_3x, key1, key2
           @ Three-block version of dec_dround: two full decryption rounds
           @ on each of q0, q1 and q2, interleaved per round key.
60         dec_round       q0, \key1
61         dec_round       q1, \key1
62         dec_round       q2, \key1
63         dec_round       q0, \key2
64         dec_round       q1, \key2
65         dec_round       q2, \key2
66         .endm
67
68         .macro          enc_fround_3x, key1, key2, key3
           @ Three-block version of enc_fround: last two encryption rounds
           @ on q0, q1 and q2. One full round with \key1, a bare aese with
           @ \key2 (the last round has no MixColumns) and the final xor with
           @ the last round key \key3.
69         enc_round       q0, \key1
70         enc_round       q1, \key1
71         enc_round       q2, \key1
72         aese.8          q0, \key2
73         aese.8          q1, \key2
74         aese.8          q2, \key2
75         veor            q0, q0, \key3
76         veor            q1, q1, \key3
77         veor            q2, q2, \key3
78         .endm
79
80         .macro          dec_fround_3x, key1, key2, key3
           @ Three-block version of dec_fround: last two decryption rounds
           @ on q0, q1 and q2. One full round with \key1, a bare aesd with
           @ \key2 (the last round has no InvMixColumns) and the final xor
           @ with the last round key \key3.
81         dec_round       q0, \key1
82         dec_round       q1, \key1
83         dec_round       q2, \key1
84         aesd.8          q0, \key2
85         aesd.8          q1, \key2
86         aesd.8          q2, \key2
87         veor            q0, q0, \key3
88         veor            q1, q1, \key3
89         veor            q2, q2, \key3
90         .endm
91
92         .macro          do_block, dround, fround
           @ Complete body of an AES block transform, including the return.
           @   \dround : macro doing two full rounds        (key1, key2)
           @   \fround : macro doing the last two rounds    (key1, key2, key3)
           @ On entry: q8/q9 = round keys 0 and 1, q14 = last round key,
           @ ip -> round key 2, r3 = number of rounds (10, 12 or 14).
           @ The remaining round keys are streamed from [ip] through
           @ q10-q13 two at a time, each load issued one double round ahead
           @ of its use. Clobbers q10-q13, ip and the condition flags.
93         cmp             r3, #12                 @ which key size?
94         vld1.8          {q10-q11}, [ip]!        @ round keys 2, 3
95         \dround         q8, q9
96         vld1.8          {q12-q13}, [ip]!        @ round keys 4, 5
97         \dround         q10, q11
98         vld1.8          {q10-q11}, [ip]!        @ round keys 6, 7
99         \dround         q12, q13
100         vld1.8          {q12-q13}, [ip]!       @ round keys 8, 9
101         \dround         q10, q11
            @ The flags are still those of the cmp at the top: none of the
            @ loads or AES instructions in between update them.
102         blo             0f                      @ AES-128: 10 rounds
103         vld1.8          {q10-q11}, [ip]!       @ round keys 10, 11
104         \dround         q12, q13
105         beq             1f                      @ AES-192: 12 rounds
106         vld1.8          {q12-q13}, [ip]        @ round keys 12, 13 (last load, no writeback)
107         \dround         q10, q11
108 0:      \fround         q12, q13, q14           @ AES-128 and AES-256 exit
109         bx              lr
110
111 1:      \fround         q10, q11, q14           @ AES-192 exit
112         bx              lr
113         .endm
114
115         /*
116          * Internal, non-AAPCS compliant functions that implement the core AES
117          * transforms. They clobber only q0 - q2, q10 - q13, ip and the flags.
118          * Arguments:
119          *   q0        : first in/output block
120          *   q1        : second in/output block (_3x version only)
121          *   q2        : third in/output block (_3x version only)
122          *   q8        : first round key
123          *   q9        : second round key
124          *   q14       : final round key
125          *   r2        : address of round key array
126          *   r3        : number of rounds
127          */
128         .align          6                       @ 2^6 = 64 byte alignment
129 aes_encrypt:
            @ Encrypt the single block in q0 in place. Register contract as
            @ in the comment block above; returns through the bx lr that is
            @ part of do_block.
130         add             ip, r2, #32             @ 3rd round key
131 .Laes_encrypt_tweak:                            @ second entry: ce_aes_xts_init jumps here with ip set up for its own key
132         do_block        enc_dround, enc_fround
133 ENDPROC(aes_encrypt)
134
135         .align          6                       @ 2^6 = 64 byte alignment
136 aes_decrypt:
            @ Decrypt the single block in q0 in place. Register contract as
            @ for aes_encrypt; returns through the bx lr inside do_block.
137         add             ip, r2, #32             @ 3rd round key
138         do_block        dec_dround, dec_fround
139 ENDPROC(aes_decrypt)
140
141         .align          6                       @ 2^6 = 64 byte alignment
142 aes_encrypt_3x:
            @ Encrypt the three blocks in q0, q1 and q2 in place with the
            @ same key. Register contract as for aes_encrypt; returns through
            @ the bx lr inside do_block.
143         add             ip, r2, #32             @ 3rd round key
144         do_block        enc_dround_3x, enc_fround_3x
145 ENDPROC(aes_encrypt_3x)
146
147         .align          6                       @ 2^6 = 64 byte alignment
148 aes_decrypt_3x:
            @ Decrypt the three blocks in q0, q1 and q2 in place with the
            @ same key. Register contract as for aes_encrypt; returns through
            @ the bx lr inside do_block.
149         add             ip, r2, #32             @ 3rd round key
150         do_block        dec_dround_3x, dec_fround_3x
151 ENDPROC(aes_decrypt_3x)
152
153         .macro          prepare_key, rk, rounds
            @ Preload the round keys that stay resident across block calls.
            @   \rk     : register holding the address of the round key array
            @   \rounds : register holding the number of rounds
            @ Result: q8/q9 = round keys 0 and 1, q14 = round key number
            @ \rounds (the last one). Clobbers ip.
154         add             ip, \rk, \rounds, lsl #4        @ ip = rk + 16 * rounds
155         vld1.8          {q8-q9}, [\rk]          @ load first 2 round keys
156         vld1.8          {q14}, [ip]             @ load last round key
157         .endm
158
159         /*
160          * ce_aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[],
161          *                    int rounds, int blocks)
162          * ce_aes_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[],
163          *                    int rounds, int blocks)
164          */
165 ENTRY(ce_aes_ecb_encrypt)
            @ ECB encryption of whole 16 byte blocks.
            @ In: r0 = out, r1 = in, r2 = rk, r3 = rounds, [sp] = blocks
            @ (argument order as in the prototype comment above).
            @ Blocks are handled three at a time while at least three remain,
            @ then one at a time. r0 and r1 advance as data is consumed.
166         push            {r4, lr}
167         ldr             r4, [sp, #8]            @ r4 = blocks (5th argument, above the 8 bytes just pushed)
168         prepare_key     r2, r3
169 .Lecbencloop3x:
170         subs            r4, r4, #3
171         bmi             .Lecbenc1x              @ fewer than 3 blocks left
172         vld1.8          {q0-q1}, [r1]!
173         vld1.8          {q2}, [r1]!
174         bl              aes_encrypt_3x
175         vst1.8          {q0-q1}, [r0]!
176         vst1.8          {q2}, [r0]!
177         b               .Lecbencloop3x
178 .Lecbenc1x:
179         adds            r4, r4, #3              @ undo the subtraction: r4 = blocks left (0..2)
180         beq             .Lecbencout
181 .Lecbencloop:
182         vld1.8          {q0}, [r1]!
183         bl              aes_encrypt
184         vst1.8          {q0}, [r0]!
185         subs            r4, r4, #1
186         bne             .Lecbencloop
187 .Lecbencout:
188         pop             {r4, pc}
189 ENDPROC(ce_aes_ecb_encrypt)
190
191 ENTRY(ce_aes_ecb_decrypt)
            @ ECB decryption of whole 16 byte blocks.
            @ In: r0 = out, r1 = in, r2 = rk, r3 = rounds, [sp] = blocks
            @ (argument order as in the prototype comment above).
            @ Blocks are handled three at a time while at least three remain,
            @ then one at a time. r0 and r1 advance as data is consumed.
192         push            {r4, lr}
193         ldr             r4, [sp, #8]            @ r4 = blocks (5th argument, above the 8 bytes just pushed)
194         prepare_key     r2, r3
195 .Lecbdecloop3x:
196         subs            r4, r4, #3
197         bmi             .Lecbdec1x              @ fewer than 3 blocks left
198         vld1.8          {q0-q1}, [r1]!
199         vld1.8          {q2}, [r1]!
200         bl              aes_decrypt_3x
201         vst1.8          {q0-q1}, [r0]!
202         vst1.8          {q2}, [r0]!
203         b               .Lecbdecloop3x
204 .Lecbdec1x:
205         adds            r4, r4, #3              @ undo the subtraction: r4 = blocks left (0..2)
206         beq             .Lecbdecout
207 .Lecbdecloop:
208         vld1.8          {q0}, [r1]!
209         bl              aes_decrypt
210         vst1.8          {q0}, [r0]!
211         subs            r4, r4, #1
212         bne             .Lecbdecloop
213 .Lecbdecout:
214         pop             {r4, pc}
215 ENDPROC(ce_aes_ecb_decrypt)
216
217         /*
218          * ce_aes_cbc_encrypt(u8 out[], u8 const in[], u8 const rk[],
219          *                    int rounds, int blocks, u8 iv[])
220          * ce_aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[],
221          *                    int rounds, int blocks, u8 iv[])
222          */
223 ENTRY(ce_aes_cbc_encrypt)
            @ CBC encryption of whole 16 byte blocks, one block at a time
            @ since each block depends on the previous ciphertext.
            @ In: r0 = out, r1 = in, r2 = rk, r3 = rounds,
            @     [sp] = blocks, [sp, #4] = iv
            @ q0 carries the chaining value: the iv at first, then the last
            @ ciphertext block, which is written back to iv[] on exit.
            @ NOTE(review): the loop is bottom-tested, so blocks == 0 would
            @ still process one block and keep counting down from -1.
            @ Presumably callers never pass 0 - confirm against the glue code.
224         push            {r4-r6, lr}
225         ldrd            r4, r5, [sp, #16]       @ r4 = blocks, r5 = iv (above the 16 bytes just pushed)
226         vld1.8          {q0}, [r5]
227         prepare_key     r2, r3
228 .Lcbcencloop:
229         vld1.8          {q1}, [r1]!             @ get next pt block
230         veor            q0, q0, q1              @ ..and xor with iv
231         bl              aes_encrypt
232         vst1.8          {q0}, [r0]!
233         subs            r4, r4, #1
234         bne             .Lcbcencloop
235         vst1.8          {q0}, [r5]              @ return last ct block as next iv
236         pop             {r4-r6, pc}
237 ENDPROC(ce_aes_cbc_encrypt)
238
239 ENTRY(ce_aes_cbc_decrypt)
            @ CBC decryption of whole 16 byte blocks.
            @ In: r0 = out, r1 = in, r2 = rk, r3 = rounds,
            @     [sp] = blocks, [sp, #4] = iv
            @ q6 always holds the previous ciphertext block (initially the
            @ iv) and is written back to iv[] on exit. Three blocks at a time
            @ while at least three remain, then one at a time.
240         push            {r4-r6, lr}
241         ldrd            r4, r5, [sp, #16]       @ r4 = blocks, r5 = iv (above the 16 bytes just pushed)
242         vld1.8          {q6}, [r5]              @ keep iv in q6
243         prepare_key     r2, r3
244 .Lcbcdecloop3x:
245         subs            r4, r4, #3
246         bmi             .Lcbcdec1x              @ fewer than 3 blocks left
247         vld1.8          {q0-q1}, [r1]!
248         vld1.8          {q2}, [r1]!
249         vmov            q3, q0                  @ keep copies of the three ct blocks,
250         vmov            q4, q1                  @ they are the chaining values for the
251         vmov            q5, q2                  @ following blocks
252         bl              aes_decrypt_3x
253         veor            q0, q0, q6              @ pt0 = D(ct0) ^ previous ct (or iv)
254         veor            q1, q1, q3              @ pt1 = D(ct1) ^ ct0
255         veor            q2, q2, q4              @ pt2 = D(ct2) ^ ct1
256         vmov            q6, q5                  @ ct2 chains into the next iteration
257         vst1.8          {q0-q1}, [r0]!
258         vst1.8          {q2}, [r0]!
259         b               .Lcbcdecloop3x
260 .Lcbcdec1x:
261         adds            r4, r4, #3              @ undo the subtraction: r4 = blocks left (0..2)
262         beq             .Lcbcdecout
263         vmov            q15, q14                @ preserve last round key
264 .Lcbcdecloop:
            @ The final step of aes_decrypt is an xor with q14, so folding
            @ the previous ciphertext into q14 performs the CBC xor for free.
265         vld1.8          {q0}, [r1]!             @ get next ct block
266         veor            q14, q15, q6            @ combine prev ct with last key
267         vmov            q6, q0                  @ this ct is the chaining value for the next block
268         bl              aes_decrypt
269         vst1.8          {q0}, [r0]!
270         subs            r4, r4, #1
271         bne             .Lcbcdecloop
272 .Lcbcdecout:
273         vst1.8          {q6}, [r5]              @ return last ct block as next iv
274         pop             {r4-r6, pc}
275 ENDPROC(ce_aes_cbc_decrypt)
276
277         /*
278          * ce_aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[],
279          *                    int rounds, int blocks, u8 ctr[])
280          */
281 ENTRY(ce_aes_ctr_encrypt)
            @ CTR mode en/decryption.
            @ In: r0 = out, r1 = in, r2 = rk, r3 = rounds,
            @     [sp] = blocks, [sp, #4] = ctr
            @ q6 holds the 16 byte big endian counter block. r6 shadows its
            @ last 32 bit word (s27) in CPU byte order so that it can be
            @ incremented with plain integer arithmetic.
            @ blocks > 0: out = in ^ keystream for that many blocks and the
            @             advanced counter is written back to ctr[].
            @ blocks < 0: "tail block" request. One 16 byte block of raw key
            @             stream is stored to out, in[] is not read and
            @             ctr[] is not written back.
282         push            {r4-r6, lr}
283         ldrd            r4, r5, [sp, #16]       @ r4 = blocks, r5 = ctr (above the 16 bytes just pushed)
284         vld1.8          {q6}, [r5]              @ load ctr
285         prepare_key     r2, r3
286         vmov            r6, s27                 @ keep swabbed ctr in r6
287         rev             r6, r6
            @ The 3x loop increments r6 without carry propagation, so it may
            @ only be used if the low counter word cannot wrap in this call.
288         cmn             r6, r4                  @ 32 bit overflow?
289         bcs             .Lctrloop               @ yes: use the 1x loop, which handles the carry
290 .Lctrloop3x:
291         subs            r4, r4, #3
292         bmi             .Lctr1x                 @ fewer than 3 blocks left
293         add             r6, r6, #1
294         vmov            q0, q6                  @ q0 = ctr
295         vmov            q1, q6
296         rev             ip, r6
297         add             r6, r6, #1
298         vmov            q2, q6
299         vmov            s7, ip                  @ q1 = ctr + 1 (s7 is the last word of q1)
300         rev             ip, r6
301         add             r6, r6, #1
302         vmov            s11, ip                 @ q2 = ctr + 2 (s11 is the last word of q2)
303         vld1.8          {q3-q4}, [r1]!
304         vld1.8          {q5}, [r1]!
305         bl              aes_encrypt_3x
306         veor            q0, q0, q3
307         veor            q1, q1, q4
308         veor            q2, q2, q5
309         rev             ip, r6
310         vst1.8          {q0-q1}, [r0]!
311         vst1.8          {q2}, [r0]!
312         vmov            s27, ip                 @ q6 = ctr + 3 for the next iteration
313         b               .Lctrloop3x
314 .Lctr1x:
315         adds            r4, r4, #3              @ undo the subtraction
316         beq             .Lctrout
317 .Lctrloop:
318         vmov            q0, q6
319         bl              aes_encrypt
320         subs            r4, r4, #1
321         bmi             .Lctrtailblock          @ blocks < 0 means tail block
322         vld1.8          {q3}, [r1]!
323         veor            q3, q0, q3
324         vst1.8          {q3}, [r0]!
325
326         adds            r6, r6, #1              @ increment BE ctr
327         rev             ip, r6                  @ rev and vmov leave the carry from adds intact
328         vmov            s27, ip
329         bcs             .Lctrcarry
330         teq             r4, #0
331         bne             .Lctrloop
332 .Lctrout:
333         vst1.8          {q6}, [r5]              @ return the updated counter
334         pop             {r4-r6, pc}
335
336 .Lctrtailblock:
            @ No alignment qualifier here: no other access in this file
            @ carries one, so out may have any alignment. A ":64" hint
            @ would fault on a pointer that is not 8 byte aligned.
337         vst1.8          {q0}, [r0]              @ return just the key stream
338         pop             {r4-r6, pc}
339
340 .Lctrcarry:
            @ The low word wrapped: ripple the carry through the three more
            @ significant words of the counter, stopping at the first word
            @ that does not wrap itself.
341         .irp            sreg, s26, s25, s24
342         vmov            ip, \sreg               @ load next word of ctr
343         rev             ip, ip                  @ ... to handle the carry
344         adds            ip, ip, #1
345         rev             ip, ip
346         vmov            \sreg, ip
347         bcc             0f
348         .endr
349 0:      teq             r4, #0
350         beq             .Lctrout
351         b               .Lctrloop
352 ENDPROC(ce_aes_ctr_encrypt)
353
354         /*
355          * ce_aes_xts_encrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
356          *                    int blocks, u8 iv[], u8 const rk2[], int first)
357          * ce_aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
358          *                    int blocks, u8 iv[], u8 const rk2[], int first)
359          */
360
361         .macro          next_tweak, out, in, const, tmp
            @ \out = \in multiplied by x in GF(2^128), i.e. the next XTS
            @ tweak. \const must hold the .Lxts_mul_x constant { 1, 0x87 };
            @ \tmp is scratch. \out may be the same register as \in, because
            @ \tmp is derived from \in before \out is written.
362         vshr.s64        \tmp, \in, #63          @ per 64 bit half: all ones if its top bit is set
363         vand            \tmp, \tmp, \const      @ low half -> 0 or 1, high half -> 0 or 0x87
364         vadd.u64        \out, \in, \in          @ shift both halves left by one bit
365         vext.8          \tmp, \tmp, \tmp, #8    @ swap halves: carry into high half, 0x87 reduction into low half
366         veor            \out, \out, \tmp
367         .endm
368
369         .align          3                       @ 8 byte alignment for the 64 bit literals
370 .Lxts_mul_x:                                    @ constant operand of next_tweak, loaded into d14/d15 by ce_aes_xts_init
371         .quad           1, 0x87
372
373 ce_aes_xts_init:
            @ Shared prologue of ce_aes_xts_encrypt and ce_aes_xts_decrypt.
            @ Not a regular function: it reads the stack arguments of its
            @ caller, so it has to be entered with bl right after the caller
            @ pushed exactly 16 bytes ({r4-r6, lr}).
            @ On return:
            @   q7 (d14/d15) = { 1, 0x87 }, the constant for next_tweak
            @   r4 = blocks, r5 = iv
            @   q0 = iv, encrypted with key 2 only if first == 1
            @   r6 = first if first != 1, otherwise the address of key 2
            @ In the first == 1 case q8, q9, q14, q10-q13, ip and the flags
            @ are clobbered as well, so the caller has to (re)load key 1.
374         vldr            d14, .Lxts_mul_x
375         vldr            d15, .Lxts_mul_x + 8
376
377         ldrd            r4, r5, [sp, #16]       @ load args
378         ldr             r6, [sp, #28]           @ r6 = first
379         vld1.8          {q0}, [r5]              @ load iv
380         teq             r6, #1                  @ start of a block?
381         bxne            lr
382
383         @ Encrypt the IV in q0 with the second AES key. This should only
384         @ be done at the start of a block.
385         ldr             r6, [sp, #24]           @ load AES key 2
386         prepare_key     r6, r3
387         add             ip, r6, #32             @ 3rd round key of key 2
388         b               .Laes_encrypt_tweak     @ tail call
389 ENDPROC(ce_aes_xts_init)
390
391 ENTRY(ce_aes_xts_encrypt)
            @ XTS encryption of whole 16 byte blocks.
            @ In: r0 = out, r1 = in, r2 = rk1, r3 = rounds, [sp] = blocks,
            @     [sp, #4] = iv, [sp, #8] = rk2, [sp, #12] = first
            @ q3 holds the tweak of the current block, q4 and q5 the tweaks
            @ of the second and third block in the 3x loop. q7 is the
            @ next_tweak constant and q6 its scratch register. On exit the
            @ tweak of the last processed block is stored to iv[].
392         push            {r4-r6, lr}             @ must be exactly 16 bytes, ce_aes_xts_init relies on it
393
394         bl              ce_aes_xts_init         @ run shared prologue
395         prepare_key     r2, r3                  @ (re)load key 1, the prologue may have loaded key 2
396         vmov            q3, q0
397
            @ r6 is nonzero if the prologue just produced the first tweak
            @ (it then holds the key 2 address). r6 == 0 means first == 0:
            @ iv[] holds the last tweak already used, so advance it first.
398         teq             r6, #0                  @ start of a block?
399         bne             .Lxtsenc3x
400
401 .Lxtsencloop3x:
402         next_tweak      q3, q3, q7, q6
403 .Lxtsenc3x:
404         subs            r4, r4, #3
405         bmi             .Lxtsenc1x              @ fewer than 3 blocks left
406         vld1.8          {q0-q1}, [r1]!          @ get 3 pt blocks
407         vld1.8          {q2}, [r1]!
408         next_tweak      q4, q3, q7, q6
409         veor            q0, q0, q3
410         next_tweak      q5, q4, q7, q6
411         veor            q1, q1, q4
412         veor            q2, q2, q5
413         bl              aes_encrypt_3x
414         veor            q0, q0, q3
415         veor            q1, q1, q4
416         veor            q2, q2, q5
417         vst1.8          {q0-q1}, [r0]!          @ write 3 ct blocks
418         vst1.8          {q2}, [r0]!
419         vmov            q3, q5                  @ q3 = last tweak used
420         teq             r4, #0
421         beq             .Lxtsencout
422         b               .Lxtsencloop3x
423 .Lxtsenc1x:
424         adds            r4, r4, #3              @ undo the subtraction: r4 = blocks left (0..2)
425         beq             .Lxtsencout
426 .Lxtsencloop:
427         vld1.8          {q0}, [r1]!
428         veor            q0, q0, q3
429         bl              aes_encrypt
430         veor            q0, q0, q3
431         vst1.8          {q0}, [r0]!
432         subs            r4, r4, #1
433         beq             .Lxtsencout             @ leave q3 at the last tweak used
434         next_tweak      q3, q3, q7, q6
435         b               .Lxtsencloop
436 .Lxtsencout:
437         vst1.8          {q3}, [r5]              @ return last tweak used through iv[]
438         pop             {r4-r6, pc}
439 ENDPROC(ce_aes_xts_encrypt)
440
441
442 ENTRY(ce_aes_xts_decrypt)
            @ XTS decryption of whole 16 byte blocks.
            @ In: r0 = out, r1 = in, r2 = rk1, r3 = rounds, [sp] = blocks,
            @     [sp, #4] = iv, [sp, #8] = rk2, [sp, #12] = first
            @ q3 holds the tweak of the current block, q4 and q5 the tweaks
            @ of the second and third block in the 3x loop. q7 is the
            @ next_tweak constant and q6 its scratch register. On exit the
            @ tweak of the last processed block is stored to iv[].
443         push            {r4-r6, lr}             @ must be exactly 16 bytes, ce_aes_xts_init relies on it
444
445         bl              ce_aes_xts_init         @ run shared prologue
446         prepare_key     r2, r3                  @ (re)load key 1, the prologue may have loaded key 2
447         vmov            q3, q0
448
            @ r6 is nonzero if the prologue just produced the first tweak
            @ (it then holds the key 2 address). r6 == 0 means first == 0:
            @ iv[] holds the last tweak already used, so advance it first.
449         teq             r6, #0                  @ start of a block?
450         bne             .Lxtsdec3x
451
452 .Lxtsdecloop3x:
453         next_tweak      q3, q3, q7, q6
454 .Lxtsdec3x:
455         subs            r4, r4, #3
456         bmi             .Lxtsdec1x              @ fewer than 3 blocks left
457         vld1.8          {q0-q1}, [r1]!          @ get 3 ct blocks
458         vld1.8          {q2}, [r1]!
459         next_tweak      q4, q3, q7, q6
460         veor            q0, q0, q3
461         next_tweak      q5, q4, q7, q6
462         veor            q1, q1, q4
463         veor            q2, q2, q5
464         bl              aes_decrypt_3x
465         veor            q0, q0, q3
466         veor            q1, q1, q4
467         veor            q2, q2, q5
468         vst1.8          {q0-q1}, [r0]!          @ write 3 pt blocks
469         vst1.8          {q2}, [r0]!
470         vmov            q3, q5                  @ q3 = last tweak used
471         teq             r4, #0
472         beq             .Lxtsdecout
473         b               .Lxtsdecloop3x
474 .Lxtsdec1x:
475         adds            r4, r4, #3              @ undo the subtraction: r4 = blocks left (0..2)
476         beq             .Lxtsdecout
477 .Lxtsdecloop:
478         vld1.8          {q0}, [r1]!
479         veor            q0, q0, q3
481         bl              aes_decrypt             @ sets up ip itself, no need to do it here
482         veor            q0, q0, q3
483         vst1.8          {q0}, [r0]!
484         subs            r4, r4, #1
485         beq             .Lxtsdecout             @ leave q3 at the last tweak used
486         next_tweak      q3, q3, q7, q6
487         b               .Lxtsdecloop
488 .Lxtsdecout:
489         vst1.8          {q3}, [r5]              @ return last tweak used through iv[]
490         pop             {r4-r6, pc}
491 ENDPROC(ce_aes_xts_decrypt)
492
493         /*
494          * u32 ce_aes_sub(u32 input) - use the aese instruction to perform the
495          *                             AES sbox substitution on each byte in
496          *                             'input'
497          */
498 ENTRY(ce_aes_sub)
            @ In: r0 = input word. Out: r0 = input with every byte replaced
            @ by its AES sbox value. Clobbers q0 and q1.
            @ aese xors the key operand into the state and then applies
            @ SubBytes and ShiftRows. With an all zero state the xor yields
            @ the key operand itself, and since all four columns hold the
            @ same word every state row consists of four equal bytes, so
            @ ShiftRows changes nothing and column 0 is the wanted result.
499         vdup.32         q1, r0                  @ input word in all four columns
500         veor            q0, q0, q0              @ state = 0
501         aese.8          q0, q1
502         vmov            r0, s0                  @ return column 0
503         bx              lr
504 ENDPROC(ce_aes_sub)
505
506         /*
507          * void ce_aes_invert(u8 *dst, u8 *src) - perform the Inverse MixColumns
508          *                                        operation on round key *src
509          */
510 ENTRY(ce_aes_invert)
            @ In: r0 = dst, r1 = src. Loads the 16 byte round key at src,
            @ applies Inverse MixColumns to it and stores the result at dst.
            @ Clobbers q0.
511         vld1.8          {q0}, [r1]
512         aesimc.8        q0, q0
513         vst1.8          {q0}, [r0]
514         bx              lr
515 ENDPROC(ce_aes_invert)