Merge branch 'x86-fpu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git...
[sfrench/cifs-2.6.git] / arch / arm64 / crypto / aes-modes.S
1 /*
2  * linux/arch/arm64/crypto/aes-modes.S - chaining mode wrappers for AES
3  *
4  * Copyright (C) 2013 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 as
8  * published by the Free Software Foundation.
9  */
10
11 /* included by aes-ce.S and aes-neon.S */
12
13         .text
14         .align          4
15
/*
 * Internal helper: encrypt the four AES blocks held in v0-v3 in place,
 * using the round keys at x2 with w3 rounds.  x8/w7 are handed to the
 * encrypt_block4x macro (defined by the including file, aes-ce.S or
 * aes-neon.S) — presumably as scratch registers; confirm there.
 */
16 aes_encrypt_block4x:
17         encrypt_block4x v0, v1, v2, v3, w3, x2, x8, w7
18         ret
19 ENDPROC(aes_encrypt_block4x)
20
/*
 * Internal helper: decrypt the four AES blocks held in v0-v3 in place,
 * using the round keys at x2 with w3 rounds.  x8/w7 are handed to the
 * decrypt_block4x macro (defined by the including file, aes-ce.S or
 * aes-neon.S) — presumably as scratch registers; confirm there.
 */
21 aes_decrypt_block4x:
22         decrypt_block4x v0, v1, v2, v3, w3, x2, x8, w7
23         ret
24 ENDPROC(aes_decrypt_block4x)
25
26         /*
27          * aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
28          *                 int blocks)
29          * aes_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
30          *                 int blocks)
31          */
32
/*
 * ECB encryption.
 *
 * In:  x0 = out, x1 = in, x2 = round keys, w3 = # of rounds,
 *      w4 = # of 16-byte blocks
 * Non-leaf: x29/x30 are saved around the bl to aes_encrypt_block4x.
 */
33 AES_ENTRY(aes_ecb_encrypt)
34         stp             x29, x30, [sp, #-16]!
35         mov             x29, sp
36
37         enc_prepare     w3, x2, x5
38
        /* fast path: process 4 blocks per iteration while >= 4 remain */
39 .LecbencloopNx:
40         subs            w4, w4, #4
41         bmi             .Lecbenc1x
42         ld1             {v0.16b-v3.16b}, [x1], #64      /* get 4 pt blocks */
43         bl              aes_encrypt_block4x
44         st1             {v0.16b-v3.16b}, [x0], #64
45         b               .LecbencloopNx
46 .Lecbenc1x:
        /* undo the last subtraction; w4 = 0..3 leftover blocks */
47         adds            w4, w4, #4
48         beq             .Lecbencout
49 .Lecbencloop:
50         ld1             {v0.16b}, [x1], #16             /* get next pt block */
51         encrypt_block   v0, w3, x2, x5, w6
52         st1             {v0.16b}, [x0], #16
53         subs            w4, w4, #1
54         bne             .Lecbencloop
55 .Lecbencout:
56         ldp             x29, x30, [sp], #16
57         ret
58 AES_ENDPROC(aes_ecb_encrypt)
59
60
/*
 * ECB decryption.
 *
 * In:  x0 = out, x1 = in, x2 = round keys, w3 = # of rounds,
 *      w4 = # of 16-byte blocks
 * Mirror image of aes_ecb_encrypt, using the decryption key schedule
 * (dec_prepare) and the decrypt macros.
 */
61 AES_ENTRY(aes_ecb_decrypt)
62         stp             x29, x30, [sp, #-16]!
63         mov             x29, sp
64
65         dec_prepare     w3, x2, x5
66
        /* fast path: process 4 blocks per iteration while >= 4 remain */
67 .LecbdecloopNx:
68         subs            w4, w4, #4
69         bmi             .Lecbdec1x
70         ld1             {v0.16b-v3.16b}, [x1], #64      /* get 4 ct blocks */
71         bl              aes_decrypt_block4x
72         st1             {v0.16b-v3.16b}, [x0], #64
73         b               .LecbdecloopNx
74 .Lecbdec1x:
        /* undo the last subtraction; w4 = 0..3 leftover blocks */
75         adds            w4, w4, #4
76         beq             .Lecbdecout
77 .Lecbdecloop:
78         ld1             {v0.16b}, [x1], #16             /* get next ct block */
79         decrypt_block   v0, w3, x2, x5, w6
80         st1             {v0.16b}, [x0], #16
81         subs            w4, w4, #1
82         bne             .Lecbdecloop
83 .Lecbdecout:
84         ldp             x29, x30, [sp], #16
85         ret
86 AES_ENDPROC(aes_ecb_decrypt)
87
88
89         /*
90          * aes_cbc_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
91          *                 int blocks, u8 iv[])
92          * aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
93          *                 int blocks, u8 iv[])
94          */
95
/*
 * CBC encryption.
 *
 * In:  x0 = out, x1 = in, x2 = round keys, w3 = # of rounds,
 *      w4 = # of blocks, x5 = iv (updated on return)
 * CBC encryption is inherently serial — each block is chained to the
 * previous ciphertext — so the 4x loop below is a plain unroll using
 * the single-block encrypt_block macro, not aes_encrypt_block4x.
 * v4 carries the chaining value (the iv, then each ciphertext block).
 * Leaf function: no frame record needed.
 */
96 AES_ENTRY(aes_cbc_encrypt)
97         ld1             {v4.16b}, [x5]                  /* get iv */
98         enc_prepare     w3, x2, x6
99
100 .Lcbcencloop4x:
101         subs            w4, w4, #4
102         bmi             .Lcbcenc1x
103         ld1             {v0.16b-v3.16b}, [x1], #64      /* get 4 pt blocks */
104         eor             v0.16b, v0.16b, v4.16b          /* ..and xor with iv */
105         encrypt_block   v0, w3, x2, x6, w7
106         eor             v1.16b, v1.16b, v0.16b
107         encrypt_block   v1, w3, x2, x6, w7
108         eor             v2.16b, v2.16b, v1.16b
109         encrypt_block   v2, w3, x2, x6, w7
110         eor             v3.16b, v3.16b, v2.16b
111         encrypt_block   v3, w3, x2, x6, w7
112         st1             {v0.16b-v3.16b}, [x0], #64
        /* last ciphertext block becomes the chaining value */
113         mov             v4.16b, v3.16b
114         b               .Lcbcencloop4x
115 .Lcbcenc1x:
        /* undo the last subtraction; w4 = 0..3 leftover blocks */
116         adds            w4, w4, #4
117         beq             .Lcbcencout
118 .Lcbcencloop:
119         ld1             {v0.16b}, [x1], #16             /* get next pt block */
120         eor             v4.16b, v4.16b, v0.16b          /* ..and xor with iv */
121         encrypt_block   v4, w3, x2, x6, w7
122         st1             {v4.16b}, [x0], #16
123         subs            w4, w4, #1
124         bne             .Lcbcencloop
125 .Lcbcencout:
126         st1             {v4.16b}, [x5]                  /* return iv */
127         ret
128 AES_ENDPROC(aes_cbc_encrypt)
129
130
/*
 * CBC decryption.
 *
 * In:  x0 = out, x1 = in, x2 = round keys, w3 = # of rounds,
 *      w4 = # of blocks, x5 = iv (updated on return)
 * Unlike encryption, CBC decryption parallelizes: the four ciphertext
 * blocks are decrypted together, then each result is xored with the
 * preceding ciphertext block.  v7 holds the chaining value (iv, then
 * the previous ciphertext block).
 */
131 AES_ENTRY(aes_cbc_decrypt)
132         stp             x29, x30, [sp, #-16]!
133         mov             x29, sp
134
135         ld1             {v7.16b}, [x5]                  /* get iv */
136         dec_prepare     w3, x2, x6
137
138 .LcbcdecloopNx:
139         subs            w4, w4, #4
140         bmi             .Lcbcdec1x
141         ld1             {v0.16b-v3.16b}, [x1], #64      /* get 4 ct blocks */
        /*
         * Save copies of ct blocks 1-3: v0-v3 are overwritten by the
         * decryption but are needed afterwards as chaining values.
         */
142         mov             v4.16b, v0.16b
143         mov             v5.16b, v1.16b
144         mov             v6.16b, v2.16b
145         bl              aes_decrypt_block4x
        /*
         * The 4th ct block (the next chaining value) was not copied to
         * a spare register, so re-read it from the input stream.
         */
146         sub             x1, x1, #16
147         eor             v0.16b, v0.16b, v7.16b
148         eor             v1.16b, v1.16b, v4.16b
149         ld1             {v7.16b}, [x1], #16             /* reload 1 ct block */
150         eor             v2.16b, v2.16b, v5.16b
151         eor             v3.16b, v3.16b, v6.16b
152         st1             {v0.16b-v3.16b}, [x0], #64
153         b               .LcbcdecloopNx
154 .Lcbcdec1x:
        /* undo the last subtraction; w4 = 0..3 leftover blocks */
155         adds            w4, w4, #4
156         beq             .Lcbcdecout
157 .Lcbcdecloop:
158         ld1             {v1.16b}, [x1], #16             /* get next ct block */
159         mov             v0.16b, v1.16b                  /* ...and copy to v0 */
160         decrypt_block   v0, w3, x2, x6, w7
161         eor             v0.16b, v0.16b, v7.16b          /* xor with iv => pt */
162         mov             v7.16b, v1.16b                  /* ct is next iv */
163         st1             {v0.16b}, [x0], #16
164         subs            w4, w4, #1
165         bne             .Lcbcdecloop
166 .Lcbcdecout:
167         st1             {v7.16b}, [x5]                  /* return iv */
168         ldp             x29, x30, [sp], #16
169         ret
170 AES_ENDPROC(aes_cbc_decrypt)
171
172
173         /*
174          * aes_cbc_cts_encrypt(u8 out[], u8 const in[], u32 const rk[],
175          *                     int rounds, int bytes, u8 const iv[])
176          * aes_cbc_cts_decrypt(u8 out[], u8 const in[], u32 const rk[],
177          *                     int rounds, int bytes, u8 const iv[])
178          */
179
/*
 * CBC with ciphertext stealing, encryption of the final two blocks.
 *
 * In:  x0 = out, x1 = in, x2 = round keys, w3 = # of rounds,
 *      x4 = # of bytes, x5 = iv
 * The overlapping-load arithmetic only makes sense for
 * 16 < bytes <= 32, so callers presumably guarantee that range —
 * confirm in the C glue code.  v3/v4 are tbl permute vectors taken
 * from .Lcts_permute_table: indices >= 16 come out as 0xff, which tbl
 * maps to zero, implementing the shift/zero-pad of the partial block.
 * Leaf function: no frame record needed.
 */
180 AES_ENTRY(aes_cbc_cts_encrypt)
181         adr_l           x8, .Lcts_permute_table
182         sub             x4, x4, #16             /* x4 = size of final partial block */
183         add             x9, x8, #32
184         add             x8, x8, x4
185         sub             x9, x9, x4
186         ld1             {v3.16b}, [x8]
187         ld1             {v4.16b}, [x9]
188
189         ld1             {v0.16b}, [x1], x4              /* overlapping loads */
190         ld1             {v1.16b}, [x1]
191
192         ld1             {v5.16b}, [x5]                  /* get iv */
193         enc_prepare     w3, x2, x6
194
195         eor             v0.16b, v0.16b, v5.16b          /* xor with iv */
        /* shift the partial final block up, zero-padding the tail */
196         tbl             v1.16b, {v1.16b}, v4.16b
197         encrypt_block   v0, w3, x2, x6, w7
198
199         eor             v1.16b, v1.16b, v0.16b
200         tbl             v0.16b, {v0.16b}, v3.16b
201         encrypt_block   v1, w3, x2, x6, w7
202
        /* CTS swaps the last two blocks: v1 first, truncated v0 last */
203         add             x4, x0, x4
204         st1             {v0.16b}, [x4]                  /* overlapping stores */
205         st1             {v1.16b}, [x0]
206         ret
207 AES_ENDPROC(aes_cbc_cts_encrypt)
208
/*
 * CBC with ciphertext stealing, decryption of the final two blocks.
 *
 * In:  x0 = out, x1 = in, x2 = round keys, w3 = # of rounds,
 *      x4 = # of bytes, x5 = iv
 * Same permute-table setup as aes_cbc_cts_encrypt (and the same
 * presumed 16 < bytes <= 32 constraint — confirm in the C glue code).
 * The penultimate ct block is decrypted twice: once to recover the
 * stolen tail bytes, and once more — with the stolen bytes merged back
 * in via tbx — to recover the plaintext proper.
 * Leaf function: no frame record needed.
 */
209 AES_ENTRY(aes_cbc_cts_decrypt)
210         adr_l           x8, .Lcts_permute_table
211         sub             x4, x4, #16             /* x4 = size of final partial block */
212         add             x9, x8, #32
213         add             x8, x8, x4
214         sub             x9, x9, x4
215         ld1             {v3.16b}, [x8]
216         ld1             {v4.16b}, [x9]
217
218         ld1             {v0.16b}, [x1], x4              /* overlapping loads */
219         ld1             {v1.16b}, [x1]
220
221         ld1             {v5.16b}, [x5]                  /* get iv */
222         dec_prepare     w3, x2, x6
223
224         tbl             v2.16b, {v1.16b}, v4.16b
225         decrypt_block   v0, w3, x2, x6, w7
226         eor             v2.16b, v2.16b, v0.16b
227
        /* merge the stolen ciphertext bytes back into the last block */
228         tbx             v0.16b, {v1.16b}, v4.16b
229         tbl             v2.16b, {v2.16b}, v3.16b
230         decrypt_block   v0, w3, x2, x6, w7
231         eor             v0.16b, v0.16b, v5.16b          /* xor with iv */
232
233         add             x4, x0, x4
234         st1             {v2.16b}, [x4]                  /* overlapping stores */
235         st1             {v0.16b}, [x0]
236         ret
237 AES_ENDPROC(aes_cbc_cts_decrypt)
238
239         .section        ".rodata", "a"
240         .align          6
        /*
         * Permute table for the CTS overlapping load/store trick:
         * 16 x 0xff, the identity permutation 0x00-0x0f, 16 x 0xff.
         * Indexing a 16-byte window at (base + n) or (base + 32 - n)
         * yields a tbl vector that shifts a block by n bytes; the 0xff
         * entries are out of range for tbl and therefore select zero.
         */
241 .Lcts_permute_table:
242         .byte           0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
243         .byte           0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
244         .byte            0x0,  0x1,  0x2,  0x3,  0x4,  0x5,  0x6,  0x7
245         .byte            0x8,  0x9,  0xa,  0xb,  0xc,  0xd,  0xe,  0xf
246         .byte           0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
247         .byte           0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
248         .previous
249
250
251         /*
252          * aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
253          *                 int blocks, u8 ctr[])
254          */
255
/*
 * CTR encryption (identical to decryption in counter mode).
 *
 * In:  x0 = out, x1 = in, x2 = round keys, w3 = # of rounds,
 *      w4 = # of blocks, x5 = big-endian counter block (updated on
 *      return)
 * v4 holds the counter block; x6 mirrors its low 64 bits, byte-swapped
 * into CPU order so it can be incremented with ordinary arithmetic.
 */
256 AES_ENTRY(aes_ctr_encrypt)
257         stp             x29, x30, [sp, #-16]!
258         mov             x29, sp
259
260         enc_prepare     w3, x2, x6
261         ld1             {v4.16b}, [x5]
262
263         umov            x6, v4.d[1]             /* keep swabbed ctr in reg */
264         rev             x6, x6
        /*
         * If the low 32 bits of the counter could wrap during this call,
         * fall back to the 1-block loop, which propagates the carry into
         * the upper counter word (.Lctrcarry); the 4x path only bumps
         * the low 32 bits of the per-block counters.
         */
265         cmn             w6, w4                  /* 32 bit overflow? */
266         bcs             .Lctrloop
267 .LctrloopNx:
268         subs            w4, w4, #4
269         bmi             .Lctr1x
        /*
         * Build the 4 counter blocks: v0 = current counter, v1-v3 get
         * ctr+1..ctr+3 inserted big-endian into their last 32-bit lane.
         */
270         add             w7, w6, #1
271         mov             v0.16b, v4.16b
272         add             w8, w6, #2
273         mov             v1.16b, v4.16b
274         add             w9, w6, #3
275         mov             v2.16b, v4.16b
276         rev             w7, w7
277         mov             v3.16b, v4.16b
278         rev             w8, w8
279         mov             v1.s[3], w7
280         rev             w9, w9
281         mov             v2.s[3], w8
282         mov             v3.s[3], w9
        /* only 3 spare regs (v5-v7): 4th input block is loaded later */
283         ld1             {v5.16b-v7.16b}, [x1], #48      /* get 3 input blocks */
284         bl              aes_encrypt_block4x
285         eor             v0.16b, v5.16b, v0.16b
286         ld1             {v5.16b}, [x1], #16             /* get 1 input block  */
287         eor             v1.16b, v6.16b, v1.16b
288         eor             v2.16b, v7.16b, v2.16b
289         eor             v3.16b, v5.16b, v3.16b
290         st1             {v0.16b-v3.16b}, [x0], #64
        /* advance the counter by 4 and write it back into v4 */
291         add             x6, x6, #4
292         rev             x7, x6
293         ins             v4.d[1], x7
294         cbz             w4, .Lctrout
295         b               .LctrloopNx
296 .Lctr1x:
        /* undo the last subtraction; w4 = 0..3 leftover blocks */
297         adds            w4, w4, #4
298         beq             .Lctrout
299 .Lctrloop:
300         mov             v0.16b, v4.16b
301         encrypt_block   v0, w3, x2, x8, w7
302
303         adds            x6, x6, #1              /* increment BE ctr */
304         rev             x7, x6
305         ins             v4.d[1], x7
306         bcs             .Lctrcarry              /* overflow? */
307
308 .Lctrcarrydone:
309         subs            w4, w4, #1
310         bmi             .Lctrtailblock          /* blocks <0 means tail block */
311         ld1             {v3.16b}, [x1], #16
312         eor             v3.16b, v0.16b, v3.16b
313         st1             {v3.16b}, [x0], #16
314         bne             .Lctrloop
315
316 .Lctrout:
317         st1             {v4.16b}, [x5]          /* return next CTR value */
318         ldp             x29, x30, [sp], #16
319         ret
320
.Lctrtailblock_doc: /* see below: raw keystream for the final partial block */
        /*
         * Partial tail block: store the raw keystream block; the caller
         * presumably xors it with the remaining input bytes in C —
         * confirm against the glue code.
         */
321 .Lctrtailblock:
322         st1             {v0.16b}, [x0]
323         b               .Lctrout
324
        /* carry out of the low 64 counter bits: bump the upper word */
325 .Lctrcarry:
326         umov            x7, v4.d[0]             /* load upper word of ctr  */
327         rev             x7, x7                  /* ... to handle the carry */
328         add             x7, x7, #1
329         rev             x7, x7
330         ins             v4.d[0], x7
331         b               .Lctrcarrydone
332 AES_ENDPROC(aes_ctr_encrypt)
333
334
335         /*
336          * aes_xts_encrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
337          *                 int blocks, u8 const rk2[], u8 iv[], int first)
338          * aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
339          *                 int blocks, u8 const rk2[], u8 iv[], int first)
340          */
341
        /*
         * next_tweak: advance the XTS tweak, i.e. multiply \in by x in
         * GF(2^128).  The doubling is the 'add \out, \in, \in'; the
         * sshr #63 replicates each 64-bit lane's top bit, the and masks
         * it with the reduction constants held in xtsmask (an alias
         * presumably established by the including file — confirm), and
         * ext #8 swaps the halves so the carry out of the high lane
         * folds 0x87 back into the low lane.
         */
342         .macro          next_tweak, out, in, tmp
343         sshr            \tmp\().2d,  \in\().2d,   #63
344         and             \tmp\().16b, \tmp\().16b, xtsmask.16b
345         add             \out\().2d,  \in\().2d,   \in\().2d
346         ext             \tmp\().16b, \tmp\().16b, \tmp\().16b, #8
347         eor             \out\().16b, \out\().16b, \tmp\().16b
348         .endm
349
        /*
         * xts_load_mask: materialize the mask { 0x1, 0x87 } (as two
         * 64-bit lanes) used by next_tweak, without a memory load:
         * uzp1 interleaves the even 32-bit lanes of the two movi
         * results.  \tmp is clobbered.
         */
350         .macro          xts_load_mask, tmp
351         movi            xtsmask.2s, #0x1
352         movi            \tmp\().2s, #0x87
353         uzp1            xtsmask.4s, xtsmask.4s, \tmp\().4s
354         .endm
355
/*
 * XTS encryption.
 *
 * In:  x0 = out, x1 = in, x2 = data round keys (rk1), w3 = # of rounds,
 *      w4 = # of blocks, x5 = tweak round keys (rk2), x6 = iv/tweak
 *      (updated on return), w7 = 'first' flag — nonzero on the first
 *      call for a message, in which case the iv is encrypted with rk2
 *      to produce the initial tweak.
 * v4 holds the current tweak, v5-v7 the three look-ahead tweaks in the
 * 4x path, v8 the reduction mask.  xts_reload_mask is provided by the
 * including file — presumably to restore v8 where the 4x helper may
 * clobber it; confirm in aes-ce.S/aes-neon.S.
 */
356 AES_ENTRY(aes_xts_encrypt)
357         stp             x29, x30, [sp, #-16]!
358         mov             x29, sp
359
360         ld1             {v4.16b}, [x6]
361         xts_load_mask   v8
362         cbz             w7, .Lxtsencnotfirst
363
        /* first call: derive the initial tweak by encrypting the iv */
364         enc_prepare     w3, x5, x8
365         encrypt_block   v4, w3, x5, x8, w7              /* first tweak */
366         enc_switch_key  w3, x2, x8
367         b               .LxtsencNx
368
369 .Lxtsencnotfirst:
370         enc_prepare     w3, x2, x8
371 .LxtsencloopNx:
372         next_tweak      v4, v4, v8
373 .LxtsencNx:
374         subs            w4, w4, #4
375         bmi             .Lxtsenc1x
376         ld1             {v0.16b-v3.16b}, [x1], #64      /* get 4 pt blocks */
        /* xor each block with its tweak before and after encryption */
377         next_tweak      v5, v4, v8
378         eor             v0.16b, v0.16b, v4.16b
379         next_tweak      v6, v5, v8
380         eor             v1.16b, v1.16b, v5.16b
381         eor             v2.16b, v2.16b, v6.16b
382         next_tweak      v7, v6, v8
383         eor             v3.16b, v3.16b, v7.16b
384         bl              aes_encrypt_block4x
385         eor             v3.16b, v3.16b, v7.16b
386         eor             v0.16b, v0.16b, v4.16b
387         eor             v1.16b, v1.16b, v5.16b
388         eor             v2.16b, v2.16b, v6.16b
389         st1             {v0.16b-v3.16b}, [x0], #64
390         mov             v4.16b, v7.16b
391         cbz             w4, .Lxtsencout
392         xts_reload_mask v8
393         b               .LxtsencloopNx
394 .Lxtsenc1x:
        /* undo the last subtraction; w4 = 0..3 leftover blocks */
395         adds            w4, w4, #4
396         beq             .Lxtsencout
397 .Lxtsencloop:
398         ld1             {v1.16b}, [x1], #16
399         eor             v0.16b, v1.16b, v4.16b
400         encrypt_block   v0, w3, x2, x8, w7
401         eor             v0.16b, v0.16b, v4.16b
402         st1             {v0.16b}, [x0], #16
403         subs            w4, w4, #1
404         beq             .Lxtsencout
405         next_tweak      v4, v4, v8
406         b               .Lxtsencloop
407 .Lxtsencout:
408         st1             {v4.16b}, [x6]          /* return current tweak */
409         ldp             x29, x30, [sp], #16
410         ret
411 AES_ENDPROC(aes_xts_encrypt)
412
413
/*
 * XTS decryption.
 *
 * In:  x0 = out, x1 = in, x2 = data round keys (rk1), w3 = # of rounds,
 *      w4 = # of blocks, x5 = tweak round keys (rk2), x6 = iv/tweak
 *      (updated on return), w7 = 'first' flag — nonzero on the first
 *      call for a message.
 * Mirror image of aes_xts_encrypt: the tweak is still derived by
 * ENcrypting the iv with rk2 (XTS tweak generation always uses
 * encryption), while the data blocks use the decryption schedule.
 */
414 AES_ENTRY(aes_xts_decrypt)
415         stp             x29, x30, [sp, #-16]!
416         mov             x29, sp
417
418         ld1             {v4.16b}, [x6]
419         xts_load_mask   v8
420         cbz             w7, .Lxtsdecnotfirst
421
        /* first call: derive the initial tweak by encrypting the iv */
422         enc_prepare     w3, x5, x8
423         encrypt_block   v4, w3, x5, x8, w7              /* first tweak */
424         dec_prepare     w3, x2, x8
425         b               .LxtsdecNx
426
427 .Lxtsdecnotfirst:
428         dec_prepare     w3, x2, x8
429 .LxtsdecloopNx:
430         next_tweak      v4, v4, v8
431 .LxtsdecNx:
432         subs            w4, w4, #4
433         bmi             .Lxtsdec1x
434         ld1             {v0.16b-v3.16b}, [x1], #64      /* get 4 ct blocks */
        /* xor each block with its tweak before and after decryption */
435         next_tweak      v5, v4, v8
436         eor             v0.16b, v0.16b, v4.16b
437         next_tweak      v6, v5, v8
438         eor             v1.16b, v1.16b, v5.16b
439         eor             v2.16b, v2.16b, v6.16b
440         next_tweak      v7, v6, v8
441         eor             v3.16b, v3.16b, v7.16b
442         bl              aes_decrypt_block4x
443         eor             v3.16b, v3.16b, v7.16b
444         eor             v0.16b, v0.16b, v4.16b
445         eor             v1.16b, v1.16b, v5.16b
446         eor             v2.16b, v2.16b, v6.16b
447         st1             {v0.16b-v3.16b}, [x0], #64
448         mov             v4.16b, v7.16b
449         cbz             w4, .Lxtsdecout
450         xts_reload_mask v8
451         b               .LxtsdecloopNx
452 .Lxtsdec1x:
        /* undo the last subtraction; w4 = 0..3 leftover blocks */
453         adds            w4, w4, #4
454         beq             .Lxtsdecout
455 .Lxtsdecloop:
456         ld1             {v1.16b}, [x1], #16
457         eor             v0.16b, v1.16b, v4.16b
458         decrypt_block   v0, w3, x2, x8, w7
459         eor             v0.16b, v0.16b, v4.16b
460         st1             {v0.16b}, [x0], #16
461         subs            w4, w4, #1
462         beq             .Lxtsdecout
463         next_tweak      v4, v4, v8
464         b               .Lxtsdecloop
465 .Lxtsdecout:
466         st1             {v4.16b}, [x6]          /* return current tweak */
467         ldp             x29, x30, [sp], #16
468         ret
469 AES_ENDPROC(aes_xts_decrypt)
470
471         /*
472          * aes_mac_update(u8 const in[], u32 const rk[], int rounds,
473          *                int blocks, u8 dg[], int enc_before, int enc_after)
474          */
/*
 * CBC-MAC style digest update.
 *
 * In:  x0 = in, x1 = round keys, w2 = # of rounds, w3 = # of blocks,
 *      x4 = dg (16-byte digest, updated in place),
 *      w5 = enc_before (nonzero: encrypt dg before absorbing data),
 *      w6 = enc_after  (nonzero: also encrypt dg after the last block)
 * frame_push/frame_pop (kernel macros) preserve x19-x24, which mirror
 * the arguments so they survive the cond_yield_neon preemption point;
 * .Lmacrestart re-loads the digest and re-runs enc_prepare after the
 * NEON state was given up.
 */
475 AES_ENTRY(aes_mac_update)
476         frame_push      6
477
478         mov             x19, x0
479         mov             x20, x1
480         mov             x21, x2
481         mov             x22, x3
482         mov             x23, x4
483         mov             x24, x6
484
485         ld1             {v0.16b}, [x23]                 /* get dg */
486         enc_prepare     w2, x1, x7
487         cbz             w5, .Lmacloop4x
488
        /* enc_before: whiten the digest before absorbing any data */
489         encrypt_block   v0, w2, x1, x7, w8
490
491 .Lmacloop4x:
492         subs            w22, w22, #4
493         bmi             .Lmac1x
494         ld1             {v1.16b-v4.16b}, [x19], #64     /* get next pt block */
495         eor             v0.16b, v0.16b, v1.16b          /* ..and xor with dg */
496         encrypt_block   v0, w21, x20, x7, w8
497         eor             v0.16b, v0.16b, v2.16b
498         encrypt_block   v0, w21, x20, x7, w8
499         eor             v0.16b, v0.16b, v3.16b
500         encrypt_block   v0, w21, x20, x7, w8
501         eor             v0.16b, v0.16b, v4.16b
        /*
         * x5 = enc_after if this was the last block (w22 == 0),
         * otherwise -1 so the digest is always encrypted mid-stream.
         */
502         cmp             w22, wzr
503         csinv           x5, x24, xzr, eq
504         cbz             w5, .Lmacout
505         encrypt_block   v0, w21, x20, x7, w8
506         st1             {v0.16b}, [x23]                 /* return dg */
        /* voluntary preemption point between 4-block chunks */
507         cond_yield_neon .Lmacrestart
508         b               .Lmacloop4x
509 .Lmac1x:
        /* undo the last subtraction; w22 = 0..3 leftover blocks */
510         add             w22, w22, #4
511 .Lmacloop:
512         cbz             w22, .Lmacout
513         ld1             {v1.16b}, [x19], #16            /* get next pt block */
514         eor             v0.16b, v0.16b, v1.16b          /* ..and xor with dg */
515
516         subs            w22, w22, #1
517         csinv           x5, x24, xzr, eq
518         cbz             w5, .Lmacout
519
520 .Lmacenc:
521         encrypt_block   v0, w21, x20, x7, w8
522         b               .Lmacloop
523
524 .Lmacout:
525         st1             {v0.16b}, [x23]                 /* return dg */
526         frame_pop
527         ret
528
        /* resume after a yield: digest and key schedule must be redone */
529 .Lmacrestart:
530         ld1             {v0.16b}, [x23]                 /* get dg */
531         enc_prepare     w21, x20, x0
532         b               .Lmacloop4x
533 AES_ENDPROC(aes_mac_update)