/*
 * linux/arch/arm64/crypto/aes-modes.S - chaining mode wrappers for AES
 *
 * Copyright (C) 2013 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

/* included by aes-ce.S and aes-neon.S */

        .text
        .align          4

aes_encrypt_block4x:
        encrypt_block4x v0, v1, v2, v3, w3, x2, x8, w7
        ret
ENDPROC(aes_encrypt_block4x)

aes_decrypt_block4x:
        decrypt_block4x v0, v1, v2, v3, w3, x2, x8, w7
        ret
ENDPROC(aes_decrypt_block4x)

        /*
         * aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
         *                 int blocks)
         * aes_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
         *                 int blocks)
         */

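        /*
         * Illustrative C reference only (not an API in this file): ECB runs
         * the block cipher over each 16-byte block with no chaining.
         * aes_encrypt_one() is a hypothetical single-block AES helper
         * standing in for the encrypt_block macro.
         *
         *    static void ecb_encrypt_ref(u8 out[], u8 const in[],
         *                                u8 const rk[], int rounds, int blocks)
         *    {
         *        while (blocks-- > 0) {
         *            aes_encrypt_one(out, in, rk, rounds);
         *            in  += 16;
         *            out += 16;
         *        }
         *    }
         *
         * Decryption is identical with the single-block decrypt primitive.
         */
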
AES_ENTRY(aes_ecb_encrypt)
        stp             x29, x30, [sp, #-16]!
        mov             x29, sp

        enc_prepare     w3, x2, x5

.LecbencloopNx:
        subs            w4, w4, #4
        bmi             .Lecbenc1x
        ld1             {v0.16b-v3.16b}, [x1], #64      /* get 4 pt blocks */
        bl              aes_encrypt_block4x
        st1             {v0.16b-v3.16b}, [x0], #64
        b               .LecbencloopNx
.Lecbenc1x:
        adds            w4, w4, #4
        beq             .Lecbencout
.Lecbencloop:
        ld1             {v0.16b}, [x1], #16             /* get next pt block */
        encrypt_block   v0, w3, x2, x5, w6
        st1             {v0.16b}, [x0], #16
        subs            w4, w4, #1
        bne             .Lecbencloop
.Lecbencout:
        ldp             x29, x30, [sp], #16
        ret
AES_ENDPROC(aes_ecb_encrypt)


AES_ENTRY(aes_ecb_decrypt)
        stp             x29, x30, [sp, #-16]!
        mov             x29, sp

        dec_prepare     w3, x2, x5

.LecbdecloopNx:
        subs            w4, w4, #4
        bmi             .Lecbdec1x
        ld1             {v0.16b-v3.16b}, [x1], #64      /* get 4 ct blocks */
        bl              aes_decrypt_block4x
        st1             {v0.16b-v3.16b}, [x0], #64
        b               .LecbdecloopNx
.Lecbdec1x:
        adds            w4, w4, #4
        beq             .Lecbdecout
.Lecbdecloop:
        ld1             {v0.16b}, [x1], #16             /* get next ct block */
        decrypt_block   v0, w3, x2, x5, w6
        st1             {v0.16b}, [x0], #16
        subs            w4, w4, #1
        bne             .Lecbdecloop
.Lecbdecout:
        ldp             x29, x30, [sp], #16
        ret
AES_ENDPROC(aes_ecb_decrypt)


        /*
         * aes_cbc_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
         *                 int blocks, u8 iv[])
         * aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
         *                 int blocks, u8 iv[])
         */

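        /*
         * Illustrative C reference only (not an API in this file): in CBC,
         * each plaintext block is XORed with the previous ciphertext block
         * (the IV for the first block) before encryption, and the last
         * ciphertext block is written back through iv[] as the next IV.
         * Decryption reverses this: decrypt, then XOR with the previous
         * ciphertext block.  aes_encrypt_one() is a hypothetical
         * single-block AES helper.
         *
         *    static void cbc_encrypt_ref(u8 out[], u8 const in[],
         *                                u8 const rk[], int rounds,
         *                                int blocks, u8 iv[])
         *    {
         *        while (blocks-- > 0) {
         *            for (int i = 0; i < 16; i++)
         *                iv[i] ^= in[i];
         *            aes_encrypt_one(iv, iv, rk, rounds);
         *            memcpy(out, iv, 16);
         *            in  += 16;
         *            out += 16;
         *        }
         *    }
         */
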
AES_ENTRY(aes_cbc_encrypt)
        ld1             {v4.16b}, [x5]                  /* get iv */
        enc_prepare     w3, x2, x6

.Lcbcencloop4x:
        subs            w4, w4, #4
        bmi             .Lcbcenc1x
        ld1             {v0.16b-v3.16b}, [x1], #64      /* get 4 pt blocks */
        eor             v0.16b, v0.16b, v4.16b          /* ..and xor with iv */
        encrypt_block   v0, w3, x2, x6, w7
        eor             v1.16b, v1.16b, v0.16b
        encrypt_block   v1, w3, x2, x6, w7
        eor             v2.16b, v2.16b, v1.16b
        encrypt_block   v2, w3, x2, x6, w7
        eor             v3.16b, v3.16b, v2.16b
        encrypt_block   v3, w3, x2, x6, w7
        st1             {v0.16b-v3.16b}, [x0], #64
        mov             v4.16b, v3.16b
        b               .Lcbcencloop4x
.Lcbcenc1x:
        adds            w4, w4, #4
        beq             .Lcbcencout
.Lcbcencloop:
        ld1             {v0.16b}, [x1], #16             /* get next pt block */
        eor             v4.16b, v4.16b, v0.16b          /* ..and xor with iv */
        encrypt_block   v4, w3, x2, x6, w7
        st1             {v4.16b}, [x0], #16
        subs            w4, w4, #1
        bne             .Lcbcencloop
.Lcbcencout:
        st1             {v4.16b}, [x5]                  /* return iv */
        ret
AES_ENDPROC(aes_cbc_encrypt)


AES_ENTRY(aes_cbc_decrypt)
        stp             x29, x30, [sp, #-16]!
        mov             x29, sp

        ld1             {v7.16b}, [x5]                  /* get iv */
        dec_prepare     w3, x2, x6

.LcbcdecloopNx:
        subs            w4, w4, #4
        bmi             .Lcbcdec1x
        ld1             {v0.16b-v3.16b}, [x1], #64      /* get 4 ct blocks */
        mov             v4.16b, v0.16b
        mov             v5.16b, v1.16b
        mov             v6.16b, v2.16b
        bl              aes_decrypt_block4x
        sub             x1, x1, #16
        eor             v0.16b, v0.16b, v7.16b
        eor             v1.16b, v1.16b, v4.16b
        ld1             {v7.16b}, [x1], #16             /* reload 1 ct block */
        eor             v2.16b, v2.16b, v5.16b
        eor             v3.16b, v3.16b, v6.16b
        st1             {v0.16b-v3.16b}, [x0], #64
        b               .LcbcdecloopNx
.Lcbcdec1x:
        adds            w4, w4, #4
        beq             .Lcbcdecout
.Lcbcdecloop:
        ld1             {v1.16b}, [x1], #16             /* get next ct block */
        mov             v0.16b, v1.16b                  /* ...and copy to v0 */
        decrypt_block   v0, w3, x2, x6, w7
        eor             v0.16b, v0.16b, v7.16b          /* xor with iv => pt */
        mov             v7.16b, v1.16b                  /* ct is next iv */
        st1             {v0.16b}, [x0], #16
        subs            w4, w4, #1
        bne             .Lcbcdecloop
.Lcbcdecout:
        st1             {v7.16b}, [x5]                  /* return iv */
        ldp             x29, x30, [sp], #16
        ret
AES_ENDPROC(aes_cbc_decrypt)


        /*
         * aes_cbc_cts_encrypt(u8 out[], u8 const in[], u32 const rk[],
         *                     int rounds, int bytes, u8 const iv[])
         * aes_cbc_cts_decrypt(u8 out[], u8 const in[], u32 const rk[],
         *                     int rounds, int bytes, u8 const iv[])
         */

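        /*
         * A compact description (illustrative only) of the ciphertext
         * stealing math implemented by aes_cbc_cts_encrypt below, for the
         * final 16 + d input bytes (0 < d <= 16), with IV being the last
         * ciphertext block produced so far:
         *
         *    E   = Encrypt(Pn-1 ^ IV)                  (penultimate block)
         *    C   = Encrypt((Pn padded with zeroes) ^ E)
         *    out = C (16 bytes) || first d bytes of E
         *
         * The .Lcts_permute_table further down supplies the tbl/tbx index
         * vectors that do the zero padding and byte selection, so the code
         * never reads or writes past the d valid tail bytes.
         */
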
AES_ENTRY(aes_cbc_cts_encrypt)
        adr_l           x8, .Lcts_permute_table
        sub             x4, x4, #16
        add             x9, x8, #32
        add             x8, x8, x4
        sub             x9, x9, x4
        ld1             {v3.16b}, [x8]
        ld1             {v4.16b}, [x9]

        ld1             {v0.16b}, [x1], x4              /* overlapping loads */
        ld1             {v1.16b}, [x1]

        ld1             {v5.16b}, [x5]                  /* get iv */
        enc_prepare     w3, x2, x6

        eor             v0.16b, v0.16b, v5.16b          /* xor with iv */
        tbl             v1.16b, {v1.16b}, v4.16b
        encrypt_block   v0, w3, x2, x6, w7

        eor             v1.16b, v1.16b, v0.16b
        tbl             v0.16b, {v0.16b}, v3.16b
        encrypt_block   v1, w3, x2, x6, w7

        add             x4, x0, x4
        st1             {v0.16b}, [x4]                  /* overlapping stores */
        st1             {v1.16b}, [x0]
        ret
AES_ENDPROC(aes_cbc_cts_encrypt)

AES_ENTRY(aes_cbc_cts_decrypt)
        adr_l           x8, .Lcts_permute_table
        sub             x4, x4, #16
        add             x9, x8, #32
        add             x8, x8, x4
        sub             x9, x9, x4
        ld1             {v3.16b}, [x8]
        ld1             {v4.16b}, [x9]

        ld1             {v0.16b}, [x1], x4              /* overlapping loads */
        ld1             {v1.16b}, [x1]

        ld1             {v5.16b}, [x5]                  /* get iv */
        dec_prepare     w3, x2, x6

        tbl             v2.16b, {v1.16b}, v4.16b
        decrypt_block   v0, w3, x2, x6, w7
        eor             v2.16b, v2.16b, v0.16b

        tbx             v0.16b, {v1.16b}, v4.16b
        tbl             v2.16b, {v2.16b}, v3.16b
        decrypt_block   v0, w3, x2, x6, w7
        eor             v0.16b, v0.16b, v5.16b          /* xor with iv */

        add             x4, x0, x4
        st1             {v2.16b}, [x4]                  /* overlapping stores */
        st1             {v0.16b}, [x0]
        ret
AES_ENDPROC(aes_cbc_cts_decrypt)

        .section        ".rodata", "a"
        .align          6
.Lcts_permute_table:
        .byte           0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
        .byte           0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
        .byte            0x0,  0x1,  0x2,  0x3,  0x4,  0x5,  0x6,  0x7
        .byte            0x8,  0x9,  0xa,  0xb,  0xc,  0xd,  0xe,  0xf
        .byte           0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
        .byte           0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
        .previous


        /*
         * aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
         *                 int blocks, u8 ctr[])
         */

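        /*
         * Illustrative C reference only (not an API in this file): CTR mode
         * encrypts a big-endian 128-bit counter to produce a keystream
         * block, XORs it into the data, and increments the counter for the
         * next block.  The code below keeps the low 64 counter bits
         * byte-swapped in a general purpose register so the per-block
         * increment is cheap.  aes_encrypt_one() is a hypothetical
         * single-block AES helper.
         *
         *    static void ctr_encrypt_ref(u8 out[], u8 const in[],
         *                                u8 const rk[], int rounds,
         *                                int blocks, u8 ctr[16])
         *    {
         *        u8 ks[16];
         *
         *        while (blocks-- > 0) {
         *            aes_encrypt_one(ks, ctr, rk, rounds);
         *            for (int i = 0; i < 16; i++)
         *                out[i] = in[i] ^ ks[i];
         *            for (int i = 15; i >= 0 && ++ctr[i] == 0; i--)
         *                ;                       // big-endian increment
         *            in  += 16;
         *            out += 16;
         *        }
         *    }
         */
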
AES_ENTRY(aes_ctr_encrypt)
        stp             x29, x30, [sp, #-16]!
        mov             x29, sp

        enc_prepare     w3, x2, x6
        ld1             {v4.16b}, [x5]

        umov            x6, v4.d[1]             /* keep swabbed ctr in reg */
        rev             x6, x6
        cmn             w6, w4                  /* 32 bit overflow? */
        bcs             .Lctrloop
.LctrloopNx:
        subs            w4, w4, #4
        bmi             .Lctr1x
        add             w7, w6, #1
        mov             v0.16b, v4.16b
        add             w8, w6, #2
        mov             v1.16b, v4.16b
        add             w9, w6, #3
        mov             v2.16b, v4.16b
        rev             w7, w7
        mov             v3.16b, v4.16b
        rev             w8, w8
        mov             v1.s[3], w7
        rev             w9, w9
        mov             v2.s[3], w8
        mov             v3.s[3], w9
        ld1             {v5.16b-v7.16b}, [x1], #48      /* get 3 input blocks */
        bl              aes_encrypt_block4x
        eor             v0.16b, v5.16b, v0.16b
        ld1             {v5.16b}, [x1], #16             /* get 1 input block  */
        eor             v1.16b, v6.16b, v1.16b
        eor             v2.16b, v7.16b, v2.16b
        eor             v3.16b, v5.16b, v3.16b
        st1             {v0.16b-v3.16b}, [x0], #64
        add             x6, x6, #4
        rev             x7, x6
        ins             v4.d[1], x7
        cbz             w4, .Lctrout
        b               .LctrloopNx
.Lctr1x:
        adds            w4, w4, #4
        beq             .Lctrout
.Lctrloop:
        mov             v0.16b, v4.16b
        encrypt_block   v0, w3, x2, x8, w7

        adds            x6, x6, #1              /* increment BE ctr */
        rev             x7, x6
        ins             v4.d[1], x7
        bcs             .Lctrcarry              /* overflow? */

.Lctrcarrydone:
        subs            w4, w4, #1
        bmi             .Lctrtailblock          /* blocks <0 means tail block */
        ld1             {v3.16b}, [x1], #16
        eor             v3.16b, v0.16b, v3.16b
        st1             {v3.16b}, [x0], #16
        bne             .Lctrloop

.Lctrout:
        st1             {v4.16b}, [x5]          /* return next CTR value */
        ldp             x29, x30, [sp], #16
        ret

.Lctrtailblock:
        st1             {v0.16b}, [x0]
        ldp             x29, x30, [sp], #16
        ret

.Lctrcarry:
        umov            x7, v4.d[0]             /* load upper word of ctr  */
        rev             x7, x7                  /* ... to handle the carry */
        add             x7, x7, #1
        rev             x7, x7
        ins             v4.d[0], x7
        b               .Lctrcarrydone
AES_ENDPROC(aes_ctr_encrypt)


        /*
         * aes_xts_encrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
         *                 int blocks, u8 const rk2[], u8 iv[], int first)
         * aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
         *                 int blocks, u8 const rk2[], u8 iv[], int first)
         */

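        /*
         * XTS in a nutshell (illustrative only): on the first call the IV is
         * encrypted with the second key (rk2) to form the initial tweak T,
         * and each 16-byte block i is then processed as
         *
         *    C[i]   = Encrypt(rk1, P[i] ^ T[i]) ^ T[i]
         *    T[i+1] = T[i] * x                   (in GF(2^128))
         *
         * with Decrypt substituted for the inner Encrypt in aes_xts_decrypt.
         * The tweak update is what the next_tweak macro below computes.
         */
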
        .macro          next_tweak, out, in, tmp
        sshr            \tmp\().2d,  \in\().2d,   #63
        and             \tmp\().16b, \tmp\().16b, xtsmask.16b
        add             \out\().2d,  \in\().2d,   \in\().2d
        ext             \tmp\().16b, \tmp\().16b, \tmp\().16b, #8
        eor             \out\().16b, \out\().16b, \tmp\().16b
        .endm

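        /*
         * next_tweak doubles the 128-bit tweak in GF(2^128) with reduction
         * polynomial x^128 + x^7 + x^2 + x + 1.  A plain C sketch of the
         * same operation (illustrative only, assuming two little-endian
         * 64-bit limbs with t[0] holding the low half):
         *
         *    static void next_tweak_ref(u64 t[2])
         *    {
         *        u64 carry = t[1] >> 63;         // bit 127
         *
         *        t[1] = (t[1] << 1) | (t[0] >> 63);
         *        t[0] = (t[0] << 1) ^ (carry * 0x87);
         *    }
         *
         * xts_load_mask below builds xtsmask from exactly these constants:
         * 0x1 carries bit 63 into the high limb, 0x87 folds bit 127 back
         * into the low byte.
         */
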
        .macro          xts_load_mask, tmp
        movi            xtsmask.2s, #0x1
        movi            \tmp\().2s, #0x87
        uzp1            xtsmask.4s, xtsmask.4s, \tmp\().4s
        .endm

AES_ENTRY(aes_xts_encrypt)
        stp             x29, x30, [sp, #-16]!
        mov             x29, sp

        ld1             {v4.16b}, [x6]
        xts_load_mask   v8
        cbz             w7, .Lxtsencnotfirst

        enc_prepare     w3, x5, x8
        encrypt_block   v4, w3, x5, x8, w7              /* first tweak */
        enc_switch_key  w3, x2, x8
        b               .LxtsencNx

.Lxtsencnotfirst:
        enc_prepare     w3, x2, x8
.LxtsencloopNx:
        next_tweak      v4, v4, v8
.LxtsencNx:
        subs            w4, w4, #4
        bmi             .Lxtsenc1x
        ld1             {v0.16b-v3.16b}, [x1], #64      /* get 4 pt blocks */
        next_tweak      v5, v4, v8
        eor             v0.16b, v0.16b, v4.16b
        next_tweak      v6, v5, v8
        eor             v1.16b, v1.16b, v5.16b
        eor             v2.16b, v2.16b, v6.16b
        next_tweak      v7, v6, v8
        eor             v3.16b, v3.16b, v7.16b
        bl              aes_encrypt_block4x
        eor             v3.16b, v3.16b, v7.16b
        eor             v0.16b, v0.16b, v4.16b
        eor             v1.16b, v1.16b, v5.16b
        eor             v2.16b, v2.16b, v6.16b
        st1             {v0.16b-v3.16b}, [x0], #64
        mov             v4.16b, v7.16b
        cbz             w4, .Lxtsencout
        xts_reload_mask v8
        b               .LxtsencloopNx
.Lxtsenc1x:
        adds            w4, w4, #4
        beq             .Lxtsencout
.Lxtsencloop:
        ld1             {v1.16b}, [x1], #16
        eor             v0.16b, v1.16b, v4.16b
        encrypt_block   v0, w3, x2, x8, w7
        eor             v0.16b, v0.16b, v4.16b
        st1             {v0.16b}, [x0], #16
        subs            w4, w4, #1
        beq             .Lxtsencout
        next_tweak      v4, v4, v8
        b               .Lxtsencloop
.Lxtsencout:
        st1             {v4.16b}, [x6]
        ldp             x29, x30, [sp], #16
        ret
AES_ENDPROC(aes_xts_encrypt)


AES_ENTRY(aes_xts_decrypt)
        stp             x29, x30, [sp, #-16]!
        mov             x29, sp

        ld1             {v4.16b}, [x6]
        xts_load_mask   v8
        cbz             w7, .Lxtsdecnotfirst

        enc_prepare     w3, x5, x8
        encrypt_block   v4, w3, x5, x8, w7              /* first tweak */
        dec_prepare     w3, x2, x8
        b               .LxtsdecNx

.Lxtsdecnotfirst:
        dec_prepare     w3, x2, x8
.LxtsdecloopNx:
        next_tweak      v4, v4, v8
.LxtsdecNx:
        subs            w4, w4, #4
        bmi             .Lxtsdec1x
        ld1             {v0.16b-v3.16b}, [x1], #64      /* get 4 ct blocks */
        next_tweak      v5, v4, v8
        eor             v0.16b, v0.16b, v4.16b
        next_tweak      v6, v5, v8
        eor             v1.16b, v1.16b, v5.16b
        eor             v2.16b, v2.16b, v6.16b
        next_tweak      v7, v6, v8
        eor             v3.16b, v3.16b, v7.16b
        bl              aes_decrypt_block4x
        eor             v3.16b, v3.16b, v7.16b
        eor             v0.16b, v0.16b, v4.16b
        eor             v1.16b, v1.16b, v5.16b
        eor             v2.16b, v2.16b, v6.16b
        st1             {v0.16b-v3.16b}, [x0], #64
        mov             v4.16b, v7.16b
        cbz             w4, .Lxtsdecout
        xts_reload_mask v8
        b               .LxtsdecloopNx
.Lxtsdec1x:
        adds            w4, w4, #4
        beq             .Lxtsdecout
.Lxtsdecloop:
        ld1             {v1.16b}, [x1], #16
        eor             v0.16b, v1.16b, v4.16b
        decrypt_block   v0, w3, x2, x8, w7
        eor             v0.16b, v0.16b, v4.16b
        st1             {v0.16b}, [x0], #16
        subs            w4, w4, #1
        beq             .Lxtsdecout
        next_tweak      v4, v4, v8
        b               .Lxtsdecloop
.Lxtsdecout:
        st1             {v4.16b}, [x6]
        ldp             x29, x30, [sp], #16
        ret
AES_ENDPROC(aes_xts_decrypt)

        /*
         * aes_mac_update(u8 const in[], u32 const rk[], int rounds,
         *                int blocks, u8 dg[], int enc_before, int enc_after)
         */
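        /*
         * Illustrative C reference only (not an API in this file): this is a
         * CBC-MAC style update that XORs each block into the digest and
         * encrypts it.  enc_before forces one extra encryption of the digest
         * up front, and enc_after controls whether the final block is
         * encrypted or left for the caller to finalise.  aes_encrypt_one()
         * is a hypothetical single-block AES helper.
         *
         *    static void mac_update_ref(u8 const in[], u32 const rk[],
         *                               int rounds, int blocks, u8 dg[16],
         *                               int enc_before, int enc_after)
         *    {
         *        if (enc_before)
         *            aes_encrypt_one(dg, dg, rk, rounds);
         *
         *        while (blocks-- > 0) {
         *            for (int i = 0; i < 16; i++)
         *                dg[i] ^= in[i];
         *            in += 16;
         *            if (blocks > 0 || enc_after)
         *                aes_encrypt_one(dg, dg, rk, rounds);
         *        }
         *    }
         */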
AES_ENTRY(aes_mac_update)
        frame_push      6

        mov             x19, x0
        mov             x20, x1
        mov             x21, x2
        mov             x22, x3
        mov             x23, x4
        mov             x24, x6

        ld1             {v0.16b}, [x23]                 /* get dg */
        enc_prepare     w2, x1, x7
        cbz             w5, .Lmacloop4x

        encrypt_block   v0, w2, x1, x7, w8

.Lmacloop4x:
        subs            w22, w22, #4
        bmi             .Lmac1x
        ld1             {v1.16b-v4.16b}, [x19], #64     /* get next 4 pt blocks */
        eor             v0.16b, v0.16b, v1.16b          /* ..and xor with dg */
        encrypt_block   v0, w21, x20, x7, w8
        eor             v0.16b, v0.16b, v2.16b
        encrypt_block   v0, w21, x20, x7, w8
        eor             v0.16b, v0.16b, v3.16b
        encrypt_block   v0, w21, x20, x7, w8
        eor             v0.16b, v0.16b, v4.16b
        cmp             w22, wzr
        csinv           x5, x24, xzr, eq
        cbz             w5, .Lmacout
        encrypt_block   v0, w21, x20, x7, w8
        st1             {v0.16b}, [x23]                 /* return dg */
        cond_yield_neon .Lmacrestart
        b               .Lmacloop4x
.Lmac1x:
        add             w22, w22, #4
.Lmacloop:
        cbz             w22, .Lmacout
        ld1             {v1.16b}, [x19], #16            /* get next pt block */
        eor             v0.16b, v0.16b, v1.16b          /* ..and xor with dg */

        subs            w22, w22, #1
        csinv           x5, x24, xzr, eq
        cbz             w5, .Lmacout

.Lmacenc:
        encrypt_block   v0, w21, x20, x7, w8
        b               .Lmacloop

.Lmacout:
        st1             {v0.16b}, [x23]                 /* return dg */
        frame_pop
        ret

.Lmacrestart:
        ld1             {v0.16b}, [x23]                 /* get dg */
        enc_prepare     w21, x20, x0
        b               .Lmacloop4x
AES_ENDPROC(aes_mac_update)