treewide: Replace GPLv2 boilerplate/reference with SPDX - rule 500
[sfrench/cifs-2.6.git] / arch / arm64 / crypto / aes-modes.S
1 /* SPDX-License-Identifier: GPL-2.0-only */
2 /*
3  * linux/arch/arm64/crypto/aes-modes.S - chaining mode wrappers for AES
4  *
5  * Copyright (C) 2013 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
6  */
7
8 /* included by aes-ce.S and aes-neon.S */
9
10         .text
11         .align          4
12
/*
 * aes_encrypt_block4x - encrypt four AES blocks in parallel
 *
 * In/out: v0-v3 = the four blocks, transformed in place
 * In:     w3 = number of rounds, x2 = round key array
 * x8/w7 are passed to the encrypt_block4x macro as scratch; the macro
 * itself is supplied by the including file (aes-ce.S or aes-neon.S).
 * Leaf helper, reached via bl from the mode routines below.
 */
13 aes_encrypt_block4x:
14         encrypt_block4x v0, v1, v2, v3, w3, x2, x8, w7
15         ret
16 ENDPROC(aes_encrypt_block4x)
17
/*
 * aes_decrypt_block4x - decrypt four AES blocks in parallel
 *
 * In/out: v0-v3 = the four blocks, transformed in place
 * In:     w3 = number of rounds, x2 = round key array
 * x8/w7 are passed to the decrypt_block4x macro as scratch; the macro
 * itself is supplied by the including file (aes-ce.S or aes-neon.S).
 * Leaf helper, reached via bl from the mode routines below.
 */
18 aes_decrypt_block4x:
19         decrypt_block4x v0, v1, v2, v3, w3, x2, x8, w7
20         ret
21 ENDPROC(aes_decrypt_block4x)
22
23         /*
24          * aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
25          *                 int blocks)
26          * aes_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
27          *                 int blocks)
28          */
29
/*
 * ECB encryption.  Register assignment (per the prototype above):
 *   x0 = out, x1 = in, x2 = round keys, w3 = rounds, w4 = block count
 * Bulk path handles 4 blocks per iteration via aes_encrypt_block4x;
 * the tail loop finishes the remaining 1-3 blocks.
 */
30 AES_ENTRY(aes_ecb_encrypt)
31         stp             x29, x30, [sp, #-16]!           /* frame: bl below clobbers x30 */
32         mov             x29, sp
33
34         enc_prepare     w3, x2, x5                      /* set up round keys; x5 is scratch */
35
36 .LecbencloopNx:
37         subs            w4, w4, #4                      /* at least 4 blocks left? */
38         bmi             .Lecbenc1x
39         ld1             {v0.16b-v3.16b}, [x1], #64      /* get 4 pt blocks */
40         bl              aes_encrypt_block4x
41         st1             {v0.16b-v3.16b}, [x0], #64
42         b               .LecbencloopNx
43 .Lecbenc1x:
44         adds            w4, w4, #4                      /* undo bias; 0 means done */
45         beq             .Lecbencout
46 .Lecbencloop:
47         ld1             {v0.16b}, [x1], #16             /* get next pt block */
48         encrypt_block   v0, w3, x2, x5, w6
49         st1             {v0.16b}, [x0], #16
50         subs            w4, w4, #1
51         bne             .Lecbencloop
52 .Lecbencout:
53         ldp             x29, x30, [sp], #16
54         ret
55 AES_ENDPROC(aes_ecb_encrypt)
56
57
/*
 * ECB decryption.  Mirror image of aes_ecb_encrypt:
 *   x0 = out, x1 = in, x2 = round keys, w3 = rounds, w4 = block count
 * 4-block bulk loop via aes_decrypt_block4x, then a 1-block tail loop.
 */
58 AES_ENTRY(aes_ecb_decrypt)
59         stp             x29, x30, [sp, #-16]!           /* frame: bl below clobbers x30 */
60         mov             x29, sp
61
62         dec_prepare     w3, x2, x5                      /* set up decryption keys; x5 scratch */
63
64 .LecbdecloopNx:
65         subs            w4, w4, #4                      /* at least 4 blocks left? */
66         bmi             .Lecbdec1x
67         ld1             {v0.16b-v3.16b}, [x1], #64      /* get 4 ct blocks */
68         bl              aes_decrypt_block4x
69         st1             {v0.16b-v3.16b}, [x0], #64
70         b               .LecbdecloopNx
71 .Lecbdec1x:
72         adds            w4, w4, #4                      /* undo bias; 0 means done */
73         beq             .Lecbdecout
74 .Lecbdecloop:
75         ld1             {v0.16b}, [x1], #16             /* get next ct block */
76         decrypt_block   v0, w3, x2, x5, w6
77         st1             {v0.16b}, [x0], #16
78         subs            w4, w4, #1
79         bne             .Lecbdecloop
80 .Lecbdecout:
81         ldp             x29, x30, [sp], #16
82         ret
83 AES_ENDPROC(aes_ecb_decrypt)
84
85
86         /*
87          * aes_cbc_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
88          *                 int blocks, u8 iv[])
89          * aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
90          *                 int blocks, u8 iv[])
91          */
92
/*
 * CBC encryption.  x0 = out, x1 = in, x2 = round keys, w3 = rounds,
 * w4 = block count, x5 = iv (updated on return).
 *
 * CBC encryption is inherently serial (each block is xor'ed with the
 * previous ciphertext before encryption), so even the 4x loop encrypts
 * one block at a time -- the unrolling only batches the loads/stores.
 * Leaf routine: no frame needed, encrypt_block is a macro, not a call.
 */
93 AES_ENTRY(aes_cbc_encrypt)
94         ld1             {v4.16b}, [x5]                  /* get iv */
95         enc_prepare     w3, x2, x6
96
97 .Lcbcencloop4x:
98         subs            w4, w4, #4
99         bmi             .Lcbcenc1x
100         ld1             {v0.16b-v3.16b}, [x1], #64      /* get 4 pt blocks */
101         eor             v0.16b, v0.16b, v4.16b          /* ..and xor with iv */
102         encrypt_block   v0, w3, x2, x6, w7
103         eor             v1.16b, v1.16b, v0.16b          /* chain ct into next block */
104         encrypt_block   v1, w3, x2, x6, w7
105         eor             v2.16b, v2.16b, v1.16b
106         encrypt_block   v2, w3, x2, x6, w7
107         eor             v3.16b, v3.16b, v2.16b
108         encrypt_block   v3, w3, x2, x6, w7
109         st1             {v0.16b-v3.16b}, [x0], #64
110         mov             v4.16b, v3.16b                  /* last ct becomes next iv */
111         b               .Lcbcencloop4x
112 .Lcbcenc1x:
113         adds            w4, w4, #4                      /* undo bias; 0 means done */
114         beq             .Lcbcencout
115 .Lcbcencloop:
116         ld1             {v0.16b}, [x1], #16             /* get next pt block */
117         eor             v4.16b, v4.16b, v0.16b          /* ..and xor with iv */
118         encrypt_block   v4, w3, x2, x6, w7
119         st1             {v4.16b}, [x0], #16
120         subs            w4, w4, #1
121         bne             .Lcbcencloop
122 .Lcbcencout:
123         st1             {v4.16b}, [x5]                  /* return iv */
124         ret
125 AES_ENDPROC(aes_cbc_encrypt)
126
127
/*
 * CBC decryption.  x0 = out, x1 = in, x2 = round keys, w3 = rounds,
 * w4 = block count, x5 = iv (updated on return).
 *
 * Unlike encryption, CBC decryption parallelizes: all 4 blocks are
 * decrypted at once, then each result is xor'ed with the previous
 * ciphertext block.  v7 carries the iv / previous ct across iterations;
 * v4-v6 preserve copies of ct blocks 0-2 before the in-place decrypt.
 */
128 AES_ENTRY(aes_cbc_decrypt)
129         stp             x29, x30, [sp, #-16]!           /* frame: bl below clobbers x30 */
130         mov             x29, sp
131
132         ld1             {v7.16b}, [x5]                  /* get iv */
133         dec_prepare     w3, x2, x6
134
135 .LcbcdecloopNx:
136         subs            w4, w4, #4
137         bmi             .Lcbcdec1x
138         ld1             {v0.16b-v3.16b}, [x1], #64      /* get 4 ct blocks */
139         mov             v4.16b, v0.16b                  /* stash ct 0..2: needed as */
140         mov             v5.16b, v1.16b                  /* chaining values after the */
141         mov             v6.16b, v2.16b                  /* in-place 4x decrypt */
142         bl              aes_decrypt_block4x
143         sub             x1, x1, #16                     /* back up to ct block 3 */
144         eor             v0.16b, v0.16b, v7.16b
145         eor             v1.16b, v1.16b, v4.16b
146         ld1             {v7.16b}, [x1], #16             /* reload 1 ct block */
147         eor             v2.16b, v2.16b, v5.16b          /* (ct block 3 = next iv, v7) */
148         eor             v3.16b, v3.16b, v6.16b
149         st1             {v0.16b-v3.16b}, [x0], #64
150         b               .LcbcdecloopNx
151 .Lcbcdec1x:
152         adds            w4, w4, #4                      /* undo bias; 0 means done */
153         beq             .Lcbcdecout
154 .Lcbcdecloop:
155         ld1             {v1.16b}, [x1], #16             /* get next ct block */
156         mov             v0.16b, v1.16b                  /* ...and copy to v0 */
157         decrypt_block   v0, w3, x2, x6, w7
158         eor             v0.16b, v0.16b, v7.16b          /* xor with iv => pt */
159         mov             v7.16b, v1.16b                  /* ct is next iv */
160         st1             {v0.16b}, [x0], #16
161         subs            w4, w4, #1
162         bne             .Lcbcdecloop
163 .Lcbcdecout:
164         st1             {v7.16b}, [x5]                  /* return iv */
165         ldp             x29, x30, [sp], #16
166         ret
167 AES_ENDPROC(aes_cbc_decrypt)
168
169
170         /*
171          * aes_cbc_cts_encrypt(u8 out[], u8 const in[], u32 const rk[],
172          *                     int rounds, int bytes, u8 const iv[])
173          * aes_cbc_cts_decrypt(u8 out[], u8 const in[], u32 const rk[],
174          *                     int rounds, int bytes, u8 const iv[])
175          */
176
/*
 * CBC ciphertext-stealing encryption of the final two blocks.
 * x0 = out, x1 = in, x2 = round keys, w3 = rounds, w4 = bytes (16..31
 * presumably -- the code assumes one full and one partial block; TODO
 * confirm against the caller), x5 = iv.
 *
 * x4 = bytes - 16 = length of the partial block.  The two permute
 * vectors are loaded from .Lcts_permute_table at offsets determined by
 * x4: v3 moves the retained ct bytes into the final partial block, v4
 * gathers the partial pt bytes for the swap.  Loads/stores of the two
 * blocks deliberately overlap by (16 - x4) bytes.
 */
177 AES_ENTRY(aes_cbc_cts_encrypt)
178         adr_l           x8, .Lcts_permute_table
179         sub             x4, x4, #16                     /* x4 = size of partial block */
180         add             x9, x8, #32
181         add             x8, x8, x4
182         sub             x9, x9, x4
183         ld1             {v3.16b}, [x8]                  /* permute for final ct block */
184         ld1             {v4.16b}, [x9]                  /* permute for stolen pt bytes */
185
186         ld1             {v0.16b}, [x1], x4              /* overlapping loads */
187         ld1             {v1.16b}, [x1]
188
189         ld1             {v5.16b}, [x5]                  /* get iv */
190         enc_prepare     w3, x2, x6
191
192         eor             v0.16b, v0.16b, v5.16b          /* xor with iv */
193         tbl             v1.16b, {v1.16b}, v4.16b        /* align partial pt block */
194         encrypt_block   v0, w3, x2, x6, w7
195
196         eor             v1.16b, v1.16b, v0.16b          /* chain into final block */
197         tbl             v0.16b, {v0.16b}, v3.16b        /* truncate/shift stolen ct */
198         encrypt_block   v1, w3, x2, x6, w7
199
200         add             x4, x0, x4
201         st1             {v0.16b}, [x4]                  /* overlapping stores */
202         st1             {v1.16b}, [x0]                  /* full final ct block first */
203         ret
204 AES_ENDPROC(aes_cbc_cts_encrypt)
205
/*
 * CBC ciphertext-stealing decryption of the final two blocks.
 * Register usage matches aes_cbc_cts_encrypt: x0 = out, x1 = in,
 * x2 = round keys, w3 = rounds, w4 = bytes, x5 = iv.
 * x4 = bytes - 16 = size of the partial block; v3/v4 are permute
 * vectors from .Lcts_permute_table; loads/stores overlap.
 */
206 AES_ENTRY(aes_cbc_cts_decrypt)
207         adr_l           x8, .Lcts_permute_table
208         sub             x4, x4, #16                     /* x4 = size of partial block */
209         add             x9, x8, #32
210         add             x8, x8, x4
211         sub             x9, x9, x4
212         ld1             {v3.16b}, [x8]
213         ld1             {v4.16b}, [x9]
214
215         ld1             {v0.16b}, [x1], x4              /* overlapping loads */
216         ld1             {v1.16b}, [x1]
217
218         ld1             {v5.16b}, [x5]                  /* get iv */
219         dec_prepare     w3, x2, x6
220
221         tbl             v2.16b, {v1.16b}, v4.16b        /* align partial ct block */
222         decrypt_block   v0, w3, x2, x6, w7
223         eor             v2.16b, v2.16b, v0.16b          /* partial pt block */
224
225         tbx             v0.16b, {v1.16b}, v4.16b        /* merge stolen ct bytes back */
226         tbl             v2.16b, {v2.16b}, v3.16b        /* shift partial pt into place */
227         decrypt_block   v0, w3, x2, x6, w7
228         eor             v0.16b, v0.16b, v5.16b          /* xor with iv */
229
230         add             x4, x0, x4
231         st1             {v2.16b}, [x4]                  /* overlapping stores */
232         st1             {v0.16b}, [x0]                  /* full pt block first */
233         ret
234 AES_ENDPROC(aes_cbc_cts_decrypt)
235
236         .section        ".rodata", "a"
237         .align          6
/*
 * Sliding permute table for the CTS tbl/tbx shuffles above: a 0..15
 * identity ramp padded by 16 bytes of 0xff on either side.  Indexing
 * the table at (partial-block-size)-dependent offsets yields masks
 * that shift a block by 0..15 bytes; the out-of-range 0xff entries
 * produce 0 under tbl and leave the destination untouched under tbx.
 */
238 .Lcts_permute_table:
239         .byte           0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
240         .byte           0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
241         .byte            0x0,  0x1,  0x2,  0x3,  0x4,  0x5,  0x6,  0x7
242         .byte            0x8,  0x9,  0xa,  0xb,  0xc,  0xd,  0xe,  0xf
243         .byte           0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
244         .byte           0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
245         .previous
246
247
248         /*
249          * aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
250          *                 int blocks, u8 ctr[])
251          */
252
/*
 * CTR encryption.  x0 = out, x1 = in, x2 = round keys, w3 = rounds,
 * w4 = block count, x5 = counter block (updated on return).
 *
 * v4 holds the big-endian counter block; x6 mirrors its low 64 bits in
 * CPU byte order so it can be incremented with ordinary arithmetic.
 * The 4x fast path only bumps the low 32 bits of the counter, so it is
 * bypassed up front if those 32 bits would wrap within this call
 * (cmn w6, w4); the 1x loop handles full 64-bit increments and
 * propagates carries into the counter's upper word via .Lctrcarry.
 */
253 AES_ENTRY(aes_ctr_encrypt)
254         stp             x29, x30, [sp, #-16]!           /* frame: bl below clobbers x30 */
255         mov             x29, sp
256
257         enc_prepare     w3, x2, x6
258         ld1             {v4.16b}, [x5]
259
260         umov            x6, v4.d[1]             /* keep swabbed ctr in reg */
261         rev             x6, x6
262         cmn             w6, w4                  /* 32 bit overflow? */
263         bcs             .Lctrloop               /* yes: take the safe 1x path */
264 .LctrloopNx:
265         subs            w4, w4, #4
266         bmi             .Lctr1x
267         add             w7, w6, #1              /* build ctr+1..ctr+3 for v1..v3 */
268         mov             v0.16b, v4.16b          /* (v0 keeps the current ctr) */
269         add             w8, w6, #2
270         mov             v1.16b, v4.16b
271         add             w9, w6, #3
272         mov             v2.16b, v4.16b
273         rev             w7, w7                  /* back to big-endian lane order */
274         mov             v3.16b, v4.16b
275         rev             w8, w8
276         mov             v1.s[3], w7             /* patch low ctr word of each block */
277         rev             w9, w9
278         mov             v2.s[3], w8
279         mov             v3.s[3], w9
280         ld1             {v5.16b-v7.16b}, [x1], #48      /* get 3 input blocks */
281         bl              aes_encrypt_block4x
282         eor             v0.16b, v5.16b, v0.16b
283         ld1             {v5.16b}, [x1], #16             /* get 1 input block  */
284         eor             v1.16b, v6.16b, v1.16b
285         eor             v2.16b, v7.16b, v2.16b
286         eor             v3.16b, v5.16b, v3.16b
287         st1             {v0.16b-v3.16b}, [x0], #64
288         add             x6, x6, #4              /* advance ctr by 4 blocks */
289         rev             x7, x6
290         ins             v4.d[1], x7             /* write it back into v4 */
291         cbz             w4, .Lctrout
292         b               .LctrloopNx
293 .Lctr1x:
294         adds            w4, w4, #4              /* undo bias; 0 means done */
295         beq             .Lctrout
296 .Lctrloop:
297         mov             v0.16b, v4.16b
298         encrypt_block   v0, w3, x2, x8, w7
299
300         adds            x6, x6, #1              /* increment BE ctr */
301         rev             x7, x6
302         ins             v4.d[1], x7
303         bcs             .Lctrcarry              /* overflow? */
304
305 .Lctrcarrydone:
306         subs            w4, w4, #1
307         bmi             .Lctrtailblock          /* blocks <0 means tail block */
308         ld1             {v3.16b}, [x1], #16
309         eor             v3.16b, v0.16b, v3.16b
310         st1             {v3.16b}, [x0], #16
311         bne             .Lctrloop
312
313 .Lctrout:
314         st1             {v4.16b}, [x5]          /* return next CTR value */
315         ldp             x29, x30, [sp], #16
316         ret
317
318 .Lctrtailblock:
/* store the raw keystream block; presumably the caller applies it to
 * the partial tail itself -- TODO confirm against the C glue code */
319         st1             {v0.16b}, [x0]
320         b               .Lctrout
321
322 .Lctrcarry:
323         umov            x7, v4.d[0]             /* load upper word of ctr  */
324         rev             x7, x7                  /* ... to handle the carry */
325         add             x7, x7, #1
326         rev             x7, x7
327         ins             v4.d[0], x7
328         b               .Lctrcarrydone
329 AES_ENDPROC(aes_ctr_encrypt)
330
331
332         /*
333          * aes_xts_encrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
334          *                 int blocks, u8 const rk2[], u8 iv[], int first)
335          * aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
336          *                 int blocks, u8 const rk2[], u8 iv[], int first)
337          */
338
/*
 * next_tweak - advance the XTS tweak: multiply \in by x in GF(2^128)
 * modulo x^128 + x^7 + x^2 + x + 1.  The doubling is done as add (2d
 * lanes); the conditional reduction constant is derived by arithmetic-
 * shifting the sign bits down (sshr #63), masking with xtsmask, and
 * rotating the two 64-bit halves (ext #8) so each half's carry lands
 * in the right place.  \tmp is clobbered.
 */
339         .macro          next_tweak, out, in, tmp
340         sshr            \tmp\().2d,  \in\().2d,   #63
341         and             \tmp\().16b, \tmp\().16b, xtsmask.16b
342         add             \out\().2d,  \in\().2d,   \in\().2d
343         ext             \tmp\().16b, \tmp\().16b, \tmp\().16b, #8
344         eor             \out\().16b, \out\().16b, \tmp\().16b
345         .endm

/*
 * xts_load_mask - materialize the reduction mask { 0x87, 0, 0x1, 0 }
 * (32-bit lanes) into xtsmask without a memory load: two movi
 * immediates interleaved with uzp1.  \tmp is clobbered.  xtsmask is an
 * alias defined by the including file.
 */
347         .macro          xts_load_mask, tmp
348         movi            xtsmask.2s, #0x1
349         movi            \tmp\().2s, #0x87
350         uzp1            xtsmask.4s, xtsmask.4s, \tmp\().4s
351         .endm
352
/*
 * XTS encryption.  x0 = out, x1 = in, x2 = rk1 (data key), w3 = rounds,
 * w4 = block count, x5 = rk2 (tweak key), x6 = iv, w7 = first.
 *
 * On the first call (w7 != 0) the iv is encrypted with rk2 to produce
 * the initial tweak; on continuation calls the saved tweak in [x6] is
 * advanced with next_tweak instead.  v4-v7 hold the tweaks for the
 * four blocks of the bulk loop; v8 holds the GF reduction mask
 * (xts_reload_mask is defined by the including file).
 */
353 AES_ENTRY(aes_xts_encrypt)
354         stp             x29, x30, [sp, #-16]!           /* frame: bl below clobbers x30 */
355         mov             x29, sp
356
357         ld1             {v4.16b}, [x6]
358         xts_load_mask   v8
359         cbz             w7, .Lxtsencnotfirst
360
361         enc_prepare     w3, x5, x8
362         encrypt_block   v4, w3, x5, x8, w7              /* first tweak */
363         enc_switch_key  w3, x2, x8                      /* swap to the data key */
364         b               .LxtsencNx
365
366 .Lxtsencnotfirst:
367         enc_prepare     w3, x2, x8
368 .LxtsencloopNx:
369         next_tweak      v4, v4, v8
370 .LxtsencNx:
371         subs            w4, w4, #4
372         bmi             .Lxtsenc1x
373         ld1             {v0.16b-v3.16b}, [x1], #64      /* get 4 pt blocks */
374         next_tweak      v5, v4, v8                      /* derive tweaks 2..4 */
375         eor             v0.16b, v0.16b, v4.16b          /* pre-whiten with tweaks */
376         next_tweak      v6, v5, v8
377         eor             v1.16b, v1.16b, v5.16b
378         eor             v2.16b, v2.16b, v6.16b
379         next_tweak      v7, v6, v8
380         eor             v3.16b, v3.16b, v7.16b
381         bl              aes_encrypt_block4x
382         eor             v3.16b, v3.16b, v7.16b          /* post-whiten with tweaks */
383         eor             v0.16b, v0.16b, v4.16b
384         eor             v1.16b, v1.16b, v5.16b
385         eor             v2.16b, v2.16b, v6.16b
386         st1             {v0.16b-v3.16b}, [x0], #64
387         mov             v4.16b, v7.16b                  /* carry last tweak forward */
388         cbz             w4, .Lxtsencout
389         xts_reload_mask v8
390         b               .LxtsencloopNx
391 .Lxtsenc1x:
392         adds            w4, w4, #4                      /* undo bias; 0 means done */
393         beq             .Lxtsencout
394 .Lxtsencloop:
395         ld1             {v1.16b}, [x1], #16
396         eor             v0.16b, v1.16b, v4.16b          /* pre-whiten */
397         encrypt_block   v0, w3, x2, x8, w7
398         eor             v0.16b, v0.16b, v4.16b          /* post-whiten */
399         st1             {v0.16b}, [x0], #16
400         subs            w4, w4, #1
401         beq             .Lxtsencout
402         next_tweak      v4, v4, v8
403         b               .Lxtsencloop
404 .Lxtsencout:
405         st1             {v4.16b}, [x6]                  /* return tweak */
406         ldp             x29, x30, [sp], #16
407         ret
408 AES_ENDPROC(aes_xts_encrypt)
409
410
/*
 * XTS decryption.  x0 = out, x1 = in, x2 = rk1 (data key), w3 = rounds,
 * w4 = block count, x5 = rk2 (tweak key), x6 = iv, w7 = first.
 *
 * Mirror of aes_xts_encrypt: the tweak chain is identical (tweaks are
 * always *encrypted* with rk2, even for decryption); only the data
 * transform switches to the decryption key schedule.
 */
411 AES_ENTRY(aes_xts_decrypt)
412         stp             x29, x30, [sp, #-16]!           /* frame: bl below clobbers x30 */
413         mov             x29, sp
414
415         ld1             {v4.16b}, [x6]
416         xts_load_mask   v8
417         cbz             w7, .Lxtsdecnotfirst
418
419         enc_prepare     w3, x5, x8
420         encrypt_block   v4, w3, x5, x8, w7              /* first tweak */
421         dec_prepare     w3, x2, x8                      /* switch to decryption keys */
422         b               .LxtsdecNx
423
424 .Lxtsdecnotfirst:
425         dec_prepare     w3, x2, x8
426 .LxtsdecloopNx:
427         next_tweak      v4, v4, v8
428 .LxtsdecNx:
429         subs            w4, w4, #4
430         bmi             .Lxtsdec1x
431         ld1             {v0.16b-v3.16b}, [x1], #64      /* get 4 ct blocks */
432         next_tweak      v5, v4, v8                      /* derive tweaks 2..4 */
433         eor             v0.16b, v0.16b, v4.16b          /* pre-whiten with tweaks */
434         next_tweak      v6, v5, v8
435         eor             v1.16b, v1.16b, v5.16b
436         eor             v2.16b, v2.16b, v6.16b
437         next_tweak      v7, v6, v8
438         eor             v3.16b, v3.16b, v7.16b
439         bl              aes_decrypt_block4x
440         eor             v3.16b, v3.16b, v7.16b          /* post-whiten with tweaks */
441         eor             v0.16b, v0.16b, v4.16b
442         eor             v1.16b, v1.16b, v5.16b
443         eor             v2.16b, v2.16b, v6.16b
444         st1             {v0.16b-v3.16b}, [x0], #64
445         mov             v4.16b, v7.16b                  /* carry last tweak forward */
446         cbz             w4, .Lxtsdecout
447         xts_reload_mask v8
448         b               .LxtsdecloopNx
449 .Lxtsdec1x:
450         adds            w4, w4, #4                      /* undo bias; 0 means done */
451         beq             .Lxtsdecout
452 .Lxtsdecloop:
453         ld1             {v1.16b}, [x1], #16
454         eor             v0.16b, v1.16b, v4.16b          /* pre-whiten */
455         decrypt_block   v0, w3, x2, x8, w7
456         eor             v0.16b, v0.16b, v4.16b          /* post-whiten */
457         st1             {v0.16b}, [x0], #16
458         subs            w4, w4, #1
459         beq             .Lxtsdecout
460         next_tweak      v4, v4, v8
461         b               .Lxtsdecloop
462 .Lxtsdecout:
463         st1             {v4.16b}, [x6]                  /* return tweak */
464         ldp             x29, x30, [sp], #16
465         ret
466 AES_ENDPROC(aes_xts_decrypt)
467
468         /*
469          * aes_mac_update(u8 const in[], u32 const rk[], int rounds,
470          *                int blocks, u8 dg[], int enc_before, int enc_after)
471          */
/*
 * CBC-MAC style digest update.
 * x0 = in, x1 = round keys, w2 = rounds, w3 = block count, x4 = dg
 * (digest, read and written), w5 = enc_before, w6 = enc_after.
 *
 * Arguments are copied into callee-saved x19-x24 up front so they
 * survive cond_yield_neon, which may drop the NEON context and resume
 * at .Lmacrestart (where keys and dg are reloaded from x20/x23).
 * Each input block is xor'ed into the digest; the digest is encrypted
 * between blocks, and after the final block only if enc_after (x24)
 * is set -- the csinv/cbz pair implements "encrypt unless this is the
 * last block and enc_after == 0".
 */
472 AES_ENTRY(aes_mac_update)
473         frame_push      6                               /* saves x19..x24 (presumably;
                                                             * macro defined elsewhere) */
474
475         mov             x19, x0                         /* in */
476         mov             x20, x1                         /* round keys */
477         mov             x21, x2                         /* rounds */
478         mov             x22, x3                         /* remaining blocks */
479         mov             x23, x4                         /* dg pointer */
480         mov             x24, x6                         /* enc_after */
481
482         ld1             {v0.16b}, [x23]                 /* get dg */
483         enc_prepare     w2, x1, x7
484         cbz             w5, .Lmacloop4x                 /* skip if !enc_before */
485
486         encrypt_block   v0, w2, x1, x7, w8
487
488 .Lmacloop4x:
489         subs            w22, w22, #4
490         bmi             .Lmac1x
491         ld1             {v1.16b-v4.16b}, [x19], #64     /* get next pt block */
492         eor             v0.16b, v0.16b, v1.16b          /* ..and xor with dg */
493         encrypt_block   v0, w21, x20, x7, w8
494         eor             v0.16b, v0.16b, v2.16b
495         encrypt_block   v0, w21, x20, x7, w8
496         eor             v0.16b, v0.16b, v3.16b
497         encrypt_block   v0, w21, x20, x7, w8
498         eor             v0.16b, v0.16b, v4.16b
499         cmp             w22, wzr                        /* last block of the update? */
500         csinv           x5, x24, xzr, eq                /* x5 = last ? enc_after : ~0 */
501         cbz             w5, .Lmacout
502         encrypt_block   v0, w21, x20, x7, w8
503         st1             {v0.16b}, [x23]                 /* return dg */
504         cond_yield_neon .Lmacrestart                    /* may reschedule; restart path
                                                             * below reloads state */
505         b               .Lmacloop4x
506 .Lmac1x:
507         add             w22, w22, #4                    /* undo bias */
508 .Lmacloop:
509         cbz             w22, .Lmacout
510         ld1             {v1.16b}, [x19], #16            /* get next pt block */
511         eor             v0.16b, v0.16b, v1.16b          /* ..and xor with dg */
512
513         subs            w22, w22, #1
514         csinv           x5, x24, xzr, eq                /* x5 = last ? enc_after : ~0 */
515         cbz             w5, .Lmacout
516
517 .Lmacenc:
518         encrypt_block   v0, w21, x20, x7, w8
519         b               .Lmacloop
520
521 .Lmacout:
522         st1             {v0.16b}, [x23]                 /* return dg */
523         frame_pop
524         ret
525
526 .Lmacrestart:
527         ld1             {v0.16b}, [x23]                 /* get dg */
528         enc_prepare     w21, x20, x0                    /* re-derive keys after yield */
529         b               .Lmacloop4x
530 AES_ENDPROC(aes_mac_update)