/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * linux/arch/arm64/crypto/aes-neon.S - AES cipher for ARMv8 NEON
 *
 * Copyright (C) 2013 - 2017 Linaro Ltd. <ard.biesheuvel@linaro.org>
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

#define AES_FUNC_START(func)            SYM_FUNC_START(neon_ ## func)
#define AES_FUNC_END(func)              SYM_FUNC_END(neon_ ## func)

        xtsmask         .req    v7
        cbciv           .req    v7
        vctr            .req    v4

        .macro          xts_reload_mask, tmp
        xts_load_mask   \tmp
        .endm

        /* special case for the neon-bs driver calling into this one for CTS */
        .macro          xts_cts_skip_tw, reg, lbl
        tbnz            \reg, #1, \lbl
        .endm

        /* multiply by polynomial 'x' in GF(2^8) */
        .macro          mul_by_x, out, in, temp, const
        sshr            \temp, \in, #7
        shl             \out, \in, #1
        and             \temp, \temp, \const
        eor             \out, \out, \temp
        .endm
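
        /*
         * Background: doubling in GF(2^8) is a left shift, reduced by
         * the AES polynomial x^8 + x^4 + x^3 + x + 1 when the top bit
         * falls out.  sshr #7 smears each byte's sign bit into an
         * all-ones or all-zeros mask, so ANDing with the 0x1b constant
         * selects the reduction term per lane.  E.g. 0x80 (x^7)
         * becomes (0x80 << 1) ^ 0x1b = 0x1b, since x^8 == 0x1b mod P(x).
         */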

        /* multiply by polynomial 'x^2' in GF(2^8) */
        .macro          mul_by_x2, out, in, temp, const
        ushr            \temp, \in, #6
        shl             \out, \in, #2
        pmul            \temp, \temp, \const
        eor             \out, \out, \temp
        .endm
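
        /*
         * Here the two bits shifted out of each byte must be reduced
         * as x^8 and x^9.  pmul is a carryless multiply, and
         * pmul(in >> 6, 0x1b) contributes 0x1b for bit 0 (x^8) and
         * 0x36 = 0x1b << 1 for bit 1 (x^9), covering both at once.
         */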

        /* preload the entire Sbox */
        .macro          prepare, sbox, shiftrows, temp
        movi            v12.16b, #0x1b
        ldr_l           q13, \shiftrows, \temp
        ldr_l           q14, .Lror32by8, \temp
        adr_l           \temp, \sbox
        ld1             {v16.16b-v19.16b}, [\temp], #64
        ld1             {v20.16b-v23.16b}, [\temp], #64
        ld1             {v24.16b-v27.16b}, [\temp], #64
        ld1             {v28.16b-v31.16b}, [\temp]
        .endm
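
        /*
         * After prepare, the 256-byte Sbox lives in v16-v31 (16 bytes
         * per register), v12 holds the 0x1b reduction constant, v13
         * the ShiftRows permutation and v14 the rotate-by-8
         * permutation used by mix_columns.  v8-v11 and v15 serve as
         * scratch in the round macros below.
         */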

        /* do preload for encryption */
        .macro          enc_prepare, ignore0, ignore1, temp
        prepare         crypto_aes_sbox, .LForward_ShiftRows, \temp
        .endm

        .macro          enc_switch_key, ignore0, ignore1, temp
        /* do nothing */
        .endm

        /* do preload for decryption */
        .macro          dec_prepare, ignore0, ignore1, temp
        prepare         crypto_aes_inv_sbox, .LReverse_ShiftRows, \temp
        .endm

        /* apply SubBytes transformation using the preloaded Sbox */
        .macro          sub_bytes, in
        sub             v9.16b, \in\().16b, v15.16b
        tbl             \in\().16b, {v16.16b-v19.16b}, \in\().16b
        sub             v10.16b, v9.16b, v15.16b
        tbx             \in\().16b, {v20.16b-v23.16b}, v9.16b
        sub             v11.16b, v10.16b, v15.16b
        tbx             \in\().16b, {v24.16b-v27.16b}, v10.16b
        tbx             \in\().16b, {v28.16b-v31.16b}, v11.16b
        .endm
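
        /*
         * tbl writes 0 for any index beyond the table, and tbx leaves
         * the destination byte unchanged in that case.  With v15 set
         * to 0x40 by the caller, the tbl covers Sbox bytes 0x00-0x3f
         * and each tbx handles the next 64-byte quarter after another
         * 0x40 has been subtracted from the index vector, so every
         * lane is written by exactly one of the four lookups.
         */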

        /* apply MixColumns transformation */
        .macro          mix_columns, in, enc
        .if             \enc == 0
        /* Inverse MixColumns: pre-multiply by { 5, 0, 4, 0 } */
        mul_by_x2       v8.16b, \in\().16b, v9.16b, v12.16b
        eor             \in\().16b, \in\().16b, v8.16b
        rev32           v8.8h, v8.8h
        eor             \in\().16b, \in\().16b, v8.16b
        .endif

        mul_by_x        v9.16b, \in\().16b, v8.16b, v12.16b
        rev32           v8.8h, \in\().8h
        eor             v8.16b, v8.16b, v9.16b
        eor             \in\().16b, \in\().16b, v8.16b
        tbl             \in\().16b, {\in\().16b}, v14.16b
        eor             \in\().16b, \in\().16b, v8.16b
        .endm
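
        /*
         * For column bytes a0..a3, MixColumns computes
         * b0 = 2*a0 ^ 3*a1 ^ a2 ^ a3 (and rotations for b1..b3).  The
         * code forms t = 2*a ^ rot16(a) per lane (rev32 on .8h lanes
         * rotates each 32-bit word by 16 bits), xors it into the
         * state to get 3*a ^ rot16(a), rotates that by 8 through v14
         * and xors t back in; this expands to the sum above in every
         * lane.  Decryption first multiplies by { 5, 0, 4, 0 }:
         * composed with the forward matrix { 2, 3, 1, 1 } this yields
         * the InvMixColumns matrix { 14, 11, 13, 9 }.
         */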

        .macro          do_block, enc, in, rounds, rk, rkp, i
        ld1             {v15.4s}, [\rk]
        add             \rkp, \rk, #16
        mov             \i, \rounds
1111:   eor             \in\().16b, \in\().16b, v15.16b         /* ^round key */
        movi            v15.16b, #0x40
        tbl             \in\().16b, {\in\().16b}, v13.16b       /* ShiftRows */
        sub_bytes       \in
        subs            \i, \i, #1
        ld1             {v15.4s}, [\rkp], #16
        beq             2222f
        mix_columns     \in, \enc
        b               1111b
2222:   eor             \in\().16b, \in\().16b, v15.16b         /* ^round key */
        .endm
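
        /*
         * One AES round per iteration: AddRoundKey, then ShiftRows
         * and SubBytes (which commute, since SubBytes acts bytewise
         * and ShiftRows only permutes bytes), then MixColumns on all
         * but the final round, which 'beq 2222f' skips as the spec
         * requires.  v15 is recycled inside the loop: round key on
         * entry, 0x40 sub_bytes offset in the middle, next round key
         * after the ld1.
         */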

        .macro          encrypt_block, in, rounds, rk, rkp, i
        do_block        1, \in, \rounds, \rk, \rkp, \i
        .endm

        .macro          decrypt_block, in, rounds, rk, rkp, i
        do_block        0, \in, \rounds, \rk, \rkp, \i
        .endm

        /*
         * Interleaved versions: functionally equivalent to the
         * ones above, but applied to AES states in parallel.
         */

        .macro          sub_bytes_4x, in0, in1, in2, in3
        sub             v8.16b, \in0\().16b, v15.16b
        tbl             \in0\().16b, {v16.16b-v19.16b}, \in0\().16b
        sub             v9.16b, \in1\().16b, v15.16b
        tbl             \in1\().16b, {v16.16b-v19.16b}, \in1\().16b
        sub             v10.16b, \in2\().16b, v15.16b
        tbl             \in2\().16b, {v16.16b-v19.16b}, \in2\().16b
        sub             v11.16b, \in3\().16b, v15.16b
        tbl             \in3\().16b, {v16.16b-v19.16b}, \in3\().16b
        tbx             \in0\().16b, {v20.16b-v23.16b}, v8.16b
        tbx             \in1\().16b, {v20.16b-v23.16b}, v9.16b
        sub             v8.16b, v8.16b, v15.16b
        tbx             \in2\().16b, {v20.16b-v23.16b}, v10.16b
        sub             v9.16b, v9.16b, v15.16b
        tbx             \in3\().16b, {v20.16b-v23.16b}, v11.16b
        sub             v10.16b, v10.16b, v15.16b
        tbx             \in0\().16b, {v24.16b-v27.16b}, v8.16b
        sub             v11.16b, v11.16b, v15.16b
        tbx             \in1\().16b, {v24.16b-v27.16b}, v9.16b
        sub             v8.16b, v8.16b, v15.16b
        tbx             \in2\().16b, {v24.16b-v27.16b}, v10.16b
        sub             v9.16b, v9.16b, v15.16b
        tbx             \in3\().16b, {v24.16b-v27.16b}, v11.16b
        sub             v10.16b, v10.16b, v15.16b
        tbx             \in0\().16b, {v28.16b-v31.16b}, v8.16b
        sub             v11.16b, v11.16b, v15.16b
        tbx             \in1\().16b, {v28.16b-v31.16b}, v9.16b
        tbx             \in2\().16b, {v28.16b-v31.16b}, v10.16b
        tbx             \in3\().16b, {v28.16b-v31.16b}, v11.16b
        .endm
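
        /*
         * Same lookup chain as sub_bytes, with the index updates and
         * table lookups of the four states interleaved so that
         * dependent instructions are separated by independent work
         * from the other states, helping to cover NEON result latency.
         */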

        .macro          mul_by_x_2x, out0, out1, in0, in1, tmp0, tmp1, const
        sshr            \tmp0\().16b, \in0\().16b, #7
        shl             \out0\().16b, \in0\().16b, #1
        sshr            \tmp1\().16b, \in1\().16b, #7
        and             \tmp0\().16b, \tmp0\().16b, \const\().16b
        shl             \out1\().16b, \in1\().16b, #1
        and             \tmp1\().16b, \tmp1\().16b, \const\().16b
        eor             \out0\().16b, \out0\().16b, \tmp0\().16b
        eor             \out1\().16b, \out1\().16b, \tmp1\().16b
        .endm

        .macro          mul_by_x2_2x, out0, out1, in0, in1, tmp0, tmp1, const
        ushr            \tmp0\().16b, \in0\().16b, #6
        shl             \out0\().16b, \in0\().16b, #2
        ushr            \tmp1\().16b, \in1\().16b, #6
        pmul            \tmp0\().16b, \tmp0\().16b, \const\().16b
        shl             \out1\().16b, \in1\().16b, #2
        pmul            \tmp1\().16b, \tmp1\().16b, \const\().16b
        eor             \out0\().16b, \out0\().16b, \tmp0\().16b
        eor             \out1\().16b, \out1\().16b, \tmp1\().16b
        .endm

        .macro          mix_columns_2x, in0, in1, enc
        .if             \enc == 0
        /* Inverse MixColumns: pre-multiply by { 5, 0, 4, 0 } */
        mul_by_x2_2x    v8, v9, \in0, \in1, v10, v11, v12
        eor             \in0\().16b, \in0\().16b, v8.16b
        rev32           v8.8h, v8.8h
        eor             \in1\().16b, \in1\().16b, v9.16b
        rev32           v9.8h, v9.8h
        eor             \in0\().16b, \in0\().16b, v8.16b
        eor             \in1\().16b, \in1\().16b, v9.16b
        .endif

        mul_by_x_2x     v8, v9, \in0, \in1, v10, v11, v12
        rev32           v10.8h, \in0\().8h
        rev32           v11.8h, \in1\().8h
        eor             v10.16b, v10.16b, v8.16b
        eor             v11.16b, v11.16b, v9.16b
        eor             \in0\().16b, \in0\().16b, v10.16b
        eor             \in1\().16b, \in1\().16b, v11.16b
        tbl             \in0\().16b, {\in0\().16b}, v14.16b
        tbl             \in1\().16b, {\in1\().16b}, v14.16b
        eor             \in0\().16b, \in0\().16b, v10.16b
        eor             \in1\().16b, \in1\().16b, v11.16b
        .endm

        .macro          do_block_4x, enc, in0, in1, in2, in3, rounds, rk, rkp, i
        ld1             {v15.4s}, [\rk]
        add             \rkp, \rk, #16
        mov             \i, \rounds
1111:   eor             \in0\().16b, \in0\().16b, v15.16b       /* ^round key */
        eor             \in1\().16b, \in1\().16b, v15.16b       /* ^round key */
        eor             \in2\().16b, \in2\().16b, v15.16b       /* ^round key */
        eor             \in3\().16b, \in3\().16b, v15.16b       /* ^round key */
        movi            v15.16b, #0x40
        tbl             \in0\().16b, {\in0\().16b}, v13.16b     /* ShiftRows */
        tbl             \in1\().16b, {\in1\().16b}, v13.16b     /* ShiftRows */
        tbl             \in2\().16b, {\in2\().16b}, v13.16b     /* ShiftRows */
        tbl             \in3\().16b, {\in3\().16b}, v13.16b     /* ShiftRows */
        sub_bytes_4x    \in0, \in1, \in2, \in3
        subs            \i, \i, #1
        ld1             {v15.4s}, [\rkp], #16
        beq             2222f
        mix_columns_2x  \in0, \in1, \enc
        mix_columns_2x  \in2, \in3, \enc
        b               1111b
2222:   eor             \in0\().16b, \in0\().16b, v15.16b       /* ^round key */
        eor             \in1\().16b, \in1\().16b, v15.16b       /* ^round key */
        eor             \in2\().16b, \in2\().16b, v15.16b       /* ^round key */
        eor             \in3\().16b, \in3\().16b, v15.16b       /* ^round key */
        .endm

        .macro          encrypt_block4x, in0, in1, in2, in3, rounds, rk, rkp, i
        do_block_4x     1, \in0, \in1, \in2, \in3, \rounds, \rk, \rkp, \i
        .endm

        .macro          decrypt_block4x, in0, in1, in2, in3, rounds, rk, rkp, i
        do_block_4x     0, \in0, \in1, \in2, \in3, \rounds, \rk, \rkp, \i
        .endm

#include "aes-modes.S"

        .section        ".rodata", "a"
        .align          4
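/*
 * tbl permutation vectors.  .octa stores its value little-endian, so
 * the least significant byte of each constant is the index consumed
 * by byte lane 0: .LForward_ShiftRows maps lane 0 to input byte 0x00,
 * lane 1 to byte 0x05, and so on (out[i] = in[5 * i mod 16]).
 */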
.LForward_ShiftRows:
        .octa           0x0b06010c07020d08030e09040f0a0500

.LReverse_ShiftRows:
        .octa           0x0306090c0f0205080b0e0104070a0d00

.Lror32by8:
        .octa           0x0c0f0e0d080b0a090407060500030201