Merge ../to-linus-stable/
[sfrench/cifs-2.6.git] / arch / x86_64 / crypto / aes-x86_64-asm.S
1 /* AES (Rijndael) implementation (FIPS PUB 197) for x86_64
2  *
3  * Copyright (C) 2005 Andreas Steinmetz, <ast@domdv.de>
4  *
5  * License:
6  * This code can be distributed under the terms of the GNU General Public
7  * License (GPL) Version 2 provided that the above header down to and
8  * including this sentence is retained in full.
9  */
10
11 .extern aes_ft_tab
12 .extern aes_it_tab
13 .extern aes_fl_tab
14 .extern aes_il_tab
15
16 .text
17
/*
 * Register aliases used by the macros in this file.
 *
 * R1..R4 name %rax, %rbx, %rcx and %rdx and exist in every operand width:
 * no suffix = 64-bit, E = 32-bit, X = 16-bit, H = bits 8..15, L = bits 0..7.
 * R5..R7 (%rsi, %rdi, %rbp) only have 64-bit and 32-bit (E) names, and
 * R8..R11 only 64-bit names.
 *
 * The macros derive the narrower names by token-pasting a suffix onto the
 * alias they were handed (for example "r5 ## E"), so an alias may only be
 * passed in a position where every suffix pasted onto it is defined here.
 */
18 #define R1      %rax
19 #define R1E     %eax
20 #define R1X     %ax
21 #define R1H     %ah
22 #define R1L     %al
23 #define R2      %rbx
24 #define R2E     %ebx
25 #define R2X     %bx
26 #define R2H     %bh
27 #define R2L     %bl
28 #define R3      %rcx
29 #define R3E     %ecx
30 #define R3X     %cx
31 #define R3H     %ch
32 #define R3L     %cl
33 #define R4      %rdx
34 #define R4E     %edx
35 #define R4X     %dx
36 #define R4H     %dh
37 #define R4L     %dl
38 #define R5      %rsi
39 #define R5E     %esi
40 #define R6      %rdi
41 #define R6E     %edi
42 #define R7      %rbp
43 #define R7E     %ebp
44 #define R8      %r8
45 #define R9      %r9
46 #define R10     %r10
47 #define R11     %r11
48
/*
 * prologue - define the global entry point FUNC, load the 16-byte input
 * block and XOR it with the first 16 bytes of key material.
 *
 * All register parameters are positional; entry() shows the real mapping.
 *   FUNC        symbol emitted as .global / @function, aligned to 8 bytes
 *   BASE        byte offset added to the context pointer for key accesses
 *   B128, B192  labels taken when the dword at (r8) is below / equal to 24
 *   r1 -> r2    64-bit copy made before r1's low half is reused for data
 *   r3 -> r4    64-bit copy
 *   r8          context pointer
 *   r9          receives r8 + BASE + 52, the base for key accesses
 *   r10 -> r11  64-bit copy made before r10's low half is overwritten
 *   r7          input pointer; dwords at 0/4/8/12 are loaded into the 32-bit
 *               halves of r5, r1, r6 and r7 (the pointer itself is
 *               overwritten by the last load)
 *
 * The four loaded words are XORed with the dwords at r9 - 48 .. r9 - 36,
 * i.e. at r8 + BASE + 4 .. r8 + BASE + 16.
 *
 * The dword at (r8) is then compared with 24 (presumably the key length in
 * bytes -- TODO confirm against the context structure layout):
 *   below 24  -> jump to B128 with r9 unchanged
 *   equal 24  -> jump to B192 with r9 advanced by 32
 *   otherwise -> fall through with r9 advanced by 64
 * leaq is used for the pointer bumps so that the flags set by cmpl are still
 * valid for the following je.
 */
49 #define prologue(FUNC,BASE,B128,B192,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11) \
50         .global FUNC;                   \
51         .type   FUNC,@function;         \
52         .align  8;                      \
53 FUNC:   movq    r1,r2;                  \
54         movq    r3,r4;                  \
55         leaq    BASE+52(r8),r9;         \
56         movq    r10,r11;                \
57         movl    (r7),r5 ## E;           \
58         movl    4(r7),r1 ## E;          \
59         movl    8(r7),r6 ## E;          \
60         movl    12(r7),r7 ## E;         \
61         movl    (r8),r10 ## E;          \
62         xorl    -48(r9),r5 ## E;        \
63         xorl    -44(r9),r1 ## E;        \
64         xorl    -40(r9),r6 ## E;        \
65         xorl    -36(r9),r7 ## E;        \
66         cmpl    $24,r10 ## E;           \
67         jb      B128;                   \
68         leaq    32(r9),r9;              \
69         je      B192;                   \
70         leaq    32(r9),r9;
71
/*
 * epilogue - copy two 64-bit registers back, store the 16-byte result and
 * return to the caller.
 *
 * Positional parameters (the `return` macro supplies the real registers):
 *   r1 -> r2    64-bit copy
 *   r3 -> r4    64-bit copy
 *   r5..r8      registers whose 32-bit halves are stored at byte offsets
 *               0, 4, 8 and 12 of the destination
 *   r9          destination pointer
 */
72 #define epilogue(r1,r2,r3,r4,r5,r6,r7,r8,r9) \
73         movq    r1,r2;                  \
74         movq    r3,r4;                  \
75         movl    r5 ## E,(r9);           \
76         movl    r6 ## E,4(r9);          \
77         movl    r7 ## E,8(r9);          \
78         movl    r8 ## E,12(r9);         \
79         ret;
80
/*
 * round - one table-driven cipher round on four 32-bit state words.
 *
 *   TAB      lookup table made of four consecutive 1024-byte sub-tables
 *            (TAB, TAB+1024, TAB+2048, TAB+3072), each indexed by a byte
 *            value scaled by 4
 *   OFFSET   displacement from r8 of the 16 bytes of key material to mix in
 *   r1..r4   input words; all four are destroyed.  They must be aliases that
 *            have X, H and L names (R1..R4), because the macro pastes those
 *            suffixes onto them.
 *   r5, r6   receive the first two output words
 *   r7       scratch (byte index for table lookups)
 *   r8       pointer the key offsets are relative to
 *   ra..rd   registers XORed with the dwords at OFFSET, OFFSET+4, OFFSET+8
 *            and OFFSET+12 of r8.  The callers in this file pass r5/r6 (in
 *            either order) for ra/rb and r3/r4 (in either order) for rc/rd,
 *            so the key words end up in the output registers.
 *
 * Result: the four output words are left in r5, r6, r3 and r4.  Each one is
 * the XOR of four table entries -- one from every sub-table, each indexed by
 * a different byte position taken from a different input word -- and one
 * key dword.
 *
 * Implementation notes:
 *   - Only the H/L byte registers are used to extract bytes.  To reach the
 *     upper two bytes of r1 and r2 after their lower bytes are consumed,
 *     the low 16 bits of r3/r4 are moved in (movw) and the register is
 *     rotated by 16, so one register then carries the remaining halves of
 *     two input words.
 *   - r3 and r4 are reused as output accumulators as soon as their own input
 *     bytes have been extracted; r1 and r2 end up as scratch index
 *     registers.  The exact instruction order is therefore significant.
 */
81 #define round(TAB,OFFSET,r1,r2,r3,r4,r5,r6,r7,r8,ra,rb,rc,rd) \
82         movzbl  r2 ## H,r5 ## E;        \
83         movzbl  r2 ## L,r6 ## E;        \
84         movl    TAB+1024(,r5,4),r5 ## E;\
85         movw    r4 ## X,r2 ## X;        \
86         movl    TAB(,r6,4),r6 ## E;     \
87         roll    $16,r2 ## E;            \
88         shrl    $16,r4 ## E;            \
89         movzbl  r4 ## H,r7 ## E;        \
90         movzbl  r4 ## L,r4 ## E;        \
91         xorl    OFFSET(r8),ra ## E;     \
92         xorl    OFFSET+4(r8),rb ## E;   \
93         xorl    TAB+3072(,r7,4),r5 ## E;\
94         xorl    TAB+2048(,r4,4),r6 ## E;\
95         movzbl  r1 ## L,r7 ## E;        \
96         movzbl  r1 ## H,r4 ## E;        \
97         movl    TAB+1024(,r4,4),r4 ## E;\
98         movw    r3 ## X,r1 ## X;        \
99         roll    $16,r1 ## E;            \
100         shrl    $16,r3 ## E;            \
101         xorl    TAB(,r7,4),r5 ## E;     \
102         movzbl  r3 ## H,r7 ## E;        \
103         movzbl  r3 ## L,r3 ## E;        \
104         xorl    TAB+3072(,r7,4),r4 ## E;\
105         xorl    TAB+2048(,r3,4),r5 ## E;\
106         movzbl  r1 ## H,r7 ## E;        \
107         movzbl  r1 ## L,r3 ## E;        \
108         shrl    $16,r1 ## E;            \
109         xorl    TAB+3072(,r7,4),r6 ## E;\
110         movl    TAB+2048(,r3,4),r3 ## E;\
111         movzbl  r1 ## H,r7 ## E;        \
112         movzbl  r1 ## L,r1 ## E;        \
113         xorl    TAB+1024(,r7,4),r6 ## E;\
114         xorl    TAB(,r1,4),r3 ## E;     \
115         movzbl  r2 ## H,r1 ## E;        \
116         movzbl  r2 ## L,r7 ## E;        \
117         shrl    $16,r2 ## E;            \
118         xorl    TAB+3072(,r1,4),r3 ## E;\
119         xorl    TAB+2048(,r7,4),r4 ## E;\
120         movzbl  r2 ## H,r1 ## E;        \
121         movzbl  r2 ## L,r2 ## E;        \
122         xorl    OFFSET+8(r8),rc ## E;   \
123         xorl    OFFSET+12(r8),rd ## E;  \
124         xorl    TAB+1024(,r1,4),r3 ## E;\
125         xorl    TAB(,r2,4),r4 ## E;
126
/*
 * move_regs - copy the 32-bit halves of r3 and r4 into r1 and r2.
 * The round wrappers use it to move the two output words that round()
 * leaves in R5/R6 back into R1/R2, where the next round reads its input.
 */
127 #define move_regs(r1,r2,r3,r4) \
128         movl    r3 ## E,r1 ## E;        \
129         movl    r4 ## E,r2 ## E;
130
/*
 * entry - prologue() bound to the register assignment used by this file:
 *   %rbx -> %r8 and %rbp -> %r9   copies kept while %rbx/%rbp are used as
 *                                 working registers
 *   %rdi                          context pointer; %r10 = %rdi + BASE + 52
 *   %rsi -> %r11                  copy of the second argument register
 *   %rdx                          input pointer; the four state words are
 *                                 loaded into %eax, %ebx, %ecx, %edx
 *   %esi                          receives the dword at (%rdi) for the
 *                                 comparison with 24
 */
131 #define entry(FUNC,BASE,B128,B192) \
132         prologue(FUNC,BASE,B128,B192,R2,R8,R7,R9,R1,R3,R4,R6,R10,R5,R11)
133
/*
 * return - epilogue() bound to this file's registers: copy %r8 back to %rbx
 * and %r9 back to %rbp, store %esi, %edi, %ecx, %edx at offsets 0, 4, 8, 12
 * of the pointer in %r11, then ret.
 */
134 #define return epilogue(R8,R2,R9,R7,R5,R6,R3,R4,R11)
135
/*
 * encrypt_round - one non-final encryption round.
 * Runs round() with the state in R1..R4, R7 as scratch and key material at
 * OFFSET(%r10).  round() leaves the new state in R5, R6, R3, R4; move_regs
 * then copies R5/R6 back to R1/R2 so the state is in R1..R4 again.
 */
136 #define encrypt_round(TAB,OFFSET) \
137         round(TAB,OFFSET,R1,R2,R3,R4,R5,R6,R7,R10,R5,R6,R3,R4) \
138         move_regs(R1,R2,R5,R6)
139
/*
 * encrypt_final - last encryption round.
 * Same round() invocation as encrypt_round but without the move_regs step:
 * the result stays in R5, R6, R3, R4, which is where the `return` macro
 * reads the words it stores.
 */
140 #define encrypt_final(TAB,OFFSET) \
141         round(TAB,OFFSET,R1,R2,R3,R4,R5,R6,R7,R10,R5,R6,R3,R4)
142
/*
 * decrypt_round - one non-final decryption round.
 * Calls round() with the register pairs swapped relative to encrypt_round
 * (R2,R1,R4,R3 as inputs and R6,R5 as the first two outputs), which changes
 * which state word feeds which byte lane of the table lookups.  The key
 * registers (R5,R6,R3,R4) and the move_regs step are the same as for
 * encryption, so the state is back in R1..R4 afterwards.
 */
143 #define decrypt_round(TAB,OFFSET) \
144         round(TAB,OFFSET,R2,R1,R4,R3,R6,R5,R7,R10,R5,R6,R3,R4) \
145         move_regs(R1,R2,R5,R6)
146
/*
 * decrypt_final - last decryption round.
 * Same round() invocation as decrypt_round but without the move_regs step:
 * the result stays in R5, R6, R3, R4, which is where the `return` macro
 * reads the words it stores.
 */
147 #define decrypt_final(TAB,OFFSET) \
148         round(TAB,OFFSET,R2,R1,R4,R3,R6,R5,R7,R10,R5,R6,R3,R4)
149
150 /* void aes_encrypt(void *ctx, u8 *out, const u8 *in) */
151
/*
 * Register use: %rdi = ctx, %rsi = out (kept in %r11), %rdx = in.
 * %rbx and %rbp are parked in %r8/%r9 by entry() and copied back by return.
 *
 * entry() XORs the input block with the 16 bytes at ctx + 4 and then selects
 * the number of rounds from the dword at (ctx):
 *   below 24  -> enc128:       9 x encrypt_round + encrypt_final
 *   equal 24  -> enc192:      11 x encrypt_round + encrypt_final
 *   otherwise -> fall through: 13 x encrypt_round + encrypt_final
 *
 * Key offsets are relative to %r10, which entry() leaves at ctx + 52,
 * ctx + 84 or ctx + 116 for the three cases; the first round executed
 * therefore always reads its key material at ctx + 20 and the final round
 * at 112(%r10).  The final round uses aes_fl_tab instead of aes_ft_tab.
 */
152         entry(aes_encrypt,0,enc128,enc192)
153         encrypt_round(aes_ft_tab,-96)
154         encrypt_round(aes_ft_tab,-80)
155 enc192: encrypt_round(aes_ft_tab,-64)
156         encrypt_round(aes_ft_tab,-48)
157 enc128: encrypt_round(aes_ft_tab,-32)
158         encrypt_round(aes_ft_tab,-16)
159         encrypt_round(aes_ft_tab,  0)
160         encrypt_round(aes_ft_tab, 16)
161         encrypt_round(aes_ft_tab, 32)
162         encrypt_round(aes_ft_tab, 48)
163         encrypt_round(aes_ft_tab, 64)
164         encrypt_round(aes_ft_tab, 80)
165         encrypt_round(aes_ft_tab, 96)
166         encrypt_final(aes_fl_tab,112)
167         return
168
169 /* void aes_decrypt(void *ctx, u8 *out, const u8 *in) */
170
/*
 * Register use: %rdi = ctx, %rsi = out (kept in %r11), %rdx = in.
 * %rbx and %rbp are parked in %r8/%r9 by entry() and copied back by return.
 *
 * Same structure as aes_encrypt, but with BASE = 240, so all key accesses
 * are shifted 240 bytes further into the context (presumably a separate
 * decryption key schedule -- TODO confirm against the context layout).
 * entry() XORs the input block with the 16 bytes at ctx + 244 and then
 * selects the number of rounds from the dword at (ctx):
 *   below 24  -> dec128:       9 x decrypt_round + decrypt_final
 *   equal 24  -> dec192:      11 x decrypt_round + decrypt_final
 *   otherwise -> fall through: 13 x decrypt_round + decrypt_final
 *
 * Key offsets are relative to %r10, which entry() leaves at ctx + 292,
 * ctx + 324 or ctx + 356 for the three cases; the first round executed
 * therefore always reads its key material at ctx + 260.  The final round
 * uses aes_il_tab instead of aes_it_tab.
 */
171         entry(aes_decrypt,240,dec128,dec192)
172         decrypt_round(aes_it_tab,-96)
173         decrypt_round(aes_it_tab,-80)
174 dec192: decrypt_round(aes_it_tab,-64)
175         decrypt_round(aes_it_tab,-48)
176 dec128: decrypt_round(aes_it_tab,-32)
177         decrypt_round(aes_it_tab,-16)
178         decrypt_round(aes_it_tab,  0)
179         decrypt_round(aes_it_tab, 16)
180         decrypt_round(aes_it_tab, 32)
181         decrypt_round(aes_it_tab, 48)
182         decrypt_round(aes_it_tab, 64)
183         decrypt_round(aes_it_tab, 80)
184         decrypt_round(aes_it_tab, 96)
185         decrypt_final(aes_il_tab,112)
186         return