arch/arm/crypto/sha1-armv4-large.S
#define __ARM_ARCH__ __LINUX_ARM_ARCH__
@ SPDX-License-Identifier: GPL-2.0

@ This code is taken from the OpenSSL project but the author (Andy Polyakov)
@ has relicensed it under the GPLv2. Therefore this program is free software;
@ you can redistribute it and/or modify it under the terms of the GNU General
@ Public License version 2 as published by the Free Software Foundation.
@
@ The original headers, including the original license headers, are
@ included below for completeness.

@ ====================================================================
@ Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
@ project. The module is, however, dual licensed under OpenSSL and
@ CRYPTOGAMS licenses depending on where you obtain it. For further
@ details see http://www.openssl.org/~appro/cryptogams/.
@ ====================================================================

@ sha1_block procedure for ARMv4.
@
@ January 2007.

@ Size/performance trade-off
@ ====================================================================
@ impl          size in bytes   comp cycles[*]  measured performance
@ ====================================================================
@ thumb         304             3212            4420
@ armv4-small   392/+29%        1958/+64%       2250/+96%
@ armv4-compact 740/+89%        1552/+26%       1840/+22%
@ armv4-large   1420/+92%       1307/+19%       1370/+34%[***]
@ full unroll   ~5100/+260%     ~1260/+4%       ~1300/+5%
@ ====================================================================
@ thumb         = same as 'small' but in Thumb instructions[**] and
@                 with recurring code in two private functions;
@ small         = detached Xload/update, loops are folded;
@ compact       = detached Xload/update, 5x unroll;
@ large         = interleaved Xload/update, 5x unroll;
@ full unroll   = interleaved Xload/update, full unroll, estimated[!];
@
@ [*]   Manually counted instructions in "grand" loop body. Measured
@       performance is affected by prologue and epilogue overhead,
@       i-cache availability, branch penalties, etc.
@ [**]  While each Thumb instruction is half the size, the instructions
@       are not as diverse as ARM ones: e.g., there are only two
@       arithmetic instructions with 3 arguments, no [fixed] rotate, and
@       addressing modes are limited. As a result it takes more
@       instructions to do the same job in Thumb, so the code is never
@       half the size and is always slower.
@ [***] which is also ~35% better than compiler-generated code. A
@       dual-issue Cortex A8 core was measured to process an input
@       block in ~990 cycles.

@ August 2010.
@
@ Rescheduling for dual-issue pipeline resulted in 13% improvement on
@ Cortex A8 core and in absolute terms ~870 cycles per input block
@ [or 13.6 cycles per byte].

@ February 2011.
@
@ Profiler-assisted and platform-specific optimization resulted in 10%
@ improvement on Cortex A8 core and 12.2 cycles per byte.

#include <linux/linkage.h>

.text

.align  2
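@ Calling convention (prototype shown for illustration only; the exact
@ C declaration lives in the kernel glue code):
@   void sha1_block_data_order(u32 *state, const u8 *data, int blocks)
@ r0 = five-word hash state, r1 = input, r2 = number of 64-byte blocks.
@ Inside the loop A..E live in r3..r7, the round constant K in r8,
@ r9-r12 are scratch, and r14 walks the 16-word X[] schedule kept on
@ the stack.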
ENTRY(sha1_block_data_order)
        stmdb   sp!,{r4-r12,lr}
        add     r2,r1,r2,lsl#6  @ r2 to point at the end of r1
        ldmia   r0,{r3,r4,r5,r6,r7}
.Lloop:
        ldr     r8,.LK_00_19
        mov     r14,sp
        sub     sp,sp,#15*4
        mov     r5,r5,ror#30
        mov     r6,r6,ror#30
        mov     r7,r7,ror#30            @ [6]
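@ Note: C, D and E (r5-r7) are kept rotated left by 2 (ror#30 above)
@ for the whole block so that the per-round ROL(B,30) can be folded
@ into ror#2 shifter operands; .L_done rotates them back before the
@ state is written out.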
.L_00_15:
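@ Each pass of this loop performs five of rounds 0..14.  X[i] is read
@ big-endian: byte by byte on pre-ARMv7 (no unaligned word loads),
@ otherwise with a single ldr plus rev on little-endian.  F_00_19 is
@ computed in its bit-select form D ^ (B & (C ^ D)), which equals the
@ usual (B & C) | (~B & D).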
#if __ARM_ARCH__<7
        ldrb    r10,[r1,#2]
        ldrb    r9,[r1,#3]
        ldrb    r11,[r1,#1]
        add     r7,r8,r7,ror#2                  @ E+=K_00_19
        ldrb    r12,[r1],#4
        orr     r9,r9,r10,lsl#8
        eor     r10,r5,r6                       @ F_xx_xx
        orr     r9,r9,r11,lsl#16
        add     r7,r7,r3,ror#27                 @ E+=ROR(A,27)
        orr     r9,r9,r12,lsl#24
#else
        ldr     r9,[r1],#4                      @ handles unaligned
        add     r7,r8,r7,ror#2                  @ E+=K_00_19
        eor     r10,r5,r6                       @ F_xx_xx
        add     r7,r7,r3,ror#27                 @ E+=ROR(A,27)
#ifdef __ARMEL__
        rev     r9,r9                           @ byte swap
#endif
#endif
        and     r10,r4,r10,ror#2
        add     r7,r7,r9                        @ E+=X[i]
        eor     r10,r10,r6,ror#2                @ F_00_19(B,C,D)
        str     r9,[r14,#-4]!
        add     r7,r7,r10                       @ E+=F_00_19(B,C,D)
#if __ARM_ARCH__<7
        ldrb    r10,[r1,#2]
        ldrb    r9,[r1,#3]
        ldrb    r11,[r1,#1]
        add     r6,r8,r6,ror#2                  @ E+=K_00_19
        ldrb    r12,[r1],#4
        orr     r9,r9,r10,lsl#8
        eor     r10,r4,r5                       @ F_xx_xx
        orr     r9,r9,r11,lsl#16
        add     r6,r6,r7,ror#27                 @ E+=ROR(A,27)
        orr     r9,r9,r12,lsl#24
#else
        ldr     r9,[r1],#4                      @ handles unaligned
        add     r6,r8,r6,ror#2                  @ E+=K_00_19
        eor     r10,r4,r5                       @ F_xx_xx
        add     r6,r6,r7,ror#27                 @ E+=ROR(A,27)
#ifdef __ARMEL__
        rev     r9,r9                           @ byte swap
#endif
#endif
        and     r10,r3,r10,ror#2
        add     r6,r6,r9                        @ E+=X[i]
        eor     r10,r10,r5,ror#2                @ F_00_19(B,C,D)
        str     r9,[r14,#-4]!
        add     r6,r6,r10                       @ E+=F_00_19(B,C,D)
#if __ARM_ARCH__<7
        ldrb    r10,[r1,#2]
        ldrb    r9,[r1,#3]
        ldrb    r11,[r1,#1]
        add     r5,r8,r5,ror#2                  @ E+=K_00_19
        ldrb    r12,[r1],#4
        orr     r9,r9,r10,lsl#8
        eor     r10,r3,r4                       @ F_xx_xx
        orr     r9,r9,r11,lsl#16
        add     r5,r5,r6,ror#27                 @ E+=ROR(A,27)
        orr     r9,r9,r12,lsl#24
#else
        ldr     r9,[r1],#4                      @ handles unaligned
        add     r5,r8,r5,ror#2                  @ E+=K_00_19
        eor     r10,r3,r4                       @ F_xx_xx
        add     r5,r5,r6,ror#27                 @ E+=ROR(A,27)
#ifdef __ARMEL__
        rev     r9,r9                           @ byte swap
#endif
#endif
        and     r10,r7,r10,ror#2
        add     r5,r5,r9                        @ E+=X[i]
        eor     r10,r10,r4,ror#2                @ F_00_19(B,C,D)
        str     r9,[r14,#-4]!
        add     r5,r5,r10                       @ E+=F_00_19(B,C,D)
#if __ARM_ARCH__<7
        ldrb    r10,[r1,#2]
        ldrb    r9,[r1,#3]
        ldrb    r11,[r1,#1]
        add     r4,r8,r4,ror#2                  @ E+=K_00_19
        ldrb    r12,[r1],#4
        orr     r9,r9,r10,lsl#8
        eor     r10,r7,r3                       @ F_xx_xx
        orr     r9,r9,r11,lsl#16
        add     r4,r4,r5,ror#27                 @ E+=ROR(A,27)
        orr     r9,r9,r12,lsl#24
#else
        ldr     r9,[r1],#4                      @ handles unaligned
        add     r4,r8,r4,ror#2                  @ E+=K_00_19
        eor     r10,r7,r3                       @ F_xx_xx
        add     r4,r4,r5,ror#27                 @ E+=ROR(A,27)
#ifdef __ARMEL__
        rev     r9,r9                           @ byte swap
#endif
#endif
        and     r10,r6,r10,ror#2
        add     r4,r4,r9                        @ E+=X[i]
        eor     r10,r10,r3,ror#2                @ F_00_19(B,C,D)
        str     r9,[r14,#-4]!
        add     r4,r4,r10                       @ E+=F_00_19(B,C,D)
#if __ARM_ARCH__<7
        ldrb    r10,[r1,#2]
        ldrb    r9,[r1,#3]
        ldrb    r11,[r1,#1]
        add     r3,r8,r3,ror#2                  @ E+=K_00_19
        ldrb    r12,[r1],#4
        orr     r9,r9,r10,lsl#8
        eor     r10,r6,r7                       @ F_xx_xx
        orr     r9,r9,r11,lsl#16
        add     r3,r3,r4,ror#27                 @ E+=ROR(A,27)
        orr     r9,r9,r12,lsl#24
#else
        ldr     r9,[r1],#4                      @ handles unaligned
        add     r3,r8,r3,ror#2                  @ E+=K_00_19
        eor     r10,r6,r7                       @ F_xx_xx
        add     r3,r3,r4,ror#27                 @ E+=ROR(A,27)
#ifdef __ARMEL__
        rev     r9,r9                           @ byte swap
#endif
#endif
        and     r10,r5,r10,ror#2
        add     r3,r3,r9                        @ E+=X[i]
        eor     r10,r10,r7,ror#2                @ F_00_19(B,C,D)
        str     r9,[r14,#-4]!
        add     r3,r3,r10                       @ E+=F_00_19(B,C,D)
        cmp     r14,sp
        bne     .L_00_15                @ [((11+4)*5+2)*3]
        sub     sp,sp,#25*4
#if __ARM_ARCH__<7
        ldrb    r10,[r1,#2]
        ldrb    r9,[r1,#3]
        ldrb    r11,[r1,#1]
        add     r7,r8,r7,ror#2                  @ E+=K_00_19
        ldrb    r12,[r1],#4
        orr     r9,r9,r10,lsl#8
        eor     r10,r5,r6                       @ F_xx_xx
        orr     r9,r9,r11,lsl#16
        add     r7,r7,r3,ror#27                 @ E+=ROR(A,27)
        orr     r9,r9,r12,lsl#24
#else
        ldr     r9,[r1],#4                      @ handles unaligned
        add     r7,r8,r7,ror#2                  @ E+=K_00_19
        eor     r10,r5,r6                       @ F_xx_xx
        add     r7,r7,r3,ror#27                 @ E+=ROR(A,27)
#ifdef __ARMEL__
        rev     r9,r9                           @ byte swap
#endif
#endif
        and     r10,r4,r10,ror#2
        add     r7,r7,r9                        @ E+=X[i]
        eor     r10,r10,r6,ror#2                @ F_00_19(B,C,D)
        str     r9,[r14,#-4]!
        add     r7,r7,r10                       @ E+=F_00_19(B,C,D)
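@ Round 15 above is the last one fed directly from the input; from here
@ on X[i] comes from the message schedule,
@   W[i] = ROL(W[i-3] ^ W[i-8] ^ W[i-14] ^ W[i-16], 1).
@ [r14] holds X[i-1], so the #2*4, #7*4, #13*4 and #15*4 offsets below
@ pick up W[i-3], W[i-8], W[i-14] and W[i-16] respectively.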
        ldr     r9,[r14,#15*4]
        ldr     r10,[r14,#13*4]
        ldr     r11,[r14,#7*4]
        add     r6,r8,r6,ror#2                  @ E+=K_xx_xx
        ldr     r12,[r14,#2*4]
        eor     r9,r9,r10
        eor     r11,r11,r12                     @ 1 cycle stall
        eor     r10,r4,r5                       @ F_xx_xx
        mov     r9,r9,ror#31
        add     r6,r6,r7,ror#27                 @ E+=ROR(A,27)
        eor     r9,r9,r11,ror#31
        str     r9,[r14,#-4]!
        and r10,r3,r10,ror#2                                    @ F_xx_xx
                                                @ F_xx_xx
        add     r6,r6,r9                        @ E+=X[i]
        eor     r10,r10,r5,ror#2                @ F_00_19(B,C,D)
        add     r6,r6,r10                       @ E+=F_00_19(B,C,D)
        ldr     r9,[r14,#15*4]
        ldr     r10,[r14,#13*4]
        ldr     r11,[r14,#7*4]
        add     r5,r8,r5,ror#2                  @ E+=K_xx_xx
        ldr     r12,[r14,#2*4]
        eor     r9,r9,r10
        eor     r11,r11,r12                     @ 1 cycle stall
        eor     r10,r3,r4                       @ F_xx_xx
        mov     r9,r9,ror#31
        add     r5,r5,r6,ror#27                 @ E+=ROR(A,27)
        eor     r9,r9,r11,ror#31
        str     r9,[r14,#-4]!
        and r10,r7,r10,ror#2                                    @ F_xx_xx
                                                @ F_xx_xx
        add     r5,r5,r9                        @ E+=X[i]
        eor     r10,r10,r4,ror#2                @ F_00_19(B,C,D)
        add     r5,r5,r10                       @ E+=F_00_19(B,C,D)
        ldr     r9,[r14,#15*4]
        ldr     r10,[r14,#13*4]
        ldr     r11,[r14,#7*4]
        add     r4,r8,r4,ror#2                  @ E+=K_xx_xx
        ldr     r12,[r14,#2*4]
        eor     r9,r9,r10
        eor     r11,r11,r12                     @ 1 cycle stall
        eor     r10,r7,r3                       @ F_xx_xx
        mov     r9,r9,ror#31
        add     r4,r4,r5,ror#27                 @ E+=ROR(A,27)
        eor     r9,r9,r11,ror#31
        str     r9,[r14,#-4]!
        and r10,r6,r10,ror#2                                    @ F_xx_xx
                                                @ F_xx_xx
        add     r4,r4,r9                        @ E+=X[i]
        eor     r10,r10,r3,ror#2                @ F_00_19(B,C,D)
        add     r4,r4,r10                       @ E+=F_00_19(B,C,D)
        ldr     r9,[r14,#15*4]
        ldr     r10,[r14,#13*4]
        ldr     r11,[r14,#7*4]
        add     r3,r8,r3,ror#2                  @ E+=K_xx_xx
        ldr     r12,[r14,#2*4]
        eor     r9,r9,r10
        eor     r11,r11,r12                     @ 1 cycle stall
        eor     r10,r6,r7                       @ F_xx_xx
        mov     r9,r9,ror#31
        add     r3,r3,r4,ror#27                 @ E+=ROR(A,27)
        eor     r9,r9,r11,ror#31
        str     r9,[r14,#-4]!
        and r10,r5,r10,ror#2                                    @ F_xx_xx
                                                @ F_xx_xx
        add     r3,r3,r9                        @ E+=X[i]
        eor     r10,r10,r7,ror#2                @ F_00_19(B,C,D)
        add     r3,r3,r10                       @ E+=F_00_19(B,C,D)

        ldr     r8,.LK_20_39            @ [+15+16*4]
        cmn     sp,#0                   @ [+3], clear carry to denote 20_39
.L_20_39_or_60_79:
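@ Shared body for rounds 20..39 and 60..79: both use F(B,C,D) = B^C^D
@ and differ only in the constant loaded into r8.  The carry flag
@ selects the exit: it is cleared (cmn sp,#0) before the 20..39 pass
@ and set (cmp sp,#0) before the 60..79 pass, so "bcs .L_done" below
@ falls through to the 40..59 rounds the first time and finishes the
@ block the second time.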
        ldr     r9,[r14,#15*4]
        ldr     r10,[r14,#13*4]
        ldr     r11,[r14,#7*4]
        add     r7,r8,r7,ror#2                  @ E+=K_xx_xx
        ldr     r12,[r14,#2*4]
        eor     r9,r9,r10
        eor     r11,r11,r12                     @ 1 cycle stall
        eor     r10,r5,r6                       @ F_xx_xx
        mov     r9,r9,ror#31
        add     r7,r7,r3,ror#27                 @ E+=ROR(A,27)
        eor     r9,r9,r11,ror#31
        str     r9,[r14,#-4]!
        eor r10,r4,r10,ror#2                                    @ F_xx_xx
                                                @ F_xx_xx
        add     r7,r7,r9                        @ E+=X[i]
        add     r7,r7,r10                       @ E+=F_20_39(B,C,D)
        ldr     r9,[r14,#15*4]
        ldr     r10,[r14,#13*4]
        ldr     r11,[r14,#7*4]
        add     r6,r8,r6,ror#2                  @ E+=K_xx_xx
        ldr     r12,[r14,#2*4]
        eor     r9,r9,r10
        eor     r11,r11,r12                     @ 1 cycle stall
        eor     r10,r4,r5                       @ F_xx_xx
        mov     r9,r9,ror#31
        add     r6,r6,r7,ror#27                 @ E+=ROR(A,27)
        eor     r9,r9,r11,ror#31
        str     r9,[r14,#-4]!
        eor r10,r3,r10,ror#2                                    @ F_xx_xx
                                                @ F_xx_xx
        add     r6,r6,r9                        @ E+=X[i]
        add     r6,r6,r10                       @ E+=F_20_39(B,C,D)
        ldr     r9,[r14,#15*4]
        ldr     r10,[r14,#13*4]
        ldr     r11,[r14,#7*4]
        add     r5,r8,r5,ror#2                  @ E+=K_xx_xx
        ldr     r12,[r14,#2*4]
        eor     r9,r9,r10
        eor     r11,r11,r12                     @ 1 cycle stall
        eor     r10,r3,r4                       @ F_xx_xx
        mov     r9,r9,ror#31
        add     r5,r5,r6,ror#27                 @ E+=ROR(A,27)
        eor     r9,r9,r11,ror#31
        str     r9,[r14,#-4]!
        eor r10,r7,r10,ror#2                                    @ F_xx_xx
                                                @ F_xx_xx
        add     r5,r5,r9                        @ E+=X[i]
        add     r5,r5,r10                       @ E+=F_20_39(B,C,D)
        ldr     r9,[r14,#15*4]
        ldr     r10,[r14,#13*4]
        ldr     r11,[r14,#7*4]
        add     r4,r8,r4,ror#2                  @ E+=K_xx_xx
        ldr     r12,[r14,#2*4]
        eor     r9,r9,r10
        eor     r11,r11,r12                     @ 1 cycle stall
        eor     r10,r7,r3                       @ F_xx_xx
        mov     r9,r9,ror#31
        add     r4,r4,r5,ror#27                 @ E+=ROR(A,27)
        eor     r9,r9,r11,ror#31
        str     r9,[r14,#-4]!
        eor r10,r6,r10,ror#2                                    @ F_xx_xx
                                                @ F_xx_xx
        add     r4,r4,r9                        @ E+=X[i]
        add     r4,r4,r10                       @ E+=F_20_39(B,C,D)
        ldr     r9,[r14,#15*4]
        ldr     r10,[r14,#13*4]
        ldr     r11,[r14,#7*4]
        add     r3,r8,r3,ror#2                  @ E+=K_xx_xx
        ldr     r12,[r14,#2*4]
        eor     r9,r9,r10
        eor     r11,r11,r12                     @ 1 cycle stall
        eor     r10,r6,r7                       @ F_xx_xx
        mov     r9,r9,ror#31
        add     r3,r3,r4,ror#27                 @ E+=ROR(A,27)
        eor     r9,r9,r11,ror#31
        str     r9,[r14,#-4]!
        eor r10,r5,r10,ror#2                                    @ F_xx_xx
                                                @ F_xx_xx
        add     r3,r3,r9                        @ E+=X[i]
        add     r3,r3,r10                       @ E+=F_20_39(B,C,D)
 ARM(   teq     r14,sp          )       @ preserve carry
 THUMB( mov     r11,sp          )
 THUMB( teq     r14,r11         )       @ preserve carry
        bne     .L_20_39_or_60_79       @ [+((12+3)*5+2)*4]
        bcs     .L_done                 @ [+((12+3)*5+2)*4], spare 300 bytes

        ldr     r8,.LK_40_59
        sub     sp,sp,#20*4             @ [+2]
.L_40_59:
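@ Rounds 40..59: F(B,C,D) = Maj(B,C,D), computed here as
@ (B & (C ^ D)) + (C & D).  The two terms never share a set bit, so
@ adding them into E is equivalent to OR-ing them first.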
        ldr     r9,[r14,#15*4]
        ldr     r10,[r14,#13*4]
        ldr     r11,[r14,#7*4]
        add     r7,r8,r7,ror#2                  @ E+=K_xx_xx
        ldr     r12,[r14,#2*4]
        eor     r9,r9,r10
        eor     r11,r11,r12                     @ 1 cycle stall
        eor     r10,r5,r6                       @ F_xx_xx
        mov     r9,r9,ror#31
        add     r7,r7,r3,ror#27                 @ E+=ROR(A,27)
        eor     r9,r9,r11,ror#31
        str     r9,[r14,#-4]!
        and r10,r4,r10,ror#2                                    @ F_xx_xx
        and r11,r5,r6                                   @ F_xx_xx
        add     r7,r7,r9                        @ E+=X[i]
        add     r7,r7,r10                       @ E+=F_40_59(B,C,D)
        add     r7,r7,r11,ror#2
        ldr     r9,[r14,#15*4]
        ldr     r10,[r14,#13*4]
        ldr     r11,[r14,#7*4]
        add     r6,r8,r6,ror#2                  @ E+=K_xx_xx
        ldr     r12,[r14,#2*4]
        eor     r9,r9,r10
        eor     r11,r11,r12                     @ 1 cycle stall
        eor     r10,r4,r5                       @ F_xx_xx
        mov     r9,r9,ror#31
        add     r6,r6,r7,ror#27                 @ E+=ROR(A,27)
        eor     r9,r9,r11,ror#31
        str     r9,[r14,#-4]!
        and r10,r3,r10,ror#2                                    @ F_xx_xx
        and r11,r4,r5                                   @ F_xx_xx
        add     r6,r6,r9                        @ E+=X[i]
        add     r6,r6,r10                       @ E+=F_40_59(B,C,D)
        add     r6,r6,r11,ror#2
        ldr     r9,[r14,#15*4]
        ldr     r10,[r14,#13*4]
        ldr     r11,[r14,#7*4]
        add     r5,r8,r5,ror#2                  @ E+=K_xx_xx
        ldr     r12,[r14,#2*4]
        eor     r9,r9,r10
        eor     r11,r11,r12                     @ 1 cycle stall
        eor     r10,r3,r4                       @ F_xx_xx
        mov     r9,r9,ror#31
        add     r5,r5,r6,ror#27                 @ E+=ROR(A,27)
        eor     r9,r9,r11,ror#31
        str     r9,[r14,#-4]!
        and r10,r7,r10,ror#2                                    @ F_xx_xx
        and r11,r3,r4                                   @ F_xx_xx
        add     r5,r5,r9                        @ E+=X[i]
        add     r5,r5,r10                       @ E+=F_40_59(B,C,D)
        add     r5,r5,r11,ror#2
        ldr     r9,[r14,#15*4]
        ldr     r10,[r14,#13*4]
        ldr     r11,[r14,#7*4]
        add     r4,r8,r4,ror#2                  @ E+=K_xx_xx
        ldr     r12,[r14,#2*4]
        eor     r9,r9,r10
        eor     r11,r11,r12                     @ 1 cycle stall
        eor     r10,r7,r3                       @ F_xx_xx
        mov     r9,r9,ror#31
        add     r4,r4,r5,ror#27                 @ E+=ROR(A,27)
        eor     r9,r9,r11,ror#31
        str     r9,[r14,#-4]!
        and r10,r6,r10,ror#2                                    @ F_xx_xx
        and r11,r7,r3                                   @ F_xx_xx
        add     r4,r4,r9                        @ E+=X[i]
        add     r4,r4,r10                       @ E+=F_40_59(B,C,D)
        add     r4,r4,r11,ror#2
        ldr     r9,[r14,#15*4]
        ldr     r10,[r14,#13*4]
        ldr     r11,[r14,#7*4]
        add     r3,r8,r3,ror#2                  @ E+=K_xx_xx
        ldr     r12,[r14,#2*4]
        eor     r9,r9,r10
        eor     r11,r11,r12                     @ 1 cycle stall
        eor     r10,r6,r7                       @ F_xx_xx
        mov     r9,r9,ror#31
        add     r3,r3,r4,ror#27                 @ E+=ROR(A,27)
        eor     r9,r9,r11,ror#31
        str     r9,[r14,#-4]!
        and r10,r5,r10,ror#2                                    @ F_xx_xx
        and r11,r6,r7                                   @ F_xx_xx
        add     r3,r3,r9                        @ E+=X[i]
        add     r3,r3,r10                       @ E+=F_40_59(B,C,D)
        add     r3,r3,r11,ror#2
        cmp     r14,sp
        bne     .L_40_59                @ [+((12+5)*5+2)*4]

        ldr     r8,.LK_60_79
        sub     sp,sp,#20*4
        cmp     sp,#0                   @ set carry to denote 60_79
        b       .L_20_39_or_60_79       @ [+4], spare 300 bytes
.L_done:
        add     sp,sp,#80*4             @ "deallocate" stack frame
        ldmia   r0,{r8,r9,r10,r11,r12}
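@ h0..h4 += A..E; C, D and E are rotated back by 2 (see the note before
@ .L_00_15) as they are folded in, then the loop repeats until r1
@ reaches the end of input marked by r2.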
        add     r3,r8,r3
        add     r4,r9,r4
        add     r5,r10,r5,ror#2
        add     r6,r11,r6,ror#2
        add     r7,r12,r7,ror#2
        stmia   r0,{r3,r4,r5,r6,r7}
        teq     r1,r2
        bne     .Lloop                  @ [+18], total 1307

        ldmia   sp!,{r4-r12,pc}
.align  2
.LK_00_19:      .word   0x5a827999
.LK_20_39:      .word   0x6ed9eba1
.LK_40_59:      .word   0x8f1bbcdc
.LK_60_79:      .word   0xca62c1d6
ENDPROC(sha1_block_data_order)
.asciz  "SHA1 block transform for ARMv4, CRYPTOGAMS by <appro@openssl.org>"
.align  2