Merge tag 'pci-v5.1-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/helgaas/pci
[sfrench/cifs-2.6.git] / arch / arm / crypto / sha256-core.S_shipped
1 @ SPDX-License-Identifier: GPL-2.0
2
3 @ This code is taken from the OpenSSL project but the author (Andy Polyakov)
4 @ has relicensed it under the GPLv2. Therefore this program is free software;
5 @ you can redistribute it and/or modify it under the terms of the GNU General
6 @ Public License version 2 as published by the Free Software Foundation.
7 @
8 @ The original headers, including the original license headers, are
9 @ included below for completeness.
10
11 @ ====================================================================
12 @ Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
13 @ project. The module is, however, dual licensed under OpenSSL and
14 @ CRYPTOGAMS licenses depending on where you obtain it. For further
15 @ details see http://www.openssl.org/~appro/cryptogams/.
16 @ ====================================================================
17
18 @ SHA256 block procedure for ARMv4. May 2007.
19
20 @ Performance is ~2x better than gcc 3.4 generated code and in
21 @ "absolute" terms is ~2250 cycles per 64-byte block or ~35 cycles per
22 @ byte [on single-issue Xscale PXA250 core].
23
24 @ July 2010.
25 @
26 @ Rescheduling for dual-issue pipeline resulted in 22% improvement on
27 @ Cortex A8 core and ~20 cycles per processed byte.
28
29 @ February 2011.
30 @
31 @ Profiler-assisted and platform-specific optimization resulted in 16%
32 @ improvement on Cortex A8 core and ~15.4 cycles per processed byte.
33
34 @ September 2013.
35 @
36 @ Add NEON implementation. On Cortex A8 it was measured to process one
37 @ byte in 12.5 cycles or 23% faster than integer-only code. Snapdragon
38 @ S4 does it in 12.5 cycles too, but it's 50% faster than integer-only
39 @ code (meaning that the latter performs sub-optimally, nothing was done
40 @ about it).
41
42 @ May 2014.
43 @
44 @ Add ARMv8 code path performing at 2.0 cpb on Apple A7.
45
@ Build configuration.
@ Outside the kernel the settings are pulled in from arm_arch.h (presumably
@ this is where __ARM_ARCH__ and __ARM_MAX_ARCH__ come from - TODO confirm).
@ Inside the kernel __ARM_ARCH__ mirrors __LINUX_ARM_ARCH__ and
@ __ARM_MAX_ARCH__ is fixed at 7.
46 #ifndef __KERNEL__
47 # include "arm_arch.h"
48 #else
49 # define __ARM_ARCH__ __LINUX_ARM_ARCH__
50 # define __ARM_MAX_ARCH__ 7
51 #endif
52
@ Instruction set selection.
@ Targets older than ARMv7 are assembled as 32-bit ARM code. ARMv7 and later
@ switch to unified syntax and are assembled as Thumb when __thumb2__ is
@ defined, otherwise as 32-bit ARM code. In the Thumb case every use of the
@ adrl pseudo-instruction is rewritten to plain adr by the macro below
@ (NOTE(review): presumably because adrl is not usable in Thumb mode - confirm
@ against the assembler documentation).
53 .text
54 #if __ARM_ARCH__<7
55 .code   32
56 #else
57 .syntax unified
58 # ifdef __thumb2__
59 #  define adrl adr
60 .thumb
61 # else
62 .code   32
63 # endif
64 #endif
65
@ K256: table of the 64 SHA-256 round constants, stored as 32-bit words
@ (16 rows of 4 words = 256 bytes) and aligned to a 32-byte boundary.
@
@ The integer rounds walk the table sequentially with a post-incremented
@ load ("ldr rX,[r14],#4  @ *K256++").
@
@ LAYOUT DEPENDENCY: sha256_block_data_order does not load the table address
@ from a literal; it derives it from its own address as "function - (256+32)".
@ That only works while the 256-byte table is followed by exactly 32 bytes
@ before the function entry: the terminator word, the optional
@ .LOPENSSL_armcap word, and the padding inserted by the trailing .align 5.
@ Adding or removing data between K256 and the function breaks that offset.
66 .type   K256,%object
67 .align  5
68 K256:
69 .word   0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
70 .word   0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
71 .word   0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
72 .word   0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
73 .word   0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
74 .word   0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
75 .word   0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
76 .word   0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
77 .word   0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
78 .word   0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
79 .word   0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
80 .word   0xd192e819,0xd6990624,0xf40e3585,0x106aa070
81 .word   0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
82 .word   0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
83 .word   0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
@ Last row: the low byte of the final constant is 0xf2, the value the round
@ code compares against in its "done?" end-of-table check.
84 .word   0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
@ The symbol size covers the 64 constants only; the zero word below is
@ outside the K256 object (NOTE(review): its consumer is not in the visible
@ part of the file - presumably another code path scans for it; confirm).
85 .size   K256,.-K256
86 .word   0                               @ terminator
@ Non-kernel builds with ARMv7+ support also store the link-time distance
@ from sha256_block_data_order to OPENSSL_armcap_P, so the function can read
@ the capability word relative to its own address.
87 #if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
88 .LOPENSSL_armcap:
89 .word   OPENSSL_armcap_P-sha256_block_data_order
90 #endif
@ Pad to the next 32-byte boundary so the function entry lands exactly
@ 256+32 bytes after K256 (see the layout note above the table).
91 .align  5
92
93 .global sha256_block_data_order
94 .type   sha256_block_data_order,%function
95 sha256_block_data_order:
96 .Lsha256_block_data_order:
97 #if __ARM_ARCH__<7
98         sub     r3,pc,#8                @ sha256_block_data_order
99 #else
100         adr     r3,.Lsha256_block_data_order
101 #endif
102 #if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
103         ldr     r12,.LOPENSSL_armcap
104         ldr     r12,[r3,r12]            @ OPENSSL_armcap_P
105         tst     r12,#ARMV8_SHA256
106         bne     .LARMv8
107         tst     r12,#ARMV7_NEON
108         bne     .LNEON
109 #endif
110         add     r2,r1,r2,lsl#6  @ len to point at the end of inp
111         stmdb   sp!,{r0,r1,r2,r4-r11,lr}
112         ldmia   r0,{r4,r5,r6,r7,r8,r9,r10,r11}
113         sub     r14,r3,#256+32  @ K256
114         sub     sp,sp,#16*4             @ alloca(X[16])
115 .Loop:
116 # if __ARM_ARCH__>=7
117         ldr     r2,[r1],#4
118 # else
119         ldrb    r2,[r1,#3]
120 # endif
121         eor     r3,r5,r6                @ magic
122         eor     r12,r12,r12
123 #if __ARM_ARCH__>=7
124         @ ldr   r2,[r1],#4                      @ 0
125 # if 0==15
126         str     r1,[sp,#17*4]                   @ make room for r1
127 # endif
128         eor     r0,r8,r8,ror#5
129         add     r4,r4,r12                       @ h+=Maj(a,b,c) from the past
130         eor     r0,r0,r8,ror#19 @ Sigma1(e)
131 # ifndef __ARMEB__
132         rev     r2,r2
133 # endif
134 #else
135         @ ldrb  r2,[r1,#3]                      @ 0
136         add     r4,r4,r12                       @ h+=Maj(a,b,c) from the past
137         ldrb    r12,[r1,#2]
138         ldrb    r0,[r1,#1]
139         orr     r2,r2,r12,lsl#8
140         ldrb    r12,[r1],#4
141         orr     r2,r2,r0,lsl#16
142 # if 0==15
143         str     r1,[sp,#17*4]                   @ make room for r1
144 # endif
145         eor     r0,r8,r8,ror#5
146         orr     r2,r2,r12,lsl#24
147         eor     r0,r0,r8,ror#19 @ Sigma1(e)
148 #endif
149         ldr     r12,[r14],#4                    @ *K256++
150         add     r11,r11,r2                      @ h+=X[i]
151         str     r2,[sp,#0*4]
152         eor     r2,r9,r10
153         add     r11,r11,r0,ror#6        @ h+=Sigma1(e)
154         and     r2,r2,r8
155         add     r11,r11,r12                     @ h+=K256[i]
156         eor     r2,r2,r10                       @ Ch(e,f,g)
157         eor     r0,r4,r4,ror#11
158         add     r11,r11,r2                      @ h+=Ch(e,f,g)
159 #if 0==31
160         and     r12,r12,#0xff
161         cmp     r12,#0xf2                       @ done?
162 #endif
163 #if 0<15
164 # if __ARM_ARCH__>=7
165         ldr     r2,[r1],#4                      @ prefetch
166 # else
167         ldrb    r2,[r1,#3]
168 # endif
169         eor     r12,r4,r5                       @ a^b, b^c in next round
170 #else
171         ldr     r2,[sp,#2*4]            @ from future BODY_16_xx
172         eor     r12,r4,r5                       @ a^b, b^c in next round
173         ldr     r1,[sp,#15*4]   @ from future BODY_16_xx
174 #endif
175         eor     r0,r0,r4,ror#20 @ Sigma0(a)
176         and     r3,r3,r12                       @ (b^c)&=(a^b)
177         add     r7,r7,r11                       @ d+=h
178         eor     r3,r3,r5                        @ Maj(a,b,c)
179         add     r11,r11,r0,ror#2        @ h+=Sigma0(a)
180         @ add   r11,r11,r3                      @ h+=Maj(a,b,c)
181 #if __ARM_ARCH__>=7
182         @ ldr   r2,[r1],#4                      @ 1
183 # if 1==15
184         str     r1,[sp,#17*4]                   @ make room for r1
185 # endif
186         eor     r0,r7,r7,ror#5
187         add     r11,r11,r3                      @ h+=Maj(a,b,c) from the past
188         eor     r0,r0,r7,ror#19 @ Sigma1(e)
189 # ifndef __ARMEB__
190         rev     r2,r2
191 # endif
192 #else
193         @ ldrb  r2,[r1,#3]                      @ 1
194         add     r11,r11,r3                      @ h+=Maj(a,b,c) from the past
195         ldrb    r3,[r1,#2]
196         ldrb    r0,[r1,#1]
197         orr     r2,r2,r3,lsl#8
198         ldrb    r3,[r1],#4
199         orr     r2,r2,r0,lsl#16
200 # if 1==15
201         str     r1,[sp,#17*4]                   @ make room for r1
202 # endif
203         eor     r0,r7,r7,ror#5
204         orr     r2,r2,r3,lsl#24
205         eor     r0,r0,r7,ror#19 @ Sigma1(e)
206 #endif
207         ldr     r3,[r14],#4                     @ *K256++
208         add     r10,r10,r2                      @ h+=X[i]
209         str     r2,[sp,#1*4]
210         eor     r2,r8,r9
211         add     r10,r10,r0,ror#6        @ h+=Sigma1(e)
212         and     r2,r2,r7
213         add     r10,r10,r3                      @ h+=K256[i]
214         eor     r2,r2,r9                        @ Ch(e,f,g)
215         eor     r0,r11,r11,ror#11
216         add     r10,r10,r2                      @ h+=Ch(e,f,g)
217 #if 1==31
218         and     r3,r3,#0xff
219         cmp     r3,#0xf2                        @ done?
220 #endif
221 #if 1<15
222 # if __ARM_ARCH__>=7
223         ldr     r2,[r1],#4                      @ prefetch
224 # else
225         ldrb    r2,[r1,#3]
226 # endif
227         eor     r3,r11,r4                       @ a^b, b^c in next round
228 #else
229         ldr     r2,[sp,#3*4]            @ from future BODY_16_xx
230         eor     r3,r11,r4                       @ a^b, b^c in next round
231         ldr     r1,[sp,#0*4]    @ from future BODY_16_xx
232 #endif
233         eor     r0,r0,r11,ror#20        @ Sigma0(a)
234         and     r12,r12,r3                      @ (b^c)&=(a^b)
235         add     r6,r6,r10                       @ d+=h
236         eor     r12,r12,r4                      @ Maj(a,b,c)
237         add     r10,r10,r0,ror#2        @ h+=Sigma0(a)
238         @ add   r10,r10,r12                     @ h+=Maj(a,b,c)
239 #if __ARM_ARCH__>=7
240         @ ldr   r2,[r1],#4                      @ 2
241 # if 2==15
242         str     r1,[sp,#17*4]                   @ make room for r1
243 # endif
244         eor     r0,r6,r6,ror#5
245         add     r10,r10,r12                     @ h+=Maj(a,b,c) from the past
246         eor     r0,r0,r6,ror#19 @ Sigma1(e)
247 # ifndef __ARMEB__
248         rev     r2,r2
249 # endif
250 #else
251         @ ldrb  r2,[r1,#3]                      @ 2
252         add     r10,r10,r12                     @ h+=Maj(a,b,c) from the past
253         ldrb    r12,[r1,#2]
254         ldrb    r0,[r1,#1]
255         orr     r2,r2,r12,lsl#8
256         ldrb    r12,[r1],#4
257         orr     r2,r2,r0,lsl#16
258 # if 2==15
259         str     r1,[sp,#17*4]                   @ make room for r1
260 # endif
261         eor     r0,r6,r6,ror#5
262         orr     r2,r2,r12,lsl#24
263         eor     r0,r0,r6,ror#19 @ Sigma1(e)
264 #endif
265         ldr     r12,[r14],#4                    @ *K256++
266         add     r9,r9,r2                        @ h+=X[i]
267         str     r2,[sp,#2*4]
268         eor     r2,r7,r8
269         add     r9,r9,r0,ror#6  @ h+=Sigma1(e)
270         and     r2,r2,r6
271         add     r9,r9,r12                       @ h+=K256[i]
272         eor     r2,r2,r8                        @ Ch(e,f,g)
273         eor     r0,r10,r10,ror#11
274         add     r9,r9,r2                        @ h+=Ch(e,f,g)
275 #if 2==31
276         and     r12,r12,#0xff
277         cmp     r12,#0xf2                       @ done?
278 #endif
279 #if 2<15
280 # if __ARM_ARCH__>=7
281         ldr     r2,[r1],#4                      @ prefetch
282 # else
283         ldrb    r2,[r1,#3]
284 # endif
285         eor     r12,r10,r11                     @ a^b, b^c in next round
286 #else
287         ldr     r2,[sp,#4*4]            @ from future BODY_16_xx
288         eor     r12,r10,r11                     @ a^b, b^c in next round
289         ldr     r1,[sp,#1*4]    @ from future BODY_16_xx
290 #endif
291         eor     r0,r0,r10,ror#20        @ Sigma0(a)
292         and     r3,r3,r12                       @ (b^c)&=(a^b)
293         add     r5,r5,r9                        @ d+=h
294         eor     r3,r3,r11                       @ Maj(a,b,c)
295         add     r9,r9,r0,ror#2  @ h+=Sigma0(a)
296         @ add   r9,r9,r3                        @ h+=Maj(a,b,c)
297 #if __ARM_ARCH__>=7
298         @ ldr   r2,[r1],#4                      @ 3
299 # if 3==15
300         str     r1,[sp,#17*4]                   @ make room for r1
301 # endif
302         eor     r0,r5,r5,ror#5
303         add     r9,r9,r3                        @ h+=Maj(a,b,c) from the past
304         eor     r0,r0,r5,ror#19 @ Sigma1(e)
305 # ifndef __ARMEB__
306         rev     r2,r2
307 # endif
308 #else
309         @ ldrb  r2,[r1,#3]                      @ 3
310         add     r9,r9,r3                        @ h+=Maj(a,b,c) from the past
311         ldrb    r3,[r1,#2]
312         ldrb    r0,[r1,#1]
313         orr     r2,r2,r3,lsl#8
314         ldrb    r3,[r1],#4
315         orr     r2,r2,r0,lsl#16
316 # if 3==15
317         str     r1,[sp,#17*4]                   @ make room for r1
318 # endif
319         eor     r0,r5,r5,ror#5
320         orr     r2,r2,r3,lsl#24
321         eor     r0,r0,r5,ror#19 @ Sigma1(e)
322 #endif
323         ldr     r3,[r14],#4                     @ *K256++
324         add     r8,r8,r2                        @ h+=X[i]
325         str     r2,[sp,#3*4]
326         eor     r2,r6,r7
327         add     r8,r8,r0,ror#6  @ h+=Sigma1(e)
328         and     r2,r2,r5
329         add     r8,r8,r3                        @ h+=K256[i]
330         eor     r2,r2,r7                        @ Ch(e,f,g)
331         eor     r0,r9,r9,ror#11
332         add     r8,r8,r2                        @ h+=Ch(e,f,g)
333 #if 3==31
334         and     r3,r3,#0xff
335         cmp     r3,#0xf2                        @ done?
336 #endif
337 #if 3<15
338 # if __ARM_ARCH__>=7
339         ldr     r2,[r1],#4                      @ prefetch
340 # else
341         ldrb    r2,[r1,#3]
342 # endif
343         eor     r3,r9,r10                       @ a^b, b^c in next round
344 #else
345         ldr     r2,[sp,#5*4]            @ from future BODY_16_xx
346         eor     r3,r9,r10                       @ a^b, b^c in next round
347         ldr     r1,[sp,#2*4]    @ from future BODY_16_xx
348 #endif
349         eor     r0,r0,r9,ror#20 @ Sigma0(a)
350         and     r12,r12,r3                      @ (b^c)&=(a^b)
351         add     r4,r4,r8                        @ d+=h
352         eor     r12,r12,r10                     @ Maj(a,b,c)
353         add     r8,r8,r0,ror#2  @ h+=Sigma0(a)
354         @ add   r8,r8,r12                       @ h+=Maj(a,b,c)
355 #if __ARM_ARCH__>=7
356         @ ldr   r2,[r1],#4                      @ 4
357 # if 4==15
358         str     r1,[sp,#17*4]                   @ make room for r1
359 # endif
360         eor     r0,r4,r4,ror#5
361         add     r8,r8,r12                       @ h+=Maj(a,b,c) from the past
362         eor     r0,r0,r4,ror#19 @ Sigma1(e)
363 # ifndef __ARMEB__
364         rev     r2,r2
365 # endif
366 #else
367         @ ldrb  r2,[r1,#3]                      @ 4
368         add     r8,r8,r12                       @ h+=Maj(a,b,c) from the past
369         ldrb    r12,[r1,#2]
370         ldrb    r0,[r1,#1]
371         orr     r2,r2,r12,lsl#8
372         ldrb    r12,[r1],#4
373         orr     r2,r2,r0,lsl#16
374 # if 4==15
375         str     r1,[sp,#17*4]                   @ make room for r1
376 # endif
377         eor     r0,r4,r4,ror#5
378         orr     r2,r2,r12,lsl#24
379         eor     r0,r0,r4,ror#19 @ Sigma1(e)
380 #endif
381         ldr     r12,[r14],#4                    @ *K256++
382         add     r7,r7,r2                        @ h+=X[i]
383         str     r2,[sp,#4*4]
384         eor     r2,r5,r6
385         add     r7,r7,r0,ror#6  @ h+=Sigma1(e)
386         and     r2,r2,r4
387         add     r7,r7,r12                       @ h+=K256[i]
388         eor     r2,r2,r6                        @ Ch(e,f,g)
389         eor     r0,r8,r8,ror#11
390         add     r7,r7,r2                        @ h+=Ch(e,f,g)
391 #if 4==31
392         and     r12,r12,#0xff
393         cmp     r12,#0xf2                       @ done?
394 #endif
395 #if 4<15
396 # if __ARM_ARCH__>=7
397         ldr     r2,[r1],#4                      @ prefetch
398 # else
399         ldrb    r2,[r1,#3]
400 # endif
401         eor     r12,r8,r9                       @ a^b, b^c in next round
402 #else
403         ldr     r2,[sp,#6*4]            @ from future BODY_16_xx
404         eor     r12,r8,r9                       @ a^b, b^c in next round
405         ldr     r1,[sp,#3*4]    @ from future BODY_16_xx
406 #endif
407         eor     r0,r0,r8,ror#20 @ Sigma0(a)
408         and     r3,r3,r12                       @ (b^c)&=(a^b)
409         add     r11,r11,r7                      @ d+=h
410         eor     r3,r3,r9                        @ Maj(a,b,c)
411         add     r7,r7,r0,ror#2  @ h+=Sigma0(a)
412         @ add   r7,r7,r3                        @ h+=Maj(a,b,c)
413 #if __ARM_ARCH__>=7
414         @ ldr   r2,[r1],#4                      @ 5
415 # if 5==15
416         str     r1,[sp,#17*4]                   @ make room for r1
417 # endif
418         eor     r0,r11,r11,ror#5
419         add     r7,r7,r3                        @ h+=Maj(a,b,c) from the past
420         eor     r0,r0,r11,ror#19        @ Sigma1(e)
421 # ifndef __ARMEB__
422         rev     r2,r2
423 # endif
424 #else
425         @ ldrb  r2,[r1,#3]                      @ 5
426         add     r7,r7,r3                        @ h+=Maj(a,b,c) from the past
427         ldrb    r3,[r1,#2]
428         ldrb    r0,[r1,#1]
429         orr     r2,r2,r3,lsl#8
430         ldrb    r3,[r1],#4
431         orr     r2,r2,r0,lsl#16
432 # if 5==15
433         str     r1,[sp,#17*4]                   @ make room for r1
434 # endif
435         eor     r0,r11,r11,ror#5
436         orr     r2,r2,r3,lsl#24
437         eor     r0,r0,r11,ror#19        @ Sigma1(e)
438 #endif
439         ldr     r3,[r14],#4                     @ *K256++
440         add     r6,r6,r2                        @ h+=X[i]
441         str     r2,[sp,#5*4]
442         eor     r2,r4,r5
443         add     r6,r6,r0,ror#6  @ h+=Sigma1(e)
444         and     r2,r2,r11
445         add     r6,r6,r3                        @ h+=K256[i]
446         eor     r2,r2,r5                        @ Ch(e,f,g)
447         eor     r0,r7,r7,ror#11
448         add     r6,r6,r2                        @ h+=Ch(e,f,g)
449 #if 5==31
450         and     r3,r3,#0xff
451         cmp     r3,#0xf2                        @ done?
452 #endif
453 #if 5<15
454 # if __ARM_ARCH__>=7
455         ldr     r2,[r1],#4                      @ prefetch
456 # else
457         ldrb    r2,[r1,#3]
458 # endif
459         eor     r3,r7,r8                        @ a^b, b^c in next round
460 #else
461         ldr     r2,[sp,#7*4]            @ from future BODY_16_xx
462         eor     r3,r7,r8                        @ a^b, b^c in next round
463         ldr     r1,[sp,#4*4]    @ from future BODY_16_xx
464 #endif
465         eor     r0,r0,r7,ror#20 @ Sigma0(a)
466         and     r12,r12,r3                      @ (b^c)&=(a^b)
467         add     r10,r10,r6                      @ d+=h
468         eor     r12,r12,r8                      @ Maj(a,b,c)
469         add     r6,r6,r0,ror#2  @ h+=Sigma0(a)
470         @ add   r6,r6,r12                       @ h+=Maj(a,b,c)
471 #if __ARM_ARCH__>=7
472         @ ldr   r2,[r1],#4                      @ 6
473 # if 6==15
474         str     r1,[sp,#17*4]                   @ make room for r1
475 # endif
476         eor     r0,r10,r10,ror#5
477         add     r6,r6,r12                       @ h+=Maj(a,b,c) from the past
478         eor     r0,r0,r10,ror#19        @ Sigma1(e)
479 # ifndef __ARMEB__
480         rev     r2,r2
481 # endif
482 #else
483         @ ldrb  r2,[r1,#3]                      @ 6
484         add     r6,r6,r12                       @ h+=Maj(a,b,c) from the past
485         ldrb    r12,[r1,#2]
486         ldrb    r0,[r1,#1]
487         orr     r2,r2,r12,lsl#8
488         ldrb    r12,[r1],#4
489         orr     r2,r2,r0,lsl#16
490 # if 6==15
491         str     r1,[sp,#17*4]                   @ make room for r1
492 # endif
493         eor     r0,r10,r10,ror#5
494         orr     r2,r2,r12,lsl#24
495         eor     r0,r0,r10,ror#19        @ Sigma1(e)
496 #endif
497         ldr     r12,[r14],#4                    @ *K256++
498         add     r5,r5,r2                        @ h+=X[i]
499         str     r2,[sp,#6*4]
500         eor     r2,r11,r4
501         add     r5,r5,r0,ror#6  @ h+=Sigma1(e)
502         and     r2,r2,r10
503         add     r5,r5,r12                       @ h+=K256[i]
504         eor     r2,r2,r4                        @ Ch(e,f,g)
505         eor     r0,r6,r6,ror#11
506         add     r5,r5,r2                        @ h+=Ch(e,f,g)
507 #if 6==31
508         and     r12,r12,#0xff
509         cmp     r12,#0xf2                       @ done?
510 #endif
511 #if 6<15
512 # if __ARM_ARCH__>=7
513         ldr     r2,[r1],#4                      @ prefetch
514 # else
515         ldrb    r2,[r1,#3]
516 # endif
517         eor     r12,r6,r7                       @ a^b, b^c in next round
518 #else
519         ldr     r2,[sp,#8*4]            @ from future BODY_16_xx
520         eor     r12,r6,r7                       @ a^b, b^c in next round
521         ldr     r1,[sp,#5*4]    @ from future BODY_16_xx
522 #endif
523         eor     r0,r0,r6,ror#20 @ Sigma0(a)
524         and     r3,r3,r12                       @ (b^c)&=(a^b)
525         add     r9,r9,r5                        @ d+=h
526         eor     r3,r3,r7                        @ Maj(a,b,c)
527         add     r5,r5,r0,ror#2  @ h+=Sigma0(a)
528         @ add   r5,r5,r3                        @ h+=Maj(a,b,c)
529 #if __ARM_ARCH__>=7
530         @ ldr   r2,[r1],#4                      @ 7
531 # if 7==15
532         str     r1,[sp,#17*4]                   @ make room for r1
533 # endif
534         eor     r0,r9,r9,ror#5
535         add     r5,r5,r3                        @ h+=Maj(a,b,c) from the past
536         eor     r0,r0,r9,ror#19 @ Sigma1(e)
537 # ifndef __ARMEB__
538         rev     r2,r2
539 # endif
540 #else
541         @ ldrb  r2,[r1,#3]                      @ 7
542         add     r5,r5,r3                        @ h+=Maj(a,b,c) from the past
543         ldrb    r3,[r1,#2]
544         ldrb    r0,[r1,#1]
545         orr     r2,r2,r3,lsl#8
546         ldrb    r3,[r1],#4
547         orr     r2,r2,r0,lsl#16
548 # if 7==15
549         str     r1,[sp,#17*4]                   @ make room for r1
550 # endif
551         eor     r0,r9,r9,ror#5
552         orr     r2,r2,r3,lsl#24
553         eor     r0,r0,r9,ror#19 @ Sigma1(e)
554 #endif
555         ldr     r3,[r14],#4                     @ *K256++
556         add     r4,r4,r2                        @ h+=X[i]
557         str     r2,[sp,#7*4]
558         eor     r2,r10,r11
559         add     r4,r4,r0,ror#6  @ h+=Sigma1(e)
560         and     r2,r2,r9
561         add     r4,r4,r3                        @ h+=K256[i]
562         eor     r2,r2,r11                       @ Ch(e,f,g)
563         eor     r0,r5,r5,ror#11
564         add     r4,r4,r2                        @ h+=Ch(e,f,g)
565 #if 7==31
566         and     r3,r3,#0xff
567         cmp     r3,#0xf2                        @ done?
568 #endif
569 #if 7<15
570 # if __ARM_ARCH__>=7
571         ldr     r2,[r1],#4                      @ prefetch
572 # else
573         ldrb    r2,[r1,#3]
574 # endif
575         eor     r3,r5,r6                        @ a^b, b^c in next round
576 #else
577         ldr     r2,[sp,#9*4]            @ from future BODY_16_xx
578         eor     r3,r5,r6                        @ a^b, b^c in next round
579         ldr     r1,[sp,#6*4]    @ from future BODY_16_xx
580 #endif
581         eor     r0,r0,r5,ror#20 @ Sigma0(a)
582         and     r12,r12,r3                      @ (b^c)&=(a^b)
583         add     r8,r8,r4                        @ d+=h
584         eor     r12,r12,r6                      @ Maj(a,b,c)
585         add     r4,r4,r0,ror#2  @ h+=Sigma0(a)
586         @ add   r4,r4,r12                       @ h+=Maj(a,b,c)
587 #if __ARM_ARCH__>=7
588         @ ldr   r2,[r1],#4                      @ 8
589 # if 8==15
590         str     r1,[sp,#17*4]                   @ make room for r1
591 # endif
592         eor     r0,r8,r8,ror#5
593         add     r4,r4,r12                       @ h+=Maj(a,b,c) from the past
594         eor     r0,r0,r8,ror#19 @ Sigma1(e)
595 # ifndef __ARMEB__
596         rev     r2,r2
597 # endif
598 #else
599         @ ldrb  r2,[r1,#3]                      @ 8
600         add     r4,r4,r12                       @ h+=Maj(a,b,c) from the past
601         ldrb    r12,[r1,#2]
602         ldrb    r0,[r1,#1]
603         orr     r2,r2,r12,lsl#8
604         ldrb    r12,[r1],#4
605         orr     r2,r2,r0,lsl#16
606 # if 8==15
607         str     r1,[sp,#17*4]                   @ make room for r1
608 # endif
609         eor     r0,r8,r8,ror#5
610         orr     r2,r2,r12,lsl#24
611         eor     r0,r0,r8,ror#19 @ Sigma1(e)
612 #endif
613         ldr     r12,[r14],#4                    @ *K256++
614         add     r11,r11,r2                      @ h+=X[i]
615         str     r2,[sp,#8*4]
616         eor     r2,r9,r10
617         add     r11,r11,r0,ror#6        @ h+=Sigma1(e)
618         and     r2,r2,r8
619         add     r11,r11,r12                     @ h+=K256[i]
620         eor     r2,r2,r10                       @ Ch(e,f,g)
621         eor     r0,r4,r4,ror#11
622         add     r11,r11,r2                      @ h+=Ch(e,f,g)
623 #if 8==31
624         and     r12,r12,#0xff
625         cmp     r12,#0xf2                       @ done?
626 #endif
627 #if 8<15
628 # if __ARM_ARCH__>=7
629         ldr     r2,[r1],#4                      @ prefetch
630 # else
631         ldrb    r2,[r1,#3]
632 # endif
633         eor     r12,r4,r5                       @ a^b, b^c in next round
634 #else
635         ldr     r2,[sp,#10*4]           @ from future BODY_16_xx
636         eor     r12,r4,r5                       @ a^b, b^c in next round
637         ldr     r1,[sp,#7*4]    @ from future BODY_16_xx
638 #endif
639         eor     r0,r0,r4,ror#20 @ Sigma0(a)
640         and     r3,r3,r12                       @ (b^c)&=(a^b)
641         add     r7,r7,r11                       @ d+=h
642         eor     r3,r3,r5                        @ Maj(a,b,c)
643         add     r11,r11,r0,ror#2        @ h+=Sigma0(a)
644         @ add   r11,r11,r3                      @ h+=Maj(a,b,c)
645 #if __ARM_ARCH__>=7
646         @ ldr   r2,[r1],#4                      @ 9
647 # if 9==15
648         str     r1,[sp,#17*4]                   @ make room for r1
649 # endif
650         eor     r0,r7,r7,ror#5
651         add     r11,r11,r3                      @ h+=Maj(a,b,c) from the past
652         eor     r0,r0,r7,ror#19 @ Sigma1(e)
653 # ifndef __ARMEB__
654         rev     r2,r2
655 # endif
656 #else
657         @ ldrb  r2,[r1,#3]                      @ 9
658         add     r11,r11,r3                      @ h+=Maj(a,b,c) from the past
659         ldrb    r3,[r1,#2]
660         ldrb    r0,[r1,#1]
661         orr     r2,r2,r3,lsl#8
662         ldrb    r3,[r1],#4
663         orr     r2,r2,r0,lsl#16
664 # if 9==15
665         str     r1,[sp,#17*4]                   @ make room for r1
666 # endif
667         eor     r0,r7,r7,ror#5
668         orr     r2,r2,r3,lsl#24
669         eor     r0,r0,r7,ror#19 @ Sigma1(e)
670 #endif
671         ldr     r3,[r14],#4                     @ *K256++
672         add     r10,r10,r2                      @ h+=X[i]
673         str     r2,[sp,#9*4]
674         eor     r2,r8,r9
675         add     r10,r10,r0,ror#6        @ h+=Sigma1(e)
676         and     r2,r2,r7
677         add     r10,r10,r3                      @ h+=K256[i]
678         eor     r2,r2,r9                        @ Ch(e,f,g)
679         eor     r0,r11,r11,ror#11
680         add     r10,r10,r2                      @ h+=Ch(e,f,g)
681 #if 9==31
682         and     r3,r3,#0xff
683         cmp     r3,#0xf2                        @ done?
684 #endif
685 #if 9<15
686 # if __ARM_ARCH__>=7
687         ldr     r2,[r1],#4                      @ prefetch
688 # else
689         ldrb    r2,[r1,#3]
690 # endif
691         eor     r3,r11,r4                       @ a^b, b^c in next round
692 #else
693         ldr     r2,[sp,#11*4]           @ from future BODY_16_xx
694         eor     r3,r11,r4                       @ a^b, b^c in next round
695         ldr     r1,[sp,#8*4]    @ from future BODY_16_xx
696 #endif
697         eor     r0,r0,r11,ror#20        @ Sigma0(a)
698         and     r12,r12,r3                      @ (b^c)&=(a^b)
699         add     r6,r6,r10                       @ d+=h
700         eor     r12,r12,r4                      @ Maj(a,b,c)
701         add     r10,r10,r0,ror#2        @ h+=Sigma0(a)
702         @ add   r10,r10,r12                     @ h+=Maj(a,b,c)
703 #if __ARM_ARCH__>=7
704         @ ldr   r2,[r1],#4                      @ 10
705 # if 10==15
706         str     r1,[sp,#17*4]                   @ make room for r1
707 # endif
708         eor     r0,r6,r6,ror#5
709         add     r10,r10,r12                     @ h+=Maj(a,b,c) from the past
710         eor     r0,r0,r6,ror#19 @ Sigma1(e)
711 # ifndef __ARMEB__
712         rev     r2,r2
713 # endif
714 #else
715         @ ldrb  r2,[r1,#3]                      @ 10
716         add     r10,r10,r12                     @ h+=Maj(a,b,c) from the past
717         ldrb    r12,[r1,#2]
718         ldrb    r0,[r1,#1]
719         orr     r2,r2,r12,lsl#8
720         ldrb    r12,[r1],#4
721         orr     r2,r2,r0,lsl#16
722 # if 10==15
723         str     r1,[sp,#17*4]                   @ make room for r1
724 # endif
725         eor     r0,r6,r6,ror#5
726         orr     r2,r2,r12,lsl#24
727         eor     r0,r0,r6,ror#19 @ Sigma1(e)
728 #endif
729         ldr     r12,[r14],#4                    @ *K256++
730         add     r9,r9,r2                        @ h+=X[i]
731         str     r2,[sp,#10*4]
732         eor     r2,r7,r8
733         add     r9,r9,r0,ror#6  @ h+=Sigma1(e)
734         and     r2,r2,r6
735         add     r9,r9,r12                       @ h+=K256[i]
736         eor     r2,r2,r8                        @ Ch(e,f,g)
737         eor     r0,r10,r10,ror#11
738         add     r9,r9,r2                        @ h+=Ch(e,f,g)
739 #if 10==31
740         and     r12,r12,#0xff
741         cmp     r12,#0xf2                       @ done?
742 #endif
743 #if 10<15
744 # if __ARM_ARCH__>=7
745         ldr     r2,[r1],#4                      @ prefetch
746 # else
747         ldrb    r2,[r1,#3]
748 # endif
749         eor     r12,r10,r11                     @ a^b, b^c in next round
750 #else
751         ldr     r2,[sp,#12*4]           @ from future BODY_16_xx
752         eor     r12,r10,r11                     @ a^b, b^c in next round
753         ldr     r1,[sp,#9*4]    @ from future BODY_16_xx
754 #endif
755         eor     r0,r0,r10,ror#20        @ Sigma0(a)
756         and     r3,r3,r12                       @ (b^c)&=(a^b)
757         add     r5,r5,r9                        @ d+=h
758         eor     r3,r3,r11                       @ Maj(a,b,c)
759         add     r9,r9,r0,ror#2  @ h+=Sigma0(a)
760         @ add   r9,r9,r3                        @ h+=Maj(a,b,c)
761 #if __ARM_ARCH__>=7
762         @ ldr   r2,[r1],#4                      @ 11
763 # if 11==15
764         str     r1,[sp,#17*4]                   @ make room for r1
765 # endif
766         eor     r0,r5,r5,ror#5
767         add     r9,r9,r3                        @ h+=Maj(a,b,c) from the past
768         eor     r0,r0,r5,ror#19 @ Sigma1(e)
769 # ifndef __ARMEB__
770         rev     r2,r2
771 # endif
772 #else
773         @ ldrb  r2,[r1,#3]                      @ 11
774         add     r9,r9,r3                        @ h+=Maj(a,b,c) from the past
775         ldrb    r3,[r1,#2]
776         ldrb    r0,[r1,#1]
777         orr     r2,r2,r3,lsl#8
778         ldrb    r3,[r1],#4
779         orr     r2,r2,r0,lsl#16
780 # if 11==15
781         str     r1,[sp,#17*4]                   @ make room for r1
782 # endif
783         eor     r0,r5,r5,ror#5
784         orr     r2,r2,r3,lsl#24
785         eor     r0,r0,r5,ror#19 @ Sigma1(e)
786 #endif
787         ldr     r3,[r14],#4                     @ *K256++
788         add     r8,r8,r2                        @ h+=X[i]
789         str     r2,[sp,#11*4]
790         eor     r2,r6,r7
791         add     r8,r8,r0,ror#6  @ h+=Sigma1(e)
792         and     r2,r2,r5
793         add     r8,r8,r3                        @ h+=K256[i]
794         eor     r2,r2,r7                        @ Ch(e,f,g)
795         eor     r0,r9,r9,ror#11
796         add     r8,r8,r2                        @ h+=Ch(e,f,g)
797 #if 11==31
798         and     r3,r3,#0xff
799         cmp     r3,#0xf2                        @ done?
800 #endif
801 #if 11<15
802 # if __ARM_ARCH__>=7
803         ldr     r2,[r1],#4                      @ prefetch
804 # else
805         ldrb    r2,[r1,#3]
806 # endif
807         eor     r3,r9,r10                       @ a^b, b^c in next round
808 #else
809         ldr     r2,[sp,#13*4]           @ from future BODY_16_xx
810         eor     r3,r9,r10                       @ a^b, b^c in next round
811         ldr     r1,[sp,#10*4]   @ from future BODY_16_xx
812 #endif
813         eor     r0,r0,r9,ror#20 @ Sigma0(a)
814         and     r12,r12,r3                      @ (b^c)&=(a^b)
815         add     r4,r4,r8                        @ d+=h
816         eor     r12,r12,r10                     @ Maj(a,b,c)
817         add     r8,r8,r0,ror#2  @ h+=Sigma0(a)
818         @ add   r8,r8,r12                       @ h+=Maj(a,b,c)
819 #if __ARM_ARCH__>=7
820         @ ldr   r2,[r1],#4                      @ 12
821 # if 12==15
822         str     r1,[sp,#17*4]                   @ make room for r1
823 # endif
824         eor     r0,r4,r4,ror#5
825         add     r8,r8,r12                       @ h+=Maj(a,b,c) from the past
826         eor     r0,r0,r4,ror#19 @ Sigma1(e)
827 # ifndef __ARMEB__
828         rev     r2,r2
829 # endif
830 #else
831         @ ldrb  r2,[r1,#3]                      @ 12
832         add     r8,r8,r12                       @ h+=Maj(a,b,c) from the past
833         ldrb    r12,[r1,#2]
834         ldrb    r0,[r1,#1]
835         orr     r2,r2,r12,lsl#8
836         ldrb    r12,[r1],#4
837         orr     r2,r2,r0,lsl#16
838 # if 12==15
839         str     r1,[sp,#17*4]                   @ make room for r1
840 # endif
841         eor     r0,r4,r4,ror#5
842         orr     r2,r2,r12,lsl#24
843         eor     r0,r0,r4,ror#19 @ Sigma1(e)
844 #endif
845         ldr     r12,[r14],#4                    @ *K256++
846         add     r7,r7,r2                        @ h+=X[i]
847         str     r2,[sp,#12*4]
848         eor     r2,r5,r6
849         add     r7,r7,r0,ror#6  @ h+=Sigma1(e)
850         and     r2,r2,r4
851         add     r7,r7,r12                       @ h+=K256[i]
852         eor     r2,r2,r6                        @ Ch(e,f,g)
853         eor     r0,r8,r8,ror#11
854         add     r7,r7,r2                        @ h+=Ch(e,f,g)
855 #if 12==31
856         and     r12,r12,#0xff
857         cmp     r12,#0xf2                       @ done?
858 #endif
859 #if 12<15
860 # if __ARM_ARCH__>=7
861         ldr     r2,[r1],#4                      @ prefetch
862 # else
863         ldrb    r2,[r1,#3]
864 # endif
865         eor     r12,r8,r9                       @ a^b, b^c in next round
866 #else
867         ldr     r2,[sp,#14*4]           @ from future BODY_16_xx
868         eor     r12,r8,r9                       @ a^b, b^c in next round
869         ldr     r1,[sp,#11*4]   @ from future BODY_16_xx
870 #endif
871         eor     r0,r0,r8,ror#20 @ Sigma0(a)
872         and     r3,r3,r12                       @ (b^c)&=(a^b)
873         add     r11,r11,r7                      @ d+=h
874         eor     r3,r3,r9                        @ Maj(a,b,c)
875         add     r7,r7,r0,ror#2  @ h+=Sigma0(a)
876         @ add   r7,r7,r3                        @ h+=Maj(a,b,c)
877 #if __ARM_ARCH__>=7
878         @ ldr   r2,[r1],#4                      @ 13
879 # if 13==15
880         str     r1,[sp,#17*4]                   @ make room for r1
881 # endif
882         eor     r0,r11,r11,ror#5
883         add     r7,r7,r3                        @ h+=Maj(a,b,c) from the past
884         eor     r0,r0,r11,ror#19        @ Sigma1(e)
885 # ifndef __ARMEB__
886         rev     r2,r2
887 # endif
888 #else
889         @ ldrb  r2,[r1,#3]                      @ 13
890         add     r7,r7,r3                        @ h+=Maj(a,b,c) from the past
891         ldrb    r3,[r1,#2]
892         ldrb    r0,[r1,#1]
893         orr     r2,r2,r3,lsl#8
894         ldrb    r3,[r1],#4
895         orr     r2,r2,r0,lsl#16
896 # if 13==15
897         str     r1,[sp,#17*4]                   @ make room for r1
898 # endif
899         eor     r0,r11,r11,ror#5
900         orr     r2,r2,r3,lsl#24
901         eor     r0,r0,r11,ror#19        @ Sigma1(e)
902 #endif
903         ldr     r3,[r14],#4                     @ *K256++
904         add     r6,r6,r2                        @ h+=X[i]
905         str     r2,[sp,#13*4]
906         eor     r2,r4,r5
907         add     r6,r6,r0,ror#6  @ h+=Sigma1(e)
908         and     r2,r2,r11
909         add     r6,r6,r3                        @ h+=K256[i]
910         eor     r2,r2,r5                        @ Ch(e,f,g)
911         eor     r0,r7,r7,ror#11
912         add     r6,r6,r2                        @ h+=Ch(e,f,g)
913 #if 13==31
914         and     r3,r3,#0xff
915         cmp     r3,#0xf2                        @ done?
916 #endif
917 #if 13<15
918 # if __ARM_ARCH__>=7
919         ldr     r2,[r1],#4                      @ prefetch
920 # else
921         ldrb    r2,[r1,#3]
922 # endif
923         eor     r3,r7,r8                        @ a^b, b^c in next round
924 #else
925         ldr     r2,[sp,#15*4]           @ from future BODY_16_xx
926         eor     r3,r7,r8                        @ a^b, b^c in next round
927         ldr     r1,[sp,#12*4]   @ from future BODY_16_xx
928 #endif
929         eor     r0,r0,r7,ror#20 @ Sigma0(a)
930         and     r12,r12,r3                      @ (b^c)&=(a^b)
931         add     r10,r10,r6                      @ d+=h
932         eor     r12,r12,r8                      @ Maj(a,b,c)
933         add     r6,r6,r0,ror#2  @ h+=Sigma0(a)
934         @ add   r6,r6,r12                       @ h+=Maj(a,b,c)
935 #if __ARM_ARCH__>=7
936         @ ldr   r2,[r1],#4                      @ 14
937 # if 14==15
938         str     r1,[sp,#17*4]                   @ make room for r1
939 # endif
940         eor     r0,r10,r10,ror#5
941         add     r6,r6,r12                       @ h+=Maj(a,b,c) from the past
942         eor     r0,r0,r10,ror#19        @ Sigma1(e)
943 # ifndef __ARMEB__
944         rev     r2,r2
945 # endif
946 #else
947         @ ldrb  r2,[r1,#3]                      @ 14
948         add     r6,r6,r12                       @ h+=Maj(a,b,c) from the past
949         ldrb    r12,[r1,#2]
950         ldrb    r0,[r1,#1]
951         orr     r2,r2,r12,lsl#8
952         ldrb    r12,[r1],#4
953         orr     r2,r2,r0,lsl#16
954 # if 14==15
955         str     r1,[sp,#17*4]                   @ make room for r1
956 # endif
957         eor     r0,r10,r10,ror#5
958         orr     r2,r2,r12,lsl#24
959         eor     r0,r0,r10,ror#19        @ Sigma1(e)
960 #endif
961         ldr     r12,[r14],#4                    @ *K256++
962         add     r5,r5,r2                        @ h+=X[i]
963         str     r2,[sp,#14*4]
964         eor     r2,r11,r4
965         add     r5,r5,r0,ror#6  @ h+=Sigma1(e)
966         and     r2,r2,r10
967         add     r5,r5,r12                       @ h+=K256[i]
968         eor     r2,r2,r4                        @ Ch(e,f,g)
969         eor     r0,r6,r6,ror#11
970         add     r5,r5,r2                        @ h+=Ch(e,f,g)
971 #if 14==31
972         and     r12,r12,#0xff
973         cmp     r12,#0xf2                       @ done?
974 #endif
975 #if 14<15
976 # if __ARM_ARCH__>=7
977         ldr     r2,[r1],#4                      @ prefetch
978 # else
979         ldrb    r2,[r1,#3]
980 # endif
981         eor     r12,r6,r7                       @ a^b, b^c in next round
982 #else
983         ldr     r2,[sp,#0*4]            @ from future BODY_16_xx
984         eor     r12,r6,r7                       @ a^b, b^c in next round
985         ldr     r1,[sp,#13*4]   @ from future BODY_16_xx
986 #endif
987         eor     r0,r0,r6,ror#20 @ Sigma0(a)
988         and     r3,r3,r12                       @ (b^c)&=(a^b)
989         add     r9,r9,r5                        @ d+=h
990         eor     r3,r3,r7                        @ Maj(a,b,c)
991         add     r5,r5,r0,ror#2  @ h+=Sigma0(a)
992         @ add   r5,r5,r3                        @ h+=Maj(a,b,c)
993 #if __ARM_ARCH__>=7
994         @ ldr   r2,[r1],#4                      @ 15
995 # if 15==15
996         str     r1,[sp,#17*4]                   @ make room for r1
997 # endif
998         eor     r0,r9,r9,ror#5
999         add     r5,r5,r3                        @ h+=Maj(a,b,c) from the past
1000         eor     r0,r0,r9,ror#19 @ Sigma1(e)
1001 # ifndef __ARMEB__
1002         rev     r2,r2
1003 # endif
1004 #else
1005         @ ldrb  r2,[r1,#3]                      @ 15
1006         add     r5,r5,r3                        @ h+=Maj(a,b,c) from the past
1007         ldrb    r3,[r1,#2]
1008         ldrb    r0,[r1,#1]
1009         orr     r2,r2,r3,lsl#8
1010         ldrb    r3,[r1],#4
1011         orr     r2,r2,r0,lsl#16
1012 # if 15==15
1013         str     r1,[sp,#17*4]                   @ make room for r1
1014 # endif
1015         eor     r0,r9,r9,ror#5
1016         orr     r2,r2,r3,lsl#24
1017         eor     r0,r0,r9,ror#19 @ Sigma1(e)
1018 #endif
1019         ldr     r3,[r14],#4                     @ *K256++
1020         add     r4,r4,r2                        @ h+=X[i]
1021         str     r2,[sp,#15*4]
1022         eor     r2,r10,r11
1023         add     r4,r4,r0,ror#6  @ h+=Sigma1(e)
1024         and     r2,r2,r9
1025         add     r4,r4,r3                        @ h+=K256[i]
1026         eor     r2,r2,r11                       @ Ch(e,f,g)
1027         eor     r0,r5,r5,ror#11
1028         add     r4,r4,r2                        @ h+=Ch(e,f,g)
1029 #if 15==31
1030         and     r3,r3,#0xff
1031         cmp     r3,#0xf2                        @ done?
1032 #endif
1033 #if 15<15
1034 # if __ARM_ARCH__>=7
1035         ldr     r2,[r1],#4                      @ prefetch
1036 # else
1037         ldrb    r2,[r1,#3]
1038 # endif
1039         eor     r3,r5,r6                        @ a^b, b^c in next round
1040 #else
1041         ldr     r2,[sp,#1*4]            @ from future BODY_16_xx
1042         eor     r3,r5,r6                        @ a^b, b^c in next round
1043         ldr     r1,[sp,#14*4]   @ from future BODY_16_xx
1044 #endif
1045         eor     r0,r0,r5,ror#20 @ Sigma0(a)
1046         and     r12,r12,r3                      @ (b^c)&=(a^b)
1047         add     r8,r8,r4                        @ d+=h
1048         eor     r12,r12,r6                      @ Maj(a,b,c)
1049         add     r4,r4,r0,ror#2  @ h+=Sigma0(a)
1050         @ add   r4,r4,r12                       @ h+=Maj(a,b,c)
1051 .Lrounds_16_xx:
1052         @ ldr   r2,[sp,#1*4]            @ 16
1053         @ ldr   r1,[sp,#14*4]
1054         mov     r0,r2,ror#7
1055         add     r4,r4,r12                       @ h+=Maj(a,b,c) from the past
1056         mov     r12,r1,ror#17
1057         eor     r0,r0,r2,ror#18
1058         eor     r12,r12,r1,ror#19
1059         eor     r0,r0,r2,lsr#3  @ sigma0(X[i+1])
1060         ldr     r2,[sp,#0*4]
1061         eor     r12,r12,r1,lsr#10       @ sigma1(X[i+14])
1062         ldr     r1,[sp,#9*4]
1063
1064         add     r12,r12,r0
1065         eor     r0,r8,r8,ror#5  @ from BODY_00_15
1066         add     r2,r2,r12
1067         eor     r0,r0,r8,ror#19 @ Sigma1(e)
1068         add     r2,r2,r1                        @ X[i]
1069         ldr     r12,[r14],#4                    @ *K256++
1070         add     r11,r11,r2                      @ h+=X[i]
1071         str     r2,[sp,#0*4]
1072         eor     r2,r9,r10
1073         add     r11,r11,r0,ror#6        @ h+=Sigma1(e)
1074         and     r2,r2,r8
1075         add     r11,r11,r12                     @ h+=K256[i]
1076         eor     r2,r2,r10                       @ Ch(e,f,g)
1077         eor     r0,r4,r4,ror#11
1078         add     r11,r11,r2                      @ h+=Ch(e,f,g)
1079 #if 16==31
1080         and     r12,r12,#0xff
1081         cmp     r12,#0xf2                       @ done?
1082 #endif
1083 #if 16<15
1084 # if __ARM_ARCH__>=7
1085         ldr     r2,[r1],#4                      @ prefetch
1086 # else
1087         ldrb    r2,[r1,#3]
1088 # endif
1089         eor     r12,r4,r5                       @ a^b, b^c in next round
1090 #else
1091         ldr     r2,[sp,#2*4]            @ from future BODY_16_xx
1092         eor     r12,r4,r5                       @ a^b, b^c in next round
1093         ldr     r1,[sp,#15*4]   @ from future BODY_16_xx
1094 #endif
1095         eor     r0,r0,r4,ror#20 @ Sigma0(a)
1096         and     r3,r3,r12                       @ (b^c)&=(a^b)
1097         add     r7,r7,r11                       @ d+=h
1098         eor     r3,r3,r5                        @ Maj(a,b,c)
1099         add     r11,r11,r0,ror#2        @ h+=Sigma0(a)
1100         @ add   r11,r11,r3                      @ h+=Maj(a,b,c)
1101         @ ldr   r2,[sp,#2*4]            @ 17
1102         @ ldr   r1,[sp,#15*4]
1103         mov     r0,r2,ror#7
1104         add     r11,r11,r3                      @ h+=Maj(a,b,c) from the past
1105         mov     r3,r1,ror#17
1106         eor     r0,r0,r2,ror#18
1107         eor     r3,r3,r1,ror#19
1108         eor     r0,r0,r2,lsr#3  @ sigma0(X[i+1])
1109         ldr     r2,[sp,#1*4]
1110         eor     r3,r3,r1,lsr#10 @ sigma1(X[i+14])
1111         ldr     r1,[sp,#10*4]
1112
1113         add     r3,r3,r0
1114         eor     r0,r7,r7,ror#5  @ from BODY_00_15
1115         add     r2,r2,r3
1116         eor     r0,r0,r7,ror#19 @ Sigma1(e)
1117         add     r2,r2,r1                        @ X[i]
1118         ldr     r3,[r14],#4                     @ *K256++
1119         add     r10,r10,r2                      @ h+=X[i]
1120         str     r2,[sp,#1*4]
1121         eor     r2,r8,r9
1122         add     r10,r10,r0,ror#6        @ h+=Sigma1(e)
1123         and     r2,r2,r7
1124         add     r10,r10,r3                      @ h+=K256[i]
1125         eor     r2,r2,r9                        @ Ch(e,f,g)
1126         eor     r0,r11,r11,ror#11
1127         add     r10,r10,r2                      @ h+=Ch(e,f,g)
1128 #if 17==31
1129         and     r3,r3,#0xff
1130         cmp     r3,#0xf2                        @ done?
1131 #endif
1132 #if 17<15
1133 # if __ARM_ARCH__>=7
1134         ldr     r2,[r1],#4                      @ prefetch
1135 # else
1136         ldrb    r2,[r1,#3]
1137 # endif
1138         eor     r3,r11,r4                       @ a^b, b^c in next round
1139 #else
1140         ldr     r2,[sp,#3*4]            @ from future BODY_16_xx
1141         eor     r3,r11,r4                       @ a^b, b^c in next round
1142         ldr     r1,[sp,#0*4]    @ from future BODY_16_xx
1143 #endif
1144         eor     r0,r0,r11,ror#20        @ Sigma0(a)
1145         and     r12,r12,r3                      @ (b^c)&=(a^b)
1146         add     r6,r6,r10                       @ d+=h
1147         eor     r12,r12,r4                      @ Maj(a,b,c)
1148         add     r10,r10,r0,ror#2        @ h+=Sigma0(a)
1149         @ add   r10,r10,r12                     @ h+=Maj(a,b,c)
1150         @ ldr   r2,[sp,#3*4]            @ 18
1151         @ ldr   r1,[sp,#0*4]
1152         mov     r0,r2,ror#7
1153         add     r10,r10,r12                     @ h+=Maj(a,b,c) from the past
1154         mov     r12,r1,ror#17
1155         eor     r0,r0,r2,ror#18
1156         eor     r12,r12,r1,ror#19
1157         eor     r0,r0,r2,lsr#3  @ sigma0(X[i+1])
1158         ldr     r2,[sp,#2*4]
1159         eor     r12,r12,r1,lsr#10       @ sigma1(X[i+14])
1160         ldr     r1,[sp,#11*4]
1161
1162         add     r12,r12,r0
1163         eor     r0,r6,r6,ror#5  @ from BODY_00_15
1164         add     r2,r2,r12
1165         eor     r0,r0,r6,ror#19 @ Sigma1(e)
1166         add     r2,r2,r1                        @ X[i]
1167         ldr     r12,[r14],#4                    @ *K256++
1168         add     r9,r9,r2                        @ h+=X[i]
1169         str     r2,[sp,#2*4]
1170         eor     r2,r7,r8
1171         add     r9,r9,r0,ror#6  @ h+=Sigma1(e)
1172         and     r2,r2,r6
1173         add     r9,r9,r12                       @ h+=K256[i]
1174         eor     r2,r2,r8                        @ Ch(e,f,g)
1175         eor     r0,r10,r10,ror#11
1176         add     r9,r9,r2                        @ h+=Ch(e,f,g)
1177 #if 18==31
1178         and     r12,r12,#0xff
1179         cmp     r12,#0xf2                       @ done?
1180 #endif
1181 #if 18<15
1182 # if __ARM_ARCH__>=7
1183         ldr     r2,[r1],#4                      @ prefetch
1184 # else
1185         ldrb    r2,[r1,#3]
1186 # endif
1187         eor     r12,r10,r11                     @ a^b, b^c in next round
1188 #else
1189         ldr     r2,[sp,#4*4]            @ from future BODY_16_xx
1190         eor     r12,r10,r11                     @ a^b, b^c in next round
1191         ldr     r1,[sp,#1*4]    @ from future BODY_16_xx
1192 #endif
1193         eor     r0,r0,r10,ror#20        @ Sigma0(a)
1194         and     r3,r3,r12                       @ (b^c)&=(a^b)
1195         add     r5,r5,r9                        @ d+=h
1196         eor     r3,r3,r11                       @ Maj(a,b,c)
1197         add     r9,r9,r0,ror#2  @ h+=Sigma0(a)
1198         @ add   r9,r9,r3                        @ h+=Maj(a,b,c)
1199         @ ldr   r2,[sp,#4*4]            @ 19
1200         @ ldr   r1,[sp,#1*4]
1201         mov     r0,r2,ror#7
1202         add     r9,r9,r3                        @ h+=Maj(a,b,c) from the past
1203         mov     r3,r1,ror#17
1204         eor     r0,r0,r2,ror#18
1205         eor     r3,r3,r1,ror#19
1206         eor     r0,r0,r2,lsr#3  @ sigma0(X[i+1])
1207         ldr     r2,[sp,#3*4]
1208         eor     r3,r3,r1,lsr#10 @ sigma1(X[i+14])
1209         ldr     r1,[sp,#12*4]
1210
1211         add     r3,r3,r0
1212         eor     r0,r5,r5,ror#5  @ from BODY_00_15
1213         add     r2,r2,r3
1214         eor     r0,r0,r5,ror#19 @ Sigma1(e)
1215         add     r2,r2,r1                        @ X[i]
1216         ldr     r3,[r14],#4                     @ *K256++
1217         add     r8,r8,r2                        @ h+=X[i]
1218         str     r2,[sp,#3*4]
1219         eor     r2,r6,r7
1220         add     r8,r8,r0,ror#6  @ h+=Sigma1(e)
1221         and     r2,r2,r5
1222         add     r8,r8,r3                        @ h+=K256[i]
1223         eor     r2,r2,r7                        @ Ch(e,f,g)
1224         eor     r0,r9,r9,ror#11
1225         add     r8,r8,r2                        @ h+=Ch(e,f,g)
1226 #if 19==31
1227         and     r3,r3,#0xff
1228         cmp     r3,#0xf2                        @ done?
1229 #endif
1230 #if 19<15
1231 # if __ARM_ARCH__>=7
1232         ldr     r2,[r1],#4                      @ prefetch
1233 # else
1234         ldrb    r2,[r1,#3]
1235 # endif
1236         eor     r3,r9,r10                       @ a^b, b^c in next round
1237 #else
1238         ldr     r2,[sp,#5*4]            @ from future BODY_16_xx
1239         eor     r3,r9,r10                       @ a^b, b^c in next round
1240         ldr     r1,[sp,#2*4]    @ from future BODY_16_xx
1241 #endif
1242         eor     r0,r0,r9,ror#20 @ Sigma0(a)
1243         and     r12,r12,r3                      @ (b^c)&=(a^b)
1244         add     r4,r4,r8                        @ d+=h
1245         eor     r12,r12,r10                     @ Maj(a,b,c)
1246         add     r8,r8,r0,ror#2  @ h+=Sigma0(a)
1247         @ add   r8,r8,r12                       @ h+=Maj(a,b,c)
1248         @ ldr   r2,[sp,#5*4]            @ 20
1249         @ ldr   r1,[sp,#2*4]
1250         mov     r0,r2,ror#7
1251         add     r8,r8,r12                       @ h+=Maj(a,b,c) from the past
1252         mov     r12,r1,ror#17
1253         eor     r0,r0,r2,ror#18
1254         eor     r12,r12,r1,ror#19
1255         eor     r0,r0,r2,lsr#3  @ sigma0(X[i+1])
1256         ldr     r2,[sp,#4*4]
1257         eor     r12,r12,r1,lsr#10       @ sigma1(X[i+14])
1258         ldr     r1,[sp,#13*4]
1259
1260         add     r12,r12,r0
1261         eor     r0,r4,r4,ror#5  @ from BODY_00_15
1262         add     r2,r2,r12
1263         eor     r0,r0,r4,ror#19 @ Sigma1(e)
1264         add     r2,r2,r1                        @ X[i]
1265         ldr     r12,[r14],#4                    @ *K256++
1266         add     r7,r7,r2                        @ h+=X[i]
1267         str     r2,[sp,#4*4]
1268         eor     r2,r5,r6
1269         add     r7,r7,r0,ror#6  @ h+=Sigma1(e)
1270         and     r2,r2,r4
1271         add     r7,r7,r12                       @ h+=K256[i]
1272         eor     r2,r2,r6                        @ Ch(e,f,g)
1273         eor     r0,r8,r8,ror#11
1274         add     r7,r7,r2                        @ h+=Ch(e,f,g)
1275 #if 20==31
1276         and     r12,r12,#0xff
1277         cmp     r12,#0xf2                       @ done?
1278 #endif
1279 #if 20<15
1280 # if __ARM_ARCH__>=7
1281         ldr     r2,[r1],#4                      @ prefetch
1282 # else
1283         ldrb    r2,[r1,#3]
1284 # endif
1285         eor     r12,r8,r9                       @ a^b, b^c in next round
1286 #else
1287         ldr     r2,[sp,#6*4]            @ from future BODY_16_xx
1288         eor     r12,r8,r9                       @ a^b, b^c in next round
1289         ldr     r1,[sp,#3*4]    @ from future BODY_16_xx
1290 #endif
1291         eor     r0,r0,r8,ror#20 @ Sigma0(a)
1292         and     r3,r3,r12                       @ (b^c)&=(a^b)
1293         add     r11,r11,r7                      @ d+=h
1294         eor     r3,r3,r9                        @ Maj(a,b,c)
1295         add     r7,r7,r0,ror#2  @ h+=Sigma0(a)
1296         @ add   r7,r7,r3                        @ h+=Maj(a,b,c)
1297         @ ldr   r2,[sp,#6*4]            @ 21
1298         @ ldr   r1,[sp,#3*4]
1299         mov     r0,r2,ror#7
1300         add     r7,r7,r3                        @ h+=Maj(a,b,c) from the past
1301         mov     r3,r1,ror#17
1302         eor     r0,r0,r2,ror#18
1303         eor     r3,r3,r1,ror#19
1304         eor     r0,r0,r2,lsr#3  @ sigma0(X[i+1])
1305         ldr     r2,[sp,#5*4]
1306         eor     r3,r3,r1,lsr#10 @ sigma1(X[i+14])
1307         ldr     r1,[sp,#14*4]
1308
1309         add     r3,r3,r0
1310         eor     r0,r11,r11,ror#5        @ from BODY_00_15
1311         add     r2,r2,r3
1312         eor     r0,r0,r11,ror#19        @ Sigma1(e)
1313         add     r2,r2,r1                        @ X[i]
1314         ldr     r3,[r14],#4                     @ *K256++
1315         add     r6,r6,r2                        @ h+=X[i]
1316         str     r2,[sp,#5*4]
1317         eor     r2,r4,r5
1318         add     r6,r6,r0,ror#6  @ h+=Sigma1(e)
1319         and     r2,r2,r11
1320         add     r6,r6,r3                        @ h+=K256[i]
1321         eor     r2,r2,r5                        @ Ch(e,f,g)
1322         eor     r0,r7,r7,ror#11
1323         add     r6,r6,r2                        @ h+=Ch(e,f,g)
1324 #if 21==31
1325         and     r3,r3,#0xff
1326         cmp     r3,#0xf2                        @ done?
1327 #endif
1328 #if 21<15
1329 # if __ARM_ARCH__>=7
1330         ldr     r2,[r1],#4                      @ prefetch
1331 # else
1332         ldrb    r2,[r1,#3]
1333 # endif
1334         eor     r3,r7,r8                        @ a^b, b^c in next round
1335 #else
1336         ldr     r2,[sp,#7*4]            @ from future BODY_16_xx
1337         eor     r3,r7,r8                        @ a^b, b^c in next round
1338         ldr     r1,[sp,#4*4]    @ from future BODY_16_xx
1339 #endif
1340         eor     r0,r0,r7,ror#20 @ Sigma0(a)
1341         and     r12,r12,r3                      @ (b^c)&=(a^b)
1342         add     r10,r10,r6                      @ d+=h
1343         eor     r12,r12,r8                      @ Maj(a,b,c)
1344         add     r6,r6,r0,ror#2  @ h+=Sigma0(a)
1345         @ add   r6,r6,r12                       @ h+=Maj(a,b,c)
1346         @ ldr   r2,[sp,#7*4]            @ 22
1347         @ ldr   r1,[sp,#4*4]
1348         mov     r0,r2,ror#7
1349         add     r6,r6,r12                       @ h+=Maj(a,b,c) from the past
1350         mov     r12,r1,ror#17
1351         eor     r0,r0,r2,ror#18
1352         eor     r12,r12,r1,ror#19
1353         eor     r0,r0,r2,lsr#3  @ sigma0(X[i+1])
1354         ldr     r2,[sp,#6*4]
1355         eor     r12,r12,r1,lsr#10       @ sigma1(X[i+14])
1356         ldr     r1,[sp,#15*4]
1357
1358         add     r12,r12,r0
1359         eor     r0,r10,r10,ror#5        @ from BODY_00_15
1360         add     r2,r2,r12
1361         eor     r0,r0,r10,ror#19        @ Sigma1(e)
1362         add     r2,r2,r1                        @ X[i]
1363         ldr     r12,[r14],#4                    @ *K256++
1364         add     r5,r5,r2                        @ h+=X[i]
1365         str     r2,[sp,#6*4]
1366         eor     r2,r11,r4
1367         add     r5,r5,r0,ror#6  @ h+=Sigma1(e)
1368         and     r2,r2,r10
1369         add     r5,r5,r12                       @ h+=K256[i]
1370         eor     r2,r2,r4                        @ Ch(e,f,g)
1371         eor     r0,r6,r6,ror#11
1372         add     r5,r5,r2                        @ h+=Ch(e,f,g)
1373 #if 22==31
1374         and     r12,r12,#0xff
1375         cmp     r12,#0xf2                       @ done?
1376 #endif
1377 #if 22<15
1378 # if __ARM_ARCH__>=7
1379         ldr     r2,[r1],#4                      @ prefetch
1380 # else
1381         ldrb    r2,[r1,#3]
1382 # endif
1383         eor     r12,r6,r7                       @ a^b, b^c in next round
1384 #else
1385         ldr     r2,[sp,#8*4]            @ from future BODY_16_xx
1386         eor     r12,r6,r7                       @ a^b, b^c in next round
1387         ldr     r1,[sp,#5*4]    @ from future BODY_16_xx
1388 #endif
1389         eor     r0,r0,r6,ror#20 @ Sigma0(a)
1390         and     r3,r3,r12                       @ (b^c)&=(a^b)
1391         add     r9,r9,r5                        @ d+=h
1392         eor     r3,r3,r7                        @ Maj(a,b,c)
1393         add     r5,r5,r0,ror#2  @ h+=Sigma0(a)
1394         @ add   r5,r5,r3                        @ h+=Maj(a,b,c)
1395         @ ldr   r2,[sp,#8*4]            @ 23
1396         @ ldr   r1,[sp,#5*4]
1397         mov     r0,r2,ror#7
1398         add     r5,r5,r3                        @ h+=Maj(a,b,c) from the past
1399         mov     r3,r1,ror#17
1400         eor     r0,r0,r2,ror#18
1401         eor     r3,r3,r1,ror#19
1402         eor     r0,r0,r2,lsr#3  @ sigma0(X[i+1])
1403         ldr     r2,[sp,#7*4]
1404         eor     r3,r3,r1,lsr#10 @ sigma1(X[i+14])
1405         ldr     r1,[sp,#0*4]
1406
1407         add     r3,r3,r0
1408         eor     r0,r9,r9,ror#5  @ from BODY_00_15
1409         add     r2,r2,r3
1410         eor     r0,r0,r9,ror#19 @ Sigma1(e)
1411         add     r2,r2,r1                        @ X[i]
1412         ldr     r3,[r14],#4                     @ *K256++
1413         add     r4,r4,r2                        @ h+=X[i]
1414         str     r2,[sp,#7*4]
1415         eor     r2,r10,r11
1416         add     r4,r4,r0,ror#6  @ h+=Sigma1(e)
1417         and     r2,r2,r9
1418         add     r4,r4,r3                        @ h+=K256[i]
1419         eor     r2,r2,r11                       @ Ch(e,f,g)
1420         eor     r0,r5,r5,ror#11
1421         add     r4,r4,r2                        @ h+=Ch(e,f,g)
1422 #if 23==31
1423         and     r3,r3,#0xff
1424         cmp     r3,#0xf2                        @ done?
1425 #endif
1426 #if 23<15
1427 # if __ARM_ARCH__>=7
1428         ldr     r2,[r1],#4                      @ prefetch
1429 # else
1430         ldrb    r2,[r1,#3]
1431 # endif
1432         eor     r3,r5,r6                        @ a^b, b^c in next round
1433 #else
1434         ldr     r2,[sp,#9*4]            @ from future BODY_16_xx
1435         eor     r3,r5,r6                        @ a^b, b^c in next round
1436         ldr     r1,[sp,#6*4]    @ from future BODY_16_xx
1437 #endif
1438         eor     r0,r0,r5,ror#20 @ Sigma0(a)
1439         and     r12,r12,r3                      @ (b^c)&=(a^b)
1440         add     r8,r8,r4                        @ d+=h
1441         eor     r12,r12,r6                      @ Maj(a,b,c)
1442         add     r4,r4,r0,ror#2  @ h+=Sigma0(a)
1443         @ add   r4,r4,r12                       @ h+=Maj(a,b,c)
1444         @ ldr   r2,[sp,#9*4]            @ 24
1445         @ ldr   r1,[sp,#6*4]
1446         mov     r0,r2,ror#7
1447         add     r4,r4,r12                       @ h+=Maj(a,b,c) from the past
1448         mov     r12,r1,ror#17
1449         eor     r0,r0,r2,ror#18
1450         eor     r12,r12,r1,ror#19
1451         eor     r0,r0,r2,lsr#3  @ sigma0(X[i+1])
1452         ldr     r2,[sp,#8*4]
1453         eor     r12,r12,r1,lsr#10       @ sigma1(X[i+14])
1454         ldr     r1,[sp,#1*4]
1455
1456         add     r12,r12,r0
1457         eor     r0,r8,r8,ror#5  @ from BODY_00_15
1458         add     r2,r2,r12
1459         eor     r0,r0,r8,ror#19 @ Sigma1(e)
1460         add     r2,r2,r1                        @ X[i]
1461         ldr     r12,[r14],#4                    @ *K256++
1462         add     r11,r11,r2                      @ h+=X[i]
1463         str     r2,[sp,#8*4]
1464         eor     r2,r9,r10
1465         add     r11,r11,r0,ror#6        @ h+=Sigma1(e)
1466         and     r2,r2,r8
1467         add     r11,r11,r12                     @ h+=K256[i]
1468         eor     r2,r2,r10                       @ Ch(e,f,g)
1469         eor     r0,r4,r4,ror#11
1470         add     r11,r11,r2                      @ h+=Ch(e,f,g)
1471 #if 24==31
1472         and     r12,r12,#0xff
1473         cmp     r12,#0xf2                       @ done?
1474 #endif
1475 #if 24<15
1476 # if __ARM_ARCH__>=7
1477         ldr     r2,[r1],#4                      @ prefetch
1478 # else
1479         ldrb    r2,[r1,#3]
1480 # endif
1481         eor     r12,r4,r5                       @ a^b, b^c in next round
1482 #else
1483         ldr     r2,[sp,#10*4]           @ from future BODY_16_xx
1484         eor     r12,r4,r5                       @ a^b, b^c in next round
1485         ldr     r1,[sp,#7*4]    @ from future BODY_16_xx
1486 #endif
1487         eor     r0,r0,r4,ror#20 @ Sigma0(a)
1488         and     r3,r3,r12                       @ (b^c)&=(a^b)
1489         add     r7,r7,r11                       @ d+=h
1490         eor     r3,r3,r5                        @ Maj(a,b,c)
1491         add     r11,r11,r0,ror#2        @ h+=Sigma0(a)
1492         @ add   r11,r11,r3                      @ h+=Maj(a,b,c)
1493         @ ldr   r2,[sp,#10*4]           @ 25
1494         @ ldr   r1,[sp,#7*4]
1495         mov     r0,r2,ror#7
1496         add     r11,r11,r3                      @ h+=Maj(a,b,c) from the past
1497         mov     r3,r1,ror#17
1498         eor     r0,r0,r2,ror#18
1499         eor     r3,r3,r1,ror#19
1500         eor     r0,r0,r2,lsr#3  @ sigma0(X[i+1])
1501         ldr     r2,[sp,#9*4]
1502         eor     r3,r3,r1,lsr#10 @ sigma1(X[i+14])
1503         ldr     r1,[sp,#2*4]
1504
1505         add     r3,r3,r0
1506         eor     r0,r7,r7,ror#5  @ from BODY_00_15
1507         add     r2,r2,r3
1508         eor     r0,r0,r7,ror#19 @ Sigma1(e)
1509         add     r2,r2,r1                        @ X[i]
1510         ldr     r3,[r14],#4                     @ *K256++
1511         add     r10,r10,r2                      @ h+=X[i]
1512         str     r2,[sp,#9*4]
1513         eor     r2,r8,r9
1514         add     r10,r10,r0,ror#6        @ h+=Sigma1(e)
1515         and     r2,r2,r7
1516         add     r10,r10,r3                      @ h+=K256[i]
1517         eor     r2,r2,r9                        @ Ch(e,f,g)
1518         eor     r0,r11,r11,ror#11
1519         add     r10,r10,r2                      @ h+=Ch(e,f,g)
1520 #if 25==31
1521         and     r3,r3,#0xff
1522         cmp     r3,#0xf2                        @ done?
1523 #endif
1524 #if 25<15
1525 # if __ARM_ARCH__>=7
1526         ldr     r2,[r1],#4                      @ prefetch
1527 # else
1528         ldrb    r2,[r1,#3]
1529 # endif
1530         eor     r3,r11,r4                       @ a^b, b^c in next round
1531 #else
1532         ldr     r2,[sp,#11*4]           @ from future BODY_16_xx
1533         eor     r3,r11,r4                       @ a^b, b^c in next round
1534         ldr     r1,[sp,#8*4]    @ from future BODY_16_xx
1535 #endif
1536         eor     r0,r0,r11,ror#20        @ Sigma0(a)
1537         and     r12,r12,r3                      @ (b^c)&=(a^b)
1538         add     r6,r6,r10                       @ d+=h
1539         eor     r12,r12,r4                      @ Maj(a,b,c)
1540         add     r10,r10,r0,ror#2        @ h+=Sigma0(a)
1541         @ add   r10,r10,r12                     @ h+=Maj(a,b,c)
1542         @ ldr   r2,[sp,#11*4]           @ 26
1543         @ ldr   r1,[sp,#8*4]
1544         mov     r0,r2,ror#7
1545         add     r10,r10,r12                     @ h+=Maj(a,b,c) from the past
1546         mov     r12,r1,ror#17
1547         eor     r0,r0,r2,ror#18
1548         eor     r12,r12,r1,ror#19
1549         eor     r0,r0,r2,lsr#3  @ sigma0(X[i+1])
1550         ldr     r2,[sp,#10*4]
1551         eor     r12,r12,r1,lsr#10       @ sigma1(X[i+14])
1552         ldr     r1,[sp,#3*4]
1553
1554         add     r12,r12,r0
1555         eor     r0,r6,r6,ror#5  @ from BODY_00_15
1556         add     r2,r2,r12
1557         eor     r0,r0,r6,ror#19 @ Sigma1(e)
1558         add     r2,r2,r1                        @ X[i]
1559         ldr     r12,[r14],#4                    @ *K256++
1560         add     r9,r9,r2                        @ h+=X[i]
1561         str     r2,[sp,#10*4]
1562         eor     r2,r7,r8
1563         add     r9,r9,r0,ror#6  @ h+=Sigma1(e)
1564         and     r2,r2,r6
1565         add     r9,r9,r12                       @ h+=K256[i]
1566         eor     r2,r2,r8                        @ Ch(e,f,g)
1567         eor     r0,r10,r10,ror#11
1568         add     r9,r9,r2                        @ h+=Ch(e,f,g)
1569 #if 26==31
1570         and     r12,r12,#0xff
1571         cmp     r12,#0xf2                       @ done?
1572 #endif
1573 #if 26<15
1574 # if __ARM_ARCH__>=7
1575         ldr     r2,[r1],#4                      @ prefetch
1576 # else
1577         ldrb    r2,[r1,#3]
1578 # endif
1579         eor     r12,r10,r11                     @ a^b, b^c in next round
1580 #else
1581         ldr     r2,[sp,#12*4]           @ from future BODY_16_xx
1582         eor     r12,r10,r11                     @ a^b, b^c in next round
1583         ldr     r1,[sp,#9*4]    @ from future BODY_16_xx
1584 #endif
1585         eor     r0,r0,r10,ror#20        @ Sigma0(a)
1586         and     r3,r3,r12                       @ (b^c)&=(a^b)
1587         add     r5,r5,r9                        @ d+=h
1588         eor     r3,r3,r11                       @ Maj(a,b,c)
1589         add     r9,r9,r0,ror#2  @ h+=Sigma0(a)
1590         @ add   r9,r9,r3                        @ h+=Maj(a,b,c)
1591         @ ldr   r2,[sp,#12*4]           @ 27
1592         @ ldr   r1,[sp,#9*4]
1593         mov     r0,r2,ror#7
1594         add     r9,r9,r3                        @ h+=Maj(a,b,c) from the past
1595         mov     r3,r1,ror#17
1596         eor     r0,r0,r2,ror#18
1597         eor     r3,r3,r1,ror#19
1598         eor     r0,r0,r2,lsr#3  @ sigma0(X[i+1])
1599         ldr     r2,[sp,#11*4]
1600         eor     r3,r3,r1,lsr#10 @ sigma1(X[i+14])
1601         ldr     r1,[sp,#4*4]
1602
1603         add     r3,r3,r0
1604         eor     r0,r5,r5,ror#5  @ from BODY_00_15
1605         add     r2,r2,r3
1606         eor     r0,r0,r5,ror#19 @ Sigma1(e)
1607         add     r2,r2,r1                        @ X[i]
1608         ldr     r3,[r14],#4                     @ *K256++
1609         add     r8,r8,r2                        @ h+=X[i]
1610         str     r2,[sp,#11*4]
1611         eor     r2,r6,r7
1612         add     r8,r8,r0,ror#6  @ h+=Sigma1(e)
1613         and     r2,r2,r5
1614         add     r8,r8,r3                        @ h+=K256[i]
1615         eor     r2,r2,r7                        @ Ch(e,f,g)
1616         eor     r0,r9,r9,ror#11
1617         add     r8,r8,r2                        @ h+=Ch(e,f,g)
1618 #if 27==31
1619         and     r3,r3,#0xff
1620         cmp     r3,#0xf2                        @ done?
1621 #endif
1622 #if 27<15
1623 # if __ARM_ARCH__>=7
1624         ldr     r2,[r1],#4                      @ prefetch
1625 # else
1626         ldrb    r2,[r1,#3]
1627 # endif
1628         eor     r3,r9,r10                       @ a^b, b^c in next round
1629 #else
1630         ldr     r2,[sp,#13*4]           @ from future BODY_16_xx
1631         eor     r3,r9,r10                       @ a^b, b^c in next round
1632         ldr     r1,[sp,#10*4]   @ from future BODY_16_xx
1633 #endif
1634         eor     r0,r0,r9,ror#20 @ Sigma0(a)
1635         and     r12,r12,r3                      @ (b^c)&=(a^b)
1636         add     r4,r4,r8                        @ d+=h
1637         eor     r12,r12,r10                     @ Maj(a,b,c)
1638         add     r8,r8,r0,ror#2  @ h+=Sigma0(a)
1639         @ add   r8,r8,r12                       @ h+=Maj(a,b,c)
1640         @ ldr   r2,[sp,#13*4]           @ 28
1641         @ ldr   r1,[sp,#10*4]
1642         mov     r0,r2,ror#7
1643         add     r8,r8,r12                       @ h+=Maj(a,b,c) from the past
1644         mov     r12,r1,ror#17
1645         eor     r0,r0,r2,ror#18
1646         eor     r12,r12,r1,ror#19
1647         eor     r0,r0,r2,lsr#3  @ sigma0(X[i+1])
1648         ldr     r2,[sp,#12*4]
1649         eor     r12,r12,r1,lsr#10       @ sigma1(X[i+14])
1650         ldr     r1,[sp,#5*4]
1651
1652         add     r12,r12,r0
1653         eor     r0,r4,r4,ror#5  @ from BODY_00_15
1654         add     r2,r2,r12
1655         eor     r0,r0,r4,ror#19 @ Sigma1(e)
1656         add     r2,r2,r1                        @ X[i]
1657         ldr     r12,[r14],#4                    @ *K256++
1658         add     r7,r7,r2                        @ h+=X[i]
1659         str     r2,[sp,#12*4]
1660         eor     r2,r5,r6
1661         add     r7,r7,r0,ror#6  @ h+=Sigma1(e)
1662         and     r2,r2,r4
1663         add     r7,r7,r12                       @ h+=K256[i]
1664         eor     r2,r2,r6                        @ Ch(e,f,g)
1665         eor     r0,r8,r8,ror#11
1666         add     r7,r7,r2                        @ h+=Ch(e,f,g)
1667 #if 28==31
1668         and     r12,r12,#0xff
1669         cmp     r12,#0xf2                       @ done?
1670 #endif
1671 #if 28<15
1672 # if __ARM_ARCH__>=7
1673         ldr     r2,[r1],#4                      @ prefetch
1674 # else
1675         ldrb    r2,[r1,#3]
1676 # endif
1677         eor     r12,r8,r9                       @ a^b, b^c in next round
1678 #else
1679         ldr     r2,[sp,#14*4]           @ from future BODY_16_xx
1680         eor     r12,r8,r9                       @ a^b, b^c in next round
1681         ldr     r1,[sp,#11*4]   @ from future BODY_16_xx
1682 #endif
1683         eor     r0,r0,r8,ror#20 @ Sigma0(a)
1684         and     r3,r3,r12                       @ (b^c)&=(a^b)
1685         add     r11,r11,r7                      @ d+=h
1686         eor     r3,r3,r9                        @ Maj(a,b,c)
1687         add     r7,r7,r0,ror#2  @ h+=Sigma0(a)
1688         @ add   r7,r7,r3                        @ h+=Maj(a,b,c)
1689         @ ldr   r2,[sp,#14*4]           @ 29
1690         @ ldr   r1,[sp,#11*4]
1691         mov     r0,r2,ror#7
1692         add     r7,r7,r3                        @ h+=Maj(a,b,c) from the past
1693         mov     r3,r1,ror#17
1694         eor     r0,r0,r2,ror#18
1695         eor     r3,r3,r1,ror#19
1696         eor     r0,r0,r2,lsr#3  @ sigma0(X[i+1])
1697         ldr     r2,[sp,#13*4]
1698         eor     r3,r3,r1,lsr#10 @ sigma1(X[i+14])
1699         ldr     r1,[sp,#6*4]
1700
1701         add     r3,r3,r0
1702         eor     r0,r11,r11,ror#5        @ from BODY_00_15
1703         add     r2,r2,r3
1704         eor     r0,r0,r11,ror#19        @ Sigma1(e)
1705         add     r2,r2,r1                        @ X[i]
1706         ldr     r3,[r14],#4                     @ *K256++
1707         add     r6,r6,r2                        @ h+=X[i]
1708         str     r2,[sp,#13*4]
1709         eor     r2,r4,r5
1710         add     r6,r6,r0,ror#6  @ h+=Sigma1(e)
1711         and     r2,r2,r11
1712         add     r6,r6,r3                        @ h+=K256[i]
1713         eor     r2,r2,r5                        @ Ch(e,f,g)
1714         eor     r0,r7,r7,ror#11
1715         add     r6,r6,r2                        @ h+=Ch(e,f,g)
1716 #if 29==31
1717         and     r3,r3,#0xff
1718         cmp     r3,#0xf2                        @ done?
1719 #endif
1720 #if 29<15
1721 # if __ARM_ARCH__>=7
1722         ldr     r2,[r1],#4                      @ prefetch
1723 # else
1724         ldrb    r2,[r1,#3]
1725 # endif
1726         eor     r3,r7,r8                        @ a^b, b^c in next round
1727 #else
1728         ldr     r2,[sp,#15*4]           @ from future BODY_16_xx
1729         eor     r3,r7,r8                        @ a^b, b^c in next round
1730         ldr     r1,[sp,#12*4]   @ from future BODY_16_xx
1731 #endif
1732         eor     r0,r0,r7,ror#20 @ Sigma0(a)
1733         and     r12,r12,r3                      @ (b^c)&=(a^b)
1734         add     r10,r10,r6                      @ d+=h
1735         eor     r12,r12,r8                      @ Maj(a,b,c)
1736         add     r6,r6,r0,ror#2  @ h+=Sigma0(a)
1737         @ add   r6,r6,r12                       @ h+=Maj(a,b,c)
1738         @ ldr   r2,[sp,#15*4]           @ 30
1739         @ ldr   r1,[sp,#12*4]
1740         mov     r0,r2,ror#7
1741         add     r6,r6,r12                       @ h+=Maj(a,b,c) from the past
1742         mov     r12,r1,ror#17
1743         eor     r0,r0,r2,ror#18
1744         eor     r12,r12,r1,ror#19
1745         eor     r0,r0,r2,lsr#3  @ sigma0(X[i+1])
1746         ldr     r2,[sp,#14*4]
1747         eor     r12,r12,r1,lsr#10       @ sigma1(X[i+14])
1748         ldr     r1,[sp,#7*4]
1749
1750         add     r12,r12,r0
1751         eor     r0,r10,r10,ror#5        @ from BODY_00_15
1752         add     r2,r2,r12
1753         eor     r0,r0,r10,ror#19        @ Sigma1(e)
1754         add     r2,r2,r1                        @ X[i]
1755         ldr     r12,[r14],#4                    @ *K256++
1756         add     r5,r5,r2                        @ h+=X[i]
1757         str     r2,[sp,#14*4]
1758         eor     r2,r11,r4
1759         add     r5,r5,r0,ror#6  @ h+=Sigma1(e)
1760         and     r2,r2,r10
1761         add     r5,r5,r12                       @ h+=K256[i]
1762         eor     r2,r2,r4                        @ Ch(e,f,g)
1763         eor     r0,r6,r6,ror#11
1764         add     r5,r5,r2                        @ h+=Ch(e,f,g)
1765 #if 30==31
1766         and     r12,r12,#0xff
1767         cmp     r12,#0xf2                       @ done?
1768 #endif
1769 #if 30<15
1770 # if __ARM_ARCH__>=7
1771         ldr     r2,[r1],#4                      @ prefetch
1772 # else
1773         ldrb    r2,[r1,#3]
1774 # endif
1775         eor     r12,r6,r7                       @ a^b, b^c in next round
1776 #else
1777         ldr     r2,[sp,#0*4]            @ from future BODY_16_xx
1778         eor     r12,r6,r7                       @ a^b, b^c in next round
1779         ldr     r1,[sp,#13*4]   @ from future BODY_16_xx
1780 #endif
1781         eor     r0,r0,r6,ror#20 @ Sigma0(a)
1782         and     r3,r3,r12                       @ (b^c)&=(a^b)
1783         add     r9,r9,r5                        @ d+=h
1784         eor     r3,r3,r7                        @ Maj(a,b,c)
1785         add     r5,r5,r0,ror#2  @ h+=Sigma0(a)
1786         @ add   r5,r5,r3                        @ h+=Maj(a,b,c)
1787         @ ldr   r2,[sp,#0*4]            @ 31
1788         @ ldr   r1,[sp,#13*4]
1789         mov     r0,r2,ror#7
1790         add     r5,r5,r3                        @ h+=Maj(a,b,c) from the past
1791         mov     r3,r1,ror#17
1792         eor     r0,r0,r2,ror#18
1793         eor     r3,r3,r1,ror#19
1794         eor     r0,r0,r2,lsr#3  @ sigma0(X[i+1])
1795         ldr     r2,[sp,#15*4]
1796         eor     r3,r3,r1,lsr#10 @ sigma1(X[i+14])
1797         ldr     r1,[sp,#8*4]
1798
1799         add     r3,r3,r0
1800         eor     r0,r9,r9,ror#5  @ from BODY_00_15
1801         add     r2,r2,r3
1802         eor     r0,r0,r9,ror#19 @ Sigma1(e)
1803         add     r2,r2,r1                        @ X[i]
1804         ldr     r3,[r14],#4                     @ *K256++
1805         add     r4,r4,r2                        @ h+=X[i]
1806         str     r2,[sp,#15*4]
1807         eor     r2,r10,r11
1808         add     r4,r4,r0,ror#6  @ h+=Sigma1(e)
1809         and     r2,r2,r9
1810         add     r4,r4,r3                        @ h+=K256[i]
1811         eor     r2,r2,r11                       @ Ch(e,f,g)
1812         eor     r0,r5,r5,ror#11
1813         add     r4,r4,r2                        @ h+=Ch(e,f,g)
1814 #if 31==31
1815         and     r3,r3,#0xff
1816         cmp     r3,#0xf2                        @ done?
1817 #endif
1818 #if 31<15
1819 # if __ARM_ARCH__>=7
1820         ldr     r2,[r1],#4                      @ prefetch
1821 # else
1822         ldrb    r2,[r1,#3]
1823 # endif
1824         eor     r3,r5,r6                        @ a^b, b^c in next round
1825 #else
1826         ldr     r2,[sp,#1*4]            @ from future BODY_16_xx
1827         eor     r3,r5,r6                        @ a^b, b^c in next round
1828         ldr     r1,[sp,#14*4]   @ from future BODY_16_xx
1829 #endif
1830         eor     r0,r0,r5,ror#20 @ Sigma0(a)
1831         and     r12,r12,r3                      @ (b^c)&=(a^b)
1832         add     r8,r8,r4                        @ d+=h
1833         eor     r12,r12,r6                      @ Maj(a,b,c)
1834         add     r4,r4,r0,ror#2  @ h+=Sigma0(a)
1835         @ add   r4,r4,r12                       @ h+=Maj(a,b,c)
@ ---- end of round 31: finish this input block or run another batch of rounds ----
@ The condition tested below comes from the "done?" compare in round 31
@ (cmp r3,#0xf2 on the low byte of the K256 word fetched for that round);
@ no instruction in between updates the flags.
1836 #if __ARM_ARCH__>=7
1837         ite     eq                      @ Thumb2 thing, sanity check in ARM
1838 #endif
1839         ldreq   r3,[sp,#16*4]           @ pull ctx
1840         bne     .Lrounds_16_xx          @ not done: go round again
1841
             @ Rounds finished: add the saved state words at ctx (r3) into the
             @ working variables r4-r11 and write the sums back.  r0, r2 and
             @ r12 take turns as load temporaries, so each add uses a value
             @ that was loaded a few instructions earlier.
1842         add     r4,r4,r12               @ h+=Maj(a,b,c) from the past
1843         ldr     r0,[r3,#0]              @ ctx word 0
1844         ldr     r2,[r3,#4]              @ ctx word 1
1845         ldr     r12,[r3,#8]             @ ctx word 2
1846         add     r4,r4,r0
1847         ldr     r0,[r3,#12]             @ ctx word 3
1848         add     r5,r5,r2
1849         ldr     r2,[r3,#16]             @ ctx word 4
1850         add     r6,r6,r12
1851         ldr     r12,[r3,#20]            @ ctx word 5
1852         add     r7,r7,r0
1853         ldr     r0,[r3,#24]             @ ctx word 6
1854         add     r8,r8,r2
1855         ldr     r2,[r3,#28]             @ ctx word 7
1856         add     r9,r9,r12
1857         ldr     r1,[sp,#17*4]           @ pull inp
1858         ldr     r12,[sp,#18*4]          @ pull inp+len
1859         add     r10,r10,r0
1860         add     r11,r11,r2
1861         stmia   r3,{r4,r5,r6,r7,r8,r9,r10,r11}
1862         cmp     r1,r12                  @ input pointer reached inp+len?
1863         sub     r14,r14,#256    @ rewind Ktbl
             @ sub (no s suffix) leaves the flags from the cmp above intact.
1864         bne     .Loop                   @ more input: process the next block
1865
             @ Release the 19-word frame: the sixteen X[] slots at sp+0..15*4
             @ plus the ctx, inp and inp+len slots pulled above.
1866         add     sp,sp,#19*4     @ destroy frame
1867 #if __ARM_ARCH__>=5
1868         ldmia   sp!,{r4-r11,pc}         @ restore r4-r11, return via popped pc
1869 #else
             @ Older cores: pop the return address into lr and test bit 0.
             @ If it is clear, return with a plain mov pc,lr; otherwise fall
             @ through to the literal word 0xe12fff1e, the encoding of bx lr,
             @ emitted as data (presumably so the file still assembles for
             @ targets that have no BX mnemonic).
1870         ldmia   sp!,{r4-r11,lr}
1871         tst     lr,#1
1872         moveq   pc,lr                   @ be binary compatible with V4, yet
1873         .word   0xe12fff1e                      @ interoperable with Thumb ISA:-)
1874 #endif
1875 .size   sha256_block_data_order,.-sha256_block_data_order
1876 #if __ARM_MAX_ARCH__>=7
1877 .arch   armv7-a
1878 .fpu    neon
1879
@ sha256_block_data_order_neon -- entry and set-up for the NEON code path.
@
@ Register use visible in this prologue:
@   r0  = pointer to eight 32-bit state words (loaded into r4-r11 below)
@   r1  = input pointer
@   r2  = input length in 64-byte blocks (turned into an end pointer)
@   r14 = cursor into the K256 constant table
@
@ Frame built here (sp rounded down to a 16-byte boundary):
@   [sp,#0..#63]  sixteen words X[i]+K256[i] for the upcoming rounds
@   [sp,#64]      r0 (state pointer)
@   [sp,#68]      input pointer, already advanced past the first block
@   [sp,#72]      end-of-input pointer
@   [sp,#76]      sp as it was right after the register push
1880 .global sha256_block_data_order_neon
1881 .type   sha256_block_data_order_neon,%function
1882 .align  4
1883 sha256_block_data_order_neon:
1884 .LNEON:
1885         stmdb   sp!,{r4-r12,lr}         @ save ten registers
1886
1887         sub     r11,sp,#16*4+16         @ room for 16 words plus 4 bookkeeping words
             @ NOTE(review): adrl is a GNU as pseudo-instruction; other
             @ assemblers may not accept it -- confirm for the toolchains in use.
1888         adrl    r14,K256                @ r14 = address of the K256 table
1889         bic     r11,r11,#15             @ align for 128-bit stores
1890         mov     r12,sp                  @ remember sp before moving it
1891         mov     sp,r11                  @ alloca
1892         add     r2,r1,r2,lsl#6  @ len to point at the end of inp
1893
             @ Load the first 64-byte block into q0-q3 and the first sixteen
             @ K256 words into q8-q11.  Each post-incrementing load moves its
             @ base register on by 16 bytes.
1894         vld1.8          {q0},[r1]!
1895         vld1.8          {q1},[r1]!
1896         vld1.8          {q2},[r1]!
1897         vld1.8          {q3},[r1]!
1898         vld1.32         {q8},[r14,:128]!
1899         vld1.32         {q9},[r14,:128]!
1900         vld1.32         {q10},[r14,:128]!
1901         vld1.32         {q11},[r14,:128]!
             @ The input was loaded byte-wise, so the bytes of every 32-bit
             @ lane are reversed unconditionally (see the original remark).
             @ The bookkeeping stores are interleaved with the NEON work.
1902         vrev32.8        q0,q0           @ yes, even on
1903         str             r0,[sp,#64]     @ state pointer
1904         vrev32.8        q1,q1           @ big-endian
1905         str             r1,[sp,#68]     @ input pointer after the first block
1906         mov             r1,sp           @ r1 = store cursor for X+K words
1907         vrev32.8        q2,q2
1908         str             r2,[sp,#72]     @ end of input
1909         vrev32.8        q3,q3
1910         str             r12,[sp,#76]            @ save original sp
1911         vadd.i32        q8,q8,q0        @ K256[0..3]   + X[0..3]
1912         vadd.i32        q9,q9,q1        @ K256[4..7]   + X[4..7]
1913         vst1.32         {q8},[r1,:128]!
1914         vadd.i32        q10,q10,q2      @ K256[8..11]  + X[8..11]
1915         vst1.32         {q9},[r1,:128]!
1916         vadd.i32        q11,q11,q3      @ K256[12..15] + X[12..15]
1917         vst1.32         {q10},[r1,:128]!
1918         vst1.32         {q11},[r1,:128]!
1919
1920         ldmia           r0,{r4-r11}     @ working variables = current state
1921         sub             r1,r1,#64       @ store cursor back to sp
1922         ldr             r2,[sp,#0]      @ first X+K word for the first round
1923         eor             r12,r12,r12     @ r12 = 0; the loop adds r12 into r4 first
1924         eor             r3,r5,r6        @ b^c, consumed by the first Maj computation
1925         b               .L_00_48        @ jump over any .align padding before the label
1926
1927 .align  4
1928 .L_00_48:
1929         vext.8  q8,q0,q1,#4
1930         add     r11,r11,r2
1931         eor     r2,r9,r10
1932         eor     r0,r8,r8,ror#5
1933         vext.8  q9,q2,q3,#4
1934         add     r4,r4,r12
1935         and     r2,r2,r8
1936         eor     r12,r0,r8,ror#19
1937         vshr.u32        q10,q8,#7
1938         eor     r0,r4,r4,ror#11
1939         eor     r2,r2,r10
1940         vadd.i32        q0,q0,q9
1941         add     r11,r11,r12,ror#6
1942         eor     r12,r4,r5
1943         vshr.u32        q9,q8,#3
1944         eor     r0,r0,r4,ror#20
1945         add     r11,r11,r2
1946         vsli.32 q10,q8,#25
1947         ldr     r2,[sp,#4]
1948         and     r3,r3,r12
1949         vshr.u32        q11,q8,#18
1950         add     r7,r7,r11
1951         add     r11,r11,r0,ror#2
1952         eor     r3,r3,r5
1953         veor    q9,q9,q10
1954         add     r10,r10,r2
1955         vsli.32 q11,q8,#14
1956         eor     r2,r8,r9
1957         eor     r0,r7,r7,ror#5
1958         vshr.u32        d24,d7,#17
1959         add     r11,r11,r3
1960         and     r2,r2,r7
1961         veor    q9,q9,q11
1962         eor     r3,r0,r7,ror#19
1963         eor     r0,r11,r11,ror#11
1964         vsli.32 d24,d7,#15
1965         eor     r2,r2,r9
1966         add     r10,r10,r3,ror#6
1967         vshr.u32        d25,d7,#10
1968         eor     r3,r11,r4
1969         eor     r0,r0,r11,ror#20
1970         vadd.i32        q0,q0,q9
1971         add     r10,r10,r2
1972         ldr     r2,[sp,#8]
1973         veor    d25,d25,d24
1974         and     r12,r12,r3
1975         add     r6,r6,r10
1976         vshr.u32        d24,d7,#19
1977         add     r10,r10,r0,ror#2
1978         eor     r12,r12,r4
1979         vsli.32 d24,d7,#13
1980         add     r9,r9,r2
1981         eor     r2,r7,r8
1982         veor    d25,d25,d24
1983         eor     r0,r6,r6,ror#5
1984         add     r10,r10,r12
1985         vadd.i32        d0,d0,d25
1986         and     r2,r2,r6
1987         eor     r12,r0,r6,ror#19
1988         vshr.u32        d24,d0,#17
1989         eor     r0,r10,r10,ror#11
1990         eor     r2,r2,r8
1991         vsli.32 d24,d0,#15
1992         add     r9,r9,r12,ror#6
1993         eor     r12,r10,r11
1994         vshr.u32        d25,d0,#10
1995         eor     r0,r0,r10,ror#20
1996         add     r9,r9,r2
1997         veor    d25,d25,d24
1998         ldr     r2,[sp,#12]
1999         and     r3,r3,r12
2000         vshr.u32        d24,d0,#19
2001         add     r5,r5,r9
2002         add     r9,r9,r0,ror#2
2003         eor     r3,r3,r11
2004         vld1.32 {q8},[r14,:128]!
2005         add     r8,r8,r2
2006         vsli.32 d24,d0,#13
2007         eor     r2,r6,r7
2008         eor     r0,r5,r5,ror#5
2009         veor    d25,d25,d24
2010         add     r9,r9,r3
2011         and     r2,r2,r5
2012         vadd.i32        d1,d1,d25
2013         eor     r3,r0,r5,ror#19
2014         eor     r0,r9,r9,ror#11
2015         vadd.i32        q8,q8,q0
2016         eor     r2,r2,r7
2017         add     r8,r8,r3,ror#6
2018         eor     r3,r9,r10
2019         eor     r0,r0,r9,ror#20
2020         add     r8,r8,r2
2021         ldr     r2,[sp,#16]
2022         and     r12,r12,r3
2023         add     r4,r4,r8
2024         vst1.32 {q8},[r1,:128]!
2025         add     r8,r8,r0,ror#2
2026         eor     r12,r12,r10
2027         vext.8  q8,q1,q2,#4
2028         add     r7,r7,r2
2029         eor     r2,r5,r6
2030         eor     r0,r4,r4,ror#5
2031         vext.8  q9,q3,q0,#4
2032         add     r8,r8,r12
2033         and     r2,r2,r4
2034         eor     r12,r0,r4,ror#19
2035         vshr.u32        q10,q8,#7
2036         eor     r0,r8,r8,ror#11
2037         eor     r2,r2,r6
2038         vadd.i32        q1,q1,q9
2039         add     r7,r7,r12,ror#6
2040         eor     r12,r8,r9
2041         vshr.u32        q9,q8,#3
2042         eor     r0,r0,r8,ror#20
2043         add     r7,r7,r2
2044         vsli.32 q10,q8,#25
2045         ldr     r2,[sp,#20]
2046         and     r3,r3,r12
2047         vshr.u32        q11,q8,#18
2048         add     r11,r11,r7
2049         add     r7,r7,r0,ror#2
2050         eor     r3,r3,r9
2051         veor    q9,q9,q10
2052         add     r6,r6,r2
2053         vsli.32 q11,q8,#14
2054         eor     r2,r4,r5
2055         eor     r0,r11,r11,ror#5
2056         vshr.u32        d24,d1,#17
2057         add     r7,r7,r3
2058         and     r2,r2,r11
2059         veor    q9,q9,q11
2060         eor     r3,r0,r11,ror#19
2061         eor     r0,r7,r7,ror#11
2062         vsli.32 d24,d1,#15
2063         eor     r2,r2,r5
2064         add     r6,r6,r3,ror#6
2065         vshr.u32        d25,d1,#10
2066         eor     r3,r7,r8
2067         eor     r0,r0,r7,ror#20
2068         vadd.i32        q1,q1,q9
2069         add     r6,r6,r2
2070         ldr     r2,[sp,#24]
2071         veor    d25,d25,d24
2072         and     r12,r12,r3
2073         add     r10,r10,r6
2074         vshr.u32        d24,d1,#19
2075         add     r6,r6,r0,ror#2
2076         eor     r12,r12,r8
2077         vsli.32 d24,d1,#13
2078         add     r5,r5,r2
2079         eor     r2,r11,r4
2080         veor    d25,d25,d24
2081         eor     r0,r10,r10,ror#5
2082         add     r6,r6,r12
2083         vadd.i32        d2,d2,d25
2084         and     r2,r2,r10
2085         eor     r12,r0,r10,ror#19
2086         vshr.u32        d24,d2,#17
2087         eor     r0,r6,r6,ror#11
2088         eor     r2,r2,r4
2089         vsli.32 d24,d2,#15
2090         add     r5,r5,r12,ror#6
2091         eor     r12,r6,r7
2092         vshr.u32        d25,d2,#10
2093         eor     r0,r0,r6,ror#20
2094         add     r5,r5,r2
2095         veor    d25,d25,d24
2096         ldr     r2,[sp,#28]
2097         and     r3,r3,r12
2098         vshr.u32        d24,d2,#19
2099         add     r9,r9,r5
2100         add     r5,r5,r0,ror#2
2101         eor     r3,r3,r7
2102         vld1.32 {q8},[r14,:128]!
2103         add     r4,r4,r2
2104         vsli.32 d24,d2,#13
2105         eor     r2,r10,r11
2106         eor     r0,r9,r9,ror#5
2107         veor    d25,d25,d24
2108         add     r5,r5,r3
2109         and     r2,r2,r9
2110         vadd.i32        d3,d3,d25
2111         eor     r3,r0,r9,ror#19
2112         eor     r0,r5,r5,ror#11
2113         vadd.i32        q8,q8,q1
2114         eor     r2,r2,r11
2115         add     r4,r4,r3,ror#6
2116         eor     r3,r5,r6
2117         eor     r0,r0,r5,ror#20
2118         add     r4,r4,r2
2119         ldr     r2,[sp,#32]
2120         and     r12,r12,r3
2121         add     r8,r8,r4
2122         vst1.32 {q8},[r1,:128]!
2123         add     r4,r4,r0,ror#2
2124         eor     r12,r12,r6
2125         vext.8  q8,q2,q3,#4
2126         add     r11,r11,r2
2127         eor     r2,r9,r10
2128         eor     r0,r8,r8,ror#5
2129         vext.8  q9,q0,q1,#4
2130         add     r4,r4,r12
2131         and     r2,r2,r8
2132         eor     r12,r0,r8,ror#19
2133         vshr.u32        q10,q8,#7
2134         eor     r0,r4,r4,ror#11
2135         eor     r2,r2,r10
2136         vadd.i32        q2,q2,q9
2137         add     r11,r11,r12,ror#6
2138         eor     r12,r4,r5
2139         vshr.u32        q9,q8,#3
2140         eor     r0,r0,r4,ror#20
2141         add     r11,r11,r2
2142         vsli.32 q10,q8,#25
2143         ldr     r2,[sp,#36]
2144         and     r3,r3,r12
2145         vshr.u32        q11,q8,#18
2146         add     r7,r7,r11
2147         add     r11,r11,r0,ror#2
2148         eor     r3,r3,r5
2149         veor    q9,q9,q10
2150         add     r10,r10,r2
2151         vsli.32 q11,q8,#14
2152         eor     r2,r8,r9
2153         eor     r0,r7,r7,ror#5
2154         vshr.u32        d24,d3,#17
2155         add     r11,r11,r3
2156         and     r2,r2,r7
2157         veor    q9,q9,q11
2158         eor     r3,r0,r7,ror#19
2159         eor     r0,r11,r11,ror#11
2160         vsli.32 d24,d3,#15
2161         eor     r2,r2,r9
2162         add     r10,r10,r3,ror#6
2163         vshr.u32        d25,d3,#10
2164         eor     r3,r11,r4
2165         eor     r0,r0,r11,ror#20
2166         vadd.i32        q2,q2,q9
2167         add     r10,r10,r2
2168         ldr     r2,[sp,#40]
2169         veor    d25,d25,d24
2170         and     r12,r12,r3
2171         add     r6,r6,r10
2172         vshr.u32        d24,d3,#19
2173         add     r10,r10,r0,ror#2
2174         eor     r12,r12,r4
2175         vsli.32 d24,d3,#13
2176         add     r9,r9,r2
2177         eor     r2,r7,r8
2178         veor    d25,d25,d24
2179         eor     r0,r6,r6,ror#5
2180         add     r10,r10,r12
2181         vadd.i32        d4,d4,d25
2182         and     r2,r2,r6
2183         eor     r12,r0,r6,ror#19
2184         vshr.u32        d24,d4,#17
2185         eor     r0,r10,r10,ror#11
2186         eor     r2,r2,r8
2187         vsli.32 d24,d4,#15
2188         add     r9,r9,r12,ror#6
2189         eor     r12,r10,r11
2190         vshr.u32        d25,d4,#10
2191         eor     r0,r0,r10,ror#20
2192         add     r9,r9,r2
2193         veor    d25,d25,d24
2194         ldr     r2,[sp,#44]
2195         and     r3,r3,r12
2196         vshr.u32        d24,d4,#19
2197         add     r5,r5,r9
2198         add     r9,r9,r0,ror#2
2199         eor     r3,r3,r11
2200         vld1.32 {q8},[r14,:128]!
2201         add     r8,r8,r2
2202         vsli.32 d24,d4,#13
2203         eor     r2,r6,r7
2204         eor     r0,r5,r5,ror#5
2205         veor    d25,d25,d24
2206         add     r9,r9,r3
2207         and     r2,r2,r5
2208         vadd.i32        d5,d5,d25
2209         eor     r3,r0,r5,ror#19
2210         eor     r0,r9,r9,ror#11
2211         vadd.i32        q8,q8,q2
2212         eor     r2,r2,r7
2213         add     r8,r8,r3,ror#6
2214         eor     r3,r9,r10
2215         eor     r0,r0,r9,ror#20
2216         add     r8,r8,r2
2217         ldr     r2,[sp,#48]
2218         and     r12,r12,r3
2219         add     r4,r4,r8
2220         vst1.32 {q8},[r1,:128]!
2221         add     r8,r8,r0,ror#2
2222         eor     r12,r12,r10
2223         vext.8  q8,q3,q0,#4
2224         add     r7,r7,r2
2225         eor     r2,r5,r6
2226         eor     r0,r4,r4,ror#5
2227         vext.8  q9,q1,q2,#4
2228         add     r8,r8,r12
2229         and     r2,r2,r4
2230         eor     r12,r0,r4,ror#19
2231         vshr.u32        q10,q8,#7
2232         eor     r0,r8,r8,ror#11
2233         eor     r2,r2,r6
2234         vadd.i32        q3,q3,q9
2235         add     r7,r7,r12,ror#6
2236         eor     r12,r8,r9
2237         vshr.u32        q9,q8,#3
2238         eor     r0,r0,r8,ror#20
2239         add     r7,r7,r2
2240         vsli.32 q10,q8,#25
2241         ldr     r2,[sp,#52]
2242         and     r3,r3,r12
2243         vshr.u32        q11,q8,#18
2244         add     r11,r11,r7
2245         add     r7,r7,r0,ror#2
2246         eor     r3,r3,r9
2247         veor    q9,q9,q10
2248         add     r6,r6,r2
2249         vsli.32 q11,q8,#14
2250         eor     r2,r4,r5
2251         eor     r0,r11,r11,ror#5
2252         vshr.u32        d24,d5,#17
2253         add     r7,r7,r3
2254         and     r2,r2,r11
2255         veor    q9,q9,q11
2256         eor     r3,r0,r11,ror#19
2257         eor     r0,r7,r7,ror#11
2258         vsli.32 d24,d5,#15
2259         eor     r2,r2,r5
2260         add     r6,r6,r3,ror#6
2261         vshr.u32        d25,d5,#10
2262         eor     r3,r7,r8
2263         eor     r0,r0,r7,ror#20
2264         vadd.i32        q3,q3,q9
2265         add     r6,r6,r2
2266         ldr     r2,[sp,#56]
2267         veor    d25,d25,d24
2268         and     r12,r12,r3
2269         add     r10,r10,r6
2270         vshr.u32        d24,d5,#19
2271         add     r6,r6,r0,ror#2
2272         eor     r12,r12,r8
2273         vsli.32 d24,d5,#13
2274         add     r5,r5,r2
2275         eor     r2,r11,r4
2276         veor    d25,d25,d24
2277         eor     r0,r10,r10,ror#5
2278         add     r6,r6,r12
2279         vadd.i32        d6,d6,d25
2280         and     r2,r2,r10
2281         eor     r12,r0,r10,ror#19
2282         vshr.u32        d24,d6,#17
2283         eor     r0,r6,r6,ror#11
2284         eor     r2,r2,r4
2285         vsli.32 d24,d6,#15
2286         add     r5,r5,r12,ror#6
2287         eor     r12,r6,r7
2288         vshr.u32        d25,d6,#10
2289         eor     r0,r0,r6,ror#20
2290         add     r5,r5,r2
2291         veor    d25,d25,d24
2292         ldr     r2,[sp,#60]
2293         and     r3,r3,r12
2294         vshr.u32        d24,d6,#19
2295         add     r9,r9,r5
2296         add     r5,r5,r0,ror#2
2297         eor     r3,r3,r7
2298         vld1.32 {q8},[r14,:128]!
2299         add     r4,r4,r2
2300         vsli.32 d24,d6,#13
2301         eor     r2,r10,r11
2302         eor     r0,r9,r9,ror#5
2303         veor    d25,d25,d24
2304         add     r5,r5,r3
2305         and     r2,r2,r9
2306         vadd.i32        d7,d7,d25
2307         eor     r3,r0,r9,ror#19
2308         eor     r0,r5,r5,ror#11
2309         vadd.i32        q8,q8,q3
2310         eor     r2,r2,r11
2311         add     r4,r4,r3,ror#6
2312         eor     r3,r5,r6
2313         eor     r0,r0,r5,ror#20
2314         add     r4,r4,r2
2315         ldr     r2,[r14]
2316         and     r12,r12,r3
2317         add     r8,r8,r4
2318         vst1.32 {q8},[r1,:128]!
2319         add     r4,r4,r0,ror#2
2320         eor     r12,r12,r6
2321         teq     r2,#0                           @ check for K256 terminator
             @ r2 was loaded from [r14] earlier in this round, i.e. the word
             @ that follows the K256 constants consumed so far; zero ends the
             @ loop.  The next two instructions leave the flags untouched.
2322         ldr     r2,[sp,#0]                      @ first X+K word for the next round
2323         sub     r1,r1,#64                       @ store cursor back to start of X+K area
2324         bne     .L_00_48                        @ non-zero: another pass
2325
             @ Loop finished.  Fetch the next 64-byte input block while the
             @ remaining rounds run.  If the saved input pointer already equals
             @ the end pointer there is no next block, so step back 64 bytes and
             @ let the unconditional loads re-read the block just consumed
             @ instead of reading past the end of the input.  The flags from
             @ the teq stay valid through the loads, so the advanced pointer is
             @ written back only when a new block really was fetched.
2326         ldr             r1,[sp,#68]             @ saved input pointer
2327         ldr             r0,[sp,#72]             @ saved end-of-input pointer
2328         sub             r14,r14,#256    @ rewind r14
2329         teq             r1,r0                   @ at the end of the input?
2330         it              eq
2331         subeq           r1,r1,#64               @ avoid SEGV
2332         vld1.8          {q0},[r1]!              @ load next input block
2333         vld1.8          {q1},[r1]!
2334         vld1.8          {q2},[r1]!
2335         vld1.8          {q3},[r1]!
2336         it              ne
2337         strne           r1,[sp,#68]             @ remember the advanced pointer
2338         mov             r1,sp                   @ r1 = base of the X+K area again
2339         add     r11,r11,r2
2340         eor     r2,r9,r10
2341         eor     r0,r8,r8,ror#5
2342         add     r4,r4,r12
2343         vld1.32 {q8},[r14,:128]!
2344         and     r2,r2,r8
2345         eor     r12,r0,r8,ror#19
2346         eor     r0,r4,r4,ror#11
2347         eor     r2,r2,r10
2348         vrev32.8        q0,q0
2349         add     r11,r11,r12,ror#6
2350         eor     r12,r4,r5
2351         eor     r0,r0,r4,ror#20
2352         add     r11,r11,r2
2353         vadd.i32        q8,q8,q0
2354         ldr     r2,[sp,#4]
2355         and     r3,r3,r12
2356         add     r7,r7,r11
2357         add     r11,r11,r0,ror#2
2358         eor     r3,r3,r5
2359         add     r10,r10,r2
2360         eor     r2,r8,r9
2361         eor     r0,r7,r7,ror#5
2362         add     r11,r11,r3
2363         and     r2,r2,r7
2364         eor     r3,r0,r7,ror#19
2365         eor     r0,r11,r11,ror#11
2366         eor     r2,r2,r9
2367         add     r10,r10,r3,ror#6
2368         eor     r3,r11,r4
2369         eor     r0,r0,r11,ror#20
2370         add     r10,r10,r2
2371         ldr     r2,[sp,#8]
2372         and     r12,r12,r3
2373         add     r6,r6,r10
2374         add     r10,r10,r0,ror#2
2375         eor     r12,r12,r4
2376         add     r9,r9,r2
2377         eor     r2,r7,r8
2378         eor     r0,r6,r6,ror#5
2379         add     r10,r10,r12
2380         and     r2,r2,r6
2381         eor     r12,r0,r6,ror#19
2382         eor     r0,r10,r10,ror#11
2383         eor     r2,r2,r8
2384         add     r9,r9,r12,ror#6
2385         eor     r12,r10,r11
2386         eor     r0,r0,r10,ror#20
2387         add     r9,r9,r2
2388         ldr     r2,[sp,#12]
2389         and     r3,r3,r12
2390         add     r5,r5,r9
2391         add     r9,r9,r0,ror#2
2392         eor     r3,r3,r11
2393         add     r8,r8,r2
2394         eor     r2,r6,r7
2395         eor     r0,r5,r5,ror#5
2396         add     r9,r9,r3
2397         and     r2,r2,r5
2398         eor     r3,r0,r5,ror#19
2399         eor     r0,r9,r9,ror#11
2400         eor     r2,r2,r7
2401         add     r8,r8,r3,ror#6
2402         eor     r3,r9,r10
2403         eor     r0,r0,r9,ror#20
2404         add     r8,r8,r2
2405         ldr     r2,[sp,#16]
2406         and     r12,r12,r3
2407         add     r4,r4,r8
2408         add     r8,r8,r0,ror#2
2409         eor     r12,r12,r10
2410         vst1.32 {q8},[r1,:128]!
2411         add     r7,r7,r2
2412         eor     r2,r5,r6
2413         eor     r0,r4,r4,ror#5
2414         add     r8,r8,r12
2415         vld1.32 {q8},[r14,:128]!
2416         and     r2,r2,r4
2417         eor     r12,r0,r4,ror#19
2418         eor     r0,r8,r8,ror#11
2419         eor     r2,r2,r6
2420         vrev32.8        q1,q1
2421         add     r7,r7,r12,ror#6
2422         eor     r12,r8,r9
2423         eor     r0,r0,r8,ror#20
2424         add     r7,r7,r2
2425         vadd.i32        q8,q8,q1
2426         ldr     r2,[sp,#20]
2427         and     r3,r3,r12
2428         add     r11,r11,r7
2429         add     r7,r7,r0,ror#2
2430         eor     r3,r3,r9
2431         add     r6,r6,r2
2432         eor     r2,r4,r5
2433         eor     r0,r11,r11,ror#5
2434         add     r7,r7,r3
2435         and     r2,r2,r11
2436         eor     r3,r0,r11,ror#19
2437         eor     r0,r7,r7,ror#11
2438         eor     r2,r2,r5
2439         add     r6,r6,r3,ror#6
2440         eor     r3,r7,r8
2441         eor     r0,r0,r7,ror#20
2442         add     r6,r6,r2
2443         ldr     r2,[sp,#24]
2444         and     r12,r12,r3
2445         add     r10,r10,r6
2446         add     r6,r6,r0,ror#2
2447         eor     r12,r12,r8
2448         add     r5,r5,r2
2449         eor     r2,r11,r4
2450         eor     r0,r10,r10,ror#5
2451         add     r6,r6,r12
2452         and     r2,r2,r10
2453         eor     r12,r0,r10,ror#19
2454         eor     r0,r6,r6,ror#11
2455         eor     r2,r2,r4
2456         add     r5,r5,r12,ror#6
2457         eor     r12,r6,r7
2458         eor     r0,r0,r6,ror#20
2459         add     r5,r5,r2
2460         ldr     r2,[sp,#28]
2461         and     r3,r3,r12
2462         add     r9,r9,r5
2463         add     r5,r5,r0,ror#2
2464         eor     r3,r3,r7
2465         add     r4,r4,r2
2466         eor     r2,r10,r11
2467         eor     r0,r9,r9,ror#5
2468         add     r5,r5,r3
2469         and     r2,r2,r9
2470         eor     r3,r0,r9,ror#19
2471         eor     r0,r5,r5,ror#11
2472         eor     r2,r2,r11
2473         add     r4,r4,r3,ror#6
2474         eor     r3,r5,r6
2475         eor     r0,r0,r5,ror#20
2476         add     r4,r4,r2
2477         ldr     r2,[sp,#32]
2478         and     r12,r12,r3
2479         add     r8,r8,r4
2480         add     r4,r4,r0,ror#2
2481         eor     r12,r12,r6
2482         vst1.32 {q8},[r1,:128]!
2483         add     r11,r11,r2
2484         eor     r2,r9,r10
2485         eor     r0,r8,r8,ror#5
2486         add     r4,r4,r12
2487         vld1.32 {q8},[r14,:128]!
2488         and     r2,r2,r8
2489         eor     r12,r0,r8,ror#19
2490         eor     r0,r4,r4,ror#11
2491         eor     r2,r2,r10
2492         vrev32.8        q2,q2
2493         add     r11,r11,r12,ror#6
2494         eor     r12,r4,r5
2495         eor     r0,r0,r4,ror#20
2496         add     r11,r11,r2
2497         vadd.i32        q8,q8,q2
2498         ldr     r2,[sp,#36]
2499         and     r3,r3,r12
2500         add     r7,r7,r11
2501         add     r11,r11,r0,ror#2
2502         eor     r3,r3,r5
2503         add     r10,r10,r2
2504         eor     r2,r8,r9
2505         eor     r0,r7,r7,ror#5
2506         add     r11,r11,r3
2507         and     r2,r2,r7
2508         eor     r3,r0,r7,ror#19
2509         eor     r0,r11,r11,ror#11
2510         eor     r2,r2,r9
2511         add     r10,r10,r3,ror#6
2512         eor     r3,r11,r4
2513         eor     r0,r0,r11,ror#20
2514         add     r10,r10,r2
2515         ldr     r2,[sp,#40]
2516         and     r12,r12,r3
2517         add     r6,r6,r10
2518         add     r10,r10,r0,ror#2
2519         eor     r12,r12,r4
2520         add     r9,r9,r2
2521         eor     r2,r7,r8
2522         eor     r0,r6,r6,ror#5
2523         add     r10,r10,r12
2524         and     r2,r2,r6
2525         eor     r12,r0,r6,ror#19
2526         eor     r0,r10,r10,ror#11
2527         eor     r2,r2,r8
2528         add     r9,r9,r12,ror#6
2529         eor     r12,r10,r11
2530         eor     r0,r0,r10,ror#20
2531         add     r9,r9,r2
2532         ldr     r2,[sp,#44]
2533         and     r3,r3,r12
2534         add     r5,r5,r9
2535         add     r9,r9,r0,ror#2
2536         eor     r3,r3,r11
2537         add     r8,r8,r2
2538         eor     r2,r6,r7
2539         eor     r0,r5,r5,ror#5
2540         add     r9,r9,r3
2541         and     r2,r2,r5
2542         eor     r3,r0,r5,ror#19
2543         eor     r0,r9,r9,ror#11
2544         eor     r2,r2,r7
2545         add     r8,r8,r3,ror#6
2546         eor     r3,r9,r10
2547         eor     r0,r0,r9,ror#20
2548         add     r8,r8,r2
2549         ldr     r2,[sp,#48]
2550         and     r12,r12,r3
2551         add     r4,r4,r8
2552         add     r8,r8,r0,ror#2
2553         eor     r12,r12,r10
2554         vst1.32 {q8},[r1,:128]!
2555         add     r7,r7,r2
2556         eor     r2,r5,r6
2557         eor     r0,r4,r4,ror#5
2558         add     r8,r8,r12
2559         vld1.32 {q8},[r14,:128]!
2560         and     r2,r2,r4
2561         eor     r12,r0,r4,ror#19
2562         eor     r0,r8,r8,ror#11
2563         eor     r2,r2,r6
2564         vrev32.8        q3,q3
2565         add     r7,r7,r12,ror#6
2566         eor     r12,r8,r9
2567         eor     r0,r0,r8,ror#20
2568         add     r7,r7,r2
2569         vadd.i32        q8,q8,q3
2570         ldr     r2,[sp,#52]
2571         and     r3,r3,r12
2572         add     r11,r11,r7
2573         add     r7,r7,r0,ror#2
2574         eor     r3,r3,r9
2575         add     r6,r6,r2
2576         eor     r2,r4,r5
2577         eor     r0,r11,r11,ror#5
2578         add     r7,r7,r3
2579         and     r2,r2,r11
2580         eor     r3,r0,r11,ror#19
2581         eor     r0,r7,r7,ror#11
2582         eor     r2,r2,r5
2583         add     r6,r6,r3,ror#6
2584         eor     r3,r7,r8
2585         eor     r0,r0,r7,ror#20
2586         add     r6,r6,r2
2587         ldr     r2,[sp,#56]
2588         and     r12,r12,r3
2589         add     r10,r10,r6
2590         add     r6,r6,r0,ror#2
2591         eor     r12,r12,r8
2592         add     r5,r5,r2
2593         eor     r2,r11,r4
2594         eor     r0,r10,r10,ror#5
2595         add     r6,r6,r12
2596         and     r2,r2,r10
2597         eor     r12,r0,r10,ror#19
2598         eor     r0,r6,r6,ror#11
2599         eor     r2,r2,r4
2600         add     r5,r5,r12,ror#6
2601         eor     r12,r6,r7
2602         eor     r0,r0,r6,ror#20
2603         add     r5,r5,r2
2604         ldr     r2,[sp,#60]
2605         and     r3,r3,r12
2606         add     r9,r9,r5
2607         add     r5,r5,r0,ror#2
2608         eor     r3,r3,r7
2609         add     r4,r4,r2
2610         eor     r2,r10,r11
2611         eor     r0,r9,r9,ror#5
2612         add     r5,r5,r3
2613         and     r2,r2,r9
2614         eor     r3,r0,r9,ror#19
2615         eor     r0,r5,r5,ror#11
2616         eor     r2,r2,r11
2617         add     r4,r4,r3,ror#6
2618         eor     r3,r5,r6
2619         eor     r0,r0,r5,ror#20
2620         add     r4,r4,r2
2621         ldr     r2,[sp,#64]
2622         and     r12,r12,r3
2623         add     r8,r8,r4
2624         add     r4,r4,r0,ror#2
2625         eor     r12,r12,r6
2626         vst1.32 {q8},[r1,:128]!
2627         ldr     r0,[r2,#0]
2628         add     r4,r4,r12                       @ h+=Maj(a,b,c) from the past
2629         ldr     r12,[r2,#4]
2630         ldr     r3,[r2,#8]
2631         ldr     r1,[r2,#12]
2632         add     r4,r4,r0                        @ accumulate
2633         ldr     r0,[r2,#16]
2634         add     r5,r5,r12
2635         ldr     r12,[r2,#20]
2636         add     r6,r6,r3
2637         ldr     r3,[r2,#24]
2638         add     r7,r7,r1
2639         ldr     r1,[r2,#28]
2640         add     r8,r8,r0
2641         str     r4,[r2],#4
2642         add     r9,r9,r12
2643         str     r5,[r2],#4
2644         add     r10,r10,r3
2645         str     r6,[r2],#4
2646         add     r11,r11,r1
2647         str     r7,[r2],#4
2648         stmia   r2,{r8-r11}
2649
2650         ittte   ne
2651         movne   r1,sp
2652         ldrne   r2,[sp,#0]
2653         eorne   r12,r12,r12
2654         ldreq   sp,[sp,#76]                     @ restore original sp
2655         itt     ne
2656         eorne   r3,r5,r6
2657         bne     .L_00_48
2658
2659         ldmia   sp!,{r4-r12,pc}
2660 .size   sha256_block_data_order_neon,.-sha256_block_data_order_neon
2661 #endif
2662 #if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
2663
@ INST(a,b,c,d) emits one hand-encoded 32-bit instruction as four raw bytes.
@ The plain ARM variant emits a,b,c,d unchanged. The Thumb-2 variant emits
@ c and d first (with 0xc ORed into d) and then a and b.
@ NOTE(review): presumably this exists so that the sha256* instructions can be
@ assembled by toolchains that lack the mnemonics - confirm.
2664 # ifdef __thumb2__
2665 #  define INST(a,b,c,d) .byte   c,d|0xc,a,b
2666 # else
2667 #  define INST(a,b,c,d) .byte   a,b,c,d
2668 # endif
2669
@ sha256_block_data_order_armv8
@
@ Processes consecutive 64-byte input blocks using the sha256h, sha256h2,
@ sha256su0 and sha256su1 instructions (all emitted through INST). This whole
@ region is compiled out when __KERNEL__ is defined.
@
@ In:    r0 = pointer to the 32-byte state; loaded on entry, stored on exit
@        r1 = pointer to the input; advanced by 64 bytes per block
@        r2 = number of 64-byte blocks; turned into the end-of-input pointer
@ Uses:  r3      = walking pointer into K256 (defined outside this excerpt),
@                  rewound to the start of K256 once per block
@        q0,q1   = working state
@        q2      = copy of q0 taken before each sha256h
@        q8-q11  = byte-reversed input words / message schedule
@        q12,q13 = alternating buffers holding a K256 quad plus a schedule quad
@        q14,q15 = state as it was at the start of the current block
@ The routine does not touch the stack and writes no core register other
@ than r1-r3.
@ NOTE(review): the loop body always runs once before the end-of-input test
@ is acted on, so a block count of zero is not handled here - confirm that
@ callers never pass zero.
2670 .type   sha256_block_data_order_armv8,%function
2671 .align  5
2672 sha256_block_data_order_armv8:
2673 .LARMv8:
2674         vld1.32 {q0,q1},[r0]    @ q0,q1 = current state
2675 # ifdef __thumb2__
2676         adr     r3,.LARMv8
2677         sub     r3,r3,#.LARMv8-K256     @ r3 = address of K256
2678 # else
2679         adrl    r3,K256         @ r3 = address of K256
2680 # endif
2681         add     r2,r1,r2,lsl#6  @ len to point at the end of inp
2682
2683 .Loop_v8:
2684         vld1.8          {q8-q9},[r1]!   @ load one 64-byte block, r1 += 64 in total
2685         vld1.8          {q10-q11},[r1]!
2686         vld1.32         {q12},[r3]!     @ K256 quad 0
2687         vrev32.8        q8,q8           @ byte-swap every 32-bit input word
2688         vrev32.8        q9,q9
2689         vrev32.8        q10,q10
2690         vrev32.8        q11,q11
2691         vmov            q14,q0  @ offload: keep the incoming state for the final add
2692         vmov            q15,q1
2693         teq             r1,r2   @ Z set when this was the last block; used by bne at loop end
2694         vld1.32         {q13},[r3]!     @ K256 quad 1, loaded while quad 0 is in use
2695         vadd.i32        q12,q12,q8      @ K256 quad 0 + schedule words
2696         INST(0xe2,0x03,0xfa,0xf3)       @ sha256su0 q8,q9
2697         vmov            q2,q0           @ sha256h overwrites q0; sha256h2 takes the old value from q2
2698         INST(0x68,0x0c,0x02,0xf3)       @ sha256h q0,q1,q12
2699         INST(0x68,0x2c,0x14,0xf3)       @ sha256h2 q1,q2,q12
2700         INST(0xe6,0x0c,0x64,0xf3)       @ sha256su1 q8,q10,q11
2701         vld1.32         {q12},[r3]!     @ K256 quad 2
2702         vadd.i32        q13,q13,q9      @ K256 quad 1 + schedule words
2703         INST(0xe4,0x23,0xfa,0xf3)       @ sha256su0 q9,q10
2704         vmov            q2,q0
2705         INST(0x6a,0x0c,0x02,0xf3)       @ sha256h q0,q1,q13
2706         INST(0x6a,0x2c,0x14,0xf3)       @ sha256h2 q1,q2,q13
2707         INST(0xe0,0x2c,0x66,0xf3)       @ sha256su1 q9,q11,q8
2708         vld1.32         {q13},[r3]!     @ K256 quad 3
2709         vadd.i32        q12,q12,q10     @ K256 quad 2 + schedule words
2710         INST(0xe6,0x43,0xfa,0xf3)       @ sha256su0 q10,q11
2711         vmov            q2,q0
2712         INST(0x68,0x0c,0x02,0xf3)       @ sha256h q0,q1,q12
2713         INST(0x68,0x2c,0x14,0xf3)       @ sha256h2 q1,q2,q12
2714         INST(0xe2,0x4c,0x60,0xf3)       @ sha256su1 q10,q8,q9
2715         vld1.32         {q12},[r3]!     @ K256 quad 4
2716         vadd.i32        q13,q13,q11     @ K256 quad 3 + schedule words
2717         INST(0xe0,0x63,0xfa,0xf3)       @ sha256su0 q11,q8
2718         vmov            q2,q0
2719         INST(0x6a,0x0c,0x02,0xf3)       @ sha256h q0,q1,q13
2720         INST(0x6a,0x2c,0x14,0xf3)       @ sha256h2 q1,q2,q13
2721         INST(0xe4,0x6c,0x62,0xf3)       @ sha256su1 q11,q9,q10
2722         vld1.32         {q13},[r3]!     @ K256 quad 5
2723         vadd.i32        q12,q12,q8      @ K256 quad 4 + schedule words
2724         INST(0xe2,0x03,0xfa,0xf3)       @ sha256su0 q8,q9
2725         vmov            q2,q0
2726         INST(0x68,0x0c,0x02,0xf3)       @ sha256h q0,q1,q12
2727         INST(0x68,0x2c,0x14,0xf3)       @ sha256h2 q1,q2,q12
2728         INST(0xe6,0x0c,0x64,0xf3)       @ sha256su1 q8,q10,q11
2729         vld1.32         {q12},[r3]!     @ K256 quad 6
2730         vadd.i32        q13,q13,q9      @ K256 quad 5 + schedule words
2731         INST(0xe4,0x23,0xfa,0xf3)       @ sha256su0 q9,q10
2732         vmov            q2,q0
2733         INST(0x6a,0x0c,0x02,0xf3)       @ sha256h q0,q1,q13
2734         INST(0x6a,0x2c,0x14,0xf3)       @ sha256h2 q1,q2,q13
2735         INST(0xe0,0x2c,0x66,0xf3)       @ sha256su1 q9,q11,q8
2736         vld1.32         {q13},[r3]!     @ K256 quad 7
2737         vadd.i32        q12,q12,q10     @ K256 quad 6 + schedule words
2738         INST(0xe6,0x43,0xfa,0xf3)       @ sha256su0 q10,q11
2739         vmov            q2,q0
2740         INST(0x68,0x0c,0x02,0xf3)       @ sha256h q0,q1,q12
2741         INST(0x68,0x2c,0x14,0xf3)       @ sha256h2 q1,q2,q12
2742         INST(0xe2,0x4c,0x60,0xf3)       @ sha256su1 q10,q8,q9
2743         vld1.32         {q12},[r3]!     @ K256 quad 8
2744         vadd.i32        q13,q13,q11     @ K256 quad 7 + schedule words
2745         INST(0xe0,0x63,0xfa,0xf3)       @ sha256su0 q11,q8
2746         vmov            q2,q0
2747         INST(0x6a,0x0c,0x02,0xf3)       @ sha256h q0,q1,q13
2748         INST(0x6a,0x2c,0x14,0xf3)       @ sha256h2 q1,q2,q13
2749         INST(0xe4,0x6c,0x62,0xf3)       @ sha256su1 q11,q9,q10
2750         vld1.32         {q13},[r3]!     @ K256 quad 9
2751         vadd.i32        q12,q12,q8      @ K256 quad 8 + schedule words
2752         INST(0xe2,0x03,0xfa,0xf3)       @ sha256su0 q8,q9
2753         vmov            q2,q0
2754         INST(0x68,0x0c,0x02,0xf3)       @ sha256h q0,q1,q12
2755         INST(0x68,0x2c,0x14,0xf3)       @ sha256h2 q1,q2,q12
2756         INST(0xe6,0x0c,0x64,0xf3)       @ sha256su1 q8,q10,q11
2757         vld1.32         {q12},[r3]!     @ K256 quad 10
2758         vadd.i32        q13,q13,q9      @ K256 quad 9 + schedule words
2759         INST(0xe4,0x23,0xfa,0xf3)       @ sha256su0 q9,q10
2760         vmov            q2,q0
2761         INST(0x6a,0x0c,0x02,0xf3)       @ sha256h q0,q1,q13
2762         INST(0x6a,0x2c,0x14,0xf3)       @ sha256h2 q1,q2,q13
2763         INST(0xe0,0x2c,0x66,0xf3)       @ sha256su1 q9,q11,q8
2764         vld1.32         {q13},[r3]!     @ K256 quad 11
2765         vadd.i32        q12,q12,q10     @ K256 quad 10 + schedule words
2766         INST(0xe6,0x43,0xfa,0xf3)       @ sha256su0 q10,q11
2767         vmov            q2,q0
2768         INST(0x68,0x0c,0x02,0xf3)       @ sha256h q0,q1,q12
2769         INST(0x68,0x2c,0x14,0xf3)       @ sha256h2 q1,q2,q12
2770         INST(0xe2,0x4c,0x60,0xf3)       @ sha256su1 q10,q8,q9
2771         vld1.32         {q12},[r3]!     @ K256 quad 12
2772         vadd.i32        q13,q13,q11     @ K256 quad 11 + schedule words
2773         INST(0xe0,0x63,0xfa,0xf3)       @ sha256su0 q11,q8
2774         vmov            q2,q0
2775         INST(0x6a,0x0c,0x02,0xf3)       @ sha256h q0,q1,q13
2776         INST(0x6a,0x2c,0x14,0xf3)       @ sha256h2 q1,q2,q13
2777         INST(0xe4,0x6c,0x62,0xf3)       @ sha256su1 q11,q9,q10
2778         vld1.32         {q13},[r3]!     @ K256 quad 13
2779         vadd.i32        q12,q12,q8      @ K256 quad 12; from here on no further schedule updates are issued
2780         vmov            q2,q0
2781         INST(0x68,0x0c,0x02,0xf3)       @ sha256h q0,q1,q12
2782         INST(0x68,0x2c,0x14,0xf3)       @ sha256h2 q1,q2,q12
2783
2784         vld1.32         {q12},[r3]!     @ K256 quad 14
2785         vadd.i32        q13,q13,q9      @ K256 quad 13 + schedule words
2786         vmov            q2,q0
2787         INST(0x6a,0x0c,0x02,0xf3)       @ sha256h q0,q1,q13
2788         INST(0x6a,0x2c,0x14,0xf3)       @ sha256h2 q1,q2,q13
2789
2790         vld1.32         {q13},[r3]      @ K256 quad 15, the only load without writeback
2791         vadd.i32        q12,q12,q10     @ K256 quad 14 + schedule words
2792         sub             r3,r3,#256-16   @ rewind r3 to K256: undo the 15 post-incremented 16-byte loads
2793         vmov            q2,q0
2794         INST(0x68,0x0c,0x02,0xf3)       @ sha256h q0,q1,q12
2795         INST(0x68,0x2c,0x14,0xf3)       @ sha256h2 q1,q2,q12
2796
2797         vadd.i32        q13,q13,q11     @ K256 quad 15 + schedule words
2798         vmov            q2,q0
2799         INST(0x6a,0x0c,0x02,0xf3)       @ sha256h q0,q1,q13
2800         INST(0x6a,0x2c,0x14,0xf3)       @ sha256h2 q1,q2,q13
2801
2802         vadd.i32        q0,q0,q14       @ add the state saved at the top of the loop
2803         vadd.i32        q1,q1,q15
2804         it              ne
2805         bne             .Loop_v8        @ flags still come from teq r1,r2: loop while r1 != end of input
2806
2807         vst1.32         {q0,q1},[r0]    @ write the updated state back
2808
2809         bx      lr              @ return to caller
2810 .size   sha256_block_data_order_armv8,.-sha256_block_data_order_armv8
2811 #endif
2812 .asciz  "SHA256 block transform for ARMv4/NEON/ARMv8, CRYPTOGAMS by <appro@openssl.org>"
@ The identification string above is emitted NUL-terminated by .asciz and can
@ have any length, so the location counter is realigned right after it.
2813 .align  2
@ OPENSSL_armcap_P is declared only for non-kernel builds (the guard below is
@ false when __KERNEL__ is defined). Nothing in this excerpt reads it.
2814 #if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
2815 .comm   OPENSSL_armcap_P,4,4    @ common symbol: size 4, alignment 4
2816 #endif