Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dtor/input
[sfrench/cifs-2.6.git] / arch / arm / crypto / sha256-core.S_shipped
1 @ SPDX-License-Identifier: GPL-2.0
2
3 @ This code is taken from the OpenSSL project but the author (Andy Polyakov)
4 @ has relicensed it under the GPLv2. Therefore this program is free software;
5 @ you can redistribute it and/or modify it under the terms of the GNU General
6 @ Public License version 2 as published by the Free Software Foundation.
7 @
8 @ The original headers, including the original license headers, are
9 @ included below for completeness.
10
11 @ ====================================================================
12 @ Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
13 @ project. The module is, however, dual licensed under OpenSSL and
14 @ CRYPTOGAMS licenses depending on where you obtain it. For further
15 @ details see http://www.openssl.org/~appro/cryptogams/.
16 @ ====================================================================
17
18 @ SHA256 block procedure for ARMv4. May 2007.
19
20 @ Performance is ~2x better than gcc 3.4 generated code and in "abso-
21 @ lute" terms is ~2250 cycles per 64-byte block or ~35 cycles per
22 @ byte [on single-issue Xscale PXA250 core].
23
24 @ July 2010.
25 @
26 @ Rescheduling for dual-issue pipeline resulted in 22% improvement on
27 @ Cortex A8 core and ~20 cycles per processed byte.
28
29 @ February 2011.
30 @
31 @ Profiler-assisted and platform-specific optimization resulted in 16%
32 @ improvement on Cortex A8 core and ~15.4 cycles per processed byte.
33
34 @ September 2013.
35 @
36 @ Add NEON implementation. On Cortex A8 it was measured to process one
37 @ byte in 12.5 cycles or 23% faster than integer-only code. Snapdragon
38 @ S4 does it in 12.5 cycles too, but it's 50% faster than integer-only
39 @ code (meaning that the latter performs sub-optimally; nothing was done
40 @ about it).
41
42 @ May 2014.
43 @
44 @ Add ARMv8 code path performing at 2.0 cpb on Apple A7.
45
46 #ifndef __KERNEL__
47 # include "arm_arch.h"
48 #else
49 # define __ARM_ARCH__ __LINUX_ARM_ARCH__
50 # define __ARM_MAX_ARCH__ 7
51 #endif
52
53 .text                                   @ the constant table and the code below are both emitted into .text
54 #if __ARM_ARCH__<7
55 .code   32                              @ targets below architecture level 7: 32-bit ARM instruction set
56 #else
57 .syntax unified                         @ architecture level 7 and up: unified assembler syntax
58 # ifdef __thumb2__
59 #  define adrl adr
60 .thumb                                  @ Thumb-2 build; every adrl is rewritten to adr by the define above (presumably because adrl is unavailable in Thumb mode - confirm against the assembler manual)
61 # else
62 .code   32                              @ level 7+ without Thumb-2: still the 32-bit ARM instruction set
63 # endif
64 #endif
65
66 .type   K256,%object
67 .align  5                               @ start the table on a 2^5 = 32-byte boundary
68 K256:                                   @ the 64 SHA-256 round constants, one 32-bit word per round (256 bytes)
69 .word   0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5     @ K[ 0.. 3]
70 .word   0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5     @ K[ 4.. 7]
71 .word   0xd807aa98,0x12835b01,0x243185be,0x550c7dc3     @ K[ 8..11]
72 .word   0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174     @ K[12..15]
73 .word   0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc     @ K[16..19]
74 .word   0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da     @ K[20..23]
75 .word   0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7     @ K[24..27]
76 .word   0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967     @ K[28..31]
77 .word   0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13     @ K[32..35]
78 .word   0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85     @ K[36..39]
79 .word   0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3     @ K[40..43]
80 .word   0xd192e819,0xd6990624,0xf40e3585,0x106aa070     @ K[44..47]
81 .word   0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5     @ K[48..51]
82 .word   0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3     @ K[52..55]
83 .word   0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208     @ K[56..59]
84 .word   0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2     @ K[60..63]; low byte 0xf2 of the last word matches the "done?" compare in the round code
85 .size   K256,.-K256                     @ symbol size covers the 64 constants only, not the words that follow
86 .word   0                               @ terminator
87 #if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
88 .LOPENSSL_armcap:                       @ emitted for non-kernel builds only
89 .word   OPENSSL_armcap_P-sha256_block_data_order        @ offset of OPENSSL_armcap_P from the function entry; the entry code adds it to its own address to load the capability word
90 #endif
91 .align  5                               @ pad to the next 32-byte boundary: the entry code locates K256 at sha256_block_data_order-(256+32), so this distance must not change
92
93 .global sha256_block_data_order
94 .type   sha256_block_data_order,%function
95 sha256_block_data_order:
96 #if __ARM_ARCH__<7
97         sub     r3,pc,#8                @ sha256_block_data_order
98 #else
99         adr     r3,sha256_block_data_order
100 #endif
101 #if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
102         ldr     r12,.LOPENSSL_armcap
103         ldr     r12,[r3,r12]            @ OPENSSL_armcap_P
104         tst     r12,#ARMV8_SHA256
105         bne     .LARMv8
106         tst     r12,#ARMV7_NEON
107         bne     .LNEON
108 #endif
109         add     r2,r1,r2,lsl#6  @ len to point at the end of inp
110         stmdb   sp!,{r0,r1,r2,r4-r11,lr}
111         ldmia   r0,{r4,r5,r6,r7,r8,r9,r10,r11}
112         sub     r14,r3,#256+32  @ K256
113         sub     sp,sp,#16*4             @ alloca(X[16])
114 .Loop:
115 # if __ARM_ARCH__>=7
116         ldr     r2,[r1],#4
117 # else
118         ldrb    r2,[r1,#3]
119 # endif
120         eor     r3,r5,r6                @ magic
121         eor     r12,r12,r12
122 #if __ARM_ARCH__>=7
123         @ ldr   r2,[r1],#4                      @ 0
124 # if 0==15
125         str     r1,[sp,#17*4]                   @ make room for r1
126 # endif
127         eor     r0,r8,r8,ror#5
128         add     r4,r4,r12                       @ h+=Maj(a,b,c) from the past
129         eor     r0,r0,r8,ror#19 @ Sigma1(e)
130 # ifndef __ARMEB__
131         rev     r2,r2
132 # endif
133 #else
134         @ ldrb  r2,[r1,#3]                      @ 0
135         add     r4,r4,r12                       @ h+=Maj(a,b,c) from the past
136         ldrb    r12,[r1,#2]
137         ldrb    r0,[r1,#1]
138         orr     r2,r2,r12,lsl#8
139         ldrb    r12,[r1],#4
140         orr     r2,r2,r0,lsl#16
141 # if 0==15
142         str     r1,[sp,#17*4]                   @ make room for r1
143 # endif
144         eor     r0,r8,r8,ror#5
145         orr     r2,r2,r12,lsl#24
146         eor     r0,r0,r8,ror#19 @ Sigma1(e)
147 #endif
148         ldr     r12,[r14],#4                    @ *K256++
149         add     r11,r11,r2                      @ h+=X[i]
150         str     r2,[sp,#0*4]
151         eor     r2,r9,r10
152         add     r11,r11,r0,ror#6        @ h+=Sigma1(e)
153         and     r2,r2,r8
154         add     r11,r11,r12                     @ h+=K256[i]
155         eor     r2,r2,r10                       @ Ch(e,f,g)
156         eor     r0,r4,r4,ror#11
157         add     r11,r11,r2                      @ h+=Ch(e,f,g)
158 #if 0==31
159         and     r12,r12,#0xff
160         cmp     r12,#0xf2                       @ done?
161 #endif
162 #if 0<15
163 # if __ARM_ARCH__>=7
164         ldr     r2,[r1],#4                      @ prefetch
165 # else
166         ldrb    r2,[r1,#3]
167 # endif
168         eor     r12,r4,r5                       @ a^b, b^c in next round
169 #else
170         ldr     r2,[sp,#2*4]            @ from future BODY_16_xx
171         eor     r12,r4,r5                       @ a^b, b^c in next round
172         ldr     r1,[sp,#15*4]   @ from future BODY_16_xx
173 #endif
174         eor     r0,r0,r4,ror#20 @ Sigma0(a)
175         and     r3,r3,r12                       @ (b^c)&=(a^b)
176         add     r7,r7,r11                       @ d+=h
177         eor     r3,r3,r5                        @ Maj(a,b,c)
178         add     r11,r11,r0,ror#2        @ h+=Sigma0(a)
179         @ add   r11,r11,r3                      @ h+=Maj(a,b,c)
180 #if __ARM_ARCH__>=7
181         @ ldr   r2,[r1],#4                      @ 1
182 # if 1==15
183         str     r1,[sp,#17*4]                   @ make room for r1
184 # endif
185         eor     r0,r7,r7,ror#5
186         add     r11,r11,r3                      @ h+=Maj(a,b,c) from the past
187         eor     r0,r0,r7,ror#19 @ Sigma1(e)
188 # ifndef __ARMEB__
189         rev     r2,r2
190 # endif
191 #else
192         @ ldrb  r2,[r1,#3]                      @ 1
193         add     r11,r11,r3                      @ h+=Maj(a,b,c) from the past
194         ldrb    r3,[r1,#2]
195         ldrb    r0,[r1,#1]
196         orr     r2,r2,r3,lsl#8
197         ldrb    r3,[r1],#4
198         orr     r2,r2,r0,lsl#16
199 # if 1==15
200         str     r1,[sp,#17*4]                   @ make room for r1
201 # endif
202         eor     r0,r7,r7,ror#5
203         orr     r2,r2,r3,lsl#24
204         eor     r0,r0,r7,ror#19 @ Sigma1(e)
205 #endif
206         ldr     r3,[r14],#4                     @ *K256++
207         add     r10,r10,r2                      @ h+=X[i]
208         str     r2,[sp,#1*4]
209         eor     r2,r8,r9
210         add     r10,r10,r0,ror#6        @ h+=Sigma1(e)
211         and     r2,r2,r7
212         add     r10,r10,r3                      @ h+=K256[i]
213         eor     r2,r2,r9                        @ Ch(e,f,g)
214         eor     r0,r11,r11,ror#11
215         add     r10,r10,r2                      @ h+=Ch(e,f,g)
216 #if 1==31
217         and     r3,r3,#0xff
218         cmp     r3,#0xf2                        @ done?
219 #endif
220 #if 1<15
221 # if __ARM_ARCH__>=7
222         ldr     r2,[r1],#4                      @ prefetch
223 # else
224         ldrb    r2,[r1,#3]
225 # endif
226         eor     r3,r11,r4                       @ a^b, b^c in next round
227 #else
228         ldr     r2,[sp,#3*4]            @ from future BODY_16_xx
229         eor     r3,r11,r4                       @ a^b, b^c in next round
230         ldr     r1,[sp,#0*4]    @ from future BODY_16_xx
231 #endif
232         eor     r0,r0,r11,ror#20        @ Sigma0(a)
233         and     r12,r12,r3                      @ (b^c)&=(a^b)
234         add     r6,r6,r10                       @ d+=h
235         eor     r12,r12,r4                      @ Maj(a,b,c)
236         add     r10,r10,r0,ror#2        @ h+=Sigma0(a)
237         @ add   r10,r10,r12                     @ h+=Maj(a,b,c)
238 #if __ARM_ARCH__>=7
239         @ ldr   r2,[r1],#4                      @ 2
240 # if 2==15
241         str     r1,[sp,#17*4]                   @ make room for r1
242 # endif
243         eor     r0,r6,r6,ror#5
244         add     r10,r10,r12                     @ h+=Maj(a,b,c) from the past
245         eor     r0,r0,r6,ror#19 @ Sigma1(e)
246 # ifndef __ARMEB__
247         rev     r2,r2
248 # endif
249 #else
250         @ ldrb  r2,[r1,#3]                      @ 2
251         add     r10,r10,r12                     @ h+=Maj(a,b,c) from the past
252         ldrb    r12,[r1,#2]
253         ldrb    r0,[r1,#1]
254         orr     r2,r2,r12,lsl#8
255         ldrb    r12,[r1],#4
256         orr     r2,r2,r0,lsl#16
257 # if 2==15
258         str     r1,[sp,#17*4]                   @ make room for r1
259 # endif
260         eor     r0,r6,r6,ror#5
261         orr     r2,r2,r12,lsl#24
262         eor     r0,r0,r6,ror#19 @ Sigma1(e)
263 #endif
264         ldr     r12,[r14],#4                    @ *K256++
265         add     r9,r9,r2                        @ h+=X[i]
266         str     r2,[sp,#2*4]
267         eor     r2,r7,r8
268         add     r9,r9,r0,ror#6  @ h+=Sigma1(e)
269         and     r2,r2,r6
270         add     r9,r9,r12                       @ h+=K256[i]
271         eor     r2,r2,r8                        @ Ch(e,f,g)
272         eor     r0,r10,r10,ror#11
273         add     r9,r9,r2                        @ h+=Ch(e,f,g)
274 #if 2==31
275         and     r12,r12,#0xff
276         cmp     r12,#0xf2                       @ done?
277 #endif
278 #if 2<15
279 # if __ARM_ARCH__>=7
280         ldr     r2,[r1],#4                      @ prefetch
281 # else
282         ldrb    r2,[r1,#3]
283 # endif
284         eor     r12,r10,r11                     @ a^b, b^c in next round
285 #else
286         ldr     r2,[sp,#4*4]            @ from future BODY_16_xx
287         eor     r12,r10,r11                     @ a^b, b^c in next round
288         ldr     r1,[sp,#1*4]    @ from future BODY_16_xx
289 #endif
290         eor     r0,r0,r10,ror#20        @ Sigma0(a)
291         and     r3,r3,r12                       @ (b^c)&=(a^b)
292         add     r5,r5,r9                        @ d+=h
293         eor     r3,r3,r11                       @ Maj(a,b,c)
294         add     r9,r9,r0,ror#2  @ h+=Sigma0(a)
295         @ add   r9,r9,r3                        @ h+=Maj(a,b,c)
296 #if __ARM_ARCH__>=7
297         @ ldr   r2,[r1],#4                      @ 3
298 # if 3==15
299         str     r1,[sp,#17*4]                   @ make room for r1
300 # endif
301         eor     r0,r5,r5,ror#5
302         add     r9,r9,r3                        @ h+=Maj(a,b,c) from the past
303         eor     r0,r0,r5,ror#19 @ Sigma1(e)
304 # ifndef __ARMEB__
305         rev     r2,r2
306 # endif
307 #else
308         @ ldrb  r2,[r1,#3]                      @ 3
309         add     r9,r9,r3                        @ h+=Maj(a,b,c) from the past
310         ldrb    r3,[r1,#2]
311         ldrb    r0,[r1,#1]
312         orr     r2,r2,r3,lsl#8
313         ldrb    r3,[r1],#4
314         orr     r2,r2,r0,lsl#16
315 # if 3==15
316         str     r1,[sp,#17*4]                   @ make room for r1
317 # endif
318         eor     r0,r5,r5,ror#5
319         orr     r2,r2,r3,lsl#24
320         eor     r0,r0,r5,ror#19 @ Sigma1(e)
321 #endif
322         ldr     r3,[r14],#4                     @ *K256++
323         add     r8,r8,r2                        @ h+=X[i]
324         str     r2,[sp,#3*4]
325         eor     r2,r6,r7
326         add     r8,r8,r0,ror#6  @ h+=Sigma1(e)
327         and     r2,r2,r5
328         add     r8,r8,r3                        @ h+=K256[i]
329         eor     r2,r2,r7                        @ Ch(e,f,g)
330         eor     r0,r9,r9,ror#11
331         add     r8,r8,r2                        @ h+=Ch(e,f,g)
332 #if 3==31
333         and     r3,r3,#0xff
334         cmp     r3,#0xf2                        @ done?
335 #endif
336 #if 3<15
337 # if __ARM_ARCH__>=7
338         ldr     r2,[r1],#4                      @ prefetch
339 # else
340         ldrb    r2,[r1,#3]
341 # endif
342         eor     r3,r9,r10                       @ a^b, b^c in next round
343 #else
344         ldr     r2,[sp,#5*4]            @ from future BODY_16_xx
345         eor     r3,r9,r10                       @ a^b, b^c in next round
346         ldr     r1,[sp,#2*4]    @ from future BODY_16_xx
347 #endif
348         eor     r0,r0,r9,ror#20 @ Sigma0(a)
349         and     r12,r12,r3                      @ (b^c)&=(a^b)
350         add     r4,r4,r8                        @ d+=h
351         eor     r12,r12,r10                     @ Maj(a,b,c)
352         add     r8,r8,r0,ror#2  @ h+=Sigma0(a)
353         @ add   r8,r8,r12                       @ h+=Maj(a,b,c)
354 #if __ARM_ARCH__>=7
355         @ ldr   r2,[r1],#4                      @ 4
356 # if 4==15
357         str     r1,[sp,#17*4]                   @ make room for r1
358 # endif
359         eor     r0,r4,r4,ror#5
360         add     r8,r8,r12                       @ h+=Maj(a,b,c) from the past
361         eor     r0,r0,r4,ror#19 @ Sigma1(e)
362 # ifndef __ARMEB__
363         rev     r2,r2
364 # endif
365 #else
366         @ ldrb  r2,[r1,#3]                      @ 4
367         add     r8,r8,r12                       @ h+=Maj(a,b,c) from the past
368         ldrb    r12,[r1,#2]
369         ldrb    r0,[r1,#1]
370         orr     r2,r2,r12,lsl#8
371         ldrb    r12,[r1],#4
372         orr     r2,r2,r0,lsl#16
373 # if 4==15
374         str     r1,[sp,#17*4]                   @ make room for r1
375 # endif
376         eor     r0,r4,r4,ror#5
377         orr     r2,r2,r12,lsl#24
378         eor     r0,r0,r4,ror#19 @ Sigma1(e)
379 #endif
380         ldr     r12,[r14],#4                    @ *K256++
381         add     r7,r7,r2                        @ h+=X[i]
382         str     r2,[sp,#4*4]
383         eor     r2,r5,r6
384         add     r7,r7,r0,ror#6  @ h+=Sigma1(e)
385         and     r2,r2,r4
386         add     r7,r7,r12                       @ h+=K256[i]
387         eor     r2,r2,r6                        @ Ch(e,f,g)
388         eor     r0,r8,r8,ror#11
389         add     r7,r7,r2                        @ h+=Ch(e,f,g)
390 #if 4==31
391         and     r12,r12,#0xff
392         cmp     r12,#0xf2                       @ done?
393 #endif
394 #if 4<15
395 # if __ARM_ARCH__>=7
396         ldr     r2,[r1],#4                      @ prefetch
397 # else
398         ldrb    r2,[r1,#3]
399 # endif
400         eor     r12,r8,r9                       @ a^b, b^c in next round
401 #else
402         ldr     r2,[sp,#6*4]            @ from future BODY_16_xx
403         eor     r12,r8,r9                       @ a^b, b^c in next round
404         ldr     r1,[sp,#3*4]    @ from future BODY_16_xx
405 #endif
406         eor     r0,r0,r8,ror#20 @ Sigma0(a)
407         and     r3,r3,r12                       @ (b^c)&=(a^b)
408         add     r11,r11,r7                      @ d+=h
409         eor     r3,r3,r9                        @ Maj(a,b,c)
410         add     r7,r7,r0,ror#2  @ h+=Sigma0(a)
411         @ add   r7,r7,r3                        @ h+=Maj(a,b,c)
412 #if __ARM_ARCH__>=7
413         @ ldr   r2,[r1],#4                      @ 5
414 # if 5==15
415         str     r1,[sp,#17*4]                   @ make room for r1
416 # endif
417         eor     r0,r11,r11,ror#5
418         add     r7,r7,r3                        @ h+=Maj(a,b,c) from the past
419         eor     r0,r0,r11,ror#19        @ Sigma1(e)
420 # ifndef __ARMEB__
421         rev     r2,r2
422 # endif
423 #else
424         @ ldrb  r2,[r1,#3]                      @ 5
425         add     r7,r7,r3                        @ h+=Maj(a,b,c) from the past
426         ldrb    r3,[r1,#2]
427         ldrb    r0,[r1,#1]
428         orr     r2,r2,r3,lsl#8
429         ldrb    r3,[r1],#4
430         orr     r2,r2,r0,lsl#16
431 # if 5==15
432         str     r1,[sp,#17*4]                   @ make room for r1
433 # endif
434         eor     r0,r11,r11,ror#5
435         orr     r2,r2,r3,lsl#24
436         eor     r0,r0,r11,ror#19        @ Sigma1(e)
437 #endif
438         ldr     r3,[r14],#4                     @ *K256++
439         add     r6,r6,r2                        @ h+=X[i]
440         str     r2,[sp,#5*4]
441         eor     r2,r4,r5
442         add     r6,r6,r0,ror#6  @ h+=Sigma1(e)
443         and     r2,r2,r11
444         add     r6,r6,r3                        @ h+=K256[i]
445         eor     r2,r2,r5                        @ Ch(e,f,g)
446         eor     r0,r7,r7,ror#11
447         add     r6,r6,r2                        @ h+=Ch(e,f,g)
448 #if 5==31
449         and     r3,r3,#0xff
450         cmp     r3,#0xf2                        @ done?
451 #endif
452 #if 5<15
453 # if __ARM_ARCH__>=7
454         ldr     r2,[r1],#4                      @ prefetch
455 # else
456         ldrb    r2,[r1,#3]
457 # endif
458         eor     r3,r7,r8                        @ a^b, b^c in next round
459 #else
460         ldr     r2,[sp,#7*4]            @ from future BODY_16_xx
461         eor     r3,r7,r8                        @ a^b, b^c in next round
462         ldr     r1,[sp,#4*4]    @ from future BODY_16_xx
463 #endif
464         eor     r0,r0,r7,ror#20 @ Sigma0(a)
465         and     r12,r12,r3                      @ (b^c)&=(a^b)
466         add     r10,r10,r6                      @ d+=h
467         eor     r12,r12,r8                      @ Maj(a,b,c)
468         add     r6,r6,r0,ror#2  @ h+=Sigma0(a)
469         @ add   r6,r6,r12                       @ h+=Maj(a,b,c)
470 #if __ARM_ARCH__>=7
471         @ ldr   r2,[r1],#4                      @ 6
472 # if 6==15
473         str     r1,[sp,#17*4]                   @ make room for r1
474 # endif
475         eor     r0,r10,r10,ror#5
476         add     r6,r6,r12                       @ h+=Maj(a,b,c) from the past
477         eor     r0,r0,r10,ror#19        @ Sigma1(e)
478 # ifndef __ARMEB__
479         rev     r2,r2
480 # endif
481 #else
482         @ ldrb  r2,[r1,#3]                      @ 6
483         add     r6,r6,r12                       @ h+=Maj(a,b,c) from the past
484         ldrb    r12,[r1,#2]
485         ldrb    r0,[r1,#1]
486         orr     r2,r2,r12,lsl#8
487         ldrb    r12,[r1],#4
488         orr     r2,r2,r0,lsl#16
489 # if 6==15
490         str     r1,[sp,#17*4]                   @ make room for r1
491 # endif
492         eor     r0,r10,r10,ror#5
493         orr     r2,r2,r12,lsl#24
494         eor     r0,r0,r10,ror#19        @ Sigma1(e)
495 #endif
496         ldr     r12,[r14],#4                    @ *K256++
497         add     r5,r5,r2                        @ h+=X[i]
498         str     r2,[sp,#6*4]
499         eor     r2,r11,r4
500         add     r5,r5,r0,ror#6  @ h+=Sigma1(e)
501         and     r2,r2,r10
502         add     r5,r5,r12                       @ h+=K256[i]
503         eor     r2,r2,r4                        @ Ch(e,f,g)
504         eor     r0,r6,r6,ror#11
505         add     r5,r5,r2                        @ h+=Ch(e,f,g)
506 #if 6==31
507         and     r12,r12,#0xff
508         cmp     r12,#0xf2                       @ done?
509 #endif
510 #if 6<15
511 # if __ARM_ARCH__>=7
512         ldr     r2,[r1],#4                      @ prefetch
513 # else
514         ldrb    r2,[r1,#3]
515 # endif
516         eor     r12,r6,r7                       @ a^b, b^c in next round
517 #else
518         ldr     r2,[sp,#8*4]            @ from future BODY_16_xx
519         eor     r12,r6,r7                       @ a^b, b^c in next round
520         ldr     r1,[sp,#5*4]    @ from future BODY_16_xx
521 #endif
522         eor     r0,r0,r6,ror#20 @ Sigma0(a)
523         and     r3,r3,r12                       @ (b^c)&=(a^b)
524         add     r9,r9,r5                        @ d+=h
525         eor     r3,r3,r7                        @ Maj(a,b,c)
526         add     r5,r5,r0,ror#2  @ h+=Sigma0(a)
527         @ add   r5,r5,r3                        @ h+=Maj(a,b,c)
528 #if __ARM_ARCH__>=7
529         @ ldr   r2,[r1],#4                      @ 7
530 # if 7==15
531         str     r1,[sp,#17*4]                   @ make room for r1
532 # endif
533         eor     r0,r9,r9,ror#5
534         add     r5,r5,r3                        @ h+=Maj(a,b,c) from the past
535         eor     r0,r0,r9,ror#19 @ Sigma1(e)
536 # ifndef __ARMEB__
537         rev     r2,r2
538 # endif
539 #else
540         @ ldrb  r2,[r1,#3]                      @ 7
541         add     r5,r5,r3                        @ h+=Maj(a,b,c) from the past
542         ldrb    r3,[r1,#2]
543         ldrb    r0,[r1,#1]
544         orr     r2,r2,r3,lsl#8
545         ldrb    r3,[r1],#4
546         orr     r2,r2,r0,lsl#16
547 # if 7==15
548         str     r1,[sp,#17*4]                   @ make room for r1
549 # endif
550         eor     r0,r9,r9,ror#5
551         orr     r2,r2,r3,lsl#24
552         eor     r0,r0,r9,ror#19 @ Sigma1(e)
553 #endif
554         ldr     r3,[r14],#4                     @ *K256++
555         add     r4,r4,r2                        @ h+=X[i]
556         str     r2,[sp,#7*4]
557         eor     r2,r10,r11
558         add     r4,r4,r0,ror#6  @ h+=Sigma1(e)
559         and     r2,r2,r9
560         add     r4,r4,r3                        @ h+=K256[i]
561         eor     r2,r2,r11                       @ Ch(e,f,g)
562         eor     r0,r5,r5,ror#11
563         add     r4,r4,r2                        @ h+=Ch(e,f,g)
564 #if 7==31
565         and     r3,r3,#0xff
566         cmp     r3,#0xf2                        @ done?
567 #endif
568 #if 7<15
569 # if __ARM_ARCH__>=7
570         ldr     r2,[r1],#4                      @ prefetch
571 # else
572         ldrb    r2,[r1,#3]
573 # endif
574         eor     r3,r5,r6                        @ a^b, b^c in next round
575 #else
576         ldr     r2,[sp,#9*4]            @ from future BODY_16_xx
577         eor     r3,r5,r6                        @ a^b, b^c in next round
578         ldr     r1,[sp,#6*4]    @ from future BODY_16_xx
579 #endif
580         eor     r0,r0,r5,ror#20 @ Sigma0(a)
581         and     r12,r12,r3                      @ (b^c)&=(a^b)
582         add     r8,r8,r4                        @ d+=h
583         eor     r12,r12,r6                      @ Maj(a,b,c)
584         add     r4,r4,r0,ror#2  @ h+=Sigma0(a)
585         @ add   r4,r4,r12                       @ h+=Maj(a,b,c)
586 #if __ARM_ARCH__>=7
587         @ ldr   r2,[r1],#4                      @ 8
588 # if 8==15
589         str     r1,[sp,#17*4]                   @ make room for r1
590 # endif
591         eor     r0,r8,r8,ror#5
592         add     r4,r4,r12                       @ h+=Maj(a,b,c) from the past
593         eor     r0,r0,r8,ror#19 @ Sigma1(e)
594 # ifndef __ARMEB__
595         rev     r2,r2
596 # endif
597 #else
598         @ ldrb  r2,[r1,#3]                      @ 8
599         add     r4,r4,r12                       @ h+=Maj(a,b,c) from the past
600         ldrb    r12,[r1,#2]
601         ldrb    r0,[r1,#1]
602         orr     r2,r2,r12,lsl#8
603         ldrb    r12,[r1],#4
604         orr     r2,r2,r0,lsl#16
605 # if 8==15
606         str     r1,[sp,#17*4]                   @ make room for r1
607 # endif
608         eor     r0,r8,r8,ror#5
609         orr     r2,r2,r12,lsl#24
610         eor     r0,r0,r8,ror#19 @ Sigma1(e)
611 #endif
612         ldr     r12,[r14],#4                    @ *K256++
613         add     r11,r11,r2                      @ h+=X[i]
614         str     r2,[sp,#8*4]
615         eor     r2,r9,r10
616         add     r11,r11,r0,ror#6        @ h+=Sigma1(e)
617         and     r2,r2,r8
618         add     r11,r11,r12                     @ h+=K256[i]
619         eor     r2,r2,r10                       @ Ch(e,f,g)
620         eor     r0,r4,r4,ror#11
621         add     r11,r11,r2                      @ h+=Ch(e,f,g)
622 #if 8==31
623         and     r12,r12,#0xff
624         cmp     r12,#0xf2                       @ done?
625 #endif
626 #if 8<15
627 # if __ARM_ARCH__>=7
628         ldr     r2,[r1],#4                      @ prefetch
629 # else
630         ldrb    r2,[r1,#3]
631 # endif
632         eor     r12,r4,r5                       @ a^b, b^c in next round
633 #else
634         ldr     r2,[sp,#10*4]           @ from future BODY_16_xx
635         eor     r12,r4,r5                       @ a^b, b^c in next round
636         ldr     r1,[sp,#7*4]    @ from future BODY_16_xx
637 #endif
638         eor     r0,r0,r4,ror#20 @ Sigma0(a)
639         and     r3,r3,r12                       @ (b^c)&=(a^b)
640         add     r7,r7,r11                       @ d+=h
641         eor     r3,r3,r5                        @ Maj(a,b,c)
642         add     r11,r11,r0,ror#2        @ h+=Sigma0(a)
643         @ add   r11,r11,r3                      @ h+=Maj(a,b,c)
644 #if __ARM_ARCH__>=7
645         @ ldr   r2,[r1],#4                      @ 9
646 # if 9==15
647         str     r1,[sp,#17*4]                   @ make room for r1
648 # endif
649         eor     r0,r7,r7,ror#5
650         add     r11,r11,r3                      @ h+=Maj(a,b,c) from the past
651         eor     r0,r0,r7,ror#19 @ Sigma1(e)
652 # ifndef __ARMEB__
653         rev     r2,r2
654 # endif
655 #else
656         @ ldrb  r2,[r1,#3]                      @ 9
657         add     r11,r11,r3                      @ h+=Maj(a,b,c) from the past
658         ldrb    r3,[r1,#2]
659         ldrb    r0,[r1,#1]
660         orr     r2,r2,r3,lsl#8
661         ldrb    r3,[r1],#4
662         orr     r2,r2,r0,lsl#16
663 # if 9==15
664         str     r1,[sp,#17*4]                   @ make room for r1
665 # endif
666         eor     r0,r7,r7,ror#5
667         orr     r2,r2,r3,lsl#24
668         eor     r0,r0,r7,ror#19 @ Sigma1(e)
669 #endif
670         ldr     r3,[r14],#4                     @ *K256++
671         add     r10,r10,r2                      @ h+=X[i]
672         str     r2,[sp,#9*4]
673         eor     r2,r8,r9
674         add     r10,r10,r0,ror#6        @ h+=Sigma1(e)
675         and     r2,r2,r7
676         add     r10,r10,r3                      @ h+=K256[i]
677         eor     r2,r2,r9                        @ Ch(e,f,g)
678         eor     r0,r11,r11,ror#11
679         add     r10,r10,r2                      @ h+=Ch(e,f,g)
680 #if 9==31
681         and     r3,r3,#0xff
682         cmp     r3,#0xf2                        @ done?
683 #endif
684 #if 9<15
685 # if __ARM_ARCH__>=7
686         ldr     r2,[r1],#4                      @ prefetch
687 # else
688         ldrb    r2,[r1,#3]
689 # endif
690         eor     r3,r11,r4                       @ a^b, b^c in next round
691 #else
692         ldr     r2,[sp,#11*4]           @ from future BODY_16_xx
693         eor     r3,r11,r4                       @ a^b, b^c in next round
694         ldr     r1,[sp,#8*4]    @ from future BODY_16_xx
695 #endif
696         eor     r0,r0,r11,ror#20        @ Sigma0(a)
697         and     r12,r12,r3                      @ (b^c)&=(a^b)
698         add     r6,r6,r10                       @ d+=h
699         eor     r12,r12,r4                      @ Maj(a,b,c)
700         add     r10,r10,r0,ror#2        @ h+=Sigma0(a)
701         @ add   r10,r10,r12                     @ h+=Maj(a,b,c)
702 #if __ARM_ARCH__>=7
703         @ ldr   r2,[r1],#4                      @ 10
704 # if 10==15
705         str     r1,[sp,#17*4]                   @ make room for r1
706 # endif
707         eor     r0,r6,r6,ror#5
708         add     r10,r10,r12                     @ h+=Maj(a,b,c) from the past
709         eor     r0,r0,r6,ror#19 @ Sigma1(e)
710 # ifndef __ARMEB__
711         rev     r2,r2
712 # endif
713 #else
714         @ ldrb  r2,[r1,#3]                      @ 10
715         add     r10,r10,r12                     @ h+=Maj(a,b,c) from the past
716         ldrb    r12,[r1,#2]
717         ldrb    r0,[r1,#1]
718         orr     r2,r2,r12,lsl#8
719         ldrb    r12,[r1],#4
720         orr     r2,r2,r0,lsl#16
721 # if 10==15
722         str     r1,[sp,#17*4]                   @ make room for r1
723 # endif
724         eor     r0,r6,r6,ror#5
725         orr     r2,r2,r12,lsl#24
726         eor     r0,r0,r6,ror#19 @ Sigma1(e)
727 #endif
728         ldr     r12,[r14],#4                    @ *K256++
729         add     r9,r9,r2                        @ h+=X[i]
730         str     r2,[sp,#10*4]
731         eor     r2,r7,r8
732         add     r9,r9,r0,ror#6  @ h+=Sigma1(e)
733         and     r2,r2,r6
734         add     r9,r9,r12                       @ h+=K256[i]
735         eor     r2,r2,r8                        @ Ch(e,f,g)
736         eor     r0,r10,r10,ror#11
737         add     r9,r9,r2                        @ h+=Ch(e,f,g)
738 #if 10==31
739         and     r12,r12,#0xff
740         cmp     r12,#0xf2                       @ done?
741 #endif
742 #if 10<15
743 # if __ARM_ARCH__>=7
744         ldr     r2,[r1],#4                      @ prefetch
745 # else
746         ldrb    r2,[r1,#3]
747 # endif
748         eor     r12,r10,r11                     @ a^b, b^c in next round
749 #else
750         ldr     r2,[sp,#12*4]           @ from future BODY_16_xx
751         eor     r12,r10,r11                     @ a^b, b^c in next round
752         ldr     r1,[sp,#9*4]    @ from future BODY_16_xx
753 #endif
754         eor     r0,r0,r10,ror#20        @ Sigma0(a)
755         and     r3,r3,r12                       @ (b^c)&=(a^b)
756         add     r5,r5,r9                        @ d+=h
757         eor     r3,r3,r11                       @ Maj(a,b,c)
758         add     r9,r9,r0,ror#2  @ h+=Sigma0(a)
759         @ add   r9,r9,r3                        @ h+=Maj(a,b,c)
760 #if __ARM_ARCH__>=7
761         @ ldr   r2,[r1],#4                      @ 11
762 # if 11==15
763         str     r1,[sp,#17*4]                   @ make room for r1
764 # endif
765         eor     r0,r5,r5,ror#5
766         add     r9,r9,r3                        @ h+=Maj(a,b,c) from the past
767         eor     r0,r0,r5,ror#19 @ Sigma1(e)
768 # ifndef __ARMEB__
769         rev     r2,r2
770 # endif
771 #else
772         @ ldrb  r2,[r1,#3]                      @ 11
773         add     r9,r9,r3                        @ h+=Maj(a,b,c) from the past
774         ldrb    r3,[r1,#2]
775         ldrb    r0,[r1,#1]
776         orr     r2,r2,r3,lsl#8
777         ldrb    r3,[r1],#4
778         orr     r2,r2,r0,lsl#16
779 # if 11==15
780         str     r1,[sp,#17*4]                   @ make room for r1
781 # endif
782         eor     r0,r5,r5,ror#5
783         orr     r2,r2,r3,lsl#24
784         eor     r0,r0,r5,ror#19 @ Sigma1(e)
785 #endif
786         ldr     r3,[r14],#4                     @ *K256++
787         add     r8,r8,r2                        @ h+=X[i]
788         str     r2,[sp,#11*4]
789         eor     r2,r6,r7
790         add     r8,r8,r0,ror#6  @ h+=Sigma1(e)
791         and     r2,r2,r5
792         add     r8,r8,r3                        @ h+=K256[i]
793         eor     r2,r2,r7                        @ Ch(e,f,g)
794         eor     r0,r9,r9,ror#11
795         add     r8,r8,r2                        @ h+=Ch(e,f,g)
796 #if 11==31
797         and     r3,r3,#0xff
798         cmp     r3,#0xf2                        @ done?
799 #endif
800 #if 11<15
801 # if __ARM_ARCH__>=7
802         ldr     r2,[r1],#4                      @ prefetch
803 # else
804         ldrb    r2,[r1,#3]
805 # endif
806         eor     r3,r9,r10                       @ a^b, b^c in next round
807 #else
808         ldr     r2,[sp,#13*4]           @ from future BODY_16_xx
809         eor     r3,r9,r10                       @ a^b, b^c in next round
810         ldr     r1,[sp,#10*4]   @ from future BODY_16_xx
811 #endif
812         eor     r0,r0,r9,ror#20 @ Sigma0(a)
813         and     r12,r12,r3                      @ (b^c)&=(a^b)
814         add     r4,r4,r8                        @ d+=h
815         eor     r12,r12,r10                     @ Maj(a,b,c)
816         add     r8,r8,r0,ror#2  @ h+=Sigma0(a)
817         @ add   r8,r8,r12                       @ h+=Maj(a,b,c)
818 #if __ARM_ARCH__>=7
819         @ ldr   r2,[r1],#4                      @ 12
820 # if 12==15
821         str     r1,[sp,#17*4]                   @ make room for r1
822 # endif
823         eor     r0,r4,r4,ror#5
824         add     r8,r8,r12                       @ h+=Maj(a,b,c) from the past
825         eor     r0,r0,r4,ror#19 @ Sigma1(e)
826 # ifndef __ARMEB__
827         rev     r2,r2
828 # endif
829 #else
830         @ ldrb  r2,[r1,#3]                      @ 12
831         add     r8,r8,r12                       @ h+=Maj(a,b,c) from the past
832         ldrb    r12,[r1,#2]
833         ldrb    r0,[r1,#1]
834         orr     r2,r2,r12,lsl#8
835         ldrb    r12,[r1],#4
836         orr     r2,r2,r0,lsl#16
837 # if 12==15
838         str     r1,[sp,#17*4]                   @ make room for r1
839 # endif
840         eor     r0,r4,r4,ror#5
841         orr     r2,r2,r12,lsl#24
842         eor     r0,r0,r4,ror#19 @ Sigma1(e)
843 #endif
844         ldr     r12,[r14],#4                    @ *K256++
845         add     r7,r7,r2                        @ h+=X[i]
846         str     r2,[sp,#12*4]
847         eor     r2,r5,r6
848         add     r7,r7,r0,ror#6  @ h+=Sigma1(e)
849         and     r2,r2,r4
850         add     r7,r7,r12                       @ h+=K256[i]
851         eor     r2,r2,r6                        @ Ch(e,f,g)
852         eor     r0,r8,r8,ror#11
853         add     r7,r7,r2                        @ h+=Ch(e,f,g)
854 #if 12==31
855         and     r12,r12,#0xff
856         cmp     r12,#0xf2                       @ done?
857 #endif
858 #if 12<15
859 # if __ARM_ARCH__>=7
860         ldr     r2,[r1],#4                      @ prefetch
861 # else
862         ldrb    r2,[r1,#3]
863 # endif
864         eor     r12,r8,r9                       @ a^b, b^c in next round
865 #else
866         ldr     r2,[sp,#14*4]           @ from future BODY_16_xx
867         eor     r12,r8,r9                       @ a^b, b^c in next round
868         ldr     r1,[sp,#11*4]   @ from future BODY_16_xx
869 #endif
870         eor     r0,r0,r8,ror#20 @ Sigma0(a)
871         and     r3,r3,r12                       @ (b^c)&=(a^b)
872         add     r11,r11,r7                      @ d+=h
873         eor     r3,r3,r9                        @ Maj(a,b,c)
874         add     r7,r7,r0,ror#2  @ h+=Sigma0(a)
875         @ add   r7,r7,r3                        @ h+=Maj(a,b,c)
876 #if __ARM_ARCH__>=7
877         @ ldr   r2,[r1],#4                      @ 13
878 # if 13==15
879         str     r1,[sp,#17*4]                   @ make room for r1
880 # endif
881         eor     r0,r11,r11,ror#5
882         add     r7,r7,r3                        @ h+=Maj(a,b,c) from the past
883         eor     r0,r0,r11,ror#19        @ Sigma1(e)
884 # ifndef __ARMEB__
885         rev     r2,r2
886 # endif
887 #else
888         @ ldrb  r2,[r1,#3]                      @ 13
889         add     r7,r7,r3                        @ h+=Maj(a,b,c) from the past
890         ldrb    r3,[r1,#2]
891         ldrb    r0,[r1,#1]
892         orr     r2,r2,r3,lsl#8
893         ldrb    r3,[r1],#4
894         orr     r2,r2,r0,lsl#16
895 # if 13==15
896         str     r1,[sp,#17*4]                   @ make room for r1
897 # endif
898         eor     r0,r11,r11,ror#5
899         orr     r2,r2,r3,lsl#24
900         eor     r0,r0,r11,ror#19        @ Sigma1(e)
901 #endif
902         ldr     r3,[r14],#4                     @ *K256++
903         add     r6,r6,r2                        @ h+=X[i]
904         str     r2,[sp,#13*4]
905         eor     r2,r4,r5
906         add     r6,r6,r0,ror#6  @ h+=Sigma1(e)
907         and     r2,r2,r11
908         add     r6,r6,r3                        @ h+=K256[i]
909         eor     r2,r2,r5                        @ Ch(e,f,g)
910         eor     r0,r7,r7,ror#11
911         add     r6,r6,r2                        @ h+=Ch(e,f,g)
912 #if 13==31
913         and     r3,r3,#0xff
914         cmp     r3,#0xf2                        @ done?
915 #endif
916 #if 13<15
917 # if __ARM_ARCH__>=7
918         ldr     r2,[r1],#4                      @ prefetch
919 # else
920         ldrb    r2,[r1,#3]
921 # endif
922         eor     r3,r7,r8                        @ a^b, b^c in next round
923 #else
924         ldr     r2,[sp,#15*4]           @ from future BODY_16_xx
925         eor     r3,r7,r8                        @ a^b, b^c in next round
926         ldr     r1,[sp,#12*4]   @ from future BODY_16_xx
927 #endif
928         eor     r0,r0,r7,ror#20 @ Sigma0(a)
929         and     r12,r12,r3                      @ (b^c)&=(a^b)
930         add     r10,r10,r6                      @ d+=h
931         eor     r12,r12,r8                      @ Maj(a,b,c)
932         add     r6,r6,r0,ror#2  @ h+=Sigma0(a)
933         @ add   r6,r6,r12                       @ h+=Maj(a,b,c)
934 #if __ARM_ARCH__>=7
935         @ ldr   r2,[r1],#4                      @ 14
936 # if 14==15
937         str     r1,[sp,#17*4]                   @ make room for r1
938 # endif
939         eor     r0,r10,r10,ror#5
940         add     r6,r6,r12                       @ h+=Maj(a,b,c) from the past
941         eor     r0,r0,r10,ror#19        @ Sigma1(e)
942 # ifndef __ARMEB__
943         rev     r2,r2
944 # endif
945 #else
946         @ ldrb  r2,[r1,#3]                      @ 14
947         add     r6,r6,r12                       @ h+=Maj(a,b,c) from the past
948         ldrb    r12,[r1,#2]
949         ldrb    r0,[r1,#1]
950         orr     r2,r2,r12,lsl#8
951         ldrb    r12,[r1],#4
952         orr     r2,r2,r0,lsl#16
953 # if 14==15
954         str     r1,[sp,#17*4]                   @ make room for r1
955 # endif
956         eor     r0,r10,r10,ror#5
957         orr     r2,r2,r12,lsl#24
958         eor     r0,r0,r10,ror#19        @ Sigma1(e)
959 #endif
960         ldr     r12,[r14],#4                    @ *K256++
961         add     r5,r5,r2                        @ h+=X[i]
962         str     r2,[sp,#14*4]
963         eor     r2,r11,r4
964         add     r5,r5,r0,ror#6  @ h+=Sigma1(e)
965         and     r2,r2,r10
966         add     r5,r5,r12                       @ h+=K256[i]
967         eor     r2,r2,r4                        @ Ch(e,f,g)
968         eor     r0,r6,r6,ror#11
969         add     r5,r5,r2                        @ h+=Ch(e,f,g)
970 #if 14==31
971         and     r12,r12,#0xff
972         cmp     r12,#0xf2                       @ done?
973 #endif
974 #if 14<15
975 # if __ARM_ARCH__>=7
976         ldr     r2,[r1],#4                      @ prefetch
977 # else
978         ldrb    r2,[r1,#3]
979 # endif
980         eor     r12,r6,r7                       @ a^b, b^c in next round
981 #else
982         ldr     r2,[sp,#0*4]            @ from future BODY_16_xx
983         eor     r12,r6,r7                       @ a^b, b^c in next round
984         ldr     r1,[sp,#13*4]   @ from future BODY_16_xx
985 #endif
986         eor     r0,r0,r6,ror#20 @ Sigma0(a)
987         and     r3,r3,r12                       @ (b^c)&=(a^b)
988         add     r9,r9,r5                        @ d+=h
989         eor     r3,r3,r7                        @ Maj(a,b,c)
990         add     r5,r5,r0,ror#2  @ h+=Sigma0(a)
991         @ add   r5,r5,r3                        @ h+=Maj(a,b,c)
992 #if __ARM_ARCH__>=7
993         @ ldr   r2,[r1],#4                      @ 15
994 # if 15==15
995         str     r1,[sp,#17*4]                   @ make room for r1
996 # endif
997         eor     r0,r9,r9,ror#5
998         add     r5,r5,r3                        @ h+=Maj(a,b,c) from the past
999         eor     r0,r0,r9,ror#19 @ Sigma1(e)
1000 # ifndef __ARMEB__
1001         rev     r2,r2
1002 # endif
1003 #else
1004         @ ldrb  r2,[r1,#3]                      @ 15
1005         add     r5,r5,r3                        @ h+=Maj(a,b,c) from the past
1006         ldrb    r3,[r1,#2]
1007         ldrb    r0,[r1,#1]
1008         orr     r2,r2,r3,lsl#8
1009         ldrb    r3,[r1],#4
1010         orr     r2,r2,r0,lsl#16
1011 # if 15==15
1012         str     r1,[sp,#17*4]                   @ make room for r1
1013 # endif
1014         eor     r0,r9,r9,ror#5
1015         orr     r2,r2,r3,lsl#24
1016         eor     r0,r0,r9,ror#19 @ Sigma1(e)
1017 #endif
1018         ldr     r3,[r14],#4                     @ *K256++
1019         add     r4,r4,r2                        @ h+=X[i]
1020         str     r2,[sp,#15*4]
1021         eor     r2,r10,r11
1022         add     r4,r4,r0,ror#6  @ h+=Sigma1(e)
1023         and     r2,r2,r9
1024         add     r4,r4,r3                        @ h+=K256[i]
1025         eor     r2,r2,r11                       @ Ch(e,f,g)
1026         eor     r0,r5,r5,ror#11
1027         add     r4,r4,r2                        @ h+=Ch(e,f,g)
1028 #if 15==31
1029         and     r3,r3,#0xff
1030         cmp     r3,#0xf2                        @ done?
1031 #endif
1032 #if 15<15
1033 # if __ARM_ARCH__>=7
1034         ldr     r2,[r1],#4                      @ prefetch
1035 # else
1036         ldrb    r2,[r1,#3]
1037 # endif
1038         eor     r3,r5,r6                        @ a^b, b^c in next round
1039 #else
1040         ldr     r2,[sp,#1*4]            @ from future BODY_16_xx
1041         eor     r3,r5,r6                        @ a^b, b^c in next round
1042         ldr     r1,[sp,#14*4]   @ from future BODY_16_xx
1043 #endif
1044         eor     r0,r0,r5,ror#20 @ Sigma0(a)
1045         and     r12,r12,r3                      @ (b^c)&=(a^b)
1046         add     r8,r8,r4                        @ d+=h
1047         eor     r12,r12,r6                      @ Maj(a,b,c)
1048         add     r4,r4,r0,ror#2  @ h+=Sigma0(a)
1049         @ add   r4,r4,r12                       @ h+=Maj(a,b,c)
1050 .Lrounds_16_xx:
1051         @ ldr   r2,[sp,#1*4]            @ 16
1052         @ ldr   r1,[sp,#14*4]
1053         mov     r0,r2,ror#7
1054         add     r4,r4,r12                       @ h+=Maj(a,b,c) from the past
1055         mov     r12,r1,ror#17
1056         eor     r0,r0,r2,ror#18
1057         eor     r12,r12,r1,ror#19
1058         eor     r0,r0,r2,lsr#3  @ sigma0(X[i+1])
1059         ldr     r2,[sp,#0*4]
1060         eor     r12,r12,r1,lsr#10       @ sigma1(X[i+14])
1061         ldr     r1,[sp,#9*4]
1062
1063         add     r12,r12,r0
1064         eor     r0,r8,r8,ror#5  @ from BODY_00_15
1065         add     r2,r2,r12
1066         eor     r0,r0,r8,ror#19 @ Sigma1(e)
1067         add     r2,r2,r1                        @ X[i]
1068         ldr     r12,[r14],#4                    @ *K256++
1069         add     r11,r11,r2                      @ h+=X[i]
1070         str     r2,[sp,#0*4]
1071         eor     r2,r9,r10
1072         add     r11,r11,r0,ror#6        @ h+=Sigma1(e)
1073         and     r2,r2,r8
1074         add     r11,r11,r12                     @ h+=K256[i]
1075         eor     r2,r2,r10                       @ Ch(e,f,g)
1076         eor     r0,r4,r4,ror#11
1077         add     r11,r11,r2                      @ h+=Ch(e,f,g)
1078 #if 16==31
1079         and     r12,r12,#0xff
1080         cmp     r12,#0xf2                       @ done?
1081 #endif
1082 #if 16<15
1083 # if __ARM_ARCH__>=7
1084         ldr     r2,[r1],#4                      @ prefetch
1085 # else
1086         ldrb    r2,[r1,#3]
1087 # endif
1088         eor     r12,r4,r5                       @ a^b, b^c in next round
1089 #else
1090         ldr     r2,[sp,#2*4]            @ from future BODY_16_xx
1091         eor     r12,r4,r5                       @ a^b, b^c in next round
1092         ldr     r1,[sp,#15*4]   @ from future BODY_16_xx
1093 #endif
1094         eor     r0,r0,r4,ror#20 @ Sigma0(a)
1095         and     r3,r3,r12                       @ (b^c)&=(a^b)
1096         add     r7,r7,r11                       @ d+=h
1097         eor     r3,r3,r5                        @ Maj(a,b,c)
1098         add     r11,r11,r0,ror#2        @ h+=Sigma0(a)
1099         @ add   r11,r11,r3                      @ h+=Maj(a,b,c)
1100         @ ldr   r2,[sp,#2*4]            @ 17
1101         @ ldr   r1,[sp,#15*4]
1102         mov     r0,r2,ror#7
1103         add     r11,r11,r3                      @ h+=Maj(a,b,c) from the past
1104         mov     r3,r1,ror#17
1105         eor     r0,r0,r2,ror#18
1106         eor     r3,r3,r1,ror#19
1107         eor     r0,r0,r2,lsr#3  @ sigma0(X[i+1])
1108         ldr     r2,[sp,#1*4]
1109         eor     r3,r3,r1,lsr#10 @ sigma1(X[i+14])
1110         ldr     r1,[sp,#10*4]
1111
1112         add     r3,r3,r0
1113         eor     r0,r7,r7,ror#5  @ from BODY_00_15
1114         add     r2,r2,r3
1115         eor     r0,r0,r7,ror#19 @ Sigma1(e)
1116         add     r2,r2,r1                        @ X[i]
1117         ldr     r3,[r14],#4                     @ *K256++
1118         add     r10,r10,r2                      @ h+=X[i]
1119         str     r2,[sp,#1*4]
1120         eor     r2,r8,r9
1121         add     r10,r10,r0,ror#6        @ h+=Sigma1(e)
1122         and     r2,r2,r7
1123         add     r10,r10,r3                      @ h+=K256[i]
1124         eor     r2,r2,r9                        @ Ch(e,f,g)
1125         eor     r0,r11,r11,ror#11
1126         add     r10,r10,r2                      @ h+=Ch(e,f,g)
1127 #if 17==31
1128         and     r3,r3,#0xff
1129         cmp     r3,#0xf2                        @ done?
1130 #endif
1131 #if 17<15
1132 # if __ARM_ARCH__>=7
1133         ldr     r2,[r1],#4                      @ prefetch
1134 # else
1135         ldrb    r2,[r1,#3]
1136 # endif
1137         eor     r3,r11,r4                       @ a^b, b^c in next round
1138 #else
1139         ldr     r2,[sp,#3*4]            @ from future BODY_16_xx
1140         eor     r3,r11,r4                       @ a^b, b^c in next round
1141         ldr     r1,[sp,#0*4]    @ from future BODY_16_xx
1142 #endif
1143         eor     r0,r0,r11,ror#20        @ Sigma0(a)
1144         and     r12,r12,r3                      @ (b^c)&=(a^b)
1145         add     r6,r6,r10                       @ d+=h
1146         eor     r12,r12,r4                      @ Maj(a,b,c)
1147         add     r10,r10,r0,ror#2        @ h+=Sigma0(a)
1148         @ add   r10,r10,r12                     @ h+=Maj(a,b,c)
1149         @ ldr   r2,[sp,#3*4]            @ 18
1150         @ ldr   r1,[sp,#0*4]
1151         mov     r0,r2,ror#7
1152         add     r10,r10,r12                     @ h+=Maj(a,b,c) from the past
1153         mov     r12,r1,ror#17
1154         eor     r0,r0,r2,ror#18
1155         eor     r12,r12,r1,ror#19
1156         eor     r0,r0,r2,lsr#3  @ sigma0(X[i+1])
1157         ldr     r2,[sp,#2*4]
1158         eor     r12,r12,r1,lsr#10       @ sigma1(X[i+14])
1159         ldr     r1,[sp,#11*4]
1160
1161         add     r12,r12,r0
1162         eor     r0,r6,r6,ror#5  @ from BODY_00_15
1163         add     r2,r2,r12
1164         eor     r0,r0,r6,ror#19 @ Sigma1(e)
1165         add     r2,r2,r1                        @ X[i]
1166         ldr     r12,[r14],#4                    @ *K256++
1167         add     r9,r9,r2                        @ h+=X[i]
1168         str     r2,[sp,#2*4]
1169         eor     r2,r7,r8
1170         add     r9,r9,r0,ror#6  @ h+=Sigma1(e)
1171         and     r2,r2,r6
1172         add     r9,r9,r12                       @ h+=K256[i]
1173         eor     r2,r2,r8                        @ Ch(e,f,g)
1174         eor     r0,r10,r10,ror#11
1175         add     r9,r9,r2                        @ h+=Ch(e,f,g)
1176 #if 18==31
1177         and     r12,r12,#0xff
1178         cmp     r12,#0xf2                       @ done?
1179 #endif
1180 #if 18<15
1181 # if __ARM_ARCH__>=7
1182         ldr     r2,[r1],#4                      @ prefetch
1183 # else
1184         ldrb    r2,[r1,#3]
1185 # endif
1186         eor     r12,r10,r11                     @ a^b, b^c in next round
1187 #else
1188         ldr     r2,[sp,#4*4]            @ from future BODY_16_xx
1189         eor     r12,r10,r11                     @ a^b, b^c in next round
1190         ldr     r1,[sp,#1*4]    @ from future BODY_16_xx
1191 #endif
1192         eor     r0,r0,r10,ror#20        @ Sigma0(a)
1193         and     r3,r3,r12                       @ (b^c)&=(a^b)
1194         add     r5,r5,r9                        @ d+=h
1195         eor     r3,r3,r11                       @ Maj(a,b,c)
1196         add     r9,r9,r0,ror#2  @ h+=Sigma0(a)
1197         @ add   r9,r9,r3                        @ h+=Maj(a,b,c)
1198         @ ldr   r2,[sp,#4*4]            @ 19
1199         @ ldr   r1,[sp,#1*4]
1200         mov     r0,r2,ror#7
1201         add     r9,r9,r3                        @ h+=Maj(a,b,c) from the past
1202         mov     r3,r1,ror#17
1203         eor     r0,r0,r2,ror#18
1204         eor     r3,r3,r1,ror#19
1205         eor     r0,r0,r2,lsr#3  @ sigma0(X[i+1])
1206         ldr     r2,[sp,#3*4]
1207         eor     r3,r3,r1,lsr#10 @ sigma1(X[i+14])
1208         ldr     r1,[sp,#12*4]
1209
1210         add     r3,r3,r0
1211         eor     r0,r5,r5,ror#5  @ from BODY_00_15
1212         add     r2,r2,r3
1213         eor     r0,r0,r5,ror#19 @ Sigma1(e)
1214         add     r2,r2,r1                        @ X[i]
1215         ldr     r3,[r14],#4                     @ *K256++
1216         add     r8,r8,r2                        @ h+=X[i]
1217         str     r2,[sp,#3*4]
1218         eor     r2,r6,r7
1219         add     r8,r8,r0,ror#6  @ h+=Sigma1(e)
1220         and     r2,r2,r5
1221         add     r8,r8,r3                        @ h+=K256[i]
1222         eor     r2,r2,r7                        @ Ch(e,f,g)
1223         eor     r0,r9,r9,ror#11
1224         add     r8,r8,r2                        @ h+=Ch(e,f,g)
1225 #if 19==31
1226         and     r3,r3,#0xff
1227         cmp     r3,#0xf2                        @ done?
1228 #endif
1229 #if 19<15
1230 # if __ARM_ARCH__>=7
1231         ldr     r2,[r1],#4                      @ prefetch
1232 # else
1233         ldrb    r2,[r1,#3]
1234 # endif
1235         eor     r3,r9,r10                       @ a^b, b^c in next round
1236 #else
1237         ldr     r2,[sp,#5*4]            @ from future BODY_16_xx
1238         eor     r3,r9,r10                       @ a^b, b^c in next round
1239         ldr     r1,[sp,#2*4]    @ from future BODY_16_xx
1240 #endif
1241         eor     r0,r0,r9,ror#20 @ Sigma0(a)
1242         and     r12,r12,r3                      @ (b^c)&=(a^b)
1243         add     r4,r4,r8                        @ d+=h
1244         eor     r12,r12,r10                     @ Maj(a,b,c)
1245         add     r8,r8,r0,ror#2  @ h+=Sigma0(a)
1246         @ add   r8,r8,r12                       @ h+=Maj(a,b,c)
1247         @ ldr   r2,[sp,#5*4]            @ 20
1248         @ ldr   r1,[sp,#2*4]
1249         mov     r0,r2,ror#7
1250         add     r8,r8,r12                       @ h+=Maj(a,b,c) from the past
1251         mov     r12,r1,ror#17
1252         eor     r0,r0,r2,ror#18
1253         eor     r12,r12,r1,ror#19
1254         eor     r0,r0,r2,lsr#3  @ sigma0(X[i+1])
1255         ldr     r2,[sp,#4*4]
1256         eor     r12,r12,r1,lsr#10       @ sigma1(X[i+14])
1257         ldr     r1,[sp,#13*4]
1258
1259         add     r12,r12,r0
1260         eor     r0,r4,r4,ror#5  @ from BODY_00_15
1261         add     r2,r2,r12
1262         eor     r0,r0,r4,ror#19 @ Sigma1(e)
1263         add     r2,r2,r1                        @ X[i]
1264         ldr     r12,[r14],#4                    @ *K256++
1265         add     r7,r7,r2                        @ h+=X[i]
1266         str     r2,[sp,#4*4]
1267         eor     r2,r5,r6
1268         add     r7,r7,r0,ror#6  @ h+=Sigma1(e)
1269         and     r2,r2,r4
1270         add     r7,r7,r12                       @ h+=K256[i]
1271         eor     r2,r2,r6                        @ Ch(e,f,g)
1272         eor     r0,r8,r8,ror#11
1273         add     r7,r7,r2                        @ h+=Ch(e,f,g)
1274 #if 20==31
1275         and     r12,r12,#0xff
1276         cmp     r12,#0xf2                       @ done?
1277 #endif
1278 #if 20<15
1279 # if __ARM_ARCH__>=7
1280         ldr     r2,[r1],#4                      @ prefetch
1281 # else
1282         ldrb    r2,[r1,#3]
1283 # endif
1284         eor     r12,r8,r9                       @ a^b, b^c in next round
1285 #else
1286         ldr     r2,[sp,#6*4]            @ from future BODY_16_xx
1287         eor     r12,r8,r9                       @ a^b, b^c in next round
1288         ldr     r1,[sp,#3*4]    @ from future BODY_16_xx
1289 #endif
1290         eor     r0,r0,r8,ror#20 @ Sigma0(a)
1291         and     r3,r3,r12                       @ (b^c)&=(a^b)
1292         add     r11,r11,r7                      @ d+=h
1293         eor     r3,r3,r9                        @ Maj(a,b,c)
1294         add     r7,r7,r0,ror#2  @ h+=Sigma0(a)
1295         @ add   r7,r7,r3                        @ h+=Maj(a,b,c)
1296         @ ldr   r2,[sp,#6*4]            @ 21
1297         @ ldr   r1,[sp,#3*4]
1298         mov     r0,r2,ror#7
1299         add     r7,r7,r3                        @ h+=Maj(a,b,c) from the past
1300         mov     r3,r1,ror#17
1301         eor     r0,r0,r2,ror#18
1302         eor     r3,r3,r1,ror#19
1303         eor     r0,r0,r2,lsr#3  @ sigma0(X[i+1])
1304         ldr     r2,[sp,#5*4]
1305         eor     r3,r3,r1,lsr#10 @ sigma1(X[i+14])
1306         ldr     r1,[sp,#14*4]
1307
1308         add     r3,r3,r0
1309         eor     r0,r11,r11,ror#5        @ from BODY_00_15
1310         add     r2,r2,r3
1311         eor     r0,r0,r11,ror#19        @ Sigma1(e)
1312         add     r2,r2,r1                        @ X[i]
1313         ldr     r3,[r14],#4                     @ *K256++
1314         add     r6,r6,r2                        @ h+=X[i]
1315         str     r2,[sp,#5*4]
1316         eor     r2,r4,r5
1317         add     r6,r6,r0,ror#6  @ h+=Sigma1(e)
1318         and     r2,r2,r11
1319         add     r6,r6,r3                        @ h+=K256[i]
1320         eor     r2,r2,r5                        @ Ch(e,f,g)
1321         eor     r0,r7,r7,ror#11
1322         add     r6,r6,r2                        @ h+=Ch(e,f,g)
1323 #if 21==31
1324         and     r3,r3,#0xff
1325         cmp     r3,#0xf2                        @ done?
1326 #endif
1327 #if 21<15
1328 # if __ARM_ARCH__>=7
1329         ldr     r2,[r1],#4                      @ prefetch
1330 # else
1331         ldrb    r2,[r1,#3]
1332 # endif
1333         eor     r3,r7,r8                        @ a^b, b^c in next round
1334 #else
1335         ldr     r2,[sp,#7*4]            @ from future BODY_16_xx
1336         eor     r3,r7,r8                        @ a^b, b^c in next round
1337         ldr     r1,[sp,#4*4]    @ from future BODY_16_xx
1338 #endif
1339         eor     r0,r0,r7,ror#20 @ Sigma0(a)
1340         and     r12,r12,r3                      @ (b^c)&=(a^b)
1341         add     r10,r10,r6                      @ d+=h
1342         eor     r12,r12,r8                      @ Maj(a,b,c)
1343         add     r6,r6,r0,ror#2  @ h+=Sigma0(a)
1344         @ add   r6,r6,r12                       @ h+=Maj(a,b,c)
1345         @ ldr   r2,[sp,#7*4]            @ 22
1346         @ ldr   r1,[sp,#4*4]
1347         mov     r0,r2,ror#7
1348         add     r6,r6,r12                       @ h+=Maj(a,b,c) from the past
1349         mov     r12,r1,ror#17
1350         eor     r0,r0,r2,ror#18
1351         eor     r12,r12,r1,ror#19
1352         eor     r0,r0,r2,lsr#3  @ sigma0(X[i+1])
1353         ldr     r2,[sp,#6*4]
1354         eor     r12,r12,r1,lsr#10       @ sigma1(X[i+14])
1355         ldr     r1,[sp,#15*4]
1356
1357         add     r12,r12,r0
1358         eor     r0,r10,r10,ror#5        @ from BODY_00_15
1359         add     r2,r2,r12
1360         eor     r0,r0,r10,ror#19        @ Sigma1(e)
1361         add     r2,r2,r1                        @ X[i]
1362         ldr     r12,[r14],#4                    @ *K256++
1363         add     r5,r5,r2                        @ h+=X[i]
1364         str     r2,[sp,#6*4]
1365         eor     r2,r11,r4
1366         add     r5,r5,r0,ror#6  @ h+=Sigma1(e)
1367         and     r2,r2,r10
1368         add     r5,r5,r12                       @ h+=K256[i]
1369         eor     r2,r2,r4                        @ Ch(e,f,g)
1370         eor     r0,r6,r6,ror#11
1371         add     r5,r5,r2                        @ h+=Ch(e,f,g)
1372 #if 22==31
1373         and     r12,r12,#0xff
1374         cmp     r12,#0xf2                       @ done?
1375 #endif
1376 #if 22<15
1377 # if __ARM_ARCH__>=7
1378         ldr     r2,[r1],#4                      @ prefetch
1379 # else
1380         ldrb    r2,[r1,#3]
1381 # endif
1382         eor     r12,r6,r7                       @ a^b, b^c in next round
1383 #else
1384         ldr     r2,[sp,#8*4]            @ from future BODY_16_xx
1385         eor     r12,r6,r7                       @ a^b, b^c in next round
1386         ldr     r1,[sp,#5*4]    @ from future BODY_16_xx
1387 #endif
1388         eor     r0,r0,r6,ror#20 @ Sigma0(a)
1389         and     r3,r3,r12                       @ (b^c)&=(a^b)
1390         add     r9,r9,r5                        @ d+=h
1391         eor     r3,r3,r7                        @ Maj(a,b,c)
1392         add     r5,r5,r0,ror#2  @ h+=Sigma0(a)
1393         @ add   r5,r5,r3                        @ h+=Maj(a,b,c)
1394         @ ldr   r2,[sp,#8*4]            @ 23
1395         @ ldr   r1,[sp,#5*4]
1396         mov     r0,r2,ror#7
1397         add     r5,r5,r3                        @ h+=Maj(a,b,c) from the past
1398         mov     r3,r1,ror#17
1399         eor     r0,r0,r2,ror#18
1400         eor     r3,r3,r1,ror#19
1401         eor     r0,r0,r2,lsr#3  @ sigma0(X[i+1])
1402         ldr     r2,[sp,#7*4]
1403         eor     r3,r3,r1,lsr#10 @ sigma1(X[i+14])
1404         ldr     r1,[sp,#0*4]
1405
1406         add     r3,r3,r0
1407         eor     r0,r9,r9,ror#5  @ from BODY_00_15
1408         add     r2,r2,r3
1409         eor     r0,r0,r9,ror#19 @ Sigma1(e)
1410         add     r2,r2,r1                        @ X[i]
1411         ldr     r3,[r14],#4                     @ *K256++
1412         add     r4,r4,r2                        @ h+=X[i]
1413         str     r2,[sp,#7*4]
1414         eor     r2,r10,r11
1415         add     r4,r4,r0,ror#6  @ h+=Sigma1(e)
1416         and     r2,r2,r9
1417         add     r4,r4,r3                        @ h+=K256[i]
1418         eor     r2,r2,r11                       @ Ch(e,f,g)
1419         eor     r0,r5,r5,ror#11
1420         add     r4,r4,r2                        @ h+=Ch(e,f,g)
1421 #if 23==31
1422         and     r3,r3,#0xff
1423         cmp     r3,#0xf2                        @ done?
1424 #endif
1425 #if 23<15
1426 # if __ARM_ARCH__>=7
1427         ldr     r2,[r1],#4                      @ prefetch
1428 # else
1429         ldrb    r2,[r1,#3]
1430 # endif
1431         eor     r3,r5,r6                        @ a^b, b^c in next round
1432 #else
1433         ldr     r2,[sp,#9*4]            @ from future BODY_16_xx
1434         eor     r3,r5,r6                        @ a^b, b^c in next round
1435         ldr     r1,[sp,#6*4]    @ from future BODY_16_xx
1436 #endif
1437         eor     r0,r0,r5,ror#20 @ Sigma0(a)
1438         and     r12,r12,r3                      @ (b^c)&=(a^b)
1439         add     r8,r8,r4                        @ d+=h
1440         eor     r12,r12,r6                      @ Maj(a,b,c)
1441         add     r4,r4,r0,ror#2  @ h+=Sigma0(a)
1442         @ add   r4,r4,r12                       @ h+=Maj(a,b,c)
1443         @ ldr   r2,[sp,#9*4]            @ 24
1444         @ ldr   r1,[sp,#6*4]
1445         mov     r0,r2,ror#7
1446         add     r4,r4,r12                       @ h+=Maj(a,b,c) from the past
1447         mov     r12,r1,ror#17
1448         eor     r0,r0,r2,ror#18
1449         eor     r12,r12,r1,ror#19
1450         eor     r0,r0,r2,lsr#3  @ sigma0(X[i+1])
1451         ldr     r2,[sp,#8*4]
1452         eor     r12,r12,r1,lsr#10       @ sigma1(X[i+14])
1453         ldr     r1,[sp,#1*4]
1454
1455         add     r12,r12,r0
1456         eor     r0,r8,r8,ror#5  @ from BODY_00_15
1457         add     r2,r2,r12
1458         eor     r0,r0,r8,ror#19 @ Sigma1(e)
1459         add     r2,r2,r1                        @ X[i]
1460         ldr     r12,[r14],#4                    @ *K256++
1461         add     r11,r11,r2                      @ h+=X[i]
1462         str     r2,[sp,#8*4]
1463         eor     r2,r9,r10
1464         add     r11,r11,r0,ror#6        @ h+=Sigma1(e)
1465         and     r2,r2,r8
1466         add     r11,r11,r12                     @ h+=K256[i]
1467         eor     r2,r2,r10                       @ Ch(e,f,g)
1468         eor     r0,r4,r4,ror#11
1469         add     r11,r11,r2                      @ h+=Ch(e,f,g)
1470 #if 24==31
1471         and     r12,r12,#0xff
1472         cmp     r12,#0xf2                       @ done?
1473 #endif
1474 #if 24<15
1475 # if __ARM_ARCH__>=7
1476         ldr     r2,[r1],#4                      @ prefetch
1477 # else
1478         ldrb    r2,[r1,#3]
1479 # endif
1480         eor     r12,r4,r5                       @ a^b, b^c in next round
1481 #else
1482         ldr     r2,[sp,#10*4]           @ from future BODY_16_xx
1483         eor     r12,r4,r5                       @ a^b, b^c in next round
1484         ldr     r1,[sp,#7*4]    @ from future BODY_16_xx
1485 #endif
1486         eor     r0,r0,r4,ror#20 @ Sigma0(a)
1487         and     r3,r3,r12                       @ (b^c)&=(a^b)
1488         add     r7,r7,r11                       @ d+=h
1489         eor     r3,r3,r5                        @ Maj(a,b,c)
1490         add     r11,r11,r0,ror#2        @ h+=Sigma0(a)
1491         @ add   r11,r11,r3                      @ h+=Maj(a,b,c)
1492         @ ldr   r2,[sp,#10*4]           @ 25
1493         @ ldr   r1,[sp,#7*4]
1494         mov     r0,r2,ror#7
1495         add     r11,r11,r3                      @ h+=Maj(a,b,c) from the past
1496         mov     r3,r1,ror#17
1497         eor     r0,r0,r2,ror#18
1498         eor     r3,r3,r1,ror#19
1499         eor     r0,r0,r2,lsr#3  @ sigma0(X[i+1])
1500         ldr     r2,[sp,#9*4]
1501         eor     r3,r3,r1,lsr#10 @ sigma1(X[i+14])
1502         ldr     r1,[sp,#2*4]
1503
1504         add     r3,r3,r0
1505         eor     r0,r7,r7,ror#5  @ from BODY_00_15
1506         add     r2,r2,r3
1507         eor     r0,r0,r7,ror#19 @ Sigma1(e)
1508         add     r2,r2,r1                        @ X[i]
1509         ldr     r3,[r14],#4                     @ *K256++
1510         add     r10,r10,r2                      @ h+=X[i]
1511         str     r2,[sp,#9*4]
1512         eor     r2,r8,r9
1513         add     r10,r10,r0,ror#6        @ h+=Sigma1(e)
1514         and     r2,r2,r7
1515         add     r10,r10,r3                      @ h+=K256[i]
1516         eor     r2,r2,r9                        @ Ch(e,f,g)
1517         eor     r0,r11,r11,ror#11
1518         add     r10,r10,r2                      @ h+=Ch(e,f,g)
1519 #if 25==31
1520         and     r3,r3,#0xff
1521         cmp     r3,#0xf2                        @ done?
1522 #endif
1523 #if 25<15
1524 # if __ARM_ARCH__>=7
1525         ldr     r2,[r1],#4                      @ prefetch
1526 # else
1527         ldrb    r2,[r1,#3]
1528 # endif
1529         eor     r3,r11,r4                       @ a^b, b^c in next round
1530 #else
1531         ldr     r2,[sp,#11*4]           @ from future BODY_16_xx
1532         eor     r3,r11,r4                       @ a^b, b^c in next round
1533         ldr     r1,[sp,#8*4]    @ from future BODY_16_xx
1534 #endif
1535         eor     r0,r0,r11,ror#20        @ Sigma0(a)
1536         and     r12,r12,r3                      @ (b^c)&=(a^b)
1537         add     r6,r6,r10                       @ d+=h
1538         eor     r12,r12,r4                      @ Maj(a,b,c)
1539         add     r10,r10,r0,ror#2        @ h+=Sigma0(a)
1540         @ add   r10,r10,r12                     @ h+=Maj(a,b,c)
1541         @ ldr   r2,[sp,#11*4]           @ 26
1542         @ ldr   r1,[sp,#8*4]
1543         mov     r0,r2,ror#7
1544         add     r10,r10,r12                     @ h+=Maj(a,b,c) from the past
1545         mov     r12,r1,ror#17
1546         eor     r0,r0,r2,ror#18
1547         eor     r12,r12,r1,ror#19
1548         eor     r0,r0,r2,lsr#3  @ sigma0(X[i+1])
1549         ldr     r2,[sp,#10*4]
1550         eor     r12,r12,r1,lsr#10       @ sigma1(X[i+14])
1551         ldr     r1,[sp,#3*4]
1552
1553         add     r12,r12,r0
1554         eor     r0,r6,r6,ror#5  @ from BODY_00_15
1555         add     r2,r2,r12
1556         eor     r0,r0,r6,ror#19 @ Sigma1(e)
1557         add     r2,r2,r1                        @ X[i]
1558         ldr     r12,[r14],#4                    @ *K256++
1559         add     r9,r9,r2                        @ h+=X[i]
1560         str     r2,[sp,#10*4]
1561         eor     r2,r7,r8
1562         add     r9,r9,r0,ror#6  @ h+=Sigma1(e)
1563         and     r2,r2,r6
1564         add     r9,r9,r12                       @ h+=K256[i]
1565         eor     r2,r2,r8                        @ Ch(e,f,g)
1566         eor     r0,r10,r10,ror#11
1567         add     r9,r9,r2                        @ h+=Ch(e,f,g)
1568 #if 26==31
1569         and     r12,r12,#0xff
1570         cmp     r12,#0xf2                       @ done?
1571 #endif
1572 #if 26<15
1573 # if __ARM_ARCH__>=7
1574         ldr     r2,[r1],#4                      @ prefetch
1575 # else
1576         ldrb    r2,[r1,#3]
1577 # endif
1578         eor     r12,r10,r11                     @ a^b, b^c in next round
1579 #else
1580         ldr     r2,[sp,#12*4]           @ from future BODY_16_xx
1581         eor     r12,r10,r11                     @ a^b, b^c in next round
1582         ldr     r1,[sp,#9*4]    @ from future BODY_16_xx
1583 #endif
1584         eor     r0,r0,r10,ror#20        @ Sigma0(a)
1585         and     r3,r3,r12                       @ (b^c)&=(a^b)
1586         add     r5,r5,r9                        @ d+=h
1587         eor     r3,r3,r11                       @ Maj(a,b,c)
1588         add     r9,r9,r0,ror#2  @ h+=Sigma0(a)
1589         @ add   r9,r9,r3                        @ h+=Maj(a,b,c)
1590         @ ldr   r2,[sp,#12*4]           @ 27
1591         @ ldr   r1,[sp,#9*4]
1592         mov     r0,r2,ror#7
1593         add     r9,r9,r3                        @ h+=Maj(a,b,c) from the past
1594         mov     r3,r1,ror#17
1595         eor     r0,r0,r2,ror#18
1596         eor     r3,r3,r1,ror#19
1597         eor     r0,r0,r2,lsr#3  @ sigma0(X[i+1])
1598         ldr     r2,[sp,#11*4]
1599         eor     r3,r3,r1,lsr#10 @ sigma1(X[i+14])
1600         ldr     r1,[sp,#4*4]
1601
1602         add     r3,r3,r0
1603         eor     r0,r5,r5,ror#5  @ from BODY_00_15
1604         add     r2,r2,r3
1605         eor     r0,r0,r5,ror#19 @ Sigma1(e)
1606         add     r2,r2,r1                        @ X[i]
1607         ldr     r3,[r14],#4                     @ *K256++
1608         add     r8,r8,r2                        @ h+=X[i]
1609         str     r2,[sp,#11*4]
1610         eor     r2,r6,r7
1611         add     r8,r8,r0,ror#6  @ h+=Sigma1(e)
1612         and     r2,r2,r5
1613         add     r8,r8,r3                        @ h+=K256[i]
1614         eor     r2,r2,r7                        @ Ch(e,f,g)
1615         eor     r0,r9,r9,ror#11
1616         add     r8,r8,r2                        @ h+=Ch(e,f,g)
1617 #if 27==31
1618         and     r3,r3,#0xff
1619         cmp     r3,#0xf2                        @ done?
1620 #endif
1621 #if 27<15
1622 # if __ARM_ARCH__>=7
1623         ldr     r2,[r1],#4                      @ prefetch
1624 # else
1625         ldrb    r2,[r1,#3]
1626 # endif
1627         eor     r3,r9,r10                       @ a^b, b^c in next round
1628 #else
1629         ldr     r2,[sp,#13*4]           @ from future BODY_16_xx
1630         eor     r3,r9,r10                       @ a^b, b^c in next round
1631         ldr     r1,[sp,#10*4]   @ from future BODY_16_xx
1632 #endif
1633         eor     r0,r0,r9,ror#20 @ Sigma0(a)
1634         and     r12,r12,r3                      @ (b^c)&=(a^b)
1635         add     r4,r4,r8                        @ d+=h
1636         eor     r12,r12,r10                     @ Maj(a,b,c)
1637         add     r8,r8,r0,ror#2  @ h+=Sigma0(a)
1638         @ add   r8,r8,r12                       @ h+=Maj(a,b,c)
1639         @ ldr   r2,[sp,#13*4]           @ 28
1640         @ ldr   r1,[sp,#10*4]
1641         mov     r0,r2,ror#7
1642         add     r8,r8,r12                       @ h+=Maj(a,b,c) from the past
1643         mov     r12,r1,ror#17
1644         eor     r0,r0,r2,ror#18
1645         eor     r12,r12,r1,ror#19
1646         eor     r0,r0,r2,lsr#3  @ sigma0(X[i+1])
1647         ldr     r2,[sp,#12*4]
1648         eor     r12,r12,r1,lsr#10       @ sigma1(X[i+14])
1649         ldr     r1,[sp,#5*4]
1650
1651         add     r12,r12,r0
1652         eor     r0,r4,r4,ror#5  @ from BODY_00_15
1653         add     r2,r2,r12
1654         eor     r0,r0,r4,ror#19 @ Sigma1(e)
1655         add     r2,r2,r1                        @ X[i]
1656         ldr     r12,[r14],#4                    @ *K256++
1657         add     r7,r7,r2                        @ h+=X[i]
1658         str     r2,[sp,#12*4]
1659         eor     r2,r5,r6
1660         add     r7,r7,r0,ror#6  @ h+=Sigma1(e)
1661         and     r2,r2,r4
1662         add     r7,r7,r12                       @ h+=K256[i]
1663         eor     r2,r2,r6                        @ Ch(e,f,g)
1664         eor     r0,r8,r8,ror#11
1665         add     r7,r7,r2                        @ h+=Ch(e,f,g)
1666 #if 28==31
1667         and     r12,r12,#0xff
1668         cmp     r12,#0xf2                       @ done?
1669 #endif
1670 #if 28<15
1671 # if __ARM_ARCH__>=7
1672         ldr     r2,[r1],#4                      @ prefetch
1673 # else
1674         ldrb    r2,[r1,#3]
1675 # endif
1676         eor     r12,r8,r9                       @ a^b, b^c in next round
1677 #else
1678         ldr     r2,[sp,#14*4]           @ from future BODY_16_xx
1679         eor     r12,r8,r9                       @ a^b, b^c in next round
1680         ldr     r1,[sp,#11*4]   @ from future BODY_16_xx
1681 #endif
1682         eor     r0,r0,r8,ror#20 @ Sigma0(a)
1683         and     r3,r3,r12                       @ (b^c)&=(a^b)
1684         add     r11,r11,r7                      @ d+=h
1685         eor     r3,r3,r9                        @ Maj(a,b,c)
1686         add     r7,r7,r0,ror#2  @ h+=Sigma0(a)
1687         @ add   r7,r7,r3                        @ h+=Maj(a,b,c)
1688         @ ldr   r2,[sp,#14*4]           @ 29
1689         @ ldr   r1,[sp,#11*4]
1690         mov     r0,r2,ror#7
1691         add     r7,r7,r3                        @ h+=Maj(a,b,c) from the past
1692         mov     r3,r1,ror#17
1693         eor     r0,r0,r2,ror#18
1694         eor     r3,r3,r1,ror#19
1695         eor     r0,r0,r2,lsr#3  @ sigma0(X[i+1])
1696         ldr     r2,[sp,#13*4]
1697         eor     r3,r3,r1,lsr#10 @ sigma1(X[i+14])
1698         ldr     r1,[sp,#6*4]
1699
1700         add     r3,r3,r0
1701         eor     r0,r11,r11,ror#5        @ from BODY_00_15
1702         add     r2,r2,r3
1703         eor     r0,r0,r11,ror#19        @ Sigma1(e)
1704         add     r2,r2,r1                        @ X[i]
1705         ldr     r3,[r14],#4                     @ *K256++
1706         add     r6,r6,r2                        @ h+=X[i]
1707         str     r2,[sp,#13*4]
1708         eor     r2,r4,r5
1709         add     r6,r6,r0,ror#6  @ h+=Sigma1(e)
1710         and     r2,r2,r11
1711         add     r6,r6,r3                        @ h+=K256[i]
1712         eor     r2,r2,r5                        @ Ch(e,f,g)
1713         eor     r0,r7,r7,ror#11
1714         add     r6,r6,r2                        @ h+=Ch(e,f,g)
1715 #if 29==31
1716         and     r3,r3,#0xff
1717         cmp     r3,#0xf2                        @ done?
1718 #endif
1719 #if 29<15
1720 # if __ARM_ARCH__>=7
1721         ldr     r2,[r1],#4                      @ prefetch
1722 # else
1723         ldrb    r2,[r1,#3]
1724 # endif
1725         eor     r3,r7,r8                        @ a^b, b^c in next round
1726 #else
1727         ldr     r2,[sp,#15*4]           @ from future BODY_16_xx
1728         eor     r3,r7,r8                        @ a^b, b^c in next round
1729         ldr     r1,[sp,#12*4]   @ from future BODY_16_xx
1730 #endif
1731         eor     r0,r0,r7,ror#20 @ Sigma0(a)
1732         and     r12,r12,r3                      @ (b^c)&=(a^b)
1733         add     r10,r10,r6                      @ d+=h
1734         eor     r12,r12,r8                      @ Maj(a,b,c)
1735         add     r6,r6,r0,ror#2  @ h+=Sigma0(a)
1736         @ add   r6,r6,r12                       @ h+=Maj(a,b,c)
1737         @ ldr   r2,[sp,#15*4]           @ 30
1738         @ ldr   r1,[sp,#12*4]
1739         mov     r0,r2,ror#7
1740         add     r6,r6,r12                       @ h+=Maj(a,b,c) from the past
1741         mov     r12,r1,ror#17
1742         eor     r0,r0,r2,ror#18
1743         eor     r12,r12,r1,ror#19
1744         eor     r0,r0,r2,lsr#3  @ sigma0(X[i+1])
1745         ldr     r2,[sp,#14*4]
1746         eor     r12,r12,r1,lsr#10       @ sigma1(X[i+14])
1747         ldr     r1,[sp,#7*4]
1748
1749         add     r12,r12,r0
1750         eor     r0,r10,r10,ror#5        @ from BODY_00_15
1751         add     r2,r2,r12
1752         eor     r0,r0,r10,ror#19        @ Sigma1(e)
1753         add     r2,r2,r1                        @ X[i]
1754         ldr     r12,[r14],#4                    @ *K256++
1755         add     r5,r5,r2                        @ h+=X[i]
1756         str     r2,[sp,#14*4]
1757         eor     r2,r11,r4
1758         add     r5,r5,r0,ror#6  @ h+=Sigma1(e)
1759         and     r2,r2,r10
1760         add     r5,r5,r12                       @ h+=K256[i]
1761         eor     r2,r2,r4                        @ Ch(e,f,g)
1762         eor     r0,r6,r6,ror#11
1763         add     r5,r5,r2                        @ h+=Ch(e,f,g)
1764 #if 30==31
1765         and     r12,r12,#0xff
1766         cmp     r12,#0xf2                       @ done?
1767 #endif
1768 #if 30<15
1769 # if __ARM_ARCH__>=7
1770         ldr     r2,[r1],#4                      @ prefetch
1771 # else
1772         ldrb    r2,[r1,#3]
1773 # endif
1774         eor     r12,r6,r7                       @ a^b, b^c in next round
1775 #else
1776         ldr     r2,[sp,#0*4]            @ from future BODY_16_xx
1777         eor     r12,r6,r7                       @ a^b, b^c in next round
1778         ldr     r1,[sp,#13*4]   @ from future BODY_16_xx
1779 #endif
1780         eor     r0,r0,r6,ror#20 @ Sigma0(a)
1781         and     r3,r3,r12                       @ (b^c)&=(a^b)
1782         add     r9,r9,r5                        @ d+=h
1783         eor     r3,r3,r7                        @ Maj(a,b,c)
1784         add     r5,r5,r0,ror#2  @ h+=Sigma0(a)
1785         @ add   r5,r5,r3                        @ h+=Maj(a,b,c)
1786         @ ldr   r2,[sp,#0*4]            @ 31
1787         @ ldr   r1,[sp,#13*4]
1788         mov     r0,r2,ror#7
1789         add     r5,r5,r3                        @ h+=Maj(a,b,c) from the past
1790         mov     r3,r1,ror#17
1791         eor     r0,r0,r2,ror#18
1792         eor     r3,r3,r1,ror#19
1793         eor     r0,r0,r2,lsr#3  @ sigma0(X[i+1])
1794         ldr     r2,[sp,#15*4]
1795         eor     r3,r3,r1,lsr#10 @ sigma1(X[i+14])
1796         ldr     r1,[sp,#8*4]
1797
1798         add     r3,r3,r0
1799         eor     r0,r9,r9,ror#5  @ from BODY_00_15
1800         add     r2,r2,r3
1801         eor     r0,r0,r9,ror#19 @ Sigma1(e)
1802         add     r2,r2,r1                        @ X[i]
1803         ldr     r3,[r14],#4                     @ *K256++
1804         add     r4,r4,r2                        @ h+=X[i]
1805         str     r2,[sp,#15*4]
1806         eor     r2,r10,r11
1807         add     r4,r4,r0,ror#6  @ h+=Sigma1(e)
1808         and     r2,r2,r9
1809         add     r4,r4,r3                        @ h+=K256[i]
1810         eor     r2,r2,r11                       @ Ch(e,f,g)
1811         eor     r0,r5,r5,ror#11
1812         add     r4,r4,r2                        @ h+=Ch(e,f,g)
1813 #if 31==31
1814         and     r3,r3,#0xff
1815         cmp     r3,#0xf2                        @ done?
1816 #endif
1817 #if 31<15
1818 # if __ARM_ARCH__>=7
1819         ldr     r2,[r1],#4                      @ prefetch
1820 # else
1821         ldrb    r2,[r1,#3]
1822 # endif
1823         eor     r3,r5,r6                        @ a^b, b^c in next round
1824 #else
1825         ldr     r2,[sp,#1*4]            @ from future BODY_16_xx
1826         eor     r3,r5,r6                        @ a^b, b^c in next round
1827         ldr     r1,[sp,#14*4]   @ from future BODY_16_xx
1828 #endif
1829         eor     r0,r0,r5,ror#20 @ Sigma0(a)
1830         and     r12,r12,r3                      @ (b^c)&=(a^b)
1831         add     r8,r8,r4                        @ d+=h
1832         eor     r12,r12,r6                      @ Maj(a,b,c)
1833         add     r4,r4,r0,ror#2  @ h+=Sigma0(a)
1834         @ add   r4,r4,r12                       @ h+=Maj(a,b,c)
1835 #if __ARM_ARCH__>=7
1836         ite     eq                      @ Thumb2 thing, sanity check in ARM
1837 #endif
1838         ldreq   r3,[sp,#16*4]           @ pull ctx, only when the done? cmp above matched
1839         bne     .Lrounds_16_xx          @ K256 low byte was not 0xf2: keep running rounds
1840
1841         add     r4,r4,r12               @ h+=Maj(a,b,c) from the past
1842         ldr     r0,[r3,#0]              @ from here on: add the eight words stored at ctx
1843         ldr     r2,[r3,#4]              @ into the working variables held in r4-r11
1844         ldr     r12,[r3,#8]
1845         add     r4,r4,r0                @ r4 += ctx[0]
1846         ldr     r0,[r3,#12]
1847         add     r5,r5,r2                @ r5 += ctx[1]
1848         ldr     r2,[r3,#16]
1849         add     r6,r6,r12               @ r6 += ctx[2]
1850         ldr     r12,[r3,#20]
1851         add     r7,r7,r0                @ r7 += ctx[3]
1852         ldr     r0,[r3,#24]
1853         add     r8,r8,r2                @ r8 += ctx[4]
1854         ldr     r2,[r3,#28]
1855         add     r9,r9,r12               @ r9 += ctx[5]
1856         ldr     r1,[sp,#17*4]           @ pull inp (r1 was scratch during the rounds)
1857         ldr     r12,[sp,#18*4]          @ pull inp+len
1858         add     r10,r10,r0              @ r10 += ctx[6]
1859         add     r11,r11,r2              @ r11 += ctx[7]
1860         stmia   r3,{r4,r5,r6,r7,r8,r9,r10,r11}  @ store the updated eight words back to ctx
1861         cmp     r1,r12                  @ inp == inp+len means all input consumed
1862         sub     r14,r14,#256    @ rewind Ktbl (no s suffix, so the cmp flags survive)
1863         bne     .Loop                   @ input left: go process the next block
1864
1865         add     sp,sp,#19*4     @ destroy frame (16 X[] slots + ctx + inp + inp+len)
1866 #if __ARM_ARCH__>=5
1867         ldmia   sp!,{r4-r11,pc}         @ restore r4-r11 and return by loading pc
1868 #else
1869         ldmia   sp!,{r4-r11,lr}         @ restore r4-r11, return address goes to lr
1870         tst     lr,#1                   @ bit 0 of lr clear: plain ARM-state return address
1871         moveq   pc,lr                   @ be binary compatible with V4, yet
1872         .word   0xe12fff1e                      @ interoperable with Thumb ISA:-) (this word encodes bx lr)
1873 #endif
1874 .size   sha256_block_data_order,.-sha256_block_data_order
1875 #if __ARM_MAX_ARCH__>=7
1876 .arch   armv7-a
1877 .fpu    neon
1878
1879 .global sha256_block_data_order_neon
1880 .type   sha256_block_data_order_neon,%function
1881 .align  4
1882 sha256_block_data_order_neon:
1883 .LNEON:
@ NEON code path: entry and setup for the first input block.
@ Register use as read from the code below:
@   r0  = ctx, eight 32-bit state words (loaded into r4-r11)
@   r1  = inp
@   r2  = number of 64-byte blocks (converted to an end-of-input pointer)
@   r14 = running pointer into the K256 table
@ Stack frame, 16-byte aligned:
@   [sp,#0..#63] sixteen X[i]+K256[i] words consumed by the scalar rounds
@   [sp,#64] ctx   [sp,#68] inp   [sp,#72] end of inp   [sp,#76] caller sp
1884         stmdb   sp!,{r4-r12,lr}         @ save r4-r12 and the return address
1885
1886         sub     r11,sp,#16*4+16         @ room for 16 X+K words plus 4 saved words
1887         adrl    r14,K256                @ r14 = address of the K256 table
1888         bic     r11,r11,#15             @ align for 128-bit stores
1889         mov     r12,sp                  @ remember sp before alloca (stored at [sp,#76] below)
1890         mov     sp,r11                  @ alloca
1891         add     r2,r1,r2,lsl#6  @ len to point at the end of inp
1892
1893         vld1.8          {q0},[r1]!      @ q0-q3 = first 64-byte input block,
1894         vld1.8          {q1},[r1]!      @ r1 advances by 16 per load
1895         vld1.8          {q2},[r1]!
1896         vld1.8          {q3},[r1]!
1897         vld1.32         {q8},[r14,:128]!        @ q8-q11 = first sixteen K256 words
1898         vld1.32         {q9},[r14,:128]!
1899         vld1.32         {q10},[r14,:128]!
1900         vld1.32         {q11},[r14,:128]!
1901         vrev32.8        q0,q0           @ yes, even on
1902         str             r0,[sp,#64]     @ save ctx
1903         vrev32.8        q1,q1           @ big-endian
1904         str             r1,[sp,#68]     @ save inp, already past the first block
1905         mov             r1,sp           @ r1 = start of the X+K staging area
1906         vrev32.8        q2,q2
1907         str             r2,[sp,#72]     @ save end-of-input pointer
1908         vrev32.8        q3,q3
1909         str             r12,[sp,#76]            @ save original sp
1910         vadd.i32        q8,q8,q0        @ X[0..3]+K256[0..3]
1911         vadd.i32        q9,q9,q1        @ X[4..7]+K256[4..7]
1912         vst1.32         {q8},[r1,:128]!
1913         vadd.i32        q10,q10,q2      @ X[8..11]+K256[8..11]
1914         vst1.32         {q9},[r1,:128]!
1915         vadd.i32        q11,q11,q3      @ X[12..15]+K256[12..15]
1916         vst1.32         {q10},[r1,:128]!
1917         vst1.32         {q11},[r1,:128]!
1918
1919         ldmia           r0,{r4-r11}     @ load the eight state words from ctx
1920         sub             r1,r1,#64       @ r1 back to the start of the staging area
1921         ldr             r2,[sp,#0]      @ first X+K word, consumed at the top of .L_00_48
1922         eor             r12,r12,r12     @ r12 = 0, it is added to r4 at the top of .L_00_48
1923         eor             r3,r5,r6        @ r3 = r5^r6, presumably the b^c seed for the first Maj
1924         b               .L_00_48
1925
1926 .align  4
1927 .L_00_48:
1928         vext.8  q8,q0,q1,#4
1929         add     r11,r11,r2
1930         eor     r2,r9,r10
1931         eor     r0,r8,r8,ror#5
1932         vext.8  q9,q2,q3,#4
1933         add     r4,r4,r12
1934         and     r2,r2,r8
1935         eor     r12,r0,r8,ror#19
1936         vshr.u32        q10,q8,#7
1937         eor     r0,r4,r4,ror#11
1938         eor     r2,r2,r10
1939         vadd.i32        q0,q0,q9
1940         add     r11,r11,r12,ror#6
1941         eor     r12,r4,r5
1942         vshr.u32        q9,q8,#3
1943         eor     r0,r0,r4,ror#20
1944         add     r11,r11,r2
1945         vsli.32 q10,q8,#25
1946         ldr     r2,[sp,#4]
1947         and     r3,r3,r12
1948         vshr.u32        q11,q8,#18
1949         add     r7,r7,r11
1950         add     r11,r11,r0,ror#2
1951         eor     r3,r3,r5
1952         veor    q9,q9,q10
1953         add     r10,r10,r2
1954         vsli.32 q11,q8,#14
1955         eor     r2,r8,r9
1956         eor     r0,r7,r7,ror#5
1957         vshr.u32        d24,d7,#17
1958         add     r11,r11,r3
1959         and     r2,r2,r7
1960         veor    q9,q9,q11
1961         eor     r3,r0,r7,ror#19
1962         eor     r0,r11,r11,ror#11
1963         vsli.32 d24,d7,#15
1964         eor     r2,r2,r9
1965         add     r10,r10,r3,ror#6
1966         vshr.u32        d25,d7,#10
1967         eor     r3,r11,r4
1968         eor     r0,r0,r11,ror#20
1969         vadd.i32        q0,q0,q9
1970         add     r10,r10,r2
1971         ldr     r2,[sp,#8]
1972         veor    d25,d25,d24
1973         and     r12,r12,r3
1974         add     r6,r6,r10
1975         vshr.u32        d24,d7,#19
1976         add     r10,r10,r0,ror#2
1977         eor     r12,r12,r4
1978         vsli.32 d24,d7,#13
1979         add     r9,r9,r2
1980         eor     r2,r7,r8
1981         veor    d25,d25,d24
1982         eor     r0,r6,r6,ror#5
1983         add     r10,r10,r12
1984         vadd.i32        d0,d0,d25
1985         and     r2,r2,r6
1986         eor     r12,r0,r6,ror#19
1987         vshr.u32        d24,d0,#17
1988         eor     r0,r10,r10,ror#11
1989         eor     r2,r2,r8
1990         vsli.32 d24,d0,#15
1991         add     r9,r9,r12,ror#6
1992         eor     r12,r10,r11
1993         vshr.u32        d25,d0,#10
1994         eor     r0,r0,r10,ror#20
1995         add     r9,r9,r2
1996         veor    d25,d25,d24
1997         ldr     r2,[sp,#12]
1998         and     r3,r3,r12
1999         vshr.u32        d24,d0,#19
2000         add     r5,r5,r9
2001         add     r9,r9,r0,ror#2
2002         eor     r3,r3,r11
2003         vld1.32 {q8},[r14,:128]!
2004         add     r8,r8,r2
2005         vsli.32 d24,d0,#13
2006         eor     r2,r6,r7
2007         eor     r0,r5,r5,ror#5
2008         veor    d25,d25,d24
2009         add     r9,r9,r3
2010         and     r2,r2,r5
2011         vadd.i32        d1,d1,d25
2012         eor     r3,r0,r5,ror#19
2013         eor     r0,r9,r9,ror#11
2014         vadd.i32        q8,q8,q0
2015         eor     r2,r2,r7
2016         add     r8,r8,r3,ror#6
2017         eor     r3,r9,r10
2018         eor     r0,r0,r9,ror#20
2019         add     r8,r8,r2
2020         ldr     r2,[sp,#16]
2021         and     r12,r12,r3
2022         add     r4,r4,r8
2023         vst1.32 {q8},[r1,:128]!
2024         add     r8,r8,r0,ror#2
2025         eor     r12,r12,r10
2026         vext.8  q8,q1,q2,#4
2027         add     r7,r7,r2
2028         eor     r2,r5,r6
2029         eor     r0,r4,r4,ror#5
2030         vext.8  q9,q3,q0,#4
2031         add     r8,r8,r12
2032         and     r2,r2,r4
2033         eor     r12,r0,r4,ror#19
2034         vshr.u32        q10,q8,#7
2035         eor     r0,r8,r8,ror#11
2036         eor     r2,r2,r6
2037         vadd.i32        q1,q1,q9
2038         add     r7,r7,r12,ror#6
2039         eor     r12,r8,r9
2040         vshr.u32        q9,q8,#3
2041         eor     r0,r0,r8,ror#20
2042         add     r7,r7,r2
2043         vsli.32 q10,q8,#25
2044         ldr     r2,[sp,#20]
2045         and     r3,r3,r12
2046         vshr.u32        q11,q8,#18
2047         add     r11,r11,r7
2048         add     r7,r7,r0,ror#2
2049         eor     r3,r3,r9
2050         veor    q9,q9,q10
2051         add     r6,r6,r2
2052         vsli.32 q11,q8,#14
2053         eor     r2,r4,r5
2054         eor     r0,r11,r11,ror#5
2055         vshr.u32        d24,d1,#17
2056         add     r7,r7,r3
2057         and     r2,r2,r11
2058         veor    q9,q9,q11
2059         eor     r3,r0,r11,ror#19
2060         eor     r0,r7,r7,ror#11
2061         vsli.32 d24,d1,#15
2062         eor     r2,r2,r5
2063         add     r6,r6,r3,ror#6
2064         vshr.u32        d25,d1,#10
2065         eor     r3,r7,r8
2066         eor     r0,r0,r7,ror#20
2067         vadd.i32        q1,q1,q9
2068         add     r6,r6,r2
2069         ldr     r2,[sp,#24]
2070         veor    d25,d25,d24
2071         and     r12,r12,r3
2072         add     r10,r10,r6
2073         vshr.u32        d24,d1,#19
2074         add     r6,r6,r0,ror#2
2075         eor     r12,r12,r8
2076         vsli.32 d24,d1,#13
2077         add     r5,r5,r2
2078         eor     r2,r11,r4
2079         veor    d25,d25,d24
2080         eor     r0,r10,r10,ror#5
2081         add     r6,r6,r12
2082         vadd.i32        d2,d2,d25
2083         and     r2,r2,r10
2084         eor     r12,r0,r10,ror#19
2085         vshr.u32        d24,d2,#17
2086         eor     r0,r6,r6,ror#11
2087         eor     r2,r2,r4
2088         vsli.32 d24,d2,#15
2089         add     r5,r5,r12,ror#6
2090         eor     r12,r6,r7
2091         vshr.u32        d25,d2,#10
2092         eor     r0,r0,r6,ror#20
2093         add     r5,r5,r2
2094         veor    d25,d25,d24
2095         ldr     r2,[sp,#28]
2096         and     r3,r3,r12
2097         vshr.u32        d24,d2,#19
2098         add     r9,r9,r5
2099         add     r5,r5,r0,ror#2
2100         eor     r3,r3,r7
2101         vld1.32 {q8},[r14,:128]!
2102         add     r4,r4,r2
2103         vsli.32 d24,d2,#13
2104         eor     r2,r10,r11
2105         eor     r0,r9,r9,ror#5
2106         veor    d25,d25,d24
2107         add     r5,r5,r3
2108         and     r2,r2,r9
2109         vadd.i32        d3,d3,d25
2110         eor     r3,r0,r9,ror#19
2111         eor     r0,r5,r5,ror#11
2112         vadd.i32        q8,q8,q1
2113         eor     r2,r2,r11
2114         add     r4,r4,r3,ror#6
2115         eor     r3,r5,r6
2116         eor     r0,r0,r5,ror#20
2117         add     r4,r4,r2
2118         ldr     r2,[sp,#32]
2119         and     r12,r12,r3
2120         add     r8,r8,r4
2121         vst1.32 {q8},[r1,:128]!
2122         add     r4,r4,r0,ror#2
2123         eor     r12,r12,r6
2124         vext.8  q8,q2,q3,#4
2125         add     r11,r11,r2
2126         eor     r2,r9,r10
2127         eor     r0,r8,r8,ror#5
2128         vext.8  q9,q0,q1,#4
2129         add     r4,r4,r12
2130         and     r2,r2,r8
2131         eor     r12,r0,r8,ror#19
2132         vshr.u32        q10,q8,#7
2133         eor     r0,r4,r4,ror#11
2134         eor     r2,r2,r10
2135         vadd.i32        q2,q2,q9
2136         add     r11,r11,r12,ror#6
2137         eor     r12,r4,r5
2138         vshr.u32        q9,q8,#3
2139         eor     r0,r0,r4,ror#20
2140         add     r11,r11,r2
2141         vsli.32 q10,q8,#25
2142         ldr     r2,[sp,#36]
2143         and     r3,r3,r12
2144         vshr.u32        q11,q8,#18
2145         add     r7,r7,r11
2146         add     r11,r11,r0,ror#2
2147         eor     r3,r3,r5
2148         veor    q9,q9,q10
2149         add     r10,r10,r2
2150         vsli.32 q11,q8,#14
2151         eor     r2,r8,r9
2152         eor     r0,r7,r7,ror#5
2153         vshr.u32        d24,d3,#17
2154         add     r11,r11,r3
2155         and     r2,r2,r7
2156         veor    q9,q9,q11
2157         eor     r3,r0,r7,ror#19
2158         eor     r0,r11,r11,ror#11
2159         vsli.32 d24,d3,#15
2160         eor     r2,r2,r9
2161         add     r10,r10,r3,ror#6
2162         vshr.u32        d25,d3,#10
2163         eor     r3,r11,r4
2164         eor     r0,r0,r11,ror#20
2165         vadd.i32        q2,q2,q9
2166         add     r10,r10,r2
2167         ldr     r2,[sp,#40]
2168         veor    d25,d25,d24
2169         and     r12,r12,r3
2170         add     r6,r6,r10
2171         vshr.u32        d24,d3,#19
2172         add     r10,r10,r0,ror#2
2173         eor     r12,r12,r4
2174         vsli.32 d24,d3,#13
2175         add     r9,r9,r2
2176         eor     r2,r7,r8
2177         veor    d25,d25,d24
2178         eor     r0,r6,r6,ror#5
2179         add     r10,r10,r12
2180         vadd.i32        d4,d4,d25
2181         and     r2,r2,r6
2182         eor     r12,r0,r6,ror#19
2183         vshr.u32        d24,d4,#17
2184         eor     r0,r10,r10,ror#11
2185         eor     r2,r2,r8
2186         vsli.32 d24,d4,#15
2187         add     r9,r9,r12,ror#6
2188         eor     r12,r10,r11
2189         vshr.u32        d25,d4,#10
2190         eor     r0,r0,r10,ror#20
2191         add     r9,r9,r2
2192         veor    d25,d25,d24
2193         ldr     r2,[sp,#44]
2194         and     r3,r3,r12
2195         vshr.u32        d24,d4,#19
2196         add     r5,r5,r9
2197         add     r9,r9,r0,ror#2
2198         eor     r3,r3,r11
2199         vld1.32 {q8},[r14,:128]!
2200         add     r8,r8,r2
2201         vsli.32 d24,d4,#13
2202         eor     r2,r6,r7
2203         eor     r0,r5,r5,ror#5
2204         veor    d25,d25,d24
2205         add     r9,r9,r3
2206         and     r2,r2,r5
2207         vadd.i32        d5,d5,d25
2208         eor     r3,r0,r5,ror#19
2209         eor     r0,r9,r9,ror#11
2210         vadd.i32        q8,q8,q2
2211         eor     r2,r2,r7
2212         add     r8,r8,r3,ror#6
2213         eor     r3,r9,r10
2214         eor     r0,r0,r9,ror#20
2215         add     r8,r8,r2
2216         ldr     r2,[sp,#48]
2217         and     r12,r12,r3
2218         add     r4,r4,r8
2219         vst1.32 {q8},[r1,:128]!
2220         add     r8,r8,r0,ror#2
2221         eor     r12,r12,r10
2222         vext.8  q8,q3,q0,#4
2223         add     r7,r7,r2
2224         eor     r2,r5,r6
2225         eor     r0,r4,r4,ror#5
2226         vext.8  q9,q1,q2,#4
2227         add     r8,r8,r12
2228         and     r2,r2,r4
2229         eor     r12,r0,r4,ror#19
2230         vshr.u32        q10,q8,#7
2231         eor     r0,r8,r8,ror#11
2232         eor     r2,r2,r6
2233         vadd.i32        q3,q3,q9
2234         add     r7,r7,r12,ror#6
2235         eor     r12,r8,r9
2236         vshr.u32        q9,q8,#3
2237         eor     r0,r0,r8,ror#20
2238         add     r7,r7,r2
2239         vsli.32 q10,q8,#25
2240         ldr     r2,[sp,#52]
2241         and     r3,r3,r12
2242         vshr.u32        q11,q8,#18
2243         add     r11,r11,r7
2244         add     r7,r7,r0,ror#2
2245         eor     r3,r3,r9
2246         veor    q9,q9,q10
2247         add     r6,r6,r2
2248         vsli.32 q11,q8,#14
2249         eor     r2,r4,r5
2250         eor     r0,r11,r11,ror#5
2251         vshr.u32        d24,d5,#17
2252         add     r7,r7,r3
2253         and     r2,r2,r11
2254         veor    q9,q9,q11
2255         eor     r3,r0,r11,ror#19
2256         eor     r0,r7,r7,ror#11
2257         vsli.32 d24,d5,#15
2258         eor     r2,r2,r5
2259         add     r6,r6,r3,ror#6
2260         vshr.u32        d25,d5,#10
2261         eor     r3,r7,r8
2262         eor     r0,r0,r7,ror#20
2263         vadd.i32        q3,q3,q9
2264         add     r6,r6,r2
2265         ldr     r2,[sp,#56]
2266         veor    d25,d25,d24
2267         and     r12,r12,r3
2268         add     r10,r10,r6
2269         vshr.u32        d24,d5,#19
2270         add     r6,r6,r0,ror#2
2271         eor     r12,r12,r8
2272         vsli.32 d24,d5,#13
2273         add     r5,r5,r2
2274         eor     r2,r11,r4
2275         veor    d25,d25,d24
2276         eor     r0,r10,r10,ror#5
2277         add     r6,r6,r12
2278         vadd.i32        d6,d6,d25
2279         and     r2,r2,r10
2280         eor     r12,r0,r10,ror#19
2281         vshr.u32        d24,d6,#17
2282         eor     r0,r6,r6,ror#11
2283         eor     r2,r2,r4
2284         vsli.32 d24,d6,#15
2285         add     r5,r5,r12,ror#6
2286         eor     r12,r6,r7
2287         vshr.u32        d25,d6,#10
2288         eor     r0,r0,r6,ror#20
2289         add     r5,r5,r2
2290         veor    d25,d25,d24
2291         ldr     r2,[sp,#60]
2292         and     r3,r3,r12
2293         vshr.u32        d24,d6,#19
2294         add     r9,r9,r5
2295         add     r5,r5,r0,ror#2
2296         eor     r3,r3,r7
2297         vld1.32 {q8},[r14,:128]!
2298         add     r4,r4,r2
2299         vsli.32 d24,d6,#13
2300         eor     r2,r10,r11
2301         eor     r0,r9,r9,ror#5
2302         veor    d25,d25,d24
2303         add     r5,r5,r3
2304         and     r2,r2,r9
2305         vadd.i32        d7,d7,d25
2306         eor     r3,r0,r9,ror#19
2307         eor     r0,r5,r5,ror#11
2308         vadd.i32        q8,q8,q3
2309         eor     r2,r2,r11
2310         add     r4,r4,r3,ror#6
2311         eor     r3,r5,r6
2312         eor     r0,r0,r5,ror#20
2313         add     r4,r4,r2
2314         ldr     r2,[r14]
2315         and     r12,r12,r3
2316         add     r8,r8,r4
2317         vst1.32 {q8},[r1,:128]!
2318         add     r4,r4,r0,ror#2
2319         eor     r12,r12,r6
2320         teq     r2,#0                           @ check for K256 terminator
2321         ldr     r2,[sp,#0]              @ first X+K word for the next round (flags untouched)
2322         sub     r1,r1,#64               @ undo the four 16-byte vst1 post-increments
2323         bne     .L_00_48                @ word at r14 is nonzero: more schedule rounds to do
2324
2325         ldr             r1,[sp,#68]     @ r1 = saved inp
2326         ldr             r0,[sp,#72]     @ r0 = saved end of inp
2327         sub             r14,r14,#256    @ rewind r14
2328         teq             r1,r0           @ eq: no input left; flags reused by both IT blocks below
2329         it              eq
2330         subeq           r1,r1,#64               @ avoid SEGV
2331         vld1.8          {q0},[r1]!              @ load next input block
2332         vld1.8          {q1},[r1]!
2333         vld1.8          {q2},[r1]!
2334         vld1.8          {q3},[r1]!
2335         it              ne
2336         strne           r1,[sp,#68]     @ save advanced inp only when a new block was really loaded
2337         mov             r1,sp           @ r1 = start of the X+K staging area again
2338         add     r11,r11,r2
2339         eor     r2,r9,r10
2340         eor     r0,r8,r8,ror#5
2341         add     r4,r4,r12
2342         vld1.32 {q8},[r14,:128]!
2343         and     r2,r2,r8
2344         eor     r12,r0,r8,ror#19
2345         eor     r0,r4,r4,ror#11
2346         eor     r2,r2,r10
2347         vrev32.8        q0,q0
2348         add     r11,r11,r12,ror#6
2349         eor     r12,r4,r5
2350         eor     r0,r0,r4,ror#20
2351         add     r11,r11,r2
2352         vadd.i32        q8,q8,q0
2353         ldr     r2,[sp,#4]
2354         and     r3,r3,r12
2355         add     r7,r7,r11
2356         add     r11,r11,r0,ror#2
2357         eor     r3,r3,r5
2358         add     r10,r10,r2
2359         eor     r2,r8,r9
2360         eor     r0,r7,r7,ror#5
2361         add     r11,r11,r3
2362         and     r2,r2,r7
2363         eor     r3,r0,r7,ror#19
2364         eor     r0,r11,r11,ror#11
2365         eor     r2,r2,r9
2366         add     r10,r10,r3,ror#6
2367         eor     r3,r11,r4
2368         eor     r0,r0,r11,ror#20
2369         add     r10,r10,r2
2370         ldr     r2,[sp,#8]
2371         and     r12,r12,r3
2372         add     r6,r6,r10
2373         add     r10,r10,r0,ror#2
2374         eor     r12,r12,r4
2375         add     r9,r9,r2
2376         eor     r2,r7,r8
2377         eor     r0,r6,r6,ror#5
2378         add     r10,r10,r12
2379         and     r2,r2,r6
2380         eor     r12,r0,r6,ror#19
2381         eor     r0,r10,r10,ror#11
2382         eor     r2,r2,r8
2383         add     r9,r9,r12,ror#6
2384         eor     r12,r10,r11
2385         eor     r0,r0,r10,ror#20
2386         add     r9,r9,r2
2387         ldr     r2,[sp,#12]
2388         and     r3,r3,r12
2389         add     r5,r5,r9
2390         add     r9,r9,r0,ror#2
2391         eor     r3,r3,r11
2392         add     r8,r8,r2
2393         eor     r2,r6,r7
2394         eor     r0,r5,r5,ror#5
2395         add     r9,r9,r3
2396         and     r2,r2,r5
2397         eor     r3,r0,r5,ror#19
2398         eor     r0,r9,r9,ror#11
2399         eor     r2,r2,r7
2400         add     r8,r8,r3,ror#6
2401         eor     r3,r9,r10
2402         eor     r0,r0,r9,ror#20
2403         add     r8,r8,r2
2404         ldr     r2,[sp,#16]
2405         and     r12,r12,r3
2406         add     r4,r4,r8
2407         add     r8,r8,r0,ror#2
2408         eor     r12,r12,r10
2409         vst1.32 {q8},[r1,:128]!
2410         add     r7,r7,r2
2411         eor     r2,r5,r6
2412         eor     r0,r4,r4,ror#5
2413         add     r8,r8,r12
2414         vld1.32 {q8},[r14,:128]!
2415         and     r2,r2,r4
2416         eor     r12,r0,r4,ror#19
2417         eor     r0,r8,r8,ror#11
2418         eor     r2,r2,r6
2419         vrev32.8        q1,q1
2420         add     r7,r7,r12,ror#6
2421         eor     r12,r8,r9
2422         eor     r0,r0,r8,ror#20
2423         add     r7,r7,r2
2424         vadd.i32        q8,q8,q1
2425         ldr     r2,[sp,#20]
2426         and     r3,r3,r12
2427         add     r11,r11,r7
2428         add     r7,r7,r0,ror#2
2429         eor     r3,r3,r9
2430         add     r6,r6,r2
2431         eor     r2,r4,r5
2432         eor     r0,r11,r11,ror#5
2433         add     r7,r7,r3
2434         and     r2,r2,r11
2435         eor     r3,r0,r11,ror#19
2436         eor     r0,r7,r7,ror#11
2437         eor     r2,r2,r5
2438         add     r6,r6,r3,ror#6
2439         eor     r3,r7,r8
2440         eor     r0,r0,r7,ror#20
2441         add     r6,r6,r2
2442         ldr     r2,[sp,#24]
2443         and     r12,r12,r3
2444         add     r10,r10,r6
2445         add     r6,r6,r0,ror#2
2446         eor     r12,r12,r8
2447         add     r5,r5,r2
2448         eor     r2,r11,r4
2449         eor     r0,r10,r10,ror#5
2450         add     r6,r6,r12
2451         and     r2,r2,r10
2452         eor     r12,r0,r10,ror#19
2453         eor     r0,r6,r6,ror#11
2454         eor     r2,r2,r4
2455         add     r5,r5,r12,ror#6
2456         eor     r12,r6,r7
2457         eor     r0,r0,r6,ror#20
2458         add     r5,r5,r2
2459         ldr     r2,[sp,#28]
2460         and     r3,r3,r12
2461         add     r9,r9,r5
2462         add     r5,r5,r0,ror#2
2463         eor     r3,r3,r7
2464         add     r4,r4,r2
2465         eor     r2,r10,r11
2466         eor     r0,r9,r9,ror#5
2467         add     r5,r5,r3
2468         and     r2,r2,r9
2469         eor     r3,r0,r9,ror#19
2470         eor     r0,r5,r5,ror#11
2471         eor     r2,r2,r11
2472         add     r4,r4,r3,ror#6
2473         eor     r3,r5,r6
2474         eor     r0,r0,r5,ror#20
2475         add     r4,r4,r2
2476         ldr     r2,[sp,#32]
2477         and     r12,r12,r3
2478         add     r8,r8,r4
2479         add     r4,r4,r0,ror#2
2480         eor     r12,r12,r6
2481         vst1.32 {q8},[r1,:128]!
2482         add     r11,r11,r2
2483         eor     r2,r9,r10
2484         eor     r0,r8,r8,ror#5
2485         add     r4,r4,r12
2486         vld1.32 {q8},[r14,:128]!
2487         and     r2,r2,r8
2488         eor     r12,r0,r8,ror#19
2489         eor     r0,r4,r4,ror#11
2490         eor     r2,r2,r10
2491         vrev32.8        q2,q2
2492         add     r11,r11,r12,ror#6
2493         eor     r12,r4,r5
2494         eor     r0,r0,r4,ror#20
2495         add     r11,r11,r2
2496         vadd.i32        q8,q8,q2
2497         ldr     r2,[sp,#36]
2498         and     r3,r3,r12
2499         add     r7,r7,r11
2500         add     r11,r11,r0,ror#2
2501         eor     r3,r3,r5
2502         add     r10,r10,r2
2503         eor     r2,r8,r9
2504         eor     r0,r7,r7,ror#5
2505         add     r11,r11,r3
2506         and     r2,r2,r7
2507         eor     r3,r0,r7,ror#19
2508         eor     r0,r11,r11,ror#11
2509         eor     r2,r2,r9
2510         add     r10,r10,r3,ror#6
2511         eor     r3,r11,r4
2512         eor     r0,r0,r11,ror#20
2513         add     r10,r10,r2
2514         ldr     r2,[sp,#40]
2515         and     r12,r12,r3
2516         add     r6,r6,r10
2517         add     r10,r10,r0,ror#2
2518         eor     r12,r12,r4
2519         add     r9,r9,r2
2520         eor     r2,r7,r8
2521         eor     r0,r6,r6,ror#5
2522         add     r10,r10,r12
2523         and     r2,r2,r6
2524         eor     r12,r0,r6,ror#19
2525         eor     r0,r10,r10,ror#11
2526         eor     r2,r2,r8
2527         add     r9,r9,r12,ror#6
2528         eor     r12,r10,r11
2529         eor     r0,r0,r10,ror#20
2530         add     r9,r9,r2
2531         ldr     r2,[sp,#44]
2532         and     r3,r3,r12
2533         add     r5,r5,r9
2534         add     r9,r9,r0,ror#2
2535         eor     r3,r3,r11
2536         add     r8,r8,r2
2537         eor     r2,r6,r7
2538         eor     r0,r5,r5,ror#5
2539         add     r9,r9,r3
2540         and     r2,r2,r5
2541         eor     r3,r0,r5,ror#19
2542         eor     r0,r9,r9,ror#11
2543         eor     r2,r2,r7
2544         add     r8,r8,r3,ror#6
2545         eor     r3,r9,r10
2546         eor     r0,r0,r9,ror#20
2547         add     r8,r8,r2
2548         ldr     r2,[sp,#48]
2549         and     r12,r12,r3
2550         add     r4,r4,r8
2551         add     r8,r8,r0,ror#2
2552         eor     r12,r12,r10
2553         vst1.32 {q8},[r1,:128]!
2554         add     r7,r7,r2
2555         eor     r2,r5,r6
2556         eor     r0,r4,r4,ror#5
2557         add     r8,r8,r12
2558         vld1.32 {q8},[r14,:128]!
2559         and     r2,r2,r4
2560         eor     r12,r0,r4,ror#19
2561         eor     r0,r8,r8,ror#11
2562         eor     r2,r2,r6
2563         vrev32.8        q3,q3
2564         add     r7,r7,r12,ror#6
2565         eor     r12,r8,r9
2566         eor     r0,r0,r8,ror#20
2567         add     r7,r7,r2
2568         vadd.i32        q8,q8,q3
2569         ldr     r2,[sp,#52]
2570         and     r3,r3,r12
2571         add     r11,r11,r7
2572         add     r7,r7,r0,ror#2
2573         eor     r3,r3,r9
2574         add     r6,r6,r2
2575         eor     r2,r4,r5
2576         eor     r0,r11,r11,ror#5
2577         add     r7,r7,r3
2578         and     r2,r2,r11
2579         eor     r3,r0,r11,ror#19
2580         eor     r0,r7,r7,ror#11
2581         eor     r2,r2,r5
2582         add     r6,r6,r3,ror#6
2583         eor     r3,r7,r8
2584         eor     r0,r0,r7,ror#20
2585         add     r6,r6,r2
2586         ldr     r2,[sp,#56]
2587         and     r12,r12,r3
2588         add     r10,r10,r6
2589         add     r6,r6,r0,ror#2
2590         eor     r12,r12,r8
2591         add     r5,r5,r2
2592         eor     r2,r11,r4
2593         eor     r0,r10,r10,ror#5
2594         add     r6,r6,r12
2595         and     r2,r2,r10
2596         eor     r12,r0,r10,ror#19
2597         eor     r0,r6,r6,ror#11
2598         eor     r2,r2,r4
2599         add     r5,r5,r12,ror#6
2600         eor     r12,r6,r7
2601         eor     r0,r0,r6,ror#20
2602         add     r5,r5,r2
2603         ldr     r2,[sp,#60]
2604         and     r3,r3,r12
2605         add     r9,r9,r5
2606         add     r5,r5,r0,ror#2
2607         eor     r3,r3,r7
2608         add     r4,r4,r2
2609         eor     r2,r10,r11
2610         eor     r0,r9,r9,ror#5
2611         add     r5,r5,r3
2612         and     r2,r2,r9
2613         eor     r3,r0,r9,ror#19
2614         eor     r0,r5,r5,ror#11
2615         eor     r2,r2,r11
2616         add     r4,r4,r3,ror#6
2617         eor     r3,r5,r6
2618         eor     r0,r0,r5,ror#20
2619         add     r4,r4,r2
2620         ldr     r2,[sp,#64]
2621         and     r12,r12,r3
2622         add     r8,r8,r4
2623         add     r4,r4,r0,ror#2
2624         eor     r12,r12,r6
2625         vst1.32 {q8},[r1,:128]!
2626         ldr     r0,[r2,#0]
2627         add     r4,r4,r12                       @ h+=Maj(a,b,c) from the past
2628         ldr     r12,[r2,#4]
2629         ldr     r3,[r2,#8]
2630         ldr     r1,[r2,#12]
2631         add     r4,r4,r0                        @ accumulate
2632         ldr     r0,[r2,#16]
2633         add     r5,r5,r12
2634         ldr     r12,[r2,#20]
2635         add     r6,r6,r3
2636         ldr     r3,[r2,#24]
2637         add     r7,r7,r1
2638         ldr     r1,[r2,#28]
2639         add     r8,r8,r0
2640         str     r4,[r2],#4
2641         add     r9,r9,r12
2642         str     r5,[r2],#4
2643         add     r10,r10,r3
2644         str     r6,[r2],#4
2645         add     r11,r11,r1
2646         str     r7,[r2],#4
2647         stmia   r2,{r8-r11}
2648
2649         ittte   ne
2650         movne   r1,sp
2651         ldrne   r2,[sp,#0]
2652         eorne   r12,r12,r12
2653         ldreq   sp,[sp,#76]                     @ restore original sp
2654         itt     ne
2655         eorne   r3,r5,r6
2656         bne     .L_00_48
2657
2658         ldmia   sp!,{r4-r12,pc}
2659 .size   sha256_block_data_order_neon,.-sha256_block_data_order_neon
2660 #endif
2661 #if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
@ Everything down to the matching #endif is assembled only when
@ __ARM_MAX_ARCH__ is at least 7 and __KERNEL__ is not defined.
2662
@ INST(a,b,c,d) emits one hand-encoded 32-bit instruction as four raw bytes.
@ Without __thumb2__ the bytes are emitted in the order given.  With
@ __thumb2__ the two 16-bit halves are swapped (c,d first, then a,b) and 0xc
@ is ORed into d, which turns the 0xf3 passed by every use below into 0xff.
@ NOTE(review): presumably raw bytes are used so that assemblers without
@ support for the SHA-256 mnemonics can still build this file - confirm.
2663 # ifdef __thumb2__
2664 #  define INST(a,b,c,d) .byte   c,d|0xc,a,b
2665 # else
2666 #  define INST(a,b,c,d) .byte   a,b,c,d
2667 # endif
2668
@ ----------------------------------------------------------------------
@ sha256_block_data_order_armv8
@
@ SHA-256 block transform built on the sha256h / sha256h2 / sha256su0 /
@ sha256su1 instructions (emitted through INST; the mnemonic of each one is
@ given in its trailing comment).
@
@ In:    r0 = address of the 32-byte hash state (eight 32-bit words),
@             loaded into q0,q1 on entry and written back on exit
@        r1 = input pointer, advanced by 64 bytes per block
@        r2 = number of 64-byte blocks; turned into the end-of-input address
@ Uses:  r3 = running pointer into the K256 table (defined outside this
@             section), rewound to the table start once per block
@        q2 = copy of q0 taken before each sha256h, consumed by sha256h2
@        q8-q11  = the four 16-byte quarters of the current input block
@        q12/q13 = K256 words plus message words, used alternately
@        q14/q15 = state as it was at the start of the current block
@ Clobbers: r1-r3, q0-q2, q8-q15, condition flags.  No stack is used and no
@        other core register is written.
@ NOTE(review): the loop body runs before the end test, so a block count of
@ zero is not handled here - callers presumably always pass at least one.
@ ----------------------------------------------------------------------
2669 .type   sha256_block_data_order_armv8,%function
@ Align the entry point to 2^5 = 32 bytes.
2670 .align  5
2671 sha256_block_data_order_armv8:
2672 .LARMv8:
2673         vld1.32 {q0,q1},[r0]
@ r3 = address of K256.  The Thumb-2 path forms it from the address of
@ .LARMv8 minus the assemble-time distance between the two labels.
2674 # ifdef __thumb2__
2675         adr     r3,.LARMv8
2676         sub     r3,r3,#.LARMv8-K256
2677 # else
2678         adrl    r3,K256
2679 # endif
2680         add     r2,r1,r2,lsl#6  @ r2 = r1 + r2*64: len becomes the end-of-input address
2681
@ One iteration per 64-byte input block.
2682 .Loop_v8:
@ Load the next 64 input bytes into q8-q11 and the first four K256 words
@ into q12, then reverse the byte order inside every 32-bit input word.
2683         vld1.8          {q8-q9},[r1]!
2684         vld1.8          {q10-q11},[r1]!
2685         vld1.32         {q12},[r3]!
2686         vrev32.8        q8,q8
2687         vrev32.8        q9,q9
2688         vrev32.8        q10,q10
2689         vrev32.8        q11,q11
2690         vmov            q14,q0  @ offload: keep the incoming state in q14,q15
2691         vmov            q15,q1
@ Z is set when the block just loaded ends at the end of input.  The result
@ is consumed by the bne at the bottom of the loop.
2692         teq             r1,r2
@ Sixteen groups follow.  Each one adds four K256 words to four message
@ words (vadd), copies q0 to q2, then applies sha256h and sha256h2 with that
@ sum.  The vld1.32 at the head of a group fetches the K256 words for the
@ group after it.  The first twelve groups also run sha256su0/sha256su1 on
@ the message registers q8-q11; the last four do not.
2693         vld1.32         {q13},[r3]!
2694         vadd.i32        q12,q12,q8
2695         INST(0xe2,0x03,0xfa,0xf3)       @ sha256su0 q8,q9
2696         vmov            q2,q0
2697         INST(0x68,0x0c,0x02,0xf3)       @ sha256h q0,q1,q12
2698         INST(0x68,0x2c,0x14,0xf3)       @ sha256h2 q1,q2,q12
2699         INST(0xe6,0x0c,0x64,0xf3)       @ sha256su1 q8,q10,q11
2700         vld1.32         {q12},[r3]!
2701         vadd.i32        q13,q13,q9
2702         INST(0xe4,0x23,0xfa,0xf3)       @ sha256su0 q9,q10
2703         vmov            q2,q0
2704         INST(0x6a,0x0c,0x02,0xf3)       @ sha256h q0,q1,q13
2705         INST(0x6a,0x2c,0x14,0xf3)       @ sha256h2 q1,q2,q13
2706         INST(0xe0,0x2c,0x66,0xf3)       @ sha256su1 q9,q11,q8
2707         vld1.32         {q13},[r3]!
2708         vadd.i32        q12,q12,q10
2709         INST(0xe6,0x43,0xfa,0xf3)       @ sha256su0 q10,q11
2710         vmov            q2,q0
2711         INST(0x68,0x0c,0x02,0xf3)       @ sha256h q0,q1,q12
2712         INST(0x68,0x2c,0x14,0xf3)       @ sha256h2 q1,q2,q12
2713         INST(0xe2,0x4c,0x60,0xf3)       @ sha256su1 q10,q8,q9
2714         vld1.32         {q12},[r3]!
2715         vadd.i32        q13,q13,q11
2716         INST(0xe0,0x63,0xfa,0xf3)       @ sha256su0 q11,q8
2717         vmov            q2,q0
2718         INST(0x6a,0x0c,0x02,0xf3)       @ sha256h q0,q1,q13
2719         INST(0x6a,0x2c,0x14,0xf3)       @ sha256h2 q1,q2,q13
2720         INST(0xe4,0x6c,0x62,0xf3)       @ sha256su1 q11,q9,q10
2721         vld1.32         {q13},[r3]!
2722         vadd.i32        q12,q12,q8
2723         INST(0xe2,0x03,0xfa,0xf3)       @ sha256su0 q8,q9
2724         vmov            q2,q0
2725         INST(0x68,0x0c,0x02,0xf3)       @ sha256h q0,q1,q12
2726         INST(0x68,0x2c,0x14,0xf3)       @ sha256h2 q1,q2,q12
2727         INST(0xe6,0x0c,0x64,0xf3)       @ sha256su1 q8,q10,q11
2728         vld1.32         {q12},[r3]!
2729         vadd.i32        q13,q13,q9
2730         INST(0xe4,0x23,0xfa,0xf3)       @ sha256su0 q9,q10
2731         vmov            q2,q0
2732         INST(0x6a,0x0c,0x02,0xf3)       @ sha256h q0,q1,q13
2733         INST(0x6a,0x2c,0x14,0xf3)       @ sha256h2 q1,q2,q13
2734         INST(0xe0,0x2c,0x66,0xf3)       @ sha256su1 q9,q11,q8
2735         vld1.32         {q13},[r3]!
2736         vadd.i32        q12,q12,q10
2737         INST(0xe6,0x43,0xfa,0xf3)       @ sha256su0 q10,q11
2738         vmov            q2,q0
2739         INST(0x68,0x0c,0x02,0xf3)       @ sha256h q0,q1,q12
2740         INST(0x68,0x2c,0x14,0xf3)       @ sha256h2 q1,q2,q12
2741         INST(0xe2,0x4c,0x60,0xf3)       @ sha256su1 q10,q8,q9
2742         vld1.32         {q12},[r3]!
2743         vadd.i32        q13,q13,q11
2744         INST(0xe0,0x63,0xfa,0xf3)       @ sha256su0 q11,q8
2745         vmov            q2,q0
2746         INST(0x6a,0x0c,0x02,0xf3)       @ sha256h q0,q1,q13
2747         INST(0x6a,0x2c,0x14,0xf3)       @ sha256h2 q1,q2,q13
2748         INST(0xe4,0x6c,0x62,0xf3)       @ sha256su1 q11,q9,q10
2749         vld1.32         {q13},[r3]!
2750         vadd.i32        q12,q12,q8
2751         INST(0xe2,0x03,0xfa,0xf3)       @ sha256su0 q8,q9
2752         vmov            q2,q0
2753         INST(0x68,0x0c,0x02,0xf3)       @ sha256h q0,q1,q12
2754         INST(0x68,0x2c,0x14,0xf3)       @ sha256h2 q1,q2,q12
2755         INST(0xe6,0x0c,0x64,0xf3)       @ sha256su1 q8,q10,q11
2756         vld1.32         {q12},[r3]!
2757         vadd.i32        q13,q13,q9
2758         INST(0xe4,0x23,0xfa,0xf3)       @ sha256su0 q9,q10
2759         vmov            q2,q0
2760         INST(0x6a,0x0c,0x02,0xf3)       @ sha256h q0,q1,q13
2761         INST(0x6a,0x2c,0x14,0xf3)       @ sha256h2 q1,q2,q13
2762         INST(0xe0,0x2c,0x66,0xf3)       @ sha256su1 q9,q11,q8
2763         vld1.32         {q13},[r3]!
2764         vadd.i32        q12,q12,q10
2765         INST(0xe6,0x43,0xfa,0xf3)       @ sha256su0 q10,q11
2766         vmov            q2,q0
2767         INST(0x68,0x0c,0x02,0xf3)       @ sha256h q0,q1,q12
2768         INST(0x68,0x2c,0x14,0xf3)       @ sha256h2 q1,q2,q12
2769         INST(0xe2,0x4c,0x60,0xf3)       @ sha256su1 q10,q8,q9
2770         vld1.32         {q12},[r3]!
2771         vadd.i32        q13,q13,q11
2772         INST(0xe0,0x63,0xfa,0xf3)       @ sha256su0 q11,q8
2773         vmov            q2,q0
2774         INST(0x6a,0x0c,0x02,0xf3)       @ sha256h q0,q1,q13
2775         INST(0x6a,0x2c,0x14,0xf3)       @ sha256h2 q1,q2,q13
2776         INST(0xe4,0x6c,0x62,0xf3)       @ sha256su1 q11,q9,q10
@ Last four groups: q8-q11 are consumed as they stand, with no further
@ sha256su0/sha256su1 updates.
2777         vld1.32         {q13},[r3]!
2778         vadd.i32        q12,q12,q8
2779         vmov            q2,q0
2780         INST(0x68,0x0c,0x02,0xf3)       @ sha256h q0,q1,q12
2781         INST(0x68,0x2c,0x14,0xf3)       @ sha256h2 q1,q2,q12
2782
2783         vld1.32         {q12},[r3]!
2784         vadd.i32        q13,q13,q9
2785         vmov            q2,q0
2786         INST(0x6a,0x0c,0x02,0xf3)       @ sha256h q0,q1,q13
2787         INST(0x6a,0x2c,0x14,0xf3)       @ sha256h2 q1,q2,q13
2788
@ Final K256 load: no post-increment here.  The fifteen earlier loads moved
@ r3 forward by 15*16 = 256-16 bytes, which the sub below takes back.
2789         vld1.32         {q13},[r3]
2790         vadd.i32        q12,q12,q10
2791         sub             r3,r3,#256-16   @ rewind r3 to the start of K256
2792         vmov            q2,q0
2793         INST(0x68,0x0c,0x02,0xf3)       @ sha256h q0,q1,q12
2794         INST(0x68,0x2c,0x14,0xf3)       @ sha256h2 q1,q2,q12
2795
2796         vadd.i32        q13,q13,q11
2797         vmov            q2,q0
2798         INST(0x6a,0x0c,0x02,0xf3)       @ sha256h q0,q1,q13
2799         INST(0x6a,0x2c,0x14,0xf3)       @ sha256h2 q1,q2,q13
2800
@ Add the state saved in q14,q15 at the top of the loop to the result.
2801         vadd.i32        q0,q0,q14
2802         vadd.i32        q1,q1,q15
@ Loop again unless the teq r1,r2 above found the end of input.
2803         it              ne
2804         bne             .Loop_v8
2805
@ Write the updated state back through r0.
2806         vst1.32         {q0,q1},[r0]
2807
2808         bx      lr              @ return to the caller
2809 .size   sha256_block_data_order_armv8,.-sha256_block_data_order_armv8
2810 #endif
@ Identification string, emitted NUL-terminated into the output.
2811 .asciz  "SHA256 block transform for ARMv4/NEON/ARMv8, CRYPTOGAMS by <appro@openssl.org>"
@ Re-align to 2^2 = 4 bytes after the variable-length string.
2812 .align  2
@ When __ARM_MAX_ARCH__ is at least 7 and __KERNEL__ is not defined, declare
@ OPENSSL_armcap_P as a common symbol: 4 bytes long, 4-byte aligned.
@ NOTE(review): the symbol is not referenced in this part of the file;
@ presumably it is the CPU capability word read by dispatch code - confirm.
2813 #if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
2814 .comm   OPENSSL_armcap_P,4,4
2815 #endif