Merge remote-tracking branch 'spi/fix/core' into spi-linus
[sfrench/cifs-2.6.git] / arch / arm / crypto / sha256-core.S_shipped
1
2 @ ====================================================================
3 @ Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
4 @ project. The module is, however, dual licensed under OpenSSL and
5 @ CRYPTOGAMS licenses depending on where you obtain it. For further
6 @ details see http://www.openssl.org/~appro/cryptogams/.
7 @
8 @ Permission to use under GPL terms is granted.
9 @ ====================================================================
10
11 @ SHA256 block procedure for ARMv4. May 2007.
12
13 @ Performance is ~2x better than gcc 3.4 generated code and in "abso-
14 @ lute" terms is ~2250 cycles per 64-byte block or ~35 cycles per
15 @ byte [on single-issue Xscale PXA250 core].
16
17 @ July 2010.
18 @
19 @ Rescheduling for dual-issue pipeline resulted in 22% improvement on
20 @ Cortex A8 core and ~20 cycles per processed byte.
21
22 @ February 2011.
23 @
24 @ Profiler-assisted and platform-specific optimization resulted in 16%
25 @ improvement on Cortex A8 core and ~15.4 cycles per processed byte.
26
27 @ September 2013.
28 @
29 @ Add NEON implementation. On Cortex A8 it was measured to process one
30 @ byte in 12.5 cycles or 23% faster than integer-only code. Snapdragon
31 @ S4 does it in 12.5 cycles too, but it's 50% faster than integer-only
32 @ code (meaning that the latter performs sub-optimally; nothing was done
33 @ about it).
34
35 @ May 2014.
36 @
37 @ Add ARMv8 code path performing at 2.0 cpb on Apple A7.
38
@ Build configuration. Outside the kernel the architecture macros come from
@ arm_arch.h (presumably __ARM_ARCH__ and __ARM_MAX_ARCH__ -- confirm against
@ that header). Inside the kernel __ARM_ARCH__ is taken from
@ __LINUX_ARM_ARCH__ and __ARM_MAX_ARCH__ is pinned to 7.
39 #ifndef __KERNEL__
40 # include "arm_arch.h"
41 #else
42 # define __ARM_ARCH__ __LINUX_ARM_ARCH__
43 # define __ARM_MAX_ARCH__ 7
44 #endif
45
46 .text
@ Instruction-set selection:
@   __ARM_ARCH__ < 7            -> ARM (32-bit) encoding
@   __ARM_ARCH__ >= 7, thumb2   -> unified syntax, Thumb encoding, and every
@                                  "adrl" in this file is rewritten to "adr"
@   __ARM_ARCH__ >= 7, otherwise-> unified syntax, ARM (32-bit) encoding
47 #if __ARM_ARCH__<7
48 .code   32
49 #else
50 .syntax unified
51 # ifdef __thumb2__
@ NOTE(review): presumably needed because adrl is not usable in Thumb mode --
@ confirm against the assembler documentation.
52 #  define adrl adr
53 .thumb
54 # else
55 .code   32
56 # endif
57 #endif
58
@ K256: the 64 SHA-256 round constants, stored as 16 rows of four 32-bit
@ words (256 bytes) on a 32-byte boundary. The round code walks the table
@ sequentially through r14 ("ldr rX,[r14],#4 @ *K256++") and adds each word
@ into the working variable ("h+=K256[i]").
@
@ Layout constraint: sha256_block_data_order locates this table as its own
@ entry address minus 256+32 ("sub r14,r3,#256+32 @ K256"), so nothing may be
@ inserted between K256 and the function entry that changes that distance.
59 .type   K256,%object
60 .align  5
61 K256:
62 .word   0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5  @ K[0..3]
63 .word   0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5  @ K[4..7]
64 .word   0xd807aa98,0x12835b01,0x243185be,0x550c7dc3  @ K[8..11]
65 .word   0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174  @ K[12..15]
66 .word   0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc  @ K[16..19]
67 .word   0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da  @ K[20..23]
68 .word   0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7  @ K[24..27]
69 .word   0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967  @ K[28..31]
70 .word   0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13  @ K[32..35]
71 .word   0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85  @ K[36..39]
72 .word   0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3  @ K[40..43]
73 .word   0xd192e819,0xd6990624,0xf40e3585,0x106aa070  @ K[44..47]
74 .word   0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5  @ K[48..51]
75 .word   0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3  @ K[52..55]
76 .word   0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208  @ K[56..59]
77 .word   0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2  @ K[60..63]
@ The symbol size is taken here, before the trailing zero word, so it covers
@ only the 64 constants above.
78 .size   K256,.-K256
79 .word   0                               @ terminator
@ Non-kernel builds with __ARM_MAX_ARCH__ >= 7 only: a word holding the
@ distance from sha256_block_data_order to OPENSSL_armcap_P. The function
@ loads this word and uses it as an offset from its own entry address (r3)
@ to read the capability flags, which select the ARMv8 or NEON code path.
80 #if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
81 .LOPENSSL_armcap:
82 .word   OPENSSL_armcap_P-sha256_block_data_order
83 #endif
@ Pad to the next 32-byte boundary so the function entry that follows is
@ aligned; the function relies on K256 starting 256+32 bytes before it.
84 .align  5
85
86 .global sha256_block_data_order
87 .type   sha256_block_data_order,%function
88 sha256_block_data_order:
89 #if __ARM_ARCH__<7
90         sub     r3,pc,#8                @ sha256_block_data_order
91 #else
92         adr     r3,sha256_block_data_order
93 #endif
94 #if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
95         ldr     r12,.LOPENSSL_armcap
96         ldr     r12,[r3,r12]            @ OPENSSL_armcap_P
97         tst     r12,#ARMV8_SHA256
98         bne     .LARMv8
99         tst     r12,#ARMV7_NEON
100         bne     .LNEON
101 #endif
102         add     r2,r1,r2,lsl#6  @ len to point at the end of inp
103         stmdb   sp!,{r0,r1,r2,r4-r11,lr}
104         ldmia   r0,{r4,r5,r6,r7,r8,r9,r10,r11}
105         sub     r14,r3,#256+32  @ K256
106         sub     sp,sp,#16*4             @ alloca(X[16])
107 .Loop:
108 # if __ARM_ARCH__>=7
109         ldr     r2,[r1],#4
110 # else
111         ldrb    r2,[r1,#3]
112 # endif
113         eor     r3,r5,r6                @ magic
114         eor     r12,r12,r12
115 #if __ARM_ARCH__>=7
116         @ ldr   r2,[r1],#4                      @ 0
117 # if 0==15
118         str     r1,[sp,#17*4]                   @ make room for r1
119 # endif
120         eor     r0,r8,r8,ror#5
121         add     r4,r4,r12                       @ h+=Maj(a,b,c) from the past
122         eor     r0,r0,r8,ror#19 @ Sigma1(e)
123 # ifndef __ARMEB__
124         rev     r2,r2
125 # endif
126 #else
127         @ ldrb  r2,[r1,#3]                      @ 0
128         add     r4,r4,r12                       @ h+=Maj(a,b,c) from the past
129         ldrb    r12,[r1,#2]
130         ldrb    r0,[r1,#1]
131         orr     r2,r2,r12,lsl#8
132         ldrb    r12,[r1],#4
133         orr     r2,r2,r0,lsl#16
134 # if 0==15
135         str     r1,[sp,#17*4]                   @ make room for r1
136 # endif
137         eor     r0,r8,r8,ror#5
138         orr     r2,r2,r12,lsl#24
139         eor     r0,r0,r8,ror#19 @ Sigma1(e)
140 #endif
141         ldr     r12,[r14],#4                    @ *K256++
142         add     r11,r11,r2                      @ h+=X[i]
143         str     r2,[sp,#0*4]
144         eor     r2,r9,r10
145         add     r11,r11,r0,ror#6        @ h+=Sigma1(e)
146         and     r2,r2,r8
147         add     r11,r11,r12                     @ h+=K256[i]
148         eor     r2,r2,r10                       @ Ch(e,f,g)
149         eor     r0,r4,r4,ror#11
150         add     r11,r11,r2                      @ h+=Ch(e,f,g)
151 #if 0==31
152         and     r12,r12,#0xff
153         cmp     r12,#0xf2                       @ done?
154 #endif
155 #if 0<15
156 # if __ARM_ARCH__>=7
157         ldr     r2,[r1],#4                      @ prefetch
158 # else
159         ldrb    r2,[r1,#3]
160 # endif
161         eor     r12,r4,r5                       @ a^b, b^c in next round
162 #else
163         ldr     r2,[sp,#2*4]            @ from future BODY_16_xx
164         eor     r12,r4,r5                       @ a^b, b^c in next round
165         ldr     r1,[sp,#15*4]   @ from future BODY_16_xx
166 #endif
167         eor     r0,r0,r4,ror#20 @ Sigma0(a)
168         and     r3,r3,r12                       @ (b^c)&=(a^b)
169         add     r7,r7,r11                       @ d+=h
170         eor     r3,r3,r5                        @ Maj(a,b,c)
171         add     r11,r11,r0,ror#2        @ h+=Sigma0(a)
172         @ add   r11,r11,r3                      @ h+=Maj(a,b,c)
173 #if __ARM_ARCH__>=7
174         @ ldr   r2,[r1],#4                      @ 1
175 # if 1==15
176         str     r1,[sp,#17*4]                   @ make room for r1
177 # endif
178         eor     r0,r7,r7,ror#5
179         add     r11,r11,r3                      @ h+=Maj(a,b,c) from the past
180         eor     r0,r0,r7,ror#19 @ Sigma1(e)
181 # ifndef __ARMEB__
182         rev     r2,r2
183 # endif
184 #else
185         @ ldrb  r2,[r1,#3]                      @ 1
186         add     r11,r11,r3                      @ h+=Maj(a,b,c) from the past
187         ldrb    r3,[r1,#2]
188         ldrb    r0,[r1,#1]
189         orr     r2,r2,r3,lsl#8
190         ldrb    r3,[r1],#4
191         orr     r2,r2,r0,lsl#16
192 # if 1==15
193         str     r1,[sp,#17*4]                   @ make room for r1
194 # endif
195         eor     r0,r7,r7,ror#5
196         orr     r2,r2,r3,lsl#24
197         eor     r0,r0,r7,ror#19 @ Sigma1(e)
198 #endif
199         ldr     r3,[r14],#4                     @ *K256++
200         add     r10,r10,r2                      @ h+=X[i]
201         str     r2,[sp,#1*4]
202         eor     r2,r8,r9
203         add     r10,r10,r0,ror#6        @ h+=Sigma1(e)
204         and     r2,r2,r7
205         add     r10,r10,r3                      @ h+=K256[i]
206         eor     r2,r2,r9                        @ Ch(e,f,g)
207         eor     r0,r11,r11,ror#11
208         add     r10,r10,r2                      @ h+=Ch(e,f,g)
209 #if 1==31
210         and     r3,r3,#0xff
211         cmp     r3,#0xf2                        @ done?
212 #endif
213 #if 1<15
214 # if __ARM_ARCH__>=7
215         ldr     r2,[r1],#4                      @ prefetch
216 # else
217         ldrb    r2,[r1,#3]
218 # endif
219         eor     r3,r11,r4                       @ a^b, b^c in next round
220 #else
221         ldr     r2,[sp,#3*4]            @ from future BODY_16_xx
222         eor     r3,r11,r4                       @ a^b, b^c in next round
223         ldr     r1,[sp,#0*4]    @ from future BODY_16_xx
224 #endif
225         eor     r0,r0,r11,ror#20        @ Sigma0(a)
226         and     r12,r12,r3                      @ (b^c)&=(a^b)
227         add     r6,r6,r10                       @ d+=h
228         eor     r12,r12,r4                      @ Maj(a,b,c)
229         add     r10,r10,r0,ror#2        @ h+=Sigma0(a)
230         @ add   r10,r10,r12                     @ h+=Maj(a,b,c)
231 #if __ARM_ARCH__>=7
232         @ ldr   r2,[r1],#4                      @ 2
233 # if 2==15
234         str     r1,[sp,#17*4]                   @ make room for r1
235 # endif
236         eor     r0,r6,r6,ror#5
237         add     r10,r10,r12                     @ h+=Maj(a,b,c) from the past
238         eor     r0,r0,r6,ror#19 @ Sigma1(e)
239 # ifndef __ARMEB__
240         rev     r2,r2
241 # endif
242 #else
243         @ ldrb  r2,[r1,#3]                      @ 2
244         add     r10,r10,r12                     @ h+=Maj(a,b,c) from the past
245         ldrb    r12,[r1,#2]
246         ldrb    r0,[r1,#1]
247         orr     r2,r2,r12,lsl#8
248         ldrb    r12,[r1],#4
249         orr     r2,r2,r0,lsl#16
250 # if 2==15
251         str     r1,[sp,#17*4]                   @ make room for r1
252 # endif
253         eor     r0,r6,r6,ror#5
254         orr     r2,r2,r12,lsl#24
255         eor     r0,r0,r6,ror#19 @ Sigma1(e)
256 #endif
257         ldr     r12,[r14],#4                    @ *K256++
258         add     r9,r9,r2                        @ h+=X[i]
259         str     r2,[sp,#2*4]
260         eor     r2,r7,r8
261         add     r9,r9,r0,ror#6  @ h+=Sigma1(e)
262         and     r2,r2,r6
263         add     r9,r9,r12                       @ h+=K256[i]
264         eor     r2,r2,r8                        @ Ch(e,f,g)
265         eor     r0,r10,r10,ror#11
266         add     r9,r9,r2                        @ h+=Ch(e,f,g)
267 #if 2==31
268         and     r12,r12,#0xff
269         cmp     r12,#0xf2                       @ done?
270 #endif
271 #if 2<15
272 # if __ARM_ARCH__>=7
273         ldr     r2,[r1],#4                      @ prefetch
274 # else
275         ldrb    r2,[r1,#3]
276 # endif
277         eor     r12,r10,r11                     @ a^b, b^c in next round
278 #else
279         ldr     r2,[sp,#4*4]            @ from future BODY_16_xx
280         eor     r12,r10,r11                     @ a^b, b^c in next round
281         ldr     r1,[sp,#1*4]    @ from future BODY_16_xx
282 #endif
283         eor     r0,r0,r10,ror#20        @ Sigma0(a)
284         and     r3,r3,r12                       @ (b^c)&=(a^b)
285         add     r5,r5,r9                        @ d+=h
286         eor     r3,r3,r11                       @ Maj(a,b,c)
287         add     r9,r9,r0,ror#2  @ h+=Sigma0(a)
288         @ add   r9,r9,r3                        @ h+=Maj(a,b,c)
289 #if __ARM_ARCH__>=7
290         @ ldr   r2,[r1],#4                      @ 3
291 # if 3==15
292         str     r1,[sp,#17*4]                   @ make room for r1
293 # endif
294         eor     r0,r5,r5,ror#5
295         add     r9,r9,r3                        @ h+=Maj(a,b,c) from the past
296         eor     r0,r0,r5,ror#19 @ Sigma1(e)
297 # ifndef __ARMEB__
298         rev     r2,r2
299 # endif
300 #else
301         @ ldrb  r2,[r1,#3]                      @ 3
302         add     r9,r9,r3                        @ h+=Maj(a,b,c) from the past
303         ldrb    r3,[r1,#2]
304         ldrb    r0,[r1,#1]
305         orr     r2,r2,r3,lsl#8
306         ldrb    r3,[r1],#4
307         orr     r2,r2,r0,lsl#16
308 # if 3==15
309         str     r1,[sp,#17*4]                   @ make room for r1
310 # endif
311         eor     r0,r5,r5,ror#5
312         orr     r2,r2,r3,lsl#24
313         eor     r0,r0,r5,ror#19 @ Sigma1(e)
314 #endif
315         ldr     r3,[r14],#4                     @ *K256++
316         add     r8,r8,r2                        @ h+=X[i]
317         str     r2,[sp,#3*4]
318         eor     r2,r6,r7
319         add     r8,r8,r0,ror#6  @ h+=Sigma1(e)
320         and     r2,r2,r5
321         add     r8,r8,r3                        @ h+=K256[i]
322         eor     r2,r2,r7                        @ Ch(e,f,g)
323         eor     r0,r9,r9,ror#11
324         add     r8,r8,r2                        @ h+=Ch(e,f,g)
325 #if 3==31
326         and     r3,r3,#0xff
327         cmp     r3,#0xf2                        @ done?
328 #endif
329 #if 3<15
330 # if __ARM_ARCH__>=7
331         ldr     r2,[r1],#4                      @ prefetch
332 # else
333         ldrb    r2,[r1,#3]
334 # endif
335         eor     r3,r9,r10                       @ a^b, b^c in next round
336 #else
337         ldr     r2,[sp,#5*4]            @ from future BODY_16_xx
338         eor     r3,r9,r10                       @ a^b, b^c in next round
339         ldr     r1,[sp,#2*4]    @ from future BODY_16_xx
340 #endif
341         eor     r0,r0,r9,ror#20 @ Sigma0(a)
342         and     r12,r12,r3                      @ (b^c)&=(a^b)
343         add     r4,r4,r8                        @ d+=h
344         eor     r12,r12,r10                     @ Maj(a,b,c)
345         add     r8,r8,r0,ror#2  @ h+=Sigma0(a)
346         @ add   r8,r8,r12                       @ h+=Maj(a,b,c)
347 #if __ARM_ARCH__>=7
348         @ ldr   r2,[r1],#4                      @ 4
349 # if 4==15
350         str     r1,[sp,#17*4]                   @ make room for r1
351 # endif
352         eor     r0,r4,r4,ror#5
353         add     r8,r8,r12                       @ h+=Maj(a,b,c) from the past
354         eor     r0,r0,r4,ror#19 @ Sigma1(e)
355 # ifndef __ARMEB__
356         rev     r2,r2
357 # endif
358 #else
359         @ ldrb  r2,[r1,#3]                      @ 4
360         add     r8,r8,r12                       @ h+=Maj(a,b,c) from the past
361         ldrb    r12,[r1,#2]
362         ldrb    r0,[r1,#1]
363         orr     r2,r2,r12,lsl#8
364         ldrb    r12,[r1],#4
365         orr     r2,r2,r0,lsl#16
366 # if 4==15
367         str     r1,[sp,#17*4]                   @ make room for r1
368 # endif
369         eor     r0,r4,r4,ror#5
370         orr     r2,r2,r12,lsl#24
371         eor     r0,r0,r4,ror#19 @ Sigma1(e)
372 #endif
373         ldr     r12,[r14],#4                    @ *K256++
374         add     r7,r7,r2                        @ h+=X[i]
375         str     r2,[sp,#4*4]
376         eor     r2,r5,r6
377         add     r7,r7,r0,ror#6  @ h+=Sigma1(e)
378         and     r2,r2,r4
379         add     r7,r7,r12                       @ h+=K256[i]
380         eor     r2,r2,r6                        @ Ch(e,f,g)
381         eor     r0,r8,r8,ror#11
382         add     r7,r7,r2                        @ h+=Ch(e,f,g)
383 #if 4==31
384         and     r12,r12,#0xff
385         cmp     r12,#0xf2                       @ done?
386 #endif
387 #if 4<15
388 # if __ARM_ARCH__>=7
389         ldr     r2,[r1],#4                      @ prefetch
390 # else
391         ldrb    r2,[r1,#3]
392 # endif
393         eor     r12,r8,r9                       @ a^b, b^c in next round
394 #else
395         ldr     r2,[sp,#6*4]            @ from future BODY_16_xx
396         eor     r12,r8,r9                       @ a^b, b^c in next round
397         ldr     r1,[sp,#3*4]    @ from future BODY_16_xx
398 #endif
399         eor     r0,r0,r8,ror#20 @ Sigma0(a)
400         and     r3,r3,r12                       @ (b^c)&=(a^b)
401         add     r11,r11,r7                      @ d+=h
402         eor     r3,r3,r9                        @ Maj(a,b,c)
403         add     r7,r7,r0,ror#2  @ h+=Sigma0(a)
404         @ add   r7,r7,r3                        @ h+=Maj(a,b,c)
405 #if __ARM_ARCH__>=7
406         @ ldr   r2,[r1],#4                      @ 5
407 # if 5==15
408         str     r1,[sp,#17*4]                   @ make room for r1
409 # endif
410         eor     r0,r11,r11,ror#5
411         add     r7,r7,r3                        @ h+=Maj(a,b,c) from the past
412         eor     r0,r0,r11,ror#19        @ Sigma1(e)
413 # ifndef __ARMEB__
414         rev     r2,r2
415 # endif
416 #else
417         @ ldrb  r2,[r1,#3]                      @ 5
418         add     r7,r7,r3                        @ h+=Maj(a,b,c) from the past
419         ldrb    r3,[r1,#2]
420         ldrb    r0,[r1,#1]
421         orr     r2,r2,r3,lsl#8
422         ldrb    r3,[r1],#4
423         orr     r2,r2,r0,lsl#16
424 # if 5==15
425         str     r1,[sp,#17*4]                   @ make room for r1
426 # endif
427         eor     r0,r11,r11,ror#5
428         orr     r2,r2,r3,lsl#24
429         eor     r0,r0,r11,ror#19        @ Sigma1(e)
430 #endif
431         ldr     r3,[r14],#4                     @ *K256++
432         add     r6,r6,r2                        @ h+=X[i]
433         str     r2,[sp,#5*4]
434         eor     r2,r4,r5
435         add     r6,r6,r0,ror#6  @ h+=Sigma1(e)
436         and     r2,r2,r11
437         add     r6,r6,r3                        @ h+=K256[i]
438         eor     r2,r2,r5                        @ Ch(e,f,g)
439         eor     r0,r7,r7,ror#11
440         add     r6,r6,r2                        @ h+=Ch(e,f,g)
441 #if 5==31
442         and     r3,r3,#0xff
443         cmp     r3,#0xf2                        @ done?
444 #endif
445 #if 5<15
446 # if __ARM_ARCH__>=7
447         ldr     r2,[r1],#4                      @ prefetch
448 # else
449         ldrb    r2,[r1,#3]
450 # endif
451         eor     r3,r7,r8                        @ a^b, b^c in next round
452 #else
453         ldr     r2,[sp,#7*4]            @ from future BODY_16_xx
454         eor     r3,r7,r8                        @ a^b, b^c in next round
455         ldr     r1,[sp,#4*4]    @ from future BODY_16_xx
456 #endif
457         eor     r0,r0,r7,ror#20 @ Sigma0(a)
458         and     r12,r12,r3                      @ (b^c)&=(a^b)
459         add     r10,r10,r6                      @ d+=h
460         eor     r12,r12,r8                      @ Maj(a,b,c)
461         add     r6,r6,r0,ror#2  @ h+=Sigma0(a)
462         @ add   r6,r6,r12                       @ h+=Maj(a,b,c)
463 #if __ARM_ARCH__>=7
464         @ ldr   r2,[r1],#4                      @ 6
465 # if 6==15
466         str     r1,[sp,#17*4]                   @ make room for r1
467 # endif
468         eor     r0,r10,r10,ror#5
469         add     r6,r6,r12                       @ h+=Maj(a,b,c) from the past
470         eor     r0,r0,r10,ror#19        @ Sigma1(e)
471 # ifndef __ARMEB__
472         rev     r2,r2
473 # endif
474 #else
475         @ ldrb  r2,[r1,#3]                      @ 6
476         add     r6,r6,r12                       @ h+=Maj(a,b,c) from the past
477         ldrb    r12,[r1,#2]
478         ldrb    r0,[r1,#1]
479         orr     r2,r2,r12,lsl#8
480         ldrb    r12,[r1],#4
481         orr     r2,r2,r0,lsl#16
482 # if 6==15
483         str     r1,[sp,#17*4]                   @ make room for r1
484 # endif
485         eor     r0,r10,r10,ror#5
486         orr     r2,r2,r12,lsl#24
487         eor     r0,r0,r10,ror#19        @ Sigma1(e)
488 #endif
489         ldr     r12,[r14],#4                    @ *K256++
490         add     r5,r5,r2                        @ h+=X[i]
491         str     r2,[sp,#6*4]
492         eor     r2,r11,r4
493         add     r5,r5,r0,ror#6  @ h+=Sigma1(e)
494         and     r2,r2,r10
495         add     r5,r5,r12                       @ h+=K256[i]
496         eor     r2,r2,r4                        @ Ch(e,f,g)
497         eor     r0,r6,r6,ror#11
498         add     r5,r5,r2                        @ h+=Ch(e,f,g)
499 #if 6==31
500         and     r12,r12,#0xff
501         cmp     r12,#0xf2                       @ done?
502 #endif
503 #if 6<15
504 # if __ARM_ARCH__>=7
505         ldr     r2,[r1],#4                      @ prefetch
506 # else
507         ldrb    r2,[r1,#3]
508 # endif
509         eor     r12,r6,r7                       @ a^b, b^c in next round
510 #else
511         ldr     r2,[sp,#8*4]            @ from future BODY_16_xx
512         eor     r12,r6,r7                       @ a^b, b^c in next round
513         ldr     r1,[sp,#5*4]    @ from future BODY_16_xx
514 #endif
515         eor     r0,r0,r6,ror#20 @ Sigma0(a)
516         and     r3,r3,r12                       @ (b^c)&=(a^b)
517         add     r9,r9,r5                        @ d+=h
518         eor     r3,r3,r7                        @ Maj(a,b,c)
519         add     r5,r5,r0,ror#2  @ h+=Sigma0(a)
520         @ add   r5,r5,r3                        @ h+=Maj(a,b,c)
521 #if __ARM_ARCH__>=7
522         @ ldr   r2,[r1],#4                      @ 7
523 # if 7==15
524         str     r1,[sp,#17*4]                   @ make room for r1
525 # endif
526         eor     r0,r9,r9,ror#5
527         add     r5,r5,r3                        @ h+=Maj(a,b,c) from the past
528         eor     r0,r0,r9,ror#19 @ Sigma1(e)
529 # ifndef __ARMEB__
530         rev     r2,r2
531 # endif
532 #else
533         @ ldrb  r2,[r1,#3]                      @ 7
534         add     r5,r5,r3                        @ h+=Maj(a,b,c) from the past
535         ldrb    r3,[r1,#2]
536         ldrb    r0,[r1,#1]
537         orr     r2,r2,r3,lsl#8
538         ldrb    r3,[r1],#4
539         orr     r2,r2,r0,lsl#16
540 # if 7==15
541         str     r1,[sp,#17*4]                   @ make room for r1
542 # endif
543         eor     r0,r9,r9,ror#5
544         orr     r2,r2,r3,lsl#24
545         eor     r0,r0,r9,ror#19 @ Sigma1(e)
546 #endif
547         ldr     r3,[r14],#4                     @ *K256++
548         add     r4,r4,r2                        @ h+=X[i]
549         str     r2,[sp,#7*4]
550         eor     r2,r10,r11
551         add     r4,r4,r0,ror#6  @ h+=Sigma1(e)
552         and     r2,r2,r9
553         add     r4,r4,r3                        @ h+=K256[i]
554         eor     r2,r2,r11                       @ Ch(e,f,g)
555         eor     r0,r5,r5,ror#11
556         add     r4,r4,r2                        @ h+=Ch(e,f,g)
557 #if 7==31
558         and     r3,r3,#0xff
559         cmp     r3,#0xf2                        @ done?
560 #endif
561 #if 7<15
562 # if __ARM_ARCH__>=7
563         ldr     r2,[r1],#4                      @ prefetch
564 # else
565         ldrb    r2,[r1,#3]
566 # endif
567         eor     r3,r5,r6                        @ a^b, b^c in next round
568 #else
569         ldr     r2,[sp,#9*4]            @ from future BODY_16_xx
570         eor     r3,r5,r6                        @ a^b, b^c in next round
571         ldr     r1,[sp,#6*4]    @ from future BODY_16_xx
572 #endif
573         eor     r0,r0,r5,ror#20 @ Sigma0(a)
574         and     r12,r12,r3                      @ (b^c)&=(a^b)
575         add     r8,r8,r4                        @ d+=h
576         eor     r12,r12,r6                      @ Maj(a,b,c)
577         add     r4,r4,r0,ror#2  @ h+=Sigma0(a)
578         @ add   r4,r4,r12                       @ h+=Maj(a,b,c)
579 #if __ARM_ARCH__>=7
580         @ ldr   r2,[r1],#4                      @ 8
581 # if 8==15
582         str     r1,[sp,#17*4]                   @ make room for r1
583 # endif
584         eor     r0,r8,r8,ror#5
585         add     r4,r4,r12                       @ h+=Maj(a,b,c) from the past
586         eor     r0,r0,r8,ror#19 @ Sigma1(e)
587 # ifndef __ARMEB__
588         rev     r2,r2
589 # endif
590 #else
591         @ ldrb  r2,[r1,#3]                      @ 8
592         add     r4,r4,r12                       @ h+=Maj(a,b,c) from the past
593         ldrb    r12,[r1,#2]
594         ldrb    r0,[r1,#1]
595         orr     r2,r2,r12,lsl#8
596         ldrb    r12,[r1],#4
597         orr     r2,r2,r0,lsl#16
598 # if 8==15
599         str     r1,[sp,#17*4]                   @ make room for r1
600 # endif
601         eor     r0,r8,r8,ror#5
602         orr     r2,r2,r12,lsl#24
603         eor     r0,r0,r8,ror#19 @ Sigma1(e)
604 #endif
605         ldr     r12,[r14],#4                    @ *K256++
606         add     r11,r11,r2                      @ h+=X[i]
607         str     r2,[sp,#8*4]
608         eor     r2,r9,r10
609         add     r11,r11,r0,ror#6        @ h+=Sigma1(e)
610         and     r2,r2,r8
611         add     r11,r11,r12                     @ h+=K256[i]
612         eor     r2,r2,r10                       @ Ch(e,f,g)
613         eor     r0,r4,r4,ror#11
614         add     r11,r11,r2                      @ h+=Ch(e,f,g)
615 #if 8==31
616         and     r12,r12,#0xff
617         cmp     r12,#0xf2                       @ done?
618 #endif
619 #if 8<15
620 # if __ARM_ARCH__>=7
621         ldr     r2,[r1],#4                      @ prefetch
622 # else
623         ldrb    r2,[r1,#3]
624 # endif
625         eor     r12,r4,r5                       @ a^b, b^c in next round
626 #else
627         ldr     r2,[sp,#10*4]           @ from future BODY_16_xx
628         eor     r12,r4,r5                       @ a^b, b^c in next round
629         ldr     r1,[sp,#7*4]    @ from future BODY_16_xx
630 #endif
631         eor     r0,r0,r4,ror#20 @ Sigma0(a)
632         and     r3,r3,r12                       @ (b^c)&=(a^b)
633         add     r7,r7,r11                       @ d+=h
634         eor     r3,r3,r5                        @ Maj(a,b,c)
635         add     r11,r11,r0,ror#2        @ h+=Sigma0(a)
636         @ add   r11,r11,r3                      @ h+=Maj(a,b,c)
637 #if __ARM_ARCH__>=7
638         @ ldr   r2,[r1],#4                      @ 9
639 # if 9==15
640         str     r1,[sp,#17*4]                   @ make room for r1
641 # endif
642         eor     r0,r7,r7,ror#5
643         add     r11,r11,r3                      @ h+=Maj(a,b,c) from the past
644         eor     r0,r0,r7,ror#19 @ Sigma1(e)
645 # ifndef __ARMEB__
646         rev     r2,r2
647 # endif
648 #else
649         @ ldrb  r2,[r1,#3]                      @ 9
650         add     r11,r11,r3                      @ h+=Maj(a,b,c) from the past
651         ldrb    r3,[r1,#2]
652         ldrb    r0,[r1,#1]
653         orr     r2,r2,r3,lsl#8
654         ldrb    r3,[r1],#4
655         orr     r2,r2,r0,lsl#16
656 # if 9==15
657         str     r1,[sp,#17*4]                   @ make room for r1
658 # endif
659         eor     r0,r7,r7,ror#5
660         orr     r2,r2,r3,lsl#24
661         eor     r0,r0,r7,ror#19 @ Sigma1(e)
662 #endif
663         ldr     r3,[r14],#4                     @ *K256++
664         add     r10,r10,r2                      @ h+=X[i]
665         str     r2,[sp,#9*4]
666         eor     r2,r8,r9
667         add     r10,r10,r0,ror#6        @ h+=Sigma1(e)
668         and     r2,r2,r7
669         add     r10,r10,r3                      @ h+=K256[i]
670         eor     r2,r2,r9                        @ Ch(e,f,g)
671         eor     r0,r11,r11,ror#11
672         add     r10,r10,r2                      @ h+=Ch(e,f,g)
673 #if 9==31
674         and     r3,r3,#0xff
675         cmp     r3,#0xf2                        @ done?
676 #endif
677 #if 9<15
678 # if __ARM_ARCH__>=7
679         ldr     r2,[r1],#4                      @ prefetch
680 # else
681         ldrb    r2,[r1,#3]
682 # endif
683         eor     r3,r11,r4                       @ a^b, b^c in next round
684 #else
685         ldr     r2,[sp,#11*4]           @ from future BODY_16_xx
686         eor     r3,r11,r4                       @ a^b, b^c in next round
687         ldr     r1,[sp,#8*4]    @ from future BODY_16_xx
688 #endif
689         eor     r0,r0,r11,ror#20        @ Sigma0(a)
690         and     r12,r12,r3                      @ (b^c)&=(a^b)
691         add     r6,r6,r10                       @ d+=h
692         eor     r12,r12,r4                      @ Maj(a,b,c)
693         add     r10,r10,r0,ror#2        @ h+=Sigma0(a)
694         @ add   r10,r10,r12                     @ h+=Maj(a,b,c)
695 #if __ARM_ARCH__>=7
696         @ ldr   r2,[r1],#4                      @ 10
697 # if 10==15
698         str     r1,[sp,#17*4]                   @ make room for r1
699 # endif
700         eor     r0,r6,r6,ror#5
701         add     r10,r10,r12                     @ h+=Maj(a,b,c) from the past
702         eor     r0,r0,r6,ror#19 @ Sigma1(e)
703 # ifndef __ARMEB__
704         rev     r2,r2
705 # endif
706 #else
707         @ ldrb  r2,[r1,#3]                      @ 10
708         add     r10,r10,r12                     @ h+=Maj(a,b,c) from the past
709         ldrb    r12,[r1,#2]
710         ldrb    r0,[r1,#1]
711         orr     r2,r2,r12,lsl#8
712         ldrb    r12,[r1],#4
713         orr     r2,r2,r0,lsl#16
714 # if 10==15
715         str     r1,[sp,#17*4]                   @ make room for r1
716 # endif
717         eor     r0,r6,r6,ror#5
718         orr     r2,r2,r12,lsl#24
719         eor     r0,r0,r6,ror#19 @ Sigma1(e)
720 #endif
721         ldr     r12,[r14],#4                    @ *K256++
722         add     r9,r9,r2                        @ h+=X[i]
723         str     r2,[sp,#10*4]
724         eor     r2,r7,r8
725         add     r9,r9,r0,ror#6  @ h+=Sigma1(e)
726         and     r2,r2,r6
727         add     r9,r9,r12                       @ h+=K256[i]
728         eor     r2,r2,r8                        @ Ch(e,f,g)
729         eor     r0,r10,r10,ror#11
730         add     r9,r9,r2                        @ h+=Ch(e,f,g)
731 #if 10==31
732         and     r12,r12,#0xff
733         cmp     r12,#0xf2                       @ done?
734 #endif
735 #if 10<15
736 # if __ARM_ARCH__>=7
737         ldr     r2,[r1],#4                      @ prefetch
738 # else
739         ldrb    r2,[r1,#3]
740 # endif
741         eor     r12,r10,r11                     @ a^b, b^c in next round
742 #else
743         ldr     r2,[sp,#12*4]           @ from future BODY_16_xx
744         eor     r12,r10,r11                     @ a^b, b^c in next round
745         ldr     r1,[sp,#9*4]    @ from future BODY_16_xx
746 #endif
747         eor     r0,r0,r10,ror#20        @ Sigma0(a)
748         and     r3,r3,r12                       @ (b^c)&=(a^b)
749         add     r5,r5,r9                        @ d+=h
750         eor     r3,r3,r11                       @ Maj(a,b,c)
751         add     r9,r9,r0,ror#2  @ h+=Sigma0(a)
752         @ add   r9,r9,r3                        @ h+=Maj(a,b,c)
753 #if __ARM_ARCH__>=7
754         @ ldr   r2,[r1],#4                      @ 11
755 # if 11==15
756         str     r1,[sp,#17*4]                   @ make room for r1
757 # endif
758         eor     r0,r5,r5,ror#5
759         add     r9,r9,r3                        @ h+=Maj(a,b,c) from the past
760         eor     r0,r0,r5,ror#19 @ Sigma1(e)
761 # ifndef __ARMEB__
762         rev     r2,r2
763 # endif
764 #else
765         @ ldrb  r2,[r1,#3]                      @ 11
766         add     r9,r9,r3                        @ h+=Maj(a,b,c) from the past
767         ldrb    r3,[r1,#2]
768         ldrb    r0,[r1,#1]
769         orr     r2,r2,r3,lsl#8
770         ldrb    r3,[r1],#4
771         orr     r2,r2,r0,lsl#16
772 # if 11==15
773         str     r1,[sp,#17*4]                   @ make room for r1
774 # endif
775         eor     r0,r5,r5,ror#5
776         orr     r2,r2,r3,lsl#24
777         eor     r0,r0,r5,ror#19 @ Sigma1(e)
778 #endif
779         ldr     r3,[r14],#4                     @ *K256++
780         add     r8,r8,r2                        @ h+=X[i]
781         str     r2,[sp,#11*4]
782         eor     r2,r6,r7
783         add     r8,r8,r0,ror#6  @ h+=Sigma1(e)
784         and     r2,r2,r5
785         add     r8,r8,r3                        @ h+=K256[i]
786         eor     r2,r2,r7                        @ Ch(e,f,g)
787         eor     r0,r9,r9,ror#11
788         add     r8,r8,r2                        @ h+=Ch(e,f,g)
789 #if 11==31
790         and     r3,r3,#0xff
791         cmp     r3,#0xf2                        @ done?
792 #endif
793 #if 11<15
794 # if __ARM_ARCH__>=7
795         ldr     r2,[r1],#4                      @ prefetch
796 # else
797         ldrb    r2,[r1,#3]
798 # endif
799         eor     r3,r9,r10                       @ a^b, b^c in next round
800 #else
801         ldr     r2,[sp,#13*4]           @ from future BODY_16_xx
802         eor     r3,r9,r10                       @ a^b, b^c in next round
803         ldr     r1,[sp,#10*4]   @ from future BODY_16_xx
804 #endif
805         eor     r0,r0,r9,ror#20 @ Sigma0(a)
806         and     r12,r12,r3                      @ (b^c)&=(a^b)
807         add     r4,r4,r8                        @ d+=h
808         eor     r12,r12,r10                     @ Maj(a,b,c)
809         add     r8,r8,r0,ror#2  @ h+=Sigma0(a)
810         @ add   r8,r8,r12                       @ h+=Maj(a,b,c)
811 #if __ARM_ARCH__>=7
812         @ ldr   r2,[r1],#4                      @ 12
813 # if 12==15
814         str     r1,[sp,#17*4]                   @ make room for r1
815 # endif
816         eor     r0,r4,r4,ror#5
817         add     r8,r8,r12                       @ h+=Maj(a,b,c) from the past
818         eor     r0,r0,r4,ror#19 @ Sigma1(e)
819 # ifndef __ARMEB__
820         rev     r2,r2
821 # endif
822 #else
823         @ ldrb  r2,[r1,#3]                      @ 12
824         add     r8,r8,r12                       @ h+=Maj(a,b,c) from the past
825         ldrb    r12,[r1,#2]
826         ldrb    r0,[r1,#1]
827         orr     r2,r2,r12,lsl#8
828         ldrb    r12,[r1],#4
829         orr     r2,r2,r0,lsl#16
830 # if 12==15
831         str     r1,[sp,#17*4]                   @ make room for r1
832 # endif
833         eor     r0,r4,r4,ror#5
834         orr     r2,r2,r12,lsl#24
835         eor     r0,r0,r4,ror#19 @ Sigma1(e)
836 #endif
837         ldr     r12,[r14],#4                    @ *K256++
838         add     r7,r7,r2                        @ h+=X[i]
839         str     r2,[sp,#12*4]
840         eor     r2,r5,r6
841         add     r7,r7,r0,ror#6  @ h+=Sigma1(e)
842         and     r2,r2,r4
843         add     r7,r7,r12                       @ h+=K256[i]
844         eor     r2,r2,r6                        @ Ch(e,f,g)
845         eor     r0,r8,r8,ror#11
846         add     r7,r7,r2                        @ h+=Ch(e,f,g)
847 #if 12==31
848         and     r12,r12,#0xff
849         cmp     r12,#0xf2                       @ done?
850 #endif
851 #if 12<15
852 # if __ARM_ARCH__>=7
853         ldr     r2,[r1],#4                      @ prefetch
854 # else
855         ldrb    r2,[r1,#3]
856 # endif
857         eor     r12,r8,r9                       @ a^b, b^c in next round
858 #else
859         ldr     r2,[sp,#14*4]           @ from future BODY_16_xx
860         eor     r12,r8,r9                       @ a^b, b^c in next round
861         ldr     r1,[sp,#11*4]   @ from future BODY_16_xx
862 #endif
863         eor     r0,r0,r8,ror#20 @ Sigma0(a)
864         and     r3,r3,r12                       @ (b^c)&=(a^b)
865         add     r11,r11,r7                      @ d+=h
866         eor     r3,r3,r9                        @ Maj(a,b,c)
867         add     r7,r7,r0,ror#2  @ h+=Sigma0(a)
868         @ add   r7,r7,r3                        @ h+=Maj(a,b,c)
869 #if __ARM_ARCH__>=7
870         @ ldr   r2,[r1],#4                      @ 13
871 # if 13==15
872         str     r1,[sp,#17*4]                   @ make room for r1
873 # endif
874         eor     r0,r11,r11,ror#5
875         add     r7,r7,r3                        @ h+=Maj(a,b,c) from the past
876         eor     r0,r0,r11,ror#19        @ Sigma1(e)
877 # ifndef __ARMEB__
878         rev     r2,r2
879 # endif
880 #else
881         @ ldrb  r2,[r1,#3]                      @ 13
882         add     r7,r7,r3                        @ h+=Maj(a,b,c) from the past
883         ldrb    r3,[r1,#2]
884         ldrb    r0,[r1,#1]
885         orr     r2,r2,r3,lsl#8
886         ldrb    r3,[r1],#4
887         orr     r2,r2,r0,lsl#16
888 # if 13==15
889         str     r1,[sp,#17*4]                   @ make room for r1
890 # endif
891         eor     r0,r11,r11,ror#5
892         orr     r2,r2,r3,lsl#24
893         eor     r0,r0,r11,ror#19        @ Sigma1(e)
894 #endif
895         ldr     r3,[r14],#4                     @ *K256++
896         add     r6,r6,r2                        @ h+=X[i]
897         str     r2,[sp,#13*4]
898         eor     r2,r4,r5
899         add     r6,r6,r0,ror#6  @ h+=Sigma1(e)
900         and     r2,r2,r11
901         add     r6,r6,r3                        @ h+=K256[i]
902         eor     r2,r2,r5                        @ Ch(e,f,g)
903         eor     r0,r7,r7,ror#11
904         add     r6,r6,r2                        @ h+=Ch(e,f,g)
905 #if 13==31
906         and     r3,r3,#0xff
907         cmp     r3,#0xf2                        @ done?
908 #endif
909 #if 13<15
910 # if __ARM_ARCH__>=7
911         ldr     r2,[r1],#4                      @ prefetch
912 # else
913         ldrb    r2,[r1,#3]
914 # endif
915         eor     r3,r7,r8                        @ a^b, b^c in next round
916 #else
917         ldr     r2,[sp,#15*4]           @ from future BODY_16_xx
918         eor     r3,r7,r8                        @ a^b, b^c in next round
919         ldr     r1,[sp,#12*4]   @ from future BODY_16_xx
920 #endif
921         eor     r0,r0,r7,ror#20 @ Sigma0(a)
922         and     r12,r12,r3                      @ (b^c)&=(a^b)
923         add     r10,r10,r6                      @ d+=h
924         eor     r12,r12,r8                      @ Maj(a,b,c)
925         add     r6,r6,r0,ror#2  @ h+=Sigma0(a)
926         @ add   r6,r6,r12                       @ h+=Maj(a,b,c)
927 #if __ARM_ARCH__>=7
928         @ ldr   r2,[r1],#4                      @ 14
929 # if 14==15
930         str     r1,[sp,#17*4]                   @ make room for r1
931 # endif
932         eor     r0,r10,r10,ror#5
933         add     r6,r6,r12                       @ h+=Maj(a,b,c) from the past
934         eor     r0,r0,r10,ror#19        @ Sigma1(e)
935 # ifndef __ARMEB__
936         rev     r2,r2
937 # endif
938 #else
939         @ ldrb  r2,[r1,#3]                      @ 14
940         add     r6,r6,r12                       @ h+=Maj(a,b,c) from the past
941         ldrb    r12,[r1,#2]
942         ldrb    r0,[r1,#1]
943         orr     r2,r2,r12,lsl#8
944         ldrb    r12,[r1],#4
945         orr     r2,r2,r0,lsl#16
946 # if 14==15
947         str     r1,[sp,#17*4]                   @ make room for r1
948 # endif
949         eor     r0,r10,r10,ror#5
950         orr     r2,r2,r12,lsl#24
951         eor     r0,r0,r10,ror#19        @ Sigma1(e)
952 #endif
953         ldr     r12,[r14],#4                    @ *K256++
954         add     r5,r5,r2                        @ h+=X[i]
955         str     r2,[sp,#14*4]
956         eor     r2,r11,r4
957         add     r5,r5,r0,ror#6  @ h+=Sigma1(e)
958         and     r2,r2,r10
959         add     r5,r5,r12                       @ h+=K256[i]
960         eor     r2,r2,r4                        @ Ch(e,f,g)
961         eor     r0,r6,r6,ror#11
962         add     r5,r5,r2                        @ h+=Ch(e,f,g)
963 #if 14==31
964         and     r12,r12,#0xff
965         cmp     r12,#0xf2                       @ done?
966 #endif
967 #if 14<15
968 # if __ARM_ARCH__>=7
969         ldr     r2,[r1],#4                      @ prefetch
970 # else
971         ldrb    r2,[r1,#3]
972 # endif
973         eor     r12,r6,r7                       @ a^b, b^c in next round
974 #else
975         ldr     r2,[sp,#0*4]            @ from future BODY_16_xx
976         eor     r12,r6,r7                       @ a^b, b^c in next round
977         ldr     r1,[sp,#13*4]   @ from future BODY_16_xx
978 #endif
979         eor     r0,r0,r6,ror#20 @ Sigma0(a)
980         and     r3,r3,r12                       @ (b^c)&=(a^b)
981         add     r9,r9,r5                        @ d+=h
982         eor     r3,r3,r7                        @ Maj(a,b,c)
983         add     r5,r5,r0,ror#2  @ h+=Sigma0(a)
984         @ add   r5,r5,r3                        @ h+=Maj(a,b,c)
985 #if __ARM_ARCH__>=7
986         @ ldr   r2,[r1],#4                      @ 15
987 # if 15==15
988         str     r1,[sp,#17*4]                   @ make room for r1
989 # endif
990         eor     r0,r9,r9,ror#5
991         add     r5,r5,r3                        @ h+=Maj(a,b,c) from the past
992         eor     r0,r0,r9,ror#19 @ Sigma1(e)
993 # ifndef __ARMEB__
994         rev     r2,r2
995 # endif
996 #else
997         @ ldrb  r2,[r1,#3]                      @ 15
998         add     r5,r5,r3                        @ h+=Maj(a,b,c) from the past
999         ldrb    r3,[r1,#2]
1000         ldrb    r0,[r1,#1]
1001         orr     r2,r2,r3,lsl#8
1002         ldrb    r3,[r1],#4
1003         orr     r2,r2,r0,lsl#16
1004 # if 15==15
1005         str     r1,[sp,#17*4]                   @ make room for r1
1006 # endif
1007         eor     r0,r9,r9,ror#5
1008         orr     r2,r2,r3,lsl#24
1009         eor     r0,r0,r9,ror#19 @ Sigma1(e)
1010 #endif
1011         ldr     r3,[r14],#4                     @ *K256++
1012         add     r4,r4,r2                        @ h+=X[i]
1013         str     r2,[sp,#15*4]
1014         eor     r2,r10,r11
1015         add     r4,r4,r0,ror#6  @ h+=Sigma1(e)
1016         and     r2,r2,r9
1017         add     r4,r4,r3                        @ h+=K256[i]
1018         eor     r2,r2,r11                       @ Ch(e,f,g)
1019         eor     r0,r5,r5,ror#11
1020         add     r4,r4,r2                        @ h+=Ch(e,f,g)
1021 #if 15==31
1022         and     r3,r3,#0xff
1023         cmp     r3,#0xf2                        @ done?
1024 #endif
1025 #if 15<15
1026 # if __ARM_ARCH__>=7
1027         ldr     r2,[r1],#4                      @ prefetch
1028 # else
1029         ldrb    r2,[r1,#3]
1030 # endif
1031         eor     r3,r5,r6                        @ a^b, b^c in next round
1032 #else
1033         ldr     r2,[sp,#1*4]            @ from future BODY_16_xx
1034         eor     r3,r5,r6                        @ a^b, b^c in next round
1035         ldr     r1,[sp,#14*4]   @ from future BODY_16_xx
1036 #endif
1037         eor     r0,r0,r5,ror#20 @ Sigma0(a)
1038         and     r12,r12,r3                      @ (b^c)&=(a^b)
1039         add     r8,r8,r4                        @ d+=h
1040         eor     r12,r12,r6                      @ Maj(a,b,c)
1041         add     r4,r4,r0,ror#2  @ h+=Sigma0(a)
1042         @ add   r4,r4,r12                       @ h+=Maj(a,b,c)
1043 .Lrounds_16_xx:
1044         @ ldr   r2,[sp,#1*4]            @ 16
1045         @ ldr   r1,[sp,#14*4]
1046         mov     r0,r2,ror#7
1047         add     r4,r4,r12                       @ h+=Maj(a,b,c) from the past
1048         mov     r12,r1,ror#17
1049         eor     r0,r0,r2,ror#18
1050         eor     r12,r12,r1,ror#19
1051         eor     r0,r0,r2,lsr#3  @ sigma0(X[i+1])
1052         ldr     r2,[sp,#0*4]
1053         eor     r12,r12,r1,lsr#10       @ sigma1(X[i+14])
1054         ldr     r1,[sp,#9*4]
1055
1056         add     r12,r12,r0
1057         eor     r0,r8,r8,ror#5  @ from BODY_00_15
1058         add     r2,r2,r12
1059         eor     r0,r0,r8,ror#19 @ Sigma1(e)
1060         add     r2,r2,r1                        @ X[i]
1061         ldr     r12,[r14],#4                    @ *K256++
1062         add     r11,r11,r2                      @ h+=X[i]
1063         str     r2,[sp,#0*4]
1064         eor     r2,r9,r10
1065         add     r11,r11,r0,ror#6        @ h+=Sigma1(e)
1066         and     r2,r2,r8
1067         add     r11,r11,r12                     @ h+=K256[i]
1068         eor     r2,r2,r10                       @ Ch(e,f,g)
1069         eor     r0,r4,r4,ror#11
1070         add     r11,r11,r2                      @ h+=Ch(e,f,g)
1071 #if 16==31
1072         and     r12,r12,#0xff
1073         cmp     r12,#0xf2                       @ done?
1074 #endif
1075 #if 16<15
1076 # if __ARM_ARCH__>=7
1077         ldr     r2,[r1],#4                      @ prefetch
1078 # else
1079         ldrb    r2,[r1,#3]
1080 # endif
1081         eor     r12,r4,r5                       @ a^b, b^c in next round
1082 #else
1083         ldr     r2,[sp,#2*4]            @ from future BODY_16_xx
1084         eor     r12,r4,r5                       @ a^b, b^c in next round
1085         ldr     r1,[sp,#15*4]   @ from future BODY_16_xx
1086 #endif
1087         eor     r0,r0,r4,ror#20 @ Sigma0(a)
1088         and     r3,r3,r12                       @ (b^c)&=(a^b)
1089         add     r7,r7,r11                       @ d+=h
1090         eor     r3,r3,r5                        @ Maj(a,b,c)
1091         add     r11,r11,r0,ror#2        @ h+=Sigma0(a)
1092         @ add   r11,r11,r3                      @ h+=Maj(a,b,c)
1093         @ ldr   r2,[sp,#2*4]            @ 17
1094         @ ldr   r1,[sp,#15*4]
1095         mov     r0,r2,ror#7
1096         add     r11,r11,r3                      @ h+=Maj(a,b,c) from the past
1097         mov     r3,r1,ror#17
1098         eor     r0,r0,r2,ror#18
1099         eor     r3,r3,r1,ror#19
1100         eor     r0,r0,r2,lsr#3  @ sigma0(X[i+1])
1101         ldr     r2,[sp,#1*4]
1102         eor     r3,r3,r1,lsr#10 @ sigma1(X[i+14])
1103         ldr     r1,[sp,#10*4]
1104
1105         add     r3,r3,r0
1106         eor     r0,r7,r7,ror#5  @ from BODY_00_15
1107         add     r2,r2,r3
1108         eor     r0,r0,r7,ror#19 @ Sigma1(e)
1109         add     r2,r2,r1                        @ X[i]
1110         ldr     r3,[r14],#4                     @ *K256++
1111         add     r10,r10,r2                      @ h+=X[i]
1112         str     r2,[sp,#1*4]
1113         eor     r2,r8,r9
1114         add     r10,r10,r0,ror#6        @ h+=Sigma1(e)
1115         and     r2,r2,r7
1116         add     r10,r10,r3                      @ h+=K256[i]
1117         eor     r2,r2,r9                        @ Ch(e,f,g)
1118         eor     r0,r11,r11,ror#11
1119         add     r10,r10,r2                      @ h+=Ch(e,f,g)
1120 #if 17==31
1121         and     r3,r3,#0xff
1122         cmp     r3,#0xf2                        @ done?
1123 #endif
1124 #if 17<15
1125 # if __ARM_ARCH__>=7
1126         ldr     r2,[r1],#4                      @ prefetch
1127 # else
1128         ldrb    r2,[r1,#3]
1129 # endif
1130         eor     r3,r11,r4                       @ a^b, b^c in next round
1131 #else
1132         ldr     r2,[sp,#3*4]            @ from future BODY_16_xx
1133         eor     r3,r11,r4                       @ a^b, b^c in next round
1134         ldr     r1,[sp,#0*4]    @ from future BODY_16_xx
1135 #endif
1136         eor     r0,r0,r11,ror#20        @ Sigma0(a)
1137         and     r12,r12,r3                      @ (b^c)&=(a^b)
1138         add     r6,r6,r10                       @ d+=h
1139         eor     r12,r12,r4                      @ Maj(a,b,c)
1140         add     r10,r10,r0,ror#2        @ h+=Sigma0(a)
1141         @ add   r10,r10,r12                     @ h+=Maj(a,b,c)
1142         @ ldr   r2,[sp,#3*4]            @ 18
1143         @ ldr   r1,[sp,#0*4]
1144         mov     r0,r2,ror#7
1145         add     r10,r10,r12                     @ h+=Maj(a,b,c) from the past
1146         mov     r12,r1,ror#17
1147         eor     r0,r0,r2,ror#18
1148         eor     r12,r12,r1,ror#19
1149         eor     r0,r0,r2,lsr#3  @ sigma0(X[i+1])
1150         ldr     r2,[sp,#2*4]
1151         eor     r12,r12,r1,lsr#10       @ sigma1(X[i+14])
1152         ldr     r1,[sp,#11*4]
1153
1154         add     r12,r12,r0
1155         eor     r0,r6,r6,ror#5  @ from BODY_00_15
1156         add     r2,r2,r12
1157         eor     r0,r0,r6,ror#19 @ Sigma1(e)
1158         add     r2,r2,r1                        @ X[i]
1159         ldr     r12,[r14],#4                    @ *K256++
1160         add     r9,r9,r2                        @ h+=X[i]
1161         str     r2,[sp,#2*4]
1162         eor     r2,r7,r8
1163         add     r9,r9,r0,ror#6  @ h+=Sigma1(e)
1164         and     r2,r2,r6
1165         add     r9,r9,r12                       @ h+=K256[i]
1166         eor     r2,r2,r8                        @ Ch(e,f,g)
1167         eor     r0,r10,r10,ror#11
1168         add     r9,r9,r2                        @ h+=Ch(e,f,g)
1169 #if 18==31
1170         and     r12,r12,#0xff
1171         cmp     r12,#0xf2                       @ done?
1172 #endif
1173 #if 18<15
1174 # if __ARM_ARCH__>=7
1175         ldr     r2,[r1],#4                      @ prefetch
1176 # else
1177         ldrb    r2,[r1,#3]
1178 # endif
1179         eor     r12,r10,r11                     @ a^b, b^c in next round
1180 #else
1181         ldr     r2,[sp,#4*4]            @ from future BODY_16_xx
1182         eor     r12,r10,r11                     @ a^b, b^c in next round
1183         ldr     r1,[sp,#1*4]    @ from future BODY_16_xx
1184 #endif
1185         eor     r0,r0,r10,ror#20        @ Sigma0(a)
1186         and     r3,r3,r12                       @ (b^c)&=(a^b)
1187         add     r5,r5,r9                        @ d+=h
1188         eor     r3,r3,r11                       @ Maj(a,b,c)
1189         add     r9,r9,r0,ror#2  @ h+=Sigma0(a)
1190         @ add   r9,r9,r3                        @ h+=Maj(a,b,c)
1191         @ ldr   r2,[sp,#4*4]            @ 19
1192         @ ldr   r1,[sp,#1*4]
1193         mov     r0,r2,ror#7
1194         add     r9,r9,r3                        @ h+=Maj(a,b,c) from the past
1195         mov     r3,r1,ror#17
1196         eor     r0,r0,r2,ror#18
1197         eor     r3,r3,r1,ror#19
1198         eor     r0,r0,r2,lsr#3  @ sigma0(X[i+1])
1199         ldr     r2,[sp,#3*4]
1200         eor     r3,r3,r1,lsr#10 @ sigma1(X[i+14])
1201         ldr     r1,[sp,#12*4]
1202
1203         add     r3,r3,r0
1204         eor     r0,r5,r5,ror#5  @ from BODY_00_15
1205         add     r2,r2,r3
1206         eor     r0,r0,r5,ror#19 @ Sigma1(e)
1207         add     r2,r2,r1                        @ X[i]
1208         ldr     r3,[r14],#4                     @ *K256++
1209         add     r8,r8,r2                        @ h+=X[i]
1210         str     r2,[sp,#3*4]
1211         eor     r2,r6,r7
1212         add     r8,r8,r0,ror#6  @ h+=Sigma1(e)
1213         and     r2,r2,r5
1214         add     r8,r8,r3                        @ h+=K256[i]
1215         eor     r2,r2,r7                        @ Ch(e,f,g)
1216         eor     r0,r9,r9,ror#11
1217         add     r8,r8,r2                        @ h+=Ch(e,f,g)
1218 #if 19==31
1219         and     r3,r3,#0xff
1220         cmp     r3,#0xf2                        @ done?
1221 #endif
1222 #if 19<15
1223 # if __ARM_ARCH__>=7
1224         ldr     r2,[r1],#4                      @ prefetch
1225 # else
1226         ldrb    r2,[r1,#3]
1227 # endif
1228         eor     r3,r9,r10                       @ a^b, b^c in next round
1229 #else
1230         ldr     r2,[sp,#5*4]            @ from future BODY_16_xx
1231         eor     r3,r9,r10                       @ a^b, b^c in next round
1232         ldr     r1,[sp,#2*4]    @ from future BODY_16_xx
1233 #endif
1234         eor     r0,r0,r9,ror#20 @ Sigma0(a)
1235         and     r12,r12,r3                      @ (b^c)&=(a^b)
1236         add     r4,r4,r8                        @ d+=h
1237         eor     r12,r12,r10                     @ Maj(a,b,c)
1238         add     r8,r8,r0,ror#2  @ h+=Sigma0(a)
1239         @ add   r8,r8,r12                       @ h+=Maj(a,b,c)
1240         @ ldr   r2,[sp,#5*4]            @ 20
1241         @ ldr   r1,[sp,#2*4]
1242         mov     r0,r2,ror#7
1243         add     r8,r8,r12                       @ h+=Maj(a,b,c) from the past
1244         mov     r12,r1,ror#17
1245         eor     r0,r0,r2,ror#18
1246         eor     r12,r12,r1,ror#19
1247         eor     r0,r0,r2,lsr#3  @ sigma0(X[i+1])
1248         ldr     r2,[sp,#4*4]
1249         eor     r12,r12,r1,lsr#10       @ sigma1(X[i+14])
1250         ldr     r1,[sp,#13*4]
1251
1252         add     r12,r12,r0
1253         eor     r0,r4,r4,ror#5  @ from BODY_00_15
1254         add     r2,r2,r12
1255         eor     r0,r0,r4,ror#19 @ Sigma1(e)
1256         add     r2,r2,r1                        @ X[i]
1257         ldr     r12,[r14],#4                    @ *K256++
1258         add     r7,r7,r2                        @ h+=X[i]
1259         str     r2,[sp,#4*4]
1260         eor     r2,r5,r6
1261         add     r7,r7,r0,ror#6  @ h+=Sigma1(e)
1262         and     r2,r2,r4
1263         add     r7,r7,r12                       @ h+=K256[i]
1264         eor     r2,r2,r6                        @ Ch(e,f,g)
1265         eor     r0,r8,r8,ror#11
1266         add     r7,r7,r2                        @ h+=Ch(e,f,g)
1267 #if 20==31
1268         and     r12,r12,#0xff
1269         cmp     r12,#0xf2                       @ done?
1270 #endif
1271 #if 20<15
1272 # if __ARM_ARCH__>=7
1273         ldr     r2,[r1],#4                      @ prefetch
1274 # else
1275         ldrb    r2,[r1,#3]
1276 # endif
1277         eor     r12,r8,r9                       @ a^b, b^c in next round
1278 #else
1279         ldr     r2,[sp,#6*4]            @ from future BODY_16_xx
1280         eor     r12,r8,r9                       @ a^b, b^c in next round
1281         ldr     r1,[sp,#3*4]    @ from future BODY_16_xx
1282 #endif
1283         eor     r0,r0,r8,ror#20 @ Sigma0(a)
1284         and     r3,r3,r12                       @ (b^c)&=(a^b)
1285         add     r11,r11,r7                      @ d+=h
1286         eor     r3,r3,r9                        @ Maj(a,b,c)
1287         add     r7,r7,r0,ror#2  @ h+=Sigma0(a)
1288         @ add   r7,r7,r3                        @ h+=Maj(a,b,c)
1289         @ ldr   r2,[sp,#6*4]            @ 21
1290         @ ldr   r1,[sp,#3*4]
1291         mov     r0,r2,ror#7
1292         add     r7,r7,r3                        @ h+=Maj(a,b,c) from the past
1293         mov     r3,r1,ror#17
1294         eor     r0,r0,r2,ror#18
1295         eor     r3,r3,r1,ror#19
1296         eor     r0,r0,r2,lsr#3  @ sigma0(X[i+1])
1297         ldr     r2,[sp,#5*4]
1298         eor     r3,r3,r1,lsr#10 @ sigma1(X[i+14])
1299         ldr     r1,[sp,#14*4]
1300
1301         add     r3,r3,r0
1302         eor     r0,r11,r11,ror#5        @ from BODY_00_15
1303         add     r2,r2,r3
1304         eor     r0,r0,r11,ror#19        @ Sigma1(e)
1305         add     r2,r2,r1                        @ X[i]
1306         ldr     r3,[r14],#4                     @ *K256++
1307         add     r6,r6,r2                        @ h+=X[i]
1308         str     r2,[sp,#5*4]
1309         eor     r2,r4,r5
1310         add     r6,r6,r0,ror#6  @ h+=Sigma1(e)
1311         and     r2,r2,r11
1312         add     r6,r6,r3                        @ h+=K256[i]
1313         eor     r2,r2,r5                        @ Ch(e,f,g)
1314         eor     r0,r7,r7,ror#11
1315         add     r6,r6,r2                        @ h+=Ch(e,f,g)
1316 #if 21==31
1317         and     r3,r3,#0xff
1318         cmp     r3,#0xf2                        @ done?
1319 #endif
1320 #if 21<15
1321 # if __ARM_ARCH__>=7
1322         ldr     r2,[r1],#4                      @ prefetch
1323 # else
1324         ldrb    r2,[r1,#3]
1325 # endif
1326         eor     r3,r7,r8                        @ a^b, b^c in next round
1327 #else
1328         ldr     r2,[sp,#7*4]            @ from future BODY_16_xx
1329         eor     r3,r7,r8                        @ a^b, b^c in next round
1330         ldr     r1,[sp,#4*4]    @ from future BODY_16_xx
1331 #endif
1332         eor     r0,r0,r7,ror#20 @ Sigma0(a)
1333         and     r12,r12,r3                      @ (b^c)&=(a^b)
1334         add     r10,r10,r6                      @ d+=h
1335         eor     r12,r12,r8                      @ Maj(a,b,c)
1336         add     r6,r6,r0,ror#2  @ h+=Sigma0(a)
1337         @ add   r6,r6,r12                       @ h+=Maj(a,b,c)
1338         @ ldr   r2,[sp,#7*4]            @ 22
1339         @ ldr   r1,[sp,#4*4]
1340         mov     r0,r2,ror#7
1341         add     r6,r6,r12                       @ h+=Maj(a,b,c) from the past
1342         mov     r12,r1,ror#17
1343         eor     r0,r0,r2,ror#18
1344         eor     r12,r12,r1,ror#19
1345         eor     r0,r0,r2,lsr#3  @ sigma0(X[i+1])
1346         ldr     r2,[sp,#6*4]
1347         eor     r12,r12,r1,lsr#10       @ sigma1(X[i+14])
1348         ldr     r1,[sp,#15*4]
1349
1350         add     r12,r12,r0
1351         eor     r0,r10,r10,ror#5        @ from BODY_00_15
1352         add     r2,r2,r12
1353         eor     r0,r0,r10,ror#19        @ Sigma1(e)
1354         add     r2,r2,r1                        @ X[i]
1355         ldr     r12,[r14],#4                    @ *K256++
1356         add     r5,r5,r2                        @ h+=X[i]
1357         str     r2,[sp,#6*4]
1358         eor     r2,r11,r4
1359         add     r5,r5,r0,ror#6  @ h+=Sigma1(e)
1360         and     r2,r2,r10
1361         add     r5,r5,r12                       @ h+=K256[i]
1362         eor     r2,r2,r4                        @ Ch(e,f,g)
1363         eor     r0,r6,r6,ror#11
1364         add     r5,r5,r2                        @ h+=Ch(e,f,g)
1365 #if 22==31
1366         and     r12,r12,#0xff
1367         cmp     r12,#0xf2                       @ done?
1368 #endif
1369 #if 22<15
1370 # if __ARM_ARCH__>=7
1371         ldr     r2,[r1],#4                      @ prefetch
1372 # else
1373         ldrb    r2,[r1,#3]
1374 # endif
1375         eor     r12,r6,r7                       @ a^b, b^c in next round
1376 #else
1377         ldr     r2,[sp,#8*4]            @ from future BODY_16_xx
1378         eor     r12,r6,r7                       @ a^b, b^c in next round
1379         ldr     r1,[sp,#5*4]    @ from future BODY_16_xx
1380 #endif
1381         eor     r0,r0,r6,ror#20 @ Sigma0(a)
1382         and     r3,r3,r12                       @ (b^c)&=(a^b)
1383         add     r9,r9,r5                        @ d+=h
1384         eor     r3,r3,r7                        @ Maj(a,b,c)
1385         add     r5,r5,r0,ror#2  @ h+=Sigma0(a)
1386         @ add   r5,r5,r3                        @ h+=Maj(a,b,c)
1387         @ ldr   r2,[sp,#8*4]            @ 23
1388         @ ldr   r1,[sp,#5*4]
1389         mov     r0,r2,ror#7
1390         add     r5,r5,r3                        @ h+=Maj(a,b,c) from the past
1391         mov     r3,r1,ror#17
1392         eor     r0,r0,r2,ror#18
1393         eor     r3,r3,r1,ror#19
1394         eor     r0,r0,r2,lsr#3  @ sigma0(X[i+1])
1395         ldr     r2,[sp,#7*4]
1396         eor     r3,r3,r1,lsr#10 @ sigma1(X[i+14])
1397         ldr     r1,[sp,#0*4]
1398
1399         add     r3,r3,r0
1400         eor     r0,r9,r9,ror#5  @ from BODY_00_15
1401         add     r2,r2,r3
1402         eor     r0,r0,r9,ror#19 @ Sigma1(e)
1403         add     r2,r2,r1                        @ X[i]
1404         ldr     r3,[r14],#4                     @ *K256++
1405         add     r4,r4,r2                        @ h+=X[i]
1406         str     r2,[sp,#7*4]
1407         eor     r2,r10,r11
1408         add     r4,r4,r0,ror#6  @ h+=Sigma1(e)
1409         and     r2,r2,r9
1410         add     r4,r4,r3                        @ h+=K256[i]
1411         eor     r2,r2,r11                       @ Ch(e,f,g)
1412         eor     r0,r5,r5,ror#11
1413         add     r4,r4,r2                        @ h+=Ch(e,f,g)
1414 #if 23==31
1415         and     r3,r3,#0xff
1416         cmp     r3,#0xf2                        @ done?
1417 #endif
1418 #if 23<15
1419 # if __ARM_ARCH__>=7
1420         ldr     r2,[r1],#4                      @ prefetch
1421 # else
1422         ldrb    r2,[r1,#3]
1423 # endif
1424         eor     r3,r5,r6                        @ a^b, b^c in next round
1425 #else
1426         ldr     r2,[sp,#9*4]            @ from future BODY_16_xx
1427         eor     r3,r5,r6                        @ a^b, b^c in next round
1428         ldr     r1,[sp,#6*4]    @ from future BODY_16_xx
1429 #endif
1430         eor     r0,r0,r5,ror#20 @ Sigma0(a)
1431         and     r12,r12,r3                      @ (b^c)&=(a^b)
1432         add     r8,r8,r4                        @ d+=h
1433         eor     r12,r12,r6                      @ Maj(a,b,c)
1434         add     r4,r4,r0,ror#2  @ h+=Sigma0(a)
1435         @ add   r4,r4,r12                       @ h+=Maj(a,b,c)
1436         @ ldr   r2,[sp,#9*4]            @ 24
1437         @ ldr   r1,[sp,#6*4]
1438         mov     r0,r2,ror#7
1439         add     r4,r4,r12                       @ h+=Maj(a,b,c) from the past
1440         mov     r12,r1,ror#17
1441         eor     r0,r0,r2,ror#18
1442         eor     r12,r12,r1,ror#19
1443         eor     r0,r0,r2,lsr#3  @ sigma0(X[i+1])
1444         ldr     r2,[sp,#8*4]
1445         eor     r12,r12,r1,lsr#10       @ sigma1(X[i+14])
1446         ldr     r1,[sp,#1*4]
1447
1448         add     r12,r12,r0
1449         eor     r0,r8,r8,ror#5  @ from BODY_00_15
1450         add     r2,r2,r12
1451         eor     r0,r0,r8,ror#19 @ Sigma1(e)
1452         add     r2,r2,r1                        @ X[i]
1453         ldr     r12,[r14],#4                    @ *K256++
1454         add     r11,r11,r2                      @ h+=X[i]
1455         str     r2,[sp,#8*4]
1456         eor     r2,r9,r10
1457         add     r11,r11,r0,ror#6        @ h+=Sigma1(e)
1458         and     r2,r2,r8
1459         add     r11,r11,r12                     @ h+=K256[i]
1460         eor     r2,r2,r10                       @ Ch(e,f,g)
1461         eor     r0,r4,r4,ror#11
1462         add     r11,r11,r2                      @ h+=Ch(e,f,g)
1463 #if 24==31
1464         and     r12,r12,#0xff
1465         cmp     r12,#0xf2                       @ done?
1466 #endif
1467 #if 24<15
1468 # if __ARM_ARCH__>=7
1469         ldr     r2,[r1],#4                      @ prefetch
1470 # else
1471         ldrb    r2,[r1,#3]
1472 # endif
1473         eor     r12,r4,r5                       @ a^b, b^c in next round
1474 #else
1475         ldr     r2,[sp,#10*4]           @ from future BODY_16_xx
1476         eor     r12,r4,r5                       @ a^b, b^c in next round
1477         ldr     r1,[sp,#7*4]    @ from future BODY_16_xx
1478 #endif
1479         eor     r0,r0,r4,ror#20 @ Sigma0(a)
1480         and     r3,r3,r12                       @ (b^c)&=(a^b)
1481         add     r7,r7,r11                       @ d+=h
1482         eor     r3,r3,r5                        @ Maj(a,b,c)
1483         add     r11,r11,r0,ror#2        @ h+=Sigma0(a)
1484         @ add   r11,r11,r3                      @ h+=Maj(a,b,c)
1485         @ ldr   r2,[sp,#10*4]           @ 25
1486         @ ldr   r1,[sp,#7*4]
1487         mov     r0,r2,ror#7
1488         add     r11,r11,r3                      @ h+=Maj(a,b,c) from the past
1489         mov     r3,r1,ror#17
1490         eor     r0,r0,r2,ror#18
1491         eor     r3,r3,r1,ror#19
1492         eor     r0,r0,r2,lsr#3  @ sigma0(X[i+1])
1493         ldr     r2,[sp,#9*4]
1494         eor     r3,r3,r1,lsr#10 @ sigma1(X[i+14])
1495         ldr     r1,[sp,#2*4]
1496
1497         add     r3,r3,r0
1498         eor     r0,r7,r7,ror#5  @ from BODY_00_15
1499         add     r2,r2,r3
1500         eor     r0,r0,r7,ror#19 @ Sigma1(e)
1501         add     r2,r2,r1                        @ X[i]
1502         ldr     r3,[r14],#4                     @ *K256++
1503         add     r10,r10,r2                      @ h+=X[i]
1504         str     r2,[sp,#9*4]
1505         eor     r2,r8,r9
1506         add     r10,r10,r0,ror#6        @ h+=Sigma1(e)
1507         and     r2,r2,r7
1508         add     r10,r10,r3                      @ h+=K256[i]
1509         eor     r2,r2,r9                        @ Ch(e,f,g)
1510         eor     r0,r11,r11,ror#11
1511         add     r10,r10,r2                      @ h+=Ch(e,f,g)
1512 #if 25==31
1513         and     r3,r3,#0xff
1514         cmp     r3,#0xf2                        @ done?
1515 #endif
1516 #if 25<15
1517 # if __ARM_ARCH__>=7
1518         ldr     r2,[r1],#4                      @ prefetch
1519 # else
1520         ldrb    r2,[r1,#3]
1521 # endif
1522         eor     r3,r11,r4                       @ a^b, b^c in next round
1523 #else
1524         ldr     r2,[sp,#11*4]           @ from future BODY_16_xx
1525         eor     r3,r11,r4                       @ a^b, b^c in next round
1526         ldr     r1,[sp,#8*4]    @ from future BODY_16_xx
1527 #endif
1528         eor     r0,r0,r11,ror#20        @ Sigma0(a)
1529         and     r12,r12,r3                      @ (b^c)&=(a^b)
1530         add     r6,r6,r10                       @ d+=h
1531         eor     r12,r12,r4                      @ Maj(a,b,c)
1532         add     r10,r10,r0,ror#2        @ h+=Sigma0(a)
1533         @ add   r10,r10,r12                     @ h+=Maj(a,b,c)
1534         @ ldr   r2,[sp,#11*4]           @ 26
1535         @ ldr   r1,[sp,#8*4]
1536         mov     r0,r2,ror#7
1537         add     r10,r10,r12                     @ h+=Maj(a,b,c) from the past
1538         mov     r12,r1,ror#17
1539         eor     r0,r0,r2,ror#18
1540         eor     r12,r12,r1,ror#19
1541         eor     r0,r0,r2,lsr#3  @ sigma0(X[i+1])
1542         ldr     r2,[sp,#10*4]
1543         eor     r12,r12,r1,lsr#10       @ sigma1(X[i+14])
1544         ldr     r1,[sp,#3*4]
1545
1546         add     r12,r12,r0
1547         eor     r0,r6,r6,ror#5  @ from BODY_00_15
1548         add     r2,r2,r12
1549         eor     r0,r0,r6,ror#19 @ Sigma1(e)
1550         add     r2,r2,r1                        @ X[i]
1551         ldr     r12,[r14],#4                    @ *K256++
1552         add     r9,r9,r2                        @ h+=X[i]
1553         str     r2,[sp,#10*4]
1554         eor     r2,r7,r8
1555         add     r9,r9,r0,ror#6  @ h+=Sigma1(e)
1556         and     r2,r2,r6
1557         add     r9,r9,r12                       @ h+=K256[i]
1558         eor     r2,r2,r8                        @ Ch(e,f,g)
1559         eor     r0,r10,r10,ror#11
1560         add     r9,r9,r2                        @ h+=Ch(e,f,g)
1561 #if 26==31
1562         and     r12,r12,#0xff
1563         cmp     r12,#0xf2                       @ done?
1564 #endif
1565 #if 26<15
1566 # if __ARM_ARCH__>=7
1567         ldr     r2,[r1],#4                      @ prefetch
1568 # else
1569         ldrb    r2,[r1,#3]
1570 # endif
1571         eor     r12,r10,r11                     @ a^b, b^c in next round
1572 #else
1573         ldr     r2,[sp,#12*4]           @ from future BODY_16_xx
1574         eor     r12,r10,r11                     @ a^b, b^c in next round
1575         ldr     r1,[sp,#9*4]    @ from future BODY_16_xx
1576 #endif
1577         eor     r0,r0,r10,ror#20        @ Sigma0(a)
1578         and     r3,r3,r12                       @ (b^c)&=(a^b)
1579         add     r5,r5,r9                        @ d+=h
1580         eor     r3,r3,r11                       @ Maj(a,b,c)
1581         add     r9,r9,r0,ror#2  @ h+=Sigma0(a)
1582         @ add   r9,r9,r3                        @ h+=Maj(a,b,c)
1583         @ ldr   r2,[sp,#12*4]           @ 27
1584         @ ldr   r1,[sp,#9*4]
1585         mov     r0,r2,ror#7
1586         add     r9,r9,r3                        @ h+=Maj(a,b,c) from the past
1587         mov     r3,r1,ror#17
1588         eor     r0,r0,r2,ror#18
1589         eor     r3,r3,r1,ror#19
1590         eor     r0,r0,r2,lsr#3  @ sigma0(X[i+1])
1591         ldr     r2,[sp,#11*4]
1592         eor     r3,r3,r1,lsr#10 @ sigma1(X[i+14])
1593         ldr     r1,[sp,#4*4]
1594
1595         add     r3,r3,r0
1596         eor     r0,r5,r5,ror#5  @ from BODY_00_15
1597         add     r2,r2,r3
1598         eor     r0,r0,r5,ror#19 @ Sigma1(e)
1599         add     r2,r2,r1                        @ X[i]
1600         ldr     r3,[r14],#4                     @ *K256++
1601         add     r8,r8,r2                        @ h+=X[i]
1602         str     r2,[sp,#11*4]
1603         eor     r2,r6,r7
1604         add     r8,r8,r0,ror#6  @ h+=Sigma1(e)
1605         and     r2,r2,r5
1606         add     r8,r8,r3                        @ h+=K256[i]
1607         eor     r2,r2,r7                        @ Ch(e,f,g)
1608         eor     r0,r9,r9,ror#11
1609         add     r8,r8,r2                        @ h+=Ch(e,f,g)
1610 #if 27==31
1611         and     r3,r3,#0xff
1612         cmp     r3,#0xf2                        @ done?
1613 #endif
1614 #if 27<15
1615 # if __ARM_ARCH__>=7
1616         ldr     r2,[r1],#4                      @ prefetch
1617 # else
1618         ldrb    r2,[r1,#3]
1619 # endif
1620         eor     r3,r9,r10                       @ a^b, b^c in next round
1621 #else
1622         ldr     r2,[sp,#13*4]           @ from future BODY_16_xx
1623         eor     r3,r9,r10                       @ a^b, b^c in next round
1624         ldr     r1,[sp,#10*4]   @ from future BODY_16_xx
1625 #endif
1626         eor     r0,r0,r9,ror#20 @ Sigma0(a)
1627         and     r12,r12,r3                      @ (b^c)&=(a^b)
1628         add     r4,r4,r8                        @ d+=h
1629         eor     r12,r12,r10                     @ Maj(a,b,c)
1630         add     r8,r8,r0,ror#2  @ h+=Sigma0(a)
1631         @ add   r8,r8,r12                       @ h+=Maj(a,b,c)
1632         @ ldr   r2,[sp,#13*4]           @ 28
1633         @ ldr   r1,[sp,#10*4]
1634         mov     r0,r2,ror#7
1635         add     r8,r8,r12                       @ h+=Maj(a,b,c) from the past
1636         mov     r12,r1,ror#17
1637         eor     r0,r0,r2,ror#18
1638         eor     r12,r12,r1,ror#19
1639         eor     r0,r0,r2,lsr#3  @ sigma0(X[i+1])
1640         ldr     r2,[sp,#12*4]
1641         eor     r12,r12,r1,lsr#10       @ sigma1(X[i+14])
1642         ldr     r1,[sp,#5*4]
1643
1644         add     r12,r12,r0
1645         eor     r0,r4,r4,ror#5  @ from BODY_00_15
1646         add     r2,r2,r12
1647         eor     r0,r0,r4,ror#19 @ Sigma1(e)
1648         add     r2,r2,r1                        @ X[i]
1649         ldr     r12,[r14],#4                    @ *K256++
1650         add     r7,r7,r2                        @ h+=X[i]
1651         str     r2,[sp,#12*4]
1652         eor     r2,r5,r6
1653         add     r7,r7,r0,ror#6  @ h+=Sigma1(e)
1654         and     r2,r2,r4
1655         add     r7,r7,r12                       @ h+=K256[i]
1656         eor     r2,r2,r6                        @ Ch(e,f,g)
1657         eor     r0,r8,r8,ror#11
1658         add     r7,r7,r2                        @ h+=Ch(e,f,g)
1659 #if 28==31
1660         and     r12,r12,#0xff
1661         cmp     r12,#0xf2                       @ done?
1662 #endif
1663 #if 28<15
1664 # if __ARM_ARCH__>=7
1665         ldr     r2,[r1],#4                      @ prefetch
1666 # else
1667         ldrb    r2,[r1,#3]
1668 # endif
1669         eor     r12,r8,r9                       @ a^b, b^c in next round
1670 #else
1671         ldr     r2,[sp,#14*4]           @ from future BODY_16_xx
1672         eor     r12,r8,r9                       @ a^b, b^c in next round
1673         ldr     r1,[sp,#11*4]   @ from future BODY_16_xx
1674 #endif
1675         eor     r0,r0,r8,ror#20 @ Sigma0(a)
1676         and     r3,r3,r12                       @ (b^c)&=(a^b)
1677         add     r11,r11,r7                      @ d+=h
1678         eor     r3,r3,r9                        @ Maj(a,b,c)
1679         add     r7,r7,r0,ror#2  @ h+=Sigma0(a)
1680         @ add   r7,r7,r3                        @ h+=Maj(a,b,c)
1681         @ ldr   r2,[sp,#14*4]           @ 29
1682         @ ldr   r1,[sp,#11*4]
1683         mov     r0,r2,ror#7
1684         add     r7,r7,r3                        @ h+=Maj(a,b,c) from the past
1685         mov     r3,r1,ror#17
1686         eor     r0,r0,r2,ror#18
1687         eor     r3,r3,r1,ror#19
1688         eor     r0,r0,r2,lsr#3  @ sigma0(X[i+1])
1689         ldr     r2,[sp,#13*4]
1690         eor     r3,r3,r1,lsr#10 @ sigma1(X[i+14])
1691         ldr     r1,[sp,#6*4]
1692
1693         add     r3,r3,r0
1694         eor     r0,r11,r11,ror#5        @ from BODY_00_15
1695         add     r2,r2,r3
1696         eor     r0,r0,r11,ror#19        @ Sigma1(e)
1697         add     r2,r2,r1                        @ X[i]
1698         ldr     r3,[r14],#4                     @ *K256++
1699         add     r6,r6,r2                        @ h+=X[i]
1700         str     r2,[sp,#13*4]
1701         eor     r2,r4,r5
1702         add     r6,r6,r0,ror#6  @ h+=Sigma1(e)
1703         and     r2,r2,r11
1704         add     r6,r6,r3                        @ h+=K256[i]
1705         eor     r2,r2,r5                        @ Ch(e,f,g)
1706         eor     r0,r7,r7,ror#11
1707         add     r6,r6,r2                        @ h+=Ch(e,f,g)
1708 #if 29==31
1709         and     r3,r3,#0xff
1710         cmp     r3,#0xf2                        @ done?
1711 #endif
1712 #if 29<15
1713 # if __ARM_ARCH__>=7
1714         ldr     r2,[r1],#4                      @ prefetch
1715 # else
1716         ldrb    r2,[r1,#3]
1717 # endif
1718         eor     r3,r7,r8                        @ a^b, b^c in next round
1719 #else
1720         ldr     r2,[sp,#15*4]           @ from future BODY_16_xx
1721         eor     r3,r7,r8                        @ a^b, b^c in next round
1722         ldr     r1,[sp,#12*4]   @ from future BODY_16_xx
1723 #endif
1724         eor     r0,r0,r7,ror#20 @ Sigma0(a)
1725         and     r12,r12,r3                      @ (b^c)&=(a^b)
1726         add     r10,r10,r6                      @ d+=h
1727         eor     r12,r12,r8                      @ Maj(a,b,c)
1728         add     r6,r6,r0,ror#2  @ h+=Sigma0(a)
1729         @ add   r6,r6,r12                       @ h+=Maj(a,b,c)
1730         @ ldr   r2,[sp,#15*4]           @ 30
1731         @ ldr   r1,[sp,#12*4]
1732         mov     r0,r2,ror#7
1733         add     r6,r6,r12                       @ h+=Maj(a,b,c) from the past
1734         mov     r12,r1,ror#17
1735         eor     r0,r0,r2,ror#18
1736         eor     r12,r12,r1,ror#19
1737         eor     r0,r0,r2,lsr#3  @ sigma0(X[i+1])
1738         ldr     r2,[sp,#14*4]
1739         eor     r12,r12,r1,lsr#10       @ sigma1(X[i+14])
1740         ldr     r1,[sp,#7*4]
1741
1742         add     r12,r12,r0
1743         eor     r0,r10,r10,ror#5        @ from BODY_00_15
1744         add     r2,r2,r12
1745         eor     r0,r0,r10,ror#19        @ Sigma1(e)
1746         add     r2,r2,r1                        @ X[i]
1747         ldr     r12,[r14],#4                    @ *K256++
1748         add     r5,r5,r2                        @ h+=X[i]
1749         str     r2,[sp,#14*4]
1750         eor     r2,r11,r4
1751         add     r5,r5,r0,ror#6  @ h+=Sigma1(e)
1752         and     r2,r2,r10
1753         add     r5,r5,r12                       @ h+=K256[i]
1754         eor     r2,r2,r4                        @ Ch(e,f,g)
1755         eor     r0,r6,r6,ror#11
1756         add     r5,r5,r2                        @ h+=Ch(e,f,g)
1757 #if 30==31
1758         and     r12,r12,#0xff
1759         cmp     r12,#0xf2                       @ done?
1760 #endif
1761 #if 30<15
1762 # if __ARM_ARCH__>=7
1763         ldr     r2,[r1],#4                      @ prefetch
1764 # else
1765         ldrb    r2,[r1,#3]
1766 # endif
1767         eor     r12,r6,r7                       @ a^b, b^c in next round
1768 #else
1769         ldr     r2,[sp,#0*4]            @ from future BODY_16_xx
1770         eor     r12,r6,r7                       @ a^b, b^c in next round
1771         ldr     r1,[sp,#13*4]   @ from future BODY_16_xx
1772 #endif
1773         eor     r0,r0,r6,ror#20 @ Sigma0(a)
1774         and     r3,r3,r12                       @ (b^c)&=(a^b)
1775         add     r9,r9,r5                        @ d+=h
1776         eor     r3,r3,r7                        @ Maj(a,b,c)
1777         add     r5,r5,r0,ror#2  @ h+=Sigma0(a)
1778         @ add   r5,r5,r3                        @ h+=Maj(a,b,c)
1779         @ ldr   r2,[sp,#0*4]            @ 31
1780         @ ldr   r1,[sp,#13*4]
1781         mov     r0,r2,ror#7
1782         add     r5,r5,r3                        @ h+=Maj(a,b,c) from the past
1783         mov     r3,r1,ror#17
1784         eor     r0,r0,r2,ror#18
1785         eor     r3,r3,r1,ror#19
1786         eor     r0,r0,r2,lsr#3  @ sigma0(X[i+1])
1787         ldr     r2,[sp,#15*4]
1788         eor     r3,r3,r1,lsr#10 @ sigma1(X[i+14])
1789         ldr     r1,[sp,#8*4]
1790
1791         add     r3,r3,r0
1792         eor     r0,r9,r9,ror#5  @ from BODY_00_15
1793         add     r2,r2,r3
1794         eor     r0,r0,r9,ror#19 @ Sigma1(e)
1795         add     r2,r2,r1                        @ X[i]
1796         ldr     r3,[r14],#4                     @ *K256++
1797         add     r4,r4,r2                        @ h+=X[i]
1798         str     r2,[sp,#15*4]
1799         eor     r2,r10,r11
1800         add     r4,r4,r0,ror#6  @ h+=Sigma1(e)
1801         and     r2,r2,r9
1802         add     r4,r4,r3                        @ h+=K256[i]
1803         eor     r2,r2,r11                       @ Ch(e,f,g)
1804         eor     r0,r5,r5,ror#11
1805         add     r4,r4,r2                        @ h+=Ch(e,f,g)
1806 #if 31==31
1807         and     r3,r3,#0xff
1808         cmp     r3,#0xf2                        @ done?
1809 #endif
1810 #if 31<15
1811 # if __ARM_ARCH__>=7
1812         ldr     r2,[r1],#4                      @ prefetch
1813 # else
1814         ldrb    r2,[r1,#3]
1815 # endif
1816         eor     r3,r5,r6                        @ a^b, b^c in next round
1817 #else
1818         ldr     r2,[sp,#1*4]            @ from future BODY_16_xx
1819         eor     r3,r5,r6                        @ a^b, b^c in next round
1820         ldr     r1,[sp,#14*4]   @ from future BODY_16_xx
1821 #endif
1822         eor     r0,r0,r5,ror#20 @ Sigma0(a)
1823         and     r12,r12,r3                      @ (b^c)&=(a^b)
1824         add     r8,r8,r4                        @ d+=h
1825         eor     r12,r12,r6                      @ Maj(a,b,c)
1826         add     r4,r4,r0,ror#2  @ h+=Sigma0(a)
1827         @ add   r4,r4,r12                       @ h+=Maj(a,b,c)
1828 #if __ARM_ARCH__>=7
1829         ite     eq                      @ Thumb2 thing, sanity check in ARM
1830 #endif
1831         ldreq   r3,[sp,#16*4]           @ pull ctx
1832         bne     .Lrounds_16_xx
1833
1834         add     r4,r4,r12               @ h+=Maj(a,b,c) from the past
1835         ldr     r0,[r3,#0]
1836         ldr     r2,[r3,#4]
1837         ldr     r12,[r3,#8]
1838         add     r4,r4,r0
1839         ldr     r0,[r3,#12]
1840         add     r5,r5,r2
1841         ldr     r2,[r3,#16]
1842         add     r6,r6,r12
1843         ldr     r12,[r3,#20]
1844         add     r7,r7,r0
1845         ldr     r0,[r3,#24]
1846         add     r8,r8,r2
1847         ldr     r2,[r3,#28]
1848         add     r9,r9,r12
1849         ldr     r1,[sp,#17*4]           @ pull inp
1850         ldr     r12,[sp,#18*4]          @ pull inp+len
1851         add     r10,r10,r0
1852         add     r11,r11,r2
1853         stmia   r3,{r4,r5,r6,r7,r8,r9,r10,r11}
1854         cmp     r1,r12
1855         sub     r14,r14,#256    @ rewind Ktbl
1856         bne     .Loop
1857
1858         add     sp,sp,#19*4     @ destroy frame
1859 #if __ARM_ARCH__>=5
1860         ldmia   sp!,{r4-r11,pc}
1861 #else
1862         ldmia   sp!,{r4-r11,lr}
1863         tst     lr,#1
1864         moveq   pc,lr                   @ be binary compatible with V4, yet
1865         .word   0xe12fff1e                      @ interoperable with Thumb ISA:-)
1866 #endif
1867 .size   sha256_block_data_order,.-sha256_block_data_order
1868 #if __ARM_MAX_ARCH__>=7
1869 .arch   armv7-a
1870 .fpu    neon
1871
@ sha256_block_data_order_neon: NEON entry point; this part is the prologue.
@ Register use as visible in the code below:
@   r0 - base of the eight 32-bit words loaded into r4-r11 (kept at [sp,#64])
@   r1 - input pointer, read 64 bytes at a time
@   r2 - number of 64-byte blocks; turned into the end-of-input pointer
@ Stack frame after the alloca (sp is 16-byte aligned):
@   [sp,#0..#63]  sixteen X[i]+K256[i] words consumed by the scalar rounds
@   [sp,#64] r0   [sp,#68] input pointer past the loaded block
@   [sp,#72] end of input   [sp,#76] sp value right after the register push
1872 .global sha256_block_data_order_neon
1873 .type   sha256_block_data_order_neon,%function
1874 .align  4
1875 sha256_block_data_order_neon:
1876 .LNEON:
1877         stmdb   sp!,{r4-r12,lr}         @ save r4-r12 and the return address
1878
1879         sub     r11,sp,#16*4+16         @ room for 16 words plus 4 saved values
1880         adrl    r14,K256                @ r14 walks the K256 table
1881         bic     r11,r11,#15             @ align for 128-bit stores
1882         mov     r12,sp                  @ remember sp before the alloca
1883         mov     sp,r11                  @ alloca
1884         add     r2,r1,r2,lsl#6  @ len to point at the end of inp
1885
1886         vld1.8          {q0},[r1]!      @ q0-q3 = first 64-byte input block
1887         vld1.8          {q1},[r1]!
1888         vld1.8          {q2},[r1]!
1889         vld1.8          {q3},[r1]!
1890         vld1.32         {q8},[r14,:128]!        @ q8-q11 = first 16 words of K256
1891         vld1.32         {q9},[r14,:128]!
1892         vld1.32         {q10},[r14,:128]!
1893         vld1.32         {q11},[r14,:128]!
1894         vrev32.8        q0,q0           @ yes, even on
1895         str             r0,[sp,#64]     @ keep r0 for later
1896         vrev32.8        q1,q1           @ big-endian
1897         str             r1,[sp,#68]     @ input pointer, already past this block
1898         mov             r1,sp           @ r1 = store cursor into the X+K area
1899         vrev32.8        q2,q2
1900         str             r2,[sp,#72]     @ end-of-input pointer
1901         vrev32.8        q3,q3
1902         str             r12,[sp,#76]            @ save original sp
1903         vadd.i32        q8,q8,q0        @ X[0..3]+K256[0..3]
1904         vadd.i32        q9,q9,q1
1905         vst1.32         {q8},[r1,:128]! @ park the sums at [sp,#0..#63]
1906         vadd.i32        q10,q10,q2
1907         vst1.32         {q9},[r1,:128]!
1908         vadd.i32        q11,q11,q3
1909         vst1.32         {q10},[r1,:128]!
1910         vst1.32         {q11},[r1,:128]!
1911
1912         ldmia           r0,{r4-r11}     @ eight working words from [r0]
1913         sub             r1,r1,#64       @ rewind store cursor to sp
1914         ldr             r2,[sp,#0]      @ X+K word for the first round
1915         eor             r12,r12,r12     @ r12 = 0: nothing deferred before round 0
1916         eor             r3,r5,r6        @ r5^r6, consumed by the first round's Maj
1917         b               .L_00_48
1918
@ .L_00_48: main NEON loop. One pass runs 16 scalar rounds on r4-r11, each
@ consuming one pre-added X[i]+K256[i] word from [sp,#0..#60]. At loop entry
@ a=r4 b=r5 c=r6 d=r7 e=r8 f=r9 g=r10 h=r11; the roles move down one register
@ per round and are back in place after 16 rounds. r0, r2, r3 and r12 are
@ scratch; r3/r12 alternately carry the deferred Maj(a,b,c) term.
@ Interleaved with the rounds, NEON extends the message schedule held in
@ q0-q3 four words per quarter, adds the next K256 quad fetched through r14
@ and stores the sums through r1. Each store overwrites slots whose four
@ words the scalar rounds have already loaded.
@ The pass repeats until the word at [r14] is zero (K256 terminator).
1919 .align  4
1920 .L_00_48:
1921         vext.8  q8,q0,q1,#4             @ q8 = X[1..4]
1922         add     r11,r11,r2              @ h+=X[i]+K256[i]
1923         eor     r2,r9,r10               @ f^g
1924         eor     r0,r8,r8,ror#5
1925         vext.8  q9,q2,q3,#4             @ q9 = X[9..12]
1926         add     r4,r4,r12               @ add Maj(a,b,c) deferred from the previous round
1927         and     r2,r2,r8                @ (f^g)&e
1928         eor     r12,r0,r8,ror#19        @ Sigma1(e) before the final ror#6
1929         vshr.u32        q10,q8,#7
1930         eor     r0,r4,r4,ror#11
1931         eor     r2,r2,r10               @ Ch(e,f,g)
1932         vadd.i32        q0,q0,q9        @ X[0..3]+=X[9..12]
1933         add     r11,r11,r12,ror#6       @ h+=Sigma1(e)
1934         eor     r12,r4,r5               @ a^b, b^c in next round
1935         vshr.u32        q9,q8,#3
1936         eor     r0,r0,r4,ror#20         @ Sigma0(a) before the final ror#2
1937         add     r11,r11,r2              @ h+=Ch(e,f,g)
1938         vsli.32 q10,q8,#25              @ q10 = X[1..4] ror 7
1939         ldr     r2,[sp,#4]              @ X+K word for the next round
1940         and     r3,r3,r12               @ (b^c)&=(a^b)
1941         vshr.u32        q11,q8,#18
1942         add     r7,r7,r11               @ d+=h
1943         add     r11,r11,r0,ror#2        @ h+=Sigma0(a)
1944         eor     r3,r3,r5                @ Maj(a,b,c), added in the next round
1945         veor    q9,q9,q10
1946         add     r10,r10,r2              @ next round; roles moved down one register
1947         vsli.32 q11,q8,#14              @ q11 = X[1..4] ror 18
1948         eor     r2,r8,r9
1949         eor     r0,r7,r7,ror#5
1950         vshr.u32        d24,d7,#17
1951         add     r11,r11,r3
1952         and     r2,r2,r7
1953         veor    q9,q9,q11               @ q9 = sigma0(X[1..4])
1954         eor     r3,r0,r7,ror#19
1955         eor     r0,r11,r11,ror#11
1956         vsli.32 d24,d7,#15              @ d24 = X[14..15] ror 17
1957         eor     r2,r2,r9
1958         add     r10,r10,r3,ror#6
1959         vshr.u32        d25,d7,#10
1960         eor     r3,r11,r4
1961         eor     r0,r0,r11,ror#20
1962         vadd.i32        q0,q0,q9        @ X[0..3]+=sigma0(X[1..4])
1963         add     r10,r10,r2
1964         ldr     r2,[sp,#8]
1965         veor    d25,d25,d24
1966         and     r12,r12,r3
1967         add     r6,r6,r10
1968         vshr.u32        d24,d7,#19
1969         add     r10,r10,r0,ror#2
1970         eor     r12,r12,r4
1971         vsli.32 d24,d7,#13              @ d24 = X[14..15] ror 19
1972         add     r9,r9,r2
1973         eor     r2,r7,r8
1974         veor    d25,d25,d24             @ d25 = sigma1(X[14..15])
1975         eor     r0,r6,r6,ror#5
1976         add     r10,r10,r12
1977         vadd.i32        d0,d0,d25       @ low half of q0 is now complete
1978         and     r2,r2,r6
1979         eor     r12,r0,r6,ror#19
1980         vshr.u32        d24,d0,#17      @ sigma1 of the two new words, for the high half
1981         eor     r0,r10,r10,ror#11
1982         eor     r2,r2,r8
1983         vsli.32 d24,d0,#15
1984         add     r9,r9,r12,ror#6
1985         eor     r12,r10,r11
1986         vshr.u32        d25,d0,#10
1987         eor     r0,r0,r10,ror#20
1988         add     r9,r9,r2
1989         veor    d25,d25,d24
1990         ldr     r2,[sp,#12]
1991         and     r3,r3,r12
1992         vshr.u32        d24,d0,#19
1993         add     r5,r5,r9
1994         add     r9,r9,r0,ror#2
1995         eor     r3,r3,r11
1996         vld1.32 {q8},[r14,:128]!        @ next four K256 words
1997         add     r8,r8,r2
1998         vsli.32 d24,d0,#13
1999         eor     r2,r6,r7
2000         eor     r0,r5,r5,ror#5
2001         veor    d25,d25,d24
2002         add     r9,r9,r3
2003         and     r2,r2,r5
2004         vadd.i32        d1,d1,d25       @ high half of q0 is now complete
2005         eor     r3,r0,r5,ror#19
2006         eor     r0,r9,r9,ror#11
2007         vadd.i32        q8,q8,q0        @ new schedule words + K256
2008         eor     r2,r2,r7
2009         add     r8,r8,r3,ror#6
2010         eor     r3,r9,r10
2011         eor     r0,r0,r9,ror#20
2012         add     r8,r8,r2
2013         ldr     r2,[sp,#16]
2014         and     r12,r12,r3
2015         add     r4,r4,r8
2016         vst1.32 {q8},[r1,:128]!         @ overwrite the four slots just consumed
2017         add     r8,r8,r0,ror#2
2018         eor     r12,r12,r10
2019         vext.8  q8,q1,q2,#4             @ second quarter: same schedule step for q1
2020         add     r7,r7,r2
2021         eor     r2,r5,r6
2022         eor     r0,r4,r4,ror#5
2023         vext.8  q9,q3,q0,#4
2024         add     r8,r8,r12
2025         and     r2,r2,r4
2026         eor     r12,r0,r4,ror#19
2027         vshr.u32        q10,q8,#7
2028         eor     r0,r8,r8,ror#11
2029         eor     r2,r2,r6
2030         vadd.i32        q1,q1,q9
2031         add     r7,r7,r12,ror#6
2032         eor     r12,r8,r9
2033         vshr.u32        q9,q8,#3
2034         eor     r0,r0,r8,ror#20
2035         add     r7,r7,r2
2036         vsli.32 q10,q8,#25
2037         ldr     r2,[sp,#20]
2038         and     r3,r3,r12
2039         vshr.u32        q11,q8,#18
2040         add     r11,r11,r7
2041         add     r7,r7,r0,ror#2
2042         eor     r3,r3,r9
2043         veor    q9,q9,q10
2044         add     r6,r6,r2
2045         vsli.32 q11,q8,#14
2046         eor     r2,r4,r5
2047         eor     r0,r11,r11,ror#5
2048         vshr.u32        d24,d1,#17
2049         add     r7,r7,r3
2050         and     r2,r2,r11
2051         veor    q9,q9,q11
2052         eor     r3,r0,r11,ror#19
2053         eor     r0,r7,r7,ror#11
2054         vsli.32 d24,d1,#15
2055         eor     r2,r2,r5
2056         add     r6,r6,r3,ror#6
2057         vshr.u32        d25,d1,#10
2058         eor     r3,r7,r8
2059         eor     r0,r0,r7,ror#20
2060         vadd.i32        q1,q1,q9
2061         add     r6,r6,r2
2062         ldr     r2,[sp,#24]
2063         veor    d25,d25,d24
2064         and     r12,r12,r3
2065         add     r10,r10,r6
2066         vshr.u32        d24,d1,#19
2067         add     r6,r6,r0,ror#2
2068         eor     r12,r12,r8
2069         vsli.32 d24,d1,#13
2070         add     r5,r5,r2
2071         eor     r2,r11,r4
2072         veor    d25,d25,d24
2073         eor     r0,r10,r10,ror#5
2074         add     r6,r6,r12
2075         vadd.i32        d2,d2,d25
2076         and     r2,r2,r10
2077         eor     r12,r0,r10,ror#19
2078         vshr.u32        d24,d2,#17
2079         eor     r0,r6,r6,ror#11
2080         eor     r2,r2,r4
2081         vsli.32 d24,d2,#15
2082         add     r5,r5,r12,ror#6
2083         eor     r12,r6,r7
2084         vshr.u32        d25,d2,#10
2085         eor     r0,r0,r6,ror#20
2086         add     r5,r5,r2
2087         veor    d25,d25,d24
2088         ldr     r2,[sp,#28]
2089         and     r3,r3,r12
2090         vshr.u32        d24,d2,#19
2091         add     r9,r9,r5
2092         add     r5,r5,r0,ror#2
2093         eor     r3,r3,r7
2094         vld1.32 {q8},[r14,:128]!
2095         add     r4,r4,r2
2096         vsli.32 d24,d2,#13
2097         eor     r2,r10,r11
2098         eor     r0,r9,r9,ror#5
2099         veor    d25,d25,d24
2100         add     r5,r5,r3
2101         and     r2,r2,r9
2102         vadd.i32        d3,d3,d25
2103         eor     r3,r0,r9,ror#19
2104         eor     r0,r5,r5,ror#11
2105         vadd.i32        q8,q8,q1
2106         eor     r2,r2,r11
2107         add     r4,r4,r3,ror#6
2108         eor     r3,r5,r6
2109         eor     r0,r0,r5,ror#20
2110         add     r4,r4,r2
2111         ldr     r2,[sp,#32]
2112         and     r12,r12,r3
2113         add     r8,r8,r4
2114         vst1.32 {q8},[r1,:128]!
2115         add     r4,r4,r0,ror#2
2116         eor     r12,r12,r6
2117         vext.8  q8,q2,q3,#4             @ third quarter: same schedule step for q2
2118         add     r11,r11,r2
2119         eor     r2,r9,r10
2120         eor     r0,r8,r8,ror#5
2121         vext.8  q9,q0,q1,#4
2122         add     r4,r4,r12
2123         and     r2,r2,r8
2124         eor     r12,r0,r8,ror#19
2125         vshr.u32        q10,q8,#7
2126         eor     r0,r4,r4,ror#11
2127         eor     r2,r2,r10
2128         vadd.i32        q2,q2,q9
2129         add     r11,r11,r12,ror#6
2130         eor     r12,r4,r5
2131         vshr.u32        q9,q8,#3
2132         eor     r0,r0,r4,ror#20
2133         add     r11,r11,r2
2134         vsli.32 q10,q8,#25
2135         ldr     r2,[sp,#36]
2136         and     r3,r3,r12
2137         vshr.u32        q11,q8,#18
2138         add     r7,r7,r11
2139         add     r11,r11,r0,ror#2
2140         eor     r3,r3,r5
2141         veor    q9,q9,q10
2142         add     r10,r10,r2
2143         vsli.32 q11,q8,#14
2144         eor     r2,r8,r9
2145         eor     r0,r7,r7,ror#5
2146         vshr.u32        d24,d3,#17
2147         add     r11,r11,r3
2148         and     r2,r2,r7
2149         veor    q9,q9,q11
2150         eor     r3,r0,r7,ror#19
2151         eor     r0,r11,r11,ror#11
2152         vsli.32 d24,d3,#15
2153         eor     r2,r2,r9
2154         add     r10,r10,r3,ror#6
2155         vshr.u32        d25,d3,#10
2156         eor     r3,r11,r4
2157         eor     r0,r0,r11,ror#20
2158         vadd.i32        q2,q2,q9
2159         add     r10,r10,r2
2160         ldr     r2,[sp,#40]
2161         veor    d25,d25,d24
2162         and     r12,r12,r3
2163         add     r6,r6,r10
2164         vshr.u32        d24,d3,#19
2165         add     r10,r10,r0,ror#2
2166         eor     r12,r12,r4
2167         vsli.32 d24,d3,#13
2168         add     r9,r9,r2
2169         eor     r2,r7,r8
2170         veor    d25,d25,d24
2171         eor     r0,r6,r6,ror#5
2172         add     r10,r10,r12
2173         vadd.i32        d4,d4,d25
2174         and     r2,r2,r6
2175         eor     r12,r0,r6,ror#19
2176         vshr.u32        d24,d4,#17
2177         eor     r0,r10,r10,ror#11
2178         eor     r2,r2,r8
2179         vsli.32 d24,d4,#15
2180         add     r9,r9,r12,ror#6
2181         eor     r12,r10,r11
2182         vshr.u32        d25,d4,#10
2183         eor     r0,r0,r10,ror#20
2184         add     r9,r9,r2
2185         veor    d25,d25,d24
2186         ldr     r2,[sp,#44]
2187         and     r3,r3,r12
2188         vshr.u32        d24,d4,#19
2189         add     r5,r5,r9
2190         add     r9,r9,r0,ror#2
2191         eor     r3,r3,r11
2192         vld1.32 {q8},[r14,:128]!
2193         add     r8,r8,r2
2194         vsli.32 d24,d4,#13
2195         eor     r2,r6,r7
2196         eor     r0,r5,r5,ror#5
2197         veor    d25,d25,d24
2198         add     r9,r9,r3
2199         and     r2,r2,r5
2200         vadd.i32        d5,d5,d25
2201         eor     r3,r0,r5,ror#19
2202         eor     r0,r9,r9,ror#11
2203         vadd.i32        q8,q8,q2
2204         eor     r2,r2,r7
2205         add     r8,r8,r3,ror#6
2206         eor     r3,r9,r10
2207         eor     r0,r0,r9,ror#20
2208         add     r8,r8,r2
2209         ldr     r2,[sp,#48]
2210         and     r12,r12,r3
2211         add     r4,r4,r8
2212         vst1.32 {q8},[r1,:128]!
2213         add     r8,r8,r0,ror#2
2214         eor     r12,r12,r10
2215         vext.8  q8,q3,q0,#4             @ fourth quarter: same schedule step for q3
2216         add     r7,r7,r2
2217         eor     r2,r5,r6
2218         eor     r0,r4,r4,ror#5
2219         vext.8  q9,q1,q2,#4
2220         add     r8,r8,r12
2221         and     r2,r2,r4
2222         eor     r12,r0,r4,ror#19
2223         vshr.u32        q10,q8,#7
2224         eor     r0,r8,r8,ror#11
2225         eor     r2,r2,r6
2226         vadd.i32        q3,q3,q9
2227         add     r7,r7,r12,ror#6
2228         eor     r12,r8,r9
2229         vshr.u32        q9,q8,#3
2230         eor     r0,r0,r8,ror#20
2231         add     r7,r7,r2
2232         vsli.32 q10,q8,#25
2233         ldr     r2,[sp,#52]
2234         and     r3,r3,r12
2235         vshr.u32        q11,q8,#18
2236         add     r11,r11,r7
2237         add     r7,r7,r0,ror#2
2238         eor     r3,r3,r9
2239         veor    q9,q9,q10
2240         add     r6,r6,r2
2241         vsli.32 q11,q8,#14
2242         eor     r2,r4,r5
2243         eor     r0,r11,r11,ror#5
2244         vshr.u32        d24,d5,#17
2245         add     r7,r7,r3
2246         and     r2,r2,r11
2247         veor    q9,q9,q11
2248         eor     r3,r0,r11,ror#19
2249         eor     r0,r7,r7,ror#11
2250         vsli.32 d24,d5,#15
2251         eor     r2,r2,r5
2252         add     r6,r6,r3,ror#6
2253         vshr.u32        d25,d5,#10
2254         eor     r3,r7,r8
2255         eor     r0,r0,r7,ror#20
2256         vadd.i32        q3,q3,q9
2257         add     r6,r6,r2
2258         ldr     r2,[sp,#56]
2259         veor    d25,d25,d24
2260         and     r12,r12,r3
2261         add     r10,r10,r6
2262         vshr.u32        d24,d5,#19
2263         add     r6,r6,r0,ror#2
2264         eor     r12,r12,r8
2265         vsli.32 d24,d5,#13
2266         add     r5,r5,r2
2267         eor     r2,r11,r4
2268         veor    d25,d25,d24
2269         eor     r0,r10,r10,ror#5
2270         add     r6,r6,r12
2271         vadd.i32        d6,d6,d25
2272         and     r2,r2,r10
2273         eor     r12,r0,r10,ror#19
2274         vshr.u32        d24,d6,#17
2275         eor     r0,r6,r6,ror#11
2276         eor     r2,r2,r4
2277         vsli.32 d24,d6,#15
2278         add     r5,r5,r12,ror#6
2279         eor     r12,r6,r7
2280         vshr.u32        d25,d6,#10
2281         eor     r0,r0,r6,ror#20
2282         add     r5,r5,r2
2283         veor    d25,d25,d24
2284         ldr     r2,[sp,#60]
2285         and     r3,r3,r12
2286         vshr.u32        d24,d6,#19
2287         add     r9,r9,r5
2288         add     r5,r5,r0,ror#2
2289         eor     r3,r3,r7
2290         vld1.32 {q8},[r14,:128]!
2291         add     r4,r4,r2
2292         vsli.32 d24,d6,#13
2293         eor     r2,r10,r11
2294         eor     r0,r9,r9,ror#5
2295         veor    d25,d25,d24
2296         add     r5,r5,r3
2297         and     r2,r2,r9
2298         vadd.i32        d7,d7,d25
2299         eor     r3,r0,r9,ror#19
2300         eor     r0,r5,r5,ror#11
2301         vadd.i32        q8,q8,q3
2302         eor     r2,r2,r11
2303         add     r4,r4,r3,ror#6
2304         eor     r3,r5,r6
2305         eor     r0,r0,r5,ror#20
2306         add     r4,r4,r2
2307         ldr     r2,[r14]                @ peek at the next K256 word
2308         and     r12,r12,r3
2309         add     r8,r8,r4
2310         vst1.32 {q8},[r1,:128]!
2311         add     r4,r4,r0,ror#2
2312         eor     r12,r12,r6
2313         teq     r2,#0                           @ check for K256 terminator
2314         ldr     r2,[sp,#0]              @ X+K word for the next round (flags untouched)
2315         sub     r1,r1,#64               @ rewind store cursor to sp
2316         bne     .L_00_48                @ next word non-zero: run another pass
2317
2318         ldr             r1,[sp,#68]
2319         ldr             r0,[sp,#72]
2320         sub             r14,r14,#256    @ rewind r14
2321         teq             r1,r0
2322         it              eq
2323         subeq           r1,r1,#64               @ avoid SEGV
2324         vld1.8          {q0},[r1]!              @ load next input block
2325         vld1.8          {q1},[r1]!
2326         vld1.8          {q2},[r1]!
2327         vld1.8          {q3},[r1]!
2328         it              ne
2329         strne           r1,[sp,#68]
2330         mov             r1,sp
2331         add     r11,r11,r2
2332         eor     r2,r9,r10
2333         eor     r0,r8,r8,ror#5
2334         add     r4,r4,r12
2335         vld1.32 {q8},[r14,:128]!
2336         and     r2,r2,r8
2337         eor     r12,r0,r8,ror#19
2338         eor     r0,r4,r4,ror#11
2339         eor     r2,r2,r10
2340         vrev32.8        q0,q0
2341         add     r11,r11,r12,ror#6
2342         eor     r12,r4,r5
2343         eor     r0,r0,r4,ror#20
2344         add     r11,r11,r2
2345         vadd.i32        q8,q8,q0
2346         ldr     r2,[sp,#4]
2347         and     r3,r3,r12
2348         add     r7,r7,r11
2349         add     r11,r11,r0,ror#2
2350         eor     r3,r3,r5
2351         add     r10,r10,r2
2352         eor     r2,r8,r9
2353         eor     r0,r7,r7,ror#5
2354         add     r11,r11,r3
2355         and     r2,r2,r7
2356         eor     r3,r0,r7,ror#19
2357         eor     r0,r11,r11,ror#11
2358         eor     r2,r2,r9
2359         add     r10,r10,r3,ror#6
2360         eor     r3,r11,r4
2361         eor     r0,r0,r11,ror#20
2362         add     r10,r10,r2
2363         ldr     r2,[sp,#8]
2364         and     r12,r12,r3
2365         add     r6,r6,r10
2366         add     r10,r10,r0,ror#2
2367         eor     r12,r12,r4
2368         add     r9,r9,r2
2369         eor     r2,r7,r8
2370         eor     r0,r6,r6,ror#5
2371         add     r10,r10,r12
2372         and     r2,r2,r6
2373         eor     r12,r0,r6,ror#19
2374         eor     r0,r10,r10,ror#11
2375         eor     r2,r2,r8
2376         add     r9,r9,r12,ror#6
2377         eor     r12,r10,r11
2378         eor     r0,r0,r10,ror#20
2379         add     r9,r9,r2
2380         ldr     r2,[sp,#12]
2381         and     r3,r3,r12
2382         add     r5,r5,r9
2383         add     r9,r9,r0,ror#2
2384         eor     r3,r3,r11
2385         add     r8,r8,r2
2386         eor     r2,r6,r7
2387         eor     r0,r5,r5,ror#5
2388         add     r9,r9,r3
2389         and     r2,r2,r5
2390         eor     r3,r0,r5,ror#19
2391         eor     r0,r9,r9,ror#11
2392         eor     r2,r2,r7
2393         add     r8,r8,r3,ror#6
2394         eor     r3,r9,r10
2395         eor     r0,r0,r9,ror#20
2396         add     r8,r8,r2
2397         ldr     r2,[sp,#16]
2398         and     r12,r12,r3
2399         add     r4,r4,r8
2400         add     r8,r8,r0,ror#2
2401         eor     r12,r12,r10
2402         vst1.32 {q8},[r1,:128]!
2403         add     r7,r7,r2
2404         eor     r2,r5,r6
2405         eor     r0,r4,r4,ror#5
2406         add     r8,r8,r12
2407         vld1.32 {q8},[r14,:128]!
2408         and     r2,r2,r4
2409         eor     r12,r0,r4,ror#19
2410         eor     r0,r8,r8,ror#11
2411         eor     r2,r2,r6
2412         vrev32.8        q1,q1
2413         add     r7,r7,r12,ror#6
2414         eor     r12,r8,r9
2415         eor     r0,r0,r8,ror#20
2416         add     r7,r7,r2
2417         vadd.i32        q8,q8,q1
2418         ldr     r2,[sp,#20]
2419         and     r3,r3,r12
2420         add     r11,r11,r7
2421         add     r7,r7,r0,ror#2
2422         eor     r3,r3,r9
2423         add     r6,r6,r2
2424         eor     r2,r4,r5
2425         eor     r0,r11,r11,ror#5
2426         add     r7,r7,r3
2427         and     r2,r2,r11
2428         eor     r3,r0,r11,ror#19
2429         eor     r0,r7,r7,ror#11
2430         eor     r2,r2,r5
2431         add     r6,r6,r3,ror#6
2432         eor     r3,r7,r8
2433         eor     r0,r0,r7,ror#20
2434         add     r6,r6,r2
2435         ldr     r2,[sp,#24]
2436         and     r12,r12,r3
2437         add     r10,r10,r6
2438         add     r6,r6,r0,ror#2
2439         eor     r12,r12,r8
2440         add     r5,r5,r2
2441         eor     r2,r11,r4
2442         eor     r0,r10,r10,ror#5
2443         add     r6,r6,r12
2444         and     r2,r2,r10
2445         eor     r12,r0,r10,ror#19
2446         eor     r0,r6,r6,ror#11
2447         eor     r2,r2,r4
2448         add     r5,r5,r12,ror#6
2449         eor     r12,r6,r7
2450         eor     r0,r0,r6,ror#20
2451         add     r5,r5,r2
2452         ldr     r2,[sp,#28]
2453         and     r3,r3,r12
2454         add     r9,r9,r5
2455         add     r5,r5,r0,ror#2
2456         eor     r3,r3,r7
2457         add     r4,r4,r2
2458         eor     r2,r10,r11
2459         eor     r0,r9,r9,ror#5
2460         add     r5,r5,r3
2461         and     r2,r2,r9
2462         eor     r3,r0,r9,ror#19
2463         eor     r0,r5,r5,ror#11
2464         eor     r2,r2,r11
2465         add     r4,r4,r3,ror#6
2466         eor     r3,r5,r6
2467         eor     r0,r0,r5,ror#20
2468         add     r4,r4,r2
2469         ldr     r2,[sp,#32]
2470         and     r12,r12,r3
2471         add     r8,r8,r4
2472         add     r4,r4,r0,ror#2
2473         eor     r12,r12,r6
2474         vst1.32 {q8},[r1,:128]!
2475         add     r11,r11,r2
2476         eor     r2,r9,r10
2477         eor     r0,r8,r8,ror#5
2478         add     r4,r4,r12
2479         vld1.32 {q8},[r14,:128]!
2480         and     r2,r2,r8
2481         eor     r12,r0,r8,ror#19
2482         eor     r0,r4,r4,ror#11
2483         eor     r2,r2,r10
2484         vrev32.8        q2,q2
2485         add     r11,r11,r12,ror#6
2486         eor     r12,r4,r5
2487         eor     r0,r0,r4,ror#20
2488         add     r11,r11,r2
2489         vadd.i32        q8,q8,q2
2490         ldr     r2,[sp,#36]
2491         and     r3,r3,r12
2492         add     r7,r7,r11
2493         add     r11,r11,r0,ror#2
2494         eor     r3,r3,r5
2495         add     r10,r10,r2
2496         eor     r2,r8,r9
2497         eor     r0,r7,r7,ror#5
2498         add     r11,r11,r3
2499         and     r2,r2,r7
2500         eor     r3,r0,r7,ror#19
2501         eor     r0,r11,r11,ror#11
2502         eor     r2,r2,r9
2503         add     r10,r10,r3,ror#6
2504         eor     r3,r11,r4
2505         eor     r0,r0,r11,ror#20
2506         add     r10,r10,r2
2507         ldr     r2,[sp,#40]
2508         and     r12,r12,r3
2509         add     r6,r6,r10
2510         add     r10,r10,r0,ror#2
2511         eor     r12,r12,r4
2512         add     r9,r9,r2
2513         eor     r2,r7,r8
2514         eor     r0,r6,r6,ror#5
2515         add     r10,r10,r12
2516         and     r2,r2,r6
2517         eor     r12,r0,r6,ror#19
2518         eor     r0,r10,r10,ror#11
2519         eor     r2,r2,r8
2520         add     r9,r9,r12,ror#6
2521         eor     r12,r10,r11
2522         eor     r0,r0,r10,ror#20
2523         add     r9,r9,r2
2524         ldr     r2,[sp,#44]
2525         and     r3,r3,r12
2526         add     r5,r5,r9
2527         add     r9,r9,r0,ror#2
2528         eor     r3,r3,r11
2529         add     r8,r8,r2
2530         eor     r2,r6,r7
2531         eor     r0,r5,r5,ror#5
2532         add     r9,r9,r3
2533         and     r2,r2,r5
2534         eor     r3,r0,r5,ror#19
2535         eor     r0,r9,r9,ror#11
2536         eor     r2,r2,r7
2537         add     r8,r8,r3,ror#6
2538         eor     r3,r9,r10
2539         eor     r0,r0,r9,ror#20
2540         add     r8,r8,r2
2541         ldr     r2,[sp,#48]
2542         and     r12,r12,r3
2543         add     r4,r4,r8
2544         add     r8,r8,r0,ror#2
2545         eor     r12,r12,r10
2546         vst1.32 {q8},[r1,:128]!
2547         add     r7,r7,r2
2548         eor     r2,r5,r6
2549         eor     r0,r4,r4,ror#5
2550         add     r8,r8,r12
2551         vld1.32 {q8},[r14,:128]!
2552         and     r2,r2,r4
2553         eor     r12,r0,r4,ror#19
2554         eor     r0,r8,r8,ror#11
2555         eor     r2,r2,r6
2556         vrev32.8        q3,q3
2557         add     r7,r7,r12,ror#6
2558         eor     r12,r8,r9
2559         eor     r0,r0,r8,ror#20
2560         add     r7,r7,r2
2561         vadd.i32        q8,q8,q3
2562         ldr     r2,[sp,#52]
2563         and     r3,r3,r12
2564         add     r11,r11,r7
2565         add     r7,r7,r0,ror#2
2566         eor     r3,r3,r9
2567         add     r6,r6,r2
2568         eor     r2,r4,r5
2569         eor     r0,r11,r11,ror#5
2570         add     r7,r7,r3
2571         and     r2,r2,r11
2572         eor     r3,r0,r11,ror#19
2573         eor     r0,r7,r7,ror#11
2574         eor     r2,r2,r5
2575         add     r6,r6,r3,ror#6
2576         eor     r3,r7,r8
2577         eor     r0,r0,r7,ror#20
2578         add     r6,r6,r2
2579         ldr     r2,[sp,#56]
2580         and     r12,r12,r3
2581         add     r10,r10,r6
2582         add     r6,r6,r0,ror#2
2583         eor     r12,r12,r8
2584         add     r5,r5,r2
2585         eor     r2,r11,r4
2586         eor     r0,r10,r10,ror#5
2587         add     r6,r6,r12
2588         and     r2,r2,r10
2589         eor     r12,r0,r10,ror#19
2590         eor     r0,r6,r6,ror#11
2591         eor     r2,r2,r4
2592         add     r5,r5,r12,ror#6
2593         eor     r12,r6,r7
2594         eor     r0,r0,r6,ror#20
2595         add     r5,r5,r2
2596         ldr     r2,[sp,#60]
2597         and     r3,r3,r12
2598         add     r9,r9,r5
2599         add     r5,r5,r0,ror#2
2600         eor     r3,r3,r7
2601         add     r4,r4,r2
2602         eor     r2,r10,r11
2603         eor     r0,r9,r9,ror#5
2604         add     r5,r5,r3
2605         and     r2,r2,r9
2606         eor     r3,r0,r9,ror#19
2607         eor     r0,r5,r5,ror#11
2608         eor     r2,r2,r11
2609         add     r4,r4,r3,ror#6
2610         eor     r3,r5,r6
2611         eor     r0,r0,r5,ror#20
2612         add     r4,r4,r2
2613         ldr     r2,[sp,#64]
2614         and     r12,r12,r3
2615         add     r8,r8,r4
2616         add     r4,r4,r0,ror#2
2617         eor     r12,r12,r6
2618         vst1.32 {q8},[r1,:128]!
2619         ldr     r0,[r2,#0]
2620         add     r4,r4,r12                       @ h+=Maj(a,b,c) from the past
2621         ldr     r12,[r2,#4]
2622         ldr     r3,[r2,#8]
2623         ldr     r1,[r2,#12]
2624         add     r4,r4,r0                        @ accumulate
2625         ldr     r0,[r2,#16]
2626         add     r5,r5,r12
2627         ldr     r12,[r2,#20]
2628         add     r6,r6,r3
2629         ldr     r3,[r2,#24]
2630         add     r7,r7,r1
2631         ldr     r1,[r2,#28]
2632         add     r8,r8,r0
2633         str     r4,[r2],#4
2634         add     r9,r9,r12
2635         str     r5,[r2],#4
2636         add     r10,r10,r3
2637         str     r6,[r2],#4
2638         add     r11,r11,r1
2639         str     r7,[r2],#4
2640         stmia   r2,{r8-r11}
2641
2642         ittte   ne
2643         movne   r1,sp
2644         ldrne   r2,[sp,#0]
2645         eorne   r12,r12,r12
2646         ldreq   sp,[sp,#76]                     @ restore original sp
2647         itt     ne
2648         eorne   r3,r5,r6
2649         bne     .L_00_48
2650
2651         ldmia   sp!,{r4-r12,pc}
2652 .size   sha256_block_data_order_neon,.-sha256_block_data_order_neon
2653 #endif
2654 #if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
     @ ARMv8 code path. The guard above assembles this section only when
     @ __ARM_MAX_ARCH__ is at least 7 and __KERNEL__ is not defined.
2655
     @ INST emits one instruction as four raw bytes; every use below carries
     @ a comment naming the instruction those bytes stand for. The Thumb-2
     @ variant swaps the two byte pairs and ORs 0xc into the fourth argument,
     @ the other variant emits the bytes in argument order.
     @ NOTE(review): presumably raw bytes are used so that assemblers without
     @ the SHA-256 mnemonics can still build this file - confirm.
2656 # ifdef __thumb2__
2657 #  define INST(a,b,c,d) .byte   c,d|0xc,a,b
2658 # else
2659 #  define INST(a,b,c,d) .byte   a,b,c,d
2660 # endif
2661
     @ sha256_block_data_order_armv8
     @
     @ Register use, as read from the code below:
     @   r0      - address of 32 bytes of hash state; loaded into q0,q1 on
     @             entry and written back from q0,q1 before returning
     @   r1      - input pointer, advanced by 64 bytes per loop iteration
     @   r2      - on entry a count of 64-byte blocks; converted to the
     @             end-of-input address (r1 + r2*64)
     @   r3      - walks the K256 table defined near the top of the file
     @   q8-q11  - current input block after per-word byte reversal
     @   q12,q13 - K256 words plus message words, alternately fed to the
     @             sha256h/sha256h2 pairs
     @   q14,q15 - copy of q0,q1 taken at the top of each iteration
     @   q2      - copy of q0 taken before each sha256h, passed to sha256h2
     @
     @ No core registers are pushed and the function returns with bx lr.
     @
     @ NOTE(review): the loop body runs before the end-of-input test can
     @ take effect, so a block count of 0 would not stop after zero blocks.
     @ Presumably callers never pass 0 - confirm against the dispatcher.
2662 .type   sha256_block_data_order_armv8,%function
2663 .align  5
2664 sha256_block_data_order_armv8:
2665 .LARMv8:
2666         vld1.32 {q0,q1},[r0]            @ load the 32-byte hash state
2667 # ifdef __thumb2__
             @ r3 = address of .LARMv8 minus the distance back to K256,
             @ that is, the address of K256
2668         adr     r3,.LARMv8
2669         sub     r3,r3,#.LARMv8-K256
2670 # else
2671         adrl    r3,K256                 @ r3 = address of K256
2672 # endif
2673         add     r2,r1,r2,lsl#6  @ len to point at the end of inp
2674
2675 .Loop_v8:
             @ Fetch one 64-byte block and the first four K256 words, then
             @ reverse the byte order inside every 32-bit word of the block.
2676         vld1.8          {q8-q9},[r1]!
2677         vld1.8          {q10-q11},[r1]!
2678         vld1.32         {q12},[r3]!
2679         vrev32.8        q8,q8
2680         vrev32.8        q9,q9
2681         vrev32.8        q10,q10
2682         vrev32.8        q11,q11
2683         vmov            q14,q0  @ offload
2684         vmov            q15,q1
             @ Flags set here are consumed by the bne at the bottom of the
             @ loop; no instruction in between is written to update them.
2685         teq             r1,r2
             @ Twelve groups follow with the same shape: load the next four
             @ K256 words into the idle one of q12/q13, add message words to
             @ the other, copy q0 to q2, run sha256h and sha256h2, and update
             @ one message register with sha256su0/sha256su1.
2686         vld1.32         {q13},[r3]!
2687         vadd.i32        q12,q12,q8
2688         INST(0xe2,0x03,0xfa,0xf3)       @ sha256su0 q8,q9
2689         vmov            q2,q0
2690         INST(0x68,0x0c,0x02,0xf3)       @ sha256h q0,q1,q12
2691         INST(0x68,0x2c,0x14,0xf3)       @ sha256h2 q1,q2,q12
2692         INST(0xe6,0x0c,0x64,0xf3)       @ sha256su1 q8,q10,q11
2693         vld1.32         {q12},[r3]!
2694         vadd.i32        q13,q13,q9
2695         INST(0xe4,0x23,0xfa,0xf3)       @ sha256su0 q9,q10
2696         vmov            q2,q0
2697         INST(0x6a,0x0c,0x02,0xf3)       @ sha256h q0,q1,q13
2698         INST(0x6a,0x2c,0x14,0xf3)       @ sha256h2 q1,q2,q13
2699         INST(0xe0,0x2c,0x66,0xf3)       @ sha256su1 q9,q11,q8
2700         vld1.32         {q13},[r3]!
2701         vadd.i32        q12,q12,q10
2702         INST(0xe6,0x43,0xfa,0xf3)       @ sha256su0 q10,q11
2703         vmov            q2,q0
2704         INST(0x68,0x0c,0x02,0xf3)       @ sha256h q0,q1,q12
2705         INST(0x68,0x2c,0x14,0xf3)       @ sha256h2 q1,q2,q12
2706         INST(0xe2,0x4c,0x60,0xf3)       @ sha256su1 q10,q8,q9
2707         vld1.32         {q12},[r3]!
2708         vadd.i32        q13,q13,q11
2709         INST(0xe0,0x63,0xfa,0xf3)       @ sha256su0 q11,q8
2710         vmov            q2,q0
2711         INST(0x6a,0x0c,0x02,0xf3)       @ sha256h q0,q1,q13
2712         INST(0x6a,0x2c,0x14,0xf3)       @ sha256h2 q1,q2,q13
2713         INST(0xe4,0x6c,0x62,0xf3)       @ sha256su1 q11,q9,q10
2714         vld1.32         {q13},[r3]!
2715         vadd.i32        q12,q12,q8
2716         INST(0xe2,0x03,0xfa,0xf3)       @ sha256su0 q8,q9
2717         vmov            q2,q0
2718         INST(0x68,0x0c,0x02,0xf3)       @ sha256h q0,q1,q12
2719         INST(0x68,0x2c,0x14,0xf3)       @ sha256h2 q1,q2,q12
2720         INST(0xe6,0x0c,0x64,0xf3)       @ sha256su1 q8,q10,q11
2721         vld1.32         {q12},[r3]!
2722         vadd.i32        q13,q13,q9
2723         INST(0xe4,0x23,0xfa,0xf3)       @ sha256su0 q9,q10
2724         vmov            q2,q0
2725         INST(0x6a,0x0c,0x02,0xf3)       @ sha256h q0,q1,q13
2726         INST(0x6a,0x2c,0x14,0xf3)       @ sha256h2 q1,q2,q13
2727         INST(0xe0,0x2c,0x66,0xf3)       @ sha256su1 q9,q11,q8
2728         vld1.32         {q13},[r3]!
2729         vadd.i32        q12,q12,q10
2730         INST(0xe6,0x43,0xfa,0xf3)       @ sha256su0 q10,q11
2731         vmov            q2,q0
2732         INST(0x68,0x0c,0x02,0xf3)       @ sha256h q0,q1,q12
2733         INST(0x68,0x2c,0x14,0xf3)       @ sha256h2 q1,q2,q12
2734         INST(0xe2,0x4c,0x60,0xf3)       @ sha256su1 q10,q8,q9
2735         vld1.32         {q12},[r3]!
2736         vadd.i32        q13,q13,q11
2737         INST(0xe0,0x63,0xfa,0xf3)       @ sha256su0 q11,q8
2738         vmov            q2,q0
2739         INST(0x6a,0x0c,0x02,0xf3)       @ sha256h q0,q1,q13
2740         INST(0x6a,0x2c,0x14,0xf3)       @ sha256h2 q1,q2,q13
2741         INST(0xe4,0x6c,0x62,0xf3)       @ sha256su1 q11,q9,q10
2742         vld1.32         {q13},[r3]!
2743         vadd.i32        q12,q12,q8
2744         INST(0xe2,0x03,0xfa,0xf3)       @ sha256su0 q8,q9
2745         vmov            q2,q0
2746         INST(0x68,0x0c,0x02,0xf3)       @ sha256h q0,q1,q12
2747         INST(0x68,0x2c,0x14,0xf3)       @ sha256h2 q1,q2,q12
2748         INST(0xe6,0x0c,0x64,0xf3)       @ sha256su1 q8,q10,q11
2749         vld1.32         {q12},[r3]!
2750         vadd.i32        q13,q13,q9
2751         INST(0xe4,0x23,0xfa,0xf3)       @ sha256su0 q9,q10
2752         vmov            q2,q0
2753         INST(0x6a,0x0c,0x02,0xf3)       @ sha256h q0,q1,q13
2754         INST(0x6a,0x2c,0x14,0xf3)       @ sha256h2 q1,q2,q13
2755         INST(0xe0,0x2c,0x66,0xf3)       @ sha256su1 q9,q11,q8
2756         vld1.32         {q13},[r3]!
2757         vadd.i32        q12,q12,q10
2758         INST(0xe6,0x43,0xfa,0xf3)       @ sha256su0 q10,q11
2759         vmov            q2,q0
2760         INST(0x68,0x0c,0x02,0xf3)       @ sha256h q0,q1,q12
2761         INST(0x68,0x2c,0x14,0xf3)       @ sha256h2 q1,q2,q12
2762         INST(0xe2,0x4c,0x60,0xf3)       @ sha256su1 q10,q8,q9
2763         vld1.32         {q12},[r3]!
2764         vadd.i32        q13,q13,q11
2765         INST(0xe0,0x63,0xfa,0xf3)       @ sha256su0 q11,q8
2766         vmov            q2,q0
2767         INST(0x6a,0x0c,0x02,0xf3)       @ sha256h q0,q1,q13
2768         INST(0x6a,0x2c,0x14,0xf3)       @ sha256h2 q1,q2,q13
2769         INST(0xe4,0x6c,0x62,0xf3)       @ sha256su1 q11,q9,q10
             @ The last four groups keep the load/add/sha256h/sha256h2 shape
             @ but contain no sha256su0/sha256su1 step.
2770         vld1.32         {q13},[r3]!
2771         vadd.i32        q12,q12,q8
2772         vmov            q2,q0
2773         INST(0x68,0x0c,0x02,0xf3)       @ sha256h q0,q1,q12
2774         INST(0x68,0x2c,0x14,0xf3)       @ sha256h2 q1,q2,q12
2775
2776         vld1.32         {q12},[r3]!
2777         vadd.i32        q13,q13,q9
2778         vmov            q2,q0
2779         INST(0x6a,0x0c,0x02,0xf3)       @ sha256h q0,q1,q13
2780         INST(0x6a,0x2c,0x14,0xf3)       @ sha256h2 q1,q2,q13
2781
             @ Final K256 load has no post-increment, so r3 has moved by the
             @ fifteen earlier 16-byte steps; the sub below undoes exactly that.
2782         vld1.32         {q13},[r3]
2783         vadd.i32        q12,q12,q10
2784         sub             r3,r3,#256-16   @ rewind
2785         vmov            q2,q0
2786         INST(0x68,0x0c,0x02,0xf3)       @ sha256h q0,q1,q12
2787         INST(0x68,0x2c,0x14,0xf3)       @ sha256h2 q1,q2,q12
2788
2789         vadd.i32        q13,q13,q11
2790         vmov            q2,q0
2791         INST(0x6a,0x0c,0x02,0xf3)       @ sha256h q0,q1,q13
2792         INST(0x6a,0x2c,0x14,0xf3)       @ sha256h2 q1,q2,q13
2793
             @ Add the state saved at the top of the iteration (q14,q15) to
             @ the new working values, then loop while r1 differs from r2.
2794         vadd.i32        q0,q0,q14
2795         vadd.i32        q1,q1,q15
2796         it              ne
2797         bne             .Loop_v8
2798
2799         vst1.32         {q0,q1},[r0]    @ write the updated state back
2800
2801         bx      lr              @ return to caller
2802 .size   sha256_block_data_order_armv8,.-sha256_block_data_order_armv8
2803 #endif
2804 .asciz  "SHA256 block transform for ARMv4/NEON/ARMv8, CRYPTOGAMS by <appro@openssl.org>"
     @ The line above assembles a NUL-terminated identification string into
     @ the current section; the .align below restores alignment after that
     @ variable-length data.
2805 .align  2
     @ Outside the kernel build, and only when __ARM_MAX_ARCH__ is at least 7,
     @ declare OPENSSL_armcap_P as a common symbol of size 4 and alignment 4.
     @ NOTE(review): presumably this is the CPU capability word consulted by
     @ the dispatch code earlier in the file - not visible from here, confirm.
2806 #if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
2807 .comm   OPENSSL_armcap_P,4,4
2808 #endif