# salsa20_pm.s version 20051229
# D. J. Bernstein
# Public domain.
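# Salsa20 stream cipher core for 32-bit x86 (i586), exposing the ECRYPT
# API entry points ECRYPT_encrypt_bytes, ECRYPT_keysetup and
# ECRYPT_ivsetup defined below.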

# enter ECRYPT_encrypt_bytes
.text
.p2align 5
.globl ECRYPT_encrypt_bytes
ECRYPT_encrypt_bytes:
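        # Reserve a 32-byte-aligned scratch frame of at least 256 bytes;
        # %eax records the adjustment, so the arguments remain reachable
        # at 4(%esp,%eax) and the epilogue restores %esp with one add.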
        mov     %esp,%eax
        and     $31,%eax
        add     $256,%eax
        sub     %eax,%esp
        # eax_stack = eax
        movl    %eax,80(%esp)
        # ebx_stack = ebx
        movl    %ebx,84(%esp)
        # esi_stack = esi
        movl    %esi,88(%esp)
        # edi_stack = edi
        movl    %edi,92(%esp)
        # ebp_stack = ebp
        movl    %ebp,96(%esp)
        # x = arg1
        movl    4(%esp,%eax),%edx
        # m = arg2
        movl    8(%esp,%eax),%esi
        # out = arg3
        movl    12(%esp,%eax),%edi
        # bytes = arg4
        movl    16(%esp,%eax),%ebx
        # bytes -= 0
        sub     $0,%ebx
        # goto done if unsigned<=
        jbe     ._done
._start:
        # in0 = *(uint32 *) (x + 0)
        movl    0(%edx),%eax
        # in1 = *(uint32 *) (x + 4)
        movl    4(%edx),%ecx
        # in2 = *(uint32 *) (x + 8)
        movl    8(%edx),%ebp
        # j0 = in0
        movl    %eax,164(%esp)
        # in3 = *(uint32 *) (x + 12)
        movl    12(%edx),%eax
        # j1 = in1
        movl    %ecx,168(%esp)
        # in4 = *(uint32 *) (x + 16)
        movl    16(%edx),%ecx
        # j2 = in2
        movl    %ebp,172(%esp)
        # in5 = *(uint32 *) (x + 20)
        movl    20(%edx),%ebp
        # j3 = in3
        movl    %eax,176(%esp)
        # in6 = *(uint32 *) (x + 24)
        movl    24(%edx),%eax
        # j4 = in4
        movl    %ecx,180(%esp)
        # in7 = *(uint32 *) (x + 28)
        movl    28(%edx),%ecx
        # j5 = in5
        movl    %ebp,184(%esp)
        # in8 = *(uint32 *) (x + 32)
        movl    32(%edx),%ebp
        # j6 = in6
        movl    %eax,188(%esp)
        # in9 = *(uint32 *) (x + 36)
        movl    36(%edx),%eax
        # j7 = in7
        movl    %ecx,192(%esp)
        # in10 = *(uint32 *) (x + 40)
        movl    40(%edx),%ecx
        # j8 = in8
        movl    %ebp,196(%esp)
        # in11 = *(uint32 *) (x + 44)
        movl    44(%edx),%ebp
        # j9 = in9
        movl    %eax,200(%esp)
        # in12 = *(uint32 *) (x + 48)
        movl    48(%edx),%eax
        # j10 = in10
        movl    %ecx,204(%esp)
        # in13 = *(uint32 *) (x + 52)
        movl    52(%edx),%ecx
        # j11 = in11
        movl    %ebp,208(%esp)
        # in14 = *(uint32 *) (x + 56)
        movl    56(%edx),%ebp
        # j12 = in12
        movl    %eax,212(%esp)
        # in15 = *(uint32 *) (x + 60)
        movl    60(%edx),%eax
        # j13 = in13
        movl    %ecx,216(%esp)
        # j14 = in14
        movl    %ebp,220(%esp)
        # j15 = in15
        movl    %eax,224(%esp)
        # x_backup = x
        movl    %edx,64(%esp)
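        # Each pass below emits one 64-byte keystream block; a final block
        # shorter than 64 bytes is built in the tmp area at 0(%esp) and
        # copied out to ctarget afterwards.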
._bytesatleast1:
        #   bytes - 64
        cmp     $64,%ebx
        #   goto nocopy if unsigned>=
        jae     ._nocopy
        #     ctarget = out
        movl    %edi,228(%esp)
        #     out = &tmp
        leal    0(%esp),%edi
        #     i = bytes
        mov     %ebx,%ecx
        #     while (i) { *out++ = *m++; --i }
        rep     movsb
        #     out = &tmp
        leal    0(%esp),%edi
        #     m = &tmp
        leal    0(%esp),%esi
._nocopy:
        #   out_backup = out
        movl    %edi,72(%esp)
        #   m_backup = m
        movl    %esi,68(%esp)
        #   bytes_backup = bytes
        movl    %ebx,76(%esp)
        #   in0 = j0
        movl    164(%esp),%eax
        #   in1 = j1
        movl    168(%esp),%ecx
        #   in2 = j2
        movl    172(%esp),%edx
        #   in3 = j3
        movl    176(%esp),%ebx
        #   x0 = in0
        movl    %eax,100(%esp)
        #   x1 = in1
        movl    %ecx,104(%esp)
        #   x2 = in2
        movl    %edx,108(%esp)
        #   x3 = in3
        movl    %ebx,112(%esp)
        #   in4 = j4
        movl    180(%esp),%eax
        #   in5 = j5
        movl    184(%esp),%ecx
        #   in6 = j6
        movl    188(%esp),%edx
        #   in7 = j7
        movl    192(%esp),%ebx
        #   x4 = in4
        movl    %eax,116(%esp)
        #   x5 = in5
        movl    %ecx,120(%esp)
        #   x6 = in6
        movl    %edx,124(%esp)
        #   x7 = in7
        movl    %ebx,128(%esp)
        #   in8 = j8
        movl    196(%esp),%eax
        #   in9 = j9
        movl    200(%esp),%ecx
        #   in10 = j10
        movl    204(%esp),%edx
        #   in11 = j11
        movl    208(%esp),%ebx
        #   x8 = in8
        movl    %eax,132(%esp)
        #   x9 = in9
        movl    %ecx,136(%esp)
        #   x10 = in10
        movl    %edx,140(%esp)
        #   x11 = in11
        movl    %ebx,144(%esp)
        #   in12 = j12
        movl    212(%esp),%eax
        #   in13 = j13
        movl    216(%esp),%ecx
        #   in14 = j14
        movl    220(%esp),%edx
        #   in15 = j15
        movl    224(%esp),%ebx
        #   x12 = in12
        movl    %eax,148(%esp)
        #   x13 = in13
        movl    %ecx,152(%esp)
        #   x14 = in14
        movl    %edx,156(%esp)
        #   x15 = in15
        movl    %ebx,160(%esp)
        #   i = 20
        mov     $20,%ebp
        # p = x0
        movl    100(%esp),%eax
        # s = x5
        movl    120(%esp),%ecx
        # t = x10
        movl    140(%esp),%edx
        # w = x15
        movl    160(%esp),%ebx
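        # 20 Salsa20 rounds, 4 per iteration (i = 20, 16, ..., 4); p, s, t
        # and w cache x0, x5, x10 and x15 across iterations, and the four
        # comment columns track the four interleaved quarter-round chains.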
._mainloop:
        # x0 = p
        movl    %eax,100(%esp)
        #                               x10 = t
        movl    %edx,140(%esp)
        # p += x12
        addl    148(%esp),%eax
        #               x5 = s
        movl    %ecx,120(%esp)
        #                               t += x6
        addl    124(%esp),%edx
        #                                               x15 = w
        movl    %ebx,160(%esp)
        #               r = x1
        movl    104(%esp),%esi
        #               r += s
        add     %ecx,%esi
        #                                               v = x11
        movl    144(%esp),%edi
        #                                               v += w
        add     %ebx,%edi
        # p <<<= 7
        rol     $7,%eax
        # p ^= x4
        xorl    116(%esp),%eax
        #                               t <<<= 7
        rol     $7,%edx
        #                               t ^= x14
        xorl    156(%esp),%edx
        #               r <<<= 7
        rol     $7,%esi
        #               r ^= x9
        xorl    136(%esp),%esi
        #                                               v <<<= 7
        rol     $7,%edi
        #                                               v ^= x3
        xorl    112(%esp),%edi
        # x4 = p
        movl    %eax,116(%esp)
        #                               x14 = t
        movl    %edx,156(%esp)
        # p += x0
        addl    100(%esp),%eax
        #               x9 = r
        movl    %esi,136(%esp)
        #                               t += x10
        addl    140(%esp),%edx
        #                                               x3 = v
        movl    %edi,112(%esp)
        # p <<<= 9
        rol     $9,%eax
        # p ^= x8
        xorl    132(%esp),%eax
        #                               t <<<= 9
        rol     $9,%edx
        #                               t ^= x2
        xorl    108(%esp),%edx
        #               s += r
        add     %esi,%ecx
        #               s <<<= 9
        rol     $9,%ecx
        #               s ^= x13
        xorl    152(%esp),%ecx
        #                                               w += v
        add     %edi,%ebx
        #                                               w <<<= 9
        rol     $9,%ebx
        #                                               w ^= x7
        xorl    128(%esp),%ebx
        # x8 = p
        movl    %eax,132(%esp)
        #                               x2 = t
        movl    %edx,108(%esp)
        # p += x4
        addl    116(%esp),%eax
        #               x13 = s
        movl    %ecx,152(%esp)
        #                               t += x14
        addl    156(%esp),%edx
        #                                               x7 = w
        movl    %ebx,128(%esp)
        # p <<<= 13
        rol     $13,%eax
        # p ^= x12
        xorl    148(%esp),%eax
        #                               t <<<= 13
        rol     $13,%edx
        #                               t ^= x6
        xorl    124(%esp),%edx
        #               r += s
        add     %ecx,%esi
        #               r <<<= 13
        rol     $13,%esi
        #               r ^= x1
        xorl    104(%esp),%esi
        #                                               v += w
        add     %ebx,%edi
        #                                               v <<<= 13
        rol     $13,%edi
        #                                               v ^= x11
        xorl    144(%esp),%edi
        # x12 = p
        movl    %eax,148(%esp)
        #                               x6 = t
        movl    %edx,124(%esp)
        # p += x8
        addl    132(%esp),%eax
        #               x1 = r
        movl    %esi,104(%esp)
        #                               t += x2
        addl    108(%esp),%edx
        #                                               x11 = v
        movl    %edi,144(%esp)
        # p <<<= 18
        rol     $18,%eax
        # p ^= x0
        xorl    100(%esp),%eax
        #                               t <<<= 18
        rol     $18,%edx
        #                               t ^= x10
        xorl    140(%esp),%edx
        #               s += r
        add     %esi,%ecx
        #               s <<<= 18
        rol     $18,%ecx
        #               s ^= x5
        xorl    120(%esp),%ecx
        #                                               w += v
        add     %edi,%ebx
        #                                               w <<<= 18
        rol     $18,%ebx
        #                                               w ^= x15
        xorl    160(%esp),%ebx
        # x0 = p
        movl    %eax,100(%esp)
        #                               x10 = t
        movl    %edx,140(%esp)
        # p += x3
        addl    112(%esp),%eax
        # p <<<= 7
        rol     $7,%eax
        #               x5 = s
        movl    %ecx,120(%esp)
        #                               t += x9
        addl    136(%esp),%edx
        #                                               x15 = w
        movl    %ebx,160(%esp)
        #               r = x4
        movl    116(%esp),%esi
        #               r += s
        add     %ecx,%esi
        #                                               v = x14
        movl    156(%esp),%edi
        #                                               v += w
        add     %ebx,%edi
        # p ^= x1
        xorl    104(%esp),%eax
        #                               t <<<= 7
        rol     $7,%edx
        #                               t ^= x11
        xorl    144(%esp),%edx
        #               r <<<= 7
        rol     $7,%esi
        #               r ^= x6
        xorl    124(%esp),%esi
        #                                               v <<<= 7
        rol     $7,%edi
        #                                               v ^= x12
        xorl    148(%esp),%edi
        # x1 = p
        movl    %eax,104(%esp)
        #                               x11 = t
        movl    %edx,144(%esp)
        # p += x0
        addl    100(%esp),%eax
        #               x6 = r
        movl    %esi,124(%esp)
        #                               t += x10
        addl    140(%esp),%edx
        #                                               x12 = v
        movl    %edi,148(%esp)
        # p <<<= 9
        rol     $9,%eax
        # p ^= x2
        xorl    108(%esp),%eax
        #                               t <<<= 9
        rol     $9,%edx
        #                               t ^= x8
        xorl    132(%esp),%edx
        #               s += r
        add     %esi,%ecx
        #               s <<<= 9
        rol     $9,%ecx
        #               s ^= x7
        xorl    128(%esp),%ecx
        #                                               w += v
        add     %edi,%ebx
        #                                               w <<<= 9
        rol     $9,%ebx
        #                                               w ^= x13
        xorl    152(%esp),%ebx
        # x2 = p
        movl    %eax,108(%esp)
        #                               x8 = t
        movl    %edx,132(%esp)
        # p += x1
        addl    104(%esp),%eax
        #               x7 = s
        movl    %ecx,128(%esp)
        #                               t += x11
        addl    144(%esp),%edx
        #                                               x13 = w
        movl    %ebx,152(%esp)
        # p <<<= 13
        rol     $13,%eax
        # p ^= x3
        xorl    112(%esp),%eax
        #                               t <<<= 13
        rol     $13,%edx
        #                               t ^= x9
        xorl    136(%esp),%edx
        #               r += s
        add     %ecx,%esi
        #               r <<<= 13
        rol     $13,%esi
        #               r ^= x4
        xorl    116(%esp),%esi
        #                                               v += w
        add     %ebx,%edi
        #                                               v <<<= 13
        rol     $13,%edi
        #                                               v ^= x14
        xorl    156(%esp),%edi
        # x3 = p
        movl    %eax,112(%esp)
        #                               x9 = t
        movl    %edx,136(%esp)
        # p += x2
        addl    108(%esp),%eax
        #               x4 = r
        movl    %esi,116(%esp)
        #                               t += x8
        addl    132(%esp),%edx
        #                                               x14 = v
        movl    %edi,156(%esp)
        # p <<<= 18
        rol     $18,%eax
        # p ^= x0
        xorl    100(%esp),%eax
        #                               t <<<= 18
        rol     $18,%edx
        #                               t ^= x10
        xorl    140(%esp),%edx
        #               s += r
        add     %esi,%ecx
        #               s <<<= 18
        rol     $18,%ecx
        #               s ^= x5
        xorl    120(%esp),%ecx
        #                                               w += v
        add     %edi,%ebx
        #                                               w <<<= 18
        rol     $18,%ebx
        #                                               w ^= x15
        xorl    160(%esp),%ebx
        # x0 = p
        movl    %eax,100(%esp)
        #                               x10 = t
        movl    %edx,140(%esp)
        # p += x12
        addl    148(%esp),%eax
        #               x5 = s
        movl    %ecx,120(%esp)
        #                               t += x6
        addl    124(%esp),%edx
        #                                               x15 = w
        movl    %ebx,160(%esp)
        #               r = x1
        movl    104(%esp),%esi
        #               r += s
        add     %ecx,%esi
        #                                               v = x11
        movl    144(%esp),%edi
        #                                               v += w
        add     %ebx,%edi
        # p <<<= 7
        rol     $7,%eax
        # p ^= x4
        xorl    116(%esp),%eax
        #                               t <<<= 7
        rol     $7,%edx
        #                               t ^= x14
        xorl    156(%esp),%edx
        #               r <<<= 7
        rol     $7,%esi
        #               r ^= x9
        xorl    136(%esp),%esi
        #                                               v <<<= 7
        rol     $7,%edi
        #                                               v ^= x3
        xorl    112(%esp),%edi
        # x4 = p
        movl    %eax,116(%esp)
        #                               x14 = t
        movl    %edx,156(%esp)
        # p += x0
        addl    100(%esp),%eax
        #               x9 = r
        movl    %esi,136(%esp)
        #                               t += x10
        addl    140(%esp),%edx
        #                                               x3 = v
        movl    %edi,112(%esp)
        # p <<<= 9
        rol     $9,%eax
        # p ^= x8
        xorl    132(%esp),%eax
        #                               t <<<= 9
        rol     $9,%edx
        #                               t ^= x2
        xorl    108(%esp),%edx
        #               s += r
        add     %esi,%ecx
        #               s <<<= 9
        rol     $9,%ecx
        #               s ^= x13
        xorl    152(%esp),%ecx
        #                                               w += v
        add     %edi,%ebx
        #                                               w <<<= 9
        rol     $9,%ebx
        #                                               w ^= x7
        xorl    128(%esp),%ebx
        # x8 = p
        movl    %eax,132(%esp)
        #                               x2 = t
        movl    %edx,108(%esp)
        # p += x4
        addl    116(%esp),%eax
        #               x13 = s
        movl    %ecx,152(%esp)
        #                               t += x14
        addl    156(%esp),%edx
        #                                               x7 = w
        movl    %ebx,128(%esp)
        # p <<<= 13
        rol     $13,%eax
        # p ^= x12
        xorl    148(%esp),%eax
        #                               t <<<= 13
        rol     $13,%edx
        #                               t ^= x6
        xorl    124(%esp),%edx
        #               r += s
        add     %ecx,%esi
        #               r <<<= 13
        rol     $13,%esi
        #               r ^= x1
        xorl    104(%esp),%esi
        #                                               v += w
        add     %ebx,%edi
        #                                               v <<<= 13
        rol     $13,%edi
        #                                               v ^= x11
        xorl    144(%esp),%edi
        # x12 = p
        movl    %eax,148(%esp)
        #                               x6 = t
        movl    %edx,124(%esp)
        # p += x8
        addl    132(%esp),%eax
        #               x1 = r
        movl    %esi,104(%esp)
        #                               t += x2
        addl    108(%esp),%edx
        #                                               x11 = v
        movl    %edi,144(%esp)
        # p <<<= 18
        rol     $18,%eax
        # p ^= x0
        xorl    100(%esp),%eax
        #                               t <<<= 18
        rol     $18,%edx
        #                               t ^= x10
        xorl    140(%esp),%edx
        #               s += r
        add     %esi,%ecx
        #               s <<<= 18
        rol     $18,%ecx
        #               s ^= x5
        xorl    120(%esp),%ecx
        #                                               w += v
        add     %edi,%ebx
        #                                               w <<<= 18
        rol     $18,%ebx
        #                                               w ^= x15
        xorl    160(%esp),%ebx
        # x0 = p
        movl    %eax,100(%esp)
        #                               x10 = t
        movl    %edx,140(%esp)
        # p += x3
        addl    112(%esp),%eax
        # p <<<= 7
        rol     $7,%eax
        #               x5 = s
        movl    %ecx,120(%esp)
        #                               t += x9
        addl    136(%esp),%edx
        #                                               x15 = w
        movl    %ebx,160(%esp)
        #               r = x4
        movl    116(%esp),%esi
        #               r += s
        add     %ecx,%esi
        #                                               v = x14
        movl    156(%esp),%edi
        #                                               v += w
        add     %ebx,%edi
        # p ^= x1
        xorl    104(%esp),%eax
        #                               t <<<= 7
        rol     $7,%edx
        #                               t ^= x11
        xorl    144(%esp),%edx
        #               r <<<= 7
        rol     $7,%esi
        #               r ^= x6
        xorl    124(%esp),%esi
        #                                               v <<<= 7
        rol     $7,%edi
        #                                               v ^= x12
        xorl    148(%esp),%edi
        # x1 = p
        movl    %eax,104(%esp)
        #                               x11 = t
        movl    %edx,144(%esp)
        # p += x0
        addl    100(%esp),%eax
        #               x6 = r
        movl    %esi,124(%esp)
        #                               t += x10
        addl    140(%esp),%edx
        #                                               x12 = v
        movl    %edi,148(%esp)
        # p <<<= 9
        rol     $9,%eax
        # p ^= x2
        xorl    108(%esp),%eax
        #                               t <<<= 9
        rol     $9,%edx
        #                               t ^= x8
        xorl    132(%esp),%edx
        #               s += r
        add     %esi,%ecx
        #               s <<<= 9
        rol     $9,%ecx
        #               s ^= x7
        xorl    128(%esp),%ecx
        #                                               w += v
        add     %edi,%ebx
        #                                               w <<<= 9
        rol     $9,%ebx
        #                                               w ^= x13
        xorl    152(%esp),%ebx
        # x2 = p
        movl    %eax,108(%esp)
        #                               x8 = t
        movl    %edx,132(%esp)
        # p += x1
        addl    104(%esp),%eax
        #               x7 = s
        movl    %ecx,128(%esp)
        #                               t += x11
        addl    144(%esp),%edx
        #                                               x13 = w
        movl    %ebx,152(%esp)
        # p <<<= 13
        rol     $13,%eax
        # p ^= x3
        xorl    112(%esp),%eax
        #                               t <<<= 13
        rol     $13,%edx
        #                               t ^= x9
        xorl    136(%esp),%edx
        #               r += s
        add     %ecx,%esi
        #               r <<<= 13
        rol     $13,%esi
        #               r ^= x4
        xorl    116(%esp),%esi
        #                                               v += w
        add     %ebx,%edi
        #                                               v <<<= 13
        rol     $13,%edi
        #                                               v ^= x14
        xorl    156(%esp),%edi
        # x3 = p
        movl    %eax,112(%esp)
        #                               x9 = t
        movl    %edx,136(%esp)
        # p += x2
        addl    108(%esp),%eax
        #               x4 = r
        movl    %esi,116(%esp)
        #                               t += x8
        addl    132(%esp),%edx
        #                                               x14 = v
        movl    %edi,156(%esp)
        # p <<<= 18
        rol     $18,%eax
        # p ^= x0
        xorl    100(%esp),%eax
        #                               t <<<= 18
        rol     $18,%edx
        #                               t ^= x10
        xorl    140(%esp),%edx
        #               s += r
        add     %esi,%ecx
        #               s <<<= 18
        rol     $18,%ecx
        #               s ^= x5
        xorl    120(%esp),%ecx
        #                                               w += v
        add     %edi,%ebx
        #                                               w <<<= 18
        rol     $18,%ebx
        #                                               w ^= x15
        xorl    160(%esp),%ebx
        # i -= 4
        sub     $4,%ebp
        # goto mainloop if unsigned >
        ja      ._mainloop
        # x0 = p
        movl    %eax,100(%esp)
        # x5 = s
        movl    %ecx,120(%esp)
        # x10 = t
        movl    %edx,140(%esp)
        # x15 = w
        movl    %ebx,160(%esp)
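        # Feedforward: add the saved input words j0..j15 back into x0..x15,
        # then XOR the keystream with the message and store the output.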
        #   out = out_backup
        movl    72(%esp),%edi
        #   m = m_backup
        movl    68(%esp),%esi
        #   in0 = x0
        movl    100(%esp),%eax
        #   in1 = x1
        movl    104(%esp),%ecx
        #   in0 += j0
        addl    164(%esp),%eax
        #   in1 += j1
        addl    168(%esp),%ecx
        #   in0 ^= *(uint32 *) (m + 0)
        xorl    0(%esi),%eax
        #   in1 ^= *(uint32 *) (m + 4)
        xorl    4(%esi),%ecx
        #   *(uint32 *) (out + 0) = in0
        movl    %eax,0(%edi)
        #   *(uint32 *) (out + 4) = in1
        movl    %ecx,4(%edi)
        #   in2 = x2
        movl    108(%esp),%eax
        #   in3 = x3
        movl    112(%esp),%ecx
        #   in2 += j2
        addl    172(%esp),%eax
        #   in3 += j3
        addl    176(%esp),%ecx
        #   in2 ^= *(uint32 *) (m + 8)
        xorl    8(%esi),%eax
        #   in3 ^= *(uint32 *) (m + 12)
        xorl    12(%esi),%ecx
        #   *(uint32 *) (out + 8) = in2
        movl    %eax,8(%edi)
        #   *(uint32 *) (out + 12) = in3
        movl    %ecx,12(%edi)
        #   in4 = x4
        movl    116(%esp),%eax
        #   in5 = x5
        movl    120(%esp),%ecx
        #   in4 += j4
        addl    180(%esp),%eax
        #   in5 += j5
        addl    184(%esp),%ecx
        #   in4 ^= *(uint32 *) (m + 16)
        xorl    16(%esi),%eax
        #   in5 ^= *(uint32 *) (m + 20)
        xorl    20(%esi),%ecx
        #   *(uint32 *) (out + 16) = in4
        movl    %eax,16(%edi)
        #   *(uint32 *) (out + 20) = in5
        movl    %ecx,20(%edi)
        #   in6 = x6
        movl    124(%esp),%eax
        #   in7 = x7
        movl    128(%esp),%ecx
        #   in6 += j6
        addl    188(%esp),%eax
        #   in7 += j7
        addl    192(%esp),%ecx
        #   in6 ^= *(uint32 *) (m + 24)
        xorl    24(%esi),%eax
        #   in7 ^= *(uint32 *) (m + 28)
        xorl    28(%esi),%ecx
        #   *(uint32 *) (out + 24) = in6
        movl    %eax,24(%edi)
        #   *(uint32 *) (out + 28) = in7
        movl    %ecx,28(%edi)
        #   in8 = x8
        movl    132(%esp),%eax
        #   in9 = x9
        movl    136(%esp),%ecx
        #   in8 += j8
        addl    196(%esp),%eax
        #   in9 += j9
        addl    200(%esp),%ecx
        #   in8 ^= *(uint32 *) (m + 32)
        xorl    32(%esi),%eax
        #   in9 ^= *(uint32 *) (m + 36)
        xorl    36(%esi),%ecx
        #   *(uint32 *) (out + 32) = in8
        movl    %eax,32(%edi)
        #   *(uint32 *) (out + 36) = in9
        movl    %ecx,36(%edi)
        #   in10 = x10
        movl    140(%esp),%eax
        #   in11 = x11
        movl    144(%esp),%ecx
        #   in10 += j10
        addl    204(%esp),%eax
        #   in11 += j11
        addl    208(%esp),%ecx
        #   in10 ^= *(uint32 *) (m + 40)
        xorl    40(%esi),%eax
        #   in11 ^= *(uint32 *) (m + 44)
        xorl    44(%esi),%ecx
        #   *(uint32 *) (out + 40) = in10
        movl    %eax,40(%edi)
        #   *(uint32 *) (out + 44) = in11
        movl    %ecx,44(%edi)
        #   in12 = x12
        movl    148(%esp),%eax
        #   in13 = x13
        movl    152(%esp),%ecx
        #   in12 += j12
        addl    212(%esp),%eax
        #   in13 += j13
        addl    216(%esp),%ecx
        #   in12 ^= *(uint32 *) (m + 48)
        xorl    48(%esi),%eax
        #   in13 ^= *(uint32 *) (m + 52)
        xorl    52(%esi),%ecx
        #   *(uint32 *) (out + 48) = in12
        movl    %eax,48(%edi)
        #   *(uint32 *) (out + 52) = in13
        movl    %ecx,52(%edi)
        #   in14 = x14
        movl    156(%esp),%eax
        #   in15 = x15
        movl    160(%esp),%ecx
        #   in14 += j14
        addl    220(%esp),%eax
        #   in15 += j15
        addl    224(%esp),%ecx
        #   in14 ^= *(uint32 *) (m + 56)
        xorl    56(%esi),%eax
        #   in15 ^= *(uint32 *) (m + 60)
        xorl    60(%esi),%ecx
        #   *(uint32 *) (out + 56) = in14
        movl    %eax,56(%edi)
        #   *(uint32 *) (out + 60) = in15
        movl    %ecx,60(%edi)
        #   bytes = bytes_backup
        movl    76(%esp),%ebx
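        #   Advance the 64-bit block counter held in state words 8 and 9.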
        #   in8 = j8
        movl    196(%esp),%eax
        #   in9 = j9
        movl    200(%esp),%ecx
        #   in8 += 1
        add     $1,%eax
        #   in9 += 0 + carry
        adc     $0,%ecx
        #   j8 = in8
        movl    %eax,196(%esp)
        #   j9 = in9
        movl    %ecx,200(%esp)
        #   bytes - 64
        cmp     $64,%ebx
        #   goto bytesatleast65 if unsigned>
        ja      ._bytesatleast65
        #     goto bytesatleast64 if unsigned>=
        jae     ._bytesatleast64
        #       m = out
        mov     %edi,%esi
        #       out = ctarget
        movl    228(%esp),%edi
        #       i = bytes
        mov     %ebx,%ecx
        #       while (i) { *out++ = *m++; --i }
        rep     movsb
._bytesatleast64:
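        #     Write the updated block counter back into the caller's
        #     context so the keystream position persists across calls.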
        #     x = x_backup
        movl    64(%esp),%eax
        #     in8 = j8
        movl    196(%esp),%ecx
        #     in9 = j9
        movl    200(%esp),%edx
        #     *(uint32 *) (x + 32) = in8
        movl    %ecx,32(%eax)
        #     *(uint32 *) (x + 36) = in9
        movl    %edx,36(%eax)
._done:
        #     eax = eax_stack
        movl    80(%esp),%eax
        #     ebx = ebx_stack
        movl    84(%esp),%ebx
        #     esi = esi_stack
        movl    88(%esp),%esi
        #     edi = edi_stack
        movl    92(%esp),%edi
        #     ebp = ebp_stack
        movl    96(%esp),%ebp
        #     leave
        add     %eax,%esp
        ret
._bytesatleast65:
        #   bytes -= 64
        sub     $64,%ebx
        #   out += 64
        add     $64,%edi
        #   m += 64
        add     $64,%esi
        # goto bytesatleast1
        jmp     ._bytesatleast1
# enter ECRYPT_keysetup
.text
.p2align 5
.globl ECRYPT_keysetup
ECRYPT_keysetup:
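        # State layout: key words go into words 1..4 and 11..14 (a 16-byte
        # key is used for both halves when kbits < 256); the four diagonal
        # constants fill words 0, 5, 10 and 15.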
        mov     %esp,%eax
        and     $31,%eax
        add     $256,%eax
        sub     %eax,%esp
        #   eax_stack = eax
        movl    %eax,64(%esp)
        #   ebx_stack = ebx
        movl    %ebx,68(%esp)
        #   esi_stack = esi
        movl    %esi,72(%esp)
        #   edi_stack = edi
        movl    %edi,76(%esp)
        #   ebp_stack = ebp
        movl    %ebp,80(%esp)
        #   k = arg2
        movl    8(%esp,%eax),%ecx
        #   kbits = arg3
        movl    12(%esp,%eax),%edx
        #   x = arg1
        movl    4(%esp,%eax),%eax
        #   in1 = *(uint32 *) (k + 0)
        movl    0(%ecx),%ebx
        #   in2 = *(uint32 *) (k + 4)
        movl    4(%ecx),%esi
        #   in3 = *(uint32 *) (k + 8)
        movl    8(%ecx),%edi
        #   in4 = *(uint32 *) (k + 12)
        movl    12(%ecx),%ebp
        #   *(uint32 *) (x + 4) = in1
        movl    %ebx,4(%eax)
        #   *(uint32 *) (x + 8) = in2
        movl    %esi,8(%eax)
        #   *(uint32 *) (x + 12) = in3
        movl    %edi,12(%eax)
        #   *(uint32 *) (x + 16) = in4
        movl    %ebp,16(%eax)
        #   kbits - 256
        cmp     $256,%edx
        #   goto kbits128 if unsigned<
        jb      ._kbits128
._kbits256:
        #     in11 = *(uint32 *) (k + 16)
        movl    16(%ecx),%edx
        #     in12 = *(uint32 *) (k + 20)
        movl    20(%ecx),%ebx
        #     in13 = *(uint32 *) (k + 24)
        movl    24(%ecx),%esi
        #     in14 = *(uint32 *) (k + 28)
        movl    28(%ecx),%ecx
        #     *(uint32 *) (x + 44) = in11
        movl    %edx,44(%eax)
        #     *(uint32 *) (x + 48) = in12
        movl    %ebx,48(%eax)
        #     *(uint32 *) (x + 52) = in13
        movl    %esi,52(%eax)
        #     *(uint32 *) (x + 56) = in14
        movl    %ecx,56(%eax)
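        #     These four constants are the little-endian words of sigma,
        #     "expand 32-byte k".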
        #     in0 = 1634760805
        mov     $1634760805,%ecx
        #     in5 = 857760878
        mov     $857760878,%edx
        #     in10 = 2036477234
        mov     $2036477234,%ebx
        #     in15 = 1797285236
        mov     $1797285236,%esi
        #     *(uint32 *) (x + 0) = in0
        movl    %ecx,0(%eax)
        #     *(uint32 *) (x + 20) = in5
        movl    %edx,20(%eax)
        #     *(uint32 *) (x + 40) = in10
        movl    %ebx,40(%eax)
        #     *(uint32 *) (x + 60) = in15
        movl    %esi,60(%eax)
        #   goto keysetupdone
        jmp     ._keysetupdone
._kbits128:
        #     in11 = *(uint32 *) (k + 0)
        movl    0(%ecx),%edx
        #     in12 = *(uint32 *) (k + 4)
        movl    4(%ecx),%ebx
        #     in13 = *(uint32 *) (k + 8)
        movl    8(%ecx),%esi
        #     in14 = *(uint32 *) (k + 12)
        movl    12(%ecx),%ecx
        #     *(uint32 *) (x + 44) = in11
        movl    %edx,44(%eax)
        #     *(uint32 *) (x + 48) = in12
        movl    %ebx,48(%eax)
        #     *(uint32 *) (x + 52) = in13
        movl    %esi,52(%eax)
        #     *(uint32 *) (x + 56) = in14
        movl    %ecx,56(%eax)
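        #     These four constants are the little-endian words of tau,
        #     "expand 16-byte k".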
        #     in0 = 1634760805
        mov     $1634760805,%ecx
        #     in5 = 824206446
        mov     $824206446,%edx
        #     in10 = 2036477238
        mov     $2036477238,%ebx
        #     in15 = 1797285236
        mov     $1797285236,%esi
        #     *(uint32 *) (x + 0) = in0
        movl    %ecx,0(%eax)
        #     *(uint32 *) (x + 20) = in5
        movl    %edx,20(%eax)
        #     *(uint32 *) (x + 40) = in10
        movl    %ebx,40(%eax)
        #     *(uint32 *) (x + 60) = in15
        movl    %esi,60(%eax)
._keysetupdone:
        #   eax = eax_stack
        movl    64(%esp),%eax
        #   ebx = ebx_stack
        movl    68(%esp),%ebx
        #   esi = esi_stack
        movl    72(%esp),%esi
        #   edi = edi_stack
        movl    76(%esp),%edi
        #   ebp = ebp_stack
        movl    80(%esp),%ebp
        # leave
        add     %eax,%esp
        ret
# enter ECRYPT_ivsetup
.text
.p2align 5
.globl ECRYPT_ivsetup
ECRYPT_ivsetup:
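        # Store the 8-byte IV into state words 6 and 7 and reset the
        # 64-bit block counter (words 8 and 9) to zero.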
        mov     %esp,%eax
        and     $31,%eax
        add     $256,%eax
        sub     %eax,%esp
        #   eax_stack = eax
        movl    %eax,64(%esp)
        #   ebx_stack = ebx
        movl    %ebx,68(%esp)
        #   esi_stack = esi
        movl    %esi,72(%esp)
        #   edi_stack = edi
        movl    %edi,76(%esp)
        #   ebp_stack = ebp
        movl    %ebp,80(%esp)
        #   iv = arg2
        movl    8(%esp,%eax),%ecx
        #   x = arg1
        movl    4(%esp,%eax),%eax
        #   in6 = *(uint32 *) (iv + 0)
        movl    0(%ecx),%edx
        #   in7 = *(uint32 *) (iv + 4)
        movl    4(%ecx),%ecx
        #   in8 = 0
        mov     $0,%ebx
        #   in9 = 0
        mov     $0,%esi
        #   *(uint32 *) (x + 24) = in6
        movl    %edx,24(%eax)
        #   *(uint32 *) (x + 28) = in7
        movl    %ecx,28(%eax)
        #   *(uint32 *) (x + 32) = in8
        movl    %ebx,32(%eax)
        #   *(uint32 *) (x + 36) = in9
        movl    %esi,36(%eax)
        #   eax = eax_stack
        movl    64(%esp),%eax
        #   ebx = ebx_stack
        movl    68(%esp),%ebx
        #   esi = esi_stack
        movl    72(%esp),%esi
        #   edi = edi_stack
        movl    76(%esp),%edi
        #   ebp = ebp_stack
        movl    80(%esp),%ebp
        # leave
        add     %eax,%esp
        ret