# Derived from:
#       salsa20_pm.s version 20051229
#       D. J. Bernstein
#       Public domain.

#include <linux/linkage.h>

.text

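# salsa20_encrypt_bytes implements the Salsa20/20 stream cipher, one 64-byte
# block per pass.  From the argument comments below, the C prototype is
# approximately the following (a reconstruction from this file, not taken
# verbatim from the kernel glue code):
#
#       void salsa20_encrypt_bytes(u32 x[16],   /* state: key/nonce/counter */
#                                  const u8 *m, /* source bytes */
#                                  u8 *out,     /* destination bytes */
#                                  u32 bytes);  /* any length, including 0 */
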
# enter salsa20_encrypt_bytes
ENTRY(salsa20_encrypt_bytes)
        mov     %esp,%eax
        and     $31,%eax
        add     $256,%eax
        sub     %eax,%esp
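        # The three instructions above compute eax = (esp & 31) + 256 and
        # drop esp by that amount: esp - (esp & 31) is 32-byte aligned and
        # 256 is a multiple of 32, so the new esp is 32-byte aligned with at
        # least 256 bytes of scratch space.  eax records the exact adjustment
        # so the epilogue can undo it with a single "add %eax,%esp".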
        # eax_stack = eax
        movl    %eax,80(%esp)
        # ebx_stack = ebx
        movl    %ebx,84(%esp)
        # esi_stack = esi
        movl    %esi,88(%esp)
        # edi_stack = edi
        movl    %edi,92(%esp)
        # ebp_stack = ebp
        movl    %ebp,96(%esp)
        # x = arg1
        movl    4(%esp,%eax),%edx
        # m = arg2
        movl    8(%esp,%eax),%esi
        # out = arg3
        movl    12(%esp,%eax),%edi
        # bytes = arg4
        movl    16(%esp,%eax),%ebx
        # bytes -= 0
        sub     $0,%ebx
        # goto done if unsigned<=
        jbe     ._done
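        # Argument addressing: esp was lowered by eax bytes, so the caller's
        # stack arguments now live at (esp + eax) + 4, + 8, + 12 and + 16.
        # "sub $0,%ebx" only sets the flags; together with "jbe" it is a
        # zero-length early exit (this file is generated qhasm-style output,
        # hence the idiom).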
._start:
        # in0 = *(uint32 *) (x + 0)
        movl    0(%edx),%eax
        # in1 = *(uint32 *) (x + 4)
        movl    4(%edx),%ecx
        # in2 = *(uint32 *) (x + 8)
        movl    8(%edx),%ebp
        # j0 = in0
        movl    %eax,164(%esp)
        # in3 = *(uint32 *) (x + 12)
        movl    12(%edx),%eax
        # j1 = in1
        movl    %ecx,168(%esp)
        # in4 = *(uint32 *) (x + 16)
        movl    16(%edx),%ecx
        # j2 = in2
        movl    %ebp,172(%esp)
        # in5 = *(uint32 *) (x + 20)
        movl    20(%edx),%ebp
        # j3 = in3
        movl    %eax,176(%esp)
        # in6 = *(uint32 *) (x + 24)
        movl    24(%edx),%eax
        # j4 = in4
        movl    %ecx,180(%esp)
        # in7 = *(uint32 *) (x + 28)
        movl    28(%edx),%ecx
        # j5 = in5
        movl    %ebp,184(%esp)
        # in8 = *(uint32 *) (x + 32)
        movl    32(%edx),%ebp
        # j6 = in6
        movl    %eax,188(%esp)
        # in9 = *(uint32 *) (x + 36)
        movl    36(%edx),%eax
        # j7 = in7
        movl    %ecx,192(%esp)
        # in10 = *(uint32 *) (x + 40)
        movl    40(%edx),%ecx
        # j8 = in8
        movl    %ebp,196(%esp)
        # in11 = *(uint32 *) (x + 44)
        movl    44(%edx),%ebp
        # j9 = in9
        movl    %eax,200(%esp)
        # in12 = *(uint32 *) (x + 48)
        movl    48(%edx),%eax
        # j10 = in10
        movl    %ecx,204(%esp)
        # in13 = *(uint32 *) (x + 52)
        movl    52(%edx),%ecx
        # j11 = in11
        movl    %ebp,208(%esp)
        # in14 = *(uint32 *) (x + 56)
        movl    56(%edx),%ebp
        # j12 = in12
        movl    %eax,212(%esp)
        # in15 = *(uint32 *) (x + 60)
        movl    60(%edx),%eax
        # j13 = in13
        movl    %ecx,216(%esp)
        # j14 = in14
        movl    %ebp,220(%esp)
        # j15 = in15
        movl    %eax,224(%esp)
        # x_backup = x
        movl    %edx,64(%esp)
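        # At this point the 16-word Salsa20 input state has been copied from
        # the caller's buffer into frame slots 164..224 (j0..j15).  j8/j9 are
        # the 64-bit block counter and are the only words updated per block.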
._bytesatleast1:
        #   bytes - 64
        cmp     $64,%ebx
        #   goto nocopy if unsigned>=
        jae     ._nocopy
        #     ctarget = out
        movl    %edi,228(%esp)
        #     out = &tmp
        leal    0(%esp),%edi
        #     i = bytes
        mov     %ebx,%ecx
        #     while (i) { *out++ = *m++; --i }
        rep     movsb
        #     out = &tmp
        leal    0(%esp),%edi
        #     m = &tmp
        leal    0(%esp),%esi
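        # Short final block: the remaining bytes are staged in the 64-byte
        # tmp area at the bottom of the frame so the block loop can always
        # read and write a full 64 bytes.  ctarget remembers the real
        # destination; the copy-back before ._bytesatleast64 moves only the
        # genuine "bytes" bytes out of tmp.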
._nocopy:
        #   out_backup = out
        movl    %edi,72(%esp)
        #   m_backup = m
        movl    %esi,68(%esp)
        #   bytes_backup = bytes
        movl    %ebx,76(%esp)
        #   in0 = j0
        movl    164(%esp),%eax
        #   in1 = j1
        movl    168(%esp),%ecx
        #   in2 = j2
        movl    172(%esp),%edx
        #   in3 = j3
        movl    176(%esp),%ebx
        #   x0 = in0
        movl    %eax,100(%esp)
        #   x1 = in1
        movl    %ecx,104(%esp)
        #   x2 = in2
        movl    %edx,108(%esp)
        #   x3 = in3
        movl    %ebx,112(%esp)
        #   in4 = j4
        movl    180(%esp),%eax
        #   in5 = j5
        movl    184(%esp),%ecx
        #   in6 = j6
        movl    188(%esp),%edx
        #   in7 = j7
        movl    192(%esp),%ebx
        #   x4 = in4
        movl    %eax,116(%esp)
        #   x5 = in5
        movl    %ecx,120(%esp)
        #   x6 = in6
        movl    %edx,124(%esp)
        #   x7 = in7
        movl    %ebx,128(%esp)
        #   in8 = j8
        movl    196(%esp),%eax
        #   in9 = j9
        movl    200(%esp),%ecx
        #   in10 = j10
        movl    204(%esp),%edx
        #   in11 = j11
        movl    208(%esp),%ebx
        #   x8 = in8
        movl    %eax,132(%esp)
        #   x9 = in9
        movl    %ecx,136(%esp)
        #   x10 = in10
        movl    %edx,140(%esp)
        #   x11 = in11
        movl    %ebx,144(%esp)
        #   in12 = j12
        movl    212(%esp),%eax
        #   in13 = j13
        movl    216(%esp),%ecx
        #   in14 = j14
        movl    220(%esp),%edx
        #   in15 = j15
        movl    224(%esp),%ebx
        #   x12 = in12
        movl    %eax,148(%esp)
        #   x13 = in13
        movl    %ecx,152(%esp)
        #   x14 = in14
        movl    %edx,156(%esp)
        #   x15 = in15
        movl    %ebx,160(%esp)
        #   i = 20
        mov     $20,%ebp
        # p = x0
        movl    100(%esp),%eax
        # s = x5
        movl    120(%esp),%ecx
        # t = x10
        movl    140(%esp),%edx
        # w = x15
        movl    160(%esp),%ebx
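        # Main loop: i counts rounds (20), and each pass below performs two
        # unrolled double-rounds (4 rounds), hence "sub $4,%ebp" and five
        # iterations in all.  Four quarter-rounds run interleaved in lanes
        # p/r/t/v (the comment columns mark the lanes), with the diagonal
        # words x0/x5/x10/x15 kept live in eax/ecx/edx/ebx between rounds.
        # In C terms, the first column quarter-round carried by lane p is the
        # reference sequence (shown for orientation only):
        #
        #       #define R(v,n) (((v) << (n)) | ((v) >> (32 - (n))))
        #       x4  ^= R(x0  + x12,  7);
        #       x8  ^= R(x4  + x0,   9);
        #       x12 ^= R(x8  + x4,  13);
        #       x0  ^= R(x12 + x8,  18);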
._mainloop:
        # x0 = p
        movl    %eax,100(%esp)
        #                               x10 = t
        movl    %edx,140(%esp)
        # p += x12
        addl    148(%esp),%eax
        #               x5 = s
        movl    %ecx,120(%esp)
        #                               t += x6
        addl    124(%esp),%edx
        #                                               x15 = w
        movl    %ebx,160(%esp)
        #               r = x1
        movl    104(%esp),%esi
        #               r += s
        add     %ecx,%esi
        #                                               v = x11
        movl    144(%esp),%edi
        #                                               v += w
        add     %ebx,%edi
        # p <<<= 7
        rol     $7,%eax
        # p ^= x4
        xorl    116(%esp),%eax
        #                               t <<<= 7
        rol     $7,%edx
        #                               t ^= x14
        xorl    156(%esp),%edx
        #               r <<<= 7
        rol     $7,%esi
        #               r ^= x9
        xorl    136(%esp),%esi
        #                                               v <<<= 7
        rol     $7,%edi
        #                                               v ^= x3
        xorl    112(%esp),%edi
        # x4 = p
        movl    %eax,116(%esp)
        #                               x14 = t
        movl    %edx,156(%esp)
        # p += x0
        addl    100(%esp),%eax
        #               x9 = r
        movl    %esi,136(%esp)
        #                               t += x10
        addl    140(%esp),%edx
        #                                               x3 = v
        movl    %edi,112(%esp)
        # p <<<= 9
        rol     $9,%eax
        # p ^= x8
        xorl    132(%esp),%eax
        #                               t <<<= 9
        rol     $9,%edx
        #                               t ^= x2
        xorl    108(%esp),%edx
        #               s += r
        add     %esi,%ecx
        #               s <<<= 9
        rol     $9,%ecx
        #               s ^= x13
        xorl    152(%esp),%ecx
        #                                               w += v
        add     %edi,%ebx
        #                                               w <<<= 9
        rol     $9,%ebx
        #                                               w ^= x7
        xorl    128(%esp),%ebx
        # x8 = p
        movl    %eax,132(%esp)
        #                               x2 = t
        movl    %edx,108(%esp)
        # p += x4
        addl    116(%esp),%eax
        #               x13 = s
        movl    %ecx,152(%esp)
        #                               t += x14
        addl    156(%esp),%edx
        #                                               x7 = w
        movl    %ebx,128(%esp)
        # p <<<= 13
        rol     $13,%eax
        # p ^= x12
        xorl    148(%esp),%eax
        #                               t <<<= 13
        rol     $13,%edx
        #                               t ^= x6
        xorl    124(%esp),%edx
        #               r += s
        add     %ecx,%esi
        #               r <<<= 13
        rol     $13,%esi
        #               r ^= x1
        xorl    104(%esp),%esi
        #                                               v += w
        add     %ebx,%edi
        #                                               v <<<= 13
        rol     $13,%edi
        #                                               v ^= x11
        xorl    144(%esp),%edi
        # x12 = p
        movl    %eax,148(%esp)
        #                               x6 = t
        movl    %edx,124(%esp)
        # p += x8
        addl    132(%esp),%eax
        #               x1 = r
        movl    %esi,104(%esp)
        #                               t += x2
        addl    108(%esp),%edx
        #                                               x11 = v
        movl    %edi,144(%esp)
        # p <<<= 18
        rol     $18,%eax
        # p ^= x0
        xorl    100(%esp),%eax
        #                               t <<<= 18
        rol     $18,%edx
        #                               t ^= x10
        xorl    140(%esp),%edx
        #               s += r
        add     %esi,%ecx
        #               s <<<= 18
        rol     $18,%ecx
        #               s ^= x5
        xorl    120(%esp),%ecx
        #                                               w += v
        add     %edi,%ebx
        #                                               w <<<= 18
        rol     $18,%ebx
        #                                               w ^= x15
        xorl    160(%esp),%ebx
        # x0 = p
        movl    %eax,100(%esp)
        #                               x10 = t
        movl    %edx,140(%esp)
        # p += x3
        addl    112(%esp),%eax
        # p <<<= 7
        rol     $7,%eax
        #               x5 = s
        movl    %ecx,120(%esp)
        #                               t += x9
        addl    136(%esp),%edx
        #                                               x15 = w
        movl    %ebx,160(%esp)
        #               r = x4
        movl    116(%esp),%esi
        #               r += s
        add     %ecx,%esi
        #                                               v = x14
        movl    156(%esp),%edi
        #                                               v += w
        add     %ebx,%edi
        # p ^= x1
        xorl    104(%esp),%eax
        #                               t <<<= 7
        rol     $7,%edx
        #                               t ^= x11
        xorl    144(%esp),%edx
        #               r <<<= 7
        rol     $7,%esi
        #               r ^= x6
        xorl    124(%esp),%esi
        #                                               v <<<= 7
        rol     $7,%edi
        #                                               v ^= x12
        xorl    148(%esp),%edi
        # x1 = p
        movl    %eax,104(%esp)
        #                               x11 = t
        movl    %edx,144(%esp)
        # p += x0
        addl    100(%esp),%eax
        #               x6 = r
        movl    %esi,124(%esp)
        #                               t += x10
        addl    140(%esp),%edx
        #                                               x12 = v
        movl    %edi,148(%esp)
        # p <<<= 9
        rol     $9,%eax
        # p ^= x2
        xorl    108(%esp),%eax
        #                               t <<<= 9
        rol     $9,%edx
        #                               t ^= x8
        xorl    132(%esp),%edx
        #               s += r
        add     %esi,%ecx
        #               s <<<= 9
        rol     $9,%ecx
        #               s ^= x7
        xorl    128(%esp),%ecx
        #                                               w += v
        add     %edi,%ebx
        #                                               w <<<= 9
        rol     $9,%ebx
        #                                               w ^= x13
        xorl    152(%esp),%ebx
        # x2 = p
        movl    %eax,108(%esp)
        #                               x8 = t
        movl    %edx,132(%esp)
        # p += x1
        addl    104(%esp),%eax
        #               x7 = s
        movl    %ecx,128(%esp)
        #                               t += x11
        addl    144(%esp),%edx
        #                                               x13 = w
        movl    %ebx,152(%esp)
        # p <<<= 13
        rol     $13,%eax
        # p ^= x3
        xorl    112(%esp),%eax
        #                               t <<<= 13
        rol     $13,%edx
        #                               t ^= x9
        xorl    136(%esp),%edx
        #               r += s
        add     %ecx,%esi
        #               r <<<= 13
        rol     $13,%esi
        #               r ^= x4
        xorl    116(%esp),%esi
        #                                               v += w
        add     %ebx,%edi
        #                                               v <<<= 13
        rol     $13,%edi
        #                                               v ^= x14
        xorl    156(%esp),%edi
        # x3 = p
        movl    %eax,112(%esp)
        #                               x9 = t
        movl    %edx,136(%esp)
        # p += x2
        addl    108(%esp),%eax
        #               x4 = r
        movl    %esi,116(%esp)
        #                               t += x8
        addl    132(%esp),%edx
        #                                               x14 = v
        movl    %edi,156(%esp)
        # p <<<= 18
        rol     $18,%eax
        # p ^= x0
        xorl    100(%esp),%eax
        #                               t <<<= 18
        rol     $18,%edx
        #                               t ^= x10
        xorl    140(%esp),%edx
        #               s += r
        add     %esi,%ecx
        #               s <<<= 18
        rol     $18,%ecx
        #               s ^= x5
        xorl    120(%esp),%ecx
        #                                               w += v
        add     %edi,%ebx
        #                                               w <<<= 18
        rol     $18,%ebx
        #                                               w ^= x15
        xorl    160(%esp),%ebx
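        # End of the first double-round (one column round plus one row
        # round); the second, identical double-round follows.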
        # x0 = p
        movl    %eax,100(%esp)
        #                               x10 = t
        movl    %edx,140(%esp)
        # p += x12
        addl    148(%esp),%eax
        #               x5 = s
        movl    %ecx,120(%esp)
        #                               t += x6
        addl    124(%esp),%edx
        #                                               x15 = w
        movl    %ebx,160(%esp)
        #               r = x1
        movl    104(%esp),%esi
        #               r += s
        add     %ecx,%esi
        #                                               v = x11
        movl    144(%esp),%edi
        #                                               v += w
        add     %ebx,%edi
        # p <<<= 7
        rol     $7,%eax
        # p ^= x4
        xorl    116(%esp),%eax
        #                               t <<<= 7
        rol     $7,%edx
        #                               t ^= x14
        xorl    156(%esp),%edx
        #               r <<<= 7
        rol     $7,%esi
        #               r ^= x9
        xorl    136(%esp),%esi
        #                                               v <<<= 7
        rol     $7,%edi
        #                                               v ^= x3
        xorl    112(%esp),%edi
        # x4 = p
        movl    %eax,116(%esp)
        #                               x14 = t
        movl    %edx,156(%esp)
        # p += x0
        addl    100(%esp),%eax
        #               x9 = r
        movl    %esi,136(%esp)
        #                               t += x10
        addl    140(%esp),%edx
        #                                               x3 = v
        movl    %edi,112(%esp)
        # p <<<= 9
        rol     $9,%eax
        # p ^= x8
        xorl    132(%esp),%eax
        #                               t <<<= 9
        rol     $9,%edx
        #                               t ^= x2
        xorl    108(%esp),%edx
        #               s += r
        add     %esi,%ecx
        #               s <<<= 9
        rol     $9,%ecx
        #               s ^= x13
        xorl    152(%esp),%ecx
        #                                               w += v
        add     %edi,%ebx
        #                                               w <<<= 9
        rol     $9,%ebx
        #                                               w ^= x7
        xorl    128(%esp),%ebx
        # x8 = p
        movl    %eax,132(%esp)
        #                               x2 = t
        movl    %edx,108(%esp)
        # p += x4
        addl    116(%esp),%eax
        #               x13 = s
        movl    %ecx,152(%esp)
        #                               t += x14
        addl    156(%esp),%edx
        #                                               x7 = w
        movl    %ebx,128(%esp)
        # p <<<= 13
        rol     $13,%eax
        # p ^= x12
        xorl    148(%esp),%eax
        #                               t <<<= 13
        rol     $13,%edx
        #                               t ^= x6
        xorl    124(%esp),%edx
        #               r += s
        add     %ecx,%esi
        #               r <<<= 13
        rol     $13,%esi
        #               r ^= x1
        xorl    104(%esp),%esi
        #                                               v += w
        add     %ebx,%edi
        #                                               v <<<= 13
        rol     $13,%edi
        #                                               v ^= x11
        xorl    144(%esp),%edi
        # x12 = p
        movl    %eax,148(%esp)
        #                               x6 = t
        movl    %edx,124(%esp)
        # p += x8
        addl    132(%esp),%eax
        #               x1 = r
        movl    %esi,104(%esp)
        #                               t += x2
        addl    108(%esp),%edx
        #                                               x11 = v
        movl    %edi,144(%esp)
        # p <<<= 18
        rol     $18,%eax
        # p ^= x0
        xorl    100(%esp),%eax
        #                               t <<<= 18
        rol     $18,%edx
        #                               t ^= x10
        xorl    140(%esp),%edx
        #               s += r
        add     %esi,%ecx
        #               s <<<= 18
        rol     $18,%ecx
        #               s ^= x5
        xorl    120(%esp),%ecx
        #                                               w += v
        add     %edi,%ebx
        #                                               w <<<= 18
        rol     $18,%ebx
        #                                               w ^= x15
        xorl    160(%esp),%ebx
        # x0 = p
        movl    %eax,100(%esp)
        #                               x10 = t
        movl    %edx,140(%esp)
        # p += x3
        addl    112(%esp),%eax
        # p <<<= 7
        rol     $7,%eax
        #               x5 = s
        movl    %ecx,120(%esp)
        #                               t += x9
        addl    136(%esp),%edx
        #                                               x15 = w
        movl    %ebx,160(%esp)
        #               r = x4
        movl    116(%esp),%esi
        #               r += s
        add     %ecx,%esi
        #                                               v = x14
        movl    156(%esp),%edi
        #                                               v += w
        add     %ebx,%edi
        # p ^= x1
        xorl    104(%esp),%eax
        #                               t <<<= 7
        rol     $7,%edx
        #                               t ^= x11
        xorl    144(%esp),%edx
        #               r <<<= 7
        rol     $7,%esi
        #               r ^= x6
        xorl    124(%esp),%esi
        #                                               v <<<= 7
        rol     $7,%edi
        #                                               v ^= x12
        xorl    148(%esp),%edi
        # x1 = p
        movl    %eax,104(%esp)
        #                               x11 = t
        movl    %edx,144(%esp)
        # p += x0
        addl    100(%esp),%eax
        #               x6 = r
        movl    %esi,124(%esp)
        #                               t += x10
        addl    140(%esp),%edx
        #                                               x12 = v
        movl    %edi,148(%esp)
        # p <<<= 9
        rol     $9,%eax
        # p ^= x2
        xorl    108(%esp),%eax
        #                               t <<<= 9
        rol     $9,%edx
        #                               t ^= x8
        xorl    132(%esp),%edx
        #               s += r
        add     %esi,%ecx
        #               s <<<= 9
        rol     $9,%ecx
        #               s ^= x7
        xorl    128(%esp),%ecx
        #                                               w += v
        add     %edi,%ebx
        #                                               w <<<= 9
        rol     $9,%ebx
        #                                               w ^= x13
        xorl    152(%esp),%ebx
        # x2 = p
        movl    %eax,108(%esp)
        #                               x8 = t
        movl    %edx,132(%esp)
        # p += x1
        addl    104(%esp),%eax
        #               x7 = s
        movl    %ecx,128(%esp)
        #                               t += x11
        addl    144(%esp),%edx
        #                                               x13 = w
        movl    %ebx,152(%esp)
        # p <<<= 13
        rol     $13,%eax
        # p ^= x3
        xorl    112(%esp),%eax
        #                               t <<<= 13
        rol     $13,%edx
        #                               t ^= x9
        xorl    136(%esp),%edx
        #               r += s
        add     %ecx,%esi
        #               r <<<= 13
        rol     $13,%esi
        #               r ^= x4
        xorl    116(%esp),%esi
        #                                               v += w
        add     %ebx,%edi
        #                                               v <<<= 13
        rol     $13,%edi
        #                                               v ^= x14
        xorl    156(%esp),%edi
        # x3 = p
        movl    %eax,112(%esp)
        #                               x9 = t
        movl    %edx,136(%esp)
        # p += x2
        addl    108(%esp),%eax
        #               x4 = r
        movl    %esi,116(%esp)
        #                               t += x8
        addl    132(%esp),%edx
        #                                               x14 = v
        movl    %edi,156(%esp)
        # p <<<= 18
        rol     $18,%eax
        # p ^= x0
        xorl    100(%esp),%eax
        #                               t <<<= 18
        rol     $18,%edx
        #                               t ^= x10
        xorl    140(%esp),%edx
        #               s += r
        add     %esi,%ecx
        #               s <<<= 18
        rol     $18,%ecx
        #               s ^= x5
        xorl    120(%esp),%ecx
        #                                               w += v
        add     %edi,%ebx
        #                                               w <<<= 18
        rol     $18,%ebx
        #                                               w ^= x15
        xorl    160(%esp),%ebx
        # i -= 4
        sub     $4,%ebp
        # goto mainloop if unsigned >
        ja      ._mainloop
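        # Rounds exhausted (i went 20 -> 0 in steps of 4); fall through to
        # the feed-forward and output stage for this 64-byte block.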
        # x0 = p
        movl    %eax,100(%esp)
        # x5 = s
        movl    %ecx,120(%esp)
        # x10 = t
        movl    %edx,140(%esp)
        # x15 = w
        movl    %ebx,160(%esp)
        #   out = out_backup
        movl    72(%esp),%edi
        #   m = m_backup
        movl    68(%esp),%esi
        #   in0 = x0
        movl    100(%esp),%eax
        #   in1 = x1
        movl    104(%esp),%ecx
        #   in0 += j0
        addl    164(%esp),%eax
        #   in1 += j1
        addl    168(%esp),%ecx
        #   in0 ^= *(uint32 *) (m + 0)
        xorl    0(%esi),%eax
        #   in1 ^= *(uint32 *) (m + 4)
        xorl    4(%esi),%ecx
        #   *(uint32 *) (out + 0) = in0
        movl    %eax,0(%edi)
        #   *(uint32 *) (out + 4) = in1
        movl    %ecx,4(%edi)
        #   in2 = x2
        movl    108(%esp),%eax
        #   in3 = x3
        movl    112(%esp),%ecx
        #   in2 += j2
        addl    172(%esp),%eax
        #   in3 += j3
        addl    176(%esp),%ecx
        #   in2 ^= *(uint32 *) (m + 8)
        xorl    8(%esi),%eax
        #   in3 ^= *(uint32 *) (m + 12)
        xorl    12(%esi),%ecx
        #   *(uint32 *) (out + 8) = in2
        movl    %eax,8(%edi)
        #   *(uint32 *) (out + 12) = in3
        movl    %ecx,12(%edi)
        #   in4 = x4
        movl    116(%esp),%eax
        #   in5 = x5
        movl    120(%esp),%ecx
        #   in4 += j4
        addl    180(%esp),%eax
        #   in5 += j5
        addl    184(%esp),%ecx
        #   in4 ^= *(uint32 *) (m + 16)
        xorl    16(%esi),%eax
        #   in5 ^= *(uint32 *) (m + 20)
        xorl    20(%esi),%ecx
        #   *(uint32 *) (out + 16) = in4
        movl    %eax,16(%edi)
        #   *(uint32 *) (out + 20) = in5
        movl    %ecx,20(%edi)
        #   in6 = x6
        movl    124(%esp),%eax
        #   in7 = x7
        movl    128(%esp),%ecx
        #   in6 += j6
        addl    188(%esp),%eax
        #   in7 += j7
        addl    192(%esp),%ecx
        #   in6 ^= *(uint32 *) (m + 24)
        xorl    24(%esi),%eax
        #   in7 ^= *(uint32 *) (m + 28)
        xorl    28(%esi),%ecx
        #   *(uint32 *) (out + 24) = in6
        movl    %eax,24(%edi)
        #   *(uint32 *) (out + 28) = in7
        movl    %ecx,28(%edi)
        #   in8 = x8
        movl    132(%esp),%eax
        #   in9 = x9
        movl    136(%esp),%ecx
        #   in8 += j8
        addl    196(%esp),%eax
        #   in9 += j9
        addl    200(%esp),%ecx
        #   in8 ^= *(uint32 *) (m + 32)
        xorl    32(%esi),%eax
        #   in9 ^= *(uint32 *) (m + 36)
        xorl    36(%esi),%ecx
        #   *(uint32 *) (out + 32) = in8
        movl    %eax,32(%edi)
        #   *(uint32 *) (out + 36) = in9
        movl    %ecx,36(%edi)
        #   in10 = x10
        movl    140(%esp),%eax
        #   in11 = x11
        movl    144(%esp),%ecx
        #   in10 += j10
        addl    204(%esp),%eax
        #   in11 += j11
        addl    208(%esp),%ecx
        #   in10 ^= *(uint32 *) (m + 40)
        xorl    40(%esi),%eax
        #   in11 ^= *(uint32 *) (m + 44)
        xorl    44(%esi),%ecx
        #   *(uint32 *) (out + 40) = in10
        movl    %eax,40(%edi)
        #   *(uint32 *) (out + 44) = in11
        movl    %ecx,44(%edi)
        #   in12 = x12
        movl    148(%esp),%eax
        #   in13 = x13
        movl    152(%esp),%ecx
        #   in12 += j12
        addl    212(%esp),%eax
        #   in13 += j13
        addl    216(%esp),%ecx
        #   in12 ^= *(uint32 *) (m + 48)
        xorl    48(%esi),%eax
        #   in13 ^= *(uint32 *) (m + 52)
        xorl    52(%esi),%ecx
        #   *(uint32 *) (out + 48) = in12
        movl    %eax,48(%edi)
        #   *(uint32 *) (out + 52) = in13
        movl    %ecx,52(%edi)
        #   in14 = x14
        movl    156(%esp),%eax
        #   in15 = x15
        movl    160(%esp),%ecx
        #   in14 += j14
        addl    220(%esp),%eax
        #   in15 += j15
        addl    224(%esp),%ecx
        #   in14 ^= *(uint32 *) (m + 56)
        xorl    56(%esi),%eax
        #   in15 ^= *(uint32 *) (m + 60)
        xorl    60(%esi),%ecx
        #   *(uint32 *) (out + 56) = in14
        movl    %eax,56(%edi)
        #   *(uint32 *) (out + 60) = in15
        movl    %ecx,60(%edi)
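        # Feed-forward and encryption: each keystream word is the round
        # output plus the matching input word, so out[i] = (x[i] + j[i]) ^
        # m[i] for i = 0..15, i.e. the Salsa20 core feed-forward XORed into
        # the message 64 bytes at a time.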
        #   bytes = bytes_backup
        movl    76(%esp),%ebx
        #   in8 = j8
        movl    196(%esp),%eax
        #   in9 = j9
        movl    200(%esp),%ecx
        #   in8 += 1
        add     $1,%eax
        #   in9 += 0 + carry
        adc     $0,%ecx
        #   j8 = in8
        movl    %eax,196(%esp)
        #   j9 = in9
        movl    %ecx,200(%esp)
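        # State words 8 and 9 form the 64-bit little-endian block counter;
        # add/adc propagates the carry from the low word into the high word.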
        #   bytes - 64
        cmp     $64,%ebx
        #   goto bytesatleast65 if unsigned>
        ja      ._bytesatleast65
        #     goto bytesatleast64 if unsigned>=
        jae     ._bytesatleast64
        #       m = out
        mov     %edi,%esi
        #       out = ctarget
        movl    228(%esp),%edi
        #       i = bytes
        mov     %ebx,%ecx
        #       while (i) { *out++ = *m++; --i }
        rep     movsb
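        # Three cases on the remaining byte count: more than 64 loops again
        # via ._bytesatleast65; exactly 64 jumps to ._bytesatleast64; fewer
        # than 64 means the block was staged in tmp, so only the real
        # "bytes" bytes are copied out to ctarget before finishing.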
._bytesatleast64:
        #     x = x_backup
        movl    64(%esp),%eax
        #     in8 = j8
        movl    196(%esp),%ecx
        #     in9 = j9
        movl    200(%esp),%edx
        #     *(uint32 *) (x + 32) = in8
        movl    %ecx,32(%eax)
        #     *(uint32 *) (x + 36) = in9
        movl    %edx,36(%eax)
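        # Only the counter words (offsets 32 and 36 in the caller's state)
        # are written back; the rest of the state is read-only here.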
._done:
        #     eax = eax_stack
        movl    80(%esp),%eax
        #     ebx = ebx_stack
        movl    84(%esp),%ebx
        #     esi = esi_stack
        movl    88(%esp),%esi
        #     edi = edi_stack
        movl    92(%esp),%edi
        #     ebp = ebp_stack
        movl    96(%esp),%ebp
        #     leave
        add     %eax,%esp
        ret
._bytesatleast65:
        #   bytes -= 64
        sub     $64,%ebx
        #   out += 64
        add     $64,%edi
        #   m += 64
        add     $64,%esi
        # goto bytesatleast1
        jmp     ._bytesatleast1
ENDPROC(salsa20_encrypt_bytes)