2 * x86-64 optimized assembler MD5 implementation
4 * Author: Marc Bevand, 2004
6 * This code was placed in the public domain by the author. The original
7 * publication can be found at:
9 * https://www.zorinaq.com/papers/md5-amd64.html
12 * No modifications were made aside from changing the function and file names.
13 * The MD5_CTX structure as expected here (from OpenSSL) is binary compatible
14 * with the md_context used by rsync, for the fields accessed.
16 * Benchmarks (in MB/s) C ASM
17 * - Intel Atom D2700 302 334
18 * - Intel i7-7700hq 351 376
19 * - AMD ThreadRipper 2950x 728 784
21 * The original code was also incorporated into OpenSSL. It has since been
22 * modified there. Those changes have not been made here due to licensing
23 * incompatibilities. Benchmarks of those changes on the above CPUs did not
24 * show any significant difference in performance, though.
28 #include "md-defines.h"
30 #if !defined USE_OPENSSL && CSUM_CHUNK == 64
35 #ifndef __apple_build_version__
36 .globl md5_process_asm
37 .type md5_process_asm,@function
40 .globl _md5_process_asm
46 push %r13 # not really useful (r13 is unused)
50 # rdi = arg #1 (ctx, MD5_CTX pointer)
51 # rsi = arg #2 (ptr, data pointer)
52 # rdx = arg #3 (nbr, number of 16-word blocks to process)
53 mov %rdi, %rbp # rbp = ctx
54 shl $6, %rdx # rdx = nbr in bytes
55 lea (%rsi,%rdx), %rdi # rdi = end
56 mov 0*4(%rbp), %eax # eax = ctx->A
57 mov 1*4(%rbp), %ebx # ebx = ctx->B
58 mov 2*4(%rbp), %ecx # ecx = ctx->C
59 mov 3*4(%rbp), %edx # edx = ctx->D
67 cmp %rdi, %rsi # cmp end with ptr
68 je 1f # jmp if ptr == end
70 # BEGIN of loop over 16-word blocks
71 2: # save old values of A, B, C, D
76 mov 0*4(%rsi), %r10d /* (NEXT STEP) X[0] */
77 mov %edx, %r11d /* (NEXT STEP) z' = %edx */
78 xor %ecx, %r11d /* y ^ ... */
79 lea -680876936(%eax,%r10d),%eax /* Const + dst + ... */
80 and %ebx, %r11d /* x & ... */
81 xor %edx, %r11d /* z ^ ... */
82 mov 1*4(%rsi),%r10d /* (NEXT STEP) X[1] */
83 add %r11d, %eax /* dst += ... */
84 rol $7, %eax /* dst <<< s */
85 mov %ecx, %r11d /* (NEXT STEP) z' = %ecx */
86 add %ebx, %eax /* dst += x */
87 xor %ebx, %r11d /* y ^ ... */
88 lea -389564586(%edx,%r10d),%edx /* Const + dst + ... */
89 and %eax, %r11d /* x & ... */
90 xor %ecx, %r11d /* z ^ ... */
91 mov 2*4(%rsi),%r10d /* (NEXT STEP) X[2] */
92 add %r11d, %edx /* dst += ... */
93 rol $12, %edx /* dst <<< s */
94 mov %ebx, %r11d /* (NEXT STEP) z' = %ebx */
95 add %eax, %edx /* dst += x */
96 xor %eax, %r11d /* y ^ ... */
97 lea 606105819(%ecx,%r10d),%ecx /* Const + dst + ... */
98 and %edx, %r11d /* x & ... */
99 xor %ebx, %r11d /* z ^ ... */
100 mov 3*4(%rsi),%r10d /* (NEXT STEP) X[3] */
101 add %r11d, %ecx /* dst += ... */
102 rol $17, %ecx /* dst <<< s */
103 mov %eax, %r11d /* (NEXT STEP) z' = %eax */
104 add %edx, %ecx /* dst += x */
105 xor %edx, %r11d /* y ^ ... */
106 lea -1044525330(%ebx,%r10d),%ebx /* Const + dst + ... */
107 and %ecx, %r11d /* x & ... */
108 xor %eax, %r11d /* z ^ ... */
109 mov 4*4(%rsi),%r10d /* (NEXT STEP) X[4] */
110 add %r11d, %ebx /* dst += ... */
111 rol $22, %ebx /* dst <<< s */
112 mov %edx, %r11d /* (NEXT STEP) z' = %edx */
113 add %ecx, %ebx /* dst += x */
114 xor %ecx, %r11d /* y ^ ... */
115 lea -176418897(%eax,%r10d),%eax /* Const + dst + ... */
116 and %ebx, %r11d /* x & ... */
117 xor %edx, %r11d /* z ^ ... */
118 mov 5*4(%rsi),%r10d /* (NEXT STEP) X[5] */
119 add %r11d, %eax /* dst += ... */
120 rol $7, %eax /* dst <<< s */
121 mov %ecx, %r11d /* (NEXT STEP) z' = %ecx */
122 add %ebx, %eax /* dst += x */
123 xor %ebx, %r11d /* y ^ ... */
124 lea 1200080426(%edx,%r10d),%edx /* Const + dst + ... */
125 and %eax, %r11d /* x & ... */
126 xor %ecx, %r11d /* z ^ ... */
127 mov 6*4(%rsi),%r10d /* (NEXT STEP) X[6] */
128 add %r11d, %edx /* dst += ... */
129 rol $12, %edx /* dst <<< s */
130 mov %ebx, %r11d /* (NEXT STEP) z' = %ebx */
131 add %eax, %edx /* dst += x */
132 xor %eax, %r11d /* y ^ ... */
133 lea -1473231341(%ecx,%r10d),%ecx /* Const + dst + ... */
134 and %edx, %r11d /* x & ... */
135 xor %ebx, %r11d /* z ^ ... */
136 mov 7*4(%rsi),%r10d /* (NEXT STEP) X[7] */
137 add %r11d, %ecx /* dst += ... */
138 rol $17, %ecx /* dst <<< s */
139 mov %eax, %r11d /* (NEXT STEP) z' = %eax */
140 add %edx, %ecx /* dst += x */
141 xor %edx, %r11d /* y ^ ... */
142 lea -45705983(%ebx,%r10d),%ebx /* Const + dst + ... */
143 and %ecx, %r11d /* x & ... */
144 xor %eax, %r11d /* z ^ ... */
145 mov 8*4(%rsi),%r10d /* (NEXT STEP) X[8] */
146 add %r11d, %ebx /* dst += ... */
147 rol $22, %ebx /* dst <<< s */
148 mov %edx, %r11d /* (NEXT STEP) z' = %edx */
149 add %ecx, %ebx /* dst += x */
150 xor %ecx, %r11d /* y ^ ... */
151 lea 1770035416(%eax,%r10d),%eax /* Const + dst + ... */
152 and %ebx, %r11d /* x & ... */
153 xor %edx, %r11d /* z ^ ... */
154 mov 9*4(%rsi),%r10d /* (NEXT STEP) X[9] */
155 add %r11d, %eax /* dst += ... */
156 rol $7, %eax /* dst <<< s */
157 mov %ecx, %r11d /* (NEXT STEP) z' = %ecx */
158 add %ebx, %eax /* dst += x */
159 xor %ebx, %r11d /* y ^ ... */
160 lea -1958414417(%edx,%r10d),%edx /* Const + dst + ... */
161 and %eax, %r11d /* x & ... */
162 xor %ecx, %r11d /* z ^ ... */
163 mov 10*4(%rsi),%r10d /* (NEXT STEP) X[10] */
164 add %r11d, %edx /* dst += ... */
165 rol $12, %edx /* dst <<< s */
166 mov %ebx, %r11d /* (NEXT STEP) z' = %ebx */
167 add %eax, %edx /* dst += x */
168 xor %eax, %r11d /* y ^ ... */
169 lea -42063(%ecx,%r10d),%ecx /* Const + dst + ... */
170 and %edx, %r11d /* x & ... */
171 xor %ebx, %r11d /* z ^ ... */
172 mov 11*4(%rsi),%r10d /* (NEXT STEP) X[11] */
173 add %r11d, %ecx /* dst += ... */
174 rol $17, %ecx /* dst <<< s */
175 mov %eax, %r11d /* (NEXT STEP) z' = %eax */
176 add %edx, %ecx /* dst += x */
177 xor %edx, %r11d /* y ^ ... */
178 lea -1990404162(%ebx,%r10d),%ebx /* Const + dst + ... */
179 and %ecx, %r11d /* x & ... */
180 xor %eax, %r11d /* z ^ ... */
181 mov 12*4(%rsi),%r10d /* (NEXT STEP) X[12] */
182 add %r11d, %ebx /* dst += ... */
183 rol $22, %ebx /* dst <<< s */
184 mov %edx, %r11d /* (NEXT STEP) z' = %edx */
185 add %ecx, %ebx /* dst += x */
186 xor %ecx, %r11d /* y ^ ... */
187 lea 1804603682(%eax,%r10d),%eax /* Const + dst + ... */
188 and %ebx, %r11d /* x & ... */
189 xor %edx, %r11d /* z ^ ... */
190 mov 13*4(%rsi),%r10d /* (NEXT STEP) X[13] */
191 add %r11d, %eax /* dst += ... */
192 rol $7, %eax /* dst <<< s */
193 mov %ecx, %r11d /* (NEXT STEP) z' = %ecx */
194 add %ebx, %eax /* dst += x */
195 xor %ebx, %r11d /* y ^ ... */
196 lea -40341101(%edx,%r10d),%edx /* Const + dst + ... */
197 and %eax, %r11d /* x & ... */
198 xor %ecx, %r11d /* z ^ ... */
199 mov 14*4(%rsi),%r10d /* (NEXT STEP) X[14] */
200 add %r11d, %edx /* dst += ... */
201 rol $12, %edx /* dst <<< s */
202 mov %ebx, %r11d /* (NEXT STEP) z' = %ebx */
203 add %eax, %edx /* dst += x */
204 xor %eax, %r11d /* y ^ ... */
205 lea -1502002290(%ecx,%r10d),%ecx /* Const + dst + ... */
206 and %edx, %r11d /* x & ... */
207 xor %ebx, %r11d /* z ^ ... */
208 mov 15*4(%rsi),%r10d /* (NEXT STEP) X[15] */
209 add %r11d, %ecx /* dst += ... */
210 rol $17, %ecx /* dst <<< s */
211 mov %eax, %r11d /* (NEXT STEP) z' = %eax */
212 add %edx, %ecx /* dst += x */
213 xor %edx, %r11d /* y ^ ... */
214 lea 1236535329(%ebx,%r10d),%ebx /* Const + dst + ... */
215 and %ecx, %r11d /* x & ... */
216 xor %eax, %r11d /* z ^ ... */
217 mov 0*4(%rsi),%r10d /* (NEXT STEP) X[0] */
218 add %r11d, %ebx /* dst += ... */
219 rol $22, %ebx /* dst <<< s */
220 mov %edx, %r11d /* (NEXT STEP) z' = %edx */
221 add %ecx, %ebx /* dst += x */
222 mov 1*4(%rsi), %r10d /* (NEXT STEP) X[1] */
223 mov %edx, %r11d /* (NEXT STEP) z' = %edx */
224 mov %edx, %r12d /* (NEXT STEP) z' = %edx */
225 not %r11d /* not z */
226 lea -165796510(%eax,%r10d),%eax /* Const + dst + ... */
227 and %ebx, %r12d /* x & z */
228 and %ecx, %r11d /* y & (not z) */
229 mov 6*4(%rsi),%r10d /* (NEXT STEP) X[6] */
230 or %r11d, %r12d /* (y & (not z)) | (x & z) */
231 mov %ecx, %r11d /* (NEXT STEP) z' = %ecx */
232 add %r12d, %eax /* dst += ... */
233 mov %ecx, %r12d /* (NEXT STEP) z' = %ecx */
234 rol $5, %eax /* dst <<< s */
235 add %ebx, %eax /* dst += x */
236 not %r11d /* not z */
237 lea -1069501632(%edx,%r10d),%edx /* Const + dst + ... */
238 and %eax, %r12d /* x & z */
239 and %ebx, %r11d /* y & (not z) */
240 mov 11*4(%rsi),%r10d /* (NEXT STEP) X[11] */
241 or %r11d, %r12d /* (y & (not z)) | (x & z) */
242 mov %ebx, %r11d /* (NEXT STEP) z' = %ebx */
243 add %r12d, %edx /* dst += ... */
244 mov %ebx, %r12d /* (NEXT STEP) z' = %ebx */
245 rol $9, %edx /* dst <<< s */
246 add %eax, %edx /* dst += x */
247 not %r11d /* not z */
248 lea 643717713(%ecx,%r10d),%ecx /* Const + dst + ... */
249 and %edx, %r12d /* x & z */
250 and %eax, %r11d /* y & (not z) */
251 mov 0*4(%rsi),%r10d /* (NEXT STEP) X[0] */
252 or %r11d, %r12d /* (y & (not z)) | (x & z) */
253 mov %eax, %r11d /* (NEXT STEP) z' = %eax */
254 add %r12d, %ecx /* dst += ... */
255 mov %eax, %r12d /* (NEXT STEP) z' = %eax */
256 rol $14, %ecx /* dst <<< s */
257 add %edx, %ecx /* dst += x */
258 not %r11d /* not z */
259 lea -373897302(%ebx,%r10d),%ebx /* Const + dst + ... */
260 and %ecx, %r12d /* x & z */
261 and %edx, %r11d /* y & (not z) */
262 mov 5*4(%rsi),%r10d /* (NEXT STEP) X[5] */
263 or %r11d, %r12d /* (y & (not z)) | (x & z) */
264 mov %edx, %r11d /* (NEXT STEP) z' = %edx */
265 add %r12d, %ebx /* dst += ... */
266 mov %edx, %r12d /* (NEXT STEP) z' = %edx */
267 rol $20, %ebx /* dst <<< s */
268 add %ecx, %ebx /* dst += x */
269 not %r11d /* not z */
270 lea -701558691(%eax,%r10d),%eax /* Const + dst + ... */
271 and %ebx, %r12d /* x & z */
272 and %ecx, %r11d /* y & (not z) */
273 mov 10*4(%rsi),%r10d /* (NEXT STEP) X[10] */
274 or %r11d, %r12d /* (y & (not z)) | (x & z) */
275 mov %ecx, %r11d /* (NEXT STEP) z' = %ecx */
276 add %r12d, %eax /* dst += ... */
277 mov %ecx, %r12d /* (NEXT STEP) z' = %ecx */
278 rol $5, %eax /* dst <<< s */
279 add %ebx, %eax /* dst += x */
280 not %r11d /* not z */
281 lea 38016083(%edx,%r10d),%edx /* Const + dst + ... */
282 and %eax, %r12d /* x & z */
283 and %ebx, %r11d /* y & (not z) */
284 mov 15*4(%rsi),%r10d /* (NEXT STEP) X[15] */
285 or %r11d, %r12d /* (y & (not z)) | (x & z) */
286 mov %ebx, %r11d /* (NEXT STEP) z' = %ebx */
287 add %r12d, %edx /* dst += ... */
288 mov %ebx, %r12d /* (NEXT STEP) z' = %ebx */
289 rol $9, %edx /* dst <<< s */
290 add %eax, %edx /* dst += x */
291 not %r11d /* not z */
292 lea -660478335(%ecx,%r10d),%ecx /* Const + dst + ... */
293 and %edx, %r12d /* x & z */
294 and %eax, %r11d /* y & (not z) */
295 mov 4*4(%rsi),%r10d /* (NEXT STEP) X[4] */
296 or %r11d, %r12d /* (y & (not z)) | (x & z) */
297 mov %eax, %r11d /* (NEXT STEP) z' = %eax */
298 add %r12d, %ecx /* dst += ... */
299 mov %eax, %r12d /* (NEXT STEP) z' = %eax */
300 rol $14, %ecx /* dst <<< s */
301 add %edx, %ecx /* dst += x */
302 not %r11d /* not z */
303 lea -405537848(%ebx,%r10d),%ebx /* Const + dst + ... */
304 and %ecx, %r12d /* x & z */
305 and %edx, %r11d /* y & (not z) */
306 mov 9*4(%rsi),%r10d /* (NEXT STEP) X[9] */
307 or %r11d, %r12d /* (y & (not z)) | (x & z) */
308 mov %edx, %r11d /* (NEXT STEP) z' = %edx */
309 add %r12d, %ebx /* dst += ... */
310 mov %edx, %r12d /* (NEXT STEP) z' = %edx */
311 rol $20, %ebx /* dst <<< s */
312 add %ecx, %ebx /* dst += x */
313 not %r11d /* not z */
314 lea 568446438(%eax,%r10d),%eax /* Const + dst + ... */
315 and %ebx, %r12d /* x & z */
316 and %ecx, %r11d /* y & (not z) */
317 mov 14*4(%rsi),%r10d /* (NEXT STEP) X[14] */
318 or %r11d, %r12d /* (y & (not z)) | (x & z) */
319 mov %ecx, %r11d /* (NEXT STEP) z' = %ecx */
320 add %r12d, %eax /* dst += ... */
321 mov %ecx, %r12d /* (NEXT STEP) z' = %ecx */
322 rol $5, %eax /* dst <<< s */
323 add %ebx, %eax /* dst += x */
324 not %r11d /* not z */
325 lea -1019803690(%edx,%r10d),%edx /* Const + dst + ... */
326 and %eax, %r12d /* x & z */
327 and %ebx, %r11d /* y & (not z) */
328 mov 3*4(%rsi),%r10d /* (NEXT STEP) X[3] */
329 or %r11d, %r12d /* (y & (not z)) | (x & z) */
330 mov %ebx, %r11d /* (NEXT STEP) z' = %ebx */
331 add %r12d, %edx /* dst += ... */
332 mov %ebx, %r12d /* (NEXT STEP) z' = %ebx */
333 rol $9, %edx /* dst <<< s */
334 add %eax, %edx /* dst += x */
335 not %r11d /* not z */
336 lea -187363961(%ecx,%r10d),%ecx /* Const + dst + ... */
337 and %edx, %r12d /* x & z */
338 and %eax, %r11d /* y & (not z) */
339 mov 8*4(%rsi),%r10d /* (NEXT STEP) X[8] */
340 or %r11d, %r12d /* (y & (not z)) | (x & z) */
341 mov %eax, %r11d /* (NEXT STEP) z' = %eax */
342 add %r12d, %ecx /* dst += ... */
343 mov %eax, %r12d /* (NEXT STEP) z' = %eax */
344 rol $14, %ecx /* dst <<< s */
345 add %edx, %ecx /* dst += x */
346 not %r11d /* not z */
347 lea 1163531501(%ebx,%r10d),%ebx /* Const + dst + ... */
348 and %ecx, %r12d /* x & z */
349 and %edx, %r11d /* y & (not z) */
350 mov 13*4(%rsi),%r10d /* (NEXT STEP) X[13] */
351 or %r11d, %r12d /* (y & (not z)) | (x & z) */
352 mov %edx, %r11d /* (NEXT STEP) z' = %edx */
353 add %r12d, %ebx /* dst += ... */
354 mov %edx, %r12d /* (NEXT STEP) z' = %edx */
355 rol $20, %ebx /* dst <<< s */
356 add %ecx, %ebx /* dst += x */
357 not %r11d /* not z */
358 lea -1444681467(%eax,%r10d),%eax /* Const + dst + ... */
359 and %ebx, %r12d /* x & z */
360 and %ecx, %r11d /* y & (not z) */
361 mov 2*4(%rsi),%r10d /* (NEXT STEP) X[2] */
362 or %r11d, %r12d /* (y & (not z)) | (x & z) */
363 mov %ecx, %r11d /* (NEXT STEP) z' = %ecx */
364 add %r12d, %eax /* dst += ... */
365 mov %ecx, %r12d /* (NEXT STEP) z' = %ecx */
366 rol $5, %eax /* dst <<< s */
367 add %ebx, %eax /* dst += x */
368 not %r11d /* not z */
369 lea -51403784(%edx,%r10d),%edx /* Const + dst + ... */
370 and %eax, %r12d /* x & z */
371 and %ebx, %r11d /* y & (not z) */
372 mov 7*4(%rsi),%r10d /* (NEXT STEP) X[7] */
373 or %r11d, %r12d /* (y & (not z)) | (x & z) */
374 mov %ebx, %r11d /* (NEXT STEP) z' = %ebx */
375 add %r12d, %edx /* dst += ... */
376 mov %ebx, %r12d /* (NEXT STEP) z' = %ebx */
377 rol $9, %edx /* dst <<< s */
378 add %eax, %edx /* dst += x */
379 not %r11d /* not z */
380 lea 1735328473(%ecx,%r10d),%ecx /* Const + dst + ... */
381 and %edx, %r12d /* x & z */
382 and %eax, %r11d /* y & (not z) */
383 mov 12*4(%rsi),%r10d /* (NEXT STEP) X[12] */
384 or %r11d, %r12d /* (y & (not z)) | (x & z) */
385 mov %eax, %r11d /* (NEXT STEP) z' = %eax */
386 add %r12d, %ecx /* dst += ... */
387 mov %eax, %r12d /* (NEXT STEP) z' = %eax */
388 rol $14, %ecx /* dst <<< s */
389 add %edx, %ecx /* dst += x */
390 not %r11d /* not z */
391 lea -1926607734(%ebx,%r10d),%ebx /* Const + dst + ... */
392 and %ecx, %r12d /* x & z */
393 and %edx, %r11d /* y & (not z) */
394 mov 0*4(%rsi),%r10d /* (NEXT STEP) X[0] */
395 or %r11d, %r12d /* (y & (not z)) | (x & z) */
396 mov %edx, %r11d /* (NEXT STEP) z' = %edx */
397 add %r12d, %ebx /* dst += ... */
398 mov %edx, %r12d /* (NEXT STEP) z' = %edx */
399 rol $20, %ebx /* dst <<< s */
400 add %ecx, %ebx /* dst += x */
401 mov 5*4(%rsi), %r10d /* (NEXT STEP) X[5] */
402 mov %ecx, %r11d /* (NEXT STEP) y' = %ecx */
403 lea -378558(%eax,%r10d),%eax /* Const + dst + ... */
404 mov 8*4(%rsi),%r10d /* (NEXT STEP) X[8] */
405 xor %edx, %r11d /* z ^ ... */
406 xor %ebx, %r11d /* x ^ ... */
407 add %r11d, %eax /* dst += ... */
408 rol $4, %eax /* dst <<< s */
409 mov %ebx, %r11d /* (NEXT STEP) y' = %ebx */
410 add %ebx, %eax /* dst += x */
411 lea -2022574463(%edx,%r10d),%edx /* Const + dst + ... */
412 mov 11*4(%rsi),%r10d /* (NEXT STEP) X[11] */
413 xor %ecx, %r11d /* z ^ ... */
414 xor %eax, %r11d /* x ^ ... */
415 add %r11d, %edx /* dst += ... */
416 rol $11, %edx /* dst <<< s */
417 mov %eax, %r11d /* (NEXT STEP) y' = %eax */
418 add %eax, %edx /* dst += x */
419 lea 1839030562(%ecx,%r10d),%ecx /* Const + dst + ... */
420 mov 14*4(%rsi),%r10d /* (NEXT STEP) X[14] */
421 xor %ebx, %r11d /* z ^ ... */
422 xor %edx, %r11d /* x ^ ... */
423 add %r11d, %ecx /* dst += ... */
424 rol $16, %ecx /* dst <<< s */
425 mov %edx, %r11d /* (NEXT STEP) y' = %edx */
426 add %edx, %ecx /* dst += x */
427 lea -35309556(%ebx,%r10d),%ebx /* Const + dst + ... */
428 mov 1*4(%rsi),%r10d /* (NEXT STEP) X[1] */
429 xor %eax, %r11d /* z ^ ... */
430 xor %ecx, %r11d /* x ^ ... */
431 add %r11d, %ebx /* dst += ... */
432 rol $23, %ebx /* dst <<< s */
433 mov %ecx, %r11d /* (NEXT STEP) y' = %ecx */
434 add %ecx, %ebx /* dst += x */
435 lea -1530992060(%eax,%r10d),%eax /* Const + dst + ... */
436 mov 4*4(%rsi),%r10d /* (NEXT STEP) X[4] */
437 xor %edx, %r11d /* z ^ ... */
438 xor %ebx, %r11d /* x ^ ... */
439 add %r11d, %eax /* dst += ... */
440 rol $4, %eax /* dst <<< s */
441 mov %ebx, %r11d /* (NEXT STEP) y' = %ebx */
442 add %ebx, %eax /* dst += x */
443 lea 1272893353(%edx,%r10d),%edx /* Const + dst + ... */
444 mov 7*4(%rsi),%r10d /* (NEXT STEP) X[7] */
445 xor %ecx, %r11d /* z ^ ... */
446 xor %eax, %r11d /* x ^ ... */
447 add %r11d, %edx /* dst += ... */
448 rol $11, %edx /* dst <<< s */
449 mov %eax, %r11d /* (NEXT STEP) y' = %eax */
450 add %eax, %edx /* dst += x */
451 lea -155497632(%ecx,%r10d),%ecx /* Const + dst + ... */
452 mov 10*4(%rsi),%r10d /* (NEXT STEP) X[10] */
453 xor %ebx, %r11d /* z ^ ... */
454 xor %edx, %r11d /* x ^ ... */
455 add %r11d, %ecx /* dst += ... */
456 rol $16, %ecx /* dst <<< s */
457 mov %edx, %r11d /* (NEXT STEP) y' = %edx */
458 add %edx, %ecx /* dst += x */
459 lea -1094730640(%ebx,%r10d),%ebx /* Const + dst + ... */
460 mov 13*4(%rsi),%r10d /* (NEXT STEP) X[13] */
461 xor %eax, %r11d /* z ^ ... */
462 xor %ecx, %r11d /* x ^ ... */
463 add %r11d, %ebx /* dst += ... */
464 rol $23, %ebx /* dst <<< s */
465 mov %ecx, %r11d /* (NEXT STEP) y' = %ecx */
466 add %ecx, %ebx /* dst += x */
467 lea 681279174(%eax,%r10d),%eax /* Const + dst + ... */
468 mov 0*4(%rsi),%r10d /* (NEXT STEP) X[0] */
469 xor %edx, %r11d /* z ^ ... */
470 xor %ebx, %r11d /* x ^ ... */
471 add %r11d, %eax /* dst += ... */
472 rol $4, %eax /* dst <<< s */
473 mov %ebx, %r11d /* (NEXT STEP) y' = %ebx */
474 add %ebx, %eax /* dst += x */
475 lea -358537222(%edx,%r10d),%edx /* Const + dst + ... */
476 mov 3*4(%rsi),%r10d /* (NEXT STEP) X[3] */
477 xor %ecx, %r11d /* z ^ ... */
478 xor %eax, %r11d /* x ^ ... */
479 add %r11d, %edx /* dst += ... */
480 rol $11, %edx /* dst <<< s */
481 mov %eax, %r11d /* (NEXT STEP) y' = %eax */
482 add %eax, %edx /* dst += x */
483 lea -722521979(%ecx,%r10d),%ecx /* Const + dst + ... */
484 mov 6*4(%rsi),%r10d /* (NEXT STEP) X[6] */
485 xor %ebx, %r11d /* z ^ ... */
486 xor %edx, %r11d /* x ^ ... */
487 add %r11d, %ecx /* dst += ... */
488 rol $16, %ecx /* dst <<< s */
489 mov %edx, %r11d /* (NEXT STEP) y' = %edx */
490 add %edx, %ecx /* dst += x */
491 lea 76029189(%ebx,%r10d),%ebx /* Const + dst + ... */
492 mov 9*4(%rsi),%r10d /* (NEXT STEP) X[9] */
493 xor %eax, %r11d /* z ^ ... */
494 xor %ecx, %r11d /* x ^ ... */
495 add %r11d, %ebx /* dst += ... */
496 rol $23, %ebx /* dst <<< s */
497 mov %ecx, %r11d /* (NEXT STEP) y' = %ecx */
498 add %ecx, %ebx /* dst += x */
499 lea -640364487(%eax,%r10d),%eax /* Const + dst + ... */
500 mov 12*4(%rsi),%r10d /* (NEXT STEP) X[12] */
501 xor %edx, %r11d /* z ^ ... */
502 xor %ebx, %r11d /* x ^ ... */
503 add %r11d, %eax /* dst += ... */
504 rol $4, %eax /* dst <<< s */
505 mov %ebx, %r11d /* (NEXT STEP) y' = %ebx */
506 add %ebx, %eax /* dst += x */
507 lea -421815835(%edx,%r10d),%edx /* Const + dst + ... */
508 mov 15*4(%rsi),%r10d /* (NEXT STEP) X[15] */
509 xor %ecx, %r11d /* z ^ ... */
510 xor %eax, %r11d /* x ^ ... */
511 add %r11d, %edx /* dst += ... */
512 rol $11, %edx /* dst <<< s */
513 mov %eax, %r11d /* (NEXT STEP) y' = %eax */
514 add %eax, %edx /* dst += x */
515 lea 530742520(%ecx,%r10d),%ecx /* Const + dst + ... */
516 mov 2*4(%rsi),%r10d /* (NEXT STEP) X[2] */
517 xor %ebx, %r11d /* z ^ ... */
518 xor %edx, %r11d /* x ^ ... */
519 add %r11d, %ecx /* dst += ... */
520 rol $16, %ecx /* dst <<< s */
521 mov %edx, %r11d /* (NEXT STEP) y' = %edx */
522 add %edx, %ecx /* dst += x */
523 lea -995338651(%ebx,%r10d),%ebx /* Const + dst + ... */
524 mov 0*4(%rsi),%r10d /* (NEXT STEP) X[0] */
525 xor %eax, %r11d /* z ^ ... */
526 xor %ecx, %r11d /* x ^ ... */
527 add %r11d, %ebx /* dst += ... */
528 rol $23, %ebx /* dst <<< s */
529 mov %ecx, %r11d /* (NEXT STEP) y' = %ecx */
530 add %ecx, %ebx /* dst += x */
531 mov 0*4(%rsi), %r10d /* (NEXT STEP) X[0] */
532 mov $0xffffffff, %r11d
533 xor %edx, %r11d /* (NEXT STEP) not z' = not %edx*/
534 lea -198630844(%eax,%r10d),%eax /* Const + dst + ... */
535 or %ebx, %r11d /* x | ... */
536 xor %ecx, %r11d /* y ^ ... */
537 add %r11d, %eax /* dst += ... */
538 mov 7*4(%rsi),%r10d /* (NEXT STEP) X[7] */
539 mov $0xffffffff, %r11d
540 rol $6, %eax /* dst <<< s */
541 xor %ecx, %r11d /* (NEXT STEP) not z' = not %ecx */
542 add %ebx, %eax /* dst += x */
543 lea 1126891415(%edx,%r10d),%edx /* Const + dst + ... */
544 or %eax, %r11d /* x | ... */
545 xor %ebx, %r11d /* y ^ ... */
546 add %r11d, %edx /* dst += ... */
547 mov 14*4(%rsi),%r10d /* (NEXT STEP) X[14] */
548 mov $0xffffffff, %r11d
549 rol $10, %edx /* dst <<< s */
550 xor %ebx, %r11d /* (NEXT STEP) not z' = not %ebx */
551 add %eax, %edx /* dst += x */
552 lea -1416354905(%ecx,%r10d),%ecx /* Const + dst + ... */
553 or %edx, %r11d /* x | ... */
554 xor %eax, %r11d /* y ^ ... */
555 add %r11d, %ecx /* dst += ... */
556 mov 5*4(%rsi),%r10d /* (NEXT STEP) X[5] */
557 mov $0xffffffff, %r11d
558 rol $15, %ecx /* dst <<< s */
559 xor %eax, %r11d /* (NEXT STEP) not z' = not %eax */
560 add %edx, %ecx /* dst += x */
561 lea -57434055(%ebx,%r10d),%ebx /* Const + dst + ... */
562 or %ecx, %r11d /* x | ... */
563 xor %edx, %r11d /* y ^ ... */
564 add %r11d, %ebx /* dst += ... */
565 mov 12*4(%rsi),%r10d /* (NEXT STEP) X[12] */
566 mov $0xffffffff, %r11d
567 rol $21, %ebx /* dst <<< s */
568 xor %edx, %r11d /* (NEXT STEP) not z' = not %edx */
569 add %ecx, %ebx /* dst += x */
570 lea 1700485571(%eax,%r10d),%eax /* Const + dst + ... */
571 or %ebx, %r11d /* x | ... */
572 xor %ecx, %r11d /* y ^ ... */
573 add %r11d, %eax /* dst += ... */
574 mov 3*4(%rsi),%r10d /* (NEXT STEP) X[3] */
575 mov $0xffffffff, %r11d
576 rol $6, %eax /* dst <<< s */
577 xor %ecx, %r11d /* (NEXT STEP) not z' = not %ecx */
578 add %ebx, %eax /* dst += x */
579 lea -1894986606(%edx,%r10d),%edx /* Const + dst + ... */
580 or %eax, %r11d /* x | ... */
581 xor %ebx, %r11d /* y ^ ... */
582 add %r11d, %edx /* dst += ... */
583 mov 10*4(%rsi),%r10d /* (NEXT STEP) X[10] */
584 mov $0xffffffff, %r11d
585 rol $10, %edx /* dst <<< s */
586 xor %ebx, %r11d /* (NEXT STEP) not z' = not %ebx */
587 add %eax, %edx /* dst += x */
588 lea -1051523(%ecx,%r10d),%ecx /* Const + dst + ... */
589 or %edx, %r11d /* x | ... */
590 xor %eax, %r11d /* y ^ ... */
591 add %r11d, %ecx /* dst += ... */
592 mov 1*4(%rsi),%r10d /* (NEXT STEP) X[1] */
593 mov $0xffffffff, %r11d
594 rol $15, %ecx /* dst <<< s */
595 xor %eax, %r11d /* (NEXT STEP) not z' = not %eax */
596 add %edx, %ecx /* dst += x */
597 lea -2054922799(%ebx,%r10d),%ebx /* Const + dst + ... */
598 or %ecx, %r11d /* x | ... */
599 xor %edx, %r11d /* y ^ ... */
600 add %r11d, %ebx /* dst += ... */
601 mov 8*4(%rsi),%r10d /* (NEXT STEP) X[8] */
602 mov $0xffffffff, %r11d
603 rol $21, %ebx /* dst <<< s */
604 xor %edx, %r11d /* (NEXT STEP) not z' = not %edx */
605 add %ecx, %ebx /* dst += x */
606 lea 1873313359(%eax,%r10d),%eax /* Const + dst + ... */
607 or %ebx, %r11d /* x | ... */
608 xor %ecx, %r11d /* y ^ ... */
609 add %r11d, %eax /* dst += ... */
610 mov 15*4(%rsi),%r10d /* (NEXT STEP) X[15] */
611 mov $0xffffffff, %r11d
612 rol $6, %eax /* dst <<< s */
613 xor %ecx, %r11d /* (NEXT STEP) not z' = not %ecx */
614 add %ebx, %eax /* dst += x */
615 lea -30611744(%edx,%r10d),%edx /* Const + dst + ... */
616 or %eax, %r11d /* x | ... */
617 xor %ebx, %r11d /* y ^ ... */
618 add %r11d, %edx /* dst += ... */
619 mov 6*4(%rsi),%r10d /* (NEXT STEP) X[6] */
620 mov $0xffffffff, %r11d
621 rol $10, %edx /* dst <<< s */
622 xor %ebx, %r11d /* (NEXT STEP) not z' = not %ebx */
623 add %eax, %edx /* dst += x */
624 lea -1560198380(%ecx,%r10d),%ecx /* Const + dst + ... */
625 or %edx, %r11d /* x | ... */
626 xor %eax, %r11d /* y ^ ... */
627 add %r11d, %ecx /* dst += ... */
628 mov 13*4(%rsi),%r10d /* (NEXT STEP) X[13] */
629 mov $0xffffffff, %r11d
630 rol $15, %ecx /* dst <<< s */
631 xor %eax, %r11d /* (NEXT STEP) not z' = not %eax */
632 add %edx, %ecx /* dst += x */
633 lea 1309151649(%ebx,%r10d),%ebx /* Const + dst + ... */
634 or %ecx, %r11d /* x | ... */
635 xor %edx, %r11d /* y ^ ... */
636 add %r11d, %ebx /* dst += ... */
637 mov 4*4(%rsi),%r10d /* (NEXT STEP) X[4] */
638 mov $0xffffffff, %r11d
639 rol $21, %ebx /* dst <<< s */
640 xor %edx, %r11d /* (NEXT STEP) not z' = not %edx */
641 add %ecx, %ebx /* dst += x */
642 lea -145523070(%eax,%r10d),%eax /* Const + dst + ... */
643 or %ebx, %r11d /* x | ... */
644 xor %ecx, %r11d /* y ^ ... */
645 add %r11d, %eax /* dst += ... */
646 mov 11*4(%rsi),%r10d /* (NEXT STEP) X[11] */
647 mov $0xffffffff, %r11d
648 rol $6, %eax /* dst <<< s */
649 xor %ecx, %r11d /* (NEXT STEP) not z' = not %ecx */
650 add %ebx, %eax /* dst += x */
651 lea -1120210379(%edx,%r10d),%edx /* Const + dst + ... */
652 or %eax, %r11d /* x | ... */
653 xor %ebx, %r11d /* y ^ ... */
654 add %r11d, %edx /* dst += ... */
655 mov 2*4(%rsi),%r10d /* (NEXT STEP) X[2] */
656 mov $0xffffffff, %r11d
657 rol $10, %edx /* dst <<< s */
658 xor %ebx, %r11d /* (NEXT STEP) not z' = not %ebx */
659 add %eax, %edx /* dst += x */
660 lea 718787259(%ecx,%r10d),%ecx /* Const + dst + ... */
661 or %edx, %r11d /* x | ... */
662 xor %eax, %r11d /* y ^ ... */
663 add %r11d, %ecx /* dst += ... */
664 mov 9*4(%rsi),%r10d /* (NEXT STEP) X[9] */
665 mov $0xffffffff, %r11d
666 rol $15, %ecx /* dst <<< s */
667 xor %eax, %r11d /* (NEXT STEP) not z' = not %eax */
668 add %edx, %ecx /* dst += x */
669 lea -343485551(%ebx,%r10d),%ebx /* Const + dst + ... */
670 or %ecx, %r11d /* x | ... */
671 xor %edx, %r11d /* y ^ ... */
672 add %r11d, %ebx /* dst += ... */
673 mov 0*4(%rsi),%r10d /* (NEXT STEP) X[0] */
674 mov $0xffffffff, %r11d
675 rol $21, %ebx /* dst <<< s */
676 xor %edx, %r11d /* (NEXT STEP) not z' = not %edx */
677 add %ecx, %ebx /* dst += x */
678 # add old values of A, B, C, D
685 add $64, %rsi # ptr += 64
686 cmp %rdi, %rsi # cmp end with ptr
687 jb 2b # jmp if ptr < end
688 # END of loop over 16-word blocks
690 mov %eax, 0*4(%rbp) # ctx->A = A
691 mov %ebx, 1*4(%rbp) # ctx->B = B
692 mov %ecx, 2*4(%rbp) # ctx->C = C
693 mov %edx, 3*4(%rbp) # ctx->D = D
697 pop %r13 # not really useful (r13 is unused)
702 #ifndef __apple_build_version__
703 .L_md5_process_asm_end:
704 .size md5_process_asm,.L_md5_process_asm_end-md5_process_asm
706 L_md5_process_asm_end:
709 #endif /* !USE_OPENSSL ... */