/*
 * Routines to support checksumming of bytes.
 *
 * Copyright (C) 1996 Andrew Tridgell
 * Copyright (C) 1996 Paul Mackerras
 * Copyright (C) 2004-2020 Wayne Davison
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * In addition, as a special exception, the copyright holders give
 * permission to dynamically link rsync with the OpenSSL and xxhash
 * libraries when those libraries are being distributed in compliance
 * with their license terms, and to distribute a dynamically linked
 * combination of rsync and these libraries.  This is also considered
 * to be covered under the GPL's System Libraries exception.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, visit the http://fsf.org website.
 */

34 #include "openssl/md4.h"
35 #include "openssl/md5.h"
/* Options and state that are defined in other source files. */
extern int am_server;
extern int local_server;
extern int whole_file;
extern int read_batch;
extern int checksum_seed;
extern int protocol_version;
extern int proper_seed_order;
extern char *checksum_choice;

/* Internal ids for the supported checksum algorithms.  The code in this file
 * relies on their ordering: every id below CSUM_MD4 is a legacy MD4 variant,
 * and every id above CSUM_MD4_BUSTED finalizes MD4 correctly. */
#define CSUM_NONE 0
#define CSUM_MD4_ARCHAIC 1
#define CSUM_MD4_BUSTED 2
#define CSUM_MD4_OLD 3
#define CSUM_MD4 4
#define CSUM_MD5 5
#define CSUM_XXH64 6

55 struct name_num_obj valid_checksums = {
56 "checksum", NULL, NULL, 0, 0, {
58 { CSUM_XXH64, "xxh64", NULL },
59 { CSUM_XXH64, "xxhash", NULL },
61 { CSUM_MD5, "md5", NULL },
62 { CSUM_MD4, "md4", NULL },
63 { CSUM_NONE, "none", NULL },
#ifndef USE_OPENSSL
/* Without OpenSSL, map the OpenSSL-style MD5 names onto the md5_*() routines.
 * Note that MD5_Final() takes (digest, context) while md5_result() takes
 * (context, digest), so the macro swaps the arguments. */
#define MD5_CTX md_context
#define MD5_Init md5_begin
#define MD5_Update md5_update
#define MD5_Final(digest, cptr) md5_result(cptr, digest)
#endif

int xfersum_type = 0; /* used for the file transfer checksums */
int checksum_type = 0; /* used for the pre-transfer (--checksum) checksums */

78 static int parse_csum_name(const char *name, int len)
80 struct name_num_item *nni;
85 if (!name || (len == 4 && strncasecmp(name, "auto", 4) == 0)) {
86 if (protocol_version >= 30)
88 if (protocol_version >= 27)
90 if (protocol_version >= 21)
91 return CSUM_MD4_BUSTED;
92 return CSUM_MD4_ARCHAIC;
95 nni = get_nni_by_name(&valid_checksums, name, len);
98 rprintf(FERROR, "unknown checksum name: %s\n", name);
99 exit_cleanup(RERR_UNSUPPORTED);
105 static const char *checksum_name(int num)
107 struct name_num_item *nni = get_nni_by_num(&valid_checksums, num);
109 return nni ? nni->name : num < CSUM_MD4 ? "MD4" : "UNKNOWN";
112 void parse_checksum_choice(int final_call)
114 if (!valid_checksums.negotiated_name) {
115 char *cp = checksum_choice ? strchr(checksum_choice, ',') : NULL;
117 xfersum_type = parse_csum_name(checksum_choice, cp - checksum_choice);
118 checksum_type = parse_csum_name(cp+1, -1);
120 xfersum_type = checksum_type = parse_csum_name(checksum_choice, -1);
123 if (xfersum_type == CSUM_NONE)
126 if (final_call && DEBUG_GTE(NSTR, am_server ? 2 : 1)) {
127 const char *c_s = am_server ? "Server" : "Client";
128 if (valid_checksums.negotiated_name)
129 rprintf(FINFO, "%s negotiated checksum: %s\n", c_s, valid_checksums.negotiated_name);
130 else if (xfersum_type == checksum_type) {
131 rprintf(FINFO, "%s %s checksum: %s\n", c_s,
132 checksum_choice ? "chosen" : "protocol-based",
133 checksum_name(xfersum_type));
135 rprintf(FINFO, "%s chosen transfer checksum: %s\n",
136 c_s, checksum_name(xfersum_type));
137 rprintf(FINFO, "%s chosen pre-transfer checksum: %s\n",
138 c_s, checksum_name(checksum_type));
143 int csum_len_for_type(int cst, BOOL flist_csum)
148 case CSUM_MD4_ARCHAIC:
149 /* The oldest checksum code is rather weird: the file-list code only sent
150 * 2-byte checksums, but all other checksums were full MD4 length. */
151 return flist_csum ? 2 : MD4_DIGEST_LEN;
154 case CSUM_MD4_BUSTED:
155 return MD4_DIGEST_LEN;
157 return MD5_DIGEST_LEN;
158 #ifdef SUPPORT_XXHASH
162 default: /* paranoia to prevent missing case values */
163 exit_cleanup(RERR_UNSUPPORTED);
168 int canonical_checksum(int csum_type)
170 return csum_type >= CSUM_MD4 ? 1 : 0;
173 #ifndef HAVE_SIMD /* See simd-checksum-*.cpp. */
175 a simple 32 bit checksum that can be updated from either end
176 (inspired by Mark Adler's Adler-32 checksum)
178 uint32 get_checksum1(char *buf1, int32 len)
182 schar *buf = (schar *)buf1;
185 for (i = 0; i < (len-4); i+=4) {
186 s2 += 4*(s1 + buf[i]) + 3*buf[i+1] + 2*buf[i+2] + buf[i+3] + 10*CHAR_OFFSET;
187 s1 += (buf[i+0] + buf[i+1] + buf[i+2] + buf[i+3] + 4*CHAR_OFFSET);
189 for (; i < len; i++) {
190 s1 += (buf[i]+CHAR_OFFSET); s2 += s1;
192 return (s1 & 0xffff) + (s2 << 16);
196 void get_checksum2(char *buf, int32 len, char *sum)
198 switch (xfersum_type) {
203 if (proper_seed_order) {
205 SIVALu(seedbuf, 0, checksum_seed);
206 MD5_Update(&m5, seedbuf, 4);
208 MD5_Update(&m5, (uchar *)buf, len);
210 MD5_Update(&m5, (uchar *)buf, len);
212 SIVALu(seedbuf, 0, checksum_seed);
213 MD5_Update(&m5, seedbuf, 4);
216 MD5_Final((uchar *)sum, &m5);
224 MD4_Update(&m4, (uchar *)buf, len);
227 SIVALu(seedbuf, 0, checksum_seed);
228 MD4_Update(&m4, seedbuf, 4);
230 MD4_Final((uchar *)sum, &m4);
235 case CSUM_MD4_BUSTED:
236 case CSUM_MD4_ARCHAIC: {
247 buf1 = new_array(char, len+4);
250 out_of_memory("get_checksum2");
253 memcpy(buf1, buf, len);
255 SIVAL(buf1,len,checksum_seed);
259 for (i = 0; i + CSUM_CHUNK <= len; i += CSUM_CHUNK)
260 mdfour_update(&m, (uchar *)(buf1+i), CSUM_CHUNK);
263 * Prior to version 27 an incorrect MD4 checksum was computed
264 * by failing to call mdfour_tail() for block sizes that
265 * are multiples of 64. This is fixed by calling mdfour_update()
266 * even when there are no more bytes.
268 if (len - i > 0 || xfersum_type > CSUM_MD4_BUSTED)
269 mdfour_update(&m, (uchar *)(buf1+i), len-i);
271 mdfour_result(&m, (uchar *)sum);
274 #ifdef SUPPORT_XXHASH
276 SIVAL64(sum, 0, XXH64(buf, len, checksum_seed));
279 default: /* paranoia to prevent missing case values */
280 exit_cleanup(RERR_UNSUPPORTED);
284 void file_checksum(const char *fname, const STRUCT_STAT *st_p, char *sum)
286 struct map_struct *buf;
287 OFF_T i, len = st_p->st_size;
291 memset(sum, 0, MAX_DIGEST_LEN);
293 fd = do_open(fname, O_RDONLY, 0);
297 buf = map_file(fd, len, MAX_MAP_SIZE, CHUNK_SIZE);
299 switch (checksum_type) {
305 for (i = 0; i + CHUNK_SIZE <= len; i += CHUNK_SIZE)
306 MD5_Update(&m5, (uchar *)map_ptr(buf, i, CHUNK_SIZE), CHUNK_SIZE);
308 remainder = (int32)(len - i);
310 MD5_Update(&m5, (uchar *)map_ptr(buf, i, remainder), remainder);
312 MD5_Final((uchar *)sum, &m5);
322 for (i = 0; i + CHUNK_SIZE <= len; i += CHUNK_SIZE)
323 MD4_Update(&m4, (uchar *)map_ptr(buf, i, CHUNK_SIZE), CHUNK_SIZE);
325 remainder = (int32)(len - i);
327 MD4_Update(&m4, (uchar *)map_ptr(buf, i, remainder), remainder);
329 MD4_Final((uchar *)sum, &m4);
334 case CSUM_MD4_BUSTED:
335 case CSUM_MD4_ARCHAIC: {
340 for (i = 0; i + CHUNK_SIZE <= len; i += CHUNK_SIZE)
341 mdfour_update(&m, (uchar *)map_ptr(buf, i, CHUNK_SIZE), CHUNK_SIZE);
343 /* Prior to version 27 an incorrect MD4 checksum was computed
344 * by failing to call mdfour_tail() for block sizes that
345 * are multiples of 64. This is fixed by calling mdfour_update()
346 * even when there are no more bytes. */
347 remainder = (int32)(len - i);
348 if (remainder > 0 || checksum_type > CSUM_MD4_BUSTED)
349 mdfour_update(&m, (uchar *)map_ptr(buf, i, remainder), remainder);
351 mdfour_result(&m, (uchar *)sum);
354 #ifdef SUPPORT_XXHASH
356 XXH64_state_t* state = XXH64_createState();
358 out_of_memory("file_checksum XXH64");
360 if (XXH64_reset(state, 0) == XXH_ERROR) {
361 rprintf(FERROR, "error resetting XXH64 seed");
362 exit_cleanup(RERR_STREAMIO);
365 for (i = 0; i + CHUNK_SIZE <= len; i += CHUNK_SIZE) {
366 XXH_errorcode const updateResult =
367 XXH64_update(state, (uchar *)map_ptr(buf, i, CHUNK_SIZE), CHUNK_SIZE);
368 if (updateResult == XXH_ERROR) {
369 rprintf(FERROR, "error computing XXH64 hash");
370 exit_cleanup(RERR_STREAMIO);
374 remainder = (int32)(len - i);
376 XXH64_update(state, (uchar *)map_ptr(buf, i, CHUNK_SIZE), remainder);
377 SIVAL64(sum, 0, XXH64_digest(state));
379 XXH64_freeState(state);
384 rprintf(FERROR, "Invalid checksum-choice for --checksum: %s (%d)\n",
385 checksum_name(checksum_type), checksum_type);
386 exit_cleanup(RERR_UNSUPPORTED);
393 static int32 sumresidue;
401 #ifdef SUPPORT_XXHASH
402 static XXH64_state_t* xxh64_state;
404 static int cursum_type;
406 void sum_init(int csum_type, int seed)
411 csum_type = parse_csum_name(NULL, 0);
412 cursum_type = csum_type;
422 mdfour_begin(&ctx.md);
427 case CSUM_MD4_BUSTED:
428 case CSUM_MD4_ARCHAIC:
429 mdfour_begin(&ctx.md);
434 #ifdef SUPPORT_XXHASH
436 if (xxh64_state == NULL) {
437 xxh64_state = XXH64_createState();
438 if (xxh64_state == NULL)
439 out_of_memory("sum_init xxh64");
441 if (XXH64_reset(xxh64_state, 0) == XXH_ERROR) {
442 rprintf(FERROR, "error resetting XXH64 state");
443 exit_cleanup(RERR_STREAMIO);
449 default: /* paranoia to prevent missing case values */
450 exit_cleanup(RERR_UNSUPPORTED);
455 * Feed data into an MD4 accumulator, md. The results may be
456 * retrieved using sum_end(). md is used for different purposes at
457 * different points during execution.
459 * @todo Perhaps get rid of md and just pass in the address each time.
460 * Very slightly clearer and slower.
462 void sum_update(const char *p, int32 len)
464 switch (cursum_type) {
466 MD5_Update(&ctx.m5, (uchar *)p, len);
470 MD4_Update(&ctx.m4, (uchar *)p, len);
474 case CSUM_MD4_BUSTED:
475 case CSUM_MD4_ARCHAIC:
476 if (len + sumresidue < CSUM_CHUNK) {
477 memcpy(ctx.md.buffer + sumresidue, p, len);
483 int32 i = CSUM_CHUNK - sumresidue;
484 memcpy(ctx.md.buffer + sumresidue, p, i);
485 mdfour_update(&ctx.md, (uchar *)ctx.md.buffer, CSUM_CHUNK);
490 while (len >= CSUM_CHUNK) {
491 mdfour_update(&ctx.md, (uchar *)p, CSUM_CHUNK);
498 memcpy(ctx.md.buffer, p, sumresidue);
500 #ifdef SUPPORT_XXHASH
502 if (XXH64_update(xxh64_state, p, len) == XXH_ERROR) {
503 rprintf(FERROR, "error computing XXH64 hash");
504 exit_cleanup(RERR_STREAMIO);
510 default: /* paranoia to prevent missing case values */
511 exit_cleanup(RERR_UNSUPPORTED);
515 /* NOTE: all the callers of sum_end() pass in a pointer to a buffer that is
516 * MAX_DIGEST_LEN in size, so even if the csum-len is shorter that that (i.e.
517 * CSUM_MD4_ARCHAIC), we don't have to worry about limiting the data we write
518 * into the "sum" buffer. */
519 int sum_end(char *sum)
521 switch (cursum_type) {
523 MD5_Final((uchar *)sum, &ctx.m5);
527 MD4_Final((uchar *)sum, &ctx.m4);
531 mdfour_update(&ctx.md, (uchar *)ctx.md.buffer, sumresidue);
532 mdfour_result(&ctx.md, (uchar *)sum);
534 case CSUM_MD4_BUSTED:
535 case CSUM_MD4_ARCHAIC:
537 mdfour_update(&ctx.md, (uchar *)ctx.md.buffer, sumresidue);
538 mdfour_result(&ctx.md, (uchar *)sum);
540 #ifdef SUPPORT_XXHASH
542 SIVAL64(sum, 0, XXH64_digest(xxh64_state));
548 default: /* paranoia to prevent missing case values */
549 exit_cleanup(RERR_UNSUPPORTED);
552 return csum_len_for_type(cursum_type, 0);