2 * Routines to support checksumming of bytes.
4 * Copyright (C) 1996 Andrew Tridgell
5 * Copyright (C) 1996 Paul Mackerras
6 * Copyright (C) 2004-2022 Wayne Davison
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 3 of the License, or
11 * (at your option) any later version.
13 * In addition, as a special exception, the copyright holders give
14 * permission to dynamically link rsync with the OpenSSL and xxhash
15 * libraries when those libraries are being distributed in compliance
16 * with their license terms, and to distribute a dynamically linked
17 * combination of rsync and these libraries. This is also considered
18 * to be covered under the GPL's System Libraries exception.
20 * This program is distributed in the hope that it will be useful,
21 * but WITHOUT ANY WARRANTY; without even the implied warranty of
22 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23 * GNU General Public License for more details.
25 * You should have received a copy of the GNU General Public License along
26 * with this program; if not, visit the http://fsf.org website.
/* SUPPORT_XXH3 is defined when the xxhash library reports version 800 or later.
 * NOTE(review): the #ifdef/#endif lines that would wrap this check are not
 * visible in this listing -- confirm against the full file. */
33 # if XXH_VERSION_NUMBER >= 800
34 # define SUPPORT_XXH3 1
/* Declarations of globals whose definitions are not part of this listing. */
39 extern int whole_file;
40 extern int checksum_seed;
41 extern int protocol_version;
42 extern int proper_seed_order;
43 extern const char *checksum_choice;
/* Bits kept in a name_num_item's flags member (tested and updated in csum_evp_md()). */
/* The checksum has built-in code that can be used without openssl. */
45 #define NNI_BUILTIN (1<<0)
/* The checksum is a candidate for openssl's EVP digest API. */
46 #define NNI_EVP (1<<1)
/* Set by csum_evp_md() next to its EVP_DigestInit_ex() trial; entries that already have it skip that trial. */
47 #define NNI_EVP_OK (1<<2)
/* Table of the checksum names that can be chosen; valid_checksums refers to it
 * and parse_csum_name() searches it by name. Both "xxh64" and "xxhash" map to
 * CSUM_XXH64. The sha* entries are only compiled in when the matching
 * *_DIGEST_LENGTH macro is defined.
 * NOTE(review): the closing #endif lines, any terminating entry and the closing
 * brace are not visible in this listing. */
49 struct name_num_item valid_checksums_items[] = {
51 { CSUM_XXH3_128, 0, "xxh128", NULL },
52 { CSUM_XXH3_64, 0, "xxh3", NULL },
55 { CSUM_XXH64, 0, "xxh64", NULL },
56 { CSUM_XXH64, 0, "xxhash", NULL },
/* md5 and md4 are flagged as having both built-in code and an EVP candidate. */
58 { CSUM_MD5, NNI_BUILTIN|NNI_EVP, "md5", NULL },
59 { CSUM_MD4, NNI_BUILTIN|NNI_EVP, "md4", NULL },
/* The sha* entries carry only NNI_EVP (no built-in flag). */
60 #ifdef SHA_DIGEST_LENGTH
61 { CSUM_SHA1, NNI_EVP, "sha1", NULL },
63 #ifdef SHA256_DIGEST_LENGTH
64 { CSUM_SHA256, NNI_EVP, "sha256", NULL },
66 #ifdef SHA512_DIGEST_LENGTH
67 { CSUM_SHA512, NNI_EVP, "sha512", NULL },
69 { CSUM_NONE, 0, "none", NULL },
/* Lookup object labelled "checksum" that wraps valid_checksums_items. Its list
 * member is walked by init_checksum_choices() and its negotiated_nni member is
 * consulted by parse_checksum_choice(). */
73 struct name_num_obj valid_checksums = {
74 "checksum", NULL, 0, 0, valid_checksums_items
/* Table of checksums referenced by valid_auth_checksums (labelled "daemon auth
 * checksum"). The sha* entries are only compiled in when the matching
 * *_DIGEST_LENGTH macro is defined.
 * NOTE(review): the closing #endif lines, any terminating entry and the closing
 * brace are not visible in this listing. */
77 struct name_num_item valid_auth_checksums_items[] = {
78 #ifdef SHA512_DIGEST_LENGTH
79 { CSUM_SHA512, NNI_EVP, "sha512", NULL },
81 #ifdef SHA256_DIGEST_LENGTH
82 { CSUM_SHA256, NNI_EVP, "sha256", NULL },
84 #ifdef SHA_DIGEST_LENGTH
85 { CSUM_SHA1, NNI_EVP, "sha1", NULL },
87 { CSUM_MD5, NNI_BUILTIN|NNI_EVP, "md5", NULL },
88 { CSUM_MD4, NNI_BUILTIN|NNI_EVP, "md4", NULL },
/* Lookup object labelled "daemon auth checksum" that wraps
 * valid_auth_checksums_items; verify_digest() searches it by number and
 * init_checksum_choices() walks its list member. */
92 struct name_num_obj valid_auth_checksums = {
93 "daemon auth checksum", NULL, 0, 0, valid_auth_checksums_items
/* Entries handed out by parse_csum_name() when no explicit checksum name is
 * given (NULL or "auto"). parse_csum_name() overwrites implied_checksum_md4.num
 * with an older MD4 variant according to protocol_version. */
96 /* These cannot make use of openssl, so they're marked just as built-in */
97 struct name_num_item implied_checksum_md4 =
98 { CSUM_MD4, NNI_BUILTIN, "md4", NULL };
99 struct name_num_item implied_checksum_md5 =
100 { CSUM_MD5, NNI_BUILTIN, "md5", NULL };
/* Current checksum selections; all four are assigned in parse_checksum_choice(). */
102 struct name_num_item *xfer_sum_nni; /* used for the transfer checksum2 computations */
/* Result of csum_evp_md(xfer_sum_nni); get_checksum2() takes its EVP path when this is non-NULL. */
103 const EVP_MD *xfer_sum_evp_md;
105 struct name_num_item *file_sum_nni; /* used for the pre-transfer --checksum computations */
/* Result of csum_evp_md(file_sum_nni); file_checksum() takes its EVP path when this is non-NULL. */
106 const EVP_MD *file_sum_evp_md;
/* Shared EVP context: created on demand in csum_evp_md() and sum_init(), then
 * used by sum_update() and sum_end(). */
110 EVP_MD_CTX *ctx_evp = NULL;
/* Tested at the top of init_checksum_choices() and set to 1 at its end. */
112 static int initialized_choices = 0;
/* Resolve a checksum name to its name_num_item.
 *
 * name, len: the name text and its length. A NULL name, or the 4-character
 * name "auto" (compared case-insensitively), selects an implied checksum based
 * on protocol_version instead of an explicit table lookup. Callers in this
 * file also pass -1 for len; how that is handled is not visible in this
 * listing (presumably "measure the string") -- confirm.
 *
 * NOTE(review): several lines of this function (braces, some statements and
 * the final return) are not visible here. */
114 struct name_num_item *parse_csum_name(const char *name, int len)
116 struct name_num_item *nni;
/* Run the one-time vetting of the checksum tables before anything is looked up. */
121 init_checksum_choices();
123 if (!name || (len == 4 && strncasecmp(name, "auto", 4) == 0)) {
/* Protocol 30 and later: when proper_seed_order is not set, the built-in-only
 * MD5 entry is returned directly. */
124 if (protocol_version >= 30) {
125 if (!proper_seed_order)
126 return &implied_checksum_md5;
/* Otherwise the implied MD4 entry is returned after its number has been
 * switched to the MD4 variant matching the protocol version. */
130 if (protocol_version >= 27)
131 implied_checksum_md4.num = CSUM_MD4_OLD;
132 else if (protocol_version >= 21)
133 implied_checksum_md4.num = CSUM_MD4_BUSTED;
135 implied_checksum_md4.num = CSUM_MD4_ARCHAIC;
136 return &implied_checksum_md4;
/* Explicit name: search the table of valid checksums. */
140 nni = get_nni_by_name(&valid_checksums, name, len);
/* Error path: report the name and call exit_cleanup() with RERR_UNSUPPORTED
 * (the guarding condition is not visible in this listing). */
143 rprintf(FERROR, "unknown checksum name: %s\n", name);
144 exit_cleanup(RERR_UNSUPPORTED);
/* Find the openssl EVP digest to use for nni, updating nni->flags to record
 * the outcome. Callers treat a false/NULL result as "no EVP digest" (they
 * either use their non-EVP code path or mark the entry as gone).
 *
 * NOTE(review): the return statements, the declaration of emd and some
 * conditionals are not visible in this listing. */
150 static const EVP_MD *csum_evp_md(struct name_num_item *nni)
/* Entries that are not flagged as EVP candidates are handled first. */
154 if (!(nni->flags & NNI_EVP))
/* CSUM_MD5 gets special treatment; what it is, is not visible here. */
158 if (nni->num == CSUM_MD5)
/* Ask openssl for the digest using the entry's own name. */
162 emd = EVP_get_digestbyname(nni->name);
163 if (emd && !(nni->flags & NNI_EVP_OK)) { /* Make sure it works before we advertise it */
/* The shared context is created on first use; allocation failure is fatal. */
164 if (!ctx_evp && !(ctx_evp = EVP_MD_CTX_create()))
165 out_of_memory("csum_evp_md");
166 /* Some routines are marked as legacy and are not enabled in the openssl.cnf file.
167 * If we can't init the emd, we'll fall back to our built-in code. */
168 if (EVP_DigestInit_ex(ctx_evp, emd, NULL) == 0)
/* Flag update that drops NNI_BUILTIN and records NNI_EVP_OK. */
171 nni->flags = (nni->flags & ~NNI_BUILTIN) | NNI_EVP_OK;
/* Flag update that drops the EVP candidate bit. */
174 nni->flags &= ~NNI_EVP;
/* Set up the transfer and file checksum selections (xfer_sum_nni/file_sum_nni,
 * their lengths and their EVP digests) from either the negotiated checksum or
 * the checksum_choice string.
 *
 * final_call: when non-zero, and the NSTR debug level is high enough, the
 * chosen checksum is reported via rprintf(FINFO).
 *
 * NOTE(review): braces and a few statements are not visible in this listing. */
181 void parse_checksum_choice(int final_call)
/* A negotiated checksum is used for both the transfer and the file checksum. */
183 if (valid_checksums.negotiated_nni)
184 xfer_sum_nni = file_sum_nni = valid_checksums.negotiated_nni;
/* Otherwise look for a comma in checksum_choice: the text before it names the
 * transfer checksum and the text after it names the file checksum. */
186 char *cp = checksum_choice ? strchr(checksum_choice, ',') : NULL;
188 xfer_sum_nni = parse_csum_name(checksum_choice, cp - checksum_choice);
189 file_sum_nni = parse_csum_name(cp+1, -1);
/* Single-name form: one name is used for both checksums. */
191 xfer_sum_nni = file_sum_nni = parse_csum_name(checksum_choice, -1);
/* On the server, an explicit choice is passed to validate_choice_vs_env(). */
192 if (am_server && checksum_choice)
193 validate_choice_vs_env(NSTR_CHECKSUM, xfer_sum_nni->num, file_sum_nni->num);
/* Cache the digest lengths and the EVP digests for both selections. */
195 xfer_sum_len = csum_len_for_type(xfer_sum_nni->num, 0);
196 file_sum_len = csum_len_for_type(file_sum_nni->num, 0);
197 xfer_sum_evp_md = csum_evp_md(xfer_sum_nni);
198 file_sum_evp_md = csum_evp_md(file_sum_nni);
/* CSUM_NONE as the transfer checksum is special-cased; the statement it guards
 * is not visible here. */
200 if (xfer_sum_nni->num == CSUM_NONE)
203 /* Snag the checksum name for both write_batch's option output & the following debug output. */
204 if (valid_checksums.negotiated_nni)
205 checksum_choice = valid_checksums.negotiated_nni->name;
206 else if (checksum_choice == NULL)
207 checksum_choice = xfer_sum_nni->name;
/* The server needs NSTR debug level 3 to report; the client only level 1. */
209 if (final_call && DEBUG_GTE(NSTR, am_server ? 3 : 1)) {
210 rprintf(FINFO, "%s%s checksum: %s\n",
211 am_server ? "Server" : "Client",
212 valid_checksums.negotiated_nni ? " negotiated" : "",
/* Return the digest length for checksum type cst.
 *
 * flist_csum: only changes the visible result for CSUM_MD4_ARCHAIC, where the
 * file-list form is 2 bytes instead of MD4_DIGEST_LEN.
 *
 * An unhandled type reaches the default label, which calls
 * exit_cleanup(RERR_UNSUPPORTED).
 *
 * NOTE(review): the switch statement itself and most case labels are not
 * visible in this listing, so which type yields which length below cannot be
 * fully confirmed from here. */
217 int csum_len_for_type(int cst, BOOL flist_csum)
222 case CSUM_MD4_ARCHAIC:
223 /* The oldest checksum code is rather weird: the file-list code only sent
224 * 2-byte checksums, but all other checksums were full MD4 length. */
225 return flist_csum ? 2 : MD4_DIGEST_LEN;
228 case CSUM_MD4_BUSTED:
229 return MD4_DIGEST_LEN;
231 return MD5_DIGEST_LEN;
/* The sha* lengths are only compiled in when openssl's length macros exist. */
232 #ifdef SHA_DIGEST_LENGTH
234 return SHA_DIGEST_LENGTH;
236 #ifdef SHA256_DIGEST_LENGTH
238 return SHA256_DIGEST_LENGTH;
240 #ifdef SHA512_DIGEST_LENGTH
242 return SHA512_DIGEST_LENGTH;
249 default: /* paranoia to prevent missing case values */
250 exit_cleanup(RERR_UNSUPPORTED);
255 /* Returns 0 if the checksum is not canonical (i.e. it includes a seed value).
256 * Returns 1 if the public sum order matches our internal sum order.
257 * Returns -1 if the public sum order is the reverse of our internal sum order. */
/* Classify csum_type; the meaning of the 0 / 1 / -1 results is given by the
 * comment that precedes this function. An unhandled type reaches the default
 * label, which calls exit_cleanup(RERR_UNSUPPORTED).
 * NOTE(review): the switch statement, most case labels and every return
 * statement are not visible in this listing. */
259 int canonical_checksum(int csum_type)
263 case CSUM_MD4_ARCHAIC:
265 case CSUM_MD4_BUSTED:
277 default: /* paranoia to prevent missing case values */
278 exit_cleanup(RERR_UNSUPPORTED);
283 #ifndef USE_ROLL_SIMD /* See simd-checksum-*.cpp. */
285 /* A simple 32-bit checksum that can be updated from either end
286 * (inspired by Mark Adler's Adler-32 checksum). */
/* Compute the 32-bit checksum of the len bytes at buf1 from two running sums:
 * s1 accumulates each byte plus CHAR_OFFSET, and s2 accumulates the successive
 * values of s1. The result packs the low 16 bits of s1 with s2 shifted into
 * the upper half.
 * NOTE(review): the declarations and initialisation of i, s1 and s2 and the
 * braces are not visible in this listing. */
288 uint32 get_checksum1(char *buf1, int32 len)
/* The bytes are read through an schar pointer. */
292 schar *buf = (schar *)buf1;
/* Main loop: four bytes per iteration. s2 is updated before s1 because its
 * formula uses the value of s1 from before these four bytes were added. */
295 for (i = 0; i < (len-4); i+=4) {
296 s2 += 4*(s1 + buf[i]) + 3*buf[i+1] + 2*buf[i+2] + buf[i+3] + 10*CHAR_OFFSET;
297 s1 += (buf[i+0] + buf[i+1] + buf[i+2] + buf[i+3] + 4*CHAR_OFFSET);
/* Tail loop: the remaining bytes, one at a time. */
299 for (; i < len; i++) {
300 s1 += (buf[i]+CHAR_OFFSET); s2 += s1;
302 return (s1 & 0xffff) + (s2 << 16);
/* Compute the transfer checksum (selected by xfer_sum_nni / xfer_sum_evp_md)
 * of the len bytes at buf and store the digest in sum. checksum_seed is mixed
 * into the result on the paths where it is visible below.
 *
 * NOTE(review): braces, most case labels, local declarations, break/return
 * statements and some conditionals are not visible in this listing. */
306 void get_checksum2(char *buf, int32 len, char *sum)
/* openssl path: used whenever an EVP digest was found for the transfer checksum. */
309 if (xfer_sum_evp_md) {
/* The context is created once and kept for later calls. */
310 static EVP_MD_CTX *evp = NULL;
312 if (!evp && !(evp = EVP_MD_CTX_create()))
313 out_of_memory("get_checksum2");
314 EVP_DigestInit_ex(evp, xfer_sum_evp_md, NULL);
/* The seed is written into a 4-byte buffer and hashed ahead of the data
 * (whatever condition guards this is not visible here). */
316 SIVALu(seedbuf, 0, checksum_seed);
317 EVP_DigestUpdate(evp, seedbuf, 4);
319 EVP_DigestUpdate(evp, (uchar *)buf, len);
320 EVP_DigestFinal_ex(evp, (uchar *)sum, NULL);
/* Non-EVP path: dispatch on the checksum number. */
323 switch (xfer_sum_nni->num) {
324 #ifdef SUPPORT_XXHASH
/* The xxhash variants take checksum_seed as their seed argument. */
326 SIVAL64(sum, 0, XXH64(buf, len, checksum_seed));
331 SIVAL64(sum, 0, XXH3_64bits_withSeed(buf, len, checksum_seed));
333 case CSUM_XXH3_128: {
334 XXH128_hash_t digest = XXH3_128bits_withSeed(buf, len, checksum_seed);
/* The low 64 bits are stored first, then the high 64 bits. */
335 SIVAL64(sum, 0, digest.low64);
336 SIVAL64(sum, 8, digest.high64);
/* Built-in MD5: with proper_seed_order the seed is hashed before the data;
 * the other branch hashes the data first and the seed afterwards. */
344 if (proper_seed_order) {
346 SIVALu(seedbuf, 0, checksum_seed);
347 md5_update(&m5, seedbuf, 4);
349 md5_update(&m5, (uchar *)buf, len);
351 md5_update(&m5, (uchar *)buf, len);
353 SIVALu(seedbuf, 0, checksum_seed);
354 md5_update(&m5, seedbuf, 4);
357 md5_result(&m5, (uchar *)sum);
362 case CSUM_MD4_BUSTED:
363 case CSUM_MD4_ARCHAIC: {
/* Built-in MD4: the data is copied into a buffer with 4 spare bytes so the
 * seed can be stored right after it. */
374 buf1 = new_array(char, len+4);
378 memcpy(buf1, buf, len);
380 SIVAL(buf1,len,checksum_seed);
/* Hash all complete CSUM_CHUNK-sized pieces first. */
384 for (i = 0; i + CSUM_CHUNK <= len; i += CSUM_CHUNK)
385 mdfour_update(&m, (uchar *)(buf1+i), CSUM_CHUNK);
/* NOTE(review): the next four lines read as the body of a block comment whose
 * delimiters are not visible in this listing. */
388 * Prior to version 27 an incorrect MD4 checksum was computed
389 * by failing to call mdfour_tail() for block sizes that
390 * are multiples of 64. This is fixed by calling mdfour_update()
391 * even when there are no more bytes.
/* The tail update is skipped only when nothing is left over and the checksum
 * number is not greater than CSUM_MD4_BUSTED. */
393 if (len - i > 0 || xfer_sum_nni->num > CSUM_MD4_BUSTED)
394 mdfour_update(&m, (uchar *)(buf1+i), len-i);
396 mdfour_result(&m, (uchar *)sum);
399 default: /* paranoia to prevent missing case values */
400 exit_cleanup(RERR_UNSUPPORTED);
/* Compute the whole-file checksum (selected by file_sum_nni / file_sum_evp_md)
 * of the file named fname and store the digest in sum.
 *
 * fname: path that is opened read-only.
 * st_p:  stat data for the file; its st_size is used as the number of bytes to hash.
 * sum:   output buffer; the visible memset clears file_sum_len bytes of it.
 *
 * No seed is passed on any of the visible paths (XXH64_reset() is given 0).
 *
 * NOTE(review): braces, most case labels, several declarations, the error
 * checks, break/return statements and the cleanup at the end are not visible
 * in this listing. */
404 void file_checksum(const char *fname, const STRUCT_STAT *st_p, char *sum)
406 struct map_struct *buf;
407 OFF_T i, len = st_p->st_size;
411 fd = do_open(fname, O_RDONLY, 0);
/* Zero-fill the result; presumably this is the open-failure path -- the
 * guarding test is not visible here. */
413 memset(sum, 0, file_sum_len);
/* The file is read through a mapping and hashed piece by piece below. */
417 buf = map_file(fd, len, MAX_MAP_SIZE, CHUNK_SIZE);
/* openssl path: used whenever an EVP digest was found for the file checksum. */
420 if (file_sum_evp_md) {
/* The context is created once and kept for later calls. */
421 static EVP_MD_CTX *evp = NULL;
422 if (!evp && !(evp = EVP_MD_CTX_create()))
423 out_of_memory("file_checksum");
425 EVP_DigestInit_ex(evp, file_sum_evp_md, NULL);
/* Every path below follows the same shape: hash the complete CHUNK_SIZE
 * pieces in a loop, then hash whatever is left over as the remainder. */
427 for (i = 0; i + CHUNK_SIZE <= len; i += CHUNK_SIZE)
428 EVP_DigestUpdate(evp, (uchar *)map_ptr(buf, i, CHUNK_SIZE), CHUNK_SIZE);
430 remainder = (int32)(len - i);
432 EVP_DigestUpdate(evp, (uchar *)map_ptr(buf, i, remainder), remainder);
434 EVP_DigestFinal_ex(evp, (uchar *)sum, NULL);
/* Non-EVP path: dispatch on the checksum number. */
437 switch (file_sum_nni->num) {
438 #ifdef SUPPORT_XXHASH
/* XXH64: the streaming state is created once and reset with seed 0 per file. */
440 static XXH64_state_t* state = NULL;
441 if (!state && !(state = XXH64_createState()))
442 out_of_memory("file_checksum");
444 XXH64_reset(state, 0);
446 for (i = 0; i + CHUNK_SIZE <= len; i += CHUNK_SIZE)
447 XXH64_update(state, (uchar *)map_ptr(buf, i, CHUNK_SIZE), CHUNK_SIZE);
449 remainder = (int32)(len - i);
451 XXH64_update(state, (uchar *)map_ptr(buf, i, remainder), remainder);
453 SIVAL64(sum, 0, XXH64_digest(state));
/* XXH3 64-bit: same shape, using the XXH3 streaming state. */
459 static XXH3_state_t* state = NULL;
460 if (!state && !(state = XXH3_createState()))
461 out_of_memory("file_checksum");
463 XXH3_64bits_reset(state);
465 for (i = 0; i + CHUNK_SIZE <= len; i += CHUNK_SIZE)
466 XXH3_64bits_update(state, (uchar *)map_ptr(buf, i, CHUNK_SIZE), CHUNK_SIZE);
468 remainder = (int32)(len - i);
470 XXH3_64bits_update(state, (uchar *)map_ptr(buf, i, remainder), remainder);
472 SIVAL64(sum, 0, XXH3_64bits_digest(state));
/* XXH3 128-bit: the low 64 bits are stored first, then the high 64 bits. */
475 case CSUM_XXH3_128: {
476 XXH128_hash_t digest;
477 static XXH3_state_t* state = NULL;
478 if (!state && !(state = XXH3_createState()))
479 out_of_memory("file_checksum");
481 XXH3_128bits_reset(state);
483 for (i = 0; i + CHUNK_SIZE <= len; i += CHUNK_SIZE)
484 XXH3_128bits_update(state, (uchar *)map_ptr(buf, i, CHUNK_SIZE), CHUNK_SIZE);
486 remainder = (int32)(len - i);
488 XXH3_128bits_update(state, (uchar *)map_ptr(buf, i, remainder), remainder);
490 digest = XXH3_128bits_digest(state);
491 SIVAL64(sum, 0, digest.low64);
492 SIVAL64(sum, 8, digest.high64);
/* Built-in MD5. */
501 for (i = 0; i + CHUNK_SIZE <= len; i += CHUNK_SIZE)
502 md5_update(&m5, (uchar *)map_ptr(buf, i, CHUNK_SIZE), CHUNK_SIZE);
504 remainder = (int32)(len - i);
506 md5_update(&m5, (uchar *)map_ptr(buf, i, remainder), remainder);
508 md5_result(&m5, (uchar *)sum);
/* Built-in MD4: note that this path steps by CSUM_CHUNK, not CHUNK_SIZE. */
513 case CSUM_MD4_BUSTED:
514 case CSUM_MD4_ARCHAIC: {
519 for (i = 0; i + CSUM_CHUNK <= len; i += CSUM_CHUNK)
520 mdfour_update(&m, (uchar *)map_ptr(buf, i, CSUM_CHUNK), CSUM_CHUNK);
522 /* Prior to version 27 an incorrect MD4 checksum was computed
523 * by failing to call mdfour_tail() for block sizes that
524 * are multiples of 64. This is fixed by calling mdfour_update()
525 * even when there are no more bytes. */
526 remainder = (int32)(len - i);
527 if (remainder > 0 || file_sum_nni->num > CSUM_MD4_BUSTED)
528 mdfour_update(&m, (uchar *)map_ptr(buf, i, remainder), remainder);
530 mdfour_result(&m, (uchar *)sum);
/* Unsupported selection: report its name and number, then call exit_cleanup(). */
534 rprintf(FERROR, "Invalid checksum-choice for --checksum: %s (%d)\n",
535 file_sum_nni->name, file_sum_nni->num);
536 exit_cleanup(RERR_UNSUPPORTED);
/* State shared by sum_init(), sum_update() and sum_end(). */
/* Number of bytes currently buffered in ctx_md.buffer by the MD4 path of sum_update(). */
543 static int32 sumresidue;
/* Context used by the built-in MD5 and MD4 paths. */
544 static md_context ctx_md;
545 #ifdef SUPPORT_XXHASH
/* Streaming xxhash states, created on demand in sum_init(). */
546 static XXH64_state_t* xxh64_state;
549 static XXH3_state_t* xxh3_state;
/* The checksum entry and EVP digest that the running sum is currently using. */
551 static struct name_num_item *cur_sum_nni;
552 static const EVP_MD *cur_sum_evp_md;
/* Start a running checksum that is then fed through sum_update() and finished
 * with sum_end().
 *
 * nni:  the checksum to use; the parse_csum_name(NULL, 0) call below supplies
 *       the implied default (presumably when nni is NULL -- the guarding test
 *       is not visible here).
 * seed: seed value; no use of it is visible in this listing.
 *
 * NOTE(review): the return statement, braces, most case labels, and several
 * statements are not visible in this listing. */
555 int sum_init(struct name_num_item *nni, int seed)
560 nni = parse_csum_name(NULL, 0);
562 cur_sum_len = csum_len_for_type(nni->num, 0);
563 cur_sum_evp_md = csum_evp_md(nni);
/* openssl path: (re)initialise the shared EVP context for this digest. */
566 if (cur_sum_evp_md) {
/* NOTE(review): the label passed here says "file_checksum" although this is sum_init(). */
567 if (!ctx_evp && !(ctx_evp = EVP_MD_CTX_create()))
568 out_of_memory("file_checksum");
569 EVP_DigestInit_ex(ctx_evp, cur_sum_evp_md, NULL);
/* Non-EVP path: dispatch on the checksum number. */
572 switch (cur_sum_nni->num) {
573 #ifdef SUPPORT_XXHASH
/* Each xxhash state is created on first use and reset for every new sum. */
575 if (!xxh64_state && !(xxh64_state = XXH64_createState()))
576 out_of_memory("sum_init");
577 XXH64_reset(xxh64_state, 0);
582 if (!xxh3_state && !(xxh3_state = XXH3_createState()))
583 out_of_memory("sum_init");
584 XXH3_64bits_reset(xxh3_state);
/* The 64-bit and 128-bit XXH3 variants share xxh3_state. */
587 if (!xxh3_state && !(xxh3_state = XXH3_createState()))
588 out_of_memory("sum_init");
589 XXH3_128bits_reset(xxh3_state);
/* Built-in MD4 variants start from a fresh ctx_md. */
596 mdfour_begin(&ctx_md);
600 case CSUM_MD4_BUSTED:
601 case CSUM_MD4_ARCHAIC:
602 mdfour_begin(&ctx_md);
609 default: /* paranoia to prevent missing case values */
610 exit_cleanup(RERR_UNSUPPORTED);
617 /* Feed data into the accumulator for the current checksum (not only MD4; the
618 * built-in MD4/MD5 state is kept in ctx_md). The results may be retrieved using
619 * sum_end(). The accumulator is used for different purposes at different points during execution.
621 * @todo Perhaps get rid of the shared accumulator and just pass in the address each time.
622 * Very slightly clearer and slower. */
/* Add the len bytes at p to the running checksum of the current sum (the one
 * recorded in cur_sum_evp_md / cur_sum_nni).
 * NOTE(review): braces, most case labels, break/return statements and parts
 * of the MD4 buffering logic are not visible in this listing. */
624 void sum_update(const char *p, int32 len)
/* openssl path. */
627 if (cur_sum_evp_md) {
628 EVP_DigestUpdate(ctx_evp, (uchar *)p, len);
/* Non-EVP path: dispatch on the checksum number. */
631 switch (cur_sum_nni->num) {
632 #ifdef SUPPORT_XXHASH
634 XXH64_update(xxh64_state, p, len);
639 XXH3_64bits_update(xxh3_state, p, len);
642 XXH3_128bits_update(xxh3_state, p, len);
646 md5_update(&ctx_md, (uchar *)p, len);
/* Built-in MD4: data is passed to mdfour_update() in CSUM_CHUNK-sized pieces,
 * with leftover bytes parked in ctx_md.buffer (sumresidue counts them). */
650 case CSUM_MD4_BUSTED:
651 case CSUM_MD4_ARCHAIC:
/* Not enough for a full chunk yet: just append to the buffer. */
652 if (len + sumresidue < CSUM_CHUNK) {
653 memcpy(ctx_md.buffer + sumresidue, p, len);
/* Top the buffered bytes up to a full chunk and hash that chunk. */
659 int32 i = CSUM_CHUNK - sumresidue;
660 memcpy(ctx_md.buffer + sumresidue, p, i);
661 mdfour_update(&ctx_md, (uchar *)ctx_md.buffer, CSUM_CHUNK);
/* Hash further full chunks straight from the caller's data. */
666 while (len >= CSUM_CHUNK) {
667 mdfour_update(&ctx_md, (uchar *)p, CSUM_CHUNK);
/* Keep whatever is left for the next call. */
674 memcpy(ctx_md.buffer, p, sumresidue);
678 default: /* paranoia to prevent missing case values */
679 exit_cleanup(RERR_UNSUPPORTED);
/* Finish the running checksum of the current sum (the one recorded in
 * cur_sum_evp_md / cur_sum_nni) and write the digest into sum.
 * NOTE(review): braces, most case labels, break/return statements and some
 * conditionals are not visible in this listing. */
683 /* The sum buffer only needs to be as long as the current checksum's digest
684 * len, not MAX_DIGEST_LEN. Note that for CSUM_MD4_ARCHAIC that is the full
685 * MD4_DIGEST_LEN even if the file-list code is going to ignore all but the
686 * first 2 bytes of it. */
687 void sum_end(char *sum)
/* openssl path. */
690 if (cur_sum_evp_md) {
691 EVP_DigestFinal_ex(ctx_evp, (uchar *)sum, NULL);
/* Non-EVP path: dispatch on the checksum number. */
694 switch (cur_sum_nni->num) {
695 #ifdef SUPPORT_XXHASH
697 SIVAL64(sum, 0, XXH64_digest(xxh64_state));
702 SIVAL64(sum, 0, XXH3_64bits_digest(xxh3_state));
704 case CSUM_XXH3_128: {
705 XXH128_hash_t digest = XXH3_128bits_digest(xxh3_state);
/* The low 64 bits are stored first, then the high 64 bits. */
706 SIVAL64(sum, 0, digest.low64);
707 SIVAL64(sum, 8, digest.high64);
712 md5_result(&ctx_md, (uchar *)sum);
/* Built-in MD4: the bytes still parked in ctx_md.buffer are hashed before the
 * result is produced. */
716 mdfour_update(&ctx_md, (uchar *)ctx_md.buffer, sumresidue);
717 mdfour_result(&ctx_md, (uchar *)sum);
719 case CSUM_MD4_BUSTED:
720 case CSUM_MD4_ARCHAIC:
/* Same final update for the older variants (any condition guarding it is not
 * visible here). */
722 mdfour_update(&ctx_md, (uchar *)ctx_md.buffer, sumresidue);
723 mdfour_result(&ctx_md, (uchar *)sum);
728 default: /* paranoia to prevent missing case values */
729 exit_cleanup(RERR_UNSUPPORTED);
/* Check that the checksum described by nni can be used in this build, marking
 * it CSUM_gone where the visible code does so.
 *
 * nni:             table entry to vet; its num and flags may be rewritten.
 * check_auth_list: when true, the entry with the same number in
 *                  valid_auth_checksums (if any) is vetted as well.
 *
 * NOTE(review): braces, #ifdef/#endif lines, some declarations, return
 * statements and some conditionals are not visible in this listing. */
733 #if defined SUPPORT_XXH3 || defined USE_OPENSSL
734 static void verify_digest(struct name_num_item *nni, BOOL check_auth_list)
/* Cached outcome of the XXH3 self-test below: 0 until it has run, then 1 or -1. */
737 static int xxh3_result = 0;
/* The most recently vetted EVP entry and the flags it ended up with, so an
 * identical entry can reuse that outcome. */
740 static int prior_num = 0, prior_flags = 0, prior_result = 0;
744 if (nni->num == CSUM_XXH3_64 || nni->num == CSUM_XXH3_128) {
/* Self-test: fill buf with a repeating pattern of 96 characters starting at
 * ' ', feed it through sum_update() in four pieces of different sizes, and
 * compare the 64-bit digest with a fixed expected value. */
748 for (j = 0; j < (int)sizeof buf; j++)
749 buf[j] = ' ' + (j % 96);
751 sum_update(buf, 32816);
752 sum_update(buf, 31152);
753 sum_update(buf, 32474);
754 sum_update(buf, 9322);
755 xxh3_result = XXH3_64bits_digest(xxh3_state) != 0xadbcf16d4678d1de ? -1 : 1;
/* The entry is withdrawn; presumably only when the self-test failed -- the
 * guarding test is not visible here. */
758 nni->num = CSUM_gone;
/* EVP entries that have NNI_EVP set but neither NNI_BUILTIN nor NNI_EVP_OK. */
764 if (BITS_SETnUNSET(nni->flags, NNI_EVP, NNI_BUILTIN|NNI_EVP_OK)) {
/* Same entry as last time: reuse the recorded flags instead of probing again. */
765 if (nni->num == prior_num && nni->flags == prior_flags) {
766 nni->flags = prior_result;
767 if (!(nni->flags & NNI_EVP))
768 nni->num = CSUM_gone;
/* Otherwise remember this entry, probe openssl via csum_evp_md(), and withdraw
 * the entry if no digest comes back. */
770 prior_num = nni->num;
771 prior_flags = nni->flags;
772 if (!csum_evp_md(nni))
773 nni->num = CSUM_gone;
774 prior_result = nni->flags;
/* Apply the same vetting to the matching daemon-auth entry; False stops the
 * recursion from going further. */
775 if (check_auth_list && (nni = get_nni_by_num(&valid_auth_checksums, prior_num)) != NULL)
776 verify_digest(nni, False);
/* One-time vetting of the checksum tables: every entry of valid_checksums and
 * valid_auth_checksums is passed to verify_digest(), after which
 * initialized_choices is set.
 * NOTE(review): braces, the early return and the matching #endif are not
 * visible in this listing. */
783 void init_checksum_choices()
785 struct name_num_item *nni;
/* Nothing more to do once a previous call has completed. */
787 if (initialized_choices)
790 #if defined SUPPORT_XXH3 || defined USE_OPENSSL
/* Each list is walked until an entry with a NULL name is reached. Entries in
 * the main list also trigger vetting of their daemon-auth counterpart. */
791 for (nni = valid_checksums.list; nni->name; nni++)
792 verify_digest(nni, True);
794 for (nni = valid_auth_checksums.list; nni->name; nni++)
795 verify_digest(nni, False);
798 initialized_choices = 1;