*
* Copyright (C) 1996 Andrew Tridgell
* Copyright (C) 1996 Paul Mackerras
- * Copyright (C) 2004-2014 Wayne Davison
+ * Copyright (C) 2004-2020 Wayne Davison
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
*/
#include "rsync.h"
-
+#ifdef SUPPORT_XXHASH
+#include "xxhash.h"
+#endif
+
+extern int am_server;
+extern int local_server;
+extern int whole_file;
+extern int read_batch;
extern int checksum_seed;
extern int protocol_version;
extern int proper_seed_order;
+extern char *checksum_choice;
+
+#define CSUM_NONE 0 /* no checksum; parse_checksum_choice() forces whole_file when this is the transfer choice */
+#define CSUM_MD4_ARCHAIC 1 /* protocol-based default when protocol_version < 21 */
+#define CSUM_MD4_BUSTED 2 /* protocol-based default for protocol_version 21 through 26 */
+#define CSUM_MD4_OLD 3 /* protocol-based default for protocol_version 27 through 29 */
+#define CSUM_MD4 4 /* only reachable by name ("md4"); values >= this are the "canonical" ones */
+#define CSUM_MD5 5 /* protocol-based default when protocol_version >= 30 */
+#define CSUM_XXHASH 6 /* only listed in valid_checksums[] when SUPPORT_XXHASH is defined */
+
+#define CSUM_SAW_BUFLEN 10 /* size of the "saw" arrays, which are indexed by CSUM_* value */
+
+struct csum_struct { /* pairs a CSUM_* value with the name used to select it */
+ int num;
+ const char *name;
+} valid_checksums[] = { /* walked front to back, so the order is also the default preference order */
+#ifdef SUPPORT_XXHASH
+ { CSUM_XXHASH, "xxhash" },
+#endif
+ { CSUM_MD5, "md5" },
+ { CSUM_MD4, "md4" },
+ { CSUM_NONE, "none" },
+ { -1, NULL } /* end marker: the scanning loops stop at the NULL name */
+};
+
+#define MAX_CHECKSUM_LIST 1024 /* size of the name-list buffer used by negotiate_checksum() */
+
+int xfersum_type = 0; /* used for the file transfer checksums */
+int checksum_type = 0; /* used for the pre-transfer (--checksum) checksums */
+const char *negotiated_csum_name = NULL; /* set by negotiate_checksum(); while NULL, parse_checksum_choice() decides the types */
+
+/* Translate a checksum name into its CSUM_* value.
+ *
+ * name: the name to look up, or NULL when no choice was made.  When an
+ *   explicit len is given the name need not end at a NUL.
+ * len: how many chars of name to compare; a negative value means "use
+ *   strlen(name)".
+ * allow_auto: when non-zero, "auto" is accepted and an unknown name is a
+ *   fatal error (RERR_UNSUPPORTED); when zero, an unknown name returns -1.
+ *
+ * A NULL name (or an accepted "auto") yields the checksum that goes with the
+ * current protocol_version. */
+static int parse_csum_name(const char *name, int len, int allow_auto)
+{
+	struct csum_struct *cs;
+
+	if (len < 0 && name)
+		len = strlen(name);
+
+	if (!name || (allow_auto && len == 4 && strncasecmp(name, "auto", 4) == 0)) {
+		if (protocol_version >= 30)
+			return CSUM_MD5;
+		if (protocol_version >= 27)
+			return CSUM_MD4_OLD;
+		if (protocol_version >= 21)
+			return CSUM_MD4_BUSTED;
+		return CSUM_MD4_ARCHAIC;
+	}
+
+	/* The extra NUL test rejects a name that is merely a prefix of an entry. */
+	for (cs = valid_checksums; cs->name; cs++) {
+		if (strncasecmp(name, cs->name, len) == 0 && cs->name[len] == '\0')
+			return cs->num;
+	}
+
+	if (allow_auto) {
+		/* Report just the len chars that were looked up: name may be the
+		 * front half of an "xfer,pre-xfer" pair and so continue past len. */
+		rprintf(FERROR, "unknown checksum name: %.*s\n", len, name);
+		exit_cleanup(RERR_UNSUPPORTED);
+	}
+
+	return -1;
+}
+
+/* Give back a printable name for a CSUM_* value.  A value that has no entry
+ * in valid_checksums[] is reported as "MD4" when it is below CSUM_MD4 and as
+ * "UNKNOWN" otherwise. */
+static const char *checksum_name(int num)
+{
+	const struct csum_struct *entry = valid_checksums;
+
+	while (entry->name) {
+		if (entry->num == num)
+			return entry->name;
+		entry++;
+	}
+
+	return num < CSUM_MD4 ? "MD4" : "UNKNOWN";
+}
+
+/* Fill in xfersum_type and checksum_type from the checksum_choice setting,
+ * unless a checksum name has already been negotiated.  The setting is either
+ * one name that is used for both, or an "xfer,pre-xfer" pair.  A transfer
+ * checksum of "none" turns on whole_file.  When final_call is set and CSUM
+ * debugging is enabled, the result is reported via rprintf(). */
+void parse_checksum_choice(int final_call)
+{
+	if (!negotiated_csum_name) {
+		char *comma = checksum_choice ? strchr(checksum_choice, ',') : NULL;
+		if (!comma) {
+			int both = parse_csum_name(checksum_choice, -1, 1);
+			checksum_type = both;
+			xfersum_type = both;
+		} else {
+			xfersum_type = parse_csum_name(checksum_choice, comma - checksum_choice, 1);
+			checksum_type = parse_csum_name(comma + 1, -1, 1);
+		}
+	}
+
+	if (xfersum_type == CSUM_NONE)
+		whole_file = 1;
+
+	if (!final_call || !DEBUG_GTE(CSUM, 1))
+		return;
+
+	if (negotiated_csum_name) {
+		rprintf(FINFO, "[%s] negotiated checksum: %s\n", who_am_i(), negotiated_csum_name);
+		return;
+	}
+
+	if (xfersum_type != checksum_type) {
+		rprintf(FINFO, "[%s] chosen transfer checksum: %s\n",
+			who_am_i(), checksum_name(xfersum_type));
+		rprintf(FINFO, "[%s] chosen pre-transfer checksum: %s\n",
+			who_am_i(), checksum_name(checksum_type));
+		return;
+	}
+
+	rprintf(FINFO, "[%s] %s checksum: %s\n", who_am_i(),
+		checksum_choice ? "chosen" : "protocol-based",
+		checksum_name(xfersum_type));
+}
+
+static int parse_checksum_list(const char *from, char *sumbuf, int sumbuf_len, char *saw)
+{ /* Copy the space-separated names in "from" into sumbuf, keeping only the
+   * names that parse_csum_name() recognizes and dropping any repeats.  The
+   * saw array (CSUM_SAW_BUFLEN bytes, zeroed here first) receives, for each
+   * kept checksum type, its 1-based position in the output.  The result is
+   * NUL-terminated and its length is returned. */
+ char *to = sumbuf, *tok = NULL;
+ int cnt = 0;
+
+ memset(saw, 0, CSUM_SAW_BUFLEN);
+
+ while (1) {
+ if (*from == ' ' || !*from) { /* a space or the final NUL ends the current name, if any */
+ if (tok) {
+ int sum_type = parse_csum_name(tok, to - tok, 0);
+ if (sum_type >= 0 && !saw[sum_type])
+ saw[sum_type] = ++cnt;
+ else
+ to = tok - (tok != sumbuf); /* rewind over the rejected name and the space written before it */
+ tok = NULL;
+ }
+ if (!*from++) /* stop at the NUL; otherwise just step past the space */
+ break;
+ continue;
+ }
+ if (!tok) { /* first char of a new name: separate it from the previous one */
+ if (to != sumbuf)
+ *to++ = ' ';
+ tok = to;
+ }
+ if (to - sumbuf >= sumbuf_len - 1) { /* no room left: discard the partial name and stop */
+ to = tok - (tok != sumbuf);
+ break;
+ }
+ *to++ = *from++;
+ }
+ *to = '\0';
+
+ return to - sumbuf;
+}
+
+/* Agree on a checksum with the other side and record it in xfersum_type,
+ * checksum_type and negotiated_csum_name.
+ *
+ * f_in, f_out: where the other side's name list is read from and where our
+ *   list is written to (nothing is written for a local_server run).
+ * csum_list: the user's space-separated preference list, or NULL/empty to
+ *   offer every entry of valid_checksums[] other than "none".  It is only
+ *   honored when we are not the server (or are a local_server).
+ * saw_fail: when set and csum_list holds no usable name, "FAIL" is sent
+ *   instead of falling back to the default list.
+ *
+ * Returns only on success; otherwise an error is reported and the process
+ * exits with RERR_UNSUPPORTED. */
+void negotiate_checksum(int f_in, int f_out, const char *csum_list, int saw_fail)
+{
+	char *tok, sumbuf[MAX_CHECKSUM_LIST], saw[CSUM_SAW_BUFLEN];
+	int sum_type, len;
+
+	/* Not every path below fills in all of saw[], yet the scan of the
+	 * received list may index any value that parse_csum_name() can return
+	 * (e.g. CSUM_NONE for "none"), so start from a fully zeroed array. */
+	memset(saw, 0, sizeof saw);
+
+	/* Simplify the user-provided string so that it contains valid
+	 * checksum names without any duplicates. The client side also
+	 * makes use of the saw values when scanning the server's list. */
+	if (csum_list && *csum_list && (!am_server || local_server)) {
+		len = parse_checksum_list(csum_list, sumbuf, sizeof sumbuf, saw);
+		if (saw_fail && !len)
+			len = strlcpy(sumbuf, "FAIL", sizeof sumbuf);
+		csum_list = sumbuf;
+	} else
+		csum_list = NULL;
+
+	/* Without a usable user list, offer everything we support (except
+	 * "none") in table order, ranking each entry by its position. */
+	if (!csum_list || !*csum_list) {
+		struct csum_struct *cs;
+		for (tok = sumbuf, cs = valid_checksums, len = 0; cs->name; cs++) {
+			if (cs->num == CSUM_NONE)
+				continue;
+			if (tok != sumbuf)
+				*tok++ = ' ';
+			tok += strlcpy(tok, cs->name, sizeof sumbuf - (tok - sumbuf));
+			saw[cs->num] = ++len;
+		}
+		*tok = '\0';
+		len = tok - sumbuf;
+	}
+
+	/* Each side sends their list of valid checksum names to the other side and
+	 * then both sides pick the first name in the client's list that is also in
+	 * the server's list. */
+	if (!local_server)
+		write_vstring(f_out, sumbuf, len);
+
+	/* When nothing is read here, sumbuf still holds our own list and that
+	 * is what gets scanned below. */
+	if (!local_server || read_batch)
+		len = read_vstring(f_in, sumbuf, sizeof sumbuf);
+
+	if (len > 0) {
+		int best = CSUM_SAW_BUFLEN; /* We want best == 1 from the client list */
+		if (am_server)
+			memset(saw, 1, CSUM_SAW_BUFLEN); /* The first client's choice is the best choice */
+		for (tok = strtok(sumbuf, " \t"); tok; tok = strtok(NULL, " \t")) {
+			sum_type = parse_csum_name(tok, -1, 0);
+			if (sum_type < 0 || !saw[sum_type] || best < saw[sum_type])
+				continue;
+			xfersum_type = checksum_type = sum_type;
+			negotiated_csum_name = tok;
+			best = saw[sum_type];
+			if (best == 1)
+				break;
+		}
+		if (negotiated_csum_name) {
+			/* tok points into the local sumbuf, so keep a private copy. */
+			negotiated_csum_name = strdup(negotiated_csum_name);
+			if (!negotiated_csum_name)
+				out_of_memory("negotiate_checksum");
+			return;
+		}
+	}
+
+	/* NOTE(review): the short client-side pause presumably lets the other
+	 * side's output arrive before ours -- confirm. */
+	if (!am_server)
+		msleep(20);
+	rprintf(FERROR, "Failed to negotiate a common checksum\n");
+	exit_cleanup(RERR_UNSUPPORTED);
+}
+
+/* Return how many bytes a checksum of type cst occupies.  flist_csum only
+ * matters for CSUM_MD4_ARCHAIC, whose file-list checksums are 2 bytes long
+ * while its other checksums are the full MD4 length.  An unrecognized type
+ * ends the program with RERR_UNSUPPORTED. */
+int csum_len_for_type(int cst, BOOL flist_csum)
+{
+	if (cst == CSUM_NONE)
+		return 1;
+
+	if (cst == CSUM_MD4_ARCHAIC)
+		return flist_csum ? 2 : MD4_DIGEST_LEN;
+
+	if (cst == CSUM_MD4 || cst == CSUM_MD4_OLD || cst == CSUM_MD4_BUSTED)
+		return MD4_DIGEST_LEN;
+
+	if (cst == CSUM_MD5)
+		return MD5_DIGEST_LEN;
+
+#ifdef SUPPORT_XXHASH
+	if (cst == CSUM_XXHASH)
+		return sizeof (XXH64_hash_t);
+#endif
+
+	/* Anything else is a value that this function was never taught about. */
+	exit_cleanup(RERR_UNSUPPORTED);
+	return 0;
+}
+
+/* Return 1 for CSUM_MD4 and every higher-numbered checksum type, 0 for the
+ * lower-numbered ones. */
+int canonical_checksum(int csum_type)
+{
+	if (csum_type < CSUM_MD4)
+		return 0;
+	return 1;
+}
+#ifndef HAVE_SIMD /* See simd-checksum-*.cpp. */
/*
- a simple 32 bit checksum that can be upadted from either end
+ a simple 32 bit checksum that can be updated from either end
(inspired by Mark Adler's Adler-32 checksum)
*/
uint32 get_checksum1(char *buf1, int32 len)
{
- int32 i;
- uint32 s1, s2;
- schar *buf = (schar *)buf1;
-
- s1 = s2 = 0;
- for (i = 0; i < (len-4); i+=4) {
- s2 += 4*(s1 + buf[i]) + 3*buf[i+1] + 2*buf[i+2] + buf[i+3] +
- 10*CHAR_OFFSET;
- s1 += (buf[i+0] + buf[i+1] + buf[i+2] + buf[i+3] + 4*CHAR_OFFSET);
- }
- for (; i < len; i++) {
- s1 += (buf[i]+CHAR_OFFSET); s2 += s1;
- }
- return (s1 & 0xffff) + (s2 << 16);
+ int32 i;
+ uint32 s1, s2; /* s1: running sum of the bytes; s2: running sum of the s1 values */
+ schar *buf = (schar *)buf1;
+
+ s1 = s2 = 0;
+ for (i = 0; i < (len-4); i+=4) { /* four bytes per pass; adds up to the same totals as four passes of the loop below */
+ s2 += 4*(s1 + buf[i]) + 3*buf[i+1] + 2*buf[i+2] + buf[i+3] + 10*CHAR_OFFSET;
+ s1 += (buf[i+0] + buf[i+1] + buf[i+2] + buf[i+3] + 4*CHAR_OFFSET);
+ }
+ for (; i < len; i++) { /* whatever is left, one byte at a time */
+ s1 += (buf[i]+CHAR_OFFSET); s2 += s1;
+ }
+ return (s1 & 0xffff) + (s2 << 16); /* low 16 bits of s1, with s2 shifted into the upper half */
}
-
+#endif
void get_checksum2(char *buf, int32 len, char *sum)
{
md_context m;
- if (protocol_version >= 30) {
+ switch (xfersum_type) {
+ case CSUM_MD5: {
uchar seedbuf[4];
md5_begin(&m);
if (proper_seed_order) {
}
}
md5_result(&m, (uchar *)sum);
- } else {
+ break;
+ }
+ case CSUM_MD4:
+ case CSUM_MD4_OLD:
+ case CSUM_MD4_BUSTED:
+ case CSUM_MD4_ARCHAIC: {
int32 i;
static char *buf1;
static int32 len1;
* are multiples of 64. This is fixed by calling mdfour_update()
* even when there are no more bytes.
*/
- if (len - i > 0 || protocol_version >= 27)
+ if (len - i > 0 || xfersum_type > CSUM_MD4_BUSTED)
mdfour_update(&m, (uchar *)(buf1+i), len-i);
mdfour_result(&m, (uchar *)sum);
+ break;
+ }
+#ifdef SUPPORT_XXHASH
+ case CSUM_XXHASH:
+ SIVAL64(sum, 0, XXH64(buf, len, checksum_seed));
+ break;
+#endif
+ default: /* paranoia to prevent missing case values */
+ exit_cleanup(RERR_UNSUPPORTED);
}
}
buf = map_file(fd, len, MAX_MAP_SIZE, CSUM_CHUNK);
- if (protocol_version >= 30) {
+ switch (checksum_type) {
+ case CSUM_MD5:
md5_begin(&m);
for (i = 0; i + CSUM_CHUNK <= len; i += CSUM_CHUNK) {
md5_update(&m, (uchar *)map_ptr(buf, i, remainder), remainder);
md5_result(&m, (uchar *)sum);
- } else {
+ break;
+ case CSUM_MD4:
+ case CSUM_MD4_OLD:
+ case CSUM_MD4_BUSTED:
+ case CSUM_MD4_ARCHAIC:
mdfour_begin(&m);
for (i = 0; i + CSUM_CHUNK <= len; i += CSUM_CHUNK) {
- mdfour_update(&m, (uchar *)map_ptr(buf, i, CSUM_CHUNK),
- CSUM_CHUNK);
+ mdfour_update(&m, (uchar *)map_ptr(buf, i, CSUM_CHUNK), CSUM_CHUNK);
}
/* Prior to version 27 an incorrect MD4 checksum was computed
* are multiples of 64. This is fixed by calling mdfour_update()
* even when there are no more bytes. */
remainder = (int32)(len - i);
- if (remainder > 0 || protocol_version >= 27)
+ if (remainder > 0 || checksum_type > CSUM_MD4_BUSTED)
mdfour_update(&m, (uchar *)map_ptr(buf, i, remainder), remainder);
mdfour_result(&m, (uchar *)sum);
+ break;
+#ifdef SUPPORT_XXHASH
+ case CSUM_XXHASH: {
+ XXH64_state_t* state = XXH64_createState();
+ if (state == NULL)
+ out_of_memory("file_checksum xx64");
+
+ if (XXH64_reset(state, 0) == XXH_ERROR) {
+ rprintf(FERROR, "error resetting XXH64 seed");
+ exit_cleanup(RERR_STREAMIO);
+ }
+
+ for (i = 0; i + CSUM_CHUNK <= len; i += CSUM_CHUNK) {
+ XXH_errorcode const updateResult =
+ XXH64_update(state, (uchar *)map_ptr(buf, i, CSUM_CHUNK), CSUM_CHUNK);
+ if (updateResult == XXH_ERROR) {
+ rprintf(FERROR, "error computing XX64 hash");
+ exit_cleanup(RERR_STREAMIO);
+ }
+ }
+ remainder = (int32)(len - i);
+ if (remainder > 0)
+ XXH64_update(state, (uchar *)map_ptr(buf, i, CSUM_CHUNK), remainder);
+ SIVAL64(sum, 0, XXH64_digest(state));
+
+ XXH64_freeState(state);
+ break;
+ }
+#endif
+ default:
+ rprintf(FERROR, "invalid checksum-choice for the --checksum option (%d)\n", checksum_type);
+ exit_cleanup(RERR_UNSUPPORTED);
}
close(fd);
static int32 sumresidue; /* MD4 variants: count of bytes parked in md.buffer that have not been hashed yet */
static md_context md; /* context shared by sum_init(), sum_update() and sum_end() for MD4/MD5 */
+static int cursum_type; /* checksum type picked by the latest sum_init(); drives sum_update() and sum_end() */
+#ifdef SUPPORT_XXHASH
+XXH64_state_t* xxh64_state = NULL; /* created on first use by sum_init() and then reset and reused */
+#endif
+/* Start a new running checksum of type csum_type; a negative csum_type picks
+ * the default that goes with the protocol version.  The seed is hashed in up
+ * front only for the MD4 variants numbered below CSUM_MD4. */
-void sum_init(int seed)
+void sum_init(int csum_type, int seed)
{
 char s[4];
- if (protocol_version >= 30)
+ if (csum_type < 0)
+ csum_type = parse_csum_name(NULL, 0, 1);
+ cursum_type = csum_type;
+
+ switch (csum_type) {
+ case CSUM_MD5:
 md5_begin(&md);
- else {
+ break;
+ case CSUM_MD4:
+ mdfour_begin(&md);
+ sumresidue = 0;
+ break;
+ case CSUM_MD4_OLD:
+ case CSUM_MD4_BUSTED:
+ case CSUM_MD4_ARCHAIC:
 mdfour_begin(&md);
 sumresidue = 0;
 SIVAL(s, 0, seed);
 sum_update(s, 4);
+ break;
+#ifdef SUPPORT_XXHASH
+ case CSUM_XXHASH:
+ /* The state object is allocated once and reused by later calls. */
+ if (xxh64_state == NULL) {
+ xxh64_state = XXH64_createState();
+ if (xxh64_state == NULL)
+ out_of_memory("sum_init xxh64");
+ }
+ if (XXH64_reset(xxh64_state, 0) == XXH_ERROR) {
+ rprintf(FERROR, "error resetting XXH64 state\n");
+ exit_cleanup(RERR_STREAMIO);
+ }
+ break;
+#endif
+ case CSUM_NONE:
+ break;
+ default: /* paranoia to prevent missing case values */
+ exit_cleanup(RERR_UNSUPPORTED);
 }
}
**/
void sum_update(const char *p, int32 len)
{
- if (protocol_version >= 30) {
+ switch (cursum_type) {
+ case CSUM_MD5:
 md5_update(&md, (uchar *)p, len);
- return;
- }
+ break;
+ case CSUM_MD4:
+ case CSUM_MD4_OLD:
+ case CSUM_MD4_BUSTED:
+ case CSUM_MD4_ARCHAIC:
+ /* MD4 data is only hashed in whole CSUM_CHUNK pieces; anything short
+ * of that waits in md.buffer, with sumresidue counting the bytes. */
+ if (len + sumresidue < CSUM_CHUNK) {
+ memcpy(md.buffer + sumresidue, p, len);
+ sumresidue += len;
+ break;
+ }
- if (len + sumresidue < CSUM_CHUNK) {
- memcpy(md.buffer + sumresidue, p, len);
- sumresidue += len;
- return;
- }
+ /* Top up the waiting bytes to a full chunk and hash that first. */
+ if (sumresidue) {
+ int32 i = CSUM_CHUNK - sumresidue;
+ memcpy(md.buffer + sumresidue, p, i);
+ mdfour_update(&md, (uchar *)md.buffer, CSUM_CHUNK);
+ len -= i;
+ p += i;
+ }
- if (sumresidue) {
- int32 i = CSUM_CHUNK - sumresidue;
- memcpy(md.buffer + sumresidue, p, i);
- mdfour_update(&md, (uchar *)md.buffer, CSUM_CHUNK);
- len -= i;
- p += i;
- }
+ while (len >= CSUM_CHUNK) {
+ mdfour_update(&md, (uchar *)p, CSUM_CHUNK);
+ len -= CSUM_CHUNK;
+ p += CSUM_CHUNK;
+ }
- while (len >= CSUM_CHUNK) {
- mdfour_update(&md, (uchar *)p, CSUM_CHUNK);
- len -= CSUM_CHUNK;
- p += CSUM_CHUNK;
+ /* Park the tail for the next call (or for sum_end()). */
+ sumresidue = len;
+ if (sumresidue)
+ memcpy(md.buffer, p, sumresidue);
+ break;
+#ifdef SUPPORT_XXHASH
+ case CSUM_XXHASH:
+ if (XXH64_update(xxh64_state, p, len) == XXH_ERROR) {
+ rprintf(FERROR, "error computing XX64 hash\n");
+ exit_cleanup(RERR_STREAMIO);
+ }
+ break;
+#endif
+ case CSUM_NONE:
+ break;
+ default: /* paranoia to prevent missing case values */
+ exit_cleanup(RERR_UNSUPPORTED);
 }
-
- sumresidue = len;
- if (sumresidue)
- memcpy(md.buffer, p, sumresidue);
}
+/* NOTE: all the callers of sum_end() pass in a pointer to a buffer that is
+ * MAX_DIGEST_LEN in size, so even if the csum-len is shorter than that (i.e.
+ * CSUM_MD4_ARCHAIC), we don't have to worry about limiting the data we write
+ * into the "sum" buffer. */
int sum_end(char *sum)
{
- if (protocol_version >= 30) {
+ switch (cursum_type) {
+ case CSUM_MD5:
 md5_result(&md, (uchar *)sum);
- return MD5_DIGEST_LEN;
- }
-
- if (sumresidue || protocol_version >= 27)
+ break;
+ case CSUM_MD4:
+ case CSUM_MD4_OLD:
 mdfour_update(&md, (uchar *)md.buffer, sumresidue); /* done unconditionally here, even when sumresidue is 0 */
+ mdfour_result(&md, (uchar *)sum);
+ break;
+ case CSUM_MD4_BUSTED:
+ case CSUM_MD4_ARCHAIC:
+ if (sumresidue) /* these two variants skip the last update when no bytes are waiting */
+ mdfour_update(&md, (uchar *)md.buffer, sumresidue);
+ mdfour_result(&md, (uchar *)sum);
+ break;
+#ifdef SUPPORT_XXHASH
+ case CSUM_XXHASH:
+ SIVAL64(sum, 0, XXH64_digest(xxh64_state));
+ break;
+#endif
+ case CSUM_NONE:
+ *sum = '\0'; /* a single zero byte, matching the length of 1 that csum_len_for_type() reports */
+ break;
+ default: /* paranoia to prevent missing case values */
+ exit_cleanup(RERR_UNSUPPORTED);
+ }
- mdfour_result(&md, (uchar *)sum);
-
- return MD4_DIGEST_LEN;
+ return csum_len_for_type(cursum_type, 0); /* length for the type chosen in sum_init(), using the non-file-list size */
}