Promoting xxhash support.
[rsync.git] / checksum.c
index 8b3883363d64b9a685c13361b2142e7f365259b5..17a9507acda203af6be9ccb6841e59d9034f7acb 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Copyright (C) 1996 Andrew Tridgell
  * Copyright (C) 1996 Paul Mackerras
- * Copyright (C) 2004-2015 Wayne Davison
+ * Copyright (C) 2004-2020 Wayne Davison
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
  */
 
 #include "rsync.h"
-
+#ifdef SUPPORT_XXHASH
+#include "xxhash.h"
+#endif
+
+extern int am_server;
+extern int local_server;
+extern int whole_file;
+extern int read_batch;
 extern int checksum_seed;
 extern int protocol_version;
 extern int proper_seed_order;
 extern char *checksum_choice;
 
 #define CSUM_NONE 0
-#define CSUM_ARCHAIC 1
+#define CSUM_MD4_ARCHAIC 1 /* default for protocol_version < 21; file-list sums are 2 bytes */
 #define CSUM_MD4_BUSTED 2
 #define CSUM_MD4_OLD 3
 #define CSUM_MD4 4
 #define CSUM_MD5 5
+#define CSUM_XXHASH 6 /* only handled when SUPPORT_XXHASH is defined */
+
+#define CSUM_SAW_BUFLEN 10 /* size of the "saw" arrays, which are indexed by CSUM_* value */
+
+struct csum_struct { /* Pairs a CSUM_* value with the name used to select it. */
+       int num; /* CSUM_* value; -1 in the terminating entry */
+       const char *name; /* matched case-insensitively; NULL ends the table */
+} valid_checksums[] = { /* negotiate_checksum() advertises names in this order */
+#ifdef SUPPORT_XXHASH
+       { CSUM_XXHASH, "xxhash" },
+#endif
+       { CSUM_MD5, "md5" },
+       { CSUM_MD4, "md4" },
+       { CSUM_NONE, "none" }, /* selectable by name, but left out of the built-in negotiation list */
+       { -1, NULL }
+};
+
+#define MAX_CHECKSUM_LIST 1024 /* size of the buffer negotiate_checksum() uses for a checksum-name list */
 
 int xfersum_type = 0; /* used for the file transfer checksums */
 int checksum_type = 0; /* used for the pre-transfer (--checksum) checksums */
+const char *negotiated_csum_name = NULL; /* name chosen by negotiate_checksum(); NULL when nothing was negotiated */
 
-/* Returns 1 if --whole-file must be enabled. */
-int parse_checksum_choice(void)
+static int parse_csum_name(const char *name, int len, int allow_auto) /* Map a checksum name to its CSUM_* value.
+ * name, len: the name and the number of its bytes to compare; a negative len
+ *   means strlen(name).  A NULL name selects the protocol-based default.
+ * allow_auto: when non-zero, "auto" also selects the protocol-based default and
+ *   an unknown name is reported and passed to exit_cleanup(); when zero, an
+ *   unknown name (including "auto") yields -1. */
 {
-       char *cp = checksum_choice ? strchr(checksum_choice, ',') : NULL;
-       if (cp) {
-               xfersum_type = parse_csum_name(checksum_choice, cp - checksum_choice);
-               checksum_type = parse_csum_name(cp+1, -1);
-       } else
-               xfersum_type = checksum_type = parse_csum_name(checksum_choice, -1);
-       return xfersum_type == CSUM_NONE;
-}
+       struct csum_struct *cs;
 
-int parse_csum_name(const char *name, int len)
-{
        if (len < 0 && name)
                len = strlen(name);
 
-       if (!name || (len == 4 && strncasecmp(name, "auto", 4) == 0)) {
+       if (!name || (allow_auto && len == 4 && strncasecmp(name, "auto", 4) == 0)) { /* default depends on protocol_version */
                if (protocol_version >= 30)
                        return CSUM_MD5;
                if (protocol_version >= 27)
                        return CSUM_MD4_OLD;
                if (protocol_version >= 21)
                        return CSUM_MD4_BUSTED;
-               return CSUM_ARCHAIC;
+               return CSUM_MD4_ARCHAIC;
+       }
+
+       for (cs = valid_checksums; cs->name; cs++) {
+               if (strncasecmp(name, cs->name, len) == 0 && cs->name[len] == '\0') /* the first len bytes must be the entire table name */
+                       return cs->num;
+       }
+
+       if (allow_auto) { /* strict callers do not get -1 back */
+               rprintf(FERROR, "unknown checksum name: %s\n", name);
+               exit_cleanup(RERR_UNSUPPORTED);
+       }
+
+       return -1;
+}
+
+static const char *checksum_name(int num) /* Return a printable name for the CSUM_* value num. */
+{
+       struct csum_struct *cs;
+
+       for (cs = valid_checksums; cs->name; cs++) {
+               if (num == cs->num)
+                       return cs->name;
+       }
+
+       if (num < CSUM_MD4) /* not in the table and below CSUM_MD4: the ARCHAIC/BUSTED/OLD MD4 ids land here */
+               return "MD4";
+
+       return "UNKNOWN";
+}
+
+void parse_checksum_choice(int final_call) /* Derive xfersum_type and checksum_type from checksum_choice unless a
+ * checksum was already negotiated, set whole_file when the transfer checksum is
+ * CSUM_NONE, and, when final_call is non-zero and DEBUG_GTE(CSUM, 1) holds,
+ * print which checksum(s) ended up selected. */
+{
+       if (!negotiated_csum_name) { /* otherwise negotiate_checksum() has already set both types */
+               char *cp = checksum_choice ? strchr(checksum_choice, ',') : NULL;
+               if (cp) { /* two names: transfer checksum before the comma, pre-transfer checksum after it */
+                       xfersum_type = parse_csum_name(checksum_choice, cp - checksum_choice, 1);
+                       checksum_type = parse_csum_name(cp+1, -1, 1);
+               } else
+                       xfersum_type = checksum_type = parse_csum_name(checksum_choice, -1, 1);
+       }
+
+       if (xfersum_type == CSUM_NONE)
+               whole_file = 1;
+
+       if (final_call && DEBUG_GTE(CSUM, 1)) {
+               if (negotiated_csum_name)
+                       rprintf(FINFO, "[%s] negotiated checksum: %s\n", who_am_i(), negotiated_csum_name);
+               else if (xfersum_type == checksum_type) {
+                       rprintf(FINFO, "[%s] %s checksum: %s\n", who_am_i(),
+                               checksum_choice ? "chosen" : "protocol-based",
+                               checksum_name(xfersum_type));
+               } else {
+                       rprintf(FINFO, "[%s] chosen transfer checksum: %s\n",
+                               who_am_i(), checksum_name(xfersum_type));
+                       rprintf(FINFO, "[%s] chosen pre-transfer checksum: %s\n",
+                               who_am_i(), checksum_name(checksum_type));
+               }
+       }
+}
+
+static int parse_checksum_list(const char *from, char *sumbuf, int sumbuf_len, char *saw) /* Copy the valid, not-yet-seen
+ * checksum names of the space-separated list "from" into sumbuf, separated by
+ * single spaces and NUL-terminated.  saw must have room for CSUM_SAW_BUFLEN
+ * bytes; it is cleared and then saw[type] receives the 1-based position of each
+ * kept name.  Returns the length of the string left in sumbuf. */
+{
+       char *to = sumbuf, *tok = NULL;
+       int cnt = 0;
+
+       memset(saw, 0, CSUM_SAW_BUFLEN);
+
+       while (1) {
+               if (*from == ' ' || !*from) { /* a space or the terminating NUL ends the current token */
+                       if (tok) {
+                               int sum_type = parse_csum_name(tok, to - tok, 0);
+                               if (sum_type >= 0 && !saw[sum_type])
+                                       saw[sum_type] = ++cnt;
+                               else
+                                       to = tok - (tok != sumbuf); /* discard the token plus the space written before it */
+                               tok = NULL;
+                       }
+                       if (!*from++)
+                               break;
+                       continue;
+               }
+               if (!tok) { /* first character of a new token */
+                       if (to != sumbuf)
+                               *to++ = ' ';
+                       tok = to;
+               }
+               if (to - sumbuf >= sumbuf_len - 1) { /* no room left: discard the partial token and stop */
+                       to = tok - (tok != sumbuf);
+                       break;
+               }
+               *to++ = *from++;
        }
-       if (len == 3 && strncasecmp(name, "md4", 3) == 0)
-               return CSUM_MD4;
-       if (len == 3 && strncasecmp(name, "md5", 3) == 0)
-               return CSUM_MD5;
-       if (len == 4 && strncasecmp(name, "none", 4) == 0)
-               return CSUM_NONE;
-
-       rprintf(FERROR, "unknown checksum name: %s\n", name);
+       *to = '\0';
+
+       return to - sumbuf;
+}
+
+/* Agree with the peer on a single checksum type.
+ *
+ * f_in, f_out: descriptors handed to read_vstring() / write_vstring() for the
+ *   exchange of checksum-name lists.
+ * csum_list: optional space-separated list of names preferred by the user; it
+ *   is used only when we are not the server or when local_server is set.
+ * saw_fail: when non-zero and csum_list contains no valid name, "FAIL" is sent
+ *   instead of falling back to the built-in list.
+ *
+ * On success xfersum_type and checksum_type are both set to the chosen type and
+ * negotiated_csum_name points at a heap copy of its name.  When no name is
+ * common to both lists an error is printed and exit_cleanup(RERR_UNSUPPORTED)
+ * is called. */
+void negotiate_checksum(int f_in, int f_out, const char *csum_list, int saw_fail)
+{
+       char *tok, sumbuf[MAX_CHECKSUM_LIST], saw[CSUM_SAW_BUFLEN];
+       int sum_type, len;
+
+       /* Start with every slot cleared.  The built-in-list path below fills in
+        * only the types it advertises, yet the client later indexes saw[] with
+        * whatever valid type the peer names (e.g. "none"), so no slot may be
+        * left holding stack garbage. */
+       memset(saw, 0, sizeof saw);
+
+       /* Simplify the user-provided string so that it contains valid
+        * checksum names without any duplicates. The client side also
+        * makes use of the saw values when scanning the server's list. */
+       if (csum_list && *csum_list && (!am_server || local_server)) {
+               len = parse_checksum_list(csum_list, sumbuf, sizeof sumbuf, saw);
+               if (saw_fail && !len)
+                       len = strlcpy(sumbuf, "FAIL", sizeof sumbuf);
+               csum_list = sumbuf;
+       } else
+               csum_list = NULL;
+
+       /* No usable user list: advertise every built-in type except "none",
+        * ranked in table order. */
+       if (!csum_list || !*csum_list) {
+               struct csum_struct *cs;
+               for (tok = sumbuf, cs = valid_checksums, len = 0; cs->name; cs++) {
+                       if (cs->num == CSUM_NONE)
+                               continue;
+                       if (tok != sumbuf)
+                               *tok++ = ' ';
+                       tok += strlcpy(tok, cs->name, sizeof sumbuf - (tok - sumbuf));
+                       saw[cs->num] = ++len;
+               }
+               *tok = '\0';
+               len = tok - sumbuf;
+       }
+
+       /* Each side sends their list of valid checksum names to the other side and
+        * then both sides pick the first name in the client's list that is also in
+        * the server's list. */
+       if (!local_server)
+               write_vstring(f_out, sumbuf, len);
+
+       if (!local_server || read_batch)
+               len = read_vstring(f_in, sumbuf, sizeof sumbuf);
+
+       if (len > 0) {
+               int best = CSUM_SAW_BUFLEN; /* We want best == 1 from the client list */
+               if (am_server)
+                       memset(saw, 1, CSUM_SAW_BUFLEN); /* The first client's choice is the best choice */
+               for (tok = strtok(sumbuf, " \t"); tok; tok = strtok(NULL, " \t")) {
+                       sum_type = parse_csum_name(tok, -1, 0);
+                       if (sum_type < 0 || !saw[sum_type] || best < saw[sum_type])
+                               continue;
+                       xfersum_type = checksum_type = sum_type;
+                       negotiated_csum_name = tok;
+                       best = saw[sum_type];
+                       if (best == 1)
+                               break;
+               }
+               if (negotiated_csum_name) {
+                       /* tok points into sumbuf, which goes away when we return. */
+                       negotiated_csum_name = strdup(negotiated_csum_name);
+                       if (!negotiated_csum_name)
+                               out_of_memory("negotiate_checksum");
+                       return;
+               }
+       }
+
+       if (!am_server)
+               msleep(20);
+       rprintf(FERROR, "Failed to negotiate a common checksum\n");
        exit_cleanup(RERR_UNSUPPORTED);
 }
 
-int csum_len_for_type(int cst)
+int csum_len_for_type(int cst, BOOL flist_csum) /* Return the digest length in bytes for checksum type cst.
+ * flist_csum selects the file-list length, which differs from the normal
+ * length only for CSUM_MD4_ARCHAIC. */
 {
        switch (cst) {
          case CSUM_NONE:
                return 1;
-         case CSUM_ARCHAIC:
-               return 2;
+         case CSUM_MD4_ARCHAIC:
+               /* The oldest checksum code is rather weird: the file-list code only sent
+                * 2-byte checksums, but all other checksums were full MD4 length. */
+               return flist_csum ? 2 : MD4_DIGEST_LEN;
          case CSUM_MD4:
          case CSUM_MD4_OLD:
          case CSUM_MD4_BUSTED:
                return MD4_DIGEST_LEN;
          case CSUM_MD5:
                return MD5_DIGEST_LEN;
+#ifdef SUPPORT_XXHASH
+         case CSUM_XXHASH:
+               return sizeof (XXH64_hash_t); /* the 64-bit hash value is the whole digest */
+#endif
+         default: /* paranoia to prevent missing case values */
+               exit_cleanup(RERR_UNSUPPORTED);
        }
        return 0;
 }
 
 int canonical_checksum(int csum_type)
 {
-    return csum_type >= CSUM_MD4 ? 1 : 0;
+       return csum_type >= CSUM_MD4 ? 1 : 0; /* 1 for CSUM_MD4 and every higher-numbered type, 0 for CSUM_NONE and the older MD4 ids */
 }
 
+#ifndef HAVE_SIMD /* See simd-checksum-*.cpp. */
 /*
-  a simple 32 bit checksum that can be upadted from either end
+  a simple 32 bit checksum that can be updated from either end
+  s1 accumulates each byte plus CHAR_OFFSET, s2 accumulates the running
+  s1 values; the result carries s1 in its low 16 bits and s2 in its high
+  16 bits.
   (inspired by Mark Adler's Adler-32 checksum)
   */
 uint32 get_checksum1(char *buf1, int32 len)
 {
-    int32 i;
-    uint32 s1, s2;
-    schar *buf = (schar *)buf1;
-
-    s1 = s2 = 0;
-    for (i = 0; i < (len-4); i+=4) {
-       s2 += 4*(s1 + buf[i]) + 3*buf[i+1] + 2*buf[i+2] + buf[i+3] +
-         10*CHAR_OFFSET;
-       s1 += (buf[i+0] + buf[i+1] + buf[i+2] + buf[i+3] + 4*CHAR_OFFSET);
-    }
-    for (; i < len; i++) {
-       s1 += (buf[i]+CHAR_OFFSET); s2 += s1;
-    }
-    return (s1 & 0xffff) + (s2 << 16);
+       int32 i;
+       uint32 s1, s2;
+       schar *buf = (schar *)buf1;
+
+       s1 = s2 = 0;
+       for (i = 0; i < (len-4); i+=4) { /* four bytes per pass; algebraically the same as four single-byte steps */
+               s2 += 4*(s1 + buf[i]) + 3*buf[i+1] + 2*buf[i+2] + buf[i+3] + 10*CHAR_OFFSET;
+               s1 += (buf[i+0] + buf[i+1] + buf[i+2] + buf[i+3] + 4*CHAR_OFFSET);
+       }
+       for (; i < len; i++) { /* remaining bytes, one at a time */
+               s1 += (buf[i]+CHAR_OFFSET); s2 += s1;
+       }
+       return (s1 & 0xffff) + (s2 << 16);
 }
+#endif
 
 void get_checksum2(char *buf, int32 len, char *sum)
 {
@@ -143,7 +318,8 @@ void get_checksum2(char *buf, int32 len, char *sum)
          }
          case CSUM_MD4:
          case CSUM_MD4_OLD:
-         case CSUM_MD4_BUSTED: {
+         case CSUM_MD4_BUSTED:
+         case CSUM_MD4_ARCHAIC: {
                int32 i;
                static char *buf1;
                static int32 len1;
@@ -174,12 +350,19 @@ void get_checksum2(char *buf, int32 len, char *sum)
                 * are multiples of 64.  This is fixed by calling mdfour_update()
                 * even when there are no more bytes.
                 */
-               if (len - i > 0 || xfersum_type != CSUM_MD4_BUSTED)
+               if (len - i > 0 || xfersum_type > CSUM_MD4_BUSTED)
                        mdfour_update(&m, (uchar *)(buf1+i), len-i);
 
                mdfour_result(&m, (uchar *)sum);
                break;
          }
+#ifdef SUPPORT_XXHASH
+         case CSUM_XXHASH: 
+               SIVAL64(sum, 0, XXH64(buf, len, checksum_seed));
+               break;
+#endif
+         default: /* paranoia to prevent missing case values */
+               exit_cleanup(RERR_UNSUPPORTED);
        }
 }
 
@@ -217,11 +400,11 @@ void file_checksum(const char *fname, const STRUCT_STAT *st_p, char *sum)
          case CSUM_MD4:
          case CSUM_MD4_OLD:
          case CSUM_MD4_BUSTED:
+         case CSUM_MD4_ARCHAIC:
                mdfour_begin(&m);
 
                for (i = 0; i + CSUM_CHUNK <= len; i += CSUM_CHUNK) {
-                       mdfour_update(&m, (uchar *)map_ptr(buf, i, CSUM_CHUNK),
-                                     CSUM_CHUNK);
+                       mdfour_update(&m, (uchar *)map_ptr(buf, i, CSUM_CHUNK), CSUM_CHUNK);
                }
 
                /* Prior to version 27 an incorrect MD4 checksum was computed
@@ -229,11 +412,39 @@ void file_checksum(const char *fname, const STRUCT_STAT *st_p, char *sum)
                 * are multiples of 64.  This is fixed by calling mdfour_update()
                 * even when there are no more bytes. */
                remainder = (int32)(len - i);
-               if (remainder > 0 || checksum_type != CSUM_MD4_BUSTED)
+               if (remainder > 0 || checksum_type > CSUM_MD4_BUSTED)
                        mdfour_update(&m, (uchar *)map_ptr(buf, i, remainder), remainder);
 
                mdfour_result(&m, (uchar *)sum);
                break;
+#ifdef SUPPORT_XXHASH
+         case CSUM_XXHASH: {
+               /* Hash the mapped file in CSUM_CHUNK pieces with the streaming
+                * XXH64 calls (seed 0) and store the digest in sum. */
+               XXH64_state_t* state = XXH64_createState();
+               if (state == NULL)
+                       out_of_memory("file_checksum xx64");
+
+               if (XXH64_reset(state, 0) == XXH_ERROR) {
+                       rprintf(FERROR, "error resetting XXH64 seed\n");
+                       exit_cleanup(RERR_STREAMIO);
+               }
+
+               for (i = 0; i + CSUM_CHUNK <= len; i += CSUM_CHUNK) {
+                       XXH_errorcode const updateResult =
+                           XXH64_update(state, (uchar *)map_ptr(buf, i, CSUM_CHUNK), CSUM_CHUNK);
+                       if (updateResult == XXH_ERROR) {
+                               rprintf(FERROR, "error computing XX64 hash\n");
+                               exit_cleanup(RERR_STREAMIO);
+                       }
+               }
+               /* Fewer than CSUM_CHUNK bytes are left at this point, so map only
+                * those bytes (as the MD4 branch does) instead of requesting a full
+                * chunk that would reach past len. */
+               remainder = (int32)(len - i);
+               if (remainder > 0
+                && XXH64_update(state, (uchar *)map_ptr(buf, i, remainder), remainder) == XXH_ERROR) {
+                       rprintf(FERROR, "error computing XX64 hash\n");
+                       exit_cleanup(RERR_STREAMIO);
+               }
+               SIVAL64(sum, 0, XXH64_digest(state));
+
+               XXH64_freeState(state);
+               break;
+         }
+#endif
          default:
                rprintf(FERROR, "invalid checksum-choice for the --checksum option (%d)\n", checksum_type);
                exit_cleanup(RERR_UNSUPPORTED);
@@ -246,13 +457,16 @@ void file_checksum(const char *fname, const STRUCT_STAT *st_p, char *sum)
 static int32 sumresidue;
 static md_context md;
 static int cursum_type;
+#ifdef SUPPORT_XXHASH
+XXH64_state_t* xxh64_state = NULL;
+#endif
 
 void sum_init(int csum_type, int seed)
 {
        char s[4];
 
        if (csum_type < 0)
-               csum_type = parse_csum_name(NULL, 0);
+               csum_type = parse_csum_name(NULL, 0, 1);
        cursum_type = csum_type;
 
        switch (csum_type) {
@@ -265,13 +479,29 @@ void sum_init(int csum_type, int seed)
                break;
          case CSUM_MD4_OLD:
          case CSUM_MD4_BUSTED:
+         case CSUM_MD4_ARCHAIC:
                mdfour_begin(&md);
                sumresidue = 0;
                SIVAL(s, 0, seed);
                sum_update(s, 4);
                break;
+#ifdef SUPPORT_XXHASH
+         case CSUM_XXHASH:
+               if (xxh64_state == NULL) {
+                       xxh64_state = XXH64_createState();
+                       if (xxh64_state == NULL)
+                               out_of_memory("sum_init xxh64");
+               }
+               if (XXH64_reset(xxh64_state, 0) == XXH_ERROR) {
+                       rprintf(FERROR, "error resetting XXH64 state");
+                       exit_cleanup(RERR_STREAMIO);
+               }
+               break;
+#endif
          case CSUM_NONE:
                break;
+         default: /* paranoia to prevent missing case values */
+               exit_cleanup(RERR_UNSUPPORTED);
        }
 }
 
@@ -292,6 +522,7 @@ void sum_update(const char *p, int32 len)
          case CSUM_MD4:
          case CSUM_MD4_OLD:
          case CSUM_MD4_BUSTED:
+         case CSUM_MD4_ARCHAIC:
                if (len + sumresidue < CSUM_CHUNK) {
                        memcpy(md.buffer + sumresidue, p, len);
                        sumresidue += len;
@@ -316,11 +547,25 @@ void sum_update(const char *p, int32 len)
                if (sumresidue)
                        memcpy(md.buffer, p, sumresidue);
                break;
+#ifdef SUPPORT_XXHASH
+         case CSUM_XXHASH:
+               if (XXH64_update(xxh64_state, p, len) == XXH_ERROR) {
+                       rprintf(FERROR, "error computing XX64 hash");
+                       exit_cleanup(RERR_STREAMIO);
+               }
+               break;
+#endif
          case CSUM_NONE:
                break;
+         default: /* paranoia to prevent missing case values */
+               exit_cleanup(RERR_UNSUPPORTED);
        }
 }
 
+/* NOTE: all the callers of sum_end() pass in a pointer to a buffer that is
+ * MAX_DIGEST_LEN in size, so even if the csum-len is shorter than that (i.e.
+ * CSUM_MD4_ARCHAIC), we don't have to worry about limiting the data we write
+ * into the "sum" buffer. */
 int sum_end(char *sum)
 {
        switch (cursum_type) {
@@ -333,14 +578,22 @@ int sum_end(char *sum)
                mdfour_result(&md, (uchar *)sum);
                break;
          case CSUM_MD4_BUSTED:
+         case CSUM_MD4_ARCHAIC:
                if (sumresidue)
                        mdfour_update(&md, (uchar *)md.buffer, sumresidue);
                mdfour_result(&md, (uchar *)sum);
                break;
+#ifdef SUPPORT_XXHASH
+         case CSUM_XXHASH:
+               SIVAL64(sum, 0, XXH64_digest(xxh64_state));
+               break;
+#endif
          case CSUM_NONE:
                *sum = '\0';
                break;
+         default: /* paranoia to prevent missing case values */
+               exit_cleanup(RERR_UNSUPPORTED);
        }
 
-       return csum_len_for_type(cursum_type);
+       return csum_len_for_type(cursum_type, 0);
 }