Some checksum improvements
authorWayne Davison <wayne@opencoder.net>
Fri, 22 May 2020 22:06:20 +0000 (15:06 -0700)
committerWayne Davison <wayne@opencoder.net>
Sat, 23 May 2020 00:59:12 +0000 (17:59 -0700)
- Improve csum negotation logic.
- Define the csum names in a single structure.
- Add --debug=CSUM.

checksum.c
compat.c
flist.c
io.c
options.c
rsync.h

index 54e2c4aaef00475c01e1b1683b5fc6676c1085d5..99c29d989d2200e7d132153f66b9ef000316dbfe 100644 (file)
@@ -37,8 +37,17 @@ extern char *checksum_choice;
 #define CSUM_MD4 4
 #define CSUM_MD5 5
 
-const char *default_checksum_list =
-       "md5 md4";
+#define CSUM_SAW_BUFLEN 10
+
+struct csum_struct {
+       int num;
+       const char *name;
+} valid_checksums[] = {
+       { CSUM_MD5, "md5" },
+       { CSUM_MD4, "md4" },
+       { CSUM_NONE, "none" },
+       { -1, NULL }
+};
 
 #define MAX_CHECKSUM_LIST 1024
 
@@ -48,6 +57,8 @@ const char *negotiated_csum_name = NULL;
 
 static int parse_csum_name(const char *name, int len, int allow_auto)
 {
+       struct csum_struct *cs;
+
        if (len < 0 && name)
                len = strlen(name);
 
@@ -60,12 +71,11 @@ static int parse_csum_name(const char *name, int len, int allow_auto)
                        return CSUM_MD4_BUSTED;
                return CSUM_MD4_ARCHAIC;
        }
-       if (len == 3 && strncasecmp(name, "md4", 3) == 0)
-               return CSUM_MD4;
-       if (len == 3 && strncasecmp(name, "md5", 3) == 0)
-               return CSUM_MD5;
-       if (len == 4 && strncasecmp(name, "none", 4) == 0)
-               return CSUM_NONE;
+
+       for (cs = valid_checksums; cs->name; cs++) {
+               if (strncasecmp(name, cs->name, len) == 0 && cs->name[len] == '\0')
+                       return cs->num;
+       }
 
        if (allow_auto) {
                rprintf(FERROR, "unknown checksum name: %s\n", name);
@@ -75,7 +85,22 @@ static int parse_csum_name(const char *name, int len, int allow_auto)
        return -1;
 }
 
-void parse_checksum_choice(void)
+static const char *checksum_name(int num)
+{
+       struct csum_struct *cs;
+
+       for (cs = valid_checksums; cs->name; cs++) {
+               if (num == cs->num)
+                       return cs->name;
+       }
+
+       if (num < CSUM_MD4)
+               return "MD4";
+
+       return "UNKNOWN";
+}
+
+void parse_checksum_choice(int final_call)
 {
        if (!negotiated_csum_name) {
                char *cp = checksum_choice ? strchr(checksum_choice, ',') : NULL;
@@ -85,46 +110,124 @@ void parse_checksum_choice(void)
                } else
                        xfersum_type = checksum_type = parse_csum_name(checksum_choice, -1, 1);
        }
+
        if (xfersum_type == CSUM_NONE)
                whole_file = 1;
+
+       if (final_call && DEBUG_GTE(CSUM, 1)) {
+               if (negotiated_csum_name)
+                       rprintf(FINFO, "[%s] negotiated checksum: %s\n", who_am_i(), negotiated_csum_name);
+               else if (xfersum_type == checksum_type) {
+                       rprintf(FINFO, "[%s] %s checksum: %s\n", who_am_i(),
+                               checksum_choice ? "chosen" : "protocol-based",
+                               checksum_name(xfersum_type));
+               } else {
+                       rprintf(FINFO, "[%s] chosen transfer checksum: %s\n",
+                               who_am_i(), checksum_name(xfersum_type));
+                       rprintf(FINFO, "[%s] chosen pre-transfer checksum: %s\n",
+                               who_am_i(), checksum_name(checksum_type));
+               }
+       }
+}
+
+static int parse_checksum_list(const char *from, char *sumbuf, int sumbuf_len, char *saw)
+{
+       char *to = sumbuf, *tok = NULL;
+       int cnt = 0;
+
+       memset(saw, 0, CSUM_SAW_BUFLEN);
+
+       while (1) {
+               if (*from == ' ' || !*from) {
+                       if (tok) {
+                               int sum_type = parse_csum_name(tok, to - tok, 0);
+                               if (sum_type >= 0 && !saw[sum_type])
+                                       saw[sum_type] = ++cnt;
+                               else
+                                       to = tok - (tok != sumbuf);
+                               tok = NULL;
+                       }
+                       if (!*from++)
+                               break;
+                       continue;
+               }
+               if (!tok) {
+                       if (to != sumbuf)
+                               *to++ = ' ';
+                       tok = to;
+               }
+               if (to - sumbuf >= sumbuf_len - 1) {
+                       to = tok - (tok != sumbuf);
+                       break;
+               }
+               *to++ = *from++;
+       }
+       *to = '\0';
+
+       return to - sumbuf;
 }
 
-void negotiate_checksum(int f_in, int f_out, const char *csum_list)
+void negotiate_checksum(int f_in, int f_out, const char *csum_list, int saw_fail)
 {
-       char *tok, sumbuf[MAX_CHECKSUM_LIST];
+       char *tok, sumbuf[MAX_CHECKSUM_LIST], saw[CSUM_SAW_BUFLEN];
        int sum_type, len;
 
-       if (!am_server || local_server) {
-               if (!csum_list || !*csum_list)
-                       csum_list = default_checksum_list;
-               len = strlen(csum_list);
-               if (len >= (int)sizeof sumbuf) {
-                       rprintf(FERROR, "The checksum list is too long.\n");
-                       exit_cleanup(RERR_UNSUPPORTED);
+       /* Simplify the user-provided string so that it contains valid
+        * checksum names without any duplicates. The client side also
+        * makes use of the saw values when scanning the server's list. */
+       if (csum_list && *csum_list && (!am_server || local_server)) {
+               len = parse_checksum_list(csum_list, sumbuf, sizeof sumbuf, saw);
+               if (saw_fail && !len)
+                       len = strlcpy(sumbuf, "FAIL", sizeof sumbuf);
+               csum_list = sumbuf;
+       } else
+               csum_list = NULL;
+
+       if (!csum_list || !*csum_list) {
+               struct csum_struct *cs;
+               for (tok = sumbuf, cs = valid_checksums, len = 0; cs->name; cs++) {
+                       if (cs->num == CSUM_NONE)
+                               continue;
+                       if (tok != sumbuf)
+                               *tok++ = ' ';
+                       tok += strlcpy(tok, cs->name, sizeof sumbuf - (tok - sumbuf));
+                       saw[cs->num] = ++len;
                }
-               if (!local_server)
-                       write_vstring(f_out, csum_list, len);
+               *tok = '\0';
+               len = tok - sumbuf;
        }
 
-       if (local_server && !read_batch)
-               memcpy(sumbuf, csum_list, len+1);
-       else
+       /* Each side sends their list of valid checksum names to the other side and
+        * then both sides pick the first name in the client's list that is also in
+        * the server's list. */
+       if (!local_server)
+               write_vstring(f_out, sumbuf, len);
+
+       if (!local_server || read_batch)
                len = read_vstring(f_in, sumbuf, sizeof sumbuf);
 
        if (len > 0) {
+               int best = CSUM_SAW_BUFLEN; /* We want best == 1 from the client list */
+               if (am_server)
+                       memset(saw, 1, CSUM_SAW_BUFLEN); /* The first client's choice is the best choice */
                for (tok = strtok(sumbuf, " \t"); tok; tok = strtok(NULL, " \t")) {
-                       len = strlen(tok);
-                       sum_type = parse_csum_name(tok, len, 0);
-                       if (sum_type >= CSUM_NONE) {
-                               xfersum_type = checksum_type = sum_type;
-                               if (am_server && !local_server)
-                                       write_vstring(f_out, tok, len);
-                               negotiated_csum_name = strdup(tok);
-                               return;
-                       }
+                       sum_type = parse_csum_name(tok, -1, 0);
+                       if (sum_type < 0 || !saw[sum_type] || best < saw[sum_type])
+                               continue;
+                       xfersum_type = checksum_type = sum_type;
+                       negotiated_csum_name = tok;
+                       best = saw[sum_type];
+                       if (best == 1)
+                               break;
+               }
+               if (negotiated_csum_name) {
+                       negotiated_csum_name = strdup(negotiated_csum_name);
+                       return;
                }
        }
 
+       if (!am_server)
+               msleep(20);
        rprintf(FERROR, "Failed to negotiate a common checksum\n");
        exit_cleanup(RERR_UNSUPPORTED);
 }
index b29b9637ffdb131ba955c69fac09b6d12f67f2cc..8c77ea6934da5b4a5e5310a2761ffe28fcb266dd 100644 (file)
--- a/compat.c
+++ b/compat.c
@@ -367,24 +367,25 @@ void setup_protocol(int f_out,int f_in)
        }
 #endif
 
-       if (am_server) {
-               if (!checksum_seed)
-                       checksum_seed = time(NULL) ^ (getpid() << 6);
-               write_int(f_out, checksum_seed);
-       } else {
-               checksum_seed = read_int(f_in);
-       }
-
        if (!checksum_choice) {
                const char *rcl = getenv("RSYNC_CHECKSUM_LIST");
+               int saw_fail = rcl && strstr(rcl, "FAIL");
                if (csum_exchange)
-                       negotiate_checksum(f_in, f_out, rcl);
-               else if (!am_server && rcl && *rcl && strstr(rcl, "FAIL")) {
+                       negotiate_checksum(f_in, f_out, rcl, saw_fail);
+               else if (!am_server && saw_fail) {
                        rprintf(FERROR, "Remote rsync is too old for checksum negotation\n");
                        exit_cleanup(RERR_UNSUPPORTED);
                }
        }
 
+       if (am_server) {
+               if (!checksum_seed)
+                       checksum_seed = time(NULL) ^ (getpid() << 6);
+               write_int(f_out, checksum_seed);
+       } else {
+               checksum_seed = read_int(f_in);
+       }
+
        init_flist();
 }
 
diff --git a/flist.c b/flist.c
index 1f2b278d24b824d1bbe14b18f3a5d0384edfb65b..940071fd43604c4244949ac08faf872530a764ee 100644 (file)
--- a/flist.c
+++ b/flist.c
@@ -142,7 +142,7 @@ void init_flist(void)
                rprintf(FINFO, "FILE_STRUCT_LEN=%d, EXTRA_LEN=%d\n",
                        (int)FILE_STRUCT_LEN, (int)EXTRA_LEN);
        }
-       parse_checksum_choice(); /* Sets checksum_type && xfersum_type */
+       parse_checksum_choice(1); /* Sets checksum_type && xfersum_type */
        flist_csum_len = csum_len_for_type(checksum_type, 1);
 
        show_filelist_progress = INFO_GTE(FLIST, 1) && xfer_dirs && !am_server && !inc_recurse;
diff --git a/io.c b/io.c
index 446a5f343c3300a8f6c87d90023d2a79bf4bd2bc..189bc23237916b35b68d799db9a9e1da5354fd10 100644 (file)
--- a/io.c
+++ b/io.c
@@ -2369,8 +2369,8 @@ void start_write_batch(int fd)
        write_int(batch_fd, protocol_version);
        if (protocol_version >= 30)
                write_varint(batch_fd, compat_flags);
-       write_int(batch_fd, checksum_seed);
        maybe_write_checksum(batch_fd);
+       write_int(batch_fd, checksum_seed);
 
        if (am_sender)
                write_batch_monitor_out = fd;
index e2adcf837be4c26e50acc418d260940f64dd4d36..6926f96c98bc46c0ea9640470cfda781cad867f0 100644 (file)
--- a/options.c
+++ b/options.c
@@ -276,6 +276,7 @@ static struct output_struct debug_words[COUNT_DEBUG+1] = {
        DEBUG_WORD(CHDIR, W_CLI|W_SRV, "Debug when the current directory changes"),
        DEBUG_WORD(CONNECT, W_CLI, "Debug connection events (levels 1-2)"),
        DEBUG_WORD(CMD, W_CLI, "Debug commands+options that are issued (levels 1-2)"),
+       DEBUG_WORD(CSUM, W_CLI|W_SRV, "Debug checksum negotiation"),
        DEBUG_WORD(DEL, W_REC, "Debug delete actions (levels 1-3)"),
        DEBUG_WORD(DELTASUM, W_SND|W_REC, "Debug delta-transfer checksumming (levels 1-4)"),
        DEBUG_WORD(DUP, W_REC, "Debug weeding of duplicate names"),
@@ -1932,7 +1933,7 @@ int parse_arguments(int *argc_p, const char ***argv_p)
                /* Call this early to verify the args and figure out if we need to force
                 * --whole-file. Note that the parse function will get called again later,
                 * just in case an "auto" choice needs to know the protocol_version. */
-               parse_checksum_choice();
+               parse_checksum_choice(0);
        } else
                checksum_choice = NULL;
 
diff --git a/rsync.h b/rsync.h
index 0eb4fbc0ca1fcdaa61c7b742f79e08388f2dc5e2..d6c5de52f62c91b42f31bb0cdc475d9ffce5cc1c 100644 (file)
--- a/rsync.h
+++ b/rsync.h
@@ -1317,7 +1317,8 @@ extern short info_levels[], debug_levels[];
 #define DEBUG_CHDIR (DEBUG_BIND+1)
 #define DEBUG_CONNECT (DEBUG_CHDIR+1)
 #define DEBUG_CMD (DEBUG_CONNECT+1)
-#define DEBUG_DEL (DEBUG_CMD+1)
+#define DEBUG_CSUM (DEBUG_CMD+1)
+#define DEBUG_DEL (DEBUG_CSUM+1)
 #define DEBUG_DELTASUM (DEBUG_DEL+1)
 #define DEBUG_DUP (DEBUG_DELTASUM+1)
 #define DEBUG_EXIT (DEBUG_DUP+1)