Various file comparison improvements
author Wayne Davison <wayne@opencoder.net>
Tue, 22 Sep 2020 18:46:36 +0000 (11:46 -0700)
committer Wayne Davison <wayne@opencoder.net>
Tue, 22 Sep 2020 19:48:02 +0000 (12:48 -0700)
- Rename unchanged_file() to quick_check_ok().
- Enhance quick_check_ok() to work with non-regular files.
- Add a get_file_type() function to the generator.
- Use the new functions in the generator code to make the logic simpler.
- Fix a bug where the `--alt-dest` functions were not checking if a
  special file fully matched the non-permission mode bits before
  deciding if we have found an alt-dest match.
- Enhance the `--info=skip --ignore-existing` output to include extra
  info on if the existing file differs in type or passes the standard
  quick-check logic.
- Add `--info=skip2` that authorizes rsync to perform a slow checksum
  "quick check" when ignoring existing files. This provides the uptodate
  and differs info even if we need to checksum a file to get it.

NEWS.md
generator.c
hlink.c
options.c
rsync.1.md
rsync.h

diff --git a/NEWS.md b/NEWS.md
index a3ac7b7149c15096ee0cf7c04facd4f1f8b4b785..5254e6a9681ed4f79a25e0e380b4dc3e20d9e24c 100644 (file)
--- a/NEWS.md
+++ b/NEWS.md
@@ -4,6 +4,16 @@
 
 ## Changes in this version:
 
+### OUTPUT CHANGES:
+
+ - Added a parenthetic suffix to the "FILENAME exists" output of
+   `--ignore-existing --info=skip` (note that `-vv` implies `--info=skip`).
+   The skip message is now "FILENAME exists (INFO)" where the INFO is one of
+   uptodate, type differs, or differs.  The suffix may be omitted when using
+   `--checksum` unless `--info=skip2` was used (since we don't want to slow
+   down rsync with extra checksum operations unless the user really wants to
+   see the full difference info).
+
 ### BUG FIXES:
 
  - Fix a bug with `--mkpath` if a single-file copy specifies an existing
    or it is skipped. Fixes a crash that could occur when the size changes to 0
    in the middle of the send negotiations.
 
+ - When dealing with a special file in an alt-dest hierarchy, rsync now checks
+   the non-permissions mode bits to ensure that the 2 special files are really
+   the same.
+
  - Avoid a weird failure if you run a local copy with a (useless) `--rsh`
    option that contains a `V`.
 
diff --git a/generator.c b/generator.c
index f83ac501b3d0a50120ba7909fa182c2e048fd345..2265f6023e5f15a5ee09a5dfe37ddb5a38329331 100644 (file)
--- a/generator.c
+++ b/generator.c
@@ -112,10 +112,6 @@ static int need_retouch_dir_times;
 static int need_retouch_dir_perms;
 static const char *solo_file = NULL;
 
-enum nonregtype {
-       TYPE_DIR, TYPE_SPECIAL, TYPE_DEVICE, TYPE_SYMLINK
-};
-
 /* Forward declarations. */
 #ifdef SUPPORT_HARD_LINKS
 static void handle_skipped_hlink(struct file_struct *file, int itemizing,
@@ -599,31 +595,78 @@ void itemize(const char *fnamecmp, struct file_struct *file, int ndx, int statre
        }
 }
 
+static enum filetype get_file_type(mode_t mode)
+{
+       if (S_ISREG(mode))
+               return FT_REG;
+       if (S_ISLNK(mode))
+               return FT_SYMLINK;
+       if (S_ISDIR(mode))
+               return FT_DIR;
+       if (IS_SPECIAL(mode))
+               return FT_SPECIAL;
+       if (IS_DEVICE(mode))
+               return FT_DEVICE;
+       return FT_UNSUPPORTED;
+}
 
 /* Perform our quick-check heuristic for determining if a file is unchanged. */
-int unchanged_file(char *fn, struct file_struct *file, STRUCT_STAT *st)
+int quick_check_ok(enum filetype ftype, const char *fn, struct file_struct *file, STRUCT_STAT *st)
 {
-       if (st->st_size != F_LENGTH(file))
-               return 0;
+       switch (ftype) {
+         case FT_REG:
+               if (st->st_size != F_LENGTH(file))
+                       return 0;
 
-       /* if always checksum is set then we use the checksum instead
-          of the file time to determine whether to sync */
-       if (always_checksum > 0 && S_ISREG(st->st_mode)) {
-               char sum[MAX_DIGEST_LEN];
-               file_checksum(fn, st, sum);
-               return memcmp(sum, F_SUM(file), flist_csum_len) == 0;
-       }
+               /* If always_checksum is set then we use the checksum instead
+                * of the file mtime to determine whether to sync. */
+               if (always_checksum > 0) {
+                       char sum[MAX_DIGEST_LEN];
+                       file_checksum(fn, st, sum);
+                       return memcmp(sum, F_SUM(file), flist_csum_len) == 0;
+               }
 
-       if (size_only > 0)
-               return 1;
+               if (size_only > 0)
+                       return 1;
 
-       if (ignore_times)
-               return 0;
+               if (ignore_times)
+                       return 0;
 
-       return !mtime_differs(st, file);
+               if (mtime_differs(st, file))
+                       return 0;
+               break;
+         case FT_DIR:
+               break;
+         case FT_SYMLINK: {
+#ifdef SUPPORT_LINKS
+               char lnk[MAXPATHLEN];
+               int len = do_readlink(fn, lnk, MAXPATHLEN-1);
+               if (len <= 0)
+                       return 0;
+               lnk[len] = '\0';
+               if (strcmp(lnk, F_SYMLINK(file)) != 0)
+                       return 0;
+               break;
+#else
+               return -1;
+#endif
+         }
+         case FT_SPECIAL:
+               if (!BITS_EQUAL(file->mode, st->st_mode, _S_IFMT))
+                       return 0;
+               break;
+         case FT_DEVICE: {
+               uint32 *devp = F_RDEV_P(file);
+               if (st->st_rdev != MAKEDEV(DEV_MAJOR(devp), DEV_MINOR(devp)))
+                       return 0;
+               break;
+         }
+         case FT_UNSUPPORTED:
+               return -1;
+       }
+       return 1;
 }
 
-
 /*
  * set (initialize) the size entries in the per-file sum_struct
  * calculating dynamic block and checksum sizes.
@@ -907,7 +950,7 @@ static int try_dests_reg(struct file_struct *file, char *fname, int ndx,
                        best_match = j;
                        match_level = 1;
                }
-               if (!unchanged_file(cmpbuf, file, &sxp->st))
+               if (!quick_check_ok(FT_REG, cmpbuf, file, &sxp->st))
                        continue;
                if (match_level == 1) {
                        best_match = j;
@@ -1006,29 +1049,14 @@ static int try_dests_non(struct file_struct *file, char *fname, int ndx,
 {
        int best_match = -1;
        int match_level = 0;
-       enum nonregtype type;
-       uint32 *devp;
-#ifdef SUPPORT_LINKS
-       char lnk[MAXPATHLEN];
-       int len;
-#endif
+       enum filetype ftype = get_file_type(file->mode);
        int j = 0;
 
 #ifndef SUPPORT_LINKS
-       if (S_ISLNK(file->mode))
+       if (ftype == FT_SYMLINK)
                return -1;
 #endif
-       if (S_ISDIR(file->mode)) {
-               type = TYPE_DIR;
-       } else if (IS_SPECIAL(file->mode))
-               type = TYPE_SPECIAL;
-       else if (IS_DEVICE(file->mode))
-               type = TYPE_DEVICE;
-#ifdef SUPPORT_LINKS
-       else if (S_ISLNK(file->mode))
-               type = TYPE_SYMLINK;
-#endif
-       else {
+       if (ftype == FT_REG || ftype == FT_UNSUPPORTED) {
                rprintf(FERROR,
                        "internal: try_dests_non() called with invalid mode (%o)\n",
                        (int)file->mode);
@@ -1039,53 +1067,14 @@ static int try_dests_non(struct file_struct *file, char *fname, int ndx,
                pathjoin(cmpbuf, MAXPATHLEN, basis_dir[j], fname);
                if (link_stat(cmpbuf, &sxp->st, 0) < 0)
                        continue;
-               switch (type) {
-               case TYPE_DIR:
-                       if (!S_ISDIR(sxp->st.st_mode))
-                               continue;
-                       break;
-               case TYPE_SPECIAL:
-                       if (!IS_SPECIAL(sxp->st.st_mode))
-                               continue;
-                       break;
-               case TYPE_DEVICE:
-                       if (!IS_DEVICE(sxp->st.st_mode))
-                               continue;
-                       break;
-               case TYPE_SYMLINK:
-#ifdef SUPPORT_LINKS
-                       if (!S_ISLNK(sxp->st.st_mode))
-                               continue;
-                       break;
-#else
-                       return -1;
-#endif
-               }
+               if (ftype != get_file_type(sxp->st.st_mode))
+                       continue;
                if (match_level < 1) {
                        match_level = 1;
                        best_match = j;
                }
-               switch (type) {
-               case TYPE_DIR:
-               case TYPE_SPECIAL:
-                       break;
-               case TYPE_DEVICE:
-                       devp = F_RDEV_P(file);
-                       if (sxp->st.st_rdev != MAKEDEV(DEV_MAJOR(devp), DEV_MINOR(devp)))
-                               continue;
-                       break;
-               case TYPE_SYMLINK:
-#ifdef SUPPORT_LINKS
-                       if ((len = do_readlink(cmpbuf, lnk, MAXPATHLEN-1)) <= 0)
-                               continue;
-                       lnk[len] = '\0';
-                       if (strcmp(lnk, F_SYMLINK(file)) != 0)
-                               continue;
-                       break;
-#else
-                       return -1;
-#endif
-               }
+               if (!quick_check_ok(ftype, cmpbuf, file, &sxp->st))
+                       continue;
                if (match_level < 2) {
                        match_level = 2;
                        best_match = j;
@@ -1130,14 +1119,14 @@ static int try_dests_non(struct file_struct *file, char *fname, int ndx,
                        match_level = 2;
                if (itemizing && stdout_format_has_i
                 && (INFO_GTE(NAME, 2) || stdout_format_has_i > 1)) {
-                       int chg = alt_dest_type == COMPARE_DEST && type != TYPE_DIR ? 0
+                       int chg = alt_dest_type == COMPARE_DEST && ftype != FT_DIR ? 0
                            : ITEM_LOCAL_CHANGE + (match_level == 3 ? ITEM_XNAME_FOLLOWS : 0);
                        char *lp = match_level == 3 ? "" : NULL;
                        itemize(cmpbuf, file, ndx, 0, sxp, chg + ITEM_MATCHED, 0, lp);
                }
                if (INFO_GTE(NAME, 2) && maybe_ATTRS_REPORT) {
                        rprintf(FCLIENT, "%s%s is uptodate\n",
-                               fname, type == TYPE_DIR ? "/" : "");
+                               fname, ftype == FT_DIR ? "/" : "");
                }
                return -2;
        }
@@ -1231,7 +1220,8 @@ static void recv_generator(char *fname, struct file_struct *file, int ndx,
        char fnamecmpbuf[MAXPATHLEN];
        uchar fnamecmp_type;
        int del_opts = delete_mode || force_delete ? DEL_RECURSE : 0;
-       int is_dir = !S_ISDIR(file->mode) ? 0
+       enum filetype stype, ftype = get_file_type(file->mode);
+       int is_dir = ftype != FT_DIR ? 0
                   : inc_recurse && ndx != cur_flist->ndx_start - 1 ? -1
                   : 1;
 
@@ -1380,10 +1370,25 @@ static void recv_generator(char *fname, struct file_struct *file, int ndx,
         && !am_root && sx.st.st_uid == our_uid)
                del_opts |= DEL_NO_UID_WRITE;
 
+       if (statret == 0)
+               stype = get_file_type(sx.st.st_mode);
+       else
+               stype = FT_UNSUPPORTED;
+
        if (ignore_existing > 0 && statret == 0
-        && (!is_dir || !S_ISDIR(sx.st.st_mode))) {
-               if (INFO_GTE(SKIP, 1) && is_dir >= 0)
-                       rprintf(FINFO, "%s exists\n", fname);
+        && (!is_dir || stype != FT_DIR)) {
+               if (INFO_GTE(SKIP, 1) && is_dir >= 0) {
+                       const char *suf;
+                       if (ftype != stype)
+                               suf = " (type differs)";
+                       else if (ftype == FT_REG && always_checksum > 0 && !INFO_GTE(SKIP, 2))
+                               suf = ""; /* skip quick-check checksum unless SKIP2 was specified */
+                       else if (quick_check_ok(ftype, fname, file, &sx.st))
+                               suf = " (uptodate)";
+                       else
+                               suf = " (differs)";
+                       rprintf(FINFO, "%s exists%s\n", fname, suf);
+               }
 #ifdef SUPPORT_HARD_LINKS
                if (F_IS_HLINKED(file))
                        handle_skipped_hlink(file, itemizing, code, f_out);
@@ -1412,7 +1417,7 @@ static void recv_generator(char *fname, struct file_struct *file, int ndx,
                         * dir's mtime right away).  We will handle the dir in
                         * full later (right before we handle its contents). */
                        if (statret == 0
-                        && (S_ISDIR(sx.st.st_mode)
+                        && (stype == FT_DIR
                          || delete_item(fname, sx.st.st_mode, del_opts | DEL_FOR_DIR) != 0))
                                goto cleanup; /* Any errors get reported later. */
                        if (do_mkdir(fname, (file->mode|added_perms) & 0700) == 0)
@@ -1424,7 +1429,7 @@ static void recv_generator(char *fname, struct file_struct *file, int ndx,
                 * file of that name and it is *not* a directory, then
                 * we need to delete it.  If it doesn't exist, then
                 * (perhaps recursively) create it. */
-               if (statret == 0 && !S_ISDIR(sx.st.st_mode)) {
+               if (statret == 0 && stype != FT_DIR) {
                        if (delete_item(fname, sx.st.st_mode, del_opts | DEL_FOR_DIR) != 0)
                                goto skipping_dir_contents;
                        statret = -1;
@@ -1519,7 +1524,7 @@ static void recv_generator(char *fname, struct file_struct *file, int ndx,
        /* If we're not preserving permissions, change the file-list's
         * mode based on the local permissions and some heuristics. */
        if (!preserve_perms) {
-               int exists = statret == 0 && !S_ISDIR(sx.st.st_mode);
+               int exists = statret == 0 && stype != FT_DIR;
                file->mode = dest_mode(file->mode, sx.st.st_mode, dflt_perms, exists);
        }
 
@@ -1529,7 +1534,7 @@ static void recv_generator(char *fname, struct file_struct *file, int ndx,
                goto cleanup;
 #endif
 
-       if (preserve_links && S_ISLNK(file->mode)) {
+       if (preserve_links && ftype == FT_SYMLINK) {
 #ifdef SUPPORT_LINKS
                const char *sl = F_SYMLINK(file);
                if (safe_symlinks && unsafe_symlink(sl, fname)) {
@@ -1546,12 +1551,7 @@ static void recv_generator(char *fname, struct file_struct *file, int ndx,
                        goto cleanup;
                }
                if (statret == 0) {
-                       char lnk[MAXPATHLEN];
-                       int len;
-
-                       if (S_ISLNK(sx.st.st_mode)
-                        && (len = do_readlink(fname, lnk, MAXPATHLEN-1)) > 0
-                        && strncmp(lnk, sl, len) == 0 && sl[len] == '\0') {
+                       if (stype == FT_SYMLINK && quick_check_ok(stype, fname, file, &sx.st)) {
                                /* The link is pointing to the right place. */
                                set_file_attrs(fname, file, &sx, NULL, maybe_ATTRS_REPORT);
                                if (itemizing)
@@ -1584,7 +1584,7 @@ static void recv_generator(char *fname, struct file_struct *file, int ndx,
                if (atomic_create(file, fname, sl, NULL, MAKEDEV(0, 0), &sx, statret == 0 ? DEL_FOR_SYMLINK : 0)) {
                        set_file_attrs(fname, file, NULL, NULL, 0);
                        if (itemizing) {
-                               if (statret == 0 && !S_ISLNK(sx.st.st_mode))
+                               if (statret == 0 && stype != FT_SYMLINK)
                                        statret = -1;
                                itemize(fnamecmp, file, ndx, statret, &sx,
                                        ITEM_LOCAL_CHANGE|ITEM_REPORT_CHANGE, 0, NULL);
@@ -1605,28 +1605,22 @@ static void recv_generator(char *fname, struct file_struct *file, int ndx,
                goto cleanup;
        }
 
-       if ((am_root && preserve_devices && IS_DEVICE(file->mode))
-        || (preserve_specials && IS_SPECIAL(file->mode))) {
+       if ((am_root && preserve_devices && ftype == FT_DEVICE)
+        || (preserve_specials && ftype == FT_SPECIAL)) {
                dev_t rdev;
-               int del_for_flag = 0;
-               if (IS_DEVICE(file->mode)) {
+               int del_for_flag;
+               if (ftype == FT_DEVICE) {
                        uint32 *devp = F_RDEV_P(file);
                        rdev = MAKEDEV(DEV_MAJOR(devp), DEV_MINOR(devp));
-               } else
+                       del_for_flag = DEL_FOR_DEVICE;
+               } else {
                        rdev = 0;
+                       del_for_flag = DEL_FOR_SPECIAL;
+               }
                if (statret == 0) {
-                       if (IS_DEVICE(file->mode)) {
-                               if (!IS_DEVICE(sx.st.st_mode))
-                                       statret = -1;
-                               del_for_flag = DEL_FOR_DEVICE;
-                       } else {
-                               if (!IS_SPECIAL(sx.st.st_mode))
-                                       statret = -1;
-                               del_for_flag = DEL_FOR_SPECIAL;
-                       }
-                       if (statret == 0
-                        && BITS_EQUAL(sx.st.st_mode, file->mode, _S_IFMT)
-                        && (IS_SPECIAL(sx.st.st_mode) || sx.st.st_rdev == rdev)) {
+                       if (ftype != stype)
+                               statret = -1;
+                       else if (quick_check_ok(ftype, fname, file, &sx.st)) {
                                /* The device or special file is identical. */
                                set_file_attrs(fname, file, &sx, NULL, maybe_ATTRS_REPORT);
                                if (itemizing)
@@ -1679,7 +1673,7 @@ static void recv_generator(char *fname, struct file_struct *file, int ndx,
                goto cleanup;
        }
 
-       if (!S_ISREG(file->mode)) {
+       if (ftype != FT_REG) {
                if (solo_file)
                        fname = f_name(file, NULL);
                rprintf(FINFO, "skipping non-regular file \"%s\"\n", fname);
@@ -1715,7 +1709,7 @@ static void recv_generator(char *fname, struct file_struct *file, int ndx,
 
        fnamecmp_type = FNAMECMP_FNAME;
 
-       if (statret == 0 && !(S_ISREG(sx.st.st_mode) || (write_devices && IS_DEVICE(sx.st.st_mode)))) {
+       if (statret == 0 && !(stype == FT_REG || (write_devices && stype == FT_DEVICE))) {
                if (delete_item(fname, sx.st.st_mode, del_opts | DEL_FOR_FILE) != 0)
                        goto cleanup;
                statret = -1;
@@ -1749,7 +1743,7 @@ static void recv_generator(char *fname, struct file_struct *file, int ndx,
                partialptr = NULL;
 
        if (statret != 0 && fuzzy_basis) {
-               if (need_fuzzy_dirlist && S_ISREG(file->mode)) {
+               if (need_fuzzy_dirlist) {
                        const char *dn = file->dirname ? file->dirname : ".";
                        int i;
                        strlcpy(fnamecmpbuf, dn, sizeof fnamecmpbuf);
@@ -1797,7 +1791,7 @@ static void recv_generator(char *fname, struct file_struct *file, int ndx,
                ;
        else if (fnamecmp_type >= FNAMECMP_FUZZY)
                ;
-       else if (unchanged_file(fnamecmp, file, &sx.st)) {
+       else if (quick_check_ok(FT_REG, fnamecmp, file, &sx.st)) {
                if (partialptr) {
                        do_unlink(partialptr);
                        handle_partial_dir(partialptr, PDIR_DELETE);
diff --git a/hlink.c b/hlink.c
index adec89b0d40f2eefec059b58ce94daa8563a69d4..66810a3eb7d3654cbbeedd92668a30f69f7f5e0b 100644 (file)
--- a/hlink.c
+++ b/hlink.c
@@ -406,7 +406,7 @@ int hard_link_check(struct file_struct *file, int ndx, char *fname,
                                }
                                break;
                        }
-                       if (!unchanged_file(cmpbuf, file, &alt_sx.st))
+                       if (!quick_check_ok(FT_REG, cmpbuf, file, &alt_sx.st))
                                continue;
                        statret = 1;
                        if (unchanged_attrs(cmpbuf, file, &alt_sx))
diff --git a/options.c b/options.c
index 06f91098105120ed47bb1430055ec4340c7f10da..9ffc3cf7a8628e872c80da0fefe84437650cb68e 100644 (file)
--- a/options.c
+++ b/options.c
@@ -267,7 +267,7 @@ static struct output_struct info_words[COUNT_INFO+1] = {
        INFO_WORD(NAME, W_SND|W_REC, "Mention 1) updated file/dir names, 2) unchanged names"),
        INFO_WORD(PROGRESS, W_CLI, "Mention 1) per-file progress or 2) total transfer progress"),
        INFO_WORD(REMOVE, W_SND, "Mention files removed on the sending side"),
-       INFO_WORD(SKIP, W_REC, "Mention files that are skipped due to options used"),
+       INFO_WORD(SKIP, W_REC, "Mention files that are skipped due to options used (levels 1-2)"),
        INFO_WORD(STATS, W_CLI|W_SRV, "Mention statistics at end of run (levels 1-3)"),
        INFO_WORD(SYMSAFE, W_SND|W_REC, "Mention symlinks that are unsafe"),
        { NULL, "--info", 0, 0, 0, 0 }
diff --git a/rsync.1.md b/rsync.1.md
index 7bb4c5a1b4dc44abba3173bbb685ae6c8c60429f..d205d0ba56319f22eb7b233a1c522b2474f507d0 100644 (file)
--- a/rsync.1.md
+++ b/rsync.1.md
@@ -651,6 +651,10 @@ your home directory (remove the '=' for that).
     the same modification timestamp.  This option turns off this "quick check"
     behavior, causing all files to be updated.
 
+    This option can be a little confusing compared to `--ignore-existing` and
+    `--ignore-non-existing` in that they cause rsync to transfer fewer
+    files, while this option causes rsync to transfer more files.
+
 0.  `--size-only`
 
     This modifies rsync's "quick check" algorithm for finding files that need
@@ -1602,6 +1606,15 @@ your home directory (remove the '=' for that).
     permissions on the hard-linked files).  This does mean that this option is
     only looking at the existing files in the destination hierarchy itself.
 
+    If `--info=skip` was specified (which is implied by `-vv`) then rsync
+    outputs a "FILENAME exists (INFO)" message where the INFO indicates one of
+    "uptodate", "type differs", or "differs".  However, if you specified the
+    `--checksum` option, you must have specified `--info=skip2` to get the
+    "differs" or "uptodate" info since rsync will not take the extra time to
+    checksum these skipped files unless you really want it to (a parenthetical
+    suffix that is not "type differs" is elided if we are skipping the checksum
+    check for an existing file).
+
 0.  `--remove-source-files`
 
     This tells rsync to remove from the sending side the files (meaning
diff --git a/rsync.h b/rsync.h
index 345a68a65b4c631e5a84079ea8913ffea32bbd9e..68dfba51a04f4835fe369d77a9323ecf238172f7 100644 (file)
--- a/rsync.h
+++ b/rsync.h
@@ -277,6 +277,10 @@ enum msgcode {
        MSG_NO_SEND=102,/* sender failed to open a file we wanted */
 };
 
+enum filetype {
+       FT_UNSUPPORTED, FT_REG, FT_DIR, FT_SYMLINK, FT_SPECIAL, FT_DEVICE
+};
+
 #define NDX_DONE -1
 #define NDX_FLIST_EOF -2
 #define NDX_DEL_STATS -3