X-Git-Url: http://git.samba.org/samba.git/?p=rsync-patches.git;a=blobdiff_plain;f=checksum-reading.diff;h=8fcb3541981ef6e2e784af3ecda0c5e03f72c9d4;hp=0a23786668779698bd3639af38c4f66bebb660f8;hb=952918f09db2a4aa51e7c0ca0e689bf3f922b48f;hpb=f2863bc00ee660400c314a756d19ce5455dce87d diff --git a/checksum-reading.diff b/checksum-reading.diff index 0a23786..8fcb354 100644 --- a/checksum-reading.diff +++ b/checksum-reading.diff @@ -1,14 +1,14 @@ -Optimize the ability of a mirror to send checksums. +Optimize the --checksum option using externally created .rsyncsums files. -This adds a sender optimization feature that allows a cache of checksums -to be used when the client specifies the --checksum option. The checksum -files (.rsyncsums) must be created by some other process (see the perl -script in the support dir for one way). +This adds a new option, --sumfiles=MODE, that allows you to use a cache of +checksums when performing a --checksum transfer. These checksum files +(.rsyncsums) must be created by some other process -- see the perl script, +rsyncsums, in the support dir for one way. -This option should be used by mirrors that contain files that get created and -not changed. There is a minimal amount of sanity-check information in the -.rsyncsums file (size and mtime) so that the sum files can be shared with your -mirror network. +This option can be particularly helpful to a public mirror that wants to +pre-compute their .rsyncsums files, set the "checksum files = strict" option +in their daemon config file, and thus make it quite efficient for a client +rsync to make use of the --checksum option on their server. 
To use this patch, run these commands for a successful build: @@ -16,49 +16,163 @@ To use this patch, run these commands for a successful build: ./configure (optional if already run) make +based-on: d64bda1c1e79dc385f194d74f7957ce7cd118654 +diff --git a/checksum.c b/checksum.c +--- a/checksum.c ++++ b/checksum.c +@@ -98,7 +98,7 @@ void get_checksum2(char *buf, int32 len, char *sum) + } + } + +-void file_checksum(char *fname, char *sum, OFF_T size) ++void file_checksum(const char *fname, OFF_T size, char *sum) + { + struct map_struct *buf; + OFF_T i, len = size; +diff --git a/clientserver.c b/clientserver.c +--- a/clientserver.c ++++ b/clientserver.c +@@ -42,6 +42,8 @@ extern int numeric_ids; + extern int filesfrom_fd; + extern int remote_protocol; + extern int protocol_version; ++extern int always_checksum; ++extern int checksum_files; + extern int io_timeout; + extern int no_detach; + extern int write_batch; +@@ -879,6 +881,9 @@ static int rsync_module(int f_in, int f_out, int i, const char *addr, const char + } else if (am_root < 0) /* Treat --fake-super from client as --super. */ + am_root = 2; + ++ checksum_files = always_checksum ? 
lp_checksum_files(i) ++ : CSF_IGNORE_FILES; ++ + if (filesfrom_fd == 0) + filesfrom_fd = f_in; + diff --git a/flist.c b/flist.c --- a/flist.c +++ b/flist.c -@@ -121,6 +121,7 @@ static char tmp_sum[MAX_DIGEST_LEN]; +@@ -22,6 +22,7 @@ + + #include "rsync.h" + #include "ifuncs.h" ++#include "itypes.h" + #include "rounding.h" + #include "inums.h" + #include "io.h" +@@ -33,6 +34,7 @@ extern int am_sender; + extern int am_generator; + extern int inc_recurse; + extern int always_checksum; ++extern int basis_dir_cnt; + extern int module_id; + extern int ignore_errors; + extern int numeric_ids; +@@ -61,6 +63,7 @@ extern int file_extra_cnt; + extern int ignore_perishable; + extern int non_perishable_cnt; + extern int prune_empty_dirs; ++extern int checksum_files; + extern int copy_links; + extern int copy_unsafe_links; + extern int protocol_version; +@@ -72,6 +75,7 @@ extern int sender_symlink_iconv; + extern int output_needs_newline; + extern int sender_keeps_checksum; + extern int unsort_ndx; ++extern char *basis_dir[]; + extern struct stats stats; + extern char *filesfrom_host; + extern char *usermap, *groupmap; +@@ -96,6 +100,12 @@ extern iconv_t ic_send, ic_recv; + #endif + #endif + ++#define RSYNCSUMS_FILE ".rsyncsums" ++#define RSYNCSUMS_LEN (sizeof RSYNCSUMS_FILE-1) ++ ++#define CLEAN_STRIP_ROOT (1<<0) ++#define CLEAN_KEEP_LAST (1<<1) ++ + #define PTR_SIZE (sizeof (struct file_struct *)) + + int io_error; +@@ -137,7 +147,11 @@ static char tmp_sum[MAX_DIGEST_LEN]; static char empty_sum[MAX_DIGEST_LEN]; static int flist_count_offset; /* for --delete --progress */ - static int dir_count = 0; -+static struct file_list *checksum_flist = NULL; - static void flist_sort_and_clean(struct file_list *flist, int strip_root); +-static void flist_sort_and_clean(struct file_list *flist, int strip_root); ++static struct csum_cache { ++ struct file_list *flist; ++} *csum_cache = NULL; ++ ++static void flist_sort_and_clean(struct file_list *flist, int flags); static void 
output_flist(struct file_list *flist); -@@ -313,6 +314,186 @@ static void flist_done_allocating(struct file_list *flist) + + void init_flist(void) +@@ -352,6 +366,238 @@ static void flist_done_allocating(struct file_list *flist) flist->pool_boundary = ptr; } -+/* The len count is the length of the basename + 1 for the null. */ -+static int add_checksum(const char *dirname, const char *basename, int len, -+ OFF_T file_length, time_t mtime, const char *sum) ++void reset_checksum_cache() ++{ ++ int slot, slots = am_sender ? 1 : basis_dir_cnt + 1; ++ ++ if (!csum_cache) { ++ csum_cache = new_array0(struct csum_cache, slots); ++ if (!csum_cache) ++ out_of_memory("reset_checksum_cache"); ++ } ++ ++ for (slot = 0; slot < slots; slot++) { ++ struct file_list *flist = csum_cache[slot].flist; ++ ++ if (flist) { ++ /* Reset the pool memory and empty the file-list array. */ ++ pool_free_old(flist->file_pool, ++ pool_boundary(flist->file_pool, 0)); ++ flist->used = 0; ++ } else ++ flist = csum_cache[slot].flist = flist_new(FLIST_TEMP, "reset_checksum_cache"); ++ ++ flist->low = 0; ++ flist->high = -1; ++ flist->next = NULL; ++ } ++} ++ ++/* The basename_len count is the length of the basename + 1 for the '\0'. */ ++static int add_checksum(struct file_list *flist, const char *dirname, ++ const char *basename, int basename_len, OFF_T file_length, ++ time_t mtime, uint32 ctime, uint32 inode, ++ const char *sum) +{ + struct file_struct *file; + int alloc_len, extra_len; + char *bp; + -+ if (len == 10+1 && *basename == '.' && strcmp(basename, ".rsyncsums") == 0) -+ return 0; -+ if (file_length == 0) ++ if (basename_len == RSYNCSUMS_LEN+1 && *basename == '.' ++ && strcmp(basename, RSYNCSUMS_FILE) == 0) + return 0; + -+ extra_len = (file_extra_cnt + (file_length > 0xFFFFFFFFu) + SUM_EXTRA_CNT) ++ /* "2" is for a 32-bit ctime num and an 32-bit inode num. 
*/ ++ extra_len = (file_extra_cnt + (file_length > 0xFFFFFFFFu) + SUM_EXTRA_CNT + 2) + * EXTRA_LEN; +#if EXTRA_ROUNDING > 0 + if (extra_len & (EXTRA_ROUNDING * EXTRA_LEN)) + extra_len = (extra_len | (EXTRA_ROUNDING * EXTRA_LEN)) + EXTRA_LEN; +#endif -+ alloc_len = FILE_STRUCT_LEN + extra_len + len; -+ bp = pool_alloc(checksum_flist->file_pool, alloc_len, "add_checksum"); ++ alloc_len = FILE_STRUCT_LEN + extra_len + basename_len; ++ bp = pool_alloc(flist->file_pool, alloc_len, "add_checksum"); + + memset(bp, 0, extra_len + FILE_STRUCT_LEN); + bp += extra_len; + file = (struct file_struct *)bp; + bp += FILE_STRUCT_LEN; + -+ memcpy(bp, basename, len); ++ memcpy(bp, basename, basename_len); + + file->mode = S_IFREG; + file->modtime = mtime; @@ -68,51 +182,45 @@ diff --git a/flist.c b/flist.c + OPT_EXTRA(file, 0)->unum = (uint32)(file_length >> 32); + } + file->dirname = dirname; ++ F_CTIME(file) = ctime; ++ F_INODE(file) = inode; + bp = F_SUM(file); + memcpy(bp, sum, checksum_len); + -+ flist_expand(checksum_flist, 1); -+ checksum_flist->files[checksum_flist->used++] = file; ++ flist_expand(flist, 1); ++ flist->files[flist->used++] = file; + -+ checksum_flist->sorted = checksum_flist->files; ++ flist->sorted = flist->files; + + return 1; +} + -+/* The direname value must remain unchanged during the lifespan of the -+ * created checksum_flist object because we use it directly. */ -+static void read_checksums(const char *dirname) ++/* The "dirname" arg's data must remain unchanged during the lifespan of ++ * the created csum_cache[].flist object because we use it directly. */ ++static void read_checksums(int slot, struct file_list *flist, const char *dirname) +{ + char line[MAXPATHLEN+1024], fbuf[MAXPATHLEN], sum[MAX_DIGEST_LEN]; -+ OFF_T file_length; -+ time_t mtime; -+ int len, dlen, i; -+ char *cp; + FILE *fp; ++ char *cp; ++ int len, i; ++ time_t mtime; ++ OFF_T file_length; ++ uint32 ctime, inode; ++ int dlen = dirname ? 
strlcpy(fbuf, dirname, sizeof fbuf) : 0; + -+ if (checksum_flist) { -+ /* Reset the pool memory and empty the file-list array. */ -+ pool_free_old(checksum_flist->file_pool, -+ pool_boundary(checksum_flist->file_pool, 0)); -+ checksum_flist->used = 0; -+ } else -+ checksum_flist = flist_new(FLIST_TEMP, "read_checksums"); -+ -+ checksum_flist->low = 0; -+ checksum_flist->high = -1; -+ -+ if (!dirname) -+ return; -+ -+ dlen = strlcpy(fbuf, dirname, sizeof fbuf); -+ if (dlen >= (int)sizeof fbuf) ++ if (dlen >= (int)(sizeof fbuf - 1 - RSYNCSUMS_LEN)) + return; + if (dlen) + fbuf[dlen++] = '/'; + else + dirname = NULL; -+ strlcpy(fbuf+dlen, ".rsyncsums", sizeof fbuf - dlen); -+ if (!(fp = fopen(fbuf, "r"))) ++ strlcpy(fbuf+dlen, RSYNCSUMS_FILE, sizeof fbuf - dlen); ++ if (slot) { ++ pathjoin(line, sizeof line, basis_dir[slot-1], fbuf); ++ cp = line; ++ } else ++ cp = fbuf; ++ if (!(fp = fopen(cp, "r"))) + return; + + while (fgets(line, sizeof line, fp)) { @@ -177,16 +285,16 @@ diff --git a/flist.c b/flist.c + break; + while (*++cp == ' ') {} + -+ /* Ignore ctime. */ ++ ctime = 0; + while (isDigit(cp)) -+ cp++; ++ ctime = ctime * 10 + *cp++ - '0'; + if (*cp != ' ') + break; + while (*++cp == ' ') {} + -+ /* Ignore inode. 
*/ ++ inode = 0; + while (isDigit(cp)) -+ cp++; ++ inode = inode * 10 + *cp++ - '0'; + if (*cp != ' ') + break; + while (*++cp == ' ') {} @@ -204,17 +312,46 @@ diff --git a/flist.c b/flist.c + + strlcpy(fbuf+dlen, cp, sizeof fbuf - dlen); + -+ add_checksum(dirname, cp, len, file_length, mtime, sum); ++ add_checksum(flist, dirname, cp, len, file_length, ++ mtime, ctime, inode, ++ sum); + } + fclose(fp); + -+ clean_flist(checksum_flist, 0); ++ flist_sort_and_clean(flist, CLEAN_KEEP_LAST); +} + - int push_pathname(const char *dir, int len) - { - if (dir == pathname) -@@ -1003,7 +1184,7 @@ struct file_struct *make_file(const char *fname, struct file_list *flist, ++void get_cached_checksum(int slot, const char *fname, struct file_struct *file, ++ STRUCT_STAT *stp, char *sum_buf) ++{ ++ struct file_list *flist = csum_cache[slot].flist; ++ int j; ++ ++ if (!flist->next) { ++ flist->next = cur_flist; /* next points from checksum flist to file flist */ ++ read_checksums(slot, flist, file->dirname); ++ } ++ ++ if ((j = flist_find(flist, file)) >= 0) { ++ struct file_struct *fp = flist->sorted[j]; ++ ++ if (F_LENGTH(fp) == stp->st_size ++ && fp->modtime == stp->st_mtime ++ && (checksum_files & CSF_LAX ++ || (F_CTIME(fp) == (uint32)stp->st_ctime ++ && F_INODE(fp) == (uint32)stp->st_ino))) { ++ memcpy(sum_buf, F_SUM(fp), MAX_DIGEST_LEN); ++ return; ++ } ++ } ++ ++ file_checksum(fname, stp->st_size, sum_buf); ++} ++ + /* Call this with EITHER (1) "file, NULL, 0" to chdir() to the file's + * F_PATHNAME(), or (2) "NULL, dir, dirlen" to chdir() to the supplied dir, + * with dir == NULL taken to be the starting directory, and dirlen < 0 +@@ -1145,7 +1391,7 @@ struct file_struct *make_file(const char *fname, struct file_list *flist, STRUCT_STAT *stp, int flags, int filter_level) { static char *lastdir; @@ -223,66 +360,208 @@ diff --git a/flist.c b/flist.c struct file_struct *file; char thisname[MAXPATHLEN]; char linkname[MAXPATHLEN]; -@@ -1130,9 +1311,16 @@ struct file_struct 
*make_file(const char *fname, struct file_list *flist, +@@ -1291,9 +1537,16 @@ struct file_struct *make_file(const char *fname, struct file_list *flist, memcpy(lastdir, thisname, len); lastdir[len] = '\0'; lastdir_len = len; -+ if (always_checksum && am_sender && flist) -+ read_checksums(lastdir); ++ if (checksum_files && am_sender && flist) ++ reset_checksum_cache(); } - } else + } else { basename = thisname; -+ if (always_checksum && am_sender && flist && lastdir_len == -2) { ++ if (checksum_files && am_sender && flist && lastdir_len == -2) { + lastdir_len = -1; -+ read_checksums(""); ++ reset_checksum_cache(); + } + } basename_len = strlen(basename) + 1; /* count the '\0' */ #ifdef SUPPORT_LINKS -@@ -1208,11 +1396,21 @@ struct file_struct *make_file(const char *fname, struct file_list *flist, - } +@@ -1311,11 +1564,8 @@ struct file_struct *make_file(const char *fname, struct file_list *flist, + extra_len += EXTRA_LEN; #endif -- if (always_checksum && am_sender && S_ISREG(st.st_mode)) +- if (always_checksum && am_sender && S_ISREG(st.st_mode)) { - file_checksum(thisname, tmp_sum, st.st_size); -- - F_PATHNAME(file) = pathname; +- if (sender_keeps_checksum) +- extra_len += SUM_EXTRA_CNT * EXTRA_LEN; +- } ++ if (sender_keeps_checksum && S_ISREG(st.st_mode)) ++ extra_len += SUM_EXTRA_CNT * EXTRA_LEN; + #if EXTRA_ROUNDING > 0 + if (extra_len & (EXTRA_ROUNDING * EXTRA_LEN)) +@@ -1398,8 +1648,14 @@ struct file_struct *make_file(const char *fname, struct file_list *flist, + return NULL; + } + +- if (sender_keeps_checksum && S_ISREG(st.st_mode)) +- memcpy(F_SUM(file), tmp_sum, checksum_len); + if (always_checksum && am_sender && S_ISREG(st.st_mode)) { -+ int j; -+ if (flist && (j = flist_find(checksum_flist, file)) >= 0) { -+ struct file_struct *fp = checksum_flist->sorted[j]; -+ if (F_LENGTH(fp) == st.st_size -+ && fp->modtime == st.st_mtime) -+ memcpy(tmp_sum, F_SUM(fp), MAX_DIGEST_LEN); -+ else -+ file_checksum(thisname, tmp_sum, st.st_size); -+ } else -+ 
file_checksum(thisname, tmp_sum, st.st_size); ++ if (flist && checksum_files) ++ get_cached_checksum(0, thisname, file, &st, tmp_sum); ++ else ++ file_checksum(thisname, st.st_size, tmp_sum); ++ if (sender_keeps_checksum) ++ memcpy(F_SUM(file), tmp_sum, checksum_len); + } -+ - /* This code is only used by the receiver when it is building - * a list of files for a delete pass. */ - if (keep_dirlinks && linkname_len && flist) { -@@ -2063,7 +2261,11 @@ struct file_list *send_file_list(int f, int argc, char *argv[]) - * file-list to check if this is a 1-file xfer. */ - send_extra_file_list(f, 1); + + if (unsort_ndx) + F_NDX(file) = stats.num_dirs; +@@ -2550,7 +2806,7 @@ struct file_list *recv_file_list(int f) + rprintf(FINFO, "[%s] flist_eof=1\n", who_am_i()); + } + +- flist_sort_and_clean(flist, relative_paths); ++ flist_sort_and_clean(flist, relative_paths ? CLEAN_STRIP_ROOT : 0); + + if (protocol_version < 30) { + /* Recv the io_error flag */ +@@ -2773,7 +3029,7 @@ void flist_free(struct file_list *flist) + + /* This routine ensures we don't have any duplicate names in our file list. + * duplicate names can cause corruption because of the pipelining. */ +-static void flist_sort_and_clean(struct file_list *flist, int strip_root) ++static void flist_sort_and_clean(struct file_list *flist, int flags) + { + char fbuf[MAXPATHLEN]; + int i, prev_i; +@@ -2824,7 +3080,7 @@ static void flist_sort_and_clean(struct file_list *flist, int strip_root) + /* If one is a dir and the other is not, we want to + * keep the dir because it might have contents in the + * list. Otherwise keep the first one. 
*/ +- if (S_ISDIR(file->mode)) { ++ if (S_ISDIR(file->mode) || flags & CLEAN_KEEP_LAST) { + struct file_struct *fp = flist->sorted[j]; + if (!S_ISDIR(fp->mode)) + keep = i, drop = j; +@@ -2840,8 +3096,8 @@ static void flist_sort_and_clean(struct file_list *flist, int strip_root) + } else + keep = j, drop = i; + +- if (!am_sender) { +- if (DEBUG_GTE(DUP, 1)) { ++ if (!am_sender || flags & CLEAN_KEEP_LAST) { ++ if (DEBUG_GTE(DUP, 1) && !(flags & CLEAN_KEEP_LAST)) { + rprintf(FINFO, + "removing duplicate name %s from file list (%d)\n", + f_name(file, fbuf), drop + flist->ndx_start); +@@ -2863,7 +3119,7 @@ static void flist_sort_and_clean(struct file_list *flist, int strip_root) + } + flist->high = prev_i; + +- if (strip_root) { ++ if (flags & CLEAN_STRIP_ROOT) { + /* We need to strip off the leading slashes for relative + * paths, but this must be done _after_ the sorting phase. */ + for (i = flist->low; i <= flist->high; i++) { +diff --git a/generator.c b/generator.c +--- a/generator.c ++++ b/generator.c +@@ -53,6 +53,7 @@ extern int delete_after; + extern int missing_args; + extern int msgdone_cnt; + extern int ignore_errors; ++extern int checksum_files; + extern int remove_source_files; + extern int delay_updates; + extern int update_only; +@@ -515,7 +516,7 @@ void itemize(const char *fnamecmp, struct file_struct *file, int ndx, int statre + + + /* Perform our quick-check heuristic for determining if a file is unchanged. 
*/ +-int unchanged_file(char *fn, struct file_struct *file, STRUCT_STAT *st) ++int unchanged_file(char *fn, struct file_struct *file, STRUCT_STAT *st, int slot) + { + if (st->st_size != F_LENGTH(file)) + return 0; +@@ -524,7 +525,10 @@ int unchanged_file(char *fn, struct file_struct *file, STRUCT_STAT *st) + of the file time to determine whether to sync */ + if (always_checksum > 0 && S_ISREG(st->st_mode)) { + char sum[MAX_DIGEST_LEN]; +- file_checksum(fn, sum, st->st_size); ++ if (checksum_files && slot >= 0) ++ get_cached_checksum(slot, fn, file, st, sum); ++ else ++ file_checksum(fn, st->st_size, sum); + return memcmp(sum, F_SUM(file), checksum_len) == 0; + } + +@@ -794,7 +798,7 @@ static int try_dests_reg(struct file_struct *file, char *fname, int ndx, + match_level = 1; + /* FALL THROUGH */ + case 1: +- if (!unchanged_file(cmpbuf, file, &sxp->st)) ++ if (!unchanged_file(cmpbuf, file, &sxp->st, j+1)) + continue; + best_match = j; + match_level = 2; +@@ -1080,7 +1084,7 @@ static void recv_generator(char *fname, struct file_struct *file, int ndx, + * --ignore-non-existing, daemon exclude, or mkdir failure. 
*/ + static struct file_struct *skip_dir = NULL; + static struct file_list *fuzzy_dirlist = NULL; +- static int need_fuzzy_dirlist = 0; ++ static int need_new_dirscan = 0; + struct file_struct *fuzzy_file = NULL; + int fd = -1, f_copy = -1; + stat_x sx, real_sx; +@@ -1164,8 +1168,8 @@ static void recv_generator(char *fname, struct file_struct *file, int ndx, + flist_free(fuzzy_dirlist); + fuzzy_dirlist = NULL; + } +- if (fuzzy_basis) +- need_fuzzy_dirlist = 1; ++ if (fuzzy_basis || checksum_files) ++ need_new_dirscan = 1; + #ifdef SUPPORT_ACLS + if (!preserve_perms) + dflt_perms = default_perms_for_dir(dn); +@@ -1173,10 +1177,15 @@ static void recv_generator(char *fname, struct file_struct *file, int ndx, } -- } -+ } else -+ flist_eof = 1; -+ -+ if (checksum_updating && always_checksum && flist_eof) -+ read_checksums(NULL); + parent_dirname = dn; - return flist; - } -diff --git a/ifuncs.h b/ifuncs.h ---- a/ifuncs.h -+++ b/ifuncs.h -@@ -64,6 +64,12 @@ isDigit(const char *ptr) +- if (need_fuzzy_dirlist && S_ISREG(file->mode)) { +- strlcpy(fnamecmpbuf, dn, sizeof fnamecmpbuf); +- fuzzy_dirlist = get_dirlist(fnamecmpbuf, -1, 1); +- need_fuzzy_dirlist = 0; ++ if (need_new_dirscan && S_ISREG(file->mode)) { ++ if (fuzzy_basis) { ++ strlcpy(fnamecmpbuf, dn, sizeof fnamecmpbuf); ++ fuzzy_dirlist = get_dirlist(fnamecmpbuf, -1, 1); ++ } ++ if (checksum_files) { ++ reset_checksum_cache(); ++ } ++ need_new_dirscan = 0; + } + + statret = link_stat(fname, &sx.st, keep_dirlinks && is_dir); +@@ -1602,7 +1611,7 @@ static void recv_generator(char *fname, struct file_struct *file, int ndx, + ; + else if (fnamecmp_type == FNAMECMP_FUZZY) + ; +- else if (unchanged_file(fnamecmp, file, &sx.st)) { ++ else if (unchanged_file(fnamecmp, file, &sx.st, fnamecmp_type == FNAMECMP_FNAME ? 
0 : -1)) { + if (partialptr) { + do_unlink(partialptr); + handle_partial_dir(partialptr, PDIR_DELETE); +diff --git a/hlink.c b/hlink.c +--- a/hlink.c ++++ b/hlink.c +@@ -410,7 +410,7 @@ int hard_link_check(struct file_struct *file, int ndx, char *fname, + } + break; + } +- if (!unchanged_file(cmpbuf, file, &alt_sx.st)) ++ if (!unchanged_file(cmpbuf, file, &alt_sx.st, j+1)) + continue; + statret = 1; + if (unchanged_attrs(cmpbuf, file, &alt_sx)) +diff --git a/itypes.h b/itypes.h +--- a/itypes.h ++++ b/itypes.h +@@ -23,6 +23,12 @@ isDigit(const char *ptr) } static inline int @@ -295,11 +574,249 @@ diff --git a/ifuncs.h b/ifuncs.h isPrint(const char *ptr) { return isprint(*(unsigned char *)ptr); +diff --git a/loadparm.c b/loadparm.c +--- a/loadparm.c ++++ b/loadparm.c +@@ -133,6 +133,7 @@ typedef struct { + /* NOTE: update this macro if the last char* variable changes! */ + #define LOCAL_STRING_COUNT() (offsetof(local_vars, uid) / sizeof (char*) + 1) + ++ int checksum_files; + int max_connections; + int max_verbosity; + int syslog_facility; +@@ -205,6 +206,7 @@ static const all_vars Defaults = { + /* temp_dir; */ NULL, + /* uid; */ NULL, + ++ /* checksum_files; */ CSF_IGNORE_FILES, + /* max_connections; */ 0, + /* max_verbosity; */ 1, + /* syslog_facility; */ LOG_DAEMON, +@@ -306,6 +308,13 @@ static struct enum_list enum_facilities[] = { + { -1, NULL } + }; + ++static struct enum_list enum_csum_modes[] = { ++ { CSF_IGNORE_FILES, "none" }, ++ { CSF_LAX_MODE, "lax" }, ++ { CSF_STRICT_MODE, "strict" }, ++ { -1, NULL } ++}; ++ + static struct parm_struct parm_table[] = + { + {"address", P_STRING, P_GLOBAL,&Vars.g.bind_address, NULL,0}, +@@ -316,6 +325,7 @@ static struct parm_struct parm_table[] = + + {"auth users", P_STRING, P_LOCAL, &Vars.l.auth_users, NULL,0}, + {"charset", P_STRING, P_LOCAL, &Vars.l.charset, NULL,0}, ++ {"checksum files", P_ENUM, P_LOCAL, &Vars.l.checksum_files, enum_csum_modes,0}, + {"comment", P_STRING, P_LOCAL, &Vars.l.comment, NULL,0}, + {"dont 
compress", P_STRING, P_LOCAL, &Vars.l.dont_compress, NULL,0}, + {"exclude from", P_STRING, P_LOCAL, &Vars.l.exclude_from, NULL,0}, +@@ -470,6 +480,7 @@ FN_LOCAL_STRING(lp_secrets_file, secrets_file) + FN_LOCAL_STRING(lp_temp_dir, temp_dir) + FN_LOCAL_STRING(lp_uid, uid) + ++FN_LOCAL_INTEGER(lp_checksum_files, checksum_files) + FN_LOCAL_INTEGER(lp_max_connections, max_connections) + FN_LOCAL_INTEGER(lp_max_verbosity, max_verbosity) + FN_LOCAL_INTEGER(lp_syslog_facility, syslog_facility) +diff --git a/options.c b/options.c +--- a/options.c ++++ b/options.c +@@ -112,6 +112,7 @@ size_t bwlimit_writemax = 0; + int ignore_existing = 0; + int ignore_non_existing = 0; + int need_messages_from_generator = 0; ++int checksum_files = CSF_IGNORE_FILES; + int max_delete = INT_MIN; + OFF_T max_size = 0; + OFF_T min_size = 0; +@@ -663,6 +664,7 @@ void usage(enum logcode F) + rprintf(F," -q, --quiet suppress non-error messages\n"); + rprintf(F," --no-motd suppress daemon-mode MOTD (see manpage caveat)\n"); + rprintf(F," -c, --checksum skip based on checksum, not mod-time & size\n"); ++ rprintf(F," --sumfiles=MODE use .rsyncsums to speedup --checksum mode\n"); + rprintf(F," -a, --archive archive mode; equals -rlptgoD (no -H,-A,-X)\n"); + rprintf(F," --no-OPTION turn off an implied OPTION (e.g. 
--no-D)\n"); + rprintf(F," -r, --recursive recurse into directories\n"); +@@ -800,7 +802,7 @@ enum {OPT_VERSION = 1000, OPT_DAEMON, OPT_SENDER, OPT_EXCLUDE, OPT_EXCLUDE_FROM, + OPT_FILTER, OPT_COMPARE_DEST, OPT_COPY_DEST, OPT_LINK_DEST, OPT_HELP, + OPT_INCLUDE, OPT_INCLUDE_FROM, OPT_MODIFY_WINDOW, OPT_MIN_SIZE, OPT_CHMOD, + OPT_READ_BATCH, OPT_WRITE_BATCH, OPT_ONLY_WRITE_BATCH, OPT_MAX_SIZE, +- OPT_NO_D, OPT_APPEND, OPT_NO_ICONV, OPT_INFO, OPT_DEBUG, ++ OPT_NO_D, OPT_APPEND, OPT_NO_ICONV, OPT_INFO, OPT_DEBUG, OPT_SUMFILES, + OPT_USERMAP, OPT_GROUPMAP, OPT_CHOWN, OPT_BWLIMIT, + OPT_SERVER, OPT_REFUSED_BASE = 9000}; + +@@ -939,6 +941,7 @@ static struct poptOption long_options[] = { + {"checksum", 'c', POPT_ARG_VAL, &always_checksum, 1, 0, 0 }, + {"no-checksum", 0, POPT_ARG_VAL, &always_checksum, 0, 0, 0 }, + {"no-c", 0, POPT_ARG_VAL, &always_checksum, 0, 0, 0 }, ++ {"sumfiles", 0, POPT_ARG_STRING, 0, OPT_SUMFILES, 0, 0 }, + {"block-size", 'B', POPT_ARG_LONG, &block_size, 0, 0, 0 }, + {"compare-dest", 0, POPT_ARG_STRING, 0, OPT_COMPARE_DEST, 0, 0 }, + {"copy-dest", 0, POPT_ARG_STRING, 0, OPT_COPY_DEST, 0, 0 }, +@@ -1657,6 +1660,23 @@ int parse_arguments(int *argc_p, const char ***argv_p) + } + break; + ++ case OPT_SUMFILES: ++ arg = poptGetOptArg(pc); ++ checksum_files = 0; ++ if (strcmp(arg, "lax") == 0) ++ checksum_files |= CSF_LAX_MODE; ++ else if (strcmp(arg, "strict") == 0) ++ checksum_files |= CSF_STRICT_MODE; ++ else if (strcmp(arg, "none") == 0) ++ checksum_files = CSF_IGNORE_FILES; ++ else { ++ snprintf(err_buf, sizeof err_buf, ++ "Invalid argument passed to --sumfiles (%s)\n", ++ arg); ++ return 0; ++ } ++ break; ++ + case OPT_INFO: + arg = poptGetOptArg(pc); + parse_output_words(info_words, info_levels, arg, USER_PRIORITY); +@@ -1871,6 +1891,9 @@ int parse_arguments(int *argc_p, const char ***argv_p) + } + #endif + ++ if (!always_checksum) ++ checksum_files = CSF_IGNORE_FILES; ++ + if (write_batch && read_batch) { + snprintf(err_buf, sizeof err_buf, + 
"--write-batch and --read-batch can not be used together\n"); +diff --git a/rsync.h b/rsync.h +--- a/rsync.h ++++ b/rsync.h +@@ -733,6 +733,10 @@ extern int xattrs_ndx; + #define F_SUM(f) ((char*)OPT_EXTRA(f, START_BUMP(f) + HLINK_BUMP(f) \ + + SUM_EXTRA_CNT - 1)) + ++/* These are only valid on an entry read from a checksum file. */ ++#define F_CTIME(f) OPT_EXTRA(f, LEN64_BUMP(f) + SUM_EXTRA_CNT)->unum ++#define F_INODE(f) OPT_EXTRA(f, LEN64_BUMP(f) + SUM_EXTRA_CNT + 1)->unum ++ + /* Some utility defines: */ + #define F_IS_ACTIVE(f) (f)->basename[0] + #define F_IS_HLINKED(f) ((f)->flags & FLAG_HLINKED) +@@ -929,6 +933,13 @@ typedef struct { + char fname[1]; /* has variable size */ + } relnamecache; + ++#define CSF_ENABLE (1<<1) ++#define CSF_LAX (1<<2) ++ ++#define CSF_IGNORE_FILES 0 ++#define CSF_LAX_MODE (CSF_ENABLE|CSF_LAX) ++#define CSF_STRICT_MODE (CSF_ENABLE) ++ + #include "byteorder.h" + #include "lib/mdigest.h" + #include "lib/wildmatch.h" +diff --git a/rsync.yo b/rsync.yo +--- a/rsync.yo ++++ b/rsync.yo +@@ -323,6 +323,7 @@ to the detailed description below for a complete description. verb( + -q, --quiet suppress non-error messages + --no-motd suppress daemon-mode MOTD (see caveat) + -c, --checksum skip based on checksum, not mod-time & size ++ --sumfiles=MODE use .rsyncsums to speedup --checksum mode + -a, --archive archive mode; equals -rlptgoD (no -H,-A,-X) + --no-OPTION turn off an implied OPTION (e.g. --no-D) + -r, --recursive recurse into directories +@@ -569,9 +570,9 @@ uses a "quick check" that (by default) checks if each file's size and time + of last modification match between the sender and receiver. This option + changes this to compare a 128-bit checksum for each file that has a + matching size. 
Generating the checksums means that both sides will expend +-a lot of disk I/O reading all the data in the files in the transfer (and +-this is prior to any reading that will be done to transfer changed files), +-so this can slow things down significantly. ++a lot of disk I/O reading the data in all the files in the transfer, so ++this can slow things down significantly (and this is prior to any reading ++that will be done to transfer the files that have changed). + + The sending side generates its checksums while it is doing the file-system + scan that builds the list of the available files. The receiver generates +@@ -579,6 +580,8 @@ its checksums when it is scanning for changed files, and will checksum any + file that has the same size as the corresponding sender's file: files with + either a changed size or a changed checksum are selected for transfer. + ++See also the bf(--sumfiles) option for a way to use cached checksum data. ++ + Note that rsync always verifies that each em(transferred) file was + correctly reconstructed on the receiving side by checking a whole-file + checksum that is generated as the file is transferred, but that +@@ -588,6 +591,36 @@ option's before-the-transfer "Does this file need to be updated?" check. + For protocol 30 and beyond (first supported in 3.0.0), the checksum used is + MD5. For older protocols, the checksum used is MD4. + ++dit(bf(--sumfiles=MODE)) This option tells rsync to make use of any cached ++checksum information it finds in per-directory .rsyncsums files when the ++current transfer is using the bf(--checksum) option. If the checksum data ++is up-to-date, it is used instead of recomputing it, saving both disk I/O ++and CPU time. If the checksum data is missing or outdated, the checksum is ++computed just as it would be if bf(--sumfiles) was not specified. 
++ ++The MODE value is either "lax", for relaxed checking (which compares size ++and mtime), "strict" (which also compares ctime and inode), or "none" to ++ignore any .rsyncsums files ("none" is the default). Rsync does not create ++or update these files, but there is a perl script in the support directory ++named "rsyncsums" that can be used for that. ++ ++This option has no effect unless bf(--checksum, -c) was also specified. It ++also only affects the current side of the transfer, so if you want the ++remote side to parse its own .rsyncsums files, specify the option via the ++bf(--rsync-path) option (e.g. bf(--rsync-path="rsync --sumfiles=lax")). ++ ++To avoid transferring the system's checksum files, you can use an exclude ++(e.g. bf(--exclude=.rsyncsums)). To make this easier to type, you can use ++a popt alias. For instance, adding the following line in your ~/.popt file ++defines a bf(--cc) option that enables lax checksum files and excludes the ++checksum files: ++ ++verb( rsync alias --cc -c --sumfiles=lax --exclude=.rsyncsums) ++ ++An rsync daemon does not allow the client to control this setting, so see ++the "checksum files" daemon parameter for information on how to make a ++daemon use cached checksum data. ++ + dit(bf(-a, --archive)) This is equivalent to bf(-rlptgoD). It is a quick + way of saying you want recursion and want to preserve almost + everything (with -H being a notable omission). +diff --git a/rsyncd.conf.yo b/rsyncd.conf.yo +--- a/rsyncd.conf.yo ++++ b/rsyncd.conf.yo +@@ -314,6 +314,17 @@ locking on this file to ensure that the max connections limit is not + exceeded for the modules sharing the lock file. + The default is tt(/var/run/rsyncd.lock). + ++dit(bf(checksum files)) This parameter tells rsync to make use of any cached ++checksum information it finds in per-directory .rsyncsums files when the ++current transfer is using the bf(--checksum) option. 
The value can be set ++to either "lax", "strict", or "none" -- see the client's bf(--sumfiles) ++option for what these choices do. ++ ++Note also that the client's command-line option, bf(--sumfiles), has no ++effect on a daemon. A daemon will only access checksum files if this ++config option tells it to. See also the bf(exclude) directive for a way ++to hide the .rsyncsums files from the user. ++ + dit(bf(read only)) This parameter determines whether clients + will be able to upload files or not. If "read only" is true then any + attempted uploads will fail. If "read only" is false then uploads will diff --git a/support/rsyncsums b/support/rsyncsums -new file mode 100644 +new file mode 100755 --- /dev/null +++ b/support/rsyncsums -@@ -0,0 +1,203 @@ +@@ -0,0 +1,201 @@ +#!/usr/bin/perl -w +use strict; + @@ -313,12 +830,14 @@ new file mode 100644 +&Getopt::Long::Configure('bundling'); +&usage if !&GetOptions( + 'recurse|r' => \( my $recurse_opt ), -+ 'simple-cmp|s' => \( my $ignore_ctime_and_inode ), ++ 'mode|m=s' => \( my $cmp_mode = 'strict' ), + 'check|c' => \( my $check_opt ), + 'verbose|v+' => \( my $verbosity = 0 ), + 'help|h' => \( my $help_opt ), +); -+&usage if $help_opt; ++&usage if $help_opt || $cmp_mode !~ /^(lax|strict)$/; ++ ++my $ignore_ctime_and_inode = $cmp_mode eq 'lax' ? 
1 : 0; + +my $start_dir = cwd(); + @@ -378,17 +897,9 @@ new file mode 100644 + next unless -f _; + + my($size,$mtime,$ctime,$inode) = (stat(_))[7,9,10,1]; ++ $ctime &= 0xFFFFFFFF; ++ $inode &= 0xFFFFFFFF; + my $ref = $cache{$fn}; -+ if ($size == 0) { -+ if (defined $ref) { -+ delete $cache{$fn}; -+ $f_cnt--; -+ if (!$check_opt && !$update_cnt++) { -+ print "UPDATING\n" if $verbosity; -+ } -+ } -+ next; -+ } + $d_cnt++; + + if (!$check_opt) { @@ -426,6 +937,8 @@ new file mode 100644 + print " $sum4 $sum5" if $verbosity > 2; + print " $fn" if $verbosity > 1; + my($size2,$mtime2,$ctime2,$inode2) = (stat(IN))[7,9,10,1]; ++ $ctime2 &= 0xFFFFFFFF; ++ $inode2 &= 0xFFFFFFFF; + last if $size == $size2 && $mtime == $mtime2 + && ($ignore_ctime_and_inode || ($ctime == $ctime2 && $inode == $inode2)); + $size = $size2; @@ -456,7 +969,7 @@ new file mode 100644 + $exit_code = 1; + } else { + print "\n" if $verbosity > 1; -+ $cache{$fn} = [ 1, $sum4, $sum5, $size, $mtime, $ctime & 0xFFFFFFFF, $inode & 0xFFFFFFFF ]; ++ $cache{$fn} = [ 1, $sum4, $sum5, $size, $mtime, $ctime, $inode ]; + } + } + @@ -497,7 +1010,9 @@ new file mode 100644 + +Options: + -r, --recurse Update $SUMS_FILE files in subdirectories too. -+ -s, --simple-cmp Ignore ctime and inode values when comparing identicality. ++ -m, --mode=MODE Compare entries in either "lax" or "strict" mode. Using ++ "lax" compares size and mtime, while "strict" additionally ++ compares ctime and inode. Default: strict. + -c, --check Check if the checksums are right (doesn't update). + -v, --verbose Mention what we're doing. Repeat for more info. + -h, --help Display this help message.