1 Optimize the --checksum option using externally created .rsyncsums files.
3 This adds a new option, --sumfiles=MODE, that allows you to use a cache of
4 checksums when performing a --checksum transfer. These checksum files
5 (.rsyncsums) must be created by some other process -- see the perl script,
6 rsyncsums, in the support dir for one way.
8 This option can be particularly helpful to a public mirror that wants to
9 pre-compute their .rsyncsums files, set the "checksum files = strict" option
10 in their daemon config file, and thus make it quite efficient for a client
11 rsync to make use of the --checksum option on their server.
13 To use this patch, run these commands for a successful build:
15 patch -p1 <patches/checksum-reading.diff
16 ./configure (optional if already run)
19 based-on: 603cf476ef5a1155203037d2127341cdbb8646d7
20 diff --git a/clientserver.c b/clientserver.c
23 @@ -44,6 +44,8 @@ extern int numeric_ids;
24 extern int filesfrom_fd;
25 extern int remote_protocol;
26 extern int protocol_version;
27 +extern int always_checksum;
28 +extern int checksum_files;
29 extern int io_timeout;
31 extern int write_batch;
32 @@ -1033,6 +1035,9 @@ static int rsync_module(int f_in, int f_out, int i, const char *addr, const char
33 } else if (am_root < 0) /* Treat --fake-super from client as --super. */
36 + checksum_files = always_checksum ? lp_checksum_files(i)
39 if (filesfrom_fd == 0)
42 diff --git a/daemon-parm.txt b/daemon-parm.txt
45 @@ -49,6 +49,7 @@ INTEGER max_connections 0
46 INTEGER max_verbosity 1
49 +ENUM checksum_files CSF_IGNORE_FILES
50 ENUM syslog_facility LOG_DAEMON
53 diff --git a/flist.c b/flist.c
64 @@ -33,6 +34,7 @@ extern int am_sender;
65 extern int am_generator;
66 extern int inc_recurse;
67 extern int always_checksum;
68 +extern int basis_dir_cnt;
69 extern int checksum_type;
71 extern int ignore_errors;
72 @@ -62,6 +64,7 @@ extern int implied_dirs;
73 extern int ignore_perishable;
74 extern int non_perishable_cnt;
75 extern int prune_empty_dirs;
76 +extern int checksum_files;
77 extern int copy_links;
78 extern int copy_unsafe_links;
79 extern int protocol_version;
80 @@ -73,6 +76,7 @@ extern int sender_symlink_iconv;
81 extern int output_needs_newline;
82 extern int sender_keeps_checksum;
83 extern int unsort_ndx;
84 +extern char *basis_dir[];
86 extern struct stats stats;
87 extern char *filesfrom_host;
88 @@ -90,6 +94,20 @@ extern int filesfrom_convert;
89 extern iconv_t ic_send, ic_recv;
92 +#ifdef HAVE_UTIMENSAT
93 +#ifdef HAVE_STRUCT_STAT_ST_MTIM_TV_NSEC
94 +#define ST_MTIME_NSEC st_mtim.tv_nsec
95 +#elif defined(HAVE_STRUCT_STAT_ST_MTIMENSEC)
96 +#define ST_MTIME_NSEC st_mtimensec
100 +#define RSYNCSUMS_FILE ".rsyncsums"
101 +#define RSYNCSUMS_LEN (sizeof RSYNCSUMS_FILE-1)
103 +#define CLEAN_STRIP_ROOT (1<<0)
104 +#define CLEAN_KEEP_LAST (1<<1)
106 #define PTR_SIZE (sizeof (struct file_struct *))
109 @@ -134,8 +152,12 @@ static char empty_sum[MAX_DIGEST_LEN];
110 static int flist_count_offset; /* for --delete --progress */
111 static int show_filelist_progress;
113 +static struct csum_cache {
114 + struct file_list *flist;
115 +} *csum_cache = NULL;
117 static struct file_list *flist_new(int flags, const char *msg);
118 -static void flist_sort_and_clean(struct file_list *flist, int strip_root);
119 +static void flist_sort_and_clean(struct file_list *flist, int flags);
120 static void output_flist(struct file_list *flist);
122 void init_flist(void)
123 @@ -324,6 +346,235 @@ static void flist_done_allocating(struct file_list *flist)
124 flist->pool_boundary = ptr;
127 +void reset_checksum_cache()
129 + int slot, slots = am_sender ? 1 : basis_dir_cnt + 1;
132 + csum_cache = new_array0(struct csum_cache, slots);
134 + for (slot = 0; slot < slots; slot++) {
135 + struct file_list *flist = csum_cache[slot].flist;
138 + /* Reset the pool memory and empty the file-list array. */
139 + pool_free_old(flist->file_pool,
140 + pool_boundary(flist->file_pool, 0));
143 + flist = csum_cache[slot].flist = flist_new(FLIST_TEMP, "reset_checksum_cache");
147 + flist->next = NULL;
151 +/* The basename_len count is the length of the basename + 1 for the '\0'. */
152 +static int add_checksum(struct file_list *flist, const char *dirname,
153 + const char *basename, int basename_len, OFF_T file_length,
154 + time_t mtime, uint32 ctime, uint32 inode,
157 + struct file_struct *file;
158 + int alloc_len, extra_len;
161 + if (basename_len == RSYNCSUMS_LEN+1 && *basename == '.'
162 + && strcmp(basename, RSYNCSUMS_FILE) == 0)
165 + /* "2" is for a 32-bit ctime num and a 32-bit inode num. */
166 + extra_len = (file_extra_cnt + (file_length > 0xFFFFFFFFu) + SUM_EXTRA_CNT + 2)
168 +#if EXTRA_ROUNDING > 0
169 + if (extra_len & (EXTRA_ROUNDING * EXTRA_LEN))
170 + extra_len = (extra_len | (EXTRA_ROUNDING * EXTRA_LEN)) + EXTRA_LEN;
172 + alloc_len = FILE_STRUCT_LEN + extra_len + basename_len;
173 + bp = pool_alloc(flist->file_pool, alloc_len, "add_checksum");
175 + memset(bp, 0, extra_len + FILE_STRUCT_LEN);
177 + file = (struct file_struct *)bp;
178 + bp += FILE_STRUCT_LEN;
180 + memcpy(bp, basename, basename_len);
182 + file->mode = S_IFREG;
183 + file->modtime = mtime;
184 + file->len32 = (uint32)file_length;
185 + if (file_length > 0xFFFFFFFFu) {
186 + file->flags |= FLAG_LENGTH64;
187 + OPT_EXTRA(file, 0)->unum = (uint32)(file_length >> 32);
189 + file->dirname = dirname;
190 + F_CTIME(file) = ctime;
191 + F_INODE(file) = inode;
193 + memcpy(bp, sum, flist_csum_len);
195 + flist_expand(flist, 1);
196 + flist->files[flist->used++] = file;
198 + flist->sorted = flist->files;
203 +/* The "dirname" arg's data must remain unchanged during the lifespan of
204 + * the created csum_cache[].flist object because we use it directly. */
205 +static void read_checksums(int slot, struct file_list *flist, const char *dirname)
207 + char line[MAXPATHLEN+1024], fbuf[MAXPATHLEN], sum[MAX_DIGEST_LEN];
213 + uint32 ctime, inode;
214 + int dlen = dirname ? strlcpy(fbuf, dirname, sizeof fbuf) : 0;
216 + if (dlen >= (int)(sizeof fbuf - 1 - RSYNCSUMS_LEN))
219 + fbuf[dlen++] = '/';
222 + strlcpy(fbuf+dlen, RSYNCSUMS_FILE, sizeof fbuf - dlen);
224 + pathjoin(line, sizeof line, basis_dir[slot-1], fbuf);
228 + if (!(fp = fopen(cp, "r")))
231 + while (fgets(line, sizeof line, fp)) {
233 + if (checksum_type == 5) {
234 + char *alt_sum = cp;
236 + while (*++cp == '=') {}
238 + while (isHexDigit(cp)) cp++;
239 + if (cp - alt_sum != MD4_DIGEST_LEN*2 || *cp != ' ')
241 + while (*++cp == ' ') {}
247 + for (i = 0; i < flist_csum_len*2; i++, cp++) {
249 + if (isHexDigit(cp)) {
253 + x = (*cp & 0xF) + 9;
266 + while (*++cp == ' ') {}
268 + if (checksum_type != 5) {
269 + char *alt_sum = cp;
271 + while (*++cp == '=') {}
273 + while (isHexDigit(cp)) cp++;
274 + if (cp - alt_sum != MD5_DIGEST_LEN*2 || *cp != ' ')
276 + while (*++cp == ' ') {}
280 + while (isDigit(cp))
281 + file_length = file_length * 10 + *cp++ - '0';
284 + while (*++cp == ' ') {}
287 + while (isDigit(cp))
288 + mtime = mtime * 10 + *cp++ - '0';
291 + while (*++cp == ' ') {}
294 + while (isDigit(cp))
295 + ctime = ctime * 10 + *cp++ - '0';
298 + while (*++cp == ' ') {}
301 + while (isDigit(cp))
302 + inode = inode * 10 + *cp++ - '0';
305 + while (*++cp == ' ') {}
308 + while (len && (cp[len-1] == '\n' || cp[len-1] == '\r'))
312 + cp[len++] = '\0'; /* len now counts the null */
313 + if (strchr(cp, '/'))
315 + if (len > MAXPATHLEN)
318 + strlcpy(fbuf+dlen, cp, sizeof fbuf - dlen);
320 + add_checksum(flist, dirname, cp, len, file_length,
321 + mtime, ctime, inode,
326 + flist_sort_and_clean(flist, CLEAN_KEEP_LAST);
329 +void get_cached_checksum(int slot, const char *fname, struct file_struct *file,
330 + STRUCT_STAT *stp, char *sum_buf)
332 + struct file_list *flist = csum_cache[slot].flist;
335 + if (!flist->next) {
336 + flist->next = cur_flist; /* next points from checksum flist to file flist */
337 + read_checksums(slot, flist, file->dirname);
340 + if ((j = flist_find(flist, file)) >= 0) {
341 + struct file_struct *fp = flist->sorted[j];
343 + if (F_LENGTH(fp) == stp->st_size
344 + && fp->modtime == stp->st_mtime
345 + && (checksum_files & CSF_LAX
346 + || (F_CTIME(fp) == (uint32)stp->st_ctime
347 + && F_INODE(fp) == (uint32)stp->st_ino))) {
348 + memcpy(sum_buf, F_SUM(fp), MAX_DIGEST_LEN);
353 + file_checksum(fname, stp, sum_buf);
356 /* Call this with EITHER (1) "file, NULL, 0" to chdir() to the file's
357 * F_PATHNAME(), or (2) "NULL, dir, dirlen" to chdir() to the supplied dir,
358 * with dir == NULL taken to be the starting directory, and dirlen < 0
359 @@ -1201,7 +1452,7 @@ struct file_struct *make_file(const char *fname, struct file_list *flist,
360 STRUCT_STAT *stp, int flags, int filter_level)
362 static char *lastdir;
363 - static int lastdir_len = -1;
364 + static int lastdir_len = -2;
365 struct file_struct *file;
366 char thisname[MAXPATHLEN];
367 char linkname[MAXPATHLEN];
368 @@ -1347,9 +1598,16 @@ struct file_struct *make_file(const char *fname, struct file_list *flist,
369 memcpy(lastdir, thisname, len);
372 + if (checksum_files && am_sender && flist)
373 + reset_checksum_cache();
378 + if (checksum_files && am_sender && flist && lastdir_len == -2) {
380 + reset_checksum_cache();
383 basename_len = strlen(basename) + 1; /* count the '\0' */
386 @@ -1367,11 +1625,8 @@ struct file_struct *make_file(const char *fname, struct file_list *flist,
387 extra_len += EXTRA_LEN;
390 - if (always_checksum && am_sender && S_ISREG(st.st_mode)) {
391 - file_checksum(thisname, &st, tmp_sum);
392 - if (sender_keeps_checksum)
393 - extra_len += SUM_EXTRA_CNT * EXTRA_LEN;
395 + if (sender_keeps_checksum && S_ISREG(st.st_mode))
396 + extra_len += SUM_EXTRA_CNT * EXTRA_LEN;
398 #if EXTRA_ROUNDING > 0
399 if (extra_len & (EXTRA_ROUNDING * EXTRA_LEN))
400 @@ -1460,8 +1715,14 @@ struct file_struct *make_file(const char *fname, struct file_list *flist,
404 - if (sender_keeps_checksum && S_ISREG(st.st_mode))
405 - memcpy(F_SUM(file), tmp_sum, flist_csum_len);
406 + if (always_checksum && am_sender && S_ISREG(st.st_mode)) {
407 + if (flist && checksum_files)
408 + get_cached_checksum(0, thisname, file, &st, tmp_sum);
410 + file_checksum(thisname, &st, tmp_sum);
411 + if (sender_keeps_checksum)
412 + memcpy(F_SUM(file), tmp_sum, flist_csum_len);
416 F_NDX(file) = stats.num_dirs;
417 @@ -2673,7 +2934,7 @@ struct file_list *recv_file_list(int f, int dir_ndx)
418 /* The --relative option sends paths with a leading slash, so we need
419 * to specify the strip_root option here. We rejected leading slashes
420 * for a non-relative transfer in recv_file_entry(). */
421 - flist_sort_and_clean(flist, relative_paths);
422 + flist_sort_and_clean(flist, relative_paths ? CLEAN_STRIP_ROOT : 0);
424 if (protocol_version < 30) {
425 /* Recv the io_error flag */
426 @@ -2918,7 +3179,7 @@ void flist_free(struct file_list *flist)
428 /* This routine ensures we don't have any duplicate names in our file list.
429 * duplicate names can cause corruption because of the pipelining. */
430 -static void flist_sort_and_clean(struct file_list *flist, int strip_root)
431 +static void flist_sort_and_clean(struct file_list *flist, int flags)
433 char fbuf[MAXPATHLEN];
435 @@ -2969,7 +3230,7 @@ static void flist_sort_and_clean(struct file_list *flist, int strip_root)
436 /* If one is a dir and the other is not, we want to
437 * keep the dir because it might have contents in the
438 * list. Otherwise keep the first one. */
439 - if (S_ISDIR(file->mode)) {
440 + if (S_ISDIR(file->mode) || flags & CLEAN_KEEP_LAST) {
441 struct file_struct *fp = flist->sorted[j];
442 if (!S_ISDIR(fp->mode))
444 @@ -2985,8 +3246,8 @@ static void flist_sort_and_clean(struct file_list *flist, int strip_root)
449 - if (DEBUG_GTE(DUP, 1)) {
450 + if (!am_sender || flags & CLEAN_KEEP_LAST) {
451 + if (DEBUG_GTE(DUP, 1) && !(flags & CLEAN_KEEP_LAST)) {
453 "removing duplicate name %s from file list (%d)\n",
454 f_name(file, fbuf), drop + flist->ndx_start);
455 @@ -3008,7 +3269,7 @@ static void flist_sort_and_clean(struct file_list *flist, int strip_root)
457 flist->high = prev_i;
460 + if (flags & CLEAN_STRIP_ROOT) {
461 /* We need to strip off the leading slashes for relative
462 * paths, but this must be done _after_ the sorting phase. */
463 for (i = flist->low; i <= flist->high; i++) {
464 diff --git a/generator.c b/generator.c
467 @@ -52,6 +52,7 @@ extern int delete_after;
468 extern int missing_args;
469 extern int msgdone_cnt;
470 extern int ignore_errors;
471 +extern int checksum_files;
472 extern int remove_source_files;
473 extern int delay_updates;
474 extern int update_only;
475 @@ -612,7 +613,7 @@ static enum filetype get_file_type(mode_t mode)
478 /* Perform our quick-check heuristic for determining if a file is unchanged. */
479 -int quick_check_ok(enum filetype ftype, const char *fn, struct file_struct *file, STRUCT_STAT *st)
480 +int quick_check_ok(enum filetype ftype, const char *fn, struct file_struct *file, STRUCT_STAT *st, int slot)
484 @@ -623,7 +624,10 @@ int quick_check_ok(enum filetype ftype, const char *fn, struct file_struct *file
485 * of the file mtime to determine whether to sync. */
486 if (always_checksum > 0) {
487 char sum[MAX_DIGEST_LEN];
488 - file_checksum(fn, st, sum);
489 + if (checksum_files && slot >= 0)
490 + get_cached_checksum(slot, fn, file, st, sum);
492 + file_checksum(fn, st, sum);
493 return memcmp(sum, F_SUM(file), flist_csum_len) == 0;
496 @@ -951,7 +955,7 @@ static int try_dests_reg(struct file_struct *file, char *fname, int ndx,
500 - if (!quick_check_ok(FT_REG, cmpbuf, file, &sxp->st))
501 + if (!quick_check_ok(FT_REG, cmpbuf, file, &sxp->st, j+1))
503 if (match_level == 1) {
505 @@ -1210,7 +1214,7 @@ static void recv_generator(char *fname, struct file_struct *file, int ndx,
506 * --ignore-non-existing, daemon exclude, or mkdir failure. */
507 static struct file_struct *skip_dir = NULL;
508 static struct file_list *fuzzy_dirlist[MAX_BASIS_DIRS+1];
509 - static int need_fuzzy_dirlist = 0;
510 + static int need_new_dirscan = 0;
511 struct file_struct *fuzzy_file = NULL;
512 int fd = -1, f_copy = -1;
514 @@ -1328,8 +1332,9 @@ static void recv_generator(char *fname, struct file_struct *file, int ndx,
515 fuzzy_dirlist[i] = NULL;
518 - need_fuzzy_dirlist = 1;
520 + need_new_dirscan = 1;
521 + } else if (checksum_files)
522 + need_new_dirscan = 1;
525 dflt_perms = default_perms_for_dir(dn);
526 @@ -1337,6 +1342,24 @@ static void recv_generator(char *fname, struct file_struct *file, int ndx,
530 + if (need_new_dirscan && ftype == FT_REG) {
532 + strlcpy(fnamecmpbuf, dn, sizeof fnamecmpbuf);
533 + for (i = 0; i < fuzzy_basis; i++) {
534 + if (i && pathjoin(fnamecmpbuf, MAXPATHLEN, basis_dir[i-1], dn) >= MAXPATHLEN)
536 + fuzzy_dirlist[i] = get_dirlist(fnamecmpbuf, -1, GDL_IGNORE_FILTER_RULES | GDL_PERHAPS_DIR);
537 + if (fuzzy_dirlist[i] && fuzzy_dirlist[i]->used == 0) {
538 + flist_free(fuzzy_dirlist[i]);
539 + fuzzy_dirlist[i] = NULL;
542 + if (checksum_files) {
543 + reset_checksum_cache();
545 + need_new_dirscan = 0;
548 statret = link_stat(fname, &sx.st, keep_dirlinks && is_dir);
551 @@ -1746,22 +1769,6 @@ static void recv_generator(char *fname, struct file_struct *file, int ndx,
554 if (statret != 0 && fuzzy_basis) {
555 - if (need_fuzzy_dirlist) {
556 - const char *dn = file->dirname ? file->dirname : ".";
558 - strlcpy(fnamecmpbuf, dn, sizeof fnamecmpbuf);
559 - for (i = 0; i < fuzzy_basis; i++) {
560 - if (i && pathjoin(fnamecmpbuf, MAXPATHLEN, basis_dir[i-1], dn) >= MAXPATHLEN)
562 - fuzzy_dirlist[i] = get_dirlist(fnamecmpbuf, -1, GDL_IGNORE_FILTER_RULES | GDL_PERHAPS_DIR);
563 - if (fuzzy_dirlist[i] && fuzzy_dirlist[i]->used == 0) {
564 - flist_free(fuzzy_dirlist[i]);
565 - fuzzy_dirlist[i] = NULL;
568 - need_fuzzy_dirlist = 0;
571 /* Sets fnamecmp_type to FNAMECMP_FUZZY or above. */
572 fuzzy_file = find_fuzzy(file, fuzzy_dirlist, &fnamecmp_type);
574 @@ -1794,7 +1801,7 @@ static void recv_generator(char *fname, struct file_struct *file, int ndx,
576 else if (fnamecmp_type >= FNAMECMP_FUZZY)
578 - else if (quick_check_ok(FT_REG, fnamecmp, file, &sx.st)) {
579 + else if (quick_check_ok(FT_REG, fnamecmp, file, &sx.st, fnamecmp_type == FNAMECMP_FNAME ? 0 : -1)) {
581 do_unlink(partialptr);
582 handle_partial_dir(partialptr, PDIR_DELETE);
583 diff --git a/hlink.c b/hlink.c
586 @@ -406,7 +406,7 @@ int hard_link_check(struct file_struct *file, int ndx, char *fname,
590 - if (!quick_check_ok(FT_REG, cmpbuf, file, &alt_sx.st))
591 + if (!quick_check_ok(FT_REG, cmpbuf, file, &alt_sx.st, j+1))
594 if (unchanged_attrs(cmpbuf, file, &alt_sx))
595 diff --git a/loadparm.c b/loadparm.c
598 @@ -162,6 +162,13 @@ static struct enum_list enum_syslog_facility[] = {
602 +static struct enum_list enum_checksum_files[] = {
603 + { CSF_IGNORE_FILES, "none" },
604 + { CSF_LAX_MODE, "lax" },
605 + { CSF_STRICT_MODE, "strict" },
609 /* Expand %VAR% references. Any unknown vars or unrecognized
610 * syntax leaves the raw chars unchanged. */
611 static char *expand_vars(const char *str)
612 diff --git a/options.c b/options.c
615 @@ -117,6 +117,7 @@ size_t bwlimit_writemax = 0;
616 int ignore_existing = 0;
617 int ignore_non_existing = 0;
618 int need_messages_from_generator = 0;
619 +int checksum_files = CSF_IGNORE_FILES;
620 int max_delete = INT_MIN;
623 @@ -573,7 +574,7 @@ enum {OPT_SERVER = 1000, OPT_DAEMON, OPT_SENDER, OPT_EXCLUDE, OPT_EXCLUDE_FROM,
624 OPT_INCLUDE, OPT_INCLUDE_FROM, OPT_MODIFY_WINDOW, OPT_MIN_SIZE, OPT_CHMOD,
625 OPT_READ_BATCH, OPT_WRITE_BATCH, OPT_ONLY_WRITE_BATCH, OPT_MAX_SIZE,
626 OPT_NO_D, OPT_APPEND, OPT_NO_ICONV, OPT_INFO, OPT_DEBUG, OPT_BLOCK_SIZE,
627 - OPT_USERMAP, OPT_GROUPMAP, OPT_CHOWN, OPT_BWLIMIT, OPT_STDERR,
628 + OPT_USERMAP, OPT_GROUPMAP, OPT_CHOWN, OPT_BWLIMIT, OPT_STDERR, OPT_SUMFILES,
629 OPT_OLD_COMPRESS, OPT_NEW_COMPRESS, OPT_NO_COMPRESS,
630 OPT_STOP_AFTER, OPT_STOP_AT,
631 OPT_REFUSED_BASE = 9000};
632 @@ -729,6 +730,7 @@ static struct poptOption long_options[] = {
633 {"no-c", 0, POPT_ARG_VAL, &always_checksum, 0, 0, 0 },
634 {"checksum-choice", 0, POPT_ARG_STRING, &checksum_choice, 0, 0, 0 },
635 {"cc", 0, POPT_ARG_STRING, &checksum_choice, 0, 0, 0 },
636 + {"sumfiles", 0, POPT_ARG_STRING, 0, OPT_SUMFILES, 0, 0 },
637 {"block-size", 'B', POPT_ARG_STRING, 0, OPT_BLOCK_SIZE, 0, 0 },
638 {"compare-dest", 0, POPT_ARG_STRING, 0, OPT_COMPARE_DEST, 0, 0 },
639 {"copy-dest", 0, POPT_ARG_STRING, 0, OPT_COPY_DEST, 0, 0 },
640 @@ -1722,6 +1724,23 @@ int parse_arguments(int *argc_p, const char ***argv_p)
645 + arg = poptGetOptArg(pc);
646 + checksum_files = 0;
647 + if (strcmp(arg, "lax") == 0)
648 + checksum_files |= CSF_LAX_MODE;
649 + else if (strcmp(arg, "strict") == 0)
650 + checksum_files |= CSF_STRICT_MODE;
651 + else if (strcmp(arg, "none") == 0)
652 + checksum_files = CSF_IGNORE_FILES;
654 + snprintf(err_buf, sizeof err_buf,
655 + "Invalid argument passed to --sumfiles (%s)\n",
662 arg = poptGetOptArg(pc);
663 parse_output_words(info_words, info_levels, arg, USER_PRIORITY);
664 @@ -2052,6 +2071,9 @@ int parse_arguments(int *argc_p, const char ***argv_p)
668 + if (!always_checksum)
669 + checksum_files = CSF_IGNORE_FILES;
671 if (write_batch && read_batch) {
672 snprintf(err_buf, sizeof err_buf,
673 "--write-batch and --read-batch can not be used together\n");
674 diff --git a/rsync.1.md b/rsync.1.md
677 @@ -338,6 +338,7 @@ detailed description below for a complete description.
678 --quiet, -q suppress non-error messages
679 --no-motd suppress daemon-mode MOTD
680 --checksum, -c skip based on checksum, not mod-time & size
681 +--sumfiles=MODE use .rsyncsums to speed up --checksum mode
682 --archive, -a archive mode is -rlptgoD (no -A,-X,-U,-N,-H)
683 --no-OPTION turn off an implied OPTION (e.g. --no-D)
684 --recursive, -r recurse into directories
685 @@ -702,6 +703,8 @@ your home directory (remove the '=' for that).
686 file that has the same size as the corresponding sender's file: files with
687 either a changed size or a changed checksum are selected for transfer.
689 + See also the `--sumfiles` option for a way to use cached checksum data.
691 Note that rsync always verifies that each _transferred_ file was correctly
692 reconstructed on the receiving side by checking a whole-file checksum that
693 is generated as the file is transferred, but that automatic
694 @@ -712,6 +715,38 @@ your home directory (remove the '=' for that).
695 can be overridden using either the `--checksum-choice` (`--cc`) option or an
696 environment variable that is discussed in that option's section.
698 +0. `--sumfiles=MODE`
700 + This option tells rsync to make use of any cached checksum information it
701 + finds in per-directory .rsyncsums files when the current transfer is using
702 + the `--checksum` option. If the checksum data is up-to-date, it is used
703 + instead of recomputing it, saving both disk I/O and CPU time. If the
704 + checksum data is missing or outdated, the checksum is computed just as it
705 + would be if `--sumfiles` was not specified.
707 + The MODE value is either "lax", for relaxed checking (which compares size
708 + and mtime), "strict" (which also compares ctime and inode), or "none" to
709 + ignore any .rsyncsums files ("none" is the default). Rsync does not create
710 + or update these files, but there is a perl script in the support directory
711 + named "rsyncsums" that can be used for that.
713 + This option has no effect unless `--checksum` (`-c`) was also specified. It
714 + also only affects the current side of the transfer, so if you want the
715 + remote side to parse its own .rsyncsums files, specify the option via
716 + `--remote-option` (`-M`) (e.g. "`-M--sumfiles=lax`").
718 + To avoid transferring the system's checksum files, you can use an exclude
719 + (e.g. `--exclude=.rsyncsums`). To make this easier to type, you can use a
720 + popt alias. For instance, adding the following line in your ~/.popt file
721 + defines a `--cs` option that enables lax checksum files and excludes the
724 + > rsync alias --cs -c --sumfiles=lax -M--sumfiles=lax -f-_.rsyncsums
726 + An rsync daemon does not allow the client to control this setting, so see
727 + the "checksum files" daemon parameter for information on how to make a
728 + daemon use cached checksum data.
732 This is equivalent to `-rlptgoD`. It is a quick way of saying you want
733 diff --git a/rsync.h b/rsync.h
736 @@ -895,6 +895,10 @@ extern int xattrs_ndx;
737 #define F_SUM(f) ((char*)OPT_EXTRA(f, START_BUMP(f) + HLINK_BUMP(f) \
738 + SUM_EXTRA_CNT - 1))
740 +/* These are only valid on an entry derived from a checksum file. */
741 +#define F_CTIME(f) OPT_EXTRA(f, LEN64_BUMP(f) + SUM_EXTRA_CNT)->unum
742 +#define F_INODE(f) OPT_EXTRA(f, LEN64_BUMP(f) + SUM_EXTRA_CNT + 1)->unum
744 /* Some utility defines: */
745 #define F_IS_ACTIVE(f) (f)->basename[0]
746 #define F_IS_HLINKED(f) ((f)->flags & FLAG_HLINKED)
747 @@ -1107,6 +1111,13 @@ typedef struct {
748 #define RELNAMECACHE_LEN (offsetof(relnamecache, fname))
751 +#define CSF_ENABLE (1<<1)
752 +#define CSF_LAX (1<<2)
754 +#define CSF_IGNORE_FILES 0
755 +#define CSF_LAX_MODE (CSF_ENABLE|CSF_LAX)
756 +#define CSF_STRICT_MODE (CSF_ENABLE)
758 #include "byteorder.h"
759 #include "lib/mdigest.h"
760 #include "lib/wildmatch.h"
761 diff --git a/rsyncd.conf.5.md b/rsyncd.conf.5.md
762 --- a/rsyncd.conf.5.md
763 +++ b/rsyncd.conf.5.md
764 @@ -419,6 +419,19 @@ the values of parameters. See the GLOBAL PARAMETERS section for more details.
765 the max connections limit is not exceeded for the modules sharing the lock
766 file. The default is `/var/run/rsyncd.lock`.
770 + This parameter tells rsync to make use of any cached checksum information
771 + it finds in per-directory .rsyncsums files when the current transfer is
772 + using the `--checksum` option. The value can be set to either "lax",
773 + "strict", or "none". See the client's `--sumfiles` option for what these
776 + Note also that the client's command-line option, `--sumfiles`, has no
777 + effect on a daemon. A daemon will only access checksum files if this
778 + config option tells it to. See also the `exclude` directive for a way to
779 + hide the .rsyncsums files from the user.
783 This parameter determines whether clients will be able to upload files or
784 diff --git a/support/rsyncsums b/support/rsyncsums
787 +++ b/support/rsyncsums
793 +use Cwd qw(abs_path cwd);
797 +our $SUMS_FILE = '.rsyncsums';
799 +&Getopt::Long::Configure('bundling');
800 +&usage if !&GetOptions(
801 + 'recurse|r' => \( my $recurse_opt ),
802 + 'mode|m=s' => \( my $cmp_mode = 'strict' ),
803 + 'check|c' => \( my $check_opt ),
804 + 'verbose|v+' => \( my $verbosity = 0 ),
805 + 'help|h' => \( my $help_opt ),
807 +&usage if $help_opt || $cmp_mode !~ /^(lax|strict)$/;
809 +my $ignore_ctime_and_inode = $cmp_mode eq 'lax' ? 1 : 0; # only "strict" mode compares ctime and inode
811 +my $start_dir = cwd();
814 +@dirs = '.' unless @dirs;
823 +my $md4 = Digest::MD4->new;
824 +my $md5 = Digest::MD5->new;
827 + my $dir = shift @dirs;
829 + if (!chdir($dir)) {
830 + warn "Unable to chdir to $dir: $!\n";
833 + if (!opendir(DP, '.')) {
834 + warn "Unable to opendir $dir: $!\n";
839 + $reldir =~ s#^$start_dir(/|$)# $1 ? '' : '.' #eo;
841 + print "$reldir ... ";
842 + print "\n" if $check_opt;
847 + if (open(FP, '<', $SUMS_FILE)) {
850 + my($sum4, $sum5, $size, $mtime, $ctime, $inode, $fn) = split(' ', $_, 7);
851 + $cache{$fn} = [ 0, $sum4, $sum5, $size, $mtime, $ctime & 0xFFFFFFFF, $inode & 0xFFFFFFFF ];
859 + my $update_cnt = 0;
860 + while (defined(my $fn = readdir(DP))) {
861 + next if $fn =~ /^\.\.?$/ || $fn =~ /^\Q$SUMS_FILE\E$/o || -l $fn;
863 + push(@subdirs, "$dir/$fn") unless $fn =~ /^(CVS|\.svn|\.git|\.bzr)$/;
868 + my($size,$mtime,$ctime,$inode) = (stat(_))[7,9,10,1];
869 + $ctime &= 0xFFFFFFFF;
870 + $inode &= 0xFFFFFFFF;
871 + my $ref = $cache{$fn};
875 + if (defined $ref) {
877 + if ($$ref[3] == $size
878 + && $$ref[4] == $mtime
879 + && ($ignore_ctime_and_inode || ($$ref[5] == $ctime && $$ref[6] == $inode))
880 + && $$ref[1] !~ /=/ && $$ref[2] !~ /=/) {
884 + if (!$update_cnt++) {
885 + print "UPDATING\n" if $verbosity;
889 + if (!open(IN, $fn)) {
890 + print STDERR "Unable to read $fn: $!\n";
891 + if (defined $ref) {
892 + delete $cache{$fn};
900 + while (sysread(IN, $_, 64*1024)) {
904 + $sum4 = $md4->hexdigest;
905 + $sum5 = $md5->hexdigest;
906 + print " $sum4 $sum5" if $verbosity > 2;
907 + print " $fn" if $verbosity > 1;
908 + my($size2,$mtime2,$ctime2,$inode2) = (stat(IN))[7,9,10,1];
909 + $ctime2 &= 0xFFFFFFFF;
910 + $inode2 &= 0xFFFFFFFF;
911 + last if $size == $size2 && $mtime == $mtime2
912 + && ($ignore_ctime_and_inode || ($ctime == $ctime2 && $inode == $inode2));
918 + print " REREADING\n" if $verbosity > 1;
925 + if (!defined $ref) {
927 + } elsif ($sum4 ne $$ref[1] || $sum5 ne $$ref[2]) {
930 + print " OK\n" if $verbosity > 1;
933 + if ($verbosity < 2) {
934 + print $verbosity ? ' ' : "$reldir/";
940 + print "\n" if $verbosity > 1;
941 + $cache{$fn} = [ 1, $sum4, $sum5, $size, $mtime, $ctime, $inode ];
947 + unshift(@dirs, sort @subdirs) if $recurse_opt;
951 + } elsif ($d_cnt == 0) {
953 + print "(removed $SUMS_FILE) " if $verbosity;
954 + unlink($SUMS_FILE);
956 + print "empty\n" if $verbosity;
957 + } elsif ($update_cnt || $d_cnt != $f_cnt) {
958 + print "UPDATING\n" if $verbosity && !$update_cnt;
959 + open(FP, '>', $SUMS_FILE) or die "Unable to write $dir/$SUMS_FILE: $!\n";
961 + foreach my $fn (sort keys %cache) {
962 + my $ref = $cache{$fn};
963 + my($found, $sum4, $sum5, $size, $mtime, $ctime, $inode) = @$ref;
964 + next unless $found;
965 + printf FP '%s %s %10d %10d %10d %10d %s' . "\n", $sum4, $sum5, $size, $mtime, $ctime, $inode, $fn;
969 + print "ok\n" if $verbosity;
978 +Usage: rsyncsums [OPTIONS] [DIRS]
981 + -r, --recurse Update $SUMS_FILE files in subdirectories too.
982 + -m, --mode=MODE Compare entries in either "lax" or "strict" mode. Using
983 + "lax" compares size and mtime, while "strict" additionally
984 + compares ctime and inode. Default: strict.
985 + -c, --check Check if the checksums are right (doesn't update).
986 + -v, --verbose Mention what we're doing. Repeat for more info.
987 + -h, --help Display this help message.