1 Optimize the ability of a mirror to send checksums.
3 This adds a sender optimization feature that allows a cache of checksums
4 to be used when the client specifies the --checksum option. The checksum
5 files (.rsyncsums) must be created by some other process (see the perl
6 script in the support dir for one way).
8 This option should be used by mirrors that contain files that get created and
9 not changed. There is a minimal amount of sanity-check information in the
10 .rsyncsums file (size and mtime) so that the sum files can be shared with your
13 To use this patch, run these commands for a successful build:
15 patch -p1 <patches/checksum-reading.diff
16 ./configure (optional if already run)
19 diff --git a/flist.c b/flist.c
22 @@ -121,6 +121,7 @@ static char tmp_sum[MAX_DIGEST_LEN];
23 static char empty_sum[MAX_DIGEST_LEN];
24 static int flist_count_offset; /* for --delete --progress */
25 static int dir_count = 0;
26 +static struct file_list *checksum_flist = NULL;
28 static void flist_sort_and_clean(struct file_list *flist, int strip_root);
29 static void output_flist(struct file_list *flist);
30 @@ -313,6 +314,186 @@ static void flist_done_allocating(struct file_list *flist)
31 flist->pool_boundary = ptr;
34 +/* The len count is the length of the basename + 1 for the null. */
35 +static int add_checksum(const char *dirname, const char *basename, int len,
36 + OFF_T file_length, time_t mtime, const char *sum)
38 + struct file_struct *file;
39 + int alloc_len, extra_len;
42 + if (len == 10+1 && *basename == '.' && strcmp(basename, ".rsyncsums") == 0)
44 + if (file_length == 0)
47 + extra_len = (file_extra_cnt + (file_length > 0xFFFFFFFFu) + SUM_EXTRA_CNT)
49 +#if EXTRA_ROUNDING > 0
50 + if (extra_len & (EXTRA_ROUNDING * EXTRA_LEN))
51 + extra_len = (extra_len | (EXTRA_ROUNDING * EXTRA_LEN)) + EXTRA_LEN;
53 + alloc_len = FILE_STRUCT_LEN + extra_len + len;
54 + bp = pool_alloc(checksum_flist->file_pool, alloc_len, "add_checksum");
56 + memset(bp, 0, extra_len + FILE_STRUCT_LEN);
58 + file = (struct file_struct *)bp;
59 + bp += FILE_STRUCT_LEN;
61 + memcpy(bp, basename, len);
63 + file->mode = S_IFREG;
64 + file->modtime = mtime;
65 + file->len32 = (uint32)file_length;
66 + if (file_length > 0xFFFFFFFFu) {
67 + file->flags |= FLAG_LENGTH64;
68 + OPT_EXTRA(file, 0)->unum = (uint32)(file_length >> 32);
70 + file->dirname = dirname;
72 + memcpy(bp, sum, checksum_len);
74 + flist_expand(checksum_flist, 1);
75 + checksum_flist->files[checksum_flist->used++] = file;
77 + checksum_flist->sorted = checksum_flist->files;
82 +/* The dirname value must remain unchanged during the lifespan of the
83 + * created checksum_flist object because we use it directly. */
84 +static void read_checksums(const char *dirname)
86 + char line[MAXPATHLEN+1024], fbuf[MAXPATHLEN], sum[MAX_DIGEST_LEN];
93 + if (checksum_flist) {
94 + /* Reset the pool memory and empty the file-list array. */
95 + pool_free_old(checksum_flist->file_pool,
96 + pool_boundary(checksum_flist->file_pool, 0));
97 + checksum_flist->used = 0;
99 + checksum_flist = flist_new(FLIST_TEMP, "read_checksums");
101 + checksum_flist->low = 0;
102 + checksum_flist->high = -1;
107 + dlen = strlcpy(fbuf, dirname, sizeof fbuf);
108 + if (dlen >= (int)sizeof fbuf)
111 + fbuf[dlen++] = '/';
114 + strlcpy(fbuf+dlen, ".rsyncsums", sizeof fbuf - dlen);
115 + if (!(fp = fopen(fbuf, "r")))
118 + while (fgets(line, sizeof line, fp)) {
120 + if (protocol_version >= 30) {
121 + char *alt_sum = cp;
123 + while (*++cp == '=') {}
125 + while (isXDigit(cp)) cp++;
126 + if (cp - alt_sum != MD4_DIGEST_LEN*2 || *cp != ' ')
128 + while (*++cp == ' ') {}
134 + for (i = 0; i < checksum_len*2; i++, cp++) {
136 + if (isXDigit(cp)) {
140 + x = (*cp & 0xF) + 9;
153 + while (*++cp == ' ') {}
155 + if (protocol_version < 30) {
156 + char *alt_sum = cp;
158 + while (*++cp == '=') {}
160 + while (isXDigit(cp)) cp++;
161 + if (cp - alt_sum != MD5_DIGEST_LEN*2 || *cp != ' ')
163 + while (*++cp == ' ') {}
167 + while (isDigit(cp))
168 + file_length = file_length * 10 + *cp++ - '0';
171 + while (*++cp == ' ') {}
174 + while (isDigit(cp))
175 + mtime = mtime * 10 + *cp++ - '0';
178 + while (*++cp == ' ') {}
180 + /* Ignore ctime. */
181 + while (isDigit(cp))
185 + while (*++cp == ' ') {}
187 + /* Ignore inode. */
188 + while (isDigit(cp))
192 + while (*++cp == ' ') {}
195 + while (len && (cp[len-1] == '\n' || cp[len-1] == '\r'))
199 + cp[len++] = '\0'; /* len now counts the null */
200 + if (strchr(cp, '/'))
202 + if (len > MAXPATHLEN)
205 + strlcpy(fbuf+dlen, cp, sizeof fbuf - dlen);
207 + add_checksum(dirname, cp, len, file_length, mtime, sum);
211 + clean_flist(checksum_flist, 0);
214 int push_pathname(const char *dir, int len)
217 @@ -1003,7 +1184,7 @@ struct file_struct *make_file(const char *fname, struct file_list *flist,
218 STRUCT_STAT *stp, int flags, int filter_level)
220 static char *lastdir;
221 - static int lastdir_len = -1;
222 + static int lastdir_len = -2;
223 struct file_struct *file;
224 char thisname[MAXPATHLEN];
225 char linkname[MAXPATHLEN];
226 @@ -1136,9 +1317,16 @@ struct file_struct *make_file(const char *fname, struct file_list *flist,
227 memcpy(lastdir, thisname, len);
230 + if (always_checksum && am_sender && flist)
231 + read_checksums(lastdir);
236 + if (always_checksum && am_sender && flist && lastdir_len == -2) {
238 + read_checksums("");
241 basename_len = strlen(basename) + 1; /* count the '\0' */
244 @@ -1214,11 +1402,21 @@ struct file_struct *make_file(const char *fname, struct file_list *flist,
248 - if (always_checksum && am_sender && S_ISREG(st.st_mode))
249 - file_checksum(thisname, tmp_sum, st.st_size);
251 F_PATHNAME(file) = pathname;
253 + if (always_checksum && am_sender && S_ISREG(st.st_mode)) {
255 + if (flist && (j = flist_find(checksum_flist, file)) >= 0) {
256 + struct file_struct *fp = checksum_flist->sorted[j];
257 + if (F_LENGTH(fp) == st.st_size
258 + && fp->modtime == st.st_mtime)
259 + memcpy(tmp_sum, F_SUM(fp), MAX_DIGEST_LEN);
261 + file_checksum(thisname, tmp_sum, st.st_size);
263 + file_checksum(thisname, tmp_sum, st.st_size);
266 /* This code is only used by the receiver when it is building
267 * a list of files for a delete pass. */
268 if (keep_dirlinks && linkname_len && flist) {
269 @@ -2074,7 +2272,11 @@ struct file_list *send_file_list(int f, int argc, char *argv[])
270 * file-list to check if this is a 1-file xfer. */
271 send_extra_file_list(f, 1);
277 + if (checksum_updating && always_checksum && flist_eof)
278 + read_checksums(NULL);
282 diff --git a/ifuncs.h b/ifuncs.h
285 @@ -64,6 +64,12 @@ isDigit(const char *ptr)
289 +isXDigit(const char *ptr)
291 + return isxdigit(*(unsigned char *)ptr);
295 isPrint(const char *ptr)
297 return isprint(*(unsigned char *)ptr);
298 diff --git a/support/rsyncsums b/support/rsyncsums
301 +++ b/support/rsyncsums
307 +use Cwd qw(abs_path cwd);
311 +our $SUMS_FILE = '.rsyncsums';
313 +&Getopt::Long::Configure('bundling');
314 +&usage if !&GetOptions(
315 + 'recurse|r' => \( my $recurse_opt ),
316 + 'simple-cmp|s' => \( my $ignore_ctime_and_inode ),
317 + 'check|c' => \( my $check_opt ),
318 + 'verbose|v+' => \( my $verbosity = 0 ),
319 + 'help|h' => \( my $help_opt ),
321 +&usage if $help_opt;
323 +my $start_dir = cwd();
326 +@dirs = '.' unless @dirs;
335 +my $md4 = Digest::MD4->new;
336 +my $md5 = Digest::MD5->new;
339 + my $dir = shift @dirs;
341 + if (!chdir($dir)) {
342 + warn "Unable to chdir to $dir: $!\n";
345 + if (!opendir(DP, '.')) {
346 + warn "Unable to opendir $dir: $!\n";
351 + $reldir =~ s#^$start_dir(/|$)# $1 ? '' : '.' #eo;
353 + print "$reldir ... ";
354 + print "\n" if $check_opt;
359 + if (open(FP, '<', $SUMS_FILE)) {
362 + my($sum4, $sum5, $size, $mtime, $ctime, $inode, $fn) = split(' ', $_, 7);
363 + $cache{$fn} = [ 0, $sum4, $sum5, $size, $mtime, $ctime & 0xFFFFFFFF, $inode & 0xFFFFFFFF ];
371 + my $update_cnt = 0;
372 + while (defined(my $fn = readdir(DP))) {
373 + next if $fn =~ /^\.\.?$/ || $fn =~ /^\Q$SUMS_FILE\E$/o || -l $fn;
375 + push(@subdirs, "$dir/$fn") unless $fn =~ /^(CVS|\.svn|\.git|\.bzr)$/;
380 + my($size,$mtime,$ctime,$inode) = (stat(_))[7,9,10,1];
381 + my $ref = $cache{$fn};
383 + if (defined $ref) {
384 + delete $cache{$fn};
386 + if (!$check_opt && !$update_cnt++) {
387 + print "UPDATING\n" if $verbosity;
395 + if (defined $ref) {
397 + if ($$ref[3] == $size
398 + && $$ref[4] == $mtime
399 + && ($ignore_ctime_and_inode || ($$ref[5] == $ctime && $$ref[6] == $inode))
400 + && $$ref[1] !~ /=/ && $$ref[2] !~ /=/) {
404 + if (!$update_cnt++) {
405 + print "UPDATING\n" if $verbosity;
409 + if (!open(IN, $fn)) {
410 + print STDERR "Unable to read $fn: $!\n";
411 + if (defined $ref) {
412 + delete $cache{$fn};
420 + while (sysread(IN, $_, 64*1024)) {
424 + $sum4 = $md4->hexdigest;
425 + $sum5 = $md5->hexdigest;
426 + print " $sum4 $sum5" if $verbosity > 2;
427 + print " $fn" if $verbosity > 1;
428 + my($size2,$mtime2,$ctime2,$inode2) = (stat(IN))[7,9,10,1];
429 + last if $size == $size2 && $mtime == $mtime2
430 + && ($ignore_ctime_and_inode || ($ctime == $ctime2 && $inode == $inode2));
436 + print " REREADING\n" if $verbosity > 1;
443 + if (!defined $ref) {
445 + } elsif ($sum4 ne $$ref[1] || $sum5 ne $$ref[2]) {
448 + print " OK\n" if $verbosity > 1;
451 + if ($verbosity < 2) {
452 + print $verbosity ? ' ' : "$reldir/";
458 + print "\n" if $verbosity > 1;
459 + $cache{$fn} = [ 1, $sum4, $sum5, $size, $mtime, $ctime & 0xFFFFFFFF, $inode & 0xFFFFFFFF ];
465 + unshift(@dirs, sort @subdirs) if $recurse_opt;
469 + } elsif ($d_cnt == 0) {
471 + print "(removed $SUMS_FILE) " if $verbosity;
472 + unlink($SUMS_FILE);
474 + print "empty\n" if $verbosity;
475 + } elsif ($update_cnt || $d_cnt != $f_cnt) {
476 + print "UPDATING\n" if $verbosity && !$update_cnt;
477 + open(FP, '>', $SUMS_FILE) or die "Unable to write $dir/$SUMS_FILE: $!\n";
479 + foreach my $fn (sort keys %cache) {
480 + my $ref = $cache{$fn};
481 + my($found, $sum4, $sum5, $size, $mtime, $ctime, $inode) = @$ref;
482 + next unless $found;
483 + printf FP '%s %s %10d %10d %10d %10d %s' . "\n", $sum4, $sum5, $size, $mtime, $ctime, $inode, $fn;
487 + print "ok\n" if $verbosity;
496 +Usage: rsyncsums [OPTIONS] [DIRS]
499 + -r, --recurse Update $SUMS_FILE files in subdirectories too.
500 + -s, --simple-cmp Ignore ctime and inode values when comparing identicality.
501 + -c, --check Check if the checksums are right (doesn't update).
502 + -v, --verbose Mention what we're doing. Repeat for more info.
503 + -h, --help Display this help message.