--- /dev/null
+From rusty@rustcorp.com.au Wed Apr 3 17:18:42 2002
+Return-Path: <rusty@rustcorp.com.au>
+Delivered-To: mbp@samba.org
+Received: from wagner.rustcorp.com.au (sydney1.au.ibm.com [202.135.142.193])
+ by lists.samba.org (Postfix) with ESMTP id EA7B849DC
+ for <mbp@samba.org>; Tue, 2 Apr 2002 23:06:29 -0800 (PST)
+Received: from wagner.rustcorp.com.au ([127.0.0.1] helo=rustcorp.com.au)
+ by wagner.rustcorp.com.au with esmtp (Exim 3.35 #1 (Debian))
+ id 16set7-0000pL-00
+ for <mbp@samba.org>; Wed, 03 Apr 2002 17:08:57 +1000
+From: Rusty Russell <rusty@rustcorp.com.au>
+To: Martin Pool <mbp@samba.org>
+Subject: Re: gzip patch
+In-reply-to: Your message of "Wed, 03 Apr 2002 12:04:59 +1000."
+ <20020403020455.GC18851@samba.org>
+Date: Wed, 03 Apr 2002 17:08:57 +1000
+Sender: rusty@rustcorp.com.au
+Message-Id: <E16set7-0000pL-00@wagner.rustcorp.com.au>
+Status: RO
+X-Status: A
+Content-Length: 12810
+Lines: 461
+
+In message <20020403020455.GC18851@samba.org> you write:
+> Hi,
+>
+> I think you said the other day that you had a working --rsyncable
+> patch for gzip. Could I have it please?
+
+Hi Martin,
+
+ Just got your mail, sorry for the delay. Found old patch on
+google, and updated it for 2.5.4 (I know, but that's what apt-get
+source gave me).
+
+Compiles, otherwise untested.
+Rusty.
+--
+ Anyone who quotes me in their sig is an idiot. -- Rusty Russell.
+
+diff -urN rsync-2.5.4/Makefile.in rsync-2.5.4-fuzzy/Makefile.in
+--- rsync-2.5.4/Makefile.in Tue Feb 26 05:48:25 2002
++++ rsync-2.5.4-fuzzy/Makefile.in Wed Apr 3 16:35:55 2002
+@@ -28,7 +28,7 @@
+ ZLIBOBJ=zlib/deflate.o zlib/infblock.o zlib/infcodes.o zlib/inffast.o \
+ zlib/inflate.o zlib/inftrees.o zlib/infutil.o zlib/trees.o \
+ zlib/zutil.o zlib/adler32.o
+-OBJS1=rsync.o generator.o receiver.o cleanup.o sender.o exclude.o util.o main.o checksum.o match.o syscall.o log.o backup.o
++OBJS1=rsync.o generator.o receiver.o cleanup.o sender.o exclude.o util.o main.o checksum.o match.o syscall.o log.o backup.o alternate.o
+ OBJS2=options.o flist.o io.o compat.o hlink.o token.o uidlist.o socket.o fileio.o batch.o \
+ clientname.o
+ DAEMON_OBJ = params.o loadparm.o clientserver.o access.o connection.o authenticate.o
+diff -urN rsync-2.5.4/alternate.c rsync-2.5.4-fuzzy/alternate.c
+--- rsync-2.5.4/alternate.c Thu Jan 1 10:00:00 1970
++++ rsync-2.5.4-fuzzy/alternate.c Wed Apr 3 17:04:15 2002
+@@ -0,0 +1,117 @@
++#include "rsync.h"
++
++extern char *compare_dest;
++extern int verbose;
++
++/* Alternate methods for opening files, if local doesn't exist */
++/* Sanity check that we are about to open regular file */
++int do_open_regular(char *fname)
++{
++ STRUCT_STAT st;
++
++ if (do_stat(fname, &st) == 0 && S_ISREG(st.st_mode))
++ return do_open(fname, O_RDONLY, 0);
++
++ return -1;
++}
++
++static void split_names(char *fname, char **dirname, char **basename)
++{
++ char *slash;
++
++ slash = strrchr(fname, '/');
++ if (slash) {
++ *dirname = fname;
++ *slash = '\0';
++ *basename = slash+1;
++ } else {
++ *basename = fname;
++ *dirname = ".";
++ }
++}
++
++static unsigned int measure_name(const char *name,
++ const char *basename,
++ const char *ext)
++{
++ int namelen = strlen(name);
++ int extlen = strlen(ext);
++ unsigned int score = 0;
++
++ /* Extensions must match */
++ if (namelen <= extlen || strcmp(name+namelen-extlen, ext) != 0)
++ return 0;
++
++ /* Now score depends on similarity of prefix */
++ for (; *name==*basename && *name; name++, basename++)
++ score++;
++ return score;
++}
++
++int open_alternate_base_fuzzy(const char *fname)
++{
++ DIR *d;
++ struct dirent *di;
++ char *basename, *dirname;
++ char mangled_name[MAXPATHLEN];
++ char bestname[MAXPATHLEN];
++ unsigned int bestscore = 0;
++ const char *ext;
++
++ /* FIXME: can we assume fname fits here? */
++ strcpy(mangled_name, fname);
++
++ split_names(mangled_name, &dirname, &basename);
++ d = opendir(dirname);
++ if (!d) {
++ rprintf(FERROR,"recv_generator opendir(%s): %s\n",
++ dirname,strerror(errno));
++ return -1;
++ }
++
++ /* Get final extension, eg. .gz; never full basename though. */
++ ext = strrchr(basename + 1, '.');
++ if (!ext)
++ ext = basename + strlen(basename); /* ext = "" */
++
++ while ((di = readdir(d)) != NULL) {
++ const char *dname = d_name(di);
++ unsigned int score;
++
++ if (strcmp(dname,".")==0 ||
++ strcmp(dname,"..")==0)
++ continue;
++
++ score = measure_name(dname, basename, ext);
++ if (verbose > 4)
++ rprintf(FINFO,"fuzzy score for %s = %u\n",
++ dname, score);
++ if (score > bestscore) {
++ strcpy(bestname, dname);
++ bestscore = score;
++ }
++ }
++ closedir(d);
++
++ /* Found a candidate. */
++ if (bestscore != 0) {
++ char fuzzyname[MAXPATHLEN];
++
++ snprintf(fuzzyname,MAXPATHLEN,"%s/%s", dirname, bestname);
++ if (verbose > 2)
++ rprintf(FINFO,"fuzzy match %s->%s\n",
++ fname, fuzzyname);
++ return do_open_regular(fuzzyname);
++ }
++ return -1;
++}
++
++int open_alternate_base_comparedir(const char *fname)
++{
++ char fnamebuf[MAXPATHLEN];
++ /* try the file at compare_dest instead */
++ snprintf(fnamebuf,MAXPATHLEN,"%s/%s",compare_dest,fname);
++
++ /* FIXME: now follows symlinks... */
++ return do_open_regular(fnamebuf);
++}
+diff -urN rsync-2.5.4/generator.c rsync-2.5.4-fuzzy/generator.c
+--- rsync-2.5.4/generator.c Fri Feb 8 03:36:12 2002
++++ rsync-2.5.4-fuzzy/generator.c Wed Apr 3 17:00:06 2002
+@@ -42,11 +42,12 @@
+ extern int always_checksum;
+ extern int modify_window;
+ extern char *compare_dest;
++extern int fuzzy;
+
+
+ /* choose whether to skip a particular file */
+ static int skip_file(char *fname,
+- struct file_struct *file, STRUCT_STAT *st)
++ struct file_struct *file, const STRUCT_STAT *st)
+ {
+ if (st->st_size != file->length) {
+ return 0;
+@@ -185,7 +186,61 @@
+ return s;
+ }
+
++/* Returns -1 for can't open (null file), -2 for skip */
++static int open_base_file(struct file_struct *file,
++ char *fname,
++ int statret,
++ STRUCT_STAT *st)
++{
++ int fd = -1;
++
++ if (statret == 0) {
++ if (S_ISREG(st->st_mode)) {
++ if (update_only
++ && cmp_modtime(st->st_mtime, file->modtime) > 0) {
++ if (verbose > 1)
++ rprintf(FINFO,"%s is newer\n",fname);
++ return -2;
++ }
++ if (skip_file(fname, file, st)) {
++ set_perms(fname, file, st, 1);
++ return -2;
++ }
++ fd = do_open(fname, O_RDONLY, 0);
++ if (fd == -1) {
++ rprintf(FERROR,"failed to open %s, continuing : %s\n",fname,strerror(errno));
++ return -1;
++ } else
++ return fd;
++ } else {
++ /* Try to use symlink contents */
++ if (S_ISLNK(st->st_mode)) {
++ fd = do_open_regular(fname);
++ /* Don't delete yet; receiver will need it */
++ } else {
++ if (delete_file(fname) != 0) {
++ if (fd != -1)
++ close(fd);
++ return -2;
++ }
++ }
++ }
++ }
++
++ if (fd == -1 && compare_dest != NULL)
++ fd = open_alternate_base_comparedir(fname);
+
++ if (fd == -1 && fuzzy)
++ fd = open_alternate_base_fuzzy(fname);
++
++ /* Update stat to understand size */
++ if (fd != -1) {
++ if (do_fstat(fd, st) != 0)
++ rprintf(FERROR,"fstat %s : %s\n",fname,strerror(errno));
++ }
++
++ return fd;
++}
+
+ /*
+ * Acts on file number I from FLIST, whose name is fname.
+@@ -203,9 +258,6 @@
+ struct sum_struct *s;
+ int statret;
+ struct file_struct *file = flist->files[i];
+- char *fnamecmp;
+- char fnamecmpbuf[MAXPATHLEN];
+- extern char *compare_dest;
+ extern int list_only;
+ extern int preserve_perms;
+ extern int only_existing;
+@@ -341,82 +393,29 @@
+ return;
+ }
+
+- fnamecmp = fname;
+-
+- if ((statret == -1) && (compare_dest != NULL)) {
+- /* try the file at compare_dest instead */
+- int saveerrno = errno;
+- snprintf(fnamecmpbuf,MAXPATHLEN,"%s/%s",compare_dest,fname);
+- statret = link_stat(fnamecmpbuf,&st);
+- if (!S_ISREG(st.st_mode))
+- statret = -1;
+- if (statret == -1)
+- errno = saveerrno;
+- else
+- fnamecmp = fnamecmpbuf;
+- }
+-
+- if (statret == -1) {
+- if (errno == ENOENT) {
+- write_int(f_out,i);
+- if (!dry_run) send_sums(NULL,f_out);
+- } else {
+- if (verbose > 1)
+- rprintf(FERROR, RSYNC_NAME
+- ": recv_generator failed to open \"%s\": %s\n",
+- fname, strerror(errno));
+- }
+- return;
+- }
+-
+- if (!S_ISREG(st.st_mode)) {
+- if (delete_file(fname) != 0) {
+- return;
+- }
+-
+- /* now pretend the file didn't exist */
+- write_int(f_out,i);
+- if (!dry_run) send_sums(NULL,f_out);
+- return;
+- }
+-
+- if (opt_ignore_existing && fnamecmp == fname) {
+- if (verbose > 1)
+- rprintf(FINFO,"%s exists\n",fname);
+- return;
+- }
+-
+- if (update_only && cmp_modtime(st.st_mtime,file->modtime)>0 && fnamecmp == fname) {
++ /* Failed to stat for some other reason. */
++ if (statret == -1 && errno != ENOENT) {
+ if (verbose > 1)
+- rprintf(FINFO,"%s is newer\n",fname);
++ rprintf(FERROR, RSYNC_NAME
++ ": recv_generator failed to open \"%s\": %s\n",
++ fname, strerror(errno));
+ return;
+ }
+
+- if (skip_file(fname, file, &st)) {
+- if (fnamecmp == fname)
+- set_perms(fname,file,&st,1);
+- return;
+- }
+-
+- if (dry_run) {
+- write_int(f_out,i);
++ fd = open_base_file(file, fname, statret, &st);
++ if (fd == -2)
+ return;
+- }
+-
+- if (whole_file) {
+- write_int(f_out,i);
+- send_sums(NULL,f_out);
+- return;
+- }
+-
+- /* open the file */
+- fd = do_open(fnamecmp, O_RDONLY, 0);
+
+- if (fd == -1) {
+- rprintf(FERROR,RSYNC_NAME": failed to open \"%s\", continuing : %s\n",fnamecmp,strerror(errno));
+- /* pretend the file didn't exist */
++ if ((whole_file || dry_run) && fd != -1) {
++ close(fd);
++ fd = -1;
++ }
++
++ if (fd == -1) {
++ /* the file didn't exist, or we can pretend it doesn't */
+ write_int(f_out,i);
+- send_sums(NULL,f_out);
++ if (!dry_run)
++ send_sums(NULL,f_out);
+ return;
+ }
+
+@@ -427,7 +426,7 @@
+ }
+
+ if (verbose > 3)
+- rprintf(FINFO,"gen mapped %s of size %.0f\n",fnamecmp,(double)st.st_size);
++ rprintf(FINFO,"gen mapped %s of size %.0f\n",fname,(double)st.st_size);
+
+ s = generate_sums(buf,st.st_size,adapt_block_size(file, block_size));
+
+diff -urN rsync-2.5.4/options.c rsync-2.5.4-fuzzy/options.c
+--- rsync-2.5.4/options.c Thu Feb 28 09:49:57 2002
++++ rsync-2.5.4-fuzzy/options.c Wed Apr 3 16:43:54 2002
+@@ -73,6 +73,7 @@
+ #else
+ int modify_window=0;
+ #endif
++int fuzzy=0;
+ int blocking_io=-1;
+
+ /** Network address family. **/
+@@ -245,6 +246,7 @@
+ rprintf(F," --bwlimit=KBPS limit I/O bandwidth, KBytes per second\n");
+ rprintf(F," --write-batch=PREFIX write batch fileset starting with PREFIX\n");
+ rprintf(F," --read-batch=PREFIX read batch fileset starting with PREFIX\n");
++ rprintf(F," --fuzzy use similar file as basis if it does't exist\n");
+ rprintf(F," -h, --help show this help screen\n");
+ #ifdef INET6
+ rprintf(F," -4 prefer IPv4\n");
+@@ -340,6 +342,7 @@
+ {"hard-links", 'H', POPT_ARG_NONE, &preserve_hard_links},
+ {"read-batch", 0, POPT_ARG_STRING, &batch_prefix, OPT_READ_BATCH},
+ {"write-batch", 0, POPT_ARG_STRING, &batch_prefix, OPT_WRITE_BATCH},
++ {"fuzzy", 0, POPT_ARG_NONE, &fuzzy},
+ #ifdef INET6
+ {0, '4', POPT_ARG_VAL, &default_af_hint, AF_INET },
+ {0, '6', POPT_ARG_VAL, &default_af_hint, AF_INET6 },
+@@ -757,7 +760,9 @@
+ args[ac++] = "--compare-dest";
+ args[ac++] = compare_dest;
+ }
+-
++
++ if (fuzzy && am_sender)
++ args[ac++] = "--fuzzy";
+
+ *argc = ac;
+ }
+diff -urN rsync-2.5.4/proto.h rsync-2.5.4-fuzzy/proto.h
+--- rsync-2.5.4/proto.h Sat Feb 23 11:05:06 2002
++++ rsync-2.5.4-fuzzy/proto.h Wed Apr 3 16:35:25 2002
+@@ -256,3 +256,6 @@
+ int cmp_modtime(time_t file1, time_t file2);
+ int _Insure_trap_error(int a1, int a2, int a3, int a4, int a5, int a6);
+ int sys_gettimeofday(struct timeval *tv);
++int do_open_regular(char *fname);
++int open_alternate_base_fuzzy(const char *fname);
++int open_alternate_base_comparedir(const char *fname);
+diff -urN rsync-2.5.4/receiver.c rsync-2.5.4-fuzzy/receiver.c
+--- rsync-2.5.4/receiver.c Thu Feb 14 05:42:20 2002
++++ rsync-2.5.4-fuzzy/receiver.c Wed Apr 3 16:46:46 2002
+@@ -36,6 +36,7 @@
+ extern char *compare_dest;
+ extern int make_backups;
+ extern char *backup_suffix;
++extern int fuzzy;
+
+ static struct delete_list {
+ DEV64_T dev;
+@@ -307,8 +308,6 @@
+ char *fname;
+ char template[MAXPATHLEN];
+ char fnametmp[MAXPATHLEN];
+- char *fnamecmp;
+- char fnamecmpbuf[MAXPATHLEN];
+ struct map_struct *buf;
+ int i;
+ struct file_struct *file;
+@@ -366,28 +365,24 @@
+ if (verbose > 2)
+ rprintf(FINFO,"recv_files(%s)\n",fname);
+
+- fnamecmp = fname;
+-
+ /* open the file */
+- fd1 = do_open(fnamecmp, O_RDONLY, 0);
++ fd1 = do_open(fname, O_RDONLY, 0);
+
+- if ((fd1 == -1) && (compare_dest != NULL)) {
+- /* try the file at compare_dest instead */
+- snprintf(fnamecmpbuf,MAXPATHLEN,"%s/%s",
+- compare_dest,fname);
+- fnamecmp = fnamecmpbuf;
+- fd1 = do_open(fnamecmp, O_RDONLY, 0);
+- }
++ if (fd1 == -1 && compare_dest != NULL)
++ fd1 = open_alternate_base_comparedir(fname);
++
++ if (fd1 == -1 && fuzzy)
++ fd1 = open_alternate_base_fuzzy(fname);
+
+ if (fd1 != -1 && do_fstat(fd1,&st) != 0) {
+- rprintf(FERROR,"fstat %s : %s\n",fnamecmp,strerror(errno));
++ rprintf(FERROR,"fstat %s : %s\n",fname,strerror(errno));
+ receive_data(f_in,NULL,-1,NULL,file->length);
+ close(fd1);
+ continue;
+ }
+
+ if (fd1 != -1 && !S_ISREG(st.st_mode)) {
+- rprintf(FERROR,"%s : not a regular file (recv_files)\n",fnamecmp);
++ rprintf(FERROR,"%s : not a regular file (recv_files)\n",fname);
+ receive_data(f_in,NULL,-1,NULL,file->length);
+ close(fd1);
+ continue;
+@@ -403,7 +398,7 @@
+ if (fd1 != -1 && st.st_size > 0) {
+ buf = map_file(fd1,st.st_size);
+ if (verbose > 2)
+- rprintf(FINFO,"recv mapped %s of size %.0f\n",fnamecmp,(double)st.st_size);
++ rprintf(FINFO,"recv mapped %s of size %.0f\n",fname,(double)st.st_size);
+ } else {
+ buf = NULL;
+ }
+