Move an extern.
[rsync.git/patches.git] / detect-renamed.diff
1 This patch adds the --detect-renamed option which makes rsync notice files
2 that either (1) match in size & modify-time (plus the basename, if possible)
3 or (2) match in size & checksum (when --checksum was also specified) and use
4 each match as an alternate basis file to speed up the transfer.
5
6 The algorithm attempts to scan the receiving-side's files in an efficient
7 manner.  If --delete[-before] is enabled, we'll take advantage of the
8 pre-transfer delete pass to prepare any alternate-basis-file matches we
9 might find.  If --delete-before is not enabled, rsync does the rename scan
10 during the regular file-sending scan (scanning each directory right before
11 the generator starts updating files from that dir).  In this latter mode,
12 rsync might delay the updating of a file (if no alternate-basis match was
13 yet found) until the full scan of the receiving side is complete, at which
14 point any delayed files are processed.
15
16 I chose to hard-link the alternate-basis files into a ".~tmp~" subdir that
17 takes advantage of rsync's pre-existing partial-dir logic.  This uses less
18 memory than trying to keep track of the matches internally, and also allows
19 any deletions or file-updates to occur normally without interfering with
20 these alternate-basis discoveries.
21
22 To use this patch, run these commands for a successful build:
23
24     patch -p1 <patches/detect-renamed.diff
25     ./configure                                 (optional if already run)
26     make
27
28 TODO:
29
30   We need to never return a match from fattr_find() that has a basis
31   file.  This will ensure that we don't try to give a renamed file to
32   a file that can't use it, while missing out on giving it to a file
33   that could use it.
34
35 based-on: e71130fd7739562cd190a92d3f8bcbda02168892
36 diff --git a/compat.c b/compat.c
37 --- a/compat.c
38 +++ b/compat.c
39 @@ -41,6 +41,7 @@ extern int read_batch;
40  extern int delay_updates;
41  extern int checksum_seed;
42  extern int basis_dir_cnt;
43 +extern int detect_renamed;
44  extern int prune_empty_dirs;
45  extern int protocol_version;
46  extern int protect_args;
47 @@ -123,6 +124,7 @@ void set_allow_inc_recurse(void)
48                 allow_inc_recurse = 0;
49         else if (!am_sender
50          && (delete_before || delete_after
51 +         || detect_renamed
52           || delay_updates || prune_empty_dirs))
53                 allow_inc_recurse = 0;
54         else if (am_server && !local_server
55 diff --git a/flist.c b/flist.c
56 --- a/flist.c
57 +++ b/flist.c
58 @@ -60,6 +60,7 @@ extern int non_perishable_cnt;
59  extern int prune_empty_dirs;
60  extern int copy_links;
61  extern int copy_unsafe_links;
62 +extern int detect_renamed;
63  extern int protocol_version;
64  extern int sanitize_paths;
65  extern int munge_symlinks;
66 @@ -120,6 +121,8 @@ static int64 tmp_dev = -1, tmp_ino;
67  #endif
68  static char tmp_sum[MAX_DIGEST_LEN];
69  
70 +struct file_list the_fattr_list;
71 +
72  static char empty_sum[MAX_DIGEST_LEN];
73  static int flist_count_offset; /* for --delete --progress */
74  static int dir_count = 0;
75 @@ -287,6 +290,45 @@ static int is_excluded(const char *fname, int is_dir, int filter_level)
76         return 0;
77  }
78  
79 +static int fattr_compare(struct file_struct **file1, struct file_struct **file2)
80 +{
81 +       struct file_struct *f1 = *file1;
82 +       struct file_struct *f2 = *file2;
83 +       int64 len1 = F_LENGTH(f1), len2 = F_LENGTH(f2);
84 +       int diff;
85 +       /* Nameless, non-regular, and empty files all sort to the end. */
86 +       if (!f1->basename || !S_ISREG(f1->mode) || !len1) {
87 +               if (!f2->basename || !S_ISREG(f2->mode) || !len2)
88 +                       return 0;
89 +               return 1;
90 +       }
91 +       if (!f2->basename || !S_ISREG(f2->mode) || !len2)
92 +               return -1;
93 +
94 +       /* Sizes are int64: compare them instead of subtracting into diff. */
95 +       if (len1 != len2)
96 +               return len1 < len2 ? -1 : 1;
97 +       /* Equal sizes: order by checksum (--checksum) or else by mod-time. */
98 +       if (always_checksum) {
99 +               diff = u_memcmp(F_SUM(f1), F_SUM(f2), checksum_len);
100 +               if (diff)
101 +                       return diff;
102 +       } else if (f1->modtime != f2->modtime)
103 +               return f1->modtime < f2->modtime ? -1 : 1;
104 +       /* Then by basename, and finally by dirname (no dirname sorts first). */
105 +       diff = u_strcmp(f1->basename, f2->basename);
106 +       if (diff)
107 +               return diff;
108 +
109 +       if (f1->dirname == f2->dirname)
110 +               return 0;
111 +       if (!f1->dirname)
112 +               return -1;
113 +       if (!f2->dirname)
114 +               return 1;
115 +       return u_strcmp(f1->dirname, f2->dirname);
116 +}
117 +
118  static void send_directory(int f, struct file_list *flist,
119                            char *fbuf, int len, int flags);
120  
121 @@ -2421,6 +2463,25 @@ struct file_list *recv_file_list(int f)
122  
123         flist_sort_and_clean(flist, relative_paths);
124  
125 +       if (detect_renamed) {
126 +               int j = flist->used;
127 +               the_fattr_list.used = j;
128 +               the_fattr_list.files = new_array(struct file_struct *, j);
129 +               if (!the_fattr_list.files)
130 +                       out_of_memory("recv_file_list");
131 +               memcpy(the_fattr_list.files, flist->files,
132 +                      j * sizeof (struct file_struct *));
133 +               qsort(the_fattr_list.files, j,
134 +                     sizeof the_fattr_list.files[0], (int (*)())fattr_compare);
135 +               the_fattr_list.low = 0;
136 +               while (j-- > 0) {
137 +                       struct file_struct *fp = the_fattr_list.files[j];
138 +                       if (fp->basename && S_ISREG(fp->mode) && F_LENGTH(fp))
139 +                               break;
140 +               }
141 +               the_fattr_list.high = j;
142 +       }
143 +
144         if (protocol_version < 30) {
145                 /* Recv the io_error flag */
146                 if (ignore_errors)
147 diff --git a/generator.c b/generator.c
148 --- a/generator.c
149 +++ b/generator.c
150 @@ -78,6 +78,7 @@ extern char *basis_dir[MAX_BASIS_DIRS+1];
151  extern int compare_dest;
152  extern int copy_dest;
153  extern int link_dest;
154 +extern int detect_renamed;
155  extern int whole_file;
156  extern int list_only;
157  extern int read_batch;
158 @@ -96,6 +97,7 @@ extern char *backup_suffix;
159  extern int backup_suffix_len;
160  extern struct file_list *cur_flist, *first_flist, *dir_flist;
161  extern struct filter_list_struct daemon_filter_list;
162 +extern struct file_list the_fattr_list;
163  
164  int ignore_perishable = 0;
165  int non_perishable_cnt = 0;
166 @@ -103,6 +105,7 @@ int maybe_ATTRS_REPORT = 0;
167  
168  static dev_t dev_zero;
169  static int deletion_count = 0; /* used to implement --max-delete */
170 +static int unexplored_dirs = 1;
171  static int deldelay_size = 0, deldelay_cnt = 0;
172  static char *deldelay_buf = NULL;
173  static int deldelay_fd = -1;
174 @@ -113,7 +116,7 @@ static int need_retouch_dir_times;
175  static int need_retouch_dir_perms;
176  static const char *solo_file = NULL;
177  
178 -/* For calling delete_item() and delete_dir_contents(). */
179 +/* For calling delete_item(), delete_dir_contents(), and delete_in_dir(). */
180  #define DEL_NO_UID_WRITE       (1<<0) /* file/dir has our uid w/o write perm */
181  #define DEL_RECURSE            (1<<1) /* if dir, delete all contents */
182  #define DEL_DIR_IS_EMPTY       (1<<2) /* internal delete_FUNCTIONS use only */
183 @@ -122,6 +125,7 @@ static const char *solo_file = NULL;
184  #define DEL_FOR_SYMLINK        (1<<5) /* making room for a replacement symlink */
185  #define DEL_FOR_DEVICE         (1<<6) /* making room for a replacement device */
186  #define DEL_FOR_SPECIAL        (1<<7) /* making room for a replacement special */
187 +#define DEL_NO_DELETIONS       (1<<9) /* just check for renames w/o deleting */
188  
189  #define DEL_MAKE_ROOM (DEL_FOR_FILE|DEL_FOR_DIR|DEL_FOR_SYMLINK|DEL_FOR_DEVICE|DEL_FOR_SPECIAL)
190  
191 @@ -146,11 +150,121 @@ static int is_backup_file(char *fn)
192         return k > 0 && strcmp(fn+k, backup_suffix) == 0;
193  }
194  
195 +/* Binary-search the_fattr_list for a regular file that matches either (1)
196 + * the size & modified time (plus the basename, if possible) or (2) the size
197 + * & checksum.  Returns the index of the best match, or -1 if there is none
198 + * or if the match is this same file (it is up-to-date, not renamed). */
199 +static int fattr_find(struct file_struct *f, char *fname)
200 +{
201 +       int low = the_fattr_list.low, high = the_fattr_list.high;
202 +       int mid, ok_match = -1, good_match = -1;
203 +       struct file_struct *fmid;
204 +       int diff;
205 +       /* The list was sorted with fattr_compare(), so bisect it. */
206 +       while (low <= high) {
207 +               mid = (low + high) / 2;
208 +               fmid = the_fattr_list.files[mid];
209 +               if (F_LENGTH(fmid) != F_LENGTH(f)) {
210 +                       if (F_LENGTH(fmid) < F_LENGTH(f))
211 +                               low = mid + 1;
212 +                       else
213 +                               high = mid - 1;
214 +                       continue;
215 +               }
216 +               if (always_checksum) {
217 +                       /* We use the FLAG_FILE_SENT flag to indicate when we
218 +                        * have computed the checksum for an entry. */
219 +                       if (!(f->flags & FLAG_FILE_SENT)) {
220 +                               if (fmid->modtime == f->modtime
221 +                                && f_name_cmp(fmid, f) == 0)
222 +                                       return -1; /* assume we can't help */
223 +                               file_checksum(fname, F_SUM(f), F_LENGTH(f));
224 +                               f->flags |= FLAG_FILE_SENT;
225 +                       }
226 +                       diff = u_memcmp(F_SUM(fmid), F_SUM(f), checksum_len);
227 +                       if (diff) {
228 +                               if (diff < 0)
229 +                                       low = mid + 1;
230 +                               else
231 +                                       high = mid - 1;
232 +                               continue;
233 +                       }
234 +               } else {
235 +                       if (fmid->modtime != f->modtime) {
236 +                               if (fmid->modtime < f->modtime)
237 +                                       low = mid + 1;
238 +                               else
239 +                                       high = mid - 1;
240 +                               continue;
241 +                       }
242 +               }
243 +               ok_match = mid; /* size & checksum/mtime agree */
244 +               diff = u_strcmp(fmid->basename, f->basename);
245 +               if (diff == 0) {
246 +                       good_match = mid; /* basename agrees too */
247 +                       if (fmid->dirname == f->dirname)
248 +                               return -1; /* file is up-to-date */
249 +                       if (!fmid->dirname) {
250 +                               low = mid + 1;
251 +                               continue;
252 +                       }
253 +                       if (!f->dirname) {
254 +                               high = mid - 1;
255 +                               continue;
256 +                       }
257 +                       diff = u_strcmp(fmid->dirname, f->dirname);
258 +                       if (diff == 0)
259 +                               return -1; /* file is up-to-date */
260 +               }
261 +               if (diff < 0)
262 +                       low = mid + 1;
263 +               else
264 +                       high = mid - 1;
265 +       }
266 +
267 +       return good_match >= 0 ? good_match : ok_match;
268 +}
269 +
270 +static void look_for_rename(struct file_struct *file, char *fname)
271 +{
272 +       struct file_struct *fp;
273 +       char *partialptr, *fn;
274 +       STRUCT_STAT st;
275 +       int ndx;
276 +       /* Offer fname as the basis for any file-list entry that it matches. */
277 +       if (!partial_dir || (ndx = fattr_find(file, fname)) < 0)
278 +               return;
279 +       /* fp is the entry that fname matched, and fn is that entry's name. */
280 +       fp = the_fattr_list.files[ndx];
281 +       fn = f_name(fp, NULL);
282 +       /* We don't provide an alternate-basis file if there is a basis file. */
283 +       if (link_stat(fn, &st, 0) == 0)
284 +               return;
285 +
286 +       if (!dry_run) {
287 +               if ((partialptr = partial_dir_fname(fn)) == NULL
288 +                || !handle_partial_dir(partialptr, PDIR_CREATE))
289 +                       return;
290 +               /* We only use the file if we can hard-link it into our tmp dir. */
291 +               if (link(fname, partialptr) != 0) {
292 +                       if (errno != EEXIST)
293 +                               handle_partial_dir(partialptr, PDIR_DELETE);
294 +                       return;
295 +               }
296 +       }
297 +
298 +       /* I think this falls into the -vv category with "%s is uptodate", etc. */
299 +       if (verbose > 1)
300 +               rprintf(FINFO, "found renamed: %s => %s\n", fname, fn);
301 +}
302 +
303  /* Delete a file or directory.  If DEL_RECURSE is set in the flags, this will
304   * delete recursively.
305   *
306   * Note that fbuf must point to a MAXPATHLEN buffer if the mode indicates it's
307   * a directory! (The buffer is used for recursion, but returned unchanged.)
308 + *
309 + * Also note: --detect-renamed may use this routine with DEL_NO_DELETIONS set!
310   */
311  static enum delret delete_item(char *fbuf, uint16 mode, uint16 flags)
312  {
313 @@ -177,6 +291,8 @@ static enum delret delete_item(char *fbuf, uint16 mode, uint16 flags)
314                         goto check_ret;
315                 /* OK: try to delete the directory. */
316         }
317 +       if (flags & DEL_NO_DELETIONS)
318 +               return DR_SUCCESS;
319  
320         if (!(flags & DEL_MAKE_ROOM) && max_delete >= 0 && ++deletion_count > max_delete)
321                 return DR_AT_LIMIT;
322 @@ -232,6 +348,8 @@ static enum delret delete_item(char *fbuf, uint16 mode, uint16 flags)
323   * its contents, otherwise just checks for content.  Returns DR_SUCCESS or
324   * DR_NOT_EMPTY.  Note that fname must point to a MAXPATHLEN buffer!  (The
325   * buffer is used for recursion, but returned unchanged.)
326 + *
327 + * Note: --detect-renamed may use this routine with DEL_NO_DELETIONS set!
328   */
329  static enum delret delete_dir_contents(char *fname, uint16 flags)
330  {
331 @@ -251,7 +369,9 @@ static enum delret delete_dir_contents(char *fname, uint16 flags)
332         save_filters = push_local_filters(fname, dlen);
333  
334         non_perishable_cnt = 0;
335 +       file_extra_cnt += SUM_EXTRA_CNT;
336         dirlist = get_dirlist(fname, dlen, 0);
337 +       file_extra_cnt -= SUM_EXTRA_CNT;
338         ret = non_perishable_cnt ? DR_NOT_EMPTY : DR_SUCCESS;
339  
340         if (!dirlist->used)
341 @@ -291,7 +411,8 @@ static enum delret delete_dir_contents(char *fname, uint16 flags)
342                 if (S_ISDIR(fp->mode)) {
343                         if (delete_dir_contents(fname, flags | DEL_RECURSE) != DR_SUCCESS)
344                                 ret = DR_NOT_EMPTY;
345 -               }
346 +               } else if (detect_renamed && S_ISREG(fp->mode))
347 +                       look_for_rename(fp, fname);
348                 if (delete_item(fname, fp->mode, flags) != DR_SUCCESS)
349                         ret = DR_NOT_EMPTY;
350         }
351 @@ -456,13 +577,19 @@ static void do_delayed_deletions(char *delbuf)
352   * all the --delete-WHEN options.  Note that the fbuf pointer must point to a
353   * MAXPATHLEN buffer with the name of the directory in it (the functions we
354   * call will append names onto the end, but the old dir value will be restored
355 - * on exit). */
356 -static void delete_in_dir(char *fbuf, struct file_struct *file, dev_t *fs_dev)
357 + * on exit).
358 + *
359 + * Note:  --detect-renamed may use this routine with DEL_NO_DELETIONS set!
360 + */
361 +static void delete_in_dir(char *fbuf, struct file_struct *file, dev_t *fs_dev,
362 +                         int del_flags)
363  {
364         static int already_warned = 0;
365         struct file_list *dirlist;
366 -       char delbuf[MAXPATHLEN];
367 -       int dlen, i;
368 +       char *p, delbuf[MAXPATHLEN];
369 +       unsigned remainder;
370 +       int dlen, i, restore_dot = 0;
371 +       int save_uid_ndx = uid_ndx;
372  
373         if (!fbuf) {
374                 change_local_filter_dir(NULL, 0, 0);
375 @@ -476,17 +603,22 @@ static void delete_in_dir(char *fbuf, struct file_struct *file, dev_t *fs_dev)
376                 maybe_send_keepalive();
377  
378         if (io_error && !ignore_errors) {
379 -               if (already_warned)
380 +               if (!already_warned) {
381 +                       rprintf(FINFO,
382 +                           "IO error encountered -- skipping file deletion\n");
383 +                       already_warned = 1;
384 +               }
385 +               if (!detect_renamed)
386                         return;
387 -               rprintf(FINFO,
388 -                       "IO error encountered -- skipping file deletion\n");
389 -               already_warned = 1;
390 -               return;
391 +               del_flags |= DEL_NO_DELETIONS;
392         }
393  
394         dlen = strlen(fbuf);
395         change_local_filter_dir(fbuf, dlen, F_DEPTH(file));
396  
397 +       if (detect_renamed)
398 +               unexplored_dirs--;
399 +
400         if (one_file_system) {
401                 if (file->flags & FLAG_TOP_DIR)
402                         filesystem_dev = *fs_dev;
403 @@ -496,6 +628,14 @@ static void delete_in_dir(char *fbuf, struct file_struct *file, dev_t *fs_dev)
404  
405         dirlist = get_dirlist(fbuf, dlen, 0);
406  
407 +       p = fbuf + dlen;
408 +       if (dlen == 1 && *fbuf == '.') {
409 +               restore_dot = 1;
410 +               p = fbuf;
411 +       } else if (dlen != 1 || *fbuf != '/')
412 +               *p++ = '/';
413 +       remainder = MAXPATHLEN - (p - fbuf);
414 +
415         /* If an item in dirlist is not found in flist, delete it
416          * from the filesystem. */
417         for (i = dirlist->used; i--; ) {
418 @@ -508,6 +648,10 @@ static void delete_in_dir(char *fbuf, struct file_struct *file, dev_t *fs_dev)
419                                         f_name(fp, NULL));
420                         continue;
421                 }
422 +               if (detect_renamed && S_ISREG(fp->mode)) {
423 +                       strlcpy(p, fp->basename, remainder);
424 +                       look_for_rename(fp, fbuf);
425 +               }
426                 /* Here we want to match regardless of file type.  Replacement
427                  * of a file with one of another type is handled separately by
428                  * a delete_item call with a DEL_MAKE_ROOM flag. */
429 @@ -516,14 +660,19 @@ static void delete_in_dir(char *fbuf, struct file_struct *file, dev_t *fs_dev)
430                         if (!(fp->mode & S_IWUSR) && !am_root && fp->flags & FLAG_OWNED_BY_US)
431                                 flags |= DEL_NO_UID_WRITE;
432                         f_name(fp, delbuf);
433 -                       if (delete_during == 2) {
434 -                               if (!remember_delete(fp, delbuf, flags))
435 +                       if (delete_during == 2 && !(del_flags & DEL_NO_DELETIONS)) {
436 +                               if (!remember_delete(fp, delbuf, del_flags | flags))
437                                         break;
438                         } else
439 -                               delete_item(delbuf, fp->mode, flags);
440 -               }
441 +                               delete_item(delbuf, fp->mode, del_flags | flags);
442 +               } else if (detect_renamed && S_ISDIR(fp->mode))
443 +                       unexplored_dirs++;
444         }
445  
446 +       if (restore_dot)
447 +               fbuf[0] = '.';
448 +       fbuf[dlen] = '\0';
449 +
450         flist_free(dirlist);
451  }
452  
453 @@ -556,9 +705,9 @@ static void do_delete_pass(void)
454                  || !S_ISDIR(st.st_mode))
455                         continue;
456  
457 -               delete_in_dir(fbuf, file, &st.st_dev);
458 +               delete_in_dir(fbuf, file, &st.st_dev, 0);
459         }
460 -       delete_in_dir(NULL, NULL, &dev_zero);
461 +       delete_in_dir(NULL, NULL, &dev_zero, 0);
462  
463         if (do_progress && !am_server)
464                 rprintf(FINFO, "                    \r");
465 @@ -1269,6 +1418,7 @@ static void list_file_entry(struct file_struct *f)
466         }
467  }
468  
469 +static struct bitbag *delayed_bits = NULL;
470  static int phase = 0;
471  static int dflt_perms;
472  
473 @@ -1556,9 +1706,12 @@ static void recv_generator(char *fname, struct file_struct *file, int ndx,
474                 }
475                 else if (delete_during && f_out != -1 && !phase
476                     && !(file->flags & FLAG_MISSING_DIR)) {
477 -                       if (file->flags & FLAG_CONTENT_DIR)
478 -                               delete_in_dir(fname, file, &real_sx.st.st_dev);
479 -                       else
480 +                       if (file->flags & FLAG_CONTENT_DIR) {
481 +                               if (detect_renamed && real_ret != 0)
482 +                                       unexplored_dirs++;
483 +                               delete_in_dir(fname, file, &real_sx.st.st_dev,
484 +                                             delete_during < 0 ? DEL_NO_DELETIONS : 0);
485 +                       } else
486                                 change_local_filter_dir(fname, strlen(fname), F_DEPTH(file));
487                 }
488                 goto cleanup;
489 @@ -1842,8 +1995,14 @@ static void recv_generator(char *fname, struct file_struct *file, int ndx,
490                         goto cleanup;
491                 }
492  #endif
493 -               if (stat_errno == ENOENT)
494 +               if (stat_errno == ENOENT) {
495 +                       if (detect_renamed && unexplored_dirs > 0
496 +                        && F_LENGTH(file)) {
497 +                               bitbag_set_bit(delayed_bits, ndx);
498 +                               return;
499 +                       }
500                         goto notify_others;
501 +               }
502                 rsyserr(FERROR_XFER, stat_errno, "recv_generator: failed to stat %s",
503                         full_fname(fname));
504                 goto cleanup;
505 @@ -2246,6 +2405,12 @@ void generate_files(int f_out, const char *local_name)
506         if (verbose > 2)
507                 rprintf(FINFO, "generator starting pid=%ld\n", (long)getpid());
508  
509 +       if (detect_renamed) {
510 +               delayed_bits = bitbag_create(cur_flist->used);
511 +               if (!delete_before && !delete_during)
512 +                       delete_during = -1;
513 +       }
514 +
515         if (delete_before && !solo_file && cur_flist->used > 0)
516                 do_delete_pass();
517         if (delete_during == 2) {
518 @@ -2256,7 +2421,7 @@ void generate_files(int f_out, const char *local_name)
519         }
520         do_progress = 0;
521  
522 -       if (append_mode > 0 || whole_file < 0)
523 +       if (append_mode > 0 || detect_renamed || whole_file < 0)
524                 whole_file = 0;
525         if (verbose >= 2) {
526                 rprintf(FINFO, "delta-transmission %s\n",
527 @@ -2298,7 +2463,7 @@ void generate_files(int f_out, const char *local_name)
528                                                 dirdev = MAKEDEV(DEV_MAJOR(devp), DEV_MINOR(devp));
529                                         } else
530                                                 dirdev = MAKEDEV(0, 0);
531 -                                       delete_in_dir(fbuf, fp, &dirdev);
532 +                                       delete_in_dir(fbuf, fp, &dirdev, 0);
533                                 } else
534                                         change_local_filter_dir(fbuf, strlen(fbuf), F_DEPTH(fp));
535                         }
536 @@ -2345,7 +2510,21 @@ void generate_files(int f_out, const char *local_name)
537         } while ((cur_flist = cur_flist->next) != NULL);
538  
539         if (delete_during)
540 -               delete_in_dir(NULL, NULL, &dev_zero);
541 +               delete_in_dir(NULL, NULL, &dev_zero, 0);
542 +       if (detect_renamed) {
543 +               if (delete_during < 0)
544 +                       delete_during = 0;
545 +               detect_renamed = 0;
546 +
547 +               for (i = -1; (i = bitbag_next_bit(delayed_bits, i)) >= 0; ) {
548 +                       struct file_struct *file = cur_flist->files[i];
549 +                       if (local_name)
550 +                               strlcpy(fbuf, local_name, sizeof fbuf);
551 +                       else
552 +                               f_name(file, fbuf);
553 +                       recv_generator(fbuf, file, i, itemizing, code, f_out);
554 +               }
555 +       }
556         phase++;
557         if (verbose > 2)
558                 rprintf(FINFO, "generate_files phase=%d\n", phase);
559 diff --git a/options.c b/options.c
560 --- a/options.c
561 +++ b/options.c
562 @@ -81,6 +81,7 @@ int am_sender = 0;
563  int am_starting_up = 1;
564  int relative_paths = -1;
565  int implied_dirs = 1;
566 +int detect_renamed = 0;
567  int numeric_ids = 0;
568  int allow_8bit_chars = 0;
569  int force_delete = 0;
570 @@ -390,6 +391,7 @@ void usage(enum logcode F)
571    rprintf(F,"     --modify-window=NUM     compare mod-times with reduced accuracy\n");
572    rprintf(F," -T, --temp-dir=DIR          create temporary files in directory DIR\n");
573    rprintf(F," -y, --fuzzy                 find similar file for basis if no dest file\n");
574 +  rprintf(F,"     --detect-renamed        try to find renamed files to speed up the transfer\n");
575    rprintf(F,"     --compare-dest=DIR      also compare destination files relative to DIR\n");
576    rprintf(F,"     --copy-dest=DIR         ... and include copies of unchanged files\n");
577    rprintf(F,"     --link-dest=DIR         hardlink to files in DIR when unchanged\n");
578 @@ -577,6 +579,7 @@ static struct poptOption long_options[] = {
579    {"compare-dest",     0,  POPT_ARG_STRING, 0, OPT_COMPARE_DEST, 0, 0 },
580    {"copy-dest",        0,  POPT_ARG_STRING, 0, OPT_COPY_DEST, 0, 0 },
581    {"link-dest",        0,  POPT_ARG_STRING, 0, OPT_LINK_DEST, 0, 0 },
582 +  {"detect-renamed",   0,  POPT_ARG_NONE,   &detect_renamed, 0, 0, 0 },
583    {"fuzzy",           'y', POPT_ARG_VAL,    &fuzzy_basis, 1, 0, 0 },
584    {"no-fuzzy",         0,  POPT_ARG_VAL,    &fuzzy_basis, 0, 0, 0 },
585    {"no-y",             0,  POPT_ARG_VAL,    &fuzzy_basis, 0, 0, 0 },
586 @@ -1601,7 +1604,7 @@ int parse_arguments(int *argc_p, const char ***argv_p)
587                 inplace = 1;
588         }
589  
590 -       if (delay_updates && !partial_dir)
591 +       if ((delay_updates || detect_renamed) && !partial_dir)
592                 partial_dir = tmp_partialdir;
593  
594         if (inplace) {
595 @@ -1610,6 +1613,7 @@ int parse_arguments(int *argc_p, const char ***argv_p)
596                         snprintf(err_buf, sizeof err_buf,
597                                  "--%s cannot be used with --%s\n",
598                                  append_mode ? "append" : "inplace",
599 +                                detect_renamed ? "detect-renamed" :
600                                  delay_updates ? "delay-updates" : "partial-dir");
601                         return 0;
602                 }
603 @@ -1972,6 +1976,8 @@ void server_options(char **args, int *argc_p)
604                         args[ac++] = "--super";
605                 if (size_only)
606                         args[ac++] = "--size-only";
607 +               if (detect_renamed)
608 +                       args[ac++] = "--detect-renamed";
609         } else {
610                 if (skip_compress) {
611                         if (asprintf(&arg, "--skip-compress=%s", skip_compress) < 0)
612 diff --git a/rsync.yo b/rsync.yo
613 --- a/rsync.yo
614 +++ b/rsync.yo
615 @@ -389,6 +389,7 @@ to the detailed description below for a complete description.  verb(
616       --modify-window=NUM     compare mod-times with reduced accuracy
617   -T, --temp-dir=DIR          create temporary files in directory DIR
618   -y, --fuzzy                 find similar file for basis if no dest file
619 +     --detect-renamed        try to find renamed files to speed the xfer
620       --compare-dest=DIR      also compare received files relative to DIR
621       --copy-dest=DIR         ... and include copies of unchanged files
622       --link-dest=DIR         hardlink to files in DIR when unchanged
623 @@ -1561,6 +1562,21 @@ Note that the use of the bf(--delete) option might get rid of any potential
624  fuzzy-match files, so either use bf(--delete-after) or specify some
625  filename exclusions if you need to prevent this.
626  
627 +dit(bf(--detect-renamed)) With this option, for each new source file
628 +(call it em(src/S)), rsync looks for a file em(dest/D) anywhere in the
629 +destination that passes the quick check with em(src/S).  If such a em(dest/D)
630 +is found, rsync uses it as an alternate basis for transferring em(S).  The
631 +idea is that if em(src/S) was renamed from em(src/D) (as opposed to em(src/S)
632 +passing the quick check with em(dest/D) by coincidence), the delta-transfer
633 +algorithm will find that all the data matches between em(src/S) and em(dest/D),
634 +and the transfer will be really fast.
635 +
636 +By default, alternate-basis files are hard-linked into a directory named
637 +".~tmp~" in each file's destination directory, but if you've specified
638 +the bf(--partial-dir) option, that directory will be used instead.  These
639 +potential alternate-basis files will be removed as the transfer progresses.
640 +This option conflicts with bf(--inplace) and bf(--append).
641 +
642  dit(bf(--compare-dest=DIR)) This option instructs rsync to use em(DIR) on
643  the destination machine as an additional hierarchy to compare destination
644  files against doing transfers (if the files are missing in the destination
645 diff --git a/util.c b/util.c
646 --- a/util.c
647 +++ b/util.c
648 @@ -1178,6 +1178,32 @@ int handle_partial_dir(const char *fname, int create)
649         return 1;
650  }
651  
652 +/* A strcmp() work-alike that always compares bytes as unsigned values, so
653 + * that file-list comparisons are consistent between machines. */
654 +int u_strcmp(const char *p1, const char *p2)
655 +{
656 +       const uchar *s1 = (const uchar *)p1, *s2 = (const uchar *)p2;
657 +
658 +       while (*s1 && *s1 == *s2)
659 +               s1++, s2++;
660 +
661 +       return (int)*s1 - (int)*s2;
662 +}
663 +
664 +/* Compare len bytes as unsigned values, returning <0, 0, or >0 like memcmp. */
665 +int u_memcmp(const void *p1, const void *p2, size_t len)
666 +{
667 +       const uchar *u1 = p1;
668 +       const uchar *u2 = p2;
669 +       /* Step both pointers each pass so that every byte gets compared. */
670 +       for ( ; len--; u1++, u2++) {
671 +               if (*u1 != *u2)
672 +                       return (int)*u1 - (int)*u2;
673 +       }
674 +
675 +       return 0;
676 +}
677 +
678  /* Determine if a symlink points outside the current directory tree.
679   * This is considered "unsafe" because e.g. when mirroring somebody
680   * else's machine it might allow them to establish a symlink to