b1605f3331c486ac351653dd25d213280dd5f490
[rsync-patches.git] / source-filter_dest-filter.diff
1 CAUTION:  This patch compiles, but is otherwise totally untested!
2
3 This patch also implements --times-only.
4
5 Implementation details for the --source-filter and --dest-filter options:
6
7  - These options open a *HUGE* security hole in daemon mode unless they
8    are refused in your rsyncd.conf!
9
10  - Filtering disables the rsync algorithm. (This should be fixed.)
11
12  - Source filter makes temporary files in /tmp. (Should be overridable.)
13
14  - If the source filter fails, data is sent unfiltered. (Should be changed
15    to abort.)
16
17  - Failure of the destination filter causes data loss!!! (Should be changed
18    to abort.)
19
20  - If the filter changes the size of a file, you should use the --times-only
21    option to prevent repeated transfers of unchanged files.
22
23  - If the COMMAND contains single quotes, option-passing breaks.  (Needs
24    to be fixed.)
25
26 To use this patch, run these commands for a successful build:
27
28     patch -p1 <patches/source-filter_dest-filter.diff
29     ./prepare-source
30     ./configure                                (optional if already run)
31     make
32
33 diff --git a/generator.c b/generator.c
34 --- a/generator.c
35 +++ b/generator.c
36 @@ -61,6 +61,7 @@ extern int append_mode;
37  extern int make_backups;
38  extern int csum_length;
39  extern int ignore_times;
40 +extern int times_only;
41  extern int size_only;
42  extern OFF_T max_size;
43  extern OFF_T min_size;
44 @@ -667,7 +668,7 @@ void itemize(const char *fnamecmp, struct file_struct *file, int ndx, int statre
45  /* Perform our quick-check heuristic for determining if a file is unchanged. */
46  int unchanged_file(char *fn, struct file_struct *file, STRUCT_STAT *st)
47  {
48 -       if (st->st_size != F_LENGTH(file))
49 +       if (!times_only && st->st_size != F_LENGTH(file))
50                 return 0;
51  
52         /* if always checksum is set then we use the checksum instead
53 diff --git a/main.c b/main.c
54 --- a/main.c
55 +++ b/main.c
56 @@ -139,7 +139,7 @@ pid_t wait_process(pid_t pid, int *status_ptr, int flags)
57  }
58  
59  /* Wait for a process to exit, calling io_flush while waiting. */
60 -static void wait_process_with_flush(pid_t pid, int *exit_code_ptr)
61 +void wait_process_with_flush(pid_t pid, int *exit_code_ptr)
62  {
63         pid_t waited_pid;
64         int status;
65 diff --git a/options.c b/options.c
66 --- a/options.c
67 +++ b/options.c
68 @@ -105,6 +105,7 @@ int keep_partial = 0;
69  int safe_symlinks = 0;
70  int copy_unsafe_links = 0;
71  int size_only = 0;
72 +int times_only = 0;
73  int daemon_bwlimit = 0;
74  int bwlimit = 0;
75  int fuzzy_basis = 0;
76 @@ -162,6 +163,8 @@ char *logfile_name = NULL;
77  char *logfile_format = NULL;
78  char *stdout_format = NULL;
79  char *password_file = NULL;
80 +char *source_filter = NULL;
81 +char *dest_filter = NULL;
82  char *rsync_path = RSYNC_PATH;
83  char *backup_dir = NULL;
84  char backup_dir_buf[MAXPATHLEN];
85 @@ -383,6 +386,7 @@ void usage(enum logcode F)
86    rprintf(F,"     --contimeout=SECONDS    set daemon connection timeout in seconds\n");
87    rprintf(F," -I, --ignore-times          don't skip files that match in size and mod-time\n");
88    rprintf(F,"     --size-only             skip files that match in size\n");
89 +  rprintf(F,"     --times-only            skip files that match in mod-time\n");
90    rprintf(F,"     --modify-window=NUM     compare mod-times with reduced accuracy\n");
91    rprintf(F," -T, --temp-dir=DIR          create temporary files in directory DIR\n");
92    rprintf(F," -y, --fuzzy                 find similar file for basis if no dest file\n");
93 @@ -422,6 +426,8 @@ void usage(enum logcode F)
94    rprintf(F,"     --write-batch=FILE      write a batched update to FILE\n");
95    rprintf(F,"     --only-write-batch=FILE like --write-batch but w/o updating destination\n");
96    rprintf(F,"     --read-batch=FILE       read a batched update from FILE\n");
97 +  rprintf(F,"     --source-filter=COMMAND filter file through COMMAND at source\n");
98 +  rprintf(F,"     --dest-filter=COMMAND   filter file through COMMAND at destination\n");
99    rprintf(F,"     --protocol=NUM          force an older protocol version to be used\n");
100  #ifdef ICONV_OPTION
101    rprintf(F,"     --iconv=CONVERT_SPEC    request charset conversion of filenames\n");
102 @@ -523,6 +529,7 @@ static struct poptOption long_options[] = {
103    {"chmod",            0,  POPT_ARG_STRING, 0, OPT_CHMOD, 0, 0 },
104    {"ignore-times",    'I', POPT_ARG_NONE,   &ignore_times, 0, 0, 0 },
105    {"size-only",        0,  POPT_ARG_NONE,   &size_only, 0, 0, 0 },
106 +  {"times-only",       0,  POPT_ARG_NONE,   &times_only , 0, 0, 0 },
107    {"one-file-system", 'x', POPT_ARG_NONE,   0, 'x', 0, 0 },
108    {"update",          'u', POPT_ARG_NONE,   &update_only, 0, 0, 0 },
109    {"existing",         0,  POPT_ARG_NONE,   &ignore_non_existing, 0, 0, 0 },
110 @@ -623,6 +630,8 @@ static struct poptOption long_options[] = {
111    {"password-file",    0,  POPT_ARG_STRING, &password_file, 0, 0, 0 },
112    {"blocking-io",      0,  POPT_ARG_VAL,    &blocking_io, 1, 0, 0 },
113    {"no-blocking-io",   0,  POPT_ARG_VAL,    &blocking_io, 0, 0, 0 },
114 +  {"source-filter",    0,  POPT_ARG_STRING, &source_filter, 0, 0, 0 },
115 +  {"dest-filter",      0,  POPT_ARG_STRING, &dest_filter, 0, 0, 0 },
116    {"protocol",         0,  POPT_ARG_INT,    &protocol_version, 0, 0, 0 },
117    {"checksum-seed",    0,  POPT_ARG_INT,    &checksum_seed, 0, 0, 0 },
118    {"server",           0,  POPT_ARG_NONE,   0, OPT_SERVER, 0, 0 },
119 @@ -1586,6 +1595,16 @@ int parse_arguments(int *argc_p, const char ***argv_p, int frommain)
120                 }
121         }
122  
123 +       if (source_filter || dest_filter) {
124 +               if (whole_file == 0) {
125 +                       snprintf(err_buf, sizeof err_buf,
126 +                                "--no-whole-file cannot be used with --%s-filter\n",
127 +                                source_filter ? "source" : "dest");
128 +                       return 0;
129 +               }
130 +               whole_file = 1;
131 +       }
132 +
133         if (files_from) {
134                 char *h, *p;
135                 int q;
136 @@ -1905,6 +1924,25 @@ void server_options(char **args, int *argc_p)
137                 }
138         }
139  
140 +       if (times_only && am_sender)
141 +               args[ac++] = "--times-only";
142 +
143 +       if (source_filter && !am_sender) {
144 +               /* Need to single quote the arg to keep the remote shell
145 +                * from splitting it.  FIXME: breaks if command has single quotes. */
146 +               if (asprintf(&arg, "--source-filter='%s'", source_filter) < 0)
147 +                       goto oom;
148 +               args[ac++] = arg;
149 +       }
150 +
151 +       if (dest_filter && am_sender) {
152 +               /* Need to single quote the arg to keep the remote shell
153 +                * from splitting it.  FIXME: breaks if command has single quotes. */
154 +               if (asprintf(&arg, "--dest-filter='%s'", dest_filter) < 0)
155 +                       goto oom;
156 +               args[ac++] = arg;
157 +       }
158 +
159         if (modify_window_set) {
160                 if (asprintf(&arg, "--modify-window=%d", modify_window) < 0)
161                         goto oom;
162 diff --git a/pipe.c b/pipe.c
163 --- a/pipe.c
164 +++ b/pipe.c
165 @@ -167,3 +167,77 @@ pid_t local_child(int argc, char **argv, int *f_in, int *f_out,
166  
167         return pid;
168  }
169 +
170 +pid_t run_filter(char *command[], int out, int *pipe_to_filter)
171 +{
172 +       pid_t pid;
173 +       int pipefds[2];
174 +       
175 +       if (verbose >= 2)
176 +               print_child_argv("opening connection using:", command);
177 +
178 +       if (pipe(pipefds) < 0) {
179 +               rsyserr(FERROR, errno, "pipe");
180 +               exit_cleanup(RERR_IPC);
181 +       }
182 +
183 +       pid = do_fork();
184 +       if (pid == -1) {
185 +               rsyserr(FERROR, errno, "fork");
186 +               exit_cleanup(RERR_IPC);
187 +       }
188 +
189 +       if (pid == 0) {
190 +               if (dup2(pipefds[0], STDIN_FILENO) < 0
191 +                || close(pipefds[1]) < 0
192 +                || dup2(out, STDOUT_FILENO) < 0) {
193 +                       rsyserr(FERROR, errno, "Failed dup/close");
194 +                       exit_cleanup(RERR_IPC);
195 +               }
196 +               umask(orig_umask);
197 +               set_blocking(STDIN_FILENO);
198 +               if (blocking_io)
199 +                       set_blocking(STDOUT_FILENO);
200 +               execvp(command[0], command);
201 +               rsyserr(FERROR, errno, "Failed to exec %s", command[0]);
202 +               exit_cleanup(RERR_IPC);
203 +       }
204 +
205 +       if (close(pipefds[0]) < 0) {
206 +               rsyserr(FERROR, errno, "Failed to close");
207 +               exit_cleanup(RERR_IPC);
208 +       }
209 +
210 +       *pipe_to_filter = pipefds[1];
211 +
212 +       return pid;
213 +}
214 +
215 +pid_t run_filter_on_file(char *command[], int out, int in)
216 +{
217 +       pid_t pid;
218 +       
219 +       if (verbose >= 2)
220 +               print_child_argv("opening connection using:", command);
221 +
222 +       pid = do_fork();
223 +       if (pid == -1) {
224 +               rsyserr(FERROR, errno, "fork");
225 +               exit_cleanup(RERR_IPC);
226 +       }
227 +
228 +       if (pid == 0) {
229 +               if (dup2(in, STDIN_FILENO) < 0
230 +                || dup2(out, STDOUT_FILENO) < 0) {
231 +                       rsyserr(FERROR, errno, "Failed to dup2");
232 +                       exit_cleanup(RERR_IPC);
233 +               }
234 +               if (blocking_io)
235 +                       set_blocking(STDOUT_FILENO);
236 +               execvp(command[0], command);
237 +               rsyserr(FERROR, errno, "Failed to exec %s", command[0]);
238 +               exit_cleanup(RERR_IPC);
239 +       }
240 +
241 +       return pid;
242 +}
243 diff --git a/receiver.c b/receiver.c
244 --- a/receiver.c
245 +++ b/receiver.c
246 @@ -52,6 +52,7 @@ extern int delay_updates;
247  extern mode_t orig_umask;
248  extern struct stats stats;
249  extern char *tmpdir;
250 +extern char *dest_filter;
251  extern char *partial_dir;
252  extern char *basis_dir[];
253  extern struct file_list *cur_flist, *first_flist, *dir_flist;
254 @@ -395,6 +396,8 @@ int recv_files(int f_in, char *local_name)
255         const char *parent_dirname = "";
256  #endif
257         int ndx, recv_ok;
258 +       pid_t pid = 0;
259 +       char *filter_argv[MAX_FILTER_ARGS + 1];
260  
261         if (verbose > 2)
262                 rprintf(FINFO, "recv_files(%d) starting\n", cur_flist->used);
263 @@ -402,6 +405,23 @@ int recv_files(int f_in, char *local_name)
264         if (delay_updates)
265                 delayed_bits = bitbag_create(cur_flist->used + 1);
266  
267 +       if (dest_filter) {
268 +               char *p;
269 +               char *sep = " \t";
270 +               int i;
271 +               for (p = strtok(dest_filter, sep), i = 0;
272 +                    p && i < MAX_FILTER_ARGS;
273 +                    p = strtok(0, sep))
274 +                       filter_argv[i++] = p;
275 +               filter_argv[i] = NULL;
276 +               if (p) {
277 +                       rprintf(FERROR,
278 +                               "Too many arguments to dest-filter (> %d)\n",
279 +                               MAX_FILTER_ARGS);
280 +                       exit_cleanup(RERR_SYNTAX);
281 +               }
282 +       }
283 +
284         while (1) {
285                 cleanup_disable();
286  
287 @@ -674,6 +694,9 @@ int recv_files(int f_in, char *local_name)
288                 else if (!am_server && verbose && do_progress)
289                         rprintf(FINFO, "%s\n", fname);
290  
291 +               if (dest_filter)
292 +                       pid = run_filter(filter_argv, fd2, &fd2);
293 +
294                 /* recv file data */
295                 recv_ok = receive_data(f_in, fnamecmp, fd1, st.st_size,
296                                        fname, fd2, F_LENGTH(file));
297 @@ -688,6 +711,16 @@ int recv_files(int f_in, char *local_name)
298                         exit_cleanup(RERR_FILEIO);
299                 }
300  
301 +               if (dest_filter) {
302 +                       int status;
303 +                       wait_process_with_flush(pid, &status);
304 +                       if (status != 0) {
305 +                               rprintf(FERROR, "filter %s exited code: %d\n",
306 +                                       dest_filter, status);
307 +                               continue;
308 +                       }
309 +               }
310 +
311                 if ((recv_ok && (!delay_updates || !partialptr)) || inplace) {
312                         if (partialptr == fname)
313                                 partialptr = NULL;
314 diff --git a/rsync.h b/rsync.h
315 --- a/rsync.h
316 +++ b/rsync.h
317 @@ -132,6 +132,7 @@
318  #define IOERR_DEL_LIMIT (1<<2)
319  
320  #define MAX_ARGS 1000
321 +#define MAX_FILTER_ARGS 100
322  #define MAX_BASIS_DIRS 20
323  #define MAX_SERVER_ARGS (MAX_BASIS_DIRS*2 + 100)
324  
325 diff --git a/rsync.yo b/rsync.yo
326 --- a/rsync.yo
327 +++ b/rsync.yo
328 @@ -382,6 +382,7 @@ to the detailed description below for a complete description.  verb(
329       --contimeout=SECONDS    set daemon connection timeout in seconds
330   -I, --ignore-times          don't skip files that match size and time
331       --size-only             skip files that match in size
332 +     --times-only            skip files that match in mod-time
333       --modify-window=NUM     compare mod-times with reduced accuracy
334   -T, --temp-dir=DIR          create temporary files in directory DIR
335   -y, --fuzzy                 find similar file for basis if no dest file
336 @@ -421,6 +422,8 @@ to the detailed description below for a complete description.  verb(
337       --write-batch=FILE      write a batched update to FILE
338       --only-write-batch=FILE like --write-batch but w/o updating dest
339       --read-batch=FILE       read a batched update from FILE
340 +     --source-filter=COMMAND filter file through COMMAND at source
341 +     --dest-filter=COMMAND   filter file through COMMAND at destination
342       --protocol=NUM          force an older protocol version to be used
343       --iconv=CONVERT_SPEC    request charset conversion of filenames
344       --checksum-seed=NUM     set block/file checksum seed (advanced)
345 @@ -2021,6 +2024,33 @@ file previously generated by bf(--write-batch).
346  If em(FILE) is bf(-), the batch data will be read from standard input.
347  See the "BATCH MODE" section for details.
348  
349 +dit(bf(--source-filter=COMMAND)) This option allows the user to specify a
350 +filter program that will be applied to the contents of all transferred
351 +regular files before the data is sent to destination.  COMMAND will receive
352 +the data on its standard input and it should write the filtered data to
353 +standard output.  COMMAND should exit non-zero if it cannot process the
354 +data or if it encounters an error when writing the data to stdout.
355 +
356 +Example: --source-filter="gzip -9" will cause remote files to be
357 +compressed.
358 +Use of --source-filter automatically enables --whole-file.
359 +If your filter does not output the same number of bytes that it received on
360 +input, you should use --times-only to disable size and content checks on
361 +subsequent rsync runs.
362 +
363 +dit(bf(--dest-filter=COMMAND)) This option allows you to specify a filter
364 +program that will be applied to the contents of all transferred regular
365 +files before the data is written to disk.  COMMAND will receive the data on
366 +its standard input and it should write the filtered data to standard
367 +output.  COMMAND should exit non-zero if it cannot process the data or if
368 +it encounters an error when writing the data to stdout.
369 +
370 +Example: --dest-filter="gzip -9" will cause remote files to be compressed.
371 +Use of --dest-filter automatically enables --whole-file.
372 +If your filter does not output the same number of bytes that it
373 +received on input, you should use --times-only to disable size and
374 +content checks on subsequent rsync runs.
375 +
376  dit(bf(--protocol=NUM)) Force an older protocol version to be used.  This
377  is useful for creating a batch file that is compatible with an older
378  version of rsync.  For instance, if rsync 2.6.4 is being used with the
379 diff --git a/sender.c b/sender.c
380 --- a/sender.c
381 +++ b/sender.c
382 @@ -43,6 +43,7 @@ extern int do_progress;
383  extern int inplace;
384  extern int batch_fd;
385  extern int write_batch;
386 +extern char *source_filter;
387  extern struct stats stats;
388  extern struct file_list *cur_flist, *first_flist, *dir_flist;
389  
390 @@ -181,6 +182,26 @@ void send_files(int f_in, int f_out)
391         enum logcode log_code = log_before_transfer ? FLOG : FINFO;
392         int f_xfer = write_batch < 0 ? batch_fd : f_out;
393         int ndx, j;
394 +       char *filter_argv[MAX_FILTER_ARGS + 1];
395 +       char *tmp = 0;
396 +       int unlink_tmp = 0;
397 +
398 +       if (source_filter) {
399 +               char *p;
400 +               char *sep = " \t";
401 +               int i;
402 +               for (p = strtok(source_filter, sep), i = 0;
403 +                    p && i < MAX_FILTER_ARGS;
404 +                    p = strtok(0, sep))
405 +                       filter_argv[i++] = p;
406 +               filter_argv[i] = NULL;
407 +               if (p) {
408 +                       rprintf(FERROR,
409 +                               "Too many arguments to source-filter (> %d)\n",
410 +                               MAX_FILTER_ARGS);
411 +                       exit_cleanup(RERR_SYNTAX);
412 +               }
413 +       }
414  
415         if (verbose > 2)
416                 rprintf(FINFO, "send_files starting\n");
417 @@ -285,6 +306,7 @@ void send_files(int f_in, int f_out)
418                         exit_cleanup(RERR_PROTOCOL);
419                 }
420  
421 +               unlink_tmp = 0;
422                 fd = do_open(fname, O_RDONLY, 0);
423                 if (fd == -1) {
424                         if (errno == ENOENT) {
425 @@ -306,6 +328,33 @@ void send_files(int f_in, int f_out)
426                         continue;
427                 }
428  
429 +               if (source_filter) {
430 +                       int fd2;
431 +                       char *tmpl = "/tmp/rsync-filtered_sourceXXXXXX";
432 +
433 +                       tmp = strdup(tmpl);
434 +                       fd2 = mkstemp(tmp);
435 +                       if (fd2 == -1) {
436 +                               rprintf(FERROR, "mkstemp %s failed: %s\n",
437 +                                       tmp, strerror(errno));
438 +                       } else {
439 +                               int status;
440 +                               pid_t pid = run_filter_on_file(filter_argv, fd2, fd);
441 +                               close(fd);
442 +                               close(fd2);
443 +                               wait_process_with_flush(pid, &status);
444 +                               if (status != 0) {
445 +                                       rprintf(FERROR,
446 +                                           "bypassing source filter %s; exited with code: %d\n",
447 +                                           source_filter, status);
448 +                                       fd = do_open(fname, O_RDONLY, 0);
449 +                               } else {
450 +                                       fd = do_open(tmp, O_RDONLY, 0);
451 +                                       unlink_tmp = 1;
452 +                               }
453 +                       }
454 +               }
455 +
456                 /* map the local file */
457                 if (do_fstat(fd, &st) != 0) {
458                         io_error |= IOERR_GENERAL;
459 @@ -356,6 +405,8 @@ void send_files(int f_in, int f_out)
460                         }
461                 }
462                 close(fd);
463 +               if (unlink_tmp)
464 +                       unlink(tmp);
465  
466                 free_sums(s);
467