Moved one fsync() call.
[rsync-patches.git] / source-filter_dest-filter.diff
1 CAUTION:  This patch compiles, but is otherwise totally untested!
2
3 This patch also implements --times-only.
4
5 Implementation details for the --source-filter and --dest-filter options:
6
7  - These options open a *HUGE* security hole in daemon mode unless they
8    are refused in your rsyncd.conf!
9
10  - Filtering disables the rsync algorithm. (This should be fixed.)
11
12  - Source filter makes temporary files in /tmp. (Should be overridable.)
13
14  - If the source filter fails, data is sent unfiltered. (Should be changed
15    to abort.)
16
17  - Failure of the destination filter causes data loss!!! (Should be changed
18    to abort.)
19
20  - If the filter changes the size of a file, you should use the --times-only option to
21    prevent repeated transfers of unchanged files.
22
23  - If the COMMAND contains single quotes, option-passing breaks.  (Needs
24    to be fixed.)
25
26 To use this patch, run these commands for a successful build:
27
28     patch -p1 <patches/source-filter_dest-filter.diff
29     ./prepare-source
30     ./configure                                (optional if already run)
31     make
32
33 --- old/generator.c
34 +++ new/generator.c
35 @@ -61,6 +61,7 @@ extern int append_mode;
36  extern int make_backups;
37  extern int csum_length;
38  extern int ignore_times;
39 +extern int times_only;
40  extern int size_only;
41  extern OFF_T max_size;
42  extern OFF_T min_size;
43 @@ -571,7 +572,7 @@ void itemize(struct file_struct *file, i
44  /* Perform our quick-check heuristic for determining if a file is unchanged. */
45  int unchanged_file(char *fn, struct file_struct *file, STRUCT_STAT *st)
46  {
47 -       if (st->st_size != F_LENGTH(file))
48 +       if (!times_only && st->st_size != F_LENGTH(file))
49                 return 0;
50  
51         /* if always checksum is set then we use the checksum instead
52 --- old/main.c
53 +++ new/main.c
54 @@ -128,7 +128,7 @@ pid_t wait_process(pid_t pid, int *statu
55  }
56  
57  /* Wait for a process to exit, calling io_flush while waiting. */
58 -static void wait_process_with_flush(pid_t pid, int *exit_code_ptr)
59 +void wait_process_with_flush(pid_t pid, int *exit_code_ptr)
60  {
61         pid_t waited_pid;
62         int status;
63 --- old/options.c
64 +++ new/options.c
65 @@ -99,6 +99,7 @@ int keep_partial = 0;
66  int safe_symlinks = 0;
67  int copy_unsafe_links = 0;
68  int size_only = 0;
69 +int times_only = 0;
70  int daemon_bwlimit = 0;
71  int bwlimit = 0;
72  int fuzzy_basis = 0;
73 @@ -150,6 +151,8 @@ char *logfile_name = NULL;
74  char *logfile_format = NULL;
75  char *stdout_format = NULL;
76  char *password_file = NULL;
77 +char *source_filter = NULL;
78 +char *dest_filter = NULL;
79  char *rsync_path = RSYNC_PATH;
80  char *backup_dir = NULL;
81  char backup_dir_buf[MAXPATHLEN];
82 @@ -340,6 +343,7 @@ void usage(enum logcode F)
83    rprintf(F,"     --timeout=TIME          set I/O timeout in seconds\n");
84    rprintf(F," -I, --ignore-times          don't skip files that match in size and mod-time\n");
85    rprintf(F,"     --size-only             skip files that match in size\n");
86 +  rprintf(F,"     --times-only            skip files that match in mod-time\n");
87    rprintf(F,"     --modify-window=NUM     compare mod-times with reduced accuracy\n");
88    rprintf(F," -T, --temp-dir=DIR          create temporary files in directory DIR\n");
89    rprintf(F," -y, --fuzzy                 find similar file for basis if no dest file\n");
90 @@ -377,6 +381,8 @@ void usage(enum logcode F)
91    rprintf(F,"     --write-batch=FILE      write a batched update to FILE\n");
92    rprintf(F,"     --only-write-batch=FILE like --write-batch but w/o updating destination\n");
93    rprintf(F,"     --read-batch=FILE       read a batched update from FILE\n");
94 +  rprintf(F,"     --source-filter=COMMAND filter file through COMMAND at source\n");
95 +  rprintf(F,"     --dest-filter=COMMAND   filter file through COMMAND at destination\n");
96    rprintf(F,"     --protocol=NUM          force an older protocol version to be used\n");
97  #ifdef INET6
98    rprintf(F," -4, --ipv4                  prefer IPv4\n");
99 @@ -460,6 +466,7 @@ static struct poptOption long_options[] 
100    {"chmod",            0,  POPT_ARG_STRING, 0, OPT_CHMOD, 0, 0 },
101    {"ignore-times",    'I', POPT_ARG_NONE,   &ignore_times, 0, 0, 0 },
102    {"size-only",        0,  POPT_ARG_NONE,   &size_only, 0, 0, 0 },
103 +  {"times-only",       0,  POPT_ARG_NONE,   &times_only , 0, 0, 0 },
104    {"one-file-system", 'x', POPT_ARG_NONE,   0, 'x', 0, 0 },
105    {"update",          'u', POPT_ARG_NONE,   &update_only, 0, 0, 0 },
106    {"existing",         0,  POPT_ARG_NONE,   &ignore_non_existing, 0, 0, 0 },
107 @@ -539,6 +546,8 @@ static struct poptOption long_options[] 
108    {"password-file",    0,  POPT_ARG_STRING, &password_file, 0, 0, 0 },
109    {"blocking-io",      0,  POPT_ARG_VAL,    &blocking_io, 1, 0, 0 },
110    {"no-blocking-io",   0,  POPT_ARG_VAL,    &blocking_io, 0, 0, 0 },
111 +  {"source-filter",    0,  POPT_ARG_STRING, &source_filter, 0, 0, 0 },
112 +  {"dest-filter",      0,  POPT_ARG_STRING, &dest_filter, 0, 0, 0 },
113    {"protocol",         0,  POPT_ARG_INT,    &protocol_version, 0, 0, 0 },
114    {"checksum-seed",    0,  POPT_ARG_INT,    &checksum_seed, 0, 0, 0 },
115    {"server",           0,  POPT_ARG_NONE,   0, OPT_SERVER, 0, 0 },
116 @@ -1416,6 +1425,16 @@ int parse_arguments(int *argc, const cha
117                 }
118         }
119  
120 +       if (source_filter || dest_filter) {
121 +               if (whole_file == 0) {
122 +                       snprintf(err_buf, sizeof err_buf,
123 +                                "--no-whole-file cannot be used with --%s-filter\n",
124 +                                source_filter ? "source" : "dest");
125 +                       return 0;
126 +               }
127 +               whole_file = 1;
128 +       }
129 +
130         if (files_from) {
131                 char *h, *p;
132                 int q;
133 @@ -1692,6 +1711,25 @@ void server_options(char **args,int *arg
134                         args[ac++] = "--size-only";
135         }
136  
137 +       if (times_only && am_sender)
138 +               args[ac++] = "--times-only";
139 +
140 +       if (source_filter && !am_sender) {
141 +               /* Need to single quote the arg to keep the remote shell
142 +                * from splitting it.  FIXME: breaks if command has single quotes. */
143 +               if (asprintf(&arg, "--source-filter='%s'", source_filter) < 0)
144 +                       goto oom;
145 +               args[ac++] = arg;
146 +       }
147 +
148 +       if (dest_filter && am_sender) {
149 +               /* Need to single quote the arg to keep the remote shell
150 +                * from splitting it.  FIXME: breaks if command has single quotes. */
151 +               if (asprintf(&arg, "--dest-filter='%s'", dest_filter) < 0)
152 +                       goto oom;
153 +               args[ac++] = arg;
154 +       }
155 +
156         if (modify_window_set) {
157                 if (asprintf(&arg, "--modify-window=%d", modify_window) < 0)
158                         goto oom;
159 --- old/pipe.c
160 +++ new/pipe.c
161 @@ -165,3 +165,77 @@ pid_t local_child(int argc, char **argv,
162  
163         return pid;
164  }
165 +
166 +pid_t run_filter(char *command[], int out, int *pipe_to_filter)
167 +{
168 +       pid_t pid;
169 +       int pipefds[2];
170 +       
171 +       if (verbose >= 2)
172 +               print_child_argv(command);
173 +
174 +       if (pipe(pipefds) < 0) {
175 +               rsyserr(FERROR, errno, "pipe");
176 +               exit_cleanup(RERR_IPC);
177 +       }
178 +
179 +       pid = do_fork();
180 +       if (pid == -1) {
181 +               rsyserr(FERROR, errno, "fork");
182 +               exit_cleanup(RERR_IPC);
183 +       }
184 +
185 +       if (pid == 0) {
186 +               if (dup2(pipefds[0], STDIN_FILENO) < 0
187 +                || close(pipefds[1]) < 0
188 +                || dup2(out, STDOUT_FILENO) < 0) {
189 +                       rsyserr(FERROR, errno, "Failed dup/close");
190 +                       exit_cleanup(RERR_IPC);
191 +               }
192 +               umask(orig_umask);
193 +               set_blocking(STDIN_FILENO);
194 +               if (blocking_io)
195 +                       set_blocking(STDOUT_FILENO);
196 +               execvp(command[0], command);
197 +               rsyserr(FERROR, errno, "Failed to exec %s", command[0]);
198 +               exit_cleanup(RERR_IPC);
199 +       }
200 +
201 +       if (close(pipefds[0]) < 0) {
202 +               rsyserr(FERROR, errno, "Failed to close");
203 +               exit_cleanup(RERR_IPC);
204 +       }
205 +
206 +       *pipe_to_filter = pipefds[1];
207 +
208 +       return pid;
209 +}
210 +
211 +pid_t run_filter_on_file(char *command[], int out, int in)
212 +{
213 +       pid_t pid;
214 +       
215 +       if (verbose >= 2)
216 +               print_child_argv(command);
217 +
218 +       pid = do_fork();
219 +       if (pid == -1) {
220 +               rsyserr(FERROR, errno, "fork");
221 +               exit_cleanup(RERR_IPC);
222 +       }
223 +
224 +       if (pid == 0) {
225 +               if (dup2(in, STDIN_FILENO) < 0
226 +                || dup2(out, STDOUT_FILENO) < 0) {
227 +                       rsyserr(FERROR, errno, "Failed to dup2");
228 +                       exit_cleanup(RERR_IPC);
229 +               }
230 +               if (blocking_io)
231 +                       set_blocking(STDOUT_FILENO);
232 +               execvp(command[0], command);
233 +               rsyserr(FERROR, errno, "Failed to exec %s", command[0]);
234 +               exit_cleanup(RERR_IPC);
235 +       }
236 +
237 +       return pid;
238 +}
239 --- old/receiver.c
240 +++ new/receiver.c
241 @@ -52,6 +52,7 @@ extern struct stats stats;
242  extern char *tmpdir;
243  extern char *partial_dir;
244  extern char *basis_dir[];
245 +extern char *dest_filter;
246  extern struct file_list *cur_flist, *first_flist;
247  extern struct filter_list_struct server_filter_list;
248  
249 @@ -349,6 +350,8 @@ int recv_files(int f_in, char *local_nam
250         enum logcode log_code = log_before_transfer ? FLOG : FINFO;
251         int max_phase = protocol_version >= 29 ? 2 : 1;
252         int ndx, recv_ok;
253 +       pid_t pid = 0;
254 +       char *filter_argv[MAX_FILTER_ARGS + 1];
255  
256         if (verbose > 2)
257                 rprintf(FINFO, "recv_files(%d) starting\n", cur_flist->count);
258 @@ -358,6 +361,23 @@ int recv_files(int f_in, char *local_nam
259  
260         updating_basis = inplace;
261  
262 +       if (dest_filter) {
263 +               char *p;
264 +               char *sep = " \t";
265 +               int i;
266 +               for (p = strtok(dest_filter, sep), i = 0;
267 +                    p && i < MAX_FILTER_ARGS;
268 +                    p = strtok(0, sep))
269 +                       filter_argv[i++] = p;
270 +               filter_argv[i] = NULL;
271 +               if (p) {
272 +                       rprintf(FERROR,
273 +                               "Too many arguments to dest-filter (> %d)\n",
274 +                               MAX_FILTER_ARGS);
275 +                       exit_cleanup(RERR_SYNTAX);
276 +               }
277 +       }
278 +
279         while (1) {
280                 cleanup_disable();
281  
282 @@ -620,6 +640,9 @@ int recv_files(int f_in, char *local_nam
283                 else if (!am_server && verbose && do_progress)
284                         rprintf(FINFO, "%s\n", fname);
285  
286 +               if (dest_filter)
287 +                       pid = run_filter(filter_argv, fd2, &fd2);
288 +
289                 /* recv file data */
290                 recv_ok = receive_data(f_in, fnamecmp, fd1, st.st_size,
291                                        fname, fd2, F_LENGTH(file));
292 @@ -634,6 +657,16 @@ int recv_files(int f_in, char *local_nam
293                         exit_cleanup(RERR_FILEIO);
294                 }
295  
296 +               if (dest_filter) {
297 +                       int status;
298 +                       wait_process_with_flush(pid, &status);
299 +                       if (status != 0) {
300 +                               rprintf(FERROR, "filter %s exited code: %d\n",
301 +                                       dest_filter, status);
302 +                               continue;
303 +                       }
304 +               }
305 +
306                 if ((recv_ok && (!delay_updates || !partialptr)) || inplace) {
307                         char *temp_copy_name;
308                         if (partialptr == fname)
309 --- old/rsync.h
310 +++ new/rsync.h
311 @@ -119,6 +119,7 @@
312  #define IOERR_DEL_LIMIT (1<<2)
313  
314  #define MAX_ARGS 1000
315 +#define MAX_FILTER_ARGS 100
316  #define MAX_BASIS_DIRS 20
317  #define MAX_SERVER_ARGS (MAX_BASIS_DIRS*2 + 100)
318  
319 --- old/rsync.yo
320 +++ new/rsync.yo
321 @@ -361,6 +361,7 @@ to the detailed description below for a 
322       --timeout=TIME          set I/O timeout in seconds
323   -I, --ignore-times          don't skip files that match size and time
324       --size-only             skip files that match in size
325 +     --times-only            skip files that match in mod-time
326       --modify-window=NUM     compare mod-times with reduced accuracy
327   -T, --temp-dir=DIR          create temporary files in directory DIR
328   -y, --fuzzy                 find similar file for basis if no dest file
329 @@ -398,6 +399,8 @@ to the detailed description below for a 
330       --write-batch=FILE      write a batched update to FILE
331       --only-write-batch=FILE like --write-batch but w/o updating dest
332       --read-batch=FILE       read a batched update from FILE
333 +     --source-filter=COMMAND filter file through COMMAND at source
334 +     --dest-filter=COMMAND   filter file through COMMAND at destination
335       --protocol=NUM          force an older protocol version to be used
336       --checksum-seed=NUM     set block/file checksum seed (advanced)
337   -4, --ipv4                  prefer IPv4
338 @@ -1804,6 +1807,33 @@ file previously generated by bf(--write-
339  If em(FILE) is bf(-), the batch data will be read from standard input.
340  See the "BATCH MODE" section for details.
341  
342 +dit(bf(--source-filter=COMMAND)) This option allows the user to specify a
343 +filter program that will be applied to the contents of all transferred
344 +regular files before the data is sent to destination.  COMMAND will receive
345 +the data on its standard input and it should write the filtered data to
346 +standard output.  COMMAND should exit non-zero if it cannot process the
347 +data or if it encounters an error when writing the data to stdout.
348 +
349 +Example: --source-filter="gzip -9" will cause remote files to be
350 +compressed.
351 +Use of --source-filter automatically enables --whole-file.
352 +If your filter does not output the same number of bytes that it received on
353 +input, you should use --times-only to disable size and content checks on
354 +subsequent rsync runs.
355 +
356 +dit(bf(--dest-filter=COMMAND)) This option allows you to specify a filter
357 +program that will be applied to the contents of all transferred regular
358 +files before the data is written to disk.  COMMAND will receive the data on
359 +its standard input and it should write the filtered data to standard
360 +output.  COMMAND should exit non-zero if it cannot process the data or if
361 +it encounters an error when writing the data to stdout.
362 +
363 +Example: --dest-filter="gzip -9" will cause remote files to be compressed.
364 +Use of --dest-filter automatically enables --whole-file.
365 +If your filter does not output the same number of bytes that it
366 +received on input, you should use --times-only to disable size and
367 +content checks on subsequent rsync runs.
368 +
369  dit(bf(--protocol=NUM)) Force an older protocol version to be used.  This
370  is useful for creating a batch file that is compatible with an older
371  version of rsync.  For instance, if rsync 2.6.4 is being used with the
372 --- old/sender.c
373 +++ new/sender.c
374 @@ -42,6 +42,7 @@ extern int do_progress;
375  extern int inplace;
376  extern int batch_fd;
377  extern int write_batch;
378 +extern char *source_filter;
379  extern struct stats stats;
380  extern struct file_list *cur_flist, *first_flist;
381  
382 @@ -175,6 +176,26 @@ void send_files(int f_in, int f_out)
383         enum logcode log_code = log_before_transfer ? FLOG : FINFO;
384         int f_xfer = write_batch < 0 ? batch_fd : f_out;
385         int ndx, j;
386 +       char *filter_argv[MAX_FILTER_ARGS + 1];
387 +       char *tmp = 0;
388 +       int unlink_tmp = 0;
389 +
390 +       if (source_filter) {
391 +               char *p;
392 +               char *sep = " \t";
393 +               int i;
394 +               for (p = strtok(source_filter, sep), i = 0;
395 +                    p && i < MAX_FILTER_ARGS;
396 +                    p = strtok(0, sep))
397 +                       filter_argv[i++] = p;
398 +               filter_argv[i] = NULL;
399 +               if (p) {
400 +                       rprintf(FERROR,
401 +                               "Too many arguments to source-filter (> %d)\n",
402 +                               MAX_FILTER_ARGS);
403 +                       exit_cleanup(RERR_SYNTAX);
404 +               }
405 +       }
406  
407         if (verbose > 2)
408                 rprintf(FINFO, "send_files starting\n");
409 @@ -265,6 +286,7 @@ void send_files(int f_in, int f_out)
410                         exit_cleanup(RERR_PROTOCOL);
411                 }
412  
413 +               unlink_tmp = 0;
414                 fd = do_open(fname, O_RDONLY, 0);
415                 if (fd == -1) {
416                         if (errno == ENOENT) {
417 @@ -295,6 +317,33 @@ void send_files(int f_in, int f_out)
418                         exit_cleanup(RERR_PROTOCOL);
419                 }
420  
421 +               if (source_filter) {
422 +                       int fd2;
423 +                       char *tmpl = "/tmp/rsync-filtered_sourceXXXXXX";
424 +
425 +                       tmp = strdup(tmpl);
426 +                       fd2 = mkstemp(tmp);
427 +                       if (fd2 == -1) {
428 +                               rprintf(FERROR, "mkstemp %s failed: %s\n",
429 +                                       tmp, strerror(errno));
430 +                       } else {
431 +                               int status;
432 +                               pid_t pid = run_filter_on_file(filter_argv, fd2, fd);
433 +                               close(fd);
434 +                               close(fd2);
435 +                               wait_process_with_flush(pid, &status);
436 +                               if (status != 0) {
437 +                                       rprintf(FERROR,
438 +                                           "bypassing source filter %s; exited with code: %d\n",
439 +                                           source_filter, status);
440 +                                       fd = do_open(fname, O_RDONLY, 0);
441 +                               } else {
442 +                                       fd = do_open(tmp, O_RDONLY, 0);
443 +                                       unlink_tmp = 1;
444 +                               }
445 +                       }
446 +               }
447 +
448                 if (st.st_size) {
449                         int32 read_size = MAX(s->blength * 3, MAX_MAP_SIZE);
450                         mbuf = map_file(fd, st.st_size, read_size, s->blength);
451 @@ -336,6 +385,8 @@ void send_files(int f_in, int f_out)
452                         }
453                 }
454                 close(fd);
455 +               if (unlink_tmp)
456 +                       unlink(tmp);
457  
458                 free_sums(s);
459