Changed the style of the diff headers (use "patch -p1" now).
[rsync-patches.git] / source-filter_dest-filter.diff
1 CAUTION:  This patch compiles, but is otherwise totally untested!
2
3 This patch also implements --times-only.
4
5 Implementation details for the --source-filter and --dest-filter options:
6
7  - These options open a *HUGE* security hole in daemon mode unless they
8    are refused in your rsyncd.conf!
9
10  - Filtering disables the rsync algorithm. (This should be fixed.)
11
12  - Source filter makes temporary files in /tmp. (Should be overridable.)
13
14  - If the source filter fails, data is sent unfiltered. (Should be changed
15    to abort.)
16
17  - Failure of the destination filter causes data loss!!! (Should be changed
18    to abort.)
19
20  - If filter changes size of file, you should use --times-only option to
21    prevent repeated transfers of unchanged files.
22
23  - If the COMMAND contains single quotes, option-passing breaks.  (Needs
24    to be fixed.)
25
26 After applying this patch, run these commands for a successful build:
27
28     ./prepare-source
29     ./configure                      (optional if already run)
30     make
31
32 --- old/generator.c
33 +++ new/generator.c
34 @@ -61,6 +61,7 @@ extern int append_mode;
35  extern int make_backups;
36  extern int csum_length;
37  extern int ignore_times;
38 +extern int times_only;
39  extern int size_only;
40  extern OFF_T max_size;
41  extern OFF_T min_size;
42 @@ -379,7 +380,7 @@ void itemize(struct file_struct *file, i
43  /* Perform our quick-check heuristic for determining if a file is unchanged. */
44  int unchanged_file(char *fn, struct file_struct *file, STRUCT_STAT *st)
45  {
46 -       if (st->st_size != file->length)
47 +       if (!times_only && st->st_size != file->length)
48                 return 0;
49  
50         /* if always checksum is set then we use the checksum instead
51 --- old/options.c
52 +++ new/options.c
53 @@ -98,6 +98,7 @@ int keep_partial = 0;
54  int safe_symlinks = 0;
55  int copy_unsafe_links = 0;
56  int size_only = 0;
57 +int times_only = 0;
58  int daemon_bwlimit = 0;
59  int bwlimit = 0;
60  int fuzzy_basis = 0;
61 @@ -147,6 +148,8 @@ char *basis_dir[MAX_BASIS_DIRS+1];
62  char *config_file = NULL;
63  char *shell_cmd = NULL;
64  char *log_format = NULL;
65 +char *source_filter = NULL;
66 +char *dest_filter = NULL;
67  char *password_file = NULL;
68  char *rsync_path = RSYNC_PATH;
69  char *backup_dir = NULL;
70 @@ -331,6 +334,7 @@ void usage(enum logcode F)
71    rprintf(F,"     --timeout=TIME          set I/O timeout in seconds\n");
72    rprintf(F," -I, --ignore-times          don't skip files that match in size and mod-time\n");
73    rprintf(F,"     --size-only             skip files that match in size\n");
74 +  rprintf(F,"     --times-only            skip files that match in mod-time\n");
75    rprintf(F,"     --modify-window=NUM     compare mod-times with reduced accuracy\n");
76    rprintf(F," -T, --temp-dir=DIR          create temporary files in directory DIR\n");
77    rprintf(F," -y, --fuzzy                 find similar file for basis if no dest file\n");
78 @@ -366,6 +370,8 @@ void usage(enum logcode F)
79    rprintf(F,"     --write-batch=FILE      write a batched update to FILE\n");
80    rprintf(F,"     --only-write-batch=FILE like --write-batch but w/o updating destination\n");
81    rprintf(F,"     --read-batch=FILE       read a batched update from FILE\n");
82 +  rprintf(F,"     --source-filter=COMMAND filter file through COMMAND at source\n");
83 +  rprintf(F,"     --dest-filter=COMMAND   filter file through COMMAND at destination\n");
84    rprintf(F,"     --protocol=NUM          force an older protocol version to be used\n");
85  #ifdef INET6
86    rprintf(F," -4, --ipv4                  prefer IPv4\n");
87 @@ -443,6 +449,7 @@ static struct poptOption long_options[] 
88    {"chmod",            0,  POPT_ARG_STRING, 0, OPT_CHMOD, 0, 0 },
89    {"ignore-times",    'I', POPT_ARG_NONE,   &ignore_times, 0, 0, 0 },
90    {"size-only",        0,  POPT_ARG_NONE,   &size_only, 0, 0, 0 },
91 +  {"times-only",       0,  POPT_ARG_NONE,   &times_only , 0, 0, 0 },
92    {"one-file-system", 'x', POPT_ARG_NONE,   0, 'x', 0, 0 },
93    {"update",          'u', POPT_ARG_NONE,   &update_only, 0, 0, 0 },
94    {"existing",         0,  POPT_ARG_NONE,   &ignore_non_existing, 0, 0, 0 },
95 @@ -516,6 +523,8 @@ static struct poptOption long_options[] 
96    {"password-file",    0,  POPT_ARG_STRING, &password_file, 0, 0, 0 },
97    {"blocking-io",      0,  POPT_ARG_VAL,    &blocking_io, 1, 0, 0 },
98    {"no-blocking-io",   0,  POPT_ARG_VAL,    &blocking_io, 0, 0, 0 },
99 +  {"source-filter",    0,  POPT_ARG_STRING, &source_filter, 0, 0, 0 },
100 +  {"dest-filter",      0,  POPT_ARG_STRING, &dest_filter, 0, 0, 0 },
101    {"protocol",         0,  POPT_ARG_INT,    &protocol_version, 0, 0, 0 },
102    {"checksum-seed",    0,  POPT_ARG_INT,    &checksum_seed, 0, 0, 0 },
103    {"server",           0,  POPT_ARG_NONE,   0, OPT_SERVER, 0, 0 },
104 @@ -1380,6 +1389,16 @@ int parse_arguments(int *argc, const cha
105                 }
106         }
107  
108 +       if (source_filter || dest_filter) {
109 +               if (whole_file == 0) {
110 +                       snprintf(err_buf, sizeof err_buf,
111 +                                "--no-whole-file cannot be used with --%s-filter\n",
112 +                                source_filter ? "source" : "dest");
113 +                       return 0;
114 +               }
115 +               whole_file = 1;
116 +       }
117 +
118         if (files_from) {
119                 char *h, *p;
120                 int q;
121 @@ -1640,6 +1659,25 @@ void server_options(char **args,int *arg
122                         args[ac++] = "--super";
123         }
124  
125 +       if (times_only && am_sender)
126 +               args[ac++] = "--times-only";
127 +
128 +       if (source_filter && !am_sender) {
129 +               /* Need to single quote the arg to keep the remote shell
130 +                * from splitting it.  FIXME: breaks if command has single quotes. */
131 +               if (asprintf(&arg, "--source-filter='%s'", source_filter) < 0)
132 +                       goto oom;
133 +               args[ac++] = arg;
134 +       }
135 +
136 +       if (dest_filter && am_sender) {
137 +               /* Need to single quote the arg to keep the remote shell
138 +                * from splitting it.  FIXME: breaks if command has single quotes. */
139 +               if (asprintf(&arg, "--dest-filter='%s'", dest_filter) < 0)
140 +                       goto oom;
141 +               args[ac++] = arg;
142 +       }
143 +
144         if (size_only)
145                 args[ac++] = "--size-only";
146  
147 --- old/pipe.c
148 +++ new/pipe.c
149 @@ -157,3 +157,77 @@ pid_t local_child(int argc, char **argv,
150  
151         return pid;
152  }
153 +
154 +pid_t run_filter(char *command[], int out, int *pipe_to_filter)
155 +{
156 +       pid_t pid;
157 +       int pipefds[2];
158 +       /* Run command[] as a child: stdin from a new pipe, stdout to `out`. */
159 +       if (verbose >= 2)
160 +               print_child_argv(command);
161 +
162 +       if (pipe(pipefds) < 0) {
163 +               rsyserr(FERROR, errno, "pipe");
164 +               exit_cleanup(RERR_IPC);
165 +       }
166 +
167 +       pid = do_fork();
168 +       if (pid == -1) {
169 +               rsyserr(FERROR, errno, "fork");
170 +               exit_cleanup(RERR_IPC);
171 +       }
172 +
173 +       if (pid == 0) { /* child */
174 +               if (dup2(pipefds[0], STDIN_FILENO) < 0
175 +                || close(pipefds[1]) < 0
176 +                || dup2(out, STDOUT_FILENO) < 0) {
177 +                       rsyserr(FERROR, errno, "Failed dup/close");
178 +                       exit_cleanup(RERR_IPC);
179 +               }
180 +               umask(orig_umask);
181 +               set_blocking(STDIN_FILENO);
182 +               if (blocking_io)
183 +                       set_blocking(STDOUT_FILENO);
184 +               execvp(command[0], command); /* returns only on failure */
185 +               rsyserr(FERROR, errno, "Failed to exec %s", command[0]);
186 +               exit_cleanup(RERR_IPC);
187 +       }
188 +
189 +       if (close(pipefds[0]) < 0) { /* parent drops the pipe's read end */
190 +               rsyserr(FERROR, errno, "Failed to close");
191 +               exit_cleanup(RERR_IPC);
192 +       }
193 +
194 +       *pipe_to_filter = pipefds[1]; /* write end: feeds the child's stdin */
195 +
196 +       return pid;
197 +}
198 +
199 +pid_t run_filter_on_file(char *command[], int out, int in)
200 +{
201 +       pid_t pid;
202 +       /* Run command[] as a child with stdin from `in` and stdout to `out`. */
203 +       if (verbose >= 2)
204 +               print_child_argv(command);
205 +
206 +       pid = do_fork();
207 +       if (pid == -1) {
208 +               rsyserr(FERROR, errno, "fork");
209 +               exit_cleanup(RERR_IPC);
210 +       }
211 +
212 +       if (pid == 0) { /* child */
213 +               if (dup2(in, STDIN_FILENO) < 0
214 +                || dup2(out, STDOUT_FILENO) < 0) {
215 +                       rsyserr(FERROR, errno, "Failed to dup2");
216 +                       exit_cleanup(RERR_IPC);
217 +               }
218 +               if (blocking_io) /* NOTE(review): no umask()/stdin set_blocking() here, unlike run_filter() */
219 +                       set_blocking(STDOUT_FILENO);
220 +               execvp(command[0], command); /* returns only on failure */
221 +               rsyserr(FERROR, errno, "Failed to exec %s", command[0]);
222 +               exit_cleanup(RERR_IPC);
223 +       }
224 +
225 +       return pid; /* `in` and `out` are left open in the parent */
226 +}
227 --- old/receiver.c
228 +++ new/receiver.c
229 @@ -53,6 +53,7 @@ extern int inplace;
230  extern int delay_updates;
231  extern struct stats stats;
232  extern char *log_format;
233 +extern char *dest_filter;
234  extern char *tmpdir;
235  extern char *partial_dir;
236  extern char *basis_dir[];
237 @@ -411,6 +412,8 @@ int recv_files(int f_in, struct file_lis
238                       : !am_server && log_format_has_i;
239         int max_phase = protocol_version >= 29 ? 2 : 1;
240         int i, recv_ok;
241 +       pid_t pid = 0;
242 +       char *filter_argv[MAX_FILTER_ARGS + 1];
243  
244         if (verbose > 2)
245                 rprintf(FINFO,"recv_files(%d) starting\n",flist->count);
246 @@ -423,6 +426,23 @@ int recv_files(int f_in, struct file_lis
247         if (delay_updates)
248                 init_delayed_bits(flist->count);
249  
250 +       if (dest_filter) {
251 +               char *p;
252 +               char *sep = " \t";
253 +               int i;
254 +               for (p = strtok(dest_filter, sep), i = 0;
255 +                    p && i < MAX_FILTER_ARGS;
256 +                    p = strtok(0, sep))
257 +                       filter_argv[i++] = p;
258 +               filter_argv[i] = NULL;
259 +               if (p) {
260 +                       rprintf(FERROR,
261 +                               "Too many arguments to dest-filter (> %d)\n",
262 +                               MAX_FILTER_ARGS);
263 +                       exit_cleanup(RERR_SYNTAX);
264 +               }
265 +       }
266 +
267         while (1) {
268                 cleanup_disable();
269  
270 @@ -665,6 +685,9 @@ int recv_files(int f_in, struct file_lis
271                 else if (!am_server && verbose && do_progress)
272                         rprintf(FINFO, "%s\n", fname);
273  
274 +               if (dest_filter)
275 +                       pid = run_filter(filter_argv, fd2, &fd2);
276 +
277                 /* recv file data */
278                 recv_ok = receive_data(f_in, fnamecmp, fd1, st.st_size,
279                                        fname, fd2, file->length);
280 @@ -680,6 +703,16 @@ int recv_files(int f_in, struct file_lis
281                         exit_cleanup(RERR_FILEIO);
282                 }
283  
284 +               if (dest_filter) {
285 +                       int status;
286 +                       wait_process(pid, &status);
287 +                       if (status != 0) {
288 +                               rprintf(FERROR, "filter %s exited code: %d\n",
289 +                                       dest_filter, status);
290 +                               continue;
291 +                       }
292 +               }
293 +
294                 if ((recv_ok && (!delay_updates || !partialptr)) || inplace) {
295                         if (partialptr == fname || *partial_dir == '/')
296                                 partialptr = NULL;
297 --- old/rsync.h
298 +++ new/rsync.h
299 @@ -103,6 +103,7 @@
300  #define IOERR_DEL_LIMIT (1<<2)
301  
302  #define MAX_ARGS 1000
303 +#define MAX_FILTER_ARGS 100
304  #define MAX_BASIS_DIRS 20
305  #define MAX_SERVER_ARGS (MAX_BASIS_DIRS*2 + 100)
306  
307 --- old/rsync.yo
308 +++ new/rsync.yo
309 @@ -355,6 +355,7 @@ to the detailed description below for a 
310       --timeout=TIME          set I/O timeout in seconds
311   -I, --ignore-times          don't skip files that match size and time
312       --size-only             skip files that match in size
313 +     --times-only            skip files that match in mod-time
314       --modify-window=NUM     compare mod-times with reduced accuracy
315   -T, --temp-dir=DIR          create temporary files in directory DIR
316   -y, --fuzzy                 find similar file for basis if no dest file
317 @@ -390,6 +391,8 @@ to the detailed description below for a 
318       --write-batch=FILE      write a batched update to FILE
319       --only-write-batch=FILE like --write-batch but w/o updating dest
320       --read-batch=FILE       read a batched update from FILE
321 +     --source-filter=COMMAND filter file through COMMAND at source
322 +     --dest-filter=COMMAND   filter file through COMMAND at destination
323       --protocol=NUM          force an older protocol version to be used
324       --checksum-seed=NUM     set block/file checksum seed (advanced)
325   -4, --ipv4                  prefer IPv4
326 @@ -1598,6 +1601,33 @@ file previously generated by bf(--write-
327  If em(FILE) is bf(-), the batch data will be read from standard input.
328  See the "BATCH MODE" section for details.
329  
330 +dit(bf(--source-filter=COMMAND)) This option allows the user to specify a
331 +filter program that will be applied to the contents of all transferred
332 +regular files before the data is sent to destination.  COMMAND will receive
333 +the data on its standard input and it should write the filtered data to
334 +standard output.  COMMAND should exit non-zero if it cannot process the
335 +data or if it encounters an error when writing the data to stdout.
336 +
337 +Example: --source-filter="gzip -9" will cause remote files to be
338 +compressed.
339 +Use of --source-filter automatically enables --whole-file.
340 +If your filter does not output the same number of bytes that it received on
341 +input, you should use --times-only to disable size and content checks on
342 +subsequent rsync runs.
343 +
344 +dit(bf(--dest-filter=COMMAND)) This option allows you to specify a filter
345 +program that will be applied to the contents of all transferred regular
346 +files before the data is written to disk.  COMMAND will receive the data on
347 +its standard input and it should write the filtered data to standard
348 +output.  COMMAND should exit non-zero if it cannot process the data or if
349 +it encounters an error when writing the data to stdout.
350 +
351 +Example: --dest-filter="gzip -9" will cause remote files to be compressed.
352 +Use of --dest-filter automatically enables --whole-file.
353 +If your filter does not output the same number of bytes that it
354 +received on input, you should use --times-only to disable size and
355 +content checks on subsequent rsync runs.
356 +
357  dit(bf(--protocol=NUM)) Force an older protocol version to be used.  This
358  is useful for creating a batch file that is compatible with an older
359  version of rsync.  For instance, if rsync 2.6.4 is being used with the
360 --- old/sender.c
361 +++ new/sender.c
362 @@ -41,6 +41,7 @@ extern int write_batch;
363  extern struct stats stats;
364  extern struct file_list *the_file_list;
365  extern char *log_format;
366 +extern char *source_filter;
367  
368  
369  /**
370 @@ -219,6 +220,26 @@ void send_files(struct file_list *flist,
371                       : !am_server && log_format_has_i;
372         int f_xfer = write_batch < 0 ? batch_fd : f_out;
373         int i, j;
374 +       char *filter_argv[MAX_FILTER_ARGS + 1];
375 +       char *tmp = 0;
376 +       int unlink_tmp = 0;
377 +
378 +       if (source_filter) {
379 +               char *p;
380 +               char *sep = " \t";
381 +               int i;
382 +               for (p = strtok(source_filter, sep), i = 0;
383 +                    p && i < MAX_FILTER_ARGS;
384 +                    p = strtok(0, sep))
385 +                       filter_argv[i++] = p;
386 +               filter_argv[i] = NULL;
387 +               if (p) {
388 +                       rprintf(FERROR,
389 +                               "Too many arguments to source-filter (> %d)\n",
390 +                               MAX_FILTER_ARGS);
391 +                       exit_cleanup(RERR_SYNTAX);
392 +               }
393 +       }
394  
395         if (verbose > 2)
396                 rprintf(FINFO, "send_files starting\n");
397 @@ -293,6 +314,7 @@ void send_files(struct file_list *flist,
398                         return;
399                 }
400  
401 +               unlink_tmp = 0;
402                 fd = do_open(fname, O_RDONLY, 0);
403                 if (fd == -1) {
404                         if (errno == ENOENT) {
405 @@ -321,6 +343,33 @@ void send_files(struct file_list *flist,
406                         return;
407                 }
408  
409 +               if (source_filter) {
410 +                       int fd2;
411 +                       char *tmpl = "/tmp/rsync-filtered_sourceXXXXXX";
412 +
413 +                       tmp = strdup(tmpl);
414 +                       fd2 = mkstemp(tmp);
415 +                       if (fd2 == -1) {
416 +                               rprintf(FERROR, "mkstemp %s failed: %s\n",
417 +                                       tmp, strerror(errno));
418 +                       } else {
419 +                               int status;
420 +                               pid_t pid = run_filter_on_file(filter_argv, fd2, fd);
421 +                               close(fd);
422 +                               close(fd2);
423 +                               wait_process(pid, &status);
424 +                               if (status != 0) {
425 +                                       rprintf(FERROR,
426 +                                           "bypassing source filter %s; exited with code: %d\n",
427 +                                           source_filter, status);
428 +                                       fd = do_open(fname, O_RDONLY, 0);
429 +                               } else {
430 +                                       fd = do_open(tmp, O_RDONLY, 0);
431 +                                       unlink_tmp = 1;
432 +                               }
433 +                       }
434 +               }
435 +
436                 if (st.st_size) {
437                         int32 read_size = MAX(s->blength * 3, MAX_MAP_SIZE);
438                         mbuf = map_file(fd, st.st_size, read_size, s->blength);
439 @@ -363,6 +412,8 @@ void send_files(struct file_list *flist,
440                         }
441                 }
442                 close(fd);
443 +               if (unlink_tmp)
444 +                       unlink(tmp);
445  
446                 free_sums(s);
447