1 This patch adds an option --tr=BAD/GOOD to transliterate filenames. It
2 can be used to remove characters illegal on the destination filesystem.
3 Jeff Weber expressed interest in this:
5 http://lists.samba.org/archive/rsync/2007-October/018996.html
7 To use this patch, run these commands for a successful build:
9 patch -p1 <patches/transliterate.diff
10 ./configure (optional if already run)
13 based-on: 7c8f180900432e646c0a4bd02e2c4033068dbb7c
14 diff --git a/flist.c b/flist.c
17 @@ -73,6 +73,7 @@ extern uid_t our_uid;
18 extern struct stats stats;
19 extern char *filesfrom_host;
20 extern char *usermap, *groupmap;
23 extern char curr_dir[MAXPATHLEN];
25 @@ -99,6 +100,8 @@ int file_total = 0; /* total of all active items over all file-lists */
26 int file_old_total = 0; /* total of active items that will soon be gone */
27 int flist_eof = 0; /* all the file-lists are now known */
29 +char tr_substitutions[256];
32 #define SLASH_ENDING_NAME 1
34 @@ -667,6 +670,23 @@ static void send_file_entry(int f, const char *fname, struct file_struct *file,
35 stats.total_size += F_LENGTH(file);
38 +static void transliterate(char *path, int len)
41 + /* Find position of any char in tr_opt in path, or the end of the path. */
42 + int span = strcspn(path, tr_opt);
43 + if ((len -= span) == 0)
46 + if ((*path = tr_substitutions[*(uchar*)path]) == '\0')
47 + memmove(path, path+1, len--); /* copies the trailing '\0' too. */
55 static struct file_struct *recv_file_entry(int f, struct file_list *flist, int xflags)
58 @@ -732,9 +752,13 @@ static struct file_struct *recv_file_entry(int f, struct file_list *flist, int x
61 thisname[outbuf.len] = '\0';
62 + basename_len = outbuf.len;
67 + transliterate(thisname, basename_len);
70 clean_fname(thisname, 0);
72 @@ -2436,6 +2460,15 @@ struct file_list *recv_file_list(int f)
73 parse_name_map(usermap, True);
75 parse_name_map(groupmap, False);
76 + if (tr_opt) { /* Parse FROM/TO string and populate tr_substitutions[] */
78 + if ((t = strchr(tr_opt, '/')) != NULL)
82 + for (f = tr_opt; *f; f++)
83 + tr_substitutions[*(uchar*)f] = *t ? *t++ : '\0';
87 start_read = stats.total_read;
88 diff --git a/options.c b/options.c
91 @@ -191,6 +191,7 @@ int logfile_format_has_i = 0;
92 int logfile_format_has_o_or_i = 0;
93 int always_checksum = 0;
97 #define MAX_BATCH_NAME_LEN 256 /* Must be less than MAXPATHLEN-13 */
98 char *batch_name = NULL;
99 @@ -796,6 +797,7 @@ void usage(enum logcode F)
101 rprintf(F," --iconv=CONVERT_SPEC request charset conversion of filenames\n");
103 + rprintf(F," --tr=BAD/GOOD transliterate filenames\n");
104 rprintf(F," -4, --ipv4 prefer IPv4\n");
105 rprintf(F," -6, --ipv6 prefer IPv6\n");
106 rprintf(F," --version print version number\n");
107 @@ -1013,6 +1015,7 @@ static struct poptOption long_options[] = {
108 {"iconv", 0, POPT_ARG_STRING, &iconv_opt, 0, 0, 0 },
109 {"no-iconv", 0, POPT_ARG_NONE, 0, OPT_NO_ICONV, 0, 0 },
111 + {"tr", 0, POPT_ARG_STRING, &tr_opt, 0, 0, 0 },
112 {"ipv4", '4', POPT_ARG_VAL, &default_af_hint, AF_INET, 0, 0 },
113 {"ipv6", '6', POPT_ARG_VAL, &default_af_hint, AF_INET6, 0, 0 },
114 {"8-bit-output", '8', POPT_ARG_VAL, &allow_8bit_chars, 1, 0, 0 },
115 @@ -2272,6 +2275,24 @@ int parse_arguments(int *argc_p, const char ***argv_p)
120 + if (*tr_opt == '/' && tr_opt[1]) {
121 + snprintf(err_buf, sizeof err_buf,
122 + "Do not start the --tr arg with a slash\n");
125 + if (*tr_opt && *tr_opt != '/') {
126 + need_unsorted_flist = 1;
127 + arg = strchr(tr_opt, '/');
128 + if (arg && strchr(arg+1, '/')) {
129 + snprintf(err_buf, sizeof err_buf,
130 + "--tr cannot transliterate slashes\n");
140 @@ -2683,6 +2704,12 @@ void server_options(char **args, int *argc_p)
141 if (fuzzy_basis && am_sender)
142 args[ac++] = "--fuzzy";
145 + if (asprintf(&arg, "--tr=%s", tr_opt) < 0)
150 if (remove_source_files == 1)
151 args[ac++] = "--remove-source-files";
152 else if (remove_source_files)
153 diff --git a/rsync.yo b/rsync.yo
156 @@ -452,6 +452,7 @@ to the detailed description below for a complete description. verb(
157 --read-batch=FILE read a batched update from FILE
158 --protocol=NUM force an older protocol version to be used
159 --iconv=CONVERT_SPEC request charset conversion of filenames
160 + --tr=BAD/GOOD transliterate filenames
161 --checksum-seed=NUM set block/file checksum seed (advanced)
162 -4, --ipv4 prefer IPv4
163 -6, --ipv6 prefer IPv6
164 @@ -2512,6 +2513,22 @@ daemon uses the charset specified in its "charset" configuration parameter
165 regardless of the remote charset you actually pass. Thus, you may feel free to
166 specify just the local charset for a daemon transfer (e.g. bf(--iconv=utf8)).
168 +dit(bf(--tr=BAD/GOOD)) Transliterates filenames on the receiver, after the
169 +iconv conversion (if any). This can be used to remove characters illegal
170 +on the destination filesystem. If you use this option, consider saving a
171 +"find . -ls" listing of the source in the destination to help you determine
172 +the original filenames should you ever need them.
174 +The argument is a string of characters to remove or replace, optionally
175 +followed by a slash and a string of corresponding characters with which to
176 +replace them. The second string may be shorter, in which case any leftover
177 +characters in the first string are simply deleted. For example,
178 +bf(--tr=':\/!') replaces colons with exclamation marks and deletes backslashes.
179 +Slashes cannot be transliterated because that would change the path structure.
181 +If the receiver is invoked over a remote shell, use bf(--protect-args) to
182 +stop the shell from interpreting any nasty characters in the argument.
184 dit(bf(-4, --ipv4) or bf(-6, --ipv6)) Tells rsync to prefer IPv4/IPv6
185 when creating sockets. This only affects sockets that rsync has direct
186 control over, such as the outgoing socket when directly contacting an