1 This patch adds an option --tr=BAD/GOOD to transliterate filenames. It
2 can be used to remove or replace characters that are illegal on the destination filesystem.
3 Jeff Weber expressed interest in this:
5 http://lists.samba.org/archive/rsync/2007-October/018996.html
7 To use this patch, run these commands for a successful build:
9 patch -p1 <patches/transliterate.diff
10 ./configure (optional if already run)
13 based-on: 9a06b2edb0ea1a226bcc642682c07bacd2ea47d3
14 diff --git a/flist.c b/flist.c
17 @@ -78,6 +78,7 @@ extern uid_t our_uid;
18 extern struct stats stats;
19 extern char *filesfrom_host;
20 extern char *usermap, *groupmap;
23 extern struct name_num_item *file_sum_nni;
25 @@ -106,6 +107,8 @@ int file_old_total = 0; /* total of active items that will soon be gone */
26 int flist_eof = 0; /* all the file-lists are now known */
27 int xfer_flags_as_varint = 0;
29 +char tr_substitutions[256];
32 #define SLASH_ENDING_NAME 1
34 @@ -679,6 +682,23 @@ static void send_file_entry(int f, const char *fname, struct file_struct *file,
35 stats.total_size += F_LENGTH(file);
38 +static void transliterate(char *path, int len)
41 + /* Find position of any char in tr_opt in path, or the end of the path. */
42 + int span = strcspn(path, tr_opt);
43 + if ((len -= span) == 0)
46 + if ((*path = tr_substitutions[*(uchar*)path]) == '\0')
47 + memmove(path, path+1, len--); /* copies the trailing '\0' too. */
55 static struct file_struct *recv_file_entry(int f, struct file_list *flist, int xflags)
57 static int64 modtime, atime;
58 @@ -750,9 +770,13 @@ static struct file_struct *recv_file_entry(int f, struct file_list *flist, int x
61 thisname[outbuf.len] = '\0';
62 + basename_len = outbuf.len;
67 + transliterate(thisname, basename_len);
70 && (clean_fname(thisname, CFN_REFUSE_DOT_DOT_DIRS) < 0 || (!relative_paths && *thisname == '/'))) {
71 rprintf(FERROR, "ABORTING due to unsafe pathname from sender: %s\n", thisname);
72 @@ -2575,6 +2599,15 @@ struct file_list *recv_file_list(int f, int dir_ndx)
73 parse_name_map(usermap, True);
75 parse_name_map(groupmap, False);
76 + if (tr_opt) { /* Parse FROM/TO string and populate tr_substitutions[] */
78 + if ((t = strchr(tr_opt, '/')) != NULL)
82 + for (f = tr_opt; *f; f++)
83 + tr_substitutions[*(uchar*)f] = *t ? *t++ : '\0';
87 start_read = stats.total_read;
88 diff --git a/options.c b/options.c
91 @@ -211,6 +211,7 @@ int logfile_format_has_i = 0;
92 int logfile_format_has_o_or_i = 0;
93 int always_checksum = 0;
97 #define MAX_BATCH_NAME_LEN 256 /* Must be less than MAXPATHLEN-13 */
98 char *batch_name = NULL;
99 @@ -813,6 +814,7 @@ static struct poptOption long_options[] = {
100 {"temp-dir", 'T', POPT_ARG_STRING, &tmpdir, 0, 0, 0 },
101 {"iconv", 0, POPT_ARG_STRING, &iconv_opt, 0, 0, 0 },
102 {"no-iconv", 0, POPT_ARG_NONE, 0, OPT_NO_ICONV, 0, 0 },
103 + {"tr", 0, POPT_ARG_STRING, &tr_opt, 0, 0, 0 },
104 {"ipv4", '4', POPT_ARG_VAL, &default_af_hint, AF_INET, 0, 0 },
105 {"ipv6", '6', POPT_ARG_VAL, &default_af_hint, AF_INET6, 0, 0 },
106 {"8-bit-output", '8', POPT_ARG_VAL, &allow_8bit_chars, 1, 0, 0 },
107 @@ -2488,6 +2490,24 @@ int parse_arguments(int *argc_p, const char ***argv_p)
112 + if (*tr_opt == '/' && tr_opt[1]) {
113 + snprintf(err_buf, sizeof err_buf,
114 + "Do not start the --tr arg with a slash\n");
117 + if (*tr_opt && *tr_opt != '/') {
118 + need_unsorted_flist = 1;
119 + arg = strchr(tr_opt, '/');
120 + if (arg && strchr(arg+1, '/')) {
121 + snprintf(err_buf, sizeof err_buf,
122 + "--tr cannot transliterate slashes\n");
129 if (trust_sender || am_server || read_batch)
130 trust_sender_args = trust_sender_filter = 1;
131 else if (old_style_args || filesfrom_host != NULL)
132 @@ -2956,6 +2976,12 @@ void server_options(char **args, int *argc_p)
133 if (relative_paths && !implied_dirs && (!am_sender || protocol_version >= 30))
134 args[ac++] = "--no-implied-dirs";
137 + if (asprintf(&arg, "--tr=%s", tr_opt) < 0)
142 if (write_devices && am_sender)
143 args[ac++] = "--write-devices";
145 diff --git a/rsync.1.md b/rsync.1.md
148 @@ -555,6 +555,7 @@ has its own detailed description later in this manpage.
149 --read-batch=FILE read a batched update from FILE
150 --protocol=NUM force an older protocol version to be used
151 --iconv=CONVERT_SPEC request charset conversion of filenames
152 +--tr=BAD/GOOD transliterate filenames
153 --checksum-seed=NUM set block/file checksum seed (advanced)
154 --ipv4, -4 prefer IPv4
155 --ipv6, -6 prefer IPv6
156 @@ -3754,6 +3755,22 @@ expand it.
157 free to specify just the local charset for a daemon transfer (e.g.
162 + Transliterates filenames on the receiver, after the iconv conversion (if
163 + any). This can be used to remove characters illegal on the destination
164 + filesystem. If you use this option, consider saving a "find . -ls" listing
165 + of the source tree on the destination so that you can recover the original
166 + filenames later if you need them.
168 + The argument consists of a string of characters to remove, optionally
169 + followed by a slash and a string of corresponding characters with which to
170 + replace them. The second string may be shorter, in which case any leftover
171 + characters in the first string are simply deleted. For example,
172 + `--tr=':\/!'` replaces colons with exclamation marks and deletes
173 + backslashes. Slashes cannot be transliterated because it would cause
176 0. `--ipv4`, `-4` or `--ipv6`, `-6`
178 Tells rsync to prefer IPv4/IPv6 when creating sockets or running ssh. This