2 Unix SMB/CIFS implementation.
3 new hash based name mangling implementation
4 Copyright (C) Andrew Tridgell 2002
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software
18 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
22 this mangling scheme uses the following format
26 where nnnnn is a base 36 hash, and A represents characters from the original string
28 The hash is taken of the leading part of the long filename, in uppercase
30 for simplicity, we only allow ascii characters in 8.3 names
35 ===============================================================================
38 This file deliberately uses non-multibyte string functions in many places. This
39 is *not* a mistake. This code is multi-byte safe, but it gets this property
40 through some very subtle knowledge of the way multi-byte strings are encoded
41 and the fact that this mangling algorithm only supports ascii characters in
44 please don't convert this file to use the *_m() functions!!
45 ===============================================================================
52 #define M_DEBUG(level, x) DEBUG(level, x)
54 #define M_DEBUG(level, x)
57 #define FLAG_BASECHAR 1
59 #define FLAG_ILLEGAL 4
60 #define FLAG_WILDCARD 8
61 #define FLAG_POSSIBLE1 16
62 #define FLAG_POSSIBLE2 32
63 #define FLAG_POSSIBLE3 64
64 #define FLAG_POSSIBLE4 128
66 #ifndef MANGLE_CACHE_SIZE
67 #define MANGLE_CACHE_SIZE 4096
70 /* these tables are used to provide fast tests for characters */
71 static unsigned char char_flags[256];
73 /* we will use a very simple direct mapped prefix cache. The big
74 advantage of this cache structure is speed and low memory usage
76 The cache is indexed by the low-order bits of the hash, and confirmed by
77 hashing the resulting cache entry to match the known hash
79 static char **prefix_cache;
80 static u32 *prefix_cache_hashes;
82 /* these are the characters we use in the 8.3 hash. Must be 36 chars long */
83 const char *basechars = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
84 static unsigned char base_reverse[256];
85 #define base_forward(v) basechars[v]
87 /* the list of reserved dos names - all of these are illegal */
88 const char *reserved_names[] = { "AUX", "LOCK$", "CON", "COM1", "COM2", "COM3", "COM4",
89 "LPT1", "LPT2", "LPT3", "NUL", "PRN", NULL };
92 hash a string of the specified length. The string does not need to be
95 this hash needs to be fast with a low collision rate (what hash doesn't?)
97 static u32 mangle_hash(const char *key, unsigned length)
103 strncpy(str, key, length);
107 /* the length of a multi-byte string can change after a strupper */
108 length = strlen(str);
110 /* Set the initial value from the key size. */
111 for (value = 0x238F13AF * length, i=0; i < length; i++) {
112 value = (value + (((unsigned char)str[i]) << (i*5 % 24)));
115 return (1103515243 * value + 12345);
119 initialise the prefix cache
121 static BOOL cache_init(void)
123 if (prefix_cache) return True;
125 prefix_cache = malloc(sizeof(char *) * MANGLE_CACHE_SIZE);
126 if (!prefix_cache) return False;
128 prefix_cache_hashes = malloc(sizeof(u32) * MANGLE_CACHE_SIZE);
129 if (!prefix_cache_hashes) return False;
131 memset(prefix_cache, 0, sizeof(char *) * MANGLE_CACHE_SIZE);
132 memset(prefix_cache_hashes, 0, sizeof(char *) * MANGLE_CACHE_SIZE);
137 insert an entry into the prefix cache. The string may not be null terminated
139 static void cache_insert(const char *prefix, int length, u32 hash)
141 int i = hash % MANGLE_CACHE_SIZE;
143 if (prefix_cache[i]) {
144 free(prefix_cache[i]);
147 prefix_cache[i] = strndup(prefix, length);
148 prefix_cache_hashes[i] = hash;
152 lookup an entry in the prefix cache. Return NULL if not found.
154 static const char *cache_lookup(u32 hash)
156 int i = hash % MANGLE_CACHE_SIZE;
158 if (!prefix_cache[i] || hash != prefix_cache_hashes[i]) {
162 /* yep, it matched */
163 return prefix_cache[i];
168 determine if a string is possibly in a mangled format, ignoring
171 In this algorithm, mangled names use only pure ascii characters (no
172 multi-byte) so we can avoid doing a UCS2 conversion
174 static BOOL is_mangled(const char *name)
176 /* the best distinguishing characteristic is the ~ */
177 if (name[6] != '~') return False;
179 /* check extension */
180 /* check first character */
181 /* check rest of hash */
188 see if a filename is an allowable 8.3 name.
190 we are only going to allow ascii characters in 8.3 names, as this
191 simplifies things greatly (it means that we know the string won't
192 get larger when converted from UNIX to DOS formats)
194 static BOOL is_8_3(const char *name, BOOL check_case)
199 /* as a special case, the names '.' and '..' are allowable 8.3 names */
200 if (name[0] == '.') {
201 if (!name[1] || (name[1] == '.' && !name[2])) {
206 /* the simplest test is on the overall length of the
207 filename. Note that we deliberately use the ascii string
208 length (not the multi-byte one) as it is faster, and gives us
209 the result we need in this case. Using strlen_m would not
210 only be slower, it would be incorrect */
212 if (len > 12) return False;
214 /* find the '.'. Note that once again we use the non-multibyte
216 dot_p = strchr(name, '.');
219 /* if the name doesn't contain a '.' then its length
220 must be less than 8 */
225 int prefix_len, suffix_len;
227 /* if it does contain a dot then the prefix must be <=
228 8 and the suffix <= 3 in length */
229 prefix_len = PTR_DIFF(dot_p, name);
230 suffix_len = len - (prefix_len+1);
232 if (prefix_len > 8 || suffix_len > 3) {
236 /* a 8.3 name cannot contain more than 1 '.' */
237 if (strchr(dot_p+1, '.')) {
242 /* the length are all OK. Now check to see if the characters themselves are OK */
243 for (i=0; name[i]; i++) {
244 if (!(char_flags[(unsigned)(name[i])] & (FLAG_ASCII|FLAG_WILDCARD))) {
249 /* it is a good 8.3 name */
255 reset the mangling cache on a smb.conf reload. This only really makes sense for
256 mangling backends that have parameters in smb.conf, and as this backend doesn't
257 this is a NULL operation
259 static void mangle_reset(void)
266 try to find a 8.3 name in the cache, and if found then
267 replace the string with the original long name.
269 The filename must be able to hold at least sizeof(fstring)
271 static BOOL check_cache(char *name)
273 u32 hash, multiplier;
278 /* make sure that this is a mangled name from this cache */
279 if (!is_mangled(name)) {
283 /* we need to extract the hash from the 8.3 name */
284 hash = base_reverse[(unsigned char)name[7]];
285 for (multiplier=36, i=5;i>=1;i--) {
286 u32 v = base_reverse[(unsigned char)name[i]];
287 hash += multiplier * v;
291 /* now look in the prefix cache for that hash */
292 prefix = cache_lookup(hash);
297 /* we found it - construct the full name */
298 strncpy(extension, name+9, 3);
301 M_DEBUG(0,("check_cache: %s -> %s.%s\n", name, prefix, extension));
302 slprintf(name, sizeof(fstring), "%s.%s", prefix, extension);
304 M_DEBUG(0,("check_cache: %s -> %s\n", name, prefix));
305 fstrcpy(name, prefix);
313 look for a DOS reserved name
315 static BOOL is_reserved_name(const char *name)
317 if ((char_flags[(unsigned char)name[0]] & FLAG_POSSIBLE1) &&
318 (char_flags[(unsigned char)name[1]] & FLAG_POSSIBLE2) &&
319 (char_flags[(unsigned char)name[2]] & FLAG_POSSIBLE3) &&
320 (char_flags[(unsigned char)name[3]] & FLAG_POSSIBLE4)) {
321 /* a likely match, scan the lot */
323 for (i=0; reserved_names[i]; i++) {
324 int len = strlen(reserved_names[i]);
325 /* note that we match on COM1 as well as COM1.foo */
326 if (strncasecmp(name, reserved_names[i], len) == 0 &&
327 (name[len] == '.' || name[len] == 0)) {
337 see if a filename is a legal long filename
339 static BOOL is_legal_name(const char *name)
342 if (char_flags[(unsigned char)*name] & FLAG_ILLEGAL) {
352 the main forward mapping function, which converts a long filename to
355 if need83 is not set then we only do the mangling if the name is illegal
358 if cache83 is not set then we don't cache the result
360 the name parameter must be able to hold 13 bytes
362 static BOOL name_map(char *name, BOOL need83, BOOL cache83)
367 int extension_length, i;
372 /* reserved names are handled specially */
373 if (!is_reserved_name(name)) {
374 /* if the name is already a valid 8.3 name then we don't need to
376 if (is_8_3(name, False)) {
380 /* if the caller doesn't strictly need 8.3 then just check for illegal
382 if (!need83 && is_legal_name(name)) {
387 /* find the '.' if any */
388 dot_p = strchr(name, '.');
390 /* the leading character in the mangled name is taken from
391 the first character of the name, if it is ascii
392 otherwise '_' is used
395 if (! (char_flags[(unsigned char)lead_char] & FLAG_ASCII)) {
398 lead_char = toupper(lead_char);
400 /* the prefix is anything up to the first dot */
402 prefix_len = PTR_DIFF(dot_p, name);
404 prefix_len = strlen(name);
407 /* the extension of the mangled name is taken from the first 3
408 ascii chars after the dot */
409 extension_length = 0;
411 for (i=1; extension_length < 3 && dot_p[i]; i++) {
413 if (char_flags[(unsigned char)c] & FLAG_ASCII) {
414 extension[extension_length++] = toupper(c);
419 /* find the hash for this prefix */
420 v = hash = mangle_hash(name, prefix_len);
422 /* now form the mangled name */
423 new_name[0] = lead_char;
424 new_name[7] = base_forward(v % 36);
426 for (i=5; i>=1; i--) {
428 new_name[i] = base_forward(v % 36);
431 /* add the extension */
432 if (extension_length) {
434 memcpy(&new_name[9], extension, extension_length);
435 new_name[9+extension_length] = 0;
441 /* put it in the cache */
442 cache_insert(name, prefix_len, hash);
445 M_DEBUG(0,("name_map: %s -> %s\n", name, new_name));
447 /* and overwrite the old name */
448 fstrcpy(name, new_name);
450 /* all done, we've managed to mangle it */
455 /* initialise the flags table
457 we allow only a very restricted set of characters as 'ascii' in this
458 mangling backend. This isn't a significant problem as modern clients
459 use the 'long' filenames anyway, and those don't have these
462 static void init_tables(void)
466 memset(char_flags, 0, sizeof(char_flags));
468 for (i=0;i<128;i++) {
469 if ((i >= '0' && i <= '9') ||
470 (i >= 'a' && i <= 'z') ||
471 (i >= 'A' && i <= 'Z')) {
472 char_flags[i] |= (FLAG_ASCII | FLAG_BASECHAR);
474 if (strchr("._-$~", i)) {
475 char_flags[i] |= FLAG_ASCII;
478 if (strchr("*\\/?<>|\":", i)) {
479 char_flags[i] |= FLAG_ILLEGAL;
482 if (strchr("*?\"<>", i)) {
483 char_flags[i] |= FLAG_WILDCARD;
487 memset(base_reverse, 0, sizeof(base_reverse));
489 base_reverse[(unsigned char)base_forward(i)] = i;
492 /* fill in the reserved names flags. These are used as a very
493 fast filter for finding possible DOS reserved filenames */
494 for (i=0; reserved_names[i]; i++) {
495 unsigned char c1, c2, c3, c4;
497 c1 = (unsigned char)reserved_names[i][0];
498 c2 = (unsigned char)reserved_names[i][1];
499 c3 = (unsigned char)reserved_names[i][2];
500 c4 = (unsigned char)reserved_names[i][3];
502 char_flags[c1] |= FLAG_POSSIBLE1;
503 char_flags[c2] |= FLAG_POSSIBLE2;
504 char_flags[c3] |= FLAG_POSSIBLE3;
505 char_flags[c4] |= FLAG_POSSIBLE4;
506 char_flags[tolower(c1)] |= FLAG_POSSIBLE1;
507 char_flags[tolower(c2)] |= FLAG_POSSIBLE2;
508 char_flags[tolower(c3)] |= FLAG_POSSIBLE3;
509 char_flags[tolower(c4)] |= FLAG_POSSIBLE4;
511 char_flags[(unsigned char)'.'] |= FLAG_POSSIBLE4;
517 the following provides the abstraction layer to make it easier
518 to drop in an alternative mangling implementation */
519 static struct mangle_fns mangle_fns = {
527 /* return the methods for this mangling implementation */
528 struct mangle_fns *mangle_hash2_init(void)