/*
   Unix SMB/CIFS implementation.

   POSIX NTVFS backend - 8.3 name routines

   Copyright (C) Andrew Tridgell 2004

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/
23 #include "system/locale.h"
24 #include "vfs_posix.h"
/*
  this mangling scheme uses the following format

  AAAAnn~n.AAA   (shown for the default prefix length of 4)

  where the n characters are the digits of a base 36 hash, and A represents
  characters from the original string

  The hash is taken of the leading part of the long filename, in uppercase

  for simplicity, we only allow ascii characters in 8.3 names
*/
/*
  ===============================================================================
  NOTE:

  This file deliberately uses non-multibyte string functions in many places. This
  is *not* a mistake. This code is multi-byte safe, but it gets this property
  through some very subtle knowledge of the way multi-byte strings are encoded
  and the fact that this mangling algorithm only supports ascii characters in
  8.3 names.

  please don't convert this file to use the *_m() functions!!
  ===============================================================================
*/
/* M_DEBUG forwards to DEBUG when enabled and expands to nothing otherwise.
   NOTE(review): the preprocessor conditional that selects between the two
   definitions is assumed to be "#if 1" (debug output enabled) - confirm. */
#if 1
#define M_DEBUG(level, x) DEBUG(level, x)
#else
#define M_DEBUG(level, x)
#endif

/* these flags are used to mark characters as having particular
   properties */
#define FLAG_BASECHAR 1
#define FLAG_ASCII 2
#define FLAG_ILLEGAL 4
#define FLAG_WILDCARD 8

/* the "possible" flags are used as a fast way to find possible DOS
   reserved filenames */
#define FLAG_POSSIBLE1 16
#define FLAG_POSSIBLE2 32
#define FLAG_POSSIBLE3 64
#define FLAG_POSSIBLE4 128

/* number of leading characters of the long name kept in the mangled name
   when no valid value is configured */
#define DEFAULT_MANGLE_PREFIX 4

/* the digits used to write the hash in base 36 */
#define MANGLE_BASECHARS "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"

/* test a character against the flags table; relies on a variable named
   'ctx' being in scope at the point of use */
#define FLAG_CHECK(c, flag) (ctx->char_flags[(uint8_t)(c)] & (flag))

/* NULL terminated list of DOS reserved names */
static const char *reserved_names[] =
{ "AUX", "CON", "COM1", "COM2", "COM3", "COM4",
  "LPT1", "LPT2", "LPT3", "NUL", "PRN", NULL };
/*
  per-share state of the 8.3 mangling code

  NOTE(review): the declared types of mangle_prefix, cache_size and
  prefix_cache are inferred from how the rest of this file uses them -
  confirm against the project headers/history.
*/
struct pvfs_mangle_context {
	/* per-byte FLAG_* bits, indexed by character value */
	uint8_t char_flags[256];
	/*
	  this determines how many characters are used from the original
	  filename in the 8.3 mangled name. A larger value leads to a weaker
	  hash and more collisions.  The largest possible value is 6.
	*/
	int mangle_prefix;
	/* hashes are reduced modulo this value (36 ^ (7 - mangle_prefix)) */
	uint32_t mangle_modulus;

	/* we will use a very simple direct mapped prefix cache. The big
	   advantage of this cache structure is speed and low memory usage

	   The cache is indexed by the low-order bits of the hash, and confirmed by
	   hashing the resulting cache entry to match the known hash
	*/
	uint32_t cache_size;
	char **prefix_cache;
	uint32_t *prefix_cache_hashes;

	/* this is used to reverse the base 36 mapping */
	unsigned char base_reverse[256];
};
110 hash a string of the specified length. The string does not need to be
113 this hash needs to be fast with a low collision rate (what hash doesn't?)
115 static uint32_t mangle_hash(struct pvfs_mangle_context *ctx,
116 const char *key, size_t length)
118 return pvfs_name_hash(key, length) % ctx->mangle_modulus;
122 insert an entry into the prefix cache. The string might not be null
124 static void cache_insert(struct pvfs_mangle_context *ctx,
125 const char *prefix, int length, uint32_t hash)
127 int i = hash % ctx->cache_size;
129 if (ctx->prefix_cache[i]) {
130 talloc_free(ctx->prefix_cache[i]);
133 ctx->prefix_cache[i] = talloc_strndup(ctx->prefix_cache, prefix, length);
134 ctx->prefix_cache_hashes[i] = hash;
138 lookup an entry in the prefix cache. Return NULL if not found.
140 static const char *cache_lookup(struct pvfs_mangle_context *ctx, uint32_t hash)
142 int i = hash % ctx->cache_size;
145 if (!ctx->prefix_cache[i] || hash != ctx->prefix_cache_hashes[i]) {
149 /* yep, it matched */
150 return ctx->prefix_cache[i];
155 determine if a string is possibly in a mangled format, ignoring
158 In this algorithm, mangled names use only pure ascii characters (no
159 multi-byte) so we can avoid doing a UCS2 conversion
161 static BOOL is_mangled_component(struct pvfs_mangle_context *ctx,
162 const char *name, size_t len)
166 M_DEBUG(10,("is_mangled_component %s (len %u) ?\n", name, (unsigned int)len));
168 /* check the length */
169 if (len > 12 || len < 8)
172 /* the best distinguishing characteristic is the ~ */
176 /* check extension */
180 for (i=9; name[i] && i < len; i++) {
181 if (! FLAG_CHECK(name[i], FLAG_ASCII)) {
187 /* check lead characters */
188 for (i=0;i<ctx->mangle_prefix;i++) {
189 if (! FLAG_CHECK(name[i], FLAG_ASCII)) {
194 /* check rest of hash */
195 if (! FLAG_CHECK(name[7], FLAG_BASECHAR)) {
198 for (i=ctx->mangle_prefix;i<6;i++) {
199 if (! FLAG_CHECK(name[i], FLAG_BASECHAR)) {
204 M_DEBUG(10,("is_mangled_component %s (len %u) -> yes\n", name, (unsigned int)len));
212 determine if a string is possibly in a mangled format, ignoring
215 In this algorithm, mangled names use only pure ascii characters (no
216 multi-byte) so we can avoid doing a UCS2 conversion
218 NOTE! This interface must be able to handle a path with unix
219 directory separators. It should return true if any component is
222 static BOOL is_mangled(struct pvfs_mangle_context *ctx, const char *name)
227 M_DEBUG(10,("is_mangled %s ?\n", name));
229 for (s=name; (p=strchr(s, '/')); s=p+1) {
230 if (is_mangled_component(ctx, s, PTR_DIFF(p, s))) {
235 /* and the last part ... */
236 return is_mangled_component(ctx, s, strlen(s));
241 see if a filename is an allowable 8.3 name.
243 we are only going to allow ascii characters in 8.3 names, as this
244 simplifies things greatly (it means that we know the string won't
245 get larger when converted from UNIX to DOS formats)
247 static BOOL is_8_3(struct pvfs_mangle_context *ctx,
248 const char *name, BOOL check_case, BOOL allow_wildcards)
253 /* as a special case, the names '.' and '..' are allowable 8.3 names */
254 if (name[0] == '.') {
255 if (!name[1] || (name[1] == '.' && !name[2])) {
260 /* the simplest test is on the overall length of the
261 filename. Note that we deliberately use the ascii string
262 length (not the multi-byte one) as it is faster, and gives us
263 the result we need in this case. Using strlen_m would not
264 only be slower, it would be incorrect */
269 /* find the '.'. Note that once again we use the non-multibyte
271 dot_p = strchr(name, '.');
274 /* if the name doesn't contain a '.' then its length
275 must be less than 8 */
280 int prefix_len, suffix_len;
282 /* if it does contain a dot then the prefix must be <=
283 8 and the suffix <= 3 in length */
284 prefix_len = PTR_DIFF(dot_p, name);
285 suffix_len = len - (prefix_len+1);
287 if (prefix_len > 8 || suffix_len > 3 || suffix_len == 0) {
291 /* a 8.3 name cannot contain more than 1 '.' */
292 if (strchr(dot_p+1, '.')) {
297 /* the length are all OK. Now check to see if the characters themselves are OK */
298 for (i=0; name[i]; i++) {
299 /* note that we may allow wildcard petterns! */
300 if (!FLAG_CHECK(name[i], FLAG_ASCII|(allow_wildcards ? FLAG_WILDCARD : 0)) &&
306 /* it is a good 8.3 name */
312 try to find a 8.3 name in the cache, and if found then
313 return the original long name.
315 static char *check_cache(struct pvfs_mangle_context *ctx,
316 TALLOC_CTX *mem_ctx, const char *name)
318 uint32_t hash, multiplier;
323 /* make sure that this is a mangled name from this cache */
324 if (!is_mangled(ctx, name)) {
325 M_DEBUG(10,("check_cache: %s -> not mangled\n", name));
329 /* we need to extract the hash from the 8.3 name */
330 hash = ctx->base_reverse[(unsigned char)name[7]];
331 for (multiplier=36, i=5;i>=ctx->mangle_prefix;i--) {
332 uint32_t v = ctx->base_reverse[(unsigned char)name[i]];
333 hash += multiplier * v;
337 /* now look in the prefix cache for that hash */
338 prefix = cache_lookup(ctx, hash);
340 M_DEBUG(10,("check_cache: %s -> %08X -> not found\n", name, hash));
344 /* we found it - construct the full name */
345 if (name[8] == '.') {
346 strncpy(extension, name+9, 3);
353 return talloc_asprintf(mem_ctx, "%s.%s", prefix, extension);
356 return talloc_strdup(mem_ctx, prefix);
361 look for a DOS reserved name
363 static BOOL is_reserved_name(struct pvfs_mangle_context *ctx, const char *name)
365 if (FLAG_CHECK(name[0], FLAG_POSSIBLE1) &&
366 FLAG_CHECK(name[1], FLAG_POSSIBLE2) &&
367 FLAG_CHECK(name[2], FLAG_POSSIBLE3) &&
368 FLAG_CHECK(name[3], FLAG_POSSIBLE4)) {
369 /* a likely match, scan the lot */
371 for (i=0; reserved_names[i]; i++) {
372 if (strcasecmp(name, reserved_names[i]) == 0) {
383 See if a filename is a legal long filename.
384 A filename ending in a '.' is not legal unless it's "." or "..". JRA.
386 static BOOL is_legal_name(struct pvfs_mangle_context *ctx, const char *name)
390 codepoint_t c = next_codepoint(name, &c_size);
391 if (c == INVALID_CODEPOINT) {
394 /* all high chars are OK */
399 if (FLAG_CHECK(c, FLAG_ILLEGAL)) {
409 the main forward mapping function, which converts a long filename to
412 if need83 is not set then we only do the mangling if the name is illegal
415 if cache83 is not set then we don't cache the result
417 return NULL if we don't need to do any conversion
419 static char *name_map(struct pvfs_mangle_context *ctx,
420 const char *name, BOOL need83, BOOL cache83)
425 unsigned int extension_length, i;
426 unsigned int prefix_len;
429 const char *basechars = MANGLE_BASECHARS;
431 /* reserved names are handled specially */
432 if (!is_reserved_name(ctx, name)) {
433 /* if the name is already a valid 8.3 name then we don't need to
435 if (is_8_3(ctx, name, False, False)) {
439 /* if the caller doesn't strictly need 8.3 then just check for illegal
441 if (!need83 && is_legal_name(ctx, name)) {
446 /* find the '.' if any */
447 dot_p = strrchr(name, '.');
450 /* if the extension contains any illegal characters or
451 is too long or zero length then we treat it as part
453 for (i=0; i<4 && dot_p[i+1]; i++) {
454 if (! FLAG_CHECK(dot_p[i+1], FLAG_ASCII)) {
459 if (i == 0 || i == 4) dot_p = NULL;
462 /* the leading characters in the mangled name is taken from
463 the first characters of the name, if they are ascii otherwise
466 for (i=0;i<ctx->mangle_prefix && name[i];i++) {
467 lead_chars[i] = name[i];
468 if (! FLAG_CHECK(lead_chars[i], FLAG_ASCII)) {
471 lead_chars[i] = toupper((unsigned char)lead_chars[i]);
473 for (;i<ctx->mangle_prefix;i++) {
477 /* the prefix is anything up to the first dot */
479 prefix_len = PTR_DIFF(dot_p, name);
481 prefix_len = strlen(name);
484 /* the extension of the mangled name is taken from the first 3
485 ascii chars after the dot */
486 extension_length = 0;
488 for (i=1; extension_length < 3 && dot_p[i]; i++) {
489 unsigned char c = dot_p[i];
490 if (FLAG_CHECK(c, FLAG_ASCII)) {
491 extension[extension_length++] = toupper(c);
496 /* find the hash for this prefix */
497 v = hash = mangle_hash(ctx, name, prefix_len);
499 new_name = talloc_array(ctx, char, 13);
500 if (new_name == NULL) {
504 /* now form the mangled name. */
505 for (i=0;i<ctx->mangle_prefix;i++) {
506 new_name[i] = lead_chars[i];
508 new_name[7] = basechars[v % 36];
510 for (i=5; i>=ctx->mangle_prefix; i--) {
512 new_name[i] = basechars[v % 36];
515 /* add the extension */
516 if (extension_length) {
518 memcpy(&new_name[9], extension, extension_length);
519 new_name[9+extension_length] = 0;
525 /* put it in the cache */
526 cache_insert(ctx, name, prefix_len, hash);
529 M_DEBUG(10,("name_map: %s -> %08X -> %s (cache=%d)\n",
530 name, hash, new_name, cache83));
536 /* initialise the flags table
538 we allow only a very restricted set of characters as 'ascii' in this
539 mangling backend. This isn't a significant problem as modern clients
540 use the 'long' filenames anyway, and those don't have these
543 static void init_tables(struct pvfs_mangle_context *ctx)
545 const char *basechars = MANGLE_BASECHARS;
547 /* the list of reserved dos names - all of these are illegal */
549 ZERO_STRUCT(ctx->char_flags);
551 for (i=1;i<128;i++) {
552 if ((i >= '0' && i <= '9') ||
553 (i >= 'a' && i <= 'z') ||
554 (i >= 'A' && i <= 'Z')) {
555 ctx->char_flags[i] |= (FLAG_ASCII | FLAG_BASECHAR);
557 if (strchr("_-$~", i)) {
558 ctx->char_flags[i] |= FLAG_ASCII;
561 if (strchr("*\\/?<>|\":", i)) {
562 ctx->char_flags[i] |= FLAG_ILLEGAL;
565 if (strchr("*?\"<>", i)) {
566 ctx->char_flags[i] |= FLAG_WILDCARD;
570 ZERO_STRUCT(ctx->base_reverse);
572 ctx->base_reverse[(uint8_t)basechars[i]] = i;
575 /* fill in the reserved names flags. These are used as a very
576 fast filter for finding possible DOS reserved filenames */
577 for (i=0; reserved_names[i]; i++) {
578 unsigned char c1, c2, c3, c4;
580 c1 = (unsigned char)reserved_names[i][0];
581 c2 = (unsigned char)reserved_names[i][1];
582 c3 = (unsigned char)reserved_names[i][2];
583 c4 = (unsigned char)reserved_names[i][3];
585 ctx->char_flags[c1] |= FLAG_POSSIBLE1;
586 ctx->char_flags[c2] |= FLAG_POSSIBLE2;
587 ctx->char_flags[c3] |= FLAG_POSSIBLE3;
588 ctx->char_flags[c4] |= FLAG_POSSIBLE4;
589 ctx->char_flags[tolower(c1)] |= FLAG_POSSIBLE1;
590 ctx->char_flags[tolower(c2)] |= FLAG_POSSIBLE2;
591 ctx->char_flags[tolower(c3)] |= FLAG_POSSIBLE3;
592 ctx->char_flags[tolower(c4)] |= FLAG_POSSIBLE4;
594 ctx->char_flags[(unsigned char)'.'] |= FLAG_POSSIBLE4;
597 ctx->mangle_modulus = 1;
598 for (i=0;i<(7-ctx->mangle_prefix);i++) {
599 ctx->mangle_modulus *= 36;
604 initialise the mangling code
606 NTSTATUS pvfs_mangle_init(struct pvfs_state *pvfs)
608 struct pvfs_mangle_context *ctx;
610 ctx = talloc(pvfs, struct pvfs_mangle_context);
612 return NT_STATUS_NO_MEMORY;
615 /* by default have a max of 512 entries in the cache. */
616 ctx->cache_size = lp_parm_int(-1, "mangle", "cachesize", 512);
618 ctx->prefix_cache = talloc_array(ctx, char *, ctx->cache_size);
619 if (ctx->prefix_cache == NULL) {
620 return NT_STATUS_NO_MEMORY;
622 ctx->prefix_cache_hashes = talloc_array(ctx, uint32_t, ctx->cache_size);
623 if (ctx->prefix_cache_hashes == NULL) {
624 return NT_STATUS_NO_MEMORY;
627 memset(ctx->prefix_cache, 0, sizeof(char *) * ctx->cache_size);
628 memset(ctx->prefix_cache_hashes, 0, sizeof(uint32_t) * ctx->cache_size);
630 ctx->mangle_prefix = lp_parm_int(-1, "mangle", "prefix", -1);
631 if (ctx->mangle_prefix < 0 || ctx->mangle_prefix > 6) {
632 ctx->mangle_prefix = DEFAULT_MANGLE_PREFIX;
637 pvfs->mangle_ctx = ctx;
644 return the short name for a component of a full name
646 char *pvfs_short_name_component(struct pvfs_state *pvfs, const char *name)
648 return name_map(pvfs->mangle_ctx, name, True, True);
653 return the short name for a given entry in a directory
655 const char *pvfs_short_name(struct pvfs_state *pvfs, TALLOC_CTX *mem_ctx,
656 struct pvfs_filename *name)
658 char *p = strrchr(name->full_name, '/');
659 char *ret = pvfs_short_name_component(pvfs, p+1);
663 talloc_steal(mem_ctx, ret);
668 lookup a mangled name, returning the original long name if present
671 char *pvfs_mangled_lookup(struct pvfs_state *pvfs, TALLOC_CTX *mem_ctx,
674 return check_cache(pvfs->mangle_ctx, mem_ctx, name);
679 look for a DOS reserved name
681 BOOL pvfs_is_reserved_name(struct pvfs_state *pvfs, const char *name)
683 return is_reserved_name(pvfs->mangle_ctx, name);
688 see if a component of a filename could be a mangled name from our
691 BOOL pvfs_is_mangled_component(struct pvfs_state *pvfs, const char *name)
693 return is_mangled_component(pvfs->mangle_ctx, name, strlen(name));