2 Unix SMB/CIFS implementation.
4 POSIX NTVFS backend - 8.3 name routines
6 Copyright (C) Andrew Tridgell 2004
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or
11 (at your option) any later version.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program. If not, see <http://www.gnu.org/licenses/>.
23 #include "system/locale.h"
24 #include "vfs_posix.h"
25 #include "param/param.h"
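/*
  this mangling scheme uses the following format

  Annnn~n.AAA

  where nnnnn is a base 36 hash, and A represents characters from the original string
  (the layout shown is for a mangle prefix of 1; a larger prefix replaces
  leading hash digits with more characters taken from the original name)

  The hash is taken of the leading part of the long filename, in uppercase

  for simplicity, we only allow ascii characters in 8.3 names
*/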
/*
  ===============================================================================
  NOTE NOTE NOTE!!!

  This file deliberately uses non-multibyte string functions in many places. This
  is *not* a mistake. This code is multi-byte safe, but it gets this property
  through some very subtle knowledge of the way multi-byte strings are encoded
  and the fact that this mangling algorithm only supports ascii characters in
  8.3 names.

  please don't convert this file to use the *_m() functions!!
  ===============================================================================
*/
/* M_DEBUG() forwards to DEBUG() while the condition below is non-zero;
   change it to 0 to compile the mangling debug messages out entirely.
   NOTE(review): the active branch is assumed to be the first one - confirm */
#if 1
#define M_DEBUG(level, x) DEBUG(level, x)
#else
#define M_DEBUG(level, x)
#endif
/* these flags are used to mark characters in as having particular
   properties. Each flag is a distinct bit in a char_flags[] entry */
#define FLAG_BASECHAR 1
#define FLAG_ASCII 2
#define FLAG_ILLEGAL 4
#define FLAG_WILDCARD 8

/* the "possible" flags are used as a fast way to find possible DOS
   reserved filenames: FLAG_POSSIBLEn marks a byte that occurs at
   position n of at least one reserved name */
#define FLAG_POSSIBLE1 16
#define FLAG_POSSIBLE2 32
#define FLAG_POSSIBLE3 64
#define FLAG_POSSIBLE4 128

/* number of leading characters kept from the original name when the
   configured value is missing or out of range */
#define DEFAULT_MANGLE_PREFIX 4

/* the digits used to print the hash in base 36 */
#define MANGLE_BASECHARS "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"

/* test the flags of one byte; relies on a variable called 'ctx' being in
   scope. The cast keeps bytes >= 0x80 from producing a negative index */
#define FLAG_CHECK(c, flag) (ctx->char_flags[(uint8_t)(c)] & (flag))
/* DOS device names that must never be used as-is for a file. The list is
   NULL terminated because every scan over it stops at the first NULL */
static const char *reserved_names[] =
{
	"AUX", "CON",
	"COM1", "COM2", "COM3", "COM4", "COM5", "COM6", "COM7", "COM8", "COM9",
	"LPT1", "LPT2", "LPT3", "LPT4", "LPT5", "LPT6", "LPT7", "LPT8", "LPT9",
	"NUL", "PRN",
	NULL
};
/* all state needed by the 8.3 mangling code for one pvfs instance */
struct pvfs_mangle_context {
	/* FLAG_* property bits for every possible byte value */
	uint8_t char_flags[256];

	/*
	  this determines how many characters are used from the original
	  filename in the 8.3 mangled name. A larger value leads to a weaker
	  hash and more collisions.  The largest possible value is 6.
	*/
	int mangle_prefix;

	/* hashes are reduced modulo this value, which is
	   36 to the power (7 - mangle_prefix) */
	uint32_t mangle_modulus;

	/* we will use a very simple direct mapped prefix cache. The big
	   advantage of this cache structure is speed and low memory usage

	   The cache is indexed by the hash modulo cache_size, and a hit is
	   confirmed by comparing the full hash stored for that slot
	*/
	uint32_t cache_size;
	char **prefix_cache;
	uint32_t *prefix_cache_hashes;

	/* this is used to reverse the base 36 mapping */
	unsigned char base_reverse[256];
};
115 hash a string of the specified length. The string does not need to be
118 this hash needs to be fast with a low collision rate (what hash doesn't?)
120 static uint32_t mangle_hash(struct pvfs_mangle_context *ctx,
121 const char *key, size_t length)
123 return pvfs_name_hash(key, length) % ctx->mangle_modulus;
127 insert an entry into the prefix cache. The string might not be null
129 static void cache_insert(struct pvfs_mangle_context *ctx,
130 const char *prefix, int length, uint32_t hash)
132 int i = hash % ctx->cache_size;
134 if (ctx->prefix_cache[i]) {
135 talloc_free(ctx->prefix_cache[i]);
138 ctx->prefix_cache[i] = talloc_strndup(ctx->prefix_cache, prefix, length);
139 ctx->prefix_cache_hashes[i] = hash;
143 lookup an entry in the prefix cache. Return NULL if not found.
145 static const char *cache_lookup(struct pvfs_mangle_context *ctx, uint32_t hash)
147 int i = hash % ctx->cache_size;
150 if (!ctx->prefix_cache[i] || hash != ctx->prefix_cache_hashes[i]) {
154 /* yep, it matched */
155 return ctx->prefix_cache[i];
160 determine if a string is possibly in a mangled format, ignoring
163 In this algorithm, mangled names use only pure ascii characters (no
164 multi-byte) so we can avoid doing a UCS2 conversion
166 static bool is_mangled_component(struct pvfs_mangle_context *ctx,
167 const char *name, size_t len)
171 M_DEBUG(10,("is_mangled_component %s (len %u) ?\n", name, (unsigned int)len));
173 /* check the length */
174 if (len > 12 || len < 8)
177 /* the best distinguishing characteristic is the ~ */
181 /* check extension */
185 for (i=9; name[i] && i < len; i++) {
186 if (! FLAG_CHECK(name[i], FLAG_ASCII)) {
192 /* check lead characters */
193 for (i=0;i<ctx->mangle_prefix;i++) {
194 if (! FLAG_CHECK(name[i], FLAG_ASCII)) {
199 /* check rest of hash */
200 if (! FLAG_CHECK(name[7], FLAG_BASECHAR)) {
203 for (i=ctx->mangle_prefix;i<6;i++) {
204 if (! FLAG_CHECK(name[i], FLAG_BASECHAR)) {
209 M_DEBUG(10,("is_mangled_component %s (len %u) -> yes\n", name, (unsigned int)len));
/*
  determine if a string is possibly in a mangled format, ignoring
  case

  In this algorithm, mangled names use only pure ascii characters (no
  multi-byte) so we can avoid doing a UCS2 conversion

  NOTE! This interface must be able to handle a path with unix
  directory separators. It returns true if any '/' separated
  component looks mangled
*/
static bool is_mangled(struct pvfs_mangle_context *ctx, const char *name)
{
	const char *start = name;
	const char *slash;

	M_DEBUG(10,("is_mangled %s ?\n", name));

	/* every component that is followed by a separator */
	while ((slash = strchr(start, '/')) != NULL) {
		if (is_mangled_component(ctx, start, PTR_DIFF(slash, start))) {
			return true;
		}
		start = slash + 1;
	}

	/* and the last part ... */
	return is_mangled_component(ctx, start, strlen(start));
}
246 see if a filename is an allowable 8.3 name.
248 we are only going to allow ascii characters in 8.3 names, as this
249 simplifies things greatly (it means that we know the string won't
250 get larger when converted from UNIX to DOS formats)
252 static bool is_8_3(struct pvfs_mangle_context *ctx,
253 const char *name, bool check_case, bool allow_wildcards)
258 /* as a special case, the names '.' and '..' are allowable 8.3 names */
259 if (name[0] == '.') {
260 if (!name[1] || (name[1] == '.' && !name[2])) {
265 /* the simplest test is on the overall length of the
266 filename. Note that we deliberately use the ascii string
267 length (not the multi-byte one) as it is faster, and gives us
268 the result we need in this case. Using strlen_m would not
269 only be slower, it would be incorrect */
274 /* find the '.'. Note that once again we use the non-multibyte
276 dot_p = strchr(name, '.');
279 /* if the name doesn't contain a '.' then its length
280 must be less than 8 */
285 int prefix_len, suffix_len;
287 /* if it does contain a dot then the prefix must be <=
288 8 and the suffix <= 3 in length */
289 prefix_len = PTR_DIFF(dot_p, name);
290 suffix_len = len - (prefix_len+1);
292 if (prefix_len > 8 || suffix_len > 3 || suffix_len == 0) {
296 /* a 8.3 name cannot contain more than 1 '.' */
297 if (strchr(dot_p+1, '.')) {
302 /* the length are all OK. Now check to see if the characters themselves are OK */
303 for (i=0; name[i]; i++) {
304 /* note that we may allow wildcard petterns! */
305 if (!FLAG_CHECK(name[i], FLAG_ASCII|(allow_wildcards ? FLAG_WILDCARD : 0)) &&
311 /* it is a good 8.3 name */
317 try to find a 8.3 name in the cache, and if found then
318 return the original long name.
320 static char *check_cache(struct pvfs_mangle_context *ctx,
321 TALLOC_CTX *mem_ctx, const char *name)
323 uint32_t hash, multiplier;
328 /* make sure that this is a mangled name from this cache */
329 if (!is_mangled(ctx, name)) {
330 M_DEBUG(10,("check_cache: %s -> not mangled\n", name));
334 /* we need to extract the hash from the 8.3 name */
335 hash = ctx->base_reverse[(unsigned char)name[7]];
336 for (multiplier=36, i=5;i>=ctx->mangle_prefix;i--) {
337 uint32_t v = ctx->base_reverse[(unsigned char)name[i]];
338 hash += multiplier * v;
342 /* now look in the prefix cache for that hash */
343 prefix = cache_lookup(ctx, hash);
345 M_DEBUG(10,("check_cache: %s -> %08X -> not found\n", name, hash));
349 /* we found it - construct the full name */
350 if (name[8] == '.') {
351 strncpy(extension, name+9, 3);
358 return talloc_asprintf(mem_ctx, "%s.%s", prefix, extension);
361 return talloc_strdup(mem_ctx, prefix);
366 look for a DOS reserved name
368 static bool is_reserved_name(struct pvfs_mangle_context *ctx, const char *name)
370 if (FLAG_CHECK(name[0], FLAG_POSSIBLE1) &&
371 FLAG_CHECK(name[1], FLAG_POSSIBLE2) &&
372 FLAG_CHECK(name[2], FLAG_POSSIBLE3) &&
373 FLAG_CHECK(name[3], FLAG_POSSIBLE4)) {
374 /* a likely match, scan the lot */
376 for (i=0; reserved_names[i]; i++) {
377 if (strcasecmp(name, reserved_names[i]) == 0) {
388 See if a filename is a legal long filename.
389 A filename ending in a '.' is not legal unless it's "." or "..". JRA.
391 static bool is_legal_name(struct pvfs_mangle_context *ctx, const char *name)
395 codepoint_t c = next_codepoint(name, &c_size);
396 if (c == INVALID_CODEPOINT) {
399 /* all high chars are OK */
404 if (FLAG_CHECK(c, FLAG_ILLEGAL)) {
414 the main forward mapping function, which converts a long filename to
417 if need83 is not set then we only do the mangling if the name is illegal
420 if cache83 is not set then we don't cache the result
422 return NULL if we don't need to do any conversion
424 static char *name_map(struct pvfs_mangle_context *ctx,
425 const char *name, bool need83, bool cache83)
430 unsigned int extension_length, i;
431 unsigned int prefix_len;
434 const char *basechars = MANGLE_BASECHARS;
436 /* reserved names are handled specially */
437 if (!is_reserved_name(ctx, name)) {
438 /* if the name is already a valid 8.3 name then we don't need to
440 if (is_8_3(ctx, name, false, false)) {
444 /* if the caller doesn't strictly need 8.3 then just check for illegal
446 if (!need83 && is_legal_name(ctx, name)) {
451 /* find the '.' if any */
452 dot_p = strrchr(name, '.');
455 /* if the extension contains any illegal characters or
456 is too long or zero length then we treat it as part
458 for (i=0; i<4 && dot_p[i+1]; i++) {
459 if (! FLAG_CHECK(dot_p[i+1], FLAG_ASCII)) {
464 if (i == 0 || i == 4) dot_p = NULL;
467 /* the leading characters in the mangled name is taken from
468 the first characters of the name, if they are ascii otherwise
471 for (i=0;i<ctx->mangle_prefix && name[i];i++) {
472 lead_chars[i] = name[i];
473 if (! FLAG_CHECK(lead_chars[i], FLAG_ASCII)) {
476 lead_chars[i] = toupper((unsigned char)lead_chars[i]);
478 for (;i<ctx->mangle_prefix;i++) {
482 /* the prefix is anything up to the first dot */
484 prefix_len = PTR_DIFF(dot_p, name);
486 prefix_len = strlen(name);
489 /* the extension of the mangled name is taken from the first 3
490 ascii chars after the dot */
491 extension_length = 0;
493 for (i=1; extension_length < 3 && dot_p[i]; i++) {
494 unsigned char c = dot_p[i];
495 if (FLAG_CHECK(c, FLAG_ASCII)) {
496 extension[extension_length++] = toupper(c);
501 /* find the hash for this prefix */
502 v = hash = mangle_hash(ctx, name, prefix_len);
504 new_name = talloc_array(ctx, char, 13);
505 if (new_name == NULL) {
509 /* now form the mangled name. */
510 for (i=0;i<ctx->mangle_prefix;i++) {
511 new_name[i] = lead_chars[i];
513 new_name[7] = basechars[v % 36];
515 for (i=5; i>=ctx->mangle_prefix; i--) {
517 new_name[i] = basechars[v % 36];
520 /* add the extension */
521 if (extension_length) {
523 memcpy(&new_name[9], extension, extension_length);
524 new_name[9+extension_length] = 0;
530 /* put it in the cache */
531 cache_insert(ctx, name, prefix_len, hash);
534 M_DEBUG(10,("name_map: %s -> %08X -> %s (cache=%d)\n",
535 name, hash, new_name, cache83));
541 /* initialise the flags table
543 we allow only a very restricted set of characters as 'ascii' in this
544 mangling backend. This isn't a significant problem as modern clients
545 use the 'long' filenames anyway, and those don't have these
548 static void init_tables(struct pvfs_mangle_context *ctx)
550 const char *basechars = MANGLE_BASECHARS;
552 /* the list of reserved dos names - all of these are illegal */
554 ZERO_STRUCT(ctx->char_flags);
556 for (i=1;i<128;i++) {
557 if ((i >= '0' && i <= '9') ||
558 (i >= 'a' && i <= 'z') ||
559 (i >= 'A' && i <= 'Z')) {
560 ctx->char_flags[i] |= (FLAG_ASCII | FLAG_BASECHAR);
562 if (strchr("_-$~", i)) {
563 ctx->char_flags[i] |= FLAG_ASCII;
566 if (strchr("*\\/?<>|\":", i)) {
567 ctx->char_flags[i] |= FLAG_ILLEGAL;
570 if (strchr("*?\"<>", i)) {
571 ctx->char_flags[i] |= FLAG_WILDCARD;
575 ZERO_STRUCT(ctx->base_reverse);
577 ctx->base_reverse[(uint8_t)basechars[i]] = i;
580 /* fill in the reserved names flags. These are used as a very
581 fast filter for finding possible DOS reserved filenames */
582 for (i=0; reserved_names[i]; i++) {
583 unsigned char c1, c2, c3, c4;
585 c1 = (unsigned char)reserved_names[i][0];
586 c2 = (unsigned char)reserved_names[i][1];
587 c3 = (unsigned char)reserved_names[i][2];
588 c4 = (unsigned char)reserved_names[i][3];
590 ctx->char_flags[c1] |= FLAG_POSSIBLE1;
591 ctx->char_flags[c2] |= FLAG_POSSIBLE2;
592 ctx->char_flags[c3] |= FLAG_POSSIBLE3;
593 ctx->char_flags[c4] |= FLAG_POSSIBLE4;
594 ctx->char_flags[tolower(c1)] |= FLAG_POSSIBLE1;
595 ctx->char_flags[tolower(c2)] |= FLAG_POSSIBLE2;
596 ctx->char_flags[tolower(c3)] |= FLAG_POSSIBLE3;
597 ctx->char_flags[tolower(c4)] |= FLAG_POSSIBLE4;
599 ctx->char_flags[(unsigned char)'.'] |= FLAG_POSSIBLE4;
602 ctx->mangle_modulus = 1;
603 for (i=0;i<(7-ctx->mangle_prefix);i++) {
604 ctx->mangle_modulus *= 36;
609 initialise the mangling code
611 NTSTATUS pvfs_mangle_init(struct pvfs_state *pvfs)
613 struct pvfs_mangle_context *ctx;
615 ctx = talloc(pvfs, struct pvfs_mangle_context);
617 return NT_STATUS_NO_MEMORY;
620 /* by default have a max of 512 entries in the cache. */
621 ctx->cache_size = lpcfg_parm_int(pvfs->ntvfs->ctx->lp_ctx, NULL, "mangle", "cachesize", 512);
623 ctx->prefix_cache = talloc_array(ctx, char *, ctx->cache_size);
624 if (ctx->prefix_cache == NULL) {
625 return NT_STATUS_NO_MEMORY;
627 ctx->prefix_cache_hashes = talloc_array(ctx, uint32_t, ctx->cache_size);
628 if (ctx->prefix_cache_hashes == NULL) {
629 return NT_STATUS_NO_MEMORY;
632 memset(ctx->prefix_cache, 0, sizeof(char *) * ctx->cache_size);
633 memset(ctx->prefix_cache_hashes, 0, sizeof(uint32_t) * ctx->cache_size);
635 ctx->mangle_prefix = lpcfg_parm_int(pvfs->ntvfs->ctx->lp_ctx, NULL, "mangle", "prefix", -1);
636 if (ctx->mangle_prefix < 0 || ctx->mangle_prefix > 6) {
637 ctx->mangle_prefix = DEFAULT_MANGLE_PREFIX;
642 pvfs->mangle_ctx = ctx;
649 return the short name for a component of a full name
651 char *pvfs_short_name_component(struct pvfs_state *pvfs, const char *name)
653 return name_map(pvfs->mangle_ctx, name, true, true);
658 return the short name for a given entry in a directory
660 const char *pvfs_short_name(struct pvfs_state *pvfs, TALLOC_CTX *mem_ctx,
661 struct pvfs_filename *name)
663 char *p = strrchr(name->full_name, '/');
664 char *ret = pvfs_short_name_component(pvfs, p+1);
668 talloc_steal(mem_ctx, ret);
673 lookup a mangled name, returning the original long name if present
676 char *pvfs_mangled_lookup(struct pvfs_state *pvfs, TALLOC_CTX *mem_ctx,
679 return check_cache(pvfs->mangle_ctx, mem_ctx, name);
684 look for a DOS reserved name
686 bool pvfs_is_reserved_name(struct pvfs_state *pvfs, const char *name)
688 return is_reserved_name(pvfs->mangle_ctx, name);
693 see if a component of a filename could be a mangled name from our
696 bool pvfs_is_mangled_component(struct pvfs_state *pvfs, const char *name)
698 return is_mangled_component(pvfs->mangle_ctx, name, strlen(name));