2 Unix SMB/CIFS implementation.
4 POSIX NTVFS backend - 8.3 name routines
6 Copyright (C) Andrew Tridgell 2004
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2 of the License, or
11 (at your option) any later version.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 #include "include/includes.h"
24 #include "vfs_posix.h"
/*
  this mangling scheme uses the following format

  Annnn~n.AAA

  where nnnnn is a base 36 hash, and A represents characters from the original string

  The hash is taken of the leading part of the long filename, in uppercase

  for simplicity, we only allow ascii characters in 8.3 names
*/
/* hash algorithm changed to FNV1 by idra@samba.org (Simo Sorce).
 * see http://www.isthe.com/chongo/tech/comp/fnv/index.html for a
 * discussion on Fowler / Noll / Vo (FNV) Hash by one of its authors
 */
/*
  ===============================================================================
  NOTE

  This file deliberately uses non-multibyte string functions in many places. This
  is *not* a mistake. This code is multi-byte safe, but it gets this property
  through some very subtle knowledge of the way multi-byte strings are encoded
  and the fact that this mangling algorithm only supports ascii characters in
  8.3 names.

  please don't convert this file to use the *_m() functions!!
  ===============================================================================
*/
59 #include "vfs_posix.h"
/* debug tracing for the mangling code; change the 1 to 0 to compile it out */
#if 1
#define M_DEBUG(level, x) DEBUG(level, x)
#else
#define M_DEBUG(level, x)
#endif

/* these flags are used to mark characters as having particular
   properties */
#define FLAG_BASECHAR 1
#define FLAG_ASCII 2
#define FLAG_ILLEGAL 4
#define FLAG_WILDCARD 8

/* the "possible" flags are used as a fast way to find possible DOS
   reserved filenames */
#define FLAG_POSSIBLE1 16
#define FLAG_POSSIBLE2 32
#define FLAG_POSSIBLE3 64
#define FLAG_POSSIBLE4 128

/* by default have a max of 512 entries in the cache. */
#ifndef MANGLE_CACHE_SIZE
#define MANGLE_CACHE_SIZE 512
#endif

/* number of leading characters of the long name kept in the mangled name */
#define DEFAULT_MANGLE_PREFIX 4

#define FNV1_PRIME 0x01000193
/* the following number is a fnv1 of the string: idra@samba.org 2002 */
#define FNV1_INIT  0xa6b93095

/* digits of the base 36 hash, in value order */
#define MANGLE_BASECHARS "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"

/* test a character against the per-context flags table; expects a
   variable called 'ctx' to be in scope at the point of use */
#define FLAG_CHECK(c, flag) (ctx->char_flags[(uint8_t)(c)] & (flag))
98 hash a string of the specified length. The string does not need to be
101 this hash needs to be fast with a low collision rate (what hash doesn't?)
103 static uint32_t mangle_hash(struct pvfs_mangle_context *ctx,
104 const char *key, size_t length)
106 uint32_t value = FNV1_INIT;
110 while (*key && length--) {
111 c = next_codepoint(key, &c_size);
113 value *= (uint32_t)FNV1_PRIME;
114 value ^= (uint32_t)c;
118 return (value % ctx->mangle_modulus);
122 insert an entry into the prefix cache. The string might not be null
124 static void cache_insert(struct pvfs_mangle_context *ctx,
125 const char *prefix, int length, uint32_t hash)
127 int i = hash % MANGLE_CACHE_SIZE;
129 if (ctx->prefix_cache[i]) {
130 talloc_free(ctx->prefix_cache[i]);
133 ctx->prefix_cache[i] = talloc_strndup(ctx->prefix_cache, prefix, length);
134 ctx->prefix_cache_hashes[i] = hash;
138 lookup an entry in the prefix cache. Return NULL if not found.
140 static const char *cache_lookup(struct pvfs_mangle_context *ctx, uint32_t hash)
142 int i = hash % MANGLE_CACHE_SIZE;
145 if (!ctx->prefix_cache[i] || hash != ctx->prefix_cache_hashes[i]) {
149 /* yep, it matched */
150 return ctx->prefix_cache[i];
155 determine if a string is possibly in a mangled format, ignoring
158 In this algorithm, mangled names use only pure ascii characters (no
159 multi-byte) so we can avoid doing a UCS2 conversion
161 static BOOL is_mangled_component(struct pvfs_mangle_context *ctx,
162 const char *name, size_t len)
166 M_DEBUG(10,("is_mangled_component %s (len %u) ?\n", name, (unsigned int)len));
168 /* check the length */
169 if (len > 12 || len < 8)
172 /* the best distinguishing characteristic is the ~ */
176 /* check extension */
180 for (i=9; name[i] && i < len; i++) {
181 if (! FLAG_CHECK(name[i], FLAG_ASCII)) {
187 /* check lead characters */
188 for (i=0;i<ctx->mangle_prefix;i++) {
189 if (! FLAG_CHECK(name[i], FLAG_ASCII)) {
194 /* check rest of hash */
195 if (! FLAG_CHECK(name[7], FLAG_BASECHAR)) {
198 for (i=ctx->mangle_prefix;i<6;i++) {
199 if (! FLAG_CHECK(name[i], FLAG_BASECHAR)) {
204 M_DEBUG(10,("is_mangled_component %s (len %u) -> yes\n", name, (unsigned int)len));
212 determine if a string is possibly in a mangled format, ignoring
215 In this algorithm, mangled names use only pure ascii characters (no
216 multi-byte) so we can avoid doing a UCS2 conversion
218 NOTE! This interface must be able to handle a path with unix
219 directory separators. It should return true if any component is
222 static BOOL is_mangled(struct pvfs_mangle_context *ctx, const char *name)
227 M_DEBUG(10,("is_mangled %s ?\n", name));
229 for (s=name; (p=strchr(s, '/')); s=p+1) {
230 if (is_mangled_component(ctx, s, PTR_DIFF(p, s))) {
235 /* and the last part ... */
236 return is_mangled_component(ctx, s,strlen(s));
241 see if a filename is an allowable 8.3 name.
243 we are only going to allow ascii characters in 8.3 names, as this
244 simplifies things greatly (it means that we know the string won't
245 get larger when converted from UNIX to DOS formats)
247 static BOOL is_8_3(struct pvfs_mangle_context *ctx,
248 const char *name, BOOL check_case, BOOL allow_wildcards)
253 /* as a special case, the names '.' and '..' are allowable 8.3 names */
254 if (name[0] == '.') {
255 if (!name[1] || (name[1] == '.' && !name[2])) {
260 /* the simplest test is on the overall length of the
261 filename. Note that we deliberately use the ascii string
262 length (not the multi-byte one) as it is faster, and gives us
263 the result we need in this case. Using strlen_m would not
264 only be slower, it would be incorrect */
269 /* find the '.'. Note that once again we use the non-multibyte
271 dot_p = strchr(name, '.');
274 /* if the name doesn't contain a '.' then its length
275 must be less than 8 */
280 int prefix_len, suffix_len;
282 /* if it does contain a dot then the prefix must be <=
283 8 and the suffix <= 3 in length */
284 prefix_len = PTR_DIFF(dot_p, name);
285 suffix_len = len - (prefix_len+1);
287 if (prefix_len > 8 || suffix_len > 3 || suffix_len == 0) {
291 /* a 8.3 name cannot contain more than 1 '.' */
292 if (strchr(dot_p+1, '.')) {
297 /* the length are all OK. Now check to see if the characters themselves are OK */
298 for (i=0; name[i]; i++) {
299 /* note that we may allow wildcard petterns! */
300 if (!FLAG_CHECK(name[i], FLAG_ASCII|(allow_wildcards ? FLAG_WILDCARD : 0)) && name[i] != '.') {
305 /* it is a good 8.3 name */
311 try to find a 8.3 name in the cache, and if found then
312 return the original long name.
314 static const char *check_cache(struct pvfs_mangle_context *ctx,
317 uint32_t hash, multiplier;
322 /* make sure that this is a mangled name from this cache */
323 if (!is_mangled(ctx, name)) {
324 M_DEBUG(10,("check_cache: %s -> not mangled\n", name));
328 /* we need to extract the hash from the 8.3 name */
329 hash = ctx->base_reverse[(unsigned char)name[7]];
330 for (multiplier=36, i=5;i>=ctx->mangle_prefix;i--) {
331 uint32_t v = ctx->base_reverse[(unsigned char)name[i]];
332 hash += multiplier * v;
336 /* now look in the prefix cache for that hash */
337 prefix = cache_lookup(ctx, hash);
339 M_DEBUG(10,("check_cache: %s -> %08X -> not found\n", name, hash));
343 /* we found it - construct the full name */
344 if (name[8] == '.') {
345 strncpy(extension, name+9, 3);
352 return talloc_asprintf(ctx, "%s.%s", prefix, extension);
355 return talloc_strdup(ctx, prefix);
360 look for a DOS reserved name
362 static BOOL is_reserved_name(struct pvfs_mangle_context *ctx, const char *name)
364 if (FLAG_CHECK(name[0], FLAG_POSSIBLE1) &&
365 FLAG_CHECK(name[1], FLAG_POSSIBLE2) &&
366 FLAG_CHECK(name[2], FLAG_POSSIBLE3) &&
367 FLAG_CHECK(name[3], FLAG_POSSIBLE4)) {
368 /* a likely match, scan the lot */
370 for (i=0; ctx->reserved_names[i]; i++) {
371 int len = strlen(ctx->reserved_names[i]);
372 /* note that we match on COM1 as well as COM1.foo */
373 if (strncasecmp(name, ctx->reserved_names[i], len) == 0 &&
374 (name[len] == '.' || name[len] == 0)) {
385 See if a filename is a legal long filename.
386 A filename ending in a '.' is not legal unless it's "." or "..". JRA.
388 static BOOL is_legal_name(struct pvfs_mangle_context *ctx, const char *name)
390 const char *dot_pos = NULL;
396 codepoint_t c = next_codepoint(name, &c_size);
397 if (c == INVALID_CODEPOINT) {
400 /* all high chars are OK */
405 if (FLAG_CHECK(c, FLAG_ILLEGAL)) {
408 if (name[0] == '.') {
419 if (alldots && (numdots == 1 || numdots == 2))
420 return True; /* . or .. is a valid name */
422 /* A valid long name cannot end in '.' */
423 if (dot_pos[1] == '\0')
431 the main forward mapping function, which converts a long filename to
434 if need83 is not set then we only do the mangling if the name is illegal
437 if cache83 is not set then we don't cache the result
439 return NULL if we don't need to do any conversion
441 static char *name_map(struct pvfs_mangle_context *ctx,
442 const char *name, BOOL need83, BOOL cache83)
447 unsigned int extension_length, i;
448 unsigned int prefix_len;
451 const char *basechars = MANGLE_BASECHARS;
453 /* reserved names are handled specially */
454 if (!is_reserved_name(ctx, name)) {
455 /* if the name is already a valid 8.3 name then we don't need to
457 if (is_8_3(ctx, name, False, False)) {
461 /* if the caller doesn't strictly need 8.3 then just check for illegal
463 if (!need83 && is_legal_name(ctx, name)) {
468 /* find the '.' if any */
469 dot_p = strrchr(name, '.');
472 /* if the extension contains any illegal characters or
473 is too long or zero length then we treat it as part
475 for (i=0; i<4 && dot_p[i+1]; i++) {
476 if (! FLAG_CHECK(dot_p[i+1], FLAG_ASCII)) {
481 if (i == 0 || i == 4) dot_p = NULL;
484 /* the leading characters in the mangled name is taken from
485 the first characters of the name, if they are ascii otherwise
488 for (i=0;i<ctx->mangle_prefix && name[i];i++) {
489 lead_chars[i] = name[i];
490 if (! FLAG_CHECK(lead_chars[i], FLAG_ASCII)) {
493 lead_chars[i] = toupper(lead_chars[i]);
495 for (;i<ctx->mangle_prefix;i++) {
499 /* the prefix is anything up to the first dot */
501 prefix_len = PTR_DIFF(dot_p, name);
503 prefix_len = strlen(name);
506 /* the extension of the mangled name is taken from the first 3
507 ascii chars after the dot */
508 extension_length = 0;
510 for (i=1; extension_length < 3 && dot_p[i]; i++) {
512 if (FLAG_CHECK(c, FLAG_ASCII)) {
513 extension[extension_length++] = toupper(c);
518 /* find the hash for this prefix */
519 v = hash = mangle_hash(ctx, name, prefix_len);
521 new_name = talloc_array_p(ctx, char, 13);
522 if (new_name == NULL) {
526 /* now form the mangled name. */
527 for (i=0;i<ctx->mangle_prefix;i++) {
528 new_name[i] = lead_chars[i];
530 new_name[7] = basechars[v % 36];
532 for (i=5; i>=ctx->mangle_prefix; i--) {
534 new_name[i] = basechars[v % 36];
537 /* add the extension */
538 if (extension_length) {
540 memcpy(&new_name[9], extension, extension_length);
541 new_name[9+extension_length] = 0;
547 /* put it in the cache */
548 cache_insert(ctx, name, prefix_len, hash);
551 M_DEBUG(10,("name_map: %s -> %08X -> %s (cache=%d)\n",
552 name, hash, new_name, cache83));
558 /* initialise the flags table
560 we allow only a very restricted set of characters as 'ascii' in this
561 mangling backend. This isn't a significant problem as modern clients
562 use the 'long' filenames anyway, and those don't have these
565 static void init_tables(struct pvfs_mangle_context *ctx)
567 const char *basechars = MANGLE_BASECHARS;
569 /* the list of reserved dos names - all of these are illegal */
570 const char *reserved_names[] =
571 { "AUX", "LOCK$", "CON", "COM1", "COM2", "COM3", "COM4",
572 "LPT1", "LPT2", "LPT3", "NUL", "PRN", NULL };
575 ZERO_STRUCT(ctx->char_flags);
577 for (i=1;i<128;i++) {
578 if ((i >= '0' && i <= '9') ||
579 (i >= 'a' && i <= 'z') ||
580 (i >= 'A' && i <= 'Z')) {
581 ctx->char_flags[i] |= (FLAG_ASCII | FLAG_BASECHAR);
583 if (strchr("_-$~", i)) {
584 ctx->char_flags[i] |= FLAG_ASCII;
587 if (strchr("*\\/?<>|\":", i)) {
588 ctx->char_flags[i] |= FLAG_ILLEGAL;
591 if (strchr("*?\"<>", i)) {
592 ctx->char_flags[i] |= FLAG_WILDCARD;
596 ZERO_STRUCT(ctx->base_reverse);
598 ctx->base_reverse[(uint8_t)basechars[i]] = i;
601 ctx->reserved_names = reserved_names;
603 /* fill in the reserved names flags. These are used as a very
604 fast filter for finding possible DOS reserved filenames */
605 for (i=0; ctx->reserved_names[i]; i++) {
606 unsigned char c1, c2, c3, c4;
608 c1 = (unsigned char)ctx->reserved_names[i][0];
609 c2 = (unsigned char)ctx->reserved_names[i][1];
610 c3 = (unsigned char)ctx->reserved_names[i][2];
611 c4 = (unsigned char)ctx->reserved_names[i][3];
613 ctx->char_flags[c1] |= FLAG_POSSIBLE1;
614 ctx->char_flags[c2] |= FLAG_POSSIBLE2;
615 ctx->char_flags[c3] |= FLAG_POSSIBLE3;
616 ctx->char_flags[c4] |= FLAG_POSSIBLE4;
617 ctx->char_flags[tolower(c1)] |= FLAG_POSSIBLE1;
618 ctx->char_flags[tolower(c2)] |= FLAG_POSSIBLE2;
619 ctx->char_flags[tolower(c3)] |= FLAG_POSSIBLE3;
620 ctx->char_flags[tolower(c4)] |= FLAG_POSSIBLE4;
622 ctx->char_flags[(unsigned char)'.'] |= FLAG_POSSIBLE4;
625 ctx->mangle_modulus = 1;
626 for (i=0;i<(7-ctx->mangle_prefix);i++) {
627 ctx->mangle_modulus *= 36;
632 initialise the mangling code
634 NTSTATUS pvfs_mangle_init(struct pvfs_state *pvfs)
636 struct pvfs_mangle_context *ctx;
638 ctx = talloc_p(pvfs, struct pvfs_mangle_context);
640 return NT_STATUS_NO_MEMORY;
642 ctx->prefix_cache = talloc_array_p(ctx, char *, MANGLE_CACHE_SIZE);
643 if (ctx->prefix_cache == NULL) {
644 return NT_STATUS_NO_MEMORY;
646 ctx->prefix_cache_hashes = talloc_array_p(ctx, uint32_t, MANGLE_CACHE_SIZE);
647 if (ctx->prefix_cache_hashes == NULL) {
648 return NT_STATUS_NO_MEMORY;
651 memset(ctx->prefix_cache, 0, sizeof(char *)*MANGLE_CACHE_SIZE);
652 memset(ctx->prefix_cache_hashes, 0, sizeof(uint32_t)*MANGLE_CACHE_SIZE);
654 ctx->mangle_prefix = lp_parm_int(-1, "mangle", "prefix");
655 if (ctx->mangle_prefix < 0 || ctx->mangle_prefix > 6) {
656 ctx->mangle_prefix = DEFAULT_MANGLE_PREFIX;
661 pvfs->mangle_ctx = ctx;
668 return the short name for a component of a full name
670 char *pvfs_short_name_component(struct pvfs_state *pvfs, const char *name)
672 return name_map(pvfs->mangle_ctx, name, True, True);
677 return the short name for a given entry in a directory
679 const char *pvfs_short_name(struct pvfs_state *pvfs, TALLOC_CTX *mem_ctx,
680 struct pvfs_filename *name)
682 char *p = strrchr(name->full_name, '/');
683 char *ret = pvfs_short_name_component(pvfs, p+1);
687 talloc_steal(mem_ctx, ret);
692 lookup a mangled name, returning the original long name if present
695 char *pvfs_mangled_lookup(struct pvfs_state *pvfs, TALLOC_CTX *mem_ctx,
699 ret = check_cache(pvfs->mangle_ctx, name);
701 return talloc_steal(mem_ctx, ret);