Unix SMB/CIFS implementation.
new hash based name mangling implementation
Copyright (C) Andrew Tridgell 2002
+ Copyright (C) Simo Sorce 2002
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
for simplicity, we only allow ascii characters in 8.3 names
*/
+ /* hash algorithm changed to FNV1 by idra@samba.org (Simo Sorce).
+ * see http://www.isthe.com/chongo/tech/comp/fnv/index.html for a
+ * discussion on Fowler / Noll / Vo (FNV) Hash by one of its authors
+ */
/*
===============================================================================
#define MANGLE_CACHE_SIZE 4096
#endif
+#define FNV1_PRIME 0x01000193
+/* the following number is the FNV1 hash of the string: idra@samba.org 2002 */
+#define FNV1_INIT 0xa6b93095
+
/* these tables are used to provide fast tests for characters */
static unsigned char char_flags[256];
#define FLAG_CHECK(c, flag) (char_flags[(unsigned char)(c)] & (flag))
+/*
+ this determines how many characters are used from the original filename
+ in the 8.3 mangled name. A larger value leads to a weaker hash and more collisions.
+ The largest possible value is 6.
+*/
+static unsigned mangle_prefix;
+
/* we will use a very simple direct mapped prefix cache. The big
advantage of this cache structure is speed and low memory usage
static u32 *prefix_cache_hashes;
/* these are the characters we use in the 8.3 hash. Must be 36 chars long */
-const char *basechars = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
+static const char *basechars = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
static unsigned char base_reverse[256];
#define base_forward(v) basechars[v]
/* the list of reserved dos names - all of these are illegal */
-const char *reserved_names[] = { "AUX", "LOCK$", "CON", "COM1", "COM2", "COM3", "COM4",
- "LPT1", "LPT2", "LPT3", "NUL", "PRN", NULL };
+static const char *reserved_names[] =
+{ "AUX", "LOCK$", "CON", "COM1", "COM2", "COM3", "COM4",
+ "LPT1", "LPT2", "LPT3", "NUL", "PRN", NULL };
/*
hash a string of the specified length. The string does not need to be
length = strlen(str);
/* Set the initial value from the key size. */
- for (value = 0x238F13AF * length, i=0; i < length; i++) {
- value = (value + (((unsigned char)str[i]) << (i*5 % 24)));
- }
+ for (value = FNV1_INIT, i=0; i < length; i++) {
+ value *= (u32)FNV1_PRIME;
+ value ^= (u32)(str[i]);
+ }
/* note that we force it to a 31 bit hash, to keep within the limits
of the 36^6 mangle space */
- return (1103515243 * value + 12345) & ~0x80000000;
+ return value & ~0x80000000;
}
/*
{
if (prefix_cache) return True;
- prefix_cache = malloc(sizeof(char *) * MANGLE_CACHE_SIZE);
+ prefix_cache = calloc(MANGLE_CACHE_SIZE, sizeof(char *));
if (!prefix_cache) return False;
- prefix_cache_hashes = malloc(sizeof(u32) * MANGLE_CACHE_SIZE);
+ prefix_cache_hashes = calloc(MANGLE_CACHE_SIZE, sizeof(u32));
if (!prefix_cache_hashes) return False;
- memset(prefix_cache, 0, sizeof(char *) * MANGLE_CACHE_SIZE);
- memset(prefix_cache_hashes, 0, sizeof(char *) * MANGLE_CACHE_SIZE);
return True;
}
In this algorithm, mangled names use only pure ascii characters (no
multi-byte) so we can avoid doing a UCS2 conversion
-*/
-static BOOL is_mangled(const char *name)
+ */
+static BOOL is_mangled_component(const char *name, size_t len)
{
- int len, i;
+ unsigned int i;
- M_DEBUG(0,("is_mangled %s ?\n", name));
-
- /* the best distinguishing characteristic is the ~ */
- if (name[6] != '~') return False;
+ M_DEBUG(10,("is_mangled_component %s (len %u) ?\n", name, (unsigned int)len));
/* check the length */
- len = strlen(name);
- if (len > 12 || len < 8) return False;
+ if (len > 12 || len < 8)
+ return False;
+
+ /* the best distinguishing characteristic is the ~ */
+ if (name[6] != '~')
+ return False;
/* check extension */
if (len > 8) {
- if (name[8] != '.') return False;
+ if (name[8] != '.')
+ return False;
for (i=9; name[i]; i++) {
if (! FLAG_CHECK(name[i], FLAG_ASCII)) {
return False;
}
}
- /* check first character */
- if (! FLAG_CHECK(name[0], FLAG_ASCII)) {
- return False;
+ /* check lead characters */
+ for (i=0;i<mangle_prefix;i++) {
+ if (! FLAG_CHECK(name[i], FLAG_ASCII)) {
+ return False;
+ }
}
/* check rest of hash */
if (! FLAG_CHECK(name[7], FLAG_BASECHAR)) {
return False;
}
- for (i=1;i<6;i++) {
+ for (i=mangle_prefix;i<6;i++) {
if (! FLAG_CHECK(name[i], FLAG_BASECHAR)) {
return False;
}
}
- M_DEBUG(0,("is_mangled %s -> yes\n", name));
+ M_DEBUG(10,("is_mangled_component %s (len %u) -> yes\n", name, (unsigned int)len));
return True;
}
+
+/*
+ determine if a string is possibly in a mangled format, ignoring
+ case
+
+ In this algorithm, mangled names use only pure ascii characters (no
+ multi-byte) so we can avoid doing a UCS2 conversion
+
+ NOTE! This interface must be able to handle a path with unix
+ directory separators. It should return true if any component is
+ mangled
+ */
+static BOOL is_mangled(const char *name)
+{
+ const char *p;
+ const char *s;
+
+ M_DEBUG(10,("is_mangled %s ?\n", name));
+
+ for (s=name; (p=strchr(s, '/')); s=p+1) {
+ if (is_mangled_component(s, PTR_DIFF(p, s))) {
+ return True;
+ }
+ }
+
+ /* and the last part ... */
+ return is_mangled_component(s,strlen(s));
+}
+
+
/*
see if a filename is an allowable 8.3 name.
simplifies things greatly (it means that we know the string won't
get larger when converted from UNIX to DOS formats)
*/
-static BOOL is_8_3(const char *name, BOOL check_case)
+static BOOL is_8_3(const char *name, BOOL check_case, BOOL allow_wildcards)
{
int len, i;
char *dot_p;
the result we need in this case. Using strlen_m would not
only be slower, it would be incorrect */
len = strlen(name);
- if (len > 12) return False;
+ if (len > 12)
+ return False;
/* find the '.'. Note that once again we use the non-multibyte
function */
/* the length are all OK. Now check to see if the characters themselves are OK */
for (i=0; name[i]; i++) {
- /* note that we allow wildcard petterns! */
- if (!FLAG_CHECK(name[i], FLAG_ASCII|FLAG_WILDCARD) && name[i] != '.') {
+ /* note that we may allow wildcard patterns! */
+ if (!FLAG_CHECK(name[i], FLAG_ASCII|(allow_wildcards ? FLAG_WILDCARD : 0)) && name[i] != '.') {
return False;
}
}
static BOOL check_cache(char *name)
{
u32 hash, multiplier;
- int i;
+ unsigned int i;
const char *prefix;
char extension[4];
/* make sure that this is a mangled name from this cache */
if (!is_mangled(name)) {
- M_DEBUG(0,("check_cache: %s -> not mangled\n", name));
+ M_DEBUG(10,("check_cache: %s -> not mangled\n", name));
return False;
}
/* we need to extract the hash from the 8.3 name */
hash = base_reverse[(unsigned char)name[7]];
- for (multiplier=36, i=5;i>=1;i--) {
+ for (multiplier=36, i=5;i>=mangle_prefix;i--) {
u32 v = base_reverse[(unsigned char)name[i]];
hash += multiplier * v;
multiplier *= 36;
/* now look in the prefix cache for that hash */
prefix = cache_lookup(hash);
if (!prefix) {
- M_DEBUG(0,("check_cache: %s -> %08X -> not found\n", name, hash));
+ M_DEBUG(10,("check_cache: %s -> %08X -> not found\n", name, hash));
return False;
}
/* we found it - construct the full name */
- strncpy(extension, name+9, 3);
+ if (name[8] == '.') {
+ strncpy(extension, name+9, 3);
+ extension[3] = 0;
+ } else {
+ extension[0] = 0;
+ }
if (extension[0]) {
- M_DEBUG(0,("check_cache: %s -> %s.%s\n", name, prefix, extension));
+ M_DEBUG(10,("check_cache: %s -> %s.%s\n", name, prefix, extension));
slprintf(name, sizeof(fstring), "%s.%s", prefix, extension);
} else {
- M_DEBUG(0,("check_cache: %s -> %s\n", name, prefix));
+ M_DEBUG(10,("check_cache: %s -> %s\n", name, prefix));
fstrcpy(name, prefix);
}
}
/*
- see if a filename is a legal long filename
+ See if a filename is a legal long filename.
+ A filename ending in a '.' is not legal unless it's "." or "..". JRA.
*/
+
static BOOL is_legal_name(const char *name)
{
+ const char *dot_pos = NULL;
+ BOOL alldots = True;
+ size_t numdots = 0;
+
while (*name) {
+ if (((unsigned int)name[0]) > 128 && (name[1] != 0)) {
+ /* Possible start of mb character. */
+ char mbc[2];
+ /*
+ * We know the following will return 2 bytes. What
+ * we need to know is whether errno was set.
+ * Note that if CH_UNIX is utf8 a string may be 3
+ * bytes, but this is ok as mb utf8 characters don't
+ * contain embedded ascii bytes. We are really checking
+ * for mb UNIX asian characters like Japanese (SJIS) here.
+ * JRA.
+ */
+ errno = 0;
+ convert_string(CH_UNIX, CH_UCS2, name, 2, mbc, 2);
+ if (!errno) {
+ /* Was a good mb string. */
+ name += 2;
+ continue;
+ }
+ }
+
if (FLAG_CHECK(name[0], FLAG_ILLEGAL)) {
return False;
}
+ if (name[0] == '.') {
+ dot_pos = name;
+ numdots++;
+ } else {
+ alldots = False;
+ }
name++;
}
+ if (dot_pos) {
+ if (alldots && (numdots == 1 || numdots == 2))
+ return True; /* . or .. is a valid name */
+
+ /* A valid long name cannot end in '.' */
+ if (dot_pos[1] == '\0')
+ return False;
+ }
+
return True;
}
the name parameter must be able to hold 13 bytes
*/
-static BOOL name_map(char *name, BOOL need83, BOOL cache83)
+static void name_map(fstring name, BOOL need83, BOOL cache83)
{
char *dot_p;
- char lead_char;
+ char lead_chars[7];
char extension[4];
- int extension_length, i;
- int prefix_len;
+ unsigned int extension_length, i;
+ unsigned int prefix_len;
u32 hash, v;
char new_name[13];
if (!is_reserved_name(name)) {
/* if the name is already a valid 8.3 name then we don't need to
do anything */
- if (is_8_3(name, False)) {
- return True;
+ if (is_8_3(name, False, False)) {
+ return;
}
/* if the caller doesn't strictly need 8.3 then just check for illegal
filenames */
if (!need83 && is_legal_name(name)) {
- return True;
+ return;
}
}
/* find the '.' if any */
dot_p = strrchr(name, '.');
- /* the leading character in the mangled name is taken from
- the first character of the name, if it is ascii
- otherwise '_' is used
+ if (dot_p) {
+ /* if the extension contains any illegal characters or
+ is too long or zero length then we treat it as part
+ of the prefix */
+ for (i=0; i<4 && dot_p[i+1]; i++) {
+ if (! FLAG_CHECK(dot_p[i+1], FLAG_ASCII)) {
+ dot_p = NULL;
+ break;
+ }
+ }
+ if (i == 0 || i == 4) dot_p = NULL;
+ }
+
+ /* the leading characters in the mangled name are taken from
+ the first characters of the name if they are ascii; otherwise
+ '_' is used
*/
- lead_char = name[0];
- if (! FLAG_CHECK(lead_char, FLAG_ASCII)) {
- lead_char = '_';
+ for (i=0;i<mangle_prefix && name[i];i++) {
+ lead_chars[i] = name[i];
+ if (! FLAG_CHECK(lead_chars[i], FLAG_ASCII)) {
+ lead_chars[i] = '_';
+ }
+ lead_chars[i] = toupper(lead_chars[i]);
+ }
+ for (;i<mangle_prefix;i++) {
+ lead_chars[i] = '_';
}
- lead_char = toupper(lead_char);
/* the prefix is anything up to the first dot */
if (dot_p) {
v = hash = mangle_hash(name, prefix_len);
/* now form the mangled name. */
- new_name[0] = lead_char;
+ for (i=0;i<mangle_prefix;i++) {
+ new_name[i] = lead_chars[i];
+ }
new_name[7] = base_forward(v % 36);
new_name[6] = '~';
- for (i=5; i>=1; i--) {
+ for (i=5; i>=mangle_prefix; i--) {
v = v / 36;
new_name[i] = base_forward(v % 36);
}
cache_insert(name, prefix_len, hash);
}
- M_DEBUG(0,("name_map: %s -> %08X -> %s (cache=%d)\n",
+ M_DEBUG(10,("name_map: %s -> %08X -> %s (cache=%d)\n",
name, hash, new_name, cache83));
/* and overwrite the old name */
fstrcpy(name, new_name);
/* all done, we've managed to mangle it */
- return True;
}
memset(char_flags, 0, sizeof(char_flags));
- for (i=0;i<128;i++) {
+ for (i=1;i<128;i++) {
if ((i >= '0' && i <= '9') ||
(i >= 'a' && i <= 'z') ||
(i >= 'A' && i <= 'Z')) {
/* return the methods for this mangling implementation */
struct mangle_fns *mangle_hash2_init(void)
{
+ /* the mangle prefix can only be in the range 1 to 6 */
+ mangle_prefix = lp_mangle_prefix();
+ if (mangle_prefix > 6) {
+ mangle_prefix = 6;
+ }
+ if (mangle_prefix < 1) {
+ mangle_prefix = 1;
+ }
+
init_tables();
mangle_reset();