Change check_path_syntax() to use the new next_mb_char_size() function
author Jeremy Allison <jra@samba.org>
Wed, 17 Mar 2004 02:08:27 +0000 (02:08 +0000)
committer Jeremy Allison <jra@samba.org>
Wed, 17 Mar 2004 02:08:27 +0000 (02:08 +0000)
to make it generic. Remove the mb-codepage "blacklist". Alexander, please
check this fix as it reverts your blacklist changes, but I'm hoping it
fixes the problem in a more generic way for all charsets. I'm not trying
to trample on your (excellent!) work here, just make things more generic
without special cases.
Jeremy.

source/lib/charcnv.c
source/smbd/reply.c

index 9ec6e739705d8b49dc106f099ffd4c8c58f0d5f8..ad15788b84a8d6dc514fe659f70b8dedbbdd0ba2 100644 (file)
 
 static smb_iconv_t conv_handles[NUM_CHARSETS][NUM_CHARSETS];
 static BOOL conv_silent; /* Should we do a debug if the conversion fails ? */
-/* Unsafe unix charsets which could contain '\\' as second byte of mb character */
-static const char *conv_unsafe_charsets[] = {
-           "CP932",
-           "EUC-JP",
-           NULL};
-/* Global variable which is set to True in init_iconv() if unix charset is unsafe
-   w.r.t. '\\' in second byte of mb character. Otherwise it is set to False.
-*/
-BOOL is_unix_charset_unsafe;
 
 /**
  * Return the name of a charset to give to iconv().
@@ -114,7 +105,6 @@ void init_iconv(void)
 {
        int c1, c2;
        BOOL did_reload = False;
-       const char **unsafe_charset = conv_unsafe_charsets;
 
        /* so that charset_name() works we need to get the UNIX<->UCS2 going
           first */
@@ -156,16 +146,6 @@ void init_iconv(void)
                init_valid_table();
                conv_silent = False;
        }
-       
-       while(*unsafe_charset && strcmp(*unsafe_charset, conv_handles[CH_UCS2][CH_UNIX]->to_name)) {
-               unsafe_charset++;
-       }
-       
-       if (*unsafe_charset) {
-               is_unix_charset_unsafe = True;
-       } else {
-               is_unix_charset_unsafe = False;
-       }
 }
 
 /**
@@ -1330,6 +1310,10 @@ size_t next_mb_char_size(const char *s)
        for ( i = 1; i <=4; i++ ) {
                smb_ucs2_t uc;
                if (convert_string(CH_UNIX, CH_UCS2, s, i, &uc, 2, False) == 2) {
+#if 0 /* JRATEST */
+                       DEBUG(10,("next_mb_char_size: size %u at string %s\n",
+                               (unsigned int)i, s));
+#endif
                        return i;
                }
        }
index c470d15645da811bf6c4c082227364ca59cc5ffa..0fe73cddc28f6e08a795baef25c643799b82ede3 100644 (file)
@@ -34,7 +34,6 @@ extern char magic_char;
 extern BOOL case_sensitive;
 extern BOOL case_preserve;
 extern BOOL short_case_preserve;
-extern BOOL is_unix_charset_unsafe;
 extern int global_oplock_break;
 unsigned int smb_echo_count = 0;
 
@@ -88,6 +87,8 @@ NTSTATUS check_path_syntax(pstring destname, const pstring srcname)
                                return NT_STATUS_OBJECT_PATH_SYNTAX_BAD;
                        }
                        /* Go back one level... */
+                       /* We know this is safe as '/' cannot be part of a mb sequence. */
+                       /* NOTE - if this assumption is invalid we are not in good shape... */
                        while (d > destname) {
                                if (*d == '/')
                                        break;
@@ -112,31 +113,20 @@ NTSTATUS check_path_syntax(pstring destname, const pstring srcname)
                        }
                        s++;
                } else {
-                       /* Activate this codepath only if we know that Unix charset may contain unsafe '\\' */
-                       if ((is_unix_charset_unsafe == True) && ((*s & 0x80) && IS_DIRECTORY_SEP(s[1]))) {
-                               /* 
-                                * Potential mb char with second char a directory separator.
-                                * All the encodings we care about are 2 byte only, so do a
-                                * conversion to unicode. If the one byte char converts then
-                                * it really is a directory separator following. Otherwise if
-                                * the two byte character converts (and it should or our assumption
-                                * about character sets is broken and we return an error) then copy both
-                                * bytes as it's a MB character, not a directory separator.
-                                */
-
-                               uint16 ucs2_val;
-
-                               if (convert_string(CH_UNIX, CH_UCS2, s, 1, &ucs2_val, 2, False) == 2) {
-                                       ;
-                               } else if (convert_string(CH_UNIX, CH_UCS2, s, 2, &ucs2_val, 2, False) == 2) {
+                       switch(next_mb_char_size(s)) {
+                               case 4:
                                        *d++ = *s++;
-                               } else {
-                                       DEBUG(0,("check_path_syntax: directory separator assumptions invalid !\n"));
+                               case 3:
+                                       *d++ = *s++;
+                               case 2:
+                                       *d++ = *s++;
+                               case 1:
+                                       *d++ = *s++;
+                                       break;
+                               default:
+                                       DEBUG(0,("check_path_syntax: character length assumptions invalid !\n"));
                                        return NT_STATUS_INVALID_PARAMETER;
-                               }
                        }
-                       /* Just copy the char (or the second byte of the mb char). */
-                       *d++ = *s++;
                }
        }
        *d = '\0';