r2159: converted samba4 over to UTF-16.
author Andrew Tridgell <tridge@samba.org>
Wed, 1 Sep 2004 04:39:06 +0000 (04:39 +0000)
committer Gerald (Jerry) Carter <jerry@samba.org>
Wed, 10 Oct 2007 17:58:27 +0000 (12:58 -0500)
I had previously thought this was unnecessary, as Windows doesn't use
standards-compliant UTF-16, and for filesystem operations treats bytes
as UCS-2, but Bjoern Jacke has pointed out to me that this means we
don't correctly store extended UTF-16 characters as UTF-8 on
disk. This can be seen with (for example) the Gothic characters, which
have codepoints above 64k.

This commit also adds a LOCAL-ICONV torture test that tests the first
1 million codepoints against the system iconv library, and tests 5
million random UTF-16LE buffers for identical error handling to the
system iconv library.

The lib/iconv.c changes need backporting to samba3.
(This used to be commit 756f28ac95feaa84b42402723d5f7286865c78db)

13 files changed:
source4/include/charset.h
source4/lib/charcnv.c
source4/lib/iconv.c
source4/lib/util_unistr.c
source4/libcli/raw/rawrequest.c
source4/librpc/ndr/ndr_basic.c
source4/smb_server/request.c
source4/torture/basic/charset.c
source4/torture/basic/utable.c
source4/torture/config.m4
source4/torture/config.mk
source4/torture/local/iconv.c [new file with mode: 0644]
source4/torture/torture.c

index daf10e23c13519a46cae4241d5ff754a297bf6e9..f1482ac08c8aa6588dbd27edf54349cd54377078 100644 (file)
@@ -20,7 +20,7 @@
 */
 
 /* this defines the charset types used in samba */
-typedef enum {CH_UCS2=0, CH_UNIX=1, CH_DISPLAY=2, CH_DOS=3, CH_UTF8=4, CH_UCS2BE=5} charset_t;
+typedef enum {CH_UTF16=0, CH_UNIX=1, CH_DISPLAY=2, CH_DOS=3, CH_UTF8=4, CH_UTF16BE=5} charset_t;
 
 #define NUM_CHARSETS 6
 
index 8204a3bcdd8507372892aaafda2ba178cad00d29..2109e957d92d96e08dab14fdcf138cdaac0ef10e 100644 (file)
@@ -47,12 +47,12 @@ static const char *charset_name(charset_t ch)
 {
        const char *ret = NULL;
 
-       if (ch == CH_UCS2) ret = "UTF-16LE";
+       if (ch == CH_UTF16) ret = "UTF-16LE";
        else if (ch == CH_UNIX) ret = lp_unix_charset();
        else if (ch == CH_DOS) ret = lp_dos_charset();
        else if (ch == CH_DISPLAY) ret = lp_display_charset();
        else if (ch == CH_UTF8) ret = "UTF8";
-       else if (ch == CH_UCS2BE) ret = "UCS-2BE";
+       else if (ch == CH_UTF16BE) ret = "UTF-16BE";
 
        if (!ret || !*ret) ret = "ASCII";
        return ret;
@@ -81,13 +81,13 @@ void init_iconv(void)
 
        /* so that charset_name() works we need to get the UNIX<->UCS2 going
           first */
-       if (!conv_handles[CH_UNIX][CH_UCS2])
-               conv_handles[CH_UNIX][CH_UCS2] = smb_iconv_open(charset_name(CH_UCS2), 
+       if (!conv_handles[CH_UNIX][CH_UTF16])
+               conv_handles[CH_UNIX][CH_UTF16] = smb_iconv_open(charset_name(CH_UTF16), 
                                                                "ASCII");
 
-       if (!conv_handles[CH_UCS2][CH_UNIX])
-               conv_handles[CH_UCS2][CH_UNIX] = smb_iconv_open("ASCII", 
-                                                               charset_name(CH_UCS2));
+       if (!conv_handles[CH_UTF16][CH_UNIX])
+               conv_handles[CH_UTF16][CH_UNIX] = smb_iconv_open("ASCII", 
+                                                               charset_name(CH_UTF16));
 
        for (c1=0;c1<NUM_CHARSETS;c1++) {
                for (c2=0;c2<NUM_CHARSETS;c2++) {
@@ -293,7 +293,7 @@ size_t unix_strupper(const char *src, size_t srclen, char *dest, size_t destlen)
        size_t size;
        smb_ucs2_t *buffer;
        
-       size = convert_string_allocate(CH_UNIX, CH_UCS2, src, srclen,
+       size = convert_string_allocate(CH_UNIX, CH_UTF16, src, srclen,
                                       (void **) &buffer);
        if (size == -1) {
                smb_panic("failed to create UCS2 buffer");
@@ -303,7 +303,7 @@ size_t unix_strupper(const char *src, size_t srclen, char *dest, size_t destlen)
                return srclen;
        }
        
-       size = convert_string(CH_UCS2, CH_UNIX, buffer, size, dest, destlen);
+       size = convert_string(CH_UTF16, CH_UNIX, buffer, size, dest, destlen);
        free(buffer);
        return size;
 }
@@ -313,7 +313,7 @@ size_t unix_strlower(const char *src, size_t srclen, char *dest, size_t destlen)
        size_t size;
        smb_ucs2_t *buffer;
        
-       size = convert_string_allocate(CH_UNIX, CH_UCS2, src, srclen,
+       size = convert_string_allocate(CH_UNIX, CH_UTF16, src, srclen,
                                       (void **) &buffer);
        if (size == -1) {
                smb_panic("failed to create UCS2 buffer");
@@ -322,7 +322,7 @@ size_t unix_strlower(const char *src, size_t srclen, char *dest, size_t destlen)
                free(buffer);
                return srclen;
        }
-       size = convert_string(CH_UCS2, CH_UNIX, buffer, size, dest, destlen);
+       size = convert_string(CH_UTF16, CH_UNIX, buffer, size, dest, destlen);
        free(buffer);
        return size;
 }
@@ -461,7 +461,7 @@ ssize_t push_ucs2(const void *base_ptr, void *dest, const char *src, size_t dest
        /* ucs2 is always a multiple of 2 bytes */
        dest_len &= ~1;
 
-       len += convert_string(CH_UNIX, CH_UCS2, src, src_len, dest, dest_len);
+       len += convert_string(CH_UNIX, CH_UTF16, src, src_len, dest, dest_len);
        return len;
 }
 
@@ -480,7 +480,7 @@ ssize_t push_ucs2_talloc(TALLOC_CTX *ctx, smb_ucs2_t **dest, const char *src)
        size_t src_len = strlen(src)+1;
 
        *dest = NULL;
-       return convert_string_talloc(ctx, CH_UNIX, CH_UCS2, src, src_len, (const void **)dest);
+       return convert_string_talloc(ctx, CH_UNIX, CH_UTF16, src, src_len, (const void **)dest);
 }
 
 
@@ -498,7 +498,7 @@ ssize_t push_ucs2_allocate(smb_ucs2_t **dest, const char *src)
        size_t src_len = strlen(src)+1;
 
        *dest = NULL;
-       return convert_string_allocate(CH_UNIX, CH_UCS2, src, src_len, (void **)dest);  
+       return convert_string_allocate(CH_UNIX, CH_UTF16, src, src_len, (void **)dest); 
 }
 
 /**
@@ -603,7 +603,7 @@ size_t pull_ucs2(const void *base_ptr, char *dest, const void *src, size_t dest_
        if (src_len != (size_t)-1)
                src_len &= ~1;
        
-       ret = convert_string(CH_UCS2, CH_UNIX, src, src_len, dest, dest_len);
+       ret = convert_string(CH_UTF16, CH_UNIX, src, src_len, dest, dest_len);
        if (dest_len)
                dest[MIN(ret, dest_len-1)] = 0;
 
@@ -627,7 +627,7 @@ ssize_t pull_ucs2_talloc(TALLOC_CTX *ctx, char **dest, const smb_ucs2_t *src)
 {
        size_t src_len = (strlen_w(src)+1) * sizeof(smb_ucs2_t);
        *dest = NULL;
-       return convert_string_talloc(ctx, CH_UCS2, CH_UNIX, src, src_len, (const void **)dest);
+       return convert_string_talloc(ctx, CH_UTF16, CH_UNIX, src, src_len, (const void **)dest);
 }
 
 /**
@@ -642,7 +642,7 @@ ssize_t pull_ucs2_allocate(void **dest, const smb_ucs2_t *src)
 {
        size_t src_len = (strlen_w(src)+1) * sizeof(smb_ucs2_t);
        *dest = NULL;
-       return convert_string_allocate(CH_UCS2, CH_UNIX, src, src_len, dest);   
+       return convert_string_allocate(CH_UTF16, CH_UNIX, src, src_len, dest);  
 }
 
 /**
index c869c253cf0422b7e81a4239251b9cb315daeaae..9f6526faa5e35e5c8211ccab06934e3245aa88a9 100644 (file)
@@ -39,8 +39,8 @@
  *
  * Note that the only function provided by iconv is conversion between
  * characters.  It doesn't directly support operations like
- * uppercasing or comparison.  We have to convert to UCS-2 and compare
- * there.
+ * uppercasing or comparison.  We have to convert to UTF-16LE and
+ * compare there.
  *
  * @sa Samba Developers Guide
  **/
@@ -55,10 +55,11 @@ static size_t iconv_copy  (void *,const char **, size_t *, char **, size_t *);
 static size_t iconv_swab  (void *,const char **, size_t *, char **, size_t *);
 
 static const struct charset_functions const builtin_functions[] = {
-       /* windows is really neither UCS-2 not UTF-16 */
+       /* windows is closest to UTF-16 */
        {"UCS-2LE",  iconv_copy, iconv_copy},
        {"UTF-16LE",  iconv_copy, iconv_copy},
        {"UCS-2BE",  iconv_swab, iconv_swab},
+       {"UTF-16BE",  iconv_swab, iconv_swab},
 
        /* we include the UTF-8 alias to cope with differing locale settings */
        {"UTF8",   utf8_pull,  utf8_push},
@@ -217,23 +218,23 @@ smb_iconv_t smb_iconv_open(const char *tocode, const char *fromcode)
 #endif
 
        /* check for conversion to/from ucs2 */
-       if (strcasecmp(fromcode, "UCS-2LE") == 0 && to) {
+       if (strcasecmp(fromcode, "UTF-16LE") == 0 && to) {
                ret->direct = to->push;
                return ret;
        }
-       if (strcasecmp(tocode, "UCS-2LE") == 0 && from) {
+       if (strcasecmp(tocode, "UTF-16LE") == 0 && from) {
                ret->direct = from->pull;
                return ret;
        }
 
 #ifdef HAVE_NATIVE_ICONV
-       if (strcasecmp(fromcode, "UCS-2LE") == 0) {
+       if (strcasecmp(fromcode, "UTF-16LE") == 0) {
                ret->direct = sys_iconv;
                ret->cd_direct = ret->cd_push;
                ret->cd_push = NULL;
                return ret;
        }
-       if (strcasecmp(tocode, "UCS-2LE") == 0) {
+       if (strcasecmp(tocode, "UTF-16LE") == 0) {
                ret->direct = sys_iconv;
                ret->cd_direct = ret->cd_pull;
                ret->cd_pull = NULL;
@@ -460,100 +461,244 @@ static size_t iconv_copy(void *cd, const char **inbuf, size_t *inbytesleft,
 static size_t utf8_pull(void *cd, const char **inbuf, size_t *inbytesleft,
                         char **outbuf, size_t *outbytesleft)
 {
-       while (*inbytesleft >= 1 && *outbytesleft >= 2) {
-               const uint8_t *c = (const uint8_t *)*inbuf;
-               uint8_t *uc = (uint8_t *)*outbuf;
-               int len = 1;
+       size_t in_left=*inbytesleft, out_left=*outbytesleft;
+       const uint8_t *c = (const uint8_t *)*inbuf;
+       uint8_t *uc = (uint8_t *)*outbuf;
 
+       /* Decode UTF-8 from *inbuf into UTF-16LE at *outbuf, one
+          character per iteration. Progress is kept in locals and
+          written back through the caller's pointers on every exit,
+          so after an error *inbuf is the first unconverted byte. */
+       while (in_left >= 1 && out_left >= 2) {
                if ((c[0] & 0x80) == 0) {
                        uc[0] = c[0];
                        uc[1] = 0;
-               } else if ((c[0] & 0xf0) == 0xe0) {
-                       if (*inbytesleft < 3) {
-                               DEBUG(0,("short utf8 char\n"));
-                               goto badseq;
+                       c  += 1;
+                       in_left  -= 1;
+                       out_left -= 2;
+                       uc += 2;
+                       continue;
+               }
+
+               if ((c[0] & 0xe0) == 0xc0) {
+                       if (in_left < 2 ||
+                           (c[1] & 0xc0) != 0x80) {
+                               errno = EILSEQ;
+                               goto error;
+                       }
+                       uc[1] = (c[0]>>2) & 0x7;
+                       uc[0] = (c[0]<<6) | (c[1]&0x3f);
+                       c  += 2;
+                       in_left  -= 2;
+                       out_left -= 2;
+                       uc += 2;
+                       continue;
+               }
+
+               if ((c[0] & 0xf0) == 0xe0) {
+                       if (in_left < 3 ||
+                           (c[1] & 0xc0) != 0x80 ||
+                           (c[2] & 0xc0) != 0x80) {
+                               errno = EILSEQ;
+                               goto error;
                        }
                        uc[1] = ((c[0]&0xF)<<4) | ((c[1]>>2)&0xF);
                        uc[0] = (c[1]<<6) | (c[2]&0x3f);
-                       len = 3;
-               } else if ((c[0] & 0xe0) == 0xc0) {
-                       if (*inbytesleft < 2) {
-                               DEBUG(0,("short utf8 char\n"));
-                               goto badseq;
+                       c  += 3;
+                       in_left  -= 3;
+                       out_left -= 2;
+                       uc += 2;
+                       continue;
+               }
+
+               if ((c[0] & 0xf8) == 0xf0) {
+                       unsigned int codepoint;
+                       if (in_left < 4 ||
+                           (c[1] & 0xc0) != 0x80 ||
+                           (c[2] & 0xc0) != 0x80 ||
+                           (c[3] & 0xc0) != 0x80) {
+                               errno = EILSEQ;
+                               goto error;
                        }
-                       uc[1] = (c[0]>>2) & 0x7;
-                       uc[0] = (c[0]<<6) | (c[1]&0x3f);
-                       len = 2;
+                       codepoint =
+                               (c[3]&0x3f) |
+                               ((c[2]&0x3f)<<6) |
+                               ((c[1]&0x3f)<<12) |
+                               ((c[0]&0x7)<<18);
+                       if (codepoint < 0x10000) {
+                               /* accept UTF-8 characters that are not
+                                  minimally packed, but pack the result */
+                               uc[0] = (codepoint & 0xFF);
+                               uc[1] = (codepoint >> 8);
+                               c += 4;
+                               in_left -= 4;
+                               out_left -= 2;
+                               uc += 2;
+                               continue;
+                       }
+
+                       if (codepoint > 0x10FFFF) {
+                               /* beyond the Unicode range, so it has
+                                  no UTF-16 surrogate pair encoding */
+                               errno = EILSEQ;
+                               goto error;
+                       }
+
+                       codepoint -= 0x10000;
+
+                       if (out_left < 4) {
+                               errno = E2BIG;
+                               goto error;
+                       }
+
+                       /* high surrogate (0xd800 + top 10 bits) then low
+                          surrogate (0xdc00 + low 10 bits), little-endian */
+                       uc[0] = (codepoint>>10) & 0xFF;
+                       uc[1] = (codepoint>>18) | 0xd8;
+                       uc[2] = codepoint & 0xFF;
+                       uc[3] = ((codepoint>>8) & 0x3) | 0xdc;
+                       c  += 4;
+                       in_left  -= 4;
+                       out_left -= 4;
+                       uc += 4;
+                       continue;
                }
 
-               (*inbuf)  += len;
-               (*inbytesleft)  -= len;
-               (*outbytesleft) -= 2;
-               (*outbuf) += 2;
+               /* a stray continuation byte, or a 5+ byte sequence (not handled) */
+               errno = EINVAL;
+               goto error;
        }
 
-       if (*inbytesleft > 0) {
+       if (in_left > 0) {
                errno = E2BIG;
-               return -1;
+               goto error;
        }
-       
+
+       *inbytesleft = in_left;
+       *outbytesleft = out_left;
+       *inbuf = (const char *)c;
+       *outbuf = (char *)uc;
        return 0;
 
-badseq:
-       errno = EINVAL;
+error:
+       *inbytesleft = in_left;
+       *outbytesleft = out_left;
+       *inbuf = (const char *)c;
+       *outbuf = (char *)uc;
        return -1;
 }
 
 static size_t utf8_push(void *cd, const char **inbuf, size_t *inbytesleft,
-                        char **outbuf, size_t *outbytesleft)
+                       char **outbuf, size_t *outbytesleft)
 {
-       while (*inbytesleft >= 2 && *outbytesleft >= 1) {
-               uint8_t *c = (uint8_t *)*outbuf;
-               const uint8_t *uc = (const uint8_t *)*inbuf;
-               int len=1;
-
-               if (uc[1] & 0xf8) {
-                       if (*outbytesleft < 3) {
-                               DEBUG(0,("short utf8 write\n"));
-                               goto toobig;
+       size_t in_left=*inbytesleft, out_left=*outbytesleft;
+       uint8_t *c = (uint8_t *)*outbuf;
+       const uint8_t *uc = (const uint8_t *)*inbuf;
+
+       while (in_left >= 2 && out_left >= 1) {
+               unsigned int codepoint;
+
+               if (uc[1] == 0 && !(uc[0] & 0x80)) {
+                       /* simplest case: 7-bit ASCII becomes a single byte */
+                       c[0] = uc[0];
+                       in_left  -= 2;
+                       out_left -= 1;
+                       uc += 2;
+                       c  += 1;
+                       continue;
+               }
+
+               if ((uc[1]&0xf8) == 0) {
+                       /* next simplest case: values below 0x800 take two bytes */
+                       if (out_left < 2) {
+                               errno = E2BIG;
+                               goto error;
                        }
-                       c[0] = 0xe0 | (uc[1]>>4);
-                       c[1] = 0x80 | ((uc[1]&0xF)<<2) | (uc[0]>>6);
-                       c[2] = 0x80 | (uc[0]&0x3f);
-                       len = 3;
-               } else if (uc[1] | (uc[0] & 0x80)) {
-                       if (*outbytesleft < 2) {
-                               DEBUG(0,("short utf8 write\n"));
-                               goto toobig;
+                       c[0] = 0xc0 | (uc[0]>>6) | (uc[1]<<2);
+                       c[1] = 0x80 | (uc[0] & 0x3f);
+                       in_left  -= 2;
+                       out_left -= 2;
+                       uc += 2;
+                       c  += 2;
+                       continue;
+               }
+
+               if ((uc[1] & 0xfc) == 0xdc) {
+                       /* it's a low surrogate with no high surrogate before it. Illegal */
+                       if (in_left < 4) {
+                               errno = EINVAL;
+                       } else {
+                               errno = EILSEQ;
                        }
-                       c[0] = 0xc0 | (uc[1]<<2) | (uc[0]>>6);
-                       c[1] = 0x80 | (uc[0]&0x3f);
-                       len = 2;
-               } else {
-                       c[0] = uc[0];
+                       goto error;
                }
 
+               if ((uc[1] & 0xfc) != 0xd8) {
+                       codepoint = uc[0] | (uc[1]<<8);
+                       if (out_left < 3) {
+                               errno = E2BIG;
+                               goto error;
+                       }
+                       c[0] = 0xe0 | (codepoint >> 12);
+                       c[1] = 0x80 | ((codepoint >> 6) & 0x3f);
+                       c[2] = 0x80 | (codepoint & 0x3f);
+                       
+                       in_left  -= 2;
+                       out_left -= 3;
+                       uc  += 2;
+                       c   += 3;
+                       continue;
+               }
 
-               (*inbytesleft)  -= 2;
-               (*outbytesleft) -= len;
-               (*inbuf)  += 2;
-               (*outbuf) += len;
+               /* it's a high surrogate: a low surrogate must follow to make 4 UTF-8 bytes */
+               if (in_left < 4) {
+                       errno = EINVAL;
+                       goto error;
+               }
+               if ((uc[3] & 0xfc) != 0xdc) {
+                       errno = EILSEQ;
+                       goto error;
+               }
+               codepoint = 0x10000 + (uc[2] | ((uc[3] & 0x3)<<8) | 
+                                      (uc[0]<<10) | ((uc[1] & 0x3)<<18));
+               
+               if (out_left < 4) {
+                       errno = E2BIG;
+                       goto error;
+               }
+               c[0] = 0xf0 | (codepoint >> 18);
+               c[1] = 0x80 | ((codepoint >> 12) & 0x3f);
+               c[2] = 0x80 | ((codepoint >> 6) & 0x3f);
+               c[3] = 0x80 | (codepoint & 0x3f);
+               
+               in_left  -= 4;
+               out_left -= 4;
+               uc       += 4;
+               c        += 4;
        }
 
-       if (*inbytesleft == 1) {
+       if (in_left == 1) {
                errno = EINVAL;
-               return -1;
+               goto error;
        }
 
-       if (*inbytesleft > 1) {
+       if (in_left > 1) {
                errno = E2BIG;
-               return -1;
+               goto error;
        }
+
+       *inbytesleft = in_left;
+       *outbytesleft = out_left;
+       *inbuf  = uc;
+       *outbuf = c;
        
        return 0;
 
-toobig:
-       errno = E2BIG;
+error:
+       *inbytesleft = in_left;
+       *outbytesleft = out_left;
+       *inbuf  = uc;
+       *outbuf = c;
        return -1;
 }
 
index 2bd990836ee0bfaa8dd44f8629712514d2dc3d71..63d68fa12e31339832028ef32d5eb70fe6b9b40d 100644 (file)
@@ -96,9 +96,9 @@ static int check_dos_char(smb_ucs2_t c)
        char buf[10];
        smb_ucs2_t c2 = 0;
        int len1, len2;
-       len1 = convert_string(CH_UCS2, CH_DOS, &c, 2, buf, sizeof(buf));
+       len1 = convert_string(CH_UTF16, CH_DOS, &c, 2, buf, sizeof(buf));
        if (len1 == 0) return 0;
-       len2 = convert_string(CH_DOS, CH_UCS2, buf, len1, &c2, 2);
+       len2 = convert_string(CH_DOS, CH_UTF16, buf, len1, &c2, 2);
        if (len2 != 2) return 0;
        return (c == c2);
 }
index 87bbe5a31b2ec692a492efb12f91a4f8b01c2368..51c0c0b925aad8354d8a1802cfabe810e07b99ff 100644 (file)
@@ -526,7 +526,7 @@ static size_t smbcli_req_pull_ucs2(struct smbcli_request *req, TALLOC_CTX *mem_c
                return 0;
        }
 
-       ret = convert_string_talloc(mem_ctx, CH_UCS2, CH_UNIX, src, src_len2, (const void **)dest);
+       ret = convert_string_talloc(mem_ctx, CH_UTF16, CH_UNIX, src, src_len2, (const void **)dest);
        if (ret == -1) {
                *dest = NULL;
                return 0;
@@ -725,7 +725,7 @@ static size_t smbcli_blob_pull_ucs2(TALLOC_CTX* mem_ctx,
                src_len2 += 2;
        }
 
-       ret = convert_string_talloc(mem_ctx, CH_UCS2, CH_UNIX, src, src_len2, (const void **)dest);
+       ret = convert_string_talloc(mem_ctx, CH_UTF16, CH_UNIX, src, src_len2, (const void **)dest);
        if (ret == -1) {
                *dest = NULL;
                return 0;
index 34a1935c3b01bae26cd3c67763f160c686f42e80..7f36f7e4ba6c5f9d6a4d0ec992b494335d924430 100644 (file)
@@ -421,14 +421,14 @@ NTSTATUS ndr_pull_string(struct ndr_pull *ndr, int ndr_flags, const char **s)
        uint32_t len1, ofs, len2;
        uint16_t len3;
        int ret;
-       int chset = CH_UCS2;
+       int chset = CH_UTF16;
 
        if (!(ndr_flags & NDR_SCALARS)) {
                return NT_STATUS_OK;
        }
 
        if (NDR_BE(ndr)) {
-               chset = CH_UCS2BE;
+               chset = CH_UTF16BE;
        }
 
        switch (ndr->flags & LIBNDR_STRING_FLAGS) {
@@ -600,14 +600,14 @@ NTSTATUS ndr_push_string(struct ndr_push *ndr, int ndr_flags, const char *s)
 {
        ssize_t s_len, c_len;
        int ret;
-       int chset = CH_UCS2;
+       int chset = CH_UTF16;
 
        if (!(ndr_flags & NDR_SCALARS)) {
                return NT_STATUS_OK;
        }
 
        if (NDR_BE(ndr)) {
-               chset = CH_UCS2BE;
+               chset = CH_UTF16BE;
        }
        
        s_len = s?strlen(s):0;
index e3f54bde654552294445f247b849b5ceb1bd698c..645ea105a2b1c6ea0ab2036e7f898619000b1cb5 100644 (file)
@@ -468,7 +468,7 @@ static size_t req_pull_ucs2(struct smbsrv_request *req, const char **dest, const
                src_len2 += 2;
        }
 
-       ret = convert_string_talloc(req->mem_ctx, CH_UCS2, CH_UNIX, src, src_len2, (const void **)dest);
+       ret = convert_string_talloc(req->mem_ctx, CH_UTF16, CH_UNIX, src, src_len2, (const void **)dest);
 
        if (ret == -1) {
                *dest = NULL;
index 330dcf1707b839408f2e92482f2f16b15ed640f9..879f20617fd3781a6172859e2696ad48f14b23ea 100644 (file)
@@ -50,7 +50,7 @@ static NTSTATUS unicode_open(struct smbcli_tree *tree,
        }
        SSVAL(ucs_name, i*2, 0);
 
-       i = convert_string_allocate(CH_UCS2, CH_UNIX, ucs_name, (1+u_name_len)*2, (void **)&fname);
+       i = convert_string_allocate(CH_UTF16, CH_UNIX, ucs_name, (1+u_name_len)*2, (void **)&fname);
        if (i == -1) {
                free(ucs_name);
                return NT_STATUS_NO_MEMORY;
index f98e1b1b4ad2993be43fccbb659cc6cf12518b06..6faf020ef9acef02714e161a18546907bfbb7045 100644 (file)
@@ -50,7 +50,7 @@ BOOL torture_utable(int dummy)
                SSVAL(&c2, 0, c);
                fstrcpy(fname, "\\utable\\x");
                p = fname+strlen(fname);
-               len = convert_string(CH_UCS2, CH_UNIX, 
+               len = convert_string(CH_UTF16, CH_UNIX, 
                                     &c2, 2, 
                                     p, sizeof(fname)-strlen(fname));
                p[len] = 0;
@@ -107,7 +107,7 @@ static char *form_name(int c)
        p = fname+strlen(fname);
        SSVAL(&c2, 0, c);
 
-       len = convert_string(CH_UCS2, CH_UNIX, 
+       len = convert_string(CH_UTF16, CH_UNIX, 
                             &c2, 2, 
                             p, sizeof(fname)-strlen(fname));
        p[len] = 0;
index e5f1d357e64212d60a235f2c79e47b93428a4454..fdd5be1355191d68e97516b8ae6473e05ba2887b 100644 (file)
@@ -10,6 +10,8 @@ SMB_SUBSYSTEM_MK(TORTURE_RAP,torture/config.mk)
 
 SMB_SUBSYSTEM_MK(TORTURE_AUTH,torture/config.mk)
 
+SMB_SUBSYSTEM_MK(TORTURE_LOCAL,torture/config.mk)
+
 SMB_SUBSYSTEM_MK(TORTURE_NBENCH,torture/config.mk)
 
 SMB_SUBSYSTEM_MK(TORTURE_LDAP,torture/config.mk)
index 6d1ddd4d5a78a73d0b9f414f24b152781b4d53f1..43a42217f59da1b877d45640ee049a5a25c718f5 100644 (file)
@@ -93,6 +93,16 @@ REQUIRED_SUBSYSTEMS = \
 # End SUBSYSTEM TORTURE_AUTH
 #################################
 
+#################################
+# Start SUBSYSTEM TORTURE_LOCAL
+[SUBSYSTEM::TORTURE_LOCAL]
+ADD_OBJ_FILES = \
+               torture/local/iconv.o
+REQUIRED_SUBSYSTEMS = \
+               LIBSMB
+# End SUBSYSTEM TORTURE_LOCAL
+#################################
+
 #################################
 # Start SUBSYSTEM TORTURE_NBENCH
 [SUBSYSTEM::TORTURE_NBENCH]
@@ -125,6 +135,7 @@ REQUIRED_SUBSYSTEMS = \
                TORTURE_RPC \
                TORTURE_RAP \
                TORTURE_AUTH \
+               TORTURE_LOCAL \
                TORTURE_NBENCH \
                TORTURE_LDAP \
                CONFIG \
diff --git a/source4/torture/local/iconv.c b/source4/torture/local/iconv.c
new file mode 100644 (file)
index 0000000..0867be0
--- /dev/null
@@ -0,0 +1,298 @@
+/* 
+   Unix SMB/CIFS implementation.
+
+   local testing of iconv routines. This tests the system iconv code against
+   the built-in iconv code
+
+   Copyright (C) Andrew Tridgell 2004
+   
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2 of the License, or
+   (at your option) any later version.
+   
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+   
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+*/
+
+#include "includes.h"
+
+/*
+  generate a UTF-16LE buffer for a given unicode codepoint
+*/
+static int gen_codepoint(unsigned int codepoint,
+                         char *buf, size_t *size)
+{
+       static iconv_t cd;
+       uint8_t in[4];
+       char *ptr_in;
+       size_t size_in, size_out, ret;
+       if (!cd) {
+               cd = iconv_open("UTF-16LE", "UCS-4LE");
+       }
+       /* the input is the codepoint as one UCS-4LE character */
+       in[0] = codepoint & 0xFF;
+       in[1] = (codepoint>>8) & 0xFF;
+       in[2] = (codepoint>>16) & 0xFF;
+       in[3] = (codepoint>>24) & 0xFF;
+
+       ptr_in = (char *)in;
+       size_in = 4;
+       size_out = 8;
+       /* iconv is told that buf has room for 8 bytes */
+       ret = iconv(cd, &ptr_in, &size_in, &buf, &size_out);
+       /* report how many UTF-16LE bytes were written to buf */
+       *size = 8 - size_out;
+       /* iconv's own result: 0 for a clean conversion, non-zero otherwise */
+       return ret;
+}
+
+
+/*
+  work out the unicode codepoint of the first UTF-8 character in the buffer
+*/
+static unsigned int get_codepoint(char *buf, size_t size)
+{
+       static iconv_t cd;
+       uint8_t out[4];
+       char *ptr_out;
+       size_t size_out, size_in;
+       if (!cd) {
+               cd = iconv_open("UCS-4LE", "UTF-8");
+       }
+
+       size_in = size;
+       ptr_out = (char *)out;
+       size_out = sizeof(out);
+       memset(out, 0, sizeof(out));
+       /* out has room for one character only; the result is ignored,
+          so a failed conversion leaves the zeroed buffer (codepoint 0) */
+       iconv(cd, &buf, &size_in, &ptr_out, &size_out);
+       return out[0] | (out[1]<<8) | (out[2]<<16) | ((unsigned int)out[3]<<24);
+}
+
+/*
+  display a buffer with name prefix
+*/
+static void show_buf(const char *name, uint8_t *buf, size_t size)
+{
+       const uint8_t *end = buf + size;
+       printf("%s ", name);    /* label, then each byte as two hex digits */
+       while (buf < end) {
+               printf("%02x ", *buf++);
+       }
+       printf("\n");
+}
+
+/*
+  given a UTF-16LE buffer, test the system and built-in iconv code
+  to make sure they do exactly the same thing in converting the buffer
+  to UTF-8, then convert it back again and ensure we get the same buffer
+  back
+*/
+static int test_buffer(uint8_t *inbuf, size_t size)
+{
+       uint8_t buf1[1000], buf2[1000], buf3[1000];
+       size_t outsize1, outsize2, outsize3;
+       const char *ptr_in;
+       char *ptr_out;
+       size_t size_in1, size_in2, size_in3;
+       size_t ret1, ret2, ret3, len1, len2;
+       int ok = 1;
+       int errno1, errno2;
+       static iconv_t cd;
+       static smb_iconv_t cd2, cd3;
+
+       /* handles are opened on first use and kept for later calls */
+       if (!cd) {
+               cd = iconv_open("UTF-8", "UTF-16LE");
+               cd2 = smb_iconv_open("UTF-8", "UTF-16LE");
+               cd3 = smb_iconv_open("UTF-16LE", "UTF-8");
+       }
+
+       /* UTF-16LE -> UTF-8 with the built-in converter */
+       ptr_in = (const char *)inbuf;
+       ptr_out = (char *)buf1;
+       size_in1 = size;
+       outsize1 = sizeof(buf1);
+
+       memset(ptr_out, 0, outsize1);
+       /* start from a clean errno so the two runs can be compared */
+       errno = 0;
+       ret1 = smb_iconv(cd2, &ptr_in, &size_in1, &ptr_out, &outsize1);
+       errno1 = errno;
+
+       /* the same conversion with the system iconv */
+       ptr_in = (const char *)inbuf;
+       ptr_out = (char *)buf2;
+       size_in2 = size;
+       outsize2 = sizeof(buf2);
+
+       memset(ptr_out, 0, outsize2);
+       errno = 0;
+       ret2 = iconv(cd, (char **)&ptr_in, &size_in2, &ptr_out, &outsize2);
+       errno2 = errno;
+
+       len1 = sizeof(buf1) - outsize1;
+       len2 = sizeof(buf2) - outsize2;
+
+       /* codepoints above 1M are not interesting for now: when one
+          output is a prefix of the other and the extra bytes start
+          with such a codepoint, accept the difference */
+       if (len2 > len1 &&
+           memcmp(buf1, buf2, len1) == 0 &&
+           get_codepoint((char *)(buf2+len1), len2-len1) >= (1<<20)) {
+               return ok;
+       }
+       if (len1 > len2 &&
+           memcmp(buf1, buf2, len2) == 0 &&
+           get_codepoint((char *)(buf1+len2), len1-len2) >= (1<<20)) {
+               return ok;
+       }
+
+       /* both converters must agree on result, errno and progress */
+       if (ret1 != ret2) {
+               printf("ret1=%d ret2=%d\n", (int)ret1, (int)ret2);
+               ok = 0;
+       }
+
+       if (errno1 != errno2) {
+               printf("e1=%s e2=%s\n", strerror(errno1), strerror(errno2));
+               show_buf(" rem1:", inbuf+(size-size_in1), size_in1);
+               show_buf(" rem2:", inbuf+(size-size_in2), size_in2);
+               ok = 0;
+       }
+
+       if (outsize1 != outsize2) {
+               printf("\noutsize mismatch outsize1=%d outsize2=%d\n",
+                      (int)outsize1, (int)outsize2);
+               ok = 0;
+       }
+
+       if (size_in1 != size_in2) {
+               printf("\nsize_in mismatch size_in1=%d size_in2=%d\n",
+                      (int)size_in1, (int)size_in2);
+               ok = 0;
+       }
+
+       /* dump both conversions when anything differs */
+       if (!ok ||
+           len1 != len2 ||
+           memcmp(buf1, buf2, len1) != 0) {
+               printf("\nsize=%d ret1=%d ret2=%d\n",
+                      (int)size, (int)ret1, (int)ret2);
+               show_buf(" IN1:", inbuf, size-size_in1);
+               show_buf(" IN2:", inbuf, size-size_in2);
+               show_buf("OUT1:", buf1, len1);
+               show_buf("OUT2:", buf2, len2);
+               if (len2 > len1 && memcmp(buf1, buf2, len1) == 0) {
+                       printf("next codepoint is %u\n",
+                              get_codepoint((char *)(buf2+len1), len2-len1));
+               }
+               if (len1 > len2 && memcmp(buf1, buf2, len2) == 0) {
+                       printf("next codepoint is %u\n",
+                              get_codepoint((char *)(buf1+len2), len1-len2));
+               }
+
+               ok = 0;
+       }
+
+       if (!ok) return ok;
+
+       /* convert the UTF-8 back again; it must reproduce the input
+          bytes that the first conversion consumed */
+       size = size - size_in1;
+       ptr_in = (const char *)buf1;
+       ptr_out = (char *)buf3;
+       size_in3 = len1;
+       outsize3 = sizeof(buf3);
+
+       memset(ptr_out, 0, outsize3);
+       ret3 = smb_iconv(cd3, &ptr_in, &size_in3, &ptr_out, &outsize3);
+
+       if (ret3 != 0) {
+               printf("pull failed - %s\n", strerror(errno));
+               ok = 0;
+       }
+
+       /* exactly 'size' bytes of UTF-16LE should have come back */
+       if (outsize3 != sizeof(buf3) - size) {
+               printf("wrong outsize3 - %d should be %d\n",
+                      (int)outsize3, (int)(sizeof(buf3) - size));
+               ok = 0;
+       }
+
+       if (memcmp(buf3, inbuf, size) != 0) {
+               size_t i;
+               printf("pull bytes mismatch:\n");
+               for (i=0;i<size;i++) {
+                       printf("%02x ", inbuf[i]);
+               }
+               printf("\n");
+               for (i=0;i<size;i++) {
+                       printf("%02x ", buf3[i]);
+               }
+               printf("\n");
+               ok = 0;
+       }
+
+       /* 1 when every check above passed, 0 otherwise */
+       return ok;
+}
+
+/* LOCAL-ICONV torture test: check the first 1M codepoints, then 5M
+   random UTF-16LE buffers, stopping at the first failure */
+BOOL torture_local_iconv(int dummy) 
+{
+       size_t size;
+       unsigned char inbuf[1000];
+       int ok = 1;
+       unsigned int codepoint, i, c;
+
+       srandom(time(NULL));
+
+       printf("Testing first 1M codepoints\n");
+       for (codepoint=0;ok && codepoint<(1<<20);codepoint++) {
+               if (gen_codepoint(codepoint, (char *)inbuf, &size) != 0) {
+                       continue; /* could not be generated: skip */
+               }
+               if (codepoint % 100 == 0) {
+                       printf("codepoint=%u   \r", codepoint);
+               }
+
+               ok = test_buffer(inbuf, size);
+       }
+
+       printf("Testing 5M random UTF-16LE sequences\n");
+       for (i=0;ok && i<5000000;i++) {
+               if (i % 100 == 0) {
+                       printf("i=%u              \r", i);
+               }
+               /* mostly ASCII bytes, with surrogate bit patterns mixed in */
+               size = random() % 100;
+               for (c=0;c<size;c++) {
+                       if (random() % 100 < 80) {
+                               inbuf[c] = random() % 128;
+                       } else {
+                               inbuf[c] = random();
+                       }
+                       if (random() % 10 == 0) {
+                               inbuf[c] |= 0xd8;
+                       }
+                       if (random() % 10 == 0) {
+                               inbuf[c] |= 0xdc;
+                       }
+               }
+               ok = test_buffer(inbuf, size);
+       }
+
+       return ok == 1;
+}
+
+
index 9488c987eca8959d2a190296b328d00c6bddebdb..e3a7d8e5d88935730ff6dcee510ec4eab8d2fffc 100644 (file)
@@ -4212,8 +4212,9 @@ static struct {
        {"RPC-MULTIBIND", torture_multi_bind, 0},
        {"RPC-DRSUAPI", torture_rpc_drsuapi, 0},
 
-       /* crypto testers */
-       {"CRYPT-NTLMSSP", torture_ntlmssp_self_check, 0},
+       /* local (no server) testers */
+       {"LOCAL-NTLMSSP", torture_ntlmssp_self_check, 0},
+       {"LOCAL-ICONV", torture_local_iconv, 0},
 
        /* ldap testers */
        {"LDAP-BASIC", torture_ldap_basic, 0},