s3-charcnv: Move convert_string() et al to lib/util/charset
author Andrew Bartlett <abartlet@samba.org>
Tue, 12 Apr 2011 04:01:41 +0000 (14:01 +1000)
committer Andrew Tridgell <tridge@samba.org>
Wed, 13 Apr 2011 04:47:08 +0000 (14:47 +1000)
This is the first step to this being the common convert_string
implementation.

Andrew Bartlett

Signed-off-by: Andrew Tridgell <tridge@samba.org>
lib/util/charset/convert_string.c [new file with mode: 0644]
source3/Makefile.in
source3/include/proto.h
source3/lib/charcnv.c
source3/wscript_build

diff --git a/lib/util/charset/convert_string.c b/lib/util/charset/convert_string.c
new file mode 100644 (file)
index 0000000..86bb625
--- /dev/null
@@ -0,0 +1,467 @@
+/*
+   Unix SMB/CIFS implementation.
+   Character set conversion Extensions
+   Copyright (C) Igor Vergeichik <iverg@mail.ru> 2001
+   Copyright (C) Andrew Tridgell 2001
+   Copyright (C) Simo Sorce 2001
+   Copyright (C) Martin Pool 2003
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+*/
+#include "includes.h"
+
+/**
+ * @file
+ *
+ * @brief Character-set conversion routines built on our iconv.
+ *
+ * @note Samba's internal character set (at least in the 3.0 series)
+ * is always the same as the one for the Unix filesystem.  It is
+ * <b>not</b> necessarily UTF-8 and may be different on machines that
+ * need i18n filenames to be compatible with Unix software.  It does
+ * have to be a superset of ASCII.  All multibyte sequences must start
+ * with a byte with the high bit set.
+ *
+ * @sa lib/iconv.c
+ */
+
+
+/**
+ * Convert string from one encoding to another, making error checking etc
+ * Slow path version - uses (slow) iconv.
+ *
+ * @param from,to source and destination character sets
+ * @param src pointer to source string (multibyte or singlebyte)
+ * @param srclen length of the source string in bytes; (size_t)-1 means src
+ *        is nul terminated and is measured here, terminator included
+ * @param dest pointer to destination string (multibyte or singlebyte)
+ * @param destlen maximal length allowed for string
+ * @param converted_size always written: destlen minus the room smb_iconv()
+ *        left unused, or 0 when no conversion handle exists
+ *
+ * @returns false and sets errno on fail, true on success.
+ *
+ * Ensure the srclen contains the terminating zero.
+ **/
+
+static bool convert_string_internal(charset_t from, charset_t to,
+                     void const *src, size_t srclen,
+                     void *dest, size_t destlen, size_t *converted_size)
+{
+       size_t i_len, o_len;
+       size_t retval;
+       const char* inbuf = (const char*)src;
+       char* outbuf = (char*)dest;
+       smb_iconv_t descriptor;
+       struct smb_iconv_handle *ic;
+
+       *converted_size = 0; /* callers add to it even when we fail */
+
+       lazy_initialize_conv();
+       ic = get_iconv_handle();
+       descriptor = get_conv_handle(ic, from, to);
+
+       if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
+               errno = EINVAL;
+               return false;
+       }
+
+       if (srclen == (size_t)-1) {
+               if (from == CH_UTF16LE || from == CH_UTF16BE) {
+                       srclen = (strlen_w((const smb_ucs2_t *)src)+1) * 2;
+               } else {
+                       srclen = strlen((const char *)src)+1;
+               }
+       }
+
+       i_len=srclen;
+       o_len=destlen;
+
+       retval = smb_iconv(descriptor, &inbuf, &i_len, &outbuf, &o_len);
+       *converted_size = destlen-o_len;
+       return retval != (size_t)-1;
+}
+
+/**
+ * Convert string from one encoding to another, making error checking etc
+ * Fast path version - copies leading ASCII itself, iconv does the rest.
+ *
+ * @param from,to source and destination character sets
+ * @param src pointer to source string (multibyte or singlebyte)
+ * @param srclen length of the source string in bytes, or -1 for nul terminated.
+ * @param dest pointer to destination string (multibyte or singlebyte)
+ * @param destlen maximal length allowed for string - *NEVER* -1.
+ * @param converted_size is the number of bytes occupied in the destination
+ *
+ * @returns false and sets errno on fail, true on success (also if srclen == 0).
+ * Ensure the srclen contains the terminating zero.
+ *
+ * This function has been hand-tuned to provide a fast path.
+ * Don't change unless you really know what you are doing. JRA.
+ **/
+
+bool convert_string_error(charset_t from, charset_t to,
+                           void const *src, size_t srclen,
+                           void *dest, size_t destlen,
+                           size_t *converted_size)
+{
+       /*
+        * NB. We deliberately don't do a strlen here if srclen == -1.
+        * This is very expensive over millions of calls and is taken
+        * care of in the slow path in convert_string_internal. JRA.
+        */
+
+#ifdef DEVELOPER
+       SMB_ASSERT(destlen != (size_t)-1);
+#endif
+
+       if (srclen == 0) {
+               *converted_size = 0;
+               return true;
+       }
+
+       if (from != CH_UTF16LE && from != CH_UTF16BE && to != CH_UTF16LE && to != CH_UTF16BE) {
+               const unsigned char *p = (const unsigned char *)src;
+               unsigned char *q = (unsigned char *)dest;
+               size_t slen = srclen;
+               size_t dlen = destlen;
+               unsigned char lastp = '\0';
+               size_t retval = 0;
+
+               /* Neither side is UTF-16LE/BE: copy ASCII bytes one for one. */
+               while (slen && dlen) {
+                       if ((lastp = *p) <= 0x7f) {
+                               *q++ = *p++;
+                               if (slen != (size_t)-1) {
+                                       slen--;
+                               }
+                               dlen--;
+                               retval++;
+                               if (!lastp)
+                                       break; /* the nul terminator has been copied */
+                       } else {
+#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
+                               goto general_case; /* redo everything with iconv */
+#else /* give iconv only the unconverted tail */
+                               bool ret = convert_string_internal(from, to, p, slen, q, dlen, converted_size);
+                               *converted_size += retval;
+                               return ret;
+#endif
+                       }
+               }
+
+               *converted_size = retval;
+
+               if (!dlen) {
+                       /* Even if we fast path we should note if we ran out of room. */
+                       if (((slen != (size_t)-1) && slen) ||
+                                       ((slen == (size_t)-1) && lastp)) {
+                               errno = E2BIG;
+                               return false;
+                       }
+               }
+               return true;
+       } else if (from == CH_UTF16LE && to != CH_UTF16LE) {
+               const unsigned char *p = (const unsigned char *)src;
+               unsigned char *q = (unsigned char *)dest;
+               size_t retval = 0;
+               size_t slen = srclen;
+               size_t dlen = destlen;
+               unsigned char lastp = '\0';
+
+               /* UTF-16LE source: ASCII is a byte <= 0x7f followed by a zero byte. */
+               while (((slen == (size_t)-1) || (slen >= 2)) && dlen) {
+                       if (((lastp = *p) <= 0x7f) && (p[1] == 0)) {
+                               *q++ = *p;
+                               if (slen != (size_t)-1) {
+                                       slen -= 2;
+                               }
+                               p += 2;
+                               dlen--;
+                               retval++;
+                               if (!lastp)
+                                       break; /* the nul terminator has been copied */
+                       } else {
+#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
+                               goto general_case; /* redo everything with iconv */
+#else /* give iconv only the unconverted tail */
+                               bool ret = convert_string_internal(from, to, p, slen, q, dlen, converted_size);
+                               *converted_size += retval;
+                               return ret;
+#endif
+                       }
+               }
+
+               *converted_size = retval;
+
+               if (!dlen) {
+                       /* Even if we fast path we should note if we ran out of room. */
+                       if (((slen != (size_t)-1) && slen) ||
+                                       ((slen == (size_t)-1) && lastp)) {
+                               errno = E2BIG;
+                               return false;
+                       }
+               }
+               return true;
+       } else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) {
+               const unsigned char *p = (const unsigned char *)src;
+               unsigned char *q = (unsigned char *)dest;
+               size_t retval = 0;
+               size_t slen = srclen;
+               size_t dlen = destlen;
+               unsigned char lastp = '\0';
+
+               /* UTF-16LE destination: each ASCII byte gets a zero byte appended. */
+               while (slen && (dlen >= 2)) {
+                       if ((lastp = *p) <= 0x7F) {
+                               *q++ = *p++;
+                               *q++ = '\0';
+                               if (slen != (size_t)-1) {
+                                       slen--;
+                               }
+                               dlen -= 2;
+                               retval += 2;
+                               if (!lastp)
+                                       break; /* the nul terminator has been copied */
+                       } else {
+#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
+                               goto general_case; /* redo everything with iconv */
+#else /* give iconv only the unconverted tail */
+                               bool ret = convert_string_internal(from, to, p, slen, q, dlen, converted_size);
+                               *converted_size += retval;
+                               return ret;
+#endif
+                       }
+               }
+
+               *converted_size = retval;
+
+               if (!dlen) {
+                       /* Even if we fast path we should note if we ran out of room. */
+                       if (((slen != (size_t)-1) && slen) ||
+                                       ((slen == (size_t)-1) && lastp)) {
+                               errno = E2BIG;
+                               return false;
+                       }
+               }
+               return true;
+       }
+
+#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
+  general_case:
+#endif
+       return convert_string_internal(from, to, src, srclen, dest, destlen, converted_size);
+}
+
+/* Wrapper around convert_string_error() that emits a DEBUG message,
+ * chosen by errno, when the conversion fails. Result is passed through. */
+bool convert_string(charset_t from, charset_t to,
+                     void const *src, size_t srclen,
+                     void *dest, size_t destlen,
+                     size_t *converted_size)
+{
+       const char *reason = "unknown error";
+       int dbglvl = 0; /* errno values not listed below are logged at level 0 */
+
+       if (convert_string_error(from, to, src, srclen, dest, destlen, converted_size)) {
+               return true;
+       }
+
+       switch (errno) {
+       case E2BIG:
+       {
+               struct smb_iconv_handle *ic;
+               lazy_initialize_conv();
+               ic = get_iconv_handle();
+               /* src is printed as a C string only when it is in CH_UNIX. */
+               if (from == CH_UNIX) {
+                       DEBUG(3,("E2BIG: convert_string(%s,%s): srclen=%u destlen=%u - '%s'\n",
+                                charset_name(ic, from), charset_name(ic, to),
+                                (unsigned int)srclen, (unsigned int)destlen, (const char *)src));
+               } else {
+                       DEBUG(3,("E2BIG: convert_string(%s,%s): srclen=%u destlen=%u\n",
+                                charset_name(ic, from), charset_name(ic, to),
+                                (unsigned int)srclen, (unsigned int)destlen));
+               }
+               return false;
+       }
+       case EINVAL:
+               reason = "Incomplete multibyte sequence";
+               dbglvl = 3;
+               break;
+       case EILSEQ:
+               reason = "Illegal multibyte sequence";
+               dbglvl = 3;
+               break;
+       default:
+               break;
+       }
+       DEBUG(dbglvl,("convert_string_internal: Conversion error: %s(%s)\n",
+                     reason, (const char *)src));
+       return false;
+}
+
+
+/**
+ * Convert between character sets, allocating a new buffer using talloc for the result.
+ *
+ * @param ctx talloc context the result buffer is allocated on
+ * @param from,to source and destination character sets
+ * @param src source buffer, must not be NULL
+ * @param srclen length of source buffer.
+ * @param dst really a "void **"; always set at least to NULL
+ * @param converted_size set to the number of bytes occupied by the string in
+ * the destination on success.
+ * @note -1 is not accepted for srclen.
+ *
+ * @return true if new buffer was correctly allocated, and string was
+ * converted. On failure *dst stays NULL and no buffer is leaked.
+ *
+ * Ensure the srclen contains the terminating zero.
+ */
+bool convert_string_talloc(TALLOC_CTX *ctx, charset_t from, charset_t to,
+                          void const *src, size_t srclen, void *dst,
+                          size_t *converted_size)
+{
+       size_t i_len, o_len, destlen = (srclen * 3) / 2;
+       size_t retval;
+       const char *inbuf = NULL;
+       char *outbuf = NULL, *ob = NULL, *nb = NULL;
+       smb_iconv_t descriptor;
+       void **dest = (void **)dst;
+       struct smb_iconv_handle *ic;
+
+       *dest = NULL;
+
+       if (src == NULL || srclen == (size_t)-1) {
+               errno = EINVAL;
+               return false;
+       }
+
+       if (srclen == 0) {
+               /* We really should treat this as an error, but
+                  there are too many callers that need this to
+                  return a NULL terminated string in the correct
+                  character set. */
+               if (to == CH_UTF16LE|| to == CH_UTF16BE || to == CH_UTF16MUNGED) {
+                       destlen = 2;
+               } else {
+                       destlen = 1;
+               }
+               ob = talloc_zero_array(ctx, char, destlen);
+               if (ob == NULL) {
+                       errno = ENOMEM;
+                       return false;
+               }
+               *converted_size = destlen;
+               *dest = ob;
+               return true;
+       }
+
+       lazy_initialize_conv();
+       ic = get_iconv_handle();
+       descriptor = get_conv_handle(ic, from, to);
+
+       if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
+               DEBUG(0,("convert_string_talloc: Conversion not supported.\n"));
+               errno = EOPNOTSUPP;
+               return false;
+       }
+
+  convert:
+       /* Double the output size on every pass; +2 is for ucs2 null termination. */
+       if ((destlen*2)+2 < destlen) {
+               /* wrapped ! abort. */
+               DEBUG(0, ("convert_string_talloc: destlen wrapped !\n"));
+               /* Free the allocation itself: outbuf may point inside it. */
+               TALLOC_FREE(ob);
+               errno = EOPNOTSUPP;
+               return false;
+       }
+       destlen = destlen * 2;
+
+       /* Go through nb so that a failed realloc does not leak ob. */
+       nb = (char *)TALLOC_REALLOC(ctx, ob, destlen + 2);
+       if (!nb) {
+               DEBUG(0, ("convert_string_talloc: realloc failed!\n"));
+               TALLOC_FREE(ob);
+               errno = ENOMEM;
+               return false;
+       }
+       ob = nb;
+
+       /* Every pass, also an E2BIG retry, converts the whole source again. */
+       inbuf = (const char *)src;
+       outbuf = ob;
+       i_len = srclen;
+       o_len = destlen;
+
+       retval = smb_iconv(descriptor,
+                          &inbuf, &i_len,
+                          &outbuf, &o_len);
+       if(retval == (size_t)-1)                {
+               const char *reason="unknown error";
+               switch(errno) {
+                       case EINVAL:
+                               reason="Incomplete multibyte sequence";
+                               DEBUG(3,("convert_string_talloc: Conversion error: %s(%s)\n",reason,inbuf));
+                               break;
+                       case E2BIG:
+                               goto convert;
+                       case EILSEQ:
+                               reason="Illegal multibyte sequence";
+                               DEBUG(3,("convert_string_talloc: Conversion error: %s(%s)\n",reason,inbuf));
+                               break;
+               }
+               DEBUG(0,("Conversion error: %s(%s)\n",reason,inbuf));
+               /* smb_panic(reason); */
+               TALLOC_FREE(ob);
+               return false;
+       }
+
+       destlen = destlen - o_len;
+       /* Don't shrink unless we're reclaiming a lot of
+        * space. This is in the hot codepath and these
+        * reallocs *cost*. JRA.
+        */
+       if (o_len > 1024) {
+               /* We're shrinking here so we know the +2 is safe from wrap. */
+               nb = (char *)TALLOC_REALLOC(ctx,ob,destlen + 2);
+               if (nb != NULL) {
+                       /* else: keep the larger, still valid, buffer */
+                       ob = nb;
+               }
+       }
+
+       *dest = ob;
+
+       /* Must ucs2 null terminate in the extra space we allocated. */
+       ob[destlen] = '\0';
+       ob[destlen+1] = '\0';
+
+       /* Ensure we can never return a *converted_size of zero. */
+       if (destlen == 0) {
+               /* As we're now returning false on a bad smb_iconv call,
+                  this should never happen. But be safe anyway. */
+               if (to == CH_UTF16LE|| to == CH_UTF16BE || to == CH_UTF16MUNGED) {
+                       destlen = 2;
+               } else {
+                       destlen = 1;
+               }
+       }
+
+       *converted_size = destlen;
+       return true;
+}
index 2c74ff8cf43560944a4c7791c9901de72b6d5598..c20323c0786d939489d639dae8bdc63b60a7868b 100644 (file)
@@ -442,7 +442,7 @@ LIB_OBJ = $(LIBSAMBAUTIL_OBJ) $(UTIL_OBJ) $(CRYPTO_OBJ) \
          lib/util_transfer_file.o ../lib/async_req/async_sock.o \
          lib/addrchange.o \
          $(TDB_LIB_OBJ) \
-         $(VERSION_OBJ) lib/charcnv.o lib/fstring.o ../lib/util/debug.o ../lib/util/debug_s3.o ../lib/util/fault.o \
+         $(VERSION_OBJ) lib/charcnv.o ../lib/util/charset/convert_string.o lib/fstring.o ../lib/util/debug.o ../lib/util/debug_s3.o ../lib/util/fault.o \
          lib/interface.o lib/pidfile.o lib/dumpcore.o \
          lib/system.o lib/sendfile.o lib/recvfile.o lib/time.o \
          lib/username.o \
index 3f44b949f2e7441a8e44040af2ab4a8e6d631f5f..5b96bdfbeb45d742ea3f87ca9d84888336fccee2 100644 (file)
@@ -72,6 +72,7 @@ int bitmap_find(struct bitmap *bm, unsigned ofs);
 
 /* The following definitions come from lib/charcnv.c  */
 
+void lazy_initialize_conv(void);
 void gfree_charcnv(void);
 void init_iconv(void);
 bool convert_string(charset_t from, charset_t to,
index a8719070c3299207f93500b574c9bf629d64f6a6..5c01052d21003844c957b08d2af244c71072e7b1 100644 (file)
 */
 #include "includes.h"
 
-/**
- * @file
- *
- * @brief Character-set conversion routines built on our iconv.
- *
- * @note Samba's internal character set (at least in the 3.0 series)
- * is always the same as the one for the Unix filesystem.  It is
- * <b>not</b> necessarily UTF-8 and may be different on machines that
- * need i18n filenames to be compatible with Unix software.  It does
- * have to be a superset of ASCII.  All multibyte sequences must start
- * with a byte with the high bit set.
- *
- * @sa lib/iconv.c
- */
-
-
 static bool initialized;
 
-static void lazy_initialize_conv(void)
+void lazy_initialize_conv(void)
 {
        if (!initialized) {
                load_case_tables_library();
@@ -72,434 +56,6 @@ void init_iconv(void)
                                                                true, global_iconv_handle);
 }
 
-/**
- * Convert string from one encoding to another, making error checking etc
- * Slow path version - uses (slow) iconv.
- *
- * @param src pointer to source string (multibyte or singlebyte)
- * @param srclen length of the source string in bytes
- * @param dest pointer to destination string (multibyte or singlebyte)
- * @param destlen maximal length allowed for string
- * @param converted size is the number of bytes occupied in the destination
- *
- * @returns false and sets errno on fail, true on success.
- *
- * Ensure the srclen contains the terminating zero.
- *
- **/
-
-static bool convert_string_internal(charset_t from, charset_t to,
-                     void const *src, size_t srclen, 
-                     void *dest, size_t destlen, size_t *converted_size)
-{
-       size_t i_len, o_len;
-       size_t retval;
-       const char* inbuf = (const char*)src;
-       char* outbuf = (char*)dest;
-       smb_iconv_t descriptor;
-       struct smb_iconv_handle *ic;
-
-       lazy_initialize_conv();
-       ic = get_iconv_handle();
-       descriptor = get_conv_handle(ic, from, to);
-
-       if (srclen == (size_t)-1) {
-               if (from == CH_UTF16LE || from == CH_UTF16BE) {
-                       srclen = (strlen_w((const smb_ucs2_t *)src)+1) * 2;
-               } else {
-                       srclen = strlen((const char *)src)+1;
-               }
-       }
-
-
-       if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
-               errno = EINVAL;
-               return false;
-       }
-
-       i_len=srclen;
-       o_len=destlen;
-
-       retval = smb_iconv(descriptor, &inbuf, &i_len, &outbuf, &o_len);
-       if (retval == (size_t)-1) {
-               return false;
-       }
-       *converted_size = destlen-o_len;
-       return true;
-}
-
-/**
- * Convert string from one encoding to another, making error checking etc
- * Fast path version - handles ASCII first.
- *
- * @param src pointer to source string (multibyte or singlebyte)
- * @param srclen length of the source string in bytes, or -1 for nul terminated.
- * @param dest pointer to destination string (multibyte or singlebyte)
- * @param destlen maximal length allowed for string - *NEVER* -1.
- * @param converted size is the number of bytes occupied in the destination
- *
- * @returns false and sets errno on fail, true on success.
- *
- * Ensure the srclen contains the terminating zero.
- *
- * This function has been hand-tuned to provide a fast path.
- * Don't change unless you really know what you are doing. JRA.
- **/
-
-bool convert_string_error(charset_t from, charset_t to,
-                           void const *src, size_t srclen,
-                           void *dest, size_t destlen,
-                           size_t *converted_size)
-{
-       /*
-        * NB. We deliberately don't do a strlen here if srclen == -1.
-        * This is very expensive over millions of calls and is taken
-        * care of in the slow path in convert_string_internal. JRA.
-        */
-
-#ifdef DEVELOPER
-       SMB_ASSERT(destlen != (size_t)-1);
-#endif
-
-       if (srclen == 0) {
-               *converted_size = 0;
-               return true;
-       }
-
-       if (from != CH_UTF16LE && from != CH_UTF16BE && to != CH_UTF16LE && to != CH_UTF16BE) {
-               const unsigned char *p = (const unsigned char *)src;
-               unsigned char *q = (unsigned char *)dest;
-               size_t slen = srclen;
-               size_t dlen = destlen;
-               unsigned char lastp = '\0';
-               size_t retval = 0;
-
-               /* If all characters are ascii, fast path here. */
-               while (slen && dlen) {
-                       if ((lastp = *p) <= 0x7f) {
-                               *q++ = *p++;
-                               if (slen != (size_t)-1) {
-                                       slen--;
-                               }
-                               dlen--;
-                               retval++;
-                               if (!lastp)
-                                       break;
-                       } else {
-#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
-                               goto general_case;
-#else
-                               bool ret = convert_string_internal(from, to, p, slen, q, dlen, converted_size);
-                               *converted_size += retval;
-                               return ret;
-#endif
-                       }
-               }
-
-               *converted_size = retval;
-
-               if (!dlen) {
-                       /* Even if we fast path we should note if we ran out of room. */
-                       if (((slen != (size_t)-1) && slen) ||
-                                       ((slen == (size_t)-1) && lastp)) {
-                               errno = E2BIG;
-                               return false;
-                       }
-               }
-               return true;
-       } else if (from == CH_UTF16LE && to != CH_UTF16LE) {
-               const unsigned char *p = (const unsigned char *)src;
-               unsigned char *q = (unsigned char *)dest;
-               size_t retval = 0;
-               size_t slen = srclen;
-               size_t dlen = destlen;
-               unsigned char lastp = '\0';
-
-               /* If all characters are ascii, fast path here. */
-               while (((slen == (size_t)-1) || (slen >= 2)) && dlen) {
-                       if (((lastp = *p) <= 0x7f) && (p[1] == 0)) {
-                               *q++ = *p;
-                               if (slen != (size_t)-1) {
-                                       slen -= 2;
-                               }
-                               p += 2;
-                               dlen--;
-                               retval++;
-                               if (!lastp)
-                                       break;
-                       } else {
-#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
-                               goto general_case;
-#else
-                               bool ret = convert_string_internal(from, to, p, slen, q, dlen, converted_size);
-                               *converted_size += retval;
-                               return ret;
-#endif
-                       }
-               }
-
-               *converted_size = retval;
-
-               if (!dlen) {
-                       /* Even if we fast path we should note if we ran out of room. */
-                       if (((slen != (size_t)-1) && slen) ||
-                                       ((slen == (size_t)-1) && lastp)) {
-                               errno = E2BIG;
-                               return false;
-                       }
-               }
-               return true;
-       } else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) {
-               const unsigned char *p = (const unsigned char *)src;
-               unsigned char *q = (unsigned char *)dest;
-               size_t retval = 0;
-               size_t slen = srclen;
-               size_t dlen = destlen;
-               unsigned char lastp = '\0';
-
-               /* If all characters are ascii, fast path here. */
-               while (slen && (dlen >= 2)) {
-                       if ((lastp = *p) <= 0x7F) {
-                               *q++ = *p++;
-                               *q++ = '\0';
-                               if (slen != (size_t)-1) {
-                                       slen--;
-                               }
-                               dlen -= 2;
-                               retval += 2;
-                               if (!lastp)
-                                       break;
-                       } else {
-#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
-                               goto general_case;
-#else
-                               bool ret = convert_string_internal(from, to, p, slen, q, dlen, converted_size);
-                               *converted_size += retval;
-                               return ret;
-#endif
-                       }
-               }
-
-               *converted_size = retval;
-
-               if (!dlen) {
-                       /* Even if we fast path we should note if we ran out of room. */
-                       if (((slen != (size_t)-1) && slen) ||
-                                       ((slen == (size_t)-1) && lastp)) {
-                               errno = E2BIG;
-                               return false;
-                       }
-               }
-               return true;
-       }
-
-#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
-  general_case:
-#endif
-       return convert_string_internal(from, to, src, srclen, dest, destlen, converted_size);
-}
-
-bool convert_string(charset_t from, charset_t to,
-                     void const *src, size_t srclen,
-                     void *dest, size_t destlen,
-                     size_t *converted_size)
-{
-       bool ret = convert_string_error(from, to, src, srclen, dest, destlen, converted_size);
-
-       if(ret==false) {
-               const char *reason="unknown error";
-               switch(errno) {
-                       case EINVAL:
-                               reason="Incomplete multibyte sequence";
-                               DEBUG(3,("convert_string_internal: Conversion error: %s(%s)\n",
-                                        reason, (const char *)src));
-                               break;
-                       case E2BIG:
-                       {
-                               struct smb_iconv_handle *ic;
-                               lazy_initialize_conv();
-                               ic = get_iconv_handle();
-
-                               reason="No more room";
-                               if (from == CH_UNIX) {
-                                       DEBUG(3,("E2BIG: convert_string(%s,%s): srclen=%u destlen=%u - '%s'\n",
-                                                charset_name(ic, from), charset_name(ic, to),
-                                                (unsigned int)srclen, (unsigned int)destlen, (const char *)src));
-                               } else {
-                                       DEBUG(3,("E2BIG: convert_string(%s,%s): srclen=%u destlen=%u\n",
-                                                charset_name(ic, from), charset_name(ic, to),
-                                                (unsigned int)srclen, (unsigned int)destlen));
-                               }
-                               break;
-                       }
-                       case EILSEQ:
-                               reason="Illegal multibyte sequence";
-                               DEBUG(3,("convert_string_internal: Conversion error: %s(%s)\n",
-                                        reason, (const char *)src));
-                               break;
-                       default:
-                               DEBUG(0,("convert_string_internal: Conversion error: %s(%s)\n",
-                                        reason, (const char *)src));
-                               break;
-               }
-               /* smb_panic(reason); */
-       }
-       return ret;
-}
-
-
-/**
- * Convert between character sets, allocating a new buffer using talloc for the result.
- *
- * The first attempt uses an output buffer of roughly three times srclen
- * (plus two bytes for the terminator). Each time smb_iconv() fails with
- * E2BIG the buffer size is doubled and the conversion is restarted from
- * the beginning of src.
- *
- * @param ctx talloc context the result buffer is allocated on.
- * @param from character set of the source buffer.
- * @param to character set to convert to.
- * @param src source buffer, must not be NULL.
- * @param srclen length of source buffer.
- * @param dst address of the pointer that receives the new buffer (it is
- * accessed as a void **); always set at least to NULL.
- * @param converted_size set to the number of bytes occupied by the string in
- * the destination on success. It is never set to zero. May be NULL if the
- * caller does not need the size.
- * @note -1 is not accepted for srclen.
- *
- * @return true if new buffer was correctly allocated, and string was
- * converted. On failure false is returned, *dst is NULL and no buffer is
- * left allocated by this function; errno is set to EINVAL (bad arguments),
- * EOPNOTSUPP (no converter, or size wrap) or ENOMEM. On a conversion
- * failure errno is not set explicitly by this function.
- *
- * Ensure the srclen contains the terminating zero.
- *
- * Two zero bytes are always written directly after the converted data so
- * that the result is terminated for ucs2 as well; those two bytes are not
- * counted in *converted_size.
- */
-bool convert_string_talloc(TALLOC_CTX *ctx, charset_t from, charset_t to,
-                          void const *src, size_t srclen, void *dst,
-                          size_t *converted_size)
-
-{
-       size_t i_len, o_len, destlen;
-       size_t retval;
-       const char *inbuf = NULL;
-       char *outbuf = NULL, *ob = NULL, *resized = NULL;
-       smb_iconv_t descriptor;
-       void **dest = (void **)dst;
-       struct smb_iconv_handle *ic;
-       /* UTF-16 style destinations need a two byte terminator. */
-       const bool wide_dest = (to == CH_UTF16LE || to == CH_UTF16BE ||
-                               to == CH_UTF16MUNGED);
-
-       *dest = NULL;
-
-       if (src == NULL || srclen == (size_t)-1) {
-               errno = EINVAL;
-               return false;
-       }
-
-       if (srclen == 0) {
-               /* We really should treat this as an error, but
-                  there are too many callers that need this to
-                  return a NULL terminated string in the correct
-                  character set. */
-               destlen = wide_dest ? 2 : 1;
-               ob = talloc_zero_array(ctx, char, destlen);
-               if (ob == NULL) {
-                       errno = ENOMEM;
-                       return false;
-               }
-               if (converted_size != NULL) {
-                       *converted_size = destlen;
-               }
-               *dest = ob;
-               return true;
-       }
-
-       lazy_initialize_conv();
-       ic = get_iconv_handle();
-       descriptor = get_conv_handle(ic, from, to);
-
-       if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
-               DEBUG(0,("convert_string_talloc: Conversion not supported.\n"));
-               errno = EOPNOTSUPP;
-               return false;
-       }
-
-       /* The initial size estimate multiplies srclen by 3; refuse
-        * lengths for which that multiplication itself would wrap. */
-       if (srclen > ((size_t)-1) / 3) {
-               DEBUG(0, ("convert_string_talloc: destlen wrapped !\n"));
-               errno = EOPNOTSUPP;
-               return false;
-       }
-       destlen = (srclen * 3) / 2;
-
-       for (;;) {
-               /* +2 is for ucs2 null termination. */
-               if ((destlen*2)+2 < destlen) {
-                       /* wrapped ! abort. Free the allocation base (ob),
-                        * not the write cursor (outbuf). */
-                       DEBUG(0, ("convert_string_talloc: destlen wrapped !\n"));
-                       TALLOC_FREE(ob);
-                       errno = EOPNOTSUPP;
-                       return false;
-               }
-               destlen = destlen * 2;
-
-               /* +2 is for ucs2 null termination. Keep the old pointer
-                * until the realloc has succeeded so it can be freed
-                * on failure. */
-               resized = (char *)TALLOC_REALLOC(ctx, ob, destlen + 2);
-               if (resized == NULL) {
-                       DEBUG(0, ("convert_string_talloc: realloc failed!\n"));
-                       TALLOC_FREE(ob);
-                       errno = ENOMEM;
-                       return false;
-               }
-               ob = resized;
-
-               /* Every attempt converts the whole source again.
-                * smb_iconv() is handed &inbuf, so a previous attempt
-                * may have moved it (presumably it advances like
-                * iconv(3)); rewind it together with the length. */
-               inbuf = (const char *)src;
-               outbuf = ob;
-               i_len = srclen;
-               o_len = destlen;
-
-               retval = smb_iconv(descriptor,
-                                  &inbuf, &i_len,
-                                  &outbuf, &o_len);
-               if (retval != (size_t)-1) {
-                       break;
-               }
-
-               if (errno == E2BIG) {
-                       /* Output buffer too small: double it and retry. */
-                       continue;
-               }
-
-               {
-                       const char *reason="unknown error";
-                       switch(errno) {
-                               case EINVAL:
-                                       reason="Incomplete multibyte sequence";
-                                       DEBUG(3,("convert_string_talloc: Conversion error: %s(%s)\n",reason,inbuf));
-                                       break;
-                               case EILSEQ:
-                                       reason="Illegal multibyte sequence";
-                                       DEBUG(3,("convert_string_talloc: Conversion error: %s(%s)\n",reason,inbuf));
-                                       break;
-                               default:
-                                       /* Only unexpected errors are logged at level 0. */
-                                       DEBUG(0,("Conversion error: %s(%s)\n",reason,inbuf));
-                                       break;
-                       }
-               }
-               /* smb_panic(reason); */
-               TALLOC_FREE(ob);
-               return false;
-       }
-
-       /* o_len is the unused tail; destlen becomes the bytes produced. */
-       destlen = destlen - o_len;
-       /* Don't shrink unless we're reclaiming a lot of
-        * space. This is in the hot codepath and these
-        * reallocs *cost*. JRA.
-        */
-       if (o_len > 1024) {
-               /* We're shrinking here so we know the +2 is safe from wrap. */
-               resized = (char *)TALLOC_REALLOC(ctx, ob, destlen + 2);
-               if (resized == NULL) {
-                       DEBUG(0, ("convert_string_talloc: out of memory!\n"));
-                       TALLOC_FREE(ob);
-                       errno = ENOMEM;
-                       return false;
-               }
-               ob = resized;
-       }
-
-       /* Must ucs2 null terminate in the extra space we allocated. */
-       ob[destlen] = '\0';
-       ob[destlen+1] = '\0';
-
-       /* Ensure we can never return a *converted_size of zero. */
-       if (destlen == 0) {
-               /* As we're now returning false on a bad smb_iconv call,
-                  this should never happen. But be safe anyway. */
-               destlen = wide_dest ? 2 : 1;
-       }
-
-       *dest = ob;
-       if (converted_size != NULL) {
-               *converted_size = destlen;
-       }
-       return true;
-}
-
 bool unix_strupper(const char *src, size_t srclen, char *dest, size_t destlen)
 {
        size_t size;
index 71d91388dd81966a5e36d245b41a2bf84a2302b3..6eb0c4c4641c66c4090bf974074a111caf1d80a0 100755 (executable)
@@ -963,7 +963,7 @@ bld.SAMBA3_SUBSYSTEM('tdb-wrap3',
                     vars=locals())
 
 bld.SAMBA3_SUBSYSTEM('CHARSET3',
-                    source='''lib/util_str.c lib/charcnv.c lib/fstring.c''',
+                    source='''lib/util_str.c lib/charcnv.c  ../lib/util/charset/convert_string.c lib/fstring.c''',
                     public_deps='ICONV_WRAPPER CODEPOINTS',
                     deps='DYNCONFIG')