Imported Upstream version 4.0.0+dfsg1
[abartlet/samba-debian.git] / lib / util / charset / iconv.c
index 1adeb5221aa2437594f6ffee6dac96e0bd52beed..1c507b4b13707d1fd3142d09ef79d58593c1ed69 100644 (file)
 #include "../lib/util/dlinklist.h"
 #include "system/iconv.h"
 #include "system/filesys.h"
+#include "charset_proto.h"
 
 #ifdef strcasecmp
 #undef strcasecmp
 #endif
 
-#ifdef static_decl_charset
-static_decl_charset;
-#endif
-
 /**
  * @file
  *
@@ -56,6 +53,7 @@ static_decl_charset;
 
 static size_t ascii_pull  (void *,const char **, size_t *, char **, size_t *);
 static size_t ascii_push  (void *,const char **, size_t *, char **, size_t *);
+static size_t latin1_pull(void *,const char **, size_t *, char **, size_t *);
 static size_t latin1_push(void *,const char **, size_t *, char **, size_t *);
 static size_t utf8_pull   (void *,const char **, size_t *, char **, size_t *);
 static size_t utf8_push   (void *,const char **, size_t *, char **, size_t *);
@@ -77,65 +75,20 @@ static const struct charset_functions builtin_functions[] = {
        {"UTF-8",   utf8_pull,  utf8_push},
 
        /* this handles the munging needed for String2Key */
-       {"UTF16_MUNGED",   utf16_munged_pull,  iconv_copy},
+       {"UTF16_MUNGED",   utf16_munged_pull,  iconv_copy, true},
 
        {"ASCII", ascii_pull, ascii_push},
        {"646", ascii_pull, ascii_push},
-       {"ISO-8859-1", ascii_pull, latin1_push},
-       {"UCS2-HEX", ucs2hex_pull, ucs2hex_push}
-};
-
-static struct charset_functions *charsets = NULL;
-
-static struct charset_functions *find_charset_functions(const char *name)
-{
-       struct charset_functions *c;
-
-       /* Check whether we already have this charset... */
-       for (c = charsets; c != NULL; c = c->next) {
-               if(strcasecmp(c->name, name) == 0) { 
-                       return c;
-               }
-       }
-
-       return NULL;
-}
-
-bool smb_register_charset(const struct charset_functions *funcs_in)
-{
-       struct charset_functions *funcs;
-
-       DEBUG(5, ("Attempting to register new charset %s\n", funcs_in->name));
-       /* Check whether we already have this charset... */
-       if (find_charset_functions(funcs_in->name)) {
-               DEBUG(0, ("Duplicate charset %s, not registering\n", funcs_in->name));
-               return false;
-       }
-
-       funcs = talloc(NULL, struct charset_functions);
-       if (!funcs) {
-               DEBUG(0, ("Out of memory duplicating charset %s\n", funcs_in->name));
-               return false;
-       }
-       *funcs = *funcs_in;
-
-       funcs->next = funcs->prev = NULL;
-       DEBUG(5, ("Registered charset %s\n", funcs->name));
-       DLIST_ADD(charsets, funcs);
-       return true;
-}
-
-static void lazy_initialize_iconv(void)
-{
-       static bool initialized;
-
-#ifdef static_init_charset
-       if (!initialized) {
-               static_init_charset;
-               initialized = true;
-       }
+       {"ISO-8859-1", latin1_pull, latin1_push},
+#ifdef DEVELOPER       
+       {"WEIRD", weird_pull, weird_push, true},
 #endif
-}
+#ifdef DARWINOS
+       {"MACOSXFS", macosxfs_encoding_pull, macosxfs_encoding_push, true},
+#endif
+       {"UCS2-HEX", ucs2hex_pull, ucs2hex_push, true}
+
+};
 
 #ifdef HAVE_NATIVE_ICONV
 /* if there was an error then reset the internal state,
@@ -163,32 +116,60 @@ _PUBLIC_ size_t smb_iconv(smb_iconv_t cd,
                 const char **inbuf, size_t *inbytesleft,
                 char **outbuf, size_t *outbytesleft)
 {
-       char cvtbuf[2048];
-       size_t bufsize;
-
        /* in many cases we can go direct */
        if (cd->direct) {
                return cd->direct(cd->cd_direct, 
                                  inbuf, inbytesleft, outbuf, outbytesleft);
        }
 
-
        /* otherwise we have to do it chunks at a time */
-       while (*inbytesleft > 0) {
-               char *bufp1 = cvtbuf;
-               const char *bufp2 = cvtbuf;
+       {
+#ifndef SMB_ICONV_BUFSIZE
+#define SMB_ICONV_BUFSIZE 2048
+#endif
+               TALLOC_CTX *mem_ctx;
+               size_t bufsize;
+               char *cvtbuf;
 
-               bufsize = sizeof(cvtbuf);
-               
-               if (cd->pull(cd->cd_pull, 
-                            inbuf, inbytesleft, &bufp1, &bufsize) == -1
-                   && errno != E2BIG) return -1;
+#if _SAMBA_BUILD_ == 3
+               mem_ctx = talloc_tos();
+#else
+               mem_ctx = cd;
+#endif
+               cvtbuf = talloc_array(mem_ctx, char, SMB_ICONV_BUFSIZE);
+
+               if (!cvtbuf) {
+                       return (size_t)-1;
+               }
 
-               bufsize = sizeof(cvtbuf) - bufsize;
+               while (*inbytesleft > 0) {
+                       char *bufp1 = cvtbuf;
+                       const char *bufp2 = cvtbuf;
+                       int saved_errno = errno;
+                       bool pull_failed = false;
+                       bufsize = SMB_ICONV_BUFSIZE;
+
+                       if (cd->pull(cd->cd_pull,
+                                    inbuf, inbytesleft, &bufp1, &bufsize) == -1
+                           && errno != E2BIG) {
+                               saved_errno = errno;
+                               pull_failed = true;
+                       }
 
-               if (cd->push(cd->cd_push, 
-                            &bufp2, &bufsize, 
-                            outbuf, outbytesleft) == -1) return -1;
+                       bufsize = SMB_ICONV_BUFSIZE - bufsize;
+
+                       if (cd->push(cd->cd_push,
+                                    &bufp2, &bufsize,
+                                    outbuf, outbytesleft) == -1) {
+                               talloc_free(cvtbuf);
+                               return -1;
+                       } else if (pull_failed) {
+                               /* We want the pull errno if possible */
+                               errno = saved_errno;
+                               return -1;
+                       }
+               }
+               talloc_free(cvtbuf);
        }
 
        return 0;
@@ -215,14 +196,12 @@ static int smb_iconv_t_destructor(smb_iconv_t hwd)
 }
 
 _PUBLIC_ smb_iconv_t smb_iconv_open_ex(TALLOC_CTX *mem_ctx, const char *tocode, 
-                             const char *fromcode, bool native_iconv)
+                             const char *fromcode, bool use_builtin_handlers)
 {
        smb_iconv_t ret;
        const struct charset_functions *from=NULL, *to=NULL;
        int i;
 
-       lazy_initialize_iconv();
-
        ret = (smb_iconv_t)talloc_named(mem_ctx,
                                        sizeof(*ret), 
                                        "iconv(%s,%s)", tocode, fromcode);
@@ -239,51 +218,52 @@ _PUBLIC_ smb_iconv_t smb_iconv_open_ex(TALLOC_CTX *mem_ctx, const char *tocode,
                return ret;
        }
 
+       /* check if we have a builtin function for this conversion */
        for (i=0;i<ARRAY_SIZE(builtin_functions);i++) {
                if (strcasecmp(fromcode, builtin_functions[i].name) == 0) {
-                       from = &builtin_functions[i];
-               }
-               if (strcasecmp(tocode, builtin_functions[i].name) == 0) {
-                       to = &builtin_functions[i];
-               }
-       }
-
-       if (from == NULL) {
-               for (from=charsets; from; from=from->next) {
-                       if (strcasecmp(from->name, fromcode) == 0) break;
+                       if (use_builtin_handlers || builtin_functions[i].samba_internal_charset) {
+                               from = &builtin_functions[i];
+                       }
                }
-       }
-
-       if (to == NULL) {
-               for (to=charsets; to; to=to->next) {
-                       if (strcasecmp(to->name, tocode) == 0) break;
+               if (strcasecmp(tocode, builtin_functions[i].name) == 0) { 
+                       if (use_builtin_handlers || builtin_functions[i].samba_internal_charset) {
+                               to = &builtin_functions[i];
+                       }
                }
        }
 
 #ifdef HAVE_NATIVE_ICONV
-       if ((!from || !to) && !native_iconv) {
-               goto failed;
-       }
-       if (!from) {
-               ret->pull = sys_iconv;
+       /* the from and to varaibles indicate a samba module or
+        * internal conversion, ret->pull and ret->push are
+        * initialised only in this block for iconv based
+        * conversions */
+
+       if (from == NULL) {
                ret->cd_pull = iconv_open("UTF-16LE", fromcode);
                if (ret->cd_pull == (iconv_t)-1)
                        ret->cd_pull = iconv_open("UCS-2LE", fromcode);
-               if (ret->cd_pull == (iconv_t)-1) goto failed;
+               if (ret->cd_pull != (iconv_t)-1) {
+                       ret->pull = sys_iconv;
+               }
        }
-
-       if (!to) {
-               ret->push = sys_iconv;
+       
+       if (to == NULL) {
                ret->cd_push = iconv_open(tocode, "UTF-16LE");
                if (ret->cd_push == (iconv_t)-1)
                        ret->cd_push = iconv_open(tocode, "UCS-2LE");
-               if (ret->cd_push == (iconv_t)-1) goto failed;
+               if (ret->cd_push != (iconv_t)-1) {
+                       ret->push = sys_iconv;
+               }
        }
-#else
-       if (!from || !to) {
+#endif
+
+       if (ret->pull == NULL && from == NULL) {
+               goto failed;
+       }
+       
+       if (ret->push == NULL && to == NULL) {
                goto failed;
        }
-#endif
 
        /* check for conversion to/from ucs2 */
        if (is_utf16(fromcode) && to) {
@@ -344,10 +324,24 @@ _PUBLIC_ int smb_iconv_close(smb_iconv_t cd)
  and also the "test" character sets that are designed to test
  multi-byte character set support for english users
 ***********************************************************************/
+
+/*
+  this takes an ASCII sequence and produces a UTF16 sequence
+
+  The first 127 codepoints of latin1 matches the first 127 codepoints
+  of unicode, and so can be put into the first byte of UTF16LE
+
+ */
+
 static size_t ascii_pull(void *cd, const char **inbuf, size_t *inbytesleft,
                         char **outbuf, size_t *outbytesleft)
 {
        while (*inbytesleft >= 1 && *outbytesleft >= 2) {
+               if (((*inbuf)[0] & 0x7F) != (*inbuf)[0]) {
+                       /* If this is multi-byte, then it isn't legal ASCII */
+                       errno = EILSEQ;
+                       return -1;
+               }
                (*outbuf)[0] = (*inbuf)[0];
                (*outbuf)[1] = 0;
                (*inbytesleft)  -= 1;
@@ -364,14 +358,26 @@ static size_t ascii_pull(void *cd, const char **inbuf, size_t *inbytesleft,
        return 0;
 }
 
+/*
+  this takes a UTF16 sequence and produces an ASCII sequence
+
+  The first 127 codepoints of ASCII matches the first 127 codepoints
+  of unicode, and so can be read directly from the first byte of UTF16LE
+
+ */
 static size_t ascii_push(void *cd, const char **inbuf, size_t *inbytesleft,
                         char **outbuf, size_t *outbytesleft)
 {
        int ir_count=0;
 
        while (*inbytesleft >= 2 && *outbytesleft >= 1) {
-               (*outbuf)[0] = (*inbuf)[0] & 0x7F;
-               if ((*inbuf)[1]) ir_count++;
+               if (((*inbuf)[0] & 0x7F) != (*inbuf)[0] ||
+                       (*inbuf)[1] != 0) {
+                       /* If this is multi-byte, then it isn't legal ASCII */
+                       errno = EILSEQ;
+                       return -1;
+               }
+               (*outbuf)[0] = (*inbuf)[0];
                (*inbytesleft)  -= 2;
                (*outbytesleft) -= 1;
                (*inbuf)  += 2;
@@ -391,6 +397,40 @@ static size_t ascii_push(void *cd, const char **inbuf, size_t *inbytesleft,
        return ir_count;
 }
 
+/*
+  this takes a latin1/ISO-8859-1 sequence and produces a UTF16 sequence
+
+  The first 256 codepoints of latin1 matches the first 256 codepoints
+  of unicode, and so can be put into the first byte of UTF16LE
+
+ */
+static size_t latin1_pull(void *cd, const char **inbuf, size_t *inbytesleft,
+                         char **outbuf, size_t *outbytesleft)
+{
+       while (*inbytesleft >= 1 && *outbytesleft >= 2) {
+               (*outbuf)[0] = (*inbuf)[0];
+               (*outbuf)[1] = 0;
+               (*inbytesleft)  -= 1;
+               (*outbytesleft) -= 2;
+               (*inbuf)  += 1;
+               (*outbuf) += 2;
+       }
+
+       if (*inbytesleft > 0) {
+               errno = E2BIG;
+               return -1;
+       }
+
+       return 0;
+}
+
+/*
+  this takes a UTF16 sequence and produces a latin1/ISO-8859-1 sequence
+
+  The first 256 codepoints of latin1 matches the first 256 codepoints
+  of unicode, and so can be read directly from the first byte of UTF16LE
+
+ */
 static size_t latin1_push(void *cd, const char **inbuf, size_t *inbytesleft,
                         char **outbuf, size_t *outbytesleft)
 {
@@ -398,7 +438,11 @@ static size_t latin1_push(void *cd, const char **inbuf, size_t *inbytesleft,
 
        while (*inbytesleft >= 2 && *outbytesleft >= 1) {
                (*outbuf)[0] = (*inbuf)[0];
-               if ((*inbuf)[1]) ir_count++;
+               if ((*inbuf)[1] != 0) {
+                       /* If this is multi-byte, then it isn't legal latin1 */
+                       errno = EILSEQ;
+                       return -1;
+               }
                (*inbytesleft)  -= 2;
                (*outbytesleft) -= 1;
                (*inbuf)  += 2;