Change uint_t to unsigned int in lib/util
[ira/wip.git] / lib / util / charset / iconv.c
index 150383e7f9ebf7b316b6c87b043a8b3ee33a3b38..2dac333d0c4beed9b199f212fe50c788c41ce385 100644 (file)
@@ -22,7 +22,6 @@
 #include "../lib/util/dlinklist.h"
 #include "system/iconv.h"
 #include "system/filesys.h"
-#include "param/param.h"
 
 
 /**
@@ -52,6 +51,7 @@ static size_t ascii_pull  (void *,const char **, size_t *, char **, size_t *);
 static size_t ascii_push  (void *,const char **, size_t *, char **, size_t *);
 static size_t utf8_pull   (void *,const char **, size_t *, char **, size_t *);
 static size_t utf8_push   (void *,const char **, size_t *, char **, size_t *);
+static size_t utf16_munged_pull(void *,const char **, size_t *, char **, size_t *);
 static size_t ucs2hex_pull(void *,const char **, size_t *, char **, size_t *);
 static size_t ucs2hex_push(void *,const char **, size_t *, char **, size_t *);
 static size_t iconv_copy  (void *,const char **, size_t *, char **, size_t *);
@@ -67,6 +67,10 @@ static const struct charset_functions builtin_functions[] = {
        /* we include the UTF-8 alias to cope with differing locale settings */
        {"UTF8",   utf8_pull,  utf8_push},
        {"UTF-8",   utf8_pull,  utf8_push},
+
+       /* this handles the munging needed for String2Key */
+       {"UTF16_MUNGED",   utf16_munged_pull,  iconv_copy},
+
        {"ASCII", ascii_pull, ascii_push},
        {"UCS2-HEX", ucs2hex_pull, ucs2hex_push}
 };
@@ -154,7 +158,19 @@ static bool is_utf16(const char *name)
                strcasecmp(name, "UTF-16LE") == 0;
 }
 
+int smb_iconv_t_destructor(smb_iconv_t hwd)
+{ /* closes the native iconv handles held by hwd; registered with talloc_set_destructor() in smb_iconv_open_ex() */
+#ifdef HAVE_NATIVE_ICONV
+       if (hwd->cd_pull != NULL && hwd->cd_pull != (iconv_t)-1) /* skip handles that are NULL or (iconv_t)-1 */
+               iconv_close(hwd->cd_pull);
+       if (hwd->cd_push != NULL && hwd->cd_push != (iconv_t)-1) /* same guard for the push handle */
+               iconv_close(hwd->cd_push);
+       if (hwd->cd_direct != NULL && hwd->cd_direct != (iconv_t)-1) /* same guard for the direct handle */
+               iconv_close(hwd->cd_direct);
+#endif
 
+       return 0; /* always returns 0, with or without HAVE_NATIVE_ICONV */
+}
 
 _PUBLIC_ smb_iconv_t smb_iconv_open_ex(TALLOC_CTX *mem_ctx, const char *tocode, 
                              const char *fromcode, bool native_iconv)
@@ -171,6 +187,7 @@ _PUBLIC_ smb_iconv_t smb_iconv_open_ex(TALLOC_CTX *mem_ctx, const char *tocode,
                return (smb_iconv_t)-1;
        }
        memset(ret, 0, sizeof(*ret));
+       talloc_set_destructor(ret, smb_iconv_t_destructor);
 
        /* check for the simplest null conversion */
        if (strcmp(fromcode, tocode) == 0) {
@@ -243,6 +260,9 @@ _PUBLIC_ smb_iconv_t smb_iconv_open_ex(TALLOC_CTX *mem_ctx, const char *tocode,
        }
        if (is_utf16(tocode)) {
                ret->direct = sys_iconv;
+               /* could be set just above - so we need to close iconv */
+               if (ret->cd_direct != NULL && ret->cd_direct != (iconv_t)-1)
+                       iconv_close(ret->cd_direct);
                ret->cd_direct = ret->cd_pull;
                ret->cd_pull = NULL;
                return ret;
@@ -265,7 +285,7 @@ failed:
  */
 _PUBLIC_ smb_iconv_t smb_iconv_open(const char *tocode, const char *fromcode)
 {
-       return smb_iconv_open_ex(NULL, tocode, fromcode, true);
+       return smb_iconv_open_ex(talloc_autofree_context(), tocode, fromcode, true);
 }
 
 /*
@@ -273,12 +293,6 @@ _PUBLIC_ smb_iconv_t smb_iconv_open(const char *tocode, const char *fromcode)
 */
 _PUBLIC_ int smb_iconv_close(smb_iconv_t cd)
 {
-#ifdef HAVE_NATIVE_ICONV
-       if (cd->cd_direct) iconv_close((iconv_t)cd->cd_direct);
-       if (cd->cd_pull) iconv_close((iconv_t)cd->cd_pull);
-       if (cd->cd_push) iconv_close((iconv_t)cd->cd_push);
-#endif
-
        talloc_free(cd);
        return 0;
 }
@@ -341,7 +355,7 @@ static size_t ucs2hex_pull(void *cd, const char **inbuf, size_t *inbytesleft,
                         char **outbuf, size_t *outbytesleft)
 {
        while (*inbytesleft >= 1 && *outbytesleft >= 2) {
-               uint_t v;
+               unsigned int v;
 
                if ((*inbuf)[0] != '@') {
                        /* seven bit ascii case */
@@ -708,4 +722,102 @@ error:
 }
 
 
+/*
+  this takes a UTF16 munged sequence, modifies it according to the
+  string2key rules, and produces a UTF16 sequence
+  (16-bit units are read and written low byte first; cd is not used)
+The rules are:
+
+    1) any 0x0000 characters are mapped to 0x0001
+
+    2) convert any instance of 0xD800 - 0xDBFF (high surrogate)
+       without an immediately following 0xDC00 - 0xDFFF (low surrogate) to
+       U+FFFD (REPLACEMENT CHARACTER).
+
+    3) the same for any low surrogate that was not preceded by a high surrogate.
+  Returns 0 on success, or -1 with errno EINVAL (one stray input byte) or E2BIG (output full).
+ */
+static size_t utf16_munged_pull(void *cd, const char **inbuf, size_t *inbytesleft,
+                              char **outbuf, size_t *outbytesleft)
+{
+       size_t in_left=*inbytesleft, out_left=*outbytesleft;
+       uint8_t *c = (uint8_t *)*outbuf; /* output write cursor */
+       const uint8_t *uc = (const uint8_t *)*inbuf; /* input read cursor */
+
+       while (in_left >= 2 && out_left >= 2) { /* need one whole 16-bit unit on each side */
+               unsigned int codepoint = uc[0] | (uc[1]<<8); /* low byte first */
+
+               if (codepoint == 0) { /* rule 1 */
+                       codepoint = 1;
+               }
+
+               if ((codepoint & 0xfc00) == 0xd800) { /* 0xD800 - 0xDBFF */
+                       /* a high surrogate */
+                       unsigned int codepoint2;
+                       if (in_left < 4) { /* input ends before a second unit: rule 2 */
+                               codepoint = 0xfffd;
+                               goto codepoint16;
+                       }
+                       codepoint2 = uc[2] | (uc[3]<<8); /* peek at the following unit */
+                       if ((codepoint2 & 0xfc00) != 0xdc00) {
+                               /* high surrogate not followed by low
+                                  surrogate: convert to 0xfffd */
+                               codepoint = 0xfffd;
+                               goto codepoint16;
+                       }
+                       if (out_left < 4) { /* a valid pair is copied whole or not at all */
+                               errno = E2BIG;
+                               goto error;
+                       }
+                       memcpy(c, uc, 4); /* valid surrogate pair: both units pass through unchanged */
+                       in_left  -= 4;
+                       out_left -= 4;
+                       uc       += 4;
+                       c        += 4;
+                       continue;
+               }
+
+               if ((codepoint & 0xfc00) == 0xdc00) { /* 0xDC00 - 0xDFFF: rule 3 */
+                       /* low surrogate not preceded by high
+                          surrogate: convert to 0xfffd */
+                       codepoint = 0xfffd;
+               }
+
+       codepoint16: /* emit one 16-bit unit and consume one input unit */
+               c[0] = codepoint & 0xFF;
+               c[1] = (codepoint>>8) & 0xFF;
+               
+               in_left  -= 2;
+               out_left -= 2;
+               uc  += 2;
+               c   += 2;
+               continue;
+       }
+
+       if (in_left == 1) { /* a lone trailing byte cannot form a 16-bit unit */
+               errno = EINVAL;
+               goto error;
+       }
+
+       if (in_left > 1) { /* input remains, so the loop stopped for lack of output room */
+               errno = E2BIG;
+               goto error;
+       }
+
+       *inbytesleft = in_left; /* report progress back through the in/out arguments */
+       *outbytesleft = out_left;
+       *inbuf  = (const char *)uc;
+       *outbuf = (char *)c;
+       
+       return 0;
+
+error:
+       *inbytesleft = in_left; /* progress made before the failure is reported too */
+       *outbytesleft = out_left;
+       *inbuf  = (const char *)uc;
+       *outbuf = (char *)c;
+       return -1;
+}
+
+