lib/util/charset rename iconv_convenience to iconv_handle
[nivanova/samba-autobuild/.git] / lib / util / charset / util_unistr.c
index ec88e784d088e59f271e7e3db460483e98f0b19e..f761613ba83f325ebebbfde092685a3f87ea233b 100644 (file)
 #include "includes.h"
 #include "system/locale.h"
 
-struct smb_iconv_convenience *global_iconv_convenience = NULL;
-
-static inline struct smb_iconv_convenience *get_iconv_convenience(void)
-{
-       if (global_iconv_convenience == NULL)
-               global_iconv_convenience = smb_iconv_convenience_init(talloc_autofree_context(), "ASCII", "UTF-8", true);
-       return global_iconv_convenience;
-}
-
-/**
- Case insensitive string compararison
-**/
-_PUBLIC_ int strcasecmp_m(const char *s1, const char *s2)
-{
-       codepoint_t c1=0, c2=0;
-       size_t size1, size2;
-       struct smb_iconv_convenience *iconv_convenience = get_iconv_convenience();
-
-       /* handle null ptr comparisons to simplify the use in qsort */
-       if (s1 == s2) return 0;
-       if (s1 == NULL) return -1;
-       if (s2 == NULL) return 1;
-
-       while (*s1 && *s2) {
-               c1 = next_codepoint_convenience(iconv_convenience, s1, &size1);
-               c2 = next_codepoint_convenience(iconv_convenience, s2, &size2);
-
-               s1 += size1;
-               s2 += size2;
-
-               if (c1 == c2) {
-                       continue;
-               }
-
-               if (c1 == INVALID_CODEPOINT ||
-                   c2 == INVALID_CODEPOINT) {
-                       /* what else can we do?? */
-                       return strcasecmp(s1, s2);
-               }
-
-               if (toupper_m(c1) != toupper_m(c2)) {
-                       return c1 - c2;
-               }
-       }
-
-       return *s1 - *s2;
-}
-
 /**
  * Get the next token from a string, return False if none found.
  * Handles double-quotes.
@@ -115,74 +67,6 @@ _PUBLIC_ bool next_token(const char **ptr,char *buff, const char *sep, size_t bu
        return true;
 }
 
-/**
- Case insensitive string compararison, length limited
-**/
-_PUBLIC_ int strncasecmp_m(const char *s1, const char *s2, size_t n)
-{
-       codepoint_t c1=0, c2=0;
-       size_t size1, size2;
-       struct smb_iconv_convenience *iconv_convenience = get_iconv_convenience();
-
-       /* handle null ptr comparisons to simplify the use in qsort */
-       if (s1 == s2) return 0;
-       if (s1 == NULL) return -1;
-       if (s2 == NULL) return 1;
-
-       while (*s1 && *s2 && n) {
-               n--;
-
-               c1 = next_codepoint_convenience(iconv_convenience, s1, &size1);
-               c2 = next_codepoint_convenience(iconv_convenience, s2, &size2);
-
-               s1 += size1;
-               s2 += size2;
-
-               if (c1 == c2) {
-                       continue;
-               }
-
-               if (c1 == INVALID_CODEPOINT ||
-                   c2 == INVALID_CODEPOINT) {
-                       /* what else can we do?? */
-                       return strcasecmp(s1, s2);
-               }
-
-               if (toupper_m(c1) != toupper_m(c2)) {
-                       return c1 - c2;
-               }
-       }
-
-       if (n == 0) {
-               return 0;
-       }
-
-       return *s1 - *s2;
-}
-
-/**
- * Compare 2 strings.
- *
- * @note The comparison is case-insensitive.
- **/
-_PUBLIC_ bool strequal_m(const char *s1, const char *s2)
-{
-       return strcasecmp_m(s1,s2) == 0;
-}
-
-/**
- Compare 2 strings (case sensitive).
-**/
-_PUBLIC_ bool strcsequal_m(const char *s1,const char *s2)
-{
-       if (s1 == s2)
-               return true;
-       if (!s1 || !s2)
-               return false;
-       
-       return strcmp(s1,s2) == 0;
-}
-
 
 /**
  String replace.
@@ -190,10 +74,10 @@ _PUBLIC_ bool strcsequal_m(const char *s1,const char *s2)
 **/
 _PUBLIC_ void string_replace_m(char *s, char oldc, char newc)
 {
-       struct smb_iconv_convenience *ic = get_iconv_convenience();
+       struct smb_iconv_handle *ic = get_iconv_handle();
        while (s && *s) {
                size_t size;
-               codepoint_t c = next_codepoint_convenience(ic, s, &size);
+               codepoint_t c = next_codepoint_handle(ic, s, &size);
                if (c == oldc) {
                        *s = newc;
                }
@@ -247,180 +131,6 @@ _PUBLIC_ char *alpha_strcpy(char *dest, const char *src, const char *other_safe_
        return dest;
 }
 
-/**
- Count the number of UCS2 characters in a string. Normally this will
- be the same as the number of bytes in a string for single byte strings,
- but will be different for multibyte.
-**/
-_PUBLIC_ size_t strlen_m(const char *s)
-{
-       size_t count = 0;
-       struct smb_iconv_convenience *ic = get_iconv_convenience();
-
-       if (!s) {
-               return 0;
-       }
-
-       while (*s && !(((uint8_t)*s) & 0x80)) {
-               s++;
-               count++;
-       }
-
-       if (!*s) {
-               return count;
-       }
-
-       while (*s) {
-               size_t c_size;
-               codepoint_t c = next_codepoint_convenience(ic, s, &c_size);
-               if (c < 0x10000) {
-                       count += 1;
-               } else {
-                       count += 2;
-               }
-               s += c_size;
-       }
-
-       return count;
-}
-
-/**
-   Work out the number of multibyte chars in a string, including the NULL
-   terminator.
-**/
-_PUBLIC_ size_t strlen_m_term(const char *s)
-{
-       if (!s) {
-               return 0;
-       }
-
-       return strlen_m(s) + 1;
-}
-
-/*
- * Weird helper routine for the winreg pipe: If nothing is around, return 0,
- * if a string is there, include the terminator.
- */
-
-_PUBLIC_ size_t strlen_m_term_null(const char *s)
-{
-       size_t len;
-       if (!s) {
-               return 0;
-       }
-       len = strlen_m(s);
-       if (len == 0) {
-               return 0;
-       }
-
-       return len+1;
-}
-
-/**
- Strchr and strrchr_m are a bit complex on general multi-byte strings. 
-**/
-_PUBLIC_ char *strchr_m(const char *s, char c)
-{
-       struct smb_iconv_convenience *ic = get_iconv_convenience();
-       if (s == NULL) {
-               return NULL;
-       }
-       /* characters below 0x3F are guaranteed to not appear in
-          non-initial position in multi-byte charsets */
-       if ((c & 0xC0) == 0) {
-               return strchr(s, c);
-       }
-
-       while (*s) {
-               size_t size;
-               codepoint_t c2 = next_codepoint_convenience(ic, s, &size);
-               if (c2 == c) {
-                       return discard_const_p(char, s);
-               }
-               s += size;
-       }
-
-       return NULL;
-}
-
-/**
- * Multibyte-character version of strrchr
- */
-_PUBLIC_ char *strrchr_m(const char *s, char c)
-{
-       struct smb_iconv_convenience *ic = get_iconv_convenience();
-       char *ret = NULL;
-
-       if (s == NULL) {
-               return NULL;
-       }
-
-       /* characters below 0x3F are guaranteed to not appear in
-          non-initial position in multi-byte charsets */
-       if ((c & 0xC0) == 0) {
-               return strrchr(s, c);
-       }
-
-       while (*s) {
-               size_t size;
-               codepoint_t c2 = next_codepoint_convenience(ic, s, &size);
-               if (c2 == c) {
-                       ret = discard_const_p(char, s);
-               }
-               s += size;
-       }
-
-       return ret;
-}
-
-/**
-  return True if any (multi-byte) character is lower case
-*/
-_PUBLIC_ bool strhaslower(const char *string)
-{
-       struct smb_iconv_convenience *ic = get_iconv_convenience();
-       while (*string) {
-               size_t c_size;
-               codepoint_t s;
-               codepoint_t t;
-
-               s = next_codepoint_convenience(ic, string, &c_size);
-               string += c_size;
-
-               t = toupper_m(s);
-
-               if (s != t) {
-                       return true; /* that means it has lower case chars */
-               }
-       }
-
-       return false;
-} 
-
-/**
-  return True if any (multi-byte) character is upper case
-*/
-_PUBLIC_ bool strhasupper(const char *string)
-{
-       struct smb_iconv_convenience *ic = get_iconv_convenience();
-       while (*string) {
-               size_t c_size;
-               codepoint_t s;
-               codepoint_t t;
-
-               s = next_codepoint_convenience(ic, string, &c_size);
-               string += c_size;
-
-               t = tolower_m(s);
-
-               if (s != t) {
-                       return true; /* that means it has upper case chars */
-               }
-       }
-
-       return false;
-} 
-
 /**
  Convert a string to lower case, allocated with talloc
 **/
@@ -428,7 +138,11 @@ _PUBLIC_ char *strlower_talloc(TALLOC_CTX *ctx, const char *src)
 {
        size_t size=0;
        char *dest;
-       struct smb_iconv_convenience *iconv_convenience = get_iconv_convenience();
+       struct smb_iconv_handle *iconv_handle = get_iconv_handle();
+
+       if(src == NULL) {
+               return NULL;
+       }
 
        /* this takes advantage of the fact that upper/lower can't
           change the length of a character by more than 1 byte */
@@ -439,12 +153,12 @@ _PUBLIC_ char *strlower_talloc(TALLOC_CTX *ctx, const char *src)
 
        while (*src) {
                size_t c_size;
-               codepoint_t c = next_codepoint_convenience(iconv_convenience, src, &c_size);
+               codepoint_t c = next_codepoint_handle(iconv_handle, src, &c_size);
                src += c_size;
 
                c = tolower_m(c);
 
-               c_size = push_codepoint(iconv_convenience, dest+size, c);
+               c_size = push_codepoint_handle(iconv_handle, dest+size, c);
                if (c_size == -1) {
                        talloc_free(dest);
                        return NULL;
@@ -470,8 +184,8 @@ _PUBLIC_ char *strupper_talloc_n(TALLOC_CTX *ctx, const char *src, size_t n)
 {
        size_t size=0;
        char *dest;
-       struct smb_iconv_convenience *iconv_convenience = get_iconv_convenience();
-       
+       struct smb_iconv_handle *iconv_handle = get_iconv_handle();
+
        if (!src) {
                return NULL;
        }
@@ -483,14 +197,14 @@ _PUBLIC_ char *strupper_talloc_n(TALLOC_CTX *ctx, const char *src, size_t n)
                return NULL;
        }
 
-       while (*src && n--) {
+       while (n-- && *src) {
                size_t c_size;
-               codepoint_t c = next_codepoint_convenience(iconv_convenience, src, &c_size);
+               codepoint_t c = next_codepoint_handle(iconv_handle, src, &c_size);
                src += c_size;
 
                c = toupper_m(c);
 
-               c_size = push_codepoint(iconv_convenience, dest+size, c);
+               c_size = push_codepoint_handle(iconv_handle, dest+size, c);
                if (c_size == -1) {
                        talloc_free(dest);
                        return NULL;
@@ -530,7 +244,7 @@ _PUBLIC_ char *talloc_strdup_upper(TALLOC_CTX *ctx, const char *src)
 _PUBLIC_ void strlower_m(char *s)
 {
        char *d;
-       struct smb_iconv_convenience *iconv_convenience;
+       struct smb_iconv_handle *iconv_handle;
 
        /* this is quite a common operation, so we want it to be
           fast. We optimise for the ascii case, knowing that all our
@@ -544,14 +258,14 @@ _PUBLIC_ void strlower_m(char *s)
        if (!*s)
                return;
 
-       iconv_convenience = get_iconv_convenience();
+       iconv_handle = get_iconv_handle();
 
        d = s;
 
        while (*s) {
                size_t c_size, c_size2;
-               codepoint_t c = next_codepoint_convenience(iconv_convenience, s, &c_size);
-               c_size2 = push_codepoint(iconv_convenience, d, tolower_m(c));
+               codepoint_t c = next_codepoint_handle(iconv_handle, s, &c_size);
+               c_size2 = push_codepoint_handle(iconv_handle, d, tolower_m(c));
                if (c_size2 > c_size) {
                        DEBUG(0,("FATAL: codepoint 0x%x (0x%x) expanded from %d to %d bytes in strlower_m\n",
                                 c, tolower_m(c), (int)c_size, (int)c_size2));
@@ -569,7 +283,7 @@ _PUBLIC_ void strlower_m(char *s)
 _PUBLIC_ void strupper_m(char *s)
 {
        char *d;
-       struct smb_iconv_convenience *iconv_convenience;
+       struct smb_iconv_handle *iconv_handle;
 
        /* this is quite a common operation, so we want it to be
           fast. We optimise for the ascii case, knowing that all our
@@ -583,14 +297,14 @@ _PUBLIC_ void strupper_m(char *s)
        if (!*s)
                return;
 
-       iconv_convenience = get_iconv_convenience();
+       iconv_handle = get_iconv_handle();
 
        d = s;
 
        while (*s) {
                size_t c_size, c_size2;
-               codepoint_t c = next_codepoint_convenience(iconv_convenience, s, &c_size);
-               c_size2 = push_codepoint(iconv_convenience, d, toupper_m(c));
+               codepoint_t c = next_codepoint_handle(iconv_handle, s, &c_size);
+               c_size2 = push_codepoint_handle(iconv_handle, d, toupper_m(c));
                if (c_size2 > c_size) {
                        DEBUG(0,("FATAL: codepoint 0x%x (0x%x) expanded from %d to %d bytes in strupper_m\n",
                                 c, toupper_m(c), (int)c_size, (int)c_size2));
@@ -608,12 +322,12 @@ _PUBLIC_ void strupper_m(char *s)
 **/
 _PUBLIC_ size_t count_chars_m(const char *s, char c)
 {
-       struct smb_iconv_convenience *ic = get_iconv_convenience();
+       struct smb_iconv_handle *ic = get_iconv_handle();
        size_t count = 0;
 
        while (*s) {
                size_t size;
-               codepoint_t c2 = next_codepoint_convenience(ic, s, &size);
+               codepoint_t c2 = next_codepoint_handle(ic, s, &size);
                if (c2 == c) count++;
                s += size;
        }
@@ -656,7 +370,7 @@ static ssize_t push_ascii(void *dest, const char *src, size_t dest_len, int flag
        if (flags & (STR_TERMINATE | STR_TERMINATE_ASCII))
                src_len++;
 
-       return convert_string(CH_UNIX, CH_DOS, src, src_len, dest, dest_len, false);
+       return convert_string(CH_UNIX, CH_DOS, src, src_len, dest, dest_len);
 }
 
 /**
@@ -672,7 +386,7 @@ _PUBLIC_ bool push_ascii_talloc(TALLOC_CTX *ctx, char **dest, const char *src, s
 {
        size_t src_len = strlen(src)+1;
        *dest = NULL;
-       return convert_string_talloc(ctx, CH_UNIX, CH_DOS, src, src_len, (void **)dest, converted_size, false);
+       return convert_string_talloc(ctx, CH_UNIX, CH_DOS, src, src_len, (void **)dest, converted_size);
 }
 
 
@@ -706,7 +420,7 @@ static ssize_t pull_ascii(char *dest, const void *src, size_t dest_len, size_t s
                }
        }
 
-       ret = convert_string(CH_DOS, CH_UNIX, src, src_len, dest, dest_len, false);
+       ret = convert_string(CH_DOS, CH_UNIX, src, src_len, dest, dest_len);
 
        if (dest_len)
                dest[MIN(ret, dest_len-1)] = 0;
@@ -759,7 +473,7 @@ static ssize_t push_ucs2(void *dest, const char *src, size_t dest_len, int flags
        /* ucs2 is always a multiple of 2 bytes */
        dest_len &= ~1;
 
-       ret = convert_string(CH_UNIX, CH_UTF16, src, src_len, dest, dest_len, false);
+       ret = convert_string(CH_UNIX, CH_UTF16, src, src_len, dest, dest_len);
        if (ret == (size_t)-1) {
                return 0;
        }
@@ -783,7 +497,7 @@ _PUBLIC_ bool push_ucs2_talloc(TALLOC_CTX *ctx, smb_ucs2_t **dest, const char *s
 {
        size_t src_len = strlen(src)+1;
        *dest = NULL;
-       return convert_string_talloc(ctx, CH_UNIX, CH_UTF16, src, src_len, (void **)dest, converted_size, false);
+       return convert_string_talloc(ctx, CH_UNIX, CH_UTF16, src, src_len, (void **)dest, converted_size);
 }
 
 
@@ -799,7 +513,7 @@ _PUBLIC_ bool push_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src, si
 {
        size_t src_len = strlen(src)+1;
        *dest = NULL;
-       return convert_string_talloc(ctx, CH_UNIX, CH_UTF8, src, src_len, (void **)dest, converted_size, false);
+       return convert_string_talloc(ctx, CH_UNIX, CH_UTF8, src, src_len, (void **)dest, converted_size);
 }
 
 /**
@@ -835,7 +549,7 @@ static size_t pull_ucs2(char *dest, const void *src, size_t dest_len, size_t src
        if (src_len != (size_t)-1)
                src_len &= ~1;
        
-       ret = convert_string(CH_UTF16, CH_UNIX, src, src_len, dest, dest_len, false);
+       ret = convert_string(CH_UTF16, CH_UNIX, src, src_len, dest, dest_len);
        if (dest_len)
                dest[MIN(ret, dest_len-1)] = 0;
 
@@ -854,7 +568,7 @@ _PUBLIC_ bool pull_ascii_talloc(TALLOC_CTX *ctx, char **dest, const char *src, s
 {
        size_t src_len = strlen(src)+1;
        *dest = NULL;
-       return convert_string_talloc(ctx, CH_DOS, CH_UNIX, src, src_len, (void **)dest, converted_size, false);
+       return convert_string_talloc(ctx, CH_DOS, CH_UNIX, src, src_len, (void **)dest, converted_size);
 }
 
 /**
@@ -869,7 +583,7 @@ _PUBLIC_ bool pull_ucs2_talloc(TALLOC_CTX *ctx, char **dest, const smb_ucs2_t *s
 {
        size_t src_len = utf16_len(src);
        *dest = NULL;
-       return convert_string_talloc(ctx, CH_UTF16, CH_UNIX, src, src_len, (void **)dest, converted_size, false);
+       return convert_string_talloc(ctx, CH_UTF16, CH_UNIX, src, src_len, (void **)dest, converted_size);
 }
 
 /**
@@ -884,7 +598,7 @@ _PUBLIC_ bool pull_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src, si
 {
        size_t src_len = strlen(src)+1;
        *dest = NULL;
-       return convert_string_talloc(ctx, CH_UTF8, CH_UNIX, src, src_len, (void **)dest, converted_size, false);
+       return convert_string_talloc(ctx, CH_UTF8, CH_UNIX, src, src_len, (void **)dest, converted_size);
 }
 
 /**
@@ -951,15 +665,13 @@ _PUBLIC_ ssize_t pull_string(char *dest, const void *src, size_t dest_len, size_
  * @returns the number of bytes occupied in the destination
  **/
 _PUBLIC_ size_t convert_string(charset_t from, charset_t to,
-                               void const *src, size_t srclen, 
-                               void *dest, size_t destlen, 
-                               bool allow_badcharcnv)
+                              void const *src, size_t srclen, 
+                              void *dest, size_t destlen)
 {
        size_t ret;
-       if (!convert_string_convenience(get_iconv_convenience(), from, to, 
-                                                                         src, srclen,
-                                                                         dest, destlen, &ret,
-                                                                         allow_badcharcnv))
+       if (!convert_string_handle(get_iconv_handle(), from, to,
+                                       src, srclen,
+                                       dest, destlen, &ret))
                return -1;
        return ret;
 }
@@ -976,19 +688,12 @@ _PUBLIC_ size_t convert_string(charset_t from, charset_t to,
  **/
 
 _PUBLIC_ bool convert_string_talloc(TALLOC_CTX *ctx, 
-                                      charset_t from, charset_t to, 
-                                      void const *src, size_t srclen, 
-                                      void **dest, size_t *converted_size, 
-                                          bool allow_badcharcnv)
+                                   charset_t from, charset_t to, 
+                                   void const *src, size_t srclen, 
+                                   void *dest, size_t *converted_size)
 {
-       return convert_string_talloc_convenience(ctx, get_iconv_convenience(),
-                                                                                        from, to, src, srclen, dest,
-                                                                                        converted_size, 
-                                                                                        allow_badcharcnv);
+       return convert_string_talloc_handle(ctx, get_iconv_handle(),
+                                                from, to, src, srclen, dest,
+                                                converted_size);
 }
 
-
-_PUBLIC_ codepoint_t next_codepoint(const char *str, size_t *size)
-{
-       return next_codepoint_convenience(get_iconv_convenience(), str, size);
-}