lib/util: Merge basic string length and comparison functions
authorAndrew Bartlett <abartlet@samba.org>
Fri, 18 Mar 2011 08:10:23 +0000 (19:10 +1100)
committerAndrew Bartlett <abartlet@samba.org>
Wed, 23 Mar 2011 07:21:54 +0000 (08:21 +0100)
These functions now use the codepoints for more accurate string
handling and now form common code.

Andrew Bartlett

Autobuild-User: Andrew Bartlett <abartlet@samba.org>
Autobuild-Date: Wed Mar 23 08:21:54 CET 2011 on sn-devel-104

lib/util/charset/charset.h
lib/util/charset/tests/charset.c
lib/util/charset/util_str.c [new file with mode: 0644]
lib/util/charset/util_unistr.c
lib/util/charset/wscript_build
source3/Makefile.in
source3/lib/util_str.c

index 474d77e54e7ddb437b353e4233f115c58234afc1..943bfa46957fdf203d61ada82f40e6d21a0e180e 100644 (file)
@@ -128,7 +128,7 @@ size_t strlen_m_term_null(const char *s);
 size_t strlen_m(const char *s);
 char *alpha_strcpy(char *dest, const char *src, const char *other_safe_chars, size_t maxlength);
 void string_replace_m(char *s, char oldc, char newc);
-bool strcsequal_m(const char *s1,const char *s2);
+bool strcsequal(const char *s1,const char *s2);
 bool strequal_m(const char *s1, const char *s2);
 int strncasecmp_m(const char *s1, const char *s2, size_t n);
 bool next_token(const char **ptr,char *buff, const char *sep, size_t bufsize);
index 72fd11b128db5fa988af6ecc89c08e34a20b8001..351b91c7b7970a0ad1ec4122a28b61c0f494ace2 100644 (file)
@@ -69,14 +69,14 @@ static bool test_strequal_m(struct torture_context *tctx)
        return true;
 }
 
-static bool test_strcsequal_m(struct torture_context *tctx)
+static bool test_strcsequal(struct torture_context *tctx)
 {
-       torture_assert(tctx, !strcsequal_m("foo", "bar"), "different strings");
-       torture_assert(tctx, strcsequal_m("foo", "foo"), "same case strings");
-       torture_assert(tctx, !strcsequal_m("foo", "Foo"), "different case strings");
-       torture_assert(tctx, !strcsequal_m(NULL, "Foo"), "one NULL");
-       torture_assert(tctx, !strcsequal_m("foo", NULL), "other NULL");
-       torture_assert(tctx, strcsequal_m(NULL, NULL), "both NULL");
+       torture_assert(tctx, !strcsequal("foo", "bar"), "different strings");
+       torture_assert(tctx, strcsequal("foo", "foo"), "same case strings");
+       torture_assert(tctx, !strcsequal("foo", "Foo"), "different case strings");
+       torture_assert(tctx, !strcsequal(NULL, "Foo"), "one NULL");
+       torture_assert(tctx, !strcsequal("foo", NULL), "other NULL");
+       torture_assert(tctx, strcsequal(NULL, NULL), "both NULL");
        return true;
 }
 
@@ -253,7 +253,7 @@ struct torture_suite *torture_local_charset(TALLOC_CTX *mem_ctx)
        torture_suite_add_simple_test(suite, "codepoint_cmpi", test_codepoint_cmpi);
        torture_suite_add_simple_test(suite, "strcasecmp_m", test_strcasecmp_m);
        torture_suite_add_simple_test(suite, "strequal_m", test_strequal_m);
-       torture_suite_add_simple_test(suite, "strcsequal_m", test_strcsequal_m);
+       torture_suite_add_simple_test(suite, "strcsequal", test_strcsequal);
        torture_suite_add_simple_test(suite, "string_replace_m", test_string_replace_m);
        torture_suite_add_simple_test(suite, "strncasecmp_m", test_strncasecmp_m);
        torture_suite_add_simple_test(suite, "next_token", test_next_token);
diff --git a/lib/util/charset/util_str.c b/lib/util/charset/util_str.c
new file mode 100644 (file)
index 0000000..597b031
--- /dev/null
@@ -0,0 +1,416 @@
+/*
+   Unix SMB/CIFS implementation.
+   Samba utility functions
+   Copyright (C) Andrew Tridgell 1992-2001
+   Copyright (C) Simo Sorce 2001
+   Copyright (C) Andrew Bartlett 2011
+   Copyright (C) Jeremy Allison  1992-2007
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "includes.h"
+#include "system/locale.h"
+
+#ifdef strcasecmp
+#undef strcasecmp
+#endif
+
+/**
+ * Case insensitive string comparison.
+ *
+ * Both strings are decoded one codepoint at a time with
+ * next_codepoint_convenience() and the codepoints are compared after
+ * upper-casing them with toupper_m().
+ *
+ * NULL handling (to simplify the use in qsort): identical pointers,
+ * including two NULLs, compare equal and a NULL string sorts before
+ * any non-NULL string.
+ *
+ * @param s1 first string, may be NULL
+ * @param s2 second string, may be NULL
+ * @return 0 if the strings are equal ignoring case, a negative value
+ *         if s1 sorts before s2, a positive value otherwise
+ **/
+_PUBLIC_ int strcasecmp_m(const char *s1, const char *s2)
+{
+       codepoint_t c1=0, c2=0;
+       codepoint_t u1=0, u2=0;
+       size_t size1, size2;
+       struct smb_iconv_convenience *iconv_convenience = get_iconv_convenience();
+
+       /* handle null ptr comparisons to simplify the use in qsort */
+       if (s1 == s2) return 0;
+       if (s1 == NULL) return -1;
+       if (s2 == NULL) return 1;
+
+       while (*s1 && *s2) {
+               c1 = next_codepoint_convenience(iconv_convenience, s1, &size1);
+               c2 = next_codepoint_convenience(iconv_convenience, s2, &size2);
+
+               if (c1 == INVALID_CODEPOINT ||
+                   c2 == INVALID_CODEPOINT) {
+                       /*
+                        * what else can we do?? Fall back to a byte-wise
+                        * comparison. This has to happen before the
+                        * pointers are advanced (otherwise the bytes that
+                        * failed to decode would be skipped) and before
+                        * the c1 == c2 shortcut (otherwise two different
+                        * undecodable sequences would count as equal).
+                        */
+                       return strcasecmp(s1, s2);
+               }
+
+               s1 += size1;
+               s2 += size2;
+
+               if (c1 == c2) {
+                       continue;
+               }
+
+               u1 = toupper_m(c1);
+               u2 = toupper_m(c2);
+               if (u1 != u2) {
+                       /*
+                        * Order by the same case-folded values that
+                        * decide equality, so that the result is a
+                        * consistent ordering for sorting.
+                        */
+                       return (u1 < u2) ? -1 : 1;
+               }
+       }
+
+       /*
+        * At least one string has ended. Compare as unsigned char, like
+        * strcmp() does, so that a string always sorts before any longer
+        * string it is a prefix of, even if the next byte has the high
+        * bit set.
+        */
+       return (int)(unsigned char)*s1 - (int)(unsigned char)*s2;
+}
+
+/**
+ * Case insensitive string comparison, length limited.
+ *
+ * Works like strcasecmp_m(), but stops after n iterations of the
+ * decode loop, i.e. after n codepoints have been compared.
+ *
+ * NULL handling (to simplify the use in qsort): identical pointers,
+ * including two NULLs, compare equal and a NULL string sorts before
+ * any non-NULL string.
+ *
+ * @param s1 first string, may be NULL
+ * @param s2 second string, may be NULL
+ * @param n  maximum number of codepoints to compare
+ * @return 0 if the compared parts are equal ignoring case, a negative
+ *         value if s1 sorts before s2, a positive value otherwise
+ **/
+_PUBLIC_ int strncasecmp_m(const char *s1, const char *s2, size_t n)
+{
+       codepoint_t c1=0, c2=0;
+       codepoint_t u1=0, u2=0;
+       size_t size1, size2;
+       struct smb_iconv_convenience *iconv_convenience = get_iconv_convenience();
+
+       /* handle null ptr comparisons to simplify the use in qsort */
+       if (s1 == s2) return 0;
+       if (s1 == NULL) return -1;
+       if (s2 == NULL) return 1;
+
+       while (*s1 && *s2 && n) {
+               n--;
+
+               c1 = next_codepoint_convenience(iconv_convenience, s1, &size1);
+               c2 = next_codepoint_convenience(iconv_convenience, s2, &size2);
+
+               if (c1 == INVALID_CODEPOINT ||
+                   c2 == INVALID_CODEPOINT) {
+                       /*
+                        * what else can we do?? Fall back to a byte-wise
+                        * comparison. This has to happen before the
+                        * pointers are advanced (otherwise the bytes that
+                        * failed to decode would be skipped) and before
+                        * the c1 == c2 shortcut (otherwise two different
+                        * undecodable sequences would count as equal).
+                        *
+                        * NOTE(review): this fallback is not bounded by
+                        * n; it compares the whole remainder of both
+                        * strings.
+                        */
+                       return strcasecmp(s1, s2);
+               }
+
+               s1 += size1;
+               s2 += size2;
+
+               if (c1 == c2) {
+                       continue;
+               }
+
+               u1 = toupper_m(c1);
+               u2 = toupper_m(c2);
+               if (u1 != u2) {
+                       /*
+                        * Order by the same case-folded values that
+                        * decide equality, so that the result is a
+                        * consistent ordering for sorting.
+                        */
+                       return (u1 < u2) ? -1 : 1;
+               }
+       }
+
+       /* the requested number of codepoints matched */
+       if (n == 0) {
+               return 0;
+       }
+
+       /*
+        * At least one string has ended. Compare as unsigned char, like
+        * strcmp() does, so that a string always sorts before any longer
+        * string it is a prefix of, even if the next byte has the high
+        * bit set.
+        */
+       return (int)(unsigned char)*s1 - (int)(unsigned char)*s2;
+}
+
+/**
+ * Test two strings for equality, ignoring case.
+ *
+ * @return true when strcasecmp_m() reports the two strings as equal
+ **/
+_PUBLIC_ bool strequal_m(const char *s1, const char *s2)
+{
+       int diff = strcasecmp_m(s1, s2);
+
+       return diff == 0;
+}
+
+/**
+ Case sensitive equality test for two strings.
+
+ Two NULL pointers are considered equal; a NULL pointer never equals
+ a non-NULL string.
+**/
+_PUBLIC_ bool strcsequal(const char *s1,const char *s2)
+{
+       if (s1 == NULL || s2 == NULL) {
+               /* only equal when both are missing */
+               return s1 == s2;
+       }
+
+       return (s1 == s2) || (strcmp(s1, s2) == 0);
+}
+
+/**
+ * Calculate the number of units (8 or 16-bit, depending on the
+ * destination charset), that would be needed to convert the input
+ * string which is expected to be in src_charset encoding to the
+ * destination charset (which should be a unicode charset).
+ *
+ * @param s           string to measure, may be NULL
+ * @param src_charset charset s is encoded in
+ * @param dst_charset charset the length is calculated for
+ * @return number of units in the destination charset, without a
+ *         terminator; 0 if s is NULL
+ */
+_PUBLIC_ size_t strlen_m_ext(const char *s, charset_t src_charset, charset_t dst_charset)
+{
+       size_t count = 0;
+       struct smb_iconv_convenience *ic = get_iconv_convenience();
+
+       if (!s) {
+               return 0;
+       }
+
+       /*
+        * Fast path: leading bytes without the high bit set are counted
+        * as one unit each, without decoding them.
+        */
+       while (*s && !(((uint8_t)*s) & 0x80)) {
+               s++;
+               count++;
+       }
+
+       if (!*s) {
+               return count;
+       }
+
+       /* Slow path: decode the rest codepoint by codepoint. */
+       while (*s) {
+               size_t c_size;
+               codepoint_t c = next_codepoint_convenience_ext(ic, s, src_charset, &c_size);
+               s += c_size;
+
+               switch (dst_charset) {
+               case CH_UTF16LE:
+               case CH_UTF16BE:
+               case CH_UTF16MUNGED:
+                       if (c < 0x10000) {
+                               /* Unicode char fits into 16 bits. */
+                               count += 1;
+                       } else {
+                               /* Double-width unicode char - 32 bits. */
+                               count += 2;
+                       }
+                       break;
+               case CH_UTF8:
+                       /*
+                        * this only checks ranges, and does not
+                        * check for invalid codepoints
+                        *
+                        * UTF-8 needs 1 byte below 0x80, 2 bytes below
+                        * 0x800, 3 bytes below 0x10000 and 4 bytes
+                        * above that.
+                        */
+                       if (c < 0x80) {
+                               count += 1;
+                       } else if (c < 0x800) {
+                               count += 2;
+                       } else if (c < 0x10000) {
+                               count += 3;
+                       } else {
+                               count += 4;
+                       }
+                       break;
+               default:
+                       /*
+                        * non-unicode encoding:
+                        * assume that each codepoint fits into
+                        * one unit in the destination encoding.
+                        */
+                       count += 1;
+               }
+       }
+
+       return count;
+}
+
+/**
+ * Same measurement as strlen_m_ext(), plus one unit for the
+ * terminator. A NULL string gives 0.
+ */
+_PUBLIC_ size_t strlen_m_ext_term(const char *s, const charset_t src_charset,
+                                 const charset_t dst_charset)
+{
+       size_t units;
+
+       if (s == NULL) {
+               return 0;
+       }
+
+       units = strlen_m_ext(s, src_charset, dst_charset);
+       return units + 1;
+}
+
+/**
+ * Length of a CH_UNIX encoded string, measured in the 16-bit units
+ * its UTF16 form would need.
+ *
+ * For single byte strings this equals the number of bytes; for
+ * multibyte strings it will be different.
+ */
+_PUBLIC_ size_t strlen_m(const char *s)
+{
+       const charset_t from = CH_UNIX;
+       const charset_t to = CH_UTF16LE;
+
+       return strlen_m_ext(s, from, to);
+}
+
+/**
+   Length of a string as computed by strlen_m(), plus one for the NULL
+   terminator. A NULL string gives 0.
+**/
+_PUBLIC_ size_t strlen_m_term(const char *s)
+{
+       return (s == NULL) ? 0 : strlen_m(s) + 1;
+}
+
+/*
+ * Odd helper for the winreg pipe: a missing or empty string counts as
+ * 0, any other string counts as its strlen_m() length plus one for the
+ * terminator.
+ */
+
+_PUBLIC_ size_t strlen_m_term_null(const char *s)
+{
+       size_t units = (s != NULL) ? strlen_m(s) : 0;
+
+       /* nothing there: do not account for a terminator either */
+       return (units != 0) ? units + 1 : 0;
+}
+
+/**
+ Strchr and strrchr_m are a bit complex on general multi-byte strings.
+
+ Find the first occurrence of the character c in src. Plain bytes are
+ scanned directly; once a byte with the high bit set is seen the rest
+ of the string is decoded codepoint by codepoint.
+
+ @param src string to search; NULL gives NULL
+ @param c   character to look for
+ @return pointer to the first match inside src (with const removed),
+         or NULL if there is no match
+**/
+_PUBLIC_ char *strchr_m(const char *src, char c)
+{
+       const char *s;
+       struct smb_iconv_convenience *ic = get_iconv_convenience();
+       if (src == NULL) {
+               return NULL;
+       }
+       /* characters in the range 0x00 - 0x3F (neither of the two top
+          bits set) are guaranteed to not appear in non-initial
+          position in multi-byte charsets, so a plain byte search
+          is enough for them */
+       if ((c & 0xC0) == 0) {
+               return strchr(src, c);
+       }
+
+       /* this is quite a common operation, so we want it to be
+          fast. We optimise for the ascii case, knowing that all our
+          supported multi-byte character sets are ascii-compatible
+          (ie. they match for the first 128 chars).
+          The loop below stops at the end of the string or at the
+          first byte that has the high bit set. */
+
+       for (s = src; *s && !(((unsigned char)s[0]) & 0x80); s++) {
+               if (*s == c)
+                       return (char *)s;
+       }
+
+       if (!*s) /* reached the end without seeing a high-bit byte */
+               return NULL;
+
+#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
+       /* With compose characters we must restart from the beginning. JRA. */
+       s = src;
+#endif
+
+       while (*s) { /* slow path: decode one codepoint per iteration */
+               size_t size;
+               codepoint_t c2 = next_codepoint_convenience(ic, s, &size);
+               if (c2 == c) {
+                       return discard_const_p(char, s);
+               }
+               s += size;
+       }
+
+       return NULL;
+}
+
+/**
+ * Multibyte-character version of strrchr
+ *
+ * Find the last occurrence of the character c in s. A backwards byte
+ * scan is tried first; only if a candidate match is preceded by a
+ * byte with the high bit set is the whole string decoded codepoint by
+ * codepoint from the start.
+ *
+ * @param s string to search; NULL gives NULL
+ * @param c character to look for
+ * @return pointer to the last match inside s (with const removed),
+ *         or NULL if there is no match
+ */
+_PUBLIC_ char *strrchr_m(const char *s, char c)
+{
+       struct smb_iconv_convenience *ic = get_iconv_convenience();
+       char *ret = NULL;
+
+       if (s == NULL) {
+               return NULL;
+       }
+
+       /* characters in the range 0x00 - 0x3F (neither of the two top
+          bits set) are guaranteed to not appear in non-initial
+          position in multi-byte charsets, so a plain byte search
+          is enough for them */
+       if ((c & 0xC0) == 0) {
+               return strrchr(s, c);
+       }
+
+       /* this is quite a common operation, so we want it to be
+          fast. We optimise for the ascii case, knowing that all our
+          supported multi-byte character sets are ascii-compatible
+          (ie. they match for the first 128 chars). Also, in Samba
+          we only search for ascii characters in 'c' and that
+          in all mb character sets with a compound character
+          containing c, if 'c' is not a match at position
+          p, then p[-1] > 0x7f. JRA. */
+
+       {
+               size_t len = strlen(s);
+               const char *cp = s;
+               bool got_mb = false;
+
+               if (len == 0)
+                       return NULL;
+               cp += (len - 1); /* start at the last byte of the string */
+               do {
+                       if (c == *cp) {
+                               /* Could be a match. Part of a multibyte ? */
+                               if ((cp > s) &&
+                                       (((unsigned char)cp[-1]) & 0x80)) {
+                                       /* Yep - go slow :-( */
+                                       got_mb = true;
+                                       break;
+                               }
+                               /* No - we have a match ! */
+                               return (char *)cp;
+                       }
+               } while (cp-- != s); /* NOTE(review): the final decrement leaves cp one before s */
+               if (!got_mb) /* every byte was checked and none matched */
+                       return NULL;
+       }
+
+       while (*s) { /* slow path: decode from the start, remember the last match */
+               size_t size;
+               codepoint_t c2 = next_codepoint_convenience(ic, s, &size);
+               if (c2 == c) {
+                       ret = discard_const_p(char, s);
+               }
+               s += size;
+       }
+
+       return ret;
+}
+
+/**
+  Report whether a string contains at least one lower case
+  (multi-byte) character, i.e. one that toupper_m() changes.
+*/
+_PUBLIC_ bool strhaslower(const char *string)
+{
+       struct smb_iconv_convenience *ic = get_iconv_convenience();
+       const char *p = string;
+
+       while (*p != '\0') {
+               size_t nbytes;
+               codepoint_t cp = next_codepoint_convenience(ic, p, &nbytes);
+
+               /* a character that differs from its upper case form is lower case */
+               if (toupper_m(cp) != cp) {
+                       return true;
+               }
+
+               p += nbytes;
+       }
+
+       return false;
+}
+
+/**
+  Report whether a string contains at least one upper case
+  (multi-byte) character, i.e. one that tolower_m() changes.
+*/
+_PUBLIC_ bool strhasupper(const char *string)
+{
+       struct smb_iconv_convenience *ic = get_iconv_convenience();
+       const char *p = string;
+
+       while (*p != '\0') {
+               size_t nbytes;
+               codepoint_t cp = next_codepoint_convenience(ic, p, &nbytes);
+
+               /* a character that differs from its lower case form is upper case */
+               if (tolower_m(cp) != cp) {
+                       return true;
+               }
+
+               p += nbytes;
+       }
+
+       return false;
+}
+
index b6bfb29e7dc4f5bc7af9673627e36944d2b545e1..ad2ba687f484a2af9b029d879541e2a198e838a4 100644 (file)
 #include "includes.h"
 #include "system/locale.h"
 
-/**
- Case insensitive string compararison
-**/
-_PUBLIC_ int strcasecmp_m(const char *s1, const char *s2)
-{
-       codepoint_t c1=0, c2=0;
-       size_t size1, size2;
-       struct smb_iconv_convenience *iconv_convenience = get_iconv_convenience();
-
-       /* handle null ptr comparisons to simplify the use in qsort */
-       if (s1 == s2) return 0;
-       if (s1 == NULL) return -1;
-       if (s2 == NULL) return 1;
-
-       while (*s1 && *s2) {
-               c1 = next_codepoint_convenience(iconv_convenience, s1, &size1);
-               c2 = next_codepoint_convenience(iconv_convenience, s2, &size2);
-
-               s1 += size1;
-               s2 += size2;
-
-               if (c1 == c2) {
-                       continue;
-               }
-
-               if (c1 == INVALID_CODEPOINT ||
-                   c2 == INVALID_CODEPOINT) {
-                       /* what else can we do?? */
-                       return strcasecmp(s1, s2);
-               }
-
-               if (toupper_m(c1) != toupper_m(c2)) {
-                       return c1 - c2;
-               }
-       }
-
-       return *s1 - *s2;
-}
-
 /**
  * Get the next token from a string, return False if none found.
  * Handles double-quotes.
@@ -106,74 +67,6 @@ _PUBLIC_ bool next_token(const char **ptr,char *buff, const char *sep, size_t bu
        return true;
 }
 
-/**
- Case insensitive string compararison, length limited
-**/
-_PUBLIC_ int strncasecmp_m(const char *s1, const char *s2, size_t n)
-{
-       codepoint_t c1=0, c2=0;
-       size_t size1, size2;
-       struct smb_iconv_convenience *iconv_convenience = get_iconv_convenience();
-
-       /* handle null ptr comparisons to simplify the use in qsort */
-       if (s1 == s2) return 0;
-       if (s1 == NULL) return -1;
-       if (s2 == NULL) return 1;
-
-       while (*s1 && *s2 && n) {
-               n--;
-
-               c1 = next_codepoint_convenience(iconv_convenience, s1, &size1);
-               c2 = next_codepoint_convenience(iconv_convenience, s2, &size2);
-
-               s1 += size1;
-               s2 += size2;
-
-               if (c1 == c2) {
-                       continue;
-               }
-
-               if (c1 == INVALID_CODEPOINT ||
-                   c2 == INVALID_CODEPOINT) {
-                       /* what else can we do?? */
-                       return strcasecmp(s1, s2);
-               }
-
-               if (toupper_m(c1) != toupper_m(c2)) {
-                       return c1 - c2;
-               }
-       }
-
-       if (n == 0) {
-               return 0;
-       }
-
-       return *s1 - *s2;
-}
-
-/**
- * Compare 2 strings.
- *
- * @note The comparison is case-insensitive.
- **/
-_PUBLIC_ bool strequal_m(const char *s1, const char *s2)
-{
-       return strcasecmp_m(s1,s2) == 0;
-}
-
-/**
- Compare 2 strings (case sensitive).
-**/
-_PUBLIC_ bool strcsequal_m(const char *s1,const char *s2)
-{
-       if (s1 == s2)
-               return true;
-       if (!s1 || !s2)
-               return false;
-       
-       return strcmp(s1,s2) == 0;
-}
-
 
 /**
  String replace.
@@ -238,231 +131,6 @@ _PUBLIC_ char *alpha_strcpy(char *dest, const char *src, const char *other_safe_
        return dest;
 }
 
-/**
- * Calculate the number of units (8 or 16-bit, depending on the
- * destination charset), that would be needed to convert the input
- * string which is expected to be in in src_charset encoding to the
- * destination charset (which should be a unicode charset).
- */
-_PUBLIC_ size_t strlen_m_ext(const char *s, charset_t src_charset, charset_t dst_charset)
-{
-       size_t count = 0;
-       struct smb_iconv_convenience *ic = get_iconv_convenience();
-
-       if (!s) {
-               return 0;
-       }
-
-       while (*s && !(((uint8_t)*s) & 0x80)) {
-               s++;
-               count++;
-       }
-
-       if (!*s) {
-               return count;
-       }
-
-       while (*s) {
-               size_t c_size;
-               codepoint_t c = next_codepoint_convenience_ext(ic, s, src_charset, &c_size);
-               s += c_size;
-
-               switch (dst_charset) {
-               case CH_UTF16LE:
-               case CH_UTF16BE:
-               case CH_UTF16MUNGED:
-                       if (c < 0x10000) {
-                               count += 1;
-                       } else {
-                               count += 2;
-                       }
-                       break;
-               case CH_UTF8:
-                       /*
-                        * this only checks ranges, and does not
-                        * check for invalid codepoints
-                        */
-                       if (c < 0x80) {
-                               count += 1;
-                       } else if (c < 0x800) {
-                               count += 2;
-                       } else if (c < 0x1000) {
-                               count += 3;
-                       } else {
-                               count += 4;
-                       }
-                       break;
-               default:
-                       /*
-                        * non-unicode encoding:
-                        * assume that each codepoint fits into
-                        * one unit in the destination encoding.
-                        */
-                       count += 1;
-               }
-       }
-
-       return count;
-}
-
-_PUBLIC_ size_t strlen_m_ext_term(const char *s, const charset_t src_charset,
-                                 const charset_t dst_charset)
-{
-       if (!s) {
-               return 0;
-       }
-       return strlen_m_ext(s, src_charset, dst_charset) + 1;
-}
-
-/**
- * Calculate the number of 16-bit units that would be needed to convert
- * the input string which is expected to be in CH_UNIX encoding to UTF16.
- *
- * This will be the same as the number of bytes in a string for single
- * byte strings, but will be different for multibyte.
- */
-_PUBLIC_ size_t strlen_m(const char *s)
-{
-       return strlen_m_ext(s, CH_UNIX, CH_UTF16LE);
-}
-
-/**
-   Work out the number of multibyte chars in a string, including the NULL
-   terminator.
-**/
-_PUBLIC_ size_t strlen_m_term(const char *s)
-{
-       if (!s) {
-               return 0;
-       }
-
-       return strlen_m(s) + 1;
-}
-
-/*
- * Weird helper routine for the winreg pipe: If nothing is around, return 0,
- * if a string is there, include the terminator.
- */
-
-_PUBLIC_ size_t strlen_m_term_null(const char *s)
-{
-       size_t len;
-       if (!s) {
-               return 0;
-       }
-       len = strlen_m(s);
-       if (len == 0) {
-               return 0;
-       }
-
-       return len+1;
-}
-
-/**
- Strchr and strrchr_m are a bit complex on general multi-byte strings. 
-**/
-_PUBLIC_ char *strchr_m(const char *s, char c)
-{
-       struct smb_iconv_convenience *ic = get_iconv_convenience();
-       if (s == NULL) {
-               return NULL;
-       }
-       /* characters below 0x3F are guaranteed to not appear in
-          non-initial position in multi-byte charsets */
-       if ((c & 0xC0) == 0) {
-               return strchr(s, c);
-       }
-
-       while (*s) {
-               size_t size;
-               codepoint_t c2 = next_codepoint_convenience(ic, s, &size);
-               if (c2 == c) {
-                       return discard_const_p(char, s);
-               }
-               s += size;
-       }
-
-       return NULL;
-}
-
-/**
- * Multibyte-character version of strrchr
- */
-_PUBLIC_ char *strrchr_m(const char *s, char c)
-{
-       struct smb_iconv_convenience *ic = get_iconv_convenience();
-       char *ret = NULL;
-
-       if (s == NULL) {
-               return NULL;
-       }
-
-       /* characters below 0x3F are guaranteed to not appear in
-          non-initial position in multi-byte charsets */
-       if ((c & 0xC0) == 0) {
-               return strrchr(s, c);
-       }
-
-       while (*s) {
-               size_t size;
-               codepoint_t c2 = next_codepoint_convenience(ic, s, &size);
-               if (c2 == c) {
-                       ret = discard_const_p(char, s);
-               }
-               s += size;
-       }
-
-       return ret;
-}
-
-/**
-  return True if any (multi-byte) character is lower case
-*/
-_PUBLIC_ bool strhaslower(const char *string)
-{
-       struct smb_iconv_convenience *ic = get_iconv_convenience();
-       while (*string) {
-               size_t c_size;
-               codepoint_t s;
-               codepoint_t t;
-
-               s = next_codepoint_convenience(ic, string, &c_size);
-               string += c_size;
-
-               t = toupper_m(s);
-
-               if (s != t) {
-                       return true; /* that means it has lower case chars */
-               }
-       }
-
-       return false;
-} 
-
-/**
-  return True if any (multi-byte) character is upper case
-*/
-_PUBLIC_ bool strhasupper(const char *string)
-{
-       struct smb_iconv_convenience *ic = get_iconv_convenience();
-       while (*string) {
-               size_t c_size;
-               codepoint_t s;
-               codepoint_t t;
-
-               s = next_codepoint_convenience(ic, string, &c_size);
-               string += c_size;
-
-               t = tolower_m(s);
-
-               if (s != t) {
-                       return true; /* that means it has upper case chars */
-               }
-       }
-
-       return false;
-} 
-
 /**
  Convert a string to lower case, allocated with talloc
 **/
@@ -517,7 +185,7 @@ _PUBLIC_ char *strupper_talloc_n(TALLOC_CTX *ctx, const char *src, size_t n)
        size_t size=0;
        char *dest;
        struct smb_iconv_convenience *iconv_convenience = get_iconv_convenience();
-       
+
        if (!src) {
                return NULL;
        }
index ab7cfc412dfb779714386b49a6223a0afc7fd25a..a245ef1b0c0c1f95fc47a803b974af691de133b5 100644 (file)
@@ -13,6 +13,6 @@ bld.SAMBA_SUBSYSTEM('ICONV_WRAPPER',
                     public_deps='iconv replace talloc')
 
 bld.SAMBA_SUBSYSTEM('CODEPOINTS',
-       source='codepoints.c',
+       source='codepoints.c util_str.c',
        deps='DYNCONFIG ICONV_WRAPPER'
        )
index 01245c1c12a61deda028c0a72619f40d5c49ccf3..2bc65c72856ebcf573da55cf93ad4ea43b35e236 100644 (file)
@@ -451,7 +451,7 @@ LIB_OBJ = $(LIBSAMBAUTIL_OBJ) $(UTIL_OBJ) $(CRYPTO_OBJ) \
          lib/bitmap.o lib/dprintf.o $(UTIL_REG_OBJ) \
          lib/wins_srv.o \
          lib/util_str.o lib/util_sid.o \
-         lib/util_unistr.o ../lib/util/charset/codepoints.o lib/util_file.o \
+         lib/util_unistr.o ../lib/util/charset/codepoints.o ../lib/util/charset/util_str.o lib/util_file.o \
          lib/util.o lib/util_names.o \
          lib/util_sock.o lib/sock_exec.o lib/util_sec.o \
          lib/substitute.o lib/dbwrap_util.o \
index 84b280005e0bfb22266a5d2bde5fb893a9cfefce..19961742619608ca847d6bca62d85cc8fcd4104d 100644 (file)
@@ -208,19 +208,6 @@ bool strnequal(const char *s1,const char *s2,size_t n)
        return(StrnCaseCmp(s1,s2,n)==0);
 }
 
-/**
- Compare 2 strings (case sensitive).
-**/
-_PUBLIC_ bool strcsequal(const char *s1,const char *s2)
-{
-       if (s1 == s2)
-               return true;
-       if (!s1 || !s2)
-               return false;
-
-       return strcmp(s1,s2) == 0;
-}
-
 /**
 Do a case-insensitive, whitespace-ignoring string compare.
 **/
@@ -444,54 +431,6 @@ bool trim_char(char *s,char cfront,char cback)
        return ret;
 }
 
-/**
-  return True if any (multi-byte) character is upper case
-*/
-_PUBLIC_ bool strhasupper(const char *string)
-{
-       struct smb_iconv_convenience *ic = get_iconv_convenience();
-       while (*string) {
-               size_t c_size;
-               codepoint_t s;
-               codepoint_t t;
-
-               s = next_codepoint_convenience(ic, string, &c_size);
-               string += c_size;
-
-               t = tolower_m(s);
-
-               if (s != t) {
-                       return true; /* that means it has upper case chars */
-               }
-       }
-
-       return false;
-}
-
-/**
-  return True if any (multi-byte) character is lower case
-*/
-_PUBLIC_ bool strhaslower(const char *string)
-{
-       struct smb_iconv_convenience *ic = get_iconv_convenience();
-       while (*string) {
-               size_t c_size;
-               codepoint_t s;
-               codepoint_t t;
-
-               s = next_codepoint_convenience(ic, string, &c_size);
-               string += c_size;
-
-               t = toupper_m(s);
-
-               if (s != t) {
-                       return true; /* that means it has lower case chars */
-               }
-       }
-
-       return false;
-}
-
 /**
  Safe string copy into a known length string. maxlength does not
  include the terminating zero.
@@ -1103,115 +1042,6 @@ char *string_truncate(char *s, unsigned int length)
        return s;
 }
 
-/**
- Strchr and strrchr_m are a bit complex on general multi-byte strings.
-**/
-_PUBLIC_ char *strchr_m(const char *src, char c)
-{
-       const char *s;
-       struct smb_iconv_convenience *ic = get_iconv_convenience();
-       if (src == NULL) {
-               return NULL;
-       }
-       /* characters below 0x3F are guaranteed to not appear in
-          non-initial position in multi-byte charsets */
-       if ((c & 0xC0) == 0) {
-               return strchr(src, c);
-       }
-
-       /* this is quite a common operation, so we want it to be
-          fast. We optimise for the ascii case, knowing that all our
-          supported multi-byte character sets are ascii-compatible
-          (ie. they match for the first 128 chars) */
-
-       for (s = src; *s && !(((unsigned char)s[0]) & 0x80); s++) {
-               if (*s == c)
-                       return (char *)s;
-       }
-
-       if (!*s)
-               return NULL;
-
-#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
-       /* With compose characters we must restart from the beginning. JRA. */
-       s = src;
-#endif
-
-       while (*s) {
-               size_t size;
-               codepoint_t c2 = next_codepoint_convenience(ic, s, &size);
-               if (c2 == c) {
-                       return discard_const_p(char, s);
-               }
-               s += size;
-       }
-
-       return NULL;
-}
-
-/**
- * Multibyte-character version of strrchr
- */
-_PUBLIC_ char *strrchr_m(const char *s, char c)
-{
-       struct smb_iconv_convenience *ic = get_iconv_convenience();
-       char *ret = NULL;
-
-       if (s == NULL) {
-               return NULL;
-       }
-
-       /* characters below 0x3F are guaranteed to not appear in
-          non-initial position in multi-byte charsets */
-       if ((c & 0xC0) == 0) {
-               return strrchr(s, c);
-       }
-
-       /* this is quite a common operation, so we want it to be
-          fast. We optimise for the ascii case, knowing that all our
-          supported multi-byte character sets are ascii-compatible
-          (ie. they match for the first 128 chars). Also, in Samba
-          we only search for ascii characters in 'c' and that
-          in all mb character sets with a compound character
-          containing c, if 'c' is not a match at position
-          p, then p[-1] > 0x7f. JRA. */
-
-       {
-               size_t len = strlen(s);
-               const char *cp = s;
-               bool got_mb = false;
-
-               if (len == 0)
-                       return NULL;
-               cp += (len - 1);
-               do {
-                       if (c == *cp) {
-                               /* Could be a match. Part of a multibyte ? */
-                               if ((cp > s) &&
-                                       (((unsigned char)cp[-1]) & 0x80)) {
-                                       /* Yep - go slow :-( */
-                                       got_mb = true;
-                                       break;
-                               }
-                               /* No - we have a match ! */
-                               return (char *)cp;
-                       }
-               } while (cp-- != s);
-               if (!got_mb)
-                       return NULL;
-       }
-
-       while (*s) {
-               size_t size;
-               codepoint_t c2 = next_codepoint_convenience(ic, s, &size);
-               if (c2 == c) {
-                       ret = discard_const_p(char, s);
-               }
-               s += size;
-       }
-
-       return ret;
-}
 
 /***********************************************************************
  Return the equivalent of doing strrchr 'n' times - always going
@@ -1396,128 +1226,6 @@ void strupper_m(char *s)
        errno = errno_save;
 }
 
-/**
- * Calculate the number of units (8 or 16-bit, depending on the
- * destination charset), that would be needed to convert the input
- * string which is expected to be in in src_charset encoding to the
- * destination charset (which should be a unicode charset).
- */
-_PUBLIC_ size_t strlen_m_ext(const char *s, charset_t src_charset, charset_t dst_charset)
-{
-       size_t count = 0;
-       struct smb_iconv_convenience *ic = get_iconv_convenience();
-
-       if (!s) {
-               return 0;
-       }
-
-       while (*s && !(((uint8_t)*s) & 0x80)) {
-               s++;
-               count++;
-       }
-
-       if (!*s) {
-               return count;
-       }
-
-       while (*s) {
-               size_t c_size;
-               codepoint_t c = next_codepoint_convenience_ext(ic, s, src_charset, &c_size);
-               s += c_size;
-
-               switch (dst_charset) {
-               case CH_UTF16LE:
-               case CH_UTF16BE:
-               case CH_UTF16MUNGED:
-                       if (c < 0x10000) {
-                               /* Unicode char fits into 16 bits. */
-                               count += 1;
-                       } else {
-                               /* Double-width unicode char - 32 bits. */
-                               count += 2;
-                       }
-                       break;
-               case CH_UTF8:
-                       /*
-                        * this only checks ranges, and does not
-                        * check for invalid codepoints
-                        */
-                       if (c < 0x80) {
-                               count += 1;
-                       } else if (c < 0x800) {
-                               count += 2;
-                       } else if (c < 0x1000) {
-                               count += 3;
-                       } else {
-                               count += 4;
-                       }
-                       break;
-               default:
-                       /*
-                        * non-unicode encoding:
-                        * assume that each codepoint fits into
-                        * one unit in the destination encoding.
-                        */
-                       count += 1;
-               }
-       }
-
-       return count;
-}
-
-_PUBLIC_ size_t strlen_m_ext_term(const char *s, const charset_t src_charset,
-                                 const charset_t dst_charset)
-{
-       if (!s) {
-               return 0;
-       }
-       return strlen_m_ext(s, src_charset, dst_charset) + 1;
-}
-
-/**
- * Calculate the number of 16-bit units that would be needed to convert
- * the input string which is expected to be in CH_UNIX encoding to UTF16.
- *
- * This will be the same as the number of bytes in a string for single
- * byte strings, but will be different for multibyte.
- */
-_PUBLIC_ size_t strlen_m(const char *s)
-{
-       return strlen_m_ext(s, CH_UNIX, CH_UTF16LE);
-}
-
-/**
-   Work out the number of multibyte chars in a string, including the NULL
-   terminator.
-**/
-_PUBLIC_ size_t strlen_m_term(const char *s)
-{
-       if (!s) {
-               return 0;
-       }
-
-       return strlen_m(s) + 1;
-}
-
-/*
- * Weird helper routine for the winreg pipe: If nothing is around, return 0,
- * if a string is there, include the terminator.
- */
-
-_PUBLIC_ size_t strlen_m_term_null(const char *s)
-{
-       size_t len;
-       if (!s) {
-               return 0;
-       }
-       len = strlen_m(s);
-       if (len == 0) {
-               return 0;
-       }
-
-       return len+1;
-}
-
 /**
  Just a typesafety wrapper for snprintf into a fstring.
 **/