Merge from Samba 3.0:
author Tim Potter <tpot@samba.org>
Mon, 15 Sep 2003 05:02:12 +0000 (05:02 +0000)
committer Tim Potter <tpot@samba.org>
Mon, 15 Sep 2003 05:02:12 +0000 (05:02 +0000)
>Fix for MacOS/X which uses STUPID BROKEN UNICODE COMPOSE CHARACTERS !
>(rant off :-). Inspired by work from Benjamin Riefenstahl <Benjamin.Riefenstahl@epost.de>.
>Also add MacOSX/Darwin configure fixes.
>Jerry - can we put this in 3.0 release ? :-).
>Jeremy.
(This used to be commit 67acde75d3569b612f87646ff0740f8020e8fdcf)

source3/lib/charcnv.c
source3/lib/util_str.c

index 5f3cf64a6852723a95e156dea8fe346aeb858947..dafc88fb77a087c7778f0ebe97dcade798c2efc2 100644 (file)
@@ -176,6 +176,14 @@ static size_t convert_string_internal(charset_t from, charset_t to,
 
        descriptor = conv_handles[from][to];
 
+       if (srclen == (size_t)-1) {
+               if (from == CH_UCS2) {
+                       srclen = (strlen_w((const smb_ucs2_t *)src)+1) * 2;
+               } else {
+                       srclen = strlen((const char *)src)+1;
+               }
+       }
+
        if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
                if (!conv_silent)
                        DEBUG(0,("convert_string_internal: Conversion not supported.\n"));
@@ -248,31 +256,40 @@ size_t convert_string(charset_t from, charset_t to,
                      void const *src, size_t srclen, 
                      void *dest, size_t destlen)
 {
+       /*
+        * NB. We deliberately don't do a strlen here if srclen == -1.
+        * This is very expensive over millions of calls and is taken
+        * care of in the slow path in convert_string_internal. JRA.
+        */
+
        if (srclen == 0)
                return 0;
 
        if (from != CH_UCS2 && to != CH_UCS2) {
                const unsigned char *p = (const unsigned char *)src;
                unsigned char *q = (unsigned char *)dest;
+               size_t slen = srclen;
+               size_t dlen = destlen;
                unsigned char lastp;
                size_t retval = 0;
 
                /* If all characters are ascii, fast path here. */
-               while (srclen && destlen) {
+               while (slen && dlen) {
                        if ((lastp = *p) <= 0x7f) {
                                *q++ = *p++;
-                               if (srclen != (size_t)-1) {
-                                       srclen--;
+                               if (slen != (size_t)-1) {
+                                       slen--;
                                }
-                               destlen--;
+                               dlen--;
                                retval++;
                                if (!lastp)
                                        break;
                        } else {
-                               if (srclen == (size_t)-1) {
-                                       srclen = strlen(p)+1;
-                               }
-                               return retval + convert_string_internal(from, to, p, srclen, q, destlen);
+#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
+                               goto general_case;
+#else
+                               return retval + convert_string_internal(from, to, p, slen, q, dlen);
+#endif
                        }
                }
                return retval;
@@ -280,25 +297,28 @@ size_t convert_string(charset_t from, charset_t to,
                const unsigned char *p = (const unsigned char *)src;
                unsigned char *q = (unsigned char *)dest;
                size_t retval = 0;
+               size_t slen = srclen;
+               size_t dlen = destlen;
                unsigned char lastp;
 
                /* If all characters are ascii, fast path here. */
-               while ((srclen >= 2) && destlen) {
-                       if ((lastp = *p) <= 0x7f && p[1] == 0) {
+               while ((slen >= 2) && dlen) {
+                       if (((lastp = *p) <= 0x7f) && (p[1] == 0)) {
                                *q++ = *p;
-                               if (srclen != (size_t)-1) {
-                                       srclen -= 2;
+                               if (slen != (size_t)-1) {
+                                       slen -= 2;
                                }
                                p += 2;
-                               destlen--;
+                               dlen--;
                                retval++;
                                if (!lastp)
                                        break;
                        } else {
-                               if (srclen == (size_t)-1) {
-                                       srclen = strlen_w((const void *)p)+2;
-                               }
-                               return retval + convert_string_internal(from, to, p, srclen, q, destlen);
+#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
+                               goto general_case;
+#else
+                               return retval + convert_string_internal(from, to, p, slen, q, dlen);
+#endif
                        }
                }
                return retval;
@@ -306,29 +326,36 @@ size_t convert_string(charset_t from, charset_t to,
                const unsigned char *p = (const unsigned char *)src;
                unsigned char *q = (unsigned char *)dest;
                size_t retval = 0;
+               size_t slen = srclen;
+               size_t dlen = destlen;
                unsigned char lastp;
 
                /* If all characters are ascii, fast path here. */
-               while (srclen && (destlen >= 2)) {
+               while (slen && (dlen >= 2)) {
                        if ((lastp = *p) <= 0x7F) {
                                *q++ = *p++;
                                *q++ = '\0';
-                               if (srclen != (size_t)-1) {
-                                       srclen--;
+                               if (slen != (size_t)-1) {
+                                       slen--;
                                }
-                               destlen -= 2;
+                               dlen -= 2;
                                retval += 2;
                                if (!lastp)
                                        break;
                        } else {
-                               if (srclen == (size_t)-1) {
-                                       srclen = strlen(p)+1;
-                               }
-                               return retval + convert_string_internal(from, to, p, srclen, q, destlen);
+#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
+                               goto general_case;
+#else
+                               return retval + convert_string_internal(from, to, p, slen, q, dlen);
+#endif
                        }
                }
                return retval;
        }
+
+#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
+  general_case:
+#endif
        return convert_string_internal(from, to, src, srclen, dest, destlen);
 }
 
@@ -542,7 +569,7 @@ char *strdup_upper(const char *s)
 
                strupper_w(buffer);
        
-               size = convert_string(CH_UCS2, CH_UNIX, buffer, sizeof(buffer), out_buffer, sizeof(out_buffer));
+               size = convert_string(CH_UCS2, CH_UNIX, buffer, -1, out_buffer, sizeof(out_buffer));
                if (size == -1) {
                        return NULL;
                }
index 82b312e24150f1562d73dd001927609f68b56845..15ac1639a9ae153eea8ffc9207fde3978b2b5cb2 100644 (file)
@@ -382,6 +382,10 @@ void string_replace(pstring s,char oldc,char newc)
                return;
 
        /* Slow (mb) path. */
+#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
+       /* With compose characters we must restart from the beginning. JRA. */
+       p = s;
+#endif
        push_ucs2(NULL, tmpbuf, p, sizeof(tmpbuf), STR_TERMINATE);
        string_replace_w(tmpbuf, UCS2_CHAR(oldc), UCS2_CHAR(newc));
        pull_ucs2(NULL, p, tmpbuf, -1, sizeof(tmpbuf), STR_TERMINATE);
@@ -1175,18 +1179,19 @@ char *string_truncate(char *s, unsigned int length)
  We convert via ucs2 for now.
 **/
 
-char *strchr_m(const char *s, char c)
+char *strchr_m(const char *src, char c)
 {
        wpstring ws;
        pstring s2;
        smb_ucs2_t *p;
+       const char *s;
 
        /* this is quite a common operation, so we want it to be
           fast. We optimise for the ascii case, knowing that all our
           supported multi-byte character sets are ascii-compatible
           (ie. they match for the first 128 chars) */
 
-       while (*s && (((unsigned char)s[0]) & 0x80)) {
+       for (s = src; *s && !(((unsigned char)s[0]) & 0x80); s++) {
                if (*s == c)
                        return s;
        }
@@ -1194,6 +1199,11 @@ char *strchr_m(const char *s, char c)
        if (!*s)
                return NULL;
 
+#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
+       /* With compose characters we must restart from the beginning. JRA. */
+       s = src;
+#endif
+
        push_ucs2(NULL, ws, s, sizeof(ws), STR_TERMINATE);
        p = strchr_w(ws, UCS2_CHAR(c));
        if (!p)