Merge fixes and tests for jra's broken strstr_m() function from 3.0

[samba.git] / source3 / lib / util_str.c
diff --git a/source3/lib/util_str.c b/source3/lib/util_str.c

index b6025a362d3443fc128812fcd5d0830a9295abda..b8cf052862f3e07dd55992ad848f688f50903372 100644 (file)
--- a/source3/lib/util_str.c
+++ b/source3/lib/util_str.c
@@ -62,7 +62,7 @@ BOOL next_token(const char **ptr,char *buff, const char *sep, size_t bufsize)
         /* copy over the token */
         pbuf = buff;
         for (quoted = False; len < bufsize && *s && (quoted || !strchr_m(sep,*s)); s++) {
-               if (*s == '\"') {
+               if (*s == '\"' || *s == '\'') {
                         quoted = !quoted;
                 } else {
                         len++;
@@ -557,10 +557,17 @@ size_t count_chars(const char *s,char c)
  {
         smb_ucs2_t *ptr;
         int count;
-       push_ucs2(NULL, tmpbuf,s, sizeof(tmpbuf), STR_TERMINATE);
-       for(count=0,ptr=tmpbuf;*ptr;ptr++)
+       smb_ucs2_t *alloc_tmpbuf = NULL;
+
+       if (push_ucs2_allocate(&alloc_tmpbuf, s) == (size_t)-1) {
+               return 0;
+       }
+
+       for(count=0,ptr=alloc_tmpbuf;*ptr;ptr++)
                 if(*ptr==UCS2_CHAR(c))
                         count++;
+
+       SAFE_FREE(alloc_tmpbuf);
         return(count);
  }
  
@@ -574,7 +581,7 @@ char *safe_strcpy_fn(const char *fn, int line, char *dest,const char *src, size_
         size_t len;
  
         if (!dest) {
-               DEBUG(0,("ERROR: NULL dest in safe_strcpy\n"));
+               DEBUG(0,("ERROR: NULL dest in safe_strcpy, called from [%s][%d]\n", fn, line));
                 return NULL;
         }
  
@@ -610,7 +617,7 @@ char *safe_strcat_fn(const char *fn, int line, char *dest, const char *src, size
         size_t src_len, dest_len;
  
         if (!dest) {
-               DEBUG(0,("ERROR: NULL dest in safe_strcat\n"));
+               DEBUG(0,("ERROR: NULL dest in safe_strcat, called from [%s][%d]\n", fn, line));
                 return NULL;
         }
  
@@ -654,7 +661,7 @@ char *alpha_strcpy_fn(const char *fn, int line, char *dest, const char *src, con
  #endif
  
         if (!dest) {
-               DEBUG(0,("ERROR: NULL dest in alpha_strcpy\n"));
+               DEBUG(0,("ERROR: NULL dest in alpha_strcpy, called from [%s][%d]\n", fn, line));
                 return NULL;
         }
  
@@ -695,9 +702,11 @@ char *StrnCpy_fn(const char *fn, int line,char *dest,const char *src,size_t n)
         clobber_region(fn, line, dest, n+1);
  #endif
  
-       if (!dest)
+       if (!dest) {
+               DEBUG(0,("ERROR: NULL dest in StrnCpy, called from [%s][%d]\n", fn, line));
                 return(NULL);
-       
+       }
+
         if (!src) {
                 *dest = 0;
                 return(dest);
@@ -910,7 +919,7 @@ void string_sub(char *s,const char *pattern, const char *insert, size_t len)
         if (len == 0)
                 len = ls + 1; /* len is number of *bytes* */
  
-       while (lp <= ls && (p = strstr(s,pattern))) {
+       while (lp <= ls && (p = strstr_m(s,pattern))) {
                 if (ls + (li-lp) >= len) {
                         DEBUG(0,("ERROR: string overflow by %d in string_sub(%.50s, %d)\n", 
                                  (int)(ls + (li-lp) - len),
@@ -995,7 +1004,7 @@ char *realloc_string_sub(char *string, const char *pattern, const char *insert)
                 }
         }
         
-       while ((p = strstr(s,pattern))) {
+       while ((p = strstr_m(s,pattern))) {
                 if (ld > 0) {
                         int offset = PTR_DIFF(s,string);
                         char *t = Realloc(string, ls + ld + 1);
@@ -1043,7 +1052,7 @@ void all_string_sub(char *s,const char *pattern,const char *insert, size_t len)
         if (len == 0)
                 len = ls + 1; /* len is number of *bytes* */
         
-       while (lp <= ls && (p = strstr(s,pattern))) {
+       while (lp <= ls && (p = strstr_m(s,pattern))) {
                 if (ls + (li-lp) >= len) {
                         DEBUG(0,("ERROR: string overflow by %d in all_string_sub(%.50s, %d)\n", 
                                  (int)(ls + (li-lp) - len),
@@ -1194,7 +1203,7 @@ char *strchr_m(const char *src, char c)
  
         for (s = src; *s && !(((unsigned char)s[0]) & 0x80); s++) {
                 if (*s == c)
-                       return s;
+                       return (char *)s;
         }
  
         if (!*s)
@@ -1242,7 +1251,7 @@ char *strrchr_m(const char *s, char c)
                                         break;
                                 }
                                 /* No - we have a match ! */
-                               return cp;
+                               return (char *)cp;
                         }
                 } while (cp-- != s);
                 if (!got_mb)
@@ -1285,18 +1294,102 @@ char *strnrchr_m(const char *s, char c, unsigned int n)
         return (char *)(s+strlen(s2));
  }
  
+/***********************************************************************
+ strstr_m - multibyte-aware strstr(). Returns a pointer into src at the
+ match of findstr, or NULL if there is no match or a ucs2 conversion
+ fails. Pure-ascii prefixes are searched directly; otherwise via ucs2.
+***********************************************************************/
+
+char *strstr_m(const char *src, const char *findstr)
+{
+       smb_ucs2_t *p;
+       smb_ucs2_t *src_w, *find_w;
+       const char *s;
+       char *s2;
+       char *retp;
+       size_t findstr_len = 0; /* strlen(findstr), computed lazily below */
+
+       /* for correctness: an empty findstr matches at the start of src */
+       if (!findstr[0]) {
+               return (char *)src;
+       }
+
+       /* Samba does single character findstr calls a *lot*. */
+       if (findstr[1] == '\0')
+               return strchr_m(src, *findstr);
+
+       /* We optimise for the ascii case, knowing that all our
+          supported multi-byte character sets are ascii-compatible
+          (ie. they match for the first 128 chars) */
+
+       for (s = src; *s && !(((unsigned char)s[0]) & 0x80); s++) {
+               if (*s == *findstr) {
+                       if (!findstr_len)
+                               findstr_len = strlen(findstr);
+
+                       if (strncmp(s, findstr, findstr_len) == 0) {
+                               return (char *)s;
+                       }
+               }
+       }
+
+       if (!*s)
+               return NULL;
+
+#if 1 /* def BROKEN_UNICODE_COMPOSE_CHARACTERS */
+       /* 'make check' fails unless we do this */
+
+       /* With compose characters we must restart from the beginning. JRA. */
+       s = src;
+#endif
+
+       if (push_ucs2_allocate(&src_w, src) == (size_t)-1) {
+               DEBUG(0,("strstr_m: src malloc fail\n"));
+               return NULL;
+       }
+
+       if (push_ucs2_allocate(&find_w, findstr) == (size_t)-1) {
+               SAFE_FREE(src_w);
+               DEBUG(0,("strstr_m: find malloc fail\n"));
+               return NULL;
+       }
+
+       p = strstr_w(src_w, find_w);
+
+       if (!p) {
+               SAFE_FREE(src_w);
+               SAFE_FREE(find_w);
+               return NULL;
+       }
+       /* Cut src_w at the match; the converted-back prefix length is the byte offset. */
+       *p = 0;
+       if (pull_ucs2_allocate(&s2, src_w) == (size_t)-1) {
+               SAFE_FREE(src_w);
+               SAFE_FREE(find_w);
+               DEBUG(0,("strstr_m: dest malloc fail\n"));
+               return NULL;
+       }
+       retp = (char *)(s+strlen(s2));
+       SAFE_FREE(src_w);
+       SAFE_FREE(find_w);
+       SAFE_FREE(s2);
+       return retp;
+}
+
  /**
   Convert a string to lower case.
  **/
  
  void strlower_m(char *s)
  {
+       size_t len;
+
         /* this is quite a common operation, so we want it to be
            fast. We optimise for the ascii case, knowing that all our
            supported multi-byte character sets are ascii-compatible
            (ie. they match for the first 128 chars) */
  
-       while (*s && !(((unsigned char)s[0]) & 0x7F)) {
+       while (*s && !(((unsigned char)s[0]) & 0x80)) {
                 *s = tolower((unsigned char)*s);
                 s++;
         }
@@ -1306,7 +1399,12 @@ void strlower_m(char *s)
  
         /* I assume that lowercased string takes the same number of bytes
          * as source string even in UTF-8 encoding. (VIV) */
-       unix_strlower(s,strlen(s)+1,s,strlen(s)+1);     
+       len = strlen(s) + 1;
+       errno = 0;
+       unix_strlower(s,len,s,len);     
+       /* Catch mb conversion errors that may not terminate. */
+       if (errno)
+               s[len-1] = '\0';
  }
  
  /**
@@ -1315,12 +1413,14 @@ void strlower_m(char *s)
  
  void strupper_m(char *s)
  {
+       size_t len;
+
         /* this is quite a common operation, so we want it to be
            fast. We optimise for the ascii case, knowing that all our
            supported multi-byte character sets are ascii-compatible
            (ie. they match for the first 128 chars) */
  
-       while (*s && !(((unsigned char)s[0]) & 0x7F)) {
+       while (*s && !(((unsigned char)s[0]) & 0x80)) {
                 *s = toupper((unsigned char)*s);
                 s++;
         }
@@ -1330,7 +1430,12 @@ void strupper_m(char *s)
  
         /* I assume that lowercased string takes the same number of bytes
          * as source string even in multibyte encoding. (VIV) */
-       unix_strupper(s,strlen(s)+1,s,strlen(s)+1);     
+       len = strlen(s) + 1;
+       errno = 0;
+       unix_strupper(s,len,s,len);     
+       /* Catch mb conversion errors that may not terminate. */
+       if (errno)
+               s[len-1] = '\0';
  }
  
  /**
@@ -1601,7 +1706,7 @@ BOOL str_list_substitute(char **list, const char *pattern, const char *insert)
                 s = *list;
                 ls = (ssize_t)strlen(s);
  
-               while ((p = strstr(s, pattern))) {
+               while ((p = strstr_m(s, pattern))) {
                         t = *list;
                         d = p -t;
                         if (ld) {
@@ -1778,11 +1883,6 @@ void rfc1738_unescape(char *buf)
  {
         char *p=buf;
  
-       while ((p=strchr_m(p,'+')))
-               *p = ' ';
-
-       p = buf;
-
         while (p && *p && (p=strchr_m(p,'%'))) {
                 int c1 = p[1];
                 int c2 = p[2];
@@ -1841,6 +1941,8 @@ DATA_BLOB base64_decode_data_blob(const char *s)
                 s++; i++;
         }
  
+       if (*s == '=') n -= 1;
+
         /* fix up length */
         decoded.length = n;
         return decoded;