source4/lib/util_str.c

   1 /*
   2    Unix SMB/CIFS implementation.
   3    Samba utility functions
   4
   5    Copyright (C) Andrew Tridgell 1992-2001
   6    Copyright (C) Simo Sorce      2001-2002
   7    Copyright (C) Martin Pool     2003
   8
   9    This program is free software; you can redistribute it and/or modify
  10    it under the terms of the GNU General Public License as published by
  11    the Free Software Foundation; either version 2 of the License, or
  12    (at your option) any later version.
  13
  14    This program is distributed in the hope that it will be useful,
  15    but WITHOUT ANY WARRANTY; without even the implied warranty of
  16    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  17    GNU General Public License for more details.
  18
  19    You should have received a copy of the GNU General Public License
  20    along with this program; if not, write to the Free Software
  21    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  22 */
  23
  24 #include "includes.h"
  25 #include "system/iconv.h"
  26
  27 /**
  28  * @file
  29  * @brief String utilities.
  30  **/
  31
  32 /**
  33  * Get the next token from a string, return False if none found.
  34  * Handles double-quotes.
  35  *
  36  * Based on a routine by GJC@VILLAGE.COM.
  37  * Extensively modified by Andrew.Tridgell@anu.edu.au
  38  **/
  39 BOOL next_token(const char **ptr,char *buff, const char *sep, size_t bufsize)
  40 {
  41         const char *s;
  42         BOOL quoted;
  43         size_t len=1;
  44
  45         if (!ptr)
  46                 return(False);
  47
  48         s = *ptr;
  49
  50         /* default to simple separators */
  51         if (!sep)
  52                 sep = " \t\n\r";
  53
  54         /* find the first non sep char */
  55         while (*s && strchr_m(sep,*s))
  56                 s++;
  57
  58         /* nothing left? */
  59         if (! *s)
  60                 return(False);
  61
  62         /* copy over the token */
  63         for (quoted = False; len < bufsize && *s && (quoted || !strchr_m(sep,*s)); s++) {
  64                 if (*s == '\"') {
  65                         quoted = !quoted;
  66                 } else {
  67                         len++;
  68                         *buff++ = *s;
  69                 }
  70         }
  71
  72         *ptr = (*s) ? s+1 : s;
  73         *buff = 0;
  74
  75         return(True);
  76 }
  77
  78 /**
  79  Case insensitive string compararison
  80 **/
  81 int StrCaseCmp(const char *s1, const char *s2)
  82 {
  83         codepoint_t c1=0, c2=0;
  84         size_t size1, size2;
  85
  86         while (*s1 && *s2) {
  87                 c1 = next_codepoint(s1, &size1);
  88                 c2 = next_codepoint(s2, &size2);
  89
  90                 s1 += size1;
  91                 s2 += size2;
  92
  93                 if (c1 == c2) {
  94                         continue;
  95                 }
  96
  97                 if (c1 == INVALID_CODEPOINT ||
  98                     c2 == INVALID_CODEPOINT) {
  99                         /* what else can we do?? */
 100                         return c1 - c2;
 101                 }
 102
 103                 if (toupper_w(c1) != toupper_w(c2)) {
 104                         return c1 - c2;
 105                 }
 106         }
 107
 108         return *s1 - *s2;
 109 }
 110
 111 /**
 112  * Compare 2 strings.
 113  *
 114  * @note The comparison is case-insensitive.
 115  **/
 116 BOOL strequal(const char *s1, const char *s2)
 117 {
 118         if (s1 == s2)
 119                 return(True);
 120         if (!s1 || !s2)
 121                 return(False);
 122
 123         return StrCaseCmp(s1,s2) == 0;
 124 }
 125
 126 /**
 127  Compare 2 strings (case sensitive).
 128 **/
 129 BOOL strcsequal(const char *s1,const char *s2)
 130 {
 131         if (s1 == s2)
 132                 return(True);
 133         if (!s1 || !s2)
 134                 return(False);
 135
 136         return strcmp(s1,s2) == 0;
 137 }
 138
 139
 140 /**
 141 Do a case-insensitive, whitespace-ignoring string compare.
 142 **/
 143 int strwicmp(const char *psz1, const char *psz2)
 144 {
 145         /* if BOTH strings are NULL, return TRUE, if ONE is NULL return */
 146         /* appropriate value. */
 147         if (psz1 == psz2)
 148                 return (0);
 149         else if (psz1 == NULL)
 150                 return (-1);
 151         else if (psz2 == NULL)
 152                 return (1);
 153
 154         /* sync the strings on first non-whitespace */
 155         while (1) {
 156                 while (isspace((int)*psz1))
 157                         psz1++;
 158                 while (isspace((int)*psz2))
 159                         psz2++;
 160                 if (toupper(*psz1) != toupper(*psz2) || *psz1 == '\0'
 161                     || *psz2 == '\0')
 162                         break;
 163                 psz1++;
 164                 psz2++;
 165         }
 166         return (*psz1 - *psz2);
 167 }
 168
 169 /**
 170  String replace.
 171  NOTE: oldc and newc must be 7 bit characters
 172 **/
 173 void string_replace(char *s, char oldc, char newc)
 174 {
 175         while (*s) {
 176                 size_t size;
 177                 codepoint_t c = next_codepoint(s, &size);
 178                 if (c == oldc) {
 179                         *s = newc;
 180                 }
 181                 s += size;
 182         }
 183 }
 184
 185 /**
 186  Trim the specified elements off the front and back of a string.
 187 **/
 188 BOOL trim_string(char *s,const char *front,const char *back)
 189 {
 190         BOOL ret = False;
 191         size_t front_len;
 192         size_t back_len;
 193         size_t len;
 194
 195         /* Ignore null or empty strings. */
 196         if (!s || (s[0] == '\0'))
 197                 return False;
 198
 199         front_len       = front? strlen(front) : 0;
 200         back_len        = back? strlen(back) : 0;
 201
 202         len = strlen(s);
 203
 204         if (front_len) {
 205                 while (len && strncmp(s, front, front_len)==0) {
 206                         /* Must use memmove here as src & dest can
 207                          * easily overlap. Found by valgrind. JRA. */
 208                         memmove(s, s+front_len, (len-front_len)+1);
 209                         len -= front_len;
 210                         ret=True;
 211                 }
 212         }
 213
 214         if (back_len) {
 215                 while ((len >= back_len) && strncmp(s+len-back_len,back,back_len)==0) {
 216                         s[len-back_len]='\0';
 217                         len -= back_len;
 218                         ret=True;
 219                 }
 220         }
 221         return ret;
 222 }
 223
 224 /**
 225  Find the number of 'c' chars in a string
 226 **/
 227 size_t count_chars(const char *s, char c)
 228 {
 229         size_t count = 0;
 230
 231         while (*s) {
 232                 size_t size;
 233                 codepoint_t c2 = next_codepoint(s, &size);
 234                 if (c2 == c) count++;
 235                 s += size;
 236         }
 237
 238         return count;
 239 }
 240
 241 /**
 242  Safe string copy into a known length string. maxlength does not
 243  include the terminating zero.
 244 **/
 245 char *safe_strcpy(char *dest,const char *src, size_t maxlength)
 246 {
 247         size_t len;
 248
 249         if (!dest) {
 250                 DEBUG(0,("ERROR: NULL dest in safe_strcpy\n"));
 251                 return NULL;
 252         }
 253
 254 #ifdef DEVELOPER
 255         /* We intentionally write out at the extremity of the destination
 256          * string.  If the destination is too short (e.g. pstrcpy into mallocd
 257          * or fstring) then this should cause an error under a memory
 258          * checker. */
 259         dest[maxlength] = '\0';
 260         if (PTR_DIFF(&len, dest) > 0) {  /* check if destination is on the stack, ok if so */
 261                 log_suspicious_usage("safe_strcpy", src);
 262         }
 263 #endif
 264
 265         if (!src) {
 266                 *dest = 0;
 267                 return dest;
 268         }
 269
 270         len = strlen(src);
 271
 272         if (len > maxlength) {
 273                 DEBUG(0,("ERROR: string overflow by %u (%u - %u) in safe_strcpy [%.50s]\n",
 274                          (uint_t)(len-maxlength), len, maxlength, src));
 275                 len = maxlength;
 276         }
 277
 278         memmove(dest, src, len);
 279         dest[len] = 0;
 280         return dest;
 281 }
 282
 283 /**
 284  Safe string cat into a string. maxlength does not
 285  include the terminating zero.
 286 **/
 287 char *safe_strcat(char *dest, const char *src, size_t maxlength)
 288 {
 289         size_t src_len, dest_len;
 290
 291         if (!dest) {
 292                 DEBUG(0,("ERROR: NULL dest in safe_strcat\n"));
 293                 return NULL;
 294         }
 295
 296         if (!src)
 297                 return dest;
 298
 299 #ifdef DEVELOPER
 300         if (PTR_DIFF(&src_len, dest) > 0) {  /* check if destination is on the stack, ok if so */
 301                 log_suspicious_usage("safe_strcat", src);
 302         }
 303 #endif
 304         src_len = strlen(src);
 305         dest_len = strlen(dest);
 306
 307         if (src_len + dest_len > maxlength) {
 308                 DEBUG(0,("ERROR: string overflow by %d in safe_strcat [%.50s]\n",
 309                          (int)(src_len + dest_len - maxlength), src));
 310                 if (maxlength > dest_len) {
 311                         memcpy(&dest[dest_len], src, maxlength - dest_len);
 312                 }
 313                 dest[maxlength] = 0;
 314                 return NULL;
 315         }
 316
 317         memcpy(&dest[dest_len], src, src_len);
 318         dest[dest_len + src_len] = 0;
 319         return dest;
 320 }
 321
 322 /**
 323  Paranoid strcpy into a buffer of given length (includes terminating
 324  zero. Strips out all but 'a-Z0-9' and the character in other_safe_chars
 325  and replaces with '_'. Deliberately does *NOT* check for multibyte
 326  characters. Don't change it !
 327 **/
 328
 329 char *alpha_strcpy(char *dest, const char *src, const char *other_safe_chars, size_t maxlength)
 330 {
 331         size_t len, i;
 332
 333         if (maxlength == 0) {
 334                 /* can't fit any bytes at all! */
 335                 return NULL;
 336         }
 337
 338         if (!dest) {
 339                 DEBUG(0,("ERROR: NULL dest in alpha_strcpy\n"));
 340                 return NULL;
 341         }
 342
 343         if (!src) {
 344                 *dest = 0;
 345                 return dest;
 346         }
 347
 348         len = strlen(src);
 349         if (len >= maxlength)
 350                 len = maxlength - 1;
 351
 352         if (!other_safe_chars)
 353                 other_safe_chars = "";
 354
 355         for(i = 0; i < len; i++) {
 356                 int val = (src[i] & 0xff);
 357                 if (isupper(val) || islower(val) || isdigit(val) || strchr_m(other_safe_chars, val))
 358                         dest[i] = src[i];
 359                 else
 360                         dest[i] = '_';
 361         }
 362
 363         dest[i] = '\0';
 364
 365         return dest;
 366 }
 367
 368 /**
 369  Like strncpy but always null terminates. Make sure there is room!
 370  The variable n should always be one less than the available size.
 371 **/
 372
 373 char *StrnCpy(char *dest,const char *src,size_t n)
 374 {
 375         char *d = dest;
 376         if (!dest)
 377                 return(NULL);
 378         if (!src) {
 379                 *dest = 0;
 380                 return(dest);
 381         }
 382         while (n-- && (*d++ = *src++))
 383                 ;
 384         *d = 0;
 385         return(dest);
 386 }
 387
 388
 389 /**
 390  Routine to get hex characters and turn them into a 16 byte array.
 391  the array can be variable length, and any non-hex-numeric
 392  characters are skipped.  "0xnn" or "0Xnn" is specially catered
 393  for.
 394
 395  valid examples: "0A5D15"; "0x15, 0x49, 0xa2"; "59\ta9\te3\n"
 396
 397 **/
 398 size_t strhex_to_str(char *p, size_t len, const char *strhex)
 399 {
 400         size_t i;
 401         size_t num_chars = 0;
 402         uint8_t   lonybble, hinybble;
 403         const char     *hexchars = "0123456789ABCDEF";
 404         char           *p1 = NULL, *p2 = NULL;
 405
 406         for (i = 0; i < len && strhex[i] != 0; i++) {
 407                 if (strncasecmp(hexchars, "0x", 2) == 0) {
 408                         i++; /* skip two chars */
 409                         continue;
 410                 }
 411
 412                 if (!(p1 = strchr_m(hexchars, toupper(strhex[i]))))
 413                         break;
 414
 415                 i++; /* next hex digit */
 416
 417                 if (!(p2 = strchr_m(hexchars, toupper(strhex[i]))))
 418                         break;
 419
 420                 /* get the two nybbles */
 421                 hinybble = PTR_DIFF(p1, hexchars);
 422                 lonybble = PTR_DIFF(p2, hexchars);
 423
 424                 p[num_chars] = (hinybble << 4) | lonybble;
 425                 num_chars++;
 426
 427                 p1 = NULL;
 428                 p2 = NULL;
 429         }
 430         return num_chars;
 431 }
 432
 433 DATA_BLOB strhex_to_data_blob(const char *strhex)
 434 {
 435         DATA_BLOB ret_blob = data_blob(NULL, strlen(strhex)/2+1);
 436
 437         ret_blob.length = strhex_to_str(ret_blob.data,
 438                                         strlen(strhex),
 439                                         strhex);
 440
 441         return ret_blob;
 442 }
 443
 444
 445 /**
 446  * Routine to print a buffer as HEX digits, into an allocated string.
 447  */
 448 void hex_encode(const unsigned char *buff_in, size_t len, char **out_hex_buffer)
 449 {
 450         int i;
 451         char *hex_buffer;
 452
 453         *out_hex_buffer = smb_xmalloc((len*2)+1);
 454         hex_buffer = *out_hex_buffer;
 455
 456         for (i = 0; i < len; i++)
 457                 slprintf(&hex_buffer[i*2], 3, "%02X", buff_in[i]);
 458 }
 459
 460 /**
 461  Check if a string is part of a list.
 462 **/
 463 BOOL in_list(const char *s, const char *list, BOOL casesensitive)
 464 {
 465         pstring tok;
 466         const char *p=list;
 467
 468         if (!list)
 469                 return(False);
 470
 471         while (next_token(&p,tok,LIST_SEP,sizeof(tok))) {
 472                 if (casesensitive) {
 473                         if (strcmp(tok,s) == 0)
 474                                 return(True);
 475                 } else {
 476                         if (StrCaseCmp(tok,s) == 0)
 477                                 return(True);
 478                 }
 479         }
 480         return(False);
 481 }
 482
 483 /**
 484  Set a string value, allocing the space for the string
 485 **/
 486 static BOOL string_init(char **dest,const char *src)
 487 {
 488         if (!src) src = "";
 489
 490         (*dest) = strdup(src);
 491         if ((*dest) == NULL) {
 492                 DEBUG(0,("Out of memory in string_init\n"));
 493                 return False;
 494         }
 495         return True;
 496 }
 497
 498 /**
 499  Free a string value.
 500 **/
 501 void string_free(char **s)
 502 {
 503         if (s) SAFE_FREE(*s);
 504 }
 505
 506 /**
 507  Set a string value, deallocating any existing space, and allocing the space
 508  for the string
 509 **/
 510 BOOL string_set(char **dest, const char *src)
 511 {
 512         string_free(dest);
 513         return string_init(dest,src);
 514 }
 515
 516 /**
 517  Substitute a string for a pattern in another string. Make sure there is
 518  enough room!
 519
 520  This routine looks for pattern in s and replaces it with
 521  insert. It may do multiple replacements.
 522
 523  Any of " ; ' $ or ` in the insert string are replaced with _
 524  if len==0 then the string cannot be extended. This is different from the old
 525  use of len==0 which was for no length checks to be done.
 526 **/
 527
 528 void string_sub(char *s,const char *pattern, const char *insert, size_t len)
 529 {
 530         char *p;
 531         ssize_t ls,lp,li, i;
 532
 533         if (!insert || !pattern || !*pattern || !s)
 534                 return;
 535
 536         ls = (ssize_t)strlen(s);
 537         lp = (ssize_t)strlen(pattern);
 538         li = (ssize_t)strlen(insert);
 539
 540         if (len == 0)
 541                 len = ls + 1; /* len is number of *bytes* */
 542
 543         while (lp <= ls && (p = strstr(s,pattern))) {
 544                 if (ls + (li-lp) >= len) {
 545                         DEBUG(0,("ERROR: string overflow by %d in string_sub(%.50s, %d)\n",
 546                                  (int)(ls + (li-lp) - len),
 547                                  pattern, (int)len));
 548                         break;
 549                 }
 550                 if (li != lp) {
 551                         memmove(p+li,p+lp,strlen(p+lp)+1);
 552                 }
 553                 for (i=0;i<li;i++) {
 554                         switch (insert[i]) {
 555                         case '`':
 556                         case '"':
 557                         case '\'':
 558                         case ';':
 559                         case '$':
 560                         case '%':
 561                         case '\r':
 562                         case '\n':
 563                                 p[i] = '_';
 564                                 break;
 565                         default:
 566                                 p[i] = insert[i];
 567                         }
 568                 }
 569                 s = p + li;
 570                 ls += (li-lp);
 571         }
 572 }
 573
 574
 575 /**
 576  Similar to string_sub() but allows for any character to be substituted.
 577  Use with caution!
 578  if len==0 then the string cannot be extended. This is different from the old
 579  use of len==0 which was for no length checks to be done.
 580 **/
 581
 582 void all_string_sub(char *s,const char *pattern,const char *insert, size_t len)
 583 {
 584         char *p;
 585         ssize_t ls,lp,li;
 586
 587         if (!insert || !pattern || !s)
 588                 return;
 589
 590         ls = (ssize_t)strlen(s);
 591         lp = (ssize_t)strlen(pattern);
 592         li = (ssize_t)strlen(insert);
 593
 594         if (!*pattern)
 595                 return;
 596
 597         if (len == 0)
 598                 len = ls + 1; /* len is number of *bytes* */
 599
 600         while (lp <= ls && (p = strstr(s,pattern))) {
 601                 if (ls + (li-lp) >= len) {
 602                         DEBUG(0,("ERROR: string overflow by %d in all_string_sub(%.50s, %d)\n",
 603                                  (int)(ls + (li-lp) - len),
 604                                  pattern, (int)len));
 605                         break;
 606                 }
 607                 if (li != lp) {
 608                         memmove(p+li,p+lp,strlen(p+lp)+1);
 609                 }
 610                 memcpy(p, insert, li);
 611                 s = p + li;
 612                 ls += (li-lp);
 613         }
 614 }
 615
 616
 617 /**
 618  Strchr and strrchr_m are a bit complex on general multi-byte strings.
 619 **/
 620 char *strchr_m(const char *s, char c)
 621 {
 622         /* characters below 0x3F are guaranteed to not appear in
 623            non-initial position in multi-byte charsets */
 624         if ((c & 0xC0) == 0) {
 625                 return strchr(s, c);
 626         }
 627
 628         while (*s) {
 629                 size_t size;
 630                 codepoint_t c2 = next_codepoint(s, &size);
 631                 if (c2 == c) {
 632                         return discard_const(s);
 633                 }
 634                 s += size;
 635         }
 636
 637         return NULL;
 638 }
 639
 640 char *strrchr_m(const char *s, char c)
 641 {
 642         char *ret = NULL;
 643
 644         /* characters below 0x3F are guaranteed to not appear in
 645            non-initial position in multi-byte charsets */
 646         if ((c & 0xC0) == 0) {
 647                 return strrchr(s, c);
 648         }
 649
 650         while (*s) {
 651                 size_t size;
 652                 codepoint_t c2 = next_codepoint(s, &size);
 653                 if (c2 == c) {
 654                         ret = discard_const(s);
 655                 }
 656                 s += size;
 657         }
 658
 659         return ret;
 660 }
 661
 662 /**
 663  Convert a string to lower case, allocated with talloc
 664 **/
 665 char *strlower_talloc(TALLOC_CTX *ctx, const char *src)
 666 {
 667         size_t size=0;
 668         char *dest;
 669
 670         /* this takes advantage of the fact that upper/lower can't
 671            change the length of a character by more than 1 byte */
 672         dest = talloc_size(ctx, 2*(strlen(src))+1);
 673         if (dest == NULL) {
 674                 return NULL;
 675         }
 676
 677         while (*src) {
 678                 size_t c_size;
 679                 codepoint_t c = next_codepoint(src, &c_size);
 680                 src += c_size;
 681
 682                 c = tolower_w(c);
 683
 684                 c_size = push_codepoint(dest+size, c);
 685                 if (c_size == -1) {
 686                         talloc_free(dest);
 687                         return NULL;
 688                 }
 689                 size += c_size;
 690         }
 691
 692         dest[size] = 0;
 693
 694         return dest;
 695 }
 696
 697 /**
 698  Convert a string to UPPER case, allocated with talloc
 699 **/
 700 char *strupper_talloc(TALLOC_CTX *ctx, const char *src)
 701 {
 702         size_t size=0;
 703         char *dest;
 704
 705         /* this takes advantage of the fact that upper/lower can't
 706            change the length of a character by more than 1 byte */
 707         dest = talloc_size(ctx, 2*(strlen(src))+1);
 708         if (dest == NULL) {
 709                 return NULL;
 710         }
 711
 712         while (*src) {
 713                 size_t c_size;
 714                 codepoint_t c = next_codepoint(src, &c_size);
 715                 src += c_size;
 716
 717                 c = toupper_w(c);
 718
 719                 c_size = push_codepoint(dest+size, c);
 720                 if (c_size == -1) {
 721                         talloc_free(dest);
 722                         return NULL;
 723                 }
 724                 size += c_size;
 725         }
 726
 727         dest[size] = 0;
 728
 729         return dest;
 730 }
 731
 732 /**
 733  Convert a string to lower case.
 734 **/
 735 void strlower_m(char *s)
 736 {
 737         char *d;
 738
 739         /* this is quite a common operation, so we want it to be
 740            fast. We optimise for the ascii case, knowing that all our
 741            supported multi-byte character sets are ascii-compatible
 742            (ie. they match for the first 128 chars) */
 743         while (*s && !(((uint8_t)s[0]) & 0x7F)) {
 744                 *s = tolower((uint8_t)*s);
 745                 s++;
 746         }
 747
 748         if (!*s)
 749                 return;
 750
 751         d = s;
 752
 753         while (*s) {
 754                 size_t c_size, c_size2;
 755                 codepoint_t c = next_codepoint(s, &c_size);
 756                 c_size2 = push_codepoint(d, tolower_w(c));
 757                 if (c_size2 > c_size) {
 758                         DEBUG(0,("FATAL: codepoint 0x%x (0x%x) expanded from %d to %d bytes in strlower_m\n",
 759                                  c, tolower_w(c), c_size, c_size2));
 760                         smb_panic("codepoint expansion in strlower_m\n");
 761                 }
 762                 s += c_size;
 763                 d += c_size2;
 764         }
 765         *d = 0;
 766 }
 767
 768 /**
 769  Convert a string to UPPER case.
 770 **/
 771 void strupper_m(char *s)
 772 {
 773         char *d;
 774
 775         /* this is quite a common operation, so we want it to be
 776            fast. We optimise for the ascii case, knowing that all our
 777            supported multi-byte character sets are ascii-compatible
 778            (ie. they match for the first 128 chars) */
 779         while (*s && !(((uint8_t)s[0]) & 0x7F)) {
 780                 *s = toupper((uint8_t)*s);
 781                 s++;
 782         }
 783
 784         if (!*s)
 785                 return;
 786
 787         d = s;
 788
 789         while (*s) {
 790                 size_t c_size, c_size2;
 791                 codepoint_t c = next_codepoint(s, &c_size);
 792                 c_size2 = push_codepoint(d, toupper_w(c));
 793                 if (c_size2 > c_size) {
 794                         DEBUG(0,("FATAL: codepoint 0x%x (0x%x) expanded from %d to %d bytes in strupper_m\n",
 795                                  c, toupper_w(c), c_size, c_size2));
 796                         smb_panic("codepoint expansion in strupper_m\n");
 797                 }
 798                 s += c_size;
 799                 d += c_size2;
 800         }
 801         *d = 0;
 802 }
 803
 804 /**
 805  Count the number of UCS2 characters in a string. Normally this will
 806  be the same as the number of bytes in a string for single byte strings,
 807  but will be different for multibyte.
 808 **/
 809 size_t strlen_m(const char *s)
 810 {
 811         size_t count = 0;
 812
 813         if (!s) {
 814                 return 0;
 815         }
 816
 817         while (*s && !(((uint8_t)s[0]) & 0x7F)) {
 818                 s++;
 819                 count++;
 820         }
 821
 822         if (!*s) {
 823                 return count;
 824         }
 825
 826         while (*s) {
 827                 size_t c_size;
 828                 codepoint_t c = next_codepoint(s, &c_size);
 829                 if (c < 0x10000) {
 830                         count += 1;
 831                 } else {
 832                         count += 2;
 833                 }
 834                 s += c_size;
 835         }
 836
 837         return count;
 838 }
 839
 840 /**
 841    Work out the number of multibyte chars in a string, including the NULL
 842    terminator.
 843 **/
 844 size_t strlen_m_term(const char *s)
 845 {
 846         if (!s) {
 847                 return 0;
 848         }
 849
 850         return strlen_m(s) + 1;
 851 }
 852
 853 /**
 854  Return a RFC2254 binary string representation of a buffer.
 855  Used in LDAP filters.
 856  Caller must free.
 857 **/
 858 char *binary_string(char *buf, int len)
 859 {
 860         char *s;
 861         int i, j;
 862         const char *hex = "0123456789ABCDEF";
 863         s = malloc(len * 3 + 1);
 864         if (!s)
 865                 return NULL;
 866         for (j=i=0;i<len;i++) {
 867                 s[j] = '\\';
 868                 s[j+1] = hex[((uint8_t)buf[i]) >> 4];
 869                 s[j+2] = hex[((uint8_t)buf[i]) & 0xF];
 870                 j += 3;
 871         }
 872         s[j] = 0;
 873         return s;
 874 }
 875
 876 /**
 877  Unescape a URL encoded string, in place.
 878 **/
 879
 880 void rfc1738_unescape(char *buf)
 881 {
 882         char *p=buf;
 883
 884         while ((p=strchr_m(p,'+')))
 885                 *p = ' ';
 886
 887         p = buf;
 888
 889         while (p && *p && (p=strchr_m(p,'%'))) {
 890                 int c1 = p[1];
 891                 int c2 = p[2];
 892
 893                 if (c1 >= '0' && c1 <= '9')
 894                         c1 = c1 - '0';
 895                 else if (c1 >= 'A' && c1 <= 'F')
 896                         c1 = 10 + c1 - 'A';
 897                 else if (c1 >= 'a' && c1 <= 'f')
 898                         c1 = 10 + c1 - 'a';
 899                 else {p++; continue;}
 900
 901                 if (c2 >= '0' && c2 <= '9')
 902                         c2 = c2 - '0';
 903                 else if (c2 >= 'A' && c2 <= 'F')
 904                         c2 = 10 + c2 - 'A';
 905                 else if (c2 >= 'a' && c2 <= 'f')
 906                         c2 = 10 + c2 - 'a';
 907                 else {p++; continue;}
 908
 909                 *p = (c1<<4) | c2;
 910
 911                 memmove(p+1, p+3, strlen(p+3)+1);
 912                 p++;
 913         }
 914 }
 915
 916 /**
 917  * Decode a base64 string into a DATA_BLOB - simple and slow algorithm
 918  **/
 919 DATA_BLOB base64_decode_data_blob(const char *s)
 920 {
 921         const char *b64 = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
 922
 923         int bit_offset, byte_offset, idx, i, n;
 924         DATA_BLOB decoded = data_blob(s, strlen(s)+1);
 925         uint8_t *d = decoded.data;
 926         char *p;
 927
 928         n=i=0;
 929
 930         while (*s && (p=strchr_m(b64,*s))) {
 931                 idx = (int)(p - b64);
 932                 byte_offset = (i*6)/8;
 933                 bit_offset = (i*6)%8;
 934                 d[byte_offset] &= ~((1<<(8-bit_offset))-1);
 935                 if (bit_offset < 3) {
 936                         d[byte_offset] |= (idx << (2-bit_offset));
 937                         n = byte_offset+1;
 938                 } else {
 939                         d[byte_offset] |= (idx >> (bit_offset-2));
 940                         d[byte_offset+1] = 0;
 941                         d[byte_offset+1] |= (idx << (8-(bit_offset-2))) & 0xFF;
 942                         n = byte_offset+2;
 943                 }
 944                 s++; i++;
 945         }
 946
 947         /* fix up length */
 948         decoded.length = n;
 949         return decoded;
 950 }
 951
 952 /**
 953  * Decode a base64 string in-place - wrapper for the above
 954  **/
 955 void base64_decode_inplace(char *s)
 956 {
 957         DATA_BLOB decoded = base64_decode_data_blob(s);
 958         memcpy(s, decoded.data, decoded.length);
 959         data_blob_free(&decoded);
 960
 961         /* null terminate */
 962         s[decoded.length] = '\0';
 963 }
 964
 965 /**
 966  * Encode a base64 string into a malloc()ed string caller to free.
 967  *
 968  *From SQUID: adopted from http://ftp.sunet.se/pub2/gnu/vm/base64-encode.c with adjustments
 969  **/
 970 char * base64_encode_data_blob(DATA_BLOB data)
 971 {
 972         const char *b64 = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
 973         int bits = 0;
 974         int char_count = 0;
 975         size_t out_cnt = 0;
 976         size_t len = data.length;
 977         size_t output_len = data.length * 2;
 978         char *result = malloc(output_len); /* get us plenty of space */
 979
 980         while (len-- && out_cnt < (data.length * 2) - 5) {
 981                 int c = (uint8_t) *(data.data++);
 982                 bits += c;
 983                 char_count++;
 984                 if (char_count == 3) {
 985                         result[out_cnt++] = b64[bits >> 18];
 986                         result[out_cnt++] = b64[(bits >> 12) & 0x3f];
 987                         result[out_cnt++] = b64[(bits >> 6) & 0x3f];
 988             result[out_cnt++] = b64[bits & 0x3f];
 989             bits = 0;
 990             char_count = 0;
 991         } else {
 992             bits <<= 8;
 993         }
 994     }
 995     if (char_count != 0) {
 996         bits <<= 16 - (8 * char_count);
 997         result[out_cnt++] = b64[bits >> 18];
 998         result[out_cnt++] = b64[(bits >> 12) & 0x3f];
 999         if (char_count == 1) {
1000             result[out_cnt++] = '=';
1001             result[out_cnt++] = '=';
1002         } else {
1003             result[out_cnt++] = b64[(bits >> 6) & 0x3f];
1004             result[out_cnt++] = '=';
1005         }
1006     }
1007     result[out_cnt] = '\0';     /* terminate */
1008     return result;
1009 }
1010
1011 #ifdef VALGRIND
/*
  Length of a NUL-terminated string, found by stepping through it one byte
  at a time. Only compiled when VALGRIND is defined.
*/
size_t valgrind_strlen(const char *s)
{
	const char *end = s;

	while (*end != '\0') {
		end++;
	}

	return (size_t)(end - s);
}
1019 #endif
1020
1021
1022 /*
1023   format a string into length-prefixed dotted domain format, as used in NBT
1024   and in some ADS structures
1025 */
1026 const char *str_format_nbt_domain(TALLOC_CTX *mem_ctx, const char *s)
1027 {
1028         char *ret;
1029         int i;
1030         if (!s || !*s) {
1031                 return talloc_strdup(mem_ctx, "");
1032         }
1033         ret = talloc_size(mem_ctx, strlen(s)+2);
1034         if (!ret) {
1035                 return ret;
1036         }
1037
1038         memcpy(ret+1, s, strlen(s)+1);
1039         ret[0] = '.';
1040
1041         for (i=0;ret[i];i++) {
1042                 if (ret[i] == '.') {
1043                         char *p = strchr(ret+i+1, '.');
1044                         if (p) {
1045                                 ret[i] = p-(ret+i+1);
1046                         } else {
1047                                 ret[i] = strlen(ret+i+1);
1048                         }
1049                 }
1050         }
1051
1052         return ret;
1053 }
1054
1055 BOOL add_string_to_array(TALLOC_CTX *mem_ctx,
1056                          const char *str, const char ***strings, int *num)
1057 {
1058         char *dup_str = talloc_strdup(mem_ctx, str);
1059
1060         *strings = talloc_realloc(mem_ctx,
1061                                     *strings,
1062                                     const char *, ((*num)+1));
1063
1064         if ((*strings == NULL) || (dup_str == NULL))
1065                 return False;
1066
1067         (*strings)[*num] = dup_str;
1068         *num += 1;
1069
1070         return True;
1071 }
1072
1073
1074
/*
  Variant of strcmp() that handles NULL ptrs.

  Identical pointers (including two NULLs) compare equal. A lone NULL
  compares greater than any string: NULL vs. string gives 1, string vs.
  NULL gives -1. Otherwise the result is that of strcmp().
*/
int strcmp_safe(const char *s1, const char *s2)
{
	if (s1 == s2) {
		return 0;
	}
	if (s1 == NULL) {
		return 1;
	}
	if (s2 == NULL) {
		return -1;
	}
	return strcmp(s1, s2);
}
1088
1089
/*******************************************************************
 Return the number of bytes occupied by a buffer in ASCII format.
 The result includes the null termination when it lies within the
 first 'n' bytes; otherwise the result is 'n'. At most 'n' bytes of
 'src' are examined.
********************************************************************/
size_t ascii_len_n(const char *src, size_t n)
{
	size_t used = 0;

	/* bounded scan for the terminator */
	while (used < n && src[used] != '\0') {
		used++;
	}

	/* count the terminator only if it was found inside the limit */
	if (used < n) {
		used++;
	}

	return used;
}
1106
1107
1108 /*******************************************************************
1109  Return a string representing a CIFS attribute for a file.
1110 ********************************************************************/
1111 char *attrib_string(TALLOC_CTX *mem_ctx, uint32_t attrib)
1112 {
1113         int i, len;
1114         const struct {
1115                 char c;
1116                 uint16_t attr;
1117         } attr_strs[] = {
1118                 {'V', FILE_ATTRIBUTE_VOLUME},
1119                 {'D', FILE_ATTRIBUTE_DIRECTORY},
1120                 {'A', FILE_ATTRIBUTE_ARCHIVE},
1121                 {'H', FILE_ATTRIBUTE_HIDDEN},
1122                 {'S', FILE_ATTRIBUTE_SYSTEM},
1123                 {'N', FILE_ATTRIBUTE_NORMAL},
1124                 {'R', FILE_ATTRIBUTE_READONLY},
1125                 {'d', FILE_ATTRIBUTE_DEVICE},
1126                 {'t', FILE_ATTRIBUTE_TEMPORARY},
1127                 {'s', FILE_ATTRIBUTE_SPARSE},
1128                 {'r', FILE_ATTRIBUTE_REPARSE_POINT},
1129                 {'c', FILE_ATTRIBUTE_COMPRESSED},
1130                 {'o', FILE_ATTRIBUTE_OFFLINE},
1131                 {'n', FILE_ATTRIBUTE_NONINDEXED},
1132                 {'e', FILE_ATTRIBUTE_ENCRYPTED}
1133         };
1134         char *ret;
1135
1136         ret = talloc_size(mem_ctx, ARRAY_SIZE(attr_strs)+1);
1137         if (!ret) {
1138                 return NULL;
1139         }
1140
1141         for (len=i=0; i<ARRAY_SIZE(attr_strs); i++) {
1142                 if (attrib & attr_strs[i].attr) {
1143                         ret[len++] = attr_strs[i].c;
1144                 }
1145         }
1146
1147         ret[len] = 0;
1148
1149         return ret;
1150 }
1151