lib/util/charset/util_unistr.c

   1 /*
   2    Unix SMB/CIFS implementation.
   3    Samba utility functions
   4    Copyright (C) Andrew Tridgell 1992-2001
   5    Copyright (C) Simo Sorce 2001
   6
   7    This program is free software; you can redistribute it and/or modify
   8    it under the terms of the GNU General Public License as published by
   9    the Free Software Foundation; either version 3 of the License, or
  10    (at your option) any later version.
  11
  12    This program is distributed in the hope that it will be useful,
  13    but WITHOUT ANY WARRANTY; without even the implied warranty of
  14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15    GNU General Public License for more details.
  16
  17    You should have received a copy of the GNU General Public License
  18    along with this program.  If not, see <http://www.gnu.org/licenses/>.
  19 */
  20
  21 #include "includes.h"
  22 #include "system/locale.h"
  23
  24 struct smb_iconv_convenience *global_iconv_convenience = NULL;
  25
  26 static inline struct smb_iconv_convenience *get_iconv_convenience(void)
  27 {
  28         if (global_iconv_convenience == NULL)
  29                 global_iconv_convenience = smb_iconv_convenience_reinit(talloc_autofree_context(),
  30                                                                         "ASCII", "UTF-8", true, NULL);
  31         return global_iconv_convenience;
  32 }
  33
  34 /**
  35  Case insensitive string compararison
  36 **/
  37 _PUBLIC_ int strcasecmp_m(const char *s1, const char *s2)
  38 {
  39         codepoint_t c1=0, c2=0;
  40         size_t size1, size2;
  41         struct smb_iconv_convenience *iconv_convenience = get_iconv_convenience();
  42
  43         /* handle null ptr comparisons to simplify the use in qsort */
  44         if (s1 == s2) return 0;
  45         if (s1 == NULL) return -1;
  46         if (s2 == NULL) return 1;
  47
  48         while (*s1 && *s2) {
  49                 c1 = next_codepoint_convenience(iconv_convenience, s1, &size1);
  50                 c2 = next_codepoint_convenience(iconv_convenience, s2, &size2);
  51
  52                 s1 += size1;
  53                 s2 += size2;
  54
  55                 if (c1 == c2) {
  56                         continue;
  57                 }
  58
  59                 if (c1 == INVALID_CODEPOINT ||
  60                     c2 == INVALID_CODEPOINT) {
  61                         /* what else can we do?? */
  62                         return strcasecmp(s1, s2);
  63                 }
  64
  65                 if (toupper_m(c1) != toupper_m(c2)) {
  66                         return c1 - c2;
  67                 }
  68         }
  69
  70         return *s1 - *s2;
  71 }
  72
  73 /**
  74  * Get the next token from a string, return False if none found.
  75  * Handles double-quotes.
  76  *
  77  * Based on a routine by GJC@VILLAGE.COM.
  78  * Extensively modified by Andrew.Tridgell@anu.edu.au
  79  **/
  80 _PUBLIC_ bool next_token(const char **ptr,char *buff, const char *sep, size_t bufsize)
  81 {
  82         const char *s;
  83         bool quoted;
  84         size_t len=1;
  85
  86         if (!ptr)
  87                 return false;
  88
  89         s = *ptr;
  90
  91         /* default to simple separators */
  92         if (!sep)
  93                 sep = " \t\n\r";
  94
  95         /* find the first non sep char */
  96         while (*s && strchr_m(sep,*s))
  97                 s++;
  98
  99         /* nothing left? */
 100         if (!*s)
 101                 return false;
 102
 103         /* copy over the token */
 104         for (quoted = false; len < bufsize && *s && (quoted || !strchr_m(sep,*s)); s++) {
 105                 if (*s == '\"') {
 106                         quoted = !quoted;
 107                 } else {
 108                         len++;
 109                         *buff++ = *s;
 110                 }
 111         }
 112
 113         *ptr = (*s) ? s+1 : s;
 114         *buff = 0;
 115
 116         return true;
 117 }
 118
 119 /**
 120  Case insensitive string compararison, length limited
 121 **/
 122 _PUBLIC_ int strncasecmp_m(const char *s1, const char *s2, size_t n)
 123 {
 124         codepoint_t c1=0, c2=0;
 125         size_t size1, size2;
 126         struct smb_iconv_convenience *iconv_convenience = get_iconv_convenience();
 127
 128         /* handle null ptr comparisons to simplify the use in qsort */
 129         if (s1 == s2) return 0;
 130         if (s1 == NULL) return -1;
 131         if (s2 == NULL) return 1;
 132
 133         while (*s1 && *s2 && n) {
 134                 n--;
 135
 136                 c1 = next_codepoint_convenience(iconv_convenience, s1, &size1);
 137                 c2 = next_codepoint_convenience(iconv_convenience, s2, &size2);
 138
 139                 s1 += size1;
 140                 s2 += size2;
 141
 142                 if (c1 == c2) {
 143                         continue;
 144                 }
 145
 146                 if (c1 == INVALID_CODEPOINT ||
 147                     c2 == INVALID_CODEPOINT) {
 148                         /* what else can we do?? */
 149                         return strcasecmp(s1, s2);
 150                 }
 151
 152                 if (toupper_m(c1) != toupper_m(c2)) {
 153                         return c1 - c2;
 154                 }
 155         }
 156
 157         if (n == 0) {
 158                 return 0;
 159         }
 160
 161         return *s1 - *s2;
 162 }
 163
 164 /**
 165  * Compare 2 strings.
 166  *
 167  * @note The comparison is case-insensitive.
 168  **/
 169 _PUBLIC_ bool strequal_m(const char *s1, const char *s2)
 170 {
 171         return strcasecmp_m(s1,s2) == 0;
 172 }
 173
 174 /**
 175  Compare 2 strings (case sensitive).
 176 **/
 177 _PUBLIC_ bool strcsequal_m(const char *s1,const char *s2)
 178 {
 179         if (s1 == s2)
 180                 return true;
 181         if (!s1 || !s2)
 182                 return false;
 183
 184         return strcmp(s1,s2) == 0;
 185 }
 186
 187
 188 /**
 189  String replace.
 190  NOTE: oldc and newc must be 7 bit characters
 191 **/
 192 _PUBLIC_ void string_replace_m(char *s, char oldc, char newc)
 193 {
 194         struct smb_iconv_convenience *ic = get_iconv_convenience();
 195         while (s && *s) {
 196                 size_t size;
 197                 codepoint_t c = next_codepoint_convenience(ic, s, &size);
 198                 if (c == oldc) {
 199                         *s = newc;
 200                 }
 201                 s += size;
 202         }
 203 }
 204
 205 /**
 206  Paranoid strcpy into a buffer of given length (includes terminating
 207  zero. Strips out all but 'a-Z0-9' and the character in other_safe_chars
 208  and replaces with '_'. Deliberately does *NOT* check for multibyte
 209  characters. Don't change it !
 210 **/
 211
 212 _PUBLIC_ char *alpha_strcpy(char *dest, const char *src, const char *other_safe_chars, size_t maxlength)
 213 {
 214         size_t len, i;
 215
 216         if (maxlength == 0) {
 217                 /* can't fit any bytes at all! */
 218                 return NULL;
 219         }
 220
 221         if (!dest) {
 222                 DEBUG(0,("ERROR: NULL dest in alpha_strcpy\n"));
 223                 return NULL;
 224         }
 225
 226         if (!src) {
 227                 *dest = 0;
 228                 return dest;
 229         }
 230
 231         len = strlen(src);
 232         if (len >= maxlength)
 233                 len = maxlength - 1;
 234
 235         if (!other_safe_chars)
 236                 other_safe_chars = "";
 237
 238         for(i = 0; i < len; i++) {
 239                 int val = (src[i] & 0xff);
 240                 if (isupper(val) || islower(val) || isdigit(val) || strchr_m(other_safe_chars, val))
 241                         dest[i] = src[i];
 242                 else
 243                         dest[i] = '_';
 244         }
 245
 246         dest[i] = '\0';
 247
 248         return dest;
 249 }
 250
 251 /**
 252  * Calculate the number of units (8 or 16-bit, depending on the
 253  * destination charset), that would be needed to convert the input
 254  * string which is expected to be in in src_charset encoding to the
 255  * destination charset (which should be a unicode charset).
 256  */
 257 _PUBLIC_ size_t strlen_m_ext(const char *s, charset_t src_charset, charset_t dst_charset)
 258 {
 259         size_t count = 0;
 260         struct smb_iconv_convenience *ic = get_iconv_convenience();
 261
 262         if (!s) {
 263                 return 0;
 264         }
 265
 266         while (*s && !(((uint8_t)*s) & 0x80)) {
 267                 s++;
 268                 count++;
 269         }
 270
 271         if (!*s) {
 272                 return count;
 273         }
 274
 275         while (*s) {
 276                 size_t c_size;
 277                 codepoint_t c = next_codepoint_convenience_ext(ic, s, src_charset, &c_size);
 278                 s += c_size;
 279
 280                 switch (dst_charset) {
 281                 case CH_UTF16LE:
 282                 case CH_UTF16BE:
 283                 case CH_UTF16MUNGED:
 284                         if (c < 0x10000) {
 285                                 count += 1;
 286                         } else {
 287                                 count += 2;
 288                         }
 289                         break;
 290                 case CH_UTF8:
 291                         /*
 292                          * this only checks ranges, and does not
 293                          * check for invalid codepoints
 294                          */
 295                         if (c < 0x80) {
 296                                 count += 1;
 297                         } else if (c < 0x800) {
 298                                 count += 2;
 299                         } else if (c < 0x1000) {
 300                                 count += 3;
 301                         } else {
 302                                 count += 4;
 303                         }
 304                         break;
 305                 default:
 306                         /*
 307                          * non-unicode encoding:
 308                          * assume that each codepoint fits into
 309                          * one unit in the destination encoding.
 310                          */
 311                         count += 1;
 312                 }
 313         }
 314
 315         return count;
 316 }
 317
 318 _PUBLIC_ size_t strlen_m_ext_term(const char *s, const charset_t src_charset,
 319                                   const charset_t dst_charset)
 320 {
 321         if (!s) {
 322                 return 0;
 323         }
 324         return strlen_m_ext(s, src_charset, dst_charset) + 1;
 325 }
 326
 327 /**
 328  * Calculate the number of 16-bit units that would be needed to convert
 329  * the input string which is expected to be in CH_UNIX encoding to UTF16.
 330  *
 331  * This will be the same as the number of bytes in a string for single
 332  * byte strings, but will be different for multibyte.
 333  */
 334 _PUBLIC_ size_t strlen_m(const char *s)
 335 {
 336         return strlen_m_ext(s, CH_UNIX, CH_UTF16LE);
 337 }
 338
 339 /**
 340    Work out the number of multibyte chars in a string, including the NULL
 341    terminator.
 342 **/
 343 _PUBLIC_ size_t strlen_m_term(const char *s)
 344 {
 345         if (!s) {
 346                 return 0;
 347         }
 348
 349         return strlen_m(s) + 1;
 350 }
 351
 352 /*
 353  * Weird helper routine for the winreg pipe: If nothing is around, return 0,
 354  * if a string is there, include the terminator.
 355  */
 356
 357 _PUBLIC_ size_t strlen_m_term_null(const char *s)
 358 {
 359         size_t len;
 360         if (!s) {
 361                 return 0;
 362         }
 363         len = strlen_m(s);
 364         if (len == 0) {
 365                 return 0;
 366         }
 367
 368         return len+1;
 369 }
 370
 371 /**
 372  Strchr and strrchr_m are a bit complex on general multi-byte strings.
 373 **/
 374 _PUBLIC_ char *strchr_m(const char *s, char c)
 375 {
 376         struct smb_iconv_convenience *ic = get_iconv_convenience();
 377         if (s == NULL) {
 378                 return NULL;
 379         }
 380         /* characters below 0x3F are guaranteed to not appear in
 381            non-initial position in multi-byte charsets */
 382         if ((c & 0xC0) == 0) {
 383                 return strchr(s, c);
 384         }
 385
 386         while (*s) {
 387                 size_t size;
 388                 codepoint_t c2 = next_codepoint_convenience(ic, s, &size);
 389                 if (c2 == c) {
 390                         return discard_const_p(char, s);
 391                 }
 392                 s += size;
 393         }
 394
 395         return NULL;
 396 }
 397
 398 /**
 399  * Multibyte-character version of strrchr
 400  */
 401 _PUBLIC_ char *strrchr_m(const char *s, char c)
 402 {
 403         struct smb_iconv_convenience *ic = get_iconv_convenience();
 404         char *ret = NULL;
 405
 406         if (s == NULL) {
 407                 return NULL;
 408         }
 409
 410         /* characters below 0x3F are guaranteed to not appear in
 411            non-initial position in multi-byte charsets */
 412         if ((c & 0xC0) == 0) {
 413                 return strrchr(s, c);
 414         }
 415
 416         while (*s) {
 417                 size_t size;
 418                 codepoint_t c2 = next_codepoint_convenience(ic, s, &size);
 419                 if (c2 == c) {
 420                         ret = discard_const_p(char, s);
 421                 }
 422                 s += size;
 423         }
 424
 425         return ret;
 426 }
 427
 428 /**
 429   return True if any (multi-byte) character is lower case
 430 */
 431 _PUBLIC_ bool strhaslower(const char *string)
 432 {
 433         struct smb_iconv_convenience *ic = get_iconv_convenience();
 434         while (*string) {
 435                 size_t c_size;
 436                 codepoint_t s;
 437                 codepoint_t t;
 438
 439                 s = next_codepoint_convenience(ic, string, &c_size);
 440                 string += c_size;
 441
 442                 t = toupper_m(s);
 443
 444                 if (s != t) {
 445                         return true; /* that means it has lower case chars */
 446                 }
 447         }
 448
 449         return false;
 450 }
 451
 452 /**
 453   return True if any (multi-byte) character is upper case
 454 */
 455 _PUBLIC_ bool strhasupper(const char *string)
 456 {
 457         struct smb_iconv_convenience *ic = get_iconv_convenience();
 458         while (*string) {
 459                 size_t c_size;
 460                 codepoint_t s;
 461                 codepoint_t t;
 462
 463                 s = next_codepoint_convenience(ic, string, &c_size);
 464                 string += c_size;
 465
 466                 t = tolower_m(s);
 467
 468                 if (s != t) {
 469                         return true; /* that means it has upper case chars */
 470                 }
 471         }
 472
 473         return false;
 474 }
 475
 476 /**
 477  Convert a string to lower case, allocated with talloc
 478 **/
 479 _PUBLIC_ char *strlower_talloc(TALLOC_CTX *ctx, const char *src)
 480 {
 481         size_t size=0;
 482         char *dest;
 483         struct smb_iconv_convenience *iconv_convenience = get_iconv_convenience();
 484
 485         if(src == NULL) {
 486                 return NULL;
 487         }
 488
 489         /* this takes advantage of the fact that upper/lower can't
 490            change the length of a character by more than 1 byte */
 491         dest = talloc_array(ctx, char, 2*(strlen(src))+1);
 492         if (dest == NULL) {
 493                 return NULL;
 494         }
 495
 496         while (*src) {
 497                 size_t c_size;
 498                 codepoint_t c = next_codepoint_convenience(iconv_convenience, src, &c_size);
 499                 src += c_size;
 500
 501                 c = tolower_m(c);
 502
 503                 c_size = push_codepoint_convenience(iconv_convenience, dest+size, c);
 504                 if (c_size == -1) {
 505                         talloc_free(dest);
 506                         return NULL;
 507                 }
 508                 size += c_size;
 509         }
 510
 511         dest[size] = 0;
 512
 513         /* trim it so talloc_append_string() works */
 514         dest = talloc_realloc(ctx, dest, char, size+1);
 515
 516         talloc_set_name_const(dest, dest);
 517
 518         return dest;
 519 }
 520
 521 /**
 522  Convert a string to UPPER case, allocated with talloc
 523  source length limited to n bytes
 524 **/
 525 _PUBLIC_ char *strupper_talloc_n(TALLOC_CTX *ctx, const char *src, size_t n)
 526 {
 527         size_t size=0;
 528         char *dest;
 529         struct smb_iconv_convenience *iconv_convenience = get_iconv_convenience();
 530
 531         if (!src) {
 532                 return NULL;
 533         }
 534
 535         /* this takes advantage of the fact that upper/lower can't
 536            change the length of a character by more than 1 byte */
 537         dest = talloc_array(ctx, char, 2*(n+1));
 538         if (dest == NULL) {
 539                 return NULL;
 540         }
 541
 542         while (n-- && *src) {
 543                 size_t c_size;
 544                 codepoint_t c = next_codepoint_convenience(iconv_convenience, src, &c_size);
 545                 src += c_size;
 546
 547                 c = toupper_m(c);
 548
 549                 c_size = push_codepoint_convenience(iconv_convenience, dest+size, c);
 550                 if (c_size == -1) {
 551                         talloc_free(dest);
 552                         return NULL;
 553                 }
 554                 size += c_size;
 555         }
 556
 557         dest[size] = 0;
 558
 559         /* trim it so talloc_append_string() works */
 560         dest = talloc_realloc(ctx, dest, char, size+1);
 561
 562         talloc_set_name_const(dest, dest);
 563
 564         return dest;
 565 }
 566
 567 /**
 568  Convert a string to UPPER case, allocated with talloc
 569 **/
 570 _PUBLIC_ char *strupper_talloc(TALLOC_CTX *ctx, const char *src)
 571 {
 572         return strupper_talloc_n(ctx, src, src?strlen(src):0);
 573 }
 574
 575 /**
 576  talloc_strdup() a unix string to upper case.
 577 **/
 578 _PUBLIC_ char *talloc_strdup_upper(TALLOC_CTX *ctx, const char *src)
 579 {
 580         return strupper_talloc(ctx, src);
 581 }
 582
 583 /**
 584  Convert a string to lower case.
 585 **/
 586 _PUBLIC_ void strlower_m(char *s)
 587 {
 588         char *d;
 589         struct smb_iconv_convenience *iconv_convenience;
 590
 591         /* this is quite a common operation, so we want it to be
 592            fast. We optimise for the ascii case, knowing that all our
 593            supported multi-byte character sets are ascii-compatible
 594            (ie. they match for the first 128 chars) */
 595         while (*s && !(((uint8_t)*s) & 0x80)) {
 596                 *s = tolower((uint8_t)*s);
 597                 s++;
 598         }
 599
 600         if (!*s)
 601                 return;
 602
 603         iconv_convenience = get_iconv_convenience();
 604
 605         d = s;
 606
 607         while (*s) {
 608                 size_t c_size, c_size2;
 609                 codepoint_t c = next_codepoint_convenience(iconv_convenience, s, &c_size);
 610                 c_size2 = push_codepoint_convenience(iconv_convenience, d, tolower_m(c));
 611                 if (c_size2 > c_size) {
 612                         DEBUG(0,("FATAL: codepoint 0x%x (0x%x) expanded from %d to %d bytes in strlower_m\n",
 613                                  c, tolower_m(c), (int)c_size, (int)c_size2));
 614                         smb_panic("codepoint expansion in strlower_m\n");
 615                 }
 616                 s += c_size;
 617                 d += c_size2;
 618         }
 619         *d = 0;
 620 }
 621
 622 /**
 623  Convert a string to UPPER case.
 624 **/
 625 _PUBLIC_ void strupper_m(char *s)
 626 {
 627         char *d;
 628         struct smb_iconv_convenience *iconv_convenience;
 629
 630         /* this is quite a common operation, so we want it to be
 631            fast. We optimise for the ascii case, knowing that all our
 632            supported multi-byte character sets are ascii-compatible
 633            (ie. they match for the first 128 chars) */
 634         while (*s && !(((uint8_t)*s) & 0x80)) {
 635                 *s = toupper((uint8_t)*s);
 636                 s++;
 637         }
 638
 639         if (!*s)
 640                 return;
 641
 642         iconv_convenience = get_iconv_convenience();
 643
 644         d = s;
 645
 646         while (*s) {
 647                 size_t c_size, c_size2;
 648                 codepoint_t c = next_codepoint_convenience(iconv_convenience, s, &c_size);
 649                 c_size2 = push_codepoint_convenience(iconv_convenience, d, toupper_m(c));
 650                 if (c_size2 > c_size) {
 651                         DEBUG(0,("FATAL: codepoint 0x%x (0x%x) expanded from %d to %d bytes in strupper_m\n",
 652                                  c, toupper_m(c), (int)c_size, (int)c_size2));
 653                         smb_panic("codepoint expansion in strupper_m\n");
 654                 }
 655                 s += c_size;
 656                 d += c_size2;
 657         }
 658         *d = 0;
 659 }
 660
 661
 662 /**
 663  Find the number of 'c' chars in a string
 664 **/
 665 _PUBLIC_ size_t count_chars_m(const char *s, char c)
 666 {
 667         struct smb_iconv_convenience *ic = get_iconv_convenience();
 668         size_t count = 0;
 669
 670         while (*s) {
 671                 size_t size;
 672                 codepoint_t c2 = next_codepoint_convenience(ic, s, &size);
 673                 if (c2 == c) count++;
 674                 s += size;
 675         }
 676
 677         return count;
 678 }
 679
 680
 681 /**
 682  * Copy a string from a char* unix src to a dos codepage string destination.
 683  *
 684  * @return the number of bytes occupied by the string in the destination.
 685  *
 686  * @param flags can include
 687  * <dl>
 688  * <dt>STR_TERMINATE</dt> <dd>means include the null termination</dd>
 689  * <dt>STR_UPPER</dt> <dd>means uppercase in the destination</dd>
 690  * </dl>
 691  *
 692  * @param dest_len the maximum length in bytes allowed in the
 693  * destination.  If @p dest_len is -1 then no maximum is used.
 694  **/
 695 static ssize_t push_ascii(void *dest, const char *src, size_t dest_len, int flags)
 696 {
 697         size_t src_len;
 698         ssize_t ret;
 699
 700         if (flags & STR_UPPER) {
 701                 char *tmpbuf = strupper_talloc(NULL, src);
 702                 if (tmpbuf == NULL) {
 703                         return -1;
 704                 }
 705                 ret = push_ascii(dest, tmpbuf, dest_len, flags & ~STR_UPPER);
 706                 talloc_free(tmpbuf);
 707                 return ret;
 708         }
 709
 710         src_len = strlen(src);
 711
 712         if (flags & (STR_TERMINATE | STR_TERMINATE_ASCII))
 713                 src_len++;
 714
 715         return convert_string(CH_UNIX, CH_DOS, src, src_len, dest, dest_len, false);
 716 }
 717
 718 /**
 719  * Copy a string from a unix char* src to an ASCII destination,
 720  * allocating a buffer using talloc().
 721  *
 722  * @param dest always set at least to NULL
 723  *
 724  * @returns The number of bytes occupied by the string in the destination
 725  *         or -1 in case of error.
 726  **/
 727 _PUBLIC_ bool push_ascii_talloc(TALLOC_CTX *ctx, char **dest, const char *src, size_t *converted_size)
 728 {
 729         size_t src_len = strlen(src)+1;
 730         *dest = NULL;
 731         return convert_string_talloc(ctx, CH_UNIX, CH_DOS, src, src_len, (void **)dest, converted_size, false);
 732 }
 733
 734
 735 /**
 736  * Copy a string from a dos codepage source to a unix char* destination.
 737  *
 738  * The resulting string in "dest" is always null terminated.
 739  *
 740  * @param flags can have:
 741  * <dl>
 742  * <dt>STR_TERMINATE</dt>
 743  * <dd>STR_TERMINATE means the string in @p src
 744  * is null terminated, and src_len is ignored.</dd>
 745  * </dl>
 746  *
 747  * @param src_len is the length of the source area in bytes.
 748  * @returns the number of bytes occupied by the string in @p src.
 749  **/
 750 static ssize_t pull_ascii(char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
 751 {
 752         size_t ret;
 753
 754         if (flags & (STR_TERMINATE | STR_TERMINATE_ASCII)) {
 755                 if (src_len == (size_t)-1) {
 756                         src_len = strlen((const char *)src) + 1;
 757                 } else {
 758                         size_t len = strnlen((const char *)src, src_len);
 759                         if (len < src_len)
 760                                 len++;
 761                         src_len = len;
 762                 }
 763         }
 764
 765         ret = convert_string(CH_DOS, CH_UNIX, src, src_len, dest, dest_len, false);
 766
 767         if (dest_len)
 768                 dest[MIN(ret, dest_len-1)] = 0;
 769
 770         return src_len;
 771 }
 772
 773 /**
 774  * Copy a string from a char* src to a unicode destination.
 775  *
 776  * @returns the number of bytes occupied by the string in the destination.
 777  *
 778  * @param flags can have:
 779  *
 780  * <dl>
 781  * <dt>STR_TERMINATE <dd>means include the null termination.
 782  * <dt>STR_UPPER     <dd>means uppercase in the destination.
 783  * <dt>STR_NOALIGN   <dd>means don't do alignment.
 784  * </dl>
 785  *
 786  * @param dest_len is the maximum length allowed in the
 787  * destination. If dest_len is -1 then no maxiumum is used.
 788  **/
 789 static ssize_t push_ucs2(void *dest, const char *src, size_t dest_len, int flags)
 790 {
 791         size_t len=0;
 792         size_t src_len = strlen(src);
 793         size_t ret;
 794
 795         if (flags & STR_UPPER) {
 796                 char *tmpbuf = strupper_talloc(NULL, src);
 797                 if (tmpbuf == NULL) {
 798                         return -1;
 799                 }
 800                 ret = push_ucs2(dest, tmpbuf, dest_len, flags & ~STR_UPPER);
 801                 talloc_free(tmpbuf);
 802                 return ret;
 803         }
 804
 805         if (flags & STR_TERMINATE)
 806                 src_len++;
 807
 808         if (ucs2_align(NULL, dest, flags)) {
 809                 *(char *)dest = 0;
 810                 dest = (void *)((char *)dest + 1);
 811                 if (dest_len) dest_len--;
 812                 len++;
 813         }
 814
 815         /* ucs2 is always a multiple of 2 bytes */
 816         dest_len &= ~1;
 817
 818         ret = convert_string(CH_UNIX, CH_UTF16, src, src_len, dest, dest_len, false);
 819         if (ret == (size_t)-1) {
 820                 return 0;
 821         }
 822
 823         len += ret;
 824
 825         return len;
 826 }
 827
 828
 829 /**
 830  * Copy a string from a unix char* src to a UCS2 destination,
 831  * allocating a buffer using talloc().
 832  *
 833  * @param dest always set at least to NULL
 834  *
 835  * @returns The number of bytes occupied by the string in the destination
 836  *         or -1 in case of error.
 837  **/
 838 _PUBLIC_ bool push_ucs2_talloc(TALLOC_CTX *ctx, smb_ucs2_t **dest, const char *src, size_t *converted_size)
 839 {
 840         size_t src_len = strlen(src)+1;
 841         *dest = NULL;
 842         return convert_string_talloc(ctx, CH_UNIX, CH_UTF16, src, src_len, (void **)dest, converted_size, false);
 843 }
 844
 845
 846 /**
 847  * Copy a string from a unix char* src to a UTF-8 destination, allocating a buffer using talloc
 848  *
 849  * @param dest always set at least to NULL
 850  *
 851  * @returns The number of bytes occupied by the string in the destination
 852  **/
 853
 854 _PUBLIC_ bool push_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src, size_t *converted_size)
 855 {
 856         size_t src_len = strlen(src)+1;
 857         *dest = NULL;
 858         return convert_string_talloc(ctx, CH_UNIX, CH_UTF8, src, src_len, (void **)dest, converted_size, false);
 859 }
 860
 861 /**
 862  Copy a string from a ucs2 source to a unix char* destination.
 863  Flags can have:
 864   STR_TERMINATE means the string in src is null terminated.
 865   STR_NOALIGN   means don't try to align.
 866  if STR_TERMINATE is set then src_len is ignored if it is -1.
 867  src_len is the length of the source area in bytes
 868  Return the number of bytes occupied by the string in src.
 869  The resulting string in "dest" is always null terminated.
 870 **/
 871
 872 static size_t pull_ucs2(char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
 873 {
 874         size_t ret;
 875
 876         if (ucs2_align(NULL, src, flags)) {
 877                 src = (const void *)((const char *)src + 1);
 878                 if (src_len > 0)
 879                         src_len--;
 880         }
 881
 882         if (flags & STR_TERMINATE) {
 883                 if (src_len == (size_t)-1) {
 884                         src_len = utf16_len(src);
 885                 } else {
 886                         src_len = utf16_len_n(src, src_len);
 887                 }
 888         }
 889
 890         /* ucs2 is always a multiple of 2 bytes */
 891         if (src_len != (size_t)-1)
 892                 src_len &= ~1;
 893
 894         ret = convert_string(CH_UTF16, CH_UNIX, src, src_len, dest, dest_len, false);
 895         if (dest_len)
 896                 dest[MIN(ret, dest_len-1)] = 0;
 897
 898         return src_len;
 899 }
 900
 901 /**
 902  * Copy a string from a ASCII src to a unix char * destination, allocating a buffer using talloc
 903  *
 904  * @param dest always set at least to NULL
 905  *
 906  * @returns The number of bytes occupied by the string in the destination
 907  **/
 908
 909 _PUBLIC_ bool pull_ascii_talloc(TALLOC_CTX *ctx, char **dest, const char *src, size_t *converted_size)
 910 {
 911         size_t src_len = strlen(src)+1;
 912         *dest = NULL;
 913         return convert_string_talloc(ctx, CH_DOS, CH_UNIX, src, src_len, (void **)dest, converted_size, false);
 914 }
 915
 916 /**
 917  * Copy a string from a UCS2 src to a unix char * destination, allocating a buffer using talloc
 918  *
 919  * @param dest always set at least to NULL
 920  *
 921  * @returns The number of bytes occupied by the string in the destination
 922  **/
 923
 924 _PUBLIC_ bool pull_ucs2_talloc(TALLOC_CTX *ctx, char **dest, const smb_ucs2_t *src, size_t *converted_size)
 925 {
 926         size_t src_len = utf16_len(src);
 927         *dest = NULL;
 928         return convert_string_talloc(ctx, CH_UTF16, CH_UNIX, src, src_len, (void **)dest, converted_size, false);
 929 }
 930
 931 /**
 932  * Copy a string from a UTF-8 src to a unix char * destination, allocating a buffer using talloc
 933  *
 934  * @param dest always set at least to NULL
 935  *
 936  * @returns The number of bytes occupied by the string in the destination
 937  **/
 938
 939 _PUBLIC_ bool pull_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src, size_t *converted_size)
 940 {
 941         size_t src_len = strlen(src)+1;
 942         *dest = NULL;
 943         return convert_string_talloc(ctx, CH_UTF8, CH_UNIX, src, src_len, (void **)dest, converted_size, false);
 944 }
 945
 946 /**
 947  Copy a string from a char* src to a unicode or ascii
 948  dos codepage destination choosing unicode or ascii based on the
 949  flags in the SMB buffer starting at base_ptr.
 950  Return the number of bytes occupied by the string in the destination.
 951  flags can have:
 952   STR_TERMINATE means include the null termination.
 953   STR_UPPER     means uppercase in the destination.
 954   STR_ASCII     use ascii even with unicode packet.
 955   STR_NOALIGN   means don't do alignment.
 956  dest_len is the maximum length allowed in the destination. If dest_len
 957  is -1 then no maxiumum is used.
 958 **/
 959
 960 _PUBLIC_ ssize_t push_string(void *dest, const char *src, size_t dest_len, int flags)
 961 {
 962         if (flags & STR_ASCII) {
 963                 return push_ascii(dest, src, dest_len, flags);
 964         } else if (flags & STR_UNICODE) {
 965                 return push_ucs2(dest, src, dest_len, flags);
 966         } else {
 967                 smb_panic("push_string requires either STR_ASCII or STR_UNICODE flag to be set");
 968                 return -1;
 969         }
 970 }
 971
 972
 973 /**
 974  Copy a string from a unicode or ascii source (depending on
 975  the packet flags) to a char* destination.
 976  Flags can have:
 977   STR_TERMINATE means the string in src is null terminated.
 978   STR_UNICODE   means to force as unicode.
 979   STR_ASCII     use ascii even with unicode packet.
 980   STR_NOALIGN   means don't do alignment.
 981  if STR_TERMINATE is set then src_len is ignored is it is -1
 982  src_len is the length of the source area in bytes.
 983  Return the number of bytes occupied by the string in src.
 984  The resulting string in "dest" is always null terminated.
 985 **/
 986
 987 _PUBLIC_ ssize_t pull_string(char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
 988 {
 989         if (flags & STR_ASCII) {
 990                 return pull_ascii(dest, src, dest_len, src_len, flags);
 991         } else if (flags & STR_UNICODE) {
 992                 return pull_ucs2(dest, src, dest_len, src_len, flags);
 993         } else {
 994                 smb_panic("pull_string requires either STR_ASCII or STR_UNICODE flag to be set");
 995                 return -1;
 996         }
 997 }
 998
 999
1000 /**
1001  * Convert string from one encoding to another, making error checking etc
1002  *
1003  * @param src pointer to source string (multibyte or singlebyte)
1004  * @param srclen length of the source string in bytes
1005  * @param dest pointer to destination string (multibyte or singlebyte)
1006  * @param destlen maximal length allowed for string
1007  * @returns the number of bytes occupied in the destination
1008  **/
1009 _PUBLIC_ size_t convert_string(charset_t from, charset_t to,
1010                                 void const *src, size_t srclen,
1011                                 void *dest, size_t destlen,
1012                                 bool allow_badcharcnv)
1013 {
1014         size_t ret;
1015         if (!convert_string_convenience(get_iconv_convenience(), from, to,
1016                                                                           src, srclen,
1017                                                                           dest, destlen, &ret,
1018                                                                           allow_badcharcnv))
1019                 return -1;
1020         return ret;
1021 }
1022
1023 /**
1024  * Convert between character sets, allocating a new buffer using talloc for the result.
1025  *
1026  * @param srclen length of source buffer.
1027  * @param dest always set at least to NULL
1028  * @param converted_size Size in bytes of the converted string
1029  * @note -1 is not accepted for srclen.
1030  *
1031  * @returns boolean indication whether the conversion succeeded
1032  **/
1033
1034 _PUBLIC_ bool convert_string_talloc(TALLOC_CTX *ctx,
1035                                        charset_t from, charset_t to,
1036                                        void const *src, size_t srclen,
1037                                        void *dest, size_t *converted_size,
1038                                            bool allow_badcharcnv)
1039 {
1040         return convert_string_talloc_convenience(ctx, get_iconv_convenience(),
1041                                                                                          from, to, src, srclen, dest,
1042                                                                                          converted_size,
1043                                                                                          allow_badcharcnv);
1044 }
1045
1046 _PUBLIC_ codepoint_t next_codepoint_ext(const char *str, charset_t src_charset,
1047                                         size_t *size)
1048 {
1049         return next_codepoint_convenience_ext(get_iconv_convenience(), str,
1050                                               src_charset, size);
1051 }
1052
1053 _PUBLIC_ codepoint_t next_codepoint(const char *str, size_t *size)
1054 {
1055         return next_codepoint_convenience(get_iconv_convenience(), str, size);
1056 }
1057
1058 _PUBLIC_ ssize_t push_codepoint(char *str, codepoint_t c)
1059 {
1060         return push_codepoint_convenience(get_iconv_convenience(), str, c);
1061 }