source3/lib/charcnv.c

   1 /*
   2    Unix SMB/CIFS implementation.
   3    Character set conversion Extensions
   4    Copyright (C) Igor Vergeichik <iverg@mail.ru> 2001
   5    Copyright (C) Andrew Tridgell 2001
   6    Copyright (C) Simo Sorce 2001
   7    Copyright (C) Martin Pool 2003
   8
   9    This program is free software; you can redistribute it and/or modify
  10    it under the terms of the GNU General Public License as published by
  11    the Free Software Foundation; either version 3 of the License, or
  12    (at your option) any later version.
  13
  14    This program is distributed in the hope that it will be useful,
  15    but WITHOUT ANY WARRANTY; without even the implied warranty of
  16    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  17    GNU General Public License for more details.
  18
  19    You should have received a copy of the GNU General Public License
  20    along with this program.  If not, see <http://www.gnu.org/licenses/>.
  21
  22 */
  23 #include "includes.h"
  24
  25 /* We can parameterize this if someone complains.... JRA. */
  26
  27 char lp_failed_convert_char(void)
  28 {
  29         return '_';
  30 }
  31
  32 /**
  33  * @file
  34  *
  35  * @brief Character-set conversion routines built on our iconv.
  36  *
  37  * @note Samba's internal character set (at least in the 3.0 series)
  38  * is always the same as the one for the Unix filesystem.  It is
  39  * <b>not</b> necessarily UTF-8 and may be different on machines that
  40  * need i18n filenames to be compatible with Unix software.  It does
  41  * have to be a superset of ASCII.  All multibyte sequences must start
  42  * with a byte with the high bit set.
  43  *
  44  * @sa lib/iconv.c
  45  */
  46
  47
  48 static smb_iconv_t conv_handles[NUM_CHARSETS][NUM_CHARSETS];
  49 static bool conv_silent; /* Should we do a debug if the conversion fails ? */
  50
  51 /**
  52  * Return the name of a charset to give to iconv().
  53  **/
  54 static const char *charset_name(charset_t ch)
  55 {
  56         const char *ret = NULL;
  57
  58         if (ch == CH_UTF16LE) ret = "UTF-16LE";
  59         else if (ch == CH_UTF16BE) ret = "UTF-16BE";
  60         else if (ch == CH_UNIX) ret = lp_unix_charset();
  61         else if (ch == CH_DOS) ret = lp_dos_charset();
  62         else if (ch == CH_DISPLAY) ret = lp_display_charset();
  63         else if (ch == CH_UTF8) ret = "UTF8";
  64
  65 #if defined(HAVE_NL_LANGINFO) && defined(CODESET)
  66         if (ret && !strcmp(ret, "LOCALE")) {
  67                 const char *ln = NULL;
  68
  69 #ifdef HAVE_SETLOCALE
  70                 setlocale(LC_ALL, "");
  71 #endif
  72                 ln = nl_langinfo(CODESET);
  73                 if (ln) {
  74                         /* Check whether the charset name is supported
  75                            by iconv */
  76                         smb_iconv_t handle = smb_iconv_open(ln,"UCS-2LE");
  77                         if (handle == (smb_iconv_t) -1) {
  78                                 DEBUG(5,("Locale charset '%s' unsupported, using ASCII instead\n", ln));
  79                                 ln = NULL;
  80                         } else {
  81                                 DEBUG(5,("Substituting charset '%s' for LOCALE\n", ln));
  82                                 smb_iconv_close(handle);
  83                         }
  84                 }
  85                 ret = ln;
  86         }
  87 #endif
  88
  89         if (!ret || !*ret) ret = "ASCII";
  90         return ret;
  91 }
  92
  93 void lazy_initialize_conv(void)
  94 {
  95         static int initialized = False;
  96
  97         if (!initialized) {
  98                 initialized = True;
  99                 load_case_tables();
 100                 init_iconv();
 101         }
 102 }
 103
 104 /**
 105  * Destroy global objects allocated by init_iconv()
 106  **/
 107 void gfree_charcnv(void)
 108 {
 109         int c1, c2;
 110
 111         for (c1=0;c1<NUM_CHARSETS;c1++) {
 112                 for (c2=0;c2<NUM_CHARSETS;c2++) {
 113                         if ( conv_handles[c1][c2] ) {
 114                                 smb_iconv_close( conv_handles[c1][c2] );
 115                                 conv_handles[c1][c2] = 0;
 116                         }
 117                 }
 118         }
 119 }
 120
 121 /**
 122  * Initialize iconv conversion descriptors.
 123  *
 124  * This is called the first time it is needed, and also called again
 125  * every time the configuration is reloaded, because the charset or
 126  * codepage might have changed.
 127  **/
 128 void init_iconv(void)
 129 {
 130         int c1, c2;
 131         bool did_reload = False;
 132
 133         /* so that charset_name() works we need to get the UNIX<->UCS2 going
 134            first */
 135         if (!conv_handles[CH_UNIX][CH_UTF16LE])
 136                 conv_handles[CH_UNIX][CH_UTF16LE] = smb_iconv_open(charset_name(CH_UTF16LE), "ASCII");
 137
 138         if (!conv_handles[CH_UTF16LE][CH_UNIX])
 139                 conv_handles[CH_UTF16LE][CH_UNIX] = smb_iconv_open("ASCII", charset_name(CH_UTF16LE));
 140
 141         for (c1=0;c1<NUM_CHARSETS;c1++) {
 142                 for (c2=0;c2<NUM_CHARSETS;c2++) {
 143                         const char *n1 = charset_name((charset_t)c1);
 144                         const char *n2 = charset_name((charset_t)c2);
 145                         if (conv_handles[c1][c2] &&
 146                             strcmp(n1, conv_handles[c1][c2]->from_name) == 0 &&
 147                             strcmp(n2, conv_handles[c1][c2]->to_name) == 0)
 148                                 continue;
 149
 150                         did_reload = True;
 151
 152                         if (conv_handles[c1][c2])
 153                                 smb_iconv_close(conv_handles[c1][c2]);
 154
 155                         conv_handles[c1][c2] = smb_iconv_open(n2,n1);
 156                         if (conv_handles[c1][c2] == (smb_iconv_t)-1) {
 157                                 DEBUG(0,("init_iconv: Conversion from %s to %s not supported\n",
 158                                          charset_name((charset_t)c1), charset_name((charset_t)c2)));
 159                                 if (c1 != CH_UTF16LE && c1 != CH_UTF16BE) {
 160                                         n1 = "ASCII";
 161                                 }
 162                                 if (c2 != CH_UTF16LE && c2 != CH_UTF16BE) {
 163                                         n2 = "ASCII";
 164                                 }
 165                                 DEBUG(0,("init_iconv: Attempting to replace with conversion from %s to %s\n",
 166                                         n1, n2 ));
 167                                 conv_handles[c1][c2] = smb_iconv_open(n2,n1);
 168                                 if (!conv_handles[c1][c2]) {
 169                                         DEBUG(0,("init_iconv: Conversion from %s to %s failed", n1, n2));
 170                                         smb_panic("init_iconv: conv_handle initialization failed");
 171                                 }
 172                         }
 173                 }
 174         }
 175
 176         if (did_reload) {
 177                 /* XXX: Does this really get called every time the dos
 178                  * codepage changes? */
 179                 /* XXX: Is the did_reload test too strict? */
 180                 conv_silent = True;
 181                 init_doschar_table();
 182                 init_valid_table();
 183                 conv_silent = False;
 184         }
 185 }
 186
 187 /**
 188  * Convert string from one encoding to another, making error checking etc
 189  * Slow path version - uses (slow) iconv.
 190  *
 191  * @param src pointer to source string (multibyte or singlebyte)
 192  * @param srclen length of the source string in bytes
 193  * @param dest pointer to destination string (multibyte or singlebyte)
 194  * @param destlen maximal length allowed for string
 195  * @param allow_bad_conv determines if a "best effort" conversion is acceptable (never returns errors)
 196  * @returns the number of bytes occupied in the destination
 197  *
 198  * Ensure the srclen contains the terminating zero.
 199  *
 200  **/
 201
 202 static size_t convert_string_internal(charset_t from, charset_t to,
 203                       void const *src, size_t srclen,
 204                       void *dest, size_t destlen, bool allow_bad_conv)
 205 {
 206         size_t i_len, o_len;
 207         size_t retval;
 208         const char* inbuf = (const char*)src;
 209         char* outbuf = (char*)dest;
 210         smb_iconv_t descriptor;
 211
 212         lazy_initialize_conv();
 213
 214         descriptor = conv_handles[from][to];
 215
 216         if (srclen == (size_t)-1) {
 217                 if (from == CH_UTF16LE || from == CH_UTF16BE) {
 218                         srclen = (strlen_w((const smb_ucs2_t *)src)+1) * 2;
 219                 } else {
 220                         srclen = strlen((const char *)src)+1;
 221                 }
 222         }
 223
 224
 225         if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
 226                 if (!conv_silent)
 227                         DEBUG(0,("convert_string_internal: Conversion not supported.\n"));
 228                 return (size_t)-1;
 229         }
 230
 231         i_len=srclen;
 232         o_len=destlen;
 233
 234  again:
 235
 236         retval = smb_iconv(descriptor, &inbuf, &i_len, &outbuf, &o_len);
 237         if(retval==(size_t)-1) {
 238                 const char *reason="unknown error";
 239                 switch(errno) {
 240                         case EINVAL:
 241                                 reason="Incomplete multibyte sequence";
 242                                 if (!conv_silent)
 243                                         DEBUG(3,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
 244                                 if (allow_bad_conv)
 245                                         goto use_as_is;
 246                                 break;
 247                         case E2BIG:
 248                                 reason="No more room";
 249                                 if (!conv_silent) {
 250                                         if (from == CH_UNIX) {
 251                                                 DEBUG(3,("E2BIG: convert_string(%s,%s): srclen=%u destlen=%u - '%s'\n",
 252                                                         charset_name(from), charset_name(to),
 253                                                         (unsigned int)srclen, (unsigned int)destlen, (const char *)src));
 254                                         } else {
 255                                                 DEBUG(3,("E2BIG: convert_string(%s,%s): srclen=%u destlen=%u\n",
 256                                                         charset_name(from), charset_name(to),
 257                                                         (unsigned int)srclen, (unsigned int)destlen));
 258                                         }
 259                                 }
 260                                 break;
 261                         case EILSEQ:
 262                                 reason="Illegal multibyte sequence";
 263                                 if (!conv_silent)
 264                                         DEBUG(3,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
 265                                 if (allow_bad_conv)
 266                                         goto use_as_is;
 267                                 break;
 268                         default:
 269                                 if (!conv_silent)
 270                                         DEBUG(0,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
 271                                 break;
 272                 }
 273                 /* smb_panic(reason); */
 274         }
 275         return destlen-o_len;
 276
 277  use_as_is:
 278
 279         /*
 280          * Conversion not supported. This is actually an error, but there are so
 281          * many misconfigured iconv systems and smb.conf's out there we can't just
 282          * fail. Do a very bad conversion instead.... JRA.
 283          */
 284
 285         {
 286                 if (o_len == 0 || i_len == 0)
 287                         return destlen - o_len;
 288
 289                 if (((from == CH_UTF16LE)||(from == CH_UTF16BE)) &&
 290                                 ((to != CH_UTF16LE)||(to != CH_UTF16BE))) {
 291                         /* Can't convert from utf16 any endian to multibyte.
 292                            Replace with the default fail char.
 293                         */
 294                         if (i_len < 2)
 295                                 return destlen - o_len;
 296                         if (i_len >= 2) {
 297                                 *outbuf = lp_failed_convert_char();
 298
 299                                 outbuf++;
 300                                 o_len--;
 301
 302                                 inbuf += 2;
 303                                 i_len -= 2;
 304                         }
 305
 306                         if (o_len == 0 || i_len == 0)
 307                                 return destlen - o_len;
 308
 309                         /* Keep trying with the next char... */
 310                         goto again;
 311
 312                 } else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) {
 313                         /* Can't convert to UTF16LE - just widen by adding the
 314                            default fail char then zero.
 315                         */
 316                         if (o_len < 2)
 317                                 return destlen - o_len;
 318
 319                         outbuf[0] = lp_failed_convert_char();
 320                         outbuf[1] = '\0';
 321
 322                         inbuf++;
 323                         i_len--;
 324
 325                         outbuf += 2;
 326                         o_len -= 2;
 327
 328                         if (o_len == 0 || i_len == 0)
 329                                 return destlen - o_len;
 330
 331                         /* Keep trying with the next char... */
 332                         goto again;
 333
 334                 } else if (from != CH_UTF16LE && from != CH_UTF16BE &&
 335                                 to != CH_UTF16LE && to != CH_UTF16BE) {
 336                         /* Failed multibyte to multibyte. Just copy the default fail char and
 337                                 try again. */
 338                         outbuf[0] = lp_failed_convert_char();
 339
 340                         inbuf++;
 341                         i_len--;
 342
 343                         outbuf++;
 344                         o_len--;
 345
 346                         if (o_len == 0 || i_len == 0)
 347                                 return destlen - o_len;
 348
 349                         /* Keep trying with the next char... */
 350                         goto again;
 351
 352                 } else {
 353                         /* Keep compiler happy.... */
 354                         return destlen - o_len;
 355                 }
 356         }
 357 }
 358
 359 /**
 360  * Convert string from one encoding to another, making error checking etc
 361  * Fast path version - handles ASCII first.
 362  *
 363  * @param src pointer to source string (multibyte or singlebyte)
 364  * @param srclen length of the source string in bytes, or -1 for nul terminated.
 365  * @param dest pointer to destination string (multibyte or singlebyte)
 366  * @param destlen maximal length allowed for string - *NEVER* -1.
 367  * @param allow_bad_conv determines if a "best effort" conversion is acceptable (never returns errors)
 368  * @returns the number of bytes occupied in the destination
 369  *
 370  * Ensure the srclen contains the terminating zero.
 371  *
 372  * This function has been hand-tuned to provide a fast path.
 373  * Don't change unless you really know what you are doing. JRA.
 374  **/
 375
 376 size_t convert_string(charset_t from, charset_t to,
 377                       void const *src, size_t srclen,
 378                       void *dest, size_t destlen, bool allow_bad_conv)
 379 {
 380         /*
 381          * NB. We deliberately don't do a strlen here if srclen == -1.
 382          * This is very expensive over millions of calls and is taken
 383          * care of in the slow path in convert_string_internal. JRA.
 384          */
 385
 386 #ifdef DEVELOPER
 387         SMB_ASSERT(destlen != (size_t)-1);
 388 #endif
 389
 390         if (srclen == 0)
 391                 return 0;
 392
 393         if (from != CH_UTF16LE && from != CH_UTF16BE && to != CH_UTF16LE && to != CH_UTF16BE) {
 394                 const unsigned char *p = (const unsigned char *)src;
 395                 unsigned char *q = (unsigned char *)dest;
 396                 size_t slen = srclen;
 397                 size_t dlen = destlen;
 398                 unsigned char lastp = '\0';
 399                 size_t retval = 0;
 400
 401                 /* If all characters are ascii, fast path here. */
 402                 while (slen && dlen) {
 403                         if ((lastp = *p) <= 0x7f) {
 404                                 *q++ = *p++;
 405                                 if (slen != (size_t)-1) {
 406                                         slen--;
 407                                 }
 408                                 dlen--;
 409                                 retval++;
 410                                 if (!lastp)
 411                                         break;
 412                         } else {
 413 #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
 414                                 goto general_case;
 415 #else
 416                                 return retval + convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
 417 #endif
 418                         }
 419                 }
 420                 if (!dlen) {
 421                         /* Even if we fast path we should note if we ran out of room. */
 422                         if (((slen != (size_t)-1) && slen) ||
 423                                         ((slen == (size_t)-1) && lastp)) {
 424                                 errno = E2BIG;
 425                         }
 426                 }
 427                 return retval;
 428         } else if (from == CH_UTF16LE && to != CH_UTF16LE) {
 429                 const unsigned char *p = (const unsigned char *)src;
 430                 unsigned char *q = (unsigned char *)dest;
 431                 size_t retval = 0;
 432                 size_t slen = srclen;
 433                 size_t dlen = destlen;
 434                 unsigned char lastp = '\0';
 435
 436                 /* If all characters are ascii, fast path here. */
 437                 while (((slen == (size_t)-1) || (slen >= 2)) && dlen) {
 438                         if (((lastp = *p) <= 0x7f) && (p[1] == 0)) {
 439                                 *q++ = *p;
 440                                 if (slen != (size_t)-1) {
 441                                         slen -= 2;
 442                                 }
 443                                 p += 2;
 444                                 dlen--;
 445                                 retval++;
 446                                 if (!lastp)
 447                                         break;
 448                         } else {
 449 #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
 450                                 goto general_case;
 451 #else
 452                                 return retval + convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
 453 #endif
 454                         }
 455                 }
 456                 if (!dlen) {
 457                         /* Even if we fast path we should note if we ran out of room. */
 458                         if (((slen != (size_t)-1) && slen) ||
 459                                         ((slen == (size_t)-1) && lastp)) {
 460                                 errno = E2BIG;
 461                         }
 462                 }
 463                 return retval;
 464         } else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) {
 465                 const unsigned char *p = (const unsigned char *)src;
 466                 unsigned char *q = (unsigned char *)dest;
 467                 size_t retval = 0;
 468                 size_t slen = srclen;
 469                 size_t dlen = destlen;
 470                 unsigned char lastp = '\0';
 471
 472                 /* If all characters are ascii, fast path here. */
 473                 while (slen && (dlen >= 2)) {
 474                         if ((lastp = *p) <= 0x7F) {
 475                                 *q++ = *p++;
 476                                 *q++ = '\0';
 477                                 if (slen != (size_t)-1) {
 478                                         slen--;
 479                                 }
 480                                 dlen -= 2;
 481                                 retval += 2;
 482                                 if (!lastp)
 483                                         break;
 484                         } else {
 485 #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
 486                                 goto general_case;
 487 #else
 488                                 return retval + convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
 489 #endif
 490                         }
 491                 }
 492                 if (!dlen) {
 493                         /* Even if we fast path we should note if we ran out of room. */
 494                         if (((slen != (size_t)-1) && slen) ||
 495                                         ((slen == (size_t)-1) && lastp)) {
 496                                 errno = E2BIG;
 497                         }
 498                 }
 499                 return retval;
 500         }
 501
 502 #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
 503   general_case:
 504 #endif
 505         return convert_string_internal(from, to, src, srclen, dest, destlen, allow_bad_conv);
 506 }
 507
 508 /**
 509  * Convert between character sets, allocating a new buffer for the result.
 510  *
 511  * @param ctx TALLOC_CTX to use to allocate with. If NULL use malloc.
 512  * (this is a bad interface and needs fixing. JRA).
 513  * @param srclen length of source buffer.
 514  * @param dest always set at least to NULL
 515  * @note -1 is not accepted for srclen.
 516  *
 517  * @returns Size in bytes of the converted string; or -1 in case of error.
 518  *
 519  * Ensure the srclen contains the terminating zero.
 520  *
 521  * I hate the goto's in this function. It's embarressing.....
 522  * There has to be a cleaner way to do this. JRA.
 523  **/
 524
 525 size_t convert_string_allocate(TALLOC_CTX *ctx, charset_t from, charset_t to,
 526                                void const *src, size_t srclen, void *dst, bool allow_bad_conv)
 527 {
 528         size_t i_len, o_len, destlen = MAX(srclen, 512);
 529         size_t retval;
 530         const char *inbuf = (const char *)src;
 531         char *outbuf = NULL, *ob = NULL;
 532         smb_iconv_t descriptor;
 533         void **dest = (void **)dst;
 534
 535         *dest = NULL;
 536
 537         if (src == NULL || srclen == (size_t)-1)
 538                 return (size_t)-1;
 539         if (srclen == 0)
 540                 return 0;
 541
 542         lazy_initialize_conv();
 543
 544         descriptor = conv_handles[from][to];
 545
 546         if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
 547                 if (!conv_silent)
 548                         DEBUG(0,("convert_string_allocate: Conversion not supported.\n"));
 549                 return (size_t)-1;
 550         }
 551
 552   convert:
 553
 554         if ((destlen*2) < destlen) {
 555                 /* wrapped ! abort. */
 556                 if (!conv_silent)
 557                         DEBUG(0, ("convert_string_allocate: destlen wrapped !\n"));
 558                 if (!ctx)
 559                         SAFE_FREE(outbuf);
 560                 return (size_t)-1;
 561         } else {
 562                 destlen = destlen * 2;
 563         }
 564
 565         if (ctx) {
 566                 ob = (char *)TALLOC_REALLOC(ctx, ob, destlen);
 567         } else {
 568                 ob = (char *)SMB_REALLOC(ob, destlen);
 569         }
 570
 571         if (!ob) {
 572                 DEBUG(0, ("convert_string_allocate: realloc failed!\n"));
 573                 return (size_t)-1;
 574         }
 575         outbuf = ob;
 576         i_len = srclen;
 577         o_len = destlen;
 578
 579  again:
 580
 581         retval = smb_iconv(descriptor,
 582                            &inbuf, &i_len,
 583                            &outbuf, &o_len);
 584         if(retval == (size_t)-1)                {
 585                 const char *reason="unknown error";
 586                 switch(errno) {
 587                         case EINVAL:
 588                                 reason="Incomplete multibyte sequence";
 589                                 if (!conv_silent)
 590                                         DEBUG(3,("convert_string_allocate: Conversion error: %s(%s)\n",reason,inbuf));
 591                                 if (allow_bad_conv)
 592                                         goto use_as_is;
 593                                 break;
 594                         case E2BIG:
 595                                 goto convert;
 596                         case EILSEQ:
 597                                 reason="Illegal multibyte sequence";
 598                                 if (!conv_silent)
 599                                         DEBUG(3,("convert_string_allocate: Conversion error: %s(%s)\n",reason,inbuf));
 600                                 if (allow_bad_conv)
 601                                         goto use_as_is;
 602                                 break;
 603                 }
 604                 if (!conv_silent)
 605                         DEBUG(0,("Conversion error: %s(%s)\n",reason,inbuf));
 606                 /* smb_panic(reason); */
 607                 if (ctx) {
 608                         TALLOC_FREE(ob);
 609                 } else {
 610                         SAFE_FREE(ob);
 611                 }
 612                 return (size_t)-1;
 613         }
 614
 615   out:
 616
 617         destlen = destlen - o_len;
 618         if (ctx) {
 619                 ob = (char *)TALLOC_REALLOC(ctx,ob,destlen);
 620         } else {
 621                 ob = (char *)SMB_REALLOC(ob,destlen);
 622         }
 623
 624         if (destlen && !ob) {
 625                 DEBUG(0, ("convert_string_allocate: out of memory!\n"));
 626                 return (size_t)-1;
 627         }
 628
 629         *dest = ob;
 630         return destlen;
 631
 632  use_as_is:
 633
 634         /*
 635          * Conversion not supported. This is actually an error, but there are so
 636          * many misconfigured iconv systems and smb.conf's out there we can't just
 637          * fail. Do a very bad conversion instead.... JRA.
 638          */
 639
 640         {
 641                 if (o_len == 0 || i_len == 0)
 642                         goto out;
 643
 644                 if (((from == CH_UTF16LE)||(from == CH_UTF16BE)) &&
 645                                 ((to != CH_UTF16LE)||(to != CH_UTF16BE))) {
 646                         /* Can't convert from utf16 any endian to multibyte.
 647                            Replace with the default fail char.
 648                         */
 649
 650                         if (i_len < 2)
 651                                 goto out;
 652
 653                         if (i_len >= 2) {
 654                                 *outbuf = lp_failed_convert_char();
 655
 656                                 outbuf++;
 657                                 o_len--;
 658
 659                                 inbuf += 2;
 660                                 i_len -= 2;
 661                         }
 662
 663                         if (o_len == 0 || i_len == 0)
 664                                 goto out;
 665
 666                         /* Keep trying with the next char... */
 667                         goto again;
 668
 669                 } else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) {
 670                         /* Can't convert to UTF16LE - just widen by adding the
 671                            default fail char then zero.
 672                         */
 673                         if (o_len < 2)
 674                                 goto out;
 675
 676                         outbuf[0] = lp_failed_convert_char();
 677                         outbuf[1] = '\0';
 678
 679                         inbuf++;
 680                         i_len--;
 681
 682                         outbuf += 2;
 683                         o_len -= 2;
 684
 685                         if (o_len == 0 || i_len == 0)
 686                                 goto out;
 687
 688                         /* Keep trying with the next char... */
 689                         goto again;
 690
 691                 } else if (from != CH_UTF16LE && from != CH_UTF16BE &&
 692                                 to != CH_UTF16LE && to != CH_UTF16BE) {
 693                         /* Failed multibyte to multibyte. Just copy the default fail char and
 694                            try again. */
 695                         outbuf[0] = lp_failed_convert_char();
 696
 697                         inbuf++;
 698                         i_len--;
 699
 700                         outbuf++;
 701                         o_len--;
 702
 703                         if (o_len == 0 || i_len == 0)
 704                                 goto out;
 705
 706                         /* Keep trying with the next char... */
 707                         goto again;
 708
 709                 } else {
 710                         /* Keep compiler happy.... */
 711                         goto out;
 712                 }
 713         }
 714 }
 715
 716 /**
 717  * Convert between character sets, allocating a new buffer using talloc for the result.
 718  *
 719  * @param srclen length of source buffer.
 720  * @param dest always set at least to NULL
 721  * @note -1 is not accepted for srclen.
 722  *
 723  * @returns Size in bytes of the converted string; or -1 in case of error.
 724  **/
 725 size_t convert_string_talloc(TALLOC_CTX *ctx, charset_t from, charset_t to,
 726                              void const *src, size_t srclen, void *dst,
 727                              bool allow_bad_conv)
 728 {
 729         void **dest = (void **)dst;
 730         size_t dest_len;
 731
 732         *dest = NULL;
 733         dest_len=convert_string_allocate(ctx, from, to, src, srclen, dest, allow_bad_conv);
 734         if (dest_len == (size_t)-1)
 735                 return (size_t)-1;
 736         if (*dest == NULL)
 737                 return (size_t)-1;
 738         return dest_len;
 739 }
 740
 741 size_t unix_strupper(const char *src, size_t srclen, char *dest, size_t destlen)
 742 {
 743         size_t size;
 744         smb_ucs2_t *buffer;
 745
 746         size = push_ucs2_allocate(&buffer, src);
 747         if (size == (size_t)-1) {
 748                 return (size_t)-1;
 749         }
 750         if (!strupper_w(buffer) && (dest == src)) {
 751                 free(buffer);
 752                 return srclen;
 753         }
 754
 755         size = convert_string(CH_UTF16LE, CH_UNIX, buffer, size, dest, destlen, True);
 756         free(buffer);
 757         return size;
 758 }
 759
 760 /**
 761  strdup() a unix string to upper case.
 762 **/
 763
 764 char *strdup_upper(const char *s)
 765 {
 766         char *out_buffer = SMB_STRDUP(s);
 767         const unsigned char *p = (const unsigned char *)s;
 768         unsigned char *q = (unsigned char *)out_buffer;
 769
 770         if (!q) {
 771                 return NULL;
 772         }
 773
 774         /* this is quite a common operation, so we want it to be
 775            fast. We optimise for the ascii case, knowing that all our
 776            supported multi-byte character sets are ascii-compatible
 777            (ie. they match for the first 128 chars) */
 778
 779         while (*p) {
 780                 if (*p & 0x80)
 781                         break;
 782                 *q++ = toupper_ascii(*p);
 783                 p++;
 784         }
 785
 786         if (*p) {
 787                 /* MB case. */
 788                 size_t size;
 789                 smb_ucs2_t *buffer = NULL;
 790
 791                 SAFE_FREE(out_buffer);
 792                 size = convert_string_allocate(NULL,
 793                                         CH_UNIX,
 794                                         CH_UTF16LE,
 795                                         s,
 796                                         strlen(s) + 1,
 797                                         (void **)(void *)&buffer,
 798                                         True);
 799                 if (size == (size_t)-1) {
 800                         return NULL;
 801                 }
 802
 803                 strupper_w(buffer);
 804
 805                 size = convert_string_allocate(NULL,
 806                                         CH_UTF16LE,
 807                                         CH_UNIX,
 808                                         buffer,
 809                                         size,
 810                                         (void **)(void *)&out_buffer,
 811                                         True);
 812
 813                 /* Don't need the intermediate buffer
 814                  * anymore.
 815                  */
 816
 817                 TALLOC_FREE(buffer);
 818                 if (size == (size_t)-1) {
 819                         return NULL;
 820                 }
 821         }
 822
 823         return out_buffer;
 824 }
 825
 826 /**
 827  talloc_strdup() a unix string to upper case.
 828 **/
 829
 830 char *talloc_strdup_upper(TALLOC_CTX *ctx, const char *s)
 831 {
 832         char *out_buffer = talloc_strdup(ctx,s);
 833         const unsigned char *p = (const unsigned char *)s;
 834         unsigned char *q = (unsigned char *)out_buffer;
 835
 836         if (!q) {
 837                 return NULL;
 838         }
 839
 840         /* this is quite a common operation, so we want it to be
 841            fast. We optimise for the ascii case, knowing that all our
 842            supported multi-byte character sets are ascii-compatible
 843            (ie. they match for the first 128 chars) */
 844
 845         while (*p) {
 846                 if (*p & 0x80)
 847                         break;
 848                 *q++ = toupper_ascii(*p);
 849                 p++;
 850         }
 851
 852         if (*p) {
 853                 /* MB case. */
 854                 size_t size;
 855                 smb_ucs2_t *ubuf = NULL;
 856
 857                 /* We're not using the ascii buffer above. */
 858                 TALLOC_FREE(out_buffer);
 859
 860                 size = convert_string_talloc(ctx, CH_UNIX, CH_UTF16LE,
 861                                 s, strlen(s)+1,
 862                                 (void *)&ubuf,
 863                                 True);
 864                 if (size == (size_t)-1) {
 865                         return NULL;
 866                 }
 867
 868                 strupper_w(ubuf);
 869
 870                 size = convert_string_talloc(ctx, CH_UTF16LE, CH_UNIX,
 871                                 ubuf, size,
 872                                 (void *)&out_buffer,
 873                                 True);
 874
 875                 /* Don't need the intermediate buffer
 876                  * anymore.
 877                  */
 878
 879                 TALLOC_FREE(ubuf);
 880
 881                 if (size == (size_t)-1) {
 882                         return NULL;
 883                 }
 884         }
 885
 886         return out_buffer;
 887 }
 888
 889 size_t unix_strlower(const char *src, size_t srclen, char *dest, size_t destlen)
 890 {
 891         size_t size;
 892         smb_ucs2_t *buffer = NULL;
 893
 894         size = convert_string_allocate(NULL, CH_UNIX, CH_UTF16LE, src, srclen,
 895                                        (void **)(void *)&buffer, True);
 896         if (size == (size_t)-1 || !buffer) {
 897                 smb_panic("failed to create UCS2 buffer");
 898         }
 899         if (!strlower_w(buffer) && (dest == src)) {
 900                 SAFE_FREE(buffer);
 901                 return srclen;
 902         }
 903         size = convert_string(CH_UTF16LE, CH_UNIX, buffer, size, dest, destlen, True);
 904         SAFE_FREE(buffer);
 905         return size;
 906 }
 907
 908 /**
 909  strdup() a unix string to lower case.
 910 **/
 911
 912 char *strdup_lower(const char *s)
 913 {
 914         size_t size;
 915         smb_ucs2_t *buffer = NULL;
 916         char *out_buffer;
 917
 918         size = push_ucs2_allocate(&buffer, s);
 919         if (size == -1 || !buffer) {
 920                 return NULL;
 921         }
 922
 923         strlower_w(buffer);
 924
 925         size = pull_ucs2_allocate(&out_buffer, buffer);
 926         SAFE_FREE(buffer);
 927
 928         if (size == (size_t)-1) {
 929                 return NULL;
 930         }
 931
 932         return out_buffer;
 933 }
 934
 935 size_t ucs2_align(const void *base_ptr, const void *p, int flags)
 936 {
 937         if (flags & (STR_NOALIGN|STR_ASCII))
 938                 return 0;
 939         return PTR_DIFF(p, base_ptr) & 1;
 940 }
 941
 942
 943 /**
 944  * Copy a string from a char* unix src to a dos codepage string destination.
 945  *
 946  * @return the number of bytes occupied by the string in the destination.
 947  *
 948  * @param flags can include
 949  * <dl>
 950  * <dt>STR_TERMINATE</dt> <dd>means include the null termination</dd>
 951  * <dt>STR_UPPER</dt> <dd>means uppercase in the destination</dd>
 952  * </dl>
 953  *
 954  * @param dest_len the maximum length in bytes allowed in the
 955  * destination.
 956  **/
 957 size_t push_ascii(void *dest, const char *src, size_t dest_len, int flags)
 958 {
 959         size_t src_len = strlen(src);
 960         char *tmpbuf = NULL;
 961         size_t ret;
 962
 963         /* No longer allow a length of -1. */
 964         if (dest_len == (size_t)-1) {
 965                 smb_panic("push_ascii - dest_len == -1");
 966         }
 967
 968         if (flags & STR_UPPER) {
 969                 tmpbuf = SMB_STRDUP(src);
 970                 if (!tmpbuf) {
 971                         smb_panic("malloc fail");
 972                 }
 973                 strupper_m(tmpbuf);
 974                 src = tmpbuf;
 975         }
 976
 977         if (flags & (STR_TERMINATE | STR_TERMINATE_ASCII)) {
 978                 src_len++;
 979         }
 980
 981         ret = convert_string(CH_UNIX, CH_DOS, src, src_len, dest, dest_len, True);
 982         if (ret == (size_t)-1 &&
 983                         (flags & (STR_TERMINATE | STR_TERMINATE_ASCII))
 984                         && dest_len > 0) {
 985                 ((char *)dest)[0] = '\0';
 986         }
 987         SAFE_FREE(tmpbuf);
 988         return ret;
 989 }
 990
 991 size_t push_ascii_fstring(void *dest, const char *src)
 992 {
 993         return push_ascii(dest, src, sizeof(fstring), STR_TERMINATE);
 994 }
 995
 996 /********************************************************************
 997  Push an nstring - ensure null terminated. Written by
 998  moriyama@miraclelinux.com (MORIYAMA Masayuki).
 999 ********************************************************************/
1000
1001 size_t push_ascii_nstring(void *dest, const char *src)
1002 {
1003         size_t i, buffer_len, dest_len;
1004         smb_ucs2_t *buffer;
1005
1006         conv_silent = True;
1007         buffer_len = push_ucs2_allocate(&buffer, src);
1008         if (buffer_len == (size_t)-1) {
1009                 smb_panic("failed to create UCS2 buffer");
1010         }
1011
1012         /* We're using buffer_len below to count ucs2 characters, not bytes. */
1013         buffer_len /= sizeof(smb_ucs2_t);
1014
1015         dest_len = 0;
1016         for (i = 0; buffer[i] != 0 && (i < buffer_len); i++) {
1017                 unsigned char mb[10];
1018                 /* Convert one smb_ucs2_t character at a time. */
1019                 size_t mb_len = convert_string(CH_UTF16LE, CH_DOS, buffer+i, sizeof(smb_ucs2_t), mb, sizeof(mb), False);
1020                 if ((mb_len != (size_t)-1) && (dest_len + mb_len <= MAX_NETBIOSNAME_LEN - 1)) {
1021                         memcpy((char *)dest + dest_len, mb, mb_len);
1022                         dest_len += mb_len;
1023                 } else {
1024                         errno = E2BIG;
1025                         break;
1026                 }
1027         }
1028         ((char *)dest)[dest_len] = '\0';
1029
1030         SAFE_FREE(buffer);
1031         conv_silent = False;
1032         return dest_len;
1033 }
1034
1035 /********************************************************************
1036  Push and malloc an ascii string. src and dest null terminated.
1037 ********************************************************************/
1038
1039 size_t push_ascii_allocate(char **dest, const char *src)
1040 {
1041         size_t src_len = strlen(src)+1;
1042
1043         *dest = NULL;
1044         return convert_string_allocate(NULL, CH_UNIX, CH_DOS, src, src_len, (void **)dest, True);
1045 }
1046
1047 /**
1048  * Copy a string from a dos codepage source to a unix char* destination.
1049  *
1050  * The resulting string in "dest" is always null terminated.
1051  *
1052  * @param flags can have:
1053  * <dl>
1054  * <dt>STR_TERMINATE</dt>
1055  * <dd>STR_TERMINATE means the string in @p src
1056  * is null terminated, and src_len is ignored.</dd>
1057  * </dl>
1058  *
1059  * @param src_len is the length of the source area in bytes.
1060  * @returns the number of bytes occupied by the string in @p src.
1061  **/
1062 size_t pull_ascii(char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
1063 {
1064         size_t ret;
1065
1066         if (dest_len == (size_t)-1) {
1067                 /* No longer allow dest_len of -1. */
1068                 smb_panic("pull_ascii - invalid dest_len of -1");
1069         }
1070
1071         if (flags & STR_TERMINATE) {
1072                 if (src_len == (size_t)-1) {
1073                         src_len = strlen((const char *)src) + 1;
1074                 } else {
1075                         size_t len = strnlen((const char *)src, src_len);
1076                         if (len < src_len)
1077                                 len++;
1078                         src_len = len;
1079                 }
1080         }
1081
1082         ret = convert_string(CH_DOS, CH_UNIX, src, src_len, dest, dest_len, True);
1083         if (ret == (size_t)-1) {
1084                 ret = 0;
1085                 dest_len = 0;
1086         }
1087
1088         if (dest_len && ret) {
1089                 /* Did we already process the terminating zero ? */
1090                 if (dest[MIN(ret-1, dest_len-1)] != 0) {
1091                         dest[MIN(ret, dest_len-1)] = 0;
1092                 }
1093         } else  {
1094                 dest[0] = 0;
1095         }
1096
1097         return src_len;
1098 }
1099
1100 /**
1101  * Copy a string from a dos codepage source to a unix char* destination.
1102  Talloc version.
1103  Uses malloc if TALLOC_CTX is NULL (this is a bad interface and
1104  needs fixing. JRA).
1105  *
1106  * The resulting string in "dest" is always null terminated.
1107  *
1108  * @param flags can have:
1109  * <dl>
1110  * <dt>STR_TERMINATE</dt>
1111  * <dd>STR_TERMINATE means the string in @p src
1112  * is null terminated, and src_len is ignored.</dd>
1113  * </dl>
1114  *
1115  * @param src_len is the length of the source area in bytes.
1116  * @returns the number of bytes occupied by the string in @p src.
1117  **/
1118
1119 static size_t pull_ascii_base_talloc(TALLOC_CTX *ctx,
1120                                         char **ppdest,
1121                                         const void *src,
1122                                         size_t src_len,
1123                                         int flags)
1124 {
1125         char *dest = NULL;
1126         size_t dest_len = 0;
1127
1128 #ifdef DEVELOPER
1129         /* Ensure we never use the braindead "malloc" varient. */
1130         if (ctx == NULL) {
1131                 smb_panic("NULL talloc CTX in pull_ascii_base_talloc\n");
1132         }
1133 #endif
1134
1135         *ppdest = NULL;
1136
1137         if (flags & STR_TERMINATE) {
1138                 if (src_len == (size_t)-1) {
1139                         src_len = strlen((const char *)src) + 1;
1140                 } else {
1141                         size_t len = strnlen((const char *)src, src_len);
1142                         if (len < src_len)
1143                                 len++;
1144                         src_len = len;
1145                 }
1146                 /* Ensure we don't use an insane length from the client. */
1147                 if (src_len >= 1024*1024) {
1148                         char *msg = talloc_asprintf(ctx,
1149                                         "Bad src length (%u) in "
1150                                         "pull_ascii_base_talloc",
1151                                         (unsigned int)src_len);
1152                         smb_panic(msg);
1153                 }
1154         }
1155
1156         dest_len = convert_string_allocate(ctx,
1157                                 CH_DOS,
1158                                 CH_UNIX,
1159                                 src,
1160                                 src_len,
1161                                 &dest,
1162                                 True);
1163
1164         if (dest_len == (size_t)-1) {
1165                 dest_len = 0;
1166         }
1167
1168         if (dest_len && dest) {
1169                 /* Did we already process the terminating zero ? */
1170                 if (dest[dest_len-1] != 0) {
1171                         dest[dest_len-1] = 0;
1172                 }
1173         } else if (dest) {
1174                 dest[0] = 0;
1175         }
1176
1177         *ppdest = dest;
1178         return src_len;
1179 }
1180
1181
1182 size_t pull_ascii_pstring(char *dest, const void *src)
1183 {
1184         return pull_ascii(dest, src, sizeof(pstring), -1, STR_TERMINATE);
1185 }
1186
1187 size_t pull_ascii_fstring(char *dest, const void *src)
1188 {
1189         return pull_ascii(dest, src, sizeof(fstring), -1, STR_TERMINATE);
1190 }
1191
1192 /* When pulling an nstring it can expand into a larger size (dos cp -> utf8). Cope with this. */
1193
1194 size_t pull_ascii_nstring(char *dest, size_t dest_len, const void *src)
1195 {
1196         return pull_ascii(dest, src, dest_len, sizeof(nstring)-1, STR_TERMINATE);
1197 }
1198
1199 /**
1200  * Copy a string from a char* src to a unicode destination.
1201  *
1202  * @returns the number of bytes occupied by the string in the destination.
1203  *
1204  * @param flags can have:
1205  *
1206  * <dl>
1207  * <dt>STR_TERMINATE <dd>means include the null termination.
1208  * <dt>STR_UPPER     <dd>means uppercase in the destination.
1209  * <dt>STR_NOALIGN   <dd>means don't do alignment.
1210  * </dl>
1211  *
1212  * @param dest_len is the maximum length allowed in the
1213  * destination.
1214  **/
1215
1216 size_t push_ucs2(const void *base_ptr, void *dest, const char *src, size_t dest_len, int flags)
1217 {
1218         size_t len=0;
1219         size_t src_len;
1220         size_t ret;
1221
1222         if (dest_len == (size_t)-1) {
1223                 /* No longer allow dest_len of -1. */
1224                 smb_panic("push_ucs2 - invalid dest_len of -1");
1225         }
1226
1227         if (flags & STR_TERMINATE)
1228                 src_len = (size_t)-1;
1229         else
1230                 src_len = strlen(src);
1231
1232         if (ucs2_align(base_ptr, dest, flags)) {
1233                 *(char *)dest = 0;
1234                 dest = (void *)((char *)dest + 1);
1235                 if (dest_len)
1236                         dest_len--;
1237                 len++;
1238         }
1239
1240         /* ucs2 is always a multiple of 2 bytes */
1241         dest_len &= ~1;
1242
1243         ret =  convert_string(CH_UNIX, CH_UTF16LE, src, src_len, dest, dest_len, True);
1244         if (ret == (size_t)-1) {
1245                 if ((flags & STR_TERMINATE) &&
1246                                 dest &&
1247                                 dest_len) {
1248                         *(char *)dest = 0;
1249                 }
1250                 return len;
1251         }
1252
1253         len += ret;
1254
1255         if (flags & STR_UPPER) {
1256                 smb_ucs2_t *dest_ucs2 = (smb_ucs2_t *)dest;
1257                 size_t i;
1258
1259                 /* We check for i < (ret / 2) below as the dest string isn't null
1260                    terminated if STR_TERMINATE isn't set. */
1261
1262                 for (i = 0; i < (ret / 2) && i < (dest_len / 2) && dest_ucs2[i]; i++) {
1263                         smb_ucs2_t v = toupper_w(dest_ucs2[i]);
1264                         if (v != dest_ucs2[i]) {
1265                                 dest_ucs2[i] = v;
1266                         }
1267                 }
1268         }
1269
1270         return len;
1271 }
1272
1273
1274 /**
1275  * Copy a string from a unix char* src to a UCS2 destination,
1276  * allocating a buffer using talloc().
1277  *
1278  * @param dest always set at least to NULL
1279  *
1280  * @returns The number of bytes occupied by the string in the destination
1281  *         or -1 in case of error.
1282  **/
1283 size_t push_ucs2_talloc(TALLOC_CTX *ctx, smb_ucs2_t **dest, const char *src)
1284 {
1285         size_t src_len = strlen(src)+1;
1286
1287         *dest = NULL;
1288         return convert_string_talloc(ctx, CH_UNIX, CH_UTF16LE, src, src_len, (void **)dest, True);
1289 }
1290
1291
1292 /**
1293  * Copy a string from a unix char* src to a UCS2 destination, allocating a buffer
1294  *
1295  * @param dest always set at least to NULL
1296  *
1297  * @returns The number of bytes occupied by the string in the destination
1298  *         or -1 in case of error.
1299  **/
1300
1301 size_t push_ucs2_allocate(smb_ucs2_t **dest, const char *src)
1302 {
1303         size_t src_len = strlen(src)+1;
1304
1305         *dest = NULL;
1306         return convert_string_allocate(NULL, CH_UNIX, CH_UTF16LE, src, src_len, (void **)dest, True);
1307 }
1308
1309 /**
1310  Copy a string from a char* src to a UTF-8 destination.
1311  Return the number of bytes occupied by the string in the destination
1312  Flags can have:
1313   STR_TERMINATE means include the null termination
1314   STR_UPPER     means uppercase in the destination
1315  dest_len is the maximum length allowed in the destination. If dest_len
1316  is -1 then no maxiumum is used.
1317 **/
1318
1319 static size_t push_utf8(void *dest, const char *src, size_t dest_len, int flags)
1320 {
1321         size_t src_len = 0;
1322         size_t ret;
1323         char *tmpbuf = NULL;
1324
1325         if (dest_len == (size_t)-1) {
1326                 /* No longer allow dest_len of -1. */
1327                 smb_panic("push_utf8 - invalid dest_len of -1");
1328         }
1329
1330         if (flags & STR_UPPER) {
1331                 tmpbuf = strdup_upper(src);
1332                 if (!tmpbuf) {
1333                         return (size_t)-1;
1334                 }
1335                 src = tmpbuf;
1336                 src_len = strlen(src);
1337         }
1338
1339         src_len = strlen(src);
1340         if (flags & STR_TERMINATE) {
1341                 src_len++;
1342         }
1343
1344         ret = convert_string(CH_UNIX, CH_UTF8, src, src_len, dest, dest_len, True);
1345         SAFE_FREE(tmpbuf);
1346         return ret;
1347 }
1348
1349 size_t push_utf8_fstring(void *dest, const char *src)
1350 {
1351         return push_utf8(dest, src, sizeof(fstring), STR_TERMINATE);
1352 }
1353
1354 /**
1355  * Copy a string from a unix char* src to a UTF-8 destination, allocating a buffer using talloc
1356  *
1357  * @param dest always set at least to NULL
1358  *
1359  * @returns The number of bytes occupied by the string in the destination
1360  **/
1361
1362 size_t push_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src)
1363 {
1364         size_t src_len = strlen(src)+1;
1365
1366         *dest = NULL;
1367         return convert_string_talloc(ctx, CH_UNIX, CH_UTF8, src, src_len, (void**)dest, True);
1368 }
1369
1370 /**
1371  * Copy a string from a unix char* src to a UTF-8 destination, allocating a buffer
1372  *
1373  * @param dest always set at least to NULL
1374  *
1375  * @returns The number of bytes occupied by the string in the destination
1376  **/
1377
1378 size_t push_utf8_allocate(char **dest, const char *src)
1379 {
1380         size_t src_len = strlen(src)+1;
1381
1382         *dest = NULL;
1383         return convert_string_allocate(NULL, CH_UNIX, CH_UTF8, src, src_len, (void **)dest, True);
1384 }
1385
1386 /**
1387  Copy a string from a ucs2 source to a unix char* destination.
1388  Flags can have:
1389   STR_TERMINATE means the string in src is null terminated.
1390   STR_NOALIGN   means don't try to align.
1391  if STR_TERMINATE is set then src_len is ignored if it is -1.
1392  src_len is the length of the source area in bytes
1393  Return the number of bytes occupied by the string in src.
1394  The resulting string in "dest" is always null terminated.
1395 **/
1396
1397 size_t pull_ucs2(const void *base_ptr, char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
1398 {
1399         size_t ret;
1400
1401         if (dest_len == (size_t)-1) {
1402                 /* No longer allow dest_len of -1. */
1403                 smb_panic("pull_ucs2 - invalid dest_len of -1");
1404         }
1405
1406         if (!src_len) {
1407                 if (dest && dest_len > 0) {
1408                         dest[0] = '\0';
1409                 }
1410                 return 0;
1411         }
1412
1413         if (ucs2_align(base_ptr, src, flags)) {
1414                 src = (const void *)((const char *)src + 1);
1415                 if (src_len != (size_t)-1)
1416                         src_len--;
1417         }
1418
1419         if (flags & STR_TERMINATE) {
1420                 /* src_len -1 is the default for null terminated strings. */
1421                 if (src_len != (size_t)-1) {
1422                         size_t len = strnlen_w((const smb_ucs2_t *)src,
1423                                                 src_len/2);
1424                         if (len < src_len/2)
1425                                 len++;
1426                         src_len = len*2;
1427                 }
1428         }
1429
1430         /* ucs2 is always a multiple of 2 bytes */
1431         if (src_len != (size_t)-1)
1432                 src_len &= ~1;
1433
1434         ret = convert_string(CH_UTF16LE, CH_UNIX, src, src_len, dest, dest_len, True);
1435         if (ret == (size_t)-1) {
1436                 ret = 0;
1437                 dest_len = 0;
1438         }
1439
1440         if (src_len == (size_t)-1)
1441                 src_len = ret*2;
1442
1443         if (dest_len && ret) {
1444                 /* Did we already process the terminating zero ? */
1445                 if (dest[MIN(ret-1, dest_len-1)] != 0) {
1446                         dest[MIN(ret, dest_len-1)] = 0;
1447                 }
1448         } else {
1449                 dest[0] = 0;
1450         }
1451
1452         return src_len;
1453 }
1454
1455 /**
1456  Copy a string from a ucs2 source to a unix char* destination.
1457  Talloc version with a base pointer.
1458  Uses malloc if TALLOC_CTX is NULL (this is a bad interface and
1459  needs fixing. JRA).
1460  Flags can have:
1461   STR_TERMINATE means the string in src is null terminated.
1462   STR_NOALIGN   means don't try to align.
1463  if STR_TERMINATE is set then src_len is ignored if it is -1.
1464  src_len is the length of the source area in bytes
1465  Return the number of bytes occupied by the string in src.
1466  The resulting string in "dest" is always null terminated.
1467 **/
1468
1469 size_t pull_ucs2_base_talloc(TALLOC_CTX *ctx,
1470                         const void *base_ptr,
1471                         char **ppdest,
1472                         const void *src,
1473                         size_t src_len,
1474                         int flags)
1475 {
1476         char *dest;
1477         size_t dest_len;
1478
1479         *ppdest = NULL;
1480
1481 #ifdef DEVELOPER
1482         /* Ensure we never use the braindead "malloc" varient. */
1483         if (ctx == NULL) {
1484                 smb_panic("NULL talloc CTX in pull_ucs2_base_talloc\n");
1485         }
1486 #endif
1487
1488         if (!src_len) {
1489                 return 0;
1490         }
1491
1492         if (ucs2_align(base_ptr, src, flags)) {
1493                 src = (const void *)((const char *)src + 1);
1494                 if (src_len != (size_t)-1)
1495                         src_len--;
1496         }
1497
1498         if (flags & STR_TERMINATE) {
1499                 /* src_len -1 is the default for null terminated strings. */
1500                 if (src_len != (size_t)-1) {
1501                         size_t len = strnlen_w((const smb_ucs2_t *)src,
1502                                                 src_len/2);
1503                         if (len < src_len/2)
1504                                 len++;
1505                         src_len = len*2;
1506                 } else {
1507                         /*
1508                          * src_len == -1 - alloc interface won't take this
1509                          * so we must calculate.
1510                          */
1511                         src_len = (strlen_w((const smb_ucs2_t *)src)+1)*sizeof(smb_ucs2_t);
1512                 }
1513                 /* Ensure we don't use an insane length from the client. */
1514                 if (src_len >= 1024*1024) {
1515                         smb_panic("Bad src length in pull_ucs2_base_talloc\n");
1516                 }
1517         }
1518
1519         /* ucs2 is always a multiple of 2 bytes */
1520         if (src_len != (size_t)-1) {
1521                 src_len &= ~1;
1522         }
1523
1524         dest_len = convert_string_talloc(ctx,
1525                                         CH_UTF16LE,
1526                                         CH_UNIX,
1527                                         src,
1528                                         src_len,
1529                                         (void *)&dest,
1530                                         True);
1531         if (dest_len == (size_t)-1) {
1532                 dest_len = 0;
1533         }
1534
1535         if (src_len == (size_t)-1)
1536                 src_len = dest_len*2;
1537
1538         if (dest_len) {
1539                 /* Did we already process the terminating zero ? */
1540                 if (dest[dest_len-1] != 0) {
1541                         size_t size = talloc_get_size(dest);
1542                         /* Have we got space to append the '\0' ? */
1543                         if (size <= dest_len) {
1544                                 /* No, realloc. */
1545                                 dest = TALLOC_REALLOC_ARRAY(ctx, dest, char,
1546                                                 dest_len+1);
1547                                 if (!dest) {
1548                                         /* talloc fail. */
1549                                         dest_len = (size_t)-1;
1550                                         return 0;
1551                                 }
1552                         }
1553                         /* Yay - space ! */
1554                         dest[dest_len] = '\0';
1555                         dest_len++;
1556                 }
1557         } else if (dest) {
1558                 dest[0] = 0;
1559         }
1560
1561         *ppdest = dest;
1562         return src_len;
1563 }
1564
1565 size_t pull_ucs2_fstring(char *dest, const void *src)
1566 {
1567         return pull_ucs2(NULL, dest, src, sizeof(fstring), -1, STR_TERMINATE);
1568 }
1569
1570 /**
1571  * Copy a string from a UCS2 src to a unix char * destination, allocating a buffer using talloc
1572  *
1573  * @param dest always set at least to NULL
1574  *
1575  * @returns The number of bytes occupied by the string in the destination
1576  **/
1577
1578 size_t pull_ucs2_talloc(TALLOC_CTX *ctx, char **dest, const smb_ucs2_t *src)
1579 {
1580         size_t src_len = (strlen_w(src)+1) * sizeof(smb_ucs2_t);
1581         *dest = NULL;
1582         return convert_string_talloc(ctx, CH_UTF16LE, CH_UNIX, src, src_len, (void **)dest, True);
1583 }
1584
1585 /**
1586  * Copy a string from a UCS2 src to a unix char * destination, allocating a buffer
1587  *
1588  * @param dest always set at least to NULL
1589  *
1590  * @returns The number of bytes occupied by the string in the destination
1591  **/
1592
1593 size_t pull_ucs2_allocate(char **dest, const smb_ucs2_t *src)
1594 {
1595         size_t src_len = (strlen_w(src)+1) * sizeof(smb_ucs2_t);
1596         *dest = NULL;
1597         return convert_string_allocate(NULL, CH_UTF16LE, CH_UNIX, src, src_len, (void **)dest, True);
1598 }
1599
1600 /**
1601  * Copy a string from a UTF-8 src to a unix char * destination, allocating a buffer using talloc
1602  *
1603  * @param dest always set at least to NULL
1604  *
1605  * @returns The number of bytes occupied by the string in the destination
1606  **/
1607
1608 size_t pull_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src)
1609 {
1610         size_t src_len = strlen(src)+1;
1611         *dest = NULL;
1612         return convert_string_talloc(ctx, CH_UTF8, CH_UNIX, src, src_len, (void **)dest, True);
1613 }
1614
1615 /**
1616  * Copy a string from a UTF-8 src to a unix char * destination, allocating a buffer
1617  *
1618  * @param dest always set at least to NULL
1619  *
1620  * @returns The number of bytes occupied by the string in the destination
1621  **/
1622
1623 size_t pull_utf8_allocate(char **dest, const char *src)
1624 {
1625         size_t src_len = strlen(src)+1;
1626         *dest = NULL;
1627         return convert_string_allocate(NULL, CH_UTF8, CH_UNIX, src, src_len, (void **)dest, True);
1628 }
1629
1630 /**
1631  * Copy a string from a DOS src to a unix char * destination, allocating a buffer using talloc
1632  *
1633  * @param dest always set at least to NULL
1634  *
1635  * @returns The number of bytes occupied by the string in the destination
1636  **/
1637
1638 size_t pull_ascii_talloc(TALLOC_CTX *ctx, char **dest, const char *src)
1639 {
1640         size_t src_len = strlen(src)+1;
1641         *dest = NULL;
1642         return convert_string_talloc(ctx, CH_DOS, CH_UNIX, src, src_len, (void **)dest, True);
1643 }
1644
1645 /**
1646  Copy a string from a char* src to a unicode or ascii
1647  dos codepage destination choosing unicode or ascii based on the
1648  flags in the SMB buffer starting at base_ptr.
1649  Return the number of bytes occupied by the string in the destination.
1650  flags can have:
1651   STR_TERMINATE means include the null termination.
1652   STR_UPPER     means uppercase in the destination.
1653   STR_ASCII     use ascii even with unicode packet.
1654   STR_NOALIGN   means don't do alignment.
1655  dest_len is the maximum length allowed in the destination. If dest_len
1656  is -1 then no maxiumum is used.
1657 **/
1658
1659 size_t push_string_fn(const char *function, unsigned int line,
1660                       const void *base_ptr, uint16 flags2,
1661                       void *dest, const char *src,
1662                       size_t dest_len, int flags)
1663 {
1664 #ifdef DEVELOPER
1665         /* We really need to zero fill here, not clobber
1666          * region, as we want to ensure that valgrind thinks
1667          * all of the outgoing buffer has been written to
1668          * so a send() or write() won't trap an error.
1669          * JRA.
1670          */
1671 #if 0
1672         clobber_region(function, line, dest, dest_len);
1673 #else
1674         memset(dest, '\0', dest_len);
1675 #endif
1676 #endif
1677
1678         if (!(flags & STR_ASCII) && \
1679             ((flags & STR_UNICODE || \
1680               (flags2 & FLAGS2_UNICODE_STRINGS)))) {
1681                 return push_ucs2(base_ptr, dest, src, dest_len, flags);
1682         }
1683         return push_ascii(dest, src, dest_len, flags);
1684 }
1685
1686
1687 /**
1688  Copy a string from a unicode or ascii source (depending on
1689  the packet flags) to a char* destination.
1690  Flags can have:
1691   STR_TERMINATE means the string in src is null terminated.
1692   STR_UNICODE   means to force as unicode.
1693   STR_ASCII     use ascii even with unicode packet.
1694   STR_NOALIGN   means don't do alignment.
1695  if STR_TERMINATE is set then src_len is ignored is it is -1
1696  src_len is the length of the source area in bytes.
1697  Return the number of bytes occupied by the string in src.
1698  The resulting string in "dest" is always null terminated.
1699 **/
1700
1701 size_t pull_string_fn(const char *function, unsigned int line,
1702                       const void *base_ptr, uint16 smb_flags2, char *dest,
1703                       const void *src, size_t dest_len, size_t src_len,
1704                       int flags)
1705 {
1706 #ifdef DEVELOPER
1707         clobber_region(function, line, dest, dest_len);
1708 #endif
1709
1710         if ((base_ptr == NULL) && ((flags & (STR_ASCII|STR_UNICODE)) == 0)) {
1711                 smb_panic("No base ptr to get flg2 and neither ASCII nor "
1712                           "UNICODE defined");
1713         }
1714
1715         if (!(flags & STR_ASCII) && \
1716             ((flags & STR_UNICODE || \
1717               (smb_flags2 & FLAGS2_UNICODE_STRINGS)))) {
1718                 return pull_ucs2(base_ptr, dest, src, dest_len, src_len, flags);
1719         }
1720         return pull_ascii(dest, src, dest_len, src_len, flags);
1721 }
1722
1723 /**
1724  Copy a string from a unicode or ascii source (depending on
1725  the packet flags) to a char* destination.
1726  Variant that uses talloc.
1727  Flags can have:
1728   STR_TERMINATE means the string in src is null terminated.
1729   STR_UNICODE   means to force as unicode.
1730   STR_ASCII     use ascii even with unicode packet.
1731   STR_NOALIGN   means don't do alignment.
1732  if STR_TERMINATE is set then src_len is ignored is it is -1
1733  src_len is the length of the source area in bytes.
1734  Return the number of bytes occupied by the string in src.
1735  The resulting string in "dest" is always null terminated.
1736 **/
1737
1738 size_t pull_string_talloc_fn(const char *function,
1739                         unsigned int line,
1740                         TALLOC_CTX *ctx,
1741                         const void *base_ptr,
1742                         uint16 smb_flags2,
1743                         char **ppdest,
1744                         const void *src,
1745                         size_t src_len,
1746                         int flags)
1747 {
1748         if ((base_ptr == NULL) && ((flags & (STR_ASCII|STR_UNICODE)) == 0)) {
1749                 smb_panic("No base ptr to get flg2 and neither ASCII nor "
1750                           "UNICODE defined");
1751         }
1752
1753         if (!(flags & STR_ASCII) && \
1754             ((flags & STR_UNICODE || \
1755               (smb_flags2 & FLAGS2_UNICODE_STRINGS)))) {
1756                 return pull_ucs2_base_talloc(ctx,
1757                                         base_ptr,
1758                                         ppdest,
1759                                         src,
1760                                         src_len,
1761                                         flags);
1762         }
1763         return pull_ascii_base_talloc(ctx,
1764                                         ppdest,
1765                                         src,
1766                                         src_len,
1767                                         flags);
1768 }
1769
1770
1771 size_t align_string(const void *base_ptr, const char *p, int flags)
1772 {
1773         if (!(flags & STR_ASCII) && \
1774             ((flags & STR_UNICODE || \
1775               (SVAL(base_ptr, smb_flg2) & FLAGS2_UNICODE_STRINGS)))) {
1776                 return ucs2_align(base_ptr, p, flags);
1777         }
1778         return 0;
1779 }
1780
1781 /*
1782   Return the unicode codepoint for the next multi-byte CH_UNIX character
1783   in the string. The unicode codepoint (codepoint_t) is an unsinged 32 bit value.
1784
1785   Also return the number of bytes consumed (which tells the caller
1786   how many bytes to skip to get to the next CH_UNIX character).
1787
1788   Return INVALID_CODEPOINT if the next character cannot be converted.
1789 */
1790
1791 codepoint_t next_codepoint(const char *str, size_t *size)
1792 {
1793         /* It cannot occupy more than 4 bytes in UTF16 format */
1794         uint8_t buf[4];
1795         smb_iconv_t descriptor;
1796         size_t ilen_orig;
1797         size_t ilen;
1798         size_t olen;
1799         char *outbuf;
1800
1801         if ((str[0] & 0x80) == 0) {
1802                 *size = 1;
1803                 return (codepoint_t)str[0];
1804         }
1805
1806         /* We assume that no multi-byte character can take
1807            more than 5 bytes. This is OK as we only
1808            support codepoints up to 1M */
1809
1810         ilen_orig = strnlen(str, 5);
1811         ilen = ilen_orig;
1812
1813         lazy_initialize_conv();
1814
1815         descriptor = conv_handles[CH_UNIX][CH_UTF16LE];
1816         if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
1817                 *size = 1;
1818                 return INVALID_CODEPOINT;
1819         }
1820
1821         /* This looks a little strange, but it is needed to cope
1822            with codepoints above 64k which are encoded as per RFC2781. */
1823         olen = 2;
1824         outbuf = (char *)buf;
1825         smb_iconv(descriptor, &str, &ilen, &outbuf, &olen);
1826         if (olen == 2) {
1827                 /* We failed to convert to a 2 byte character.
1828                    See if we can convert to a 4 UTF16-LE byte char encoding.
1829                 */
1830                 olen = 4;
1831                 outbuf = (char *)buf;
1832                 smb_iconv(descriptor,  &str, &ilen, &outbuf, &olen);
1833                 if (olen == 4) {
1834                         /* We didn't convert any bytes */
1835                         *size = 1;
1836                         return INVALID_CODEPOINT;
1837                 }
1838                 olen = 4 - olen;
1839         } else {
1840                 olen = 2 - olen;
1841         }
1842
1843         *size = ilen_orig - ilen;
1844
1845         if (olen == 2) {
1846                 /* 2 byte, UTF16-LE encoded value. */
1847                 return (codepoint_t)SVAL(buf, 0);
1848         }
1849         if (olen == 4) {
1850                 /* Decode a 4 byte UTF16-LE character manually.
1851                    See RFC2871 for the encoding machanism.
1852                 */
1853                 codepoint_t w1 = SVAL(buf,0) & ~0xD800;
1854                 codepoint_t w2 = SVAL(buf,2) & ~0xDC00;
1855
1856                 return (codepoint_t)0x10000 +
1857                                 (w1 << 10) + w2;
1858         }
1859
1860         /* no other length is valid */
1861         return INVALID_CODEPOINT;
1862 }