source3/lib/charcnv.c

   1 /*
   2    Unix SMB/CIFS implementation.
   3    Character set conversion Extensions
   4    Copyright (C) Igor Vergeichik <iverg@mail.ru> 2001
   5    Copyright (C) Andrew Tridgell 2001
   6    Copyright (C) Simo Sorce 2001
   7    Copyright (C) Martin Pool 2003
   8
   9    This program is free software; you can redistribute it and/or modify
  10    it under the terms of the GNU General Public License as published by
  11    the Free Software Foundation; either version 3 of the License, or
  12    (at your option) any later version.
  13
  14    This program is distributed in the hope that it will be useful,
  15    but WITHOUT ANY WARRANTY; without even the implied warranty of
  16    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  17    GNU General Public License for more details.
  18
  19    You should have received a copy of the GNU General Public License
  20    along with this program.  If not, see <http://www.gnu.org/licenses/>.
  21
  22 */
  23 #include "includes.h"
  24
  25 /* We can parameterize this if someone complains.... JRA. */
  26
  27 char lp_failed_convert_char(void)
  28 {
  29         return '_';
  30 }
  31
  32 /**
  33  * @file
  34  *
  35  * @brief Character-set conversion routines built on our iconv.
  36  *
  37  * @note Samba's internal character set (at least in the 3.0 series)
  38  * is always the same as the one for the Unix filesystem.  It is
  39  * <b>not</b> necessarily UTF-8 and may be different on machines that
  40  * need i18n filenames to be compatible with Unix software.  It does
  41  * have to be a superset of ASCII.  All multibyte sequences must start
  42  * with a byte with the high bit set.
  43  *
  44  * @sa lib/iconv.c
  45  */
  46
  47
  48 static smb_iconv_t conv_handles[NUM_CHARSETS][NUM_CHARSETS];
  49 static BOOL conv_silent; /* Should we do a debug if the conversion fails ? */
  50
  51 /**
  52  * Return the name of a charset to give to iconv().
  53  **/
  54 static const char *charset_name(charset_t ch)
  55 {
  56         const char *ret = NULL;
  57
  58         if (ch == CH_UTF16LE) ret = "UTF-16LE";
  59         else if (ch == CH_UTF16BE) ret = "UTF-16BE";
  60         else if (ch == CH_UNIX) ret = lp_unix_charset();
  61         else if (ch == CH_DOS) ret = lp_dos_charset();
  62         else if (ch == CH_DISPLAY) ret = lp_display_charset();
  63         else if (ch == CH_UTF8) ret = "UTF8";
  64
  65 #if defined(HAVE_NL_LANGINFO) && defined(CODESET)
  66         if (ret && !strcmp(ret, "LOCALE")) {
  67                 const char *ln = NULL;
  68
  69 #ifdef HAVE_SETLOCALE
  70                 setlocale(LC_ALL, "");
  71 #endif
  72                 ln = nl_langinfo(CODESET);
  73                 if (ln) {
  74                         /* Check whether the charset name is supported
  75                            by iconv */
  76                         smb_iconv_t handle = smb_iconv_open(ln,"UCS-2LE");
  77                         if (handle == (smb_iconv_t) -1) {
  78                                 DEBUG(5,("Locale charset '%s' unsupported, using ASCII instead\n", ln));
  79                                 ln = NULL;
  80                         } else {
  81                                 DEBUG(5,("Substituting charset '%s' for LOCALE\n", ln));
  82                                 smb_iconv_close(handle);
  83                         }
  84                 }
  85                 ret = ln;
  86         }
  87 #endif
  88
  89         if (!ret || !*ret) ret = "ASCII";
  90         return ret;
  91 }
  92
  93 void lazy_initialize_conv(void)
  94 {
  95         static int initialized = False;
  96
  97         if (!initialized) {
  98                 initialized = True;
  99                 load_case_tables();
 100                 init_iconv();
 101         }
 102 }
 103
 104 /**
 105  * Destroy global objects allocated by init_iconv()
 106  **/
 107 void gfree_charcnv(void)
 108 {
 109         int c1, c2;
 110
 111         for (c1=0;c1<NUM_CHARSETS;c1++) {
 112                 for (c2=0;c2<NUM_CHARSETS;c2++) {
 113                         if ( conv_handles[c1][c2] ) {
 114                                 smb_iconv_close( conv_handles[c1][c2] );
 115                                 conv_handles[c1][c2] = 0;
 116                         }
 117                 }
 118         }
 119 }
 120
 121 /**
 122  * Initialize iconv conversion descriptors.
 123  *
 124  * This is called the first time it is needed, and also called again
 125  * every time the configuration is reloaded, because the charset or
 126  * codepage might have changed.
 127  **/
 128 void init_iconv(void)
 129 {
 130         int c1, c2;
 131         BOOL did_reload = False;
 132
 133         /* so that charset_name() works we need to get the UNIX<->UCS2 going
 134            first */
 135         if (!conv_handles[CH_UNIX][CH_UTF16LE])
 136                 conv_handles[CH_UNIX][CH_UTF16LE] = smb_iconv_open(charset_name(CH_UTF16LE), "ASCII");
 137
 138         if (!conv_handles[CH_UTF16LE][CH_UNIX])
 139                 conv_handles[CH_UTF16LE][CH_UNIX] = smb_iconv_open("ASCII", charset_name(CH_UTF16LE));
 140
 141         for (c1=0;c1<NUM_CHARSETS;c1++) {
 142                 for (c2=0;c2<NUM_CHARSETS;c2++) {
 143                         const char *n1 = charset_name((charset_t)c1);
 144                         const char *n2 = charset_name((charset_t)c2);
 145                         if (conv_handles[c1][c2] &&
 146                             strcmp(n1, conv_handles[c1][c2]->from_name) == 0 &&
 147                             strcmp(n2, conv_handles[c1][c2]->to_name) == 0)
 148                                 continue;
 149
 150                         did_reload = True;
 151
 152                         if (conv_handles[c1][c2])
 153                                 smb_iconv_close(conv_handles[c1][c2]);
 154
 155                         conv_handles[c1][c2] = smb_iconv_open(n2,n1);
 156                         if (conv_handles[c1][c2] == (smb_iconv_t)-1) {
 157                                 DEBUG(0,("init_iconv: Conversion from %s to %s not supported\n",
 158                                          charset_name((charset_t)c1), charset_name((charset_t)c2)));
 159                                 if (c1 != CH_UTF16LE && c1 != CH_UTF16BE) {
 160                                         n1 = "ASCII";
 161                                 }
 162                                 if (c2 != CH_UTF16LE && c2 != CH_UTF16BE) {
 163                                         n2 = "ASCII";
 164                                 }
 165                                 DEBUG(0,("init_iconv: Attempting to replace with conversion from %s to %s\n",
 166                                         n1, n2 ));
 167                                 conv_handles[c1][c2] = smb_iconv_open(n2,n1);
 168                                 if (!conv_handles[c1][c2]) {
 169                                         DEBUG(0,("init_iconv: Conversion from %s to %s failed", n1, n2));
 170                                         smb_panic("init_iconv: conv_handle initialization failed");
 171                                 }
 172                         }
 173                 }
 174         }
 175
 176         if (did_reload) {
 177                 /* XXX: Does this really get called every time the dos
 178                  * codepage changes? */
 179                 /* XXX: Is the did_reload test too strict? */
 180                 conv_silent = True;
 181                 init_doschar_table();
 182                 init_valid_table();
 183                 conv_silent = False;
 184         }
 185 }
 186
 187 /**
 188  * Convert string from one encoding to another, making error checking etc
 189  * Slow path version - uses (slow) iconv.
 190  *
 191  * @param src pointer to source string (multibyte or singlebyte)
 192  * @param srclen length of the source string in bytes
 193  * @param dest pointer to destination string (multibyte or singlebyte)
 194  * @param destlen maximal length allowed for string
 195  * @param allow_bad_conv determines if a "best effort" conversion is acceptable (never returns errors)
 196  * @returns the number of bytes occupied in the destination
 197  *
 198  * Ensure the srclen contains the terminating zero.
 199  *
 200  **/
 201
 202 static size_t convert_string_internal(charset_t from, charset_t to,
 203                       void const *src, size_t srclen,
 204                       void *dest, size_t destlen, BOOL allow_bad_conv)
 205 {
 206         size_t i_len, o_len;
 207         size_t retval;
 208         const char* inbuf = (const char*)src;
 209         char* outbuf = (char*)dest;
 210         smb_iconv_t descriptor;
 211
 212         lazy_initialize_conv();
 213
 214         descriptor = conv_handles[from][to];
 215
 216         if (srclen == (size_t)-1) {
 217                 if (from == CH_UTF16LE || from == CH_UTF16BE) {
 218                         srclen = (strlen_w((const smb_ucs2_t *)src)+1) * 2;
 219                 } else {
 220                         srclen = strlen((const char *)src)+1;
 221                 }
 222         }
 223
 224
 225         if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
 226                 if (!conv_silent)
 227                         DEBUG(0,("convert_string_internal: Conversion not supported.\n"));
 228                 return (size_t)-1;
 229         }
 230
 231         i_len=srclen;
 232         o_len=destlen;
 233
 234  again:
 235
 236         retval = smb_iconv(descriptor, &inbuf, &i_len, &outbuf, &o_len);
 237         if(retval==(size_t)-1) {
 238                 const char *reason="unknown error";
 239                 switch(errno) {
 240                         case EINVAL:
 241                                 reason="Incomplete multibyte sequence";
 242                                 if (!conv_silent)
 243                                         DEBUG(3,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
 244                                 if (allow_bad_conv)
 245                                         goto use_as_is;
 246                                 break;
 247                         case E2BIG:
 248                                 reason="No more room";
 249                                 if (!conv_silent) {
 250                                         if (from == CH_UNIX) {
 251                                                 DEBUG(3,("E2BIG: convert_string(%s,%s): srclen=%u destlen=%u - '%s'\n",
 252                                                         charset_name(from), charset_name(to),
 253                                                         (unsigned int)srclen, (unsigned int)destlen, (const char *)src));
 254                                         } else {
 255                                                 DEBUG(3,("E2BIG: convert_string(%s,%s): srclen=%u destlen=%u\n",
 256                                                         charset_name(from), charset_name(to),
 257                                                         (unsigned int)srclen, (unsigned int)destlen));
 258                                         }
 259                                 }
 260                                 break;
 261                         case EILSEQ:
 262                                 reason="Illegal multibyte sequence";
 263                                 if (!conv_silent)
 264                                         DEBUG(3,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
 265                                 if (allow_bad_conv)
 266                                         goto use_as_is;
 267                                 break;
 268                         default:
 269                                 if (!conv_silent)
 270                                         DEBUG(0,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
 271                                 break;
 272                 }
 273                 /* smb_panic(reason); */
 274         }
 275         return destlen-o_len;
 276
 277  use_as_is:
 278
 279         /*
 280          * Conversion not supported. This is actually an error, but there are so
 281          * many misconfigured iconv systems and smb.conf's out there we can't just
 282          * fail. Do a very bad conversion instead.... JRA.
 283          */
 284
 285         {
 286                 if (o_len == 0 || i_len == 0)
 287                         return destlen - o_len;
 288
 289                 if (((from == CH_UTF16LE)||(from == CH_UTF16BE)) &&
 290                                 ((to != CH_UTF16LE)||(to != CH_UTF16BE))) {
 291                         /* Can't convert from utf16 any endian to multibyte.
 292                            Replace with the default fail char.
 293                         */
 294                         if (i_len < 2)
 295                                 return destlen - o_len;
 296                         if (i_len >= 2) {
 297                                 *outbuf = lp_failed_convert_char();
 298
 299                                 outbuf++;
 300                                 o_len--;
 301
 302                                 inbuf += 2;
 303                                 i_len -= 2;
 304                         }
 305
 306                         if (o_len == 0 || i_len == 0)
 307                                 return destlen - o_len;
 308
 309                         /* Keep trying with the next char... */
 310                         goto again;
 311
 312                 } else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) {
 313                         /* Can't convert to UTF16LE - just widen by adding the
 314                            default fail char then zero.
 315                         */
 316                         if (o_len < 2)
 317                                 return destlen - o_len;
 318
 319                         outbuf[0] = lp_failed_convert_char();
 320                         outbuf[1] = '\0';
 321
 322                         inbuf++;
 323                         i_len--;
 324
 325                         outbuf += 2;
 326                         o_len -= 2;
 327
 328                         if (o_len == 0 || i_len == 0)
 329                                 return destlen - o_len;
 330
 331                         /* Keep trying with the next char... */
 332                         goto again;
 333
 334                 } else if (from != CH_UTF16LE && from != CH_UTF16BE &&
 335                                 to != CH_UTF16LE && to != CH_UTF16BE) {
 336                         /* Failed multibyte to multibyte. Just copy the default fail char and
 337                                 try again. */
 338                         outbuf[0] = lp_failed_convert_char();
 339
 340                         inbuf++;
 341                         i_len--;
 342
 343                         outbuf++;
 344                         o_len--;
 345
 346                         if (o_len == 0 || i_len == 0)
 347                                 return destlen - o_len;
 348
 349                         /* Keep trying with the next char... */
 350                         goto again;
 351
 352                 } else {
 353                         /* Keep compiler happy.... */
 354                         return destlen - o_len;
 355                 }
 356         }
 357 }
 358
 359 /**
 360  * Convert string from one encoding to another, making error checking etc
 361  * Fast path version - handles ASCII first.
 362  *
 363  * @param src pointer to source string (multibyte or singlebyte)
 364  * @param srclen length of the source string in bytes, or -1 for nul terminated.
 365  * @param dest pointer to destination string (multibyte or singlebyte)
 366  * @param destlen maximal length allowed for string - *NEVER* -1.
 367  * @param allow_bad_conv determines if a "best effort" conversion is acceptable (never returns errors)
 368  * @returns the number of bytes occupied in the destination
 369  *
 370  * Ensure the srclen contains the terminating zero.
 371  *
 372  * This function has been hand-tuned to provide a fast path.
 373  * Don't change unless you really know what you are doing. JRA.
 374  **/
 375
 376 size_t convert_string(charset_t from, charset_t to,
 377                       void const *src, size_t srclen,
 378                       void *dest, size_t destlen, BOOL allow_bad_conv)
 379 {
 380         /*
 381          * NB. We deliberately don't do a strlen here if srclen == -1.
 382          * This is very expensive over millions of calls and is taken
 383          * care of in the slow path in convert_string_internal. JRA.
 384          */
 385
 386 #ifdef DEVELOPER
 387         SMB_ASSERT(destlen != (size_t)-1);
 388 #endif
 389
 390         if (srclen == 0)
 391                 return 0;
 392
 393         if (from != CH_UTF16LE && from != CH_UTF16BE && to != CH_UTF16LE && to != CH_UTF16BE) {
 394                 const unsigned char *p = (const unsigned char *)src;
 395                 unsigned char *q = (unsigned char *)dest;
 396                 size_t slen = srclen;
 397                 size_t dlen = destlen;
 398                 unsigned char lastp = '\0';
 399                 size_t retval = 0;
 400
 401                 /* If all characters are ascii, fast path here. */
 402                 while (slen && dlen) {
 403                         if ((lastp = *p) <= 0x7f) {
 404                                 *q++ = *p++;
 405                                 if (slen != (size_t)-1) {
 406                                         slen--;
 407                                 }
 408                                 dlen--;
 409                                 retval++;
 410                                 if (!lastp)
 411                                         break;
 412                         } else {
 413 #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
 414                                 goto general_case;
 415 #else
 416                                 return retval + convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
 417 #endif
 418                         }
 419                 }
 420                 if (!dlen) {
 421                         /* Even if we fast path we should note if we ran out of room. */
 422                         if (((slen != (size_t)-1) && slen) ||
 423                                         ((slen == (size_t)-1) && lastp)) {
 424                                 errno = E2BIG;
 425                         }
 426                 }
 427                 return retval;
 428         } else if (from == CH_UTF16LE && to != CH_UTF16LE) {
 429                 const unsigned char *p = (const unsigned char *)src;
 430                 unsigned char *q = (unsigned char *)dest;
 431                 size_t retval = 0;
 432                 size_t slen = srclen;
 433                 size_t dlen = destlen;
 434                 unsigned char lastp = '\0';
 435
 436                 /* If all characters are ascii, fast path here. */
 437                 while (((slen == (size_t)-1) || (slen >= 2)) && dlen) {
 438                         if (((lastp = *p) <= 0x7f) && (p[1] == 0)) {
 439                                 *q++ = *p;
 440                                 if (slen != (size_t)-1) {
 441                                         slen -= 2;
 442                                 }
 443                                 p += 2;
 444                                 dlen--;
 445                                 retval++;
 446                                 if (!lastp)
 447                                         break;
 448                         } else {
 449 #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
 450                                 goto general_case;
 451 #else
 452                                 return retval + convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
 453 #endif
 454                         }
 455                 }
 456                 if (!dlen) {
 457                         /* Even if we fast path we should note if we ran out of room. */
 458                         if (((slen != (size_t)-1) && slen) ||
 459                                         ((slen == (size_t)-1) && lastp)) {
 460                                 errno = E2BIG;
 461                         }
 462                 }
 463                 return retval;
 464         } else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) {
 465                 const unsigned char *p = (const unsigned char *)src;
 466                 unsigned char *q = (unsigned char *)dest;
 467                 size_t retval = 0;
 468                 size_t slen = srclen;
 469                 size_t dlen = destlen;
 470                 unsigned char lastp = '\0';
 471
 472                 /* If all characters are ascii, fast path here. */
 473                 while (slen && (dlen >= 2)) {
 474                         if ((lastp = *p) <= 0x7F) {
 475                                 *q++ = *p++;
 476                                 *q++ = '\0';
 477                                 if (slen != (size_t)-1) {
 478                                         slen--;
 479                                 }
 480                                 dlen -= 2;
 481                                 retval += 2;
 482                                 if (!lastp)
 483                                         break;
 484                         } else {
 485 #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
 486                                 goto general_case;
 487 #else
 488                                 return retval + convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
 489 #endif
 490                         }
 491                 }
 492                 if (!dlen) {
 493                         /* Even if we fast path we should note if we ran out of room. */
 494                         if (((slen != (size_t)-1) && slen) ||
 495                                         ((slen == (size_t)-1) && lastp)) {
 496                                 errno = E2BIG;
 497                         }
 498                 }
 499                 return retval;
 500         }
 501
 502 #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
 503   general_case:
 504 #endif
 505         return convert_string_internal(from, to, src, srclen, dest, destlen, allow_bad_conv);
 506 }
 507
 508 /**
 509  * Convert between character sets, allocating a new buffer for the result.
 510  *
 511  * @param ctx TALLOC_CTX to use to allocate with. If NULL use malloc.
 512  * @param srclen length of source buffer.
 513  * @param dest always set at least to NULL
 514  * @note -1 is not accepted for srclen.
 515  *
 516  * @returns Size in bytes of the converted string; or -1 in case of error.
 517  *
 518  * Ensure the srclen contains the terminating zero.
 519  *
 520  * I hate the goto's in this function. It's embarressing.....
 521  * There has to be a cleaner way to do this. JRA.
 522  **/
 523
 524 size_t convert_string_allocate(TALLOC_CTX *ctx, charset_t from, charset_t to,
 525                                void const *src, size_t srclen, void *dst, BOOL allow_bad_conv)
 526 {
 527         size_t i_len, o_len, destlen = MAX(srclen, 512);
 528         size_t retval;
 529         const char *inbuf = (const char *)src;
 530         char *outbuf = NULL, *ob = NULL;
 531         smb_iconv_t descriptor;
 532         void **dest = (void **)dst;
 533
 534         *dest = NULL;
 535
 536         if (src == NULL || srclen == (size_t)-1)
 537                 return (size_t)-1;
 538         if (srclen == 0)
 539                 return 0;
 540
 541         lazy_initialize_conv();
 542
 543         descriptor = conv_handles[from][to];
 544
 545         if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
 546                 if (!conv_silent)
 547                         DEBUG(0,("convert_string_allocate: Conversion not supported.\n"));
 548                 return (size_t)-1;
 549         }
 550
 551   convert:
 552
 553         if ((destlen*2) < destlen) {
 554                 /* wrapped ! abort. */
 555                 if (!conv_silent)
 556                         DEBUG(0, ("convert_string_allocate: destlen wrapped !\n"));
 557                 if (!ctx)
 558                         SAFE_FREE(outbuf);
 559                 return (size_t)-1;
 560         } else {
 561                 destlen = destlen * 2;
 562         }
 563
 564         if (ctx) {
 565                 ob = (char *)TALLOC_REALLOC(ctx, ob, destlen);
 566         } else {
 567                 ob = (char *)SMB_REALLOC(ob, destlen);
 568         }
 569
 570         if (!ob) {
 571                 DEBUG(0, ("convert_string_allocate: realloc failed!\n"));
 572                 return (size_t)-1;
 573         }
 574         outbuf = ob;
 575         i_len = srclen;
 576         o_len = destlen;
 577
 578  again:
 579
 580         retval = smb_iconv(descriptor,
 581                            &inbuf, &i_len,
 582                            &outbuf, &o_len);
 583         if(retval == (size_t)-1)                {
 584                 const char *reason="unknown error";
 585                 switch(errno) {
 586                         case EINVAL:
 587                                 reason="Incomplete multibyte sequence";
 588                                 if (!conv_silent)
 589                                         DEBUG(3,("convert_string_allocate: Conversion error: %s(%s)\n",reason,inbuf));
 590                                 if (allow_bad_conv)
 591                                         goto use_as_is;
 592                                 break;
 593                         case E2BIG:
 594                                 goto convert;
 595                         case EILSEQ:
 596                                 reason="Illegal multibyte sequence";
 597                                 if (!conv_silent)
 598                                         DEBUG(3,("convert_string_allocate: Conversion error: %s(%s)\n",reason,inbuf));
 599                                 if (allow_bad_conv)
 600                                         goto use_as_is;
 601                                 break;
 602                 }
 603                 if (!conv_silent)
 604                         DEBUG(0,("Conversion error: %s(%s)\n",reason,inbuf));
 605                 /* smb_panic(reason); */
 606                 return (size_t)-1;
 607         }
 608
 609   out:
 610
 611         destlen = destlen - o_len;
 612         if (ctx) {
 613                 ob = (char *)TALLOC_REALLOC(ctx,ob,destlen);
 614         } else {
 615                 ob = (char *)SMB_REALLOC(ob,destlen);
 616         }
 617
 618         if (destlen && !ob) {
 619                 DEBUG(0, ("convert_string_allocate: out of memory!\n"));
 620                 return (size_t)-1;
 621         }
 622
 623         *dest = ob;
 624         return destlen;
 625
 626  use_as_is:
 627
 628         /*
 629          * Conversion not supported. This is actually an error, but there are so
 630          * many misconfigured iconv systems and smb.conf's out there we can't just
 631          * fail. Do a very bad conversion instead.... JRA.
 632          */
 633
 634         {
 635                 if (o_len == 0 || i_len == 0)
 636                         goto out;
 637
 638                 if (((from == CH_UTF16LE)||(from == CH_UTF16BE)) &&
 639                                 ((to != CH_UTF16LE)||(to != CH_UTF16BE))) {
 640                         /* Can't convert from utf16 any endian to multibyte.
 641                            Replace with the default fail char.
 642                         */
 643
 644                         if (i_len < 2)
 645                                 goto out;
 646
 647                         if (i_len >= 2) {
 648                                 *outbuf = lp_failed_convert_char();
 649
 650                                 outbuf++;
 651                                 o_len--;
 652
 653                                 inbuf += 2;
 654                                 i_len -= 2;
 655                         }
 656
 657                         if (o_len == 0 || i_len == 0)
 658                                 goto out;
 659
 660                         /* Keep trying with the next char... */
 661                         goto again;
 662
 663                 } else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) {
 664                         /* Can't convert to UTF16LE - just widen by adding the
 665                            default fail char then zero.
 666                         */
 667                         if (o_len < 2)
 668                                 goto out;
 669
 670                         outbuf[0] = lp_failed_convert_char();
 671                         outbuf[1] = '\0';
 672
 673                         inbuf++;
 674                         i_len--;
 675
 676                         outbuf += 2;
 677                         o_len -= 2;
 678
 679                         if (o_len == 0 || i_len == 0)
 680                                 goto out;
 681
 682                         /* Keep trying with the next char... */
 683                         goto again;
 684
 685                 } else if (from != CH_UTF16LE && from != CH_UTF16BE &&
 686                                 to != CH_UTF16LE && to != CH_UTF16BE) {
 687                         /* Failed multibyte to multibyte. Just copy the default fail char and
 688                            try again. */
 689                         outbuf[0] = lp_failed_convert_char();
 690
 691                         inbuf++;
 692                         i_len--;
 693
 694                         outbuf++;
 695                         o_len--;
 696
 697                         if (o_len == 0 || i_len == 0)
 698                                 goto out;
 699
 700                         /* Keep trying with the next char... */
 701                         goto again;
 702
 703                 } else {
 704                         /* Keep compiler happy.... */
 705                         goto out;
 706                 }
 707         }
 708 }
 709
 710 /**
 711  * Convert between character sets, allocating a new buffer using talloc for the result.
 712  *
 713  * @param srclen length of source buffer.
 714  * @param dest always set at least to NULL
 715  * @note -1 is not accepted for srclen.
 716  *
 717  * @returns Size in bytes of the converted string; or -1 in case of error.
 718  **/
 719 size_t convert_string_talloc(TALLOC_CTX *ctx, charset_t from, charset_t to,
 720                              void const *src, size_t srclen, void *dst,
 721                              BOOL allow_bad_conv)
 722 {
 723         void **dest = (void **)dst;
 724         size_t dest_len;
 725
 726         *dest = NULL;
 727         dest_len=convert_string_allocate(ctx, from, to, src, srclen, dest, allow_bad_conv);
 728         if (dest_len == (size_t)-1)
 729                 return (size_t)-1;
 730         if (*dest == NULL)
 731                 return (size_t)-1;
 732         return dest_len;
 733 }
 734
 735 size_t unix_strupper(const char *src, size_t srclen, char *dest, size_t destlen)
 736 {
 737         size_t size;
 738         smb_ucs2_t *buffer;
 739
 740         size = push_ucs2_allocate(&buffer, src);
 741         if (size == (size_t)-1) {
 742                 smb_panic("failed to create UCS2 buffer");
 743         }
 744         if (!strupper_w(buffer) && (dest == src)) {
 745                 free(buffer);
 746                 return srclen;
 747         }
 748
 749         size = convert_string(CH_UTF16LE, CH_UNIX, buffer, size, dest, destlen, True);
 750         free(buffer);
 751         return size;
 752 }
 753
 754 /**
 755  strdup() a unix string to upper case.
 756  Max size is pstring.
 757 **/
 758
 759 char *strdup_upper(const char *s)
 760 {
 761         pstring out_buffer;
 762         const unsigned char *p = (const unsigned char *)s;
 763         unsigned char *q = (unsigned char *)out_buffer;
 764
 765         /* this is quite a common operation, so we want it to be
 766            fast. We optimise for the ascii case, knowing that all our
 767            supported multi-byte character sets are ascii-compatible
 768            (ie. they match for the first 128 chars) */
 769
 770         while (1) {
 771                 if (*p & 0x80)
 772                         break;
 773                 *q++ = toupper_ascii(*p);
 774                 if (!*p)
 775                         break;
 776                 p++;
 777                 if (p - ( const unsigned char *)s >= sizeof(pstring))
 778                         break;
 779         }
 780
 781         if (*p) {
 782                 /* MB case. */
 783                 size_t size;
 784                 wpstring buffer;
 785                 size = convert_string(CH_UNIX, CH_UTF16LE, s, -1, buffer, sizeof(buffer), True);
 786                 if (size == (size_t)-1) {
 787                         return NULL;
 788                 }
 789
 790                 strupper_w(buffer);
 791
 792                 size = convert_string(CH_UTF16LE, CH_UNIX, buffer, -1, out_buffer, sizeof(out_buffer), True);
 793                 if (size == (size_t)-1) {
 794                         return NULL;
 795                 }
 796         }
 797
 798         return SMB_STRDUP(out_buffer);
 799 }
 800
 801 size_t unix_strlower(const char *src, size_t srclen, char *dest, size_t destlen)
 802 {
 803         size_t size;
 804         smb_ucs2_t *buffer = NULL;
 805
 806         size = convert_string_allocate(NULL, CH_UNIX, CH_UTF16LE, src, srclen,
 807                                        (void **)(void *)&buffer, True);
 808         if (size == (size_t)-1 || !buffer) {
 809                 smb_panic("failed to create UCS2 buffer");
 810         }
 811         if (!strlower_w(buffer) && (dest == src)) {
 812                 SAFE_FREE(buffer);
 813                 return srclen;
 814         }
 815         size = convert_string(CH_UTF16LE, CH_UNIX, buffer, size, dest, destlen, True);
 816         SAFE_FREE(buffer);
 817         return size;
 818 }
 819
 820 /**
 821  strdup() a unix string to lower case.
 822 **/
 823
 824 char *strdup_lower(const char *s)
 825 {
 826         size_t size;
 827         smb_ucs2_t *buffer = NULL;
 828         char *out_buffer;
 829
 830         size = push_ucs2_allocate(&buffer, s);
 831         if (size == -1 || !buffer) {
 832                 return NULL;
 833         }
 834
 835         strlower_w(buffer);
 836
 837         size = pull_ucs2_allocate(&out_buffer, buffer);
 838         SAFE_FREE(buffer);
 839
 840         if (size == (size_t)-1) {
 841                 return NULL;
 842         }
 843
 844         return out_buffer;
 845 }
 846
 847 static size_t ucs2_align(const void *base_ptr, const void *p, int flags)
 848 {
 849         if (flags & (STR_NOALIGN|STR_ASCII))
 850                 return 0;
 851         return PTR_DIFF(p, base_ptr) & 1;
 852 }
 853
 854
 855 /**
 856  * Copy a string from a char* unix src to a dos codepage string destination.
 857  *
 858  * @return the number of bytes occupied by the string in the destination.
 859  *
 860  * @param flags can include
 861  * <dl>
 862  * <dt>STR_TERMINATE</dt> <dd>means include the null termination</dd>
 863  * <dt>STR_UPPER</dt> <dd>means uppercase in the destination</dd>
 864  * </dl>
 865  *
 866  * @param dest_len the maximum length in bytes allowed in the
 867  * destination.  If @p dest_len is -1 then no maximum is used.
 868  **/
 869 size_t push_ascii(void *dest, const char *src, size_t dest_len, int flags)
 870 {
 871         size_t src_len = strlen(src);
 872         pstring tmpbuf;
 873
 874         /* treat a pstring as "unlimited" length */
 875         if (dest_len == (size_t)-1)
 876                 dest_len = sizeof(pstring);
 877
 878         if (flags & STR_UPPER) {
 879                 pstrcpy(tmpbuf, src);
 880                 strupper_m(tmpbuf);
 881                 src = tmpbuf;
 882         }
 883
 884         if (flags & (STR_TERMINATE | STR_TERMINATE_ASCII))
 885                 src_len++;
 886
 887         return convert_string(CH_UNIX, CH_DOS, src, src_len, dest, dest_len, True);
 888 }
 889
 890 size_t push_ascii_fstring(void *dest, const char *src)
 891 {
 892         return push_ascii(dest, src, sizeof(fstring), STR_TERMINATE);
 893 }
 894
 895 size_t push_ascii_pstring(void *dest, const char *src)
 896 {
 897         return push_ascii(dest, src, sizeof(pstring), STR_TERMINATE);
 898 }
 899
 900 /********************************************************************
 901  Push an nstring - ensure null terminated. Written by
 902  moriyama@miraclelinux.com (MORIYAMA Masayuki).
 903 ********************************************************************/
 904
 905 size_t push_ascii_nstring(void *dest, const char *src)
 906 {
 907         size_t i, buffer_len, dest_len;
 908         smb_ucs2_t *buffer;
 909
 910         conv_silent = True;
 911         buffer_len = push_ucs2_allocate(&buffer, src);
 912         if (buffer_len == (size_t)-1) {
 913                 smb_panic("failed to create UCS2 buffer");
 914         }
 915
 916         /* We're using buffer_len below to count ucs2 characters, not bytes. */
 917         buffer_len /= sizeof(smb_ucs2_t);
 918
 919         dest_len = 0;
 920         for (i = 0; buffer[i] != 0 && (i < buffer_len); i++) {
 921                 unsigned char mb[10];
 922                 /* Convert one smb_ucs2_t character at a time. */
 923                 size_t mb_len = convert_string(CH_UTF16LE, CH_DOS, buffer+i, sizeof(smb_ucs2_t), mb, sizeof(mb), False);
 924                 if ((mb_len != (size_t)-1) && (dest_len + mb_len <= MAX_NETBIOSNAME_LEN - 1)) {
 925                         memcpy((char *)dest + dest_len, mb, mb_len);
 926                         dest_len += mb_len;
 927                 } else {
 928                         errno = E2BIG;
 929                         break;
 930                 }
 931         }
 932         ((char *)dest)[dest_len] = '\0';
 933
 934         SAFE_FREE(buffer);
 935         conv_silent = False;
 936         return dest_len;
 937 }
 938
 939 /**
 940  * Copy a string from a dos codepage source to a unix char* destination.
 941  *
 942  * The resulting string in "dest" is always null terminated.
 943  *
 944  * @param flags can have:
 945  * <dl>
 946  * <dt>STR_TERMINATE</dt>
 947  * <dd>STR_TERMINATE means the string in @p src
 948  * is null terminated, and src_len is ignored.</dd>
 949  * </dl>
 950  *
 951  * @param src_len is the length of the source area in bytes.
 952  * @returns the number of bytes occupied by the string in @p src.
 953  **/
 954 size_t pull_ascii(char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
 955 {
 956         size_t ret;
 957
 958         if (dest_len == (size_t)-1)
 959                 dest_len = sizeof(pstring);
 960
 961         if (flags & STR_TERMINATE) {
 962                 if (src_len == (size_t)-1) {
 963                         src_len = strlen((const char *)src) + 1;
 964                 } else {
 965                         size_t len = strnlen((const char *)src, src_len);
 966                         if (len < src_len)
 967                                 len++;
 968                         src_len = len;
 969                 }
 970         }
 971
 972         ret = convert_string(CH_DOS, CH_UNIX, src, src_len, dest, dest_len, True);
 973         if (ret == (size_t)-1) {
 974                 ret = 0;
 975                 dest_len = 0;
 976         }
 977
 978         if (dest_len && ret) {
 979                 /* Did we already process the terminating zero ? */
 980                 if (dest[MIN(ret-1, dest_len-1)] != 0) {
 981                         dest[MIN(ret, dest_len-1)] = 0;
 982                 }
 983         } else  {
 984                 dest[0] = 0;
 985         }
 986
 987         return src_len;
 988 }
 989
 990 size_t pull_ascii_pstring(char *dest, const void *src)
 991 {
 992         return pull_ascii(dest, src, sizeof(pstring), -1, STR_TERMINATE);
 993 }
 994
 995 size_t pull_ascii_fstring(char *dest, const void *src)
 996 {
 997         return pull_ascii(dest, src, sizeof(fstring), -1, STR_TERMINATE);
 998 }
 999
1000 /* When pulling an nstring it can expand into a larger size (dos cp -> utf8). Cope with this. */
1001
1002 size_t pull_ascii_nstring(char *dest, size_t dest_len, const void *src)
1003 {
1004         return pull_ascii(dest, src, dest_len, sizeof(nstring)-1, STR_TERMINATE);
1005 }
1006
1007 /**
1008  * Copy a string from a char* src to a unicode destination.
1009  *
1010  * @returns the number of bytes occupied by the string in the destination.
1011  *
1012  * @param flags can have:
1013  *
1014  * <dl>
1015  * <dt>STR_TERMINATE <dd>means include the null termination.
1016  * <dt>STR_UPPER     <dd>means uppercase in the destination.
1017  * <dt>STR_NOALIGN   <dd>means don't do alignment.
1018  * </dl>
1019  *
1020  * @param dest_len is the maximum length allowed in the
1021  * destination. If dest_len is -1 then no maxiumum is used.
1022  **/
1023
1024 size_t push_ucs2(const void *base_ptr, void *dest, const char *src, size_t dest_len, int flags)
1025 {
1026         size_t len=0;
1027         size_t src_len;
1028         size_t ret;
1029
1030         /* treat a pstring as "unlimited" length */
1031         if (dest_len == (size_t)-1)
1032                 dest_len = sizeof(pstring);
1033
1034         if (flags & STR_TERMINATE)
1035                 src_len = (size_t)-1;
1036         else
1037                 src_len = strlen(src);
1038
1039         if (ucs2_align(base_ptr, dest, flags)) {
1040                 *(char *)dest = 0;
1041                 dest = (void *)((char *)dest + 1);
1042                 if (dest_len)
1043                         dest_len--;
1044                 len++;
1045         }
1046
1047         /* ucs2 is always a multiple of 2 bytes */
1048         dest_len &= ~1;
1049
1050         ret =  convert_string(CH_UNIX, CH_UTF16LE, src, src_len, dest, dest_len, True);
1051         if (ret == (size_t)-1) {
1052                 return 0;
1053         }
1054
1055         len += ret;
1056
1057         if (flags & STR_UPPER) {
1058                 smb_ucs2_t *dest_ucs2 = (smb_ucs2_t *)dest;
1059                 size_t i;
1060
1061                 /* We check for i < (ret / 2) below as the dest string isn't null
1062                    terminated if STR_TERMINATE isn't set. */
1063
1064                 for (i = 0; i < (ret / 2) && i < (dest_len / 2) && dest_ucs2[i]; i++) {
1065                         smb_ucs2_t v = toupper_w(dest_ucs2[i]);
1066                         if (v != dest_ucs2[i]) {
1067                                 dest_ucs2[i] = v;
1068                         }
1069                 }
1070         }
1071
1072         return len;
1073 }
1074
1075
1076 /**
1077  * Copy a string from a unix char* src to a UCS2 destination,
1078  * allocating a buffer using talloc().
1079  *
1080  * @param dest always set at least to NULL
1081  *
1082  * @returns The number of bytes occupied by the string in the destination
1083  *         or -1 in case of error.
1084  **/
1085 size_t push_ucs2_talloc(TALLOC_CTX *ctx, smb_ucs2_t **dest, const char *src)
1086 {
1087         size_t src_len = strlen(src)+1;
1088
1089         *dest = NULL;
1090         return convert_string_talloc(ctx, CH_UNIX, CH_UTF16LE, src, src_len, (void **)dest, True);
1091 }
1092
1093
1094 /**
1095  * Copy a string from a unix char* src to a UCS2 destination, allocating a buffer
1096  *
1097  * @param dest always set at least to NULL
1098  *
1099  * @returns The number of bytes occupied by the string in the destination
1100  *         or -1 in case of error.
1101  **/
1102
1103 size_t push_ucs2_allocate(smb_ucs2_t **dest, const char *src)
1104 {
1105         size_t src_len = strlen(src)+1;
1106
1107         *dest = NULL;
1108         return convert_string_allocate(NULL, CH_UNIX, CH_UTF16LE, src, src_len, (void **)dest, True);
1109 }
1110
1111 /**
1112  Copy a string from a char* src to a UTF-8 destination.
1113  Return the number of bytes occupied by the string in the destination
1114  Flags can have:
1115   STR_TERMINATE means include the null termination
1116   STR_UPPER     means uppercase in the destination
1117  dest_len is the maximum length allowed in the destination. If dest_len
1118  is -1 then no maxiumum is used.
1119 **/
1120
1121 static size_t push_utf8(void *dest, const char *src, size_t dest_len, int flags)
1122 {
1123         size_t src_len = strlen(src);
1124         pstring tmpbuf;
1125
1126         /* treat a pstring as "unlimited" length */
1127         if (dest_len == (size_t)-1)
1128                 dest_len = sizeof(pstring);
1129
1130         if (flags & STR_UPPER) {
1131                 pstrcpy(tmpbuf, src);
1132                 strupper_m(tmpbuf);
1133                 src = tmpbuf;
1134         }
1135
1136         if (flags & STR_TERMINATE)
1137                 src_len++;
1138
1139         return convert_string(CH_UNIX, CH_UTF8, src, src_len, dest, dest_len, True);
1140 }
1141
1142 size_t push_utf8_fstring(void *dest, const char *src)
1143 {
1144         return push_utf8(dest, src, sizeof(fstring), STR_TERMINATE);
1145 }
1146
1147 /**
1148  * Copy a string from a unix char* src to a UTF-8 destination, allocating a buffer using talloc
1149  *
1150  * @param dest always set at least to NULL
1151  *
1152  * @returns The number of bytes occupied by the string in the destination
1153  **/
1154
1155 size_t push_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src)
1156 {
1157         size_t src_len = strlen(src)+1;
1158
1159         *dest = NULL;
1160         return convert_string_talloc(ctx, CH_UNIX, CH_UTF8, src, src_len, (void**)dest, True);
1161 }
1162
1163 /**
1164  * Copy a string from a unix char* src to a UTF-8 destination, allocating a buffer
1165  *
1166  * @param dest always set at least to NULL
1167  *
1168  * @returns The number of bytes occupied by the string in the destination
1169  **/
1170
1171 size_t push_utf8_allocate(char **dest, const char *src)
1172 {
1173         size_t src_len = strlen(src)+1;
1174
1175         *dest = NULL;
1176         return convert_string_allocate(NULL, CH_UNIX, CH_UTF8, src, src_len, (void **)dest, True);
1177 }
1178
1179 /**
1180  Copy a string from a ucs2 source to a unix char* destination.
1181  Flags can have:
1182   STR_TERMINATE means the string in src is null terminated.
1183   STR_NOALIGN   means don't try to align.
1184  if STR_TERMINATE is set then src_len is ignored if it is -1.
1185  src_len is the length of the source area in bytes
1186  Return the number of bytes occupied by the string in src.
1187  The resulting string in "dest" is always null terminated.
1188 **/
1189
1190 size_t pull_ucs2(const void *base_ptr, char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
1191 {
1192         size_t ret;
1193
1194         if (dest_len == (size_t)-1)
1195                 dest_len = sizeof(pstring);
1196
1197         if (ucs2_align(base_ptr, src, flags)) {
1198                 src = (const void *)((const char *)src + 1);
1199                 if (src_len != (size_t)-1)
1200                         src_len--;
1201         }
1202
1203         if (flags & STR_TERMINATE) {
1204                 /* src_len -1 is the default for null terminated strings. */
1205                 if (src_len != (size_t)-1) {
1206                         size_t len = strnlen_w((const smb_ucs2_t *)src,
1207                                                 src_len/2);
1208                         if (len < src_len/2)
1209                                 len++;
1210                         src_len = len*2;
1211                 }
1212         }
1213
1214         /* ucs2 is always a multiple of 2 bytes */
1215         if (src_len != (size_t)-1)
1216                 src_len &= ~1;
1217
1218         ret = convert_string(CH_UTF16LE, CH_UNIX, src, src_len, dest, dest_len, True);
1219         if (ret == (size_t)-1) {
1220                 return 0;
1221         }
1222
1223         if (src_len == (size_t)-1)
1224                 src_len = ret*2;
1225
1226         if (dest_len && ret) {
1227                 /* Did we already process the terminating zero ? */
1228                 if (dest[MIN(ret-1, dest_len-1)] != 0) {
1229                         dest[MIN(ret, dest_len-1)] = 0;
1230                 }
1231         } else {
1232                 dest[0] = 0;
1233         }
1234
1235         return src_len;
1236 }
1237
1238 size_t pull_ucs2_pstring(char *dest, const void *src)
1239 {
1240         return pull_ucs2(NULL, dest, src, sizeof(pstring), -1, STR_TERMINATE);
1241 }
1242
1243 size_t pull_ucs2_fstring(char *dest, const void *src)
1244 {
1245         return pull_ucs2(NULL, dest, src, sizeof(fstring), -1, STR_TERMINATE);
1246 }
1247
1248 /**
1249  * Copy a string from a UCS2 src to a unix char * destination, allocating a buffer using talloc
1250  *
1251  * @param dest always set at least to NULL
1252  *
1253  * @returns The number of bytes occupied by the string in the destination
1254  **/
1255
1256 size_t pull_ucs2_talloc(TALLOC_CTX *ctx, char **dest, const smb_ucs2_t *src)
1257 {
1258         size_t src_len = (strlen_w(src)+1) * sizeof(smb_ucs2_t);
1259         *dest = NULL;
1260         return convert_string_talloc(ctx, CH_UTF16LE, CH_UNIX, src, src_len, (void **)dest, True);
1261 }
1262
1263 /**
1264  * Copy a string from a UCS2 src to a unix char * destination, allocating a buffer
1265  *
1266  * @param dest always set at least to NULL
1267  *
1268  * @returns The number of bytes occupied by the string in the destination
1269  **/
1270
1271 size_t pull_ucs2_allocate(char **dest, const smb_ucs2_t *src)
1272 {
1273         size_t src_len = (strlen_w(src)+1) * sizeof(smb_ucs2_t);
1274         *dest = NULL;
1275         return convert_string_allocate(NULL, CH_UTF16LE, CH_UNIX, src, src_len, (void **)dest, True);
1276 }
1277
1278 /**
1279  * Copy a string from a UTF-8 src to a unix char * destination, allocating a buffer using talloc
1280  *
1281  * @param dest always set at least to NULL
1282  *
1283  * @returns The number of bytes occupied by the string in the destination
1284  **/
1285
1286 size_t pull_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src)
1287 {
1288         size_t src_len = strlen(src)+1;
1289         *dest = NULL;
1290         return convert_string_talloc(ctx, CH_UTF8, CH_UNIX, src, src_len, (void **)dest, True);
1291 }
1292
1293 /**
1294  * Copy a string from a UTF-8 src to a unix char * destination, allocating a buffer
1295  *
1296  * @param dest always set at least to NULL
1297  *
1298  * @returns The number of bytes occupied by the string in the destination
1299  **/
1300
1301 size_t pull_utf8_allocate(char **dest, const char *src)
1302 {
1303         size_t src_len = strlen(src)+1;
1304         *dest = NULL;
1305         return convert_string_allocate(NULL, CH_UTF8, CH_UNIX, src, src_len, (void **)dest, True);
1306 }
1307
1308 /**
1309  * Copy a string from a DOS src to a unix char * destination, allocating a buffer using talloc
1310  *
1311  * @param dest always set at least to NULL
1312  *
1313  * @returns The number of bytes occupied by the string in the destination
1314  **/
1315
1316 size_t pull_ascii_talloc(TALLOC_CTX *ctx, char **dest, const char *src)
1317 {
1318         size_t src_len = strlen(src)+1;
1319         *dest = NULL;
1320         return convert_string_talloc(ctx, CH_DOS, CH_UNIX, src, src_len, (void **)dest, True);
1321 }
1322
1323 /**
1324  Copy a string from a char* src to a unicode or ascii
1325  dos codepage destination choosing unicode or ascii based on the
1326  flags in the SMB buffer starting at base_ptr.
1327  Return the number of bytes occupied by the string in the destination.
1328  flags can have:
1329   STR_TERMINATE means include the null termination.
1330   STR_UPPER     means uppercase in the destination.
1331   STR_ASCII     use ascii even with unicode packet.
1332   STR_NOALIGN   means don't do alignment.
1333  dest_len is the maximum length allowed in the destination. If dest_len
1334  is -1 then no maxiumum is used.
1335 **/
1336
1337 size_t push_string_fn(const char *function, unsigned int line, const void *base_ptr, void *dest, const char *src, size_t dest_len, int flags)
1338 {
1339 #ifdef DEVELOPER
1340         /* We really need to zero fill here, not clobber
1341          * region, as we want to ensure that valgrind thinks
1342          * all of the outgoing buffer has been written to
1343          * so a send() or write() won't trap an error.
1344          * JRA.
1345          */
1346 #if 0
1347         if (dest_len != (size_t)-1)
1348                 clobber_region(function, line, dest, dest_len);
1349 #else
1350         if (dest_len != (size_t)-1)
1351                 memset(dest, '\0', dest_len);
1352 #endif
1353 #endif
1354
1355         if (!(flags & STR_ASCII) && \
1356             ((flags & STR_UNICODE || \
1357               (SVAL(base_ptr, smb_flg2) & FLAGS2_UNICODE_STRINGS)))) {
1358                 return push_ucs2(base_ptr, dest, src, dest_len, flags);
1359         }
1360         return push_ascii(dest, src, dest_len, flags);
1361 }
1362
1363
1364 /**
1365  Copy a string from a unicode or ascii source (depending on
1366  the packet flags) to a char* destination.
1367  Flags can have:
1368   STR_TERMINATE means the string in src is null terminated.
1369   STR_UNICODE   means to force as unicode.
1370   STR_ASCII     use ascii even with unicode packet.
1371   STR_NOALIGN   means don't do alignment.
1372  if STR_TERMINATE is set then src_len is ignored is it is -1
1373  src_len is the length of the source area in bytes.
1374  Return the number of bytes occupied by the string in src.
1375  The resulting string in "dest" is always null terminated.
1376 **/
1377
1378 size_t pull_string_fn(const char *function, unsigned int line,
1379                       const void *base_ptr, uint16 smb_flags2, char *dest,
1380                       const void *src, size_t dest_len, size_t src_len,
1381                       int flags)
1382 {
1383 #ifdef DEVELOPER
1384         if (dest_len != (size_t)-1)
1385                 clobber_region(function, line, dest, dest_len);
1386 #endif
1387
1388         if ((base_ptr == NULL) && ((flags & (STR_ASCII|STR_UNICODE)) == 0)) {
1389                 smb_panic("No base ptr to get flg2 and neither ASCII nor "
1390                           "UNICODE defined");
1391         }
1392
1393         if (!(flags & STR_ASCII) && \
1394             ((flags & STR_UNICODE || \
1395               (smb_flags2 & FLAGS2_UNICODE_STRINGS)))) {
1396                 return pull_ucs2(base_ptr, dest, src, dest_len, src_len, flags);
1397         }
1398         return pull_ascii(dest, src, dest_len, src_len, flags);
1399 }
1400
1401 size_t align_string(const void *base_ptr, const char *p, int flags)
1402 {
1403         if (!(flags & STR_ASCII) && \
1404             ((flags & STR_UNICODE || \
1405               (SVAL(base_ptr, smb_flg2) & FLAGS2_UNICODE_STRINGS)))) {
1406                 return ucs2_align(base_ptr, p, flags);
1407         }
1408         return 0;
1409 }
1410
1411 /*
1412   Return the unicode codepoint for the next multi-byte CH_UNIX character
1413   in the string. The unicode codepoint (codepoint_t) is an unsinged 32 bit value.
1414
1415   Also return the number of bytes consumed (which tells the caller
1416   how many bytes to skip to get to the next CH_UNIX character).
1417
1418   Return INVALID_CODEPOINT if the next character cannot be converted.
1419 */
1420
1421 codepoint_t next_codepoint(const char *str, size_t *size)
1422 {
1423         /* It cannot occupy more than 4 bytes in UTF16 format */
1424         uint8_t buf[4];
1425         smb_iconv_t descriptor;
1426         size_t ilen_orig;
1427         size_t ilen;
1428         size_t olen;
1429         char *outbuf;
1430
1431         if ((str[0] & 0x80) == 0) {
1432                 *size = 1;
1433                 return (codepoint_t)str[0];
1434         }
1435
1436         /* We assume that no multi-byte character can take
1437            more than 5 bytes. This is OK as we only
1438            support codepoints up to 1M */
1439
1440         ilen_orig = strnlen(str, 5);
1441         ilen = ilen_orig;
1442
1443         lazy_initialize_conv();
1444
1445         descriptor = conv_handles[CH_UNIX][CH_UTF16LE];
1446         if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
1447                 *size = 1;
1448                 return INVALID_CODEPOINT;
1449         }
1450
1451         /* This looks a little strange, but it is needed to cope
1452            with codepoints above 64k which are encoded as per RFC2781. */
1453         olen = 2;
1454         outbuf = (char *)buf;
1455         smb_iconv(descriptor, &str, &ilen, &outbuf, &olen);
1456         if (olen == 2) {
1457                 /* We failed to convert to a 2 byte character.
1458                    See if we can convert to a 4 UTF16-LE byte char encoding.
1459                 */
1460                 olen = 4;
1461                 outbuf = (char *)buf;
1462                 smb_iconv(descriptor,  &str, &ilen, &outbuf, &olen);
1463                 if (olen == 4) {
1464                         /* We didn't convert any bytes */
1465                         *size = 1;
1466                         return INVALID_CODEPOINT;
1467                 }
1468                 olen = 4 - olen;
1469         } else {
1470                 olen = 2 - olen;
1471         }
1472
1473         *size = ilen_orig - ilen;
1474
1475         if (olen == 2) {
1476                 /* 2 byte, UTF16-LE encoded value. */
1477                 return (codepoint_t)SVAL(buf, 0);
1478         }
1479         if (olen == 4) {
1480                 /* Decode a 4 byte UTF16-LE character manually.
1481                    See RFC2871 for the encoding machanism.
1482                 */
1483                 codepoint_t w1 = SVAL(buf,0) & ~0xD800;
1484                 codepoint_t w2 = SVAL(buf,2) & ~0xDC00;
1485
1486                 return (codepoint_t)0x10000 +
1487                                 (w1 << 10) + w2;
1488         }
1489
1490         /* no other length is valid */
1491         return INVALID_CODEPOINT;
1492 }