source3/lib/charcnv.c

   1 /*
   2    Unix SMB/CIFS implementation.
   3    Character set conversion Extensions
   4    Copyright (C) Igor Vergeichik <iverg@mail.ru> 2001
   5    Copyright (C) Andrew Tridgell 2001
   6    Copyright (C) Simo Sorce 2001
   7    Copyright (C) Martin Pool 2003
   8
   9    This program is free software; you can redistribute it and/or modify
  10    it under the terms of the GNU General Public License as published by
  11    the Free Software Foundation; either version 2 of the License, or
  12    (at your option) any later version.
  13
  14    This program is distributed in the hope that it will be useful,
  15    but WITHOUT ANY WARRANTY; without even the implied warranty of
  16    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  17    GNU General Public License for more details.
  18
  19    You should have received a copy of the GNU General Public License
  20    along with this program; if not, write to the Free Software
  21    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  22
  23 */
  24 #include "includes.h"
  25
  26 /**
  27  * @file
  28  *
  29  * @brief Character-set conversion routines built on our iconv.
  30  *
  31  * @note Samba's internal character set (at least in the 3.0 series)
  32  * is always the same as the one for the Unix filesystem.  It is
  33  * <b>not</b> necessarily UTF-8 and may be different on machines that
  34  * need i18n filenames to be compatible with Unix software.  It does
  35  * have to be a superset of ASCII.  All multibyte sequences must start
  36  * with a byte with the high bit set.
  37  *
  38  * @sa lib/iconv.c
  39  */
  40
  41
/*
 * Table of iconv descriptors, indexed as conv_handles[from][to].
 * It is filled in by init_iconv(); the conversion routines below treat
 * both NULL and (smb_iconv_t)-1 entries as "conversion unavailable".
 */
static smb_iconv_t conv_handles[NUM_CHARSETS][NUM_CHARSETS];
/*
 * When True, the conversion routines skip their DEBUG messages on
 * failure.  init_iconv() raises it while it rebuilds the character
 * tables.
 */
static BOOL conv_silent; /* Should we do a debug if the conversion fails ? */
  44
  45 /**
  46  * Return the name of a charset to give to iconv().
  47  **/
  48 static const char *charset_name(charset_t ch)
  49 {
  50         const char *ret = NULL;
  51
  52         if (ch == CH_UCS2) ret = "UCS-2LE";
  53         else if (ch == CH_UNIX) ret = lp_unix_charset();
  54         else if (ch == CH_DOS) ret = lp_dos_charset();
  55         else if (ch == CH_DISPLAY) ret = lp_display_charset();
  56         else if (ch == CH_UTF8) ret = "UTF8";
  57
  58 #if defined(HAVE_NL_LANGINFO) && defined(CODESET)
  59         if (ret && !strcmp(ret, "LOCALE")) {
  60                 const char *ln = NULL;
  61
  62 #ifdef HAVE_SETLOCALE
  63                 setlocale(LC_ALL, "");
  64 #endif
  65                 ln = nl_langinfo(CODESET);
  66                 if (ln) {
  67                         /* Check whether the charset name is supported
  68                            by iconv */
  69                         smb_iconv_t handle = smb_iconv_open(ln,"UCS-2LE");
  70                         if (handle == (smb_iconv_t) -1) {
  71                                 DEBUG(5,("Locale charset '%s' unsupported, using ASCII instead\n", ln));
  72                                 ln = NULL;
  73                         } else {
  74                                 DEBUG(5,("Substituting charset '%s' for LOCALE\n", ln));
  75                                 smb_iconv_close(handle);
  76                         }
  77                 }
  78                 ret = ln;
  79         }
  80 #endif
  81
  82         if (!ret || !*ret) ret = "ASCII";
  83         return ret;
  84 }
  85
  86 void lazy_initialize_conv(void)
  87 {
  88         static int initialized = False;
  89
  90         if (!initialized) {
  91                 initialized = True;
  92                 load_case_tables();
  93                 init_iconv();
  94         }
  95 }
  96
  97 /**
  98  * Initialize iconv conversion descriptors.
  99  *
 100  * This is called the first time it is needed, and also called again
 101  * every time the configuration is reloaded, because the charset or
 102  * codepage might have changed.
 103  **/
 104 void init_iconv(void)
 105 {
 106         int c1, c2;
 107         BOOL did_reload = False;
 108
 109         /* so that charset_name() works we need to get the UNIX<->UCS2 going
 110            first */
 111         if (!conv_handles[CH_UNIX][CH_UCS2])
 112                 conv_handles[CH_UNIX][CH_UCS2] = smb_iconv_open("UCS-2LE", "ASCII");
 113
 114         if (!conv_handles[CH_UCS2][CH_UNIX])
 115                 conv_handles[CH_UCS2][CH_UNIX] = smb_iconv_open("ASCII", "UCS-2LE");
 116
 117         for (c1=0;c1<NUM_CHARSETS;c1++) {
 118                 for (c2=0;c2<NUM_CHARSETS;c2++) {
 119                         const char *n1 = charset_name((charset_t)c1);
 120                         const char *n2 = charset_name((charset_t)c2);
 121                         if (conv_handles[c1][c2] &&
 122                             strcmp(n1, conv_handles[c1][c2]->from_name) == 0 &&
 123                             strcmp(n2, conv_handles[c1][c2]->to_name) == 0)
 124                                 continue;
 125
 126                         did_reload = True;
 127
 128                         if (conv_handles[c1][c2])
 129                                 smb_iconv_close(conv_handles[c1][c2]);
 130
 131                         conv_handles[c1][c2] = smb_iconv_open(n2,n1);
 132                         if (conv_handles[c1][c2] == (smb_iconv_t)-1) {
 133                                 DEBUG(0,("Conversion from %s to %s not supported\n",
 134                                          charset_name((charset_t)c1), charset_name((charset_t)c2)));
 135                                 conv_handles[c1][c2] = NULL;
 136                         }
 137                 }
 138         }
 139
 140         if (did_reload) {
 141                 /* XXX: Does this really get called every time the dos
 142                  * codepage changes? */
 143                 /* XXX: Is the did_reload test too strict? */
 144                 conv_silent = True;
 145                 init_doschar_table();
 146                 init_valid_table();
 147                 conv_silent = False;
 148         }
 149 }
 150
 151 /**
 152  * Convert string from one encoding to another, making error checking etc
 153  * Slow path version - uses (slow) iconv.
 154  *
 155  * @param src pointer to source string (multibyte or singlebyte)
 156  * @param srclen length of the source string in bytes
 157  * @param dest pointer to destination string (multibyte or singlebyte)
 158  * @param destlen maximal length allowed for string
 159  * @returns the number of bytes occupied in the destination
 160  *
 161  * Ensure the srclen contains the terminating zero.
 162  *
 163  **/
 164
 165 static size_t convert_string_internal(charset_t from, charset_t to,
 166                       void const *src, size_t srclen,
 167                       void *dest, size_t destlen)
 168 {
 169         size_t i_len, o_len;
 170         size_t retval;
 171         const char* inbuf = (const char*)src;
 172         char* outbuf = (char*)dest;
 173         smb_iconv_t descriptor;
 174
 175         lazy_initialize_conv();
 176
 177         descriptor = conv_handles[from][to];
 178
 179         if (srclen == (size_t)-1) {
 180                 if (from == CH_UCS2) {
 181                         srclen = (strlen_w((const smb_ucs2_t *)src)+1) * 2;
 182                 } else {
 183                         srclen = strlen((const char *)src)+1;
 184                 }
 185         }
 186
 187
 188         if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
 189                 if (!conv_silent)
 190                         DEBUG(0,("convert_string_internal: Conversion not supported.\n"));
 191                 return 0;
 192         }
 193
 194         i_len=srclen;
 195         o_len=destlen;
 196
 197  again:
 198
 199         retval = smb_iconv(descriptor, (char **)&inbuf, &i_len, &outbuf, &o_len);
 200         if(retval==(size_t)-1) {
 201                 const char *reason="unknown error";
 202                 switch(errno) {
 203                         case EINVAL:
 204                                 reason="Incomplete multibyte sequence";
 205                                 if (!conv_silent)
 206                                         DEBUG(3,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
 207                                 goto use_as_is;
 208                         case E2BIG:
 209                                 reason="No more room";
 210                                 if (!conv_silent)
 211                                         DEBUG(3, ("convert_string_internal: Required %lu, available %lu\n",
 212                                                 (unsigned long)srclen, (unsigned long)destlen));
 213                                 /* we are not sure we need srclen bytes,
 214                                   may be more, may be less.
 215                                   We only know we need more than destlen
 216                                   bytes ---simo */
 217                                break;
 218                         case EILSEQ:
 219                                 reason="Illegal multibyte sequence";
 220                                 if (!conv_silent)
 221                                         DEBUG(3,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
 222                                 goto use_as_is;
 223                         default:
 224                                 if (!conv_silent)
 225                                         DEBUG(0,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
 226                                 break;
 227                 }
 228                 /* smb_panic(reason); */
 229         }
 230         return destlen-o_len;
 231
 232  use_as_is:
 233
 234         /*
 235          * Conversion not supported. This is actually an error, but there are so
 236          * many misconfigured iconv systems and smb.conf's out there we can't just
 237          * fail. Do a very bad conversion instead.... JRA.
 238          */
 239
 240         {
 241                 if (o_len == 0 || i_len == 0)
 242                         return destlen - o_len;
 243
 244                 if (from == CH_UCS2 && to != CH_UCS2) {
 245                         /* Can't convert from ucs2 to multibyte. Just truncate this char to ascii. */
 246                         if (i_len < 2)
 247                                 return destlen - o_len;
 248                         if (i_len >= 2) {
 249                                 *outbuf = inbuf[0];
 250
 251                                 outbuf++;
 252                                 o_len--;
 253
 254                                 inbuf += 2;
 255                                 i_len -= 2;
 256                         }
 257
 258                         if (o_len == 0 || i_len == 0)
 259                                 return destlen - o_len;
 260
 261                         /* Keep trying with the next char... */
 262                         goto again;
 263
 264                 } else if (from != CH_UCS2 && to == CH_UCS2) {
 265                         /* Can't convert to ucs2 - just widen by adding zero. */
 266                         if (o_len < 2)
 267                                 return destlen - o_len;
 268
 269                         outbuf[0] = inbuf[0];
 270                         outbuf[1] = '\0';
 271
 272                         inbuf++;
 273                         i_len--;
 274
 275                         outbuf += 2;
 276                         o_len -= 2;
 277
 278                         if (o_len == 0 || i_len == 0)
 279                                 return destlen - o_len;
 280
 281                         /* Keep trying with the next char... */
 282                         goto again;
 283
 284                 } else if (from != CH_UCS2 && to != CH_UCS2) {
 285                         /* Failed multibyte to multibyte. Just copy 1 char and
 286                                 try again. */
 287                         outbuf[0] = inbuf[0];
 288
 289                         inbuf++;
 290                         i_len--;
 291
 292                         outbuf++;
 293                         o_len--;
 294
 295                         if (o_len == 0 || i_len == 0)
 296                                 return destlen - o_len;
 297
 298                         /* Keep trying with the next char... */
 299                         goto again;
 300
 301                 } else {
 302                         /* Keep compiler happy.... */
 303                         return destlen - o_len;
 304                 }
 305         }
 306 }
 307
 308 /**
 309  * Convert string from one encoding to another, making error checking etc
 310  * Fast path version - handles ASCII first.
 311  *
 312  * @param src pointer to source string (multibyte or singlebyte)
 313  * @param srclen length of the source string in bytes, or -1 for nul terminated.
 314  * @param dest pointer to destination string (multibyte or singlebyte)
 315  * @param destlen maximal length allowed for string - *NEVER* -1.
 316  * @returns the number of bytes occupied in the destination
 317  *
 318  * Ensure the srclen contains the terminating zero.
 319  *
 320  * This function has been hand-tuned to provide a fast path.
 321  * Don't change unless you really know what you are doing. JRA.
 322  **/
 323
 324 size_t convert_string(charset_t from, charset_t to,
 325                       void const *src, size_t srclen,
 326                       void *dest, size_t destlen)
 327 {
 328         /*
 329          * NB. We deliberately don't do a strlen here if srclen == -1.
 330          * This is very expensive over millions of calls and is taken
 331          * care of in the slow path in convert_string_internal. JRA.
 332          */
 333
 334 #ifdef DEVELOPER
 335         SMB_ASSERT(destlen != (size_t)-1);
 336 #endif
 337
 338         if (srclen == 0)
 339                 return 0;
 340
 341         if (from != CH_UCS2 && to != CH_UCS2) {
 342                 const unsigned char *p = (const unsigned char *)src;
 343                 unsigned char *q = (unsigned char *)dest;
 344                 size_t slen = srclen;
 345                 size_t dlen = destlen;
 346                 unsigned char lastp;
 347                 size_t retval = 0;
 348
 349                 /* If all characters are ascii, fast path here. */
 350                 while (slen && dlen) {
 351                         if ((lastp = *p) <= 0x7f) {
 352                                 *q++ = *p++;
 353                                 if (slen != (size_t)-1) {
 354                                         slen--;
 355                                 }
 356                                 dlen--;
 357                                 retval++;
 358                                 if (!lastp)
 359                                         break;
 360                         } else {
 361 #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
 362                                 goto general_case;
 363 #else
 364                                 return retval + convert_string_internal(from, to, p, slen, q, dlen);
 365 #endif
 366                         }
 367                 }
 368                 return retval;
 369         } else if (from == CH_UCS2 && to != CH_UCS2) {
 370                 const unsigned char *p = (const unsigned char *)src;
 371                 unsigned char *q = (unsigned char *)dest;
 372                 size_t retval = 0;
 373                 size_t slen = srclen;
 374                 size_t dlen = destlen;
 375                 unsigned char lastp;
 376
 377                 /* If all characters are ascii, fast path here. */
 378                 while (((slen == (size_t)-1) || (slen >= 2)) && dlen) {
 379                         if (((lastp = *p) <= 0x7f) && (p[1] == 0)) {
 380                                 *q++ = *p;
 381                                 if (slen != (size_t)-1) {
 382                                         slen -= 2;
 383                                 }
 384                                 p += 2;
 385                                 dlen--;
 386                                 retval++;
 387                                 if (!lastp)
 388                                         break;
 389                         } else {
 390 #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
 391                                 goto general_case;
 392 #else
 393                                 return retval + convert_string_internal(from, to, p, slen, q, dlen);
 394 #endif
 395                         }
 396                 }
 397                 return retval;
 398         } else if (from != CH_UCS2 && to == CH_UCS2) {
 399                 const unsigned char *p = (const unsigned char *)src;
 400                 unsigned char *q = (unsigned char *)dest;
 401                 size_t retval = 0;
 402                 size_t slen = srclen;
 403                 size_t dlen = destlen;
 404                 unsigned char lastp;
 405
 406                 /* If all characters are ascii, fast path here. */
 407                 while (slen && (dlen >= 2)) {
 408                         if ((lastp = *p) <= 0x7F) {
 409                                 *q++ = *p++;
 410                                 *q++ = '\0';
 411                                 if (slen != (size_t)-1) {
 412                                         slen--;
 413                                 }
 414                                 dlen -= 2;
 415                                 retval += 2;
 416                                 if (!lastp)
 417                                         break;
 418                         } else {
 419 #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
 420                                 goto general_case;
 421 #else
 422                                 return retval + convert_string_internal(from, to, p, slen, q, dlen);
 423 #endif
 424                         }
 425                 }
 426                 return retval;
 427         }
 428
 429 #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
 430   general_case:
 431 #endif
 432         return convert_string_internal(from, to, src, srclen, dest, destlen);
 433 }
 434
 435 /**
 436  * Convert between character sets, allocating a new buffer for the result.
 437  *
 438  * @param ctx TALLOC_CTX to use to allocate with. If NULL use malloc.
 439  * @param srclen length of source buffer.
 440  * @param dest always set at least to NULL
 441  * @note -1 is not accepted for srclen.
 442  *
 443  * @returns Size in bytes of the converted string; or -1 in case of error.
 444  *
 445  * Ensure the srclen contains the terminating zero.
 446  *
 447  * I hate the goto's in this function. It's embarressing.....
 448  * There has to be a cleaner way to do this. JRA.
 449  **/
 450
 451 size_t convert_string_allocate(TALLOC_CTX *ctx, charset_t from, charset_t to,
 452                                void const *src, size_t srclen, void **dest)
 453 {
 454         size_t i_len, o_len, destlen = MAX(srclen, 512);
 455         size_t retval;
 456         const char *inbuf = (const char *)src;
 457         char *outbuf = NULL, *ob = NULL;
 458         smb_iconv_t descriptor;
 459
 460         *dest = NULL;
 461
 462         if (src == NULL || srclen == (size_t)-1)
 463                 return (size_t)-1;
 464         if (srclen == 0)
 465                 return 0;
 466
 467         lazy_initialize_conv();
 468
 469         descriptor = conv_handles[from][to];
 470
 471         if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
 472                 if (!conv_silent)
 473                         DEBUG(0,("convert_string_allocate: Conversion not supported.\n"));
 474                 goto use_as_is;
 475         }
 476
 477   convert:
 478
 479         if ((destlen*2) < destlen) {
 480                 /* wrapped ! abort. */
 481                 if (!conv_silent)
 482                         DEBUG(0, ("convert_string_allocate: destlen wrapped !\n"));
 483                 if (!ctx)
 484                         SAFE_FREE(outbuf);
 485                 return (size_t)-1;
 486         } else {
 487                 destlen = destlen * 2;
 488         }
 489
 490         if (ctx)
 491                 ob = (char *)talloc_realloc(ctx, ob, destlen);
 492         else
 493                 ob = (char *)Realloc(ob, destlen);
 494
 495         if (!ob) {
 496                 DEBUG(0, ("convert_string_allocate: realloc failed!\n"));
 497                 if (!ctx)
 498                         SAFE_FREE(outbuf);
 499                 return (size_t)-1;
 500         } else {
 501                 outbuf = ob;
 502         }
 503         i_len = srclen;
 504         o_len = destlen;
 505
 506  again:
 507
 508         retval = smb_iconv(descriptor,
 509                            (char **)&inbuf, &i_len,
 510                            &outbuf, &o_len);
 511         if(retval == (size_t)-1)                {
 512                 const char *reason="unknown error";
 513                 switch(errno) {
 514                         case EINVAL:
 515                                 reason="Incomplete multibyte sequence";
 516                                 if (!conv_silent)
 517                                         DEBUG(3,("convert_string_allocate: Conversion error: %s(%s)\n",reason,inbuf));
 518                                 goto use_as_is;
 519                         case E2BIG:
 520                                 goto convert;
 521                         case EILSEQ:
 522                                 reason="Illegal multibyte sequence";
 523                                 if (!conv_silent)
 524                                         DEBUG(3,("convert_string_allocate: Conversion error: %s(%s)\n",reason,inbuf));
 525                                 goto use_as_is;
 526                 }
 527                 if (!conv_silent)
 528                         DEBUG(0,("Conversion error: %s(%s)\n",reason,inbuf));
 529                 /* smb_panic(reason); */
 530                 return (size_t)-1;
 531         }
 532
 533   out:
 534
 535         destlen = destlen - o_len;
 536         if (ctx)
 537                 *dest = (char *)talloc_realloc(ctx,ob,destlen);
 538         else
 539                 *dest = (char *)Realloc(ob,destlen);
 540         if (destlen && !*dest) {
 541                 DEBUG(0, ("convert_string_allocate: out of memory!\n"));
 542                 if (!ctx)
 543                         SAFE_FREE(ob);
 544                 return (size_t)-1;
 545         }
 546
 547         return destlen;
 548
 549  use_as_is:
 550
 551         /*
 552          * Conversion not supported. This is actually an error, but there are so
 553          * many misconfigured iconv systems and smb.conf's out there we can't just
 554          * fail. Do a very bad conversion instead.... JRA.
 555          */
 556
 557         {
 558                 if (o_len == 0 || i_len == 0)
 559                         goto out;
 560
 561                 if (from == CH_UCS2 && to != CH_UCS2) {
 562                         /* Can't convert from ucs2 to multibyte. Just truncate this char to ascii. */
 563                         if (i_len < 2)
 564                                 goto out;
 565
 566                         if (i_len >= 2) {
 567                                 *outbuf = inbuf[0];
 568
 569                                 outbuf++;
 570                                 o_len--;
 571
 572                                 inbuf += 2;
 573                                 i_len -= 2;
 574                         }
 575
 576                         if (o_len == 0 || i_len == 0)
 577                                 goto out;
 578
 579                         /* Keep trying with the next char... */
 580                         goto again;
 581
 582                 } else if (from != CH_UCS2 && to == CH_UCS2) {
 583                         /* Can't convert to ucs2 - just widen by adding zero. */
 584                         if (o_len < 2)
 585                                 goto out;
 586
 587                         outbuf[0] = inbuf[0];
 588                         outbuf[1] = '\0';
 589
 590                         inbuf++;
 591                         i_len--;
 592
 593                         outbuf += 2;
 594                         o_len -= 2;
 595
 596                         if (o_len == 0 || i_len == 0)
 597                                 goto out;
 598
 599                         /* Keep trying with the next char... */
 600                         goto again;
 601
 602                 } else if (from != CH_UCS2 && to != CH_UCS2) {
 603                         /* Failed multibyte to multibyte. Just copy 1 char and
 604                                 try again. */
 605                         outbuf[0] = inbuf[0];
 606
 607                         inbuf++;
 608                         i_len--;
 609
 610                         outbuf++;
 611                         o_len--;
 612
 613                         if (o_len == 0 || i_len == 0)
 614                                 goto out;
 615
 616                         /* Keep trying with the next char... */
 617                         goto again;
 618
 619                 } else {
 620                         /* Keep compiler happy.... */
 621                         goto out;
 622                 }
 623         }
 624 }
 625
 626 /**
 627  * Convert between character sets, allocating a new buffer using talloc for the result.
 628  *
 629  * @param srclen length of source buffer.
 630  * @param dest always set at least to NULL
 631  * @note -1 is not accepted for srclen.
 632  *
 633  * @returns Size in bytes of the converted string; or -1 in case of error.
 634  **/
 635 static size_t convert_string_talloc(TALLOC_CTX *ctx, charset_t from, charset_t to,
 636                                 void const *src, size_t srclen, void **dest)
 637 {
 638         size_t dest_len;
 639
 640         *dest = NULL;
 641         dest_len=convert_string_allocate(ctx, from, to, src, srclen, dest);
 642         if (dest_len == (size_t)-1)
 643                 return (size_t)-1;
 644         if (*dest == NULL)
 645                 return (size_t)-1;
 646         return dest_len;
 647 }
 648
 649 size_t unix_strupper(const char *src, size_t srclen, char *dest, size_t destlen)
 650 {
 651         size_t size;
 652         smb_ucs2_t *buffer;
 653
 654         size = push_ucs2_allocate(&buffer, src);
 655         if (size == -1) {
 656                 smb_panic("failed to create UCS2 buffer");
 657         }
 658         if (!strupper_w(buffer) && (dest == src)) {
 659                 free(buffer);
 660                 return srclen;
 661         }
 662
 663         size = convert_string(CH_UCS2, CH_UNIX, buffer, size, dest, destlen);
 664         free(buffer);
 665         return size;
 666 }
 667
 668 /**
 669  strdup() a unix string to upper case.
 670  Max size is pstring.
 671 **/
 672
 673 char *strdup_upper(const char *s)
 674 {
 675         pstring out_buffer;
 676         const unsigned char *p = (const unsigned char *)s;
 677         unsigned char *q = (unsigned char *)out_buffer;
 678
 679         /* this is quite a common operation, so we want it to be
 680            fast. We optimise for the ascii case, knowing that all our
 681            supported multi-byte character sets are ascii-compatible
 682            (ie. they match for the first 128 chars) */
 683
 684         while (1) {
 685                 if (*p & 0x80)
 686                         break;
 687                 *q++ = toupper(*p);
 688                 if (!*p)
 689                         break;
 690                 p++;
 691                 if (p - ( const unsigned char *)s >= sizeof(pstring))
 692                         break;
 693         }
 694
 695         if (*p) {
 696                 /* MB case. */
 697                 size_t size;
 698                 wpstring buffer;
 699                 size = convert_string(CH_UNIX, CH_UCS2, s, -1, buffer, sizeof(buffer));
 700                 if (size == -1) {
 701                         return NULL;
 702                 }
 703
 704                 strupper_w(buffer);
 705
 706                 size = convert_string(CH_UCS2, CH_UNIX, buffer, -1, out_buffer, sizeof(out_buffer));
 707                 if (size == -1) {
 708                         return NULL;
 709                 }
 710         }
 711
 712         return strdup(out_buffer);
 713 }
 714
 715 size_t unix_strlower(const char *src, size_t srclen, char *dest, size_t destlen)
 716 {
 717         size_t size;
 718         smb_ucs2_t *buffer = NULL;
 719
 720         size = convert_string_allocate(NULL, CH_UNIX, CH_UCS2, src, srclen,
 721                                        (void **) &buffer);
 722         if (size == -1 || !buffer) {
 723                 smb_panic("failed to create UCS2 buffer");
 724         }
 725         if (!strlower_w(buffer) && (dest == src)) {
 726                 SAFE_FREE(buffer);
 727                 return srclen;
 728         }
 729         size = convert_string(CH_UCS2, CH_UNIX, buffer, size, dest, destlen);
 730         SAFE_FREE(buffer);
 731         return size;
 732 }
 733
 734 /**
 735  strdup() a unix string to lower case.
 736 **/
 737
 738 char *strdup_lower(const char *s)
 739 {
 740         size_t size;
 741         smb_ucs2_t *buffer = NULL;
 742         char *out_buffer;
 743
 744         size = push_ucs2_allocate(&buffer, s);
 745         if (size == -1 || !buffer) {
 746                 return NULL;
 747         }
 748
 749         strlower_w(buffer);
 750
 751         size = pull_ucs2_allocate(&out_buffer, buffer);
 752         SAFE_FREE(buffer);
 753
 754         if (size == -1) {
 755                 return NULL;
 756         }
 757
 758         return out_buffer;
 759 }
 760
 761 static size_t ucs2_align(const void *base_ptr, const void *p, int flags)
 762 {
 763         if (flags & (STR_NOALIGN|STR_ASCII))
 764                 return 0;
 765         return PTR_DIFF(p, base_ptr) & 1;
 766 }
 767
 768
 769 /**
 770  * Copy a string from a char* unix src to a dos codepage string destination.
 771  *
 772  * @return the number of bytes occupied by the string in the destination.
 773  *
 774  * @param flags can include
 775  * <dl>
 776  * <dt>STR_TERMINATE</dt> <dd>means include the null termination</dd>
 777  * <dt>STR_UPPER</dt> <dd>means uppercase in the destination</dd>
 778  * </dl>
 779  *
 780  * @param dest_len the maximum length in bytes allowed in the
 781  * destination.  If @p dest_len is -1 then no maximum is used.
 782  **/
 783 size_t push_ascii(void *dest, const char *src, size_t dest_len, int flags)
 784 {
 785         size_t src_len = strlen(src);
 786         pstring tmpbuf;
 787
 788         /* treat a pstring as "unlimited" length */
 789         if (dest_len == (size_t)-1)
 790                 dest_len = sizeof(pstring);
 791
 792         if (flags & STR_UPPER) {
 793                 pstrcpy(tmpbuf, src);
 794                 strupper_m(tmpbuf);
 795                 src = tmpbuf;
 796         }
 797
 798         if (flags & (STR_TERMINATE | STR_TERMINATE_ASCII))
 799                 src_len++;
 800
 801         return convert_string(CH_UNIX, CH_DOS, src, src_len, dest, dest_len);
 802 }
 803
 804 size_t push_ascii_fstring(void *dest, const char *src)
 805 {
 806         return push_ascii(dest, src, sizeof(fstring), STR_TERMINATE);
 807 }
 808
 809 size_t push_ascii_pstring(void *dest, const char *src)
 810 {
 811         return push_ascii(dest, src, sizeof(pstring), STR_TERMINATE);
 812 }
 813
 814 size_t push_ascii_nstring(void *dest, const char *src)
 815 {
 816         return push_ascii(dest, src, sizeof(nstring), STR_TERMINATE);
 817 }
 818
 819 /**
 820  * Copy a string from a dos codepage source to a unix char* destination.
 821  *
 822  * The resulting string in "dest" is always null terminated.
 823  *
 824  * @param flags can have:
 825  * <dl>
 826  * <dt>STR_TERMINATE</dt>
 827  * <dd>STR_TERMINATE means the string in @p src
 828  * is null terminated, and src_len is ignored.</dd>
 829  * </dl>
 830  *
 831  * @param src_len is the length of the source area in bytes.
 832  * @returns the number of bytes occupied by the string in @p src.
 833  **/
 834 size_t pull_ascii(char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
 835 {
 836         size_t ret;
 837
 838         if (dest_len == (size_t)-1)
 839                 dest_len = sizeof(pstring);
 840
 841         if (flags & STR_TERMINATE) {
 842                 if (src_len == (size_t)-1) {
 843                         src_len = strlen(src) + 1;
 844                 } else {
 845                         size_t len = strnlen(src, src_len);
 846                         if (len < src_len)
 847                                 len++;
 848                         src_len = len;
 849                 }
 850         }
 851
 852         ret = convert_string(CH_DOS, CH_UNIX, src, src_len, dest, dest_len);
 853
 854         if (dest_len)
 855                 dest[MIN(ret, dest_len-1)] = 0;
 856         else
 857                 dest[0] = 0;
 858
 859         return src_len;
 860 }
 861
 862 size_t pull_ascii_pstring(char *dest, const void *src)
 863 {
 864         return pull_ascii(dest, src, sizeof(pstring), -1, STR_TERMINATE);
 865 }
 866
 867 size_t pull_ascii_fstring(char *dest, const void *src)
 868 {
 869         return pull_ascii(dest, src, sizeof(fstring), -1, STR_TERMINATE);
 870 }
 871
 872 size_t pull_ascii_nstring(char *dest, const void *src)
 873 {
 874         return pull_ascii(dest, src, sizeof(nstring), sizeof(nstring), STR_TERMINATE);
 875 }
 876
 877 /**
 878  * Copy a string from a char* src to a unicode destination.
 879  *
 880  * @returns the number of bytes occupied by the string in the destination.
 881  *
 882  * @param flags can have:
 883  *
 884  * <dl>
 885  * <dt>STR_TERMINATE <dd>means include the null termination.
 886  * <dt>STR_UPPER     <dd>means uppercase in the destination.
 887  * <dt>STR_NOALIGN   <dd>means don't do alignment.
 888  * </dl>
 889  *
 890  * @param dest_len is the maximum length allowed in the
 891  * destination. If dest_len is -1 then no maxiumum is used.
 892  **/
 893
 894 size_t push_ucs2(const void *base_ptr, void *dest, const char *src, size_t dest_len, int flags)
 895 {
 896         size_t len=0;
 897         size_t src_len;
 898
 899         /* treat a pstring as "unlimited" length */
 900         if (dest_len == (size_t)-1)
 901                 dest_len = sizeof(pstring);
 902
 903         if (flags & STR_TERMINATE)
 904                 src_len = (size_t)-1;
 905         else
 906                 src_len = strlen(src);
 907
 908         if (ucs2_align(base_ptr, dest, flags)) {
 909                 *(char *)dest = 0;
 910                 dest = (void *)((char *)dest + 1);
 911                 if (dest_len)
 912                         dest_len--;
 913                 len++;
 914         }
 915
 916         /* ucs2 is always a multiple of 2 bytes */
 917         dest_len &= ~1;
 918
 919         len += convert_string(CH_UNIX, CH_UCS2, src, src_len, dest, dest_len);
 920
 921         if (flags & STR_UPPER) {
 922                 smb_ucs2_t *dest_ucs2 = dest;
 923                 size_t i;
 924                 for (i = 0; i < (dest_len / 2) && dest_ucs2[i]; i++) {
 925                         smb_ucs2_t v = toupper_w(dest_ucs2[i]);
 926                         if (v != dest_ucs2[i]) {
 927                                 dest_ucs2[i] = v;
 928                         }
 929                 }
 930         }
 931
 932         return len;
 933 }
 934
 935
 936 /**
 937  * Copy a string from a unix char* src to a UCS2 destination,
 938  * allocating a buffer using talloc().
 939  *
 940  * @param dest always set at least to NULL
 941  *
 942  * @returns The number of bytes occupied by the string in the destination
 943  *         or -1 in case of error.
 944  **/
 945 size_t push_ucs2_talloc(TALLOC_CTX *ctx, smb_ucs2_t **dest, const char *src)
 946 {
 947         size_t src_len = strlen(src)+1;
 948
 949         *dest = NULL;
 950         return convert_string_talloc(ctx, CH_UNIX, CH_UCS2, src, src_len, (void **)dest);
 951 }
 952
 953
 954 /**
 955  * Copy a string from a unix char* src to a UCS2 destination, allocating a buffer
 956  *
 957  * @param dest always set at least to NULL
 958  *
 959  * @returns The number of bytes occupied by the string in the destination
 960  *         or -1 in case of error.
 961  **/
 962
 963 size_t push_ucs2_allocate(smb_ucs2_t **dest, const char *src)
 964 {
 965         size_t src_len = strlen(src)+1;
 966
 967         *dest = NULL;
 968         return convert_string_allocate(NULL, CH_UNIX, CH_UCS2, src, src_len, (void **)dest);
 969 }
 970
 971 /**
 972  Copy a string from a char* src to a UTF-8 destination.
 973  Return the number of bytes occupied by the string in the destination
 974  Flags can have:
 975   STR_TERMINATE means include the null termination
 976   STR_UPPER     means uppercase in the destination
 977  dest_len is the maximum length allowed in the destination. If dest_len
 978  is -1 then no maxiumum is used.
 979 **/
 980
 981 static size_t push_utf8(void *dest, const char *src, size_t dest_len, int flags)
 982 {
 983         size_t src_len = strlen(src);
 984         pstring tmpbuf;
 985
 986         /* treat a pstring as "unlimited" length */
 987         if (dest_len == (size_t)-1)
 988                 dest_len = sizeof(pstring);
 989
 990         if (flags & STR_UPPER) {
 991                 pstrcpy(tmpbuf, src);
 992                 strupper_m(tmpbuf);
 993                 src = tmpbuf;
 994         }
 995
 996         if (flags & STR_TERMINATE)
 997                 src_len++;
 998
 999         return convert_string(CH_UNIX, CH_UTF8, src, src_len, dest, dest_len);
1000 }
1001
1002 size_t push_utf8_fstring(void *dest, const char *src)
1003 {
1004         return push_utf8(dest, src, sizeof(fstring), STR_TERMINATE);
1005 }
1006
1007 /**
1008  * Copy a string from a unix char* src to a UTF-8 destination, allocating a buffer using talloc
1009  *
1010  * @param dest always set at least to NULL
1011  *
1012  * @returns The number of bytes occupied by the string in the destination
1013  **/
1014
1015 size_t push_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src)
1016 {
1017         size_t src_len = strlen(src)+1;
1018
1019         *dest = NULL;
1020         return convert_string_talloc(ctx, CH_UNIX, CH_UTF8, src, src_len, (void**)dest);
1021 }
1022
1023 /**
1024  * Copy a string from a unix char* src to a UTF-8 destination, allocating a buffer
1025  *
1026  * @param dest always set at least to NULL
1027  *
1028  * @returns The number of bytes occupied by the string in the destination
1029  **/
1030
1031 size_t push_utf8_allocate(char **dest, const char *src)
1032 {
1033         size_t src_len = strlen(src)+1;
1034
1035         *dest = NULL;
1036         return convert_string_allocate(NULL, CH_UNIX, CH_UTF8, src, src_len, (void **)dest);
1037 }
1038
1039 /**
1040  Copy a string from a ucs2 source to a unix char* destination.
1041  Flags can have:
1042   STR_TERMINATE means the string in src is null terminated.
1043   STR_NOALIGN   means don't try to align.
1044  if STR_TERMINATE is set then src_len is ignored if it is -1.
1045  src_len is the length of the source area in bytes
1046  Return the number of bytes occupied by the string in src.
1047  The resulting string in "dest" is always null terminated.
1048 **/
1049
1050 size_t pull_ucs2(const void *base_ptr, char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
1051 {
1052         size_t ret;
1053
1054         if (dest_len == (size_t)-1)
1055                 dest_len = sizeof(pstring);
1056
1057         if (ucs2_align(base_ptr, src, flags)) {
1058                 src = (const void *)((const char *)src + 1);
1059                 if (src_len != (size_t)-1)
1060                         src_len--;
1061         }
1062
1063         if (flags & STR_TERMINATE) {
1064                 /* src_len -1 is the default for null terminated strings. */
1065                 if (src_len != (size_t)-1) {
1066                         size_t len = strnlen_w(src, src_len/2);
1067                         if (len < src_len/2)
1068                                 len++;
1069                         src_len = len*2;
1070                 }
1071         }
1072
1073         /* ucs2 is always a multiple of 2 bytes */
1074         if (src_len != (size_t)-1)
1075                 src_len &= ~1;
1076
1077         ret = convert_string(CH_UCS2, CH_UNIX, src, src_len, dest, dest_len);
1078
1079         if (src_len == (size_t)-1)
1080                 src_len = ret*2;
1081
1082         if (dest_len)
1083                 dest[MIN(ret, dest_len-1)] = 0;
1084         else
1085                 dest[0] = 0;
1086
1087         return src_len;
1088 }
1089
1090 size_t pull_ucs2_pstring(char *dest, const void *src)
1091 {
1092         return pull_ucs2(NULL, dest, src, sizeof(pstring), -1, STR_TERMINATE);
1093 }
1094
1095 size_t pull_ucs2_fstring(char *dest, const void *src)
1096 {
1097         return pull_ucs2(NULL, dest, src, sizeof(fstring), -1, STR_TERMINATE);
1098 }
1099
1100 /**
1101  * Copy a string from a UCS2 src to a unix char * destination, allocating a buffer using talloc
1102  *
1103  * @param dest always set at least to NULL
1104  *
1105  * @returns The number of bytes occupied by the string in the destination
1106  **/
1107
1108 size_t pull_ucs2_talloc(TALLOC_CTX *ctx, char **dest, const smb_ucs2_t *src)
1109 {
1110         size_t src_len = (strlen_w(src)+1) * sizeof(smb_ucs2_t);
1111         *dest = NULL;
1112         return convert_string_talloc(ctx, CH_UCS2, CH_UNIX, src, src_len, (void **)dest);
1113 }
1114
1115 /**
1116  * Copy a string from a UCS2 src to a unix char * destination, allocating a buffer
1117  *
1118  * @param dest always set at least to NULL
1119  *
1120  * @returns The number of bytes occupied by the string in the destination
1121  **/
1122
1123 size_t pull_ucs2_allocate(char **dest, const smb_ucs2_t *src)
1124 {
1125         size_t src_len = (strlen_w(src)+1) * sizeof(smb_ucs2_t);
1126         *dest = NULL;
1127         return convert_string_allocate(NULL, CH_UCS2, CH_UNIX, src, src_len, (void **)dest);
1128 }
1129
1130 /**
1131  * Copy a string from a UTF-8 src to a unix char * destination, allocating a buffer using talloc
1132  *
1133  * @param dest always set at least to NULL
1134  *
1135  * @returns The number of bytes occupied by the string in the destination
1136  **/
1137
1138 size_t pull_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src)
1139 {
1140         size_t src_len = strlen(src)+1;
1141         *dest = NULL;
1142         return convert_string_talloc(ctx, CH_UTF8, CH_UNIX, src, src_len, (void **)dest);
1143 }
1144
1145 /**
1146  * Copy a string from a UTF-8 src to a unix char * destination, allocating a buffer
1147  *
1148  * @param dest always set at least to NULL
1149  *
1150  * @returns The number of bytes occupied by the string in the destination
1151  **/
1152
1153 size_t pull_utf8_allocate(char **dest, const char *src)
1154 {
1155         size_t src_len = strlen(src)+1;
1156         *dest = NULL;
1157         return convert_string_allocate(NULL, CH_UTF8, CH_UNIX, src, src_len, (void **)dest);
1158 }
1159
1160 /**
1161  Copy a string from a char* src to a unicode or ascii
1162  dos codepage destination choosing unicode or ascii based on the
1163  flags in the SMB buffer starting at base_ptr.
1164  Return the number of bytes occupied by the string in the destination.
1165  flags can have:
1166   STR_TERMINATE means include the null termination.
1167   STR_UPPER     means uppercase in the destination.
1168   STR_ASCII     use ascii even with unicode packet.
1169   STR_NOALIGN   means don't do alignment.
1170  dest_len is the maximum length allowed in the destination. If dest_len
1171  is -1 then no maxiumum is used.
1172 **/
1173
1174 size_t push_string_fn(const char *function, unsigned int line, const void *base_ptr, void *dest, const char *src, size_t dest_len, int flags)
1175 {
1176 #ifdef DEVELOPER
1177         /* We really need to zero fill here, not clobber
1178          * region, as we want to ensure that valgrind thinks
1179          * all of the outgoing buffer has been written to
1180          * so a send() or write() won't trap an error.
1181          * JRA.
1182          */
1183 #if 0
1184         if (dest_len != (size_t)-1)
1185                 clobber_region(function, line, dest, dest_len);
1186 #else
1187         if (dest_len != (size_t)-1)
1188                 memset(dest, '\0', dest_len);
1189 #endif
1190 #endif
1191
1192         if (!(flags & STR_ASCII) && \
1193             ((flags & STR_UNICODE || \
1194               (SVAL(base_ptr, smb_flg2) & FLAGS2_UNICODE_STRINGS)))) {
1195                 return push_ucs2(base_ptr, dest, src, dest_len, flags);
1196         }
1197         return push_ascii(dest, src, dest_len, flags);
1198 }
1199
1200
1201 /**
1202  Copy a string from a unicode or ascii source (depending on
1203  the packet flags) to a char* destination.
1204  Flags can have:
1205   STR_TERMINATE means the string in src is null terminated.
1206   STR_UNICODE   means to force as unicode.
1207   STR_ASCII     use ascii even with unicode packet.
1208   STR_NOALIGN   means don't do alignment.
1209  if STR_TERMINATE is set then src_len is ignored is it is -1
1210  src_len is the length of the source area in bytes.
1211  Return the number of bytes occupied by the string in src.
1212  The resulting string in "dest" is always null terminated.
1213 **/
1214
1215 size_t pull_string_fn(const char *function, unsigned int line, const void *base_ptr, char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
1216 {
1217 #ifdef DEVELOPER
1218         if (dest_len != (size_t)-1)
1219                 clobber_region(function, line, dest, dest_len);
1220 #endif
1221
1222         if (!(flags & STR_ASCII) && \
1223             ((flags & STR_UNICODE || \
1224               (SVAL(base_ptr, smb_flg2) & FLAGS2_UNICODE_STRINGS)))) {
1225                 return pull_ucs2(base_ptr, dest, src, dest_len, src_len, flags);
1226         }
1227         return pull_ascii(dest, src, dest_len, src_len, flags);
1228 }
1229
1230 size_t align_string(const void *base_ptr, const char *p, int flags)
1231 {
1232         if (!(flags & STR_ASCII) && \
1233             ((flags & STR_UNICODE || \
1234               (SVAL(base_ptr, smb_flg2) & FLAGS2_UNICODE_STRINGS)))) {
1235                 return ucs2_align(base_ptr, p, flags);
1236         }
1237         return 0;
1238 }