source3/lib/util_unistr.c

   1 /*
   2    Unix SMB/Netbios implementation.
   3    Version 1.9.
   4    Samba utility functions
   5    Copyright (C) Andrew Tridgell 1992-1998
   6
   7    This program is free software; you can redistribute it and/or modify
   8    it under the terms of the GNU General Public License as published by
   9    the Free Software Foundation; either version 2 of the License, or
  10    (at your option) any later version.
  11
  12    This program is distributed in the hope that it will be useful,
  13    but WITHOUT ANY WARRANTY; without even the implied warranty of
  14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15    GNU General Public License for more details.
  16
  17    You should have received a copy of the GNU General Public License
  18    along with this program; if not, write to the Free Software
  19    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  20 */
  21
  22 #include "includes.h"
  23
  24 extern int DEBUGLEVEL;
  25
 /*
  * Zero-terminated table of UCS2 code units: space, tab, comma, semicolon,
  * colon, newline and carriage return.  Judging by the name these are the
  * characters treated as list separators in wide strings; the code that
  * consumes the table is not in this part of the file.
  */
 smb_ucs2_t wchar_list_sep[] = { (smb_ucs2_t)' ', (smb_ucs2_t)'\t', (smb_ucs2_t)',',
                                                                (smb_ucs2_t)';', (smb_ucs2_t)':', (smb_ucs2_t)'\n',
                                                                (smb_ucs2_t)'\r', 0 };
/*
 * The following are the codepage to ucs2 and vice versa maps.
 * These are dynamically loaded from a unicode translation file by
 * load_unicode_map(), or set to an identity map by default_unicode_map().
 */

/* DOS codepage <-> UCS2 tables (filled in by load_dos_unicode_map()). */
static smb_ucs2_t *doscp_to_ucs2;
static uint16 *ucs2_to_doscp;

/* UNIX character set <-> UCS2 tables (filled in by load_unix_unicode_map()). */
static smb_ucs2_t *unixcp_to_ucs2;
static uint16 *ucs2_to_unixcp;

/* Size in bytes of each static conversion buffer used by the routines below. */
#ifndef MAXUNI
#define MAXUNI 1024
#endif
  43
  44 /*******************************************************************
  45  Write a string in (little-endian) unicode format. src is in
  46  the current DOS codepage. len is the length in bytes of the
  47  string pointed to by dst.
  48
  49  if null_terminate is True then null terminate the packet (adds 2 bytes)
  50
  51  the return value is the length in bytes consumed by the string, including the
  52  null termination if applied
  53 ********************************************************************/
  54
  55 size_t dos_PutUniCode(char *dst,const char *src, ssize_t len, BOOL null_terminate)
  56 {
  57         size_t ret = 0;
  58         while (*src && (len >= 2)) {
  59                 size_t skip = get_character_len(*src);
  60                 smb_ucs2_t val = (*src & 0xff);
  61
  62                 /*
  63                  * If this is a multibyte character (and all DOS/Windows
  64                  * codepages have at maximum 2 byte multibyte characters)
  65                  * then work out the index value for the unicode conversion.
  66                  */
  67
  68                 if (skip == 2)
  69                         val = ((val << 8) | (src[1] & 0xff));
  70
  71                 SSVAL(dst,ret,doscp_to_ucs2[val]);
  72                 ret += 2;
  73                 len -= 2;
  74                 if (skip)
  75                         src += skip;
  76                 else
  77                         src++;
  78         }
  79         if (null_terminate) {
  80                 SSVAL(dst,ret,0);
  81                 ret += 2;
  82         }
  83         return(ret);
  84 }
  85
  86 /*******************************************************************
  87  Pull a DOS codepage string out of a UNICODE array. len is in bytes.
  88 ********************************************************************/
  89
  90 void unistr_to_dos(char *dest, const char *src, size_t len)
  91 {
  92         char *destend = dest + len;
  93
  94         while (dest < destend) {
  95                 uint16 ucs2_val = SVAL(src,0);
  96                 uint16 cp_val = ucs2_to_doscp[ucs2_val];
  97
  98                 src += 2;
  99
 100                 if (ucs2_val == 0)
 101                         break;
 102
 103                 if (cp_val < 256)
 104                         *dest++ = (char)cp_val;
 105                 else {
 106                         *dest++ = (cp_val >> 8) & 0xff;
 107                         *dest++ = (cp_val & 0xff);
 108                 }
 109         }
 110
 111         *dest = 0;
 112 }
 113
 114 /*******************************************************************
 115  Skip past a unicode string, but not more than len. Always move
 116  past a terminating zero if found.
 117 ********************************************************************/
 118
 119 char *skip_unibuf(char *src, size_t len)
 120 {
 121     char *srcend = src + len;
 122
 123     while (src < srcend && SVAL(src,0))
 124         src += 2;
 125
 126     if(!SVAL(src,0))
 127         src += 2;
 128
 129     return src;
 130 }
 131
 132 /*******************************************************************
 133  Return a DOS codepage version of a little-endian unicode string.
 134  len is the filename length (ignoring any terminating zero) in uin16
 135  units. Always null terminates.
 136  Hack alert: uses fixed buffer(s).
 137 ********************************************************************/
 138
 139 char *dos_unistrn2(uint16 *src, int len)
 140 {
 141         static char lbufs[8][MAXUNI];
 142         static int nexti;
 143         char *lbuf = lbufs[nexti];
 144         char *p;
 145
 146         nexti = (nexti+1)%8;
 147
 148         for (p = lbuf; (len > 0) && (p-lbuf < MAXUNI-3) && *src; len--, src++) {
 149                 uint16 ucs2_val = SVAL(src,0);
 150                 uint16 cp_val = ucs2_to_doscp[ucs2_val];
 151
 152                 if (cp_val < 256)
 153                         *p++ = (char)cp_val;
 154                 else {
 155                         *p++ = (cp_val >> 8) & 0xff;
 156                         *p++ = (cp_val & 0xff);
 157                 }
 158         }
 159
 160         *p = 0;
 161         return lbuf;
 162 }
 163
/*
 * Result buffers shared by the dos_unistr*() and dos_buffer2_*()
 * routines below. Each call takes lbufs[nexti] and then moves nexti
 * on by one (modulo 8), so a returned string is overwritten by the
 * eighth following call to any of those routines.
 */
static char lbufs[8][MAXUNI];
static int nexti;
 166
 167 /*******************************************************************
 168  Return a DOS codepage version of a little-endian unicode string.
 169  Hack alert: uses fixed buffer(s).
 170 ********************************************************************/
 171
 172 char *dos_unistr2(uint16 *src)
 173 {
 174         char *lbuf = lbufs[nexti];
 175         char *p;
 176
 177         nexti = (nexti+1)%8;
 178
 179         for (p = lbuf; (p-lbuf < MAXUNI-3) && *src; src++) {
 180                 uint16 ucs2_val = SVAL(src,0);
 181                 uint16 cp_val = ucs2_to_doscp[ucs2_val];
 182
 183                 if (cp_val < 256)
 184                         *p++ = (char)cp_val;
 185                 else {
 186                         *p++ = (cp_val >> 8) & 0xff;
 187                         *p++ = (cp_val & 0xff);
 188                 }
 189         }
 190
 191         *p = 0;
 192         return lbuf;
 193 }
 194
 195 /*******************************************************************
 196 Return a DOS codepage version of a little-endian unicode string
 197 ********************************************************************/
 198
 199 char *dos_unistr2_to_str(UNISTR2 *str)
 200 {
 201         char *lbuf = lbufs[nexti];
 202         char *p;
 203         uint16 *src = str->buffer;
 204         int max_size = MIN(MAXUNI-3, str->uni_str_len);
 205
 206         nexti = (nexti+1)%8;
 207
 208         for (p = lbuf; (p-lbuf < max_size) && *src; src++) {
 209                 uint16 ucs2_val = SVAL(src,0);
 210                 uint16 cp_val = ucs2_to_doscp[ucs2_val];
 211
 212                 if (cp_val < 256)
 213                         *p++ = (char)cp_val;
 214                 else {
 215                         *p++ = (cp_val >> 8) & 0xff;
 216                         *p++ = (cp_val & 0xff);
 217                 }
 218         }
 219
 220         *p = 0;
 221         return lbuf;
 222 }
 223
 224 /*******************************************************************
 225  Put an ASCII string into a UNICODE array (uint16's).
 226  ********************************************************************/
 227 void ascii_to_unistr(uint16 *dest, const char *src, int maxlen)
 228 {
 229         uint16 *destend = dest + maxlen;
 230         register char c;
 231
 232         while (dest < destend)
 233         {
 234                 c = *(src++);
 235                 if (c == 0)
 236                 {
 237                         break;
 238                 }
 239
 240                 *(dest++) = (uint16)c;
 241         }
 242
 243         *dest = 0;
 244 }
 245
 246
 247 /*******************************************************************
 248  Pull an ASCII string out of a UNICODE array (uint16's).
 249  ********************************************************************/
 250
 251 void unistr_to_ascii(char *dest, const uint16 *src, int len)
 252 {
 253         char *destend = dest + len;
 254         register uint16 c;
 255
 256         if (src == NULL)
 257         {
 258                 *dest = '\0';
 259                 return;
 260         }
 261
 262         /* normal code path for a valid 'src' */
 263         while (dest < destend)
 264         {
 265                 c = *(src++);
 266                 if (c == 0)
 267                 {
 268                         break;
 269                 }
 270
 271                 *(dest++) = (char)c;
 272         }
 273
 274         *dest = 0;
 275         return;
 276 }
 277
 278 /*******************************************************************
 279  Convert a UNISTR2 structure to an ASCII string
 280  Warning: this version does DOS codepage.
 281 ********************************************************************/
 282
 283 void unistr2_to_ascii(char *dest, const UNISTR2 *str, size_t maxlen)
 284 {
 285         char *p;
 286         uint16 *src;
 287         size_t len;
 288
 289         if (str == NULL) {
 290                 *dest='\0';
 291                 return;
 292         }
 293
 294         src = str->buffer;
 295         len = MIN(str->uni_str_len, maxlen);
 296
 297         if (len == 0) {
 298                 *dest='\0';
 299                 return;
 300         }
 301
 302         for (p = dest; (p-dest < len) && *src; src++) {
 303                 uint16 ucs2_val = SVAL(src,0);
 304                 uint16 cp_val = ucs2_to_doscp[ucs2_val];
 305
 306                 if (cp_val < 256)
 307                         *p++ = (char)cp_val;
 308                 else {
 309                         *p   = (cp_val >> 8) & 0xff;
 310                         *p++ = (cp_val & 0xff);
 311                 }
 312         }
 313
 314         *p = 0;
 315 }
 316
 317
 318 /*******************************************************************
 319 Return a number stored in a buffer
 320 ********************************************************************/
 321
 322 uint32 buffer2_to_uint32(BUFFER2 *str)
 323 {
 324         if (str->buf_len == 4)
 325                 return IVAL(str->buffer, 0);
 326         else
 327                 return 0;
 328 }
 329
 330 /*******************************************************************
 331 Return a DOS codepage version of a NOTunicode string
 332 ********************************************************************/
 333
 334 char *dos_buffer2_to_str(BUFFER2 *str)
 335 {
 336         char *lbuf = lbufs[nexti];
 337         char *p;
 338         uint16 *src = str->buffer;
 339         int max_size = MIN(sizeof(str->buffer)-3, str->buf_len/2);
 340
 341         nexti = (nexti+1)%8;
 342
 343         for (p = lbuf; (p-lbuf < max_size) && *src; src++) {
 344                 uint16 ucs2_val = SVAL(src,0);
 345                 uint16 cp_val = ucs2_to_doscp[ucs2_val];
 346
 347                 if (cp_val < 256)
 348                         *p++ = (char)cp_val;
 349                 else {
 350                         *p++ = (cp_val >> 8) & 0xff;
 351                         *p++ = (cp_val & 0xff);
 352                 }
 353         }
 354
 355         *p = 0;
 356         return lbuf;
 357 }
 358
 359 /*******************************************************************
 360  Return a dos codepage version of a NOTunicode string
 361 ********************************************************************/
 362
 363 char *dos_buffer2_to_multistr(BUFFER2 *str)
 364 {
 365         char *lbuf = lbufs[nexti];
 366         char *p;
 367         uint16 *src = str->buffer;
 368         int max_size = MIN(sizeof(str->buffer)-3, str->buf_len/2);
 369
 370         nexti = (nexti+1)%8;
 371
 372         for (p = lbuf; p-lbuf < max_size; src++) {
 373                 if (*src == 0) {
 374                         *p++ = ' ';
 375                 } else {
 376                         uint16 ucs2_val = SVAL(src,0);
 377                         uint16 cp_val = ucs2_to_doscp[ucs2_val];
 378
 379                         if (cp_val < 256)
 380                                 *p++ = (char)cp_val;
 381                         else {
 382                                 *p++ = (cp_val >> 8) & 0xff;
 383                                 *p++ = (cp_val & 0xff);
 384                         }
 385                 }
 386         }
 387
 388         *p = 0;
 389         return lbuf;
 390 }
 391
 392 /*******************************************************************
 393  Create a null-terminated unicode string from a null-terminated DOS
 394  codepage string.
 395  Return number of unicode chars copied, excluding the null character.
 396  Unicode strings created are in little-endian format.
 397 ********************************************************************/
 398
 399 size_t dos_struni2(char *dst, const char *src, size_t max_len)
 400 {
 401         size_t len = 0;
 402
 403         if (dst == NULL)
 404                 return 0;
 405
 406         if (src != NULL) {
 407                 for (; (len < max_len-2) && *src; len++, dst +=2) {
 408                         size_t skip = get_character_len(*src);
 409                         smb_ucs2_t val = (*src & 0xff);
 410
 411                         /*
 412                          * If this is a multibyte character (and all DOS/Windows
 413                          * codepages have at maximum 2 byte multibyte characters)
 414                          * then work out the index value for the unicode conversion.
 415                          */
 416
 417                         if (skip == 2)
 418                                 val = ((val << 8) | (src[1] & 0xff));
 419
 420                         SSVAL(dst,0,doscp_to_ucs2[val]);
 421                         if (skip)
 422                                 src += skip;
 423                         else
 424                                 src++;
 425                 }
 426         }
 427
 428         SSVAL(dst,0,0);
 429
 430         return len;
 431 }
 432
 433 /*******************************************************************
 434  Return a DOS codepage version of a little-endian unicode string.
 435  Hack alert: uses fixed buffer(s).
 436 ********************************************************************/
 437
 438 char *dos_unistr(char *buf)
 439 {
 440         char *lbuf = lbufs[nexti];
 441         uint16 *src = (uint16 *)buf;
 442         char *p;
 443
 444         nexti = (nexti+1)%8;
 445
 446         for (p = lbuf; (p-lbuf < MAXUNI-3) && *src; src++) {
 447                 uint16 ucs2_val = SVAL(src,0);
 448                 uint16 cp_val = ucs2_to_doscp[ucs2_val];
 449
 450                 if (cp_val < 256)
 451                         *p++ = (char)cp_val;
 452                 else {
 453                         *p++ = (cp_val >> 8) & 0xff;
 454                         *p++ = (cp_val & 0xff);
 455                 }
 456         }
 457
 458         *p = 0;
 459         return lbuf;
 460 }
 461
 462 /*******************************************************************
 463  Strcpy for unicode strings.  returns length (in num of wide chars)
 464 ********************************************************************/
 465
 466 int unistrcpy(char *dst, char *src)
 467 {
 468         int num_wchars = 0;
 469         uint16 *wsrc = (uint16 *)src;
 470         uint16 *wdst = (uint16 *)dst;
 471
 472         while (*wsrc) {
 473                 *wdst++ = *wsrc++;
 474                 num_wchars++;
 475         }
 476         *wdst = 0;
 477
 478         return num_wchars;
 479 }
 480
 481
 482
 483 /*******************************************************************
 484  Free any existing maps.
 485 ********************************************************************/
 486
 487 static void free_maps(smb_ucs2_t **pp_cp_to_ucs2, uint16 **pp_ucs2_to_cp)
 488 {
 489         /* this handles identity mappings where we share the pointer */
 490         if (*pp_ucs2_to_cp == *pp_cp_to_ucs2) {
 491                 *pp_ucs2_to_cp = NULL;
 492         }
 493
 494         if (*pp_cp_to_ucs2) {
 495                 free(*pp_cp_to_ucs2);
 496                 *pp_cp_to_ucs2 = NULL;
 497         }
 498
 499         if (*pp_ucs2_to_cp) {
 500                 free(*pp_ucs2_to_cp);
 501                 *pp_ucs2_to_cp = NULL;
 502         }
 503 }
 504
 505
 506 /*******************************************************************
 507  Build a default (null) codepage to unicode map.
 508 ********************************************************************/
 509
 510 void default_unicode_map(smb_ucs2_t **pp_cp_to_ucs2, uint16 **pp_ucs2_to_cp)
 511 {
 512   int i;
 513
 514   free_maps(pp_cp_to_ucs2, pp_ucs2_to_cp);
 515
 516   if ((*pp_ucs2_to_cp = (uint16 *)malloc(2*65536)) == NULL) {
 517     DEBUG(0,("default_unicode_map: malloc fail for ucs2_to_cp size %u.\n", 2*65536));
 518     abort();
 519   }
 520
 521   *pp_cp_to_ucs2 = *pp_ucs2_to_cp; /* Default map is an identity. */
 522   for (i = 0; i < 65536; i++)
 523     (*pp_cp_to_ucs2)[i] = i;
 524 }
 525
 526 /*******************************************************************
 527  Load a codepage to unicode and vica-versa map.
 528 ********************************************************************/
 529
 530 BOOL load_unicode_map(const char *codepage, smb_ucs2_t **pp_cp_to_ucs2, uint16 **pp_ucs2_to_cp)
 531 {
 532   pstring unicode_map_file_name;
 533   FILE *fp = NULL;
 534   SMB_STRUCT_STAT st;
 535   smb_ucs2_t *cp_to_ucs2 = *pp_cp_to_ucs2;
 536   uint16 *ucs2_to_cp = *pp_ucs2_to_cp;
 537   size_t cp_to_ucs2_size;
 538   size_t ucs2_to_cp_size;
 539   size_t i;
 540   size_t size;
 541   char buf[UNICODE_MAP_HEADER_SIZE];
 542
 543   DEBUG(5, ("load_unicode_map: loading unicode map for codepage %s.\n", codepage));
 544
 545   if (*codepage == '\0')
 546     goto clean_and_exit;
 547
 548   if(strlen(lp_codepagedir()) + 13 + strlen(codepage) >
 549      sizeof(unicode_map_file_name)) {
 550     DEBUG(0,("load_unicode_map: filename too long to load\n"));
 551     goto clean_and_exit;
 552   }
 553
 554   pstrcpy(unicode_map_file_name, lp_codepagedir());
 555   pstrcat(unicode_map_file_name, "/");
 556   pstrcat(unicode_map_file_name, "unicode_map.");
 557   pstrcat(unicode_map_file_name, codepage);
 558
 559   if(sys_stat(unicode_map_file_name,&st)!=0) {
 560     DEBUG(0,("load_unicode_map: filename %s does not exist.\n",
 561               unicode_map_file_name));
 562     goto clean_and_exit;
 563   }
 564
 565   size = st.st_size;
 566
 567   if ((size != UNICODE_MAP_HEADER_SIZE + 4*65536) && (size != UNICODE_MAP_HEADER_SIZE +(2*256 + 2*65536))) {
 568     DEBUG(0,("load_unicode_map: file %s is an incorrect size for a \
 569 unicode map file (size=%d).\n", unicode_map_file_name, (int)size));
 570     goto clean_and_exit;
 571   }
 572
 573   if((fp = sys_fopen( unicode_map_file_name, "r")) == NULL) {
 574     DEBUG(0,("load_unicode_map: cannot open file %s. Error was %s\n",
 575               unicode_map_file_name, strerror(errno)));
 576     goto clean_and_exit;
 577   }
 578
 579   if(fread( buf, 1, UNICODE_MAP_HEADER_SIZE, fp)!=UNICODE_MAP_HEADER_SIZE) {
 580     DEBUG(0,("load_unicode_map: cannot read header from file %s. Error was %s\n",
 581               unicode_map_file_name, strerror(errno)));
 582     goto clean_and_exit;
 583   }
 584
 585   /* Check the version value */
 586   if(SVAL(buf,UNICODE_MAP_VERSION_OFFSET) != UNICODE_MAP_FILE_VERSION_ID) {
 587     DEBUG(0,("load_unicode_map: filename %s has incorrect version id. \
 588 Needed %hu, got %hu.\n",
 589           unicode_map_file_name, (uint16)UNICODE_MAP_FILE_VERSION_ID,
 590           SVAL(buf,UNICODE_MAP_VERSION_OFFSET)));
 591     goto clean_and_exit;
 592   }
 593
 594   /* Check the codepage value */
 595   if(!strequal(&buf[UNICODE_MAP_CLIENT_CODEPAGE_OFFSET], codepage)) {
 596     DEBUG(0,("load_unicode_map: codepage %s in file %s is not the same as that \
 597 requested (%s).\n", &buf[UNICODE_MAP_CLIENT_CODEPAGE_OFFSET], unicode_map_file_name, codepage ));
 598     goto clean_and_exit;
 599   }
 600
 601   ucs2_to_cp_size = 2*65536;
 602   if (size == UNICODE_MAP_HEADER_SIZE + 4*65536) {
 603     /*
 604      * This is a multibyte code page.
 605      */
 606     cp_to_ucs2_size = 2*65536;
 607   } else {
 608     /*
 609      * Single byte code page.
 610      */
 611     cp_to_ucs2_size = 2*256;
 612   }
 613
 614   /*
 615    * Free any old translation tables.
 616    */
 617
 618   free_maps(pp_cp_to_ucs2, pp_ucs2_to_cp);
 619
 620   if ((cp_to_ucs2 = (smb_ucs2_t *)malloc(cp_to_ucs2_size)) == NULL) {
 621     DEBUG(0,("load_unicode_map: malloc fail for cp_to_ucs2 size %u.\n", cp_to_ucs2_size ));
 622     goto clean_and_exit;
 623   }
 624
 625   if ((ucs2_to_cp = (uint16 *)malloc(ucs2_to_cp_size)) == NULL) {
 626     DEBUG(0,("load_unicode_map: malloc fail for ucs2_to_cp size %u.\n", ucs2_to_cp_size ));
 627     goto clean_and_exit;
 628   }
 629
 630   if(fread( (char *)cp_to_ucs2, 1, cp_to_ucs2_size, fp)!=cp_to_ucs2_size) {
 631     DEBUG(0,("load_unicode_map: cannot read cp_to_ucs2 from file %s. Error was %s\n",
 632               unicode_map_file_name, strerror(errno)));
 633     goto clean_and_exit;
 634   }
 635
 636   if(fread( (char *)ucs2_to_cp, 1, ucs2_to_cp_size, fp)!=ucs2_to_cp_size) {
 637     DEBUG(0,("load_unicode_map: cannot read ucs2_to_cp from file %s. Error was %s\n",
 638               unicode_map_file_name, strerror(errno)));
 639     goto clean_and_exit;
 640   }
 641
 642   /*
 643    * Now ensure the 16 bit values are in the correct endianness.
 644    */
 645
 646   for (i = 0; i < cp_to_ucs2_size/2; i++)
 647     cp_to_ucs2[i] = SVAL(cp_to_ucs2,i*2);
 648
 649   for (i = 0; i < ucs2_to_cp_size/2; i++)
 650     ucs2_to_cp[i] = SVAL(ucs2_to_cp,i*2);
 651
 652   fclose(fp);
 653
 654   *pp_cp_to_ucs2 = cp_to_ucs2;
 655   *pp_ucs2_to_cp = ucs2_to_cp;
 656
 657   return True;
 658
 659 clean_and_exit:
 660
 661   /* pseudo destructor :-) */
 662
 663   if(fp != NULL)
 664     fclose(fp);
 665
 666   free_maps(pp_cp_to_ucs2, pp_ucs2_to_cp);
 667
 668   default_unicode_map(pp_cp_to_ucs2, pp_ucs2_to_cp);
 669
 670   return False;
 671 }
 672
 673 /*******************************************************************
 674  Load a dos codepage to unicode and vica-versa map.
 675 ********************************************************************/
 676
 677 BOOL load_dos_unicode_map(int codepage)
 678 {
 679   fstring codepage_str;
 680
 681   slprintf(codepage_str, sizeof(fstring)-1, "%03d", codepage);
 682   return load_unicode_map(codepage_str, &doscp_to_ucs2, &ucs2_to_doscp);
 683 }
 684
 685 /*******************************************************************
 686  Load a UNIX codepage to unicode and vica-versa map.
 687 ********************************************************************/
 688
 689 BOOL load_unix_unicode_map(const char *unix_char_set)
 690 {
 691   fstring upper_unix_char_set;
 692
 693   fstrcpy(upper_unix_char_set, unix_char_set);
 694   strupper(upper_unix_char_set);
 695   return load_unicode_map(upper_unix_char_set, &unixcp_to_ucs2, &ucs2_to_unixcp);
 696 }
 697
 698 /*******************************************************************
 699  The following functions reproduce many of the non-UNICODE standard
 700  string functions in Samba.
 701 ********************************************************************/
 702
 703 /*******************************************************************
 704  Convert a UNICODE string to multibyte format. Note that the 'src' is in
 705  native byte order, not little endian. Always zero terminates.
 706  dst_len is in bytes.
 707 ********************************************************************/
 708
 709 static char *unicode_to_multibyte(char *dst, const smb_ucs2_t *src,
 710                                   size_t dst_len, const uint16 *ucs2_to_cp)
 711 {
 712         size_t dst_pos;
 713
 714         for(dst_pos = 0; (dst_pos < dst_len - 1) && *src;) {
 715                 smb_ucs2_t val = ucs2_to_cp[*src++];
 716                 if(val < 256) {
 717                         dst[dst_pos++] = (char)val;
 718                 } else {
 719
 720                         if(dst_pos >= dst_len - 2)
 721                                 break;
 722
 723                         /*
 724                          * A 2 byte value is always written as
 725                          * high/low into the buffer stream.
 726                          */
 727
 728                         dst[dst_pos++] = (char)((val >> 8) & 0xff);
 729                         dst[dst_pos++] = (char)(val & 0xff);
 730                 }
 731         }
 732
 733         dst[dst_pos] = '\0';
 734
 735         return dst;
 736 }
 737
 738 /*******************************************************************
 739  Convert a multibyte string to UNICODE format. Note that the 'dst' is in
 740  native byte order, not little endian. Always zero terminates.
 741  dst_len is in bytes.
 742 ********************************************************************/
 743
 744 smb_ucs2_t *multibyte_to_unicode(smb_ucs2_t *dst, const char *src,
 745                                  size_t dst_len, smb_ucs2_t *cp_to_ucs2)
 746 {
 747         size_t i;
 748
 749         dst_len /= sizeof(smb_ucs2_t); /* Convert to smb_ucs2_t units. */
 750
 751         for(i = 0; (i < (dst_len  - 1)) && src[i];) {
 752                 size_t skip = skip_multibyte_char(*src);
 753                 smb_ucs2_t val = (*src & 0xff);
 754
 755                 /*
 756                  * If this is a multibyte character
 757                  * then work out the index value for the unicode conversion.
 758                  */
 759
 760                 if (skip == 2)
 761                         val = ((val << 8) | (src[1] & 0xff));
 762
 763                 dst[i++] = cp_to_ucs2[val];
 764                 if (skip)
 765                         src += skip;
 766                 else
 767                         src++;
 768         }
 769
 770         dst[i] = 0;
 771
 772         return dst;
 773 }
 774
 775 /*******************************************************************
 776  Convert a UNICODE string to multibyte format. Note that the 'src' is in
 777  native byte order, not little endian. Always zero terminates.
 778  This function may be replaced if the MB  codepage format is an
 779  encoded one (ie. utf8, hex). See the code in lib/kanji.c
 780  for details. dst_len is in bytes.
 781 ********************************************************************/
 782
 783 char *unicode_to_unix(char *dst, const smb_ucs2_t *src, size_t dst_len)
 784 {
 785         return unicode_to_multibyte(dst, src, dst_len, ucs2_to_unixcp);
 786 }
 787
 788 /*******************************************************************
 789  Convert a UNIX string to UNICODE format. Note that the 'dst' is in
 790  native byte order, not little endian. Always zero terminates.
 791  This function may be replaced if the UNIX codepage format is a
 792  multi-byte one (ie. JIS, SJIS or utf8). See the code in lib/kanji.c
 793  for details. dst_len is in bytes, not ucs2 units.
 794 ********************************************************************/
 795
 796 smb_ucs2_t *unix_to_unicode(smb_ucs2_t *dst, const char *src, size_t dst_len)
 797 {
 798         return multibyte_to_unicode(dst, src, dst_len, unixcp_to_ucs2);
 799 }
 800
 801 /*******************************************************************
 802  Convert a UNICODE string to DOS format. Note that the 'src' is in
 803  native byte order, not little endian. Always zero terminates.
 804  dst_len is in bytes.
 805 ********************************************************************/
 806
 807 char *unicode_to_dos(char *dst, const smb_ucs2_t *src, size_t dst_len)
 808 {
 809         return unicode_to_multibyte(dst, src, dst_len, ucs2_to_doscp);
 810 }
 811
 812 /*******************************************************************
 813  Convert a single UNICODE character to DOS codepage. Returns the
 814  number of bytes in the DOS codepage character.
 815 ********************************************************************/
 816
 817 size_t unicode_to_dos_char(char *dst, const smb_ucs2_t src)
 818 {
 819         smb_ucs2_t val = ucs2_to_doscp[src];
 820         if(val < 256) {
 821                 *dst = (char)val;
 822                 return (size_t)1;
 823         }
 824         /*
 825          * A 2 byte value is always written as
 826          * high/low into the buffer stream.
 827          */
 828
 829         dst[0] = (char)((val >> 8) & 0xff);
 830         dst[1] = (char)(val & 0xff);
 831         return (size_t)2;
 832 }
 833
 834 /*******************************************************************
 835  Convert a DOS string to UNICODE format. Note that the 'dst' is in
 836  native byte order, not little endian. Always zero terminates.
 837  This function may be replaced if the DOS codepage format is a
 838  multi-byte one (ie. JIS, SJIS or utf8). See the code in lib/kanji.c
 839  for details. dst_len is in bytes, not ucs2 units.
 840 ********************************************************************/
 841
 842 smb_ucs2_t *dos_to_unicode(smb_ucs2_t *dst, const char *src, size_t dst_len)
 843 {
 844         return multibyte_to_unicode(dst, src, dst_len, doscp_to_ucs2);
 845 }
 846
 847 /*******************************************************************
 848  Count the number of characters in a smb_ucs2_t string.
 849 ********************************************************************/
 850
 851 size_t strlen_w(const smb_ucs2_t *src)
 852 {
 853   size_t len;
 854
 855   for(len = 0; *src; len++)
 856     ;
 857
 858   return len;
 859 }
 860
 861 /*******************************************************************
 862  Safe wstring copy into a known length string. maxlength includes
 863  the terminating zero. maxlength is in ucs2 units.
 864 ********************************************************************/
 865
 866 smb_ucs2_t *safe_strcpy_w(smb_ucs2_t *dest,const smb_ucs2_t *src, size_t maxlength)
 867 {
 868     size_t ucs2_len;
 869
 870     if (!dest) {
 871         DEBUG(0,("ERROR: NULL dest in safe_strcpy_w\n"));
 872         return NULL;
 873     }
 874
 875     if (!src) {
 876         *dest = 0;
 877         return dest;
 878     }
 879
 880         maxlength /= sizeof(smb_ucs2_t);
 881
 882         ucs2_len = strlen_w(src);
 883
 884     if (ucs2_len >= maxlength) {
 885                 fstring out;
 886         DEBUG(0,("ERROR: string overflow by %u bytes in safe_strcpy_w [%.50s]\n",
 887                         (unsigned int)((ucs2_len-maxlength)*sizeof(smb_ucs2_t)),
 888                         unicode_to_unix(out,src,sizeof(out))) );
 889                 ucs2_len = maxlength - 1;
 890     }
 891
 892     memcpy(dest, src, ucs2_len*sizeof(smb_ucs2_t));
 893     dest[ucs2_len] = 0;
 894     return dest;
 895 }
 896
 897 /*******************************************************************
 898  Safe string cat into a string. maxlength includes the terminating zero.
 899  maxlength is in ucs2 units.
 900 ********************************************************************/
 901
 902 smb_ucs2_t *safe_strcat_w(smb_ucs2_t *dest, const smb_ucs2_t *src, size_t maxlength)
 903 {
 904     size_t ucs2_src_len, ucs2_dest_len;
 905
 906     if (!dest) {
 907         DEBUG(0,("ERROR: NULL dest in safe_strcat_w\n"));
 908         return NULL;
 909     }
 910
 911     if (!src)
 912         return dest;
 913
 914     ucs2_src_len = strlen_w(src);
 915     ucs2_dest_len = strlen_w(dest);
 916
 917     if (ucs2_src_len + ucs2_dest_len >= maxlength) {
 918                 fstring out;
 919                 int new_len = maxlength - ucs2_dest_len - 1;
 920         DEBUG(0,("ERROR: string overflow by %u characters in safe_strcat_w [%.50s]\n",
 921                         (unsigned int)(sizeof(smb_ucs2_t)*(ucs2_src_len + ucs2_dest_len - maxlength)),
 922                         unicode_to_unix(out,src,sizeof(out))) );
 923         ucs2_src_len = (size_t)(new_len > 0 ? new_len : 0);
 924     }
 925
 926     memcpy(&dest[ucs2_dest_len], src, ucs2_src_len*sizeof(smb_ucs2_t));
 927     dest[ucs2_dest_len + ucs2_src_len] = 0;
 928     return dest;
 929 }
 930
 931 /*******************************************************************
 932  Compare the two strings s1 and s2.
 933 ********************************************************************/
 934
 935 int strcmp_w(const smb_ucs2_t *s1, const smb_ucs2_t *s2)
 936 {
 937         smb_ucs2_t c1, c2;
 938
 939         for (;;) {
 940                 c1 = *s1++;
 941                 c2 = *s2++;
 942
 943                 if (c1 != c2)
 944                         return c1 - c2;
 945
 946                 if (c1 == 0)
 947                         break;
 948         }
 949         return 0;
 950 }
 951
 952 /*******************************************************************
 953  Compare the first n characters of s1 to s2. len is in ucs2 units.
 954 ********************************************************************/
 955
 956 int strncmp_w(const smb_ucs2_t *s1, const smb_ucs2_t *s2, size_t len)
 957 {
 958         smb_ucs2_t c1, c2;
 959
 960         for (; len != 0; --len) {
 961                 c1 = *s1++;
 962                 c2 = *s2++;
 963
 964                 if (c1 != c2)
 965                         return c1 - c2;
 966
 967                 if (c1 == 0)
 968                         break;
 969
 970         }
 971         return 0;
 972 }
 973
 974 /*******************************************************************
 975  Search string s2 from s1.
 976 ********************************************************************/
 977
 978 smb_ucs2_t *strstr_w(const smb_ucs2_t *s1, const smb_ucs2_t *s2)
 979 {
 980         size_t len = strlen_w(s2);
 981
 982         if (!*s2)
 983                 return (smb_ucs2_t *)s1;
 984
 985         for(;*s1; s1++) {
 986                 if (*s1 == *s2) {
 987                         if (strncmp_w(s1, s2, len) == 0)
 988                                 return (smb_ucs2_t *)s1;
 989                 }
 990         }
 991         return NULL;
 992 }
 993
 994 /*******************************************************************
 995  Search for ucs2 char c from the beginning of s.
 996 ********************************************************************/
 997
 998 smb_ucs2_t *strchr_w(const smb_ucs2_t *s, smb_ucs2_t c)
 999 {
1000         do {
1001                 if (*s == c)
1002                         return (smb_ucs2_t *)s;
1003         } while (*s++);
1004
1005         return NULL;
1006 }
1007
1008 /*******************************************************************
1009  Search for ucs2 char c from the end of s.
1010 ********************************************************************/
1011
1012 smb_ucs2_t *strrchr_w(const smb_ucs2_t *s, smb_ucs2_t c)
1013 {
1014         smb_ucs2_t *retval = 0;
1015
1016         do {
1017                 if (*s == c)
1018                         retval = (smb_ucs2_t *)s;
1019         } while (*s++);
1020
1021         return retval;
1022 }
1023
1024 /*******************************************************************
1025  Search token from s1 separated by any ucs2 char of s2.
1026 ********************************************************************/
1027
1028 smb_ucs2_t *strtok_w(smb_ucs2_t *s1, const smb_ucs2_t *s2)
1029 {
1030         static smb_ucs2_t *s = NULL;
1031         smb_ucs2_t *q;
1032
1033         if (!s1) {
1034                 if (!s)
1035                         return NULL;
1036                 s1 = s;
1037         }
1038
1039         for (q = s1; *s1; s1++) {
1040                 smb_ucs2_t *p = strchr_w(s2, *s1);
1041                 if (p) {
1042                         if (s1 != q) {
1043                                 s = s1 + 1;
1044                                 *s1 = '\0';
1045                                 return q;
1046                         }
1047                         q = s1 + 1;
1048                 }
1049         }
1050
1051         s = NULL;
1052         if (*q)
1053                 return q;
1054
1055         return NULL;
1056 }
1057
1058 /*******************************************************************
1059  Duplicate a ucs2 string.
1060 ********************************************************************/
1061
1062 smb_ucs2_t *strdup_w(const smb_ucs2_t *s)
1063 {
1064         size_t newlen = (strlen_w(s)+1)*sizeof(smb_ucs2_t);
1065         smb_ucs2_t *newstr = (smb_ucs2_t *)malloc(newlen);
1066     if (newstr == NULL)
1067         return NULL;
1068     safe_strcpy_w(newstr, s, newlen);
1069     return newstr;
1070 }
1071
1072 /*******************************************************************
1073  Mapping tables for UNICODE character. Allows toupper/tolower and
1074  isXXX functions to work.
1075
1076  tridge: split into 2 pieces. This saves us 5/6 of the memory
1077  with a small speed penalty
1078  The magic constants are the lower/upper range of the tables two
1079  parts
1080 ********************************************************************/
1081
1082 typedef struct {
1083         smb_ucs2_t lower;
1084         smb_ucs2_t upper;
1085         unsigned char flags;
1086 } smb_unicode_table_t;
1087
1088 #define TABLE1_BOUNDARY 9450
1089 #define TABLE2_BOUNDARY 64256
1090
1091 static smb_unicode_table_t map_table1[] = {
1092 #include "unicode_map_table1.h"
1093 };
1094
1095 static smb_unicode_table_t map_table2[] = {
1096 #include "unicode_map_table2.h"
1097 };
1098
1099 static unsigned char map_table_flags(smb_ucs2_t v)
1100 {
1101         if (v < TABLE1_BOUNDARY) return map_table1[v].flags;
1102         if (v >= TABLE2_BOUNDARY) return map_table2[v - TABLE2_BOUNDARY].flags;
1103         return 0;
1104 }
1105
1106 static smb_ucs2_t map_table_lower(smb_ucs2_t v)
1107 {
1108         if (v < TABLE1_BOUNDARY) return map_table1[v].lower;
1109         if (v >= TABLE2_BOUNDARY) return map_table2[v - TABLE2_BOUNDARY].lower;
1110         return v;
1111 }
1112
1113 static smb_ucs2_t map_table_upper(smb_ucs2_t v)
1114 {
1115         if (v < TABLE1_BOUNDARY) return map_table1[v].upper;
1116         if (v >= TABLE2_BOUNDARY) return map_table2[v - TABLE2_BOUNDARY].upper;
1117         return v;
1118 }
1119
1120 /*******************************************************************
1121  Is an upper case wchar.
1122 ********************************************************************/
1123
1124 int isupper_w( smb_ucs2_t val)
1125 {
1126         return (map_table_flags(val) & UNI_UPPER);
1127 }
1128
1129 /*******************************************************************
1130  Is a lower case wchar.
1131 ********************************************************************/
1132
1133 int islower_w( smb_ucs2_t val)
1134 {
1135         return (map_table_flags(val) & UNI_LOWER);
1136 }
1137
1138 /*******************************************************************
1139  Is a digit wchar.
1140 ********************************************************************/
1141
1142 int isdigit_w( smb_ucs2_t val)
1143 {
1144         return (map_table_flags(val) & UNI_DIGIT);
1145 }
1146
1147 /*******************************************************************
1148  Is a hex digit wchar.
1149 ********************************************************************/
1150
1151 int isxdigit_w( smb_ucs2_t val)
1152 {
1153         return (map_table_flags(val) & UNI_XDIGIT);
1154 }
1155
1156 /*******************************************************************
1157  Is a space wchar.
1158 ********************************************************************/
1159
1160 int isspace_w( smb_ucs2_t val)
1161 {
1162         return (map_table_flags(val) & UNI_SPACE);
1163 }
1164
1165 /*******************************************************************
1166  Convert a wchar to upper case.
1167 ********************************************************************/
1168
1169 smb_ucs2_t toupper_w( smb_ucs2_t val )
1170 {
1171         return map_table_upper(val);
1172 }
1173
1174 /*******************************************************************
1175  Convert a wchar to lower case.
1176 ********************************************************************/
1177
1178 smb_ucs2_t tolower_w( smb_ucs2_t val )
1179 {
1180         return map_table_lower(val);
1181 }
1182
1183 static smb_ucs2_t *last_ptr = NULL;
1184
1185 void set_first_token_w(smb_ucs2_t *ptr)
1186 {
1187         last_ptr = ptr;
1188 }
1189
1190 /****************************************************************************
1191  Get the next token from a string, return False if none found
1192  handles double-quotes.
1193  Based on a routine by GJC@VILLAGE.COM.
1194  Extensively modified by Andrew.Tridgell@anu.edu.au
1195  bufsize is in bytes.
1196 ****************************************************************************/
1197
1198 static smb_ucs2_t sep_list[] = { (smb_ucs2_t)' ', (smb_ucs2_t)'\t',  (smb_ucs2_t)'\n',  (smb_ucs2_t)'\r', 0};
1199 static smb_ucs2_t quotechar = (smb_ucs2_t)'\"';
1200
1201 BOOL next_token_w(smb_ucs2_t **ptr, smb_ucs2_t *buff, smb_ucs2_t *sep, size_t bufsize)
1202 {
1203         smb_ucs2_t *s;
1204         BOOL quoted;
1205         size_t len=1;
1206
1207         /*
1208          * Convert bufsize to smb_ucs2_t units.
1209          */
1210
1211         bufsize /= sizeof(smb_ucs2_t);
1212
1213         if (!ptr)
1214                 ptr = &last_ptr;
1215         if (!ptr)
1216                 return(False);
1217
1218         s = *ptr;
1219
1220         /*
1221          * Default to simple separators.
1222          */
1223
1224         if (!sep)
1225                 sep = sep_list;
1226
1227         /*
1228          * Find the first non sep char.
1229          */
1230
1231         while(*s && strchr_w(sep,*s))
1232                 s++;
1233
1234         /*
1235          * Nothing left ?
1236          */
1237
1238         if (!*s)
1239                 return(False);
1240
1241         /*
1242          * Copy over the token.
1243          */
1244
1245         for (quoted = False; len < bufsize && *s && (quoted || !strchr_w(sep,*s)); s++) {
1246                 if (*s == quotechar) {
1247                         quoted = !quoted;
1248                 } else {
1249                         len++;
1250                         *buff++ = *s;
1251                 }
1252         }
1253
1254         *ptr = (*s) ? s+1 : s;
1255         *buff = 0;
1256         last_ptr = *ptr;
1257
1258         return(True);
1259 }
1260
1261 /****************************************************************************
1262  Convert list of tokens to array; dependent on above routine.
1263  Uses last_ptr from above - bit of a hack.
1264 ****************************************************************************/
1265
1266 smb_ucs2_t **toktocliplist_w(int *ctok, smb_ucs2_t *sep)
1267 {
1268         smb_ucs2_t *s=last_ptr;
1269         int ictok=0;
1270         smb_ucs2_t **ret, **iret;
1271
1272         if (!sep)
1273                 sep = sep_list;
1274
1275         while(*s && strchr_w(sep,*s))
1276                 s++;
1277
1278         /*
1279          * Nothing left ?
1280          */
1281
1282         if (!*s)
1283                 return(NULL);
1284
1285         do {
1286                 ictok++;
1287                 while(*s && (!strchr_w(sep,*s)))
1288                         s++;
1289                 while(*s && strchr_w(sep,*s))
1290                         *s++=0;
1291         } while(*s);
1292
1293         *ctok = ictok;
1294         s = last_ptr;
1295
1296         if (!(ret=iret=malloc(ictok*sizeof(smb_ucs2_t *))))
1297                 return NULL;
1298
1299         while(ictok--) {
1300                 *iret++=s;
1301                 while(*s++)
1302                         ;
1303                 while(!*s)
1304                         s++;
1305         }
1306
1307         return ret;
1308 }
1309
1310 /*******************************************************************
1311  Case insensitive string compararison.
1312 ********************************************************************/
1313
1314 int StrCaseCmp_w(const smb_ucs2_t *s, const smb_ucs2_t *t)
1315 {
1316         /*
1317          * Compare until we run out of string, either t or s, or find a difference.
1318          */
1319
1320         while (*s && *t && toupper_w(*s) == toupper_w(*t)) {
1321                 s++;
1322                 t++;
1323         }
1324
1325         return(toupper_w(*s) - toupper_w(*t));
1326 }
1327
1328 /*******************************************************************
1329  Case insensitive string compararison, length limited.
1330  n is in ucs2 units.
1331 ********************************************************************/
1332
1333 int StrnCaseCmp_w(const smb_ucs2_t *s, const smb_ucs2_t *t, size_t n)
1334 {
1335         /*
1336          * Compare until we run out of string, either t or s, or chars.
1337          */
1338
1339         while (n && *s && *t && toupper_w(*s) == toupper_w(*t)) {
1340                 s++;
1341                 t++;
1342                 n--;
1343         }
1344
1345     /*
1346          * Not run out of chars - strings are different lengths.
1347          */
1348
1349     if (n)
1350       return(toupper_w(*s) - toupper_w(*t));
1351
1352     /*
1353          * Identical up to where we run out of chars,
1354          * and strings are same length.
1355          */
1356
1357         return(0);
1358 }
1359
1360 /*******************************************************************
1361  Compare 2 strings.
1362 ********************************************************************/
1363
1364 BOOL strequal_w(const smb_ucs2_t *s1, const smb_ucs2_t *s2)
1365 {
1366         if (s1 == s2)
1367                 return(True);
1368         if (!s1 || !s2)
1369                 return(False);
1370
1371         return(StrCaseCmp_w(s1,s2)==0);
1372 }
1373
1374 /*******************************************************************
1375  Compare 2 strings up to and including the nth char. n is in ucs2
1376  units.
1377 ******************************************************************/
1378
1379 BOOL strnequal_w(const smb_ucs2_t *s1,const smb_ucs2_t *s2,size_t n)
1380 {
1381         if (s1 == s2)
1382                 return(True);
1383         if (!s1 || !s2 || !n)
1384                 return(False);
1385
1386         return(StrnCaseCmp_w(s1,s2,n)==0);
1387 }
1388
1389 /*******************************************************************
1390  Compare 2 strings (case sensitive).
1391 ********************************************************************/
1392
1393 BOOL strcsequal_w(const smb_ucs2_t *s1,const smb_ucs2_t *s2)
1394 {
1395         if (s1 == s2)
1396                 return(True);
1397         if (!s1 || !s2)
1398                 return(False);
1399
1400         return(strcmp_w(s1,s2)==0);
1401 }
1402
1403 /*******************************************************************
1404  Convert a string to lower case.
1405 ********************************************************************/
1406
1407 void strlower_w(smb_ucs2_t *s)
1408 {
1409         while (*s) {
1410                 if (isupper_w(*s))
1411                         *s = tolower_w(*s);
1412                 s++;
1413         }
1414 }
1415
1416 /*******************************************************************
1417  Convert a string to upper case.
1418 ********************************************************************/
1419
1420 void strupper_w(smb_ucs2_t *s)
1421 {
1422         while (*s) {
1423                 if (islower_w(*s))
1424                         *s = toupper_w(*s);
1425                 s++;
1426         }
1427 }
1428
1429 /*******************************************************************
1430  Convert a string to "normal" form.
1431 ********************************************************************/
1432
1433 void strnorm_w(smb_ucs2_t *s)
1434 {
1435         extern int case_default;
1436         if (case_default == CASE_UPPER)
1437                 strupper_w(s);
1438         else
1439                 strlower_w(s);
1440 }
1441
1442 /*******************************************************************
1443  Check if a string is in "normal" case.
1444 ********************************************************************/
1445
1446 BOOL strisnormal_w(smb_ucs2_t *s)
1447 {
1448         extern int case_default;
1449         if (case_default == CASE_UPPER)
1450                 return(!strhaslower_w(s));
1451
1452         return(!strhasupper_w(s));
1453 }
1454
1455 /****************************************************************************
1456  String replace.
1457 ****************************************************************************/
1458
1459 void string_replace_w(smb_ucs2_t *s, smb_ucs2_t oldc, smb_ucs2_t newc)
1460 {
1461         while (*s) {
1462                 if (oldc == *s)
1463                         *s = newc;
1464                 s++;
1465         }
1466 }
1467
/*******************************************************************
 Skip past some strings in a buffer. n is the number of consecutive
 zero terminated strings to skip.
 Returns a pointer to the element following the terminator of the
 nth string.
********************************************************************/

smb_ucs2_t *skip_string_w(smb_ucs2_t *buf,size_t n)
{
	while (n--) {
		/*
		 * buf is a smb_ucs2_t pointer, so stepping it already moves
		 * in ucs2 units; no scaling by sizeof(smb_ucs2_t) is wanted.
		 */
		while (*buf)
			buf++;

		/* Step over the terminator itself. */
		buf++;
	}

	return(buf);
}
1478
1479 /*******************************************************************
1480  Count the number of characters in a string. Same as strlen_w in
1481  smb_ucs2_t string units.
1482 ********************************************************************/
1483
1484 size_t str_charnum_w(const smb_ucs2_t *s)
1485 {
1486         return strlen_w(s);
1487 }
1488
1489 /*******************************************************************
1490  Trim the specified elements off the front and back of a string.
1491 ********************************************************************/
1492
1493 BOOL trim_string_w(smb_ucs2_t *s,const smb_ucs2_t *front,const smb_ucs2_t *back)
1494 {
1495         BOOL ret = False;
1496         size_t front_len = (front && *front) ? strlen_w(front) : 0;
1497         size_t back_len = (back && *back) ? strlen_w(back) : 0;
1498         size_t s_len;
1499
1500         while (front_len && strncmp_w(s, front, front_len) == 0) {
1501                 smb_ucs2_t *p = s;
1502                 ret = True;
1503
1504                 while (1) {
1505                         if (!(*p = p[front_len]))
1506                                 break;
1507                         p++;
1508                 }
1509         }
1510
1511         if(back_len) {
1512                 s_len = strlen_w(s);
1513                 while ((s_len >= back_len) &&
1514                         (strncmp_w(s + s_len - back_len, back, back_len)==0)) {
1515                         ret = True;
1516                         s[s_len - back_len] = 0;
1517                         s_len = strlen_w(s);
1518                 }
1519         }
1520
1521         return(ret);
1522 }
1523
1524 /****************************************************************************
1525  Does a string have any uppercase chars in it ?
1526 ****************************************************************************/
1527
1528 BOOL strhasupper_w(const smb_ucs2_t *s)
1529 {
1530         while (*s) {
1531                 if (isupper_w(*s))
1532                         return(True);
1533                 s++;
1534         }
1535         return(False);
1536 }
1537
1538 /****************************************************************************
1539  Does a string have any lowercase chars in it ?
1540 ****************************************************************************/
1541
1542 BOOL strhaslower_w(const smb_ucs2_t *s)
1543 {
1544         while (*s) {
1545                 if (islower(*s))
1546                         return(True);
1547                 s++;
1548         }
1549         return(False);
1550 }
1551
1552 /****************************************************************************
1553  Find the number of 'c' chars in a string.
1554 ****************************************************************************/
1555
1556 size_t count_chars_w(const smb_ucs2_t *s,smb_ucs2_t c)
1557 {
1558         size_t count=0;
1559
1560         while (*s) {
1561                 if (*s == c)
1562                         count++;
1563                 s++;
1564         }
1565         return(count);
1566 }
1567
1568 /*******************************************************************
1569  Return True if a string consists only of one particular character.
1570 ********************************************************************/
1571
1572 BOOL str_is_all_w(const smb_ucs2_t *s,smb_ucs2_t c)
1573 {
1574         if(s == NULL)
1575                 return False;
1576         if(!*s)
1577                 return False;
1578
1579         while (*s) {
1580                 if (*s != c)
1581                         return False;
1582                 s++;
1583         }
1584         return True;
1585 }
1586
1587 /*******************************************************************
1588  Paranoid strcpy into a buffer of given length (includes terminating
1589  zero. Strips out all but 'a-Z0-9' and replaces with '_'. Deliberately
1590  does *NOT* check for multibyte characters. Don't change it !
1591  maxlength is in ucs2 units.
1592 ********************************************************************/
1593
1594 smb_ucs2_t *alpha_strcpy_w(smb_ucs2_t *dest, const smb_ucs2_t *src, size_t maxlength)
1595 {
1596         size_t len, i;
1597
1598         if (!dest) {
1599                 DEBUG(0,("ERROR: NULL dest in alpha_strcpy_w\n"));
1600                 return NULL;
1601         }
1602
1603         if (!src) {
1604                 *dest = 0;
1605                 return dest;
1606         }
1607
1608         len = strlen_w(src);
1609         if (len >= maxlength)
1610                 len = maxlength - 1;
1611
1612         for(i = 0; i < len; i++) {
1613                 smb_ucs2_t val = src[i];
1614                 if(isupper_w(val) ||islower_w(val) || isdigit_w(val))
1615                         dest[i] = src[i];
1616                 else
1617                         dest[i] = (smb_ucs2_t)'_';
1618         }
1619
1620         dest[i] = 0;
1621
1622         return dest;
1623 }
1624
1625 /****************************************************************************
1626  Like strncpy but always null terminates. Make sure there is room !
1627  The variable n should always be one less than the available size and is in
1628  ucs2 units.
1629 ****************************************************************************/
1630
1631 smb_ucs2_t *StrnCpy_w(smb_ucs2_t *dest,const smb_ucs2_t *src,size_t n)
1632 {
1633         smb_ucs2_t *d = dest;
1634         if (!dest)
1635                 return(NULL);
1636         if (!src) {
1637                 *dest = 0;
1638                 return(dest);
1639         }
1640
1641         while (n-- && (*d++ = *src++))
1642                 ;
1643         *d = 0;
1644         return(dest);
1645 }
1646
1647 /****************************************************************************
1648  Like strncpy but copies up to the character marker. Always null terminates.
1649  returns a pointer to the character marker in the source string (src).
1650  n is in ucs2 units.
1651 ****************************************************************************/
1652
1653 smb_ucs2_t *strncpyn_w(smb_ucs2_t *dest, const smb_ucs2_t *src,size_t n, smb_ucs2_t c)
1654 {
1655         smb_ucs2_t *p;
1656         size_t str_len;
1657
1658         p = strchr_w(src, c);
1659         if (p == NULL) {
1660                 fstring cval;
1661                 smb_ucs2_t mbcval[2];
1662                 mbcval[0] = c;
1663                 mbcval[1] = 0;
1664                 DEBUG(5, ("strncpyn_w: separator character (%s) not found\n",
1665                         unicode_to_unix(cval,mbcval,sizeof(cval)) ));
1666                 return NULL;
1667         }
1668
1669         str_len = PTR_DIFF(p, src) + 1;
1670         safe_strcpy_w(dest, src, MIN(n, str_len));
1671
1672         return p;
1673 }
1674
1675 /*************************************************************
1676  Routine to get hex characters and turn them into a 16 byte array.
1677  The array can be variable length, and any non-hex-numeric
1678  characters are skipped.  "0xnn" or "0Xnn" is specially catered
1679  for. len is in bytes.
1680  Valid examples: "0A5D15"; "0x15, 0x49, 0xa2"; "59\ta9\te3\n"
1681 **************************************************************/
1682
1683 static smb_ucs2_t hexprefix[] = { (smb_ucs2_t)'0', (smb_ucs2_t)'x', 0 };
1684 static smb_ucs2_t hexchars[] = { (smb_ucs2_t)'0', (smb_ucs2_t)'1', (smb_ucs2_t)'2', (smb_ucs2_t)'3',
1685                                                                 (smb_ucs2_t)'4', (smb_ucs2_t)'5', (smb_ucs2_t)'6', (smb_ucs2_t)'7',
1686                                                                 (smb_ucs2_t)'8', (smb_ucs2_t)'9', (smb_ucs2_t)'A', (smb_ucs2_t)'B',
1687                                                                 (smb_ucs2_t)'C', (smb_ucs2_t)'D', (smb_ucs2_t)'E', (smb_ucs2_t)'F', 0 };
1688
1689 size_t strhex_to_str_w(char *p, size_t len, const smb_ucs2_t *strhex)
1690 {
1691         size_t i;
1692         size_t num_chars = 0;
1693         unsigned char   lonybble, hinybble;
1694         smb_ucs2_t *p1 = NULL, *p2 = NULL;
1695
1696         /*
1697          * Convert to smb_ucs2_t units.
1698          */
1699
1700         len /= sizeof(smb_ucs2_t);
1701
1702         for (i = 0; i < len && strhex[i] != 0; i++) {
1703                 if (strnequal_w(hexchars, hexprefix, 2)) {
1704                         i++; /* skip two chars */
1705                         continue;
1706                 }
1707
1708                 if (!(p1 = strchr_w(hexchars, toupper_w(strhex[i]))))
1709                         break;
1710
1711                 i++; /* next hex digit */
1712
1713                 if (!(p2 = strchr_w(hexchars, toupper_w(strhex[i]))))
1714                         break;
1715
1716                 /* get the two nybbles */
1717                 hinybble = (PTR_DIFF(p1, hexchars)/sizeof(smb_ucs2_t));
1718                 lonybble = (PTR_DIFF(p2, hexchars)/sizeof(smb_ucs2_t));
1719
1720                 p[num_chars] = (hinybble << 4) | lonybble;
1721                 num_chars++;
1722
1723                 p1 = NULL;
1724                 p2 = NULL;
1725         }
1726         return num_chars;
1727 }
1728
1729 /****************************************************************************
1730  Check if a string is part of a list.
1731 ****************************************************************************/
1732
1733 BOOL in_list_w(smb_ucs2_t *s,smb_ucs2_t *list,BOOL casesensitive)
1734 {
1735         wpstring tok;
1736         smb_ucs2_t *p=list;
1737
1738         if (!list)
1739                 return(False);
1740
1741         while (next_token_w(&p,tok,LIST_SEP_W,sizeof(tok))) {
1742                 if (casesensitive) {
1743                         if (strcmp_w(tok,s) == 0)
1744                                 return(True);
1745                 } else {
1746                         if (StrCaseCmp_w(tok,s) == 0)
1747                                 return(True);
1748                 }
1749         }
1750         return(False);
1751 }
1752
1753 /* This is used to prevent lots of mallocs of size 2 */
1754 static smb_ucs2_t *null_string = NULL;
1755
1756 /****************************************************************************
1757  Set a string value, allocing the space for the string.
1758 ****************************************************************************/
1759
1760 BOOL string_init_w(smb_ucs2_t **dest,const smb_ucs2_t *src)
1761 {
1762         size_t l;
1763
1764         if (!null_string) {
1765                 if((null_string = (smb_ucs2_t *)malloc(sizeof(smb_ucs2_t))) == NULL) {
1766                         DEBUG(0,("string_init_w: malloc fail for null_string.\n"));
1767                 return False;
1768                 }
1769                 *null_string = 0;
1770         }
1771
1772         if (!src)
1773                 src = null_string;
1774
1775         l = strlen_w(src);
1776
1777         if (l == 0)
1778                 *dest = null_string;
1779         else {
1780                 (*dest) = (smb_ucs2_t *)malloc(sizeof(smb_ucs2_t)*(l+1));
1781                 if ((*dest) == NULL) {
1782                         DEBUG(0,("Out of memory in string_init_w\n"));
1783                         return False;
1784                 }
1785
1786                 wpstrcpy(*dest,src);
1787         }
1788         return(True);
1789 }
1790
1791 /****************************************************************************
1792  Free a string value.
1793 ****************************************************************************/
1794
1795 void string_free_w(smb_ucs2_t **s)
1796 {
1797         if (!s || !(*s))
1798                 return;
1799         if (*s == null_string)
1800                 *s = NULL;
1801         if (*s)
1802                 free((char *)*s);
1803         *s = NULL;
1804 }
1805
1806 /****************************************************************************
1807  Set a string value, allocing the space for the string, and deallocating any
1808  existing space.
1809 ****************************************************************************/
1810
1811 BOOL string_set_w(smb_ucs2_t **dest,const smb_ucs2_t *src)
1812 {
1813         string_free_w(dest);
1814
1815         return(string_init_w(dest,src));
1816 }
1817
1818 /****************************************************************************
1819  Substitute a string for a pattern in another string. Make sure there is
1820  enough room !
1821
1822  This routine looks for pattern in s and replaces it with
1823  insert. It may do multiple replacements.
1824
1825  Any of " ; ' $ or ` in the insert string are replaced with _
1826  if len==0 then no length check is performed
1827  len is in ucs2 units.
1828 ****************************************************************************/
1829
1830 void string_sub_w(smb_ucs2_t *s,const smb_ucs2_t *pattern,const smb_ucs2_t *insert, size_t len)
1831 {
1832         smb_ucs2_t *p;
1833         ssize_t ls,lp,li, i;
1834
1835         if (!insert || !pattern || !s)
1836                 return;
1837
1838         ls = (ssize_t)strlen_w(s);
1839         lp = (ssize_t)strlen_w(pattern);
1840         li = (ssize_t)strlen_w(insert);
1841
1842         if (!*pattern)
1843                 return;
1844
1845         while (lp <= ls && (p = strstr_w(s,pattern))) {
1846                 if (len && (ls + (li-lp) >= len)) {
1847                         fstring out;
1848                         DEBUG(0,("ERROR: string overflow by %d in string_sub_w(%.50s, %d)\n",
1849                                  (int)(sizeof(smb_ucs2_t)*(ls + (li-lp) - len)),
1850                                  unicode_to_unix(out,pattern,sizeof(out)), (int)len*sizeof(smb_ucs2_t)));
1851                         break;
1852                 }
1853                 if (li != lp)
1854                         memmove(p+li,p+lp,sizeof(smb_ucs2_t)*(strlen_w(p+lp)+1));
1855
1856                 for (i=0;i<li;i++) {
1857                         switch (insert[i]) {
1858                         case (smb_ucs2_t)'`':
1859                         case (smb_ucs2_t)'"':
1860                         case (smb_ucs2_t)'\'':
1861                         case (smb_ucs2_t)';':
1862                         case (smb_ucs2_t)'$':
1863                         case (smb_ucs2_t)'%':
1864                         case (smb_ucs2_t)'\r':
1865                         case (smb_ucs2_t)'\n':
1866                                 p[i] = (smb_ucs2_t)'_';
1867                                 break;
1868                         default:
1869                                 p[i] = insert[i];
1870                         }
1871                 }
1872                 s = p + li;
1873                 ls += (li-lp);
1874         }
1875 }
1876
1877 void fstring_sub_w(smb_ucs2_t *s,const smb_ucs2_t *pattern,const smb_ucs2_t *insert)
1878 {
1879         string_sub_w(s, pattern, insert, sizeof(wfstring));
1880 }
1881
1882 void pstring_sub_w(smb_ucs2_t *s,const smb_ucs2_t *pattern,smb_ucs2_t *insert)
1883 {
1884         string_sub_w(s, pattern, insert, sizeof(wpstring));
1885 }
1886
1887 /****************************************************************************
1888  Similar to string_sub() but allows for any character to be substituted.
1889  Use with caution !
1890  if len==0 then no length check is performed.
1891 ****************************************************************************/
1892
1893 void all_string_sub_w(smb_ucs2_t *s,const smb_ucs2_t *pattern,const smb_ucs2_t *insert, size_t len)
1894 {
1895         smb_ucs2_t *p;
1896         ssize_t ls,lp,li;
1897
1898         if (!insert || !pattern || !s)
1899                 return;
1900
1901         ls = (ssize_t)strlen_w(s);
1902         lp = (ssize_t)strlen_w(pattern);
1903         li = (ssize_t)strlen_w(insert);
1904
1905         if (!*pattern)
1906                 return;
1907
1908         while (lp <= ls && (p = strstr_w(s,pattern))) {
1909                 if (len && (ls + (li-lp) >= len)) {
1910                         fstring out;
1911                         DEBUG(0,("ERROR: string overflow by %d in all_string_sub_w(%.50s, %d)\n",
1912                                  (int)(sizeof(smb_ucs2_t)*(ls + (li-lp) - len)),
1913                                  unicode_to_unix(out,pattern,sizeof(out)), (int)len*sizeof(smb_ucs2_t)));
1914                         break;
1915                 }
1916                 if (li != lp)
1917                         memmove(p+li,p+lp,sizeof(smb_ucs2_t)*(strlen_w(p+lp)+1));
1918
1919                 memcpy(p, insert, li*sizeof(smb_ucs2_t));
1920                 s = p + li;
1921                 ls += (li-lp);
1922         }
1923 }
1924
1925 /****************************************************************************
1926  Splits out the front and back at a separator.
1927 ****************************************************************************/
1928
1929 void split_at_last_component_w(smb_ucs2_t *path, smb_ucs2_t *front, smb_ucs2_t sep, smb_ucs2_t *back)
1930 {
1931     smb_ucs2_t *p = strrchr_w(path, sep);
1932
1933         if (p != NULL)
1934                 *p = 0;
1935
1936         if (front != NULL)
1937                 wpstrcpy(front, path);
1938
1939         if (p != NULL) {
1940                 if (back != NULL)
1941                         wpstrcpy(back, p+1);
1942                 *p = (smb_ucs2_t)'\\';
1943         } else {
1944                 if (back != NULL)
1945                         back[0] = 0;
1946         }
1947 }
1948
1949
1950 /****************************************************************************
1951  Write an octal as a string.
1952 ****************************************************************************/
1953
1954 smb_ucs2_t *octal_string_w(int i)
1955 {
1956         static smb_ucs2_t wret[64];
1957         char ret[64];
1958
1959         if (i == -1)
1960                 slprintf(ret, sizeof(ret), "-1");
1961         else
1962                 slprintf(ret, sizeof(ret), "0%o", i);
1963         return unix_to_unicode(wret, ret, sizeof(wret));
1964 }
1965
1966
1967 /****************************************************************************
1968  Truncate a string at a specified length.
1969  length is in ucs2 units.
1970 ****************************************************************************/
1971
1972 smb_ucs2_t *string_truncate_w(smb_ucs2_t *s, size_t length)
1973 {
1974         if (s && strlen_w(s) > length)
1975                 s[length] = 0;
1976
1977         return s;
1978 }
1979
1980 /******************************************************************
1981  functions for UTF8 support (using in kanji.c)
1982  ******************************************************************/
1983 smb_ucs2_t doscp2ucs2(int w)
1984 {
1985   return ((smb_ucs2_t)doscp_to_ucs2[w]);
1986 }
1987
1988 int ucs2doscp(smb_ucs2_t w)
1989 {
1990   return ((int)ucs2_to_doscp[w]);
1991 }
1992