source3/lib/util_unistr.c

   1 /*
   2    Unix SMB/Netbios implementation.
   3    Version 1.9.
   4    Samba utility functions
   5    Copyright (C) Andrew Tridgell 1992-1998
   6
   7    This program is free software; you can redistribute it and/or modify
   8    it under the terms of the GNU General Public License as published by
   9    the Free Software Foundation; either version 2 of the License, or
  10    (at your option) any later version.
  11
  12    This program is distributed in the hope that it will be useful,
  13    but WITHOUT ANY WARRANTY; without even the implied warranty of
  14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15    GNU General Public License for more details.
  16
  17    You should have received a copy of the GNU General Public License
  18    along with this program; if not, write to the Free Software
  19    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  20 */
  21
  22 #include "includes.h"
  23
  24 extern int DEBUGLEVEL;
  25
  26  smb_ucs2_t wchar_list_sep[] = { (smb_ucs2_t)' ', (smb_ucs2_t)'\t', (smb_ucs2_t)',',
  27                                                                 (smb_ucs2_t)';', (smb_ucs2_t)':', (smb_ucs2_t)'\n',
  28                                                                 (smb_ucs2_t)'\r', 0 };
  29 /*
  30  * The following are the codepage to ucs2 and vica versa maps.
  31  * These are dynamically loaded from a unicode translation file.
  32  */
  33
  34 static smb_ucs2_t *doscp_to_ucs2;
  35 static uint16 *ucs2_to_doscp;
  36
  37 static smb_ucs2_t *unixcp_to_ucs2;
  38 static uint16 *ucs2_to_unixcp;
  39
  40 #ifndef MAXUNI
  41 #define MAXUNI 1024
  42 #endif
  43
  44 /*******************************************************************
  45  Write a string in (little-endian) unicode format. src is in
  46  the current DOS codepage. len is the length in bytes of the
  47  string pointed to by dst.
  48
  49  if null_terminate is True then null terminate the packet (adds 2 bytes)
  50
  51  the return value is the length in bytes consumed by the string, including the
  52  null termination if applied
  53 ********************************************************************/
  54
  55 size_t dos_PutUniCode(char *dst,const char *src, ssize_t len, BOOL null_terminate)
  56 {
  57         size_t ret = 0;
  58         while (*src && (len >= 2)) {
  59                 size_t skip = get_character_len(*src);
  60                 smb_ucs2_t val = (*src & 0xff);
  61
  62                 /*
  63                  * If this is a multibyte character (and all DOS/Windows
  64                  * codepages have at maximum 2 byte multibyte characters)
  65                  * then work out the index value for the unicode conversion.
  66                  */
  67
  68                 if (skip == 2)
  69                         val = ((val << 8) | (src[1] & 0xff));
  70
  71                 SSVAL(dst,ret,doscp_to_ucs2[val]);
  72                 ret += 2;
  73                 len -= 2;
  74                 if (skip)
  75                         src += skip;
  76                 else
  77                         src++;
  78         }
  79         if (null_terminate) {
  80                 SSVAL(dst,ret,0);
  81                 ret += 2;
  82         }
  83         return(ret);
  84 }
  85
  86 /*******************************************************************
  87  Pull a DOS codepage string out of a UNICODE array. len is in bytes.
  88 ********************************************************************/
  89
  90 void unistr_to_dos(char *dest, const char *src, size_t len)
  91 {
  92         char *destend = dest + len;
  93
  94         while (dest < destend) {
  95                 uint16 ucs2_val = SVAL(src,0);
  96                 uint16 cp_val = ucs2_to_doscp[ucs2_val];
  97
  98                 src += 2;
  99
 100                 if (ucs2_val == 0)
 101                         break;
 102
 103                 if (cp_val < 256)
 104                         *dest++ = (char)cp_val;
 105                 else {
 106                         *dest++ = (cp_val >> 8) & 0xff;
 107                         *dest++ = (cp_val & 0xff);
 108                 }
 109         }
 110
 111         *dest = 0;
 112 }
 113
 114 /*******************************************************************
 115  Skip past a unicode string, but not more than len. Always move
 116  past a terminating zero if found.
 117 ********************************************************************/
 118
 119 char *skip_unibuf(char *src, size_t len)
 120 {
 121     char *srcend = src + len;
 122
 123     while (src < srcend && SVAL(src,0))
 124         src += 2;
 125
 126     if(!SVAL(src,0))
 127         src += 2;
 128
 129     return src;
 130 }
 131
 132 /*******************************************************************
 133  Return a DOS codepage version of a little-endian unicode string.
 134  len is the filename length (ignoring any terminating zero) in uin16
 135  units. Always null terminates.
 136  Hack alert: uses fixed buffer(s).
 137 ********************************************************************/
 138
 139 char *dos_unistrn2(uint16 *src, int len)
 140 {
 141         static char lbufs[8][MAXUNI];
 142         static int nexti;
 143         char *lbuf = lbufs[nexti];
 144         char *p;
 145
 146         nexti = (nexti+1)%8;
 147
 148         for (p = lbuf; (len > 0) && (p-lbuf < MAXUNI-3) && *src; len--, src++) {
 149                 uint16 ucs2_val = SVAL(src,0);
 150                 uint16 cp_val = ucs2_to_doscp[ucs2_val];
 151
 152                 if (cp_val < 256)
 153                         *p++ = (char)cp_val;
 154                 else {
 155                         *p++ = (cp_val >> 8) & 0xff;
 156                         *p++ = (cp_val & 0xff);
 157                 }
 158         }
 159
 160         *p = 0;
 161         return lbuf;
 162 }
 163
 164 static char lbufs[8][MAXUNI];
 165 static int nexti;
 166
 167 /*******************************************************************
 168  Return a DOS codepage version of a little-endian unicode string.
 169  Hack alert: uses fixed buffer(s).
 170 ********************************************************************/
 171
 172 char *dos_unistr2(uint16 *src)
 173 {
 174         char *lbuf = lbufs[nexti];
 175         char *p;
 176
 177         nexti = (nexti+1)%8;
 178
 179         for (p = lbuf; *src && (p-lbuf < MAXUNI-3); src++) {
 180                 uint16 ucs2_val = SVAL(src,0);
 181                 uint16 cp_val = ucs2_to_doscp[ucs2_val];
 182
 183                 if (cp_val < 256)
 184                         *p++ = (char)cp_val;
 185                 else {
 186                         *p++ = (cp_val >> 8) & 0xff;
 187                         *p++ = (cp_val & 0xff);
 188                 }
 189         }
 190
 191         *p = 0;
 192         return lbuf;
 193 }
 194
 195 /*******************************************************************
 196 Return a DOS codepage version of a little-endian unicode string
 197 ********************************************************************/
 198
 199 char *dos_unistr2_to_str(UNISTR2 *str)
 200 {
 201         char *lbuf = lbufs[nexti];
 202         char *p;
 203         uint16 *src = str->buffer;
 204         int max_size = MIN(sizeof(str->buffer)-3, str->uni_str_len);
 205
 206         nexti = (nexti+1)%8;
 207
 208         for (p = lbuf; *src && p-lbuf < max_size; src++) {
 209                 uint16 ucs2_val = SVAL(src,0);
 210                 uint16 cp_val = ucs2_to_doscp[ucs2_val];
 211
 212                 if (cp_val < 256)
 213                         *p++ = (char)cp_val;
 214                 else {
 215                         *p++ = (cp_val >> 8) & 0xff;
 216                         *p++ = (cp_val & 0xff);
 217                 }
 218         }
 219
 220         *p = 0;
 221         return lbuf;
 222 }
 223
 224 /*******************************************************************
 225  Convert a UNISTR2 structure to an ASCII string
 226  Warning: this version does DOS codepage.
 227 ********************************************************************/
 228
 229 void unistr2_to_ascii(char *dest, const UNISTR2 *str, size_t maxlen)
 230 {
 231         char *destend;
 232         const uint16 *src;
 233         size_t len;
 234         register uint16 c;
 235
 236         src = str->buffer;
 237         len = MIN(str->uni_str_len, maxlen);
 238         destend = dest + len;
 239
 240         while (dest < destend)
 241         {
 242                 uint16 ucs2_val;
 243                 uint16 cp_val;
 244
 245                 c = *src;
 246                 if (c == 0)
 247                 {
 248                         break;
 249                 }
 250
 251                 ucs2_val = SVAL(src++,0);
 252                 cp_val = ucs2_to_doscp[ucs2_val];
 253
 254                 if (cp_val < 256)
 255                         *(dest++) = (char)cp_val;
 256                 else {
 257                         *dest= (cp_val >> 8) & 0xff;
 258                         *(dest++) = (cp_val & 0xff);
 259                 }
 260         }
 261
 262         *dest = 0;
 263 }
 264
 265
 266 /*******************************************************************
 267 Return a number stored in a buffer
 268 ********************************************************************/
 269
 270 uint32 buffer2_to_uint32(BUFFER2 *str)
 271 {
 272         if (str->buf_len == 4)
 273                 return IVAL(str->buffer, 0);
 274         else
 275                 return 0;
 276 }
 277
 278 /*******************************************************************
 279 Return a DOS codepage version of a NOTunicode string
 280 ********************************************************************/
 281
 282 char *dos_buffer2_to_str(BUFFER2 *str)
 283 {
 284         char *lbuf = lbufs[nexti];
 285         char *p;
 286         uint16 *src = str->buffer;
 287         int max_size = MIN(sizeof(str->buffer)-3, str->buf_len/2);
 288
 289         nexti = (nexti+1)%8;
 290
 291         for (p = lbuf; *src && p-lbuf < max_size; src++) {
 292                 uint16 ucs2_val = SVAL(src,0);
 293                 uint16 cp_val = ucs2_to_doscp[ucs2_val];
 294
 295                 if (cp_val < 256)
 296                         *p++ = (char)cp_val;
 297                 else {
 298                         *p++ = (cp_val >> 8) & 0xff;
 299                         *p++ = (cp_val & 0xff);
 300                 }
 301         }
 302
 303         *p = 0;
 304         return lbuf;
 305 }
 306
 307 /*******************************************************************
 308  Return a dos codepage version of a NOTunicode string
 309 ********************************************************************/
 310
 311 char *dos_buffer2_to_multistr(BUFFER2 *str)
 312 {
 313         char *lbuf = lbufs[nexti];
 314         char *p;
 315         uint16 *src = str->buffer;
 316         int max_size = MIN(sizeof(str->buffer)-3, str->buf_len/2);
 317
 318         nexti = (nexti+1)%8;
 319
 320         for (p = lbuf; p-lbuf < max_size; src++) {
 321                 if (*src == 0) {
 322                         *p++ = ' ';
 323                 } else {
 324                         uint16 ucs2_val = SVAL(src,0);
 325                         uint16 cp_val = ucs2_to_doscp[ucs2_val];
 326
 327                         if (cp_val < 256)
 328                                 *p++ = (char)cp_val;
 329                         else {
 330                                 *p++ = (cp_val >> 8) & 0xff;
 331                                 *p++ = (cp_val & 0xff);
 332                         }
 333                 }
 334         }
 335
 336         *p = 0;
 337         return lbuf;
 338 }
 339
 340 /*******************************************************************
 341  Create a null-terminated unicode string from a null-terminated DOS
 342  codepage string.
 343  Return number of unicode chars copied, excluding the null character.
 344  Unicode strings created are in little-endian format.
 345 ********************************************************************/
 346
 347 size_t dos_struni2(char *dst, const char *src, size_t max_len)
 348 {
 349         size_t len = 0;
 350
 351         if (dst == NULL)
 352                 return 0;
 353
 354         if (src != NULL) {
 355                 for (; *src && len < max_len-2; len++, dst +=2) {
 356                         size_t skip = get_character_len(*src);
 357                         smb_ucs2_t val = (*src & 0xff);
 358
 359                         /*
 360                          * If this is a multibyte character (and all DOS/Windows
 361                          * codepages have at maximum 2 byte multibyte characters)
 362                          * then work out the index value for the unicode conversion.
 363                          */
 364
 365                         if (skip == 2)
 366                                 val = ((val << 8) | (src[1] & 0xff));
 367
 368                         SSVAL(dst,0,doscp_to_ucs2[val]);
 369                         if (skip)
 370                                 src += skip;
 371                         else
 372                                 src++;
 373                 }
 374         }
 375
 376         SSVAL(dst,0,0);
 377
 378         return len;
 379 }
 380
 381 /*******************************************************************
 382  Return a DOS codepage version of a little-endian unicode string.
 383  Hack alert: uses fixed buffer(s).
 384 ********************************************************************/
 385
 386 char *dos_unistr(char *buf)
 387 {
 388         char *lbuf = lbufs[nexti];
 389         uint16 *src = (uint16 *)buf;
 390         char *p;
 391
 392         nexti = (nexti+1)%8;
 393
 394         for (p = lbuf; *src && p-lbuf < MAXUNI-3; src++) {
 395                 uint16 ucs2_val = SVAL(src,0);
 396                 uint16 cp_val = ucs2_to_doscp[ucs2_val];
 397
 398                 if (cp_val < 256)
 399                         *p++ = (char)cp_val;
 400                 else {
 401                         *p++ = (cp_val >> 8) & 0xff;
 402                         *p++ = (cp_val & 0xff);
 403                 }
 404         }
 405
 406         *p = 0;
 407         return lbuf;
 408 }
 409
 410 /*******************************************************************
 411  Strcpy for unicode strings.  returns length (in num of wide chars)
 412 ********************************************************************/
 413
 414 int unistrcpy(char *dst, char *src)
 415 {
 416         int num_wchars = 0;
 417         uint16 *wsrc = (uint16 *)src;
 418         uint16 *wdst = (uint16 *)dst;
 419
 420         while (*wsrc) {
 421                 *wdst++ = *wsrc++;
 422                 num_wchars++;
 423         }
 424         *wdst = 0;
 425
 426         return num_wchars;
 427 }
 428
 429
 430
 431 /*******************************************************************
 432  Free any existing maps.
 433 ********************************************************************/
 434
 435 static void free_maps(smb_ucs2_t **pp_cp_to_ucs2, uint16 **pp_ucs2_to_cp)
 436 {
 437         /* this handles identity mappings where we share the pointer */
 438         if (*pp_ucs2_to_cp == *pp_cp_to_ucs2) {
 439                 *pp_ucs2_to_cp = NULL;
 440         }
 441
 442         if (*pp_cp_to_ucs2) {
 443                 free(*pp_cp_to_ucs2);
 444                 *pp_cp_to_ucs2 = NULL;
 445         }
 446
 447         if (*pp_ucs2_to_cp) {
 448                 free(*pp_ucs2_to_cp);
 449                 *pp_ucs2_to_cp = NULL;
 450         }
 451 }
 452
 453
 454 /*******************************************************************
 455  Build a default (null) codepage to unicode map.
 456 ********************************************************************/
 457
 458 void default_unicode_map(smb_ucs2_t **pp_cp_to_ucs2, uint16 **pp_ucs2_to_cp)
 459 {
 460   int i;
 461
 462   free_maps(pp_cp_to_ucs2, pp_ucs2_to_cp);
 463
 464   if ((*pp_ucs2_to_cp = (uint16 *)malloc(2*65536)) == NULL) {
 465     DEBUG(0,("default_unicode_map: malloc fail for ucs2_to_cp size %u.\n", 2*65536));
 466     abort();
 467   }
 468
 469   *pp_cp_to_ucs2 = *pp_ucs2_to_cp; /* Default map is an identity. */
 470   for (i = 0; i < 65536; i++)
 471     (*pp_cp_to_ucs2)[i] = i;
 472 }
 473
 474 /*******************************************************************
 475  Load a codepage to unicode and vica-versa map.
 476 ********************************************************************/
 477
 478 BOOL load_unicode_map(const char *codepage, smb_ucs2_t **pp_cp_to_ucs2, uint16 **pp_ucs2_to_cp)
 479 {
 480   pstring unicode_map_file_name;
 481   FILE *fp = NULL;
 482   SMB_STRUCT_STAT st;
 483   smb_ucs2_t *cp_to_ucs2 = *pp_cp_to_ucs2;
 484   uint16 *ucs2_to_cp = *pp_ucs2_to_cp;
 485   size_t cp_to_ucs2_size;
 486   size_t ucs2_to_cp_size;
 487   size_t i;
 488   size_t size;
 489   char buf[UNICODE_MAP_HEADER_SIZE];
 490
 491   DEBUG(5, ("load_unicode_map: loading unicode map for codepage %s.\n", codepage));
 492
 493   if (*codepage == '\0')
 494     goto clean_and_exit;
 495
 496   if(strlen(CODEPAGEDIR) + 13 + strlen(codepage) > sizeof(unicode_map_file_name)) {
 497     DEBUG(0,("load_unicode_map: filename too long to load\n"));
 498     goto clean_and_exit;
 499   }
 500
 501   pstrcpy(unicode_map_file_name, CODEPAGEDIR);
 502   pstrcat(unicode_map_file_name, "/");
 503   pstrcat(unicode_map_file_name, "unicode_map.");
 504   pstrcat(unicode_map_file_name, codepage);
 505
 506   if(sys_stat(unicode_map_file_name,&st)!=0) {
 507     DEBUG(0,("load_unicode_map: filename %s does not exist.\n",
 508               unicode_map_file_name));
 509     goto clean_and_exit;
 510   }
 511
 512   size = st.st_size;
 513
 514   if ((size != UNICODE_MAP_HEADER_SIZE + 4*65536) && (size != UNICODE_MAP_HEADER_SIZE +(2*256 + 2*65536))) {
 515     DEBUG(0,("load_unicode_map: file %s is an incorrect size for a \
 516 unicode map file (size=%d).\n", unicode_map_file_name, (int)size));
 517     goto clean_and_exit;
 518   }
 519
 520   if((fp = sys_fopen( unicode_map_file_name, "r")) == NULL) {
 521     DEBUG(0,("load_unicode_map: cannot open file %s. Error was %s\n",
 522               unicode_map_file_name, strerror(errno)));
 523     goto clean_and_exit;
 524   }
 525
 526   if(fread( buf, 1, UNICODE_MAP_HEADER_SIZE, fp)!=UNICODE_MAP_HEADER_SIZE) {
 527     DEBUG(0,("load_unicode_map: cannot read header from file %s. Error was %s\n",
 528               unicode_map_file_name, strerror(errno)));
 529     goto clean_and_exit;
 530   }
 531
 532   /* Check the version value */
 533   if(SVAL(buf,UNICODE_MAP_VERSION_OFFSET) != UNICODE_MAP_FILE_VERSION_ID) {
 534     DEBUG(0,("load_unicode_map: filename %s has incorrect version id. \
 535 Needed %hu, got %hu.\n",
 536           unicode_map_file_name, (uint16)UNICODE_MAP_FILE_VERSION_ID,
 537           SVAL(buf,UNICODE_MAP_VERSION_OFFSET)));
 538     goto clean_and_exit;
 539   }
 540
 541   /* Check the codepage value */
 542   if(!strequal(&buf[UNICODE_MAP_CLIENT_CODEPAGE_OFFSET], codepage)) {
 543     DEBUG(0,("load_unicode_map: codepage %s in file %s is not the same as that \
 544 requested (%s).\n", &buf[UNICODE_MAP_CLIENT_CODEPAGE_OFFSET], unicode_map_file_name, codepage ));
 545     goto clean_and_exit;
 546   }
 547
 548   ucs2_to_cp_size = 2*65536;
 549   if (size == UNICODE_MAP_HEADER_SIZE + 4*65536) {
 550     /*
 551      * This is a multibyte code page.
 552      */
 553     cp_to_ucs2_size = 2*65536;
 554   } else {
 555     /*
 556      * Single byte code page.
 557      */
 558     cp_to_ucs2_size = 2*256;
 559   }
 560
 561   /*
 562    * Free any old translation tables.
 563    */
 564
 565   free_maps(pp_cp_to_ucs2, pp_ucs2_to_cp);
 566
 567   if ((cp_to_ucs2 = (smb_ucs2_t *)malloc(cp_to_ucs2_size)) == NULL) {
 568     DEBUG(0,("load_unicode_map: malloc fail for cp_to_ucs2 size %u.\n", cp_to_ucs2_size ));
 569     goto clean_and_exit;
 570   }
 571
 572   if ((ucs2_to_cp = (uint16 *)malloc(ucs2_to_cp_size)) == NULL) {
 573     DEBUG(0,("load_unicode_map: malloc fail for ucs2_to_cp size %u.\n", ucs2_to_cp_size ));
 574     goto clean_and_exit;
 575   }
 576
 577   if(fread( (char *)cp_to_ucs2, 1, cp_to_ucs2_size, fp)!=cp_to_ucs2_size) {
 578     DEBUG(0,("load_unicode_map: cannot read cp_to_ucs2 from file %s. Error was %s\n",
 579               unicode_map_file_name, strerror(errno)));
 580     goto clean_and_exit;
 581   }
 582
 583   if(fread( (char *)ucs2_to_cp, 1, ucs2_to_cp_size, fp)!=ucs2_to_cp_size) {
 584     DEBUG(0,("load_unicode_map: cannot read ucs2_to_cp from file %s. Error was %s\n",
 585               unicode_map_file_name, strerror(errno)));
 586     goto clean_and_exit;
 587   }
 588
 589   /*
 590    * Now ensure the 16 bit values are in the correct endianness.
 591    */
 592
 593   for (i = 0; i < cp_to_ucs2_size/2; i++)
 594     cp_to_ucs2[i] = SVAL(cp_to_ucs2,i*2);
 595
 596   for (i = 0; i < ucs2_to_cp_size/2; i++)
 597     ucs2_to_cp[i] = SVAL(ucs2_to_cp,i*2);
 598
 599   fclose(fp);
 600
 601   *pp_cp_to_ucs2 = cp_to_ucs2;
 602   *pp_ucs2_to_cp = ucs2_to_cp;
 603
 604   return True;
 605
 606 clean_and_exit:
 607
 608   /* pseudo destructor :-) */
 609
 610   if(fp != NULL)
 611     fclose(fp);
 612
 613   free_maps(pp_cp_to_ucs2, pp_ucs2_to_cp);
 614
 615   default_unicode_map(pp_cp_to_ucs2, pp_ucs2_to_cp);
 616
 617   return False;
 618 }
 619
 620 /*******************************************************************
 621  Load a dos codepage to unicode and vica-versa map.
 622 ********************************************************************/
 623
 624 BOOL load_dos_unicode_map(int codepage)
 625 {
 626   fstring codepage_str;
 627
 628   slprintf(codepage_str, sizeof(fstring)-1, "%03d", codepage);
 629   return load_unicode_map(codepage_str, &doscp_to_ucs2, &ucs2_to_doscp);
 630 }
 631
 632 /*******************************************************************
 633  Load a UNIX codepage to unicode and vica-versa map.
 634 ********************************************************************/
 635
 636 BOOL load_unix_unicode_map(const char *unix_char_set)
 637 {
 638   fstring upper_unix_char_set;
 639
 640   fstrcpy(upper_unix_char_set, unix_char_set);
 641   strupper(upper_unix_char_set);
 642   return load_unicode_map(upper_unix_char_set, &unixcp_to_ucs2, &ucs2_to_unixcp);
 643 }
 644
 645 /*******************************************************************
 646  The following functions reproduce many of the non-UNICODE standard
 647  string functions in Samba.
 648 ********************************************************************/
 649
 650 /*******************************************************************
 651  Convert a UNICODE string to multibyte format. Note that the 'src' is in
 652  native byte order, not little endian. Always zero terminates.
 653  dst_len is in bytes.
 654 ********************************************************************/
 655
 656 static char *unicode_to_multibyte(char *dst, const smb_ucs2_t *src,
 657                                   size_t dst_len, const uint16 *ucs2_to_cp)
 658 {
 659         size_t dst_pos;
 660
 661         for(dst_pos = 0; *src && (dst_pos < dst_len - 1);) {
 662                 smb_ucs2_t val = ucs2_to_cp[*src++];
 663                 if(val < 256) {
 664                         dst[dst_pos++] = (char)val;
 665                 } else {
 666
 667                         if(dst_pos >= dst_len - 2)
 668                                 break;
 669
 670                         /*
 671                          * A 2 byte value is always written as
 672                          * high/low into the buffer stream.
 673                          */
 674
 675                         dst[dst_pos++] = (char)((val >> 8) & 0xff);
 676                         dst[dst_pos++] = (char)(val & 0xff);
 677                 }
 678         }
 679
 680         dst[dst_pos] = '\0';
 681
 682         return dst;
 683 }
 684
 685 /*******************************************************************
 686  Convert a multibyte string to UNICODE format. Note that the 'dst' is in
 687  native byte order, not little endian. Always zero terminates.
 688  dst_len is in bytes.
 689 ********************************************************************/
 690
 691 smb_ucs2_t *multibyte_to_unicode(smb_ucs2_t *dst, const char *src,
 692                                  size_t dst_len, smb_ucs2_t *cp_to_ucs2)
 693 {
 694         size_t i;
 695
 696         dst_len /= sizeof(smb_ucs2_t); /* Convert to smb_ucs2_t units. */
 697
 698         for(i = 0; (i < (dst_len  - 1)) && src[i];) {
 699                 size_t skip = skip_multibyte_char(*src);
 700                 smb_ucs2_t val = (*src & 0xff);
 701
 702                 /*
 703                  * If this is a multibyte character
 704                  * then work out the index value for the unicode conversion.
 705                  */
 706
 707                 if (skip == 2)
 708                         val = ((val << 8) | (src[1] & 0xff));
 709
 710                 dst[i++] = cp_to_ucs2[val];
 711                 if (skip)
 712                         src += skip;
 713                 else
 714                         src++;
 715         }
 716
 717         dst[i] = 0;
 718
 719         return dst;
 720 }
 721
 722 /*******************************************************************
 723  Convert a UNICODE string to multibyte format. Note that the 'src' is in
 724  native byte order, not little endian. Always zero terminates.
 725  This function may be replaced if the MB  codepage format is an
 726  encoded one (ie. utf8, hex). See the code in lib/kanji.c
 727  for details. dst_len is in bytes.
 728 ********************************************************************/
 729
 730 char *unicode_to_unix(char *dst, const smb_ucs2_t *src, size_t dst_len)
 731 {
 732         return unicode_to_multibyte(dst, src, dst_len, ucs2_to_unixcp);
 733 }
 734
 735 /*******************************************************************
 736  Convert a UNIX string to UNICODE format. Note that the 'dst' is in
 737  native byte order, not little endian. Always zero terminates.
 738  This function may be replaced if the UNIX codepage format is a
 739  multi-byte one (ie. JIS, SJIS or utf8). See the code in lib/kanji.c
 740  for details. dst_len is in bytes, not ucs2 units.
 741 ********************************************************************/
 742
 743 smb_ucs2_t *unix_to_unicode(smb_ucs2_t *dst, const char *src, size_t dst_len)
 744 {
 745         return multibyte_to_unicode(dst, src, dst_len, unixcp_to_ucs2);
 746 }
 747
 748 /*******************************************************************
 749  Convert a UNICODE string to DOS format. Note that the 'src' is in
 750  native byte order, not little endian. Always zero terminates.
 751  dst_len is in bytes.
 752 ********************************************************************/
 753
 754 char *unicode_to_dos(char *dst, const smb_ucs2_t *src, size_t dst_len)
 755 {
 756         return unicode_to_multibyte(dst, src, dst_len, ucs2_to_doscp);
 757 }
 758
 759 /*******************************************************************
 760  Convert a single UNICODE character to DOS codepage. Returns the
 761  number of bytes in the DOS codepage character.
 762 ********************************************************************/
 763
 764 size_t unicode_to_dos_char(char *dst, const smb_ucs2_t src)
 765 {
 766         smb_ucs2_t val = ucs2_to_doscp[src];
 767         if(val < 256) {
 768                 *dst = (char)val;
 769                 return (size_t)1;
 770         }
 771         /*
 772          * A 2 byte value is always written as
 773          * high/low into the buffer stream.
 774          */
 775
 776         dst[0] = (char)((val >> 8) & 0xff);
 777         dst[1] = (char)(val & 0xff);
 778         return (size_t)2;
 779 }
 780
 781 /*******************************************************************
 782  Convert a DOS string to UNICODE format. Note that the 'dst' is in
 783  native byte order, not little endian. Always zero terminates.
 784  This function may be replaced if the DOS codepage format is a
 785  multi-byte one (ie. JIS, SJIS or utf8). See the code in lib/kanji.c
 786  for details. dst_len is in bytes, not ucs2 units.
 787 ********************************************************************/
 788
 789 smb_ucs2_t *dos_to_unicode(smb_ucs2_t *dst, const char *src, size_t dst_len)
 790 {
 791         return multibyte_to_unicode(dst, src, dst_len, doscp_to_ucs2);
 792 }
 793
 794 /*******************************************************************
 795  Count the number of characters in a smb_ucs2_t string.
 796 ********************************************************************/
 797
 798 size_t strlen_w(const smb_ucs2_t *src)
 799 {
 800   size_t len;
 801
 802   for(len = 0; *src; len++)
 803     ;
 804
 805   return len;
 806 }
 807
 808 /*******************************************************************
 809  Safe wstring copy into a known length string. maxlength includes
 810  the terminating zero. maxlength is in ucs2 units.
 811 ********************************************************************/
 812
 813 smb_ucs2_t *safe_strcpy_w(smb_ucs2_t *dest,const smb_ucs2_t *src, size_t maxlength)
 814 {
 815     size_t ucs2_len;
 816
 817     if (!dest) {
 818         DEBUG(0,("ERROR: NULL dest in safe_strcpy_w\n"));
 819         return NULL;
 820     }
 821
 822     if (!src) {
 823         *dest = 0;
 824         return dest;
 825     }
 826
 827         maxlength /= sizeof(smb_ucs2_t);
 828
 829         ucs2_len = strlen_w(src);
 830
 831     if (ucs2_len >= maxlength) {
 832                 fstring out;
 833         DEBUG(0,("ERROR: string overflow by %u bytes in safe_strcpy_w [%.50s]\n",
 834                         (unsigned int)((ucs2_len-maxlength)*sizeof(smb_ucs2_t)),
 835                         unicode_to_unix(out,src,sizeof(out))) );
 836                 ucs2_len = maxlength - 1;
 837     }
 838
 839     memcpy(dest, src, ucs2_len*sizeof(smb_ucs2_t));
 840     dest[ucs2_len] = 0;
 841     return dest;
 842 }
 843
 844 /*******************************************************************
 845  Safe string cat into a string. maxlength includes the terminating zero.
 846  maxlength is in ucs2 units.
 847 ********************************************************************/
 848
 849 smb_ucs2_t *safe_strcat_w(smb_ucs2_t *dest, const smb_ucs2_t *src, size_t maxlength)
 850 {
 851     size_t ucs2_src_len, ucs2_dest_len;
 852
 853     if (!dest) {
 854         DEBUG(0,("ERROR: NULL dest in safe_strcat_w\n"));
 855         return NULL;
 856     }
 857
 858     if (!src)
 859         return dest;
 860
 861     ucs2_src_len = strlen_w(src);
 862     ucs2_dest_len = strlen_w(dest);
 863
 864     if (ucs2_src_len + ucs2_dest_len >= maxlength) {
 865                 fstring out;
 866                 int new_len = maxlength - ucs2_dest_len - 1;
 867         DEBUG(0,("ERROR: string overflow by %u characters in safe_strcat_w [%.50s]\n",
 868                         (unsigned int)(sizeof(smb_ucs2_t)*(ucs2_src_len + ucs2_dest_len - maxlength)),
 869                         unicode_to_unix(out,src,sizeof(out))) );
 870         ucs2_src_len = (size_t)(new_len > 0 ? new_len : 0);
 871     }
 872
 873     memcpy(&dest[ucs2_dest_len], src, ucs2_src_len*sizeof(smb_ucs2_t));
 874     dest[ucs2_dest_len + ucs2_src_len] = 0;
 875     return dest;
 876 }
 877
 878 /*******************************************************************
 879  Compare the two strings s1 and s2.
 880 ********************************************************************/
 881
 882 int strcmp_w(const smb_ucs2_t *s1, const smb_ucs2_t *s2)
 883 {
 884         smb_ucs2_t c1, c2;
 885
 886         for (;;) {
 887                 c1 = *s1++;
 888                 c2 = *s2++;
 889
 890                 if (c1 != c2)
 891                         return c1 - c2;
 892
 893                 if (c1 == 0)
 894             return 0;
 895     }
 896         return 0;
 897 }
 898
 899 /*******************************************************************
 900  Compare the first n characters of s1 to s2. len is in ucs2 units.
 901 ********************************************************************/
 902
 903 int strncmp_w(const smb_ucs2_t *s1, const smb_ucs2_t *s2, size_t len)
 904 {
 905         smb_ucs2_t c1, c2;
 906
 907         for (; len != 0; --len) {
 908                 c1 = *s1++;
 909                 c2 = *s2++;
 910
 911                 if (c1 != c2)
 912                         return c1 - c2;
 913
 914                 if (c1 == 0)
 915                         return 0;
 916
 917     }
 918         return 0;
 919 }
 920
 921 /*******************************************************************
 922  Search string s2 from s1.
 923 ********************************************************************/
 924
 925 smb_ucs2_t *strstr_w(const smb_ucs2_t *s1, const smb_ucs2_t *s2)
 926 {
 927         size_t len = strlen_w(s2);
 928
 929         if (!*s2)
 930                 return (smb_ucs2_t *)s1;
 931
 932         for(;*s1; s1++) {
 933                 if (*s1 == *s2) {
 934                         if (strncmp_w(s1, s2, len) == 0)
 935                                 return (smb_ucs2_t *)s1;
 936                 }
 937         }
 938         return NULL;
 939 }
 940
 941 /*******************************************************************
 942  Search for ucs2 char c from the beginning of s.
 943 ********************************************************************/
 944
 945 smb_ucs2_t *strchr_w(const smb_ucs2_t *s, smb_ucs2_t c)
 946 {
 947         do {
 948                 if (*s == c)
 949                         return (smb_ucs2_t *)s;
 950         } while (*s++);
 951
 952         return NULL;
 953 }
 954
 955 /*******************************************************************
 956  Search for ucs2 char c from the end of s.
 957 ********************************************************************/
 958
 959 smb_ucs2_t *strrchr_w(const smb_ucs2_t *s, smb_ucs2_t c)
 960 {
 961         smb_ucs2_t *retval = 0;
 962
 963         do {
 964                 if (*s == c)
 965                         retval = (smb_ucs2_t *)s;
 966         } while (*s++);
 967
 968         return retval;
 969 }
 970
 971 /*******************************************************************
 972  Search token from s1 separated by any ucs2 char of s2.
 973 ********************************************************************/
 974
 975 smb_ucs2_t *strtok_w(smb_ucs2_t *s1, const smb_ucs2_t *s2)
 976 {
 977         static smb_ucs2_t *s = NULL;
 978         smb_ucs2_t *q;
 979
 980         if (!s1) {
 981                 if (!s)
 982                         return NULL;
 983                 s1 = s;
 984         }
 985
 986         for (q = s1; *s1; s1++) {
 987                 smb_ucs2_t *p = strchr_w(s2, *s1);
 988                 if (p) {
 989                         if (s1 != q) {
 990                                 s = s1 + 1;
 991                                 *s1 = '\0';
 992                                 return q;
 993                         }
 994                         q = s1 + 1;
 995                 }
 996         }
 997
 998         s = NULL;
 999         if (*q)
1000                 return q;
1001
1002         return NULL;
1003 }
1004
1005 /*******************************************************************
1006  Duplicate a ucs2 string.
1007 ********************************************************************/
1008
1009 smb_ucs2_t *strdup_w(const smb_ucs2_t *s)
1010 {
1011         size_t newlen = (strlen_w(s)+1)*sizeof(smb_ucs2_t);
1012         smb_ucs2_t *newstr = (smb_ucs2_t *)malloc(newlen);
1013     if (newstr == NULL)
1014         return NULL;
1015     safe_strcpy_w(newstr, s, newlen);
1016     return newstr;
1017 }
1018
1019 /*******************************************************************
1020  Mapping tables for UNICODE character. Allows toupper/tolower and
1021  isXXX functions to work.
1022
1023  tridge: split into 2 pieces. This saves us 5/6 of the memory
1024  with a small speed penalty
1025  The magic constants are the lower/upper range of the tables two
1026  parts
1027 ********************************************************************/
1028
/*
 * One entry of the Unicode mapping tables. The field order must match the
 * initialisers pulled in from the unicode_map_table*.h headers.
 */
typedef struct {
        smb_ucs2_t lower;       /* value returned by map_table_lower() for this entry */
        smb_ucs2_t upper;       /* value returned by map_table_upper() for this entry */
        unsigned char flags;    /* bitmask tested against the UNI_* masks (UNI_UPPER, UNI_LOWER, ...) */
} smb_unicode_table_t;
1034
/*
 * Values below TABLE1_BOUNDARY are looked up in map_table1, indexed directly
 * by the value. Values at or above TABLE2_BOUNDARY are looked up in
 * map_table2, indexed by (value - TABLE2_BOUNDARY). Values in between have
 * no table entry and are handled by the accessor functions' fallbacks.
 */
#define TABLE1_BOUNDARY 9450
#define TABLE2_BOUNDARY 64256

/* Entries for values in [0, TABLE1_BOUNDARY); contents come from the included header. */
static smb_unicode_table_t map_table1[] = {
#include "unicode_map_table1.h"
};

/* Entries for values from TABLE2_BOUNDARY upwards; contents come from the included header. */
static smb_unicode_table_t map_table2[] = {
#include "unicode_map_table2.h"
};
1045
1046 static unsigned char map_table_flags(smb_ucs2_t v)
1047 {
1048         if (v < TABLE1_BOUNDARY) return map_table1[v].flags;
1049         if (v >= TABLE2_BOUNDARY) return map_table2[v - TABLE2_BOUNDARY].flags;
1050         return 0;
1051 }
1052
1053 static smb_ucs2_t map_table_lower(smb_ucs2_t v)
1054 {
1055         if (v < TABLE1_BOUNDARY) return map_table1[v].lower;
1056         if (v >= TABLE2_BOUNDARY) return map_table2[v - TABLE2_BOUNDARY].lower;
1057         return v;
1058 }
1059
1060 static smb_ucs2_t map_table_upper(smb_ucs2_t v)
1061 {
1062         if (v < TABLE1_BOUNDARY) return map_table1[v].upper;
1063         if (v >= TABLE2_BOUNDARY) return map_table2[v - TABLE2_BOUNDARY].upper;
1064         return v;
1065 }
1066
1067 /*******************************************************************
1068  Is an upper case wchar.
1069 ********************************************************************/
1070
1071 int isupper_w( smb_ucs2_t val)
1072 {
1073         return (map_table_flags(val) & UNI_UPPER);
1074 }
1075
1076 /*******************************************************************
1077  Is a lower case wchar.
1078 ********************************************************************/
1079
1080 int islower_w( smb_ucs2_t val)
1081 {
1082         return (map_table_flags(val) & UNI_LOWER);
1083 }
1084
1085 /*******************************************************************
1086  Is a digit wchar.
1087 ********************************************************************/
1088
1089 int isdigit_w( smb_ucs2_t val)
1090 {
1091         return (map_table_flags(val) & UNI_DIGIT);
1092 }
1093
1094 /*******************************************************************
1095  Is a hex digit wchar.
1096 ********************************************************************/
1097
1098 int isxdigit_w( smb_ucs2_t val)
1099 {
1100         return (map_table_flags(val) & UNI_XDIGIT);
1101 }
1102
1103 /*******************************************************************
1104  Is a space wchar.
1105 ********************************************************************/
1106
1107 int isspace_w( smb_ucs2_t val)
1108 {
1109         return (map_table_flags(val) & UNI_SPACE);
1110 }
1111
1112 /*******************************************************************
1113  Convert a wchar to upper case.
1114 ********************************************************************/
1115
1116 smb_ucs2_t toupper_w( smb_ucs2_t val )
1117 {
1118         return map_table_upper(val);
1119 }
1120
1121 /*******************************************************************
1122  Convert a wchar to lower case.
1123 ********************************************************************/
1124
1125 smb_ucs2_t tolower_w( smb_ucs2_t val )
1126 {
1127         return map_table_lower(val);
1128 }
1129
/*
 * Saved scan position. next_token_w() reads it when called with a NULL ptr
 * and stores its new position here after returning a token;
 * toktocliplist_w() starts scanning from it.
 */
static smb_ucs2_t *last_ptr = NULL;

/* Set the saved scan position that next_token_w(NULL, ...) and toktocliplist_w() start from. */
void set_first_token_w(smb_ucs2_t *ptr)
{
        last_ptr = ptr;
}
1136
1137 /****************************************************************************
1138  Get the next token from a string, return False if none found
1139  handles double-quotes.
1140  Based on a routine by GJC@VILLAGE.COM.
1141  Extensively modified by Andrew.Tridgell@anu.edu.au
1142  bufsize is in bytes.
1143 ****************************************************************************/
1144
1145 static smb_ucs2_t sep_list[] = { (smb_ucs2_t)' ', (smb_ucs2_t)'\t',  (smb_ucs2_t)'\n',  (smb_ucs2_t)'\r', 0};
1146 static smb_ucs2_t quotechar = (smb_ucs2_t)'\"';
1147
1148 BOOL next_token_w(smb_ucs2_t **ptr, smb_ucs2_t *buff, smb_ucs2_t *sep, size_t bufsize)
1149 {
1150         smb_ucs2_t *s;
1151         BOOL quoted;
1152         size_t len=1;
1153
1154         /*
1155          * Convert bufsize to smb_ucs2_t units.
1156          */
1157
1158         bufsize /= sizeof(smb_ucs2_t);
1159
1160         if (!ptr)
1161                 ptr = &last_ptr;
1162         if (!ptr)
1163                 return(False);
1164
1165         s = *ptr;
1166
1167         /*
1168          * Default to simple separators.
1169          */
1170
1171         if (!sep)
1172                 sep = sep_list;
1173
1174         /*
1175          * Find the first non sep char.
1176          */
1177
1178         while(*s && strchr_w(sep,*s))
1179                 s++;
1180
1181         /*
1182          * Nothing left ?
1183          */
1184
1185         if (!*s)
1186                 return(False);
1187
1188         /*
1189          * Copy over the token.
1190          */
1191
1192         for (quoted = False; len < bufsize && *s && (quoted || !strchr_w(sep,*s)); s++) {
1193                 if (*s == quotechar) {
1194                         quoted = !quoted;
1195                 } else {
1196                         len++;
1197                         *buff++ = *s;
1198                 }
1199         }
1200
1201         *ptr = (*s) ? s+1 : s;
1202         *buff = 0;
1203         last_ptr = *ptr;
1204
1205         return(True);
1206 }
1207
1208 /****************************************************************************
1209  Convert list of tokens to array; dependent on above routine.
1210  Uses last_ptr from above - bit of a hack.
1211 ****************************************************************************/
1212
1213 smb_ucs2_t **toktocliplist_w(int *ctok, smb_ucs2_t *sep)
1214 {
1215         smb_ucs2_t *s=last_ptr;
1216         int ictok=0;
1217         smb_ucs2_t **ret, **iret;
1218
1219         if (!sep)
1220                 sep = sep_list;
1221
1222         while(*s && strchr_w(sep,*s))
1223                 s++;
1224
1225         /*
1226          * Nothing left ?
1227          */
1228
1229         if (!*s)
1230                 return(NULL);
1231
1232         do {
1233                 ictok++;
1234                 while(*s && (!strchr_w(sep,*s)))
1235                         s++;
1236                 while(*s && strchr_w(sep,*s))
1237                         *s++=0;
1238         } while(*s);
1239
1240         *ctok = ictok;
1241         s = last_ptr;
1242
1243         if (!(ret=iret=malloc(ictok*sizeof(smb_ucs2_t *))))
1244                 return NULL;
1245
1246         while(ictok--) {
1247                 *iret++=s;
1248                 while(*s++)
1249                         ;
1250                 while(!*s)
1251                         s++;
1252         }
1253
1254         return ret;
1255 }
1256
1257 /*******************************************************************
1258  Case insensitive string compararison.
1259 ********************************************************************/
1260
1261 int StrCaseCmp_w(const smb_ucs2_t *s, const smb_ucs2_t *t)
1262 {
1263         /*
1264          * Compare until we run out of string, either t or s, or find a difference.
1265          */
1266
1267         while (*s && *t && toupper_w(*s) == toupper_w(*t)) {
1268                 s++;
1269                 t++;
1270         }
1271
1272         return(toupper_w(*s) - toupper_w(*t));
1273 }
1274
1275 /*******************************************************************
1276  Case insensitive string compararison, length limited.
1277  n is in ucs2 units.
1278 ********************************************************************/
1279
1280 int StrnCaseCmp_w(const smb_ucs2_t *s, const smb_ucs2_t *t, size_t n)
1281 {
1282         /*
1283          * Compare until we run out of string, either t or s, or chars.
1284          */
1285
1286         while (n && *s && *t && toupper_w(*s) == toupper_w(*t)) {
1287                 s++;
1288                 t++;
1289                 n--;
1290         }
1291
1292     /*
1293          * Not run out of chars - strings are different lengths.
1294          */
1295
1296     if (n)
1297       return(toupper_w(*s) - toupper_w(*t));
1298
1299     /*
1300          * Identical up to where we run out of chars,
1301          * and strings are same length.
1302          */
1303
1304         return(0);
1305 }
1306
1307 /*******************************************************************
1308  Compare 2 strings.
1309 ********************************************************************/
1310
1311 BOOL strequal_w(const smb_ucs2_t *s1, const smb_ucs2_t *s2)
1312 {
1313         if (s1 == s2)
1314                 return(True);
1315         if (!s1 || !s2)
1316                 return(False);
1317
1318         return(StrCaseCmp_w(s1,s2)==0);
1319 }
1320
1321 /*******************************************************************
1322  Compare 2 strings up to and including the nth char. n is in ucs2
1323  units.
1324 ******************************************************************/
1325
1326 BOOL strnequal_w(const smb_ucs2_t *s1,const smb_ucs2_t *s2,size_t n)
1327 {
1328         if (s1 == s2)
1329                 return(True);
1330         if (!s1 || !s2 || !n)
1331                 return(False);
1332
1333         return(StrnCaseCmp_w(s1,s2,n)==0);
1334 }
1335
1336 /*******************************************************************
1337  Compare 2 strings (case sensitive).
1338 ********************************************************************/
1339
1340 BOOL strcsequal_w(const smb_ucs2_t *s1,const smb_ucs2_t *s2)
1341 {
1342         if (s1 == s2)
1343                 return(True);
1344         if (!s1 || !s2)
1345                 return(False);
1346
1347         return(strcmp_w(s1,s2)==0);
1348 }
1349
1350 /*******************************************************************
1351  Convert a string to lower case.
1352 ********************************************************************/
1353
1354 void strlower_w(smb_ucs2_t *s)
1355 {
1356         while (*s) {
1357                 if (isupper_w(*s))
1358                         *s = tolower_w(*s);
1359                 s++;
1360         }
1361 }
1362
1363 /*******************************************************************
1364  Convert a string to upper case.
1365 ********************************************************************/
1366
1367 void strupper_w(smb_ucs2_t *s)
1368 {
1369         while (*s) {
1370                 if (islower_w(*s))
1371                         *s = toupper_w(*s);
1372                 s++;
1373         }
1374 }
1375
1376 /*******************************************************************
1377  Convert a string to "normal" form.
1378 ********************************************************************/
1379
1380 void strnorm_w(smb_ucs2_t *s)
1381 {
1382         extern int case_default;
1383         if (case_default == CASE_UPPER)
1384                 strupper_w(s);
1385         else
1386                 strlower_w(s);
1387 }
1388
1389 /*******************************************************************
1390  Check if a string is in "normal" case.
1391 ********************************************************************/
1392
1393 BOOL strisnormal_w(smb_ucs2_t *s)
1394 {
1395         extern int case_default;
1396         if (case_default == CASE_UPPER)
1397                 return(!strhaslower_w(s));
1398
1399         return(!strhasupper_w(s));
1400 }
1401
1402 /****************************************************************************
1403  String replace.
1404 ****************************************************************************/
1405
1406 void string_replace_w(smb_ucs2_t *s, smb_ucs2_t oldc, smb_ucs2_t newc)
1407 {
1408         while (*s) {
1409                 if (oldc == *s)
1410                         *s = newc;
1411                 s++;
1412         }
1413 }
1414
1415 /*******************************************************************
1416  Skip past some strings in a buffer. n is in bytes.
1417 ********************************************************************/
1418
1419 smb_ucs2_t *skip_string_w(smb_ucs2_t *buf,size_t n)
1420 {
1421         while (n--)
1422                 buf += (strlen_w(buf)*sizeof(smb_ucs2_t)) + 1;
1423         return(buf);
1424 }
1425
1426 /*******************************************************************
1427  Count the number of characters in a string. Same as strlen_w in
1428  smb_ucs2_t string units.
1429 ********************************************************************/
1430
1431 size_t str_charnum_w(const smb_ucs2_t *s)
1432 {
1433         return strlen_w(s);
1434 }
1435
1436 /*******************************************************************
1437  Trim the specified elements off the front and back of a string.
1438 ********************************************************************/
1439
1440 BOOL trim_string_w(smb_ucs2_t *s,const smb_ucs2_t *front,const smb_ucs2_t *back)
1441 {
1442         BOOL ret = False;
1443         size_t front_len = (front && *front) ? strlen_w(front) : 0;
1444         size_t back_len = (back && *back) ? strlen_w(back) : 0;
1445         size_t s_len;
1446
1447         while (front_len && strncmp_w(s, front, front_len) == 0) {
1448                 smb_ucs2_t *p = s;
1449                 ret = True;
1450
1451                 while (1) {
1452                         if (!(*p = p[front_len]))
1453                                 break;
1454                         p++;
1455                 }
1456         }
1457
1458         if(back_len) {
1459                 s_len = strlen_w(s);
1460                 while ((s_len >= back_len) &&
1461                         (strncmp_w(s + s_len - back_len, back, back_len)==0)) {
1462                         ret = True;
1463                         s[s_len - back_len] = 0;
1464                         s_len = strlen_w(s);
1465                 }
1466         }
1467
1468         return(ret);
1469 }
1470
1471 /****************************************************************************
1472  Does a string have any uppercase chars in it ?
1473 ****************************************************************************/
1474
1475 BOOL strhasupper_w(const smb_ucs2_t *s)
1476 {
1477         while (*s) {
1478                 if (isupper_w(*s))
1479                         return(True);
1480                 s++;
1481         }
1482         return(False);
1483 }
1484
1485 /****************************************************************************
1486  Does a string have any lowercase chars in it ?
1487 ****************************************************************************/
1488
1489 BOOL strhaslower_w(const smb_ucs2_t *s)
1490 {
1491         while (*s) {
1492                 if (islower(*s))
1493                         return(True);
1494                 s++;
1495         }
1496         return(False);
1497 }
1498
1499 /****************************************************************************
1500  Find the number of 'c' chars in a string.
1501 ****************************************************************************/
1502
1503 size_t count_chars_w(const smb_ucs2_t *s,smb_ucs2_t c)
1504 {
1505         size_t count=0;
1506
1507         while (*s) {
1508                 if (*s == c)
1509                         count++;
1510                 s++;
1511         }
1512         return(count);
1513 }
1514
1515 /*******************************************************************
1516  Return True if a string consists only of one particular character.
1517 ********************************************************************/
1518
1519 BOOL str_is_all_w(const smb_ucs2_t *s,smb_ucs2_t c)
1520 {
1521         if(s == NULL)
1522                 return False;
1523         if(!*s)
1524                 return False;
1525
1526         while (*s) {
1527                 if (*s != c)
1528                         return False;
1529                 s++;
1530         }
1531         return True;
1532 }
1533
1534 /*******************************************************************
1535  Paranoid strcpy into a buffer of given length (includes terminating
1536  zero. Strips out all but 'a-Z0-9' and replaces with '_'. Deliberately
1537  does *NOT* check for multibyte characters. Don't change it !
1538  maxlength is in ucs2 units.
1539 ********************************************************************/
1540
1541 smb_ucs2_t *alpha_strcpy_w(smb_ucs2_t *dest, const smb_ucs2_t *src, size_t maxlength)
1542 {
1543         size_t len, i;
1544
1545         if (!dest) {
1546                 DEBUG(0,("ERROR: NULL dest in alpha_strcpy_w\n"));
1547                 return NULL;
1548         }
1549
1550         if (!src) {
1551                 *dest = 0;
1552                 return dest;
1553         }
1554
1555         len = strlen_w(src);
1556         if (len >= maxlength)
1557                 len = maxlength - 1;
1558
1559         for(i = 0; i < len; i++) {
1560                 smb_ucs2_t val = src[i];
1561                 if(isupper_w(val) ||islower_w(val) || isdigit_w(val))
1562                         dest[i] = src[i];
1563                 else
1564                         dest[i] = (smb_ucs2_t)'_';
1565         }
1566
1567         dest[i] = 0;
1568
1569         return dest;
1570 }
1571
1572 /****************************************************************************
1573  Like strncpy but always null terminates. Make sure there is room !
1574  The variable n should always be one less than the available size and is in
1575  ucs2 units.
1576 ****************************************************************************/
1577
1578 smb_ucs2_t *StrnCpy_w(smb_ucs2_t *dest,const smb_ucs2_t *src,size_t n)
1579 {
1580         smb_ucs2_t *d = dest;
1581         if (!dest)
1582                 return(NULL);
1583         if (!src) {
1584                 *dest = 0;
1585                 return(dest);
1586         }
1587
1588         while (n-- && (*d++ = *src++))
1589                 ;
1590         *d = 0;
1591         return(dest);
1592 }
1593
1594 /****************************************************************************
1595  Like strncpy but copies up to the character marker. Always null terminates.
1596  returns a pointer to the character marker in the source string (src).
1597  n is in ucs2 units.
1598 ****************************************************************************/
1599
1600 smb_ucs2_t *strncpyn_w(smb_ucs2_t *dest, const smb_ucs2_t *src,size_t n, smb_ucs2_t c)
1601 {
1602         smb_ucs2_t *p;
1603         size_t str_len;
1604
1605         p = strchr_w(src, c);
1606         if (p == NULL) {
1607                 fstring cval;
1608                 smb_ucs2_t mbcval[2];
1609                 mbcval[0] = c;
1610                 mbcval[1] = 0;
1611                 DEBUG(5, ("strncpyn_w: separator character (%s) not found\n",
1612                         unicode_to_unix(cval,mbcval,sizeof(cval)) ));
1613                 return NULL;
1614         }
1615
1616         str_len = PTR_DIFF(p, src) + 1;
1617         safe_strcpy_w(dest, src, MIN(n, str_len));
1618
1619         return p;
1620 }
1621
1622 /*************************************************************
1623  Routine to get hex characters and turn them into a 16 byte array.
1624  The array can be variable length, and any non-hex-numeric
1625  characters are skipped.  "0xnn" or "0Xnn" is specially catered
1626  for. len is in bytes.
1627  Valid examples: "0A5D15"; "0x15, 0x49, 0xa2"; "59\ta9\te3\n"
1628 **************************************************************/
1629
1630 static smb_ucs2_t hexprefix[] = { (smb_ucs2_t)'0', (smb_ucs2_t)'x', 0 };
1631 static smb_ucs2_t hexchars[] = { (smb_ucs2_t)'0', (smb_ucs2_t)'1', (smb_ucs2_t)'2', (smb_ucs2_t)'3',
1632                                                                 (smb_ucs2_t)'4', (smb_ucs2_t)'5', (smb_ucs2_t)'6', (smb_ucs2_t)'7',
1633                                                                 (smb_ucs2_t)'8', (smb_ucs2_t)'9', (smb_ucs2_t)'A', (smb_ucs2_t)'B',
1634                                                                 (smb_ucs2_t)'C', (smb_ucs2_t)'D', (smb_ucs2_t)'E', (smb_ucs2_t)'F', 0 };
1635
1636 size_t strhex_to_str_w(char *p, size_t len, const smb_ucs2_t *strhex)
1637 {
1638         size_t i;
1639         size_t num_chars = 0;
1640         unsigned char   lonybble, hinybble;
1641         smb_ucs2_t *p1 = NULL, *p2 = NULL;
1642
1643         /*
1644          * Convert to smb_ucs2_t units.
1645          */
1646
1647         len /= sizeof(smb_ucs2_t);
1648
1649         for (i = 0; i < len && strhex[i] != 0; i++) {
1650                 if (strnequal_w(hexchars, hexprefix, 2)) {
1651                         i++; /* skip two chars */
1652                         continue;
1653                 }
1654
1655                 if (!(p1 = strchr_w(hexchars, toupper_w(strhex[i]))))
1656                         break;
1657
1658                 i++; /* next hex digit */
1659
1660                 if (!(p2 = strchr_w(hexchars, toupper_w(strhex[i]))))
1661                         break;
1662
1663                 /* get the two nybbles */
1664                 hinybble = (PTR_DIFF(p1, hexchars)/sizeof(smb_ucs2_t));
1665                 lonybble = (PTR_DIFF(p2, hexchars)/sizeof(smb_ucs2_t));
1666
1667                 p[num_chars] = (hinybble << 4) | lonybble;
1668                 num_chars++;
1669
1670                 p1 = NULL;
1671                 p2 = NULL;
1672         }
1673         return num_chars;
1674 }
1675
1676 /****************************************************************************
1677  Check if a string is part of a list.
1678 ****************************************************************************/
1679
1680 BOOL in_list_w(smb_ucs2_t *s,smb_ucs2_t *list,BOOL casesensitive)
1681 {
1682         wpstring tok;
1683         smb_ucs2_t *p=list;
1684
1685         if (!list)
1686                 return(False);
1687
1688         while (next_token_w(&p,tok,LIST_SEP_W,sizeof(tok))) {
1689                 if (casesensitive) {
1690                         if (strcmp_w(tok,s) == 0)
1691                                 return(True);
1692                 } else {
1693                         if (StrCaseCmp_w(tok,s) == 0)
1694                                 return(True);
1695                 }
1696         }
1697         return(False);
1698 }
1699
1700 /* This is used to prevent lots of mallocs of size 2 */
1701 static smb_ucs2_t *null_string = NULL;
1702
1703 /****************************************************************************
1704  Set a string value, allocing the space for the string.
1705 ****************************************************************************/
1706
1707 BOOL string_init_w(smb_ucs2_t **dest,const smb_ucs2_t *src)
1708 {
1709         size_t l;
1710
1711         if (!null_string) {
1712                 if((null_string = (smb_ucs2_t *)malloc(sizeof(smb_ucs2_t))) == NULL) {
1713                         DEBUG(0,("string_init_w: malloc fail for null_string.\n"));
1714                 return False;
1715                 }
1716                 *null_string = 0;
1717         }
1718
1719         if (!src)
1720                 src = null_string;
1721
1722         l = strlen_w(src);
1723
1724         if (l == 0)
1725                 *dest = null_string;
1726         else {
1727                 (*dest) = (smb_ucs2_t *)malloc(sizeof(smb_ucs2_t)*(l+1));
1728                 if ((*dest) == NULL) {
1729                         DEBUG(0,("Out of memory in string_init_w\n"));
1730                         return False;
1731                 }
1732
1733                 wpstrcpy(*dest,src);
1734         }
1735         return(True);
1736 }
1737
1738 /****************************************************************************
1739  Free a string value.
1740 ****************************************************************************/
1741
1742 void string_free_w(smb_ucs2_t **s)
1743 {
1744         if (!s || !(*s))
1745                 return;
1746         if (*s == null_string)
1747                 *s = NULL;
1748         if (*s)
1749                 free((char *)*s);
1750         *s = NULL;
1751 }
1752
1753 /****************************************************************************
1754  Set a string value, allocing the space for the string, and deallocating any
1755  existing space.
1756 ****************************************************************************/
1757
1758 BOOL string_set_w(smb_ucs2_t **dest,const smb_ucs2_t *src)
1759 {
1760         string_free_w(dest);
1761
1762         return(string_init_w(dest,src));
1763 }
1764
1765 /****************************************************************************
1766  Substitute a string for a pattern in another string. Make sure there is
1767  enough room !
1768
1769  This routine looks for pattern in s and replaces it with
1770  insert. It may do multiple replacements.
1771
1772  Any of " ; ' $ or ` in the insert string are replaced with _
1773  if len==0 then no length check is performed
1774  len is in ucs2 units.
1775 ****************************************************************************/
1776
1777 void string_sub_w(smb_ucs2_t *s,const smb_ucs2_t *pattern,const smb_ucs2_t *insert, size_t len)
1778 {
1779         smb_ucs2_t *p;
1780         ssize_t ls,lp,li, i;
1781
1782         if (!insert || !pattern || !s)
1783                 return;
1784
1785         ls = (ssize_t)strlen_w(s);
1786         lp = (ssize_t)strlen_w(pattern);
1787         li = (ssize_t)strlen_w(insert);
1788
1789         if (!*pattern)
1790                 return;
1791
1792         while (lp <= ls && (p = strstr_w(s,pattern))) {
1793                 if (len && (ls + (li-lp) >= len)) {
1794                         fstring out;
1795                         DEBUG(0,("ERROR: string overflow by %d in string_sub_w(%.50s, %d)\n",
1796                                  (int)(sizeof(smb_ucs2_t)*(ls + (li-lp) - len)),
1797                                  unicode_to_unix(out,pattern,sizeof(out)), (int)len*sizeof(smb_ucs2_t)));
1798                         break;
1799                 }
1800                 if (li != lp)
1801                         memmove(p+li,p+lp,sizeof(smb_ucs2_t)*(strlen_w(p+lp)+1));
1802
1803                 for (i=0;i<li;i++) {
1804                         switch (insert[i]) {
1805                         case (smb_ucs2_t)'`':
1806                         case (smb_ucs2_t)'"':
1807                         case (smb_ucs2_t)'\'':
1808                         case (smb_ucs2_t)';':
1809                         case (smb_ucs2_t)'$':
1810                         case (smb_ucs2_t)'%':
1811                         case (smb_ucs2_t)'\r':
1812                         case (smb_ucs2_t)'\n':
1813                                 p[i] = (smb_ucs2_t)'_';
1814                                 break;
1815                         default:
1816                                 p[i] = insert[i];
1817                         }
1818                 }
1819                 s = p + li;
1820                 ls += (li-lp);
1821         }
1822 }
1823
1824 void fstring_sub_w(smb_ucs2_t *s,const smb_ucs2_t *pattern,const smb_ucs2_t *insert)
1825 {
1826         string_sub_w(s, pattern, insert, sizeof(wfstring));
1827 }
1828
1829 void pstring_sub_w(smb_ucs2_t *s,const smb_ucs2_t *pattern,smb_ucs2_t *insert)
1830 {
1831         string_sub_w(s, pattern, insert, sizeof(wpstring));
1832 }
1833
1834 /****************************************************************************
1835  Similar to string_sub() but allows for any character to be substituted.
1836  Use with caution !
1837  if len==0 then no length check is performed.
1838 ****************************************************************************/
1839
1840 void all_string_sub_w(smb_ucs2_t *s,const smb_ucs2_t *pattern,const smb_ucs2_t *insert, size_t len)
1841 {
1842         smb_ucs2_t *p;
1843         ssize_t ls,lp,li;
1844
1845         if (!insert || !pattern || !s)
1846                 return;
1847
1848         ls = (ssize_t)strlen_w(s);
1849         lp = (ssize_t)strlen_w(pattern);
1850         li = (ssize_t)strlen_w(insert);
1851
1852         if (!*pattern)
1853                 return;
1854
1855         while (lp <= ls && (p = strstr_w(s,pattern))) {
1856                 if (len && (ls + (li-lp) >= len)) {
1857                         fstring out;
1858                         DEBUG(0,("ERROR: string overflow by %d in all_string_sub_w(%.50s, %d)\n",
1859                                  (int)(sizeof(smb_ucs2_t)*(ls + (li-lp) - len)),
1860                                  unicode_to_unix(out,pattern,sizeof(out)), (int)len*sizeof(smb_ucs2_t)));
1861                         break;
1862                 }
1863                 if (li != lp)
1864                         memmove(p+li,p+lp,sizeof(smb_ucs2_t)*(strlen_w(p+lp)+1));
1865
1866                 memcpy(p, insert, li*sizeof(smb_ucs2_t));
1867                 s = p + li;
1868                 ls += (li-lp);
1869         }
1870 }
1871
1872 /****************************************************************************
1873  Splits out the front and back at a separator.
1874 ****************************************************************************/
1875
1876 void split_at_last_component_w(smb_ucs2_t *path, smb_ucs2_t *front, smb_ucs2_t sep, smb_ucs2_t *back)
1877 {
1878     smb_ucs2_t *p = strrchr_w(path, sep);
1879
1880         if (p != NULL)
1881                 *p = 0;
1882
1883         if (front != NULL)
1884                 wpstrcpy(front, path);
1885
1886         if (p != NULL) {
1887                 if (back != NULL)
1888                         wpstrcpy(back, p+1);
1889                 *p = (smb_ucs2_t)'\\';
1890         } else {
1891                 if (back != NULL)
1892                         back[0] = 0;
1893         }
1894 }
1895
1896
1897 /****************************************************************************
1898  Write an octal as a string.
1899 ****************************************************************************/
1900
1901 smb_ucs2_t *octal_string_w(int i)
1902 {
1903         static smb_ucs2_t wret[64];
1904         char ret[64];
1905
1906         if (i == -1)
1907                 slprintf(ret, sizeof(ret), "-1");
1908         else
1909                 slprintf(ret, sizeof(ret), "0%o", i);
1910         return unix_to_unicode(wret, ret, sizeof(wret));
1911 }
1912
1913
1914 /****************************************************************************
1915  Truncate a string at a specified length.
1916  length is in ucs2 units.
1917 ****************************************************************************/
1918
1919 smb_ucs2_t *string_truncate_w(smb_ucs2_t *s, size_t length)
1920 {
1921         if (s && strlen_w(s) > length)
1922                 s[length] = 0;
1923
1924         return s;
1925 }