2 Unix SMB/Netbios implementation.
4 Samba utility functions
5 Copyright (C) Andrew Tridgell 1992-1998
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2 of the License, or
10 (at your option) any later version.
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24 extern int DEBUGLEVEL;
26 smb_ucs2_t wchar_list_sep[] = { (smb_ucs2_t)' ', (smb_ucs2_t)'\t', (smb_ucs2_t)',',
27 (smb_ucs2_t)';', (smb_ucs2_t)':', (smb_ucs2_t)'\n',
28 (smb_ucs2_t)'\r', 0 };
30 * The following are the codepage to ucs2 and vice versa maps.
31 * These are dynamically loaded from a unicode translation file.
34 static smb_ucs2_t *doscp_to_ucs2;
35 static uint16 *ucs2_to_doscp;
37 static smb_ucs2_t *unixcp_to_ucs2;
38 static uint16 *ucs2_to_unixcp;
44 /*******************************************************************
45 Write a string in (little-endian) unicode format. src is in
46 the current DOS codepage. len is the length in bytes of the
47 string pointed to by dst.
49 if null_terminate is True then null terminate the packet (adds 2 bytes)
51 the return value is the length consumed by the string, including the
52 null termination if applied
53 ********************************************************************/
55 int dos_PutUniCode(char *dst,const char *src, ssize_t len, BOOL null_terminate)
58 while (*src && (len > 2)) {
59 size_t skip = get_character_len(*src);
60 smb_ucs2_t val = (*src & 0xff);
63 * If this is a multibyte character (and all DOS/Windows
64 * codepages have at maximum 2 byte multibyte characters)
65 * then work out the index value for the unicode conversion.
69 val = ((val << 8) | (src[1] & 0xff));
71 SSVAL(dst,ret,doscp_to_ucs2[val]);
86 /*******************************************************************
87 Put an ASCII string into a UNICODE string.
89 Warning: doesn't do any codepage !!! BAD !!!
91 Help ! Fix Me ! Fix Me !
92 ********************************************************************/
94 void ascii_to_unistr(char *dest, const char *src, size_t maxlen)
96 char *destend = dest + maxlen;
99 while (dest < destend) {
103 SSVAL(dest, 0, (uint16)c);
109 /*******************************************************************
110 Pull a DOS codepage string out of a UNICODE array. len is in bytes.
111 ********************************************************************/
113 void unistr_to_dos(char *dest, char *src, size_t len)
115 char *destend = dest + len;
117 while (dest < destend) {
118 uint16 ucs2_val = SVAL(src,0);
119 uint16 cp_val = ucs2_to_doscp[ucs2_val];
127 *dest++ = (char)cp_val;
129 *dest++ = (cp_val >> 8) & 0xff;
130 *dest++ = (cp_val & 0xff);
137 /*******************************************************************
138 Skip past a unicode string, but not more than len. Always move
139 past a terminating zero if found.
140 ********************************************************************/
142 char *skip_unibuf(char *src, size_t len)
144 char *srcend = src + len;
146 while (src < srcend && SVAL(src,0))
155 /*******************************************************************
156 Return a DOS codepage version of a little-endian unicode string.
157 len is the filename length (ignoring any terminating zero) in uint16
158 units. Always null terminates.
159 Hack alert: uses fixed buffer(s).
160 ********************************************************************/
162 char *dos_unistrn2(uint16 *src, int len)
164 static char lbufs[8][MAXUNI];
166 char *lbuf = lbufs[nexti];
171 for (p = lbuf; (len > 0) && (p-lbuf < MAXUNI-3) && *src; len--, src++) {
172 uint16 ucs2_val = SVAL(src,0);
173 uint16 cp_val = ucs2_to_doscp[ucs2_val];
178 *p++ = (cp_val >> 8) & 0xff;
179 *p++ = (cp_val & 0xff);
187 static char lbufs[8][MAXUNI];
190 /*******************************************************************
191 Return a DOS codepage version of a little-endian unicode string.
192 Hack alert: uses fixed buffer(s).
193 ********************************************************************/
195 char *dos_unistr2(uint16 *src)
197 char *lbuf = lbufs[nexti];
202 for (p = lbuf; *src && (p-lbuf < MAXUNI-3); src++) {
203 uint16 ucs2_val = SVAL(src,0);
204 uint16 cp_val = ucs2_to_doscp[ucs2_val];
209 *p++ = (cp_val >> 8) & 0xff;
210 *p++ = (cp_val & 0xff);
218 /*******************************************************************
219 Return a DOS codepage version of a little-endian unicode string
220 ********************************************************************/
222 char *dos_unistr2_to_str(UNISTR2 *str)
224 char *lbuf = lbufs[nexti];
226 uint16 *src = str->buffer;
227 int max_size = MIN(sizeof(str->buffer)-3, str->uni_str_len);
231 for (p = lbuf; *src && p-lbuf < max_size; src++) {
232 uint16 ucs2_val = SVAL(src,0);
233 uint16 cp_val = ucs2_to_doscp[ucs2_val];
238 *p++ = (cp_val >> 8) & 0xff;
239 *p++ = (cp_val & 0xff);
247 /*******************************************************************
248 Convert a UNISTR2 structure to an ASCII string
249 Warning: this version does DOS codepage.
250 ********************************************************************/
252 void unistr2_to_ascii(char *dest, const UNISTR2 *str, size_t maxlen)
260 len = MIN(str->uni_str_len, maxlen);
261 destend = dest + len;
263 while (dest < destend)
274 ucs2_val = SVAL(src++,0);
275 cp_val = ucs2_to_doscp[ucs2_val];
278 *(dest++) = (char)cp_val;
280 *dest= (cp_val >> 8) & 0xff;
281 *(dest++) = (cp_val & 0xff);
289 /*******************************************************************
290 Return a number stored in a buffer
291 ********************************************************************/
293 uint32 buffer2_to_uint32(BUFFER2 *str)
295 if (str->buf_len == 4)
296 return IVAL(str->buffer, 0);
301 /*******************************************************************
302 Return a DOS codepage version of a NOTunicode string
303 ********************************************************************/
305 char *dos_buffer2_to_str(BUFFER2 *str)
307 char *lbuf = lbufs[nexti];
309 uint16 *src = str->buffer;
310 int max_size = MIN(sizeof(str->buffer)-3, str->buf_len/2);
314 for (p = lbuf; *src && p-lbuf < max_size; src++) {
315 uint16 ucs2_val = SVAL(src,0);
316 uint16 cp_val = ucs2_to_doscp[ucs2_val];
321 *p++ = (cp_val >> 8) & 0xff;
322 *p++ = (cp_val & 0xff);
330 /*******************************************************************
331 Return a dos codepage version of a NOTunicode string
332 ********************************************************************/
334 char *dos_buffer2_to_multistr(BUFFER2 *str)
336 char *lbuf = lbufs[nexti];
338 uint16 *src = str->buffer;
339 int max_size = MIN(sizeof(str->buffer)-3, str->buf_len/2);
343 for (p = lbuf; p-lbuf < max_size; src++) {
347 uint16 ucs2_val = SVAL(src,0);
348 uint16 cp_val = ucs2_to_doscp[ucs2_val];
353 *p++ = (cp_val >> 8) & 0xff;
354 *p++ = (cp_val & 0xff);
363 /*******************************************************************
364 Create a null-terminated unicode string from a null-terminated DOS codepage string.
366 Return number of unicode chars copied, excluding the null character.
367 Unicode strings created are in little-endian format.
368 ********************************************************************/
370 size_t dos_struni2(char *dst, const char *src, size_t max_len)
378 for (; *src && len < max_len-2; len++, dst +=2) {
379 size_t skip = get_character_len(*src);
380 smb_ucs2_t val = (*src & 0xff);
383 * If this is a multibyte character (and all DOS/Windows
384 * codepages have at maximum 2 byte multibyte characters)
385 * then work out the index value for the unicode conversion.
389 val = ((val << 8) | (src[1] & 0xff));
391 SSVAL(dst,0,doscp_to_ucs2[val]);
404 /*******************************************************************
405 Return a DOS codepage version of a little-endian unicode string.
406 Hack alert: uses fixed buffer(s).
407 ********************************************************************/
409 char *dos_unistr(char *buf)
411 char *lbuf = lbufs[nexti];
412 uint16 *src = (uint16 *)buf;
417 for (p = lbuf; *src && p-lbuf < MAXUNI-3; src++) {
418 uint16 ucs2_val = SVAL(src,0);
419 uint16 cp_val = ucs2_to_doscp[ucs2_val];
424 *p++ = (cp_val >> 8) & 0xff;
425 *p++ = (cp_val & 0xff);
433 /*******************************************************************
434 Strcpy for unicode strings. returns length (in num of wide chars)
435 ********************************************************************/
437 int unistrcpy(char *dst, char *src)
440 uint16 *wsrc = (uint16 *)src;
441 uint16 *wdst = (uint16 *)dst;
454 /*******************************************************************
455 Free any existing maps.
456 ********************************************************************/
458 static void free_maps(smb_ucs2_t **pp_cp_to_ucs2, uint16 **pp_ucs2_to_cp)
460 /* this handles identity mappings where we share the pointer */
461 if (*pp_ucs2_to_cp == *pp_cp_to_ucs2) {
462 *pp_ucs2_to_cp = NULL;
465 if (*pp_cp_to_ucs2) {
466 free(*pp_cp_to_ucs2);
467 *pp_cp_to_ucs2 = NULL;
470 if (*pp_ucs2_to_cp) {
471 free(*pp_ucs2_to_cp);
472 *pp_ucs2_to_cp = NULL;
477 /*******************************************************************
478 Build a default (null) codepage to unicode map.
479 ********************************************************************/
481 void default_unicode_map(smb_ucs2_t **pp_cp_to_ucs2, uint16 **pp_ucs2_to_cp)
485 free_maps(pp_cp_to_ucs2, pp_ucs2_to_cp);
487 if ((*pp_ucs2_to_cp = (uint16 *)malloc(2*65536)) == NULL) {
488 DEBUG(0,("default_unicode_map: malloc fail for ucs2_to_cp size %u.\n", 2*65536));
492 *pp_cp_to_ucs2 = *pp_ucs2_to_cp; /* Default map is an identity. */
493 for (i = 0; i < 65536; i++)
494 (*pp_cp_to_ucs2)[i] = i;
497 /*******************************************************************
498 Load a codepage to unicode and vice-versa map.
499 ********************************************************************/
501 BOOL load_unicode_map(const char *codepage, smb_ucs2_t **pp_cp_to_ucs2, uint16 **pp_ucs2_to_cp)
503 pstring unicode_map_file_name;
506 smb_ucs2_t *cp_to_ucs2 = *pp_cp_to_ucs2;
507 uint16 *ucs2_to_cp = *pp_ucs2_to_cp;
508 size_t cp_to_ucs2_size;
509 size_t ucs2_to_cp_size;
512 char buf[UNICODE_MAP_HEADER_SIZE];
514 DEBUG(5, ("load_unicode_map: loading unicode map for codepage %s.\n", codepage));
516 if (*codepage == '\0')
519 if(strlen(CODEPAGEDIR) + 13 + strlen(codepage) > sizeof(unicode_map_file_name)) {
520 DEBUG(0,("load_unicode_map: filename too long to load\n"));
524 pstrcpy(unicode_map_file_name, CODEPAGEDIR);
525 pstrcat(unicode_map_file_name, "/");
526 pstrcat(unicode_map_file_name, "unicode_map.");
527 pstrcat(unicode_map_file_name, codepage);
529 if(sys_stat(unicode_map_file_name,&st)!=0) {
530 DEBUG(0,("load_unicode_map: filename %s does not exist.\n",
531 unicode_map_file_name));
537 if ((size != UNICODE_MAP_HEADER_SIZE + 4*65536) && (size != UNICODE_MAP_HEADER_SIZE +(2*256 + 2*65536))) {
538 DEBUG(0,("load_unicode_map: file %s is an incorrect size for a \
539 unicode map file (size=%d).\n", unicode_map_file_name, (int)size));
543 if((fp = sys_fopen( unicode_map_file_name, "r")) == NULL) {
544 DEBUG(0,("load_unicode_map: cannot open file %s. Error was %s\n",
545 unicode_map_file_name, strerror(errno)));
549 if(fread( buf, 1, UNICODE_MAP_HEADER_SIZE, fp)!=UNICODE_MAP_HEADER_SIZE) {
550 DEBUG(0,("load_unicode_map: cannot read header from file %s. Error was %s\n",
551 unicode_map_file_name, strerror(errno)));
555 /* Check the version value */
556 if(SVAL(buf,UNICODE_MAP_VERSION_OFFSET) != UNICODE_MAP_FILE_VERSION_ID) {
557 DEBUG(0,("load_unicode_map: filename %s has incorrect version id. \
558 Needed %hu, got %hu.\n",
559 unicode_map_file_name, (uint16)UNICODE_MAP_FILE_VERSION_ID,
560 SVAL(buf,UNICODE_MAP_VERSION_OFFSET)));
564 /* Check the codepage value */
565 if(!strequal(&buf[UNICODE_MAP_CLIENT_CODEPAGE_OFFSET], codepage)) {
566 DEBUG(0,("load_unicode_map: codepage %s in file %s is not the same as that \
567 requested (%s).\n", &buf[UNICODE_MAP_CLIENT_CODEPAGE_OFFSET], unicode_map_file_name, codepage ));
571 ucs2_to_cp_size = 2*65536;
572 if (size == UNICODE_MAP_HEADER_SIZE + 4*65536) {
574 * This is a multibyte code page.
576 cp_to_ucs2_size = 2*65536;
579 * Single byte code page.
581 cp_to_ucs2_size = 2*256;
585 * Free any old translation tables.
588 free_maps(pp_cp_to_ucs2, pp_ucs2_to_cp);
590 if ((cp_to_ucs2 = (smb_ucs2_t *)malloc(cp_to_ucs2_size)) == NULL) {
591 DEBUG(0,("load_unicode_map: malloc fail for cp_to_ucs2 size %u.\n", cp_to_ucs2_size ));
595 if ((ucs2_to_cp = (uint16 *)malloc(ucs2_to_cp_size)) == NULL) {
596 DEBUG(0,("load_unicode_map: malloc fail for ucs2_to_cp size %u.\n", ucs2_to_cp_size ));
600 if(fread( (char *)cp_to_ucs2, 1, cp_to_ucs2_size, fp)!=cp_to_ucs2_size) {
601 DEBUG(0,("load_unicode_map: cannot read cp_to_ucs2 from file %s. Error was %s\n",
602 unicode_map_file_name, strerror(errno)));
606 if(fread( (char *)ucs2_to_cp, 1, ucs2_to_cp_size, fp)!=ucs2_to_cp_size) {
607 DEBUG(0,("load_unicode_map: cannot read ucs2_to_cp from file %s. Error was %s\n",
608 unicode_map_file_name, strerror(errno)));
613 * Now ensure the 16 bit values are in the correct endianness.
616 for (i = 0; i < cp_to_ucs2_size/2; i++)
617 cp_to_ucs2[i] = SVAL(cp_to_ucs2,i*2);
619 for (i = 0; i < ucs2_to_cp_size/2; i++)
620 ucs2_to_cp[i] = SVAL(ucs2_to_cp,i*2);
624 *pp_cp_to_ucs2 = cp_to_ucs2;
625 *pp_ucs2_to_cp = ucs2_to_cp;
631 /* pseudo destructor :-) */
636 free_maps(pp_cp_to_ucs2, pp_ucs2_to_cp);
638 default_unicode_map(pp_cp_to_ucs2, pp_ucs2_to_cp);
643 /*******************************************************************
644 Load a dos codepage to unicode and vice-versa map.
645 ********************************************************************/
647 BOOL load_dos_unicode_map(int codepage)
649 fstring codepage_str;
651 slprintf(codepage_str, sizeof(fstring)-1, "%03d", codepage);
652 return load_unicode_map(codepage_str, &doscp_to_ucs2, &ucs2_to_doscp);
655 /*******************************************************************
656 Load a UNIX codepage to unicode and vice-versa map.
657 ********************************************************************/
659 BOOL load_unix_unicode_map(const char *unix_char_set)
661 fstring upper_unix_char_set;
663 fstrcpy(upper_unix_char_set, unix_char_set);
664 strupper(upper_unix_char_set);
665 return load_unicode_map(upper_unix_char_set, &unixcp_to_ucs2, &ucs2_to_unixcp);
668 /*******************************************************************
669 The following functions reproduce many of the non-UNICODE standard
670 string functions in Samba.
671 ********************************************************************/
673 /*******************************************************************
674 Convert a UNICODE string to multibyte format. Note that the 'src' is in
675 native byte order, not little endian. Always zero terminates.
677 ********************************************************************/
679 static char *unicode_to_multibyte(char *dst, const smb_ucs2_t *src,
680 size_t dst_len, const uint16 *ucs2_to_cp)
684 for(dst_pos = 0; *src && (dst_pos < dst_len - 1);) {
685 smb_ucs2_t val = ucs2_to_cp[*src++];
687 dst[dst_pos++] = (char)val;
690 if(dst_pos >= dst_len - 2)
694 * A 2 byte value is always written as
695 * high/low into the buffer stream.
698 dst[dst_pos++] = (char)((val >> 8) & 0xff);
699 dst[dst_pos++] = (char)(val & 0xff);
708 /*******************************************************************
709 Convert a multibyte string to UNICODE format. Note that the 'dst' is in
710 native byte order, not little endian. Always zero terminates.
712 ********************************************************************/
714 smb_ucs2_t *multibyte_to_unicode(smb_ucs2_t *dst, const char *src,
715 size_t dst_len, smb_ucs2_t *cp_to_ucs2)
719 dst_len /= sizeof(smb_ucs2_t); /* Convert to smb_ucs2_t units. */
721 for(i = 0; (i < (dst_len - 1)) && src[i];) {
722 size_t skip = skip_multibyte_char(*src);
723 smb_ucs2_t val = (*src & 0xff);
726 * If this is a multibyte character
727 * then work out the index value for the unicode conversion.
731 val = ((val << 8) | (src[1] & 0xff));
733 dst[i++] = cp_to_ucs2[val];
745 /*******************************************************************
746 Convert a UNICODE string to multibyte format. Note that the 'src' is in
747 native byte order, not little endian. Always zero terminates.
748 This function may be replaced if the MB codepage format is an
749 encoded one (ie. utf8, hex). See the code in lib/kanji.c
750 for details. dst_len is in bytes.
751 ********************************************************************/
753 char *unicode_to_unix(char *dst, const smb_ucs2_t *src, size_t dst_len)
755 return unicode_to_multibyte(dst, src, dst_len, ucs2_to_unixcp);
758 /*******************************************************************
759 Convert a UNIX string to UNICODE format. Note that the 'dst' is in
760 native byte order, not little endian. Always zero terminates.
761 This function may be replaced if the UNIX codepage format is a
762 multi-byte one (ie. JIS, SJIS or utf8). See the code in lib/kanji.c
763 for details. dst_len is in bytes, not ucs2 units.
764 ********************************************************************/
766 smb_ucs2_t *unix_to_unicode(smb_ucs2_t *dst, const char *src, size_t dst_len)
768 return multibyte_to_unicode(dst, src, dst_len, unixcp_to_ucs2);
771 /*******************************************************************
772 Convert a UNICODE string to DOS format. Note that the 'src' is in
773 native byte order, not little endian. Always zero terminates.
775 ********************************************************************/
777 char *unicode_to_dos(char *dst, const smb_ucs2_t *src, size_t dst_len)
779 return unicode_to_multibyte(dst, src, dst_len, ucs2_to_doscp);
782 /*******************************************************************
783 Convert a single UNICODE character to DOS codepage. Returns the
784 number of bytes in the DOS codepage character.
785 ********************************************************************/
787 size_t unicode_to_dos_char(char *dst, const smb_ucs2_t src)
789 smb_ucs2_t val = ucs2_to_doscp[src];
795 * A 2 byte value is always written as
796 * high/low into the buffer stream.
799 dst[0] = (char)((val >> 8) & 0xff);
800 dst[1] = (char)(val & 0xff);
804 /*******************************************************************
805 Convert a DOS string to UNICODE format. Note that the 'dst' is in
806 native byte order, not little endian. Always zero terminates.
807 This function may be replaced if the DOS codepage format is a
808 multi-byte one (ie. JIS, SJIS or utf8). See the code in lib/kanji.c
809 for details. dst_len is in bytes, not ucs2 units.
810 ********************************************************************/
812 smb_ucs2_t *dos_to_unicode(smb_ucs2_t *dst, const char *src, size_t dst_len)
814 return multibyte_to_unicode(dst, src, dst_len, doscp_to_ucs2);
817 /*******************************************************************
818 Count the number of characters in a smb_ucs2_t string.
819 ********************************************************************/
821 size_t strlen_w(const smb_ucs2_t *src)
825 for(len = 0; *src; len++)
831 /*******************************************************************
832 Safe wstring copy into a known length string. maxlength includes
833 the terminating zero. maxlength is in bytes; it is converted to ucs2 units internally.
834 ********************************************************************/
836 smb_ucs2_t *safe_strcpy_w(smb_ucs2_t *dest,const smb_ucs2_t *src, size_t maxlength)
841 DEBUG(0,("ERROR: NULL dest in safe_strcpy_w\n"));
850 maxlength /= sizeof(smb_ucs2_t);
852 ucs2_len = strlen_w(src);
854 if (ucs2_len >= maxlength) {
856 DEBUG(0,("ERROR: string overflow by %u bytes in safe_strcpy_w [%.50s]\n",
857 (unsigned int)((ucs2_len-maxlength)*sizeof(smb_ucs2_t)),
858 unicode_to_unix(out,src,sizeof(out))) );
859 ucs2_len = maxlength - 1;
862 memcpy(dest, src, ucs2_len*sizeof(smb_ucs2_t));
867 /*******************************************************************
868 Safe string cat into a string. maxlength includes the terminating zero.
869 maxlength is in ucs2 units.
870 ********************************************************************/
872 smb_ucs2_t *safe_strcat_w(smb_ucs2_t *dest, const smb_ucs2_t *src, size_t maxlength)
874 size_t ucs2_src_len, ucs2_dest_len;
877 DEBUG(0,("ERROR: NULL dest in safe_strcat_w\n"));
884 ucs2_src_len = strlen_w(src);
885 ucs2_dest_len = strlen_w(dest);
887 if (ucs2_src_len + ucs2_dest_len >= maxlength) {
889 int new_len = maxlength - ucs2_dest_len - 1;
890 DEBUG(0,("ERROR: string overflow by %u characters in safe_strcat_w [%.50s]\n",
891 (unsigned int)(sizeof(smb_ucs2_t)*(ucs2_src_len + ucs2_dest_len - maxlength)),
892 unicode_to_unix(out,src,sizeof(out))) );
893 ucs2_src_len = (size_t)(new_len > 0 ? new_len : 0);
896 memcpy(&dest[ucs2_dest_len], src, ucs2_src_len*sizeof(smb_ucs2_t));
897 dest[ucs2_dest_len + ucs2_src_len] = 0;
901 /*******************************************************************
902 Compare the two strings s1 and s2.
903 ********************************************************************/
905 int strcmp_w(const smb_ucs2_t *s1, const smb_ucs2_t *s2)
922 /*******************************************************************
923 Compare the first len characters of s1 to s2. len is in ucs2 units.
924 ********************************************************************/
926 int strncmp_w(const smb_ucs2_t *s1, const smb_ucs2_t *s2, size_t len)
930 for (; len != 0; --len) {
944 /*******************************************************************
945 Search for the string s2 within s1.
946 ********************************************************************/
948 smb_ucs2_t *strstr_w(const smb_ucs2_t *s1, const smb_ucs2_t *s2)
950 size_t len = strlen_w(s2);
953 return (smb_ucs2_t *)s1;
957 if (strncmp_w(s1, s2, len) == 0)
958 return (smb_ucs2_t *)s1;
964 /*******************************************************************
965 Search for ucs2 char c from the beginning of s.
966 ********************************************************************/
968 smb_ucs2_t *strchr_w(const smb_ucs2_t *s, smb_ucs2_t c)
972 return (smb_ucs2_t *)s;
978 /*******************************************************************
979 Search for ucs2 char c from the end of s.
980 ********************************************************************/
982 smb_ucs2_t *strrchr_w(const smb_ucs2_t *s, smb_ucs2_t c)
984 smb_ucs2_t *retval = 0;
988 retval = (smb_ucs2_t *)s;
994 /*******************************************************************
995 Find the next token in s1, delimited by any ucs2 char in s2.
996 ********************************************************************/
998 smb_ucs2_t *strtok_w(smb_ucs2_t *s1, const smb_ucs2_t *s2)
1000 static smb_ucs2_t *s = NULL;
1009 for (q = s1; *s1; s1++) {
1010 smb_ucs2_t *p = strchr_w(s2, *s1);
1028 /*******************************************************************
1029 Duplicate a ucs2 string.
1030 ********************************************************************/
1032 smb_ucs2_t *strdup_w(const smb_ucs2_t *s)
1034 size_t newlen = (strlen_w(s)+1)*sizeof(smb_ucs2_t);
1035 smb_ucs2_t *newstr = (smb_ucs2_t *)malloc(newlen);
1038 safe_strcpy_w(newstr, s, newlen);
1042 /*******************************************************************
1043 Mapping tables for UNICODE character. Allows toupper/tolower and
1044 isXXX functions to work.
1046 tridge: split into 2 pieces. This saves us 5/6 of the memory
1047 with a small speed penalty
1048 The magic constants mark where table 1 ends and where table 2 begins.
1050 ********************************************************************/
1055 unsigned char flags;
1056 } smb_unicode_table_t;
1058 #define TABLE1_BOUNDARY 9450
1059 #define TABLE2_BOUNDARY 64256
1061 static smb_unicode_table_t map_table1[] = {
1062 #include "unicode_map_table1.h"
1065 static smb_unicode_table_t map_table2[] = {
1066 #include "unicode_map_table2.h"
1069 static unsigned char map_table_flags(smb_ucs2_t v)
1071 if (v < TABLE1_BOUNDARY) return map_table1[v].flags;
1072 if (v >= TABLE2_BOUNDARY) return map_table2[v - TABLE2_BOUNDARY].flags;
1076 static smb_ucs2_t map_table_lower(smb_ucs2_t v)
1078 if (v < TABLE1_BOUNDARY) return map_table1[v].lower;
1079 if (v >= TABLE2_BOUNDARY) return map_table2[v - TABLE2_BOUNDARY].lower;
1083 static smb_ucs2_t map_table_upper(smb_ucs2_t v)
1085 if (v < TABLE1_BOUNDARY) return map_table1[v].upper;
1086 if (v >= TABLE2_BOUNDARY) return map_table2[v - TABLE2_BOUNDARY].upper;
1090 /*******************************************************************
1091 Is an upper case wchar.
1092 ********************************************************************/
1094 int isupper_w( smb_ucs2_t val)
1096 return (map_table_flags(val) & UNI_UPPER);
1099 /*******************************************************************
1100 Is a lower case wchar.
1101 ********************************************************************/
1103 int islower_w( smb_ucs2_t val)
1105 return (map_table_flags(val) & UNI_LOWER);
1108 /*******************************************************************
1110 ********************************************************************/
1112 int isdigit_w( smb_ucs2_t val)
1114 return (map_table_flags(val) & UNI_DIGIT);
1117 /*******************************************************************
1118 Is a hex digit wchar.
1119 ********************************************************************/
1121 int isxdigit_w( smb_ucs2_t val)
1123 return (map_table_flags(val) & UNI_XDIGIT);
1126 /*******************************************************************
1128 ********************************************************************/
1130 int isspace_w( smb_ucs2_t val)
1132 return (map_table_flags(val) & UNI_SPACE);
1135 /*******************************************************************
1136 Convert a wchar to upper case.
1137 ********************************************************************/
1139 smb_ucs2_t toupper_w( smb_ucs2_t val )
1141 return map_table_upper(val);
1144 /*******************************************************************
1145 Convert a wchar to lower case.
1146 ********************************************************************/
1148 smb_ucs2_t tolower_w( smb_ucs2_t val )
1150 return map_table_lower(val);
1153 static smb_ucs2_t *last_ptr = NULL;
1155 void set_first_token_w(smb_ucs2_t *ptr)
1160 /****************************************************************************
1161 Get the next token from a string, return False if none found
1162 handles double-quotes.
1163 Based on a routine by GJC@VILLAGE.COM.
1164 Extensively modified by Andrew.Tridgell@anu.edu.au
1165 bufsize is in bytes.
1166 ****************************************************************************/
1168 static smb_ucs2_t sep_list[] = { (smb_ucs2_t)' ', (smb_ucs2_t)'\t', (smb_ucs2_t)'\n', (smb_ucs2_t)'\r', 0};
1169 static smb_ucs2_t quotechar = (smb_ucs2_t)'\"';
1171 BOOL next_token_w(smb_ucs2_t **ptr, smb_ucs2_t *buff, smb_ucs2_t *sep, size_t bufsize)
1178 * Convert bufsize to smb_ucs2_t units.
1181 bufsize /= sizeof(smb_ucs2_t);
1191 * Default to simple separators.
1198 * Find the first non sep char.
1201 while(*s && strchr_w(sep,*s))
1212 * Copy over the token.
1215 for (quoted = False; len < bufsize && *s && (quoted || !strchr_w(sep,*s)); s++) {
1216 if (*s == quotechar) {
1224 *ptr = (*s) ? s+1 : s;
1231 /****************************************************************************
1232 Convert list of tokens to array; dependent on above routine.
1233 Uses last_ptr from above - bit of a hack.
1234 ****************************************************************************/
1236 smb_ucs2_t **toktocliplist_w(int *ctok, smb_ucs2_t *sep)
1238 smb_ucs2_t *s=last_ptr;
1240 smb_ucs2_t **ret, **iret;
1245 while(*s && strchr_w(sep,*s))
1257 while(*s && (!strchr_w(sep,*s)))
1259 while(*s && strchr_w(sep,*s))
1266 if (!(ret=iret=malloc(ictok*sizeof(smb_ucs2_t *))))
1280 /*******************************************************************
1281 Case insensitive string comparison.
1282 ********************************************************************/
1284 int StrCaseCmp_w(const smb_ucs2_t *s, const smb_ucs2_t *t)
1287 * Compare until we run out of string, either t or s, or find a difference.
1290 while (*s && *t && toupper_w(*s) == toupper_w(*t)) {
1295 return(toupper_w(*s) - toupper_w(*t));
1298 /*******************************************************************
1299 Case insensitive string comparison, length limited.
1301 ********************************************************************/
1303 int StrnCaseCmp_w(const smb_ucs2_t *s, const smb_ucs2_t *t, size_t n)
1306 * Compare until we run out of string, either t or s, or chars.
1309 while (n && *s && *t && toupper_w(*s) == toupper_w(*t)) {
1316 * Not run out of chars - strings are different lengths.
1320 return(toupper_w(*s) - toupper_w(*t));
1323 * Identical up to where we run out of chars,
1324 * and strings are same length.
1330 /*******************************************************************
1332 ********************************************************************/
1334 BOOL strequal_w(const smb_ucs2_t *s1, const smb_ucs2_t *s2)
1341 return(StrCaseCmp_w(s1,s2)==0);
1344 /*******************************************************************
1345 Compare 2 strings up to and including the nth char. n is in ucs2 units.
1347 ******************************************************************/
1349 BOOL strnequal_w(const smb_ucs2_t *s1,const smb_ucs2_t *s2,size_t n)
1353 if (!s1 || !s2 || !n)
1356 return(StrnCaseCmp_w(s1,s2,n)==0);
1359 /*******************************************************************
1360 Compare 2 strings (case sensitive).
1361 ********************************************************************/
1363 BOOL strcsequal_w(const smb_ucs2_t *s1,const smb_ucs2_t *s2)
1370 return(strcmp_w(s1,s2)==0);
1373 /*******************************************************************
1374 Convert a string to lower case.
1375 ********************************************************************/
1377 void strlower_w(smb_ucs2_t *s)
1386 /*******************************************************************
1387 Convert a string to upper case.
1388 ********************************************************************/
1390 void strupper_w(smb_ucs2_t *s)
1399 /*******************************************************************
1400 Convert a string to "normal" form.
1401 ********************************************************************/
1403 void strnorm_w(smb_ucs2_t *s)
1405 extern int case_default;
1406 if (case_default == CASE_UPPER)
1412 /*******************************************************************
1413 Check if a string is in "normal" case.
1414 ********************************************************************/
1416 BOOL strisnormal_w(smb_ucs2_t *s)
1418 extern int case_default;
1419 if (case_default == CASE_UPPER)
1420 return(!strhaslower_w(s));
1422 return(!strhasupper_w(s));
1425 /****************************************************************************
1427 ****************************************************************************/
1429 void string_replace_w(smb_ucs2_t *s, smb_ucs2_t oldc, smb_ucs2_t newc)
1438 /*******************************************************************
1439 Skip past some strings in a buffer. n is in bytes.
1440 ********************************************************************/
1442 smb_ucs2_t *skip_string_w(smb_ucs2_t *buf,size_t n)
1445 buf += (strlen_w(buf)*sizeof(smb_ucs2_t)) + 1;
1449 /*******************************************************************
1450 Count the number of characters in a string. Same as strlen_w in
1451 smb_ucs2_t string units.
1452 ********************************************************************/
1454 size_t str_charnum_w(const smb_ucs2_t *s)
1459 /*******************************************************************
1460 Trim the specified elements off the front and back of a string.
1461 ********************************************************************/
1463 BOOL trim_string_w(smb_ucs2_t *s,const smb_ucs2_t *front,const smb_ucs2_t *back)
1466 size_t front_len = (front && *front) ? strlen_w(front) : 0;
1467 size_t back_len = (back && *back) ? strlen_w(back) : 0;
1470 while (front_len && strncmp_w(s, front, front_len) == 0) {
1475 if (!(*p = p[front_len]))
1482 s_len = strlen_w(s);
1483 while ((s_len >= back_len) &&
1484 (strncmp_w(s + s_len - back_len, back, back_len)==0)) {
1486 s[s_len - back_len] = 0;
1487 s_len = strlen_w(s);
1494 /****************************************************************************
1495 Does a string have any uppercase chars in it ?
1496 ****************************************************************************/
1498 BOOL strhasupper_w(const smb_ucs2_t *s)
1508 /****************************************************************************
1509 Does a string have any lowercase chars in it ?
1510 ****************************************************************************/
1512 BOOL strhaslower_w(const smb_ucs2_t *s)
1522 /****************************************************************************
1523 Find the number of 'c' chars in a string.
1524 ****************************************************************************/
1526 size_t count_chars_w(const smb_ucs2_t *s,smb_ucs2_t c)
1538 /*******************************************************************
1539 Return True if a string consists only of one particular character.
1540 ********************************************************************/
1542 BOOL str_is_all_w(const smb_ucs2_t *s,smb_ucs2_t c)
1557 /*******************************************************************
1558 Paranoid strcpy into a buffer of given length (includes terminating
1559 zero. Strips out all but 'a-Z0-9' and replaces with '_'. Deliberately
1560 does *NOT* check for multibyte characters. Don't change it !
1561 maxlength is in ucs2 units.
1562 ********************************************************************/
1564 smb_ucs2_t *alpha_strcpy_w(smb_ucs2_t *dest, const smb_ucs2_t *src, size_t maxlength)
1569 DEBUG(0,("ERROR: NULL dest in alpha_strcpy_w\n"));
1578 len = strlen_w(src);
1579 if (len >= maxlength)
1580 len = maxlength - 1;
1582 for(i = 0; i < len; i++) {
1583 smb_ucs2_t val = src[i];
1584 if(isupper_w(val) ||islower_w(val) || isdigit_w(val))
1587 dest[i] = (smb_ucs2_t)'_';
1595 /****************************************************************************
1596 Like strncpy but always null terminates. Make sure there is room !
1597 The variable n should always be one less than the available size and is in
1599 ****************************************************************************/
1601 smb_ucs2_t *StrnCpy_w(smb_ucs2_t *dest,const smb_ucs2_t *src,size_t n)
1603 smb_ucs2_t *d = dest;
1611 while (n-- && (*d++ = *src++))
1617 /****************************************************************************
1618 Like strncpy but copies up to the character marker. Always null terminates.
1619 returns a pointer to the character marker in the source string (src).
1621 ****************************************************************************/
1623 smb_ucs2_t *strncpyn_w(smb_ucs2_t *dest, const smb_ucs2_t *src,size_t n, smb_ucs2_t c)
1628 p = strchr_w(src, c);
1631 smb_ucs2_t mbcval[2];
1634 DEBUG(5, ("strncpyn_w: separator character (%s) not found\n",
1635 unicode_to_unix(cval,mbcval,sizeof(cval)) ));
1639 str_len = PTR_DIFF(p, src) + 1;
1640 safe_strcpy_w(dest, src, MIN(n, str_len));
1645 /*************************************************************
1646 Routine to get hex characters and turn them into a 16 byte array.
1647 The array can be variable length, and any non-hex-numeric
1648 characters are skipped. "0xnn" or "0Xnn" is specially catered
1649 for. len is in bytes.
1650 Valid examples: "0A5D15"; "0x15, 0x49, 0xa2"; "59\ta9\te3\n"
1651 **************************************************************/
1653 static smb_ucs2_t hexprefix[] = { (smb_ucs2_t)'0', (smb_ucs2_t)'x', 0 };
1654 static smb_ucs2_t hexchars[] = { (smb_ucs2_t)'0', (smb_ucs2_t)'1', (smb_ucs2_t)'2', (smb_ucs2_t)'3',
1655 (smb_ucs2_t)'4', (smb_ucs2_t)'5', (smb_ucs2_t)'6', (smb_ucs2_t)'7',
1656 (smb_ucs2_t)'8', (smb_ucs2_t)'9', (smb_ucs2_t)'A', (smb_ucs2_t)'B',
1657 (smb_ucs2_t)'C', (smb_ucs2_t)'D', (smb_ucs2_t)'E', (smb_ucs2_t)'F', 0 };
1659 size_t strhex_to_str_w(char *p, size_t len, const smb_ucs2_t *strhex)
1662 size_t num_chars = 0;
1663 unsigned char lonybble, hinybble;
1664 smb_ucs2_t *p1 = NULL, *p2 = NULL;
1667 * Convert to smb_ucs2_t units.
1670 len /= sizeof(smb_ucs2_t);
1672 for (i = 0; i < len && strhex[i] != 0; i++) {
1673 if (strnequal_w(hexchars, hexprefix, 2)) {
1674 i++; /* skip two chars */
1678 if (!(p1 = strchr_w(hexchars, toupper_w(strhex[i]))))
1681 i++; /* next hex digit */
1683 if (!(p2 = strchr_w(hexchars, toupper_w(strhex[i]))))
1686 /* get the two nybbles */
1687 hinybble = (PTR_DIFF(p1, hexchars)/sizeof(smb_ucs2_t));
1688 lonybble = (PTR_DIFF(p2, hexchars)/sizeof(smb_ucs2_t));
1690 p[num_chars] = (hinybble << 4) | lonybble;
1699 /****************************************************************************
1700 Check if a string is part of a list.
1701 ****************************************************************************/
1703 BOOL in_list_w(smb_ucs2_t *s,smb_ucs2_t *list,BOOL casesensitive)
1711 while (next_token_w(&p,tok,LIST_SEP_W,sizeof(tok))) {
1712 if (casesensitive) {
1713 if (strcmp_w(tok,s) == 0)
1716 if (StrCaseCmp_w(tok,s) == 0)
1723 /* This is used to prevent lots of mallocs of size 2 */
1724 static smb_ucs2_t *null_string = NULL;
1726 /****************************************************************************
1727 Set a string value, allocing the space for the string.
1728 ****************************************************************************/
1730 BOOL string_init_w(smb_ucs2_t **dest,const smb_ucs2_t *src)
1735 if((null_string = (smb_ucs2_t *)malloc(sizeof(smb_ucs2_t))) == NULL) {
1736 DEBUG(0,("string_init_w: malloc fail for null_string.\n"));
1748 *dest = null_string;
1750 (*dest) = (smb_ucs2_t *)malloc(sizeof(smb_ucs2_t)*(l+1));
1751 if ((*dest) == NULL) {
1752 DEBUG(0,("Out of memory in string_init_w\n"));
1756 wpstrcpy(*dest,src);
1761 /****************************************************************************
1762 Free a string value.
1763 ****************************************************************************/
1765 void string_free_w(smb_ucs2_t **s)
1769 if (*s == null_string)
1776 /****************************************************************************
1777 Set a string value, allocing the space for the string, and deallocating any
1779 ****************************************************************************/
1781 BOOL string_set_w(smb_ucs2_t **dest,const smb_ucs2_t *src)
1783 string_free_w(dest);
1785 return(string_init_w(dest,src));
1788 /****************************************************************************
1789 Substitute a string for a pattern in another string. Make sure there is
1792 This routine looks for pattern in s and replaces it with
1793 insert. It may do multiple replacements.
1795 Any of " ; ' $ or ` in the insert string are replaced with _
1796 if len==0 then no length check is performed
1797 len is in ucs2 units.
1798 ****************************************************************************/
1800 void string_sub_w(smb_ucs2_t *s,const smb_ucs2_t *pattern,const smb_ucs2_t *insert, size_t len)
1803 ssize_t ls,lp,li, i;
1805 if (!insert || !pattern || !s)
1808 ls = (ssize_t)strlen_w(s);
1809 lp = (ssize_t)strlen_w(pattern);
1810 li = (ssize_t)strlen_w(insert);
1815 while (lp <= ls && (p = strstr_w(s,pattern))) {
1816 if (len && (ls + (li-lp) >= len)) {
1818 DEBUG(0,("ERROR: string overflow by %d in string_sub_w(%.50s, %d)\n",
1819 (int)(sizeof(smb_ucs2_t)*(ls + (li-lp) - len)),
1820 unicode_to_unix(out,pattern,sizeof(out)), (int)len*sizeof(smb_ucs2_t)));
1824 memmove(p+li,p+lp,sizeof(smb_ucs2_t)*(strlen_w(p+lp)+1));
1826 for (i=0;i<li;i++) {
1827 switch (insert[i]) {
1828 case (smb_ucs2_t)'`':
1829 case (smb_ucs2_t)'"':
1830 case (smb_ucs2_t)'\'':
1831 case (smb_ucs2_t)';':
1832 case (smb_ucs2_t)'$':
1833 case (smb_ucs2_t)'%':
1834 case (smb_ucs2_t)'\r':
1835 case (smb_ucs2_t)'\n':
1836 p[i] = (smb_ucs2_t)'_';
1847 void fstring_sub_w(smb_ucs2_t *s,const smb_ucs2_t *pattern,const smb_ucs2_t *insert)
1849 string_sub_w(s, pattern, insert, sizeof(wfstring));
1852 void pstring_sub_w(smb_ucs2_t *s,const smb_ucs2_t *pattern,smb_ucs2_t *insert)
1854 string_sub_w(s, pattern, insert, sizeof(wpstring));
1857 /****************************************************************************
1858 Similar to string_sub() but allows for any character to be substituted.
1860 if len==0 then no length check is performed.
1861 ****************************************************************************/
1863 void all_string_sub_w(smb_ucs2_t *s,const smb_ucs2_t *pattern,const smb_ucs2_t *insert, size_t len)
1868 if (!insert || !pattern || !s)
1871 ls = (ssize_t)strlen_w(s);
1872 lp = (ssize_t)strlen_w(pattern);
1873 li = (ssize_t)strlen_w(insert);
1878 while (lp <= ls && (p = strstr_w(s,pattern))) {
1879 if (len && (ls + (li-lp) >= len)) {
1881 DEBUG(0,("ERROR: string overflow by %d in all_string_sub_w(%.50s, %d)\n",
1882 (int)(sizeof(smb_ucs2_t)*(ls + (li-lp) - len)),
1883 unicode_to_unix(out,pattern,sizeof(out)), (int)len*sizeof(smb_ucs2_t)));
1887 memmove(p+li,p+lp,sizeof(smb_ucs2_t)*(strlen_w(p+lp)+1));
1889 memcpy(p, insert, li*sizeof(smb_ucs2_t));
1895 /****************************************************************************
1896 Splits out the front and back at a separator.
1897 ****************************************************************************/
1899 void split_at_last_component_w(smb_ucs2_t *path, smb_ucs2_t *front, smb_ucs2_t sep, smb_ucs2_t *back)
1901 smb_ucs2_t *p = strrchr_w(path, sep);
1907 wpstrcpy(front, path);
1911 wpstrcpy(back, p+1);
1912 *p = (smb_ucs2_t)'\\';
1920 /****************************************************************************
1921 Write an octal as a string.
1922 ****************************************************************************/
1924 smb_ucs2_t *octal_string_w(int i)
1926 static smb_ucs2_t wret[64];
1930 slprintf(ret, sizeof(ret), "-1");
1932 slprintf(ret, sizeof(ret), "0%o", i);
1933 return unix_to_unicode(wret, ret, sizeof(wret));
1937 /****************************************************************************
1938 Truncate a string at a specified length.
1939 length is in ucs2 units.
1940 ****************************************************************************/
1942 smb_ucs2_t *string_truncate_w(smb_ucs2_t *s, size_t length)
1944 if (s && strlen_w(s) > length)