2 Unix SMB/Netbios implementation.
4 Samba utility functions
5 Copyright (C) Andrew Tridgell 1992-1998
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2 of the License, or
10 (at your option) any later version.
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24 extern int DEBUGLEVEL;
26 smb_ucs2_t wchar_list_sep[] = { (smb_ucs2_t)' ', (smb_ucs2_t)'\t', (smb_ucs2_t)',',
27 (smb_ucs2_t)';', (smb_ucs2_t)':', (smb_ucs2_t)'\n',
28 (smb_ucs2_t)'\r', 0 };
/*
 * The following are the codepage to ucs2 and vice versa maps.
 * These are dynamically loaded from a unicode translation file.
 */
34 static smb_ucs2_t *doscp_to_ucs2;
35 static uint16 *ucs2_to_doscp;
37 static smb_ucs2_t *unixcp_to_ucs2;
38 static uint16 *ucs2_to_unixcp;
44 /*******************************************************************
45 Write a string in (little-endian) unicode format. src is in
46 the current DOS codepage. len is the length in bytes of the
47 string pointed to by dst.
49 if null_terminate is True then null terminate the packet (adds 2 bytes)
51 the return value is the length in bytes consumed by the string, including the
52 null termination if applied
53 ********************************************************************/
55 size_t dos_PutUniCode(char *dst,const char *src, ssize_t len, BOOL null_terminate)
58 while (*src && (len >= 2)) {
59 size_t skip = get_character_len(*src);
60 smb_ucs2_t val = (*src & 0xff);
63 * If this is a multibyte character (and all DOS/Windows
64 * codepages have at maximum 2 byte multibyte characters)
65 * then work out the index value for the unicode conversion.
69 val = ((val << 8) | (src[1] & 0xff));
71 SSVAL(dst,ret,doscp_to_ucs2[val]);
86 /*******************************************************************
87 Pull a DOS codepage string out of a UNICODE array. len is in bytes.
88 ********************************************************************/
90 void unistr_to_dos(char *dest, const char *src, size_t len)
92 char *destend = dest + len;
94 while (dest < destend) {
95 uint16 ucs2_val = SVAL(src,0);
96 uint16 cp_val = ucs2_to_doscp[ucs2_val];
104 *dest++ = (char)cp_val;
106 *dest++ = (cp_val >> 8) & 0xff;
107 *dest++ = (cp_val & 0xff);
114 /*******************************************************************
115 Skip past a unicode string, but not more than len. Always move
116 past a terminating zero if found.
117 ********************************************************************/
119 char *skip_unibuf(char *src, size_t len)
121 char *srcend = src + len;
123 while (src < srcend && SVAL(src,0))
/*******************************************************************
 Return a DOS codepage version of a little-endian unicode string.
 len is the filename length (ignoring any terminating zero) in uint16
 units. Always null terminates.
 Hack alert: uses fixed buffer(s).
********************************************************************/
139 char *dos_unistrn2(uint16 *src, int len)
141 static char lbufs[8][MAXUNI];
143 char *lbuf = lbufs[nexti];
148 for (p = lbuf; (len > 0) && (p-lbuf < MAXUNI-3) && *src; len--, src++) {
149 uint16 ucs2_val = SVAL(src,0);
150 uint16 cp_val = ucs2_to_doscp[ucs2_val];
155 *p++ = (cp_val >> 8) & 0xff;
156 *p++ = (cp_val & 0xff);
164 static char lbufs[8][MAXUNI];
167 /*******************************************************************
168 Return a DOS codepage version of a little-endian unicode string.
169 Hack alert: uses fixed buffer(s).
170 ********************************************************************/
172 char *dos_unistr2(uint16 *src)
174 char *lbuf = lbufs[nexti];
179 for (p = lbuf; (p-lbuf < MAXUNI-3) && *src; src++) {
180 uint16 ucs2_val = SVAL(src,0);
181 uint16 cp_val = ucs2_to_doscp[ucs2_val];
186 *p++ = (cp_val >> 8) & 0xff;
187 *p++ = (cp_val & 0xff);
195 /*******************************************************************
196 Return a DOS codepage version of a little-endian unicode string
197 ********************************************************************/
199 char *dos_unistr2_to_str(UNISTR2 *str)
201 char *lbuf = lbufs[nexti];
203 uint16 *src = str->buffer;
204 int max_size = MIN(MAXUNI-3, str->uni_str_len);
208 for (p = lbuf; (p-lbuf < max_size) && *src; src++) {
209 uint16 ucs2_val = SVAL(src,0);
210 uint16 cp_val = ucs2_to_doscp[ucs2_val];
215 *p++ = (cp_val >> 8) & 0xff;
216 *p++ = (cp_val & 0xff);
224 /*******************************************************************
225 Put an ASCII string into a UNICODE array (uint16's).
226 ********************************************************************/
227 void ascii_to_unistr(uint16 *dest, const char *src, int maxlen)
229 uint16 *destend = dest + maxlen;
232 while (dest < destend)
240 *(dest++) = (uint16)c;
247 /*******************************************************************
248 Pull an ASCII string out of a UNICODE array (uint16's).
249 ********************************************************************/
251 void unistr_to_ascii(char *dest, const uint16 *src, int len)
253 char *destend = dest + len;
262 /* normal code path for a valid 'src' */
263 while (dest < destend)
278 /*******************************************************************
279 Convert a UNISTR2 structure to an ASCII string
280 Warning: this version does DOS codepage.
281 ********************************************************************/
283 void unistr2_to_ascii(char *dest, const UNISTR2 *str, size_t maxlen)
295 len = MIN(str->uni_str_len, maxlen);
302 for (p = dest; (p-dest < len) && *src; src++) {
303 uint16 ucs2_val = SVAL(src,0);
304 uint16 cp_val = ucs2_to_doscp[ucs2_val];
309 *p = (cp_val >> 8) & 0xff;
310 *p++ = (cp_val & 0xff);
318 /*******************************************************************
319 Return a number stored in a buffer
320 ********************************************************************/
322 uint32 buffer2_to_uint32(BUFFER2 *str)
324 if (str->buf_len == 4)
325 return IVAL(str->buffer, 0);
330 /*******************************************************************
331 Return a DOS codepage version of a NOTunicode string
332 ********************************************************************/
334 char *dos_buffer2_to_str(BUFFER2 *str)
336 char *lbuf = lbufs[nexti];
338 uint16 *src = str->buffer;
339 int max_size = MIN(sizeof(str->buffer)-3, str->buf_len/2);
343 for (p = lbuf; (p-lbuf < max_size) && *src; src++) {
344 uint16 ucs2_val = SVAL(src,0);
345 uint16 cp_val = ucs2_to_doscp[ucs2_val];
350 *p++ = (cp_val >> 8) & 0xff;
351 *p++ = (cp_val & 0xff);
359 /*******************************************************************
360 Return a dos codepage version of a NOTunicode string
361 ********************************************************************/
363 char *dos_buffer2_to_multistr(BUFFER2 *str)
365 char *lbuf = lbufs[nexti];
367 uint16 *src = str->buffer;
368 int max_size = MIN(sizeof(str->buffer)-3, str->buf_len/2);
372 for (p = lbuf; p-lbuf < max_size; src++) {
376 uint16 ucs2_val = SVAL(src,0);
377 uint16 cp_val = ucs2_to_doscp[ucs2_val];
382 *p++ = (cp_val >> 8) & 0xff;
383 *p++ = (cp_val & 0xff);
392 /*******************************************************************
393 Create a null-terminated unicode string from a null-terminated DOS
395 Return number of unicode chars copied, excluding the null character.
396 Unicode strings created are in little-endian format.
397 ********************************************************************/
399 size_t dos_struni2(char *dst, const char *src, size_t max_len)
407 for (; (len < max_len-2) && *src; len++, dst +=2) {
408 size_t skip = get_character_len(*src);
409 smb_ucs2_t val = (*src & 0xff);
412 * If this is a multibyte character (and all DOS/Windows
413 * codepages have at maximum 2 byte multibyte characters)
414 * then work out the index value for the unicode conversion.
418 val = ((val << 8) | (src[1] & 0xff));
420 SSVAL(dst,0,doscp_to_ucs2[val]);
433 /*******************************************************************
434 Return a DOS codepage version of a little-endian unicode string.
435 Hack alert: uses fixed buffer(s).
436 ********************************************************************/
438 char *dos_unistr(char *buf)
440 char *lbuf = lbufs[nexti];
441 uint16 *src = (uint16 *)buf;
446 for (p = lbuf; (p-lbuf < MAXUNI-3) && *src; src++) {
447 uint16 ucs2_val = SVAL(src,0);
448 uint16 cp_val = ucs2_to_doscp[ucs2_val];
453 *p++ = (cp_val >> 8) & 0xff;
454 *p++ = (cp_val & 0xff);
462 /*******************************************************************
463 Strcpy for unicode strings. returns length (in num of wide chars)
464 ********************************************************************/
466 int unistrcpy(char *dst, char *src)
469 uint16 *wsrc = (uint16 *)src;
470 uint16 *wdst = (uint16 *)dst;
483 /*******************************************************************
484 Free any existing maps.
485 ********************************************************************/
487 static void free_maps(smb_ucs2_t **pp_cp_to_ucs2, uint16 **pp_ucs2_to_cp)
489 /* this handles identity mappings where we share the pointer */
490 if (*pp_ucs2_to_cp == *pp_cp_to_ucs2) {
491 *pp_ucs2_to_cp = NULL;
494 if (*pp_cp_to_ucs2) {
495 free(*pp_cp_to_ucs2);
496 *pp_cp_to_ucs2 = NULL;
499 if (*pp_ucs2_to_cp) {
500 free(*pp_ucs2_to_cp);
501 *pp_ucs2_to_cp = NULL;
506 /*******************************************************************
507 Build a default (null) codepage to unicode map.
508 ********************************************************************/
510 void default_unicode_map(smb_ucs2_t **pp_cp_to_ucs2, uint16 **pp_ucs2_to_cp)
514 free_maps(pp_cp_to_ucs2, pp_ucs2_to_cp);
516 if ((*pp_ucs2_to_cp = (uint16 *)malloc(2*65536)) == NULL) {
517 DEBUG(0,("default_unicode_map: malloc fail for ucs2_to_cp size %u.\n", 2*65536));
521 *pp_cp_to_ucs2 = *pp_ucs2_to_cp; /* Default map is an identity. */
522 for (i = 0; i < 65536; i++)
523 (*pp_cp_to_ucs2)[i] = i;
526 /*******************************************************************
527 Load a codepage to unicode and vica-versa map.
528 ********************************************************************/
530 BOOL load_unicode_map(const char *codepage, smb_ucs2_t **pp_cp_to_ucs2, uint16 **pp_ucs2_to_cp)
532 pstring unicode_map_file_name;
535 smb_ucs2_t *cp_to_ucs2 = *pp_cp_to_ucs2;
536 uint16 *ucs2_to_cp = *pp_ucs2_to_cp;
537 size_t cp_to_ucs2_size;
538 size_t ucs2_to_cp_size;
541 char buf[UNICODE_MAP_HEADER_SIZE];
543 DEBUG(5, ("load_unicode_map: loading unicode map for codepage %s.\n", codepage));
545 if (*codepage == '\0')
548 if(strlen(lp_codepagedir()) + 13 + strlen(codepage) >
549 sizeof(unicode_map_file_name)) {
550 DEBUG(0,("load_unicode_map: filename too long to load\n"));
554 pstrcpy(unicode_map_file_name, lp_codepagedir());
555 pstrcat(unicode_map_file_name, "/");
556 pstrcat(unicode_map_file_name, "unicode_map.");
557 pstrcat(unicode_map_file_name, codepage);
559 if(sys_stat(unicode_map_file_name,&st)!=0) {
560 DEBUG(0,("load_unicode_map: filename %s does not exist.\n",
561 unicode_map_file_name));
567 if ((size != UNICODE_MAP_HEADER_SIZE + 4*65536) && (size != UNICODE_MAP_HEADER_SIZE +(2*256 + 2*65536))) {
568 DEBUG(0,("load_unicode_map: file %s is an incorrect size for a \
569 unicode map file (size=%d).\n", unicode_map_file_name, (int)size));
573 if((fp = sys_fopen( unicode_map_file_name, "r")) == NULL) {
574 DEBUG(0,("load_unicode_map: cannot open file %s. Error was %s\n",
575 unicode_map_file_name, strerror(errno)));
579 if(fread( buf, 1, UNICODE_MAP_HEADER_SIZE, fp)!=UNICODE_MAP_HEADER_SIZE) {
580 DEBUG(0,("load_unicode_map: cannot read header from file %s. Error was %s\n",
581 unicode_map_file_name, strerror(errno)));
585 /* Check the version value */
586 if(SVAL(buf,UNICODE_MAP_VERSION_OFFSET) != UNICODE_MAP_FILE_VERSION_ID) {
587 DEBUG(0,("load_unicode_map: filename %s has incorrect version id. \
588 Needed %hu, got %hu.\n",
589 unicode_map_file_name, (uint16)UNICODE_MAP_FILE_VERSION_ID,
590 SVAL(buf,UNICODE_MAP_VERSION_OFFSET)));
594 /* Check the codepage value */
595 if(!strequal(&buf[UNICODE_MAP_CLIENT_CODEPAGE_OFFSET], codepage)) {
596 DEBUG(0,("load_unicode_map: codepage %s in file %s is not the same as that \
597 requested (%s).\n", &buf[UNICODE_MAP_CLIENT_CODEPAGE_OFFSET], unicode_map_file_name, codepage ));
601 ucs2_to_cp_size = 2*65536;
602 if (size == UNICODE_MAP_HEADER_SIZE + 4*65536) {
604 * This is a multibyte code page.
606 cp_to_ucs2_size = 2*65536;
609 * Single byte code page.
611 cp_to_ucs2_size = 2*256;
615 * Free any old translation tables.
618 free_maps(pp_cp_to_ucs2, pp_ucs2_to_cp);
620 if ((cp_to_ucs2 = (smb_ucs2_t *)malloc(cp_to_ucs2_size)) == NULL) {
621 DEBUG(0,("load_unicode_map: malloc fail for cp_to_ucs2 size %u.\n", cp_to_ucs2_size ));
625 if ((ucs2_to_cp = (uint16 *)malloc(ucs2_to_cp_size)) == NULL) {
626 DEBUG(0,("load_unicode_map: malloc fail for ucs2_to_cp size %u.\n", ucs2_to_cp_size ));
630 if(fread( (char *)cp_to_ucs2, 1, cp_to_ucs2_size, fp)!=cp_to_ucs2_size) {
631 DEBUG(0,("load_unicode_map: cannot read cp_to_ucs2 from file %s. Error was %s\n",
632 unicode_map_file_name, strerror(errno)));
636 if(fread( (char *)ucs2_to_cp, 1, ucs2_to_cp_size, fp)!=ucs2_to_cp_size) {
637 DEBUG(0,("load_unicode_map: cannot read ucs2_to_cp from file %s. Error was %s\n",
638 unicode_map_file_name, strerror(errno)));
643 * Now ensure the 16 bit values are in the correct endianness.
646 for (i = 0; i < cp_to_ucs2_size/2; i++)
647 cp_to_ucs2[i] = SVAL(cp_to_ucs2,i*2);
649 for (i = 0; i < ucs2_to_cp_size/2; i++)
650 ucs2_to_cp[i] = SVAL(ucs2_to_cp,i*2);
654 *pp_cp_to_ucs2 = cp_to_ucs2;
655 *pp_ucs2_to_cp = ucs2_to_cp;
661 /* pseudo destructor :-) */
666 free_maps(pp_cp_to_ucs2, pp_ucs2_to_cp);
668 default_unicode_map(pp_cp_to_ucs2, pp_ucs2_to_cp);
673 /*******************************************************************
674 Load a dos codepage to unicode and vica-versa map.
675 ********************************************************************/
677 BOOL load_dos_unicode_map(int codepage)
679 fstring codepage_str;
681 slprintf(codepage_str, sizeof(fstring)-1, "%03d", codepage);
682 return load_unicode_map(codepage_str, &doscp_to_ucs2, &ucs2_to_doscp);
685 /*******************************************************************
686 Load a UNIX codepage to unicode and vica-versa map.
687 ********************************************************************/
689 BOOL load_unix_unicode_map(const char *unix_char_set)
691 fstring upper_unix_char_set;
693 fstrcpy(upper_unix_char_set, unix_char_set);
694 strupper(upper_unix_char_set);
695 return load_unicode_map(upper_unix_char_set, &unixcp_to_ucs2, &ucs2_to_unixcp);
698 /*******************************************************************
699 The following functions reproduce many of the non-UNICODE standard
700 string functions in Samba.
701 ********************************************************************/
703 /*******************************************************************
704 Convert a UNICODE string to multibyte format. Note that the 'src' is in
705 native byte order, not little endian. Always zero terminates.
707 ********************************************************************/
709 static char *unicode_to_multibyte(char *dst, const smb_ucs2_t *src,
710 size_t dst_len, const uint16 *ucs2_to_cp)
714 for(dst_pos = 0; (dst_pos < dst_len - 1) && *src;) {
715 smb_ucs2_t val = ucs2_to_cp[*src++];
717 dst[dst_pos++] = (char)val;
720 if(dst_pos >= dst_len - 2)
724 * A 2 byte value is always written as
725 * high/low into the buffer stream.
728 dst[dst_pos++] = (char)((val >> 8) & 0xff);
729 dst[dst_pos++] = (char)(val & 0xff);
738 /*******************************************************************
739 Convert a multibyte string to UNICODE format. Note that the 'dst' is in
740 native byte order, not little endian. Always zero terminates.
742 ********************************************************************/
744 smb_ucs2_t *multibyte_to_unicode(smb_ucs2_t *dst, const char *src,
745 size_t dst_len, smb_ucs2_t *cp_to_ucs2)
749 dst_len /= sizeof(smb_ucs2_t); /* Convert to smb_ucs2_t units. */
751 for(i = 0; (i < (dst_len - 1)) && src[i];) {
752 size_t skip = skip_multibyte_char(*src);
753 smb_ucs2_t val = (*src & 0xff);
756 * If this is a multibyte character
757 * then work out the index value for the unicode conversion.
761 val = ((val << 8) | (src[1] & 0xff));
763 dst[i++] = cp_to_ucs2[val];
775 /*******************************************************************
776 Convert a UNICODE string to multibyte format. Note that the 'src' is in
777 native byte order, not little endian. Always zero terminates.
778 This function may be replaced if the MB codepage format is an
779 encoded one (ie. utf8, hex). See the code in lib/kanji.c
780 for details. dst_len is in bytes.
781 ********************************************************************/
783 char *unicode_to_unix(char *dst, const smb_ucs2_t *src, size_t dst_len)
785 return unicode_to_multibyte(dst, src, dst_len, ucs2_to_unixcp);
788 /*******************************************************************
789 Convert a UNIX string to UNICODE format. Note that the 'dst' is in
790 native byte order, not little endian. Always zero terminates.
791 This function may be replaced if the UNIX codepage format is a
792 multi-byte one (ie. JIS, SJIS or utf8). See the code in lib/kanji.c
793 for details. dst_len is in bytes, not ucs2 units.
794 ********************************************************************/
796 smb_ucs2_t *unix_to_unicode(smb_ucs2_t *dst, const char *src, size_t dst_len)
798 return multibyte_to_unicode(dst, src, dst_len, unixcp_to_ucs2);
801 /*******************************************************************
802 Convert a UNICODE string to DOS format. Note that the 'src' is in
803 native byte order, not little endian. Always zero terminates.
805 ********************************************************************/
807 char *unicode_to_dos(char *dst, const smb_ucs2_t *src, size_t dst_len)
809 return unicode_to_multibyte(dst, src, dst_len, ucs2_to_doscp);
812 /*******************************************************************
813 Convert a single UNICODE character to DOS codepage. Returns the
814 number of bytes in the DOS codepage character.
815 ********************************************************************/
817 size_t unicode_to_dos_char(char *dst, const smb_ucs2_t src)
819 smb_ucs2_t val = ucs2_to_doscp[src];
825 * A 2 byte value is always written as
826 * high/low into the buffer stream.
829 dst[0] = (char)((val >> 8) & 0xff);
830 dst[1] = (char)(val & 0xff);
834 /*******************************************************************
835 Convert a DOS string to UNICODE format. Note that the 'dst' is in
836 native byte order, not little endian. Always zero terminates.
837 This function may be replaced if the DOS codepage format is a
838 multi-byte one (ie. JIS, SJIS or utf8). See the code in lib/kanji.c
839 for details. dst_len is in bytes, not ucs2 units.
840 ********************************************************************/
842 smb_ucs2_t *dos_to_unicode(smb_ucs2_t *dst, const char *src, size_t dst_len)
844 return multibyte_to_unicode(dst, src, dst_len, doscp_to_ucs2);
847 /*******************************************************************
848 Count the number of characters in a smb_ucs2_t string.
849 ********************************************************************/
851 size_t strlen_w(const smb_ucs2_t *src)
855 for(len = 0; *src; len++)
861 /*******************************************************************
862 Safe wstring copy into a known length string. maxlength includes
863 the terminating zero. maxlength is in ucs2 units.
864 ********************************************************************/
866 smb_ucs2_t *safe_strcpy_w(smb_ucs2_t *dest,const smb_ucs2_t *src, size_t maxlength)
871 DEBUG(0,("ERROR: NULL dest in safe_strcpy_w\n"));
880 maxlength /= sizeof(smb_ucs2_t);
882 ucs2_len = strlen_w(src);
884 if (ucs2_len >= maxlength) {
886 DEBUG(0,("ERROR: string overflow by %u bytes in safe_strcpy_w [%.50s]\n",
887 (unsigned int)((ucs2_len-maxlength)*sizeof(smb_ucs2_t)),
888 unicode_to_unix(out,src,sizeof(out))) );
889 ucs2_len = maxlength - 1;
892 memcpy(dest, src, ucs2_len*sizeof(smb_ucs2_t));
897 /*******************************************************************
898 Safe string cat into a string. maxlength includes the terminating zero.
899 maxlength is in ucs2 units.
900 ********************************************************************/
902 smb_ucs2_t *safe_strcat_w(smb_ucs2_t *dest, const smb_ucs2_t *src, size_t maxlength)
904 size_t ucs2_src_len, ucs2_dest_len;
907 DEBUG(0,("ERROR: NULL dest in safe_strcat_w\n"));
914 ucs2_src_len = strlen_w(src);
915 ucs2_dest_len = strlen_w(dest);
917 if (ucs2_src_len + ucs2_dest_len >= maxlength) {
919 int new_len = maxlength - ucs2_dest_len - 1;
920 DEBUG(0,("ERROR: string overflow by %u characters in safe_strcat_w [%.50s]\n",
921 (unsigned int)(sizeof(smb_ucs2_t)*(ucs2_src_len + ucs2_dest_len - maxlength)),
922 unicode_to_unix(out,src,sizeof(out))) );
923 ucs2_src_len = (size_t)(new_len > 0 ? new_len : 0);
926 memcpy(&dest[ucs2_dest_len], src, ucs2_src_len*sizeof(smb_ucs2_t));
927 dest[ucs2_dest_len + ucs2_src_len] = 0;
931 /*******************************************************************
932 Compare the two strings s1 and s2.
933 ********************************************************************/
935 int strcmp_w(const smb_ucs2_t *s1, const smb_ucs2_t *s2)
952 /*******************************************************************
953 Compare the first n characters of s1 to s2. len is in ucs2 units.
954 ********************************************************************/
956 int strncmp_w(const smb_ucs2_t *s1, const smb_ucs2_t *s2, size_t len)
960 for (; len != 0; --len) {
974 /*******************************************************************
975 Search string s2 from s1.
976 ********************************************************************/
978 smb_ucs2_t *strstr_w(const smb_ucs2_t *s1, const smb_ucs2_t *s2)
980 size_t len = strlen_w(s2);
983 return (smb_ucs2_t *)s1;
987 if (strncmp_w(s1, s2, len) == 0)
988 return (smb_ucs2_t *)s1;
994 /*******************************************************************
995 Search for ucs2 char c from the beginning of s.
996 ********************************************************************/
998 smb_ucs2_t *strchr_w(const smb_ucs2_t *s, smb_ucs2_t c)
1002 return (smb_ucs2_t *)s;
1008 /*******************************************************************
1009 Search for ucs2 char c from the end of s.
1010 ********************************************************************/
1012 smb_ucs2_t *strrchr_w(const smb_ucs2_t *s, smb_ucs2_t c)
1014 smb_ucs2_t *retval = 0;
1018 retval = (smb_ucs2_t *)s;
1024 /*******************************************************************
1025 Search token from s1 separated by any ucs2 char of s2.
1026 ********************************************************************/
1028 smb_ucs2_t *strtok_w(smb_ucs2_t *s1, const smb_ucs2_t *s2)
1030 static smb_ucs2_t *s = NULL;
1039 for (q = s1; *s1; s1++) {
1040 smb_ucs2_t *p = strchr_w(s2, *s1);
1058 /*******************************************************************
1059 Duplicate a ucs2 string.
1060 ********************************************************************/
1062 smb_ucs2_t *strdup_w(const smb_ucs2_t *s)
1064 size_t newlen = (strlen_w(s)+1)*sizeof(smb_ucs2_t);
1065 smb_ucs2_t *newstr = (smb_ucs2_t *)malloc(newlen);
1068 safe_strcpy_w(newstr, s, newlen);
1072 /*******************************************************************
1073 Mapping tables for UNICODE character. Allows toupper/tolower and
1074 isXXX functions to work.
1076 tridge: split into 2 pieces. This saves us 5/6 of the memory
1077 with a small speed penalty
1078 The magic constants are the lower/upper range of the tables two
1080 ********************************************************************/
1085 unsigned char flags;
1086 } smb_unicode_table_t;
1088 #define TABLE1_BOUNDARY 9450
1089 #define TABLE2_BOUNDARY 64256
1091 static smb_unicode_table_t map_table1[] = {
1092 #include "unicode_map_table1.h"
1095 static smb_unicode_table_t map_table2[] = {
1096 #include "unicode_map_table2.h"
1099 static unsigned char map_table_flags(smb_ucs2_t v)
1101 if (v < TABLE1_BOUNDARY) return map_table1[v].flags;
1102 if (v >= TABLE2_BOUNDARY) return map_table2[v - TABLE2_BOUNDARY].flags;
1106 static smb_ucs2_t map_table_lower(smb_ucs2_t v)
1108 if (v < TABLE1_BOUNDARY) return map_table1[v].lower;
1109 if (v >= TABLE2_BOUNDARY) return map_table2[v - TABLE2_BOUNDARY].lower;
1113 static smb_ucs2_t map_table_upper(smb_ucs2_t v)
1115 if (v < TABLE1_BOUNDARY) return map_table1[v].upper;
1116 if (v >= TABLE2_BOUNDARY) return map_table2[v - TABLE2_BOUNDARY].upper;
1120 /*******************************************************************
1121 Is an upper case wchar.
1122 ********************************************************************/
1124 int isupper_w( smb_ucs2_t val)
1126 return (map_table_flags(val) & UNI_UPPER);
1129 /*******************************************************************
1130 Is a lower case wchar.
1131 ********************************************************************/
1133 int islower_w( smb_ucs2_t val)
1135 return (map_table_flags(val) & UNI_LOWER);
1138 /*******************************************************************
1140 ********************************************************************/
1142 int isdigit_w( smb_ucs2_t val)
1144 return (map_table_flags(val) & UNI_DIGIT);
1147 /*******************************************************************
1148 Is a hex digit wchar.
1149 ********************************************************************/
1151 int isxdigit_w( smb_ucs2_t val)
1153 return (map_table_flags(val) & UNI_XDIGIT);
1156 /*******************************************************************
1158 ********************************************************************/
1160 int isspace_w( smb_ucs2_t val)
1162 return (map_table_flags(val) & UNI_SPACE);
1165 /*******************************************************************
1166 Convert a wchar to upper case.
1167 ********************************************************************/
1169 smb_ucs2_t toupper_w( smb_ucs2_t val )
1171 return map_table_upper(val);
1174 /*******************************************************************
1175 Convert a wchar to lower case.
1176 ********************************************************************/
1178 smb_ucs2_t tolower_w( smb_ucs2_t val )
1180 return map_table_lower(val);
1183 static smb_ucs2_t *last_ptr = NULL;
1185 void set_first_token_w(smb_ucs2_t *ptr)
1190 /****************************************************************************
1191 Get the next token from a string, return False if none found
1192 handles double-quotes.
1193 Based on a routine by GJC@VILLAGE.COM.
1194 Extensively modified by Andrew.Tridgell@anu.edu.au
1195 bufsize is in bytes.
1196 ****************************************************************************/
1198 static smb_ucs2_t sep_list[] = { (smb_ucs2_t)' ', (smb_ucs2_t)'\t', (smb_ucs2_t)'\n', (smb_ucs2_t)'\r', 0};
1199 static smb_ucs2_t quotechar = (smb_ucs2_t)'\"';
1201 BOOL next_token_w(smb_ucs2_t **ptr, smb_ucs2_t *buff, smb_ucs2_t *sep, size_t bufsize)
1208 * Convert bufsize to smb_ucs2_t units.
1211 bufsize /= sizeof(smb_ucs2_t);
1221 * Default to simple separators.
1228 * Find the first non sep char.
1231 while(*s && strchr_w(sep,*s))
1242 * Copy over the token.
1245 for (quoted = False; len < bufsize && *s && (quoted || !strchr_w(sep,*s)); s++) {
1246 if (*s == quotechar) {
1254 *ptr = (*s) ? s+1 : s;
1261 /****************************************************************************
1262 Convert list of tokens to array; dependent on above routine.
1263 Uses last_ptr from above - bit of a hack.
1264 ****************************************************************************/
1266 smb_ucs2_t **toktocliplist_w(int *ctok, smb_ucs2_t *sep)
1268 smb_ucs2_t *s=last_ptr;
1270 smb_ucs2_t **ret, **iret;
1275 while(*s && strchr_w(sep,*s))
1287 while(*s && (!strchr_w(sep,*s)))
1289 while(*s && strchr_w(sep,*s))
1296 if (!(ret=iret=malloc(ictok*sizeof(smb_ucs2_t *))))
/*******************************************************************
 Case insensitive string comparison.
********************************************************************/
1314 int StrCaseCmp_w(const smb_ucs2_t *s, const smb_ucs2_t *t)
1317 * Compare until we run out of string, either t or s, or find a difference.
1320 while (*s && *t && toupper_w(*s) == toupper_w(*t)) {
1325 return(toupper_w(*s) - toupper_w(*t));
/*******************************************************************
 Case insensitive string comparison, length limited.
********************************************************************/
1333 int StrnCaseCmp_w(const smb_ucs2_t *s, const smb_ucs2_t *t, size_t n)
1336 * Compare until we run out of string, either t or s, or chars.
1339 while (n && *s && *t && toupper_w(*s) == toupper_w(*t)) {
1346 * Not run out of chars - strings are different lengths.
1350 return(toupper_w(*s) - toupper_w(*t));
1353 * Identical up to where we run out of chars,
1354 * and strings are same length.
1360 /*******************************************************************
1362 ********************************************************************/
1364 BOOL strequal_w(const smb_ucs2_t *s1, const smb_ucs2_t *s2)
1371 return(StrCaseCmp_w(s1,s2)==0);
1374 /*******************************************************************
1375 Compare 2 strings up to and including the nth char. n is in ucs2
1377 ******************************************************************/
1379 BOOL strnequal_w(const smb_ucs2_t *s1,const smb_ucs2_t *s2,size_t n)
1383 if (!s1 || !s2 || !n)
1386 return(StrnCaseCmp_w(s1,s2,n)==0);
1389 /*******************************************************************
1390 Compare 2 strings (case sensitive).
1391 ********************************************************************/
1393 BOOL strcsequal_w(const smb_ucs2_t *s1,const smb_ucs2_t *s2)
1400 return(strcmp_w(s1,s2)==0);
1403 /*******************************************************************
1404 Convert a string to lower case.
1405 ********************************************************************/
1407 void strlower_w(smb_ucs2_t *s)
1416 /*******************************************************************
1417 Convert a string to upper case.
1418 ********************************************************************/
1420 void strupper_w(smb_ucs2_t *s)
1429 /*******************************************************************
1430 Convert a string to "normal" form.
1431 ********************************************************************/
1433 void strnorm_w(smb_ucs2_t *s)
1435 extern int case_default;
1436 if (case_default == CASE_UPPER)
1442 /*******************************************************************
1443 Check if a string is in "normal" case.
1444 ********************************************************************/
1446 BOOL strisnormal_w(smb_ucs2_t *s)
1448 extern int case_default;
1449 if (case_default == CASE_UPPER)
1450 return(!strhaslower_w(s));
1452 return(!strhasupper_w(s));
1455 /****************************************************************************
1457 ****************************************************************************/
1459 void string_replace_w(smb_ucs2_t *s, smb_ucs2_t oldc, smb_ucs2_t newc)
1468 /*******************************************************************
1469 Skip past some strings in a buffer. n is in bytes.
1470 ********************************************************************/
1472 smb_ucs2_t *skip_string_w(smb_ucs2_t *buf,size_t n)
1475 buf += (strlen_w(buf)*sizeof(smb_ucs2_t)) + 1;
1479 /*******************************************************************
1480 Count the number of characters in a string. Same as strlen_w in
1481 smb_ucs2_t string units.
1482 ********************************************************************/
1484 size_t str_charnum_w(const smb_ucs2_t *s)
1489 /*******************************************************************
1490 Trim the specified elements off the front and back of a string.
1491 ********************************************************************/
1493 BOOL trim_string_w(smb_ucs2_t *s,const smb_ucs2_t *front,const smb_ucs2_t *back)
1496 size_t front_len = (front && *front) ? strlen_w(front) : 0;
1497 size_t back_len = (back && *back) ? strlen_w(back) : 0;
1500 while (front_len && strncmp_w(s, front, front_len) == 0) {
1505 if (!(*p = p[front_len]))
1512 s_len = strlen_w(s);
1513 while ((s_len >= back_len) &&
1514 (strncmp_w(s + s_len - back_len, back, back_len)==0)) {
1516 s[s_len - back_len] = 0;
1517 s_len = strlen_w(s);
1524 /****************************************************************************
1525 Does a string have any uppercase chars in it ?
1526 ****************************************************************************/
1528 BOOL strhasupper_w(const smb_ucs2_t *s)
1538 /****************************************************************************
1539 Does a string have any lowercase chars in it ?
1540 ****************************************************************************/
1542 BOOL strhaslower_w(const smb_ucs2_t *s)
1552 /****************************************************************************
1553 Find the number of 'c' chars in a string.
1554 ****************************************************************************/
1556 size_t count_chars_w(const smb_ucs2_t *s,smb_ucs2_t c)
1568 /*******************************************************************
1569 Return True if a string consists only of one particular character.
1570 ********************************************************************/
1572 BOOL str_is_all_w(const smb_ucs2_t *s,smb_ucs2_t c)
1587 /*******************************************************************
1588 Paranoid strcpy into a buffer of given length (includes terminating
1589 zero. Strips out all but 'a-Z0-9' and replaces with '_'. Deliberately
1590 does *NOT* check for multibyte characters. Don't change it !
1591 maxlength is in ucs2 units.
1592 ********************************************************************/
1594 smb_ucs2_t *alpha_strcpy_w(smb_ucs2_t *dest, const smb_ucs2_t *src, size_t maxlength)
1599 DEBUG(0,("ERROR: NULL dest in alpha_strcpy_w\n"));
1608 len = strlen_w(src);
1609 if (len >= maxlength)
1610 len = maxlength - 1;
1612 for(i = 0; i < len; i++) {
1613 smb_ucs2_t val = src[i];
1614 if(isupper_w(val) ||islower_w(val) || isdigit_w(val))
1617 dest[i] = (smb_ucs2_t)'_';
1625 /****************************************************************************
1626 Like strncpy but always null terminates. Make sure there is room !
1627 The variable n should always be one less than the available size and is in
1629 ****************************************************************************/
1631 smb_ucs2_t *StrnCpy_w(smb_ucs2_t *dest,const smb_ucs2_t *src,size_t n)
1633 smb_ucs2_t *d = dest;
1641 while (n-- && (*d++ = *src++))
1647 /****************************************************************************
1648 Like strncpy but copies up to the character marker. Always null terminates.
1649 returns a pointer to the character marker in the source string (src).
1651 ****************************************************************************/
1653 smb_ucs2_t *strncpyn_w(smb_ucs2_t *dest, const smb_ucs2_t *src,size_t n, smb_ucs2_t c)
1658 p = strchr_w(src, c);
1661 smb_ucs2_t mbcval[2];
1664 DEBUG(5, ("strncpyn_w: separator character (%s) not found\n",
1665 unicode_to_unix(cval,mbcval,sizeof(cval)) ));
1669 str_len = PTR_DIFF(p, src) + 1;
1670 safe_strcpy_w(dest, src, MIN(n, str_len));
1675 /*************************************************************
1676 Routine to get hex characters and turn them into a 16 byte array.
1677 The array can be variable length, and any non-hex-numeric
1678 characters are skipped. "0xnn" or "0Xnn" is specially catered
1679 for. len is in bytes.
1680 Valid examples: "0A5D15"; "0x15, 0x49, 0xa2"; "59\ta9\te3\n"
1681 **************************************************************/
1683 static smb_ucs2_t hexprefix[] = { (smb_ucs2_t)'0', (smb_ucs2_t)'x', 0 };
1684 static smb_ucs2_t hexchars[] = { (smb_ucs2_t)'0', (smb_ucs2_t)'1', (smb_ucs2_t)'2', (smb_ucs2_t)'3',
1685 (smb_ucs2_t)'4', (smb_ucs2_t)'5', (smb_ucs2_t)'6', (smb_ucs2_t)'7',
1686 (smb_ucs2_t)'8', (smb_ucs2_t)'9', (smb_ucs2_t)'A', (smb_ucs2_t)'B',
1687 (smb_ucs2_t)'C', (smb_ucs2_t)'D', (smb_ucs2_t)'E', (smb_ucs2_t)'F', 0 };
1689 size_t strhex_to_str_w(char *p, size_t len, const smb_ucs2_t *strhex)
1692 size_t num_chars = 0;
1693 unsigned char lonybble, hinybble;
1694 smb_ucs2_t *p1 = NULL, *p2 = NULL;
1697 * Convert to smb_ucs2_t units.
1700 len /= sizeof(smb_ucs2_t);
1702 for (i = 0; i < len && strhex[i] != 0; i++) {
1703 if (strnequal_w(hexchars, hexprefix, 2)) {
1704 i++; /* skip two chars */
1708 if (!(p1 = strchr_w(hexchars, toupper_w(strhex[i]))))
1711 i++; /* next hex digit */
1713 if (!(p2 = strchr_w(hexchars, toupper_w(strhex[i]))))
1716 /* get the two nybbles */
1717 hinybble = (PTR_DIFF(p1, hexchars)/sizeof(smb_ucs2_t));
1718 lonybble = (PTR_DIFF(p2, hexchars)/sizeof(smb_ucs2_t));
1720 p[num_chars] = (hinybble << 4) | lonybble;
1729 /****************************************************************************
1730 Check if a string is part of a list.
1731 ****************************************************************************/
1733 BOOL in_list_w(smb_ucs2_t *s,smb_ucs2_t *list,BOOL casesensitive)
1741 while (next_token_w(&p,tok,LIST_SEP_W,sizeof(tok))) {
1742 if (casesensitive) {
1743 if (strcmp_w(tok,s) == 0)
1746 if (StrCaseCmp_w(tok,s) == 0)
1753 /* This is used to prevent lots of mallocs of size 2 */
1754 static smb_ucs2_t *null_string = NULL;
1756 /****************************************************************************
1757 Set a string value, allocing the space for the string.
1758 ****************************************************************************/
1760 BOOL string_init_w(smb_ucs2_t **dest,const smb_ucs2_t *src)
1765 if((null_string = (smb_ucs2_t *)malloc(sizeof(smb_ucs2_t))) == NULL) {
1766 DEBUG(0,("string_init_w: malloc fail for null_string.\n"));
1778 *dest = null_string;
1780 (*dest) = (smb_ucs2_t *)malloc(sizeof(smb_ucs2_t)*(l+1));
1781 if ((*dest) == NULL) {
1782 DEBUG(0,("Out of memory in string_init_w\n"));
1786 wpstrcpy(*dest,src);
1791 /****************************************************************************
1792 Free a string value.
1793 ****************************************************************************/
1795 void string_free_w(smb_ucs2_t **s)
1799 if (*s == null_string)
1806 /****************************************************************************
1807 Set a string value, allocing the space for the string, and deallocating any
1809 ****************************************************************************/
1811 BOOL string_set_w(smb_ucs2_t **dest,const smb_ucs2_t *src)
1813 string_free_w(dest);
1815 return(string_init_w(dest,src));
1818 /****************************************************************************
1819 Substitute a string for a pattern in another string. Make sure there is
1822 This routine looks for pattern in s and replaces it with
1823 insert. It may do multiple replacements.
1825 Any of " ; ' $ or ` in the insert string are replaced with _
1826 if len==0 then no length check is performed
1827 len is in ucs2 units.
1828 ****************************************************************************/
1830 void string_sub_w(smb_ucs2_t *s,const smb_ucs2_t *pattern,const smb_ucs2_t *insert, size_t len)
1833 ssize_t ls,lp,li, i;
1835 if (!insert || !pattern || !s)
1838 ls = (ssize_t)strlen_w(s);
1839 lp = (ssize_t)strlen_w(pattern);
1840 li = (ssize_t)strlen_w(insert);
1845 while (lp <= ls && (p = strstr_w(s,pattern))) {
1846 if (len && (ls + (li-lp) >= len)) {
1848 DEBUG(0,("ERROR: string overflow by %d in string_sub_w(%.50s, %d)\n",
1849 (int)(sizeof(smb_ucs2_t)*(ls + (li-lp) - len)),
1850 unicode_to_unix(out,pattern,sizeof(out)), (int)len*sizeof(smb_ucs2_t)));
1854 memmove(p+li,p+lp,sizeof(smb_ucs2_t)*(strlen_w(p+lp)+1));
1856 for (i=0;i<li;i++) {
1857 switch (insert[i]) {
1858 case (smb_ucs2_t)'`':
1859 case (smb_ucs2_t)'"':
1860 case (smb_ucs2_t)'\'':
1861 case (smb_ucs2_t)';':
1862 case (smb_ucs2_t)'$':
1863 case (smb_ucs2_t)'%':
1864 case (smb_ucs2_t)'\r':
1865 case (smb_ucs2_t)'\n':
1866 p[i] = (smb_ucs2_t)'_';
1877 void fstring_sub_w(smb_ucs2_t *s,const smb_ucs2_t *pattern,const smb_ucs2_t *insert)
1879 string_sub_w(s, pattern, insert, sizeof(wfstring));
1882 void pstring_sub_w(smb_ucs2_t *s,const smb_ucs2_t *pattern,smb_ucs2_t *insert)
1884 string_sub_w(s, pattern, insert, sizeof(wpstring));
1887 /****************************************************************************
1888 Similar to string_sub() but allows for any character to be substituted.
1890 if len==0 then no length check is performed.
1891 ****************************************************************************/
1893 void all_string_sub_w(smb_ucs2_t *s,const smb_ucs2_t *pattern,const smb_ucs2_t *insert, size_t len)
1898 if (!insert || !pattern || !s)
1901 ls = (ssize_t)strlen_w(s);
1902 lp = (ssize_t)strlen_w(pattern);
1903 li = (ssize_t)strlen_w(insert);
1908 while (lp <= ls && (p = strstr_w(s,pattern))) {
1909 if (len && (ls + (li-lp) >= len)) {
1911 DEBUG(0,("ERROR: string overflow by %d in all_string_sub_w(%.50s, %d)\n",
1912 (int)(sizeof(smb_ucs2_t)*(ls + (li-lp) - len)),
1913 unicode_to_unix(out,pattern,sizeof(out)), (int)len*sizeof(smb_ucs2_t)));
1917 memmove(p+li,p+lp,sizeof(smb_ucs2_t)*(strlen_w(p+lp)+1));
1919 memcpy(p, insert, li*sizeof(smb_ucs2_t));
1925 /****************************************************************************
1926 Splits out the front and back at a separator.
1927 ****************************************************************************/
1929 void split_at_last_component_w(smb_ucs2_t *path, smb_ucs2_t *front, smb_ucs2_t sep, smb_ucs2_t *back)
1931 smb_ucs2_t *p = strrchr_w(path, sep);
1937 wpstrcpy(front, path);
1941 wpstrcpy(back, p+1);
1942 *p = (smb_ucs2_t)'\\';
1950 /****************************************************************************
1951 Write an octal as a string.
1952 ****************************************************************************/
1954 smb_ucs2_t *octal_string_w(int i)
1956 static smb_ucs2_t wret[64];
1960 slprintf(ret, sizeof(ret), "-1");
1962 slprintf(ret, sizeof(ret), "0%o", i);
1963 return unix_to_unicode(wret, ret, sizeof(wret));
1967 /****************************************************************************
1968 Truncate a string at a specified length.
1969 length is in ucs2 units.
1970 ****************************************************************************/
1972 smb_ucs2_t *string_truncate_w(smb_ucs2_t *s, size_t length)
1974 if (s && strlen_w(s) > length)
/******************************************************************
 Functions for UTF8 support (used in kanji.c).
 ******************************************************************/
1983 smb_ucs2_t doscp2ucs2(int w)
1985 return ((smb_ucs2_t)doscp_to_ucs2[w]);
1988 int ucs2doscp(smb_ucs2_t w)
1990 return ((int)ucs2_to_doscp[w]);