2 Unix SMB/Netbios implementation.
4 Samba utility functions
5 Copyright (C) Andrew Tridgell 1992-1998
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2 of the License, or
10 (at your option) any later version.
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24 extern int DEBUGLEVEL;
26 smb_ucs2_t wchar_list_sep[] = { (smb_ucs2_t)' ', (smb_ucs2_t)'\t', (smb_ucs2_t)',',
27 (smb_ucs2_t)';', (smb_ucs2_t)':', (smb_ucs2_t)'\n',
28 (smb_ucs2_t)'\r', 0 };
30 * The following are the codepage to ucs2 and vice versa maps.
31 * These are dynamically loaded from a unicode translation file.
34 static smb_ucs2_t *doscp_to_ucs2;
35 static uint16 *ucs2_to_doscp;
37 static smb_ucs2_t *unixcp_to_ucs2;
38 static uint16 *ucs2_to_unixcp;
44 /*******************************************************************
45 Write a string in (little-endian) unicode format. src is in
46 the current DOS codepage. len is the length in bytes of the
47 string pointed to by dst.
49 if null_terminate is True then null terminate the packet (adds 2 bytes)
51 the return value is the length consumed by the string, including the
52 null termination if applied
53 ********************************************************************/
55 int dos_PutUniCode(char *dst,const char *src, ssize_t len, BOOL null_terminate)
58 while (*src && (len > 2)) {
59 size_t skip = get_character_len(*src);
60 smb_ucs2_t val = (*src & 0xff);
63 * If this is a multibyte character (and all DOS/Windows
64 * codepages have at maximum 2 byte multibyte characters)
65 * then work out the index value for the unicode conversion.
69 val = ((val << 8) | (src[1] & 0xff));
71 SSVAL(dst,ret,doscp_to_ucs2[val]);
86 /*******************************************************************
87 Put an ASCII string into a UNICODE array (uint16's).
89 Warning: doesn't do any codepage !!! BAD !!!
91 Help ! Fix Me ! Fix Me !
92 ********************************************************************/
94 void ascii_to_unistr(uint16 *dest, const char *src, int maxlen)
96 uint16 *destend = dest + maxlen;
99 while (dest < destend)
107 *(dest++) = (uint16)c;
113 /*******************************************************************
114 Pull an ASCII string out of a UNICODE array (uint16's).
116 Warning: doesn't do any codepage !!! BAD !!!
118 Help ! Fix Me ! Fix Me !
119 ********************************************************************/
121 void unistr_to_ascii(char *dest, const uint16 *src, int len)
123 char *destend = dest + len;
126 while (dest < destend)
140 /* from TNG - should be fixed */
141 char *skip_unibuf(char *src, int len)
143 char *srcend = src + len;
145 while (src < srcend && SVAL(src,0)) src += 2;
151 /*******************************************************************
152 Skip past some unicode strings in a buffer.
153 ********************************************************************/
155 char *skip_unicode_string(char *buf,int n)
165 /*******************************************************************
166 Return a DOS codepage version of a little-endian unicode string.
167 len is the filename length (ignoring any terminating zero) in uint16
168 units. Always null terminates.
169 Hack alert: uses fixed buffer(s).
170 ********************************************************************/
172 char *dos_unistrn2(uint16 *src, int len)
174 static char lbufs[8][MAXUNI];
176 char *lbuf = lbufs[nexti];
181 for (p = lbuf; (len > 0) && (p-lbuf < MAXUNI-3) && *src; len--, src++) {
182 uint16 ucs2_val = SVAL(src,0);
183 uint16 cp_val = ucs2_to_doscp[ucs2_val];
188 *p++ = (cp_val >> 8) & 0xff;
189 *p++ = (cp_val & 0xff);
197 static char lbufs[8][MAXUNI];
200 /*******************************************************************
201 Return a DOS codepage version of a little-endian unicode string.
202 Hack alert: uses fixed buffer(s).
203 ********************************************************************/
205 char *dos_unistr2(uint16 *src)
207 char *lbuf = lbufs[nexti];
212 for (p = lbuf; *src && (p-lbuf < MAXUNI-3); src++) {
213 uint16 ucs2_val = SVAL(src,0);
214 uint16 cp_val = ucs2_to_doscp[ucs2_val];
219 *p++ = (cp_val >> 8) & 0xff;
220 *p++ = (cp_val & 0xff);
228 /*******************************************************************
229 Return a DOS codepage version of a little-endian unicode string
230 ********************************************************************/
232 char *dos_unistr2_to_str(UNISTR2 *str)
234 char *lbuf = lbufs[nexti];
236 uint16 *src = str->buffer;
237 int max_size = MIN(sizeof(str->buffer)-3, str->uni_str_len);
241 for (p = lbuf; *src && p-lbuf < max_size; src++) {
242 uint16 ucs2_val = SVAL(src,0);
243 uint16 cp_val = ucs2_to_doscp[ucs2_val];
248 *p++ = (cp_val >> 8) & 0xff;
249 *p++ = (cp_val & 0xff);
257 /*******************************************************************
258 Convert a UNISTR2 structure to an ASCII string
259 Warning: this version does DOS codepage.
260 ********************************************************************/
262 void unistr2_to_ascii(char *dest, const UNISTR2 *str, size_t maxlen)
/* The number of bytes written is bounded by MIN of the UNISTR2 length field and the caller's maxlen. */
270 len = MIN(str->uni_str_len, maxlen);
271 destend = dest + len;
273 while (dest < destend)
/* Fetch one 16-bit value with SVAL and map it through the ucs2-to-DOS-codepage table. */
284 ucs2_val = SVAL(src++,0);
285 cp_val = ucs2_to_doscp[ucs2_val];
288 *(dest++) = (char)cp_val;
/* NOTE(review): the first store below writes the high byte but does not advance dest;
   the second store then writes the low byte to that same position, so the high byte
   is overwritten. Confirm whether the first store was meant to be *(dest++). */
290 *dest= (cp_val >> 8) & 0xff;
291 *(dest++) = (cp_val & 0xff);
299 /*******************************************************************
300 Return a number stored in a buffer
301 ********************************************************************/
303 uint32 buffer2_to_uint32(BUFFER2 *str)
305 if (str->buf_len == 4)
306 return IVAL(str->buffer, 0);
311 /*******************************************************************
312 Return a DOS codepage version of a NOTunicode string
313 ********************************************************************/
315 char *dos_buffer2_to_str(BUFFER2 *str)
317 char *lbuf = lbufs[nexti];
319 uint16 *src = str->buffer;
320 int max_size = MIN(sizeof(str->buffer)-3, str->buf_len/2);
324 for (p = lbuf; *src && p-lbuf < max_size; src++) {
325 uint16 ucs2_val = SVAL(src,0);
326 uint16 cp_val = ucs2_to_doscp[ucs2_val];
331 *p++ = (cp_val >> 8) & 0xff;
332 *p++ = (cp_val & 0xff);
340 /*******************************************************************
341 Return a dos codepage version of a NOTunicode string
342 ********************************************************************/
344 char *dos_buffer2_to_multistr(BUFFER2 *str)
346 char *lbuf = lbufs[nexti];
348 uint16 *src = str->buffer;
349 int max_size = MIN(sizeof(str->buffer)-3, str->buf_len/2);
353 for (p = lbuf; p-lbuf < max_size; src++) {
357 uint16 ucs2_val = SVAL(src,0);
358 uint16 cp_val = ucs2_to_doscp[ucs2_val];
363 *p++ = (cp_val >> 8) & 0xff;
364 *p++ = (cp_val & 0xff);
373 /*******************************************************************
374 Create a null-terminated unicode string from a null-terminated DOS
376 Return number of unicode chars copied, excluding the null character.
377 Unicode strings created are in little-endian format.
378 ********************************************************************/
380 size_t dos_struni2(char *dst, const char *src, size_t max_len)
/* NOTE(review): max_len is a size_t, so max_len-2 in the loop condition wraps to a very
   large value when max_len < 2, unless a line not visible here guards against it - confirm. */
388 for (; *src && len < max_len-2; len++, dst +=2) {
389 size_t skip = get_character_len(*src);
390 smb_ucs2_t val = (*src & 0xff);
393 * If this is a multibyte character (and all DOS/Windows
394 * codepages have at maximum 2 byte multibyte characters)
395 * then work out the index value for the unicode conversion.
/* Combine the lead byte and the following byte into a 16-bit table index. */
399 val = ((val << 8) | (src[1] & 0xff));
/* Write the mapped value at dst with SSVAL; the loop header advances dst by two bytes per character. */
401 SSVAL(dst,0,doscp_to_ucs2[val]);
414 /*******************************************************************
415 Return a DOS codepage version of a little-endian unicode string.
416 Hack alert: uses fixed buffer(s).
417 ********************************************************************/
419 char *dos_unistr(char *buf)
421 char *lbuf = lbufs[nexti];
422 uint16 *src = (uint16 *)buf;
427 for (p = lbuf; *src && p-lbuf < MAXUNI-3; src++) {
428 uint16 ucs2_val = SVAL(src,0);
429 uint16 cp_val = ucs2_to_doscp[ucs2_val];
434 *p++ = (cp_val >> 8) & 0xff;
435 *p++ = (cp_val & 0xff);
443 /*******************************************************************
444 Strcpy for unicode strings. returns length (in num of wide chars)
445 ********************************************************************/
447 int unistrcpy(char *dst, char *src)
450 uint16 *wsrc = (uint16 *)src;
451 uint16 *wdst = (uint16 *)dst;
464 /*******************************************************************
465 Free any existing maps.
466 ********************************************************************/
468 static void free_maps(smb_ucs2_t **pp_cp_to_ucs2, uint16 **pp_ucs2_to_cp)
470 /* this handles identity mappings where we share the pointer */
471 if (*pp_ucs2_to_cp == *pp_cp_to_ucs2) {
472 *pp_ucs2_to_cp = NULL;
475 if (*pp_cp_to_ucs2) {
476 free(*pp_cp_to_ucs2);
477 *pp_cp_to_ucs2 = NULL;
480 if (*pp_ucs2_to_cp) {
481 free(*pp_ucs2_to_cp);
482 *pp_ucs2_to_cp = NULL;
487 /*******************************************************************
488 Build a default (null) codepage to unicode map.
489 ********************************************************************/
491 void default_unicode_map(smb_ucs2_t **pp_cp_to_ucs2, uint16 **pp_ucs2_to_cp)
495 free_maps(pp_cp_to_ucs2, pp_ucs2_to_cp);
497 if ((*pp_ucs2_to_cp = (uint16 *)malloc(2*65536)) == NULL) {
498 DEBUG(0,("default_unicode_map: malloc fail for ucs2_to_cp size %u.\n", 2*65536));
502 *pp_cp_to_ucs2 = *pp_ucs2_to_cp; /* Default map is an identity. */
503 for (i = 0; i < 65536; i++)
504 (*pp_cp_to_ucs2)[i] = i;
507 /*******************************************************************
508 Load a codepage to unicode and vice-versa map.
509 ********************************************************************/
511 BOOL load_unicode_map(const char *codepage, smb_ucs2_t **pp_cp_to_ucs2, uint16 **pp_ucs2_to_cp)
513 pstring unicode_map_file_name;
/* Start from the caller's current tables; they are replaced through the pp_ pointers near the end. */
516 smb_ucs2_t *cp_to_ucs2 = *pp_cp_to_ucs2;
517 uint16 *ucs2_to_cp = *pp_ucs2_to_cp;
518 size_t cp_to_ucs2_size;
519 size_t ucs2_to_cp_size;
522 char buf[UNICODE_MAP_HEADER_SIZE];
524 DEBUG(5, ("load_unicode_map: loading unicode map for codepage %s.\n", codepage));
526 if (*codepage == '\0')
/* The constant 13 equals the combined length of the "/" and "unicode_map." pieces appended below. */
529 if(strlen(CODEPAGEDIR) + 13 + strlen(codepage) > sizeof(unicode_map_file_name)) {
530 DEBUG(0,("load_unicode_map: filename too long to load\n"));
/* Build the path as CODEPAGEDIR, "/", "unicode_map." and the codepage name. */
534 pstrcpy(unicode_map_file_name, CODEPAGEDIR);
535 pstrcat(unicode_map_file_name, "/");
536 pstrcat(unicode_map_file_name, "unicode_map.");
537 pstrcat(unicode_map_file_name, codepage);
539 if(sys_stat(unicode_map_file_name,&st)!=0) {
540 DEBUG(0,("load_unicode_map: filename %s does not exist.\n",
541 unicode_map_file_name));
/* Only two file sizes are accepted: header + 4*65536 bytes (treated further down as a
   multibyte code page) or header + 2*256 + 2*65536 bytes (single byte code page). */
547 if ((size != UNICODE_MAP_HEADER_SIZE + 4*65536) && (size != UNICODE_MAP_HEADER_SIZE +(2*256 + 2*65536))) {
548 DEBUG(0,("load_unicode_map: file %s is an incorrect size for a \
549 unicode map file (size=%d).\n", unicode_map_file_name, (int)size));
553 if((fp = sys_fopen( unicode_map_file_name, "r")) == NULL) {
554 DEBUG(0,("load_unicode_map: cannot open file %s. Error was %s\n",
555 unicode_map_file_name, strerror(errno)));
/* Read the fixed-size header into buf before validating its version and codepage fields. */
559 if(fread( buf, 1, UNICODE_MAP_HEADER_SIZE, fp)!=UNICODE_MAP_HEADER_SIZE) {
560 DEBUG(0,("load_unicode_map: cannot read header from file %s. Error was %s\n",
561 unicode_map_file_name, strerror(errno)));
565 /* Check the version value */
566 if(SVAL(buf,UNICODE_MAP_VERSION_OFFSET) != UNICODE_MAP_FILE_VERSION_ID) {
567 DEBUG(0,("load_unicode_map: filename %s has incorrect version id. \
568 Needed %hu, got %hu.\n",
569 unicode_map_file_name, (uint16)UNICODE_MAP_FILE_VERSION_ID,
570 SVAL(buf,UNICODE_MAP_VERSION_OFFSET)));
574 /* Check the codepage value */
575 if(!strequal(&buf[UNICODE_MAP_CLIENT_CODEPAGE_OFFSET], codepage)) {
576 DEBUG(0,("load_unicode_map: codepage %s in file %s is not the same as that \
577 requested (%s).\n", &buf[UNICODE_MAP_CLIENT_CODEPAGE_OFFSET], unicode_map_file_name, codepage ));
/* The ucs2-to-codepage table always has 65536 16-bit entries; the codepage-to-ucs2
   table has 65536 or 256 entries depending on the file size checked earlier. */
581 ucs2_to_cp_size = 2*65536;
582 if (size == UNICODE_MAP_HEADER_SIZE + 4*65536) {
584 * This is a multibyte code page.
586 cp_to_ucs2_size = 2*65536;
589 * Single byte code page.
591 cp_to_ucs2_size = 2*256;
595 * Free any old translation tables.
598 free_maps(pp_cp_to_ucs2, pp_ucs2_to_cp);
/* NOTE(review): the two malloc-failure messages below pass size_t values to a %u
   conversion; these types differ in width on LP64 targets - confirm how DEBUG formats them. */
600 if ((cp_to_ucs2 = (smb_ucs2_t *)malloc(cp_to_ucs2_size)) == NULL) {
601 DEBUG(0,("load_unicode_map: malloc fail for cp_to_ucs2 size %u.\n", cp_to_ucs2_size ));
605 if ((ucs2_to_cp = (uint16 *)malloc(ucs2_to_cp_size)) == NULL) {
606 DEBUG(0,("load_unicode_map: malloc fail for ucs2_to_cp size %u.\n", ucs2_to_cp_size ));
/* The two tables are read back to back from the file, codepage-to-ucs2 first. */
610 if(fread( (char *)cp_to_ucs2, 1, cp_to_ucs2_size, fp)!=cp_to_ucs2_size) {
611 DEBUG(0,("load_unicode_map: cannot read cp_to_ucs2 from file %s. Error was %s\n",
612 unicode_map_file_name, strerror(errno)));
616 if(fread( (char *)ucs2_to_cp, 1, ucs2_to_cp_size, fp)!=ucs2_to_cp_size) {
617 DEBUG(0,("load_unicode_map: cannot read ucs2_to_cp from file %s. Error was %s\n",
618 unicode_map_file_name, strerror(errno)));
623 * Now ensure the 16 bit values are in the correct endianness.
/* Each entry is re-read in place with SVAL at its own byte offset and stored back as a native 16-bit value. */
626 for (i = 0; i < cp_to_ucs2_size/2; i++)
627 cp_to_ucs2[i] = SVAL(cp_to_ucs2,i*2);
629 for (i = 0; i < ucs2_to_cp_size/2; i++)
630 ucs2_to_cp[i] = SVAL(ucs2_to_cp,i*2);
/* Publish the freshly loaded tables to the caller. */
634 *pp_cp_to_ucs2 = cp_to_ucs2;
635 *pp_ucs2_to_cp = ucs2_to_cp;
641 /* pseudo destructor :-) */
/* Presumably the shared failure path (its label is not visible here): drop any maps and install the default identity map - confirm. */
646 free_maps(pp_cp_to_ucs2, pp_ucs2_to_cp);
648 default_unicode_map(pp_cp_to_ucs2, pp_ucs2_to_cp);
653 /*******************************************************************
654 Load a dos codepage to unicode and vice-versa map.
655 ********************************************************************/
657 BOOL load_dos_unicode_map(int codepage)
659 fstring codepage_str;
661 slprintf(codepage_str, sizeof(fstring)-1, "%03d", codepage);
662 return load_unicode_map(codepage_str, &doscp_to_ucs2, &ucs2_to_doscp);
665 /*******************************************************************
666 Load a UNIX codepage to unicode and vice-versa map.
667 ********************************************************************/
669 BOOL load_unix_unicode_map(const char *unix_char_set)
671 fstring upper_unix_char_set;
673 fstrcpy(upper_unix_char_set, unix_char_set);
674 strupper(upper_unix_char_set);
675 return load_unicode_map(upper_unix_char_set, &unixcp_to_ucs2, &ucs2_to_unixcp);
678 /*******************************************************************
679 The following functions reproduce many of the non-UNICODE standard
680 string functions in Samba.
681 ********************************************************************/
683 /*******************************************************************
684 Convert a UNICODE string to multibyte format. Note that the 'src' is in
685 native byte order, not little endian. Always zero terminates.
687 ********************************************************************/
689 static char *unicode_to_multibyte(char *dst, const smb_ucs2_t *src,
690 size_t dst_len, const uint16 *ucs2_to_cp)
694 for(dst_pos = 0; *src && (dst_pos < dst_len - 1);) {
695 smb_ucs2_t val = ucs2_to_cp[*src++];
697 dst[dst_pos++] = (char)val;
700 if(dst_pos >= dst_len - 2)
704 * A 2 byte value is always written as
705 * high/low into the buffer stream.
708 dst[dst_pos++] = (char)((val >> 8) & 0xff);
709 dst[dst_pos++] = (char)(val & 0xff);
718 /*******************************************************************
719 Convert a multibyte string to UNICODE format. Note that the 'dst' is in
720 native byte order, not little endian. Always zero terminates.
722 ********************************************************************/
724 smb_ucs2_t *multibyte_to_unicode(smb_ucs2_t *dst, const char *src,
725 size_t dst_len, smb_ucs2_t *cp_to_ucs2)
/* dst_len arrives in bytes and is turned into a count of smb_ucs2_t slots here. */
729 dst_len /= sizeof(smb_ucs2_t); /* Convert to smb_ucs2_t units. */
/* NOTE(review): the loop condition tests src[i], where i is the output index, while the
   body reads the current character through *src and src[1]. If the lines not visible
   here advance src itself, the terminator test looks i bytes past the current input
   position and can miss the end of the string - confirm. */
731 for(i = 0; (i < (dst_len - 1)) && src[i];) {
732 size_t skip = skip_multibyte_char(*src);
733 smb_ucs2_t val = (*src & 0xff);
736 * If this is a multibyte character
737 * then work out the index value for the unicode conversion.
/* Combine the lead byte and the following byte into a 16-bit table index. */
741 val = ((val << 8) | (src[1] & 0xff));
743 dst[i++] = cp_to_ucs2[val];
755 /*******************************************************************
756 Convert a UNICODE string to multibyte format. Note that the 'src' is in
757 native byte order, not little endian. Always zero terminates.
758 This function may be replaced if the MB codepage format is an
759 encoded one (ie. utf8, hex). See the code in lib/kanji.c
760 for details. dst_len is in bytes.
761 ********************************************************************/
763 char *unicode_to_unix(char *dst, const smb_ucs2_t *src, size_t dst_len)
765 return unicode_to_multibyte(dst, src, dst_len, ucs2_to_unixcp);
768 /*******************************************************************
769 Convert a UNIX string to UNICODE format. Note that the 'dst' is in
770 native byte order, not little endian. Always zero terminates.
771 This function may be replaced if the UNIX codepage format is a
772 multi-byte one (ie. JIS, SJIS or utf8). See the code in lib/kanji.c
773 for details. dst_len is in bytes, not ucs2 units.
774 ********************************************************************/
776 smb_ucs2_t *unix_to_unicode(smb_ucs2_t *dst, const char *src, size_t dst_len)
778 return multibyte_to_unicode(dst, src, dst_len, unixcp_to_ucs2);
781 /*******************************************************************
782 Convert a UNICODE string to DOS format. Note that the 'src' is in
783 native byte order, not little endian. Always zero terminates.
785 ********************************************************************/
787 char *unicode_to_dos(char *dst, const smb_ucs2_t *src, size_t dst_len)
789 return unicode_to_multibyte(dst, src, dst_len, ucs2_to_doscp);
792 /*******************************************************************
793 Convert a DOS string to UNICODE format. Note that the 'dst' is in
794 native byte order, not little endian. Always zero terminates.
795 This function may be replaced if the DOS codepage format is a
796 multi-byte one (ie. JIS, SJIS or utf8). See the code in lib/kanji.c
797 for details. dst_len is in bytes, not ucs2 units.
798 ********************************************************************/
800 smb_ucs2_t *dos_to_unicode(smb_ucs2_t *dst, const char *src, size_t dst_len)
802 return multibyte_to_unicode(dst, src, dst_len, doscp_to_ucs2);
805 /*******************************************************************
806 Count the number of characters in a smb_ucs2_t string.
807 ********************************************************************/
809 size_t strlen_w(const smb_ucs2_t *src)
813 for(len = 0; *src; len++)
819 /*******************************************************************
820 Safe wstring copy into a known length string. maxlength includes
821 the terminating zero. maxlength is in bytes; it is converted to ucs2 units internally.
822 ********************************************************************/
824 smb_ucs2_t *safe_strcpy_w(smb_ucs2_t *dest,const smb_ucs2_t *src, size_t maxlength)
829 DEBUG(0,("ERROR: NULL dest in safe_strcpy_w\n"));
838 maxlength /= sizeof(smb_ucs2_t);
840 ucs2_len = strlen_w(src);
842 if (ucs2_len >= maxlength) {
844 DEBUG(0,("ERROR: string overflow by %u bytes in safe_strcpy_w [%.50s]\n",
845 (unsigned int)((ucs2_len-maxlength)*sizeof(smb_ucs2_t)),
846 unicode_to_unix(out,src,sizeof(out))) );
847 ucs2_len = maxlength - 1;
850 memcpy(dest, src, ucs2_len*sizeof(smb_ucs2_t));
855 /*******************************************************************
856 Safe string cat into a string. maxlength includes the terminating zero.
857 maxlength is in ucs2 units.
858 ********************************************************************/
860 smb_ucs2_t *safe_strcat_w(smb_ucs2_t *dest, const smb_ucs2_t *src, size_t maxlength)
862 size_t ucs2_src_len, ucs2_dest_len;
865 DEBUG(0,("ERROR: NULL dest in safe_strcat_w\n"));
872 ucs2_src_len = strlen_w(src);
873 ucs2_dest_len = strlen_w(dest);
875 if (ucs2_src_len + ucs2_dest_len >= maxlength) {
877 int new_len = maxlength - ucs2_dest_len - 1;
878 DEBUG(0,("ERROR: string overflow by %u characters in safe_strcat_w [%.50s]\n",
879 (unsigned int)(sizeof(smb_ucs2_t)*(ucs2_src_len + ucs2_dest_len - maxlength)),
880 unicode_to_unix(out,src,sizeof(out))) );
881 ucs2_src_len = (size_t)(new_len > 0 ? new_len : 0);
884 memcpy(&dest[ucs2_dest_len], src, ucs2_src_len*sizeof(smb_ucs2_t));
885 dest[ucs2_dest_len + ucs2_src_len] = 0;
889 /*******************************************************************
890 Compare the two strings s1 and s2.
891 ********************************************************************/
893 int strcmp_w(const smb_ucs2_t *s1, const smb_ucs2_t *s2)
910 /*******************************************************************
911 Compare the first n characters of s1 to s2. len is in ucs2 units.
912 ********************************************************************/
914 int strncmp_w(const smb_ucs2_t *s1, const smb_ucs2_t *s2, size_t len)
918 for (; len != 0; --len) {
932 /*******************************************************************
933 Search for string s2 in s1.
934 ********************************************************************/
936 smb_ucs2_t *strstr_w(const smb_ucs2_t *s1, const smb_ucs2_t *s2)
938 size_t len = strlen_w(s2);
941 return (smb_ucs2_t *)s1;
945 if (strncmp_w(s1, s2, len) == 0)
946 return (smb_ucs2_t *)s1;
952 /*******************************************************************
953 Search for ucs2 char c from the beginning of s.
954 ********************************************************************/
956 smb_ucs2_t *strchr_w(const smb_ucs2_t *s, smb_ucs2_t c)
960 return (smb_ucs2_t *)s;
966 /*******************************************************************
967 Search for ucs2 char c from the end of s.
968 ********************************************************************/
970 smb_ucs2_t *strrchr_w(const smb_ucs2_t *s, smb_ucs2_t c)
972 smb_ucs2_t *retval = 0;
976 retval = (smb_ucs2_t *)s;
982 /*******************************************************************
983 Search token from s1 separated by any ucs2 char of s2.
984 ********************************************************************/
986 smb_ucs2_t *strtok_w(smb_ucs2_t *s1, const smb_ucs2_t *s2)
988 static smb_ucs2_t *s = NULL;
997 for (q = s1; *s1; s1++) {
998 smb_ucs2_t *p = strchr_w(s2, *s1);
1016 /*******************************************************************
1017 Duplicate a ucs2 string.
1018 ********************************************************************/
1020 smb_ucs2_t *strdup_w(const smb_ucs2_t *s)
1022 size_t newlen = (strlen_w(s)+1)*sizeof(smb_ucs2_t);
1023 smb_ucs2_t *newstr = (smb_ucs2_t *)malloc(newlen);
1026 safe_strcpy_w(newstr, s, newlen);
1030 /*******************************************************************
1031 Mapping tables for UNICODE character. Allows toupper/tolower and
1032 isXXX functions to work.
1034 tridge: split into 2 pieces. This saves us 5/6 of the memory
1035 with a small speed penalty
1036 The magic constants are the lower/upper range of the tables two
1038 ********************************************************************/
1043 unsigned char flags;
1044 } smb_unicode_table_t;
1046 #define TABLE1_BOUNDARY 9450
1047 #define TABLE2_BOUNDARY 64256
1049 static smb_unicode_table_t map_table1[] = {
1050 #include "unicode_map_table1.h"
1053 static smb_unicode_table_t map_table2[] = {
1054 #include "unicode_map_table2.h"
1057 static unsigned char map_table_flags(smb_ucs2_t v)
1059 if (v < TABLE1_BOUNDARY) return map_table1[v].flags;
1060 if (v >= TABLE2_BOUNDARY) return map_table2[v - TABLE2_BOUNDARY].flags;
1064 static smb_ucs2_t map_table_lower(smb_ucs2_t v)
1066 if (v < TABLE1_BOUNDARY) return map_table1[v].lower;
1067 if (v >= TABLE2_BOUNDARY) return map_table2[v - TABLE2_BOUNDARY].lower;
1071 static smb_ucs2_t map_table_upper(smb_ucs2_t v)
1073 if (v < TABLE1_BOUNDARY) return map_table1[v].upper;
1074 if (v >= TABLE2_BOUNDARY) return map_table2[v - TABLE2_BOUNDARY].upper;
1078 /*******************************************************************
1079 Is an upper case wchar.
1080 ********************************************************************/
1082 int isupper_w( smb_ucs2_t val)
1084 return (map_table_flags(val) & UNI_UPPER);
1087 /*******************************************************************
1088 Is a lower case wchar.
1089 ********************************************************************/
1091 int islower_w( smb_ucs2_t val)
1093 return (map_table_flags(val) & UNI_LOWER);
1096 /*******************************************************************
1098 ********************************************************************/
1100 int isdigit_w( smb_ucs2_t val)
1102 return (map_table_flags(val) & UNI_DIGIT);
1105 /*******************************************************************
1106 Is a hex digit wchar.
1107 ********************************************************************/
1109 int isxdigit_w( smb_ucs2_t val)
1111 return (map_table_flags(val) & UNI_XDIGIT);
1114 /*******************************************************************
1116 ********************************************************************/
1118 int isspace_w( smb_ucs2_t val)
1120 return (map_table_flags(val) & UNI_SPACE);
1123 /*******************************************************************
1124 Convert a wchar to upper case.
1125 ********************************************************************/
1127 smb_ucs2_t toupper_w( smb_ucs2_t val )
1129 return map_table_upper(val);
1132 /*******************************************************************
1133 Convert a wchar to lower case.
1134 ********************************************************************/
1136 smb_ucs2_t tolower_w( smb_ucs2_t val )
1138 return map_table_lower(val);
1141 static smb_ucs2_t *last_ptr = NULL;
1143 void set_first_token_w(smb_ucs2_t *ptr)
1148 /****************************************************************************
1149 Get the next token from a string, return False if none found
1150 handles double-quotes.
1151 Based on a routine by GJC@VILLAGE.COM.
1152 Extensively modified by Andrew.Tridgell@anu.edu.au
1153 bufsize is in bytes.
1154 ****************************************************************************/
1156 static smb_ucs2_t sep_list[] = { (smb_ucs2_t)' ', (smb_ucs2_t)'\t', (smb_ucs2_t)'\n', (smb_ucs2_t)'\r', 0};
1157 static smb_ucs2_t quotechar = (smb_ucs2_t)'\"';
1159 BOOL next_token_w(smb_ucs2_t **ptr, smb_ucs2_t *buff, smb_ucs2_t *sep, size_t bufsize)
1166 * Convert bufsize to smb_ucs2_t units.
1169 bufsize /= sizeof(smb_ucs2_t);
1179 * Default to simple separators.
1186 * Find the first non sep char.
1189 while(*s && strchr_w(sep,*s))
1200 * Copy over the token.
1203 for (quoted = False; len < bufsize && *s && (quoted || !strchr_w(sep,*s)); s++) {
1204 if (*s == quotechar) {
1212 *ptr = (*s) ? s+1 : s;
1219 /****************************************************************************
1220 Convert list of tokens to array; dependent on above routine.
1221 Uses last_ptr from above - bit of a hack.
1222 ****************************************************************************/
1224 smb_ucs2_t **toktocliplist_w(int *ctok, smb_ucs2_t *sep)
1226 smb_ucs2_t *s=last_ptr;
1228 smb_ucs2_t **ret, **iret;
1233 while(*s && strchr_w(sep,*s))
1245 while(*s && (!strchr_w(sep,*s)))
1247 while(*s && strchr_w(sep,*s))
1254 if (!(ret=iret=malloc(ictok*sizeof(smb_ucs2_t *))))
1268 /*******************************************************************
1269 Case insensitive string comparison.
1270 ********************************************************************/
1272 int StrCaseCmp_w(const smb_ucs2_t *s, const smb_ucs2_t *t)
1275 * Compare until we run out of string, either t or s, or find a difference.
1278 while (*s && *t && toupper_w(*s) == toupper_w(*t)) {
1283 return(toupper_w(*s) - toupper_w(*t));
1286 /*******************************************************************
1287 Case insensitive string comparison, length limited.
1289 ********************************************************************/
1291 int StrnCaseCmp_w(const smb_ucs2_t *s, const smb_ucs2_t *t, size_t n)
1294 * Compare until we run out of string, either t or s, or chars.
1297 while (n && *s && *t && toupper_w(*s) == toupper_w(*t)) {
1304 * Not run out of chars - strings are different lengths.
1308 return(toupper_w(*s) - toupper_w(*t));
1311 * Identical up to where we run out of chars,
1312 * and strings are same length.
1318 /*******************************************************************
1320 ********************************************************************/
1322 BOOL strequal_w(const smb_ucs2_t *s1, const smb_ucs2_t *s2)
1329 return(StrCaseCmp_w(s1,s2)==0);
1332 /*******************************************************************
1333 Compare 2 strings up to and including the nth char. n is in ucs2
1335 ******************************************************************/
1337 BOOL strnequal_w(const smb_ucs2_t *s1,const smb_ucs2_t *s2,size_t n)
1341 if (!s1 || !s2 || !n)
1344 return(StrnCaseCmp_w(s1,s2,n)==0);
1347 /*******************************************************************
1348 Compare 2 strings (case sensitive).
1349 ********************************************************************/
1351 BOOL strcsequal_w(const smb_ucs2_t *s1,const smb_ucs2_t *s2)
1358 return(strcmp_w(s1,s2)==0);
1361 /*******************************************************************
1362 Convert a string to lower case.
1363 ********************************************************************/
1365 void strlower_w(smb_ucs2_t *s)
1374 /*******************************************************************
1375 Convert a string to upper case.
1376 ********************************************************************/
1378 void strupper_w(smb_ucs2_t *s)
1387 /*******************************************************************
1388 Convert a string to "normal" form.
1389 ********************************************************************/
1391 void strnorm_w(smb_ucs2_t *s)
1393 extern int case_default;
1394 if (case_default == CASE_UPPER)
1400 /*******************************************************************
1401 Check if a string is in "normal" case.
1402 ********************************************************************/
1404 BOOL strisnormal_w(smb_ucs2_t *s)
1406 extern int case_default;
1407 if (case_default == CASE_UPPER)
1408 return(!strhaslower_w(s));
1410 return(!strhasupper_w(s));
1413 /****************************************************************************
1415 ****************************************************************************/
1417 void string_replace_w(smb_ucs2_t *s, smb_ucs2_t oldc, smb_ucs2_t newc)
1426 /*******************************************************************
1427 Skip past some strings in a buffer. n is in bytes.
1428 ********************************************************************/
1430 smb_ucs2_t *skip_string_w(smb_ucs2_t *buf,size_t n)
/* NOTE(review): buf is an smb_ucs2_t pointer, so += already steps in whole ucs2
   elements. Scaling the string length by sizeof(smb_ucs2_t) therefore advances further
   than one string plus its terminator whenever that size is greater than 1 - confirm
   whether the increment should be strlen_w(buf) + 1. */
1433 buf += (strlen_w(buf)*sizeof(smb_ucs2_t)) + 1;
1437 /*******************************************************************
1438 Count the number of characters in a string. Same as strlen_w in
1439 smb_ucs2_t string units.
1440 ********************************************************************/
1442 size_t str_charnum_w(const smb_ucs2_t *s)
1447 /*******************************************************************
1448 Trim the specified elements off the front and back of a string.
1449 ********************************************************************/
1451 BOOL trim_string_w(smb_ucs2_t *s,const smb_ucs2_t *front,const smb_ucs2_t *back)
1454 size_t front_len = (front && *front) ? strlen_w(front) : 0;
1455 size_t back_len = (back && *back) ? strlen_w(back) : 0;
1458 while (front_len && strncmp_w(s, front, front_len) == 0) {
1463 if (!(*p = p[front_len]))
1470 s_len = strlen_w(s);
1471 while ((s_len >= back_len) &&
1472 (strncmp_w(s + s_len - back_len, back, back_len)==0)) {
1474 s[s_len - back_len] = 0;
1475 s_len = strlen_w(s);
1482 /****************************************************************************
1483 Does a string have any uppercase chars in it ?
1484 ****************************************************************************/
1486 BOOL strhasupper_w(const smb_ucs2_t *s)
1496 /****************************************************************************
1497 Does a string have any lowercase chars in it ?
1498 ****************************************************************************/
1500 BOOL strhaslower_w(const smb_ucs2_t *s)
1510 /****************************************************************************
1511 Find the number of 'c' chars in a string.
1512 ****************************************************************************/
1514 size_t count_chars_w(const smb_ucs2_t *s,smb_ucs2_t c)
1526 /*******************************************************************
1527 Return True if a string consists only of one particular character.
1528 ********************************************************************/
1530 BOOL str_is_all_w(const smb_ucs2_t *s,smb_ucs2_t c)
1545 /*******************************************************************
1546 Paranoid strcpy into a buffer of given length (includes terminating
1547 zero. Strips out all but 'a-Z0-9' and replaces with '_'. Deliberately
1548 does *NOT* check for multibyte characters. Don't change it !
1549 maxlength is in ucs2 units.
1550 ********************************************************************/
1552 smb_ucs2_t *alpha_strcpy_w(smb_ucs2_t *dest, const smb_ucs2_t *src, size_t maxlength)
1557 DEBUG(0,("ERROR: NULL dest in alpha_strcpy_w\n"));
1566 len = strlen_w(src);
1567 if (len >= maxlength)
1568 len = maxlength - 1;
1570 for(i = 0; i < len; i++) {
1571 smb_ucs2_t val = src[i];
1572 if(isupper_w(val) ||islower_w(val) || isdigit_w(val))
1575 dest[i] = (smb_ucs2_t)'_';
1583 /****************************************************************************
1584 Like strncpy but always null terminates. Make sure there is room !
1585 The variable n should always be one less than the available size and is in
1587 ****************************************************************************/
1589 smb_ucs2_t *StrnCpy_w(smb_ucs2_t *dest,const smb_ucs2_t *src,size_t n)
1591 smb_ucs2_t *d = dest;
1599 while (n-- && (*d++ = *src++))
1605 /****************************************************************************
1606 Like strncpy but copies up to the character marker. Always null terminates.
1607 returns a pointer to the character marker in the source string (src).
1609 ****************************************************************************/
1611 smb_ucs2_t *strncpyn_w(smb_ucs2_t *dest, const smb_ucs2_t *src,size_t n, smb_ucs2_t c)
1616 p = strchr_w(src, c);
1619 smb_ucs2_t mbcval[2];
1622 DEBUG(5, ("strncpyn_w: separator character (%s) not found\n",
1623 unicode_to_unix(cval,mbcval,sizeof(cval)) ));
1627 str_len = PTR_DIFF(p, src) + 1;
1628 safe_strcpy_w(dest, src, MIN(n, str_len));
1633 /*************************************************************
1634 Routine to get hex characters and turn them into a 16 byte array.
1635 The array can be variable length, and any non-hex-numeric
1636 characters are skipped. "0xnn" or "0Xnn" is specially catered
1637 for. len is in bytes.
1638 Valid examples: "0A5D15"; "0x15, 0x49, 0xa2"; "59\ta9\te3\n"
1639 **************************************************************/
1641 static smb_ucs2_t hexprefix[] = { (smb_ucs2_t)'0', (smb_ucs2_t)'x', 0 };
1642 static smb_ucs2_t hexchars[] = { (smb_ucs2_t)'0', (smb_ucs2_t)'1', (smb_ucs2_t)'2', (smb_ucs2_t)'3',
1643 (smb_ucs2_t)'4', (smb_ucs2_t)'5', (smb_ucs2_t)'6', (smb_ucs2_t)'7',
1644 (smb_ucs2_t)'8', (smb_ucs2_t)'9', (smb_ucs2_t)'A', (smb_ucs2_t)'B',
1645 (smb_ucs2_t)'C', (smb_ucs2_t)'D', (smb_ucs2_t)'E', (smb_ucs2_t)'F', 0 };
1647 size_t strhex_to_str_w(char *p, size_t len, const smb_ucs2_t *strhex)
1650 size_t num_chars = 0;
1651 unsigned char lonybble, hinybble;
1652 smb_ucs2_t *p1 = NULL, *p2 = NULL;
1655 * Convert to smb_ucs2_t units.
1658 len /= sizeof(smb_ucs2_t);
1660 for (i = 0; i < len && strhex[i] != 0; i++) {
1661 if (strnequal_w(hexchars, hexprefix, 2)) {
1662 i++; /* skip two chars */
1666 if (!(p1 = strchr_w(hexchars, toupper_w(strhex[i]))))
1669 i++; /* next hex digit */
1671 if (!(p2 = strchr_w(hexchars, toupper_w(strhex[i]))))
1674 /* get the two nybbles */
1675 hinybble = (PTR_DIFF(p1, hexchars)/sizeof(smb_ucs2_t));
1676 lonybble = (PTR_DIFF(p2, hexchars)/sizeof(smb_ucs2_t));
1678 p[num_chars] = (hinybble << 4) | lonybble;
1687 /****************************************************************************
1688 Check if a string is part of a list.
1689 ****************************************************************************/
1691 BOOL in_list_w(smb_ucs2_t *s,smb_ucs2_t *list,BOOL casesensitive)
1699 while (next_token_w(&p,tok,LIST_SEP_W,sizeof(tok))) {
1700 if (casesensitive) {
1701 if (strcmp_w(tok,s) == 0)
1704 if (StrCaseCmp_w(tok,s) == 0)
1711 /* This is used to prevent lots of mallocs of size 2 */
1712 static smb_ucs2_t *null_string = NULL;
1714 /****************************************************************************
1715 Set a string value, allocing the space for the string.
1716 ****************************************************************************/
1718 BOOL string_init_w(smb_ucs2_t **dest,const smb_ucs2_t *src)
1723 if((null_string = (smb_ucs2_t *)malloc(sizeof(smb_ucs2_t))) == NULL) {
1724 DEBUG(0,("string_init_w: malloc fail for null_string.\n"));
1736 *dest = null_string;
1738 (*dest) = (smb_ucs2_t *)malloc(sizeof(smb_ucs2_t)*(l+1));
1739 if ((*dest) == NULL) {
1740 DEBUG(0,("Out of memory in string_init_w\n"));
1744 wpstrcpy(*dest,src);
1749 /****************************************************************************
1750 Free a string value.
1751 ****************************************************************************/
1753 void string_free_w(smb_ucs2_t **s)
1757 if (*s == null_string)
1764 /****************************************************************************
1765 Set a string value, allocing the space for the string, and deallocating any
1767 ****************************************************************************/
1769 BOOL string_set_w(smb_ucs2_t **dest,const smb_ucs2_t *src)
1771 string_free_w(dest);
1773 return(string_init_w(dest,src));
1776 /****************************************************************************
1777 Substitute a string for a pattern in another string. Make sure there is
1780 This routine looks for pattern in s and replaces it with
1781 insert. It may do multiple replacements.
1783 Any of " ; ' $ or ` in the insert string are replaced with _
1784 if len==0 then no length check is performed
1785 len is in ucs2 units.
1786 ****************************************************************************/
1788 void string_sub_w(smb_ucs2_t *s,const smb_ucs2_t *pattern,const smb_ucs2_t *insert, size_t len)
1791 ssize_t ls,lp,li, i;
1793 if (!insert || !pattern || !s)
1796 ls = (ssize_t)strlen_w(s);
1797 lp = (ssize_t)strlen_w(pattern);
1798 li = (ssize_t)strlen_w(insert);
1803 while (lp <= ls && (p = strstr_w(s,pattern))) {
1804 if (len && (ls + (li-lp) >= len)) {
1806 DEBUG(0,("ERROR: string overflow by %d in string_sub_w(%.50s, %d)\n",
1807 (int)(sizeof(smb_ucs2_t)*(ls + (li-lp) - len)),
1808 unicode_to_unix(out,pattern,sizeof(out)), (int)len*sizeof(smb_ucs2_t)));
1812 memmove(p+li,p+lp,sizeof(smb_ucs2_t)*(strlen_w(p+lp)+1));
1814 for (i=0;i<li;i++) {
1815 switch (insert[i]) {
1816 case (smb_ucs2_t)'`':
1817 case (smb_ucs2_t)'"':
1818 case (smb_ucs2_t)'\'':
1819 case (smb_ucs2_t)';':
1820 case (smb_ucs2_t)'$':
1821 case (smb_ucs2_t)'%':
1822 case (smb_ucs2_t)'\r':
1823 case (smb_ucs2_t)'\n':
1824 p[i] = (smb_ucs2_t)'_';
1835 void fstring_sub_w(smb_ucs2_t *s,const smb_ucs2_t *pattern,const smb_ucs2_t *insert)
1837 string_sub_w(s, pattern, insert, sizeof(wfstring));
1840 void pstring_sub_w(smb_ucs2_t *s,const smb_ucs2_t *pattern,smb_ucs2_t *insert)
1842 string_sub_w(s, pattern, insert, sizeof(wpstring));
1845 /****************************************************************************
1846 Similar to string_sub() but allows for any character to be substituted.
1848 if len==0 then no length check is performed.
1849 ****************************************************************************/
1851 void all_string_sub_w(smb_ucs2_t *s,const smb_ucs2_t *pattern,const smb_ucs2_t *insert, size_t len)
1856 if (!insert || !pattern || !s)
1859 ls = (ssize_t)strlen_w(s);
1860 lp = (ssize_t)strlen_w(pattern);
1861 li = (ssize_t)strlen_w(insert);
1866 while (lp <= ls && (p = strstr_w(s,pattern))) {
1867 if (len && (ls + (li-lp) >= len)) {
1869 DEBUG(0,("ERROR: string overflow by %d in all_string_sub_w(%.50s, %d)\n",
1870 (int)(sizeof(smb_ucs2_t)*(ls + (li-lp) - len)),
1871 unicode_to_unix(out,pattern,sizeof(out)), (int)len*sizeof(smb_ucs2_t)));
1875 memmove(p+li,p+lp,sizeof(smb_ucs2_t)*(strlen_w(p+lp)+1));
1877 memcpy(p, insert, li*sizeof(smb_ucs2_t));
1883 /****************************************************************************
1884 Splits out the front and back at a separator.
1885 ****************************************************************************/
1887 void split_at_last_component_w(smb_ucs2_t *path, smb_ucs2_t *front, smb_ucs2_t sep, smb_ucs2_t *back)
1889 smb_ucs2_t *p = strrchr_w(path, sep);
1895 wpstrcpy(front, path);
1899 wpstrcpy(back, p+1);
1900 *p = (smb_ucs2_t)'\\';
1908 /****************************************************************************
1909 Write an octal as a string.
1910 ****************************************************************************/
1912 smb_ucs2_t *octal_string_w(int i)
1914 static smb_ucs2_t wret[64];
1918 slprintf(ret, sizeof(ret), "-1");
1920 slprintf(ret, sizeof(ret), "0%o", i);
1921 return unix_to_unicode(wret, ret, sizeof(wret));
1925 /****************************************************************************
1926 Truncate a string at a specified length.
1927 length is in ucs2 units.
1928 ****************************************************************************/
1930 smb_ucs2_t *string_truncate_w(smb_ucs2_t *s, size_t length)
1932 if (s && strlen_w(s) > length)