2 Unix SMB/Netbios implementation.
4 Samba utility functions
5 Copyright (C) Andrew Tridgell 1992-1998
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2 of the License, or
10 (at your option) any later version.
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24 extern int DEBUGLEVEL;
27 * The following are the codepage to ucs2 and vice versa maps.
28 * These are dynamically loaded from a unicode translation file.
31 static smb_ucs2_t *doscp_to_ucs2;
32 static uint16 *ucs2_to_doscp;
34 static smb_ucs2_t *unixcp_to_ucs2;
35 static uint16 *ucs2_to_unixcp;
41 /*******************************************************************
42 Write a string in (little-endian) unicode format. src is in
43 the current DOS codepage. len is the length in bytes of the
44 string pointed to by dst.
46 the return value is the length of the string *without* the trailing two-byte zero terminator.
48 ********************************************************************/
50 int dos_PutUniCode(char *dst,const char *src, ssize_t len)
53 while (*src && (len > 2)) {
54 size_t skip = skip_multibyte_char(*src);
55 smb_ucs2_t val = (*src & 0xff);
58 * If this is a multibyte character (and all DOS/Windows
59 * codepages have at maximum 2 byte multibyte characters)
60 * then work out the index value for the unicode conversion.
64 val = ((val << 8) | (src[1] & 0xff));
66 SSVAL(dst,ret,doscp_to_ucs2[val]);
78 /*******************************************************************
79 Skip past some unicode strings in a buffer.
80 ********************************************************************/
82 char *skip_unicode_string(char *buf,int n)
92 /*******************************************************************
93 Return a DOS codepage version of a little-endian unicode string.
94 Hack alert: uses fixed buffer(s).
95 ********************************************************************/
97 char *dos_unistrn2(uint16 *src, int len)
99 static char lbufs[8][MAXUNI];
101 char *lbuf = lbufs[nexti];
106 for (p = lbuf; (len > 0) && (p-lbuf < MAXUNI-3) && *src; len--, src++) {
107 uint16 ucs2_val = SVAL(src,0);
108 uint16 cp_val = ucs2_to_doscp[ucs2_val];
113 *p++ = (cp_val >> 8) & 0xff;
114 *p++ = (cp_val & 0xff);
122 static char lbufs[8][MAXUNI];
125 /*******************************************************************
126 Return a DOS codepage version of a little-endian unicode string.
127 Hack alert: uses fixed buffer(s).
128 ********************************************************************/
130 char *dos_unistr2(uint16 *src)
132 char *lbuf = lbufs[nexti];
137 for (p = lbuf; *src && (p-lbuf < MAXUNI-3); src++) {
138 uint16 ucs2_val = SVAL(src,0);
139 uint16 cp_val = ucs2_to_doscp[ucs2_val];
144 *p++ = (cp_val >> 8) & 0xff;
145 *p++ = (cp_val & 0xff);
153 /*******************************************************************
154 Return a DOS codepage version of a little-endian unicode string
155 ********************************************************************/
157 char *dos_unistr2_to_str(UNISTR2 *str)
159 char *lbuf = lbufs[nexti];
161 uint16 *src = str->buffer;
162 int max_size = MIN(sizeof(str->buffer)-3, str->uni_str_len);
166 for (p = lbuf; *src && p-lbuf < max_size; src++) {
167 uint16 ucs2_val = SVAL(src,0);
168 uint16 cp_val = ucs2_to_doscp[ucs2_val];
173 *p++ = (cp_val >> 8) & 0xff;
174 *p++ = (cp_val & 0xff);
182 /*******************************************************************
183 Return a number stored in a buffer
184 ********************************************************************/
186 uint32 buffer2_to_uint32(BUFFER2 *str)
188 if (str->buf_len == 4)
189 return IVAL(str->buffer, 0);
194 /*******************************************************************
195 Return a DOS codepage version of a NOTunicode string
196 ********************************************************************/
198 char *dos_buffer2_to_str(BUFFER2 *str)
200 char *lbuf = lbufs[nexti];
202 uint16 *src = str->buffer;
203 int max_size = MIN(sizeof(str->buffer)-3, str->buf_len/2);
207 for (p = lbuf; *src && p-lbuf < max_size; src++) {
208 uint16 ucs2_val = SVAL(src,0);
209 uint16 cp_val = ucs2_to_doscp[ucs2_val];
214 *p++ = (cp_val >> 8) & 0xff;
215 *p++ = (cp_val & 0xff);
223 /*******************************************************************
224 Return a dos codepage version of a NOTunicode string
225 ********************************************************************/
227 char *dos_buffer2_to_multistr(BUFFER2 *str)
229 char *lbuf = lbufs[nexti];
231 uint16 *src = str->buffer;
232 int max_size = MIN(sizeof(str->buffer)-3, str->buf_len/2);
236 for (p = lbuf; p-lbuf < max_size; src++) {
240 uint16 ucs2_val = SVAL(src,0);
241 uint16 cp_val = ucs2_to_doscp[ucs2_val];
246 *p++ = (cp_val >> 8) & 0xff;
247 *p++ = (cp_val & 0xff);
256 /*******************************************************************
257 Create a null-terminated unicode string from a null-terminated DOS codepage string.
259 Return number of unicode chars copied, excluding the null character.
260 Unicode strings created are in little-endian format.
261 ********************************************************************/
263 size_t dos_struni2(char *dst, const char *src, size_t max_len)
271 for (; *src && len < max_len-2; len++, dst +=2) {
272 size_t skip = skip_multibyte_char(*src);
273 smb_ucs2_t val = (*src & 0xff);
276 * If this is a multibyte character (and all DOS/Windows
277 * codepages have at maximum 2 byte multibyte characters)
278 * then work out the index value for the unicode conversion.
282 val = ((val << 8) | (src[1] & 0xff));
284 SSVAL(dst,0,doscp_to_ucs2[val]);
297 /*******************************************************************
298 Return a DOS codepage version of a little-endian unicode string.
299 Hack alert: uses fixed buffer(s).
300 ********************************************************************/
302 char *dos_unistr(char *buf)
304 char *lbuf = lbufs[nexti];
305 uint16 *src = (uint16 *)buf;
310 for (p = lbuf; *src && p-lbuf < MAXUNI-3; src++) {
311 uint16 ucs2_val = SVAL(src,0);
312 uint16 cp_val = ucs2_to_doscp[ucs2_val];
317 *p++ = (cp_val >> 8) & 0xff;
318 *p++ = (cp_val & 0xff);
326 /*******************************************************************
327 Strcpy for unicode strings. returns length (in num of wide chars)
328 ********************************************************************/
330 int unistrcpy(char *dst, char *src)
333 uint16 *wsrc = (uint16 *)src;
334 uint16 *wdst = (uint16 *)dst;
347 /*******************************************************************
348 Free any existing maps.
349 ********************************************************************/
351 static void free_maps(smb_ucs2_t **pp_cp_to_ucs2, uint16 **pp_ucs2_to_cp)
353 /* this handles identity mappings where we share the pointer */
354 if (*pp_ucs2_to_cp == *pp_cp_to_ucs2) {
355 *pp_ucs2_to_cp = NULL;
358 if (*pp_cp_to_ucs2) {
359 free(*pp_cp_to_ucs2);
360 *pp_cp_to_ucs2 = NULL;
363 if (*pp_ucs2_to_cp) {
364 free(*pp_ucs2_to_cp);
365 *pp_ucs2_to_cp = NULL;
370 /*******************************************************************
371 Build a default (null) codepage to unicode map.
372 ********************************************************************/
374 void default_unicode_map(smb_ucs2_t **pp_cp_to_ucs2, uint16 **pp_ucs2_to_cp)
378 free_maps(pp_cp_to_ucs2, pp_ucs2_to_cp);
380 if ((*pp_ucs2_to_cp = (uint16 *)malloc(2*65536)) == NULL) {
381 DEBUG(0,("default_unicode_map: malloc fail for ucs2_to_cp size %u.\n", 2*65536));
385 *pp_cp_to_ucs2 = *pp_ucs2_to_cp; /* Default map is an identity. */
386 for (i = 0; i < 65536; i++)
387 (*pp_cp_to_ucs2)[i] = i;
390 /*******************************************************************
391 Load a codepage to unicode and vice versa map.
392 ********************************************************************/
394 BOOL load_unicode_map(const char *codepage, smb_ucs2_t **pp_cp_to_ucs2, uint16 **pp_ucs2_to_cp)
396 pstring unicode_map_file_name;
399 smb_ucs2_t *cp_to_ucs2 = *pp_cp_to_ucs2;
400 uint16 *ucs2_to_cp = *pp_ucs2_to_cp;
401 size_t cp_to_ucs2_size;
402 size_t ucs2_to_cp_size;
405 char buf[UNICODE_MAP_HEADER_SIZE];
407 DEBUG(5, ("load_unicode_map: loading unicode map for codepage %s.\n", codepage));
409 if (*codepage == '\0')
412 if(strlen(CODEPAGEDIR) + 13 + strlen(codepage) > sizeof(unicode_map_file_name)) {
413 DEBUG(0,("load_unicode_map: filename too long to load\n"));
417 pstrcpy(unicode_map_file_name, CODEPAGEDIR);
418 pstrcat(unicode_map_file_name, "/");
419 pstrcat(unicode_map_file_name, "unicode_map.");
420 pstrcat(unicode_map_file_name, codepage);
422 if(sys_stat(unicode_map_file_name,&st)!=0) {
423 DEBUG(0,("load_unicode_map: filename %s does not exist.\n",
424 unicode_map_file_name));
430 if ((size != UNICODE_MAP_HEADER_SIZE + 4*65536) && (size != UNICODE_MAP_HEADER_SIZE +(2*256 + 2*65536))) {
431 DEBUG(0,("load_unicode_map: file %s is an incorrect size for a \
432 unicode map file (size=%d).\n", unicode_map_file_name, (int)size));
436 if((fp = sys_fopen( unicode_map_file_name, "r")) == NULL) {
437 DEBUG(0,("load_unicode_map: cannot open file %s. Error was %s\n",
438 unicode_map_file_name, strerror(errno)));
442 if(fread( buf, 1, UNICODE_MAP_HEADER_SIZE, fp)!=UNICODE_MAP_HEADER_SIZE) {
443 DEBUG(0,("load_unicode_map: cannot read header from file %s. Error was %s\n",
444 unicode_map_file_name, strerror(errno)));
448 /* Check the version value */
449 if(SVAL(buf,UNICODE_MAP_VERSION_OFFSET) != UNICODE_MAP_FILE_VERSION_ID) {
450 DEBUG(0,("load_unicode_map: filename %s has incorrect version id. \
451 Needed %hu, got %hu.\n",
452 unicode_map_file_name, (uint16)UNICODE_MAP_FILE_VERSION_ID,
453 SVAL(buf,UNICODE_MAP_VERSION_OFFSET)));
457 /* Check the codepage value */
458 if(!strequal(&buf[UNICODE_MAP_CLIENT_CODEPAGE_OFFSET], codepage)) {
459 DEBUG(0,("load_unicode_map: codepage %s in file %s is not the same as that \
460 requested (%s).\n", &buf[UNICODE_MAP_CLIENT_CODEPAGE_OFFSET], unicode_map_file_name, codepage ));
464 ucs2_to_cp_size = 2*65536;
465 if (size == UNICODE_MAP_HEADER_SIZE + 4*65536) {
467 * This is a multibyte code page.
469 cp_to_ucs2_size = 2*65536;
472 * Single byte code page.
474 cp_to_ucs2_size = 2*256;
478 * Free any old translation tables.
481 free_maps(pp_cp_to_ucs2, pp_ucs2_to_cp);
483 if ((cp_to_ucs2 = (smb_ucs2_t *)malloc(cp_to_ucs2_size)) == NULL) {
484 DEBUG(0,("load_unicode_map: malloc fail for cp_to_ucs2 size %u.\n", cp_to_ucs2_size ));
488 if ((ucs2_to_cp = (uint16 *)malloc(ucs2_to_cp_size)) == NULL) {
489 DEBUG(0,("load_unicode_map: malloc fail for ucs2_to_cp size %u.\n", ucs2_to_cp_size ));
493 if(fread( (char *)cp_to_ucs2, 1, cp_to_ucs2_size, fp)!=cp_to_ucs2_size) {
494 DEBUG(0,("load_unicode_map: cannot read cp_to_ucs2 from file %s. Error was %s\n",
495 unicode_map_file_name, strerror(errno)));
499 if(fread( (char *)ucs2_to_cp, 1, ucs2_to_cp_size, fp)!=ucs2_to_cp_size) {
500 DEBUG(0,("load_unicode_map: cannot read ucs2_to_cp from file %s. Error was %s\n",
501 unicode_map_file_name, strerror(errno)));
506 * Now ensure the 16 bit values are in the correct endianness.
509 for (i = 0; i < cp_to_ucs2_size/2; i++)
510 cp_to_ucs2[i] = SVAL(cp_to_ucs2,i*2);
512 for (i = 0; i < ucs2_to_cp_size/2; i++)
513 ucs2_to_cp[i] = SVAL(ucs2_to_cp,i*2);
517 *pp_cp_to_ucs2 = cp_to_ucs2;
518 *pp_ucs2_to_cp = ucs2_to_cp;
524 /* pseudo destructor :-) */
529 free_maps(pp_cp_to_ucs2, pp_ucs2_to_cp);
531 default_unicode_map(pp_cp_to_ucs2, pp_ucs2_to_cp);
536 /*******************************************************************
537 Load a dos codepage to unicode and vice versa map.
538 ********************************************************************/
540 BOOL load_dos_unicode_map(int codepage)
542 fstring codepage_str;
544 slprintf(codepage_str, sizeof(fstring)-1, "%03d", codepage);
545 return load_unicode_map(codepage_str, &doscp_to_ucs2, &ucs2_to_doscp);
548 /*******************************************************************
549 Load a UNIX codepage to unicode and vice versa map.
550 ********************************************************************/
552 BOOL load_unix_unicode_map(const char *unix_char_set)
554 fstring upper_unix_char_set;
556 fstrcpy(upper_unix_char_set, unix_char_set);
557 strupper(upper_unix_char_set);
558 return load_unicode_map(upper_unix_char_set, &unixcp_to_ucs2, &ucs2_to_unixcp);
561 /*******************************************************************
562 The following functions reproduce many of the non-UNICODE standard
563 string functions in Samba.
564 ********************************************************************/
566 /*******************************************************************
567 Convert a UNICODE string to multibyte format. Note that the 'src' is in
568 native byte order, not little endian. Always zero terminates.
570 ********************************************************************/
572 static char *unicode_to_multibyte(char *dst, const smb_ucs2_t *src,
573 size_t dst_len, const uint16 *ucs2_to_cp)
577 for(i = 0; (i < (dst_len - 1)) && src[i];) {
578 smb_ucs2_t val = ucs2_to_cp[*src];
580 dst[i++] = (char)val;
581 } else if (i < (dst_len - 2)) {
584 * A 2 byte value is always written as
585 * high/low into the buffer stream.
588 dst[i++] = (char)((val >> 8) & 0xff);
589 dst[i++] = (char)(val & 0xff);
598 /*******************************************************************
599 Convert a multibyte string to UNICODE format. Note that the 'dst' is in
600 native byte order, not little endian. Always zero terminates.
602 ********************************************************************/
604 smb_ucs2_t *multibyte_to_unicode(smb_ucs2_t *dst, const char *src,
605 size_t dst_len, smb_ucs2_t *cp_to_ucs2)
609 dst_len /= sizeof(smb_ucs2_t); /* Convert to smb_ucs2_t units. */
611 for(i = 0; (i < (dst_len - 1)) && src[i];) {
612 size_t skip = skip_multibyte_char(*src);
613 smb_ucs2_t val = (*src & 0xff);
616 * If this is a multibyte character
617 * then work out the index value for the unicode conversion.
621 val = ((val << 8) | (src[1] & 0xff));
623 dst[i++] = cp_to_ucs2[val];
635 /*******************************************************************
636 Convert a UNICODE string to multibyte format. Note that the 'src' is in
637 native byte order, not little endian. Always zero terminates.
638 This function may be replaced if the MB codepage format is an
639 encoded one (ie. utf8, hex). See the code in lib/kanji.c
640 for details. dst_len is in bytes.
641 ********************************************************************/
643 char *unicode_to_unix(char *dst, const smb_ucs2_t *src, size_t dst_len)
645 return unicode_to_multibyte(dst, src, dst_len, ucs2_to_unixcp);
648 /*******************************************************************
649 Convert a UNIX string to UNICODE format. Note that the 'dst' is in
650 native byte order, not little endian. Always zero terminates.
651 This function may be replaced if the UNIX codepage format is a
652 multi-byte one (ie. JIS, SJIS or utf8). See the code in lib/kanji.c
653 for details. dst_len is in bytes, not ucs2 units.
654 ********************************************************************/
656 smb_ucs2_t *unix_to_unicode(smb_ucs2_t *dst, const char *src, size_t dst_len)
658 return multibyte_to_unicode(dst, src, dst_len, unixcp_to_ucs2);
661 /*******************************************************************
662 Convert a UNICODE string to DOS format. Note that the 'src' is in
663 native byte order, not little endian. Always zero terminates.
665 ********************************************************************/
667 char *unicode_to_dos(char *dst, const smb_ucs2_t *src, size_t dst_len)
669 return unicode_to_multibyte(dst, src, dst_len, ucs2_to_doscp);
672 /*******************************************************************
673 Convert a DOS string to UNICODE format. Note that the 'dst' is in
674 native byte order, not little endian. Always zero terminates.
675 This function may be replaced if the DOS codepage format is a
676 multi-byte one (ie. JIS, SJIS or utf8). See the code in lib/kanji.c
677 for details. dst_len is in bytes, not ucs2 units.
678 ********************************************************************/
680 smb_ucs2_t *dos_to_unicode(smb_ucs2_t *dst, const char *src, size_t dst_len)
682 return multibyte_to_unicode(dst, src, dst_len, doscp_to_ucs2);
685 /*******************************************************************
686 Count the number of characters in a smb_ucs2_t string.
687 ********************************************************************/
689 size_t wstrlen(const smb_ucs2_t *src)
693 for(len = 0; *src; len++)
699 /*******************************************************************
700 Safe wstring copy into a known length string. maxlength includes
701 the terminating zero. maxlength is in bytes.
702 ********************************************************************/
704 smb_ucs2_t *safe_wstrcpy(smb_ucs2_t *dest,const smb_ucs2_t *src, size_t maxlength)
709 DEBUG(0,("ERROR: NULL dest in safe_wstrcpy\n"));
718 ucs2_len = wstrlen(src);
720 if (ucs2_len >= (maxlength/sizeof(smb_ucs2_t))) {
722 DEBUG(0,("ERROR: string overflow by %u bytes in safe_wstrcpy [%.50s]\n",
723 (unsigned int)((ucs2_len*sizeof(smb_ucs2_t))-maxlength),
724 unicode_to_unix(out,src,sizeof(out))) );
725 ucs2_len = (maxlength/sizeof(smb_ucs2_t)) - 1;
728 memcpy(dest, src, ucs2_len*sizeof(smb_ucs2_t));
733 /*******************************************************************
734 Safe string cat into a string. maxlength includes the terminating zero.
735 maxlength is in bytes.
736 ********************************************************************/
738 smb_ucs2_t *safe_wstrcat(smb_ucs2_t *dest, const smb_ucs2_t *src, size_t maxlength)
740 size_t ucs2_src_len, ucs2_dest_len;
743 DEBUG(0,("ERROR: NULL dest in safe_wstrcat\n"));
751 ucs2_src_len = wstrlen(src);
752 ucs2_dest_len = wstrlen(dest);
754 if (ucs2_src_len + ucs2_dest_len >= (maxlength/sizeof(smb_ucs2_t))) {
756 int new_len = (maxlength/sizeof(smb_ucs2_t)) - ucs2_dest_len - 1;
757 DEBUG(0,("ERROR: string overflow by %u characters in safe_wstrcat [%.50s]\n",
758 (unsigned int)((sizeof(smb_ucs2_t)*(ucs2_src_len + ucs2_dest_len)) - maxlength),
759 unicode_to_unix(out,src,sizeof(out))) );
760 ucs2_src_len = (size_t)(new_len > 0 ? new_len : 0);
763 memcpy(&dest[ucs2_dest_len], src, ucs2_src_len*sizeof(smb_ucs2_t));
764 dest[ucs2_dest_len + ucs2_src_len] = 0;
768 /*******************************************************************
769 Compare the two ucs2 strings s1 and s2.
770 ********************************************************************/
772 int wstrcmp(const smb_ucs2_t *s1, const smb_ucs2_t *s2)
789 /*******************************************************************
790 Compare the first n characters of s1 to s2. len is in ucs2 units.
791 ********************************************************************/
793 int wstrncmp(const smb_ucs2_t *s1, const smb_ucs2_t *s2, size_t len)
797 for (; len != 0; --len) {
811 /*******************************************************************
812 Search for string s2 within s1.
813 ********************************************************************/
815 smb_ucs2_t *wstrstr(const smb_ucs2_t *s1, const smb_ucs2_t *s2)
817 size_t len = wstrlen(s2);
820 return (smb_ucs2_t *)s1;
824 if (wstrncmp(s1, s2, len) == 0)
825 return (smb_ucs2_t *)s1;
831 /*******************************************************************
832 Search for ucs2 char c from the beginning of s.
833 ********************************************************************/
835 smb_ucs2_t *wstrchr(const smb_ucs2_t *s, smb_ucs2_t c)
839 return (smb_ucs2_t *)s;
845 /*******************************************************************
846 Search for ucs2 char c from the end of s.
847 ********************************************************************/
849 smb_ucs2_t *wstrrchr(const smb_ucs2_t *s, smb_ucs2_t c)
851 smb_ucs2_t *retval = 0;
855 retval = (smb_ucs2_t *)s;
861 /*******************************************************************
862 Extract the next token from s1, where tokens are separated by any ucs2 char in s2.
863 ********************************************************************/
865 smb_ucs2_t *wstrtok(smb_ucs2_t *s1, const smb_ucs2_t *s2)
867 static smb_ucs2_t *s = NULL;
876 for (q = s1; *s1; s1++) {
877 smb_ucs2_t *p = wstrchr(s2, *s1);
895 /*******************************************************************
896 Duplicate a ucs2 string.
897 ********************************************************************/
899 smb_ucs2_t *wstrdup(const smb_ucs2_t *s)
901 size_t newlen = (wstrlen(s)*sizeof(smb_ucs2_t)) + 1;
902 smb_ucs2_t *newstr = (smb_ucs2_t *)malloc(newlen);
905 safe_wstrcpy(newstr, s, newlen);