2 Unix SMB/CIFS implementation.
3 Character set conversion Extensions
4 Copyright (C) Igor Vergeichik <iverg@mail.ru> 2001
5 Copyright (C) Andrew Tridgell 2001
6 Copyright (C) Simo Sorce 2001
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2 of the License, or
11 (at your option) any later version.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
28 * @brief Character-set conversion routines built on our iconv.
30 * @note Samba's internal character set (at least in the 3.0 series)
31 * is always the same as the one for the Unix filesystem. It is
32 * <b>not</b> necessarily UTF-8 and may be different on machines that
33 * need i18n filenames to be compatible with Unix software. It does
34 * have to be a superset of ASCII. All multibyte sequences must start
35 * with a byte with the high bit set.
/* Table of iconv descriptors, indexed as conv_handles[from][to]. */
40 static smb_iconv_t conv_handles[NUM_CHARSETS][NUM_CHARSETS];
44 * Return the name of a charset to give to iconv().
46 static const char *charset_name(charset_t ch)
48 const char *ret = NULL;
/* UCS-2 and UTF-8 use fixed names; the unix, dos and display names are
 * obtained from lp_unix_charset(), lp_dos_charset() and
 * lp_display_charset(). */
50 if (ch == CH_UCS2) ret = "UCS-2LE";
51 else if (ch == CH_UNIX) ret = lp_unix_charset();
52 else if (ch == CH_DOS) ret = lp_dos_charset();
53 else if (ch == CH_DISPLAY) ret = lp_display_charset();
54 else if (ch == CH_UTF8) ret = "UTF8";
/* An unrecognised charset_t, or a NULL or empty name, falls back to ASCII. */
56 if (!ret || !*ret) ret = "ASCII";
/* Initialisation hook called by convert_string() and
 * convert_string_allocate() before conv_handles is consulted. */
60 static void lazy_initialize_conv(void)
/* Persists across calls. NOTE(review): presumably guards one-time setup,
 * but the code that tests and sets it is not visible here. */
62 static int initialized = False;
73 Initialize iconv conversion descriptors.
79 BOOL did_reload = False;
81 /* so that charset_name() works we need to get the UNIX<->UCS2 going
83 if (!conv_handles[CH_UNIX][CH_UCS2])
84 conv_handles[CH_UNIX][CH_UCS2] = smb_iconv_open("UCS-2LE", "ASCII");
86 if (!conv_handles[CH_UCS2][CH_UNIX])
87 conv_handles[CH_UCS2][CH_UNIX] = smb_iconv_open("ASCII", "UCS-2LE");
89 for (c1=0;c1<NUM_CHARSETS;c1++) {
90 for (c2=0;c2<NUM_CHARSETS;c2++) {
91 const char *n1 = charset_name((charset_t)c1);
92 const char *n2 = charset_name((charset_t)c2);
93 if (conv_handles[c1][c2] &&
94 strcmp(n1, conv_handles[c1][c2]->from_name) == 0 &&
95 strcmp(n2, conv_handles[c1][c2]->to_name) == 0)
100 if (conv_handles[c1][c2])
101 smb_iconv_close(conv_handles[c1][c2]);
103 conv_handles[c1][c2] = smb_iconv_open(n2,n1);
104 if (conv_handles[c1][c2] == (smb_iconv_t)-1) {
105 DEBUG(0,("Conversion from %s to %s not supported\n",
106 charset_name((charset_t)c1), charset_name((charset_t)c2)));
107 conv_handles[c1][c2] = NULL;
118 * Convert string from one encoding to another, making error checking etc
120 * @param src pointer to source string (multibyte or singlebyte)
121 * @param srclen length of the source string in bytes
122 * @param dest pointer to destination string (multibyte or singlebyte)
123 * @param destlen maximal length allowed for string
124 * @returns the number of bytes occupied in the destination
126 ssize_t convert_string(charset_t from, charset_t to,
127 void const *src, size_t srclen,
128 void *dest, size_t destlen)
132 const char* inbuf = (const char*)src;
133 char* outbuf = (char*)dest;
134 smb_iconv_t descriptor;
136 if (srclen == (size_t)-1)
137 srclen = strlen(src)+1;
139 lazy_initialize_conv();
141 descriptor = conv_handles[from][to];
143 if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
144 /* conversion not supported, use as is */
/* Raw fallback: the copy is clamped to destlen, so a source longer than
 * the destination is truncated rather than overrunning it. */
145 size_t len = MIN(srclen,destlen);
146 memcpy(dest,src,len);
152 retval = smb_iconv(descriptor, &inbuf, &i_len, &outbuf, &o_len);
/* smb_iconv() reports failure as (size_t)-1; the assignments that follow
 * select a human-readable reason string. */
153 if(retval==(size_t)-1) {
154 const char *reason="unknown error";
157 reason="Incomplete multibyte sequence";
160 reason="No more room";
161 DEBUG(0, ("convert_string: Required %d, available %d\n",
163 /* we are not sure we need srclen bytes,
164 may be more, may be less.
165 We only know we need more than destlen
169 reason="Illegal multibyte sequence";
172 /* smb_panic(reason); */
/* Bytes produced: o_len is the output-space counter passed to smb_iconv()
 * above. NOTE(review): this assumes o_len started out equal to destlen;
 * that assignment is not visible here. */
174 return destlen-o_len;
178 * Convert between character sets, allocating a new buffer for the result.
180 * @param srclen length of source buffer.
181 * @param dest always set at least to NULL
182 * @note -1 is not accepted for srclen.
184 * @returns Size in bytes of the converted string; or -1 in case of error.
187 ssize_t convert_string_allocate(charset_t from, charset_t to,
188 void const *src, size_t srclen, void **dest)
190 size_t i_len, o_len, destlen;
192 const char *inbuf = (const char *)src;
194 smb_iconv_t descriptor;
/* Unlike convert_string(), the length must be explicit: a NULL source,
 * an empty source and the (size_t)-1 sentinel are all rejected.
 * NOTE(review): the action taken is not visible; presumably return -1. */
198 if (src == NULL || srclen == (size_t)-1 || srclen == 0)
201 lazy_initialize_conv();
203 descriptor = conv_handles[from][to];
205 if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
206 /* conversion not supported, return -1*/
207 DEBUG(3, ("convert_string_allocate: conversion not supported!\n"));
/* Starting size for the output buffer: never less than 512 bytes. */
211 destlen = MAX(srclen, 512);
/* The size is doubled immediately before the realloc().
 * NOTE(review): presumably this is the retry point when the output runs
 * out of room; the loop or goto is not visible here. */
214 destlen = destlen * 2;
/* The result goes into ob, not outbuf, so the old pointer is still held
 * in outbuf if realloc() fails. */
215 ob = (char *)realloc(outbuf, destlen);
217 DEBUG(0, ("convert_string_allocate: realloc failed!\n"));
225 retval = smb_iconv(descriptor,
228 if(retval == (size_t)-1) {
229 const char *reason="unknown error";
232 reason="Incomplete multibyte sequence";
237 reason="Illegal multibyte sequence";
/* NOTE(review): inbuf is printed with %s, but callers also pass UCS-2
 * sources (from == CH_UCS2), which are not NUL-terminated byte strings;
 * confirm this log line is safe for such input. */
240 DEBUG(0,("Conversion error: %s(%s)\n",reason,inbuf));
241 /* smb_panic(reason); */
/* Shrink the buffer to the number of bytes actually produced. */
245 destlen = destlen - o_len;
246 *dest = (char *)Realloc(ob,destlen);
248 DEBUG(0, ("convert_string_allocate: out of memory!\n"));
258 * Convert between character sets, allocating a new buffer using talloc for the result.
260 * @param srclen length of source buffer.
261 * @param dest always set at least to NULL
262 * @note -1 is not accepted for srclen.
264 * @returns Size in bytes of the converted string; or -1 in case of error.
266 ssize_t convert_string_talloc(TALLOC_CTX *ctx, charset_t from, charset_t to,
267 void const *src, size_t srclen, const void **dest)
269 void *alloced_string;
/* Convert into a temporary realloc-based buffer first; it is copied into
 * talloc memory and released below. */
274 dest_len=convert_string_allocate(from, to, src, srclen, &alloced_string);
275 if (dest_len == (size_t)-1)
/* Two extra bytes leave room for the terminator stored by SSVAL() below
 * (presumably a 16-bit store). */
277 dst = talloc(ctx, dest_len + 2);
/* NOTE(review): dst is written by memcpy() and SSVAL() directly after
 * talloc(), with no NULL check in between. Confirm talloc() cannot return
 * NULL here, or move the check ahead of the copy. */
278 /* we want to be absolutely sure that the result is terminated */
279 memcpy(dst, alloced_string, dest_len);
280 SSVAL(dst, dest_len, 0);
281 SAFE_FREE(alloced_string);
/* Upper-case a unix-charset string by converting it to UCS-2, applying
 * strupper_w(), and converting the result back into dest. */
288 size_t unix_strupper(const char *src, size_t srclen, char *dest, size_t destlen)
293 size = convert_string_allocate(CH_UNIX, CH_UCS2, src, srclen,
296 smb_panic("failed to create UCS2 buffer");
/* Special case for in-place calls (dest == src) where strupper_w() returns
 * zero. NOTE(review): presumably zero means nothing changed and the
 * conversion back is skipped; the branch body is not visible here. */
298 if (!strupper_w(buffer) && (dest == src)) {
303 size = convert_string(CH_UCS2, CH_UNIX, buffer, size, dest, destlen);
/* Lower-case a unix-charset string by converting it to UCS-2, applying
 * strlower_w(), and converting the result back into dest. */
308 size_t unix_strlower(const char *src, size_t srclen, char *dest, size_t destlen)
313 size = convert_string_allocate(CH_UNIX, CH_UCS2, src, srclen,
316 smb_panic("failed to create UCS2 buffer");
/* Special case for in-place calls (dest == src) where strlower_w() returns
 * zero. NOTE(review): presumably zero means nothing changed and the
 * conversion back is skipped; the branch body is not visible here. */
318 if (!strlower_w(buffer) && (dest == src)) {
322 size = convert_string(CH_UCS2, CH_UNIX, buffer, size, dest, destlen);
/* Report whether p sits at an odd offset from base_ptr, i.e. whether one
 * byte of padding is needed before UCS-2 data. */
327 size_t ucs2_align(const void *base_ptr, const void *p, int flags)
/* STR_NOALIGN and STR_ASCII both suppress alignment.
 * NOTE(review): the branch body is not visible; presumably it returns 0. */
329 if (flags & (STR_NOALIGN|STR_ASCII))
/* Low bit of the offset: 1 for an odd offset, 0 for an even one. */
331 return PTR_DIFF(p, base_ptr) & 1;
336 * Copy a string from a char* unix src to a dos codepage string destination.
338 * @return the number of bytes occupied by the string in the destination.
340 * @param flags can include
342 * <dt>STR_TERMINATE</dt> <dd>means include the null termination</dd>
343 * <dt>STR_UPPER</dt> <dd>means uppercase in the destination</dd>
346 * @param dest_len the maximum length in bytes allowed in the
347 * destination. If @p dest_len is -1 then no maximum is used.
349 ssize_t push_ascii(void *dest, const char *src, size_t dest_len, int flags)
/* Length without the terminating NUL. */
351 size_t src_len = strlen(src);
354 /* treat a pstring as "unlimited" length */
355 if (dest_len == (size_t)-1)
356 dest_len = sizeof(pstring);
/* Upper-casing starts from a private copy in tmpbuf, so the buffer behind
 * src is not modified. NOTE(review): the statements that upper-case tmpbuf
 * and redirect src to it are not visible here. */
358 if (flags & STR_UPPER) {
359 pstrcpy(tmpbuf, src);
/* NOTE(review): presumably src_len is extended here to cover the
 * terminating NUL; the statement is not visible. */
364 if (flags & (STR_TERMINATE | STR_TERMINATE_ASCII))
367 return convert_string(CH_UNIX, CH_DOS, src, src_len, dest, dest_len);
/* Convenience wrapper: push_ascii() with an fstring-sized destination and
 * STR_TERMINATE, so the terminating NUL is included. */
370 ssize_t push_ascii_fstring(void *dest, const char *src)
372 return push_ascii(dest, src, sizeof(fstring), STR_TERMINATE);
/* Convenience wrapper: push_ascii() with a pstring-sized destination and
 * STR_TERMINATE, so the terminating NUL is included. */
375 ssize_t push_ascii_pstring(void *dest, const char *src)
377 return push_ascii(dest, src, sizeof(pstring), STR_TERMINATE);
/* Convenience wrapper; makes exactly the same call as push_ascii_pstring(). */
380 ssize_t push_pstring(void *dest, const char *src)
382 return push_ascii(dest, src, sizeof(pstring), STR_TERMINATE);
386 * Copy a string from a dos codepage source to a unix char* destination.
388 * The resulting string in "dest" is always null terminated.
390 * @param flags can have:
392 * <dt>STR_TERMINATE</dt>
393 * <dd>STR_TERMINATE means the string in @p src
394 * is null terminated; @p src_len is ignored only when it is -1.</dd>
397 * @param src_len is the length of the source area in bytes.
398 * @returns the number of bytes occupied by the string in @p src.
400 ssize_t pull_ascii(char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
/* A dest_len of -1 is replaced by the size of a pstring. */
404 if (dest_len == (size_t)-1)
405 dest_len = sizeof(pstring);
407 if (flags & STR_TERMINATE) {
408 if (src_len == (size_t)-1) {
409 src_len = strlen(src) + 1;
/* With an explicit src_len the scan for the terminator is bounded by
 * strnlen(), so it never reads past the source area.
 * NOTE(review): how len is folded back into src_len is not visible here. */
411 size_t len = strnlen(src, src_len);
418 ret = convert_string(CH_DOS, CH_UNIX, src, src_len, dest, dest_len);
/* Force termination; when the output filled dest completely the final
 * byte is overwritten. NOTE(review): if dest_len is 0, dest_len-1 wraps
 * and dest[ret] is written; confirm callers never pass 0. */
421 dest[MIN(ret, dest_len-1)] = 0;
/* Convenience wrapper: pull_ascii() into a pstring-sized destination.
 * src_len is -1 with STR_TERMINATE, so the source is measured by strlen(). */
426 ssize_t pull_ascii_pstring(char *dest, const void *src)
428 return pull_ascii(dest, src, sizeof(pstring), -1, STR_TERMINATE);
/* Convenience wrapper: pull_ascii() into an fstring-sized destination.
 * src_len is -1 with STR_TERMINATE, so the source is measured by strlen(). */
431 ssize_t pull_ascii_fstring(char *dest, const void *src)
433 return pull_ascii(dest, src, sizeof(fstring), -1, STR_TERMINATE);
437 * Copy a string from a char* src to a unicode destination.
439 * @returns the number of bytes occupied by the string in the destination.
441 * @param flags can have:
444 * <dt>STR_TERMINATE <dd>means include the null termination.
445 * <dt>STR_UPPER <dd>means uppercase in the destination.
446 * <dt>STR_NOALIGN <dd>means don't do alignment.
449 * @param dest_len is the maximum length allowed in the
450 * destination. If dest_len is -1 then no maximum is used.
452 ssize_t push_ucs2(const void *base_ptr, void *dest, const char *src, size_t dest_len, int flags)
/* Length without the terminating NUL. */
455 size_t src_len = strlen(src);
458 /* treat a pstring as "unlimited" length */
459 if (dest_len == (size_t)-1)
460 dest_len = sizeof(pstring);
/* Upper-casing starts from a private copy in tmpbuf, so the buffer behind
 * src is not modified. NOTE(review): the statements that upper-case tmpbuf
 * and redirect src to it are not visible here. */
462 if (flags & STR_UPPER) {
463 pstrcpy(tmpbuf, src);
/* NOTE(review): presumably src_len is extended here to cover the
 * terminating NUL; the statement is not visible. */
468 if (flags & STR_TERMINATE)
/* dest is at an odd offset from base_ptr: step over one byte and give up
 * one byte of the available space. NOTE(review): presumably a pad byte is
 * written and counted in len; that part is not visible here. */
471 if (ucs2_align(base_ptr, dest, flags)) {
473 dest = (void *)((char *)dest + 1);
474 if (dest_len) dest_len--;
478 /* ucs2 is always a multiple of 2 bytes */
/* The converted byte count is added on top of whatever len already holds. */
481 len += convert_string(CH_UNIX, CH_UCS2, src, src_len, dest, dest_len);
487 * Copy a string from a unix char* src to a UCS2 destination,
488 * allocating a buffer using talloc().
490 * @param dest always set at least to NULL
492 * @returns The number of bytes occupied by the string in the destination
493 * or -1 in case of error.
495 ssize_t push_ucs2_talloc(TALLOC_CTX *ctx, smb_ucs2_t **dest, const char *src)
/* The +1 counts the terminating NUL, so it is converted with the string. */
497 size_t src_len = strlen(src)+1;
500 return convert_string_talloc(ctx, CH_UNIX, CH_UCS2, src, src_len, (const void **)dest);
505 * Copy a string from a unix char* src to a UCS2 destination, allocating a buffer
507 * @param dest always set at least to NULL
509 * @returns The number of bytes occupied by the string in the destination
510 * or -1 in case of error.
513 ssize_t push_ucs2_allocate(smb_ucs2_t **dest, const char *src)
/* The +1 counts the terminating NUL, so it is converted with the string. */
515 size_t src_len = strlen(src)+1;
518 return convert_string_allocate(CH_UNIX, CH_UCS2, src, src_len, (void **)dest);
522 Copy a string from a char* src to a UTF-8 destination.
523 Return the number of bytes occupied by the string in the destination
525 STR_TERMINATE means include the null termination
526 STR_UPPER means uppercase in the destination
527 dest_len is the maximum length allowed in the destination. If dest_len
528 is -1 then no maximum is used.
531 ssize_t push_utf8(void *dest, const char *src, size_t dest_len, int flags)
/* Length without the terminating NUL. */
533 size_t src_len = strlen(src);
536 /* treat a pstring as "unlimited" length */
537 if (dest_len == (size_t)-1)
538 dest_len = sizeof(pstring);
/* Upper-casing starts from a private copy in tmpbuf, so the buffer behind
 * src is not modified. NOTE(review): the statements that upper-case tmpbuf
 * and redirect src to it are not visible here. */
540 if (flags & STR_UPPER) {
541 pstrcpy(tmpbuf, src);
/* NOTE(review): presumably src_len is extended here to cover the
 * terminating NUL; the statement is not visible. */
546 if (flags & STR_TERMINATE)
549 return convert_string(CH_UNIX, CH_UTF8, src, src_len, dest, dest_len);
/* Convenience wrapper: push_utf8() with an fstring-sized destination and
 * STR_TERMINATE, so the terminating NUL is included. */
552 ssize_t push_utf8_fstring(void *dest, const char *src)
554 return push_utf8(dest, src, sizeof(fstring), STR_TERMINATE);
/* Convenience wrapper: push_utf8() with a pstring-sized destination and
 * STR_TERMINATE, so the terminating NUL is included. */
557 ssize_t push_utf8_pstring(void *dest, const char *src)
559 return push_utf8(dest, src, sizeof(pstring), STR_TERMINATE);
563 * Copy a string from a unix char* src to a UTF-8 destination, allocating a buffer using talloc
565 * @param dest always set at least to NULL
567 * @returns The number of bytes occupied by the string in the destination
570 ssize_t push_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src)
/* The +1 counts the terminating NUL, so it is converted with the string. */
572 size_t src_len = strlen(src)+1;
575 return convert_string_talloc(ctx, CH_UNIX, CH_UTF8, src, src_len, (const void **)dest);
579 * Copy a string from a unix char* src to a UTF-8 destination, allocating a buffer
581 * @param dest always set at least to NULL
583 * @returns The number of bytes occupied by the string in the destination
586 ssize_t push_utf8_allocate(char **dest, const char *src)
/* The +1 counts the terminating NUL, so it is converted with the string. */
588 size_t src_len = strlen(src)+1;
591 return convert_string_allocate(CH_UNIX, CH_UTF8, src, src_len, (void **)dest);
595 Copy a string from a ucs2 source to a unix char* destination.
597 STR_TERMINATE means the string in src is null terminated.
598 STR_NOALIGN means don't try to align.
599 if STR_TERMINATE is set then src_len is ignored if it is -1.
600 src_len is the length of the source area in bytes
601 Return the number of bytes occupied by the string in src.
602 The resulting string in "dest" is always null terminated.
/* NOTE(review): this returns size_t while the sibling pull_ascii() and
 * pull_utf8() return ssize_t; confirm the difference is intended. */
605 size_t pull_ucs2(const void *base_ptr, char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
/* A dest_len of -1 is replaced by the size of a pstring. */
609 if (dest_len == (size_t)-1)
610 dest_len = sizeof(pstring);
/* src is at an odd offset from base_ptr: step over the pad byte.
 * NOTE(review): presumably src_len is adjusted as well; not visible here. */
612 if (ucs2_align(base_ptr, src, flags)) {
613 src = (const void *)((const char *)src + 1);
618 if (flags & STR_TERMINATE) {
619 if (src_len == (size_t)-1) {
/* strlen_w() is taken to count 16-bit characters: times 2 gives bytes and
 * the extra 2 covers the terminator. */
620 src_len = strlen_w(src)*2 + 2;
/* With an explicit src_len the scan is bounded to src_len/2 characters.
 * NOTE(review): how len is folded back into src_len is not visible here. */
622 size_t len = strnlen_w(src, src_len/2);
629 /* ucs2 is always a multiple of 2 bytes */
630 if (src_len != (size_t)-1)
633 ret = convert_string(CH_UCS2, CH_UNIX, src, src_len, dest, dest_len);
/* Force termination; when the output filled dest completely the final
 * byte is overwritten. NOTE(review): if dest_len is 0, dest_len-1 wraps
 * and dest[ret] is written; confirm callers never pass 0. */
635 dest[MIN(ret, dest_len-1)] = 0;
/* Convenience wrapper: pull_ucs2() into a pstring-sized destination, with
 * src_len -1 and STR_TERMINATE so the source is measured by strlen_w().
 * NOTE(review): base_ptr is NULL and STR_NOALIGN is not set, so
 * ucs2_align() tests the low bit of the src address itself; an odd
 * address would be stepped over by one byte. Confirm this is intended. */
640 ssize_t pull_ucs2_pstring(char *dest, const void *src)
642 return pull_ucs2(NULL, dest, src, sizeof(pstring), -1, STR_TERMINATE);
/* Convenience wrapper: pull_ucs2() into an fstring-sized destination, with
 * src_len -1 and STR_TERMINATE so the source is measured by strlen_w().
 * NOTE(review): base_ptr is NULL and STR_NOALIGN is not set, so
 * ucs2_align() tests the low bit of the src address itself; an odd
 * address would be stepped over by one byte. Confirm this is intended. */
645 ssize_t pull_ucs2_fstring(char *dest, const void *src)
647 return pull_ucs2(NULL, dest, src, sizeof(fstring), -1, STR_TERMINATE);
651 * Copy a string from a UCS2 src to a unix char * destination, allocating a buffer using talloc
653 * @param dest always set at least to NULL
655 * @returns The number of bytes occupied by the string in the destination
658 ssize_t pull_ucs2_talloc(TALLOC_CTX *ctx, char **dest, const smb_ucs2_t *src)
/* Source length in bytes, with one extra smb_ucs2_t for the terminator. */
660 size_t src_len = (strlen_w(src)+1) * sizeof(smb_ucs2_t);
662 return convert_string_talloc(ctx, CH_UCS2, CH_UNIX, src, src_len, (const void **)dest);
666 * Copy a string from a UCS2 src to a unix char * destination, allocating a buffer
668 * @param dest always set at least to NULL
670 * @returns The number of bytes occupied by the string in the destination
673 ssize_t pull_ucs2_allocate(void **dest, const smb_ucs2_t *src)
/* Source length in bytes, with one extra smb_ucs2_t for the terminator. */
675 size_t src_len = (strlen_w(src)+1) * sizeof(smb_ucs2_t);
677 return convert_string_allocate(CH_UCS2, CH_UNIX, src, src_len, dest);
681 Copy a string from a utf-8 source to a unix char* destination.
683 STR_TERMINATE means the string in src is null terminated.
684 if STR_TERMINATE is set then src_len is ignored if it is -1.
685 src_len is the length of the source area in bytes
686 Return the number of bytes occupied by the string in src.
687 The resulting string in "dest" is always null terminated.
690 ssize_t pull_utf8(char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
/* A dest_len of -1 is replaced by the size of a pstring. */
694 if (dest_len == (size_t)-1)
695 dest_len = sizeof(pstring);
697 if (flags & STR_TERMINATE) {
698 if (src_len == (size_t)-1) {
699 src_len = strlen(src) + 1;
/* With an explicit src_len the scan for the terminator is bounded by
 * strnlen(), so it never reads past the source area.
 * NOTE(review): how len is folded back into src_len is not visible here. */
701 size_t len = strnlen(src, src_len);
708 ret = convert_string(CH_UTF8, CH_UNIX, src, src_len, dest, dest_len);
/* Force termination; when the output filled dest completely the final
 * byte is overwritten. NOTE(review): if dest_len is 0, dest_len-1 wraps
 * and dest[ret] is written; confirm callers never pass 0. */
710 dest[MIN(ret, dest_len-1)] = 0;
/* Convenience wrapper: pull_utf8() into a pstring-sized destination.
 * src_len is -1 with STR_TERMINATE, so the source is measured by strlen(). */
715 ssize_t pull_utf8_pstring(char *dest, const void *src)
717 return pull_utf8(dest, src, sizeof(pstring), -1, STR_TERMINATE);
/* Convenience wrapper: pull_utf8() into an fstring-sized destination.
 * src_len is -1 with STR_TERMINATE, so the source is measured by strlen(). */
720 ssize_t pull_utf8_fstring(char *dest, const void *src)
722 return pull_utf8(dest, src, sizeof(fstring), -1, STR_TERMINATE);
726 * Copy a string from a UTF-8 src to a unix char * destination, allocating a buffer using talloc
728 * @param dest always set at least to NULL
730 * @returns The number of bytes occupied by the string in the destination
733 ssize_t pull_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src)
/* The +1 counts the terminating NUL, so it is converted with the string. */
735 size_t src_len = strlen(src)+1;
737 return convert_string_talloc(ctx, CH_UTF8, CH_UNIX, src, src_len, (const void **)dest);
741 * Copy a string from a UTF-8 src to a unix char * destination, allocating a buffer
743 * @param dest always set at least to NULL
745 * @returns The number of bytes occupied by the string in the destination
748 ssize_t pull_utf8_allocate(void **dest, const char *src)
/* The +1 counts the terminating NUL, so it is converted with the string. */
750 size_t src_len = strlen(src)+1;
752 return convert_string_allocate(CH_UTF8, CH_UNIX, src, src_len, dest);
756 Copy a string from a char* src to a unicode or ascii
757 dos codepage destination choosing unicode or ascii based on the
758 flags in the SMB buffer starting at base_ptr.
759 Return the number of bytes occupied by the string in the destination.
761 STR_TERMINATE means include the null termination.
762 STR_UPPER means uppercase in the destination.
763 STR_ASCII use ascii even with unicode packet.
764 STR_NOALIGN means don't do alignment.
765 dest_len is the maximum length allowed in the destination. If dest_len
766 is -1 then no maximum is used.
769 ssize_t push_string(const void *base_ptr, void *dest, const char *src, size_t dest_len, int flags)
/* UCS-2 is used when STR_ASCII is clear and either STR_UNICODE is set or
 * the FLAGS2_UNICODE_STRINGS bit is set in the 16-bit field read from
 * base_ptr. Short-circuit evaluation means base_ptr is only read when
 * neither flag has already decided the outcome. */
771 if (!(flags & STR_ASCII) && \
772 ((flags & STR_UNICODE || \
773 (SVAL(base_ptr, NBT_HDR_SIZE+HDR_FLG2) & FLAGS2_UNICODE_STRINGS)))) {
774 return push_ucs2(base_ptr, dest, src, dest_len, flags);
776 return push_ascii(dest, src, dest_len, flags);
781 Copy a string from a unicode or ascii source (depending on
782 the packet flags) to a char* destination.
784 STR_TERMINATE means the string in src is null terminated.
785 STR_UNICODE means to force as unicode.
786 STR_ASCII use ascii even with unicode packet.
787 STR_NOALIGN means don't do alignment.
788 if STR_TERMINATE is set then src_len is ignored if it is -1
789 src_len is the length of the source area in bytes.
790 Return the number of bytes occupied by the string in src.
791 The resulting string in "dest" is always null terminated.
794 ssize_t pull_string(const void *base_ptr, char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
/* UCS-2 is used when STR_ASCII is clear and either STR_UNICODE is set or
 * the FLAGS2_UNICODE_STRINGS bit is set in the 16-bit field read from
 * base_ptr. Short-circuit evaluation means base_ptr is only read when
 * neither flag has already decided the outcome. */
796 if (!(flags & STR_ASCII) && \
797 ((flags & STR_UNICODE || \
798 (SVAL(base_ptr, NBT_HDR_SIZE+HDR_FLG2) & FLAGS2_UNICODE_STRINGS)))) {
799 return pull_ucs2(base_ptr, dest, src, dest_len, src_len, flags);
801 return pull_ascii(dest, src, dest_len, src_len, flags);
/* Alignment padding for a string at p, using the same unicode-or-ascii
 * test as push_string() and pull_string().
 * NOTE(review): the non-unicode result is not visible; presumably 0. */
804 ssize_t align_string(const void *base_ptr, const char *p, int flags)
806 if (!(flags & STR_ASCII) && \
807 ((flags & STR_UNICODE || \
808 (SVAL(base_ptr, NBT_HDR_SIZE+HDR_FLG2) & FLAGS2_UNICODE_STRINGS)))) {
809 return ucs2_align(base_ptr, p, flags);
815 Copy a string from a unicode or ascii source (depending on
816 the packet flags) to a TALLOC'ed destination.
818 STR_TERMINATE means the string in src is null terminated.
819 STR_UNICODE means to force as unicode.
820 STR_ASCII use ascii even with unicode packet.
821 STR_NOALIGN means don't do alignment.
822 if STR_TERMINATE is set then src_len is ignored if it is -1
823 src_len is the length of the source area in bytes.
824 Return the number of bytes occupied by the string in src.
825 The resulting string in "dest" is always null terminated.
828 ssize_t pull_string_talloc(TALLOC_CTX *ctx, char **dest, const void *src, size_t src_len, int flags)
/* There is no base_ptr here, so only an explicit STR_UNICODE (without
 * STR_ASCII) selects UCS-2. src_len is not forwarded: pull_ucs2_talloc()
 * measures src itself. */
830 if (!(flags & STR_ASCII) && \
831 (flags & STR_UNICODE)) {
832 return pull_ucs2_talloc(ctx, dest, src);
/* The non-unicode path duplicates the bytes as they are.
 * NOTE(review): unlike pull_ascii() no CH_DOS to CH_UNIX conversion is
 * applied; confirm this is intended. */
835 if (flags & STR_TERMINATE) {
836 *dest = talloc_strdup(ctx, src);
/* NOTE(review): if talloc_strdup() can return NULL, strlen(*dest)
 * dereferences it; confirm or add a check. */
837 return strlen(*dest);
839 *dest = talloc_strndup(ctx, src, src_len);
844 Convert from ucs2 to unix charset and return the
845 allocated and converted string or NULL if an error occurred.
846 You must provide a zero terminated string.
847 The returning string will be zero terminated.
850 char *acnv_u2ux(const smb_ucs2_t *src)
/* Source length in bytes, terminator included, so the converted result is
 * terminated as well. */
856 slen = (strlen_w(src) + 1) * sizeof(smb_ucs2_t);
857 dlen = convert_string_allocate(CH_UCS2, CH_UNIX, src, slen, &dest);
/* (size_t)-1 is the error value of convert_string_allocate().
 * NOTE(review): the statements returning NULL or dest are not visible. */
858 if (dlen == (size_t)-1)
865 Convert from unix to ucs2 charset and return the
866 allocated and converted string or NULL if an error occurred.
867 You must provide a zero terminated string.
868 The returning string will be zero terminated.
871 smb_ucs2_t *acnv_uxu2(const char *src)
/* Source length in bytes, terminating NUL included, so the converted
 * result is terminated as well. */
877 slen = strlen(src) + 1;
878 dlen = convert_string_allocate(CH_UNIX, CH_UCS2, src, slen, &dest);
/* (size_t)-1 is the error value of convert_string_allocate().
 * NOTE(review): the statements returning NULL or dest are not visible. */
879 if (dlen == (size_t)-1)
886 Convert from ucs2 to dos charset and return the
887 allocated and converted string or NULL if an error occurred.
888 You must provide a zero terminated string.
889 The returning string will be zero terminated.
892 char *acnv_u2dos(const smb_ucs2_t *src)
/* Source length in bytes, terminator included, so the converted result is
 * terminated as well. */
898 slen = (strlen_w(src) + 1) * sizeof(smb_ucs2_t);
899 dlen = convert_string_allocate(CH_UCS2, CH_DOS, src, slen, &dest);
/* (size_t)-1 is the error value of convert_string_allocate().
 * NOTE(review): the statements returning NULL or dest are not visible. */
900 if (dlen == (size_t)-1)
907 Convert from dos to ucs2 charset and return the
908 allocated and converted string or NULL if an error occurred.
909 You must provide a zero terminated string.
910 The returning string will be zero terminated.
913 smb_ucs2_t *acnv_dosu2(const char *src)
/* Source length in bytes, terminating NUL included, so the converted
 * result is terminated as well. */
919 slen = strlen(src) + 1;
920 dlen = convert_string_allocate(CH_DOS, CH_UCS2, src, slen, &dest);
/* (size_t)-1 is the error value of convert_string_allocate().
 * NOTE(review): the statements returning NULL or dest are not visible. */
921 if (dlen == (size_t)-1)