2 Unix SMB/CIFS implementation.
3 Character set conversion Extensions
4 Copyright (C) Igor Vergeichik <iverg@mail.ru> 2001
5 Copyright (C) Andrew Tridgell 2001
6 Copyright (C) Simo Sorce 2001
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2 of the License, or
11 (at your option) any later version.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
/* Scratch buffer used by unix_strupper() and unix_strlower() to hold the
 * intermediate UCS2 form of the string being case-converted. */
25 static pstring cvtbuf;
/* Conversion descriptors, looked up as conv_handles[from][to]. An entry is
 * reset to NULL when opening the conversion fails. */
27 static smb_iconv_t conv_handles[NUM_CHARSETS][NUM_CHARSETS];
32 * Character set conversion routines.
39 * Return the name of a charset to give to iconv().
/* Maps a charset_t to the encoding name handed to smb_iconv_open():
 * CH_UCS2 and CH_UTF8 use fixed names, while CH_UNIX, CH_DOS and CH_DISPLAY
 * take theirs from the matching lp_*_charset() call. A NULL or empty name
 * is replaced by "ASCII". */
41 static const char *charset_name(charset_t ch)
43 const char *ret = NULL;
45 if (ch == CH_UCS2) ret = "UCS-2LE";
46 else if (ch == CH_UNIX) ret = lp_unix_charset();
47 else if (ch == CH_DOS) ret = lp_dos_charset();
48 else if (ch == CH_DISPLAY) ret = lp_display_charset();
49 else if (ch == CH_UTF8) ret = "UTF8";
51 if (!ret || !*ret) ret = "ASCII";
/* Setup hook: convert_string() and convert_string_allocate() call this
 * before looking up a descriptor in conv_handles. The flag is static, so it
 * keeps its value between calls.
 * NOTE(review): presumably the flag limits the iconv setup to a single run;
 * confirm against the full body. */
55 static void lazy_initialize_conv(void)
57 static int initialized = False;
68 Initialize iconv conversion descriptors.
/* Fills the conv_handles table.
 * The UNIX<->UCS2 pair is opened first, as plain ASCII<->UCS-2LE, when the
 * slots are still empty. Then every (c1, c2) pair is visited: the names from
 * charset_name() are compared with the from_name/to_name recorded on any
 * existing descriptor, an existing descriptor is closed, and a new one is
 * opened with smb_iconv_open(n2, n1), that is with the target name first.
 * A failed open is reported through DEBUG(0, ...) and the slot is set to
 * NULL.
 * NOTE(review): presumably a pair whose names are unchanged is left alone
 * rather than reopened; confirm. */
74 BOOL did_reload = False;
76 /* so that charset_name() works we need to get the UNIX<->UCS2 going
78 if (!conv_handles[CH_UNIX][CH_UCS2])
79 conv_handles[CH_UNIX][CH_UCS2] = smb_iconv_open("UCS-2LE", "ASCII");
81 if (!conv_handles[CH_UCS2][CH_UNIX])
82 conv_handles[CH_UCS2][CH_UNIX] = smb_iconv_open("ASCII", "UCS-2LE");
84 for (c1=0;c1<NUM_CHARSETS;c1++) {
85 for (c2=0;c2<NUM_CHARSETS;c2++) {
86 const char *n1 = charset_name((charset_t)c1);
87 const char *n2 = charset_name((charset_t)c2);
88 if (conv_handles[c1][c2] &&
89 strcmp(n1, conv_handles[c1][c2]->from_name) == 0 &&
90 strcmp(n2, conv_handles[c1][c2]->to_name) == 0)
95 if (conv_handles[c1][c2])
96 smb_iconv_close(conv_handles[c1][c2]);
98 conv_handles[c1][c2] = smb_iconv_open(n2,n1);
99 if (conv_handles[c1][c2] == (smb_iconv_t)-1) {
100 DEBUG(0,("Conversion from %s to %s not supported\n",
101 charset_name((charset_t)c1), charset_name((charset_t)c2)));
102 conv_handles[c1][c2] = NULL;
113 * Convert string from one encoding to another, making error checking etc
115 * @param src pointer to source string (multibyte or singlebyte)
116 * @param srclen length of the source string in bytes
117 * @param dest pointer to destination string (multibyte or singlebyte)
118 * @param destlen maximal length allowed for string
119 * @returns the number of bytes occupied in the destination
/* Implementation notes:
 * - srclen == (size_t)-1 means src is NUL-terminated; the length then
 *   becomes strlen(src)+1, so the terminator is converted as well.
 * - If conv_handles[from][to] holds no usable descriptor, the first
 *   MIN(srclen, destlen) bytes of src are copied to dest unconverted.
 * - Otherwise smb_iconv() performs the conversion; when it fails a reason
 *   string is selected, and the smb_panic() call is commented out.
 * - The final return value is destlen - o_len.
 */
121 size_t convert_string(charset_t from, charset_t to,
122 void const *src, size_t srclen,
123 void *dest, size_t destlen)
127 const char* inbuf = (const char*)src;
128 char* outbuf = (char*)dest;
129 smb_iconv_t descriptor;
131 if (srclen == (size_t)-1)
132 srclen = strlen(src)+1;
134 lazy_initialize_conv();
136 descriptor = conv_handles[from][to];
138 if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
139 /* conversion not supported, use as is */
140 size_t len = MIN(srclen,destlen);
141 memcpy(dest,src,len);
147 retval = smb_iconv(descriptor, &inbuf, &i_len, &outbuf, &o_len);
148 if(retval==(size_t)-1) {
149 const char *reason="unknown error";
152 reason="Incomplete multibyte sequence";
155 reason="No more room";
156 DEBUG(0, ("convert_string: Required %d, available %d\n",
158 /* we are not sure we need srclen bytes,
159 may be more, may be less.
160 We only know we need more than destlen
164 reason="Illegal multibyte sequence";
167 /* smb_panic(reason); */
169 return destlen-o_len;
173 * Convert between character sets, allocating a new buffer for the result.
175 * @param srclen length of source buffer.
176 * @param dest always set at least to NULL
177 * @note -1 is not accepted for srclen.
179 * @returns Size in bytes of the converted string; or -1 in case of error.
/* Implementation notes:
 * - A NULL src or a srclen of (size_t)-1 is tested for up front.
 * - A missing descriptor for conv_handles[from][to] is reported through
 *   DEBUG(3, ...) and treated as an error.
 * - The output size starts from MAX(srclen, 512) and is doubled before the
 *   realloc() of the output buffer.
 * - After smb_iconv() the buffer is resized with Realloc() to the bytes
 *   actually produced (destlen - o_len) and stored in *dest. The buffer is
 *   heap memory; convert_string_talloc() releases it with SAFE_FREE().
 * NOTE(review): the conversion-error DEBUG prints inbuf with %s, which
 * assumes NUL-terminated single-byte text; confirm this is safe when the
 * source is UCS2.
 */
182 size_t convert_string_allocate(charset_t from, charset_t to,
183 void const *src, size_t srclen, void **dest)
185 size_t i_len, o_len, destlen;
187 const char *inbuf = (const char *)src;
189 smb_iconv_t descriptor;
193 if (src == NULL || srclen == (size_t)-1)
196 lazy_initialize_conv();
198 descriptor = conv_handles[from][to];
200 if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
201 /* conversion not supported, return -1*/
202 DEBUG(3, ("convert_string_allocate: conversion not supported!\n"));
206 destlen = MAX(srclen, 512);
209 destlen = destlen * 2;
210 ob = (char *)realloc(outbuf, destlen);
212 DEBUG(0, ("convert_string_allocate: realloc failed!\n"));
220 retval = smb_iconv(descriptor,
223 if(retval == (size_t)-1) {
224 const char *reason="unknown error";
227 reason="Incomplete multibyte sequence";
232 reason="Illegal multibyte sequence";
235 DEBUG(0,("Conversion error: %s(%s)\n",reason,inbuf));
236 /* smb_panic(reason); */
240 destlen = destlen - o_len;
241 *dest = (char *)Realloc(ob,destlen);
243 DEBUG(0, ("convert_string_allocate: out of memory!\n"));
253 * Convert between character sets, allocating a new buffer using talloc for the result.
255 * @param srclen length of source buffer.
256 * @param dest always set at least to NULL
257 * @note -1 is not accepted for srclen.
259 * @returns Size in bytes of the converted string; or -1 in case of error.
/* Runs convert_string_allocate() into a temporary heap buffer, duplicates
 * the result onto ctx with talloc_memdup() and releases the temporary with
 * SAFE_FREE(). The (size_t)-1 error result of the inner call is tested
 * before the copy is made.
 * NOTE(review): no check of the talloc_memdup() result is visible here;
 * confirm how an allocation failure is reported. */
261 size_t convert_string_talloc(TALLOC_CTX *ctx, charset_t from, charset_t to,
262 void const *src, size_t srclen, void **dest)
264 void *alloced_string;
268 dest_len=convert_string_allocate(from, to, src, srclen, &alloced_string);
269 if (dest_len == (size_t)-1)
271 *dest = talloc_memdup(ctx, alloced_string, dest_len);
272 SAFE_FREE(alloced_string);
/* Upper-cases a unix-charset string by way of UCS2: src is converted into
 * the file-static cvtbuf, strupper_w() is applied there, and the result is
 * converted back into dest. The return value is that of the final
 * convert_string() call.
 * NOTE(review): the strupper_w()/dest == src test looks like a shortcut for
 * the in-place case where nothing changed; confirm what it returns. */
278 size_t unix_strupper(const char *src, size_t srclen, char *dest, size_t destlen)
281 smb_ucs2_t *buffer=(smb_ucs2_t*)cvtbuf;
282 size=convert_string(CH_UNIX, CH_UCS2, src, srclen, buffer, sizeof(cvtbuf));
283 if (!strupper_w(buffer) && (dest == src))
285 return convert_string(CH_UCS2, CH_UNIX, buffer, size, dest, destlen);
/* Lower-cases a unix-charset string by way of UCS2: src is converted into
 * the file-static cvtbuf, strlower_w() is applied there, and the result is
 * converted back into dest. The return value is that of the final
 * convert_string() call.
 * NOTE(review): the strlower_w()/dest == src test looks like a shortcut for
 * the in-place case where nothing changed; confirm what it returns. */
288 size_t unix_strlower(const char *src, size_t srclen, char *dest, size_t destlen)
291 smb_ucs2_t *buffer=(smb_ucs2_t*)cvtbuf;
292 size=convert_string(CH_UNIX, CH_UCS2, src, srclen, buffer, sizeof(cvtbuf));
293 if (!strlower_w(buffer) && (dest == src))
295 return convert_string(CH_UCS2, CH_UNIX, buffer, size, dest, destlen);
/* Alignment helper for UCS2 data: yields the low bit of
 * PTR_DIFF(p, base_ptr), so the result is 1 when that difference is odd.
 * STR_NOALIGN and STR_ASCII are tested first.
 * NOTE(review): presumably either flag forces a result of 0; confirm. */
299 size_t ucs2_align(const void *base_ptr, const void *p, int flags)
301 if (flags & (STR_NOALIGN|STR_ASCII))
303 return PTR_DIFF(p, base_ptr) & 1;
308 * Copy a string from a char* unix src to a dos codepage string destination.
310 * @return the number of bytes occupied by the string in the destination.
312 * @param flags can include
314 * <dt>STR_TERMINATE</dt> <dd>means include the null termination</dd>
315 * <dt>STR_UPPER</dt> <dd>means uppercase in the destination</dd>
318 * @param dest_len the maximum length in bytes allowed in the
319 * destination. If @p dest_len is -1 then no maximum is used.
/* Implementation notes: the source length is strlen(src); a dest_len of
 * (size_t)-1 is replaced by sizeof(pstring); with STR_UPPER the source is
 * first copied into tmpbuf with pstrcpy(). The conversion itself is CH_UNIX
 * to CH_DOS through convert_string(), whose result is returned. */
321 size_t push_ascii(void *dest, const char *src, size_t dest_len, int flags)
323 size_t src_len = strlen(src);
326 /* treat a pstring as "unlimited" length */
327 if (dest_len == (size_t)-1)
328 dest_len = sizeof(pstring);
330 if (flags & STR_UPPER) {
331 pstrcpy(tmpbuf, src);
336 if (flags & STR_TERMINATE)
339 return convert_string(CH_UNIX, CH_DOS, src, src_len, dest, dest_len);
/* push_ascii() into an fstring-sized destination, with STR_TERMINATE. */
342 size_t push_ascii_fstring(void *dest, const char *src)
344 return push_ascii(dest, src, sizeof(fstring), STR_TERMINATE);
/* push_ascii() into a pstring-sized destination, with STR_TERMINATE. */
347 size_t push_ascii_pstring(void *dest, const char *src)
349 return push_ascii(dest, src, sizeof(pstring), STR_TERMINATE);
/* Makes the same call as push_ascii_pstring(): push_ascii() with
 * sizeof(pstring) as the limit and STR_TERMINATE. */
352 size_t push_pstring(void *dest, const char *src)
354 return push_ascii(dest, src, sizeof(pstring), STR_TERMINATE);
358 * Copy a string from a dos codepage source to a unix char* destination.
360 * The resulting string in "dest" is always null terminated.
362 * @param flags can have:
364 * <dt>STR_TERMINATE</dt>
365 * <dd>STR_TERMINATE means the string in @p src
366 * is null terminated, and src_len is ignored if it is -1.</dd>
369 * @param src_len is the length of the source area in bytes.
370 * @returns the number of bytes occupied by the string in @p src.
/* Implementation notes: a dest_len of (size_t)-1 is replaced by
 * sizeof(pstring). With STR_TERMINATE, a src_len of (size_t)-1 becomes
 * strlen(src) + 1; otherwise strnlen() limits the scan to src_len bytes.
 * The text is converted CH_DOS to CH_UNIX and a terminator is written at
 * index MIN(ret, dest_len-1) of dest. */
372 size_t pull_ascii(char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
376 if (dest_len == (size_t)-1)
377 dest_len = sizeof(pstring);
379 if (flags & STR_TERMINATE) {
380 if (src_len == (size_t)-1) {
381 src_len = strlen(src) + 1;
383 size_t len = strnlen(src, src_len);
390 ret = convert_string(CH_DOS, CH_UNIX, src, src_len, dest, dest_len);
393 dest[MIN(ret, dest_len-1)] = 0;
/* pull_ascii() of a NUL-terminated source (src_len -1, STR_TERMINATE) into
 * a pstring-sized destination. */
398 size_t pull_ascii_pstring(char *dest, const void *src)
400 return pull_ascii(dest, src, sizeof(pstring), -1, STR_TERMINATE);
/* pull_ascii() of a NUL-terminated source (src_len -1, STR_TERMINATE) into
 * an fstring-sized destination. */
403 size_t pull_ascii_fstring(char *dest, const void *src)
405 return pull_ascii(dest, src, sizeof(fstring), -1, STR_TERMINATE);
409 * Copy a string from a char* src to a unicode destination.
411 * @returns the number of bytes occupied by the string in the destination.
413 * @param flags can have:
416 * <dt>STR_TERMINATE <dd>means include the null termination.
417 * <dt>STR_UPPER <dd>means uppercase in the destination.
418 * <dt>STR_NOALIGN <dd>means don't do alignment.
421 * @param dest_len is the maximum length allowed in the
422 * destination. If dest_len is -1 then no maximum is used.
/* Implementation notes: the source length is strlen(src); a dest_len of
 * (size_t)-1 is replaced by sizeof(pstring); with STR_UPPER the source is
 * first copied into tmpbuf with pstrcpy(). When ucs2_align() reports
 * misalignment, dest is advanced by one byte and a non-zero dest_len is
 * reduced by one. The byte count from the CH_UNIX to CH_UCS2 conversion is
 * added to len. */
424 size_t push_ucs2(const void *base_ptr, void *dest, const char *src, size_t dest_len, int flags)
427 size_t src_len = strlen(src);
430 /* treat a pstring as "unlimited" length */
431 if (dest_len == (size_t)-1)
432 dest_len = sizeof(pstring);
434 if (flags & STR_UPPER) {
435 pstrcpy(tmpbuf, src);
440 if (flags & STR_TERMINATE)
443 if (ucs2_align(base_ptr, dest, flags)) {
445 dest = (void *)((char *)dest + 1);
446 if (dest_len) dest_len--;
450 /* ucs2 is always a multiple of 2 bytes */
453 len += convert_string(CH_UNIX, CH_UCS2, src, src_len, dest, dest_len);
459 * Copy a string from a unix char* src to a UCS2 destination,
460 * allocating a buffer using talloc().
462 * @param dest always set at least to NULL
464 * @returns The number of bytes occupied by the string in the destination
465 * or -1 in case of error.
/* Converts src, terminator included (strlen(src)+1 bytes), from CH_UNIX to
 * CH_UCS2 through convert_string_talloc() on ctx and returns its result. */
467 size_t push_ucs2_talloc(TALLOC_CTX *ctx, smb_ucs2_t **dest, const char *src)
469 size_t src_len = strlen(src)+1;
472 return convert_string_talloc(ctx, CH_UNIX, CH_UCS2, src, src_len, (void **)dest);
477 * Copy a string from a unix char* src to a UCS2 destination, allocating a buffer
479 * @param dest always set at least to NULL
481 * @returns The number of bytes occupied by the string in the destination
482 * or -1 in case of error.
/* Converts src, terminator included (strlen(src)+1 bytes), from CH_UNIX to
 * CH_UCS2 through convert_string_allocate() and returns its result. */
485 size_t push_ucs2_allocate(smb_ucs2_t **dest, const char *src)
487 size_t src_len = strlen(src)+1;
490 return convert_string_allocate(CH_UNIX, CH_UCS2, src, src_len, (void **)dest);
494 Copy a string from a char* src to a UTF-8 destination.
495 Return the number of bytes occupied by the string in the destination
497 STR_TERMINATE means include the null termination
498 STR_UPPER means uppercase in the destination
499 dest_len is the maximum length allowed in the destination. If dest_len
500 is -1 then no maximum is used.
/* Implementation notes: the source length is strlen(src); a dest_len of
 * (size_t)-1 is replaced by sizeof(pstring); with STR_UPPER the source is
 * first copied into tmpbuf with pstrcpy(). The conversion itself is CH_UNIX
 * to CH_UTF8 through convert_string(), whose result is returned. */
503 size_t push_utf8(void *dest, const char *src, size_t dest_len, int flags)
505 size_t src_len = strlen(src);
508 /* treat a pstring as "unlimited" length */
509 if (dest_len == (size_t)-1)
510 dest_len = sizeof(pstring);
512 if (flags & STR_UPPER) {
513 pstrcpy(tmpbuf, src);
518 if (flags & STR_TERMINATE)
521 return convert_string(CH_UNIX, CH_UTF8, src, src_len, dest, dest_len);
/* push_utf8() into an fstring-sized destination, with STR_TERMINATE. */
524 size_t push_utf8_fstring(void *dest, const char *src)
526 return push_utf8(dest, src, sizeof(fstring), STR_TERMINATE);
/* push_utf8() into a pstring-sized destination, with STR_TERMINATE. */
529 size_t push_utf8_pstring(void *dest, const char *src)
531 return push_utf8(dest, src, sizeof(pstring), STR_TERMINATE);
535 * Copy a string from a unix char* src to a UTF-8 destination, allocating a buffer using talloc
537 * @param dest always set at least to NULL
539 * @returns The number of bytes occupied by the string in the destination
/* Converts src, terminator included (strlen(src)+1 bytes), from CH_UNIX to
 * CH_UTF8 through convert_string_talloc() on ctx and returns its result. */
542 size_t push_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src)
544 size_t src_len = strlen(src)+1;
547 return convert_string_talloc(ctx, CH_UNIX, CH_UTF8, src, src_len, (void**)dest);
551 * Copy a string from a unix char* src to a UTF-8 destination, allocating a buffer
553 * @param dest always set at least to NULL
555 * @returns The number of bytes occupied by the string in the destination
/* Converts src, terminator included (strlen(src)+1 bytes), from CH_UNIX to
 * CH_UTF8 through convert_string_allocate() and returns its result. */
558 size_t push_utf8_allocate(char **dest, const char *src)
560 size_t src_len = strlen(src)+1;
563 return convert_string_allocate(CH_UNIX, CH_UTF8, src, src_len, (void **)dest);
567 Copy a string from a ucs2 source to a unix char* destination.
569 STR_TERMINATE means the string in src is null terminated.
570 STR_NOALIGN means don't try to align.
571 if STR_TERMINATE is set then src_len is ignored if it is -1.
572 src_len is the length of the source area in bytes
573 Return the number of bytes occupied by the string in src.
574 The resulting string in "dest" is always null terminated.
/* Implementation notes: a dest_len of (size_t)-1 is replaced by
 * sizeof(pstring). If ucs2_align() reports misalignment, src is advanced by
 * one byte. With STR_TERMINATE, a src_len of (size_t)-1 becomes
 * strlen_w(src)*2 + 2; otherwise strnlen_w() limits the scan to src_len/2
 * characters. The text is converted CH_UCS2 to CH_UNIX and a terminator is
 * written at index MIN(ret, dest_len-1) of dest. */
577 size_t pull_ucs2(const void *base_ptr, char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
581 if (dest_len == (size_t)-1)
582 dest_len = sizeof(pstring);
584 if (ucs2_align(base_ptr, src, flags)) {
585 src = (const void *)((const char *)src + 1);
590 if (flags & STR_TERMINATE) {
591 if (src_len == (size_t)-1) {
592 src_len = strlen_w(src)*2 + 2;
594 size_t len = strnlen_w(src, src_len/2);
601 /* ucs2 is always a multiple of 2 bytes */
602 if (src_len != (size_t)-1)
605 ret = convert_string(CH_UCS2, CH_UNIX, src, src_len, dest, dest_len);
607 dest[MIN(ret, dest_len-1)] = 0;
/* pull_ucs2() of a NUL-terminated source (src_len -1, STR_TERMINATE) into a
 * pstring-sized destination, passing NULL as base_ptr. */
612 size_t pull_ucs2_pstring(char *dest, const void *src)
614 return pull_ucs2(NULL, dest, src, sizeof(pstring), -1, STR_TERMINATE);
/* pull_ucs2() of a NUL-terminated source (src_len -1, STR_TERMINATE) into an
 * fstring-sized destination, passing NULL as base_ptr. */
617 size_t pull_ucs2_fstring(char *dest, const void *src)
619 return pull_ucs2(NULL, dest, src, sizeof(fstring), -1, STR_TERMINATE);
623 * Copy a string from a UCS2 src to a unix char * destination, allocating a buffer using talloc
625 * @param dest always set at least to NULL
627 * @returns The number of bytes occupied by the string in the destination
/* Converts the UCS2 string src, terminator included
 * ((strlen_w(src)+1) * sizeof(smb_ucs2_t) bytes), from CH_UCS2 to CH_UNIX
 * through convert_string_talloc() on ctx and returns its result. */
630 size_t pull_ucs2_talloc(TALLOC_CTX *ctx, char **dest, const smb_ucs2_t *src)
632 size_t src_len = (strlen_w(src)+1) * sizeof(smb_ucs2_t);
634 return convert_string_talloc(ctx, CH_UCS2, CH_UNIX, src, src_len, (void **)dest);
638 * Copy a string from a UCS2 src to a unix char * destination, allocating a buffer
640 * @param dest always set at least to NULL
642 * @returns The number of bytes occupied by the string in the destination
/* Converts the UCS2 string src, terminator included
 * ((strlen_w(src)+1) * sizeof(smb_ucs2_t) bytes), from CH_UCS2 to CH_UNIX
 * through convert_string_allocate() and returns its result. */
645 size_t pull_ucs2_allocate(void **dest, const smb_ucs2_t *src)
647 size_t src_len = (strlen_w(src)+1) * sizeof(smb_ucs2_t);
649 return convert_string_allocate(CH_UCS2, CH_UNIX, src, src_len, dest);
653 Copy a string from a utf-8 source to a unix char* destination.
655 STR_TERMINATE means the string in src is null terminated.
656 if STR_TERMINATE is set then src_len is ignored if it is -1.
657 src_len is the length of the source area in bytes
658 Return the number of bytes occupied by the string in src.
659 The resulting string in "dest" is always null terminated.
/* Implementation notes: a dest_len of (size_t)-1 is replaced by
 * sizeof(pstring). With STR_TERMINATE, a src_len of (size_t)-1 becomes
 * strlen(src) + 1; otherwise strnlen() limits the scan to src_len bytes.
 * The text is converted CH_UTF8 to CH_UNIX and a terminator is written at
 * index MIN(ret, dest_len-1) of dest. */
662 size_t pull_utf8(char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
666 if (dest_len == (size_t)-1)
667 dest_len = sizeof(pstring);
669 if (flags & STR_TERMINATE) {
670 if (src_len == (size_t)-1) {
671 src_len = strlen(src) + 1;
673 size_t len = strnlen(src, src_len);
680 ret = convert_string(CH_UTF8, CH_UNIX, src, src_len, dest, dest_len);
682 dest[MIN(ret, dest_len-1)] = 0;
/* pull_utf8() of a NUL-terminated source (src_len -1, STR_TERMINATE) into a
 * pstring-sized destination. */
687 size_t pull_utf8_pstring(char *dest, const void *src)
689 return pull_utf8(dest, src, sizeof(pstring), -1, STR_TERMINATE);
/* pull_utf8() of a NUL-terminated source (src_len -1, STR_TERMINATE) into an
 * fstring-sized destination. */
692 size_t pull_utf8_fstring(char *dest, const void *src)
694 return pull_utf8(dest, src, sizeof(fstring), -1, STR_TERMINATE);
698 * Copy a string from a UTF-8 src to a unix char * destination, allocating a buffer using talloc
700 * @param dest always set at least to NULL
702 * @returns The number of bytes occupied by the string in the destination
/* Converts src, terminator included (strlen(src)+1 bytes), from CH_UTF8 to
 * CH_UNIX through convert_string_talloc() on ctx and returns its result. */
705 size_t pull_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src)
707 size_t src_len = strlen(src)+1;
709 return convert_string_talloc(ctx, CH_UTF8, CH_UNIX, src, src_len, (void **)dest);
713 * Copy a string from a UTF-8 src to a unix char * destination, allocating a buffer
715 * @param dest always set at least to NULL
717 * @returns The number of bytes occupied by the string in the destination
/* Converts src, terminator included (strlen(src)+1 bytes), from CH_UTF8 to
 * CH_UNIX through convert_string_allocate() and returns its result. */
720 size_t pull_utf8_allocate(void **dest, const char *src)
722 size_t src_len = strlen(src)+1;
724 return convert_string_allocate(CH_UTF8, CH_UNIX, src, src_len, dest);
728 Copy a string from a char* src to a unicode or ascii
729 dos codepage destination choosing unicode or ascii based on the
730 flags in the SMB buffer starting at base_ptr.
731 Return the number of bytes occupied by the string in the destination.
733 STR_TERMINATE means include the null termination.
734 STR_UPPER means uppercase in the destination.
735 STR_ASCII use ascii even with unicode packet.
736 STR_NOALIGN means don't do alignment.
737 dest_len is the maximum length allowed in the destination. If dest_len
738 is -1 then no maximum is used.
/* Dispatch rule: unless STR_ASCII is set, push_ucs2() is used when
 * STR_UNICODE is set or the smb_flg2 value read from base_ptr has
 * FLAGS2_UNICODE_STRINGS set; push_ascii() is the other path. */
741 size_t push_string(const void *base_ptr, void *dest, const char *src, size_t dest_len, int flags)
743 if (!(flags & STR_ASCII) && \
744 ((flags & STR_UNICODE || \
745 (SVAL(base_ptr, smb_flg2) & FLAGS2_UNICODE_STRINGS)))) {
746 return push_ucs2(base_ptr, dest, src, dest_len, flags);
748 return push_ascii(dest, src, dest_len, flags);
753 Copy a string from a unicode or ascii source (depending on
754 the packet flags) to a char* destination.
756 STR_TERMINATE means the string in src is null terminated.
757 STR_UNICODE means to force as unicode.
758 STR_ASCII use ascii even with unicode packet.
759 STR_NOALIGN means don't do alignment.
760 if STR_TERMINATE is set then src_len is ignored if it is -1
761 src_len is the length of the source area in bytes.
762 Return the number of bytes occupied by the string in src.
763 The resulting string in "dest" is always null terminated.
/* Dispatch rule: unless STR_ASCII is set, pull_ucs2() is used when
 * STR_UNICODE is set or the smb_flg2 value read from base_ptr has
 * FLAGS2_UNICODE_STRINGS set; pull_ascii() is the other path. */
766 size_t pull_string(const void *base_ptr, char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
768 if (!(flags & STR_ASCII) && \
769 ((flags & STR_UNICODE || \
770 (SVAL(base_ptr, smb_flg2) & FLAGS2_UNICODE_STRINGS)))) {
771 return pull_ucs2(base_ptr, dest, src, dest_len, src_len, flags);
773 return pull_ascii(dest, src, dest_len, src_len, flags);
/* Applies the same unicode test as push_string() and pull_string(): when
 * STR_ASCII is clear and either STR_UNICODE is set or the smb_flg2 value
 * read from base_ptr has FLAGS2_UNICODE_STRINGS set, the result of
 * ucs2_align() for p is returned.
 * NOTE(review): presumably the result is 0 when the test fails; confirm. */
776 size_t align_string(const void *base_ptr, const char *p, int flags)
778 if (!(flags & STR_ASCII) && \
779 ((flags & STR_UNICODE || \
780 (SVAL(base_ptr, smb_flg2) & FLAGS2_UNICODE_STRINGS)))) {
781 return ucs2_align(base_ptr, p, flags);
787 Convert from ucs2 to unix charset and return the
788 allocated and converted string or NULL if an error occurred.
789 You must provide a zero terminated string.
790 The returning string will be zero terminated.
/* Sizes the input as (strlen_w(src) + 1) * sizeof(smb_ucs2_t) bytes, so the
 * terminator is converted too, runs convert_string_allocate() from CH_UCS2
 * to CH_UNIX and tests for its (size_t)-1 error result. */
793 char *acnv_u2ux(const smb_ucs2_t *src)
799 slen = (strlen_w(src) + 1) * sizeof(smb_ucs2_t);
800 dlen = convert_string_allocate(CH_UCS2, CH_UNIX, src, slen, &dest);
801 if (dlen == (size_t)-1)
808 Convert from unix to ucs2 charset and return the
809 allocated and converted string or NULL if an error occurred.
810 You must provide a zero terminated string.
811 The returning string will be zero terminated.
/* Sizes the input as strlen(src) + 1 bytes, so the terminator is converted
 * too, runs convert_string_allocate() from CH_UNIX to CH_UCS2 and tests for
 * its (size_t)-1 error result. */
814 smb_ucs2_t *acnv_uxu2(const char *src)
820 slen = strlen(src) + 1;
821 dlen = convert_string_allocate(CH_UNIX, CH_UCS2, src, slen, &dest);
822 if (dlen == (size_t)-1)
829 Convert from ucs2 to dos charset and return the
830 allocated and converted string or NULL if an error occurred.
831 You must provide a zero terminated string.
832 The returning string will be zero terminated.
/* Sizes the input as (strlen_w(src) + 1) * sizeof(smb_ucs2_t) bytes, so the
 * terminator is converted too, runs convert_string_allocate() from CH_UCS2
 * to CH_DOS and tests for its (size_t)-1 error result. */
835 char *acnv_u2dos(const smb_ucs2_t *src)
841 slen = (strlen_w(src) + 1) * sizeof(smb_ucs2_t);
842 dlen = convert_string_allocate(CH_UCS2, CH_DOS, src, slen, &dest);
843 if (dlen == (size_t)-1)
850 Convert from dos to ucs2 charset and return the
851 allocated and converted string or NULL if an error occurred.
852 You must provide a zero terminated string.
853 The returning string will be zero terminated.
856 smb_ucs2_t *acnv_dosu2(const char *src)
862 slen = strlen(src) + 1;
863 dlen = convert_string_allocate(CH_DOS, CH_UCS2, src, slen, &dest);
864 if (dlen == (size_t)-1)