2 Unix SMB/CIFS implementation.
3 Character set conversion Extensions
4 Copyright (C) Igor Vergeichik <iverg@mail.ru> 2001
5 Copyright (C) Andrew Tridgell 2001
6 Copyright (C) Simo Sorce 2001
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2 of the License, or
11 (at your option) any later version.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
/* File-static scratch buffer; unix_strupper()/unix_strlower() cast it to smb_ucs2_t* and convert into it. */
25 static pstring cvtbuf;
/* Conversion handle table, indexed as conv_handles[from][to]; a slot whose conversion could not be opened is reset to NULL. */
27 static smb_iconv_t conv_handles[NUM_CHARSETS][NUM_CHARSETS];
29 /****************************************************************************
30 Return the name of a charset to give to iconv().
31 ****************************************************************************/
33 static char *charset_name(charset_t ch)
/* UCS2 and UTF8 map to fixed names; UNIX, DOS and DISPLAY take whatever the matching lp_*_charset() call returns. */
37 if (ch == CH_UCS2) ret = "UCS-2LE";
38 else if (ch == CH_UNIX) ret = lp_unix_charset();
39 else if (ch == CH_DOS) ret = lp_dos_charset();
40 else if (ch == CH_DISPLAY) ret = lp_display_charset();
41 else if (ch == CH_UTF8) ret = "UTF8";
/* A NULL or empty name falls back to "ASCII". */
43 if (!ret || !*ret) ret = "ASCII";
/* Initialisation hook: convert_string() and convert_string_allocate() call this before they read conv_handles. */
47 static void lazy_initialize_conv(void)
/* Function-local static flag, starts as False so state persists across calls. */
49 static int initialized = False;
59 /****************************************************************************
60 Initialize iconv conversion descriptors.
61 ****************************************************************************/
66 BOOL did_reload = False;
/* Bootstrap: open the UNIX<->UCS2 pair as plain ASCII<->UCS-2LE, but only if those slots are still empty. */
68 /* so that charset_name() works we need to get the UNIX<->UCS2 going
70 if (!conv_handles[CH_UNIX][CH_UCS2])
71 conv_handles[CH_UNIX][CH_UCS2] = smb_iconv_open("UCS-2LE", "ASCII");
73 if (!conv_handles[CH_UCS2][CH_UNIX])
74 conv_handles[CH_UCS2][CH_UNIX] = smb_iconv_open("ASCII", "UCS-2LE");
/* Visit every (c1 = from, c2 = to) pair of charsets. */
76 for (c1=0;c1<NUM_CHARSETS;c1++) {
77 for (c2=0;c2<NUM_CHARSETS;c2++) {
78 char *n1 = charset_name((charset_t)c1);
79 char *n2 = charset_name((charset_t)c2);
/* Detects a slot that is already open with exactly the current from/to names. */
80 if (conv_handles[c1][c2] &&
81 strcmp(n1, conv_handles[c1][c2]->from_name) == 0 &&
82 strcmp(n2, conv_handles[c1][c2]->to_name) == 0)
/* Otherwise drop any stale handle before reopening the slot. */
87 if (conv_handles[c1][c2])
88 smb_iconv_close(conv_handles[c1][c2]);
/* Argument order is (n2, n1): the c2 name first, matching the ("UCS-2LE", "ASCII") call used for [CH_UNIX][CH_UCS2]. */
90 conv_handles[c1][c2] = smb_iconv_open(n2,n1);
/* A (smb_iconv_t)-1 result is logged at level 0 and the slot is cleared to NULL. */
91 if (conv_handles[c1][c2] == (smb_iconv_t)-1) {
92 DEBUG(0,("Conversion from %s to %s not supported\n",
93 charset_name((charset_t)c1), charset_name((charset_t)c2)));
94 conv_handles[c1][c2] = NULL;
105 * Convert string from one encoding to another, making error checking etc
107 * @param from,to source and destination charsets; they select the conversion descriptor created in init_iconv()
108 * @param src pointer to source string (multibyte or singlebyte)
109 * @param srclen length of the source string in bytes
110 * @param dest pointer to destination string (multibyte or singlebyte)
111 * @param destlen maximal length allowed for string
112 * @retval the number of bytes occupied in the destination
115 size_t convert_string(charset_t from, charset_t to,
116 void const *src, size_t srclen,
117 void *dest, size_t destlen)
121 const char* inbuf = (const char*)src;
122 char* outbuf = (char*)dest;
123 smb_iconv_t descriptor;
/* srclen == (size_t)-1 means "src is NUL-terminated": measure it and include the terminator. */
125 if (srclen == (size_t)-1)
126 srclen = strlen(src)+1;
/* Make sure the handle table is set up, then pick the handle for this from/to pair. */
128 lazy_initialize_conv();
130 descriptor = conv_handles[from][to];
/* No usable handle (-1 or NULL): fall back to a raw byte copy bounded by the smaller of srclen and destlen. */
132 if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
133 /* conversion not supported, use as is */
134 size_t len = MIN(srclen,destlen);
135 memcpy(dest,src,len);
/* Run the conversion; the lines below choose a human-readable reason for a failure. */
141 retval = smb_iconv(descriptor, &inbuf, &i_len, &outbuf, &o_len);
143 char *reason="unknown error";
146 reason="Incomplete multibyte sequence";
149 reason="No more room";
150 DEBUG(0, ("convert_string: Required %d, available %d\n",
152 /* we are not sure we need srclen bytes,
153 may be more, may be less.
154 We only know we need more than destlen
158 reason="Illegal multibyte sequence";
161 /* smb_panic(reason); */
/* Result is destlen minus o_len; presumably o_len is the output space still unused after smb_iconv() - TODO confirm. */
163 return destlen-o_len;
167 * Convert between character sets, allocating a new buffer for the result.
169 * @param srclen length of source buffer.
170 * @param dest always set at least to NULL
171 * @note -1 is not accepted for srclen.
173 * @retval Size in bytes of the converted string; or -1 in case of error.
176 size_t convert_string_allocate(charset_t from, charset_t to,
177 void const *src, size_t srclen, void **dest)
179 size_t i_len, o_len, destlen;
181 const char *inbuf = (const char *)src;
183 smb_iconv_t descriptor;
/* Unlike convert_string(), a NULL src or a srclen of (size_t)-1 is rejected here. */
187 if (src == NULL || srclen == (size_t)-1)
190 lazy_initialize_conv();
192 descriptor = conv_handles[from][to];
/* No usable handle for this pair: logged at level 3 and treated as an error (no raw-copy fallback). */
194 if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
195 /* conversion not supported, return -1*/
196 DEBUG(3, ("convert_string_allocate: conversion not supported!\n"));
/* Starting size is at least 512 bytes; it is doubled before each realloc of the output buffer. */
200 destlen = MAX(srclen, 512);
203 destlen = destlen * 2;
204 ob = (char *)realloc(outbuf, destlen);
206 DEBUG(0, ("convert_string_allocate: realloc failed!\n"));
214 retval = smb_iconv(descriptor,
218 char *reason="unknown error";
221 reason="Incomplete multibyte sequence";
226 reason="Illegal multibyte sequence";
/* NOTE(review): inbuf is printed with %s, but callers pass an explicit srclen and may pass UCS2 data, so it may not be a NUL-terminated byte string - confirm. */
229 DEBUG(0,("Conversion error: %s(%s)\n",reason,inbuf));
230 /* smb_panic(reason); */
/* Reduce destlen by o_len and resize the buffer to that size before handing it out through *dest. */
234 destlen = destlen - o_len;
235 *dest = (char *)Realloc(ob,destlen);
237 DEBUG(0, ("convert_string_allocate: out of memory!\n"));
246 * Convert between character sets, allocating a new buffer using talloc for the result.
248 * @param srclen length of source buffer.
249 * @param dest always set at least to NULL
250 * @note -1 is not accepted for srclen.
252 * @retval Size in bytes of the converted string; or -1 in case of error.
255 size_t convert_string_talloc(TALLOC_CTX *ctx, charset_t from, charset_t to,
256 void const *src, size_t srclen, void **dest)
258 void *alloced_string;
/* Convert into a temporary buffer via convert_string_allocate(); (size_t)-1 signals failure. */
262 dest_len=convert_string_allocate(from, to, src, srclen, &alloced_string);
263 if (dest_len == (size_t)-1)
/* Duplicate the result under the talloc context, then release the temporary buffer. */
265 *dest = talloc_memdup(ctx, alloced_string, dest_len);
266 SAFE_FREE(alloced_string);
/* Upper-case a unix-charset string by round-tripping through UCS2: UNIX -> UCS2, strupper_w(), UCS2 -> UNIX into dest. */
272 size_t unix_strupper(const char *src, size_t srclen, char *dest, size_t destlen)
/* The UCS2 intermediate lives in the file-static cvtbuf, so all calls share one scratch buffer. */
275 smb_ucs2_t *buffer=(smb_ucs2_t*)cvtbuf;
276 size=convert_string(CH_UNIX, CH_UCS2, src, srclen, buffer, sizeof(cvtbuf));
/* Special case: strupper_w() returned zero and dest is the same pointer as src. */
277 if (!strupper_w(buffer) && (dest == src))
/* Convert the `size` bytes produced above back to the unix charset; the result is the byte count written to dest. */
279 return convert_string(CH_UCS2, CH_UNIX, buffer, size, dest, destlen);
/* Lower-case a unix-charset string by round-tripping through UCS2: UNIX -> UCS2, strlower_w(), UCS2 -> UNIX into dest. */
282 size_t unix_strlower(const char *src, size_t srclen, char *dest, size_t destlen)
/* The UCS2 intermediate lives in the file-static cvtbuf, so all calls share one scratch buffer. */
285 smb_ucs2_t *buffer=(smb_ucs2_t*)cvtbuf;
286 size=convert_string(CH_UNIX, CH_UCS2, src, srclen, buffer, sizeof(cvtbuf));
/* Special case: strlower_w() returned zero and dest is the same pointer as src. */
287 if (!strlower_w(buffer) && (dest == src))
/* Convert the `size` bytes produced above back to the unix charset; the result is the byte count written to dest. */
289 return convert_string(CH_UCS2, CH_UNIX, buffer, size, dest, destlen);
/* Alignment helper for UCS2 data located at p inside a buffer that starts at base_ptr. */
293 size_t ucs2_align(const void *base_ptr, const void *p, int flags)
/* STR_NOALIGN or STR_ASCII in flags selects a separate early path. */
295 if (flags & (STR_NOALIGN|STR_ASCII))
/* Otherwise the result is the low bit of p's offset from base_ptr (non-zero for an odd offset). */
297 return PTR_DIFF(p, base_ptr) & 1;
301 /****************************************************************************
302 copy a string from a char* unix src to a dos codepage string destination
303 return the number of bytes occupied by the string in the destination
305 STR_TERMINATE means include the null termination
306 STR_UPPER means uppercase in the destination
307 dest_len is the maximum length in bytes allowed in the destination. If dest_len
308 is -1 then no maximum is used
309 ****************************************************************************/
311 size_t push_ascii(void *dest, const char *src, size_t dest_len, int flags)
/* Source length in bytes, measured up front and without the terminator. */
313 size_t src_len = strlen(src);
316 /* treat a pstring as "unlimited" length */
317 if (dest_len == (size_t)-1)
318 dest_len = sizeof(pstring);
/* STR_UPPER works on a copy of src in tmpbuf. NOTE(review): src_len was taken from the original src before this copy - confirm it cannot exceed what tmpbuf holds. */
320 if (flags & STR_UPPER) {
321 pstrcpy(tmpbuf, src);
326 if (flags & STR_TERMINATE)
/* Convert from the unix charset to the DOS codepage; the result is the byte count written to dest. */
329 return convert_string(CH_UNIX, CH_DOS, src, src_len, dest, dest_len);
/* push_ascii() into an fstring-sized destination, with STR_TERMINATE set. */
332 size_t push_ascii_fstring(void *dest, const char *src)
334 return push_ascii(dest, src, sizeof(fstring), STR_TERMINATE);
/* push_ascii() into a pstring-sized destination, with STR_TERMINATE set. */
337 size_t push_ascii_pstring(void *dest, const char *src)
339 return push_ascii(dest, src, sizeof(pstring), STR_TERMINATE);
/* Makes exactly the same push_ascii() call as push_ascii_pstring(): pstring-sized destination, STR_TERMINATE. */
342 size_t push_pstring(void *dest, const char *src)
344 return push_ascii(dest, src, sizeof(pstring), STR_TERMINATE);
347 /****************************************************************************
348 Copy a string from a dos codepage source to a unix char* destination.
350 STR_TERMINATE means the string in src is null terminated.
351 if STR_TERMINATE is set then src_len is ignored if it is -1.
352 src_len is the length of the source area in bytes.
353 Return the number of bytes occupied by the string in src.
354 The resulting string in "dest" is always null terminated.
355 ****************************************************************************/
357 size_t pull_ascii(char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
/* dest_len of (size_t)-1 is treated as a pstring-sized destination. */
361 if (dest_len == (size_t)-1)
362 dest_len = sizeof(pstring);
/* With STR_TERMINATE: an unknown src_len (-1) becomes strlen+1; a known src_len is used as the bound for strnlen(). */
364 if (flags & STR_TERMINATE) {
365 if (src_len == (size_t)-1) {
366 src_len = strlen(src) + 1;
368 size_t len = strnlen(src, src_len);
/* Convert from the DOS codepage to the unix charset; ret is the byte count written to dest. */
375 ret = convert_string(CH_DOS, CH_UNIX, src, src_len, dest, dest_len);
/* Terminate at ret, or at the last byte of dest when the output filled it. NOTE(review): dest_len-1 wraps if dest_len is 0 - confirm a guard exists. */
378 dest[MIN(ret, dest_len-1)] = 0;
/* pull_ascii() of a NUL-terminated source (src_len -1, STR_TERMINATE) into a pstring-sized destination. */
383 size_t pull_ascii_pstring(char *dest, const void *src)
385 return pull_ascii(dest, src, sizeof(pstring), -1, STR_TERMINATE);
/* pull_ascii() of a NUL-terminated source (src_len -1, STR_TERMINATE) into an fstring-sized destination. */
388 size_t pull_ascii_fstring(char *dest, const void *src)
390 return pull_ascii(dest, src, sizeof(fstring), -1, STR_TERMINATE);
393 /****************************************************************************
394 Copy a string from a char* src to a unicode destination.
395 Return the number of bytes occupied by the string in the destination.
397 STR_TERMINATE means include the null termination.
398 STR_UPPER means uppercase in the destination.
399 STR_NOALIGN means don't do alignment.
400 dest_len is the maximum length allowed in the destination. If dest_len
401 is -1 then no maximum is used.
402 ****************************************************************************/
404 size_t push_ucs2(const void *base_ptr, void *dest, const char *src, size_t dest_len, int flags)
/* Source length in bytes, measured up front and without the terminator. */
407 size_t src_len = strlen(src);
410 /* treat a pstring as "unlimited" length */
411 if (dest_len == (size_t)-1)
412 dest_len = sizeof(pstring);
/* STR_UPPER works on a copy of src in tmpbuf. NOTE(review): src_len was taken from the original src before this copy - confirm it cannot exceed what tmpbuf holds. */
414 if (flags & STR_UPPER) {
415 pstrcpy(tmpbuf, src);
420 if (flags & STR_TERMINATE)
/* dest sits at an odd offset from base_ptr: step over one byte and shrink the space left accordingly. */
423 if (ucs2_align(base_ptr, dest, flags)) {
425 dest = (void *)((char *)dest + 1);
426 if (dest_len) dest_len--;
430 /* ucs2 is always a multiple of 2 bytes */
/* Add the bytes written by the UNIX -> UCS2 conversion to the running total in len. */
433 len += convert_string(CH_UNIX, CH_UCS2, src, src_len, dest, dest_len);
438 * Copy a string from a unix char* src to a UCS2 destination, allocating a buffer using talloc
440 * @param dest always set at least to NULL
442 * @retval The number of bytes occupied by the string in the destination
443 * or -1 in case of error.
446 size_t push_ucs2_talloc(TALLOC_CTX *ctx, smb_ucs2_t **dest, const char *src)
/* +1 so the NUL terminator is converted along with the string. */
448 size_t src_len = strlen(src)+1;
/* UNIX -> UCS2, result allocated under ctx by convert_string_talloc(). */
451 return convert_string_talloc(ctx, CH_UNIX, CH_UCS2, src, src_len, (void **)dest);
455 * Copy a string from a unix char* src to a UCS2 destination, allocating a buffer
457 * @param dest always set at least to NULL
459 * @retval The number of bytes occupied by the string in the destination
460 * or -1 in case of error.
463 size_t push_ucs2_allocate(smb_ucs2_t **dest, const char *src)
/* +1 so the NUL terminator is converted along with the string. */
465 size_t src_len = strlen(src)+1;
/* UNIX -> UCS2, result allocated by convert_string_allocate(). */
468 return convert_string_allocate(CH_UNIX, CH_UCS2, src, src_len, (void **)dest);
471 /****************************************************************************
472 Copy a string from a char* src to a UTF-8 destination.
473 Return the number of bytes occupied by the string in the destination
475 STR_TERMINATE means include the null termination
476 STR_UPPER means uppercase in the destination
477 dest_len is the maximum length allowed in the destination. If dest_len
478 is -1 then no maximum is used.
479 ****************************************************************************/
481 size_t push_utf8(void *dest, const char *src, size_t dest_len, int flags)
/* Source length in bytes, measured up front and without the terminator. */
483 size_t src_len = strlen(src);
486 /* treat a pstring as "unlimited" length */
487 if (dest_len == (size_t)-1)
488 dest_len = sizeof(pstring);
/* STR_UPPER works on a copy of src in tmpbuf. NOTE(review): src_len was taken from the original src before this copy - confirm it cannot exceed what tmpbuf holds. */
490 if (flags & STR_UPPER) {
491 pstrcpy(tmpbuf, src);
496 if (flags & STR_TERMINATE)
/* Convert from the unix charset to UTF-8; the result is the byte count written to dest. */
499 return convert_string(CH_UNIX, CH_UTF8, src, src_len, dest, dest_len);
/* push_utf8() into an fstring-sized destination, with STR_TERMINATE set. */
502 size_t push_utf8_fstring(void *dest, const char *src)
504 return push_utf8(dest, src, sizeof(fstring), STR_TERMINATE);
/* push_utf8() into a pstring-sized destination, with STR_TERMINATE set. */
507 size_t push_utf8_pstring(void *dest, const char *src)
509 return push_utf8(dest, src, sizeof(pstring), STR_TERMINATE);
513 * Copy a string from a unix char* src to a UTF-8 destination, allocating a buffer using talloc
515 * @param dest always set at least to NULL
517 * @retval The number of bytes occupied by the string in the destination
520 size_t push_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src)
/* +1 so the NUL terminator is converted along with the string. */
522 size_t src_len = strlen(src)+1;
/* UNIX -> UTF-8, result allocated under ctx by convert_string_talloc(). */
525 return convert_string_talloc(ctx, CH_UNIX, CH_UTF8, src, src_len, (void**)dest);
529 * Copy a string from a unix char* src to a UTF-8 destination, allocating a buffer
531 * @param dest always set at least to NULL
533 * @retval The number of bytes occupied by the string in the destination
536 size_t push_utf8_allocate(char **dest, const char *src)
/* +1 so the NUL terminator is converted along with the string. */
538 size_t src_len = strlen(src)+1;
/* UNIX -> UTF-8, result allocated by convert_string_allocate(). */
541 return convert_string_allocate(CH_UNIX, CH_UTF8, src, src_len, (void **)dest);
544 /****************************************************************************
545 Copy a string from a ucs2 source to a unix char* destination.
547 STR_TERMINATE means the string in src is null terminated.
548 STR_NOALIGN means don't try to align.
549 if STR_TERMINATE is set then src_len is ignored if it is -1.
550 src_len is the length of the source area in bytes
551 Return the number of bytes occupied by the string in src.
552 The resulting string in "dest" is always null terminated.
553 ****************************************************************************/
555 size_t pull_ucs2(const void *base_ptr, char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
/* dest_len of (size_t)-1 is treated as a pstring-sized destination. */
559 if (dest_len == (size_t)-1)
560 dest_len = sizeof(pstring);
/* src sits at an odd offset from base_ptr: step over one byte before reading UCS2 data. */
562 if (ucs2_align(base_ptr, src, flags)) {
563 src = (const void *)((const char *)src + 1);
/* With STR_TERMINATE: an unknown src_len (-1) becomes the string's byte length plus a 2-byte terminator; a known src_len bounds strnlen_w() in 2-byte units. */
568 if (flags & STR_TERMINATE) {
569 if (src_len == (size_t)-1) {
570 src_len = strlen_w(src)*2 + 2;
572 size_t len = strnlen_w(src, src_len/2);
579 /* ucs2 is always a multiple of 2 bytes */
580 if (src_len != (size_t)-1)
/* Convert from UCS2 to the unix charset; ret is the byte count written to dest. */
583 ret = convert_string(CH_UCS2, CH_UNIX, src, src_len, dest, dest_len);
/* Terminate at ret, or at the last byte of dest when the output filled it. NOTE(review): dest_len-1 wraps if dest_len is 0 - confirm a guard exists. */
585 dest[MIN(ret, dest_len-1)] = 0;
/* pull_ucs2() of a terminated source (src_len -1, STR_TERMINATE) into a pstring-sized destination; base_ptr is passed as NULL. */
590 size_t pull_ucs2_pstring(char *dest, const void *src)
592 return pull_ucs2(NULL, dest, src, sizeof(pstring), -1, STR_TERMINATE);
/* pull_ucs2() of a terminated source (src_len -1, STR_TERMINATE) into an fstring-sized destination; base_ptr is passed as NULL. */
595 size_t pull_ucs2_fstring(char *dest, const void *src)
597 return pull_ucs2(NULL, dest, src, sizeof(fstring), -1, STR_TERMINATE);
601 * Copy a string from a UCS2 src to a unix char * destination, allocating a buffer using talloc
603 * @param dest always set at least to NULL
605 * @retval The number of bytes occupied by the string in the destination
608 size_t pull_ucs2_talloc(TALLOC_CTX *ctx, void **dest, const smb_ucs2_t *src)
/* Source size in bytes, including the UCS2 terminator. */
610 size_t src_len = (strlen_w(src)+1) * sizeof(smb_ucs2_t);
/* UCS2 -> UNIX, result allocated under ctx by convert_string_talloc(). */
612 return convert_string_talloc(ctx, CH_UCS2, CH_UNIX, src, src_len, dest);
616 * Copy a string from a UCS2 src to a unix char * destination, allocating a buffer
618 * @param dest always set at least to NULL
620 * @retval The number of bytes occupied by the string in the destination
623 size_t pull_ucs2_allocate(void **dest, const smb_ucs2_t *src)
/* Source size in bytes, including the UCS2 terminator. */
625 size_t src_len = (strlen_w(src)+1) * sizeof(smb_ucs2_t);
/* UCS2 -> UNIX, result allocated by convert_string_allocate(). */
627 return convert_string_allocate(CH_UCS2, CH_UNIX, src, src_len, dest);
630 /****************************************************************************
631 Copy a string from a utf-8 source to a unix char* destination.
633 STR_TERMINATE means the string in src is null terminated.
634 if STR_TERMINATE is set then src_len is ignored if it is -1.
635 src_len is the length of the source area in bytes
636 Return the number of bytes occupied by the string in src.
637 The resulting string in "dest" is always null terminated.
638 ****************************************************************************/
640 size_t pull_utf8(char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
/* dest_len of (size_t)-1 is treated as a pstring-sized destination. */
644 if (dest_len == (size_t)-1)
645 dest_len = sizeof(pstring);
/* With STR_TERMINATE: an unknown src_len (-1) becomes strlen+1; a known src_len is used as the bound for strnlen(). */
647 if (flags & STR_TERMINATE) {
648 if (src_len == (size_t)-1) {
649 src_len = strlen(src) + 1;
651 size_t len = strnlen(src, src_len);
/* Convert from UTF-8 to the unix charset; ret is the byte count written to dest. */
658 ret = convert_string(CH_UTF8, CH_UNIX, src, src_len, dest, dest_len);
/* Terminate at ret, or at the last byte of dest when the output filled it. NOTE(review): dest_len-1 wraps if dest_len is 0 - confirm a guard exists. */
660 dest[MIN(ret, dest_len-1)] = 0;
/* pull_utf8() of a NUL-terminated source (src_len -1, STR_TERMINATE) into a pstring-sized destination. */
665 size_t pull_utf8_pstring(char *dest, const void *src)
667 return pull_utf8(dest, src, sizeof(pstring), -1, STR_TERMINATE);
/* pull_utf8() of a NUL-terminated source (src_len -1, STR_TERMINATE) into an fstring-sized destination. */
670 size_t pull_utf8_fstring(char *dest, const void *src)
672 return pull_utf8(dest, src, sizeof(fstring), -1, STR_TERMINATE);
676 * Copy a string from a UTF-8 src to a unix char * destination, allocating a buffer using talloc
678 * @param dest always set at least to NULL
680 * @retval The number of bytes occupied by the string in the destination
683 size_t pull_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src)
/* +1 so the NUL terminator is converted along with the string. */
685 size_t src_len = strlen(src)+1;
/* UTF-8 -> UNIX, result allocated under ctx by convert_string_talloc(). */
687 return convert_string_talloc(ctx, CH_UTF8, CH_UNIX, src, src_len, (void **)dest);
691 * Copy a string from a UTF-8 src to a unix char * destination, allocating a buffer
693 * @param dest always set at least to NULL
695 * @retval The number of bytes occupied by the string in the destination
698 size_t pull_utf8_allocate(void **dest, const char *src)
/* +1 so the NUL terminator is converted along with the string. */
700 size_t src_len = strlen(src)+1;
/* UTF-8 -> UNIX, result allocated by convert_string_allocate(). */
702 return convert_string_allocate(CH_UTF8, CH_UNIX, src, src_len, dest);
705 /****************************************************************************
706 Copy a string from a char* src to a unicode or ascii
707 dos codepage destination choosing unicode or ascii based on the
708 flags in the SMB buffer starting at base_ptr.
709 Return the number of bytes occupied by the string in the destination.
711 STR_TERMINATE means include the null termination.
712 STR_UPPER means uppercase in the destination.
713 STR_ASCII use ascii even with unicode packet.
714 STR_NOALIGN means don't do alignment.
715 dest_len is the maximum length allowed in the destination. If dest_len
716 is -1 then no maximum is used.
717 ****************************************************************************/
719 size_t push_string(const void *base_ptr, void *dest, const char *src, size_t dest_len, int flags)
/* UCS2 is used only when STR_ASCII is absent and either STR_UNICODE is set or the flg2 field read from base_ptr has FLAGS2_UNICODE_STRINGS; everything else goes through push_ascii(). */
721 if (!(flags & STR_ASCII) && \
722 ((flags & STR_UNICODE || \
723 (SVAL(base_ptr, smb_flg2) & FLAGS2_UNICODE_STRINGS)))) {
724 return push_ucs2(base_ptr, dest, src, dest_len, flags);
726 return push_ascii(dest, src, dest_len, flags);
730 /****************************************************************************
731 Copy a string from a unicode or ascii source (depending on
732 the packet flags) to a char* destination.
734 STR_TERMINATE means the string in src is null terminated.
735 STR_UNICODE means to force as unicode.
736 STR_ASCII use ascii even with unicode packet.
737 STR_NOALIGN means don't do alignment.
738 if STR_TERMINATE is set then src_len is ignored if it is -1
739 src_len is the length of the source area in bytes.
740 Return the number of bytes occupied by the string in src.
741 The resulting string in "dest" is always null terminated.
742 ****************************************************************************/
744 size_t pull_string(const void *base_ptr, char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
/* UCS2 is used only when STR_ASCII is absent and either STR_UNICODE is set or the flg2 field read from base_ptr has FLAGS2_UNICODE_STRINGS; everything else goes through pull_ascii(). */
746 if (!(flags & STR_ASCII) && \
747 ((flags & STR_UNICODE || \
748 (SVAL(base_ptr, smb_flg2) & FLAGS2_UNICODE_STRINGS)))) {
749 return pull_ucs2(base_ptr, dest, src, dest_len, src_len, flags);
751 return pull_ascii(dest, src, dest_len, src_len, flags);
/* Alignment for a string at p, using the same unicode-vs-ascii test as push_string()/pull_string(). */
754 size_t align_string(const void *base_ptr, const char *p, int flags)
/* Only the unicode case defers to ucs2_align(). */
756 if (!(flags & STR_ASCII) && \
757 ((flags & STR_UNICODE || \
758 (SVAL(base_ptr, smb_flg2) & FLAGS2_UNICODE_STRINGS)))) {
759 return ucs2_align(base_ptr, p, flags);
764 /****************************************************************************
765 Convert from ucs2 to unix charset and return the
766 allocated and converted string or NULL if an error occurred.
767 You must provide a zero terminated string.
768 The returning string will be zero terminated.
769 ****************************************************************************/
771 char *acnv_u2ux(const smb_ucs2_t *src)
/* Source size in bytes, including the UCS2 terminator. */
777 slen = (strlen_w(src) + 1) * sizeof(smb_ucs2_t);
/* UCS2 -> UNIX into a newly allocated buffer; (size_t)-1 is the failure value checked next. */
778 dlen = convert_string_allocate(CH_UCS2, CH_UNIX, src, slen, &dest);
779 if (dlen == (size_t)-1)
785 /****************************************************************************
786 Convert from unix to ucs2 charset and return the
787 allocated and converted string or NULL if an error occurred.
788 You must provide a zero terminated string.
789 The returning string will be zero terminated.
790 ****************************************************************************/
792 smb_ucs2_t *acnv_uxu2(const char *src)
/* Source size in bytes, including the NUL terminator. */
798 slen = strlen(src) + 1;
/* UNIX -> UCS2 into a newly allocated buffer; (size_t)-1 is the failure value checked next. */
799 dlen = convert_string_allocate(CH_UNIX, CH_UCS2, src, slen, &dest);
800 if (dlen == (size_t)-1)
806 /****************************************************************************
807 Convert from ucs2 to dos charset and return the
808 allocated and converted string or NULL if an error occurred.
809 You must provide a zero terminated string.
810 The returning string will be zero terminated.
811 ****************************************************************************/
813 char *acnv_u2dos(const smb_ucs2_t *src)
/* Source size in bytes, including the UCS2 terminator. */
819 slen = (strlen_w(src) + 1) * sizeof(smb_ucs2_t);
/* UCS2 -> DOS codepage into a newly allocated buffer; (size_t)-1 is the failure value checked next. */
820 dlen = convert_string_allocate(CH_UCS2, CH_DOS, src, slen, &dest);
821 if (dlen == (size_t)-1)
827 /****************************************************************************
828 Convert from dos to ucs2 charset and return the
829 allocated and converted string or NULL if an error occurred.
830 You must provide a zero terminated string.
831 The returning string will be zero terminated.
832 ****************************************************************************/
834 smb_ucs2_t *acnv_dosu2(const char *src)
/* Source size in bytes, including the NUL terminator. */
840 slen = strlen(src) + 1;
/* DOS codepage -> UCS2 into a newly allocated buffer; (size_t)-1 is the failure value checked next. */
841 dlen = convert_string_allocate(CH_DOS, CH_UCS2, src, slen, &dest);
842 if (dlen == (size_t)-1)