2 Unix SMB/CIFS implementation.
3 Character set conversion Extensions
4 Copyright (C) Igor Vergeichik <iverg@mail.ru> 2001
5 Copyright (C) Andrew Tridgell 2001
6 Copyright (C) Simo Sorce 2001
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2 of the License, or
11 (at your option) any later version.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
/* Shared scratch buffer: unix_strupper() and unix_strlower() use it to hold
   the intermediate UCS2 form of the string being case-converted. */
25 static pstring cvtbuf;
/* Conversion handles, looked up as conv_handles[from][to]. */
27 static smb_iconv_t conv_handles[NUM_CHARSETS][NUM_CHARSETS];
29 /****************************************************************************
30 return the name of a charset to give to iconv()
31 ****************************************************************************/
/* Select the charset name string for ch: fixed names for CH_UCS2 and CH_UTF8,
   the lp_*_charset() values for CH_UNIX, CH_DOS and CH_DISPLAY. */
32 static char *charset_name(charset_t ch)
36 if (ch == CH_UCS2) ret = "UCS-2LE";
37 else if (ch == CH_UNIX) ret = lp_unix_charset();
38 else if (ch == CH_DOS) ret = lp_dos_charset();
39 else if (ch == CH_DISPLAY) ret = lp_display_charset();
40 else if (ch == CH_UTF8) ret = "UTF8";
/* a NULL or empty name falls back to plain ASCII */
42 if (!ret || !*ret) ret = "ASCII";
/* Called at the start of convert_string() and convert_string_allocate().
   NOTE(review): only the guard flag is visible in this listing; presumably
   the body performs the one-time setup while the flag is still False --
   confirm against the full source. */
47 static void lazy_initialize_conv(void)
49 static int initialized = False;
59 /****************************************************************************
60 Initialize iconv conversion descriptors
61 ****************************************************************************/
/* Body of the conversion-descriptor initialiser (signature line not visible
   in this listing).  Steps shown below:
     - make sure the UNIX<->UCS2 handles exist, opening them as ASCII<->UCS-2LE
       when they are still unset;
     - for every (c1, c2) pair, keep the handle if its from_name/to_name still
       match charset_name(c1)/charset_name(c2); otherwise close any old handle
       and open a new one with smb_iconv_open(to-name, from-name);
     - a pair that smb_iconv_open() rejects is logged at level 0 and stored as
       NULL.  convert_string() treats a NULL handle as a raw byte copy and
       convert_string_allocate() treats it as unsupported. */
65 BOOL did_reload = False;
67 /* so that charset_name() works we need to get the UNIX<->UCS2 going
69 if (!conv_handles[CH_UNIX][CH_UCS2]) {
70 conv_handles[CH_UNIX][CH_UCS2] = smb_iconv_open("UCS-2LE", "ASCII");
72 if (!conv_handles[CH_UCS2][CH_UNIX]) {
73 conv_handles[CH_UCS2][CH_UNIX] = smb_iconv_open("ASCII", "UCS-2LE");
77 for (c1=0;c1<NUM_CHARSETS;c1++) {
78 for (c2=0;c2<NUM_CHARSETS;c2++) {
79 char *n1 = charset_name((charset_t)c1);
80 char *n2 = charset_name((charset_t)c2);
81 if (conv_handles[c1][c2] &&
82 strcmp(n1, conv_handles[c1][c2]->from_name) == 0 &&
83 strcmp(n2, conv_handles[c1][c2]->to_name) == 0) continue;
87 if (conv_handles[c1][c2]) {
88 smb_iconv_close(conv_handles[c1][c2]);
90 conv_handles[c1][c2] = smb_iconv_open(n2,n1);
91 if (conv_handles[c1][c2] == (smb_iconv_t)-1) {
92 DEBUG(0,("Conversion from %s to %s not supported\n",
93 charset_name((charset_t)c1), charset_name((charset_t)c2)));
94 conv_handles[c1][c2] = NULL;
105 * Convert string from one encoding to another, making error checking etc
107 * @param from,to source and destination charsets; they select the conversion descriptor created in init_iconv()
108 * @param src pointer to source string (multibyte or singlebyte)
109 * @param srclen length of the source string in bytes
110 * @param dest pointer to destination string (multibyte or singlebyte)
111 * @param destlen maximal length allowed for string
112 * @retval the number of bytes occupied in the destination
/* Convert srclen bytes at src from charset from to charset to, writing at
   most destlen bytes into the caller-supplied dest buffer.
   NOTE(review): several lines of this function are missing from this listing
   (declarations, the errno switch header, closing braces). */
114 size_t convert_string(charset_t from, charset_t to,
115 void const *src, size_t srclen,
116 void *dest, size_t destlen)
120 const char* inbuf = (const char*)src;
121 char* outbuf = (char*)dest;
122 smb_iconv_t descriptor;
/* srclen of -1 means src is NUL terminated; the terminator is converted too */
124 if (srclen == -1) srclen = strlen(src)+1;
126 lazy_initialize_conv();
128 descriptor = conv_handles[from][to];
/* no usable handle for this pair: fall back to a raw byte copy, truncated to
   whichever of srclen/destlen is smaller */
130 if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
131 /* conversion not supported, use as is */
132 int len = MIN(srclen,destlen);
133 memcpy(dest,src,len);
139 retval = smb_iconv(descriptor, &inbuf, &i_len, &outbuf, &o_len);
/* error path: a human-readable reason is chosen by the case labels below */
142 char *reason="unknown error";
144 { case EINVAL: reason="Incomplete multibyte sequence"; break;
145 case E2BIG: reason="No more room";
146 DEBUG(0, ("convert_string: Required %d, available %d\n",
148 /* we are not sure we need srclen bytes,
149 may be more, may be less.
150 We only know we need more than destlen
155 case EILSEQ: reason="Illegal multibyte sequence"; break;
157 /* smb_panic(reason); */
/* bytes produced = capacity minus the space smb_iconv() left unused
   (o_len is presumably initialised to destlen on a line not shown) */
159 return destlen-o_len;
163 * Convert between character sets, allocating a new buffer for the result.
165 * @param srclen length of source buffer.
166 * @param dest always set at least to NULL
167 * @note -1 is not accepted for srclen.
169 * @retval Size in bytes of the converted string; or -1 in case of error.
/* Convert srclen bytes at src from charset from to charset to, placing the
   result in a heap buffer handed back through *dest.
   NOTE(review): loop control, the errno switch and the return statements are
   not visible in this listing. */
171 size_t convert_string_allocate(charset_t from, charset_t to,
172 void const *src, size_t srclen, void **dest)
174 size_t i_len, o_len, destlen;
176 const char *inbuf = (const char *)src;
178 smb_iconv_t descriptor;
/* unlike convert_string(), a NULL src or srclen of -1 is rejected outright */
182 if (src == NULL || srclen == -1) return -1;
184 lazy_initialize_conv();
186 descriptor = conv_handles[from][to];
188 if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
189 /* conversion not supported, return -1*/
190 DEBUG(3, ("convert_string_allocate: conversion not supported!\n"));
/* starting estimate is at least 512 bytes; it is doubled before each realloc
   (presumably retried when the output did not fit -- retry logic not shown) */
194 destlen = MAX(srclen, 512);
197 destlen = destlen * 2;
198 ob = (char *)realloc(outbuf, destlen);
200 DEBUG(0, ("convert_string_allocate: realloc failed!\n"));
207 retval = smb_iconv(descriptor,
212 char *reason="unknown error";
216 reason="Incomplete multibyte sequence";
221 reason="Illegal multibyte sequence";
/* NOTE(review): inbuf is printed with %s; when the source is UCS2 or not NUL
   terminated this may print a truncated string or read past the buffer --
   confirm */
224 DEBUG(0,("Conversion error: %s(%s)\n",reason,inbuf));
225 /* smb_panic(reason); */
/* shrink the buffer to the number of bytes actually produced */
229 destlen = destlen - o_len;
230 *dest = (char *)Realloc(ob,destlen);
232 DEBUG(0, ("convert_string_allocate: out of memory!\n"));
241 * Convert between character sets, allocating a new buffer using talloc for the result.
243 * @param srclen length of source buffer.
244 * @param dest always set at least to NULL
245 * @note -1 is not accepted for srclen.
247 * @retval Size in bytes of the converted string; or -1 in case of error.
/* Same conversion as convert_string_allocate(), but the result handed back in
   *dest is a copy made on the talloc context ctx. */
249 size_t convert_string_talloc(TALLOC_CTX *ctx, charset_t from, charset_t to,
250 void const *src, size_t srclen, void **dest)
256 dest_len=convert_string_allocate(from, to, src, srclen, (void **)&ob);
/* NOTE(review): talloc_strdup() copies only up to the first zero byte.  When
   the target charset is CH_UCS2 the converted data normally contains zero
   bytes, so the copy is likely shorter than dest_len -- confirm whether a
   length-based copy is needed here. */
259 *dest = talloc_strdup(ctx, (char *)ob);
/* Upper-case a unix-charset string by converting it to UCS2 in the file-static
   cvtbuf, running strupper_w() on it and converting back into dest. */
266 int unix_strupper(const char *src, size_t srclen, char *dest, size_t destlen)
269 smb_ucs2_t *buffer=(smb_ucs2_t*)cvtbuf;
270 size=convert_string(CH_UNIX, CH_UCS2, src, srclen, buffer, sizeof(cvtbuf));
/* in-place call (dest == src) where strupper_w() returned zero, presumably
   meaning nothing changed: skip the conversion back and report srclen */
271 if (!strupper_w(buffer) && (dest == src)) return srclen;
272 return convert_string(CH_UCS2, CH_UNIX, buffer, size, dest, destlen);
/* Lower-case a unix-charset string by converting it to UCS2 in the file-static
   cvtbuf, running strlower_w() on it and converting back into dest. */
275 int unix_strlower(const char *src, size_t srclen, char *dest, size_t destlen)
278 smb_ucs2_t *buffer=(smb_ucs2_t*)cvtbuf;
279 size=convert_string(CH_UNIX, CH_UCS2, src, srclen, buffer, sizeof(cvtbuf));
/* in-place call (dest == src) where strlower_w() returned zero, presumably
   meaning nothing changed: skip the conversion back and report srclen */
280 if (!strlower_w(buffer) && (dest == src)) return srclen;
281 return convert_string(CH_UCS2, CH_UNIX, buffer, size, dest, destlen);
/* Report whether p needs one byte of padding to sit at an even offset from
   base_ptr.  Always 0 when STR_NOALIGN or STR_ASCII is set in flags; otherwise
   the low bit of PTR_DIFF(p, base_ptr). */
285 int ucs2_align(const void *base_ptr, const void *p, int flags)
287 if (flags & (STR_NOALIGN|STR_ASCII)) return 0;
288 return PTR_DIFF(p, base_ptr) & 1;
292 /****************************************************************************
293 copy a string from a char* unix src to a dos codepage string destination
294 return the number of bytes occupied by the string in the destination
296 STR_TERMINATE means include the null termination
297 STR_UPPER means uppercase in the destination
298 dest_len is the maximum length allowed in the destination. If dest_len
299 is -1 then no maximum is used
300 ****************************************************************************/
/* Convert the NUL-terminated unix string src to the DOS charset at dest and
   return what convert_string() reports. */
301 int push_ascii(void *dest, const char *src, int dest_len, int flags)
303 int src_len = strlen(src);
306 /* treat a pstring as "unlimited" length */
307 if (dest_len == -1) {
308 dest_len = sizeof(pstring);
/* STR_UPPER: work on a copy in tmpbuf so the caller's string is not modified
   (the upper-casing lines themselves are not visible in this listing) */
311 if (flags & STR_UPPER) {
312 pstrcpy(tmpbuf, src);
/* STR_TERMINATE branch: body not visible in this listing */
317 if (flags & STR_TERMINATE) {
321 return convert_string(CH_UNIX, CH_DOS, src, src_len, dest, dest_len);
/* push_ascii() into an fstring-sized destination, with STR_TERMINATE set. */
324 int push_ascii_fstring(void *dest, const char *src)
326 return push_ascii(dest, src, sizeof(fstring), STR_TERMINATE);
/* push_ascii() into a pstring-sized destination, with STR_TERMINATE set. */
329 int push_ascii_pstring(void *dest, const char *src)
331 return push_ascii(dest, src, sizeof(pstring), STR_TERMINATE);
/* Makes exactly the same call as push_ascii_pstring(): push_ascii() into a
   pstring-sized destination, with STR_TERMINATE set. */
334 int push_pstring(void *dest, const char *src)
336 return push_ascii(dest, src, sizeof(pstring), STR_TERMINATE);
340 /****************************************************************************
341 copy a string from a dos codepage source to a unix char* destination
343 STR_TERMINATE means the string in src is null terminated
344 if STR_TERMINATE is set then src_len is ignored if it is -1
345 src_len is the length of the source area in bytes
346 return the number of bytes occupied by the string in src
347 the resulting string in "dest" is always null terminated
348 ****************************************************************************/
/* Convert a DOS-charset string at src into the unix charset at dest.  dest is
   NUL terminated whenever dest_len is non-zero. */
349 int pull_ascii(char *dest, const void *src, int dest_len, int src_len, int flags)
/* dest_len of -1 is treated as a pstring-sized destination */
353 if (dest_len == -1) {
354 dest_len = sizeof(pstring);
357 if (flags & STR_TERMINATE) {
/* whole string including its terminator */
359 src_len = strlen(src) + 1;
/* bounded scan: the terminator is counted only if it was found inside the
   src_len bytes */
361 int len = strnlen(src, src_len);
362 if (len < src_len) len++;
367 ret = convert_string(CH_DOS, CH_UNIX, src, src_len, dest, dest_len);
/* clamp the terminator position so it always lands inside dest */
369 if (dest_len) dest[MIN(ret, dest_len-1)] = 0;
/* pull_ascii() of a NUL-terminated source (src_len -1, STR_TERMINATE) into a
   pstring-sized destination. */
374 int pull_ascii_pstring(char *dest, const void *src)
376 return pull_ascii(dest, src, sizeof(pstring), -1, STR_TERMINATE);
/* pull_ascii() of a NUL-terminated source (src_len -1, STR_TERMINATE) into an
   fstring-sized destination. */
379 int pull_ascii_fstring(char *dest, const void *src)
381 return pull_ascii(dest, src, sizeof(fstring), -1, STR_TERMINATE);
384 /****************************************************************************
385 copy a string from a char* src to a unicode destination
386 return the number of bytes occupied by the string in the destination
388 STR_TERMINATE means include the null termination
389 STR_UPPER means uppercase in the destination
390 STR_NOALIGN means don't do alignment
391 dest_len is the maximum length allowed in the destination. If dest_len
392 is -1 then no maximum is used
393 ****************************************************************************/
/* Convert the NUL-terminated unix string src to UCS2 at dest.  base_ptr is
   only used to decide, via ucs2_align(), whether dest needs an alignment
   byte. */
394 int push_ucs2(const void *base_ptr, void *dest, const char *src, int dest_len, int flags)
397 int src_len = strlen(src);
400 /* treat a pstring as "unlimited" length */
401 if (dest_len == -1) {
402 dest_len = sizeof(pstring);
/* STR_UPPER: work on a copy in tmpbuf so the caller's string is not modified
   (the upper-casing lines themselves are not visible in this listing) */
405 if (flags & STR_UPPER) {
406 pstrcpy(tmpbuf, src);
/* STR_TERMINATE branch: body not visible in this listing */
411 if (flags & STR_TERMINATE) {
/* misaligned destination: step one byte forward and give up one byte of the
   available room */
415 if (ucs2_align(base_ptr, dest, flags)) {
417 dest = (void *)((char *)dest + 1);
418 if (dest_len) dest_len--;
422 /* ucs2 is always a multiple of 2 bytes */
/* len accumulates the converted size on top of whatever was counted before
   this line (earlier updates to len are not visible in this listing) */
425 len += convert_string(CH_UNIX, CH_UCS2, src, src_len, dest, dest_len);
430 * Copy a string from a unix char* src to a UCS2 destination, allocating a buffer using talloc
432 * @param dest always set at least to NULL
434 * @retval The number of bytes occupied by the string in the destination
/* Convert src (including its NUL terminator) from the unix charset to UCS2
   via convert_string_talloc() on context ctx. */
436 int push_ucs2_talloc(TALLOC_CTX *ctx, void **dest, const char *src)
438 int src_len = strlen(src)+1;
441 return convert_string_talloc(ctx, CH_UNIX, CH_UCS2, src, src_len, dest);
445 * Copy a string from a unix char* src to a UCS2 destination, allocating a buffer
447 * @param dest always set at least to NULL
449 * @retval The number of bytes occupied by the string in the destination
/* Convert src (including its NUL terminator) from the unix charset to UCS2
   via convert_string_allocate(). */
451 int push_ucs2_allocate(void **dest, const char *src)
453 int src_len = strlen(src)+1;
456 return convert_string_allocate(CH_UNIX, CH_UCS2, src, src_len, dest);
459 /****************************************************************************
460 copy a string from a char* src to a UTF-8 destination
461 return the number of bytes occupied by the string in the destination
463 STR_TERMINATE means include the null termination
464 STR_UPPER means uppercase in the destination
465 dest_len is the maximum length allowed in the destination. If dest_len
466 is -1 then no maximum is used
467 ****************************************************************************/
/* Convert the NUL-terminated unix string src to UTF-8 at dest and return what
   convert_string() reports. */
468 int push_utf8(void *dest, const char *src, int dest_len, int flags)
470 int src_len = strlen(src);
473 /* treat a pstring as "unlimited" length */
474 if (dest_len == -1) {
475 dest_len = sizeof(pstring);
/* STR_UPPER: work on a copy in tmpbuf so the caller's string is not modified
   (the upper-casing lines themselves are not visible in this listing) */
478 if (flags & STR_UPPER) {
479 pstrcpy(tmpbuf, src);
/* STR_TERMINATE branch: body not visible in this listing */
484 if (flags & STR_TERMINATE) {
488 return convert_string(CH_UNIX, CH_UTF8, src, src_len, dest, dest_len);
/* push_utf8() into an fstring-sized destination, with STR_TERMINATE set. */
491 int push_utf8_fstring(void *dest, const char *src)
493 return push_utf8(dest, src, sizeof(fstring), STR_TERMINATE);
/* push_utf8() into a pstring-sized destination, with STR_TERMINATE set. */
496 int push_utf8_pstring(void *dest, const char *src)
498 return push_utf8(dest, src, sizeof(pstring), STR_TERMINATE);
502 * Copy a string from a unix char* src to a UTF-8 destination, allocating a buffer using talloc
504 * @param dest always set at least to NULL
506 * @retval The number of bytes occupied by the string in the destination
/* Convert src (including its NUL terminator) from the unix charset to UTF-8
   via convert_string_talloc() on context ctx. */
508 int push_utf8_talloc(TALLOC_CTX *ctx, void **dest, const char *src)
510 int src_len = strlen(src)+1;
513 return convert_string_talloc(ctx, CH_UNIX, CH_UTF8, src, src_len, dest);
517 * Copy a string from a unix char* src to a UTF-8 destination, allocating a buffer
519 * @param dest always set at least to NULL
521 * @retval The number of bytes occupied by the string in the destination
/* Convert src (including its NUL terminator) from the unix charset to UTF-8
   via convert_string_allocate(). */
523 int push_utf8_allocate(void **dest, const char *src)
525 int src_len = strlen(src)+1;
528 return convert_string_allocate(CH_UNIX, CH_UTF8, src, src_len, dest);
531 /****************************************************************************
532 copy a string from a ucs2 source to a unix char* destination
534 STR_TERMINATE means the string in src is null terminated
535 STR_NOALIGN means don't try to align
536 if STR_TERMINATE is set then src_len is ignored if it is -1
537 src_len is the length of the source area in bytes
538 return the number of bytes occupied by the string in src
539 the resulting string in "dest" is always null terminated
540 ****************************************************************************/
/* Convert a UCS2 string at src into the unix charset at dest.  dest is NUL
   terminated whenever dest_len is non-zero.  base_ptr is only used to decide,
   via ucs2_align(), whether src starts one byte later. */
541 int pull_ucs2(const void *base_ptr, char *dest, const void *src, int dest_len, int src_len, int flags)
/* dest_len of -1 is treated as a pstring-sized destination */
545 if (dest_len == -1) {
546 dest_len = sizeof(pstring);
/* misaligned source: skip one byte and shrink a positive src_len to match */
549 if (ucs2_align(base_ptr, src, flags)) {
550 src = (const void *)((const char *)src + 1);
551 if (src_len > 0) src_len--;
554 if (flags & STR_TERMINATE) {
/* byte length plus 2 for the terminator (strlen_w() presumably counts
   two-byte units) */
556 src_len = strlen_w(src)*2 + 2;
/* bounded scan over src_len/2 units: the terminator is counted only if it was
   found inside the source area */
558 int len = strnlen_w(src, src_len/2);
559 if (len < src_len/2) len++;
564 /* ucs2 is always a multiple of 2 bytes */
567 ret = convert_string(CH_UCS2, CH_UNIX, src, src_len, dest, dest_len);
/* clamp the terminator position so it always lands inside dest */
568 if (dest_len) dest[MIN(ret, dest_len-1)] = 0;
/* pull_ucs2() of a terminated source (src_len -1, STR_TERMINATE) into a
   pstring-sized destination.  base_ptr is passed as NULL. */
573 int pull_ucs2_pstring(char *dest, const void *src)
575 return pull_ucs2(NULL, dest, src, sizeof(pstring), -1, STR_TERMINATE);
/* pull_ucs2() of a terminated source (src_len -1, STR_TERMINATE) into an
   fstring-sized destination.  base_ptr is passed as NULL. */
578 int pull_ucs2_fstring(char *dest, const void *src)
580 return pull_ucs2(NULL, dest, src, sizeof(fstring), -1, STR_TERMINATE);
584 * Copy a string from a UCS2 src to a unix char * destination, allocating a buffer using talloc
586 * @param dest always set at least to NULL
588 * @retval The number of bytes occupied by the string in the destination
/* Convert a UCS2 source to the unix charset via convert_string_talloc() on
   context ctx. */
590 int pull_ucs2_talloc(TALLOC_CTX *ctx, void **dest, const char *src)
/* NOTE(review): strlen() stops at the first zero byte, which in UCS2 data is
   normally reached long before the two-byte terminator (elsewhere this file
   sizes UCS2 input with strlen_w()) -- confirm the intended length. */
592 int src_len = strlen(src)+1;
594 return convert_string_talloc(ctx, CH_UCS2, CH_UNIX, src, src_len, dest);
598 * Copy a string from a UCS2 src to a unix char * destination, allocating a buffer
600 * @param dest always set at least to NULL
602 * @retval The number of bytes occupied by the string in the destination
/* Convert a UCS2 source to the unix charset via convert_string_allocate(). */
604 int pull_ucs2_allocate(void **dest, const char *src)
/* NOTE(review): strlen() stops at the first zero byte, which in UCS2 data is
   normally reached long before the two-byte terminator (elsewhere this file
   sizes UCS2 input with strlen_w()) -- confirm the intended length. */
606 int src_len = strlen(src)+1;
608 return convert_string_allocate(CH_UCS2, CH_UNIX, src, src_len, dest);
611 /****************************************************************************
612 copy a string from a utf-8 source to a unix char* destination
614 STR_TERMINATE means the string in src is null terminated
615 if STR_TERMINATE is set then src_len is ignored if it is -1
616 src_len is the length of the source area in bytes
617 return the number of bytes occupied by the string in src
618 the resulting string in "dest" is always null terminated
619 ****************************************************************************/
/* Convert a UTF-8 string at src into the unix charset at dest.  dest is NUL
   terminated whenever dest_len is non-zero. */
620 int pull_utf8(char *dest, const void *src, int dest_len, int src_len, int flags)
/* dest_len of -1 is treated as a pstring-sized destination */
624 if (dest_len == -1) {
625 dest_len = sizeof(pstring);
628 if (flags & STR_TERMINATE) {
/* whole string including its terminator */
630 src_len = strlen(src) + 1;
/* bounded scan: the terminator is counted only if it was found inside the
   src_len bytes */
632 int len = strnlen(src, src_len);
633 if (len < src_len) len++;
638 ret = convert_string(CH_UTF8, CH_UNIX, src, src_len, dest, dest_len);
/* clamp the terminator position so it always lands inside dest */
639 if (dest_len) dest[MIN(ret, dest_len-1)] = 0;
/* pull_utf8() of a NUL-terminated source (src_len -1, STR_TERMINATE) into a
   pstring-sized destination. */
644 int pull_utf8_pstring(char *dest, const void *src)
646 return pull_utf8(dest, src, sizeof(pstring), -1, STR_TERMINATE);
/* pull_utf8() of a NUL-terminated source (src_len -1, STR_TERMINATE) into an
   fstring-sized destination. */
649 int pull_utf8_fstring(char *dest, const void *src)
651 return pull_utf8(dest, src, sizeof(fstring), -1, STR_TERMINATE);
655 * Copy a string from a UTF-8 src to a unix char * destination, allocating a buffer using talloc
657 * @param dest always set at least to NULL
659 * @retval The number of bytes occupied by the string in the destination
/* Convert src (including its NUL terminator) from UTF-8 to the unix charset
   via convert_string_talloc() on context ctx. */
661 int pull_utf8_talloc(TALLOC_CTX *ctx, void **dest, const char *src)
663 int src_len = strlen(src)+1;
665 return convert_string_talloc(ctx, CH_UTF8, CH_UNIX, src, src_len, dest);
669 * Copy a string from a UTF-8 src to a unix char * destination, allocating a buffer
671 * @param dest always set at least to NULL
673 * @retval The number of bytes occupied by the string in the destination
/* Convert src (including its NUL terminator) from UTF-8 to the unix charset
   via convert_string_allocate(). */
675 int pull_utf8_allocate(void **dest, const char *src)
677 int src_len = strlen(src)+1;
679 return convert_string_allocate(CH_UTF8, CH_UNIX, src, src_len, dest);
682 /****************************************************************************
683 copy a string from a char* src to a unicode or ascii
684 dos codepage destination choosing unicode or ascii based on the
685 flags in the SMB buffer starting at base_ptr
686 return the number of bytes occupied by the string in the destination
688 STR_TERMINATE means include the null termination
689 STR_UPPER means uppercase in the destination
690 STR_ASCII use ascii even with unicode packet
691 STR_NOALIGN means don't do alignment
692 dest_len is the maximum length allowed in the destination. If dest_len
693 is -1 then no maximum is used
694 ****************************************************************************/
/* Dispatch to push_ucs2() or push_ascii().  UCS2 is used when STR_ASCII is
   not set and either STR_UNICODE is set or the flg2 word read from base_ptr
   has FLAGS2_UNICODE_STRINGS; everything else goes through push_ascii(). */
695 int push_string(const void *base_ptr, void *dest, const char *src, int dest_len, int flags)
697 if (!(flags & STR_ASCII) && \
698 ((flags & STR_UNICODE || \
699 (SVAL(base_ptr, smb_flg2) & FLAGS2_UNICODE_STRINGS)))) {
700 return push_ucs2(base_ptr, dest, src, dest_len, flags);
702 return push_ascii(dest, src, dest_len, flags);
706 /****************************************************************************
707 copy a string from a unicode or ascii source (depending on
708 the packet flags) to a char* destination
710 STR_TERMINATE means the string in src is null terminated
711 STR_UNICODE means to force as unicode
712 STR_ASCII use ascii even with unicode packet
713 STR_NOALIGN means don't do alignment
714 if STR_TERMINATE is set then src_len is ignored if it is -1
715 src_len is the length of the source area in bytes
716 return the number of bytes occupied by the string in src
717 the resulting string in "dest" is always null terminated
718 ****************************************************************************/
/* Dispatch to pull_ucs2() or pull_ascii().  UCS2 is used when STR_ASCII is
   not set and either STR_UNICODE is set or the flg2 word read from base_ptr
   has FLAGS2_UNICODE_STRINGS; everything else goes through pull_ascii().
   (The rest of the parameter list is on a line missing from this listing.) */
719 int pull_string(const void *base_ptr, char *dest, const void *src, int dest_len, int src_len,
722 if (!(flags & STR_ASCII) && \
723 ((flags & STR_UNICODE || \
724 (SVAL(base_ptr, smb_flg2) & FLAGS2_UNICODE_STRINGS)))) {
725 return pull_ucs2(base_ptr, dest, src, dest_len, src_len, flags);
727 return pull_ascii(dest, src, dest_len, src_len, flags);
/* Alignment needed for a string at p, using the same unicode-or-ascii test as
   push_string()/pull_string(): in the unicode case the answer comes from
   ucs2_align().  The non-unicode return is not visible in this listing. */
730 int align_string(const void *base_ptr, const char *p, int flags)
732 if (!(flags & STR_ASCII) && \
733 ((flags & STR_UNICODE || \
734 (SVAL(base_ptr, smb_flg2) & FLAGS2_UNICODE_STRINGS)))) {
735 return ucs2_align(base_ptr, p, flags);
742 /****************************************************************************
743 convert from ucs2 to unix charset and return the
744 allocated and converted string or NULL if an error occurred.
745 you must provide a zero terminated string.
746 the returning string will be zero terminated.
747 ****************************************************************************/
/* Allocate-and-convert helper, UCS2 to unix charset. */
748 char *acnv_u2ux(const smb_ucs2_t *src)
/* source size in bytes, terminator included */
754 slen = (strlen_w(src) + 1) * sizeof(smb_ucs2_t);
755 dlen = convert_string_allocate(CH_UCS2, CH_UNIX, src, slen, &dest);
/* conversion failure is reported to the caller as NULL */
756 if (dlen == -1) return NULL;
760 /****************************************************************************
761 convert from unix to ucs2 charset and return the
762 allocated and converted string or NULL if an error occurred.
763 you must provide a zero terminated string.
764 the returning string will be zero terminated.
765 ****************************************************************************/
/* Allocate-and-convert helper, unix charset to UCS2. */
766 smb_ucs2_t *acnv_uxu2(const char *src)
/* source size in bytes, terminator included */
772 slen = strlen(src) + 1;
773 dlen = convert_string_allocate(CH_UNIX, CH_UCS2, src, slen, &dest);
/* conversion failure is reported to the caller as NULL */
774 if (dlen == -1) return NULL;
778 /****************************************************************************
779 convert from ucs2 to dos charset and return the
780 allocated and converted string or NULL if an error occurred.
781 you must provide a zero terminated string.
782 the returning string will be zero terminated.
783 ****************************************************************************/
/* Allocate-and-convert helper, UCS2 to DOS charset. */
784 char *acnv_u2dos(const smb_ucs2_t *src)
/* source size in bytes, terminator included */
790 slen = (strlen_w(src) + 1) * sizeof(smb_ucs2_t);
791 dlen = convert_string_allocate(CH_UCS2, CH_DOS, src, slen, &dest);
/* conversion failure is reported to the caller as NULL */
792 if (dlen == -1) return NULL;
796 /****************************************************************************
797 convert from dos to ucs2 charset and return the
798 allocated and converted string or NULL if an error occurred.
799 you must provide a zero terminated string.
800 the returning string will be zero terminated.
801 ****************************************************************************/
/* Allocate-and-convert helper, DOS charset to UCS2. */
802 smb_ucs2_t *acnv_dosu2(const char *src)
/* source size in bytes, terminator included */
808 slen = strlen(src) + 1;
809 dlen = convert_string_allocate(CH_DOS, CH_UCS2, src, slen, &dest);
/* conversion failure is reported to the caller as NULL */
810 if (dlen == -1) return NULL;