2 Unix SMB/CIFS implementation.
3 Character set conversion Extensions
4 Copyright (C) Igor Vergeichik <iverg@mail.ru> 2001
5 Copyright (C) Andrew Tridgell 2001
6 Copyright (C) Simo Sorce 2001
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2 of the License, or
11 (at your option) any later version.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
/* Scratch buffer used by unix_strupper() and unix_strlower() to hold the
 * intermediate UCS2 form of the string being case-converted. */
25 static pstring cvtbuf;
/* Conversion descriptors indexed as [from][to]; filled in by the
 * initialisation code below and looked up by convert_string() and
 * convert_string_allocate(). */
27 static smb_iconv_t conv_handles[NUM_CHARSETS][NUM_CHARSETS];
29 /****************************************************************************
30 return the name of a charset to give to iconv()
31 ****************************************************************************/
/*
 * Map a charset_t value to the encoding name that is passed to
 * smb_iconv_open().  CH_UCS2 and CH_UTF8 use fixed names; CH_UNIX,
 * CH_DOS and CH_DISPLAY come from lp_unix_charset(), lp_dos_charset()
 * and lp_display_charset().  A NULL or empty name falls back to "ASCII".
 */
32 static char *charset_name(charset_t ch)
36 if (ch == CH_UCS2) ret = "UCS-2LE";
37 else if (ch == CH_UNIX) ret = lp_unix_charset();
38 else if (ch == CH_DOS) ret = lp_dos_charset();
39 else if (ch == CH_DISPLAY) ret = lp_display_charset();
40 else if (ch == CH_UTF8) ret = "UTF8";
42 if (!ret || !*ret) ret = "ASCII";
/*
 * Called at the top of convert_string() and convert_string_allocate()
 * before the descriptor table is consulted.  Keeps a function-local static
 * flag that starts out False; the statements that test and set the flag are
 * not part of this listing.
 */
47 static void lazy_initialize_conv(void)
49 static int initialized = False;
59 /****************************************************************************
60 Initialize iconv conversion descriptors
61 ****************************************************************************/
/*
 * Body of the descriptor set-up routine (the convert_string() documentation
 * refers to it as init_iconv(); its signature line is not part of this
 * listing).
 *
 * Step 1: make sure UNIX<->UCS2 descriptors exist, opening them with plain
 *         ASCII if they are still unset.
 * Step 2: for every (c1, c2) pair, keep a descriptor whose from_name and
 *         to_name already equal the current charset names; otherwise close
 *         it and reopen it.  smb_iconv_open() is called with the
 *         destination name first (n2) and the source name second (n1),
 *         matching the two bootstrap calls in step 1.
 * A failed open, reported as (smb_iconv_t)-1, is logged at level 0 and
 * stored as NULL so that later lookups see "no conversion".
 */
65 BOOL did_reload = False;
67 /* so that charset_name() works we need to get the UNIX<->UCS2 going
69 if (!conv_handles[CH_UNIX][CH_UCS2]) {
70 conv_handles[CH_UNIX][CH_UCS2] = smb_iconv_open("UCS-2LE", "ASCII");
72 if (!conv_handles[CH_UCS2][CH_UNIX]) {
73 conv_handles[CH_UCS2][CH_UNIX] = smb_iconv_open("ASCII", "UCS-2LE");
77 for (c1=0;c1<NUM_CHARSETS;c1++) {
78 for (c2=0;c2<NUM_CHARSETS;c2++) {
79 char *n1 = charset_name((charset_t)c1);
80 char *n2 = charset_name((charset_t)c2);
81 if (conv_handles[c1][c2] &&
82 strcmp(n1, conv_handles[c1][c2]->from_name) == 0 &&
83 strcmp(n2, conv_handles[c1][c2]->to_name) == 0) continue;
87 if (conv_handles[c1][c2]) {
88 smb_iconv_close(conv_handles[c1][c2]);
90 conv_handles[c1][c2] = smb_iconv_open(n2,n1);
91 if (conv_handles[c1][c2] == (smb_iconv_t)-1) {
92 DEBUG(0,("Conversion from %s to %s not supported\n",
93 charset_name((charset_t)c1), charset_name((charset_t)c2)));
94 conv_handles[c1][c2] = NULL;
105 * Convert string from one encoding to another, making error checking etc
107 * @param from,to source and destination charsets; together they select the conversion descriptor created in init_iconv()
108 * @param src pointer to source string (multibyte or singlebyte)
109 * @param srclen length of the source string in bytes
110 * @param dest pointer to destination string (multibyte or singlebyte)
111 * @param destlen maximal length allowed for string
112 * @retval the number of bytes occupied in the destination
/*
 * Convert srclen bytes at src from charset `from` to charset `to`, writing
 * into the caller-supplied buffer dest of destlen bytes.
 *
 * - srclen == -1 means "src is NUL-terminated": the length becomes
 *   strlen(src)+1, so the terminator is converted as well.  srclen is a
 *   size_t, so the comparison relies on -1 converting to the largest value.
 * - If no descriptor exists for the pair (NULL or (smb_iconv_t)-1), the
 *   bytes are copied unchanged, limited to MIN(srclen, destlen).
 * - Otherwise smb_iconv() does the work; on failure a reason string is
 *   chosen from errno (EINVAL, E2BIG, EILSEQ).  The smb_panic() call is
 *   commented out; the statements that follow the switch are not part of
 *   this listing.
 * - The final return is destlen - o_len (o_len is presumably the output
 *   space still unused after smb_iconv(); its initialisation is not visible
 *   here).
 */
114 size_t convert_string(charset_t from, charset_t to,
115 void const *src, size_t srclen,
116 void *dest, size_t destlen)
120 const char* inbuf = (const char*)src;
121 char* outbuf = (char*)dest;
122 smb_iconv_t descriptor;
124 if (srclen == -1) srclen = strlen(src)+1;
126 lazy_initialize_conv();
128 descriptor = conv_handles[from][to];
130 if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
131 /* conversion not supported, use as is */
132 int len = MIN(srclen,destlen);
133 memcpy(dest,src,len);
139 retval = smb_iconv(descriptor, &inbuf, &i_len, &outbuf, &o_len);
142 char *reason="unknown error";
144 { case EINVAL: reason="Incomplete multibyte sequence"; break;
145 case E2BIG: reason="No more room";
146 DEBUG(0, ("convert_string: Required %d, available %d\n",
148 /* we are not sure we need srclen bytes,
149 may be more, may be less.
150 We only know we need more than destlen
155 case EILSEQ: reason="Illegal multibyte sequence"; break;
157 /* smb_panic(reason); */
159 return destlen-o_len;
163 * Convert between character sets, allocating a new buffer for the result.
165 * @param srclen length of source buffer.
166 * @param dest always set at least to NULL
167 * @note -1 is not accepted for srclen.
169 * @retval Size in bytes of the converted string; or -1 in case of error.
/*
 * Convert srclen bytes at src from charset `from` to charset `to` into a
 * heap buffer that is handed back through *dest.
 *
 * - src == NULL or srclen == -1 is rejected with -1.
 * - A missing descriptor (NULL or (smb_iconv_t)-1) is logged at level 3;
 *   the comment on that branch says -1 is returned.
 * - The output size starts at MAX(srclen, 512) and is doubled before each
 *   realloc(); the control flow that decides when to retry is not part of
 *   this listing.
 * - After a successful conversion the buffer is trimmed with Realloc() to
 *   destlen - o_len bytes and stored in *dest.
 *
 * NOTE(review): the error path logs inbuf with %s.  For a UCS2 source that
 * is not a NUL-terminated byte string, so the message may be cut short or
 * read past the input -- confirm and consider dropping the argument.
 */
171 size_t convert_string_allocate(charset_t from, charset_t to,
172 void const *src, size_t srclen, void **dest)
174 size_t i_len, o_len, destlen;
176 const char *inbuf = (const char *)src;
178 smb_iconv_t descriptor;
182 if (src == NULL || srclen == -1) return -1;
184 lazy_initialize_conv();
186 descriptor = conv_handles[from][to];
188 if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
189 /* conversion not supported, return -1*/
190 DEBUG(3, ("convert_string_allocate: conversion not supported!\n"));
194 destlen = MAX(srclen, 512);
197 destlen = destlen * 2;
198 ob = (char *)realloc(outbuf, destlen);
200 DEBUG(0, ("convert_string_allocate: realloc failed!\n"));
207 retval = smb_iconv(descriptor,
212 char *reason="unknown error";
216 reason="Incomplete multibyte sequence";
221 reason="Illegal multibyte sequence";
224 DEBUG(0,("Conversion error: %s(%s)\n",reason,inbuf));
225 /* smb_panic(reason); */
229 destlen = destlen - o_len;
230 *dest = (char *)Realloc(ob,destlen);
232 DEBUG(0, ("convert_string_allocate: out of memory!\n"));
241 * Convert between character sets, allocating a new buffer using talloc for the result.
243 * @param srclen length of source buffer.
244 * @param dest always set at least to NULL
245 * @note -1 is not accepted for srclen.
247 * @retval Size in bytes of the converted string; or -1 in case of error.
/*
 * talloc flavour of convert_string_allocate(): the conversion is done into
 * a temporary heap buffer, the result is duplicated into ctx with
 * talloc_memdup(), and the temporary buffer is released with SAFE_FREE().
 * The handling of a failed conversion sits on lines that are not part of
 * this listing.
 */
249 size_t convert_string_talloc(TALLOC_CTX *ctx, charset_t from, charset_t to,
250 void const *src, size_t srclen, void **dest)
252 void *alloced_string;
256 dest_len=convert_string_allocate(from, to, src, srclen, &alloced_string);
259 *dest = talloc_memdup(ctx, alloced_string, dest_len);
260 SAFE_FREE(alloced_string);
/*
 * Uppercase a unix-charset string by round-tripping through UCS2:
 * convert src into the file-static cvtbuf, run strupper_w() on it, then
 * convert back into dest (at most destlen bytes).
 * Shortcut: when strupper_w() returns zero and dest == src (in-place call),
 * srclen is returned without converting back.
 * The scratch buffer is shared file-wide, so nested or concurrent calls
 * would overwrite each other's intermediate text.
 */
266 int unix_strupper(const char *src, size_t srclen, char *dest, size_t destlen)
269 smb_ucs2_t *buffer=(smb_ucs2_t*)cvtbuf;
270 size=convert_string(CH_UNIX, CH_UCS2, src, srclen, buffer, sizeof(cvtbuf));
271 if (!strupper_w(buffer) && (dest == src)) return srclen;
272 return convert_string(CH_UCS2, CH_UNIX, buffer, size, dest, destlen);
/*
 * Lowercase counterpart of unix_strupper(): convert src to UCS2 in the
 * file-static cvtbuf, run strlower_w() on it, then convert back into dest
 * (at most destlen bytes).
 * Shortcut: when strlower_w() returns zero and dest == src (in-place call),
 * srclen is returned without converting back.
 */
275 int unix_strlower(const char *src, size_t srclen, char *dest, size_t destlen)
278 smb_ucs2_t *buffer=(smb_ucs2_t*)cvtbuf;
279 size=convert_string(CH_UNIX, CH_UCS2, src, srclen, buffer, sizeof(cvtbuf));
280 if (!strlower_w(buffer) && (dest == src)) return srclen;
281 return convert_string(CH_UCS2, CH_UNIX, buffer, size, dest, destlen);
/*
 * Number of padding bytes (0 or 1) needed before a UCS2 string at p.
 * Returns 0 when STR_NOALIGN or STR_ASCII is set in flags; otherwise the
 * low bit of the byte offset of p from base_ptr, i.e. 1 for an odd offset.
 */
285 int ucs2_align(const void *base_ptr, const void *p, int flags)
287 if (flags & (STR_NOALIGN|STR_ASCII)) return 0;
288 return PTR_DIFF(p, base_ptr) & 1;
292 /****************************************************************************
293 copy a string from a char* unix src to a dos codepage string destination
294 return the number of bytes occupied by the string in the destination
296 STR_TERMINATE means include the null termination
297 STR_UPPER means uppercase in the destination
298 dest_len is the maximum length allowed in the destination. If dest_len
299 is -1 then no maximum is used
300 ****************************************************************************/
/*
 * Convert the NUL-terminated unix string src to the DOS codepage and store
 * it in dest; the result of convert_string(CH_UNIX, CH_DOS, ...) is
 * returned.
 * dest_len == -1 is replaced by sizeof(pstring).  With STR_UPPER the source
 * is first copied into tmpbuf with pstrcpy(); the uppercasing step itself
 * and the STR_TERMINATE adjustment are on lines not part of this listing.
 */
301 int push_ascii(void *dest, const char *src, int dest_len, int flags)
303 int src_len = strlen(src);
306 /* treat a pstring as "unlimited" length */
307 if (dest_len == -1) {
308 dest_len = sizeof(pstring);
311 if (flags & STR_UPPER) {
312 pstrcpy(tmpbuf, src);
317 if (flags & STR_TERMINATE) {
321 return convert_string(CH_UNIX, CH_DOS, src, src_len, dest, dest_len);
/* push_ascii() into a destination limited to sizeof(fstring), with
 * STR_TERMINATE set. */
324 int push_ascii_fstring(void *dest, const char *src)
326 return push_ascii(dest, src, sizeof(fstring), STR_TERMINATE);
/* push_ascii() into a destination limited to sizeof(pstring), with
 * STR_TERMINATE set. */
329 int push_ascii_pstring(void *dest, const char *src)
331 return push_ascii(dest, src, sizeof(pstring), STR_TERMINATE);
/* Makes exactly the same call as push_ascii_pstring(): push_ascii() with a
 * sizeof(pstring) limit and STR_TERMINATE. */
334 int push_pstring(void *dest, const char *src)
336 return push_ascii(dest, src, sizeof(pstring), STR_TERMINATE);
340 /****************************************************************************
341 copy a string from a dos codepage source to a unix char* destination
343 STR_TERMINATE means the string in src is null terminated
344 if STR_TERMINATE is set then src_len is ignored
345 src_len is the length of the source area in bytes
346 return the number of bytes occupied by the string in src
347 the resulting string in "dest" is always null terminated
348 ****************************************************************************/
/*
 * Convert a DOS-codepage string at src to the unix charset in dest.
 * dest_len == -1 is replaced by sizeof(pstring).  With STR_TERMINATE the
 * source length is either strlen(src)+1 or, via strnlen(), the string
 * length within src_len plus the terminator when one was found; the
 * condition choosing between the two is on a line not part of this listing.
 * After conversion dest is NUL-terminated at MIN(ret, dest_len-1), provided
 * dest_len is non-zero.
 */
349 int pull_ascii(char *dest, const void *src, int dest_len, int src_len, int flags)
353 if (dest_len == -1) {
354 dest_len = sizeof(pstring);
357 if (flags & STR_TERMINATE) {
359 src_len = strlen(src) + 1;
361 int len = strnlen(src, src_len);
362 if (len < src_len) len++;
367 ret = convert_string(CH_DOS, CH_UNIX, src, src_len, dest, dest_len);
369 if (dest_len) dest[MIN(ret, dest_len-1)] = 0;
/* pull_ascii() of a NUL-terminated source (src_len -1, STR_TERMINATE) into
 * a destination limited to sizeof(pstring). */
374 int pull_ascii_pstring(char *dest, const void *src)
376 return pull_ascii(dest, src, sizeof(pstring), -1, STR_TERMINATE);
/* pull_ascii() of a NUL-terminated source (src_len -1, STR_TERMINATE) into
 * a destination limited to sizeof(fstring). */
379 int pull_ascii_fstring(char *dest, const void *src)
381 return pull_ascii(dest, src, sizeof(fstring), -1, STR_TERMINATE);
384 /****************************************************************************
385 copy a string from a char* src to a unicode destination
386 return the number of bytes occupied by the string in the destination
388 STR_TERMINATE means include the null termination
389 STR_UPPER means uppercase in the destination
390 STR_NOALIGN means don't do alignment
391 dest_len is the maximum length allowed in the destination. If dest_len
392 is -1 then no maximum is used
393 ****************************************************************************/
/*
 * Convert the NUL-terminated unix string src to UCS2 and store it in dest.
 * dest_len == -1 is replaced by sizeof(pstring).  When ucs2_align() reports
 * an odd offset from base_ptr, dest is advanced by one byte and dest_len
 * reduced by one (if non-zero).  The converted byte count is added to len;
 * the initialisation and return of len, and the STR_UPPER / STR_TERMINATE
 * details, are on lines not part of this listing.
 */
394 int push_ucs2(const void *base_ptr, void *dest, const char *src, int dest_len, int flags)
397 int src_len = strlen(src);
400 /* treat a pstring as "unlimited" length */
401 if (dest_len == -1) {
402 dest_len = sizeof(pstring);
405 if (flags & STR_UPPER) {
406 pstrcpy(tmpbuf, src);
411 if (flags & STR_TERMINATE) {
415 if (ucs2_align(base_ptr, dest, flags)) {
417 dest = (void *)((char *)dest + 1);
418 if (dest_len) dest_len--;
422 /* ucs2 is always a multiple of 2 bytes */
425 len += convert_string(CH_UNIX, CH_UCS2, src, src_len, dest, dest_len);
430 * Copy a string from a unix char* src to a UCS2 destination, allocating a buffer using talloc
432 * @param dest always set at least to NULL
434 * @retval The number of bytes occupied by the string in the destination
/* Unix -> UCS2 through convert_string_talloc().  The length passed is
 * strlen(src)+1, so the terminator is converted too. */
436 int push_ucs2_talloc(TALLOC_CTX *ctx, void **dest, const char *src)
438 int src_len = strlen(src)+1;
441 return convert_string_talloc(ctx, CH_UNIX, CH_UCS2, src, src_len, dest);
445 * Copy a string from a unix char* src to a UCS2 destination, allocating a buffer
447 * @param dest always set at least to NULL
449 * @retval The number of bytes occupied by the string in the destination
/* Unix -> UCS2 through convert_string_allocate().  The length passed is
 * strlen(src)+1, so the terminator is converted too. */
451 int push_ucs2_allocate(void **dest, const char *src)
453 int src_len = strlen(src)+1;
456 return convert_string_allocate(CH_UNIX, CH_UCS2, src, src_len, dest);
459 /****************************************************************************
460 copy a string from a char* src to a UTF-8 destination
461 return the number of bytes occupied by the string in the destination
463 STR_TERMINATE means include the null termination
464 STR_UPPER means uppercase in the destination
465 dest_len is the maximum length allowed in the destination. If dest_len
466 is -1 then no maximum is used
467 ****************************************************************************/
/*
 * Convert the NUL-terminated unix string src to UTF-8 and store it in dest;
 * the result of convert_string(CH_UNIX, CH_UTF8, ...) is returned.
 * dest_len == -1 is replaced by sizeof(pstring).  With STR_UPPER the source
 * is first copied into tmpbuf with pstrcpy(); the uppercasing step itself
 * and the STR_TERMINATE adjustment are on lines not part of this listing.
 */
468 int push_utf8(void *dest, const char *src, int dest_len, int flags)
470 int src_len = strlen(src);
473 /* treat a pstring as "unlimited" length */
474 if (dest_len == -1) {
475 dest_len = sizeof(pstring);
478 if (flags & STR_UPPER) {
479 pstrcpy(tmpbuf, src);
484 if (flags & STR_TERMINATE) {
488 return convert_string(CH_UNIX, CH_UTF8, src, src_len, dest, dest_len);
/* push_utf8() into a destination limited to sizeof(fstring), with
 * STR_TERMINATE set. */
491 int push_utf8_fstring(void *dest, const char *src)
493 return push_utf8(dest, src, sizeof(fstring), STR_TERMINATE);
/* push_utf8() into a destination limited to sizeof(pstring), with
 * STR_TERMINATE set. */
496 int push_utf8_pstring(void *dest, const char *src)
498 return push_utf8(dest, src, sizeof(pstring), STR_TERMINATE);
502 * Copy a string from a unix char* src to a UTF-8 destination, allocating a buffer using talloc
504 * @param dest always set at least to NULL
506 * @retval The number of bytes occupied by the string in the destination
/* Unix -> UTF-8 through convert_string_talloc().  The length passed is
 * strlen(src)+1, so the terminator is converted too. */
508 int push_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src)
510 int src_len = strlen(src)+1;
513 return convert_string_talloc(ctx, CH_UNIX, CH_UTF8, src, src_len, (void**)dest);
517 * Copy a string from a unix char* src to a UTF-8 destination, allocating a buffer
519 * @param dest always set at least to NULL
521 * @retval The number of bytes occupied by the string in the destination
/* Unix -> UTF-8 through convert_string_allocate().  The length passed is
 * strlen(src)+1, so the terminator is converted too. */
523 int push_utf8_allocate(void **dest, const char *src)
525 int src_len = strlen(src)+1;
528 return convert_string_allocate(CH_UNIX, CH_UTF8, src, src_len, dest);
531 /****************************************************************************
532 copy a string from a ucs2 source to a unix char* destination
534 STR_TERMINATE means the string in src is null terminated
535 STR_NOALIGN means don't try to align
536 if STR_TERMINATE is set then src_len is ignored if it is -1
537 src_len is the length of the source area in bytes
538 return the number of bytes occupied by the string in src
539 the resulting string in "dest" is always null terminated
540 ****************************************************************************/
/*
 * Convert a UCS2 string at src to the unix charset in dest.
 * dest_len == -1 is replaced by sizeof(pstring).  When ucs2_align() reports
 * an odd offset from base_ptr, src is advanced by one byte and a positive
 * src_len is reduced by one.  With STR_TERMINATE the source length is
 * either strlen_w(src)*2 + 2 or, via strnlen_w(), the character count
 * within src_len/2 plus the terminator when one was found; the condition
 * choosing between the two is on a line not part of this listing.
 * After conversion dest is NUL-terminated at MIN(ret, dest_len-1), provided
 * dest_len is non-zero.
 */
541 int pull_ucs2(const void *base_ptr, char *dest, const void *src, int dest_len, int src_len, int flags)
545 if (dest_len == -1) {
546 dest_len = sizeof(pstring);
549 if (ucs2_align(base_ptr, src, flags)) {
550 src = (const void *)((const char *)src + 1);
551 if (src_len > 0) src_len--;
554 if (flags & STR_TERMINATE) {
556 src_len = strlen_w(src)*2 + 2;
558 int len = strnlen_w(src, src_len/2);
559 if (len < src_len/2) len++;
564 /* ucs2 is always a multiple of 2 bytes */
568 ret = convert_string(CH_UCS2, CH_UNIX, src, src_len, dest, dest_len);
569 if (dest_len) dest[MIN(ret, dest_len-1)] = 0;
/* pull_ucs2() of a NUL-terminated source (src_len -1, STR_TERMINATE) into
 * a destination limited to sizeof(pstring).  base_ptr is passed as NULL. */
574 int pull_ucs2_pstring(char *dest, const void *src)
576 return pull_ucs2(NULL, dest, src, sizeof(pstring), -1, STR_TERMINATE);
/* pull_ucs2() of a NUL-terminated source (src_len -1, STR_TERMINATE) into
 * a destination limited to sizeof(fstring).  base_ptr is passed as NULL. */
579 int pull_ucs2_fstring(char *dest, const void *src)
581 return pull_ucs2(NULL, dest, src, sizeof(fstring), -1, STR_TERMINATE);
585 * Copy a string from a UCS2 src to a unix char * destination, allocating a buffer using talloc
587 * @param dest always set at least to NULL
589 * @retval The number of bytes occupied by the string in the destination
/*
 * UCS2 -> unix through convert_string_talloc(), for a NUL-terminated UCS2
 * source.
 *
 * The source length has to be measured in UCS2 characters: strlen() stops
 * at the first zero byte, which truncates the input to a fraction of the
 * string and drops its terminator.  The length is therefore computed the
 * same way acnv_u2ux() does it, as (strlen_w(src) + 1) * sizeof(smb_ucs2_t),
 * so the whole string and its two-byte terminator are converted.
 * The parameter stays `const char *` for existing callers and is cast for
 * the measurement (assumes src is suitably aligned for smb_ucs2_t -- TODO
 * confirm against callers).
 */
591 int pull_ucs2_talloc(TALLOC_CTX *ctx, void **dest, const char *src)
593 int src_len = (strlen_w((const smb_ucs2_t *)src)+1) * sizeof(smb_ucs2_t);
595 return convert_string_talloc(ctx, CH_UCS2, CH_UNIX, src, src_len, dest);
599 * Copy a string from a UCS2 src to a unix char * destination, allocating a buffer
601 * @param dest always set at least to NULL
603 * @retval The number of bytes occupied by the string in the destination
/*
 * UCS2 -> unix through convert_string_allocate(), for a NUL-terminated
 * UCS2 source.
 *
 * The source length has to be measured in UCS2 characters: strlen() stops
 * at the first zero byte, which truncates the input to a fraction of the
 * string and drops its terminator.  The length is therefore computed the
 * same way acnv_u2ux() does it, as (strlen_w(src) + 1) * sizeof(smb_ucs2_t),
 * so the whole string and its two-byte terminator are converted.
 * The parameter stays `const char *` for existing callers and is cast for
 * the measurement (assumes src is suitably aligned for smb_ucs2_t -- TODO
 * confirm against callers).
 */
605 int pull_ucs2_allocate(void **dest, const char *src)
607 int src_len = (strlen_w((const smb_ucs2_t *)src)+1) * sizeof(smb_ucs2_t);
609 return convert_string_allocate(CH_UCS2, CH_UNIX, src, src_len, dest);
612 /****************************************************************************
613 copy a string from a utf-8 source to a unix char* destination
615 STR_TERMINATE means the string in src is null terminated
616 if STR_TERMINATE is set then src_len is ignored
617 src_len is the length of the source area in bytes
618 return the number of bytes occupied by the string in src
619 the resulting string in "dest" is always null terminated
620 ****************************************************************************/
/*
 * Convert a UTF-8 string at src to the unix charset in dest.
 * dest_len == -1 is replaced by sizeof(pstring).  With STR_TERMINATE the
 * source length is either strlen(src)+1 or, via strnlen(), the string
 * length within src_len plus the terminator when one was found; the
 * condition choosing between the two is on a line not part of this listing.
 * After conversion dest is NUL-terminated at MIN(ret, dest_len-1), provided
 * dest_len is non-zero.
 */
621 int pull_utf8(char *dest, const void *src, int dest_len, int src_len, int flags)
625 if (dest_len == -1) {
626 dest_len = sizeof(pstring);
629 if (flags & STR_TERMINATE) {
631 src_len = strlen(src) + 1;
633 int len = strnlen(src, src_len);
634 if (len < src_len) len++;
639 ret = convert_string(CH_UTF8, CH_UNIX, src, src_len, dest, dest_len);
640 if (dest_len) dest[MIN(ret, dest_len-1)] = 0;
/* pull_utf8() of a NUL-terminated source (src_len -1, STR_TERMINATE) into
 * a destination limited to sizeof(pstring). */
645 int pull_utf8_pstring(char *dest, const void *src)
647 return pull_utf8(dest, src, sizeof(pstring), -1, STR_TERMINATE);
/* pull_utf8() of a NUL-terminated source (src_len -1, STR_TERMINATE) into
 * a destination limited to sizeof(fstring). */
650 int pull_utf8_fstring(char *dest, const void *src)
652 return pull_utf8(dest, src, sizeof(fstring), -1, STR_TERMINATE);
656 * Copy a string from a UTF-8 src to a unix char * destination, allocating a buffer using talloc
658 * @param dest always set at least to NULL
660 * @retval The number of bytes occupied by the string in the destination
/* UTF-8 -> unix through convert_string_talloc().  The length passed is
 * strlen(src)+1, so the terminator is converted too. */
662 int pull_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src)
664 int src_len = strlen(src)+1;
666 return convert_string_talloc(ctx, CH_UTF8, CH_UNIX, src, src_len, (void **)dest);
670 * Copy a string from a UTF-8 src to a unix char * destination, allocating a buffer
672 * @param dest always set at least to NULL
674 * @retval The number of bytes occupied by the string in the destination
/* UTF-8 -> unix through convert_string_allocate().  The length passed is
 * strlen(src)+1, so the terminator is converted too. */
676 int pull_utf8_allocate(void **dest, const char *src)
678 int src_len = strlen(src)+1;
680 return convert_string_allocate(CH_UTF8, CH_UNIX, src, src_len, dest);
683 /****************************************************************************
684 copy a string from a char* src to a unicode or ascii
685 dos codepage destination choosing unicode or ascii based on the
686 flags in the SMB buffer starting at base_ptr
687 return the number of bytes occupied by the string in the destination
689 STR_TERMINATE means include the null termination
690 STR_UPPER means uppercase in the destination
691 STR_ASCII use ascii even with unicode packet
692 STR_NOALIGN means don't do alignment
693 dest_len is the maximum length allowed in the destination. If dest_len
694 is -1 then no maximum is used
695 ****************************************************************************/
/*
 * Dispatch to push_ucs2() or push_ascii().
 * UCS2 is chosen when STR_ASCII is not set and either STR_UNICODE is set or
 * the 16-bit value read at base_ptr + smb_flg2 has FLAGS2_UNICODE_STRINGS;
 * every other combination goes to push_ascii().
 */
696 int push_string(const void *base_ptr, void *dest, const char *src, int dest_len, int flags)
698 if (!(flags & STR_ASCII) && \
699 ((flags & STR_UNICODE || \
700 (SVAL(base_ptr, smb_flg2) & FLAGS2_UNICODE_STRINGS)))) {
701 return push_ucs2(base_ptr, dest, src, dest_len, flags);
703 return push_ascii(dest, src, dest_len, flags);
707 /****************************************************************************
708 copy a string from a unicode or ascii source (depending on
709 the packet flags) to a char* destination
711 STR_TERMINATE means the string in src is null terminated
712 STR_UNICODE means to force as unicode
713 STR_ASCII use ascii even with unicode packet
714 STR_NOALIGN means don't do alignment
715 if STR_TERMINATE is set then src_len is ignored if it is -1
716 src_len is the length of the source area in bytes
717 return the number of bytes occupied by the string in src
718 the resulting string in "dest" is always null terminated
719 ****************************************************************************/
/*
 * Dispatch to pull_ucs2() or pull_ascii().
 * UCS2 is chosen when STR_ASCII is not set and either STR_UNICODE is set or
 * the 16-bit value read at base_ptr + smb_flg2 has FLAGS2_UNICODE_STRINGS;
 * every other combination goes to pull_ascii().
 * (The rest of the parameter list is on a line not part of this listing.)
 */
720 int pull_string(const void *base_ptr, char *dest, const void *src, int dest_len, int src_len,
723 if (!(flags & STR_ASCII) && \
724 ((flags & STR_UNICODE || \
725 (SVAL(base_ptr, smb_flg2) & FLAGS2_UNICODE_STRINGS)))) {
726 return pull_ucs2(base_ptr, dest, src, dest_len, src_len, flags);
728 return pull_ascii(dest, src, dest_len, src_len, flags);
/*
 * Alignment padding for a string at p, using the same unicode-or-ascii test
 * as push_string() and pull_string(): in the unicode case the result of
 * ucs2_align() is returned.  The return for the non-unicode case is on a
 * line not part of this listing.
 */
731 int align_string(const void *base_ptr, const char *p, int flags)
733 if (!(flags & STR_ASCII) && \
734 ((flags & STR_UNICODE || \
735 (SVAL(base_ptr, smb_flg2) & FLAGS2_UNICODE_STRINGS)))) {
736 return ucs2_align(base_ptr, p, flags);
743 /****************************************************************************
744 convert from ucs2 to unix charset and return the
745 allocated and converted string or NULL if an error occurred.
746 you must provide a zero terminated string.
747 the returning string will be zero terminated.
748 ****************************************************************************/
/*
 * UCS2 -> unix into a newly allocated buffer.  The source length is
 * (strlen_w(src) + 1) * sizeof(smb_ucs2_t) bytes, i.e. the string plus its
 * terminator.  Returns NULL when convert_string_allocate() reports -1.
 */
749 char *acnv_u2ux(const smb_ucs2_t *src)
755 slen = (strlen_w(src) + 1) * sizeof(smb_ucs2_t);
756 dlen = convert_string_allocate(CH_UCS2, CH_UNIX, src, slen, &dest);
757 if (dlen == -1) return NULL;
761 /****************************************************************************
762 convert from unix to ucs2 charset and return the
763 allocated and converted string or NULL if an error occurred.
764 you must provide a zero terminated string.
765 the returning string will be zero terminated.
766 ****************************************************************************/
/*
 * Unix -> UCS2 into a newly allocated buffer.  The source length is
 * strlen(src) + 1 bytes, i.e. the string plus its terminator.
 * Returns NULL when convert_string_allocate() reports -1.
 */
767 smb_ucs2_t *acnv_uxu2(const char *src)
773 slen = strlen(src) + 1;
774 dlen = convert_string_allocate(CH_UNIX, CH_UCS2, src, slen, &dest);
775 if (dlen == -1) return NULL;
779 /****************************************************************************
780 convert from ucs2 to dos charset and return the
781 allocated and converted string or NULL if an error occurred.
782 you must provide a zero terminated string.
783 the returning string will be zero terminated.
784 ****************************************************************************/
/*
 * UCS2 -> DOS codepage into a newly allocated buffer.  The source length is
 * (strlen_w(src) + 1) * sizeof(smb_ucs2_t) bytes, i.e. the string plus its
 * terminator.  Returns NULL when convert_string_allocate() reports -1.
 */
785 char *acnv_u2dos(const smb_ucs2_t *src)
791 slen = (strlen_w(src) + 1) * sizeof(smb_ucs2_t);
792 dlen = convert_string_allocate(CH_UCS2, CH_DOS, src, slen, &dest);
793 if (dlen == -1) return NULL;
797 /****************************************************************************
798 convert from dos to ucs2 charset and return the
799 allocated and converted string or NULL if an error occurred.
800 you must provide a zero terminated string.
801 the returning string will be zero terminated.
802 ****************************************************************************/
/*
 * DOS codepage -> UCS2 into a newly allocated buffer.  The source length is
 * strlen(src) + 1 bytes, i.e. the string plus its terminator.
 * Returns NULL when convert_string_allocate() reports -1.
 */
803 smb_ucs2_t *acnv_dosu2(const char *src)
809 slen = strlen(src) + 1;
810 dlen = convert_string_allocate(CH_DOS, CH_UCS2, src, slen, &dest);
811 if (dlen == -1) return NULL;