2 Unix SMB/CIFS implementation.
3 Character set conversion Extensions
4 Copyright (C) Igor Vergeichik <iverg@mail.ru> 2001
5 Copyright (C) Andrew Tridgell 2001
6 Copyright (C) Simo Sorce 2001
7 Copyright (C) Martin Pool 2003
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 3 of the License, or
12 (at your option) any later version.
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program. If not, see <http://www.gnu.org/licenses/>.
25 /* We can parameterize this if someone complains.... JRA. */
27 char lp_failed_convert_char(void)
35 * @brief Character-set conversion routines built on our iconv.
37 * @note Samba's internal character set (at least in the 3.0 series)
38 * is always the same as the one for the Unix filesystem. It is
39 * <b>not</b> necessarily UTF-8 and may be different on machines that
40 * need i18n filenames to be compatible with Unix software. It does
41 * have to be a superset of ASCII. All multibyte sequences must start
42 * with a byte with the high bit set.
48 static smb_iconv_t conv_handles[NUM_CHARSETS][NUM_CHARSETS];
49 static bool conv_silent; /* Should we do a debug if the conversion fails ? */
50 static bool initialized;
53 * Return the name of a charset to give to iconv().
55 static const char *charset_name(charset_t ch)
57 const char *ret = NULL;
59 if (ch == CH_UTF16LE) ret = "UTF-16LE";
60 else if (ch == CH_UTF16BE) ret = "UTF-16BE";
61 else if (ch == CH_UNIX) ret = lp_unix_charset();
62 else if (ch == CH_DOS) ret = lp_dos_charset();
63 else if (ch == CH_DISPLAY) ret = lp_display_charset();
64 else if (ch == CH_UTF8) ret = "UTF8";
66 #if defined(HAVE_NL_LANGINFO) && defined(CODESET)
67 if (ret && !strcmp(ret, "LOCALE")) {
68 const char *ln = NULL;
71 setlocale(LC_ALL, "");
73 ln = nl_langinfo(CODESET);
75 /* Check whether the charset name is supported
77 smb_iconv_t handle = smb_iconv_open(ln,"UCS-2LE");
78 if (handle == (smb_iconv_t) -1) {
79 DEBUG(5,("Locale charset '%s' unsupported, using ASCII instead\n", ln));
82 DEBUG(5,("Substituting charset '%s' for LOCALE\n", ln));
83 smb_iconv_close(handle);
90 if (!ret || !*ret) ret = "ASCII";
94 void lazy_initialize_conv(void)
104 * Destroy global objects allocated by init_iconv()
106 void gfree_charcnv(void)
110 for (c1=0;c1<NUM_CHARSETS;c1++) {
111 for (c2=0;c2<NUM_CHARSETS;c2++) {
112 if ( conv_handles[c1][c2] ) {
113 smb_iconv_close( conv_handles[c1][c2] );
114 conv_handles[c1][c2] = 0;
122 * Initialize iconv conversion descriptors.
124 * This is called the first time it is needed, and also called again
125 * every time the configuration is reloaded, because the charset or
126 * codepage might have changed.
128 void init_iconv(void)
131 bool did_reload = False;
133 /* so that charset_name() works we need to get the UNIX<->UCS2 going
135 if (!conv_handles[CH_UNIX][CH_UTF16LE])
136 conv_handles[CH_UNIX][CH_UTF16LE] = smb_iconv_open(charset_name(CH_UTF16LE), "ASCII");
138 if (!conv_handles[CH_UTF16LE][CH_UNIX])
139 conv_handles[CH_UTF16LE][CH_UNIX] = smb_iconv_open("ASCII", charset_name(CH_UTF16LE));
141 for (c1=0;c1<NUM_CHARSETS;c1++) {
142 for (c2=0;c2<NUM_CHARSETS;c2++) {
143 const char *n1 = charset_name((charset_t)c1);
144 const char *n2 = charset_name((charset_t)c2);
145 if (conv_handles[c1][c2] &&
146 strcmp(n1, conv_handles[c1][c2]->from_name) == 0 &&
147 strcmp(n2, conv_handles[c1][c2]->to_name) == 0)
152 if (conv_handles[c1][c2])
153 smb_iconv_close(conv_handles[c1][c2]);
155 conv_handles[c1][c2] = smb_iconv_open(n2,n1);
156 if (conv_handles[c1][c2] == (smb_iconv_t)-1) {
157 DEBUG(0,("init_iconv: Conversion from %s to %s not supported\n",
158 charset_name((charset_t)c1), charset_name((charset_t)c2)));
159 if (c1 != CH_UTF16LE && c1 != CH_UTF16BE) {
162 if (c2 != CH_UTF16LE && c2 != CH_UTF16BE) {
165 DEBUG(0,("init_iconv: Attempting to replace with conversion from %s to %s\n",
167 conv_handles[c1][c2] = smb_iconv_open(n2,n1);
168 if (!conv_handles[c1][c2]) {
169 DEBUG(0,("init_iconv: Conversion from %s to %s failed", n1, n2));
170 smb_panic("init_iconv: conv_handle initialization failed");
177 /* XXX: Does this really get called every time the dos
178 * codepage changes? */
179 /* XXX: Is the did_reload test too strict? */
187 * Convert string from one encoding to another, making error checking etc
188 * Slow path version - uses (slow) iconv.
190 * @param src pointer to source string (multibyte or singlebyte)
191 * @param srclen length of the source string in bytes
192 * @param dest pointer to destination string (multibyte or singlebyte)
193 * @param destlen maximal length allowed for string
194 * @param allow_bad_conv determines if a "best effort" conversion is acceptable (never returns errors)
195 * @returns the number of bytes occupied in the destination
197 * Ensure the srclen contains the terminating zero.
201 static size_t convert_string_internal(charset_t from, charset_t to,
202 void const *src, size_t srclen,
203 void *dest, size_t destlen, bool allow_bad_conv)
207 const char* inbuf = (const char*)src;
208 char* outbuf = (char*)dest;
209 smb_iconv_t descriptor;
211 lazy_initialize_conv();
213 descriptor = conv_handles[from][to];
215 if (srclen == (size_t)-1) {
216 if (from == CH_UTF16LE || from == CH_UTF16BE) {
217 srclen = (strlen_w((const smb_ucs2_t *)src)+1) * 2;
219 srclen = strlen((const char *)src)+1;
224 if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
226 DEBUG(0,("convert_string_internal: Conversion not supported.\n"));
235 retval = smb_iconv(descriptor, &inbuf, &i_len, &outbuf, &o_len);
236 if(retval==(size_t)-1) {
237 const char *reason="unknown error";
240 reason="Incomplete multibyte sequence";
242 DEBUG(3,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
247 reason="No more room";
249 if (from == CH_UNIX) {
250 DEBUG(3,("E2BIG: convert_string(%s,%s): srclen=%u destlen=%u - '%s'\n",
251 charset_name(from), charset_name(to),
252 (unsigned int)srclen, (unsigned int)destlen, (const char *)src));
254 DEBUG(3,("E2BIG: convert_string(%s,%s): srclen=%u destlen=%u\n",
255 charset_name(from), charset_name(to),
256 (unsigned int)srclen, (unsigned int)destlen));
261 reason="Illegal multibyte sequence";
263 DEBUG(3,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
270 DEBUG(0,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
273 /* smb_panic(reason); */
275 return destlen-o_len;
280 * Conversion not supported. This is actually an error, but there are so
281 * many misconfigured iconv systems and smb.conf's out there we can't just
282 * fail. Do a very bad conversion instead.... JRA.
286 if (o_len == 0 || i_len == 0)
287 return destlen - o_len;
289 if (((from == CH_UTF16LE)||(from == CH_UTF16BE)) &&
290 ((to != CH_UTF16LE)||(to != CH_UTF16BE))) {
291 /* Can't convert from utf16 any endian to multibyte.
292 Replace with the default fail char.
295 return destlen - o_len;
297 *outbuf = lp_failed_convert_char();
306 if (o_len == 0 || i_len == 0)
307 return destlen - o_len;
309 /* Keep trying with the next char... */
312 } else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) {
313 /* Can't convert to UTF16LE - just widen by adding the
314 default fail char then zero.
317 return destlen - o_len;
319 outbuf[0] = lp_failed_convert_char();
328 if (o_len == 0 || i_len == 0)
329 return destlen - o_len;
331 /* Keep trying with the next char... */
334 } else if (from != CH_UTF16LE && from != CH_UTF16BE &&
335 to != CH_UTF16LE && to != CH_UTF16BE) {
336 /* Failed multibyte to multibyte. Just copy the default fail char and
338 outbuf[0] = lp_failed_convert_char();
346 if (o_len == 0 || i_len == 0)
347 return destlen - o_len;
349 /* Keep trying with the next char... */
353 /* Keep compiler happy.... */
354 return destlen - o_len;
360 * Convert string from one encoding to another, making error checking etc
361 * Fast path version - handles ASCII first.
363 * @param src pointer to source string (multibyte or singlebyte)
364 * @param srclen length of the source string in bytes, or -1 for nul terminated.
365 * @param dest pointer to destination string (multibyte or singlebyte)
366 * @param destlen maximal length allowed for string - *NEVER* -1.
367 * @param allow_bad_conv determines if a "best effort" conversion is acceptable (never returns errors)
368 * @returns the number of bytes occupied in the destination
370 * Ensure the srclen contains the terminating zero.
372 * This function has been hand-tuned to provide a fast path.
373 * Don't change unless you really know what you are doing. JRA.
376 size_t convert_string(charset_t from, charset_t to,
377 void const *src, size_t srclen,
378 void *dest, size_t destlen, bool allow_bad_conv)
381 * NB. We deliberately don't do a strlen here if srclen == -1.
382 * This is very expensive over millions of calls and is taken
383 * care of in the slow path in convert_string_internal. JRA.
387 SMB_ASSERT(destlen != (size_t)-1);
393 if (from != CH_UTF16LE && from != CH_UTF16BE && to != CH_UTF16LE && to != CH_UTF16BE) {
394 const unsigned char *p = (const unsigned char *)src;
395 unsigned char *q = (unsigned char *)dest;
396 size_t slen = srclen;
397 size_t dlen = destlen;
398 unsigned char lastp = '\0';
401 /* If all characters are ascii, fast path here. */
402 while (slen && dlen) {
403 if ((lastp = *p) <= 0x7f) {
405 if (slen != (size_t)-1) {
413 #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
416 size_t ret = convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
417 if (ret == (size_t)-1) {
425 /* Even if we fast path we should note if we ran out of room. */
426 if (((slen != (size_t)-1) && slen) ||
427 ((slen == (size_t)-1) && lastp)) {
432 } else if (from == CH_UTF16LE && to != CH_UTF16LE) {
433 const unsigned char *p = (const unsigned char *)src;
434 unsigned char *q = (unsigned char *)dest;
436 size_t slen = srclen;
437 size_t dlen = destlen;
438 unsigned char lastp = '\0';
440 /* If all characters are ascii, fast path here. */
441 while (((slen == (size_t)-1) || (slen >= 2)) && dlen) {
442 if (((lastp = *p) <= 0x7f) && (p[1] == 0)) {
444 if (slen != (size_t)-1) {
453 #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
456 size_t ret = convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
457 if (ret == (size_t)-1) {
465 /* Even if we fast path we should note if we ran out of room. */
466 if (((slen != (size_t)-1) && slen) ||
467 ((slen == (size_t)-1) && lastp)) {
472 } else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) {
473 const unsigned char *p = (const unsigned char *)src;
474 unsigned char *q = (unsigned char *)dest;
476 size_t slen = srclen;
477 size_t dlen = destlen;
478 unsigned char lastp = '\0';
480 /* If all characters are ascii, fast path here. */
481 while (slen && (dlen >= 2)) {
482 if ((lastp = *p) <= 0x7F) {
485 if (slen != (size_t)-1) {
493 #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
496 size_t ret = convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
497 if (ret == (size_t)-1) {
505 /* Even if we fast path we should note if we ran out of room. */
506 if (((slen != (size_t)-1) && slen) ||
507 ((slen == (size_t)-1) && lastp)) {
514 #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
517 return convert_string_internal(from, to, src, srclen, dest, destlen, allow_bad_conv);
521 * Convert between character sets, allocating a new buffer using talloc for the result.
523 * @param srclen length of source buffer.
524 * @param dest always set at least to NULL
525 * @param converted_size set to the number of bytes occupied by the string in
526 * the destination on success.
527 * @note -1 is not accepted for srclen.
529 * @return true if new buffer was correctly allocated, and string was
532 * Ensure the srclen contains the terminating zero.
534 * I hate the goto's in this function. It's embarrassing.....
535 * There has to be a cleaner way to do this. JRA.
537 bool convert_string_talloc(TALLOC_CTX *ctx, charset_t from, charset_t to,
538 void const *src, size_t srclen, void *dst,
539 size_t *converted_size, bool allow_bad_conv)
542 size_t i_len, o_len, destlen = (srclen * 3) / 2;
544 const char *inbuf = (const char *)src;
545 char *outbuf = NULL, *ob = NULL;
546 smb_iconv_t descriptor;
547 void **dest = (void **)dst;
551 if (!converted_size) {
556 if (src == NULL || srclen == (size_t)-1) {
561 ob = talloc_strdup(ctx, "");
571 lazy_initialize_conv();
573 descriptor = conv_handles[from][to];
575 if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
577 DEBUG(0,("convert_string_talloc: Conversion not supported.\n"));
584 /* +2 is for ucs2 null termination. */
585 if ((destlen*2)+2 < destlen) {
586 /* wrapped ! abort. */
588 DEBUG(0, ("convert_string_talloc: destlen wrapped !\n"));
593 destlen = destlen * 2;
596 /* +2 is for ucs2 null termination. */
597 ob = (char *)TALLOC_REALLOC(ctx, ob, destlen + 2);
600 DEBUG(0, ("convert_string_talloc: realloc failed!\n"));
610 retval = smb_iconv(descriptor,
613 if(retval == (size_t)-1) {
614 const char *reason="unknown error";
617 reason="Incomplete multibyte sequence";
619 DEBUG(3,("convert_string_talloc: Conversion error: %s(%s)\n",reason,inbuf));
626 reason="Illegal multibyte sequence";
628 DEBUG(3,("convert_string_talloc: Conversion error: %s(%s)\n",reason,inbuf));
634 DEBUG(0,("Conversion error: %s(%s)\n",reason,inbuf));
635 /* smb_panic(reason); */
642 destlen = destlen - o_len;
643 /* Don't shrink unless we're reclaiming a lot of
644 * space. This is in the hot codepath and these
645 * reallocs *cost*. JRA.
648 /* We're shrinking here so we know the +2 is safe from wrap. */
649 ob = (char *)TALLOC_REALLOC(ctx,ob,destlen + 2);
652 if (destlen && !ob) {
653 DEBUG(0, ("convert_string_talloc: out of memory!\n"));
660 /* Must ucs2 null terminate in the extra space we allocated. */
662 ob[destlen+1] = '\0';
664 *converted_size = destlen;
670 * Conversion not supported. This is actually an error, but there are so
671 * many misconfigured iconv systems and smb.conf's out there we can't just
672 * fail. Do a very bad conversion instead.... JRA.
676 if (o_len == 0 || i_len == 0)
679 if (((from == CH_UTF16LE)||(from == CH_UTF16BE)) &&
680 ((to != CH_UTF16LE)||(to != CH_UTF16BE))) {
681 /* Can't convert from utf16 any endian to multibyte.
682 Replace with the default fail char.
689 *outbuf = lp_failed_convert_char();
698 if (o_len == 0 || i_len == 0)
701 /* Keep trying with the next char... */
704 } else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) {
705 /* Can't convert to UTF16LE - just widen by adding the
706 default fail char then zero.
711 outbuf[0] = lp_failed_convert_char();
720 if (o_len == 0 || i_len == 0)
723 /* Keep trying with the next char... */
726 } else if (from != CH_UTF16LE && from != CH_UTF16BE &&
727 to != CH_UTF16LE && to != CH_UTF16BE) {
728 /* Failed multibyte to multibyte. Just copy the default fail char and
730 outbuf[0] = lp_failed_convert_char();
738 if (o_len == 0 || i_len == 0)
741 /* Keep trying with the next char... */
745 /* Keep compiler happy.... */
751 size_t unix_strupper(const char *src, size_t srclen, char *dest, size_t destlen)
756 if (!push_ucs2_talloc(NULL, &buffer, src, &size)) {
760 if (!strupper_w(buffer) && (dest == src)) {
765 size = convert_string(CH_UTF16LE, CH_UNIX, buffer, size, dest, destlen, True);
771 talloc_strdup() a unix string to upper case.
774 char *talloc_strdup_upper(TALLOC_CTX *ctx, const char *s)
776 char *out_buffer = talloc_strdup(ctx,s);
777 const unsigned char *p = (const unsigned char *)s;
778 unsigned char *q = (unsigned char *)out_buffer;
784 /* this is quite a common operation, so we want it to be
785 fast. We optimise for the ascii case, knowing that all our
786 supported multi-byte character sets are ascii-compatible
787 (ie. they match for the first 128 chars) */
792 *q++ = toupper_ascii_fast(*p);
798 size_t converted_size, converted_size2;
799 smb_ucs2_t *ubuf = NULL;
801 /* We're not using the ascii buffer above. */
802 TALLOC_FREE(out_buffer);
804 if (!convert_string_talloc(ctx, CH_UNIX, CH_UTF16LE, s,
805 strlen(s)+1, (void *)&ubuf,
806 &converted_size, True))
813 if (!convert_string_talloc(ctx, CH_UTF16LE, CH_UNIX, ubuf,
814 converted_size, (void *)&out_buffer,
815 &converted_size2, True))
821 /* Don't need the intermediate buffer
830 char *strupper_talloc(TALLOC_CTX *ctx, const char *s) {
831 return talloc_strdup_upper(ctx, s);
835 size_t unix_strlower(const char *src, size_t srclen, char *dest, size_t destlen)
838 smb_ucs2_t *buffer = NULL;
840 if (!convert_string_talloc(NULL, CH_UNIX, CH_UTF16LE, src, srclen,
841 (void **)(void *)&buffer, &size,
844 smb_panic("failed to create UCS2 buffer");
846 if (!strlower_w(buffer) && (dest == src)) {
850 size = convert_string(CH_UTF16LE, CH_UNIX, buffer, size, dest, destlen, True);
856 char *talloc_strdup_lower(TALLOC_CTX *ctx, const char *s)
858 size_t converted_size;
859 smb_ucs2_t *buffer = NULL;
862 if (!push_ucs2_talloc(ctx, &buffer, s, &converted_size)) {
868 if (!pull_ucs2_talloc(ctx, &out_buffer, buffer, &converted_size)) {
878 char *strlower_talloc(TALLOC_CTX *ctx, const char *s) {
879 return talloc_strdup_lower(ctx, s);
882 size_t ucs2_align(const void *base_ptr, const void *p, int flags)
884 if (flags & (STR_NOALIGN|STR_ASCII))
886 return PTR_DIFF(p, base_ptr) & 1;
891 * Copy a string from a char* unix src to a dos codepage string destination.
893 * @return the number of bytes occupied by the string in the destination.
895 * @param flags can include
897 * <dt>STR_TERMINATE</dt> <dd>means include the null termination</dd>
898 * <dt>STR_UPPER</dt> <dd>means uppercase in the destination</dd>
901 * @param dest_len the maximum length in bytes allowed in the
904 size_t push_ascii(void *dest, const char *src, size_t dest_len, int flags)
906 size_t src_len = strlen(src);
910 /* No longer allow a length of -1. */
911 if (dest_len == (size_t)-1) {
912 smb_panic("push_ascii - dest_len == -1");
915 if (flags & STR_UPPER) {
916 tmpbuf = SMB_STRDUP(src);
918 smb_panic("malloc fail");
924 if (flags & (STR_TERMINATE | STR_TERMINATE_ASCII)) {
928 ret = convert_string(CH_UNIX, CH_DOS, src, src_len, dest, dest_len, True);
929 if (ret == (size_t)-1 &&
930 (flags & (STR_TERMINATE | STR_TERMINATE_ASCII))
932 ((char *)dest)[0] = '\0';
938 size_t push_ascii_fstring(void *dest, const char *src)
940 return push_ascii(dest, src, sizeof(fstring), STR_TERMINATE);
943 /********************************************************************
944 Push an nstring - ensure null terminated. Written by
945 moriyama@miraclelinux.com (MORIYAMA Masayuki).
946 ********************************************************************/
948 size_t push_ascii_nstring(void *dest, const char *src)
950 size_t i, buffer_len, dest_len;
954 if (!push_ucs2_talloc(NULL, &buffer, src, &buffer_len)) {
955 smb_panic("failed to create UCS2 buffer");
958 /* We're using buffer_len below to count ucs2 characters, not bytes. */
959 buffer_len /= sizeof(smb_ucs2_t);
962 for (i = 0; buffer[i] != 0 && (i < buffer_len); i++) {
963 unsigned char mb[10];
964 /* Convert one smb_ucs2_t character at a time. */
965 size_t mb_len = convert_string(CH_UTF16LE, CH_DOS, buffer+i, sizeof(smb_ucs2_t), mb, sizeof(mb), False);
966 if ((mb_len != (size_t)-1) && (dest_len + mb_len <= MAX_NETBIOSNAME_LEN - 1)) {
967 memcpy((char *)dest + dest_len, mb, mb_len);
974 ((char *)dest)[dest_len] = '\0';
981 /********************************************************************
982 Push and malloc an ascii string. src and dest null terminated.
983 ********************************************************************/
985 bool push_ascii_talloc(TALLOC_CTX *mem_ctx, char **dest, const char *src, size_t *converted_size)
987 size_t src_len = strlen(src)+1;
990 return convert_string_talloc(mem_ctx, CH_UNIX, CH_DOS, src, src_len,
991 (void **)dest, converted_size, True);
995 * Copy a string from a dos codepage source to a unix char* destination.
997 * The resulting string in "dest" is always null terminated.
999 * @param flags can have:
1001 * <dt>STR_TERMINATE</dt>
1002 * <dd>STR_TERMINATE means the string in @p src
1003 * is null terminated, and src_len is ignored.</dd>
1006 * @param src_len is the length of the source area in bytes.
1007 * @returns the number of bytes occupied by the string in @p src.
1009 size_t pull_ascii(char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
1013 if (dest_len == (size_t)-1) {
1014 /* No longer allow dest_len of -1. */
1015 smb_panic("pull_ascii - invalid dest_len of -1");
1018 if (flags & STR_TERMINATE) {
1019 if (src_len == (size_t)-1) {
1020 src_len = strlen((const char *)src) + 1;
1022 size_t len = strnlen((const char *)src, src_len);
1029 ret = convert_string(CH_DOS, CH_UNIX, src, src_len, dest, dest_len, True);
1030 if (ret == (size_t)-1) {
1035 if (dest_len && ret) {
1036 /* Did we already process the terminating zero ? */
1037 if (dest[MIN(ret-1, dest_len-1)] != 0) {
1038 dest[MIN(ret, dest_len-1)] = 0;
1048 * Copy a string from a dos codepage source to a unix char* destination.
1051 * The resulting string in "dest" is always null terminated.
1053 * @param flags can have:
1055 * <dt>STR_TERMINATE</dt>
1056 * <dd>STR_TERMINATE means the string in @p src
1057 * is null terminated, and src_len is ignored.</dd>
1060 * @param src_len is the length of the source area in bytes.
1061 * @returns the number of bytes occupied by the string in @p src.
1064 static size_t pull_ascii_base_talloc(TALLOC_CTX *ctx,
1079 if (flags & STR_TERMINATE) {
1080 if (src_len == (size_t)-1) {
1081 src_len = strlen((const char *)src) + 1;
1083 size_t len = strnlen((const char *)src, src_len);
1088 /* Ensure we don't use an insane length from the client. */
1089 if (src_len >= 1024*1024) {
1090 char *msg = talloc_asprintf(ctx,
1091 "Bad src length (%u) in "
1092 "pull_ascii_base_talloc",
1093 (unsigned int)src_len);
1097 /* Can't have an unlimited length
1098 * non STR_TERMINATE'd.
1100 if (src_len == (size_t)-1) {
1106 /* src_len != -1 here. */
1108 if (!convert_string_talloc(ctx, CH_DOS, CH_UNIX, src, src_len, &dest,
1113 if (dest_len && dest) {
1114 /* Did we already process the terminating zero ? */
1115 if (dest[dest_len-1] != 0) {
1116 size_t size = talloc_get_size(dest);
1117 /* Have we got space to append the '\0' ? */
1118 if (size <= dest_len) {
1120 dest = TALLOC_REALLOC_ARRAY(ctx, dest, char,
1124 dest_len = (size_t)-1;
1129 dest[dest_len] = '\0';
1140 size_t pull_ascii_fstring(char *dest, const void *src)
1142 return pull_ascii(dest, src, sizeof(fstring), -1, STR_TERMINATE);
1145 /* When pulling an nstring it can expand into a larger size (dos cp -> utf8). Cope with this. */
1147 size_t pull_ascii_nstring(char *dest, size_t dest_len, const void *src)
1149 return pull_ascii(dest, src, dest_len, sizeof(nstring)-1, STR_TERMINATE);
1153 * Copy a string from a char* src to a unicode destination.
1155 * @returns the number of bytes occupied by the string in the destination.
1157 * @param flags can have:
1160 * <dt>STR_TERMINATE <dd>means include the null termination.
1161 * <dt>STR_UPPER <dd>means uppercase in the destination.
1162 * <dt>STR_NOALIGN <dd>means don't do alignment.
1165 * @param dest_len is the maximum length allowed in the
1169 size_t push_ucs2(const void *base_ptr, void *dest, const char *src, size_t dest_len, int flags)
1175 if (dest_len == (size_t)-1) {
1176 /* No longer allow dest_len of -1. */
1177 smb_panic("push_ucs2 - invalid dest_len of -1");
1180 if (flags & STR_TERMINATE)
1181 src_len = (size_t)-1;
1183 src_len = strlen(src);
1185 if (ucs2_align(base_ptr, dest, flags)) {
1187 dest = (void *)((char *)dest + 1);
1193 /* ucs2 is always a multiple of 2 bytes */
1196 ret = convert_string(CH_UNIX, CH_UTF16LE, src, src_len, dest, dest_len, True);
1197 if (ret == (size_t)-1) {
1198 if ((flags & STR_TERMINATE) &&
1208 if (flags & STR_UPPER) {
1209 smb_ucs2_t *dest_ucs2 = (smb_ucs2_t *)dest;
1212 /* We check for i < (ret / 2) below as the dest string isn't null
1213 terminated if STR_TERMINATE isn't set. */
1215 for (i = 0; i < (ret / 2) && i < (dest_len / 2) && dest_ucs2[i]; i++) {
1216 smb_ucs2_t v = toupper_w(dest_ucs2[i]);
1217 if (v != dest_ucs2[i]) {
1228 * Copy a string from a unix char* src to a UCS2 destination,
1229 * allocating a buffer using talloc().
1231 * @param dest always set at least to NULL
1232 * @param converted_size set to the number of bytes occupied by the string in
1233 * the destination on success.
1235 * @return true if new buffer was correctly allocated, and string was
1238 bool push_ucs2_talloc(TALLOC_CTX *ctx, smb_ucs2_t **dest, const char *src,
1239 size_t *converted_size)
1241 size_t src_len = strlen(src)+1;
1244 return convert_string_talloc(ctx, CH_UNIX, CH_UTF16LE, src, src_len,
1245 (void **)dest, converted_size, True);
1250 Copy a string from a char* src to a UTF-8 destination.
1251 Return the number of bytes occupied by the string in the destination
1253 STR_TERMINATE means include the null termination
1254 STR_UPPER means uppercase in the destination
1255 dest_len is the maximum length allowed in the destination. A dest_len
1256 of -1 is no longer allowed: push_utf8() panics if it is passed.
1259 static size_t push_utf8(void *dest, const char *src, size_t dest_len, int flags)
1263 char *tmpbuf = NULL;
1265 if (dest_len == (size_t)-1) {
1266 /* No longer allow dest_len of -1. */
1267 smb_panic("push_utf8 - invalid dest_len of -1");
1270 if (flags & STR_UPPER) {
1271 tmpbuf = strupper_talloc(NULL, src);
1276 src_len = strlen(src);
1279 src_len = strlen(src);
1280 if (flags & STR_TERMINATE) {
1284 ret = convert_string(CH_UNIX, CH_UTF8, src, src_len, dest, dest_len, True);
1285 TALLOC_FREE(tmpbuf);
1289 size_t push_utf8_fstring(void *dest, const char *src)
1291 return push_utf8(dest, src, sizeof(fstring), STR_TERMINATE);
1295 * Copy a string from a unix char* src to a UTF-8 destination, allocating a buffer using talloc
1297 * @param dest always set at least to NULL
1298 * @param converted_size set to the number of bytes occupied by the string in
1299 * the destination on success.
1301 * @return true if new buffer was correctly allocated, and string was
1305 bool push_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src,
1306 size_t *converted_size)
1308 size_t src_len = strlen(src)+1;
1311 return convert_string_talloc(ctx, CH_UNIX, CH_UTF8, src, src_len,
1312 (void**)dest, converted_size, True);
1316 Copy a string from a ucs2 source to a unix char* destination.
1318 STR_TERMINATE means the string in src is null terminated.
1319 STR_NOALIGN means don't try to align.
1320 if STR_TERMINATE is set then src_len is ignored if it is -1.
1321 src_len is the length of the source area in bytes
1322 Return the number of bytes occupied by the string in src.
1323 The resulting string in "dest" is always null terminated.
1326 size_t pull_ucs2(const void *base_ptr, char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
1330 if (dest_len == (size_t)-1) {
1331 /* No longer allow dest_len of -1. */
1332 smb_panic("pull_ucs2 - invalid dest_len of -1");
1336 if (dest && dest_len > 0) {
1342 if (ucs2_align(base_ptr, src, flags)) {
1343 src = (const void *)((const char *)src + 1);
1344 if (src_len != (size_t)-1)
1348 if (flags & STR_TERMINATE) {
1349 /* src_len -1 is the default for null terminated strings. */
1350 if (src_len != (size_t)-1) {
1351 size_t len = strnlen_w((const smb_ucs2_t *)src,
1353 if (len < src_len/2)
1359 /* ucs2 is always a multiple of 2 bytes */
1360 if (src_len != (size_t)-1)
1363 ret = convert_string(CH_UTF16LE, CH_UNIX, src, src_len, dest, dest_len, True);
1364 if (ret == (size_t)-1) {
1369 if (src_len == (size_t)-1)
1372 if (dest_len && ret) {
1373 /* Did we already process the terminating zero ? */
1374 if (dest[MIN(ret-1, dest_len-1)] != 0) {
1375 dest[MIN(ret, dest_len-1)] = 0;
1385 Copy a string from a ucs2 source to a unix char* destination.
1386 Talloc version with a base pointer.
1387 Uses malloc if TALLOC_CTX is NULL (this is a bad interface and
1390 STR_TERMINATE means the string in src is null terminated.
1391 STR_NOALIGN means don't try to align.
1392 if STR_TERMINATE is set then src_len is ignored if it is -1.
1393 src_len is the length of the source area in bytes
1394 Return the number of bytes occupied by the string in src.
1395 The resulting string in "dest" is always null terminated.
1398 size_t pull_ucs2_base_talloc(TALLOC_CTX *ctx,
1399 const void *base_ptr,
1411 /* Ensure we never use the braindead "malloc" variant. */
1413 smb_panic("NULL talloc CTX in pull_ucs2_base_talloc\n");
1421 if (ucs2_align(base_ptr, src, flags)) {
1422 src = (const void *)((const char *)src + 1);
1423 if (src_len != (size_t)-1)
1427 if (flags & STR_TERMINATE) {
1428 /* src_len -1 is the default for null terminated strings. */
1429 if (src_len != (size_t)-1) {
1430 size_t len = strnlen_w((const smb_ucs2_t *)src,
1432 if (len < src_len/2)
1437 * src_len == -1 - alloc interface won't take this
1438 * so we must calculate.
1440 src_len = (strlen_w((const smb_ucs2_t *)src)+1)*sizeof(smb_ucs2_t);
1442 /* Ensure we don't use an insane length from the client. */
1443 if (src_len >= 1024*1024) {
1444 smb_panic("Bad src length in pull_ucs2_base_talloc\n");
1447 /* Can't have an unlimited length
1448 * non STR_TERMINATE'd.
1450 if (src_len == (size_t)-1) {
1456 /* src_len != -1 here. */
1458 /* ucs2 is always a multiple of 2 bytes */
1461 if (!convert_string_talloc(ctx, CH_UTF16LE, CH_UNIX, src, src_len,
1462 (void *)&dest, &dest_len, True)) {
1467 /* Did we already process the terminating zero ? */
1468 if (dest[dest_len-1] != 0) {
1469 size_t size = talloc_get_size(dest);
1470 /* Have we got space to append the '\0' ? */
1471 if (size <= dest_len) {
1473 dest = TALLOC_REALLOC_ARRAY(ctx, dest, char,
1477 dest_len = (size_t)-1;
1482 dest[dest_len] = '\0';
1493 size_t pull_ucs2_fstring(char *dest, const void *src)
1495 return pull_ucs2(NULL, dest, src, sizeof(fstring), -1, STR_TERMINATE);
1499 * Copy a string from a UCS2 src to a unix char * destination, allocating a buffer using talloc
1501 * @param dest always set at least to NULL
1502 * @param converted_size set to the number of bytes occupied by the string in
1503 * the destination on success.
1505 * @return true if new buffer was correctly allocated, and string was
1509 bool pull_ucs2_talloc(TALLOC_CTX *ctx, char **dest, const smb_ucs2_t *src,
1510 size_t *converted_size)
1512 size_t src_len = (strlen_w(src)+1) * sizeof(smb_ucs2_t);
1515 return convert_string_talloc(ctx, CH_UTF16LE, CH_UNIX, src, src_len,
1516 (void **)dest, converted_size, True);
1520 * Copy a string from a UTF-8 src to a unix char * destination, allocating a buffer using talloc
1522 * @param dest always set at least to NULL
1523 * @param converted_size set to the number of bytes occupied by the string in
1524 * the destination on success.
1526 * @return true if new buffer was correctly allocated, and string was
1530 bool pull_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src,
1531 size_t *converted_size)
1533 size_t src_len = strlen(src)+1;
1536 return convert_string_talloc(ctx, CH_UTF8, CH_UNIX, src, src_len,
1537 (void **)dest, converted_size, True);
1542 * Copy a string from a DOS src to a unix char * destination, allocating a buffer using talloc
1544 * @param dest always set at least to NULL
1545 * @param converted_size set to the number of bytes occupied by the string in
1546 * the destination on success.
1548 * @return true if new buffer was correctly allocated, and string was
1552 bool pull_ascii_talloc(TALLOC_CTX *ctx, char **dest, const char *src,
1553 size_t *converted_size)
1555 size_t src_len = strlen(src)+1;
1558 return convert_string_talloc(ctx, CH_DOS, CH_UNIX, src, src_len,
1559 (void **)dest, converted_size, True);
1563 Copy a string from a char* src to a unicode or ascii
1564 dos codepage destination choosing unicode or ascii based on the
1566 Return the number of bytes occupied by the string in the destination.
1568 STR_TERMINATE means include the null termination.
1569 STR_UPPER means uppercase in the destination.
1570 STR_ASCII use ascii even with unicode packet.
1571 STR_NOALIGN means don't do alignment.
1572 dest_len is the maximum length allowed in the destination. If dest_len
1573 is -1 then no maximum is used.
1576 size_t push_string_check_fn(const char *function, unsigned int line,
1577 void *dest, const char *src,
1578 size_t dest_len, int flags)
1581 /* We really need to zero fill here, not clobber
1582 * region, as we want to ensure that valgrind thinks
1583 * all of the outgoing buffer has been written to
1584 * so a send() or write() won't trap an error.
1588 clobber_region(function, line, dest, dest_len);
1590 memset(dest, '\0', dest_len);
1594 if (!(flags & STR_ASCII) && (flags & STR_UNICODE)) {
1595 return push_ucs2(NULL, dest, src, dest_len, flags);
1597 return push_ascii(dest, src, dest_len, flags);
1602 Copy a string from a char* src to a unicode or ascii
1603 dos codepage destination choosing unicode or ascii based on the
1604 flags in the SMB buffer starting at base_ptr.
1605 Return the number of bytes occupied by the string in the destination.
1607 STR_TERMINATE means include the null termination.
1608 STR_UPPER means uppercase in the destination.
1609 STR_ASCII use ascii even with unicode packet.
1610 STR_NOALIGN means don't do alignment.
1611 dest_len is the maximum length allowed in the destination. If dest_len
1612 is -1 then no maximum is used.
1615 size_t push_string_base(const char *function, unsigned int line,
1616 const char *base, uint16 flags2,
1617 void *dest, const char *src,
1618 size_t dest_len, int flags)
1621 /* We really need to zero fill here, not clobber
1622 * region, as we want to ensure that valgrind thinks
1623 * all of the outgoing buffer has been written to
1624 * so a send() or write() won't trap an error.
1628 clobber_region(function, line, dest, dest_len);
1630 memset(dest, '\0', dest_len);
1634 if (!(flags & STR_ASCII) && \
1635 ((flags & STR_UNICODE || \
1636 (flags2 & FLAGS2_UNICODE_STRINGS)))) {
1637 return push_ucs2(base, dest, src, dest_len, flags);
1639 return push_ascii(dest, src, dest_len, flags);
1643 Copy a string from a char* src to a unicode or ascii
1644 dos codepage destination choosing unicode or ascii based on the
1646 Return the number of bytes occupied by the string in the destination.
1648 STR_TERMINATE means include the null termination.
1649 STR_UPPER means uppercase in the destination.
1650 STR_ASCII use ascii even with unicode packet.
1651 STR_NOALIGN means don't do alignment.
1652 dest_len is the maximum length allowed in the destination. If dest_len
1653 is -1 then no maximum is used.
1656 ssize_t push_string(void *dest, const char *src, size_t dest_len, int flags)
1660 /* We really need to zero fill here, not clobber
1661 * region, as we want to ensure that valgrind thinks
1662 * all of the outgoing buffer has been written to
1663 * so a send() or write() won't trap an error.
1666 memset(dest, '\0', dest_len);
1669 if (!(flags & STR_ASCII) && \
1670 (flags & STR_UNICODE)) {
1671 ret = push_ucs2(NULL, dest, src, dest_len, flags);
1673 ret = push_ascii(dest, src, dest_len, flags);
1675 if (ret == (size_t)-1) {
1682 Copy a string from a unicode or ascii source (depending on
1683 the packet flags) to a char* destination.
1685 STR_TERMINATE means the string in src is null terminated.
1686 STR_UNICODE means to force as unicode.
1687 STR_ASCII use ascii even with unicode packet.
1688 STR_NOALIGN means don't do alignment.
1689 if STR_TERMINATE is set then src_len is ignored if it is -1
1690 src_len is the length of the source area in bytes.
1691 Return the number of bytes occupied by the string in src.
1692 The resulting string in "dest" is always null terminated.
1695 size_t pull_string_fn(const char *function,
1697 const void *base_ptr,
1706 clobber_region(function, line, dest, dest_len);
1709 if ((base_ptr == NULL) && ((flags & (STR_ASCII|STR_UNICODE)) == 0)) {
1710 smb_panic("No base ptr to get flg2 and neither ASCII nor "
1714 if (!(flags & STR_ASCII) && \
1715 ((flags & STR_UNICODE || \
1716 (smb_flags2 & FLAGS2_UNICODE_STRINGS)))) {
1717 return pull_ucs2(base_ptr, dest, src, dest_len, src_len, flags);
1719 return pull_ascii(dest, src, dest_len, src_len, flags);
1723 Copy a string from a unicode or ascii source (depending on
1724 the packet flags) to a char* destination.
1725 Variant that uses talloc.
1727 STR_TERMINATE means the string in src is null terminated.
1728 STR_UNICODE means to force as unicode.
1729 STR_ASCII use ascii even with unicode packet.
1730 STR_NOALIGN means don't do alignment.
1731 if STR_TERMINATE is set then src_len is ignored if it is -1
1732 src_len is the length of the source area in bytes.
1733 Return the number of bytes occupied by the string in src.
1734 The resulting string in "dest" is always null terminated.
1737 size_t pull_string_talloc_fn(const char *function,
1740 const void *base_ptr,
1747 if ((base_ptr == NULL) && ((flags & (STR_ASCII|STR_UNICODE)) == 0)) {
1748 smb_panic("No base ptr to get flg2 and neither ASCII nor "
1752 if (!(flags & STR_ASCII) && \
1753 ((flags & STR_UNICODE || \
1754 (smb_flags2 & FLAGS2_UNICODE_STRINGS)))) {
1755 return pull_ucs2_base_talloc(ctx,
1762 return pull_ascii_base_talloc(ctx,
1770 size_t align_string(const void *base_ptr, const char *p, int flags)
1772 if (!(flags & STR_ASCII) && \
1773 ((flags & STR_UNICODE || \
1774 (SVAL(base_ptr, smb_flg2) & FLAGS2_UNICODE_STRINGS)))) {
1775 return ucs2_align(base_ptr, p, flags);
1781 Return the unicode codepoint for the next multi-byte CH_UNIX character
1782 in the string. The unicode codepoint (codepoint_t) is an unsigned 32 bit value.
1784 Also return the number of bytes consumed (which tells the caller
1785 how many bytes to skip to get to the next CH_UNIX character).
1787 Return INVALID_CODEPOINT if the next character cannot be converted.
1790 codepoint_t next_codepoint(const char *str, size_t *size)
1792 /* It cannot occupy more than 4 bytes in UTF16 format */
1794 smb_iconv_t descriptor;
1800 if ((str[0] & 0x80) == 0) {
1802 return (codepoint_t)str[0];
1805 /* We assume that no multi-byte character can take
1806 more than 5 bytes. This is OK as we only
1807 support codepoints up to 1M */
1809 ilen_orig = strnlen(str, 5);
1812 lazy_initialize_conv();
1814 descriptor = conv_handles[CH_UNIX][CH_UTF16LE];
1815 if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
1817 return INVALID_CODEPOINT;
1820 /* This looks a little strange, but it is needed to cope
1821 with codepoints above 64k which are encoded as per RFC2781. */
1823 outbuf = (char *)buf;
1824 smb_iconv(descriptor, &str, &ilen, &outbuf, &olen);
1826 /* We failed to convert to a 2 byte character.
1827 See if we can convert to a 4 UTF16-LE byte char encoding.
1830 outbuf = (char *)buf;
1831 smb_iconv(descriptor, &str, &ilen, &outbuf, &olen);
1833 /* We didn't convert any bytes */
1835 return INVALID_CODEPOINT;
1842 *size = ilen_orig - ilen;
1845 /* 2 byte, UTF16-LE encoded value. */
1846 return (codepoint_t)SVAL(buf, 0);
1849 /* Decode a 4 byte UTF16-LE character manually.
1850 See RFC2781 for the encoding mechanism.
1852 codepoint_t w1 = SVAL(buf,0) & ~0xD800;
1853 codepoint_t w2 = SVAL(buf,2) & ~0xDC00;
1855 return (codepoint_t)0x10000 +
1859 /* no other length is valid */
1860 return INVALID_CODEPOINT;
1864 push a single codepoint into a CH_UNIX string; the target string must
1865 be able to hold the full character, which is guaranteed if it is at
1866 least 5 bytes in size. The caller may pass less than 5 bytes if they
1867 are sure the character will fit (for example, you can assume that
1868 uppercase/lowercase of a character will not add more than 1 byte)
1870 return the number of bytes occupied by the CH_UNIX character, or
1873 _PUBLIC_ ssize_t push_codepoint(char *str, codepoint_t c)
1875 smb_iconv_t descriptor;
1885 lazy_initialize_conv();
1887 descriptor = conv_handles[CH_UNIX][CH_UTF16LE];
1888 if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
1895 inbuf = (char *)buf;
1897 smb_iconv(descriptor, &inbuf, &ilen, &str, &olen);
1906 buf[0] = (c>>10) & 0xFF;
1907 buf[1] = (c>>18) | 0xd8;
1909 buf[3] = ((c>>8) & 0x3) | 0xdc;
1913 inbuf = (char *)buf;
1915 smb_iconv(descriptor, &inbuf, &ilen, &str, &olen);