2 Unix SMB/CIFS implementation.
3 Character set conversion Extensions
4 Copyright (C) Igor Vergeichik <iverg@mail.ru> 2001
5 Copyright (C) Andrew Tridgell 2001
6 Copyright (C) Simo Sorce 2001
7 Copyright (C) Martin Pool 2003
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 2 of the License, or
12 (at your option) any later version.
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
26 /* We can parameterize this if someone complains.... JRA. */
28 char lp_failed_convert_char(void)
36 * @brief Character-set conversion routines built on our iconv.
38 * @note Samba's internal character set (at least in the 3.0 series)
39 * is always the same as the one for the Unix filesystem. It is
40 * <b>not</b> necessarily UTF-8 and may be different on machines that
41 * need i18n filenames to be compatible with Unix software. It does
42 * have to be a superset of ASCII. All multibyte sequences must start
43 * with a byte with the high bit set.
49 static smb_iconv_t conv_handles[NUM_CHARSETS][NUM_CHARSETS];
50 static BOOL conv_silent; /* Should we do a debug if the conversion fails ? */
53 * Return the name of a charset to give to iconv().
55 static const char *charset_name(charset_t ch)
57 const char *ret = NULL;
59 if (ch == CH_UCS2) ret = "UTF-16LE";
60 else if (ch == CH_UNIX) ret = lp_unix_charset();
61 else if (ch == CH_DOS) ret = lp_dos_charset();
62 else if (ch == CH_DISPLAY) ret = lp_display_charset();
63 else if (ch == CH_UTF8) ret = "UTF8";
65 #if defined(HAVE_NL_LANGINFO) && defined(CODESET)
66 if (ret && !strcmp(ret, "LOCALE")) {
67 const char *ln = NULL;
70 setlocale(LC_ALL, "");
72 ln = nl_langinfo(CODESET);
74 /* Check whether the charset name is supported
76 smb_iconv_t handle = smb_iconv_open(ln,"UCS-2LE");
77 if (handle == (smb_iconv_t) -1) {
78 DEBUG(5,("Locale charset '%s' unsupported, using ASCII instead\n", ln));
81 DEBUG(5,("Substituting charset '%s' for LOCALE\n", ln));
82 smb_iconv_close(handle);
89 if (!ret || !*ret) ret = "ASCII";
93 void lazy_initialize_conv(void)
95 static int initialized = False;
105 * Initialize iconv conversion descriptors.
107 * This is called the first time it is needed, and also called again
108 * every time the configuration is reloaded, because the charset or
109 * codepage might have changed.
111 void init_iconv(void)
114 BOOL did_reload = False;
116 /* so that charset_name() works we need to get the UNIX<->UCS2 going
118 if (!conv_handles[CH_UNIX][CH_UCS2])
119 conv_handles[CH_UNIX][CH_UCS2] = smb_iconv_open(charset_name(CH_UCS2), "ASCII");
121 if (!conv_handles[CH_UCS2][CH_UNIX])
122 conv_handles[CH_UCS2][CH_UNIX] = smb_iconv_open("ASCII", charset_name(CH_UCS2));
124 for (c1=0;c1<NUM_CHARSETS;c1++) {
125 for (c2=0;c2<NUM_CHARSETS;c2++) {
126 const char *n1 = charset_name((charset_t)c1);
127 const char *n2 = charset_name((charset_t)c2);
128 if (conv_handles[c1][c2] &&
129 strcmp(n1, conv_handles[c1][c2]->from_name) == 0 &&
130 strcmp(n2, conv_handles[c1][c2]->to_name) == 0)
135 if (conv_handles[c1][c2])
136 smb_iconv_close(conv_handles[c1][c2]);
138 conv_handles[c1][c2] = smb_iconv_open(n2,n1);
139 if (conv_handles[c1][c2] == (smb_iconv_t)-1) {
140 DEBUG(0,("init_iconv: Conversion from %s to %s not supported\n",
141 charset_name((charset_t)c1), charset_name((charset_t)c2)));
148 DEBUG(0,("init_iconv: Attempting to replace with conversion from %s to %s\n",
150 conv_handles[c1][c2] = smb_iconv_open(n2,n1);
151 if (!conv_handles[c1][c2]) {
152 DEBUG(0,("init_iconv: Conversion from %s to %s failed", n1, n2));
153 smb_panic("init_iconv: conv_handle initialization failed.");
160 /* XXX: Does this really get called every time the dos
161 * codepage changes? */
162 /* XXX: Is the did_reload test too strict? */
164 init_doschar_table();
171 * Convert a string from one encoding to another, performing error checking etc.
172 * Slow path version - uses (slow) iconv.
174 * @param src pointer to source string (multibyte or singlebyte)
175 * @param srclen length of the source string in bytes
176 * @param dest pointer to destination string (multibyte or singlebyte)
177 * @param destlen maximal length allowed for string
178 * @param allow_bad_conv determines if a "best effort" conversion is acceptable (never returns errors)
179 * @returns the number of bytes occupied in the destination
181 * Ensure the srclen contains the terminating zero.
185 static size_t convert_string_internal(charset_t from, charset_t to,
186 void const *src, size_t srclen,
187 void *dest, size_t destlen, BOOL allow_bad_conv)
191 const char* inbuf = (const char*)src;
192 char* outbuf = (char*)dest;
193 smb_iconv_t descriptor;
195 lazy_initialize_conv();
197 descriptor = conv_handles[from][to];
199 if (srclen == (size_t)-1) {
200 if (from == CH_UCS2) {
201 srclen = (strlen_w((const smb_ucs2_t *)src)+1) * 2;
203 srclen = strlen((const char *)src)+1;
208 if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
210 DEBUG(0,("convert_string_internal: Conversion not supported.\n"));
219 retval = smb_iconv(descriptor, &inbuf, &i_len, &outbuf, &o_len);
220 if(retval==(size_t)-1) {
221 const char *reason="unknown error";
224 reason="Incomplete multibyte sequence";
226 DEBUG(3,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
231 reason="No more room";
233 if (from == CH_UNIX) {
234 DEBUG(3,("E2BIG: convert_string(%s,%s): srclen=%u destlen=%u - '%s'\n",
235 charset_name(from), charset_name(to),
236 (unsigned int)srclen, (unsigned int)destlen, (const char *)src));
238 DEBUG(3,("E2BIG: convert_string(%s,%s): srclen=%u destlen=%u\n",
239 charset_name(from), charset_name(to),
240 (unsigned int)srclen, (unsigned int)destlen));
245 reason="Illegal multibyte sequence";
247 DEBUG(3,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
253 DEBUG(0,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
256 /* smb_panic(reason); */
258 return destlen-o_len;
263 * Conversion not supported. This is actually an error, but there are so
264 * many misconfigured iconv systems and smb.conf's out there we can't just
265 * fail. Do a very bad conversion instead.... JRA.
269 if (o_len == 0 || i_len == 0)
270 return destlen - o_len;
272 if (from == CH_UCS2 && to != CH_UCS2) {
273 /* Can't convert from ucs2 to multibyte. Replace with the default fail char. */
275 return destlen - o_len;
277 *outbuf = lp_failed_convert_char();
286 if (o_len == 0 || i_len == 0)
287 return destlen - o_len;
289 /* Keep trying with the next char... */
292 } else if (from != CH_UCS2 && to == CH_UCS2) {
293 /* Can't convert to ucs2 - just widen by adding the default fail char then zero. */
295 return destlen - o_len;
297 outbuf[0] = lp_failed_convert_char();
306 if (o_len == 0 || i_len == 0)
307 return destlen - o_len;
309 /* Keep trying with the next char... */
312 } else if (from != CH_UCS2 && to != CH_UCS2) {
313 /* Failed multibyte to multibyte. Just copy the default fail char and
315 outbuf[0] = lp_failed_convert_char();
323 if (o_len == 0 || i_len == 0)
324 return destlen - o_len;
326 /* Keep trying with the next char... */
330 /* Keep compiler happy.... */
331 return destlen - o_len;
337 * Convert a string from one encoding to another, performing error checking etc.
338 * Fast path version - handles ASCII first.
340 * @param src pointer to source string (multibyte or singlebyte)
341 * @param srclen length of the source string in bytes, or -1 for nul terminated.
342 * @param dest pointer to destination string (multibyte or singlebyte)
343 * @param destlen maximal length allowed for string - *NEVER* -1.
344 * @param allow_bad_conv determines if a "best effort" conversion is acceptable (never returns errors)
345 * @returns the number of bytes occupied in the destination
347 * Ensure the srclen contains the terminating zero.
349 * This function has been hand-tuned to provide a fast path.
350 * Don't change unless you really know what you are doing. JRA.
353 size_t convert_string(charset_t from, charset_t to,
354 void const *src, size_t srclen,
355 void *dest, size_t destlen, BOOL allow_bad_conv)
358 * NB. We deliberately don't do a strlen here if srclen == -1.
359 * This is very expensive over millions of calls and is taken
360 * care of in the slow path in convert_string_internal. JRA.
364 SMB_ASSERT(destlen != (size_t)-1);
370 if (from != CH_UCS2 && to != CH_UCS2) {
371 const unsigned char *p = (const unsigned char *)src;
372 unsigned char *q = (unsigned char *)dest;
373 size_t slen = srclen;
374 size_t dlen = destlen;
375 unsigned char lastp = '\0';
378 /* If all characters are ascii, fast path here. */
379 while (slen && dlen) {
380 if ((lastp = *p) <= 0x7f) {
382 if (slen != (size_t)-1) {
390 #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
393 return retval + convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
398 /* Even if we fast path we should note if we ran out of room. */
399 if (((slen != (size_t)-1) && slen) ||
400 ((slen == (size_t)-1) && lastp)) {
405 } else if (from == CH_UCS2 && to != CH_UCS2) {
406 const unsigned char *p = (const unsigned char *)src;
407 unsigned char *q = (unsigned char *)dest;
409 size_t slen = srclen;
410 size_t dlen = destlen;
411 unsigned char lastp = '\0';
413 /* If all characters are ascii, fast path here. */
414 while (((slen == (size_t)-1) || (slen >= 2)) && dlen) {
415 if (((lastp = *p) <= 0x7f) && (p[1] == 0)) {
417 if (slen != (size_t)-1) {
426 #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
429 return retval + convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
434 /* Even if we fast path we should note if we ran out of room. */
435 if (((slen != (size_t)-1) && slen) ||
436 ((slen == (size_t)-1) && lastp)) {
441 } else if (from != CH_UCS2 && to == CH_UCS2) {
442 const unsigned char *p = (const unsigned char *)src;
443 unsigned char *q = (unsigned char *)dest;
445 size_t slen = srclen;
446 size_t dlen = destlen;
447 unsigned char lastp = '\0';
449 /* If all characters are ascii, fast path here. */
450 while (slen && (dlen >= 2)) {
451 if ((lastp = *p) <= 0x7F) {
454 if (slen != (size_t)-1) {
462 #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
465 return retval + convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
470 /* Even if we fast path we should note if we ran out of room. */
471 if (((slen != (size_t)-1) && slen) ||
472 ((slen == (size_t)-1) && lastp)) {
479 #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
482 return convert_string_internal(from, to, src, srclen, dest, destlen, allow_bad_conv);
486 * Convert between character sets, allocating a new buffer for the result.
488 * @param ctx TALLOC_CTX to use to allocate with. If NULL use malloc.
489 * @param srclen length of source buffer.
490 * @param dest always set at least to NULL
491 * @note -1 is not accepted for srclen.
493 * @returns Size in bytes of the converted string; or -1 in case of error.
495 * Ensure the srclen contains the terminating zero.
497 * I hate the goto's in this function. It's embarrassing.....
498 * There has to be a cleaner way to do this. JRA.
501 size_t convert_string_allocate(TALLOC_CTX *ctx, charset_t from, charset_t to,
502 void const *src, size_t srclen, void **dest, BOOL allow_bad_conv)
504 size_t i_len, o_len, destlen = MAX(srclen, 512);
506 const char *inbuf = (const char *)src;
507 char *outbuf = NULL, *ob = NULL;
508 smb_iconv_t descriptor;
512 if (src == NULL || srclen == (size_t)-1)
517 lazy_initialize_conv();
519 descriptor = conv_handles[from][to];
521 if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
523 DEBUG(0,("convert_string_allocate: Conversion not supported.\n"));
529 if ((destlen*2) < destlen) {
530 /* wrapped ! abort. */
532 DEBUG(0, ("convert_string_allocate: destlen wrapped !\n"));
537 destlen = destlen * 2;
541 ob = (char *)TALLOC_REALLOC(ctx, ob, destlen);
543 ob = (char *)SMB_REALLOC(ob, destlen);
547 DEBUG(0, ("convert_string_allocate: realloc failed!\n"));
556 retval = smb_iconv(descriptor,
559 if(retval == (size_t)-1) {
560 const char *reason="unknown error";
563 reason="Incomplete multibyte sequence";
565 DEBUG(3,("convert_string_allocate: Conversion error: %s(%s)\n",reason,inbuf));
572 reason="Illegal multibyte sequence";
574 DEBUG(3,("convert_string_allocate: Conversion error: %s(%s)\n",reason,inbuf));
580 DEBUG(0,("Conversion error: %s(%s)\n",reason,inbuf));
581 /* smb_panic(reason); */
587 destlen = destlen - o_len;
589 ob = (char *)TALLOC_REALLOC(ctx,ob,destlen);
591 ob = (char *)SMB_REALLOC(ob,destlen);
594 if (destlen && !ob) {
595 DEBUG(0, ("convert_string_allocate: out of memory!\n"));
605 * Conversion not supported. This is actually an error, but there are so
606 * many misconfigured iconv systems and smb.conf's out there we can't just
607 * fail. Do a very bad conversion instead.... JRA.
611 if (o_len == 0 || i_len == 0)
614 if (from == CH_UCS2 && to != CH_UCS2) {
615 /* Can't convert from ucs2 to multibyte. Just use the default fail char. */
620 *outbuf = lp_failed_convert_char();
629 if (o_len == 0 || i_len == 0)
632 /* Keep trying with the next char... */
635 } else if (from != CH_UCS2 && to == CH_UCS2) {
636 /* Can't convert to ucs2 - just widen by adding the default fail char then zero. */
640 outbuf[0] = lp_failed_convert_char();
649 if (o_len == 0 || i_len == 0)
652 /* Keep trying with the next char... */
655 } else if (from != CH_UCS2 && to != CH_UCS2) {
656 /* Failed multibyte to multibyte. Just copy the default fail char and
658 outbuf[0] = lp_failed_convert_char();
666 if (o_len == 0 || i_len == 0)
669 /* Keep trying with the next char... */
673 /* Keep compiler happy.... */
680 * Convert between character sets, allocating a new buffer using talloc for the result.
682 * @param srclen length of source buffer.
683 * @param dest always set at least to NULL
684 * @note -1 is not accepted for srclen.
686 * @returns Size in bytes of the converted string; or -1 in case of error.
688 static size_t convert_string_talloc(TALLOC_CTX *ctx, charset_t from, charset_t to,
689 void const *src, size_t srclen, void **dest, BOOL allow_bad_conv)
694 dest_len=convert_string_allocate(ctx, from, to, src, srclen, dest, allow_bad_conv);
695 if (dest_len == (size_t)-1)
702 size_t unix_strupper(const char *src, size_t srclen, char *dest, size_t destlen)
707 size = push_ucs2_allocate(&buffer, src);
708 if (size == (size_t)-1) {
709 smb_panic("failed to create UCS2 buffer");
711 if (!strupper_w(buffer) && (dest == src)) {
716 size = convert_string(CH_UCS2, CH_UNIX, buffer, size, dest, destlen, True);
722 strdup() a unix string to upper case.
726 char *strdup_upper(const char *s)
729 const unsigned char *p = (const unsigned char *)s;
730 unsigned char *q = (unsigned char *)out_buffer;
732 /* this is quite a common operation, so we want it to be
733 fast. We optimise for the ascii case, knowing that all our
734 supported multi-byte character sets are ascii-compatible
735 (ie. they match for the first 128 chars) */
740 *q++ = toupper_ascii(*p);
744 if (p - ( const unsigned char *)s >= sizeof(pstring))
752 size = convert_string(CH_UNIX, CH_UCS2, s, -1, buffer, sizeof(buffer), True);
753 if (size == (size_t)-1) {
759 size = convert_string(CH_UCS2, CH_UNIX, buffer, -1, out_buffer, sizeof(out_buffer), True);
760 if (size == (size_t)-1) {
765 return SMB_STRDUP(out_buffer);
768 size_t unix_strlower(const char *src, size_t srclen, char *dest, size_t destlen)
771 smb_ucs2_t *buffer = NULL;
773 size = convert_string_allocate(NULL, CH_UNIX, CH_UCS2, src, srclen,
774 (void **)(void *)&buffer, True);
775 if (size == (size_t)-1 || !buffer) {
776 smb_panic("failed to create UCS2 buffer");
778 if (!strlower_w(buffer) && (dest == src)) {
782 size = convert_string(CH_UCS2, CH_UNIX, buffer, size, dest, destlen, True);
788 strdup() a unix string to lower case.
791 char *strdup_lower(const char *s)
794 smb_ucs2_t *buffer = NULL;
797 size = push_ucs2_allocate(&buffer, s);
798 if (size == -1 || !buffer) {
804 size = pull_ucs2_allocate(&out_buffer, buffer);
807 if (size == (size_t)-1) {
814 static size_t ucs2_align(const void *base_ptr, const void *p, int flags)
816 if (flags & (STR_NOALIGN|STR_ASCII))
818 return PTR_DIFF(p, base_ptr) & 1;
823 * Copy a string from a char* unix src to a dos codepage string destination.
825 * @return the number of bytes occupied by the string in the destination.
827 * @param flags can include
829 * <dt>STR_TERMINATE</dt> <dd>means include the null termination</dd>
830 * <dt>STR_UPPER</dt> <dd>means uppercase in the destination</dd>
833 * @param dest_len the maximum length in bytes allowed in the
834 * destination. If @p dest_len is -1 then no maximum is used.
836 size_t push_ascii(void *dest, const char *src, size_t dest_len, int flags)
838 size_t src_len = strlen(src);
841 /* treat a pstring as "unlimited" length */
842 if (dest_len == (size_t)-1)
843 dest_len = sizeof(pstring);
845 if (flags & STR_UPPER) {
846 pstrcpy(tmpbuf, src);
851 if (flags & (STR_TERMINATE | STR_TERMINATE_ASCII))
854 return convert_string(CH_UNIX, CH_DOS, src, src_len, dest, dest_len, True);
857 size_t push_ascii_fstring(void *dest, const char *src)
859 return push_ascii(dest, src, sizeof(fstring), STR_TERMINATE);
862 size_t push_ascii_pstring(void *dest, const char *src)
864 return push_ascii(dest, src, sizeof(pstring), STR_TERMINATE);
867 /********************************************************************
868 Push an nstring - ensure null terminated. Written by
869 moriyama@miraclelinux.com (MORIYAMA Masayuki).
870 ********************************************************************/
872 size_t push_ascii_nstring(void *dest, const char *src)
874 size_t i, buffer_len, dest_len;
878 buffer_len = push_ucs2_allocate(&buffer, src);
879 if (buffer_len == (size_t)-1) {
880 smb_panic("failed to create UCS2 buffer");
883 /* We're using buffer_len below to count ucs2 characters, not bytes. */
884 buffer_len /= sizeof(smb_ucs2_t);
887 for (i = 0; buffer[i] != 0 && (i < buffer_len); i++) {
888 unsigned char mb[10];
889 /* Convert one smb_ucs2_t character at a time. */
890 size_t mb_len = convert_string(CH_UCS2, CH_DOS, buffer+i, sizeof(smb_ucs2_t), mb, sizeof(mb), False);
891 if ((mb_len != (size_t)-1) && (dest_len + mb_len <= MAX_NETBIOSNAME_LEN - 1)) {
892 memcpy((char *)dest + dest_len, mb, mb_len);
899 ((char *)dest)[dest_len] = '\0';
907 * Copy a string from a dos codepage source to a unix char* destination.
909 * The resulting string in "dest" is always null terminated.
911 * @param flags can have:
913 * <dt>STR_TERMINATE</dt>
914 * <dd>STR_TERMINATE means the string in @p src
915 * is null terminated, and src_len is ignored.</dd>
918 * @param src_len is the length of the source area in bytes.
919 * @returns the number of bytes occupied by the string in @p src.
921 size_t pull_ascii(char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
925 if (dest_len == (size_t)-1)
926 dest_len = sizeof(pstring);
928 if (flags & STR_TERMINATE) {
929 if (src_len == (size_t)-1) {
930 src_len = strlen(src) + 1;
932 size_t len = strnlen(src, src_len);
939 ret = convert_string(CH_DOS, CH_UNIX, src, src_len, dest, dest_len, True);
940 if (ret == (size_t)-1) {
945 dest[MIN(ret, dest_len-1)] = 0;
952 size_t pull_ascii_pstring(char *dest, const void *src)
954 return pull_ascii(dest, src, sizeof(pstring), -1, STR_TERMINATE);
957 size_t pull_ascii_fstring(char *dest, const void *src)
959 return pull_ascii(dest, src, sizeof(fstring), -1, STR_TERMINATE);
962 /* When pulling an nstring it can expand into a larger size (dos cp -> utf8). Cope with this. */
964 size_t pull_ascii_nstring(char *dest, size_t dest_len, const void *src)
966 return pull_ascii(dest, src, dest_len, sizeof(nstring)-1, STR_TERMINATE);
970 * Copy a string from a char* src to a unicode destination.
972 * @returns the number of bytes occupied by the string in the destination.
974 * @param flags can have:
977 * <dt>STR_TERMINATE <dd>means include the null termination.
978 * <dt>STR_UPPER <dd>means uppercase in the destination.
979 * <dt>STR_NOALIGN <dd>means don't do alignment.
982 * @param dest_len is the maximum length allowed in the
983 * destination. If dest_len is -1 then no maximum is used.
986 size_t push_ucs2(const void *base_ptr, void *dest, const char *src, size_t dest_len, int flags)
992 /* treat a pstring as "unlimited" length */
993 if (dest_len == (size_t)-1)
994 dest_len = sizeof(pstring);
996 if (flags & STR_TERMINATE)
997 src_len = (size_t)-1;
999 src_len = strlen(src);
1001 if (ucs2_align(base_ptr, dest, flags)) {
1003 dest = (void *)((char *)dest + 1);
1009 /* ucs2 is always a multiple of 2 bytes */
1012 ret = convert_string(CH_UNIX, CH_UCS2, src, src_len, dest, dest_len, True);
1013 if (ret == (size_t)-1) {
1019 if (flags & STR_UPPER) {
1020 smb_ucs2_t *dest_ucs2 = dest;
1022 for (i = 0; i < (dest_len / 2) && dest_ucs2[i]; i++) {
1023 smb_ucs2_t v = toupper_w(dest_ucs2[i]);
1024 if (v != dest_ucs2[i]) {
1035 * Copy a string from a unix char* src to a UCS2 destination,
1036 * allocating a buffer using talloc().
1038 * @param dest always set at least to NULL
1040 * @returns The number of bytes occupied by the string in the destination
1041 * or -1 in case of error.
1043 size_t push_ucs2_talloc(TALLOC_CTX *ctx, smb_ucs2_t **dest, const char *src)
1045 size_t src_len = strlen(src)+1;
1048 return convert_string_talloc(ctx, CH_UNIX, CH_UCS2, src, src_len, (void **)dest, True);
1053 * Copy a string from a unix char* src to a UCS2 destination, allocating a buffer
1055 * @param dest always set at least to NULL
1057 * @returns The number of bytes occupied by the string in the destination
1058 * or -1 in case of error.
1061 size_t push_ucs2_allocate(smb_ucs2_t **dest, const char *src)
1063 size_t src_len = strlen(src)+1;
1066 return convert_string_allocate(NULL, CH_UNIX, CH_UCS2, src, src_len, (void **)dest, True);
1070 Copy a string from a char* src to a UTF-8 destination.
1071 Return the number of bytes occupied by the string in the destination
1073 STR_TERMINATE means include the null termination
1074 STR_UPPER means uppercase in the destination
1075 dest_len is the maximum length allowed in the destination. If dest_len
1076 is -1 then no maximum is used.
1079 static size_t push_utf8(void *dest, const char *src, size_t dest_len, int flags)
1081 size_t src_len = strlen(src);
1084 /* treat a pstring as "unlimited" length */
1085 if (dest_len == (size_t)-1)
1086 dest_len = sizeof(pstring);
1088 if (flags & STR_UPPER) {
1089 pstrcpy(tmpbuf, src);
1094 if (flags & STR_TERMINATE)
1097 return convert_string(CH_UNIX, CH_UTF8, src, src_len, dest, dest_len, True);
1100 size_t push_utf8_fstring(void *dest, const char *src)
1102 return push_utf8(dest, src, sizeof(fstring), STR_TERMINATE);
1106 * Copy a string from a unix char* src to a UTF-8 destination, allocating a buffer using talloc
1108 * @param dest always set at least to NULL
1110 * @returns The number of bytes occupied by the string in the destination
1113 size_t push_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src)
1115 size_t src_len = strlen(src)+1;
1118 return convert_string_talloc(ctx, CH_UNIX, CH_UTF8, src, src_len, (void**)dest, True);
1122 * Copy a string from a unix char* src to a UTF-8 destination, allocating a buffer
1124 * @param dest always set at least to NULL
1126 * @returns The number of bytes occupied by the string in the destination
1129 size_t push_utf8_allocate(char **dest, const char *src)
1131 size_t src_len = strlen(src)+1;
1134 return convert_string_allocate(NULL, CH_UNIX, CH_UTF8, src, src_len, (void **)dest, True);
1138 Copy a string from a ucs2 source to a unix char* destination.
1140 STR_TERMINATE means the string in src is null terminated.
1141 STR_NOALIGN means don't try to align.
1142 if STR_TERMINATE is set then src_len is ignored if it is -1.
1143 src_len is the length of the source area in bytes
1144 Return the number of bytes occupied by the string in src.
1145 The resulting string in "dest" is always null terminated.
1148 size_t pull_ucs2(const void *base_ptr, char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
1152 if (dest_len == (size_t)-1)
1153 dest_len = sizeof(pstring);
1155 if (ucs2_align(base_ptr, src, flags)) {
1156 src = (const void *)((const char *)src + 1);
1157 if (src_len != (size_t)-1)
1161 if (flags & STR_TERMINATE) {
1162 /* src_len -1 is the default for null terminated strings. */
1163 if (src_len != (size_t)-1) {
1164 size_t len = strnlen_w(src, src_len/2);
1165 if (len < src_len/2)
1171 /* ucs2 is always a multiple of 2 bytes */
1172 if (src_len != (size_t)-1)
1175 ret = convert_string(CH_UCS2, CH_UNIX, src, src_len, dest, dest_len, True);
1176 if (ret == (size_t)-1) {
1180 if (src_len == (size_t)-1)
1184 dest[MIN(ret, dest_len-1)] = 0;
1191 size_t pull_ucs2_pstring(char *dest, const void *src)
1193 return pull_ucs2(NULL, dest, src, sizeof(pstring), -1, STR_TERMINATE);
1196 size_t pull_ucs2_fstring(char *dest, const void *src)
1198 return pull_ucs2(NULL, dest, src, sizeof(fstring), -1, STR_TERMINATE);
1202 * Copy a string from a UCS2 src to a unix char * destination, allocating a buffer using talloc
1204 * @param dest always set at least to NULL
1206 * @returns The number of bytes occupied by the string in the destination
1209 size_t pull_ucs2_talloc(TALLOC_CTX *ctx, char **dest, const smb_ucs2_t *src)
1211 size_t src_len = (strlen_w(src)+1) * sizeof(smb_ucs2_t);
1213 return convert_string_talloc(ctx, CH_UCS2, CH_UNIX, src, src_len, (void **)dest, True);
1217 * Copy a string from a UCS2 src to a unix char * destination, allocating a buffer
1219 * @param dest always set at least to NULL
1221 * @returns The number of bytes occupied by the string in the destination
1224 size_t pull_ucs2_allocate(char **dest, const smb_ucs2_t *src)
1226 size_t src_len = (strlen_w(src)+1) * sizeof(smb_ucs2_t);
1228 return convert_string_allocate(NULL, CH_UCS2, CH_UNIX, src, src_len, (void **)dest, True);
1232 * Copy a string from a UTF-8 src to a unix char * destination, allocating a buffer using talloc
1234 * @param dest always set at least to NULL
1236 * @returns The number of bytes occupied by the string in the destination
1239 size_t pull_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src)
1241 size_t src_len = strlen(src)+1;
1243 return convert_string_talloc(ctx, CH_UTF8, CH_UNIX, src, src_len, (void **)dest, True);
1247 * Copy a string from a UTF-8 src to a unix char * destination, allocating a buffer
1249 * @param dest always set at least to NULL
1251 * @returns The number of bytes occupied by the string in the destination
1254 size_t pull_utf8_allocate(char **dest, const char *src)
1256 size_t src_len = strlen(src)+1;
1258 return convert_string_allocate(NULL, CH_UTF8, CH_UNIX, src, src_len, (void **)dest, True);
1262 * Copy a string from a DOS src to a unix char * destination, allocating a buffer using talloc
1264 * @param dest always set at least to NULL
1266 * @returns The number of bytes occupied by the string in the destination
1269 size_t pull_ascii_talloc(TALLOC_CTX *ctx, char **dest, const char *src)
1271 size_t src_len = strlen(src)+1;
1273 return convert_string_talloc(ctx, CH_DOS, CH_UNIX, src, src_len, (void **)dest, True);
1277 Copy a string from a char* src to a unicode or ascii
1278 dos codepage destination choosing unicode or ascii based on the
1279 flags in the SMB buffer starting at base_ptr.
1280 Return the number of bytes occupied by the string in the destination.
1282 STR_TERMINATE means include the null termination.
1283 STR_UPPER means uppercase in the destination.
1284 STR_ASCII use ascii even with unicode packet.
1285 STR_NOALIGN means don't do alignment.
1286 dest_len is the maximum length allowed in the destination. If dest_len
1287 is -1 then no maximum is used.
1290 size_t push_string_fn(const char *function, unsigned int line, const void *base_ptr, void *dest, const char *src, size_t dest_len, int flags)
1293 /* We really need to zero fill here, not clobber
1294 * region, as we want to ensure that valgrind thinks
1295 * all of the outgoing buffer has been written to
1296 * so a send() or write() won't trap an error.
1300 if (dest_len != (size_t)-1)
1301 clobber_region(function, line, dest, dest_len);
1303 if (dest_len != (size_t)-1)
1304 memset(dest, '\0', dest_len);
1308 if (!(flags & STR_ASCII) && \
1309 ((flags & STR_UNICODE || \
1310 (SVAL(base_ptr, smb_flg2) & FLAGS2_UNICODE_STRINGS)))) {
1311 return push_ucs2(base_ptr, dest, src, dest_len, flags);
1313 return push_ascii(dest, src, dest_len, flags);
1318 Copy a string from a unicode or ascii source (depending on
1319 the packet flags) to a char* destination.
1321 STR_TERMINATE means the string in src is null terminated.
1322 STR_UNICODE means to force as unicode.
1323 STR_ASCII use ascii even with unicode packet.
1324 STR_NOALIGN means don't do alignment.
1325 if STR_TERMINATE is set then src_len is ignored if it is -1
1326 src_len is the length of the source area in bytes.
1327 Return the number of bytes occupied by the string in src.
1328 The resulting string in "dest" is always null terminated.
1331 size_t pull_string_fn(const char *function, unsigned int line, const void *base_ptr, char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
1334 if (dest_len != (size_t)-1)
1335 clobber_region(function, line, dest, dest_len);
1338 if (!(flags & STR_ASCII) && \
1339 ((flags & STR_UNICODE || \
1340 (SVAL(base_ptr, smb_flg2) & FLAGS2_UNICODE_STRINGS)))) {
1341 return pull_ucs2(base_ptr, dest, src, dest_len, src_len, flags);
1343 return pull_ascii(dest, src, dest_len, src_len, flags);
1346 size_t align_string(const void *base_ptr, const char *p, int flags)
1348 if (!(flags & STR_ASCII) && \
1349 ((flags & STR_UNICODE || \
1350 (SVAL(base_ptr, smb_flg2) & FLAGS2_UNICODE_STRINGS)))) {
1351 return ucs2_align(base_ptr, p, flags);
1356 /****************************************************************
1357 Calculate the size (in bytes) of the next multibyte character in
1358 our internal character set. Note that s must point to the start
1359 of a valid mb char, not into the middle of one.
1360 ****************************************************************/
1362 size_t next_mb_char_size(const char *s)
1367 return 1; /* ascii. */
1370 for ( i = 1; i <=4; i++ ) {
1372 if (convert_string(CH_UNIX, CH_UCS2, s, i, &uc, 2, False) == 2) {
1374 DEBUG(10,("next_mb_char_size: size %u at string %s\n",
1375 (unsigned int)i, s));
1377 conv_silent = False;
1381 /* We're hosed - we don't know how big this is... */
1382 DEBUG(10,("next_mb_char_size: unknown size at string %s\n", s));
1383 conv_silent = False;