2 Unix SMB/CIFS implementation.
3 Character set conversion Extensions
4 Copyright (C) Igor Vergeichik <iverg@mail.ru> 2001
5 Copyright (C) Andrew Tridgell 2001
6 Copyright (C) Simo Sorce 2001
7 Copyright (C) Martin Pool 2003
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 3 of the License, or
12 (at your option) any later version.
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program. If not, see <http://www.gnu.org/licenses/>.
26 * Destroy global objects allocated by init_iconv()
28 void gfree_charcnv(void)
30 TALLOC_FREE(global_iconv_handle);
34 * Initialize iconv conversion descriptors.
36 * This is called the first time it is needed, and also called again
37 * every time the configuration is reloaded, because the charset or
38 * codepage might have changed.
42 global_iconv_handle = smb_iconv_handle_reinit(NULL, lp_dos_charset(),
43 lp_unix_charset(), lp_display_charset(),
44 true, global_iconv_handle);
48 talloc_strdup() a unix string to upper case.
51 char *talloc_strdup_upper(TALLOC_CTX *ctx, const char *s)
53 char *out_buffer = talloc_strdup(ctx,s);
54 const unsigned char *p = (const unsigned char *)s;
55 unsigned char *q = (unsigned char *)out_buffer;
61 /* this is quite a common operation, so we want it to be
62 fast. We optimise for the ascii case, knowing that all our
63 supported multi-byte character sets are ascii-compatible
64 (ie. they match for the first 128 chars) */
69 *q++ = toupper_ascii_fast(*p);
75 size_t converted_size, converted_size2;
76 smb_ucs2_t *ubuf = NULL;
78 /* We're not using the ascii buffer above. */
79 TALLOC_FREE(out_buffer);
81 if (!convert_string_talloc(ctx, CH_UNIX, CH_UTF16LE, s,
82 strlen(s)+1, (void *)&ubuf,
90 if (!convert_string_talloc(ctx, CH_UTF16LE, CH_UNIX, ubuf,
91 converted_size, (void *)&out_buffer,
98 /* Don't need the intermediate buffer
107 char *strupper_talloc(TALLOC_CTX *ctx, const char *s) {
108 return talloc_strdup_upper(ctx, s);
112 char *talloc_strdup_lower(TALLOC_CTX *ctx, const char *s)
114 size_t converted_size;
115 smb_ucs2_t *buffer = NULL;
118 if (!push_ucs2_talloc(ctx, &buffer, s, &converted_size)) {
124 if (!pull_ucs2_talloc(ctx, &out_buffer, buffer, &converted_size)) {
134 char *strlower_talloc(TALLOC_CTX *ctx, const char *s) {
135 return talloc_strdup_lower(ctx, s);
140 * Copy a string from a char* unix src to a dos codepage string destination.
142 * @return the number of bytes occupied by the string in the destination.
144 * @param flags can include
146 * <dt>STR_TERMINATE</dt> <dd>means include the null termination</dd>
147 * <dt>STR_UPPER</dt> <dd>means uppercase in the destination</dd>
150 * @param dest_len the maximum length in bytes allowed in the
153 size_t push_ascii(void *dest, const char *src, size_t dest_len, int flags)
155 size_t src_len = strlen(src);
160 /* No longer allow a length of -1. */
161 if (dest_len == (size_t)-1) {
162 smb_panic("push_ascii - dest_len == -1");
165 if (flags & STR_UPPER) {
166 tmpbuf = SMB_STRDUP(src);
168 smb_panic("malloc fail");
174 if (flags & (STR_TERMINATE | STR_TERMINATE_ASCII)) {
178 ret = convert_string(CH_UNIX, CH_DOS, src, src_len, dest, dest_len, &size);
180 (flags & (STR_TERMINATE | STR_TERMINATE_ASCII))
182 ((char *)dest)[0] = '\0';
185 return ret ? size : (size_t)-1;
188 /********************************************************************
189 Push and malloc an ascii string. src and dest null terminated.
190 ********************************************************************/
192 bool push_ascii_talloc(TALLOC_CTX *mem_ctx, char **dest, const char *src, size_t *converted_size)
194 size_t src_len = strlen(src)+1;
197 return convert_string_talloc(mem_ctx, CH_UNIX, CH_DOS, src, src_len,
198 (void **)dest, converted_size);
202 * Copy a string from a dos codepage source to a unix char* destination.
204 * The resulting string in "dest" is always null terminated.
206 * @param flags can have:
208 * <dt>STR_TERMINATE</dt>
209 * <dd>STR_TERMINATE means the string in @p src
210 * is null terminated; src_len is ignored only when it is -1, otherwise it bounds the scan for the terminator.</dd>
213 * @param src_len is the length of the source area in bytes.
214 * @returns the number of bytes occupied by the string in @p src.
216 size_t pull_ascii(char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
221 if (dest_len == (size_t)-1) {
222 /* No longer allow dest_len of -1. */
223 smb_panic("pull_ascii - invalid dest_len of -1");
226 if (flags & STR_TERMINATE) {
227 if (src_len == (size_t)-1) {
228 src_len = strlen((const char *)src) + 1;
230 size_t len = strnlen((const char *)src, src_len);
237 ret = convert_string(CH_DOS, CH_UNIX, src, src_len, dest, dest_len, &size);
243 if (dest_len && size) {
244 /* Did we already process the terminating zero ? */
245 if (dest[MIN(size-1, dest_len-1)] != 0) {
246 dest[MIN(size, dest_len-1)] = 0;
256 * Copy a string from a dos codepage source to a unix char* destination.
259 * The resulting string in "dest" is always null terminated.
261 * @param flags can have:
263 * <dt>STR_TERMINATE</dt>
264 * <dd>STR_TERMINATE means the string in @p src
265 * is null terminated; src_len still bounds the scan for the terminator and must not be -1.</dd>
268 * @param src_len is the length of the source area in bytes.
269 * @returns the number of bytes occupied by the string in @p src.
272 static size_t pull_ascii_base_talloc(TALLOC_CTX *ctx,
287 if (src_len == (size_t)-1) {
288 smb_panic("sec_len == -1 in pull_ascii_base_talloc");
291 if (flags & STR_TERMINATE) {
292 size_t len = strnlen((const char *)src, src_len);
296 /* Ensure we don't use an insane length from the client. */
297 if (src_len >= 1024*1024) {
298 char *msg = talloc_asprintf(ctx,
299 "Bad src length (%u) in "
300 "pull_ascii_base_talloc",
301 (unsigned int)src_len);
306 /* src_len != -1 here. */
308 if (!convert_string_talloc(ctx, CH_DOS, CH_UNIX, src, src_len, &dest,
313 if (dest_len && dest) {
314 /* Did we already process the terminating zero ? */
315 if (dest[dest_len-1] != 0) {
316 size_t size = talloc_get_size(dest);
317 /* Have we got space to append the '\0' ? */
318 if (size <= dest_len) {
320 dest = TALLOC_REALLOC_ARRAY(ctx, dest, char,
324 dest_len = (size_t)-1;
329 dest[dest_len] = '\0';
341 * Copy a string from a char* src to a unicode destination.
343 * @returns the number of bytes occupied by the string in the destination.
345 * @param flags can have:
348 * <dt>STR_TERMINATE <dd>means include the null termination.
349 * <dt>STR_UPPER <dd>means uppercase in the destination.
350 * <dt>STR_NOALIGN <dd>means don't do alignment.
353 * @param dest_len is the maximum length allowed in the
357 size_t push_ucs2(const void *base_ptr, void *dest, const char *src, size_t dest_len, int flags)
364 if (dest_len == (size_t)-1) {
365 /* No longer allow dest_len of -1. */
366 smb_panic("push_ucs2 - invalid dest_len of -1");
369 if (flags & STR_TERMINATE)
370 src_len = (size_t)-1;
372 src_len = strlen(src);
374 if (ucs2_align(base_ptr, dest, flags)) {
376 dest = (void *)((char *)dest + 1);
382 /* ucs2 is always a multiple of 2 bytes */
385 ret = convert_string(CH_UNIX, CH_UTF16LE, src, src_len, dest, dest_len, &size);
387 if ((flags & STR_TERMINATE) &&
397 if (flags & STR_UPPER) {
398 smb_ucs2_t *dest_ucs2 = (smb_ucs2_t *)dest;
401 /* We check for i < (ret / 2) below as the dest string isn't null
402 terminated if STR_TERMINATE isn't set. */
404 for (i = 0; i < (ret / 2) && i < (dest_len / 2) && dest_ucs2[i]; i++) {
405 smb_ucs2_t v = toupper_m(dest_ucs2[i]);
406 if (v != dest_ucs2[i]) {
417 * Copy a string from a unix char* src to a UCS2 destination,
418 * allocating a buffer using talloc().
420 * @param dest always set at least to NULL
421 * @param converted_size set to the number of bytes occupied by the string in
422 * the destination on success.
424 * @return true if new buffer was correctly allocated, and string was
427 bool push_ucs2_talloc(TALLOC_CTX *ctx, smb_ucs2_t **dest, const char *src,
428 size_t *converted_size)
430 size_t src_len = strlen(src)+1;
433 return convert_string_talloc(ctx, CH_UNIX, CH_UTF16LE, src, src_len,
434 (void **)dest, converted_size);
439 * Copy a string from a unix char* src to a UTF-8 destination, allocating a buffer using talloc
441 * @param dest always set at least to NULL
442 * @param converted_size set to the number of bytes occupied by the string in
443 * the destination on success.
445 * @return true if new buffer was correctly allocated, and string was
449 bool push_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src,
450 size_t *converted_size)
452 size_t src_len = strlen(src)+1;
455 return convert_string_talloc(ctx, CH_UNIX, CH_UTF8, src, src_len,
456 (void**)dest, converted_size);
460 Copy a string from a ucs2 source to a unix char* destination.
462 STR_TERMINATE means the string in src is null terminated.
463 STR_NOALIGN means don't try to align.
464 if STR_TERMINATE is set then src_len is ignored if it is -1.
465 src_len is the length of the source area in bytes
466 Return the number of bytes occupied by the string in src.
467 The resulting string in "dest" is always null terminated.
470 static size_t pull_ucs2(const void *base_ptr, char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
473 size_t ucs2_align_len = 0;
476 if (dest_len == (size_t)-1) {
477 /* No longer allow dest_len of -1. */
478 smb_panic("pull_ucs2 - invalid dest_len of -1");
482 if (dest && dest_len > 0) {
488 if (ucs2_align(base_ptr, src, flags)) {
489 src = (const void *)((const char *)src + 1);
490 if (src_len != (size_t)-1)
495 if (flags & STR_TERMINATE) {
496 /* src_len -1 is the default for null terminated strings. */
497 if (src_len != (size_t)-1) {
498 size_t len = strnlen_w((const smb_ucs2_t *)src,
506 /* ucs2 is always a multiple of 2 bytes */
507 if (src_len != (size_t)-1)
510 ret = convert_string(CH_UTF16LE, CH_UNIX, src, src_len, dest, dest_len, &size);
516 if (src_len == (size_t)-1)
519 if (dest_len && size) {
520 /* Did we already process the terminating zero ? */
521 if (dest[MIN(size-1, dest_len-1)] != 0) {
522 dest[MIN(size, dest_len-1)] = 0;
528 return src_len + ucs2_align_len;
532 Copy a string from a ucs2 source to a unix char* destination.
533 Talloc version with a base pointer.
534 Uses malloc if TALLOC_CTX is NULL (this is a bad interface and
537 STR_TERMINATE means the string in src is null terminated.
538 STR_NOALIGN means don't try to align.
539 if STR_TERMINATE is set then src_len is ignored if it is -1.
540 src_len is the length of the source area in bytes
541 Return the number of bytes occupied by the string in src.
542 The resulting string in "dest" is always null terminated.
545 static size_t pull_ucs2_base_talloc(TALLOC_CTX *ctx,
546 const void *base_ptr,
554 size_t ucs2_align_len = 0;
559 /* Ensure we never use the braindead "malloc" variant. */
561 smb_panic("NULL talloc CTX in pull_ucs2_base_talloc\n");
569 if (src_len == (size_t)-1) {
570 /* no longer used anywhere, but worth checking */
571 smb_panic("sec_len == -1 in pull_ucs2_base_talloc");
574 if (ucs2_align(base_ptr, src, flags)) {
575 src = (const void *)((const char *)src + 1);
580 if (flags & STR_TERMINATE) {
581 /* src_len -1 is the default for null terminated strings. */
582 size_t len = strnlen_w((const smb_ucs2_t *)src,
588 /* Ensure we don't use an insane length from the client. */
589 if (src_len >= 1024*1024) {
590 smb_panic("Bad src length in pull_ucs2_base_talloc\n");
594 /* ucs2 is always a multiple of 2 bytes */
597 if (!convert_string_talloc(ctx, CH_UTF16LE, CH_UNIX, src, src_len,
598 (void *)&dest, &dest_len)) {
603 /* Did we already process the terminating zero ? */
604 if (dest[dest_len-1] != 0) {
605 size_t size = talloc_get_size(dest);
606 /* Have we got space to append the '\0' ? */
607 if (size <= dest_len) {
609 dest = TALLOC_REALLOC_ARRAY(ctx, dest, char,
613 dest_len = (size_t)-1;
618 dest[dest_len] = '\0';
626 return src_len + ucs2_align_len;
630 * Copy a string from a UCS2 src to a unix char * destination, allocating a buffer using talloc
632 * @param dest always set at least to NULL
633 * @param converted_size set to the number of bytes occupied by the string in
634 * the destination on success.
636 * @return true if new buffer was correctly allocated, and string was
640 bool pull_ucs2_talloc(TALLOC_CTX *ctx, char **dest, const smb_ucs2_t *src,
641 size_t *converted_size)
643 size_t src_len = (strlen_w(src)+1) * sizeof(smb_ucs2_t);
646 return convert_string_talloc(ctx, CH_UTF16LE, CH_UNIX, src, src_len,
647 (void **)dest, converted_size);
651 * Copy a string from a UTF-8 src to a unix char * destination, allocating a buffer using talloc
653 * @param dest always set at least to NULL
654 * @param converted_size set to the number of bytes occupied by the string in
655 * the destination on success.
657 * @return true if new buffer was correctly allocated, and string was
661 bool pull_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src,
662 size_t *converted_size)
664 size_t src_len = strlen(src)+1;
667 return convert_string_talloc(ctx, CH_UTF8, CH_UNIX, src, src_len,
668 (void **)dest, converted_size);
673 * Copy a string from a DOS src to a unix char * destination, allocating a buffer using talloc
675 * @param dest always set at least to NULL
676 * @param converted_size set to the number of bytes occupied by the string in
677 * the destination on success.
679 * @return true if new buffer was correctly allocated, and string was
683 bool pull_ascii_talloc(TALLOC_CTX *ctx, char **dest, const char *src,
684 size_t *converted_size)
686 size_t src_len = strlen(src)+1;
689 return convert_string_talloc(ctx, CH_DOS, CH_UNIX, src, src_len,
690 (void **)dest, converted_size);
694 Copy a string from a char* src to a unicode or ascii
695 dos codepage destination choosing unicode or ascii based on the
697 Return the number of bytes occupied by the string in the destination.
699 STR_TERMINATE means include the null termination.
700 STR_UPPER means uppercase in the destination.
701 STR_ASCII use ascii even with unicode packet.
702 STR_NOALIGN means don't do alignment.
703 dest_len is the maximum length allowed in the destination. If dest_len
704 is -1 then no maxiumum is used.
707 size_t push_string_check_fn(void *dest, const char *src,
708 size_t dest_len, int flags)
710 if (!(flags & STR_ASCII) && (flags & STR_UNICODE)) {
711 return push_ucs2(NULL, dest, src, dest_len, flags);
713 return push_ascii(dest, src, dest_len, flags);
718 Copy a string from a char* src to a unicode or ascii
719 dos codepage destination choosing unicode or ascii based on the
720 flags in the SMB buffer starting at base_ptr.
721 Return the number of bytes occupied by the string in the destination.
723 STR_TERMINATE means include the null termination.
724 STR_UPPER means uppercase in the destination.
725 STR_ASCII use ascii even with unicode packet.
726 STR_NOALIGN means don't do alignment.
727 dest_len is the maximum length allowed in the destination. If dest_len
728 is -1 then no maxiumum is used.
731 size_t push_string_base(const char *base, uint16 flags2,
732 void *dest, const char *src,
733 size_t dest_len, int flags)
736 if (!(flags & STR_ASCII) && \
737 ((flags & STR_UNICODE || \
738 (flags2 & FLAGS2_UNICODE_STRINGS)))) {
739 return push_ucs2(base, dest, src, dest_len, flags);
741 return push_ascii(dest, src, dest_len, flags);
745 Copy a string from a char* src to a unicode or ascii
746 dos codepage destination choosing unicode or ascii based on the
748 Return the number of bytes occupied by the string in the destination.
750 STR_TERMINATE means include the null termination.
751 STR_UPPER means uppercase in the destination.
752 STR_ASCII use ascii even with unicode packet.
753 STR_NOALIGN means don't do alignment.
754 dest_len is the maximum length allowed in the destination. A dest_len
755 of -1 is no longer accepted: push_ascii() and push_ucs2() panic on it.
758 ssize_t push_string(void *dest, const char *src, size_t dest_len, int flags)
762 if (!(flags & STR_ASCII) && \
763 (flags & STR_UNICODE)) {
764 ret = push_ucs2(NULL, dest, src, dest_len, flags);
766 ret = push_ascii(dest, src, dest_len, flags);
768 if (ret == (size_t)-1) {
775 Copy a string from a unicode or ascii source (depending on
776 the packet flags) to a char* destination.
778 STR_TERMINATE means the string in src is null terminated.
779 STR_UNICODE means to force as unicode.
780 STR_ASCII use ascii even with unicode packet.
781 STR_NOALIGN means don't do alignment.
782 if STR_TERMINATE is set then src_len is ignored if it is -1
783 src_len is the length of the source area in bytes.
784 Return the number of bytes occupied by the string in src.
785 The resulting string in "dest" is always null terminated.
788 size_t pull_string_fn(const void *base_ptr,
796 if ((base_ptr == NULL) && ((flags & (STR_ASCII|STR_UNICODE)) == 0)) {
797 smb_panic("No base ptr to get flg2 and neither ASCII nor "
801 if (!(flags & STR_ASCII) && \
802 ((flags & STR_UNICODE || \
803 (smb_flags2 & FLAGS2_UNICODE_STRINGS)))) {
804 return pull_ucs2(base_ptr, dest, src, dest_len, src_len, flags);
806 return pull_ascii(dest, src, dest_len, src_len, flags);
810 Copy a string from a unicode or ascii source (depending on
811 the packet flags) to a char* destination.
812 Variant that uses talloc.
814 STR_TERMINATE means the string in src is null terminated.
815 STR_UNICODE means to force as unicode.
816 STR_ASCII use ascii even with unicode packet.
817 STR_NOALIGN means don't do alignment.
818 if STR_TERMINATE is set then src_len is ignored if it is -1
819 src_len is the length of the source area in bytes.
820 Return the number of bytes occupied by the string in src.
821 The resulting string in "dest" is always null terminated.
824 size_t pull_string_talloc(TALLOC_CTX *ctx,
825 const void *base_ptr,
832 if ((base_ptr == NULL) && ((flags & (STR_ASCII|STR_UNICODE)) == 0)) {
833 smb_panic("No base ptr to get flg2 and neither ASCII nor "
837 if (!(flags & STR_ASCII) && \
838 ((flags & STR_UNICODE || \
839 (smb_flags2 & FLAGS2_UNICODE_STRINGS)))) {
840 return pull_ucs2_base_talloc(ctx,
847 return pull_ascii_base_talloc(ctx,
855 size_t align_string(const void *base_ptr, const char *p, int flags)
857 if (!(flags & STR_ASCII) && \
858 ((flags & STR_UNICODE || \
859 (SVAL(base_ptr, smb_flg2) & FLAGS2_UNICODE_STRINGS)))) {
860 return ucs2_align(base_ptr, p, flags);
865 /*******************************************************************
866 Write a string in (little-endian) unicode format. src is in
867 the current DOS codepage. len is the length in bytes of the
868 string pointed to by dst.
870 if null_terminate is True then null terminate the packet (adds 2 bytes)
872 the return value is the length in bytes consumed by the string, including the
873 null termination if applied
874 ********************************************************************/
876 size_t dos_PutUniCode(char *dst,const char *src, size_t len, bool null_terminate)
878 int flags = null_terminate ? STR_UNICODE|STR_NOALIGN|STR_TERMINATE
879 : STR_UNICODE|STR_NOALIGN;
880 return push_ucs2(NULL, dst, src, len, flags);
884 /* Converts a string from internal samba format to unicode
887 int rpcstr_push(void *dest, const char *src, size_t dest_len, int flags)
889 return push_ucs2(NULL, dest, src, dest_len, flags|STR_UNICODE|STR_NOALIGN);
892 /* Converts a string from internal samba format to unicode. Always terminates.
893 * Actually just a wrapper round push_ucs2_talloc().
896 int rpcstr_push_talloc(TALLOC_CTX *ctx, smb_ucs2_t **dest, const char *src)
899 if (push_ucs2_talloc(ctx, dest, src, &size))