2 Unix SMB/CIFS implementation.
3 Samba utility functions
4 Copyright (C) Andrew Tridgell 1992-2001
5 Copyright (C) Simo Sorce 2001
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3 of the License, or
10 (at your option) any later version.
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program. If not, see <http://www.gnu.org/licenses/>.
22 #include "system/locale.h"
24 struct smb_iconv_convenience *global_iconv_convenience = NULL;
26 static inline struct smb_iconv_convenience *get_iconv_convenience(void)
28 if (global_iconv_convenience == NULL)
29 global_iconv_convenience = smb_iconv_convenience_reinit(talloc_autofree_context(),
30 "ASCII", "UTF-8", true, NULL);
31 return global_iconv_convenience;
35 Case insensitive string comparison
37 _PUBLIC_ int strcasecmp_m(const char *s1, const char *s2)
39 codepoint_t c1=0, c2=0;
41 struct smb_iconv_convenience *iconv_convenience = get_iconv_convenience();
43 /* handle null ptr comparisons to simplify the use in qsort */
44 if (s1 == s2) return 0;
45 if (s1 == NULL) return -1;
46 if (s2 == NULL) return 1;
49 c1 = next_codepoint_convenience(iconv_convenience, s1, &size1);
50 c2 = next_codepoint_convenience(iconv_convenience, s2, &size2);
59 if (c1 == INVALID_CODEPOINT ||
60 c2 == INVALID_CODEPOINT) {
61 /* what else can we do?? */
62 return strcasecmp(s1, s2);
65 if (toupper_m(c1) != toupper_m(c2)) {
74 * Get the next token from a string, return False if none found.
75 * Handles double-quotes.
77 * Based on a routine by GJC@VILLAGE.COM.
78 * Extensively modified by Andrew.Tridgell@anu.edu.au
80 _PUBLIC_ bool next_token(const char **ptr,char *buff, const char *sep, size_t bufsize)
91 /* default to simple separators */
95 /* find the first non sep char */
96 while (*s && strchr_m(sep,*s))
103 /* copy over the token */
104 for (quoted = false; len < bufsize && *s && (quoted || !strchr_m(sep,*s)); s++) {
113 *ptr = (*s) ? s+1 : s;
120 Case insensitive string comparison, length limited
122 _PUBLIC_ int strncasecmp_m(const char *s1, const char *s2, size_t n)
124 codepoint_t c1=0, c2=0;
126 struct smb_iconv_convenience *iconv_convenience = get_iconv_convenience();
128 /* handle null ptr comparisons to simplify the use in qsort */
129 if (s1 == s2) return 0;
130 if (s1 == NULL) return -1;
131 if (s2 == NULL) return 1;
133 while (*s1 && *s2 && n) {
136 c1 = next_codepoint_convenience(iconv_convenience, s1, &size1);
137 c2 = next_codepoint_convenience(iconv_convenience, s2, &size2);
146 if (c1 == INVALID_CODEPOINT ||
147 c2 == INVALID_CODEPOINT) {
148 /* what else can we do?? */
149 return strcasecmp(s1, s2);
152 if (toupper_m(c1) != toupper_m(c2)) {
167 * @note The comparison is case-insensitive.
169 _PUBLIC_ bool strequal_m(const char *s1, const char *s2)
171 return strcasecmp_m(s1,s2) == 0;
175 Compare 2 strings (case sensitive).
177 _PUBLIC_ bool strcsequal_m(const char *s1,const char *s2)
184 return strcmp(s1,s2) == 0;
190 NOTE: oldc and newc must be 7 bit characters
192 _PUBLIC_ void string_replace_m(char *s, char oldc, char newc)
194 struct smb_iconv_convenience *ic = get_iconv_convenience();
197 codepoint_t c = next_codepoint_convenience(ic, s, &size);
206 Paranoid strcpy into a buffer of given length (includes terminating
207 zero). Strips out all but 'a-zA-Z0-9' and the characters in other_safe_chars
208 and replaces them with '_'. Deliberately does *NOT* check for multibyte
209 characters. Don't change it!
212 _PUBLIC_ char *alpha_strcpy(char *dest, const char *src, const char *other_safe_chars, size_t maxlength)
216 if (maxlength == 0) {
217 /* can't fit any bytes at all! */
222 DEBUG(0,("ERROR: NULL dest in alpha_strcpy\n"));
232 if (len >= maxlength)
235 if (!other_safe_chars)
236 other_safe_chars = "";
238 for(i = 0; i < len; i++) {
239 int val = (src[i] & 0xff);
240 if (isupper(val) || islower(val) || isdigit(val) || strchr_m(other_safe_chars, val))
252 * Calculate the number of units (8 or 16-bit, depending on the
253 * destination charset), that would be needed to convert the input
254 * string which is expected to be in src_charset encoding to the
255 * destination charset (which should be a unicode charset).
257 _PUBLIC_ size_t strlen_m_ext(const char *s, charset_t src_charset, charset_t dst_charset)
260 struct smb_iconv_convenience *ic = get_iconv_convenience();
266 while (*s && !(((uint8_t)*s) & 0x80)) {
277 codepoint_t c = next_codepoint_convenience_ext(ic, s, src_charset, &c_size);
280 switch (dst_charset) {
292 * this only checks ranges, and does not
293 * check for invalid codepoints
297 } else if (c < 0x800) {
299 } else if (c < 0x1000) {
307 * non-unicode encoding:
308 * assume that each codepoint fits into
309 * one unit in the destination encoding.
318 _PUBLIC_ size_t strlen_m_ext_term(const char *s, const charset_t src_charset,
319 const charset_t dst_charset)
324 return strlen_m_ext(s, src_charset, dst_charset) + 1;
328 * Calculate the number of 16-bit units that would be needed to convert
329 * the input string which is expected to be in CH_UNIX encoding to UTF16.
331 * This will be the same as the number of bytes in a string for single
332 * byte strings, but will be different for multibyte.
334 _PUBLIC_ size_t strlen_m(const char *s)
336 return strlen_m_ext(s, CH_UNIX, CH_UTF16LE);
340 Work out the number of multibyte chars in a string, including the NULL
343 _PUBLIC_ size_t strlen_m_term(const char *s)
349 return strlen_m(s) + 1;
353 * Weird helper routine for the winreg pipe: If nothing is around, return 0,
354 * if a string is there, include the terminator.
357 _PUBLIC_ size_t strlen_m_term_null(const char *s)
372 Strchr and strrchr_m are a bit complex on general multi-byte strings.
374 _PUBLIC_ char *strchr_m(const char *s, char c)
376 struct smb_iconv_convenience *ic = get_iconv_convenience();
380 /* characters up to and including 0x3F are guaranteed to not appear in
381 non-initial position in multi-byte charsets */
382 if ((c & 0xC0) == 0) {
388 codepoint_t c2 = next_codepoint_convenience(ic, s, &size);
390 return discard_const_p(char, s);
399 * Multibyte-character version of strrchr
401 _PUBLIC_ char *strrchr_m(const char *s, char c)
403 struct smb_iconv_convenience *ic = get_iconv_convenience();
410 /* characters up to and including 0x3F are guaranteed to not appear in
411 non-initial position in multi-byte charsets */
412 if ((c & 0xC0) == 0) {
413 return strrchr(s, c);
418 codepoint_t c2 = next_codepoint_convenience(ic, s, &size);
420 ret = discard_const_p(char, s);
429 return True if any (multi-byte) character is lower case
431 _PUBLIC_ bool strhaslower(const char *string)
433 struct smb_iconv_convenience *ic = get_iconv_convenience();
439 s = next_codepoint_convenience(ic, string, &c_size);
445 return true; /* that means it has lower case chars */
453 return True if any (multi-byte) character is upper case
455 _PUBLIC_ bool strhasupper(const char *string)
457 struct smb_iconv_convenience *ic = get_iconv_convenience();
463 s = next_codepoint_convenience(ic, string, &c_size);
469 return true; /* that means it has upper case chars */
477 Convert a string to lower case, allocated with talloc
479 _PUBLIC_ char *strlower_talloc(TALLOC_CTX *ctx, const char *src)
483 struct smb_iconv_convenience *iconv_convenience = get_iconv_convenience();
489 /* this takes advantage of the fact that upper/lower can't
490 change the length of a character by more than 1 byte */
491 dest = talloc_array(ctx, char, 2*(strlen(src))+1);
498 codepoint_t c = next_codepoint_convenience(iconv_convenience, src, &c_size);
503 c_size = push_codepoint_convenience(iconv_convenience, dest+size, c);
513 /* trim it so talloc_append_string() works */
514 dest = talloc_realloc(ctx, dest, char, size+1);
516 talloc_set_name_const(dest, dest);
522 Convert a string to UPPER case, allocated with talloc
523 source length limited to n bytes
525 _PUBLIC_ char *strupper_talloc_n(TALLOC_CTX *ctx, const char *src, size_t n)
529 struct smb_iconv_convenience *iconv_convenience = get_iconv_convenience();
535 /* this takes advantage of the fact that upper/lower can't
536 change the length of a character by more than 1 byte */
537 dest = talloc_array(ctx, char, 2*(n+1));
542 while (n-- && *src) {
544 codepoint_t c = next_codepoint_convenience(iconv_convenience, src, &c_size);
549 c_size = push_codepoint_convenience(iconv_convenience, dest+size, c);
559 /* trim it so talloc_append_string() works */
560 dest = talloc_realloc(ctx, dest, char, size+1);
562 talloc_set_name_const(dest, dest);
568 Convert a string to UPPER case, allocated with talloc
570 _PUBLIC_ char *strupper_talloc(TALLOC_CTX *ctx, const char *src)
572 return strupper_talloc_n(ctx, src, src?strlen(src):0);
576 talloc_strdup() a unix string to upper case.
578 _PUBLIC_ char *talloc_strdup_upper(TALLOC_CTX *ctx, const char *src)
580 return strupper_talloc(ctx, src);
584 Convert a string to lower case.
586 _PUBLIC_ void strlower_m(char *s)
589 struct smb_iconv_convenience *iconv_convenience;
591 /* this is quite a common operation, so we want it to be
592 fast. We optimise for the ascii case, knowing that all our
593 supported multi-byte character sets are ascii-compatible
594 (ie. they match for the first 128 chars) */
595 while (*s && !(((uint8_t)*s) & 0x80)) {
596 *s = tolower((uint8_t)*s);
603 iconv_convenience = get_iconv_convenience();
608 size_t c_size, c_size2;
609 codepoint_t c = next_codepoint_convenience(iconv_convenience, s, &c_size);
610 c_size2 = push_codepoint_convenience(iconv_convenience, d, tolower_m(c));
611 if (c_size2 > c_size) {
612 DEBUG(0,("FATAL: codepoint 0x%x (0x%x) expanded from %d to %d bytes in strlower_m\n",
613 c, tolower_m(c), (int)c_size, (int)c_size2));
614 smb_panic("codepoint expansion in strlower_m\n");
623 Convert a string to UPPER case.
625 _PUBLIC_ void strupper_m(char *s)
628 struct smb_iconv_convenience *iconv_convenience;
630 /* this is quite a common operation, so we want it to be
631 fast. We optimise for the ascii case, knowing that all our
632 supported multi-byte character sets are ascii-compatible
633 (ie. they match for the first 128 chars) */
634 while (*s && !(((uint8_t)*s) & 0x80)) {
635 *s = toupper((uint8_t)*s);
642 iconv_convenience = get_iconv_convenience();
647 size_t c_size, c_size2;
648 codepoint_t c = next_codepoint_convenience(iconv_convenience, s, &c_size);
649 c_size2 = push_codepoint_convenience(iconv_convenience, d, toupper_m(c));
650 if (c_size2 > c_size) {
651 DEBUG(0,("FATAL: codepoint 0x%x (0x%x) expanded from %d to %d bytes in strupper_m\n",
652 c, toupper_m(c), (int)c_size, (int)c_size2));
653 smb_panic("codepoint expansion in strupper_m\n");
663 Find the number of 'c' chars in a string
665 _PUBLIC_ size_t count_chars_m(const char *s, char c)
667 struct smb_iconv_convenience *ic = get_iconv_convenience();
672 codepoint_t c2 = next_codepoint_convenience(ic, s, &size);
673 if (c2 == c) count++;
682 * Copy a string from a char* unix src to a dos codepage string destination.
684 * @return the number of bytes occupied by the string in the destination.
686 * @param flags can include
688 * <dt>STR_TERMINATE</dt> <dd>means include the null termination</dd>
689 * <dt>STR_UPPER</dt> <dd>means uppercase in the destination</dd>
692 * @param dest_len the maximum length in bytes allowed in the
693 * destination. If @p dest_len is -1 then no maximum is used.
695 static ssize_t push_ascii(void *dest, const char *src, size_t dest_len, int flags)
700 if (flags & STR_UPPER) {
701 char *tmpbuf = strupper_talloc(NULL, src);
702 if (tmpbuf == NULL) {
705 ret = push_ascii(dest, tmpbuf, dest_len, flags & ~STR_UPPER);
710 src_len = strlen(src);
712 if (flags & (STR_TERMINATE | STR_TERMINATE_ASCII))
715 return convert_string(CH_UNIX, CH_DOS, src, src_len, dest, dest_len, false);
719 * Copy a string from a unix char* src to an ASCII destination,
720 * allocating a buffer using talloc().
722 * @param dest always set at least to NULL
724 * @returns true if the conversion succeeded, false otherwise. The number of
725 * bytes occupied by the converted string is returned in @p converted_size.
727 _PUBLIC_ bool push_ascii_talloc(TALLOC_CTX *ctx, char **dest, const char *src, size_t *converted_size)
729 size_t src_len = strlen(src)+1;
731 return convert_string_talloc(ctx, CH_UNIX, CH_DOS, src, src_len, (void **)dest, converted_size, false);
736 * Copy a string from a dos codepage source to a unix char* destination.
738 * The resulting string in "dest" is always null terminated.
740 * @param flags can have:
742 * <dt>STR_TERMINATE</dt>
743 * <dd>STR_TERMINATE means the string in @p src
744 * is null terminated; src_len only bounds the scan and may be -1 for no limit.</dd>
747 * @param src_len is the length of the source area in bytes.
748 * @returns the number of bytes occupied by the string in @p src.
750 static ssize_t pull_ascii(char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
754 if (flags & (STR_TERMINATE | STR_TERMINATE_ASCII)) {
755 if (src_len == (size_t)-1) {
756 src_len = strlen((const char *)src) + 1;
758 size_t len = strnlen((const char *)src, src_len);
765 ret = convert_string(CH_DOS, CH_UNIX, src, src_len, dest, dest_len, false);
768 dest[MIN(ret, dest_len-1)] = 0;
774 * Copy a string from a char* src to a unicode destination.
776 * @returns the number of bytes occupied by the string in the destination.
778 * @param flags can have:
781 * <dt>STR_TERMINATE <dd>means include the null termination.
782 * <dt>STR_UPPER <dd>means uppercase in the destination.
783 * <dt>STR_NOALIGN <dd>means don't do alignment.
786 * @param dest_len is the maximum length allowed in the
787 * destination. If dest_len is -1 then no maximum is used.
789 static ssize_t push_ucs2(void *dest, const char *src, size_t dest_len, int flags)
792 size_t src_len = strlen(src);
795 if (flags & STR_UPPER) {
796 char *tmpbuf = strupper_talloc(NULL, src);
797 if (tmpbuf == NULL) {
800 ret = push_ucs2(dest, tmpbuf, dest_len, flags & ~STR_UPPER);
805 if (flags & STR_TERMINATE)
808 if (ucs2_align(NULL, dest, flags)) {
810 dest = (void *)((char *)dest + 1);
811 if (dest_len) dest_len--;
815 /* ucs2 is always a multiple of 2 bytes */
818 ret = convert_string(CH_UNIX, CH_UTF16, src, src_len, dest, dest_len, false);
819 if (ret == (size_t)-1) {
830 * Copy a string from a unix char* src to a UCS2 destination,
831 * allocating a buffer using talloc().
833 * @param dest always set at least to NULL
835 * @returns true if the conversion succeeded, false otherwise. The number of
836 * bytes occupied by the converted string is returned in @p converted_size.
838 _PUBLIC_ bool push_ucs2_talloc(TALLOC_CTX *ctx, smb_ucs2_t **dest, const char *src, size_t *converted_size)
840 size_t src_len = strlen(src)+1;
842 return convert_string_talloc(ctx, CH_UNIX, CH_UTF16, src, src_len, (void **)dest, converted_size, false);
847 * Copy a string from a unix char* src to a UTF-8 destination, allocating a buffer using talloc
849 * @param dest always set at least to NULL
851 * @returns true if the conversion succeeded; the converted size in bytes is returned in @p converted_size
854 _PUBLIC_ bool push_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src, size_t *converted_size)
856 size_t src_len = strlen(src)+1;
858 return convert_string_talloc(ctx, CH_UNIX, CH_UTF8, src, src_len, (void **)dest, converted_size, false);
862 Copy a string from a ucs2 source to a unix char* destination.
864 STR_TERMINATE means the string in src is null terminated.
865 STR_NOALIGN means don't try to align.
866 if STR_TERMINATE is set then src_len is ignored if it is -1.
867 src_len is the length of the source area in bytes
868 Return the number of bytes occupied by the string in src.
869 The resulting string in "dest" is always null terminated.
872 static size_t pull_ucs2(char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
876 if (ucs2_align(NULL, src, flags)) {
877 src = (const void *)((const char *)src + 1);
882 if (flags & STR_TERMINATE) {
883 if (src_len == (size_t)-1) {
884 src_len = utf16_len(src);
886 src_len = utf16_len_n(src, src_len);
890 /* ucs2 is always a multiple of 2 bytes */
891 if (src_len != (size_t)-1)
894 ret = convert_string(CH_UTF16, CH_UNIX, src, src_len, dest, dest_len, false);
896 dest[MIN(ret, dest_len-1)] = 0;
902 * Copy a string from an ASCII src to a unix char * destination, allocating a buffer using talloc
904 * @param dest always set at least to NULL
906 * @returns true if the conversion succeeded; the converted size in bytes is returned in @p converted_size
909 _PUBLIC_ bool pull_ascii_talloc(TALLOC_CTX *ctx, char **dest, const char *src, size_t *converted_size)
911 size_t src_len = strlen(src)+1;
913 return convert_string_talloc(ctx, CH_DOS, CH_UNIX, src, src_len, (void **)dest, converted_size, false);
917 * Copy a string from a UCS2 src to a unix char * destination, allocating a buffer using talloc
919 * @param dest always set at least to NULL
921 * @returns true if the conversion succeeded; the converted size in bytes is returned in @p converted_size
924 _PUBLIC_ bool pull_ucs2_talloc(TALLOC_CTX *ctx, char **dest, const smb_ucs2_t *src, size_t *converted_size)
926 size_t src_len = utf16_len(src);
928 return convert_string_talloc(ctx, CH_UTF16, CH_UNIX, src, src_len, (void **)dest, converted_size, false);
932 * Copy a string from a UTF-8 src to a unix char * destination, allocating a buffer using talloc
934 * @param dest always set at least to NULL
936 * @returns true if the conversion succeeded; the converted size in bytes is returned in @p converted_size
939 _PUBLIC_ bool pull_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src, size_t *converted_size)
941 size_t src_len = strlen(src)+1;
943 return convert_string_talloc(ctx, CH_UTF8, CH_UNIX, src, src_len, (void **)dest, converted_size, false);
947 Copy a string from a char* src to a unicode or ascii
948 dos codepage destination choosing unicode or ascii based on the
949 flags in the SMB buffer starting at base_ptr.
950 Return the number of bytes occupied by the string in the destination.
952 STR_TERMINATE means include the null termination.
953 STR_UPPER means uppercase in the destination.
954 STR_ASCII use ascii even with unicode packet.
955 STR_NOALIGN means don't do alignment.
956 dest_len is the maximum length allowed in the destination. If dest_len
957 is -1 then no maximum is used.
960 _PUBLIC_ ssize_t push_string(void *dest, const char *src, size_t dest_len, int flags)
962 if (flags & STR_ASCII) {
963 return push_ascii(dest, src, dest_len, flags);
964 } else if (flags & STR_UNICODE) {
965 return push_ucs2(dest, src, dest_len, flags);
967 smb_panic("push_string requires either STR_ASCII or STR_UNICODE flag to be set");
974 Copy a string from a unicode or ascii source (depending on
975 the packet flags) to a char* destination.
977 STR_TERMINATE means the string in src is null terminated.
978 STR_UNICODE means to force as unicode.
979 STR_ASCII use ascii even with unicode packet.
980 STR_NOALIGN means don't do alignment.
981 if STR_TERMINATE is set then src_len is ignored if it is -1
982 src_len is the length of the source area in bytes.
983 Return the number of bytes occupied by the string in src.
984 The resulting string in "dest" is always null terminated.
987 _PUBLIC_ ssize_t pull_string(char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
989 if (flags & STR_ASCII) {
990 return pull_ascii(dest, src, dest_len, src_len, flags);
991 } else if (flags & STR_UNICODE) {
992 return pull_ucs2(dest, src, dest_len, src_len, flags);
994 smb_panic("pull_string requires either STR_ASCII or STR_UNICODE flag to be set");
1001 * Convert string from one encoding to another, with error checking etc
1003 * @param src pointer to source string (multibyte or singlebyte)
1004 * @param srclen length of the source string in bytes
1005 * @param dest pointer to destination string (multibyte or singlebyte)
1006 * @param destlen maximal length allowed for string
1007 * @returns the number of bytes occupied in the destination
1009 _PUBLIC_ size_t convert_string(charset_t from, charset_t to,
1010 void const *src, size_t srclen,
1011 void *dest, size_t destlen,
1012 bool allow_badcharcnv)
1015 if (!convert_string_convenience(get_iconv_convenience(), from, to,
1017 dest, destlen, &ret,
1024 * Convert between character sets, allocating a new buffer using talloc for the result.
1026 * @param srclen length of source buffer.
1027 * @param dest always set at least to NULL
1028 * @param converted_size Size in bytes of the converted string
1029 * @note -1 is not accepted for srclen.
1031 * @returns boolean indication whether the conversion succeeded
1034 _PUBLIC_ bool convert_string_talloc(TALLOC_CTX *ctx,
1035 charset_t from, charset_t to,
1036 void const *src, size_t srclen,
1037 void *dest, size_t *converted_size,
1038 bool allow_badcharcnv)
1040 return convert_string_talloc_convenience(ctx, get_iconv_convenience(),
1041 from, to, src, srclen, dest,
1046 _PUBLIC_ codepoint_t next_codepoint_ext(const char *str, charset_t src_charset,
1049 return next_codepoint_convenience_ext(get_iconv_convenience(), str,
1053 _PUBLIC_ codepoint_t next_codepoint(const char *str, size_t *size)
1055 return next_codepoint_convenience(get_iconv_convenience(), str, size);
1058 _PUBLIC_ ssize_t push_codepoint(char *str, codepoint_t c)
1060 return push_codepoint_convenience(get_iconv_convenience(), str, c);