2 Unix SMB/CIFS implementation.
3 Samba utility functions
4 Copyright (C) Andrew Tridgell 1992-2001
5 Copyright (C) Simo Sorce 2001
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3 of the License, or
10 (at your option) any later version.
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program. If not, see <http://www.gnu.org/licenses/>.
22 #include "system/locale.h"
/* Shared conversion context; starts out NULL and is filled in on first use
   by get_iconv_convenience(). */
24 struct smb_iconv_convenience *global_iconv_convenience = NULL;
/*
 * Return the shared iconv convenience context, creating it on the first call.
 * The context is created by smb_iconv_convenience_init() with the arguments
 * "ASCII", "UTF-8", true and is parented to talloc_autofree_context().
 * NOTE(review): the check-then-assign below is not synchronised - confirm
 * this is never reached concurrently from several threads.
 * NOTE(review): the result of smb_iconv_convenience_init() is not checked
 * here, so callers may receive NULL if it fails - confirm.
 */
26 static inline struct smb_iconv_convenience *get_iconv_convenience(void)
28 if (global_iconv_convenience == NULL)
29 global_iconv_convenience = smb_iconv_convenience_init(talloc_autofree_context(), "ASCII", "UTF-8", true);
30 return global_iconv_convenience;
34 Case insensitive string comparison
/*
 * Case-insensitive comparison of two multibyte strings.
 * Codepoints are decoded with next_codepoint() and compared after folding
 * with toupper_m(). Identical pointers (including two NULLs) compare equal,
 * a NULL s1 yields -1 and a NULL s2 yields 1.
 */
36 _PUBLIC_ int strcasecmp_m(const char *s1, const char *s2)
38 codepoint_t c1=0, c2=0;
40 struct smb_iconv_convenience *iconv_convenience = get_iconv_convenience();
42 /* handle null ptr comparisons to simplify the use in qsort */
43 if (s1 == s2) return 0;
44 if (s1 == NULL) return -1;
45 if (s2 == NULL) return 1;
/* decode the next codepoint of each string; size1/size2 are filled in by
   next_codepoint() (presumably the encoded length in bytes - TODO confirm) */
48 c1 = next_codepoint(iconv_convenience, s1, &size1);
49 c2 = next_codepoint(iconv_convenience, s2, &size2);
58 if (c1 == INVALID_CODEPOINT ||
59 c2 == INVALID_CODEPOINT) {
60 /* what else can we do?? */
/* undecodable input: hand the comparison to the C library's strcasecmp() */
61 return strcasecmp(s1, s2);
/* codepoints that still differ after upper-casing are a real mismatch */
64 if (toupper_m(c1) != toupper_m(c2)) {
73 * Get the next token from a string, return False if none found.
74 * Handles double-quotes.
76 * Based on a routine by GJC@VILLAGE.COM.
77 * Extensively modified by Andrew.Tridgell@anu.edu.au
/*
 * Extract the next token from the string at *ptr into buff.
 * @param ptr     in/out cursor; updated to where scanning should resume
 * @param buff    destination for the token
 * @param sep     set of separator characters
 * @param bufsize bound used by the copy loop (len < bufsize)
 * NOTE(review): how a NULL sep is defaulted and how buff is terminated is
 * not visible in this listing - confirm before relying on either.
 */
79 _PUBLIC_ bool next_token(const char **ptr,char *buff, const char *sep, size_t bufsize)
90 /* default to simple separators */
94 /* find the first non sep char */
95 while (*s && strchr_m(sep,*s))
102 /* copy over the token */
/* stop at bufsize, at end of string, or at a separator that is not inside
   a quoted section */
103 for (quoted = false; len < bufsize && *s && (quoted || !strchr_m(sep,*s)); s++) {
/* resume just past the character that ended the scan, or stay on the
   terminating NUL when the input is exhausted */
112 *ptr = (*s) ? s+1 : s;
119 Case insensitive string comparison, length limited
/*
 * Length-limited, case-insensitive comparison of two multibyte strings.
 * Same NULL handling as strcasecmp_m: identical pointers compare equal,
 * a NULL s1 yields -1 and a NULL s2 yields 1. The loop stops when either
 * string ends or n reaches zero.
 */
121 _PUBLIC_ int strncasecmp_m(const char *s1, const char *s2, size_t n)
123 codepoint_t c1=0, c2=0;
125 struct smb_iconv_convenience *iconv_convenience = get_iconv_convenience();
127 /* handle null ptr comparisons to simplify the use in qsort */
128 if (s1 == s2) return 0;
129 if (s1 == NULL) return -1;
130 if (s2 == NULL) return 1;
132 while (*s1 && *s2 && n) {
/* decode the next codepoint of each string */
135 c1 = next_codepoint(iconv_convenience, s1, &size1);
136 c2 = next_codepoint(iconv_convenience, s2, &size2);
145 if (c1 == INVALID_CODEPOINT ||
146 c2 == INVALID_CODEPOINT) {
147 /* what else can we do?? */
/* NOTE(review): this fallback calls the unbounded strcasecmp() and so
   ignores the remaining limit n - confirm whether strncasecmp() with a
   suitably adjusted length was intended */
148 return strcasecmp(s1, s2);
/* codepoints that still differ after upper-casing are a real mismatch */
151 if (toupper_m(c1) != toupper_m(c2)) {
166 * @note The comparison is case-insensitive.
/* True when strcasecmp_m() reports the two strings as equal, i.e. a
   case-insensitive equality test with strcasecmp_m()'s NULL handling. */
168 _PUBLIC_ bool strequal_m(const char *s1, const char *s2)
170 return strcasecmp_m(s1,s2) == 0;
174 Compare 2 strings (case sensitive).
/* Case-sensitive equality test built on strcmp().
   NOTE(review): strcmp() must not receive NULL; any NULL guards that
   precede this call are not visible here - confirm. */
176 _PUBLIC_ bool strcsequal_m(const char *s1,const char *s2)
183 return strcmp(s1,s2) == 0;
189 NOTE: oldc and newc must be 7 bit characters
/* Modifies s in place; the string is decoded with next_codepoint() rather
   than scanned byte by byte. Per the name, oldc is presumably replaced by
   newc - the replacement step is not visible here, TODO confirm. */
191 _PUBLIC_ void string_replace_m(char *s, char oldc, char newc)
193 struct smb_iconv_convenience *ic = get_iconv_convenience();
/* size is filled in by next_codepoint() for the codepoint at s */
196 codepoint_t c = next_codepoint(ic, s, &size);
205 Paranoid strcpy into a buffer of given length (includes terminating
206 zero). Strips out all but 'a-zA-Z0-9' and the characters in other_safe_chars
207 and replaces them with '_'. Deliberately does *NOT* check for multibyte
208 characters. Don't change it!
/*
 * Copy src into dest, keeping only bytes that are upper case, lower case,
 * digits, or listed in other_safe_chars.
 * @param dest             destination buffer (a NULL dest is logged as an error)
 * @param src              source string
 * @param other_safe_chars extra bytes to accept; NULL is treated as ""
 * @param maxlength        size limit; 0 means nothing can be stored
 */
211 _PUBLIC_ char *alpha_strcpy(char *dest, const char *src, const char *other_safe_chars, size_t maxlength)
215 if (maxlength == 0) {
216 /* can't fit any bytes at all! */
221 DEBUG(0,("ERROR: NULL dest in alpha_strcpy\n"));
/* source does not fit within maxlength */
231 if (len >= maxlength)
234 if (!other_safe_chars)
235 other_safe_chars = "";
237 for(i = 0; i < len; i++) {
/* mask to 0..255 so the <ctype.h> calls below never see a negative value */
238 int val = (src[i] & 0xff);
239 if (isupper(val) || islower(val) || isdigit(val) || strchr_m(other_safe_chars, val))
251 Count the number of UCS2 characters in a string. Normally this will
252 be the same as the number of bytes in a string for single byte strings,
253 but will be different for multibyte.
/* Character count of a multibyte string: plain ASCII bytes are handled by
   a fast loop, anything else is decoded with next_codepoint(). */
255 _PUBLIC_ size_t strlen_m(const char *s)
258 struct smb_iconv_convenience *ic = get_iconv_convenience();
/* fast path: run over bytes whose high bit is clear (7-bit ASCII) */
264 while (*s && !(((uint8_t)*s) & 0x80)) {
/* slow path: decode one codepoint; c_size is filled in by next_codepoint() */
275 codepoint_t c = next_codepoint(ic, s, &c_size);
288 Work out the number of multibyte chars in a string, including the NUL terminator
/* strlen_m() plus one for the terminator. */
291 _PUBLIC_ size_t strlen_m_term(const char *s)
297 return strlen_m(s) + 1;
301 Strchr and strrchr_m are a bit complex on general multi-byte strings.
/* Multibyte-aware strchr(): look for c in s, decoding with
   next_codepoint() unless c is in the range taken by the fast path.
   The returned pointer has the const qualifier of s cast away. */
303 _PUBLIC_ char *strchr_m(const char *s, char c)
305 struct smb_iconv_convenience *ic = get_iconv_convenience();
309 /* characters below 0x3F are guaranteed to not appear in
310 non-initial position in multi-byte charsets */
/* NOTE(review): the mask test is true for 0x00..0x3F inclusive, so 0x3F
   itself also takes this path although the comment says "below 0x3F" */
311 if ((c & 0xC0) == 0) {
317 codepoint_t c2 = next_codepoint(ic, s, &size);
319 return discard_const_p(char, s);
328 * Multibyte-character version of strrchr
/* Multibyte-aware strrchr(): for c in 0x00..0x3F the C library strrchr()
   is used directly; otherwise s is decoded with next_codepoint() and a
   position is remembered in ret. */
330 _PUBLIC_ char *strrchr_m(const char *s, char c)
332 struct smb_iconv_convenience *ic = get_iconv_convenience();
339 /* characters below 0x3F are guaranteed to not appear in
340 non-initial position in multi-byte charsets */
341 if ((c & 0xC0) == 0) {
342 return strrchr(s, c);
347 codepoint_t c2 = next_codepoint(ic, s, &size);
/* record this position (const cast away); presumably a later match
   overwrites it so the last one wins - TODO confirm */
349 ret = discard_const_p(char, s);
358 return True if any (multi-byte) character is lower case
/* Scan string by decoded codepoint and report true as soon as one
   qualifies as lower case (the test itself is not visible here). */
360 _PUBLIC_ bool strhaslower(const char *string)
362 struct smb_iconv_convenience *ic = get_iconv_convenience();
/* s holds the decoded codepoint; c_size is filled in by next_codepoint() */
368 s = next_codepoint(ic, string, &c_size);
374 return true; /* that means it has lower case chars */
382 return True if any (multi-byte) character is upper case
/* Scan string by decoded codepoint and report true as soon as one
   qualifies as upper case (the test itself is not visible here). */
384 _PUBLIC_ bool strhasupper(const char *string)
386 struct smb_iconv_convenience *ic = get_iconv_convenience();
/* s holds the decoded codepoint; c_size is filled in by next_codepoint() */
392 s = next_codepoint(ic, string, &c_size);
398 return true; /* that means it has upper case chars */
406 Convert a string to lower case, allocated with talloc
/*
 * Build a converted copy of src in a new talloc buffer owned by ctx.
 * The buffer is first sized at 2*strlen(src)+1 bytes, filled one codepoint
 * at a time with push_codepoint(), then shrunk to size+1 bytes.
 * NOTE(review): strlen(src) is called with no NULL check visible before
 * it - confirm callers never pass NULL.
 */
408 _PUBLIC_ char *strlower_talloc(TALLOC_CTX *ctx, const char *src)
412 struct smb_iconv_convenience *iconv_convenience = get_iconv_convenience();
414 /* this takes advantage of the fact that upper/lower can't
415 change the length of a character by more than 1 byte */
416 dest = talloc_array(ctx, char, 2*(strlen(src))+1);
423 codepoint_t c = next_codepoint(iconv_convenience, src, &c_size);
/* encode c at the current write offset; c_size is reused for the result */
428 c_size = push_codepoint(iconv_convenience, dest+size, c);
438 /* trim it so talloc_append_string() works */
/* NOTE(review): the result overwrites dest directly; if talloc_realloc()
   can fail here the old pointer is lost - confirm failure handling */
439 dest = talloc_realloc(ctx, dest, char, size+1);
/* use the string contents as the talloc name of the chunk */
441 talloc_set_name_const(dest, dest);
447 Convert a string to UPPER case, allocated with talloc
448 source length limited to n bytes
/*
 * Build a converted copy of at most n units of src in a new talloc buffer
 * owned by ctx. The buffer is first sized at 2*(n+1) bytes, filled one
 * codepoint at a time with push_codepoint(), then shrunk to size+1 bytes.
 * NOTE(review): n is decremented once per loop iteration, and each
 * iteration decodes one codepoint, while the header text describes n as a
 * byte limit - confirm which unit is intended.
 */
450 _PUBLIC_ char *strupper_talloc_n(TALLOC_CTX *ctx, const char *src, size_t n)
454 struct smb_iconv_convenience *iconv_convenience = get_iconv_convenience();
460 /* this takes advantage of the fact that upper/lower can't
461 change the length of a character by more than 1 byte */
462 dest = talloc_array(ctx, char, 2*(n+1));
467 while (*src && n--) {
469 codepoint_t c = next_codepoint(iconv_convenience, src, &c_size);
/* encode c at the current write offset; c_size is reused for the result */
474 c_size = push_codepoint(iconv_convenience, dest+size, c);
484 /* trim it so talloc_append_string() works */
/* NOTE(review): the result overwrites dest directly; if talloc_realloc()
   can fail here the old pointer is lost - confirm failure handling */
485 dest = talloc_realloc(ctx, dest, char, size+1);
/* use the string contents as the talloc name of the chunk */
487 talloc_set_name_const(dest, dest);
493 Convert a string to UPPER case, allocated with talloc
/* Whole-string wrapper around strupper_talloc_n(): the limit is
   strlen(src), or 0 when src is NULL so strlen() is never called on NULL. */
495 _PUBLIC_ char *strupper_talloc(TALLOC_CTX *ctx, const char *src)
497 return strupper_talloc_n(ctx, src, src?strlen(src):0);
501 talloc_strdup() a unix string to upper case.
/* Alias for strupper_talloc(); arguments and result are passed through
   unchanged. */
503 _PUBLIC_ char *talloc_strdup_upper(TALLOC_CTX *ctx, const char *src)
505 return strupper_talloc(ctx, src);
509 Convert a string to lower case.
/*
 * Lower-case s in place.
 * Leading 7-bit ASCII bytes are converted with tolower(); after that each
 * codepoint is decoded, folded with tolower_m() and re-encoded at d.
 * Panics if a folded codepoint needs more bytes than the one it replaces.
 */
511 _PUBLIC_ void strlower_m(char *s)
514 struct smb_iconv_convenience *iconv_convenience;
516 /* this is quite a common operation, so we want it to be
517 fast. We optimise for the ascii case, knowing that all our
518 supported multi-byte character sets are ascii-compatible
519 (ie. they match for the first 128 chars) */
520 while (*s && !(((uint8_t)*s) & 0x80)) {
521 *s = tolower((uint8_t)*s);
/* the conversion context is only fetched once the slow path is needed */
528 iconv_convenience = get_iconv_convenience();
533 size_t c_size, c_size2;
534 codepoint_t c = next_codepoint(iconv_convenience, s, &c_size);
/* d is the write position (presumably inside the same buffer as s -
   TODO confirm); c_size2 is what push_codepoint() reports for the output */
535 c_size2 = push_codepoint(iconv_convenience, d, tolower_m(c));
/* output larger than input cannot be handled in place */
536 if (c_size2 > c_size) {
537 DEBUG(0,("FATAL: codepoint 0x%x (0x%x) expanded from %d to %d bytes in strlower_m\n",
538 c, tolower_m(c), (int)c_size, (int)c_size2));
539 smb_panic("codepoint expansion in strlower_m\n");
548 Convert a string to UPPER case.
/*
 * Upper-case s in place.
 * Leading 7-bit ASCII bytes are converted with toupper(); after that each
 * codepoint is decoded, folded with toupper_m() and re-encoded at d.
 * Panics if a folded codepoint needs more bytes than the one it replaces.
 */
550 _PUBLIC_ void strupper_m(char *s)
553 struct smb_iconv_convenience *iconv_convenience;
555 /* this is quite a common operation, so we want it to be
556 fast. We optimise for the ascii case, knowing that all our
557 supported multi-byte character sets are ascii-compatible
558 (ie. they match for the first 128 chars) */
559 while (*s && !(((uint8_t)*s) & 0x80)) {
560 *s = toupper((uint8_t)*s);
/* the conversion context is only fetched once the slow path is needed */
567 iconv_convenience = get_iconv_convenience();
572 size_t c_size, c_size2;
573 codepoint_t c = next_codepoint(iconv_convenience, s, &c_size);
/* d is the write position (presumably inside the same buffer as s -
   TODO confirm); c_size2 is what push_codepoint() reports for the output */
574 c_size2 = push_codepoint(iconv_convenience, d, toupper_m(c));
/* output larger than input cannot be handled in place */
575 if (c_size2 > c_size) {
576 DEBUG(0,("FATAL: codepoint 0x%x (0x%x) expanded from %d to %d bytes in strupper_m\n",
577 c, toupper_m(c), (int)c_size, (int)c_size2));
578 smb_panic("codepoint expansion in strupper_m\n");
588 Find the number of 'c' chars in a string
/* Count the decoded codepoints of s that compare equal to c. */
590 _PUBLIC_ size_t count_chars_m(const char *s, char c)
592 struct smb_iconv_convenience *ic = get_iconv_convenience();
597 codepoint_t c2 = next_codepoint(ic, s, &size);
/* NOTE(review): c is a plain char compared against a codepoint_t; if char
   is signed, values above 0x7F may not compare as expected - confirm */
598 if (c2 == c) count++;
607 * Copy a string from a char* unix src to a dos codepage string destination.
609 * @return the number of bytes occupied by the string in the destination.
611 * @param flags can include
613 * <dt>STR_TERMINATE</dt> <dd>means include the null termination</dd>
614 * <dt>STR_UPPER</dt> <dd>means uppercase in the destination</dd>
617 * @param dest_len the maximum length in bytes allowed in the
618 * destination. If @p dest_len is -1 then no maximum is used.
/*
 * Convert src from CH_UNIX to CH_DOS into dest via convert_string().
 * With STR_UPPER set, an upper-cased temporary copy is made first and the
 * function calls itself again with STR_UPPER cleared.
 */
620 static ssize_t push_ascii(void *dest, const char *src, size_t dest_len, int flags)
625 if (flags & STR_UPPER) {
/* temporary copy allocated on the NULL talloc context */
626 char *tmpbuf = strupper_talloc(NULL, src);
627 if (tmpbuf == NULL) {
/* recurse exactly once: the flag is stripped so this branch is not re-entered */
630 ret = push_ascii(dest, tmpbuf, dest_len, flags & ~STR_UPPER);
635 src_len = strlen(src);
/* either terminate flag selects the branch below (its body is not visible
   here; per the flag description it presumably adds the terminator to
   src_len - TODO confirm) */
637 if (flags & (STR_TERMINATE | STR_TERMINATE_ASCII))
640 return convert_string(CH_UNIX, CH_DOS, src, src_len, dest, dest_len);
644 * Copy a string from a unix char* src to an ASCII destination,
645 * allocating a buffer using talloc().
647 * @param dest always set at least to NULL
649 * @returns The number of bytes occupied by the string in the destination
650 * or -1 in case of error.
/* Convert src from CH_UNIX to CH_DOS into a buffer allocated on ctx by
   convert_string_talloc(). The source length passed on includes the
   terminating NUL (strlen(src)+1). */
652 _PUBLIC_ ssize_t push_ascii_talloc(TALLOC_CTX *ctx, char **dest, const char *src)
654 size_t src_len = strlen(src)+1;
656 return convert_string_talloc(ctx, CH_UNIX, CH_DOS, src, src_len, (void **)dest);
661 * Copy a string from a dos codepage source to a unix char* destination.
663 * The resulting string in "dest" is always null terminated.
665 * @param flags can have:
667 * <dt>STR_TERMINATE</dt>
668 * <dd>STR_TERMINATE means the string in @p src
669 * is null terminated, and src_len is ignored.</dd>
672 * @param src_len is the length of the source area in bytes.
673 * @returns the number of bytes occupied by the string in @p src.
/*
 * Convert src from CH_DOS to CH_UNIX into dest via convert_string() and
 * NUL-terminate the result inside dest.
 */
675 static ssize_t pull_ascii(char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
679 if (flags & (STR_TERMINATE | STR_TERMINATE_ASCII)) {
/* src_len of -1 means "unknown": measure the string, terminator included */
680 if (src_len == (size_t)-1) {
681 src_len = strlen((const char *)src) + 1;
/* otherwise look for the terminator without reading past src_len bytes */
683 size_t len = strnlen((const char *)src, src_len);
690 ret = convert_string(CH_DOS, CH_UNIX, src, src_len, dest, dest_len);
/* terminate at the converted length, clamped to the last byte of dest.
   NOTE(review): dest_len-1 wraps if dest_len is 0 - confirm a guard
   precedes this line */
693 dest[MIN(ret, dest_len-1)] = 0;
699 * Copy a string from a char* src to a unicode destination.
701 * @returns the number of bytes occupied by the string in the destination.
703 * @param flags can have:
706 * <dt>STR_TERMINATE <dd>means include the null termination.
707 * <dt>STR_UPPER <dd>means uppercase in the destination.
708 * <dt>STR_NOALIGN <dd>means don't do alignment.
711 * @param dest_len is the maximum length allowed in the
712 * destination. If dest_len is -1 then no maximum is used.
/*
 * Convert src from CH_UNIX to CH_UTF16 into dest via convert_string().
 * With STR_UPPER set, an upper-cased temporary copy is made first and the
 * function calls itself again with STR_UPPER cleared.
 */
714 static ssize_t push_ucs2(void *dest, const char *src, size_t dest_len, int flags)
717 size_t src_len = strlen(src);
720 if (flags & STR_UPPER) {
/* temporary copy allocated on the NULL talloc context */
721 char *tmpbuf = strupper_talloc(NULL, src);
722 if (tmpbuf == NULL) {
/* recurse exactly once: the flag is stripped so this branch is not re-entered */
725 ret = push_ucs2(dest, tmpbuf, dest_len, flags & ~STR_UPPER);
730 if (flags & STR_TERMINATE)
/* when ucs2_align() asks for it, skip one byte of dest and give up one
   byte of the remaining space */
733 if (ucs2_align(NULL, dest, flags)) {
735 dest = (void *)((char *)dest + 1);
736 if (dest_len) dest_len--;
740 /* ucs2 is always a multiple of 2 bytes */
743 ret = convert_string(CH_UNIX, CH_UTF16, src, src_len, dest, dest_len);
/* (size_t)-1 is the failure value tested for here */
744 if (ret == (size_t)-1) {
755 * Copy a string from a unix char* src to a UCS2 destination,
756 * allocating a buffer using talloc().
758 * @param dest always set at least to NULL
760 * @returns The number of bytes occupied by the string in the destination
761 * or -1 in case of error.
/* Convert src from CH_UNIX to CH_UTF16 into a buffer allocated on ctx by
   convert_string_talloc(). The source length passed on includes the
   terminating NUL (strlen(src)+1). */
763 _PUBLIC_ ssize_t push_ucs2_talloc(TALLOC_CTX *ctx, void **dest, const char *src)
765 size_t src_len = strlen(src)+1;
767 return convert_string_talloc(ctx, CH_UNIX, CH_UTF16, src, src_len, dest);
772 * Copy a string from a unix char* src to a UTF-8 destination, allocating a buffer using talloc
774 * @param dest always set at least to NULL
776 * @returns The number of bytes occupied by the string in the destination
/* Convert src from CH_UNIX to CH_UTF8 into a buffer allocated on ctx by
   convert_string_talloc(). The source length passed on includes the
   terminating NUL (strlen(src)+1). */
779 _PUBLIC_ ssize_t push_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src)
781 size_t src_len = strlen(src)+1;
783 return convert_string_talloc(ctx, CH_UNIX, CH_UTF8, src, src_len, (void **)dest);
787 Copy a string from a ucs2 source to a unix char* destination.
789 STR_TERMINATE means the string in src is null terminated.
790 STR_NOALIGN means don't try to align.
791 if STR_TERMINATE is set then src_len is ignored if it is -1.
792 src_len is the length of the source area in bytes
793 Return the number of bytes occupied by the string in src.
794 The resulting string in "dest" is always null terminated.
/*
 * Convert src from CH_UTF16 to CH_UNIX into dest via convert_string() and
 * NUL-terminate the result inside dest.
 * NOTE(review): this returns size_t while pull_ascii() and pull_string()
 * return ssize_t - confirm how a conversion failure is reported to callers.
 */
797 static size_t pull_ucs2(char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
/* when ucs2_align() asks for it, skip one byte of src */
801 if (ucs2_align(NULL, src, flags)) {
802 src = (const void *)((const char *)src + 1);
807 if (flags & STR_TERMINATE) {
/* src_len of -1 means "unknown": measure with utf16_len(); otherwise use
   the bounded utf16_len_n() */
808 if (src_len == (size_t)-1) {
809 src_len = utf16_len(src);
811 src_len = utf16_len_n(src, src_len);
815 /* ucs2 is always a multiple of 2 bytes */
816 if (src_len != (size_t)-1)
819 ret = convert_string(CH_UTF16, CH_UNIX, src, src_len, dest, dest_len);
/* terminate at the converted length, clamped to the last byte of dest.
   NOTE(review): dest_len-1 wraps if dest_len is 0 - confirm a guard
   precedes this line */
821 dest[MIN(ret, dest_len-1)] = 0;
827 * Copy a string from an ASCII src to a unix char * destination, allocating a buffer using talloc
829 * @param dest always set at least to NULL
831 * @returns The number of bytes occupied by the string in the destination
/* Convert src from CH_DOS to CH_UNIX into a buffer allocated on ctx by
   convert_string_talloc(). The source length passed on includes the
   terminating NUL (strlen(src)+1). */
834 _PUBLIC_ ssize_t pull_ascii_talloc(TALLOC_CTX *ctx, char **dest, const char *src)
836 size_t src_len = strlen(src)+1;
838 return convert_string_talloc(ctx, CH_DOS, CH_UNIX, src, src_len, (void **)dest);
842 * Copy a string from a UCS2 src to a unix char * destination, allocating a buffer using talloc
844 * @param dest always set at least to NULL
846 * @returns The number of bytes occupied by the string in the destination
/* Convert src from CH_UTF16 to CH_UNIX into a buffer allocated on ctx by
   convert_string_talloc(). The source length comes from utf16_len(src). */
849 _PUBLIC_ ssize_t pull_ucs2_talloc(TALLOC_CTX *ctx, char **dest, const void *src)
/* NOTE(review): unlike the strlen(src)+1 used by the sibling helpers, no
   +1 is added here; presumably utf16_len() already counts the terminator -
   confirm */
851 size_t src_len = utf16_len(src);
853 return convert_string_talloc(ctx, CH_UTF16, CH_UNIX, src, src_len, (void **)dest);
857 * Copy a string from a UTF-8 src to a unix char * destination, allocating a buffer using talloc
859 * @param dest always set at least to NULL
861 * @returns The number of bytes occupied by the string in the destination
/* Convert src from CH_UTF8 to CH_UNIX into a buffer allocated on ctx by
   convert_string_talloc(). The source length passed on includes the
   terminating NUL (strlen(src)+1). */
864 _PUBLIC_ ssize_t pull_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src)
866 size_t src_len = strlen(src)+1;
868 return convert_string_talloc(ctx, CH_UTF8, CH_UNIX, src, src_len, (void **)dest);
872 Copy a string from a char* src to a unicode or ascii
873 dos codepage destination choosing unicode or ascii based on the
874 flags in the SMB buffer starting at base_ptr.
875 Return the number of bytes occupied by the string in the destination.
877 STR_TERMINATE means include the null termination.
878 STR_UPPER means uppercase in the destination.
879 STR_ASCII use ascii even with unicode packet.
880 STR_NOALIGN means don't do alignment.
881 dest_len is the maximum length allowed in the destination. If dest_len
882 is -1 then no maximum is used.
/* Dispatch on flags: STR_ASCII selects push_ascii(), otherwise
   STR_UNICODE selects push_ucs2(). STR_ASCII is tested first, so it wins
   when both are set. With neither flag set the function panics. */
885 _PUBLIC_ ssize_t push_string(void *dest, const char *src, size_t dest_len, int flags)
887 if (flags & STR_ASCII) {
888 return push_ascii(dest, src, dest_len, flags);
889 } else if (flags & STR_UNICODE) {
890 return push_ucs2(dest, src, dest_len, flags);
892 smb_panic("push_string requires either STR_ASCII or STR_UNICODE flag to be set");
899 Copy a string from a unicode or ascii source (depending on
900 the packet flags) to a char* destination.
902 STR_TERMINATE means the string in src is null terminated.
903 STR_UNICODE means to force as unicode.
904 STR_ASCII use ascii even with unicode packet.
905 STR_NOALIGN means don't do alignment.
906 if STR_TERMINATE is set then src_len is ignored if it is -1
907 src_len is the length of the source area in bytes.
908 Return the number of bytes occupied by the string in src.
909 The resulting string in "dest" is always null terminated.
/* Dispatch on flags: STR_ASCII selects pull_ascii(), otherwise
   STR_UNICODE selects pull_ucs2(). STR_ASCII is tested first, so it wins
   when both are set. With neither flag set the function panics. */
912 _PUBLIC_ ssize_t pull_string(char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
914 if (flags & STR_ASCII) {
915 return pull_ascii(dest, src, dest_len, src_len, flags);
916 } else if (flags & STR_UNICODE) {
917 return pull_ucs2(dest, src, dest_len, src_len, flags);
919 smb_panic("pull_string requires either STR_ASCII or STR_UNICODE flag to be set");
926 * Convert string from one encoding to another, making error checking etc
928 * @param src pointer to source string (multibyte or singlebyte)
929 * @param srclen length of the source string in bytes
930 * @param dest pointer to destination string (multibyte or singlebyte)
931 * @param destlen maximal length allowed for string
932 * @returns the number of bytes occupied in the destination
/* Thin wrapper: forwards to convert_string_convenience() using the shared
   context from get_iconv_convenience(). */
934 _PUBLIC_ ssize_t convert_string(charset_t from, charset_t to,
935 void const *src, size_t srclen,
936 void *dest, size_t destlen)
938 return convert_string_convenience(get_iconv_convenience(), from, to,
944 * Convert between character sets, allocating a new buffer using talloc for the result.
946 * @param srclen length of source buffer.
947 * @param dest always set at least to NULL
948 * @note -1 is not accepted for srclen.
950 * @returns Size in bytes of the converted string; or -1 in case of error.
/* Thin wrapper: forwards to convert_string_talloc_convenience() using the
   shared context from get_iconv_convenience(); ctx, from, to, src, srclen
   and dest are passed through unchanged. */
953 _PUBLIC_ ssize_t convert_string_talloc(TALLOC_CTX *ctx,
954 charset_t from, charset_t to,
955 void const *src, size_t srclen,
958 return convert_string_talloc_convenience(ctx, get_iconv_convenience(),
959 from, to, src, srclen, dest);