2 Unix SMB/CIFS implementation.
3 Samba utility functions
4 Copyright (C) Andrew Tridgell 1992-2001
5 Copyright (C) Simo Sorce 2001
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3 of the License, or
10 (at your option) any later version.
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program. If not, see <http://www.gnu.org/licenses/>.
22 #include "system/locale.h"
23 #include "param/param.h"
25 static inline struct smb_iconv_convenience *get_iconv_convenience(void)
27 return lp_iconv_convenience(global_loadparm);
31 Case insensitive string comparison
/*
 * Case-insensitive comparison of two multibyte strings.
 * Identical pointers (two NULLs included) compare equal; a lone NULL s1
 * yields -1 and a lone NULL s2 yields 1.
 */
33 _PUBLIC_ int strcasecmp_m(const char *s1, const char *s2)
35 codepoint_t c1=0, c2=0;
/* conversion context handed to next_codepoint() below */
37 struct smb_iconv_convenience *iconv_convenience = get_iconv_convenience();
39 /* handle null ptr comparisons to simplify the use in qsort */
40 if (s1 == s2) return 0;
41 if (s1 == NULL) return -1;
42 if (s2 == NULL) return 1;
/* decode one codepoint from each string; size1/size2 presumably receive
   the number of bytes each character occupied */
45 c1 = next_codepoint(iconv_convenience, s1, &size1);
46 c2 = next_codepoint(iconv_convenience, s2, &size2);
/* an undecodable character on either side ends the codepoint-wise
   comparison: defer to the C library's strcasecmp() */
55 if (c1 == INVALID_CODEPOINT ||
56 c2 == INVALID_CODEPOINT) {
57 /* what else can we do?? */
58 return strcasecmp(s1, s2);
/* the two codepoints still differ after both are upper-cased */
61 if (toupper_m(c1) != toupper_m(c2)) {
70 * Get the next token from a string, return False if none found.
71 * Handles double-quotes.
73 * Based on a routine by GJC@VILLAGE.COM.
74 * Extensively modified by Andrew.Tridgell@anu.edu.au
/*
 * Pull the next token out of *ptr into buff (capacity bufsize) and
 * advance *ptr. Leading separator characters are skipped first; while
 * "quoted" is set, separator characters do not end the token.
 */
76 _PUBLIC_ bool next_token(const char **ptr,char *buff, const char *sep, size_t bufsize)
87 /* default to simple separators */
91 /* find the first non sep char */
92 while (*s && strchr_m(sep,*s))
99 /* copy over the token */
/* the loop ends when len reaches bufsize, the input ends, or an
   unquoted separator is seen */
100 for (quoted = false; len < bufsize && *s && (quoted || !strchr_m(sep,*s)); s++) {
/* continue after the character that ended the token, or stay on the
   terminating NUL when the input is exhausted */
109 *ptr = (*s) ? s+1 : s;
116 Case insensitive string comparison, length limited
/*
 * Case-insensitive comparison of two multibyte strings, limited by n.
 * Identical pointers (two NULLs included) compare equal; a lone NULL s1
 * yields -1 and a lone NULL s2 yields 1.
 */
118 _PUBLIC_ int strncasecmp_m(const char *s1, const char *s2, size_t n)
120 codepoint_t c1=0, c2=0;
/* conversion context handed to next_codepoint() below */
122 struct smb_iconv_convenience *iconv_convenience = get_iconv_convenience();
124 /* handle null ptr comparisons to simplify the use in qsort */
125 if (s1 == s2) return 0;
126 if (s1 == NULL) return -1;
127 if (s2 == NULL) return 1;
/* keep going while both strings have characters left and the limit n
   has not reached zero */
129 while (*s1 && *s2 && n) {
/* decode one codepoint from each string; size1/size2 presumably receive
   the number of bytes each character occupied */
132 c1 = next_codepoint(iconv_convenience, s1, &size1);
133 c2 = next_codepoint(iconv_convenience, s2, &size2);
/* an undecodable character on either side ends the codepoint-wise
   comparison and falls back to the C library */
142 if (c1 == INVALID_CODEPOINT ||
143 c2 == INVALID_CODEPOINT) {
144 /* what else can we do?? */
/* NOTE(review): strcasecmp() takes no length, so this fallback ignores
   the remaining limit n and can compare past the requested length; a
   bounded call such as strncasecmp() looks intended -- confirm */
145 return strcasecmp(s1, s2);
/* the two codepoints still differ after both are upper-cased */
148 if (toupper_m(c1) != toupper_m(c2)) {
163 * @note The comparison is case-insensitive.
165 _PUBLIC_ bool strequal_m(const char *s1, const char *s2)
167 return strcasecmp_m(s1,s2) == 0;
171 Compare 2 strings (case sensitive).
/* Case-sensitive string equality test. */
173 _PUBLIC_ bool strcsequal_m(const char *s1,const char *s2)
/* equal exactly when strcmp() finds no difference */
180 return strcmp(s1,s2) == 0;
186 NOTE: oldc and newc must be 7 bit characters
/*
 * Per its name, replaces oldc with newc in the multibyte string s.
 * NOTE(review): only the decoding step is visible in this listing; the
 * statement that performs the replacement is not shown.
 */
188 _PUBLIC_ void string_replace_m(char *s, char oldc, char newc)
190 struct smb_iconv_convenience *ic = get_iconv_convenience();
/* decode the character at s; size presumably receives its byte length */
193 codepoint_t c = next_codepoint(ic, s, &size);
202 Paranoid strcpy into a buffer of given length (includes terminating
203 zero). Strips out all but 'a-zA-Z0-9' and the characters in other_safe_chars
204 and replaces them with '_'. Deliberately does *NOT* check for multibyte
205 characters. Don't change it!
/*
 * Restricted string copy from src into dest (capacity maxlength),
 * working byte by byte and keeping only characters considered safe.
 */
208 _PUBLIC_ char *alpha_strcpy(char *dest, const char *src, const char *other_safe_chars, size_t maxlength)
212 if (maxlength == 0) {
213 /* can't fit any bytes at all! */
/* a NULL destination is reported at debug level 0 */
218 DEBUG(0,("ERROR: NULL dest in alpha_strcpy\n"));
/* source length does not fit within maxlength */
228 if (len >= maxlength)
/* treat a missing safe-character list as empty so strchr_m() below
   always receives a valid string */
231 if (!other_safe_chars)
232 other_safe_chars = "";
234 for(i = 0; i < len; i++) {
/* mask to 0..255 so the <ctype.h> calls never see a negative value */
235 int val = (src[i] & 0xff);
/* accepted: letters and digits as classified by the C library, plus
   any byte listed in other_safe_chars */
236 if (isupper(val) || islower(val) || isdigit(val) || strchr_m(other_safe_chars, val))
248 Count the number of UCS2 characters in a string. Normally this will
249 be the same as the number of bytes in a string for single byte strings,
250 but will be different for multibyte.
/* Length of the multibyte string s, counted in characters, not bytes. */
252 _PUBLIC_ size_t strlen_m(const char *s)
255 struct smb_iconv_convenience *ic = get_iconv_convenience();
/* fast path: runs while the current byte is non-zero and has its top
   bit clear, i.e. is plain 7-bit */
261 while (*s && !(((uint8_t)*s) & 0x80)) {
/* general path: decode a full character; c_size presumably receives the
   number of bytes it occupied */
272 codepoint_t c = next_codepoint(ic, s, &c_size);
285 Work out the number of multibyte chars in a string, including the NULL
/* Character count of s as given by strlen_m(), plus one for the terminator. */
288 _PUBLIC_ size_t strlen_m_term(const char *s)
294 return strlen_m(s) + 1;
298 Strchr and strrchr_m are a bit complex on general multi-byte strings.
/* Multibyte-aware search for character c in s. */
300 _PUBLIC_ char *strchr_m(const char *s, char c)
302 struct smb_iconv_convenience *ic = get_iconv_convenience();
306 /* characters below 0x3F are guaranteed to not appear in
307 non-initial position in multi-byte charsets */
/* the mask test is true exactly when c lies in 0x00..0x3F (both of its
   top two bits clear) */
308 if ((c & 0xC0) == 0) {
/* general path: decode the character at s; size presumably receives its
   byte length */
314 codepoint_t c2 = next_codepoint(ic, s, &size);
/* hand back the current position; discard_const_p presumably strips the
   const qualifier so it matches the char * return type */
316 return discard_const_p(char, s);
325 * Multibyte-character version of strrchr
/* Multibyte-aware search for the last occurrence of character c in s. */
327 _PUBLIC_ char *strrchr_m(const char *s, char c)
329 struct smb_iconv_convenience *ic = get_iconv_convenience();
336 /* characters below 0x3F are guaranteed to not appear in
337 non-initial position in multi-byte charsets */
/* the mask test is true exactly when c lies in 0x00..0x3F; for those
   the plain byte-wise strrchr() is used */
338 if ((c & 0xC0) == 0) {
339 return strrchr(s, c);
/* general path: decode the character at s; size presumably receives its
   byte length */
344 codepoint_t c2 = next_codepoint(ic, s, &size);
/* record the current position in ret (a later assignment overwrites an
   earlier one) */
346 ret = discard_const_p(char, s);
355 return True if any (multi-byte) character is lower case
/* Report whether the multibyte string contains a lower-case character. */
357 _PUBLIC_ bool strhaslower(const char *string)
359 struct smb_iconv_convenience *ic = get_iconv_convenience();
/* decode the character at string; c_size presumably receives its byte
   length */
365 s = next_codepoint(ic, string, &c_size);
371 return true; /* that means it has lower case chars */
379 return True if any (multi-byte) character is upper case
/* Report whether the multibyte string contains an upper-case character. */
381 _PUBLIC_ bool strhasupper(const char *string)
383 struct smb_iconv_convenience *ic = get_iconv_convenience();
/* decode the character at string; c_size presumably receives its byte
   length */
389 s = next_codepoint(ic, string, &c_size);
395 return true; /* that means it has upper case chars */
403 Convert a string to lower case, allocated with talloc
/*
 * Build a lower-case copy of src in memory allocated on ctx.
 * The result buffer is first sized for the worst case and trimmed to
 * the bytes actually written at the end.
 */
405 _PUBLIC_ char *strlower_talloc(TALLOC_CTX *ctx, const char *src)
409 struct smb_iconv_convenience *iconv_convenience = get_iconv_convenience();
411 /* this takes advantage of the fact that upper/lower can't
412 change the length of a character by more than 1 byte */
/* worst case: twice the input byte length, plus one for the terminator */
413 dest = talloc_array(ctx, char, 2*(strlen(src))+1);
/* decode the next source character */
420 codepoint_t c = next_codepoint(iconv_convenience, src, &c_size);
/* write the character at output offset size; c_size is reused for the
   value push_codepoint() returns (presumably the encoded byte length) */
425 c_size = push_codepoint(iconv_convenience, dest+size, c);
435 /* trim it so talloc_append_string() works */
/* NOTE(review): the realloc result replaces dest directly and no failure
   check is visible before dest is used on the next line -- confirm */
436 dest = talloc_realloc(ctx, dest, char, size+1);
/* name the talloc chunk after the string it holds */
438 talloc_set_name_const(dest, dest);
444 Convert a string to UPPER case, allocated with talloc
445 source length limited to n bytes
/*
 * Build an upper-case copy of at most n units of src in memory
 * allocated on ctx. The result buffer is first sized for the worst case
 * and trimmed to the bytes actually written at the end.
 */
447 _PUBLIC_ char *strupper_talloc_n(TALLOC_CTX *ctx, const char *src, size_t n)
451 struct smb_iconv_convenience *iconv_convenience = get_iconv_convenience();
457 /* this takes advantage of the fact that upper/lower can't
458 change the length of a character by more than 1 byte */
/* worst case: two output bytes for each of n input bytes plus terminator */
459 dest = talloc_array(ctx, char, 2*(n+1));
/* NOTE(review): n drops by one per iteration, while each iteration
   decodes a whole character; if src advances by c_size (not visible
   here), n limits characters rather than bytes, which for multibyte
   input could exceed both the n-byte source limit and the 2*(n+1)
   allocation -- confirm */
464 while (*src && n--) {
/* decode the next source character */
466 codepoint_t c = next_codepoint(iconv_convenience, src, &c_size);
/* write the character at output offset size; c_size is reused for the
   value push_codepoint() returns (presumably the encoded byte length) */
471 c_size = push_codepoint(iconv_convenience, dest+size, c);
481 /* trim it so talloc_append_string() works */
/* NOTE(review): the realloc result replaces dest directly and no failure
   check is visible before dest is used on the next line -- confirm */
482 dest = talloc_realloc(ctx, dest, char, size+1);
/* name the talloc chunk after the string it holds */
484 talloc_set_name_const(dest, dest);
490 Convert a string to UPPER case, allocated with talloc
492 _PUBLIC_ char *strupper_talloc(TALLOC_CTX *ctx, const char *src)
494 return strupper_talloc_n(ctx, src, src?strlen(src):0);
498 talloc_strdup() a unix string to upper case.
500 _PUBLIC_ char *talloc_strdup_upper(TALLOC_CTX *ctx, const char *src)
502 return strupper_talloc(ctx, src);
506 Convert a string to lower case.
/* Lower-case the multibyte string s, modifying the caller's buffer. */
508 _PUBLIC_ void strlower_m(char *s)
511 struct smb_iconv_convenience *iconv_convenience;
513 /* this is quite a common operation, so we want it to be
514 fast. We optimise for the ascii case, knowing that all our
515 supported multi-byte character sets are ascii-compatible
516 (ie. they match for the first 128 chars) */
/* fast path: 7-bit bytes are lowered in place with tolower() */
517 while (*s && !(((uint8_t)*s) & 0x80)) {
518 *s = tolower((uint8_t)*s);
/* the conversion context is only fetched once the fast path is done */
525 iconv_convenience = get_iconv_convenience();
530 size_t c_size, c_size2;
/* c_size: bytes of the character read; c_size2: bytes of the lowered
   character written at d */
531 codepoint_t c = next_codepoint(iconv_convenience, s, &c_size);
532 c_size2 = push_codepoint(iconv_convenience, d, tolower_m(c));
/* a lowered character that needs more bytes than the original is fatal
   -- presumably because d writes into the same buffer s is read from */
533 if (c_size2 > c_size) {
534 DEBUG(0,("FATAL: codepoint 0x%x (0x%x) expanded from %d to %d bytes in strlower_m\n",
535 c, tolower_m(c), (int)c_size, (int)c_size2));
536 smb_panic("codepoint expansion in strlower_m\n");
545 Convert a string to UPPER case.
/* Upper-case the multibyte string s, modifying the caller's buffer. */
547 _PUBLIC_ void strupper_m(char *s)
550 struct smb_iconv_convenience *iconv_convenience;
552 /* this is quite a common operation, so we want it to be
553 fast. We optimise for the ascii case, knowing that all our
554 supported multi-byte character sets are ascii-compatible
555 (ie. they match for the first 128 chars) */
/* fast path: 7-bit bytes are raised in place with toupper() */
556 while (*s && !(((uint8_t)*s) & 0x80)) {
557 *s = toupper((uint8_t)*s);
/* the conversion context is only fetched once the fast path is done */
564 iconv_convenience = get_iconv_convenience();
569 size_t c_size, c_size2;
/* c_size: bytes of the character read; c_size2: bytes of the raised
   character written at d */
570 codepoint_t c = next_codepoint(iconv_convenience, s, &c_size);
571 c_size2 = push_codepoint(iconv_convenience, d, toupper_m(c));
/* a raised character that needs more bytes than the original is fatal
   -- presumably because d writes into the same buffer s is read from */
572 if (c_size2 > c_size) {
573 DEBUG(0,("FATAL: codepoint 0x%x (0x%x) expanded from %d to %d bytes in strupper_m\n",
574 c, toupper_m(c), (int)c_size, (int)c_size2));
575 smb_panic("codepoint expansion in strupper_m\n");
585 Find the number of 'c' chars in a string
/* Count the characters of the multibyte string s that equal c. */
587 _PUBLIC_ size_t count_chars_m(const char *s, char c)
589 struct smb_iconv_convenience *ic = get_iconv_convenience();
/* decode the character at s; size presumably receives its byte length */
594 codepoint_t c2 = next_codepoint(ic, s, &size);
/* compare the decoded codepoint, not a raw byte, against c */
595 if (c2 == c) count++;
604 * Copy a string from a char* unix src to a dos codepage string destination.
606 * @return the number of bytes occupied by the string in the destination.
608 * @param flags can include
610 * <dt>STR_TERMINATE</dt> <dd>means include the null termination</dd>
611 * <dt>STR_UPPER</dt> <dd>means uppercase in the destination</dd>
614 * @param dest_len the maximum length in bytes allowed in the
615 * destination. If @p dest_len is -1 then no maximum is used.
/*
 * Convert src from CH_UNIX to CH_DOS into dest, writing at most
 * dest_len bytes.
 */
617 static ssize_t push_ascii(void *dest, const char *src, size_t dest_len, int flags)
/* STR_UPPER: make an upper-cased temporary copy, then recurse with the
   flag cleared so the recursion happens only once */
622 if (flags & STR_UPPER) {
623 char *tmpbuf = strupper_talloc(NULL, src);
624 if (tmpbuf == NULL) {
627 ret = push_ascii(dest, tmpbuf, dest_len, flags & ~STR_UPPER);
/* source length in bytes, not counting the terminator */
632 src_len = strlen(src);
/* either terminate flag selects the conditional statement that follows
   (not visible in this listing) */
634 if (flags & (STR_TERMINATE | STR_TERMINATE_ASCII))
637 return convert_string(CH_UNIX, CH_DOS, src, src_len, dest, dest_len);
641 * Copy a string from a unix char* src to an ASCII destination,
642 * allocating a buffer using talloc().
644 * @param dest always set at least to NULL
646 * @returns The number of bytes occupied by the string in the destination
647 * or -1 in case of error.
/* Convert src from CH_UNIX to CH_DOS into a buffer allocated on ctx. */
649 _PUBLIC_ ssize_t push_ascii_talloc(TALLOC_CTX *ctx, char **dest, const char *src)
/* +1 so the terminating NUL is part of the converted range */
651 size_t src_len = strlen(src)+1;
653 return convert_string_talloc(ctx, CH_UNIX, CH_DOS, src, src_len, (void **)dest);
658 * Copy a string from a dos codepage source to a unix char* destination.
660 * The resulting string in "dest" is always null terminated.
662 * @param flags can have:
664 * <dt>STR_TERMINATE</dt>
665 * <dd>STR_TERMINATE means the string in @p src
666 * is null terminated, and src_len is ignored.</dd>
669 * @param src_len is the length of the source area in bytes.
670 * @returns the number of bytes occupied by the string in @p src.
/*
 * Convert src from CH_DOS to CH_UNIX into dest (capacity dest_len) and
 * NUL-terminate the result.
 */
672 static ssize_t pull_ascii(char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
676 if (flags & (STR_TERMINATE | STR_TERMINATE_ASCII)) {
/* (size_t)-1 means no length was supplied: measure the NUL-terminated
   source and include its terminator */
677 if (src_len == (size_t)-1) {
678 src_len = strlen((const char *)src) + 1;
/* a length was supplied: look for a terminator within src_len bytes only */
680 size_t len = strnlen((const char *)src, src_len);
687 ret = convert_string(CH_DOS, CH_UNIX, src, src_len, dest, dest_len);
/* terminate at ret, or at the last byte of dest if ret is larger */
690 dest[MIN(ret, dest_len-1)] = 0;
696 * Copy a string from a char* src to a unicode destination.
698 * @returns the number of bytes occupied by the string in the destination.
700 * @param flags can have:
703 * <dt>STR_TERMINATE <dd>means include the null termination.
704 * <dt>STR_UPPER <dd>means uppercase in the destination.
705 * <dt>STR_NOALIGN <dd>means don't do alignment.
708 * @param dest_len is the maximum length allowed in the
709 * destination. If dest_len is -1 then no maximum is used.
/*
 * Convert src from CH_UNIX to CH_UTF16 into dest, writing at most
 * dest_len bytes.
 */
711 static ssize_t push_ucs2(void *dest, const char *src, size_t dest_len, int flags)
/* source length in bytes, not counting the terminator */
714 size_t src_len = strlen(src);
/* STR_UPPER: make an upper-cased temporary copy, then recurse with the
   flag cleared so the recursion happens only once */
717 if (flags & STR_UPPER) {
718 char *tmpbuf = strupper_talloc(NULL, src);
719 if (tmpbuf == NULL) {
722 ret = push_ucs2(dest, tmpbuf, dest_len, flags & ~STR_UPPER);
727 if (flags & STR_TERMINATE)
/* when ucs2_align() asks for alignment, skip one byte of dest and
   shrink the remaining space, never below zero */
730 if (ucs2_align(NULL, dest, flags)) {
732 dest = (void *)((char *)dest + 1);
733 if (dest_len) dest_len--;
737 /* ucs2 is always a multiple of 2 bytes */
740 ret = convert_string(CH_UNIX, CH_UTF16, src, src_len, dest, dest_len);
/* (size_t)-1 is the failure value checked for here */
741 if (ret == (size_t)-1) {
752 * Copy a string from a unix char* src to a UCS2 destination,
753 * allocating a buffer using talloc().
755 * @param dest always set at least to NULL
757 * @returns The number of bytes occupied by the string in the destination
758 * or -1 in case of error.
/* Convert src from CH_UNIX to CH_UTF16 into a buffer allocated on ctx. */
760 _PUBLIC_ ssize_t push_ucs2_talloc(TALLOC_CTX *ctx, void **dest, const char *src)
/* +1 so the terminating NUL is part of the converted range */
762 size_t src_len = strlen(src)+1;
764 return convert_string_talloc(ctx, CH_UNIX, CH_UTF16, src, src_len, dest);
769 * Copy a string from a unix char* src to a UTF-8 destination, allocating a buffer using talloc
771 * @param dest always set at least to NULL
773 * @returns The number of bytes occupied by the string in the destination
/* Convert src from CH_UNIX to CH_UTF8 into a buffer allocated on ctx. */
776 _PUBLIC_ ssize_t push_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src)
/* +1 so the terminating NUL is part of the converted range */
778 size_t src_len = strlen(src)+1;
780 return convert_string_talloc(ctx, CH_UNIX, CH_UTF8, src, src_len, (void **)dest);
784 Copy a string from a ucs2 source to a unix char* destination.
786 STR_TERMINATE means the string in src is null terminated.
787 STR_NOALIGN means don't try to align.
788 if STR_TERMINATE is set then src_len is ignored if it is -1.
789 src_len is the length of the source area in bytes
790 Return the number of bytes occupied by the string in src.
791 The resulting string in "dest" is always null terminated.
/*
 * Convert src from CH_UTF16 to CH_UNIX into dest (capacity dest_len)
 * and NUL-terminate the result.
 * NOTE(review): declared to return size_t, whereas pull_ascii() and the
 * public pull_string() wrapper use ssize_t -- confirm this is intended.
 */
794 static size_t pull_ucs2(char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
/* when ucs2_align() asks for alignment, skip one byte of the source */
798 if (ucs2_align(NULL, src, flags)) {
799 src = (const void *)((const char *)src + 1);
804 if (flags & STR_TERMINATE) {
/* (size_t)-1 means no length was supplied: measure the terminated
   source with utf16_len() */
805 if (src_len == (size_t)-1) {
806 src_len = utf16_len(src);
/* a length was supplied: utf16_len_n() measures within that bound */
808 src_len = utf16_len_n(src, src_len);
812 /* ucs2 is always a multiple of 2 bytes */
813 if (src_len != (size_t)-1)
816 ret = convert_string(CH_UTF16, CH_UNIX, src, src_len, dest, dest_len);
/* terminate at ret, or at the last byte of dest if ret is larger */
818 dest[MIN(ret, dest_len-1)] = 0;
824 * Copy a string from a ASCII src to a unix char * destination, allocating a buffer using talloc
826 * @param dest always set at least to NULL
828 * @returns The number of bytes occupied by the string in the destination
/* Convert src from CH_DOS to CH_UNIX into a buffer allocated on ctx. */
831 _PUBLIC_ ssize_t pull_ascii_talloc(TALLOC_CTX *ctx, char **dest, const char *src)
/* +1 so the terminating NUL is part of the converted range */
833 size_t src_len = strlen(src)+1;
835 return convert_string_talloc(ctx, CH_DOS, CH_UNIX, src, src_len, (void **)dest);
839 * Copy a string from a UCS2 src to a unix char * destination, allocating a buffer using talloc
841 * @param dest always set at least to NULL
843 * @returns The number of bytes occupied by the string in the destination
/* Convert src from CH_UTF16 to CH_UNIX into a buffer allocated on ctx. */
846 _PUBLIC_ ssize_t pull_ucs2_talloc(TALLOC_CTX *ctx, char **dest, const void *src)
/* source length as reported by utf16_len(); presumably in bytes -- TODO
   confirm whether it counts the terminator */
848 size_t src_len = utf16_len(src);
850 return convert_string_talloc(ctx, CH_UTF16, CH_UNIX, src, src_len, (void **)dest);
854 * Copy a string from a UTF-8 src to a unix char * destination, allocating a buffer using talloc
856 * @param dest always set at least to NULL
858 * @returns The number of bytes occupied by the string in the destination
/* Convert src from CH_UTF8 to CH_UNIX into a buffer allocated on ctx. */
861 _PUBLIC_ ssize_t pull_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src)
/* +1 so the terminating NUL is part of the converted range */
863 size_t src_len = strlen(src)+1;
865 return convert_string_talloc(ctx, CH_UTF8, CH_UNIX, src, src_len, (void **)dest);
869 Copy a string from a char* src to a unicode or ascii
870 dos codepage destination choosing unicode or ascii based on the
871 flags in the SMB buffer starting at base_ptr.
872 Return the number of bytes occupied by the string in the destination.
874 STR_TERMINATE means include the null termination.
875 STR_UPPER means uppercase in the destination.
876 STR_ASCII use ascii even with unicode packet.
877 STR_NOALIGN means don't do alignment.
878 dest_len is the maximum length allowed in the destination. If dest_len
879 is -1 then no maximum is used.
/*
 * Write src to dest using the encoding chosen by flags.
 * STR_ASCII is tested first, so it takes precedence when both encoding
 * flags are set.
 */
882 _PUBLIC_ ssize_t push_string(void *dest, const char *src, size_t dest_len, int flags)
884 if (flags & STR_ASCII) {
885 return push_ascii(dest, src, dest_len, flags);
886 } else if (flags & STR_UNICODE) {
887 return push_ucs2(dest, src, dest_len, flags);
/* smb_panic() reports a call made without an encoding flag */
889 smb_panic("push_string requires either STR_ASCII or STR_UNICODE flag to be set");
896 Copy a string from a unicode or ascii source (depending on
897 the packet flags) to a char* destination.
899 STR_TERMINATE means the string in src is null terminated.
900 STR_UNICODE means to force as unicode.
901 STR_ASCII use ascii even with unicode packet.
902 STR_NOALIGN means don't do alignment.
903 if STR_TERMINATE is set then src_len is ignored if it is -1
904 src_len is the length of the source area in bytes.
905 Return the number of bytes occupied by the string in src.
906 The resulting string in "dest" is always null terminated.
/*
 * Read a string from src into dest using the encoding chosen by flags.
 * STR_ASCII is tested first, so it takes precedence when both encoding
 * flags are set.
 */
909 _PUBLIC_ ssize_t pull_string(char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
911 if (flags & STR_ASCII) {
912 return pull_ascii(dest, src, dest_len, src_len, flags);
913 } else if (flags & STR_UNICODE) {
914 return pull_ucs2(dest, src, dest_len, src_len, flags);
/* smb_panic() reports a call made without an encoding flag */
916 smb_panic("pull_string requires either STR_ASCII or STR_UNICODE flag to be set");
923 * Convert string from one encoding to another, making error checking etc
925 * @param src pointer to source string (multibyte or singlebyte)
926 * @param srclen length of the source string in bytes
927 * @param dest pointer to destination string (multibyte or singlebyte)
928 * @param destlen maximal length allowed for string
929 * @returns the number of bytes occupied in the destination
/*
 * Convert srclen bytes at src from charset "from" to charset "to",
 * writing into dest (capacity destlen).
 */
931 _PUBLIC_ ssize_t convert_string(charset_t from, charset_t to,
932 void const *src, size_t srclen,
933 void *dest, size_t destlen)
/* delegates to convert_string_convenience() with the context from
   get_iconv_convenience(); the rest of the argument list is not visible
   in this listing */
935 return convert_string_convenience(get_iconv_convenience(), from, to,
941 * Convert between character sets, allocating a new buffer using talloc for the result.
943 * @param srclen length of source buffer.
944 * @param dest always set at least to NULL
945 * @note -1 is not accepted for srclen.
947 * @returns Size in bytes of the converted string; or -1 in case of error.
/*
 * Convert srclen bytes at src from charset "from" to charset "to",
 * with the result allocated on ctx.
 * NOTE(review): the end of the parameter list is not visible in this
 * listing; the call below passes a "dest" argument.
 */
950 _PUBLIC_ ssize_t convert_string_talloc(TALLOC_CTX *ctx,
951 charset_t from, charset_t to,
952 void const *src, size_t srclen,
/* delegates to convert_string_talloc_convenience() with the context from
   get_iconv_convenience() */
955 return convert_string_talloc_convenience(ctx, get_iconv_convenience(),
956 from, to, src, srclen, dest);