2 Unix SMB/CIFS implementation.
3 Samba utility functions
4 Copyright (C) Andrew Tridgell 1992-2001
5 Copyright (C) Simo Sorce 2001
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3 of the License, or
10 (at your option) any later version.
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program. If not, see <http://www.gnu.org/licenses/>.
22 #include "system/locale.h"
23 #include "dynconfig/dynconfig.h"
24 #include "param/param.h"
28 * @brief Unicode string manipulation
31 /* These two tables define the Unicode case handling. They are loaded
32 at startup either via mmap() or read() from the lib directory.
   Pointer states used by the code in this file: NULL and (void *)-1 are
   tested separately by the case-conversion routines, and (void *)-1 is
   stored by load_case_tables() when neither candidate file could be
   mapped. */
33 static void *upcase_table;
34 static void *lowcase_table;
37 /*******************************************************************
38 load the case handling tables
39 ********************************************************************/
/* Populate upcase_table and lowcase_table.
   Each file is first requested under dyn_DATADIR; if map_file() returns NULL
   the relative "codepages/" path is tried; if that also returns NULL the
   table pointer is set to (void *)-1. Every map_file() call passes 0x20000
   as its second argument. */
40 static void load_case_tables(void)
44 mem_ctx = talloc_init("load_case_tables");
46 smb_panic("No memory for case_tables");
/* NOTE(review): the talloc_asprintf() results are handed straight to
   map_file() with no visible NULL check on the formatted path - confirm
   map_file() copes with a NULL name when the allocation fails. */
48 upcase_table = map_file(talloc_asprintf(mem_ctx, "%s/upcase.dat", dyn_DATADIR), 0x20000);
49 lowcase_table = map_file(talloc_asprintf(mem_ctx, "%s/lowcase.dat", dyn_DATADIR), 0x20000);
51 if (upcase_table == NULL) {
52 /* try also under codepages for testing purposes */
53 upcase_table = map_file("codepages/upcase.dat", 0x20000);
54 if (upcase_table == NULL) {
/* (void *)-1 records "no table available", distinct from NULL */
55 upcase_table = (void *)-1;
58 if (lowcase_table == NULL) {
59 /* try also under codepages for testing purposes */
60 lowcase_table = map_file("codepages/lowcase.dat", 0x20000);
61 if (lowcase_table == NULL) {
/* same sentinel as for the upper-case table */
62 lowcase_table = (void *)-1;
68 Convert a codepoint_t to upper case.
/* Upper-case a single codepoint using upcase_table.
   Three conditions are tested before the lookup: the table pointer is still
   NULL, the table pointer equals the (void *)-1 sentinel, and val has a bit
   set above the low 16 (val & 0xFFFF0000). When the lookup is reached the
   result is what SVAL() reads at byte offset val*2, i.e. the table holds
   two bytes per codepoint. */
70 _PUBLIC_ codepoint_t toupper_w(codepoint_t val)
75 if (upcase_table == NULL) {
78 if (upcase_table == (void *)-1) {
81 if (val & 0xFFFF0000) {
84 return SVAL(upcase_table, val*2);
88 Convert a codepoint_t to lower case.
/* Lower-case a single codepoint using lowcase_table.
   Three conditions are tested before the lookup: the table pointer is still
   NULL, the table pointer equals the (void *)-1 sentinel, and val has a bit
   set above the low 16 (val & 0xFFFF0000). When the lookup is reached the
   result is what SVAL() reads at byte offset val*2, i.e. the table holds
   two bytes per codepoint. */
90 _PUBLIC_ codepoint_t tolower_w(codepoint_t val)
95 if (lowcase_table == NULL) {
98 if (lowcase_table == (void *)-1) {
101 if (val & 0xFFFF0000) {
104 return SVAL(lowcase_table, val*2);
108 compare two codepoints case insensitively
/* Compare codepoints c1 and c2 without regard to case; returns an int. */
110 _PUBLIC_ int codepoint_cmpi(codepoint_t c1, codepoint_t c2)
/* the case-insensitive part of the test folds both sides with toupper_w() */
113 toupper_w(c1) == toupper_w(c2)) {
120 Case insensitive string comparison
/* Case-insensitive comparison of two multibyte strings, one codepoint at a
   time.
   Pointer handling: identical pointers (including two NULLs) give 0, a NULL
   s1 alone gives -1, a NULL s2 alone gives 1.
   The iconv handle comes from lp_iconv_convenience(global_loadparm). */
122 _PUBLIC_ int strcasecmp_m(const char *s1, const char *s2)
124 codepoint_t c1=0, c2=0;
126 struct smb_iconv_convenience *iconv_convenience = lp_iconv_convenience(global_loadparm);
128 /* handle null ptr comparisons to simplify the use in qsort */
129 if (s1 == s2) return 0;
130 if (s1 == NULL) return -1;
131 if (s2 == NULL) return 1;
/* decode the next codepoint of each string; size1/size2 are filled in by
   next_codepoint() (presumably the byte length consumed - confirm) */
134 c1 = next_codepoint(iconv_convenience, s1, &size1);
135 c2 = next_codepoint(iconv_convenience, s2, &size2);
144 if (c1 == INVALID_CODEPOINT ||
145 c2 == INVALID_CODEPOINT) {
146 /* what else can we do?? */
147 return strcasecmp(s1, s2);
/* otherwise the two codepoints are compared after upper-casing both */
150 if (toupper_w(c1) != toupper_w(c2)) {
159 * Get the next token from a string, return False if none found.
160 * Handles double-quotes.
162 * Based on a routine by GJC@VILLAGE.COM.
163 * Extensively modified by Andrew.Tridgell@anu.edu.au
/* Pull the next token out of the string at *ptr and copy it into buff.
   ptr     - in/out cursor; after the copy it is set one past the character
             that stopped the scan, or left on the terminating NUL.
   buff    - destination for the token.
   sep     - separator characters, tested one by one with strchr_m().
   bufsize - the copy loop runs only while len < bufsize.
   Returns bool. */
165 _PUBLIC_ bool next_token(const char **ptr,char *buff, const char *sep, size_t bufsize)
176 /* default to simple separators */
180 /* find the first non sep char */
181 while (*s && strchr_m(sep,*s))
188 /* copy over the token */
/* a separator ends the token only while "quoted" is false */
189 for (quoted = false; len < bufsize && *s && (quoted || !strchr_m(sep,*s)); s++) {
/* step over the terminating separator unless the scan stopped at the NUL */
198 *ptr = (*s) ? s+1 : s;
205 Case insensitive string comparison, length limited
/* Case-insensitive comparison of two multibyte strings, limited by n.
   Pointer handling: identical pointers (including two NULLs) give 0, a NULL
   s1 alone gives -1, a NULL s2 alone gives 1.
   The main loop runs while both strings have data and n is non-zero. */
207 _PUBLIC_ int strncasecmp_m(const char *s1, const char *s2, size_t n)
209 codepoint_t c1=0, c2=0;
211 struct smb_iconv_convenience *iconv_convenience = lp_iconv_convenience(global_loadparm);
213 /* handle null ptr comparisons to simplify the use in qsort */
214 if (s1 == s2) return 0;
215 if (s1 == NULL) return -1;
216 if (s2 == NULL) return 1;
218 while (*s1 && *s2 && n) {
/* decode the next codepoint of each string; size1/size2 are filled in by
   next_codepoint() (presumably the byte length consumed - confirm) */
221 c1 = next_codepoint(iconv_convenience, s1, &size1);
222 c2 = next_codepoint(iconv_convenience, s2, &size2);
231 if (c1 == INVALID_CODEPOINT ||
232 c2 == INVALID_CODEPOINT) {
233 /* what else can we do?? */
/* NOTE(review): this fallback calls strcasecmp(), not strncasecmp(), so the
   remaining limit n is not applied on this path - confirm whether the
   length limit is meant to hold for undecodable input as well. */
234 return strcasecmp(s1, s2);
/* otherwise the two codepoints are compared after upper-casing both */
237 if (toupper_w(c1) != toupper_w(c2)) {
252 * @note The comparison is case-insensitive.
/* True when strcasecmp_m() reports the two strings as equal. NULL handling
   is whatever strcasecmp_m() does: identical pointers, including two NULLs,
   compare equal. */
254 _PUBLIC_ bool strequal_w(const char *s1, const char *s2)
256 return strcasecmp_m(s1,s2) == 0;
260 Compare 2 strings (case sensitive).
/* Case-sensitive equality test for two strings. */
262 _PUBLIC_ bool strcsequal_w(const char *s1,const char *s2)
/* final step: plain byte-wise strcmp() */
269 return strcmp(s1,s2) == 0;
275 NOTE: oldc and newc must be 7 bit characters
/* Replace oldc with newc in the writable string s; returns nothing. */
277 _PUBLIC_ void string_replace_w(char *s, char oldc, char newc)
/* the string is decoded one codepoint at a time; size is filled in by
   next_codepoint() (presumably the byte length of that codepoint - confirm) */
281 codepoint_t c = next_codepoint(lp_iconv_convenience(global_loadparm), s, &size);
290 Paranoid strcpy into a buffer of given length (includes terminating
291 zero). Strips out all but 'a-zA-Z0-9' and the characters in other_safe_chars
292 and replaces them with '_'. Deliberately does *NOT* check for multibyte
293 characters. Don't change it !
/* Filtered copy of src into dest.
   dest             - destination buffer; a NULL dest is logged at debug
                      level 0.
   src              - source bytes, examined one byte at a time.
   other_safe_chars - extra characters to accept; NULL is treated as "".
   maxlength        - size limit; 0 is special-cased up front, and the source
                      length is checked against it (len >= maxlength).
   Returns char *. */
296 _PUBLIC_ char *alpha_strcpy(char *dest, const char *src, const char *other_safe_chars, size_t maxlength)
300 if (maxlength == 0) {
301 /* can't fit any bytes at all! */
306 DEBUG(0,("ERROR: NULL dest in alpha_strcpy\n"));
316 if (len >= maxlength)
319 if (!other_safe_chars)
320 other_safe_chars = "";
322 for(i = 0; i < len; i++) {
/* mask to 0..255 so the ctype calls never see a negative value */
323 int val = (src[i] & 0xff);
/* accepted: upper-case, lower-case, digit, or listed in other_safe_chars */
324 if (isupper(val) || islower(val) || isdigit(val) || strchr_m(other_safe_chars, val))
336 Count the number of UCS2 characters in a string. Normally this will
337 be the same as the number of bytes in a string for single byte strings,
338 but will be different for multibyte.
/* Length of a multibyte string; returns size_t. */
340 _PUBLIC_ size_t strlen_m(const char *s)
/* fast path: walk bytes whose top bit (0x80) is clear */
348 while (*s && !(((uint8_t)*s) & 0x80)) {
/* remaining input is decoded with next_codepoint(); c_size is filled in by
   that call (presumably the byte length of the codepoint - confirm) */
359 codepoint_t c = next_codepoint(lp_iconv_convenience(global_loadparm), s, &c_size);
372 Work out the number of multibyte chars in a string, including the NUL terminator.
/* strlen_m() of s plus one for the terminator. */
375 _PUBLIC_ size_t strlen_m_term(const char *s)
381 return strlen_m(s) + 1;
385 strchr_m and strrchr_m are a bit complex on general multi-byte strings.
/* Multibyte-aware strchr: look for character c in s and return a non-const
   pointer into s. */
387 _PUBLIC_ char *strchr_m(const char *s, char c)
389 /* characters at or below 0x3F (top two bits clear) are guaranteed to not appear in
390 non-initial position in multi-byte charsets */
391 if ((c & 0xC0) == 0) {
/* general case: decode s one codepoint at a time */
397 codepoint_t c2 = next_codepoint(lp_iconv_convenience(global_loadparm), s, &size);
/* the match is returned with the const qualifier dropped */
399 return discard_const_p(char, s);
408 * Multibyte-character version of strrchr
/* Multibyte-aware strrchr: look for character c in s and return a non-const
   pointer into s. */
410 _PUBLIC_ char *strrchr_m(const char *s, char c)
414 /* characters at or below 0x3F (top two bits clear) are guaranteed to not appear in
415 non-initial position in multi-byte charsets */
416 if ((c & 0xC0) == 0) {
/* such characters can be located with the plain byte-wise strrchr() */
417 return strrchr(s, c);
/* general case: decode s one codepoint at a time */
422 codepoint_t c2 = next_codepoint(lp_iconv_convenience(global_loadparm), s, &size);
/* remember this position (const dropped) as the current result */
424 ret = discard_const_p(char, s);
433 return True if any (multi-byte) character is lower case
/* Scan string codepoint by codepoint and report whether a lower-case
   character is present. */
435 _PUBLIC_ bool strhaslower(const char *string)
/* decode the next codepoint; c_size is filled in by next_codepoint() */
442 s = next_codepoint(lp_iconv_convenience(global_loadparm), string, &c_size);
448 return true; /* that means it has lower case chars */
456 return True if any (multi-byte) character is upper case
/* Scan string codepoint by codepoint and report whether an upper-case
   character is present. */
458 _PUBLIC_ bool strhasupper(const char *string)
/* decode the next codepoint; c_size is filled in by next_codepoint() */
465 s = next_codepoint(lp_iconv_convenience(global_loadparm), string, &c_size);
471 return true; /* that means it has upper case chars */
479 Convert a string to lower case, allocated with talloc
/* Build a lower-cased copy of src as a talloc array allocated on ctx.
   The buffer is first sized at 2*strlen(src)+1 bytes, filled by decoding each
   source codepoint and re-encoding it at dest+size, then resized to size+1
   bytes. */
481 _PUBLIC_ char *strlower_talloc(TALLOC_CTX *ctx, const char *src)
485 struct smb_iconv_convenience *iconv_convenience = lp_iconv_convenience(global_loadparm);
487 /* this takes advantage of the fact that upper/lower can't
488 change the length of a character by more than 1 byte */
489 dest = talloc_array(ctx, char, 2*(strlen(src))+1);
/* decode one source codepoint; c_size is filled in by next_codepoint() */
496 codepoint_t c = next_codepoint(iconv_convenience, src, &c_size);
/* re-encode at the current write offset; c_size is reused for the result */
501 c_size = push_codepoint(iconv_convenience, dest+size, c);
511 /* trim it so talloc_append_string() works */
/* NOTE(review): the realloc result overwrites dest and is used by the next
   visible statement - confirm a NULL return is handled in between. */
512 dest = talloc_realloc(ctx, dest, char, size+1);
/* the allocation is named with its own string contents */
514 talloc_set_name_const(dest, dest);
520 Convert a string to UPPER case, allocated with talloc
/* Build an upper-cased copy of src as a talloc array allocated on ctx.
   The buffer is first sized at 2*strlen(src)+1 bytes, filled by decoding each
   source codepoint and re-encoding it at dest+size, then resized to size+1
   bytes. */
522 _PUBLIC_ char *strupper_talloc(TALLOC_CTX *ctx, const char *src)
526 struct smb_iconv_convenience *iconv_convenience = lp_iconv_convenience(global_loadparm);
532 /* this takes advantage of the fact that upper/lower can't
533 change the length of a character by more than 1 byte */
534 dest = talloc_array(ctx, char, 2*(strlen(src))+1);
/* decode one source codepoint; c_size is filled in by next_codepoint() */
541 codepoint_t c = next_codepoint(iconv_convenience, src, &c_size);
/* re-encode at the current write offset; c_size is reused for the result */
546 c_size = push_codepoint(iconv_convenience, dest+size, c);
556 /* trim it so talloc_append_string() works */
/* NOTE(review): the realloc result overwrites dest and is used by the next
   visible statement - confirm a NULL return is handled in between. */
557 dest = talloc_realloc(ctx, dest, char, size+1);
/* the allocation is named with its own string contents */
559 talloc_set_name_const(dest, dest);
565 Convert a string to lower case.
/* Lower-case the writable string s; returns nothing.
   Leading bytes with the top bit clear are converted directly with
   tolower(); the iconv handle is fetched only after that loop, and the
   remaining input is decoded with next_codepoint() and re-encoded through
   push_codepoint() into d. */
567 _PUBLIC_ void strlower_m(char *s)
570 struct smb_iconv_convenience *iconv_convenience;
572 /* this is quite a common operation, so we want it to be
573 fast. We optimise for the ascii case, knowing that all our
574 supported multi-byte character sets are ascii-compatible
575 (ie. they match for the first 128 chars) */
576 while (*s && !(((uint8_t)*s) & 0x80)) {
/* cast keeps the tolower() argument non-negative */
577 *s = tolower((uint8_t)*s);
584 iconv_convenience = lp_iconv_convenience(global_loadparm);
/* c_size: value reported for the source codepoint; c_size2: value returned
   for its lower-case re-encoding */
589 size_t c_size, c_size2;
590 codepoint_t c = next_codepoint(iconv_convenience, s, &c_size);
591 c_size2 = push_codepoint(iconv_convenience, d, tolower_w(c));
/* a re-encoding larger than the source character is treated as fatal
   (presumably because the result is written back over the input - confirm) */
592 if (c_size2 > c_size) {
593 DEBUG(0,("FATAL: codepoint 0x%x (0x%x) expanded from %d to %d bytes in strlower_m\n",
594 c, tolower_w(c), (int)c_size, (int)c_size2));
595 smb_panic("codepoint expansion in strlower_m\n");
604 Convert a string to UPPER case.
/* Upper-case the writable string s; returns nothing.
   Leading bytes with the top bit clear are converted directly with
   toupper(); the iconv handle is fetched only after that loop, and the
   remaining input is decoded with next_codepoint() and re-encoded through
   push_codepoint() into d. */
606 _PUBLIC_ void strupper_m(char *s)
609 struct smb_iconv_convenience *iconv_convenience;
611 /* this is quite a common operation, so we want it to be
612 fast. We optimise for the ascii case, knowing that all our
613 supported multi-byte character sets are ascii-compatible
614 (ie. they match for the first 128 chars) */
615 while (*s && !(((uint8_t)*s) & 0x80)) {
/* cast keeps the toupper() argument non-negative */
616 *s = toupper((uint8_t)*s);
623 iconv_convenience = lp_iconv_convenience(global_loadparm);
/* c_size: value reported for the source codepoint; c_size2: value returned
   for its upper-case re-encoding */
628 size_t c_size, c_size2;
629 codepoint_t c = next_codepoint(iconv_convenience, s, &c_size);
630 c_size2 = push_codepoint(iconv_convenience, d, toupper_w(c));
/* a re-encoding larger than the source character is treated as fatal
   (presumably because the result is written back over the input - confirm) */
631 if (c_size2 > c_size) {
632 DEBUG(0,("FATAL: codepoint 0x%x (0x%x) expanded from %d to %d bytes in strupper_m\n",
633 c, toupper_w(c), (int)c_size, (int)c_size2));
634 smb_panic("codepoint expansion in strupper_m\n");
644 Find the number of 'c' chars in a string
646 _PUBLIC_ size_t count_chars_w(const char *s, char c)
652 codepoint_t c2 = next_codepoint(lp_iconv_convenience(global_loadparm), s, &size);
653 if (c2 == c) count++;