2 Unix SMB/CIFS implementation.
3 Samba utility functions
4 Copyright (C) Andrew Tridgell 1992-2001
5 Copyright (C) Simo Sorce 2001
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2 of the License, or
10 (at your option) any later version.
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 #include "system/iconv.h"
27 * @brief Unicode string manipulation
30 /* these 2 tables define the unicode case handling. They are loaded
31 at startup either via mmap() or read() from the lib directory */
32 static void *upcase_table;
33 static void *lowcase_table;
36 /*******************************************************************
37 load the case handling tables
38 ********************************************************************/
39 static void load_case_tables(void)
43 mem_ctx = talloc_init("load_case_tables");
45 smb_panic("No memory for case_tables");
47 upcase_table = map_file(data_path(mem_ctx, "upcase.dat"), 0x20000);
48 lowcase_table = map_file(data_path(mem_ctx, "lowcase.dat"), 0x20000);
50 if (upcase_table == NULL) {
51 /* try also under codepages for testing purposes */
52 upcase_table = map_file("codepages/upcase.dat", 0x20000);
53 if (upcase_table == NULL) {
54 upcase_table = (void *)-1;
57 if (lowcase_table == NULL) {
58 /* try also under codepages for testing purposes */
59 lowcase_table = map_file("codepages/lowcase.dat", 0x20000);
60 if (lowcase_table == NULL) {
61 lowcase_table = (void *)-1;
67 Convert a codepoint_t to upper case.
69 codepoint_t toupper_w(codepoint_t val)
74 if (upcase_table == NULL) {
77 if (upcase_table == (void *)-1) {
80 if (val & 0xFFFF0000) {
83 return SVAL(upcase_table, val*2);
87 Convert a codepoint_t to lower case.
89 codepoint_t tolower_w(codepoint_t val)
94 if (lowcase_table == NULL) {
97 if (lowcase_table == (void *)-1) {
100 if (val & 0xFFFF0000) {
103 return SVAL(lowcase_table, val*2);
107 compare two codepoints case insensitively
109 int codepoint_cmpi(codepoint_t c1, codepoint_t c2)
112 toupper_w(c1) == toupper_w(c2)) {
119 Case insensitive string comparison
121 _PUBLIC_ int strcasecmp_m(const char *s1, const char *s2)
123 codepoint_t c1=0, c2=0;
127 c1 = next_codepoint(s1, &size1);
128 c2 = next_codepoint(s2, &size2);
137 if (c1 == INVALID_CODEPOINT ||
138 c2 == INVALID_CODEPOINT) {
139 /* what else can we do?? */
140 return strcasecmp(s1, s2);
143 if (toupper_w(c1) != toupper_w(c2)) {
152 * Get the next token from a string, return False if none found.
153 * Handles double-quotes.
155 * Based on a routine by GJC@VILLAGE.COM.
156 * Extensively modified by Andrew.Tridgell@anu.edu.au
158 _PUBLIC_ BOOL next_token(const char **ptr,char *buff, const char *sep, size_t bufsize)
169 /* default to simple separators */
173 /* find the first non sep char */
174 while (*s && strchr_m(sep,*s))
181 /* copy over the token */
182 for (quoted = False; len < bufsize && *s && (quoted || !strchr_m(sep,*s)); s++) {
191 *ptr = (*s) ? s+1 : s;
198 Case insensitive string comparison, length limited
200 _PUBLIC_ int strncasecmp_m(const char *s1, const char *s2, size_t n)
202 codepoint_t c1=0, c2=0;
205 while (*s1 && *s2 && n) {
208 c1 = next_codepoint(s1, &size1);
209 c2 = next_codepoint(s2, &size2);
218 if (c1 == INVALID_CODEPOINT ||
219 c2 == INVALID_CODEPOINT) {
220 /* what else can we do?? */
221 return strcasecmp(s1, s2);
224 if (toupper_w(c1) != toupper_w(c2)) {
239 * @note The comparison is case-insensitive.
241 _PUBLIC_ BOOL strequal_w(const char *s1, const char *s2)
248 return strcasecmp_m(s1,s2) == 0;
252 Compare 2 strings (case sensitive).
254 _PUBLIC_ BOOL strcsequal_w(const char *s1,const char *s2)
261 return strcmp(s1,s2) == 0;
267 NOTE: oldc and newc must be 7 bit characters
269 _PUBLIC_ void string_replace_w(char *s, char oldc, char newc)
271 for (; s && *s; s++) {
273 codepoint_t c = next_codepoint(s, &size);
282 Paranoid strcpy into a buffer of given length (includes terminating
283 zero). Strips out all but 'a-zA-Z0-9' and the characters in other_safe_chars
284 and replaces them with '_'. Deliberately does *NOT* check for multibyte
285 characters. Don't change it !
288 _PUBLIC_ char *alpha_strcpy(char *dest, const char *src, const char *other_safe_chars, size_t maxlength)
292 if (maxlength == 0) {
293 /* can't fit any bytes at all! */
298 DEBUG(0,("ERROR: NULL dest in alpha_strcpy\n"));
308 if (len >= maxlength)
311 if (!other_safe_chars)
312 other_safe_chars = "";
314 for(i = 0; i < len; i++) {
315 int val = (src[i] & 0xff);
316 if (isupper(val) || islower(val) || isdigit(val) || strchr_m(other_safe_chars, val))
328 Count the number of UCS2 characters in a string. Normally this will
329 be the same as the number of bytes in a string for single byte strings,
330 but will be different for multibyte.
332 _PUBLIC_ size_t strlen_m(const char *s)
340 while (*s && !(((uint8_t)*s) & 0x80)) {
351 codepoint_t c = next_codepoint(s, &c_size);
364 Work out the number of multibyte chars in a string, including the NULL
367 _PUBLIC_ size_t strlen_m_term(const char *s)
373 return strlen_m(s) + 1;
377 Strchr and strrchr_m are a bit complex on general multi-byte strings.
379 _PUBLIC_ char *strchr_m(const char *s, char c)
381 /* characters up to and including 0x3F (top two bits clear) are
382 guaranteed to not appear in non-initial position in multi-byte charsets */
383 if ((c & 0xC0) == 0) {
389 codepoint_t c2 = next_codepoint(s, &size);
391 return discard_const(s);
400 * Multibyte-character version of strrchr
402 _PUBLIC_ char *strrchr_m(const char *s, char c)
406 /* characters up to and including 0x3F (top two bits clear) are
407 guaranteed to not appear in non-initial position in multi-byte charsets */
408 if ((c & 0xC0) == 0) {
409 return strrchr(s, c);
414 codepoint_t c2 = next_codepoint(s, &size);
416 ret = discard_const(s);
425 return True if any (multi-byte) character is lower case
427 _PUBLIC_ BOOL strhaslower(const char *string)
434 s = next_codepoint(string, &c_size);
440 return True; /* that means it has lower case chars */
448 return True if any (multi-byte) character is upper case
450 _PUBLIC_ BOOL strhasupper(const char *string)
457 s = next_codepoint(string, &c_size);
463 return True; /* that means it has upper case chars */
471 Convert a string to lower case, allocated with talloc
473 _PUBLIC_ char *strlower_talloc(TALLOC_CTX *ctx, const char *src)
478 /* this takes advantage of the fact that upper/lower can't
479 change the length of a character by more than 1 byte */
480 dest = talloc_size(ctx, 2*(strlen(src))+1);
487 codepoint_t c = next_codepoint(src, &c_size);
492 c_size = push_codepoint(dest+size, c);
506 Convert a string to UPPER case, allocated with talloc
508 _PUBLIC_ char *strupper_talloc(TALLOC_CTX *ctx, const char *src)
517 /* this takes advantage of the fact that upper/lower can't
518 change the length of a character by more than 1 byte */
519 dest = talloc_size(ctx, 2*(strlen(src))+1);
526 codepoint_t c = next_codepoint(src, &c_size);
531 c_size = push_codepoint(dest+size, c);
545 Convert a string to lower case.
547 _PUBLIC_ void strlower_m(char *s)
551 /* this is quite a common operation, so we want it to be
552 fast. We optimise for the ascii case, knowing that all our
553 supported multi-byte character sets are ascii-compatible
554 (ie. they match for the first 128 chars) */
555 while (*s && !(((uint8_t)*s) & 0x80)) {
556 *s = tolower((uint8_t)*s);
566 size_t c_size, c_size2;
567 codepoint_t c = next_codepoint(s, &c_size);
568 c_size2 = push_codepoint(d, tolower_w(c));
569 if (c_size2 > c_size) {
570 DEBUG(0,("FATAL: codepoint 0x%x (0x%x) expanded from %d to %d bytes in strlower_m\n",
571 c, tolower_w(c), (int)c_size, (int)c_size2));
572 smb_panic("codepoint expansion in strlower_m\n");
581 Convert a string to UPPER case.
583 _PUBLIC_ void strupper_m(char *s)
587 /* this is quite a common operation, so we want it to be
588 fast. We optimise for the ascii case, knowing that all our
589 supported multi-byte character sets are ascii-compatible
590 (ie. they match for the first 128 chars) */
591 while (*s && !(((uint8_t)*s) & 0x80)) {
592 *s = toupper((uint8_t)*s);
602 size_t c_size, c_size2;
603 codepoint_t c = next_codepoint(s, &c_size);
604 c_size2 = push_codepoint(d, toupper_w(c));
605 if (c_size2 > c_size) {
606 DEBUG(0,("FATAL: codepoint 0x%x (0x%x) expanded from %d to %d bytes in strupper_m\n",
607 c, toupper_w(c), (int)c_size, (int)c_size2));
608 smb_panic("codepoint expansion in strupper_m\n");
618 Find the number of 'c' chars in a string
620 _PUBLIC_ size_t count_chars_w(const char *s, char c)
626 codepoint_t c2 = next_codepoint(s, &size);
627 if (c2 == c) count++;