2 Unix SMB/CIFS implementation.
3 Samba utility functions
4 Copyright (C) Andrew Tridgell 1992-2001
5 Copyright (C) Simo Sorce 2001
6 Copyright (C) Andrew Bartlett 2011
7 Copyright (C) Jeremy Allison 1992-2007
8 Copyright (C) Martin Pool 2003
9 Copyright (C) James Peach 2006
11 This program is free software; you can redistribute it and/or modify
12 it under the terms of the GNU General Public License as published by
13 the Free Software Foundation; either version 3 of the License, or
14 (at your option) any later version.
16 This program is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 GNU General Public License for more details.
21 You should have received a copy of the GNU General Public License
22 along with this program. If not, see <http://www.gnu.org/licenses/>.
26 #include "system/locale.h"
33 Case insensitive string comparison, handle specified for testing
/*
 * Case-insensitive comparison of two multibyte strings, decoding both
 * through the caller-supplied iconv handle.
 *
 * What the visible lines establish:
 *  - identical pointers (which includes two NULLs) compare equal: 0;
 *  - a NULL s1 yields -1 and a NULL s2 yields 1;
 *  - one codepoint is decoded from each string with
 *    next_codepoint_handle();
 *  - if either decode reports INVALID_CODEPOINT the function returns
 *    strcasecmp(s1, s2) instead;
 *  - otherwise the two codepoints are compared after toupper_m().
 *
 * NOTE(review): the opening brace, the declarations of size1/size2, the
 * loop header and everything after the final toupper_m() test are not
 * visible in this view - confirm against the full file.
 */
35 _PUBLIC_ int strcasecmp_m_handle(struct smb_iconv_handle *iconv_handle,
36 const char *s1, const char *s2)
38 codepoint_t c1=0, c2=0;
41 /* handle null ptr comparisons to simplify the use in qsort */
42 if (s1 == s2) return 0;
43 if (s1 == NULL) return -1;
44 if (s2 == NULL) return 1;
/* Decode the next codepoint of each string; size1/size2 receive the
 * values written back by next_codepoint_handle(). */
47 c1 = next_codepoint_handle(iconv_handle, s1, &size1);
48 c2 = next_codepoint_handle(iconv_handle, s2, &size2);
/* Undecodable input on either side: hand the comparison to strcasecmp(). */
50 if (c1 == INVALID_CODEPOINT ||
51 c2 == INVALID_CODEPOINT) {
52 return strcasecmp(s1, s2);
/* Codepoints count as different only if their toupper_m() forms differ. */
62 if (toupper_m(c1) != toupper_m(c2)) {
71 Case insensitive string comparison
/*
 * Convenience wrapper: obtains the handle returned by get_iconv_handle()
 * and forwards s1/s2 to strcasecmp_m_handle(), returning its result
 * unchanged.
 */
73 _PUBLIC_ int strcasecmp_m(const char *s1, const char *s2)
75 struct smb_iconv_handle *iconv_handle = get_iconv_handle();
76 return strcasecmp_m_handle(iconv_handle, s1, s2);
80 Case insensitive string comparison, length limited, handle specified for testing
/*
 * Length-limited, case-insensitive comparison of two multibyte strings,
 * decoding both through the caller-supplied iconv handle.
 *
 * What the visible lines establish:
 *  - identical pointers (which includes two NULLs) compare equal: 0;
 *  - a NULL s1 yields -1 and a NULL s2 yields 1;
 *  - the main loop runs while both strings still have a non-NUL byte
 *    and n is non-zero;
 *  - each iteration decodes one codepoint per string with
 *    next_codepoint_handle();
 *  - if either decode reports INVALID_CODEPOINT the function returns
 *    strncasecmp(s1, s2, n);
 *  - otherwise the two codepoints are compared after toupper_m().
 *
 * NOTE(review): the declarations of size1/size2, the statements that
 * adjust n and advance s1/s2, and the tail of the function are not
 * visible in this view. The in-body remark below (n is given in
 * characters and has to be converted to bytes before the strncasecmp()
 * fallback) refers to lines that are missing here - confirm against the
 * full file.
 */
82 _PUBLIC_ int strncasecmp_m_handle(struct smb_iconv_handle *iconv_handle,
83 const char *s1, const char *s2, size_t n)
85 codepoint_t c1=0, c2=0;
88 /* handle null ptr comparisons to simplify the use in qsort */
89 if (s1 == s2) return 0;
90 if (s1 == NULL) return -1;
91 if (s2 == NULL) return 1;
/* Stop at the end of either string or once the budget n is used up. */
93 while (*s1 && *s2 && n) {
96 c1 = next_codepoint_handle(iconv_handle, s1, &size1);
97 c2 = next_codepoint_handle(iconv_handle, s2, &size2);
/* Undecodable input on either side: fall back to strncasecmp(). */
99 if (c1 == INVALID_CODEPOINT ||
100 c2 == INVALID_CODEPOINT) {
102 * n was specified in characters,
103 * now we must convert it to bytes.
104 * As bytes are the smallest
105 * character unit, the following
106 * increment and strncasecmp is always
109 * The source string was already known
110 * to be n characters long, so we are
111 * guaranteed to be able to look at the
112 * (n remaining + size1) bytes from the
116 return strncasecmp(s1, s2, n);
/* Codepoints count as different only if their toupper_m() forms differ. */
126 if (toupper_m(c1) != toupper_m(c2)) {
139 Case insensitive string comparison, length limited
/*
 * Convenience wrapper: obtains the handle returned by get_iconv_handle()
 * and forwards s1, s2 and n to strncasecmp_m_handle(), returning its
 * result unchanged.
 */
141 _PUBLIC_ int strncasecmp_m(const char *s1, const char *s2, size_t n)
143 struct smb_iconv_handle *iconv_handle = get_iconv_handle();
144 return strncasecmp_m_handle(iconv_handle, s1, s2, n);
150 * @note The comparison is case-insensitive.
/*
 * Boolean equality test built on strcasecmp_m(): true exactly when
 * strcasecmp_m(s1, s2) returns 0, so the comparison is case-insensitive.
 */
152 _PUBLIC_ bool strequal_m(const char *s1, const char *s2)
154 return strcasecmp_m(s1,s2) == 0;
158 Compare 2 strings (case sensitive).
/*
 * Case-sensitive equality test: the visible return is true exactly when
 * strcmp(s1, s2) reports the strings identical.
 *
 * NOTE(review): the lines between the signature and this return are not
 * visible in this view, so any NULL-pointer handling done before the
 * strcmp() call cannot be confirmed from here.
 */
160 _PUBLIC_ bool strcsequal(const char *s1,const char *s2)
167 return strcmp(s1,s2) == 0;
171 * Calculate the number of units (8 or 16-bit, depending on the
172 * destination charset), that would be needed to convert the input
173 * string which is expected to be in src_charset encoding to the
174 * destination charset (which should be a unicode charset).
/*
 * Length of string s (encoded in src_charset) measured in units of
 * dst_charset, using the supplied iconv handle for decoding.
 *
 * What the visible lines establish:
 *  - some dst_charset values are rejected with smb_panic() ("variable
 *    dest charset"), and some src_charset values are rejected with
 *    smb_panic() ("UTF16 src charset"); the case labels selecting them
 *    are not visible here;
 *  - leading bytes with the top bit clear are handled by a dedicated
 *    loop before any codepoint decoding takes place;
 *  - remaining input is decoded with next_codepoint_handle_ext(), which
 *    is offered at most 5 bytes per call (strnlen(s, 5));
 *  - the per-codepoint cost is chosen by a second switch on dst_charset,
 *    with thresholds at 0x800 and 0x10000 visible in one of its arms.
 *
 * NOTE(review): the counter variable, the case labels, the loop bodies
 * and the return statement are not visible in this view - confirm the
 * exact unit counts against the full file.
 */
176 _PUBLIC_ size_t strlen_m_ext_handle(struct smb_iconv_handle *ic,
177 const char *s, charset_t src_charset, charset_t dst_charset)
/* Validate the destination charset; an unsupported one is fatal. */
182 switch (dst_charset) {
185 smb_panic("cannot call strlen_m_ext() with a variable dest charset (must be UTF16* or UTF8)");
/* Validate the source charset; an unsupported one is fatal. */
190 switch (src_charset) {
193 smb_panic("cannot call strlen_m_ext() with a UTF16 src charset (must be DOS, UNIX, DISPLAY or UTF8)");
/* Runs while the current byte is non-NUL and has its high bit clear. */
202 while (*s && !(((uint8_t)*s) & 0x80)) {
/* Decode one codepoint, looking at no more than 5 bytes of input;
 * c_size is passed by address to the decoder. */
213 codepoint_t c = next_codepoint_handle_ext(ic, s, strnlen(s, 5),
214 src_charset, &c_size);
/* Cost of this codepoint depends on the destination encoding. */
217 switch (dst_charset) {
222 /* Unicode char fits into 16 bits. */
225 /* Double-width unicode char - 32 bits. */
231 * this only checks ranges, and does not
232 * check for invalid codepoints
236 } else if (c < 0x800) {
238 } else if (c < 0x10000) {
246 * non-unicode encoding:
247 * assume that each codepoint fits into
248 * one unit in the destination encoding.
258 * Calculate the number of units (8 or 16-bit, depending on the
259 * destination charset), that would be needed to convert the input
260 * string which is expected to be in src_charset encoding to the
261 * destination charset (which should be a unicode charset).
/*
 * Convenience wrapper: obtains the handle returned by get_iconv_handle()
 * and forwards s, src_charset and dst_charset to strlen_m_ext_handle(),
 * returning its result unchanged.
 */
263 _PUBLIC_ size_t strlen_m_ext(const char *s, charset_t src_charset, charset_t dst_charset)
265 struct smb_iconv_handle *ic = get_iconv_handle();
266 return strlen_m_ext_handle(ic, s, src_charset, dst_charset);
/*
 * Same measurement as strlen_m_ext() plus one extra unit: the visible
 * return is strlen_m_ext(s, src_charset, dst_charset) + 1.
 *
 * NOTE(review): presumably the extra unit accounts for the terminator
 * (going by the function name); the lines between the signature and the
 * return are not visible in this view, so any early-out (e.g. for a NULL
 * s) cannot be confirmed from here.
 */
269 _PUBLIC_ size_t strlen_m_ext_term(const char *s, const charset_t src_charset,
270 const charset_t dst_charset)
275 return strlen_m_ext(s, src_charset, dst_charset) + 1;
279 * Calculate the number of 16-bit units that would be needed to convert
280 * the input string which is expected to be in CH_UNIX encoding to UTF16.
282 * This will be the same as the number of bytes in a string for single
283 * byte strings, but will be different for multibyte.
/*
 * Shorthand for strlen_m_ext() with the source charset fixed to CH_UNIX
 * and the destination charset fixed to CH_UTF16LE.
 */
285 _PUBLIC_ size_t strlen_m(const char *s)
287 return strlen_m_ext(s, CH_UNIX, CH_UTF16LE);
291 Work out the number of multibyte chars in a string, including the NULL
/*
 * Same measurement as strlen_m() plus one extra unit: the visible return
 * is strlen_m(s) + 1.
 *
 * NOTE(review): the lines between the signature and the return are not
 * visible in this view, so any early-out (e.g. for a NULL s) cannot be
 * confirmed from here.
 */
294 _PUBLIC_ size_t strlen_m_term(const char *s)
300 return strlen_m(s) + 1;
304 * Weird helper routine for the winreg pipe: If nothing is around, return 0,
305 * if a string is there, include the terminator.
308 _PUBLIC_ size_t strlen_m_term_null(const char *s)
323 Strchr and strrchr_m are a bit complex on general multi-byte strings.
/*
 * Multibyte-aware search for the first occurrence of byte c in src.
 *
 * What the visible lines establish:
 *  - when the top two bits of c are clear ((c & 0xC0) == 0, i.e. values
 *    0x00..0x3F) the search is delegated straight to strchr();
 *  - otherwise a byte-wise scan walks src for as long as the current
 *    byte is non-NUL and has its high bit clear, and can return a
 *    pointer into src from inside that scan;
 *  - a section guarded by BROKEN_UNICODE_COMPOSE_CHARACTERS exists for
 *    restarting from the beginning of the string;
 *  - the remaining input is decoded with next_codepoint_handle() using
 *    the handle from get_iconv_handle(), and a pointer into the string
 *    can be returned from that path as well.
 *
 * The result is produced with discard_const_p(), so callers receive a
 * non-const char pointer into the const input.
 *
 * NOTE(review): the match tests, the loop bodies and the final return
 * are not visible in this view - confirm against the full file.
 */
325 _PUBLIC_ char *strchr_m(const char *src, char c)
328 struct smb_iconv_handle *ic = get_iconv_handle();
332 /* characters below 0x3F are guaranteed to not appear in
333 non-initial position in multi-byte charsets */
334 if ((c & 0xC0) == 0) {
335 return strchr(src, c);
338 /* this is quite a common operation, so we want it to be
339 fast. We optimise for the ascii case, knowing that all our
340 supported multi-byte character sets are ascii-compatible
341 (ie. they match for the first 128 chars) */
343 for (s = src; *s && !(((unsigned char)s[0]) & 0x80); s++) {
345 return discard_const_p(char, s);
351 #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
352 /* With compose characters we must restart from the beginning. JRA. */
/* Slow path: decode one codepoint at a time. */
358 codepoint_t c2 = next_codepoint_handle(ic, s, &size);
360 return discard_const_p(char, s);
369 * Multibyte-character version of strrchr
/*
 * Multibyte-aware search for the last occurrence of byte c in s.
 *
 * What the visible lines establish:
 *  - when the top two bits of c are clear ((c & 0xC0) == 0, i.e. values
 *    0x00..0x3F) the search is delegated straight to strrchr();
 *  - otherwise the byte length of s is taken with strlen() and a
 *    candidate position cp is examined: if the byte before it
 *    (cp[-1]) has its high bit set the candidate may be part of a
 *    multibyte sequence and the slow path is taken, else cp is returned
 *    as the match;
 *  - the slow path decodes codepoints with next_codepoint_handle()
 *    using the handle from get_iconv_handle() and records a position
 *    in ret.
 *
 * Results are produced with discard_const_p(), so callers receive a
 * non-const char pointer into the const input.
 *
 * NOTE(review): the declaration and movement of cp, the first half of
 * the cp[-1] condition, the match tests, and the final return are not
 * visible in this view - confirm against the full file.
 */
371 _PUBLIC_ char *strrchr_m(const char *s, char c)
373 struct smb_iconv_handle *ic = get_iconv_handle();
380 /* characters below 0x3F are guaranteed to not appear in
381 non-initial position in multi-byte charsets */
382 if ((c & 0xC0) == 0) {
383 return strrchr(s, c);
386 /* this is quite a common operation, so we want it to be
387 fast. We optimise for the ascii case, knowing that all our
388 supported multi-byte character sets are ascii-compatible
389 (ie. they match for the first 128 chars). Also, in Samba
390 we only search for ascii characters in 'c' and that
391 in all mb character sets with a compound character
392 containing c, if 'c' is not a match at position
393 p, then p[-1] > 0x7f. JRA. */
396 size_t len = strlen(s);
405 /* Could be a match. Part of a multibyte ? */
407 (((unsigned char)cp[-1]) & 0x80)) {
408 /* Yep - go slow :-( */
412 /* No - we have a match ! */
413 return discard_const_p(char , cp);
/* Slow path: decode one codepoint at a time. */
422 codepoint_t c2 = next_codepoint_handle(ic, s, &size);
424 ret = discard_const_p(char, s);
433 return True if any (multi-byte) character is lower case
/*
 * Scans a multibyte string for a lower-case character, decoding with the
 * supplied iconv handle.
 *
 * The visible lines show a codepoint being decoded from `string` via
 * next_codepoint_handle() and a `return true` annotated as "it has
 * lower case chars".
 *
 * NOTE(review): the rest of the signature, the loop, the test applied
 * to each codepoint and the negative return are not visible in this
 * view - confirm against the full file.
 */
435 _PUBLIC_ bool strhaslower_handle(struct smb_iconv_handle *ic,
443 s = next_codepoint_handle(ic, string, &c_size);
449 return true; /* that means it has lower case chars */
/*
 * Convenience wrapper: obtains the handle returned by get_iconv_handle()
 * and forwards `string` to strhaslower_handle(), returning its result
 * unchanged.
 */
456 _PUBLIC_ bool strhaslower(const char *string)
458 struct smb_iconv_handle *ic = get_iconv_handle();
459 return strhaslower_handle(ic, string);
463 return True if any (multi-byte) character is upper case
/*
 * Scans a multibyte string for an upper-case character, decoding with
 * the supplied iconv handle.
 *
 * The visible lines show a codepoint being decoded from `string` via
 * next_codepoint_handle() and a `return true` annotated as "it has
 * upper case chars".
 *
 * NOTE(review): the rest of the signature, the loop, the test applied
 * to each codepoint and the negative return are not visible in this
 * view - confirm against the full file.
 */
465 _PUBLIC_ bool strhasupper_handle(struct smb_iconv_handle *ic,
473 s = next_codepoint_handle(ic, string, &c_size);
479 return true; /* that means it has upper case chars */
/*
 * Convenience wrapper: obtains the handle returned by get_iconv_handle()
 * and forwards `string` to strhasupper_handle(), returning its result
 * unchanged.
 */
486 _PUBLIC_ bool strhasupper(const char *string)
488 struct smb_iconv_handle *ic = get_iconv_handle();
489 return strhasupper_handle(ic, string);
492 /***********************************************************************
493 strstr_m - We convert via ucs2 for now.
494 ***********************************************************************/
496 char *strstr_m(const char *src, const char *findstr)
499 smb_ucs2_t *src_w, *find_w;
503 size_t converted_size, findstr_len = 0;
505 TALLOC_CTX *frame; /* Only set up in the iconv case */
507 /* for correctness */
509 return discard_const_p(char, src);
512 /* Samba does single character findstr calls a *lot*. */
513 if (findstr[1] == '\0')
514 return strchr_m(src, *findstr);
516 /* We optimise for the ascii case, knowing that all our
517 supported multi-byte character sets are ascii-compatible
518 (ie. they match for the first 128 chars) */
520 for (s = src; *s && !(((unsigned char)s[0]) & 0x80); s++) {
521 if (*s == *findstr) {
523 findstr_len = strlen(findstr);
525 if (strncmp(s, findstr, findstr_len) == 0) {
526 return discard_const_p(char, s);
534 #if 1 /* def BROKEN_UNICODE_COMPOSE_CHARACTERS */
535 /* 'make check' fails unless we do this */
537 /* With compose characters we must restart from the beginning. JRA. */
541 frame = talloc_stackframe();
543 if (!push_ucs2_talloc(frame, &src_w, src, &converted_size)) {
544 DEBUG(0,("strstr_m: src malloc fail\n"));
549 if (!push_ucs2_talloc(frame, &find_w, findstr, &converted_size)) {
550 DEBUG(0,("strstr_m: find malloc fail\n"));
555 p = strstr_w(src_w, find_w);
563 if (!pull_ucs2_talloc(frame, &s2, src_w, &converted_size)) {
565 DEBUG(0,("strstr_m: dest malloc fail\n"));
568 retp = discard_const_p(char, (s+strlen(s2)));