2 Unix SMB/CIFS implementation.
3 Samba utility functions
4 Copyright (C) Andrew Tridgell 1992-2001
5 Copyright (C) Simo Sorce 2001
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3 of the License, or
10 (at your option) any later version.
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program. If not, see <http://www.gnu.org/licenses/>.
22 #include "system/locale.h"
25 * Get the next token from a string, return false if none found.
26 * Handles double-quotes.
28 * Based on a routine by GJC@VILLAGE.COM.
29 * Extensively modified by Andrew.Tridgell@anu.edu.au
31 _PUBLIC_ bool next_token(const char **ptr,char *buff, const char *sep, size_t bufsize)
42 /* default to simple separators */
46 /* find the first non sep char */
47 while (*s && strchr_m(sep,*s))
54 /* copy over the token */
55 for (quoted = false; len < bufsize && *s && (quoted || !strchr_m(sep,*s)); s++) {
64 *ptr = (*s) ? s+1 : s;
73 NOTE: oldc and newc must be 7 bit characters
75 _PUBLIC_ void string_replace_m(char *s, char oldc, char newc)
77 struct smb_iconv_handle *ic = get_iconv_handle();
80 codepoint_t c = next_codepoint_handle(ic, s, &size);
89 Paranoid strcpy into a buffer of given length (includes terminating
90 zero). Strips out all but 'a-zA-Z0-9' and the characters in other_safe_chars
91 and replaces them with '_'. Deliberately does *NOT* check for multibyte
92 characters. Don't change it!
95 _PUBLIC_ char *alpha_strcpy(char *dest, const char *src, const char *other_safe_chars, size_t maxlength)
100 /* can't fit any bytes at all! */
105 DEBUG(0,("ERROR: NULL dest in alpha_strcpy\n"));
115 if (len >= maxlength)
118 if (!other_safe_chars)
119 other_safe_chars = "";
121 for(i = 0; i < len; i++) {
122 int val = (src[i] & 0xff);
123 if (isupper(val) || islower(val) || isdigit(val) || strchr_m(other_safe_chars, val))
135 Convert a string to lower case, allocated with talloc
137 _PUBLIC_ char *strlower_talloc(TALLOC_CTX *ctx, const char *src)
141 struct smb_iconv_handle *iconv_handle = get_iconv_handle();
147 /* this takes advantage of the fact that upper/lower can't
148 change the length of a character by more than 1 byte */
149 dest = talloc_array(ctx, char, 2*(strlen(src))+1);
156 codepoint_t c = next_codepoint_handle(iconv_handle, src, &c_size);
161 c_size = push_codepoint_handle(iconv_handle, dest+size, c);
171 /* trim it so talloc_append_string() works */
172 dest = talloc_realloc(ctx, dest, char, size+1);
174 talloc_set_name_const(dest, dest);
180 Convert a string to UPPER case, allocated with talloc
181 source length limited to n bytes
183 _PUBLIC_ char *strupper_talloc_n(TALLOC_CTX *ctx, const char *src, size_t n)
187 struct smb_iconv_handle *iconv_handle = get_iconv_handle();
193 /* this takes advantage of the fact that upper/lower can't
194 change the length of a character by more than 1 byte */
195 dest = talloc_array(ctx, char, 2*(n+1));
200 while (n-- && *src) {
202 codepoint_t c = next_codepoint_handle(iconv_handle, src, &c_size);
207 c_size = push_codepoint_handle(iconv_handle, dest+size, c);
217 /* trim it so talloc_append_string() works */
218 dest = talloc_realloc(ctx, dest, char, size+1);
220 talloc_set_name_const(dest, dest);
226 Convert a string to UPPER case, allocated with talloc
228 _PUBLIC_ char *strupper_talloc(TALLOC_CTX *ctx, const char *src)
230 return strupper_talloc_n(ctx, src, src?strlen(src):0);
234 talloc_strdup() a unix string to upper case.
236 _PUBLIC_ char *talloc_strdup_upper(TALLOC_CTX *ctx, const char *src)
238 return strupper_talloc(ctx, src);
242 Convert a string to lower case.
244 _PUBLIC_ void strlower_m(char *s)
247 struct smb_iconv_handle *iconv_handle;
249 /* this is quite a common operation, so we want it to be
250 fast. We optimise for the ascii case, knowing that all our
251 supported multi-byte character sets are ascii-compatible
252 (ie. they match for the first 128 chars) */
253 while (*s && !(((uint8_t)*s) & 0x80)) {
254 *s = tolower((uint8_t)*s);
261 iconv_handle = get_iconv_handle();
266 size_t c_size, c_size2;
267 codepoint_t c = next_codepoint_handle(iconv_handle, s, &c_size);
268 c_size2 = push_codepoint_handle(iconv_handle, d, tolower_m(c));
269 if (c_size2 > c_size) {
270 DEBUG(0,("FATAL: codepoint 0x%x (0x%x) expanded from %d to %d bytes in strlower_m\n",
271 c, tolower_m(c), (int)c_size, (int)c_size2));
272 smb_panic("codepoint expansion in strlower_m\n");
281 Convert a string to UPPER case.
283 _PUBLIC_ void strupper_m(char *s)
286 struct smb_iconv_handle *iconv_handle;
288 /* this is quite a common operation, so we want it to be
289 fast. We optimise for the ascii case, knowing that all our
290 supported multi-byte character sets are ascii-compatible
291 (ie. they match for the first 128 chars) */
292 while (*s && !(((uint8_t)*s) & 0x80)) {
293 *s = toupper((uint8_t)*s);
300 iconv_handle = get_iconv_handle();
305 size_t c_size, c_size2;
306 codepoint_t c = next_codepoint_handle(iconv_handle, s, &c_size);
307 c_size2 = push_codepoint_handle(iconv_handle, d, toupper_m(c));
308 if (c_size2 > c_size) {
309 DEBUG(0,("FATAL: codepoint 0x%x (0x%x) expanded from %d to %d bytes in strupper_m\n",
310 c, toupper_m(c), (int)c_size, (int)c_size2));
311 smb_panic("codepoint expansion in strupper_m\n");
321 Find the number of 'c' chars in a string
323 _PUBLIC_ size_t count_chars_m(const char *s, char c)
325 struct smb_iconv_handle *ic = get_iconv_handle();
330 codepoint_t c2 = next_codepoint_handle(ic, s, &size);
331 if (c2 == c) count++;
340 * Copy a string from a char* unix src to a dos codepage string destination.
342 * @return the number of bytes occupied by the string in the destination.
344 * @param flags can include
346 * <dt>STR_TERMINATE</dt> <dd>means include the null termination</dd>
347 * <dt>STR_UPPER</dt> <dd>means uppercase in the destination</dd>
350 * @param dest_len the maximum length in bytes allowed in the
351 * destination. If @p dest_len is -1 then no maximum is used.
353 static ssize_t push_ascii(void *dest, const char *src, size_t dest_len, int flags)
358 if (flags & STR_UPPER) {
359 char *tmpbuf = strupper_talloc(NULL, src);
360 if (tmpbuf == NULL) {
363 ret = push_ascii(dest, tmpbuf, dest_len, flags & ~STR_UPPER);
368 src_len = strlen(src);
370 if (flags & (STR_TERMINATE | STR_TERMINATE_ASCII))
373 return convert_string(CH_UNIX, CH_DOS, src, src_len, dest, dest_len);
377 * Copy a string from a unix char* src to an ASCII destination,
378 * allocating a buffer using talloc().
380 * @param dest always set at least to NULL
382 * @returns whether the conversion succeeded; the number of bytes occupied
383 * by the string in the destination is stored in @p converted_size.
385 _PUBLIC_ bool push_ascii_talloc(TALLOC_CTX *ctx, char **dest, const char *src, size_t *converted_size)
387 size_t src_len = strlen(src)+1;
389 return convert_string_talloc(ctx, CH_UNIX, CH_DOS, src, src_len, (void **)dest, converted_size);
394 * Copy a string from a dos codepage source to a unix char* destination.
396 * The resulting string in "dest" is always null terminated.
398 * @param flags can have:
400 * <dt>STR_TERMINATE</dt>
401 * <dd>STR_TERMINATE means the string in @p src
402 * is null terminated, and src_len is ignored if it is -1.</dd>
405 * @param src_len is the length of the source area in bytes.
406 * @returns the number of bytes occupied by the string in @p src.
408 static ssize_t pull_ascii(char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
412 if (flags & (STR_TERMINATE | STR_TERMINATE_ASCII)) {
413 if (src_len == (size_t)-1) {
414 src_len = strlen((const char *)src) + 1;
416 size_t len = strnlen((const char *)src, src_len);
423 ret = convert_string(CH_DOS, CH_UNIX, src, src_len, dest, dest_len);
426 dest[MIN(ret, dest_len-1)] = 0;
432 * Copy a string from a char* src to a unicode destination.
434 * @returns the number of bytes occupied by the string in the destination.
436 * @param flags can have:
439 * <dt>STR_TERMINATE</dt> <dd>means include the null termination.</dd>
440 * <dt>STR_UPPER</dt> <dd>means uppercase in the destination.</dd>
441 * <dt>STR_NOALIGN</dt> <dd>means don't do alignment.</dd>
444 * @param dest_len is the maximum length allowed in the
445 * destination. If dest_len is -1 then no maximum is used.
447 static ssize_t push_ucs2(void *dest, const char *src, size_t dest_len, int flags)
450 size_t src_len = strlen(src);
453 if (flags & STR_UPPER) {
454 char *tmpbuf = strupper_talloc(NULL, src);
455 if (tmpbuf == NULL) {
458 ret = push_ucs2(dest, tmpbuf, dest_len, flags & ~STR_UPPER);
463 if (flags & STR_TERMINATE)
466 if (ucs2_align(NULL, dest, flags)) {
468 dest = (void *)((char *)dest + 1);
469 if (dest_len) dest_len--;
473 /* ucs2 is always a multiple of 2 bytes */
476 ret = convert_string(CH_UNIX, CH_UTF16, src, src_len, dest, dest_len);
477 if (ret == (size_t)-1) {
488 * Copy a string from a unix char* src to a UCS2 destination,
489 * allocating a buffer using talloc().
491 * @param dest always set at least to NULL
493 * @returns whether the conversion succeeded; the number of bytes occupied
494 * by the string in the destination is stored in @p converted_size.
496 _PUBLIC_ bool push_ucs2_talloc(TALLOC_CTX *ctx, smb_ucs2_t **dest, const char *src, size_t *converted_size)
498 size_t src_len = strlen(src)+1;
500 return convert_string_talloc(ctx, CH_UNIX, CH_UTF16, src, src_len, (void **)dest, converted_size);
505 * Copy a string from a unix char* src to a UTF-8 destination, allocating a buffer using talloc
507 * @param dest always set at least to NULL
509 * @returns whether the conversion succeeded; the number of bytes occupied by the string in the destination is stored in @p converted_size
512 _PUBLIC_ bool push_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src, size_t *converted_size)
514 size_t src_len = strlen(src)+1;
516 return convert_string_talloc(ctx, CH_UNIX, CH_UTF8, src, src_len, (void **)dest, converted_size);
520 Copy a string from a ucs2 source to a unix char* destination.
522 STR_TERMINATE means the string in src is null terminated.
523 STR_NOALIGN means don't try to align.
524 if STR_TERMINATE is set then src_len is ignored if it is -1.
525 src_len is the length of the source area in bytes
526 Return the number of bytes occupied by the string in src.
527 The resulting string in "dest" is always null terminated.
530 static size_t pull_ucs2(char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
534 if (ucs2_align(NULL, src, flags)) {
535 src = (const void *)((const char *)src + 1);
540 if (flags & STR_TERMINATE) {
541 if (src_len == (size_t)-1) {
542 src_len = utf16_len(src);
544 src_len = utf16_len_n(src, src_len);
548 /* ucs2 is always a multiple of 2 bytes */
549 if (src_len != (size_t)-1)
552 ret = convert_string(CH_UTF16, CH_UNIX, src, src_len, dest, dest_len);
554 dest[MIN(ret, dest_len-1)] = 0;
560 * Copy a string from an ASCII src to a unix char * destination, allocating a buffer using talloc
562 * @param dest always set at least to NULL
564 * @returns whether the conversion succeeded; the number of bytes occupied by the string in the destination is stored in @p converted_size
567 _PUBLIC_ bool pull_ascii_talloc(TALLOC_CTX *ctx, char **dest, const char *src, size_t *converted_size)
569 size_t src_len = strlen(src)+1;
571 return convert_string_talloc(ctx, CH_DOS, CH_UNIX, src, src_len, (void **)dest, converted_size);
575 * Copy a string from a UCS2 src to a unix char * destination, allocating a buffer using talloc
577 * @param dest always set at least to NULL
579 * @returns whether the conversion succeeded; the number of bytes occupied by the string in the destination is stored in @p converted_size
582 _PUBLIC_ bool pull_ucs2_talloc(TALLOC_CTX *ctx, char **dest, const smb_ucs2_t *src, size_t *converted_size)
584 size_t src_len = utf16_len(src);
586 return convert_string_talloc(ctx, CH_UTF16, CH_UNIX, src, src_len, (void **)dest, converted_size);
590 * Copy a string from a UTF-8 src to a unix char * destination, allocating a buffer using talloc
592 * @param dest always set at least to NULL
594 * @returns whether the conversion succeeded; the number of bytes occupied by the string in the destination is stored in @p converted_size
597 _PUBLIC_ bool pull_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src, size_t *converted_size)
599 size_t src_len = strlen(src)+1;
601 return convert_string_talloc(ctx, CH_UTF8, CH_UNIX, src, src_len, (void **)dest, converted_size);
605 Copy a string from a char* src to a unicode or ascii
606 dos codepage destination choosing unicode or ascii based on the
607 flags in the SMB buffer starting at base_ptr.
608 Return the number of bytes occupied by the string in the destination.
610 STR_TERMINATE means include the null termination.
611 STR_UPPER means uppercase in the destination.
612 STR_ASCII use ascii even with unicode packet.
613 STR_NOALIGN means don't do alignment.
614 dest_len is the maximum length allowed in the destination. If dest_len
615 is -1 then no maximum is used.
618 _PUBLIC_ ssize_t push_string(void *dest, const char *src, size_t dest_len, int flags)
620 if (flags & STR_ASCII) {
621 return push_ascii(dest, src, dest_len, flags);
622 } else if (flags & STR_UNICODE) {
623 return push_ucs2(dest, src, dest_len, flags);
625 smb_panic("push_string requires either STR_ASCII or STR_UNICODE flag to be set");
632 Copy a string from a unicode or ascii source (depending on
633 the packet flags) to a char* destination.
635 STR_TERMINATE means the string in src is null terminated.
636 STR_UNICODE means to force as unicode.
637 STR_ASCII use ascii even with unicode packet.
638 STR_NOALIGN means don't do alignment.
639 if STR_TERMINATE is set then src_len is ignored if it is -1
640 src_len is the length of the source area in bytes.
641 Return the number of bytes occupied by the string in src.
642 The resulting string in "dest" is always null terminated.
645 _PUBLIC_ ssize_t pull_string(char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
647 if (flags & STR_ASCII) {
648 return pull_ascii(dest, src, dest_len, src_len, flags);
649 } else if (flags & STR_UNICODE) {
650 return pull_ucs2(dest, src, dest_len, src_len, flags);
652 smb_panic("pull_string requires either STR_ASCII or STR_UNICODE flag to be set");
659 * Convert a string from one encoding to another, performing error checking etc.
661 * @param src pointer to source string (multibyte or singlebyte)
662 * @param srclen length of the source string in bytes
663 * @param dest pointer to destination string (multibyte or singlebyte)
664 * @param destlen maximal length allowed for string
665 * @returns the number of bytes occupied in the destination
667 _PUBLIC_ size_t convert_string(charset_t from, charset_t to,
668 void const *src, size_t srclen,
669 void *dest, size_t destlen)
672 if (!convert_string_handle(get_iconv_handle(), from, to,
674 dest, destlen, &ret))
680 * Convert between character sets, allocating a new buffer using talloc for the result.
682 * @param srclen length of source buffer.
683 * @param dest always set at least to NULL
684 * @param converted_size Size in bytes of the converted string
685 * @note -1 is not accepted for srclen.
687 * @returns boolean indication whether the conversion succeeded
690 _PUBLIC_ bool convert_string_talloc(TALLOC_CTX *ctx,
691 charset_t from, charset_t to,
692 void const *src, size_t srclen,
693 void *dest, size_t *converted_size)
695 return convert_string_talloc_handle(ctx, get_iconv_handle(),
696 from, to, src, srclen, dest,