2 * String utility routines
6 * Wireshark - Network traffic analyzer
7 * By Gerald Combs <gerald@wireshark.org>
8 * Copyright 1998 Gerald Combs
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version 2
13 * of the License, or (at your option) any later version.
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
42 static const char hex[16] = { '0', '1', '2', '3', '4', '5', '6', '7',
43 '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };
46 * Given a pointer into a data buffer, and to the end of the buffer,
47 * find the end of the (putative) line at that position in the data
49 * Return a pointer to the EOL character(s) in "*eol".
52 find_line_end(const guchar *data, const guchar *dataend, const guchar **eol)
54 const guchar *lineend;
56 lineend = memchr(data, '\n', dataend - data);
57 if (lineend == NULL) {
59 * No LF - line is probably continued in next TCP segment.
65 * Is the LF at the beginning of the line?
69 * No - is it preceded by a carriage return?
70 * (Perhaps it's supposed to be, but that's not guaranteed....)
72 if (*(lineend - 1) == '\r') {
74 * Yes. The EOL starts with the CR.
79 * No. The EOL starts with the LF.
84 * I seem to remember that we once saw lines ending with LF-CR
85 * in an HTTP request or response, so check if it's *followed*
86 * by a carriage return.
88 if (lineend < (dataend - 1) && *(lineend + 1) == '\r') {
90 * It's <non-LF><LF><CR>; say it ends with the CR.
97 * Yes - the EOL starts with the LF.
103 * Point to the character after the last character.
111 * Get the length of the next token in a line, and the beginning of the
112 * next token after that (if any).
113 * Return 0 if there is no next token.
116 get_token_len(const guchar *linep, const guchar *lineend,
117 const guchar **next_token)
119 const guchar *tokenp;
125 * Search for a blank, a CR or an LF, or the end of the buffer.
127 while (linep < lineend && *linep != ' ' && *linep != '\r' && *linep != '\n')
129 token_len = (int) (linep - tokenp);
132 * Skip trailing blanks.
134 while (linep < lineend && *linep == ' ')
143 #define INITIAL_FMTBUF_SIZE 128
146 * XXX - "isprint()" can return "true" for non-ASCII characters, but
147 * those don't work with GTK+ 1.3 or later, as they take UTF-8 strings
148 * as input. Until we fix up Wireshark to properly handle non-ASCII
149 * characters in all output (both GUI displays and text printouts)
150 * in those versions of GTK+, we work around the problem by escaping
151 * all characters that aren't printable ASCII.
153 * We don't know what version of GTK+ we're using, as epan doesn't
154 * use any GTK+ stuff; we use GLib as a proxy for that, with GLib 2.x
155 * implying GTK+ 1.3 or later (we don't support GLib 1.3[.x]).
158 #define isprint(c) (c >= 0x20 && c < 0x7f)
161 * Given a string, generate a string from it that shows non-printable
162 * characters as C-style escapes, and return a pointer to it.
165 format_text(const guchar *string, size_t len)
167 static gchar *fmtbuf[3];
168 static int fmtbuf_len[3];
171 const guchar *stringend = string + len;
178 * Allocate the buffer if it's not already allocated.
180 if (fmtbuf[idx] == NULL) {
181 fmtbuf[idx] = g_malloc(INITIAL_FMTBUF_SIZE);
182 fmtbuf_len[idx] = INITIAL_FMTBUF_SIZE;
185 while (string < stringend) {
187 * Is there enough room for this character, if it expands to
188 * a backslash plus 3 octal digits (which is the most it can
189 * expand to), and also enough room for a terminating '\0'?
191 if (column+3+1 >= fmtbuf_len[idx]) {
193 * Double the buffer's size if it's not big enough.
194 * The size of the buffer starts at 128, so doubling its size
195 * adds at least another 128 bytes, which is more than enough
196 * for one more character plus a terminating '\0'.
198 fmtbuf_len[idx] = fmtbuf_len[idx] * 2;
199 fmtbuf[idx] = g_realloc(fmtbuf[idx], fmtbuf_len[idx]);
204 fmtbuf[idx][column] = c;
207 fmtbuf[idx][column] = '\\';
212 fmtbuf[idx][column] = 'a';
217 fmtbuf[idx][column] = 'b'; /* BS */
222 fmtbuf[idx][column] = 'f'; /* FF */
227 fmtbuf[idx][column] = 'n'; /* NL */
232 fmtbuf[idx][column] = 'r'; /* CR */
237 fmtbuf[idx][column] = 't'; /* tab */
242 fmtbuf[idx][column] = 'v';
248 fmtbuf[idx][column] = i + '0';
251 fmtbuf[idx][column] = i + '0';
254 fmtbuf[idx][column] = i + '0';
260 fmtbuf[idx][column] = '\0';
265 * Given a string, generate a string from it that shows non-printable
266 * characters as C-style escapes except a whitespace character
267 * (space, tab, carriage return, new line, vertical tab, or formfeed)
268 * which will be replaced by a space, and return a pointer to it.
271 format_text_wsp(const guchar *string, size_t len)
273 static gchar *fmtbuf[3];
274 static int fmtbuf_len[3];
277 const guchar *stringend = string + len;
284 * Allocate the buffer if it's not already allocated.
286 if (fmtbuf[idx] == NULL) {
287 fmtbuf[idx] = g_malloc(INITIAL_FMTBUF_SIZE);
288 fmtbuf_len[idx] = INITIAL_FMTBUF_SIZE;
291 while (string < stringend) {
293 * Is there enough room for this character, if it expands to
294 * a backslash plus 3 octal digits (which is the most it can
295 * expand to), and also enough room for a terminating '\0'?
297 if (column+3+1 >= fmtbuf_len[idx]) {
299 * Double the buffer's size if it's not big enough.
300 * The size of the buffer starts at 128, so doubling its size
301 * adds at least another 128 bytes, which is more than enough
302 * for one more character plus a terminating '\0'.
304 fmtbuf_len[idx] = fmtbuf_len[idx] * 2;
305 fmtbuf[idx] = g_realloc(fmtbuf[idx], fmtbuf_len[idx]);
310 fmtbuf[idx][column] = c;
312 } else if (isspace(c)) {
313 fmtbuf[idx][column] = ' ';
316 fmtbuf[idx][column] = '\\';
321 fmtbuf[idx][column] = 'a';
326 fmtbuf[idx][column] = 'b'; /* BS */
331 fmtbuf[idx][column] = 'f'; /* FF */
336 fmtbuf[idx][column] = 'n'; /* NL */
341 fmtbuf[idx][column] = 'r'; /* CR */
346 fmtbuf[idx][column] = 't'; /* tab */
351 fmtbuf[idx][column] = 'v';
357 fmtbuf[idx][column] = i + '0';
360 fmtbuf[idx][column] = i + '0';
363 fmtbuf[idx][column] = i + '0';
369 fmtbuf[idx][column] = '\0';
374 is_byte_sep(guint8 c)
376 return (c == '-' || c == ':' || c == '.');
379 /* Turn a string of hex digits with optional separators (defined by
380 * is_byte_sep()) into a byte array.
383 hex_str_to_bytes(const char *hex_str, GByteArray *bytes, gboolean force_separators) {
385 const guchar *p, *q, *r, *s, *punct;
386 char four_digits_first_half[3];
387 char four_digits_second_half[3];
391 if (! hex_str || ! bytes) {
394 g_byte_array_set_size(bytes, 0);
395 p = (const guchar *)hex_str;
402 && isxdigit(*p) && isxdigit(*q) &&
403 isxdigit(*r) && isxdigit(*s)) {
404 four_digits_first_half[0] = *p;
405 four_digits_first_half[1] = *q;
406 four_digits_first_half[2] = '\0';
407 four_digits_second_half[0] = *r;
408 four_digits_second_half[1] = *s;
409 four_digits_second_half[2] = '\0';
412 * Four or more hex digits in a row.
414 val = (guint8) strtoul(four_digits_first_half, NULL, 16);
415 g_byte_array_append(bytes, &val, 1);
416 val = (guint8) strtoul(four_digits_second_half, NULL, 16);
417 g_byte_array_append(bytes, &val, 1);
422 * Make sure the character after
423 * the fourth hex digit is a byte
424 * separator, i.e. that we don't have
425 * more than four hex digits, or a
428 if (is_byte_sep(*punct)) {
432 else if (force_separators) {
441 else if (*q && isxdigit(*p) && isxdigit(*q)) {
444 two_digits[2] = '\0';
447 * Two hex digits in a row.
449 val = (guint8) strtoul(two_digits, NULL, 16);
450 g_byte_array_append(bytes, &val, 1);
454 * Make sure the character after
455 * the second hex digit is a byte
456 * separator, i.e. that we don't have
457 * more than two hex digits, or a
460 if (is_byte_sep(*punct)) {
464 else if (force_separators) {
472 else if (*q && isxdigit(*p) && is_byte_sep(*q)) {
477 * Only one hex digit (not at the end of the string)
479 val = (guint8) strtoul(one_digit, NULL, 16);
480 g_byte_array_append(bytes, &val, 1);
484 else if (!*q && isxdigit(*p)) {
489 * Only one hex digit (at the end of the string)
491 val = (guint8) strtoul(one_digit, NULL, 16);
492 g_byte_array_append(bytes, &val, 1);
504 * Turn an RFC 3986 percent-encoded string into a byte array.
505 * XXX - We don't check for reserved characters.
507 #define HEX_DIGIT_BUF_LEN 3
509 uri_str_to_bytes(const char *uri_str, GByteArray *bytes) {
512 guchar hex_digit[HEX_DIGIT_BUF_LEN];
514 g_byte_array_set_size(bytes, 0);
519 p = (const guchar *)uri_str;
522 if (! isascii(*p) || ! isprint(*p))
526 if (*p == '\0') return FALSE;
529 if (*p == '\0') return FALSE;
532 if (! isxdigit(hex_digit[0]) || ! isxdigit(hex_digit[1]))
534 val = (guint8) strtoul((char *)hex_digit, NULL, 16);
535 g_byte_array_append(bytes, &val, 1);
537 g_byte_array_append(bytes, (guint8 *) p, 1);
546 * Given a GByteArray, generate a string from it that shows non-printable
547 * characters as percent-style escapes, and return a pointer to it.
550 format_uri(const GByteArray *bytes, const gchar *reserved_chars)
552 static gchar *fmtbuf[3];
553 static guint fmtbuf_len[3];
555 const gchar *reserved_def = ":/?#[]@!$&'()*+,;= ";
556 const gchar *reserved = reserved_def;
559 gboolean is_reserved = FALSE;
566 reserved = reserved_chars;
569 * Allocate the buffer if it's not already allocated.
571 if (fmtbuf[idx] == NULL) {
572 fmtbuf[idx] = g_malloc(INITIAL_FMTBUF_SIZE);
573 fmtbuf_len[idx] = INITIAL_FMTBUF_SIZE;
575 for (column = 0; column < bytes->len; column++) {
577 * Is there enough room for this character, if it expands to
578 * a percent plus 2 hex digits (which is the most it can
579 * expand to), and also enough room for a terminating '\0'?
581 if (column+2+1 >= fmtbuf_len[idx]) {
583 * Double the buffer's size if it's not big enough.
584 * The size of the buffer starts at 128, so doubling its size
585 * adds at least another 128 bytes, which is more than enough
586 * for one more character plus a terminating '\0'.
588 fmtbuf_len[idx] = fmtbuf_len[idx] * 2;
589 fmtbuf[idx] = g_realloc(fmtbuf[idx], fmtbuf_len[idx]);
591 c = bytes->data[column];
593 if (!isascii(c) || !isprint(c) || c == '%') {
597 for (i = 0; i < strlen(reserved); i++) {
598 if (c == reserved[i])
603 fmtbuf[idx][column] = c;
605 fmtbuf[idx][column] = '%';
607 fmtbuf[idx][column] = hex[c >> 4];
609 fmtbuf[idx][column] = hex[c & 0xF];
612 fmtbuf[idx][column] = '\0';
617 * Create a copy of a GByteArray
619 * @param ba The byte array to be copied.
620 * @return If ba exists, a freshly allocated copy. NULL otherwise.
624 byte_array_dup(GByteArray *ba) {
630 new_ba = g_byte_array_new();
631 g_byte_array_append(new_ba, ba->data, ba->len);
635 #define SUBID_BUF_LEN 5
637 oid_str_to_bytes(const char *oid_str, GByteArray *bytes) {
638 guint32 subid0, subid, sicnt, i;
640 guint8 buf[SUBID_BUF_LEN];
642 g_byte_array_set_size(bytes, 0);
648 if (!isdigit((guchar)*p) && (*p != '.')) return FALSE;
650 if (p == oid_str) return FALSE;
651 if (!*(p+1)) return FALSE;
652 if ((p-1) == dot) return FALSE;
657 if (!dot) return FALSE;
661 subid0 = 0; /* squelch GCC complaints */
664 while (isdigit((guchar)*p)) {
671 if (subid0 > 2) return FALSE;
672 } else if (sicnt == 1) {
673 if ((subid0 < 2) && (subid > 39)) return FALSE;
674 subid += 40 * subid0;
680 buf[i] = 0x80 | (subid % 0x80);
682 } while (subid && i);
683 buf[SUBID_BUF_LEN-1] &= 0x7F;
684 g_byte_array_append(bytes, buf + i, SUBID_BUF_LEN - i);
694 * Compare the contents of two GByteArrays
696 * @param ba1 A byte array
697 * @param ba2 A byte array
698 * @return If both arrays are non-NULL and their lengths are equal and
699 * their contents are equal, returns TRUE. Otherwise, returns
702 * XXX - Should this be in strutil.c?
705 byte_array_equal(GByteArray *ba1, GByteArray *ba2) {
709 if (ba1->len != ba2->len)
712 if (memcmp(ba1->data, ba2->data, ba1->len) != 0)
719 /* Return an XML-escaped representation of the unescaped string.
720 * The returned string must be freed when no longer in use. */
722 xml_escape(const gchar *unescaped)
724 GString *buffer = g_string_sized_new(128);
729 while ( (c = *p++) ) {
732 g_string_append(buffer, "<");
735 g_string_append(buffer, ">");
738 g_string_append(buffer, "&");
741 g_string_append(buffer, "'");
744 g_string_append(buffer, """);
747 g_string_append_c(buffer, c);
751 /* Return the string value contained within the GString
752 * after getting rid of the GString structure.
753 * This is the way to do this, see the GLib reference. */
754 return g_string_free(buffer, FALSE);
758 /* Return the first occurrence of needle in haystack.
759 * If not found, return NULL.
760 * If either haystack or needle has 0 length, return NULL.
761 * Algorithm copied from GNU's glibc 2.3.2 memmem() */
763 epan_memmem(const guint8 *haystack, guint haystack_len,
764 const guint8 *needle, guint needle_len)
767 const guint8 *const last_possible
768 = haystack + haystack_len - needle_len;
770 if (needle_len == 0) {
774 if (needle_len > haystack_len) {
778 for (begin = haystack ; begin <= last_possible; ++begin) {
779 if (begin[0] == needle[0] &&
780 !memcmp(&begin[1], needle + 1,
790 * Scan the search string to make sure it's valid hex. Return the
791 * number of bytes in nbytes.
794 convert_string_to_hex(const char *string, size_t *nbytes)
799 guint8 *bytes, *q, byte_val;
808 continue; /* allow white space */
809 if (c==':' || c=='.' || c=='-')
810 continue; /* skip any ':', '.', or '-' between bytes */
812 /* Not a valid hex digit - fail */
817 * We can only match bytes, not nibbles; we must have a valid
818 * hex digit immediately after that hex digit.
824 /* 2 hex digits = 1 byte */
829 * Were we given any hex digits?
837 * OK, it's valid, and it generates "n_bytes" bytes; generate the
840 bytes = g_malloc(n_bytes);
848 continue; /* allow white space */
849 if (c==':' || c=='.' || c=='-')
850 continue; /* skip any ':', '.', or '-' between bytes */
851 /* From the loop above, we know this is a hex digit */
855 byte_val = (c - 'a') + 10;
857 byte_val = (c - 'A') + 10;
860 /* We also know this is a hex digit */
865 byte_val |= (c - 'a') + 10;
867 byte_val |= (c - 'A') + 10;
876 * Copy it if it's a case-sensitive search; uppercase it if it's
877 * a case-insensitive search.
880 convert_string_case(const char *string, gboolean case_insensitive)
887 if (case_insensitive) {
888 out_string = g_malloc(strlen(string) + 1);
889 for (p = &string[0], q = &out_string[0]; (c = *p) != '\0'; p++, q++)
890 *q = toupper((unsigned char)*p);
893 out_string = g_strdup(string);
898 epan_strcasestr(const char *haystack, const char *needle)
900 gsize hlen = strlen(haystack);
901 gsize nlen = strlen(needle);
903 while (hlen-- >= nlen) {
904 if (!g_ascii_strncasecmp(haystack, needle, nlen))
905 return (char*) haystack;
912 string_or_null(const char *string)
920 escape_string_len(const char *string)
927 for (p = string; (c = *p) != '\0'; p++) {
928 /* Backslashes and double-quotes must
930 if (c == '\\' || c == '"') {
933 /* Values that can't nicely be represented
934 * in ASCII need to be escaped. */
935 else if (!isprint((unsigned char)c)) {
939 /* Other characters are just passed through. */
944 return repr_len + 2; /* string plus leading and trailing quotes */
948 escape_string(char *buf, const char *string)
957 for (p = string; (c = *p) != '\0'; p++) {
958 /* Backslashes and double-quotes must
960 if (c == '\\' || c == '"') {
964 /* Values that can't nicely be represented
965 * in ASCII need to be escaped. */
966 else if (!isprint((unsigned char)c)) {
968 g_snprintf(hexbuf,sizeof(hexbuf), "%02x", (unsigned char) c);
974 /* Other characters are just passed through. */
984 #define GN_CHAR_ALPHABET_SIZE 128
986 static gunichar IA5_default_alphabet[GN_CHAR_ALPHABET_SIZE] = {
988 /*ITU-T recommendation T.50 specifies International Reference Alphabet 5 (IA5) */
990 '?', '?', '?', '?', '?', '?', '?', '?',
991 '?', '?', '?', '?', '?', '?', '?', '?',
992 '?', '?', '?', '?', '?', '?', '?', '?',
993 '?', '?', '?', '?', '?', '?', '?', '?',
994 ' ', '!', '\"','#', '$', '%', '&', '\'',
995 '(', ')', '*', '+', ',', '-', '.', '/',
996 '0', '1', '2', '3', '4', '5', '6', '7',
997 '8', '9', ':', ';', '<', '=', '>', '?',
998 '@', 'A', 'B', 'C', 'D', 'E', 'F', 'G',
999 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O',
1000 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W',
1001 'X', 'Y', 'Z', '[', '\\', ']', '^', '_',
1002 '`', 'a', 'b', 'c', 'd', 'e', 'f', 'g',
1003 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
1004 'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
1005 'x', 'y', 'z', '{', '|', '}', '~', '?'
1009 char_def_ia5_alphabet_decode(unsigned char value)
1011 if (value < GN_CHAR_ALPHABET_SIZE)
1013 return IA5_default_alphabet[value];
1022 IA5_7BIT_decode(unsigned char * dest, const unsigned char* src, int len)
1028 for (i = 0, j = 0; j < len; j++)
1030 buf = char_def_ia5_alphabet_decode(src[j]);
1031 i += g_unichar_to_utf8(buf,&(dest[i]));
1038 * This function takes a string and copies it, inserting an underscore before
1039 * every underscore in it.
1042 ws_strdup_escape_underscore (const gchar *str)
1044 gchar *p, *q, *new_str;
1050 /* Worst case: A string that is full of underscores */
1051 q = new_str = g_malloc (strlen(str) * 2 + 1);
1066 * This function takes a string and copies it, replacing any occurrences of double
1067 * underscores with a single underscore.
1070 ws_strdup_unescape_underscore (const gchar *str)
1072 gchar *p, *q, *new_str;
1078 /* Worst case: A string that contains no underscores */
1079 q = new_str = g_malloc (strlen(str) + 1);
1084 if ((*p == '_') && (*(p+1) == '_'))
1094 /* Create a newly-allocated string with replacement values. */
1095 gchar *string_replace(const gchar* str, const gchar *old_val, const gchar *new_val) {
1099 if (!str || !old_val) {
1103 str_parts = g_strsplit(str, old_val, 0);
1104 new_str = g_strjoinv(new_val, str_parts);
1105 g_strfreev(str_parts);