2 * String utility routines
6 * Wireshark - Network traffic analyzer
7 * By Gerald Combs <gerald@wireshark.org>
8 * Copyright 1998 Gerald Combs
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version 2
13 * of the License, or (at your option) any later version.
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
35 #include <../isprint.h>
44 static const char hex[16] = { '0', '1', '2', '3', '4', '5', '6', '7',
45 '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };
48 * Given a pointer into a data buffer, and to the end of the buffer,
49 * find the end of the (putative) line at that position in the data
51 * Return a pointer to the EOL character(s) in "*eol".
54 find_line_end(const guchar *data, const guchar *dataend, const guchar **eol)
56 const guchar *lineend;
58 lineend = memchr(data, '\n', dataend - data);
59 if (lineend == NULL) {
61 * No LF - line is probably continued in next TCP segment.
67 * Is the LF at the beginning of the line?
71 * No - is it preceded by a carriage return?
72 * (Perhaps it's supposed to be, but that's not guaranteed....)
74 if (*(lineend - 1) == '\r') {
76 * Yes. The EOL starts with the CR.
81 * No. The EOL starts with the LF.
86 * I seem to remember that we once saw lines ending with LF-CR
87 * in an HTTP request or response, so check if it's *followed*
88 * by a carriage return.
90 if (lineend < (dataend - 1) && *(lineend + 1) == '\r') {
92 * It's <non-LF><LF><CR>; say it ends with the CR.
99 * Yes - the EOL starts with the LF.
105 * Point to the character after the last character.
113 * Get the length of the next token in a line, and the beginning of the
114 * next token after that (if any).
115 * Return 0 if there is no next token.
118 get_token_len(const guchar *linep, const guchar *lineend,
119 const guchar **next_token)
121 const guchar *tokenp;
127 * Search for a blank, a CR or an LF, or the end of the buffer.
129 while (linep < lineend && *linep != ' ' && *linep != '\r' && *linep != '\n')
131 token_len = (int) (linep - tokenp);
134 * Skip trailing blanks.
136 while (linep < lineend && *linep == ' ')
145 #define INITIAL_FMTBUF_SIZE 128
148 * Given a string, generate a string from it that shows non-printable
149 * characters as C-style escapes, and return a pointer to it.
152 format_text(const guchar *string, size_t len)
154 static gchar *fmtbuf[3];
155 static int fmtbuf_len[3];
158 const guchar *stringend = string + len;
165 * Allocate the buffer if it's not already allocated.
167 if (fmtbuf[idx] == NULL) {
168 fmtbuf[idx] = g_malloc(INITIAL_FMTBUF_SIZE);
169 fmtbuf_len[idx] = INITIAL_FMTBUF_SIZE;
172 while (string < stringend) {
174 * Is there enough room for this character, if it expands to
175 * a backslash plus 3 octal digits (which is the most it can
176 * expand to), and also enough room for a terminating '\0'?
178 if (column+3+1 >= fmtbuf_len[idx]) {
180 * Double the buffer's size if it's not big enough.
181 * The size of the buffer starts at 128, so doubling its size
182 * adds at least another 128 bytes, which is more than enough
183 * for one more character plus a terminating '\0'.
185 fmtbuf_len[idx] = fmtbuf_len[idx] * 2;
186 fmtbuf[idx] = g_realloc(fmtbuf[idx], fmtbuf_len[idx]);
190 if (g_unichar_isprint(c)) {
191 fmtbuf[idx][column] = c;
194 fmtbuf[idx][column] = '\\';
199 fmtbuf[idx][column] = 'a';
204 fmtbuf[idx][column] = 'b'; /* BS */
209 fmtbuf[idx][column] = 'f'; /* FF */
214 fmtbuf[idx][column] = 'n'; /* NL */
219 fmtbuf[idx][column] = 'r'; /* CR */
224 fmtbuf[idx][column] = 't'; /* tab */
229 fmtbuf[idx][column] = 'v';
235 fmtbuf[idx][column] = i + '0';
238 fmtbuf[idx][column] = i + '0';
241 fmtbuf[idx][column] = i + '0';
247 fmtbuf[idx][column] = '\0';
252 * Given a string, generate a string from it that shows non-printable
253 * characters as C-style escapes except a whitespace character
254 * (space, tab, carriage return, new line, vertical tab, or formfeed)
255 * which will be replaced by a space, and return a pointer to it.
258 format_text_wsp(const guchar *string, size_t len)
260 static gchar *fmtbuf[3];
261 static int fmtbuf_len[3];
264 const guchar *stringend = string + len;
271 * Allocate the buffer if it's not already allocated.
273 if (fmtbuf[idx] == NULL) {
274 fmtbuf[idx] = g_malloc(INITIAL_FMTBUF_SIZE);
275 fmtbuf_len[idx] = INITIAL_FMTBUF_SIZE;
278 while (string < stringend) {
280 * Is there enough room for this character, if it expands to
281 * a backslash plus 3 octal digits (which is the most it can
282 * expand to), and also enough room for a terminating '\0'?
284 if (column+3+1 >= fmtbuf_len[idx]) {
286 * Double the buffer's size if it's not big enough.
287 * The size of the buffer starts at 128, so doubling its size
288 * adds at least another 128 bytes, which is more than enough
289 * for one more character plus a terminating '\0'.
291 fmtbuf_len[idx] = fmtbuf_len[idx] * 2;
292 fmtbuf[idx] = g_realloc(fmtbuf[idx], fmtbuf_len[idx]);
296 if (g_unichar_isprint(c)) {
297 fmtbuf[idx][column] = c;
299 } else if (isspace(c)) {
300 fmtbuf[idx][column] = ' ';
303 fmtbuf[idx][column] = '\\';
308 fmtbuf[idx][column] = 'a';
313 fmtbuf[idx][column] = 'b'; /* BS */
318 fmtbuf[idx][column] = 'f'; /* FF */
323 fmtbuf[idx][column] = 'n'; /* NL */
328 fmtbuf[idx][column] = 'r'; /* CR */
333 fmtbuf[idx][column] = 't'; /* tab */
338 fmtbuf[idx][column] = 'v';
344 fmtbuf[idx][column] = i + '0';
347 fmtbuf[idx][column] = i + '0';
350 fmtbuf[idx][column] = i + '0';
356 fmtbuf[idx][column] = '\0';
361 is_byte_sep(guint8 c)
363 return (c == '-' || c == ':' || c == '.');
366 /* Turn a string of hex digits with optional separators (defined by
367 * is_byte_sep()) into a byte array.
370 hex_str_to_bytes(const char *hex_str, GByteArray *bytes, gboolean force_separators) {
372 const guchar *p, *q, *r, *s, *punct;
373 char four_digits_first_half[3];
374 char four_digits_second_half[3];
378 if (! hex_str || ! bytes) {
381 g_byte_array_set_size(bytes, 0);
382 p = (const guchar *)hex_str;
389 && isxdigit(*p) && isxdigit(*q) &&
390 isxdigit(*r) && isxdigit(*s)) {
391 four_digits_first_half[0] = *p;
392 four_digits_first_half[1] = *q;
393 four_digits_first_half[2] = '\0';
394 four_digits_second_half[0] = *r;
395 four_digits_second_half[1] = *s;
396 four_digits_second_half[2] = '\0';
399 * Four or more hex digits in a row.
401 val = (guint8) strtoul(four_digits_first_half, NULL, 16);
402 g_byte_array_append(bytes, &val, 1);
403 val = (guint8) strtoul(four_digits_second_half, NULL, 16);
404 g_byte_array_append(bytes, &val, 1);
409 * Make sure the character after
410 * the fourth hex digit is a byte
411 * separator, i.e. that we don't have
412 * more than four hex digits, or a
415 if (is_byte_sep(*punct)) {
419 else if (force_separators) {
428 else if (*q && isxdigit(*p) && isxdigit(*q)) {
431 two_digits[2] = '\0';
434 * Two hex digits in a row.
436 val = (guint8) strtoul(two_digits, NULL, 16);
437 g_byte_array_append(bytes, &val, 1);
441 * Make sure the character after
442 * the second hex digit is a byte
443 * separator, i.e. that we don't have
444 * more than two hex digits, or a
447 if (is_byte_sep(*punct)) {
451 else if (force_separators) {
459 else if (*q && isxdigit(*p) && is_byte_sep(*q)) {
464 * Only one hex digit (not at the end of the string)
466 val = (guint8) strtoul(one_digit, NULL, 16);
467 g_byte_array_append(bytes, &val, 1);
471 else if (!*q && isxdigit(*p)) {
476 * Only one hex digit (at the end of the string)
478 val = (guint8) strtoul(one_digit, NULL, 16);
479 g_byte_array_append(bytes, &val, 1);
491 * Turn an RFC 3986 percent-encoded string into a byte array.
492 * XXX - We don't check for reserved characters.
494 #define HEX_DIGIT_BUF_LEN 3
496 uri_str_to_bytes(const char *uri_str, GByteArray *bytes) {
499 guchar hex_digit[HEX_DIGIT_BUF_LEN];
501 g_byte_array_set_size(bytes, 0);
506 p = (const guchar *)uri_str;
509 if (! isascii(*p) || ! isprint(*p))
513 if (*p == '\0') return FALSE;
516 if (*p == '\0') return FALSE;
519 if (! isxdigit(hex_digit[0]) || ! isxdigit(hex_digit[1]))
521 val = (guint8) strtoul((char *)hex_digit, NULL, 16);
522 g_byte_array_append(bytes, &val, 1);
524 g_byte_array_append(bytes, (guint8 *) p, 1);
533 * Given a GByteArray, generate a string from it that shows non-printable
534 * characters as percent-style escapes, and return a pointer to it.
537 format_uri(const GByteArray *bytes, const gchar *reserved_chars)
539 static gchar *fmtbuf[3];
540 static guint fmtbuf_len[3];
542 const guchar *reserved_def = ":/?#[]@!$&'()*+,;= ";
543 const guchar *reserved = reserved_def;
546 gboolean is_reserved = FALSE;
553 reserved = reserved_chars;
556 * Allocate the buffer if it's not already allocated.
558 if (fmtbuf[idx] == NULL) {
559 fmtbuf[idx] = g_malloc(INITIAL_FMTBUF_SIZE);
560 fmtbuf_len[idx] = INITIAL_FMTBUF_SIZE;
562 for (column = 0; column < bytes->len; column++) {
564 * Is there enough room for this character, if it expands to
565 * a percent plus 2 hex digits (which is the most it can
566 * expand to), and also enough room for a terminating '\0'?
568 if (column+2+1 >= fmtbuf_len[idx]) {
570 * Double the buffer's size if it's not big enough.
571 * The size of the buffer starts at 128, so doubling its size
572 * adds at least another 128 bytes, which is more than enough
573 * for one more character plus a terminating '\0'.
575 fmtbuf_len[idx] = fmtbuf_len[idx] * 2;
576 fmtbuf[idx] = g_realloc(fmtbuf[idx], fmtbuf_len[idx]);
578 c = bytes->data[column];
580 if (!isascii(c) || !isprint(c) || c == '%') {
584 for (i = 0; i < strlen(reserved); i++) {
585 if (c == reserved[i])
590 fmtbuf[idx][column] = c;
592 fmtbuf[idx][column] = '%';
594 fmtbuf[idx][column] = hex[c >> 4];
596 fmtbuf[idx][column] = hex[c & 0xF];
599 fmtbuf[idx][column] = '\0';
604 * Create a copy of a GByteArray
606 * @param ba The byte array to be copied.
607 * @return If ba exists, a freshly allocated copy. NULL otherwise.
611 byte_array_dup(GByteArray *ba) {
617 new_ba = g_byte_array_new();
618 g_byte_array_append(new_ba, ba->data, ba->len);
622 #define SUBID_BUF_LEN 5
624 oid_str_to_bytes(const char *oid_str, GByteArray *bytes) {
625 guint32 subid0, subid, sicnt, i;
627 guint8 buf[SUBID_BUF_LEN];
629 g_byte_array_set_size(bytes, 0);
635 if (!isdigit((guchar)*p) && (*p != '.')) return FALSE;
637 if (p == oid_str) return FALSE;
638 if (!*(p+1)) return FALSE;
639 if ((p-1) == dot) return FALSE;
644 if (!dot) return FALSE;
648 subid0 = 0; /* squelch GCC complaints */
651 while (isdigit((guchar)*p)) {
658 if (subid0 > 2) return FALSE;
659 } else if (sicnt == 1) {
660 if ((subid0 < 2) && (subid > 39)) return FALSE;
661 subid += 40 * subid0;
667 buf[i] = 0x80 | (subid % 0x80);
669 } while (subid && i);
670 buf[SUBID_BUF_LEN-1] &= 0x7F;
671 g_byte_array_append(bytes, buf + i, SUBID_BUF_LEN - i);
681 * Compare the contents of two GByteArrays
683 * @param ba1 A byte array
684 * @param ba2 A byte array
685 * @return If both arrays are non-NULL and their lengths are equal and
686 * their contents are equal, returns TRUE. Otherwise, returns
689 * XXX - Should this be in strutil.c?
692 byte_array_equal(GByteArray *ba1, GByteArray *ba2) {
696 if (ba1->len != ba2->len)
699 if (memcmp(ba1->data, ba2->data, ba1->len) != 0)
706 /* Return an XML-escaped representation of the unescaped string.
707 * The returned string must be freed when no longer in use. */
709 xml_escape(const gchar *unescaped)
711 GString *buffer = g_string_sized_new(128);
716 while ( (c = *p++) ) {
719 g_string_append(buffer, "<");
722 g_string_append(buffer, ">");
725 g_string_append(buffer, "&");
728 g_string_append(buffer, "'");
731 g_string_append(buffer, """);
734 g_string_append_c(buffer, c);
738 /* Return the string value contained within the GString
739 * after getting rid of the GString structure.
740 * This is the way to do this, see the GLib reference. */
741 return g_string_free(buffer, FALSE);
745 /* Return the first occurrence of needle in haystack.
746 * If not found, return NULL.
747 * If either haystack or needle has 0 length, return NULL.
748 * Algorithm copied from GNU's glibc 2.3.2 memmem() */
750 epan_memmem(const guint8 *haystack, guint haystack_len,
751 const guint8 *needle, guint needle_len)
754 const guint8 *const last_possible
755 = haystack + haystack_len - needle_len;
757 if (needle_len == 0) {
761 if (needle_len > haystack_len) {
765 for (begin = haystack ; begin <= last_possible; ++begin) {
766 if (begin[0] == needle[0] &&
767 !memcmp(&begin[1], needle + 1,
777 * Scan the search string to make sure it's valid hex. Return the
778 * number of bytes in nbytes.
781 convert_string_to_hex(const char *string, size_t *nbytes)
786 guint8 *bytes, *q, byte_val;
795 continue; /* allow white space */
796 if (c==':' || c=='.' || c=='-')
797 continue; /* skip any ':', '.', or '-' between bytes */
799 /* Not a valid hex digit - fail */
804 * We can only match bytes, not nibbles; we must have a valid
805 * hex digit immediately after that hex digit.
811 /* 2 hex digits = 1 byte */
816 * Were we given any hex digits?
824 * OK, it's valid, and it generates "n_bytes" bytes; generate the
827 bytes = g_malloc(n_bytes);
835 continue; /* allow white space */
836 if (c==':' || c=='.' || c=='-')
837 continue; /* skip any ':', '.', or '-' between bytes */
838 /* From the loop above, we know this is a hex digit */
842 byte_val = (c - 'a') + 10;
844 byte_val = (c - 'A') + 10;
847 /* We also know this is a hex digit */
852 byte_val |= (c - 'a') + 10;
854 byte_val |= (c - 'A') + 10;
863 * Copy it if it's a case-sensitive search; uppercase it if it's
864 * a case-insensitive search.
867 convert_string_case(const char *string, gboolean case_insensitive)
870 if (case_insensitive) {
871 return g_utf8_strup(string, -1);
873 return g_strdup(string);
878 epan_strcasestr(const char *haystack, const char *needle)
880 gsize hlen = strlen(haystack);
881 gsize nlen = strlen(needle);
883 while (hlen-- >= nlen) {
884 if (!g_ascii_strncasecmp(haystack, needle, nlen))
885 return (char*) haystack;
892 string_or_null(const char *string)
900 escape_string_len(const char *string)
907 for (p = string; (c = *p) != '\0'; p++) {
908 /* Backslashes and double-quotes must
910 if (c == '\\' || c == '"') {
913 /* Values that can't nicely be represented
914 * in ASCII need to be escaped. */
915 else if (!isprint((unsigned char)c)) {
919 /* Other characters are just passed through. */
924 return repr_len + 2; /* string plus leading and trailing quotes */
928 escape_string(char *buf, const char *string)
937 for (p = string; (c = *p) != '\0'; p++) {
938 /* Backslashes and double-quotes must
940 if (c == '\\' || c == '"') {
944 /* Values that can't nicely be represented
945 * in ASCII need to be escaped. */
946 else if (!isprint((unsigned char)c)) {
948 g_snprintf(hexbuf,sizeof(hexbuf), "%02x", (unsigned char) c);
954 /* Other characters are just passed through. */
964 #define GN_CHAR_ALPHABET_SIZE 128
966 static gunichar IA5_default_alphabet[GN_CHAR_ALPHABET_SIZE] = {
968 /*ITU-T recommendation T.50 specifies International Reference Alphabet 5 (IA5) */
970 '?', '?', '?', '?', '?', '?', '?', '?',
971 '?', '?', '?', '?', '?', '?', '?', '?',
972 '?', '?', '?', '?', '?', '?', '?', '?',
973 '?', '?', '?', '?', '?', '?', '?', '?',
974 ' ', '!', '\"','#', '$', '%', '&', '\'',
975 '(', ')', '*', '+', ',', '-', '.', '/',
976 '0', '1', '2', '3', '4', '5', '6', '7',
977 '8', '9', ':', ';', '<', '=', '>', '?',
978 '@', 'A', 'B', 'C', 'D', 'E', 'F', 'G',
979 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O',
980 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W',
981 'X', 'Y', 'Z', '[', '\\', ']', '^', '_',
982 '`', 'a', 'b', 'c', 'd', 'e', 'f', 'g',
983 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
984 'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
985 'x', 'y', 'z', '{', '|', '}', '~', '?'
989 char_def_ia5_alphabet_decode(unsigned char value)
991 if (value < GN_CHAR_ALPHABET_SIZE)
993 return IA5_default_alphabet[value];
1002 IA5_7BIT_decode(unsigned char * dest, const unsigned char* src, int len)
1008 for (i = 0, j = 0; j < len; j++)
1010 buf = char_def_ia5_alphabet_decode(src[j]);
1011 i += g_unichar_to_utf8(buf,&(dest[i]));
1018 * This function takes a string and copies it, inserting a 'chr' before
1019 * every 'chr' in it.
1022 ws_strdup_escape_char (const gchar *str, const gchar chr)
1024 gchar *p, *q, *new_str;
1030 /* Worst case: A string that is full of 'chr' */
1031 q = new_str = g_malloc (strlen(str) * 2 + 1);
1046 * This function takes a string and copies it, replacing every occurrence of a
1047 * doubled 'chr' with a single 'chr'.
1050 ws_strdup_unescape_char (const gchar *str, const char chr)
1052 gchar *p, *q, *new_str;
1058 /* Worst case: A string that contains no 'chr' */
1059 q = new_str = g_malloc (strlen(str) + 1);
1064 if ((*p == chr) && (*(p+1) == chr))
1074 /* Create a newly-allocated string with replacement values. */
1075 gchar *string_replace(const gchar* str, const gchar *old_val, const gchar *new_val) {
1079 if (!str || !old_val) {
1083 str_parts = g_strsplit(str, old_val, 0);
1084 new_str = g_strjoinv(new_val, str_parts);
1085 g_strfreev(str_parts);