2 * String utility routines
6 * Wireshark - Network traffic analyzer
7 * By Gerald Combs <gerald@wireshark.org>
8 * Copyright 1998 Gerald Combs
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version 2
13 * of the License, or (at your option) any later version.
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
35 #include <../isprint.h>
/*
 * Lookup table of the sixteen upper-case hexadecimal digit characters,
 * indexed by nibble value. format_uri() indexes it with c >> 4 and c & 0xF.
 */
44 static const char hex[16] = { '0', '1', '2', '3', '4', '5', '6', '7',
45 '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };
48 * Given a pointer into a data buffer, and to the end of the buffer,
49 * find the end of the (putative) line at that position in the data
51 * Return a pointer to the EOL character(s) in "*eol".
/*
 * find_line_end: search [data, dataend) for the first LF with memchr() and
 * work out where the end-of-line sequence around it starts.
 *
 * data    - first byte to examine.
 * dataend - end of the buffer; memchr() is given dataend - data bytes.
 * eol     - output pointer for the position of the EOL character(s).
 *
 * Notes on the visible logic:
 *  - A NULL memchr() result means there is no LF in the buffer.
 *  - The byte before the LF is compared with CR to recognise CR-LF.
 *    NOTE(review): reading lineend - 1 is only safe when the LF is not the
 *    first byte. The comments mention that case, but its test is not
 *    visible in this span - confirm.
 *  - The byte after the LF is compared with CR (LF-CR) only when
 *    lineend < dataend - 1, i.e. when that byte is still inside the buffer.
 */
54 find_line_end(const guchar *data, const guchar *dataend, const guchar **eol)
56 const guchar *lineend;
58 lineend = memchr(data, '\n', dataend - data);
59 if (lineend == NULL) {
61 * No LF - line is probably continued in next TCP segment.
67 * Is the LF at the beginning of the line?
71 * No - is it preceded by a carriage return?
72 * (Perhaps it's supposed to be, but that's not guaranteed....)
74 if (*(lineend - 1) == '\r') {
76 * Yes. The EOL starts with the CR.
81 * No. The EOL starts with the LF.
86 * I seem to remember that we once saw lines ending with LF-CR
87 * in an HTTP request or response, so check if it's *followed*
88 * by a carriage return.
90 if (lineend < (dataend - 1) && *(lineend + 1) == '\r') {
92 * It's <non-LF><LF><CR>; say it ends with the CR.
99 * Yes - the EOL starts with the LF.
105 * Point to the character after the last character.
113 * Get the length of the next token in a line, and the beginning of the
114 * next token after that (if any).
115 * Return 0 if there is no next token.
/*
 * get_token_len: measure the token that starts at linep.
 *
 * linep      - current position in the line.
 * lineend    - upper bound for both scans (each tests linep < lineend).
 * next_token - output pointer; its assignment is not visible in this span.
 *
 * The first loop runs while the current byte is not a blank, CR or LF and
 * lineend has not been reached. token_len is then linep - tokenp, cast to
 * int. The second loop runs while the current byte is a blank.
 */
118 get_token_len(const guchar *linep, const guchar *lineend,
119 const guchar **next_token)
121 const guchar *tokenp;
127 * Search for a blank, a CR or an LF, or the end of the buffer.
129 while (linep < lineend && *linep != ' ' && *linep != '\r' && *linep != '\n')
131 token_len = (int) (linep - tokenp);
134 * Skip trailing blanks.
136 while (linep < lineend && *linep == ' ')
/* Initial size, in bytes, of each static formatting buffer (the value passed to g_malloc()). */
145 #define INITIAL_FMTBUF_SIZE 128
148 * Given a string, generate a string from it that shows non-printable
149 * characters as C-style escapes, and return a pointer to it.
/*
 * format_text: build an escaped, NUL-terminated rendering of the len bytes
 * at string.
 *
 * string - input bytes; the loop is bounded by stringend = string + len, so
 *          the input does not have to be NUL-terminated.
 * len    - number of input bytes.
 *
 * Output goes into fmtbuf[idx], one of three function-static buffers that
 * persist between calls. A buffer is allocated with INITIAL_FMTBUF_SIZE
 * bytes the first time its slot is NULL, and is doubled with g_realloc()
 * whenever column+3+1 >= fmtbuf_len[idx].
 *
 * Visible output cases: the byte c stored unchanged; a backslash followed by
 * one of a, b, f, n, r, t, v; or three digits, each stored as i + '0'.
 * The text is terminated with '\0'.
 *
 * NOTE(review): how idx is chosen and what is returned are not visible in
 * this span. Because the buffers are static, callers presumably must not
 * free the result - confirm.
 */
152 format_text(const guchar *string, size_t len)
154 static gchar *fmtbuf[3];
155 static int fmtbuf_len[3];
158 const guchar *stringend = string + len;
165 * Allocate the buffer if it's not already allocated.
167 if (fmtbuf[idx] == NULL) {
168 fmtbuf[idx] = g_malloc(INITIAL_FMTBUF_SIZE);
169 fmtbuf_len[idx] = INITIAL_FMTBUF_SIZE;
172 while (string < stringend) {
174 * Is there enough room for this character, if it expands to
175 * a backslash plus 3 octal digits (which is the most it can
176 * expand to), and also enough room for a terminating '\0'?
178 if (column+3+1 >= fmtbuf_len[idx]) {
180 * Double the buffer's size if it's not big enough.
181 * The size of the buffer starts at 128, so doubling its size
182 * adds at least another 128 bytes, which is more than enough
183 * for one more character plus a terminating '\0'.
185 fmtbuf_len[idx] = fmtbuf_len[idx] * 2;
186 fmtbuf[idx] = g_realloc(fmtbuf[idx], fmtbuf_len[idx]);
191 fmtbuf[idx][column] = c;
194 fmtbuf[idx][column] = '\\';
199 fmtbuf[idx][column] = 'a';
204 fmtbuf[idx][column] = 'b'; /* BS */
209 fmtbuf[idx][column] = 'f'; /* FF */
214 fmtbuf[idx][column] = 'n'; /* NL */
219 fmtbuf[idx][column] = 'r'; /* CR */
224 fmtbuf[idx][column] = 't'; /* tab */
229 fmtbuf[idx][column] = 'v';
235 fmtbuf[idx][column] = i + '0';
238 fmtbuf[idx][column] = i + '0';
241 fmtbuf[idx][column] = i + '0';
247 fmtbuf[idx][column] = '\0';
252 * Given a string, generate a string from it that shows non-printable
253 * characters as C-style escapes except a whitespace character
254 * (space, tab, carriage return, new line, vertical tab, or formfeed)
255 * which will be replaced by a space, and return a pointer to it.
/*
 * format_text_wsp: build an escaped, NUL-terminated rendering of the len
 * bytes at string, with whitespace flattened to a single space character.
 *
 * string - input bytes; the loop is bounded by stringend = string + len.
 * len    - number of input bytes.
 *
 * Output goes into fmtbuf[idx], one of three function-static buffers local
 * to this function. A buffer is allocated with INITIAL_FMTBUF_SIZE bytes
 * the first time its slot is NULL, and is doubled with g_realloc() whenever
 * column+3+1 >= fmtbuf_len[idx].
 *
 * Visible output cases: the byte c stored unchanged; a ' ' when isspace(c)
 * is true; a backslash followed by one of a, b, f, n, r, t, v; or three
 * digits, each stored as i + '0'. The text is terminated with '\0'.
 *
 * NOTE(review): the isspace() branch comes before the escape branch, so the
 * f, n, r, t and v escapes listed further down look unreachable for the
 * standard whitespace characters - confirm against the full function.
 * How idx is chosen and what is returned are not visible in this span.
 */
258 format_text_wsp(const guchar *string, size_t len)
260 static gchar *fmtbuf[3];
261 static int fmtbuf_len[3];
264 const guchar *stringend = string + len;
271 * Allocate the buffer if it's not already allocated.
273 if (fmtbuf[idx] == NULL) {
274 fmtbuf[idx] = g_malloc(INITIAL_FMTBUF_SIZE);
275 fmtbuf_len[idx] = INITIAL_FMTBUF_SIZE;
278 while (string < stringend) {
280 * Is there enough room for this character, if it expands to
281 * a backslash plus 3 octal digits (which is the most it can
282 * expand to), and also enough room for a terminating '\0'?
284 if (column+3+1 >= fmtbuf_len[idx]) {
286 * Double the buffer's size if it's not big enough.
287 * The size of the buffer starts at 128, so doubling its size
288 * adds at least another 128 bytes, which is more than enough
289 * for one more character plus a terminating '\0'.
291 fmtbuf_len[idx] = fmtbuf_len[idx] * 2;
292 fmtbuf[idx] = g_realloc(fmtbuf[idx], fmtbuf_len[idx]);
297 fmtbuf[idx][column] = c;
299 } else if (isspace(c)) {
300 fmtbuf[idx][column] = ' ';
303 fmtbuf[idx][column] = '\\';
308 fmtbuf[idx][column] = 'a';
313 fmtbuf[idx][column] = 'b'; /* BS */
318 fmtbuf[idx][column] = 'f'; /* FF */
323 fmtbuf[idx][column] = 'n'; /* NL */
328 fmtbuf[idx][column] = 'r'; /* CR */
333 fmtbuf[idx][column] = 't'; /* tab */
338 fmtbuf[idx][column] = 'v';
344 fmtbuf[idx][column] = i + '0';
347 fmtbuf[idx][column] = i + '0';
350 fmtbuf[idx][column] = i + '0';
356 fmtbuf[idx][column] = '\0';
/*
 * is_byte_sep: return non-zero when c is one of the byte-separator
 * characters '-', ':' or '.', and zero otherwise.
 */
361 is_byte_sep(guint8 c)
363 return (c == '-' || c == ':' || c == '.');
366 /* Turn a string of hex digits with optional separators (defined by
367 * is_byte_sep()) into a byte array.
/*
 * hex_str_to_bytes: parse the hex digits in hex_str and append the resulting
 * byte values to bytes.
 *
 * hex_str          - input string; a NULL pointer is caught by the first test.
 * bytes            - output array; a NULL pointer is caught by the same test.
 *                    It is reset to length 0 before parsing starts.
 * force_separators - tested in the else-if that follows each
 *                    is_byte_sep(*punct) check, i.e. when the character after
 *                    a digit group is not a separator. What happens then is
 *                    not visible in this span.
 *
 * Visible branches, in order:
 *  1. *p, *q, *r and *s are all hex digits: two bytes are appended, one per
 *     two-digit half (each half is copied into a NUL-terminated 3-char
 *     scratch string and converted with strtoul(..., 16)).
 *  2. *p and *q are hex digits: one byte is appended from two_digits.
 *  3. *p is a hex digit and *q is a separator: one byte from one_digit.
 *  4. *p is a hex digit and *q is the terminating NUL: one byte from
 *     one_digit.
 *
 * NOTE(review): how p, q, r, s and punct are advanced, and the return
 * values, are not visible in this span.
 */
370 hex_str_to_bytes(const char *hex_str, GByteArray *bytes, gboolean force_separators) {
372 const guchar *p, *q, *r, *s, *punct;
373 char four_digits_first_half[3];
374 char four_digits_second_half[3];
378 if (! hex_str || ! bytes) {
381 g_byte_array_set_size(bytes, 0);
382 p = (const guchar *)hex_str;
389 && isxdigit(*p) && isxdigit(*q) &&
390 isxdigit(*r) && isxdigit(*s)) {
391 four_digits_first_half[0] = *p;
392 four_digits_first_half[1] = *q;
393 four_digits_first_half[2] = '\0';
394 four_digits_second_half[0] = *r;
395 four_digits_second_half[1] = *s;
396 four_digits_second_half[2] = '\0';
399 * Four or more hex digits in a row.
401 val = (guint8) strtoul(four_digits_first_half, NULL, 16);
402 g_byte_array_append(bytes, &val, 1);
403 val = (guint8) strtoul(four_digits_second_half, NULL, 16);
404 g_byte_array_append(bytes, &val, 1);
409 * Make sure the character after
410 * the forth hex digit is a byte
411 * separator, i.e. that we don't have
412 * more than four hex digits, or a
415 if (is_byte_sep(*punct)) {
419 else if (force_separators) {
427 else if (*q && isxdigit(*p) && isxdigit(*q)) {
430 two_digits[2] = '\0';
433 * Two hex digits in a row.
435 val = (guint8) strtoul(two_digits, NULL, 16);
436 g_byte_array_append(bytes, &val, 1);
440 * Make sure the character after
441 * the second hex digit is a byte
442 * separator, i.e. that we don't have
443 * more than two hex digits, or a
446 if (is_byte_sep(*punct)) {
450 else if (force_separators) {
457 else if (*q && isxdigit(*p) && is_byte_sep(*q)) {
462 * Only one hex digit (not at the end of the string)
464 val = (guint8) strtoul(one_digit, NULL, 16);
465 g_byte_array_append(bytes, &val, 1);
469 else if (!*q && isxdigit(*p)) {
474 * Only one hex digit (at the end of the string)
476 val = (guint8) strtoul(one_digit, NULL, 16);
477 g_byte_array_append(bytes, &val, 1);
489 * Turn an RFC 3986 percent-encoded string into a byte array.
490 * XXX - We don't check for reserved characters.
/*
 * HEX_DIGIT_BUF_LEN sizes the scratch buffer hex_digit. Elements 0 and 1 are
 * checked with isxdigit() and the buffer is passed to strtoul() as a string,
 * so the third element is presumably the terminating NUL - confirm.
 *
 * uri_str_to_bytes: decode uri_str into bytes.
 *
 * uri_str - input string, walked through the pointer p.
 * bytes   - output array, reset to length 0 first.
 *
 * Visible behaviour:
 *  - A character that is not ASCII or not printable is tested for; the
 *    action taken is not visible in this span.
 *  - FALSE is returned when the terminating NUL is reached at either of two
 *    points, presumably while reading the two digits of an escape - confirm.
 *  - Both hex_digit characters must be hex digits; they are converted with
 *    strtoul(..., 16) and the resulting byte is appended.
 *  - In the remaining visible case the character at p is appended unchanged.
 */
492 #define HEX_DIGIT_BUF_LEN 3
494 uri_str_to_bytes(const char *uri_str, GByteArray *bytes) {
497 guchar hex_digit[HEX_DIGIT_BUF_LEN];
499 g_byte_array_set_size(bytes, 0);
504 p = (const guchar *)uri_str;
507 if (! isascii(*p) || ! isprint(*p))
511 if (*p == '\0') return FALSE;
514 if (*p == '\0') return FALSE;
517 if (! isxdigit(hex_digit[0]) || ! isxdigit(hex_digit[1]))
519 val = (guint8) strtoul((char *)hex_digit, NULL, 16);
520 g_byte_array_append(bytes, &val, 1);
522 g_byte_array_append(bytes, (const guint8 *) p, 1);
531 * Given a GByteArray, generate a string from it that shows non-printable
532 * characters as percent-style escapes, and return a pointer to it.
/*
 * format_uri: render the contents of bytes as text, writing some bytes as a
 * '%' followed by two upper-case hex digits taken from the hex[] table.
 *
 * bytes          - input byte array; bytes->len entries of bytes->data are
 *                  read.
 * reserved_chars - caller-supplied reserved set. reserved points at
 *                  reserved_def (":/?#[]@!$&'()*+,;= ") unless it is
 *                  reassigned to reserved_chars; the condition for that
 *                  reassignment is not visible in this span.
 *
 * Output goes into fmtbuf[idx], one of three function-static buffers that
 * persist between calls. A buffer is allocated with INITIAL_FMTBUF_SIZE
 * bytes the first time its slot is NULL, and is doubled with g_realloc()
 * whenever column+2+1 >= fmtbuf_len[idx].
 *
 * Each byte c is checked twice: once for being non-ASCII, non-printable or
 * '%', and once against every character of reserved. The visible stores are
 * either c itself, or '%', hex[c >> 4], hex[c & 0xF]. The text is terminated
 * with '\0'.
 *
 * NOTE(review): column is the read index into bytes->data (the for loop and
 * c = bytes->data[column]) and also the write index into fmtbuf[idx]. An
 * escape produces three output characters for one input byte, so one counter
 * cannot track both positions. Confirm against the full function whether
 * input bytes are skipped after an escape.
 */
535 format_uri(const GByteArray *bytes, const gchar *reserved_chars)
537 static gchar *fmtbuf[3];
538 static guint fmtbuf_len[3];
540 const guchar *reserved_def = ":/?#[]@!$&'()*+,;= ";
541 const guchar *reserved = reserved_def;
544 gboolean is_reserved = FALSE;
551 reserved = reserved_chars;
554 * Allocate the buffer if it's not already allocated.
556 if (fmtbuf[idx] == NULL) {
557 fmtbuf[idx] = g_malloc(INITIAL_FMTBUF_SIZE);
558 fmtbuf_len[idx] = INITIAL_FMTBUF_SIZE;
560 for (column = 0; column < bytes->len; column++) {
562 * Is there enough room for this character, if it expands to
563 * a percent plus 2 hex digits (which is the most it can
564 * expand to), and also enough room for a terminating '\0'?
566 if (column+2+1 >= fmtbuf_len[idx]) {
568 * Double the buffer's size if it's not big enough.
569 * The size of the buffer starts at 128, so doubling its size
570 * adds at least another 128 bytes, which is more than enough
571 * for one more character plus a terminating '\0'.
573 fmtbuf_len[idx] = fmtbuf_len[idx] * 2;
574 fmtbuf[idx] = g_realloc(fmtbuf[idx], fmtbuf_len[idx]);
576 c = bytes->data[column];
578 if (!isascii(c) || !isprint(c) || c == '%') {
582 for (i = 0; reserved[i]; i++) {
583 if (c == reserved[i])
588 fmtbuf[idx][column] = c;
590 fmtbuf[idx][column] = '%';
592 fmtbuf[idx][column] = hex[c >> 4];
594 fmtbuf[idx][column] = hex[c & 0xF];
597 fmtbuf[idx][column] = '\0';
602 * Create a copy of a GByteArray
604 * @param ba The byte array to be copied.
605 * @return If ba exists, a freshly allocated copy. NULL otherwise.
/*
 * byte_array_dup: create a new GByteArray with g_byte_array_new() and append
 * ba->len bytes from ba->data to it.
 *
 * NOTE(review): the NULL check on ba and the return statement are not
 * visible in this span.
 */
609 byte_array_dup(GByteArray *ba) {
615 new_ba = g_byte_array_new();
616 g_byte_array_append(new_ba, ba->data, ba->len);
/*
 * SUBID_BUF_LEN is the size of the per-sub-identifier scratch buffer buf.
 * Each byte carries seven value bits (subid % 0x80), so five bytes are
 * enough for a 32-bit sub-identifier.
 *
 * oid_str_to_bytes: convert a dotted-decimal OID string into encoded bytes.
 *
 * oid_str - input string.
 * bytes   - output array, reset to length 0 first.
 *
 * Visible validation, each failing with FALSE:
 *  - a character that is neither a digit nor '.';
 *  - three positional checks: p == oid_str (first character), !*(p+1)
 *    (last character) and (p-1) == dot (directly after the previous dot).
 *    These presumably apply only when *p is '.'; the enclosing condition is
 *    not visible in this span - confirm;
 *  - dot never having been set;
 *  - a first sub-identifier greater than 2;
 *  - a second sub-identifier greater than 39 when the first is below 2.
 *
 * Visible encoding: when sicnt == 1 the sub-identifier is combined with the
 * first one as subid += 40 * subid0. Each sub-identifier is then stored in
 * buf as seven-bit groups with bit 0x80 set. The do/while repeats while
 * subid is non-zero and i is non-zero. Bit 0x80 is cleared on the last
 * buffer byte, and the bytes from buf + i to the end of buf are appended.
 */
620 #define SUBID_BUF_LEN 5
622 oid_str_to_bytes(const char *oid_str, GByteArray *bytes) {
623 guint32 subid0, subid, sicnt, i;
625 guint8 buf[SUBID_BUF_LEN];
627 g_byte_array_set_size(bytes, 0);
633 if (!isdigit((guchar)*p) && (*p != '.')) return FALSE;
635 if (p == oid_str) return FALSE;
636 if (!*(p+1)) return FALSE;
637 if ((p-1) == dot) return FALSE;
642 if (!dot) return FALSE;
646 subid0 = 0; /* squelch GCC complaints */
649 while (isdigit((guchar)*p)) {
656 if (subid0 > 2) return FALSE;
657 } else if (sicnt == 1) {
658 if ((subid0 < 2) && (subid > 39)) return FALSE;
659 subid += 40 * subid0;
665 buf[i] = 0x80 | (subid % 0x80);
667 } while (subid && i);
668 buf[SUBID_BUF_LEN-1] &= 0x7F;
669 g_byte_array_append(bytes, buf + i, SUBID_BUF_LEN - i);
679 * Compare the contents of two GByteArrays
681 * @param ba1 A byte array
682 * @param ba2 A byte array
683 * @return If both arrays are non-NULL and their lengths are equal and
684 * their contents are equal, returns TRUE. Otherwise, returns
687 * XXX - Should this be in strutil.c?
/*
 * byte_array_equal: compare two GByteArrays. The lengths are compared first;
 * memcmp() then compares ba1->len bytes of the two data buffers.
 *
 * NOTE(review): the NULL checks and the return statements are not visible in
 * this span.
 */
690 byte_array_equal(GByteArray *ba1, GByteArray *ba2) {
694 if (ba1->len != ba2->len)
697 if (memcmp(ba1->data, ba2->data, ba1->len) != 0)
704 /* Return an XML-escaped representation of the unescaped string.
705 * The returned string must be freed when no longer in use. */
/*
 * xml_escape: walk the NUL-terminated string unescaped one character at a
 * time and build the result in a GString created with room for 128 bytes.
 *
 * Five characters get a g_string_append() call of their own; every other
 * character is copied with g_string_append_c(). The GString wrapper is then
 * released with g_string_free(buffer, FALSE), which hands the character data
 * to the caller.
 *
 * NOTE(review): the five appended literals here are the raw characters
 * rather than escaped forms, and the last one is not a valid C string
 * literal. They look as if XML entities (&lt; &gt; &amp; &apos; &quot;) were
 * decoded when this listing was produced - confirm against the upstream
 * file.
 */
707 xml_escape(const gchar *unescaped)
709 GString *buffer = g_string_sized_new(128);
714 while ( (c = *p++) ) {
717 g_string_append(buffer, "<");
720 g_string_append(buffer, ">");
723 g_string_append(buffer, "&");
726 g_string_append(buffer, "'");
729 g_string_append(buffer, """);
732 g_string_append_c(buffer, c);
736 /* Return the string value contained within the GString
737 * after getting rid of the GString structure.
738 * This is the way to do this, see the GLib reference. */
739 return g_string_free(buffer, FALSE);
743 /* Return the first occurrence of needle in haystack.
744 * If not found, return NULL.
745 * If either haystack or needle has 0 length, return NULL.
746 * Algorithm copied from GNU's glibc 2.3.2 memmem() */
/*
 * epan_memmem: search the haystack_len bytes at haystack for the needle_len
 * bytes at needle.
 *
 * last_possible is the last start position at which the needle could still
 * fit (haystack + haystack_len - needle_len). The loop tries every position
 * from haystack up to and including last_possible. At each one it compares
 * the first byte directly and only then calls memcmp() on the bytes after
 * it.
 *
 * NOTE(review): last_possible is computed in the declaration, before the
 * needle_len == 0 and needle_len > haystack_len tests. When the needle is
 * longer than the haystack the subtraction yields a pointer before haystack.
 * It is only used by the loop after those tests, but the arithmetic itself
 * is already out of range - confirm whether that matters for the supported
 * compilers.
 */
748 epan_memmem(const guint8 *haystack, guint haystack_len,
749 const guint8 *needle, guint needle_len)
752 const guint8 *const last_possible
753 = haystack + haystack_len - needle_len;
755 if (needle_len == 0) {
759 if (needle_len > haystack_len) {
763 for (begin = haystack ; begin <= last_possible; ++begin) {
764 if (begin[0] == needle[0] &&
765 !memcmp(&begin[1], needle + 1,
775 * Scan the search string to make sure it's valid hex. Return the
776 * number of bytes in nbytes.
/*
 * convert_string_to_hex: turn a textual hex string into a newly allocated
 * byte buffer.
 *
 * string - input text.
 * nbytes - output parameter; its assignment is not visible in this span.
 *
 * The work is done in two passes over the input, as the inline comments
 * describe:
 *  1. Validate and count. White space and the characters ':', '.' and '-'
 *     are skipped. Anything else must be a hex digit immediately followed by
 *     a second hex digit, and each such pair counts as one byte.
 *  2. Allocate n_bytes with g_malloc() and fill the buffer, skipping the
 *     same characters. For letters the digit value is (c - 'a') + 10 or
 *     (c - 'A') + 10. The second digit of each pair is OR-ed into byte_val.
 *
 * NOTE(review): the buffer comes from g_malloc(), so the caller presumably
 * owns it and releases it with g_free() - confirm.
 */
779 convert_string_to_hex(const char *string, size_t *nbytes)
784 guint8 *bytes, *q, byte_val;
793 continue; /* allow white space */
794 if (c==':' || c=='.' || c=='-')
795 continue; /* skip any ':', '.', or '-' between bytes */
797 /* Not a valid hex digit - fail */
802 * We can only match bytes, not nibbles; we must have a valid
803 * hex digit immediately after that hex digit.
809 /* 2 hex digits = 1 byte */
814 * Were we given any hex digits?
822 * OK, it's valid, and it generates "n_bytes" bytes; generate the
825 bytes = g_malloc(n_bytes);
833 continue; /* allow white space */
834 if (c==':' || c=='.' || c=='-')
835 continue; /* skip any ':', '.', or '-' between bytes */
836 /* From the loop above, we know this is a hex digit */
840 byte_val = (c - 'a') + 10;
842 byte_val = (c - 'A') + 10;
845 /* We also know this is a hex digit */
850 byte_val |= (c - 'a') + 10;
852 byte_val |= (c - 'A') + 10;
861 * Copy it if it's a case-sensitive search; uppercase it if it's
862 * a case-insensitive search.
/*
 * convert_string_case: return a newly allocated copy of string.
 *
 * string           - NUL-terminated input (g_utf8_strup() is called with
 *                    length -1).
 * case_insensitive - when true the copy is upper-cased with g_utf8_strup();
 *                    otherwise it is a plain g_strdup() copy.
 *
 * Both paths return memory allocated by GLib.
 */
865 convert_string_case(const char *string, gboolean case_insensitive)
868 if (case_insensitive) {
869 return g_utf8_strup(string, -1);
871 return g_strdup(string);
/*
 * epan_strcasestr: find needle inside haystack, comparing with
 * g_ascii_strncasecmp() so that only ASCII letters are matched without
 * regard to case.
 *
 * The loop continues while the remaining haystack length is at least the
 * needle length. On a match it returns the current haystack position with
 * the const qualifier cast away.
 *
 * NOTE(review): how haystack is advanced, and the value returned when
 * nothing matches, are not visible in this span.
 */
876 epan_strcasestr(const char *haystack, const char *needle)
878 gsize hlen = strlen(haystack);
879 gsize nlen = strlen(needle);
881 while (hlen-- >= nlen) {
882 if (!g_ascii_strncasecmp(haystack, needle, nlen))
883 return (char*) haystack;
/*
 * string_or_null: takes a const char pointer.
 *
 * NOTE(review): only the signature is visible in this span, so the return
 * value and the handling of a NULL argument cannot be documented from
 * here - confirm against the full function.
 */
890 string_or_null(const char *string)
/*
 * escape_string_len: compute the length of the quoted, escaped
 * representation of the NUL-terminated string.
 *
 * Each character falls into one of three classes: backslash or double
 * quote, a character for which isprint() is false, or anything else. The
 * result is repr_len plus 2 for the leading and trailing quotes.
 *
 * NOTE(review): the amount added to repr_len for each class is not visible
 * in this span.
 */
898 escape_string_len(const char *string)
905 for (p = string; (c = *p) != '\0'; p++) {
906 /* Backslashes and double-quotes must
908 if (c == '\\' || c == '"') {
911 /* Values that can't nicely be represented
912 * in ASCII need to be escaped. */
913 else if (!isprint((unsigned char)c)) {
917 /* Other characters are just passed through. */
922 return repr_len + 2; /* string plus leading and trailing quotes */
/*
 * escape_string: write an escaped representation of the NUL-terminated
 * string into buf.
 *
 * Characters are classified as backslash or double quote, as a character
 * for which isprint() is false, or as anything else. For the non-printable
 * class the byte value is formatted as two lower-case hex digits ("%02x")
 * into hexbuf with g_snprintf().
 *
 * NOTE(review): the stores into buf are not visible in this span. buf
 * presumably has to be at least escape_string_len(string) + 1 bytes long -
 * confirm against the callers.
 */
926 escape_string(char *buf, const char *string)
935 for (p = string; (c = *p) != '\0'; p++) {
936 /* Backslashes and double-quotes must
938 if (c == '\\' || c == '"') {
942 /* Values that can't nicely be represented
943 * in ASCII need to be escaped. */
944 else if (!isprint((unsigned char)c)) {
946 g_snprintf(hexbuf,sizeof(hexbuf), "%02x", (unsigned char) c);
952 /* Other characters are just passed through. */
/*
 * GN_CHAR_ALPHABET_SIZE is the number of entries in the table below.
 *
 * IA5_default_alphabet maps a 7-bit IA5 code (0-127) to the character used
 * to display it. The first 32 entries and the final entry are '?'. Entries
 * 32 to 126 are the corresponding printable characters, from ' ' through
 * '~'.
 */
962 #define GN_CHAR_ALPHABET_SIZE 128
964 static gunichar IA5_default_alphabet[GN_CHAR_ALPHABET_SIZE] = {
966 /*ITU-T recommendation T.50 specifies International Reference Alphabet 5 (IA5) */
968 '?', '?', '?', '?', '?', '?', '?', '?',
969 '?', '?', '?', '?', '?', '?', '?', '?',
970 '?', '?', '?', '?', '?', '?', '?', '?',
971 '?', '?', '?', '?', '?', '?', '?', '?',
972 ' ', '!', '\"','#', '$', '%', '&', '\'',
973 '(', ')', '*', '+', ',', '-', '.', '/',
974 '0', '1', '2', '3', '4', '5', '6', '7',
975 '8', '9', ':', ';', '<', '=', '>', '?',
976 '@', 'A', 'B', 'C', 'D', 'E', 'F', 'G',
977 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O',
978 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W',
979 'X', 'Y', 'Z', '[', '\\', ']', '^', '_',
980 '`', 'a', 'b', 'c', 'd', 'e', 'f', 'g',
981 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
982 'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
983 'x', 'y', 'z', '{', '|', '}', '~', '?'
/*
 * char_def_ia5_alphabet_decode: look value up in IA5_default_alphabet when
 * it is below GN_CHAR_ALPHABET_SIZE and return that table entry.
 *
 * NOTE(review): the result for values of GN_CHAR_ALPHABET_SIZE or above is
 * not visible in this span.
 */
987 char_def_ia5_alphabet_decode(unsigned char value)
989 if (value < GN_CHAR_ALPHABET_SIZE)
991 return IA5_default_alphabet[value];
/*
 * IA5_7BIT_decode: decode len bytes from src into UTF-8 text in dest.
 *
 * dest - output buffer; index i advances by the number of bytes that
 *        g_unichar_to_utf8() reports writing.
 * src  - input bytes; index j walks from 0 to len - 1.
 * len  - number of input bytes.
 *
 * Each input byte is mapped with char_def_ia5_alphabet_decode() and the
 * resulting character is written at dest + i.
 *
 * NOTE(review): no bound on dest is checked in the visible lines, so the
 * caller presumably has to supply a large enough buffer. Termination of dest
 * and the return value are not visible in this span - confirm.
 */
1000 IA5_7BIT_decode(unsigned char * dest, const unsigned char* src, int len)
1006 for (i = 0, j = 0; j < len; j++)
1008 buf = char_def_ia5_alphabet_decode(src[j]);
1009 i += g_unichar_to_utf8(buf,&(dest[i]));
1016 * This function takes a string and copies it, inserting a 'chr' before
1017 * every 'chr' in it.
/*
 * ws_strdup_escape_char: allocate a new string for a copy of str in which
 * chr is escaped by doubling it.
 *
 * The buffer is sized for the worst case, in which every character of str
 * is chr: strlen(str) * 2 bytes plus one for the terminating NUL. q starts
 * at the beginning of the new buffer.
 *
 * NOTE(review): the copy loop, NULL handling and the return statement are
 * not visible in this span. The buffer comes from g_malloc(), so the caller
 * presumably releases it with g_free() - confirm.
 */
1020 ws_strdup_escape_char (const gchar *str, const gchar chr)
1029 /* Worst case: A string that is full of 'chr' */
1030 q = new_str = g_malloc (strlen(str) * 2 + 1);
1045 * This function takes a string and copies it, replacing any occurrences of double
1046 * 'chr' with a single 'chr'.
/*
 * ws_strdup_unescape_char: allocate a new string for a copy of str in which
 * doubled chr characters are collapsed.
 *
 * The buffer is sized for the worst case, in which str contains no chr at
 * all: strlen(str) bytes plus one for the terminating NUL. The visible test
 * detects a chr at p that is immediately followed by another chr.
 *
 * NOTE(review): the copy loop, NULL handling and the return statement are
 * not visible in this span. The buffer comes from g_malloc(), so the caller
 * presumably releases it with g_free() - confirm.
 */
1049 ws_strdup_unescape_char (const gchar *str, const char chr)
1058 /* Worst case: A string that contains no 'chr' */
1059 q = new_str = g_malloc (strlen(str) + 1);
1064 if ((*p == chr) && (*(p+1) == chr))
1074 /* Create a newly-allocated string with replacement values. */
/*
 * string_replace: build a new string from str with every occurrence of
 * old_val replaced by new_val.
 *
 * str is split at each old_val with g_strsplit() (max_tokens 0, i.e. no
 * limit). The pieces are joined again with new_val as the separator using
 * g_strjoinv(), and the temporary vector is released with g_strfreev().
 *
 * str and old_val are tested for NULL; new_val is not tested.
 *
 * NOTE(review): the value returned for NULL input and the final return
 * statement are not visible in this span. The result of g_strjoinv() is
 * newly allocated, so the caller presumably releases it with g_free() -
 * confirm.
 */
1075 gchar *string_replace(const gchar* str, const gchar *old_val, const gchar *new_val) {
1079 if (!str || !old_val) {
1083 str_parts = g_strsplit(str, old_val, 0);
1084 new_str = g_strjoinv(new_val, str_parts);
1085 g_strfreev(str_parts);