2 * String utility routines
6 * Wireshark - Network traffic analyzer
7 * By Gerald Combs <gerald@wireshark.org>
8 * Copyright 1998 Gerald Combs
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version 2
13 * of the License, or (at your option) any later version.
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
33 #include <../isprint.h>
42 static const char hex[16] = { '0', '1', '2', '3', '4', '5', '6', '7',
43 '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };
46 * Given a pointer into a data buffer, and to the end of the buffer,
47 * find the end of the (putative) line at that position in the data
49 * Return a pointer to the EOL character(s) in "*eol".
52 find_line_end(const guchar *data, const guchar *dataend, const guchar **eol)
54 const guchar *lineend;
56 lineend = (guchar *)memchr(data, '\n', dataend - data);
57 if (lineend == NULL) {
59 * No LF - line is probably continued in next TCP segment.
65 * Is the LF at the beginning of the line?
69 * No - is it preceded by a carriage return?
70 * (Perhaps it's supposed to be, but that's not guaranteed....)
72 if (*(lineend - 1) == '\r') {
74 * Yes. The EOL starts with the CR.
79 * No. The EOL starts with the LF.
84 * I seem to remember that we once saw lines ending with LF-CR
85 * in an HTTP request or response, so check if it's *followed*
86 * by a carriage return.
88 if (lineend < (dataend - 1) && *(lineend + 1) == '\r') {
90 * It's <non-LF><LF><CR>; say it ends with the CR.
97 * Yes - the EOL starts with the LF.
103 * Point to the character after the last character.
111 * Get the length of the next token in a line, and the beginning of the
112 * next token after that (if any).
113 * Return 0 if there is no next token.
116 get_token_len(const guchar *linep, const guchar *lineend,
117 const guchar **next_token)
119 const guchar *tokenp;
125 * Search for a blank, a CR or an LF, or the end of the buffer.
127 while (linep < lineend && *linep != ' ' && *linep != '\r' && *linep != '\n')
129 token_len = (int) (linep - tokenp);
132 * Skip trailing blanks.
134 while (linep < lineend && *linep == ' ')
143 #define INITIAL_FMTBUF_SIZE 128
146 * Given a string, generate a string from it that shows non-printable
147 * characters as C-style escapes, and return a pointer to it.
150 format_text(const guchar *string, size_t len)
152 static gchar *fmtbuf[3];
153 static int fmtbuf_len[3];
156 const guchar *stringend = string + len;
163 * Allocate the buffer if it's not already allocated.
165 if (fmtbuf[idx] == NULL) {
166 fmtbuf[idx] = (gchar *)g_malloc(INITIAL_FMTBUF_SIZE);
167 fmtbuf_len[idx] = INITIAL_FMTBUF_SIZE;
170 while (string < stringend) {
172 * Is there enough room for this character, if it expands to
173 * a backslash plus 3 octal digits (which is the most it can
174 * expand to), and also enough room for a terminating '\0'?
176 if (column+3+1 >= fmtbuf_len[idx]) {
178 * Double the buffer's size if it's not big enough.
179 * The size of the buffer starts at 128, so doubling its size
180 * adds at least another 128 bytes, which is more than enough
181 * for one more character plus a terminating '\0'.
183 fmtbuf_len[idx] = fmtbuf_len[idx] * 2;
184 fmtbuf[idx] = (gchar *)g_realloc(fmtbuf[idx], fmtbuf_len[idx]);
189 fmtbuf[idx][column] = c;
192 fmtbuf[idx][column] = '\\';
197 fmtbuf[idx][column] = 'a';
202 fmtbuf[idx][column] = 'b'; /* BS */
207 fmtbuf[idx][column] = 'f'; /* FF */
212 fmtbuf[idx][column] = 'n'; /* NL */
217 fmtbuf[idx][column] = 'r'; /* CR */
222 fmtbuf[idx][column] = 't'; /* tab */
227 fmtbuf[idx][column] = 'v';
233 fmtbuf[idx][column] = i + '0';
236 fmtbuf[idx][column] = i + '0';
239 fmtbuf[idx][column] = i + '0';
245 fmtbuf[idx][column] = '\0';
250 * Given a string, generate a string from it that shows non-printable
251 * characters as C-style escapes except a whitespace character
252 * (space, tab, carriage return, new line, vertical tab, or formfeed)
253 * which will be replaced by a space, and return a pointer to it.
256 format_text_wsp(const guchar *string, size_t len)
258 static gchar *fmtbuf[3];
259 static int fmtbuf_len[3];
262 const guchar *stringend = string + len;
269 * Allocate the buffer if it's not already allocated.
271 if (fmtbuf[idx] == NULL) {
272 fmtbuf[idx] = (gchar *)g_malloc(INITIAL_FMTBUF_SIZE);
273 fmtbuf_len[idx] = INITIAL_FMTBUF_SIZE;
276 while (string < stringend) {
278 * Is there enough room for this character, if it expands to
279 * a backslash plus 3 octal digits (which is the most it can
280 * expand to), and also enough room for a terminating '\0'?
282 if (column+3+1 >= fmtbuf_len[idx]) {
284 * Double the buffer's size if it's not big enough.
285 * The size of the buffer starts at 128, so doubling its size
286 * adds at least another 128 bytes, which is more than enough
287 * for one more character plus a terminating '\0'.
289 fmtbuf_len[idx] = fmtbuf_len[idx] * 2;
290 fmtbuf[idx] = (gchar *)g_realloc(fmtbuf[idx], fmtbuf_len[idx]);
295 fmtbuf[idx][column] = c;
297 } else if (isspace(c)) {
298 fmtbuf[idx][column] = ' ';
301 fmtbuf[idx][column] = '\\';
306 fmtbuf[idx][column] = 'a';
311 fmtbuf[idx][column] = 'b'; /* BS */
316 fmtbuf[idx][column] = 'f'; /* FF */
321 fmtbuf[idx][column] = 'n'; /* NL */
326 fmtbuf[idx][column] = 'r'; /* CR */
331 fmtbuf[idx][column] = 't'; /* tab */
336 fmtbuf[idx][column] = 'v';
342 fmtbuf[idx][column] = i + '0';
345 fmtbuf[idx][column] = i + '0';
348 fmtbuf[idx][column] = i + '0';
354 fmtbuf[idx][column] = '\0';
359 is_byte_sep(guint8 c)
361 return (c == '-' || c == ':' || c == '.');
364 /* Turn a string of hex digits with optional separators (defined by
365 * is_byte_sep()) into a byte array.
368 hex_str_to_bytes(const char *hex_str, GByteArray *bytes, gboolean force_separators) {
370 const guchar *p, *q, *r, *s, *punct;
371 char four_digits_first_half[3];
372 char four_digits_second_half[3];
376 if (! hex_str || ! bytes) {
379 g_byte_array_set_size(bytes, 0);
380 p = (const guchar *)hex_str;
387 && isxdigit(*p) && isxdigit(*q) &&
388 isxdigit(*r) && isxdigit(*s)) {
389 four_digits_first_half[0] = *p;
390 four_digits_first_half[1] = *q;
391 four_digits_first_half[2] = '\0';
392 four_digits_second_half[0] = *r;
393 four_digits_second_half[1] = *s;
394 four_digits_second_half[2] = '\0';
397 * Four or more hex digits in a row.
399 val = (guint8) strtoul(four_digits_first_half, NULL, 16);
400 g_byte_array_append(bytes, &val, 1);
401 val = (guint8) strtoul(four_digits_second_half, NULL, 16);
402 g_byte_array_append(bytes, &val, 1);
407 * Make sure the character after
408 * the fourth hex digit is a byte
409 * separator, i.e. that we don't have
410 * more than four hex digits, or a
413 if (is_byte_sep(*punct)) {
417 else if (force_separators) {
424 else if (*q && isxdigit(*p) && isxdigit(*q)) {
427 two_digits[2] = '\0';
430 * Two hex digits in a row.
432 val = (guint8) strtoul(two_digits, NULL, 16);
433 g_byte_array_append(bytes, &val, 1);
437 * Make sure the character after
438 * the second hex digit is a byte
439 * separator, i.e. that we don't have
440 * more than two hex digits, or a
443 if (is_byte_sep(*punct)) {
447 else if (force_separators) {
454 else if (*q && isxdigit(*p) && is_byte_sep(*q)) {
459 * Only one hex digit (not at the end of the string)
461 val = (guint8) strtoul(one_digit, NULL, 16);
462 g_byte_array_append(bytes, &val, 1);
466 else if (!*q && isxdigit(*p)) {
471 * Only one hex digit (at the end of the string)
473 val = (guint8) strtoul(one_digit, NULL, 16);
474 g_byte_array_append(bytes, &val, 1);
486 * Turn an RFC 3986 percent-encoded string into a byte array.
487 * XXX - We don't check for reserved characters.
489 #define HEX_DIGIT_BUF_LEN 3
491 uri_str_to_bytes(const char *uri_str, GByteArray *bytes) {
494 guchar hex_digit[HEX_DIGIT_BUF_LEN];
496 g_byte_array_set_size(bytes, 0);
501 p = (const guchar *)uri_str;
504 if (! isascii(*p) || ! isprint(*p))
508 if (*p == '\0') return FALSE;
511 if (*p == '\0') return FALSE;
514 if (! isxdigit(hex_digit[0]) || ! isxdigit(hex_digit[1]))
516 val = (guint8) strtoul((char *)hex_digit, NULL, 16);
517 g_byte_array_append(bytes, &val, 1);
519 g_byte_array_append(bytes, (const guint8 *) p, 1);
528 * Given a GByteArray, generate a string from it that shows non-printable
529 * characters as percent-style escapes, and return a pointer to it.
532 format_uri(const GByteArray *bytes, const gchar *reserved_chars)
534 static gchar *fmtbuf[3];
535 static guint fmtbuf_len[3];
537 static const guchar *reserved_def = ":/?#[]@!$&'()*+,;= ";
538 const guchar *reserved = reserved_def;
541 gboolean is_reserved = FALSE;
548 reserved = reserved_chars;
551 * Allocate the buffer if it's not already allocated.
553 if (fmtbuf[idx] == NULL) {
554 fmtbuf[idx] = (gchar *)g_malloc(INITIAL_FMTBUF_SIZE);
555 fmtbuf_len[idx] = INITIAL_FMTBUF_SIZE;
557 for (column = 0; column < bytes->len; column++) {
559 * Is there enough room for this character, if it expands to
560 * a percent plus 2 hex digits (which is the most it can
561 * expand to), and also enough room for a terminating '\0'?
563 if (column+2+1 >= fmtbuf_len[idx]) {
565 * Double the buffer's size if it's not big enough.
566 * The size of the buffer starts at 128, so doubling its size
567 * adds at least another 128 bytes, which is more than enough
568 * for one more character plus a terminating '\0'.
570 fmtbuf_len[idx] = fmtbuf_len[idx] * 2;
571 fmtbuf[idx] = (gchar *)g_realloc(fmtbuf[idx], fmtbuf_len[idx]);
573 c = bytes->data[column];
575 if (!isascii(c) || !isprint(c) || c == '%') {
579 for (i = 0; reserved[i]; i++) {
580 if (c == reserved[i])
585 fmtbuf[idx][column] = c;
587 fmtbuf[idx][column] = '%';
589 fmtbuf[idx][column] = hex[c >> 4];
591 fmtbuf[idx][column] = hex[c & 0xF];
594 fmtbuf[idx][column] = '\0';
599 * Create a copy of a GByteArray
601 * @param ba The byte array to be copied.
602 * @return If ba exists, a freshly allocated copy. NULL otherwise.
606 byte_array_dup(GByteArray *ba) {
612 new_ba = g_byte_array_new();
613 g_byte_array_append(new_ba, ba->data, ba->len);
617 #define SUBID_BUF_LEN 5
619 oid_str_to_bytes(const char *oid_str, GByteArray *bytes) {
620 guint32 subid0, subid, sicnt, i;
622 guint8 buf[SUBID_BUF_LEN];
624 g_byte_array_set_size(bytes, 0);
630 if (!isdigit((guchar)*p) && (*p != '.')) return FALSE;
632 if (p == oid_str) return FALSE;
633 if (!*(p+1)) return FALSE;
634 if ((p-1) == dot) return FALSE;
639 if (!dot) return FALSE;
643 subid0 = 0; /* squelch GCC complaints */
646 while (isdigit((guchar)*p)) {
653 if (subid0 > 2) return FALSE;
654 } else if (sicnt == 1) {
655 if ((subid0 < 2) && (subid > 39)) return FALSE;
656 subid += 40 * subid0;
662 buf[i] = 0x80 | (subid % 0x80);
664 } while (subid && i);
665 buf[SUBID_BUF_LEN-1] &= 0x7F;
666 g_byte_array_append(bytes, buf + i, SUBID_BUF_LEN - i);
676 * Compare the contents of two GByteArrays
678 * @param ba1 A byte array
679 * @param ba2 A byte array
680 * @return If both arrays are non-NULL and their lengths are equal and
681 * their contents are equal, returns TRUE. Otherwise, returns
684 * XXX - Should this be in strutil.c?
687 byte_array_equal(GByteArray *ba1, GByteArray *ba2) {
691 if (ba1->len != ba2->len)
694 if (memcmp(ba1->data, ba2->data, ba1->len) != 0)
701 /* Return an XML-escaped representation of the unescaped string.
702 * The returned string must be freed when no longer in use. */
704 xml_escape(const gchar *unescaped)
706 GString *buffer = g_string_sized_new(128);
711 while ( (c = *p++) ) {
714 g_string_append(buffer, "<");
717 g_string_append(buffer, ">");
720 g_string_append(buffer, "&");
723 g_string_append(buffer, "'");
726 g_string_append(buffer, """);
729 g_string_append_c(buffer, c);
733 /* Return the string value contained within the GString
734 * after getting rid of the GString structure.
735 * This is the way to do this, see the GLib reference. */
736 return g_string_free(buffer, FALSE);
740 /* Return the first occurrence of needle in haystack.
741 * If not found, return NULL.
742 * If either haystack or needle has 0 length, return NULL.
743 * Algorithm copied from GNU's glibc 2.3.2 memmem() */
745 epan_memmem(const guint8 *haystack, guint haystack_len,
746 const guint8 *needle, guint needle_len)
749 const guint8 *const last_possible = haystack + haystack_len - needle_len;
751 if (needle_len == 0) {
755 if (needle_len > haystack_len) {
759 for (begin = haystack ; begin <= last_possible; ++begin) {
760 if (begin[0] == needle[0] &&
761 !memcmp(&begin[1], needle + 1,
771 * Scan the search string to make sure it's valid hex. Return the
772 * number of bytes in nbytes.
775 convert_string_to_hex(const char *string, size_t *nbytes)
780 guint8 *bytes, *q, byte_val;
789 continue; /* allow white space */
790 if (c==':' || c=='.' || c=='-')
791 continue; /* skip any ':', '.', or '-' between bytes */
793 /* Not a valid hex digit - fail */
798 * We can only match bytes, not nibbles; we must have a valid
799 * hex digit immediately after that hex digit.
805 /* 2 hex digits = 1 byte */
810 * Were we given any hex digits?
818 * OK, it's valid, and it generates "n_bytes" bytes; generate the
821 bytes = (guint8 *)g_malloc(n_bytes);
829 continue; /* allow white space */
830 if (c==':' || c=='.' || c=='-')
831 continue; /* skip any ':', '.', or '-' between bytes */
832 /* From the loop above, we know this is a hex digit */
836 byte_val = (c - 'a') + 10;
838 byte_val = (c - 'A') + 10;
841 /* We also know this is a hex digit */
846 byte_val |= (c - 'a') + 10;
848 byte_val |= (c - 'A') + 10;
857 * Copy it if it's a case-sensitive search; uppercase it if it's
858 * a case-insensitive search.
861 convert_string_case(const char *string, gboolean case_insensitive)
864 if (case_insensitive) {
865 return g_utf8_strup(string, -1);
867 return g_strdup(string);
872 epan_strcasestr(const char *haystack, const char *needle)
874 gsize hlen = strlen(haystack);
875 gsize nlen = strlen(needle);
877 while (hlen-- >= nlen) {
878 if (!g_ascii_strncasecmp(haystack, needle, nlen))
879 return (char*) haystack;
886 string_or_null(const char *string)
894 escape_string_len(const char *string)
901 for (p = string; (c = *p) != '\0'; p++) {
902 /* Backslashes and double-quotes must
904 if (c == '\\' || c == '"') {
907 /* Values that can't nicely be represented
908 * in ASCII need to be escaped. */
909 else if (!isprint((unsigned char)c)) {
913 /* Other characters are just passed through. */
918 return repr_len + 2; /* string plus leading and trailing quotes */
922 escape_string(char *buf, const char *string)
931 for (p = string; (c = *p) != '\0'; p++) {
932 /* Backslashes and double-quotes must
934 if (c == '\\' || c == '"') {
938 /* Values that can't nicely be represented
939 * in ASCII need to be escaped. */
940 else if (!isprint((unsigned char)c)) {
942 g_snprintf(hexbuf,sizeof(hexbuf), "%02x", (unsigned char) c);
948 /* Other characters are just passed through. */
958 #define GN_CHAR_ALPHABET_SIZE 128
960 static gunichar IA5_default_alphabet[GN_CHAR_ALPHABET_SIZE] = {
962 /*ITU-T recommendation T.50 specifies International Reference Alphabet 5 (IA5) */
964 '?', '?', '?', '?', '?', '?', '?', '?',
965 '?', '?', '?', '?', '?', '?', '?', '?',
966 '?', '?', '?', '?', '?', '?', '?', '?',
967 '?', '?', '?', '?', '?', '?', '?', '?',
968 ' ', '!', '\"','#', '$', '%', '&', '\'',
969 '(', ')', '*', '+', ',', '-', '.', '/',
970 '0', '1', '2', '3', '4', '5', '6', '7',
971 '8', '9', ':', ';', '<', '=', '>', '?',
972 '@', 'A', 'B', 'C', 'D', 'E', 'F', 'G',
973 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O',
974 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W',
975 'X', 'Y', 'Z', '[', '\\', ']', '^', '_',
976 '`', 'a', 'b', 'c', 'd', 'e', 'f', 'g',
977 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
978 'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
979 'x', 'y', 'z', '{', '|', '}', '~', '?'
983 char_def_ia5_alphabet_decode(unsigned char value)
985 if (value < GN_CHAR_ALPHABET_SIZE) {
986 return IA5_default_alphabet[value];
994 IA5_7BIT_decode(unsigned char * dest, const unsigned char* src, int len)
999 for (i = 0, j = 0; j < len; j++) {
1000 buf = char_def_ia5_alphabet_decode(src[j]);
1001 i += g_unichar_to_utf8(buf,&(dest[i]));
1008 * This function takes a string and copies it, inserting a 'chr' before
1009 * every 'chr' in it.
1012 ws_strdup_escape_char (const gchar *str, const gchar chr)
1021 /* Worst case: A string that is full of 'chr' */
1022 q = new_str = (gchar *)g_malloc (strlen(str) * 2 + 1);
1036 * This function takes a string and copies it, replacing any occurrences of double
1037 * 'chr' with a single 'chr'.
1040 ws_strdup_unescape_char (const gchar *str, const char chr)
1049 /* Worst case: A string that contains no 'chr' */
1050 q = new_str = (gchar *)g_malloc (strlen(str) + 1);
1054 if ((*p == chr) && (*(p+1) == chr))
1064 /* Create a newly-allocated string with replacement values. */
1065 gchar *string_replace(const gchar* str, const gchar *old_val, const gchar *new_val) {
1069 if (!str || !old_val) {
1073 str_parts = g_strsplit(str, old_val, 0);
1074 new_str = g_strjoinv(new_val, str_parts);
1075 g_strfreev(str_parts);
1081 * Editor modelines - http://www.wireshark.org/tools/modelines.html
1086 * indent-tabs-mode: nil
1089 * vi: set shiftwidth=4 tabstop=8 expandtab:
1090 * :indentSize=4:tabSize=8:noTabs=true: