2 * String utility routines
6 * Wireshark - Network traffic analyzer
7 * By Gerald Combs <gerald@wireshark.org>
8 * Copyright 1998 Gerald Combs
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version 2
13 * of the License, or (at your option) any later version.
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
42 static const char hex[16] = { '0', '1', '2', '3', '4', '5', '6', '7',
43 '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };
46 * Given a pointer into a data buffer, and to the end of the buffer,
47 * find the end of the (putative) line at that position in the data
49 * Return a pointer to the EOL character(s) in "*eol".
52 find_line_end(const guchar *data, const guchar *dataend, const guchar **eol)
54 const guchar *lineend;
56 lineend = memchr(data, '\n', dataend - data);
57 if (lineend == NULL) {
59 * No LF - line is probably continued in next TCP segment.
65 * Is the LF at the beginning of the line?
69 * No - is it preceded by a carriage return?
70 * (Perhaps it's supposed to be, but that's not guaranteed....)
72 if (*(lineend - 1) == '\r') {
74 * Yes. The EOL starts with the CR.
79 * No. The EOL starts with the LF.
84 * I seem to remember that we once saw lines ending with LF-CR
85 * in an HTTP request or response, so check if it's *followed*
86 * by a carriage return.
88 if (lineend < (dataend - 1) && *(lineend + 1) == '\r') {
90 * It's <non-LF><LF><CR>; say it ends with the CR.
97 * Yes - the EOL starts with the LF.
103 * Point to the character after the last character.
111 * Get the length of the next token in a line, and the beginning of the
112 * next token after that (if any).
113 * Return 0 if there is no next token.
116 get_token_len(const guchar *linep, const guchar *lineend,
117 const guchar **next_token)
119 const guchar *tokenp;
125 * Search for a blank, a CR or an LF, or the end of the buffer.
127 while (linep < lineend && *linep != ' ' && *linep != '\r' && *linep != '\n')
129 token_len = linep - tokenp;
132 * Skip trailing blanks.
134 while (linep < lineend && *linep == ' ')
143 #define INITIAL_FMTBUF_SIZE 128
145 #if GLIB_MAJOR_VERSION >= 2
147 * XXX - "isprint()" can return "true" for non-ASCII characters, but
148 * those don't work with GTK+ 1.3 or later, as they take UTF-8 strings
149 * as input. Until we fix up Wireshark to properly handle non-ASCII
150 * characters in all output (both GUI displays and text printouts)
151 * in those versions of GTK+, we work around the problem by escaping
152 * all characters that aren't printable ASCII.
154 * We don't know what version of GTK+ we're using, as epan doesn't
155 * use any GTK+ stuff; we use GLib as a proxy for that, with GLib 2.x
156 * implying GTK+ 1.3 or later (we don't support GLib 1.3[.x]).
159 #define isprint(c) (c >= 0x20 && c < 0x7f)
163 * Given a string, generate a string from it that shows non-printable
164 * characters as C-style escapes, and return a pointer to it.
167 format_text(const guchar *string, int len)
169 static gchar *fmtbuf[3];
170 static int fmtbuf_len[3];
173 const guchar *stringend = string + len;
180 * Allocate the buffer if it's not already allocated.
182 if (fmtbuf[idx] == NULL) {
183 fmtbuf[idx] = g_malloc(INITIAL_FMTBUF_SIZE);
184 fmtbuf_len[idx] = INITIAL_FMTBUF_SIZE;
187 while (string < stringend) {
189 * Is there enough room for this character, if it expands to
190 * a backslash plus 3 octal digits (which is the most it can
191 * expand to), and also enough room for a terminating '\0'?
193 if (column+3+1 >= fmtbuf_len[idx]) {
195 * Double the buffer's size if it's not big enough.
196 * The size of the buffer starts at 128, so doubling its size
197 * adds at least another 128 bytes, which is more than enough
198 * for one more character plus a terminating '\0'.
200 fmtbuf_len[idx] = fmtbuf_len[idx] * 2;
201 fmtbuf[idx] = g_realloc(fmtbuf[idx], fmtbuf_len[idx]);
206 fmtbuf[idx][column] = c;
209 fmtbuf[idx][column] = '\\';
214 fmtbuf[idx][column] = 'a';
219 fmtbuf[idx][column] = 'b'; /* BS */
224 fmtbuf[idx][column] = 'f'; /* FF */
229 fmtbuf[idx][column] = 'n'; /* NL */
234 fmtbuf[idx][column] = 'r'; /* CR */
239 fmtbuf[idx][column] = 't'; /* tab */
244 fmtbuf[idx][column] = 'v';
250 fmtbuf[idx][column] = i + '0';
253 fmtbuf[idx][column] = i + '0';
256 fmtbuf[idx][column] = i + '0';
262 fmtbuf[idx][column] = '\0';
267 * Given a string, generate a string from it that shows non-printable
268 * characters as C-style escapes except a whitespace character
269 * (space, tab, carriage return, new line, vertical tab, or formfeed)
270 * which will be replaced by a space, and return a pointer to it.
273 format_text_wsp(const guchar *string, int len)
275 static gchar *fmtbuf[3];
276 static int fmtbuf_len[3];
279 const guchar *stringend = string + len;
286 * Allocate the buffer if it's not already allocated.
288 if (fmtbuf[idx] == NULL) {
289 fmtbuf[idx] = g_malloc(INITIAL_FMTBUF_SIZE);
290 fmtbuf_len[idx] = INITIAL_FMTBUF_SIZE;
293 while (string < stringend) {
295 * Is there enough room for this character, if it expands to
296 * a backslash plus 3 octal digits (which is the most it can
297 * expand to), and also enough room for a terminating '\0'?
299 if (column+3+1 >= fmtbuf_len[idx]) {
301 * Double the buffer's size if it's not big enough.
302 * The size of the buffer starts at 128, so doubling its size
303 * adds at least another 128 bytes, which is more than enough
304 * for one more character plus a terminating '\0'.
306 fmtbuf_len[idx] = fmtbuf_len[idx] * 2;
307 fmtbuf[idx] = g_realloc(fmtbuf[idx], fmtbuf_len[idx]);
312 fmtbuf[idx][column] = c;
314 } else if (isspace(c)) {
315 fmtbuf[idx][column] = ' ';
318 fmtbuf[idx][column] = '\\';
323 fmtbuf[idx][column] = 'a';
328 fmtbuf[idx][column] = 'b'; /* BS */
333 fmtbuf[idx][column] = 'f'; /* FF */
338 fmtbuf[idx][column] = 'n'; /* NL */
343 fmtbuf[idx][column] = 'r'; /* CR */
348 fmtbuf[idx][column] = 't'; /* tab */
353 fmtbuf[idx][column] = 'v';
359 fmtbuf[idx][column] = i + '0';
362 fmtbuf[idx][column] = i + '0';
365 fmtbuf[idx][column] = i + '0';
371 fmtbuf[idx][column] = '\0';
375 /* Max string length for displaying byte string. */
376 #define MAX_BYTE_STR_LEN 48
378 /* Turn an array of bytes into a string showing the bytes in hex. */
379 #define N_BYTES_TO_STR_STRINGS 6
381 bytes_to_str(const guint8 *bd, int bd_len) {
382 return bytes_to_str_punct(bd,bd_len,'\0');
385 /* Turn an array of bytes into a string showing the bytes in hex with
386 * punct as a bytes separator.
389 bytes_to_str_punct(const guint8 *bd, int bd_len, gchar punct) {
394 cur=ep_alloc(MAX_BYTE_STR_LEN+3+1);
396 len = MAX_BYTE_STR_LEN;
397 while (bd_len > 0 && len > 0) {
398 *p++ = hex[(*bd) >> 4];
399 *p++ = hex[(*bd) & 0xF];
403 if(punct && bd_len > 0){
409 /* Note that we're not showing the full string. */
419 is_byte_sep(guint8 c)
421 return (c == '-' || c == ':' || c == '.');
424 /* Turn a string of hex digits with optional separators (defined by
425 * is_byte_sep()) into a byte array.
428 hex_str_to_bytes(const char *hex_str, GByteArray *bytes, gboolean force_separators) {
430 const guchar *p, *q, *punct;
434 g_byte_array_set_size(bytes, 0);
438 p = (const guchar *)hex_str;
441 if (*q && isxdigit(*p) && isxdigit(*q)) {
444 two_digits[2] = '\0';
447 * Two or more hex digits in a row.
448 * "strtoul()" will succeed, as it'll see at
449 * least one hex digit.
451 val = (guint8) strtoul(two_digits, NULL, 16);
452 g_byte_array_append(bytes, &val, 1);
456 * Make sure the character after
457 * the second hex digit is a byte
458 * separator, i.e. that we don't have
459 * more than two hex digits, or a
462 if (is_byte_sep(*punct)) {
466 else if (force_separators) {
474 else if (*q && isxdigit(*p) && is_byte_sep(*q)) {
479 * Only one hex digit.
480 * "strtoul()" will succeed, as it'll see that
483 val = (guint8) strtoul(one_digit, NULL, 16);
484 g_byte_array_append(bytes, &val, 1);
488 else if (!*q && isxdigit(*p)) {
493 * Only one hex digit.
494 * "strtoul()" will succeed, as it'll see that
497 val = (guint8) strtoul(one_digit, NULL, 16);
498 g_byte_array_append(bytes, &val, 1);
510 * Turn an RFC 3986 percent-encoded string into a byte array.
511 * XXX - We don't check for reserved characters.
513 #define HEX_DIGIT_BUF_LEN 3
515 uri_str_to_bytes(const char *uri_str, GByteArray *bytes) {
518 char hex_digit[HEX_DIGIT_BUF_LEN];
520 g_byte_array_set_size(bytes, 0);
528 if (! isascii(*p) || ! isprint(*p))
532 if (*p == '\0') return FALSE;
535 if (*p == '\0') return FALSE;
538 if (! isxdigit(hex_digit[0]) || ! isxdigit(hex_digit[1]))
540 val = (guint8) strtoul(hex_digit, NULL, 16);
541 g_byte_array_append(bytes, &val, 1);
543 g_byte_array_append(bytes, (guint8 *) p, 1);
552 * Given a GByteArray, generate a string from it that shows non-printable
553 * characters as percent-style escapes, and return a pointer to it.
556 format_uri(const GByteArray *bytes, const gchar *reserved_chars)
558 static gchar *fmtbuf[3];
559 static guint fmtbuf_len[3];
561 const gchar *reserved_def = ":/?#[]@!$&'()*+,;= ";
562 const gchar *reserved = reserved_def;
565 gboolean is_reserved = FALSE;
572 reserved = reserved_chars;
575 * Allocate the buffer if it's not already allocated.
577 if (fmtbuf[idx] == NULL) {
578 fmtbuf[idx] = g_malloc(INITIAL_FMTBUF_SIZE);
579 fmtbuf_len[idx] = INITIAL_FMTBUF_SIZE;
581 for (column = 0; column < bytes->len; column++) {
583 * Is there enough room for this character, if it expands to
584 * a percent plus 2 hex digits (which is the most it can
585 * expand to), and also enough room for a terminating '\0'?
587 if (column+2+1 >= fmtbuf_len[idx]) {
589 * Double the buffer's size if it's not big enough.
590 * The size of the buffer starts at 128, so doubling its size
591 * adds at least another 128 bytes, which is more than enough
592 * for one more character plus a terminating '\0'.
594 fmtbuf_len[idx] = fmtbuf_len[idx] * 2;
595 fmtbuf[idx] = g_realloc(fmtbuf[idx], fmtbuf_len[idx]);
597 c = bytes->data[column];
599 if (!isascii(c) || !isprint(c) || c == '%') {
603 for (i = 0; i < strlen(reserved); i++) {
604 if (c == reserved[i])
609 fmtbuf[idx][column] = c;
611 fmtbuf[idx][column] = '%';
613 fmtbuf[idx][column] = hex[c >> 4];
615 fmtbuf[idx][column] = hex[c & 0xF];
618 fmtbuf[idx][column] = '\0';
623 * Create a copy of a GByteArray
625 * @param ba The byte array to be copied.
626 * @return If ba exists, a freshly allocated copy. NULL otherwise.
628 * XXX - Should this be in strutil.c?
631 byte_array_dup(GByteArray *ba) {
637 new_ba = g_byte_array_new();
638 g_byte_array_append(new_ba, ba->data, ba->len);
642 #define SUBID_BUF_LEN 5
644 oid_str_to_bytes(const char *oid_str, GByteArray *bytes) {
645 guint32 subid0, subid, sicnt, i;
647 guint8 buf[SUBID_BUF_LEN];
649 g_byte_array_set_size(bytes, 0);
655 if (!isdigit(*p) && (*p != '.')) return FALSE;
657 if (p == oid_str) return FALSE;
658 if (!*(p+1)) return FALSE;
659 if ((p-1) == dot) return FALSE;
664 if (!dot) return FALSE;
668 subid0 = 0; /* squelch GCC complaints */
671 while (isdigit(*p)) {
678 if (subid0 > 2) return FALSE;
679 } else if (sicnt == 1) {
680 if ((subid0 < 2) && (subid > 39)) return FALSE;
681 subid += 40 * subid0;
687 buf[i] = 0x80 | (subid % 0x80);
689 } while (subid && i);
690 buf[SUBID_BUF_LEN-1] &= 0x7F;
691 g_byte_array_append(bytes, buf + i, SUBID_BUF_LEN - i);
701 * Compare the contents of two GByteArrays
703 * @param ba1 A byte array
704 * @param ba2 A byte array
705 * @return If both arrays are non-NULL and their lengths are equal and
706 * their contents are equal, returns TRUE. Otherwise, returns
709 * XXX - Should this be in strutil.c?
712 byte_array_equal(GByteArray *ba1, GByteArray *ba2) {
716 if (ba1->len != ba2->len)
719 if (memcmp(ba1->data, ba2->data, ba1->len) != 0)
726 /* Return an XML-escaped representation of the unescaped string.
727 * The returned string must be freed when no longer in use. */
729 xml_escape(const gchar *unescaped)
731 GString *buffer = g_string_sized_new(128);
734 #if GLIB_MAJOR_VERSION < 2
739 while ( (c = *p++) ) {
742 g_string_append(buffer, "<");
745 g_string_append(buffer, ">");
748 g_string_append(buffer, "&");
751 g_string_append(buffer, "'");
754 g_string_append(buffer, """);
757 g_string_append_c(buffer, c);
761 #if GLIB_MAJOR_VERSION >= 2
762 /* Return the string value contained within the GString
763 * after getting rid of the GString structure.
764 * This is the way to do this, see the GLib reference. */
765 return g_string_free(buffer, FALSE);
767 /* But it's not the way to do it in GLib 1.2[.x], as
768 * 1.2[.x]'s "g_string_free()" doesn't return anything.
769 * This is the way to do this in GLib 1.2[.x]. */
771 g_string_free(buffer, FALSE);
777 /* Return the first occurrence of needle in haystack.
778 * If not found, return NULL.
779 * If either haystack or needle has 0 length, return NULL.
780 * Algorithm copied from GNU's glibc 2.3.2 memmem() */
782 epan_memmem(const guint8 *haystack, guint haystack_len,
783 const guint8 *needle, guint needle_len)
786 const guint8 *const last_possible
787 = haystack + haystack_len - needle_len;
789 if (needle_len == 0) {
793 if (needle_len > haystack_len) {
797 for (begin = haystack ; begin <= last_possible; ++begin) {
798 if (begin[0] == needle[0] &&
799 !memcmp(&begin[1], needle + 1,
809 * Scan the search string to make sure it's valid hex. Return the
810 * number of bytes in nbytes.
813 convert_string_to_hex(const char *string, size_t *nbytes)
818 guint8 *bytes, *q, byte_val;
827 continue; /* allow white space */
828 if (c==':' || c=='.' || c=='-')
829 continue; /* skip any ':', '.', or '-' between bytes */
831 /* Not a valid hex digit - fail */
836 * We can only match bytes, not nibbles; we must have a valid
837 * hex digit immediately after that hex digit.
843 /* 2 hex digits = 1 byte */
848 * Were we given any hex digits?
856 * OK, it's valid, and it generates "n_bytes" bytes; generate the
859 bytes = g_malloc(n_bytes);
867 continue; /* allow white space */
868 if (c==':' || c=='.' || c=='-')
869 continue; /* skip any ':', '.', or '-' between bytes */
870 /* From the loop above, we know this is a hex digit */
874 byte_val = (c - 'a') + 10;
876 byte_val = (c - 'A') + 10;
879 /* We also know this is a hex digit */
884 byte_val |= (c - 'a') + 10;
886 byte_val |= (c - 'A') + 10;
895 * Copy it if it's a case-sensitive search; uppercase it if it's
896 * a case-insensitive search.
899 convert_string_case(const char *string, gboolean case_insensitive)
906 if (case_insensitive) {
907 out_string = g_malloc(strlen(string) + 1);
908 for (p = &string[0], q = &out_string[0]; (c = *p) != '\0'; p++, q++)
909 *q = toupper((unsigned char)*p);
912 out_string = g_strdup(string);
916 /* g_strlcat() does not exist in GLib 1.2[.x] */
917 #if GLIB_MAJOR_VERSION < 2
919 g_strlcat(gchar *dst, gchar *src, gsize size)
925 g_snprintf(dst+strl, size-strl, "%s", src);