2 * String utility routines
6 * Ethereal - Network traffic analyzer
7 * By Gerald Combs <gerald@ethereal.com>
8 * Copyright 1998 Gerald Combs
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version 2
13 * of the License, or (at your option) any later version.
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
43 * Given a pointer into a data buffer, and to the end of the buffer,
44 * find the end of the (putative) line at that position in the data
46 * Return a pointer to the EOL character(s) in "*eol".
49 find_line_end(const guchar *data, const guchar *dataend, const guchar **eol)
51 const guchar *lineend;
53 lineend = memchr(data, '\n', dataend - data);
54 if (lineend == NULL) {
56 * No LF - line is probably continued in next TCP segment.
62 * Is the LF at the beginning of the line?
66 * No - is it preceded by a carriage return?
67 * (Perhaps it's supposed to be, but that's not guaranteed....)
69 if (*(lineend - 1) == '\r') {
71 * Yes. The EOL starts with the CR.
76 * No. The EOL starts with the LF.
81 * I seem to remember that we once saw lines ending with LF-CR
82 * in an HTTP request or response, so check if it's *followed*
83 * by a carriage return.
85 if (lineend < (dataend - 1) && *(lineend + 1) == '\r') {
87 * It's <non-LF><LF><CR>; say it ends with the CR.
94 * Yes - the EOL starts with the LF.
100 * Point to the character after the last character.
108 * Get the length of the next token in a line, and the beginning of the
109 * next token after that (if any).
110 * Return 0 if there is no next token.
113 get_token_len(const guchar *linep, const guchar *lineend,
114 const guchar **next_token)
116 const guchar *tokenp;
122 * Search for a blank, a CR or an LF, or the end of the buffer.
124 while (linep < lineend && *linep != ' ' && *linep != '\r' && *linep != '\n')
126 token_len = linep - tokenp;
129 * Skip trailing blanks.
131 while (linep < lineend && *linep == ' ')
140 #define INITIAL_FMTBUF_SIZE 128
142 #if GLIB_MAJOR_VERSION >= 2
144 * XXX - "isprint()" can return "true" for non-ASCII characters, but
145 * those don't work with GTK+ 1.3 or later, as they take UTF-8 strings
146 * as input. Until we fix up Ethereal to properly handle non-ASCII
147 * characters in all output (both GUI displays and text printouts)
148 * in those versions of GTK+, we work around the problem by escaping
149 * all characters that aren't printable ASCII.
151 * We don't know what version of GTK+ we're using, as epan doesn't
152 * use any GTK+ stuff; we use GLib as a proxy for that, with GLib 2.x
153 * implying GTK+ 1.3 or later (we don't support GLib 1.3[.x]).
/* Printable 7-bit ASCII only (0x20..0x7e); note the argument is evaluated twice. */
#define isprint(c) ((c) >= 0x20 && (c) < 0x7f)
160 * Given a string, generate a string from it that shows non-printable
161 * characters as C-style escapes, and return a pointer to it.
164 format_text(const guchar *string, int len)
166 static gchar *fmtbuf[3];
167 static int fmtbuf_len[3];
170 const guchar *stringend = string + len;
177 * Allocate the buffer if it's not already allocated.
179 if (fmtbuf[idx] == NULL) {
180 fmtbuf[idx] = g_malloc(INITIAL_FMTBUF_SIZE);
181 fmtbuf_len[idx] = INITIAL_FMTBUF_SIZE;
184 while (string < stringend) {
186 * Is there enough room for this character, if it expands to
187 * a backslash plus 3 octal digits (which is the most it can
188 * expand to), and also enough room for a terminating '\0'?
190 if (column+3+1 >= fmtbuf_len[idx]) {
192 * Double the buffer's size if it's not big enough.
193 * The size of the buffer starts at 128, so doubling its size
194 * adds at least another 128 bytes, which is more than enough
195 * for one more character plus a terminating '\0'.
197 fmtbuf_len[idx] = fmtbuf_len[idx] * 2;
198 fmtbuf[idx] = g_realloc(fmtbuf[idx], fmtbuf_len[idx]);
203 fmtbuf[idx][column] = c;
206 fmtbuf[idx][column] = '\\';
211 fmtbuf[idx][column] = 'a';
216 fmtbuf[idx][column] = 'b';
221 fmtbuf[idx][column] = 'f';
226 fmtbuf[idx][column] = 'n';
231 fmtbuf[idx][column] = 'r';
236 fmtbuf[idx][column] = 't';
241 fmtbuf[idx][column] = 'v';
247 fmtbuf[idx][column] = i + '0';
250 fmtbuf[idx][column] = i + '0';
253 fmtbuf[idx][column] = i + '0';
259 fmtbuf[idx][column] = '\0';
263 /* Max string length for displaying byte string. */
264 #define MAX_BYTE_STR_LEN 48
266 /* Turn an array of bytes into a string showing the bytes in hex. */
267 #define N_BYTES_TO_STR_STRINGS 6
269 bytes_to_str(const guint8 *bd, int bd_len) {
270 return bytes_to_str_punct(bd,bd_len,'\0');
273 /* Turn an array of bytes into a string showing the bytes in hex with
274 * punct as a bytes separator.
277 bytes_to_str_punct(const guint8 *bd, int bd_len, gchar punct) {
281 static const char hex[16] = { '0', '1', '2', '3', '4', '5', '6', '7',
282 '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };
284 cur=ep_alloc(MAX_BYTE_STR_LEN+3+1);
286 len = MAX_BYTE_STR_LEN;
287 while (bd_len > 0 && len > 0) {
288 *p++ = hex[(*bd) >> 4];
289 *p++ = hex[(*bd) & 0xF];
293 if(punct && bd_len > 0){
299 /* Note that we're not showing the full string. */
309 is_byte_sep(guint8 c)
311 return (c == '-' || c == ':' || c == '.');
314 /* Turn a string of hex digits with optional separators (defined by
315 * is_byte_sep()) into a byte array.
318 hex_str_to_bytes(const char *hex_str, GByteArray *bytes, gboolean force_separators) {
320 const guchar *p, *q, *punct;
324 g_byte_array_set_size(bytes, 0);
325 p = (const guchar *)hex_str;
328 if (*q && isxdigit(*p) && isxdigit(*q)) {
331 two_digits[2] = '\0';
334 * Two or more hex digits in a row.
335 * "strtoul()" will succeed, as it'll see at
336 * least one hex digit.
338 val = (guint8) strtoul(two_digits, NULL, 16);
339 g_byte_array_append(bytes, &val, 1);
343 * Make sure the character after
344 * the second hex digit is a byte
345 * separator, i.e. that we don't have
346 * more than two hex digits, or a
349 if (is_byte_sep(*punct)) {
353 else if (force_separators) {
361 else if (*q && isxdigit(*p) && is_byte_sep(*q)) {
366 * Only one hex digit.
367 * "strtoul()" will succeed, as it'll see that
370 val = (guint8) strtoul(one_digit, NULL, 16);
371 g_byte_array_append(bytes, &val, 1);
375 else if (!*q && isxdigit(*p)) {
380 * Only one hex digit.
381 * "strtoul()" will succeed, as it'll see that
384 val = (guint8) strtoul(one_digit, NULL, 16);
385 g_byte_array_append(bytes, &val, 1);
396 #define SUBID_BUF_LEN 5
398 oid_str_to_bytes(const char *oid_str, GByteArray *bytes) {
399 guint32 subid0, subid, sicnt, i;
401 guint8 buf[SUBID_BUF_LEN];
403 g_byte_array_set_size(bytes, 0);
409 if (!isdigit(*p) && (*p != '.')) return FALSE;
411 if (p == oid_str) return FALSE;
412 if (!*(p+1)) return FALSE;
413 if ((p-1) == dot) return FALSE;
418 if (!dot) return FALSE;
422 subid0 = 0; /* squelch GCC complaints */
425 while (isdigit(*p)) {
432 if (subid0 > 2) return FALSE;
433 } else if (sicnt == 1) {
434 if ((subid0 < 2) && (subid > 39)) return FALSE;
435 subid += 40 * subid0;
441 buf[i] = 0x80 | (subid % 0x80);
443 } while (subid && i);
444 buf[SUBID_BUF_LEN-1] &= 0x7F;
445 g_byte_array_append(bytes, buf + i, SUBID_BUF_LEN - i);
455 /* Return an XML-escaped representation of the unescaped string.
456 * The returned string must be freed when no longer in use. */
458 xml_escape(const gchar *unescaped)
460 GString *buffer = g_string_sized_new(128);
463 #if GLIB_MAJOR_VERSION < 2
468 while ( (c = *p++) ) {
471 g_string_append(buffer, "<");
474 g_string_append(buffer, ">");
477 g_string_append(buffer, "&");
480 g_string_append(buffer, "'");
483 g_string_append(buffer, """);
486 g_string_append_c(buffer, c);
490 #if GLIB_MAJOR_VERSION >= 2
491 /* Return the string value contained within the GString
492 * after getting rid of the GString structure.
493 * This is the way to do this, see the GLib reference. */
494 return g_string_free(buffer, FALSE);
496 /* But it's not the way to do it in GLib 1.2[.x], as
497 * 1.2[.x]'s "g_string_free()" doesn't return anything.
498 * This is the way to do this in GLib 1.2[.x]. */
500 g_string_free(buffer, FALSE);
506 /* Return the first occurrence of needle in haystack.
507 * If not found, return NULL.
508 * If either haystack or needle has 0 length, return NULL.
509 * Algorithm copied from GNU's glibc 2.3.2 memmem() */
511 epan_memmem(const guint8 *haystack, guint haystack_len,
512 const guint8 *needle, guint needle_len)
515 const guint8 *const last_possible
516 = haystack + haystack_len - needle_len;
518 if (needle_len == 0) {
522 if (needle_len > haystack_len) {
526 for (begin = haystack ; begin <= last_possible; ++begin) {
527 if (begin[0] == needle[0] &&
528 !memcmp(&begin[1], needle + 1,
538 * Scan the search string to make sure it's valid hex. Return the
539 * number of bytes in nbytes.
542 convert_string_to_hex(const char *string, size_t *nbytes)
547 guint8 *bytes, *q, byte_val;
556 continue; /* allow white space */
557 if (c==':' || c=='.' || c=='-')
558 continue; /* skip any ':', '.', or '-' between bytes */
560 /* Not a valid hex digit - fail */
565 * We can only match bytes, not nibbles; we must have a valid
566 * hex digit immediately after that hex digit.
572 /* 2 hex digits = 1 byte */
577 * Were we given any hex digits?
585 * OK, it's valid, and it generates "n_bytes" bytes; generate the
588 bytes = g_malloc(n_bytes);
596 continue; /* allow white space */
597 if (c==':' || c=='.' || c=='-')
598 continue; /* skip any ':', '.', or '-' between bytes */
599 /* From the loop above, we know this is a hex digit */
603 byte_val = (c - 'a') + 10;
605 byte_val = (c - 'A') + 10;
608 /* We also know this is a hex digit */
613 byte_val |= (c - 'a') + 10;
615 byte_val |= (c - 'A') + 10;
624 * Copy it if it's a case-sensitive search; uppercase it if it's
625 * a case-insensitive search.
628 convert_string_case(const char *string, gboolean case_insensitive)
635 if (case_insensitive) {
636 out_string = g_malloc(strlen(string) + 1);
637 for (p = &string[0], q = &out_string[0]; (c = *p) != '\0'; p++, q++)
638 *q = toupper((unsigned char)*p);
641 out_string = g_strdup(string);
645 /* g_strlcat() does not exist in GLib 1.2[.x] */
646 #if GLIB_MAJOR_VERSION < 2
648 g_strlcat(gchar *dst, gchar *src, gsize size)
654 g_snprintf(dst+strl, size-strl, "%s", src);
663 * XXX - Should we use g_utf8_to_utf16() and g_utf16_to_utf8()
664 * instead? The goal of the functions below was to provide simple
665 * wrappers for UTF-8 <-> UTF-16 conversion without making the
666 * caller worry about freeing up memory afterward.
669 /* Convert from UTF-8 to UTF-16. */
670 wchar_t * utf_8to16(const char *utf8str) {
671 static wchar_t *utf16buf[3];
672 static int utf16buf_len[3];
681 * Allocate the buffer if it's not already allocated.
683 if (utf16buf[idx] == NULL) {
684 utf16buf_len[idx] = INITIAL_FMTBUF_SIZE;
685 utf16buf[idx] = g_malloc(utf16buf_len[idx] * sizeof(wchar_t));
688 while (MultiByteToWideChar(CP_UTF8, 0, utf8str,
689 -1, NULL, 0) >= utf16buf_len[idx]) {
691 * Double the buffer's size until the converted string fits.
692 * The buffer starts at 128 wchar_t elements (not bytes), and
693 * utf16buf_len[] counts elements. NOTE(review): the g_realloc()
694 * below omits the sizeof(wchar_t) scaling that g_malloc() applies.
696 utf16buf_len[idx] *= 2;
697 utf16buf[idx] = g_realloc(utf16buf[idx], utf16buf_len[idx]);
700 if (MultiByteToWideChar(CP_UTF8, 0, utf8str,
701 -1, utf16buf[idx], utf16buf_len[idx]) == 0)
704 return utf16buf[idx];
707 /* Convert from UTF-16 to UTF-8. */
708 gchar * utf_16to8(const wchar_t *utf16str) {
709 static gchar *utf8buf[3];
710 static int utf8buf_len[3];
713 if (utf16str == NULL)
719 * Allocate the buffer if it's not already allocated.
721 if (utf8buf[idx] == NULL) {
722 utf8buf_len[idx] = INITIAL_FMTBUF_SIZE;
723 utf8buf[idx] = g_malloc(utf8buf_len[idx]);
726 while (WideCharToMultiByte(CP_UTF8, 0, utf16str, -1,
727 NULL, 0, NULL, NULL) >= utf8buf_len[idx]) {
729 * Double the buffer's size until the converted string fits.
730 * The size of the buffer starts at 128 bytes, and the loop
731 * repeats until the buffer is larger than the length that
732 * WideCharToMultiByte() reports as required for the string.
734 utf8buf_len[idx] *= 2;
735 utf8buf[idx] = g_realloc(utf8buf[idx], utf8buf_len[idx]);
738 if (WideCharToMultiByte(CP_UTF8, 0, utf16str, -1,
739 utf8buf[idx], utf8buf_len[idx], NULL, NULL) == 0)