2 * String utility routines
4 * $Id: strutil.c,v 1.18 2004/02/05 09:42:26 guy Exp $
6 * Ethereal - Network traffic analyzer
7 * By Gerald Combs <gerald@ethereal.com>
8 * Copyright 1998 Gerald Combs
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version 2
13 * of the License, or (at your option) any later version.
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
37 * Given a pointer into a data buffer, and to the end of the buffer,
38 * find the end of the (putative) line at that position in the data
40 * Return a pointer to the EOL character(s) in "*eol".
43 find_line_end(const guchar *data, const guchar *dataend, const guchar **eol)
45 const guchar *lineend;
47 lineend = memchr(data, '\n', dataend - data);
48 if (lineend == NULL) {
50 * No LF - line is probably continued in next TCP segment.
56 * Is the LF at the beginning of the line?
60 * No - is it preceded by a carriage return?
61 * (Perhaps it's supposed to be, but that's not guaranteed....)
63 if (*(lineend - 1) == '\r') {
65 * Yes. The EOL starts with the CR.
70 * No. The EOL starts with the LF.
75 * I seem to remember that we once saw lines ending with LF-CR
76 * in an HTTP request or response, so check if it's *followed*
77 * by a carriage return.
79 if (lineend < (dataend - 1) && *(lineend + 1) == '\r') {
81 * It's <non-LF><LF><CR>; say it ends with the CR.
88 * Yes - the EOL starts with the LF.
94 * Point to the character after the last character.
102 * Get the length of the next token in a line, and the beginning of the
103 * next token after that (if any).
104 * Return 0 if there is no next token.
107 get_token_len(const guchar *linep, const guchar *lineend,
108 const guchar **next_token)
110 const guchar *tokenp;
116 * Search for a blank, a CR or an LF, or the end of the buffer.
118 while (linep < lineend && *linep != ' ' && *linep != '\r' && *linep != '\n')
120 token_len = linep - tokenp;
123 * Skip trailing blanks.
125 while (linep < lineend && *linep == ' ')
134 #define INITIAL_FMTBUF_SIZE 128
136 #if GLIB_MAJOR_VERSION >= 2
138 * XXX - "isprint()" can return "true" for non-ASCII characters, but
139 * those don't work with GTK+ 1.3 or later, as they take UTF-8 strings
140 * as input. Until we fix up Ethereal to properly handle non-ASCII
141 * characters in all output (both GUI displays and text printouts)
142 * in those versions of GTK+, we work around the problem by escaping
143 * all characters that aren't printable ASCII.
145 * We don't know what version of GTK+ we're using, as epan doesn't
146 * use any GTK+ stuff; we use GLib as a proxy for that, with GLib 2.x
147 * implying GTK+ 1.3 or later (we don't support GLib 1.3[.x]).
/* Printable-ASCII test (0x20 through 0x7e); each use of the argument is
 * parenthesized so that an expression argument is evaluated as a unit. */
#define isprint(c) ((c) >= 0x20 && (c) < 0x7f)
154 * Given a string, generate a string from it that shows non-printable
155 * characters as C-style escapes, and return a pointer to it.
158 format_text(const guchar *string, int len)
160 static gchar *fmtbuf;
161 static int fmtbuf_len;
163 const guchar *stringend = string + len;
168 * Allocate the buffer if it's not already allocated.
170 if (fmtbuf == NULL) {
171 fmtbuf = g_malloc(INITIAL_FMTBUF_SIZE);
172 fmtbuf_len = INITIAL_FMTBUF_SIZE;
175 while (string < stringend) {
177 * Is there enough room for this character, if it expands to
178 * a backslash plus 3 octal digits (which is the most it can
179 * expand to), and also enough room for a terminating '\0'?
181 if (column+3+1 >= fmtbuf_len) {
183 * Double the buffer's size if it's not big enough.
184 * The size of the buffer starts at 128, so doubling its size
185 * adds at least another 128 bytes, which is more than enough
186 * for one more character plus a terminating '\0'.
188 fmtbuf_len = fmtbuf_len * 2;
189 fmtbuf = g_realloc(fmtbuf, fmtbuf_len);
197 fmtbuf[column] = '\\';
202 fmtbuf[column] = '\\';
207 fmtbuf[column] = 'a';
212 fmtbuf[column] = 'b';
217 fmtbuf[column] = 'f';
222 fmtbuf[column] = 'n';
227 fmtbuf[column] = 'r';
232 fmtbuf[column] = 't';
237 fmtbuf[column] = 'v';
243 fmtbuf[column] = i + '0';
246 fmtbuf[column] = i + '0';
249 fmtbuf[column] = i + '0';
255 fmtbuf[column] = '\0';
259 /* Max string length for displaying byte string. */
260 #define MAX_BYTE_STR_LEN 32
262 /* Turn an array of bytes into a string showing the bytes in hex. */
263 #define N_BYTES_TO_STR_STRINGS 6
265 bytes_to_str(const guint8 *bd, int bd_len) {
266 return bytes_to_str_punct(bd,bd_len,'\0');
269 /* Turn an array of bytes into a string showing the bytes in hex with
270 * punct as a byte separator.
273 bytes_to_str_punct(const guint8 *bd, int bd_len, gchar punct) {
274 static gchar str[N_BYTES_TO_STR_STRINGS][MAX_BYTE_STR_LEN+3+1];
279 static const char hex[16] = { '0', '1', '2', '3', '4', '5', '6', '7',
280 '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };
283 if (cur_idx >= N_BYTES_TO_STR_STRINGS)
285 cur = &str[cur_idx][0];
287 len = MAX_BYTE_STR_LEN;
288 while (bd_len > 0 && len > 0) {
289 *p++ = hex[(*bd) >> 4];
290 *p++ = hex[(*bd) & 0xF];
294 if(punct && bd_len > 0){
300 /* Note that we're not showing the full string. */
310 is_byte_sep(guint8 c)
312 return (c == '-' || c == ':' || c == '.');
315 /* Turn a string of hex digits with optional separators (defined by
316 * is_byte_sep()) into a byte array.
319 hex_str_to_bytes(const char *hex_str, GByteArray *bytes) {
321 const guchar *p, *q, *punct;
325 g_byte_array_set_size(bytes, 0);
326 p = (const guchar *)hex_str;
329 if (*q && isxdigit(*p) && isxdigit(*q)) {
332 two_digits[2] = '\0';
335 * Two or more hex digits in a row.
336 * "strtoul()" will succeed, as it'll see at
337 * least one hex digit.
339 val = (guint8) strtoul(two_digits, NULL, 16);
340 g_byte_array_append(bytes, &val, 1);
344 * Make sure the character after
345 * the second hex digit is a byte
346 * separator, i.e. that we don't have
347 * more than two hex digits, or a
350 if (is_byte_sep(*punct)) {
364 else if (*q && isxdigit(*p) && is_byte_sep(*q)) {
369 * Only one hex digit.
370 * "strtoul()" will succeed, as it'll see that
373 val = (guint8) strtoul(one_digit, NULL, 16);
374 g_byte_array_append(bytes, &val, 1);
378 else if (!*q && isxdigit(*p)) {
383 * Only one hex digit.
384 * "strtoul()" will succeed, as it'll see that
387 val = (guint8) strtoul(one_digit, NULL, 16);
388 g_byte_array_append(bytes, &val, 1);
400 /* Return the first occurrence of needle in haystack.
401 * If not found, return NULL.
402 * If either haystack or needle has 0 length, return NULL.
403 * Algorithm copied from GNU's glibc 2.3.2 memmem() */
405 epan_memmem(const guint8 *haystack, guint haystack_len,
406 const guint8 *needle, guint needle_len)
409 const guint8 *const last_possible
410 = haystack + haystack_len - needle_len;
412 if (needle_len == 0) {
416 if (needle_len > haystack_len) {
420 for (begin = haystack ; begin <= last_possible; ++begin) {
421 if (begin[0] == needle[0] &&
422 !memcmp(&begin[1], needle + 1,