epan/strutil.c

   1 /* strutil.c
   2  * String utility routines
   3  *
   4  * $Id: strutil.c,v 1.18 2004/02/05 09:42:26 guy Exp $
   5  *
   6  * Ethereal - Network traffic analyzer
   7  * By Gerald Combs <gerald@ethereal.com>
   8  * Copyright 1998 Gerald Combs
   9  *
  10  * This program is free software; you can redistribute it and/or
  11  * modify it under the terms of the GNU General Public License
  12  * as published by the Free Software Foundation; either version 2
  13  * of the License, or (at your option) any later version.
  14  *
  15  * This program is distributed in the hope that it will be useful,
  16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  18  * GNU General Public License for more details.
  19  *
  20  * You should have received a copy of the GNU General Public License
  21  * along with this program; if not, write to the Free Software
  22  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
  23  */
  24
  25 #ifdef HAVE_CONFIG_H
  26 # include "config.h"
  27 #endif
  28
  29 #include <stdlib.h>
  30 #include <string.h>
  31 #include <ctype.h>
  32 #include <glib.h>
  33 #include "strutil.h"
  34
  35
  36 /*
  37  * Given a pointer into a data buffer, and to the end of the buffer,
  38  * find the end of the (putative) line at that position in the data
  39  * buffer.
  40  * Return a pointer to the EOL character(s) in "*eol".
  41  */
  42 const guchar *
  43 find_line_end(const guchar *data, const guchar *dataend, const guchar **eol)
  44 {
  45   const guchar *lineend;
  46
  47   lineend = memchr(data, '\n', dataend - data);
  48   if (lineend == NULL) {
  49     /*
  50      * No LF - line is probably continued in next TCP segment.
  51      */
  52     lineend = dataend;
  53     *eol = dataend;
  54   } else {
  55     /*
  56      * Is the LF at the beginning of the line?
  57      */
  58     if (lineend > data) {
  59       /*
  60        * No - is it preceded by a carriage return?
  61        * (Perhaps it's supposed to be, but that's not guaranteed....)
  62        */
  63       if (*(lineend - 1) == '\r') {
  64         /*
  65          * Yes.  The EOL starts with the CR.
  66          */
  67         *eol = lineend - 1;
  68       } else {
  69         /*
  70          * No.  The EOL starts with the LF.
  71          */
  72         *eol = lineend;
  73
  74         /*
  75          * I seem to remember that we once saw lines ending with LF-CR
  76          * in an HTTP request or response, so check if it's *followed*
  77          * by a carriage return.
  78          */
  79         if (lineend < (dataend - 1) && *(lineend + 1) == '\r') {
  80           /*
  81            * It's <non-LF><LF><CR>; say it ends with the CR.
  82            */
  83           lineend++;
  84         }
  85       }
  86     } else {
  87       /*
  88        * Yes - the EOL starts with the LF.
  89        */
  90       *eol = lineend;
  91     }
  92
  93     /*
  94      * Point to the character after the last character.
  95      */
  96     lineend++;
  97   }
  98   return lineend;
  99 }
 100
 101 /*
 102  * Get the length of the next token in a line, and the beginning of the
 103  * next token after that (if any).
 104  * Return 0 if there is no next token.
 105  */
 106 int
 107 get_token_len(const guchar *linep, const guchar *lineend,
 108               const guchar **next_token)
 109 {
 110   const guchar *tokenp;
 111   int token_len;
 112
 113   tokenp = linep;
 114
 115   /*
 116    * Search for a blank, a CR or an LF, or the end of the buffer.
 117    */
 118   while (linep < lineend && *linep != ' ' && *linep != '\r' && *linep != '\n')
 119       linep++;
 120   token_len = linep - tokenp;
 121
 122   /*
 123    * Skip trailing blanks.
 124    */
 125   while (linep < lineend && *linep == ' ')
 126     linep++;
 127
 128   *next_token = linep;
 129
 130   return token_len;
 131 }
 132
 133
 134 #define INITIAL_FMTBUF_SIZE     128
 135
 136 #if GLIB_MAJOR_VERSION >= 2
 137 /*
 138  * XXX - "isprint()" can return "true" for non-ASCII characters, but
 139  * those don't work with GTK+ 1.3 or later, as they take UTF-8 strings
 140  * as input.  Until we fix up Ethereal to properly handle non-ASCII
 141  * characters in all output (both GUI displays and text printouts)
 142  * in those versions of GTK+, we work around the problem by escaping
 143  * all characters that aren't printable ASCII.
 144  *
 145  * We don't know what version of GTK+ we're using, as epan doesn't
 146  * use any GTK+ stuff; we use GLib as a proxy for that, with GLib 2.x
 147  * implying GTK+ 1.3 or later (we don't support GLib 1.3[.x]).
 148  */
 149 #undef isprint
 150 #define isprint(c) (c >= 0x20 && c < 0x7f)
 151 #endif
 152
 153 /*
 154  * Given a string, generate a string from it that shows non-printable
 155  * characters as C-style escapes, and return a pointer to it.
 156  */
 157 gchar *
 158 format_text(const guchar *string, int len)
 159 {
 160   static gchar *fmtbuf;
 161   static int fmtbuf_len;
 162   int column;
 163   const guchar *stringend = string + len;
 164   guchar c;
 165   int i;
 166
 167   /*
 168    * Allocate the buffer if it's not already allocated.
 169    */
 170   if (fmtbuf == NULL) {
 171     fmtbuf = g_malloc(INITIAL_FMTBUF_SIZE);
 172     fmtbuf_len = INITIAL_FMTBUF_SIZE;
 173   }
 174   column = 0;
 175   while (string < stringend) {
 176     /*
 177      * Is there enough room for this character, if it expands to
 178      * a backslash plus 3 octal digits (which is the most it can
 179      * expand to), and also enough room for a terminating '\0'?
 180      */
 181     if (column+3+1 >= fmtbuf_len) {
 182       /*
 183        * Double the buffer's size if it's not big enough.
 184        * The size of the buffer starts at 128, so doubling its size
 185        * adds at least another 128 bytes, which is more than enough
 186        * for one more character plus a terminating '\0'.
 187        */
 188       fmtbuf_len = fmtbuf_len * 2;
 189       fmtbuf = g_realloc(fmtbuf, fmtbuf_len);
 190     }
 191     c = *string++;
 192
 193     if (isprint(c)) {
 194       fmtbuf[column] = c;
 195       column++;
 196     } else {
 197       fmtbuf[column] =  '\\';
 198       column++;
 199       switch (c) {
 200
 201       case '\\':
 202         fmtbuf[column] = '\\';
 203         column++;
 204         break;
 205
 206       case '\a':
 207         fmtbuf[column] = 'a';
 208         column++;
 209         break;
 210
 211       case '\b':
 212         fmtbuf[column] = 'b';
 213         column++;
 214         break;
 215
 216       case '\f':
 217         fmtbuf[column] = 'f';
 218         column++;
 219         break;
 220
 221       case '\n':
 222         fmtbuf[column] = 'n';
 223         column++;
 224         break;
 225
 226       case '\r':
 227         fmtbuf[column] = 'r';
 228         column++;
 229         break;
 230
 231       case '\t':
 232         fmtbuf[column] = 't';
 233         column++;
 234         break;
 235
 236       case '\v':
 237         fmtbuf[column] = 'v';
 238         column++;
 239         break;
 240
 241       default:
 242         i = (c>>6)&03;
 243         fmtbuf[column] = i + '0';
 244         column++;
 245         i = (c>>3)&07;
 246         fmtbuf[column] = i + '0';
 247         column++;
 248         i = (c>>0)&07;
 249         fmtbuf[column] = i + '0';
 250         column++;
 251         break;
 252       }
 253     }
 254   }
 255   fmtbuf[column] = '\0';
 256   return fmtbuf;
 257 }
 258
 259 /* Max string length for displaying byte string.  */
 260 #define MAX_BYTE_STR_LEN        32
 261
 262 /* Turn an array of bytes into a string showing the bytes in hex. */
 263 #define N_BYTES_TO_STR_STRINGS  6
 264 gchar *
 265 bytes_to_str(const guint8 *bd, int bd_len) {
 266   return bytes_to_str_punct(bd,bd_len,'\0');
 267 }
 268
 269 /* Turn an array of bytes into a string showing the bytes in hex with
 270  * punct as a bytes separator.
 271  */
 272 gchar *
 273 bytes_to_str_punct(const guint8 *bd, int bd_len, gchar punct) {
 274   static gchar  str[N_BYTES_TO_STR_STRINGS][MAX_BYTE_STR_LEN+3+1];
 275   static int    cur_idx;
 276   gchar        *cur;
 277   gchar        *p;
 278   int           len;
 279   static const char hex[16] = { '0', '1', '2', '3', '4', '5', '6', '7',
 280                                 '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };
 281
 282   cur_idx++;
 283   if (cur_idx >= N_BYTES_TO_STR_STRINGS)
 284     cur_idx = 0;
 285   cur = &str[cur_idx][0];
 286   p = cur;
 287   len = MAX_BYTE_STR_LEN;
 288   while (bd_len > 0 && len > 0) {
 289     *p++ = hex[(*bd) >> 4];
 290     *p++ = hex[(*bd) & 0xF];
 291     len -= 2;
 292     bd++;
 293     bd_len--;
 294     if(punct && bd_len > 0){
 295       *p++ = punct;
 296       len--;
 297     }
 298   }
 299   if (bd_len != 0) {
 300     /* Note that we're not showing the full string.  */
 301     *p++ = '.';
 302     *p++ = '.';
 303     *p++ = '.';
 304   }
 305   *p = '\0';
 306   return cur;
 307 }
 308
 309 static gboolean
 310 is_byte_sep(guint8 c)
 311 {
 312         return (c == '-' || c == ':' || c == '.');
 313 }
 314
 315 /* Turn a string of hex digits with optional separators (defined by
 316  * is_byte_sep() into a byte array.
 317  */
 318 gboolean
 319 hex_str_to_bytes(const char *hex_str, GByteArray *bytes) {
 320         guint8          val;
 321         const guchar    *p, *q, *punct;
 322         char            two_digits[3];
 323         char            one_digit[2];
 324
 325         g_byte_array_set_size(bytes, 0);
 326         p = (const guchar *)hex_str;
 327         while (*p) {
 328                 q = p+1;
 329                 if (*q && isxdigit(*p) && isxdigit(*q)) {
 330                         two_digits[0] = *p;
 331                         two_digits[1] = *q;
 332                         two_digits[2] = '\0';
 333
 334                         /*
 335                          * Two or more hex digits in a row.
 336                          * "strtoul()" will succeed, as it'll see at
 337                          * least one hex digit.
 338                          */
 339                         val = (guint8) strtoul(two_digits, NULL, 16);
 340                         g_byte_array_append(bytes, &val, 1);
 341                         punct = q + 1;
 342                         if (*punct) {
 343                                 /*
 344                                  * Make sure the character after
 345                                  * the second hex digit is a byte
 346                                  * separator, i.e. that we don't have
 347                                  * more than two hex digits, or a
 348                                  * bogus character.
 349                                  */
 350                                 if (is_byte_sep(*punct)) {
 351                                         p = punct + 1;
 352                                         continue;
 353                                 }
 354                                 else {
 355                                         return FALSE;
 356                                         break;
 357                                 }
 358                         }
 359                         else {
 360                                 p = punct;
 361                                 continue;
 362                         }
 363                 }
 364                 else if (*q && isxdigit(*p) && is_byte_sep(*q)) {
 365                         one_digit[0] = *p;
 366                         one_digit[1] = '\0';
 367
 368                         /*
 369                          * Only one hex digit.
 370                          * "strtoul()" will succeed, as it'll see that
 371                          * hex digit.
 372                          */
 373                         val = (guint8) strtoul(one_digit, NULL, 16);
 374                         g_byte_array_append(bytes, &val, 1);
 375                         p = q + 1;
 376                         continue;
 377                 }
 378                 else if (!*q && isxdigit(*p)) {
 379                         one_digit[0] = *p;
 380                         one_digit[1] = '\0';
 381
 382                         /*
 383                          * Only one hex digit.
 384                          * "strtoul()" will succeed, as it'll see that
 385                          * hex digit.
 386                          */
 387                         val = (guint8) strtoul(one_digit, NULL, 16);
 388                         g_byte_array_append(bytes, &val, 1);
 389                         p = q;
 390                         continue;
 391                 }
 392                 else {
 393                         return FALSE;
 394                 }
 395         }
 396         return TRUE;
 397 }
 398
 399
 400 /* Return the first occurrence of needle in haystack.
 401  * If not found, return NULL.
 402  * If either haystack or needle has 0 length, return NULL.
 403  * Algorithm copied from GNU's glibc 2.3.2 memcmp() */
 404 const guint8 *
 405 epan_memmem(const guint8 *haystack, guint haystack_len,
 406                 const guint8 *needle, guint needle_len)
 407 {
 408         const guint8 *begin;
 409         const guint8 *const last_possible
 410                 = haystack + haystack_len - needle_len;
 411
 412         if (needle_len == 0) {
 413                 return NULL;
 414         }
 415
 416         if (needle_len > haystack_len) {
 417                 return NULL;
 418         }
 419
 420         for (begin = haystack ; begin <= last_possible; ++begin) {
 421                 if (begin[0] == needle[0] &&
 422                         !memcmp(&begin[1], needle + 1,
 423                                 needle_len - 1)) {
 424                         return begin;
 425                 }
 426         }
 427
 428         return NULL;
 429 }