epan/strutil.c

   1 /* strutil.c
   2  * String utility routines
   3  *
   4  * $Id$
   5  *
   6  * Wireshark - Network traffic analyzer
   7  * By Gerald Combs <gerald@wireshark.org>
   8  * Copyright 1998 Gerald Combs
   9  *
  10  * This program is free software; you can redistribute it and/or
  11  * modify it under the terms of the GNU General Public License
  12  * as published by the Free Software Foundation; either version 2
  13  * of the License, or (at your option) any later version.
  14  *
  15  * This program is distributed in the hope that it will be useful,
  16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  18  * GNU General Public License for more details.
  19  *
  20  * You should have received a copy of the GNU General Public License
  21  * along with this program; if not, write to the Free Software
  22  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
  23  */
  24
  25 #ifdef HAVE_CONFIG_H
  26 # include "config.h"
  27 #endif
  28
  29 #include <stdlib.h>
  30 #include <string.h>
  31 #include <ctype.h>
  32 #include <glib.h>
  33 #include "strutil.h"
  34 #include "emem.h"
  35
  36 #ifdef _WIN32
  37 #include <windows.h>
  38 #include <tchar.h>
  39 #include <wchar.h>
  40 #endif
  41
  42 /*
  43  * Given a pointer into a data buffer, and to the end of the buffer,
  44  * find the end of the (putative) line at that position in the data
  45  * buffer.
  46  * Return a pointer to the EOL character(s) in "*eol".
  47  */
  48 const guchar *
  49 find_line_end(const guchar *data, const guchar *dataend, const guchar **eol)
  50 {
  51   const guchar *lineend;
  52
  53   lineend = memchr(data, '\n', dataend - data);
  54   if (lineend == NULL) {
  55     /*
  56      * No LF - line is probably continued in next TCP segment.
  57      */
  58     lineend = dataend;
  59     *eol = dataend;
  60   } else {
  61     /*
  62      * Is the LF at the beginning of the line?
  63      */
  64     if (lineend > data) {
  65       /*
  66        * No - is it preceded by a carriage return?
  67        * (Perhaps it's supposed to be, but that's not guaranteed....)
  68        */
  69       if (*(lineend - 1) == '\r') {
  70         /*
  71          * Yes.  The EOL starts with the CR.
  72          */
  73         *eol = lineend - 1;
  74       } else {
  75         /*
  76          * No.  The EOL starts with the LF.
  77          */
  78         *eol = lineend;
  79
  80         /*
  81          * I seem to remember that we once saw lines ending with LF-CR
  82          * in an HTTP request or response, so check if it's *followed*
  83          * by a carriage return.
  84          */
  85         if (lineend < (dataend - 1) && *(lineend + 1) == '\r') {
  86           /*
  87            * It's <non-LF><LF><CR>; say it ends with the CR.
  88            */
  89           lineend++;
  90         }
  91       }
  92     } else {
  93       /*
  94        * Yes - the EOL starts with the LF.
  95        */
  96       *eol = lineend;
  97     }
  98
  99     /*
 100      * Point to the character after the last character.
 101      */
 102     lineend++;
 103   }
 104   return lineend;
 105 }
 106
 107 /*
 108  * Get the length of the next token in a line, and the beginning of the
 109  * next token after that (if any).
 110  * Return 0 if there is no next token.
 111  */
 112 int
 113 get_token_len(const guchar *linep, const guchar *lineend,
 114               const guchar **next_token)
 115 {
 116   const guchar *tokenp;
 117   int token_len;
 118
 119   tokenp = linep;
 120
 121   /*
 122    * Search for a blank, a CR or an LF, or the end of the buffer.
 123    */
 124   while (linep < lineend && *linep != ' ' && *linep != '\r' && *linep != '\n')
 125       linep++;
 126   token_len = linep - tokenp;
 127
 128   /*
 129    * Skip trailing blanks.
 130    */
 131   while (linep < lineend && *linep == ' ')
 132     linep++;
 133
 134   *next_token = linep;
 135
 136   return token_len;
 137 }
 138
 139
 140 #define INITIAL_FMTBUF_SIZE     128
 141
 142 #if GLIB_MAJOR_VERSION >= 2
 143 /*
 144  * XXX - "isprint()" can return "true" for non-ASCII characters, but
 145  * those don't work with GTK+ 1.3 or later, as they take UTF-8 strings
 146  * as input.  Until we fix up Wireshark to properly handle non-ASCII
 147  * characters in all output (both GUI displays and text printouts)
 148  * in those versions of GTK+, we work around the problem by escaping
 149  * all characters that aren't printable ASCII.
 150  *
 151  * We don't know what version of GTK+ we're using, as epan doesn't
 152  * use any GTK+ stuff; we use GLib as a proxy for that, with GLib 2.x
 153  * implying GTK+ 1.3 or later (we don't support GLib 1.3[.x]).
 154  */
 155 #undef isprint
 156 #define isprint(c) (c >= 0x20 && c < 0x7f)
 157 #endif
 158
 159 /*
 160  * Given a string, generate a string from it that shows non-printable
 161  * characters as C-style escapes, and return a pointer to it.
 162  */
 163 gchar *
 164 format_text(const guchar *string, int len)
 165 {
 166   static gchar *fmtbuf[3];
 167   static int fmtbuf_len[3];
 168   static int idx;
 169   int column;
 170   const guchar *stringend = string + len;
 171   guchar c;
 172   int i;
 173
 174   idx = (idx + 1) % 3;
 175
 176   /*
 177    * Allocate the buffer if it's not already allocated.
 178    */
 179   if (fmtbuf[idx] == NULL) {
 180     fmtbuf[idx] = g_malloc(INITIAL_FMTBUF_SIZE);
 181     fmtbuf_len[idx] = INITIAL_FMTBUF_SIZE;
 182   }
 183   column = 0;
 184   while (string < stringend) {
 185     /*
 186      * Is there enough room for this character, if it expands to
 187      * a backslash plus 3 octal digits (which is the most it can
 188      * expand to), and also enough room for a terminating '\0'?
 189      */
 190     if (column+3+1 >= fmtbuf_len[idx]) {
 191       /*
 192        * Double the buffer's size if it's not big enough.
 193        * The size of the buffer starts at 128, so doubling its size
 194        * adds at least another 128 bytes, which is more than enough
 195        * for one more character plus a terminating '\0'.
 196        */
 197       fmtbuf_len[idx] = fmtbuf_len[idx] * 2;
 198       fmtbuf[idx] = g_realloc(fmtbuf[idx], fmtbuf_len[idx]);
 199     }
 200     c = *string++;
 201
 202     if (isprint(c)) {
 203       fmtbuf[idx][column] = c;
 204       column++;
 205     } else {
 206       fmtbuf[idx][column] =  '\\';
 207       column++;
 208       switch (c) {
 209
 210       case '\a':
 211         fmtbuf[idx][column] = 'a';
 212         column++;
 213         break;
 214
 215       case '\b':
 216         fmtbuf[idx][column] = 'b'; /* BS */
 217         column++;
 218         break;
 219
 220       case '\f':
 221         fmtbuf[idx][column] = 'f'; /* FF */
 222         column++;
 223         break;
 224
 225       case '\n':
 226         fmtbuf[idx][column] = 'n'; /* NL */
 227         column++;
 228         break;
 229
 230       case '\r':
 231         fmtbuf[idx][column] = 'r'; /* CR */
 232         column++;
 233         break;
 234
 235       case '\t':
 236         fmtbuf[idx][column] = 't'; /* tab */
 237         column++;
 238         break;
 239
 240       case '\v':
 241         fmtbuf[idx][column] = 'v';
 242         column++;
 243         break;
 244
 245       default:
 246         i = (c>>6)&03;
 247         fmtbuf[idx][column] = i + '0';
 248         column++;
 249         i = (c>>3)&07;
 250         fmtbuf[idx][column] = i + '0';
 251         column++;
 252         i = (c>>0)&07;
 253         fmtbuf[idx][column] = i + '0';
 254         column++;
 255         break;
 256       }
 257     }
 258   }
 259   fmtbuf[idx][column] = '\0';
 260   return fmtbuf[idx];
 261 }
 262
 263 /*
 264  * Given a string, generate a string from it that shows non-printable
 265  * characters as C-style escapes except a whitespace character
 266  * (space, tab, carriage return, new line, vertical tab, or formfeed)
 267  * which will be replaved by a space, and return a pointer to it.
 268  */
 269 gchar *
 270 format_text_wsp(const guchar *string, int len)
 271 {
 272   static gchar *fmtbuf[3];
 273   static int fmtbuf_len[3];
 274   static int idx;
 275   int column;
 276   const guchar *stringend = string + len;
 277   guchar c;
 278   int i;
 279
 280   idx = (idx + 1) % 3;
 281
 282   /*
 283    * Allocate the buffer if it's not already allocated.
 284    */
 285   if (fmtbuf[idx] == NULL) {
 286     fmtbuf[idx] = g_malloc(INITIAL_FMTBUF_SIZE);
 287     fmtbuf_len[idx] = INITIAL_FMTBUF_SIZE;
 288   }
 289   column = 0;
 290   while (string < stringend) {
 291     /*
 292      * Is there enough room for this character, if it expands to
 293      * a backslash plus 3 octal digits (which is the most it can
 294      * expand to), and also enough room for a terminating '\0'?
 295      */
 296     if (column+3+1 >= fmtbuf_len[idx]) {
 297       /*
 298        * Double the buffer's size if it's not big enough.
 299        * The size of the buffer starts at 128, so doubling its size
 300        * adds at least another 128 bytes, which is more than enough
 301        * for one more character plus a terminating '\0'.
 302        */
 303       fmtbuf_len[idx] = fmtbuf_len[idx] * 2;
 304       fmtbuf[idx] = g_realloc(fmtbuf[idx], fmtbuf_len[idx]);
 305     }
 306     c = *string++;
 307
 308     if (isprint(c)) {
 309       fmtbuf[idx][column] = c;
 310       column++;
 311     } else if  (isspace(c)) {
 312       fmtbuf[idx][column] = ' ';
 313       column++;
 314         }else {
 315       fmtbuf[idx][column] =  '\\';
 316       column++;
 317       switch (c) {
 318
 319       case '\a':
 320         fmtbuf[idx][column] = 'a';
 321         column++;
 322         break;
 323
 324       case '\b':
 325         fmtbuf[idx][column] = 'b'; /* BS */
 326         column++;
 327         break;
 328
 329       case '\f':
 330         fmtbuf[idx][column] = 'f'; /* FF */
 331         column++;
 332         break;
 333
 334       case '\n':
 335         fmtbuf[idx][column] = 'n'; /* NL */
 336         column++;
 337         break;
 338
 339       case '\r':
 340         fmtbuf[idx][column] = 'r'; /* CR */
 341         column++;
 342         break;
 343
 344       case '\t':
 345         fmtbuf[idx][column] = 't'; /* tab */
 346         column++;
 347         break;
 348
 349       case '\v':
 350         fmtbuf[idx][column] = 'v';
 351         column++;
 352         break;
 353
 354       default:
 355         i = (c>>6)&03;
 356         fmtbuf[idx][column] = i + '0';
 357         column++;
 358         i = (c>>3)&07;
 359         fmtbuf[idx][column] = i + '0';
 360         column++;
 361         i = (c>>0)&07;
 362         fmtbuf[idx][column] = i + '0';
 363         column++;
 364         break;
 365       }
 366     }
 367   }
 368   fmtbuf[idx][column] = '\0';
 369   return fmtbuf[idx];
 370 }
 371
 372 /* Max string length for displaying byte string.  */
 373 #define MAX_BYTE_STR_LEN        48
 374
 375 /* Turn an array of bytes into a string showing the bytes in hex. */
 376 #define N_BYTES_TO_STR_STRINGS  6
 377 gchar *
 378 bytes_to_str(const guint8 *bd, int bd_len) {
 379   return bytes_to_str_punct(bd,bd_len,'\0');
 380 }
 381
 382 /* Turn an array of bytes into a string showing the bytes in hex with
 383  * punct as a bytes separator.
 384  */
 385 gchar *
 386 bytes_to_str_punct(const guint8 *bd, int bd_len, gchar punct) {
 387   gchar        *cur;
 388   gchar        *p;
 389   int           len;
 390   static const char hex[16] = { '0', '1', '2', '3', '4', '5', '6', '7',
 391                                 '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };
 392
 393   cur=ep_alloc(MAX_BYTE_STR_LEN+3+1);
 394   p = cur;
 395   len = MAX_BYTE_STR_LEN;
 396   while (bd_len > 0 && len > 0) {
 397     *p++ = hex[(*bd) >> 4];
 398     *p++ = hex[(*bd) & 0xF];
 399     len -= 2;
 400     bd++;
 401     bd_len--;
 402     if(punct && bd_len > 0){
 403       *p++ = punct;
 404       len--;
 405     }
 406   }
 407   if (bd_len != 0) {
 408     /* Note that we're not showing the full string.  */
 409     *p++ = '.';
 410     *p++ = '.';
 411     *p++ = '.';
 412   }
 413   *p = '\0';
 414   return cur;
 415 }
 416
 417 static gboolean
 418 is_byte_sep(guint8 c)
 419 {
 420         return (c == '-' || c == ':' || c == '.');
 421 }
 422
 423 /* Turn a string of hex digits with optional separators (defined by
 424  * is_byte_sep() into a byte array.
 425  */
 426 gboolean
 427 hex_str_to_bytes(const char *hex_str, GByteArray *bytes, gboolean force_separators) {
 428         guint8          val;
 429         const guchar    *p, *q, *punct;
 430         char            two_digits[3];
 431         char            one_digit[2];
 432
 433         g_byte_array_set_size(bytes, 0);
 434         if (! hex_str) {
 435                 return FALSE;
 436         }
 437         p = (const guchar *)hex_str;
 438         while (*p) {
 439                 q = p+1;
 440                 if (*q && isxdigit(*p) && isxdigit(*q)) {
 441                         two_digits[0] = *p;
 442                         two_digits[1] = *q;
 443                         two_digits[2] = '\0';
 444
 445                         /*
 446                          * Two or more hex digits in a row.
 447                          * "strtoul()" will succeed, as it'll see at
 448                          * least one hex digit.
 449                          */
 450                         val = (guint8) strtoul(two_digits, NULL, 16);
 451                         g_byte_array_append(bytes, &val, 1);
 452                         punct = q + 1;
 453                         if (*punct) {
 454                                 /*
 455                                  * Make sure the character after
 456                                  * the second hex digit is a byte
 457                                  * separator, i.e. that we don't have
 458                                  * more than two hex digits, or a
 459                                  * bogus character.
 460                                  */
 461                                 if (is_byte_sep(*punct)) {
 462                                         p = punct + 1;
 463                                         continue;
 464                                 }
 465                                 else if (force_separators) {
 466                                         return FALSE;
 467                                         break;
 468                                 }
 469                         }
 470                         p = punct;
 471                         continue;
 472                 }
 473                 else if (*q && isxdigit(*p) && is_byte_sep(*q)) {
 474                         one_digit[0] = *p;
 475                         one_digit[1] = '\0';
 476
 477                         /*
 478                          * Only one hex digit.
 479                          * "strtoul()" will succeed, as it'll see that
 480                          * hex digit.
 481                          */
 482                         val = (guint8) strtoul(one_digit, NULL, 16);
 483                         g_byte_array_append(bytes, &val, 1);
 484                         p = q + 1;
 485                         continue;
 486                 }
 487                 else if (!*q && isxdigit(*p)) {
 488                         one_digit[0] = *p;
 489                         one_digit[1] = '\0';
 490
 491                         /*
 492                          * Only one hex digit.
 493                          * "strtoul()" will succeed, as it'll see that
 494                          * hex digit.
 495                          */
 496                         val = (guint8) strtoul(one_digit, NULL, 16);
 497                         g_byte_array_append(bytes, &val, 1);
 498                         p = q;
 499                         continue;
 500                 }
 501                 else {
 502                         return FALSE;
 503                 }
 504         }
 505         return TRUE;
 506 }
 507
 508 #define SUBID_BUF_LEN 5
 509 gboolean
 510 oid_str_to_bytes(const char *oid_str, GByteArray *bytes) {
 511   guint32 subid0, subid, sicnt, i;
 512   const char *p, *dot;
 513   guint8 buf[SUBID_BUF_LEN];
 514
 515   g_byte_array_set_size(bytes, 0);
 516
 517   /* check syntax */
 518   p = oid_str;
 519   dot = NULL;
 520   while (*p) {
 521     if (!isdigit(*p) && (*p != '.')) return FALSE;
 522     if (*p == '.') {
 523       if (p == oid_str) return FALSE;
 524       if (!*(p+1)) return FALSE;
 525       if ((p-1) == dot) return FALSE;
 526       dot = p;
 527     }
 528     p++;
 529   }
 530   if (!dot) return FALSE;
 531
 532   p = oid_str;
 533   sicnt = 0;
 534   subid0 = 0;   /* squelch GCC complaints */
 535   while (*p) {
 536     subid = 0;
 537     while (isdigit(*p)) {
 538       subid *= 10;
 539       subid += *p - '0';
 540       p++;
 541     }
 542     if (sicnt == 0) {
 543       subid0 = subid;
 544       if (subid0 > 2) return FALSE;
 545     } else if (sicnt == 1) {
 546       if ((subid0 < 2) && (subid > 39)) return FALSE;
 547       subid += 40 * subid0;
 548     }
 549     if (sicnt) {
 550       i = SUBID_BUF_LEN;
 551       do {
 552         i--;
 553         buf[i] = 0x80 | (subid % 0x80);
 554         subid >>= 7;
 555       } while (subid && i);
 556       buf[SUBID_BUF_LEN-1] &= 0x7F;
 557       g_byte_array_append(bytes, buf + i, SUBID_BUF_LEN - i);
 558     }
 559     sicnt++;
 560     if (*p) p++;
 561   }
 562
 563   return TRUE;
 564 }
 565
 566
 567 /* Return a XML escaped representation of the unescaped string.
 568  * The returned string must be freed when no longer in use. */
 569 gchar *
 570 xml_escape(const gchar *unescaped)
 571 {
 572         GString *buffer = g_string_sized_new(128);
 573         const gchar *p;
 574         gchar c;
 575 #if GLIB_MAJOR_VERSION < 2
 576         gchar *ret;
 577 #endif
 578
 579         p = unescaped;
 580         while ( (c = *p++) ) {
 581                 switch (c) {
 582                         case '<':
 583                                 g_string_append(buffer, "&lt;");
 584                                 break;
 585                         case '>':
 586                                 g_string_append(buffer, "&gt;");
 587                                 break;
 588                         case '&':
 589                                 g_string_append(buffer, "&amp;");
 590                                 break;
 591                         case '\'':
 592                                 g_string_append(buffer, "&apos;");
 593                                 break;
 594                         case '"':
 595                                 g_string_append(buffer, "&quot;");
 596                                 break;
 597                         default:
 598                                 g_string_append_c(buffer, c);
 599                                 break;
 600                 }
 601         }
 602 #if GLIB_MAJOR_VERSION >= 2
 603         /* Return the string value contained within the GString
 604          * after getting rid of the GString structure.
 605          * This is the way to do this, see the GLib reference. */
 606         return g_string_free(buffer, FALSE);
 607 #else
 608         /* But it's not the way to do it in GLib 1.2[.x], as
 609          * 1.2[.x]'s "g_string_free()" doesn't return anything.
 610          * This is the way to do this in GLib 1.2[.x]. */
 611         ret = buffer->str;
 612         g_string_free(buffer, FALSE);
 613         return ret;
 614 #endif
 615 }
 616
 617
 618 /* Return the first occurrence of needle in haystack.
 619  * If not found, return NULL.
 620  * If either haystack or needle has 0 length, return NULL.
 621  * Algorithm copied from GNU's glibc 2.3.2 memcmp() */
 622 const guint8 *
 623 epan_memmem(const guint8 *haystack, guint haystack_len,
 624                 const guint8 *needle, guint needle_len)
 625 {
 626         const guint8 *begin;
 627         const guint8 *const last_possible
 628                 = haystack + haystack_len - needle_len;
 629
 630         if (needle_len == 0) {
 631                 return NULL;
 632         }
 633
 634         if (needle_len > haystack_len) {
 635                 return NULL;
 636         }
 637
 638         for (begin = haystack ; begin <= last_possible; ++begin) {
 639                 if (begin[0] == needle[0] &&
 640                         !memcmp(&begin[1], needle + 1,
 641                                 needle_len - 1)) {
 642                         return begin;
 643                 }
 644         }
 645
 646         return NULL;
 647 }
 648
 649 /*
 650  * Scan the search string to make sure it's valid hex.  Return the
 651  * number of bytes in nbytes.
 652  */
 653 guint8 *
 654 convert_string_to_hex(const char *string, size_t *nbytes)
 655 {
 656   size_t n_bytes;
 657   const char *p;
 658   guchar c;
 659   guint8 *bytes, *q, byte_val;
 660
 661   n_bytes = 0;
 662   p = &string[0];
 663   for (;;) {
 664     c = *p++;
 665     if (c == '\0')
 666       break;
 667     if (isspace(c))
 668       continue; /* allow white space */
 669     if (c==':' || c=='.' || c=='-')
 670       continue; /* skip any ':', '.', or '-' between bytes */
 671     if (!isxdigit(c)) {
 672       /* Not a valid hex digit - fail */
 673       return NULL;
 674     }
 675
 676     /*
 677      * We can only match bytes, not nibbles; we must have a valid
 678      * hex digit immediately after that hex digit.
 679      */
 680     c = *p++;
 681     if (!isxdigit(c))
 682       return NULL;
 683
 684     /* 2 hex digits = 1 byte */
 685     n_bytes++;
 686   }
 687
 688   /*
 689    * Were we given any hex digits?
 690    */
 691   if (n_bytes == 0) {
 692       /* No. */
 693       return NULL;
 694   }
 695
 696   /*
 697    * OK, it's valid, and it generates "n_bytes" bytes; generate the
 698    * raw byte array.
 699    */
 700   bytes = g_malloc(n_bytes);
 701   p = &string[0];
 702   q = &bytes[0];
 703   for (;;) {
 704     c = *p++;
 705     if (c == '\0')
 706       break;
 707     if (isspace(c))
 708       continue; /* allow white space */
 709     if (c==':' || c=='.' || c=='-')
 710       continue; /* skip any ':', '.', or '-' between bytes */
 711     /* From the loop above, we know this is a hex digit */
 712     if (isdigit(c))
 713       byte_val = c - '0';
 714     else if (c >= 'a')
 715       byte_val = (c - 'a') + 10;
 716     else
 717       byte_val = (c - 'A') + 10;
 718     byte_val <<= 4;
 719
 720     /* We also know this is a hex digit */
 721     c = *p++;
 722     if (isdigit(c))
 723       byte_val |= c - '0';
 724     else if (c >= 'a')
 725       byte_val |= (c - 'a') + 10;
 726     else if (c >= 'A')
 727       byte_val |= (c - 'A') + 10;
 728
 729     *q++ = byte_val;
 730   }
 731   *nbytes = n_bytes;
 732   return bytes;
 733 }
 734
 735 /*
 736  * Copy if if it's a case-sensitive search; uppercase it if it's
 737  * a case-insensitive search.
 738  */
 739 char *
 740 convert_string_case(const char *string, gboolean case_insensitive)
 741 {
 742   char *out_string;
 743   const char *p;
 744   char c;
 745   char *q;
 746
 747   if (case_insensitive) {
 748     out_string = g_malloc(strlen(string) + 1);
 749     for (p = &string[0], q = &out_string[0]; (c = *p) != '\0'; p++, q++)
 750       *q = toupper((unsigned char)*p);
 751     *q = '\0';
 752   } else
 753     out_string = g_strdup(string);
 754   return out_string;
 755 }
 756
 757 /* g_strlcat() does not exist in GLib 1.2[.x] */
 758 #if GLIB_MAJOR_VERSION < 2
 759 gsize
 760 g_strlcat(gchar *dst, gchar *src, gsize size)
 761 {
 762         int strl, strs;
 763         strl=strlen(dst);
 764         strs=strlen(src);
 765         if(strl<size)
 766                 g_snprintf(dst+strl, size-strl, "%s", src);
 767         dst[size-1]=0;
 768         return strl+strs;
 769 }
 770 #endif