epan/strutil.c

   1 /* strutil.c
   2  * String utility routines
   3  *
   4  * $Id$
   5  *
   6  * Wireshark - Network traffic analyzer
   7  * By Gerald Combs <gerald@wireshark.org>
   8  * Copyright 1998 Gerald Combs
   9  *
  10  * This program is free software; you can redistribute it and/or
  11  * modify it under the terms of the GNU General Public License
  12  * as published by the Free Software Foundation; either version 2
  13  * of the License, or (at your option) any later version.
  14  *
  15  * This program is distributed in the hope that it will be useful,
  16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  18  * GNU General Public License for more details.
  19  *
  20  * You should have received a copy of the GNU General Public License
  21  * along with this program; if not, write to the Free Software
  22  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
  23  */
  24
  25 #ifdef HAVE_CONFIG_H
  26 # include "config.h"
  27 #endif
  28
  29 #include <stdlib.h>
  30 #include <string.h>
  31 #include <ctype.h>
  32 #include <glib.h>
  33 #include "strutil.h"
  34 #include "emem.h"
  35
  36 #ifdef _WIN32
  37 #include <windows.h>
  38 #include <tchar.h>
  39 #include <wchar.h>
  40 #endif
  41
  42 static const char hex[16] = { '0', '1', '2', '3', '4', '5', '6', '7',
  43                               '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };
  44
  45 /*
  46  * Given a pointer into a data buffer, and to the end of the buffer,
  47  * find the end of the (putative) line at that position in the data
  48  * buffer.
  49  * Return a pointer to the EOL character(s) in "*eol".
  50  */
  51 const guchar *
  52 find_line_end(const guchar *data, const guchar *dataend, const guchar **eol)
  53 {
  54   const guchar *lineend;
  55
  56   lineend = memchr(data, '\n', dataend - data);
  57   if (lineend == NULL) {
  58     /*
  59      * No LF - line is probably continued in next TCP segment.
  60      */
  61     lineend = dataend;
  62     *eol = dataend;
  63   } else {
  64     /*
  65      * Is the LF at the beginning of the line?
  66      */
  67     if (lineend > data) {
  68       /*
  69        * No - is it preceded by a carriage return?
  70        * (Perhaps it's supposed to be, but that's not guaranteed....)
  71        */
  72       if (*(lineend - 1) == '\r') {
  73         /*
  74          * Yes.  The EOL starts with the CR.
  75          */
  76         *eol = lineend - 1;
  77       } else {
  78         /*
  79          * No.  The EOL starts with the LF.
  80          */
  81         *eol = lineend;
  82
  83         /*
  84          * I seem to remember that we once saw lines ending with LF-CR
  85          * in an HTTP request or response, so check if it's *followed*
  86          * by a carriage return.
  87          */
  88         if (lineend < (dataend - 1) && *(lineend + 1) == '\r') {
  89           /*
  90            * It's <non-LF><LF><CR>; say it ends with the CR.
  91            */
  92           lineend++;
  93         }
  94       }
  95     } else {
  96       /*
  97        * Yes - the EOL starts with the LF.
  98        */
  99       *eol = lineend;
 100     }
 101
 102     /*
 103      * Point to the character after the last character.
 104      */
 105     lineend++;
 106   }
 107   return lineend;
 108 }
 109
 110 /*
 111  * Get the length of the next token in a line, and the beginning of the
 112  * next token after that (if any).
 113  * Return 0 if there is no next token.
 114  */
 115 int
 116 get_token_len(const guchar *linep, const guchar *lineend,
 117               const guchar **next_token)
 118 {
 119   const guchar *tokenp;
 120   int token_len;
 121
 122   tokenp = linep;
 123
 124   /*
 125    * Search for a blank, a CR or an LF, or the end of the buffer.
 126    */
 127   while (linep < lineend && *linep != ' ' && *linep != '\r' && *linep != '\n')
 128       linep++;
 129   token_len = (int) (linep - tokenp);
 130
 131   /*
 132    * Skip trailing blanks.
 133    */
 134   while (linep < lineend && *linep == ' ')
 135     linep++;
 136
 137   *next_token = linep;
 138
 139   return token_len;
 140 }
 141
 142
 143 #define INITIAL_FMTBUF_SIZE     128
 144
 145 /*
 146  * XXX - "isprint()" can return "true" for non-ASCII characters, but
 147  * those don't work with GTK+ 1.3 or later, as they take UTF-8 strings
 148  * as input.  Until we fix up Wireshark to properly handle non-ASCII
 149  * characters in all output (both GUI displays and text printouts)
 150  * in those versions of GTK+, we work around the problem by escaping
 151  * all characters that aren't printable ASCII.
 152  *
 153  * We don't know what version of GTK+ we're using, as epan doesn't
 154  * use any GTK+ stuff; we use GLib as a proxy for that, with GLib 2.x
 155  * implying GTK+ 1.3 or later (we don't support GLib 1.3[.x]).
 156  */
 157 #undef isprint
 158 #define isprint(c) (c >= 0x20 && c < 0x7f)
 159
 160 /*
 161  * Given a string, generate a string from it that shows non-printable
 162  * characters as C-style escapes, and return a pointer to it.
 163  */
 164 gchar *
 165 format_text(const guchar *string, size_t len)
 166 {
 167   static gchar *fmtbuf[3];
 168   static int fmtbuf_len[3];
 169   static int idx;
 170   int column;
 171   const guchar *stringend = string + len;
 172   guchar c;
 173   int i;
 174
 175   idx = (idx + 1) % 3;
 176
 177   /*
 178    * Allocate the buffer if it's not already allocated.
 179    */
 180   if (fmtbuf[idx] == NULL) {
 181     fmtbuf[idx] = g_malloc(INITIAL_FMTBUF_SIZE);
 182     fmtbuf_len[idx] = INITIAL_FMTBUF_SIZE;
 183   }
 184   column = 0;
 185   while (string < stringend) {
 186     /*
 187      * Is there enough room for this character, if it expands to
 188      * a backslash plus 3 octal digits (which is the most it can
 189      * expand to), and also enough room for a terminating '\0'?
 190      */
 191     if (column+3+1 >= fmtbuf_len[idx]) {
 192       /*
 193        * Double the buffer's size if it's not big enough.
 194        * The size of the buffer starts at 128, so doubling its size
 195        * adds at least another 128 bytes, which is more than enough
 196        * for one more character plus a terminating '\0'.
 197        */
 198       fmtbuf_len[idx] = fmtbuf_len[idx] * 2;
 199       fmtbuf[idx] = g_realloc(fmtbuf[idx], fmtbuf_len[idx]);
 200     }
 201     c = *string++;
 202
 203     if (isprint(c)) {
 204       fmtbuf[idx][column] = c;
 205       column++;
 206     } else {
 207       fmtbuf[idx][column] =  '\\';
 208       column++;
 209       switch (c) {
 210
 211       case '\a':
 212         fmtbuf[idx][column] = 'a';
 213         column++;
 214         break;
 215
 216       case '\b':
 217         fmtbuf[idx][column] = 'b'; /* BS */
 218         column++;
 219         break;
 220
 221       case '\f':
 222         fmtbuf[idx][column] = 'f'; /* FF */
 223         column++;
 224         break;
 225
 226       case '\n':
 227         fmtbuf[idx][column] = 'n'; /* NL */
 228         column++;
 229         break;
 230
 231       case '\r':
 232         fmtbuf[idx][column] = 'r'; /* CR */
 233         column++;
 234         break;
 235
 236       case '\t':
 237         fmtbuf[idx][column] = 't'; /* tab */
 238         column++;
 239         break;
 240
 241       case '\v':
 242         fmtbuf[idx][column] = 'v';
 243         column++;
 244         break;
 245
 246       default:
 247         i = (c>>6)&03;
 248         fmtbuf[idx][column] = i + '0';
 249         column++;
 250         i = (c>>3)&07;
 251         fmtbuf[idx][column] = i + '0';
 252         column++;
 253         i = (c>>0)&07;
 254         fmtbuf[idx][column] = i + '0';
 255         column++;
 256         break;
 257       }
 258     }
 259   }
 260   fmtbuf[idx][column] = '\0';
 261   return fmtbuf[idx];
 262 }
 263
 264 /*
 265  * Given a string, generate a string from it that shows non-printable
 266  * characters as C-style escapes except a whitespace character
 267  * (space, tab, carriage return, new line, vertical tab, or formfeed)
 268  * which will be replaced by a space, and return a pointer to it.
 269  */
 270 gchar *
 271 format_text_wsp(const guchar *string, size_t len)
 272 {
 273   static gchar *fmtbuf[3];
 274   static int fmtbuf_len[3];
 275   static int idx;
 276   int column;
 277   const guchar *stringend = string + len;
 278   guchar c;
 279   int i;
 280
 281   idx = (idx + 1) % 3;
 282
 283   /*
 284    * Allocate the buffer if it's not already allocated.
 285    */
 286   if (fmtbuf[idx] == NULL) {
 287     fmtbuf[idx] = g_malloc(INITIAL_FMTBUF_SIZE);
 288     fmtbuf_len[idx] = INITIAL_FMTBUF_SIZE;
 289   }
 290   column = 0;
 291   while (string < stringend) {
 292     /*
 293      * Is there enough room for this character, if it expands to
 294      * a backslash plus 3 octal digits (which is the most it can
 295      * expand to), and also enough room for a terminating '\0'?
 296      */
 297     if (column+3+1 >= fmtbuf_len[idx]) {
 298       /*
 299        * Double the buffer's size if it's not big enough.
 300        * The size of the buffer starts at 128, so doubling its size
 301        * adds at least another 128 bytes, which is more than enough
 302        * for one more character plus a terminating '\0'.
 303        */
 304       fmtbuf_len[idx] = fmtbuf_len[idx] * 2;
 305       fmtbuf[idx] = g_realloc(fmtbuf[idx], fmtbuf_len[idx]);
 306     }
 307     c = *string++;
 308
 309     if (isprint(c)) {
 310       fmtbuf[idx][column] = c;
 311       column++;
 312     } else if  (isspace(c)) {
 313       fmtbuf[idx][column] = ' ';
 314       column++;
 315     } else {
 316       fmtbuf[idx][column] =  '\\';
 317       column++;
 318       switch (c) {
 319
 320       case '\a':
 321         fmtbuf[idx][column] = 'a';
 322         column++;
 323         break;
 324
 325       case '\b':
 326         fmtbuf[idx][column] = 'b'; /* BS */
 327         column++;
 328         break;
 329
 330       case '\f':
 331         fmtbuf[idx][column] = 'f'; /* FF */
 332         column++;
 333         break;
 334
 335       case '\n':
 336         fmtbuf[idx][column] = 'n'; /* NL */
 337         column++;
 338         break;
 339
 340       case '\r':
 341         fmtbuf[idx][column] = 'r'; /* CR */
 342         column++;
 343         break;
 344
 345       case '\t':
 346         fmtbuf[idx][column] = 't'; /* tab */
 347         column++;
 348         break;
 349
 350       case '\v':
 351         fmtbuf[idx][column] = 'v';
 352         column++;
 353         break;
 354
 355       default:
 356         i = (c>>6)&03;
 357         fmtbuf[idx][column] = i + '0';
 358         column++;
 359         i = (c>>3)&07;
 360         fmtbuf[idx][column] = i + '0';
 361         column++;
 362         i = (c>>0)&07;
 363         fmtbuf[idx][column] = i + '0';
 364         column++;
 365         break;
 366       }
 367     }
 368   }
 369   fmtbuf[idx][column] = '\0';
 370   return fmtbuf[idx];
 371 }
 372
 373 static gboolean
 374 is_byte_sep(guint8 c)
 375 {
 376         return (c == '-' || c == ':' || c == '.');
 377 }
 378
 379 /* Turn a string of hex digits with optional separators (defined by
 380  * is_byte_sep() into a byte array.
 381  */
 382 gboolean
 383 hex_str_to_bytes(const char *hex_str, GByteArray *bytes, gboolean force_separators) {
 384         guint8          val;
 385         const guchar    *p, *q, *r, *s, *punct;
 386         char            four_digits_first_half[3];
 387         char            four_digits_second_half[3];
 388         char            two_digits[3];
 389         char            one_digit[2];
 390
 391         if (! hex_str || ! bytes) {
 392                 return FALSE;
 393         }
 394         g_byte_array_set_size(bytes, 0);
 395         p = (const guchar *)hex_str;
 396         while (*p) {
 397                 q = p+1;
 398                 r = p+2;
 399                 s = p+3;
 400
 401                 if (*q && *r && *s
 402                     && isxdigit(*p) && isxdigit(*q) &&
 403                     isxdigit(*r) && isxdigit(*s)) {
 404                         four_digits_first_half[0] = *p;
 405                         four_digits_first_half[1] = *q;
 406                         four_digits_first_half[2] = '\0';
 407                         four_digits_second_half[0] = *r;
 408                         four_digits_second_half[1] = *s;
 409                         four_digits_second_half[2] = '\0';
 410
 411                         /*
 412                          * Four or more hex digits in a row.
 413                          */
 414                         val = (guint8) strtoul(four_digits_first_half, NULL, 16);
 415                         g_byte_array_append(bytes, &val, 1);
 416                         val = (guint8) strtoul(four_digits_second_half, NULL, 16);
 417                         g_byte_array_append(bytes, &val, 1);
 418
 419                         punct = s + 1;
 420                         if (*punct) {
 421                                 /*
 422                                  * Make sure the character after
 423                                  * the forth hex digit is a byte
 424                                  * separator, i.e. that we don't have
 425                                  * more than four hex digits, or a
 426                                  * bogus character.
 427                                  */
 428                                 if (is_byte_sep(*punct)) {
 429                                         p = punct + 1;
 430                                         continue;
 431                                 }
 432                                 else if (force_separators) {
 433                                         return FALSE;
 434                                         break;
 435                                 }
 436                         }
 437                         p = punct;
 438                         continue;
 439                 }
 440
 441                 else if (*q && isxdigit(*p) && isxdigit(*q)) {
 442                         two_digits[0] = *p;
 443                         two_digits[1] = *q;
 444                         two_digits[2] = '\0';
 445
 446                         /*
 447                          * Two hex digits in a row.
 448                          */
 449                         val = (guint8) strtoul(two_digits, NULL, 16);
 450                         g_byte_array_append(bytes, &val, 1);
 451                         punct = q + 1;
 452                         if (*punct) {
 453                                 /*
 454                                  * Make sure the character after
 455                                  * the second hex digit is a byte
 456                                  * separator, i.e. that we don't have
 457                                  * more than two hex digits, or a
 458                                  * bogus character.
 459                                  */
 460                                 if (is_byte_sep(*punct)) {
 461                                         p = punct + 1;
 462                                         continue;
 463                                 }
 464                                 else if (force_separators) {
 465                                         return FALSE;
 466                                         break;
 467                                 }
 468                         }
 469                         p = punct;
 470                         continue;
 471                 }
 472                 else if (*q && isxdigit(*p) && is_byte_sep(*q)) {
 473                         one_digit[0] = *p;
 474                         one_digit[1] = '\0';
 475
 476                         /*
 477                          * Only one hex digit (not at the end of the string)
 478                          */
 479                         val = (guint8) strtoul(one_digit, NULL, 16);
 480                         g_byte_array_append(bytes, &val, 1);
 481                         p = q + 1;
 482                         continue;
 483                 }
 484                 else if (!*q && isxdigit(*p)) {
 485                         one_digit[0] = *p;
 486                         one_digit[1] = '\0';
 487
 488                         /*
 489                          * Only one hex digit (at the end of the string)
 490                          */
 491                         val = (guint8) strtoul(one_digit, NULL, 16);
 492                         g_byte_array_append(bytes, &val, 1);
 493                         p = q;
 494                         continue;
 495                 }
 496                 else {
 497                         return FALSE;
 498                 }
 499         }
 500         return TRUE;
 501 }
 502
 503 /*
 504  * Turn an RFC 3986 percent-encoded string into a byte array.
 505  * XXX - We don't check for reserved characters.
 506  */
 507 #define HEX_DIGIT_BUF_LEN 3
 508 gboolean
 509 uri_str_to_bytes(const char *uri_str, GByteArray *bytes) {
 510         guint8          val;
 511         const guchar    *p;
 512         guchar          hex_digit[HEX_DIGIT_BUF_LEN];
 513
 514         g_byte_array_set_size(bytes, 0);
 515         if (! uri_str) {
 516                 return FALSE;
 517         }
 518
 519         p = (const guchar *)uri_str;
 520
 521         while (*p) {
 522                 if (! isascii(*p) || ! isprint(*p))
 523                         return FALSE;
 524                 if (*p == '%') {
 525                         p++;
 526                         if (*p == '\0') return FALSE;
 527                         hex_digit[0] = *p;
 528                         p++;
 529                         if (*p == '\0') return FALSE;
 530                         hex_digit[1] = *p;
 531                         hex_digit[2] = '\0';
 532                         if (! isxdigit(hex_digit[0]) || ! isxdigit(hex_digit[1]))
 533                                 return FALSE;
 534                         val = (guint8) strtoul((char *)hex_digit, NULL, 16);
 535                         g_byte_array_append(bytes, &val, 1);
 536                 } else {
 537                         g_byte_array_append(bytes, (guint8 *) p, 1);
 538                 }
 539                 p++;
 540
 541         }
 542         return TRUE;
 543 }
 544
 545 /*
 546  * Given a GByteArray, generate a string from it that shows non-printable
 547  * characters as percent-style escapes, and return a pointer to it.
 548  */
 549 gchar *
 550 format_uri(const GByteArray *bytes, const gchar *reserved_chars)
 551 {
 552   static gchar *fmtbuf[3];
 553   static guint fmtbuf_len[3];
 554   static guint idx;
 555   const gchar *reserved_def = ":/?#[]@!$&'()*+,;= ";
 556   const gchar *reserved = reserved_def;
 557   guint8 c;
 558   guint column, i;
 559   gboolean is_reserved = FALSE;
 560
 561   if (! bytes)
 562     return "";
 563
 564   idx = (idx + 1) % 3;
 565   if (reserved_chars)
 566     reserved = reserved_chars;
 567
 568   /*
 569    * Allocate the buffer if it's not already allocated.
 570    */
 571   if (fmtbuf[idx] == NULL) {
 572     fmtbuf[idx] = g_malloc(INITIAL_FMTBUF_SIZE);
 573     fmtbuf_len[idx] = INITIAL_FMTBUF_SIZE;
 574   }
 575   for (column = 0; column < bytes->len; column++) {
 576     /*
 577      * Is there enough room for this character, if it expands to
 578      * a percent plus 2 hex digits (which is the most it can
 579      * expand to), and also enough room for a terminating '\0'?
 580      */
 581     if (column+2+1 >= fmtbuf_len[idx]) {
 582       /*
 583        * Double the buffer's size if it's not big enough.
 584        * The size of the buffer starts at 128, so doubling its size
 585        * adds at least another 128 bytes, which is more than enough
 586        * for one more character plus a terminating '\0'.
 587        */
 588       fmtbuf_len[idx] = fmtbuf_len[idx] * 2;
 589       fmtbuf[idx] = g_realloc(fmtbuf[idx], fmtbuf_len[idx]);
 590     }
 591     c = bytes->data[column];
 592
 593     if (!isascii(c) || !isprint(c) || c == '%') {
 594       is_reserved = TRUE;
 595     }
 596
 597     for (i = 0; i < strlen(reserved); i++) {
 598       if (c == reserved[i])
 599         is_reserved = TRUE;
 600     }
 601
 602     if (!is_reserved) {
 603       fmtbuf[idx][column] = c;
 604     } else {
 605       fmtbuf[idx][column] = '%';
 606       column++;
 607       fmtbuf[idx][column] = hex[c >> 4];
 608       column++;
 609       fmtbuf[idx][column] = hex[c & 0xF];
 610     }
 611   }
 612   fmtbuf[idx][column] = '\0';
 613   return fmtbuf[idx];
 614 }
 615
 616 /**
 617  * Create a copy of a GByteArray
 618  *
 619  * @param ba The byte array to be copied.
 620  * @return If ba exists, a freshly allocated copy.  NULL otherwise.
 621  *
 622  */
 623 GByteArray *
 624 byte_array_dup(GByteArray *ba) {
 625     GByteArray *new_ba;
 626
 627     if (!ba)
 628         return NULL;
 629
 630     new_ba = g_byte_array_new();
 631     g_byte_array_append(new_ba, ba->data, ba->len);
 632     return new_ba;
 633 }
 634
 635 #define SUBID_BUF_LEN 5
 636 gboolean
 637 oid_str_to_bytes(const char *oid_str, GByteArray *bytes) {
 638   guint32 subid0, subid, sicnt, i;
 639   const char *p, *dot;
 640   guint8 buf[SUBID_BUF_LEN];
 641
 642   g_byte_array_set_size(bytes, 0);
 643
 644   /* check syntax */
 645   p = oid_str;
 646   dot = NULL;
 647   while (*p) {
 648     if (!isdigit((guchar)*p) && (*p != '.')) return FALSE;
 649     if (*p == '.') {
 650       if (p == oid_str) return FALSE;
 651       if (!*(p+1)) return FALSE;
 652       if ((p-1) == dot) return FALSE;
 653       dot = p;
 654     }
 655     p++;
 656   }
 657   if (!dot) return FALSE;
 658
 659   p = oid_str;
 660   sicnt = 0;
 661   subid0 = 0;   /* squelch GCC complaints */
 662   while (*p) {
 663     subid = 0;
 664     while (isdigit((guchar)*p)) {
 665       subid *= 10;
 666       subid += *p - '0';
 667       p++;
 668     }
 669     if (sicnt == 0) {
 670       subid0 = subid;
 671       if (subid0 > 2) return FALSE;
 672     } else if (sicnt == 1) {
 673       if ((subid0 < 2) && (subid > 39)) return FALSE;
 674       subid += 40 * subid0;
 675     }
 676     if (sicnt) {
 677       i = SUBID_BUF_LEN;
 678       do {
 679         i--;
 680         buf[i] = 0x80 | (subid % 0x80);
 681         subid >>= 7;
 682       } while (subid && i);
 683       buf[SUBID_BUF_LEN-1] &= 0x7F;
 684       g_byte_array_append(bytes, buf + i, SUBID_BUF_LEN - i);
 685     }
 686     sicnt++;
 687     if (*p) p++;
 688   }
 689
 690   return TRUE;
 691 }
 692
 693 /**
 694  * Compare the contents of two GByteArrays
 695  *
 696  * @param ba1 A byte array
 697  * @param ba2 A byte array
 698  * @return If both arrays are non-NULL and their lengths are equal and
 699  *         their contents are equal, returns TRUE.  Otherwise, returns
 700  *         FALSE.
 701  *
 702  * XXX - Should this be in strutil.c?
 703  */
 704 gboolean
 705 byte_array_equal(GByteArray *ba1, GByteArray *ba2) {
 706     if (!ba1 || !ba2)
 707         return FALSE;
 708
 709     if (ba1->len != ba2->len)
 710         return FALSE;
 711
 712     if (memcmp(ba1->data, ba2->data, ba1->len) != 0)
 713         return FALSE;
 714
 715     return TRUE;
 716 }
 717
 718
 719 /* Return a XML escaped representation of the unescaped string.
 720  * The returned string must be freed when no longer in use. */
 721 gchar *
 722 xml_escape(const gchar *unescaped)
 723 {
 724         GString *buffer = g_string_sized_new(128);
 725         const gchar *p;
 726         gchar c;
 727
 728         p = unescaped;
 729         while ( (c = *p++) ) {
 730                 switch (c) {
 731                         case '<':
 732                                 g_string_append(buffer, "&lt;");
 733                                 break;
 734                         case '>':
 735                                 g_string_append(buffer, "&gt;");
 736                                 break;
 737                         case '&':
 738                                 g_string_append(buffer, "&amp;");
 739                                 break;
 740                         case '\'':
 741                                 g_string_append(buffer, "&apos;");
 742                                 break;
 743                         case '"':
 744                                 g_string_append(buffer, "&quot;");
 745                                 break;
 746                         default:
 747                                 g_string_append_c(buffer, c);
 748                                 break;
 749                 }
 750         }
 751         /* Return the string value contained within the GString
 752          * after getting rid of the GString structure.
 753          * This is the way to do this, see the GLib reference. */
 754         return g_string_free(buffer, FALSE);
 755 }
 756
 757
 758 /* Return the first occurrence of needle in haystack.
 759  * If not found, return NULL.
 760  * If either haystack or needle has 0 length, return NULL.
 761  * Algorithm copied from GNU's glibc 2.3.2 memcmp() */
 762 const guint8 *
 763 epan_memmem(const guint8 *haystack, guint haystack_len,
 764                 const guint8 *needle, guint needle_len)
 765 {
 766         const guint8 *begin;
 767         const guint8 *const last_possible
 768                 = haystack + haystack_len - needle_len;
 769
 770         if (needle_len == 0) {
 771                 return NULL;
 772         }
 773
 774         if (needle_len > haystack_len) {
 775                 return NULL;
 776         }
 777
 778         for (begin = haystack ; begin <= last_possible; ++begin) {
 779                 if (begin[0] == needle[0] &&
 780                         !memcmp(&begin[1], needle + 1,
 781                                 needle_len - 1)) {
 782                         return begin;
 783                 }
 784         }
 785
 786         return NULL;
 787 }
 788
 789 /*
 790  * Scan the search string to make sure it's valid hex.  Return the
 791  * number of bytes in nbytes.
 792  */
 793 guint8 *
 794 convert_string_to_hex(const char *string, size_t *nbytes)
 795 {
 796   size_t n_bytes;
 797   const char *p;
 798   guchar c;
 799   guint8 *bytes, *q, byte_val;
 800
 801   n_bytes = 0;
 802   p = &string[0];
 803   for (;;) {
 804     c = *p++;
 805     if (c == '\0')
 806       break;
 807     if (isspace(c))
 808       continue; /* allow white space */
 809     if (c==':' || c=='.' || c=='-')
 810       continue; /* skip any ':', '.', or '-' between bytes */
 811     if (!isxdigit(c)) {
 812       /* Not a valid hex digit - fail */
 813       return NULL;
 814     }
 815
 816     /*
 817      * We can only match bytes, not nibbles; we must have a valid
 818      * hex digit immediately after that hex digit.
 819      */
 820     c = *p++;
 821     if (!isxdigit(c))
 822       return NULL;
 823
 824     /* 2 hex digits = 1 byte */
 825     n_bytes++;
 826   }
 827
 828   /*
 829    * Were we given any hex digits?
 830    */
 831   if (n_bytes == 0) {
 832       /* No. */
 833       return NULL;
 834   }
 835
 836   /*
 837    * OK, it's valid, and it generates "n_bytes" bytes; generate the
 838    * raw byte array.
 839    */
 840   bytes = g_malloc(n_bytes);
 841   p = &string[0];
 842   q = &bytes[0];
 843   for (;;) {
 844     c = *p++;
 845     if (c == '\0')
 846       break;
 847     if (isspace(c))
 848       continue; /* allow white space */
 849     if (c==':' || c=='.' || c=='-')
 850       continue; /* skip any ':', '.', or '-' between bytes */
 851     /* From the loop above, we know this is a hex digit */
 852     if (isdigit(c))
 853       byte_val = c - '0';
 854     else if (c >= 'a')
 855       byte_val = (c - 'a') + 10;
 856     else
 857       byte_val = (c - 'A') + 10;
 858     byte_val <<= 4;
 859
 860     /* We also know this is a hex digit */
 861     c = *p++;
 862     if (isdigit(c))
 863       byte_val |= c - '0';
 864     else if (c >= 'a')
 865       byte_val |= (c - 'a') + 10;
 866     else if (c >= 'A')
 867       byte_val |= (c - 'A') + 10;
 868
 869     *q++ = byte_val;
 870   }
 871   *nbytes = n_bytes;
 872   return bytes;
 873 }
 874
 875 /*
 876  * Copy if if it's a case-sensitive search; uppercase it if it's
 877  * a case-insensitive search.
 878  */
 879 char *
 880 convert_string_case(const char *string, gboolean case_insensitive)
 881 {
 882   char *out_string;
 883   const char *p;
 884   char c;
 885   char *q;
 886
 887   if (case_insensitive) {
 888     out_string = g_malloc(strlen(string) + 1);
 889     for (p = &string[0], q = &out_string[0]; (c = *p) != '\0'; p++, q++)
 890       *q = toupper((unsigned char)*p);
 891     *q = '\0';
 892   } else
 893     out_string = g_strdup(string);
 894   return out_string;
 895 }
 896
 897 char *
 898 epan_strcasestr(const char *haystack, const char *needle)
 899 {
 900         gsize hlen = strlen(haystack);
 901         gsize nlen = strlen(needle);
 902
 903         while (hlen-- >= nlen) {
 904                 if (!g_ascii_strncasecmp(haystack, needle, nlen))
 905                         return (char*) haystack;
 906                 haystack++;
 907         }
 908         return NULL;
 909 }
 910
 911 const char *
 912 string_or_null(const char *string)
 913 {
 914   if (string)
 915     return string;
 916   return "[NULL]";
 917 }
 918
 919 int
 920 escape_string_len(const char *string)
 921 {
 922         const char *p;
 923         gchar c;
 924         int repr_len;
 925
 926         repr_len = 0;
 927         for (p = string; (c = *p) != '\0'; p++) {
 928                 /* Backslashes and double-quotes must
 929                  * be escaped */
 930                 if (c == '\\' || c == '"') {
 931                         repr_len += 2;
 932                 }
 933                 /* Values that can't nicely be represented
 934                  * in ASCII need to be escaped. */
 935                 else if (!isprint((unsigned char)c)) {
 936                         /* c --> \xNN */
 937                         repr_len += 4;
 938                 }
 939                 /* Other characters are just passed through. */
 940                 else {
 941                         repr_len++;
 942                 }
 943         }
 944         return repr_len + 2;    /* string plus leading and trailing quotes */
 945 }
 946
 947 char *
 948 escape_string(char *buf, const char *string)
 949 {
 950   const gchar *p;
 951   gchar c;
 952   char *bufp;
 953   char hexbuf[3];
 954
 955   bufp = buf;
 956   *bufp++ = '"';
 957   for (p = string; (c = *p) != '\0'; p++) {
 958         /* Backslashes and double-quotes must
 959          * be escaped. */
 960         if (c == '\\' || c == '"') {
 961                 *bufp++ = '\\';
 962                 *bufp++ = c;
 963         }
 964         /* Values that can't nicely be represented
 965          * in ASCII need to be escaped. */
 966         else if (!isprint((unsigned char)c)) {
 967                 /* c --> \xNN */
 968                 g_snprintf(hexbuf,sizeof(hexbuf), "%02x", (unsigned char) c);
 969                 *bufp++ = '\\';
 970                 *bufp++ = 'x';
 971                 *bufp++ = hexbuf[0];
 972                 *bufp++ = hexbuf[1];
 973         }
 974         /* Other characters are just passed through. */
 975         else {
 976                 *bufp++ = c;
 977         }
 978   }
 979   *bufp++ = '"';
 980   *bufp = '\0';
 981   return buf;
 982 }
 983
 984 #define GN_CHAR_ALPHABET_SIZE 128
 985
 986 static gunichar IA5_default_alphabet[GN_CHAR_ALPHABET_SIZE] = {
 987
 988     /*ITU-T recommendation T.50 specifies International Reference Alphabet 5 (IA5) */
 989
 990     '?', '?', '?', '?', '?', '?', '?', '?',
 991     '?', '?', '?', '?', '?', '?', '?', '?',
 992     '?', '?', '?', '?', '?', '?', '?', '?',
 993     '?', '?', '?', '?', '?', '?', '?', '?',
 994     ' ', '!', '\"','#', '$', '%', '&', '\'',
 995     '(', ')', '*', '+', ',', '-', '.', '/',
 996     '0', '1', '2', '3', '4', '5', '6', '7',
 997     '8', '9', ':', ';', '<', '=', '>', '?',
 998     '@', 'A', 'B', 'C', 'D', 'E', 'F', 'G',
 999     'H',  'I',  'J',  'K',  'L',  'M',  'N',  'O',
1000     'P',  'Q',  'R',  'S',  'T',  'U',  'V',  'W',
1001     'X',  'Y',  'Z',  '[',  '\\',  ']',  '^',  '_',
1002     '`', 'a',  'b',  'c',  'd',  'e',  'f',  'g',
1003     'h',  'i',  'j',  'k',  'l',  'm',  'n',  'o',
1004     'p',  'q',  'r',  's',  't',  'u',  'v',  'w',
1005     'x',  'y',  'z',  '{',  '|',  '}',  '~',  '?'
1006 };
1007
1008 static gunichar
1009 char_def_ia5_alphabet_decode(unsigned char value)
1010 {
1011     if (value < GN_CHAR_ALPHABET_SIZE)
1012     {
1013                 return IA5_default_alphabet[value];
1014     }
1015     else
1016     {
1017                 return '?';
1018     }
1019 }
1020
1021 void
1022 IA5_7BIT_decode(unsigned char * dest, const unsigned char* src, int len)
1023 {
1024     int i, j;
1025     gunichar buf;
1026
1027
1028     for (i = 0, j = 0; j < len;  j++)
1029     {
1030             buf = char_def_ia5_alphabet_decode(src[j]);
1031             i += g_unichar_to_utf8(buf,&(dest[i]));
1032     }
1033     dest[i]=0;
1034     return;
1035 }
1036
1037 /*
1038  * This function takes a string and copies it, inserting an underscore before
1039  * every underscore in it.
1040  */
1041 gchar*
1042 ws_strdup_escape_underscore (const gchar *str)
1043 {
1044         gchar *p, *q, *new_str;
1045
1046         if(!str)
1047                 return NULL;
1048
1049         p = (gchar *)str;
1050         /* Worst case: A string that is full of underscores */
1051         q = new_str = g_malloc (strlen(str) * 2 + 1);
1052
1053         while(*p != 0)
1054         {
1055                 if(*p == '_')
1056                         *q++ = '_';
1057
1058                 *q++ = *p++;
1059         }
1060         *q++ = '\0';
1061
1062         return new_str;
1063 }
1064
1065 /*
1066  * This function takes a string and copies it, removing any occurences of double
1067  * underscores with a single underscore.
1068  */
1069 gchar*
1070 ws_strdup_unescape_underscore (const gchar *str)
1071 {
1072         gchar *p, *q, *new_str;
1073
1074         if(!str)
1075                 return NULL;
1076
1077         p = (gchar *)str;
1078         /* Worst case: A string that contains no underscores */
1079         q = new_str = g_malloc (strlen(str) + 1);
1080
1081         while(*p != 0)
1082         {
1083                 *q++ = *p;
1084                 if ((*p == '_') && (*(p+1) == '_'))
1085                         p += 2;
1086                 else
1087                         p++;
1088         }
1089         *q++ = '\0';
1090
1091         return new_str;
1092 }
1093
1094 /* Create a newly-allocated string with replacement values. */
1095 gchar *string_replace(const gchar* str, const gchar *old_val, const gchar *new_val) {
1096         gchar **str_parts;
1097         gchar *new_str;
1098
1099         if (!str || !old_val) {
1100                 return NULL;
1101         }
1102
1103         str_parts = g_strsplit(str, old_val, 0);
1104         new_str = g_strjoinv(new_val, str_parts);
1105         g_strfreev(str_parts);
1106
1107         return new_str;
1108 }