epan/strutil.c

   1 /* strutil.c
   2  * String utility routines
   3  *
   4  * $Id$
   5  *
   6  * Wireshark - Network traffic analyzer
   7  * By Gerald Combs <gerald@wireshark.org>
   8  * Copyright 1998 Gerald Combs
   9  *
  10  * This program is free software; you can redistribute it and/or
  11  * modify it under the terms of the GNU General Public License
  12  * as published by the Free Software Foundation; either version 2
  13  * of the License, or (at your option) any later version.
  14  *
  15  * This program is distributed in the hope that it will be useful,
  16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  18  * GNU General Public License for more details.
  19  *
  20  * You should have received a copy of the GNU General Public License
  21  * along with this program; if not, write to the Free Software
  22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
  23  */
  24
  25 #include "config.h"
  26
  27 #include <stdlib.h>
  28 #include <string.h>
  29 #include <ctype.h>
  30 #include <glib.h>
  31 #include "strutil.h"
  32 #include "emem.h"
  33 #include <../isprint.h>
  34
  35
  36 #ifdef _WIN32
  37 #include <windows.h>
  38 #include <tchar.h>
  39 #include <wchar.h>
  40 #endif
  41
  42 static const char hex[16] = { '0', '1', '2', '3', '4', '5', '6', '7',
  43                               '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };
  44
  45 /*
  46  * Given a pointer into a data buffer, and to the end of the buffer,
  47  * find the end of the (putative) line at that position in the data
  48  * buffer.
  49  * Return a pointer to the EOL character(s) in "*eol".
  50  */
  51 const guchar *
  52 find_line_end(const guchar *data, const guchar *dataend, const guchar **eol)
  53 {
  54     const guchar *lineend;
  55
  56     lineend = (guchar *)memchr(data, '\n', dataend - data);
  57     if (lineend == NULL) {
  58         /*
  59          * No LF - line is probably continued in next TCP segment.
  60          */
  61         lineend = dataend;
  62         *eol = dataend;
  63     } else {
  64         /*
  65          * Is the LF at the beginning of the line?
  66          */
  67         if (lineend > data) {
  68             /*
  69              * No - is it preceded by a carriage return?
  70              * (Perhaps it's supposed to be, but that's not guaranteed....)
  71              */
  72             if (*(lineend - 1) == '\r') {
  73                 /*
  74                  * Yes.  The EOL starts with the CR.
  75                  */
  76                 *eol = lineend - 1;
  77             } else {
  78                 /*
  79                  * No.  The EOL starts with the LF.
  80                  */
  81                 *eol = lineend;
  82
  83                 /*
  84                  * I seem to remember that we once saw lines ending with LF-CR
  85                  * in an HTTP request or response, so check if it's *followed*
  86                  * by a carriage return.
  87                  */
  88                 if (lineend < (dataend - 1) && *(lineend + 1) == '\r') {
  89                     /*
  90                      * It's <non-LF><LF><CR>; say it ends with the CR.
  91                      */
  92                     lineend++;
  93                 }
  94             }
  95         } else {
  96             /*
  97              * Yes - the EOL starts with the LF.
  98              */
  99             *eol = lineend;
 100         }
 101
 102         /*
 103          * Point to the character after the last character.
 104          */
 105         lineend++;
 106     }
 107     return lineend;
 108 }
 109
 110 /*
 111  * Get the length of the next token in a line, and the beginning of the
 112  * next token after that (if any).
 113  * Return 0 if there is no next token.
 114  */
 115 int
 116 get_token_len(const guchar *linep, const guchar *lineend,
 117         const guchar **next_token)
 118 {
 119     const guchar *tokenp;
 120     int token_len;
 121
 122     tokenp = linep;
 123
 124     /*
 125      * Search for a blank, a CR or an LF, or the end of the buffer.
 126      */
 127     while (linep < lineend && *linep != ' ' && *linep != '\r' && *linep != '\n')
 128         linep++;
 129     token_len = (int) (linep - tokenp);
 130
 131     /*
 132      * Skip trailing blanks.
 133      */
 134     while (linep < lineend && *linep == ' ')
 135         linep++;
 136
 137     *next_token = linep;
 138
 139     return token_len;
 140 }
 141
 142
 143 #define    INITIAL_FMTBUF_SIZE    128
 144
 145 /*
 146  * Given a string, generate a string from it that shows non-printable
 147  * characters as C-style escapes, and return a pointer to it.
 148  */
 149 gchar *
 150 format_text(const guchar *string, size_t len)
 151 {
 152     static gchar *fmtbuf[3];
 153     static int fmtbuf_len[3];
 154     static int idx;
 155     int column;
 156     const guchar *stringend = string + len;
 157     guchar c;
 158     int i;
 159
 160     idx = (idx + 1) % 3;
 161
 162     /*
 163      * Allocate the buffer if it's not already allocated.
 164      */
 165     if (fmtbuf[idx] == NULL) {
 166         fmtbuf[idx] = (gchar *)g_malloc(INITIAL_FMTBUF_SIZE);
 167         fmtbuf_len[idx] = INITIAL_FMTBUF_SIZE;
 168     }
 169     column = 0;
 170     while (string < stringend) {
 171         /*
 172          * Is there enough room for this character, if it expands to
 173          * a backslash plus 3 octal digits (which is the most it can
 174          * expand to), and also enough room for a terminating '\0'?
 175          */
 176         if (column+3+1 >= fmtbuf_len[idx]) {
 177             /*
 178              * Double the buffer's size if it's not big enough.
 179              * The size of the buffer starts at 128, so doubling its size
 180              * adds at least another 128 bytes, which is more than enough
 181              * for one more character plus a terminating '\0'.
 182              */
 183             fmtbuf_len[idx] = fmtbuf_len[idx] * 2;
 184             fmtbuf[idx] = (gchar *)g_realloc(fmtbuf[idx], fmtbuf_len[idx]);
 185         }
 186         c = *string++;
 187
 188         if (isprint(c)) {
 189             fmtbuf[idx][column] = c;
 190             column++;
 191         } else {
 192             fmtbuf[idx][column] =  '\\';
 193             column++;
 194             switch (c) {
 195
 196                 case '\a':
 197                     fmtbuf[idx][column] = 'a';
 198                     column++;
 199                     break;
 200
 201                 case '\b':
 202                     fmtbuf[idx][column] = 'b'; /* BS */
 203                     column++;
 204                     break;
 205
 206                 case '\f':
 207                     fmtbuf[idx][column] = 'f'; /* FF */
 208                     column++;
 209                     break;
 210
 211                 case '\n':
 212                     fmtbuf[idx][column] = 'n'; /* NL */
 213                     column++;
 214                     break;
 215
 216                 case '\r':
 217                     fmtbuf[idx][column] = 'r'; /* CR */
 218                     column++;
 219                     break;
 220
 221                 case '\t':
 222                     fmtbuf[idx][column] = 't'; /* tab */
 223                     column++;
 224                     break;
 225
 226                 case '\v':
 227                     fmtbuf[idx][column] = 'v';
 228                     column++;
 229                     break;
 230
 231                 default:
 232                     i = (c>>6)&03;
 233                     fmtbuf[idx][column] = i + '0';
 234                     column++;
 235                     i = (c>>3)&07;
 236                     fmtbuf[idx][column] = i + '0';
 237                     column++;
 238                     i = (c>>0)&07;
 239                     fmtbuf[idx][column] = i + '0';
 240                     column++;
 241                     break;
 242             }
 243         }
 244     }
 245     fmtbuf[idx][column] = '\0';
 246     return fmtbuf[idx];
 247 }
 248
 249 /*
 250  * Given a string, generate a string from it that shows non-printable
 251  * characters as C-style escapes except a whitespace character
 252  * (space, tab, carriage return, new line, vertical tab, or formfeed)
 253  * which will be replaced by a space, and return a pointer to it.
 254  */
 255 gchar *
 256 format_text_wsp(const guchar *string, size_t len)
 257 {
 258     static gchar *fmtbuf[3];
 259     static int fmtbuf_len[3];
 260     static int idx;
 261     int column;
 262     const guchar *stringend = string + len;
 263     guchar c;
 264     int i;
 265
 266     idx = (idx + 1) % 3;
 267
 268     /*
 269      * Allocate the buffer if it's not already allocated.
 270      */
 271     if (fmtbuf[idx] == NULL) {
 272         fmtbuf[idx] = (gchar *)g_malloc(INITIAL_FMTBUF_SIZE);
 273         fmtbuf_len[idx] = INITIAL_FMTBUF_SIZE;
 274     }
 275     column = 0;
 276     while (string < stringend) {
 277         /*
 278          * Is there enough room for this character, if it expands to
 279          * a backslash plus 3 octal digits (which is the most it can
 280          * expand to), and also enough room for a terminating '\0'?
 281          */
 282         if (column+3+1 >= fmtbuf_len[idx]) {
 283             /*
 284              * Double the buffer's size if it's not big enough.
 285              * The size of the buffer starts at 128, so doubling its size
 286              * adds at least another 128 bytes, which is more than enough
 287              * for one more character plus a terminating '\0'.
 288              */
 289             fmtbuf_len[idx] = fmtbuf_len[idx] * 2;
 290             fmtbuf[idx] = (gchar *)g_realloc(fmtbuf[idx], fmtbuf_len[idx]);
 291         }
 292         c = *string++;
 293
 294         if (isprint(c)) {
 295             fmtbuf[idx][column] = c;
 296             column++;
 297         } else if  (isspace(c)) {
 298             fmtbuf[idx][column] = ' ';
 299             column++;
 300         } else {
 301             fmtbuf[idx][column] =  '\\';
 302             column++;
 303             switch (c) {
 304
 305                 case '\a':
 306                     fmtbuf[idx][column] = 'a';
 307                     column++;
 308                     break;
 309
 310                 case '\b':
 311                     fmtbuf[idx][column] = 'b'; /* BS */
 312                     column++;
 313                     break;
 314
 315                 case '\f':
 316                     fmtbuf[idx][column] = 'f'; /* FF */
 317                     column++;
 318                     break;
 319
 320                 case '\n':
 321                     fmtbuf[idx][column] = 'n'; /* NL */
 322                     column++;
 323                     break;
 324
 325                 case '\r':
 326                     fmtbuf[idx][column] = 'r'; /* CR */
 327                     column++;
 328                     break;
 329
 330                 case '\t':
 331                     fmtbuf[idx][column] = 't'; /* tab */
 332                     column++;
 333                     break;
 334
 335                 case '\v':
 336                     fmtbuf[idx][column] = 'v';
 337                     column++;
 338                     break;
 339
 340                 default:
 341                     i = (c>>6)&03;
 342                     fmtbuf[idx][column] = i + '0';
 343                     column++;
 344                     i = (c>>3)&07;
 345                     fmtbuf[idx][column] = i + '0';
 346                     column++;
 347                     i = (c>>0)&07;
 348                     fmtbuf[idx][column] = i + '0';
 349                     column++;
 350                     break;
 351             }
 352         }
 353     }
 354     fmtbuf[idx][column] = '\0';
 355     return fmtbuf[idx];
 356 }
 357
 358 static gboolean
 359 is_byte_sep(guint8 c)
 360 {
 361     return (c == '-' || c == ':' || c == '.');
 362 }
 363
 364 /* Turn a string of hex digits with optional separators (defined by
 365  * is_byte_sep() into a byte array.
 366  */
 367 gboolean
 368 hex_str_to_bytes(const char *hex_str, GByteArray *bytes, gboolean force_separators) {
 369     guint8        val;
 370     const guchar    *p, *q, *r, *s, *punct;
 371     char        four_digits_first_half[3];
 372     char        four_digits_second_half[3];
 373     char        two_digits[3];
 374     char        one_digit[2];
 375
 376     if (! hex_str || ! bytes) {
 377         return FALSE;
 378     }
 379     g_byte_array_set_size(bytes, 0);
 380     p = (const guchar *)hex_str;
 381     while (*p) {
 382         q = p+1;
 383         r = p+2;
 384         s = p+3;
 385
 386         if (*q && *r && *s
 387                 && isxdigit(*p) && isxdigit(*q) &&
 388                 isxdigit(*r) && isxdigit(*s)) {
 389             four_digits_first_half[0] = *p;
 390             four_digits_first_half[1] = *q;
 391             four_digits_first_half[2] = '\0';
 392             four_digits_second_half[0] = *r;
 393             four_digits_second_half[1] = *s;
 394             four_digits_second_half[2] = '\0';
 395
 396             /*
 397              * Four or more hex digits in a row.
 398              */
 399             val = (guint8) strtoul(four_digits_first_half, NULL, 16);
 400             g_byte_array_append(bytes, &val, 1);
 401             val = (guint8) strtoul(four_digits_second_half, NULL, 16);
 402             g_byte_array_append(bytes, &val, 1);
 403
 404             punct = s + 1;
 405             if (*punct) {
 406                 /*
 407                  * Make sure the character after
 408                  * the forth hex digit is a byte
 409                  * separator, i.e. that we don't have
 410                  * more than four hex digits, or a
 411                  * bogus character.
 412                  */
 413                 if (is_byte_sep(*punct)) {
 414                     p = punct + 1;
 415                     continue;
 416                 }
 417                 else if (force_separators) {
 418                     return FALSE;
 419                 }
 420             }
 421             p = punct;
 422             continue;
 423         }
 424         else if (*q && isxdigit(*p) && isxdigit(*q)) {
 425             two_digits[0] = *p;
 426             two_digits[1] = *q;
 427             two_digits[2] = '\0';
 428
 429             /*
 430              * Two hex digits in a row.
 431              */
 432             val = (guint8) strtoul(two_digits, NULL, 16);
 433             g_byte_array_append(bytes, &val, 1);
 434             punct = q + 1;
 435             if (*punct) {
 436                 /*
 437                  * Make sure the character after
 438                  * the second hex digit is a byte
 439                  * separator, i.e. that we don't have
 440                  * more than two hex digits, or a
 441                  * bogus character.
 442                  */
 443                 if (is_byte_sep(*punct)) {
 444                     p = punct + 1;
 445                     continue;
 446                 }
 447                 else if (force_separators) {
 448                     return FALSE;
 449                 }
 450             }
 451             p = punct;
 452             continue;
 453         }
 454         else if (*q && isxdigit(*p) && is_byte_sep(*q)) {
 455             one_digit[0] = *p;
 456             one_digit[1] = '\0';
 457
 458             /*
 459              * Only one hex digit (not at the end of the string)
 460              */
 461             val = (guint8) strtoul(one_digit, NULL, 16);
 462             g_byte_array_append(bytes, &val, 1);
 463             p = q + 1;
 464             continue;
 465         }
 466         else if (!*q && isxdigit(*p)) {
 467             one_digit[0] = *p;
 468             one_digit[1] = '\0';
 469
 470             /*
 471              * Only one hex digit (at the end of the string)
 472              */
 473             val = (guint8) strtoul(one_digit, NULL, 16);
 474             g_byte_array_append(bytes, &val, 1);
 475             p = q;
 476             continue;
 477         }
 478         else {
 479             return FALSE;
 480         }
 481     }
 482     return TRUE;
 483 }
 484
 485 /*
 486  * Turn an RFC 3986 percent-encoded string into a byte array.
 487  * XXX - We don't check for reserved characters.
 488  */
 489 #define HEX_DIGIT_BUF_LEN 3
 490 gboolean
 491 uri_str_to_bytes(const char *uri_str, GByteArray *bytes) {
 492     guint8        val;
 493     const guchar    *p;
 494     guchar        hex_digit[HEX_DIGIT_BUF_LEN];
 495
 496     g_byte_array_set_size(bytes, 0);
 497     if (! uri_str) {
 498         return FALSE;
 499     }
 500
 501     p = (const guchar *)uri_str;
 502
 503     while (*p) {
 504         if (! isascii(*p) || ! isprint(*p))
 505             return FALSE;
 506         if (*p == '%') {
 507             p++;
 508             if (*p == '\0') return FALSE;
 509             hex_digit[0] = *p;
 510             p++;
 511             if (*p == '\0') return FALSE;
 512             hex_digit[1] = *p;
 513             hex_digit[2] = '\0';
 514             if (! isxdigit(hex_digit[0]) || ! isxdigit(hex_digit[1]))
 515                 return FALSE;
 516             val = (guint8) strtoul((char *)hex_digit, NULL, 16);
 517             g_byte_array_append(bytes, &val, 1);
 518         } else {
 519             g_byte_array_append(bytes, (const guint8 *) p, 1);
 520         }
 521         p++;
 522
 523     }
 524     return TRUE;
 525 }
 526
 527 /*
 528  * Given a GByteArray, generate a string from it that shows non-printable
 529  * characters as percent-style escapes, and return a pointer to it.
 530  */
 531 const gchar *
 532 format_uri(const GByteArray *bytes, const gchar *reserved_chars)
 533 {
 534     static gchar *fmtbuf[3];
 535     static guint fmtbuf_len[3];
 536     static guint idx;
 537     static const guchar *reserved_def = ":/?#[]@!$&'()*+,;= ";
 538     const guchar *reserved = reserved_def;
 539     guint8 c;
 540     guint column, i;
 541     gboolean is_reserved = FALSE;
 542
 543     if (! bytes)
 544         return "";
 545
 546     idx = (idx + 1) % 3;
 547     if (reserved_chars)
 548         reserved = reserved_chars;
 549
 550     /*
 551      * Allocate the buffer if it's not already allocated.
 552      */
 553     if (fmtbuf[idx] == NULL) {
 554         fmtbuf[idx] = (gchar *)g_malloc(INITIAL_FMTBUF_SIZE);
 555         fmtbuf_len[idx] = INITIAL_FMTBUF_SIZE;
 556     }
 557     for (column = 0; column < bytes->len; column++) {
 558         /*
 559          * Is there enough room for this character, if it expands to
 560          * a percent plus 2 hex digits (which is the most it can
 561          * expand to), and also enough room for a terminating '\0'?
 562          */
 563         if (column+2+1 >= fmtbuf_len[idx]) {
 564             /*
 565              * Double the buffer's size if it's not big enough.
 566              * The size of the buffer starts at 128, so doubling its size
 567              * adds at least another 128 bytes, which is more than enough
 568              * for one more character plus a terminating '\0'.
 569              */
 570             fmtbuf_len[idx] = fmtbuf_len[idx] * 2;
 571             fmtbuf[idx] = (gchar *)g_realloc(fmtbuf[idx], fmtbuf_len[idx]);
 572         }
 573         c = bytes->data[column];
 574
 575         if (!isascii(c) || !isprint(c) || c == '%') {
 576             is_reserved = TRUE;
 577         }
 578
 579         for (i = 0; reserved[i]; i++) {
 580             if (c == reserved[i])
 581                 is_reserved = TRUE;
 582         }
 583
 584         if (!is_reserved) {
 585             fmtbuf[idx][column] = c;
 586         } else {
 587             fmtbuf[idx][column] = '%';
 588             column++;
 589             fmtbuf[idx][column] = hex[c >> 4];
 590             column++;
 591             fmtbuf[idx][column] = hex[c & 0xF];
 592         }
 593     }
 594     fmtbuf[idx][column] = '\0';
 595     return fmtbuf[idx];
 596 }
 597
 598 /**
 599  * Create a copy of a GByteArray
 600  *
 601  * @param ba The byte array to be copied.
 602  * @return If ba exists, a freshly allocated copy.  NULL otherwise.
 603  *
 604  */
 605 GByteArray *
 606 byte_array_dup(GByteArray *ba) {
 607     GByteArray *new_ba;
 608
 609     if (!ba)
 610         return NULL;
 611
 612     new_ba = g_byte_array_new();
 613     g_byte_array_append(new_ba, ba->data, ba->len);
 614     return new_ba;
 615 }
 616
 617 #define SUBID_BUF_LEN 5
 618 gboolean
 619 oid_str_to_bytes(const char *oid_str, GByteArray *bytes) {
 620     guint32 subid0, subid, sicnt, i;
 621     const char *p, *dot;
 622     guint8 buf[SUBID_BUF_LEN];
 623
 624     g_byte_array_set_size(bytes, 0);
 625
 626     /* check syntax */
 627     p = oid_str;
 628     dot = NULL;
 629     while (*p) {
 630         if (!isdigit((guchar)*p) && (*p != '.')) return FALSE;
 631         if (*p == '.') {
 632             if (p == oid_str) return FALSE;
 633             if (!*(p+1)) return FALSE;
 634             if ((p-1) == dot) return FALSE;
 635             dot = p;
 636         }
 637         p++;
 638     }
 639     if (!dot) return FALSE;
 640
 641     p = oid_str;
 642     sicnt = 0;
 643     subid0 = 0;    /* squelch GCC complaints */
 644     while (*p) {
 645         subid = 0;
 646         while (isdigit((guchar)*p)) {
 647             subid *= 10;
 648             subid += *p - '0';
 649             p++;
 650         }
 651         if (sicnt == 0) {
 652             subid0 = subid;
 653             if (subid0 > 2) return FALSE;
 654         } else if (sicnt == 1) {
 655             if ((subid0 < 2) && (subid > 39)) return FALSE;
 656             subid += 40 * subid0;
 657         }
 658         if (sicnt) {
 659             i = SUBID_BUF_LEN;
 660             do {
 661                 i--;
 662                 buf[i] = 0x80 | (subid % 0x80);
 663                 subid >>= 7;
 664             } while (subid && i);
 665             buf[SUBID_BUF_LEN-1] &= 0x7F;
 666             g_byte_array_append(bytes, buf + i, SUBID_BUF_LEN - i);
 667         }
 668         sicnt++;
 669         if (*p) p++;
 670     }
 671
 672     return TRUE;
 673 }
 674
 675 /**
 676  * Compare the contents of two GByteArrays
 677  *
 678  * @param ba1 A byte array
 679  * @param ba2 A byte array
 680  * @return If both arrays are non-NULL and their lengths are equal and
 681  *         their contents are equal, returns TRUE.  Otherwise, returns
 682  *         FALSE.
 683  *
 684  * XXX - Should this be in strutil.c?
 685  */
 686 gboolean
 687 byte_array_equal(GByteArray *ba1, GByteArray *ba2) {
 688     if (!ba1 || !ba2)
 689         return FALSE;
 690
 691     if (ba1->len != ba2->len)
 692         return FALSE;
 693
 694     if (memcmp(ba1->data, ba2->data, ba1->len) != 0)
 695         return FALSE;
 696
 697     return TRUE;
 698 }
 699
 700
 701 /* Return a XML escaped representation of the unescaped string.
 702  * The returned string must be freed when no longer in use. */
 703 gchar *
 704 xml_escape(const gchar *unescaped)
 705 {
 706     GString *buffer = g_string_sized_new(128);
 707     const gchar *p;
 708     gchar c;
 709
 710     p = unescaped;
 711     while ( (c = *p++) ) {
 712         switch (c) {
 713             case '<':
 714                 g_string_append(buffer, "&lt;");
 715                 break;
 716             case '>':
 717                 g_string_append(buffer, "&gt;");
 718                 break;
 719             case '&':
 720                 g_string_append(buffer, "&amp;");
 721                 break;
 722             case '\'':
 723                 g_string_append(buffer, "&apos;");
 724                 break;
 725             case '"':
 726                 g_string_append(buffer, "&quot;");
 727                 break;
 728             default:
 729                 g_string_append_c(buffer, c);
 730                 break;
 731         }
 732     }
 733     /* Return the string value contained within the GString
 734      * after getting rid of the GString structure.
 735      * This is the way to do this, see the GLib reference. */
 736     return g_string_free(buffer, FALSE);
 737 }
 738
 739
 740 /* Return the first occurrence of needle in haystack.
 741  * If not found, return NULL.
 742  * If either haystack or needle has 0 length, return NULL.
 743  * Algorithm copied from GNU's glibc 2.3.2 memcmp() */
 744 const guint8 *
 745 epan_memmem(const guint8 *haystack, guint haystack_len,
 746         const guint8 *needle, guint needle_len)
 747 {
 748     const guint8 *begin;
 749     const guint8 *const last_possible = haystack + haystack_len - needle_len;
 750
 751     if (needle_len == 0) {
 752         return NULL;
 753     }
 754
 755     if (needle_len > haystack_len) {
 756         return NULL;
 757     }
 758
 759     for (begin = haystack ; begin <= last_possible; ++begin) {
 760         if (begin[0] == needle[0] &&
 761                 !memcmp(&begin[1], needle + 1,
 762                     needle_len - 1)) {
 763             return begin;
 764         }
 765     }
 766
 767     return NULL;
 768 }
 769
 770 /*
 771  * Scan the search string to make sure it's valid hex.  Return the
 772  * number of bytes in nbytes.
 773  */
 774 guint8 *
 775 convert_string_to_hex(const char *string, size_t *nbytes)
 776 {
 777     size_t n_bytes;
 778     const char *p;
 779     guchar c;
 780     guint8 *bytes, *q, byte_val;
 781
 782     n_bytes = 0;
 783     p = &string[0];
 784     for (;;) {
 785         c = *p++;
 786         if (c == '\0')
 787             break;
 788         if (isspace(c))
 789             continue;    /* allow white space */
 790         if (c==':' || c=='.' || c=='-')
 791             continue; /* skip any ':', '.', or '-' between bytes */
 792         if (!isxdigit(c)) {
 793             /* Not a valid hex digit - fail */
 794             return NULL;
 795         }
 796
 797         /*
 798          * We can only match bytes, not nibbles; we must have a valid
 799          * hex digit immediately after that hex digit.
 800          */
 801         c = *p++;
 802         if (!isxdigit(c))
 803             return NULL;
 804
 805         /* 2 hex digits = 1 byte */
 806         n_bytes++;
 807     }
 808
 809     /*
 810      * Were we given any hex digits?
 811      */
 812     if (n_bytes == 0) {
 813         /* No. */
 814         return NULL;
 815     }
 816
 817     /*
 818      * OK, it's valid, and it generates "n_bytes" bytes; generate the
 819      * raw byte array.
 820      */
 821     bytes = (guint8 *)g_malloc(n_bytes);
 822     p = &string[0];
 823     q = &bytes[0];
 824     for (;;) {
 825         c = *p++;
 826         if (c == '\0')
 827             break;
 828         if (isspace(c))
 829             continue;    /* allow white space */
 830         if (c==':' || c=='.' || c=='-')
 831             continue; /* skip any ':', '.', or '-' between bytes */
 832         /* From the loop above, we know this is a hex digit */
 833         if (isdigit(c))
 834             byte_val = c - '0';
 835         else if (c >= 'a')
 836             byte_val = (c - 'a') + 10;
 837         else
 838             byte_val = (c - 'A') + 10;
 839         byte_val <<= 4;
 840
 841         /* We also know this is a hex digit */
 842         c = *p++;
 843         if (isdigit(c))
 844             byte_val |= c - '0';
 845         else if (c >= 'a')
 846             byte_val |= (c - 'a') + 10;
 847         else if (c >= 'A')
 848             byte_val |= (c - 'A') + 10;
 849
 850         *q++ = byte_val;
 851     }
 852     *nbytes = n_bytes;
 853     return bytes;
 854 }
 855
 856 /*
 857  * Copy if if it's a case-sensitive search; uppercase it if it's
 858  * a case-insensitive search.
 859  */
 860 char *
 861 convert_string_case(const char *string, gboolean case_insensitive)
 862 {
 863
 864     if (case_insensitive) {
 865         return g_utf8_strup(string, -1);
 866     } else {
 867         return g_strdup(string);
 868     }
 869 }
 870
 871 char *
 872 epan_strcasestr(const char *haystack, const char *needle)
 873 {
 874     gsize hlen = strlen(haystack);
 875     gsize nlen = strlen(needle);
 876
 877     while (hlen-- >= nlen) {
 878         if (!g_ascii_strncasecmp(haystack, needle, nlen))
 879             return (char*) haystack;
 880         haystack++;
 881     }
 882     return NULL;
 883 }
 884
 885 const char *
 886 string_or_null(const char *string)
 887 {
 888     if (string)
 889         return string;
 890     return "[NULL]";
 891 }
 892
 893 int
 894 escape_string_len(const char *string)
 895 {
 896     const char *p;
 897     gchar c;
 898     int repr_len;
 899
 900     repr_len = 0;
 901     for (p = string; (c = *p) != '\0'; p++) {
 902         /* Backslashes and double-quotes must
 903          * be escaped */
 904         if (c == '\\' || c == '"') {
 905             repr_len += 2;
 906         }
 907         /* Values that can't nicely be represented
 908          * in ASCII need to be escaped. */
 909         else if (!isprint((unsigned char)c)) {
 910             /* c --> \xNN */
 911             repr_len += 4;
 912         }
 913         /* Other characters are just passed through. */
 914         else {
 915             repr_len++;
 916         }
 917     }
 918     return repr_len + 2;    /* string plus leading and trailing quotes */
 919 }
 920
 921 char *
 922 escape_string(char *buf, const char *string)
 923 {
 924     const gchar *p;
 925     gchar c;
 926     char *bufp;
 927     char hexbuf[3];
 928
 929     bufp = buf;
 930     *bufp++ = '"';
 931     for (p = string; (c = *p) != '\0'; p++) {
 932         /* Backslashes and double-quotes must
 933          * be escaped. */
 934         if (c == '\\' || c == '"') {
 935             *bufp++ = '\\';
 936             *bufp++ = c;
 937         }
 938         /* Values that can't nicely be represented
 939          * in ASCII need to be escaped. */
 940         else if (!isprint((unsigned char)c)) {
 941             /* c --> \xNN */
 942             g_snprintf(hexbuf,sizeof(hexbuf), "%02x", (unsigned char) c);
 943             *bufp++ = '\\';
 944             *bufp++ = 'x';
 945             *bufp++ = hexbuf[0];
 946             *bufp++ = hexbuf[1];
 947         }
 948         /* Other characters are just passed through. */
 949         else {
 950             *bufp++ = c;
 951         }
 952     }
 953     *bufp++ = '"';
 954     *bufp = '\0';
 955     return buf;
 956 }
 957
 958 #define GN_CHAR_ALPHABET_SIZE 128
 959
 960 static gunichar IA5_default_alphabet[GN_CHAR_ALPHABET_SIZE] = {
 961
 962     /*ITU-T recommendation T.50 specifies International Reference Alphabet 5 (IA5) */
 963
 964     '?', '?', '?', '?', '?', '?', '?', '?',
 965     '?', '?', '?', '?', '?', '?', '?', '?',
 966     '?', '?', '?', '?', '?', '?', '?', '?',
 967     '?', '?', '?', '?', '?', '?', '?', '?',
 968     ' ', '!', '\"','#', '$', '%', '&', '\'',
 969     '(', ')', '*', '+', ',', '-', '.', '/',
 970     '0', '1', '2', '3', '4', '5', '6', '7',
 971     '8', '9', ':', ';', '<', '=', '>', '?',
 972     '@', 'A', 'B', 'C', 'D', 'E', 'F', 'G',
 973     'H',  'I',  'J',  'K',  'L',  'M',  'N',  'O',
 974     'P',  'Q',  'R',  'S',  'T',  'U',  'V',  'W',
 975     'X',  'Y',  'Z',  '[',  '\\',  ']',  '^',  '_',
 976     '`', 'a',  'b',  'c',  'd',  'e',  'f',  'g',
 977     'h',  'i',  'j',  'k',  'l',  'm',  'n',  'o',
 978     'p',  'q',  'r',  's',  't',  'u',  'v',  'w',
 979     'x',  'y',  'z',  '{',  '|',  '}',  '~',  '?'
 980 };
 981
 982 static gunichar
 983 char_def_ia5_alphabet_decode(unsigned char value)
 984 {
 985     if (value < GN_CHAR_ALPHABET_SIZE) {
 986         return IA5_default_alphabet[value];
 987     }
 988     else {
 989         return '?';
 990     }
 991 }
 992
 993 void
 994 IA5_7BIT_decode(unsigned char * dest, const unsigned char* src, int len)
 995 {
 996     int i, j;
 997     gunichar buf;
 998
 999     for (i = 0, j = 0; j < len;  j++) {
1000         buf = char_def_ia5_alphabet_decode(src[j]);
1001         i += g_unichar_to_utf8(buf,&(dest[i]));
1002     }
1003     dest[i]=0;
1004     return;
1005 }
1006
1007 /*
1008  * This function takes a string and copies it, inserting a 'chr' before
1009  * every 'chr' in it.
1010  */
1011 gchar*
1012 ws_strdup_escape_char (const gchar *str, const gchar chr)
1013 {
1014     const gchar *p;
1015     gchar *q, *new_str;
1016
1017     if(!str)
1018         return NULL;
1019
1020     p = str;
1021     /* Worst case: A string that is full of 'chr' */
1022     q = new_str = (gchar *)g_malloc (strlen(str) * 2 + 1);
1023
1024     while(*p != 0) {
1025         if(*p == chr)
1026             *q++ = chr;
1027
1028         *q++ = *p++;
1029     }
1030     *q = '\0';
1031
1032     return new_str;
1033 }
1034
1035 /*
1036  * This function takes a string and copies it, removing any occurences of double
1037  * 'chr' with a single 'chr'.
1038  */
1039 gchar*
1040 ws_strdup_unescape_char (const gchar *str, const char chr)
1041 {
1042     const gchar *p;
1043     gchar *q, *new_str;
1044
1045     if(!str)
1046         return NULL;
1047
1048     p = str;
1049     /* Worst case: A string that contains no 'chr' */
1050     q = new_str = (gchar *)g_malloc (strlen(str) + 1);
1051
1052     while(*p != 0) {
1053         *q++ = *p;
1054         if ((*p == chr) && (*(p+1) == chr))
1055             p += 2;
1056         else
1057             p++;
1058     }
1059     *q = '\0';
1060
1061     return new_str;
1062 }
1063
1064 /* Create a newly-allocated string with replacement values. */
1065 gchar *string_replace(const gchar* str, const gchar *old_val, const gchar *new_val) {
1066     gchar **str_parts;
1067     gchar *new_str;
1068
1069     if (!str || !old_val) {
1070         return NULL;
1071     }
1072
1073     str_parts = g_strsplit(str, old_val, 0);
1074     new_str = g_strjoinv(new_val, str_parts);
1075     g_strfreev(str_parts);
1076
1077     return new_str;
1078 }
1079
1080 /*
1081  * Editor modelines  -  http://www.wireshark.org/tools/modelines.html
1082  *
1083  * Local variables:
1084  * c-basic-offset: 4
1085  * tab-width: 8
1086  * indent-tabs-mode: nil
1087  * End:
1088  *
1089  * vi: set shiftwidth=4 tabstop=8 expandtab:
1090  * :indentSize=4:tabSize=8:noTabs=true:
1091  */