epan/strutil.c

   1 /* strutil.c
   2  * String utility routines
   3  *
   4  * Wireshark - Network traffic analyzer
   5  * By Gerald Combs <gerald@wireshark.org>
   6  * Copyright 1998 Gerald Combs
   7  *
   8  * This program is free software; you can redistribute it and/or
   9  * modify it under the terms of the GNU General Public License
  10  * as published by the Free Software Foundation; either version 2
  11  * of the License, or (at your option) any later version.
  12  *
  13  * This program is distributed in the hope that it will be useful,
  14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16  * GNU General Public License for more details.
  17  *
  18  * You should have received a copy of the GNU General Public License
  19  * along with this program; if not, write to the Free Software
  20  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
  21  */
  22
  23 #include "config.h"
  24
  25 #include <stdlib.h>
  26 #include <string.h>
  27 #include <glib.h>
  28 #include "strutil.h"
  29
  30 #include <wsutil/str_util.h>
  31 #include <epan/proto.h>
  32
  33 #ifdef _WIN32
  34 #include <windows.h>
  35 #include <tchar.h>
  36 #include <wchar.h>
  37 #endif
  38
  39 static const char hex[16] = { '0', '1', '2', '3', '4', '5', '6', '7',
  40                               '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };
  41
  42 /*
  43  * Given a pointer into a data buffer, and to the end of the buffer,
  44  * find the end of the (putative) line at that position in the data
  45  * buffer.
  46  * Return a pointer to the EOL character(s) in "*eol".
  47  */
  48 const guchar *
  49 find_line_end(const guchar *data, const guchar *dataend, const guchar **eol)
  50 {
  51     const guchar *lineend;
  52
  53     lineend = (guchar *)memchr(data, '\n', dataend - data);
  54     if (lineend == NULL) {
  55         /*
  56          * No LF - line is probably continued in next TCP segment.
  57          */
  58         lineend = dataend;
  59         *eol = dataend;
  60     } else {
  61         /*
  62          * Is the LF at the beginning of the line?
  63          */
  64         if (lineend > data) {
  65             /*
  66              * No - is it preceded by a carriage return?
  67              * (Perhaps it's supposed to be, but that's not guaranteed....)
  68              */
  69             if (*(lineend - 1) == '\r') {
  70                 /*
  71                  * Yes.  The EOL starts with the CR.
  72                  */
  73                 *eol = lineend - 1;
  74             } else {
  75                 /*
  76                  * No.  The EOL starts with the LF.
  77                  */
  78                 *eol = lineend;
  79
  80                 /*
  81                  * I seem to remember that we once saw lines ending with LF-CR
  82                  * in an HTTP request or response, so check if it's *followed*
  83                  * by a carriage return.
  84                  */
  85                 if (lineend < (dataend - 1) && *(lineend + 1) == '\r') {
  86                     /*
  87                      * It's <non-LF><LF><CR>; say it ends with the CR.
  88                      */
  89                     lineend++;
  90                 }
  91             }
  92         } else {
  93             /*
  94              * Yes - the EOL starts with the LF.
  95              */
  96             *eol = lineend;
  97         }
  98
  99         /*
 100          * Point to the character after the last character.
 101          */
 102         lineend++;
 103     }
 104     return lineend;
 105 }
 106
 107 /*
 108  * Get the length of the next token in a line, and the beginning of the
 109  * next token after that (if any).
 110  * Return 0 if there is no next token.
 111  */
 112 int
 113 get_token_len(const guchar *linep, const guchar *lineend,
 114         const guchar **next_token)
 115 {
 116     const guchar *tokenp;
 117     int token_len;
 118
 119     tokenp = linep;
 120
 121     /*
 122      * Search for a blank, a CR or an LF, or the end of the buffer.
 123      */
 124     while (linep < lineend && *linep != ' ' && *linep != '\r' && *linep != '\n')
 125         linep++;
 126     token_len = (int) (linep - tokenp);
 127
 128     /*
 129      * Skip trailing blanks.
 130      */
 131     while (linep < lineend && *linep == ' ')
 132         linep++;
 133
 134     *next_token = linep;
 135
 136     return token_len;
 137 }
 138
 139
 140 #define    INITIAL_FMTBUF_SIZE    128
 141
 142 /*
 143  * Given a string, generate a string from it that shows non-printable
 144  * characters as C-style escapes, and return a pointer to it.
 145  */
 146 gchar *
 147 format_text(const guchar *string, size_t len)
 148 {
 149     static gchar *fmtbuf[3];
 150     static int fmtbuf_len[3];
 151     static int idx;
 152     int column;
 153     const guchar *stringend = string + len;
 154     guchar c;
 155     int i;
 156
 157     idx = (idx + 1) % 3;
 158
 159     /*
 160      * Allocate the buffer if it's not already allocated.
 161      */
 162     if (fmtbuf[idx] == NULL) {
 163         fmtbuf[idx] = (gchar *)g_malloc(INITIAL_FMTBUF_SIZE);
 164         fmtbuf_len[idx] = INITIAL_FMTBUF_SIZE;
 165     }
 166     column = 0;
 167     while (string < stringend) {
 168         /*
 169          * Is there enough room for this character, if it expands to
 170          * a backslash plus 3 octal digits (which is the most it can
 171          * expand to), and also enough room for a terminating '\0'?
 172          */
 173         if (column+3+1 >= fmtbuf_len[idx]) {
 174             /*
 175              * Double the buffer's size if it's not big enough.
 176              * The size of the buffer starts at 128, so doubling its size
 177              * adds at least another 128 bytes, which is more than enough
 178              * for one more character plus a terminating '\0'.
 179              */
 180             fmtbuf_len[idx] = fmtbuf_len[idx] * 2;
 181             fmtbuf[idx] = (gchar *)g_realloc(fmtbuf[idx], fmtbuf_len[idx]);
 182         }
 183         c = *string++;
 184
 185         if (g_ascii_isprint(c)) {
 186             fmtbuf[idx][column] = c;
 187             column++;
 188         } else {
 189             fmtbuf[idx][column] =  '\\';
 190             column++;
 191             switch (c) {
 192
 193                 case '\a':
 194                     fmtbuf[idx][column] = 'a';
 195                     column++;
 196                     break;
 197
 198                 case '\b':
 199                     fmtbuf[idx][column] = 'b'; /* BS */
 200                     column++;
 201                     break;
 202
 203                 case '\f':
 204                     fmtbuf[idx][column] = 'f'; /* FF */
 205                     column++;
 206                     break;
 207
 208                 case '\n':
 209                     fmtbuf[idx][column] = 'n'; /* NL */
 210                     column++;
 211                     break;
 212
 213                 case '\r':
 214                     fmtbuf[idx][column] = 'r'; /* CR */
 215                     column++;
 216                     break;
 217
 218                 case '\t':
 219                     fmtbuf[idx][column] = 't'; /* tab */
 220                     column++;
 221                     break;
 222
 223                 case '\v':
 224                     fmtbuf[idx][column] = 'v';
 225                     column++;
 226                     break;
 227
 228                 default:
 229                     i = (c>>6)&03;
 230                     fmtbuf[idx][column] = i + '0';
 231                     column++;
 232                     i = (c>>3)&07;
 233                     fmtbuf[idx][column] = i + '0';
 234                     column++;
 235                     i = (c>>0)&07;
 236                     fmtbuf[idx][column] = i + '0';
 237                     column++;
 238                     break;
 239             }
 240         }
 241     }
 242     fmtbuf[idx][column] = '\0';
 243     return fmtbuf[idx];
 244 }
 245
 246 /*
 247  * Given a string, generate a string from it that shows non-printable
 248  * characters as C-style escapes except a whitespace character
 249  * (space, tab, carriage return, new line, vertical tab, or formfeed)
 250  * which will be replaced by a space, and return a pointer to it.
 251  */
 252 gchar *
 253 format_text_wsp(const guchar *string, size_t len)
 254 {
 255     static gchar *fmtbuf[3];
 256     static int fmtbuf_len[3];
 257     static int idx;
 258     int column;
 259     const guchar *stringend = string + len;
 260     guchar c;
 261     int i;
 262
 263     idx = (idx + 1) % 3;
 264
 265     /*
 266      * Allocate the buffer if it's not already allocated.
 267      */
 268     if (fmtbuf[idx] == NULL) {
 269         fmtbuf[idx] = (gchar *)g_malloc(INITIAL_FMTBUF_SIZE);
 270         fmtbuf_len[idx] = INITIAL_FMTBUF_SIZE;
 271     }
 272     column = 0;
 273     while (string < stringend) {
 274         /*
 275          * Is there enough room for this character, if it expands to
 276          * a backslash plus 3 octal digits (which is the most it can
 277          * expand to), and also enough room for a terminating '\0'?
 278          */
 279         if (column+3+1 >= fmtbuf_len[idx]) {
 280             /*
 281              * Double the buffer's size if it's not big enough.
 282              * The size of the buffer starts at 128, so doubling its size
 283              * adds at least another 128 bytes, which is more than enough
 284              * for one more character plus a terminating '\0'.
 285              */
 286             fmtbuf_len[idx] = fmtbuf_len[idx] * 2;
 287             fmtbuf[idx] = (gchar *)g_realloc(fmtbuf[idx], fmtbuf_len[idx]);
 288         }
 289         c = *string++;
 290
 291         if (g_ascii_isprint(c)) {
 292             fmtbuf[idx][column] = c;
 293             column++;
 294         } else if (g_ascii_isspace(c)) {
 295             fmtbuf[idx][column] = ' ';
 296             column++;
 297         } else {
 298             fmtbuf[idx][column] =  '\\';
 299             column++;
 300             switch (c) {
 301
 302                 case '\a':
 303                     fmtbuf[idx][column] = 'a';
 304                     column++;
 305                     break;
 306
 307                 case '\b':
 308                     fmtbuf[idx][column] = 'b'; /* BS */
 309                     column++;
 310                     break;
 311
 312                 case '\f':
 313                     fmtbuf[idx][column] = 'f'; /* FF */
 314                     column++;
 315                     break;
 316
 317                 case '\n':
 318                     fmtbuf[idx][column] = 'n'; /* NL */
 319                     column++;
 320                     break;
 321
 322                 case '\r':
 323                     fmtbuf[idx][column] = 'r'; /* CR */
 324                     column++;
 325                     break;
 326
 327                 case '\t':
 328                     fmtbuf[idx][column] = 't'; /* tab */
 329                     column++;
 330                     break;
 331
 332                 case '\v':
 333                     fmtbuf[idx][column] = 'v';
 334                     column++;
 335                     break;
 336
 337                 default:
 338                     i = (c>>6)&03;
 339                     fmtbuf[idx][column] = i + '0';
 340                     column++;
 341                     i = (c>>3)&07;
 342                     fmtbuf[idx][column] = i + '0';
 343                     column++;
 344                     i = (c>>0)&07;
 345                     fmtbuf[idx][column] = i + '0';
 346                     column++;
 347                     break;
 348             }
 349         }
 350     }
 351     fmtbuf[idx][column] = '\0';
 352     return fmtbuf[idx];
 353 }
 354
 355 /*
 356  * Given a string, generate a string from it that shows non-printable
 357  * characters as the chr parameter passed, except a whitespace character
 358  * (space, tab, carriage return, new line, vertical tab, or formfeed)
 359  * which will be replaced by a space, and return a pointer to it.
 360  */
 361 gchar *
 362 format_text_chr(const guchar *string, const size_t len, const guchar chr)
 363 {
 364     static gchar *fmtbuf[3];
 365     static int fmtbuf_len[3];
 366     static int idx;
 367     int column;
 368     const guchar *stringend = string + len;
 369     guchar c;
 370
 371     idx = (idx + 1) % 3;
 372
 373     /*
 374      * Allocate the buffer if it's not already allocated.
 375      */
 376     if (fmtbuf[idx] == NULL) {
 377         fmtbuf[idx] = (gchar *)g_malloc(INITIAL_FMTBUF_SIZE);
 378         fmtbuf_len[idx] = INITIAL_FMTBUF_SIZE;
 379     }
 380     column = 0;
 381     while (string < stringend)
 382     {
 383         /*
 384          * Is there enough room for this character,
 385          * and also enough room for a terminating '\0'?
 386          */
 387         if (column+1 >= fmtbuf_len[idx])
 388         {
 389             /*
 390              * Double the buffer's size if it's not big enough.
 391              * The size of the buffer starts at 128, so doubling its size
 392              * adds at least another 128 bytes, which is more than enough
 393              * for one more character plus a terminating '\0'.
 394              */
 395             fmtbuf_len[idx] = fmtbuf_len[idx] * 2;
 396             fmtbuf[idx] = (gchar *)g_realloc(fmtbuf[idx], fmtbuf_len[idx]);
 397         }
 398         c = *string++;
 399
 400         if (g_ascii_isprint(c))
 401         {
 402             fmtbuf[idx][column] = c;
 403             column++;
 404         }
 405         else if (g_ascii_isspace(c))
 406         {
 407             fmtbuf[idx][column] = ' ';
 408             column++;
 409         }
 410         else
 411         {
 412             fmtbuf[idx][column] =  chr;
 413             column++;
 414         }
 415     }
 416     fmtbuf[idx][column] = '\0';
 417     return fmtbuf[idx];
 418 }
 419
 420 static gboolean
 421 is_byte_sep(guint8 c)
 422 {
 423     return (c == '-' || c == ':' || c == '.');
 424 }
 425
 426 /* Turn a string of hex digits with optional separators (defined by
 427  * is_byte_sep() into a byte array.
 428  */
 429 gboolean
 430 hex_str_to_bytes(const char *hex_str, GByteArray *bytes, gboolean force_separators)
 431 {
 432     guint8        val;
 433     const gchar    *p, *q, *r, *s, *punct;
 434     char        four_digits_first_half[3];
 435     char        four_digits_second_half[3];
 436     char        two_digits[3];
 437     char        one_digit[2];
 438
 439     if (! hex_str || ! bytes) {
 440         return FALSE;
 441     }
 442     g_byte_array_set_size(bytes, 0);
 443     p = hex_str;
 444     while (*p) {
 445         q = p+1;
 446         r = p+2;
 447         s = p+3;
 448
 449         if (*q && *r && *s
 450                 && g_ascii_isxdigit(*p) && g_ascii_isxdigit(*q) &&
 451                 g_ascii_isxdigit(*r) && g_ascii_isxdigit(*s)) {
 452             four_digits_first_half[0] = *p;
 453             four_digits_first_half[1] = *q;
 454             four_digits_first_half[2] = '\0';
 455             four_digits_second_half[0] = *r;
 456             four_digits_second_half[1] = *s;
 457             four_digits_second_half[2] = '\0';
 458
 459             /*
 460              * Four or more hex digits in a row.
 461              */
 462             val = (guint8) strtoul(four_digits_first_half, NULL, 16);
 463             g_byte_array_append(bytes, &val, 1);
 464             val = (guint8) strtoul(four_digits_second_half, NULL, 16);
 465             g_byte_array_append(bytes, &val, 1);
 466
 467             punct = s + 1;
 468             if (*punct) {
 469                 /*
 470                  * Make sure the character after
 471                  * the forth hex digit is a byte
 472                  * separator, i.e. that we don't have
 473                  * more than four hex digits, or a
 474                  * bogus character.
 475                  */
 476                 if (is_byte_sep(*punct)) {
 477                     p = punct + 1;
 478                     continue;
 479                 }
 480                 else if (force_separators) {
 481                     return FALSE;
 482                 }
 483             }
 484             p = punct;
 485             continue;
 486         }
 487         else if (*q && g_ascii_isxdigit(*p) && g_ascii_isxdigit(*q)) {
 488             two_digits[0] = *p;
 489             two_digits[1] = *q;
 490             two_digits[2] = '\0';
 491
 492             /*
 493              * Two hex digits in a row.
 494              */
 495             val = (guint8) strtoul(two_digits, NULL, 16);
 496             g_byte_array_append(bytes, &val, 1);
 497             punct = q + 1;
 498             if (*punct) {
 499                 /*
 500                  * Make sure the character after
 501                  * the second hex digit is a byte
 502                  * separator, i.e. that we don't have
 503                  * more than two hex digits, or a
 504                  * bogus character.
 505                  */
 506                 if (is_byte_sep(*punct)) {
 507                     p = punct + 1;
 508                     continue;
 509                 }
 510                 else if (force_separators) {
 511                     return FALSE;
 512                 }
 513             }
 514             p = punct;
 515             continue;
 516         }
 517         else if (*q && g_ascii_isxdigit(*p) && is_byte_sep(*q)) {
 518             one_digit[0] = *p;
 519             one_digit[1] = '\0';
 520
 521             /*
 522              * Only one hex digit (not at the end of the string)
 523              */
 524             val = (guint8) strtoul(one_digit, NULL, 16);
 525             g_byte_array_append(bytes, &val, 1);
 526             p = q + 1;
 527             continue;
 528         }
 529         else if (!*q && g_ascii_isxdigit(*p)) {
 530             one_digit[0] = *p;
 531             one_digit[1] = '\0';
 532
 533             /*
 534              * Only one hex digit (at the end of the string)
 535              */
 536             val = (guint8) strtoul(one_digit, NULL, 16);
 537             g_byte_array_append(bytes, &val, 1);
 538             p = q;
 539             continue;
 540         }
 541         else {
 542             return FALSE;
 543         }
 544     }
 545     return TRUE;
 546 }
 547
 548 static inline gchar
 549 get_valid_byte_sep(gchar c, const guint encoding)
 550 {
 551     gchar retval = -1; /* -1 means failure */
 552
 553     switch (c) {
 554         case ':':
 555             if (encoding & ENC_SEP_COLON)
 556                 retval = c;
 557             break;
 558         case '-':
 559             if (encoding & ENC_SEP_DASH)
 560                 retval = c;
 561             break;
 562         case '.':
 563             if (encoding & ENC_SEP_DOT)
 564                 retval = c;
 565             break;
 566         case ' ':
 567             if (encoding & ENC_SEP_SPACE)
 568                 retval = c;
 569             break;
 570         case '\0':
 571             /* we were given the end of the string, so it's fine */
 572             retval = 0;
 573             break;
 574         default:
 575             if (g_ascii_isxdigit(c) && (encoding & ENC_SEP_NONE))
 576                 retval = 0;
 577             /* anything else means we've got a failure */
 578             break;
 579     }
 580
 581     return retval;
 582 }
 583
 584 /* Turn a string of hex digits with optional separators (defined by is_byte_sep())
 585  * into a byte array. Unlike hex_str_to_bytes(), this will read as many hex-char
 586  * pairs as possible and not error if it hits a non-hex-char; instead it just ends
 587  * there. (i.e., like strtol()/atoi()/etc.) Unless fail_if_partial is TRUE.
 588  *
 589  * The **endptr, if not NULL, is set to the char after the last hex character.
 590  */
 591 gboolean
 592 hex_str_to_bytes_encoding(const gchar *hex_str, GByteArray *bytes, const gchar **endptr,
 593                           const guint encoding, const gboolean fail_if_partial)
 594 {
 595     gint8 c, d;
 596     guint8 val;
 597     const gchar *end = hex_str;
 598     gboolean retval = FALSE;
 599     gchar sep = -1;
 600
 601     /* a map from ASCII hex chars to their value */
 602     static const gint8 str_to_nibble[256] = {
 603         -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
 604         -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
 605         -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
 606          0, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1,
 607         -1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1,
 608         -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
 609         -1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1,
 610         -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
 611         -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
 612         -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
 613         -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
 614         -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
 615         -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
 616         -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
 617         -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
 618         -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
 619     };
 620
 621     /* we must see two hex chars at the beginning, or fail */
 622     if (bytes && *end && g_ascii_isxdigit(*end) && g_ascii_isxdigit(*(end+1))) {
 623         retval = TRUE;
 624
 625         /* set the separator character we'll allow; if this returns a -1, it means something's
 626          * invalid after the hex, but we'll let the while-loop grab the first hex-pair anyway
 627          */
 628         sep = get_valid_byte_sep(*(end+2), encoding);
 629
 630         while (*end) {
 631             c = str_to_nibble[(guchar)*end];
 632             if (c < 0) {
 633                 if (fail_if_partial) retval = FALSE;
 634                 break;
 635             }
 636             ++end;
 637
 638             d = str_to_nibble[(guchar)*end];
 639             if (d < 0) {
 640                 if (fail_if_partial) retval = FALSE;
 641                 break;
 642             }
 643             val = ((guint8)c * 16) + d;
 644             g_byte_array_append(bytes, &val, 1);
 645             ++end;
 646
 647             /* check for separator and peek at next char to make sure we should keep going */
 648             if (sep > 0 && *end == sep && str_to_nibble[(guchar)*(end+1)] > -1) {
 649                 /* yes, it's the right sep and followed by more hex, so skip the sep */
 650                 ++end;
 651             } else if (sep != 0 && *end) {
 652                 /* we either need a separator, but we don't see one; or the get_valid_byte_sep()
 653                    earlier didn't find a valid one to begin with */
 654                 if (fail_if_partial) retval = FALSE;
 655                 break;
 656             }
 657             /* otherwise, either no separator allowed, or *end is null, or *end is an invalid
 658              * sep, or *end is a valid sep but after it is not a hex char - in all those
 659              * cases, just loop back up and let it fail later naturally.
 660              */
 661         }
 662     }
 663
 664     if (!retval) {
 665         if (bytes) g_byte_array_set_size(bytes, 0);
 666         end = hex_str;
 667     }
 668
 669     if (endptr) *endptr = end;
 670
 671     return retval;
 672 }
 673
 674 /*
 675  * Turn an RFC 3986 percent-encoded string into a byte array.
 676  * XXX - We don't check for reserved characters.
 677  */
 678 #define HEX_DIGIT_BUF_LEN 3
 679 gboolean
 680 uri_str_to_bytes(const char *uri_str, GByteArray *bytes)
 681 {
 682     guint8        val;
 683     const gchar    *p;
 684     gchar         hex_digit[HEX_DIGIT_BUF_LEN];
 685
 686     g_byte_array_set_size(bytes, 0);
 687     if (! uri_str) {
 688         return FALSE;
 689     }
 690
 691     p = uri_str;
 692
 693     while (*p) {
 694         if (!g_ascii_isprint(*p))
 695             return FALSE;
 696         if (*p == '%') {
 697             p++;
 698             if (*p == '\0') return FALSE;
 699             hex_digit[0] = *p;
 700             p++;
 701             if (*p == '\0') return FALSE;
 702             hex_digit[1] = *p;
 703             hex_digit[2] = '\0';
 704             if (! g_ascii_isxdigit(hex_digit[0]) || ! g_ascii_isxdigit(hex_digit[1]))
 705                 return FALSE;
 706             val = (guint8) strtoul(hex_digit, NULL, 16);
 707             g_byte_array_append(bytes, &val, 1);
 708         } else {
 709             g_byte_array_append(bytes, (const guint8 *) p, 1);
 710         }
 711         p++;
 712
 713     }
 714     return TRUE;
 715 }
 716
 717 /*
 718  * Given a GByteArray, generate a string from it that shows non-printable
 719  * characters as percent-style escapes, and return a pointer to it.
 720  */
 721 const gchar *
 722 format_uri(const GByteArray *bytes, const gchar *reserved_chars)
 723 {
 724     static gchar *fmtbuf[3];
 725     static guint fmtbuf_len[3];
 726     static guint idx;
 727     static const guchar *reserved_def = ":/?#[]@!$&'()*+,;= ";
 728     const guchar *reserved = reserved_def;
 729     guint8 c;
 730     guint column, i;
 731     gboolean is_reserved = FALSE;
 732
 733     if (! bytes)
 734         return "";
 735
 736     idx = (idx + 1) % 3;
 737     if (reserved_chars)
 738         reserved = reserved_chars;
 739
 740     /*
 741      * Allocate the buffer if it's not already allocated.
 742      */
 743     if (fmtbuf[idx] == NULL) {
 744         fmtbuf[idx] = (gchar *)g_malloc(INITIAL_FMTBUF_SIZE);
 745         fmtbuf_len[idx] = INITIAL_FMTBUF_SIZE;
 746     }
 747     for (column = 0; column < bytes->len; column++) {
 748         /*
 749          * Is there enough room for this character, if it expands to
 750          * a percent plus 2 hex digits (which is the most it can
 751          * expand to), and also enough room for a terminating '\0'?
 752          */
 753         if (column+2+1 >= fmtbuf_len[idx]) {
 754             /*
 755              * Double the buffer's size if it's not big enough.
 756              * The size of the buffer starts at 128, so doubling its size
 757              * adds at least another 128 bytes, which is more than enough
 758              * for one more character plus a terminating '\0'.
 759              */
 760             fmtbuf_len[idx] = fmtbuf_len[idx] * 2;
 761             fmtbuf[idx] = (gchar *)g_realloc(fmtbuf[idx], fmtbuf_len[idx]);
 762         }
 763         c = bytes->data[column];
 764
 765         if (!g_ascii_isprint(c) || c == '%') {
 766             is_reserved = TRUE;
 767         }
 768
 769         for (i = 0; reserved[i]; i++) {
 770             if (c == reserved[i])
 771                 is_reserved = TRUE;
 772         }
 773
 774         if (!is_reserved) {
 775             fmtbuf[idx][column] = c;
 776         } else {
 777             fmtbuf[idx][column] = '%';
 778             column++;
 779             fmtbuf[idx][column] = hex[c >> 4];
 780             column++;
 781             fmtbuf[idx][column] = hex[c & 0xF];
 782         }
 783     }
 784     fmtbuf[idx][column] = '\0';
 785     return fmtbuf[idx];
 786 }
 787
 788 /**
 789  * Create a copy of a GByteArray
 790  *
 791  * @param ba The byte array to be copied.
 792  * @return If ba exists, a freshly allocated copy.  NULL otherwise.
 793  *
 794  */
 795 GByteArray *
 796 byte_array_dup(const GByteArray *ba)
 797 {
 798     GByteArray *new_ba;
 799
 800     if (!ba)
 801         return NULL;
 802
 803     new_ba = g_byte_array_new();
 804     g_byte_array_append(new_ba, ba->data, ba->len);
 805     return new_ba;
 806 }
 807
 808 #define SUBID_BUF_LEN 5
 809 gboolean
 810 oid_str_to_bytes(const char *oid_str, GByteArray *bytes)
 811 {
 812     return rel_oid_str_to_bytes(oid_str, bytes, TRUE);
 813 }
 814 gboolean
 815 rel_oid_str_to_bytes(const char *oid_str, GByteArray *bytes, gboolean is_absolute)
 816 {
 817     guint32 subid0, subid, sicnt, i;
 818     const char *p, *dot;
 819     guint8 buf[SUBID_BUF_LEN];
 820
 821     g_byte_array_set_size(bytes, 0);
 822
 823     /* check syntax */
 824     p = oid_str;
 825     dot = NULL;
 826     while (*p) {
 827         if (!g_ascii_isdigit(*p) && (*p != '.')) return FALSE;
 828         if (*p == '.') {
 829             if (p == oid_str && is_absolute) return FALSE;
 830             if (!*(p+1)) return FALSE;
 831             if ((p-1) == dot) return FALSE;
 832             dot = p;
 833         }
 834         p++;
 835     }
 836     if (!dot) return FALSE;
 837
 838     p = oid_str;
 839     sicnt = is_absolute ? 0 : 2;
 840     if (!is_absolute) p++;
 841     subid0 = 0;    /* squelch GCC complaints */
 842     while (*p) {
 843         subid = 0;
 844         while (g_ascii_isdigit(*p)) {
 845             subid *= 10;
 846             subid += *p - '0';
 847             p++;
 848         }
 849         if (sicnt == 0) {
 850             subid0 = subid;
 851             if (subid0 > 2) return FALSE;
 852         } else if (sicnt == 1) {
 853             if ((subid0 < 2) && (subid > 39)) return FALSE;
 854             subid += 40 * subid0;
 855         }
 856         if (sicnt) {
 857             i = SUBID_BUF_LEN;
 858             do {
 859                 i--;
 860                 buf[i] = 0x80 | (subid % 0x80);
 861                 subid >>= 7;
 862             } while (subid && i);
 863             buf[SUBID_BUF_LEN-1] &= 0x7F;
 864             g_byte_array_append(bytes, buf + i, SUBID_BUF_LEN - i);
 865         }
 866         sicnt++;
 867         if (*p) p++;
 868     }
 869
 870     return TRUE;
 871 }
 872
 873 /**
 874  * Compare the contents of two GByteArrays
 875  *
 876  * @param ba1 A byte array
 877  * @param ba2 A byte array
 878  * @return If both arrays are non-NULL and their lengths are equal and
 879  *         their contents are equal, returns TRUE.  Otherwise, returns
 880  *         FALSE.
 881  *
 882  * XXX - Should this be in strutil.c?
 883  */
 884 gboolean
 885 byte_array_equal(GByteArray *ba1, GByteArray *ba2)
 886 {
 887     if (!ba1 || !ba2)
 888         return FALSE;
 889
 890     if (ba1->len != ba2->len)
 891         return FALSE;
 892
 893     if (memcmp(ba1->data, ba2->data, ba1->len) != 0)
 894         return FALSE;
 895
 896     return TRUE;
 897 }
 898
 899
 900 /* Return a XML escaped representation of the unescaped string.
 901  * The returned string must be freed when no longer in use. */
 902 gchar *
 903 xml_escape(const gchar *unescaped)
 904 {
 905     GString *buffer = g_string_sized_new(128);
 906     const gchar *p;
 907     gchar c;
 908
 909     p = unescaped;
 910     while ( (c = *p++) ) {
 911         switch (c) {
 912             case '<':
 913                 g_string_append(buffer, "&lt;");
 914                 break;
 915             case '>':
 916                 g_string_append(buffer, "&gt;");
 917                 break;
 918             case '&':
 919                 g_string_append(buffer, "&amp;");
 920                 break;
 921             case '\'':
 922                 g_string_append(buffer, "&#x27;");
 923                 break;
 924             case '"':
 925                 g_string_append(buffer, "&quot;");
 926                 break;
 927             default:
 928                 g_string_append_c(buffer, c);
 929                 break;
 930         }
 931     }
 932     /* Return the string value contained within the GString
 933      * after getting rid of the GString structure.
 934      * This is the way to do this, see the GLib reference. */
 935     return g_string_free(buffer, FALSE);
 936 }
 937
 938
 939 /* Return the first occurrence of needle in haystack.
 940  * If not found, return NULL.
 941  * If either haystack or needle has 0 length, return NULL.
 942  * Algorithm copied from GNU's glibc 2.3.2 memmem() under LGPL 2.1+ */
 943 const guint8 *
 944 epan_memmem(const guint8 *haystack, guint haystack_len,
 945         const guint8 *needle, guint needle_len)
 946 {
 947     const guint8 *begin;
 948     const guint8 *const last_possible = haystack + haystack_len - needle_len;
 949
 950     if (needle_len == 0) {
 951         return NULL;
 952     }
 953
 954     if (needle_len > haystack_len) {
 955         return NULL;
 956     }
 957
 958     for (begin = haystack ; begin <= last_possible; ++begin) {
 959         if (begin[0] == needle[0] &&
 960                 !memcmp(&begin[1], needle + 1,
 961                     needle_len - 1)) {
 962             return begin;
 963         }
 964     }
 965
 966     return NULL;
 967 }
 968
 969 /*
 970  * Scan the search string to make sure it's valid hex.  Return the
 971  * number of bytes in nbytes.
 972  */
 973 guint8 *
 974 convert_string_to_hex(const char *string, size_t *nbytes)
 975 {
 976     size_t n_bytes;
 977     const char *p;
 978     gchar c;
 979     guint8 *bytes, *q, byte_val;
 980
 981     n_bytes = 0;
 982     p = &string[0];
 983     for (;;) {
 984         c = *p++;
 985         if (c == '\0')
 986             break;
 987         if (g_ascii_isspace(c))
 988             continue;    /* allow white space */
 989         if (c==':' || c=='.' || c=='-')
 990             continue; /* skip any ':', '.', or '-' between bytes */
 991         if (!g_ascii_isxdigit(c)) {
 992             /* Not a valid hex digit - fail */
 993             return NULL;
 994         }
 995
 996         /*
 997          * We can only match bytes, not nibbles; we must have a valid
 998          * hex digit immediately after that hex digit.
 999          */
1000         c = *p++;
1001         if (!g_ascii_isxdigit(c))
1002             return NULL;
1003
1004         /* 2 hex digits = 1 byte */
1005         n_bytes++;
1006     }
1007
1008     /*
1009      * Were we given any hex digits?
1010      */
1011     if (n_bytes == 0) {
1012         /* No. */
1013         return NULL;
1014     }
1015
1016     /*
1017      * OK, it's valid, and it generates "n_bytes" bytes; generate the
1018      * raw byte array.
1019      */
1020     bytes = (guint8 *)g_malloc(n_bytes);
1021     p = &string[0];
1022     q = &bytes[0];
1023     for (;;) {
1024         c = *p++;
1025         if (c == '\0')
1026             break;
1027         if (g_ascii_isspace(c))
1028             continue;    /* allow white space */
1029         if (c==':' || c=='.' || c=='-')
1030             continue; /* skip any ':', '.', or '-' between bytes */
1031         /* From the loop above, we know this is a hex digit */
1032         byte_val = ws_xton(c);
1033         byte_val <<= 4;
1034
1035         /* We also know this is a hex digit */
1036         c = *p++;
1037         byte_val |= ws_xton(c);
1038
1039         *q++ = byte_val;
1040     }
1041     *nbytes = n_bytes;
1042     return bytes;
1043 }
1044
1045 /*
1046  * Copy if if it's a case-sensitive search; uppercase it if it's
1047  * a case-insensitive search.
1048  */
1049 char *
1050 convert_string_case(const char *string, gboolean case_insensitive)
1051 {
1052
1053     if (case_insensitive) {
1054         return g_utf8_strup(string, -1);
1055     } else {
1056         return g_strdup(string);
1057     }
1058 }
1059
1060 char *
1061 epan_strcasestr(const char *haystack, const char *needle)
1062 {
1063     gsize hlen = strlen(haystack);
1064     gsize nlen = strlen(needle);
1065
1066     while (hlen-- >= nlen) {
1067         if (!g_ascii_strncasecmp(haystack, needle, nlen))
1068             return (char*) haystack;
1069         haystack++;
1070     }
1071     return NULL;
1072 }
1073
/*
 * Return the string itself, or the literal "[NULL]" when given a
 * NULL pointer.
 */
const char *
string_or_null(const char *string)
{
    return (string != NULL) ? string : "[NULL]";
}
1081
1082 int
1083 escape_string_len(const char *string)
1084 {
1085     const char *p;
1086     gchar c;
1087     int repr_len;
1088
1089     repr_len = 0;
1090     for (p = string; (c = *p) != '\0'; p++) {
1091         /* Backslashes and double-quotes must
1092          * be escaped */
1093         if (c == '\\' || c == '"') {
1094             repr_len += 2;
1095         }
1096         /* Values that can't nicely be represented
1097          * in ASCII need to be escaped. */
1098         else if (!g_ascii_isprint(c)) {
1099             /* c --> \xNN */
1100             repr_len += 4;
1101         }
1102         /* Other characters are just passed through. */
1103         else {
1104             repr_len++;
1105         }
1106     }
1107     return repr_len + 2;    /* string plus leading and trailing quotes */
1108 }
1109
1110 char *
1111 escape_string(char *buf, const char *string)
1112 {
1113     const gchar *p;
1114     gchar c;
1115     char *bufp;
1116     char hexbuf[3];
1117
1118     bufp = buf;
1119     *bufp++ = '"';
1120     for (p = string; (c = *p) != '\0'; p++) {
1121         /* Backslashes and double-quotes must
1122          * be escaped. */
1123         if (c == '\\' || c == '"') {
1124             *bufp++ = '\\';
1125             *bufp++ = c;
1126         }
1127         /* Values that can't nicely be represented
1128          * in ASCII need to be escaped. */
1129         else if (!g_ascii_isprint(c)) {
1130             /* c --> \xNN */
1131             g_snprintf(hexbuf,sizeof(hexbuf), "%02x", (unsigned char) c);
1132             *bufp++ = '\\';
1133             *bufp++ = 'x';
1134             *bufp++ = hexbuf[0];
1135             *bufp++ = hexbuf[1];
1136         }
1137         /* Other characters are just passed through. */
1138         else {
1139             *bufp++ = c;
1140         }
1141     }
1142     *bufp++ = '"';
1143     *bufp = '\0';
1144     return buf;
1145 }
1146
1147 #define GN_CHAR_ALPHABET_SIZE 128
1148
1149 static gunichar IA5_default_alphabet[GN_CHAR_ALPHABET_SIZE] = {
1150
1151     /*ITU-T recommendation T.50 specifies International Reference Alphabet 5 (IA5) */
1152
1153     '?', '?', '?', '?', '?', '?', '?', '?',
1154     '?', '?', '?', '?', '?', '?', '?', '?',
1155     '?', '?', '?', '?', '?', '?', '?', '?',
1156     '?', '?', '?', '?', '?', '?', '?', '?',
1157     ' ', '!', '\"','#', '$', '%', '&', '\'',
1158     '(', ')', '*', '+', ',', '-', '.', '/',
1159     '0', '1', '2', '3', '4', '5', '6', '7',
1160     '8', '9', ':', ';', '<', '=', '>', '?',
1161     '@', 'A', 'B', 'C', 'D', 'E', 'F', 'G',
1162     'H',  'I',  'J',  'K',  'L',  'M',  'N',  'O',
1163     'P',  'Q',  'R',  'S',  'T',  'U',  'V',  'W',
1164     'X',  'Y',  'Z',  '[',  '\\',  ']',  '^',  '_',
1165     '`', 'a',  'b',  'c',  'd',  'e',  'f',  'g',
1166     'h',  'i',  'j',  'k',  'l',  'm',  'n',  'o',
1167     'p',  'q',  'r',  's',  't',  'u',  'v',  'w',
1168     'x',  'y',  'z',  '{',  '|',  '}',  '~',  '?'
1169 };
1170
1171 static gunichar
1172 char_def_ia5_alphabet_decode(unsigned char value)
1173 {
1174     if (value < GN_CHAR_ALPHABET_SIZE) {
1175         return IA5_default_alphabet[value];
1176     }
1177     else {
1178         return '?';
1179     }
1180 }
1181
1182 void
1183 IA5_7BIT_decode(unsigned char * dest, const unsigned char* src, int len)
1184 {
1185     int i, j;
1186     gunichar buf;
1187
1188     for (i = 0, j = 0; j < len;  j++) {
1189         buf = char_def_ia5_alphabet_decode(src[j]);
1190         i += g_unichar_to_utf8(buf,&(dest[i]));
1191     }
1192     dest[i]=0;
1193     return;
1194 }
1195
1196 /*
1197  * This function takes a string and copies it, inserting a 'chr' before
1198  * every 'chr' in it.
1199  */
1200 gchar*
1201 ws_strdup_escape_char (const gchar *str, const gchar chr)
1202 {
1203     const gchar *p;
1204     gchar *q, *new_str;
1205
1206     if(!str)
1207         return NULL;
1208
1209     p = str;
1210     /* Worst case: A string that is full of 'chr' */
1211     q = new_str = (gchar *)g_malloc (strlen(str) * 2 + 1);
1212
1213     while(*p != 0) {
1214         if(*p == chr)
1215             *q++ = chr;
1216
1217         *q++ = *p++;
1218     }
1219     *q = '\0';
1220
1221     return new_str;
1222 }
1223
1224 /*
1225  * This function takes a string and copies it, removing any occurences of double
1226  * 'chr' with a single 'chr'.
1227  */
1228 gchar*
1229 ws_strdup_unescape_char (const gchar *str, const char chr)
1230 {
1231     const gchar *p;
1232     gchar *q, *new_str;
1233
1234     if(!str)
1235         return NULL;
1236
1237     p = str;
1238     /* Worst case: A string that contains no 'chr' */
1239     q = new_str = (gchar *)g_malloc (strlen(str) + 1);
1240
1241     while(*p != 0) {
1242         *q++ = *p;
1243         if ((*p == chr) && (*(p+1) == chr))
1244             p += 2;
1245         else
1246             p++;
1247     }
1248     *q = '\0';
1249
1250     return new_str;
1251 }
1252
1253 /* Create a newly-allocated string with replacement values. */
1254 gchar *
1255 string_replace(const gchar* str, const gchar *old_val, const gchar *new_val)
1256 {
1257     gchar **str_parts;
1258     gchar *new_str;
1259
1260     if (!str || !old_val) {
1261         return NULL;
1262     }
1263
1264     str_parts = g_strsplit(str, old_val, 0);
1265     new_str = g_strjoinv(new_val, str_parts);
1266     g_strfreev(str_parts);
1267
1268     return new_str;
1269 }
1270
1271 /*
1272  * Editor modelines  -  http://www.wireshark.org/tools/modelines.html
1273  *
1274  * Local variables:
1275  * c-basic-offset: 4
1276  * tab-width: 8
1277  * indent-tabs-mode: nil
1278  * End:
1279  *
1280  * vi: set shiftwidth=4 tabstop=8 expandtab:
1281  * :indentSize=4:tabSize=8:noTabs=true:
1282  */