epan/strutil.c

   1 /* strutil.c
   2  * String utility routines
   3  *
   4  * $Id$
   5  *
   6  * Ethereal - Network traffic analyzer
   7  * By Gerald Combs <gerald@ethereal.com>
   8  * Copyright 1998 Gerald Combs
   9  *
  10  * This program is free software; you can redistribute it and/or
  11  * modify it under the terms of the GNU General Public License
  12  * as published by the Free Software Foundation; either version 2
  13  * of the License, or (at your option) any later version.
  14  *
  15  * This program is distributed in the hope that it will be useful,
  16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  18  * GNU General Public License for more details.
  19  *
  20  * You should have received a copy of the GNU General Public License
  21  * along with this program; if not, write to the Free Software
  22  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
  23  */
  24
  25 #ifdef HAVE_CONFIG_H
  26 # include "config.h"
  27 #endif
  28
  29 #include <stdlib.h>
  30 #include <string.h>
  31 #include <ctype.h>
  32 #include <glib.h>
  33 #include "strutil.h"
  34 #include "emem.h"
  35
  36 #ifdef _WIN32
  37 #include <windows.h>
  38 #include <tchar.h>
  39 #include <wchar.h>
  40 #endif
  41
  42 /*
  43  * Given a pointer into a data buffer, and to the end of the buffer,
  44  * find the end of the (putative) line at that position in the data
  45  * buffer.
  46  * Return a pointer to the EOL character(s) in "*eol".
  47  */
  48 const guchar *
  49 find_line_end(const guchar *data, const guchar *dataend, const guchar **eol)
  50 {
  51   const guchar *lineend;
  52
  53   lineend = memchr(data, '\n', dataend - data);
  54   if (lineend == NULL) {
  55     /*
  56      * No LF - line is probably continued in next TCP segment.
  57      */
  58     lineend = dataend;
  59     *eol = dataend;
  60   } else {
  61     /*
  62      * Is the LF at the beginning of the line?
  63      */
  64     if (lineend > data) {
  65       /*
  66        * No - is it preceded by a carriage return?
  67        * (Perhaps it's supposed to be, but that's not guaranteed....)
  68        */
  69       if (*(lineend - 1) == '\r') {
  70         /*
  71          * Yes.  The EOL starts with the CR.
  72          */
  73         *eol = lineend - 1;
  74       } else {
  75         /*
  76          * No.  The EOL starts with the LF.
  77          */
  78         *eol = lineend;
  79
  80         /*
  81          * I seem to remember that we once saw lines ending with LF-CR
  82          * in an HTTP request or response, so check if it's *followed*
  83          * by a carriage return.
  84          */
  85         if (lineend < (dataend - 1) && *(lineend + 1) == '\r') {
  86           /*
  87            * It's <non-LF><LF><CR>; say it ends with the CR.
  88            */
  89           lineend++;
  90         }
  91       }
  92     } else {
  93       /*
  94        * Yes - the EOL starts with the LF.
  95        */
  96       *eol = lineend;
  97     }
  98
  99     /*
 100      * Point to the character after the last character.
 101      */
 102     lineend++;
 103   }
 104   return lineend;
 105 }
 106
 107 /*
 108  * Get the length of the next token in a line, and the beginning of the
 109  * next token after that (if any).
 110  * Return 0 if there is no next token.
 111  */
 112 int
 113 get_token_len(const guchar *linep, const guchar *lineend,
 114               const guchar **next_token)
 115 {
 116   const guchar *tokenp;
 117   int token_len;
 118
 119   tokenp = linep;
 120
 121   /*
 122    * Search for a blank, a CR or an LF, or the end of the buffer.
 123    */
 124   while (linep < lineend && *linep != ' ' && *linep != '\r' && *linep != '\n')
 125       linep++;
 126   token_len = linep - tokenp;
 127
 128   /*
 129    * Skip trailing blanks.
 130    */
 131   while (linep < lineend && *linep == ' ')
 132     linep++;
 133
 134   *next_token = linep;
 135
 136   return token_len;
 137 }
 138
 139
 140 #define INITIAL_FMTBUF_SIZE     128
 141
 142 #if GLIB_MAJOR_VERSION >= 2
 143 /*
 144  * XXX - "isprint()" can return "true" for non-ASCII characters, but
 145  * those don't work with GTK+ 1.3 or later, as they take UTF-8 strings
 146  * as input.  Until we fix up Ethereal to properly handle non-ASCII
 147  * characters in all output (both GUI displays and text printouts)
 148  * in those versions of GTK+, we work around the problem by escaping
 149  * all characters that aren't printable ASCII.
 150  *
 151  * We don't know what version of GTK+ we're using, as epan doesn't
 152  * use any GTK+ stuff; we use GLib as a proxy for that, with GLib 2.x
 153  * implying GTK+ 1.3 or later (we don't support GLib 1.3[.x]).
 154  */
 155 #undef isprint
 156 #define isprint(c) (c >= 0x20 && c < 0x7f)
 157 #endif
 158
 159 /*
 160  * Given a string, generate a string from it that shows non-printable
 161  * characters as C-style escapes, and return a pointer to it.
 162  */
 163 gchar *
 164 format_text(const guchar *string, int len)
 165 {
 166   static gchar *fmtbuf[3];
 167   static int fmtbuf_len[3];
 168   static int idx;
 169   int column;
 170   const guchar *stringend = string + len;
 171   guchar c;
 172   int i;
 173
 174   idx = (idx + 1) % 3;
 175
 176   /*
 177    * Allocate the buffer if it's not already allocated.
 178    */
 179   if (fmtbuf[idx] == NULL) {
 180     fmtbuf[idx] = g_malloc(INITIAL_FMTBUF_SIZE);
 181     fmtbuf_len[idx] = INITIAL_FMTBUF_SIZE;
 182   }
 183   column = 0;
 184   while (string < stringend) {
 185     /*
 186      * Is there enough room for this character, if it expands to
 187      * a backslash plus 3 octal digits (which is the most it can
 188      * expand to), and also enough room for a terminating '\0'?
 189      */
 190     if (column+3+1 >= fmtbuf_len[idx]) {
 191       /*
 192        * Double the buffer's size if it's not big enough.
 193        * The size of the buffer starts at 128, so doubling its size
 194        * adds at least another 128 bytes, which is more than enough
 195        * for one more character plus a terminating '\0'.
 196        */
 197       fmtbuf_len[idx] = fmtbuf_len[idx] * 2;
 198       fmtbuf[idx] = g_realloc(fmtbuf[idx], fmtbuf_len[idx]);
 199     }
 200     c = *string++;
 201
 202     if (isprint(c)) {
 203       fmtbuf[idx][column] = c;
 204       column++;
 205     } else {
 206       fmtbuf[idx][column] =  '\\';
 207       column++;
 208       switch (c) {
 209
 210       case '\a':
 211         fmtbuf[idx][column] = 'a';
 212         column++;
 213         break;
 214
 215       case '\b':
 216         fmtbuf[idx][column] = 'b';
 217         column++;
 218         break;
 219
 220       case '\f':
 221         fmtbuf[idx][column] = 'f';
 222         column++;
 223         break;
 224
 225       case '\n':
 226         fmtbuf[idx][column] = 'n';
 227         column++;
 228         break;
 229
 230       case '\r':
 231         fmtbuf[idx][column] = 'r';
 232         column++;
 233         break;
 234
 235       case '\t':
 236         fmtbuf[idx][column] = 't';
 237         column++;
 238         break;
 239
 240       case '\v':
 241         fmtbuf[idx][column] = 'v';
 242         column++;
 243         break;
 244
 245       default:
 246         i = (c>>6)&03;
 247         fmtbuf[idx][column] = i + '0';
 248         column++;
 249         i = (c>>3)&07;
 250         fmtbuf[idx][column] = i + '0';
 251         column++;
 252         i = (c>>0)&07;
 253         fmtbuf[idx][column] = i + '0';
 254         column++;
 255         break;
 256       }
 257     }
 258   }
 259   fmtbuf[idx][column] = '\0';
 260   return fmtbuf[idx];
 261 }
 262
 263 /* Max string length for displaying byte string.  */
 264 #define MAX_BYTE_STR_LEN        48
 265
 266 /* Turn an array of bytes into a string showing the bytes in hex. */
 267 #define N_BYTES_TO_STR_STRINGS  6
 268 gchar *
 269 bytes_to_str(const guint8 *bd, int bd_len) {
 270   return bytes_to_str_punct(bd,bd_len,'\0');
 271 }
 272
 273 /* Turn an array of bytes into a string showing the bytes in hex with
 274  * punct as a bytes separator.
 275  */
 276 gchar *
 277 bytes_to_str_punct(const guint8 *bd, int bd_len, gchar punct) {
 278   gchar        *cur;
 279   gchar        *p;
 280   int           len;
 281   static const char hex[16] = { '0', '1', '2', '3', '4', '5', '6', '7',
 282                                 '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };
 283
 284   cur=ep_alloc(MAX_BYTE_STR_LEN+3+1);
 285   p = cur;
 286   len = MAX_BYTE_STR_LEN;
 287   while (bd_len > 0 && len > 0) {
 288     *p++ = hex[(*bd) >> 4];
 289     *p++ = hex[(*bd) & 0xF];
 290     len -= 2;
 291     bd++;
 292     bd_len--;
 293     if(punct && bd_len > 0){
 294       *p++ = punct;
 295       len--;
 296     }
 297   }
 298   if (bd_len != 0) {
 299     /* Note that we're not showing the full string.  */
 300     *p++ = '.';
 301     *p++ = '.';
 302     *p++ = '.';
 303   }
 304   *p = '\0';
 305   return cur;
 306 }
 307
 308 static gboolean
 309 is_byte_sep(guint8 c)
 310 {
 311         return (c == '-' || c == ':' || c == '.');
 312 }
 313
 314 /* Turn a string of hex digits with optional separators (defined by
 315  * is_byte_sep() into a byte array.
 316  */
 317 gboolean
 318 hex_str_to_bytes(const char *hex_str, GByteArray *bytes, gboolean force_separators) {
 319         guint8          val;
 320         const guchar    *p, *q, *punct;
 321         char            two_digits[3];
 322         char            one_digit[2];
 323
 324         g_byte_array_set_size(bytes, 0);
 325         p = (const guchar *)hex_str;
 326         while (*p) {
 327                 q = p+1;
 328                 if (*q && isxdigit(*p) && isxdigit(*q)) {
 329                         two_digits[0] = *p;
 330                         two_digits[1] = *q;
 331                         two_digits[2] = '\0';
 332
 333                         /*
 334                          * Two or more hex digits in a row.
 335                          * "strtoul()" will succeed, as it'll see at
 336                          * least one hex digit.
 337                          */
 338                         val = (guint8) strtoul(two_digits, NULL, 16);
 339                         g_byte_array_append(bytes, &val, 1);
 340                         punct = q + 1;
 341                         if (*punct) {
 342                                 /*
 343                                  * Make sure the character after
 344                                  * the second hex digit is a byte
 345                                  * separator, i.e. that we don't have
 346                                  * more than two hex digits, or a
 347                                  * bogus character.
 348                                  */
 349                                 if (is_byte_sep(*punct)) {
 350                                         p = punct + 1;
 351                                         continue;
 352                                 }
 353                                 else if (force_separators) {
 354                                         return FALSE;
 355                                         break;
 356                                 }
 357                         }
 358                         p = punct;
 359                         continue;
 360                 }
 361                 else if (*q && isxdigit(*p) && is_byte_sep(*q)) {
 362                         one_digit[0] = *p;
 363                         one_digit[1] = '\0';
 364
 365                         /*
 366                          * Only one hex digit.
 367                          * "strtoul()" will succeed, as it'll see that
 368                          * hex digit.
 369                          */
 370                         val = (guint8) strtoul(one_digit, NULL, 16);
 371                         g_byte_array_append(bytes, &val, 1);
 372                         p = q + 1;
 373                         continue;
 374                 }
 375                 else if (!*q && isxdigit(*p)) {
 376                         one_digit[0] = *p;
 377                         one_digit[1] = '\0';
 378
 379                         /*
 380                          * Only one hex digit.
 381                          * "strtoul()" will succeed, as it'll see that
 382                          * hex digit.
 383                          */
 384                         val = (guint8) strtoul(one_digit, NULL, 16);
 385                         g_byte_array_append(bytes, &val, 1);
 386                         p = q;
 387                         continue;
 388                 }
 389                 else {
 390                         return FALSE;
 391                 }
 392         }
 393         return TRUE;
 394 }
 395
 396 #define SUBID_BUF_LEN 5
 397 gboolean
 398 oid_str_to_bytes(const char *oid_str, GByteArray *bytes) {
 399   guint32 subid0, subid, sicnt, i;
 400   const char *p, *dot;
 401   guint8 buf[SUBID_BUF_LEN];
 402
 403   g_byte_array_set_size(bytes, 0);
 404
 405   /* check syntax */
 406   p = oid_str;
 407   dot = NULL;
 408   while (*p) {
 409     if (!isdigit(*p) && (*p != '.')) return FALSE;
 410     if (*p == '.') {
 411       if (p == oid_str) return FALSE;
 412       if (!*(p+1)) return FALSE;
 413       if ((p-1) == dot) return FALSE;
 414       dot = p;
 415     }
 416     p++;
 417   }
 418   if (!dot) return FALSE;
 419
 420   p = oid_str;
 421   sicnt = 0;
 422   subid0 = 0;   /* squelch GCC complaints */
 423   while (*p) {
 424     subid = 0;
 425     while (isdigit(*p)) {
 426       subid *= 10;
 427       subid += *p - '0';
 428       p++;
 429     }
 430     if (sicnt == 0) {
 431       subid0 = subid;
 432       if (subid0 > 2) return FALSE;
 433     } else if (sicnt == 1) {
 434       if ((subid0 < 2) && (subid > 39)) return FALSE;
 435       subid += 40 * subid0;
 436     }
 437     if (sicnt) {
 438       i = SUBID_BUF_LEN;
 439       do {
 440         i--;
 441         buf[i] = 0x80 | (subid % 0x80);
 442         subid >>= 7;
 443       } while (subid && i);
 444       buf[SUBID_BUF_LEN-1] &= 0x7F;
 445       g_byte_array_append(bytes, buf + i, SUBID_BUF_LEN - i);
 446     }
 447     sicnt++;
 448     if (*p) p++;
 449   }
 450
 451   return TRUE;
 452 }
 453
 454
 455 /* Return a XML escaped representation of the unescaped string.
 456  * The returned string must be freed when no longer in use. */
 457 gchar *
 458 xml_escape(const gchar *unescaped)
 459 {
 460         GString *buffer = g_string_sized_new(128);
 461         const gchar *p;
 462         gchar c;
 463 #if GLIB_MAJOR_VERSION < 2
 464         gchar *ret;
 465 #endif
 466
 467         p = unescaped;
 468         while ( (c = *p++) ) {
 469                 switch (c) {
 470                         case '<':
 471                                 g_string_append(buffer, "&lt;");
 472                                 break;
 473                         case '>':
 474                                 g_string_append(buffer, "&gt;");
 475                                 break;
 476                         case '&':
 477                                 g_string_append(buffer, "&amp;");
 478                                 break;
 479                         case '\'':
 480                                 g_string_append(buffer, "&apos;");
 481                                 break;
 482                         case '"':
 483                                 g_string_append(buffer, "&quot;");
 484                                 break;
 485                         default:
 486                                 g_string_append_c(buffer, c);
 487                                 break;
 488                 }
 489         }
 490 #if GLIB_MAJOR_VERSION >= 2
 491         /* Return the string value contained within the GString
 492          * after getting rid of the GString structure.
 493          * This is the way to do this, see the GLib reference. */
 494         return g_string_free(buffer, FALSE);
 495 #else
 496         /* But it's not the way to do it in GLib 1.2[.x], as
 497          * 1.2[.x]'s "g_string_free()" doesn't return anything.
 498          * This is the way to do this in GLib 1.2[.x]. */
 499         ret = buffer->str;
 500         g_string_free(buffer, FALSE);
 501         return ret;
 502 #endif
 503 }
 504
 505
 506 /* Return the first occurrence of needle in haystack.
 507  * If not found, return NULL.
 508  * If either haystack or needle has 0 length, return NULL.
 509  * Algorithm copied from GNU's glibc 2.3.2 memcmp() */
 510 const guint8 *
 511 epan_memmem(const guint8 *haystack, guint haystack_len,
 512                 const guint8 *needle, guint needle_len)
 513 {
 514         const guint8 *begin;
 515         const guint8 *const last_possible
 516                 = haystack + haystack_len - needle_len;
 517
 518         if (needle_len == 0) {
 519                 return NULL;
 520         }
 521
 522         if (needle_len > haystack_len) {
 523                 return NULL;
 524         }
 525
 526         for (begin = haystack ; begin <= last_possible; ++begin) {
 527                 if (begin[0] == needle[0] &&
 528                         !memcmp(&begin[1], needle + 1,
 529                                 needle_len - 1)) {
 530                         return begin;
 531                 }
 532         }
 533
 534         return NULL;
 535 }
 536
 537 /*
 538  * Scan the search string to make sure it's valid hex.  Return the
 539  * number of bytes in nbytes.
 540  */
 541 guint8 *
 542 convert_string_to_hex(const char *string, size_t *nbytes)
 543 {
 544   size_t n_bytes;
 545   const char *p;
 546   guchar c;
 547   guint8 *bytes, *q, byte_val;
 548
 549   n_bytes = 0;
 550   p = &string[0];
 551   for (;;) {
 552     c = *p++;
 553     if (c == '\0')
 554       break;
 555     if (isspace(c))
 556       continue; /* allow white space */
 557     if (c==':' || c=='.' || c=='-')
 558       continue; /* skip any ':', '.', or '-' between bytes */
 559     if (!isxdigit(c)) {
 560       /* Not a valid hex digit - fail */
 561       return NULL;
 562     }
 563
 564     /*
 565      * We can only match bytes, not nibbles; we must have a valid
 566      * hex digit immediately after that hex digit.
 567      */
 568     c = *p++;
 569     if (!isxdigit(c))
 570       return NULL;
 571
 572     /* 2 hex digits = 1 byte */
 573     n_bytes++;
 574   }
 575
 576   /*
 577    * Were we given any hex digits?
 578    */
 579   if (n_bytes == 0) {
 580       /* No. */
 581       return NULL;
 582   }
 583
 584   /*
 585    * OK, it's valid, and it generates "n_bytes" bytes; generate the
 586    * raw byte array.
 587    */
 588   bytes = g_malloc(n_bytes);
 589   p = &string[0];
 590   q = &bytes[0];
 591   for (;;) {
 592     c = *p++;
 593     if (c == '\0')
 594       break;
 595     if (isspace(c))
 596       continue; /* allow white space */
 597     if (c==':' || c=='.' || c=='-')
 598       continue; /* skip any ':', '.', or '-' between bytes */
 599     /* From the loop above, we know this is a hex digit */
 600     if (isdigit(c))
 601       byte_val = c - '0';
 602     else if (c >= 'a')
 603       byte_val = (c - 'a') + 10;
 604     else
 605       byte_val = (c - 'A') + 10;
 606     byte_val <<= 4;
 607
 608     /* We also know this is a hex digit */
 609     c = *p++;
 610     if (isdigit(c))
 611       byte_val |= c - '0';
 612     else if (c >= 'a')
 613       byte_val |= (c - 'a') + 10;
 614     else if (c >= 'A')
 615       byte_val |= (c - 'A') + 10;
 616
 617     *q++ = byte_val;
 618   }
 619   *nbytes = n_bytes;
 620   return bytes;
 621 }
 622
 623 /*
 624  * Copy if if it's a case-sensitive search; uppercase it if it's
 625  * a case-insensitive search.
 626  */
 627 char *
 628 convert_string_case(const char *string, gboolean case_insensitive)
 629 {
 630   char *out_string;
 631   const char *p;
 632   char c;
 633   char *q;
 634
 635   if (case_insensitive) {
 636     out_string = g_malloc(strlen(string) + 1);
 637     for (p = &string[0], q = &out_string[0]; (c = *p) != '\0'; p++, q++)
 638       *q = toupper((unsigned char)*p);
 639     *q = '\0';
 640   } else
 641     out_string = g_strdup(string);
 642   return out_string;
 643 }
 644
 645 /* g_strlcat() does not exist in GLib 1.2[.x] */
 646 #if GLIB_MAJOR_VERSION < 2
 647 gsize
 648 g_strlcat(gchar *dst, gchar *src, gsize size)
 649 {
 650         int strl, strs;
 651         strl=strlen(dst);
 652         strs=strlen(src);
 653         if(strl<size)
 654                 g_snprintf(dst+strl, size-strl, "%s", src);
 655         dst[size-1]=0;
 656         return strl+strs;
 657 }
 658 #endif
 659
 660 #ifdef _WIN32
 661
 662 /*
 663  * XXX - Should we use g_utf8_to_utf16() and g_utf16_to_utf8()
 664  * instead?  The goal of the functions below was to provide simple
 665  * wrappers for UTF-8 <-> UTF-16 conversion without making the
 666  * caller worry about freeing up memory afterward.
 667  */
 668
 669 /* Convert from UTF-8 to UTF-16. */
 670 wchar_t * utf_8to16(const char *utf8str) {
 671   static wchar_t *utf16buf[3];
 672   static int utf16buf_len[3];
 673   static int idx;
 674
 675   if (utf8str == NULL)
 676     return NULL;
 677
 678   idx = (idx + 1) % 3;
 679
 680   /*
 681    * Allocate the buffer if it's not already allocated.
 682    */
 683   if (utf16buf[idx] == NULL) {
 684     utf16buf_len[idx] = INITIAL_FMTBUF_SIZE;
 685     utf16buf[idx] = g_malloc(utf16buf_len[idx] * sizeof(wchar_t));
 686   }
 687
 688   while (MultiByteToWideChar(CP_UTF8, 0, utf8str,
 689       -1, NULL, 0) >= utf16buf_len[idx]) {
 690     /*
 691      * Double the buffer's size if it's not big enough.
 692      * The size of the buffer starts at 128, so doubling its size
 693      * adds at least another 128 bytes, which is more than enough
 694      * for one more character plus a terminating '\0'.
 695      */
 696     utf16buf_len[idx] *= 2;
 697     utf16buf[idx] = g_realloc(utf16buf[idx], utf16buf_len[idx]);
 698   }
 699
 700   if (MultiByteToWideChar(CP_UTF8, 0, utf8str,
 701       -1, utf16buf[idx], utf16buf_len[idx]) == 0)
 702     return NULL;
 703
 704   return utf16buf[idx];
 705 }
 706
 707 /* Convert from UTF-16 to UTF-8. */
 708 gchar * utf_16to8(const wchar_t *utf16str) {
 709   static gchar *utf8buf[3];
 710   static int utf8buf_len[3];
 711   static int idx;
 712
 713   if (utf16str == NULL)
 714     return NULL;
 715
 716   idx = (idx + 1) % 3;
 717
 718   /*
 719    * Allocate the buffer if it's not already allocated.
 720    */
 721   if (utf8buf[idx] == NULL) {
 722     utf8buf_len[idx] = INITIAL_FMTBUF_SIZE;
 723     utf8buf[idx] = g_malloc(utf8buf_len[idx]);
 724   }
 725
 726   while (WideCharToMultiByte(CP_UTF8, 0, utf16str, -1,
 727       NULL, 0, NULL, NULL) >= utf8buf_len[idx]) {
 728     /*
 729      * Double the buffer's size if it's not big enough.
 730      * The size of the buffer starts at 128, so doubling its size
 731      * adds at least another 128 bytes, which is more than enough
 732      * for one more character plus a terminating '\0'.
 733      */
 734     utf8buf_len[idx] *= 2;
 735     utf8buf[idx] = g_realloc(utf8buf[idx], utf8buf_len[idx]);
 736   }
 737
 738   if (WideCharToMultiByte(CP_UTF8, 0, utf16str, -1,
 739       utf8buf[idx], utf8buf_len[idx], NULL, NULL) == 0)
 740     return NULL;
 741
 742   return utf8buf[idx];
 743 }
 744
 745 #endif