epan/strutil.h

   1 /* strutil.h
   2  * String utility definitions
   3  *
   4  * Wireshark - Network traffic analyzer
   5  * By Gerald Combs <gerald@wireshark.org>
   6  * Copyright 1998 Gerald Combs
   7  *
   8  * This program is free software; you can redistribute it and/or
   9  * modify it under the terms of the GNU General Public License
  10  * as published by the Free Software Foundation; either version 2
  11  * of the License, or (at your option) any later version.
  12  *
  13  * This program is distributed in the hope that it will be useful,
  14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16  * GNU General Public License for more details.
  17  *
  18  * You should have received a copy of the GNU General Public License
  19  * along with this program; if not, write to the Free Software
  20  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
  21  */
  22
  23 #ifndef __STRUTIL_H__
  24 #define __STRUTIL_H__
  25
  26 #include "ws_symbol_export.h"
  27
  28 #include <epan/wmem/wmem.h>
  29
  30 #ifdef __cplusplus
  31 extern "C" {
  32 #endif /* __cplusplus */
  33
  34 /** @file
  35  * String handling and conversion utilities.
  36  */
  37
  38 /** Given a pointer into a data buffer, and to the end of the buffer,
  39  *  find the end of the (putative) line at that position in the data
  40  *  buffer.
  41  *
  42  * @param data A pointer to the beginning of the data
  43  * @param dataend A pointer to the end of the data
  44  * @param eol A pointer that will receive the EOL location
  45  * @return A pointer to the EOL character(s) in "*eol".
  46  */
  47 const guchar *find_line_end(const guchar *data, const guchar *dataend,
  48     const guchar **eol);
  49
  50 /** Get the length of the next token in a line, and the beginning of the
  51  *  next token after that (if any).
  52  * @param linep A pointer to the beginning of the line
  53  * @param lineend A pointer to the end of the line
  54  * @param next_token Receives the location of the next token
  55  * @return The length of the next token, or 0 if there is no next token.
  56  */
  57 WS_DLL_PUBLIC
  58 int        get_token_len(const guchar *linep, const guchar *lineend,
  59     const guchar **next_token);
  60
  61 /** Given a string, generate a string from it that shows non-printable
  62  *  characters as C-style escapes, and return a pointer to it.
  63  *
  64  * @param line A pointer to the input string
  65  * @param len The length of the input string
  66  * @return A pointer to the formatted string
  67  *
  68  * @see tvb_format_text()
  69  */
  70 WS_DLL_PUBLIC
  71 gchar*     format_text(const guchar *line, size_t len);
  72
  73 /** Given a string, generate a string from it that shows non-printable
  74  *  characters as C-style escapes, and return a pointer to it.
  75  *
  76  * @param allocator The wmem scope
  77  * @param line A pointer to the input string
  78  * @param len The length of the input string
  79  * @return A pointer to the formatted string
  80  *
  81  * @see tvb_format_text()
  82  */
  83 WS_DLL_PUBLIC
  84 gchar*     format_text_wmem(wmem_allocator_t* allocator, const guchar *line, size_t len);
  85
  86 /**
  87  * Given a string, generate a string from it that shows non-printable
  88  * characters as C-style escapes except a whitespace character
  89  * (space, tab, carriage return, new line, vertical tab, or formfeed)
  90  * which will be replaced by a space, and return a pointer to it.
  91  *
  92  * @param allocator The wmem scope
  93  * @param line A pointer to the input string
  94  * @param len The length of the input string
  95  * @return A pointer to the formatted string
  96  *
  97  */
  98 WS_DLL_PUBLIC
  99 gchar*     format_text_wsp(wmem_allocator_t* allocator, const guchar *line, size_t len);
 100
 101 /**
 102  * Given a string, generate a string from it that shows non-printable
 103  * characters as the chr parameter passed, except a whitespace character
 104  * (space, tab, carriage return, new line, vertical tab, or formfeed)
 105  * which will be replaced by a space, and return a pointer to it.
 106  *
 107  * @param allocator The wmem scope
 108  * @param string A pointer to the input string
 109  * @param len The length of the input string
 110  * @param chr The character to use to replace non-printable characters
 111  * @return A pointer to the formatted string
 112  *
 113  */
 114 WS_DLL_PUBLIC
 115 gchar*     format_text_chr(wmem_allocator_t* allocator, const guchar *string, const size_t len, const guchar chr);
 116
 117
 118 /** Turn a string of hex digits with optional separators (defined by
 119  *  is_byte_sep()) into a byte array.
 120  *
 121  * @param hex_str The string of hex digits.
 122  * @param bytes The GByteArray that will receive the bytes.  This
 123  *        must be initialized by the caller.
 124  * @param force_separators If set to TRUE, separators MUST exist between
 125  *        bytes.
 126  * @return True if the string was converted successfully
 127  */
 128 WS_DLL_PUBLIC
 129 gboolean   hex_str_to_bytes(const char *hex_str, GByteArray *bytes,
 130     gboolean force_separators);
 131
 132 /** Turn a string of hex digits with optional separators (defined by encoding)
 133  * into a byte array. Unlike hex_str_to_bytes(), this will read as many hex-char
 134  * pairs as possible and not error if it hits a non-hex-char; instead it just ends
 135  * there. (i.e., like strtol()/atoi()/etc.) But it must see two hex chars at the
 136  * beginning or it will return FALSE.
 137  *
 138  * @param hex_str The string of hex digits.
 139  * @param bytes The GByteArray that will receive the bytes.  This
 140  *        must be initialized by the caller.
 141  * @param endptr if not NULL, is set to the char after the last hex character consumed.
 142  * @param encoding set to one or more bitwise-or'ed ENC_SEP_* (see proto.h)
 143  * @param fail_if_partial If set to TRUE, then the conversion fails if the whole
 144  *    hex_str is not consumed.
 145  * @return FALSE only if no bytes were generated; or if fail_if_partial is TRUE
 146  *    and the entire hex_str was not consumed.
 147  *
 148  * If no ENC_SEP_* is set, then no separators are allowed. If multiple ENC_SEP_* are
 149  * bit-or'ed, any of them can be a separator, but once the separator is seen then
 150  * only its same type is accepted for the rest of the string. (i.e., it won't convert
 151  * a "01:23-4567" even if ENC_SEP_COLON|ENC_SEP_DASH|ENC_SEP_NONE is passed in)
 152  *
 153  * This is done this way because it's likely a malformed scenario if they're mixed,
 154  * and this routine is used by dissectors via tvb_get_string_XXX routines.
 155  */
 156 WS_DLL_PUBLIC
 157 gboolean hex_str_to_bytes_encoding(const char *hex_str, GByteArray *bytes, const char **endptr,
 158                                    const guint encoding, const gboolean fail_if_partial);
 159
 160 /** Turn an RFC 3986 percent-encoded string into a byte array.
 161  *
 162  * @param uri_str The percent-encoded string.
 163  * @param bytes The GByteArray that will receive the bytes.  This
 164  *        must be initialized by the caller.
 165  * @return True if the string was converted successfully
 166  * @see format_uri()
 167  */
 168 WS_DLL_PUBLIC
 169 gboolean   uri_str_to_bytes(const char *uri_str, GByteArray *bytes);
 170
 171 /** Turn a byte array into an RFC 3986 percent-encoded string.
 172  *
 173  * @param bytes The GByteArray containing the bytes to be encoded.
 174  *        It is not modified.
 175  * @param reserved_chars Normally the "gen-delims" and "sub-delims"
 176  *        from RFC 3986 (":/?#[]@" and "!$&'()*+,;=" respectively)
 177  *        plus space (hex value 20) are treated as reserved characters.
 178  *        If this variable is non-NULL, its contents will be used
 179  *        instead.
 180  * @note Any non-printing character (as determined by isprint()), along
 181  *       with the % character itself, is always reserved.
 182  * @see uri_str_to_bytes(), format_text(), isprint()
 183  */
 184 WS_DLL_PUBLIC
 185 const gchar* format_uri(const GByteArray *bytes, const gchar *reserved_chars);
 186
 187 /** Turn an OID string representation (dot notation) into a byte array.
 188  *
 189  * @param oid_str The OID string (dot notation).
 190  * @param bytes The GByteArray that will receive the bytes.  This
 191  *        must be initialized by the caller.
 192  * @param is_absolute True if this is an absolute OID; false for relative OID.
 193  * @return True if the string was converted successfully
 194  */
 195 WS_DLL_PUBLIC
 196 gboolean   rel_oid_str_to_bytes(const char *oid_str, GByteArray *bytes, gboolean is_absolute);
 197
 198 /** Turn an OID string representation (dot notation) into a byte array.
 199  *
 200  * @param oid_str The OID string (dot notation).
 201  * @param bytes The GByteArray that will receive the bytes.  This
 202  *        must be initialized by the caller.
 203  * @return True if the string was converted successfully
 204  */
 205 WS_DLL_PUBLIC
 206 gboolean   oid_str_to_bytes(const char *oid_str, GByteArray *bytes);
 207
 208 /**
 209  * Create a copy of a GByteArray
 210  *
 211  * @param ba The byte array to be copied.
 212  * @return If ba exists, a freshly allocated copy.  NULL otherwise.
 213  *
 214  * @todo - Should this be in strutil.c?
 215  */
 216 WS_DLL_PUBLIC
 217 GByteArray *byte_array_dup(const GByteArray *ba);
 218
 219 /**
 220  * Compare the contents of two GByteArrays
 221  *
 222  * @param ba1 A byte array
 223  * @param ba2 A byte array
 224  * @return If both arrays are non-NULL and their lengths are equal and
 225  *         their contents are equal, returns TRUE.  Otherwise, returns
 226  *         FALSE.
 227  *
 228  * @todo - Should this be in strutil.c?
 229  */
 230 WS_DLL_PUBLIC
 231 gboolean byte_array_equal(GByteArray *ba1, GByteArray *ba2);
 232
 233
 234 /** Return an XML-escaped representation of the unescaped string.
 235  *  The returned string must be freed when no longer in use.
 236  *
 237  * @param unescaped The unescaped string
 238  * @return An XML-escaped representation of the input string
 239  */
 240 WS_DLL_PUBLIC
 241 gchar*     xml_escape(const gchar *unescaped);
 242
 243 /**
 244  * Return the first occurrence of needle in haystack.
 245  * Algorithm copied from GNU's glibc 2.3.2 memmem()
 246  *
 247  * @param haystack The data to search
 248  * @param haystack_len The length of the search data
 249  * @param needle The string to look for
 250  * @param needle_len The length of the search string
 251  * @return A pointer to the first occurrence of "needle" in
 252  *         "haystack".  If "needle" isn't found or is NULL, or if
 253  *         "needle_len" is 0, NULL is returned.
 254  */
 255 WS_DLL_PUBLIC
 256 const guint8 * epan_memmem(const guint8 *haystack, guint haystack_len,
 257                 const guint8 *needle, guint needle_len);
 258
 259 /** Scan a string to make sure it's valid hex.
 260  *
 261  * @param string The string to validate
 262  * @param nbytes The length of the return buffer
 263  * @return A pointer to a buffer containing the converted raw bytes.  This
 264  *         buffer must be g_free()d by the caller.
 265  */
 266 WS_DLL_PUBLIC
 267 guint8 * convert_string_to_hex(const char *string, size_t *nbytes);
 268
 269 /** Prep a string for case-sensitive vs case-insensitive searching.
 270  *
 271  * @param string The search string
 272  * @param case_insensitive TRUE if case-insensitive, FALSE if not
 273  * @return A direct copy of the string if it's a case-sensitive search and
 274  * an uppercased version if not.  In either case the string must be g_free()d
 275  * by the caller.
 276  */
 277 WS_DLL_PUBLIC
 278 char * convert_string_case(const char *string, gboolean case_insensitive);
 279
 280 /** Finds the first occurrence of string 'needle' in string 'haystack'.
 281  *  The matching is done in a case insensitive manner.
 282  *
 283  * @param haystack The string possibly containing the substring
 284  * @param needle The substring to be searched
 285  * @return A pointer into 'haystack' where 'needle' is first found.
 286  *   Otherwise it returns NULL.
 287  */
 288 WS_DLL_PUBLIC
 289 const char * epan_strcasestr(const char *haystack, const char *needle);
 290
 291 /** Guarantee a non-null string.
 292  *
 293  * @param string The string to check
 294  * @return A pointer to 'string' if it's non-null, otherwise "[NULL]".
 295  */
 296 WS_DLL_PUBLIC
 297 const char * string_or_null(const char *string);
 298
 299 WS_DLL_PUBLIC
 300 int escape_string_len(const char *string);
 301 WS_DLL_PUBLIC
 302 char * escape_string(char *dst, const char *string);
 303
 304
 305 WS_DLL_PUBLIC
 306 void IA5_7BIT_decode(unsigned char * dest, const unsigned char* src, int len);
 307
 308 /** Copy a string, escaping the 'chr' characters in it
 309  *
 310  * @param str The string to be copied
 311  * @param chr The character to be escaped
 312  * @return A copy of the string with every original 'chr' being
 313  * transformed into double 'chr'.
 314  */
 315 WS_DLL_PUBLIC
 316 gchar* ws_strdup_escape_char (const gchar *str, const gchar chr);
 317
 318 /** Copy a string, unescaping the 'chr' characters in it
 319  *
 320  * @param str The string to be copied
 321  * @param chr The character to be escaped
 322  * @return A copy of the string with every occurrence of double 'chr' in
 323  * the original string being copied as a single 'chr'.
 324  */
 325 WS_DLL_PUBLIC
 326 gchar* ws_strdup_unescape_char (const gchar *str, const gchar chr);
 327
 328 /** Replace values in a string
 329  *
 330  * @param str String containing 0 or more values to be replaced.
 331  * @param old_val Old value.
 332  * @param new_val New value. May be NULL, in which case occurrences of
 333  *                           old_val will be removed.
 334  * @return A newly-allocated version of str with replacement values or
 335  * NULL on failure.
 336  */
 337 WS_DLL_PUBLIC
 338 gchar *string_replace(const gchar* str, const gchar *old_val, const gchar *new_val);
 339
 340 #ifdef __cplusplus
 341 }
 342 #endif /* __cplusplus */
 343
 344 #endif /* __STRUTIL_H__ */