Add missing comments in syntax description for -z expert

[obnox/wireshark/wip.git] / doc / README.developer
diff --git a/doc/README.developer b/doc/README.developer

index 7112feca66f378edd2f8ff787054d54be1e96889..6cb335e02f5584be99ae094539e0cc1b779d9944 100644 (file)
--- a/doc/README.developer
+++ b/doc/README.developer
@@ -100,15 +100,16 @@ Don't declare variables in the middle of executable code; not all C
  compilers support that.  Variables should be declared outside a
  function, or at the beginning of a function or compound statement.
  
-Don't use anonymous unions; not all compilers support it.
+Don't use anonymous unions; not all compilers support them.
  Example:
-typedef struct foo {
-  guint32 foo;
-  union {
-    guint32 foo_l;
-    guint16 foo_s;
-  } u;  /* have a name here */
-} foo_t;
+
+       typedef struct foo {
+         guint32 foo;
+         union {
+           guint32 foo_l;
+           guint16 foo_s;
+         } u;  /* have a name here */
+       } foo_t;
  
  Don't use "uchar", "u_char", "ushort", "u_short", "uint", "u_int",
  "ulong", "u_long" or "boolean"; they aren't defined on all platforms.
@@ -193,6 +194,26 @@ rather than
  
         11644473600ULL
  
+Don't assume that you can scan through a va_list initialized by va_start
+more than once without closing it with va_end and re-initializing it with
+va_start.  This applies even if you're not scanning through it yourself,
+but are calling a routine that scans through it, such as vfprintf() or
+one of the routines in Wireshark that takes a format and a va_list as an
+argument.  You must do
+
+       va_start(ap, format);
+       call_routine1(xxx, format, ap);
+       va_end(ap);
+       va_start(ap, format);
+       call_routine2(xxx, format, ap);
+       va_end(ap);
+
+rather than
+       va_start(ap, format);
+       call_routine1(xxx, format, ap);
+       call_routine2(xxx, format, ap);
+       va_end(ap);
+
  Don't use a label without a statement following it.  For example,
  something such as
  
@@ -413,10 +434,11 @@ cause a trap, which will, at best, result in the OS slowly performing an
  unaligned access for you, and will, on at least some platforms, cause
  the program to be terminated.
  
-Wireshark supports platforms with GLib 2.4[.x]/GTK+ 2.4[.x] or newer.
-If a Glib/GTK+ mechanism is available only in Glib/GTK+ versions
-newer than 2.4/2.4 then use "#if GTK_CHECK_VERSION(...)" to conditionally
-compile code using that mechanism.
+Wireshark supports platforms with GLib 2.14[.x]/GTK+ 2.12[.x] or newer.
+If a Glib/GTK+ mechanism is available only in Glib/GTK+ versions newer
+than 2.14/2.12 then use "#if GLIB_CHECK_VERSION(...)" or "#if
+GTK_CHECK_VERSION(...)" to conditionally compile code using that
+mechanism.
  
  When different code must be used on UN*X and Win32, use a #if or #ifdef
  that tests _WIN32, not WIN32.  Try to write code portably whenever
@@ -607,6 +629,17 @@ the length was added to it, if the length field is greater than 24 bits
  long, so that, if the length value is *very* large and adding it to the
  offset causes an overflow, that overflow is detected.
  
+If you have a
+
+       for (i = {start}; i < {end}; i++)
+
+loop, make sure that the type of the loop index variable is large enough
+to hold the maximum {end} value plus 1; otherwise, the loop index
+variable can overflow before it ever reaches its maximum value.  In
+particular, be very careful when using gint8, guint8, gint16, or guint16
+variables as loop indices; you almost always want to use an "int"/"gint"
+or "unsigned int"/"guint" as the loop index rather than a shorter type.
+
  If you are fetching a length field from the buffer, corresponding to the
  length of a portion of the packet, and subtracting from that length a
  value corresponding to the length of, for example, a header in the
@@ -671,6 +704,8 @@ indentation logic at the beginning of this new file, especially if
  you're using non-mod-8 tabs.  The tabs-vs-spaces document above provides
  examples of Emacs and vi modelines for this purpose.
  
+Please do not leave trailing whitespace (spaces/tabs) on lines.
+
  When editing an existing file, try following the existing indentation
  logic and even if it is very tempting, never ever use a restyler/reindenter
  utility on an existing file.  If you run across wildly varying
@@ -711,17 +746,7 @@ protocol, followed by ".h"; any dissector file that calls your dissector
  should be changed to include that file.
  
  You may not need to include all the headers listed in the skeleton
-below, and you may need to include additional headers.  For example, the
-code inside
-
-       #ifdef HAVE_LIBPCRE
-
-               ...
-
-       #endif
-
-is needed only if you are using a function from libpcre, e.g. the
-"pcre_compile()" function.
+below, and you may need to include additional headers.
  
  The stdio.h, stdlib.h and string.h header files should be included only as needed.
  
@@ -903,13 +928,13 @@ dissect_PROTOABBREV(tvbuff_t *tvb, packet_info *pinfo, proto_tree *tree)
     offset to the end of the packet. */
  
  /* create display subtree for the protocol */
-               ti = proto_tree_add_item(tree, proto_PROTOABBREV, tvb, 0, -1, FALSE);
+               ti = proto_tree_add_item(tree, proto_PROTOABBREV, tvb, 0, -1, ENC_NA);
  
                 PROTOABBREV_tree = proto_item_add_subtree(ti, ett_PROTOABBREV);
  
  /* add an item to the subtree, see section 1.6 for more information */
                 proto_tree_add_item(PROTOABBREV_tree,
-                   hf_PROTOABBREV_FIELDABBREV, tvb, offset, len, FALSE);
+                   hf_PROTOABBREV_FIELDABBREV, tvb, offset, len, ENC_xxx);
  
  
  /* Continue adding tree items to process the packet here */
@@ -1017,8 +1042,6 @@ proto_reg_handoff_PROTOABBREV(void)
   */
                 PROTOABBREV_handle = new_create_dissector_handle(dissect_PROTOABBREV,
                                                                  proto_PROTOABBREV);
-               dissector_add("PARENT_SUBFIELD", ID_VALUE, PROTOABBREV_handle);
-
                 initialized = TRUE;
         } else {
  
@@ -1033,12 +1056,12 @@ proto_reg_handoff_PROTOABBREV(void)
                   function (proto_reg_handoff).
                 */
  
-               dissector_delete("tcp.port", currentPort, PROTOABBREV_handle);
+               dissector_delete_uint("tcp.port", currentPort, PROTOABBREV_handle);
         }
  
         currentPort = gPORT_PREF;
  
-       dissector_add("tcp.port", currentPort, PROTOABBREV_handle);
+       dissector_add_uint("tcp.port", currentPort, PROTOABBREV_handle);
  
  }
  
@@ -1058,11 +1081,25 @@ proto_reg_handoff_PROTOABBREV(void)
   */
         PROTOABBREV_handle = new_create_dissector_handle(dissect_PROTOABBREV,
                                                          proto_PROTOABBREV);
-       dissector_add("PARENT_SUBFIELD", ID_VALUE, PROTOABBREV_handle);
+       dissector_add_uint("PARENT_SUBFIELD", ID_VALUE, PROTOABBREV_handle);
  }
  #endif
  
  
+/*
+ * Editor modelines  -  http://www.wireshark.org/tools/modelines.html
+ *
+ * Local variables:
+ * c-basic-offset: 4
+ * tab-width: 8
+ * indent-tabs-mode: nil
+ * End:
+ *
+ * vi: set shiftwidth=4 tabstop=8 expandtab:
+ * :indentSize=4:tabSize=8:noTabs=true:
+ */
+
+
  ------------------------------------Cut here------------------------------------
  
  1.3 Explanation of needed substitutions in code skeleton.
@@ -1090,7 +1127,7 @@ FIELDABBREV       The abbreviated name for the header field. (NO SPACES)
  FIELDTYPE      FT_NONE, FT_BOOLEAN, FT_UINT8, FT_UINT16, FT_UINT24,
                 FT_UINT32, FT_UINT64, FT_INT8, FT_INT16, FT_INT24, FT_INT32,
                 FT_INT64, FT_FLOAT, FT_DOUBLE, FT_ABSOLUTE_TIME,
-               FT_RELATIVE_TIME, FT_STRING, FT_STRINGZ, FT_EBCDIC,
+               FT_RELATIVE_TIME, FT_STRING, FT_STRINGZ, FT_EUI64,
                 FT_UINT_STRING, FT_ETHER, FT_BYTES, FT_UINT_BYTES, FT_IPv4,
                 FT_IPv6, FT_IPXNET, FT_FRAMENUM, FT_PROTOCOL, FT_GUID, FT_OID
  FIELDDISPLAY   For FT_UINT{8,16,24,32,64} and FT_INT{8,16,24,32,64}:
@@ -1113,7 +1150,7 @@ FIELDDISPLAY      For FT_UINT{8,16,24,32,64} and FT_INT{8,16,24,32,64):
                 BASE_NONE
  FIELDCONVERT   VALS(x), RVALS(x), TFS(x), NULL
  BITMASK                Usually 0x0 unless using the TFS(x) field conversion.
-FIELDDESCR     A brief description of the field, or NULL.
+FIELDDESCR     A brief description of the field, or NULL. [Please do not use ""].
  PARENT_SUBFIELD        Lower level protocol field used for lookup, i.e. "tcp.port"
  ID_VALUE       Lower level protocol field value that identifies this protocol
                 For example the TCP or UDP port number
@@ -1168,6 +1205,9 @@ integers, 32-bit integers (guint32), and 64-bit integers (guint64):
  guint16 tvb_get_ntohs(tvbuff_t*, gint offset);
  guint32 tvb_get_ntoh24(tvbuff_t*, gint offset);
  guint32 tvb_get_ntohl(tvbuff_t*, gint offset);
+guint64 tvb_get_ntoh40(tvbuff_t*, gint offset);
+guint64 tvb_get_ntoh48(tvbuff_t*, gint offset);
+guint64 tvb_get_ntoh56(tvbuff_t*, gint offset);
  guint64 tvb_get_ntoh64(tvbuff_t*, gint offset);
  
  Network-to-host-order accessors for single-precision and
@@ -1183,6 +1223,9 @@ Little-Endian-to-host-order accessors for 16-bit integers (guint16),
  guint16 tvb_get_letohs(tvbuff_t*, gint offset);
  guint32 tvb_get_letoh24(tvbuff_t*, gint offset);
  guint32 tvb_get_letohl(tvbuff_t*, gint offset);
+guint64 tvb_get_letoh40(tvbuff_t*, gint offset);
+guint64 tvb_get_letoh48(tvbuff_t*, gint offset);
+guint64 tvb_get_letoh56(tvbuff_t*, gint offset);
  guint64 tvb_get_letoh64(tvbuff_t*, gint offset);
  
  Little-Endian-to-host-order accessors for single-precision and
@@ -1212,7 +1255,9 @@ void tvb_get_letohguid(tvbuff_t *, gint offset, e_guid_t *guid);
  String accessors:
  
  guint8 *tvb_get_string(tvbuff_t*, gint offset, gint length);
+gchar  *tvb_get_unicode_string(tvbuff_t *tvb, const gint offset, gint length, const guint encoding);
  guint8 *tvb_get_ephemeral_string(tvbuff_t*, gint offset, gint length);
+gchar  *tvb_get_ephemeral_unicode_string(tvbuff_t *tvb, const gint offset, gint length, const guint encoding);
  guint8 *tvb_get_seasonal_string(tvbuff_t*, gint offset, gint length);
  
  Returns a null-terminated buffer containing data from the specified
@@ -1224,55 +1269,68 @@ tvb_get_string() returns a buffer allocated by g_malloc() so you must
  g_free() it when you are finished with the string. Failure to g_free() this
  buffer will lead to memory leaks.
  
+tvb_get_unicode_string() is a unicode (UTF-16) version of above.  This
+is intended for reading UTF-16 unicode strings out of a tvbuff and
+returning them as a UTF-8 string for use in Wireshark.  The offset and
+returned length pointer are in bytes, not UTF-16 characters.
+
  tvb_get_ephemeral_string() returns a buffer allocated from a special heap
  with a lifetime until the next packet is dissected. You do not need to
  free() this buffer, it will happen automatically once the next packet is
  dissected.
  
+tvb_get_ephemeral_unicode_string() is a unicode (UTF-16) version of above.
+This is intended for reading UTF-16 unicode strings out of a tvbuff and
+returning them as a UTF-8 string for use in Wireshark.  The offset and
+returned length pointer are in bytes, not UTF-16 characters.
+
  tvb_get_seasonal_string() returns a buffer allocated from a special heap
  with a lifetime of the current capture session. You do not need to
  free() this buffer, it will happen automatically once a new capture or
  file is opened.
  
  guint8 *tvb_get_stringz(tvbuff_t *tvb, gint offset, gint *lengthp);
+const guint8 *tvb_get_const_stringz(tvbuff_t *tvb, gint offset, gint *lengthp);
  guint8 *tvb_get_ephemeral_stringz(tvbuff_t *tvb, gint offset, gint *lengthp);
+gchar  *tvb_get_ephemeral_unicode_stringz(tvbuff_t *tvb, const gint offset, gint *lengthp, const guint encoding);
  guint8 *tvb_get_seasonal_stringz(tvbuff_t *tvb, gint offset, gint *lengthp);
  
-Returns a null-terminated buffer, allocated with "g_malloc()",
-containing data from the specified tvbuff, starting at the
-specified offset, and containing all characters from the tvbuff up to
-and including a terminating null character in the tvbuff.  "*lengthp"
-will be set to the length of the string, including the terminating null.
+Returns a null-terminated buffer containing data from the specified tvbuff,
+starting at the specified offset, and containing all characters from the
+tvbuff up to and including a terminating null character in the tvbuff.
+"*lengthp" will be set to the length of the string, including the terminating
+null.
  
  tvb_get_stringz() returns a buffer allocated by g_malloc() so you must
  g_free() it when you are finished with the string. Failure to g_free() this
  buffer will lead to memory leaks.
+
+tvb_get_const_stringz() returns a pointer to the (const) string in the tvbuff.
+You do not need to free() this buffer, it will happen automatically once the
+next packet is dissected.  This function is slightly more efficient than the
+others because it does not allocate memory and copy the string.
+
  tvb_get_ephemeral_stringz() returns a buffer allocated from a special heap
  with a lifetime until the next packet is dissected. You do not need to
  free() this buffer, it will happen automatically once the next packet is
  dissected.
  
+tvb_get_ephemeral_unicode_stringz() is a unicode (UTF-16) version of
+above.  This is intended for reading UTF-16 unicode strings out of a tvbuff
+and returning them as a UTF-8 string for use in Wireshark.  The offset and
+returned length pointer are in bytes, not UTF-16 characters.
+
  tvb_get_seasonal_stringz() returns a buffer allocated from a special heap
  with a lifetime of the current capture session. You do not need to
  free() this buffer, it will happen automatically once a new capture or
  file is opened.
  
-guint8 *tvb_fake_unicode(tvbuff_t*, gint offset, gint length, gboolean little_endian);
-guint8 *tvb_get_ephemeral_faked_unicode(tvbuff_t*, gint offset, gint length, gboolean little_endian);
+tvb_fake_unicode() has been superseded by tvb_get_unicode_string(), which
+properly handles Unicode (UTF-16) strings by converting them to UTF-8.
  
-Converts a 2-byte unicode string to an ASCII string.
-Returns a null-terminated buffer containing data from the specified
-tvbuff, starting at the specified offset, and containing the specified
-length worth of characters (the length of the buffer will be length+1,
-as it includes a null character to terminate the string).
-
-tvb_fake_unicode() returns a buffer allocated by g_malloc() so you must
-g_free() it when you are finished with the string. Failure to g_free() this
-buffer will lead to memory leaks.
-tvb_get_ephemeral_faked_unicode() returns a buffer allocated from a special
-heap with a lifetime until the next packet is dissected. You do not need to
-free() this buffer, it will happen automatically once the next packet is
-dissected.
+tvb_get_ephemeral_faked_unicode() has been superseded by
+tvb_get_ephemeral_unicode_string(), which properly handles Unicode (UTF-16)
+strings by converting them to UTF-8.
  
  Byte Array Accessors:
  
@@ -1290,6 +1348,15 @@ gchar *tvb_bytes_to_str_punct(tvbuff_t *tvb, gint offset, gint len, gchar punct)
  This function is similar to tvb_bytes_to_str(...) except that 'punct' is inserted
  between the hex representation of each byte.
  
+gchar *tvb_bcd_dig_to_ep_str(tvbuff_t *tvb, const gint offset, const gint len, dgt_set_t *dgt, gboolean skip_first);
+
+Given a tvbuff, an offset into the tvbuff, and a length that starts
+at that offset (which may be -1 for "all the way to the end of the
+tvbuff"), fetch BCD encoded digits from a tvbuff starting from either
+the low or high half byte, formatting the digits according to an input digit set;
+if NULL, a default digit set of 0-9 returning "?" for overdecadic digits will be used.
+A pointer to the EP allocated string will be returned.
+Note: a tvbuff content of 0xf is considered a 'filler' and will end the conversion.
  
  Copying memory:
  guint8* tvb_memcpy(tvbuff_t*, guint8* target, gint offset, gint length);
@@ -1660,11 +1727,10 @@ The type of value this field holds. The current field types are:
         FT_INT64                A 64-bit signed integer.
         FT_FLOAT                A single-precision floating point number.
         FT_DOUBLE               A double-precision floating point number.
-       FT_ABSOLUTE_TIME        Seconds (4 bytes) and nanoseconds (4 bytes)
-                               of time since January 1, 1970, midnight
-                               UTC, displayed as the date, followed by
-                               the time, as hours, minutes, and seconds
-                               with 9 digits after the decimal point.
+       FT_ABSOLUTE_TIME        An absolute time from some fixed point in time,
+                               displayed as the date, followed by the time, as
+                               hours, minutes, and seconds with 9 digits after
+                               the decimal point.
         FT_RELATIVE_TIME        Seconds (4 bytes) and nanoseconds (4 bytes)
                                 of time relative to an arbitrary time.
                                 displayed as seconds and 9 digits
@@ -1675,10 +1741,11 @@ The type of value this field holds. The current field types are:
                                 types, are to be used for text strings,
                                 not raw binary data.
         FT_STRINGZ              A NUL-terminated string of characters.
-       FT_EBCDIC               A string of characters, not necessarily
-                               NUL-terminated, but possibly NUL-padded.
-                               The data from the packet is converted from
-                               EBCDIC to ASCII before displaying to the user.
+                               The string length is normally the length
+                               given in the proto_tree_add_item() call.
+                               However if the length given in the call
+                               is -1, then the length used is that
+                               returned by calling tvb_strsize().
         FT_UINT_STRING          A counted string of characters, consisting
                                 of a count (represented as an integral value,
                                 of width given in the proto_tree_add_item()
@@ -1703,6 +1770,7 @@ The type of value this field holds. The current field types are:
                                 address.
         FT_GUID                 A Globally Unique Identifier
         FT_OID                  An ASN.1 Object Identifier
+       FT_EUI64                An EUI-64 Address
  
  Some of these field types are still not handled in the display filter
  routines, but the most common ones are. The FT_UINT* variables all
@@ -1777,6 +1845,7 @@ integral fields.
  
  strings
  -------
+-- value_string
  Some integer fields, of type FT_UINT*, need labels to represent the true
  value of a field.  You could think of those fields as having an
  enumerated data type, rather than an integral data type.
@@ -1803,6 +1872,37 @@ indicate the end of the array).  The 'strings' field would be set to
  If the field has a numeric rather than an enumerated type, the 'strings'
  field would be set to NULL.
  
+-- Extended value strings
+You can also use an extended version of the value_string for faster lookups.
+It requires a value_string as input.
+If all of a contiguous range of values from min to max are present in the array
+the value will be used as a direct index into a value_string array.
+
+If the values in the array are not contiguous (ie: there are "gaps"), but are
+in ascending order, a binary search will be used.
+
+Note: "gaps" in a value_string array can be filled with "empty" entries eg:
+{value, "Unknown"} so that direct access to the array is possible.
+
+The init macro (see below) will perform a check on the value string the first
+time it is used to determine which search algorithm fits and fall back to a
+linear search if the value_string does not meet the criteria above.
+
+Use this macro to initialise the extended value_string at compile time:
+
+static value_string_ext valstringname_ext = VALUE_STRING_EXT_INIT(valstringname);
+
+Extended value strings can be created at runtime by calling
+   value_string_ext_new(<ptr to value_string array>,
+                        <total number of entries in the value_string_array>, /* include {0, NULL} entry */
+                        <value_string_name>);
+
+For hf[] array FT_(U)INT* fields that need a 'valstringname_ext' struct, the
+'strings' field would be set to '&valstringname_ext'. Furthermore, 'display'
+field must be ORed with 'BASE_EXT_STRING' (e.g. BASE_DEC|BASE_EXT_STRING).
+
+
+-- Ranges
  If the field has a numeric type that might logically fit in ranges of values
  one can use a range_string struct.
  
@@ -1828,6 +1928,7 @@ For FT_(U)INT* fields that need a 'range_string' struct, the 'strings' field
  would be set to 'RVALS(rvalstringname)'. Furthermore, 'display' field must be
  ORed with 'BASE_RANGE_STRING' (e.g. BASE_DEC|BASE_RANGE_STRING).
  
+-- Booleans
  FT_BOOLEANs have a default map of 0 = "False", 1 (or anything else) = "True".
  Sometimes it is useful to change the labels for boolean values (e.g.,
  to "Yes"/"No", "Fast"/"Slow", etc.).  For these mappings, a struct called
@@ -1871,7 +1972,7 @@ blurb
  -----
  This is a string giving a proper description of the field.  It should be
  at least one grammatically complete sentence, or NULL in which case the
-name field is used.
+name field is used. (Please do not use "").
  It is meant to provide a more detailed description of the field than the
  name alone provides. This information will be used in the man page, and
  in a future GUI display-filter creation tool. We might also add tooltips
@@ -1982,11 +2083,17 @@ array of pointers to "gint" variables to hold the subtree type values to
  in your "register" routine, just as you register the protocol and the
  fields for that protocol.
  
+The ett_ variables identify a particular type of subtree so that if you expand
+one of them, Wireshark keeps track of that and, when you click on
+another packet, it automatically opens all subtrees of that type.
+If you close one of them, all subtrees of that type will be closed when
+you move to another packet.
+
  There are several functions that the programmer can use to add either
  protocol or field labels to the proto_tree:
  
         proto_item*
-       proto_tree_add_item(tree, id, tvb, start, length, little_endian);
+       proto_tree_add_item(tree, id, tvb, start, length, encoding);
  
         proto_item*
         proto_tree_add_none_format(tree, id, tvb, start, length, format, ...);
@@ -2173,6 +2280,17 @@ protocol or field labels to the proto_tree:
         proto_tree_add_oid_format(tree, id, tvb, start, length, value_ptr,
                 format, ...);
  
+       proto_item *
+       proto_tree_add_eui64(tree, id, tvb, start, length, value);
+
+       proto_item *
+       proto_tree_add_eui64_format(tree, id, tvb, start, length, value,
+               format, ...);
+
+       proto_item *
+       proto_tree_add_eui64_format_value(tree, id, tvb, start, length,
+               value, format, ...);
+
         proto_item *
         proto_tree_add_oid_format_value(tree, id, tvb, start, length,
                 value_ptr, format, ...);
@@ -2216,9 +2334,52 @@ proto_tree_add_item is used when you wish to do no special formatting.
  The item added to the GUI tree will contain the name (as passed in the
  proto_register_*() function) and a value.  The value will be fetched
  from the tvbuff by proto_tree_add_item(), based on the type of the field
-and, for integral and Boolean fields, the byte order of the value; the
-byte order is specified by the 'little_endian' argument, which is TRUE
-if the value is little-endian and FALSE if it is big-endian.
+and the encoding of the value as specified by the "encoding" argument.
+
+For FT_NONE, FT_BYTES, FT_ETHER, FT_IPv6, FT_IPXNET, FT_OID fields,
+and 'protocol' fields the encoding is not relevant; the 'encoding'
+argument should be ENC_NA (Not Applicable).
+
+For integral, floating-point, Boolean, FT_GUID, and FT_EUI64 fields,
+the encoding specifies the byte order of the value; the 'encoding'
+argument should be ENC_LITTLE_ENDIAN if the value is little-endian
+and ENC_BIG_ENDIAN if it is big-endian.
+
+For FT_IPv4 fields, the encoding also specifies the byte order of the
+value.  In almost all cases, the encoding is in network byte order,
+hence big-endian, but in at least one protocol dissected by Wireshark,
+at least one IPv4 address is byte-swapped, so it's in little-endian
+order.
+
+For string fields, the encoding specifies the character set used for the
+string and the way individual code points in that character set are
+encoded.  For FT_UINT_STRING fields, the byte order of the count must be
+specified; when support for UTF-16 encoding is added, the byte order of
+the encoding will also have to be specified.  In other cases, ENC_NA
+should be used.  The character encodings that are currently
+supported are:
+
+       ENC_UTF_8 - UTF-8
+       ENC_ASCII - ASCII (currently treated as UTF-8; in the future,
+               all bytes with the 8th bit set will be treated as
+               errors)
+       ENC_EBCDIC - EBCDIC
+
+Other encodings will be added in the future.
+
+For FT_ABSOLUTE_TIME fields, the encoding specifies the form in which
+the time stamp is specified, as well as its byte order.  The time stamp
+encodings that are currently supported are:
+
+       ENC_TIME_TIMESPEC - seconds (4 bytes) and nanoseconds (4 bytes)
+               of time since January 1, 1970, midnight UTC.
+
+       ENC_TIME_NTP - an NTP timestamp, represented as a 64-bit
+               unsigned fixed-point number, in seconds relative to 0h
+               on 1 January 1900.  The integer part is in the first 32
+               bits and the fraction part in the last 32 bits.
+
+For other types, there is no support for proto_tree_add_item().
  
  Now that definitions of fields have detailed information about bitfield
  fields, you can use proto_tree_add_item() with no extra processing to
@@ -2239,7 +2400,8 @@ against the parent field, the first byte of the TH.
  
  The code to add the FID to the tree would be;
  
-       proto_tree_add_item(bf_tree, hf_sna_th_fid, tvb, offset, 1, TRUE);
+       proto_tree_add_item(bf_tree, hf_sna_th_fid, tvb, offset, 1,
+           ENC_BIG_ENDIAN);
  
  The definition of the field already has the information about bitmasking
  and bitshifting, so it does the work of masking and shifting for us!
@@ -2298,6 +2460,7 @@ proto_tree_add_int()
  proto_tree_add_int64()
  proto_tree_add_guid()
  proto_tree_add_oid()
+proto_tree_add_eui64()
  ------------------------
  These routines are used to add items to the protocol tree if either:
  
@@ -2317,6 +2480,10 @@ any more.
  For proto_tree_add_bytes(), the 'value_ptr' argument is a pointer to a
  sequence of bytes.
  
+For proto_tree_add_bytes_format() and proto_tree_add_bytes_format_value(), the
+'value_ptr' argument is a pointer to a sequence of bytes or NULL if the bytes
+should be taken from the given TVB using the given offset and length.
+
  For proto_tree_add_time(), the 'value_ptr' argument is a pointer to an
  "nstime_t", which is a structure containing the time to be added; it has
  'secs' and 'nsecs' members, giving the integral part and the fractional
@@ -2369,6 +2536,9 @@ e_guid_t structure.
  For proto_tree_add_oid(), the 'value_ptr' argument is a pointer to an
  ASN.1 Object Identifier.
  
+For proto_tree_add_eui64(), the 'value' argument is a 64-bit integer
+value.
+
  proto_tree_add_bytes_format()
  proto_tree_add_time_format()
  proto_tree_add_ipxnet_format()
@@ -2385,6 +2555,7 @@ proto_tree_add_int_format()
  proto_tree_add_int64_format()
  proto_tree_add_guid_format()
  proto_tree_add_oid_format()
+proto_tree_add_eui64_format()
  ----------------------------
  These routines are used to add items to the protocol tree when the
  dissector routine wants complete control over how the field and value
@@ -2410,6 +2581,7 @@ proto_tree_add_int_format_value()
  proto_tree_add_int64_format_value()
  proto_tree_add_guid_format_value()
  proto_tree_add_oid_format_value()
+proto_tree_add_eui64_format_value()
  ------------------------------------
  
  These routines are used to add items to the protocol tree when the
@@ -2623,13 +2795,15 @@ skeleton of how the programmer might code this.
         for(i = 0; i < num_rings; i++) {
                 proto_item *pi;
  
-               pi = proto_tree_add_item(tree, hf_tr_rif_ring, ..., FALSE);
+               pi = proto_tree_add_item(tree, hf_tr_rif_ring, ...,
+                   ENC_BIG_ENDIAN);
                 PROTO_ITEM_SET_HIDDEN(pi);
         }
         for(i = 0; i < num_rings - 1; i++) {
                 proto_item *pi;
  
-               pi = proto_tree_add_item(tree, hf_tr_rif_bridge, ..., FALSE);
+               pi = proto_tree_add_item(tree, hf_tr_rif_bridge, ...,
+                   ENC_BIG_ENDIAN);
                 PROTO_ITEM_SET_HIDDEN(pi);
         }
  
@@ -2659,7 +2833,7 @@ clicks as well, launching the configured browser with this URL as parameter.
  
  1.7 Utility routines.
  
-1.7.1 match_strval and val_to_str.
+1.7.1 match_strval, match_strval_ext, val_to_str and val_to_str_ext.
  
  A dissector may need to convert a value to a string, using a
  'value_string' structure, by hand, rather than by declaring a field with
@@ -2694,6 +2868,17 @@ You can use it in a call to generate a COL_INFO line for a frame such as
  
         col_add_fstr(COL_INFO, ", %s", val_to_str(val, table, "Unknown %d"));
  
+The match_strval_ext and val_to_str_ext functions are "extended" versions
+of match_strval and val_to_str. They should be used for large value-string
+arrays which contain many entries. They implement value to string conversions
+which will do either a direct access or a binary search of the
+value string array if possible. See "Extended Value Strings" under
+section 1.6 "Constructing the protocol tree" for more information.
+
+See epan/value_string.h for detailed information on the various value_string
+functions.
+
+
  1.7.2 match_strrval and rval_to_str.
  
  A dissector may need to convert a range of values to a string, using a
@@ -2803,6 +2988,34 @@ to the DISSECTOR_SRC section of epan/CMakeLists.txt
  
    See <http://www.wireshark.org/develop.html>
  
+
+1.10a Using git with the SVN source code tree.
+
+  Install git and the git-svn package.
+  Run "mkdir git; cd git; git svn clone <svn-url>", e.g. if you are using
+  the anonymous svn tree, run
+  "git svn clone http://anonsvn.wireshark.org/wireshark/trunk/"
+
+  After that, a typical workflow may look like this (from "man git-svn"):
+
+  # Clone a repo (like git clone):
+          git svn clone http://svn.example.com/project/trunk
+  # Enter the newly cloned directory:
+          cd trunk
+  # You should be on master branch, double-check with 'git branch'
+          git branch
+  # Do some work and commit locally to git:
+          git commit ...
+  # Something is committed to SVN, rebase your local changes against the
+  # latest changes in SVN:
+          git svn rebase
+  # Now commit your changes (that were committed previously using git) to SVN
+  # as well as automatically updating your working HEAD:
+          git svn dcommit
+  # Append svn:ignore settings to the default git exclude file:
+          git svn show-ignore >> .git/info/exclude
+
+
  1.11 Submitting code for your new dissector.
  
    - VERIFY that your dissector code does not use prohibited or deprecated APIs
@@ -2856,25 +3069,29 @@ address:port combinations.  A conversation is not sensitive to the direction of
  the packet.  The same conversation will be returned for a packet bound from
  ServerA:1000 to ClientA:2000 and the packet from ClientA:2000 to ServerA:1000.
  
-There are five routines that you will use to work with a conversation:
+2.2.1 Conversation Routines
+
+There are six routines that you will use to work with a conversation:
  conversation_new, find_conversation, conversation_add_proto_data,
-conversation_get_proto_data, and conversation_delete_proto_data.
+conversation_get_proto_data, conversation_delete_proto_data,
+and conversation_set_dissector.
  
  
-2.2.1 The conversation_init function.
+2.2.1.1 The conversation_init function.
  
  This is an internal routine for the conversation code.  As such you
  will not have to call this routine.  Just be aware that this routine is
  called at the start of each capture and before the packets are filtered
  with a display filter.  The routine will destroy all stored
  conversations.  This routine does NOT clean up any data pointers that are
-passed in the conversation_new 'data' variable.  You are responsible for
-this clean up if you pass a malloc'ed pointer in this variable.
+passed in the conversation_add_proto_data 'data' variable.  You are
+responsible for this clean up if you pass a malloc'ed pointer
+in this variable.
  
-See item 2.2.8 for more information about the 'data' pointer.
+See item 2.2.1.5 for more information about use of the 'data' pointer.
  
  
-2.2.2 The conversation_new function.
+2.2.1.2 The conversation_new function.
  
  This routine will create a new conversation based upon two address/port
  pairs.  If you want to associate with the conversation a pointer to a
@@ -2920,7 +3137,7 @@ packet indicates that, later in the capture, a conversation will be
  created using certain addresses and ports, in the case where the packet
  doesn't specify the addresses and ports of both sides.
  
-2.2.3 The find_conversation function.
+2.2.1.3 The find_conversation function.
  
  Call this routine to look up a conversation.  If no conversation is found,
  the routine will return a NULL value.
@@ -2970,7 +3187,25 @@ any "wildcarded" address and the "port_b" port will be treated as
  matching any "wildcarded" port.
  
  
-2.2.4 The conversation_add_proto_data function.
+2.2.1.4 The find_or_create_conversation function.
+
+This convenience function will find an existing conversation (by calling
+find_conversation()) and, if a conversation does not already exist, create a
+new conversation by calling conversation_new().
+
+The find_or_create_conversation prototype:
+
+       extern conversation_t *find_or_create_conversation(packet_info *pinfo);
+
+Where:
+       packet_info *pinfo = the packet_info structure
+
+The frame number and the addresses necessary for find_conversation() and
+conversation_new() are taken from the pinfo structure (as is commonly done)
+and no 'options' are used.
+
+
+2.2.1.5 The conversation_add_proto_data function.
  
  Once you have created a conversation with conversation_new, you can
  associate data with it using this function.
@@ -2989,11 +3224,14 @@ Where:
  unique protocol number created with proto_register_protocol.  Protocols
  are typically registered in the proto_register_XXXX section of your
  dissector.  "data" is a pointer to the data you wish to associate with the
-conversation.  Using the protocol number allows several dissectors to
+conversation.  "data" usually points to "se_alloc'd" memory; the
+memory will be automatically freed each time a new dissection begins
+and thus need not be managed (freed) by the dissector.
+Using the protocol number allows several dissectors to
  associate data with a given conversation.
  
  
-2.2.5 The conversation_get_proto_data function.
+2.2.1.6 The conversation_get_proto_data function.
  
  After you have located a conversation with find_conversation, you can use
  this function to retrieve any data associated with it.
@@ -3012,12 +3250,12 @@ typically in the proto_register_XXXX portion of a dissector.  The function
  returns a pointer to the data requested, or NULL if no data was found.
  
  
-2.2.6 The conversation_delete_proto_data function.
+2.2.1.7 The conversation_delete_proto_data function.
  
  After you are finished with a conversation, you can remove your association
  with this function.  Please note that ONLY the conversation entry is
-removed.  If you have allocated any memory for your data, you must free it
-as well.
+removed.  If you have allocated any memory for your data (other than with
+se_alloc), you must free it as well.
  
  The conversation_delete_proto_data prototype:
  
@@ -3031,8 +3269,22 @@ Where:
  is a unique protocol number created with proto_register_protocol,
  typically in the proto_register_XXXX portion of a dissector.
  
+2.2.1.8 The conversation_set_dissector function
+
+This function sets the protocol dissector to be invoked whenever
+conversation parameters (addresses, port_types, ports, etc) are matched
+during the dissection of a packet.
+
+The conversation_set_dissector prototype:
  
-2.2.7 Using timestamps relative to the conversation
+        void conversation_set_dissector(conversation_t *conversation, const dissector_handle_t handle);
+
+Where:
+       conversation_t *conversation = the conversation in question
+        const dissector_handle_t handle = the dissector handle.
+
+
+2.2.2 Using timestamps relative to the conversation
  
  There is a framework to calculate timestamps relative to the start of the
  conversation. First of all the timestamp of the first packet that has been
@@ -3079,33 +3331,22 @@ SVN 23058 to see the implementation of conversation timestamps for
  the tcp-dissector.
  
  
-2.2.8 The example conversation code with GMemChunk's.
+2.2.3 The example conversation code using se_alloc'd memory.
  
  For a conversation between two IP addresses and ports you can use this as an
-example.  This example uses the GMemChunk to allocate memory and stores the data
+example.  This example uses se_alloc() to allocate memory and stores the data
  pointer in the conversation 'data' variable.
  
-NOTE: Remember to register the init routine (my_dissector_init) in the
-protocol_register routine.
-
-
  /************************ Global values ************************/
  
-/* the number of entries in the memory chunk array */
-#define my_init_count 10
-
  /* define your structure here */
  typedef struct {
  
  } my_entry_t;
  
-/* the GMemChunk base structure */
-static GMemChunk *my_vals = NULL;
-
  /* Registered protocol number */
  static int my_proto = -1;
  
-
  /********************* in the dissector routine *********************/
  
  /* the local variables in the dissector */
@@ -3126,7 +3367,7 @@ else {
  
      /* new conversation create local data structure */
  
-    data_ptr = g_mem_chunk_alloc(my_vals);
+    data_ptr = se_alloc(sizeof(my_entry_t));
  
      /*** add your code here to setup the new data structure ***/
  
@@ -3139,38 +3380,12 @@ else {
  
  /* at this point the conversation data is ready */
  
-
-/******************* in the dissector init routine *******************/
-
-#define my_init_count 20
-
-static void
-my_dissector_init(void)
-{
-
-    /* destroy memory chunks if needed */
-
-    if (my_vals)
-       g_mem_chunk_destroy(my_vals);
-
-    /* now create memory chunks */
-
-    my_vals = g_mem_chunk_new("my_proto_vals",
-           sizeof(my_entry_t),
-           my_init_count * sizeof(my_entry_t),
-           G_ALLOC_AND_FREE);
-}
-
  /***************** in the protocol register routine *****************/
  
-/* register re-init routine */
-
-register_init_routine(&my_dissector_init);
-
  my_proto = proto_register_protocol("My Protocol", "My Protocol", "my_proto");
  
  
-2.2.9 An example conversation code that starts at a specific frame number.
+2.2.4 An example conversation code that starts at a specific frame number.
  
  Sometimes a dissector has determined that a new conversation is needed that
  starts at a specific frame number, when a capture session encompasses multiple
@@ -3194,7 +3409,7 @@ that starts at the specific frame number.
         }
  
  
-2.2.10 The example conversation code using conversation index field.
+2.2.5 The example conversation code using conversation index field.
  
  Sometimes the conversation isn't enough to define a unique data storage
  value for the network traffic.  For example if you are storing information
@@ -3216,14 +3431,7 @@ upon the conversation index and values inside the request packets.
          /* then used the conversation index, and request data to find data */
          /* in the local hash table */
  
-       conversation = find_conversation(pinfo->fd->num, &pinfo->src, &pinfo->dst,
-           pinfo->ptype, pinfo->srcport, pinfo->destport, 0);
-       if (conversation == NULL) {
-               /* It's not part of any conversation - create a new one. */
-               conversation = conversation_new(pinfo->fd->num, &pinfo->src,
-                   &pinfo->dst, pinfo->ptype, pinfo->srcport, pinfo->destport,
-                   NULL, 0);
-       }
+       conversation = find_or_create_conversation(pinfo);
  
         request_key.conversation = conversation->index;
         request_key.service = pntohs(&rxh->serviceId);
@@ -3236,10 +3444,10 @@ upon the conversation index and values inside the request packets.
         opcode = 0;
         if (!request_val && !reply)
         {
-               new_request_key = g_mem_chunk_alloc(afs_request_keys);
+               new_request_key = se_alloc(sizeof(struct afs_request_key));
                 *new_request_key = request_key;
  
-               request_val = g_mem_chunk_alloc(afs_request_vals);
+               request_val = se_alloc(sizeof(struct afs_request_val));
                 request_val -> opcode = pntohl(&afsh->opcode);
                 opcode = request_val->opcode;
  
@@ -3302,7 +3510,7 @@ static void sub_dissector(tvbuff_t *tvb, packet_info *pinfo,
  */
         conversation = find_conversation(pinfo->fd->num,
                                 &pinfo->src, &pinfo->dst, protocol,
-                               src_port, dst_port, new_conv_info, 0);
+                               src_port, dst_port,  0);
  
  /* If there is no such conversation, or if there is one but for
     someone else's protocol then we just create a new conversation
@@ -3310,7 +3518,7 @@ static void sub_dissector(tvbuff_t *tvb, packet_info *pinfo,
  */
         if ( (conversation == NULL) ||
              (conversation->dissector_handle != sub_dissector_handle) ) {
-            new_conv_info = g_mem_chunk_alloc(new_conv_vals);
+            new_conv_info = se_alloc(sizeof(struct _new_conv_info));
              new_conv_info->data1 = value1;
  
  /* create the conversation for the dynamic port */
@@ -3384,7 +3592,7 @@ static dissector_handle_t sub_dissector_handle;
  
  /* if conversation has a data field, create it and load structure */
  
-        new_conv_info = g_mem_chunk_alloc(new_conv_vals);
+        new_conv_info = se_alloc(sizeof(struct _new_conv_info));
          new_conv_info->data1 = value1;
  
  /* create the conversation for the dynamic server address and port     */
@@ -3484,12 +3692,19 @@ Where: module - Returned by the prefs_register_protocol routine
                     "." between them, to construct a name that identifies
                     the field in the preference file; the name itself
                     should not include the protocol name, as the name in
-                   the preference file will already have it
+                   the preference file will already have it. Make sure that
+                   only lower-case ASCII letters, numbers, underscores and
+                   dots appear in the preference name.
          title    - Field title in the preferences dialog
          description - Comments added to the preference file above the
-                      preference value
+                      preference value and shown as a tooltip in the GUI, or NULL
          var      - pointer to the storage location that is updated when the
-                   field is changed in the preference dialog box
+                   field is changed in the preference dialog box.  Note that
+                   with string preferences the given pointer is overwritten
+                   with a pointer to a new copy of the string during the
+                   preference registration.  The passed-in string may be
+                   freed, but you must keep another pointer to the string
+                   in order to free it.
          base     - Base that the unsigned integer is expected to be in,
                     see strtoul(3).
          enumvals - an array of enum_val_t structures.  This must be
@@ -3580,10 +3795,10 @@ example, stolen from packet-dns.c:
         mdns_udp_handle = create_dissector_handle(dissect_mdns_udp,
             proto_dns);
  
-       dissector_add("udp.port", UDP_PORT_DNS, dns_udp_handle);
-       dissector_add("tcp.port", TCP_PORT_DNS, dns_tcp_handle);
-       dissector_add("udp.port", UDP_PORT_MDNS, mdns_udp_handle);
-       dissector_add("tcp.port", TCP_PORT_MDNS, dns_tcp_handle);
+       dissector_add_uint("udp.port", UDP_PORT_DNS, dns_udp_handle);
+       dissector_add_uint("tcp.port", TCP_PORT_DNS, dns_tcp_handle);
+       dissector_add_uint("udp.port", UDP_PORT_MDNS, mdns_udp_handle);
+       dissector_add_uint("tcp.port", TCP_PORT_MDNS, dns_tcp_handle);
  
  The dissect_dns_udp function does very little work and calls
  dissect_dns_common, while dissect_dns_tcp calls tcp_dissect_pdus with a
@@ -3660,7 +3875,7 @@ static hf_register_info hf[] = {
     };
  
  /**
-*   Dissect a buffer containing C strings.
+*   Dissect a buffer containing ASCII C strings.
  *
  *   @param  tvb     The buffer to dissect.
  *   @param  pinfo   Packet Info.
@@ -3685,7 +3900,8 @@ static void dissect_cstr(tvbuff_t * tvb, packet_info * pinfo, proto_tree * tree)
          len += 1; /* Add one for the '\0' */
  
          if (tree) {
-            proto_tree_add_item(tree, hf_cstring, tvb, offset, len, FALSE);
+            proto_tree_add_item(tree, hf_cstring, tvb, offset, len,
+                               ENC_ASCII|ENC_NA);
          }
          offset += (guint)len;
      }
@@ -3742,16 +3958,16 @@ You must call this and use this ptvcursor_t object so you can use the
  ptvcursor API.
  
  proto_item*
-ptvcursor_add(ptvcursor_t* ptvc, int hf, gint length, gboolean endianness)
+ptvcursor_add(ptvcursor_t* ptvc, int hf, gint length, const guint encoding)
      This will extract 'length' bytes from the tvbuff and place it in
  the proto_tree as field 'hf', which is a registered header_field. The
  pointer to the proto_item that is created is passed back to you. Internally,
  the ptvcursor advances its cursor so the next call to ptvcursor_add
-starts where this call finished. The 'endianness' parameter matters for
-FT_UINT* and FT_INT* fields.
+starts where this call finished. The 'encoding' parameter is relevant for
+certain types of fields (see above under proto_tree_add_item()).
  
  proto_item*
-ptvcursor_add_no_advance(ptvcursor_t* ptvc, int hf, gint length, gboolean endianness)
+ptvcursor_add_no_advance(ptvcursor_t* ptvc, int hf, gint length, const guint encoding)
      Like ptvcursor_add, but does not advance the internal cursor.
  
  void
@@ -3775,7 +3991,7 @@ ptvcursor_pop_subtree(ptvcursor_t* ptvc);
  
  proto_tree*
  ptvcursor_add_with_subtree(ptvcursor_t* ptvc, int hfindex, gint length,
-                           gboolean little_endian, gint ett_subtree);
+                           const guint encoding, gint ett_subtree);
      Adds an item to the tree and creates a subtree.
  If the length is unknown, length may be defined as SUBTREE_UNDEFINED_LENGTH.
  In this case, at the next pop, the item length will be equal to the advancement