Don't make function calls in the arguments of g_?to??(); these macros may very well evaluate their arguments multiple times.
[obnox/wireshark/wip.git] / epan / dissectors / packet-xml.c
index 32d2c3c58a1806430617ad0ae0bee80ba0d6514c..645b739df76d09c1646ac7e1d1a93f361002e047 100644 (file)
 #include <dirent.h>
 #endif
 
-#include <ctype.h>
-#include <stdlib.h>
 #include <string.h>
-#include <stdarg.h>
 #include <errno.h>
 
-#include <stdio.h>
-
 #include <glib.h>
 
 #include <wsutil/str_util.h>
@@ -97,12 +92,14 @@ static gboolean pref_heuristic_udp_save = FALSE;
 static range_t *global_xml_tcp_range = NULL;
 static range_t *xml_tcp_range = NULL;
 
+static gboolean pref_heuristic_unicode = FALSE;
+
 #define XML_CDATA -1000
 #define XML_SCOPED_NAME -1001
 
 
-GArray* hf_arr;
-GArray* ett_arr;
+static GArray* hf_arr;
+static GArray* ett_arr;
 
 static const gchar* default_media_types[] = {
        "text/xml",
@@ -126,7 +123,21 @@ static const gchar* default_media_types[] = {
        "application/rls-services+xml",
        "application/smil",
        "application/simple-filter+xml",
+       "application/simservs+xml",
        "application/soap+xml",
+       "application/vnd.etsi.aoc+xml",
+       "application/vnd.etsi.cug+xml",
+       "application/vnd.etsi.iptvcommand+xml",
+       "application/vnd.etsi.iptvdiscovery+xml",
+       "application/vnd.etsi.iptvprofile+xml",
+       "application/vnd.etsi.iptvsad-bc+xml",
+       "application/vnd.etsi.iptvsad-cod+xml",
+       "application/vnd.etsi.iptvsad-npvr+xml",
+       "application/vnd.etsi.iptvueprofile+xml",
+       "application/vnd.etsi.mcid+xml",
+       "application/vnd.etsi.sci+xml",
+       "application/vnd.etsi.simservs+xml",
+       "application/vnd.3gpp.cw+xml",
        "application/vnd.wv.csp+xml",
        "application/vnd.wv.csp.xml",
        "application/watcherinfo+xml",
@@ -143,12 +154,12 @@ static const gchar* default_media_types[] = {
        "application/x-wms-logconnectstats",
        "application/x-wms-logplaystats",
        "application/x-wms-sendevent",
-       "application/rss+xml",   
+       "application/rss+xml",
        "image/svg+xml",
 };
 
 static void insert_xml_frame(xml_frame_t *parent, xml_frame_t *new_child) {
-       new_child->firts_child = NULL;
+       new_child->first_child = NULL;
        new_child->last_child = NULL;
 
        new_child->parent = parent;
@@ -156,8 +167,8 @@ static void insert_xml_frame(xml_frame_t *parent, xml_frame_t *new_child) {
        new_child->prev_sibling = NULL;
        if (parent == NULL) return;  /* root */
 
-       if (parent->firts_child == NULL) {  /* the 1st child */
-               parent->firts_child = new_child;
+       if (parent->first_child == NULL) {  /* the 1st child */
+               parent->first_child = new_child;
        } else {  /* following children */
                parent->last_child->next_sibling = new_child;
                new_child->prev_sibling = parent->last_child;
@@ -169,11 +180,10 @@ static void
 dissect_xml(tvbuff_t *tvb, packet_info *pinfo, proto_tree *tree)
 {
        tvbparse_t* tt;
-       tvbparse_elem_t* tok = NULL;
        static GPtrArray* stack = NULL;
        xml_frame_t* current_frame;
        char* colinfo_str;
-       
+
        if (stack != NULL)
                g_ptr_array_free(stack,TRUE);
 
@@ -202,8 +212,7 @@ dissect_xml(tvbuff_t *tvb, packet_info *pinfo, proto_tree *tree)
                ascii_strup_inplace(colinfo_str);
        }
 
-       if (check_col(pinfo->cinfo, COL_PROTOCOL))
-               col_append_str(pinfo->cinfo, COL_PROTOCOL, colinfo_str);
+       col_append_str(pinfo->cinfo, COL_PROTOCOL, colinfo_str);
 
        current_frame->ns = root_ns;
 
@@ -211,27 +220,35 @@ dissect_xml(tvbuff_t *tvb, packet_info *pinfo, proto_tree *tree)
        current_frame->tree = proto_item_add_subtree(current_frame->item,current_frame->ns->ett);
        current_frame->last_item = current_frame->item;
 
-       while(( tok = tvbparse_get(tt, want) )) ;
+       while(tvbparse_get(tt, want)) ;
 
        pinfo->private_data = current_frame;  /* pass XML structure to the dissector calling XML */
 }
 
 static gboolean dissect_xml_heur(tvbuff_t *tvb, packet_info *pinfo, proto_tree *tree) {
-       if ( (pref_heuristic_media || pref_heuristic_tcp || pref_heuristic_udp)
-            && tvbparse_peek(tvbparse_init(tvb,0,-1,NULL,want_ignore), want_heur)) {
-               dissect_xml(tvb, pinfo, tree);
-               return TRUE;
-       } else {
-               return FALSE;
+       if (pref_heuristic_media || pref_heuristic_tcp || pref_heuristic_udp) {
+               if (tvbparse_peek(tvbparse_init(tvb,0,-1,NULL,want_ignore), want_heur)) {
+                       dissect_xml(tvb, pinfo, tree);
+                       return TRUE;
+               } else if (pref_heuristic_unicode) {
+                       const guint8 *data = tvb_get_ephemeral_unicode_string(tvb, 0, tvb_length(tvb)/2, ENC_LITTLE_ENDIAN);
+                       tvbuff_t *unicode_tvb = tvb_new_child_real_data(tvb, data, tvb_length(tvb)/2, tvb_length(tvb)/2);
+                       if (tvbparse_peek(tvbparse_init(unicode_tvb,0,-1,NULL,want_ignore), want_heur)) {
+                               add_new_data_source(pinfo, unicode_tvb, "UTF8");
+                               dissect_xml(unicode_tvb, pinfo, tree);
+                               return TRUE;
+                       }
+               }
        }
+       return FALSE;
 }
 
 xml_frame_t *xml_get_tag(xml_frame_t *frame, const gchar *name) {
        xml_frame_t *tag = NULL;
 
-       xml_frame_t *xml_item = frame->firts_child;
+       xml_frame_t *xml_item = frame->first_child;
        while (xml_item) {
-               if ((xml_item->type == XML_FRAME_TAG)) {
+               if (xml_item->type == XML_FRAME_TAG) {
                        if (!name) {  /* get the 1st tag */
                        tag = xml_item;
                                break;
@@ -249,9 +266,9 @@ xml_frame_t *xml_get_tag(xml_frame_t *frame, const gchar *name) {
 xml_frame_t *xml_get_attrib(xml_frame_t *frame, const gchar *name) {
        xml_frame_t *attr = NULL;
 
-       xml_frame_t *xml_item = frame->firts_child;
+       xml_frame_t *xml_item = frame->first_child;
        while (xml_item) {
-               if ((xml_item->type == XML_FRAME_ATTRIB) && 
+               if ((xml_item->type == XML_FRAME_ATTRIB) &&
                        xml_item->name_orig_case && !strcmp(xml_item->name_orig_case, name)) {
                attr = xml_item;
                        break;
@@ -265,9 +282,9 @@ xml_frame_t *xml_get_attrib(xml_frame_t *frame, const gchar *name) {
 xml_frame_t *xml_get_cdata(xml_frame_t *frame) {
        xml_frame_t *cdata = NULL;
 
-       xml_frame_t *xml_item = frame->firts_child;
+       xml_frame_t *xml_item = frame->first_child;
        while (xml_item) {
-               if ((xml_item->type == XML_FRAME_CDATA)) {
+               if (xml_item->type == XML_FRAME_CDATA) {
                cdata = xml_item;
                        break;
                }
@@ -486,7 +503,7 @@ static void before_dtd_doctype(void* tvbparse_data, const void* wanted_data _U_,
        xml_frame_t* new_frame;
        tvbparse_elem_t* name_tok = tok->sub->next->next->next->sub->sub;
        proto_tree* dtd_item = proto_tree_add_item(current_frame->tree, hf_doctype,
-                                                                                          name_tok->tvb, name_tok->offset, name_tok->len, FALSE);
+                                                                                          name_tok->tvb, name_tok->offset, name_tok->len, ENC_ASCII|ENC_NA);
 
        proto_item_set_text(dtd_item,"%s",tvb_format_text(tok->tvb,tok->offset,tok->len));
 
@@ -560,6 +577,8 @@ static void after_attrib(void* tvbparse_data, const void* wanted_data _U_, tvbpa
        pi = proto_tree_add_item(current_frame->tree,hfid,value->tvb,value->offset,value->len,FALSE);
        proto_item_set_text(pi, "%s", tvb_format_text(tok->tvb,tok->offset,tok->len));
 
+       current_frame->last_item = pi;
+
        new_frame = ep_alloc(sizeof(xml_frame_t));
        new_frame->type = XML_FRAME_ATTRIB;
        new_frame->name = name;
@@ -585,8 +604,8 @@ static void unrecognized_token(void* tvbparse_data, const void* wanted_data _U_,
 
 
 static void init_xml_parser(void) {
-       tvbparse_wanted_t* want_name = tvbparse_chars(-1,1,0,"abcdefghijklmnopqrstuvwxyz-_ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789",NULL,NULL,NULL);
-       tvbparse_wanted_t* want_attr_name = tvbparse_chars(-1,1,0,"abcdefghijklmnopqrstuvwxyz-_ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789:",NULL,NULL,NULL);
+       tvbparse_wanted_t* want_name = tvbparse_chars(-1,1,0,"abcdefghijklmnopqrstuvwxyz.-_ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789",NULL,NULL,NULL);
+       tvbparse_wanted_t* want_attr_name = tvbparse_chars(-1,1,0,"abcdefghijklmnopqrstuvwxyz.-_ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789:",NULL,NULL,NULL);
 
        tvbparse_wanted_t* want_scoped_name = tvbparse_set_seq(XML_SCOPED_NAME, NULL, NULL, NULL,
                                                               want_name,
@@ -757,10 +776,10 @@ static void add_xml_field(GArray* hfs, int* p_id, gchar* name, gchar* fqn) {
        hfri.hfinfo.display = BASE_NONE;
        hfri.hfinfo.strings = NULL;
        hfri.hfinfo.bitmask = 0x0;
-       hfri.hfinfo.blurb = "";
+       hfri.hfinfo.blurb = NULL;
        hfri.hfinfo.id = 0;
        hfri.hfinfo.parent = 0;
-       hfri.hfinfo.ref_count = 0;
+       hfri.hfinfo.ref_type = HF_REF_TYPE_NONE;
        hfri.hfinfo.bitshift = 0;
        hfri.hfinfo.same_name_next = NULL;
        hfri.hfinfo.same_name_prev = NULL;
@@ -862,7 +881,7 @@ static gchar* fully_qualified_name(GPtrArray* hier, gchar* name, gchar* proto_na
        GString* s = g_string_new(proto_name);
        gchar* str;
        g_string_append(s,".");
-       
+
        for (i = 1; i < hier->len; i++) {
                g_string_append_printf(s, "%s.",(gchar*)g_ptr_array_index(hier,i));
        }
@@ -1010,21 +1029,19 @@ static void register_dtd(dtd_build_data_t* dtd_data, GString* errors) {
                dtd_named_list_t* nl = g_ptr_array_remove_index(dtd_data->attributes,0);
                xml_ns_t* element = g_hash_table_lookup(elements,nl->name);
 
-               if (!element) {
-                       g_string_append_printf(errors,"element %s is not defined\n", nl->name);
+               if (element) {
+               while(nl->list->len) {
+                   gchar* name = g_ptr_array_remove_index(nl->list,0);
+                   int* id_p = g_malloc(sizeof(int));
 
-                       goto next_attribute;
+                   *id_p = -1;
+                   g_hash_table_insert(element->attributes,name,id_p);
+               }
                }
-
-               while(nl->list->len) {
-                       gchar* name = g_ptr_array_remove_index(nl->list,0);
-                       int* id_p = g_malloc(sizeof(int));
-
-                       *id_p = -1;
-                       g_hash_table_insert(element->attributes,name,id_p);
+               else {
+            g_string_append_printf(errors,"element %s is not defined\n", nl->name);
                }
 
-next_attribute:
                g_free(nl->name);
                g_ptr_array_free(nl->list,TRUE);
                g_free(nl);
@@ -1047,7 +1064,6 @@ next_attribute:
                hfs = hf_arr;
                etts = ett_arr;
                g_ptr_array_add(hier,g_strdup("xml"));
-               root_element = &xml_ns;
        } else {
                /*
                 * if we were given a proto_name the namespace will be registered
@@ -1278,11 +1294,11 @@ static void init_xml_names(void) {
 }
 
 static void range_delete_xml_tcp_callback(guint32 port) {
-       dissector_delete("tcp.port", port, xml_handle);
+       dissector_delete_uint("tcp.port", port, xml_handle);
 }
 
 static void range_add_xml_tcp_callback(guint32 port) {
-       dissector_add("tcp.port", port, xml_handle);
+       dissector_add_uint("tcp.port", port, xml_handle);
 }
 
 static void apply_prefs(void) {
@@ -1299,7 +1315,7 @@ static void apply_prefs(void) {
                        pref_heuristic_media_save = FALSE;
                }
        }
-       
+
        if (pref_heuristic_tcp_save != pref_heuristic_tcp ) {
                if (pref_heuristic_tcp) {
                        heur_dissector_add("tcp", dissect_xml_heur, xml_ns.hf_tag);
@@ -1309,7 +1325,7 @@ static void apply_prefs(void) {
                        pref_heuristic_tcp_save = FALSE;
                }
        }
-       
+
        if (pref_heuristic_udp_save != pref_heuristic_udp ) {
                if (pref_heuristic_udp) {
                        heur_dissector_add("udp", dissect_xml_heur, xml_ns.hf_tag);
@@ -1323,7 +1339,7 @@ static void apply_prefs(void) {
        range_foreach(xml_tcp_range, range_delete_xml_tcp_callback);
        g_free(xml_tcp_range);
        xml_tcp_range = range_copy(global_xml_tcp_range);
-       range_foreach(xml_tcp_range, range_add_xml_tcp_callback);       
+       range_foreach(xml_tcp_range, range_add_xml_tcp_callback);
 }
 
 void
@@ -1364,7 +1380,7 @@ proto_register_xml(void) {
        prefs_register_bool_preference(xml_module, "heuristic", "Use Heuristics for media types",
                                    "Try to recognize XML for unknown media types",
                                    &pref_heuristic_media);
-       prefs_register_bool_preference(xml_module, "heuristic_tcp", "Use Heuristics for tcp",
+       prefs_register_bool_preference(xml_module, "heuristic_tcp", "Use Heuristics for TCP",
                                    "Try to recognize XML for unknown TCP ports",
                                    &pref_heuristic_tcp);
        prefs_register_range_preference(xml_module, "tcp.port", "TCP Ports",
@@ -1373,7 +1389,10 @@ proto_register_xml(void) {
        prefs_register_bool_preference(xml_module, "heuristic_udp", "Use Heuristics for UDP",
                                    "Try to recognize XML for unknown UDP ports",
                                    &pref_heuristic_udp);
-       
+       prefs_register_bool_preference(xml_module, "heuristic_unicode", "Use Unicode in heuristics",
+                                   "Try to recognize XML encoded in Unicode (UCS-2)",
+                                   &pref_heuristic_unicode);
+
        g_array_free(hf_arr,FALSE);
        g_array_free(ett_arr,TRUE);