Add heuristic dissection of XML
author lego <lego@f5534014-38df-0310-8fa8-9805f1628bb7>
Thu, 9 Feb 2006 13:05:32 +0000 (13:05 +0000)
committer lego <lego@f5534014-38df-0310-8fa8-9805f1628bb7>
Thu, 9 Feb 2006 13:05:32 +0000 (13:05 +0000)
git-svn-id: http://anonsvn.wireshark.org/wireshark/trunk@17228 f5534014-38df-0310-8fa8-9805f1628bb7

epan/dissectors/packet-media.c
epan/dissectors/packet-xml.c
epan/tvbparse.c
epan/tvbparse.h

index d9b595b920cdef4d73b7354587a384f9f840fd68..75c2a4d2b7793e60515e24e2351536b4453ef20f 100644 (file)
  * print routines
  */
 int proto_media = -1;
+static heur_dissector_list_t heur_subdissector_list;
 
 static void
 dissect_media(tvbuff_t *tvb, packet_info *pinfo , proto_tree *tree)
 {
     int bytes;
 
+    if (dissector_try_heuristic(heur_subdissector_list, tvb, pinfo, tree)) {
+        return;
+    }
+    
     /* Add media type to the INFO column if it is visible */
     if (check_col(pinfo->cinfo, COL_INFO)) {
        col_append_fstr(pinfo->cinfo, COL_INFO, " (%s)", pinfo->match_string);
@@ -79,6 +84,8 @@ proto_register_media(void)
            "media"             /* abbrev */
            );
     register_dissector("media", dissect_media, proto_media);
+       register_heur_dissector_list("media", &heur_subdissector_list);
+    
 
     /*
      * "Media" is used to dissect something whose normal dissector
index e3923ec09cd0d68c9d6b93fcf4e110d36622e40a..5f619449fc9b7695f9dd0a7a605a831b3c4cb4ad 100644 (file)
@@ -51,6 +51,7 @@
 #include <epan/dtd.h>
 #include <epan/report_err.h>
 #include <epan/filesystem.h>
+#include <epan/prefs.h>
 
 typedef struct _xml_ns_t {
     /* the name of this namespace */ 
@@ -111,6 +112,7 @@ static dissector_handle_t xml_handle;
 /* parser definitions */
 static tvbparse_wanted_t* want;
 static tvbparse_wanted_t* want_ignore;
+static tvbparse_wanted_t* want_heur;
 
 static GHashTable* xmpli_names;
 static GHashTable* media_types;
@@ -119,6 +121,8 @@ static xml_ns_t xml_ns = {"xml","/",-1,-1,-1,NULL,NULL,NULL};
 static xml_ns_t unknown_ns = {"unknown","?",-1,-1,-1,NULL,NULL,NULL};
 static xml_ns_t* root_ns;
 
+static gboolean pref_heuristic = FALSE;
+
 #define XML_CDATA -1000
 #define XML_SCOPED_NAME -1001
 
@@ -152,7 +156,6 @@ static const gchar* default_media_types[] = {
        "application/resource-lists+xml",
 };
 
-
 static void
 dissect_xml(tvbuff_t *tvb, packet_info *pinfo, proto_tree *tree)
 {
@@ -191,6 +194,14 @@ dissect_xml(tvbuff_t *tvb, packet_info *pinfo, proto_tree *tree)
        while(( tok = tvbparse_get(tt, want) )) ;
 } 
 
+static gboolean dissect_xml_heur(tvbuff_t *tvb, packet_info *pinfo, proto_tree *tree) {
+    if ( pref_heuristic && tvbparse_peek(tvbparse_init(tvb,0,-1,NULL,want_ignore), want_heur)) {
+        dissect_xml(tvb, pinfo, tree);
+        return TRUE;
+    } else {
+        return FALSE;
+    }
+}
 
 static void after_token(void* tvbparse_data, const void* wanted_data _U_, tvbparse_elem_t* tok) {
        GPtrArray* stack = tvbparse_data;
@@ -568,6 +579,13 @@ static void init_xml_parser(void) {
                                                          tvbparse_not_chars(-1,1,0," \t\r\n",NULL,NULL,unrecognized_token),
                                                          NULL);
        
+    want_heur = tvbparse_set_oneof(-1, NULL, NULL, NULL,
+                                   want_comment,
+                                   want_xmlpi,
+                                   want_doctype_start,
+                                   want_dtd_tag,
+                                   want_tag,
+                                   NULL);
        
 }
 
@@ -1146,9 +1164,15 @@ static void init_xml_names(void) {
 #endif    
 }
 
+static void apply_prefs(void) {
+    if (pref_heuristic) {
+        heur_dissector_add("http", dissect_xml_heur, xml_ns.hf_tag);   
+        heur_dissector_add("media", dissect_xml_heur, xml_ns.hf_tag);   
+    }
+}
+
 void
 proto_register_xml(void) {
-       
        static gint *ett_base[] = {
                &unknown_ns.ett,
                &xml_ns.ett,
@@ -1165,7 +1189,8 @@ proto_register_xml(void) {
                { &unknown_ns.hf_cdata, {"CDATA", "xml.cdata", FT_STRING, BASE_NONE, NULL, 0, "", HFILL }},
                { &unknown_ns.hf_tag, {"Tag", "xml.tag", FT_STRING, BASE_NONE, NULL, 0, "", HFILL }},
                { &xml_ns.hf_cdata, {"Unknown", "xml.unknown", FT_STRING, BASE_NONE, NULL, 0, "", HFILL }}
-       };
+    };
+       module_t* xml_module;
 
        hf_arr = g_array_new(FALSE,FALSE,sizeof(hf_register_info));
        ett_arr = g_array_new(FALSE,FALSE,sizeof(gint*));
@@ -1179,7 +1204,12 @@ proto_register_xml(void) {
 
        proto_register_field_array(xml_ns.hf_tag, (hf_register_info*)hf_arr->data, hf_arr->len);
        proto_register_subtree_array((gint**)ett_arr->data, ett_arr->len);
-       
+    
+       xml_module = prefs_register_protocol(xml_ns.hf_tag,apply_prefs);
+    prefs_register_bool_preference(xml_module, "heuristic", "Use Heuristics",
+                                   "Try to recognize XML for unknown HTTP media types",
+                                   &pref_heuristic);
+    
     g_array_free(hf_arr,FALSE);
     g_array_free(ett_arr,TRUE);
     
@@ -1200,5 +1230,5 @@ proto_reg_handoff_xml(void)
        xml_handle = find_dissector("xml");
        
        g_hash_table_foreach(media_types,add_dissector_media,NULL);
-       
+    
 }
index 06b3f094f6bfcb673011079a18d8edb2d11c3295..5b2a02797d33a36563b7b43afe67c1a9bc80914f 100644 (file)
@@ -67,8 +67,8 @@
 #define TVBPARSE_DEBUG_FIND 0x00000100
 #define TVBPARSE_DEBUG_NEWTOK 0x00000080
 #define TVBPARSE_DEBUG_IGNORE 0x00000040
-/*#define TVBPARSE_DEBUG_ 0x00000020
-#define TVBPARSE_DEBUG_ 0x00000010
+#define TVBPARSE_DEBUG_PEEK 0x00000020
+/*#define TVBPARSE_DEBUG_ 0x00000010
 #define TVBPARSE_DEBUG_ 0x00000008
 #define TVBPARSE_DEBUG_ 0x00000004
 #define TVBPARSE_DEBUG_ 0x00000002
@@ -1272,6 +1272,38 @@ static void execute_callbacks(tvbparse_t* tt, tvbparse_elem_t* curr) {
 
 }
 
+gboolean tvbparse_peek(tvbparse_t* tt,
+                              const tvbparse_wanted_t* wanted) {
+       tvbparse_elem_t* tok = NULL;
+       int consumed;
+    int offset = tt->offset;
+    
+#ifdef TVBPARSE_DEBUG
+    if (TVBPARSE_DEBUG & TVBPARSE_DEBUG_PEEK) g_warning("tvbparse_peek: ENTER offset=%i",offset);
+#endif                            
+    
+    offset += ignore(tt,offset);
+    
+#ifdef TVBPARSE_DEBUG
+    if (TVBPARSE_DEBUG & TVBPARSE_DEBUG_PEEK) g_warning("tvbparse_peek: after ignore offset=%i",offset);
+#endif                            
+    
+    consumed = wanted->condition(tt,offset,wanted,&tok);
+    
+    if (consumed >= 0) {
+#ifdef TVBPARSE_DEBUG
+        if (TVBPARSE_DEBUG & TVBPARSE_DEBUG_PEEK) g_warning("tvbparse_peek: GOT len=%i",consumed);
+#endif                            
+        return TRUE;
+    } else {
+#ifdef TVBPARSE_DEBUG
+        if (TVBPARSE_DEBUG & TVBPARSE_DEBUG_PEEK) g_warning("tvbparse_peek: NOT GOT");
+#endif                            
+        return FALSE;
+    }
+    
+}
+
 tvbparse_elem_t* tvbparse_get(tvbparse_t* tt,
                               const tvbparse_wanted_t* wanted) {
        tvbparse_elem_t* tok = NULL;
index 742363a0aa28b73632992d61f990a256da2087d6..2afc4eecfe2967979c5d3648ddaf43be83441d84 100644 (file)
@@ -440,8 +440,19 @@ gboolean tvbparse_reset(tvbparse_t* tt, int offset, int len);
 guint tvbparse_curr_offset(tvbparse_t* tt);
 guint tvbparse_len_left(tvbparse_t* tt);
 
+
+
+/*
+ * This will look for the wanted token at the current offset or after any given
+ * number of ignored tokens returning FALSE if there's no match or TRUE if there
+ * is a match.
+ * The parser will be left in its original state and no callbacks will be called. 
+ */
+gboolean tvbparse_peek(tvbparse_t* tt,
+                        const tvbparse_wanted_t* wanted);
+
 /*
- * This ill look for the wanted token at the current offset or after any given
+ * This will look for the wanted token at the current offset or after any given
  * number of ignored tokens returning NULL if there's no match.
  * if there is a match it will set the offset of the current parser after
  * the end of the token