the dtd parser (still missing the glue) and few fixes to packet-xml.c
authorlego <lego@f5534014-38df-0310-8fa8-9805f1628bb7>
Sat, 10 Sep 2005 17:29:15 +0000 (17:29 +0000)
committerlego <lego@f5534014-38df-0310-8fa8-9805f1628bb7>
Sat, 10 Sep 2005 17:29:15 +0000 (17:29 +0000)
git-svn-id: http://anonsvn.wireshark.org/wireshark/trunk@15745 f5534014-38df-0310-8fa8-9805f1628bb7

epan/Makefile.am
epan/Makefile.common
epan/Makefile.nmake
epan/dissectors/packet-xml.c
epan/dtd.h [new file with mode: 0644]
epan/dtd_grammar.lemon [new file with mode: 0644]
epan/dtd_parse.l [new file with mode: 0644]
epan/dtd_preparse.l [new file with mode: 0644]

index 56efd3acdb29f644c203b9013375f777dcb363aa..6faee211343b88609dc22faa967dbf1d35984013 100644 (file)
@@ -50,6 +50,9 @@ EXTRA_libethereal_la_SOURCES =        \
        inet_v6defs.h
 
 EXTRA_DIST = \
+       dtd_grammar.lemon \
+       dtd_parse.l \
+       dtd_preparse.l \
        enterprise-numbers  \
        libethereal.def \
        Makefile.common \
@@ -66,7 +69,12 @@ CLEANFILES = \
        *~
 
 DISTCLEANFILES = \
+       dtd_grammar.c \
+       dtd_grammar.h \
+       dtd_parse.c             \
+       dtd_preparse.c  \
        radius_dict.c
+       
 
 MAINTAINERCLEANFILES = \
        Makefile.in
@@ -85,7 +93,20 @@ exntest: exntest.o except.o
 
 radius_dict.c: radius_dict.l
        $(LEX) $^
+       
+dtd_parse.c : dtd_parse.l
+       $(LEX) -odtd_parse.c $(srcdir)/dtd_parse.l
 
+dtd_preparse.c : dtd_preparse.l
+       $(LEX) -odtd_preparse.c $(srcdir)/dtd_preparse.l
+
+dtd_grammar.h: dtd_grammar.c
+
+LEMON=../tools/lemon
+
+dtd_grammar.c: dtd_grammar.lemon
+       $(LEMON)/lemon t=$(srcdir)/$(LEMON)/lempar.c $^
+       
 tvbtest.o exntest.o: exceptions.h
 
 sminmpec.c: enterprise-numbers make-sminmpec.pl
index 2f20c4c08d150c3b4190ff1efcc897dab422b661..150140a00a3cc56093d6308fbd905e30c0a6b7bc 100644 (file)
@@ -43,6 +43,9 @@ LIBETHEREAL_SRC =             \
        crypt-md4.c             \
        crypt-md5.c             \
        crypt-rc4.c             \
+       dtd_grammar.c   \
+       dtd_parse.c             \
+       dtd_preparse.c  \
        emem.c                  \
        epan.c                  \
        except.c                \
@@ -105,6 +108,8 @@ LIBETHEREAL_INCLUDES =              \
        crypt-md4.h             \
        crypt-md5.h             \
        crypt-rc4.h             \
+       dtd.h                   \
+       dtd_grammar.h   \
        emem.h                  \
        epan.h                  \
        epan_dissect.h          \
index a5d9e98906c2f318c9cf89ae975ce489c36ae1af..a6f05223ee03baedfd93ca134d3ad19857d1003b 100644 (file)
@@ -153,3 +153,17 @@ radius_dict.c : radius_dict.l
 
 sminmpec.c: enterprise-numbers make-sminmpec.pl
        $(PERL) make-sminmpec.pl enterprise-numbers sminmpec.c
+
+# NMAKE has no $^ automatic macro and this makefile does not define srcdir
+# (NOTE(review): confirm against config.nmake), so the inputs are named
+# explicitly and the tool path uses backslashes for cmd.exe.
+dtd_parse.c : dtd_parse.l
+       $(LEX) -odtd_parse.c dtd_parse.l
+
+dtd_preparse.c : dtd_preparse.l
+       $(LEX) -odtd_preparse.c dtd_preparse.l
+
+dtd_grammar.h: dtd_grammar.c
+
+LEMON=..\tools\lemon
+
+dtd_grammar.c: dtd_grammar.lemon
+       $(LEMON)\lemon t=$(LEMON)\lempar.c dtd_grammar.lemon
+       
index ca4d2a4f1e233348e6b8eb94cbe2eb6adaabc3ed..b75dfee282d5186bf6ac70583dad997a4d9a6a7e 100644 (file)
 #include <string.h>
 #include <stdarg.h>
 
+#include <stdio.h>
+
 #include <glib.h>
 #include <epan/emem.h>
 #include <epan/packet.h>
 #include <epan/strutil.h>
 #include <epan/tvbparse.h>
+#include <epan/dtd.h>
+
+/* Describes one XML name (an element, a <? ?> target or the protocol itself)
+ * together with the field and subtree ids used to display it. */
+typedef struct _xml_names_t {
+       gchar* name; /* also used as the hash key and as the last part of field abbreviations */
+       gchar* longname;
+       gchar* blurb;
+       int hf_tag; /* field id used for the tag item itself */
+       int hf_cdata; /* field id used for character data found inside the tag */
+       gint ett; /* subtree index for the tag's children */
+       
+       gboolean is_root; /* NOTE(review): not read anywhere in the code shown here - confirm intended use */
+
+       GHashTable* attributes; /* attribute name -> int* holding that attribute's field id */
+       GHashTable* elements; /* child element name -> xml_names_t* */
+} xml_names_t;
 
 typedef struct {
        proto_tree* tree;
        proto_item* item;
        proto_item* last_item;
+       xml_names_t* ns;
        int start_offset;
 } xml_frame_t;
 
-static int proto_xml = -1;
-
-static gint ett_i = -1;
-static gint ett_tag = -1;
 static gint ett_dtd = -1;
+static gint ett_xmpli = -1;
 
-static int hf_what = -1;
-static int hf_attrib = -1;
-static int hf_cdata = -1;
+static int hf_junk = -1;
+static int hf_unknowwn_attrib = -1;
 static int hf_comment = -1;
 static int hf_xmlpi = -1;
-static int hf_tag = -1;
 static int hf_dtd_tag = -1;
 static int hf_doctype = -1;
-static int hf_entity = -1;
 
 /* Dissector handles */
 static dissector_handle_t xml_handle;
@@ -72,11 +84,20 @@ static dissector_handle_t xml_handle;
 static tvbparse_wanted_t* want;
 static tvbparse_wanted_t* want_ignore;
 
+static GHashTable* xmpli_names;
+static GHashTable* media_types;
 
+/* the XML protocol itself; these strings are handed to proto_register_protocol() */
+static xml_names_t xml_ns = {"xml","eXtensible Markup Language","XML",-1,-1,-1,TRUE,NULL,NULL};
+static xml_names_t unknown_ns = {"","","",-1,-1,-1,TRUE,NULL,NULL};
+static xml_names_t* root_ns;
 
+#define XML_CDATA -1000
+
+GArray* hf;
+GArray* ett_arr;
 
 static void
-dissect_xml(tvbuff_t *tvb, packet_info *pinfo _U_, proto_tree *tree)
+dissect_xml(tvbuff_t *tvb, packet_info *pinfo, proto_tree *tree)
 {
        tvbparse_t* tt;
        tvbparse_elem_t* tok = NULL;
@@ -93,12 +114,20 @@ dissect_xml(tvbuff_t *tvb, packet_info *pinfo _U_, proto_tree *tree)
        g_ptr_array_add(stack,current_frame);
 
        tt = tvbparse_init(tvb,0,-1,stack,want_ignore);
+       current_frame->start_offset = 0;
+       
+       /* NOTE(review): match_string is presumably NULL when the dissector was not
+        * picked through a string match - confirm; g_str_hash() must never be handed
+        * a NULL key, so fall through to the unknown namespace in that case */
+       root_ns = pinfo->match_string ? g_hash_table_lookup(media_types,pinfo->match_string) : NULL;
+       
+       if (! root_ns ) {
+               root_ns = &unknown_ns;
+       }
+       
+       current_frame->ns = root_ns;
        
-       current_frame->item = proto_tree_add_item(tree,proto_xml,tvb,0,-1,FALSE);
-       current_frame->tree = proto_item_add_subtree(current_frame->item,ett_i);
+       current_frame->item = proto_tree_add_item(tree,xml_ns.hf_tag,tvb,0,-1,FALSE);
+       current_frame->tree = proto_item_add_subtree(current_frame->item,xml_ns.ett);
        current_frame->last_item = current_frame->item;
-       current_frame->start_offset = 0;
-
+       
        while(( tok = tvbparse_get(tt, want) )) ;
 } 
 
@@ -109,15 +138,18 @@ static void after_token(void* tvbparse_data, const void* wanted_data _U_, tvbpar
        int hfid;
        proto_item* pi;
 
-       if (tok->id > 0)
+       if (tok->id == XML_CDATA) {
+               hfid = current_frame->ns->hf_cdata;
+       } else if ( tok->id > 0) {
                hfid = tok->id;
-       else
-               hfid = hf_what;
+       } else {
+               hfid = hf_junk;
+       }
        
        pi = proto_tree_add_item(current_frame->tree, hfid, tok->tvb, tok->offset, tok->len, FALSE);
        
        proto_item_set_text(pi, "%s",
-                                               tvb_get_ephemeral_string(tok->tvb,tok->offset,tok->len));
+                                               tvb_format_text(tok->tvb,tok->offset,tok->len));
 }
 
 static void before_xmpli(void* tvbparse_data, const void* wanted_data _U_, tvbparse_elem_t* tok) {
@@ -126,18 +158,32 @@ static void before_xmpli(void* tvbparse_data, const void* wanted_data _U_, tvbpa
        proto_item* pi;
        proto_tree* pt;
        tvbparse_elem_t* name_tok = tok->sub->next;
+       gchar* name = g_strdown(tvb_get_ephemeral_string(name_tok->tvb,name_tok->offset,name_tok->len));
+       xml_names_t* ns = g_hash_table_lookup(xmpli_names,name);
+       int hf_tag;
+       gint ett;
+       
+       if (!ns) {
+               hf_tag = hf_xmlpi;
+               ett = ett_xmpli;
+       } else {
+               hf_tag = ns->hf_tag;
+               ett = ns->ett;
+       }
        
-       pi = proto_tree_add_text(current_frame->tree,tok->tvb,tok->offset,-1,
-                                                        "<? %s",
-                                                        tvb_get_ephemeral_string(name_tok->tvb,name_tok->offset,name_tok->len));
-       pt = proto_item_add_subtree(pi,ett_tag);
+       pi = proto_tree_add_item(current_frame->tree,hf_tag,tok->tvb,tok->offset,tok->len,FALSE);
+       
+       /* the label is packet data: pass it as an argument, never as the format string */
+       proto_item_set_text(pi,"%s",tvb_format_text(tok->tvb,tok->offset,(name_tok->offset - tok->offset) + name_tok->len));
+       
+       pt = proto_item_add_subtree(pi,ett);
        
        current_frame = ep_alloc(sizeof(xml_frame_t));
        current_frame->item = pi;
        current_frame->last_item = pi;
        current_frame->tree = pt;
        current_frame->start_offset = tok->offset;
-       
+       current_frame->ns = ns;
+
        g_ptr_array_add(stack,current_frame);
        
 }
@@ -146,42 +192,53 @@ static void after_xmlpi(void* tvbparse_data, const void* wanted_data _U_, tvbpar
        GPtrArray* stack = tvbparse_data;
        xml_frame_t* current_frame = g_ptr_array_index(stack,stack->len - 1);
                
-       proto_item_set_len(current_frame->item, (tok->offset - current_frame->start_offset) + tok->len);
-       proto_item_append_text(current_frame->last_item," ?>");
+       /* show the closing "?>" token; its text is packet data, so it goes through "%s" */
+       proto_tree_add_text(current_frame->tree,
+                                                  tok->tvb, tok->offset, tok->len,
+                                                  "%s", tvb_format_text(tok->tvb,tok->offset,tok->len));
        
        if (stack->len > 1) {
                g_ptr_array_remove_index_fast(stack,stack->len - 1);
        } else {
-               proto_tree_add_text(current_frame->tree,tok->tvb,tok->offset,tok->len,"[ ERROR: Closing an unopened tag ]");
+               proto_tree_add_text(current_frame->tree,tok->tvb,tok->offset,tok->len,"[ ERROR: Closing an unopened xmpli tag ]");
        }
 }
 
 static void before_tag(void* tvbparse_data, const void* wanted_data _U_, tvbparse_elem_t* tok) {
        GPtrArray* stack = tvbparse_data;
        xml_frame_t* current_frame = g_ptr_array_index(stack,stack->len - 1);
+       tvbparse_elem_t* name_tok = tok->sub->next;
+       gchar* name = g_strdown(tvb_get_ephemeral_string(name_tok->tvb,name_tok->offset,name_tok->len));
+       /* the enclosing frame may have no namespace (DOCTYPE and unknown <? ?> frames store NULL)
+        * or a namespace without an elements table; ns then stays NULL for the fallback below */
+       xml_names_t* ns = (current_frame->ns && current_frame->ns->elements) ? g_hash_table_lookup(current_frame->ns->elements,name) : NULL;
+       xml_frame_t* new_frame;
        proto_item* pi;
        proto_tree* pt;
-       tvbparse_elem_t* name_tok = tok->sub->next;
-       gchar* name = tvb_get_ephemeral_string(name_tok->tvb,name_tok->offset,name_tok->len);
        
-       pi = proto_tree_add_text(current_frame->tree,tok->tvb,tok->offset,-1,"<%s",name);
-       pt = proto_item_add_subtree(pi,ett_tag);
+       if (!ns) {
+               if (! ( ns = g_hash_table_lookup(root_ns->elements,name) ) ) {
+                       ns = &unknown_ns;
+               }
+       }
        
-       current_frame = ep_alloc(sizeof(xml_frame_t));
-       current_frame->item = pi;
-       current_frame->last_item = pi;
-       current_frame->tree = pt;
-       current_frame->start_offset = tok->offset;
+       pi = proto_tree_add_item(current_frame->tree,ns->hf_tag,tok->tvb,tok->offset,tok->len,FALSE);
+       /* label the item with "<name"; the text is packet data, so it must not be the format string */
+       proto_item_set_text(pi,"%s",tvb_format_text(tok->tvb,tok->offset,(name_tok->offset - tok->offset) + name_tok->len));
        
-       g_ptr_array_add(stack,current_frame);
+       pt = proto_item_add_subtree(pi,ns->ett);
+       
+       new_frame = ep_alloc(sizeof(xml_frame_t));
+       new_frame->item = pi;
+       new_frame->last_item = pi;
+       new_frame->tree = pt;
+       new_frame->start_offset = tok->offset;
+       new_frame->ns = ns;
+
+       g_ptr_array_add(stack,new_frame);
 
 }
 
-static void after_open_tag(void* tvbparse_data, const void* wanted_data _U_, tvbparse_elem_t* tok) {
+static void after_open_tag(void* tvbparse_data, const void* wanted_data _U_, tvbparse_elem_t* tok _U_) {
        GPtrArray* stack = tvbparse_data;
        xml_frame_t* current_frame = g_ptr_array_index(stack,stack->len - 1);
 
-       proto_item_set_len(current_frame->item, (tok->offset - current_frame->start_offset) + tok->len);
        proto_item_append_text(current_frame->last_item,">");
 }
 
@@ -189,7 +246,6 @@ static void after_closed_tag(void* tvbparse_data, const void* wanted_data _U_, t
        GPtrArray* stack = tvbparse_data;
        xml_frame_t* current_frame = g_ptr_array_index(stack,stack->len - 1);
 
-       proto_item_set_len(current_frame->item, (tok->offset - current_frame->start_offset) + tok->len);
        proto_item_append_text(current_frame->last_item,"/>");                                  
 
        if (stack->len > 1) {
@@ -206,7 +262,7 @@ void after_untag(void* tvbparse_data, const void* wanted_data _U_, tvbparse_elem
        proto_item_set_len(current_frame->item, (tok->offset - current_frame->start_offset) + tok->len);
        
        proto_tree_add_text(current_frame->tree,tok->tvb,tok->offset,tok->len,"%s",
-                                               tvb_get_ephemeral_string(tok->tvb,tok->offset,tok->len));
+                                               tvb_format_text(tok->tvb,tok->offset,tok->len));
 
        if (stack->len > 1) {
                g_ptr_array_remove_index_fast(stack,stack->len - 1);
@@ -223,13 +279,14 @@ static void before_dtd_doctype(void* tvbparse_data, const void* wanted_data _U_,
        proto_tree* dtd_item = proto_tree_add_item(current_frame->tree, hf_doctype,
                                                                                           name_tok->tvb, name_tok->offset, name_tok->len, FALSE);
                                                                                           
-       proto_item_set_text(dtd_item,"%s",tvb_get_ephemeral_string(tok->tvb,tok->offset,tok->len));
+       proto_item_set_text(dtd_item,"%s",tvb_format_text(tok->tvb,tok->offset,tok->len));
 
        current_frame = ep_alloc(sizeof(xml_frame_t));
        current_frame->item = dtd_item;
        current_frame->last_item = dtd_item;
        current_frame->tree = proto_item_add_subtree(dtd_item,ett_dtd);
        current_frame->start_offset = tok->offset;
+       current_frame->ns = NULL;
 
        g_ptr_array_add(stack,current_frame);
 }
@@ -251,7 +308,7 @@ static void after_dtd_close(void* tvbparse_data, const void* wanted_data _U_, tv
        xml_frame_t* current_frame = g_ptr_array_index(stack,stack->len - 1);
        
        proto_tree_add_text(current_frame->tree,tok->tvb,tok->offset,tok->len,"%s",
-                                               tvb_get_ephemeral_string(tok->tvb,tok->offset,tok->len));
+                                               tvb_format_text(tok->tvb,tok->offset,tok->len));
        if (stack->len > 1) {
                g_ptr_array_remove_index_fast(stack,stack->len - 1);
        } else {
@@ -266,13 +323,20 @@ static void get_attrib_value(void* tvbparse_data _U_, const void* wanted_data _U
 static void after_attrib(void* tvbparse_data, const void* wanted_data _U_, tvbparse_elem_t* tok) {
        GPtrArray* stack = tvbparse_data;
        xml_frame_t* current_frame = g_ptr_array_index(stack,stack->len - 1);
-       gchar* name = tvb_get_ephemeral_string(tok->sub->tvb,tok->sub->offset,tok->sub->len);
+       gchar* name = g_strdown(tvb_get_ephemeral_string(tok->sub->tvb,tok->sub->offset,tok->sub->len));
        tvbparse_elem_t* value = tok->sub->next->next->data;
+       int* hfidp;
+       int hfid;
 
-       name = name;
+       /* name was lower-cased when it was fetched; look it up in the current tag's attribute table */
+       if(current_frame->ns && (hfidp = g_hash_table_lookup(current_frame->ns->attributes,name) )) {
+               hfid = *hfidp;
+       } else {
+               hfid = hf_unknowwn_attrib;
+               value = tok;
+       }
        
-       current_frame->last_item = proto_tree_add_item(current_frame->tree,hf_attrib,value->tvb,value->offset,value->len,FALSE);
-       proto_item_set_text(current_frame->last_item, "%s", tvb_get_ephemeral_string(tok->tvb,tok->offset,tok->len));
+       current_frame->last_item = proto_tree_add_item(current_frame->tree,hfid,value->tvb,value->offset,value->len,FALSE);
+       proto_item_set_text(current_frame->last_item, "%s", tvb_format_text(tok->tvb,tok->offset,tok->len));
 
 }
 
@@ -287,10 +351,10 @@ static void unrecognized_token(void* tvbparse_data, const void* wanted_data _U_,
 
 
 void init_xml_parser(void) {   
-       tvbparse_wanted_t* want_name = tvbparse_chars(-1,0,0,"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ-",NULL,NULL,NULL);
+       tvbparse_wanted_t* want_name = tvbparse_chars(-1,0,0,"abcdefghijklmnopqrstuvwxyz-_:ABCDEFGHIJKLMNOPQRSTUVWXYZ",NULL,NULL,NULL);
 
        tvbparse_wanted_t* want_attributes = tvbparse_one_or_more(-1, NULL, NULL, NULL,
-                                                                                                                         tvbparse_set_seq(hf_attrib, NULL, NULL, after_attrib,
+                                                                                                                         tvbparse_set_seq(-1, NULL, NULL, after_attrib,
                                                                                                                                                           want_name,
                                                                                                                                                           tvbparse_char(-1,"=",NULL,NULL,NULL),
                                                                                                                                                           tvbparse_set_oneof(0, NULL, NULL, get_attrib_value,
@@ -306,7 +370,7 @@ void init_xml_parser(void) {
                                                                                                                 tvbparse_string(-1, "/>", NULL, NULL, after_closed_tag),
                                                                                                                 NULL);
        
-       tvbparse_wanted_t* want_stopxmlpi = tvbparse_string(-1,"?>",NULL,NULL,NULL);
+       tvbparse_wanted_t* want_stopxmlpi = tvbparse_string(-1,"?>",NULL,NULL,after_xmlpi);
        
        want_ignore = tvbparse_chars(-1,0,0," \t\r\n",NULL,NULL,NULL);
        
@@ -317,7 +381,7 @@ void init_xml_parser(void) {
                                                                                                                          tvbparse_string(-1,"-->",NULL,NULL,NULL),
                                                                                                                          TRUE),
                                                                                           NULL),
-                                                         tvbparse_set_seq(hf_xmlpi,NULL,before_xmpli,after_xmlpi,
+                                                         tvbparse_set_seq(hf_xmlpi,NULL,before_xmpli,NULL,
                                                                                           tvbparse_string(-1,"<?",NULL,NULL,NULL),
                                                                                           want_name,
                                                                                           tvbparse_set_oneof(-1,NULL,NULL,NULL,
@@ -377,12 +441,7 @@ void init_xml_parser(void) {
                                                                                                                                  want_stoptag,
                                                                                                                                  NULL),
                                                                                           NULL),
-                                                         tvbparse_set_seq(hf_entity,NULL,NULL,after_token,
-                                                                                          tvbparse_char(4,"&",NULL,NULL,NULL),
-                                                                                          want_name,
-                                                                                          tvbparse_char(4,";",NULL,NULL,NULL),
-                                                                                          NULL),
-                                                         tvbparse_not_chars(hf_cdata,0,0,"<",NULL,NULL,after_token),
+                                                         tvbparse_not_chars(XML_CDATA,0,0,"<",NULL,NULL,after_token),
                                                          tvbparse_not_chars(-1,0,0," \t\r\n",NULL,NULL,unrecognized_token),
                                                          NULL);
        
@@ -390,35 +449,139 @@ void init_xml_parser(void) {
 }
 
 
+/*
+ * Allocates a namespace entry, copies the three descriptive strings, gives it
+ * empty attribute and element tables and inserts it into `hash` keyed by its
+ * own copy of `name`.
+ * The variable arguments are attribute names (gchar*) terminated by NULL; each
+ * one is stored with a heap-allocated field id initialised to -1.
+ * Returns the new entry.
+ */
+xml_names_t* xml_new_namespace(GHashTable* hash, gchar* name, gchar* longname, gchar* blurb, ...) {
+       xml_names_t* ns = g_malloc(sizeof(xml_names_t));
+       va_list ap;
+       gchar* attr_name;
+       
+       ns->name = g_strdup(name);
+       ns->longname = g_strdup(longname);
+       ns->blurb = g_strdup(blurb);
+       ns->hf_tag = -1;
+       ns->hf_cdata = -1;
+       ns->ett = -1;
+       /* g_malloc() does not clear the block, so no member may be left unset */
+       ns->is_root = FALSE;
+       ns->attributes = g_hash_table_new(g_str_hash,g_str_equal);
+       ns->elements = g_hash_table_new(g_str_hash,g_str_equal);
+       
+       va_start(ap,blurb);
+       
+       while(( attr_name = va_arg(ap,gchar*) )) {
+               int* hfp = g_malloc(sizeof(int));
+               *hfp = -1;
+               g_hash_table_insert(ns->attributes,g_strdup(attr_name),hfp);
+       }
+       
+       va_end(ap);
+       
+       g_hash_table_insert(hash,ns->name,ns);
+       
+       return ns;
+}
+
+/*
+ * g_hash_table_foreach() callback for an attributes table.
+ *   k: attribute name (gchar*)
+ *   v: int* that is recorded as the field's id slot
+ *   p: prefix string (gchar*)
+ * Appends an FT_STRING field description to the global hf array; the string
+ * "<p>.<k>" serves as its name, abbreviation and blurb alike.
+ */
+void add_xml_attribute_names(gpointer k, gpointer v, gpointer p) {
+       gchar* basename = g_strdup_printf("%s.%s",(gchar*)p,(gchar*)k);
+       hf_register_info hfri;
+       
+       hfri.p_id = (int*)v;
+       hfri.hfinfo.name = basename;
+       hfri.hfinfo.abbrev = basename;
+       hfri.hfinfo.type = FT_STRING;
+       hfri.hfinfo.display = BASE_NONE;
+       hfri.hfinfo.strings = NULL;
+       hfri.hfinfo.bitmask = 0x0;
+       hfri.hfinfo.blurb = basename;
+       hfri.hfinfo.id = 0;
+       hfri.hfinfo.parent = 0;
+       hfri.hfinfo.ref_count = 0;
+       hfri.hfinfo.bitshift = 0;
+       hfri.hfinfo.same_name_next = NULL;
+       hfri.hfinfo.same_name_prev = NULL;
+       
+       /* basename is deliberately not freed: the appended entry keeps pointing at it */
+       g_array_append_val(hf,hfri);
+}
+
+/*
+ * g_hash_table_foreach() callback for the table of <? ?> namespaces.
+ *   k: unused
+ *   v: the xml_names_t* to describe
+ *   p: prefix string (gchar*)
+ * Appends an FT_STRING field "<p>.<name>" bound to ns->hf_tag to the global hf
+ * array and the address of ns->ett to ett_arr, then adds one field per
+ * attribute of the namespace using "<p>.<name>" as their prefix.
+ */
+void add_xmlpi_namespace(gpointer k _U_, gpointer v, gpointer p) {
+       xml_names_t* ns = v;
+       hf_register_info hfri;
+       gchar* basename = g_strdup_printf("%s.%s",(gchar*)p,ns->name);
+       gint* ett_p = &(ns->ett);
+       
+       hfri.p_id = &(ns->hf_tag);
+       hfri.hfinfo.name = basename;
+       hfri.hfinfo.abbrev = basename;
+       hfri.hfinfo.type = FT_STRING;
+       hfri.hfinfo.display = BASE_NONE;
+       hfri.hfinfo.strings = NULL;
+       hfri.hfinfo.bitmask = 0x0;
+       hfri.hfinfo.blurb = basename;
+       hfri.hfinfo.id = 0;
+       hfri.hfinfo.parent = 0;
+       hfri.hfinfo.ref_count = 0;
+       hfri.hfinfo.bitshift = 0;
+       hfri.hfinfo.same_name_next = NULL;
+       hfri.hfinfo.same_name_prev = NULL;
+
+       g_array_append_val(hf,hfri);
+       g_array_append_val(ett_arr,ett_p);
+       
+       g_hash_table_foreach(ns->attributes,add_xml_attribute_names,basename);
+
+}
+
+/*
+ * Creates the lookup tables used by the dissector and queues the fields of the
+ * built-in <?xml ... ?> processing instruction for registration.
+ */
+void init_xml_names(void) {
+       xml_names_t* xmlpi_xml_ns;
+
+       xmpli_names = g_hash_table_new(g_str_hash,g_str_equal);
+       media_types = g_hash_table_new(g_str_hash,g_str_equal);
+       
+       /* the fallback namespace gets empty tables so lookups in it simply miss */
+       unknown_ns.elements = g_hash_table_new(g_str_hash,g_str_equal);
+       unknown_ns.attributes = g_hash_table_new(g_str_hash,g_str_equal);
+       
+       xmlpi_xml_ns = xml_new_namespace(xmpli_names,"xml","XML XMLPI","XML XMLPI",
+                                                                        "version","encoding","standalone",NULL);
+       
+       /* drop the elements table that xml_new_namespace() created for this entry */
+       g_hash_table_destroy(xmlpi_xml_ns->elements);
+       xmlpi_xml_ns->elements = NULL;
+       
+       /* append hf/ett entries for every namespace in xmpli_names, prefixed "xml.xmlpi" */
+       g_hash_table_foreach(xmpli_names,add_xmlpi_namespace,"xml.xmlpi");
+}
+
+
 void
 proto_register_xml(void) {
        
-       static gint *ett[] = {
-               &ett_i,
-               &ett_tag,
+       static gint *ett_base[] = {
+               &unknown_ns.ett,
+               &xml_ns.ett,
                &ett_dtd,
+               &ett_xmpli
        };
        
-       static hf_register_info hf[] = {
-               { &hf_cdata, {"CDATA", "xml.cdata", FT_STRING, BASE_NONE, NULL, 0, "", HFILL }},
+       static hf_register_info hf_base[] = {
                { &hf_xmlpi, {"XMLPI", "xml.xmlpi", FT_STRING, BASE_NONE, NULL, 0, "", HFILL }},
-               { &hf_entity, {"Entity", "xml.entity", FT_STRING, BASE_NONE, NULL, 0, "", HFILL }},
-               { &hf_attrib, {"Attribute", "xml.attribute", FT_STRING, BASE_NONE, NULL, 0, "", HFILL }},
                { &hf_comment, {"Comment", "xml.comment", FT_STRING, BASE_NONE, NULL, 0, "", HFILL }},
-               { &hf_tag, {"Tag", "xml.tag", FT_STRING, BASE_NONE, NULL, 0, "", HFILL }},
+               { &hf_unknowwn_attrib, {"Attribute", "xml.attribute", FT_STRING, BASE_NONE, NULL, 0, "", HFILL }},
                { &hf_doctype, {"Doctype", "xml.doctype", FT_STRING, BASE_NONE, NULL, 0, "", HFILL }},
                { &hf_dtd_tag, {"DTD Tag", "xml.dtdtag", FT_STRING, BASE_NONE, NULL, 0, "", HFILL }},
-               { &hf_what, {"Unknown", "xml.unknown", FT_STRING, BASE_NONE, NULL, 0, "", HFILL }}
+               { &unknown_ns.hf_cdata, {"CDATA", "xml.cdata", FT_STRING, BASE_NONE, NULL, 0, "", HFILL }},
+               { &unknown_ns.hf_tag, {"Tag", "xml.tag", FT_STRING, BASE_NONE, NULL, 0, "", HFILL }},
+               { &hf_junk, {"Unknown", "xml.unknown", FT_STRING, BASE_NONE, NULL, 0, "", HFILL }}
        };
+
+       hf = g_array_new(FALSE,FALSE,sizeof(hf_register_info));
+       ett_arr = g_array_new(FALSE,FALSE,sizeof(gint*));
+
+       g_array_append_vals(hf,hf_base,array_length(hf_base));
+       g_array_append_vals(ett_arr,ett_base,array_length(ett_base));
        
-       proto_xml = proto_register_protocol("eXtensible Markup Language",
-                                                                               "XML",
-                                                                               "xml");
-       
-       proto_register_field_array(proto_xml, hf, array_length(hf));
-       proto_register_subtree_array(ett, array_length(ett));
+       init_xml_names();
+
+       /* argument order is (full name, short name, filter name), i.e. longname, blurb, name of xml_ns */
+       xml_ns.hf_tag = proto_register_protocol(xml_ns.longname, xml_ns.blurb, xml_ns.name);
+
+       proto_register_field_array(xml_ns.hf_tag, (hf_register_info*)hf->data, hf->len);
+       proto_register_subtree_array((gint**)ett_arr->data, ett_arr->len);
        
-       register_dissector("xml", dissect_xml, proto_xml);
+       register_dissector("xml", dissect_xml, xml_ns.hf_tag);
        
        init_xml_parser();
 }
diff --git a/epan/dtd.h b/epan/dtd.h
new file mode 100644 (file)
index 0000000..1db10fd
--- /dev/null
@@ -0,0 +1,61 @@
+/*
+ *  dtd.h
+ *
+ * XML dissector for ethereal 
+ * DTD import declarations
+ *
+ * Copyright 2005, Luis E. Garcia Ontanon <luis.ontanon@gmail.com>
+ *
+ * $Id$
+ *
+ * Ethereal - Network traffic analyzer
+ * By Gerald Combs <gerald@ethereal.com>
+ * Copyright 1998 Gerald Combs
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+
+#ifndef _DTD_H_
+#define _DTD_H_
+
+#include <glib.h>
+
+/* Result of parsing one DTD; handed to the grammar as its extra argument. */
+typedef struct _dtd_build_data_t {
+       gchar* proto_name; /* lower-cased DOCTYPE name, set by the grammar's doctype rule */
+       gchar* media_type; /* NOTE(review): not set by the grammar in this commit - presumably filled by the lexer; confirm */
+       gchar* description; /* NOTE(review): as above */
+       gchar* proto_root; /* NOTE(review): as above */
+
+       GPtrArray* elements; /* dtd_named_list_t*, one per ELEMENT declaration */
+       GPtrArray* attributes; /* dtd_named_list_t*, one per ATTLIST declaration */
+
+       gchar* location; /* quoted in the grammar's error messages */
+       GString* error; /* syntax-error and parse-failure messages are appended here */
+} dtd_build_data_t;
+
+/* Token value passed to the grammar; the grammar's token destructor frees both strings and the struct. */
+typedef struct _dtd_token_data_t {
+       gchar* text; /* token text; names are taken from it and it is quoted in syntax errors */
+       gchar* location;
+} dtd_token_data_t;
+
+/* A declaration name paired with the names it lists (child elements or attributes). */
+typedef struct _dtd_named_list_t {
+       gchar* name;
+       GPtrArray* list; /* the grammar stores lower-cased gchar* names in it */
+} dtd_named_list_t;
+
+extern GString* dtd_preparse(gchar* dname, gchar* fname, GString* err);
+extern dtd_build_data_t* dtd_parse(GString* s);
+
+#endif
diff --git a/epan/dtd_grammar.lemon b/epan/dtd_grammar.lemon
new file mode 100644 (file)
index 0000000..fc98472
--- /dev/null
@@ -0,0 +1,151 @@
+%include {
+
+/* dtd_parser.lemon
+* XML dissector for ethereal 
+* XML's DTD grammar
+*
+* Copyright 2005, Luis E. Garcia Ontanon <luis.ontanon@gmail.com>
+*
+* $Id$
+*
+* Ethereal - Network traffic analyzer
+* By Gerald Combs <gerald@ethereal.com>
+* Copyright 1998 Gerald Combs
+*
+* This program is free software; you can redistribute it and/or
+* modify it under the terms of the GNU General Public License
+* as published by the Free Software Foundation; either version 2
+* of the License, or (at your option) any later version.
+* 
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+* 
+* You should have received a copy of the GNU General Public License
+* along with this program; if not, write to the Free Software
+* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+*/
+
+#include <glib.h>
+#include "dtd.h"
+
+
+/* Pairs a name with an already-built list; neither argument is copied. */
+static dtd_named_list_t* dtd_named_list_new(gchar* name, GPtrArray* list) {
+       dtd_named_list_t* entry = g_malloc(sizeof(dtd_named_list_t));
+
+       entry->list = list;
+       entry->name = name;
+       
+       return entry;
+}
+
+/*
+ * Moves every pointer stored in b onto the end of a and disposes of b.
+ * The items themselves are not freed; they belong to a afterwards.
+ * Items are not appended in their original order, because each removal
+ * fills slot 0 with b's last item.
+ * Returns a.
+ */
+static GPtrArray* g_ptr_array_join(GPtrArray* a, GPtrArray* b){
+       
+       while(b->len > 0) {
+               g_ptr_array_add(a,g_ptr_array_remove_index_fast(b,0));
+       }
+       
+       /* TRUE also releases b's pointer block; with FALSE it is returned to the caller, who dropped it */
+       g_ptr_array_free(b,TRUE);
+
+       return a;
+}
+
+}
+
+%name DtdParse
+
+%extra_argument { dtd_build_data_t *bd }
+
+%token_destructor { 
+       if ($$) {
+               if ($$->text) g_free($$->text);
+               if ($$->location) g_free($$->location);
+               g_free($$);
+       }
+}
+
+%syntax_error {
+       if (!TOKEN)
+               g_string_sprintfa(bd->error,"syntax error at end of file");
+       else 
+               g_string_sprintfa(bd->error,"syntax error in %s at or before '%s': \n", bd->location,TOKEN->text);
+}
+
+%parse_failure {
+       g_string_sprintfa(bd->error,"DTD parsing failure at %s\n",bd->location);
+}
+
+%token_prefix TOKEN_
+
+%token_type { dtd_token_data_t* }
+
+dtd ::= doctype.
+dtd ::= dtd_parts.
+
+doctype ::= TAG_START DOCTYPE_KW NAME(Name) OPEN_BRACKET dtd_parts CLOSE_BRACKET TAG_STOP. {
+       bd->proto_name = g_strdown(g_strdup(Name->text));
+}
+
+dtd_parts ::= dtd_parts element(Element). { g_ptr_array_add(bd->elements,Element); }
+dtd_parts ::= dtd_parts attlist(Attlist). { g_ptr_array_add(bd->attributes,Attlist); }
+dtd_parts ::= element(Element). { g_ptr_array_add(bd->elements,Element); }
+dtd_parts ::= attlist(Attlist). { g_ptr_array_add(bd->attributes,Attlist); }
+
+%type   attlist                                { dtd_named_list_t* }
+attlist(A) ::= TAG_START ATTLIST_KW NAME(B) attrib_list(TheList) TAG_STOP. { A = dtd_named_list_new(B->text,TheList); }
+
+%type element { dtd_named_list_t* }
+element(A) ::= TAG_START ELEMENT_KW NAME(B) sub_elements(C) TAG_STOP. { A = dtd_named_list_new(B->text,C); }
+
+%type   attrib_list                    { GPtrArray* }
+attrib_list(A) ::= attrib_list(B) attrib(C). { g_ptr_array_add(B,C); A = B; }
+attrib_list(A) ::= attrib(B).  { A = g_ptr_array_new(); g_ptr_array_add(A,B);  }
+
+%type   attrib                         { gchar* }
+attrib(A) ::= NAME(B) att_type att_default. { A = g_strdown(g_strdup(B->text)); }
+
+att_type ::= ATT_TYPE.
+att_type ::= enumeration.
+
+att_default ::= ATT_DEF.
+att_default ::= ATT_DEF_WITH_VALUE QUOTED. 
+att_default ::= QUOTED.
+att_default ::= IMPLIED_KW.
+att_default ::= REQUIRED_KW.
+
+enumeration ::= OPEN_PARENS enum_list CLOSE_PARENS.
+
+enum_list ::= enum_list PIPE enum_item.
+enum_list ::= enum_item.
+enum_list ::= enumeration.
+enum_list ::= enum_list PIPE enumeration.
+
+enum_item ::= NAME.
+enum_item ::= QUOTED.
+
+
+%type   sub_elements           { GPtrArray* }
+sub_elements(A) ::= sub_elements(B) STAR. {A=B;}
+sub_elements(A) ::= sub_elements(B) PLUS. {A=B;}
+sub_elements(A) ::= sub_elements(B) QUESTION. {A=B;}
+sub_elements(A) ::= OPEN_PARENS ELEM_DATA CLOSE_PARENS. { A = g_ptr_array_new(); }
+sub_elements(A) ::= OPEN_PARENS element_list(B) COMMA ELEM_DATA CLOSE_PARENS.  { A = B; }
+sub_elements(A) ::= OPEN_PARENS element_list(B) PIPE ELEM_DATA CLOSE_PARENS.   { A = B; }
+sub_elements(A) ::= OPEN_PARENS element_list(B) CLOSE_PARENS. { A = B; }
+sub_elements(A) ::= EMPTY_KW. { A = g_ptr_array_new(); }
+
+%type   element_list   { GPtrArray* }
+element_list(A)        ::= element_list(B) COMMA element_child(C).     { g_ptr_array_add(B,C); A = B; }
+element_list(A)        ::= element_list(B) PIPE element_child(C).      { g_ptr_array_add(B,C); A = B; }
+element_list(A)        ::= element_child(B).                                           { A = g_ptr_array_new(); g_ptr_array_add(A,B); }
+element_list(A) ::= sub_elements(B).                                           { A = B; }
+element_list(A) ::= element_list(B) COMMA sub_elements(C).   { A = g_ptr_array_join(B,C); }
+element_list(A) ::= element_list(B) PIPE sub_elements(C).   { A = g_ptr_array_join(B,C); }
+
+%type   element_child          { gchar* }
+element_child(A) ::= NAME(B).                  { A = g_strdown(g_strdup(B->text)); }
+element_child(A) ::= NAME(B) STAR.             { A = g_strdown(g_strdup(B->text)); }
+element_child(A) ::= NAME(B) QUESTION. { A = g_strdown(g_strdup(B->text)); }
+element_child(A) ::= NAME(B) PLUS.             { A = g_strdown(g_strdup(B->text)); }
+
diff --git a/epan/dtd_parse.l b/epan/dtd_parse.l
new file mode 100644 (file)
index 0000000..8c33ee2
--- /dev/null
@@ -0,0 +1,316 @@
+%option noyywrap
+%option nounput 
+%option outfile="dtd_parse.c"
+%option prefix="Dtd_Parse_"
+%option never-interactive
+
+%{
+
+       /* dtd_parse.l
+       * an XML dissector for ethereal 
+       * lexical analyzer for DTDs
+       *
+       * Copyright 2004, Luis E. Garcia Ontanon <luis.ontanon@gmail.com>
+       *
+       * $Id$
+       *
+       * Ethereal - Network traffic analyzer
+       * By Gerald Combs <gerald@ethereal.com>
+       * Copyright 1998 Gerald Combs
+       *
+       * This program is free software; you can redistribute it and/or
+       * modify it under the terms of the GNU General Public License
+       * as published by the Free Software Foundation; either version 2
+       * of the License, or (at your option) any later version.
+       * 
+       * This program is distributed in the hope that it will be useful,
+       * but WITHOUT ANY WARRANTY; without even the implied warranty of
+       * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+       * GNU General Public License for more details.
+       * 
+       * You should have received a copy of the GNU General Public License
+       * along with this program; if not, write to the Free Software
+       * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+       */
+       
+#include <glib.h>
+#include <string.h>
+       
+#include "dtd.h"
+#include "dtd_grammar.h"
+       
+       /* One attribute accepted inside an "ethereal:protocol" processing
+        * instruction: its name and the callback that stores its value. */
+       struct _proto_xmlpi_attr {
+               gchar* name;
+               void (*act)(gchar*);
+       };
+
+       /* Entry points of the parser generated from the lemon grammar. */
+       void DtdParse(void*,int,dtd_token_data_t*,dtd_build_data_t*);
+       void *DtdParseAlloc(void *(*)(gulong));
+       void DtdParseFree( void*, void(*)(void*) );
+       void DtdParseTrace(FILE *TraceFILE, char *zTracePrompt);        
+       /* parser instance created and destroyed by dtd_parse() */
+       void* pParser;
+       /* text being scanned, with the read position and total length used by my_yyinput() */
+       GString* input_string;  
+       guint offset;
+       guint len;
+       /* copy of the most recent "ethereal:location" value; attached to every token */
+       gchar* location;
+       /* name of the ethereal:protocol attribute whose value is being read */
+       gchar* attr_name;
+       
+       static int my_yyinput(char* buff,guint size);
+       
+       static dtd_token_data_t* new_token(gchar*);
+
+       /* result under construction for the current dtd_parse() call */
+       static dtd_build_data_t* build_data;
+       
+       /*
+        * Setters used by the proto_attrs table.  Each one discards the value
+        * previously stored in build_data (g_free() accepts NULL) and keeps a
+        * private copy of val.
+        */
+       static void set_proto_name (gchar* val) {
+               g_free(build_data->proto_name);
+               build_data->proto_name = g_strdup(val);
+       }
+
+       static void set_media_type (gchar* val) {
+               g_free(build_data->media_type);
+               build_data->media_type = g_strdup(val);
+       }
+
+       static void set_proto_root (gchar* val) {
+               g_free(build_data->proto_root);
+               build_data->proto_root = g_strdup(val);
+       }
+
+       static void set_description (gchar* val) {
+               g_free(build_data->description);
+               build_data->description = g_strdup(val);
+       }
+
+       /*
+        * Attributes recognised inside <? ethereal:protocol ... ?>, each paired
+        * with the setter that stores its value in build_data.  The GET_ATTR_VAL
+        * rule scans this table with a case-insensitive comparison; the
+        * {NULL,NULL} entry terminates the scan.
+        */
+       struct _proto_xmlpi_attr proto_attrs[] =
+       {
+               { "name", set_proto_name },
+               { "media", set_media_type },
+               { "root", set_proto_root },
+               { "description", set_description },
+               {NULL,NULL}
+       };
+       
+/*
+ * Hand one token to the lemon parser: publish the current location through
+ * build_data, wrap yytext in a newly allocated token, and stop the scanner
+ * as soon as build_data->error is non-empty.
+ */
+#define DTD_PARSE(token_type) \
+       { build_data->location = location; \
+               DtdParse(pParser, (token_type), new_token(yytext), build_data); \
+               if(build_data->error->len > 0) yyterminate(); \
+       }
+
+/* Make flex read from the in-memory input_string instead of a FILE. */
+#define YY_INPUT(buff,result,max_size) ( (result) = my_yyinput((buff),(max_size)) )
+
+%}
+
+/* processing-instruction delimiters and the two PI targets this scanner understands */
+start_xmlpi "<?"
+
+location_xmlpi "ethereal:location"
+protocol_xmlpi "ethereal:protocol"
+
+/*
+ * '=' followed by optional blanks and the opening double quote of an
+ * attribute value.  The POSIX class has to sit inside a bracket expression:
+ * a bare [:blank:] is just the character set ':', 'b', 'l', 'a', 'n', 'k'.
+ */
+get_attr_quote =[[:blank:]]*["]
+avoid_editor_bug ["]
+
+/* the location value: everything up to the next blank */
+get_location_xmlpi  [^[:blank:]]+
+
+stop_xmlpi "?>"
+
+/* markup declaration delimiters, white space and declaration keywords */
+special_start  "<!"
+special_stop   ">"
+whitespace     [[:blank:]\r\n]+
+newline        \n
+attlist_kw     ATTLIST
+doctype_kw     DOCTYPE
+element_kw     ELEMENT
+
+/* content keywords; the rules below map all three to TOKEN_ELEM_DATA */
+pcdata         #PCDATA
+any            ANY
+cdata          #CDATA
+
+/* attribute types; the rules below map all of them to TOKEN_ATT_TYPE */
+iD             ID
+idref          IDREF
+idrefs         IDREFS
+nmtoken        NMTOKEN
+nmtokens       NMTOKENS
+entity         ENTITY
+entities       ENTITIES
+notation       NOTATION
+cdata_t        CDATA
+
+/* EMPTY content and attribute default keywords */
+empty          EMPTY
+defaulT        #DEFAULT
+fixed          #FIXED
+required       #REQUIRED
+implied        #IMPLIED
+
+/* single-character punctuation */
+star           "*"
+question       "?"
+plus           "+"
+open_parens    "("
+close_parens   ")"
+open_bracket   "["
+close_bracket  "]"
+comma          ","
+pipe           "|"
+dquote         ["]
+
+/*
+ * Element and attribute names.  This scanner is not caseless, so both cases
+ * are listed explicitly: the grammar lower-cases names itself and the
+ * ethereal:protocol attributes are compared case-insensitively, which only
+ * makes sense if upper-case names reach them.  The keyword rules precede the
+ * {name} rule, so an exact keyword still wins a tie.
+ */
+name           [A-Za-z][-A-Za-z0-9_]*
+/* quoted strings; the token text includes the surrounding quotes */
+dquoted        ["][^\"]*["]
+squoted        ['][^\']*[']
+
+%START DTD XMLPI LOCATION DONE PROTOCOL GET_ATTR_QUOTE GET_ATTR_VAL GET_ATTR_CLOSE_QUOTE
+%%
+
+{whitespace}            ;
+
+<DTD>{start_xmlpi}             {
+       /* "<?" opens a processing instruction; its target selects the next state */
+       BEGIN XMLPI;
+}
+
+<XMLPI>{location_xmlpi} {
+       /* drop the previous location and clear the pointer, so that nothing can
+        * read or free it again before the LOCATION rule stores the new value */
+       g_free(location);
+       location = NULL;
+       BEGIN LOCATION;
+}
+
+<XMLPI>{protocol_xmlpi} {
+       BEGIN PROTOCOL;
+}
+
+<XMLPI>. ;
+<XMLPI>{stop_xmlpi} BEGIN DTD;
+
+<LOCATION>{get_location_xmlpi} {
+       /* keep a private copy of the location text; new_token() duplicates it
+        * into every token created from now on */
+       location = g_strdup(yytext);
+       BEGIN DONE;
+}
+
+       /* after the location value only the closing "?>" is expected */
+<DONE>{stop_xmlpi}  BEGIN DTD;
+
+<PROTOCOL>{name} {
+       /* remember the attribute name until its value has been read */
+       attr_name = g_strdup(yytext);
+       BEGIN GET_ATTR_QUOTE;
+}
+
+<GET_ATTR_QUOTE>{get_attr_quote} { BEGIN GET_ATTR_VAL; }
+
+<GET_ATTR_QUOTE>. {
+       /* anything other than ="... after the name is an error */
+       g_string_sprintfa(build_data->error,
+                                       "error in ethereal:protocol xmpli at %s : could not find attribute value!",
+                                       location);
+       /* the name copy is not needed any more; release it instead of leaking it */
+       g_free(attr_name);
+       attr_name = NULL;
+       yyterminate();
+}
+
+<GET_ATTR_VAL>[^"]+ {
+       /*"*/
+       struct _proto_xmlpi_attr* handler = proto_attrs;
+
+       /* advance until the attribute name matches (ignoring case) or the
+        * terminating entry of the table is reached */
+       while (handler->name && g_strcasecmp(attr_name,handler->name) != 0) {
+               handler++;
+       }
+
+       if (handler->name) {
+               /* known attribute: store the value and expect the closing quote */
+               handler->act(yytext);
+               g_free(attr_name);
+               BEGIN GET_ATTR_CLOSE_QUOTE;
+       } else {
+               /* unknown attribute: report it and stop scanning */
+               g_string_sprintfa(build_data->error,
+                                               "error in ethereal:protocol xmpli at %s : no such parameter %s!",
+                                               location, attr_name);
+               g_free(attr_name);
+               yyterminate();
+       }
+}
+
+       /* closing quote of a value: go back to reading attribute names */
+<GET_ATTR_CLOSE_QUOTE>{dquote} { BEGIN PROTOCOL;}
+
+       /* "?>" ends the ethereal:protocol instruction */
+<PROTOCOL>{stop_xmlpi} BEGIN DTD;
+
+       /* Every rule below forwards the matched text to the lemon parser through
+        * DTD_PARSE with the token code shown. */
+<DTD>{special_start}         { DTD_PARSE(TOKEN_TAG_START); }
+<DTD>{special_stop}          { DTD_PARSE(TOKEN_TAG_STOP); }
+
+<DTD>{attlist_kw}            { DTD_PARSE(TOKEN_ATTLIST_KW); }
+<DTD>{element_kw}            { DTD_PARSE(TOKEN_ELEMENT_KW); }
+<DTD>{doctype_kw}            { DTD_PARSE(TOKEN_DOCTYPE_KW); }
+
+       /* #PCDATA, ANY and #CDATA are not distinguished by the parser */
+<DTD>{pcdata}                { DTD_PARSE(TOKEN_ELEM_DATA); } 
+<DTD>{any}                   { DTD_PARSE(TOKEN_ELEM_DATA); }
+<DTD>{cdata}                 { DTD_PARSE(TOKEN_ELEM_DATA); }
+<DTD>{empty}                            { DTD_PARSE(TOKEN_EMPTY_KW); }
+
+       /* all attribute types share one token code */
+<DTD>{iD}                               { DTD_PARSE(TOKEN_ATT_TYPE); }
+<DTD>{idref}                 { DTD_PARSE(TOKEN_ATT_TYPE); }
+<DTD>{idrefs}                { DTD_PARSE(TOKEN_ATT_TYPE); }
+<DTD>{nmtoken}               { DTD_PARSE(TOKEN_ATT_TYPE); }
+<DTD>{nmtokens}              { DTD_PARSE(TOKEN_ATT_TYPE); }
+<DTD>{entity}                { DTD_PARSE(TOKEN_ATT_TYPE); }
+<DTD>{entities}              { DTD_PARSE(TOKEN_ATT_TYPE); }
+<DTD>{notation}              { DTD_PARSE(TOKEN_ATT_TYPE); }
+<DTD>{cdata_t}               { DTD_PARSE(TOKEN_ATT_TYPE); }
+       /* #DEFAULT and #FIXED get one token code, #REQUIRED and #IMPLIED another */
+<DTD>{defaulT}               { DTD_PARSE(TOKEN_ATT_DEF_WITH_VALUE); }
+<DTD>{fixed}                 { DTD_PARSE(TOKEN_ATT_DEF_WITH_VALUE); }
+<DTD>{required}              { DTD_PARSE(TOKEN_ATT_DEF); }
+<DTD>{implied}               { DTD_PARSE(TOKEN_ATT_DEF); }
+
+<DTD>{star}                  { DTD_PARSE(TOKEN_STAR); }
+<DTD>{question}              { DTD_PARSE(TOKEN_QUESTION); }
+<DTD>{plus}                  { DTD_PARSE(TOKEN_PLUS); }
+<DTD>{comma}                  { DTD_PARSE(TOKEN_COMMA); }
+<DTD>{open_parens}           { DTD_PARSE(TOKEN_OPEN_PARENS); }
+<DTD>{close_parens}          { DTD_PARSE(TOKEN_CLOSE_PARENS); }
+<DTD>{open_bracket}          { DTD_PARSE(TOKEN_OPEN_BRACKET); }
+<DTD>{close_bracket}         { DTD_PARSE(TOKEN_CLOSE_BRACKET); }
+<DTD>{pipe}                  { DTD_PARSE(TOKEN_PIPE); }
+
+       /* quoted strings keep their quotes in the token text; {name} comes last so
+        * that a keyword of the same length is preferred over it */
+<DTD>{dquoted}               |
+<DTD>{squoted}               { DTD_PARSE(TOKEN_QUOTED); }
+<DTD>{name}                  { DTD_PARSE(TOKEN_NAME); }
+
+%%
+
+/*
+ * Allocate a token holding private copies of the matched text and of the
+ * current location string (g_strdup() yields NULL when location is NULL).
+ * The token is handed to the parser by DTD_PARSE; who frees it is not
+ * visible in this file.
+ */
+static dtd_token_data_t* new_token(gchar* text) {
+       dtd_token_data_t* token = g_new(dtd_token_data_t, 1);
+
+       token->location = g_strdup(location);
+       token->text = g_strdup(text);
+
+       return token;
+}
+
+
+
+/*
+ * Input callback behind YY_INPUT: copy up to size bytes of input_string,
+ * starting at offset, into buff and advance offset.
+ *
+ * Returns the number of bytes copied, or YY_NULL once the whole string has
+ * been consumed.
+ */
+static int my_yyinput(char* buff, guint size) {
+       guint remaining;
+
+       if (offset >= len) {
+               return YY_NULL;
+       }
+
+       /* clamp against what is left instead of testing offset + size, which
+        * could wrap around for large values */
+       remaining = len - offset;
+       if (size > remaining) {
+               size = remaining;
+       }
+
+       memcpy(buff, input_string->str + offset, size);
+       offset += size;
+
+       return size;
+}
+
+/*
+ * Scan the DTD text held in s and feed every token to the lemon parser.
+ *
+ * Returns a newly allocated dtd_build_data_t.  Its error string is non-empty
+ * when scanning or parsing reported a problem; the caller has to inspect it.
+ * The function works on file-level state (input_string, offset, len,
+ * pParser, build_data).
+ */
+extern dtd_build_data_t* dtd_parse(GString* s) {
+       dtd_build_data_t* fresh;
+
+       /* point my_yyinput() at the beginning of the caller's text */
+       input_string = s;
+       len = s->len;
+       offset = 0;
+
+       pParser = DtdParseAlloc(g_malloc);
+
+       /* start from an empty result */
+       fresh = g_new(dtd_build_data_t, 1);
+
+       fresh->proto_name = NULL;
+       fresh->proto_root = NULL;
+       fresh->media_type = NULL;
+       fresh->description = NULL;
+       fresh->location = NULL;
+
+       fresh->elements = g_ptr_array_new();
+       fresh->attributes = g_ptr_array_new();
+       fresh->error = g_string_new("");
+
+       build_data = fresh;
+
+       BEGIN DTD;
+
+       yylex();
+
+       /* token code 0 tells the parser that the input has ended */
+       DtdParse(pParser, 0, NULL, build_data);
+
+       /* reset the scanner's buffer before the next call */
+       yyrestart(NULL);
+
+       DtdParseFree(pParser, g_free);
+
+       return build_data;
+}
diff --git a/epan/dtd_preparse.l b/epan/dtd_preparse.l
new file mode 100644 (file)
index 0000000..101a916
--- /dev/null
@@ -0,0 +1,258 @@
+%option noyywrap
+%option nounput
+%option prefix="Dtd_PreParse_"
+%option never-interactive
+%option caseless
+%option outfile="dtd_preparse.c"
+
+%{
+       /*
+        * dtd_preparse.l
+        *
+        * an XML dissector for ethereal 
+        *
+        * DTD Preparser -  import a dtd file into a GString
+        *                                      including files, removing comments
+        *                  and resolving %entities;
+        * 
+        * Copyright 2004, Luis E. Garcia Ontanon <luis.ontanon@gmail.com>
+        *
+        * $Id$
+        *
+        * Ethereal - Network traffic analyzer
+        * By Gerald Combs <gerald@ethereal.com>
+        * Copyright 1998 Gerald Combs
+        *
+        * This program is free software; you can redistribute it and/or
+        * modify it under the terms of the GNU General Public License
+        * as published by the Free Software Foundation; either version 2
+        * of the License, or (at your option) any later version.
+        * 
+        * This program is distributed in the hope that it will be useful,
+        * but WITHOUT ANY WARRANTY; without even the implied warranty of
+        * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+        * GNU General Public License for more details.
+        * 
+        * You should have received a copy of the GNU General Public License
+        * along with this program; if not, write to the Free Software
+        * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+        */
+                       
+#include <glib.h>
+#include <string.h>
+#include <errno.h>
+#include <stdio.h>
+#include "dtd.h"
+
+/* Include stack: only declared in this file; include_stack_ptr stays 0 and
+ * is read by location() alone. */
+#define MAX_INCLUDE_DEPTH 10
+YY_BUFFER_STATE include_stack[MAX_INCLUDE_DEPTH];
+int include_stack_ptr = 0;
+
+/* flex's default action for unmatched text: append it to the current string */
+#define ECHO g_string_append(current,yytext);
+
+/* string receiving text at the moment: output, the value of the entity being
+ * defined, or NULL while inside a comment */
+GString* current;
+/* the preprocessed DTD returned by dtd_preparse() */
+GString* output;
+/* entity table, keyed by "%name;" */
+GHashTable* entities;
+/* key of the entity currently being defined */
+gchar* entity_name;
+/* caller-supplied error accumulator (may be NULL) */
+GString* error;
+
+/* directory prefix, file name and line number used to build location markers */
+gchar* dirname;
+gchar* filename;
+guint linenum;
+
+static gchar* replace_entity(gchar* s);
+static const gchar* location(void);
+static gchar* load_entity_file(gchar* filename);
+               /* [:blank:]+file[:blank:]*=[:blank:]*["] */
+
+%}
+/* Patterns are matched without regard to case (%option caseless above). */
+/* processing instructions are copied through character by character */
+xmlpi_start "<?"
+xmlpi_stop  "?>"
+xmlpi_chars .
+
+comment_start "<!--"
+comment_stop "-->"
+special_start "<!"
+special_stop ">"
+
+/* start of a parameter entity declaration: <!ENTITY % */
+entity_start     "<!"[[:blank:]\n]*entity[[:blank:]\n]*"%"
+system     SYSTEM
+filename   [^"]+
+
+
+/* entity names may contain ':' here, while the reference pattern {entity}
+ * below does not accept it */
+name [A-Za-z][-:A-Za-z0-9_]*
+
+quote "\""
+percent [%]
+escaped_quote "\\\""
+non_quote [^"%]+
+
+avoid_editor_bug ["]
+
+/* an entity reference, %name; or &name; */
+entity        [%&][A-Za-z][-A-Za-z0-9_]*;
+
+/* a run of blanks; the POSIX class is spelled [:blank:] inside the brackets */
+whitespace [[:blank:]]+
+newline    \n
+%START OUTSIDE IN_COMMENT IN_ENTITY NAMED_ENTITY IN_QUOTE ENTITY_DONE GET_FNAME_OPEN_QUOTE GET_FNAME GET_FNAME_CLOSE_QUOTE XMLPI
+%%
+
+
+{entity}                                               if (current) g_string_sprintfa(current,"%s\n%s\n",replace_entity(yytext),location());
+
+{whitespace}                                   if (current) g_string_append(current," ");
+
+<OUTSIDE>{xmlpi_start}                 { g_string_append(current,yytext); BEGIN XMLPI; }
+<XMLPI>{xmlpi_chars}                   { g_string_append(current,yytext); }
+<XMLPI>{newline}                               { /* keep line numbers right inside a PI */ linenum++; g_string_append(current,yytext); }
+<XMLPI>{xmlpi_stop}                            { g_string_append(current,yytext); BEGIN OUTSIDE; }
+
+<OUTSIDE>{comment_start}               { current = NULL; BEGIN IN_COMMENT; }
+<IN_COMMENT>[^-\n]?                            |
+<IN_COMMENT>[-]                                        ;
+<IN_COMMENT>{comment_stop}             { current = output; BEGIN OUTSIDE; }
+       
+{newline}                                              {
+       /* also reached for newlines inside comments (current is NULL there), so
+        * that the lines of a multi-line comment are still counted */
+       linenum++;
+       if (current) g_string_sprintfa(current,"%s\n",location());
+}
+
+
+<OUTSIDE>{entity_start}                        { BEGIN IN_ENTITY; }
+<IN_ENTITY>{name}                              { entity_name = g_strdup_printf("%%%s;",yytext); BEGIN NAMED_ENTITY; }
+<NAMED_ENTITY>{quote}                  { current = g_string_new(location()); BEGIN IN_QUOTE; }
+<IN_QUOTE>{quote}                              { g_hash_table_insert(entities,entity_name,current);  BEGIN ENTITY_DONE; }
+<IN_QUOTE>{percent}                            |
+<IN_QUOTE>{non_quote}                  |
+<IN_QUOTE>{escaped_quote}              g_string_append(current,yytext); /* NOTE(review): newlines swallowed here are not counted in linenum */
+<NAMED_ENTITY>{system}                 { BEGIN GET_FNAME_OPEN_QUOTE; }
+<GET_FNAME_OPEN_QUOTE>{quote}  { BEGIN GET_FNAME; }
+<GET_FNAME>{filename}              {
+       /* Every value in the entity table is read and freed as a GString, so
+        * the file text is wrapped in one instead of being stored as a bare
+        * gchar*.  load_entity_file() returns the literal "" on failure and a
+        * non-empty heap string (it always starts with a location marker) on
+        * success; only the latter may be freed. */
+       gchar* file_text = load_entity_file(yytext);
+       g_hash_table_insert(entities,entity_name,g_string_new(file_text));
+       if (*file_text) g_free(file_text);
+       BEGIN GET_FNAME_CLOSE_QUOTE;
+}
+<GET_FNAME_CLOSE_QUOTE>{quote}  { BEGIN ENTITY_DONE; }
+<ENTITY_DONE>{special_stop}            { current = output; g_string_append(current,"\n"); BEGIN OUTSIDE; }
+
+%%
+
+/*
+ * Read the external entity file fname (looked up relative to dirname) and
+ * return its text with a location marker in front of every line.
+ *
+ * Returns a newly allocated, non-empty string on success.  When the file
+ * cannot be opened a message is appended to error (if set) and the string
+ * literal "" is returned, which must not be freed.  A read error is reported
+ * through error as well, and the text read so far is returned.
+ *
+ * filename and linenum are switched to the entity file while it is being
+ * read and restored before returning.
+ */
+static gchar* load_entity_file(gchar* fname) {
+       gchar* fullname = g_strdup_printf("%s%s",dirname,fname);
+       gchar* save_filename = filename;
+       guint save_linenum = linenum;
+       FILE* fp = fopen(fullname,"r");
+       int open_errno = errno; /* taken before g_free() can disturb errno */
+       GString* filetext;
+       gchar* retstr;
+       gchar chunk[1024];
+       gboolean at_line_start = TRUE;
+
+       g_free(fullname);
+
+       if (!fp) {
+               if (error)
+                       g_string_sprintfa(error,"at %s:%u: could not load file %s: %s", filename, linenum, fname, strerror(open_errno));
+               return "";
+       }
+
+       filename = fname;
+       linenum = 1;
+
+       filetext = g_string_new(location());
+
+       /* fgets() replaces the BSD-only fgetln(); a line longer than the buffer
+        * arrives in several chunks, and only the first chunk of a line gets a
+        * location marker */
+       while (fgets(chunk, sizeof chunk, fp)) {
+               size_t chunk_len = strlen(chunk);
+
+               if (at_line_start)
+                       g_string_append(filetext,location());
+
+               g_string_append_len(filetext,chunk,chunk_len);
+
+               at_line_start = (chunk_len > 0 && chunk[chunk_len - 1] == '\n');
+               if (at_line_start)
+                       linenum++;
+       }
+
+       if ( ferror(fp) && error ) {
+               g_string_sprintfa(error,"at %s:%u: problem reading file %s: %s", filename, linenum, fname, strerror(errno));
+       }
+
+       fclose(fp);
+
+       /* hand the character data to the caller and drop the GString wrapper */
+       retstr = filetext->str;
+       g_string_free(filetext,FALSE);
+
+       /* back to the position in the including file */
+       filename = save_filename;
+       linenum = save_linenum;
+
+       return retstr;
+}
+
+/*
+ * Look up the replacement text of an entity reference.
+ *
+ * entity is the matched reference ("%name;" or "&name;").  Its first
+ * character is overwritten with '%' because the table is keyed by "%name;".
+ * Returns the stored text, which stays owned by the table, or "" after
+ * recording an error when the entity is unknown.
+ */
+static gchar* replace_entity(gchar* entity) {
+       GString* replacement;
+
+       *entity = '%';
+
+       replacement = g_hash_table_lookup(entities,entity);
+
+       if (replacement) {
+               return replacement->str;
+       }
+
+       /* dtd_preparse() accepts a NULL error string, so it may be unset here */
+       if (error) {
+               g_string_sprintfa(error,"dtd_preparse: in file '%s': %s does not exists\n", filename, entity);
+       }
+
+       return "";
+}
+
+/*
+ * Format the current position as an "ethereal:location" processing
+ * instruction built from filename and linenum.
+ *
+ * The result lives in a static buffer that is rewritten by every call, so
+ * callers have to copy or consume it at once.
+ */
+static const gchar* location(void) {
+       static GString* loc = NULL;
+       guint levels;
+
+       if (!loc) {
+               loc = g_string_new("");
+       } else {
+               g_string_truncate(loc,0);
+       }
+
+       g_string_sprintfa(loc,"<? ethereal:location ");
+
+       /* one "file:line from" entry per include level */
+       for (levels = include_stack_ptr + 1; levels > 0; levels--) {
+               g_string_sprintfa(loc, "%s:%u from", filename, linenum);
+       }
+
+       /* cut the trailing "from" (4 characters) left by the last entry */
+       g_string_truncate(loc, loc->len - 4);
+
+       g_string_sprintfa(loc,"?>");
+
+       return loc->str;
+}
+
+/*
+ * Callback for g_hash_table_foreach_remove(): release one entry of the
+ * entity table (a g_malloc'ed key and a GString value) and return TRUE so
+ * that the entry is removed.
+ */
+static gboolean free_gstring_hash_items(gpointer k,gpointer v,gpointer p _U_) {
+       GString* value = v;
+       gchar* key = k;
+
+       g_string_free(value,TRUE);
+       g_free(key);
+
+       return TRUE;
+}
+
+/*
+ * Preprocess the DTD file fname found in directory prefix dname: strip
+ * comments, expand entity references and insert location markers.
+ *
+ * Returns a newly allocated GString with the preprocessed text, or NULL when
+ * the file cannot be opened (a message is appended to err if err is not
+ * NULL).  Problems found while scanning are appended to err as well.
+ */
+extern GString* dtd_preparse(gchar* dname, gchar* fname, GString* err) {
+       gchar* fullname = g_strdup_printf("%s%s",dname,fname);
+       int open_errno;
+
+       dirname = dname;
+       filename = fname;
+
+       yyin = fopen(fullname,"r");
+       open_errno = errno; /* taken before g_free() can disturb errno */
+
+       g_free(fullname);
+
+       if (!yyin) {
+               if (err)
+                       g_string_sprintfa(err, "Could not open file: '%s', error: %s",filename,strerror(open_errno));
+
+               return NULL;
+       }
+
+       linenum = 1;
+
+       error = err;
+
+       entities = g_hash_table_new(g_str_hash,g_str_equal);
+       current = output = g_string_new(location());
+
+       BEGIN OUTSIDE;
+
+       yylex();
+
+       /* close the file while yyin still refers to it; yyrestart(NULL) below
+        * resets the scanner's input to NULL */
+       fclose(yyin);
+
+       yyrestart(NULL);
+
+       g_hash_table_foreach_remove(entities,free_gstring_hash_items,NULL);
+       g_hash_table_destroy(entities);
+
+       return output;
+}