2 * wireshark's xml dissector .
4 * (C) 2005, Luis E. Garcia Ontanon.
8 * Refer to the AUTHORS file or the AUTHORS section in the man page
9 * for contacting the author(s) of this file.
11 * Wireshark - Network traffic analyzer
12 * By Gerald Combs <gerald@wireshark.org>
13 * Copyright 1998 Gerald Combs
15 * This program is free software; you can redistribute it and/or
16 * modify it under the terms of the GNU General Public License
17 * as published by the Free Software Foundation; either version 2
18 * of the License, or (at your option) any later version.
20 * This program is distributed in the hope that it will be useful,
21 * but WITHOUT ANY WARRANTY; without even the implied warranty of
22 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23 * GNU General Public License for more details.
25 * You should have received a copy of the GNU General Public License
26 * along with this program; if not, write to the Free Software
27 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
47 #include <epan/emem.h>
48 #include <epan/packet.h>
49 #include <epan/strutil.h>
50 #include <epan/tvbparse.h>
52 #include <epan/report_err.h>
53 #include <epan/filesystem.h>
54 #include <epan/prefs.h>
55 #include <epan/garrayfix.h>
58 typedef struct _xml_ns_t {
59 /* the name of this namespace */
62 /* its fully qualified name */
65 /* the contents of the whole element from <> to </> */
68 /* chunks of cdata from <> to </> excluding sub tags */
71 /* the subtree for its sub items */
74 GHashTable* attributes;
75 /* key: the attribute name
76 value: hf_id of what's between quotes */
78 /* the namespace's namespaces */
80 /* key: the element name
81 value: the child namespace */
83 GPtrArray* element_names;
84 /* imported directly from the parser and used while building the namespace */
91 proto_item* last_item;
96 struct _attr_reg_data {
/* Subtree indices used for the DOCTYPE and processing-instruction subtrees. */
102 static gint ett_dtd = -1;
103 static gint ett_xmpli = -1;
/* Header-field ids of the built-in items; they are listed in hf_base[] in proto_register_xml(). */
105 static int hf_unknowwn_attrib = -1;
106 static int hf_comment = -1;
107 static int hf_xmlpi = -1;
108 static int hf_dtd_tag = -1;
109 static int hf_doctype = -1;
111 /* dissector handles */
112 static dissector_handle_t xml_handle;
114 /* parser definitions */
/* want: full grammar; want_ignore: whitespace skipped between tokens; want_heur: grammar used by the heuristic check. */
115 static tvbparse_wanted_t* want;
116 static tvbparse_wanted_t* want_ignore;
117 static tvbparse_wanted_t* want_heur;
/* xmpli_names: processing-instruction name -> namespace; media_types: media type string -> root namespace. */
119 static GHashTable* xmpli_names;
120 static GHashTable* media_types;
/* Built-in namespaces; their hash tables are created in init_xml_names(). */
122 static xml_ns_t xml_ns = {"xml","/",-1,-1,-1,NULL,NULL,NULL};
123 static xml_ns_t unknown_ns = {"unknown","?",-1,-1,-1,NULL,NULL,NULL};
/* Root namespace chosen by dissect_xml() for the buffer being dissected. */
124 static xml_ns_t* root_ns;
/* Current preference values, and (the *_save copies) the values apply_prefs() last acted on. */
126 static gboolean pref_heuristic_media = FALSE;
127 static gboolean pref_heuristic_tcp = FALSE;
128 static gboolean pref_heuristic_udp = FALSE;
129 static gboolean pref_heuristic_media_save = FALSE;
130 static gboolean pref_heuristic_tcp_save = FALSE;
131 static gboolean pref_heuristic_udp_save = FALSE;
/* global_xml_tcp_range is the preference value; xml_tcp_range is the copy whose ports are currently registered. */
132 static range_t *global_xml_tcp_range = NULL;
133 static range_t *xml_tcp_range = NULL;
/* Negative token ids handed to tvbparse to tag character data and scoped (root:leaf) names. */
135 #define XML_CDATA -1000
136 #define XML_SCOPED_NAME -1001
/*
 * Media types that init_xml_names() maps to the generic xml namespace
 * whenever no other entry for the same type is already in media_types.
 */
142 static const gchar* default_media_types[] = {
149 "application/auth-policy+xml",
150 "application/cpim-pidf+xml",
151 "application/cpl+xml",
152 "application/mathml+xml",
153 "application/media_control+xml",
154 "application/note+xml",
155 "application/pidf+xml",
156 "application/poc-settings+xml",
157 "application/rdf+xml",
158 "application/reginfo+xml",
159 "application/resource-lists+xml",
160 "application/rlmi+xml",
161 "application/rls-services+xml",
163 "application/simple-filter+xml",
164 "application/soap+xml",
165 "application/vnd.wv.csp+xml",
166 "application/vnd.wv.csp.xml",
167 "application/watcherinfo+xml",
168 "application/xcap-att+xml",
169 "application/xcap-caps+xml",
170 "application/xcap-diff+xml",
171 "application/xcap-el+xml",
172 "application/xcap-error+xml",
173 "application/xcap-ns+xml",
175 "application/xml-dtd",
176 "application/xpidf+xml",
177 "application/xslt+xml",
178 "application/x-wms-logconnectstats",
179 "application/x-wms-logplaystats",
180 "application/x-wms-sendevent",
181 "application/rss+xml",
/*
 * Main dissector entry point.
 *
 * Builds a fresh frame stack holding one root frame, picks the root
 * namespace (looked up in media_types by pinfo->match_string when that is
 * set), appends "/XML" or "/<NAMESPACE>" to the protocol column, adds the
 * top-level item covering the whole buffer and then lets tvbparse walk the
 * buffer; the tree is filled in by the callbacks attached to the grammar.
 */
186 dissect_xml(tvbuff_t *tvb, packet_info *pinfo, proto_tree *tree)
189 tvbparse_elem_t* tok = NULL;
/* static: the array outlives the call, so the one left by an earlier call is released below */
190 static GPtrArray* stack = NULL;
191 xml_frame_t* current_frame;
/* NOTE(review): the guard around this free is not visible in this excerpt */
197 g_ptr_array_free(stack,TRUE);
199 stack = g_ptr_array_new();
/* the root frame is always element 0 of the stack */
200 current_frame = ep_alloc(sizeof(xml_frame_t));
201 g_ptr_array_add(stack,current_frame);
/* the stack is handed to every parser callback as its tvbparse_data argument */
203 tt = tvbparse_init(tvb,0,-1,stack,want_ignore);
204 current_frame->start_offset = 0;
208 if (pinfo->match_string)
209 root_ns = g_hash_table_lookup(media_types,pinfo->match_string);
/* the condition selecting between the two column strings is not visible in this excerpt */
213 colinfo_str = "/XML";
215 colinfo_str = ep_strdup_printf("/%s",root_ns->name);
/* NOTE(review): g_strup() is deprecated in GLib; confirm whether g_ascii_strup() can replace it */
216 g_strup(colinfo_str);
219 if (check_col(pinfo->cinfo, COL_PROTOCOL))
220 col_append_str(pinfo->cinfo, COL_PROTOCOL, colinfo_str);
222 current_frame->ns = root_ns;
/* top-level item spans the whole buffer (offset 0, length -1) */
224 current_frame->item = proto_tree_add_item(tree,current_frame->ns->hf_tag,tvb,0,-1,FALSE);
225 current_frame->tree = proto_item_add_subtree(current_frame->item,current_frame->ns->ett);
226 current_frame->last_item = current_frame->item;
/* empty loop body: each matched token is handled by its grammar callback */
228 while(( tok = tvbparse_get(tt, want) )) ;
/*
 * Heuristic entry point: the buffer is handed to dissect_xml() only when at
 * least one of the heuristic preferences is enabled and the start of the
 * buffer matches want_heur. The returned values are not visible in this excerpt.
 */
231 static gboolean dissect_xml_heur(tvbuff_t *tvb, packet_info *pinfo, proto_tree *tree) {
232 if ( (pref_heuristic_media || pref_heuristic_tcp || pref_heuristic_udp)
/* a throw-away parser (no callback data) is used just to peek at the buffer */
233 && tvbparse_peek(tvbparse_init(tvb,0,-1,NULL,want_ignore), want_heur)) {
234 dissect_xml(tvb, pinfo, tree);
/*
 * Parser callback: adds one item for a completed token to the tree of the
 * frame on top of the stack and sets its text to the formatted token text.
 *
 * tvbparse_data is the frame stack (GPtrArray of xml_frame_t*); tok is the
 * matched token.
 */
241 static void after_token(void* tvbparse_data, const void* wanted_data _U_, tvbparse_elem_t* tok) {
242 GPtrArray* stack = tvbparse_data;
243 xml_frame_t* current_frame = g_ptr_array_index(stack,stack->len - 1);
247 if (tok->id == XML_CDATA) {
/* character data uses the frame's namespace field, or the generic one when the frame has no namespace */
248 hfid = current_frame->ns ? current_frame->ns->hf_cdata : xml_ns.hf_cdata;
/* NOTE(review): body of this branch is not visible; presumably the positive id is itself the field id (e.g. hf_comment) */
249 } else if ( tok->id > 0) {
252 hfid = xml_ns.hf_cdata;
255 pi = proto_tree_add_item(current_frame->tree, hfid, tok->tvb, tok->offset, tok->len, FALSE);
257 proto_item_set_text(pi, "%s",
258 tvb_format_text(tok->tvb,tok->offset,tok->len));
/*
 * Parser callback attached to the start of a processing instruction
 * (the "<?" sequence). Reads the instruction name from the second
 * sub-token, looks it up in xmpli_names, adds an item labelled with the text
 * up to the end of the name, and pushes a new frame so that what follows is
 * attached beneath it. How hf_tag and ett are chosen is not visible in this
 * excerpt.
 */
261 static void before_xmpli(void* tvbparse_data, const void* wanted_data _U_, tvbparse_elem_t* tok) {
262 GPtrArray* stack = tvbparse_data;
263 xml_frame_t* current_frame = g_ptr_array_index(stack,stack->len - 1);
/* first sub-token is the "<?" opener, the next one is the name */
266 tvbparse_elem_t* name_tok = tok->sub->next;
267 gchar* name = (gchar*)tvb_get_ephemeral_string(name_tok->tvb,name_tok->offset,name_tok->len);
268 xml_ns_t* ns = g_hash_table_lookup(xmpli_names,name);
282 pi = proto_tree_add_item(current_frame->tree,hf_tag,tok->tvb,tok->offset,tok->len,FALSE);
/* label covers everything from the start of the token through the end of the name */
284 proto_item_set_text(pi, "%s", tvb_format_text(tok->tvb,tok->offset,(name_tok->offset - tok->offset) + name_tok->len));
286 pt = proto_item_add_subtree(pi,ett);
/* from here on current_frame is the newly pushed frame */
288 current_frame = ep_alloc(sizeof(xml_frame_t));
289 current_frame->item = pi;
290 current_frame->last_item = pi;
291 current_frame->tree = pt;
292 current_frame->start_offset = tok->offset;
293 current_frame->ns = ns;
295 g_ptr_array_add(stack,current_frame);
299 static void after_xmlpi(void* tvbparse_data, const void* wanted_data _U_, tvbparse_elem_t* tok) {
300 GPtrArray* stack = tvbparse_data;
301 xml_frame_t* current_frame = g_ptr_array_index(stack,stack->len - 1);
303 proto_tree_add_text(current_frame->tree,
304 tok->tvb, tok->offset, tok->len,
305 tvb_format_text(tok->tvb,tok->offset,tok->len));
307 if (stack->len > 1) {
308 g_ptr_array_remove_index_fast(stack,stack->len - 1);
310 proto_tree_add_text(current_frame->tree,tok->tvb,tok->offset,tok->len,"[ ERROR: Closing an unopened xmpli tag ]");
/*
 * Parser callback attached to the start of an opening tag.
 *
 * Resolves the namespace for the tag name, adds an item for the tag
 * (labelled with the text up to the end of the name) and pushes a new frame.
 * A scoped name (root:leaf) is resolved through xml_ns.elements and then the
 * root's own elements; a plain name is looked up in the current frame's
 * namespace or in root_ns. The fallbacks used when a lookup fails are not
 * visible in this excerpt.
 */
314 static void before_tag(void* tvbparse_data, const void* wanted_data _U_, tvbparse_elem_t* tok) {
315 GPtrArray* stack = tvbparse_data;
316 xml_frame_t* current_frame = g_ptr_array_index(stack,stack->len - 1);
/* first sub-token is the "<", the next one is the tag name */
317 tvbparse_elem_t* name_tok = tok->sub->next;
321 xml_frame_t* new_frame;
325 if (name_tok->sub->id == XML_SCOPED_NAME) {
/* scoped name sub-tokens: root, ":" separator, leaf */
326 tvbparse_elem_t* root_tok = name_tok->sub->sub;
327 tvbparse_elem_t* leaf_tok = name_tok->sub->sub->next->next;
328 xml_ns_t* nameroot_ns;
330 root_name = (gchar*)tvb_get_ephemeral_string(root_tok->tvb,root_tok->offset,root_tok->len);
331 name = (gchar*)tvb_get_ephemeral_string(leaf_tok->tvb,leaf_tok->offset,leaf_tok->len);
333 nameroot_ns = g_hash_table_lookup(xml_ns.elements,root_name);
336 ns = g_hash_table_lookup(nameroot_ns->elements,name);
345 name = (gchar*)tvb_get_ephemeral_string(name_tok->tvb,name_tok->offset,name_tok->len);
348 if(current_frame->ns) {
349 ns = g_hash_table_lookup(current_frame->ns->elements,name);
352 if (! ( ns = g_hash_table_lookup(root_ns->elements,name) ) ) {
361 pi = proto_tree_add_item(current_frame->tree,ns->hf_tag,tok->tvb,tok->offset,tok->len,FALSE);
362 proto_item_set_text(pi, "%s", tvb_format_text(tok->tvb,tok->offset,(name_tok->offset - tok->offset) + name_tok->len));
364 pt = proto_item_add_subtree(pi,ns->ett);
366 new_frame = ep_alloc(sizeof(xml_frame_t));
367 new_frame->item = pi;
368 new_frame->last_item = pi;
369 new_frame->tree = pt;
370 new_frame->start_offset = tok->offset;
373 g_ptr_array_add(stack,new_frame);
/*
 * Parser callback attached to the ">" that ends an opening tag: appends ">"
 * to the text of the last item recorded in the frame on top of the stack.
 * The frame stays on the stack.
 */
377 static void after_open_tag(void* tvbparse_data, const void* wanted_data _U_, tvbparse_elem_t* tok _U_) {
378 GPtrArray* stack = tvbparse_data;
379 xml_frame_t* current_frame = g_ptr_array_index(stack,stack->len - 1);
381 proto_item_append_text(current_frame->last_item,">");
/*
 * Parser callback attached to the "/>" that ends a self-closing tag: appends
 * "/>" to the last item's text and pops the frame pushed for the tag. The
 * root frame (stack length 1) is never popped; the error text below is
 * presumably emitted in that case (the else line is not visible here).
 */
384 static void after_closed_tag(void* tvbparse_data, const void* wanted_data _U_, tvbparse_elem_t* tok) {
385 GPtrArray* stack = tvbparse_data;
386 xml_frame_t* current_frame = g_ptr_array_index(stack,stack->len - 1);
388 proto_item_append_text(current_frame->last_item,"/>");
390 if (stack->len > 1) {
391 g_ptr_array_remove_index_fast(stack,stack->len - 1);
393 proto_tree_add_text(current_frame->tree,tok->tvb,tok->offset,tok->len,"[ ERROR: Closing an unopened tag ]");
/*
 * Parser callback run after a complete closing tag has been matched.
 * Stretches the current frame's item so that it ends where the closing tag
 * ends, adds the closing tag's text beneath it and pops the frame. The root
 * frame is never popped; the error text is presumably the else branch (its
 * else line is not visible here).
 */
397 static void after_untag(void* tvbparse_data, const void* wanted_data _U_, tvbparse_elem_t* tok){
398 GPtrArray* stack = tvbparse_data;
399 xml_frame_t* current_frame = g_ptr_array_index(stack,stack->len - 1);
/* length = distance from the frame's start to the end of this token */
401 proto_item_set_len(current_frame->item, (tok->offset - current_frame->start_offset) + tok->len);
403 proto_tree_add_text(current_frame->tree,tok->tvb,tok->offset,tok->len,"%s",
404 tvb_format_text(tok->tvb,tok->offset,tok->len));
406 if (stack->len > 1) {
407 g_ptr_array_remove_index_fast(stack,stack->len - 1);
409 proto_tree_add_text(current_frame->tree,tok->tvb,tok->offset,tok->len,
410 "[ ERROR: Closing an unopened tag ]");
/*
 * Parser callback attached to the start of a DOCTYPE declaration. Adds an
 * hf_doctype item positioned on a nested name sub-token, labels it with the
 * whole token's text and pushes a frame without a namespace so that the
 * declaration's contents are attached beneath it.
 */
414 static void before_dtd_doctype(void* tvbparse_data, const void* wanted_data _U_, tvbparse_elem_t* tok){
415 GPtrArray* stack = tvbparse_data;
416 xml_frame_t* current_frame = g_ptr_array_index(stack,stack->len - 1);
/* skips the "<", "!" and DOCTYPE sub-tokens, then descends two levels into the fourth */
417 tvbparse_elem_t* name_tok = tok->sub->next->next->next->sub->sub;
418 proto_tree* dtd_item = proto_tree_add_item(current_frame->tree, hf_doctype,
419 name_tok->tvb, name_tok->offset, name_tok->len, FALSE);
421 proto_item_set_text(dtd_item,"%s",tvb_format_text(tok->tvb,tok->offset,tok->len));
/* from here on current_frame is the newly pushed frame */
423 current_frame = ep_alloc(sizeof(xml_frame_t));
424 current_frame->item = dtd_item;
425 current_frame->last_item = dtd_item;
426 current_frame->tree = proto_item_add_subtree(dtd_item,ett_dtd);
427 current_frame->start_offset = tok->offset;
428 current_frame->ns = NULL;
430 g_ptr_array_add(stack,current_frame);
/*
 * Parser callback that only pops the frame on top of the stack. The root
 * frame is never popped; the error text is presumably the else branch (its
 * else line is not visible here).
 *
 * NOTE(review): tok is marked _U_ in the signature although it is used for
 * the error text below.
 */
433 static void pop_stack(void* tvbparse_data, const void* wanted_data _U_, tvbparse_elem_t* tok _U_) {
434 GPtrArray* stack = tvbparse_data;
435 xml_frame_t* current_frame = g_ptr_array_index(stack,stack->len - 1);
437 if (stack->len > 1) {
438 g_ptr_array_remove_index_fast(stack,stack->len - 1);
440 proto_tree_add_text(current_frame->tree,tok->tvb,tok->offset,tok->len,
441 "[ ERROR: Closing an unopened tag ]");
/*
 * Parser callback run after the "]" ">" pair that closes a DTD section:
 * adds the matched text to the current frame's tree and pops the frame. The
 * root frame is never popped; the error text is presumably the else branch
 * (its else line is not visible here).
 */
445 static void after_dtd_close(void* tvbparse_data, const void* wanted_data _U_, tvbparse_elem_t* tok){
446 GPtrArray* stack = tvbparse_data;
447 xml_frame_t* current_frame = g_ptr_array_index(stack,stack->len - 1);
449 proto_tree_add_text(current_frame->tree,tok->tvb,tok->offset,tok->len,"%s",
450 tvb_format_text(tok->tvb,tok->offset,tok->len));
451 if (stack->len > 1) {
452 g_ptr_array_remove_index_fast(stack,stack->len - 1);
454 proto_tree_add_text(current_frame->tree,tok->tvb,tok->offset,tok->len,"[ ERROR: Closing an unopened tag ]");
/*
 * Parser callback on an attribute value: stores the matched alternative
 * (the token's first sub-element) in tok->data, which is where
 * after_attrib() reads the value from.
 */
458 static void get_attrib_value(void* tvbparse_data _U_, const void* wanted_data _U_, tvbparse_elem_t* tok) {
459 tok->data = tok->sub;
/*
 * Parser callback run after a complete name=value attribute. Adds an item
 * positioned on the value and labelled with the whole attribute text, and
 * records it as the frame's last_item. The field is taken from the current
 * namespace's attribute table when the name is known there; otherwise
 * hf_unknowwn_attrib is used.
 */
462 static void after_attrib(void* tvbparse_data, const void* wanted_data _U_, tvbparse_elem_t* tok) {
463 GPtrArray* stack = tvbparse_data;
464 xml_frame_t* current_frame = g_ptr_array_index(stack,stack->len - 1);
/* sub-tokens: name, "=", value */
465 gchar* name = (gchar*)tvb_get_ephemeral_string(tok->sub->tvb,tok->sub->offset,tok->sub->len);
/* data was filled in by get_attrib_value() */
466 tvbparse_elem_t* value = tok->sub->next->next->data;
471 if(current_frame->ns && (hfidp = g_hash_table_lookup(current_frame->ns->attributes,name) )) {
474 hfid = hf_unknowwn_attrib;
478 current_frame->last_item = proto_tree_add_item(current_frame->tree,hfid,value->tvb,value->offset,value->len,FALSE);
479 proto_item_set_text(current_frame->last_item, "%s", tvb_format_text(tok->tvb,tok->offset,tok->len));
/*
 * Parser callback for text that no other grammar alternative matched: adds
 * an error note covering the token to the current frame's tree.
 *
 * NOTE(review): tok is marked _U_ in the signature although it is used below.
 */
483 static void unrecognized_token(void* tvbparse_data, const void* wanted_data _U_, tvbparse_elem_t* tok _U_){
484 GPtrArray* stack = tvbparse_data;
485 xml_frame_t* current_frame = g_ptr_array_index(stack,stack->len - 1);
487 proto_tree_add_text(current_frame->tree,tok->tvb,tok->offset,tok->len,"[ ERROR: Unrecognized text ]");
/*
 * Builds the tvbparse grammar used by the dissector and stores it in the
 * file-scope variables want, want_ignore and want_heur. Each construct
 * (names, attributes, comments, processing instructions, tags, DOCTYPE and
 * DTD sections) is described as a tvbparse_wanted_t, with the tree-building
 * callbacks of this file attached to it. Several arguments of the nested
 * calls are not visible in this excerpt.
 */
493 static void init_xml_parser(void) {
/* plain names: letters, digits, '-' and '_'; attribute names additionally allow ':' */
494 tvbparse_wanted_t* want_name = tvbparse_chars(-1,1,0,"abcdefghijklmnopqrstuvwxyz-_ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789",NULL,NULL,NULL);
495 tvbparse_wanted_t* want_attr_name = tvbparse_chars(-1,1,0,"abcdefghijklmnopqrstuvwxyz-_ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789:",NULL,NULL,NULL);
/* a scoped name is tagged with XML_SCOPED_NAME so before_tag() can tell it apart */
497 tvbparse_wanted_t* want_scoped_name = tvbparse_set_seq(XML_SCOPED_NAME, NULL, NULL, NULL,
499 tvbparse_char(-1,":",NULL,NULL,NULL),
503 tvbparse_wanted_t* want_tag_name = tvbparse_set_oneof(0, NULL, NULL, NULL,
/* attribute value: double-quoted, single-quoted, or digits, among the visible alternatives */
508 tvbparse_wanted_t* want_attrib_value = tvbparse_set_oneof(0, NULL, NULL, get_attrib_value,
509 tvbparse_quoted(-1, NULL, NULL, tvbparse_shrink_token_cb,'\"','\\'),
510 tvbparse_quoted(-1, NULL, NULL, tvbparse_shrink_token_cb,'\'','\\'),
511 tvbparse_chars(-1,1,0,"0123456789",NULL,NULL,NULL),
515 tvbparse_wanted_t* want_attributes = tvbparse_one_or_more(-1, NULL, NULL, NULL,
516 tvbparse_set_seq(-1, NULL, NULL, after_attrib,
518 tvbparse_char(-1,"=",NULL,NULL,NULL),
/* a tag ends either with ">" (stays open) or with "/>" (closed at once) */
522 tvbparse_wanted_t* want_stoptag = tvbparse_set_oneof(-1,NULL,NULL,NULL,
523 tvbparse_char(-1, ">", NULL, NULL, after_open_tag),
524 tvbparse_string(-1, "/>", NULL, NULL, after_closed_tag),
527 tvbparse_wanted_t* want_stopxmlpi = tvbparse_string(-1,"?>",NULL,NULL,after_xmlpi);
/* the token id is the header field that after_token() receives in tok->id */
529 tvbparse_wanted_t* want_comment = tvbparse_set_seq(hf_comment,NULL,NULL,after_token,
530 tvbparse_string(-1,"<!--",NULL,NULL,NULL),
531 tvbparse_until(-1,NULL,NULL,NULL,
532 tvbparse_string(-1,"-->",NULL,NULL,NULL),
536 tvbparse_wanted_t* want_xmlpi = tvbparse_set_seq(hf_xmlpi,NULL,before_xmpli,NULL,
537 tvbparse_string(-1,"<?",NULL,NULL,NULL),
539 tvbparse_set_oneof(-1,NULL,NULL,NULL,
541 tvbparse_set_seq(-1,NULL,NULL,NULL,
548 tvbparse_wanted_t* want_closing_tag = tvbparse_set_seq(0,NULL,NULL,after_untag,
549 tvbparse_char(-1, "<", NULL, NULL, NULL),
550 tvbparse_char(-1, "/", NULL, NULL, NULL),
552 tvbparse_char(-1, ">", NULL, NULL, NULL),
555 tvbparse_wanted_t* want_doctype_start = tvbparse_set_seq(-1,NULL,before_dtd_doctype,NULL,
556 tvbparse_char(-1,"<",NULL,NULL,NULL),
557 tvbparse_char(-1,"!",NULL,NULL,NULL),
558 tvbparse_casestring(-1,"DOCTYPE",NULL,NULL,NULL),
559 tvbparse_set_oneof(-1,NULL,NULL,NULL,
560 tvbparse_set_seq(-1,NULL,NULL,NULL,
562 tvbparse_char(-1,"[",NULL,NULL,NULL),
/* this alternative carries pop_stack, so its frame is popped once the sequence has matched */
564 tvbparse_set_seq(-1,NULL,NULL,pop_stack,
566 tvbparse_set_oneof(-1,NULL,NULL,NULL,
567 tvbparse_casestring(-1,"PUBLIC",NULL,NULL,NULL),
568 tvbparse_casestring(-1,"SYSTEM",NULL,NULL,NULL),
570 tvbparse_until(-1,NULL,NULL,NULL,
571 tvbparse_char(-1,">",NULL,NULL,NULL),
577 tvbparse_wanted_t* want_dtd_tag = tvbparse_set_seq(hf_dtd_tag,NULL,NULL,after_token,
578 tvbparse_char(-1,"<",NULL,NULL,NULL),
579 tvbparse_char(-1,"!",NULL,NULL,NULL),
580 tvbparse_until(-1,NULL,NULL,NULL,
581 tvbparse_char(-1, ">", NULL, NULL, NULL),
585 tvbparse_wanted_t* want_tag = tvbparse_set_seq(-1, NULL, before_tag, NULL,
586 tvbparse_char(-1,"<",NULL,NULL,NULL),
588 tvbparse_set_oneof(-1,NULL,NULL,NULL,
589 tvbparse_set_seq(-1,NULL,NULL,NULL,
597 tvbparse_wanted_t* want_dtd_close = tvbparse_set_seq(-1,NULL,NULL,after_dtd_close,
598 tvbparse_char(-1,"]",NULL,NULL,NULL),
599 tvbparse_char(-1,">",NULL,NULL,NULL),
/* whitespace skipped between tokens */
602 want_ignore = tvbparse_chars(-1,1,0," \t\r\n",NULL,NULL,NULL);
/* top-level grammar; its alternatives include character data and a catch-all for unrecognized text */
605 want = tvbparse_set_oneof(-1, NULL, NULL, NULL,
613 tvbparse_not_chars(XML_CDATA,1,0,"<",NULL,NULL,after_token),
614 tvbparse_not_chars(-1,1,0," \t\r\n",NULL,NULL,unrecognized_token),
/* grammar used by dissect_xml_heur() to decide whether a buffer looks like XML */
617 want_heur = tvbparse_set_oneof(-1, NULL, NULL, NULL,
/*
 * Allocates a namespace called name, gives it empty attribute and element
 * tables, adds one int slot per attribute name from the NULL-terminated
 * variable argument list, and inserts the namespace into hash keyed by its
 * name. Keys and slots are heap copies owned by the tables.
 */
628 static xml_ns_t* xml_new_namespace(GHashTable* hash, gchar* name, ...) {
629 xml_ns_t* ns = g_malloc(sizeof(xml_ns_t));
633 ns->name = g_strdup(name);
637 ns->attributes = g_hash_table_new(g_str_hash,g_str_equal);
638 ns->elements = g_hash_table_new(g_str_hash,g_str_equal);
/* the argument list must end with NULL, which terminates this loop */
642 while(( attr_name = va_arg(ap,gchar*) )) {
643 int* hfp = g_malloc(sizeof(int));
645 g_hash_table_insert(ns->attributes,g_strdup(attr_name),hfp);
/* the key is the namespace's own name string, not a separate copy */
650 g_hash_table_insert(hash,ns->name,ns);
/*
 * Appends one string field description to hfs.
 *
 * hfs   array of hf_register_info being collected for registration
 * p_id  where the field id is to be stored (its use is not visible in this excerpt)
 * name  display name of the field
 * fqn   filter abbreviation of the field
 *
 * name and fqn are stored by pointer, not copied.
 */
656 static void add_xml_field(GArray* hfs, int* p_id, gchar* name, gchar* fqn) {
657 hf_register_info hfri;
660 hfri.hfinfo.name = name;
661 hfri.hfinfo.abbrev = fqn;
662 hfri.hfinfo.type = FT_STRING;
663 hfri.hfinfo.display = BASE_NONE;
664 hfri.hfinfo.strings = NULL;
665 hfri.hfinfo.bitmask = 0x0;
666 hfri.hfinfo.blurb = "";
668 hfri.hfinfo.parent = 0;
669 hfri.hfinfo.ref_count = 0;
670 hfri.hfinfo.bitshift = 0;
671 hfri.hfinfo.same_name_next = NULL;
672 hfri.hfinfo.same_name_prev = NULL;
/* the struct is copied into the array by value */
674 g_array_append_val(hfs,hfri);
/*
 * g_hash_table_foreach() callback over an attribute table: registers a field
 * for the attribute, abbreviated "<basename>.<attribute name>".
 *
 * k  attribute name
 * v  int slot for the attribute's field id
 * p  struct _attr_reg_data supplying the basename and the target array
 */
677 static void add_xml_attribute_names(gpointer k, gpointer v, gpointer p) {
678 struct _attr_reg_data* d = p;
/* heap string handed to add_xml_field(), which keeps the pointer */
679 gchar* basename = g_strdup_printf("%s.%s",d->basename,(gchar*)k);
680 add_xml_field(d->hf, (int*) v, (gchar*)k, basename);
/*
 * g_hash_table_foreach() callback over xmpli_names: registers the tag field
 * and the subtree index of one processing-instruction namespace, and then a
 * field for each of its attributes.
 *
 * v  the namespace (its assignment to ns is not visible in this excerpt)
 * p  prefix string for the abbreviations
 */
684 static void add_xmlpi_namespace(gpointer k _U_, gpointer v, gpointer p) {
686 gchar* basename = g_strdup_printf("%s.%s",(gchar*)p,ns->name);
687 gint* ett_p = &(ns->ett);
688 struct _attr_reg_data d;
/* basename serves as both display name and abbreviation */
690 add_xml_field(hf_arr, &(ns->hf_tag), basename, basename);
692 g_array_append_val(ett_arr,ett_p);
694 d.basename = basename;
697 g_hash_table_foreach(ns->attributes,add_xml_attribute_names,&d);
/*
 * Releases a dtd_build_data_t produced by the DTD parser: its strings, its
 * error buffer and both arrays of named lists. String members that callers
 * have set to NULL are skipped.
 */
701 static void destroy_dtd_data(dtd_build_data_t* dtd_data) {
703 if(dtd_data->proto_name) g_free(dtd_data->proto_name);
704 if(dtd_data->media_type) g_free(dtd_data->media_type);
705 if(dtd_data->description) g_free(dtd_data->description);
706 if(dtd_data->proto_root) g_free(dtd_data->proto_root);
708 g_string_free(dtd_data->error,TRUE);
/* drain the array, releasing each entry's list as it is removed */
711 while(dtd_data->elements->len) {
712 dtd_named_list_t* nl = g_ptr_array_remove_index_fast(dtd_data->elements,0);
713 g_ptr_array_free(nl->list,TRUE);
717 g_ptr_array_free(dtd_data->elements,TRUE);
719 while(dtd_data->attributes->len) {
720 dtd_named_list_t* nl = g_ptr_array_remove_index_fast(dtd_data->attributes,0);
721 g_ptr_array_free(nl->list,TRUE);
725 g_ptr_array_free(dtd_data->attributes,TRUE);
/*
 * g_hash_table_foreach() callback used by copy_attributes_hash(): inserts a
 * copy of key k with a newly allocated int slot into the destination table.
 * The source value v is not used.
 */
732 static void copy_attrib_item(gpointer k, gpointer v _U_, gpointer p) {
733 gchar* key = g_strdup(k);
734 int* value = g_malloc(sizeof(int));
738 g_hash_table_insert(dst,key,value);
/*
 * Creates a new string-keyed table with one entry per key of src; each entry
 * gets its own key copy and int slot (see copy_attrib_item()).
 */
742 static GHashTable* copy_attributes_hash(GHashTable* src) {
743 GHashTable* dst = g_hash_table_new(g_str_hash,g_str_equal);
745 g_hash_table_foreach(src,copy_attrib_item,dst);
/*
 * Creates a new namespace modelled on orig: the name is copied, field ids
 * start at -1, the attribute table is rebuilt with fresh slots, the element
 * table starts empty, and the list of child element names shares orig's
 * string pointers (the strings themselves are not copied).
 */
750 static xml_ns_t* duplicate_element(xml_ns_t* orig) {
751 xml_ns_t* new_item = g_malloc(sizeof(xml_ns_t));
754 new_item->name = g_strdup(orig->name);
755 new_item->hf_tag = -1;
756 new_item->hf_cdata = -1;
758 new_item->attributes = copy_attributes_hash(orig->attributes);
759 new_item->elements = g_hash_table_new(g_str_hash,g_str_equal);
760 new_item->element_names = g_ptr_array_new();
762 for(i=0; i < orig->element_names->len; i++) {
763 g_ptr_array_add(new_item->element_names,
764 g_ptr_array_index(orig->element_names,i));
/*
 * Builds "<proto_name>.<hier[1]>. ... .<name>": the protocol name, every
 * entry of hier except the first, and finally name, joined by dots. The
 * GString wrapper is released while its character data is kept.
 */
770 static gchar* fully_qualified_name(GPtrArray* hier, gchar* name, gchar* proto_name) {
772 GString* s = g_string_new(proto_name);
774 g_string_append(s,".");
/* starts at 1: entry 0 of hier is not part of the name */
776 for (i = 1; i < hier->len; i++) {
777 g_string_append_printf(s, "%s.",(gchar*)g_ptr_array_index(hier,i));
780 g_string_append(s,name);
/* FALSE keeps the character buffer alive after the GString is freed */
782 g_string_free(s,FALSE);
/*
 * Recursively builds the namespace for elem_name and all of its children.
 *
 * The element's definition is looked up in elements; a missing definition is
 * reported through error. hier holds the names currently being expanded and
 * is searched for elem_name to detect recursion. A duplicate of the
 * definition is registered (fields, subtree index, attributes) and every
 * child name is then expanded with elem_name pushed on hier for the duration
 * of the call. What is returned in the special cases is not visible in this
 * excerpt.
 */
788 static xml_ns_t* make_xml_hier(gchar* elem_name,
790 GHashTable* elements,
800 struct _attr_reg_data d;
801 gboolean recurred = FALSE;
804 if ( g_str_equal(elem_name,root->name) ) {
808 if (! ( orig = g_hash_table_lookup(elements,elem_name) )) {
809 g_string_append_printf(error,"element '%s' is not defined\n", elem_name);
/* an element already on the expansion path means the DTD is recursive */
813 for (i = 0; i < hier->len; i++) {
814 if( strcmp(elem_name,(gchar*) g_ptr_array_index(hier,i) ) == 0 ) {
823 fqn = fully_qualified_name(hier,elem_name,proto_name);
825 new = duplicate_element(orig);
/* NOTE(review): hf_tag and hf_cdata are registered with the same name and the same abbreviation */
828 add_xml_field(hfs, &(new->hf_tag), g_strdup(elem_name), fqn);
829 add_xml_field(hfs, &(new->hf_cdata), g_strdup(elem_name), fqn);
832 g_array_append_val(etts,ett_p);
837 g_hash_table_foreach(new->attributes,add_xml_attribute_names,&d);
/* consume the child-name list front to back */
839 while(new->element_names->len) {
840 gchar* child_name = g_ptr_array_remove_index(new->element_names,0);
841 xml_ns_t* child_element = NULL;
/* push elem_name for the duration of the recursive call, then pop it */
843 g_ptr_array_add(hier,elem_name);
844 child_element = make_xml_hier(child_name, root, elements, hier,error,hfs,etts,proto_name);
845 g_ptr_array_remove_index_fast(hier,hier->len - 1);
848 g_hash_table_insert(new->elements,child_element->name,child_element);
/* the name list is only needed while building */
852 g_ptr_array_free(new->element_names,TRUE);
853 new->element_names = NULL;
/*
 * Removal callback passed to g_hash_table_foreach_remove() on attribute
 * tables (see free_elements()). Its body is not visible in this excerpt;
 * presumably it frees both key and value, as the name suggests.
 */
857 static gboolean free_both(gpointer k, gpointer v, gpointer p _U_) {
/*
 * Removal callback that tears down one element definition: empties and
 * destroys its attribute table, destroys its element table, frees every
 * remaining child name and then the name array. The assignment of v to e is
 * not visible in this excerpt.
 */
863 static gboolean free_elements(gpointer k _U_, gpointer v, gpointer p _U_) {
866 g_hash_table_foreach_remove(e->attributes,free_both,NULL);
867 g_hash_table_destroy(e->attributes);
868 g_hash_table_destroy(e->elements);
870 while (e->element_names->len) {
871 g_free(g_ptr_array_remove_index(e->element_names,0));
874 g_ptr_array_free(e->element_names,TRUE);
/*
 * Turns the parsed contents of one DTD into registered namespaces.
 *
 * Steps visible here: collect the element definitions into a temporary
 * table, attach the attribute lists to them, pick the root name (the first
 * element, or proto_root when the DTD gives one), build the namespace tree
 * either recursively or as a flat list under the root, register it as its
 * own protocol when a proto_name was given, add the root to
 * xml_ns.elements, and finally release the temporary data and dtd_data.
 *
 * dtd_data  parser output; consumed (destroyed) by this function
 * errors    buffer that receives problem descriptions
 */
880 static void register_dtd(dtd_build_data_t* dtd_data, GString* errors) {
881 GHashTable* elements = g_hash_table_new(g_str_hash,g_str_equal);
882 gchar* root_name = NULL;
883 xml_ns_t* root_element = NULL;
888 GPtrArray* element_names = g_ptr_array_new();
890 /* we first populate elements with those coming from the parser */
891 while(dtd_data->elements->len) {
892 dtd_named_list_t* nl = g_ptr_array_remove_index(dtd_data->elements,0);
893 xml_ns_t* element = g_malloc(sizeof(xml_ns_t));
895 /* we will use the first element found as root in case no other one was given. */
896 if (root_name == NULL)
897 root_name = g_strdup(nl->name);
/* the name string and the child list are taken over from the parser entry, not copied */
899 element->name = nl->name;
900 element->element_names = nl->list;
901 element->hf_tag = -1;
902 element->hf_cdata = -1;
904 element->attributes = g_hash_table_new(g_str_hash,g_str_equal);
905 element->elements = g_hash_table_new(g_str_hash,g_str_equal);
907 if( g_hash_table_lookup(elements,element->name) ) {
908 g_string_append_printf(errors,"element %s defined more than once\n", element->name);
909 free_elements(NULL,element,NULL);
911 g_hash_table_insert(elements,element->name,element);
912 g_ptr_array_add(element_names,g_strdup(element->name));
918 /* then we add the attributes to their respective elements */
919 while(dtd_data->attributes->len) {
920 dtd_named_list_t* nl = g_ptr_array_remove_index(dtd_data->attributes,0);
921 xml_ns_t* element = g_hash_table_lookup(elements,nl->name);
924 g_string_append_printf(errors,"element %s is not defined\n", nl->name);
929 while(nl->list->len) {
930 gchar* name = g_ptr_array_remove_index(nl->list,0);
931 int* id_p = g_malloc(sizeof(int));
934 g_hash_table_insert(element->attributes,name,id_p);
939 g_ptr_array_free(nl->list,TRUE);
943 /* if a proto_root is defined in the dtd we'll use that as root */
944 if( dtd_data->proto_root ) {
947 root_name = g_strdup(dtd_data->proto_root);
950 /* we use a stack with the names to avoid recurring infinitely */
951 hier = g_ptr_array_new();
954 * if a proto name was given in the dtd the dtd will be used as a protocol
955 * or else the dtd will be loaded as a branch of the xml namespace
957 if( ! dtd_data->proto_name ) {
960 g_ptr_array_add(hier,g_strdup("xml"));
961 root_element = &xml_ns;
964 * if we were given a proto_name the namespace will be registered
965 * as an indipendent protocol with its own hf and ett arrays.
967 hfs = g_array_new(FALSE,FALSE,sizeof(hf_register_info));
968 etts = g_array_new(FALSE,FALSE,sizeof(gint*));
971 /* the root element of the dtd's namespace */
972 root_element = g_malloc(sizeof(xml_ns_t));
973 root_element->name = g_strdup(root_name);
/* NOTE(review): without a proto_name, fqn and name end up pointing at the same string */
974 root_element->fqn = dtd_data->proto_name ? g_strdup(dtd_data->proto_name) : root_element->name;
975 root_element->hf_tag = -1;
976 root_element->hf_cdata = -1;
977 root_element->ett = -1;
978 root_element->elements = g_hash_table_new(g_str_hash,g_str_equal);
979 root_element->element_names = element_names;
982 * we can either create a namespace as a flat namespace
983 * in which all the elements are at the root level
984 * or we can create a recursive namespace
986 if (dtd_data->recursion) {
989 make_xml_hier(root_name, root_element, elements,hier,errors,hfs,etts,dtd_data->proto_name);
/* the root is listed among its own elements */
991 g_hash_table_insert(root_element->elements,root_element->name,root_element);
993 orig_root = g_hash_table_lookup(elements,root_name);
995 /* if the root element was defined copy its attrlist to the child */
997 struct _attr_reg_data d;
999 d.basename = dtd_data->proto_name;
1002 root_element->attributes = copy_attributes_hash(orig_root->attributes);
1003 g_hash_table_foreach(root_element->attributes,add_xml_attribute_names,&d);
1005 root_element->attributes = g_hash_table_new(g_str_hash,g_str_equal);
1008 /* we then create all the sub hierarchies to catch the recurred cases */
1009 g_ptr_array_add(hier,root_name);
1011 while(root_element->element_names->len) {
1012 curr_name = g_ptr_array_remove_index(root_element->element_names,0);
/* only elements not already present at the root get their own hierarchy */
1014 if( ! g_hash_table_lookup(root_element->elements,curr_name) ) {
1015 xml_ns_t* new = make_xml_hier(curr_name, root_element, elements,hier,errors,hfs,etts,dtd_data->proto_name);
1016 g_hash_table_insert(root_element->elements,new->name,new);
1023 /* a flat namespace */
1024 g_ptr_array_add(hier,root_name);
1026 root_element->attributes = g_hash_table_new(g_str_hash,g_str_equal);
1028 while(root_element->element_names->len) {
1031 struct _attr_reg_data d;
1033 curr_name = g_ptr_array_remove_index(root_element->element_names,0);
1034 new = duplicate_element(g_hash_table_lookup(elements,curr_name));
1035 new->fqn = fully_qualified_name(hier, curr_name, root_name);
1037 add_xml_field(hfs, &(new->hf_tag), curr_name, new->fqn);
1038 add_xml_field(hfs, &(new->hf_cdata), curr_name, new->fqn);
1040 d.basename = new->fqn;
1043 g_hash_table_foreach(new->attributes,add_xml_attribute_names,&d);
1046 g_array_append_val(etts,ett_p);
/* flat namespace: the duplicate's child-name list is not needed */
1048 g_ptr_array_free(new->element_names,TRUE);
1050 g_hash_table_insert(root_element->elements,new->name,new);
1054 g_ptr_array_free(element_names,TRUE);
1056 g_ptr_array_free(hier,TRUE);
1059 * if we were given a proto_name the namespace will be registered
1060 * as an indipendent protocol.
1062 if( dtd_data->proto_name ) {
1065 if ( ! dtd_data->description) {
1066 dtd_data->description = g_strdup(root_name);
1069 ett_p = &root_element->ett;
1070 g_array_append_val(etts,ett_p);
1072 add_xml_field(hfs, &root_element->hf_cdata, root_element->name, root_element->fqn);
/* the protocol id doubles as the root element's tag field */
1074 root_element->hf_tag = proto_register_protocol(dtd_data->description, dtd_data->proto_name, dtd_data->proto_name);
1075 proto_register_field_array(root_element->hf_tag, (hf_register_info*)g_array_data(hfs), hfs->len);
1076 proto_register_subtree_array((gint**)g_array_data(etts), etts->len);
1078 if (dtd_data->media_type) {
1079 g_hash_table_insert(media_types,dtd_data->media_type,root_element);
/* the string is now the table's key: clear the pointer so destroy_dtd_data() leaves it alone */
1080 dtd_data->media_type = NULL;
/* these strings were handed to the registration calls: clear them so destroy_dtd_data() leaves them alone */
1083 dtd_data->description = NULL;
1084 dtd_data->proto_name = NULL;
/* FALSE: the field array's data stays allocated after registration; the ett array's data is released */
1085 g_array_free(hfs,FALSE);
1086 g_array_free(etts,TRUE);
1089 g_hash_table_insert(xml_ns.elements,root_element->name,root_element);
/* the temporary definitions are no longer needed */
1091 g_hash_table_foreach_remove(elements,free_elements,NULL);
1092 g_hash_table_destroy(elements);
1094 destroy_dtd_data(dtd_data);
/*
 * Directory-iteration wrappers used by init_xml_names(), mapped here onto
 * the GLib GDir functions. OPENDIR_OP expects a variable called dummy
 * (a GError**) to be in scope at the point of use.
 *
 * NOTE(review): GETFNAME_OP's expansion ends in ';', so a use such as
 * "x = GETFNAME_OP(file);" yields an extra empty statement.
 */
1100 # define DIRECTORY_T GDir
1101 # define FILE_T gchar
1102 # define OPENDIR_OP(name) g_dir_open(name, 0, dummy)
1103 # define DIRGETNEXT_OP(dir) g_dir_read_name(dir)
1104 # define GETFNAME_OP(file) (file);
1105 # define CLOSEDIR_OP(dir) g_dir_close(dir)
/*
 * Creates the name tables used by the dissector and loads every DTD found.
 *
 * Creates xmpli_names and media_types, gives xml_ns and unknown_ns shared
 * element and attribute tables, registers the "xml" processing instruction
 * with its three attributes, then scans the "dtds" directory (personal
 * configuration first, data directory otherwise) for files ending in ".dtd"
 * and runs each through dtd_preparse(), dtd_parse() and register_dtd(),
 * reporting failures of each stage. Finally the default media types not
 * already present are mapped to xml_ns and the processing-instruction
 * namespaces are registered under "xml.xmlpi".
 */
1107 static void init_xml_names(void) {
1108 xml_ns_t* xmlpi_xml_ns;
1112 const gchar* filename;
/* NOTE(review): the GError* that dummy points to is not initialised in the visible lines, yet OPENDIR_OP passes dummy to g_dir_open(), which expects it to point at NULL; no g_free(dummy) is visible either - confirm */
1115 GError** dummy = g_malloc(sizeof(GError *));
1118 xmpli_names = g_hash_table_new(g_str_hash,g_str_equal);
1119 media_types = g_hash_table_new(g_str_hash,g_str_equal);
/* both built-in namespaces share one element table and one attribute table */
1121 unknown_ns.elements = xml_ns.elements = g_hash_table_new(g_str_hash,g_str_equal);
1122 unknown_ns.attributes = xml_ns.attributes = g_hash_table_new(g_str_hash,g_str_equal);
1124 xmlpi_xml_ns = xml_new_namespace(xmpli_names,"xml","version","encoding","standalone",NULL);
/* a processing instruction has no child elements */
1126 g_hash_table_destroy(xmlpi_xml_ns->elements);
1127 xmlpi_xml_ns->elements = NULL;
1130 dirname = get_persconffile_path("dtds", FALSE, FALSE);
1132 if (test_for_directory(dirname) != EISDIR) {
1133 /* Although dir isn't a directory it may still use memory */
1135 dirname = get_datafile_path("dtds");
1138 if (test_for_directory(dirname) == EISDIR) {
1140 if ((dir = OPENDIR_OP(dirname)) != NULL) {
1141 while ((file = DIRGETNEXT_OP(dir)) != NULL) {
1143 filename = GETFNAME_OP(file);
1145 namelen = strlen(filename);
/* only names longer than ".dtd" that end in it, compared case-insensitively */
1146 if ( namelen > 4 && ( g_ascii_strcasecmp(filename+(namelen-4),".dtd") == 0 ) ) {
1147 GString* errors = g_string_new("");
1148 GString* preparsed = dtd_preparse(dirname, filename, errors);
1149 dtd_build_data_t* dtd_data;
1152 report_failure("Dtd Preparser in file %s%c%s: %s",dirname,G_DIR_SEPARATOR,filename,errors->str);
1156 dtd_data = dtd_parse(preparsed);
1158 g_string_free(preparsed,TRUE);
1160 if (dtd_data->error->len) {
1161 report_failure("Dtd Parser in file %s%c%s: %s",dirname,G_DIR_SEPARATOR,filename,dtd_data->error->str);
1162 destroy_dtd_data(dtd_data);
/* register_dtd() consumes dtd_data */
1166 register_dtd(dtd_data,errors);
1169 report_failure("Dtd Registration in file: %s%c%s: %s",dirname,G_DIR_SEPARATOR,filename,errors->str);
1170 g_string_free(errors,TRUE);
/* media types already claimed (e.g. by a DTD) keep their namespace */
1182 for(i=0;i<array_length(default_media_types);i++) {
1183 if( ! g_hash_table_lookup(media_types,default_media_types[i]) ) {
1184 g_hash_table_insert(media_types,(gpointer)default_media_types[i],&xml_ns);
1188 g_hash_table_foreach(xmpli_names,add_xmlpi_namespace,"xml.xmlpi");
/* range_foreach() callback: removes the XML dissector from one TCP port. */
1193 static void range_delete_xml_tcp_callback(guint32 port) {
1194 dissector_delete("tcp.port", port, xml_handle);
/* range_foreach() callback: registers the XML dissector on one TCP port. */
1197 static void range_add_xml_tcp_callback(guint32 port) {
1198 dissector_add("tcp.port", port, xml_handle);
/*
 * Preference callback (registered in proto_register_xml()).
 *
 * For each heuristic preference whose value differs from the one last
 * applied, the heuristic dissector is added to or removed from the matching
 * tables and the *_save copy is updated. The TCP port range is then
 * re-applied: the old ports are unregistered, the range is replaced by a
 * copy of the preference value and the new ports are registered.
 */
1201 static void apply_prefs(void) {
1202 if (pref_heuristic_media_save != pref_heuristic_media) {
1203 if (pref_heuristic_media) {
1204 heur_dissector_add("http", dissect_xml_heur, xml_ns.hf_tag);
1205 heur_dissector_add("sip", dissect_xml_heur, xml_ns.hf_tag);
1206 heur_dissector_add("media", dissect_xml_heur, xml_ns.hf_tag);
1207 pref_heuristic_media_save = TRUE;
1209 heur_dissector_delete("http", dissect_xml_heur, xml_ns.hf_tag);
1210 heur_dissector_delete("sip", dissect_xml_heur, xml_ns.hf_tag);
1211 heur_dissector_delete("media", dissect_xml_heur, xml_ns.hf_tag);
1212 pref_heuristic_media_save = FALSE;
1216 if (pref_heuristic_tcp_save != pref_heuristic_tcp ) {
1217 if (pref_heuristic_tcp) {
1218 heur_dissector_add("tcp", dissect_xml_heur, xml_ns.hf_tag);
1219 pref_heuristic_tcp_save = TRUE;
1221 heur_dissector_delete("tcp", dissect_xml_heur, xml_ns.hf_tag);
1222 pref_heuristic_tcp_save = FALSE;
1226 if (pref_heuristic_udp_save != pref_heuristic_udp ) {
1227 if (pref_heuristic_udp) {
1228 heur_dissector_add("udp", dissect_xml_heur, xml_ns.hf_tag);
1229 pref_heuristic_udp_save = TRUE;
1231 heur_dissector_delete("udp", dissect_xml_heur, xml_ns.hf_tag);
1232 pref_heuristic_udp_save = FALSE;
/* unregister the ports of the previously applied range before replacing it */
1236 range_foreach(xml_tcp_range, range_delete_xml_tcp_callback);
1237 g_free(xml_tcp_range);
1238 xml_tcp_range = range_copy(global_xml_tcp_range);
1239 range_foreach(xml_tcp_range, range_add_xml_tcp_callback);
/*
 * Protocol registration entry point: collects the built-in fields and
 * subtree indices into growable arrays, registers the XML protocol with
 * them, registers the preferences (heuristics for media types, TCP and UDP,
 * plus a TCP port range) with apply_prefs() as their callback, registers
 * the "xml" dissector and starts with an empty TCP port range. Calls made
 * on lines missing from this excerpt are not described.
 */
1243 proto_register_xml(void) {
1244 static gint *ett_base[] = {
1251 static hf_register_info hf_base[] = {
1252 { &hf_xmlpi, {"XMLPI", "xml.xmlpi", FT_STRING, BASE_NONE, NULL, 0, "", HFILL }},
1253 { &hf_comment, {"Comment", "xml.comment", FT_STRING, BASE_NONE, NULL, 0, "", HFILL }},
1254 { &hf_unknowwn_attrib, {"Attribute", "xml.attribute", FT_STRING, BASE_NONE, NULL, 0, "", HFILL }},
1255 { &hf_doctype, {"Doctype", "xml.doctype", FT_STRING, BASE_NONE, NULL, 0, "", HFILL }},
1256 { &hf_dtd_tag, {"DTD Tag", "xml.dtdtag", FT_STRING, BASE_NONE, NULL, 0, "", HFILL }},
1257 { &unknown_ns.hf_cdata, {"CDATA", "xml.cdata", FT_STRING, BASE_NONE, NULL, 0, "", HFILL }},
1258 { &unknown_ns.hf_tag, {"Tag", "xml.tag", FT_STRING, BASE_NONE, NULL, 0, "", HFILL }},
1259 { &xml_ns.hf_cdata, {"Unknown", "xml.unknown", FT_STRING, BASE_NONE, NULL, 0, "", HFILL }}
1261 module_t* xml_module;
/* growable arrays so that more entries can be appended to the built-in ones before registration */
1263 hf_arr = g_array_new(FALSE,FALSE,sizeof(hf_register_info));
1264 ett_arr = g_array_new(FALSE,FALSE,sizeof(gint*));
1266 g_array_append_vals(hf_arr,hf_base,array_length(hf_base));
1267 g_array_append_vals(ett_arr,ett_base,array_length(ett_base));
/* the protocol id is kept in xml_ns.hf_tag */
1271 xml_ns.hf_tag = proto_register_protocol("eXtensible Markup Language", "XML", xml_ns.name);
1273 proto_register_field_array(xml_ns.hf_tag, (hf_register_info*)g_array_data(hf_arr), hf_arr->len);
1274 proto_register_subtree_array((gint**)g_array_data(ett_arr), ett_arr->len);
1276 xml_module = prefs_register_protocol(xml_ns.hf_tag,apply_prefs);
1277 prefs_register_bool_preference(xml_module, "heuristic", "Use Heuristics for media types",
1278 "Try to recognize XML for unknown media types",
1279 &pref_heuristic_media);
1280 prefs_register_bool_preference(xml_module, "heuristic_tcp", "Use Heuristics for tcp",
1281 "Try to recognize XML for unknown TCP ports",
1282 &pref_heuristic_tcp);
1283 prefs_register_range_preference(xml_module, "tcp.port", "TCP Ports",
1285 &global_xml_tcp_range, 65535);
1286 prefs_register_bool_preference(xml_module, "heuristic_udp", "Use Heuristics for UDP",
1287 "Try to recognize XML for unknown UDP ports",
1288 &pref_heuristic_udp);
/* FALSE: the field array's data stays allocated after registration; the ett array's data is released */
1290 g_array_free(hf_arr,FALSE);
1291 g_array_free(ett_arr,TRUE);
1293 register_dissector("xml", dissect_xml, xml_ns.hf_tag);
1297 xml_tcp_range = range_empty();
/*
 * g_hash_table_foreach() callback over media_types: registers the XML
 * dissector for the media type named by key k. Value and user data are unused.
 */
1302 static void add_dissector_media(gpointer k, gpointer v _U_, gpointer p _U_) {
1303 dissector_add_string("media_type", (gchar*)k, xml_handle);
/*
 * Handoff entry point: looks up the "xml" dissector handle and registers it
 * for every media type present in media_types.
 */
1307 proto_reg_handoff_xml(void)
1310 xml_handle = find_dissector("xml");
1312 g_hash_table_foreach(media_types,add_dissector_media,NULL);