2 * Wireshark's XML dissector.
4 * (C) 2005, Luis E. Garcia Ontanon.
8 * Refer to the AUTHORS file or the AUTHORS section in the man page
9 * for contacting the author(s) of this file.
11 * Wireshark - Network traffic analyzer
12 * By Gerald Combs <gerald@wireshark.org>
13 * Copyright 1998 Gerald Combs
15 * This program is free software; you can redistribute it and/or
16 * modify it under the terms of the GNU General Public License
17 * as published by the Free Software Foundation; either version 2
18 * of the License, or (at your option) any later version.
20 * This program is distributed in the hope that it will be useful,
21 * but WITHOUT ANY WARRANTY; without even the implied warranty of
22 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23 * GNU General Public License for more details.
25 * You should have received a copy of the GNU General Public License
26 * along with this program; if not, write to the Free Software
27 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
43 #include <wsutil/str_util.h>
45 #include <epan/emem.h>
46 #include <epan/packet.h>
47 #include <epan/tvbparse.h>
49 #include <epan/report_err.h>
50 #include <epan/filesystem.h>
51 #include <epan/prefs.h>
52 #include <epan/garrayfix.h>
54 #include "packet-xml.h"
/*
 * Context passed through the user-data pointer of g_hash_table_foreach()
 * to add_xml_attribute_names(). The member list is not visible in this
 * listing; the visible code only touches the members `basename` and `hf`.
 */
56 struct _attr_reg_data {
/* subtree (ett) indexes, initialised to -1 */
62 static gint ett_dtd = -1;
63 static gint ett_xmpli = -1;
/*
 * header field indexes, initialised to -1.
 * NOTE(review): "hf_unknowwn_attrib" is misspelled, but after_attrib()
 * refers to it under exactly this name.
 */
65 static int hf_unknowwn_attrib = -1;
66 static int hf_comment = -1;
67 static int hf_xmlpi = -1;
68 static int hf_dtd_tag = -1;
69 static int hf_doctype = -1;
71 /* dissector handles */
72 static dissector_handle_t xml_handle;
74 /* parser definitions */
/* top-level grammar consumed by tvbparse_get() in dissect_xml() */
75 static tvbparse_wanted_t* want;
/* what the tokenizer skips between tokens (whitespace, see init_xml_parser) */
76 static tvbparse_wanted_t* want_ignore;
/* grammar peeked at by dissect_xml_heur() to decide whether data looks like XML */
77 static tvbparse_wanted_t* want_heur;
/* processing-instruction name -> xml_ns_t*, looked up in before_xmpli() */
79 static GHashTable* xmpli_names;
/* media type string -> xml_ns_t*, looked up in dissect_xml() */
80 static GHashTable* media_types;
/* built-in namespaces; their hash tables are created in init_xml_names() */
82 static xml_ns_t xml_ns = {"xml","/",-1,-1,-1,NULL,NULL,NULL};
83 static xml_ns_t unknown_ns = {"unknown","?",-1,-1,-1,NULL,NULL,NULL};
/* namespace chosen for the packet currently being dissected (set in dissect_xml) */
84 static xml_ns_t* root_ns;
/* heuristic switches tested by dissect_xml_heur() */
86 static gboolean pref_heuristic_media = FALSE;
87 static gboolean pref_heuristic_tcp = FALSE;
88 static gboolean pref_heuristic_udp = FALSE;
/* NOTE(review): presumably the previously applied values of the prefs above — their use is not visible here */
89 static gboolean pref_heuristic_media_save = FALSE;
90 static gboolean pref_heuristic_tcp_save = FALSE;
91 static gboolean pref_heuristic_udp_save = FALSE;
92 static range_t *global_xml_tcp_range = NULL;
93 static range_t *xml_tcp_range = NULL;
/* when set, dissect_xml_heur() retries the heuristic on a faked-unicode copy of the data */
95 static gboolean pref_heuristic_unicode = FALSE;
/* negative token ids used by the grammar so they cannot be confused with hf ids (after_token tests tok->id > 0) */
97 #define XML_CDATA -1000
98 #define XML_SCOPED_NAME -1001
/* growing arrays of hf_register_info / gint* filled while namespaces are built */
101 static GArray* hf_arr;
102 static GArray* ett_arr;
/*
 * Table of XML-based media type names.
 * NOTE(review): presumably the media types the dissector is registered for
 * by default — the code consuming this table is outside this listing, and
 * the numbering gaps suggest a few entries and the closing brace are not shown.
 */
104 static const gchar* default_media_types[] = {
111 "application/auth-policy+xml",
112 "application/cpim-pidf+xml",
113 "application/cpl+xml",
114 "application/mathml+xml",
115 "application/media_control+xml",
116 "application/note+xml",
117 "application/pidf+xml",
118 "application/poc-settings+xml",
119 "application/rdf+xml",
120 "application/reginfo+xml",
121 "application/resource-lists+xml",
122 "application/rlmi+xml",
123 "application/rls-services+xml",
125 "application/simple-filter+xml",
126 "application/soap+xml",
127 "application/vnd.etsi.aoc+xml",
128 "application/vnd.etsi.cug+xml",
129 "application/vnd.etsi.iptvcommand+xml",
130 "application/vnd.etsi.iptvdiscovery+xml",
131 "application/vnd.etsi.iptvprofile+xml",
132 "application/vnd.etsi.iptvsad-bc+xml",
133 "application/vnd.etsi.iptvsad-cod+xml",
134 "application/vnd.etsi.iptvsad-npvr+xml",
135 "application/vnd.etsi.iptvueprofile+xml",
136 "application/vnd.etsi.mcid+xml",
137 "application/vnd.etsi.sci+xml",
138 "application/vnd.etsi.simservs+xml",
139 "application/vnd.3gpp.cw+xml",
140 "application/vnd.wv.csp+xml",
141 "application/vnd.wv.csp.xml",
142 "application/watcherinfo+xml",
143 "application/xcap-att+xml",
144 "application/xcap-caps+xml",
145 "application/xcap-diff+xml",
146 "application/xcap-el+xml",
147 "application/xcap-error+xml",
148 "application/xcap-ns+xml",
150 "application/xml-dtd",
151 "application/xpidf+xml",
152 "application/xslt+xml",
153 "application/x-wms-logconnectstats",
154 "application/x-wms-logplaystats",
155 "application/x-wms-sendevent",
156 "application/rss+xml",
160 static void insert_xml_frame(xml_frame_t *parent, xml_frame_t *new_child) {
161 new_child->first_child = NULL;
162 new_child->last_child = NULL;
164 new_child->parent = parent;
165 new_child->next_sibling = NULL;
166 new_child->prev_sibling = NULL;
167 if (parent == NULL) return; /* root */
169 if (parent->first_child == NULL) { /* the 1st child */
170 parent->first_child = new_child;
171 } else { /* following children */
172 parent->last_child->next_sibling = new_child;
173 new_child->prev_sibling = parent->last_child;
175 parent->last_child = new_child;
/*
 * Main dissection routine: creates the root xml_frame_t, selects the
 * namespace from the media type in pinfo->match_string, adds the top-level
 * tree item and then drives the tvbparse grammar over the whole buffer.
 * The tree below the root is built by the grammar callbacks, which receive
 * `stack` as their tvbparse_data.
 * NOTE(review): the return type, some locals and the branch that handles an
 * unknown media type are not visible in this listing.
 */
179 dissect_xml(tvbuff_t *tvb, packet_info *pinfo, proto_tree *tree)
182 tvbparse_elem_t* tok = NULL;
/* static: survives between calls; the previous array is released before a new one is created */
183 static GPtrArray* stack = NULL;
184 xml_frame_t* current_frame;
188 g_ptr_array_free(stack,TRUE);
190 stack = g_ptr_array_new();
/* the root frame: no name, no value, no parent */
191 current_frame = ep_alloc(sizeof(xml_frame_t));
192 current_frame->type = XML_FRAME_ROOT;
193 current_frame->name = NULL;
194 current_frame->name_orig_case = NULL;
195 current_frame->value = NULL;
196 insert_xml_frame(NULL, current_frame);
197 g_ptr_array_add(stack,current_frame);
/* parse the whole tvb (offset 0, length -1), skipping want_ignore between tokens */
199 tt = tvbparse_init(tvb,0,-1,stack,want_ignore);
200 current_frame->start_offset = 0;
/* a registered media type selects a DTD-specific namespace */
204 if (pinfo->match_string)
205 root_ns = g_hash_table_lookup(media_types,pinfo->match_string);
209 colinfo_str = "/XML";
/* protocol column suffix is the upper-cased namespace name */
211 colinfo_str = ep_strdup_printf("/%s",root_ns->name);
212 ascii_strup_inplace(colinfo_str);
215 col_append_str(pinfo->cinfo, COL_PROTOCOL, colinfo_str);
217 current_frame->ns = root_ns;
219 current_frame->item = proto_tree_add_item(tree,current_frame->ns->hf_tag,tvb,0,-1,FALSE);
220 current_frame->tree = proto_item_add_subtree(current_frame->item,current_frame->ns->ett);
221 current_frame->last_item = current_frame->item;
/* empty loop body on purpose: all work happens in the grammar callbacks */
223 while(( tok = tvbparse_get(tt, want) )) ;
225 pinfo->private_data = current_frame; /* pass XML structure to the dissector calling XML */
/*
 * Heuristic entry point: only active when at least one of the media/tcp/udp
 * heuristic preferences is set. The data is handed to dissect_xml() when
 * want_heur matches at the start of the buffer; if it does not and
 * pref_heuristic_unicode is set, the same test is repeated on a
 * faked-unicode copy of the data that is half as long.
 * NOTE(review): the return statements are not visible in this listing.
 */
228 static gboolean dissect_xml_heur(tvbuff_t *tvb, packet_info *pinfo, proto_tree *tree) {
229 if (pref_heuristic_media || pref_heuristic_tcp || pref_heuristic_udp) {
/* peek only: a throw-away parser without callback data (NULL) */
230 if (tvbparse_peek(tvbparse_init(tvb,0,-1,NULL,want_ignore), want_heur)) {
231 dissect_xml(tvb, pinfo, tree);
233 } else if (pref_heuristic_unicode) {
/* build a second tvb of tvb_length/2 bytes from the converted data */
234 const guint8 *data = tvb_get_ephemeral_faked_unicode(tvb, 0, tvb_length(tvb)/2, TRUE);
235 tvbuff_t *unicode_tvb = tvb_new_real_data(data, tvb_length(tvb)/2, tvb_length(tvb)/2);
236 if (tvbparse_peek(tvbparse_init(unicode_tvb,0,-1,NULL,want_ignore), want_heur)) {
237 dissect_xml(unicode_tvb, pinfo, tree);
/*
 * Search the direct children of frame for a tag frame (XML_FRAME_TAG).
 * With name == NULL the first tag child is wanted; otherwise the child whose
 * original-case name equals name (strcmp, i.e. case sensitive).
 * frame is dereferenced without a NULL check.
 * NOTE(review): loop header, result assignment and return are not visible here.
 */
245 xml_frame_t *xml_get_tag(xml_frame_t *frame, const gchar *name) {
246 xml_frame_t *tag = NULL;
248 xml_frame_t *xml_item = frame->first_child;
250 if ((xml_item->type == XML_FRAME_TAG)) {
251 if (!name) { /* get the 1st tag */
254 } else if (xml_item->name_orig_case && !strcmp(xml_item->name_orig_case, name)) {
/* advance along the sibling list */
259 xml_item = xml_item->next_sibling;
/*
 * Search the direct children of frame for an attribute frame
 * (XML_FRAME_ATTRIB) whose original-case name equals name (strcmp).
 * Neither frame nor name is checked for NULL in the visible code.
 * NOTE(review): loop header, result assignment and return are not visible here.
 */
265 xml_frame_t *xml_get_attrib(xml_frame_t *frame, const gchar *name) {
266 xml_frame_t *attr = NULL;
268 xml_frame_t *xml_item = frame->first_child;
270 if ((xml_item->type == XML_FRAME_ATTRIB) &&
271 xml_item->name_orig_case && !strcmp(xml_item->name_orig_case, name)) {
/* advance along the sibling list */
275 xml_item = xml_item->next_sibling;
/*
 * Search the direct children of frame for a character-data frame
 * (XML_FRAME_CDATA). frame is dereferenced without a NULL check.
 * NOTE(review): loop header, result assignment and return are not visible here.
 */
281 xml_frame_t *xml_get_cdata(xml_frame_t *frame) {
282 xml_frame_t *cdata = NULL;
284 xml_frame_t *xml_item = frame->first_child;
286 if ((xml_item->type == XML_FRAME_CDATA)) {
/* advance along the sibling list */
290 xml_item = xml_item->next_sibling;
/*
 * tvbparse "after" callback for leaf tokens (character data, comments,
 * DTD tags): adds the token text to the tree of the frame on top of the
 * stack and records a XML_FRAME_CDATA child frame.
 * tvbparse_data is the GPtrArray frame stack created in dissect_xml().
 * NOTE(review): several lines (declarations, the is_cdata handling and the
 * condition guarding the frame creation, if any) are not visible here.
 */
296 static void after_token(void* tvbparse_data, const void* wanted_data _U_, tvbparse_elem_t* tok) {
297 GPtrArray* stack = tvbparse_data;
298 xml_frame_t* current_frame = g_ptr_array_index(stack,stack->len - 1);
300 gboolean is_cdata = FALSE;
302 xml_frame_t* new_frame;
/* choose the header field: namespace specific cdata field, falling back to the generic xml one */
304 if (tok->id == XML_CDATA) {
305 hfid = current_frame->ns ? current_frame->ns->hf_cdata : xml_ns.hf_cdata;
/* positive ids are header field ids stored directly in the grammar element */
307 } else if ( tok->id > 0) {
310 hfid = xml_ns.hf_cdata;
313 pi = proto_tree_add_item(current_frame->tree, hfid, tok->tvb, tok->offset, tok->len, FALSE);
315 proto_item_set_text(pi, "%s",
316 tvb_format_text(tok->tvb,tok->offset,tok->len));
/* leaf frame: carries the token bytes as value, has no subtree and no namespace */
319 new_frame = ep_alloc(sizeof(xml_frame_t));
320 new_frame->type = XML_FRAME_CDATA;
321 new_frame->name = NULL;
322 new_frame->name_orig_case = NULL;
323 new_frame->value = tvb_new_subset(tok->tvb, tok->offset, tok->len, tok->len);
324 insert_xml_frame(current_frame, new_frame);
325 new_frame->item = pi;
326 new_frame->last_item = pi;
327 new_frame->tree = NULL;
328 new_frame->start_offset = tok->offset;
329 new_frame->ns = NULL;
/*
 * tvbparse "before" callback for the start of a processing instruction
 * ("<?name"): adds an item for it, creates a XML_FRAME_XMPLI frame and pushes
 * that frame on the stack so following attributes attach to it.
 * NOTE(review): the lines choosing hf_tag/ett from `ns` and assigning
 * new_frame->ns are not visible in this listing.
 */
333 static void before_xmpli(void* tvbparse_data, const void* wanted_data _U_, tvbparse_elem_t* tok) {
334 GPtrArray* stack = tvbparse_data;
335 xml_frame_t* current_frame = g_ptr_array_index(stack,stack->len - 1);
/* second sub-token of the sequence is the PI name (the first being "<?") */
338 tvbparse_elem_t* name_tok = tok->sub->next;
339 gchar* name = tvb_get_ephemeral_string(name_tok->tvb,name_tok->offset,name_tok->len);
/* the lookup uses the name as captured; it is lower-cased only afterwards */
340 xml_ns_t* ns = g_hash_table_lookup(xmpli_names,name);
341 xml_frame_t* new_frame;
346 ascii_strdown_inplace(name);
355 pi = proto_tree_add_item(current_frame->tree,hf_tag,tok->tvb,tok->offset,tok->len,FALSE);
/* label covers the text from the token start up to the end of the name */
357 proto_item_set_text(pi, "%s", tvb_format_text(tok->tvb,tok->offset,(name_tok->offset - tok->offset) + name_tok->len));
359 pt = proto_item_add_subtree(pi,ett);
361 new_frame = ep_alloc(sizeof(xml_frame_t));
362 new_frame->type = XML_FRAME_XMPLI;
/* name was lower-cased in place above, so both fields hold the lower-case form */
363 new_frame->name = name;
364 new_frame->name_orig_case = name;
365 new_frame->value = NULL;
366 insert_xml_frame(current_frame, new_frame);
367 new_frame->item = pi;
368 new_frame->last_item = pi;
369 new_frame->tree = pt;
370 new_frame->start_offset = tok->offset;
/* becomes the current frame until after_xmlpi() pops it */
373 g_ptr_array_add(stack,new_frame);
377 static void after_xmlpi(void* tvbparse_data, const void* wanted_data _U_, tvbparse_elem_t* tok) {
378 GPtrArray* stack = tvbparse_data;
379 xml_frame_t* current_frame = g_ptr_array_index(stack,stack->len - 1);
381 proto_tree_add_text(current_frame->tree,
382 tok->tvb, tok->offset, tok->len, "%s",
383 tvb_format_text(tok->tvb,tok->offset,tok->len));
385 if (stack->len > 1) {
386 g_ptr_array_remove_index_fast(stack,stack->len - 1);
388 proto_tree_add_text(current_frame->tree,tok->tvb,tok->offset,tok->len,"[ ERROR: Closing an unopened xmpli tag ]");
/*
 * tvbparse "before" callback for an opening tag ("<name" or "<scope:name"):
 * resolves the namespace entry for the element, adds the tree item and
 * subtree, creates a XML_FRAME_TAG frame and pushes it on the stack.
 * NOTE(review): the fallbacks taken when a lookup fails, the remaining
 * declarations and the assignment of new_frame->ns are not visible here.
 */
392 static void before_tag(void* tvbparse_data, const void* wanted_data _U_, tvbparse_elem_t* tok) {
393 GPtrArray* stack = tvbparse_data;
394 xml_frame_t* current_frame = g_ptr_array_index(stack,stack->len - 1);
/* second sub-token of the sequence is the tag name (the first being "<") */
395 tvbparse_elem_t* name_tok = tok->sub->next;
397 gchar *name = NULL, *name_orig_case = NULL;
399 xml_frame_t* new_frame;
/* scoped name "root:leaf": sub-tokens are root, ':' and leaf */
403 if (name_tok->sub->id == XML_SCOPED_NAME) {
404 tvbparse_elem_t* root_tok = name_tok->sub->sub;
405 tvbparse_elem_t* leaf_tok = name_tok->sub->sub->next->next;
406 xml_ns_t* nameroot_ns;
408 root_name = (gchar*)tvb_get_ephemeral_string(root_tok->tvb,root_tok->offset,root_tok->len);
409 name = (gchar*)tvb_get_ephemeral_string(leaf_tok->tvb,leaf_tok->offset,leaf_tok->len);
/* scoped names are not lower-cased: both pointers share one string */
410 name_orig_case = name;
/* the scope selects a namespace registered below the generic xml namespace */
412 nameroot_ns = g_hash_table_lookup(xml_ns.elements,root_name);
415 ns = g_hash_table_lookup(nameroot_ns->elements,name);
/* plain name: keep a copy in original case, look up with the lower-cased one */
424 name = tvb_get_ephemeral_string(name_tok->tvb,name_tok->offset,name_tok->len);
425 name_orig_case = ep_strdup(name);
426 ascii_strdown_inplace(name);
/* first try the children known to the enclosing element's namespace ... */
428 if(current_frame->ns) {
429 ns = g_hash_table_lookup(current_frame->ns->elements,name);
/* ... then the elements of the packet's root namespace */
432 if (! ( ns = g_hash_table_lookup(root_ns->elements,name) ) ) {
441 pi = proto_tree_add_item(current_frame->tree,ns->hf_tag,tok->tvb,tok->offset,tok->len,FALSE);
/* label covers the text from '<' up to the end of the name */
442 proto_item_set_text(pi, "%s", tvb_format_text(tok->tvb,tok->offset,(name_tok->offset - tok->offset) + name_tok->len));
444 pt = proto_item_add_subtree(pi,ns->ett);
446 new_frame = ep_alloc(sizeof(xml_frame_t));
447 new_frame->type = XML_FRAME_TAG;
448 new_frame->name = name;
449 new_frame->name_orig_case = name_orig_case;
450 new_frame->value = NULL;
451 insert_xml_frame(current_frame, new_frame);
452 new_frame->item = pi;
453 new_frame->last_item = pi;
454 new_frame->tree = pt;
455 new_frame->start_offset = tok->offset;
/* becomes the current frame until the tag is closed */
458 g_ptr_array_add(stack,new_frame);
462 static void after_open_tag(void* tvbparse_data, const void* wanted_data _U_, tvbparse_elem_t* tok _U_) {
463 GPtrArray* stack = tvbparse_data;
464 xml_frame_t* current_frame = g_ptr_array_index(stack,stack->len - 1);
466 proto_item_append_text(current_frame->last_item,">");
469 static void after_closed_tag(void* tvbparse_data, const void* wanted_data _U_, tvbparse_elem_t* tok) {
470 GPtrArray* stack = tvbparse_data;
471 xml_frame_t* current_frame = g_ptr_array_index(stack,stack->len - 1);
473 proto_item_append_text(current_frame->last_item,"/>");
475 if (stack->len > 1) {
476 g_ptr_array_remove_index_fast(stack,stack->len - 1);
478 proto_tree_add_text(current_frame->tree,tok->tvb,tok->offset,tok->len,"[ ERROR: Closing an unopened tag ]");
482 static void after_untag(void* tvbparse_data, const void* wanted_data _U_, tvbparse_elem_t* tok){
483 GPtrArray* stack = tvbparse_data;
484 xml_frame_t* current_frame = g_ptr_array_index(stack,stack->len - 1);
486 proto_item_set_len(current_frame->item, (tok->offset - current_frame->start_offset) + tok->len);
488 proto_tree_add_text(current_frame->tree,tok->tvb,tok->offset,tok->len,"%s",
489 tvb_format_text(tok->tvb,tok->offset,tok->len));
491 if (stack->len > 1) {
492 g_ptr_array_remove_index_fast(stack,stack->len - 1);
494 proto_tree_add_text(current_frame->tree,tok->tvb,tok->offset,tok->len,
495 "[ ERROR: Closing an unopened tag ]");
499 static void before_dtd_doctype(void* tvbparse_data, const void* wanted_data _U_, tvbparse_elem_t* tok){
500 GPtrArray* stack = tvbparse_data;
501 xml_frame_t* current_frame = g_ptr_array_index(stack,stack->len - 1);
502 xml_frame_t* new_frame;
503 tvbparse_elem_t* name_tok = tok->sub->next->next->next->sub->sub;
504 proto_tree* dtd_item = proto_tree_add_item(current_frame->tree, hf_doctype,
505 name_tok->tvb, name_tok->offset, name_tok->len, FALSE);
507 proto_item_set_text(dtd_item,"%s",tvb_format_text(tok->tvb,tok->offset,tok->len));
509 new_frame = ep_alloc(sizeof(xml_frame_t));
510 new_frame->type = XML_FRAME_DTD_DOCTYPE;
511 new_frame->name = (gchar*)tvb_get_ephemeral_string(name_tok->tvb,name_tok->offset,name_tok->len);
512 new_frame->name_orig_case = new_frame->name;
513 new_frame->value = NULL;
514 insert_xml_frame(current_frame, new_frame);
515 new_frame->item = dtd_item;
516 new_frame->last_item = dtd_item;
517 new_frame->tree = proto_item_add_subtree(dtd_item,ett_dtd);
518 new_frame->start_offset = tok->offset;
519 new_frame->ns = NULL;
521 g_ptr_array_add(stack,new_frame);
524 static void pop_stack(void* tvbparse_data, const void* wanted_data _U_, tvbparse_elem_t* tok _U_) {
525 GPtrArray* stack = tvbparse_data;
526 xml_frame_t* current_frame = g_ptr_array_index(stack,stack->len - 1);
528 if (stack->len > 1) {
529 g_ptr_array_remove_index_fast(stack,stack->len - 1);
531 proto_tree_add_text(current_frame->tree,tok->tvb,tok->offset,tok->len,
532 "[ ERROR: Closing an unopened tag ]");
536 static void after_dtd_close(void* tvbparse_data, const void* wanted_data _U_, tvbparse_elem_t* tok){
537 GPtrArray* stack = tvbparse_data;
538 xml_frame_t* current_frame = g_ptr_array_index(stack,stack->len - 1);
540 proto_tree_add_text(current_frame->tree,tok->tvb,tok->offset,tok->len,"%s",
541 tvb_format_text(tok->tvb,tok->offset,tok->len));
542 if (stack->len > 1) {
543 g_ptr_array_remove_index_fast(stack,stack->len - 1);
545 proto_tree_add_text(current_frame->tree,tok->tvb,tok->offset,tok->len,"[ ERROR: Closing an unopened tag ]");
549 static void get_attrib_value(void* tvbparse_data _U_, const void* wanted_data _U_, tvbparse_elem_t* tok) {
550 tok->data = tok->sub;
/*
 * tvbparse "after" callback for one "name=value" attribute: adds an item for
 * it to the tree of the current frame (tag or processing instruction) and
 * records a XML_FRAME_ATTRIB child frame carrying the value.
 * NOTE(review): the declarations of hfidp/hfid/pi and the bodies of the
 * known/unknown attribute branches (which also assign `value`) are not
 * visible in this listing.
 */
553 static void after_attrib(void* tvbparse_data, const void* wanted_data _U_, tvbparse_elem_t* tok) {
554 GPtrArray* stack = tvbparse_data;
555 xml_frame_t* current_frame = g_ptr_array_index(stack,stack->len - 1);
556 gchar *name = NULL, *name_orig_case = NULL;
557 tvbparse_elem_t* value;
/* third sub-token is the value element; its data was set by get_attrib_value() */
558 tvbparse_elem_t* value_part = tok->sub->next->next->data;
562 xml_frame_t* new_frame;
/* first sub-token is the attribute name; keep the original case, look up in lower case */
564 name = tvb_get_ephemeral_string(tok->sub->tvb,tok->sub->offset,tok->sub->len);
565 name_orig_case = ep_strdup(name);
566 ascii_strdown_inplace(name);
/* attribute known to the current namespace? (table maps name -> int* holding the hf id) */
568 if(current_frame->ns && (hfidp = g_hash_table_lookup(current_frame->ns->attributes,name) )) {
/* otherwise fall back to the generic "unknown attribute" field */
572 hfid = hf_unknowwn_attrib;
576 pi = proto_tree_add_item(current_frame->tree,hfid,value->tvb,value->offset,value->len,FALSE);
/* label is the complete "name=value" text */
577 proto_item_set_text(pi, "%s", tvb_format_text(tok->tvb,tok->offset,tok->len));
/* after_open_tag()/after_closed_tag() append the closing bracket to this item */
579 current_frame->last_item = pi;
581 new_frame = ep_alloc(sizeof(xml_frame_t));
582 new_frame->type = XML_FRAME_ATTRIB;
583 new_frame->name = name;
584 new_frame->name_orig_case = name_orig_case;
585 new_frame->value = tvb_new_subset(value_part->tvb, value_part->offset, value_part->len, value_part->len);
586 insert_xml_frame(current_frame, new_frame);
587 new_frame->item = pi;
588 new_frame->last_item = pi;
589 new_frame->tree = NULL;
590 new_frame->start_offset = tok->offset;
591 new_frame->ns = NULL;
595 static void unrecognized_token(void* tvbparse_data, const void* wanted_data _U_, tvbparse_elem_t* tok _U_){
596 GPtrArray* stack = tvbparse_data;
597 xml_frame_t* current_frame = g_ptr_array_index(stack,stack->len - 1);
599 proto_tree_add_text(current_frame->tree,tok->tvb,tok->offset,tok->len,"[ ERROR: Unrecognized text ]");
/*
 * Builds the tvbparse grammar and stores its entry points in the file-scope
 * variables want (full grammar), want_ignore (skipped whitespace) and
 * want_heur (heuristic check).
 * NOTE(review): many argument lines of the grammar constructors are not
 * visible in this listing, so only the visible pieces are described.
 */
605 static void init_xml_parser(void) {
/* element names: letters, digits, '.', '-' and '_' */
606 tvbparse_wanted_t* want_name = tvbparse_chars(-1,1,0,"abcdefghijklmnopqrstuvwxyz.-_ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789",NULL,NULL,NULL);
/* attribute names additionally allow ':' */
607 tvbparse_wanted_t* want_attr_name = tvbparse_chars(-1,1,0,"abcdefghijklmnopqrstuvwxyz.-_ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789:",NULL,NULL,NULL);
/* "scope:name" sequence, tagged XML_SCOPED_NAME so before_tag() can recognise it */
609 tvbparse_wanted_t* want_scoped_name = tvbparse_set_seq(XML_SCOPED_NAME, NULL, NULL, NULL,
611 tvbparse_char(-1,":",NULL,NULL,NULL),
615 tvbparse_wanted_t* want_tag_name = tvbparse_set_oneof(0, NULL, NULL, NULL,
/* attribute value: double- or single-quoted string (backslash escapes) or a run of digits;
 * get_attrib_value() exposes the alternative that matched */
620 tvbparse_wanted_t* want_attrib_value = tvbparse_set_oneof(0, NULL, NULL, get_attrib_value,
621 tvbparse_quoted(-1, NULL, NULL, tvbparse_shrink_token_cb,'\"','\\'),
622 tvbparse_quoted(-1, NULL, NULL, tvbparse_shrink_token_cb,'\'','\\'),
623 tvbparse_chars(-1,1,0,"0123456789",NULL,NULL,NULL),
/* one or more "name = value" sequences, each reported to after_attrib() */
627 tvbparse_wanted_t* want_attributes = tvbparse_one_or_more(-1, NULL, NULL, NULL,
628 tvbparse_set_seq(-1, NULL, NULL, after_attrib,
630 tvbparse_char(-1,"=",NULL,NULL,NULL),
/* end of a tag: ">" keeps the element open, "/>" closes it immediately */
634 tvbparse_wanted_t* want_stoptag = tvbparse_set_oneof(-1,NULL,NULL,NULL,
635 tvbparse_char(-1, ">", NULL, NULL, after_open_tag),
636 tvbparse_string(-1, "/>", NULL, NULL, after_closed_tag),
/* end of a processing instruction */
639 tvbparse_wanted_t* want_stopxmlpi = tvbparse_string(-1,"?>",NULL,NULL,after_xmlpi);
/* comment: "<!--" up to "-->"; the element id is the current value of hf_comment */
641 tvbparse_wanted_t* want_comment = tvbparse_set_seq(hf_comment,NULL,NULL,after_token,
642 tvbparse_string(-1,"<!--",NULL,NULL,NULL),
643 tvbparse_until(-1,NULL,NULL,NULL,
644 tvbparse_string(-1,"-->",NULL,NULL,NULL),
/* processing instruction: "<?" ...; before_xmpli() opens the frame */
648 tvbparse_wanted_t* want_xmlpi = tvbparse_set_seq(hf_xmlpi,NULL,before_xmpli,NULL,
649 tvbparse_string(-1,"<?",NULL,NULL,NULL),
651 tvbparse_set_oneof(-1,NULL,NULL,NULL,
653 tvbparse_set_seq(-1,NULL,NULL,NULL,
/* closing tag: "<" "/" ... ">" handled by after_untag() */
660 tvbparse_wanted_t* want_closing_tag = tvbparse_set_seq(0,NULL,NULL,after_untag,
661 tvbparse_char(-1, "<", NULL, NULL, NULL),
662 tvbparse_char(-1, "/", NULL, NULL, NULL),
664 tvbparse_char(-1, ">", NULL, NULL, NULL),
/* "<!DOCTYPE" (case-insensitive keyword); before_dtd_doctype() opens the frame.
 * Visible alternatives: an inline DTD introduced by "[" or a PUBLIC/SYSTEM
 * reference running up to ">", the latter leaving the frame via pop_stack() */
667 tvbparse_wanted_t* want_doctype_start = tvbparse_set_seq(-1,NULL,before_dtd_doctype,NULL,
668 tvbparse_char(-1,"<",NULL,NULL,NULL),
669 tvbparse_char(-1,"!",NULL,NULL,NULL),
670 tvbparse_casestring(-1,"DOCTYPE",NULL,NULL,NULL),
671 tvbparse_set_oneof(-1,NULL,NULL,NULL,
672 tvbparse_set_seq(-1,NULL,NULL,NULL,
674 tvbparse_char(-1,"[",NULL,NULL,NULL),
676 tvbparse_set_seq(-1,NULL,NULL,pop_stack,
678 tvbparse_set_oneof(-1,NULL,NULL,NULL,
679 tvbparse_casestring(-1,"PUBLIC",NULL,NULL,NULL),
680 tvbparse_casestring(-1,"SYSTEM",NULL,NULL,NULL),
682 tvbparse_until(-1,NULL,NULL,NULL,
683 tvbparse_char(-1,">",NULL,NULL,NULL),
/* any other "<!" ... ">" declaration, shown as a single hf_dtd_tag token */
689 tvbparse_wanted_t* want_dtd_tag = tvbparse_set_seq(hf_dtd_tag,NULL,NULL,after_token,
690 tvbparse_char(-1,"<",NULL,NULL,NULL),
691 tvbparse_char(-1,"!",NULL,NULL,NULL),
692 tvbparse_until(-1,NULL,NULL,NULL,
693 tvbparse_char(-1, ">", NULL, NULL, NULL),
/* opening tag: "<" ...; before_tag() opens the frame */
697 tvbparse_wanted_t* want_tag = tvbparse_set_seq(-1, NULL, before_tag, NULL,
698 tvbparse_char(-1,"<",NULL,NULL,NULL),
700 tvbparse_set_oneof(-1,NULL,NULL,NULL,
701 tvbparse_set_seq(-1,NULL,NULL,NULL,
/* "]" ">" closing an inline DTD */
709 tvbparse_wanted_t* want_dtd_close = tvbparse_set_seq(-1,NULL,NULL,after_dtd_close,
710 tvbparse_char(-1,"]",NULL,NULL,NULL),
711 tvbparse_char(-1,">",NULL,NULL,NULL),
/* whitespace between tokens is skipped */
714 want_ignore = tvbparse_chars(-1,1,0," \t\r\n",NULL,NULL,NULL);
/* the full grammar; the two visible fallbacks are character data (anything up to '<')
 * and, last, any non-whitespace run reported as unrecognized */
717 want = tvbparse_set_oneof(-1, NULL, NULL, NULL,
725 tvbparse_not_chars(XML_CDATA,1,0,"<",NULL,NULL,after_token),
726 tvbparse_not_chars(-1,1,0," \t\r\n",NULL,NULL,unrecognized_token),
/* reduced grammar used by dissect_xml_heur(); its alternatives are not visible here */
729 want_heur = tvbparse_set_oneof(-1, NULL, NULL, NULL,
/*
 * Allocates a namespace called `name`, registers the attribute names given
 * as a NULL-terminated variable argument list and stores the namespace in
 * `hash` under its name.
 * All allocations use g_malloc/g_strdup and are not freed in the visible code.
 * NOTE(review): the va_start/va_end calls, the initialisation of the
 * remaining fields and the return statement are not visible in this listing.
 */
740 static xml_ns_t* xml_new_namespace(GHashTable* hash, gchar* name, ...) {
741 xml_ns_t* ns = g_malloc(sizeof(xml_ns_t));
745 ns->name = g_strdup(name);
749 ns->attributes = g_hash_table_new(g_str_hash,g_str_equal);
750 ns->elements = g_hash_table_new(g_str_hash,g_str_equal);
/* each attribute maps to a heap allocated int that will hold its header field id */
754 while(( attr_name = va_arg(ap,gchar*) )) {
755 int* hfp = g_malloc(sizeof(int));
757 g_hash_table_insert(ns->attributes,g_strdup(attr_name),hfp);
/* the hash key is the namespace's own name string (shared, not copied again) */
762 g_hash_table_insert(hash,ns->name,ns);
/*
 * Appends one FT_STRING / BASE_NONE header field description to the array
 * `hfs`. `name` becomes the display name and `fqn` the filter abbreviation;
 * both pointers are stored as they are, not copied.
 * NOTE(review): the lines storing p_id and initialising hfinfo.id are not
 * visible in this listing.
 */
768 static void add_xml_field(GArray* hfs, int* p_id, gchar* name, gchar* fqn) {
769 hf_register_info hfri;
772 hfri.hfinfo.name = name;
773 hfri.hfinfo.abbrev = fqn;
774 hfri.hfinfo.type = FT_STRING;
775 hfri.hfinfo.display = BASE_NONE;
776 hfri.hfinfo.strings = NULL;
777 hfri.hfinfo.bitmask = 0x0;
778 hfri.hfinfo.blurb = NULL;
779-placeholder-removed
789 static void add_xml_attribute_names(gpointer k, gpointer v, gpointer p) {
790 struct _attr_reg_data* d = p;
791 gchar* basename = g_strdup_printf("%s.%s",d->basename,(gchar*)k);
792 add_xml_field(d->hf, (int*) v, (gchar*)k, basename);
/*
 * GHFunc for g_hash_table_foreach() over a table of processing-instruction
 * namespaces: registers the tag field, the subtree index and the attribute
 * fields of one namespace. `p` is the abbreviation prefix (a string).
 * NOTE(review): the declaration of `ns` (presumably taken from `v`), the
 * cdata field registration and the assignment of d.hf are not visible here.
 */
796 static void add_xmlpi_namespace(gpointer k _U_, gpointer v, gpointer p) {
/* "<prefix>.<namespace name>", used both as display name and abbreviation below */
798 gchar* basename = g_strdup_printf("%s.%s",(gchar*)p,ns->name);
799 gint* ett_p = &(ns->ett);
800 struct _attr_reg_data d;
802 add_xml_field(hf_arr, &(ns->hf_tag), basename, basename);
/* the array stores the address of ns->ett so registration can fill it in */
804 g_array_append_val(ett_arr,ett_p);
806 d.basename = basename;
/* attribute fields are abbreviated "<basename>.<attribute>" */
809 g_hash_table_foreach(ns->attributes,add_xml_attribute_names,&d);
/*
 * Releases a dtd_build_data_t: its strings, its error GString and its two
 * arrays of dtd_named_list_t (elements and attributes).
 * NOTE(review): the lines freeing each list entry itself (name and struct)
 * and the final free of dtd_data are not visible in this listing.
 */
813 static void destroy_dtd_data(dtd_build_data_t* dtd_data) {
814 g_free(dtd_data->proto_name);
815 g_free(dtd_data->media_type);
816 g_free(dtd_data->description);
817 g_free(dtd_data->proto_root);
819 g_string_free(dtd_data->error,TRUE);
/* drain the array; remove_index_fast moves the last entry into slot 0 */
821 while(dtd_data->elements->len) {
822 dtd_named_list_t* nl = g_ptr_array_remove_index_fast(dtd_data->elements,0);
823 g_ptr_array_free(nl->list,TRUE);
827 g_ptr_array_free(dtd_data->elements,TRUE);
/* same for the attribute lists */
829 while(dtd_data->attributes->len) {
830 dtd_named_list_t* nl = g_ptr_array_remove_index_fast(dtd_data->attributes,0);
831 g_ptr_array_free(nl->list,TRUE);
835 g_ptr_array_free(dtd_data->attributes,TRUE);
/*
 * GHFunc used by copy_attributes_hash(): inserts a copy of key `k` together
 * with a freshly allocated int into the destination table. The source value
 * is ignored (_U_).
 * NOTE(review): the declaration of `dst` (presumably taken from `p`) and the
 * initialisation of *value are not visible in this listing.
 */
840 static void copy_attrib_item(gpointer k, gpointer v _U_, gpointer p) {
841 gchar* key = g_strdup(k);
842 int* value = g_malloc(sizeof(int));
846 g_hash_table_insert(dst,key,value);
/*
 * Creates a new string-keyed hash table and fills it from `src` via
 * copy_attrib_item(), i.e. with copied keys and new value slots.
 * NOTE(review): the return statement is not visible in this listing.
 */
850 static GHashTable* copy_attributes_hash(GHashTable* src) {
851 GHashTable* dst = g_hash_table_new(g_str_hash,g_str_equal);
853 g_hash_table_foreach(src,copy_attrib_item,dst);
/*
 * Creates a copy of the element description `orig`: own name string, unset
 * (-1) field ids, a copied attribute table, an empty child table and a new
 * array holding the same child-name pointers as the original (the strings
 * themselves are shared, not duplicated).
 * `orig` is dereferenced unconditionally, so it must not be NULL.
 * NOTE(review): the declaration of `i`, the initialisation of the remaining
 * fields and the return statement are not visible in this listing.
 */
858 static xml_ns_t* duplicate_element(xml_ns_t* orig) {
859 xml_ns_t* new_item = g_malloc(sizeof(xml_ns_t));
862 new_item->name = g_strdup(orig->name);
863 new_item->hf_tag = -1;
864 new_item->hf_cdata = -1;
866 new_item->attributes = copy_attributes_hash(orig->attributes);
867 new_item->elements = g_hash_table_new(g_str_hash,g_str_equal);
868 new_item->element_names = g_ptr_array_new();
870 for(i=0; i < orig->element_names->len; i++) {
871 g_ptr_array_add(new_item->element_names,
872 g_ptr_array_index(orig->element_names,i));
/*
 * Builds the dotted abbreviation "<proto_name>.<hier[1]>. ... .<name>".
 * The first entry of `hier` (index 0) is skipped.
 * The GString wrapper is released with free_segment == FALSE, so the
 * character data stays allocated for the caller.
 * NOTE(review): the line capturing the string pointer and the return
 * statement are not visible in this listing.
 */
878 static gchar* fully_qualified_name(GPtrArray* hier, gchar* name, gchar* proto_name) {
880 GString* s = g_string_new(proto_name);
882 g_string_append(s,".");
884 for (i = 1; i < hier->len; i++) {
885 g_string_append_printf(s, "%s.",(gchar*)g_ptr_array_index(hier,i));
888 g_string_append(s,name);
890 g_string_free(s,FALSE);
/*
 * Recursively builds the namespace entry for element `elem_name`: duplicates
 * its description from `elements`, registers its fields, subtree and
 * attributes, and then does the same for every child element, using `hier`
 * as the stack of names on the current path.
 * Problems are reported by appending to `error`.
 * NOTE(review): several parameters, the declarations, the actions taken for
 * the root / undefined / recurring cases and the return statement are not
 * visible in this listing.
 */
896 static xml_ns_t* make_xml_hier(gchar* elem_name,
898 GHashTable* elements,
908 struct _attr_reg_data d;
909 gboolean recurred = FALSE;
/* the root element is handled specially */
912 if ( g_str_equal(elem_name,root->name) ) {
916 if (! ( orig = g_hash_table_lookup(elements,elem_name) )) {
917 g_string_append_printf(error,"element '%s' is not defined\n", elem_name);
/* is this element already on the current path? */
921 for (i = 0; i < hier->len; i++) {
922 if( strcmp(elem_name,(gchar*) g_ptr_array_index(hier,i) ) == 0 ) {
931 fqn = fully_qualified_name(hier,elem_name,proto_name);
933 new = duplicate_element(orig);
/* tag and cdata fields share the same abbreviation; each gets its own copy of the display name */
936 add_xml_field(hfs, &(new->hf_tag), g_strdup(elem_name), fqn);
937 add_xml_field(hfs, &(new->hf_cdata), g_strdup(elem_name), fqn);
940 g_array_append_val(etts,ett_p);
945 g_hash_table_foreach(new->attributes,add_xml_attribute_names,&d);
/* consume the child names one by one, descending with this element pushed on the path */
947 while(new->element_names->len) {
948 gchar* child_name = g_ptr_array_remove_index(new->element_names,0);
949 xml_ns_t* child_element = NULL;
951 g_ptr_array_add(hier,elem_name);
952 child_element = make_xml_hier(child_name, root, elements, hier,error,hfs,etts,proto_name);
953 g_ptr_array_remove_index_fast(hier,hier->len - 1);
956 g_hash_table_insert(new->elements,child_element->name,child_element);
/* the name list is only needed while building */
960 g_ptr_array_free(new->element_names,TRUE);
961 new->element_names = NULL;
/*
 * GHRFunc handed to g_hash_table_foreach_remove() by free_elements().
 * NOTE(review): the body is not visible in this listing; judging by the name
 * it presumably frees both key and value — verify.
 */
965 static gboolean free_both(gpointer k, gpointer v, gpointer p _U_) {
/*
 * GHRFunc (also called directly by register_dtd) that tears down one element
 * description: empties and destroys its attribute table, destroys its child
 * table and frees its list of child names together with the array.
 * NOTE(review): the declaration of `e` (presumably taken from `v`), the
 * release of the element itself and the return statement are not visible here.
 */
971 static gboolean free_elements(gpointer k _U_, gpointer v, gpointer p _U_) {
974 g_hash_table_foreach_remove(e->attributes,free_both,NULL);
975 g_hash_table_destroy(e->attributes);
976 g_hash_table_destroy(e->elements);
/* the name strings are owned by the list */
978 while (e->element_names->len) {
979 g_free(g_ptr_array_remove_index(e->element_names,0));
982 g_ptr_array_free(e->element_names,TRUE);
/*
 * Turns the result of parsing one DTD (dtd_data) into a namespace:
 *   1. collect the element descriptions by name,
 *   2. attach the attribute lists to their elements,
 *   3. choose the root element and create the root namespace entry,
 *   4. build either a recursive hierarchy or a flat namespace,
 *   5. register the namespace as a protocol of its own when a proto_name was
 *      given, and make it reachable by media type and through xml_ns,
 *   6. release the intermediate tables and dtd_data itself.
 * Problems are reported by appending to `errors`.
 * NOTE(review): a number of lines (declarations, else-branches, frees) are
 * not visible in this listing; comments describe the visible lines only.
 */
988 static void register_dtd(dtd_build_data_t* dtd_data, GString* errors) {
989 GHashTable* elements = g_hash_table_new(g_str_hash,g_str_equal);
990 gchar* root_name = NULL;
991 xml_ns_t* root_element = NULL;
996 GPtrArray* element_names = g_ptr_array_new();
998 /* we first populate elements with those coming from the parser */
999 while(dtd_data->elements->len) {
1000 dtd_named_list_t* nl = g_ptr_array_remove_index(dtd_data->elements,0);
1001 xml_ns_t* element = g_malloc(sizeof(xml_ns_t));
1003 /* we will use the first element found as root in case no other one was given. */
1004 if (root_name == NULL)
1005 root_name = g_strdup(nl->name);
/* the element takes over the name string and the child list of the parsed entry */
1007 element->name = nl->name;
1008 element->element_names = nl->list;
1009 element->hf_tag = -1;
1010 element->hf_cdata = -1;
1012 element->attributes = g_hash_table_new(g_str_hash,g_str_equal);
1013 element->elements = g_hash_table_new(g_str_hash,g_str_equal);
/* a second definition of the same element is reported and discarded */
1015 if( g_hash_table_lookup(elements,element->name) ) {
1016 g_string_append_printf(errors,"element %s defined more than once\n", element->name);
1017 free_elements(NULL,element,NULL);
1019 g_hash_table_insert(elements,element->name,element);
1020 g_ptr_array_add(element_names,g_strdup(element->name));
1026 /* then we add the attributes to its relative elements */
1027 while(dtd_data->attributes->len) {
1028 dtd_named_list_t* nl = g_ptr_array_remove_index(dtd_data->attributes,0);
1029 xml_ns_t* element = g_hash_table_lookup(elements,nl->name);
/* each attribute name is moved into the element's table with a fresh id slot */
1032 while(nl->list->len) {
1033 gchar* name = g_ptr_array_remove_index(nl->list,0);
1034 int* id_p = g_malloc(sizeof(int));
1037 g_hash_table_insert(element->attributes,name,id_p);
1041 g_string_append_printf(errors,"element %s is not defined\n", nl->name);
1045 g_ptr_array_free(nl->list,TRUE);
1049 /* if a proto_root is defined in the dtd we'll use that as root */
1050 if( dtd_data->proto_root ) {
1052 root_name = g_strdup(dtd_data->proto_root);
1055 /* we use a stack with the names to avoid recurring infinitely */
1056 hier = g_ptr_array_new();
1059 * if a proto name was given in the dtd the dtd will be used as a protocol
1060 * or else the dtd will be loaded as a branch of the xml namespace
1062 if( ! dtd_data->proto_name ) {
1065 g_ptr_array_add(hier,g_strdup("xml"));
1066 root_element = &xml_ns;
1069 * if we were given a proto_name the namespace will be registered
1070 * as an independent protocol with its own hf and ett arrays.
1072 hfs = g_array_new(FALSE,FALSE,sizeof(hf_register_info));
1073 etts = g_array_new(FALSE,FALSE,sizeof(gint*));
1076 /* the root element of the dtd's namespace */
1077 root_element = g_malloc(sizeof(xml_ns_t));
1078 root_element->name = g_strdup(root_name);
/* without a proto_name the fqn is the very same string as the name (shared pointer) */
1079 root_element->fqn = dtd_data->proto_name ? g_strdup(dtd_data->proto_name) : root_element->name;
1080 root_element->hf_tag = -1;
1081 root_element->hf_cdata = -1;
1082 root_element->ett = -1;
1083 root_element->elements = g_hash_table_new(g_str_hash,g_str_equal);
1084 root_element->element_names = element_names;
1087 * we can either create a namespace as a flat namespace
1088 * in which all the elements are at the root level
1089 * or we can create a recursive namespace
1091 if (dtd_data->recursion) {
1092 xml_ns_t* orig_root;
1094 make_xml_hier(root_name, root_element, elements,hier,errors,hfs,etts,dtd_data->proto_name);
/* the root is registered as a child of itself */
1096 g_hash_table_insert(root_element->elements,root_element->name,root_element);
1098 orig_root = g_hash_table_lookup(elements,root_name);
1100 /* if the root element was defined copy its attrlist to the child */
1102 struct _attr_reg_data d;
1104 d.basename = dtd_data->proto_name;
1107 root_element->attributes = copy_attributes_hash(orig_root->attributes);
1108 g_hash_table_foreach(root_element->attributes,add_xml_attribute_names,&d);
1110 root_element->attributes = g_hash_table_new(g_str_hash,g_str_equal);
1113 /* we then create all the sub hierarchies to catch the recurred cases */
1114 g_ptr_array_add(hier,root_name);
1116 while(root_element->element_names->len) {
1117 curr_name = g_ptr_array_remove_index(root_element->element_names,0);
/* only elements not yet present at the root level get a hierarchy of their own */
1119 if( ! g_hash_table_lookup(root_element->elements,curr_name) ) {
1120 xml_ns_t* new = make_xml_hier(curr_name, root_element, elements,hier,errors,hfs,etts,dtd_data->proto_name);
1121 g_hash_table_insert(root_element->elements,new->name,new);
1128 /* a flat namespace */
1129 g_ptr_array_add(hier,root_name);
1131 root_element->attributes = g_hash_table_new(g_str_hash,g_str_equal);
1133 while(root_element->element_names->len) {
1136 struct _attr_reg_data d;
1138 curr_name = g_ptr_array_remove_index(root_element->element_names,0);
/* NOTE(review): duplicate_element() dereferences its argument, so this relies on
 * every name in element_names being present in `elements` */
1139 new = duplicate_element(g_hash_table_lookup(elements,curr_name));
1140 new->fqn = fully_qualified_name(hier, curr_name, root_name);
1142 add_xml_field(hfs, &(new->hf_tag), curr_name, new->fqn);
1143 add_xml_field(hfs, &(new->hf_cdata), curr_name, new->fqn);
1145 d.basename = new->fqn;
1148 g_hash_table_foreach(new->attributes,add_xml_attribute_names,&d);
1151 g_array_append_val(etts,ett_p);
/* a flat namespace has no per-element child lists */
1153 g_ptr_array_free(new->element_names,TRUE);
1155 g_hash_table_insert(root_element->elements,new->name,new);
1159 g_ptr_array_free(element_names,TRUE);
1161 g_ptr_array_free(hier,TRUE);
1164 * if we were given a proto_name the namespace will be registered
1165 * as an independent protocol.
1167 if( dtd_data->proto_name ) {
/* the description defaults to the root element's name */
1170 if ( ! dtd_data->description) {
1171 dtd_data->description = g_strdup(root_name);
1174 ett_p = &root_element->ett;
1175 g_array_append_val(etts,ett_p);
1177 add_xml_field(hfs, &root_element->hf_cdata, root_element->name, root_element->fqn);
/* for a protocol the tag field id is the protocol id itself */
1179 root_element->hf_tag = proto_register_protocol(dtd_data->description, dtd_data->proto_name, dtd_data->proto_name);
1180 proto_register_field_array(root_element->hf_tag, (hf_register_info*)g_array_data(hfs), hfs->len);
1181 proto_register_subtree_array((gint**)g_array_data(etts), etts->len);
/* the media type string is handed over to the table; the pointer is cleared so
 * destroy_dtd_data() does not free it */
1183 if (dtd_data->media_type) {
1184 g_hash_table_insert(media_types,dtd_data->media_type,root_element);
1185 dtd_data->media_type = NULL;
/* same for the strings passed to proto_register_protocol() */
1188 dtd_data->description = NULL;
1189 dtd_data->proto_name = NULL;
/* hfs: keep the element data (FALSE), it was handed to the field registration;
 * etts: the pointer array itself is released (TRUE) */
1190 g_array_free(hfs,FALSE);
1191 g_array_free(etts,TRUE);
/* make the namespace reachable from the generic xml namespace by its root name */
1194 g_hash_table_insert(xml_ns.elements,root_element->name,root_element);
/* the intermediate element descriptions are no longer needed */
1196 g_hash_table_foreach_remove(elements,free_elements,NULL);
1197 g_hash_table_destroy(elements);
1199 destroy_dtd_data(dtd_data);
/*
 * Thin aliases over the GLib directory API, used by init_xml_names() when
 * it walks the "dtds" directory.
 *
 * OPENDIR_OP() expands to a g_dir_open() call whose last argument is the
 * identifier `dummy`; the calling function must therefore have a GError**
 * variable with that exact name in scope.
 */
# define DIRECTORY_T GDir
# define FILE_T gchar
# define OPENDIR_OP(name) g_dir_open((name), 0, dummy)
# define DIRGETNEXT_OP(dir) g_dir_read_name(dir)
/*
 * Must expand to a plain expression: a ';' inside the macro would make it
 * unusable inside larger expressions and leaves an empty statement behind
 * at every call site that already ends in ';'.
 */
# define GETFNAME_OP(file) (file)
# define CLOSEDIR_OP(dir) g_dir_close(dir)
/*
 * Build the global name tables of the XML dissector and load DTD files.
 *
 * - creates the xmpli_names and media_types hash tables, plus one elements
 *   table and one attributes table that xml_ns and unknown_ns share;
 * - calls xml_new_namespace() on xmpli_names for "xml" with the names
 *   "version", "encoding" and "standalone", then drops the elements table
 *   of the namespace it returns;
 * - looks for a "dtds" directory (personal configuration path first, data
 *   file path otherwise) and runs every entry whose name ends in ".dtd"
 *   through dtd_preparse(), dtd_parse() and register_dtd(), reporting
 *   problems with report_failure();
 * - maps every entry of default_media_types that is not yet present in
 *   media_types to xml_ns;
 * - calls add_xmlpi_namespace() for each entry of xmpli_names with
 *   "xml.xmlpi" as user data.
 *
 * NOTE(review): this listing does not show every line of the function
 * (some local declarations, the guards around the first and last
 * report_failure() calls and all closing braces are absent), so the exact
 * nesting and what follows each failure report must be confirmed against
 * the complete file.
 */
1210 static void init_xml_names(void) {
1211 xml_ns_t* xmlpi_xml_ns;
1215 const gchar* filename;
/* OPENDIR_OP() expands to g_dir_open(name, 0, dummy), so a GError** with
 * exactly this name has to exist here. */
1218 GError** dummy = g_malloc(sizeof(GError *));
1221 xmpli_names = g_hash_table_new(g_str_hash,g_str_equal);
1222 media_types = g_hash_table_new(g_str_hash,g_str_equal);
/* xml_ns and unknown_ns point at the very same tables. */
1224 unknown_ns.elements = xml_ns.elements = g_hash_table_new(g_str_hash,g_str_equal);
1225 unknown_ns.attributes = xml_ns.attributes = g_hash_table_new(g_str_hash,g_str_equal);
1227 xmlpi_xml_ns = xml_new_namespace(xmpli_names,"xml","version","encoding","standalone",NULL);
/* The returned namespace keeps no elements table. */
1229 g_hash_table_destroy(xmlpi_xml_ns->elements);
1230 xmlpi_xml_ns->elements = NULL;
/* Prefer the personal "dtds" directory; fall back to the data file one. */
1233 dirname = get_persconffile_path("dtds", FALSE, FALSE);
1235 if (test_for_directory(dirname) != EISDIR) {
1236 /* Although dir isn't a directory it may still use memory */
1238 dirname = get_datafile_path("dtds");
1241 if (test_for_directory(dirname) == EISDIR) {
1243 if ((dir = OPENDIR_OP(dirname)) != NULL) {
1244 while ((file = DIRGETNEXT_OP(dir)) != NULL) {
1246 filename = GETFNAME_OP(file);
1248 namelen = (int)strlen(filename);
/* Only names longer than four characters that end in ".dtd" (ASCII
 * case-insensitive) are treated as DTD files. */
1249 if ( namelen > 4 && ( g_ascii_strcasecmp(filename+(namelen-4),".dtd") == 0 ) ) {
/* NOTE(review): the only g_string_free(errors, ...) visible in this
 * function follows the "Dtd Registration" report below; verify that the
 * other paths through this block release `errors` (and `preparsed` when
 * preparsing fails) as well. */
1250 GString* errors = g_string_new("");
1251 GString* preparsed = dtd_preparse(dirname, filename, errors);
1252 dtd_build_data_t* dtd_data;
1255 report_failure("Dtd Preparser in file %s%c%s: %s",dirname,G_DIR_SEPARATOR,filename,errors->str);
1259 dtd_data = dtd_parse(preparsed);
/* The preparsed text is released right after dtd_parse() returns. */
1261 g_string_free(preparsed,TRUE);
1263 if (dtd_data->error->len) {
1264 report_failure("Dtd Parser in file %s%c%s: %s",dirname,G_DIR_SEPARATOR,filename,dtd_data->error->str);
1265 destroy_dtd_data(dtd_data);
1269 register_dtd(dtd_data,errors);
1272 report_failure("Dtd Registration in file: %s%c%s: %s",dirname,G_DIR_SEPARATOR,filename,errors->str);
1273 g_string_free(errors,TRUE);
/* Default media types never replace an entry that is already present. */
1285 for(i=0;i<array_length(default_media_types);i++) {
1286 if( ! g_hash_table_lookup(media_types,default_media_types[i]) ) {
1287 g_hash_table_insert(media_types,(gpointer)default_media_types[i],&xml_ns);
1291 g_hash_table_foreach(xmpli_names,add_xmlpi_namespace,"xml.xmlpi");
1296 static void range_delete_xml_tcp_callback(guint32 port) {
1297 dissector_delete_uint("tcp.port", port, xml_handle);
1300 static void range_add_xml_tcp_callback(guint32 port) {
1301 dissector_add_uint("tcp.port", port, xml_handle);
1304 static void apply_prefs(void) {
1305 if (pref_heuristic_media_save != pref_heuristic_media) {
1306 if (pref_heuristic_media) {
1307 heur_dissector_add("http", dissect_xml_heur, xml_ns.hf_tag);
1308 heur_dissector_add("sip", dissect_xml_heur, xml_ns.hf_tag);
1309 heur_dissector_add("media", dissect_xml_heur, xml_ns.hf_tag);
1310 pref_heuristic_media_save = TRUE;
1312 heur_dissector_delete("http", dissect_xml_heur, xml_ns.hf_tag);
1313 heur_dissector_delete("sip", dissect_xml_heur, xml_ns.hf_tag);
1314 heur_dissector_delete("media", dissect_xml_heur, xml_ns.hf_tag);
1315 pref_heuristic_media_save = FALSE;
1319 if (pref_heuristic_tcp_save != pref_heuristic_tcp ) {
1320 if (pref_heuristic_tcp) {
1321 heur_dissector_add("tcp", dissect_xml_heur, xml_ns.hf_tag);
1322 pref_heuristic_tcp_save = TRUE;
1324 heur_dissector_delete("tcp", dissect_xml_heur, xml_ns.hf_tag);
1325 pref_heuristic_tcp_save = FALSE;
1329 if (pref_heuristic_udp_save != pref_heuristic_udp ) {
1330 if (pref_heuristic_udp) {
1331 heur_dissector_add("udp", dissect_xml_heur, xml_ns.hf_tag);
1332 pref_heuristic_udp_save = TRUE;
1334 heur_dissector_delete("udp", dissect_xml_heur, xml_ns.hf_tag);
1335 pref_heuristic_udp_save = FALSE;
1339 range_foreach(xml_tcp_range, range_delete_xml_tcp_callback);
1340 g_free(xml_tcp_range);
1341 xml_tcp_range = range_copy(global_xml_tcp_range);
1342 range_foreach(xml_tcp_range, range_add_xml_tcp_callback);
/*
 * Protocol registration for the XML dissector.
 *
 * Seeds hf_arr and ett_arr with the built-in fields and subtrees, registers
 * the "eXtensible Markup Language" protocol (its id is stored in
 * xml_ns.hf_tag), registers the field and subtree arrays, the preferences
 * handled by apply_prefs(), and the dissector named "xml".
 *
 * NOTE(review): this listing does not show every line of the function (the
 * contents of ett_base, one string argument of the range preference and
 * some short statements are absent); confirm details against the complete
 * file.
 */
1346 proto_register_xml(void) {
1347 static gint *ett_base[] = {
/* Built-in fields. "xml.cdata" and "xml.tag" are bound to unknown_ns,
 * while "xml.unknown" is bound to xml_ns.hf_cdata. */
1354 static hf_register_info hf_base[] = {
1355 { &hf_xmlpi, {"XMLPI", "xml.xmlpi", FT_STRING, BASE_NONE, NULL, 0, NULL, HFILL }},
1356 { &hf_comment, {"Comment", "xml.comment", FT_STRING, BASE_NONE, NULL, 0, NULL, HFILL }},
1357 { &hf_unknowwn_attrib, {"Attribute", "xml.attribute", FT_STRING, BASE_NONE, NULL, 0, NULL, HFILL }},
1358 { &hf_doctype, {"Doctype", "xml.doctype", FT_STRING, BASE_NONE, NULL, 0, NULL, HFILL }},
1359 { &hf_dtd_tag, {"DTD Tag", "xml.dtdtag", FT_STRING, BASE_NONE, NULL, 0, NULL, HFILL }},
1360 { &unknown_ns.hf_cdata, {"CDATA", "xml.cdata", FT_STRING, BASE_NONE, NULL, 0, NULL, HFILL }},
1361 { &unknown_ns.hf_tag, {"Tag", "xml.tag", FT_STRING, BASE_NONE, NULL, 0, NULL, HFILL }},
1362 { &xml_ns.hf_cdata, {"Unknown", "xml.unknown", FT_STRING, BASE_NONE, NULL, 0, NULL, HFILL }}
1364 module_t* xml_module;
/* Growable arrays, started off with the static base entries above. */
1366 hf_arr = g_array_new(FALSE,FALSE,sizeof(hf_register_info));
1367 ett_arr = g_array_new(FALSE,FALSE,sizeof(gint*));
1369 g_array_append_vals(hf_arr,hf_base,array_length(hf_base));
1370 g_array_append_vals(ett_arr,ett_base,array_length(ett_base));
/* The protocol id doubles as the "tag" field id of the xml namespace. */
1374 xml_ns.hf_tag = proto_register_protocol("eXtensible Markup Language", "XML", xml_ns.name);
1376 proto_register_field_array(xml_ns.hf_tag, (hf_register_info*)g_array_data(hf_arr), hf_arr->len);
1377 proto_register_subtree_array((gint**)g_array_data(ett_arr), ett_arr->len);
/* apply_prefs() is the callback given for this preference module. */
1379 xml_module = prefs_register_protocol(xml_ns.hf_tag,apply_prefs);
1380 prefs_register_bool_preference(xml_module, "heuristic", "Use Heuristics for media types",
1381 "Try to recognize XML for unknown media types",
1382 &pref_heuristic_media);
1383 prefs_register_bool_preference(xml_module, "heuristic_tcp", "Use Heuristics for TCP",
1384 "Try to recognize XML for unknown TCP ports",
1385 &pref_heuristic_tcp);
1386 prefs_register_range_preference(xml_module, "tcp.port", "TCP Ports",
1388 &global_xml_tcp_range, 65535);
1389 prefs_register_bool_preference(xml_module, "heuristic_udp", "Use Heuristics for UDP",
1390 "Try to recognize XML for unknown UDP ports",
1391 &pref_heuristic_udp);
1392 prefs_register_bool_preference(xml_module, "heuristic_unicode", "Use Unicode in heuristics",
1393 "Try to recognize XML encoded in Unicode (UCS-2)",
1394 &pref_heuristic_unicode);
/* hf_arr is freed with free_segment FALSE, ett_arr with TRUE.
 * NOTE(review): presumably the registered field entries must stay
 * allocated after registration while the subtree pointers need not --
 * confirm against the proto_register_* contracts. */
1396 g_array_free(hf_arr,FALSE);
1397 g_array_free(ett_arr,TRUE);
1399 register_dissector("xml", dissect_xml, xml_ns.hf_tag);
/* Start with an empty range; apply_prefs() later replaces it with a copy
 * of global_xml_tcp_range. */
1403 xml_tcp_range = range_empty();
1408 static void add_dissector_media(gpointer k, gpointer v _U_, gpointer p _U_) {
1409 dissector_add_string("media_type", (gchar*)k, xml_handle);
1413 proto_reg_handoff_xml(void)
1416 xml_handle = find_dissector("xml");
1418 g_hash_table_foreach(media_types,add_dissector_media,NULL);