From Kovarththanan Rajaratnam via bug 3719:
[obnox/wireshark/wip.git] / epan / dissectors / packet-xml.c
1 /* packet-xml.c
2  * Wireshark's XML dissector.
3  *
4  * (C) 2005, Luis E. Garcia Ontanon.
5  *
6  * $Id$
7  *
8  * Refer to the AUTHORS file or the AUTHORS section in the man page
9  * for contacting the author(s) of this file.
10  *
11  * Wireshark - Network traffic analyzer
12  * By Gerald Combs <gerald@wireshark.org>
13  * Copyright 1998 Gerald Combs
14  *
15  * This program is free software; you can redistribute it and/or
16  * modify it under the terms of the GNU General Public License
17  * as published by the Free Software Foundation; either version 2
18  * of the License, or (at your option) any later version.
19  *
20  * This program is distributed in the hope that it will be useful,
21  * but WITHOUT ANY WARRANTY; without even the implied warranty of
22  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
23  * GNU General Public License for more details.
24  *
25  * You should have received a copy of the GNU General Public License
26  * along with this program; if not, write to the Free Software
27  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
28  */
29
30 #ifdef HAVE_CONFIG_H
31 #include "config.h"
32 #endif
33
34 #ifdef HAVE_DIRENT_H
35 #include <dirent.h>
36 #endif
37
38 #include <ctype.h>
39 #include <stdlib.h>
40 #include <string.h>
41 #include <stdarg.h>
42 #include <errno.h>
43
44 #include <stdio.h>
45
46 #include <glib.h>
47
48 #include <wsutil/str_util.h>
49
50 #include <epan/emem.h>
51 #include <epan/packet.h>
52 #include <epan/tvbparse.h>
53 #include <epan/dtd.h>
54 #include <epan/report_err.h>
55 #include <epan/filesystem.h>
56 #include <epan/prefs.h>
57 #include <epan/garrayfix.h>
58
59 #include "packet-xml.h"
60
61 struct _attr_reg_data {
62         GArray* hf;
63         gchar* basename;
64 };
65
66
67 static gint ett_dtd = -1;
68 static gint ett_xmpli = -1;
69
70 static int hf_unknowwn_attrib = -1;
71 static int hf_comment = -1;
72 static int hf_xmlpi = -1;
73 static int hf_dtd_tag = -1;
74 static int hf_doctype = -1;
75
76 /* dissector handles */
77 static dissector_handle_t xml_handle;
78
79 /* parser definitions */
80 static tvbparse_wanted_t* want;
81 static tvbparse_wanted_t* want_ignore;
82 static tvbparse_wanted_t* want_heur;
83
84 static GHashTable* xmpli_names;
85 static GHashTable* media_types;
86
87 static xml_ns_t xml_ns = {"xml","/",-1,-1,-1,NULL,NULL,NULL};
88 static xml_ns_t unknown_ns = {"unknown","?",-1,-1,-1,NULL,NULL,NULL};
89 static xml_ns_t* root_ns;
90
91 static gboolean pref_heuristic_media = FALSE;
92 static gboolean pref_heuristic_tcp = FALSE;
93 static gboolean pref_heuristic_udp = FALSE;
94 static gboolean pref_heuristic_media_save = FALSE;
95 static gboolean pref_heuristic_tcp_save = FALSE;
96 static gboolean pref_heuristic_udp_save = FALSE;
97 static range_t *global_xml_tcp_range = NULL;
98 static range_t *xml_tcp_range = NULL;
99
/* Negative token ids for the parser; parenthesised so the sign travels with the value. */
#define XML_CDATA       (-1000)
#define XML_SCOPED_NAME (-1001)
102
103
104 GArray* hf_arr;
105 GArray* ett_arr;
106
107 static const gchar* default_media_types[] = {
108         "text/xml",
109         "text/vnd.wap.wml",
110         "text/vnd.wap.si",
111         "text/vnd.wap.sl",
112         "text/vnd.wap.co",
113         "text/vnd.wap.emn",
114         "application/auth-policy+xml",
115         "application/cpim-pidf+xml",
116         "application/cpl+xml",
117         "application/mathml+xml",
118         "application/media_control+xml",
119         "application/note+xml",
120         "application/pidf+xml",
121         "application/poc-settings+xml",
122         "application/rdf+xml",
123         "application/reginfo+xml",
124         "application/resource-lists+xml",
125         "application/rlmi+xml",
126         "application/rls-services+xml",
127         "application/smil",
128         "application/simple-filter+xml",
129         "application/soap+xml",
130         "application/vnd.wv.csp+xml",
131         "application/vnd.wv.csp.xml",
132         "application/watcherinfo+xml",
133         "application/xcap-att+xml",
134         "application/xcap-caps+xml",
135         "application/xcap-diff+xml",
136         "application/xcap-el+xml",
137         "application/xcap-error+xml",
138         "application/xcap-ns+xml",
139         "application/xml",
140         "application/xml-dtd",
141         "application/xpidf+xml",
142         "application/xslt+xml",
143         "application/x-wms-logconnectstats",
144         "application/x-wms-logplaystats",
145         "application/x-wms-sendevent",
146         "application/rss+xml",   
147         "image/svg+xml",
148 };
149
150 static void insert_xml_frame(xml_frame_t *parent, xml_frame_t *new_child) {
151         new_child->firts_child = NULL;
152         new_child->last_child = NULL;
153
154         new_child->parent = parent;
155         new_child->next_sibling = NULL;
156         new_child->prev_sibling = NULL;
157         if (parent == NULL) return;  /* root */
158
159         if (parent->firts_child == NULL) {  /* the 1st child */
160                 parent->firts_child = new_child;
161         } else {  /* following children */
162                 parent->last_child->next_sibling = new_child;
163                 new_child->prev_sibling = parent->last_child;
164         }
165         parent->last_child = new_child;
166 }
167
168 static void
169 dissect_xml(tvbuff_t *tvb, packet_info *pinfo, proto_tree *tree)
170 {
171         tvbparse_t* tt;
172         tvbparse_elem_t* tok = NULL;
173         static GPtrArray* stack = NULL;
174         xml_frame_t* current_frame;
175         char* colinfo_str;
176         
177         if (stack != NULL)
178                 g_ptr_array_free(stack,TRUE);
179
180         stack = g_ptr_array_new();
181         current_frame = ep_alloc(sizeof(xml_frame_t));
182         current_frame->type = XML_FRAME_ROOT;
183         current_frame->name = NULL;
184         current_frame->name_orig_case = NULL;
185         current_frame->value = NULL;
186         insert_xml_frame(NULL, current_frame);
187         g_ptr_array_add(stack,current_frame);
188
189         tt = tvbparse_init(tvb,0,-1,stack,want_ignore);
190         current_frame->start_offset = 0;
191
192         root_ns = NULL;
193
194         if (pinfo->match_string)
195                 root_ns = g_hash_table_lookup(media_types,pinfo->match_string);
196
197         if (! root_ns ) {
198                 root_ns = &xml_ns;
199                 colinfo_str = "/XML";
200         } else {
201                 colinfo_str = ep_strdup_printf("/%s",root_ns->name);
202                 ascii_strup_inplace(colinfo_str);
203         }
204
205         if (check_col(pinfo->cinfo, COL_PROTOCOL))
206                 col_append_str(pinfo->cinfo, COL_PROTOCOL, colinfo_str);
207
208         current_frame->ns = root_ns;
209
210         current_frame->item = proto_tree_add_item(tree,current_frame->ns->hf_tag,tvb,0,-1,FALSE);
211         current_frame->tree = proto_item_add_subtree(current_frame->item,current_frame->ns->ett);
212         current_frame->last_item = current_frame->item;
213
214         while(( tok = tvbparse_get(tt, want) )) ;
215
216         pinfo->private_data = current_frame;  /* pass XML structure to the dissector calling XML */
217 }
218
219 static gboolean dissect_xml_heur(tvbuff_t *tvb, packet_info *pinfo, proto_tree *tree) {
220         if ( (pref_heuristic_media || pref_heuristic_tcp || pref_heuristic_udp)
221              && tvbparse_peek(tvbparse_init(tvb,0,-1,NULL,want_ignore), want_heur)) {
222                 dissect_xml(tvb, pinfo, tree);
223                 return TRUE;
224         } else {
225                 return FALSE;
226         }
227 }
228
229 xml_frame_t *xml_get_tag(xml_frame_t *frame, const gchar *name) {
230         xml_frame_t *tag = NULL;
231
232         xml_frame_t *xml_item = frame->firts_child;
233         while (xml_item) {
234                 if ((xml_item->type == XML_FRAME_TAG)) {
235                         if (!name) {  /* get the 1st tag */
236                         tag = xml_item;
237                                 break;
238                         } else if (xml_item->name_orig_case && !strcmp(xml_item->name_orig_case, name)) {
239                         tag = xml_item;
240                                 break;
241                         }
242                 }
243                 xml_item = xml_item->next_sibling;
244         }
245
246         return tag;
247 }
248
249 xml_frame_t *xml_get_attrib(xml_frame_t *frame, const gchar *name) {
250         xml_frame_t *attr = NULL;
251
252         xml_frame_t *xml_item = frame->firts_child;
253         while (xml_item) {
254                 if ((xml_item->type == XML_FRAME_ATTRIB) && 
255                         xml_item->name_orig_case && !strcmp(xml_item->name_orig_case, name)) {
256                 attr = xml_item;
257                         break;
258                 }
259                 xml_item = xml_item->next_sibling;
260         }
261
262         return attr;
263 }
264
265 xml_frame_t *xml_get_cdata(xml_frame_t *frame) {
266         xml_frame_t *cdata = NULL;
267
268         xml_frame_t *xml_item = frame->firts_child;
269         while (xml_item) {
270                 if ((xml_item->type == XML_FRAME_CDATA)) {
271                 cdata = xml_item;
272                         break;
273                 }
274                 xml_item = xml_item->next_sibling;
275         }
276
277         return cdata;
278 }
279
280 static void after_token(void* tvbparse_data, const void* wanted_data _U_, tvbparse_elem_t* tok) {
281         GPtrArray* stack = tvbparse_data;
282         xml_frame_t* current_frame = g_ptr_array_index(stack,stack->len - 1);
283         int hfid;
284         gboolean is_cdata = FALSE;
285         proto_item* pi;
286         xml_frame_t* new_frame;
287
288         if (tok->id == XML_CDATA) {
289                 hfid = current_frame->ns ? current_frame->ns->hf_cdata : xml_ns.hf_cdata;
290                 is_cdata = TRUE;
291         } else if ( tok->id > 0) {
292                 hfid = tok->id;
293         } else {
294                 hfid = xml_ns.hf_cdata;
295         }
296
297         pi = proto_tree_add_item(current_frame->tree, hfid, tok->tvb, tok->offset, tok->len, FALSE);
298
299         proto_item_set_text(pi, "%s",
300                                                 tvb_format_text(tok->tvb,tok->offset,tok->len));
301
302         if (is_cdata) {
303                 new_frame = ep_alloc(sizeof(xml_frame_t));
304                 new_frame->type = XML_FRAME_CDATA;
305                 new_frame->name = NULL;
306                 new_frame->name_orig_case = NULL;
307                 new_frame->value = tvb_new_subset(tok->tvb, tok->offset, tok->len, tok->len);
308                 insert_xml_frame(current_frame, new_frame);
309                 new_frame->item = pi;
310                 new_frame->last_item = pi;
311                 new_frame->tree = NULL;
312                 new_frame->start_offset = tok->offset;
313                 new_frame->ns = NULL;
314         }
315 }
316
317 static void before_xmpli(void* tvbparse_data, const void* wanted_data _U_, tvbparse_elem_t* tok) {
318         GPtrArray* stack = tvbparse_data;
319         xml_frame_t* current_frame = g_ptr_array_index(stack,stack->len - 1);
320         proto_item* pi;
321         proto_tree* pt;
322         tvbparse_elem_t* name_tok = tok->sub->next;
323         gchar* name = tvb_get_ephemeral_string(name_tok->tvb,name_tok->offset,name_tok->len);
324         xml_ns_t* ns = g_hash_table_lookup(xmpli_names,name);
325         xml_frame_t* new_frame;
326
327         int hf_tag;
328         gint ett;
329
330         ascii_strdown_inplace(name);
331         if (!ns) {
332                 hf_tag = hf_xmlpi;
333                 ett = ett_xmpli;
334         } else {
335                 hf_tag = ns->hf_tag;
336                 ett = ns->ett;
337         }
338
339         pi = proto_tree_add_item(current_frame->tree,hf_tag,tok->tvb,tok->offset,tok->len,FALSE);
340
341         proto_item_set_text(pi, "%s", tvb_format_text(tok->tvb,tok->offset,(name_tok->offset - tok->offset) + name_tok->len));
342
343         pt = proto_item_add_subtree(pi,ett);
344
345         new_frame = ep_alloc(sizeof(xml_frame_t));
346         new_frame->type = XML_FRAME_XMPLI;
347         new_frame->name = name;
348         new_frame->name_orig_case = name;
349         new_frame->value = NULL;
350         insert_xml_frame(current_frame, new_frame);
351         new_frame->item = pi;
352         new_frame->last_item = pi;
353         new_frame->tree = pt;
354         new_frame->start_offset = tok->offset;
355         new_frame->ns = ns;
356
357         g_ptr_array_add(stack,new_frame);
358
359 }
360
361 static void after_xmlpi(void* tvbparse_data, const void* wanted_data _U_, tvbparse_elem_t* tok) {
362         GPtrArray* stack = tvbparse_data;
363         xml_frame_t* current_frame = g_ptr_array_index(stack,stack->len - 1);
364
365         proto_tree_add_text(current_frame->tree,
366                                                    tok->tvb, tok->offset, tok->len, "%s",
367                                                    tvb_format_text(tok->tvb,tok->offset,tok->len));
368
369         if (stack->len > 1) {
370                 g_ptr_array_remove_index_fast(stack,stack->len - 1);
371         } else {
372                 proto_tree_add_text(current_frame->tree,tok->tvb,tok->offset,tok->len,"[ ERROR: Closing an unopened xmpli tag ]");
373         }
374 }
375
376 static void before_tag(void* tvbparse_data, const void* wanted_data _U_, tvbparse_elem_t* tok) {
377         GPtrArray* stack = tvbparse_data;
378         xml_frame_t* current_frame = g_ptr_array_index(stack,stack->len - 1);
379         tvbparse_elem_t* name_tok = tok->sub->next;
380         gchar* root_name;
381         gchar *name = NULL, *name_orig_case = NULL;
382         xml_ns_t* ns;
383         xml_frame_t* new_frame;
384         proto_item* pi;
385         proto_tree* pt;
386
387         if (name_tok->sub->id == XML_SCOPED_NAME) {
388                 tvbparse_elem_t* root_tok = name_tok->sub->sub;
389                 tvbparse_elem_t* leaf_tok = name_tok->sub->sub->next->next;
390                 xml_ns_t* nameroot_ns;
391
392                 root_name = (gchar*)tvb_get_ephemeral_string(root_tok->tvb,root_tok->offset,root_tok->len);
393                 name = (gchar*)tvb_get_ephemeral_string(leaf_tok->tvb,leaf_tok->offset,leaf_tok->len);
394                 name_orig_case = name;
395
396                 nameroot_ns = g_hash_table_lookup(xml_ns.elements,root_name);
397
398                 if(nameroot_ns) {
399                         ns = g_hash_table_lookup(nameroot_ns->elements,name);
400                         if (!ns) {
401                                 ns = &unknown_ns;
402                         }
403                 } else {
404                         ns = &unknown_ns;
405                 }
406
407         } else {
408                 name = tvb_get_ephemeral_string(name_tok->tvb,name_tok->offset,name_tok->len);
409                 name_orig_case = ep_strdup(name);
410                 ascii_strdown_inplace(name);
411
412                 if(current_frame->ns) {
413                         ns = g_hash_table_lookup(current_frame->ns->elements,name);
414
415                         if (!ns) {
416                                 if (! ( ns = g_hash_table_lookup(root_ns->elements,name) ) ) {
417                                         ns = &unknown_ns;
418                                 }
419                         }
420                 } else {
421                         ns = &unknown_ns;
422                 }
423         }
424
425         pi = proto_tree_add_item(current_frame->tree,ns->hf_tag,tok->tvb,tok->offset,tok->len,FALSE);
426         proto_item_set_text(pi, "%s", tvb_format_text(tok->tvb,tok->offset,(name_tok->offset - tok->offset) + name_tok->len));
427
428         pt = proto_item_add_subtree(pi,ns->ett);
429
430         new_frame = ep_alloc(sizeof(xml_frame_t));
431         new_frame->type = XML_FRAME_TAG;
432         new_frame->name = name;
433         new_frame->name_orig_case = name_orig_case;
434         new_frame->value = NULL;
435         insert_xml_frame(current_frame, new_frame);
436         new_frame->item = pi;
437         new_frame->last_item = pi;
438         new_frame->tree = pt;
439         new_frame->start_offset = tok->offset;
440         new_frame->ns = ns;
441
442         g_ptr_array_add(stack,new_frame);
443
444 }
445
446 static void after_open_tag(void* tvbparse_data, const void* wanted_data _U_, tvbparse_elem_t* tok _U_) {
447         GPtrArray* stack = tvbparse_data;
448         xml_frame_t* current_frame = g_ptr_array_index(stack,stack->len - 1);
449
450         proto_item_append_text(current_frame->last_item,">");
451 }
452
453 static void after_closed_tag(void* tvbparse_data, const void* wanted_data _U_, tvbparse_elem_t* tok) {
454         GPtrArray* stack = tvbparse_data;
455         xml_frame_t* current_frame = g_ptr_array_index(stack,stack->len - 1);
456
457         proto_item_append_text(current_frame->last_item,"/>");
458
459         if (stack->len > 1) {
460                 g_ptr_array_remove_index_fast(stack,stack->len - 1);
461         } else {
462                 proto_tree_add_text(current_frame->tree,tok->tvb,tok->offset,tok->len,"[ ERROR: Closing an unopened tag ]");
463         }
464 }
465
466 static void after_untag(void* tvbparse_data, const void* wanted_data _U_, tvbparse_elem_t* tok){
467         GPtrArray* stack = tvbparse_data;
468         xml_frame_t* current_frame = g_ptr_array_index(stack,stack->len - 1);
469
470         proto_item_set_len(current_frame->item, (tok->offset - current_frame->start_offset) + tok->len);
471
472         proto_tree_add_text(current_frame->tree,tok->tvb,tok->offset,tok->len,"%s",
473                                                 tvb_format_text(tok->tvb,tok->offset,tok->len));
474
475         if (stack->len > 1) {
476                 g_ptr_array_remove_index_fast(stack,stack->len - 1);
477         } else {
478                 proto_tree_add_text(current_frame->tree,tok->tvb,tok->offset,tok->len,
479                                                         "[ ERROR: Closing an unopened tag ]");
480         }
481 }
482
483 static void before_dtd_doctype(void* tvbparse_data, const void* wanted_data _U_, tvbparse_elem_t* tok){
484         GPtrArray* stack = tvbparse_data;
485         xml_frame_t* current_frame = g_ptr_array_index(stack,stack->len - 1);
486         xml_frame_t* new_frame;
487         tvbparse_elem_t* name_tok = tok->sub->next->next->next->sub->sub;
488         proto_tree* dtd_item = proto_tree_add_item(current_frame->tree, hf_doctype,
489                                                                                            name_tok->tvb, name_tok->offset, name_tok->len, FALSE);
490
491         proto_item_set_text(dtd_item,"%s",tvb_format_text(tok->tvb,tok->offset,tok->len));
492
493         new_frame = ep_alloc(sizeof(xml_frame_t));
494         new_frame->type = XML_FRAME_DTD_DOCTYPE;
495         new_frame->name = (gchar*)tvb_get_ephemeral_string(name_tok->tvb,name_tok->offset,name_tok->len);
496         new_frame->name_orig_case = new_frame->name;
497         new_frame->value = NULL;
498         insert_xml_frame(current_frame, new_frame);
499         new_frame->item = dtd_item;
500         new_frame->last_item = dtd_item;
501         new_frame->tree = proto_item_add_subtree(dtd_item,ett_dtd);
502         new_frame->start_offset = tok->offset;
503         new_frame->ns = NULL;
504
505         g_ptr_array_add(stack,new_frame);
506 }
507
508 static void pop_stack(void* tvbparse_data, const void* wanted_data _U_, tvbparse_elem_t* tok _U_) {
509         GPtrArray* stack = tvbparse_data;
510         xml_frame_t* current_frame = g_ptr_array_index(stack,stack->len - 1);
511
512         if (stack->len > 1) {
513                 g_ptr_array_remove_index_fast(stack,stack->len - 1);
514         } else {
515                 proto_tree_add_text(current_frame->tree,tok->tvb,tok->offset,tok->len,
516                                                         "[ ERROR: Closing an unopened tag ]");
517         }
518 }
519
520 static void after_dtd_close(void* tvbparse_data, const void* wanted_data _U_, tvbparse_elem_t* tok){
521         GPtrArray* stack = tvbparse_data;
522         xml_frame_t* current_frame = g_ptr_array_index(stack,stack->len - 1);
523
524         proto_tree_add_text(current_frame->tree,tok->tvb,tok->offset,tok->len,"%s",
525                                                 tvb_format_text(tok->tvb,tok->offset,tok->len));
526         if (stack->len > 1) {
527                 g_ptr_array_remove_index_fast(stack,stack->len - 1);
528         } else {
529                 proto_tree_add_text(current_frame->tree,tok->tvb,tok->offset,tok->len,"[ ERROR: Closing an unopened tag ]");
530         }
531 }
532
533 static void get_attrib_value(void* tvbparse_data _U_, const void* wanted_data _U_, tvbparse_elem_t* tok) {
534         tok->data = tok->sub;
535 }
536
537 static void after_attrib(void* tvbparse_data, const void* wanted_data _U_, tvbparse_elem_t* tok) {
538         GPtrArray* stack = tvbparse_data;
539         xml_frame_t* current_frame = g_ptr_array_index(stack,stack->len - 1);
540         gchar *name = NULL, *name_orig_case = NULL;
541         tvbparse_elem_t* value;
542         tvbparse_elem_t* value_part = tok->sub->next->next->data;
543         int* hfidp;
544         int hfid;
545         proto_item* pi;
546         xml_frame_t* new_frame;
547
548         name = tvb_get_ephemeral_string(tok->sub->tvb,tok->sub->offset,tok->sub->len);
549         name_orig_case = ep_strdup(name);
550         ascii_strdown_inplace(name);
551
552         if(current_frame->ns && (hfidp = g_hash_table_lookup(current_frame->ns->attributes,name) )) {
553                 hfid = *hfidp;
554                 value = value_part;
555         } else {
556                 hfid = hf_unknowwn_attrib;
557                 value = tok;
558         }
559
560         pi = proto_tree_add_item(current_frame->tree,hfid,value->tvb,value->offset,value->len,FALSE);
561         proto_item_set_text(pi, "%s", tvb_format_text(tok->tvb,tok->offset,tok->len));
562
563         new_frame = ep_alloc(sizeof(xml_frame_t));
564         new_frame->type = XML_FRAME_ATTRIB;
565         new_frame->name = name;
566         new_frame->name_orig_case = name_orig_case;
567         new_frame->value = tvb_new_subset(value_part->tvb, value_part->offset, value_part->len, value_part->len);
568         insert_xml_frame(current_frame, new_frame);
569         new_frame->item = pi;
570         new_frame->last_item = pi;
571         new_frame->tree = NULL;
572         new_frame->start_offset = tok->offset;
573         new_frame->ns = NULL;
574
575 }
576
577 static void unrecognized_token(void* tvbparse_data, const void* wanted_data _U_, tvbparse_elem_t* tok _U_){
578         GPtrArray* stack = tvbparse_data;
579         xml_frame_t* current_frame = g_ptr_array_index(stack,stack->len - 1);
580
581         proto_tree_add_text(current_frame->tree,tok->tvb,tok->offset,tok->len,"[ ERROR: Unrecognized text ]");
582
583 }
584
585
586
587 static void init_xml_parser(void) {
588         tvbparse_wanted_t* want_name = tvbparse_chars(-1,1,0,"abcdefghijklmnopqrstuvwxyz-_ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789",NULL,NULL,NULL);
589         tvbparse_wanted_t* want_attr_name = tvbparse_chars(-1,1,0,"abcdefghijklmnopqrstuvwxyz-_ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789:",NULL,NULL,NULL);
590
591         tvbparse_wanted_t* want_scoped_name = tvbparse_set_seq(XML_SCOPED_NAME, NULL, NULL, NULL,
592                                                                want_name,
593                                                                tvbparse_char(-1,":",NULL,NULL,NULL),
594                                                                want_name,
595                                                                NULL);
596
597         tvbparse_wanted_t* want_tag_name = tvbparse_set_oneof(0, NULL, NULL, NULL,
598                                                               want_scoped_name,
599                                                               want_name,
600                                                               NULL);
601
602         tvbparse_wanted_t* want_attrib_value = tvbparse_set_oneof(0, NULL, NULL, get_attrib_value,
603                                                                   tvbparse_quoted(-1, NULL, NULL, tvbparse_shrink_token_cb,'\"','\\'),
604                                                                   tvbparse_quoted(-1, NULL, NULL, tvbparse_shrink_token_cb,'\'','\\'),
605                                                                   tvbparse_chars(-1,1,0,"0123456789",NULL,NULL,NULL),
606                                                                   want_name,
607                                                                   NULL);
608
609         tvbparse_wanted_t* want_attributes = tvbparse_one_or_more(-1, NULL, NULL, NULL,
610                                                                   tvbparse_set_seq(-1, NULL, NULL, after_attrib,
611                                                                                    want_attr_name,
612                                                                                    tvbparse_char(-1,"=",NULL,NULL,NULL),
613                                                                                    want_attrib_value,
614                                                                                    NULL));
615
616         tvbparse_wanted_t* want_stoptag = tvbparse_set_oneof(-1,NULL,NULL,NULL,
617                                                              tvbparse_char(-1, ">", NULL, NULL, after_open_tag),
618                                                              tvbparse_string(-1, "/>", NULL, NULL, after_closed_tag),
619                                                              NULL);
620
621         tvbparse_wanted_t* want_stopxmlpi = tvbparse_string(-1,"?>",NULL,NULL,after_xmlpi);
622
623         tvbparse_wanted_t* want_comment = tvbparse_set_seq(hf_comment,NULL,NULL,after_token,
624                                                            tvbparse_string(-1,"<!--",NULL,NULL,NULL),
625                                                            tvbparse_until(-1,NULL,NULL,NULL,
626                                                                           tvbparse_string(-1,"-->",NULL,NULL,NULL),
627                                                                           TP_UNTIL_INCLUDE),
628                                                            NULL);
629
630         tvbparse_wanted_t* want_xmlpi = tvbparse_set_seq(hf_xmlpi,NULL,before_xmpli,NULL,
631                                                          tvbparse_string(-1,"<?",NULL,NULL,NULL),
632                                                          want_name,
633                                                          tvbparse_set_oneof(-1,NULL,NULL,NULL,
634                                                                             want_stopxmlpi,
635                                                                             tvbparse_set_seq(-1,NULL,NULL,NULL,
636                                                                                              want_attributes,
637                                                                                              want_stopxmlpi,
638                                                                                              NULL),
639                                                                             NULL),
640                                                          NULL);
641
642         tvbparse_wanted_t* want_closing_tag = tvbparse_set_seq(0,NULL,NULL,after_untag,
643                                                                tvbparse_char(-1, "<", NULL, NULL, NULL),
644                                                                tvbparse_char(-1, "/", NULL, NULL, NULL),
645                                                                want_tag_name,
646                                                                tvbparse_char(-1, ">", NULL, NULL, NULL),
647                                                                NULL);
648
649         tvbparse_wanted_t* want_doctype_start = tvbparse_set_seq(-1,NULL,before_dtd_doctype,NULL,
650                                                                  tvbparse_char(-1,"<",NULL,NULL,NULL),
651                                                                  tvbparse_char(-1,"!",NULL,NULL,NULL),
652                                                                  tvbparse_casestring(-1,"DOCTYPE",NULL,NULL,NULL),
653                                                                  tvbparse_set_oneof(-1,NULL,NULL,NULL,
654                                                                                     tvbparse_set_seq(-1,NULL,NULL,NULL,
655                                                                                                      want_name,
656                                                                                                      tvbparse_char(-1,"[",NULL,NULL,NULL),
657                                                                                                      NULL),
658                                                                                     tvbparse_set_seq(-1,NULL,NULL,pop_stack,
659                                                                                                      want_name,
660                                                                                                      tvbparse_set_oneof(-1,NULL,NULL,NULL,
661                                                                                                                         tvbparse_casestring(-1,"PUBLIC",NULL,NULL,NULL),
662                                                                                                                         tvbparse_casestring(-1,"SYSTEM",NULL,NULL,NULL),
663                                                                                                                         NULL),
664                                                                                                      tvbparse_until(-1,NULL,NULL,NULL,
665                                                                                                                     tvbparse_char(-1,">",NULL,NULL,NULL),
666                                                                                                                     TP_UNTIL_INCLUDE),
667                                                                                                      NULL),
668                                                                                     NULL),
669                                                                  NULL);
670
671         tvbparse_wanted_t* want_dtd_tag = tvbparse_set_seq(hf_dtd_tag,NULL,NULL,after_token,
672                                                            tvbparse_char(-1,"<",NULL,NULL,NULL),
673                                                            tvbparse_char(-1,"!",NULL,NULL,NULL),
674                                                            tvbparse_until(-1,NULL,NULL,NULL,
675                                                                           tvbparse_char(-1, ">", NULL, NULL, NULL),
676                                                                           TP_UNTIL_INCLUDE),
677                                                            NULL);
678
679         tvbparse_wanted_t* want_tag = tvbparse_set_seq(-1, NULL, before_tag, NULL,
680                                                        tvbparse_char(-1,"<",NULL,NULL,NULL),
681                                                        want_tag_name,
682                                                        tvbparse_set_oneof(-1,NULL,NULL,NULL,
683                                                                           tvbparse_set_seq(-1,NULL,NULL,NULL,
684                                                                                            want_attributes,
685                                                                                            want_stoptag,
686                                                                                            NULL),
687                                                                           want_stoptag,
688                                                                           NULL),
689                                                        NULL);
690
691         tvbparse_wanted_t* want_dtd_close = tvbparse_set_seq(-1,NULL,NULL,after_dtd_close,
692                                                              tvbparse_char(-1,"]",NULL,NULL,NULL),
693                                                              tvbparse_char(-1,">",NULL,NULL,NULL),
694                                                              NULL);
695
696         want_ignore = tvbparse_chars(-1,1,0," \t\r\n",NULL,NULL,NULL);
697
698
699         want = tvbparse_set_oneof(-1, NULL, NULL, NULL,
700                                   want_comment,
701                                   want_xmlpi,
702                                   want_closing_tag,
703                                   want_doctype_start,
704                                   want_dtd_close,
705                                   want_dtd_tag,
706                                   want_tag,
707                                   tvbparse_not_chars(XML_CDATA,1,0,"<",NULL,NULL,after_token),
708                                   tvbparse_not_chars(-1,1,0," \t\r\n",NULL,NULL,unrecognized_token),
709                                   NULL);
710
711         want_heur = tvbparse_set_oneof(-1, NULL, NULL, NULL,
712                                        want_comment,
713                                        want_xmlpi,
714                                        want_doctype_start,
715                                        want_dtd_tag,
716                                        want_tag,
717                                        NULL);
718
719 }
720
721
722 static xml_ns_t* xml_new_namespace(GHashTable* hash, gchar* name, ...) {
723         xml_ns_t* ns = g_malloc(sizeof(xml_ns_t));
724         va_list ap;
725         gchar* attr_name;
726
727         ns->name = g_strdup(name);
728         ns->hf_tag = -1;
729         ns->hf_cdata = -1;
730         ns->ett = -1;
731         ns->attributes = g_hash_table_new(g_str_hash,g_str_equal);
732         ns->elements = g_hash_table_new(g_str_hash,g_str_equal);
733
734         va_start(ap,name);
735
736         while(( attr_name = va_arg(ap,gchar*) )) {
737                 int* hfp = g_malloc(sizeof(int));
738                 *hfp = -1;
739                 g_hash_table_insert(ns->attributes,g_strdup(attr_name),hfp);
740         };
741
742         va_end(ap);
743
744         g_hash_table_insert(hash,ns->name,ns);
745
746         return ns;
747 }
748
749
750 static void add_xml_field(GArray* hfs, int* p_id, gchar* name, gchar* fqn) {
751         hf_register_info hfri;
752
753         hfri.p_id = p_id;
754         hfri.hfinfo.name = name;
755         hfri.hfinfo.abbrev = fqn;
756         hfri.hfinfo.type = FT_STRING;
757         hfri.hfinfo.display = BASE_NONE;
758         hfri.hfinfo.strings = NULL;
759         hfri.hfinfo.bitmask = 0x0;
760         hfri.hfinfo.blurb = NULL;
761         hfri.hfinfo.id = 0;
762         hfri.hfinfo.parent = 0;
763         hfri.hfinfo.ref_count = HF_REF_TYPE_NONE;
764         hfri.hfinfo.bitshift = 0;
765         hfri.hfinfo.same_name_next = NULL;
766         hfri.hfinfo.same_name_prev = NULL;
767
768         g_array_append_val(hfs,hfri);
769 }
770
771 static void add_xml_attribute_names(gpointer k, gpointer v, gpointer p) {
772         struct _attr_reg_data* d = p;
773         gchar* basename = g_strdup_printf("%s.%s",d->basename,(gchar*)k);
774         add_xml_field(d->hf, (int*) v, (gchar*)k, basename);
775 }
776
777
778 static void add_xmlpi_namespace(gpointer k _U_, gpointer v, gpointer p) {
779         xml_ns_t* ns = v;
780         gchar* basename = g_strdup_printf("%s.%s",(gchar*)p,ns->name);
781         gint* ett_p = &(ns->ett);
782         struct _attr_reg_data d;
783
784         add_xml_field(hf_arr, &(ns->hf_tag), basename, basename);
785
786         g_array_append_val(ett_arr,ett_p);
787
788         d.basename = basename;
789         d.hf = hf_arr;
790
791         g_hash_table_foreach(ns->attributes,add_xml_attribute_names,&d);
792
793 }
794
795 static void destroy_dtd_data(dtd_build_data_t* dtd_data) {
796         g_free(dtd_data->proto_name);
797         g_free(dtd_data->media_type);
798         g_free(dtd_data->description);
799         g_free(dtd_data->proto_root);
800
801         g_string_free(dtd_data->error,TRUE);
802
803         while(dtd_data->elements->len) {
804                 dtd_named_list_t* nl = g_ptr_array_remove_index_fast(dtd_data->elements,0);
805                 g_ptr_array_free(nl->list,TRUE);
806                 g_free(nl);
807         }
808
809         g_ptr_array_free(dtd_data->elements,TRUE);
810
811         while(dtd_data->attributes->len) {
812                 dtd_named_list_t* nl = g_ptr_array_remove_index_fast(dtd_data->attributes,0);
813                 g_ptr_array_free(nl->list,TRUE);
814                 g_free(nl);
815         }
816
817         g_ptr_array_free(dtd_data->attributes,TRUE);
818
819         g_free(dtd_data);
820 }
821
822 static void copy_attrib_item(gpointer k, gpointer v _U_, gpointer p) {
823         gchar* key = g_strdup(k);
824         int* value = g_malloc(sizeof(int));
825         GHashTable* dst = p;
826
827         *value = -1;
828         g_hash_table_insert(dst,key,value);
829
830 }
831
832 static GHashTable* copy_attributes_hash(GHashTable* src) {
833         GHashTable* dst = g_hash_table_new(g_str_hash,g_str_equal);
834
835         g_hash_table_foreach(src,copy_attrib_item,dst);
836
837         return dst;
838 }
839
840 static xml_ns_t* duplicate_element(xml_ns_t* orig) {
841         xml_ns_t* new_item  = g_malloc(sizeof(xml_ns_t));
842         guint i;
843
844         new_item->name = g_strdup(orig->name);
845         new_item->hf_tag = -1;
846         new_item->hf_cdata = -1;
847         new_item->ett = -1;
848         new_item->attributes = copy_attributes_hash(orig->attributes);
849         new_item->elements =  g_hash_table_new(g_str_hash,g_str_equal);
850         new_item->element_names = g_ptr_array_new();
851
852         for(i=0; i < orig->element_names->len; i++) {
853                 g_ptr_array_add(new_item->element_names,
854                                                    g_ptr_array_index(orig->element_names,i));
855         }
856
857         return new_item;
858 }
859
860 static gchar* fully_qualified_name(GPtrArray* hier, gchar* name, gchar* proto_name) {
861         guint i;
862         GString* s = g_string_new(proto_name);
863         gchar* str;
864         g_string_append(s,".");
865         
866         for (i = 1; i < hier->len; i++) {
867                 g_string_append_printf(s, "%s.",(gchar*)g_ptr_array_index(hier,i));
868         }
869
870         g_string_append(s,name);
871         str = s->str;
872         g_string_free(s,FALSE);
873
874         return str;
875 }
876
877
878 static xml_ns_t* make_xml_hier(gchar* elem_name,
879                                xml_ns_t* root,
880                                GHashTable* elements,
881                                GPtrArray* hier,
882                                GString* error,
883                                GArray* hfs,
884                                GArray* etts,
885                                char* proto_name) {
886         xml_ns_t* new;
887         xml_ns_t* orig;
888         gchar* fqn;
889         gint* ett_p;
890         struct _attr_reg_data d;
891         gboolean recurred = FALSE;
892         guint i;
893
894         if ( g_str_equal(elem_name,root->name) ) {
895                 return NULL;
896         }
897
898         if (! ( orig = g_hash_table_lookup(elements,elem_name) )) {
899                 g_string_append_printf(error,"element '%s' is not defined\n", elem_name);
900                 return NULL;
901         }
902
903         for (i = 0; i < hier->len; i++) {
904                 if( strcmp(elem_name,(gchar*) g_ptr_array_index(hier,i) ) == 0 ) {
905                         recurred = TRUE;
906                 }
907         }
908
909         if (recurred) {
910                 return NULL;
911         }
912
913         fqn = fully_qualified_name(hier,elem_name,proto_name);
914
915         new = duplicate_element(orig);
916         new->fqn = fqn;
917
918         add_xml_field(hfs, &(new->hf_tag), g_strdup(elem_name), fqn);
919         add_xml_field(hfs, &(new->hf_cdata), g_strdup(elem_name), fqn);
920
921         ett_p = &new->ett;
922         g_array_append_val(etts,ett_p);
923
924         d.basename = fqn;
925         d.hf = hfs;
926
927         g_hash_table_foreach(new->attributes,add_xml_attribute_names,&d);
928
929         while(new->element_names->len) {
930                 gchar* child_name = g_ptr_array_remove_index(new->element_names,0);
931                 xml_ns_t* child_element = NULL;
932
933                 g_ptr_array_add(hier,elem_name);
934                 child_element = make_xml_hier(child_name, root, elements, hier,error,hfs,etts,proto_name);
935                 g_ptr_array_remove_index_fast(hier,hier->len - 1);
936
937                 if (child_element) {
938                         g_hash_table_insert(new->elements,child_element->name,child_element);
939                 }
940         }
941
942         g_ptr_array_free(new->element_names,TRUE);
943         new->element_names = NULL;
944         return new;
945 }
946
947 static gboolean free_both(gpointer k, gpointer v, gpointer p _U_) {
948         g_free(k);
949         g_free(v);
950         return TRUE;
951 }
952
953 static gboolean free_elements(gpointer k _U_, gpointer v, gpointer p _U_) {
954         xml_ns_t* e = v;
955         g_free(e->name);
956         g_hash_table_foreach_remove(e->attributes,free_both,NULL);
957         g_hash_table_destroy(e->attributes);
958         g_hash_table_destroy(e->elements);
959
960         while (e->element_names->len) {
961                 g_free(g_ptr_array_remove_index(e->element_names,0));
962         }
963
964         g_ptr_array_free(e->element_names,TRUE);
965         g_free(e);
966
967         return TRUE;
968 }
969
970 static void register_dtd(dtd_build_data_t* dtd_data, GString* errors) {
971         GHashTable* elements = g_hash_table_new(g_str_hash,g_str_equal);
972         gchar* root_name = NULL;
973         xml_ns_t* root_element = NULL;
974         GArray* hfs;
975         GArray* etts;
976         GPtrArray* hier;
977         gchar* curr_name;
978         GPtrArray* element_names = g_ptr_array_new();
979
980         /* we first populate elements with the those coming from the parser */
981         while(dtd_data->elements->len) {
982                 dtd_named_list_t* nl = g_ptr_array_remove_index(dtd_data->elements,0);
983                 xml_ns_t* element = g_malloc(sizeof(xml_ns_t));
984
985                 /* we will use the first element found as root in case no other one was given. */
986                 if (root_name == NULL)
987                         root_name = g_strdup(nl->name);
988
989                 element->name = nl->name;
990                 element->element_names = nl->list;
991                 element->hf_tag = -1;
992                 element->hf_cdata = -1;
993                 element->ett = -1;
994                 element->attributes = g_hash_table_new(g_str_hash,g_str_equal);
995                 element->elements = g_hash_table_new(g_str_hash,g_str_equal);
996
997                 if( g_hash_table_lookup(elements,element->name) ) {
998                         g_string_append_printf(errors,"element %s defined more than once\n", element->name);
999                         free_elements(NULL,element,NULL);
1000                 } else {
1001                         g_hash_table_insert(elements,element->name,element);
1002                         g_ptr_array_add(element_names,g_strdup(element->name));
1003                 }
1004
1005                 g_free(nl);
1006         }
1007
1008         /* then we add the attributes to its relative elements */
1009         while(dtd_data->attributes->len) {
1010                 dtd_named_list_t* nl = g_ptr_array_remove_index(dtd_data->attributes,0);
1011                 xml_ns_t* element = g_hash_table_lookup(elements,nl->name);
1012
1013                 if (!element) {
1014                         g_string_append_printf(errors,"element %s is not defined\n", nl->name);
1015
1016                         goto next_attribute;
1017                 }
1018
1019                 while(nl->list->len) {
1020                         gchar* name = g_ptr_array_remove_index(nl->list,0);
1021                         int* id_p = g_malloc(sizeof(int));
1022
1023                         *id_p = -1;
1024                         g_hash_table_insert(element->attributes,name,id_p);
1025                 }
1026
1027 next_attribute:
1028                 g_free(nl->name);
1029                 g_ptr_array_free(nl->list,TRUE);
1030                 g_free(nl);
1031         }
1032
1033         /* if a proto_root is defined in the dtd we'll use that as root */
1034         if( dtd_data->proto_root ) {
1035                 g_free(root_name);
1036                 root_name = g_strdup(dtd_data->proto_root);
1037         }
1038
1039         /* we use a stack with the names to avoid recurring infinitelly */
1040         hier = g_ptr_array_new();
1041
1042         /*
1043          * if a proto name was given in the dtd the dtd will be used as a protocol
1044          * or else the dtd will be loaded as a branch of the xml namespace
1045          */
1046         if( ! dtd_data->proto_name ) {
1047                 hfs = hf_arr;
1048                 etts = ett_arr;
1049                 g_ptr_array_add(hier,g_strdup("xml"));
1050                 root_element = &xml_ns;
1051         } else {
1052                 /*
1053                  * if we were given a proto_name the namespace will be registered
1054                  * as an independent protocol with its own hf and ett arrays.
1055                  */
1056                 hfs = g_array_new(FALSE,FALSE,sizeof(hf_register_info));
1057                 etts = g_array_new(FALSE,FALSE,sizeof(gint*));
1058         }
1059
1060         /* the root element of the dtd's namespace */
1061         root_element = g_malloc(sizeof(xml_ns_t));
1062         root_element->name = g_strdup(root_name);
1063         root_element->fqn = dtd_data->proto_name ? g_strdup(dtd_data->proto_name) : root_element->name;
1064         root_element->hf_tag = -1;
1065         root_element->hf_cdata = -1;
1066         root_element->ett = -1;
1067         root_element->elements = g_hash_table_new(g_str_hash,g_str_equal);
1068         root_element->element_names = element_names;
1069
1070         /*
1071          * we can either create a namespace as a flat namespace
1072          * in which all the elements are at the root level
1073          * or we can create a recursive namespace
1074          */
1075         if (dtd_data->recursion) {
1076                 xml_ns_t* orig_root;
1077
1078                 make_xml_hier(root_name, root_element, elements,hier,errors,hfs,etts,dtd_data->proto_name);
1079
1080                 g_hash_table_insert(root_element->elements,root_element->name,root_element);
1081
1082                 orig_root = g_hash_table_lookup(elements,root_name);
1083
1084                 /* if the root element was defined copy its attrlist to the child */
1085                 if(orig_root) {
1086                         struct _attr_reg_data d;
1087
1088                         d.basename = dtd_data->proto_name;
1089                         d.hf = hfs;
1090
1091                         root_element->attributes = copy_attributes_hash(orig_root->attributes);
1092                         g_hash_table_foreach(root_element->attributes,add_xml_attribute_names,&d);
1093                 } else {
1094                         root_element->attributes = g_hash_table_new(g_str_hash,g_str_equal);
1095                 }
1096
1097                 /* we then create all the sub hierachies to catch the recurred cases */
1098                 g_ptr_array_add(hier,root_name);
1099
1100                 while(root_element->element_names->len) {
1101                         curr_name = g_ptr_array_remove_index(root_element->element_names,0);
1102
1103                         if( ! g_hash_table_lookup(root_element->elements,curr_name) ) {
1104                                 xml_ns_t* new = make_xml_hier(curr_name, root_element, elements,hier,errors,hfs,etts,dtd_data->proto_name);
1105                                 g_hash_table_insert(root_element->elements,new->name,new);
1106                         }
1107
1108                         g_free(curr_name);
1109                 }
1110
1111         } else {
1112                 /* a flat namespace */
1113                 g_ptr_array_add(hier,root_name);
1114
1115                 root_element->attributes = g_hash_table_new(g_str_hash,g_str_equal);
1116
1117                 while(root_element->element_names->len) {
1118                         xml_ns_t* new;
1119                         gint* ett_p;
1120                         struct _attr_reg_data d;
1121
1122                         curr_name = g_ptr_array_remove_index(root_element->element_names,0);
1123                         new = duplicate_element(g_hash_table_lookup(elements,curr_name));
1124                         new->fqn = fully_qualified_name(hier, curr_name, root_name);
1125
1126                         add_xml_field(hfs, &(new->hf_tag), curr_name, new->fqn);
1127                         add_xml_field(hfs, &(new->hf_cdata), curr_name, new->fqn);
1128
1129                         d.basename = new->fqn;
1130                         d.hf = hfs;
1131
1132                         g_hash_table_foreach(new->attributes,add_xml_attribute_names,&d);
1133
1134                         ett_p = &new->ett;
1135                         g_array_append_val(etts,ett_p);
1136
1137                         g_ptr_array_free(new->element_names,TRUE);
1138
1139                         g_hash_table_insert(root_element->elements,new->name,new);
1140                 }
1141         }
1142
1143         g_ptr_array_free(element_names,TRUE);
1144
1145         g_ptr_array_free(hier,TRUE);
1146
1147         /*
1148          * if we were given a proto_name the namespace will be registered
1149          * as an independent protocol.
1150          */
1151         if( dtd_data->proto_name ) {
1152                 gint* ett_p;
1153
1154                 if ( ! dtd_data->description) {
1155                         dtd_data->description = g_strdup(root_name);
1156                 }
1157
1158                 ett_p = &root_element->ett;
1159                 g_array_append_val(etts,ett_p);
1160
1161                 add_xml_field(hfs, &root_element->hf_cdata, root_element->name, root_element->fqn);
1162
1163                 root_element->hf_tag = proto_register_protocol(dtd_data->description, dtd_data->proto_name, dtd_data->proto_name);
1164                 proto_register_field_array(root_element->hf_tag, (hf_register_info*)g_array_data(hfs), hfs->len);
1165                 proto_register_subtree_array((gint**)g_array_data(etts), etts->len);
1166
1167                 if (dtd_data->media_type) {
1168                         g_hash_table_insert(media_types,dtd_data->media_type,root_element);
1169                         dtd_data->media_type = NULL;
1170                 }
1171
1172                 dtd_data->description = NULL;
1173                 dtd_data->proto_name = NULL;
1174                 g_array_free(hfs,FALSE);
1175                 g_array_free(etts,TRUE);
1176         }
1177
1178         g_hash_table_insert(xml_ns.elements,root_element->name,root_element);
1179
1180         g_hash_table_foreach_remove(elements,free_elements,NULL);
1181         g_hash_table_destroy(elements);
1182
1183         destroy_dtd_data(dtd_data);
1184         g_free(root_name);
1185 }
1186
/*
 * Directory iteration shims built on the GLib GDir calls.
 * OPENDIR_OP expects a variable named `dummy` to be in scope where it is
 * used; it is passed as the error argument of g_dir_open().
 */
#  define DIRECTORY_T GDir
#  define FILE_T gchar
#  define OPENDIR_OP(name) g_dir_open(name, 0, dummy)
#  define DIRGETNEXT_OP(dir) g_dir_read_name(dir)
/* no trailing ';' here, so that the macro is usable inside an expression */
#  define GETFNAME_OP(file) (file)
#  define CLOSEDIR_OP(dir) g_dir_close(dir)
1193
1194 static void init_xml_names(void) {
1195         xml_ns_t* xmlpi_xml_ns;
1196         guint i;
1197         DIRECTORY_T* dir;
1198         const FILE_T* file;
1199         const gchar* filename;
1200         gchar* dirname;
1201
1202         GError** dummy = g_malloc(sizeof(GError *));
1203         *dummy = NULL;
1204
1205         xmpli_names = g_hash_table_new(g_str_hash,g_str_equal);
1206         media_types = g_hash_table_new(g_str_hash,g_str_equal);
1207
1208         unknown_ns.elements = xml_ns.elements = g_hash_table_new(g_str_hash,g_str_equal);
1209         unknown_ns.attributes = xml_ns.attributes = g_hash_table_new(g_str_hash,g_str_equal);
1210
1211         xmlpi_xml_ns = xml_new_namespace(xmpli_names,"xml","version","encoding","standalone",NULL);
1212
1213         g_hash_table_destroy(xmlpi_xml_ns->elements);
1214         xmlpi_xml_ns->elements = NULL;
1215
1216
1217         dirname = get_persconffile_path("dtds", FALSE, FALSE);
1218
1219         if (test_for_directory(dirname) != EISDIR) {
1220                 /* Although dir isn't a directory it may still use memory */
1221                 g_free(dirname);
1222                 dirname = get_datafile_path("dtds");
1223         }
1224
1225         if (test_for_directory(dirname) == EISDIR) {
1226
1227                 if ((dir = OPENDIR_OP(dirname)) != NULL) {
1228                         while ((file = DIRGETNEXT_OP(dir)) != NULL) {
1229                                 guint namelen;
1230                                 filename = GETFNAME_OP(file);
1231
1232                                 namelen = (int)strlen(filename);
1233                                 if ( namelen > 4 && ( g_ascii_strcasecmp(filename+(namelen-4),".dtd")  == 0 ) ) {
1234                                         GString* errors = g_string_new("");
1235                                         GString* preparsed = dtd_preparse(dirname, filename, errors);
1236                                         dtd_build_data_t* dtd_data;
1237
1238                                         if (errors->len) {
1239                                                 report_failure("Dtd Preparser in file %s%c%s: %s",dirname,G_DIR_SEPARATOR,filename,errors->str);
1240                                                 continue;
1241                                         }
1242
1243                                         dtd_data = dtd_parse(preparsed);
1244
1245                                         g_string_free(preparsed,TRUE);
1246
1247                                         if (dtd_data->error->len) {
1248                                                 report_failure("Dtd Parser in file %s%c%s: %s",dirname,G_DIR_SEPARATOR,filename,dtd_data->error->str);
1249                                                 destroy_dtd_data(dtd_data);
1250                                                 continue;
1251                                         }
1252
1253                                         register_dtd(dtd_data,errors);
1254
1255                                         if (errors->len) {
1256                                                 report_failure("Dtd Registration in file: %s%c%s: %s",dirname,G_DIR_SEPARATOR,filename,errors->str);
1257                                                 g_string_free(errors,TRUE);
1258                                                 continue;
1259                                         }
1260                                 }
1261                         }
1262
1263                         CLOSEDIR_OP(dir);
1264                 }
1265         }
1266
1267         g_free(dirname);
1268
1269         for(i=0;i<array_length(default_media_types);i++) {
1270                 if( ! g_hash_table_lookup(media_types,default_media_types[i]) ) {
1271                         g_hash_table_insert(media_types,(gpointer)default_media_types[i],&xml_ns);
1272                 }
1273         }
1274
1275         g_hash_table_foreach(xmpli_names,add_xmlpi_namespace,"xml.xmlpi");
1276
1277         g_free(dummy);
1278 }
1279
1280 static void range_delete_xml_tcp_callback(guint32 port) {
1281         dissector_delete("tcp.port", port, xml_handle);
1282 }
1283
1284 static void range_add_xml_tcp_callback(guint32 port) {
1285         dissector_add("tcp.port", port, xml_handle);
1286 }
1287
1288 static void apply_prefs(void) {
1289         if (pref_heuristic_media_save != pref_heuristic_media) {
1290                 if (pref_heuristic_media) {
1291                         heur_dissector_add("http", dissect_xml_heur, xml_ns.hf_tag);
1292                         heur_dissector_add("sip", dissect_xml_heur, xml_ns.hf_tag);
1293                         heur_dissector_add("media", dissect_xml_heur, xml_ns.hf_tag);
1294                         pref_heuristic_media_save = TRUE;
1295                 } else {
1296                         heur_dissector_delete("http", dissect_xml_heur, xml_ns.hf_tag);
1297                         heur_dissector_delete("sip", dissect_xml_heur, xml_ns.hf_tag);
1298                         heur_dissector_delete("media", dissect_xml_heur, xml_ns.hf_tag);
1299                         pref_heuristic_media_save = FALSE;
1300                 }
1301         }
1302         
1303         if (pref_heuristic_tcp_save != pref_heuristic_tcp ) {
1304                 if (pref_heuristic_tcp) {
1305                         heur_dissector_add("tcp", dissect_xml_heur, xml_ns.hf_tag);
1306                         pref_heuristic_tcp_save = TRUE;
1307                 } else {
1308                         heur_dissector_delete("tcp", dissect_xml_heur, xml_ns.hf_tag);
1309                         pref_heuristic_tcp_save = FALSE;
1310                 }
1311         }
1312         
1313         if (pref_heuristic_udp_save != pref_heuristic_udp ) {
1314                 if (pref_heuristic_udp) {
1315                         heur_dissector_add("udp", dissect_xml_heur, xml_ns.hf_tag);
1316                         pref_heuristic_udp_save = TRUE;
1317                 } else {
1318                         heur_dissector_delete("udp", dissect_xml_heur, xml_ns.hf_tag);
1319                         pref_heuristic_udp_save = FALSE;
1320                 }
1321         }
1322
1323         range_foreach(xml_tcp_range, range_delete_xml_tcp_callback);
1324         g_free(xml_tcp_range);
1325         xml_tcp_range = range_copy(global_xml_tcp_range);
1326         range_foreach(xml_tcp_range, range_add_xml_tcp_callback);       
1327 }
1328
1329 void
1330 proto_register_xml(void) {
1331         static gint *ett_base[] = {
1332                 &unknown_ns.ett,
1333                 &xml_ns.ett,
1334                 &ett_dtd,
1335                 &ett_xmpli
1336         };
1337
1338         static hf_register_info hf_base[] = {
1339                 { &hf_xmlpi, {"XMLPI", "xml.xmlpi", FT_STRING, BASE_NONE, NULL, 0, NULL, HFILL }},
1340                 { &hf_comment, {"Comment", "xml.comment", FT_STRING, BASE_NONE, NULL, 0, NULL, HFILL }},
1341                 { &hf_unknowwn_attrib, {"Attribute", "xml.attribute", FT_STRING, BASE_NONE, NULL, 0, NULL, HFILL }},
1342                 { &hf_doctype, {"Doctype", "xml.doctype", FT_STRING, BASE_NONE, NULL, 0, NULL, HFILL }},
1343                 { &hf_dtd_tag, {"DTD Tag", "xml.dtdtag", FT_STRING, BASE_NONE, NULL, 0, NULL, HFILL }},
1344                 { &unknown_ns.hf_cdata, {"CDATA", "xml.cdata", FT_STRING, BASE_NONE, NULL, 0, NULL, HFILL }},
1345                 { &unknown_ns.hf_tag, {"Tag", "xml.tag", FT_STRING, BASE_NONE, NULL, 0, NULL, HFILL }},
1346                 { &xml_ns.hf_cdata, {"Unknown", "xml.unknown", FT_STRING, BASE_NONE, NULL, 0, NULL, HFILL }}
1347     };
1348         module_t* xml_module;
1349
1350         hf_arr = g_array_new(FALSE,FALSE,sizeof(hf_register_info));
1351         ett_arr = g_array_new(FALSE,FALSE,sizeof(gint*));
1352
1353         g_array_append_vals(hf_arr,hf_base,array_length(hf_base));
1354         g_array_append_vals(ett_arr,ett_base,array_length(ett_base));
1355
1356         init_xml_names();
1357
1358         xml_ns.hf_tag = proto_register_protocol("eXtensible Markup Language", "XML", xml_ns.name);
1359
1360         proto_register_field_array(xml_ns.hf_tag, (hf_register_info*)g_array_data(hf_arr), hf_arr->len);
1361         proto_register_subtree_array((gint**)g_array_data(ett_arr), ett_arr->len);
1362
1363         xml_module = prefs_register_protocol(xml_ns.hf_tag,apply_prefs);
1364         prefs_register_bool_preference(xml_module, "heuristic", "Use Heuristics for media types",
1365                                    "Try to recognize XML for unknown media types",
1366                                    &pref_heuristic_media);
1367         prefs_register_bool_preference(xml_module, "heuristic_tcp", "Use Heuristics for tcp",
1368                                    "Try to recognize XML for unknown TCP ports",
1369                                    &pref_heuristic_tcp);
1370         prefs_register_range_preference(xml_module, "tcp.port", "TCP Ports",
1371                                                                         "TCP Ports range",
1372                                                                         &global_xml_tcp_range, 65535);
1373         prefs_register_bool_preference(xml_module, "heuristic_udp", "Use Heuristics for UDP",
1374                                    "Try to recognize XML for unknown UDP ports",
1375                                    &pref_heuristic_udp);
1376         
1377         g_array_free(hf_arr,FALSE);
1378         g_array_free(ett_arr,TRUE);
1379
1380         register_dissector("xml", dissect_xml, xml_ns.hf_tag);
1381
1382         init_xml_parser();
1383
1384         xml_tcp_range = range_empty();
1385
1386
1387 }
1388
1389 static void add_dissector_media(gpointer k, gpointer v _U_, gpointer p _U_) {
1390         dissector_add_string("media_type", (gchar*)k, xml_handle);
1391 }
1392
1393 void
1394 proto_reg_handoff_xml(void)
1395 {
1396
1397         xml_handle = find_dissector("xml");
1398
1399         g_hash_table_foreach(media_types,add_dissector_media,NULL);
1400
1401 }