HTTPS (almost) everywhere.
[metze/wireshark/wip.git] / epan / dissectors / packet-xml.c
1 /* packet-xml.c
2  * wireshark's xml dissector .
3  *
4  * (C) 2005, Luis E. Garcia Ontanon.
5  *
6  * Refer to the AUTHORS file or the AUTHORS section in the man page
7  * for contacting the author(s) of this file.
8  *
9  * Wireshark - Network traffic analyzer
10  * By Gerald Combs <gerald@wireshark.org>
11  * Copyright 1998 Gerald Combs
12  *
13  * SPDX-License-Identifier: GPL-2.0-or-later
14  */
15
16 #include "config.h"
17
18 #include <string.h>
19 #include <errno.h>
20
21 #include <epan/packet.h>
22 #include <epan/tvbparse.h>
23 #include <epan/dtd.h>
24 #include <epan/proto_data.h>
25 #include <wsutil/filesystem.h>
26 #include <epan/prefs.h>
27 #include <epan/expert.h>
28 #include <epan/garrayfix.h>
29 #include <wsutil/str_util.h>
30 #include <wsutil/report_message.h>
31
32 #include "packet-xml.h"
33
34 void proto_register_xml(void);
35 void proto_reg_handoff_xml(void);
36
/*
 * Bundles a wmem array with a base name string.
 * NOTE(review): the name suggests this is the context passed around while
 * registering attribute header fields; its users are outside this view --
 * confirm before relying on that.
 */
struct _attr_reg_data {
    wmem_array_t *hf;       /* presumably the array that collects field registrations -- TODO confirm */
    const gchar *basename;  /* presumably the prefix for generated field names -- TODO confirm */
};
41
42
/* Subtree ids: ett_dtd is used under a DOCTYPE item, ett_xmpli under a
 * processing instruction whose target has no registered namespace. */
static gint ett_dtd = -1;
static gint ett_xmpli = -1;

/* Header fields that are not tied to a particular namespace.
 * hf_unknowwn_attrib is the fallback for attributes not found in the
 * current namespace's attribute map. */
static int hf_unknowwn_attrib = -1;
static int hf_comment = -1;
static int hf_xmlpi = -1;
static int hf_dtd_tag = -1;
static int hf_doctype = -1;
static int hf_cdatasection = -1;

/* Expert infos raised by the parser callbacks when something closes that
 * was never opened, or when text is not recognized. */
static expert_field ei_xml_closing_unopened_tag = EI_INIT;
static expert_field ei_xml_closing_unopened_xmpli_tag = EI_INIT;
static expert_field ei_xml_unrecognized_text = EI_INIT;

/* dissector handles */
static dissector_handle_t xml_handle;

/* parser definitions */
static tvbparse_wanted_t *want;         /* grammar driven by tvbparse_get() in dissect_xml() */
static tvbparse_wanted_t *want_ignore;  /* handed to tvbparse_init() as the "ignore" rule */
static tvbparse_wanted_t *want_heur;    /* grammar peeked at by the heuristic dissector */

/* xmpli_names is looked up by processing-instruction target name,
 * media_types by pinfo->match_string; both yield an xml_ns_t. */
static wmem_map_t *xmpli_names;
static wmem_map_t *media_types;

/* xml_ns is the default root namespace, unknown_ns the fallback for tags
 * that cannot be resolved; root_ns is reselected on every dissect_xml() call. */
static xml_ns_t xml_ns     = {"xml",     "/", -1, -1, -1, NULL, NULL, NULL};
static xml_ns_t unknown_ns = {"unknown", "?", -1, -1, -1, NULL, NULL, NULL};
static xml_ns_t *root_ns;

/* When TRUE the heuristic dissector retries the payload as little-endian UCS-2. */
static gboolean pref_heuristic_unicode    = FALSE;


/* Negative token ids, kept distinct from header-field ids carried in tok->id. */
#define XML_CDATA       -1000
#define XML_SCOPED_NAME -1001


/* NOTE(review): presumably the field and subtree registration arrays;
 * they are not referenced in the part of the file visible here -- confirm. */
static wmem_array_t *hf_arr;
static GArray *ett_arr;
81
/*
 * Built-in list of media (MIME) type strings.
 * NOTE(review): presumably these are the types the dissector is registered
 * for; the code that consumes this table is outside this view -- confirm.
 */
static const gchar *default_media_types[] = {
    "text/xml",
    "text/vnd.wap.wml",
    "text/vnd.wap.si",
    "text/vnd.wap.sl",
    "text/vnd.wap.co",
    "text/vnd.wap.emn",
    "application/3gpp-ims+xml",
    "application/atom+xml",
    "application/auth-policy+xml",
    "application/ccmp+xml",
    "application/conference-info+xml",          /*RFC4575*/
    "application/cpim-pidf+xml",
    "application/cpl+xml",
    "application/dds-web+xml",
    "application/im-iscomposing+xml",           /*RFC3994*/
    "application/load-control+xml",             /*RFC7200*/
    "application/mathml+xml",
    "application/media_control+xml",
    "application/note+xml",
    "application/pidf+xml",
    "application/pidf-diff+xml",
    "application/poc-settings+xml",
    "application/rdf+xml",
    "application/reginfo+xml",
    "application/resource-lists+xml",
    "application/rlmi+xml",
    "application/rls-services+xml",
    "application/rss+xml",
    "application/rs-metadata+xml",
    "application/smil",
    "application/simple-filter+xml",
    "application/simple-message-summary+xml",   /*RFC3842*/
    "application/simservs+xml",
    "application/soap+xml",
    "application/vnd.etsi.aoc+xml",
    "application/vnd.etsi.cug+xml",
    "application/vnd.etsi.iptvcommand+xml",
    "application/vnd.etsi.iptvdiscovery+xml",
    "application/vnd.etsi.iptvprofile+xml",
    "application/vnd.etsi.iptvsad-bc+xml",
    "application/vnd.etsi.iptvsad-cod+xml",
    "application/vnd.etsi.iptvsad-npvr+xml",
    "application/vnd.etsi.iptvservice+xml",
    "application/vnd.etsi.iptvsync+xml",
    "application/vnd.etsi.iptvueprofile+xml",
    "application/vnd.etsi.mcid+xml",
    "application/vnd.etsi.overload-control-policy-dataset+xml",
    "application/vnd.etsi.pstn+xml",
    "application/vnd.etsi.sci+xml",
    "application/vnd.etsi.simservs+xml",
    "application/vnd.etsi.tsl+xml",
    "application/vnd.oma.xdm-apd+xml",
    "application/vnd.oma.fnl+xml",
    "application/vnd.oma.access-permissions-list+xml",
    "application/vnd.oma.alias-principals-list+xml",
    "application/upp-directory+xml",            /*OMA-ERELD-XDM-V2_2_1-20170124-A*/
    "application/vnd.oma.xdm-hi+xml",
    "application/vnd.oma.xdm-rhi+xml",
    "application/vnd.oma.xdm-prefs+xml",
    "application/vnd.oma.xdcp+xml",
    "application/vnd.oma.bcast.associated-procedure-parameter+xml",
    "application/vnd.oma.bcast.drm-trigger+xml",
    "application/vnd.oma.bcast.imd+xml",
    "application/vnd.oma.bcast.notification+xml",
    "application/vnd.oma.bcast.sgdd+xml",
    "application/vnd.oma.bcast.smartcard-trigger+xml",
    "application/vnd.oma.bcast.sprov+xml",
    "application/vnd.oma.cab-address-book+xml",
    "application/vnd.oma.cab-feature-handler+xml",
    "application/vnd.oma.cab-pcc+xml",
    "application/vnd.oma.cab-subs-invite+xml",
    "application/vnd.oma.cab-user-prefs+xml",
    "application/vnd.oma.dd2+xml",
    "application/vnd.oma.drm.risd+xml",
    "application/vnd.oma.group-usage-list+xml",
    "application/vnd.oma.pal+xml",
    "application/vnd.oma.poc.detailed-progress-report+xml",
    "application/vnd.oma.poc.final-report+xml",
    "application/vnd.oma.poc.groups+xml",
    "application/vnd.oma.poc.invocation-descriptor+xml",
    "application/vnd.oma.poc.optimized-progress-report+xml",
    "application/vnd.oma.scidm.messages+xml",
    "application/vnd.oma.suppnot+xml",          /*OMA-ERELD-Presence_SIMPLE-V2_0-20120710-A*/
    "application/vnd.oma.xcap-directory+xml",
    "application/vnd.omads-email+xml",
    "application/vnd.omads-file+xml",
    "application/vnd.omads-folder+xml",
    "application/vnd.3gpp.access-transfer-events+xml",
    "application/vnd.3gpp.bsf+xml",
    "application/vnd.3gpp.comm-div-info+xml",   /*3GPP TS 24.504  version 8.19.0*/
    "application/vnd.3gpp.cw+xml",
    "application/vnd.3gpp.iut+xml",             /*3GPP TS 24.337*/
    /* NOTE(review): "vnc." below looks like a typo for the "vnd." prefix used
     * by every sibling 3GPP entry -- confirm against 3GPP TS 24.337 before changing. */
    "application/vnc.3gpp.iut-config+xml",      /*3GPP TS 24.337*/
    "application/vnd.3gpp.mid-call+xml",
    "application/vnd.3gpp-prose-pc3ch+xml",
    "application/vnd.3gpp-prose+xml",
    "application/vnd.3gpp.replication+xml",     /*3GPP TS 24.337*/
    "application/vnd.3gpp.sms+xml",
    "application/vnd.3gpp.srvcc-info+xml",
    "application/vnd.3gpp.srvcc-ext+xml",
    "application/vnd.3gpp.state-and-event-info+xml",
    "application/vnd.3gpp.ussd+xml",
    "application/vnd.3gpp2.bcmcsinfo+xml",
    "application/vnd.wv.csp+xml",
    "application/vnd.wv.csp.xml",
    "application/watcherinfo+xml",
    "application/xcap-att+xml",
    "application/xcap-caps+xml",
    "application/xcap-diff+xml",
    "application/xcap-el+xml",
    "application/xcap-error+xml",
    "application/xcap-ns+xml",
    "application/xml",
    "application/xml-dtd",
    "application/xpidf+xml",
    "application/xslt+xml",
    "application/x-crd+xml",
    "application/x-wms-logconnectstats",
    "application/x-wms-logplaystats",
    "application/x-wms-sendevent",
    "image/svg+xml",
    "message/imdn+xml",                         /*RFC5438*/
};
206
207 static void insert_xml_frame(xml_frame_t *parent, xml_frame_t *new_child)
208 {
209     new_child->first_child  = NULL;
210     new_child->last_child   = NULL;
211
212     new_child->parent       = parent;
213     new_child->next_sibling = NULL;
214     new_child->prev_sibling = NULL;
215     if (parent == NULL) return;  /* root */
216
217     if (parent->first_child == NULL) {  /* the 1st child */
218         parent->first_child = new_child;
219     } else {  /* following children */
220         parent->last_child->next_sibling = new_child;
221         new_child->prev_sibling = parent->last_child;
222     }
223     parent->last_child = new_child;
224 }
225
/*
 * Dissect the whole tvb as an XML document.
 *
 * Builds a root xml_frame_t, selects the root namespace from
 * pinfo->match_string (falling back to the generic xml_ns), appends the
 * protocol name to the Protocol column, adds the top-level item and then
 * lets the tvbparse grammar 'want' consume the buffer; the grammar's
 * callbacks receive 'stack' as their user data.
 *
 * The root frame is stored as per-packet proto data under xml_ns.hf_tag.
 *
 * Returns the captured length of tvb.
 */
static int
dissect_xml(tvbuff_t *tvb, packet_info *pinfo, proto_tree *tree, void* data _U_)
{
    tvbparse_t       *tt;
    /* Static on purpose: the array from the previous call is released at the
     * top of the next one rather than at the end of this function. */
    static GPtrArray *stack;
    xml_frame_t      *current_frame;
    const char       *colinfo_str;

    if (stack != NULL)
        g_ptr_array_free(stack, TRUE);

    /* Fresh stack with the root frame as its only (bottom) entry. */
    stack = g_ptr_array_new();
    current_frame                 = wmem_new(wmem_packet_scope(), xml_frame_t);
    current_frame->type           = XML_FRAME_ROOT;
    current_frame->name           = NULL;
    current_frame->name_orig_case = NULL;
    current_frame->value          = NULL;
    current_frame->pinfo          = pinfo;
    insert_xml_frame(NULL, current_frame);
    g_ptr_array_add(stack, current_frame);

    tt = tvbparse_init(tvb, 0, -1, stack, want_ignore);
    current_frame->start_offset = 0;
    current_frame->length = tvb_captured_length(tvb);

    root_ns = NULL;

    /* A media type match, when present, selects a specific root namespace. */
    if (pinfo->match_string)
        root_ns = (xml_ns_t *)wmem_map_lookup(media_types, pinfo->match_string);

    if (! root_ns ) {
        root_ns = &xml_ns;
        colinfo_str = "/XML";
    } else {
        /* Column text is "/" followed by the upper-cased namespace name. */
        char *colinfo_str_buf;
        colinfo_str_buf = wmem_strconcat(wmem_packet_scope(), "/", root_ns->name, NULL);
        ascii_strup_inplace(colinfo_str_buf);
        colinfo_str = colinfo_str_buf;
    }

    col_append_str(pinfo->cinfo, COL_PROTOCOL, colinfo_str);

    current_frame->ns = root_ns;

    current_frame->item = proto_tree_add_item(tree, current_frame->ns->hf_tag, tvb, 0, -1, ENC_UTF_8|ENC_NA);
    current_frame->tree = proto_item_add_subtree(current_frame->item, current_frame->ns->ett);
    current_frame->last_item = current_frame->item;

    /* All the work happens in the grammar callbacks; loop until nothing more matches. */
    while(tvbparse_get(tt, want)) ;

    /* Save XML structure in case it is useful for the caller (only XMPP for now) */
    p_add_proto_data(pinfo->pool, pinfo, xml_ns.hf_tag, 0, current_frame);

    return tvb_captured_length(tvb);
}
281
282 static gboolean dissect_xml_heur(tvbuff_t *tvb, packet_info *pinfo, proto_tree *tree, void *data)
283 {
284     if (tvbparse_peek(tvbparse_init(tvb, 0, -1, NULL, want_ignore), want_heur)) {
285         dissect_xml(tvb, pinfo, tree, data);
286         return TRUE;
287     } else if (pref_heuristic_unicode) {
288         /* XXX - UCS-2, or UTF-16? */
289         const guint8 *data_str    = tvb_get_string_enc(pinfo->pool, tvb, 0, tvb_captured_length(tvb), ENC_UCS_2|ENC_LITTLE_ENDIAN);
290         tvbuff_t     *unicode_tvb = tvb_new_child_real_data(tvb, data_str, tvb_captured_length(tvb)/2, tvb_captured_length(tvb)/2);
291         if (tvbparse_peek(tvbparse_init(unicode_tvb, 0, -1, NULL, want_ignore), want_heur)) {
292             add_new_data_source(pinfo, unicode_tvb, "UTF8");
293             dissect_xml(unicode_tvb, pinfo, tree, data);
294             return TRUE;
295         }
296     }
297     return FALSE;
298 }
299
300 xml_frame_t *xml_get_tag(xml_frame_t *frame, const gchar *name)
301 {
302     xml_frame_t *tag = NULL;
303
304     xml_frame_t *xml_item = frame->first_child;
305     while (xml_item) {
306         if (xml_item->type == XML_FRAME_TAG) {
307             if (!name) {  /* get the 1st tag */
308                 tag = xml_item;
309                 break;
310             } else if (xml_item->name_orig_case && !strcmp(xml_item->name_orig_case, name)) {
311                 tag = xml_item;
312                 break;
313             }
314         }
315         xml_item = xml_item->next_sibling;
316     }
317
318     return tag;
319 }
320
321 xml_frame_t *xml_get_attrib(xml_frame_t *frame, const gchar *name)
322 {
323     xml_frame_t *attr = NULL;
324
325     xml_frame_t *xml_item = frame->first_child;
326     while (xml_item) {
327         if ((xml_item->type == XML_FRAME_ATTRIB) &&
328             xml_item->name_orig_case && !strcmp(xml_item->name_orig_case, name)) {
329             attr = xml_item;
330             break;
331         }
332         xml_item = xml_item->next_sibling;
333     }
334
335     return attr;
336 }
337
338 xml_frame_t *xml_get_cdata(xml_frame_t *frame)
339 {
340     xml_frame_t *cdata = NULL;
341
342     xml_frame_t *xml_item = frame->first_child;
343     while (xml_item) {
344         if (xml_item->type == XML_FRAME_CDATA) {
345             cdata = xml_item;
346             break;
347         }
348         xml_item = xml_item->next_sibling;
349     }
350
351     return cdata;
352 }
353
354 static void after_token(void *tvbparse_data, const void *wanted_data _U_, tvbparse_elem_t *tok)
355 {
356     GPtrArray   *stack         = (GPtrArray *)tvbparse_data;
357     xml_frame_t *current_frame = (xml_frame_t *)g_ptr_array_index(stack, stack->len - 1);
358     int          hfid;
359     gboolean     is_cdata      = FALSE;
360     proto_item  *pi;
361     xml_frame_t *new_frame;
362
363     if (tok->id == XML_CDATA) {
364         hfid = current_frame->ns ? current_frame->ns->hf_cdata : xml_ns.hf_cdata;
365         is_cdata = TRUE;
366     } else if ( tok->id > 0) {
367         hfid = tok->id;
368     } else {
369         hfid = xml_ns.hf_cdata;
370     }
371
372     pi = proto_tree_add_item(current_frame->tree, hfid, tok->tvb, tok->offset, tok->len, ENC_UTF_8|ENC_NA);
373
374     proto_item_set_text(pi, "%s",
375                         tvb_format_text(tok->tvb, tok->offset, tok->len));
376
377     if (is_cdata) {
378         new_frame                 = wmem_new(wmem_packet_scope(), xml_frame_t);
379         new_frame->type           = XML_FRAME_CDATA;
380         new_frame->name           = NULL;
381         new_frame->name_orig_case = NULL;
382         new_frame->value          = tvb_new_subset_length(tok->tvb, tok->offset, tok->len);
383         insert_xml_frame(current_frame, new_frame);
384         new_frame->item           = pi;
385         new_frame->last_item      = pi;
386         new_frame->tree           = NULL;
387         new_frame->start_offset   = tok->offset;
388         new_frame->length         = tok->len;
389         new_frame->ns             = NULL;
390         new_frame->pinfo          = current_frame->pinfo;
391     }
392 }
393
394 static void before_xmpli(void *tvbparse_data, const void *wanted_data _U_, tvbparse_elem_t *tok)
395 {
396     GPtrArray       *stack         = (GPtrArray *)tvbparse_data;
397     xml_frame_t     *current_frame = (xml_frame_t *)g_ptr_array_index(stack, stack->len - 1);
398     proto_item      *pi;
399     proto_tree      *pt;
400     tvbparse_elem_t *name_tok      = tok->sub->next;
401     gchar           *name          = tvb_get_string_enc(wmem_packet_scope(), name_tok->tvb, name_tok->offset, name_tok->len, ENC_ASCII);
402     xml_ns_t        *ns            = (xml_ns_t *)wmem_map_lookup(xmpli_names, name);
403     xml_frame_t     *new_frame;
404
405     int  hf_tag;
406     gint ett;
407
408     ascii_strdown_inplace(name);
409     if (!ns) {
410         hf_tag = hf_xmlpi;
411         ett = ett_xmpli;
412     } else {
413         hf_tag = ns->hf_tag;
414         ett = ns->ett;
415     }
416
417     pi = proto_tree_add_item(current_frame->tree, hf_tag, tok->tvb, tok->offset, tok->len, ENC_UTF_8|ENC_NA);
418
419     proto_item_set_text(pi, "%s", tvb_format_text(tok->tvb, tok->offset, (name_tok->offset - tok->offset) + name_tok->len));
420
421     pt = proto_item_add_subtree(pi, ett);
422
423     new_frame                 = wmem_new(wmem_packet_scope(), xml_frame_t);
424     new_frame->type           = XML_FRAME_XMPLI;
425     new_frame->name           = name;
426     new_frame->name_orig_case = name;
427     new_frame->value          = NULL;
428     insert_xml_frame(current_frame, new_frame);
429     new_frame->item           = pi;
430     new_frame->last_item      = pi;
431     new_frame->tree           = pt;
432     new_frame->start_offset   = tok->offset;
433     new_frame->length         = tok->len;
434     new_frame->ns             = ns;
435     new_frame->pinfo          = current_frame->pinfo;
436
437     g_ptr_array_add(stack, new_frame);
438
439 }
440
441 static void after_xmlpi(void *tvbparse_data, const void *wanted_data _U_, tvbparse_elem_t *tok)
442 {
443     GPtrArray   *stack         = (GPtrArray *)tvbparse_data;
444     xml_frame_t *current_frame = (xml_frame_t *)g_ptr_array_index(stack, stack->len - 1);
445
446     proto_tree_add_format_text(current_frame->tree, tok->tvb, tok->offset, tok->len);
447
448     if (stack->len > 1) {
449         g_ptr_array_remove_index_fast(stack, stack->len - 1);
450     } else {
451         proto_tree_add_expert(current_frame->tree, current_frame->pinfo, &ei_xml_closing_unopened_xmpli_tag,
452             tok->tvb, tok->offset, tok->len);
453     }
454 }
455
/*
 * Parser callback run when a tag starts.
 *
 * Resolves the tag name to a namespace (xml_ns_t), adds an item for the tag
 * under the current frame, opens a subtree and pushes a new XML_FRAME_TAG
 * frame on the stack.  Names that cannot be resolved use unknown_ns.
 *
 * NOTE(review): the grammar that installs this callback is outside this
 * view; the code assumes the second sub-element of 'tok' is the tag name.
 */
static void before_tag(void *tvbparse_data, const void *wanted_data _U_, tvbparse_elem_t *tok)
{
    GPtrArray       *stack         = (GPtrArray *)tvbparse_data;
    xml_frame_t     *current_frame = (xml_frame_t *)g_ptr_array_index(stack, stack->len - 1);
    tvbparse_elem_t *name_tok      = tok->sub->next;
    gchar           *root_name;
    gchar           *name          = NULL, *name_orig_case = NULL;
    xml_ns_t        *ns;
    xml_frame_t     *new_frame;
    proto_item      *pi;
    proto_tree      *pt;

    if (name_tok->sub->id == XML_SCOPED_NAME) {
        /* "root:leaf" form: the sequence is name, ':', name, so the leaf is
         * the third sub-element. */
        tvbparse_elem_t *root_tok = name_tok->sub->sub;
        tvbparse_elem_t *leaf_tok = name_tok->sub->sub->next->next;
        xml_ns_t        *nameroot_ns;

        root_name      = (gchar *)tvb_get_string_enc(wmem_packet_scope(), root_tok->tvb, root_tok->offset, root_tok->len, ENC_ASCII);
        name           = (gchar *)tvb_get_string_enc(wmem_packet_scope(), leaf_tok->tvb, leaf_tok->offset, leaf_tok->len, ENC_ASCII);
        /* In this branch the name is not lower-cased, so both pointers share one buffer. */
        name_orig_case = name;

        /* First resolve the prefix, then the leaf inside that namespace. */
        nameroot_ns = (xml_ns_t *)wmem_map_lookup(xml_ns.elements, root_name);

        if(nameroot_ns) {
            ns = (xml_ns_t *)wmem_map_lookup(nameroot_ns->elements, name);
            if (!ns) {
                ns = &unknown_ns;
            }
        } else {
            ns = &unknown_ns;
        }

    } else {
        /* Plain name: keep the original spelling, look up the lower-cased one. */
        name = tvb_get_string_enc(wmem_packet_scope(), name_tok->tvb, name_tok->offset, name_tok->len, ENC_ASCII);
        name_orig_case = wmem_strdup(wmem_packet_scope(), name);
        ascii_strdown_inplace(name);

        if(current_frame->ns) {
            /* Try the enclosing element's children first, then the root namespace. */
            ns = (xml_ns_t *)wmem_map_lookup(current_frame->ns->elements, name);

            if (!ns) {
                if (! ( ns = (xml_ns_t *)wmem_map_lookup(root_ns->elements, name) ) ) {
                    ns = &unknown_ns;
                }
            }
        } else {
            ns = &unknown_ns;
        }
    }

    /* The item covers the whole token; its label stops at the end of the tag
     * name (after_open_tag / after_closed_tag append the closing text). */
    pi = proto_tree_add_item(current_frame->tree, ns->hf_tag, tok->tvb, tok->offset, tok->len, ENC_UTF_8|ENC_NA);
    proto_item_set_text(pi, "%s", tvb_format_text(tok->tvb,
                                                  tok->offset,
                                                  (name_tok->offset - tok->offset) + name_tok->len));

    pt = proto_item_add_subtree(pi, ns->ett);

    new_frame = wmem_new(wmem_packet_scope(), xml_frame_t);
    new_frame->type           = XML_FRAME_TAG;
    new_frame->name           = name;
    new_frame->name_orig_case = name_orig_case;
    new_frame->value          = NULL;
    insert_xml_frame(current_frame, new_frame);
    new_frame->item           = pi;
    new_frame->last_item      = pi;
    new_frame->tree           = pt;
    new_frame->start_offset   = tok->offset;
    new_frame->length         = tok->len;
    new_frame->ns             = ns;
    new_frame->pinfo          = current_frame->pinfo;

    /* The new tag becomes the current frame for everything nested in it. */
    g_ptr_array_add(stack, new_frame);

}
530
531 static void after_open_tag(void *tvbparse_data, const void *wanted_data _U_, tvbparse_elem_t *tok _U_)
532 {
533     GPtrArray   *stack         = (GPtrArray *)tvbparse_data;
534     xml_frame_t *current_frame = (xml_frame_t *)g_ptr_array_index(stack, stack->len - 1);
535
536     proto_item_append_text(current_frame->last_item, ">");
537 }
538
539 static void after_closed_tag(void *tvbparse_data, const void *wanted_data _U_, tvbparse_elem_t *tok)
540 {
541     GPtrArray   *stack         = (GPtrArray *)tvbparse_data;
542     xml_frame_t *current_frame = (xml_frame_t *)g_ptr_array_index(stack, stack->len - 1);
543
544     proto_item_append_text(current_frame->last_item, "/>");
545
546     if (stack->len > 1) {
547         g_ptr_array_remove_index_fast(stack, stack->len - 1);
548     } else {
549         proto_tree_add_expert(current_frame->tree, current_frame->pinfo, &ei_xml_closing_unopened_tag,
550                               tok->tvb, tok->offset, tok->len);
551     }
552 }
553
554 static void after_untag(void *tvbparse_data, const void *wanted_data _U_, tvbparse_elem_t *tok)
555 {
556     GPtrArray   *stack         = (GPtrArray *)tvbparse_data;
557     xml_frame_t *current_frame = (xml_frame_t *)g_ptr_array_index(stack, stack->len - 1);
558
559     proto_item_set_len(current_frame->item, (tok->offset - current_frame->start_offset) + tok->len);
560     current_frame->length = (tok->offset - current_frame->start_offset) + tok->len;
561
562     proto_tree_add_format_text(current_frame->tree, tok->tvb, tok->offset, tok->len);
563
564     if (stack->len > 1) {
565         g_ptr_array_remove_index_fast(stack, stack->len - 1);
566     } else {
567         proto_tree_add_expert(current_frame->tree, current_frame->pinfo, &ei_xml_closing_unopened_tag,
568             tok->tvb, tok->offset, tok->len);
569     }
570 }
571
572 static void before_dtd_doctype(void *tvbparse_data, const void *wanted_data _U_, tvbparse_elem_t *tok)
573 {
574     GPtrArray       *stack         = (GPtrArray *)tvbparse_data;
575     xml_frame_t     *current_frame = (xml_frame_t *)g_ptr_array_index(stack, stack->len - 1);
576     xml_frame_t     *new_frame;
577     tvbparse_elem_t *name_tok      = tok->sub->next->next->next->sub->sub;
578     proto_tree      *dtd_item      = proto_tree_add_item(current_frame->tree, hf_doctype,
579                                                          name_tok->tvb, name_tok->offset,
580                                                          name_tok->len, ENC_ASCII|ENC_NA);
581
582     proto_item_set_text(dtd_item, "%s", tvb_format_text(tok->tvb, tok->offset, tok->len));
583
584     new_frame = wmem_new(wmem_packet_scope(), xml_frame_t);
585     new_frame->type           = XML_FRAME_DTD_DOCTYPE;
586     new_frame->name           = (gchar *)tvb_get_string_enc(wmem_packet_scope(), name_tok->tvb,
587                                                                   name_tok->offset,
588                                                                   name_tok->len, ENC_ASCII);
589     new_frame->name_orig_case = new_frame->name;
590     new_frame->value          = NULL;
591     insert_xml_frame(current_frame, new_frame);
592     new_frame->item           = dtd_item;
593     new_frame->last_item      = dtd_item;
594     new_frame->tree           = proto_item_add_subtree(dtd_item, ett_dtd);
595     new_frame->start_offset   = tok->offset;
596     new_frame->length         = tok->len;
597     new_frame->ns             = NULL;
598     new_frame->pinfo          = current_frame->pinfo;
599
600     g_ptr_array_add(stack, new_frame);
601 }
602
603 static void pop_stack(void *tvbparse_data, const void *wanted_data _U_, tvbparse_elem_t *tok _U_)
604 {
605     GPtrArray   *stack         = (GPtrArray *)tvbparse_data;
606     xml_frame_t *current_frame = (xml_frame_t *)g_ptr_array_index(stack, stack->len - 1);
607
608     if (stack->len > 1) {
609         g_ptr_array_remove_index_fast(stack, stack->len - 1);
610     } else {
611         proto_tree_add_expert(current_frame->tree, current_frame->pinfo, &ei_xml_closing_unopened_tag,
612             tok->tvb, tok->offset, tok->len);
613     }
614 }
615
616 static void after_dtd_close(void *tvbparse_data, const void *wanted_data _U_, tvbparse_elem_t *tok)
617 {
618     GPtrArray   *stack         = (GPtrArray *)tvbparse_data;
619     xml_frame_t *current_frame = (xml_frame_t *)g_ptr_array_index(stack, stack->len - 1);
620
621     proto_tree_add_format_text(current_frame->tree, tok->tvb, tok->offset, tok->len);
622     if (stack->len > 1) {
623         g_ptr_array_remove_index_fast(stack, stack->len - 1);
624     } else {
625         proto_tree_add_expert(current_frame->tree, current_frame->pinfo, &ei_xml_closing_unopened_tag,
626             tok->tvb, tok->offset, tok->len);
627     }
628 }
629
630 static void get_attrib_value(void *tvbparse_data _U_, const void *wanted_data _U_, tvbparse_elem_t *tok)
631 {
632     tok->data = tok->sub;
633 }
634
635 static void after_attrib(void *tvbparse_data, const void *wanted_data _U_, tvbparse_elem_t *tok)
636 {
637     GPtrArray       *stack         = (GPtrArray *)tvbparse_data;
638     xml_frame_t     *current_frame = (xml_frame_t *)g_ptr_array_index(stack, stack->len - 1);
639     gchar           *name, *name_orig_case;
640     tvbparse_elem_t *value;
641     tvbparse_elem_t *value_part    = (tvbparse_elem_t *)tok->sub->next->next->data;
642     int             *hfidp;
643     int              hfid;
644     proto_item      *pi;
645     xml_frame_t     *new_frame;
646
647     name           = tvb_get_string_enc(wmem_packet_scope(), tok->sub->tvb, tok->sub->offset, tok->sub->len, ENC_ASCII);
648     name_orig_case = wmem_strdup(wmem_packet_scope(), name);
649     ascii_strdown_inplace(name);
650
651     if(current_frame->ns && (hfidp = (int *)wmem_map_lookup(current_frame->ns->attributes, name) )) {
652         hfid  = *hfidp;
653         value = value_part;
654     } else {
655         hfid  = hf_unknowwn_attrib;
656         value = tok;
657     }
658
659     pi = proto_tree_add_item(current_frame->tree, hfid, value->tvb, value->offset, value->len, ENC_UTF_8|ENC_NA);
660     proto_item_set_text(pi, "%s", tvb_format_text(tok->tvb, tok->offset, tok->len));
661
662     current_frame->last_item = pi;
663
664     new_frame = wmem_new(wmem_packet_scope(), xml_frame_t);
665     new_frame->type           = XML_FRAME_ATTRIB;
666     new_frame->name           = name;
667     new_frame->name_orig_case = name_orig_case;
668     new_frame->value          = tvb_new_subset_length(value_part->tvb, value_part->offset,
669                            value_part->len);
670     insert_xml_frame(current_frame, new_frame);
671     new_frame->item           = pi;
672     new_frame->last_item      = pi;
673     new_frame->tree           = NULL;
674     new_frame->start_offset   = tok->offset;
675     new_frame->length         = tok->len;
676     new_frame->ns             = NULL;
677     new_frame->pinfo          = current_frame->pinfo;
678
679 }
680
681 static void unrecognized_token(void *tvbparse_data, const void *wanted_data _U_, tvbparse_elem_t *tok _U_)
682 {
683     GPtrArray   *stack         = (GPtrArray *)tvbparse_data;
684     xml_frame_t *current_frame = (xml_frame_t *)g_ptr_array_index(stack, stack->len - 1);
685
686     proto_tree_add_expert(current_frame->tree, current_frame->pinfo, &ei_xml_unrecognized_text,
687                     tok->tvb, tok->offset, tok->len);
688
689 }
690
691
692
693 static void init_xml_parser(void)
694 {
695     tvbparse_wanted_t *want_name =
696         tvbparse_chars(-1, 1, 0,
697                    "abcdefghijklmnopqrstuvwxyz.-_ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789",
698                    NULL, NULL, NULL);
699     tvbparse_wanted_t *want_attr_name =
700         tvbparse_chars(-1, 1, 0,
701                    "abcdefghijklmnopqrstuvwxyz.-_ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789:",
702                    NULL, NULL, NULL);
703
704     tvbparse_wanted_t *want_scoped_name = tvbparse_set_seq(XML_SCOPED_NAME, NULL, NULL, NULL,
705                                    want_name,
706                                    tvbparse_char(-1, ":", NULL, NULL, NULL),
707                                    want_name,
708                                    NULL);
709
710     tvbparse_wanted_t *want_tag_name = tvbparse_set_oneof(0, NULL, NULL, NULL,
711                                   want_scoped_name,
712                                   want_name,
713                                   NULL);
714
715     tvbparse_wanted_t *want_attrib_value = tvbparse_set_oneof(0, NULL, NULL, get_attrib_value,
716                                   tvbparse_quoted(-1, NULL, NULL, tvbparse_shrink_token_cb, '\"', '\\'),
717                                   tvbparse_quoted(-1, NULL, NULL, tvbparse_shrink_token_cb, '\'', '\\'),
718                                   tvbparse_chars(-1, 1, 0, "0123456789", NULL, NULL, NULL),
719                                   want_name,
720                                   NULL);
721
722     tvbparse_wanted_t *want_attributes = tvbparse_one_or_more(-1, NULL, NULL, NULL,
723                                   tvbparse_set_seq(-1, NULL, NULL, after_attrib,
724                                            want_attr_name,
725                                            tvbparse_char(-1, "=", NULL, NULL, NULL),
726                                            want_attrib_value,
727                                            NULL));
728
729     tvbparse_wanted_t *want_stoptag = tvbparse_set_oneof(-1, NULL, NULL, NULL,
730                                  tvbparse_char(-1, ">", NULL, NULL, after_open_tag),
731                                  tvbparse_string(-1, "/>", NULL, NULL, after_closed_tag),
732                                  NULL);
733
734     tvbparse_wanted_t *want_stopxmlpi = tvbparse_string(-1, "?>", NULL, NULL, after_xmlpi);
735
736     tvbparse_wanted_t *want_comment = tvbparse_set_seq(hf_comment, NULL, NULL, after_token,
737                                tvbparse_string(-1, "<!--", NULL, NULL, NULL),
738                                tvbparse_until(-1, NULL, NULL, NULL,
739                                       tvbparse_string(-1, "-->", NULL, NULL, NULL),
740                                       TP_UNTIL_INCLUDE),
741                                NULL);
742
743     tvbparse_wanted_t *want_cdatasection = tvbparse_set_seq(hf_cdatasection, NULL, NULL, after_token,
744                                tvbparse_string(-1, "<![CDATA[", NULL, NULL, NULL),
745                                tvbparse_until(-1, NULL, NULL, NULL,
746                                        tvbparse_string(-1, "]]>", NULL, NULL, NULL),
747                                        TP_UNTIL_INCLUDE),
748                                 NULL);
749
750     tvbparse_wanted_t *want_xmlpi = tvbparse_set_seq(hf_xmlpi, NULL, before_xmpli, NULL,
751                              tvbparse_string(-1, "<?", NULL, NULL, NULL),
752                              want_name,
753                              tvbparse_set_oneof(-1, NULL, NULL, NULL,
754                                         want_stopxmlpi,
755                                         tvbparse_set_seq(-1, NULL, NULL, NULL,
756                                                  want_attributes,
757                                                  want_stopxmlpi,
758                                                  NULL),
759                                         NULL),
760                              NULL);
761
762     tvbparse_wanted_t *want_closing_tag = tvbparse_set_seq(0, NULL, NULL, after_untag,
763                                    tvbparse_char(-1, "<", NULL, NULL, NULL),
764                                    tvbparse_char(-1, "/", NULL, NULL, NULL),
765                                    want_tag_name,
766                                    tvbparse_char(-1, ">", NULL, NULL, NULL),
767                                    NULL);
768
769     tvbparse_wanted_t *want_doctype_start = tvbparse_set_seq(-1, NULL, before_dtd_doctype, NULL,
770                                  tvbparse_char(-1, "<", NULL, NULL, NULL),
771                                  tvbparse_char(-1, "!", NULL, NULL, NULL),
772                                  tvbparse_casestring(-1, "DOCTYPE", NULL, NULL, NULL),
773                                  tvbparse_set_oneof(-1, NULL, NULL, NULL,
774                                             tvbparse_set_seq(-1, NULL, NULL, NULL,
775                                                      want_name,
776                                                      tvbparse_char(-1, "[", NULL, NULL, NULL),
777                                                      NULL),
778                                             tvbparse_set_seq(-1, NULL, NULL, pop_stack,
779                                                      want_name,
780                                                      tvbparse_set_oneof(-1, NULL, NULL, NULL,
781                                                             tvbparse_casestring(-1, "PUBLIC", NULL, NULL, NULL),
782                                                             tvbparse_casestring(-1, "SYSTEM", NULL, NULL, NULL),
783                                                             NULL),
784                                                      tvbparse_until(-1, NULL, NULL, NULL,
785                                                             tvbparse_char(-1, ">", NULL, NULL, NULL),
786                                                             TP_UNTIL_INCLUDE),
787                                                      NULL),
788                                             NULL),
789                                  NULL);
790
791     tvbparse_wanted_t *want_dtd_tag = tvbparse_set_seq(hf_dtd_tag, NULL, NULL, after_token,
792                                tvbparse_char(-1, "<", NULL, NULL, NULL),
793                                tvbparse_char(-1, "!", NULL, NULL, NULL),
794                                tvbparse_until(-1, NULL, NULL, NULL,
795                                       tvbparse_char(-1, ">", NULL, NULL, NULL),
796                                       TP_UNTIL_INCLUDE),
797                                NULL);
798
799     tvbparse_wanted_t *want_tag = tvbparse_set_seq(-1, NULL, before_tag, NULL,
800                                tvbparse_char(-1, "<", NULL, NULL, NULL),
801                                want_tag_name,
802                                tvbparse_set_oneof(-1, NULL, NULL, NULL,
803                                       tvbparse_set_seq(-1, NULL, NULL, NULL,
804                                                want_attributes,
805                                                want_stoptag,
806                                                NULL),
807                                       want_stoptag,
808                                       NULL),
809                                NULL);
810
811     tvbparse_wanted_t *want_dtd_close = tvbparse_set_seq(-1, NULL, NULL, after_dtd_close,
812                                  tvbparse_char(-1, "]", NULL, NULL, NULL),
813                                  tvbparse_char(-1, ">", NULL, NULL, NULL),
814                                  NULL);
815
816     want_ignore = tvbparse_chars(-1, 1, 0, " \t\r\n", NULL, NULL, NULL);
817
818
819     want = tvbparse_set_oneof(-1, NULL, NULL, NULL,
820                   want_comment,
821                   want_cdatasection,
822                   want_xmlpi,
823                   want_closing_tag,
824                   want_doctype_start,
825                   want_dtd_close,
826                   want_dtd_tag,
827                   want_tag,
828                   tvbparse_not_chars(XML_CDATA, 1, 0, "<", NULL, NULL, after_token),
829                   tvbparse_not_chars(-1, 1, 0, " \t\r\n", NULL, NULL, unrecognized_token),
830                   NULL);
831
832     want_heur = tvbparse_set_oneof(-1, NULL, NULL, NULL,
833                        want_comment,
834                        want_cdatasection,
835                        want_xmlpi,
836                        want_doctype_start,
837                        want_dtd_tag,
838                        want_tag,
839                        NULL);
840
841 }
842
843
844 static xml_ns_t *xml_new_namespace(wmem_map_t *hash, const gchar *name, ...)
845 {
846     xml_ns_t *ns = wmem_new(wmem_epan_scope(), xml_ns_t);
847     va_list   ap;
848     gchar    *attr_name;
849
850     ns->name       = wmem_strdup(wmem_epan_scope(), name);
851     ns->hf_tag     = -1;
852     ns->hf_cdata   = -1;
853     ns->ett        = -1;
854     ns->attributes = wmem_map_new(wmem_epan_scope(), g_str_hash, g_str_equal);
855     ns->elements   = NULL;
856
857     va_start(ap, name);
858
859     while(( attr_name = va_arg(ap, gchar *) )) {
860         int *hfp = wmem_new(wmem_epan_scope(), int);
861         *hfp = -1;
862         wmem_map_insert(ns->attributes, wmem_strdup(wmem_epan_scope(), attr_name), hfp);
863     };
864
865     va_end(ap);
866
867     wmem_map_insert(hash, ns->name, ns);
868
869     return ns;
870 }
871
872
873 static void add_xml_field(wmem_array_t *hfs, int *p_id, const gchar *name, const gchar *fqn)
874 {
875     hf_register_info hfri;
876
877     hfri.p_id          = p_id;
878     hfri.hfinfo.name           = name;
879     hfri.hfinfo.abbrev         = fqn;
880     hfri.hfinfo.type           = FT_STRING;
881     hfri.hfinfo.display        = BASE_NONE;
882     hfri.hfinfo.strings        = NULL;
883     hfri.hfinfo.bitmask        = 0x0;
884     hfri.hfinfo.blurb          = NULL;
885     HFILL_INIT(hfri);
886
887     wmem_array_append_one(hfs, hfri);
888 }
889
890 static void add_xml_attribute_names(gpointer k, gpointer v, gpointer p)
891 {
892     struct _attr_reg_data *d = (struct _attr_reg_data *)p;
893     const gchar *basename = wmem_strconcat(wmem_epan_scope(), d->basename, ".", (gchar *)k, NULL);
894
895     add_xml_field(d->hf, (int*) v, (gchar *)k, basename);
896 }
897
898
899 static void add_xmlpi_namespace(gpointer k _U_, gpointer v, gpointer p)
900 {
901     xml_ns_t *ns       = (xml_ns_t *)v;
902     const gchar *basename = wmem_strconcat(wmem_epan_scope(), (gchar *)p, ".", ns->name, NULL);
903     gint     *ett_p    = &(ns->ett);
904     struct _attr_reg_data d;
905
906     add_xml_field(hf_arr, &(ns->hf_tag), basename, basename);
907
908     g_array_append_val(ett_arr, ett_p);
909
910     d.basename = basename;
911     d.hf = hf_arr;
912
913     wmem_map_foreach(ns->attributes, add_xml_attribute_names, &d);
914
915 }
916
917 static void destroy_dtd_data(dtd_build_data_t *dtd_data)
918 {
919     g_free(dtd_data->proto_name);
920     g_free(dtd_data->media_type);
921     g_free(dtd_data->description);
922     g_free(dtd_data->proto_root);
923
924     g_string_free(dtd_data->error, TRUE);
925
926     while(dtd_data->elements->len) {
927         dtd_named_list_t *nl = (dtd_named_list_t *)g_ptr_array_remove_index_fast(dtd_data->elements, 0);
928         g_ptr_array_free(nl->list, TRUE);
929         g_free(nl->name);
930         g_free(nl);
931     }
932
933     g_ptr_array_free(dtd_data->elements, TRUE);
934
935     while(dtd_data->attributes->len) {
936         dtd_named_list_t *nl = (dtd_named_list_t *)g_ptr_array_remove_index_fast(dtd_data->attributes, 0);
937         g_ptr_array_free(nl->list, TRUE);
938         g_free(nl->name);
939         g_free(nl);
940     }
941
942     g_ptr_array_free(dtd_data->attributes, TRUE);
943
944     g_free(dtd_data);
945 }
946
947 static void copy_attrib_item(gpointer k, gpointer v _U_, gpointer p)
948 {
949     gchar      *key   = (gchar *)wmem_strdup(wmem_epan_scope(), (const gchar *)k);
950     int        *value = wmem_new(wmem_epan_scope(), int);
951     wmem_map_t *dst   = (wmem_map_t *)p;
952
953     *value = -1;
954     wmem_map_insert(dst, key, value);
955
956 }
957
958 static wmem_map_t *copy_attributes_hash(wmem_map_t *src)
959 {
960     wmem_map_t *dst = wmem_map_new(wmem_epan_scope(), g_str_hash, g_str_equal);
961
962     wmem_map_foreach(src, copy_attrib_item, dst);
963
964     return dst;
965 }
966
967 static xml_ns_t *duplicate_element(xml_ns_t *orig)
968 {
969     xml_ns_t *new_item = wmem_new(wmem_epan_scope(), xml_ns_t);
970     guint     i;
971
972     new_item->name          = wmem_strdup(wmem_epan_scope(), orig->name);
973     new_item->hf_tag        = -1;
974     new_item->hf_cdata      = -1;
975     new_item->ett           = -1;
976     new_item->attributes    = copy_attributes_hash(orig->attributes);
977     new_item->elements      = wmem_map_new(wmem_epan_scope(), g_str_hash, g_str_equal);
978     new_item->element_names = g_ptr_array_new();
979
980     for(i=0; i < orig->element_names->len; i++) {
981         g_ptr_array_add(new_item->element_names,
982                            g_ptr_array_index(orig->element_names, i));
983     }
984
985     return new_item;
986 }
987
988 static gchar *fully_qualified_name(GPtrArray *hier, gchar *name, gchar *proto_name)
989 {
990     guint    i;
991     wmem_strbuf_t *s = wmem_strbuf_new(wmem_epan_scope(), proto_name);
992
993     wmem_strbuf_append(s, ".");
994
995     for (i = 1; i < hier->len; i++) {
996         wmem_strbuf_append_printf(s, "%s.", (gchar *)g_ptr_array_index(hier, i));
997     }
998
999     wmem_strbuf_append(s, name);
1000
1001     return wmem_strbuf_finalize(s);;
1002 }
1003
1004
1005 static xml_ns_t *make_xml_hier(gchar      *elem_name,
1006                                xml_ns_t   *root,
1007                                wmem_map_t *elements,
1008                                GPtrArray  *hier,
1009                                GString    *error,
1010                                wmem_array_t *hfs,
1011                                GArray     *etts,
1012                                char       *proto_name)
1013 {
1014     xml_ns_t *fresh;
1015     xml_ns_t *orig;
1016     gchar    *fqn;
1017     gint     *ett_p;
1018     gboolean  recurred = FALSE;
1019     guint     i;
1020     struct _attr_reg_data  d;
1021
1022     if ( g_str_equal(elem_name, root->name) ) {
1023         return NULL;
1024     }
1025
1026     if (! ( orig = (xml_ns_t *)wmem_map_lookup(elements, elem_name) )) {
1027         g_string_append_printf(error, "element '%s' is not defined\n", elem_name);
1028         return NULL;
1029     }
1030
1031     for (i = 0; i < hier->len; i++) {
1032         if( (elem_name) && (strcmp(elem_name, (gchar *) g_ptr_array_index(hier, i) ) == 0 )) {
1033             recurred = TRUE;
1034         }
1035     }
1036
1037     if (recurred) {
1038         return NULL;
1039     }
1040
1041     fqn = fully_qualified_name(hier, elem_name, proto_name);
1042
1043     fresh = duplicate_element(orig);
1044     fresh->fqn = fqn;
1045
1046     add_xml_field(hfs, &(fresh->hf_tag), wmem_strdup(wmem_epan_scope(), elem_name), fqn);
1047     add_xml_field(hfs, &(fresh->hf_cdata), wmem_strdup(wmem_epan_scope(), elem_name), fqn);
1048
1049     ett_p = &fresh->ett;
1050     g_array_append_val(etts, ett_p);
1051
1052     d.basename = fqn;
1053     d.hf = hfs;
1054
1055     wmem_map_foreach(fresh->attributes, add_xml_attribute_names, &d);
1056
1057     while(fresh->element_names->len) {
1058         gchar *child_name = (gchar *)g_ptr_array_remove_index(fresh->element_names, 0);
1059         xml_ns_t *child_element = NULL;
1060
1061         g_ptr_array_add(hier, elem_name);
1062         child_element = make_xml_hier(child_name, root, elements, hier, error, hfs, etts, proto_name);
1063         g_ptr_array_remove_index_fast(hier, hier->len - 1);
1064
1065         if (child_element) {
1066             wmem_map_insert(fresh->elements, child_element->name, child_element);
1067         }
1068     }
1069
1070     g_ptr_array_free(fresh->element_names, TRUE);
1071     fresh->element_names = NULL;
1072     return fresh;
1073 }
1074
1075 static void free_elements(gpointer k _U_, gpointer v, gpointer p _U_)
1076 {
1077     xml_ns_t *e = (xml_ns_t *)v;
1078
1079     while (e->element_names->len) {
1080         g_free(g_ptr_array_remove_index(e->element_names, 0));
1081     }
1082
1083     g_ptr_array_free(e->element_names, TRUE);
1084 }
1085
1086 static void register_dtd(dtd_build_data_t *dtd_data, GString *errors)
1087 {
1088     wmem_map_t *elements      = wmem_map_new(wmem_epan_scope(), g_str_hash, g_str_equal);
1089     gchar      *root_name     = NULL;
1090     xml_ns_t   *root_element  = NULL;
1091     wmem_array_t *hfs;
1092     GArray     *etts;
1093     GPtrArray  *hier;
1094     gchar      *curr_name;
1095     GPtrArray  *element_names = g_ptr_array_new();
1096
1097     /* we first populate elements with the those coming from the parser */
1098     while(dtd_data->elements->len) {
1099         dtd_named_list_t *nl      = (dtd_named_list_t *)g_ptr_array_remove_index(dtd_data->elements, 0);
1100         xml_ns_t         *element = wmem_new(wmem_epan_scope(), xml_ns_t);
1101
1102         /* we will use the first element found as root in case no other one was given. */
1103         if (root_name == NULL)
1104             root_name = wmem_strdup(wmem_epan_scope(), nl->name);
1105
1106         element->name          = wmem_strdup(wmem_epan_scope(), nl->name);
1107         element->element_names = nl->list;
1108         element->hf_tag        = -1;
1109         element->hf_cdata      = -1;
1110         element->ett           = -1;
1111         element->attributes    = wmem_map_new(wmem_epan_scope(), g_str_hash, g_str_equal);
1112         element->elements      = wmem_map_new(wmem_epan_scope(), g_str_hash, g_str_equal);
1113
1114         if( wmem_map_lookup(elements, element->name) ) {
1115             g_string_append_printf(errors, "element %s defined more than once\n", element->name);
1116             free_elements(NULL, element, NULL);
1117         } else {
1118             wmem_map_insert(elements, element->name, element);
1119             g_ptr_array_add(element_names, wmem_strdup(wmem_epan_scope(), element->name));
1120         }
1121
1122         g_free(nl->name);
1123         g_free(nl);
1124     }
1125
1126     /* then we add the attributes to its relative elements */
1127     while(dtd_data->attributes->len) {
1128         dtd_named_list_t *nl      = (dtd_named_list_t *)g_ptr_array_remove_index(dtd_data->attributes, 0);
1129         xml_ns_t         *element = (xml_ns_t *)wmem_map_lookup(elements, nl->name);
1130
1131         if (element) {
1132             while(nl->list->len) {
1133                 gchar *name = (gchar *)g_ptr_array_remove_index(nl->list, 0);
1134                 int   *id_p = wmem_new(wmem_epan_scope(), int);
1135
1136                 *id_p = -1;
1137                 wmem_map_insert(element->attributes, wmem_strdup(wmem_epan_scope(), name), id_p);
1138                 g_free(name);            }
1139         }
1140         else {
1141             g_string_append_printf(errors, "element %s is not defined\n", nl->name);
1142         }
1143
1144         g_free(nl->name);
1145         g_ptr_array_free(nl->list, TRUE);
1146         g_free(nl);
1147     }
1148
1149     /* if a proto_root is defined in the dtd we'll use that as root */
1150     if( dtd_data->proto_root ) {
1151         wmem_free(wmem_epan_scope(), root_name);
1152         root_name = wmem_strdup(wmem_epan_scope(), dtd_data->proto_root);
1153     }
1154
1155     /* we use a stack with the names to avoid recurring infinitelly */
1156     hier = g_ptr_array_new();
1157
1158     /*
1159      * if a proto name was given in the dtd the dtd will be used as a protocol
1160      * or else the dtd will be loaded as a branch of the xml namespace
1161      */
1162     if( ! dtd_data->proto_name ) {
1163         hfs  = hf_arr;
1164         etts = ett_arr;
1165         g_ptr_array_add(hier, wmem_strdup(wmem_epan_scope(), "xml"));
1166     } else {
1167         /*
1168          * if we were given a proto_name the namespace will be registered
1169          * as an independent protocol with its own hf and ett arrays.
1170          */
1171         hfs  = wmem_array_new(wmem_epan_scope(), sizeof(hf_register_info));
1172         etts = g_array_new(FALSE, FALSE, sizeof(gint *));
1173     }
1174
1175     /* the root element of the dtd's namespace */
1176     root_element = wmem_new(wmem_epan_scope(), xml_ns_t);
1177     root_element->name          = wmem_strdup(wmem_epan_scope(), root_name);
1178     root_element->fqn           = dtd_data->proto_name ? wmem_strdup(wmem_epan_scope(), dtd_data->proto_name) : root_element->name;
1179     root_element->hf_tag        = -1;
1180     root_element->hf_cdata      = -1;
1181     root_element->ett           = -1;
1182     root_element->elements      = wmem_map_new(wmem_epan_scope(), g_str_hash, g_str_equal);
1183     root_element->element_names = element_names;
1184
1185     /*
1186      * we can either create a namespace as a flat namespace
1187      * in which all the elements are at the root level
1188      * or we can create a recursive namespace
1189      */
1190     if (dtd_data->recursion) {
1191         xml_ns_t *orig_root;
1192
1193         make_xml_hier(root_name, root_element, elements, hier, errors, hfs, etts, dtd_data->proto_name);
1194
1195         wmem_map_insert(root_element->elements, (gpointer)root_element->name, root_element);
1196
1197         orig_root = (xml_ns_t *)wmem_map_lookup(elements, root_name);
1198
1199         /* if the root element was defined copy its attrlist to the child */
1200         if(orig_root) {
1201             struct _attr_reg_data d;
1202
1203             d.basename = dtd_data->proto_name;
1204             d.hf = hfs;
1205
1206             root_element->attributes = copy_attributes_hash(orig_root->attributes);
1207             wmem_map_foreach(root_element->attributes, add_xml_attribute_names, &d);
1208         } else {
1209             root_element->attributes = wmem_map_new(wmem_epan_scope(), g_str_hash, g_str_equal);
1210         }
1211
1212         /* we then create all the sub hierarchies to catch the recurred cases */
1213         g_ptr_array_add(hier, root_name);
1214
1215         while(root_element->element_names->len) {
1216             curr_name = (gchar *)g_ptr_array_remove_index(root_element->element_names, 0);
1217
1218             if( ! wmem_map_lookup(root_element->elements, curr_name) ) {
1219                 xml_ns_t *fresh = make_xml_hier(curr_name, root_element, elements, hier, errors,
1220                                               hfs, etts, dtd_data->proto_name);
1221                 wmem_map_insert(root_element->elements, (gpointer)fresh->name, fresh);
1222             }
1223         }
1224
1225     } else {
1226         /* a flat namespace */
1227         g_ptr_array_add(hier, root_name);
1228
1229         root_element->attributes = wmem_map_new(wmem_epan_scope(), g_str_hash, g_str_equal);
1230
1231         while(root_element->element_names->len) {
1232             xml_ns_t *fresh;
1233             gint *ett_p;
1234             struct _attr_reg_data d;
1235
1236             curr_name = (gchar *)g_ptr_array_remove_index(root_element->element_names, 0);
1237             fresh       = duplicate_element((xml_ns_t *)wmem_map_lookup(elements, curr_name));
1238             fresh->fqn  = fully_qualified_name(hier, curr_name, root_name);
1239
1240             add_xml_field(hfs, &(fresh->hf_tag), curr_name, fresh->fqn);
1241             add_xml_field(hfs, &(fresh->hf_cdata), curr_name, fresh->fqn);
1242
1243             d.basename = fresh->fqn;
1244             d.hf = hfs;
1245
1246             wmem_map_foreach(fresh->attributes, add_xml_attribute_names, &d);
1247
1248             ett_p = &fresh->ett;
1249             g_array_append_val(etts, ett_p);
1250
1251             g_ptr_array_free(fresh->element_names, TRUE);
1252
1253             wmem_map_insert(root_element->elements, (gpointer)fresh->name, fresh);
1254         }
1255     }
1256
1257     g_ptr_array_free(element_names, TRUE);
1258
1259     g_ptr_array_free(hier, TRUE);
1260
1261     /*
1262      * if we were given a proto_name the namespace will be registered
1263      * as an independent protocol.
1264      */
1265     if( dtd_data->proto_name ) {
1266         gint *ett_p;
1267         gchar *full_name, *short_name;
1268
1269         if (dtd_data->description) {
1270             full_name = wmem_strdup(wmem_epan_scope(), dtd_data->description);
1271         } else {
1272             full_name = wmem_strdup(wmem_epan_scope(), root_name);
1273         }
1274         short_name = wmem_strdup(wmem_epan_scope(), dtd_data->proto_name);
1275
1276         ett_p = &root_element->ett;
1277         g_array_append_val(etts, ett_p);
1278
1279         add_xml_field(hfs, &root_element->hf_cdata, root_element->name, root_element->fqn);
1280
1281         root_element->hf_tag = proto_register_protocol(full_name, short_name, short_name);
1282         proto_register_field_array(root_element->hf_tag, (hf_register_info*)wmem_array_get_raw(hfs), wmem_array_get_count(hfs));
1283         proto_register_subtree_array((gint **)g_array_data(etts), etts->len);
1284
1285         if (dtd_data->media_type) {
1286             gchar* media_type = wmem_strdup(wmem_epan_scope(), dtd_data->media_type);
1287             wmem_map_insert(media_types, media_type, root_element);
1288         }
1289
1290         g_array_free(etts, TRUE);
1291     }
1292
1293     wmem_map_insert(xml_ns.elements, root_element->name, root_element);
1294     wmem_map_foreach(elements, free_elements, NULL);
1295
1296     destroy_dtd_data(dtd_data);
1297     wmem_free(wmem_epan_scope(), root_name);
1298 }
1299
/*
 * Thin aliases over GLib's directory API, used when scanning for DTD files.
 *
 * OPENDIR_OP() expects a `GError **` variable named `dummy` to be in scope
 * at the place where it is expanded.
 */
#  define DIRECTORY_T GDir
#  define FILE_T gchar
#  define OPENDIR_OP(name) g_dir_open(name, 0, dummy)
#  define DIRGETNEXT_OP(dir) g_dir_read_name(dir)
/* no trailing ';' in the expansion, so it can be used inside any expression or if/else */
#  define GETFNAME_OP(file) (file)
#  define CLOSEDIR_OP(dir) g_dir_close(dir)
1306
1307 static void init_xml_names(void)
1308 {
1309     guint         i;
1310     DIRECTORY_T  *dir;
1311     const FILE_T *file;
1312     const gchar  *filename;
1313     gchar        *dirname;
1314
1315     GError **dummy = wmem_new(wmem_epan_scope(), GError *);
1316     *dummy = NULL;
1317
1318     xmpli_names = wmem_map_new(wmem_epan_scope(), g_str_hash, g_str_equal);
1319     media_types = wmem_map_new(wmem_epan_scope(), g_str_hash, g_str_equal);
1320
1321     unknown_ns.elements = xml_ns.elements = wmem_map_new(wmem_epan_scope(), g_str_hash, g_str_equal);
1322     unknown_ns.attributes = xml_ns.attributes = wmem_map_new(wmem_epan_scope(), g_str_hash, g_str_equal);
1323
1324     xml_new_namespace(xmpli_names, "xml", "version", "encoding", "standalone", NULL);
1325
1326     dirname = get_persconffile_path("dtds", FALSE);
1327
1328     if (test_for_directory(dirname) != EISDIR) {
1329         /* Although dir isn't a directory it may still use memory */
1330         g_free(dirname);
1331         dirname = get_datafile_path("dtds");
1332     }
1333
1334     if (test_for_directory(dirname) == EISDIR) {
1335         if ((dir = OPENDIR_OP(dirname)) != NULL) {
1336             GString *errors = g_string_new("");
1337
1338             while ((file = DIRGETNEXT_OP(dir)) != NULL) {
1339                 guint namelen;
1340                 filename = GETFNAME_OP(file);
1341
1342                 namelen = (int)strlen(filename);
1343                 if ( namelen > 4 && ( g_ascii_strcasecmp(filename+(namelen-4), ".dtd")  == 0 ) ) {
1344                     GString *preparsed;
1345                     dtd_build_data_t *dtd_data;
1346
1347                     g_string_truncate(errors, 0);
1348                     preparsed = dtd_preparse(dirname, filename, errors);
1349
1350                     if (errors->len) {
1351                         report_failure("Dtd Preparser in file %s%c%s: %s",
1352                                        dirname, G_DIR_SEPARATOR, filename, errors->str);
1353                         continue;
1354                     }
1355
1356                     dtd_data = dtd_parse(preparsed);
1357
1358                     g_string_free(preparsed, TRUE);
1359
1360                     if (dtd_data->error->len) {
1361                         report_failure("Dtd Parser in file %s%c%s: %s",
1362                                        dirname, G_DIR_SEPARATOR, filename, dtd_data->error->str);
1363                         destroy_dtd_data(dtd_data);
1364                         continue;
1365                     }
1366
1367                     register_dtd(dtd_data, errors);
1368
1369                     if (errors->len) {
1370                         report_failure("Dtd Registration in file: %s%c%s: %s",
1371                                        dirname, G_DIR_SEPARATOR, filename, errors->str);
1372                         continue;
1373                     }
1374                 }
1375             }
1376             g_string_free(errors, TRUE);
1377
1378             CLOSEDIR_OP(dir);
1379         }
1380     }
1381
1382     g_free(dirname);
1383
1384     for(i=0;i<array_length(default_media_types);i++) {
1385         if( ! wmem_map_lookup(media_types, default_media_types[i]) ) {
1386             wmem_map_insert(media_types, (gpointer)default_media_types[i], &xml_ns);
1387         }
1388     }
1389
1390     wmem_map_foreach(xmpli_names, add_xmlpi_namespace, (gpointer)"xml.xmlpi");
1391
1392     wmem_free(wmem_epan_scope(), dummy);
1393 }
1394
1395 void
1396 proto_register_xml(void)
1397 {
1398     static gint *ett_base[] = {
1399         &unknown_ns.ett,
1400         &xml_ns.ett,
1401         &ett_dtd,
1402         &ett_xmpli
1403     };
1404
1405     static hf_register_info hf_base[] = {
1406         { &hf_xmlpi,
1407           {"XMLPI", "xml.xmlpi",
1408            FT_STRING, BASE_NONE, NULL, 0,
1409            NULL, HFILL }
1410         },
1411         { &hf_cdatasection,
1412           {"CDATASection", "xml.cdatasection",
1413            FT_STRING, BASE_NONE, NULL, 0,
1414            NULL, HFILL }
1415         },
1416         { &hf_comment,
1417           {"Comment", "xml.comment",
1418            FT_STRING, BASE_NONE, NULL, 0,
1419            NULL, HFILL }
1420         },
1421         { &hf_unknowwn_attrib,
1422           {"Attribute", "xml.attribute",
1423            FT_STRING, BASE_NONE, NULL, 0,
1424            NULL, HFILL }
1425         },
1426         { &hf_doctype,
1427           {"Doctype", "xml.doctype",
1428            FT_STRING, BASE_NONE, NULL, 0,
1429            NULL, HFILL }
1430         },
1431         { &hf_dtd_tag,
1432           {"DTD Tag", "xml.dtdtag",
1433            FT_STRING, BASE_NONE, NULL, 0,
1434            NULL, HFILL }
1435         },
1436         { &unknown_ns.hf_cdata,
1437           {"CDATA", "xml.cdata",
1438            FT_STRING, BASE_NONE, NULL, 0, NULL,
1439            HFILL }
1440         },
1441         { &unknown_ns.hf_tag,
1442           {"Tag", "xml.tag",
1443            FT_STRING, BASE_NONE, NULL, 0,
1444            NULL, HFILL }
1445         },
1446         { &xml_ns.hf_cdata,
1447           {"Unknown", "xml.unknown",
1448            FT_STRING, BASE_NONE, NULL, 0,
1449            NULL, HFILL }
1450         }
1451     };
1452
1453     static ei_register_info ei[] = {
1454         { &ei_xml_closing_unopened_tag, { "xml.closing_unopened_tag", PI_MALFORMED, PI_ERROR, "Closing an unopened tag", EXPFILL }},
1455         { &ei_xml_closing_unopened_xmpli_tag, { "xml.closing_unopened_xmpli_tag", PI_MALFORMED, PI_ERROR, "Closing an unopened xmpli tag", EXPFILL }},
1456         { &ei_xml_unrecognized_text, { "xml.unrecognized_text", PI_PROTOCOL, PI_WARN, "Unrecognized text", EXPFILL }},
1457     };
1458
1459     module_t *xml_module;
1460     expert_module_t* expert_xml;
1461
1462     hf_arr  = wmem_array_new(wmem_epan_scope(), sizeof(hf_register_info));
1463     ett_arr = g_array_new(FALSE, FALSE, sizeof(gint *));
1464
1465     wmem_array_append(hf_arr, hf_base, array_length(hf_base));
1466     g_array_append_vals(ett_arr, ett_base, array_length(ett_base));
1467
1468     init_xml_names();
1469
1470     xml_ns.hf_tag = proto_register_protocol("eXtensible Markup Language", "XML", xml_ns.name);
1471
1472     proto_register_field_array(xml_ns.hf_tag, (hf_register_info*)wmem_array_get_raw(hf_arr), wmem_array_get_count(hf_arr));
1473     proto_register_subtree_array((gint **)g_array_data(ett_arr), ett_arr->len);
1474     expert_xml = expert_register_protocol(xml_ns.hf_tag);
1475     expert_register_field_array(expert_xml, ei, array_length(ei));
1476
1477     xml_module = prefs_register_protocol(xml_ns.hf_tag, NULL);
1478     prefs_register_obsolete_preference(xml_module, "heuristic");
1479     prefs_register_obsolete_preference(xml_module, "heuristic_tcp");
1480     prefs_register_obsolete_preference(xml_module, "heuristic_udp");
1481     /* XXX - UCS-2, or UTF-16? */
1482     prefs_register_bool_preference(xml_module, "heuristic_unicode", "Use Unicode in heuristics",
1483                                    "Try to recognize XML encoded in Unicode (UCS-2BE)",
1484                                    &pref_heuristic_unicode);
1485
1486     g_array_free(ett_arr, TRUE);
1487
1488     xml_handle = register_dissector("xml", dissect_xml, xml_ns.hf_tag);
1489
1490     init_xml_parser();
1491 }
1492
1493 static void
1494 add_dissector_media(gpointer k, gpointer v _U_, gpointer p _U_)
1495 {
1496     dissector_add_string("media_type", (gchar *)k, xml_handle);
1497 }
1498
1499 void
1500 proto_reg_handoff_xml(void)
1501 {
1502     wmem_map_foreach(media_types, add_dissector_media, NULL);
1503     dissector_add_uint_range_with_preference("tcp.port", "", xml_handle);
1504
1505     heur_dissector_add("http",  dissect_xml_heur, "XML in HTTP", "xml_http", xml_ns.hf_tag, HEURISTIC_DISABLE);
1506     heur_dissector_add("sip",   dissect_xml_heur, "XML in SIP", "xml_sip", xml_ns.hf_tag, HEURISTIC_DISABLE);
1507     heur_dissector_add("media", dissect_xml_heur, "XML in media", "xml_media", xml_ns.hf_tag, HEURISTIC_DISABLE);
1508     heur_dissector_add("tcp", dissect_xml_heur, "XML over TCP", "xml_tcp", xml_ns.hf_tag, HEURISTIC_DISABLE);
1509     heur_dissector_add("udp", dissect_xml_heur, "XML over UDP", "xml_udp", xml_ns.hf_tag, HEURISTIC_DISABLE);
1510
1511     heur_dissector_add("wtap_file", dissect_xml_heur, "XML file", "xml_wtap", xml_ns.hf_tag, HEURISTIC_ENABLE);
1512
1513 }
1514
1515 /*
1516  * Editor modelines  -  https://www.wireshark.org/tools/modelines.html
1517  *
1518  * Local variables:
1519  * c-basic-offset: 4
1520  * tab-width: 8
1521  * indent-tabs-mode: nil
1522  * End:
1523  *
1524  * vi: set shiftwidth=4 tabstop=8 expandtab:
1525  * :indentSize=4:tabSize=8:noTabs=true:
1526  */