The DTD parser (still missing the glue) and a few fixes to packet-xml.c
[obnox/wireshark/wip.git] / epan / dtd_parse.l
1 %option noyywrap
2 %option nounput 
3 %option outfile="dtd_parse.c"
4 %option prefix="Dtd_Parse_"
5 %option never-interactive
6
7 %{
8
        /* dtd_parse.l
10         * an XML dissector for ethereal 
11         * lexical analyzer for DTDs
12         *
13         * Copyright 2004, Luis E. Garcia Ontanon <luis.ontanon@gmail.com>
14         *
15         * $Id$
16         *
17         * Ethereal - Network traffic analyzer
18         * By Gerald Combs <gerald@ethereal.com>
19         * Copyright 1998 Gerald Combs
20         *
21         * This program is free software; you can redistribute it and/or
22         * modify it under the terms of the GNU General Public License
23         * as published by the Free Software Foundation; either version 2
24         * of the License, or (at your option) any later version.
25         * 
26         * This program is distributed in the hope that it will be useful,
27         * but WITHOUT ANY WARRANTY; without even the implied warranty of
28         * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
29         * GNU General Public License for more details.
30         * 
31         * You should have received a copy of the GNU General Public License
32         * along with this program; if not, write to the Free Software
33         * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
34         */
35         
36 #include <glib.h>
37 #include <string.h>
38         
39 #include "dtd.h"
40 #include "dtd_grammar.h"
41         
42         struct _proto_xmlpi_attr {
43                 gchar* name;
44                 void (*act)(gchar*);
45         };
46
47         void DtdParse(void*,int,dtd_token_data_t*,dtd_build_data_t*);
48         void *DtdParseAlloc(void *(*)(gulong));
49         void DtdParseFree( void*, void(*)(void*) );
50         void DtdParseTrace(FILE *TraceFILE, char *zTracePrompt);        
51         void* pParser;
52         GString* input_string;  
53         guint offset;
54         guint len;
55         gchar* location;
56         gchar* attr_name;
57         
58         static int my_yyinput(char* buff,guint size);
59         
60         static dtd_token_data_t* new_token(gchar*);
61
62         static dtd_build_data_t* build_data;
63         
64         static void set_proto_name (gchar* val) { if(build_data->proto_name) g_free(build_data->proto_name); build_data->proto_name = g_strdup(val); }
65         static void set_media_type (gchar* val) { if(build_data->media_type) g_free(build_data->media_type); build_data->media_type = g_strdup(val); }
66         static void set_proto_root (gchar* val) { if(build_data->proto_root) g_free(build_data->proto_root); build_data->proto_root = g_strdup(val); }
67         static void set_description (gchar* val) { if(build_data->description) g_free(build_data->description); build_data->description = g_strdup(val); }
68
69         struct _proto_xmlpi_attr proto_attrs[] =
70         {
71                 { "name", set_proto_name },
72                 { "media", set_media_type },
73                 { "root", set_proto_root },
74                 { "description", set_description },
75                 {NULL,NULL}
76         };
77         
/*
 * Pass the text just matched (yytext) to the grammar parser as a token of
 * type token_type.  The current location is published in build_data first,
 * and scanning stops as soon as build_data->error holds a message.
 *
 * The body is wrapped in do { } while (0) so that "DTD_PARSE(x);" always
 * forms exactly one statement, even as the unbraced branch of an if/else.
 */
#define DTD_PARSE(token_type) \
        do { \
                build_data->location = location; \
                DtdParse(pParser, (token_type), new_token(yytext), build_data); \
                if (build_data->error->len > 0) yyterminate(); \
        } while (0)
83
/*
 * Make the scanner read through my_yyinput() rather than from a FILE:
 * result receives whatever my_yyinput() returns for a buffer of at most
 * max_size bytes.
 */
#define YY_INPUT(buff, result, max_size) \
        ((result) = my_yyinput((buff), (max_size)))
85
86 %}
87
/* Opening delimiter of an XML processing instruction (PI). */
start_xmlpi "<?"

/* Targets of the two PIs the scanner acts upon. */
location_xmlpi "ethereal:location"
protocol_xmlpi "ethereal:protocol"

/*
 * The equals sign and the opening double quote of an attribute value, with
 * optional blanks in between.  The POSIX class must sit inside a bracket
 * expression: a bare [:blank:] is only the set of the characters
 * ':', 'b', 'l', 'a', 'n' and 'k'.
 */
get_attr_quote =[[:blank:]]*["]
/*
 * Not referenced by any rule visible in this file.
 * NOTE(review): presumably here only to re-balance the double quote above
 * for editors' syntax highlighting -- confirm.
 */
avoid_editor_bug ["]

/* The location text: one run of characters that are not blanks. */
get_location_xmlpi  [^[:blank:]]+

/* Closing delimiter of a PI. */
stop_xmlpi "?>"
99
/* Delimiters of a markup declaration (TOKEN_TAG_START / TOKEN_TAG_STOP). */
special_start  "<!"
special_stop   ">"
/* Blanks, carriage returns and line feeds; the scanner discards them. */
whitespace     [[:blank:]\r\n]+
/* Not referenced by any rule visible in this file. */
newline        \n
/* Declaration keywords; each one has a token type of its own. */
attlist_kw     ATTLIST
doctype_kw     DOCTYPE
element_kw     ELEMENT

/* Element content keywords; all three are reported as TOKEN_ELEM_DATA. */
pcdata         #PCDATA
any            ANY
cdata          #CDATA

/* Attribute type keywords; all of them are reported as TOKEN_ATT_TYPE. */
iD             ID
idref          IDREF
idrefs         IDREFS
nmtoken        NMTOKEN
nmtokens       NMTOKENS
entity         ENTITY
entities       ENTITIES
notation       NOTATION
cdata_t        CDATA

/*
 * EMPTY is reported as TOKEN_EMPTY_KW, #DEFAULT and #FIXED as
 * TOKEN_ATT_DEF_WITH_VALUE, #REQUIRED and #IMPLIED as TOKEN_ATT_DEF.
 */
empty          EMPTY
defaulT        #DEFAULT
fixed          #FIXED
required       #REQUIRED
implied        #IMPLIED

/*
 * Punctuation.  Every pattern except dquote has a token type of its own;
 * dquote is only used to recognise the closing quote of an attribute value
 * in the ethereal:protocol PI.
 */
star           "*"
question       "?"
plus           "+"
open_parens    "("
close_parens   ")"
open_bracket   "["
close_bracket  "]"
comma          ","
pipe           "|"
dquote         ["]

/*
 * name: a lower-case letter followed by lower-case letters, digits, '-' or
 * '_' (TOKEN_NAME; also used for attribute names in the ethereal:protocol
 * PI).  NOTE(review): upper-case letters are not accepted -- confirm that
 * the input is expected to be lower-case.
 * dquoted / squoted: a string in double or single quotes (TOKEN_QUOTED).
 */
name           [a-z][-a-z0-9_]*
dquoted        ["][^\"]*["]
squoted        ['][^\']*[']
142
/*
 * Start conditions of the scanner.  %START declares them as inclusive, so a
 * rule written without a <condition> prefix stays active in all of them.
 *   DTD                    ordinary DTD text
 *   XMLPI                  just after the opening of a processing instruction
 *   LOCATION, DONE         reading an ethereal:location PI
 *   PROTOCOL, GET_ATTR_*   reading the attributes of an ethereal:protocol PI
 */
%START DTD XMLPI LOCATION DONE PROTOCOL GET_ATTR_QUOTE GET_ATTR_VAL GET_ATTR_CLOSE_QUOTE
144 %%
145
{whitespace}            { /* blanks and line breaks are discarded; the rule has no start condition prefix */ }
147
<DTD>{start_xmlpi} {
        /* A processing instruction opens; its target is examined in the XMLPI state. */
        BEGIN(XMLPI);
}
151
<XMLPI>{location_xmlpi} {
        /*
         * A new location is about to be read.  Release the previous one and
         * clear the pointer, so that the freed string can neither be copied
         * nor freed a second time if no new location text follows.
         */
        g_free(location);
        location = NULL;
        BEGIN LOCATION;
}

<XMLPI>{protocol_xmlpi} {
        BEGIN PROTOCOL;
}

<XMLPI>. {
        /*
         * Skip any other character of a PI the scanner does not act upon.
         * The pattern has to be a bare dot: written as <.> it only matches a
         * less-than sign, one character and a greater-than sign, and the
         * PI content would be echoed by the default rule instead.
         */
}
<XMLPI>{stop_xmlpi} BEGIN DTD;
163
<LOCATION>{get_location_xmlpi} {
        /* Keep a private copy of the location text, then wait for the end of the PI. */
        location = g_strdup(yytext);
        BEGIN(DONE);
}

<DONE>{stop_xmlpi} {
        /* The location PI is closed: back to ordinary DTD text. */
        BEGIN(DTD);
}
170
<PROTOCOL>{name} {
        /*
         * Remember which attribute is being read until its value shows up.
         * A name left over from an input that ended early is released first.
         */
        g_free(attr_name);
        attr_name = g_strdup(yytext);
        BEGIN GET_ATTR_QUOTE;
}

<GET_ATTR_QUOTE>{get_attr_quote} { BEGIN GET_ATTR_VAL; }

<GET_ATTR_QUOTE>. {
        /*
         * The name was not followed by an equals sign and an opening quote.
         * A NULL location (no ethereal:location PI seen yet) is never passed
         * to the %s conversion.
         */
        g_string_sprintfa(build_data->error,
                                        "error in ethereal:protocol xmpli at %s : could not find attribute value!",
                                        location ? location : "(null)");
        /* The pending name is of no use any more: do not leak it. */
        g_free(attr_name);
        attr_name = NULL;
        yyterminate();
}

<GET_ATTR_VAL>[^"]+ {
        /*"*/
        struct _proto_xmlpi_attr* pa;
        gboolean got_it = FALSE;

        /* Find the handler registered for this attribute (case-insensitive). */
        for(pa = proto_attrs; pa->name; pa++) {
                if (g_strcasecmp(attr_name,pa->name) == 0) {
                        pa->act(yytext);
                        got_it = TRUE;
                        break;
                }
        }

        if (! got_it) {
                g_string_sprintfa(build_data->error,
                                                "error in ethereal:protocol xmpli at %s : no such parameter %s!",
                                                location ? location : "(null)", attr_name);
        }

        /* The name has been consumed on both paths; never leave it dangling. */
        g_free(attr_name);
        attr_name = NULL;

        if (! got_it) {
                yyterminate();
        }

        BEGIN GET_ATTR_CLOSE_QUOTE;
}

<GET_ATTR_CLOSE_QUOTE>{dquote} { BEGIN PROTOCOL;}

<PROTOCOL>{stop_xmlpi} BEGIN DTD;
214
<DTD>{special_start}    { DTD_PARSE(TOKEN_TAG_START); }
<DTD>{special_stop}     { DTD_PARSE(TOKEN_TAG_STOP); }

        /* Declaration keywords. */
<DTD>{attlist_kw}       { DTD_PARSE(TOKEN_ATTLIST_KW); }
<DTD>{element_kw}       { DTD_PARSE(TOKEN_ELEMENT_KW); }
<DTD>{doctype_kw}       { DTD_PARSE(TOKEN_DOCTYPE_KW); }

        /* Element content keywords. */
<DTD>{pcdata}           |
<DTD>{any}              |
<DTD>{cdata}            { DTD_PARSE(TOKEN_ELEM_DATA); }
<DTD>{empty}            { DTD_PARSE(TOKEN_EMPTY_KW); }

        /* Attribute types. */
<DTD>{iD}               |
<DTD>{idref}            |
<DTD>{idrefs}           |
<DTD>{nmtoken}          |
<DTD>{nmtokens}         |
<DTD>{entity}           |
<DTD>{entities}         |
<DTD>{notation}         |
<DTD>{cdata_t}          { DTD_PARSE(TOKEN_ATT_TYPE); }

        /* Attribute defaults. */
<DTD>{defaulT}          |
<DTD>{fixed}            { DTD_PARSE(TOKEN_ATT_DEF_WITH_VALUE); }
<DTD>{required}         |
<DTD>{implied}          { DTD_PARSE(TOKEN_ATT_DEF); }

        /* Punctuation. */
<DTD>{star}             { DTD_PARSE(TOKEN_STAR); }
<DTD>{question}         { DTD_PARSE(TOKEN_QUESTION); }
<DTD>{plus}             { DTD_PARSE(TOKEN_PLUS); }
<DTD>{comma}            { DTD_PARSE(TOKEN_COMMA); }
<DTD>{open_parens}      { DTD_PARSE(TOKEN_OPEN_PARENS); }
<DTD>{close_parens}     { DTD_PARSE(TOKEN_CLOSE_PARENS); }
<DTD>{open_bracket}     { DTD_PARSE(TOKEN_OPEN_BRACKET); }
<DTD>{close_bracket}    { DTD_PARSE(TOKEN_CLOSE_BRACKET); }
<DTD>{pipe}             { DTD_PARSE(TOKEN_PIPE); }

        /* Quoted strings and names. */
<DTD>{dquoted}          |
<DTD>{squoted}          { DTD_PARSE(TOKEN_QUOTED); }
<DTD>{name}             { DTD_PARSE(TOKEN_NAME); }
254
255 %%
256
257 static dtd_token_data_t* new_token(gchar* text) {
258         dtd_token_data_t* t = g_malloc(sizeof(dtd_token_data_t));
259         
260         t->text = g_strdup(text);
261         t->location = g_strdup(location);
262         
263         return t;
264 }
265
266
267
268 static int my_yyinput(char* buff, guint size) {
269
270         if (offset >= len ) {
271                 return YY_NULL;
272         } else if ( offset + size <= len ) {
273                 memcpy(buff, input_string->str + offset,size);
274                 offset += size;
275                 return size;
276         } else {
277                 size = len - offset;
278                 memcpy(buff, input_string->str + offset,size);
279                 offset = len;
280                 return size;
281         }
282 }
283
284 extern dtd_build_data_t* dtd_parse(GString* s) {
285
286         input_string = s;
287         offset = 0;
288         len = input_string->len;
289         
290         pParser = DtdParseAlloc(g_malloc);
291         
292         build_data = g_malloc(sizeof(dtd_build_data_t));
293
294         build_data->proto_name = NULL;
295         build_data->media_type = NULL;
296         build_data->description = NULL;
297         build_data->proto_root = NULL;
298         
299         build_data->elements = g_ptr_array_new();
300         build_data->attributes = g_ptr_array_new();
301
302         build_data->location = NULL;
303         build_data->error = g_string_new("");
304         
305         BEGIN DTD;
306         
307         yylex();
308
309         DtdParse(pParser, 0, NULL,build_data);
310
311         yyrestart(NULL);
312         
313         DtdParseFree(pParser, g_free );
314         
315         return build_data;
316 }