Make the frame_data_sequence structure opaque, and move some other
[obnox/wireshark/wip.git] / epan / dtd_parse.l
1 /*
2  * We don't use unput, so don't generate code for it.
3  */
4 %option nounput 
5
6 /*
7  * We don't read from the terminal.
8  */
9 %option never-interactive
10
11 /*
12  * Prefix scanner routines with "Dtd_Parse_" rather than "yy", so this scanner
13  * can coexist with other scanners.
14  */
15 %option prefix="Dtd_Parse_"
16
17 %option outfile="dtd_parse.c"
18
19 %{
20
21         /* dtd_parse.l
22         * an XML dissector for Wireshark 
23         * lexical analyzer for DTDs
24         *
25         * Copyright 2004, Luis E. Garcia Ontanon <luis@ontanon.org>
26         *
27         * $Id$
28         *
29         * Wireshark - Network traffic analyzer
30         * By Gerald Combs <gerald@wireshark.org>
31         * Copyright 1998 Gerald Combs
32         *
33         * This program is free software; you can redistribute it and/or
34         * modify it under the terms of the GNU General Public License
35         * as published by the Free Software Foundation; either version 2
36         * of the License, or (at your option) any later version.
37         * 
38         * This program is distributed in the hope that it will be useful,
39         * but WITHOUT ANY WARRANTY; without even the implied warranty of
40         * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
41         * GNU General Public License for more details.
42         * 
43         * You should have received a copy of the GNU General Public License
44         * along with this program; if not, write to the Free Software
45         * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
46         */
47         
48 #include <glib.h>
49 #include <string.h>
50         
51 #include "dtd.h"
52 #include "dtd_grammar.h"
53 #include "dtd_parse.h"
54 #include "dtd_parse_lex.h"
55         
56         struct _proto_xmlpi_attr {
57                 gchar* name;
58                 void (*act)(gchar*);
59         };
60
61         static void* pParser;
62         static GString* input_string;   
63         static guint offset;
64         static guint len;
65         static gchar* location;
66         static gchar* attr_name;
67         
68         static int my_yyinput(char* buff,guint size);
69         
70         static dtd_token_data_t* new_token(gchar*);
71
72         static dtd_build_data_t* build_data;
73         
74         static void set_proto_name (gchar* val) { if(build_data->proto_name) g_free(build_data->proto_name); build_data->proto_name = g_strdup(val); }
75         static void set_media_type (gchar* val) { if(build_data->media_type) g_free(build_data->media_type); build_data->media_type = g_strdup(val); }
76         static void set_proto_root (gchar* val) { if(build_data->proto_root) g_free(build_data->proto_root); build_data->proto_root = g_strdup(val); }
77         static void set_description (gchar* val) { if(build_data->description) g_free(build_data->description); build_data->description = g_strdup(val); }
78         static void set_recursive (gchar* val) { build_data->recursion = ( g_ascii_strcasecmp(val,"yes") == 0 ) ? TRUE : FALSE; }
79
80         static struct _proto_xmlpi_attr proto_attrs[] =
81         {
82                 { "proto_name", set_proto_name },
83                 { "media", set_media_type },
84                 { "root", set_proto_root },
85                 { "description", set_description },
86                 { "hierarchy", set_recursive },
87                 {NULL,NULL}
88         };
89         
/*
 * Optional trace of every token handed to the parser.
 *
 * This is a function-like macro on purpose: the token type has to be passed
 * in explicitly.  An object-like macro that merely mentions token_type is
 * expanded after DTD_PARSE's own parameter substitution has taken place, so
 * the name would be left as an undeclared identifier.
 */
#ifdef DEBUG_DTD_PARSER
#define DEBUG_DTD_TOKEN(token_type) fprintf(stderr,"->%s (%i)%s\n",location,(token_type),yytext)
#else
#define DEBUG_DTD_TOKEN(token_type)
#endif

/*
 * Hand the current match (yytext) to the parser as a token of the given
 * type, and stop scanning as soon as an error message has been recorded in
 * build_data->error.
 */
#define DTD_PARSE(token_type) \
        {   DEBUG_DTD_TOKEN(token_type); \
                DtdParse(pParser, (token_type), new_token(yytext), build_data); \
                if(build_data->error->len > 0) yyterminate(); \
        }
101
102
/*
 * The scanner takes its input from my_yyinput(), which serves bytes from
 * input_string.
 */
#define YY_INPUT(buff,result,max_size) \
        ( (result) = my_yyinput((buff), (max_size)) )

/*
 * Flex (v 2.5.35) uses this symbol to "exclude" unistd.h
 */
#if defined(_WIN32)
#define YY_NO_UNISTD_H
#endif
111
112 %}
113
/*
 * Comment and processing-instruction delimiters, and the two
 * Wireshark-specific processing instructions.
 */
comment_start "<!--"
comment_stop "-->"

start_xmlpi "<?"

location_xmlpi "wireshark:location"
protocol_xmlpi "wireshark:protocol"

/*
 * An equals sign, optional blanks, then the opening double quote.
 * [:blank:] only names a character class inside a bracket expression,
 * hence the doubled brackets; written with single brackets it is just the
 * set of the characters ':', 'b', 'l', 'a', 'n' and 'k'.
 */
get_attr_quote =[[:blank:]]*["]
avoid_editor_bug ["]

get_location_xmlpi  [^[:blank:]]+

stop_xmlpi "?>"
128
/*
 * Start of a NOTATION declaration, with optional blanks after the "<!".
 * [:blank:] only names a character class inside a bracket expression,
 * hence the doubled brackets.
 */
notation_tag       "<!"[[:blank:]]*NOTATION

/*
 * Tag delimiters, white space and declaration keywords.
 */
special_start  "<!"
special_stop   ">"
whitespace     [[:blank:]\r\n]+
newline        \n
attlist_kw     ATTLIST
doctype_kw     DOCTYPE
element_kw     ELEMENT

/*
 * Element content keywords.
 */
pcdata         #PCDATA
any            ANY
cdata          #CDATA

/*
 * Attribute type keywords.
 */
iD             ID
idref          IDREF
idrefs         IDREFS
nmtoken        NMTOKEN
nmtokens       NMTOKENS
entity         ENTITY
entities       ENTITIES
notation       NOTATION
cdata_t        CDATA

/*
 * EMPTY and the attribute default keywords.
 */
empty          EMPTY
defaulT        #DEFAULT
fixed          #FIXED
required       #REQUIRED
implied        #IMPLIED

/*
 * Punctuation.
 */
star           "*"
question       "?"
plus           "+"
open_parens    "("
close_parens   ")"
open_bracket   "["
close_bracket  "]"
comma          ","
pipe           "|"
dquote         ["]

/*
 * Names and quoted strings.
 */
name           [A-Za-z0-9][-a-zA-Z0-9_]*
dquoted        ["][^\"]*["]
squoted        ['][^\']*[']

/*
 * Start conditions used by the rules.
 */
%START DTD XMLPI LOCATION DONE PROTOCOL GET_ATTR_QUOTE GET_ATTR_VAL GET_ATTR_CLOSE_QUOTE IN_COMMENT IN_NOTATION
175 %%
176
{whitespace}    { /* No start condition on this rule: white space is dropped in every state. */ }
178
179
<DTD>{comment_start}            {
        /* Everything up to the closing delimiter is discarded. */
        BEGIN IN_COMMENT;
}
<IN_COMMENT>[^-]+               |
<IN_COMMENT>[-]                 {
        /*
         * Comment text is ignored.  Runs without a dash are consumed in one
         * match; the pattern requires at least one character so that it can
         * never match the empty string.  A dash is taken on its own so that
         * the longer closing delimiter can still be recognised.
         */
}
<IN_COMMENT>{comment_stop}      {
        BEGIN DTD;
}
184
<DTD>{notation_tag}             {
        /* NOTATION declarations produce no tokens; skip to the closing bracket. */
        BEGIN IN_NOTATION;
}
<IN_NOTATION>[^>]               { /* discarded */ }
<IN_NOTATION>{special_stop}     {
        BEGIN DTD;
}
188
<DTD>{start_xmlpi}              {
        /* Start of a processing instruction. */
        BEGIN XMLPI;
}

<XMLPI>{location_xmlpi}         {
        BEGIN LOCATION;
}

<XMLPI>{protocol_xmlpi}         {
        BEGIN PROTOCOL;
}

<XMLPI>.                        {
        /*
         * Any other character of a processing instruction is dropped here,
         * so it does not reach the default rule, which would copy it to the
         * scanner's output.  The closing delimiter and the two
         * instruction names are longer matches and therefore still win.
         */
}

<XMLPI>{stop_xmlpi}             {
        BEGIN DTD;
}
203
<LOCATION>{get_location_xmlpi}  {
        /* Remember the new location text, replacing any earlier one. */
        g_free(location);
        location = g_strdup(yytext);
        BEGIN DONE;
}

<DONE>{stop_xmlpi}              {
        BEGIN DTD;
}
211
<PROTOCOL>{name} {
        /*
         * Keep a lower-cased copy of the attribute name until its value has
         * been read.  g_ascii_strdown() returns a newly allocated string and
         * does not depend on the current locale.
         */
        attr_name = g_ascii_strdown(yytext, -1);
        BEGIN GET_ATTR_QUOTE;
}

<GET_ATTR_QUOTE>{get_attr_quote} {
        BEGIN GET_ATTR_VAL;
}

<GET_ATTR_QUOTE>. {
        g_string_append_printf(build_data->error,
                                        "error in wireshark:protocol xmpli at %s : could not find attribute value!",
                                        location);
        /* Scanning stops here, so the pending attribute name must be released now. */
        g_free(attr_name);
        attr_name = NULL;
        yyterminate();
}
226
<GET_ATTR_VAL>[^"]+ {
        /*"*/
        struct _proto_xmlpi_attr* handler;

        /* Find the table entry for the pending attribute name; the match ignores case. */
        for (handler = proto_attrs; handler->name != NULL; handler++) {
                if (g_ascii_strcasecmp(attr_name, handler->name) == 0)
                        break;
        }

        /* Reaching the terminating entry means the attribute is unknown. */
        if (handler->name == NULL) {
                g_string_append_printf(build_data->error,
                                                "error in wireshark:protocol xmpli at %s : no such parameter %s!",
                                                location, attr_name);
                g_free(attr_name);
                yyterminate();
        }

        /* yytext is the attribute value, without the surrounding quotes. */
        handler->act(yytext);
        g_free(attr_name);

        BEGIN GET_ATTR_CLOSE_QUOTE;
}
252
<GET_ATTR_CLOSE_QUOTE>{dquote}  {
        /* Value complete; look for another attribute or the end of the instruction. */
        BEGIN PROTOCOL;
}

<PROTOCOL>{stop_xmlpi}          {
        BEGIN DTD;
}
256
<DTD>{special_start}    { /* declaration delimiters */ DTD_PARSE(TOKEN_TAG_START); }
<DTD>{special_stop}     { DTD_PARSE(TOKEN_TAG_STOP); }

<DTD>{attlist_kw}       { /* declaration keywords */ DTD_PARSE(TOKEN_ATTLIST_KW); }
<DTD>{element_kw}       { DTD_PARSE(TOKEN_ELEMENT_KW); }
<DTD>{doctype_kw}       { DTD_PARSE(TOKEN_DOCTYPE_KW); }

<DTD>{pcdata}           |
<DTD>{any}              |
<DTD>{cdata}            { /* element content keywords */ DTD_PARSE(TOKEN_ELEM_DATA); }
<DTD>{empty}            { DTD_PARSE(TOKEN_EMPTY_KW); }

<DTD>{iD}               |
<DTD>{idref}            |
<DTD>{idrefs}           |
<DTD>{nmtoken}          |
<DTD>{nmtokens}         |
<DTD>{entity}           |
<DTD>{entities}         |
<DTD>{notation}         |
<DTD>{cdata_t}          { /* attribute types */ DTD_PARSE(TOKEN_ATT_TYPE); }
<DTD>{defaulT}          |
<DTD>{fixed}            { /* attribute defaults */ DTD_PARSE(TOKEN_ATT_DEF_WITH_VALUE); }
<DTD>{required}         |
<DTD>{implied}          { DTD_PARSE(TOKEN_ATT_DEF); }

<DTD>{star}             { /* punctuation */ DTD_PARSE(TOKEN_STAR); }
<DTD>{question}         { DTD_PARSE(TOKEN_QUESTION); }
<DTD>{plus}             { DTD_PARSE(TOKEN_PLUS); }
<DTD>{comma}            { DTD_PARSE(TOKEN_COMMA); }
<DTD>{open_parens}      { DTD_PARSE(TOKEN_OPEN_PARENS); }
<DTD>{close_parens}     { DTD_PARSE(TOKEN_CLOSE_PARENS); }
<DTD>{open_bracket}     { DTD_PARSE(TOKEN_OPEN_BRACKET); }
<DTD>{close_bracket}    { DTD_PARSE(TOKEN_CLOSE_BRACKET); }
<DTD>{pipe}             { DTD_PARSE(TOKEN_PIPE); }

<DTD>{dquoted}          |
<DTD>{squoted}          { /* quoted strings, quotes included */ DTD_PARSE(TOKEN_QUOTED); }
<DTD>{name}             { /* last, so the keywords above win ties of equal length */ DTD_PARSE(TOKEN_NAME); }
296
297 %%
298
299 static dtd_token_data_t* new_token(gchar* text) {
300         dtd_token_data_t* t = g_malloc(sizeof(dtd_token_data_t));
301         
302         t->text = g_strdup(text);
303         t->location = g_strdup(location);
304         
305         return t;
306 }
307
308
309 static int my_yyinput(char* buff, guint size) {
310
311         if (offset >= len ) {
312                 return YY_NULL;
313         } else if ( offset + size <= len ) {
314                 memcpy(buff, input_string->str + offset,size);
315                 offset += size;
316                 return size;
317         } else {
318                 size = len - offset;
319                 memcpy(buff, input_string->str + offset,size);
320                 offset = len;
321                 return size;
322         }
323 }
324
325 extern dtd_build_data_t* dtd_parse(GString* s) {
326
327         input_string = s;
328         offset = 0;
329         len = (guint) input_string->len;
330         
331         pParser = DtdParseAlloc(g_malloc);
332
333 #ifdef DEBUG_DTD_PARSER
334         DtdParseTrace(stderr, ">>");
335 #endif
336     
337         build_data = g_malloc(sizeof(dtd_build_data_t));
338
339         build_data->proto_name = NULL;
340         build_data->media_type = NULL;
341         build_data->description = NULL;
342         build_data->proto_root = NULL;
343         build_data->recursion = FALSE;
344     
345         build_data->elements = g_ptr_array_new();
346         build_data->attributes = g_ptr_array_new();
347
348         build_data->error = g_string_new("");
349         
350         location = NULL;
351     
352         BEGIN DTD;
353         
354         yylex();
355
356         DtdParse(pParser, 0, NULL,build_data);
357
358         yyrestart(NULL);
359         
360         if (location) g_free(location);
361         
362         location = NULL;
363     
364         DtdParseFree(pParser, g_free );
365         
366         return build_data;
367 }
368
/*
 * Tell the scanner to stop when it reaches the end of the input: a
 * non-zero return value means there is nothing further to read.
 * (%option noyywrap is not used because with it some flex versions,
 * e.g. 2.5.35, generate code which causes warnings by the Windows VC
 * compiler.)
 */
int yywrap(void) {
        enum { NO_MORE_INPUT = 1 };

        return NO_MORE_INPUT;
}