Licepnse -> License
[obnox/wireshark/wip.git] / epan / dtd_parse.l
1 %option noyywrap
2 %option nounput 
3 %option outfile="dtd_parse.c"
4 %option prefix="Dtd_Parse_"
5 %option never-interactive
6
7 %{
8
9         /* dtd_parse.l
10         * an XML dissector for Wireshark 
11         * lexical analyzer for DTDs
12         *
13         * Copyright 2004, Luis E. Garcia Ontanon <luis.ontanon@gmail.com>
14         *
15         * $Id$
16         *
17         * Wireshark - Network traffic analyzer
18         * By Gerald Combs <gerald@wireshark.org>
19         * Copyright 1998 Gerald Combs
20         *
21         * This program is free software; you can redistribute it and/or
22         * modify it under the terms of the GNU General Public License
23         * as published by the Free Software Foundation; either version 2
24         * of the License, or (at your option) any later version.
25         * 
26         * This program is distributed in the hope that it will be useful,
27         * but WITHOUT ANY WARRANTY; without even the implied warranty of
28         * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
29         * GNU General Public License for more details.
30         * 
31         * You should have received a copy of the GNU General Public License
32         * along with this program; if not, write to the Free Software
33         * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
34         */
35         
36 #include <glib.h>
37 #include <string.h>
38         
39 #include "dtd.h"
40 #include "dtd_grammar.h"
41 #include "dtd_parse.h"
42         
43         struct _proto_xmlpi_attr {
44                 gchar* name;
45                 void (*act)(gchar*);
46         };
47
48         static void* pParser;
49         static GString* input_string;   
50         static guint offset;
51         static guint len;
52         static gchar* location;
53         static gchar* attr_name;
54         
55         static int my_yyinput(char* buff,guint size);
56         
57         static dtd_token_data_t* new_token(gchar*);
58
59         static dtd_build_data_t* build_data;
60         
61         static void set_proto_name (gchar* val) { if(build_data->proto_name) g_free(build_data->proto_name); build_data->proto_name = g_strdup(val); }
62         static void set_media_type (gchar* val) { if(build_data->media_type) g_free(build_data->media_type); build_data->media_type = g_strdup(val); }
63         static void set_proto_root (gchar* val) { if(build_data->proto_root) g_free(build_data->proto_root); build_data->proto_root = g_strdup(val); }
64         static void set_description (gchar* val) { if(build_data->description) g_free(build_data->description); build_data->description = g_strdup(val); }
65         static void set_recursive (gchar* val) { build_data->recursion = ( g_strcasecmp(val,"yes") == 0 ) ? TRUE : FALSE; }
66
67         static struct _proto_xmlpi_attr proto_attrs[] =
68         {
69                 { "proto_name", set_proto_name },
70                 { "media", set_media_type },
71                 { "root", set_proto_root },
72                 { "description", set_description },
73                 { "hierarchy", set_recursive },
74                 {NULL,NULL}
75         };
76         
#ifdef DEBUG_DTD_PARSER
/* Trace one token on stderr.  The token type has to be passed in
 * explicitly: a macro parameter is only substituted inside the
 * replacement list of the macro that declares it, so a bare
 * `token_type` written here would reach the compiler as an undeclared
 * identifier.  location may still be NULL when no wireshark:location
 * PI has been seen, hence the guard before handing it to %s. */
#define DEBUG_DTD_TOKEN(token_type) \
        fprintf(stderr, "->%s (%i)%s\n", \
                location ? location : "(null)", (token_type), yytext)
#else
#define DEBUG_DTD_TOKEN(token_type) ((void)0)
#endif

/* Hand the current match to the parser as a token of the given type and
 * stop scanning as soon as the parser has recorded an error message in
 * build_data->error.  Wrapped in do/while(0) so that the macro behaves
 * as a single statement; callers supply the trailing semicolon. */
#define DTD_PARSE(token_type) \
        do { \
                DEBUG_DTD_TOKEN(token_type); \
                DtdParse(pParser, (token_type), new_token(yytext), build_data); \
                if (build_data->error->len > 0) yyterminate(); \
        } while (0)
88
89
/* Make the generated scanner fetch its input through my_yyinput(),
 * i.e. from the in-memory input_string, and store the byte count it
 * returns in result. */
#define YY_INPUT(buff, result, max_size) \
        ((result) = my_yyinput((buff), (max_size)))
91
92 %}
93
/* Comment delimiters. */
comment_start "<!--"
comment_stop "-->"

/* Processing instructions and the two wireshark-specific PI targets. */
start_xmlpi "<?"

location_xmlpi "wireshark:location"
protocol_xmlpi "wireshark:protocol"

/* The equals sign, optional blanks, then the opening double quote of an
 * attribute value.  A POSIX class such as [:blank:] is only recognised
 * inside a bracket expression; written bare it is the literal set of
 * the characters colon, b, l, a, n, k. */
get_attr_quote =[[:blank:]]*["]
/* Second double quote on its own line; the name suggests it exists to
 * keep editors' quote matching balanced.  No rule refers to it. */
avoid_editor_bug ["]

get_location_xmlpi  [^[:blank:]]+

stop_xmlpi "?>"

/* Start of a NOTATION declaration, whose body the scanner skips. */
notation_tag       "<!"[[:blank:]]*NOTATION
110
/* Markup declaration delimiters, whitespace and declaration keywords. */
special_start   "<!"
special_stop    ">"
whitespace      [[:blank:]\r\n]+
newline         \n
attlist_kw      ATTLIST
doctype_kw      DOCTYPE
element_kw      ELEMENT

/* Element content keywords. */
pcdata          #PCDATA
any             ANY
cdata           #CDATA

/* Attribute types. */
iD              ID
idref           IDREF
idrefs          IDREFS
nmtoken         NMTOKEN
nmtokens        NMTOKENS
entity          ENTITY
entities        ENTITIES
notation        NOTATION
cdata_t         CDATA

/* EMPTY content and attribute default keywords. */
empty           EMPTY
defaulT         #DEFAULT
fixed           #FIXED
required        #REQUIRED
implied         #IMPLIED

/* Single-character operators and punctuation. */
star            "*"
question        "?"
plus            "+"
open_parens     "("
close_parens    ")"
open_bracket    "["
close_bracket   "]"
comma           ","
pipe            "|"
dquote          ["]

/* Names and quoted literals. */
name            [A-Za-z][-a-zA-Z0-9_]*
dquoted         ["][^\"]*["]
squoted         ['][^\']*[']

%START DTD XMLPI LOCATION DONE PROTOCOL GET_ATTR_QUOTE GET_ATTR_VAL GET_ATTR_CLOSE_QUOTE IN_COMMENT IN_NOTATION
155 %%
156
{whitespace}                    { /* no start condition: whitespace is dropped in every state */ }


<DTD>{comment_start}            { BEGIN IN_COMMENT; }
<IN_COMMENT>[^-]?               { /* discard comment text */ }
<IN_COMMENT>[-]                 { /* a dash that does not start the closing delimiter */ }
<IN_COMMENT>{comment_stop}      { BEGIN DTD; }
164
<DTD>{notation_tag}             { BEGIN IN_NOTATION; }
<IN_NOTATION>[^>]               { /* NOTATION declarations are skipped, not handed to the parser */ }
<IN_NOTATION>{special_stop}     { BEGIN DTD; }
168
<DTD>{start_xmlpi}              { BEGIN XMLPI; }

<XMLPI>{location_xmlpi}         { BEGIN LOCATION; }

<XMLPI>{protocol_xmlpi}         { BEGIN PROTOCOL; }

<XMLPI>.                        { /* Skip the content of any other processing instruction one
                                   * character at a time.  The longer matches above and the
                                   * two-character terminator below still win over this rule.
                                   * Without it the text would reach flex's default rule and be
                                   * echoed to the output stream. */ }
<XMLPI>{stop_xmlpi}             { BEGIN DTD; }
183
<LOCATION>{get_location_xmlpi} {
        /* Replace the remembered location; new_token() copies it into
         * every token created from here on.  The pattern runs up to the
         * next blank, so the value is whatever non-blank text follows
         * the PI target. */
        g_free(location);
        location = g_strdup(yytext);
        BEGIN DONE;
}

<DONE>{stop_xmlpi}              { BEGIN DTD; }
191
<PROTOCOL>{name} {
        /* Keep a lower-cased copy of the attribute name; the
         * GET_ATTR_VAL action looks it up and frees it. */
        gchar* lowered = g_strdup(yytext);

        g_strdown(lowered);
        attr_name = lowered;
        BEGIN GET_ATTR_QUOTE;
}

<GET_ATTR_QUOTE>{get_attr_quote}        { BEGIN GET_ATTR_VAL; }
199
<GET_ATTR_QUOTE>. {
        /* Anything other than the equals sign and opening quote after
         * an attribute name is an error: record it and stop scanning.
         * location is NULL until a wireshark:location PI has been seen,
         * so it is guarded before being handed to %s. */
        g_string_sprintfa(build_data->error,
                          "error in wireshark:protocol xmpli at %s : could not find attribute value!",
                          location ? location : "(null)");

        /* The attribute name duplicated by the PROTOCOL rule is not
         * consumed on this path; release it here so it does not leak. */
        g_free(attr_name);
        attr_name = NULL;

        yyterminate();
}
206
<GET_ATTR_VAL>[^"]+ {
        /*"*/
        /* yytext is the attribute value.  Look the pending attribute
         * name up in proto_attrs and pass the value to its setter; an
         * unknown name is recorded as an error and ends the scan. */
        struct _proto_xmlpi_attr* pa;
        gboolean got_it;

        for (pa = proto_attrs; pa->name; pa++) {
                if (g_strcasecmp(attr_name, pa->name) == 0)
                        break;
        }

        /* The loop stops on the matching entry or on the NULL sentinel. */
        got_it = (pa->name != NULL) ? TRUE : FALSE;

        if (got_it) {
                pa->act(yytext);
        } else {
                /* location is NULL until a wireshark:location PI has
                 * been seen; guard it before handing it to %s. */
                g_string_sprintfa(build_data->error,
                                  "error in wireshark:protocol xmpli at %s : no such parameter %s!",
                                  location ? location : "(null)", attr_name);
        }

        /* The name is finished with on both paths; clear the pointer so
         * that it never refers to freed memory. */
        g_free(attr_name);
        attr_name = NULL;

        if (! got_it)
                yyterminate();

        BEGIN GET_ATTR_CLOSE_QUOTE;
}
232
<GET_ATTR_CLOSE_QUOTE>{dquote}  { /* value finished: expect another attribute or the PI end */ BEGIN PROTOCOL; }

<PROTOCOL>{stop_xmlpi}          { BEGIN DTD; }
236
<DTD>{special_start}    { DTD_PARSE(TOKEN_TAG_START); }
<DTD>{special_stop}     { DTD_PARSE(TOKEN_TAG_STOP); }

<DTD>{attlist_kw}       { DTD_PARSE(TOKEN_ATTLIST_KW); }
<DTD>{element_kw}       { DTD_PARSE(TOKEN_ELEMENT_KW); }
<DTD>{doctype_kw}       { DTD_PARSE(TOKEN_DOCTYPE_KW); }

<DTD>{pcdata}           |
<DTD>{any}              |
<DTD>{cdata}            { DTD_PARSE(TOKEN_ELEM_DATA); }
<DTD>{empty}            { DTD_PARSE(TOKEN_EMPTY_KW); }

<DTD>{iD}               |
<DTD>{idref}            |
<DTD>{idrefs}           |
<DTD>{nmtoken}          |
<DTD>{nmtokens}         |
<DTD>{entity}           |
<DTD>{entities}         |
<DTD>{notation}         |
<DTD>{cdata_t}          { DTD_PARSE(TOKEN_ATT_TYPE); }
<DTD>{defaulT}          |
<DTD>{fixed}            { DTD_PARSE(TOKEN_ATT_DEF_WITH_VALUE); }
<DTD>{required}         |
<DTD>{implied}          { DTD_PARSE(TOKEN_ATT_DEF); }

<DTD>{star}             { DTD_PARSE(TOKEN_STAR); }
<DTD>{question}         { DTD_PARSE(TOKEN_QUESTION); }
<DTD>{plus}             { DTD_PARSE(TOKEN_PLUS); }
<DTD>{comma}            { DTD_PARSE(TOKEN_COMMA); }
<DTD>{open_parens}      { DTD_PARSE(TOKEN_OPEN_PARENS); }
<DTD>{close_parens}     { DTD_PARSE(TOKEN_CLOSE_PARENS); }
<DTD>{open_bracket}     { DTD_PARSE(TOKEN_OPEN_BRACKET); }
<DTD>{close_bracket}    { DTD_PARSE(TOKEN_CLOSE_BRACKET); }
<DTD>{pipe}             { DTD_PARSE(TOKEN_PIPE); }

<DTD>{dquoted}          { DTD_PARSE(TOKEN_QUOTED); }
<DTD>{squoted}          { DTD_PARSE(TOKEN_QUOTED); }
<DTD>{name}             { /* listed after the keywords so that an equal-length match resolves to the keyword */
                          DTD_PARSE(TOKEN_NAME); }
276
277 %%
278
279 static dtd_token_data_t* new_token(gchar* text) {
280         dtd_token_data_t* t = g_malloc(sizeof(dtd_token_data_t));
281         
282         t->text = g_strdup(text);
283         t->location = g_strdup(location);
284         
285         return t;
286 }
287
288
289 static int my_yyinput(char* buff, guint size) {
290
291         if (offset >= len ) {
292                 return YY_NULL;
293         } else if ( offset + size <= len ) {
294                 memcpy(buff, input_string->str + offset,size);
295                 offset += size;
296                 return size;
297         } else {
298                 size = len - offset;
299                 memcpy(buff, input_string->str + offset,size);
300                 offset = len;
301                 return size;
302         }
303 }
304
305 extern dtd_build_data_t* dtd_parse(GString* s) {
306
307         input_string = s;
308         offset = 0;
309         len = input_string->len;
310         
311         pParser = DtdParseAlloc(g_malloc);
312
313 #ifdef DEBUG_DTD_PARSER
314         DtdParseTrace(stderr, ">>");
315 #endif
316     
317         build_data = g_malloc(sizeof(dtd_build_data_t));
318
319         build_data->proto_name = NULL;
320         build_data->media_type = NULL;
321         build_data->description = NULL;
322         build_data->proto_root = NULL;
323         build_data->recursion = FALSE;
324     
325         build_data->elements = g_ptr_array_new();
326         build_data->attributes = g_ptr_array_new();
327
328         build_data->error = g_string_new("");
329         
330         location = NULL;
331     
332         BEGIN DTD;
333         
334         yylex();
335
336         DtdParse(pParser, 0, NULL,build_data);
337
338         yyrestart(NULL);
339         
340         location = NULL;
341     
342         DtdParseFree(pParser, g_free );
343         
344         return build_data;
345 }