From Harald Welte:
[obnox/wireshark/wip.git] / epan / dtd_preparse.l
1 /*
2  * We don't use unput, so don't generate code for it.
3  */
4 %option nounput
5
6 /*
7  * We don't read from the terminal.
8  */
9 %option never-interactive
10
11 /*
12  * The language we're scanning is case-insensitive.
13  */
14 %option caseless
15
16 /*
17  * Prefix scanner routines with "Dtd_PreParse_" rather than "yy", so this
18  * scanner can coexist with other scanners.
19  */
20 %option prefix="Dtd_PreParse_"
21
22 %option outfile="dtd_preparse.c"
23
24 %{
25         /*
26          * dtd_preparse.l
27          *
28          * an XML dissector for wireshark
29          *
30          * DTD Preparser -  import a dtd file into a GString
31          *                                      including files, removing comments
32          *                  and resolving %entities;
33          *
34          * Copyright 2004, Luis E. Garcia Ontanon <luis@ontanon.org>
35          *
36          * $Id$
37          *
38          * Wireshark - Network traffic analyzer
39          * By Gerald Combs <gerald@wireshark.org>
40          * Copyright 1998 Gerald Combs
41          *
42          * This program is free software; you can redistribute it and/or
43          * modify it under the terms of the GNU General Public License
44          * as published by the Free Software Foundation; either version 2
45          * of the License, or (at your option) any later version.
46          *
47          * This program is distributed in the hope that it will be useful,
48          * but WITHOUT ANY WARRANTY; without even the implied warranty of
49          * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
50          * GNU General Public License for more details.
51          *
52          * You should have received a copy of the GNU General Public License
53          * along with this program; if not, write to the Free Software
54          * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
55          */
56
57 #ifdef HAVE_CONFIG_H
58 # include "config.h"
59 #endif
60
61 #include <glib.h>
62 #include <string.h>
63 #include <errno.h>
64 #include <stdio.h>
65 #include "dtd.h"
66 #include "dtd_preparse_lex.h"
67 #include <wsutil/file_util.h>
68
69 #define ECHO g_string_append(current,yytext);
70
71 static GString* current;
72 static GString* output;
73 static GHashTable* entities;
74 static gchar* entity_name;
75 static GString* error;
76
77 static const gchar* dtd_dirname;
78 static const gchar* filename;
79 static guint linenum;
80
81 static gchar* replace_entity(gchar* s);
82 static const gchar* location(void);
83
84 /*
85  * Flex (v 2.5.35) uses this symbol to "exclude" unistd.h
86  */
87 #ifdef _WIN32
88 #define YY_NO_UNISTD_H
89 #endif
90
91 #ifdef _WIN32
92 /* disable Windows VC compiler warning "signed/unsigned mismatch" associated  */
93 /* with YY_INPUT code generated by flex versions such as 2.5.35.              */
94 #pragma warning (disable:4018)
95 #endif
96
97 %}
/*
 * Named patterns used by the rules section.
 */

/* XML processing instructions, copied through to the output. */
xmlpi_start "<?"
xmlpi_stop  "?>"
xmlpi_chars .

/* Comment and markup-declaration delimiters. */
comment_start "<!--"
comment_stop "-->"
special_start "<!"
special_stop ">"

/* Start of a parameter entity declaration: "<!ENTITY %". */
entity_start     "<!"[[:blank:]\n]*entity[[:blank:]\n]*"%"
system     SYSTEM
filename   [^"]+


name [A-Za-z][-:A-Za-z0-9_\.]*

/* Pieces of a quoted entity value. */
quote "\""
percent [%]
escaped_quote "\\\""
non_quote [^"%]+

avoid_editor_bug ["]

/* An entity reference such as %name; or &name; */
entity        [%&][A-Za-z][-A-Za-z0-9_]*;

/*
 * POSIX character-class expressions are spelled [:blank:] and are only
 * valid inside a bracket expression, hence the doubled brackets.  Without
 * the leading colon the pattern means "one of [ b l a n k :" followed by
 * one or more closing brackets, which never matches a run of blanks.
 */
whitespace [[:blank:]]+
newline    \n
%START OUTSIDE IN_COMMENT IN_ENTITY NAMED_ENTITY IN_QUOTE ENTITY_DONE XMLPI
126 %%
127
128
{entity}                                {
        /*
         * An entity reference: splice in its replacement text followed by
         * a location marker.  This rule carries no start condition, so it
         * is also active inside comments, where current is NULL and the
         * reference is dropped without being looked up.
         */
        if (current) g_string_append_printf(current,"%s\n%s\n",replace_entity(yytext),location());
}

{whitespace}                            {
        /* Whatever the whitespace pattern matched becomes a single space. */
        if (current) g_string_append(current," ");
}

        /* Processing instructions are copied to the output. */
<OUTSIDE>{xmlpi_start}                  { g_string_append(current,yytext); BEGIN XMLPI; }
<XMLPI>{xmlpi_chars}                    { g_string_append(current,yytext); }
<XMLPI>{newline}                        {
        /*
         * This rule shadows the general newline rule below, so it has to
         * count the line itself to keep later location markers accurate.
         */
        linenum++;
        g_string_append(current,yytext);
}
<XMLPI>{xmlpi_stop}                     { g_string_append(current,yytext); BEGIN OUTSIDE; }

        /*
         * Comments are discarded: current is NULL while inside one.  The
         * single-character rule below also matches a newline and is listed
         * before the general newline rule, so line breaks inside a comment
         * are counted by a dedicated rule placed ahead of it.
         */
<OUTSIDE>{comment_start}                { current = NULL; BEGIN IN_COMMENT; }
<IN_COMMENT>{newline}                   { linenum++; }
<IN_COMMENT>[^-]?                       |
<IN_COMMENT>[-]                         ;
<IN_COMMENT>{comment_stop}              { current = output; BEGIN OUTSIDE; }

{newline}                               {
        /* Count the line and, unless in a comment, emit a fresh location marker. */
        linenum++;
        if (current) g_string_append_printf(current,"%s\n",location());
}


        /*
         * Parameter entity declarations.  The name is stored as a key of
         * the form %name; and the quoted value is collected into a new
         * GString that starts with a location marker; both are handed to
         * the entities table when the closing quote is reached.
         */
<OUTSIDE>{entity_start}                 { BEGIN IN_ENTITY; }
<IN_ENTITY>{name}                       { entity_name = g_strdup_printf("%%%s;",yytext); BEGIN NAMED_ENTITY; }
<NAMED_ENTITY>{quote}                   { current = g_string_new(location()); BEGIN IN_QUOTE; }
<IN_QUOTE>{quote}                       { g_hash_table_insert(entities,entity_name,current);  BEGIN ENTITY_DONE; }
<IN_QUOTE>{percent}                     |
<IN_QUOTE>{non_quote}                   |
<IN_QUOTE>{escaped_quote}               g_string_append(current,yytext);
<NAMED_ENTITY>{system}                  {
        /*
         * External entities are rejected and scanning stops here.  The
         * error string is optional for dtd_preparse callers, so only write
         * to it when one was supplied.
         */
        if (error)
                g_string_append_printf(error,"at %s:%u: file inclusion is not supported!", filename, linenum);

        /* The key was never stored in the entities table; release it here. */
        g_free(entity_name);
        entity_name = NULL;

        yyterminate();
}
<ENTITY_DONE>{special_stop}             { current = output; g_string_append(current,"\n"); BEGIN OUTSIDE; }
161
162 %%
163
164 static gchar* replace_entity(gchar* entity) {
165         GString* replacement;
166
167         *entity = '%';
168
169         replacement = g_hash_table_lookup(entities,entity);
170
171         if (replacement) {
172                 return replacement->str;
173         } else {
174                 g_string_append_printf(error,"dtd_preparse: in file '%s': entity %s does not exists\n", filename, entity);
175                 return "";
176         }
177
178 }
179
180 static const gchar* location(void) {
181         static gchar* loc = NULL;
182
183         if (loc) g_free(loc);
184
185         loc = g_strdup_printf("<? wireshark:location %s:%u ?>", filename, linenum);
186
187         return loc;
188 }
189
190 static gboolean free_gstring_hash_items(gpointer k,gpointer v,gpointer p _U_) {
191         g_free(k);
192         g_string_free(v,TRUE);
193         return TRUE;
194 }
195
196 extern GString* dtd_preparse(const gchar* dname,const  gchar* fname, GString* err) {
197         gchar* fullname = g_strdup_printf("%s%c%s",dname,G_DIR_SEPARATOR,fname);
198
199         dtd_dirname = dname;
200         filename = fname;
201         linenum = 1;
202
203         yyin = ws_fopen(fullname,"r");
204
205         if (!yyin) {
206                 if (err)
207                         g_string_append_printf(err, "Could not open file: '%s', error: %s",fullname,g_strerror(errno));
208
209                 return NULL;
210         }
211
212         error = err;
213
214         entities = g_hash_table_new(g_str_hash,g_str_equal);
215         current = output = g_string_new(location());
216
217         BEGIN OUTSIDE;
218
219         yylex();
220
221         fclose(yyin);
222
223         yyrestart(NULL);
224
225         g_hash_table_foreach_remove(entities,free_gstring_hash_items,NULL);
226         g_hash_table_destroy(entities);
227
228     g_free(fullname);
229
230         return output;
231 }
232
/*
 * End-of-input hook for the scanner: returning 1 makes it stop instead
 * of looking for further input.
 *
 * The function is written out by hand rather than suppressed with
 * "%option noyywrap", because with that option some flex versions
 * (eg: 2.5.35) generate code which causes warnings by the Windows VC
 * compiler.
 */

int yywrap(void) {
    const int no_more_input = 1;

    return no_more_input;
}