Because there's more than just text in XML...
[metze/wireshark/wip.git] / epan / dtd_preparse.l
1 %option noyywrap
2 %option nounput
3 %option prefix="Dtd_PreParse_"
4 %option never-interactive
5 %option caseless
6 %option outfile="dtd_preparse.c"
7
8 %{
9         /*
10          * dtd_preparse.l
11          *
12          * an XML dissector for ethereal 
13          *
14          * DTD Preparser -  import a dtd file into a GString,
15          *                  removing comments and resolving %entities;
16          *                  (SYSTEM file inclusion is not supported)
17          * 
18          * Copyright 2004, Luis E. Garcia Ontanon <luis.ontanon@gmail.com>
19          *
20          * $Id$
21          *
22          * Ethereal - Network traffic analyzer
23          * By Gerald Combs <gerald@ethereal.com>
24          * Copyright 1998 Gerald Combs
25          *
26          * This program is free software; you can redistribute it and/or
27          * modify it under the terms of the GNU General Public License
28          * as published by the Free Software Foundation; either version 2
29          * of the License, or (at your option) any later version.
30          * 
31          * This program is distributed in the hope that it will be useful,
32          * but WITHOUT ANY WARRANTY; without even the implied warranty of
33          * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
34          * GNU General Public License for more details.
35          * 
36          * You should have received a copy of the GNU General Public License
37          * along with this program; if not, write to the Free Software
38          * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
39          */
40                         
41 #include <glib.h>
42 #include <string.h>
43 #include <errno.h>
44 #include <stdio.h>
45 #include "dtd.h"
46
/*
 * Include bookkeeping. include_stack is sized by MAX_INCLUDE_DEPTH but is
 * not referenced by any code visible in this file; include_stack_ptr is
 * only read by location(), which emits include_stack_ptr + 1 entries, and
 * nothing visible here ever changes it from 0.
 */
47 #define MAX_INCLUDE_DEPTH 10
48 YY_BUFFER_STATE include_stack[MAX_INCLUDE_DEPTH];
49 int include_stack_ptr = 0;
50
/* Overrides the scanner's ECHO so that unmatched input is appended to `current`. */
51 #define ECHO g_string_append(current,yytext);
52
/*
 * current:     GString presently receiving text: `output`, a fresh GString
 *              while an entity value is being read, or NULL inside a comment.
 * output:      the preprocessed text returned by dtd_preparse().
 * entities:    maps "%name;" keys to the GString holding the replacement text.
 * entity_name: key of the entity declaration currently being read.
 * error:       error sink handed to dtd_preparse(); may be NULL.
 */
53 GString* current;
54 GString* output;
55 GHashTable* entities;
56 gchar* entity_name;
57 GString* error;
58
/*
 * Directory and file name as passed to dtd_preparse(), plus the current line
 * number. filename and linenum feed location() and the error messages;
 * dirname is stored but not read by any code visible in this file.
 */
59 const gchar* dirname;
60 const gchar* filename;
61 guint linenum;
62
/* Not referenced by any code visible in this file. */
63 GString* textstr;
64
/* Helpers defined after the rules section. */
65 static gchar* replace_entity(gchar* s);
66 static const gchar* location(void);
67
68 %}
/* XML processing instruction delimiters and body characters */
xmlpi_start "<?"
xmlpi_stop  "?>"
xmlpi_chars .

/* comment and markup-declaration delimiters */
comment_start "<!--"
comment_stop "-->"
special_start "<!"
special_stop ">"

/* start of a parameter entity declaration, e.g. <!ENTITY % (matching is caseless) */
entity_start     "<!"[[:blank:]\n]*entity[[:blank:]\n]*"%"
system     SYSTEM
filename   [^"]+


name [A-Za-z][-:A-Za-z0-9_]*

quote "\""
percent [%]
escaped_quote "\\\""
non_quote [^"%]+

/* not used by any rule; presumably only balances the quote characters above for editors */
avoid_editor_bug ["]

/* an entity reference: %name; or &name; */
entity        [%&][A-Za-z][-A-Za-z0-9_]*;

/* a run of blanks; the POSIX class must be written [:blank:] inside the brackets */
whitespace [[:blank:]]+
newline    \n
%START OUTSIDE IN_COMMENT IN_ENTITY NAMED_ENTITY IN_QUOTE ENTITY_DONE XMLPI
97 %%
98
99
{entity}                        {
        /* splice in the stored replacement text, then re-mark the current position */
        if (current) g_string_sprintfa(current,"%s\n%s\n",replace_entity(yytext),location());
}

{whitespace}                    if (current) g_string_append(current," ");

<OUTSIDE>{xmlpi_start}          { g_string_append(current,yytext); BEGIN XMLPI; }
<XMLPI>{xmlpi_chars}            { g_string_append(current,yytext); }
<XMLPI>{newline}                {
        /* count the line, but emit no location marker inside a processing instruction */
        linenum++;
        g_string_append(current,yytext);
}
<XMLPI>{xmlpi_stop}             { g_string_append(current,yytext); BEGIN OUTSIDE; }

<OUTSIDE>{comment_start}        { current = NULL; BEGIN IN_COMMENT; }
<IN_COMMENT>{newline}           {
        /* listed before the one-character rules below so that lines inside comments are counted */
        linenum++;
}
<IN_COMMENT>[^-]?               |
<IN_COMMENT>[-]                 ;
<IN_COMMENT>{comment_stop}      { current = output; BEGIN OUTSIDE; }

{newline}                       {
        linenum++;
        if (current) g_string_sprintfa(current,"%s\n",location());
}


<OUTSIDE>{entity_start}         { BEGIN IN_ENTITY; }
<IN_ENTITY>{name}               { entity_name = g_strdup_printf("%%%s;",yytext); BEGIN NAMED_ENTITY; }
<NAMED_ENTITY>{quote}           { current = g_string_new(location()); BEGIN IN_QUOTE; }
<IN_QUOTE>{quote}               { g_hash_table_insert(entities,entity_name,current);  BEGIN ENTITY_DONE; }
<IN_QUOTE>{percent}             g_string_append(current,yytext);
<IN_QUOTE>{non_quote}           {
        /* this pattern swallows embedded newlines, so count them here */
        const gchar* c;
        for (c = yytext; *c; c++) {
                if (*c == '\n') linenum++;
        }
        g_string_append(current,yytext);
}
<IN_QUOTE>{escaped_quote}       g_string_append(current,yytext);
<NAMED_ENTITY>{system}          {
        /* the error sink is optional */
        if (error) g_string_sprintfa(error,"at %s:%u: file inclusion is not supported!", filename, linenum);
        /* the key was never stored in the table, so release it here */
        g_free(entity_name);
        entity_name = NULL;
        yyterminate();
}
<ENTITY_DONE>{special_stop}     { current = output; g_string_append(current,"\n"); BEGIN OUTSIDE; }
132
133 %%
134
135 static gchar* replace_entity(gchar* entity) {
136         GString* replacement;
137         
138         *entity = '%';
139         
140         replacement = g_hash_table_lookup(entities,entity);
141         
142         if (replacement) {
143                 return replacement->str;
144         } else {
145                 g_string_sprintfa(error,"dtd_preparse: in file '%s': entity %s does not exists\n", filename, entity);
146                 return "";
147         }
148         
149 }
150
151 static const gchar* location(void) {
152         static GString* loc = NULL;
153         guint i = include_stack_ptr + 1;
154         
155         if (loc) {
156                 g_string_truncate(loc,0);
157         } else {
158                 loc = g_string_new("");
159         }
160
161         g_string_sprintfa(loc,"<? ethereal:location ");
162
163         while (i--) {
164                         g_string_sprintfa(loc, "%s:%u from",
165                                                           filename,
166                                                           linenum);                     
167         }
168
169         g_string_truncate(loc,(loc->len) - 4);
170         
171         g_string_sprintfa(loc,"?>");
172         
173         return loc->str;
174 }
175
176 static gboolean free_gstring_hash_items(gpointer k,gpointer v,gpointer p _U_) {
177         g_free(k);
178         g_string_free(v,TRUE);
179         return TRUE;
180 }
181
182 extern GString* dtd_preparse(const gchar* dname,const  gchar* fname, GString* err) {
183         gchar* fullname = g_strdup_printf("%s%c%s",dname,G_DIR_SEPARATOR,fname);
184
185         dirname = dname;
186         filename = fname;
187
188         yyin = fopen(fullname,"r");
189         
190         g_free(fullname);
191         
192         if (!yyin) {
193                 if (err)
194                         g_string_sprintfa(err, "Could not open file: '%s', error: %s",filename,strerror(errno));
195                         
196                 return NULL;
197         }
198         
199         filename = filename;
200         linenum = 1;
201         
202         error = err;
203         
204         entities = g_hash_table_new(g_str_hash,g_str_equal);
205         current = output = g_string_new(location());
206         
207         BEGIN OUTSIDE;
208
209         yylex();
210                 
211         yyrestart(NULL);
212
213         g_hash_table_foreach_remove(entities,free_gstring_hash_items,NULL);
214         g_hash_table_destroy(entities);
215
216         return output;
217 }