2 /* Include this before everything else, for various large-file definitions */
7 * We want a reentrant scanner.
12 * We don't use input, so don't generate code for it.
17 * We don't use unput, so don't generate code for it.
22 * We don't read interactively from the terminal.
24 %option never-interactive
27 * Prefix scanner routines with "df_" rather than "yy", so this scanner
28 * can coexist with other scanners.
33 * We're reading from a string, so we don't need yywrap.
38 * The type for the state we keep for a scanner.
40 %option extra-type="df_scanner_state_t *"
43 * We have to override the memory allocators so that we don't get
44 * "unused argument" warnings from the yyscanner argument (which
45 * we don't use, as we have a global memory allocator).
47 * We provide, as macros, our own versions of the routines generated by Flex,
48 * which just call malloc()/realloc()/free() (as the Flex versions do),
49 * discarding the extra argument.
57 * Wireshark - Network traffic analyzer
58 * By Gerald Combs <gerald@wireshark.org>
59 * Copyright 2001 Gerald Combs
61 * SPDX-License-Identifier: GPL-2.0-or-later
67 #include "dfilter-int.h"
68 #include "syntax-tree.h"
70 #include "dfunctions.h"
73 * Disable diagnostics in the code generated by Flex.
78 #define LVAL_TYPE stnode_t*
79 #define LVAL_INIT_VAL NULL
81 #define FLEX_YY_PREFIX df_
83 #include <lemonflex-head.inc>
85 /*#undef YY_NO_UNPUT*/
87 static int set_lval(int token, gpointer data);
88 static int set_lval_int(dfwork_t *dfw, int token, char *s);
89 static int simple(int token);
90 static gboolean str_to_gint32(dfwork_t *dfw, char *s, gint32* pint);
91 static void mark_lval_deprecated(const char *s);
94 * Sleazy hack to suppress compiler warnings in yy_fatal_error().
96 #define YY_EXIT_FAILURE ((void)yyscanner, 2)
99 * Macros for the allocators, to discard the extra argument.
101 #define df_alloc(size, yyscanner) (void *)malloc(size)
102 #define df_realloc(ptr, size, yyscanner) (void *)realloc((char *)(ptr), (size))
103 #define df_free(ptr, yyscanner) free((char *)ptr)
115 /* Ignore whitespace, unless set elements are being parsed. Perhaps the
116 * set syntax should have used commas from the beginning, but now we are
117 * stuck with whitespace as separators. */
118 if (yyextra->in_set) {
119 return simple(TOKEN_WHITESPACE);
125 "(" return simple(TOKEN_LPAREN);
126 ")" return simple(TOKEN_RPAREN);
127 "," return simple(TOKEN_COMMA);
130 yyextra->in_set = TRUE;
131 return simple(TOKEN_LBRACE);
133 [[:blank:]\n]*".."[[:blank:]\n]* return simple(TOKEN_DOTDOT);
135 yyextra->in_set = FALSE;
136 return simple(TOKEN_RBRACE);
139 "==" return simple(TOKEN_TEST_EQ);
140 "eq" return simple(TOKEN_TEST_EQ);
142 mark_lval_deprecated("!=");
143 return simple(TOKEN_TEST_NE);
146 mark_lval_deprecated("ne");
147 return simple(TOKEN_TEST_NE);
149 ">" return simple(TOKEN_TEST_GT);
150 "gt" return simple(TOKEN_TEST_GT);
151 ">=" return simple(TOKEN_TEST_GE);
152 "ge" return simple(TOKEN_TEST_GE);
153 "<" return simple(TOKEN_TEST_LT);
154 "lt" return simple(TOKEN_TEST_LT);
155 "<=" return simple(TOKEN_TEST_LE);
156 "le" return simple(TOKEN_TEST_LE);
157 "bitwise_and" return simple(TOKEN_TEST_BITWISE_AND);
158 "&" return simple(TOKEN_TEST_BITWISE_AND);
159 "contains" return simple(TOKEN_TEST_CONTAINS);
160 "~" return simple(TOKEN_TEST_MATCHES);
161 "matches" return simple(TOKEN_TEST_MATCHES);
162 "!" return simple(TOKEN_TEST_NOT);
163 "not" return simple(TOKEN_TEST_NOT);
164 "&&" return simple(TOKEN_TEST_AND);
165 "and" return simple(TOKEN_TEST_AND);
166 "||" return simple(TOKEN_TEST_OR);
167 "or" return simple(TOKEN_TEST_OR);
168 "in" return simple(TOKEN_TEST_IN);
173 return simple(TOKEN_LBRACKET);
176 <RANGE_INT>[+-]?[[:digit:]]+ {
178 return set_lval_int(yyextra->dfw, TOKEN_INTEGER, yytext);
181 <RANGE_INT>[+-]?0x[[:xdigit:]]+ {
183 return set_lval_int(yyextra->dfw, TOKEN_INTEGER, yytext);
186 <RANGE_INT,RANGE_PUNCT>":" {
188 return simple(TOKEN_COLON);
193 return simple(TOKEN_HYPHEN);
196 <RANGE_INT,RANGE_PUNCT>"," {
198 return simple(TOKEN_COMMA);
201 <RANGE_INT,RANGE_PUNCT>"]" {
203 return simple(TOKEN_RBRACKET);
206 /* Error if none of the above while scanning a range (slice) */
208 <RANGE_PUNCT>[^:\-,\]]+ {
209 dfilter_fail(yyextra->dfw, "Invalid string \"%s\" found while scanning slice.", yytext);
213 /* XXX It would be nice to be able to match an entire non-integer string,
214 * but beware of Flex's "match the most text" rule.
218 dfilter_fail(yyextra->dfw, "Invalid character \"%s\" found while scanning slice; expected integer.", yytext);
223 /* start quote of a quoted string */
224 /* The example of how to scan for strings was taken from
225 the flex 2.5.4 manual, from the section "Start Conditions".
227 http://www.gnu.org/software/flex/manual/html_node/flex_11.html */
230 /* A previous filter that failed to compile due to
231 a missing end quote will have left quoted_string set
232 to something. Clear it now that we are starting
233 a new quoted string. */
234 if (yyextra->quoted_string) {
235 g_string_free(yyextra->quoted_string, TRUE);
236 /* Don't set quoted_string to NULL, as we
237 do in other quoted_string-cleanup code, as we're
238 about to set it in the next line. */
240 yyextra->quoted_string = g_string_new("");
244 /* unterminated string */
245 /* The example of how to handle unclosed strings was taken from
246 the flex 2.5.4 manual, from the section "End-of-file rules".
248 http://www.gnu.org/software/flex/manual/html_node/flex_13.html */
250 dfilter_fail(yyextra->dfw, "The final quote was missing from a quoted string.");
258 token = set_lval(TOKEN_STRING, yyextra->quoted_string->str);
259 g_string_free(yyextra->quoted_string, TRUE);
260 yyextra->quoted_string = NULL;
264 <DQUOTE>\\[0-7]{1,3} {
266 unsigned long result;
267 result = strtoul(yytext + 1, NULL, 8);
269 g_string_free(yyextra->quoted_string, TRUE);
270 yyextra->quoted_string = NULL;
271 dfilter_fail(yyextra->dfw, "%s is larger than 255.", yytext);
274 g_string_append_c(yyextra->quoted_string, (gchar) result);
277 <DQUOTE>\\x[[:xdigit:]]{1,2} {
279 unsigned long result;
280 result = strtoul(yytext + 2, NULL, 16);
281 g_string_append_c(yyextra->quoted_string, (gchar) result);
286 /* escaped character */
287 g_string_append_c(yyextra->quoted_string, yytext[1]);
291 /* non-escaped string */
292 g_string_append(yyextra->quoted_string, yytext);
297 /* start quote of a quoted character value */
298 /* The example of how to scan for strings was taken from
299 the Flex manual, from the section "Start Conditions".
301 http://flex.sourceforge.net/manual/Start-Conditions.html#Start-Conditions */
304 /* A previous filter that failed to compile due to
305 a missing end quote will have left quoted_string set
306 to something. Clear it now that we are starting
307 a new quoted character value. */
308 if (yyextra->quoted_string) {
309 g_string_free(yyextra->quoted_string, TRUE);
310 /* Don't set quoted_string to NULL, as we
311 do in other quoted_string-cleanup code, as we're
312 about to set it in the next line. */
314 yyextra->quoted_string = g_string_new("'");
318 /* unterminated character value */
319 /* The example of how to handle unclosed strings was taken from
320 the Flex manual, from the section "End-of-file rules".
322 http://flex.sourceforge.net/manual/EOF.html#EOF */
324 dfilter_fail(yyextra->dfw, "The final quote was missing from a character constant.");
332 g_string_append_c(yyextra->quoted_string, '\'');
333 token = set_lval(TOKEN_CHARCONST, yyextra->quoted_string->str);
334 g_string_free(yyextra->quoted_string, TRUE);
335 yyextra->quoted_string = NULL;
340 /* escaped character */
341 g_string_append(yyextra->quoted_string, yytext);
345 /* non-escaped string */
346 g_string_append(yyextra->quoted_string, yytext);
351 [-[:alnum:]_\.:]*\/[[:digit:]]+ {
353 return set_lval(TOKEN_UNPARSED, yytext);
356 ([.][-+[:alnum:]_:]+)+[.]{0,2} |
357 [-+[:alnum:]_:]+([.][-+[:alnum:]_:]+)*[.]{0,2} {
358 /* Is it a field name or some other value (float, integer, bytes, ...)? */
359 header_field_info *hfinfo;
360 df_func_def_t *df_func_def;
362 /* Trailing dot is allowed for floats, but make sure that trailing ".."
363 * is interpreted as a token on its own. */
364 if (strstr(yytext, "..")) {
368 hfinfo = proto_registrar_get_byname(yytext);
370 /* Yes, it's a field name */
371 return set_lval(TOKEN_FIELD, hfinfo);
374 /* Is it a function name? */
375 df_func_def = df_func_lookup(yytext);
377 /* yes, it's a dfilter function */
378 return set_lval(TOKEN_FUNCTION, df_func_def);
381 /* No, so treat it as an unparsed string */
382 return set_lval(TOKEN_UNPARSED, yytext);
389 return set_lval(TOKEN_UNPARSED, yytext);
396 * Turn diagnostics back on, so we check the code that we've written.
414 case TOKEN_WHITESPACE:
421 case TOKEN_TEST_BITWISE_AND:
422 case TOKEN_TEST_CONTAINS:
423 case TOKEN_TEST_MATCHES:
430 g_assert_not_reached();
436 set_lval(int token, gpointer data)
438 sttype_id_t type_id = STTYPE_UNINITIALIZED;
442 type_id = STTYPE_STRING;
444 case TOKEN_CHARCONST:
445 type_id = STTYPE_CHARCONST;
448 type_id = STTYPE_FIELD;
451 type_id = STTYPE_UNPARSED;
454 type_id = STTYPE_FUNCTION;
457 g_assert_not_reached();
459 stnode_init(df_lval, type_id, data);
464 set_lval_int(dfwork_t *dfw, int token, char *s)
466 sttype_id_t type_id = STTYPE_UNINITIALIZED;
469 if (!str_to_gint32(dfw, s, &val)) {
475 type_id = STTYPE_INTEGER;
478 g_assert_not_reached();
481 stnode_init_int(df_lval, type_id, val);
487 str_to_gint32(dfwork_t *dfw, char *s, gint32* pint)
493 integer = strtol(s, &endptr, 0);
495 if (errno == EINVAL || endptr == s || *endptr != '\0') {
496 /* This isn't a valid number. */
497 dfilter_fail(dfw, "\"%s\" is not a valid number.", s);
500 if (errno == ERANGE) {
501 if (integer == LONG_MAX) {
502 dfilter_fail(dfw, "\"%s\" causes an integer overflow.", s);
504 else if (integer == LONG_MIN) {
505 dfilter_fail(dfw, "\"%s\" causes an integer underflow.", s);
509 * XXX - can "strtol()" set errno to ERANGE without
510 * returning LONG_MAX or LONG_MIN?
512 dfilter_fail(dfw, "\"%s\" is not an integer.", s);
516 if (integer > G_MAXINT32) {
518 * Fits in a long, but not in a gint32 (a long might be
521 dfilter_fail(dfw, "\"%s\" causes an integer overflow.", s);
524 if (integer < G_MININT32) {
526 * Fits in a long, but not in a gint32 (a long might be
529 dfilter_fail(dfw, "\"%s\" causes an integer underflow.", s);
533 *pint = (gint32)integer;
538 mark_lval_deprecated(const char *s)
540 df_lval->deprecated_token = s;