3 * an API for text tvb parsers
5 * Copyright 2005, Luis E. Garcia Ontanon <luis@ontanon.org>
7 * Wireshark - Network traffic analyzer
8 * By Gerald Combs <gerald@wireshark.org>
9 * Copyright 1998 Gerald Combs
11 * This program is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU General Public License
13 * as published by the Free Software Foundation; either version 2
14 * of the License, or (at your option) any later version.
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
27 The intention behind this is to ease the writing of dissectors that have to
28 parse text without the need of writing into buffers.
30 It was originally written to avoid using lex and yacc for the xml dissector.
32 the parser is able to look for wanted elements these can be:
35 - a char out of a string of needles
36 - a char not belonging to a string of needles
37 - a sequence of chars that belong to a set of chars
38 - a sequence of chars that do not belong to a set of chars
41 - all the characters up to a certain wanted element (included or excluded)
44 - one of a given group of wanted elements
45 - a sequence of wanted elements
46 - some (at least one) instances of a wanted element
48 Once a wanted element is successfully extracted, by either tvbparse_get or
49 tvbparse_find, the parser will invoke a given callback
50 before and another one after each of its components' subelements' callbacks
53 If tvbparse_get or tvbparse_find fail to extract the wanted element the
54 subelements callbacks are not going to be invoked.
56 The wanted elements are instantiated once by the proto_register_xxx function.
58 The parser is instantiated for every packet and it maintains its state.
60 The element's data is destroyed before the next packet is dissected.
66 #include <epan/tvbuff.h>
68 #include "ws_symbol_export.h"
/* Forward typedefs, so the declarations below can refer to these types by their short names. */
70 typedef struct _tvbparse_elem_t tvbparse_elem_t;     /* a matching token returned by tvbparse_get or tvbparse_find */
71 typedef struct _tvbparse_wanted_t tvbparse_wanted_t; /* the description of a token the parser can look for */
72 typedef struct _tvbparse_t tvbparse_t;               /* an instance of a per packet parser */
76 * a callback function to be called before or after an element has been
77 * successfully extracted.
79 * Note that if the token belongs to a composed token the callbacks of the
80 * components won't be called unless the composed token is successfully
83 * tvbparse_data: the private data of the parser
84 * wanted_data: the private data of the wanted element
85 * elem: the extracted element
/* Signature shared by the `before` and `after` actions of a wanted element; it returns nothing. */
87 typedef void (*tvbparse_action_t)(void* tvbparse_data, const void* wanted_data, struct _tvbparse_elem_t* elem);
89 typedef int (*tvbparse_condition_t)
90 (tvbparse_t*, const int,
91 const tvbparse_wanted_t*,
96 TP_UNTIL_INCLUDE, /* last elem is included, its span is spent by the parser */
97 TP_UNTIL_SPEND, /* last elem is not included, but its span is spent by the parser */
98 TP_UNTIL_LEAVE /* last elem is not included, neither its span is spent by the parser */
102 struct _tvbparse_wanted_t {
104 tvbparse_condition_t condition;
108 struct _tvbparse_wanted_t** handle;
115 gboolean (*comp)(void*,const void*);
116 void* (*extract)(tvbuff_t*,guint);
121 const tvbparse_wanted_t* subelem;
125 struct _tvbparse_wanted_t* key;
126 struct _tvbparse_wanted_t* other;
129 const tvbparse_wanted_t* subelem;
140 tvbparse_action_t before;
141 tvbparse_action_t after;
144 /* an instance of a per packet parser */
150 const tvbparse_wanted_t* ignore;
154 /* a matching token returned by either tvbparse_get or tvbparse_find */
155 struct _tvbparse_elem_t {
164 struct _tvbparse_elem_t* sub;
166 struct _tvbparse_elem_t* next;
167 struct _tvbparse_elem_t* last;
169 const tvbparse_wanted_t* wanted;
174 * definition of wanted token types
176 * the following functions define the tokens we will be able to look for in a tvb
177 * common parameters are:
179 * id: an arbitrary id that will be copied to the eventual token (don't use 0)
180 * private_data: persistent data to be passed to the callback action (wanted_data)
181 * before_cb: a callback function to be called before those of the subelements
182 * after_cb: a callback function to be called after those of the subelements
189 * When looked for it returns a simple element one character long if the char
190 * at the current offset matches one of the needles.
193 tvbparse_wanted_t* tvbparse_char(const int id, /* arbitrary id that will be copied to the eventual token (don't use 0) */
194 const gchar* needles, /* the characters accepted at the current offset; any one of them matches */
195 const void* private_data, /* persistent data handed to the callbacks as wanted_data */
196 tvbparse_action_t before_cb, /* called before the callbacks of the subelements */
197 tvbparse_action_t after_cb); /* called after the callbacks of the subelements */
200 * a not_char element.
202 * When looked for it returns a simple element one character long if the char
203 * at the current offset does not match one of the needles.
206 tvbparse_wanted_t* tvbparse_not_char(const int id,
208 const void* private_data,
209 tvbparse_action_t before_cb,
210 tvbparse_action_t after_cb);
215 * When looked for it returns a simple element one or more characters long if
216 * one or more char(s) starting from the current offset match one of the needles.
217 * An element will be returned if at least min_len chars are given (1 if it's 0)
218 * It will get at most max_len chars or as much as it can if max_len is 0.
221 tvbparse_wanted_t* tvbparse_chars(const int id,
224 const gchar* needles,
225 const void* private_data,
226 tvbparse_action_t before_cb,
227 tvbparse_action_t after_cb);
230 * a not_chars element
232 * When looked for it returns a simple element one or more characters long if
233 * one or more char(s) starting from the current offset do not match one of the
235 * An element will be returned if at least min_len chars are given (1 if it's 0)
236 * It will get at most max_len chars or as much as it can if max_len is 0.
239 tvbparse_wanted_t* tvbparse_not_chars(const int id,
242 const gchar* needles,
243 const void* private_data,
244 tvbparse_action_t before_cb,
245 tvbparse_action_t after_cb);
250 * When looked for it returns a simple element if we have the given string at
254 tvbparse_wanted_t* tvbparse_string(const int id,
256 const void* private_data,
257 tvbparse_action_t before_cb,
258 tvbparse_action_t after_cb);
263 * When looked for it returns a simple element if we have a matching string at
267 tvbparse_wanted_t* tvbparse_casestring(const int id,
270 tvbparse_action_t before_cb,
271 tvbparse_action_t after_cb);
276 * When looked for it returns a simple element containing all the characters
277 * found until the first match of the ending element if the ending element is
280 * When looking for until elements it calls tvbparse_find so it can be very slow.
282 * It won't have a subelement, the ending's callbacks won't get called.
286 * until_mode values determine how the terminating element and the current offset
287 * of the parser are handled
290 tvbparse_wanted_t* tvbparse_until(const int id, /* arbitrary id that will be copied to the eventual token (don't use 0) */
291 const void* private_data, /* persistent data handed to the callbacks as wanted_data */
292 tvbparse_action_t before_cb, /* called before the callbacks of the subelements */
293 tvbparse_action_t after_cb, /* called after the callbacks of the subelements */
294 const tvbparse_wanted_t* ending, /* the element whose first match terminates this one; the ending's own callbacks won't get called */
295 until_mode_t until_mode); /* one of the TP_UNTIL_* values: whether the ending is included in the element and whether its span is spent by the parser */
300 * When looked for it will try to match to the given candidates and return a
301 * composed element whose subelement is the first match.
303 * The list of candidates is terminated with a NULL
307 tvbparse_wanted_t* tvbparse_set_oneof(const int id,
308 const void* private_data,
309 tvbparse_action_t before_cb,
310 tvbparse_action_t after_cb,
317 tvbparse_wanted_t* tvbparse_hashed(const int id,
319 tvbparse_action_t before_cb,
320 tvbparse_action_t after_cb,
321 tvbparse_wanted_t* key,
322 tvbparse_wanted_t* other,
/* Variadic: takes the wanted element `w` followed by further arguments.
 * NOTE(review): the expected type/order of the variadic arguments and how the
 * list is terminated are not visible next to this declaration - confirm
 * against the implementation before calling. */
326 void tvbparse_hashed_add(tvbparse_wanted_t* w, ...);
331 * When looked for it will try to match in order all the given candidates. If
332 * every candidate is found in the given order it will return a composed
333 * element whose subelements are the matched elements.
335 * The list of candidates is terminated with a NULL.
339 tvbparse_wanted_t* tvbparse_set_seq(const int id,
340 const void* private_data,
341 tvbparse_action_t before_cb,
342 tvbparse_action_t after_cb,
348 * When looked for it will try to match the given candidate at least min times
349 * and at most max times. If the given candidate is matched at least min times
350 * a composed element is returned.
354 tvbparse_wanted_t* tvbparse_some(const int id,
357 const void* private_data,
358 tvbparse_action_t before_cb,
359 tvbparse_action_t after_cb,
360 const tvbparse_wanted_t* wanted);
/* Shorthand that expands to tvbparse_some() with 1 and G_MAXINT passed right
 * after id, forwarding every other argument unchanged.
 * NOTE(review): 1 and G_MAXINT are presumably the min and max repetition
 * counts (i.e. "match `wanted` at least once") - confirm against the
 * tvbparse_some prototype. */
362 #define tvbparse_one_or_more(id, private_data, before_cb, after_cb, wanted)\
363 tvbparse_some(id, 1, G_MAXINT, private_data, before_cb, after_cb, wanted)
369 * this is a pointer to a pointer to a wanted element (that might not have
370 * been initialized yet) so that recursive structures
/* handle: address of a wanted-element pointer; the pointed-to element may not have been initialized yet when this is called. */
373 tvbparse_wanted_t* tvbparse_handle(tvbparse_wanted_t** handle);
387 tvbparse_wanted_t* tvbparse_ft(int id,
389 tvbparse_action_t before_cb,
390 tvbparse_action_t after_cb,
394 tvbparse_wanted_t* tvbparse_end_of_buffer(int id,
396 tvbparse_action_t before_cb,
397 tvbparse_action_t after_cb);
399 tvbparse_wanted_t* tvbparse_ft_numcmp(int id,
401 tvbparse_action_t before_cb,
402 tvbparse_action_t after_cb,
405 enum ft_cmp_op ft_cmp_op,
411 * this is a composed candidate, that will try to match a quoted string
412 * (including the quotes), including every escaped quote inside it.
414 * C strings are matched with tvbparse_quoted(-1,NULL,NULL,NULL,"\"","\\")
417 tvbparse_wanted_t* tvbparse_quoted(const int id,
419 tvbparse_action_t before_cb,
420 tvbparse_action_t after_cb,
425 * a helper callback for quoted strings that will shrink the token to contain
426 * only the string and not the quotes
429 void tvbparse_shrink_token_cb(void* tvbparse_data,
430 const void* wanted_data,
431 tvbparse_elem_t* tok);
436 /* initialize the parser (at every packet)
437 * tvb: what are we parsing?
439 * len: for how many bytes
440 * private_data: will be passed to the action callbacks
441 * ignore: a wanted token type to be ignored (the associated cb WILL be called when it matches)
444 tvbparse_t* tvbparse_init(tvbuff_t* tvb,
448 const tvbparse_wanted_t* ignore);
450 /* reset the parser */
/* Resets the parser `tt`, taking an offset and a len.
 * NOTE(review): presumably offset/len describe the new span to parse and the
 * gboolean result reports whether that span was accepted - neither is
 * visible here, confirm against the implementation. */
452 gboolean tvbparse_reset(tvbparse_t* tt, const int offset, int len);
/* NOTE(review): presumably returns the current offset of the parser `tt` - confirm against the implementation. */
455 guint tvbparse_curr_offset(tvbparse_t* tt);
/* NOTE(review): presumably returns how many bytes the parser `tt` has left to parse - confirm against the implementation. */
456 guint tvbparse_len_left(tvbparse_t* tt);
461 * This will look for the wanted token at the current offset or after any given
462 * number of ignored tokens returning FALSE if there's no match or TRUE if there
464 * The parser will be left in its original state and no callbacks will be called.
467 gboolean tvbparse_peek(tvbparse_t* tt, /* the parser to look with; it is left in its original state */
468 const tvbparse_wanted_t* wanted); /* the token to look for; the result is TRUE on a match and FALSE otherwise, and no callbacks are called */
471 * This will look for the wanted token at the current offset or after any given
472 * number of ignored tokens returning NULL if there's no match.
473 * if there is a match it will set the offset of the current parser after
474 * the end of the token
477 tvbparse_elem_t* tvbparse_get(tvbparse_t* tt, /* the parser to look with; on a match its offset is set after the end of the token */
478 const tvbparse_wanted_t* wanted); /* the token to look for at the current offset (ignored tokens may precede it); NULL is returned if there's no match */
481 * Like tvbparse_get but this will look for a wanted token even beyond the
483 * This function is slow.
486 tvbparse_elem_t* tvbparse_find(tvbparse_t* tt, /* the parser to search with */
487 const tvbparse_wanted_t* wanted); /* the token to search for; unlike tvbparse_get the search continues beyond where tvbparse_get would stop, which makes this function slow */
/* NOTE(review): no description is visible for this function here; from the
 * signature it takes a proto_tree and an extracted element - presumably it
 * adds `curr` to `tree`; confirm against the implementation. */
491 void tvbparse_tree_add_elem(proto_tree* tree, tvbparse_elem_t* curr);