an API for "bufferless" parsing of text tvbs
authorlego <lego@f5534014-38df-0310-8fa8-9805f1628bb7>
Thu, 8 Sep 2005 19:50:13 +0000 (19:50 +0000)
committerlego <lego@f5534014-38df-0310-8fa8-9805f1628bb7>
Thu, 8 Sep 2005 19:50:13 +0000 (19:50 +0000)
git-svn-id: http://anonsvn.wireshark.org/wireshark/trunk@15726 f5534014-38df-0310-8fa8-9805f1628bb7

epan/tvbparse.c [new file with mode: 0644]
epan/tvbparse.h [new file with mode: 0644]

diff --git a/epan/tvbparse.c b/epan/tvbparse.c
new file mode 100644 (file)
index 0000000..3bbcc1a
--- /dev/null
@@ -0,0 +1,756 @@
+/* tvbparse.c
+*
+* Copyright 2005, Luis E. Garcia Ontanon <luis.ontanon@gmail.com>
+*
+* $Id:  $
+*
+* Ethereal - Network traffic analyzer
+* By Gerald Combs <gerald@ethereal.com>
+* Copyright 1998 Gerald Combs
+*
+* This program is free software; you can redistribute it and/or
+* modify it under the terms of the GNU General Public License
+* as published by the Free Software Foundation; either version 2
+* of the License, or (at your option) any later version.
+* 
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+* 
+* You should have received a copy of the GNU General Public License
+* along with this program; if not, write to the Free Software
+* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <stdlib.h>
+#include <string.h>
+#include <glib.h>
+
+#include <epan/emem.h>
+#include <epan/proto.h>
+#include <epan/tvbparse.h>
+
+/*
+ * Kind of element a tvbparse_wanted_t describes.
+ * tvbparse_get() switches on this value to decide how to match the element.
+ */
+typedef enum _tvbparse_wanted_type_t {
+       TVBPARSE_WANTED_NONE, /* currently unused */
+       
+       /* simple tokens */
+       TVBPARSE_WANTED_SIMPLE_CHAR, /* just one matching char */
+       TVBPARSE_WANTED_SIMPLE_CHARS, /* a sequence of matching chars */
+       TVBPARSE_WANTED_SIMPLE_NOT_CHAR, /* one non matching char */ 
+       TVBPARSE_WANTED_SIMPLE_NOT_CHARS, /* a sequence of non matching chars */
+       TVBPARSE_WANTED_SIMPLE_STRING, /* a string */
+       TVBPARSE_WANTED_SIMPLE_CASESTRING, /* a caseless string */
+       TVBPARSE_WANTED_UNTIL, /* all the characters until the first matching token */
+       
+       /* composed tokens */
+       TVBPARSE_WANTED_SET_ONEOF, /* one of the given types */
+       TVBPARSE_WANTED_SET_SEQ, /* an exact sequence of tokens of the given types */
+       TVBPARSE_WANTED_CARDINALITY, /* one or more tokens of the given type */ 
+} tvbparse_type_t;
+
+/* State of one parser instance, created by tvbparse_init(). */
+struct _tvbparse_t {
+       /* buffer being parsed */
+       tvbuff_t* tvb;
+       /* current position in tvb; advanced as elements are consumed */
+       int offset;
+       /* number of bytes the parser may still consume starting at offset */
+       int max_len;
+       /* passed as first argument to every before/after callback */
+       void* data;
+       /* element skipped (as many times as it matches) before each match
+          attempt in tvbparse_get(); NULL disables skipping */
+       const tvbparse_wanted_t* ignore;
+       /* nesting level of tvbparse_get() calls; callbacks are only run when
+          the outermost call (depth 1) accepts */
+       guint depth;
+};
+
+/* Description of an element the parser can be asked to look for. */
+struct _tvbparse_wanted_t {
+       /* copied into the id of every token produced for this element */
+       int id;
+       /* how the element is matched */
+       tvbparse_type_t type;
+       
+       /* control string: the character set for the char(s) types, the literal
+          for the string types, the include/exclude marker for UNTIL and NULL
+          for the other composed types */
+       const gchar* ctl;
+       /* set to 1 by tvbparse_char(), to strlen() of the literal by the string
+          constructors and to 0 by the others; tvbparse_find() uses it as the
+          number of bytes that must remain for a match attempt */
+       int len;
+       
+       /* lower and upper bound on the number of chars (CHARS/NOT_CHARS) or
+          of repetitions (CARDINALITY); 0 for the other types */
+       guint min;
+       guint max;
+       
+       /* passed as second argument to the before/after callbacks */
+       const void* data;
+       /* callbacks run for each token of this element, see tvbparse_get() */
+       tvbparse_action_t before;
+       tvbparse_action_t after;
+       
+       /* sub-elements (tvbparse_wanted_t*) of composed elements and UNTIL;
+          empty for simple tokens */
+       GPtrArray* elems;
+};
+
+
+/*
+ * Creates a wanted element matching exactly one character, which has to be
+ * one of the characters in chr.
+ *
+ * chr is stored by reference (not copied). The returned element is
+ * g_malloc'ed.
+ */
+tvbparse_wanted_t* tvbparse_char(int id,
+                                 const gchar* chr,
+                                 const void* data,
+                                 tvbparse_action_t before_cb,
+                                 tvbparse_action_t after_cb) {
+       tvbparse_wanted_t* wanted = g_malloc(sizeof *wanted);
+
+       /* what to match */
+       wanted->type = TVBPARSE_WANTED_SIMPLE_CHAR;
+       wanted->ctl = chr;
+       wanted->len = 1;
+       wanted->min = 0;
+       wanted->max = 0;
+       wanted->elems = g_ptr_array_new();
+
+       /* what to report */
+       wanted->id = id;
+       wanted->data = data;
+       wanted->before = before_cb;
+       wanted->after = after_cb;
+
+       return wanted;
+}
+
+/*
+ * Creates a wanted element matching a run of characters that all belong to
+ * the set chr.
+ *
+ * The run has to be at least min_len and is at most max_len characters long;
+ * a min_len of 0 stands for 1 and a max_len of 0 stands for G_MAXINT.
+ * chr is stored by reference (not copied).
+ */
+tvbparse_wanted_t* tvbparse_chars(int id,
+                                  guint min_len,
+                                  guint max_len,
+                                  const gchar* chr,
+                                  const void* data,
+                                  tvbparse_action_t before_cb,
+                                  tvbparse_action_t after_cb) {
+       tvbparse_wanted_t* wanted = g_malloc(sizeof *wanted);
+
+       /* replace the "use the default" markers by the actual bounds */
+       if (min_len == 0) {
+               min_len = 1;
+       }
+       if (max_len == 0) {
+               max_len = G_MAXINT;
+       }
+
+       wanted->type = TVBPARSE_WANTED_SIMPLE_CHARS;
+       wanted->ctl = chr;
+       wanted->len = 0;
+       wanted->min = min_len;
+       wanted->max = max_len;
+       wanted->elems = g_ptr_array_new();
+
+       wanted->id = id;
+       wanted->data = data;
+       wanted->before = before_cb;
+       wanted->after = after_cb;
+
+       return wanted;
+}
+
+/*
+ * Creates a wanted element matching exactly one character that is NOT in
+ * the set chr.
+ *
+ * chr is stored by reference (not copied). Unlike tvbparse_char() the len
+ * field of the element is left at 0.
+ */
+tvbparse_wanted_t* tvbparse_not_char(int id,
+                                     const gchar* chr,
+                                     const void* data,
+                                     tvbparse_action_t before_cb,
+                                     tvbparse_action_t after_cb) {
+       tvbparse_wanted_t* wanted = g_malloc(sizeof *wanted);
+
+       wanted->type = TVBPARSE_WANTED_SIMPLE_NOT_CHAR;
+       wanted->ctl = chr;
+       wanted->len = 0;
+       wanted->min = 0;
+       wanted->max = 0;
+       wanted->elems = g_ptr_array_new();
+
+       wanted->id = id;
+       wanted->data = data;
+       wanted->before = before_cb;
+       wanted->after = after_cb;
+
+       return wanted;
+}
+
+/*
+ * Creates a wanted element matching a run of characters none of which
+ * belongs to the set chr.
+ *
+ * The run has to be at least min_len and is at most max_len characters long;
+ * a min_len of 0 stands for 1 and a max_len of 0 stands for G_MAXINT.
+ * chr is stored by reference (not copied).
+ */
+tvbparse_wanted_t* tvbparse_not_chars(int id,
+                                      guint min_len,
+                                      guint max_len,
+                                      const gchar* chr,
+                                      const void* data,
+                                      tvbparse_action_t before_cb,
+                                      tvbparse_action_t after_cb){
+       tvbparse_wanted_t* wanted = g_malloc(sizeof *wanted);
+
+       /* replace the "use the default" markers by the actual bounds */
+       if (min_len == 0) {
+               min_len = 1;
+       }
+       if (max_len == 0) {
+               max_len = G_MAXINT;
+       }
+
+       wanted->type = TVBPARSE_WANTED_SIMPLE_NOT_CHARS;
+       wanted->ctl = chr;
+       wanted->len = 0;
+       wanted->min = min_len;
+       wanted->max = max_len;
+       wanted->elems = g_ptr_array_new();
+
+       wanted->id = id;
+       wanted->data = data;
+       wanted->before = before_cb;
+       wanted->after = after_cb;
+
+       return wanted;
+}
+
+
+/*
+ * Creates a wanted element matching the literal string str
+ * (case sensitive).
+ *
+ * str must be a NUL-terminated string; it is stored by reference (not
+ * copied) and its length is computed once, here.
+ */
+tvbparse_wanted_t* tvbparse_string(int id,
+                                   const gchar* str,
+                                   const void* data,
+                                   tvbparse_action_t before_cb,
+                                   tvbparse_action_t after_cb) {
+       tvbparse_wanted_t* wanted = g_malloc(sizeof *wanted);
+
+       wanted->type = TVBPARSE_WANTED_SIMPLE_STRING;
+       wanted->ctl = str;
+       wanted->len = strlen(str);
+       wanted->min = 0;
+       wanted->max = 0;
+       wanted->elems = g_ptr_array_new();
+
+       wanted->id = id;
+       wanted->data = data;
+       wanted->before = before_cb;
+       wanted->after = after_cb;
+
+       return wanted;
+}
+
+/*
+ * Creates a wanted element matching the literal string str regardless of
+ * case.
+ *
+ * str must be a NUL-terminated string; it is stored by reference (not
+ * copied) and its length is computed once, here.
+ */
+tvbparse_wanted_t* tvbparse_casestring(int id,
+                                       const gchar* str,
+                                       const void* data,
+                                       tvbparse_action_t before_cb,
+                                       tvbparse_action_t after_cb) {
+       tvbparse_wanted_t* wanted = g_malloc(sizeof *wanted);
+
+       wanted->type = TVBPARSE_WANTED_SIMPLE_CASESTRING;
+       wanted->ctl = str;
+       wanted->len = strlen(str);
+       wanted->min = 0;
+       wanted->max = 0;
+       wanted->elems = g_ptr_array_new();
+
+       wanted->id = id;
+       wanted->data = data;
+       wanted->before = before_cb;
+       wanted->after = after_cb;
+
+       return wanted;
+}
+
+
+/*
+ * Creates a wanted element that matches the first of the given alternatives
+ * that matches.
+ *
+ * The variable arguments are the alternatives, as tvbparse_wanted_t
+ * pointers, in the order in which they are to be tried; the list MUST be
+ * terminated by a NULL pointer. The alternatives are stored by reference.
+ */
+tvbparse_wanted_t* tvbparse_set_oneof(int id,
+                                      const void* data,
+                                      tvbparse_action_t before_cb,
+                                      tvbparse_action_t after_cb,
+                                      ...) {
+       tvbparse_wanted_t* w = g_malloc(sizeof(tvbparse_wanted_t));
+       /* the arguments are wanted elements (not parser states): this is what
+          tvbparse_get() expects to find in elems */
+       tvbparse_wanted_t* el;
+       va_list ap;
+
+       w->id = id;
+       w->type = TVBPARSE_WANTED_SET_ONEOF;
+       w->ctl = NULL;
+       w->len = 0;
+       w->min = 0;
+       w->max = 0;
+       w->data = data;
+       w->before = before_cb;
+       w->after = after_cb;
+       w->elems = g_ptr_array_new();
+
+       va_start(ap,after_cb);
+
+       /* collect the alternatives up to the NULL terminator */
+       while(( el = va_arg(ap,tvbparse_wanted_t*) )) {
+               g_ptr_array_add(w->elems,el);
+       }
+
+       va_end(ap);
+
+       return w;
+}
+
+/*
+ * Creates a wanted element that matches all the given elements one after
+ * the other, in the given order.
+ *
+ * The variable arguments are the members of the sequence, as
+ * tvbparse_wanted_t pointers; the list MUST be terminated by a NULL pointer.
+ * The members are stored by reference.
+ */
+tvbparse_wanted_t* tvbparse_set_seq(int id,
+                                    const void* data,
+                                    tvbparse_action_t before_cb,
+                                    tvbparse_action_t after_cb,
+                                    ...) {
+       tvbparse_wanted_t* seq = g_malloc(sizeof *seq);
+       tvbparse_wanted_t* member = NULL;
+       va_list args;
+
+       seq->type = TVBPARSE_WANTED_SET_SEQ;
+       seq->ctl = NULL;
+       seq->len = 0;
+       seq->min = 0;
+       seq->max = 0;
+       seq->elems = g_ptr_array_new();
+
+       seq->id = id;
+       seq->data = data;
+       seq->before = before_cb;
+       seq->after = after_cb;
+
+       va_start(args,after_cb);
+
+       /* append every member up to (not including) the NULL terminator */
+       for (member = va_arg(args,tvbparse_wanted_t*);
+            member != NULL;
+            member = va_arg(args,tvbparse_wanted_t*)) {
+               g_ptr_array_add(seq->elems,member);
+       }
+
+       va_end(args);
+       return seq;
+}
+
+
+/*
+ * Creates a wanted element that matches between from and to consecutive
+ * instances of el.
+ *
+ * from must be at least 1 and must not exceed to (this is asserted);
+ * from == to asks for an exact number of repetitions.
+ * el is stored by reference.
+ */
+tvbparse_wanted_t* tvbparse_some(int id,
+                                 guint from,
+                                 guint to,
+                                 const void* data,
+                                 tvbparse_action_t before_cb,
+                                 tvbparse_action_t after_cb,
+                                 const tvbparse_wanted_t* el) {
+
+       tvbparse_wanted_t* w;
+
+       /* validate the bounds before allocating anything; a lower bound of 0
+          is not supported as tvbparse_get() cannot return an empty token */
+       g_assert(from > 0 && from <= to);
+
+       w = g_malloc(sizeof(tvbparse_wanted_t));
+
+       w->id = id;
+       w->type = TVBPARSE_WANTED_CARDINALITY;
+       w->ctl = NULL;
+       w->len = 0;
+       w->min = from;
+       w->max = to;
+       w->data = data;
+       w->before = before_cb;
+       w->after = after_cb;
+       w->elems = g_ptr_array_new();
+
+       /* the repeated element is the one and only sub-element */
+       g_ptr_array_add(w->elems,(gpointer)el);
+
+       return w;
+}
+
+/*
+ * Creates a wanted element that matches everything from the current
+ * position up to the first place where el matches.
+ *
+ * include_term tells whether the text matched by el is to be part of the
+ * resulting token. el is stored by reference.
+ */
+tvbparse_wanted_t* tvbparse_until(int id,
+                                  const void* data,
+                                  tvbparse_action_t before_cb,
+                                  tvbparse_action_t after_cb,
+                                  const tvbparse_wanted_t* el,
+                                  gboolean include_term) {
+       tvbparse_wanted_t* wanted = g_malloc(sizeof *wanted);
+
+       wanted->type = TVBPARSE_WANTED_UNTIL;
+
+       /* XXX this is ugly: the flag travels in the control string and
+          tvbparse_get() only looks at its first character */
+       if (include_term) {
+               wanted->ctl = "include";
+       } else {
+               wanted->ctl = "do not include";
+       }
+
+       wanted->len = 0;
+       wanted->min = 0;
+       wanted->max = 0;
+       wanted->elems = g_ptr_array_new();
+
+       wanted->id = id;
+       wanted->data = data;
+       wanted->before = before_cb;
+       wanted->after = after_cb;
+
+       /* the terminator is the one and only sub-element */
+       g_ptr_array_add(wanted->elems,(gpointer)el);
+
+       return wanted;
+}
+
+
+/*
+ * Creates a wanted element for a text enclosed in quote characters.
+ *
+ * The element is "one of":
+ *   - quote, body, quote
+ *   - quote, quote                (the empty quoted text)
+ * where body is either the two character string esc+quote or a run of
+ * characters other than quote.
+ *
+ * The two helper strings built here are referenced by the created elements
+ * and are not freed by this function.
+ *
+ * NOTE(review): body is a single alternative, not a repetition, and the
+ * "not quote" run does not exclude the esc character -- confirm that quoted
+ * texts containing escaped quotes are matched as intended.
+ */
+tvbparse_wanted_t* tvbparse_quoted(int id,
+                                   const void* data,
+                                   tvbparse_action_t before_cb,
+                                   tvbparse_action_t after_cb,
+                                   char quote,
+                                   char esc) {
+
+       gchar* esc_quot = g_strdup_printf("%c%c",esc,quote);
+       gchar* quot = g_strdup_printf("%c",quote);
+
+       /* the delimiter, shared by both alternatives */
+       tvbparse_wanted_t* want_quot = tvbparse_char(-1,quot,NULL,NULL,NULL);
+
+       /* what may stand between the delimiters */
+       tvbparse_wanted_t* escaped_quot = tvbparse_string(-1,esc_quot,NULL,NULL,NULL);
+       tvbparse_wanted_t* unquoted_run = tvbparse_not_chars(-1,0,0,quot,NULL,NULL,NULL);
+       tvbparse_wanted_t* chunk = tvbparse_set_oneof(-1, NULL, NULL, NULL,
+                                                     escaped_quot,
+                                                     unquoted_run,
+                                                     NULL);
+       tvbparse_wanted_t* body = tvbparse_set_seq(-1, NULL, NULL, NULL,
+                                                  chunk,
+                                                  NULL);
+
+       /* the two shapes of a quoted text */
+       tvbparse_wanted_t* filled = tvbparse_set_seq(-1, NULL, NULL, NULL,
+                                                    want_quot,
+                                                    body,
+                                                    want_quot,
+                                                    NULL);
+       tvbparse_wanted_t* empty = tvbparse_set_seq(-1, NULL, NULL, NULL,
+                                                   want_quot,
+                                                   want_quot,
+                                                   NULL);
+
+       return tvbparse_set_oneof(id, data, before_cb, after_cb,
+                                 filled,
+                                 empty,
+                                 NULL);
+}
+
+/*
+ * Token callback that drops the first and the last byte of a token: the
+ * token starts one byte later and becomes two bytes shorter.
+ * Both data arguments are unused. No check is made that the token is at
+ * least two bytes long.
+ * Presumably meant for stripping the delimiters of tokens such as the ones
+ * of tvbparse_quoted() -- confirm against the callers.
+ */
+void tvbparse_shrink_token_cb(void* tvbparse_data _U_,
+                              const void* wanted_data _U_,
+                              tvbparse_elem_t* tok) {
+       tok->offset++;
+       tok->len = tok->len - 2;
+}
+
+/*
+ * Creates a parser for the len bytes of tvb that start at offset.
+ *
+ * tvb     the buffer to parse
+ * offset  where parsing starts
+ * len     how many bytes may be parsed; -1 stands for "up to the end of tvb"
+ * data    handed as first argument to the before/after callbacks
+ * ignore  element to be skipped before every match attempt, or NULL
+ *
+ * The parser is ep_alloc'ed.
+ */
+tvbparse_t* tvbparse_init(tvbuff_t* tvb,
+                          int offset,
+                          int len,
+                          void* data,
+                          const tvbparse_wanted_t* ignore) {
+       tvbparse_t* tt = ep_alloc(sizeof(tvbparse_t));
+
+       if (len == -1) {
+               /* what is left counting from offset, not the size of the whole
+                  tvb: the latter overshoots the end whenever offset > 0 */
+               len = tvb_length_remaining(tvb, offset);
+
+               /* an offset outside of the tvb leaves nothing to parse */
+               if (len < 0) {
+                       len = 0;
+               }
+       }
+
+       tt->tvb = tvb;
+       tt->offset = offset;
+       tt->max_len = len;
+       tt->data = data;
+       tt->ignore = ignore;
+       tt->depth = 0;
+       return tt;
+}
+
+/*
+ * Repositions the parser tt on the len bytes of its tvb that start at
+ * offset; a len of -1 stands for "up to the end of the tvb".
+ *
+ * Returns TRUE if the parser was repositioned, FALSE (leaving position and
+ * length untouched) if offset lies outside of the tvb or if fewer than len
+ * bytes are available from offset. The nesting depth is cleared in both
+ * cases.
+ */
+gboolean tvbparse_reset(tvbparse_t* tt,
+                        int offset,
+                        int len) {
+       int remaining = tvb_length_remaining(tt->tvb, offset);
+
+       tt->depth = 0;
+
+       if (len == -1) {
+               /* what is left counting from offset: using the size of the whole
+                  tvb made every reset to an offset > 0 fail */
+               len = remaining;
+       }
+
+       if (remaining < 0 || len > remaining) {
+               return FALSE;
+       }
+
+       tt->offset = offset;
+       tt->max_len = len;
+       return TRUE;
+}
+
+/*
+ * Allocates (ep_alloc) a token for the len bytes at offset of the tvb of tt,
+ * produced for the element wanted and carrying id.
+ *
+ * The new token has no data, no sub-tokens and no successor, and it is the
+ * last token of its own list.
+ */
+static tvbparse_elem_t* new_tok(tvbparse_t* tt,
+                                int id,
+                                int offset,
+                                int len,
+                                const tvbparse_wanted_t* wanted) {
+       tvbparse_elem_t* elem = ep_alloc(sizeof *elem);
+
+       /* what was matched, and where */
+       elem->id = id;
+       elem->wanted = wanted;
+       elem->tvb = tt->tvb;
+       elem->offset = offset;
+       elem->len = len;
+
+       /* not linked to anything yet */
+       elem->data = NULL;
+       elem->sub = NULL;
+       elem->next = NULL;
+       elem->last = elem;
+
+       return elem;
+}
+
+/*
+ * Tells whether t is one of the characters of the NUL-terminated string set.
+ * The terminating NUL itself never matches.
+ */
+static gboolean tvbparse_char_in_set(gchar t, const gchar* set) {
+       guint i;
+
+       for (i = 0; set[i]; i++) {
+               if (set[i] == t) {
+                       return TRUE;
+               }
+       }
+
+       return FALSE;
+}
+
+/*
+ * Runs the callbacks of the token tree rooted at root: the "before" callback
+ * of a token is called before its sub-tokens are visited and its "after"
+ * callback once all of them have been visited.
+ */
+static void tvbparse_run_callbacks(tvbparse_t* tt, tvbparse_elem_t* root) {
+       /* tokens whose sub-tokens are being visited, innermost last */
+       GPtrArray* stack = g_ptr_array_new();
+       tvbparse_elem_t* curr = root;
+
+       while (curr) {
+
+               if (curr->wanted->before) {
+                       curr->wanted->before(tt->data, curr->wanted->data, curr);
+               }
+
+               if (curr->sub) {
+                       /* "after" is called when the token is popped again */
+                       g_ptr_array_add(stack,curr);
+                       curr = curr->sub;
+                       continue;
+               }
+
+               if (curr->wanted->after) {
+                       curr->wanted->after(tt->data, curr->wanted->data, curr);
+               }
+
+               curr = curr->next;
+
+               /* end of a list of siblings: go back up as far as needed */
+               while ( !curr && stack->len ) {
+                       curr = g_ptr_array_remove_index_fast(stack,stack->len - 1);
+                       if (curr->wanted->after) {
+                               curr->wanted->after(tt->data, curr->wanted->data, curr);
+                       }
+                       curr = curr->next;
+               }
+       }
+
+       /* TRUE: release the element storage too, it would leak otherwise */
+       g_ptr_array_free(stack,TRUE);
+}
+
+/*
+ * Tries to match the element wanted at the current position of tt.
+ *
+ * If tt has an "ignore" element, and wanted is not that very element, every
+ * leading instance of it is consumed first.
+ *
+ * On success the matched bytes are consumed and a token (allocated by
+ * new_tok(), composed tokens having their components in ->sub) is returned.
+ * If this is the outermost call (depth 1) the before/after callbacks of the
+ * token and of all its sub-tokens are run before returning.
+ *
+ * On failure NULL is returned and the position and the remaining length of
+ * tt are restored to what they were on entry.
+ */
+tvbparse_elem_t* tvbparse_get(tvbparse_t* tt,
+                              const tvbparse_wanted_t* wanted) {
+       tvbparse_elem_t* tok = NULL;
+       int save_offset = tt->offset;
+       int save_len = tt->max_len;
+
+       tt->depth++;
+
+       if (tt->ignore && tt->ignore != wanted) {
+               /* ignoring is switched off while the ignored element itself is
+                  being matched */
+               tvbparse_wanted_t* save = (void*)tt->ignore;
+               tt->ignore = NULL;
+               while ( tvbparse_get(tt,save) )  {
+                       ;
+               }
+               tt->ignore = save;
+       }
+
+       switch(wanted->type) {
+               case TVBPARSE_WANTED_NONE:
+                       goto reject;
+               case TVBPARSE_WANTED_SIMPLE_NOT_CHAR:
+               {
+                       gchar t;
+
+                       if ( tt->max_len <= 0 )
+                               goto reject;
+
+                       t = (gchar) tvb_get_guint8(tt->tvb,tt->offset);
+
+                       if ( tvbparse_char_in_set(t, wanted->ctl) )
+                               goto reject;
+
+                       tt->offset++;
+                       tt->max_len--;
+                       tok = new_tok(tt,wanted->id,tt->offset-1,1,wanted);
+                       goto accept;
+               }
+               case TVBPARSE_WANTED_SIMPLE_CHAR:
+               {
+                       gchar t;
+
+                       if ( tt->max_len <= 0 )
+                               goto reject;
+
+                       t = (gchar) tvb_get_guint8(tt->tvb,tt->offset);
+
+                       if ( ! tvbparse_char_in_set(t, wanted->ctl) )
+                               goto reject;
+
+                       tt->offset++;
+                       tt->max_len--;
+                       tok = new_tok(tt,wanted->id,tt->offset-1,1,wanted);
+                       goto accept;
+               }
+               case TVBPARSE_WANTED_SIMPLE_NOT_CHARS:
+               {
+                       gchar t;
+                       int start = tt->offset;
+                       guint length = 0;
+
+                       /* take chars until one of the set, the upper bound or the
+                          end of the data is reached */
+                       while( tt->max_len > 0 && length < wanted->max) {
+                               t = (gchar) tvb_get_guint8(tt->tvb,tt->offset);
+
+                               if ( tvbparse_char_in_set(t, wanted->ctl) )
+                                       break;
+
+                               length++;
+                               tt->offset++;
+                               tt->max_len--;
+                       }
+
+                       if ( length < wanted->min )
+                               goto reject;
+
+                       tok = new_tok(tt,wanted->id,start,length,wanted);
+                       goto accept;
+               }
+               case TVBPARSE_WANTED_SIMPLE_CHARS:
+               {
+                       gchar t;
+                       int start = tt->offset;
+                       guint length = 0;
+
+                       /* take chars until one not in the set, the upper bound or
+                          the end of the data is reached */
+                       while( tt->max_len > 0 && length < wanted->max) {
+                               t = (gchar) tvb_get_guint8(tt->tvb,tt->offset);
+
+                               if ( ! tvbparse_char_in_set(t, wanted->ctl) )
+                                       break;
+
+                               length++;
+                               tt->offset++;
+                               tt->max_len--;
+                       }
+
+                       if ( length < wanted->min )
+                               goto reject;
+
+                       tok = new_tok(tt,wanted->id,start,length,wanted);
+                       goto accept;
+               }
+               case TVBPARSE_WANTED_SIMPLE_STRING:
+               {
+                       /* the whole string has to fit in what is left to parse */
+                       if ( tt->max_len >= wanted->len
+                                && tvb_strneql(tt->tvb, tt->offset, wanted->ctl, wanted->len) == 0 ) {
+                               int offset = tt->offset;
+                               tt->offset += wanted->len;
+                               tt->max_len -= wanted->len;
+                               tok = new_tok(tt,wanted->id,offset,wanted->len,wanted);
+                               goto accept;
+                       } else {
+                               goto reject;
+                       }
+               }
+               case TVBPARSE_WANTED_SIMPLE_CASESTRING:
+               {
+                       /* the whole string has to fit in what is left to parse */
+                       if ( tt->max_len >= wanted->len
+                                && tvb_strncaseeql(tt->tvb, tt->offset, wanted->ctl, wanted->len) == 0 ) {
+                               int offset = tt->offset;
+                               tt->offset += wanted->len;
+                               tt->max_len -= wanted->len;
+                               tok = new_tok(tt,wanted->id,offset,wanted->len,wanted);
+                               goto accept;
+                       } else {
+                               goto reject;
+                       }
+               }
+               case TVBPARSE_WANTED_SET_ONEOF:
+               {
+                       guint i;
+
+                       /* the first alternative that matches wins */
+                       for(i=0; i < wanted->elems->len; i++) {
+                               tvbparse_wanted_t* w = g_ptr_array_index(wanted->elems,i);
+                               tvbparse_elem_t* sub_tok = tvbparse_get(tt, w);
+
+                               if (sub_tok) {
+                                       tok = new_tok(tt, wanted->id, sub_tok->offset, sub_tok->len, wanted);
+                                       tok->sub = sub_tok;
+                                       goto accept;
+                               }
+                       }
+                       goto reject;
+               }
+               case TVBPARSE_WANTED_SET_SEQ:
+               {
+                       guint i;
+
+                       /* every member has to match, one after the other */
+                       for(i=0; i < wanted->elems->len; i++) {
+                               tvbparse_wanted_t* w = g_ptr_array_index(wanted->elems,i);
+                               tvbparse_elem_t* sub_tok = tvbparse_get(tt, w);
+
+                               if (! sub_tok)
+                                       goto reject;
+
+                               if (tok) {
+                                       /* stretch the token and append to its sub-tokens */
+                                       tok->len = (sub_tok->offset - tok->offset) + sub_tok->len;
+                                       tok->sub->last->next = sub_tok;
+                                       tok->sub->last = sub_tok;
+                               } else {
+                                       tok = new_tok(tt, wanted->id, sub_tok->offset, sub_tok->len, wanted);
+                                       tok->sub = sub_tok;
+                               }
+                       }
+
+                       goto accept;
+               }
+               case TVBPARSE_WANTED_CARDINALITY:
+               {
+                       guint got_so_far = 0;
+                       tvbparse_wanted_t* w = g_ptr_array_index(wanted->elems,0);
+
+                       /* take as many instances as there are, up to the maximum */
+                       while (got_so_far < wanted->max) {
+                               tvbparse_elem_t* sub_tok = tvbparse_get(tt, w);
+
+                               if (! sub_tok)
+                                       break;
+
+                               if (tok) {
+                                       /* stretch the token and append to its sub-tokens */
+                                       tok->len = (sub_tok->offset - tok->offset) + sub_tok->len;
+                                       tok->sub->last->next = sub_tok;
+                                       tok->sub->last = sub_tok;
+                               } else {
+                                       tok = new_tok(tt, wanted->id, sub_tok->offset, sub_tok->len, wanted);
+                                       tok->sub = sub_tok;
+                               }
+
+                               got_so_far++;
+                       }
+
+                       if(got_so_far < wanted->min) {
+                               goto reject;
+                       }
+
+                       goto accept;
+               }
+               case TVBPARSE_WANTED_UNTIL:
+               {
+                       /* start of the token: ignored input is already skipped */
+                       int offset = tt->offset;
+                       tvbparse_wanted_t* w = g_ptr_array_index(wanted->elems,0);
+                       tvbparse_elem_t* found = tvbparse_find(tt, w);
+
+                       if (! found)
+                               goto reject;
+
+                       /* the token of the terminator is recycled as the result */
+                       tok = found;
+
+                       /* XXX this is ugly */
+                       if (*(wanted->ctl) == 'i' ) {
+                               /* terminator included: the parser stays behind it */
+                               tok->len = (tok->offset - offset) + tok->len;
+                       } else {
+                               tok->len = (tok->offset - offset);
+
+                               /* terminator excluded: go back to where it starts.
+                                  The length is counted from offset, which differs
+                                  from save_offset if ignored input was skipped. */
+                               tt->offset = offset + tok->len;
+                               tt->max_len = save_len - (tt->offset - save_offset);
+                       }
+
+                       tok->offset = offset;
+                       tok->id = wanted->id;
+                       tok->next = NULL;
+                       tok->last = tok;
+                       tok->wanted = wanted;
+
+                       goto accept;
+               }
+       }
+
+       DISSECTOR_ASSERT_NOT_REACHED();
+       return NULL;
+
+accept:
+       if (tok) {
+               /* callbacks are run once, when the outermost element is done */
+               if( tt->depth == 1 ) {
+                       tvbparse_run_callbacks(tt, tok);
+               }
+
+               tt->depth--;
+               return tok;
+       }
+       /* nothing was matched (e.g. a sequence without members): reject */
+
+reject:
+       tt->offset = save_offset;
+       tt->max_len = save_len;
+       tt->depth--;
+       return NULL;
+
+}
+
+
+/*
+ * Looks for the first place, at or after the current position of tt, where
+ * the element wanted matches.
+ *
+ * On success the token is returned and the parser is left where
+ * tvbparse_get() left it, i.e. behind the match. If there is no match in
+ * what is left to parse, NULL is returned and the position and remaining
+ * length of tt are restored to what they were on entry.
+ */
+tvbparse_elem_t* tvbparse_find(tvbparse_t* tt, const tvbparse_wanted_t* wanted) {
+       int save_offset = tt->offset;
+       int save_len = tt->max_len;
+       tvbparse_elem_t* tok = NULL;
+
+       /* stay within the length given to the parser as well as within the
+          tvb: checking the tvb alone let the search run past max_len */
+       while ( tt->max_len >= wanted->len
+                       && tvb_length_remaining(tt->tvb,tt->offset) >= wanted->len ) {
+               if (( tok = tvbparse_get(tt, wanted) )) {
+                       return tok;
+               }
+
+               /* no match here, retry one byte further */
+               tt->offset++;
+               tt->max_len--;
+       }
+
+       tt->offset = save_offset;
+       tt->max_len = save_len;
+
+       return NULL;
+}
+
diff --git a/epan/tvbparse.h b/epan/tvbparse.h
new file mode 100644 (file)
index 0000000..e011b6f
--- /dev/null
@@ -0,0 +1,329 @@
+
+/* tvbparse.h
+*
+* an API for text tvb parsers
+*
+* Copyright 2005, Luis E. Garcia Ontanon <luis.ontanon@gmail.com>
+*
+* $Id:  $
+*
+* Ethereal - Network traffic analyzer
+* By Gerald Combs <gerald@ethereal.com>
+* Copyright 1998 Gerald Combs
+*
+* This program is free software; you can redistribute it and/or
+* modify it under the terms of the GNU General Public License
+* as published by the Free Software Foundation; either version 2
+* of the License, or (at your option) any later version.
+* 
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+* 
+* You should have received a copy of the GNU General Public License
+* along with this program; if not, write to the Free Software
+* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+*/
+
+/*
+ The intention behind this is to ease the writing of dissectors that have to
+ parse text without the need of writing into buffers.
+ It was originally written to avoid using lex and yacc for the xml dissector.
+ The parser is able to look for wanted elements; these can be:
+ simple tokens:
+ - a char out of a string of needles
+ - a char not belonging to a string of needles
+ - a sequence of chars that belong to a set of chars
+ - a sequence of chars that do not belong to a set of chars
+ - a string
+ - a caseless string
+ - all the characters up to a certain wanted element (included or excluded)
+ composed elements:
+ - one of a given group of wanted elements
+ - a sequence of wanted elements
+ - some (at least one) instances of a wanted element
+ Once a wanted element is successfully extracted, by either tvbparse_get or
+ tvbparse_find, the parser will invoke one given callback before, and another
+ one after, the callbacks of each of its subelements are called.
+ If tvbparse_get or tvbparse_find fails to extract the wanted element, the
+ callbacks of its subelements are not going to be invoked.
+ The wanted elements are instantiated once by the proto_register_xxx function.
+ The parser is instantiated for every packet and it maintains its state.
+ The element's data is destroyed before the next packet is dissected.
+ */
+
+#ifndef _TVB_PARSE_H_
+#define _TVB_PARSE_H_
+
+#include <epan/tvbuff.h>
+#include <glib.h>
+
+/* a definition of something we want to look for */
+typedef struct _tvbparse_wanted_t tvbparse_wanted_t;
+
+/* an instance of a per packet parser */
+typedef struct _tvbparse_t tvbparse_t;
+
+/* a matching token returned by either tvbparse_get or tvbparse_find */
+typedef struct _tvbparse_elem_t {
+       int id; /* the arbitrary id given to the wanted definition, copied to the extracted token */
+       
+       tvbuff_t* tvb; /* NOTE(review): presumably the tvb the element was extracted from -- confirm */
+       int offset; /* NOTE(review): presumably where the element starts in tvb -- confirm */
+       int len; /* NOTE(review): presumably the length of the element -- confirm */
+       
+       void* data; /* NOTE(review): purpose is not visible from this header -- confirm in tvbparse.c */
+       
+       struct _tvbparse_elem_t* sub; /* presumably the first subelement of a composed element -- confirm */
+       
+       struct _tvbparse_elem_t* next; /* following element; the parser follows this link while running the callbacks */
+       struct _tvbparse_elem_t* last; /* NOTE(review): presumably the tail of the next chain -- confirm */
+       
+       const tvbparse_wanted_t* wanted; /* wanted definition of this element; the parser calls its callbacks with its private data */
+} tvbparse_elem_t;
+
+/*
+ * a callback function to be called before or after an element has been successfully extracted.
+ * Note that if the token belongs to a composed token the callbacks of the components won't be called.
+ *
+ * tvbparse_data: the private data of the parser
+ * wanted_data: the private data of the wanted element
+ * elem: the extracted element
+ */
+typedef void (*tvbparse_action_t)(void* tvbparse_data, const void* wanted_data, struct _tvbparse_elem_t* elem);
+
+
+/*
+ * definition of wanted token types
+ *
+ * the following functions define the tokens we will be able to look for in a tvb
+ * common parameters are:
+ *
+ * id: an arbitrary id that will be copied to the eventual token (don't use 0)
+ * private_data: persistent data to be passed to the callback action (wanted_data)
+ * before_cb: a callback function to be called before those of the subelements
+ * after_cb: a callback function to be called after those of the subelements
+ */
+
+
+/*
+ * a char element.
+ *
+ * When looked for it returns a simple element one character long if the char
+ * at the current offset matches one of the needles.
+ */
+tvbparse_wanted_t* tvbparse_char(int id,
+                                                                const gchar* needles,
+                                                                const void* private_data,
+                                                                tvbparse_action_t before_cb,
+                                                                tvbparse_action_t after_cb);
+
+/*
+ * a not_char element.
+ *
+ * When looked for it returns a simple element one character long if the char
+ * at the current offset does not match any of the needles.
+ */
+tvbparse_wanted_t* tvbparse_not_char(int id,
+                                                                        const gchar* needle,
+                                                                        const void* private_data,
+                                                                        tvbparse_action_t before_cb,
+                                                                        tvbparse_action_t after_cb);
+
+/*
+ * a chars element
+ *
+ * When looked for it returns a simple element one or more characters long if
+ * one or more char(s) starting from the current offset match one of the needles.
+ * An element will be returned only if at least min_len chars match (a min_len
+ * of 0 counts as 1).
+ * It will take at most max_len chars, or as many as it can if max_len is 0.
+ */
+tvbparse_wanted_t* tvbparse_chars(int id,
+                                                                 guint min_len,
+                                                                 guint max_len,
+                                                                 const gchar* needles,
+                                                                 const void* private_data,
+                                                                 tvbparse_action_t before_cb,
+                                                                 tvbparse_action_t after_cb);
+
+/*
+ * a not_chars element
+ *
+ * When looked for it returns a simple element one or more characters long if
+ * one or more char(s) starting from the current offset do not match any of the
+ * needles.
+ * An element will be returned only if at least min_len chars qualify (a min_len
+ * of 0 counts as 1).
+ * It will take at most max_len chars, or as many as it can if max_len is 0.
+ */
+tvbparse_wanted_t* tvbparse_not_chars(int id,
+                                                                         guint min_len,
+                                                                         guint max_len,
+                                                                         const gchar* needles,
+                                                                         const void* private_data,
+                                                                         tvbparse_action_t before_cb,
+                                                                         tvbparse_action_t after_cb);
+
+/*
+ * a string element
+ *
+ * When looked for it returns a simple element if we have the given string at
+ * the current offset (see tvbparse_casestring for the caseless variant).
+ */
+tvbparse_wanted_t* tvbparse_string(int id,
+                                                                  const gchar* string,
+                                                                  const void* private_data,
+                                                                  tvbparse_action_t before_cb,
+                                                                  tvbparse_action_t after_cb);
+
+/*
+ * casestring (a caseless string element)
+ *
+ * When looked for it returns a simple element if we have a string matching
+ * the given one, regardless of case, at the current offset.
+ */
+tvbparse_wanted_t* tvbparse_casestring(int id,
+                                                                          const gchar* str,
+                                                                          const void* data,
+                                                                          tvbparse_action_t before_cb,
+                                                                          tvbparse_action_t after_cb);
+
+/*
+ * until
+ *
+ * When looked for it returns a simple element containing all the characters 
+ * found until the first match of the ending element if the ending element is
+ * found.
+ *
+ * It won't have a subelement, the ending's callbacks won't get called.
+ *
+ * include_ending: presumably selects whether the ending element itself is
+ * part of the returned element (included) or left out (excluded) -- confirm.
+ */
+tvbparse_wanted_t* tvbparse_until(int id,
+                                                                 const void* private_data,
+                                                                 tvbparse_action_t before_cb,
+                                                                 tvbparse_action_t after_cb,
+                                                                 const tvbparse_wanted_t* ending,
+                                                                 gboolean include_ending);
+
+
+/*
+ * one_of
+ *
+ * When looked for it will try to match to the given candidates and return a
+ * composed element whose subelement is the first match.
+ *
+ * The candidates are passed as the trailing variable arguments (presumably
+ * as tvbparse_wanted_t pointers -- confirm) and the list of candidates is
+ * terminated with a NULL.
+ *
+ */
+tvbparse_wanted_t* tvbparse_set_oneof(int id,
+                                                                         const void* private_data,
+                                                                         tvbparse_action_t before_cb,
+                                                                         tvbparse_action_t after_cb,
+                                                                         ...);
+
+/*
+ * sequence
+ *
+ * When looked for it will try to match in order all the given candidates. If
+ * every candidate is found in the given order it will return a composed
+ * element whose subelements are the matched elements.
+ *
+ * The list of candidates is terminated with a NULL.
+ *
+ */
+tvbparse_wanted_t* tvbparse_set_seq(int id,
+                                                                       const void* private_data,
+                                                                       tvbparse_action_t before_cb,
+                                                                       tvbparse_action_t after_cb,
+                                                                       ...);
+/*
+ * some
+ *
+ * When looked for it will try to match the given candidate at least min times
+ * and at most max times. If the given candidate is matched at least min times
+ * a composed element is returned.
+ *
+ * The tvbparse_one_or_more() macro that follows this prototype is a shorthand
+ * for tvbparse_some() with min set to 1 and max set to G_MAXINT.
+ *
+ */
+tvbparse_wanted_t* tvbparse_some(int id,
+                                                                guint min,
+                                                                guint max,
+                                                                const void* private_data,
+                                                                tvbparse_action_t before_cb,
+                                                                tvbparse_action_t after_cb,
+                                                                const tvbparse_wanted_t* wanted);
+
+#define tvbparse_one_or_more(id, private_data, before_cb, after_cb, wanted)\
+       tvbparse_some(id, 1, G_MAXINT, private_data, before_cb, after_cb, wanted)
+
+/*  quoted
+ *  this is a composed candidate, that will try to match a quoted string
+ *  (including the quotes), taking into it every escaped quote.
+ *
+ *  quote and escape are single chars, so C strings are matched with
+ *  tvbparse_quoted(-1,NULL,NULL,NULL,'"','\\')
+ */
+tvbparse_wanted_t* tvbparse_quoted(int id,
+                                                                  const void* data,
+                                                                  tvbparse_action_t before_cb,
+                                                                  tvbparse_action_t after_cb,
+                                                                  char quote,
+                                                                  char escape);
+
+/*
+ * a helper callback for quoted strings that will shrink the token to contain
+ * only the string and not the quotes
+ */
+void tvbparse_shrink_token_cb(void* tvbparse_data,
+                                                         const void* wanted_data,
+                                                         tvbparse_elem_t* tok);
+
+
+
+
+
+
+
+/* initialize the parser (to be done at every packet)
+* tvb: the tvb to be parsed
+* offset: the offset to start parsing from
+* len: how many bytes are to be parsed
+* private_data: will be passed to the action callbacks 
+* ignore: a wanted token type to be ignored (the associated cb WILL be called when it matches)
+*/
+tvbparse_t* tvbparse_init(tvbuff_t* tvb,
+                                                 int offset,
+                                                 int len,
+                                                 void* private_data,
+                                                 const tvbparse_wanted_t* ignore);
+
+/* reset the parser
+* offset, len: presumably the new starting offset and length to parse, as in
+* tvbparse_init -- confirm.
+* NOTE(review): the meaning of the gboolean result is not documented in this
+* header -- confirm in tvbparse.c.
+*/
+gboolean tvbparse_reset(tvbparse_t* tt, int offset, int len);
+
+/* it will look for the wanted token at the current offset, or after any
+*  number of ignored tokens, returning NULL if there's no match.
+*  If there is a match it will set the offset of the parser right after
+*  the end of the token.
+*/
+tvbparse_elem_t* tvbparse_get(tvbparse_t* tt,
+                                                         const tvbparse_wanted_t* wanted);
+
+/* it will look for a wanted token even beyond the current offset
+* AVOID USING IT because:
+* - it is TOO slow,
+* - if the wanted type is a composite type and it gets partially matched
+*   several times while looking for it, the callbacks of the matched
+*   subtokens WILL be called every time
+*/
+
+tvbparse_elem_t* tvbparse_find(tvbparse_t* tt,
+                                                          const tvbparse_wanted_t* wanted);
+
+#endif