Internal PCRE field type for efficient RE parsing in dfilters.
[metze/wireshark/wip.git] / epan / ftypes / ftype-string.c
1 /*
2  * $Id: ftype-string.c,v 1.17 2003/12/09 23:02:39 obiot Exp $
3  *
4  * Ethereal - Network traffic analyzer
5  * By Gerald Combs <gerald@ethereal.com>
6  * Copyright 2001 Gerald Combs
7  *
8  * This program is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU General Public License
10  * as published by the Free Software Foundation; either version 2
11  * of the License, or (at your option) any later version.
12  *
13  * This program is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16  * GNU General Public License for more details.
17  *
18  * You should have received a copy of the GNU General Public License
19  * along with this program; if not, write to the Free Software
20  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
21  */
22
23 #ifdef HAVE_CONFIG_H
24 #include "config.h"
25 #endif
26
27 #include <ftypes-int.h>
28 #include <string.h>
29
30 #ifdef HAVE_LIBPCRE
31 #include <pcre.h>
32 #define CMP_MATCHES cmp_matches
33 #else
34 #define CMP_MATCHES NULL
35 #endif
36
37 static void
38 string_fvalue_new(fvalue_t *fv)
39 {
40         fv->value.string = NULL;
41 }
42
43 static void
44 string_fvalue_free(fvalue_t *fv)
45 {
46         if (fv->value.string) {
47                 g_free(fv->value.string);
48         }
49 }
50
51 static void
52 string_fvalue_set(fvalue_t *fv, gpointer value, gboolean already_copied)
53 {
54         g_assert(value != NULL);
55
56         /* Free up the old value, if we have one */
57         string_fvalue_free(fv);
58
59         if (already_copied) {
60                 fv->value.string = value;
61         }
62         else {
63                 fv->value.string = g_strdup(value);
64         }
65 }
66
67 static int
68 string_repr_len(fvalue_t *fv, ftrepr_t rtype)
69 {
70         gchar *p, c;
71         int repr_len;
72
73         switch (rtype) {
74                 case FTREPR_DISPLAY:
75                         return strlen(fv->value.string);
76                 case FTREPR_DFILTER:
77                         repr_len = 0;
78                         for (p = fv->value.string; (c = *p) != '\0'; p++) {
79                                 if (c == '\\' || c == '"') {
80                                         /* Backslashes and double-quotes
81                                            must be escaped. */
82                                         repr_len++;
83                                 }
84                                 repr_len++;
85                         }
86                         return repr_len + 2;    /* string plus leading and trailing quotes */
87         }
88         g_assert_not_reached();
89         return -1;
90 }
91
92 static void
93 string_to_repr(fvalue_t *fv, ftrepr_t rtype, char *buf)
94 {
95         gchar *p, c;
96         char *bufp;
97
98         if (rtype == FTREPR_DFILTER) {
99                 bufp = buf;
100                 *bufp++ = '"';
101                 for (p = fv->value.string; (c = *p) != '\0'; p++) {
102                         if (c == '\\' || c == '"') {
103                                 /* Backslashes and double-quotes
104                                    must be escaped. */
105                                 *bufp++ = '\\';
106                         }
107                         *bufp++ = c;
108                 }
109                 *bufp++ = '"';
110                 *bufp = '\0';
111         }
112         else {
113                 strcpy(buf, fv->value.string);
114         }
115 }
116
117
118 static gpointer
119 value_get(fvalue_t *fv)
120 {
121         return fv->value.string;
122 }
123
124 static gboolean
125 val_from_string(fvalue_t *fv, char *s, LogFunc logfunc _U_)
126 {
127         /* Free up the old value, if we have one */
128         string_fvalue_free(fv);
129
130         fv->value.string = g_strdup(s);
131         return TRUE;
132 }
133
134 static gboolean
135 val_from_unparsed(fvalue_t *fv, char *s, gboolean allow_partial_value _U_, LogFunc logfunc)
136 {
137         fvalue_t *fv_bytes;
138
139         /* Free up the old value, if we have one */
140         string_fvalue_free(fv);
141
142         /* Does this look like a byte-string? */
143         fv_bytes = fvalue_from_unparsed(FT_BYTES, s, TRUE, NULL);
144         if (fv_bytes) {
145                 /* Copy the bytes over to a string and terminate it
146                  * with a NUL. XXX - what if the user embeds a NUL
147                  * in the middle of the byte string? */
148                 int num_bytes = fv_bytes->value.bytes->len;
149
150                 fv->value.string = g_malloc(num_bytes + 1);
151                 memcpy(fv->value.string, fv->value.bytes->data, num_bytes);
152                 fv->value.string[num_bytes] = '\0';
153
154                 FVALUE_FREE(fv_bytes);
155                 return TRUE;
156         }
157         else {
158                 /* Just turn it into a string */
159                 return val_from_string(fv, s, logfunc);
160         }
161         g_assert_not_reached();
162 }
163
164 static guint
165 len(fvalue_t *fv)
166 {
167         return strlen(fv->value.string);
168 }
169
170 static void
171 slice(fvalue_t *fv, GByteArray *bytes, guint offset, guint length)
172 {
173         guint8* data;
174
175         data = fv->value.string + offset;
176
177         g_byte_array_append(bytes, data, length);
178 }
179
180
181 static gboolean
182 cmp_eq(fvalue_t *a, fvalue_t *b)
183 {
184         return (strcmp(a->value.string, b->value.string) == 0);
185 }
186
187 static gboolean
188 cmp_ne(fvalue_t *a, fvalue_t *b)
189 {
190         return (strcmp(a->value.string, b->value.string) != 0);
191 }
192
193 static gboolean
194 cmp_gt(fvalue_t *a, fvalue_t *b)
195 {
196         return (strcmp(a->value.string, b->value.string) > 0);
197 }
198
199 static gboolean
200 cmp_ge(fvalue_t *a, fvalue_t *b)
201 {
202         return (strcmp(a->value.string, b->value.string) >= 0);
203 }
204
205 static gboolean
206 cmp_lt(fvalue_t *a, fvalue_t *b)
207 {
208         return (strcmp(a->value.string, b->value.string) < 0);
209 }
210
211 static gboolean
212 cmp_le(fvalue_t *a, fvalue_t *b)
213 {
214         return (strcmp(a->value.string, b->value.string) <= 0);
215 }
216
217 static gboolean
218 cmp_contains(fvalue_t *fv_a, fvalue_t *fv_b)
219 {
220         /* According to
221         * http://www.introl.com/introl-demo/Libraries/C/ANSI_C/string/strstr.html
222         * strstr() returns a non-NULL value if needle is an empty
223         * string. We don't that behavior for cmp_contains. */
224         if (strlen(fv_b->value.string) == 0) {
225                 return FALSE;
226         }
227
228         if (strstr(fv_a->value.string, fv_b->value.string)) {
229                 return TRUE;
230         }
231         else {
232                 return FALSE;
233         }
234 }
235
236 #ifdef HAVE_LIBPCRE
237 static gboolean
238 cmp_matches(fvalue_t *fv_a, fvalue_t *fv_b)
239 {
240         int options = 0;
241         int rc;
242
243         /* fv_b is always a FT_PCRE, otherwise the dfilter semcheck() would have
244          * warned us. For the same reason (and because we're using g_malloc()),
245          * fv_b->value.re is not NULL.
246          */
247         if (strcmp(fv_b->ftype->name, "FT_PCRE") != 0) {
248                 return FALSE;
249         }
250         if (! fv_b->value.re) {
251                 return FALSE;
252         }
253         rc = pcre_exec(
254                         (fv_b->value.re)->re,   /* Compiled PCRE */
255                         (fv_b->value.re)->ex,   /* PCRE extra from pcre_study() */
256                         fv_a->value.string,             /* The data to check for the pattern... */
257                         (int)strlen(fv_a->value.string),        /* ... and its length */
258                         0,                      /* Start offset within data */
259                         options,        /* PCRE options */
260                         NULL,           /* We are not interested in the matched string */
261                         0                       /* of the pattern; only in success or failure. */
262                         );
263         if (rc == 0) {
264                 return TRUE;
265         }
266         return FALSE;
267 }
268 #endif
269
270 void
271 ftype_register_string(void)
272 {
273
274         static ftype_t string_type = {
275                 "FT_STRING",                    /* name */
276                 "character string",             /* pretty_name */
277                 0,                              /* wire_size */
278                 string_fvalue_new,              /* new_value */
279                 string_fvalue_free,             /* free_value */
280                 val_from_unparsed,              /* val_from_unparsed */
281                 val_from_string,                /* val_from_string */
282                 string_to_repr,                 /* val_to_string_repr */
283                 string_repr_len,                /* len_string_repr */
284
285                 string_fvalue_set,              /* set_value */
286                 NULL,                           /* set_value_integer */
287                 NULL,                           /* set_value_floating */
288
289                 value_get,                      /* get_value */
290                 NULL,                           /* get_value_integer */
291                 NULL,                           /* get_value_floating */
292
293                 cmp_eq,
294                 cmp_ne,
295                 cmp_gt,
296                 cmp_ge,
297                 cmp_lt,
298                 cmp_le,
299                 cmp_contains,
300                 CMP_MATCHES,
301
302                 len,
303                 slice,
304         };
305         static ftype_t stringz_type = {
306                 "FT_STRINGZ",
307                 "character string",
308                 0,
309                 string_fvalue_new,
310                 string_fvalue_free,
311                 val_from_unparsed,              /* val_from_unparsed */
312                 val_from_string,                /* val_from_string */
313                 NULL,                           /* val_to_string_repr */
314                 NULL,                           /* len_string_repr */
315
316                 string_fvalue_set,
317                 NULL,
318                 NULL,
319
320                 value_get,
321                 NULL,
322                 NULL,
323
324                 cmp_eq,
325                 cmp_ne,
326                 cmp_gt,
327                 cmp_ge,
328                 cmp_lt,
329                 cmp_le,
330                 cmp_contains,                   /* cmp_contains */
331                 CMP_MATCHES,
332
333                 len,
334                 slice,
335         };
336         static ftype_t uint_string_type = {
337                 "FT_UINT_STRING",
338                 "character string",
339                 0,
340                 string_fvalue_new,
341                 string_fvalue_free,
342                 val_from_unparsed,              /* val_from_unparsed */
343                 val_from_string,                /* val_from_string */
344                 NULL,                           /* val_to_string_repr */
345                 NULL,                           /* len_string_repr */
346
347                 string_fvalue_set,
348                 NULL,
349                 NULL,
350
351                 value_get,
352                 NULL,
353                 NULL,
354
355                 cmp_eq,
356                 cmp_ne,
357                 cmp_gt,
358                 cmp_ge,
359                 cmp_lt,
360                 cmp_le,
361                 cmp_contains,                   /* cmp_contains */
362                 CMP_MATCHES,
363
364                 len,
365                 slice,
366         };
367
368         ftype_register(FT_STRING, &string_type);
369         ftype_register(FT_STRINGZ, &stringz_type);
370         ftype_register(FT_UINT_STRING, &uint_string_type);
371 }