Add an additional "title" attribute for UAT fields; that's what's
[obnox/wireshark/wip.git] / epan / dfilter / semcheck.c
index db728d7ca2238fdc14abc4639dd34c8872c4bb09..7c3fb259c79cfc3cd18102a65faa3f2d0a9061e6 100644 (file)
@@ -1,8 +1,8 @@
 /*
- * $Id: semcheck.c,v 1.23 2004/02/11 21:20:52 guy Exp $
+ * $Id$
  *
- * Ethereal - Network traffic analyzer
- * By Gerald Combs <gerald@ethereal.com>
+ * Wireshark - Network traffic analyzer
+ * By Gerald Combs <gerald@wireshark.org>
  * Copyright 2001 Gerald Combs
  *
  * This program is free software; you can redistribute it and/or
 #include <string.h>
 
 #include "dfilter-int.h"
+#include "semcheck.h"
 #include "syntax-tree.h"
 #include "sttype-range.h"
 #include "sttype-test.h"
+#include "sttype-function.h"
 
 #include <epan/exceptions.h>
 #include <epan/packet.h>
 
+
 /* Enable debug logging by defining AM_CFLAGS
  * so that it contains "-DDEBUG_dfilter".
  * Usage: DebugLog(("Error: string=%s\n", str)); */
+
 #ifdef DEBUG_dfilter
 #define DebugLog(x) \
        printf("%s:%u: ", __FILE__, __LINE__); \
@@ -73,7 +77,9 @@ compatible_ftypes(ftenum_t a, ftenum_t b)
                case FT_ETHER:
                case FT_BYTES:
                case FT_UINT_BYTES:
-                       return (b == FT_ETHER || b == FT_BYTES || b == FT_UINT_BYTES);
+               case FT_GUID:
+               case FT_OID:
+                       return (b == FT_ETHER || b == FT_BYTES || b == FT_UINT_BYTES || b == FT_GUID || b == FT_OID);
 
                case FT_BOOLEAN:
                case FT_FRAMENUM:
@@ -103,6 +109,7 @@ compatible_ftypes(ftenum_t a, ftenum_t b)
 
                case FT_STRING:
                case FT_STRINGZ:
+               case FT_EBCDIC:
                case FT_UINT_STRING:
                        switch (b) {
                                case FT_STRING:
@@ -129,7 +136,7 @@ mk_uint32_fvalue(guint32 val)
        fvalue_t *fv;
 
        fv = fvalue_new(FT_UINT32);
-       fvalue_set_integer(fv, val);
+       fvalue_set_uinteger(fv, val);
 
        return fv;
 }
@@ -160,10 +167,13 @@ mk_fvalue_from_val_string(header_field_info *hfinfo, char *s)
                case FT_UINT_BYTES:
                case FT_STRING:
                case FT_STRINGZ:
+               case FT_EBCDIC:
                case FT_UINT_STRING:
                case FT_UINT64:
                case FT_INT64:
                case FT_PCRE:
+               case FT_GUID:
+               case FT_OID:
                        return FALSE;
 
                case FT_BOOLEAN:
@@ -193,10 +203,10 @@ mk_fvalue_from_val_string(header_field_info *hfinfo, char *s)
                        tf = hfinfo->strings;
                }
 
-               if (strcasecmp(s, tf->true_string) == 0) {
+               if (g_ascii_strcasecmp(s, tf->true_string) == 0) {
                        return mk_uint32_fvalue(TRUE);
                }
-               else if (strcasecmp(s, tf->false_string) == 0) {
+               else if (g_ascii_strcasecmp(s, tf->false_string) == 0) {
                        return mk_uint32_fvalue(FALSE);
                }
                else {
@@ -215,7 +225,7 @@ mk_fvalue_from_val_string(header_field_info *hfinfo, char *s)
 
        vals = hfinfo->strings;
        while (vals->strptr != NULL) {
-               if (strcasecmp(s, vals->strptr) == 0) {
+               if (g_ascii_strcasecmp(s, vals->strptr) == 0) {
                        return mk_uint32_fvalue(vals->value);
                }
                vals++;
@@ -233,6 +243,8 @@ is_bytes_type(enum ftenum type)
                case FT_BYTES:
                case FT_UINT_BYTES:
                case FT_IPv6:
+               case FT_GUID:
+               case FT_OID:
                        return TRUE;
 
                case FT_NONE:
@@ -245,6 +257,7 @@ is_bytes_type(enum ftenum type)
                case FT_IPXNET:
                case FT_STRING:
                case FT_STRINGZ:
+               case FT_EBCDIC:
                case FT_UINT_STRING:
                case FT_BOOLEAN:
                case FT_FRAMENUM:
@@ -300,6 +313,13 @@ check_exists(stnode_t *st_arg1)
                        THROW(TypeError);
                        break;
 
+               case STTYPE_FUNCTION:
+            /* XXX - Maybe we should change functions so they can return fields,
+             * in which case the 'exist' should be fine. */
+                       dfilter_fail("You cannot test whether a function is present.");
+                       THROW(TypeError);
+                       break;
+
                case STTYPE_UNINITIALIZED:
                case STTYPE_TEST:
                case STTYPE_INTEGER:
@@ -309,6 +329,98 @@ check_exists(stnode_t *st_arg1)
        }
 }
 
+struct check_drange_sanity_args {      /* state shared with check_drange_node_sanity() */
+       stnode_t                *st;    /* range node whose drange is being validated */
+       gboolean                err;    /* TRUE once a bad node has been reported */
+};
+
+/* Q: Where are sttype_range_drange() and sttype_range_hfinfo() defined?
+ *
+ * A: Those functions are defined by macros in epan/dfilter/sttype-range.h
+ *
+ *    The macro which creates them, STTYPE_ACCESSOR, is defined in
+ *    epan/dfilter/syntax-tree.h.
+ *
+ * From http://www.ethereal.com/lists/ethereal-dev/200308/msg00070.html
+ */
+
+/*
+ * Per-node callback handed to drange_foreach_drange_node().
+ *
+ * 'data' is the drange_node to validate and 'user_data' points at a
+ * struct check_drange_sanity_args.  Only the first invalid node produces
+ * a dfilter_fail() message; the 'err' flag in the shared state records
+ * that a failure has been reported.
+ */
+static void
+check_drange_node_sanity(gpointer data, gpointer user_data)
+{
+       struct check_drange_sanity_args *ctx = user_data;
+       drange_node             *node = data;
+       header_field_info       *field;
+       gint                    first, last, len;
+
+       switch (drange_node_get_ending(node)) {
+
+       case TO_THE_END:
+               /* An open-ended slice has nothing to validate. */
+               break;
+
+       case LENGTH:
+               /* A slice given as start:length needs a positive length. */
+               len = drange_node_get_length(node);
+               if (len <= 0 && !ctx->err) {
+                       ctx->err = TRUE;
+                       first = drange_node_get_start_offset(node);
+                       field = sttype_range_hfinfo(ctx->st);
+                       dfilter_fail("Range %d:%d specified for \"%s\" isn't valid, "
+                               "as length %d isn't positive",
+                               first, len,
+                               field->abbrev,
+                               len);
+               }
+               break;
+
+       case OFFSET:
+               /*
+                * A slice given as start-end must not start after it
+                * ends; negative offsets are compared the same way.
+                *
+                * XXX - [-ve - +ve] is probably pathological, but isn't
+                * disallowed.
+                * [+ve - -ve] is probably pathological too, and happens to be
+                * disallowed.
+                */
+               first = drange_node_get_start_offset(node);
+               last = drange_node_get_end_offset(node);
+               if (first > last && !ctx->err) {
+                       ctx->err = TRUE;
+                       field = sttype_range_hfinfo(ctx->st);
+                       dfilter_fail("Range %d-%d specified for \"%s\" isn't valid, "
+                               "as %d is greater than %d",
+                               first, last,
+                               field->abbrev,
+                               first, last);
+               }
+               break;
+
+       case UNINITIALIZED:
+       default:
+               g_assert_not_reached();
+       }
+}
+
+/*
+ * Run check_drange_node_sanity() over every node of the drange attached
+ * to the range node 'st'.  If any node was reported as invalid, throw
+ * TypeError once the whole list has been walked.
+ */
+static void
+check_drange_sanity(stnode_t *st)
+{
+       struct check_drange_sanity_args state;
+
+       state.err = FALSE;
+       state.st = st;
+
+       drange_foreach_drange_node(sttype_range_drange(st),
+           check_drange_node_sanity, &state);
+
+       if (!state.err) {
+               return;
+       }
+       THROW(TypeError);
+}
+
 /* If the LHS of a relation test is a FIELD, run some checks
  * and possibly some modifications of syntax tree nodes. */
 static void
@@ -397,6 +509,7 @@ check_relation_LHS_FIELD(const char *relation_string, FtypeCanFunc can_func,
                stnode_free(st_arg2);
        }
        else if (type2 == STTYPE_RANGE) {
+               check_drange_sanity(st_arg2);
                if (!is_bytes_type(ftype1)) {
                        if (!ftype_can_slice(ftype1)) {
                                dfilter_fail("\"%s\" is a %s and cannot be converted into a sequence of bytes.",
@@ -423,7 +536,8 @@ check_relation_LHS_FIELD(const char *relation_string, FtypeCanFunc can_func,
 }
 
 static void
-check_relation_LHS_STRING(FtypeCanFunc can_func _U_, gboolean allow_partial_value _U_,
+check_relation_LHS_STRING(const char* relation_string,
+               FtypeCanFunc can_func _U_, gboolean allow_partial_value _U_,
                stnode_t *st_node,
                stnode_t *st_arg1, stnode_t *st_arg2)
 {
@@ -443,6 +557,13 @@ check_relation_LHS_STRING(FtypeCanFunc can_func _U_, gboolean allow_partial_valu
                hfinfo2 = stnode_data(st_arg2);
                ftype2 = hfinfo2->type;
 
+               if (!can_func(ftype2)) {
+                       dfilter_fail("%s (type=%s) cannot participate in '%s' comparison.",
+                                       hfinfo2->abbrev, ftype_pretty_name(ftype2),
+                                       relation_string);
+                       THROW(TypeError);
+               }
+
                s = stnode_data(st_arg1);
                fvalue = fvalue_from_string(ftype2, s, dfilter_fail);
                if (!fvalue) {
@@ -465,6 +586,7 @@ check_relation_LHS_STRING(FtypeCanFunc can_func _U_, gboolean allow_partial_valu
                THROW(TypeError);
        }
        else if (type2 == STTYPE_RANGE) {
+               check_drange_sanity(st_arg2);
                s = stnode_data(st_arg1);
                fvalue = fvalue_from_string(FT_BYTES, s, dfilter_fail);
                if (!fvalue) {
@@ -480,7 +602,8 @@ check_relation_LHS_STRING(FtypeCanFunc can_func _U_, gboolean allow_partial_valu
 }
 
 static void
-check_relation_LHS_UNPARSED(FtypeCanFunc can_func _U_, gboolean allow_partial_value,
+check_relation_LHS_UNPARSED(const char* relation_string,
+               FtypeCanFunc can_func, gboolean allow_partial_value,
                stnode_t *st_node,
                stnode_t *st_arg1, stnode_t *st_arg2)
 {
@@ -500,6 +623,13 @@ check_relation_LHS_UNPARSED(FtypeCanFunc can_func _U_, gboolean allow_partial_va
                hfinfo2 = stnode_data(st_arg2);
                ftype2 = hfinfo2->type;
 
+               if (!can_func(ftype2)) {
+                       dfilter_fail("%s (type=%s) cannot participate in '%s' comparison.",
+                                       hfinfo2->abbrev, ftype_pretty_name(ftype2),
+                                       relation_string);
+                       THROW(TypeError);
+               }
+
                s = stnode_data(st_arg1);
                fvalue = fvalue_from_unparsed(ftype2, s, allow_partial_value, dfilter_fail);
                if (!fvalue) {
@@ -522,7 +652,7 @@ check_relation_LHS_UNPARSED(FtypeCanFunc can_func _U_, gboolean allow_partial_va
                THROW(TypeError);
        }
        else if (type2 == STTYPE_RANGE) {
-               /* XXX - is this right? */
+               check_drange_sanity(st_arg2);
                s = stnode_data(st_arg1);
                fvalue = fvalue_from_unparsed(FT_BYTES, s, allow_partial_value, dfilter_fail);
                if (!fvalue) {
@@ -537,69 +667,6 @@ check_relation_LHS_UNPARSED(FtypeCanFunc can_func _U_, gboolean allow_partial_va
        }
 }
 
-struct check_drange_sanity_args {
-       drange_node*    err_node;
-};
-
-static void
-check_drange_node_sanity(gpointer data, gpointer user_data)
-{
-       drange_node* drnode = data;
-       struct check_drange_sanity_args *args = user_data;
-       gint start_offset, end_offset;
-
-       switch (drange_node_get_ending(drnode)) {
-
-       case UNINITIALIZED:
-               g_assert_not_reached();
-               break;
-
-       case LENGTH:
-               /*
-                * Any sanity checks required here?
-                */
-               break;
-
-       case OFFSET:
-               /*
-                * Make sure the start offset isn't beyond the end
-                * offset.
-                */
-               start_offset = drange_node_get_start_offset(drnode);
-               end_offset = drange_node_get_end_offset(drnode);
-               if (start_offset > end_offset) {
-                       if (args->err_node == NULL)
-                               args->err_node = drnode;
-               }
-               break;
-
-       case TO_THE_END:
-               break;
-       }
-}
-
-static void
-check_drange_sanity(stnode_t *st)
-{
-       struct check_drange_sanity_args args;
-       header_field_info               *hfinfo;
-       gint                            start_offset, end_offset;
-
-       args.err_node = NULL;
-       drange_foreach_drange_node(sttype_range_drange(st),
-           check_drange_node_sanity, &args);
-       if (args.err_node != NULL) {
-               hfinfo = sttype_range_hfinfo(st);
-               start_offset = drange_node_get_start_offset(args.err_node);
-               end_offset = drange_node_get_end_offset(args.err_node);
-               dfilter_fail("Range %d-%d specified for \"%s\" isn't valid, as %d is greater than %d",
-                   start_offset, end_offset,
-                   hfinfo->abbrev,
-                   start_offset, end_offset);
-               THROW(TypeError);
-       }
-}
-
 static void
 check_relation_LHS_RANGE(const char *relation_string, FtypeCanFunc can_func _U_,
                gboolean allow_partial_value,
@@ -631,7 +698,7 @@ check_relation_LHS_RANGE(const char *relation_string, FtypeCanFunc can_func _U_,
 
        if (type2 == STTYPE_FIELD) {
                DebugLog(("    5 check_relation_LHS_RANGE(type2 = STTYPE_FIELD)\n"));
-               hfinfo2 = sttype_range_hfinfo(st_arg2);
+               hfinfo2 = stnode_data(st_arg2);
                ftype2 = hfinfo2->type;
 
                if (!is_bytes_type(ftype2)) {
@@ -691,7 +758,161 @@ check_relation_LHS_RANGE(const char *relation_string, FtypeCanFunc can_func _U_,
        else if (type2 == STTYPE_RANGE) {
                DebugLog(("    5 check_relation_LHS_RANGE(type2 = STTYPE_RANGE)\n"));
                check_drange_sanity(st_arg2);
-               /* XXX - check lengths of both ranges */
+       }
+       else {
+               g_assert_not_reached();
+       }
+}
+
+/*
+ * Prepare one function-call argument node for semantic checking.
+ *
+ * An STTYPE_UNPARSED node is converted into an STTYPE_FVALUE node holding
+ * an FT_STRING value; the original node is freed and the new node is
+ * returned.  Any other node type is returned unchanged.  Throws TypeError
+ * when the unparsed text cannot be turned into an FT_STRING value.
+ */
+static stnode_t*
+check_param_entity(stnode_t *st_node)
+{
+       stnode_t                *converted;
+       fvalue_t                *value;
+       char                    *text;
+
+       /* Anything other than an unparsed token is passed through untouched. */
+       if (stnode_type_id(st_node) != STTYPE_UNPARSED) {
+               return st_node;
+       }
+
+       text = stnode_data(st_node);
+       value = fvalue_from_unparsed(FT_STRING, text, FALSE, dfilter_fail);
+       if (!value) {
+               THROW(TypeError);
+       }
+
+       /* The new node replaces the unparsed one, which is released here. */
+       converted = stnode_new(STTYPE_FVALUE, value);
+       stnode_free(st_node);
+       return converted;
+}
+
+
+/* If the LHS of a relation test is a FUNCTION, run some checks
+ * and possibly some modifications of syntax tree nodes. */
+static void
+check_relation_LHS_FUNCTION(const char *relation_string, FtypeCanFunc can_func,
+               gboolean allow_partial_value,
+               stnode_t *st_node, stnode_t *st_arg1, stnode_t *st_arg2)
+{
+       stnode_t                *new_st;
+       sttype_id_t             type2;
+       header_field_info       *hfinfo2;
+       ftenum_t                ftype1, ftype2;
+       fvalue_t                *fvalue;
+       char                    *s;
+    int             param_i;
+       drange_node             *rn;
+    df_func_def_t   *funcdef;
+    guint             num_params;
+    GSList          *params;
+
+       type2 = stnode_type_id(st_arg2);
+
+    funcdef = sttype_function_funcdef(st_arg1);
+       ftype1 = funcdef->retval_ftype;
+
+    params = sttype_function_params(st_arg1);
+    num_params = g_slist_length(params);
+    if (num_params < funcdef->min_nargs) {
+        dfilter_fail("Function %s needs at least %u arguments.",
+                funcdef->name, funcdef->min_nargs);
+        THROW(TypeError);
+    }
+    else if (num_params > funcdef->max_nargs) {
+        dfilter_fail("Function %s can only accept %u arguments.",
+                funcdef->name, funcdef->max_nargs);
+        THROW(TypeError);
+    }
+
+    param_i = 0;
+    while (params) {
+        params->data = check_param_entity(params->data);
+        funcdef->semcheck_param_function(param_i, params->data);
+        params = params->next;
+    }
+
+       DebugLog(("    5 check_relation_LHS_FUNCTION(%s)\n", relation_string));
+
+       if (!can_func(ftype1)) {
+               dfilter_fail("Function %s (type=%s) cannot participate in '%s' comparison.",
+                               funcdef->name, ftype_pretty_name(ftype1),
+                               relation_string);
+               THROW(TypeError);
+       }
+
+       if (type2 == STTYPE_FIELD) {
+               hfinfo2 = stnode_data(st_arg2);
+               ftype2 = hfinfo2->type;
+
+               if (!compatible_ftypes(ftype1, ftype2)) {
+                       dfilter_fail("Function %s and %s are not of compatible types.",
+                                       funcdef->name, hfinfo2->abbrev);
+                       THROW(TypeError);
+               }
+               /* Do this check even though you'd think that if
+                * they're compatible, then can_func() would pass. */
+               if (!can_func(ftype2)) {
+                       dfilter_fail("%s (type=%s) cannot participate in specified comparison.",
+                                       hfinfo2->abbrev, ftype_pretty_name(ftype2));
+                       THROW(TypeError);
+               }
+       }
+       else if (type2 == STTYPE_STRING) {
+               s = stnode_data(st_arg2);
+               if (strcmp(relation_string, "matches") == 0) {
+                       /* Convert to a FT_PCRE */
+                       fvalue = fvalue_from_string(FT_PCRE, s, dfilter_fail);
+               } else {
+                       fvalue = fvalue_from_string(ftype1, s, dfilter_fail);
+               }
+               if (!fvalue) {
+                       THROW(TypeError);
+               }
+
+               new_st = stnode_new(STTYPE_FVALUE, fvalue);
+               sttype_test_set2_args(st_node, st_arg1, new_st);
+               stnode_free(st_arg2);
+       }
+       else if (type2 == STTYPE_UNPARSED) {
+               s = stnode_data(st_arg2);
+               if (strcmp(relation_string, "matches") == 0) {
+                       /* Convert to a FT_PCRE */
+                       fvalue = fvalue_from_unparsed(FT_PCRE, s, FALSE, dfilter_fail);
+               } else {
+                       fvalue = fvalue_from_unparsed(ftype1, s, allow_partial_value, dfilter_fail);
+               }
+               if (!fvalue) {
+                       THROW(TypeError);
+               }
+
+               new_st = stnode_new(STTYPE_FVALUE, fvalue);
+               sttype_test_set2_args(st_node, st_arg1, new_st);
+               stnode_free(st_arg2);
+       }
+       else if (type2 == STTYPE_RANGE) {
+               check_drange_sanity(st_arg2);
+               if (!is_bytes_type(ftype1)) {
+                       if (!ftype_can_slice(ftype1)) {
+                               dfilter_fail("Function \"%s\" is a %s and cannot be converted into a sequence of bytes.",
+                                               funcdef->name,
+                                               ftype_pretty_name(ftype1));
+                               THROW(TypeError);
+                       }
+
+                       /* Convert entire field to bytes */
+                       new_st = stnode_new(STTYPE_RANGE, NULL);
+
+                       rn = drange_node_new();
+                       drange_node_set_start_offset(rn, 0);
+                       drange_node_set_to_the_end(rn);
+                       /* st_arg1 is freed in this step */
+                       sttype_range_set1(new_st, st_arg1, rn);
+
+                       sttype_test_set2_args(st_node, new_st, st_arg2);
+               }
        }
        else {
                g_assert_not_reached();
@@ -708,15 +929,30 @@ check_relation(const char *relation_string, gboolean allow_partial_value,
 #ifdef DEBUG_dfilter
        static guint i = 0;
 #endif
+header_field_info   *hfinfo;
 
        DebugLog(("   4 check_relation(\"%s\") [%u]\n", relation_string, i++));
+
+       /* Protocol can only be on LHS (for "contains" or "matches" operators).
+        * Check to see if protocol is on RHS.  This catches the case where the
+        * user has written "fc" on the RHS, probably intending a byte value
+        * rather than the fibre channel protocol.  Only a protocol is rejected
+        * here; any other kind of field on the RHS must fall through. */
+       if (stnode_type_id(st_arg2) == STTYPE_FIELD) {
+               hfinfo = stnode_data(st_arg2);
+               if (hfinfo->type == FT_PROTOCOL) {
+                       dfilter_fail("Protocol (\"%s\") cannot appear on right-hand side of comparison.", hfinfo->abbrev);
+                       THROW(TypeError);
+               }
+       }
+
        switch (stnode_type_id(st_arg1)) {
                case STTYPE_FIELD:
                        check_relation_LHS_FIELD(relation_string, can_func,
                                        allow_partial_value, st_node, st_arg1, st_arg2);
                        break;
                case STTYPE_STRING:
-                       check_relation_LHS_STRING(can_func,
+                       check_relation_LHS_STRING(relation_string, can_func,
                                        allow_partial_value, st_node, st_arg1, st_arg2);
                        break;
                case STTYPE_RANGE:
@@ -724,7 +960,11 @@ check_relation(const char *relation_string, gboolean allow_partial_value,
                                        allow_partial_value, st_node, st_arg1, st_arg2);
                        break;
                case STTYPE_UNPARSED:
-                       check_relation_LHS_UNPARSED(can_func,
+                       check_relation_LHS_UNPARSED(relation_string, can_func,
+                                       allow_partial_value, st_node, st_arg1, st_arg2);
+                       break;
+               case STTYPE_FUNCTION:
+                       check_relation_LHS_FUNCTION(relation_string, can_func,
                                        allow_partial_value, st_node, st_arg1, st_arg2);
                        break;
 
@@ -732,7 +972,7 @@ check_relation(const char *relation_string, gboolean allow_partial_value,
                case STTYPE_TEST:
                case STTYPE_INTEGER:
                case STTYPE_FVALUE:
-               case STTYPE_NUM_TYPES:
+        default:
                        g_assert_not_reached();
        }
 }
@@ -788,6 +1028,9 @@ check_test(stnode_t *st_node)
                case TEST_OP_LE:
                        check_relation("<=", FALSE, ftype_can_le, st_node, st_arg1, st_arg2);
                        break;
+               case TEST_OP_BITWISE_AND:
+                       check_relation("&", FALSE, ftype_can_bitwise_and, st_node, st_arg1, st_arg2);
+                       break;
                case TEST_OP_CONTAINS:
                        check_relation("contains", TRUE, ftype_can_contains, st_node, st_arg1, st_arg2);
                        break;
@@ -795,7 +1038,7 @@ check_test(stnode_t *st_node)
 #ifdef HAVE_LIBPCRE
                        check_relation("matches", TRUE, ftype_can_matches, st_node, st_arg1, st_arg2);
 #else
-                       dfilter_fail("This Ethereal version does not support the \"matches\" operation.");
+                       dfilter_fail("This Wireshark version does not support the \"matches\" operation.");
                        THROW(TypeError);
 #endif
                        break;
@@ -833,6 +1076,7 @@ semcheck(stnode_t *st_node)
 gboolean
 dfw_semcheck(dfwork_t *dfw)
 {
+       volatile gboolean ok_filter = TRUE;
 #ifdef DEBUG_dfilter
        static guint i = 0;
 #endif
@@ -845,13 +1089,11 @@ dfw_semcheck(dfwork_t *dfw)
                semcheck(dfw->st_root);
        }
        CATCH(TypeError) {
-               DebugLog(("1 dfw_semcheck(dfwork_t *dfw = %p) [%u] - Returns FALSE\n",
-                                       dfw, i++));
-               return FALSE;
+               ok_filter = FALSE;
        }
        ENDTRY;
 
-       DebugLog(("1 dfw_semcheck(dfwork_t *dfw = %p) [%u] - Returns FALSE\n",
-                               dfw, i++));
-       return TRUE;
+       DebugLog(("1 dfw_semcheck(dfwork_t *dfw = %p) [%u] - Returns %d\n",
+                               dfw, i++,ok_filter));
+       return ok_filter;
 }