OK, does it matter in which order "extern" and "declspec" appear?

[metze/wireshark/wip.git] / tools / checkAPIs.pl
diff --git a/tools/checkAPIs.pl b/tools/checkAPIs.pl

index c7180c931616809491f078639d965742bffc4609..7abea4b808a4b3a16acd3ed3d44bbdd6df4b631f 100755 (executable)
--- a/tools/checkAPIs.pl
+++ b/tools/checkAPIs.pl
@@ -7,7 +7,10 @@
  # be called by Wireshark code and to perform certain other checks.
  #
  # Usage:
-# checkAPIs.pl [-M] [-g group1] [-g group2] [-s summary-group1] [-s summary-group2] [--nocheck-value-string-array-null-termination] file1 file2 ...
+# checkAPIs.pl [-M] [-g group1] [-g group2] ...
+#              [-s summary-group1] [-s summary-group2] ...
+#              [--nocheck-value-string-array-null-termination]
+#              [--nocheck-addtext] [--nocheck-hf] [--debug] file1 file2 ...
  #
  # $Id$
  #
@@ -119,21 +122,75 @@ my %APIs = (
                  'fopen',
                  'freopen',
                  # Misc
-                'tmpnam'        # use mkstemp
+                'tmpnam',       # use mkstemp
+                '_snwprintf'    # use StringCchPrintf
                  ] },
  
+        ### Deprecated emem functions (use wmem instead!)
+        # These will become errors once they've been removed from all the
+        # existing dissectors
+        'emem' => { 'count_errors' => 0, 'functions' => [
+                'ep_alloc',
+                'ep_new',
+                'ep_alloc0',
+                'ep_new0',
+                'ep_strdup',
+                'ep_strndup',
+                'ep_memdup',
+                'ep_strdup_vprintf',
+                'ep_strdup_printf',
+                'ep_strconcat',
+                'ep_alloc_array',
+                'ep_alloc_array0',
+                'ep_strsplit',
+                'ep_stack_new',
+                'ep_stack_push',
+                'ep_stack_pop',
+                'ep_stack_peek',
+                'se_alloc',
+                'se_new',
+                'se_alloc0',
+                'se_new0',
+                'se_strdup',
+                'se_strndup',
+                'se_memdup',
+                'se_strdup_vprintf',
+                'se_strdup_printf',
+                'se_alloc_array',
+                'se_tree_create',
+                'se_tree_insert32',
+                'se_tree_lookup32',
+                'se_tree_lookup32_le',
+                'se_tree_insert32_array',
+                'se_tree_lookup32_array',
+                'se_tree_lookup32_array_le',
+                'emem_tree_insert32',
+                'emem_tree_lookup32',
+                'emem_tree_lookup32_le',
+                'emem_tree_insert32_array',
+                'emem_tree_lookup32_array',
+                'emem_tree_lookup32_array_le',
+                'emem_tree_insert_string',
+                'emem_tree_lookup_string',
+                'emem_tree_foreach',
+                'ep_strbuf_new',
+                'ep_strbuf_new_label',
+                'ep_strbuf_sized_new',
+                'ep_strbuf_append_vprintf',
+                'ep_strbuf_printf',
+                'ep_strbuf_append_printf',
+                'ep_strbuf_append',
+                'ep_strbuf_append_c',
+                'ep_strbuf_append_unichar',
+                'ep_strbuf_truncate',
+                'emem_print_tree'
+            ] },
+
          # APIs that SHOULD NOT be used in Wireshark (any more)
          'deprecated' => { 'count_errors' => 1, 'functions' => [
                  'perror',                                       # Use g_strerror() and report messages in whatever
                                                                  #  fashion is appropriate for the code in question.
                  'ctime',                                        # Use abs_time_secs_to_str()
-                'dissector_add',                                # Use dissector_add_uint()
-                'dissector_change',                             # Use dissector_change_uint()
-                'dissector_delete',                             # Use dissector_delete_uint()
-                'dissector_get_port_handle',                    # Use dissector_get_uint_handle()
-                'dissector_reset',                              # Use dissector_reset_uint()
-                'dissector_try_port',                           # Use dissector_try_uint()
-                'dissector_try_port_new',                       # Use dissector_try_uint_new()
                  'next_tvb_add_port',                            # Use next_tvb_add_uint() (and a matching change
                                                                  #  of NTVB_PORT -> NTVB_UINT)
  
@@ -272,6 +329,8 @@ my %APIs = (
  
  );
  
+my @apiGroups = qw(prohibited deprecated emem);
+
  # Deprecated GTK+ (and GDK) functions/macros with (E)rror or (W)arning flag:
  # (The list is based upon the GTK+ 2.24.8 documentation;
  # E: There should be no current Wireshark use so Error if seen;
@@ -1317,6 +1376,12 @@ sub checkAddTextCalls($$)
          my $add_text_count = 0;
          my $okay_add_text_count = 0;
          my $add_xxx_count = 0;
+        my $total_count = 0;
+        my $aggressive = 0;
+        my $percentage = 100;
+
+        # The 3 loops here are slow, but trying a single loop with capturing
+        # parenthesis is even slower!
  
          # First count how many proto_tree_add_text() calls there are in total
          while (${$fileContentsRef} =~ m/ \W* proto_tree_add_text \W* \( /gox) {
@@ -1336,15 +1401,30 @@ sub checkAddTextCalls($$)
          $add_xxx_count -= $add_text_count;
          $add_text_count -= $okay_add_text_count;
  
+        $total_count = $add_text_count+$add_xxx_count;
+
          # Don't bother with files with small counts
-        if ($add_xxx_count < 10 || $add_text_count < 10) {
+        if (($add_xxx_count < 10 || $add_text_count < 10) && ($total_count < 20)) {
                  return;
          }
  
-        my $percentage = 100*$add_text_count/$add_xxx_count;
-        if ($percentage > 50) {
+        if ($add_xxx_count > 0) {
+            $percentage = 100*$add_text_count/$add_xxx_count;
+        }
+
+        if ($aggressive > 0) {
+            if ((($total_count <= 50) && ($percentage > 50)) ||
+                (($total_count > 50) && ($total_count <= 100) && ($percentage > 40)) ||
+                (($total_count > 100) && ($total_count <= 200) && ($percentage > 30)) ||
+                (($total_count > 200) && ($percentage > 20))) {
+                    printf "%s: found %d useless add_text() vs. %d add_<something else>() calls (%.2f%%)\n",
+                        $filename, $add_text_count, $add_xxx_count, $percentage;
+            }
+        } else {
+            if ($percentage > 50) {
                  printf "%s: found %d useless add_text() vs. %d add_<something else>() calls (%.2f%%)\n",
                          $filename, $add_text_count, $add_xxx_count, $percentage;
+            }
          }
  }
  
@@ -1354,6 +1434,7 @@ my @TvbPtrAPIs = (
          # given offset and length are equal) with these:
          'proto_tree_add_bytes_format',
          'proto_tree_add_bytes_format_value',
+        'proto_tree_add_ether',
          # Use the tvb_* version of these:
          'ether_to_str',
          'ip_to_str',
@@ -1363,6 +1444,8 @@ my @TvbPtrAPIs = (
          # Use tvb_bytes_to_str[_punct] instead of:
          'bytes_to_str',
          'bytes_to_str_punct',
+        'SET_ADDRESS',
+        'SET_ADDRESS_HF',
  );
  
  sub checkAPIsCalledWithTvbGetPtr($$$)
@@ -1407,6 +1490,64 @@ sub check_snprintf_plus_strlen($$)
          }
  }
  
+sub check_included_files($$)
+{
+        my ($fileContentsRef, $filename) = @_;
+        my @incFiles;
+
+        # wsutils/wsgcrypt.h is our wrapper around gcrypt.h, it must be excluded from the tests
+        if ($filename =~ /wsgcrypt\.h/) {
+                return;
+        }
+
+        @incFiles = (${$fileContentsRef} =~ m/\#include \s* [<"](.+)[>"]/gox);
+        foreach (@incFiles) {
+                if ( m#(^|/+)gcrypt\.h$# ) {
+                        print STDERR "Warning: ".$filename." includes gcrypt.h directly. ".
+                                "Include wsutil/wsgrypt.h instead.\n";
+                        last;
+                }
+        }
+}
+
+sub check_proto_tree_add_XXX_encoding($$)
+{
+        my ($fileContentsRef, $filename) = @_;
+        my @items;
+        my $errorCount = 0;
+
+        @items = (${$fileContentsRef} =~ m/ (proto_tree_add_[_a-z0-9]+) \( ([^;]*) \) \s* ; /xsg);
+
+        while (@items) {
+                my ($func) = @items;
+                shift @items;
+                my ($args) = @items;
+                shift @items;
+
+                # Remove anything inside parenthesis in the arguments so we
+                # don't get false positives when someone calls
+                # proto_tree_add_XXX(..., tvb_YYY(..., ENC_ZZZ))
+                $args =~ s/\(.*\)//g;
+
+                if ($args =~ /,\s*ENC_/xos) {
+                        if (!($func =~ /proto_tree_add_(item|bitmask|bits_item|bits_ret_val)/xos)
+                           ) {
+                                print STDERR "Error: ".$filename." uses $func with ENC_*.\n";
+                                $errorCount++;
+
+                                # Print out the function args to make it easier
+                                # to find the offending code.  But first make
+                                # it readable by eliminating extra white space.
+                                $args =~ s/\s+/ /g;
+                                print STDERR "\tArgs: " . $args . "\n";
+                        }
+                }
+        }
+
+        return $errorCount;
+}
+
+
  # Verify that all declared ett_ variables are registered.
  # Don't bother trying to check usage (for now)...
  sub check_ett_registration($$)
@@ -1415,6 +1556,7 @@ sub check_ett_registration($$)
          my @ett_declarations;
          my %ett_registrations;
          my @unRegisteredEtts;
+        my $errorCount = 0;
  
          # A pattern to match ett variable names.  Obviously this assumes that
          # they start with ett_
@@ -1501,8 +1643,10 @@ sub check_ett_registration($$)
  
          if (@unRegisteredEtts) {
                  print STDERR "Error: found these unregistered ett variables in ".$filename.": ".join(',', @unRegisteredEtts)."\n";
+                $errorCount++;
          }
  
+        return $errorCount;
  }
  
  # Given the file contents and a file name, check all of the hf entries for
@@ -1613,6 +1757,110 @@ sub check_hf_entries($$)
          return $errorCount;
  }
  
+sub print_usage
+{
+        print "Usage: checkAPIs.pl [-M] [-h] [-g group1] [-g group2] ... \n";
+        print "                    [--build] [-s group1] [-s group2] ... \n";
+        print "                    [--nocheck-value-string-array-null-termination] \n";
+        print "                    [--nocheck-addtext] [--nocheck-hf] [--debug] file1 file2 ...\n";
+        print "\n";
+        print "       -M: Generate output for -g in 'machine-readable' format\n";
+        print "       -h: help, print usage message\n";
+        print "       -g <group>:  Check input files for use of APIs in <group>\n";
+        print "                    (in addition to the default groups)\n";
+        print "       -s <group>:  Output summary (count) for each API in <group>\n";
+        print "                    (-g <group> also req'd)\n";
+        print "       ---nocheck-value-string-array-null-termination: UNDOCUMENTED\n";
+        print "       ---nocheck-addtext: UNDOCUMENTED\n";
+        print "       ---nocheck-hf: UNDOCUMENTED\n";
+        print "       ---debug: UNDOCUMENTED\n";
+        print "       ---build: UNDOCUMENTED\n";
+        print "\n";
+        print "   Default Groups[-g]: ", join (", ", sort @apiGroups), "\n";
+        print "   Available Groups:   ", join (", ", sort keys %APIs), "\n";
+}
+
+# -------------
+# action:  remove '#if 0'd code from the input string
+# args     codeRef, fileName
+# returns: codeRef
+#
+# Essentially: Use s//patsub/meg to pass each line to patsub.
+#              patsub monitors #if/#if 0/etc and determines
+#               if a particular code line should be removed.
+# XXX: This is probably pretty inefficient;
+#      I could imagine using another approach such as converting
+#       the input string to an array of lines and then making
+#       a pass through the array deleting lines as needed.
+
+{  # block begin
+my ($if_lvl, $if0_lvl, $if0); # shared vars
+my $debug = 0;
+
+    sub remove_if0_code {
+        my ($codeRef, $fileName)  = @_;
+
+        my ($preprocRegEx) = qr {
+                                    (                                    # $1 [complete line)
+                                        ^
+                                        (?:                              # non-capturing
+                                            \s* \# \s*
+                                            (if \s 0| if | else | endif) # $2 (only if #...)
+                                        ) ?
+                                        .*
+                                        $
+                                    )
+                            }xom;
+
+        ($if_lvl, $if0_lvl, $if0) = (0,0,0);
+        $$codeRef =~ s{ $preprocRegEx }{patsub($1,$2)}xegm;
+
+        ($debug == 2) && print "==> After Remove if0: code: [$fileName]\n$$codeRef\n===<\n";
+        return $codeRef;
+    }
+
+    sub patsub {
+        if ($debug == 99) {
+            print "-->$_[0]\n";
+            (defined $_[1]) && print "  >$_[1]<\n";
+        }
+
+        # #if/#if 0/#else/#ndif processing
+        if (defined $_[1]) {
+            my ($if) = $_[1];
+            if ($if eq 'if') {
+                $if_lvl += 1;
+            } elsif ($if eq 'if 0') {
+                $if_lvl += 1;
+                if ($if0_lvl == 0) {
+                    $if0_lvl = $if_lvl;
+                    $if0     = 1;  # inside #if 0
+                }
+            } elsif ($if eq 'else') {
+                if ($if0_lvl == $if_lvl) {
+                    $if0 = 0;
+                }
+            } elsif ($if eq 'endif') {
+                if ($if0_lvl == $if_lvl) {
+                    $if0     = 0;
+                    $if0_lvl = 0;
+                }
+                $if_lvl -= 1;
+                if ($if_lvl < 0) {
+                    die "patsub: #if/#endif mismatch"
+                }
+            }
+            return $_[0];  # don't remove preprocessor lines themselves
+        }
+
+        # not preprocessor line: See if under #if 0: If so, remove
+        if ($if0 == 1) {
+            return '';  # remove
+        }
+        return $_[0];
+    }
+}  # block end
+
  # The below Regexp are based on those from:
  # http://aspn.activestate.com/ASPN/Cookbook/Rx/Recipe/59811
  # They are in the public domain.
@@ -1641,44 +1889,42 @@ my $SingleQuotedStr = qr{ (?: \' (?: \\. | [^\'\\])* [']) }x;
  # 4. Wireshark is strictly a C program so don't take out C++ style comments
  #    since they shouldn't be there anyway...
  #    Also: capturing the comment isn't necessary.
-my $commentAndStringRegex = qr{ (?: $DoubleQuotedStr | $SingleQuotedStr | $CComment) }x;
+## my $commentAndStringRegex = qr{ (?: $DoubleQuotedStr | $SingleQuotedStr | $CComment) }x;
  
  #### Regex for use when searching for value-string definitions
  my $StaticRegex             = qr/ static \s+                                                            /xs;
  my $ConstRegex              = qr/ const  \s+                                                            /xs;
  my $Static_andor_ConstRegex = qr/ (?: $StaticRegex $ConstRegex | $StaticRegex | $ConstRegex)            /xs;
-my $ValueStringRegex        = qr/ $Static_andor_ConstRegex value_string \ + [^;*]+ = [^;]+ [{] [^;]+ ;  /xs;
+my $ValueStringRegex        = qr/ ^ \s* $Static_andor_ConstRegex (?:value|string|range)_string \ + [^;*]+ = [^;]+ [{] [^;]+ ;  /xms;
+my $EnumValRegex        = qr/ $Static_andor_ConstRegex enum_val_t \ + [^;*]+ = [^;]+ [{] [^;]+ ;  /xs;
  
  #
  # MAIN
  #
  my $errorCount = 0;
  # The default list, which can be expanded.
-my @apiGroups = qw(prohibited deprecated);
  my @apiSummaryGroups = ();
  my $check_value_string_array_null_termination = 1;      # default: enabled
  my $machine_readable_output = 0;                        # default: disabled
  my $check_hf = 1;                                       # default: enabled
-my $debug_flag = 0;
+my $check_addtext = 1;                                  # default: enabled
+my $debug_flag = 0;                                     # default: disabled
  my $buildbot_flag = 0;
+my $help_flag = 0;
  
  my $result = GetOptions(
                          'group=s' => \@apiGroups,
                          'summary-group=s' => \@apiSummaryGroups,
                          'check-value-string-array-null-termination!' => \$check_value_string_array_null_termination,
                          'Machine-readable' => \$machine_readable_output,
-                        'nohf' => \$check_hf,
+                        'check-hf!' => \$check_hf,
+                        'check-addtext!' => \$check_addtext,
                          'build' => \$buildbot_flag,
-                        'debug' => \$debug_flag
+                        'debug' => \$debug_flag,
+                       'help' => \$help_flag
                          );
-if (!$result) {
-        print "Usage: checkAPIs.pl [-M] [-g group1] [-g group2] ... [-s group1] [-s group2] ... [--nocheck-value-string-array-null-termination] file1 file2 ..\n";
-        print "       -g <group>:  Check input files for use of APIs in <group> (in addition to the default groups)\n";
-        print "       -s <group>:  Output summary (count) for each API in <group> (-g <group> also req'd)\n";
-        print "       -M: Generate output for -g in 'machine-readable' format\n";
-        print "\n";
-        print "   Default Groups[-g]: ", join (", ", sort @apiGroups), "\n";
-        print "   Available Groups:   ", join (", ", sort keys %APIs), "\n";
+if (!$result || $help_flag) {
+       print_usage();
          exit(1);
  }
  
@@ -1704,7 +1950,10 @@ while ($_ = $ARGV[0])
  
          # delete leading './'
          $filename =~ s{ ^ \. / } {}xo;
-
+       unless (-f $filename) {
+               print STDERR "Warning: $filename is not of type file - skipping.\n";
+               next;
+       }
          # Read in the file (ouch, but it's easier that way)
          open(FC, $filename) || die("Couldn't open $filename");
          $line = 1;
@@ -1738,42 +1987,59 @@ while ($_ = $ARGV[0])
                  print STDERR "Warning: ".$filename." does not have an SVN Id tag.\n";
          }
  
-        # optionally check the hf entries
+        # Remove all the C-comments
+        $fileContents =~ s{ $CComment } []xog;
+
+        # optionally check the hf entries (including those under #if 0)
          if ($check_hf) {
-                $errorCount += check_hf_entries(\$fileContents, $filename);
+            $errorCount += check_hf_entries(\$fileContents, $filename);
          }
  
-        # Remove all the C-comments and strings
-        $fileContents =~ s {$commentAndStringRegex} []xog;
+        # check for files that we should not include directly
+        # this must be done before quoted strings (#include "file.h") are removed
+        check_included_files(\$fileContents, $filename);
+
+        # Remove all the quoted strings
+        $fileContents =~ s{ $DoubleQuotedStr | $SingleQuotedStr } []xog;
  
-        #check_ett_registration(\$fileContents, $filename);
+        #$errorCount += check_ett_registration(\$fileContents, $filename);
  
-        if ($fileContents =~ m{ // }xo)
+        if ($fileContents =~ m{ \s// }xo)
          {
                  print STDERR "Error: Found C++ style comments in " .$filename."\n";
                  $errorCount++;
          }
  
+        # Remove all blank lines
+        $fileContents =~ s{ ^ \s* $ } []xog;
+
+        # Remove all '#if 0'd' code
+        remove_if0_code(\$fileContents, $filename);
+
          #checkAPIsCalledWithTvbGetPtr(\@TvbPtrAPIs, \$fileContents, \@foundAPIs);
          #if (@foundAPIs) {
-        #       print STDERR "Found APIs with embedded tvb_get_ptr() calls in ".$filename.": ".join(',', @foundAPIs)."\n"
+        #       print STDERR "Found APIs with embedded tvb_get_ptr() calls in ".$filename." : ".join(',', @foundAPIs)."\n"
          #}
  
          check_snprintf_plus_strlen(\$fileContents, $filename);
  
-        if (! $buildbot_flag) {
+        if ($check_addtext && ! $buildbot_flag) {
                  checkAddTextCalls(\$fileContents, $filename);
          }
  
-        # Brute force check for value_string arrays which are missing {0, NULL} as the final (terminating) array entry
+        $errorCount += check_proto_tree_add_XXX_encoding(\$fileContents, $filename);
+
+        # Brute force check for value_string (and string_string or range_string) arrays
+        # which are missing {0, NULL} as the final (terminating) array entry
          if ($check_value_string_array_null_termination) {
                  #  Assumption: definition is of form (pseudo-Regex):
-                #    " (static const|static|const) value_string .+ = { .+ ;" (possibly over multiple lines)
+                #    " (static const|static|const) (value|string|range)_string .+ = { .+ ;"
+                #  (possibly over multiple lines)
                  while ($fileContents =~ / ( $ValueStringRegex ) /xsog) {
-                        # value_string array definition found; check if NULL terminated
+                        # XXX_string array definition found; check if NULL terminated
                          my $vs = my $vsx = $1;
                          if ($debug_flag) {
-                                $vsx =~ / ( .+ value_string [^=]+ ) = /xo;
+                                $vsx =~ / ( .+ (?:value|string|range)_string [^=]+ ) = /xo;
                                  printf STDERR "==> %-35.35s: %s\n", $filename, $1;
                                  printf STDERR "%s\n", $vs;
                          }
@@ -1782,12 +2048,45 @@ while ($_ = $ARGV[0])
                          #  "Don't put a comma after the last tuple of an initializer of an array"
                          # However: since this usage is present in some number of cases, we'll allow for now
                          if ($vs !~ / , NULL [}] ,? [}] ; $/xo) {
-                                $vsx =~ /( value_string [^=]+ ) = /xo;
-                                printf STDERR "Error: %-35.35s: {0, NULL} is required as the last value_string array entry: %s\n", $filename, $1;
+                                $vsx =~ /( (?:value|string|range)_string [^=]+ ) = /xo;
+                                printf STDERR "Error: %-35.35s: {..., NULL} is required as the last XXX_string array entry: %s\n", $filename, $1;
+                                $errorCount++;
+                        }
+                        if ($vs !~ / (static)? const (?:value|string|range)_string /xo)  {
+                                $vsx =~ /( (?:value|string|range)_string [^=]+ ) = /xo;
+                                printf STDERR "Error: %-35.35s: Missing 'const': %s\n", $filename, $1;
+                                $errorCount++;
+                        }
+                }
+        }
+
+        # Brute force check for enum_val_t arrays which are missing {NULL, NULL, ...}
+        # as the final (terminating) array entry
+        # For now use the same option to turn this and value_string checking on and off.
+        # (Is the option even necessary?)
+        if ($check_value_string_array_null_termination) {
+                #  Assumption: definition is of form (pseudo-Regex):
+                #    " (static const|static|const) enum_val_t .+ = { .+ ;"
+                #  (possibly over multiple lines)
+                while ($fileContents =~ / ( $EnumValRegex ) /xsog) {
+                        # enum_val_t array definition found; check if NULL terminated
+                        my $vs = my $vsx = $1;
+                        if ($debug_flag) {
+                                $vsx =~ / ( .+ enum_val_t [^=]+ ) = /xo;
+                                printf STDERR "==> %-35.35s: %s\n", $filename, $1;
+                                printf STDERR "%s\n", $vs;
+                        }
+                        $vs =~ s{ \s } {}xg;
+                        # README.developer says
+                        #  "Don't put a comma after the last tuple of an initializer of an array"
+                        # However: since this usage is present in some number of cases, we'll allow for now
+                        if ($vs !~ / NULL, NULL, -?[0-9] [}] ,? [}] ; $/xo) {
+                                $vsx =~ /( enum_val_t [^=]+ ) = /xo;
+                                printf STDERR "Error: %-35.35s: {NULL, NULL, ...} is required as the last enum_val_t array entry: %s\n", $filename, $1;
                                  $errorCount++;
                          }
-                        if ($vs !~ / (static)? const value_string /xo)  {
-                                $vsx =~ /( value_string [^=]+ ) = /xo;
+                        if ($vs !~ / (static)? const enum_val_t /xo)  {
+                                $vsx =~ /( enum_val_t [^=]+ ) = /xo;
                                  printf STDERR "Error: %-35.35s: Missing 'const': %s\n", $filename, $1;
                                  $errorCount++;
                          }