3 # Copyright 2013, William Meier (See AUTHORS file)
5 # Validate hf_... usage for a dissector file;
7 # Usage: checkhf.pl [--debug=?] <file or files>
9 # Wireshark - Network traffic analyzer
10 # By Gerald Combs <gerald@wireshark.org>
11 # Copyright 1998 Gerald Combs
13 # This program is free software; you can redistribute it and/or
14 # modify it under the terms of the GNU General Public License
15 # as published by the Free Software Foundation; either version 2
16 # of the License, or (at your option) any later version.
18 # This program is distributed in the hope that it will be useful,
19 # but WITHOUT ANY WARRANTY; without even the implied warranty of
20 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 # GNU General Public License for more details.
23 # You should have received a copy of the GNU General Public License
24 # along with this program; if not, write to the Free Software
25 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
28 ## Note: This program is a re-implementation of the
29 ## original checkhf.pl written and (C) by Joerg Mayer.
30 ## The overall objective of the new implementation was to reduce
31 ## the number of false positives which occurred with the
32 ## original checkhf.pl
34 ## This program can be used to scan original .c source files or source
35 ## files which have been passed through a C pre-processor.
36 ## Operating on pre-processed source files is optimal; there should be
37 ## minimal false positives.
38 ## If the .c input is an original source file there may very well be
39 ## false positives/negatives due to the fact that the hf_... variables, etc.,
40 ## may be created via macros.
42 ## ----- (The following is extracted from the original checkhf.pl with thanks to Joerg) -------
44 ## ~/work/wireshark/trunk/epan/dissectors> ../../tools/checkhf.pl packet-afs.c
45 ## Unused entry: packet-afs.c, hf_afs_ubik_voteend
46 ## Unused entry: packet-afs.c, hf_afs_ubik_errcode
47 ## Unused entry: packet-afs.c, hf_afs_ubik_votetype
48 ## ERROR: NO ARRAY: packet-afs.c, hf_afs_fs_ipaddr
50 ## or checkhf.pl packet-*.c, which will check all the dissector files.
52 ## NOTE: This tool currently generates false positives!
54 ## The "NO ARRAY" message - if accurate - points to an error that will
55 ## cause (t|wire)shark to report a DISSECTOR_BUG when a packet containing
56 ## this particular element is being dissected.
58 ## The "Unused entry" message indicates the opposite: We define an entry but
59 ## never use it (e.g., in a proto_...add... function).
60 ## ------------------------------------------------------------------------------------
62 # ------------------------------------------------------------------------------------
66 # 1. Clean the input: remove blank lines, comments, quoted strings and code under '#if 0'.
68 # Find (and remove from input) list of hf_... variable
69 # definitions ('static? g?int hf_... ;')
70 # 2. hf_array_entries:
71 # Find (and remove from input) list of hf_... variables
72 # referenced in the hf[] entries;
74 # From the remaining input, extract list of all strings of form hf_...
75 # (which may include strings which are not actually valid
76 # hf_... variable references).
78 # If entries in hf_defs not in hf_usage then "unused" (for static hf_defs only)
79 # If entries in hf_defs not in hf_array_entries then "ERROR: NO ARRAY";
87 my $debug = 0; # default: off; 1=cmt; 2=#if0; 3=hf_defs; 4=hf_array_entries; 5=hfusage (See code)
91 'help|?' => \$help_flag
93 if (!$sts || $help_flag || !$ARGV[0]) {
99 while (my $filename = $ARGV[0]) {
103 my (%hf_defs, %hf_static_defs, %hf_array_entries, %hf_usage);
104 my ($unused_href, $no_array_href);
106 read_file(\$filename, \$file_contents);
108 remove_comments (\$file_contents, $filename);
109 remove_blank_lines (\$file_contents, $filename);
110 remove_quoted_strings(\$file_contents, $filename);
111 remove_if0_code (\$file_contents, $filename);
113 find_remove_hf_defs (\$file_contents, $filename, \%hf_defs);
114 find_remove_hf_array_entries (\$file_contents, $filename, \%hf_array_entries);
115 find_remove_proto_get_id_hf_assignments(\$file_contents, $filename, \%hf_array_entries);
116 find_hf_usage (\$file_contents, $filename, \%hf_usage);
119 # 1. Are all the static hf_defs entries in hf_usage ?
120 # if not: "Unused entry:"
123 # create a hash containing entries just for the static definitions
124 @hf_static_defs{grep {$hf_defs{$_} == 0} keys %hf_defs} = (); # All values in the new hash will be undef
126 $unused_href = diff_hash(\%hf_static_defs, \%hf_usage);
127 remove_hf_pid_from_unused_if_add_oui_call(\$file_contents, $filename, $unused_href);
129 print_list("Unused entry: $filename, ", $unused_href);
131 # 2. Are all the hf_defs entries (static and global) in hf_array_entries ?
132 # (Note: if a static hf_def is "unused", don't check for same in hf_array_entries)
133 # if not: "ERROR: NO ARRAY"
135 ## Checking for missing global defs currently gives false positives
136 ## So: only check static defs for now.
137 ## $no_array_href = diff_hash(\%hf_defs, \%hf_array_entries);
138 $no_array_href = diff_hash(\%hf_static_defs, \%hf_array_entries);
139 $no_array_href = diff_hash($no_array_href, $unused_href); # Remove "unused" hf_... from no_array list
141 print_list("ERROR: NO ARRAY: $filename, ", $no_array_href);
143 if ((keys %{$no_array_href}) != 0) {
148 exit (($error == 0) ? 0 : 1); # exit 1 if ERROR
151 # ---------------------------------------------------------------------
154 print "Usage: $0 [--debug=n] Filename [...]\n";
158 # ---------------------------------------------------------------------
159 # action: read contents of a file to specified string
160 # arg: filename_ref, file_contents_ref
163 my ($filename_ref, $file_contents_ref) = @_;
165 die "No such file: \"${$filename_ref}\"\n" if (! -e ${$filename_ref});
167 # delete leading './'
168 ${$filename_ref} =~ s{ ^ [.] / } {}xmso;
170 # Read in the file (ouch, but it's easier that way)
171 open(my $fci, "<:crlf", ${$filename_ref}) || die("Couldn't open ${$filename_ref}");
173 ${$file_contents_ref} = do { local( $/ ) ; <$fci> } ;
180 # ---------------------------------------------------------------------
181 # action: Create a hash containing entries in 'a' that are not in 'b'
182 # arg: a_href, b_href
183 # returns: pointer to hash
186 my ($a_href, $b_href) = @_;
190 @diffs{grep {! exists $b_href->{$_}} keys %{$a_href}} = (); # All values in the new hash will be undef
195 # ---------------------------------------------------------------------
196 # action: print a list
197 # arg: hdr, list_href
200 my ($hdr, $list_href) = @_;
211 # action: remove blank lines from input string
212 # arg: code_ref, filename
214 sub remove_blank_lines {
215 my ($code_ref, $filename) = @_;
217 ${$code_ref} =~ s{ ^ \s* \n ? } {}xmsog;
223 # action: remove comments from input string
224 # arg: code_ref, filename
226 sub remove_comments {
227 my ($code_ref, $filename) = @_;
229 # The below Regexp is based on one from:
230 # http://aspn.activestate.com/ASPN/Cookbook/Rx/Recipe/59811
231 # It is in the public domain.
232 # A complicated regex which matches C-style comments.
233 my $c_comment_regex = qr{ / [*] [^*]* [*]+ (?: [^/*] [^*]* [*]+ )* / }xmso;
235 ${$code_ref} =~ s{ $c_comment_regex } {}xmsog;
237 ($debug == 1) && print "==> After Remove Comments: code: [$filename]\n${$code_ref}\n===<\n";
243 # action: remove quoted strings from input string
244 # arg: code_ref, filename
246 sub remove_quoted_strings {
247 my ($code_ref, $filename) = @_;
249 # A regex which matches double-quoted strings.
250 # 's' modifier added so that strings containing a 'line continuation'
251 # ( \ followed by a new-line) will match.
252 my $double_quoted_str = qr{ (?: ["] (?: \\. | [^\"\\])* ["]) }xmso;
254 # A regex which matches single-quoted strings.
255 my $single_quoted_str = qr{ (?: ['] (?: \\. | [^\'\\])* [']) }xmso;
257 ${$code_ref} =~ s{ $double_quoted_str | $single_quoted_str } {}xmsog;
259 ($debug == 1) && print "==> After Remove quoted strings: code: [$filename]\n${$code_ref}\n===<\n";
265 # action: remove '#if 0'd code from the input string
266 # args: code_ref, filename
268 # Essentially: Use s//patsub/meg to pass each line to patsub.
269 # patsub monitors #if/#if 0/etc and determines
270 # if a particular code line should be removed.
271 # XXX: This is probably pretty inefficient;
272 # I could imagine using another approach such as converting
273 # the input string to an array of lines and then making
274 # a pass through the array deleting lines as needed.
277 my ($if_lvl, $if0_lvl, $if0); # shared vars
279 sub remove_if0_code {
280 my ($code_ref, $filename) = @_;
282 # First see if there are any '#if 0' lines which need to be handled
283 if (${$code_ref} !~ m{ \# \s* if \s+ 0 }xmso ) {
287 my ($preproc_regex) = qr{
288 ( # $1 [complete line)
292 (if \s 0| if | else | endif) # $2 (only if #...)
299 ($if_lvl, $if0_lvl, $if0) = (0,0,0);
300 ${$code_ref} =~ s{ $preproc_regex } { patsub($1,$2) }xmsoeg;
302 ($debug == 2) && print "==> After Remove if0: code: [$filename]\n${$code_ref}\n===<\n";
309 (defined $_[1]) && print " >$_[1]<\n";
312 # #if/#if 0/#else/#endif processing
318 elsif ($if eq 'if 0') {
322 $if0 = 1; # inside #if 0
325 elsif ($if eq 'else') {
326 if ($if0_lvl == $if_lvl) {
330 elsif ($if eq 'endif') {
331 if ($if0_lvl == $if_lvl) {
337 die "patsub: #if/#endif mismatch"
340 return $_[0]; # don't remove preprocessor lines themselves
343 # not preprocessor line: See if under #if 0: If so, remove
351 # ---------------------------------------------------------------------
352 # action: Add to hash an entry for each
353 # 'static? g?int hf_...' definition (including array names)
354 # in the input string.
355 # The entry value will be 0 for 'static' definitions and 1 for 'global' definitions;
356 # Remove each definition found from the input string.
357 # args: code_ref, filename, hf_defs_href
358 # returns: ref to the hash
360 sub find_remove_hf_defs {
361 my ($code_ref, $filename, $hf_defs_href) = @_;
363 # Build pattern to match any of the following
364 # static? g?int hf_foo = -1;
365 # static? g?int hf_foo = HF_EMPTY;
366 # static? g?int hf_foo[xxx];
367 # static? g?int hf_foo[xxx] = {
369 # p1: 'static? g?int hf_foo'
376 (hf_[a-zA-Z0-9_]+) # hf_..
379 # p2a: ' = -1;' or ' = HF_EMPTY;'
388 # p2b: '[xxx];' or '[xxx] = {'
396 my $hf_def_regex = qr{ $p1_regex (?: $p2a_regex | $p2b_regex ) }xmso;
398 while (${$code_ref} =~ m{ $hf_def_regex }xmsog) {
399 #print ">%s< >$2<\n", (defined $1) ? $1 ; "";
400 $hf_defs_href->{$2} = (defined $1) ? 0 : 1; # 'static' if $1 is defined.
402 ($debug == 3) && debug_print_hash("VD: $filename", $hf_defs_href); # VariableDefinition
405 ${$code_ref} =~ s{ $hf_def_regex } {}xmsog;
406 ($debug == 3) && print "==> After remove hf_defs: code: [$filename]\n${$code_ref}\n===<\n";
411 # ---------------------------------------------------------------------
412 # action: Add to hash an entry (hf_...) for each hf[] entry.
413 # Remove each hf[] entry found from the input string.
414 # args: code_ref, filename, hf_array_entries_href
416 sub find_remove_hf_array_entries {
417 my ($code_ref, $filename, $hf_array_entries_href) = @_;
419 # hf[] entry regex (to extract an hf_index_name and associated field type)
420 my $hf_array_entry_regex = qr /
423 & \s* ( [a-zA-Z0-9_]+ ) # &hf
425 \s* [[] [^]]+ []] # optional array ref
431 (FT_[a-zA-Z0-9_]+) # field type
436 HFILL | HF_REF_TYPE_NONE
444 # find all the hf[] entries (searching ${$code_ref}).
445 while (${$code_ref} =~ m{ $hf_array_entry_regex }xmsog) {
446 ($debug == 98) && print "+++ $1 $2\n";
447 $hf_array_entries_href->{$1} = undef;
450 ($debug == 4) && debug_print_hash("AE: $filename", $hf_array_entries_href); # ArrayEntry
453 ${$code_ref} =~ s{ $hf_array_entry_regex } {}xmsog;
454 ($debug == 4) && print "==> After remove hf_array_entries: code: [$filename]\n${$code_ref}\n===<\n";
459 # ---------------------------------------------------------------------
460 # action: Add to hash an entry (hf_...) for each hf_... var
461 # found in statements of the form:
462 # 'hf_... = proto_registrar_get_id_byname ...'
463 # 'hf_... = proto_get_id_by_filter_name ...'
464 # Remove each such statement found from the input string.
465 # args: code_ref, filename, hf_array_entries_href
467 sub find_remove_proto_get_id_hf_assignments {
468 my ($code_ref, $filename, $hf_array_entries_href) = @_;
470 my $_regex = qr{ ( hf_ [a-zA-Z0-9_]+ )
472 (?: proto_registrar_get_id_byname | proto_get_id_by_filter_name )
475 my @hfvars = ${$code_ref} =~ m{ $_regex }xmsog;
482 # Sanity check: hf_vars shouldn't already be in hf_array_entries
483 if (defined @$hf_array_entries_href{@hfvars}) {
484 printf "? one or more of [@hfvars] initialized via proto_registrar_get_by_name() also in hf[] ??\n";
487 # Now: add to hf_array_entries
488 @$hf_array_entries_href{@hfvars} = ();
490 ($debug == 4) && debug_print_hash("PR: $filename", $hf_array_entries_href);
492 # remove from input (so not considered as 'usage')
493 ${$code_ref} =~ s{ $_regex } {}xmsog;
495 ($debug == 4) && print "==> After remove proto_registrar_by_name: code: [$filename]\n${$code_ref}\n===<\n";
500 # ---------------------------------------------------------------------
501 # action: Add to hash all hf_... strings remaining in input string.
502 # args: code_ref, filename, hf_usage_href
503 # return: ref to hf_usage hash
505 # The hash will include *all* strings of form hf_...
506 # which are in the input string (even strings which
507 # aren't actually vars).
508 # We don't care since we'll be checking only
509 # known valid vars against these strings.
512 my ($code_ref, $filename, $hf_usage_href) = @_;
514 my $hf_usage_regex = qr{
515 \b ( hf_[a-zA-Z0-9_]+ ) # hf_...
518 while (${$code_ref} =~ m{ $hf_usage_regex }xmsog) {
520 $hf_usage_href->{$1} += 1;
523 ($debug == 5) && debug_print_hash("VU: $filename", $hf_usage_href); # VariableUsage
528 # ---------------------------------------------------------------------
529 # action: Remove from 'unused' hash an instance of a variable named hf_..._pid
530 # if the source has a call to llc_add_oui() or ieee802a_add_oui().
531 # (This is rather a bit of a hack).
532 # args: code_ref, filename, unused_href
534 sub remove_hf_pid_from_unused_if_add_oui_call {
535 my ($code_ref, $filename, $unused_href) = @_;
537 if ((keys %{$unused_href}) == 0) {
541 my @hfvars = grep { m/ ^ hf_ [a-zA-Z0-9_]+ _pid $ /xmso} keys %{$unused_href};
543 if ((@hfvars == 0) || (@hfvars > 1)) {
544 return; # if multiple unused hf_..._pid
547 if (${$code_ref} !~ m{ llc_add_oui | ieee802a_add_oui }xmso) {
551 # hf_...pid unused var && a call to ..._add_oui(); delete entry from unused
552 # XXX: maybe hf_..._pid should really be added to hfUsed ?
553 delete @$unused_href{@hfvars};
558 # ---------------------------------------------------------------------
559 sub debug_print_hash {
560 my ($title, $href) = @_;
562 ##print "==> $title\n";
563 for my $k (sort keys %{$href}) {
564 my $h = defined($href->{$k}) ? $href->{$k} : "undef";
565 printf "%-40.40s %5.5s %s\n", $title, $h, $k;