3 # Copyright 2013, William Meier (See AUTHORS file)
5 # Validate hf_... and ei_... usage for a dissector file;
7 # Usage: checkhf.pl [--debug=?] <file or files>
9 # Wireshark - Network traffic analyzer
10 # By Gerald Combs <gerald@wireshark.org>
11 # Copyright 1998 Gerald Combs
13 # This program is free software; you can redistribute it and/or
14 # modify it under the terms of the GNU General Public License
15 # as published by the Free Software Foundation; either version 2
16 # of the License, or (at your option) any later version.
18 # This program is distributed in the hope that it will be useful,
19 # but WITHOUT ANY WARRANTY; without even the implied warranty of
20 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 # GNU General Public License for more details.
23 # You should have received a copy of the GNU General Public License
24 # along with this program; if not, write to the Free Software
25 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
28 ## Note: This program is a re-implementation of the
29 ## original checkhf.pl written and (C) by Joerg Mayer.
30 ## The overall objective of the new implementation was to reduce
31 ## the number of false positives which occurred with the
32 ## original checkhf.pl
34 ## This program can be used to scan original .c source files or source
35 ## files which have been passed through a C pre-processor.
36 ## Operating on pre-processed source files is optimal; there should be
37 ## minimal false positives.
38 ## If the .c input is an original source file there may very well be
39 ## false positives/negatives due to the fact that the hf_... variables etc.
40 ## may be created via macros.
42 ## ----- (The following is extracted from the original checkhf.pl with thanks to Joerg) -------
44 ## ~/work/wireshark/trunk/epan/dissectors> ../../tools/checkhf.pl packet-afs.c
45 ## Unused entry: packet-afs.c, hf_afs_ubik_voteend
46 ## Unused entry: packet-afs.c, hf_afs_ubik_errcode
47 ## Unused entry: packet-afs.c, hf_afs_ubik_votetype
48 ## ERROR: NO ARRAY: packet-afs.c, hf_afs_fs_ipaddr
50 ## or checkhf.pl packet-*.c, which will check all the dissector files.
52 ## NOTE: This tool currently generates false positives!
54 ## The "NO ARRAY" messages - if accurate - point to an error that will
55 ## cause (t|wire)shark to report a DISSECTOR_BUG when a packet containing
56 ## this particular element is being dissected.
58 ## The "Unused entry" message indicates the opposite: We define an entry but
59 ## never use it (e.g., in a proto_...add... function).
60 ## ------------------------------------------------------------------------------------
62 # ------------------------------------------------------------------------------------
66 # 1. Clean the input: remove blank lines, comments, quoted strings and code under '#if 0'.
68 # Find (and remove from input) list of hf_... variable
69 # definitions ('static? g?int hf_... ;')
70 # 2. hf_array_entries:
71 # Find (and remove from input) list of hf_... variables
72 # referenced in the hf[] entries;
74 # From the remaining input, extract list of all strings of form hf_...
75 # (which may include strings which are not actually valid
76 # hf_... variable references).
78 # If entries in hf_defs not in hf_usage then "unused" (for static hf_defs only)
79 # If entries in hf_defs not in hf_array_entries then "ERROR: NO ARRAY";
87 my $debug = 0; # default: off; 1=cmt; 2=#if0; 3=hf_defs; 4=hf_array_entries; 5=hfusage (See code)
91 'help|?' => \$help_flag
93 if (!$sts || $help_flag || !$ARGV[0]) {
99 while (my $filename = $ARGV[0]) {
103 my (%hf_defs, %hf_static_defs, %hf_array_entries, %hf_usage);
104 my ($unused_href, $no_array_href);
105 my (%ei_defs, %ei_static_defs, %ei_array_entries, %ei_usage);
106 my ($unused_ei, $no_array_ei);
108 read_file(\$filename, \$file_contents);
110 remove_comments (\$file_contents, $filename);
111 remove_blank_lines (\$file_contents, $filename);
112 remove_quoted_strings(\$file_contents, $filename);
113 remove_if0_code (\$file_contents, $filename);
115 find_remove_hf_defs (\$file_contents, $filename, \%hf_defs);
116 find_remove_hf_array_entries (\$file_contents, $filename, \%hf_array_entries);
117 find_remove_proto_get_id_hf_assignments(\$file_contents, $filename, \%hf_array_entries);
118 find_hf_usage (\$file_contents, $filename, \%hf_usage);
120 find_remove_ei_defs (\$file_contents, $filename, \%ei_defs);
121 find_remove_ei_array_entries (\$file_contents, $filename, \%ei_array_entries);
122 find_ei_usage (\$file_contents, $filename, \%ei_usage);
125 # 1. Are all the static hf_defs and ei_defs entries in hf_usage and ei_usage?
126 # if not: "Unused entry:"
129 # create a hash containing entries just for the static definitions
130 @hf_static_defs{grep {$hf_defs{$_} == 0} keys %hf_defs} = (); # All values in the new hash will be undef
131 @ei_static_defs{grep {$ei_defs{$_} == 0} keys %ei_defs} = (); # All values in the new hash will be undef
133 $unused_href = diff_hash(\%hf_static_defs, \%hf_usage);
134 remove_hf_pid_from_unused_if_add_oui_call(\$file_contents, $filename, $unused_href);
136 $unused_ei = diff_hash(\%ei_static_defs, \%ei_usage);
138 print_list("Unused href entry: $filename: ", $unused_href);
139 print_list("Unused ei entry: $filename: ", $unused_ei);
141 # 2. Are all the hf_defs and ei_defs entries (static and global) in [hf|ei]_array_entries ?
142 # (Note: if a static hf_def or ei is "unused", don't check for same in [hf|ei]_array_entries)
143 # if not: "ERROR: NO ARRAY"
145 ## Checking for missing global defs currently gives false positives
146 ## So: only check static defs for now.
147 ## $no_array_href = diff_hash(\%hf_defs, \%hf_array_entries);
148 $no_array_href = diff_hash(\%hf_static_defs, \%hf_array_entries);
149 $no_array_href = diff_hash($no_array_href, $unused_href); # Remove "unused" hf_... from no_array list
150 $no_array_ei = diff_hash(\%ei_static_defs, \%ei_array_entries);
151 $no_array_ei = diff_hash($no_array_ei, $unused_ei); # Remove "unused" ei_... from no_array list
153 print_list("ERROR: NO ARRAY: $filename: ", $no_array_href);
154 print_list("ERROR: NO ARRAY: $filename: ", $no_array_ei);
156 if ((keys %{$no_array_href}) != 0) {
159 if ((keys %{$no_array_ei}) != 0) {
164 exit (($error == 0) ? 0 : 1); # exit 1 if ERROR
167 # ---------------------------------------------------------------------
170 print "Usage: $0 [--debug=n] Filename [...]\n";
174 # ---------------------------------------------------------------------
175 # action: read contents of a file to specified string
176 # arg: filename_ref, file_contents_ref
179 my ($filename_ref, $file_contents_ref) = @_;
181 die "No such file: \"${$filename_ref}\"\n" if (! -e ${$filename_ref});
183 # delete leading './'
184 ${$filename_ref} =~ s{ ^ [.] / } {}xmso;
186 # Read in the file (ouch, but it's easier that way)
187 open(my $fci, "<:crlf", ${$filename_ref}) || die("Couldn't open ${$filename_ref}");
189 ${$file_contents_ref} = do { local( $/ ) ; <$fci> } ;
196 # ---------------------------------------------------------------------
197 # action: Create a hash containing entries in 'a' that are not in 'b'
198 # arg: a_href, b_href
199 # returns: reference to hash
202 my ($a_href, $b_href) = @_;
206 @diffs{grep {! exists $b_href->{$_}} keys %{$a_href}} = (); # All values in the new hash will be undef
211 # ---------------------------------------------------------------------
212 # action: print a list
213 # arg: hdr, list_href
216 my ($hdr, $list_href) = @_;
227 # action: remove blank lines from input string
228 # arg: code_ref, filename
230 sub remove_blank_lines {
231 my ($code_ref, $filename) = @_;
233 ${$code_ref} =~ s{ ^ \s* \n ? } {}xmsog;
239 # action: remove comments from input string
240 # arg: code_ref, filename
242 sub remove_comments {
243 my ($code_ref, $filename) = @_;
245 # The below Regexp is based on one from:
246 # http://aspn.activestate.com/ASPN/Cookbook/Rx/Recipe/59811
247 # It is in the public domain.
248 # A complicated regex which matches C-style comments.
249 my $c_comment_regex = qr{ / [*] [^*]* [*]+ (?: [^/*] [^*]* [*]+ )* / }xmso;
251 ${$code_ref} =~ s{ $c_comment_regex } {}xmsog;
253 ($debug == 1) && print "==> After Remove Comments: code: [$filename]\n${$code_ref}\n===<\n";
259 # action: remove quoted strings from input string
260 # arg: code_ref, filename
262 sub remove_quoted_strings {
263 my ($code_ref, $filename) = @_;
265 # A regex which matches double-quoted strings.
266 # 's' modifier added so that strings containing a 'line continuation'
267 # ( \ followed by a new-line) will match.
268 my $double_quoted_str = qr{ (?: ["] (?: \\. | [^\"\\])* ["]) }xmso;
270 # A regex which matches single-quoted strings.
271 my $single_quoted_str = qr{ (?: ['] (?: \\. | [^\'\\])* [']) }xmso;
273 ${$code_ref} =~ s{ $double_quoted_str | $single_quoted_str } {}xmsog;
275 ($debug == 1) && print "==> After Remove quoted strings: code: [$filename]\n${$code_ref}\n===<\n";
281 # action: remove '#if 0'd code from the input string
282 # args: code_ref, filename
284 # Essentially: Use s//patsub/meg to pass each line to patsub.
285 # patsub monitors #if/#if 0/etc and determines
286 # if a particular code line should be removed.
287 # XXX: This is probably pretty inefficient;
288 # I could imagine using another approach such as converting
289 # the input string to an array of lines and then making
290 # a pass through the array deleting lines as needed.
293 my ($if_lvl, $if0_lvl, $if0); # shared vars
295 sub remove_if0_code {
296 my ($code_ref, $filename) = @_;
298 # First see if any '#if 0' lines which need to be handled
299 if (${$code_ref} !~ m{ \# \s* if \s+ 0 }xmso ) {
303 my ($preproc_regex) = qr{
304 ( # $1 [complete line)
308 (if \s 0| if | else | endif) # $2 (only if #...)
315 ($if_lvl, $if0_lvl, $if0) = (0,0,0);
316 ${$code_ref} =~ s{ $preproc_regex } { patsub($1,$2) }xmsoeg;
318 ($debug == 2) && print "==> After Remove if0: code: [$filename]\n${$code_ref}\n===<\n";
325 (defined $_[1]) && print " >$_[1]<\n";
328 # #if/#if 0/#else/#endif processing
334 elsif ($if eq 'if 0') {
338 $if0 = 1; # inside #if 0
341 elsif ($if eq 'else') {
342 if ($if0_lvl == $if_lvl) {
346 elsif ($if eq 'endif') {
347 if ($if0_lvl == $if_lvl) {
353 die "patsub: #if/#endif mismatch"
356 return $_[0]; # don't remove preprocessor lines themselves
359 # not preprocessor line: See if under #if 0: If so, remove
367 # ---------------------------------------------------------------------
368 # action: Add to hash an entry for each
369 # 'static? g?int hf_...' definition (including array names)
370 # in the input string.
371 # The entry value will be 0 for 'static' definitions and 1 for 'global' definitions;
372 # Remove each definition found from the input string.
373 # args: code_ref, filename, hf_defs_href
374 # returns: ref to the hash
376 sub find_remove_hf_defs {
377 my ($code_ref, $filename, $hf_defs_href) = @_;
379 # Build pattern to match any of the following
380 # static? g?int hf_foo = -1;
381 # static? g?int hf_foo[xxx];
382 # static? g?int hf_foo[xxx] = {
384 # p1: 'static? g?int hf_foo'
391 (hf_[a-zA-Z0-9_]+) # hf_..
403 # p2b: '[xxx];' or '[xxx] = {'
411 my $hf_def_regex = qr{ $p1_regex (?: $p2a_regex | $p2b_regex ) }xmso;
413 while (${$code_ref} =~ m{ $hf_def_regex }xmsog) {
414 #printf ">%s< >$2<\n", (defined $1) ? $1 : "";
415 $hf_defs_href->{$2} = (defined $1) ? 0 : 1; # 'static' if $1 is defined.
417 ($debug == 3) && debug_print_hash("VD: $filename", $hf_defs_href); # VariableDefinition
420 ${$code_ref} =~ s{ $hf_def_regex } {}xmsog;
421 ($debug == 3) && print "==> After remove hf_defs: code: [$filename]\n${$code_ref}\n===<\n";
426 # ---------------------------------------------------------------------
427 # action: Add to hash an entry (hf_...) for each hf[] entry.
428 # Remove each hf[] entry found from the input string.
429 # args: code_ref, filename, hf_array_entries_href
431 sub find_remove_hf_array_entries {
432 my ($code_ref, $filename, $hf_array_entries_href) = @_;
434 # hf[] entry regex (to extract an hf_index_name and associated field type)
435 my $hf_array_entry_regex = qr /
438 & \s* ( [a-zA-Z0-9_]+ ) # &hf
440 \s* [[] [^]]+ []] # optional array ref
446 (FT_[a-zA-Z0-9_]+) # field type
451 HFILL | HF_REF_TYPE_NONE
459 # find all the hf[] entries (searching ${$code_ref}).
460 while (${$code_ref} =~ m{ $hf_array_entry_regex }xmsog) {
461 ($debug == 98) && print "+++ $1 $2\n";
462 $hf_array_entries_href->{$1} = undef;
465 ($debug == 4) && debug_print_hash("AE: $filename", $hf_array_entries_href); # ArrayEntry
468 ${$code_ref} =~ s{ $hf_array_entry_regex } {}xmsog;
469 ($debug == 4) && print "==> After remove hf_array_entries: code: [$filename]\n${$code_ref}\n===<\n";
474 # ---------------------------------------------------------------------
475 # action: Add to hash an entry (hf_...) for each hf_... var
476 # found in statements of the form:
477 # 'hf_... = proto_registrar_get_id_byname ...'
478 # 'hf_... = proto_get_id_by_filter_name ...'
479 # Remove each such statement found from the input string.
480 # args: code_ref, filename, hf_array_entries_href
482 sub find_remove_proto_get_id_hf_assignments {
483 my ($code_ref, $filename, $hf_array_entries_href) = @_;
485 my $_regex = qr{ ( hf_ [a-zA-Z0-9_]+ )
487 (?: proto_registrar_get_id_byname | proto_get_id_by_filter_name )
490 my @hfvars = ${$code_ref} =~ m{ $_regex }xmsog;
497 # Sanity check: hf_vars shouldn't already be in hf_array_entries
498 if (defined @$hf_array_entries_href{@hfvars}) {
499 printf "? one or more of [@hfvars] initialized via proto_registrar_get_by_name() also in hf[] ??\n";
502 # Now: add to hf_array_entries
503 @$hf_array_entries_href{@hfvars} = ();
505 ($debug == 4) && debug_print_hash("PR: $filename", $hf_array_entries_href);
507 # remove from input (so not considered as 'usage')
508 ${$code_ref} =~ s{ $_regex } {}xmsog;
510 ($debug == 4) && print "==> After remove proto_registrar_by_name: code: [$filename]\n${$code_ref}\n===<\n";
515 # ---------------------------------------------------------------------
516 # action: Add to hash all hf_... strings remaining in input string.
517 # args: code_ref, filename, hf_usage_href
518 # return: ref to hf_usage hash
520 # The hash will include *all* strings of form hf_...
521 # which are in the input string (even strings which
522 # aren't actually vars).
523 # We don't care since we'll be checking only
524 # known valid vars against these strings.
527 my ($code_ref, $filename, $hf_usage_href) = @_;
529 my $hf_usage_regex = qr{
530 \b ( hf_[a-zA-Z0-9_]+ ) # hf_...
533 while (${$code_ref} =~ m{ $hf_usage_regex }xmsog) {
535 $hf_usage_href->{$1} += 1;
538 ($debug == 5) && debug_print_hash("VU: $filename", $hf_usage_href); # VariableUsage
543 # ---------------------------------------------------------------------
544 # action: Remove from 'unused' hash an instance of a variable named hf_..._pid
545 # if the source has a call to llc_add_oui() or ieee802a_add_oui().
546 # (This is rather a bit of a hack).
547 # args: code_ref, filename, unused_href
549 sub remove_hf_pid_from_unused_if_add_oui_call {
550 my ($code_ref, $filename, $unused_href) = @_;
552 if ((keys %{$unused_href}) == 0) {
556 my @hfvars = grep { m/ ^ hf_ [a-zA-Z0-9_]+ _pid $ /xmso} keys %{$unused_href};
558 if ((@hfvars == 0) || (@hfvars > 1)) {
559 return; # if multiple unused hf_..._pid
562 if (${$code_ref} !~ m{ llc_add_oui | ieee802a_add_oui }xmso) {
566 # hf_...pid unused var && a call to ..._add_oui(); delete entry from unused
567 # XXX: maybe hf_..._pid should really be added to hfUsed ?
568 delete @$unused_href{@hfvars};
573 # ---------------------------------------------------------------------
574 # action: Add to hash an entry for each
575 # 'static? expert_field ei_...' definition (including array names)
576 # in the input string.
577 # The entry value will be 0 for 'static' definitions and 1 for 'global' definitions;
578 # Remove each definition found from the input string.
579 # args: code_ref, filename, ei_defs_eiref
580 # returns: ref to the hash
582 sub find_remove_ei_defs {
583 my ($code_ref, $filename, $ei_defs_eiref) = @_;
585 # Build pattern to match any of the following
586 # static? expert_field ei_foo = -1;
587 # static? expert_field ei_foo[xxx];
588 # static? expert_field ei_foo[xxx] = {
590 # p1: 'static? expert_field ei_foo'
597 (ei_[a-zA-Z0-9_]+) # ei_..
609 # p2b: '[xxx];' or '[xxx] = {'
617 my $ei_def_regex = qr{ $p1_regex (?: $p2a_regex | $p2b_regex ) }xmso;
619 while (${$code_ref} =~ m{ $ei_def_regex }xmsog) {
620 #printf ">%s< >$2<\n", (defined $1) ? $1 : "";
621 $ei_defs_eiref->{$2} = (defined $1) ? 0 : 1; # 'static' if $1 is defined.
623 ($debug == 3) && debug_print_hash("VD: $filename", $ei_defs_eiref); # VariableDefinition
626 ${$code_ref} =~ s{ $ei_def_regex } {}xmsog;
627 ($debug == 3) && print "==> After remove ei_defs: code: [$filename]\n${$code_ref}\n===<\n";
632 # ---------------------------------------------------------------------
633 # action: Add to hash an entry (ei_...) for each ei[] entry.
634 # Remove each ei[] entry found from the input string.
635 # args: code_ref, filename, ei_array_entries_href
637 sub find_remove_ei_array_entries {
638 my ($code_ref, $filename, $ei_array_entries_eiref) = @_;
640 # ei[] entry regex (to extract an ei_index_name)
641 my $ei_array_entry_regex = qr /
644 & \s* ( [a-zA-Z0-9_]+ ) # &ei
646 \s* [ [^]]+ ] # optional array ref
650 # \s* "[^"]+" # (filter string has been removed already)
652 PI_[A-Z0-9_]+ # event group
654 PI_[A-Z0-9_]+ # event severity
656 [^,]* # description string (already removed) or NULL
665 # find all the ei[] entries (searching ${$code_ref}).
666 while (${$code_ref} =~ m{ $ei_array_entry_regex }xsg) {
667 ($debug == 98) && print "+++ $1\n";
668 $ei_array_entries_eiref->{$1} = undef;
671 ($debug == 4) && debug_print_hash("AE: $filename", $ei_array_entries_eiref); # ArrayEntry
674 ${$code_ref} =~ s{ $ei_array_entry_regex } {}xmsog;
675 ($debug == 4) && print "==> After remove ei_array_entries: code: [$filename]\n${$code_ref}\n===<\n";
680 # ---------------------------------------------------------------------
681 # action: Add to hash all ei_... strings remaining in input string.
682 # args: code_ref, filename, ei_usage_eiref
683 # return: ref to ei_usage hash
685 # The hash will include *all* strings of form ei_...
686 # which are in the input string (even strings which
687 # aren't actually vars).
688 # We don't care since we'll be checking only
689 # known valid vars against these strings.
692 my ($code_ref, $filename, $ei_usage_eiref) = @_;
694 my $ei_usage_regex = qr{
695 \b ( ei_[a-zA-Z0-9_]+ ) # ei_...
698 while (${$code_ref} =~ m{ $ei_usage_regex }xmsog) {
700 $ei_usage_eiref->{$1} += 1;
703 ($debug == 5) && debug_print_hash("VU: $filename", $ei_usage_eiref); # VariableUsage
708 # ---------------------------------------------------------------------
709 sub debug_print_hash {
710 my ($title, $href) = @_;
712 ##print "==> $title\n";
713 for my $k (sort keys %{$href}) {
714 my $h = defined($href->{$k}) ? $href->{$k} : "undef";
715 printf "%-40.40s %5.5s %s\n", $title, $h, $k;