Add support for determining BASE_EXT_STRING.
[metze/wireshark/wip.git] / tools / checkfiltername.pl
1 #!/usr/bin/perl
2
3 my $debug = 0;
4 # 0: off
5 # 1: specific debug
6 # 2: full debug
7
8 #
9 # verify that display filter names correspond with the PROTABBREV of 
10 # of the dissector.  Enforces the dissector to have a source 
11 # filename of format packet-PROTABBREV.c
12 #
13 # Usage: checkfiltername.pl <file or files>
14
15 # $Id$
16
17 #
18 # Copyright 2011 Michael Mann (see AUTHORS file)
19 #
20 # Wireshark - Network traffic analyzer
21 # By Gerald Combs <gerald@wireshark.org>
22 # Copyright 1998 Gerald Combs
23 #
24 # This program is free software; you can redistribute it and/or
25 # modify it under the terms of the GNU General Public License
26 # as published by the Free Software Foundation; either version 2
27 # of the License, or (at your option) any later version.
28 #
29 # This program is distributed in the hope that it will be useful,
30 # but WITHOUT ANY WARRANTY; without even the implied warranty of
31 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
32 # GNU General Public License for more details.
33 #
34 # You should have received a copy of the GNU General Public License
35 # along with this program; if not, write to the Free Software
36 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
37
38 #
39 # Example:
40 # ~/work/wireshark/trunk/epan/dissectors> ../../tools/checkfiltername.pl packet-3com-xns.c
41 # packet-3com-xns.c (2 (of 2) fields)
42 # 102 3comxns.type doesn't match PROTOABBREV of 3com-xns
43 # 106 3comxns.type doesn't match PROTOABBREV of 3com-xns
44 #
45 # or checkfiltername.pl packet-*.c, which will check all the dissector files.
46 #
47 #
48
49 use warnings;
50 use strict;
51 use Getopt::Long;
52
53 my @elements;
54 my @elements_dup;
55 my @protocols;
56 my %filters;
57 my %expert_filters;
58 my @acceptedprefixes = ("dcerpc-");
59 my @asn1automatedfilelist;
60 my @dcerpcautomatedfilelist;
61 my @idl2wrsautomatedfilelist;
62 my @filemanipulationfilelist;
63 my @prefixfilelist;
64 my @nofieldfilelist;
65 my %unique;
66 my @uniquefilelist;
67 my @noregprotocolfilelist;
68 my @periodinfilternamefilelist;
69
70 my $showlinenoFlag = '';
71 my $showautomatedFlag = '';
72
73 my $state;
74 # "s_unknown",
75 # "s_start",
76 # "s_in_hf_register_info",
77 # "s_hf_register_info_entry",
78 # "s_header_field_info_entry",
79 # "s_header_field_info_entry_start",
80 # "s_header_field_info_entry_name",
81 # "s_header_field_info_entry_abbrev",
82 # "s_header_field_info_entry_abbrev_end",
83 # "s_start_expert",
84 # "s_in_ei_register_info",
85 # "s_ei_register_info_entry",
86 # "s_ei_register_info_entry_start",
87 # "s_ei_register_info_entry_abbrev_end",
88 # "s_nofields"
89
90 my $restofline;
91 my $filecount = 0;
92 my $currfile = "";
93 my $protabbrev = "";
94 my $protabbrev_index;
95 my $PFNAME_value = "";
96 my $linenumber = 1;
97 my $totalerrorcount = 0;
98 my $errorfilecount = 0;
99 my $onefield = 0;
100 my $nofields = 0;
101 my $noperiod = 0;
102 my $noregprotocol = 1;
103 my $automated = 0;
104 my $more_tokens;
105 my $showall = 0;
106
107 my $comment = 0;
108
109 my $error = 0;
110
111 sub checkprotoabbrev {
112         my $abbrev = "";
113         my $abbrevpos;
114         my $proto_abbrevpos1;
115         my $proto_abbrevpos2;
116         my $afterabbrev = "";
117         my $modprotabbrev = "";
118         my $errorline = 0;
119         my $prefix;
120         
121         if (($automated == 0) || ($showall == 1)) {
122                 $abbrevpos = index($_[0], ".");
123                 if ($abbrevpos == -1) {
124                         $abbrev = $_[0];
125                 }
126                 else {
127                         $abbrev = substr($_[0], 0, $abbrevpos);
128                         $afterabbrev = substr($_[0], $abbrevpos+1, length($_[0])-$abbrevpos);
129                         $afterabbrev = substr($afterabbrev, 0, length($abbrev));
130                 }
131
132                 if ($abbrev ne $protabbrev) {
133                         $errorline = 1;
134                                                 
135                         #check if there is a supported protocol that matches the abbrev.
136                         #This may be a case of filename != PROTOABBREV
137                         foreach (@protocols) {
138                                 if ($abbrev eq $_) {
139                                         $errorline = 0;
140                                 } elsif (index($_, ".") != -1) {
141                                 
142                                         #compare from start of string for each period found
143                                         $proto_abbrevpos1 = 0;
144                                         while ((($proto_abbrevpos2 = index($_, ".", $proto_abbrevpos1)) != -1) &&
145                                                         ($errorline == 1)) {
146                                                 if ($abbrev eq substr($_, 0, $proto_abbrevpos2)) {
147                                                         $errorline = 0;
148                                                 }
149
150                                                 $proto_abbrevpos1 = $proto_abbrevpos2+1;
151                                         }
152                                 }
153                         }                       
154                 }
155
156                 # find any underscores that preface or follow a period
157                 if ((index($_[0], "._") >= 0) || (index($_[0], "_.") >= 0)) {
158                         if ($showlinenoFlag) {
159                                 push(@elements, "$_[1] $_[0] contains an unnecessary \'_\'\n");
160                         } else {
161                                 push(@elements, "$_[0] contains an unnecessary \'_\'\n");
162                         }
163                 }
164                 
165                 if (($errorline == 1) && ($showall == 0)) {
166                         #try some "accepted" variations of PROTOABBREV
167                         
168                         #replace '-' with '_'
169                         $modprotabbrev = $protabbrev;
170                         $modprotabbrev =~ s/-/_/g;
171                         if ($abbrev eq $modprotabbrev) {
172                                 $errorline = 0;
173                         }
174
175                         #remove '-'             
176                         if ($errorline == 1) {
177                                 $modprotabbrev = $protabbrev;
178                                 $modprotabbrev =~ s/-//g;
179                                 if ($abbrev eq $modprotabbrev) {
180                                         $errorline = 0;
181                                 }
182                         }
183
184                         #remove '_'             
185                         if ($errorline == 1) {
186                                 $modprotabbrev = $protabbrev;
187                                 $modprotabbrev =~ s/_//g;
188                                 if ($abbrev eq $modprotabbrev) {
189                                         $errorline = 0;
190                                 }
191                         }
192                         
193                         if ($errorline == 1) {
194                                 #remove any "accepted" prefix to see if there is still a problem
195                                 foreach (@acceptedprefixes) {
196                                         if ($protabbrev =~ /^$_/) {
197                                                 $modprotabbrev = substr($protabbrev, length($_));
198                                                 if ($abbrev eq $modprotabbrev) {
199                                                         push(@prefixfilelist, "$currfile\n");
200                                                         $errorline = 0;
201                                                 }
202                                         }
203                                 }
204                         } else {
205                                 push(@filemanipulationfilelist, "$currfile\n");
206                         }
207                 }
208                 
209                 if ($errorline == 1) {
210                         $debug>1 && print "$_[1] $_[0] doesn't match PROTOABBREV of $protabbrev\n";
211                         if ($showlinenoFlag) {
212                                 push(@elements, "$_[1] $_[0] doesn't match PROTOABBREV of $protabbrev\n");
213                         } else {
214                                 push(@elements, "$_[0] doesn't match PROTOABBREV of $protabbrev\n");
215                         }
216                 }
217                 
218                 if (($abbrev ne "") && (lc($abbrev) eq lc($afterabbrev))) {
219                         if ($showlinenoFlag) {
220                                 push(@elements_dup, "$_[1] $_[0] duplicates PROTOABBREV of $abbrev\n");
221                         } else {
222                                 push(@elements_dup, "$_[0] duplicates PROTOABBREV of $abbrev\n");
223                         }
224                 }
225         }       
226 }
227
228 sub printprevfile {
229         my $totalfields = keys(%filters);
230         my $count_ele;
231         my $count_dup;
232         my $total_count;
233
234         foreach (sort keys %filters) {
235                 checkprotoabbrev ($filters{$_}, $_);
236         }
237
238         foreach (sort keys %expert_filters) {
239                 checkprotoabbrev ($expert_filters{$_}, $_);
240         }
241         
242         $count_ele = @elements;
243         $count_dup = @elements_dup;
244         $total_count = $count_ele+$count_dup;
245
246         if ($noregprotocol == 1) {
247                 #if no protocol is registered, only worry about duplicates
248                 if ($currfile ne "") {
249                         push(@noregprotocolfilelist, "$currfile\n");
250                 }
251                 
252                 if ($count_dup > 0) {
253                         $errorfilecount++;
254                         $totalerrorcount += $count_dup;
255                 }
256                         
257                 if (($showall == 1) || ($count_dup > 0))  {
258                         print "\n\n$currfile  - NO PROTOCOL REGISTERED\n";
259                         if ($showall == 1) {
260                                 #everything is included, so count all errors
261                                 $totalerrorcount += $count_ele;
262                                 if (($count_ele > 0) && ($count_dup == 0)) {
263                                         $errorfilecount++;
264                                 }
265                         
266                                 foreach (@elements) {
267                                         print $_;
268                                 }
269                         }
270                         foreach (@elements_dup) {
271                                 print $_;
272                         }
273                 }       
274         } else {
275                 if ($total_count > 0) {
276                         $errorfilecount++;
277                         $totalerrorcount += $total_count;
278                 }
279         
280                 if (($automated == 0) || ($showall == 1)) {
281                         if ($total_count > 0) {
282                                 if ($automated == 1) {
283                                         if ($showall == 1) {
284                                                 print "\n\n$currfile - AUTOMATED ($total_count (of $totalfields) fields)\n";
285                                         }
286                                 } else {
287                                         print "\n\n$currfile ($total_count (of $totalfields) fields)\n";
288                                 }
289                                 
290                                 foreach (@elements) {
291                                         print $_;
292                                 }
293                                 foreach (@elements_dup) {
294                                         print $_;
295                                 }
296                         }
297                         
298                         if ((($nofields) || ($totalfields == 0)) && ($currfile ne "")) {
299                                 if ($showall == 1) {
300                                         print "\n\n$currfile  - NO FIELDS\n";
301                                 }
302                                 push(@nofieldfilelist, "$currfile\n");
303                         }
304                 }
305         }       
306 }
307
308 # ---------------------------------------------------------------------
309 #
310 # MAIN
311 #
312 GetOptions(
313                    'showlineno'    => \$showlinenoFlag,
314                    'showautomated' => \$showautomatedFlag,
315                   );
316
317 while (<>) {
318         if ($currfile !~ /$ARGV/) {
319                 &printprevfile();
320
321                 # New file - reset array and state
322                 $filecount++;
323                 $currfile = $ARGV;
324                 
325                 #determine PROTABBREV for dissector based on file name format of (dirs)/packet-PROTABBREV.c
326                 $protabbrev_index = rindex($currfile, "packet-");
327                 if ($protabbrev_index == -1) {
328                         print "$currfile doesn't fit format of packet-PROTABBREV.c\n";
329                         next;
330                 }
331                                 
332                 $protabbrev = substr($currfile, $protabbrev_index+length("packet-"));
333                 $protabbrev_index = rindex($protabbrev, ".");
334                 if ($protabbrev_index == -1) {
335                         print "$currfile doesn't fit format of packet-PROTABBREV.c\n";
336                         next;
337                 }
338                 $protabbrev = substr($protabbrev, 0, $protabbrev_index);                
339
340                 $PFNAME_value = "";
341                 $noregprotocol = 1;
342                 $automated = 0;
343                 $nofields = 0;
344                 $onefield = 0;
345                 $noperiod = 0;
346                 $linenumber = 1;
347                 %filters = ( );
348                 %expert_filters = ( );
349                 @protocols = ( );
350                 @elements = ( );
351                 @elements_dup = ( );
352                 $state = "s_unknown";
353         }
354         
355         if (($automated == 0) && ($showautomatedFlag eq "")) {
356                 #DCERPC automated files
357                 if ($_ =~ "DO NOT EDIT") {
358                         push(@dcerpcautomatedfilelist, "$currfile\n");
359                         $automated = 1;
360                         next;
361                 }
362                 #ASN.1 automated files 
363                 elsif ($_ =~ "It is created automatically by the ASN.1 to Wireshark dissector compiler") {
364                         push(@asn1automatedfilelist, "$currfile\n");
365                         $automated = 1;
366                         next;
367                 }
368                 #idl2wrs automated files
369                 elsif ($_ =~ "Autogenerated from idl2wrs") {
370                         push(@idl2wrsautomatedfilelist, "$currfile\n");
371                         $automated = 1;
372                         next;
373                 }
374         }
375                 
376         # opening then closing comment
377         if (/(.*?)\/\*.*\*\/(.*)/) {
378                 $comment = 0;
379                 $_ = "$1$2";
380         # closing then opening comment
381         } elsif (/.*?\*\/(.*?)\/\*/) {
382                 $comment = 1;
383                 $_ = "$1";
384         # opening comment
385         } elsif (/(.*?)\/\*/) {
386                 $comment = 1;
387                 $_ = "$1";
388         # closing comment
389         } elsif (/\*\/(.*?)/) {
390                 $comment = 0;
391                 $_ = "$1";
392         } elsif ($comment == 1) {
393                 $linenumber++;
394                 next;
395         }
396         # unhandled: more than one complete comment per line
397         
398         chomp;
399
400         #proto_register_protocol state machine
401         $restofline = $_;
402         $more_tokens = 1;
403         
404         #PFNAME is a popular #define for the proto filter name, so use it for testing   
405         if ($restofline =~ /#define\s*PFNAME\s*\"([^\"]*)\"/) {
406                 $PFNAME_value = $1;
407                 $debug>1 && print "PFNAME: '$1'\n";
408         }
409         
410         until ($more_tokens == 0) {
411                 if ($restofline =~ /proto_register_protocol\s*\((.*)/) {
412                         $noregprotocol = 0;
413                         $restofline = $1;
414                         $state = "s_proto_start";
415                 } elsif (($state eq "s_proto_start") && ($restofline =~ /^(\s*\"([^\"]*)\"\s*,)\s*(.*)/)) {
416                         $restofline = $3;
417                         $state = "s_proto_long_name";
418                         $debug>1 && print "proto long name: '$2'\n";
419                 } elsif (($state eq "s_proto_start") && ($restofline =~ /^(\s*(([\w\d])+)\s*,)\s*(.*)/)) {
420                         $restofline = $4;
421                         $state = "s_proto_long_name";
422                         $debug>1 && print "proto long name: '$2'\n";
423                 } elsif (($state eq "s_proto_long_name") && ($restofline =~ /^(\s*\"([^\"]*)\"\s*,)\s*(.*)/)) {
424                         $restofline = $3;
425                         $state = "s_proto_short_name";
426                         $debug>1 && print "proto short name: '$2'\n";
427                 } elsif (($state eq "s_proto_long_name") && ($restofline =~ /^(\s*(([\w\d])+)\s*,)\s*(.*)/)) {
428                         $restofline = $4;
429                         $state = "s_proto_short_name";
430                         $debug>1 && print "proto short name: '$2'\n";                   
431                 } elsif (($state eq "s_proto_short_name") && ($restofline =~ /\s*PFNAME\s*(.*)/)) {
432                         $more_tokens = 0;
433                         $state = "s_proto_filter_name";
434                         if ((index($PFNAME_value, ".") != -1) && ($noperiod == 0)) {
435                                 push(@periodinfilternamefilelist, "$currfile\n");
436                                 $noperiod = 1;
437                         }
438                         push(@protocols, $PFNAME_value);
439                         $debug>1 && print "proto filter name: '$PFNAME_value'\n";
440                 } elsif (($state eq "s_proto_short_name") && ($restofline =~ /\s*\"([^\"]*)\"\s*(.*)/)) {
441                         $more_tokens = 0;
442                         $state = "s_proto_filter_name";
443                         if ((index($1, ".") != -1) && ($noperiod == 0)) {
444                                 push(@periodinfilternamefilelist, "$currfile\n");
445                                 $noperiod = 1;
446                         }
447                         push(@protocols, $1);
448                         $debug>1 && print "proto filter name: '$1'\n";
449                 } elsif (($state eq "s_proto_short_name") && ($restofline =~ /\s*(([\w\d])+)\s*(.*)/)) {
450                         $more_tokens = 0;
451                         $state = "s_proto_filter_name";
452                         $debug>1 && print "proto filter name: '$1'\n";
453                 } else {
454                         $more_tokens = 0;
455                 }
456         }
457         
458         #retrieving display filters state machine
459         $restofline = $_;
460         $more_tokens = 1;
461         until ($more_tokens == 0) {
462                 if ($restofline =~ /\s*static\s*hf_register_info\s*(\w+)\[\](.*)/) {
463                         $restofline = $2;
464                         $state = "s_start";
465                         $debug>1 && print "$linenumber $state\n";
466                 } elsif ($restofline =~ /\s*static\s*ei_register_info\s*(\w+)\[\](.*)/) {
467                         $restofline = $2;
468                         $state = "s_start_expert";
469                         $debug>1 && print "$linenumber $state\n";
470                 } elsif (($state eq "s_start") && ($restofline =~ /\W+{(.*)/)) {
471                         $restofline = $1;
472                         $state = "s_in_hf_register_info";
473                         $debug>1 && print "$linenumber $state\n";
474                 } elsif (($state eq "s_in_hf_register_info") && ($restofline =~ /\W+{(.*)/)) {  
475                         $restofline = $1;
476                         $state = "s_hf_register_info_entry";
477                         $debug>1 && print "$linenumber $state\n";
478                         $onefield = 1;
479                 } elsif (($state eq "s_in_hf_register_info") && ($restofline =~ /\s*};(.*)/)) {
480                         $restofline = $1;
481                         if ($onefield == 0) {
482                                 $debug && print "$linenumber NO FIELDS!!!\n";
483                                 $nofields =     1;
484                                 $state = "s_nofields";
485                                 $more_tokens = 0;
486                         } else {
487                                 $state = "s_unknown";
488                         }
489                 } elsif (($state eq "s_hf_register_info_entry") && ($restofline =~ /\s*&\s*(hf_\w*(\[w*\])?)\s*,?(.*)/)) {
490                         $restofline = $3;
491                         $debug>1 && print "$linenumber hf_register_info_entry: $1\n";
492                         $state = "s_header_field_info_entry";
493                 } elsif (($state eq "s_header_field_info_entry") && ($restofline =~ /\s*{(.*)/)) {
494                         $restofline = $1;
495                         $state = "s_header_field_info_entry_start";
496                         $debug>1 && print "$linenumber $state\n";
497                 } elsif (($state eq "s_header_field_info_entry_start") && ($restofline =~ /\"([^\"]*)\"\s*,(.*)/)) {
498                         $restofline = $2;
499                         $debug>1 && print "$linenumber header_field_info_entry_name: $1\n";
500                         $state = "s_header_field_info_entry_name";
501                 } elsif (($state eq "s_header_field_info_entry_name") && ($restofline =~ /\"([^\"]*)\"\s*,?(.*)/)) {
502                         $restofline = $2;
503                         $debug>1 && print "$linenumber header_field_info_entry_abbrev: $1\n";
504                         $state = "s_header_field_info_entry_abbrev";
505                         $filters{$linenumber} = $1;
506                 } elsif (($state eq "s_header_field_info_entry_abbrev") && ($restofline =~ /[^}]*}(.*)/)) {
507                         $restofline = $1;
508                         $state = "s_header_field_info_entry_abbrev_end";
509                         $debug>1 && print "$linenumber $state\n";
510                 } elsif (($state eq "s_header_field_info_entry_abbrev_end") && ($restofline =~ /[^}]*}(.*)/)) {
511                         $restofline = $1;
512                         $state = "s_in_hf_register_info";
513                         $debug>1 && print "$linenumber $state\n";
514                 } elsif (($state eq "s_start_expert") && ($restofline =~ /\W+{(.*)/)) {
515                         $restofline = $1;
516                         $state = "s_in_ei_register_info";
517                         $debug>1 && print "$linenumber $state\n";
518                 } elsif (($state eq "s_in_ei_register_info") && ($restofline =~ /\W+{(.*)/)) {  
519                         $restofline = $1;
520                         $state = "s_ei_register_info_entry";
521                         $debug>1 && print "$linenumber $state\n";
522                 } elsif (($state eq "s_in_ei_register_info") && ($restofline =~ /\s*};(.*)/)) {
523                         $restofline = $1;
524                         $state = "s_unknown";
525                 } elsif (($state eq "s_ei_register_info_entry") && ($restofline =~ /\s*{(.*)/)) {
526                         $restofline = $1;
527                         $state = "s_ei_register_info_entry_start";
528                         $debug>1 && print "$linenumber $state\n";
529                 } elsif (($state eq "s_ei_register_info_entry_start") && ($restofline =~ /\"([^\"]*)\"\s*,(.*)/)) {
530                         $restofline = $2;
531                         $debug>1 && print "$linenumber ei_register_info_entry_abbrev: $1\n";
532                         $expert_filters{$linenumber} = $1;
533                         $state = "s_ei_register_info_entry_abbrev_end";
534                 } elsif (($state eq "s_ei_register_info_entry_abbrev_end") && ($restofline =~ /[^}]*}(.*)/)) {
535                         $restofline = $1;
536                         $state = "s_in_ei_register_info";
537                         $debug>1 && print "$linenumber $state\n";
538                 } else {
539                         $more_tokens = 0;
540                 }
541         }
542                 
543         $linenumber++;  
544 }
545
546 &printprevfile();
547
548 print "\n\nTOTAL ERRORS: $totalerrorcount";
549 if ($filecount > 1) {
550         print " ($errorfilecount files)\n";
551
552         print "NO FIELDS: " . scalar(@nofieldfilelist) . "\n";
553         print "AUTOMATED: " . (scalar(@asn1automatedfilelist) + scalar(@dcerpcautomatedfilelist) + scalar(@idl2wrsautomatedfilelist)) . "\n";
554         print "NO PROTOCOL: " . scalar(@noregprotocolfilelist) . "\n";
555
556         print "\nASN.1 AUTOMATED FILE LIST\n";
557         foreach (@asn1automatedfilelist) {
558                 print $_;
559         }
560         print "\nDCE/RPC AUTOMATED FILE LIST\n";
561         foreach (@dcerpcautomatedfilelist) {
562                 print $_;
563         }
564         print "\nIDL2WRS AUTOMATED FILE LIST\n";
565         foreach (@idl2wrsautomatedfilelist) {
566                 print $_;
567         }
568         print "\n\"FILE MANIPULATION\" FILE LIST\n";
569         @uniquefilelist = grep{ not $unique{$_}++} @filemanipulationfilelist;
570         foreach (@uniquefilelist) {
571                 print $_;
572         }
573         print "\nREMOVE PREFIX FILE LIST\n";
574         @uniquefilelist = grep{ not $unique{$_}++} @prefixfilelist;
575         foreach (@uniquefilelist) {
576                 print $_;
577         }
578         print "\nNO PROTOCOL REGISTERED FILE LIST\n";
579         foreach (@noregprotocolfilelist) {
580                 print $_;
581         }
582         print "\nNO FIELDS FILE LIST\n";
583         foreach (@nofieldfilelist) {
584                 print $_;
585         }
586
587         print "\nPERIOD IN PROTO FILTER NAME FILE LIST\n";
588         foreach (@periodinfilternamefilelist) {
589                 print $_;
590         }
591 }
592
593 print "\n";
594
595 exit $error;
596
597 __END__