3 # Copyright 2011, William Meier <wmeier[AT]newsguy.com>
5 # A program to fix encoding args for certain Wireshark API function calls
6 # from TRUE/FALSE to ENC_?? as appropriate (and possible)
7 # - proto_tree_add_item
8 # - proto_tree_add_bits_item
9 # - proto_tree_add_bits_ret_val
10 # - proto_tree_add_bitmask
11 # - proto_tree_add_bitmask_text !! ToDo: encoding arg not last arg
18 # - ptvcursor_add_no_advance
19 # - ptvcursor_add_with_subtree !! ToDo: encoding arg not last arg
21 # ToDo: Rework program so that it can better be used to *validate* encoding-args
23 # Wireshark - Network traffic analyzer
24 # By Gerald Combs <gerald@wireshark.org>
25 # Copyright 1998 Gerald Combs
27 # This program is free software; you can redistribute it and/or
28 # modify it under the terms of the GNU General Public License
29 # as published by the Free Software Foundation; either version 2
30 # of the License, or (at your option) any later version.
32 # This program is distributed in the hope that it will be useful,
33 # but WITHOUT ANY WARRANTY; without even the implied warranty of
34 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
35 # GNU General Public License for more details.
37 # You should have received a copy of the GNU General Public License
38 # along with this program; if not, write to the Free Software
39 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
47 # Conversion "Requests"
49 # Standard conversions
50 my $searchReplaceFalseTrueHRef =
52 "FALSE" => "ENC_BIG_ENDIAN",
53 "0" => "ENC_BIG_ENDIAN",
54 "TRUE" => "ENC_LITTLE_ENDIAN",
55 "1" => "ENC_LITTLE_ENDIAN"
58 my $searchReplaceEncNAHRef =
64 "ENC_LITTLE_ENDIAN" => "ENC_NA",
65 "ENC_BIG_ENDIAN" => "ENC_NA",
66 "ENC_ASCII|ENC_NA" => "ENC_NA",
67 "ENC_ASCII | ENC_NA" => "ENC_NA"
70 # ---------------------------------------------------------------------
71 # Conversion "request" structure
73 # [ <list of field types for which this conversion request applies> ],
74 # { <hash of desired encoding arg conversions> }
79 [ qw (FT_NONE FT_BYTES FT_ETHER FT_IPv6 FT_IPXNET FT_OID FT_REL_OID)],
80 $searchReplaceEncNAHRef
85 [ qw (FT_UINT8 FT_UINT16 FT_UINT24 FT_UINT32 FT_UINT64 FT_INT8
86 FT_INT16 FT_INT24 FT_INT32 FT_INT64 FT_FLOAT FT_DOUBLE)],
87 $searchReplaceFalseTrueHRef
92 [ qw (FT_BOOLEAN FT_IPv4 FT_GUID FT_EUI64)],
93 $searchReplaceFalseTrueHRef
98 [qw (FT_STRING FT_STRINGZ)],
100 "FALSE" => "ENC_ASCII|ENC_NA",
101 "0" => "ENC_ASCII|ENC_NA",
102 "TRUE" => "ENC_ASCII|ENC_NA",
103 "1" => "ENC_ASCII|ENC_NA",
104 "ENC_LITTLE_ENDIAN" => "ENC_ASCII|ENC_NA",
105 "ENC_BIG_ENDIAN" => "ENC_ASCII|ENC_NA",
106 "ENC_NA" => "ENC_ASCII|ENC_NA",
108 "ENC_ASCII" => "ENC_ASCII|ENC_NA",
109 "ENC_ASCII|ENC_LITTLE_ENDIAN" => "ENC_ASCII|ENC_NA",
110 "ENC_ASCII|ENC_BIG_ENDIAN" => "ENC_ASCII|ENC_NA",
112 "ENC_UTF_8" => "ENC_UTF_8|ENC_NA",
113 "ENC_UTF_8|ENC_LITTLE_ENDIAN" => "ENC_UTF_8|ENC_NA",
114 "ENC_UTF_8|ENC_BIG_ENDIAN" => "ENC_UTF_8|ENC_NA",
116 "ENC_EBCDIC" => "ENC_EBCDIC|ENC_NA",
117 "ENC_EBCDIC|ENC_LITTLE_ENDIAN" => "ENC_EBCDIC|ENC_NA",
118 "ENC_EBCDIC|ENC_BIG_ENDIAN" => "ENC_EBCDIC|ENC_NA",
122 my @types_UINT_STRING =
124 [qw (FT_UINT_STRING)],
126 "FALSE" => "ENC_ASCII|ENC_BIG_ENDIAN",
127 "0" => "ENC_ASCII|ENC_BIG_ENDIAN",
128 "TRUE" => "ENC_ASCII|ENC_LITTLE_ENDIAN",
129 "1" => "ENC_ASCII|ENC_LITTLE_ENDIAN",
130 "ENC_BIG_ENDIAN" => "ENC_ASCII|ENC_BIG_ENDIAN",
131 "ENC_LITTLE_ENDIAN" => "ENC_ASCII|ENC_LITTLE_ENDIAN",
132 "ENC_ASCII|ENC_NA" => "ENC_ASCII|ENC_BIG_ENDIAN",
133 "ENC_ASCII" => "ENC_ASCII|ENC_BIG_ENDIAN",
134 "ENC_NA" => "ENC_ASCII|ENC_BIG_ENDIAN"
138 my @types_REG_PROTO =
141 $searchReplaceEncNAHRef
144 # ---------------------------------------------------------------------
145 # For searching (and doing no substitutions) (obsolete ?)
148 [qw (FT_ABSOLUTE_TIME FT_RELATIVE_TIME)],
188 {# valid encoding args
190 "b"=>"ENC_LITTLE_ENDIAN",
191 "c"=>"ENC_BIG_ENDIAN",
193 "d"=>"ENC_ASCII|ENC_NA",
194 "e"=>"ENC_ASCII|ENC_LITTLE_ENDIAN",
195 "f"=>"ENC_ASCII|ENC_BIG_ENDIAN",
197 "g"=>"ENC_UTF_8|ENC_NA",
198 "h"=>"ENC_UTF_8|ENC_LITTLE_ENDIAN",
199 "i"=>"ENC_UTF_8|ENC_BIG_ENDIAN",
201 "j"=>"ENC_EBCDIC|ENC_NA",
202 "k"=>"ENC_EBCDIC|ENC_LITTLE_ENDIAN",
203 "l"=>"ENC_EBCDIC|ENC_BIG_ENDIAN",
207 # ---------------------------------------------------------------------
209 my @findAllFunctionList =
210 ## proto_tree_add_bitmask_text !! ToDo: encoding arg not last arg
211 ## ptvcursor_add_with_subtree !! ToDo: encoding Arg not last arg
214 proto_tree_add_bits_item
215 proto_tree_add_bits_ret_val
216 proto_tree_add_bitmask
217 proto_tree_add_bitmask_with_flags
224 ptvcursor_add_no_advance
227 # ---------------------------------------------------------------------
233 my $action = 'fix-all';
235 my $result = GetOptions(
236 'action=s' => \$action,
237 'write' => \$writeFlag,
238 'help|?' => \$helpFlag
241 if (!$result || $helpFlag || !$ARGV[0]) {
245 if (($action ne 'fix-all') && ($action ne 'find-all')) {
250 print "\nUsage: $0 [--action=fix-all|find-all] [--write] FILENAME [...]\n\n";
251 print " --action = fix-all (default)\n";
252 print " Fix <certain-fcn-names>() encoding arg when possible in FILENAME(s)\n";
253 print " Fixes (if any) are listed on stdout)\n\n";
254 print " --write create FILENAME.encoding-arg-fixes (original file with fixes)\n";
255 print " (effective only for fix-all)\n";
257 print " --action = find-all\n";
258 print " Find all occurrences of <certain-fcn-names>() statements)\n";
259 print " highlighting the 'encoding' arg\n";
263 # Read through the files; fix up encoding parameter of proto_tree_add_item() calls
266 # . Create a hash of the hf_index_names & associated field types from the entries in hf[]
267 # . For each requested "conversion request" {
268 # . . For each hf[] entry hf_index_name with a field type in a set of specified field types {
269 # . . . For each proto_tree_add_item() statement
270 # . . . . - replace encoding arg in proto_tree_add_item(..., hf_index_name, ..., 'encoding-arg')
271 # specific values with new values
272 # . . . . - print the statement showing the change
276 # . If requested and if replacements done: write new file "orig-filename.encoding-arg-fixes"
279 # Note: The proto_tree_add_item() encoding arg will be converted only if
280 # the hf_index_name referenced is in one of the entries in hf[] in the same file
284 while (my $fileName = $ARGV[0]) {
286 my $fileContents = '';
288 die "No such file: \"$fileName\"\n" if (! -e $fileName);
290 # delete leading './'
291 $fileName =~ s{ ^ \. / } {}xo;
292 ##print "$fileName\n";
294 # Read in the file (ouch, but it's easier that way)
295 open(FCI, "<", $fileName) || die("Couldn't open $fileName");
301 # Create a hash of the hf[] entries (name_index_name=>field_type)
302 my $hfArrayEntryFieldTypeHRef = find_hf_array_entries(\$fileContents, $fileName);
304 if ($action eq "fix-all") {
306 # Find and replace: <fcn_name_pattern>() encoding arg in $fileContents for:
307 # - hf[] entries with specified field types;
308 # - 'proto' as returned from proto_register_protocol()
309 my $fcn_name = "(?:proto_tree_add_item|ptvcursor_add(?:_no_advance)?)";
311 $found += fix_encoding_args_by_hf_type(1, \@types_NA, $fcn_name, \$fileContents, $hfArrayEntryFieldTypeHRef, $fileName);
312 $found += fix_encoding_args_by_hf_type(1, \@types_INT, $fcn_name, \$fileContents, $hfArrayEntryFieldTypeHRef, $fileName);
313 $found += fix_encoding_args_by_hf_type(1, \@types_MISC, $fcn_name, \$fileContents, $hfArrayEntryFieldTypeHRef, $fileName);
314 $found += fix_encoding_args_by_hf_type(1, \@types_STRING, $fcn_name, \$fileContents, $hfArrayEntryFieldTypeHRef, $fileName);
315 $found += fix_encoding_args_by_hf_type(1, \@types_UINT_STRING, $fcn_name, \$fileContents, $hfArrayEntryFieldTypeHRef, $fileName);
316 $found += fix_encoding_args_by_hf_type(1, \@types_REG_PROTO, $fcn_name, \$fileContents, $hfArrayEntryFieldTypeHRef, $fileName);
318 # Find and replace: alters <fcn_name>() encoding arg in $fileContents
319 $found += fix_encoding_args(1, $searchReplaceFalseTrueHRef, "proto_tree_add_bits_(?:item|ret_val)", \$fileContents, $fileName);
320 $found += fix_encoding_args(1, $searchReplaceFalseTrueHRef, "proto_tree_add_bitmask", \$fileContents, $fileName);
321 $found += fix_encoding_args(1, $searchReplaceFalseTrueHRef, "proto_tree_add_bitmask_with_flags", \$fileContents, $fileName);
322 $found += fix_encoding_args(1, $searchReplaceFalseTrueHRef, "tvb_get_bits(?:16|24|32|64)?", \$fileContents, $fileName);
323 $found += fix_encoding_args(1, $searchReplaceFalseTrueHRef, "tvb_get_(?:ephemeral_)?unicode_string[z]?", \$fileContents, $fileName);
325 # If desired and if any changes, write out the changed version to a file
326 if (($writeFlag) && ($found > 0)) {
327 open(FCO, ">", $fileName . ".encoding-arg-fixes") || die("Couldn't create $fileName.encoding-arg-fixes: $!"); # abort rather than silently lose the fixes when the output file can't be created
328 # open(FCO, ">", $fileName );
329 print FCO "$fileContents";
332 $found_total += $found;
335 if ($action eq "find-all") {
336 # Find all proto_tree_add_item() statements
337 # and output same highlighting the encoding arg
338 $found_total += find_all(\@findAllFunctionList, \$fileContents, $fileName);
341 # Optional searches: (kind of obsolete ?)
342 # search for (and output) proto_tree_add_item() statements with invalid encoding arg for specified field types
343 # $fcn_name = "proto_tree_add_item";
344 # fix_encoding_args(2, \@types_NA, $fcn_name, \$fileContents, $hfArrayEntryFieldTypeHRef, $fileName);
345 # fix_encoding_args(2, \@types_INT, $fcn_name, \$fileContents, $hfArrayEntryFieldTypeHRef, $fileName);
346 # fix_encoding_args(2, \@types_MISC, $fcn_name, \$fileContents, $hfArrayEntryFieldTypeHRef, $fileName);
347 # fix_encoding_args(2, \@types_STRING, $fcn_name, \$fileContents, $hfArrayEntryFieldTypeHRef, $fileName);
348 # fix_encoding_args(2, \@types_UINT_STRING, $fcn_name, \$fileContents, $hfArrayEntryFieldTypeHRef, $fileName);
349 # fix_encoding_args(2, \@types_ALL, $fcn_name, \$fileContents, $hfArrayEntryFieldTypeHRef, $fileName);
350 # search for (and output) proto_tree_add_item()$fcn_name, statements with any encoding arg for specified field types
351 # fix_encoding_args(3, \@types_TIME, $fcn_name, \$fileContents, $hfArrayEntryFieldTypeHRef, $fileName);
358 # ---------------------------------------------------------------------
359 # Create a hash containing an entry (hf_index_name => field_type) for each hf[]entry.
360 # also: create an entry in the hash for the 'protocol name' variable (proto... => FT_PROTOCOL)
361 # returns: ref to the hash
363 sub find_hf_array_entries {
364 my ($fileContentsRef, $fileName) = @_;
366 # The below Regexp is based on one from:
367 # http://aspn.activestate.com/ASPN/Cookbook/Rx/Recipe/59811
368 # It is in the public domain.
369 # A complicated regex which matches C-style comments.
370 my $CCommentRegEx = qr{ / [*] [^*]* [*]+ (?: [^/*] [^*]* [*]+ )* / }xo;
372 # hf[] entry regex (to extract an hf_index_name and associated field type)
373 my $hfArrayFieldTypeRegEx = qr {
376 &\s*([A-Z0-9_\[\]-]+) # &hf
379 .+? # (a bit dangerous)
381 (FT_[A-Z0-9_]+) # field type
388 # create a copy of $fileContents with comments removed
389 my $fileContentsWithoutComments = $$fileContentsRef;
390 $fileContentsWithoutComments =~ s {$CCommentRegEx} []xg;
392 # find all the hf[] entries (searching $fileContentsWithoutComments).
393 # Create a hash keyed by the hf_index_name with the associated value being the field_type
394 my %hfArrayEntryFieldType;
395 while ($fileContentsWithoutComments =~ m{ $hfArrayFieldTypeRegEx }xgis) {
397 if (exists $hfArrayEntryFieldType{$1}) {
398 printf "%-35.35s: ? duplicate hf[] entry: no fixes done for: $1; manual action may be req'd\n", $fileName;
399 $hfArrayEntryFieldType{$1} = "???"; # prevent any substitutions for this hf_index_name
401 $hfArrayEntryFieldType{$1} = $2;
405 # RegEx to get "proto" variable name
406 my $protoRegEx = qr /
407 ^ \s* # note m modifier below
414 proto_register_protocol
419 # Find all registered protocols
420 while ($fileContentsWithoutComments =~ m { $protoRegEx }xgioms ) {
422 if (exists $hfArrayEntryFieldType{$1}) {
423 printf "%-35.35s: ? duplicate 'proto': no fixes done for: $1; manual action may be req'd\n", $fileName;
424 $hfArrayEntryFieldType{$1} = "???"; # prevent any substitutions for this protocol
426 $hfArrayEntryFieldType{$1} = "REG_PROTO";
430 return \%hfArrayEntryFieldType;
433 # ---------------------------------------------------------------------
435 # Substitute new values for the specified <fcn_name>() encoding arg values
436 # when the encoding arg is the *last* arg of the call to fcn_name
438 # substitute_flag: 1: replace specified encoding arg values by a new value (keys/values in search hash);
439 # ref to hash containing search (keys) and replacement (values) for encoding arg
441 # ref to string containing file contents
448 my $searchReplaceHRef;
451 sub fix_encoding_args {
452 (my $subFlag, $searchReplaceHRef, my $fcn_name, my $fileContentsRef, $fileName) = @_;
457 # just match for <fcn_name>() statements which have an encoding arg matching one of the
458 # keys in the searchReplace hash.
459 # Escape any "|" characters in the keys
460 # and then create "alternatives" string containing all the resulting key strings. Ex: "A|B|C\|D|..."
461 $encArgPat = join "|", map { my $copy = $_; $copy =~ s{ ( \| ) }{\\$1}gx; $copy } keys %$searchReplaceHRef;
462 } elsif ($subFlag == 3) {
463 # match for <fcn_name>() statements for any value of the encoding parameter
464 # IOW: find all the <fcn_name> statements
465 $encArgPat = qr / [^,)]+? /x;
468 # build the complete pattern
472 (?:^|=) # don't try to handle fcn_name call when arg of another fcn call
475 [^;]+? # a bit dangerous
480 # exact match of pattern (including spaces)
488 /xms; # m for ^ above
490 ##print "$patRegEx\n";
492 ## Match and substitute as specified
495 $$fileContentsRef =~ s/ $patRegEx /patsubx($1,$2,$3)/xges;
500 # Called from fix_encoding_args to determine replacement string when a regex match is encountered
502 # $_[1]: part 2: encoding arg
504 # lookup the desired replacement value for the encoding arg
505 # print match string showing and highlighting the encoding arg replacement
506 # return "replacement" string
509 my $substr = exists $$searchReplaceHRef{$_[1]} ? $$searchReplaceHRef{$_[1]} : "???";
510 my $str = sprintf("%s[[%s]-->[%s]]%s", $_[0], $_[1], $substr, $_[2]);
511 $str =~ tr/\t\n\r/ /d;
512 printf "%s: $str\n", $fileName;
513 return $_[0] . $substr . $_[2];
517 # ---------------------------------------------------------------------
518 # fix_encoding_args_by_hf_type
520 # Substitute new values for certain proto_tree_add_item() encoding arg
521 # values (for specified hf field types)
522 # Variants: search for and display for "exceptions" to allowed encoding arg values;
523 # search for and display all encoding arg values
525 # substitute_flag: 1: replace specified encoding arg values by a new value (keys/values in search hash);
526 # 2: search for "exceptions" to allowed encoding arg values (values in search hash);
527 # 3: search for all encoding arg values
528 # ref to array containing two elements:
529 # - ref to array containing hf[] types to be processed (FT_STRING, etc)
530 # - ref to hash containing search (keys) and replacement (values) for encoding arg
532 # ref to hfArrayEntries hash (key: hf name; value: field type)
533 # ref to string containing file contents
540 my $searchReplaceHRef;
544 sub fix_encoding_args_by_hf_type {
546 (my $subFlag, my $mapArg, my $fcn_name, my $fileContentsRef, my $hfArrayEntryFieldTypeHRef, $fileName) = @_;
552 $hfTypesARef = $$mapArg[0];
553 $searchReplaceHRef = $$mapArg[1];
556 @hfTypes{@$hfTypesARef}=();
558 # set up the encoding arg match pattern
560 # just match for <fcn_name>() statements which have an encoding arg matching one of the
561 # keys in the searchReplace hash.
562 # Escape any "|" characters in the keys
563 # and then create "alternatives" string containing all the resulting key strings. Ex: "A|B|C\|D|..."
564 $encArgPat = join "|", map { my $copy = $_; $copy =~ s{ ( \| ) }{\\$1}gx; $copy } keys %$searchReplaceHRef;
565 } elsif ($subFlag == 2) {
566 # Find all the <fcn_name>() statements wherein the encoding arg is a value other than
567 # one of the "replace" values.
568 # Uses zero-length negative-lookahead to find <fcn_name>() statements for which the encoding
569 # arg is something other than one of the provided replace values.
570 # Escape any "|" characters in the values to be matched
571 # and then create "alternatives" string containing all the value strings. Ex: "A|B|C\|D|..."
572 my $match_str = join "|", map { my $copy = $_; $copy =~ s{ ( \| ) }{\\$1}gx; $copy } values %$searchReplaceHRef;
574 (?! # negative zero-length look-ahead
576 (?: $match_str ) # alternatives we don't want to match
579 [^,)]+? # OK: enoding arg is other than one of the alternatives:
580 # match to end of the arg
582 } elsif ($subFlag == 3) {
583 # match for <fcn_name>() statements for any value of the encoding parameter
584 # IOW: find all the proto_tree_add_item statements with an hf entry of the desired types
585 $encArgPat = qr / [^,)]+? /x;
588 # For each hf[] entry which matches a type in %hfTypes do replacements
590 foreach my $key (keys %$hfArrayEntryFieldTypeHRef) {
591 $hf_index_name = $key;
592 $hf_index_name =~ s{ ( \[ | \] ) }{\\$1}xg; # escape any "[" or "]" characters
593 $hf_field_type = $$hfArrayEntryFieldTypeHRef{$key};
594 ##printf "--> %-35.35s: %s\n", $hf_index_name, $hf_field_type;
596 next unless exists $hfTypes{$hf_field_type}; # Do we want to process for this hf[] entry type ?
598 # build the complete pattern
612 # exact match of pattern (including spaces)
622 ##print "\n$hf_index_name $hf_field_type\n";
623 ##print "\n$patRegEx\n";
625 ## Match and substitute as specified
626 $$fileContentsRef =~ s/ $patRegEx /patsub($1,$2,$3)/xges;
633 # Called from fix_encoding_args_by_hf_type to determine replacement string when a regex match is encountered
635 # $_[1]: part 2: encoding arg
637 # lookup the desired replacement value for the encoding arg
638 # print match string showing and highlighting the encoding arg replacement
639 # return "replacement" string
642 my $substr = exists $$searchReplaceHRef{$_[1]} ? $$searchReplaceHRef{$_[1]} : "???";
643 my $str = sprintf("%s[[%s]-->[%s]]%s", $_[0], $_[1], $substr, $_[2]);
644 $str =~ tr/\t\n\r/ /d;
645 printf "%s: %-17.17s $str\n", $fileName, $hf_field_type . ":";
646 return $_[0] . $substr . $_[2];
650 # ---------------------------------------------------------------------
651 # Find all <fcnList> statements
652 # and output same highlighting the encoding arg
653 # Currently: encoding arg is matched as the *last* arg of the function call
656 my( $fcnListARef, $fileContentsRef, $fileName) = @_;
659 my $fcnListPat = join "|", @$fcnListARef;
662 (?:$fcnListPat) \s* \(
675 while ($$fileContentsRef =~ / $pat /xgso) {
676 my $str = "${1}[[${2}]]${3}\n";
677 $str =~ tr/\t\n\r/ /d;
678 $str =~ s/ \s+ / /xg;
679 print "$fileName: $str\n";