Removed some redundant prototypes and some shadowed variables.
[obnox/wireshark/wip.git] / tools / indexcap.py
1 #!/usr/bin/python
2 #
3 # Tool to index the protocols that appear in the given capture files
4 #
5 # Copyright 2009, Kovarththanan Rajaratnam <kovarththanan.rajaratnam@gmail.com>
6 #
7 # $Id$
8 #
9 # Wireshark - Network traffic analyzer
10 # By Gerald Combs <gerald@wireshark.org>
11 # Copyright 1998 Gerald Combs
12 #
13 # This program is free software; you can redistribute it and/or
14 # modify it under the terms of the GNU General Public License
15 # as published by the Free Software Foundation; either version 2
16 # of the License, or (at your option) any later version.
17 #
18 # This program is distributed in the hope that it will be useful,
19 # but WITHOUT ANY WARRANTY; without even the implied warranty of
20 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
21 # GNU General Public License for more details.
22 #
23 # You should have received a copy of the GNU General Public License
24 # along with this program; if not, write to the Free Software
25 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
26 #
27
28 from optparse import OptionParser
29 import multiprocessing
30 import sys
31 import os
32 import subprocess
33 import re
34 import pickle
35 import tempfile
36 import filecmp
37 import random
38
def extract_protos_from_file_proces(tshark, file):
    """Count how often each protocol occurs in a single capture file.

    Runs ``tshark -Tfields -e frame.protocols -r <file>`` and tallies every
    colon-separated protocol name found on the well-formed output lines.

    Returns a ``(file, counts)`` tuple where ``counts`` maps a protocol name
    to its number of occurrences; ``counts`` is empty when tshark exits with
    a non-zero status.  Returns None when interrupted by the user.
    """
    try:
        tshark_proc = subprocess.Popen(
            [tshark, "-Tfields", "-e", "frame.protocols", "-r", file],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        output = tshark_proc.communicate()[0]

        counts = {}
        if tshark_proc.returncode == 0:
            # Only lines made up of word characters, ':' and '-' are counted.
            valid_row = re.compile(r'^[\w:-]+$')
            for row in output.splitlines():
                if valid_row.match(row) is None:
                    continue
                for name in row.split(':'):
                    counts[name] = counts.get(name, 0) + 1

        return (file, counts)
    except KeyboardInterrupt:
        return None
58
59 def extract_protos_from_file(tshark, num_procs, max_files, cap_files, cap_hash, index_file_name):
60     pool = multiprocessing.Pool(num_procs)
61     results = [pool.apply_async(extract_protos_from_file_proces, [tshark, file]) for file in cap_files]
62     try:
63         for (cur_item_idx,result_async) in enumerate(results):
64             file_result = result_async.get()
65             action = "SKIPPED" if file_result[1] is {} else "PROCESSED"
66             print "%s [%u/%u] %s %u bytes" % (action, cur_item_idx+1, max_files, file_result[0], os.path.getsize(file_result[0]))
67             cap_hash.update(dict([file_result]))
68     except KeyboardInterrupt:
69         print "%s was interrupted by user" % (sys.argv[0])
70         pool.terminate()
71         exit(1)
72
73     index_file = open(index_file_name, "w")
74     pickle.dump(cap_hash, index_file)
75     index_file.close()
76     exit(0)
77
def dissect_file_process(tshark, tmpdir, file):
    """Dissect one capture file, saving tshark's output in temporary files.

    Runs ``tshark -nxVr <file>`` with stdout and stderr redirected to two
    new temporary files created inside tmpdir (suffixes ``_stdout`` and
    ``_stderr``).  The temporary files are left in place for the caller.

    Returns ``(file, ok, stdout_path, stderr_path)`` where ``ok`` is True
    when tshark exited with status 0.  Returns False when interrupted by
    the user.
    """
    # Bind the handles up front so the cleanup below is safe even when
    # creating a temporary file fails part-way through.
    handle_o = None
    handle_e = None
    try:
        (handle_o, tmpfile_o) = tempfile.mkstemp(suffix='_stdout', dir=tmpdir)
        (handle_e, tmpfile_e) = tempfile.mkstemp(suffix='_stderr', dir=tmpdir)
        cmd = [tshark, "-nxVr", file]
        p = subprocess.Popen(cmd, stdout=handle_o, stderr=handle_e)
        # Output goes straight to the temporary files, so there is nothing
        # to collect here; this only waits for tshark to finish.
        p.communicate()
        return (file, p.returncode == 0, tmpfile_o, tmpfile_e)

    except KeyboardInterrupt:
        return False

    finally:
        for handle in (handle_o, handle_e):
            if handle is not None:
                os.close(handle)
96
97 def dissect_files(tshark, tmpdir, num_procs, max_files, cap_files):
98     pool = multiprocessing.Pool(num_procs)
99     results = [pool.apply_async(dissect_file_process, [tshark, tmpdir, file]) for file in cap_files]
100     try:
101         for (cur_item_idx,result_async) in enumerate(results):
102             file_result = result_async.get()
103             action = "FAILED" if file_result[1] is False else "PASSED"
104             print "%s [%u/%u] %s %u bytes" % (action, cur_item_idx+1, max_files, file_result[0], os.path.getsize(file_result[0]))
105     except KeyboardInterrupt:
106         print "%s was interrupted by user" % (sys.argv[0])
107         pool.terminate()
108         exit(1)
109
110 def compare_files(tshark_bin, tmpdir, tshark_cmp, num_procs, max_files, cap_files):
111     pool = multiprocessing.Pool(num_procs)
112     results_bin = [pool.apply_async(dissect_file_process, [tshark_bin, tmpdir, file]) for file in cap_files]
113     results_cmp = [pool.apply_async(dissect_file_process, [tshark_cmp, tmpdir, file]) for file in cap_files]
114     try:
115         for (cur_item_idx,(result_async_bin, result_async_cmp)) in enumerate(zip(results_bin, results_cmp)):
116             file_result_bin = result_async_bin.get()
117             file_result_cmp = result_async_cmp.get()
118             if file_result_cmp[1] is False or file_result_bin[1] is False:
119                 action = "FAILED (exitcode)"
120             if not filecmp.cmp(file_result_bin[2], file_result_cmp[2]):
121                 action = "FAILED (stdout)"
122             if not filecmp.cmp(file_result_bin[3], file_result_cmp[3]):
123                 action = "FAILED (stderr)"
124             else:
125                 action = "PASSED"
126                 os.remove(file_result_bin[2])
127                 os.remove(file_result_cmp[2])
128                 os.remove(file_result_bin[3])
129                 os.remove(file_result_cmp[3])
130
131             print "%s [%u/%u] %s %u bytes" % (action, cur_item_idx+1, max_files, file_result_bin[0], os.path.getsize(file_result_bin[0]))
132             print "%s [%u/%u] %s %u bytes" % (action, cur_item_idx+1, max_files, file_result_cmp[0], os.path.getsize(file_result_cmp[0]))
133     except KeyboardInterrupt:
134         print "%s was interrupted by user" % (sys.argv[0])
135         pool.terminate()
136         exit(1)
137
def list_all_proto(cap_hash):
    """Sum the per-file protocol counts of the whole index.

    cap_hash maps each capture file to a hash of protocol name -> count.
    Returns a new hash mapping every protocol name to its total count
    across all files.
    """
    totals = {}
    for per_file in cap_hash.itervalues():
        for name, occurrences in per_file.iteritems():
            totals[name] = totals.get(name, 0) + occurrences
    return totals
145
def list_all_files(cap_hash):
    """Return the names of all indexed capture files as a sorted list."""
    return sorted(cap_hash.keys())
151
def list_all_proto_files(cap_hash, proto_comma_delit):
    """List the indexed files that contain any of the given protocols.

    proto_comma_delit is a comma-separated list of protocol names;
    whitespace around each name is ignored.  A file is listed once if at
    least one of its indexed protocols is among the requested names.
    """
    wanted = [name.strip() for name in proto_comma_delit.split(',')]
    matches = []
    for (cap_file, files_hash) in cap_hash.iteritems():
        if any(proto in wanted for proto in files_hash.iterkeys()):
            matches.append(cap_file)

    return matches
162
def index_file_action(options):
    """Tell whether an option that works on an existing index was given.

    Returns the first true value among the list_all_proto, list_all_files
    and list_all_proto_files options, otherwise the value of the
    dissect_files option.
    """
    for attr in ("list_all_proto", "list_all_files", "list_all_proto_files"):
        value = getattr(options, attr)
        if value:
            return value
    return options.dissect_files
168
def find_capture_files(paths, cap_hash):
    """Collect the capture files under paths that are not yet indexed.

    Directories in paths are normalized and walked recursively; any other
    path is taken as a file name as-is.  Files that already appear as a key
    in cap_hash are left out.
    """
    found = []
    for path in paths:
        if not os.path.isdir(path):
            if path not in cap_hash:
                found.append(path)
            continue

        for root, dirs, files in os.walk(os.path.normpath(path)):
            for name in files:
                candidate = os.path.join(root, name)
                if candidate not in cap_hash:
                    found.append(candidate)
    return found
179
def find_tshark_executable(bin_dir):
    """Locate an executable tshark binary inside bin_dir.

    Tries ``tshark.exe`` first and then ``tshark``.  Returns the path of the
    first one that is executable, or None when neither is.
    """
    candidates = [os.path.join(bin_dir, name) for name in ("tshark.exe", "tshark")]
    for candidate in candidates:
        if os.access(candidate, os.X_OK):
            return candidate

    return None
187
188 def main():
189     parser = OptionParser(usage="usage: %prog [options] index_file [file_1|dir_1 [.. file_n|dir_n]]")
190     parser.add_option("-d", "--dissect-files", dest="dissect_files", default=False, action="store_true",
191                       help="Dissect all matching files")
192     parser.add_option("-m", "--max-files", dest="max_files", default=sys.maxint, type="int", 
193                       help="Max number of files to process")
194     parser.add_option("-b", "--binary-dir", dest="bin_dir", default=os.getcwd(),
195                       help="Directory containing tshark executable")
196     parser.add_option("-c", "--compare-dir", dest="compare_dir", default=None,
197                       help="Directory containing tshark executable which is used for comparison")
198     parser.add_option("-j", dest="num_procs", default=multiprocessing.cpu_count(), type=int, 
199                       help="Max number of processes to spawn")
200     parser.add_option("-r", "--randomize", default=False, action="store_true",
201                       help="Randomize the file list order")
202     parser.add_option("", "--list-all-proto", dest="list_all_proto", default=False, action="store_true", 
203                       help="List all protocols in index file")
204     parser.add_option("", "--list-all-files", dest="list_all_files", default=False, action="store_true", 
205                       help="List all files in index file")
206     parser.add_option("", "--list-all-proto-files", dest="list_all_proto_files", default=False,
207                       metavar="PROTO_1[, .. PROTO_N]",
208                       help="List all files in index file containing the given protocol")
209
210     (options, args) = parser.parse_args()
211
212     if len(args) == 0:
213         parser.error("index_file is a required argument")
214
215     if len(args) == 1 and not index_file_action(options):
216         parser.error("one capture file/directory must be specified")
217
218     if options.dissect_files and not options.list_all_files and not options.list_all_proto_files:
219         parser.error("--list-all-files or --list-all-proto-files must be specified")
220
221     if options.dissect_files and not options.compare_dir is None:
222         parser.error("--dissect-files and --compare-dir cannot be specified at the same time")
223
224     index_file_name = args.pop(0)
225     paths = args
226     cap_hash = {}
227     try:
228         index_file = open(index_file_name, "r")
229         print "index file:", index_file.name, "[OPENED]",
230         cap_hash = pickle.load(index_file)
231         index_file.close()
232         print len(cap_hash), "files"
233     except IOError:
234         print "index file:", index_file_name, "[NEW]"
235
236     if options.list_all_proto:
237         print list_all_proto(cap_hash)
238         exit(0)
239
240     indexed_files = []
241     if options.list_all_files:
242         indexed_files = list_all_files(cap_hash)
243         print indexed_files
244
245     if options.list_all_proto_files:
246         indexed_files = list_all_proto_files(cap_hash, options.list_all_proto_files)
247         print indexed_files
248
249     tshark_bin = find_tshark_executable(options.bin_dir)
250     if not tshark_bin is None:
251         print "tshark:", tshark_bin, "[FOUND]"
252     else:
253         print "tshark:", tshark_bin, "[MISSING]"
254         exit(1)
255
256     if not options.compare_dir is None:
257         tshark_cmp = find_tshark_executable(options.compare_dir)
258         if not tshark_cmp is None:
259             print "tshark:", tshark_cmp, "[FOUND]"
260         else:
261             print "tshark:", tshark_cmp, "[MISSING]"
262             exit(1)
263
264     if options.dissect_files or options.compare_dir:
265         cap_files = indexed_files
266     elif options.list_all_proto_files or options.list_all_files:
267         exit(0)
268     else:
269         cap_files = find_capture_files(paths, cap_hash)
270
271     if options.randomize:
272         random.shuffle(cap_files)
273     else:
274         cap_files.sort()
275
276     options.max_files = min(options.max_files, len(cap_files))
277     print "%u total files, %u working files" % (len(cap_files), options.max_files)
278     cap_files = cap_files[:options.max_files]
279     tmpdir = tempfile.mkdtemp()
280     print "Temporary working dir: %s" % tmpdir
281
282     if options.compare_dir:
283         compare_files(tshark_bin, tmpdir, tshark_cmp, options.num_procs, options.max_files, cap_files)
284     elif options.dissect_files:
285         dissect_files(tshark_bin, tmpdir, options.num_procs, options.max_files, cap_files)
286     else:
287         extract_protos_from_file(tshark_bin, tmpdir, options.num_procs, options.max_files, cap_files, cap_hash, index_file_name)
288
289     os.rmdir(tmpdir)
# Run the tool only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    main()