2 Routines for reading PDML produced from TShark.
4 Copyright (c) 2003, 2013 by Gilbert Ramirez <gram@alumni.rice.edu>
6 This program is free software; you can redistribute it and/or
7 modify it under the terms of the GNU General Public License
8 as published by the Free Software Foundation; either version 2
9 of the License, or (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software
18 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
23 from xml.sax.saxutils import quoteattr
24 import cStringIO as StringIO
class FoundItException(Exception):
    """Signal raised inside a recursive tree search to stop it early."""
34 """Holds Packet objects, and has methods for finding
37 def __init__(self, children=None):
41 self.children = children
43 def __getitem__(self, index):
44 """We act like a list."""
45 return self.children[index]
48 return len(self.children)
50 def item_exists(self, name):
51 """Does an item with name 'name' exist in this
52 PacketList? Returns True or False."""
53 for child in self.children:
54 if child.name == name:
58 for child in self.children:
59 child._item_exists(name)
61 except FoundItException:
66 def _item_exists(self, name):
67 for child in self.children:
68 if child.name == name:
69 raise FoundItException
70 child._item_exists(name)
73 def get_items(self, name, items=None):
74 """Return all items that match the name 'name'.
75 They are returned in order of a depth-first-search."""
82 for child in self.children:
83 if child.name == name:
85 child.get_items(name, items)
88 return PacketList(items)
90 def get_items_before(self, name, before_item, items=None):
91 """Return all items that match the name 'name' that
92 exist before the before_item. The before_item is an object.
93 The results are returned in order of a depth-first-search.
94 This function allows you to find fields from protocols that occur
95 before other protocols. For example, if you have an HTTP
96 protocol, you can find all tcp.dstport fields *before* that HTTP
97 protocol. This helps analyze in the presence of tunneled protocols."""
104 for child in self.children:
105 if top_level == 1 and child == before_item:
107 if child.name == name:
109 # Call get_items because the 'before_item' applies
110 # only to the top level search.
111 child.get_items(name, items)
114 return PacketList(items)
117 class ProtoTreeItem(PacketList):
def __init__(self, xmlattrs):
    """Build a tree item from a mapping of XML attributes.

    'xmlattrs' must offer a dict-style get(). Each of the attributes
    name, showname, pos, size, value, show and hide is copied onto the
    instance under the same name; any that is absent is stored as the
    empty string.
    """
    PacketList.__init__(self)

    for attr in ("name", "showname", "pos", "size", "value", "show", "hide"):
        setattr(self, attr, xmlattrs.get(attr, ""))
129 def add_child(self, child):
130 self.children.append(child)
135 def get_showname(self):
153 def dump(self, fh=sys.stdout):
155 print >> fh, " name=%s" % (quoteattr(self.name),),
158 print >> fh, "showname=%s" % (quoteattr(self.showname),),
161 print >> fh, "pos=%s" % (quoteattr(self.pos),),
164 print >> fh, "size=%s" % (quoteattr(self.size),),
167 print >> fh, "value=%s" % (quoteattr(self.value),),
170 print >> fh, "show=%s" % (quoteattr(self.show),),
173 print >> fh, "hide=%s" % (quoteattr(self.hide),),
175 class Packet(ProtoTreeItem, PacketList):
176 def dump(self, fh=sys.stdout, indent=0):
177 print >> fh, " " * indent, "<packet>"
179 for child in self.children:
180 child.dump(fh, indent)
181 print >> fh, " " * indent, "</packet>"
184 class Protocol(ProtoTreeItem):
186 def dump(self, fh=sys.stdout, indent=0):
187 print >> fh, "%s<proto " % (" " * indent,),
189 ProtoTreeItem.dump(self, fh)
194 for child in self.children:
195 child.dump(fh, indent)
196 print >> fh, " " * indent, "</proto>"
199 class Field(ProtoTreeItem):
201 def dump(self, fh=sys.stdout, indent=0):
202 print >> fh, "%s<field " % (" " * indent,),
204 ProtoTreeItem.dump(self, fh)
209 for child in self.children:
210 child.dump(fh, indent)
211 print >> fh, " " * indent, "</field>"
217 class ParseXML(xml.sax.handler.ContentHandler):
219 ELEMENT_FILE = "pdml"
220 ELEMENT_FRAME = "packet"
221 ELEMENT_PROTOCOL = "proto"
222 ELEMENT_FIELD = "field"
224 def __init__(self, cb):
227 self.element_stack = []
229 def startElement(self, name, xmlattrs):
232 if name == self.ELEMENT_FILE:
233 # Eventually, we should check version number of pdml here
236 elif name == self.ELEMENT_FRAME:
237 elem = Packet(xmlattrs)
239 elif name == self.ELEMENT_PROTOCOL:
240 elem = Protocol(xmlattrs)
242 elif name == self.ELEMENT_FIELD:
243 elem = Field(xmlattrs)
246 sys.exit("Unknown element: %s" % (name,))
248 self.element_stack.append(elem)
251 def endElement(self, name):
252 elem = self.element_stack.pop()
254 # if isinstance(elem, Field):
255 # if elem.get_name() == "frame.number":
256 # print >> sys.stderr, "Packet:", elem.get_show()
258 # Add element as child to previous element as long
259 # as there is more than 1 element in the stack. Only
260 # one element in the stack means that the element in
261 # the stack is the single CaptureFile element, and we don't
262 # want to add this element to that, as we only want one
263 # Packet element in memory at a time.
264 if len(self.element_stack) > 1:
265 parent_elem = self.element_stack[-1]
266 parent_elem.add_child(elem)
270 # If we just finished a Packet element, hand it to the
272 if isinstance(elem, Packet):
275 def characters(self, chars):
276 self.chars = self.chars + chars
279 def _create_parser(cb):
280 """Internal function for setting up the SAX parser."""
283 parser = xml.sax.make_parser()
286 handler = ParseXML(cb)
288 # Tell the parser to use our handler
289 parser.setContentHandler(handler)
291 # Don't fetch the DTD, in case it is listed
292 parser.setFeature(xml.sax.handler.feature_external_ges, False)
296 def parse_fh(fh, cb):
297 """Parse a PDML file, given filehandle, and call the callback function (cb),
298 once for each Packet object."""
300 parser = _create_parser(cb)
305 # Close the parser ; this is erroring out, but I'm not sure why.
308 def parse_string(text, cb):
309 """Parse the PDML contained in a string."""
310 stream = StringIO.StringIO(text)
319 filename = sys.argv[1]
320 fh = open(filename, "r")
321 parse_fh(fh, test_cb)
323 if __name__ == '__main__':