1 # objects.py -- Access to base git objects
2 # Copyright (C) 2007 James Westby <jw+debian@jameswestby.net>
3 # The header parsing code is based on that from git itself, which is
4 # Copyright (C) 2005 Linus Torvalds
5 # and licensed under v2 of the GPL.
7 # This program is free software; you can redistribute it and/or
8 # modify it under the terms of the GNU General Public License
9 # as published by the Free Software Foundation; version 2
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License
18 # along with this program; if not, write to the Free Software
19 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
27 from errors import (NotCommitError,
37 committer_id = "committer"
39 def _decompress(string):
40 dcomp = zlib.decompressobj()
41 dcomped = dcomp.decompress(string)
42 dcomped += dcomp.flush()
46 """Takes a string and returns the hex of the sha within"""
49 hexsha += "%02x" % ord(c)
50 assert len(hexsha) == 40, "Incorrect length of sha1 string: %d" % \
54 class ShaFile(object):
57 def _update_contents(self):
58 """Update the _contents from the _text"""
59 self._contents = [ord(c) for c in self._text]
62 def _parse_legacy_object(cls, map):
63 """Parse a legacy object, creating it and setting object._text"""
64 text = _decompress(map)
66 for posstype in type_map.keys():
67 if text.startswith(posstype):
68 object = type_map[posstype]()
69 text = text[len(posstype):]
71 assert object is not None, "%s is not a known object type" % text[:9]
72 assert text[0] == ' ', "%s is not a space" % text[0]
76 while text[0] >= '0' and text[0] <= '9':
77 if i > 0 and size == 0:
78 assert False, "Size is not in canonical format"
79 size = (size * 10) + int(text[0])
83 assert text[0] == "\0", "Size not followed by null"
86 object._update_contents()
90 def _parse_object(cls, map):
91 """Parse a new style object, creating it and setting object._text"""
95 num_type = (byte >> 4) & 7
97 object = num_type_map[num_type]()
99 assert False, "Not a known type: %d" % num_type
100 while((byte & 0x80) != 0):
101 byte = ord(map[used])
104 object._text = _decompress(raw)
105 object._update_contents()
109 def _parse_file(cls, map):
110 word = (ord(map[0]) << 8) + ord(map[1])
111 if ord(map[0]) == 0x78 and (word % 31) == 0:
112 return cls._parse_legacy_object(map)
114 return cls._parse_object(map)
117 """Don't call this directly"""
119 def _parse_text(self):
120 """For subclasses to do initialisation time parsing"""
123 def from_file(cls, filename):
124 """Get the contents of a SHA file on disk"""
125 size = os.path.getsize(filename)
126 f = open(filename, 'rb')
128 map = mmap.mmap(f.fileno(), size, access=mmap.ACCESS_READ)
129 shafile = cls._parse_file(map)
130 shafile._parse_text()
136 def from_raw_string(cls, type, string):
137 """Creates an object of the indicated type from the raw string given.
139 Type is the numeric type of an object. String is the raw uncompressed
142 real_class = num_type_map[type]
145 obj._update_contents()
149 return "%s %lu\0" % (self._type, len(self._contents))
152 """The raw bytes of this object"""
153 return self._contents
156 """The SHA1 object that is the name of this object."""
158 ressha.update(self._header())
159 ressha.update(self._text)
162 def __eq__(self, other):
163 """Return true if the sha of the two objects match.
165 The __le__ etc methods aren't overriden as they make no sense,
166 certainly at this level.
168 return self.sha().digest() == other.sha().digest()
171 """A Git Blob object."""
176 """The text contained within the blob object."""
180 def from_file(cls, filename):
181 blob = ShaFile.from_file(filename)
182 if blob._type != cls._type:
183 raise NotBlobError(filename)
187 def from_string(cls, string):
188 """Create a blob from a string."""
190 shafile._text = string
191 shafile._update_contents()
195 """A Git tree object"""
200 def from_file(cls, filename):
201 tree = ShaFile.from_file(filename)
202 if tree._type != cls._type:
203 raise NotTreeError(filename)
207 """Return a list of tuples describing the tree entries"""
210 def _parse_text(self):
211 """Grab the entries in the tree"""
214 while count < len(self._text):
216 chr = self._text[count]
218 assert chr >= '0' and chr <= '7', "%s is not a valid mode char" % chr
219 mode = (mode << 3) + (ord(chr) - ord('0'))
221 chr = self._text[count]
223 chr = self._text[count]
228 chr = self._text[count]
230 chr = self._text[count]
231 sha = self._text[count:count+20]
232 hexsha = sha_to_hex(sha)
233 self._entries.append((mode, name, hexsha))
236 class Commit(ShaFile):
237 """A git commit object"""
242 def from_file(cls, filename):
243 commit = ShaFile.from_file(filename)
244 if commit._type != cls._type:
245 raise NotCommitError(filename)
248 def _parse_text(self):
251 assert text.startswith(tree_id), "Invlid commit object, " \
252 "must start with %s" % tree_id
253 count += len(tree_id)
254 assert text[count] == ' ', "Invalid commit object, " \
255 "%s must be followed by space not %s" % (tree_id, text[count])
257 self._tree = text[count:count+40]
259 assert text[count] == "\n", "Invalid commit object, " \
260 "tree sha must be followed by newline"
263 while text[count:].startswith(parent_id):
264 count += len(parent_id)
265 assert text[count] == ' ', "Invalid commit object, " \
266 "%s must be followed by space not %s" % (parent_id, text[count])
268 self._parents.append(text[count:count+40])
270 assert text[count] == "\n", "Invalid commit object, " \
271 "parent sha must be followed by newline"
274 if text[count:].startswith(author_id):
275 count += len(author_id)
276 assert text[count] == ' ', "Invalid commit object, " \
277 "%s must be followed by space not %s" % (author_id, text[count])
280 while text[count] != '>':
281 assert text[count] != '\n', "Malformed author information"
282 self._author += text[count]
284 self._author += text[count]
286 while text[count] != '\n':
289 self._committer = None
290 if text[count:].startswith(committer_id):
291 count += len(committer_id)
292 assert text[count] == ' ', "Invalid commit object, " \
293 "%s must be followed by space not %s" % (committer_id, text[count])
296 while text[count] != '>':
297 assert text[count] != '\n', "Malformed committer information"
298 self._committer += text[count]
300 self._committer += text[count]
302 assert text[count] == ' ', "Invalid commit object, " \
303 "commiter information must be followed by space not %s" % text[count]
305 self._commit_time = int(text[count:count+10])
306 while text[count] != '\n':
309 assert text[count] == '\n', "There must be a new line after the headers"
311 # XXX: There can be an encoding field.
312 self._message = text[count:]
315 """Returns the tree that is the state of this commit"""
319 """Return a list of parents of this commit."""
323 """Returns the name of the author of the commit"""
327 """Returns the name of the committer of the commit"""
328 return self._committer
331 """Returns the commit message"""
334 def commit_time(self):
335 """Returns the timestamp of the commit.
337 Returns it as the number of seconds since the epoch.
339 return self._commit_time