1 # objects.py -- Access to base git objects
2 # Copyright (C) 2007 James Westby <jw+debian@jameswestby.net>
3 # The header parsing code is based on that from git itself, which is
4 # Copyright (C) 2005 Linus Torvalds
5 # and licensed under v2 of the GPL.
7 # This program is free software; you can redistribute it and/or
8 # modify it under the terms of the GNU General Public License
9 # as published by the Free Software Foundation; version 2
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License
18 # along with this program; if not, write to the Free Software
19 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
27 from errors import (NotCommitError,
# Header keyword that introduces the committer line in a commit's text.
38 committer_id = "committer"
# Inflate a zlib-compressed string with a streaming decompressor object.
40 def _decompress(string):
41 dcomp = zlib.decompressobj()
42 dcomped = dcomp.decompress(string)
# flush() hands back any output the decompressor is still buffering.
43 dcomped += dcomp.flush()
47 """Takes a string and returns the hex of the sha within"""
50 hexsha += "%02x" % ord(c)
51 assert len(hexsha) == 40, "Incorrect length of sha1 string: %d" % \
# Base class for git objects loaded from SHA-named object files.
# An object's name is the SHA of its header ("<type> <length>\0") followed by
# its text, and two objects compare equal when those digests match.
56 class ShaFile(object):
# Legacy layout: the whole file is one zlib stream whose inflated content
# starts with "<type name> <decimal size>\0".
60 def _parse_legacy_object(cls, map):
61 """Parse a legacy object, creating it and setting object._text"""
62 text = _decompress(map)
# Pick the class whose type name prefixes the text, then drop that prefix.
64 for posstype in type_map.keys():
65 if text.startswith(posstype):
66 object = type_map[posstype]()
67 text = text[len(posstype):]
69 assert object is not None, "%s is not a known object type" % text[:9]
70 assert text[0] == ' ', "%s is not a space" % text[0]
# Accumulate the decimal size one digit at a time.
74 while text[0] >= '0' and text[0] <= '9':
# Presumably i counts the digits consumed so far, so this rejects a
# multi-digit size that begins with 0 -- TODO confirm against the full source.
75 if i > 0 and size == 0:
76 assert False, "Size is not in canonical format"
77 size = (size * 10) + int(text[0])
81 assert text[0] == "\0", "Size not followed by null"
# Return the numeric type together with the object's text as a 2-tuple.
86 def as_raw_string(self):
87 return self._num_type, self._text
# New-style layout: header bytes are read directly from the map, and the
# object's text is then inflated from `raw`.
90 def _parse_object(cls, map):
91 """Parse a new style object , creating it and setting object._text"""
# The object type is the 3-bit field in bits 4-6 of the header byte.
95 num_type = (byte >> 4) & 7
97 object = num_type_map[num_type]()
99 assert False, "Not a known type: %d" % num_type
# A set high bit (0x80) means another header byte follows.
100 while((byte & 0x80) != 0):
101 byte = ord(map[used])
104 object._text = _decompress(raw)
# Tell the two on-disk layouts apart by looking for a zlib stream header:
# first byte 0x78 and the first two bytes, read big-endian, divisible by 31.
108 def _parse_file(cls, map):
109 word = (ord(map[0]) << 8) + ord(map[1])
110 if ord(map[0]) == 0x78 and (word % 31) == 0:
111 return cls._parse_legacy_object(map)
113 return cls._parse_object(map)
116 """Don't call this directly"""
118 def _parse_text(self):
119 """For subclasses to do initialisation time parsing"""
122 def from_file(cls, filename):
123 """Get the contents of a SHA file on disk"""
124 size = os.path.getsize(filename)
125 f = open(filename, 'rb')
# Map the whole file read-only so the parsers can index it like a string.
127 map = mmap.mmap(f.fileno(), size, access=mmap.ACCESS_READ)
128 shafile = cls._parse_file(map)
# Give the concrete object class a chance to parse its own text.
129 shafile._parse_text()
135 def from_raw_string(cls, type, string):
136 """Creates an object of the indicated type from the raw string given.
138 Type is the numeric type of an object. String is the raw uncompressed
141 real_class = num_type_map[type]
149 return "%s %lu\0" % (self._type, len(self._text))
152 return zlib.crc32(self._text)
155 """The SHA1 object that is the name of this object."""
157 ressha.update(self._header())
158 ressha.update(self._text)
161 def __eq__(self, other):
162 """Return true if the shas of the two objects match.
164 The __le__ etc methods aren't overridden as they make no sense,
165 certainly at this level.
167 return self.sha().digest() == other.sha().digest()
171 """A Git Blob object."""
177 """The text contained within the blob object."""
# Load the object through the generic ShaFile loader, then reject it with
# NotBlobError if its type differs from this class's _type.
181 def from_file(cls, filename):
182 blob = ShaFile.from_file(filename)
183 if blob._type != cls._type:
184 raise NotBlobError(filename)
188 def from_string(cls, string):
189 """Create a blob from a string."""
# The given string is stored unchanged as the object's text.
191 shafile._text = string
196 """A Git Tag object."""
# Load the object through the generic ShaFile loader and check that its type
# matches this class's _type.
# NOTE(review): a type mismatch here raises NotBlobError although this is the
# tag loader -- looks copied from the blob loader; confirm whether a
# tag-specific error should be raised instead.
201 def from_file(cls, filename):
202 blob = ShaFile.from_file(filename)
203 if blob._type != cls._type:
204 raise NotBlobError(filename)
208 def from_string(cls, string):
209 """Create a tag from a string."""
# The given string is stored unchanged as the object's text.
211 shafile._text = string
216 """A Git tree object"""
# Load the object through the generic ShaFile loader, then reject it with
# NotTreeError if its type differs from this class's _type.
221 def from_file(cls, filename):
222 tree = ShaFile.from_file(filename)
223 if tree._type != cls._type:
224 raise NotTreeError(filename)
228 """Return a list of tuples describing the tree entries"""
# Walk self._text with the cursor `count`, appending one
# (mode, name, hexsha) tuple to self._entries per tree entry.
231 def _parse_text(self):
232 """Grab the entries in the tree"""
235 while count < len(self._text):
237 chr = self._text[count]
# The mode is written in octal digits; each digit contributes three bits.
239 assert chr >= '0' and chr <= '7', "%s is not a valid mode char" % chr
240 mode = (mode << 3) + (ord(chr) - ord('0'))
242 chr = self._text[count]
244 chr = self._text[count]
249 chr = self._text[count]
251 chr = self._text[count]
# The sha is stored as 20 raw characters and converted to its hex form.
252 sha = self._text[count:count+20]
253 hexsha = sha_to_hex(sha)
254 self._entries.append((mode, name, hexsha))
# A commit's text is a run of header lines -- tree, zero or more parents,
# author, committer -- followed by a newline and the commit message.
257 class Commit(ShaFile):
258 """A git commit object"""
# Load the object through the generic ShaFile loader, then reject it with
# NotCommitError if its type differs from this class's _type.
263 def from_file(cls, filename):
264 commit = ShaFile.from_file(filename)
265 if commit._type != cls._type:
266 raise NotCommitError(filename)
# Parse the headers in order, moving the cursor `count` through `text`.
269 def _parse_text(self):
# The tree header is mandatory and must come first.
272 assert text.startswith(tree_id), "Invalid commit object, " \
273 "must start with %s" % tree_id
274 count += len(tree_id)
275 assert text[count] == ' ', "Invalid commit object, " \
276 "%s must be followed by space not %s" % (tree_id, text[count])
# A sha in a header occupies 40 characters.
278 self._tree = text[count:count+40]
280 assert text[count] == "\n", "Invalid commit object, " \
281 "tree sha must be followed by newline"
# Zero or more parent headers, collected in order of appearance.
284 while text[count:].startswith(parent_id):
285 count += len(parent_id)
286 assert text[count] == ' ', "Invalid commit object, " \
287 "%s must be followed by space not %s" % (parent_id, text[count])
289 self._parents.append(text[count:count+40])
291 assert text[count] == "\n", "Invalid commit object, " \
292 "parent sha must be followed by newline"
# The author header is only parsed when it is present.
295 if text[count:].startswith(author_id):
296 count += len(author_id)
297 assert text[count] == ' ', "Invalid commit object, " \
298 "%s must be followed by space not %s" % (author_id, text[count])
# Copy characters up to the '>' delimiter; a newline before it is an error.
301 while text[count] != '>':
302 assert text[count] != '\n', "Malformed author information"
303 self._author += text[count]
# Presumably runs after the loop, so the closing '>' itself is kept too.
305 self._author += text[count]
# Presumably skips whatever remains of the author line -- TODO confirm.
307 while text[count] != '\n':
# The committer stays None when the commit has no committer header.
310 self._committer = None
311 if text[count:].startswith(committer_id):
312 count += len(committer_id)
313 assert text[count] == ' ', "Invalid commit object, " \
314 "%s must be followed by space not %s" % (committer_id, text[count])
# Same scan as for the author: copy up to the '>' delimiter.
317 while text[count] != '>':
318 assert text[count] != '\n', "Malformed committer information"
319 self._committer += text[count]
# Presumably runs after the loop, so the closing '>' itself is kept too.
321 self._committer += text[count]
323 assert text[count] == ' ', "Invalid commit object, " \
324 "commiter information must be followed by space not %s" % text[count]
# NOTE(review): the timestamp is read from a fixed 10-character slice, so
# this assumes the epoch value is exactly 10 digits wide.
326 self._commit_time = int(text[count:count+10])
327 while text[count] != '\n':
# A newline separates the headers from the message.
330 assert text[count] == '\n', "There must be a new line after the headers"
332 # XXX: There can be an encoding field.
# Everything from the cursor onwards is taken as the commit message.
333 self._message = text[count:]
337 """Returns the tree that is the state of this commit"""
342 """Return a list of parents of this commit."""
347 """Returns the name of the author of the commit"""
352 """Returns the name of the committer of the commit"""
353 return self._committer
357 """Returns the commit message"""
361 def commit_time(self):
362 """Returns the timestamp of the commit.
364 Returns it as the number of seconds since the epoch.
366 return self._commit_time
370 return self.sha().hexdigest()
385 # 5 Is reserved for further expansion