1 # objects.py -- Access to base git objects
2 # Copyright (C) 2007 James Westby <jw+debian@jameswestby.net>
3 # The header parsing code is based on that from git itself, which is
4 # Copyright (C) 2005 Linus Torvalds
5 # and licensed under v2 of the GPL.
7 # This program is free software; you can redistribute it and/or
8 # modify it under the terms of the GNU General Public License
9 # as published by the Free Software Foundation; version 2
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License
18 # along with this program; if not, write to the Free Software
19 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
27 from errors import (NotCommitError,
38 COMMITTER_ID = "committer"
40 def _decompress(string):
41 dcomp = zlib.decompressobj()
42 dcomped = dcomp.decompress(string)
43 dcomped += dcomp.flush()
47 """Takes a string and returns the hex of the sha within"""
50 hexsha += "%02x" % ord(c)
51 assert len(hexsha) == 40, "Incorrect length of sha1 string: %d" % \
56 """Takes a hex sha and returns a binary sha"""
59 sha += chr(int(hex[i:i+2], 16))
60 assert len(sha) == 20, "Incorrent length of sha1"
63 class ShaFile(object):
67 def _parse_legacy_object(cls, map):
68 """Parse a legacy object, creating it and setting object._text"""
69 text = _decompress(map)
71 for posstype in type_map.keys():
72 if text.startswith(posstype):
73 object = type_map[posstype]()
74 text = text[len(posstype):]
76 assert object is not None, "%s is not a known object type" % text[:9]
77 assert text[0] == ' ', "%s is not a space" % text[0]
81 while text[0] >= '0' and text[0] <= '9':
82 if i > 0 and size == 0:
83 assert False, "Size is not in canonical format"
84 size = (size * 10) + int(text[0])
88 assert text[0] == "\0", "Size not followed by null"
def as_raw_string(self):
    """Return this object's numeric type and its raw text as a 2-tuple.

    The pair is ``(self._num_type, self._text)``; neither value is
    copied or modified.
    """
    numeric_type = self._num_type
    raw_text = self._text
    return (numeric_type, raw_text)
97 def _parse_object(cls, map):
98 """Parse a new style object, creating it and setting object._text"""
100 byte = ord(map[used])
102 num_type = (byte >> 4) & 7
104 object = num_type_map[num_type]()
106 assert False, "Not a known type: %d" % num_type
107 while((byte & 0x80) != 0):
108 byte = ord(map[used])
111 object._text = _decompress(raw)
115 def _parse_file(cls, map):
116 word = (ord(map[0]) << 8) + ord(map[1])
117 if ord(map[0]) == 0x78 and (word % 31) == 0:
118 return cls._parse_legacy_object(map)
120 return cls._parse_object(map)
123 """Don't call this directly"""
def _parse_text(self):
    """Initialisation-time parsing hook for subclasses.

    This implementation performs no parsing and returns None.
    """
    return None
129 def from_file(cls, filename):
130 """Get the contents of a SHA file on disk"""
131 size = os.path.getsize(filename)
132 f = open(filename, 'rb')
134 map = mmap.mmap(f.fileno(), size, access=mmap.ACCESS_READ)
135 shafile = cls._parse_file(map)
136 shafile._parse_text()
142 def from_raw_string(cls, type, string):
143 """Creates an object of the indicated type from the raw string given.
145 Type is the numeric type of an object. String is the raw uncompressed
148 real_class = num_type_map[type]
156 return "%s %lu\0" % (self._type, len(self._text))
159 return zlib.crc32(self._text)
162 """The SHA1 object that is the name of this object."""
164 ressha.update(self._header())
165 ressha.update(self._text)
170 return self.sha().hexdigest()
173 return "<%s %s>" % (self.__class__.__name__, self.id)
175 def __eq__(self, other):
176 """Return true if the sha of the two objects match.
178 The __le__ etc methods aren't overridden as they make no sense,
179 certainly at this level.
181 return self.sha().digest() == other.sha().digest()
185 """A Git Blob object."""
192 """The text contained within the blob object."""
196 def from_file(cls, filename):
197 blob = ShaFile.from_file(filename)
198 if blob._type != cls._type:
199 raise NotBlobError(filename)
203 def from_string(cls, string):
204 """Create a blob from a string."""
206 shafile._text = string
211 """A Git Tag object."""
216 def from_file(cls, filename):
217 blob = ShaFile.from_file(filename)
218 if blob._type != cls._type:
219 raise NotBlobError(filename)
223 def from_string(cls, string):
224 """Create a tag from a string."""
226 shafile._text = string
231 """A Git tree object"""
240 def from_file(cls, filename):
241 tree = ShaFile.from_file(filename)
242 if tree._type != cls._type:
243 raise NotTreeError(filename)
def add(self, mode, name, hexsha):
    """Record one entry on this object.

    The ``(mode, name, hexsha)`` triple is appended, unchanged, to the
    end of ``self._entries``.
    """
    entry = (mode, name, hexsha)
    self._entries.append(entry)
250 """Return a list of tuples describing the tree entries"""
253 def _parse_text(self):
254 """Grab the entries in the tree"""
256 while count < len(self._text):
258 chr = self._text[count]
260 assert chr >= '0' and chr <= '7', "%s is not a valid mode char" % chr
261 mode = (mode << 3) + (ord(chr) - ord('0'))
263 chr = self._text[count]
265 chr = self._text[count]
270 chr = self._text[count]
272 chr = self._text[count]
273 sha = self._text[count:count+20]
274 hexsha = sha_to_hex(sha)
275 self.add(mode, name, hexsha)
280 for mode, name, hexsha in self._entries:
281 self._text += "%04o %s\0%s" % (mode, name, hex_to_sha(hexsha))
284 class Commit(ShaFile):
285 """A git commit object"""
291 def from_file(cls, filename):
292 commit = ShaFile.from_file(filename)
293 if commit._type != cls._type:
294 raise NotCommitError(filename)
297 def _parse_text(self):
300 assert text.startswith(TREE_ID), "Invalid commit object, " \
301 "must start with %s" % TREE_ID
302 count += len(TREE_ID)
303 assert text[count] == ' ', "Invalid commit object, " \
304 "%s must be followed by space not %s" % (TREE_ID, text[count])
306 self._tree = text[count:count+40]
308 assert text[count] == "\n", "Invalid commit object, " \
309 "tree sha must be followed by newline"
312 while text[count:].startswith(PARENT_ID):
313 count += len(PARENT_ID)
314 assert text[count] == ' ', "Invalid commit object, " \
315 "%s must be followed by space not %s" % (PARENT_ID, text[count])
317 self._parents.append(text[count:count+40])
319 assert text[count] == "\n", "Invalid commit object, " \
320 "parent sha must be followed by newline"
323 if text[count:].startswith(AUTHOR_ID):
324 count += len(AUTHOR_ID)
325 assert text[count] == ' ', "Invalid commit object, " \
326 "%s must be followed by space not %s" % (AUTHOR_ID, text[count])
329 while text[count] != '>':
330 assert text[count] != '\n', "Malformed author information"
331 self._author += text[count]
333 self._author += text[count]
335 while text[count] != '\n':
338 self._committer = None
339 if text[count:].startswith(COMMITTER_ID):
340 count += len(COMMITTER_ID)
341 assert text[count] == ' ', "Invalid commit object, " \
342 "%s must be followed by space not %s" % (COMMITTER_ID, text[count])
345 while text[count] != '>':
346 assert text[count] != '\n', "Malformed committer information"
347 self._committer += text[count]
349 self._committer += text[count]
351 assert text[count] == ' ', "Invalid commit object, " \
352 "commiter information must be followed by space not %s" % text[count]
354 self._commit_time = int(text[count:count+10])
355 while text[count] != '\n':
358 assert text[count] == '\n', "There must be a new line after the headers"
360 # XXX: There can be an encoding field.
361 self._message = text[count:]
365 self._text += "%s %s\n" % (TREE_ID, self._tree)
366 for p in self._parents:
367 self._text += "%s %s\n" % (PARENT_ID, p)
368 self._text += "%s %s %s +0000\n" % (AUTHOR_ID, self._author, str(self._commit_time))
369 self._text += "%s %s %s +0000\n" % (COMMITTER_ID, self._committer, str(self._commit_time))
370 self._text += message
374 """Returns the tree that is the state of this commit"""
379 """Return a list of parents of this commit."""
384 """Returns the name of the author of the commit"""
389 """Returns the name of the committer of the commit"""
390 return self._committer
394 """Returns the commit message"""
398 def commit_time(self):
399 """Returns the timestamp of the commit.
401 Returns it as the number of seconds since the epoch.
403 return self._commit_time
419 # 5 Is reserved for further expansion