1 # objects.py -- Access to base git objects
2 # Copyright (C) 2007 James Westby <jw+debian@jameswestby.net>
3 # Copyright (C) 2008 Jelmer Vernooij <jelmer@samba.org>
4 # The header parsing code is based on that from git itself, which is
5 # Copyright (C) 2005 Linus Torvalds
6 # and licensed under v2 of the GPL.
8 # This program is free software; you can redistribute it and/or
9 # modify it under the terms of the GNU General Public License
10 # as published by the Free Software Foundation; version 2
11 # of the License or (at your option) a later version of the License.
13 # This program is distributed in the hope that it will be useful,
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 # GNU General Public License for more details.
18 # You should have received a copy of the GNU General Public License
19 # along with this program; if not, write to the Free Software
20 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
28 from dulwich.errors import (
40 COMMITTER_ID = "committer"
def _decompress(string):
    """Inflate zlib-compressed data and return the uncompressed result."""
    inflater = zlib.decompressobj()
    head = inflater.decompress(string)
    # flush() hands back whatever the decompressor is still buffering.
    tail = inflater.flush()
    return head + tail
52 """Takes a string and returns the hex of the sha within"""
55 hexsha += "%02x" % ord(c)
56 assert len(hexsha) == 40, "Incorrect length of sha1 string: %d" % \
61 """Takes a hex sha and returns a binary sha"""
63 for i in range(0, len(hex), 2):
64 sha += chr(int(hex[i:i+2], 16))
65 assert len(sha) == 20, "Incorrent length of sha1: %d" % len(sha)
68 class ShaFile(object):
72 def _parse_legacy_object(cls, map):
73 """Parse a legacy object, creating it and setting object._text"""
74 text = _decompress(map)
76 for posstype in type_map.keys():
77 if text.startswith(posstype):
78 object = type_map[posstype]()
79 text = text[len(posstype):]
81 assert object is not None, "%s is not a known object type" % text[:9]
82 assert text[0] == ' ', "%s is not a space" % text[0]
86 while text[0] >= '0' and text[0] <= '9':
87 if i > 0 and size == 0:
88 assert False, "Size is not in canonical format"
89 size = (size * 10) + int(text[0])
93 assert text[0] == "\0", "Size not followed by null"
def as_raw_string(self):
    """Return this object as a ``(numeric type, raw text)`` tuple."""
    numeric_type = self._num_type
    payload = self._text
    return numeric_type, payload
102 def _parse_object(cls, map):
103 """Parse a new style object , creating it and setting object._text"""
105 byte = ord(map[used])
107 num_type = (byte >> 4) & 7
109 object = num_type_map[num_type]()
111 assert False, "Not a known type: %d" % num_type
112 while((byte & 0x80) != 0):
113 byte = ord(map[used])
116 object._text = _decompress(raw)
def _parse_file(cls, map):
    """Choose the parser for ``map`` from its first two bytes and run it.

    The legacy parser is used when the first byte is 0x78 and the first two
    bytes, read as a big-endian 16-bit number, are a multiple of 31 (the
    zlib stream header check). Anything else goes to the new-style parser.
    """
    first = ord(map[0])
    second = ord(map[1])
    looks_like_zlib = first == 0x78 and ((first << 8) + second) % 31 == 0
    if not looks_like_zlib:
        return cls._parse_object(map)
    return cls._parse_legacy_object(map)
128 """Don't call this directly"""
def _parse_text(self):
    """Initialisation-time parsing hook; the base implementation is a no-op.

    Subclasses override this to parse ``self._text``.
    """
    return None
def from_file(cls, filename):
    """Load and parse the SHA file stored on disk at ``filename``.

    The file is memory-mapped read-only and handed to ``cls._parse_file``;
    the resulting object's ``_parse_text`` hook is then run and the object
    is returned. The mapping and the file handle are both released before
    this method returns, whether or not parsing succeeds.
    """
    size = os.path.getsize(filename)
    f = open(filename, 'rb')
    try:
        mapped = mmap.mmap(f.fileno(), size, access=mmap.ACCESS_READ)
        try:
            shafile = cls._parse_file(mapped)
            shafile._parse_text()
            return shafile
        finally:
            # NOTE(review): assumes the parsers keep only decompressed
            # copies of the data (as _parse_legacy_object does), so the
            # mapping can be dropped here rather than left to the GC.
            mapped.close()
    finally:
        f.close()
147 def from_raw_string(cls, type, string):
148 """Creates an object of the indicated type from the raw string given.
150 Type is the numeric type of an object. String is the raw uncompressed
153 real_class = num_type_map[type]
161 return "%s %lu\0" % (self._type, len(self._text))
164 return zlib.crc32(self._text) & 0xffffffff
167 """The SHA1 object that is the name of this object."""
169 ressha.update(self._header())
170 ressha.update(self._text)
175 return self.sha().hexdigest()
179 return self._num_type
182 return "<%s %s>" % (self.__class__.__name__, self.id)
def __eq__(self, other):
    """Return True if the SHA-1 digests of the two objects match.

    Objects that do not provide a callable ``sha`` attribute cannot be
    compared; ``NotImplemented`` is returned for them so that Python falls
    back to its default handling instead of raising ``AttributeError``.

    The ordering methods (``__le__`` etc.) aren't overridden as they make
    no sense, certainly at this level.
    """
    other_sha = getattr(other, "sha", None)
    if not callable(other_sha):
        return NotImplemented
    return self.sha().digest() == other_sha().digest()
194 """A Git Blob object."""
201 """The text contained within the blob object."""
def from_file(cls, filename):
    """Load ``filename`` through ShaFile.from_file and return it as a blob.

    Raises NotBlobError when the loaded object's ``_type`` differs from
    this class's ``_type``.
    """
    loaded = ShaFile.from_file(filename)
    if loaded._type == cls._type:
        return loaded
    raise NotBlobError(filename)
212 def from_string(cls, string):
213 """Create a blob from a string."""
215 shafile._text = string
220 """A Git Tag object."""
def from_file(cls, filename):
    """Load ``filename`` through ShaFile.from_file and return it as a tag.

    Raises NotBlobError when the loaded object's ``_type`` differs from
    this class's ``_type``.
    """
    # NOTE(review): the error raised here is the blob one, identical to
    # Blob.from_file; confirm whether a tag-specific error was intended.
    loaded = ShaFile.from_file(filename)
    if loaded._type == cls._type:
        return loaded
    raise NotBlobError(filename)
232 def from_string(cls, string):
233 """Create a blob from a string."""
235 shafile._text = string
238 def _parse_text(self):
239 """Grab the metadata attached to the tag"""
242 assert text.startswith(OBJECT_ID), "Invalid tag object, " \
243 "must start with %s" % OBJECT_ID
244 count += len(OBJECT_ID)
245 assert text[count] == ' ', "Invalid tag object, " \
246 "%s must be followed by space not %s" % (OBJECT_ID, text[count])
248 self._object_sha = text[count:count+40]
250 assert text[count] == '\n', "Invalid tag object, " \
251 "%s sha must be followed by newline" % OBJECT_ID
253 assert text[count:].startswith(TYPE_ID), "Invalid tag object, " \
254 "%s sha must be followed by %s" % (OBJECT_ID, TYPE_ID)
255 count += len(TYPE_ID)
256 assert text[count] == ' ', "Invalid tag object, " \
257 "%s must be followed by space not %s" % (TAG_ID, text[count])
259 self._object_type = ""
260 while text[count] != '\n':
261 self._object_type += text[count]
264 assert self._object_type in (COMMIT_ID, BLOB_ID, TREE_ID, TAG_ID), "Invalid tag object, " \
265 "unexpected object type %s" % self._object_type
266 self._object_type = type_map[self._object_type]
268 assert text[count:].startswith(TAG_ID), "Invalid tag object, " \
269 "object type must be followed by %s" % (TAG_ID)
271 assert text[count] == ' ', "Invalid tag object, " \
272 "%s must be followed by space not %s" % (TAG_ID, text[count])
275 while text[count] != '\n':
276 self._name += text[count]
280 assert text[count:].startswith(TAGGER_ID), "Invalid tag object, " \
281 "%s must be followed by %s" % (TAG_ID, TAGGER_ID)
282 count += len(TAGGER_ID)
283 assert text[count] == ' ', "Invalid tag object, " \
284 "%s must be followed by space not %s" % (TAGGER_ID, text[count])
287 while text[count] != '>':
288 assert text[count] != '\n', "Malformed tagger information"
289 self._tagger += text[count]
291 self._tagger += text[count]
293 assert text[count] == ' ', "Invalid tag object, " \
294 "tagger information must be followed by space not %s" % text[count]
296 self._tag_time = int(text[count:count+10])
297 while text[count] != '\n':
300 assert text[count] == '\n', "There must be a new line after the headers"
302 self._message = text[count:]
306 """Returns the object pointed by this tag, represented as a tuple(type, sha)"""
307 return (self._object_type, self._object_sha)
311 """Returns the name of this tag"""
316 """Returns the name of the person who created this tag"""
321 """Returns the creation timestamp of the tag.
323 Returns it as the number of seconds since the epoch"""
324 return self._tag_time
328 """Returns the message attached to this tag"""
333 """A Git tree object"""
def from_file(cls, filename):
    """Load ``filename`` through ShaFile.from_file and return it as a tree.

    Raises NotTreeError when the loaded object's ``_type`` differs from
    this class's ``_type``.
    """
    loaded = ShaFile.from_file(filename)
    if loaded._type == cls._type:
        return loaded
    raise NotTreeError(filename)
def add(self, mode, name, hexsha):
    """Append a ``(mode, name, hexsha)`` entry to this tree's entry list."""
    entry = (mode, name, hexsha)
    self._entries.append(entry)
352 """Return a list of tuples describing the tree entries"""
355 def _parse_text(self):
356 """Grab the entries in the tree"""
358 while count < len(self._text):
360 chr = self._text[count]
362 assert chr >= '0' and chr <= '7', "%s is not a valid mode char" % chr
363 mode = (mode << 3) + (ord(chr) - ord('0'))
365 chr = self._text[count]
367 chr = self._text[count]
372 chr = self._text[count]
374 chr = self._text[count]
375 sha = self._text[count:count+20]
376 hexsha = sha_to_hex(sha)
377 self.add(mode, name, hexsha)
382 for mode, name, hexsha in self._entries:
383 self._text += "%04o %s\0%s" % (mode, name, hex_to_sha(hexsha))
386 class Commit(ShaFile):
387 """A git commit object"""
def from_file(cls, filename):
    """Load ``filename`` through ShaFile.from_file and return it as a commit.

    Raises NotCommitError when the loaded object's ``_type`` differs from
    this class's ``_type``.
    """
    loaded = ShaFile.from_file(filename)
    if loaded._type == cls._type:
        return loaded
    raise NotCommitError(filename)
402 def _parse_text(self):
405 assert text.startswith(TREE_ID), "Invalid commit object, " \
406 "must start with %s" % TREE_ID
407 count += len(TREE_ID)
408 assert text[count] == ' ', "Invalid commit object, " \
409 "%s must be followed by space not %s" % (TREE_ID, text[count])
411 self._tree = text[count:count+40]
413 assert text[count] == "\n", "Invalid commit object, " \
414 "tree sha must be followed by newline"
417 while text[count:].startswith(PARENT_ID):
418 count += len(PARENT_ID)
419 assert text[count] == ' ', "Invalid commit object, " \
420 "%s must be followed by space not %s" % (PARENT_ID, text[count])
422 self._parents.append(text[count:count+40])
424 assert text[count] == "\n", "Invalid commit object, " \
425 "parent sha must be followed by newline"
428 if text[count:].startswith(AUTHOR_ID):
429 count += len(AUTHOR_ID)
430 assert text[count] == ' ', "Invalid commit object, " \
431 "%s must be followed by space not %s" % (AUTHOR_ID, text[count])
434 while text[count] != '>':
435 assert text[count] != '\n', "Malformed author information"
436 self._author += text[count]
438 self._author += text[count]
440 while text[count] != '\n':
443 self._committer = None
444 if text[count:].startswith(COMMITTER_ID):
445 count += len(COMMITTER_ID)
446 assert text[count] == ' ', "Invalid commit object, " \
447 "%s must be followed by space not %s" % (COMMITTER_ID, text[count])
450 while text[count] != '>':
451 assert text[count] != '\n', "Malformed committer information"
452 self._committer += text[count]
454 self._committer += text[count]
456 assert text[count] == ' ', "Invalid commit object, " \
457 "commiter information must be followed by space not %s" % text[count]
459 self._commit_time = int(text[count:count+10])
460 while text[count] != '\n':
463 assert text[count] == '\n', "There must be a new line after the headers"
465 # XXX: There can be an encoding field.
466 self._message = text[count:]
470 self._text += "%s %s\n" % (TREE_ID, self._tree)
471 for p in self._parents:
472 self._text += "%s %s\n" % (PARENT_ID, p)
473 self._text += "%s %s %s +0000\n" % (AUTHOR_ID, self._author, str(self._commit_time))
474 self._text += "%s %s %s +0000\n" % (COMMITTER_ID, self._committer, str(self._commit_time))
475 self._text += "\n" # There must be a new line after the headers
476 self._text += self._message
480 """Returns the tree that is the state of this commit"""
485 """Return a list of parents of this commit."""
490 """Returns the name of the author of the commit"""
495 """Returns the name of the committer of the commit"""
496 return self._committer
500 """Returns the commit message"""
def commit_time(self):
    """Return the commit timestamp as the number of seconds since the epoch."""
    timestamp = self._commit_time
    return timestamp
525 # 5 Is reserved for further expansion