1 # objects.py -- Access to base git objects
2 # Copyright (C) 2007 James Westby <jw+debian@jameswestby.net>
3 # Copyright (C) 2008 Jelmer Vernooij <jelmer@samba.org>
5 # This program is free software; you can redistribute it and/or
6 # modify it under the terms of the GNU General Public License
7 # as published by the Free Software Foundation; version 2
8 # of the License or (at your option) a later version of the License.
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU General Public License for more details.
15 # You should have received a copy of the GNU General Public License
16 # along with this program; if not, write to the Free Software
17 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
21 """Access to base git objects."""
29 from dulwich.errors import (
41 COMMITTER_ID = "committer"
def _decompress(string):
    """Inflate zlib-compressed data and return the decompressed result.

    :param string: the zlib-compressed data.
    :return: the decompressed data.
    """
    dcomp = zlib.decompressobj()
    dcomped = dcomp.decompress(string)
    # flush() returns whatever the decompressor is still holding back.
    dcomped += dcomp.flush()
    # Callers assign the result, so it has to be handed back explicitly.
    return dcomped
def sha_to_hex(sha):
    """Takes a string and returns the hex of the sha within

    :param sha: the 20-character binary sha.
    :return: the 40-character lowercase hex representation.
    :raises AssertionError: if the result is not 40 characters long.
    """
    hexsha = "".join(["%02x" % ord(c) for c in sha])
    # Report the *length*: formatting the hex string itself with %d raised a
    # TypeError that hid the real assertion failure.
    assert len(hexsha) == 40, \
        "Incorrect length of sha1 string: %d" % len(hexsha)
    return hexsha
def hex_to_sha(hex):
    """Takes a hex sha and returns a binary sha

    :param hex: the 40-character hex sha.
    :return: the 20-character binary sha.
    :raises AssertionError: if hex is not 40 characters long.
    """
    assert len(hex) == 40, "Incorrect length of hexsha: %s" % hex
    # range() behaves the same as xrange() for these 20 steps and is
    # available on every Python version.
    return ''.join([chr(int(hex[i:i+2], 16)) for i in range(0, len(hex), 2)])
66 class ShaFile(object):
70 def _parse_legacy_object(cls, map):
71 """Parse a legacy object, creating it and setting object._text"""
72 text = _decompress(map)
74 for posstype in type_map.keys():
75 if text.startswith(posstype):
76 object = type_map[posstype]()
77 text = text[len(posstype):]
79 assert object is not None, "%s is not a known object type" % text[:9]
80 assert text[0] == ' ', "%s is not a space" % text[0]
84 while text[0] >= '0' and text[0] <= '9':
85 if i > 0 and size == 0:
86 assert False, "Size is not in canonical format"
87 size = (size * 10) + int(text[0])
91 assert text[0] == "\0", "Size not followed by null"
def as_legacy_object(self):
    """Return this object serialized in the legacy format.

    The result is the object header followed by the raw text, compressed
    with zlib as one stream.
    """
    # Reuse _header() so the "<type> <size>\0" prefix is defined in a single
    # place and cannot drift from what sha() feeds into the hash.
    return zlib.compress(self._header() + self._text)
def as_raw_string(self):
    """Return the pair (numeric type, raw text) for this object."""
    num_type = self._num_type
    raw_text = self._text
    return (num_type, raw_text)
103 def _parse_object(cls, map):
104 """Parse a new style object , creating it and setting object._text"""
106 byte = ord(map[used])
108 num_type = (byte >> 4) & 7
110 object = num_type_map[num_type]()
112 raise AssertionError("Not a known type: %d" % num_type)
113 while (byte & 0x80) != 0:
114 byte = ord(map[used])
117 object._text = _decompress(raw)
121 def _parse_file(cls, map):
122 word = (ord(map[0]) << 8) + ord(map[1])
123 if ord(map[0]) == 0x78 and (word % 31) == 0:
124 return cls._parse_legacy_object(map)
126 return cls._parse_object(map)
129 """Don't call this directly"""
def _parse_text(self):
    """Hook called once an object has been loaded.

    Subclasses override this to parse their raw text; the base
    implementation intentionally does nothing.
    """
135 def from_file(cls, filename):
136 """Get the contents of a SHA file on disk"""
137 size = os.path.getsize(filename)
138 f = open(filename, 'rb')
140 map = mmap.mmap(f.fileno(), size, access=mmap.ACCESS_READ)
141 shafile = cls._parse_file(map)
142 shafile._parse_text()
148 def from_raw_string(cls, type, string):
149 """Creates an object of the indicated type from the raw string given.
151 Type is the numeric type of an object. String is the raw uncompressed
154 real_class = num_type_map[type]
162 return "%s %lu\0" % (self._type, len(self._text))
165 """The SHA1 object that is the name of this object."""
167 ressha.update(self._header())
168 ressha.update(self._text)
173 return self.sha().hexdigest()
177 return self._num_type
180 return "<%s %s>" % (self.__class__.__name__, self.id)
def __eq__(self, other):
    """Return true if the sha of the two objects match.

    The __le__ etc methods aren't overridden as they make no sense,
    certainly at this level.

    If other has no sha() method it cannot be compared by hash, so
    NotImplemented is returned and Python falls back to its default
    comparison instead of raising AttributeError.
    """
    if not hasattr(other, "sha"):
        return NotImplemented
    return self.sha().digest() == other.sha().digest()
192 """A Git Blob object."""
199 """The text contained within the blob object."""
203 def from_file(cls, filename):
204 blob = ShaFile.from_file(filename)
205 if blob._type != cls._type:
206 raise NotBlobError(filename)
210 def from_string(cls, string):
211 """Create a blob from a string."""
213 shafile._text = string
218 """A Git Tag object."""
224 def from_file(cls, filename):
225 blob = ShaFile.from_file(filename)
226 if blob._type != cls._type:
227 raise NotBlobError(filename)
231 def from_string(cls, string):
232 """Create a blob from a string."""
234 shafile._text = string
237 def _parse_text(self):
238 """Grab the metadata attached to the tag"""
241 assert text.startswith(OBJECT_ID), "Invalid tag object, " \
242 "must start with %s" % OBJECT_ID
243 count += len(OBJECT_ID)
244 assert text[count] == ' ', "Invalid tag object, " \
245 "%s must be followed by space not %s" % (OBJECT_ID, text[count])
247 self._object_sha = text[count:count+40]
249 assert text[count] == '\n', "Invalid tag object, " \
250 "%s sha must be followed by newline" % OBJECT_ID
252 assert text[count:].startswith(TYPE_ID), "Invalid tag object, " \
253 "%s sha must be followed by %s" % (OBJECT_ID, TYPE_ID)
254 count += len(TYPE_ID)
255 assert text[count] == ' ', "Invalid tag object, " \
256 "%s must be followed by space not %s" % (TAG_ID, text[count])
258 self._object_type = ""
259 while text[count] != '\n':
260 self._object_type += text[count]
263 assert self._object_type in (COMMIT_ID, BLOB_ID, TREE_ID, TAG_ID), "Invalid tag object, " \
264 "unexpected object type %s" % self._object_type
265 self._object_type = type_map[self._object_type]
267 assert text[count:].startswith(TAG_ID), "Invalid tag object, " \
268 "object type must be followed by %s" % (TAG_ID)
270 assert text[count] == ' ', "Invalid tag object, " \
271 "%s must be followed by space not %s" % (TAG_ID, text[count])
274 while text[count] != '\n':
275 self._name += text[count]
279 assert text[count:].startswith(TAGGER_ID), "Invalid tag object, " \
280 "%s must be followed by %s" % (TAG_ID, TAGGER_ID)
281 count += len(TAGGER_ID)
282 assert text[count] == ' ', "Invalid tag object, " \
283 "%s must be followed by space not %s" % (TAGGER_ID, text[count])
286 while text[count] != '>':
287 assert text[count] != '\n', "Malformed tagger information"
288 self._tagger += text[count]
290 self._tagger += text[count]
292 assert text[count] == ' ', "Invalid tag object, " \
293 "tagger information must be followed by space not %s" % text[count]
295 self._tag_time = int(text[count:count+10])
296 while text[count] != '\n':
299 assert text[count] == '\n', "There must be a new line after the headers"
301 self._message = text[count:]
305 """Returns the object pointed by this tag, represented as a tuple(type, sha)"""
306 return (self._object_type, self._object_sha)
310 """Returns the name of this tag"""
315 """Returns the name of the person who created this tag"""
320 """Returns the creation timestamp of the tag.
322 Returns it as the number of seconds since the epoch"""
323 return self._tag_time
327 """Returns the message attached to this tag"""
331 def parse_tree(text):
334 while count < len(text):
338 assert chr >= '0' and chr <= '7', "%s is not a valid mode char" % chr
339 mode = (mode << 3) + (ord(chr) - ord('0'))
351 sha = text[count:count+20]
352 hexsha = sha_to_hex(sha)
353 ret[name] = (mode, hexsha)
359 """A Git tree object"""
368 def from_file(cls, filename):
369 tree = ShaFile.from_file(filename)
370 if tree._type != cls._type:
371 raise NotTreeError(filename)
def __contains__(self, name):
    """Tell whether the tree holds an entry called name."""
    entries = self._entries
    return name in entries
def __getitem__(self, name):
    """Look up and return the entry stored under name.

    Any lookup error raised by the underlying entries mapping propagates.
    """
    entry = self._entries[name]
    return entry
def __setitem__(self, name, value):
    """Store value, which must be a 2-tuple, as the entry for name."""
    # Short-circuiting keeps len() from running on a non-tuple.
    assert isinstance(value, tuple) and len(value) == 2
    entries = self._entries
    entries[name] = value
def __delitem__(self, name):
    """Remove the entry stored under name."""
    entries = self._entries
    del entries[name]
def add(self, mode, name, hexsha):
    """Record an entry for name as the pair (mode, hexsha)."""
    entry = (mode, hexsha)
    self._entries[name] = entry
392 """Return a list of tuples describing the tree entries"""
393 # The order of this is different from iteritems() for historical reasons
394 return [(mode, name, hexsha) for (name, mode, hexsha) in self.iteritems()]
397 for name in sorted(self._entries.keys()):
398 yield name, self._entries[name][0], self._entries[name][1]
def _parse_text(self):
    """Fill in self._entries by parsing the raw tree text."""
    raw_text = self._text
    self._entries = parse_tree(raw_text)
406 for name, mode, hexsha in self.iteritems():
407 self._text += "%04o %s\0%s" % (mode, name, hex_to_sha(hexsha))
410 class Commit(ShaFile):
411 """A git commit object"""
420 def from_file(cls, filename):
421 commit = ShaFile.from_file(filename)
422 if commit._type != cls._type:
423 raise NotCommitError(filename)
426 def _parse_text(self):
429 assert text.startswith(TREE_ID), "Invalid commit object, " \
430 "must start with %s" % TREE_ID
431 count += len(TREE_ID)
432 assert text[count] == ' ', "Invalid commit object, " \
433 "%s must be followed by space not %s" % (TREE_ID, text[count])
435 self._tree = text[count:count+40]
437 assert text[count] == "\n", "Invalid commit object, " \
438 "tree sha must be followed by newline"
441 while text[count:].startswith(PARENT_ID):
442 count += len(PARENT_ID)
443 assert text[count] == ' ', "Invalid commit object, " \
444 "%s must be followed by space not %s" % (PARENT_ID, text[count])
446 self._parents.append(text[count:count+40])
448 assert text[count] == "\n", "Invalid commit object, " \
449 "parent sha must be followed by newline"
452 if text[count:].startswith(AUTHOR_ID):
453 count += len(AUTHOR_ID)
454 assert text[count] == ' ', "Invalid commit object, " \
455 "%s must be followed by space not %s" % (AUTHOR_ID, text[count])
458 while text[count] != '>':
459 assert text[count] != '\n', "Malformed author information"
460 self._author += text[count]
462 self._author += text[count]
464 assert text[count] == ' ', "Invalid commit object, " \
465 "author information must be followed by space not %s" % text[count]
467 self._author_time = int(text[count:count+10])
468 while text[count] != ' ':
469 assert text[count] != '\n', "Malformed author information"
471 self._author_timezone = int(text[count:count+6])
473 while text[count] != '\n':
476 self._committer = None
477 if text[count:].startswith(COMMITTER_ID):
478 count += len(COMMITTER_ID)
479 assert text[count] == ' ', "Invalid commit object, " \
480 "%s must be followed by space not %s" % (COMMITTER_ID, text[count])
483 while text[count] != '>':
484 assert text[count] != '\n', "Malformed committer information"
485 self._committer += text[count]
487 self._committer += text[count]
489 assert text[count] == ' ', "Invalid commit object, " \
490 "commiter information must be followed by space not %s" % text[count]
492 self._commit_time = int(text[count:count+10])
493 while text[count] != ' ':
494 assert text[count] != '\n', "Malformed committer information"
496 self._commit_timezone = int(text[count:count+6])
498 while text[count] != '\n':
501 assert text[count] == '\n', "There must be a new line after the headers"
503 # XXX: There can be an encoding field.
504 self._message = text[count:]
508 self._text += "%s %s\n" % (TREE_ID, self._tree)
509 for p in self._parents:
510 self._text += "%s %s\n" % (PARENT_ID, p)
511 self._text += "%s %s %s %+05d\n" % (AUTHOR_ID, self._author, str(self._author_time), self._author_timezone)
512 self._text += "%s %s %s %+05d\n" % (COMMITTER_ID, self._committer, str(self._commit_time), self._commit_timezone)
513 self._text += "\n" # There must be a new line after the headers
514 self._text += self._message
517 """Returns the tree that is the state of this commit"""
520 tree = property(get_tree)
522 def get_parents(self):
523 """Return a list of parents of this commit."""
526 parents = property(get_parents)
528 def get_author(self):
529 """Returns the name of the author of the commit"""
532 author = property(get_author)
def get_committer(self):
    """Return the committer recorded on this commit.

    This is the value stored by _parse_text: the committer header text up
    to and including the closing '>', or None when no committer header
    was found.
    """
    who = self._committer
    return who
538 committer = property(get_committer)
540 def get_message(self):
541 """Returns the commit message"""
544 message = property(get_message)
def get_commit_time(self):
    """Return the commit timestamp.

    The value is the number of seconds since the epoch.
    """
    when = self._commit_time
    return when
553 commit_time = property(get_commit_time)
def get_commit_timezone(self):
    """Return the zone the commit time is in.

    This is the integer _parse_text read from the committer's offset
    field, e.g. a "+0100" field is kept as 100.
    """
    zone = self._commit_timezone
    return zone
560 commit_timezone = property(get_commit_timezone)
def get_author_time(self):
    """Return the timestamp at which the commit was written.

    The value is the number of seconds since the epoch.
    """
    when = self._author_time
    return when
569 author_time = property(get_author_time)
def get_author_timezone(self):
    """Return the zone the author time is in.

    This is the integer _parse_text read from the author's offset field,
    e.g. a "+0100" field is kept as 100.
    """
    zone = self._author_timezone
    return zone
576 author_timezone = property(get_author_timezone)
592 # 5 Is reserved for further expansion
596 # Try to import C versions
597 from dulwich._objects import hex_to_sha, sha_to_hex, parse_tree