1 # objects.py -- Access to base git objects
2 # Copyright (C) 2007 James Westby <jw+debian@jameswestby.net>
3 # Copyright (C) 2008-2009 Jelmer Vernooij <jelmer@samba.org>
5 # This program is free software; you can redistribute it and/or
6 # modify it under the terms of the GNU General Public License
7 # as published by the Free Software Foundation; version 2
8 # of the License or (at your option) a later version of the License.
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU General Public License for more details.
15 # You should have received a copy of the GNU General Public License
16 # along with this program; if not, write to the Free Software
17 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
20 """Access to base git objects."""
24 from cStringIO import (
31 from dulwich.errors import (
37 ObjectFormatException,
39 from dulwich.file import GitFile
40 from dulwich.misc import (
46 # Header fields for commits
48 _PARENT_HEADER = "parent"
49 _AUTHOR_HEADER = "author"
50 _COMMITTER_HEADER = "committer"
51 _ENCODING_HEADER = "encoding"
54 # Header fields for objects
55 _OBJECT_HEADER = "object"
58 _TAGGER_HEADER = "tagger"
64 return (stat.S_IFMT(m) == S_IFGITLINK)
67 def _decompress(string):
68 dcomp = zlib.decompressobj()
69 dcomped = dcomp.decompress(string)
70 dcomped += dcomp.flush()
75 """Takes a string and returns the hex of the sha within"""
76 hexsha = binascii.hexlify(sha)
77 assert len(hexsha) == 40, "Incorrect length of sha1 string: %d" % hexsha
82 """Takes a hex sha and returns a binary sha"""
83 assert len(hex) == 40, "Incorrent length of hexsha: %s" % hex
84 return binascii.unhexlify(hex)
87 def hex_to_filename(path, hex):
88 """Takes a hex sha and returns its filename relative to the given path."""
91 # Check from object dir
92 return os.path.join(path, dir, file)
95 def filename_to_hex(filename):
96 """Takes an object filename and returns its corresponding hex sha."""
97 # grab the last (up to) two path components
98 names = filename.rsplit(os.path.sep, 2)[-2:]
99 errmsg = "Invalid object filename: %s" % filename
100 assert len(names) == 2, errmsg
102 assert len(base) == 2 and len(rest) == 38, errmsg
def object_header(num_type, length):
    """Build the header for an object of the given type and length.

    :param num_type: Numeric type, resolved to a class via object_class().
    :param length: Length of the object's text.
    :return: String made of the class's type_name, a space, the length in
        decimal and a terminating NUL character.
    """
    type_name = object_class(num_type).type_name
    return "%s %d\0" % (type_name, length)
113 def serializable_property(name, docstring=None):
116 setattr(obj, "_"+name, value)
117 obj._needs_serialization = True
120 return getattr(obj, "_"+name)
121 return property(get, set, doc=docstring)
def object_class(type):
    """Look up the object class registered for a type.

    :param type: Either a type name string or a numeric type.
    :return: The ShaFile subclass corresponding to the given type, or None if
        type is not a valid type name/number.
    """
    # dict.get() already defaults to None for unknown keys.
    return _TYPE_MAP.get(type)
134 def check_hexsha(hex, error_msg):
137 except (TypeError, AssertionError):
138 raise ObjectFormatException("%s %s" % (error_msg, hex))
def check_identity(identity, error_msg):
    """Validate the shape of an identity string.

    An identity passes only if it contains exactly one "<" and exactly one
    ">", the "<" comes before the ">", and the ">" is the last character.

    :param identity: Identity string
    :param error_msg: Error message to use in exception
    :raise ObjectFormatException: with error_msg if the identity is invalid
    """
    open_pos = identity.find("<")
    close_pos = identity.find(">")
    well_formed = (
        0 <= open_pos < close_pos
        and identity.find("<", open_pos + 1) < 0
        and identity.find(">", close_pos + 1) < 0
        and identity.endswith(">"))
    if not well_formed:
        raise ObjectFormatException(error_msg)
158 class FixedSha(object):
159 """SHA object that behaves like hashlib's but is given a fixed value."""
161 __slots__ = ('_hexsha', '_sha')
163 def __init__(self, hexsha):
164 self._hexsha = hexsha
165 self._sha = hex_to_sha(hexsha)
174 class ShaFile(object):
175 """A git SHA file."""
177 __slots__ = ('_needs_parsing', '_chunked_text', '_file', '_path',
178 '_sha', '_needs_serialization', '_magic')
181 def _parse_legacy_object_header(magic, f):
182 """Parse a legacy object, creating it but not reading the file."""
184 decomp = zlib.decompressobj()
185 header = decomp.decompress(magic)
189 extra = f.read(bufsize)
190 header += decomp.decompress(extra)
192 end = header.find("\0", start)
194 header = header[:end]
195 type_name, size = header.split(" ", 1)
196 size = int(size) # sanity check
197 obj_class = object_class(type_name)
199 raise ObjectFormatException("Not a known type: %s" % type_name)
204 def _parse_legacy_object(self, map):
205 """Parse a legacy object, setting the raw string."""
206 text = _decompress(map)
207 header_end = text.find('\0')
209 raise ObjectFormatException("Invalid object header, no \\0")
210 self.set_raw_string(text[header_end+1:])
def as_legacy_object_chunks(self):
    """Yield this object's legacy form as zlib-compressed chunks.

    The header and then every raw chunk are fed through one compressor,
    followed by the compressor's final flush output.

    :return: Iterator over compressed strings.
    """
    compressor = zlib.compressobj()
    compress = compressor.compress
    yield compress(self._header())
    for raw_chunk in self.as_raw_chunks():
        yield compress(raw_chunk)
    yield compressor.flush()
def as_legacy_object(self):
    """Return the legacy form of this object as one string.

    :return: Concatenation of the chunks from as_legacy_object_chunks().
    """
    compressed_chunks = self.as_legacy_object_chunks()
    return "".join(compressed_chunks)
def as_raw_chunks(self):
    """Return the raw contents of this object as a list of chunks.

    If the object still needs parsing it is parsed first; otherwise, if it
    needs serialization, the chunks are regenerated via _serialize().

    :return: The current chunked text.
    """
    if self._needs_parsing:
        self._ensure_parsed()
        return self._chunked_text
    if self._needs_serialization:
        self._chunked_text = self._serialize()
    return self._chunked_text
def as_raw_string(self):
    """Return the raw contents of this object as a single string.

    :return: Concatenation of the chunks from as_raw_chunks().
    """
    raw_chunks = self.as_raw_chunks()
    return "".join(raw_chunks)
233 return self.as_raw_string()
def as_pretty_string(self):
    """Return a printable form of this object.

    This implementation simply returns the raw string.
    """
    pretty = self.as_raw_string()
    return pretty
241 def _ensure_parsed(self):
242 if self._needs_parsing:
243 if not self._chunked_text:
244 if self._file is not None:
245 self._parse_file(self._file)
247 elif self._path is not None:
250 raise AssertionError(
251 "ShaFile needs either text or filename")
252 self._deserialize(self._chunked_text)
253 self._needs_parsing = False
def set_raw_string(self, text):
    """Replace this object's contents with a single raw string.

    The string is handed to set_raw_chunks() as a one-element list.

    :param text: The raw contents; must be a str (subclasses are accepted).
    :raise TypeError: if text is not a str instance
    """
    # isinstance() rather than an exact type comparison, so that str
    # subclasses are not rejected.
    if not isinstance(text, str):
        raise TypeError(text)
    self.set_raw_chunks([text])
260 def set_raw_chunks(self, chunks):
261 self._chunked_text = chunks
262 self._deserialize(chunks)
264 self._needs_parsing = False
265 self._needs_serialization = False
268 def _parse_object_header(magic, f):
269 """Parse a new style object, creating it but not reading the file."""
270 num_type = (ord(magic[0]) >> 4) & 7
271 obj_class = object_class(num_type)
273 raise ObjectFormatException("Not a known type %d" % num_type)
278 def _parse_object(self, map):
279 """Parse a new style object, setting self._text."""
280 # skip type and size; type must have already been determined, and
281 # we trust zlib to fail if it's otherwise corrupted
284 while (byte & 0x80) != 0:
285 byte = ord(map[used])
288 self.set_raw_string(_decompress(raw))
def _is_legacy_object(cls, magic):
    """Tell whether a two-character magic prefix marks a legacy object.

    :param magic: Sequence of exactly two characters.
    :return: True if the first character's ordinal is 0x78 and the two
        ordinals, combined as (first << 8) + second, are divisible by 31.
    """
    # NOTE(review): presumably this recognises a zlib stream header - confirm.
    first, second = [ord(char) for char in magic]
    if first != 0x78:
        return False
    return ((first << 8) + second) % 31 == 0
297 def _parse_file_header(cls, f):
299 if cls._is_legacy_object(magic):
300 return cls._parse_legacy_object_header(magic, f)
302 return cls._parse_object_header(magic, f)
305 """Don't call this directly"""
310 self._chunked_text = []
311 self._needs_parsing = False
312 self._needs_serialization = True
def _deserialize(self, chunks):
    """Populate this object from raw chunks; not implemented here.

    :param chunks: The raw chunks to parse.
    :raise NotImplementedError: always
    """
    unimplemented = self._deserialize
    raise NotImplementedError(unimplemented)
def _serialize(self):
    """Produce the raw chunks for this object; not implemented here.

    :raise NotImplementedError: always
    """
    unimplemented = self._serialize
    raise NotImplementedError(unimplemented)
320 def _parse_path(self):
321 f = GitFile(self._path, 'rb')
327 def _parse_file(self, f):
331 map = magic + f.read()
332 if self._is_legacy_object(magic[:2]):
333 self._parse_legacy_object(map)
335 self._parse_object(map)
338 def from_path(cls, path):
339 f = GitFile(path, 'rb')
341 obj = cls.from_file(f)
343 obj._sha = FixedSha(filename_to_hex(path))
351 def from_file(cls, f):
352 """Get the contents of a SHA file on disk."""
354 obj = cls._parse_file_header(f)
356 obj._needs_parsing = True
357 obj._needs_serialization = True
360 except (IndexError, ValueError), e:
361 raise ObjectFormatException("invalid object header")
364 def from_raw_string(type_num, string):
365 """Creates an object of the indicated type from the raw string given.
367 :param type_num: The numeric type of the object.
368 :param string: The raw uncompressed contents.
370 obj = object_class(type_num)()
371 obj.set_raw_string(string)
375 def from_raw_chunks(type_num, chunks):
376 """Creates an object of the indicated type from the raw chunks given.
378 :param type_num: The numeric type of the object.
379 :param chunks: An iterable of the raw uncompressed contents.
381 obj = object_class(type_num)()
382 obj.set_raw_chunks(chunks)
386 def from_string(cls, string):
387 """Create a ShaFile from a string."""
389 obj.set_raw_string(string)
def _check_has_member(self, member, error_msg):
    """Verify that a member variable is present and is not None.

    :param member: the member variable to check for
    :param error_msg: the message for an error if the member is missing
    :raise ObjectFormatException: with the given error_msg if member is
        absent or set to None
    """
    value = getattr(self, member, None)
    if value is None:
        raise ObjectFormatException(error_msg)
404 """Check this object for internal consistency.
406 :raise ObjectFormatException: if the object is malformed in some way
407 :raise ChecksumMismatch: if the object was created with a SHA that does
408 not match its contents
410 # TODO: if we find that error-checking during object parsing is a
411 # performance bottleneck, those checks should be moved to the class's
412 # check() method during optimization so we can still check the object
416 self._deserialize(self.as_raw_chunks())
420 raise ObjectFormatException(e)
421 if old_sha != new_sha:
422 raise ChecksumMismatch(new_sha, old_sha)
425 return object_header(self.type, self.raw_length())
427 def raw_length(self):
428 """Returns the length of the raw string of this object."""
430 for chunk in self.as_raw_chunks():
436 ret.update(self._header())
437 for chunk in self.as_raw_chunks():
442 """The SHA1 object that is the name of this object."""
443 if self._sha is None or self._needs_serialization:
444 # this is a local because as_raw_chunks() overwrites self._sha
446 new_sha.update(self._header())
447 for chunk in self.as_raw_chunks():
448 new_sha.update(chunk)
454 return self.sha().hexdigest()
459 def set_type(self, type):
462 # DEPRECATED: use type_num or type_name as needed.
463 type = property(get_type, set_type)
466 return "<%s %s>" % (self.__class__.__name__, self.id)
def __ne__(self, other):
    """Return true if the ids of the two objects differ."""
    mine, theirs = self.id, other.id
    return mine != theirs
def __eq__(self, other):
    """Return true if the ids of the two objects match.

    The __le__ etc methods aren't overridden as they make no sense,
    certainly at this level.
    """
    mine, theirs = self.id, other.id
    return mine == theirs
481 """A Git Blob object."""
489 super(Blob, self).__init__()
490 self._chunked_text = []
491 self._needs_parsing = False
492 self._needs_serialization = False
495 return self.as_raw_string()
def _set_data(self, data):
    """Setter for the data property: store data as the raw string.

    :param data: New contents, passed on to set_raw_string().
    """
    store = self.set_raw_string
    store(data)
500 data = property(_get_data, _set_data,
501 "The text contained within the blob object.")
def _get_chunked(self):
    """Getter for the chunked property.

    :return: The chunked text, after making sure the object is parsed.
    """
    self._ensure_parsed()
    chunks = self._chunked_text
    return chunks
def _set_chunked(self, chunks):
    """Setter for the chunked property: store chunks as the chunked text.

    :param chunks: The new chunks; stored as given, without copying.
    """
    setattr(self, "_chunked_text", chunks)
def _serialize(self):
    """Return the chunked text, parsing first if there is none yet.

    Also clears the _needs_serialization flag.

    :return: The chunked text.
    """
    have_chunks = bool(self._chunked_text)
    if not have_chunks:
        self._ensure_parsed()
    self._needs_serialization = False
    return self._chunked_text
def _deserialize(self, chunks):
    """Take the given chunks as the chunked text, with no further parsing.

    :param chunks: The raw chunks; stored as given.
    """
    setattr(self, "_chunked_text", chunks)
519 chunked = property(_get_chunked, _set_chunked,
520 "The text within the blob object, as chunks (not necessarily lines).")
523 def from_path(cls, path):
524 blob = ShaFile.from_path(path)
525 if not isinstance(blob, cls):
526 raise NotBlobError(path)
530 """Check this object for internal consistency.
532 :raise ObjectFormatException: if the object is malformed in some way
534 super(Blob, self).check()
537 def _parse_tag_or_commit(text):
538 """Parse tag or commit text.
540 :param text: the raw text of the tag or commit object.
541 :return: iterator of tuples of (field, value), one per header line, in the
542 order read from the text, possibly including duplicates. Includes a
543 field named None for the freeform tag/commit text.
549 # Empty line indicates end of headers
551 yield l.split(" ", 1)
552 yield (None, f.read())
557 return _parse_tag_or_commit(text)
561 """A Git Tag object."""
566 __slots__ = ('_tag_timezone_neg_utc', '_name', '_object_sha',
567 '_object_class', '_tag_time', '_tag_timezone',
568 '_tagger', '_message')
571 super(Tag, self).__init__()
572 self._tag_timezone_neg_utc = False
575 def from_path(cls, filename):
576 tag = ShaFile.from_path(filename)
577 if not isinstance(tag, cls):
578 raise NotTagError(filename)
582 """Check this object for internal consistency.
584 :raise ObjectFormatException: if the object is malformed in some way
586 super(Tag, self).check()
587 self._check_has_member("_object_sha", "missing object sha")
588 self._check_has_member("_object_class", "missing object type")
589 self._check_has_member("_name", "missing tag name")
592 raise ObjectFormatException("empty tag name")
594 check_hexsha(self._object_sha, "invalid object sha")
596 if getattr(self, "_tagger", None):
597 check_identity(self._tagger, "invalid tagger")
600 for field, _ in parse_tag("".join(self._chunked_text)):
601 if field == _OBJECT_HEADER and last is not None:
602 raise ObjectFormatException("unexpected object")
603 elif field == _TYPE_HEADER and last != _OBJECT_HEADER:
604 raise ObjectFormatException("unexpected type")
605 elif field == _TAG_HEADER and last != _TYPE_HEADER:
606 raise ObjectFormatException("unexpected tag name")
607 elif field == _TAGGER_HEADER and last != _TAG_HEADER:
608 raise ObjectFormatException("unexpected tagger")
611 def _serialize(self):
613 chunks.append("%s %s\n" % (_OBJECT_HEADER, self._object_sha))
614 chunks.append("%s %s\n" % (_TYPE_HEADER, self._object_class.type_name))
615 chunks.append("%s %s\n" % (_TAG_HEADER, self._name))
617 if self._tag_time is None:
618 chunks.append("%s %s\n" % (_TAGGER_HEADER, self._tagger))
620 chunks.append("%s %s %d %s\n" % (
621 _TAGGER_HEADER, self._tagger, self._tag_time,
622 format_timezone(self._tag_timezone,
623 self._tag_timezone_neg_utc)))
624 chunks.append("\n") # To close headers
625 chunks.append(self._message)
628 def _deserialize(self, chunks):
629 """Grab the metadata attached to the tag"""
631 for field, value in parse_tag("".join(chunks)):
632 if field == _OBJECT_HEADER:
633 self._object_sha = value
634 elif field == _TYPE_HEADER:
635 obj_class = object_class(value)
637 raise ObjectFormatException("Not a known type: %s" % value)
638 self._object_class = obj_class
639 elif field == _TAG_HEADER:
641 elif field == _TAGGER_HEADER:
643 sep = value.index("> ")
646 self._tag_time = None
647 self._tag_timezone = None
648 self._tag_timezone_neg_utc = False
650 self._tagger = value[0:sep+1]
652 (timetext, timezonetext) = value[sep+2:].rsplit(" ", 1)
653 self._tag_time = int(timetext)
654 self._tag_timezone, self._tag_timezone_neg_utc = \
655 parse_timezone(timezonetext)
656 except ValueError, e:
657 raise ObjectFormatException(e)
659 self._message = value
661 raise ObjectFormatException("Unknown field %s" % field)
def _get_object(self):
    """Return the object this tag points to.

    :return: tuple of (object class, sha).
    """
    self._ensure_parsed()
    target_class = self._object_class
    target_sha = self._object_sha
    return (target_class, target_sha)
def _set_object(self, value):
    """Set the object this tag points to.

    :param value: tuple of (object class, sha).
    """
    self._ensure_parsed()
    # Unpack before assigning so a malformed value leaves the tag untouched.
    target_class, target_sha = value
    self._object_class = target_class
    self._object_sha = target_sha
    self._needs_serialization = True
676 object = property(_get_object, _set_object)
678 name = serializable_property("name", "The name of this tag")
679 tagger = serializable_property("tagger",
680 "Returns the name of the person who created this tag")
681 tag_time = serializable_property("tag_time",
682 "The creation timestamp of the tag. As the number of seconds since the epoch")
683 tag_timezone = serializable_property("tag_timezone",
684 "The timezone that tag_time is in.")
685 message = serializable_property("message", "The message attached to this tag")
688 def parse_tree(text):
689 """Parse a tree text.
691 :param text: Serialized text to parse
692 :return: iterator of tuples of (name, mode, sha)
697 mode_end = text.index(' ', count)
698 mode_text = text[count:mode_end]
699 assert mode_text[0] != '0'
701 mode = int(mode_text, 8)
703 raise ObjectFormatException("Invalid mode '%s'" % mode_text)
704 name_end = text.index('\0', mode_end)
705 name = text[mode_end+1:name_end]
707 sha = text[name_end+1:count]
709 raise ObjectFormatException("Sha has invalid length")
710 hexsha = sha_to_hex(sha)
711 yield (name, mode, hexsha)
def serialize_tree(items):
    """Serialize tree entries, one text chunk per entry.

    Each chunk is the mode in octal, a space, the name, a NUL character and
    the result of hex_to_sha() on the entry's hex SHA.

    :param items: Sorted iterable over (name, mode, sha) tuples
    :return: Serialized tree text as chunks
    """
    for entry_name, entry_mode, entry_hexsha in items:
        binary_sha = hex_to_sha(entry_hexsha)
        yield "%04o %s\0%s" % (entry_mode, entry_name, binary_sha)
724 def sorted_tree_items(entries):
725 """Iterate over a tree entries dictionary in the order in which
726 the items would be serialized.
728 :param entries: Dictionary mapping names to (mode, sha) tuples
729 :return: Iterator over (name, mode, hexsha)
731 for name, entry in sorted(entries.iteritems(), cmp=cmp_entry):
733 # Stricter type checks than normal to mirror checks in the C version.
735 if not isinstance(hexsha, str):
736 raise TypeError('Expected a string for SHA, got %r' % hexsha)
737 yield TreeEntry(name, mode, hexsha)
740 def cmp_entry((name1, value1), (name2, value2)):
741 """Compare two tree entries."""
742 if stat.S_ISDIR(value1[0]):
744 if stat.S_ISDIR(value2[0]):
746 return cmp(name1, name2)
750 """A Git tree object"""
755 __slots__ = ('_entries')
758 super(Tree, self).__init__()
762 def from_path(cls, filename):
763 tree = ShaFile.from_path(filename)
764 if not isinstance(tree, cls):
765 raise NotTreeError(filename)
def __contains__(self, name):
    """Return whether the tree has an entry called name."""
    self._ensure_parsed()
    entries = self._entries
    return name in entries
def __getitem__(self, name):
    """Return the stored entry for name.

    :raise KeyError: if the entries mapping has no such name
    """
    self._ensure_parsed()
    entries = self._entries
    return entries[name]
776 def __setitem__(self, name, value):
777 """Set a tree entry by name.
779 :param name: The name of the entry, as a string.
780 :param value: A tuple of (mode, hexsha), where mode is the mode of the
781 entry as an integral type and hexsha is the hex SHA of the entry as
785 self._ensure_parsed()
786 self._entries[name] = (mode, hexsha)
787 self._needs_serialization = True
def __delitem__(self, name):
    """Remove the entry called name and mark the tree for serialization.

    :raise KeyError: if the entries mapping has no such name
    """
    self._ensure_parsed()
    entries = self._entries
    del entries[name]
    self._needs_serialization = True
795 self._ensure_parsed()
796 return len(self._entries)
799 self._ensure_parsed()
800 return iter(self._entries)
def add(self, mode, name, hexsha):
    """Add an entry to the tree, replacing any entry with the same name.

    :param mode: The mode of the entry as an integral type. Not all possible
        modes are supported by git; see check() for details.
    :param name: The name of the entry, as a string.
    :param hexsha: The hex SHA of the entry as a string.
    """
    self._ensure_parsed()
    entry = (mode, hexsha)
    self._entries[name] = entry
    self._needs_serialization = True
815 """Return a list of tuples describing the tree entries.
817 :note: The order of the tuples that are returned is different from that
818 returned by the items and iteritems methods. This function will be
819 deprecated in the future.
821 self._ensure_parsed()
822 # The order of this is different from iteritems() for historical
825 (mode, name, hexsha) for (name, mode, hexsha) in self.iteritems()]
828 """Iterate over entries in the order in which they would be serialized.
830 :return: Iterator over (name, mode, sha) tuples
832 self._ensure_parsed()
833 return sorted_tree_items(self._entries)
836 """Return the sorted entries in this tree.
838 :return: List with (name, mode, sha) tuples
840 return list(self.iteritems())
842 def _deserialize(self, chunks):
843 """Grab the entries in the tree"""
845 parsed_entries = parse_tree("".join(chunks))
846 except ValueError, e:
847 raise ObjectFormatException(e)
848 # TODO: list comprehension is for efficiency in the common (small) case;
849 # if memory efficiency in the large case is a concern, use a genexp.
850 self._entries = dict([(n, (m, s)) for n, m, s in parsed_entries])
853 """Check this object for internal consistency.
855 :raise ObjectFormatException: if the object is malformed in some way
857 super(Tree, self).check()
859 allowed_modes = (stat.S_IFREG | 0755, stat.S_IFREG | 0644,
860 stat.S_IFLNK, stat.S_IFDIR, S_IFGITLINK,
861 # TODO: optionally exclude as in git fsck --strict
863 for name, mode, sha in parse_tree("".join(self._chunked_text)):
864 check_hexsha(sha, 'invalid sha %s' % sha)
865 if '/' in name or name in ('', '.', '..'):
866 raise ObjectFormatException('invalid name %s' % name)
868 if mode not in allowed_modes:
869 raise ObjectFormatException('invalid mode %06o' % mode)
871 entry = (name, (mode, sha))
873 if cmp_entry(last, entry) > 0:
874 raise ObjectFormatException('entries not sorted')
876 raise ObjectFormatException('duplicate entry %s' % name)
def _serialize(self):
    """Serialize this tree's entries into a list of chunks.

    :return: List built from serialize_tree() over self.iteritems().
    """
    ordered_entries = self.iteritems()
    return list(serialize_tree(ordered_entries))
882 def as_pretty_string(self):
884 for name, mode, hexsha in self.iteritems():
885 if mode & stat.S_IFDIR:
889 text.append("%04o %s %s\t%s\n" % (mode, kind, hexsha, name))
893 def parse_timezone(text):
894 """Parse a timezone text fragment (e.g. '+0100').
896 :param text: Text to parse.
897 :return: Tuple with timezone as seconds difference to UTC
898 and a boolean indicating whether this was a UTC timezone
899 prefixed with a negative sign (-0000).
902 negative_utc = (offset == 0 and text[0] == '-')
903 signum = (offset < 0) and -1 or 1
905 hours = int(offset / 100)
906 minutes = (offset % 100)
907 return signum * (hours * 3600 + minutes * 60), negative_utc
910 def format_timezone(offset, negative_utc=False):
911 """Format a timezone for Git serialization.
913 :param offset: Timezone offset as seconds difference to UTC
914 :param negative_utc: Whether to use a minus sign for UTC
915 (-0000 rather than +0000).
918 raise ValueError("Unable to handle non-minute offset.")
919 if offset < 0 or (offset == 0 and negative_utc):
924 return '%c%02d%02d' % (sign, offset / 3600, (offset / 60) % 60)
def parse_commit(text):
    """Parse commit text by delegating to _parse_tag_or_commit().

    :param text: the raw text of the commit object.
    :return: whatever _parse_tag_or_commit() returns for text.
    """
    parsed = _parse_tag_or_commit(text)
    return parsed
931 class Commit(ShaFile):
932 """A git commit object"""
937 __slots__ = ('_parents', '_encoding', '_extra', '_author_timezone_neg_utc',
938 '_commit_timezone_neg_utc', '_commit_time',
939 '_author_time', '_author_timezone', '_commit_timezone',
940 '_author', '_committer', '_parents', '_extra',
941 '_encoding', '_tree', '_message')
944 super(Commit, self).__init__()
946 self._encoding = None
948 self._author_timezone_neg_utc = False
949 self._commit_timezone_neg_utc = False
952 def from_path(cls, path):
953 commit = ShaFile.from_path(path)
954 if not isinstance(commit, cls):
955 raise NotCommitError(path)
958 def _deserialize(self, chunks):
962 for field, value in parse_commit("".join(self._chunked_text)):
963 if field == _TREE_HEADER:
965 elif field == _PARENT_HEADER:
966 self._parents.append(value)
967 elif field == _AUTHOR_HEADER:
968 self._author, timetext, timezonetext = value.rsplit(" ", 2)
969 self._author_time = int(timetext)
970 self._author_timezone, self._author_timezone_neg_utc =\
971 parse_timezone(timezonetext)
972 elif field == _COMMITTER_HEADER:
973 self._committer, timetext, timezonetext = value.rsplit(" ", 2)
974 self._commit_time = int(timetext)
975 self._commit_timezone, self._commit_timezone_neg_utc =\
976 parse_timezone(timezonetext)
977 elif field == _ENCODING_HEADER:
978 self._encoding = value
980 self._message = value
982 self._extra.append((field, value))
985 """Check this object for internal consistency.
987 :raise ObjectFormatException: if the object is malformed in some way
989 super(Commit, self).check()
990 self._check_has_member("_tree", "missing tree")
991 self._check_has_member("_author", "missing author")
992 self._check_has_member("_committer", "missing committer")
993 # times are currently checked when set
995 for parent in self._parents:
996 check_hexsha(parent, "invalid parent sha")
997 check_hexsha(self._tree, "invalid tree sha")
999 check_identity(self._author, "invalid author")
1000 check_identity(self._committer, "invalid committer")
1003 for field, _ in parse_commit("".join(self._chunked_text)):
1004 if field == _TREE_HEADER and last is not None:
1005 raise ObjectFormatException("unexpected tree")
1006 elif field == _PARENT_HEADER and last not in (_PARENT_HEADER,
1008 raise ObjectFormatException("unexpected parent")
1009 elif field == _AUTHOR_HEADER and last not in (_TREE_HEADER,
1011 raise ObjectFormatException("unexpected author")
1012 elif field == _COMMITTER_HEADER and last != _AUTHOR_HEADER:
1013 raise ObjectFormatException("unexpected committer")
1014 elif field == _ENCODING_HEADER and last != _COMMITTER_HEADER:
1015 raise ObjectFormatException("unexpected encoding")
1018 # TODO: optionally check for duplicate parents
1020 def _serialize(self):
1022 chunks.append("%s %s\n" % (_TREE_HEADER, self._tree))
1023 for p in self._parents:
1024 chunks.append("%s %s\n" % (_PARENT_HEADER, p))
1025 chunks.append("%s %s %s %s\n" % (
1026 _AUTHOR_HEADER, self._author, str(self._author_time),
1027 format_timezone(self._author_timezone,
1028 self._author_timezone_neg_utc)))
1029 chunks.append("%s %s %s %s\n" % (
1030 _COMMITTER_HEADER, self._committer, str(self._commit_time),
1031 format_timezone(self._commit_timezone,
1032 self._commit_timezone_neg_utc)))
1034 chunks.append("%s %s\n" % (_ENCODING_HEADER, self.encoding))
1035 for k, v in self.extra:
1036 if "\n" in k or "\n" in v:
1037 raise AssertionError("newline in extra data: %r -> %r" % (k, v))
1038 chunks.append("%s %s\n" % (k, v))
1039 chunks.append("\n") # There must be a new line after the headers
1040 chunks.append(self._message)
1043 tree = serializable_property("tree", "Tree that is the state of this commit")
def _get_parents(self):
    """Return the list of parents of this commit, parsing first if needed."""
    self._ensure_parsed()
    parents = self._parents
    return parents
def _set_parents(self, value):
    """Replace the list of parents of this commit.

    The commit is parsed first if needed and then marked as needing
    serialization.

    :param value: The new parents; stored as given.
    """
    self._ensure_parsed()
    self._parents = value
    self._needs_serialization = True
1056 parents = property(_get_parents, _set_parents)
1058 def _get_extra(self):
1059 """Return extra settings of this commit."""
1060 self._ensure_parsed()
1063 extra = property(_get_extra)
1065 author = serializable_property("author",
1066 "The name of the author of the commit")
1068 committer = serializable_property("committer",
1069 "The name of the committer of the commit")
1071 message = serializable_property("message",
1072 "The commit message")
1074 commit_time = serializable_property("commit_time",
1075 "The timestamp of the commit. As the number of seconds since the epoch.")
1077 commit_timezone = serializable_property("commit_timezone",
1078 "The zone the commit time is in")
1080 author_time = serializable_property("author_time",
1081 "The timestamp the commit was written. as the number of seconds since the epoch.")
1083 author_timezone = serializable_property("author_timezone",
1084 "Returns the zone the author time is in.")
1086 encoding = serializable_property("encoding",
1087 "Encoding of the commit message.")
1099 for cls in OBJECT_CLASSES:
1100 _TYPE_MAP[cls.type_name] = cls
1101 _TYPE_MAP[cls.type_num] = cls
1105 # Hold on to the pure-python implementations for testing
1106 _parse_tree_py = parse_tree
1107 _sorted_tree_items_py = sorted_tree_items
1109 # Try to import C versions
1110 from dulwich._objects import parse_tree, sorted_tree_items