1 # repo.py -- For dealing with git repositories.
2 # Copyright (C) 2007 James Westby <jw+debian@jameswestby.net>
3 # Copyright (C) 2008-2009 Jelmer Vernooij <jelmer@samba.org>
5 # This program is free software; you can redistribute it and/or
6 # modify it under the terms of the GNU General Public License
7 # as published by the Free Software Foundation; version 2
8 # of the License or (at your option) any later version of
11 # This program is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
22 """Repository access."""
28 from dulwich.errors import (
38 from dulwich.file import (
42 from dulwich.object_store import (
45 from dulwich.objects import (
59 REFSDIR_HEADS = 'heads'
60 INDEX_FILENAME = "index"
68 [REFSDIR, REFSDIR_TAGS],
69 [REFSDIR, REFSDIR_HEADS],
def read_info_refs(f):
    """Read a listing of refs, one "<sha>\\t<name>" entry per line.

    :param f: file-like object (or any iterable of lines) to read from
    :return: Dictionary mapping ref names to SHA1s
    :raises ValueError: if a line does not contain a tab separator
    """
    ret = {}
    # Iterate lazily instead of calling readlines(), so a large listing is
    # never held in memory as a whole.
    for l in f:
        # Strip "\r\n" rather than just "\n" so that CRLF input does not
        # leave a stray carriage return at the end of the ref name.
        (sha, name) = l.rstrip("\r\n").split("\t", 1)
        ret[name] = sha
    return ret
def check_ref_format(refname):
    """Check if a refname is correctly formatted.

    Implements all the same rules as git-check-ref-format[1].

    [1] http://www.kernel.org/pub/software/scm/git/docs/git-check-ref-format.html

    :param refname: The refname to check
    :return: True if refname is valid, False otherwise
    """
    # These could be combined into one big expression, but are listed separately
    # to parallel [1].
    # No path component may begin with a dot.
    if '/.' in refname or refname.startswith('.'):
        return False
    # At least one slash is required; this also rejects the empty string, so
    # the refname[-1] lookup further down can never fail.
    if '/' not in refname:
        return False
    if '..' in refname:
        return False
    for c in refname:
        # 0x20 (space) is the first non-control character. Written in hex
        # because the old-style octal literal is a syntax error on Python 3.
        if ord(c) < 0x20 or c in '\177 ~^:?*[':
            return False
    if refname[-1] in '/.':
        return False
    if refname.endswith('.lock'):
        return False
    if '@{' in refname:
        return False
    if '\\' in refname:
        return False
    return True
115 class RefsContainer(object):
116 """A container for refs."""
118 def set_ref(self, name, other):
119 """Make a ref point at another ref.
121 :param name: Name of the ref to set
122 :param other: Name of the ref to point at
124 self[name] = SYMREF + other + '\n'
126 def get_packed_refs(self):
127 """Get contents of the packed-refs file.
129 :return: Dictionary mapping ref names to SHA1s
131 :note: Will return an empty dictionary when no packed-refs file is
134 raise NotImplementedError(self.get_packed_refs)
136 def get_peeled(self, name):
137 """Return the cached peeled value of a ref, if available.
139 :param name: Name of the ref to peel
140 :return: The peeled value of the ref. If the ref is known not to point to a
141 tag, this will be the SHA the ref refers to. If the ref may point to
142 a tag, but no cached information is available, None is returned.
146 def import_refs(self, base, other):
147 for name, value in other.iteritems():
148 self["%s/%s" % (base, name)] = value
150 def keys(self, base=None):
151 """Refs present in this container.
153 :param base: An optional base to return refs under
154 :return: An unsorted set of valid refs in this container, including
158 return self.subkeys(base)
160 return self.allkeys()
162 def subkeys(self, base):
164 for refname in self.allkeys():
165 if refname.startswith(base):
169 def as_dict(self, base=None):
170 """Return the contents of this container as a dictionary.
174 keys = self.keys(base)
179 ret[key] = self[("%s/%s" % (base, key)).strip("/")]
181 continue # Unable to resolve
185 def _check_refname(self, name):
186 """Ensure a refname is valid and lives in refs or is HEAD.
188 HEAD is not a valid refname according to git-check-ref-format, but this
189 class needs to be able to touch HEAD. Also, check_ref_format expects
190 refnames without the leading 'refs/', but this class requires that
191 so it cannot touch anything outside the refs dir (or HEAD).
193 :param name: The name of the reference.
194 :raises KeyError: if a refname is not HEAD or is otherwise not valid.
198 if not name.startswith('refs/') or not check_ref_format(name[5:]):
201 def read_loose_ref(self, name):
202 """Read a loose reference and return its contents.
204 :param name: the refname to read
205 :return: The contents of the ref file, or None if it does
208 raise NotImplementedError(self.read_loose_ref)
210 def _follow(self, name):
211 """Follow a reference name.
213 :return: a tuple of (refname, sha), where refname is the name of the
214 last reference in the symbolic reference chain
216 self._check_refname(name)
217 contents = SYMREF + name
219 while contents.startswith(SYMREF):
220 refname = contents[len(SYMREF):]
221 contents = self.read_loose_ref(refname)
223 contents = self.get_packed_refs().get(refname, None)
229 return refname, contents
231 def __getitem__(self, name):
232 """Get the SHA1 for a reference name.
234 This method follows all symbolic references.
236 _, sha = self._follow(name)
242 class DictRefsContainer(RefsContainer):
244 def __init__(self, refs):
248 return self._refs.keys()
250 def read_loose_ref(self, name):
251 return self._refs[name]
254 class DiskRefsContainer(RefsContainer):
255 """Refs container that reads refs from disk."""
257 def __init__(self, path):
259 self._packed_refs = None
260 self._peeled_refs = None
263 return "%s(%r)" % (self.__class__.__name__, self.path)
265 def subkeys(self, base):
267 path = self.refpath(base)
268 for root, dirs, files in os.walk(path):
269 dir = root[len(path):].strip(os.path.sep).replace(os.path.sep, "/")
270 for filename in files:
271 refname = ("%s/%s" % (dir, filename)).strip("/")
272 # check_ref_format requires at least one /, so we prepend the
273 # base before calling it.
274 if check_ref_format("%s/%s" % (base, refname)):
276 for key in self.get_packed_refs():
277 if key.startswith(base):
278 keys.add(key[len(base):].strip("/"))
283 if os.path.exists(self.refpath("HEAD")):
285 path = self.refpath("")
286 for root, dirs, files in os.walk(self.refpath("refs")):
287 dir = root[len(path):].strip(os.path.sep).replace(os.path.sep, "/")
288 for filename in files:
289 refname = ("%s/%s" % (dir, filename)).strip("/")
290 if check_ref_format(refname):
292 keys.update(self.get_packed_refs())
295 def refpath(self, name):
296 """Return the disk path of a ref.
299 if os.path.sep != "/":
300 name = name.replace("/", os.path.sep)
301 return os.path.join(self.path, name)
303 def get_packed_refs(self):
304 """Get contents of the packed-refs file.
306 :return: Dictionary mapping ref names to SHA1s
308 :note: Will return an empty dictionary when no packed-refs file is
311 # TODO: invalidate the cache on repacking
312 if self._packed_refs is None:
313 self._packed_refs = {}
314 path = os.path.join(self.path, 'packed-refs')
316 f = GitFile(path, 'rb')
318 if e.errno == errno.ENOENT:
322 first_line = iter(f).next().rstrip()
323 if (first_line.startswith("# pack-refs") and " peeled" in
325 self._peeled_refs = {}
326 for sha, name, peeled in read_packed_refs_with_peeled(f):
327 self._packed_refs[name] = sha
329 self._peeled_refs[name] = peeled
332 for sha, name in read_packed_refs(f):
333 self._packed_refs[name] = sha
336 return self._packed_refs
338 def get_peeled(self, name):
339 """Return the cached peeled value of a ref, if available.
341 :param name: Name of the ref to peel
342 :return: The peeled value of the ref. If the ref is known not to point to a
343 tag, this will be the SHA the ref refers to. If the ref may point to
344 a tag, but no cached information is available, None is returned.
346 self.get_packed_refs()
347 if self._peeled_refs is None or name not in self._packed_refs:
348 # No cache: no peeled refs were read, or this ref is loose
350 if name in self._peeled_refs:
351 return self._peeled_refs[name]
356 def read_loose_ref(self, name):
357 """Read a reference file and return its contents.
359 If the reference file is a symbolic reference, only read the first line of
360 the file. Otherwise, only read the first 40 bytes.
362 :param name: the refname to read, relative to refpath
363 :return: The contents of the ref file, or None if the file does not
365 :raises IOError: if any other error occurs
367 filename = self.refpath(name)
369 f = GitFile(filename, 'rb')
371 header = f.read(len(SYMREF))
373 # Read only the first line
374 return header + iter(f).next().rstrip("\n")
376 # Read only the first 40 bytes
377 return header + f.read(40-len(SYMREF))
381 if e.errno == errno.ENOENT:
def _remove_packed_ref(self, name):
    """Remove a ref from the packed-refs file, if it is listed there.

    Does nothing when the packed refs have not been loaded yet
    (``self._packed_refs`` is None) or when ``name`` is not among them.

    :param name: Name of the ref to remove
    """
    if self._packed_refs is None:
        return
    filename = os.path.join(self.path, 'packed-refs')
    # reread cached refs from disk, while holding the lock
    f = GitFile(filename, 'wb')
    try:
        self._packed_refs = None
        self.get_packed_refs()

        if name not in self._packed_refs:
            return

        del self._packed_refs[name]
        # _peeled_refs stays None when packed-refs carries no peeled
        # information, so it must be checked before the membership test;
        # "name in None" would raise TypeError.
        if self._peeled_refs is not None and name in self._peeled_refs:
            del self._peeled_refs[name]
        write_packed_refs(f, self._packed_refs, self._peeled_refs)
        f.close()
    finally:
        # NOTE(review): relies on abort() being harmless after a successful
        # close() -- confirm against dulwich.file.GitFile.
        f.abort()
406 def set_if_equals(self, name, old_ref, new_ref):
407 """Set a refname to new_ref only if it currently equals old_ref.
409 This method follows all symbolic references, and can be used to perform
410 an atomic compare-and-swap operation.
412 :param name: The refname to set.
413 :param old_ref: The old sha the refname must refer to, or None to set
415 :param new_ref: The new sha the refname will refer to.
416 :return: True if the set was successful, False otherwise.
419 realname, _ = self._follow(name)
422 filename = self.refpath(realname)
423 ensure_dir_exists(os.path.dirname(filename))
424 f = GitFile(filename, 'wb')
426 if old_ref is not None:
428 # read again while holding the lock
429 orig_ref = self.read_loose_ref(realname)
431 orig_ref = self.get_packed_refs().get(realname, None)
432 if orig_ref != old_ref:
435 except (OSError, IOError):
439 f.write(new_ref+"\n")
440 except (OSError, IOError):
447 def add_if_new(self, name, ref):
448 """Add a new reference only if it does not already exist."""
449 self._check_refname(name)
450 filename = self.refpath(name)
451 ensure_dir_exists(os.path.dirname(filename))
452 f = GitFile(filename, 'wb')
454 if os.path.exists(filename) or name in self.get_packed_refs():
459 except (OSError, IOError):
466 def __setitem__(self, name, ref):
467 """Set a reference name to point to the given SHA1.
469 This method follows all symbolic references.
471 :note: This method unconditionally overwrites the contents of a reference
472 on disk. To update atomically only if the reference has not changed
473 on disk, use set_if_equals().
475 self.set_if_equals(name, None, ref)
477 def remove_if_equals(self, name, old_ref):
478 """Remove a refname only if it currently equals old_ref.
480 This method does not follow symbolic references. It can be used to
481 perform an atomic compare-and-delete operation.
483 :param name: The refname to delete.
484 :param old_ref: The old sha the refname must refer to, or None to delete
486 :return: True if the delete was successful, False otherwise.
488 self._check_refname(name)
489 filename = self.refpath(name)
490 ensure_dir_exists(os.path.dirname(filename))
491 f = GitFile(filename, 'wb')
493 if old_ref is not None:
494 orig_ref = self.read_loose_ref(name)
496 orig_ref = self.get_packed_refs().get(name, None)
497 if orig_ref != old_ref:
503 if e.errno != errno.ENOENT:
505 self._remove_packed_ref(name)
507 # never write, we just wanted the lock
511 def __delitem__(self, name):
514 This method does not follow symbolic references.
515 :note: This method unconditionally deletes the contents of a reference
516 on disk. To delete atomically only if the reference has not changed
517 on disk, use remove_if_equals().
519 self.remove_if_equals(name, None)
def _split_ref_line(line):
    """Split one "<sha> <name>" ref line into a (SHA1, name) tuple.

    :param line: a single line, with or without its trailing newline
    :return: tuple of (sha, name)
    :raises PackedRefsException: if the line does not have exactly two
        space-separated fields, the SHA1 is rejected by hex_to_sha, or the
        name fails check_ref_format
    """
    parts = line.rstrip("\n").split(" ")
    if len(parts) != 2:
        raise PackedRefsException("invalid ref line '%s'" % line)
    sha, name = parts
    try:
        # Called only for validation; the converted value is not needed.
        hex_to_sha(sha)
    except (AssertionError, TypeError) as err:
        raise PackedRefsException(err)
    if not check_ref_format(name):
        raise PackedRefsException("invalid ref name '%s'" % name)
    return (sha, name)
def read_packed_refs(f):
    """Read a packed refs file.

    Yields tuples with SHA1s and ref names.

    :param f: file-like object to read from
    :raises PackedRefsException: if a peeled ("^") line is found, or a ref
        line is rejected by _split_ref_line
    """
    for line in f:
        marker = line[0]
        if marker == "#":
            # Comment
            continue
        if marker == "^":
            raise PackedRefsException(
                "found peeled ref in packed-refs without peeled")
        yield _split_ref_line(line)
def read_packed_refs_with_peeled(f):
    """Read a packed refs file including peeled refs.

    Assumes the "# pack-refs with: peeled" line was already read. Yields
    tuples of (SHA1, ref name, peeled SHA1), where the peeled SHA1 is None
    for refs that are not followed by a "^" line.

    :param f: file-like object to read from, seek'ed to the second line
    :raises PackedRefsException: if a "^" line has no ref line before it,
        its SHA1 is rejected by hex_to_sha, or a ref line is rejected by
        _split_ref_line
    """
    # The most recent ref line that has not been yielded yet; it is held
    # back because the following line may carry its peeled value.
    last = None
    for l in f:
        if l.startswith("#"):
            continue
        l = l.rstrip("\r\n")
        # startswith() rather than l[0]: a line that is empty once stripped
        # would otherwise raise IndexError.
        if l.startswith("^"):
            if not last:
                raise PackedRefsException("unexpected peeled ref line")
            try:
                hex_to_sha(l[1:])
            except (AssertionError, TypeError) as e:
                raise PackedRefsException(e)
            sha, name = _split_ref_line(last)
            last = None
            yield (sha, name, l[1:])
        else:
            if last:
                sha, name = _split_ref_line(last)
                yield (sha, name, None)
            last = l
    # Flush a trailing ref line that had no peeled line after it.
    if last:
        sha, name = _split_ref_line(last)
        yield (sha, name, None)
def write_packed_refs(f, packed_refs, peeled_refs=None):
    """Write a packed refs file.

    Refs are written sorted by name, one "<sha> <name>" line each. A ref
    that has an entry in peeled_refs is followed by a "^<peeled sha>" line.

    :param f: empty file-like object to write to
    :param packed_refs: dict of refname to sha of packed refs to write
    :param peeled_refs: dict of refname to peeled value of sha; when None,
        the "# pack-refs with: peeled" header is left out
    """
    if peeled_refs is None:
        peeled_refs = {}
    else:
        f.write('# pack-refs with: peeled\n')
    # Sorting the dict itself sorts its keys, without the Python-2-only
    # iterkeys().
    for refname in sorted(packed_refs):
        f.write('%s %s\n' % (packed_refs[refname], refname))
        if refname in peeled_refs:
            f.write('^%s\n' % peeled_refs[refname])
604 class BaseRepo(object):
605 """Base class for a git repository.
607 :ivar object_store: Dictionary-like object for accessing
609 :ivar refs: Dictionary-like object with the refs in this repository
612 def __init__(self, object_store, refs):
613 self.object_store = object_store
616 def get_named_file(self, path):
617 """Get a file from the control dir with a specific name.
619 Although the filename should be interpreted as a filename relative to
620 the control dir in a disk-backed Repo, the object returned need not be
621 pointing to a file in that location.
623 :param path: The path to the file, relative to the control dir.
624 :return: An open file object, or None if the file does not exist.
626 raise NotImplementedError(self.get_named_file)
628 def open_index(self):
629 """Open the index for this repository.
631 :raises NoIndexPresent: If no index is present
632 :return: Index instance
634 raise NotImplementedError(self.open_index)
636 def fetch(self, target, determine_wants=None, progress=None):
637 """Fetch objects into another repository.
639 :param target: The target repository
640 :param determine_wants: Optional function to determine what refs to
642 :param progress: Optional progress function
644 if determine_wants is None:
645 determine_wants = lambda heads: heads.values()
646 target.object_store.add_objects(
647 self.fetch_objects(determine_wants, target.get_graph_walker(),
649 return self.get_refs()
651 def fetch_objects(self, determine_wants, graph_walker, progress,
653 """Fetch the missing objects required for a set of revisions.
655 :param determine_wants: Function that takes a dictionary with heads
656 and returns the list of heads to fetch.
657 :param graph_walker: Object that can iterate over the list of revisions
658 to fetch and has an "ack" method that will be called to acknowledge
659 that a revision is present.
660 :param progress: Simple progress function that will be called with
661 updated progress strings.
662 :param get_tagged: Function that returns a dict of pointed-to sha -> tag
663 sha for including tags.
664 :return: iterator over objects, with __len__ implemented
666 wants = determine_wants(self.get_refs())
667 haves = self.object_store.find_common_revisions(graph_walker)
668 return self.object_store.iter_shas(
669 self.object_store.find_missing_objects(haves, wants, progress,
672 def get_graph_walker(self, heads=None):
674 heads = self.refs.as_dict('refs/heads').values()
675 return self.object_store.get_graph_walker(heads)
678 """Return the SHA1 a ref is pointing to."""
679 return self.refs[name]
682 """Get dictionary with all refs."""
683 return self.refs.as_dict()
686 """Return the SHA1 pointed at by HEAD."""
687 return self.refs['HEAD']
689 def _get_object(self, sha, cls):
690 assert len(sha) in (20, 40)
691 ret = self.get_object(sha)
692 if ret._type != cls._type:
694 raise NotCommitError(ret)
696 raise NotBlobError(ret)
698 raise NotTreeError(ret)
700 raise NotTagError(ret)
702 raise Exception("Type invalid: %r != %r" % (ret._type, cls._type))
705 def get_object(self, sha):
706 return self.object_store[sha]
708 def get_parents(self, sha):
709 return self.commit(sha).parents
711 def get_config(self):
713 p = ConfigParser.RawConfigParser()
714 p.read(os.path.join(self._controldir, 'config'))
715 return dict((section, dict(p.items(section)))
716 for section in p.sections())
718 def commit(self, sha):
719 return self._get_object(sha, Commit)
722 return self._get_object(sha, Tree)
725 return self._get_object(sha, Tag)
727 def get_peeled(self, ref):
728 """Get the peeled value of a ref.
730 :param ref: the refname to peel
731 :return: the fully-peeled SHA1 of a tag object, after peeling all
732 intermediate tags; if the original ref does not point to a tag, this
733 will equal the original SHA1.
735 cached = self.refs.get_peeled(ref)
736 if cached is not None:
739 obj_type = num_type_map[obj.type]
740 while obj_type == Tag:
741 obj_type, sha = obj.object
742 obj = self.get_object(sha)
745 def get_blob(self, sha):
746 return self._get_object(sha, Blob)
748 def revision_history(self, head):
749 """Returns a list of the commits reachable from head.
751 Returns a list of commit objects, the first of which will be the commit
752 of head, then following that will be the parents.
754 Raises NotCommitError if any non-commits are referenced, including if the
755 head parameter isn't the sha of a commit.
757 XXX: work out how to handle merges.
759 # We build the list backwards, as parents are more likely to be older
761 pending_commits = [head]
763 while pending_commits != []:
764 head = pending_commits.pop(0)
766 commit = self.commit(head)
768 raise MissingCommitError(head)
769 if commit in history:
772 for known_commit in history:
773 if known_commit.commit_time > commit.commit_time:
776 history.insert(i, commit)
777 pending_commits += commit.parents
781 def __getitem__(self, name):
782 if len(name) in (20, 40):
783 return self.object_store[name]
784 return self.object_store[self.refs[name]]
786 def __setitem__(self, name, value):
787 if name.startswith("refs/") or name == "HEAD":
788 if isinstance(value, ShaFile):
789 self.refs[name] = value.id
790 elif isinstance(value, str):
791 self.refs[name] = value
793 raise TypeError(value)
795 raise ValueError(name)
797 def __delitem__(self, name):
798 if name.startswith("refs") or name == "HEAD":
800 raise ValueError(name)
802 def do_commit(self, message, committer=None,
803 author=None, commit_timestamp=None,
804 commit_timezone=None, author_timestamp=None,
805 author_timezone=None, tree=None):
806 """Create a new commit.
808 :param message: Commit message
809 :param committer: Committer fullname
810 :param author: Author fullname (defaults to committer)
811 :param commit_timestamp: Commit timestamp (defaults to now)
812 :param commit_timezone: Commit timestamp timezone (defaults to GMT)
813 :param author_timestamp: Author timestamp (defaults to commit timestamp)
814 :param author_timezone: Author timestamp timezone
815 (defaults to commit timestamp timezone)
816 :param tree: SHA1 of the tree root to use (if not specified the current index will be committed).
817 :return: New commit SHA1
820 index = self.open_index()
823 c.tree = index.commit(self.object_store)
826 # TODO: Allow username to be missing, and get it from .git/config
827 if committer is None:
828 raise ValueError("committer not set")
829 c.committer = committer
830 if commit_timestamp is None:
831 commit_timestamp = time.time()
832 c.commit_time = int(commit_timestamp)
833 if commit_timezone is None:
834 # FIXME: Use current user timezone rather than UTC
836 c.commit_timezone = commit_timezone
840 if author_timestamp is None:
841 author_timestamp = commit_timestamp
842 c.author_time = int(author_timestamp)
843 if author_timezone is None:
844 author_timezone = commit_timezone
845 c.author_timezone = author_timezone
847 self.object_store.add_object(c)
848 self.refs["HEAD"] = c.id
852 class Repo(BaseRepo):
853 """A git repository backed by local disk."""
855 def __init__(self, root):
856 if os.path.isdir(os.path.join(root, ".git", OBJECTDIR)):
858 self._controldir = os.path.join(root, ".git")
859 elif (os.path.isdir(os.path.join(root, OBJECTDIR)) and
860 os.path.isdir(os.path.join(root, REFSDIR))):
862 self._controldir = root
864 raise NotGitRepository(root)
866 object_store = DiskObjectStore(
867 os.path.join(self.controldir(), OBJECTDIR))
868 refs = DiskRefsContainer(self.controldir())
869 BaseRepo.__init__(self, object_store, refs)
871 def controldir(self):
872 """Return the path of the control directory."""
873 return self._controldir
875 def _put_named_file(self, path, contents):
876 """Write a file to the control dir with a specific name and contents.
878 f = GitFile(os.path.join(self.controldir(), path), 'wb')
884 def get_named_file(self, path):
885 """Get a file from the control dir with a specific name.
887 Although the filename should be interpreted as a filename relative to
888 the control dir in a disk-backed Repo, the object returned need not be
889 pointing to a file in that location.
891 :param path: The path to the file, relative to the control dir.
892 :return: An open file object, or None if the file does not exist.
895 return open(os.path.join(self.controldir(), path.lstrip('/')), 'rb')
896 except (IOError, OSError), e:
897 if e.errno == errno.ENOENT:
901 def index_path(self):
902 """Return path to the index file."""
903 return os.path.join(self.controldir(), INDEX_FILENAME)
905 def open_index(self):
906 """Open the index for this repository."""
907 from dulwich.index import Index
908 if not self.has_index():
909 raise NoIndexPresent()
910 return Index(self.index_path())
913 """Check if an index is present."""
914 return os.path.exists(self.index_path())
917 return "<Repo at %r>" % self.path
920 def init(cls, path, mkdir=True):
921 controldir = os.path.join(path, ".git")
923 cls.init_bare(controldir)
927 def init_bare(cls, path, mkdir=True):
928 for d in BASE_DIRECTORIES:
929 os.mkdir(os.path.join(path, *d))
931 ret.refs.set_ref("HEAD", "refs/heads/master")
932 ret._put_named_file('description', "Unnamed repository")
933 ret._put_named_file('config', """[core]
934 repositoryformatversion = 0
937 logallrefupdates = true
939 ret._put_named_file(os.path.join('info', 'excludes'), '')