1 # repo.py -- For dealing with git repositories.
2 # Copyright (C) 2007 James Westby <jw+debian@jameswestby.net>
3 # Copyright (C) 2008-2009 Jelmer Vernooij <jelmer@samba.org>
5 # This program is free software; you can redistribute it and/or
6 # modify it under the terms of the GNU General Public License
7 # as published by the Free Software Foundation; version 2
8 # of the License or (at your option) any later version of
11 # This program is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
22 """Repository access."""
28 from dulwich.errors import (
38 from dulwich.file import (
42 from dulwich.object_store import (
45 from dulwich.objects import (
59 REFSDIR_HEADS = 'heads'
60 INDEX_FILENAME = "index"
68 [REFSDIR, REFSDIR_TAGS],
69 [REFSDIR, REFSDIR_HEADS],
75 def read_info_refs(f):
77 for l in f.readlines():
78 (sha, name) = l.rstrip("\n").split("\t", 1)
83 def check_ref_format(refname):
84 """Check if a refname is correctly formatted.
86 Implements all the same rules as git-check-ref-format[1].
88 [1] http://www.kernel.org/pub/software/scm/git/docs/git-check-ref-format.html
90 :param refname: The refname to check
91 :return: True if refname is valid, False otherwise
93 # These could be combined into one big expression, but are listed separately
95 if '/.' in refname or refname.startswith('.'):
97 if '/' not in refname:
102 if ord(c) < 040 or c in '\177 ~^:?*[':
104 if refname[-1] in '/.':
106 if refname.endswith('.lock'):
115 class RefsContainer(object):
116 """A container for refs."""
def set_ref(self, name, other):
    """Make a ref point at another ref.

    The value stored under ``name`` is the ``SYMREF`` marker followed by
    ``other`` and a trailing newline.

    :param name: Name of the ref to set
    :param other: Name of the ref to point at
    """
    symref_line = SYMREF + other + '\n'
    self[name] = symref_line
126 def get_packed_refs(self):
127 """Get contents of the packed-refs file.
129 :return: Dictionary mapping ref names to SHA1s
131 :note: Will return an empty dictionary when no packed-refs file is
134 raise NotImplementedError(self.get_packed_refs)
136 def get_peeled(self, name):
137 """Return the cached peeled value of a ref, if available.
139 :param name: Name of the ref to peel
140 :return: The peeled value of the ref. If the ref is known not point to a
141 tag, this will be the SHA the ref refers to. If the ref may point to
142 a tag, but no cached information is available, None is returned.
def import_refs(self, base, other):
    """Copy every ref of ``other`` into this container below ``base``.

    :param base: Prefix joined to each imported name with a "/"
    :param other: Mapping of ref name to value, iterated with iteritems()
    """
    for refname, sha in other.iteritems():
        destination = "%s/%s" % (base, refname)
        self[destination] = sha
150 def keys(self, base=None):
151 """Refs present in this container.
153 :param base: An optional base to return refs under
154 :return: An unsorted set of valid refs in this container, including
158 return self.subkeys(base)
160 return self.allkeys()
162 def subkeys(self, base):
164 for refname in self.allkeys():
165 if refname.startswith(base):
169 def as_dict(self, base=None):
170 """Return the contents of this container as a dictionary.
174 keys = self.keys(base)
179 ret[key] = self[("%s/%s" % (base, key)).strip("/")]
181 continue # Unable to resolve
185 def _check_refname(self, name):
186 """Ensure a refname is valid and lives in refs or is HEAD.
188 HEAD is not a valid refname according to git-check-ref-format, but this
189 class needs to be able to touch HEAD. Also, check_ref_format expects
190 refnames without the leading 'refs/', but this class requires that
191 so it cannot touch anything outside the refs dir (or HEAD).
193 :param name: The name of the reference.
194 :raises KeyError: if a refname is not HEAD or is otherwise not valid.
198 if not name.startswith('refs/') or not check_ref_format(name[5:]):
201 def read_loose_ref(self, name):
202 """Read a loose reference and return its contents.
204 :param name: the refname to read
205 :return: The contents of the ref file, or None if it does
208 raise NotImplementedError(self.read_loose_ref)
210 def _follow(self, name):
211 """Follow a reference name.
213 :return: a tuple of (refname, sha), where refname is the name of the
214 last reference in the symbolic reference chain
216 self._check_refname(name)
217 contents = SYMREF + name
219 while contents.startswith(SYMREF):
220 refname = contents[len(SYMREF):]
221 contents = self.read_loose_ref(refname)
223 contents = self.get_packed_refs().get(refname, None)
229 return refname, contents
231 def __getitem__(self, name):
232 """Get the SHA1 for a reference name.
234 This method follows all symbolic references.
236 _, sha = self._follow(name)
242 class DictRefsContainer(RefsContainer):
244 def __init__(self, refs):
248 return self._refs.keys()
250 def read_loose_ref(self, name):
251 return self._refs[name]
254 class DiskRefsContainer(RefsContainer):
255 """Refs container that reads refs from disk."""
257 def __init__(self, path):
259 self._packed_refs = None
260 self._peeled_refs = None
263 return "%s(%r)" % (self.__class__.__name__, self.path)
265 def subkeys(self, base):
267 path = self.refpath(base)
268 for root, dirs, files in os.walk(path):
269 dir = root[len(path):].strip(os.path.sep).replace(os.path.sep, "/")
270 for filename in files:
271 refname = ("%s/%s" % (dir, filename)).strip("/")
272 # check_ref_format requires at least one /, so we prepend the
273 # base before calling it.
274 if check_ref_format("%s/%s" % (base, refname)):
276 for key in self.get_packed_refs():
277 if key.startswith(base):
278 keys.add(key[len(base):].strip("/"))
283 if os.path.exists(self.refpath("HEAD")):
285 path = self.refpath("")
286 for root, dirs, files in os.walk(self.refpath("refs")):
287 dir = root[len(path):].strip(os.path.sep).replace(os.path.sep, "/")
288 for filename in files:
289 refname = ("%s/%s" % (dir, filename)).strip("/")
290 if check_ref_format(refname):
292 keys.update(self.get_packed_refs())
295 def refpath(self, name):
296 """Return the disk path of a ref.
299 if os.path.sep != "/":
300 name = name.replace("/", os.path.sep)
301 return os.path.join(self.path, name)
303 def get_packed_refs(self):
304 """Get contents of the packed-refs file.
306 :return: Dictionary mapping ref names to SHA1s
308 :note: Will return an empty dictionary when no packed-refs file is
311 # TODO: invalidate the cache on repacking
312 if self._packed_refs is None:
313 self._packed_refs = {}
314 path = os.path.join(self.path, 'packed-refs')
316 f = GitFile(path, 'rb')
318 if e.errno == errno.ENOENT:
322 first_line = iter(f).next().rstrip()
323 if (first_line.startswith("# pack-refs") and " peeled" in
325 self._peeled_refs = {}
326 for sha, name, peeled in read_packed_refs_with_peeled(f):
327 self._packed_refs[name] = sha
329 self._peeled_refs[name] = peeled
332 for sha, name in read_packed_refs(f):
333 self._packed_refs[name] = sha
336 return self._packed_refs
338 def get_peeled(self, name):
339 """Return the cached peeled value of a ref, if available.
341 :param name: Name of the ref to peel
342 :return: The peeled value of the ref. If the ref is known not point to a
343 tag, this will be the SHA the ref refers to. If the ref may point to
344 a tag, but no cached information is available, None is returned.
346 self.get_packed_refs()
347 if self._peeled_refs is None or name not in self._packed_refs:
348 # No cache: no peeled refs were read, or this ref is loose
350 if name in self._peeled_refs:
351 return self._peeled_refs[name]
356 def read_loose_ref(self, name):
357 """Read a reference file and return its contents.
359 If the reference file is a symbolic reference, only read the first line of
360 the file. Otherwise, only read the first 40 bytes.
362 :param name: the refname to read, relative to refpath
363 :return: The contents of the ref file, or None if the file does not
365 :raises IOError: if any other error occurs
367 filename = self.refpath(name)
369 f = GitFile(filename, 'rb')
371 header = f.read(len(SYMREF))
373 # Read only the first line
374 return header + iter(f).next().rstrip("\n")
376 # Read only the first 40 bytes
377 return header + f.read(40-len(SYMREF))
381 if e.errno == errno.ENOENT:
385 def _remove_packed_ref(self, name):
386 if self._packed_refs is None:
388 filename = os.path.join(self.path, 'packed-refs')
389 # reread cached refs from disk, while holding the lock
390 f = GitFile(filename, 'wb')
392 self._packed_refs = None
393 self.get_packed_refs()
395 if name not in self._packed_refs:
398 del self._packed_refs[name]
399 if name in self._peeled_refs:
400 del self._peeled_refs[name]
401 write_packed_refs(f, self._packed_refs, self._peeled_refs)
406 def set_if_equals(self, name, old_ref, new_ref):
407 """Set a refname to new_ref only if it currently equals old_ref.
409 This method follows all symbolic references, and can be used to perform
410 an atomic compare-and-swap operation.
412 :param name: The refname to set.
413 :param old_ref: The old sha the refname must refer to, or None to set
415 :param new_ref: The new sha the refname will refer to.
416 :return: True if the set was successful, False otherwise.
419 realname, _ = self._follow(name)
422 filename = self.refpath(realname)
423 ensure_dir_exists(os.path.dirname(filename))
424 f = GitFile(filename, 'wb')
426 if old_ref is not None:
428 # read again while holding the lock
429 orig_ref = self.read_loose_ref(realname)
431 orig_ref = self.get_packed_refs().get(realname, None)
432 if orig_ref != old_ref:
435 except (OSError, IOError):
439 f.write(new_ref+"\n")
440 except (OSError, IOError):
447 def add_if_new(self, name, ref):
448 """Add a new reference only if it does not already exist."""
449 self._check_refname(name)
450 filename = self.refpath(name)
451 ensure_dir_exists(os.path.dirname(filename))
452 f = GitFile(filename, 'wb')
454 if os.path.exists(filename) or name in self.get_packed_refs():
459 except (OSError, IOError):
466 def __setitem__(self, name, ref):
467 """Set a reference name to point to the given SHA1.
469 This method follows all symbolic references.
471 :note: This method unconditionally overwrites the contents of a reference
472 on disk. To update atomically only if the reference has not changed
473 on disk, use set_if_equals().
475 self.set_if_equals(name, None, ref)
477 def remove_if_equals(self, name, old_ref):
478 """Remove a refname only if it currently equals old_ref.
480 This method does not follow symbolic references. It can be used to
481 perform an atomic compare-and-delete operation.
483 :param name: The refname to delete.
484 :param old_ref: The old sha the refname must refer to, or None to delete
486 :return: True if the delete was successful, False otherwise.
488 self._check_refname(name)
489 filename = self.refpath(name)
490 ensure_dir_exists(os.path.dirname(filename))
491 f = GitFile(filename, 'wb')
493 if old_ref is not None:
494 orig_ref = self.read_loose_ref(name)
496 orig_ref = self.get_packed_refs().get(name, None)
497 if orig_ref != old_ref:
503 if e.errno != errno.ENOENT:
505 self._remove_packed_ref(name)
507 # never write, we just wanted the lock
511 def __delitem__(self, name):
514 This method does not follow symbolic references.
515 :note: This method unconditionally deletes the contents of a reference
516 on disk. To delete atomically only if the reference has not changed
517 on disk, use set_if_equals().
519 self.remove_if_equals(name, None)
522 def _split_ref_line(line):
523 """Split a single ref line into a tuple of SHA1 and name."""
524 fields = line.rstrip("\n").split(" ")
526 raise PackedRefsException("invalid ref line '%s'" % line)
530 except (AssertionError, TypeError), e:
531 raise PackedRefsException(e)
532 if not check_ref_format(name):
533 raise PackedRefsException("invalid ref name '%s'" % name)
537 def read_packed_refs(f):
538 """Read a packed refs file.
540 Yields tuples with SHA1s and ref names.
542 :param f: file-like object to read from
549 raise PackedRefsException(
550 "found peeled ref in packed-refs without peeled")
551 yield _split_ref_line(l)
554 def read_packed_refs_with_peeled(f):
555 """Read a packed refs file including peeled refs.
557 Assumes the "# pack-refs with: peeled" line was already read. Yields tuples
558 with ref names, SHA1s, and peeled SHA1s (or None).
560 :param f: file-like object to read from, seek'ed to the second line
569 raise PackedRefsException("unexpected peeled ref line")
572 except (AssertionError, TypeError), e:
573 raise PackedRefsException(e)
574 sha, name = _split_ref_line(last)
576 yield (sha, name, l[1:])
579 sha, name = _split_ref_line(last)
580 yield (sha, name, None)
583 sha, name = _split_ref_line(last)
584 yield (sha, name, None)
587 def write_packed_refs(f, packed_refs, peeled_refs=None):
588 """Write a packed refs file.
590 :param f: empty file-like object to write to
591 :param packed_refs: dict of refname to sha of packed refs to write
592 :param peeled_refs: dict of refname to peeled value of sha
594 if peeled_refs is None:
597 f.write('# pack-refs with: peeled\n')
598 for refname in sorted(packed_refs.iterkeys()):
599 f.write('%s %s\n' % (packed_refs[refname], refname))
600 if refname in peeled_refs:
601 f.write('^%s\n' % peeled_refs[refname])
604 class BaseRepo(object):
605 """Base class for a git repository.
607 :ivar object_store: Dictionary-like object for accessing
609 :ivar refs: Dictionary-like object with the refs in this repository
612 def __init__(self, object_store, refs):
613 self.object_store = object_store
616 def get_named_file(self, path):
617 """Get a file from the control dir with a specific name.
619 Although the filename should be interpreted as a filename relative to
620 the control dir in a disk-baked Repo, the object returned need not be
621 pointing to a file in that location.
623 :param path: The path to the file, relative to the control dir.
624 :return: An open file object, or None if the file does not exist.
626 raise NotImplementedError(self.get_named_file)
628 def open_index(self):
629 """Open the index for this repository.
631 :raises NoIndexPresent: If no index is present
632 :return: Index instance
634 raise NotImplementedError(self.open_index)
636 def fetch(self, target, determine_wants=None, progress=None):
637 """Fetch objects into another repository.
639 :param target: The target repository
640 :param determine_wants: Optional function to determine what refs to
642 :param progress: Optional progress function
644 if determine_wants is None:
645 determine_wants = lambda heads: heads.values()
646 target.object_store.add_objects(
647 self.fetch_objects(determine_wants, target.get_graph_walker(),
649 return self.get_refs()
651 def fetch_objects(self, determine_wants, graph_walker, progress):
652 """Fetch the missing objects required for a set of revisions.
654 :param determine_wants: Function that takes a dictionary with heads
655 and returns the list of heads to fetch.
656 :param graph_walker: Object that can iterate over the list of revisions
657 to fetch and has an "ack" method that will be called to acknowledge
658 that a revision is present.
659 :param progress: Simple progress function that will be called with
660 updated progress strings.
661 :return: iterator over objects, with __len__ implemented
663 wants = determine_wants(self.get_refs())
664 haves = self.object_store.find_common_revisions(graph_walker)
665 return self.object_store.iter_shas(
666 self.object_store.find_missing_objects(haves, wants, progress))
668 def get_graph_walker(self, heads=None):
670 heads = self.refs.as_dict('refs/heads').values()
671 return self.object_store.get_graph_walker(heads)
674 """Return the SHA1 a ref is pointing to."""
675 return self.refs[name]
678 """Get dictionary with all refs."""
679 return self.refs.as_dict()
682 """Return the SHA1 pointed at by HEAD."""
683 return self.refs['HEAD']
685 def _get_object(self, sha, cls):
686 assert len(sha) in (20, 40)
687 ret = self.get_object(sha)
688 if ret._type != cls._type:
690 raise NotCommitError(ret)
692 raise NotBlobError(ret)
694 raise NotTreeError(ret)
696 raise NotTagError(ret)
698 raise Exception("Type invalid: %r != %r" % (ret._type, cls._type))
701 def get_object(self, sha):
702 return self.object_store[sha]
704 def get_parents(self, sha):
705 return self.commit(sha).parents
707 def get_config(self):
709 p = ConfigParser.RawConfigParser()
710 p.read(os.path.join(self._controldir, 'config'))
711 return dict((section, dict(p.items(section)))
712 for section in p.sections())
def commit(self, sha):
    """Fetch the object ``sha`` via _get_object(), expecting a Commit.

    :param sha: Passed unchanged to _get_object()
    """
    expected_class = Commit
    return self._get_object(sha, expected_class)
718 return self._get_object(sha, Tree)
721 return self._get_object(sha, Tag)
723 def get_peeled(self, ref):
724 """Get the peeled value of a ref.
726 :param ref: the refname to peel
727 :return: the fully-peeled SHA1 of a tag object, after peeling all
728 intermediate tags; if the original ref does not point to a tag, this
729 will equal the original SHA1.
731 cached = self.refs.get_peeled(ref)
732 if cached is not None:
735 obj_type = num_type_map[obj.type]
736 while obj_type == Tag:
737 obj_type, sha = obj.object
738 obj = self.get_object(sha)
def get_blob(self, sha):
    """Fetch the object ``sha`` via _get_object(), expecting a Blob.

    :param sha: Passed unchanged to _get_object()
    """
    expected_class = Blob
    return self._get_object(sha, expected_class)
744 def revision_history(self, head):
745 """Returns a list of the commits reachable from head.
747 Returns a list of commit objects, the first of which will be the commit
748 of head, then following that will be the parents.
750 Raises NotCommitError if any no commits are referenced, including if the
751 head parameter isn't the sha of a commit.
753 XXX: work out how to handle merges.
755 # We build the list backwards, as parents are more likely to be older
757 pending_commits = [head]
759 while pending_commits != []:
760 head = pending_commits.pop(0)
762 commit = self.commit(head)
764 raise MissingCommitError(head)
765 if commit in history:
768 for known_commit in history:
769 if known_commit.commit_time > commit.commit_time:
772 history.insert(i, commit)
773 parents = commit.parents
774 pending_commits += parents
778 def __getitem__(self, name):
779 if len(name) in (20, 40):
780 return self.object_store[name]
781 return self.object_store[self.refs[name]]
783 def __setitem__(self, name, value):
784 if name.startswith("refs/") or name == "HEAD":
785 if isinstance(value, ShaFile):
786 self.refs[name] = value.id
787 elif isinstance(value, str):
788 self.refs[name] = value
790 raise TypeError(value)
791 raise ValueError(name)
793 def __delitem__(self, name):
794 if name.startswith("refs") or name == "HEAD":
796 raise ValueError(name)
798 def do_commit(self, committer, message,
799 author=None, commit_timestamp=None,
800 commit_timezone=None, author_timestamp=None,
801 author_timezone=None, tree=None):
802 """Create a new commit.
804 :param committer: Committer fullname
805 :param message: Commit message
806 :param author: Author fullname (defaults to committer)
807 :param commit_timestamp: Commit timestamp (defaults to now)
808 :param commit_timezone: Commit timestamp timezone (defaults to GMT)
809 :param author_timestamp: Author timestamp (defaults to commit timestamp)
810 :param author_timezone: Author timestamp timezone
811 (defaults to commit timestamp timezone)
812 :param tree: SHA1 of the tree root to use (if not specified the current index will be committed).
813 :return: New commit SHA1
815 from dulwich.index import commit_index
817 index = self.open_index()
820 c.tree = commit_index(self.object_store, index)
823 c.committer = committer
824 if commit_timestamp is None:
825 commit_timestamp = time.time()
826 c.commit_time = int(commit_timestamp)
827 if commit_timezone is None:
829 c.commit_timezone = commit_timezone
833 if author_timestamp is None:
834 author_timestamp = commit_timestamp
835 c.author_time = int(author_timestamp)
836 if author_timezone is None:
837 author_timezone = commit_timezone
838 c.author_timezone = author_timezone
840 self.object_store.add_object(c)
841 self.refs["HEAD"] = c.id
845 class Repo(BaseRepo):
846 """A git repository backed by local disk."""
848 def __init__(self, root):
849 if os.path.isdir(os.path.join(root, ".git", OBJECTDIR)):
851 self._controldir = os.path.join(root, ".git")
852 elif (os.path.isdir(os.path.join(root, OBJECTDIR)) and
853 os.path.isdir(os.path.join(root, REFSDIR))):
855 self._controldir = root
857 raise NotGitRepository(root)
859 object_store = DiskObjectStore(
860 os.path.join(self.controldir(), OBJECTDIR))
861 refs = DiskRefsContainer(self.controldir())
862 BaseRepo.__init__(self, object_store, refs)
864 def controldir(self):
865 """Return the path of the control directory."""
866 return self._controldir
868 def _put_named_file(self, path, contents):
869 """Write a file from the control dir with a specific name and contents.
871 f = GitFile(os.path.join(self.controldir(), path), 'wb')
877 def get_named_file(self, path):
878 """Get a file from the control dir with a specific name.
880 Although the filename should be interpreted as a filename relative to
881 the control dir in a disk-baked Repo, the object returned need not be
882 pointing to a file in that location.
884 :param path: The path to the file, relative to the control dir.
885 :return: An open file object, or None if the file does not exist.
888 return open(os.path.join(self.controldir(), path.lstrip('/')), 'rb')
889 except (IOError, OSError), e:
890 if e.errno == errno.ENOENT:
def index_path(self):
    """Return path to the index file.

    The path is INDEX_FILENAME joined onto the control directory.
    """
    control_path = self.controldir()
    return os.path.join(control_path, INDEX_FILENAME)
def open_index(self):
    """Open the index for this repository.

    :raises NoIndexPresent: If has_index() reports that no index exists
    :return: Index instance created from index_path()
    """
    from dulwich.index import Index
    if self.has_index():
        return Index(self.index_path())
    raise NoIndexPresent()
906 """Check if an index is present."""
907 return os.path.exists(self.index_path())
910 return "<Repo at %r>" % self.path
913 def init(cls, path, mkdir=True):
914 controldir = os.path.join(path, ".git")
916 cls.init_bare(controldir)
920 def init_bare(cls, path, mkdir=True):
921 for d in BASE_DIRECTORIES:
922 os.mkdir(os.path.join(path, *d))
924 ret.refs.set_ref("HEAD", "refs/heads/master")
925 ret._put_named_file('description', "Unnamed repository")
926 ret._put_named_file('config', """[core]
927 repositoryformatversion = 0
930 logallrefupdates = true
932 ret._put_named_file(os.path.join('info', 'excludes'), '')