# repo.py -- For dealing with git repositories.
2 # Copyright (C) 2007 James Westby <jw+debian@jameswestby.net>
3 # Copyright (C) 2008-2009 Jelmer Vernooij <jelmer@samba.org>
5 # This program is free software; you can redistribute it and/or
6 # modify it under the terms of the GNU General Public License
7 # as published by the Free Software Foundation; version 2
8 # of the License or (at your option) any later version of
11 # This program is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
22 """Repository access."""
28 from dulwich.errors import (
37 from dulwich.file import (
41 from dulwich.object_store import (
44 from dulwich.objects import (
57 REFSDIR_HEADS = 'heads'
58 INDEX_FILENAME = "index"
66 [REFSDIR, REFSDIR_TAGS],
67 [REFSDIR, REFSDIR_HEADS],
73 def read_info_refs(f):
75 for l in f.readlines():
76 (sha, name) = l.rstrip("\n").split("\t", 1)
81 def check_ref_format(refname):
82 """Check if a refname is correctly formatted.
84 Implements all the same rules as git-check-ref-format[1].
86 [1] http://www.kernel.org/pub/software/scm/git/docs/git-check-ref-format.html
88 :param refname: The refname to check
89 :return: True if refname is valid, False otherwise
91 # These could be combined into one big expression, but are listed separately
93 if '/.' in refname or refname.startswith('.'):
95 if '/' not in refname:
100 if ord(c) < 040 or c in '\177 ~^:?*[':
102 if refname[-1] in '/.':
104 if refname.endswith('.lock'):
113 class RefsContainer(object):
114 """A container for refs."""
def set_ref(self, name, other):
    """Turn ``name`` into a symbolic reference to ``other``.

    The value stored under ``name`` is the SYMREF marker followed by the
    target refname and a trailing newline.

    :param name: Name of the ref to set
    :param other: Name of the ref to point at
    """
    target_line = other + '\n'
    self[name] = SYMREF + target_line
def get_packed_refs(self):
    """Return the contents of the packed-refs file.

    This base implementation is a placeholder: it always raises, so
    subclasses have to provide the real lookup.

    :return: Dictionary mapping ref names to SHA1s

    :note: Will return an empty dictionary when no packed-refs file is
        present.
    :raises NotImplementedError: always, in this base implementation
    """
    unimplemented = self.get_packed_refs
    raise NotImplementedError(unimplemented)
def import_refs(self, base, other):
    """Copy every ref from ``other`` into this container below ``base``.

    :param base: Prefix the imported names are stored under; it is joined
        to each imported name with a single '/'
    :param other: Object whose ``iteritems()`` yields (name, value) pairs
    """
    for short_name, value in other.iteritems():
        full_name = "%s/%s" % (base, short_name)
        self[full_name] = value
138 def keys(self, base=None):
139 """Refs present in this container.
141 :param base: An optional base to return refs under
142 :return: An unsorted set of valid refs in this container, including
146 return self.subkeys(base)
148 return self.allkeys()
150 def subkeys(self, base):
152 for refname in self.allkeys():
153 if refname.startswith(base):
157 def as_dict(self, base=None):
158 """Return the contents of this container as a dictionary.
162 keys = self.keys(base)
167 ret[key] = self[("%s/%s" % (base, key)).strip("/")]
169 continue # Unable to resolve
173 def _check_refname(self, name):
174 """Ensure a refname is valid and lives in refs or is HEAD.
176 HEAD is not a valid refname according to git-check-ref-format, but this
177 class needs to be able to touch HEAD. Also, check_ref_format expects
178 refnames without the leading 'refs/', but this class requires that
179 so it cannot touch anything outside the refs dir (or HEAD).
181 :param name: The name of the reference.
182 :raises KeyError: if a refname is not HEAD or is otherwise not valid.
186 if not name.startswith('refs/') or not check_ref_format(name[5:]):
def read_loose_ref(self, name):
    """Read a loose reference and hand back its raw contents.

    This base implementation is a placeholder: it always raises, so
    subclasses have to provide the real lookup.

    :param name: the refname to read
    :return: The contents of the ref file, or None if it does
        not exist.
    :raises NotImplementedError: always, in this base implementation
    """
    unimplemented = self.read_loose_ref
    raise NotImplementedError(unimplemented)
198 def _follow(self, name):
199 """Follow a reference name.
201 :return: a tuple of (refname, sha), where refname is the name of the
202 last reference in the symbolic reference chain
204 self._check_refname(name)
205 contents = SYMREF + name
207 while contents.startswith(SYMREF):
208 refname = contents[len(SYMREF):]
209 contents = self.read_loose_ref(refname)
211 contents = self.get_packed_refs().get(refname, None)
217 return refname, contents
219 def __getitem__(self, name):
220 """Get the SHA1 for a reference name.
222 This method follows all symbolic references.
224 _, sha = self._follow(name)
230 class DictRefsContainer(RefsContainer):
232 def __init__(self, refs):
236 return self._refs.keys()
def read_loose_ref(self, name):
    """Return the value stored for ``name`` in the backing dictionary.

    :param name: the refname to read
    :raises KeyError: if ``name`` is not a key of the backing dictionary
    """
    stored_refs = self._refs
    return stored_refs[name]
242 class DiskRefsContainer(RefsContainer):
243 """Refs container that reads refs from disk."""
245 def __init__(self, path):
247 self._packed_refs = None
248 self._peeled_refs = {}
251 return "%s(%r)" % (self.__class__.__name__, self.path)
253 def subkeys(self, base):
255 path = self.refpath(base)
256 for root, dirs, files in os.walk(path):
257 dir = root[len(path):].strip(os.path.sep).replace(os.path.sep, "/")
258 for filename in files:
259 refname = ("%s/%s" % (dir, filename)).strip("/")
260 # check_ref_format requires at least one /, so we prepend the
261 # base before calling it.
262 if check_ref_format("%s/%s" % (base, refname)):
264 for key in self.get_packed_refs():
265 if key.startswith(base):
266 keys.add(key[len(base):].strip("/"))
271 if os.path.exists(self.refpath("HEAD")):
273 path = self.refpath("")
274 for root, dirs, files in os.walk(self.refpath("refs")):
275 dir = root[len(path):].strip(os.path.sep).replace(os.path.sep, "/")
276 for filename in files:
277 refname = ("%s/%s" % (dir, filename)).strip("/")
278 if check_ref_format(refname):
280 keys.update(self.get_packed_refs())
def refpath(self, name):
    """Return the disk path of a ref.

    Refnames always use '/' as separator; it is swapped for the platform's
    own separator before the name is joined onto the container path.

    :param name: The refname, using '/' between components
    :return: ``name`` joined onto ``self.path``
    """
    separator = os.path.sep
    if separator == "/":
        relative = name
    else:
        relative = name.replace("/", separator)
    return os.path.join(self.path, relative)
291 def get_packed_refs(self):
292 """Get contents of the packed-refs file.
294 :return: Dictionary mapping ref names to SHA1s
296 :note: Will return an empty dictionary when no packed-refs file is
299 # TODO: invalidate the cache on repacking
300 if self._packed_refs is None:
301 self._packed_refs = {}
302 path = os.path.join(self.path, 'packed-refs')
304 f = GitFile(path, 'rb')
306 if e.errno == errno.ENOENT:
310 first_line = iter(f).next().rstrip()
311 if (first_line.startswith("# pack-refs") and " peeled" in
313 for sha, name, peeled in read_packed_refs_with_peeled(f):
314 self._packed_refs[name] = sha
316 self._peeled_refs[name] = peeled
319 for sha, name in read_packed_refs(f):
320 self._packed_refs[name] = sha
323 return self._packed_refs
325 def read_loose_ref(self, name):
326 """Read a reference file and return its contents.
328 If the reference file a symbolic reference, only read the first line of
329 the file. Otherwise, only read the first 40 bytes.
331 :param name: the refname to read, relative to refpath
332 :return: The contents of the ref file, or None if the file does not
334 :raises IOError: if any other error occurs
336 filename = self.refpath(name)
338 f = GitFile(filename, 'rb')
340 header = f.read(len(SYMREF))
342 # Read only the first line
343 return header + iter(f).next().rstrip("\n")
345 # Read only the first 40 bytes
346 return header + f.read(40-len(SYMREF))
350 if e.errno == errno.ENOENT:
354 def _remove_packed_ref(self, name):
355 if self._packed_refs is None:
357 filename = os.path.join(self.path, 'packed-refs')
358 # reread cached refs from disk, while holding the lock
359 f = GitFile(filename, 'wb')
361 self._packed_refs = None
362 self.get_packed_refs()
364 if name not in self._packed_refs:
367 del self._packed_refs[name]
368 if name in self._peeled_refs:
369 del self._peeled_refs[name]
370 write_packed_refs(f, self._packed_refs, self._peeled_refs)
375 def set_if_equals(self, name, old_ref, new_ref):
376 """Set a refname to new_ref only if it currently equals old_ref.
378 This method follows all symbolic references, and can be used to perform
379 an atomic compare-and-swap operation.
381 :param name: The refname to set.
382 :param old_ref: The old sha the refname must refer to, or None to set
384 :param new_ref: The new sha the refname will refer to.
385 :return: True if the set was successful, False otherwise.
388 realname, _ = self._follow(name)
391 filename = self.refpath(realname)
392 ensure_dir_exists(os.path.dirname(filename))
393 f = GitFile(filename, 'wb')
395 if old_ref is not None:
397 # read again while holding the lock
398 orig_ref = self.read_loose_ref(realname)
400 orig_ref = self.get_packed_refs().get(realname, None)
401 if orig_ref != old_ref:
404 except (OSError, IOError):
408 f.write(new_ref+"\n")
409 except (OSError, IOError):
416 def add_if_new(self, name, ref):
417 """Add a new reference only if it does not already exist."""
418 self._check_refname(name)
419 filename = self.refpath(name)
420 ensure_dir_exists(os.path.dirname(filename))
421 f = GitFile(filename, 'wb')
423 if os.path.exists(filename) or name in self.get_packed_refs():
428 except (OSError, IOError):
def __setitem__(self, name, ref):
    """Point a reference name at the given SHA1.

    This method follows all symbolic references.

    :note: This method unconditionally overwrites the contents of a reference
        on disk. To update atomically only if the reference has not changed
        on disk, use set_if_equals().
    """
    # No expected old value: the comparison step is skipped and the write
    # always goes ahead.
    expected_old = None
    self.set_if_equals(name, expected_old, ref)
446 def remove_if_equals(self, name, old_ref):
447 """Remove a refname only if it currently equals old_ref.
449 This method does not follow symbolic references. It can be used to
450 perform an atomic compare-and-delete operation.
452 :param name: The refname to delete.
453 :param old_ref: The old sha the refname must refer to, or None to delete
455 :return: True if the delete was successful, False otherwise.
457 self._check_refname(name)
458 filename = self.refpath(name)
459 ensure_dir_exists(os.path.dirname(filename))
460 f = GitFile(filename, 'wb')
462 if old_ref is not None:
463 orig_ref = self.read_loose_ref(name)
465 orig_ref = self.get_packed_refs().get(name, None)
466 if orig_ref != old_ref:
472 if e.errno != errno.ENOENT:
474 self._remove_packed_ref(name)
476 # never write, we just wanted the lock
def __delitem__(self, name):
    """Remove a reference name.

    This method does not follow symbolic references.

    :note: This method unconditionally deletes the contents of a reference
        on disk. To delete atomically only if the reference has not changed
        on disk, use remove_if_equals().
    """
    # No expected old value: the reference is removed whatever it holds.
    expected_old = None
    self.remove_if_equals(name, expected_old)
491 def _split_ref_line(line):
492 """Split a single ref line into a tuple of SHA1 and name."""
493 fields = line.rstrip("\n").split(" ")
495 raise PackedRefsException("invalid ref line '%s'" % line)
499 except (AssertionError, TypeError), e:
500 raise PackedRefsException(e)
501 if not check_ref_format(name):
502 raise PackedRefsException("invalid ref name '%s'" % name)
506 def read_packed_refs(f):
507 """Read a packed refs file.
509 Yields tuples with SHA1s and ref names.
511 :param f: file-like object to read from
518 raise PackedRefsException(
519 "found peeled ref in packed-refs without peeled")
520 yield _split_ref_line(l)
523 def read_packed_refs_with_peeled(f):
524 """Read a packed refs file including peeled refs.
526 Assumes the "# pack-refs with: peeled" line was already read. Yields tuples
527 with ref names, SHA1s, and peeled SHA1s (or None).
529 :param f: file-like object to read from, seek'ed to the second line
538 raise PackedRefsException("unexpected peeled ref line")
541 except (AssertionError, TypeError), e:
542 raise PackedRefsException(e)
543 sha, name = _split_ref_line(last)
545 yield (sha, name, l[1:])
548 sha, name = _split_ref_line(last)
549 yield (sha, name, None)
552 sha, name = _split_ref_line(last)
553 yield (sha, name, None)
556 def write_packed_refs(f, packed_refs, peeled_refs=None):
557 """Write a packed refs file.
559 :param f: empty file-like object to write to
560 :param packed_refs: dict of refname to sha of packed refs to write
562 if peeled_refs is None:
565 f.write('# pack-refs with: peeled\n')
566 for refname in sorted(packed_refs.iterkeys()):
567 f.write('%s %s\n' % (packed_refs[refname], refname))
568 if refname in peeled_refs:
569 f.write('^%s\n' % peeled_refs[refname])
572 class BaseRepo(object):
573 """Base class for a git repository.
575 :ivar object_store: Dictionary-like object for accessing
577 :ivar refs: Dictionary-like object with the refs in this repository
580 def __init__(self, object_store, refs):
581 self.object_store = object_store
def get_named_file(self, path):
    """Open a file from the control dir by name.

    Although the filename should be interpreted as a filename relative to
    the control dir in a disk-backed Repo, the object returned need not be
    pointing to a file in that location.

    This base implementation is a placeholder: it always raises, so
    subclasses have to provide the real lookup.

    :param path: The path to the file, relative to the control dir.
    :return: An open file object, or None if the file does not exist.
    :raises NotImplementedError: always, in this base implementation
    """
    unimplemented = self.get_named_file
    raise NotImplementedError(unimplemented)
def open_index(self):
    """Open the index for this repository.

    This base implementation is a placeholder: it always raises, so
    subclasses have to provide the real lookup.

    :raises NoIndexPresent: If no index is present
    :raises NotImplementedError: always, in this base implementation
    :return: Index instance
    """
    unimplemented = self.open_index
    raise NotImplementedError(unimplemented)
def fetch(self, target, determine_wants=None, progress=None):
    """Fetch objects from this repository into another repository.

    :param target: The target repository
    :param determine_wants: Optional function to determine what refs to
        fetch; when omitted, every value of the refs dictionary is wanted
    :param progress: Optional progress function
    :return: The refs of this repository, as returned by get_refs()
    """
    if determine_wants is None:
        def determine_wants(heads):
            # Default policy: want every SHA the refs dictionary holds.
            return heads.values()
    add_objects = target.object_store.add_objects
    fetch_objects = self.fetch_objects
    walker = target.get_graph_walker()
    add_objects(fetch_objects(determine_wants, walker, progress))
    return self.get_refs()
def fetch_objects(self, determine_wants, graph_walker, progress):
    """Work out and return the objects missing for a set of revisions.

    :param determine_wants: Function that takes a dictionary with heads
        and returns the list of heads to fetch.
    :param graph_walker: Object that can iterate over the list of revisions
        to fetch and has an "ack" method that will be called to acknowledge
        that a revision is present.
    :param progress: Simple progress function that will be called with
        updated progress strings.
    :return: iterator over objects, with __len__ implemented
    """
    # What the caller wants is decided first, from this repository's refs.
    wanted = determine_wants(self.get_refs())
    store = self.object_store
    common = store.find_common_revisions(graph_walker)
    missing = store.find_missing_objects(common, wanted, progress)
    return store.iter_shas(missing)
636 def get_graph_walker(self, heads=None):
638 heads = self.refs.as_dict('refs/heads').values()
639 return self.object_store.get_graph_walker(heads)
642 """Return the SHA1 a ref is pointing to."""
643 return self.refs[name]
646 """Get dictionary with all refs."""
647 return self.refs.as_dict()
650 """Return the SHA1 pointed at by HEAD."""
651 return self.refs['HEAD']
653 def _get_object(self, sha, cls):
654 assert len(sha) in (20, 40)
655 ret = self.get_object(sha)
656 if ret._type != cls._type:
658 raise NotCommitError(ret)
660 raise NotBlobError(ret)
662 raise NotTreeError(ret)
664 raise Exception("Type invalid: %r != %r" % (ret._type, cls._type))
def get_object(self, sha):
    """Look ``sha`` up in this repository's object store.

    :param sha: Key to look up in the object store
    :return: Whatever the object store holds under ``sha``
    """
    store = self.object_store
    return store[sha]
def get_parents(self, sha):
    """Return the ``parents`` attribute of the commit identified by ``sha``.

    :param sha: SHA of the commit, passed on to ``self.commit()``
    """
    commit = self.commit(sha)
    return commit.parents
def get_config(self):
    """Load the 'config' file from the control directory.

    :return: A ConfigObj built from the path of the 'config' file
    """
    # Imported here rather than at module level, as in the original code.
    from configobj import ConfigObj
    config_path = os.path.join(self._controldir, 'config')
    return ConfigObj(config_path)
def commit(self, sha):
    """Fetch the object ``sha`` via ``_get_object``, expecting a Commit.

    :param sha: SHA of the commit to retrieve
    """
    expected_type = Commit
    return self._get_object(sha, expected_type)
681 return self._get_object(sha, Tree)
684 return self._get_object(sha, Tag)
def get_blob(self, sha):
    """Fetch the object ``sha`` via ``_get_object``, expecting a Blob.

    :param sha: SHA of the blob to retrieve
    """
    expected_type = Blob
    return self._get_object(sha, expected_type)
689 def revision_history(self, head):
690 """Returns a list of the commits reachable from head.
692 Returns a list of commit objects. the first of which will be the commit
of head, then following that will be the parents.
695 Raises NotCommitError if any no commits are referenced, including if the
696 head parameter isn't the sha of a commit.
698 XXX: work out how to handle merges.
700 # We build the list backwards, as parents are more likely to be older
702 pending_commits = [head]
704 while pending_commits != []:
705 head = pending_commits.pop(0)
707 commit = self.commit(head)
709 raise MissingCommitError(head)
710 if commit in history:
713 for known_commit in history:
714 if known_commit.commit_time > commit.commit_time:
717 history.insert(i, commit)
718 parents = commit.parents
719 pending_commits += parents
def __getitem__(self, name):
    """Retrieve an object by its SHA, or by the name of a ref pointing at it.

    A name of 20 or 40 characters is first tried as a key of the object
    store. If the store has no such key the name is treated as a refname
    instead, because refnames can have those lengths too (for example
    "refs/heads/my-branch" is 20 characters long).

    :param name: A key of the object store, or a refname
    :return: The object found in the object store
    :raises KeyError: if the final object store lookup fails; errors raised
        by the refs container while resolving ``name`` propagate unchanged
    """
    if len(name) in (20, 40):
        try:
            return self.object_store[name]
        except KeyError:
            # Not a known object id; fall through and try it as a refname.
            pass
    return self.object_store[self.refs[name]]
def __setitem__(self, name, value):
    """Point the ref ``name`` at ``value``.

    Only "HEAD" and names starting with "refs/" are accepted. The name is
    validated before anything is written, and a successful assignment
    returns normally instead of running into the ValueError meant for
    invalid names.

    :param name: "HEAD" or a refname starting with "refs/"
    :param value: Either a str, stored as is, or a ShaFile, whose ``id``
        is stored
    :raises ValueError: if ``name`` is neither "HEAD" nor under "refs/"
    :raises TypeError: if ``value`` is neither a str nor a ShaFile
    """
    if not (name.startswith("refs/") or name == "HEAD"):
        raise ValueError(name)
    if isinstance(value, str):
        self.refs[name] = value
    elif isinstance(value, ShaFile):
        self.refs[name] = value.id
    else:
        raise TypeError(value)
def __delitem__(self, name):
    """Delete the ref ``name``.

    Only "HEAD" and names starting with "refs/" are accepted, the same rule
    __setitem__ applies. A successful deletion returns normally instead of
    running into the ValueError meant for invalid names.

    :param name: "HEAD" or a refname starting with "refs/"
    :raises ValueError: if ``name`` is neither "HEAD" nor under "refs/"
    """
    if not (name.startswith("refs/") or name == "HEAD"):
        raise ValueError(name)
    del self.refs[name]
743 def do_commit(self, committer, message,
744 author=None, commit_timestamp=None,
745 commit_timezone=None, author_timestamp=None,
746 author_timezone=None, tree=None):
747 """Create a new commit.
749 :param committer: Committer fullname
750 :param message: Commit message
751 :param author: Author fullname (defaults to committer)
752 :param commit_timestamp: Commit timestamp (defaults to now)
753 :param commit_timezone: Commit timestamp timezone (defaults to GMT)
754 :param author_timestamp: Author timestamp (defaults to commit timestamp)
755 :param author_timezone: Author timestamp timezone
756 (defaults to commit timestamp timezone)
757 :param tree: SHA1 of the tree root to use (if not specified the current index will be committed).
758 :return: New commit SHA1
760 from dulwich.index import commit_index
762 index = self.open_index()
765 c.tree = commit_index(self.object_store, index)
768 c.committer = committer
769 if commit_timestamp is None:
770 commit_timestamp = time.time()
771 c.commit_time = int(commit_timestamp)
772 if commit_timezone is None:
774 c.commit_timezone = commit_timezone
778 if author_timestamp is None:
779 author_timestamp = commit_timestamp
780 c.author_time = int(author_timestamp)
781 if author_timezone is None:
782 author_timezone = commit_timezone
783 c.author_timezone = author_timezone
785 self.object_store.add_object(c)
786 self.refs["HEAD"] = c.id
790 class Repo(BaseRepo):
791 """A git repository backed by local disk."""
793 def __init__(self, root):
794 if os.path.isdir(os.path.join(root, ".git", OBJECTDIR)):
796 self._controldir = os.path.join(root, ".git")
797 elif (os.path.isdir(os.path.join(root, OBJECTDIR)) and
798 os.path.isdir(os.path.join(root, REFSDIR))):
800 self._controldir = root
802 raise NotGitRepository(root)
804 object_store = DiskObjectStore(
805 os.path.join(self.controldir(), OBJECTDIR))
806 refs = DiskRefsContainer(self.controldir())
807 BaseRepo.__init__(self, object_store, refs)
def controldir(self):
    """Return the path of this repository's control directory."""
    control_path = self._controldir
    return control_path
813 def _put_named_file(self, path, contents):
814 """Write a file from the control dir with a specific name and contents.
816 f = GitFile(os.path.join(self.controldir(), path), 'wb')
822 def get_named_file(self, path):
823 """Get a file from the control dir with a specific name.
825 Although the filename should be interpreted as a filename relative to
the control dir in a disk-backed Repo, the object returned need not be
827 pointing to a file in that location.
829 :param path: The path to the file, relative to the control dir.
830 :return: An open file object, or None if the file does not exist.
833 return open(os.path.join(self.controldir(), path.lstrip('/')), 'rb')
834 except (IOError, OSError), e:
835 if e.errno == errno.ENOENT:
def index_path(self):
    """Return the path of the index file inside the control directory."""
    control_path = self.controldir()
    return os.path.join(control_path, INDEX_FILENAME)
def open_index(self):
    """Open the index for this repository.

    :raises NoIndexPresent: if has_index() reports that there is no index
    :return: An Index built from the path given by index_path()
    """
    # Imported here rather than at module level, as in the original code.
    from dulwich.index import Index
    if self.has_index():
        return Index(self.index_path())
    raise NoIndexPresent()
851 """Check if an index is present."""
852 return os.path.exists(self.index_path())
855 return "<Repo at %r>" % self.path
858 def init(cls, path, mkdir=True):
859 controldir = os.path.join(path, ".git")
861 cls.init_bare(controldir)
865 def init_bare(cls, path, mkdir=True):
866 for d in BASE_DIRECTORIES:
867 os.mkdir(os.path.join(path, *d))
869 ret.refs.set_ref("HEAD", "refs/heads/master")
870 ret._put_named_file('description', "Unnamed repository")
871 ret._put_named_file('config', """[core]
872 repositoryformatversion = 0
875 logallrefupdates = true
877 ret._put_named_file(os.path.join('info', 'excludes'), '')