# repo.py -- For dealing with git repositories.
# Copyright (C) 2007 James Westby <jw+debian@jameswestby.net>
-# Copyright (C) 2008 Jelmer Vernooij <jelmer@samba.org>
+# Copyright (C) 2008-2009 Jelmer Vernooij <jelmer@samba.org>
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; version 2
-# of the License.
+# of the License or (at your option) any later version of
+# the License.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
# MA 02110-1301, USA.
+
+"""Repository access."""
+
+
+import ConfigParser
+import errno
import os
-from commit import Commit
-from errors import (
- MissingCommitError,
- NotBlobError,
- NotCommitError,
- NotGitRepository,
- NotTreeError,
- )
-from object_store import ObjectStore
-from objects import (
- ShaFile,
- Commit,
- Tree,
- Blob,
- )
+from dulwich.errors import (
+ MissingCommitError,
+ NoIndexPresent,
+ NotBlobError,
+ NotCommitError,
+ NotGitRepository,
+ NotTreeError,
+ PackedRefsException,
+ )
+from dulwich.file import (
+ ensure_dir_exists,
+ GitFile,
+ )
+from dulwich.object_store import (
+ DiskObjectStore,
+ )
+from dulwich.objects import (
+ Blob,
+ Commit,
+ ShaFile,
+ Tag,
+ Tree,
+ hex_to_sha,
+ )
+# Name of the object store directory inside the control directory.
OBJECTDIR = 'objects'
+# Prefix that marks the contents of a ref file as a symbolic reference.
SYMREF = 'ref: '
+# Directory (relative to the control directory) holding the loose refs.
+REFSDIR = 'refs'
+REFSDIR_TAGS = 'tags'
+REFSDIR_HEADS = 'heads'
+# File name of the index inside the control directory.
+INDEX_FILENAME = "index"
+
+# Directories created, in this order, when a new repository is initialised.
+# Each entry is a list of path components relative to the control directory;
+# parents are listed before their children because they are created with
+# os.mkdir().
+BASE_DIRECTORIES = [
+ [OBJECTDIR],
+ [OBJECTDIR, "info"],
+ [OBJECTDIR, "pack"],
+ ["branches"],
+ [REFSDIR],
+ [REFSDIR, REFSDIR_TAGS],
+ [REFSDIR, REFSDIR_HEADS],
+ ["hooks"],
+ ["info"]
+ ]
+
+
+def read_info_refs(f):
+    """Parse a listing of "<sha> TAB <name>" lines into a dictionary.
+
+    :param f: file-like object providing readlines()
+    :return: Dictionary mapping names to SHA1s; when a name occurs more
+        than once the last line wins
+    """
+    # Split each line once on the first tab; anything after it is the name.
+    pairs = (line.rstrip("\n").split("\t", 1) for line in f.readlines())
+    return dict((name, sha) for (sha, name) in pairs)
-class Tags(object):
+def check_ref_format(refname):
+ """Check if a refname is correctly formatted.
- def __init__(self, tags):
- self.tags = tags
+ Implements all the same rules as git-check-ref-format[1].
- def __getitem__(self, name):
- return self.tags[name]
+ [1] http://www.kernel.org/pub/software/scm/git/docs/git-check-ref-format.html
- def __len__(self):
- return len(self.tags)
+ :param refname: The refname to check
+ :return: True if refname is valid, False otherwise
+ """
+ # These could be combined into one big expression, but are listed separately
+ # to parallel [1].
+ # No path component may begin with a dot.
+ if '/.' in refname or refname.startswith('.'):
+ return False
+ # The name must contain at least one slash.
+ if '/' not in refname:
+ return False
+ # Two consecutive dots are not allowed anywhere.
+ if '..' in refname:
+ return False
+ # 040 is octal for 32: reject ASCII control characters, DEL (\177), space
+ # and the characters ~ ^ : ? * [
+ for c in refname:
+ if ord(c) < 040 or c in '\177 ~^:?*[':
+ return False
+ # The name may not end with a slash or a dot.
+ if refname[-1] in '/.':
+ return False
+ # The name may not end with the ".lock" suffix.
+ if refname.endswith('.lock'):
+ return False
+ # The sequence "@{" is not allowed.
+ if '@{' in refname:
+ return False
+ # Backslashes are not allowed.
+ if '\\' in refname:
+ return False
+ return True
+
+
+class RefsContainer(object):
+    """A container for refs.
+
+    This base class implements lookup and symbolic reference resolution on
+    top of a few primitives that subclasses have to provide: allkeys(),
+    read_loose_ref(), get_packed_refs() and, for the methods that write
+    (set_ref() and import_refs()), __setitem__().
+    """
+
+    def set_ref(self, name, other):
+        """Make a ref point at another ref.
+
+        :param name: Name of the ref to set
+        :param other: Name of the ref to point at
+        """
+        self[name] = SYMREF + other + '\n'
+
+    def get_packed_refs(self):
+        """Get contents of the packed-refs file.
+
+        :return: Dictionary mapping ref names to SHA1s
+
+        :note: Will return an empty dictionary when no packed-refs file is
+            present.
+        """
+        raise NotImplementedError(self.get_packed_refs)
+
+    def import_refs(self, base, other):
+        """Store every ref from a dictionary below a common prefix.
+
+        :param base: Prefix to store the refs under; each ref is written as
+            "<base>/<name>"
+        :param other: Dictionary mapping ref names to values
+        """
+        for name, value in other.iteritems():
+            self["%s/%s" % (base, name)] = value
+
+    def keys(self, base=None):
+        """Refs present in this container.
+
+        :param base: An optional base to return refs under
+        :return: An unsorted set of valid refs in this container, including
+            packed refs.
+        """
+        if base is not None:
+            return self.subkeys(base)
+        else:
+            return self.allkeys()
+
+    def allkeys(self):
+        """Return the full names of all refs in this container.
+
+        Must be provided by subclasses; keys() and subkeys() rely on it.
+        """
+        raise NotImplementedError(self.allkeys)
+
+    def subkeys(self, base):
+        """Return the refs below a base, with the base stripped off.
+
+        :param base: The prefix to look under
+        :return: A set of ref names relative to base
+        """
+        keys = set()
+        for refname in self.allkeys():
+            if refname.startswith(base):
+                # Names are returned relative to base: as_dict() joins them
+                # with base again before looking them up.
+                keys.add(refname[len(base):].strip("/"))
+        return keys
+
+    def as_dict(self, base=None):
+        """Return the contents of this container as a dictionary.
+
+        :param base: An optional base; when given, only refs below it are
+            returned and the keys are relative to it
+        :return: Dictionary mapping ref names to the value they resolve to.
+            Refs that cannot be resolved are left out.
+        """
+        ret = {}
+        keys = self.keys(base)
+        if base is None:
+            base = ""
+        for key in keys:
+            try:
+                ret[key] = self[("%s/%s" % (base, key)).strip("/")]
+            except KeyError:
+                continue # Unable to resolve
+
+        return ret
+
+    def _check_refname(self, name):
+        """Ensure a refname is valid and lives in refs or is HEAD.
+
+        HEAD is not a valid refname according to git-check-ref-format, but this
+        class needs to be able to touch HEAD. Also, check_ref_format expects
+        refnames without the leading 'refs/', but this class requires that
+        so it cannot touch anything outside the refs dir (or HEAD).
+
+        :param name: The name of the reference.
+        :raises KeyError: if a refname is not HEAD or is otherwise not valid.
+        """
+        if name == 'HEAD':
+            return
+        if not name.startswith('refs/') or not check_ref_format(name[5:]):
+            raise KeyError(name)
+
+    def read_loose_ref(self, name):
+        """Read a loose reference and return its contents.
+
+        :param name: the refname to read
+        :return: The contents of the ref file, or None if it does
+            not exist.
+        """
+        raise NotImplementedError(self.read_loose_ref)
+
+    def _follow(self, name):
+        """Follow a reference name.
+
+        :return: a tuple of (refname, sha), where refname is the name of the
+            last reference in the symbolic reference chain
+        :raises KeyError: if name is not valid or the chain of symbolic
+            references is longer than five links
+        """
+        self._check_refname(name)
+        # Start as if we had just read a symbolic ref pointing at name, so
+        # that the loop body handles the first lookup as well.
+        contents = SYMREF + name
+        depth = 0
+        while contents.startswith(SYMREF):
+            refname = contents[len(SYMREF):]
+            contents = self.read_loose_ref(refname)
+            if not contents:
+                # Not a loose ref; it may still be a packed one.
+                contents = self.get_packed_refs().get(refname, None)
+                if not contents:
+                    break
+            depth += 1
+            if depth > 5:
+                raise KeyError(name)
+        return refname, contents
- def iteritems(self):
- for k in self.tags:
- yield k, self[k]
+    def __getitem__(self, name):
+        """Get the SHA1 for a reference name.
+
+        This method follows all symbolic references.
+
+        :raises KeyError: if the reference does not resolve to anything
+        """
+        _, sha = self._follow(name)
+        if sha is None:
+            raise KeyError(name)
+        return sha
+
+
+class DictRefsContainer(RefsContainer):
+    """Refs container that looks refs up in a dictionary."""
+
+    def __init__(self, refs):
+        """Create a container on top of a dictionary.
+
+        :param refs: Dictionary mapping ref names to their contents
+        """
+        self._refs = refs
+
+    def allkeys(self):
+        """Return the names of all refs in the dictionary."""
+        return self._refs.keys()
+
+    def read_loose_ref(self, name):
+        """Return the contents stored for a ref, or None if it is unknown.
+
+        None (rather than a KeyError) is what the lookup code in
+        RefsContainer expects for a ref that does not exist.
+        """
+        return self._refs.get(name, None)
+
+    def get_packed_refs(self):
+        """Return the packed refs; a dictionary has none, so always empty."""
+        return {}
+
+
+class DiskRefsContainer(RefsContainer):
+    """Refs container that reads refs from disk."""
+
+    def __init__(self, path):
+        """Create a container for the refs below a directory.
+
+        :param path: Directory that ref names are resolved against
+        """
+        self.path = path
+        # Cache of the packed-refs file; None until it is first read.
+        self._packed_refs = None
+        self._peeled_refs = {}
+
+    def __repr__(self):
+        return "%s(%r)" % (self.__class__.__name__, self.path)
+
+    def subkeys(self, base):
+        """Return the refs below a base, with the base stripped off.
+
+        :param base: The prefix to look under
+        :return: A set of ref names relative to base, loose and packed
+        """
+        keys = set()
+        path = self.refpath(base)
+        for root, dirs, files in os.walk(path):
+            reldir = root[len(path):].strip(os.path.sep).replace(os.path.sep, "/")
+            for filename in files:
+                refname = ("%s/%s" % (reldir, filename)).strip("/")
+                # check_ref_format requires at least one /, so we prepend the
+                # base before calling it.
+                if check_ref_format("%s/%s" % (base, refname)):
+                    keys.add(refname)
+        for key in self.get_packed_refs():
+            if key.startswith(base):
+                keys.add(key[len(base):].strip("/"))
+        return keys
+
+    def allkeys(self):
+        """Return the full names of all refs, loose and packed, plus HEAD."""
+        keys = set()
+        if os.path.exists(self.refpath("HEAD")):
+            keys.add("HEAD")
+        path = self.refpath("")
+        for root, dirs, files in os.walk(self.refpath("refs")):
+            reldir = root[len(path):].strip(os.path.sep).replace(os.path.sep, "/")
+            for filename in files:
+                refname = ("%s/%s" % (reldir, filename)).strip("/")
+                if check_ref_format(refname):
+                    keys.add(refname)
+        keys.update(self.get_packed_refs())
+        return keys
+
+    def refpath(self, name):
+        """Return the disk path of a ref.
+
+        :param name: The ref name, using "/" as separator
+        :return: The path of the ref below this container's directory
+        """
+        if os.path.sep != "/":
+            name = name.replace("/", os.path.sep)
+        return os.path.join(self.path, name)
+
+    def get_packed_refs(self):
+        """Get contents of the packed-refs file.
+
+        :return: Dictionary mapping ref names to SHA1s
+
+        :note: Will return an empty dictionary when no packed-refs file is
+            present.
+        """
+        # TODO: invalidate the cache on repacking
+        if self._packed_refs is None:
+            self._packed_refs = {}
+            path = os.path.join(self.path, 'packed-refs')
+            try:
+                f = GitFile(path, 'rb')
+            except IOError, e:
+                if e.errno == errno.ENOENT:
+                    return {}
+                raise
+            try:
+                try:
+                    first_line = iter(f).next().rstrip()
+                except StopIteration:
+                    # An empty packed-refs file simply holds no refs.
+                    return self._packed_refs
+                if (first_line.startswith("# pack-refs") and " peeled" in
+                        first_line):
+                    for sha, name, peeled in read_packed_refs_with_peeled(f):
+                        self._packed_refs[name] = sha
+                        if peeled:
+                            self._peeled_refs[name] = peeled
+                else:
+                    # No peeled header: the first line may be a ref, so
+                    # start over from the beginning of the file.
+                    f.seek(0)
+                    for sha, name in read_packed_refs(f):
+                        self._packed_refs[name] = sha
+            finally:
+                f.close()
+        return self._packed_refs
+
+    def read_loose_ref(self, name):
+        """Read a reference file and return its contents.
+
+        If the reference file is a symbolic reference, only read the first
+        line of the file. Otherwise, only read the first 40 bytes.
+
+        :param name: the refname to read, relative to refpath
+        :return: The contents of the ref file, or None if the file does not
+            exist.
+        :raises IOError: if any other error occurs
+        """
+        filename = self.refpath(name)
+        try:
+            f = GitFile(filename, 'rb')
+            try:
+                header = f.read(len(SYMREF))
+                if header == SYMREF:
+                    # Read only the first line
+                    return header + iter(f).next().rstrip("\n")
+                else:
+                    # Read only the first 40 bytes
+                    return header + f.read(40-len(SYMREF))
+            finally:
+                f.close()
+        except IOError, e:
+            if e.errno == errno.ENOENT:
+                return None
+            raise
+
+    def _remove_packed_ref(self, name):
+        """Remove a ref from the packed-refs file, if it is listed there.
+
+        Does nothing when the packed refs have not been read yet.
+
+        :param name: The refname to remove
+        """
+        if self._packed_refs is None:
+            return
+        filename = os.path.join(self.path, 'packed-refs')
+        # reread cached refs from disk, while holding the lock
+        f = GitFile(filename, 'wb')
+        try:
+            self._packed_refs = None
+            self.get_packed_refs()
+
+            if name not in self._packed_refs:
+                return
+
+            del self._packed_refs[name]
+            if name in self._peeled_refs:
+                del self._peeled_refs[name]
+            write_packed_refs(f, self._packed_refs, self._peeled_refs)
+            f.close()
+        finally:
+            f.abort()
+
+    def set_if_equals(self, name, old_ref, new_ref):
+        """Set a refname to new_ref only if it currently equals old_ref.
+
+        This method follows all symbolic references, and can be used to perform
+        an atomic compare-and-swap operation.
+
+        :param name: The refname to set.
+        :param old_ref: The old sha the refname must refer to, or None to set
+            unconditionally.
+        :param new_ref: The new sha the refname will refer to.
+        :return: True if the set was successful, False otherwise.
+        :raises KeyError: if name is not HEAD or a valid name below refs/
+        """
+        # Validate before anything else: _follow() validates too, but its
+        # KeyError is caught below and must not let an invalid name (for
+        # instance one pointing outside the refs directory) reach the disk.
+        self._check_refname(name)
+        try:
+            realname, _ = self._follow(name)
+        except KeyError:
+            realname = name
+        filename = self.refpath(realname)
+        ensure_dir_exists(os.path.dirname(filename))
+        f = GitFile(filename, 'wb')
+        try:
+            if old_ref is not None:
+                try:
+                    # read again while holding the lock
+                    orig_ref = self.read_loose_ref(realname)
+                    if orig_ref is None:
+                        orig_ref = self.get_packed_refs().get(realname, None)
+                    if orig_ref != old_ref:
+                        f.abort()
+                        return False
+                except (OSError, IOError):
+                    f.abort()
+                    raise
+            try:
+                f.write(new_ref+"\n")
+            except (OSError, IOError):
+                f.abort()
+                raise
+        finally:
+            f.close()
+        return True
+
+    def add_if_new(self, name, ref):
+        """Add a new reference only if it does not already exist.
+
+        :param name: The refname to add.
+        :param ref: The sha the refname will refer to.
+        :return: True if the ref was added, False if it already existed.
+        :raises KeyError: if name is not HEAD or a valid name below refs/
+        """
+        self._check_refname(name)
+        filename = self.refpath(name)
+        ensure_dir_exists(os.path.dirname(filename))
+        f = GitFile(filename, 'wb')
+        try:
+            if os.path.exists(filename) or name in self.get_packed_refs():
+                f.abort()
+                return False
+            try:
+                f.write(ref+"\n")
+            except (OSError, IOError):
+                f.abort()
+                raise
+        finally:
+            f.close()
+        return True
+
+    def __setitem__(self, name, ref):
+        """Set a reference name to point to the given SHA1.
+
+        This method follows all symbolic references.
+
+        :note: This method unconditionally overwrites the contents of a reference
+            on disk. To update atomically only if the reference has not changed
+            on disk, use set_if_equals().
+        """
+        self.set_if_equals(name, None, ref)
+
+    def remove_if_equals(self, name, old_ref):
+        """Remove a refname only if it currently equals old_ref.
+
+        This method does not follow symbolic references. It can be used to
+        perform an atomic compare-and-delete operation.
+
+        :param name: The refname to delete.
+        :param old_ref: The old sha the refname must refer to, or None to delete
+            unconditionally.
+        :return: True if the delete was successful, False otherwise.
+        :raises KeyError: if name is not HEAD or a valid name below refs/
+        """
+        self._check_refname(name)
+        filename = self.refpath(name)
+        ensure_dir_exists(os.path.dirname(filename))
+        f = GitFile(filename, 'wb')
+        try:
+            if old_ref is not None:
+                orig_ref = self.read_loose_ref(name)
+                if orig_ref is None:
+                    orig_ref = self.get_packed_refs().get(name, None)
+                if orig_ref != old_ref:
+                    return False
+            # may only be packed
+            try:
+                os.remove(filename)
+            except OSError, e:
+                if e.errno != errno.ENOENT:
+                    raise
+            self._remove_packed_ref(name)
+        finally:
+            # never write, we just wanted the lock
+            f.abort()
+        return True
+
+    def __delitem__(self, name):
+        """Remove a refname.
+
+        This method does not follow symbolic references.
+        :note: This method unconditionally deletes the contents of a reference
+            on disk. To delete atomically only if the reference has not changed
+            on disk, use remove_if_equals().
+        """
+        self.remove_if_equals(name, None)
+
+
+def _split_ref_line(line):
+    """Split a single ref line into a tuple of SHA1 and name.
+
+    :param line: A line of the form "<sha> <name>", optionally ending in a
+        newline
+    :return: Tuple of (sha, name)
+    :raises PackedRefsException: if the line does not consist of exactly two
+        space separated fields, the sha cannot be converted by hex_to_sha, or
+        the name does not pass check_ref_format
+    """
+    parts = line.rstrip("\n").split(" ")
+    if len(parts) != 2:
+        raise PackedRefsException("invalid ref line '%s'" % line)
+    sha, name = parts
+    # The conversion is only done for its validation; the hex form is kept.
+    try:
+        hex_to_sha(sha)
+    except (AssertionError, TypeError), e:
+        raise PackedRefsException(e)
+    if check_ref_format(name):
+        return (sha, name)
+    raise PackedRefsException("invalid ref name '%s'" % name)
-class Repo(object):
+def read_packed_refs(f):
+ """Read a packed refs file.
- ref_locs = ['', 'refs', 'refs/tags', 'refs/heads', 'refs/remotes']
+ Yields tuples with SHA1s and ref names.
- def __init__(self, root):
- if os.path.isdir(os.path.join(root, ".git", "objects")):
- self.bare = False
- self._controldir = os.path.join(root, ".git")
- elif os.path.isdir(os.path.join(root, "objects")):
- self.bare = True
- self._controldir = root
- else:
- raise NotGitRepository(root)
- self.path = root
- self.tags = Tags(self.get_tags())
- self._object_store = None
-
- def controldir(self):
- return self._controldir
-
- def find_missing_objects(self, determine_wants, graph_walker, progress):
- """Fetch the missing objects required for a set of revisions.
-
- :param determine_wants: Function that takes a dictionary with heads
- and returns the list of heads to fetch.
- :param graph_walker: Object that can iterate over the list of revisions
- to fetch and has an "ack" method that will be called to acknowledge
- that a revision is present.
- :param progress: Simple progress function that will be called with
- updated progress strings.
+ :param f: file-like object to read from
+ :raises PackedRefsException: if a peeled ("^") line is found, or a line
+ is rejected by _split_ref_line
"""
- wants = determine_wants(self.get_refs())
- commits_to_send = set(wants)
- sha_done = set()
- ref = graph_walker.next()
- while ref:
- sha_done.add(ref)
- if ref in self.object_store:
- graph_walker.ack(ref)
- ref = graph_walker.next()
- while commits_to_send:
- sha = commits_to_send.pop()
- if sha in sha_done:
+ for l in f:
+ if l[0] == "#":
+ # Comment
continue
+ # Peeled lines are only expected in files read with
+ # read_packed_refs_with_peeled.
+ if l[0] == "^":
+ raise PackedRefsException(
+ "found peeled ref in packed-refs without peeled")
+ yield _split_ref_line(l)
- c = self.commit(sha)
- assert isinstance(c, Commit)
- sha_done.add(sha)
-
- commits_to_send.update([p for p in c.parents if not p in sha_done])
-
- def parse_tree(tree, sha_done):
- for mode, name, x in tree.entries():
- if not x in sha_done:
- try:
- t = self.tree(x)
- sha_done.add(x)
- parse_tree(t, sha_done)
- except:
- sha_done.add(x)
-
- treesha = c.tree
- if treesha not in sha_done:
- t = self.tree(treesha)
- sha_done.add(treesha)
- parse_tree(t, sha_done)
-
- progress("counting objects: %d\r" % len(sha_done))
- return sha_done
-
- def fetch_objects(self, determine_wants, graph_walker, progress):
- """Fetch the missing objects required for a set of revisions.
-
- :param determine_wants: Function that takes a dictionary with heads
- and returns the list of heads to fetch.
- :param graph_walker: Object that can iterate over the list of revisions
- to fetch and has an "ack" method that will be called to acknowledge
- that a revision is present.
- :param progress: Simple progress function that will be called with
- updated progress strings.
- """
- shas = self.find_missing_objects(determine_wants, graph_walker, progress)
- for sha in shas:
- yield self.get_object(sha)
-
- def object_dir(self):
- return os.path.join(self.controldir(), OBJECTDIR)
-
- @property
- def object_store(self):
- if self._object_store is None:
- self._object_store = ObjectStore(self.object_dir())
- return self._object_store
-
- def pack_dir(self):
- return os.path.join(self.object_dir(), PACKDIR)
-
- def _get_ref(self, file):
- f = open(file, 'rb')
- try:
- contents = f.read()
- if contents.startswith(SYMREF):
- ref = contents[len(SYMREF):]
- if ref[-1] == '\n':
- ref = ref[:-1]
- return self.ref(ref)
- assert len(contents) == 41, 'Invalid ref in %s' % file
- return contents[:-1]
- finally:
- f.close()
-
- def ref(self, name):
- for dir in self.ref_locs:
- file = os.path.join(self.controldir(), dir, name)
- if os.path.exists(file):
- return self._get_ref(file)
-
- def get_refs(self):
- ret = {}
- if self.head():
- ret['HEAD'] = self.head()
- for dir in ["refs/heads", "refs/tags"]:
- for name in os.listdir(os.path.join(self.controldir(), dir)):
- path = os.path.join(self.controldir(), dir, name)
- if os.path.isfile(path):
- ret["/".join([dir, name])] = self._get_ref(path)
- return ret
- def set_ref(self, name, value):
- file = os.path.join(self.controldir(), name)
- open(file, 'w').write(value+"\n")
+def read_packed_refs_with_peeled(f):
+ """Read a packed refs file including peeled refs.
- def remove_ref(self, name):
- file = os.path.join(self.controldir(), name)
- if os.path.exists(file):
- os.remove(file)
- return
+ Assumes the "# pack-refs with: peeled" line was already read. Yields tuples
+ with SHA1s, ref names, and peeled SHA1s (or None).
- def get_tags(self):
- ret = {}
- for root, dirs, files in os.walk(os.path.join(self.controldir(), 'refs', 'tags')):
- for name in files:
- ret[name] = self._get_ref(os.path.join(root, name))
- return ret
-
- def heads(self):
- ret = {}
- for root, dirs, files in os.walk(os.path.join(self.controldir(), 'refs', 'heads')):
- for name in files:
- ret[name] = self._get_ref(os.path.join(root, name))
- return ret
-
- def head(self):
- return self.ref('HEAD')
-
- def _get_object(self, sha, cls):
- assert len(sha) in (20, 40)
- ret = self.get_object(sha)
- if ret._type != cls._type:
- if cls is Commit:
- raise NotCommitError(ret)
- elif cls is Blob:
- raise NotBlobError(ret)
- elif cls is Tree:
- raise NotTreeError(ret)
+ :param f: file-like object to read from, seek'ed to the second line
+ :raises PackedRefsException: if a peeled line does not follow a ref line,
+ a peeled SHA1 cannot be converted by hex_to_sha, or a ref line is
+ rejected by _split_ref_line
+ """
+ # A ref line is held back in "last" until the next line shows whether it
+ # is followed by a peeled ("^") line.
+ last = None
+ for l in f:
+ if l[0] == "#":
+ continue
+ l = l.rstrip("\n")
+ if l[0] == "^":
+ if not last:
+ raise PackedRefsException("unexpected peeled ref line")
+ try:
+ hex_to_sha(l[1:])
+ except (AssertionError, TypeError), e:
+ raise PackedRefsException(e)
+ sha, name = _split_ref_line(last)
+ last = None
+ yield (sha, name, l[1:])
else:
- raise Exception("Type invalid: %r != %r" % (ret._type, cls._type))
- return ret
+ # The held back ref had no peeled line; emit it before holding
+ # back the current one.
+ if last:
+ sha, name = _split_ref_line(last)
+ yield (sha, name, None)
+ last = l
+ # Emit the final ref line if it was not followed by a peeled line.
+ if last:
+ sha, name = _split_ref_line(last)
+ yield (sha, name, None)
- def get_object(self, sha):
- return self.object_store[sha]
- def get_parents(self, sha):
- return self.commit(sha).parents
+def write_packed_refs(f, packed_refs, peeled_refs=None):
+ """Write a packed refs file.
- def commit(self, sha):
- return self._get_object(sha, Commit)
-
- def tree(self, sha):
- return self._get_object(sha, Tree)
-
- def get_blob(self, sha):
- return self._get_object(sha, Blob)
-
- def revision_history(self, head):
- """Returns a list of the commits reachable from head.
+ :param f: empty file-like object to write to
+ :param packed_refs: dict of refname to sha of packed refs to write
+ :param peeled_refs: optional dict of refname to peeled sha; when given
+ (even if empty) the "# pack-refs with: peeled" header is written first
+ """
+ if peeled_refs is None:
+ peeled_refs = {}
+ else:
+ f.write('# pack-refs with: peeled\n')
+ # Refs are written sorted by name; a peeled value, if any, goes on its own
+ # "^" line directly after the ref it belongs to.
+ for refname in sorted(packed_refs.iterkeys()):
+ f.write('%s %s\n' % (packed_refs[refname], refname))
+ if refname in peeled_refs:
+ f.write('^%s\n' % peeled_refs[refname])
- Returns a list of commit objects. the first of which will be the commit
- of head, then following theat will be the parents.
- Raises NotCommitError if any no commits are referenced, including if the
- head parameter isn't the sha of a commit.
+class BaseRepo(object):
+    """Base class for a git repository.
- XXX: work out how to handle merges.
+    :ivar object_store: Dictionary-like object for accessing
+        the objects
+    :ivar refs: Dictionary-like object with the refs in this repository
"""
- # We build the list backwards, as parents are more likely to be older
- # than children
- pending_commits = [head]
- history = []
- while pending_commits != []:
- head = pending_commits.pop(0)
- try:
- commit = self.commit(head)
- except KeyError:
- raise MissingCommitError(head)
- if commit in history:
- continue
- i = 0
- for known_commit in history:
- if known_commit.commit_time > commit.commit_time:
- break
- i += 1
- history.insert(i, commit)
- parents = commit.parents
- pending_commits += parents
- history.reverse()
- return history
-
- def __repr__(self):
- return "<Repo at %r>" % self.path
-
- @classmethod
- def init(cls, path, mkdir=True):
- controldir = os.path.join(path, ".git")
- os.mkdir(controldir)
- cls.init_bare(controldir)
-
- @classmethod
- def init_bare(cls, path, mkdir=True):
- for d in [["objects"],
- ["objects", "info"],
- ["objects", "pack"],
- ["branches"],
- ["refs"],
- ["refs", "tags"],
- ["refs", "heads"],
- ["hooks"],
- ["info"]]:
- os.mkdir(os.path.join(path, *d))
- open(os.path.join(path, 'HEAD'), 'w').write("ref: refs/heads/master\n")
- open(os.path.join(path, 'description'), 'w').write("Unnamed repository")
- open(os.path.join(path, 'info', 'excludes'), 'w').write("")
-
- create = init_bare
-
+    def __init__(self, object_store, refs):
+        self.object_store = object_store
+        self.refs = refs
+
+    def get_named_file(self, path):
+        """Get a file from the control dir with a specific name.
+
+        Although the filename should be interpreted as a filename relative to
+        the control dir in a disk-backed Repo, the object returned need not be
+        pointing to a file in that location.
+
+        :param path: The path to the file, relative to the control dir.
+        :return: An open file object, or None if the file does not exist.
+        """
+        raise NotImplementedError(self.get_named_file)
+
+    def open_index(self):
+        """Open the index for this repository.
+
+        :raises NoIndexPresent: If no index is present
+        :return: Index instance
+        """
+        raise NotImplementedError(self.open_index)
+
+    def fetch(self, target, determine_wants=None, progress=None):
+        """Fetch objects into another repository.
+
+        :param target: The target repository
+        :param determine_wants: Optional function to determine what refs to
+            fetch.
+        :param progress: Optional progress function
+        :return: Dictionary with all refs of this repository
+        """
+        if determine_wants is None:
+            determine_wants = lambda heads: heads.values()
+        target.object_store.add_objects(
+            self.fetch_objects(determine_wants, target.get_graph_walker(),
+                               progress))
+        return self.get_refs()
+
+    def fetch_objects(self, determine_wants, graph_walker, progress):
+        """Fetch the missing objects required for a set of revisions.
+
+        :param determine_wants: Function that takes a dictionary with heads
+            and returns the list of heads to fetch.
+        :param graph_walker: Object that can iterate over the list of revisions
+            to fetch and has an "ack" method that will be called to acknowledge
+            that a revision is present.
+        :param progress: Simple progress function that will be called with
+            updated progress strings.
+        :return: iterator over objects, with __len__ implemented
+        """
+        wants = determine_wants(self.get_refs())
+        haves = self.object_store.find_common_revisions(graph_walker)
+        return self.object_store.iter_shas(
+            self.object_store.find_missing_objects(haves, wants, progress))
+
+    def get_graph_walker(self, heads=None):
+        """Return a graph walker from the object store.
+
+        :param heads: Heads to start from; defaults to the values of all
+            refs below refs/heads
+        """
+        if heads is None:
+            heads = self.refs.as_dict('refs/heads').values()
+        return self.object_store.get_graph_walker(heads)
+
+    def ref(self, name):
+        """Return the SHA1 a ref is pointing to."""
+        return self.refs[name]
+
+    def get_refs(self):
+        """Get dictionary with all refs."""
+        return self.refs.as_dict()
+
+    def head(self):
+        """Return the SHA1 pointed at by HEAD."""
+        return self.refs['HEAD']
+
+    def _get_object(self, sha, cls):
+        """Fetch an object and check that it has the expected type.
+
+        :param sha: SHA1 of the object (20 or 40 characters long)
+        :param cls: Expected class of the object
+        :raises NotCommitError, NotBlobError, NotTreeError: if the object is
+            of another type than the Commit, Blob or Tree that was asked for
+        """
+        assert len(sha) in (20, 40)
+        ret = self.get_object(sha)
+        if ret._type != cls._type:
+            if cls is Commit:
+                raise NotCommitError(ret)
+            elif cls is Blob:
+                raise NotBlobError(ret)
+            elif cls is Tree:
+                raise NotTreeError(ret)
+            else:
+                raise Exception("Type invalid: %r != %r" % (ret._type, cls._type))
+        return ret
+
+    def get_object(self, sha):
+        """Return the object with the given SHA1 from the object store."""
+        return self.object_store[sha]
+
+    def get_parents(self, sha):
+        """Return the parents of the commit with the given SHA1."""
+        return self.commit(sha).parents
+
+    def get_config(self):
+        """Return the repository configuration as a dict of dicts.
+
+        The outer keys are the section names, the inner dictionaries hold the
+        options of each section.
+        """
+        # NOTE(review): relies on self._controldir, which this base class
+        # does not set itself; the disk-backed Repo subclass does.
+        p = ConfigParser.RawConfigParser()
+        p.read(os.path.join(self._controldir, 'config'))
+        return dict((section, dict(p.items(section))) for section in p.sections())
+
+    def commit(self, sha):
+        """Return the commit with the given SHA1."""
+        return self._get_object(sha, Commit)
+
+    def tree(self, sha):
+        """Return the tree with the given SHA1."""
+        return self._get_object(sha, Tree)
+
+    def tag(self, sha):
+        """Return the tag with the given SHA1."""
+        return self._get_object(sha, Tag)
+
+    def get_blob(self, sha):
+        """Return the blob with the given SHA1."""
+        return self._get_object(sha, Blob)
+
+    def revision_history(self, head):
+        """Returns a list of the commits reachable from head.
+
+        Returns a list of commit objects, the first of which will be the
+        commit of head; following that will be the parents.
+
+        Raises NotCommitError if any of the referenced objects is not a
+        commit, including if the head parameter isn't the sha of a commit.
+        Raises MissingCommitError if a referenced commit cannot be found.
+
+        XXX: work out how to handle merges.
+        """
+        # We build the list backwards, as parents are more likely to be older
+        # than children
+        pending_commits = [head]
+        history = []
+        while pending_commits != []:
+            head = pending_commits.pop(0)
+            try:
+                commit = self.commit(head)
+            except KeyError:
+                raise MissingCommitError(head)
+            if commit in history:
+                continue
+            # Insert in front of the first commit with a later commit time,
+            # keeping history ordered by ascending commit time.
+            i = 0
+            for known_commit in history:
+                if known_commit.commit_time > commit.commit_time:
+                    break
+                i += 1
+            history.insert(i, commit)
+            parents = commit.parents
+            pending_commits += parents
+        history.reverse()
+        return history
+    def __getitem__(self, name):
+        """Look up an object by SHA1 or by ref name.
+
+        Names of 20 or 40 characters are taken to be SHA1s; everything else
+        is resolved through the refs first.
+        """
+        if len(name) in (20, 40):
+            return self.object_store[name]
+        return self.object_store[self.refs[name]]
+
+    def __setitem__(self, name, value):
+        """Set a ref to an object or a SHA1.
+
+        :param name: Ref name; must be HEAD or start with "refs/"
+        :param value: A ShaFile (its id is stored) or a SHA1 string
+        :raises TypeError: if value is neither a ShaFile nor a string
+        :raises ValueError: if name is not a ref name
+        """
+        if name.startswith("refs/") or name == "HEAD":
+            if isinstance(value, ShaFile):
+                self.refs[name] = value.id
+            elif isinstance(value, str):
+                self.refs[name] = value
+            else:
+                raise TypeError(value)
+        else:
+            # Only reject names that are not refs; a successful update must
+            # not raise.
+            raise ValueError(name)
+
+    def __delitem__(self, name):
+        """Remove a ref.
+
+        :param name: Ref name; must be HEAD or start with "refs"
+        :raises ValueError: if name is not a ref name
+        """
+        if name.startswith("refs") or name == "HEAD":
+            del self.refs[name]
+        else:
+            raise ValueError(name)
+
+    def do_commit(self, committer, message,
+                  author=None, commit_timestamp=None,
+                  commit_timezone=None, author_timestamp=None,
+                  author_timezone=None, tree=None):
+        """Create a new commit.
+
+        :param committer: Committer fullname
+        :param message: Commit message
+        :param author: Author fullname (defaults to committer)
+        :param commit_timestamp: Commit timestamp (defaults to now)
+        :param commit_timezone: Commit timestamp timezone (defaults to GMT)
+        :param author_timestamp: Author timestamp (defaults to commit timestamp)
+        :param author_timezone: Author timestamp timezone
+            (defaults to commit timestamp timezone)
+        :param tree: SHA1 of the tree root to use (if not specified the current index will be committed).
+        :return: New commit SHA1
+        """
+        from dulwich.index import commit_index
+        import time
+        c = Commit()
+        if tree is None:
+            # The index is only needed when no tree was given; opening it
+            # lazily lets an explicit tree be committed without an index.
+            index = self.open_index()
+            c.tree = commit_index(self.object_store, index)
+        else:
+            c.tree = tree
+        c.committer = committer
+        if commit_timestamp is None:
+            commit_timestamp = time.time()
+        c.commit_time = int(commit_timestamp)
+        if commit_timezone is None:
+            commit_timezone = 0
+        c.commit_timezone = commit_timezone
+        if author is None:
+            author = committer
+        c.author = author
+        if author_timestamp is None:
+            author_timestamp = commit_timestamp
+        c.author_time = int(author_timestamp)
+        if author_timezone is None:
+            author_timezone = commit_timezone
+        c.author_timezone = author_timezone
+        c.message = message
+        # NOTE(review): no parents are set on the new commit here, so the
+        # previous HEAD is not recorded as its parent -- confirm intended.
+        self.object_store.add_object(c)
+        self.refs["HEAD"] = c.id
+        return c.id
+
+
+class Repo(BaseRepo):
+    """A git repository backed by local disk."""
+
+    def __init__(self, root):
+        """Open the repository at a path.
+
+        :param root: Either a directory containing a ".git" control
+            directory, or the control directory of a bare repository
+        :raises NotGitRepository: if no repository is found at root
+        """
+        if os.path.isdir(os.path.join(root, ".git", OBJECTDIR)):
+            self.bare = False
+            self._controldir = os.path.join(root, ".git")
+        elif (os.path.isdir(os.path.join(root, OBJECTDIR)) and
+              os.path.isdir(os.path.join(root, REFSDIR))):
+            self.bare = True
+            self._controldir = root
+        else:
+            raise NotGitRepository(root)
+        self.path = root
+        object_store = DiskObjectStore(
+            os.path.join(self.controldir(), OBJECTDIR))
+        refs = DiskRefsContainer(self.controldir())
+        BaseRepo.__init__(self, object_store, refs)
+
+    def controldir(self):
+        """Return the path of the control directory."""
+        return self._controldir
+
+    def _put_named_file(self, path, contents):
+        """Write a file in the control dir with a specific name and contents.
+
+        :param path: The path to the file, relative to the control dir.
+        :param contents: The contents to write
+        """
+        f = GitFile(os.path.join(self.controldir(), path), 'wb')
+        try:
+            f.write(contents)
+        finally:
+            f.close()
+
+    def get_named_file(self, path):
+        """Get a file from the control dir with a specific name.
+
+        Although the filename should be interpreted as a filename relative to
+        the control dir in a disk-backed Repo, the object returned need not be
+        pointing to a file in that location.
+
+        :param path: The path to the file, relative to the control dir.
+        :return: An open file object, or None if the file does not exist.
+        """
+        try:
+            return open(os.path.join(self.controldir(), path.lstrip('/')), 'rb')
+        except (IOError, OSError), e:
+            if e.errno == errno.ENOENT:
+                return None
+            raise
+
+    def index_path(self):
+        """Return path to the index file."""
+        return os.path.join(self.controldir(), INDEX_FILENAME)
+
+    def open_index(self):
+        """Open the index for this repository.
+
+        :raises NoIndexPresent: If no index is present
+        :return: Index instance
+        """
+        from dulwich.index import Index
+        if not self.has_index():
+            raise NoIndexPresent()
+        return Index(self.index_path())
+
+    def has_index(self):
+        """Check if an index is present."""
+        return os.path.exists(self.index_path())
+
+    def __repr__(self):
+        return "<Repo at %r>" % self.path
+
+    @classmethod
+    def _init_controldir(cls, path, bare):
+        """Populate an existing directory as a control directory.
+
+        :param path: The (already existing) control directory
+        :param bare: Whether the config should mark the repository as bare
+        :return: Repo instance opened on path
+        """
+        for d in BASE_DIRECTORIES:
+            os.mkdir(os.path.join(path, *d))
+        ret = cls(path)
+        ret.refs.set_ref("HEAD", "refs/heads/master")
+        ret._put_named_file('description', "Unnamed repository")
+        if bare:
+            bare_value = "true"
+        else:
+            bare_value = "false"
+        ret._put_named_file('config', """[core]
+ repositoryformatversion = 0
+ filemode = true
+ bare = %s
+ logallrefupdates = true
+""" % bare_value)
+        ret._put_named_file(os.path.join('info', 'excludes'), '')
+        return ret
+
+    @classmethod
+    def init(cls, path, mkdir=True):
+        """Create a new non-bare repository in an existing directory.
+
+        :param path: Directory in which the ".git" control dir is created
+        :param mkdir: Currently unused
+        :return: Repo instance opened on path
+        """
+        controldir = os.path.join(path, ".git")
+        os.mkdir(controldir)
+        cls._init_controldir(controldir, False)
+        return cls(path)
+
+    @classmethod
+    def init_bare(cls, path, mkdir=True):
+        """Create a new bare repository in an existing directory.
+
+        :param path: Directory that becomes the control directory
+        :param mkdir: Currently unused
+        :return: Repo instance opened on path
+        """
+        # The config of a bare repository has to say so.
+        return cls._init_controldir(path, True)
+
+    create = init_bare