1 # repo.py -- For dealing with git repositories.
2 # Copyright (C) 2007 James Westby <jw+debian@jameswestby.net>
3 # Copyright (C) 2008-2009 Jelmer Vernooij <jelmer@samba.org>
5 # This program is free software; you can redistribute it and/or
6 # modify it under the terms of the GNU General Public License
7 # as published by the Free Software Foundation; version 2
8 # of the License or (at your option) any later version of
11 # This program is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
22 """Repository access."""
28 from dulwich.errors import (
37 from dulwich.file import (
41 from dulwich.object_store import (
44 from dulwich.objects import (
57 REFSDIR_HEADS = 'heads'
58 INDEX_FILENAME = "index"
66 [REFSDIR, REFSDIR_TAGS],
67 [REFSDIR, REFSDIR_HEADS],
73 def check_ref_format(refname):
74 """Check if a refname is correctly formatted.
76 Implements all the same rules as git-check-ref-format[1].
78 [1] http://www.kernel.org/pub/software/scm/git/docs/git-check-ref-format.html
80 :param refname: The refname to check
81 :return: True if refname is valid, False otherwise
83 # These could be combined into one big expression, but are listed separately
85 if '/.' in refname or refname.startswith('.'):
87 if '/' not in refname:
92 if ord(c) < 040 or c in '\177 ~^:?*[':
94 if refname[-1] in '/.':
96 if refname.endswith('.lock'):
105 class RefsContainer(object):
106 """A container for refs."""
def set_ref(self, name, other):
    """Make a ref point at another ref.

    The symbolic reference is stored through item assignment on this
    container: the value written is the ``SYMREF`` prefix, followed by the
    target name and a trailing newline.

    :param name: Name of the ref to set
    :param other: Name of the ref to point at
    """
    symref_contents = SYMREF + other + '\n'
    self[name] = symref_contents
def get_packed_refs(self):
    """Get contents of the packed-refs file.

    This base implementation is a placeholder that always raises; a
    concrete container has to supply the real lookup.

    :return: Dictionary mapping ref names to SHA1s
    :raises NotImplementedError: always, in this base implementation

    :note: Will return an empty dictionary when no packed-refs file is
        present.
    """
    unimplemented = self.get_packed_refs
    raise NotImplementedError(unimplemented)
def import_refs(self, base, other):
    """Copy every ref from ``other`` into this container under ``base``.

    :param base: Prefix to store the refs under; each ref is written as
        ``<base>/<name>``
    :param other: Mapping of ref names to values, iterated through its
        ``iteritems()`` method
    """
    for short_name, sha in other.iteritems():
        full_name = "%s/%s" % (base, short_name)
        self[full_name] = sha
130 def keys(self, base=None):
131 """Refs present in this container.
133 :param base: An optional base to return refs under
134 :return: An unsorted set of valid refs in this container, including
138 return self.subkeys(base)
140 return self.allkeys()
142 def as_dict(self, base=None):
143 """Return the contents of this container as a dictionary.
147 keys = self.keys(base)
152 ret[key] = self[("%s/%s" % (base, key)).strip("/")]
154 continue # Unable to resolve
158 def _check_refname(self, name):
159 """Ensure a refname is valid and lives in refs or is HEAD.
161 HEAD is not a valid refname according to git-check-ref-format, but this
162 class needs to be able to touch HEAD. Also, check_ref_format expects
163 refnames without the leading 'refs/', but this class requires that
164 so it cannot touch anything outside the refs dir (or HEAD).
166 :param name: The name of the reference.
167 :raises KeyError: if a refname is not HEAD or is otherwise not valid.
171 if not name.startswith('refs/') or not check_ref_format(name[5:]):
def read_loose_ref(self, name):
    """Read a loose reference and return its contents.

    This base implementation is a placeholder that always raises; a
    concrete container has to supply the real lookup.

    :param name: the refname to read
    :return: The contents of the ref file, or None if it does
        not exist.
    :raises NotImplementedError: always, in this base implementation
    """
    unimplemented = self.read_loose_ref
    raise NotImplementedError(unimplemented)
183 def _follow(self, name):
184 """Follow a reference name.
186 :return: a tuple of (refname, sha), where refname is the name of the
187 last reference in the symbolic reference chain
189 self._check_refname(name)
190 contents = SYMREF + name
192 while contents.startswith(SYMREF):
193 refname = contents[len(SYMREF):]
194 contents = self.read_loose_ref(refname)
196 contents = self.get_packed_refs().get(refname, None)
202 return refname, contents
204 def __getitem__(self, name):
205 """Get the SHA1 for a reference name.
207 This method follows all symbolic references.
209 _, sha = self._follow(name)
215 class DiskRefsContainer(RefsContainer):
216 """Refs container that reads refs from disk."""
218 def __init__(self, path):
220 self._packed_refs = None
221 self._peeled_refs = {}
224 return "%s(%r)" % (self.__class__.__name__, self.path)
226 def subkeys(self, base):
228 path = self.refpath(base)
229 for root, dirs, files in os.walk(path):
230 dir = root[len(path):].strip(os.path.sep).replace(os.path.sep, "/")
231 for filename in files:
232 refname = ("%s/%s" % (dir, filename)).strip("/")
233 # check_ref_format requires at least one /, so we prepend the
234 # base before calling it.
235 if check_ref_format("%s/%s" % (base, refname)):
237 for key in self.get_packed_refs():
238 if key.startswith(base):
239 keys.add(key[len(base):].strip("/"))
244 if os.path.exists(self.refpath("HEAD")):
246 path = self.refpath("")
247 for root, dirs, files in os.walk(self.refpath("refs")):
248 dir = root[len(path):].strip(os.path.sep).replace(os.path.sep, "/")
249 for filename in files:
250 refname = ("%s/%s" % (dir, filename)).strip("/")
251 if check_ref_format(refname):
253 keys.update(self.get_packed_refs())
def refpath(self, name):
    """Return the disk path of a ref.

    :param name: Ref name using "/" as its separator, for example
        ``refs/heads/master`` or ``HEAD``
    :return: ``name`` joined onto ``self.path``, with "/" translated to the
        platform path separator when the two differ
    """
    native_sep = os.path.sep
    # Ref names always use "/"; translate only where the platform differs.
    relative = name if native_sep == "/" else name.replace("/", native_sep)
    return os.path.join(self.path, relative)
264 def get_packed_refs(self):
265 """Get contents of the packed-refs file.
267 :return: Dictionary mapping ref names to SHA1s
269 :note: Will return an empty dictionary when no packed-refs file is
272 # TODO: invalidate the cache on repacking
273 if self._packed_refs is None:
274 self._packed_refs = {}
275 path = os.path.join(self.path, 'packed-refs')
277 f = GitFile(path, 'rb')
279 if e.errno == errno.ENOENT:
283 first_line = iter(f).next().rstrip()
284 if (first_line.startswith("# pack-refs") and " peeled" in
286 for sha, name, peeled in read_packed_refs_with_peeled(f):
287 self._packed_refs[name] = sha
289 self._peeled_refs[name] = peeled
292 for sha, name in read_packed_refs(f):
293 self._packed_refs[name] = sha
296 return self._packed_refs
298 def read_loose_ref(self, name):
299 """Read a reference file and return its contents.
301 If the reference file is a symbolic reference, only read the first line of
302 the file. Otherwise, only read the first 40 bytes.
304 :param name: the refname to read, relative to refpath
305 :return: The contents of the ref file, or None if the file does not
307 :raises IOError: if any other error occurs
309 filename = self.refpath(name)
311 f = GitFile(filename, 'rb')
313 header = f.read(len(SYMREF))
315 # Read only the first line
316 return header + iter(f).next().rstrip("\n")
318 # Read only the first 40 bytes
319 return header + f.read(40-len(SYMREF))
323 if e.errno == errno.ENOENT:
327 def _remove_packed_ref(self, name):
328 if self._packed_refs is None:
330 filename = os.path.join(self.path, 'packed-refs')
331 # reread cached refs from disk, while holding the lock
332 f = GitFile(filename, 'wb')
334 self._packed_refs = None
335 self.get_packed_refs()
337 if name not in self._packed_refs:
340 del self._packed_refs[name]
341 if name in self._peeled_refs:
342 del self._peeled_refs[name]
343 write_packed_refs(f, self._packed_refs, self._peeled_refs)
348 def set_if_equals(self, name, old_ref, new_ref):
349 """Set a refname to new_ref only if it currently equals old_ref.
351 This method follows all symbolic references, and can be used to perform
352 an atomic compare-and-swap operation.
354 :param name: The refname to set.
355 :param old_ref: The old sha the refname must refer to, or None to set
357 :param new_ref: The new sha the refname will refer to.
358 :return: True if the set was successful, False otherwise.
361 realname, _ = self._follow(name)
364 filename = self.refpath(realname)
365 ensure_dir_exists(os.path.dirname(filename))
366 f = GitFile(filename, 'wb')
368 if old_ref is not None:
370 # read again while holding the lock
371 orig_ref = self.read_loose_ref(realname)
373 orig_ref = self.get_packed_refs().get(realname, None)
374 if orig_ref != old_ref:
377 except (OSError, IOError):
381 f.write(new_ref+"\n")
382 except (OSError, IOError):
389 def add_if_new(self, name, ref):
390 """Add a new reference only if it does not already exist."""
391 self._check_refname(name)
392 filename = self.refpath(name)
393 ensure_dir_exists(os.path.dirname(filename))
394 f = GitFile(filename, 'wb')
396 if os.path.exists(filename) or name in self.get_packed_refs():
401 except (OSError, IOError):
def __setitem__(self, name, ref):
    """Set a reference name to point to the given SHA1.

    The write is delegated to set_if_equals(), which follows all symbolic
    references.

    :param name: The refname to set.
    :param ref: The new sha the refname will refer to.

    :note: This method unconditionally overwrites the contents of a
        reference on disk. To update atomically only if the reference has
        not changed on disk, use set_if_equals().
    """
    # None as the expected old value means "do not compare, just write".
    expected_old = None
    self.set_if_equals(name, expected_old, ref)
419 def remove_if_equals(self, name, old_ref):
420 """Remove a refname only if it currently equals old_ref.
422 This method does not follow symbolic references. It can be used to
423 perform an atomic compare-and-delete operation.
425 :param name: The refname to delete.
426 :param old_ref: The old sha the refname must refer to, or None to delete
428 :return: True if the delete was successful, False otherwise.
430 self._check_refname(name)
431 filename = self.refpath(name)
432 ensure_dir_exists(os.path.dirname(filename))
433 f = GitFile(filename, 'wb')
435 if old_ref is not None:
436 orig_ref = self.read_loose_ref(name)
438 orig_ref = self.get_packed_refs().get(name, None)
439 if orig_ref != old_ref:
445 if e.errno != errno.ENOENT:
447 self._remove_packed_ref(name)
449 # never write, we just wanted the lock
453 def __delitem__(self, name):
456 This method does not follow symbolic references.
457 :note: This method unconditionally deletes the contents of a reference
458 on disk. To delete atomically only if the reference has not changed
459 on disk, use remove_if_equals().
461 self.remove_if_equals(name, None)
464 def _split_ref_line(line):
465 """Split a single ref line into a tuple of SHA1 and name."""
466 fields = line.rstrip("\n").split(" ")
468 raise PackedRefsException("invalid ref line '%s'" % line)
472 except (AssertionError, TypeError), e:
473 raise PackedRefsException(e)
474 if not check_ref_format(name):
475 raise PackedRefsException("invalid ref name '%s'" % name)
479 def read_packed_refs(f):
480 """Read a packed refs file.
482 Yields tuples with SHA1s and ref names.
484 :param f: file-like object to read from
491 raise PackedRefsException(
492 "found peeled ref in packed-refs without peeled")
493 yield _split_ref_line(l)
496 def read_packed_refs_with_peeled(f):
497 """Read a packed refs file including peeled refs.
499 Assumes the "# pack-refs with: peeled" line was already read. Yields tuples
500 with SHA1s, ref names, and peeled SHA1s (or None).
502 :param f: file-like object to read from, seek'ed to the second line
511 raise PackedRefsException("unexpected peeled ref line")
514 except (AssertionError, TypeError), e:
515 raise PackedRefsException(e)
516 sha, name = _split_ref_line(last)
518 yield (sha, name, l[1:])
521 sha, name = _split_ref_line(last)
522 yield (sha, name, None)
525 sha, name = _split_ref_line(last)
526 yield (sha, name, None)
529 def write_packed_refs(f, packed_refs, peeled_refs=None):
530 """Write a packed refs file.
532 :param f: empty file-like object to write to
533 :param packed_refs: dict of refname to sha of packed refs to write
535 if peeled_refs is None:
538 f.write('# pack-refs with: peeled\n')
539 for refname in sorted(packed_refs.iterkeys()):
540 f.write('%s %s\n' % (packed_refs[refname], refname))
541 if refname in peeled_refs:
542 f.write('^%s\n' % peeled_refs[refname])
545 class BaseRepo(object):
546 """Base class for a git repository.
548 :ivar object_store: Dictionary-like object for accessing
550 :ivar refs: Dictionary-like object with the refs in this repository
553 def __init__(self, object_store, refs):
554 self.object_store = object_store
def get_named_file(self, path):
    """Get a file from the control dir with a specific name.

    Although the filename should be interpreted as a filename relative to
    the control dir in a disk-backed Repo, the object returned need not be
    pointing to a file in that location.

    This base implementation is a placeholder that always raises.

    :param path: The path to the file, relative to the control dir.
    :return: An open file object, or None if the file does not exist.
    :raises NotImplementedError: always, in this base implementation
    """
    unimplemented = self.get_named_file
    raise NotImplementedError(unimplemented)
def put_named_file(self, relpath, contents):
    """Write a file in the control directory with specified name and
    contents.

    The filename should be interpreted as relative to the control dir in a
    disk-backed Repo.

    This base implementation is a placeholder that always raises.

    :param relpath: The path to the file, relative to the control dir.
    :param contents: Contents of the new file
    :raises NotImplementedError: always, in this base implementation
    """
    unimplemented = self.put_named_file
    raise NotImplementedError(unimplemented)
def open_index(self):
    """Open the index for this repository.

    This base implementation is a placeholder that always raises.

    :raises NoIndexPresent: If no index is present
    :raises NotImplementedError: always, in this base implementation
    :return: Index instance
    """
    unimplemented = self.open_index
    raise NotImplementedError(unimplemented)
def fetch(self, target, determine_wants=None, progress=None):
    """Fetch objects into another repository.

    The objects produced by fetch_objights-free call to fetch_objects() are
    handed to ``target.object_store.add_objects``.

    :param target: The target repository
    :param determine_wants: Optional function to determine what refs to
        fetch; when omitted, every value of the refs dictionary is wanted
    :param progress: Optional progress function
    :return: The result of ``self.get_refs()``
    """
    wants_fn = determine_wants
    if wants_fn is None:
        def wants_fn(heads):
            return heads.values()
    add_objects = target.object_store.add_objects
    walker = target.get_graph_walker()
    add_objects(self.fetch_objects(wants_fn, walker, progress))
    return self.get_refs()
def fetch_objects(self, determine_wants, graph_walker, progress):
    """Fetch the missing objects required for a set of revisions.

    :param determine_wants: Function that takes a dictionary with heads
        and returns the list of heads to fetch.
    :param graph_walker: Object that can iterate over the list of revisions
        to fetch and has an "ack" method that will be called to acknowledge
        that a revision is present.
    :param progress: Simple progress function that will be called with
        updated progress strings.
    :return: iterator over objects, with __len__ implemented
    """
    # Ask the caller what it wants before consulting the object store.
    wanted = determine_wants(self.get_refs())
    store = self.object_store
    common = store.find_common_revisions(graph_walker)
    missing = store.find_missing_objects(common, wanted, progress)
    return store.iter_shas(missing)
622 def get_graph_walker(self, heads=None):
624 heads = self.refs.as_dict('refs/heads').values()
625 return self.object_store.get_graph_walker(heads)
628 """Return the SHA1 a ref is pointing to."""
629 return self.refs[name]
632 """Get dictionary with all refs."""
633 return self.refs.as_dict()
636 """Return the SHA1 pointed at by HEAD."""
637 return self.refs['HEAD']
639 def _get_object(self, sha, cls):
640 assert len(sha) in (20, 40)
641 ret = self.get_object(sha)
642 if ret._type != cls._type:
644 raise NotCommitError(ret)
646 raise NotBlobError(ret)
648 raise NotTreeError(ret)
650 raise Exception("Type invalid: %r != %r" % (ret._type, cls._type))
def get_object(self, sha):
    """Look up an object in this repository's object store.

    :param sha: Key identifying the object; passed unchanged to the store
    :return: Whatever ``self.object_store[sha]`` yields
    """
    store = self.object_store
    return store[sha]
def get_parents(self, sha):
    """Return the parents of the commit identified by ``sha``.

    :param sha: SHA of the commit, resolved through ``self.commit``
    :return: The ``parents`` attribute of that commit
    """
    commit = self.commit(sha)
    return commit.parents
def get_config(self):
    """Load this repository's configuration file.

    The ``configobj`` package is imported on each call, so it is only
    required when this method is actually used.

    :return: A ``ConfigObj`` built from the ``config`` file inside
        ``self._controldir``
    """
    from configobj import ConfigObj
    config_path = os.path.join(self._controldir, 'config')
    return ConfigObj(config_path)
def commit(self, sha):
    """Retrieve the commit with the given SHA.

    :param sha: SHA of the commit to retrieve
    :return: The result of ``_get_object`` when asked for a ``Commit``
    """
    expected_cls = Commit
    return self._get_object(sha, expected_cls)
667 return self._get_object(sha, Tree)
670 return self._get_object(sha, Tag)
def get_blob(self, sha):
    """Retrieve the blob with the given SHA.

    :param sha: SHA of the blob to retrieve
    :return: The result of ``_get_object`` when asked for a ``Blob``
    """
    expected_cls = Blob
    return self._get_object(sha, expected_cls)
675 def revision_history(self, head):
676 """Returns a list of the commits reachable from head.
678 Returns a list of commit objects, the first of which will be the commit
679 of head; following that will be the parents.
681 Raises NotCommitError if any non-commit objects are referenced, including if the
682 head parameter isn't the sha of a commit.
684 XXX: work out how to handle merges.
686 # We build the list backwards, as parents are more likely to be older
688 pending_commits = [head]
690 while pending_commits != []:
691 head = pending_commits.pop(0)
693 commit = self.commit(head)
695 raise MissingCommitError(head)
696 if commit in history:
699 for known_commit in history:
700 if known_commit.commit_time > commit.commit_time:
703 history.insert(i, commit)
704 parents = commit.parents
705 pending_commits += parents
def __getitem__(self, name):
    """Retrieve an object by SHA1 or by ref name.

    Names that are 20 or 40 characters long are treated as object ids.
    A ref name can have one of those lengths too (``refs/tags/v1.0.0-rc1``
    is 20 characters), so a name of that length starting with ``refs/`` is
    tried as a ref first and only then as an object id.

    :param name: A SHA1 (20 or 40 characters long) or a ref name such as
        ``HEAD`` or ``refs/heads/master``
    :return: The object found in ``self.object_store``
    :raises KeyError: presumably when the name cannot be resolved; the
        exception comes from ``self.refs`` / ``self.object_store`` and is
        passed through unchanged
    """
    if len(name) in (20, 40):
        if name.startswith("refs/"):
            try:
                sha = self.refs[name]
            except KeyError:
                # Not a known ref after all: fall back to treating the
                # name as an object id, exactly as before.
                pass
            else:
                return self.object_store[sha]
        return self.object_store[name]
    return self.object_store[self.refs[name]]
def __setitem__(self, name, value):
    """Point a ref at an object.

    :param name: Ref to update; must be ``HEAD`` or start with ``refs/``
    :param value: Either a ShaFile, whose ``id`` is stored, or a ``str``
        that is stored as given
    :raises ValueError: if ``name`` is neither ``HEAD`` nor under ``refs/``
    :raises TypeError: if ``value`` is neither a ShaFile nor a ``str``
    """
    # Reject non-ref names up front, so that a successful update returns
    # normally instead of running into the ValueError.
    if not (name.startswith("refs/") or name == "HEAD"):
        raise ValueError(name)
    if isinstance(value, ShaFile):
        self.refs[name] = value.id
    elif isinstance(value, str):
        self.refs[name] = value
    else:
        raise TypeError(value)
724 def __delitem__(self, name):
725 if name.startswith("refs") or name == "HEAD":
727 raise ValueError(name)
729 def do_commit(self, committer, message,
730 author=None, commit_timestamp=None,
731 commit_timezone=None, author_timestamp=None,
732 author_timezone=None, tree=None):
733 """Create a new commit.
735 :param committer: Committer fullname
736 :param message: Commit message
737 :param author: Author fullname (defaults to committer)
738 :param commit_timestamp: Commit timestamp (defaults to now)
739 :param commit_timezone: Commit timestamp timezone (defaults to GMT)
740 :param author_timestamp: Author timestamp (defaults to commit timestamp)
741 :param author_timezone: Author timestamp timezone
742 (defaults to commit timestamp timezone)
743 :param tree: SHA1 of the tree root to use (if not specified the current index will be committed).
744 :return: New commit SHA1
746 from dulwich.index import commit_index
748 index = self.open_index()
751 c.tree = commit_index(self.object_store, index)
754 c.committer = committer
755 if commit_timestamp is None:
756 commit_timestamp = time.time()
757 c.commit_time = int(commit_timestamp)
758 if commit_timezone is None:
760 c.commit_timezone = commit_timezone
764 if author_timestamp is None:
765 author_timestamp = commit_timestamp
766 c.author_time = int(author_timestamp)
767 if author_timezone is None:
768 author_timezone = commit_timezone
769 c.author_timezone = author_timezone
771 self.object_store.add_object(c)
772 self.refs["HEAD"] = c.id
776 class Repo(BaseRepo):
777 """A git repository backed by local disk."""
779 def __init__(self, root):
780 if os.path.isdir(os.path.join(root, ".git", OBJECTDIR)):
782 self._controldir = os.path.join(root, ".git")
783 elif (os.path.isdir(os.path.join(root, OBJECTDIR)) and
784 os.path.isdir(os.path.join(root, REFSDIR))):
786 self._controldir = root
788 raise NotGitRepository(root)
790 object_store = DiskObjectStore(
791 os.path.join(self.controldir(), OBJECTDIR))
792 refs = DiskRefsContainer(self.controldir())
793 BaseRepo.__init__(self, object_store, refs)
def controldir(self):
    """Return the path of the control directory.

    :return: The value stored in ``self._controldir``
    """
    control_path = self._controldir
    return control_path
799 def put_named_file(self, path, contents):
800 """Write a file to the control dir with a specific name and contents.
802 f = GitFile(os.path.join(self.controldir(), path), 'wb')
808 def get_named_file(self, path):
809 """Get a file from the control dir with a specific name.
811 Although the filename should be interpreted as a filename relative to
812 the control dir in a disk-backed Repo, the object returned need not be
813 pointing to a file in that location.
815 :param path: The path to the file, relative to the control dir.
816 :return: An open file object, or None if the file does not exist.
819 return open(os.path.join(self.controldir(), path.lstrip('/')), 'rb')
820 except (IOError, OSError), e:
821 if e.errno == errno.ENOENT:
def index_path(self):
    """Return path to the index file.

    :return: ``INDEX_FILENAME`` joined onto the control directory path
    """
    control_path = self.controldir()
    return os.path.join(control_path, INDEX_FILENAME)
def open_index(self):
    """Open the index for this repository.

    :raises NoIndexPresent: if has_index() reports that no index exists
    :return: An ``Index`` created from index_path()
    """
    from dulwich.index import Index
    if self.has_index():
        return Index(self.index_path())
    raise NoIndexPresent()
837 """Check if an index is present."""
838 return os.path.exists(self.index_path())
841 return "<Repo at %r>" % self.path
844 def init(cls, path, mkdir=True):
845 controldir = os.path.join(path, ".git")
847 cls.init_bare(controldir)
851 def init_bare(cls, path, mkdir=True):
852 for d in BASE_DIRECTORIES:
853 os.mkdir(os.path.join(path, *d))
855 ret.refs.set_ref("HEAD", "refs/heads/master")
856 ret.put_named_file('description', "Unnamed repository")
857 ret.put_named_file('config', """[core]
858 repositoryformatversion = 0
861 logallrefupdates = true
863 ret.put_named_file(os.path.join('info', 'excludes'), '')