1 # repo.py -- For dealing with git repositories.
2 # Copyright (C) 2007 James Westby <jw+debian@jameswestby.net>
3 # Copyright (C) 2008-2009 Jelmer Vernooij <jelmer@samba.org>
5 # This program is free software; you can redistribute it and/or
6 # modify it under the terms of the GNU General Public License
7 # as published by the Free Software Foundation; version 2
8 # of the License or (at your option) any later version of
11 # This program is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
22 """Repository access."""
28 from dulwich.errors import (
37 from dulwich.file import (
41 from dulwich.object_store import (
44 from dulwich.objects import (
57 REFSDIR_HEADS = 'heads'
58 INDEX_FILENAME = "index"
66 [REFSDIR, REFSDIR_TAGS],
67 [REFSDIR, REFSDIR_HEADS],
def check_ref_format(refname):
    """Check if a refname is correctly formatted.

    Implements the rules of git-check-ref-format[1].

    [1] http://www.kernel.org/pub/software/scm/git/docs/git-check-ref-format.html

    :param refname: The refname to check
    :return: True if refname is valid, False otherwise
    """
    # These could be combined into one big expression, but are listed
    # separately to parallel [1].
    if '/.' in refname or refname.startswith('.'):
        return False
    # At least one "/" is required, which also rejects the empty string
    # before refname[-1] is evaluated below.
    if '/' not in refname:
        return False
    if '..' in refname:
        return False
    for c in refname:
        # 32 (octal 040) is the space character; anything below it is an
        # ASCII control character. A decimal literal is used because the
        # old-style octal spelling is not valid on Python 3.
        if ord(c) < 32 or c in '\177 ~^:?*[':
            return False
    if refname[-1] in '/.':
        return False
    if refname.endswith('.lock'):
        return False
    if '@{' in refname:
        return False
    if '\\' in refname:
        return False
    return True
class RefsContainer(object):
    """A container for refs."""

    def as_dict(self, base):
        """Return the contents of this ref container under base as a dict."""
        raise NotImplementedError(self.as_dict)

    def set_ref(self, name, other):
        """Make a ref point at another ref.

        :param name: Name of the ref to set
        :param other: Name of the ref to point at
        """
        self[name] = "ref: %s\n" % other

    def import_refs(self, base, other):
        """Store every entry of another ref mapping under a common prefix.

        :param base: Prefix that is joined to each name with a "/"
        :param other: Mapping of ref names (relative to base) to values
        """
        # items() exists on mappings in both Python 2 and Python 3, unlike
        # the Python-2-only iteritems().
        for name, value in other.items():
            self["%s/%s" % (base, name)] = value
125 class DiskRefsContainer(RefsContainer):
126 """Refs container that reads refs from disk."""
128 def __init__(self, path):
130 self._packed_refs = None
131 self._peeled_refs = {}
134 return "%s(%r)" % (self.__class__.__name__, self.path)
136 def keys(self, base=None):
137 """Refs present in this container.
139 :param base: An optional base to return refs under
140 :return: An unsorted set of valid refs in this container, including
144 return self.subkeys(base)
146 return self.allkeys()
148 def subkeys(self, base):
150 path = self.refpath(base)
151 for root, dirs, files in os.walk(path):
152 dir = root[len(path):].strip(os.path.sep).replace(os.path.sep, "/")
153 for filename in files:
154 refname = ("%s/%s" % (dir, filename)).strip("/")
155 # check_ref_format requires at least one /, so we prepend the
156 # base before calling it.
157 if check_ref_format("%s/%s" % (base, refname)):
159 for key in self.get_packed_refs():
160 if key.startswith(base):
161 keys.add(key[len(base):].strip("/"))
166 if os.path.exists(self.refpath("HEAD")):
168 path = self.refpath("")
169 for root, dirs, files in os.walk(self.refpath("refs")):
170 dir = root[len(path):].strip(os.path.sep).replace(os.path.sep, "/")
171 for filename in files:
172 refname = ("%s/%s" % (dir, filename)).strip("/")
173 if check_ref_format(refname):
175 keys.update(self.get_packed_refs())
178 def as_dict(self, base=None):
179 """Return the contents of this container as a dictionary.
183 keys = self.keys(base)
188 ret[key] = self[("%s/%s" % (base, key)).strip("/")]
190 continue # Unable to resolve
def refpath(self, name):
    """Map a ref name onto its location on disk.

    :param name: Ref name, using "/" as the separator
    :return: This container's path joined with the name, with each "/"
        replaced by the platform path separator
    """
    # Splitting on "/" and re-joining with the native separator is a no-op
    # where the separator already is "/".
    native_name = os.path.sep.join(name.split("/"))
    return os.path.join(self.path, native_name)
202 def get_packed_refs(self):
203 """Get contents of the packed-refs file.
205 :return: Dictionary mapping ref names to SHA1s
207 :note: Will return an empty dictionary when no packed-refs file is
210 # TODO: invalidate the cache on repacking
211 if self._packed_refs is None:
212 self._packed_refs = {}
213 path = os.path.join(self.path, 'packed-refs')
215 f = GitFile(path, 'rb')
217 if e.errno == errno.ENOENT:
221 first_line = iter(f).next().rstrip()
222 if (first_line.startswith("# pack-refs") and " peeled" in
224 for sha, name, peeled in read_packed_refs_with_peeled(f):
225 self._packed_refs[name] = sha
227 self._peeled_refs[name] = peeled
230 for sha, name in read_packed_refs(f):
231 self._packed_refs[name] = sha
234 return self._packed_refs
def _check_refname(self, name):
    """Validate that a refname is HEAD or a well-formed name under refs/.

    HEAD is not a valid refname according to git-check-ref-format, but this
    class needs to be able to touch HEAD. check_ref_format expects refnames
    without the leading 'refs/', while this class insists on that prefix so
    that nothing outside the refs dir (or HEAD) can be touched.

    :param name: The name of the reference.
    :raises KeyError: if a refname is not HEAD or is otherwise not valid.
    """
    if name == 'HEAD':
        return
    # Strip the 5-character 'refs/' prefix before handing the remainder to
    # check_ref_format.
    if name.startswith('refs/') and check_ref_format(name[5:]):
        return
    raise KeyError(name)
252 def _read_ref_file(self, name):
253 """Read a reference file and return its contents.
255 If the reference file a symbolic reference, only read the first line of
256 the file. Otherwise, only read the first 40 bytes.
258 :param name: the refname to read, relative to refpath
259 :return: The contents of the ref file, or None if the file does not
261 :raises IOError: if any other error occurs
263 filename = self.refpath(name)
265 f = GitFile(filename, 'rb')
267 header = f.read(len(SYMREF))
269 # Read only the first line
270 return header + iter(f).next().rstrip("\n")
272 # Read only the first 40 bytes
273 return header + f.read(40-len(SYMREF))
277 if e.errno == errno.ENOENT:
281 def _follow(self, name):
282 """Follow a reference name.
284 :return: a tuple of (refname, sha), where refname is the name of the
285 last reference in the symbolic reference chain
287 self._check_refname(name)
288 contents = SYMREF + name
290 while contents.startswith(SYMREF):
291 refname = contents[len(SYMREF):]
292 contents = self._read_ref_file(refname)
294 contents = self.get_packed_refs().get(refname, None)
300 return refname, contents
302 def __getitem__(self, name):
303 """Get the SHA1 for a reference name.
305 This method follows all symbolic references.
307 _, sha = self._follow(name)
312 def _remove_packed_ref(self, name):
313 if self._packed_refs is None:
315 filename = os.path.join(self.path, 'packed-refs')
316 # reread cached refs from disk, while holding the lock
317 f = GitFile(filename, 'wb')
319 self._packed_refs = None
320 self.get_packed_refs()
322 if name not in self._packed_refs:
325 del self._packed_refs[name]
326 if name in self._peeled_refs:
327 del self._peeled_refs[name]
328 write_packed_refs(f, self._packed_refs, self._peeled_refs)
333 def set_if_equals(self, name, old_ref, new_ref):
334 """Set a refname to new_ref only if it currently equals old_ref.
336 This method follows all symbolic references, and can be used to perform
337 an atomic compare-and-swap operation.
339 :param name: The refname to set.
340 :param old_ref: The old sha the refname must refer to, or None to set
342 :param new_ref: The new sha the refname will refer to.
343 :return: True if the set was successful, False otherwise.
346 realname, _ = self._follow(name)
349 filename = self.refpath(realname)
350 ensure_dir_exists(os.path.dirname(filename))
351 f = GitFile(filename, 'wb')
353 if old_ref is not None:
355 # read again while holding the lock
356 orig_ref = self._read_ref_file(realname)
358 orig_ref = self.get_packed_refs().get(realname, None)
359 if orig_ref != old_ref:
362 except (OSError, IOError):
366 f.write(new_ref+"\n")
367 except (OSError, IOError):
374 def add_if_new(self, name, ref):
375 """Add a new reference only if it does not already exist."""
376 self._check_refname(name)
377 filename = self.refpath(name)
378 ensure_dir_exists(os.path.dirname(filename))
379 f = GitFile(filename, 'wb')
381 if os.path.exists(filename) or name in self.get_packed_refs():
386 except (OSError, IOError):
393 def __setitem__(self, name, ref):
394 """Set a reference name to point to the given SHA1.
396 This method follows all symbolic references.
398 :note: This method unconditionally overwrites the contents of a reference
399 on disk. To update atomically only if the reference has not changed
400 on disk, use set_if_equals().
402 self.set_if_equals(name, None, ref)
404 def remove_if_equals(self, name, old_ref):
405 """Remove a refname only if it currently equals old_ref.
407 This method does not follow symbolic references. It can be used to
408 perform an atomic compare-and-delete operation.
410 :param name: The refname to delete.
411 :param old_ref: The old sha the refname must refer to, or None to delete
413 :return: True if the delete was successful, False otherwise.
415 self._check_refname(name)
416 filename = self.refpath(name)
417 ensure_dir_exists(os.path.dirname(filename))
418 f = GitFile(filename, 'wb')
420 if old_ref is not None:
421 orig_ref = self._read_ref_file(name)
423 orig_ref = self.get_packed_refs().get(name, None)
424 if orig_ref != old_ref:
427 if os.path.exists(filename):
429 self._remove_packed_ref(name)
431 # never write, we just wanted the lock
def __delitem__(self, name):
    """Remove a refname.

    This method does not follow symbolic references.

    :param name: The refname to delete.
    :note: This method unconditionally deletes the contents of a reference
        on disk. To delete atomically only if the reference has not changed
        on disk, use remove_if_equals().
    """
    # Passing None as the expected old value requests an unconditional delete.
    self.remove_if_equals(name, None)
def _split_ref_line(line):
    """Split a single ref line into a tuple of SHA1 and name.

    :param line: A line holding a SHA1 and a ref name separated by one
        space, optionally terminated by a newline
    :return: Tuple of (sha, name)
    :raises PackedRefsException: if the line does not consist of exactly two
        fields, if hex_to_sha rejects the SHA1, or if check_ref_format
        rejects the name
    """
    fields = line.rstrip("\n").split(" ")
    if len(fields) != 2:
        raise PackedRefsException("invalid ref line '%s'" % line)
    sha, name = fields
    try:
        # Called only for validation; the converted value is discarded.
        hex_to_sha(sha)
    except (AssertionError, TypeError) as e:
        raise PackedRefsException(e)
    if not check_ref_format(name):
        raise PackedRefsException("invalid ref name '%s'" % name)
    return (sha, name)
def read_packed_refs(f):
    """Iterate over the entries of a packed refs file without peeled refs.

    :param f: file-like object to read from
    :return: Generator of what _split_ref_line returns for each ref line
    :raises PackedRefsException: if a peeled ("^") line is encountered
    """
    for line in f:
        marker = line[0]
        if marker == "^":
            raise PackedRefsException(
                "found peeled ref in packed-refs without peeled")
        if marker != "#":
            # Everything that is not a comment is treated as a ref line.
            yield _split_ref_line(line)
def read_packed_refs_with_peeled(f):
    """Read a packed refs file including peeled refs.

    Assumes the "# pack-refs with: peeled" line was already read. Yields
    tuples of (SHA1, ref name, peeled SHA1 or None).

    :param f: file-like object to read from, seek'ed to the second line
    :raises PackedRefsException: if a peeled ("^") line does not directly
        follow a ref line, or if hex_to_sha rejects the peeled SHA1
    """
    # A ref line is held back until the next line shows whether a peeled
    # entry belongs to it.
    last = None
    for l in f:
        if l[0] == "#":
            continue
        l = l.rstrip("\n")
        if l[0] == "^":
            if not last:
                raise PackedRefsException("unexpected peeled ref line")
            try:
                # Called only for validation; the result is discarded.
                hex_to_sha(l[1:])
            except (AssertionError, TypeError) as e:
                raise PackedRefsException(e)
            sha, name = _split_ref_line(last)
            last = None
            yield (sha, name, l[1:])
        else:
            if last:
                sha, name = _split_ref_line(last)
                yield (sha, name, None)
            last = l
    # Flush the final ref line, which had no peeled entry after it.
    if last:
        sha, name = _split_ref_line(last)
        yield (sha, name, None)
def write_packed_refs(f, packed_refs, peeled_refs=None):
    """Write a packed refs file.

    :param f: empty file-like object to write to
    :param packed_refs: dict of refname to sha of packed refs to write
    :param peeled_refs: optional dict of refname to peeled sha. When a dict
        is passed (even an empty one) the "# pack-refs with: peeled" header
        is written first; when None, no header is written.
    """
    if peeled_refs is None:
        peeled_refs = {}
    else:
        f.write('# pack-refs with: peeled\n')
    # sorted() over the dict iterates its keys on both Python 2 and 3,
    # unlike the Python-2-only iterkeys().
    for refname in sorted(packed_refs):
        f.write('%s %s\n' % (packed_refs[refname], refname))
        if refname in peeled_refs:
            f.write('^%s\n' % peeled_refs[refname])
527 class BaseRepo(object):
528 """Base class for a git repository.
530 :ivar object_store: Dictionary-like object for accessing
532 :ivar refs: Dictionary-like object with the refs in this repository
535 def __init__(self, object_store, refs):
536 self.object_store = object_store
def get_named_file(self, path):
    """Get a file from the control dir with a specific name.

    Although the filename should be interpreted as a filename relative to
    the control dir in a disk-backed Repo, the object returned need not be
    pointing to a file in that location.

    :param path: The path to the file, relative to the control dir.
    :return: An open file object, or None if the file does not exist.
    """
    # The base class provides no implementation and always raises.
    raise NotImplementedError(self.get_named_file)
def put_named_file(self, relpath, contents):
    """Write a file in the control directory with specified name and
    contents.

    Although the filename should be interpreted as a filename relative to
    the control dir in a disk-backed Repo, the contents need not end up in
    a file at that location.

    :param relpath: The path to the file, relative to the control dir.
    :param contents: Contents of the new file
    """
    # The base class provides no implementation and always raises.
    raise NotImplementedError(self.put_named_file)
564 def open_index(self):
565 """Open the index for this repository.
567 :raises NoIndexPresent: If no index is present
568 :return: Index instance
570 raise NotImplementedError(self.open_index)
572 def fetch(self, target, determine_wants=None, progress=None):
573 """Fetch objects into another repository.
575 :param target: The target repository
576 :param determine_wants: Optional function to determine what refs to
578 :param progress: Optional progress function
580 if determine_wants is None:
581 determine_wants = lambda heads: heads.values()
582 target.object_store.add_objects(
583 self.fetch_objects(determine_wants, target.get_graph_walker(),
585 return self.get_refs()
def fetch_objects(self, determine_wants, graph_walker, progress):
    """Work out which objects are missing for a set of wanted revisions.

    :param determine_wants: Callable that receives the dictionary returned
        by get_refs() and returns the heads to fetch.
    :param graph_walker: Object handed to the object store's
        find_common_revisions() to establish the revisions both sides have.
    :param progress: Progress callback, passed through to the object
        store's find_missing_objects().
    :return: Whatever the object store's iter_shas() returns for the
        missing objects
    """
    wanted = determine_wants(self.get_refs())
    store = self.object_store
    common = store.find_common_revisions(graph_walker)
    missing = store.find_missing_objects(common, wanted, progress)
    return store.iter_shas(missing)
604 def get_graph_walker(self, heads=None):
606 heads = self.refs.as_dict('refs/heads').values()
607 return self.object_store.get_graph_walker(heads)
610 """Return the SHA1 a ref is pointing to."""
611 return self.refs[name]
614 """Get dictionary with all refs."""
615 return self.refs.as_dict()
618 """Return the SHA1 pointed at by HEAD."""
619 return self.refs['HEAD']
621 def _get_object(self, sha, cls):
622 assert len(sha) in (20, 40)
623 ret = self.get_object(sha)
624 if ret._type != cls._type:
626 raise NotCommitError(ret)
628 raise NotBlobError(ret)
630 raise NotTreeError(ret)
632 raise Exception("Type invalid: %r != %r" % (ret._type, cls._type))
635 def get_object(self, sha):
636 return self.object_store[sha]
638 def get_parents(self, sha):
639 return self.commit(sha).parents
641 def get_config(self):
642 from configobj import ConfigObj
643 return ConfigObj(os.path.join(self._controldir, 'config'))
645 def commit(self, sha):
646 return self._get_object(sha, Commit)
649 return self._get_object(sha, Tree)
652 return self._get_object(sha, Tag)
654 def get_blob(self, sha):
655 return self._get_object(sha, Blob)
657 def revision_history(self, head):
658 """Returns a list of the commits reachable from head.
660 Returns a list of commit objects, the first of which will be the commit
661 of head, then following that will be the parents.
663 Raises NotCommitError if any non-commits are referenced, including if the
664 head parameter isn't the sha of a commit.
666 XXX: work out how to handle merges.
668 # We build the list backwards, as parents are more likely to be older
670 pending_commits = [head]
672 while pending_commits != []:
673 head = pending_commits.pop(0)
675 commit = self.commit(head)
677 raise MissingCommitError(head)
678 if commit in history:
681 for known_commit in history:
682 if known_commit.commit_time > commit.commit_time:
685 history.insert(i, commit)
686 parents = commit.parents
687 pending_commits += parents
def __getitem__(self, name):
    """Look up an object by SHA1 or by ref name.

    :param name: A SHA1 (20 or 40 characters long) or the name of a ref
    :return: The entry of the object store for the SHA1, or for the SHA1
        the ref resolves to
    :raises KeyError: propagated from the refs container or the object
        store when the name cannot be resolved
    """
    if len(name) in (20, 40):
        # A ref name can be 20 or 40 characters long as well (for example
        # "refs/heads/my-branch"), so a miss in the object store is not
        # final: fall through and try the name as a ref.
        try:
            return self.object_store[name]
        except KeyError:
            pass
    return self.object_store[self.refs[name]]
def __setitem__(self, name, value):
    """Point a ref at an object or at a SHA1.

    :param name: Ref name; must be "HEAD" or start with "refs/"
    :param value: Either a ShaFile, whose id is stored, or a SHA1 string
    :raises ValueError: if name is neither "HEAD" nor under "refs/"
    :raises TypeError: if value is neither a ShaFile nor a string
    """
    # Reject bad names up front so that a successful assignment can no
    # longer fall through into the ValueError.
    if not (name.startswith("refs/") or name == "HEAD"):
        raise ValueError(name)
    if isinstance(value, str):
        self.refs[name] = value
    elif isinstance(value, ShaFile):
        self.refs[name] = value.id
    else:
        raise TypeError(value)
def __delitem__(self, name):
    """Delete a ref.

    :param name: Ref name; must be "HEAD" or start with "refs"
    :raises ValueError: if name is neither "HEAD" nor starts with "refs"
    """
    # Reject bad names up front so that a successful delete can no longer
    # fall through into the ValueError.
    if not (name.startswith("refs") or name == "HEAD"):
        raise ValueError(name)
    del self.refs[name]
711 def do_commit(self, committer, message,
712 author=None, commit_timestamp=None,
713 commit_timezone=None, author_timestamp=None,
714 author_timezone=None, tree=None):
715 """Create a new commit.
717 :param committer: Committer fullname
718 :param message: Commit message
719 :param author: Author fullname (defaults to committer)
720 :param commit_timestamp: Commit timestamp (defaults to now)
721 :param commit_timezone: Commit timestamp timezone (defaults to GMT)
722 :param author_timestamp: Author timestamp (defaults to commit timestamp)
723 :param author_timezone: Author timestamp timezone
724 (defaults to commit timestamp timezone)
725 :param tree: SHA1 of the tree root to use (if not specified the current index will be committed).
726 :return: New commit SHA1
728 from dulwich.index import commit_index
730 index = self.open_index()
733 c.tree = commit_index(self.object_store, index)
736 c.committer = committer
737 if commit_timestamp is None:
738 commit_timestamp = time.time()
739 c.commit_time = int(commit_timestamp)
740 if commit_timezone is None:
742 c.commit_timezone = commit_timezone
746 if author_timestamp is None:
747 author_timestamp = commit_timestamp
748 c.author_time = int(author_timestamp)
749 if author_timezone is None:
750 author_timezone = commit_timezone
751 c.author_timezone = author_timezone
753 self.object_store.add_object(c)
754 self.refs["HEAD"] = c.id
758 class Repo(BaseRepo):
759 """A git repository backed by local disk."""
761 def __init__(self, root):
762 if os.path.isdir(os.path.join(root, ".git", OBJECTDIR)):
764 self._controldir = os.path.join(root, ".git")
765 elif (os.path.isdir(os.path.join(root, OBJECTDIR)) and
766 os.path.isdir(os.path.join(root, REFSDIR))):
768 self._controldir = root
770 raise NotGitRepository(root)
772 object_store = DiskObjectStore(
773 os.path.join(self.controldir(), OBJECTDIR))
774 refs = DiskRefsContainer(self.controldir())
775 BaseRepo.__init__(self, object_store, refs)
777 def controldir(self):
778 """Return the path of the control directory."""
779 return self._controldir
def put_named_file(self, path, contents):
    """Write a file in the control dir with a specific name and contents.

    :param path: The path to the file, relative to the control dir.
    :param contents: Contents of the new file
    """
    # The target is the control dir joined with the requested path only.
    # Leading slashes are stripped, as get_named_file() does, so the name
    # stays relative to the control dir.
    f = GitFile(os.path.join(self.controldir(), path.lstrip('/')), 'wb')
    try:
        f.write(contents)
    finally:
        f.close()
def get_named_file(self, path):
    """Get a file from the control dir with a specific name.

    Although the filename should be interpreted as a filename relative to
    the control dir in a disk-backed Repo, the object returned need not be
    pointing to a file in that location.

    :param path: The path to the file, relative to the control dir.
        Leading slashes are stripped before joining.
    :return: A file object opened in 'rb' mode, or None if the file does
        not exist.
    :raises IOError: (or OSError) for any failure other than ENOENT
    """
    try:
        return open(os.path.join(self.controldir(), path.lstrip('/')), 'rb')
    except (IOError, OSError) as e:
        # Only a missing file is reported as None; anything else propagates.
        if e.errno == errno.ENOENT:
            return None
        raise
807 def index_path(self):
808 """Return path to the index file."""
809 return os.path.join(self.controldir(), INDEX_FILENAME)
811 def open_index(self):
812 """Open the index for this repository."""
813 from dulwich.index import Index
814 if not self.has_index():
815 raise NoIndexPresent()
816 return Index(self.index_path())
819 """Check if an index is present."""
820 return os.path.exists(self.index_path())
823 return "<Repo at %r>" % self.path
826 def init(cls, path, mkdir=True):
827 controldir = os.path.join(path, ".git")
829 cls.init_bare(controldir)
833 def init_bare(cls, path, mkdir=True):
834 for d in BASE_DIRECTORIES:
835 os.mkdir(os.path.join(path, *d))
837 ret.refs.set_ref("HEAD", "refs/heads/master")
838 ret.put_named_file('description', "Unnamed repository")
839 ret.put_named_file('config', """[core]
840 repositoryformatversion = 0
843 logallrefupdates = true
845 ret.put_named_file(os.path.join('info', 'excludes'), '')