1 # repo.py -- For dealing with git repositories.
2 # Copyright (C) 2007 James Westby <jw+debian@jameswestby.net>
3 # Copyright (C) 2008-2009 Jelmer Vernooij <jelmer@samba.org>
5 # This program is free software; you can redistribute it and/or
6 # modify it under the terms of the GNU General Public License
7 # as published by the Free Software Foundation; version 2
8 # of the License or (at your option) any later version of
11 # This program is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
22 """Repository access."""
29 from dulwich.errors import (
37 from dulwich.file import (
41 from dulwich.object_store import (
44 from dulwich.objects import (
57 REFSDIR_HEADS = 'heads'
58 INDEX_FILENAME = "index"
def check_ref_format(refname):
    """Check if a refname is correctly formatted.

    Implements all the same rules as git-check-ref-format[1].

    [1] http://www.kernel.org/pub/software/scm/git/docs/git-check-ref-format.html

    :param refname: The refname to check
    :return: True if refname is valid, False otherwise
    """
    # These could be combined into one big expression, but are listed
    # separately to parallel [1].
    if '/.' in refname or refname.startswith('.'):
        return False
    if '/' not in refname:
        # Also rejects the empty string, so refname[-1] below is safe.
        return False
    if '..' in refname:
        return False
    for c in refname:
        # Control characters (below space, written in decimal so the literal
        # parses on every Python version), DEL, and the characters git forbids.
        if ord(c) < 32 or c in '\177 ~^:?*[':
            return False
    if refname[-1] in '/.':
        return False
    if refname.endswith('.lock'):
        return False
    if '@{' in refname:
        return False
    if '\\' in refname:
        return False
    return True
class RefsContainer(object):
    """A container for refs.

    This base class only implements helpers on top of item assignment;
    subclasses are expected to provide ``__setitem__`` and ``as_dict``.
    """

    def as_dict(self, base=None):
        """Return the contents of this ref container under base as a dict.

        :param base: Optional base to restrict the result to. It defaults to
            None so that callers may ask for every ref without passing an
            argument.
        :raises NotImplementedError: always; subclasses must override this.
        """
        raise NotImplementedError(self.as_dict)

    def set_ref(self, name, other):
        """Make a ref point at another ref.

        :param name: Name of the ref to set
        :param other: Name of the ref to point at
        """
        self[name] = "ref: %s\n" % other

    def import_refs(self, base, other):
        """Copy every entry of a mapping into this container under base.

        :param base: Prefix joined to each imported name with a "/"
        :param other: Mapping of ref names to values to import
        """
        # items() rather than iteritems() so that any mapping works.
        for name, value in other.items():
            self["%s/%s" % (base, name)] = value
class DiskRefsContainer(RefsContainer):
    """Refs container that reads refs from disk.

    Loose refs are files below ``path``; packed refs are read from the
    ``packed-refs`` file in the same directory and cached after the first
    read.
    """

    def __init__(self, path):
        """Create a container for the refs stored below path.

        :param path: Directory that holds HEAD, refs/ and packed-refs
        """
        self.path = path
        # None means "packed-refs has not been read yet".
        self._packed_refs = None
        self._peeled_refs = {}

    def __repr__(self):
        return "%s(%r)" % (self.__class__.__name__, self.path)

    def keys(self, base=None):
        """Refs present in this container.

        :param base: An optional base to return refs under
        :return: An unsorted set of valid refs in this container, including
            packed refs.
        """
        if base is not None:
            return self.subkeys(base)
        else:
            return self.allkeys()

    def subkeys(self, base):
        """Return the refs below base, with the base prefix removed.

        :param base: Ref name prefix, e.g. "refs/heads"
        :return: Set of ref names relative to base (loose and packed)
        """
        keys = set()
        path = self.refpath(base)
        for root, dirs, files in os.walk(path):
            reldir = root[len(path):].strip(os.path.sep).replace(
                os.path.sep, "/")
            for filename in files:
                refname = ("%s/%s" % (reldir, filename)).strip("/")
                # check_ref_format requires at least one /, so we prepend the
                # base before calling it.
                if check_ref_format("%s/%s" % (base, refname)):
                    keys.add(refname)
        for key in self.get_packed_refs():
            if not key.startswith(base):
                continue
            rest = key[len(base):]
            # Only accept matches that end on a path-component boundary, so
            # that base "refs/heads" does not pick up "refs/headsfoo/x".
            if (rest and base and not base.endswith("/")
                    and not rest.startswith("/")):
                continue
            keys.add(rest.strip("/"))
        return keys

    def allkeys(self):
        """Return the full names of all refs, including HEAD if present."""
        keys = set()
        if os.path.exists(self.refpath("HEAD")):
            keys.add("HEAD")
        path = self.refpath("")
        for root, dirs, files in os.walk(self.refpath("refs")):
            reldir = root[len(path):].strip(os.path.sep).replace(
                os.path.sep, "/")
            for filename in files:
                refname = ("%s/%s" % (reldir, filename)).strip("/")
                if check_ref_format(refname):
                    keys.add(refname)
        keys.update(self.get_packed_refs())
        return keys

    def as_dict(self, base=None):
        """Return the contents of this container as a dictionary.

        :param base: Optional base; when given, keys are relative to it
        :return: Dictionary mapping ref names to SHA1s. Refs that cannot be
            resolved are left out.
        """
        ret = {}
        keys = self.keys(base)
        if base is None:
            base = ""
        for key in keys:
            try:
                ret[key] = self[("%s/%s" % (base, key)).strip("/")]
            except KeyError:
                continue  # Unable to resolve

        return ret

    def refpath(self, name):
        """Return the disk path of a ref.

        :param name: Ref name using "/" as separator
        :return: Path below self.path using the platform's separator
        """
        if os.path.sep != "/":
            name = name.replace("/", os.path.sep)
        return os.path.join(self.path, name)

    def get_packed_refs(self):
        """Get contents of the packed-refs file.

        :return: Dictionary mapping ref names to SHA1s

        :note: Will return an empty dictionary when no packed-refs file is
            present.
        """
        # TODO: invalidate the cache on repacking
        if self._packed_refs is None:
            self._packed_refs = {}
            path = os.path.join(self.path, 'packed-refs')
            try:
                f = GitFile(path, 'rb')
            except IOError as e:
                if e.errno == errno.ENOENT:
                    return self._packed_refs
                raise
            try:
                # An empty file has no first line; treat it as "no header"
                # instead of letting StopIteration escape.
                first_line = next(iter(f), "").rstrip()
                if (first_line.startswith("# pack-refs") and " peeled" in
                        first_line):
                    for sha, name, peeled in read_packed_refs_with_peeled(f):
                        self._packed_refs[name] = sha
                        if peeled:
                            self._peeled_refs[name] = peeled
                else:
                    # No peeled header: the first line may itself be a ref,
                    # so start over from the beginning.
                    f.seek(0)
                    for sha, name in read_packed_refs(f):
                        self._packed_refs[name] = sha
            finally:
                f.close()
        return self._packed_refs

    def _check_refname(self, name):
        """Ensure a refname is valid and lives in refs or is HEAD.

        HEAD is not a valid refname according to git-check-ref-format, but this
        class needs to be able to touch HEAD. Also, check_ref_format expects
        refnames without the leading 'refs/', but this class requires that
        so it cannot touch anything outside the refs dir (or HEAD).

        :param name: The name of the reference.
        :raises KeyError: if a refname is not HEAD or is otherwise not valid.
        """
        if name == 'HEAD':
            return
        if not name.startswith('refs/') or not check_ref_format(name[5:]):
            raise KeyError(name)

    def _read_ref_file(self, name):
        """Read a reference file and return its contents.

        If the reference file is a symbolic reference, only read the first
        line of the file. Otherwise, only read the first 40 bytes.

        :param name: the refname to read, relative to refpath
        :return: The contents of the ref file, or None if the file does not
            exist.
        :raises IOError: if any other error occurs
        """
        filename = self.refpath(name)
        try:
            f = GitFile(filename, 'rb')
            try:
                header = f.read(len(SYMREF))
                if header == SYMREF:
                    # Read only the first line; a file holding nothing after
                    # the marker yields the bare marker.
                    return header + next(iter(f), "").rstrip("\n")
                else:
                    # Read only the first 40 bytes
                    return header + f.read(40 - len(SYMREF))
            finally:
                f.close()
        except IOError as e:
            if e.errno == errno.ENOENT:
                return None
            raise

    def _follow(self, name):
        """Follow a reference name.

        :return: a tuple of (refname, sha), where refname is the name of the
            last reference in the symbolic reference chain
        :raises KeyError: if name is invalid or the chain is longer than five
            symbolic references
        """
        self._check_refname(name)
        contents = SYMREF + name
        depth = 0
        while contents.startswith(SYMREF):
            refname = contents[len(SYMREF):]
            contents = self._read_ref_file(refname)
            if not contents:
                # No loose ref; fall back to the packed refs.
                contents = self.get_packed_refs().get(refname, None)
                if not contents:
                    break
            depth += 1
            if depth > 5:
                raise KeyError(name)
        return refname, contents

    def __getitem__(self, name):
        """Get the SHA1 for a reference name.

        This method follows all symbolic references.

        :raises KeyError: if the reference cannot be resolved
        """
        _, sha = self._follow(name)
        if sha is None:
            raise KeyError(name)
        return sha

    def _remove_packed_ref(self, name):
        """Remove name from the packed-refs file, if it is listed there."""
        if self._packed_refs is None:
            return
        filename = os.path.join(self.path, 'packed-refs')
        # reread cached refs from disk, while holding the lock
        f = GitFile(filename, 'wb')
        try:
            self._packed_refs = None
            self.get_packed_refs()

            if name not in self._packed_refs:
                return

            del self._packed_refs[name]
            if name in self._peeled_refs:
                del self._peeled_refs[name]
            write_packed_refs(f, self._packed_refs, self._peeled_refs)
            f.close()
        finally:
            f.abort()

    def set_if_equals(self, name, old_ref, new_ref):
        """Set a refname to new_ref only if it currently equals old_ref.

        This method follows all symbolic references, and can be used to perform
        an atomic compare-and-swap operation.

        :param name: The refname to set.
        :param old_ref: The old sha the refname must refer to, or None to set
            unconditionally.
        :param new_ref: The new sha the refname will refer to.
        :return: True if the set was successful, False otherwise.
        """
        try:
            realname, _ = self._follow(name)
        except KeyError:
            realname = name
        filename = self.refpath(realname)
        ensure_dir_exists(os.path.dirname(filename))
        f = GitFile(filename, 'wb')
        try:
            if old_ref is not None:
                try:
                    # read again while holding the lock
                    orig_ref = self._read_ref_file(realname)
                    if orig_ref is None:
                        orig_ref = self.get_packed_refs().get(realname, None)
                    if orig_ref != old_ref:
                        f.abort()
                        return False
                except (OSError, IOError):
                    f.abort()
                    raise
            try:
                f.write(new_ref + "\n")
            except (OSError, IOError):
                f.abort()
                raise
        finally:
            f.close()
        return True

    def add_if_new(self, name, ref):
        """Add a new reference only if it does not already exist.

        :param name: The refname to add.
        :param ref: The sha the new reference will refer to.
        :return: True if the reference was added, False if it already existed.
        """
        self._check_refname(name)
        filename = self.refpath(name)
        ensure_dir_exists(os.path.dirname(filename))
        f = GitFile(filename, 'wb')
        try:
            if os.path.exists(filename) or name in self.get_packed_refs():
                f.abort()
                return False
            try:
                f.write(ref + "\n")
            except (OSError, IOError):
                f.abort()
                raise
        finally:
            f.close()
        return True

    def __setitem__(self, name, ref):
        """Set a reference name to point to the given SHA1.

        This method follows all symbolic references.

        :note: This method unconditionally overwrites the contents of a reference
            on disk. To update atomically only if the reference has not changed
            on disk, use set_if_equals().
        """
        self.set_if_equals(name, None, ref)

    def remove_if_equals(self, name, old_ref):
        """Remove a refname only if it currently equals old_ref.

        This method does not follow symbolic references. It can be used to
        perform an atomic compare-and-delete operation.

        :param name: The refname to delete.
        :param old_ref: The old sha the refname must refer to, or None to delete
            unconditionally.
        :return: True if the delete was successful, False otherwise.
        """
        self._check_refname(name)
        filename = self.refpath(name)
        ensure_dir_exists(os.path.dirname(filename))
        f = GitFile(filename, 'wb')
        try:
            if old_ref is not None:
                orig_ref = self._read_ref_file(name)
                if orig_ref is None:
                    orig_ref = self.get_packed_refs().get(name, None)
                if orig_ref != old_ref:
                    return False
            # may only be packed
            if os.path.exists(filename):
                os.remove(filename)
            self._remove_packed_ref(name)
        finally:
            # never write, we just wanted the lock
            f.abort()
        return True

    def __delitem__(self, name):
        """Remove a refname.

        This method does not follow symbolic references.
        :note: This method unconditionally deletes the contents of a reference
            on disk. To delete atomically only if the reference has not changed
            on disk, use remove_if_equals().
        """
        self.remove_if_equals(name, None)
def _split_ref_line(line):
    """Split a single ref line into a tuple of SHA1 and name.

    :param line: One "<sha> <refname>" line, optionally newline-terminated
    :return: Tuple of (sha, name)
    :raises PackedRefsException: if the line does not consist of exactly two
        space-separated fields, the sha is rejected by hex_to_sha, or the
        name is rejected by check_ref_format
    """
    parts = line.rstrip("\n").split(" ")
    if len(parts) != 2:
        raise PackedRefsException("invalid ref line '%s'" % line)
    sha = parts[0]
    name = parts[1]
    try:
        # Called purely for validation; the converted value is discarded.
        hex_to_sha(sha)
    except (AssertionError, TypeError) as e:
        raise PackedRefsException(e)
    if check_ref_format(name):
        return (sha, name)
    raise PackedRefsException("invalid ref name '%s'" % name)
def read_packed_refs(f):
    """Read a packed refs file that carries no peeled entries.

    Generator yielding one (sha, name) tuple per ref line. Lines starting
    with "#" are skipped.

    :param f: file-like object to read from
    :raises PackedRefsException: if a line starts with "^" (a peeled entry)
        or a ref line is rejected by _split_ref_line
    """
    for line in f:
        if line[0] == "#":
            # Comment
            continue
        if line[0] != "^":
            yield _split_ref_line(line)
            continue
        raise PackedRefsException(
            "found peeled ref in packed-refs without peeled")
def read_packed_refs_with_peeled(f):
    """Read a packed refs file including peeled refs.

    Assumes the "# pack-refs with: peeled" line was already read. Yields
    tuples of (sha, name, peeled sha), where the peeled sha is None for refs
    that have no "^" line following them.

    :param f: file-like object to read from, seek'ed to the second line
    :raises PackedRefsException: if a "^" line does not follow a ref line,
        its sha is rejected by hex_to_sha, or a ref line is rejected by
        _split_ref_line
    """
    pending = None
    for raw in f:
        if raw[0] == "#":
            continue
        entry = raw.rstrip("\n")
        if entry[0] != "^":
            # A new ref line: the previous one (if any) had no peeled value.
            if pending:
                sha, name = _split_ref_line(pending)
                yield (sha, name, None)
            pending = entry
            continue
        if not pending:
            raise PackedRefsException("unexpected peeled ref line")
        peeled = entry[1:]
        try:
            # Called purely for validation; the converted value is discarded.
            hex_to_sha(peeled)
        except (AssertionError, TypeError) as e:
            raise PackedRefsException(e)
        sha, name = _split_ref_line(pending)
        pending = None
        yield (sha, name, peeled)
    if pending:
        sha, name = _split_ref_line(pending)
        yield (sha, name, None)
def write_packed_refs(f, packed_refs, peeled_refs=None):
    """Write a packed refs file.

    Refs are written sorted by name, one "<sha> <refname>" line each. A ref
    that also appears in peeled_refs is followed by a "^<peeled sha>" line.

    :param f: empty file-like object to write to
    :param packed_refs: dict of refname to sha of packed refs to write
    :param peeled_refs: optional dict of refname to peeled sha. Passing any
        dict, even an empty one, writes the "# pack-refs with: peeled"
        header; passing None writes no header.
    """
    if peeled_refs is None:
        peeled_refs = {}
    else:
        f.write('# pack-refs with: peeled\n')
    # sorted() over the dict iterates its keys on every Python version.
    for refname in sorted(packed_refs):
        f.write('%s %s\n' % (packed_refs[refname], refname))
        if refname in peeled_refs:
            f.write('^%s\n' % peeled_refs[refname])
class BaseRepo(object):
    """Base class for a git repository.

    :ivar object_store: Dictionary-like object for accessing
        the objects
    :ivar refs: Dictionary-like object with the refs in this repository
    """

    def __init__(self, object_store, refs):
        self.object_store = object_store
        self.refs = refs

    def get_named_file(self, path):
        """Get a file from the control dir with a specific name.

        Although the filename should be interpreted as a filename relative to
        the control dir in a disk-backed Repo, the object returned need not be
        pointing to a file in that location.

        :param path: The path to the file, relative to the control dir.
        :return: An open file object, or None if the file does not exist.
        """
        raise NotImplementedError(self.get_named_file)

    def fetch(self, target, determine_wants=None, progress=None):
        """Fetch objects into another repository.

        :param target: The target repository
        :param determine_wants: Optional function to determine what refs to
            fetch. Defaults to fetching every ref.
        :param progress: Optional progress function
        :return: The refs of this repository
        """
        if determine_wants is None:
            determine_wants = lambda heads: heads.values()
        target.object_store.add_objects(
            self.fetch_objects(determine_wants, target.get_graph_walker(),
                               progress))
        return self.get_refs()

    def fetch_objects(self, determine_wants, graph_walker, progress):
        """Fetch the missing objects required for a set of revisions.

        :param determine_wants: Function that takes a dictionary with heads
            and returns the list of heads to fetch.
        :param graph_walker: Object that can iterate over the list of revisions
            to fetch and has an "ack" method that will be called to acknowledge
            that a revision is present.
        :param progress: Simple progress function that will be called with
            updated progress strings.
        :return: iterator over objects, with __len__ implemented
        """
        wants = determine_wants(self.get_refs())
        haves = self.object_store.find_common_revisions(graph_walker)
        return self.object_store.iter_shas(
            self.object_store.find_missing_objects(haves, wants, progress))

    def get_graph_walker(self, heads=None):
        """Return a graph walker from the object store.

        :param heads: SHA1s to start from; defaults to everything under
            refs/heads
        """
        if heads is None:
            heads = self.refs.as_dict('refs/heads').values()
        return self.object_store.get_graph_walker(heads)

    def ref(self, name):
        """Return the SHA1 a ref is pointing to."""
        return self.refs[name]

    def get_refs(self):
        """Get dictionary with all refs."""
        return self.refs.as_dict()

    def head(self):
        """Return the SHA1 pointed at by HEAD."""
        return self.refs['HEAD']

    def _get_object(self, sha, cls):
        """Fetch an object and check that it has the type of cls.

        :param sha: SHA1 of the object, 20 or 40 characters long
        :param cls: Expected object class (compared via its _type attribute)
        :raises NotCommitError, NotBlobError, NotTreeError: if the object has
            a different type than the requested Commit, Blob or Tree
        """
        assert len(sha) in (20, 40)
        ret = self.get_object(sha)
        if ret._type != cls._type:
            if cls is Commit:
                raise NotCommitError(ret)
            elif cls is Blob:
                raise NotBlobError(ret)
            elif cls is Tree:
                raise NotTreeError(ret)
            else:
                raise Exception("Type invalid: %r != %r" % (
                    ret._type, cls._type))
        return ret

    def get_object(self, sha):
        """Return the object with the given SHA1 from the object store."""
        return self.object_store[sha]

    def get_parents(self, sha):
        """Return the parents of the commit with the given SHA1."""
        return self.commit(sha).parents

    def get_config(self):
        """Return the repository config file parsed by ConfigObj."""
        from configobj import ConfigObj
        # NOTE(review): _controldir is not set by BaseRepo.__init__; this
        # relies on the subclass providing it.
        return ConfigObj(os.path.join(self._controldir, 'config'))

    def commit(self, sha):
        """Return the commit with the given SHA1."""
        return self._get_object(sha, Commit)

    def tree(self, sha):
        """Return the tree with the given SHA1."""
        return self._get_object(sha, Tree)

    def tag(self, sha):
        """Return the tag with the given SHA1."""
        return self._get_object(sha, Tag)

    def get_blob(self, sha):
        """Return the blob with the given SHA1."""
        return self._get_object(sha, Blob)

    def revision_history(self, head):
        """Returns a list of the commits reachable from head.

        Returns a list of commit objects. The first of which will be the commit
        of head, then following that will be the parents.

        Raises NotCommitError if any non-commit objects are referenced,
        including if the head parameter isn't the sha of a commit.

        XXX: work out how to handle merges.
        """
        # We build the list backwards, as parents are more likely to be older
        # than children
        pending_commits = [head]
        history = []
        while pending_commits:
            head = pending_commits.pop(0)
            try:
                commit = self.commit(head)
            except KeyError:
                raise MissingCommitError(head)
            if commit in history:
                continue
            # Insert before the first known commit that is newer, keeping
            # history ordered by ascending commit time.
            i = 0
            for known_commit in history:
                if known_commit.commit_time > commit.commit_time:
                    break
                i += 1
            history.insert(i, commit)
            parents = commit.parents
            pending_commits += parents
        history.reverse()
        return history

    def __getitem__(self, name):
        """Look up an object by SHA1, or by the ref name pointing at it.

        Any name of length 20 or 40 is treated as a SHA1.
        """
        if len(name) in (20, 40):
            return self.object_store[name]
        return self.object_store[self.refs[name]]

    def __setitem__(self, name, value):
        """Point a ref at an object or SHA1.

        :param name: "HEAD" or a name starting with "refs/"
        :param value: A ShaFile (its id is stored) or a SHA1 string
        :raises ValueError: if name is not a ref name
        :raises TypeError: if value is neither a ShaFile nor a string
        """
        # Reject bad names up front so that a successful update returns
        # normally instead of falling through to the ValueError.
        if not (name.startswith("refs/") or name == "HEAD"):
            raise ValueError(name)
        if isinstance(value, ShaFile):
            self.refs[name] = value.id
        elif isinstance(value, str):
            self.refs[name] = value
        else:
            raise TypeError(value)

    def __delitem__(self, name):
        """Delete a ref.

        :param name: "HEAD" or a name starting with "refs"
        :raises ValueError: if name is not a ref name
        """
        # Same guard-first shape as __setitem__: a successful delete must not
        # end in ValueError.
        if not (name.startswith("refs") or name == "HEAD"):
            raise ValueError(name)
        del self.refs[name]
678 class Repo(BaseRepo):
679 """A git repository backed by local disk."""
681 def __init__(self, root):
682 if os.path.isdir(os.path.join(root, ".git", OBJECTDIR)):
684 self._controldir = os.path.join(root, ".git")
685 elif (os.path.isdir(os.path.join(root, OBJECTDIR)) and
686 os.path.isdir(os.path.join(root, REFSDIR))):
688 self._controldir = root
690 raise NotGitRepository(root)
692 object_store = DiskObjectStore(
693 os.path.join(self.controldir(), OBJECTDIR))
694 refs = DiskRefsContainer(self.controldir())
695 BaseRepo.__init__(self, object_store, refs)
697 def controldir(self):
698 """Return the path of the control directory."""
699 return self._controldir
701 def get_named_file(self, path):
702 """Get a file from the control dir with a specific name.
704 Although the filename should be interpreted as a filename relative to
705 the control dir in a disk-backed Repo, the object returned need not be
706 pointing to a file in that location.
708 :param path: The path to the file, relative to the control dir.
709 :return: An open file object, or None if the file does not exist.
712 return open(os.path.join(self.controldir(), path.lstrip('/')), 'rb')
713 except (IOError, OSError), e:
714 if e.errno == errno.ENOENT:
718 def index_path(self):
719 """Return path to the index file."""
720 return os.path.join(self.controldir(), INDEX_FILENAME)
722 def open_index(self):
723 """Open the index for this repository."""
724 from dulwich.index import Index
725 return Index(self.index_path())
728 """Check if an index is present."""
729 return os.path.exists(self.index_path())
732 return "<Repo at %r>" % self.path
734 def do_commit(self, committer, message,
735 author=None, commit_timestamp=None,
736 commit_timezone=None, author_timestamp=None,
737 author_timezone=None, tree=None):
738 """Create a new commit.
740 :param committer: Committer fullname
741 :param message: Commit message
742 :param author: Author fullname (defaults to committer)
743 :param commit_timestamp: Commit timestamp (defaults to now)
744 :param commit_timezone: Commit timestamp timezone (defaults to GMT)
745 :param author_timestamp: Author timestamp (defaults to commit timestamp)
746 :param author_timezone: Author timestamp timezone
747 (defaults to commit timestamp timezone)
748 :param tree: SHA1 of the tree root to use (if not specified the current index will be committed).
749 :return: New commit SHA1
751 from dulwich.index import commit_index
753 index = self.open_index()
756 c.tree = commit_index(self.object_store, index)
759 c.committer = committer
760 if commit_timestamp is None:
761 commit_timestamp = time.time()
762 c.commit_time = int(commit_timestamp)
763 if commit_timezone is None:
765 c.commit_timezone = commit_timezone
769 if author_timestamp is None:
770 author_timestamp = commit_timestamp
771 c.author_time = int(author_timestamp)
772 if author_timezone is None:
773 author_timezone = commit_timezone
774 c.author_timezone = author_timezone
776 self.object_store.add_object(c)
777 self.refs["HEAD"] = c.id
781 def init(cls, path, mkdir=True):
782 controldir = os.path.join(path, ".git")
784 cls.init_bare(controldir)
788 def init_bare(cls, path, mkdir=True):
789 for d in [[OBJECTDIR],
794 [REFSDIR, REFSDIR_TAGS],
795 [REFSDIR, REFSDIR_HEADS],
798 os.mkdir(os.path.join(path, *d))
800 ret.refs.set_ref("HEAD", "refs/heads/master")
801 f = GitFile(os.path.join(path, 'description'), 'wb')
803 f.write("Unnamed repository")
807 f = GitFile(os.path.join(path, 'config'), 'wb')
810 repositoryformatversion = 0
813 logallrefupdates = true
818 f = GitFile(os.path.join(path, 'info', 'excludes'), 'wb')