Don't try to send objects if there are none to send (the client has already hung up)
[jelmer/dulwich-libgit2.git] / dulwich / repo.py
index 96ed40253d0238e4f0ee7cd4f49112fd40752f41..3d5412b644358fd998cf2b69210f5421212352d1 100644 (file)
@@ -5,7 +5,8 @@
 # This program is free software; you can redistribute it and/or
 # modify it under the terms of the GNU General Public License
 # as published by the Free Software Foundation; version 2
-# of the License.
+# of the License or (at your option) any later version of 
+# the License.
 # 
 # This program is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
 # MA  02110-1301, USA.
 
-import os
+import os, stat
 
 from commit import Commit
-from errors import MissingCommitError, NotBlobError, NotTreeError, NotCommitError
-from objects import (ShaFile,
-                     Commit,
-                     Tree,
-                     Blob,
-                     )
-from pack import load_packs, iter_sha1, PackData, write_pack_index_v2
-import tempfile
+from errors import (
+        MissingCommitError, 
+        NotBlobError, 
+        NotCommitError, 
+        NotGitRepository,
+        NotTreeError, 
+        )
+from object_store import ObjectStore
+from objects import (
+        ShaFile,
+        Commit,
+        Tree,
+        Blob,
+        )
 
 OBJECTDIR = 'objects'
-PACKDIR = 'pack'
 SYMREF = 'ref: '
 
 
-class Tag(object):
+class Tags(object):
 
-    def __init__(self, name, ref):
-        self.name = name
-        self.ref = ref
+    def __init__(self, tagdir, tags):
+        self.tagdir = tagdir
+        self.tags = tags
+
+    def __getitem__(self, name):
+        return self.tags[name]
+    
+    def __setitem__(self, name, ref):
+        self.tags[name] = ref
+        f = open(os.path.join(self.tagdir, name), 'wb')
+        try:
+            f.write("%s\n" % ref)
+        finally:
+            f.close()
+
+    def __len__(self):
+        return len(self.tags)
+
+    def iteritems(self):
+        for k in self.tags:
+            yield k, self[k]
 
 
 class Repo(object):
@@ -46,57 +70,81 @@ class Repo(object):
   ref_locs = ['', 'refs', 'refs/tags', 'refs/heads', 'refs/remotes']
 
   def __init__(self, root):
-    controldir = os.path.join(root, ".git")
-    if os.path.exists(os.path.join(controldir, "objects")):
+    if os.path.isdir(os.path.join(root, ".git", "objects")):
       self.bare = False
-      self._controldir = controldir
-    else:
+      self._controldir = os.path.join(root, ".git")
+    elif os.path.isdir(os.path.join(root, "objects")):
       self.bare = True
       self._controldir = root
+    else:
+      raise NotGitRepository(root)
     self.path = root
-    self.tags = [Tag(name, ref) for name, ref in self.get_tags().items()]
+    self.tags = Tags(self.tagdir(), self.get_tags())
     self._object_store = None
 
   def controldir(self):
     return self._controldir
 
-  def fetch_objects(self, determine_wants, graph_walker, progress):
-    wants = determine_wants(self.heads())
-    commits_to_send = []
+  def find_missing_objects(self, determine_wants, graph_walker, progress):
+    """Fetch the missing objects required for a set of revisions.
+
+    :param determine_wants: Function that takes a dictionary with heads 
+        and returns the list of heads to fetch.
+    :param graph_walker: Object that can iterate over the list of revisions 
+        to fetch and has an "ack" method that will be called to acknowledge 
+        that a revision is present.
+    :param progress: Simple progress function that will be called with 
+        updated progress strings.
+    """
+    wants = determine_wants(self.get_refs())
+    commits_to_send = set(wants)
+    sha_done = set()
     ref = graph_walker.next()
     while ref:
-        commits_to_send.append(ref)
         if ref in self.object_store:
             graph_walker.ack(ref)
         ref = graph_walker.next()
-    sha_done = set()
-    for sha in commits_to_send:
+    while commits_to_send:
+        sha = (commits_to_send.pop(), None)
         if sha in sha_done:
             continue
 
         c = self.commit(sha)
-        sha_done.add(sha)
+        assert isinstance(c, Commit)
+        sha_done.add((sha, None))
+
+        commits_to_send.update([p for p in c.parents if not p in sha_done])
 
         def parse_tree(tree, sha_done):
-            for mode, name, x in tree.entries():
-                if not x in sha_done:
-                    try:
-                        t = self.tree(x)
-                        sha_done.add(x)
-                        parse_tree(t, sha_done)
-                    except:
-                        sha_done.append(x)
+            for mode, name, sha in tree.entries():
+                if (sha, name) in sha_done:
+                    continue
+                if mode & stat.S_IFDIR:
+                    parse_tree(self.tree(sha), sha_done)
+                sha_done.add((sha, name))
 
         treesha = c.tree
-        if treesha not in sha_done:
-            t = self.tree(treesha)
-            sha_done.add(treesha)
-            parse_tree(t, sha_done)
+        if c.tree not in sha_done:
+            parse_tree(self.tree(c.tree), sha_done)
+            sha_done.add((c.tree, None))
 
         progress("counting objects: %d\r" % len(sha_done))
+    return sha_done
 
-        for sha in sha_done:
-            yield self.get_object(sha)
+  def fetch_objects(self, determine_wants, graph_walker, progress):
+    """Fetch the missing objects required for a set of revisions.
+
+    :param determine_wants: Function that takes a dictionary with heads 
+        and returns the list of heads to fetch.
+    :param graph_walker: Object that can iterate over the list of revisions 
+        to fetch and has an "ack" method that will be called to acknowledge 
+        that a revision is present.
+    :param progress: Simple progress function that will be called with 
+        updated progress strings.
+    :return: tuple with number of objects, iterator over objects
+    """
+    shas = self.find_missing_objects(determine_wants, graph_walker, progress)
+    return (len(shas), ((self.get_object(sha), path) for sha, path in shas))
 
   def object_dir(self):
     return os.path.join(self.controldir(), OBJECTDIR)
@@ -131,7 +179,9 @@ class Repo(object):
         return self._get_ref(file)
 
   def get_refs(self):
-    ret = {"HEAD": self.head()}
+    ret = {}
+    if self.head():
+        ret['HEAD'] = self.head()
     for dir in ["refs/heads", "refs/tags"]:
         for name in os.listdir(os.path.join(self.controldir(), dir)):
           path = os.path.join(self.controldir(), dir, name)
@@ -149,9 +199,12 @@ class Repo(object):
       os.remove(file)
       return
 
+  def tagdir(self):
+    return os.path.join(self.controldir(), 'refs', 'tags')
+
   def get_tags(self):
     ret = {}
-    for root, dirs, files in os.walk(os.path.join(self.controldir(), 'refs', 'tags')):
+    for root, dirs, files in os.walk(self.tagdir()):
       for name in files:
         ret[name] = self._get_ref(os.path.join(root, name))
     return ret
@@ -167,6 +220,7 @@ class Repo(object):
     return self.ref('HEAD')
 
   def _get_object(self, sha, cls):
+    assert len(sha) in (20, 40)
     ret = self.get_object(sha)
     if ret._type != cls._type:
         if cls is Commit:
@@ -231,6 +285,12 @@ class Repo(object):
   def __repr__(self):
       return "<Repo at %r>" % self.path
 
+  @classmethod
+  def init(cls, path, mkdir=True):
+      controldir = os.path.join(path, ".git")
+      os.mkdir(controldir)
+      cls.init_bare(controldir)
+
   @classmethod
   def init_bare(cls, path, mkdir=True):
       for d in [["objects"], 
@@ -250,68 +310,4 @@ class Repo(object):
   create = init_bare
 
 
-class ObjectStore(object):
-
-    def __init__(self, path):
-        self.path = path
-        self._packs = None
 
-    def pack_dir(self):
-        return os.path.join(self.path, PACKDIR)
-
-    def __contains__(self, sha):
-        # TODO: This can be more efficient
-        try:
-            self[sha]
-            return True
-        except KeyError:
-            return False
-
-    @property
-    def packs(self):
-        if self._packs is None:
-            self._packs = list(load_packs(self.pack_dir()))
-        return self._packs
-
-    def _get_shafile(self, sha):
-        dir = sha[:2]
-        file = sha[2:]
-        # Check from object dir
-        path = os.path.join(self.path, dir, file)
-        if os.path.exists(path):
-          return ShaFile.from_file(path)
-        return None
-
-    def get_raw(self, sha):
-        for pack in self.packs:
-            if sha in pack:
-                return pack.get_raw(sha, self.get_raw)
-        # FIXME: Are pack deltas ever against on-disk shafiles ?
-        ret = self._get_shafile(sha)
-        if ret is not None:
-            return ret.as_raw_string()
-        raise KeyError(sha)
-
-    def __getitem__(self, sha):
-        assert len(sha) == 40, "Incorrect length sha: %s" % str(sha)
-        ret = self._get_shafile(sha)
-        if ret is not None:
-            return ret
-        # Check from packs
-        type, uncomp = self.get_raw(sha)
-        return ShaFile.from_raw_string(type, uncomp)
-
-    def move_in_pack(self, path):
-        p = PackData(path)
-        entries = p.sorted_entries(self.get_raw)
-        basename = os.path.join(self.pack_dir(), "pack-%s" % iter_sha1(entry[0] for entry in entries))
-        write_pack_index_v2(basename+".idx", entries, p.calculate_checksum())
-        os.rename(path, basename + ".pack")
-
-    def add_pack(self):
-        fd, path = tempfile.mkstemp(dir=self.pack_dir(), suffix=".pack")
-        f = os.fdopen(fd, 'w')
-        def commit():
-            if os.path.getsize(path) > 0:
-                self.move_in_pack(path)
-        return f, commit