# object_store.py -- Object store for git objects
-# Copyright (C) 2008-2009 Jelmer Vernooij <jelmer@samba.org>
+# Copyright (C) 2008-2013 Jelmer Vernooij <jelmer@samba.org>
+# and others
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
"""Git object store interfaces and implementation."""
+from io import BytesIO
import errno
-import itertools
+from itertools import chain
import os
import stat
import tempfile
-import urllib2
+from dulwich.diff_tree import (
+ tree_changes,
+ walk_trees,
+ )
from dulwich.errors import (
NotTreeError,
)
ShaFile,
Tag,
Tree,
+ ZERO_SHA,
hex_to_sha,
sha_to_hex,
+ hex_to_filename,
S_ISGITLINK,
+ object_class,
)
from dulwich.pack import (
Pack,
PackData,
+ PackInflater,
iter_sha1,
- load_pack_index,
- write_pack,
- write_pack_data,
+ write_pack_header,
write_pack_index_v2,
+ write_pack_object,
+ write_pack_objects,
+ compute_file_sha,
+ PackIndexer,
+ PackStreamCopier,
)
+INFODIR = 'info'
PACKDIR = 'pack'
"""Object store interface."""
def determine_wants_all(self, refs):
- return [sha for (ref, sha) in refs.iteritems() if not sha in self and not ref.endswith("^{}")]
+ return [sha for (ref, sha) in refs.iteritems()
+ if not sha in self and not ref.endswith("^{}") and
+ not sha == ZERO_SHA]
def iter_shas(self, shas):
"""Iterate over the objects for the specified shas.
"""Obtain the raw text for an object.
:param name: sha for the object.
- :return: tuple with object type and object contents.
+ :return: tuple with numeric type and object contents.
"""
raise NotImplementedError(self.get_raw)
def __getitem__(self, sha):
"""Obtain an object by SHA1."""
- type, uncomp = self.get_raw(sha)
- return ShaFile.from_raw_string(type, uncomp)
+ type_num, uncomp = self.get_raw(sha)
+ return ShaFile.from_raw_string(type_num, uncomp, sha=sha)
def __iter__(self):
"""Iterate over the SHAs that are present in this store."""
def tree_changes(self, source, target, want_unchanged=False):
"""Find the differences between the contents of two trees
- :param object_store: Object store to use for retrieving tree contents
- :param tree: SHA1 of the root tree
+ :param source: SHA1 of the source tree
+ :param target: SHA1 of the target tree
:param want_unchanged: Whether unchanged files should be reported
- :return: Iterator over tuples with (oldpath, newpath), (oldmode, newmode), (oldsha, newsha)
+ :return: Iterator over tuples with
+ (oldpath, newpath), (oldmode, newmode), (oldsha, newsha)
"""
- todo = set([(source, target, "")])
- while todo:
- (sid, tid, path) = todo.pop()
- if sid is not None:
- stree = self[sid]
- else:
- stree = {}
- if tid is not None:
- ttree = self[tid]
- else:
- ttree = {}
- for name, oldmode, oldhexsha in stree.iteritems():
- if path == "":
- oldchildpath = name
- else:
- oldchildpath = "%s/%s" % (path, name)
- try:
- (newmode, newhexsha) = ttree[name]
- newchildpath = oldchildpath
- except KeyError:
- newmode = None
- newhexsha = None
- newchildpath = None
- if (want_unchanged or oldmode != newmode or
- oldhexsha != newhexsha):
- if stat.S_ISDIR(oldmode):
- if newmode is None or stat.S_ISDIR(newmode):
- todo.add((oldhexsha, newhexsha, oldchildpath))
- else:
- # entry became a file
- todo.add((oldhexsha, None, oldchildpath))
- yield ((None, newchildpath), (None, newmode), (None, newhexsha))
- else:
- if newmode is not None and stat.S_ISDIR(newmode):
- # entry became a dir
- yield ((oldchildpath, None), (oldmode, None), (oldhexsha, None))
- todo.add((None, newhexsha, newchildpath))
- else:
- yield ((oldchildpath, newchildpath), (oldmode, newmode), (oldhexsha, newhexsha))
-
- for name, newmode, newhexsha in ttree.iteritems():
- if path == "":
- childpath = name
- else:
- childpath = "%s/%s" % (path, name)
- if not name in stree:
- if not stat.S_ISDIR(newmode):
- yield ((None, childpath), (None, newmode), (None, newhexsha))
- else:
- todo.add((None, newhexsha, childpath))
-
- def iter_tree_contents(self, tree):
- """Yield (path, mode, hexsha) tuples for all non-Tree objects in a tree.
-
- :param tree: SHA1 of the root of the tree
- """
- todo = set([(tree, "")])
- while todo:
- (tid, tpath) = todo.pop()
- tree = self[tid]
- for name, mode, hexsha in tree.iteritems():
- if tpath == "":
- path = name
- else:
- path = "%s/%s" % (tpath, name)
- if stat.S_ISDIR(mode):
- todo.add((hexsha, path))
- else:
- yield path, mode, hexsha
+ for change in tree_changes(self, source, target,
+ want_unchanged=want_unchanged):
+ yield ((change.old.path, change.new.path),
+ (change.old.mode, change.new.mode),
+ (change.old.sha, change.new.sha))
+
+ def iter_tree_contents(self, tree_id, include_trees=False):
+ """Iterate the contents of a tree and all subtrees.
+
+ Iteration is depth-first pre-order, as in e.g. os.walk.
+
+ :param tree_id: SHA1 of the tree.
+ :param include_trees: If True, include tree objects in the iteration.
+ :return: Iterator over TreeEntry namedtuples for all the objects in a
+ tree.
+ """
+ for entry, _ in walk_trees(self, tree_id, None):
+ if not stat.S_ISDIR(entry.mode) or include_trees:
+ yield entry
def find_missing_objects(self, haves, wants, progress=None,
- get_tagged=None):
+ get_tagged=None,
+ get_parents=lambda commit: commit.parents):
"""Find the missing objects required for a set of revisions.
:param haves: Iterable over SHAs already in common.
:param wants: Iterable over SHAs of objects to fetch.
- :param progress: Simple progress function that will be called with
+ :param progress: Simple progress function that will be called with
updated progress strings.
:param get_tagged: Function that returns a dict of pointed-to sha -> tag
sha for including tags.
+ :param get_parents: Optional function for getting the parents of a commit.
:return: Iterator over (sha, path) pairs.
"""
- finder = MissingObjectFinder(self, haves, wants, progress, get_tagged)
+ finder = MissingObjectFinder(self, haves, wants, progress, get_tagged, get_parents=get_parents)
return iter(finder.next, None)
def find_common_revisions(self, graphwalker):
:return: List of SHAs that are in common
"""
haves = []
- sha = graphwalker.next()
+ sha = next(graphwalker)
while sha:
if sha in self:
haves.append(sha)
graphwalker.ack(sha)
- sha = graphwalker.next()
+ sha = next(graphwalker)
return haves
- def get_graph_walker(self, heads):
- """Obtain a graph walker for this object store.
-
- :param heads: Local heads to start search with
- :return: GraphWalker object
- """
- return ObjectStoreGraphWalker(heads, lambda sha: self[sha].parents)
-
- def generate_pack_contents(self, have, want):
+ def generate_pack_contents(self, have, want, progress=None):
"""Iterate over the contents of a pack file.
:param have: List of SHA1s of objects that should not be sent
:param want: List of SHA1s of objects that should be sent
+ :param progress: Optional progress reporting method
+ """
+ return self.iter_shas(self.find_missing_objects(have, want, progress))
+
+ def peel_sha(self, sha):
+ """Peel all tags from a SHA.
+
+ :param sha: The object SHA to peel.
+ :return: The fully-peeled SHA1 of a tag object, after peeling all
+ intermediate tags; if the original ref does not point to a tag, this
+ will equal the original SHA1.
+ """
+ obj = self[sha]
+ obj_class = object_class(obj.type_name)
+ while obj_class is Tag:
+ obj_class, sha = obj.object
+ obj = self[sha]
+ return obj
+
+ def _collect_ancestors(self, heads, common=set(),
+ get_parents=lambda commit: commit.parents):
+ """Collect all ancestors of heads up to (excluding) those in common.
+
+ :param heads: commits to start from
+ :param common: commits to end at, or empty set to walk repository
+ completely
+ :param get_parents: Optional function for getting the parents of a commit.
+ :return: a tuple (A, B), where A is the set of all commits reachable
+ from heads but not present in common, and B is the set of common
+ (shared) elements that are directly reachable from heads
"""
- return self.iter_shas(self.find_missing_objects(have, want))
+ bases = set()
+ commits = set()
+ queue = []
+ queue.extend(heads)
+ while queue:
+ e = queue.pop(0)
+ if e in common:
+ bases.add(e)
+ elif e not in commits:
+ commits.add(e)
+ cmt = self[e]
+ queue.extend(get_parents(cmt))
+ return (commits, bases)
+
+ def close(self):
+ """Close any files opened by this object store."""
+ # Default implementation is a NO-OP
class PackBasedObjectStore(BaseObjectStore):
def __init__(self):
- self._pack_cache = None
+ self._pack_cache = {}
+
+ @property
+ def alternates(self):
+ return []
def contains_packed(self, sha):
- """Check if a particular object is present by SHA1 and is packed."""
+ """Check if a particular object is present by SHA1 and is packed.
+
+ This does not check alternates.
+ """
for pack in self.packs:
if sha in pack:
return True
return False
- def _load_packs(self):
- raise NotImplementedError(self._load_packs)
+ def __contains__(self, sha):
+ """Check if a particular object is present by SHA1.
+
+ This method makes no distinction between loose and packed objects.
+ """
+ if self.contains_packed(sha) or self.contains_loose(sha):
+ return True
+ for alternate in self.alternates:
+ if sha in alternate:
+ return True
+ return False
def _pack_cache_stale(self):
"""Check whether the pack cache is stale."""
raise NotImplementedError(self._pack_cache_stale)
- def _add_known_pack(self, pack):
+ def _add_known_pack(self, base_name, pack):
"""Add a newly appeared pack to the cache by path.
"""
- if self._pack_cache is not None:
- self._pack_cache.append(pack)
+ self._pack_cache[base_name] = pack
+
+ def close(self):
+ pack_cache = self._pack_cache
+ self._pack_cache = {}
+ while pack_cache:
+ (name, pack) = pack_cache.popitem()
+ pack.close()
@property
def packs(self):
"""List with pack objects."""
if self._pack_cache is None or self._pack_cache_stale():
- self._pack_cache = self._load_packs()
- return self._pack_cache
+ self._update_pack_cache()
+
+ return self._pack_cache.values()
+
+ def _iter_alternate_objects(self):
+ """Iterate over the SHAs of all the objects in alternate stores."""
+ for alternate in self.alternates:
+ for alternate_object in alternate:
+ yield alternate_object
def _iter_loose_objects(self):
+ """Iterate over the SHAs of all loose objects."""
raise NotImplementedError(self._iter_loose_objects)
def _get_loose_object(self, sha):
raise NotImplementedError(self._get_loose_object)
+ def _remove_loose_object(self, sha):
+ raise NotImplementedError(self._remove_loose_object)
+
+ def pack_loose_objects(self):
+ """Pack loose objects.
+
+ :return: Number of objects packed
+ """
+ objects = set()
+ for sha in self._iter_loose_objects():
+ objects.add((self._get_loose_object(sha), None))
+ self.add_objects(list(objects))
+ for obj, path in objects:
+ self._remove_loose_object(obj.id)
+ return len(objects)
+
def __iter__(self):
"""Iterate over the SHAs that are present in this store."""
- iterables = self.packs + [self._iter_loose_objects()]
- return itertools.chain(*iterables)
+ iterables = self.packs + [self._iter_loose_objects()] + [self._iter_alternate_objects()]
+ return chain(*iterables)
def contains_loose(self, sha):
- """Check if a particular object is present by SHA1 and is loose."""
+ """Check if a particular object is present by SHA1 and is loose.
+
+ This does not check alternates.
+ """
return self._get_loose_object(sha) is not None
def get_raw(self, name):
"""Obtain the raw text for an object.
-
+
:param name: sha for the object.
- :return: tuple with object type and object contents.
+ :return: tuple with numeric type and object contents.
"""
if len(name) == 40:
sha = hex_to_sha(name)
sha = name
hexsha = None
else:
- raise AssertionError
+ raise AssertionError("Invalid object name %r" % name)
for pack in self.packs:
try:
return pack.get_raw(sha)
except KeyError:
pass
- if hexsha is None:
+ if hexsha is None:
hexsha = sha_to_hex(name)
ret = self._get_loose_object(hexsha)
if ret is not None:
- return ret.type, ret.as_raw_string()
+ return ret.type_num, ret.as_raw_string()
+ for alternate in self.alternates:
+ try:
+ return alternate.get_raw(hexsha)
+ except KeyError:
+ pass
raise KeyError(hexsha)
def add_objects(self, objects):
"""Add a set of objects to this object store.
:param objects: Iterable over objects, should support __len__.
+ :return: Pack object of the objects written.
"""
if len(objects) == 0:
# Don't bother writing an empty pack file
return
- f, commit = self.add_pack()
- write_pack_data(f, objects, len(objects))
- commit()
+ f, commit, abort = self.add_pack()
+ try:
+ write_pack_objects(f, objects)
+ except:
+ abort()
+ raise
+ else:
+ return commit()
class DiskObjectStore(PackBasedObjectStore):
self.path = path
self.pack_dir = os.path.join(self.path, PACKDIR)
self._pack_cache_time = 0
+ self._pack_cache = {}
+ self._alternates = None
- def _load_packs(self):
- pack_files = []
+ def __repr__(self):
+ return "<%s(%r)>" % (self.__class__.__name__, self.path)
+
+ @property
+ def alternates(self):
+ if self._alternates is not None:
+ return self._alternates
+ self._alternates = []
+ for path in self._read_alternate_paths():
+ self._alternates.append(DiskObjectStore(path))
+ return self._alternates
+
+ def _read_alternate_paths(self):
try:
- self._pack_cache_time = os.stat(self.pack_dir).st_mtime
- pack_dir_contents = os.listdir(self.pack_dir)
- for name in pack_dir_contents:
- # TODO: verify that idx exists first
- if name.startswith("pack-") and name.endswith(".pack"):
- filename = os.path.join(self.pack_dir, name)
- pack_files.append((os.stat(filename).st_mtime, filename))
- except OSError, e:
+ f = GitFile(os.path.join(self.path, "info", "alternates"),
+ 'rb')
+ except (OSError, IOError) as e:
if e.errno == errno.ENOENT:
return []
raise
- pack_files.sort(reverse=True)
- suffix_len = len(".pack")
- return [Pack(f[:-suffix_len]) for _, f in pack_files]
+ ret = []
+ try:
+ for l in f.readlines():
+ l = l.rstrip("\n")
+ if l[0] == "#":
+ continue
+ if os.path.isabs(l):
+ ret.append(l)
+ else:
+ ret.append(os.path.join(self.path, l))
+ return ret
+ finally:
+ f.close()
+
+ def add_alternate_path(self, path):
+ """Add an alternate path to this object store.
+ """
+ try:
+ os.mkdir(os.path.join(self.path, "info"))
+ except OSError as e:
+ if e.errno != errno.EEXIST:
+ raise
+ alternates_path = os.path.join(self.path, "info/alternates")
+ f = GitFile(alternates_path, 'wb')
+ try:
+ try:
+ orig_f = open(alternates_path, 'rb')
+ except (OSError, IOError) as e:
+ if e.errno != errno.ENOENT:
+ raise
+ else:
+ try:
+ f.write(orig_f.read())
+ finally:
+ orig_f.close()
+ f.write("%s\n" % path)
+ finally:
+ f.close()
+
+ if not os.path.isabs(path):
+ path = os.path.join(self.path, path)
+ self.alternates.append(DiskObjectStore(path))
+
+ def _update_pack_cache(self):
+ try:
+ pack_dir_contents = os.listdir(self.pack_dir)
+ except OSError as e:
+ if e.errno == errno.ENOENT:
+ self._pack_cache_time = 0
+ self.close()
+ return
+ raise
+ self._pack_cache_time = os.stat(self.pack_dir).st_mtime
+ pack_files = set()
+ for name in pack_dir_contents:
+ # TODO: verify that idx exists first
+ if name.startswith("pack-") and name.endswith(".pack"):
+ pack_files.add(name[:-len(".pack")])
+
+ # Open newly appeared pack files
+ for f in pack_files:
+ if f not in self._pack_cache:
+ self._pack_cache[f] = Pack(os.path.join(self.pack_dir, f))
+ # Remove disappeared pack files
+ for f in set(self._pack_cache) - pack_files:
+ self._pack_cache.pop(f).close()
def _pack_cache_stale(self):
try:
return os.stat(self.pack_dir).st_mtime > self._pack_cache_time
- except OSError, e:
+ except OSError as e:
if e.errno == errno.ENOENT:
return True
raise
def _get_shafile_path(self, sha):
- dir = sha[:2]
- file = sha[2:]
# Check from object dir
- return os.path.join(self.path, dir, file)
+ return hex_to_filename(self.path, sha)
def _iter_loose_objects(self):
for base in os.listdir(self.path):
def _get_loose_object(self, sha):
path = self._get_shafile_path(sha)
try:
- return ShaFile.from_file(path)
- except OSError, e:
+ return ShaFile.from_path(path)
+ except (OSError, IOError) as e:
if e.errno == errno.ENOENT:
return None
raise
- def move_in_thin_pack(self, path):
+ def _remove_loose_object(self, sha):
+ os.remove(self._get_shafile_path(sha))
+
+ def _complete_thin_pack(self, f, path, copier, indexer):
"""Move a specific file containing a pack into the pack directory.
- :note: The file should be on the same file system as the
+ :note: The file should be on the same file system as the
packs directory.
+ :param f: Open file object for the pack.
:param path: Path to the pack file.
+ :param copier: A PackStreamCopier to use for writing pack data.
+ :param indexer: A PackIndexer for indexing the pack.
"""
- data = PackData(path)
-
- # Write index for the thin pack (do we really need this?)
- temppath = os.path.join(self.pack_dir,
- sha_to_hex(urllib2.randombytes(20))+".tempidx")
- data.create_index_v2(temppath, self.get_raw)
- p = Pack.from_objects(data, load_pack_index(temppath))
-
- # Write a full pack version
- temppath = os.path.join(self.pack_dir,
- sha_to_hex(urllib2.randombytes(20))+".temppack")
- write_pack(temppath, ((o, None) for o in p.iterobjects(self.get_raw)),
- len(p))
- pack_sha = load_pack_index(temppath+".idx").objects_sha1()
- newbasename = os.path.join(self.pack_dir, "pack-%s" % pack_sha)
- os.rename(temppath+".pack", newbasename+".pack")
- os.rename(temppath+".idx", newbasename+".idx")
- self._add_known_pack(Pack(newbasename))
+ entries = list(indexer)
+
+ # Update the header with the new number of objects.
+ f.seek(0)
+ write_pack_header(f, len(entries) + len(indexer.ext_refs()))
+
+ # Must flush before reading (http://bugs.python.org/issue3207)
+ f.flush()
+
+ # Rescan the rest of the pack, computing the SHA with the new header.
+ new_sha = compute_file_sha(f, end_ofs=-20)
+
+ # Must reposition before writing (http://bugs.python.org/issue3207)
+ f.seek(0, os.SEEK_CUR)
+
+ # Complete the pack.
+ for ext_sha in indexer.ext_refs():
+ assert len(ext_sha) == 20
+ type_num, data = self.get_raw(ext_sha)
+ offset = f.tell()
+ crc32 = write_pack_object(f, type_num, data, sha=new_sha)
+ entries.append((ext_sha, offset, crc32))
+ pack_sha = new_sha.digest()
+ f.write(pack_sha)
+ f.close()
+
+ # Move the pack in.
+ entries.sort()
+ pack_base_name = os.path.join(
+ self.pack_dir, 'pack-' + iter_sha1(e[0] for e in entries))
+ os.rename(path, pack_base_name + '.pack')
+
+ # Write the index.
+ index_file = GitFile(pack_base_name + '.idx', 'wb')
+ try:
+ write_pack_index_v2(index_file, entries, pack_sha)
+ index_file.close()
+ finally:
+ index_file.abort()
+
+ # Add the pack to the store and return it.
+ final_pack = Pack(pack_base_name)
+ final_pack.check_length_and_checksum()
+ self._add_known_pack(pack_base_name, final_pack)
+ return final_pack
+
+ def add_thin_pack(self, read_all, read_some):
+ """Add a new thin pack to this object store.
+
+ Thin packs are packs that contain deltas with parents that exist outside
+ the pack. They should never be placed in the object store directly, and
+ should always be indexed and completed as they are copied.
+
+ :param read_all: Read function that blocks until the number of requested
+ bytes are read.
+ :param read_some: Read function that returns at least one byte, but may
+ not return the number of bytes requested.
+ :return: A Pack object pointing at the now-completed thin pack in the
+ objects/pack directory.
+ """
+ fd, path = tempfile.mkstemp(dir=self.path, prefix='tmp_pack_')
+ f = os.fdopen(fd, 'w+b')
+
+ try:
+ indexer = PackIndexer(f, resolve_ext_ref=self.get_raw)
+ copier = PackStreamCopier(read_all, read_some, f,
+ delta_iter=indexer)
+ copier.verify()
+ return self._complete_thin_pack(f, path, copier, indexer)
+ finally:
+ f.close()
def move_in_pack(self, path):
"""Move a specific file containing a pack into the pack directory.
- :note: The file should be on the same file system as the
+ :note: The file should be on the same file system as the
packs directory.
:param path: Path to the pack file.
"""
p = PackData(path)
- entries = p.sorted_entries()
- basename = os.path.join(self.pack_dir,
- "pack-%s" % iter_sha1(entry[0] for entry in entries))
- write_pack_index_v2(basename+".idx", entries, p.get_stored_checksum())
- p.close()
+ try:
+ entries = p.sorted_entries()
+ basename = os.path.join(self.pack_dir,
+ "pack-%s" % iter_sha1(entry[0] for entry in entries))
+ f = GitFile(basename+".idx", "wb")
+ try:
+ write_pack_index_v2(f, entries, p.get_stored_checksum())
+ finally:
+ f.close()
+ finally:
+ p.close()
os.rename(path, basename + ".pack")
- self._add_known_pack(Pack(basename))
-
- def add_thin_pack(self):
- """Add a new thin pack to this object store.
-
- Thin packs are packs that contain deltas with parents that exist
- in a different pack.
- """
- fd, path = tempfile.mkstemp(dir=self.pack_dir, suffix=".pack")
- f = os.fdopen(fd, 'wb')
- def commit():
- os.fsync(fd)
- f.close()
- if os.path.getsize(path) > 0:
- self.move_in_thin_pack(path)
- return f, commit
+ final_pack = Pack(basename)
+ self._add_known_pack(basename, final_pack)
+ return final_pack
def add_pack(self):
- """Add a new pack to this object store.
+ """Add a new pack to this object store.
- :return: Fileobject to write to and a commit function to
- call when the pack is finished.
+ :return: Fileobject to write to, a commit function to
+ call when the pack is finished and an abort
+ function.
"""
fd, path = tempfile.mkstemp(dir=self.pack_dir, suffix=".pack")
f = os.fdopen(fd, 'wb')
os.fsync(fd)
f.close()
if os.path.getsize(path) > 0:
- self.move_in_pack(path)
- return f, commit
+ return self.move_in_pack(path)
+ else:
+ os.remove(path)
+ return None
+ def abort():
+ f.close()
+ os.remove(path)
+ return f, commit, abort
def add_object(self, obj):
"""Add a single object to this object store.
:param obj: Object to add
"""
dir = os.path.join(self.path, obj.id[:2])
- if not os.path.isdir(dir):
+ try:
os.mkdir(dir)
+ except OSError as e:
+ if e.errno != errno.EEXIST:
+ raise
path = os.path.join(dir, obj.id[2:])
if os.path.exists(path):
return # Already there, no need to write again
finally:
f.close()
+ @classmethod
+ def init(cls, path):
+ try:
+ os.mkdir(path)
+ except OSError as e:
+ if e.errno != errno.EEXIST:
+ raise
+ os.mkdir(os.path.join(path, "info"))
+ os.mkdir(os.path.join(path, PACKDIR))
+ return cls(path)
+
class MemoryObjectStore(BaseObjectStore):
"""Object store that keeps all objects in memory."""
super(MemoryObjectStore, self).__init__()
self._data = {}
+ def _to_hexsha(self, sha):
+ if len(sha) == 40:
+ return sha
+ elif len(sha) == 20:
+ return sha_to_hex(sha)
+ else:
+ raise ValueError("Invalid sha %r" % (sha,))
+
def contains_loose(self, sha):
"""Check if a particular object is present by SHA1 and is loose."""
- return sha in self._data
+ return self._to_hexsha(sha) in self._data
def contains_packed(self, sha):
"""Check if a particular object is present by SHA1 and is packed."""
def get_raw(self, name):
"""Obtain the raw text for an object.
-
+
:param name: sha for the object.
- :return: tuple with object type and object contents.
+ :return: tuple with numeric type and object contents.
"""
- return self[name].as_raw_string()
+ obj = self[self._to_hexsha(name)]
+ return obj.type_num, obj.as_raw_string()
def __getitem__(self, name):
- return self._data[name]
+ return self._data[self._to_hexsha(name)]
+
+ def __delitem__(self, name):
+ """Delete an object from this store, for testing only."""
+ del self._data[self._to_hexsha(name)]
def add_object(self, obj):
"""Add a single object to this object store.
for obj, path in objects:
self._data[obj.id] = obj
+ def add_pack(self):
+ """Add a new pack to this object store.
+
+ Because this object store doesn't support packs, we extract and add the
+ individual objects.
+
+ :return: Fileobject to write to and a commit function to
+ call when the pack is finished.
+ """
+ f = BytesIO()
+ def commit():
+ p = PackData.from_file(BytesIO(f.getvalue()), f.tell())
+ f.close()
+ for obj in PackInflater.for_pack_data(p):
+ self._data[obj.id] = obj
+ def abort():
+ pass
+ return f, commit, abort
+
+ def _complete_thin_pack(self, f, indexer):
+ """Complete a thin pack by adding external references.
+
+ :param f: Open file object for the pack.
+ :param indexer: A PackIndexer for indexing the pack.
+ """
+ entries = list(indexer)
+
+ # Update the header with the new number of objects.
+ f.seek(0)
+ write_pack_header(f, len(entries) + len(indexer.ext_refs()))
+
+ # Rescan the rest of the pack, computing the SHA with the new header.
+ new_sha = compute_file_sha(f, end_ofs=-20)
+
+ # Complete the pack.
+ for ext_sha in indexer.ext_refs():
+ assert len(ext_sha) == 20
+ type_num, data = self.get_raw(ext_sha)
+ write_pack_object(f, type_num, data, sha=new_sha)
+ pack_sha = new_sha.digest()
+ f.write(pack_sha)
+
+ def add_thin_pack(self, read_all, read_some):
+ """Add a new thin pack to this object store.
+
+ Thin packs are packs that contain deltas with parents that exist outside
+ the pack. Because this object store doesn't support packs, we extract
+ and add the individual objects.
+
+ :param read_all: Read function that blocks until the number of requested
+ bytes are read.
+ :param read_some: Read function that returns at least one byte, but may
+ not return the number of bytes requested.
+ """
+ f, commit, abort = self.add_pack()
+ try:
+ indexer = PackIndexer(f, resolve_ext_ref=self.get_raw)
+ copier = PackStreamCopier(read_all, read_some, f, delta_iter=indexer)
+ copier.verify()
+ self._complete_thin_pack(f, indexer)
+ except:
+ abort()
+ raise
+ else:
+ commit()
+
class ObjectImporter(object):
"""Interface for importing objects."""
raise NotImplementedError(self.add_object)
def finish(self, object):
- """Finish the imoprt and write objects to disk."""
+ """Finish the import and write objects to disk."""
raise NotImplementedError(self.finish)
def __contains__(self, needle):
"""Check if an object is present.
- :note: This checks if the object is present in
+ :note: This checks if the object is present in
the underlying object store, not if it would
be yielded by the iterator.
def __getitem__(self, key):
"""Find an object by SHA1.
-
+
:note: This retrieves the object from the underlying
object store. It will also succeed if the object would
not be returned by the iterator.
def tree_lookup_path(lookup_obj, root_sha, path):
- """Lookup an object in a Git tree.
+ """Look up an object in a Git tree.
:param lookup_obj: Callback for retrieving object by SHA1
:param root_sha: SHA1 of the root tree
:param path: Path to lookup
+ :return: A tuple of (mode, SHA) of the resulting path.
+ """
+ tree = lookup_obj(root_sha)
+ if not isinstance(tree, Tree):
+ raise NotTreeError(root_sha)
+ return tree.lookup_path(lookup_obj, path)
+
+
+def _collect_filetree_revs(obj_store, tree_sha, kset):
+ """Collect SHA1s of files and directories for specified tree.
+
+ :param obj_store: Object store to get objects by SHA from
+ :param tree_sha: tree reference to walk
+ :param kset: set to fill with references to files and directories
"""
- parts = path.split("/")
- sha = root_sha
- mode = None
- for p in parts:
- obj = lookup_obj(sha)
- if type(obj) is not Tree:
- raise NotTreeError(sha)
- if p == '':
- continue
- mode, sha = obj[p]
- return mode, sha
+ filetree = obj_store[tree_sha]
+ for name, mode, sha in filetree.iteritems():
+ if not S_ISGITLINK(mode) and sha not in kset:
+ kset.add(sha)
+ if stat.S_ISDIR(mode):
+ _collect_filetree_revs(obj_store, sha, kset)
+
+
+def _split_commits_and_tags(obj_store, lst, ignore_unknown=False):
+ """Split object id list into two lists with commit SHA1s and tag SHA1s.
+
+ Commits referenced by tags are included in the commits
+ list as well. Only SHA1s known in this repository will get
+ through, and unless the ignore_unknown argument is True, a KeyError
+ is raised for any SHA1 missing from the repository
+
+ :param obj_store: Object store to get objects by SHA1 from
+ :param lst: Collection of commit and tag SHAs
+ :param ignore_unknown: True to skip SHA1s missing from the repository
+ silently.
+ :return: A tuple of (commits, tags) SHA1s
+ """
+ commits = set()
+ tags = set()
+ for e in lst:
+ try:
+ o = obj_store[e]
+ except KeyError:
+ if not ignore_unknown:
+ raise
+ else:
+ if isinstance(o, Commit):
+ commits.add(e)
+ elif isinstance(o, Tag):
+ tags.add(e)
+ commits.add(o.object[1])
+ else:
+ raise KeyError('Not a commit or a tag: %s' % e)
+ return (commits, tags)
class MissingObjectFinder(object):
"""Find the objects missing from another object store.
- :param object_store: Object store containing at least all objects to be
+ :param object_store: Object store containing at least all objects to be
sent
:param haves: SHA1s of commits not to send (already present in target)
:param wants: SHA1s of commits to send
:param progress: Optional function to report progress to.
:param get_tagged: Function that returns a dict of pointed-to sha -> tag
sha for including tags.
+ :param get_parents: Optional function for getting the parents of a commit.
:param tagged: dict of pointed-to sha -> tag sha for including tags
"""
def __init__(self, object_store, haves, wants, progress=None,
- get_tagged=None):
- self.sha_done = set(haves)
- self.objects_to_send = set([(w, None, False) for w in wants if w not in haves])
+ get_tagged=None, get_parents=lambda commit: commit.parents):
self.object_store = object_store
+ self._get_parents = get_parents
+ # process Commits and Tags differently
+ # Note, while haves may list commits/tags not available locally,
+ # and such SHAs would get filtered out by _split_commits_and_tags,
+ # wants shall list only known SHAs, and otherwise
+ # _split_commits_and_tags fails with KeyError
+ have_commits, have_tags = \
+ _split_commits_and_tags(object_store, haves, True)
+ want_commits, want_tags = \
+ _split_commits_and_tags(object_store, wants, False)
+ # all_ancestors is a set of commits that shall not be sent
+ # (complete repository up to 'haves')
+ all_ancestors = object_store._collect_ancestors(
+ have_commits,
+ get_parents=self._get_parents)[0]
+ # all_missing - complete set of commits between haves and wants
+ # common - commits from all_ancestors we hit into while
+ # traversing parent hierarchy of wants
+ missing_commits, common_commits = object_store._collect_ancestors(
+ want_commits,
+ all_ancestors,
+ get_parents=self._get_parents);
+ self.sha_done = set()
+ # Now, fill sha_done with commits and revisions of
+ # files and directories known to be both locally
+ # and on target. Thus these commits and files
+ # won't get selected for fetch
+ for h in common_commits:
+ self.sha_done.add(h)
+ cmt = object_store[h]
+ _collect_filetree_revs(object_store, cmt.tree, self.sha_done)
+ # record tags we have as visited, too
+ for t in have_tags:
+ self.sha_done.add(t)
+
+ missing_tags = want_tags.difference(have_tags)
+ # in fact, what we 'want' is commits and tags
+ # we've found missing
+ wants = missing_commits.union(missing_tags)
+
+ self.objects_to_send = set([(w, None, False) for w in wants])
+
if progress is None:
self.progress = lambda x: None
else:
self._tagged = get_tagged and get_tagged() or {}
def add_todo(self, entries):
- self.objects_to_send.update([e for e in entries if not e[0] in self.sha_done])
-
- def parse_tree(self, tree):
- self.add_todo([(sha, name, not stat.S_ISDIR(mode)) for (mode, name, sha) in tree.entries() if not S_ISGITLINK(mode)])
-
- def parse_commit(self, commit):
- self.add_todo([(commit.tree, "", False)])
- self.add_todo([(p, None, False) for p in commit.parents])
-
- def parse_tag(self, tag):
- self.add_todo([(tag.object[1], None, False)])
+ self.objects_to_send.update([e for e in entries
+ if not e[0] in self.sha_done])
def next(self):
- if not self.objects_to_send:
- return None
- (sha, name, leaf) = self.objects_to_send.pop()
+ while True:
+ if not self.objects_to_send:
+ return None
+ (sha, name, leaf) = self.objects_to_send.pop()
+ if sha not in self.sha_done:
+ break
if not leaf:
o = self.object_store[sha]
if isinstance(o, Commit):
- self.parse_commit(o)
+ self.add_todo([(o.tree, "", False)])
elif isinstance(o, Tree):
- self.parse_tree(o)
+ self.add_todo([(s, n, not stat.S_ISDIR(m))
+ for n, m, s in o.iteritems()
+ if not S_ISGITLINK(m)])
elif isinstance(o, Tag):
- self.parse_tag(o)
+ self.add_todo([(o.object[1], None, False)])
if sha in self._tagged:
self.add_todo([(self._tagged[sha], None, True)])
self.sha_done.add(sha)
self.progress("counting objects: %d\r" % len(self.sha_done))
return (sha, name)
+ __next__ = next
+
class ObjectStoreGraphWalker(object):
- """Graph walker that finds out what commits are missing from an object store."""
+ """Graph walker that finds what commits are missing from an object store.
+
+ :ivar heads: Revisions without descendants in the local repo
+ :ivar get_parents: Function to retrieve parents in the local repo
+ """
def __init__(self, local_heads, get_parents):
"""Create a new instance.
self.parents = {}
def ack(self, sha):
- """Ack that a particular revision and its ancestors are present in the source."""
- if sha in self.heads:
- self.heads.remove(sha)
- if sha in self.parents:
- for p in self.parents[sha]:
- self.ack(p)
+ """Ack that a revision and its ancestors are present in the source."""
+ ancestors = set([sha])
+
+ # stop if we run out of heads to remove
+ while self.heads:
+ for a in ancestors:
+ if a in self.heads:
+ self.heads.remove(a)
+
+ # collect all ancestors
+ new_ancestors = set()
+ for a in ancestors:
+ ps = self.parents.get(a)
+ if ps is not None:
+ new_ancestors.update(ps)
+ self.parents[a] = None
+
+ # no more ancestors; stop
+ if not new_ancestors:
+ break
+
+ ancestors = new_ancestors
def next(self):
"""Iterate over ancestors of heads in the target."""
ret = self.heads.pop()
ps = self.get_parents(ret)
self.parents[ret] = ps
- self.heads.update(ps)
+ self.heads.update([p for p in ps if not p in self.parents])
return ret
return None
+
+ __next__ = next