# object_store.py -- Object store for git objects
-# Copyright (C) 2008-2009 Jelmer Vernooij <jelmer@samba.org>
+# Copyright (C) 2008-2013 Jelmer Vernooij <jelmer@samba.org>
+# and others
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
"""Git object store interfaces and implementation."""
+from io import BytesIO
import errno
-import itertools
+from itertools import chain
import os
import stat
import tempfile
-import urllib2
+from dulwich.diff_tree import (
+ tree_changes,
+ walk_trees,
+ )
from dulwich.errors import (
NotTreeError,
)
ShaFile,
Tag,
Tree,
+ ZERO_SHA,
hex_to_sha,
sha_to_hex,
+ hex_to_filename,
S_ISGITLINK,
+ object_class,
)
from dulwich.pack import (
Pack,
PackData,
+ PackInflater,
iter_sha1,
- load_pack_index,
- write_pack,
- write_pack_data,
+ write_pack_header,
write_pack_index_v2,
+ write_pack_object,
+ write_pack_objects,
+ compute_file_sha,
+ PackIndexer,
+ PackStreamCopier,
)
+INFODIR = 'info'
PACKDIR = 'pack'
"""Object store interface."""
def determine_wants_all(self, refs):
- return [sha for (ref, sha) in refs.iteritems() if not sha in self and not ref.endswith("^{}")]
+ return [sha for (ref, sha) in refs.iteritems()
+ if not sha in self and not ref.endswith("^{}") and
+ not sha == ZERO_SHA]
def iter_shas(self, shas):
"""Iterate over the objects for the specified shas.
"""Obtain the raw text for an object.
:param name: sha for the object.
- :return: tuple with object type and object contents.
+ :return: tuple with numeric type and object contents.
"""
raise NotImplementedError(self.get_raw)
def __getitem__(self, sha):
"""Obtain an object by SHA1."""
- type, uncomp = self.get_raw(sha)
- return ShaFile.from_raw_string(type, uncomp)
+ type_num, uncomp = self.get_raw(sha)
+ return ShaFile.from_raw_string(type_num, uncomp, sha=sha)
def __iter__(self):
"""Iterate over the SHAs that are present in this store."""
def tree_changes(self, source, target, want_unchanged=False):
"""Find the differences between the contents of two trees
- :param object_store: Object store to use for retrieving tree contents
- :param tree: SHA1 of the root tree
+ :param source: SHA1 of the source tree
+ :param target: SHA1 of the target tree
:param want_unchanged: Whether unchanged files should be reported
- :return: Iterator over tuples with (oldpath, newpath), (oldmode, newmode), (oldsha, newsha)
+ :return: Iterator over tuples with
+ (oldpath, newpath), (oldmode, newmode), (oldsha, newsha)
"""
- todo = set([(source, target, "")])
- while todo:
- (sid, tid, path) = todo.pop()
- if sid is not None:
- stree = self[sid]
- else:
- stree = {}
- if tid is not None:
- ttree = self[tid]
- else:
- ttree = {}
- for name, oldmode, oldhexsha in stree.iteritems():
- if path == "":
- oldchildpath = name
- else:
- oldchildpath = "%s/%s" % (path, name)
- try:
- (newmode, newhexsha) = ttree[name]
- newchildpath = oldchildpath
- except KeyError:
- newmode = None
- newhexsha = None
- newchildpath = None
- if (want_unchanged or oldmode != newmode or
- oldhexsha != newhexsha):
- if stat.S_ISDIR(oldmode):
- if newmode is None or stat.S_ISDIR(newmode):
- todo.add((oldhexsha, newhexsha, oldchildpath))
- else:
- # entry became a file
- todo.add((oldhexsha, None, oldchildpath))
- yield ((None, newchildpath), (None, newmode), (None, newhexsha))
- else:
- if newmode is not None and stat.S_ISDIR(newmode):
- # entry became a dir
- yield ((oldchildpath, None), (oldmode, None), (oldhexsha, None))
- todo.add((None, newhexsha, newchildpath))
- else:
- yield ((oldchildpath, newchildpath), (oldmode, newmode), (oldhexsha, newhexsha))
-
- for name, newmode, newhexsha in ttree.iteritems():
- if path == "":
- childpath = name
- else:
- childpath = "%s/%s" % (path, name)
- if not name in stree:
- if not stat.S_ISDIR(newmode):
- yield ((None, childpath), (None, newmode), (None, newhexsha))
- else:
- todo.add((None, newhexsha, childpath))
-
- def iter_tree_contents(self, tree):
- """Yield (path, mode, hexsha) tuples for all non-Tree objects in a tree.
-
- :param tree: SHA1 of the root of the tree
- """
- todo = set([(tree, "")])
- while todo:
- (tid, tpath) = todo.pop()
- tree = self[tid]
- for name, mode, hexsha in tree.iteritems():
- if tpath == "":
- path = name
- else:
- path = "%s/%s" % (tpath, name)
- if stat.S_ISDIR(mode):
- todo.add((hexsha, path))
- else:
- yield path, mode, hexsha
+ for change in tree_changes(self, source, target,
+ want_unchanged=want_unchanged):
+ yield ((change.old.path, change.new.path),
+ (change.old.mode, change.new.mode),
+ (change.old.sha, change.new.sha))
+
+ def iter_tree_contents(self, tree_id, include_trees=False):
+ """Iterate the contents of a tree and all subtrees.
+
+ Iteration is depth-first pre-order, as in e.g. os.walk.
+
+ :param tree_id: SHA1 of the tree.
+ :param include_trees: If True, include tree objects in the iteration.
+ :return: Iterator over TreeEntry namedtuples for all the objects in a
+ tree.
+ """
+ for entry, _ in walk_trees(self, tree_id, None):
+ if not stat.S_ISDIR(entry.mode) or include_trees:
+ yield entry
def find_missing_objects(self, haves, wants, progress=None,
- get_tagged=None):
+ get_tagged=None,
+ get_parents=lambda commit: commit.parents):
"""Find the missing objects required for a set of revisions.
:param haves: Iterable over SHAs already in common.
:param wants: Iterable over SHAs of objects to fetch.
- :param progress: Simple progress function that will be called with
+ :param progress: Simple progress function that will be called with
updated progress strings.
:param get_tagged: Function that returns a dict of pointed-to sha -> tag
sha for including tags.
+ :param get_parents: Optional function for getting the parents of a commit.
:return: Iterator over (sha, path) pairs.
"""
- finder = MissingObjectFinder(self, haves, wants, progress, get_tagged)
+ finder = MissingObjectFinder(self, haves, wants, progress, get_tagged, get_parents=get_parents)
return iter(finder.next, None)
def find_common_revisions(self, graphwalker):
:return: List of SHAs that are in common
"""
haves = []
- sha = graphwalker.next()
+ sha = next(graphwalker)
while sha:
if sha in self:
haves.append(sha)
graphwalker.ack(sha)
- sha = graphwalker.next()
+ sha = next(graphwalker)
return haves
- def get_graph_walker(self, heads):
- """Obtain a graph walker for this object store.
-
- :param heads: Local heads to start search with
- :return: GraphWalker object
- """
- return ObjectStoreGraphWalker(heads, lambda sha: self[sha].parents)
-
- def generate_pack_contents(self, have, want):
+ def generate_pack_contents(self, have, want, progress=None):
"""Iterate over the contents of a pack file.
:param have: List of SHA1s of objects that should not be sent
:param want: List of SHA1s of objects that should be sent
+ :param progress: Optional progress reporting method
+ """
+ return self.iter_shas(self.find_missing_objects(have, want, progress))
+
+ def peel_sha(self, sha):
+ """Peel all tags from a SHA.
+
+ :param sha: The object SHA to peel.
+ :return: The fully-peeled SHA1 of a tag object, after peeling all
+ intermediate tags; if the original ref does not point to a tag, this
+ will equal the original SHA1.
+ """
+ obj = self[sha]
+ obj_class = object_class(obj.type_name)
+ while obj_class is Tag:
+ obj_class, sha = obj.object
+ obj = self[sha]
+ return obj
+
+ def _collect_ancestors(self, heads, common=set(),
+ get_parents=lambda commit: commit.parents):
+ """Collect all ancestors of heads up to (excluding) those in common.
+
+ :param heads: commits to start from
+ :param common: commits to end at, or empty set to walk repository
+ completely
+ :param get_parents: Optional function for getting the parents of a commit.
+ :return: a tuple (A, B), where A is the set of all commits reachable
+ from heads but not present in common, and B is the set of common
+ (shared) elements that are directly reachable from heads
"""
- return self.iter_shas(self.find_missing_objects(have, want))
+ bases = set()
+ commits = set()
+ queue = []
+ queue.extend(heads)
+ while queue:
+ e = queue.pop(0)
+ if e in common:
+ bases.add(e)
+ elif e not in commits:
+ commits.add(e)
+ cmt = self[e]
+ queue.extend(get_parents(cmt))
+ return (commits, bases)
+
+ def close(self):
+ """Close any files opened by this object store."""
+ # Default implementation is a NO-OP
class PackBasedObjectStore(BaseObjectStore):
def __init__(self):
- self._pack_cache = None
+ self._pack_cache = {}
+
+ @property
+ def alternates(self):
+ return []
def contains_packed(self, sha):
- """Check if a particular object is present by SHA1 and is packed."""
+ """Check if a particular object is present by SHA1 and is packed.
+
+ This does not check alternates.
+ """
for pack in self.packs:
if sha in pack:
return True
return False
- def _load_packs(self):
- raise NotImplementedError(self._load_packs)
+ def __contains__(self, sha):
+ """Check if a particular object is present by SHA1.
+
+ This method makes no distinction between loose and packed objects.
+ """
+ if self.contains_packed(sha) or self.contains_loose(sha):
+ return True
+ for alternate in self.alternates:
+ if sha in alternate:
+ return True
+ return False
def _pack_cache_stale(self):
"""Check whether the pack cache is stale."""
raise NotImplementedError(self._pack_cache_stale)
- def _add_known_pack(self, pack):
+ def _add_known_pack(self, base_name, pack):
"""Add a newly appeared pack to the cache by path.
"""
- if self._pack_cache is not None:
- self._pack_cache.append(pack)
+ self._pack_cache[base_name] = pack
+
+ def close(self):
+ pack_cache = self._pack_cache
+ self._pack_cache = {}
+ while pack_cache:
+ (name, pack) = pack_cache.popitem()
+ pack.close()
@property
def packs(self):
"""List with pack objects."""
if self._pack_cache is None or self._pack_cache_stale():
- self._pack_cache = self._load_packs()
- return self._pack_cache
+ self._update_pack_cache()
+
+ return self._pack_cache.values()
+
+ def _iter_alternate_objects(self):
+ """Iterate over the SHAs of all the objects in alternate stores."""
+ for alternate in self.alternates:
+ for alternate_object in alternate:
+ yield alternate_object
def _iter_loose_objects(self):
+ """Iterate over the SHAs of all loose objects."""
raise NotImplementedError(self._iter_loose_objects)
def _get_loose_object(self, sha):
raise NotImplementedError(self._get_loose_object)
+ def _remove_loose_object(self, sha):
+ raise NotImplementedError(self._remove_loose_object)
+
+ def pack_loose_objects(self):
+ """Pack loose objects.
+
+ :return: Number of objects packed
+ """
+ objects = set()
+ for sha in self._iter_loose_objects():
+ objects.add((self._get_loose_object(sha), None))
+ self.add_objects(list(objects))
+ for obj, path in objects:
+ self._remove_loose_object(obj.id)
+ return len(objects)
+
def __iter__(self):
"""Iterate over the SHAs that are present in this store."""
- iterables = self.packs + [self._iter_loose_objects()]
- return itertools.chain(*iterables)
+ iterables = self.packs + [self._iter_loose_objects()] + [self._iter_alternate_objects()]
+ return chain(*iterables)
def contains_loose(self, sha):
- """Check if a particular object is present by SHA1 and is loose."""
+ """Check if a particular object is present by SHA1 and is loose.
+
+ This does not check alternates.
+ """
return self._get_loose_object(sha) is not None
def get_raw(self, name):
"""Obtain the raw text for an object.
-
+
:param name: sha for the object.
- :return: tuple with object type and object contents.
+ :return: tuple with numeric type and object contents.
"""
if len(name) == 40:
sha = hex_to_sha(name)
sha = name
hexsha = None
else:
- raise AssertionError
+ raise AssertionError("Invalid object name %r" % name)
for pack in self.packs:
try:
return pack.get_raw(sha)
except KeyError:
pass
- if hexsha is None:
+ if hexsha is None:
hexsha = sha_to_hex(name)
ret = self._get_loose_object(hexsha)
if ret is not None:
- return ret.type, ret.as_raw_string()
+ return ret.type_num, ret.as_raw_string()
+ for alternate in self.alternates:
+ try:
+ return alternate.get_raw(hexsha)
+ except KeyError:
+ pass
raise KeyError(hexsha)
def add_objects(self, objects):
"""Add a set of objects to this object store.
:param objects: Iterable over objects, should support __len__.
+ :return: Pack object of the objects written.
"""
if len(objects) == 0:
# Don't bother writing an empty pack file
return
- f, commit = self.add_pack()
- write_pack_data(f, objects, len(objects))
- commit()
+ f, commit, abort = self.add_pack()
+ try:
+ write_pack_objects(f, objects)
+ except:
+ abort()
+ raise
+ else:
+ return commit()
class DiskObjectStore(PackBasedObjectStore):
self.path = path
self.pack_dir = os.path.join(self.path, PACKDIR)
self._pack_cache_time = 0
+ self._pack_cache = {}
+ self._alternates = None
- def _load_packs(self):
- pack_files = []
+ def __repr__(self):
+ return "<%s(%r)>" % (self.__class__.__name__, self.path)
+
+ @property
+ def alternates(self):
+ if self._alternates is not None:
+ return self._alternates
+ self._alternates = []
+ for path in self._read_alternate_paths():
+ self._alternates.append(DiskObjectStore(path))
+ return self._alternates
+
+ def _read_alternate_paths(self):
try:
- self._pack_cache_time = os.stat(self.pack_dir).st_mtime
- pack_dir_contents = os.listdir(self.pack_dir)
- for name in pack_dir_contents:
- # TODO: verify that idx exists first
- if name.startswith("pack-") and name.endswith(".pack"):
- filename = os.path.join(self.pack_dir, name)
- pack_files.append((os.stat(filename).st_mtime, filename))
- except OSError, e:
+ f = GitFile(os.path.join(self.path, "info", "alternates"),
+ 'rb')
+ except (OSError, IOError) as e:
if e.errno == errno.ENOENT:
return []
raise
- pack_files.sort(reverse=True)
- suffix_len = len(".pack")
- return [Pack(f[:-suffix_len]) for _, f in pack_files]
+ ret = []
+ try:
+ for l in f.readlines():
+ l = l.rstrip("\n")
+ if l[0] == "#":
+ continue
+ if os.path.isabs(l):
+ ret.append(l)
+ else:
+ ret.append(os.path.join(self.path, l))
+ return ret
+ finally:
+ f.close()
+
+ def add_alternate_path(self, path):
+ """Add an alternate path to this object store.
+ """
+ try:
+ os.mkdir(os.path.join(self.path, "info"))
+ except OSError as e:
+ if e.errno != errno.EEXIST:
+ raise
+ alternates_path = os.path.join(self.path, "info/alternates")
+ f = GitFile(alternates_path, 'wb')
+ try:
+ try:
+ orig_f = open(alternates_path, 'rb')
+ except (OSError, IOError) as e:
+ if e.errno != errno.ENOENT:
+ raise
+ else:
+ try:
+ f.write(orig_f.read())
+ finally:
+ orig_f.close()
+ f.write("%s\n" % path)
+ finally:
+ f.close()
+
+ if not os.path.isabs(path):
+ path = os.path.join(self.path, path)
+ self.alternates.append(DiskObjectStore(path))
+
+ def _update_pack_cache(self):
+ try:
+ pack_dir_contents = os.listdir(self.pack_dir)
+ except OSError as e:
+ if e.errno == errno.ENOENT:
+ self._pack_cache_time = 0
+ self.close()
+ return
+ raise
+ self._pack_cache_time = os.stat(self.pack_dir).st_mtime
+ pack_files = set()
+ for name in pack_dir_contents:
+ # TODO: verify that idx exists first
+ if name.startswith("pack-") and name.endswith(".pack"):
+ pack_files.add(name[:-len(".pack")])
+
+ # Open newly appeared pack files
+ for f in pack_files:
+ if f not in self._pack_cache:
+ self._pack_cache[f] = Pack(os.path.join(self.pack_dir, f))
+ # Remove disappeared pack files
+ for f in set(self._pack_cache) - pack_files:
+ self._pack_cache.pop(f).close()
def _pack_cache_stale(self):
try:
return os.stat(self.pack_dir).st_mtime > self._pack_cache_time
- except OSError, e:
+ except OSError as e:
if e.errno == errno.ENOENT:
return True
raise
def _get_shafile_path(self, sha):
- dir = sha[:2]
- file = sha[2:]
# Check from object dir
- return os.path.join(self.path, dir, file)
+ return hex_to_filename(self.path, sha)
def _iter_loose_objects(self):
for base in os.listdir(self.path):
def _get_loose_object(self, sha):
path = self._get_shafile_path(sha)
try:
- return ShaFile.from_file(path)
- except OSError, e:
+ return ShaFile.from_path(path)
+ except (OSError, IOError) as e:
if e.errno == errno.ENOENT:
return None
raise
- def move_in_thin_pack(self, path):
+ def _remove_loose_object(self, sha):
+ os.remove(self._get_shafile_path(sha))
+
+ def _complete_thin_pack(self, f, path, copier, indexer):
"""Move a specific file containing a pack into the pack directory.
- :note: The file should be on the same file system as the
+ :note: The file should be on the same file system as the
packs directory.
+ :param f: Open file object for the pack.
:param path: Path to the pack file.
+ :param copier: A PackStreamCopier to use for writing pack data.
+ :param indexer: A PackIndexer for indexing the pack.
"""
- data = PackData(path)
-
- # Write index for the thin pack (do we really need this?)
- temppath = os.path.join(self.pack_dir,
- sha_to_hex(urllib2.randombytes(20))+".tempidx")
- data.create_index_v2(temppath, self.get_raw)
- p = Pack.from_objects(data, load_pack_index(temppath))
-
- # Write a full pack version
- temppath = os.path.join(self.pack_dir,
- sha_to_hex(urllib2.randombytes(20))+".temppack")
- write_pack(temppath, ((o, None) for o in p.iterobjects(self.get_raw)),
- len(p))
- pack_sha = load_pack_index(temppath+".idx").objects_sha1()
- newbasename = os.path.join(self.pack_dir, "pack-%s" % pack_sha)
- os.rename(temppath+".pack", newbasename+".pack")
- os.rename(temppath+".idx", newbasename+".idx")
- self._add_known_pack(Pack(newbasename))
+ entries = list(indexer)
+
+ # Update the header with the new number of objects.
+ f.seek(0)
+ write_pack_header(f, len(entries) + len(indexer.ext_refs()))
+
+ # Must flush before reading (http://bugs.python.org/issue3207)
+ f.flush()
+
+ # Rescan the rest of the pack, computing the SHA with the new header.
+ new_sha = compute_file_sha(f, end_ofs=-20)
+
+ # Must reposition before writing (http://bugs.python.org/issue3207)
+ f.seek(0, os.SEEK_CUR)
+
+ # Complete the pack.
+ for ext_sha in indexer.ext_refs():
+ assert len(ext_sha) == 20
+ type_num, data = self.get_raw(ext_sha)
+ offset = f.tell()
+ crc32 = write_pack_object(f, type_num, data, sha=new_sha)
+ entries.append((ext_sha, offset, crc32))
+ pack_sha = new_sha.digest()
+ f.write(pack_sha)
+ f.close()
+
+ # Move the pack in.
+ entries.sort()
+ pack_base_name = os.path.join(
+ self.pack_dir, 'pack-' + iter_sha1(e[0] for e in entries))
+ os.rename(path, pack_base_name + '.pack')
+
+ # Write the index.
+ index_file = GitFile(pack_base_name + '.idx', 'wb')
+ try:
+ write_pack_index_v2(index_file, entries, pack_sha)
+ index_file.close()
+ finally:
+ index_file.abort()
+
+ # Add the pack to the store and return it.
+ final_pack = Pack(pack_base_name)
+ final_pack.check_length_and_checksum()
+ self._add_known_pack(pack_base_name, final_pack)
+ return final_pack
+
+ def add_thin_pack(self, read_all, read_some):
+ """Add a new thin pack to this object store.
+
+ Thin packs are packs that contain deltas with parents that exist outside
+ the pack. They should never be placed in the object store directly, and
+ should always be indexed and completed as they are copied.
+
+ :param read_all: Read function that blocks until the number of requested
+ bytes are read.
+ :param read_some: Read function that returns at least one byte, but may
+ not return the number of bytes requested.
+ :return: A Pack object pointing at the now-completed thin pack in the
+ objects/pack directory.
+ """
+ fd, path = tempfile.mkstemp(dir=self.path, prefix='tmp_pack_')
+ f = os.fdopen(fd, 'w+b')
+
+ try:
+ indexer = PackIndexer(f, resolve_ext_ref=self.get_raw)
+ copier = PackStreamCopier(read_all, read_some, f,
+ delta_iter=indexer)
+ copier.verify()
+ return self._complete_thin_pack(f, path, copier, indexer)
+ finally:
+ f.close()
def move_in_pack(self, path):
"""Move a specific file containing a pack into the pack directory.
- :note: The file should be on the same file system as the
+ :note: The file should be on the same file system as the
packs directory.
:param path: Path to the pack file.
"""
p = PackData(path)
- entries = p.sorted_entries()
- basename = os.path.join(self.pack_dir,
- "pack-%s" % iter_sha1(entry[0] for entry in entries))
- write_pack_index_v2(basename+".idx", entries, p.get_stored_checksum())
- p.close()
+ try:
+ entries = p.sorted_entries()
+ basename = os.path.join(self.pack_dir,
+ "pack-%s" % iter_sha1(entry[0] for entry in entries))
+ f = GitFile(basename+".idx", "wb")
+ try:
+ write_pack_index_v2(f, entries, p.get_stored_checksum())
+ finally:
+ f.close()
+ finally:
+ p.close()
os.rename(path, basename + ".pack")
- self._add_known_pack(Pack(basename))
-
- def add_thin_pack(self):
- """Add a new thin pack to this object store.
-
- Thin packs are packs that contain deltas with parents that exist
- in a different pack.
- """
- fd, path = tempfile.mkstemp(dir=self.pack_dir, suffix=".pack")
- f = os.fdopen(fd, 'wb')
- def commit():
- os.fsync(fd)
- f.close()
- if os.path.getsize(path) > 0:
- self.move_in_thin_pack(path)
- return f, commit
+ final_pack = Pack(basename)
+ self._add_known_pack(basename, final_pack)
+ return final_pack
def add_pack(self):
- """Add a new pack to this object store.
+ """Add a new pack to this object store.
- :return: Fileobject to write to and a commit function to
- call when the pack is finished.
+ :return: Fileobject to write to, a commit function to
+ call when the pack is finished and an abort
+ function.
"""
fd, path = tempfile.mkstemp(dir=self.pack_dir, suffix=".pack")
f = os.fdopen(fd, 'wb')
os.fsync(fd)
f.close()
if os.path.getsize(path) > 0:
- self.move_in_pack(path)
- return f, commit
+ return self.move_in_pack(path)
+ else:
+ os.remove(path)
+ return None
+ def abort():
+ f.close()
+ os.remove(path)
+ return f, commit, abort
def add_object(self, obj):
"""Add a single object to this object store.
:param obj: Object to add
"""
dir = os.path.join(self.path, obj.id[:2])
- if not os.path.isdir(dir):
+ try:
os.mkdir(dir)
+ except OSError as e:
+ if e.errno != errno.EEXIST:
+ raise
path = os.path.join(dir, obj.id[2:])
if os.path.exists(path):
return # Already there, no need to write again
finally:
f.close()
+ @classmethod
+ def init(cls, path):
+ try:
+ os.mkdir(path)
+ except OSError as e:
+ if e.errno != errno.EEXIST:
+ raise
+ os.mkdir(os.path.join(path, "info"))
+ os.mkdir(os.path.join(path, PACKDIR))
+ return cls(path)
+
class MemoryObjectStore(BaseObjectStore):
"""Object store that keeps all objects in memory."""
super(MemoryObjectStore, self).__init__()
self._data = {}
+ def _to_hexsha(self, sha):
+ if len(sha) == 40:
+ return sha
+ elif len(sha) == 20:
+ return sha_to_hex(sha)
+ else:
+ raise ValueError("Invalid sha %r" % (sha,))
+
def contains_loose(self, sha):
"""Check if a particular object is present by SHA1 and is loose."""
- return sha in self._data
+ return self._to_hexsha(sha) in self._data
def contains_packed(self, sha):
"""Check if a particular object is present by SHA1 and is packed."""
def get_raw(self, name):
"""Obtain the raw text for an object.
-
+
:param name: sha for the object.
- :return: tuple with object type and object contents.
+ :return: tuple with numeric type and object contents.
"""
- return self[name].as_raw_string()
+ obj = self[self._to_hexsha(name)]
+ return obj.type_num, obj.as_raw_string()
def __getitem__(self, name):
- return self._data[name]
+ return self._data[self._to_hexsha(name)]
+
+ def __delitem__(self, name):
+ """Delete an object from this store, for testing only."""
+ del self._data[self._to_hexsha(name)]
def add_object(self, obj):
"""Add a single object to this object store.
for obj, path in objects:
self._data[obj.id] = obj
+ def add_pack(self):
+ """Add a new pack to this object store.
+
+ Because this object store doesn't support packs, we extract and add the
+ individual objects.
+
+ :return: Fileobject to write to and a commit function to
+ call when the pack is finished.
+ """
+ f = BytesIO()
+ def commit():
+ p = PackData.from_file(BytesIO(f.getvalue()), f.tell())
+ f.close()
+ for obj in PackInflater.for_pack_data(p):
+ self._data[obj.id] = obj
+ def abort():
+ pass
+ return f, commit, abort
+
+ def _complete_thin_pack(self, f, indexer):
+ """Complete a thin pack by adding external references.
+
+ :param f: Open file object for the pack.
+ :param indexer: A PackIndexer for indexing the pack.
+ """
+ entries = list(indexer)
+
+ # Update the header with the new number of objects.
+ f.seek(0)
+ write_pack_header(f, len(entries) + len(indexer.ext_refs()))
+
+ # Rescan the rest of the pack, computing the SHA with the new header.
+ new_sha = compute_file_sha(f, end_ofs=-20)
+
+ # Complete the pack.
+ for ext_sha in indexer.ext_refs():
+ assert len(ext_sha) == 20
+ type_num, data = self.get_raw(ext_sha)
+ write_pack_object(f, type_num, data, sha=new_sha)
+ pack_sha = new_sha.digest()
+ f.write(pack_sha)
+
+ def add_thin_pack(self, read_all, read_some):
+ """Add a new thin pack to this object store.
+
+ Thin packs are packs that contain deltas with parents that exist outside
+ the pack. Because this object store doesn't support packs, we extract
+ and add the individual objects.
+
+ :param read_all: Read function that blocks until the number of requested
+ bytes are read.
+ :param read_some: Read function that returns at least one byte, but may
+ not return the number of bytes requested.
+ """
+ f, commit, abort = self.add_pack()
+ try:
+ indexer = PackIndexer(f, resolve_ext_ref=self.get_raw)
+ copier = PackStreamCopier(read_all, read_some, f, delta_iter=indexer)
+ copier.verify()
+ self._complete_thin_pack(f, indexer)
+ except:
+ abort()
+ raise
+ else:
+ commit()
+
class ObjectImporter(object):
"""Interface for importing objects."""
raise NotImplementedError(self.add_object)
def finish(self, object):
- """Finish the imoprt and write objects to disk."""
+ """Finish the import and write objects to disk."""
raise NotImplementedError(self.finish)
def __contains__(self, needle):
"""Check if an object is present.
- :note: This checks if the object is present in
+ :note: This checks if the object is present in
the underlying object store, not if it would
be yielded by the iterator.
def __getitem__(self, key):
"""Find an object by SHA1.
-
+
:note: This retrieves the object from the underlying
object store. It will also succeed if the object would
not be returned by the iterator.
def tree_lookup_path(lookup_obj, root_sha, path):
- """Lookup an object in a Git tree.
+ """Look up an object in a Git tree.
:param lookup_obj: Callback for retrieving object by SHA1
:param root_sha: SHA1 of the root tree
:param path: Path to lookup
+ :return: A tuple of (mode, SHA) of the resulting path.
+ """
+ tree = lookup_obj(root_sha)
+ if not isinstance(tree, Tree):
+ raise NotTreeError(root_sha)
+ return tree.lookup_path(lookup_obj, path)
+
+
+def _collect_filetree_revs(obj_store, tree_sha, kset):
+ """Collect SHA1s of files and directories for specified tree.
+
+ :param obj_store: Object store to get objects by SHA from
+ :param tree_sha: tree reference to walk
+ :param kset: set to fill with references to files and directories
"""
- parts = path.split("/")
- sha = root_sha
- mode = None
- for p in parts:
- obj = lookup_obj(sha)
- if type(obj) is not Tree:
- raise NotTreeError(sha)
- if p == '':
- continue
- mode, sha = obj[p]
- return mode, sha
+ filetree = obj_store[tree_sha]
+ for name, mode, sha in filetree.iteritems():
+ if not S_ISGITLINK(mode) and sha not in kset:
+ kset.add(sha)
+ if stat.S_ISDIR(mode):
+ _collect_filetree_revs(obj_store, sha, kset)
+
+
+def _split_commits_and_tags(obj_store, lst, ignore_unknown=False):
+ """Split object id list into two lists with commit SHA1s and tag SHA1s.
+
+ Commits referenced by tags are included in the commits
+ list as well. Only SHA1s known in this repository will get
+ through, and unless the ignore_unknown argument is True, a KeyError
+ is raised for any SHA1 missing from the repository
+
+ :param obj_store: Object store to get objects by SHA1 from
+ :param lst: Collection of commit and tag SHAs
+ :param ignore_unknown: True to skip SHA1s missing from the repository
+ silently.
+ :return: A tuple of (commits, tags) SHA1s
+ """
+ commits = set()
+ tags = set()
+ for e in lst:
+ try:
+ o = obj_store[e]
+ except KeyError:
+ if not ignore_unknown:
+ raise
+ else:
+ if isinstance(o, Commit):
+ commits.add(e)
+ elif isinstance(o, Tag):
+ tags.add(e)
+ commits.add(o.object[1])
+ else:
+ raise KeyError('Not a commit or a tag: %s' % e)
+ return (commits, tags)
class MissingObjectFinder(object):
"""Find the objects missing from another object store.
- :param object_store: Object store containing at least all objects to be
+ :param object_store: Object store containing at least all objects to be
sent
:param haves: SHA1s of commits not to send (already present in target)
:param wants: SHA1s of commits to send
:param progress: Optional function to report progress to.
:param get_tagged: Function that returns a dict of pointed-to sha -> tag
sha for including tags.
+ :param get_parents: Optional function for getting the parents of a commit.
:param tagged: dict of pointed-to sha -> tag sha for including tags
"""
def __init__(self, object_store, haves, wants, progress=None,
- get_tagged=None):
- self.sha_done = set(haves)
- self.objects_to_send = set([(w, None, False) for w in wants if w not in haves])
+ get_tagged=None, get_parents=lambda commit: commit.parents):
self.object_store = object_store
+ self._get_parents = get_parents
+ # process Commits and Tags differently
+ # Note, while haves may list commits/tags not available locally,
+ # and such SHAs would get filtered out by _split_commits_and_tags,
+ # wants shall list only known SHAs, and otherwise
+ # _split_commits_and_tags fails with KeyError
+ have_commits, have_tags = \
+ _split_commits_and_tags(object_store, haves, True)
+ want_commits, want_tags = \
+ _split_commits_and_tags(object_store, wants, False)
+ # all_ancestors is a set of commits that shall not be sent
+ # (complete repository up to 'haves')
+ all_ancestors = object_store._collect_ancestors(
+ have_commits,
+ get_parents=self._get_parents)[0]
+ # all_missing - complete set of commits between haves and wants
+ # common - commits from all_ancestors we hit into while
+ # traversing parent hierarchy of wants
+ missing_commits, common_commits = object_store._collect_ancestors(
+ want_commits,
+ all_ancestors,
+ get_parents=self._get_parents);
+ self.sha_done = set()
+ # Now, fill sha_done with commits and revisions of
+ # files and directories known to be both locally
+ # and on target. Thus these commits and files
+ # won't get selected for fetch
+ for h in common_commits:
+ self.sha_done.add(h)
+ cmt = object_store[h]
+ _collect_filetree_revs(object_store, cmt.tree, self.sha_done)
+ # record tags we have as visited, too
+ for t in have_tags:
+ self.sha_done.add(t)
+
+ missing_tags = want_tags.difference(have_tags)
+ # in fact, what we 'want' is commits and tags
+ # we've found missing
+ wants = missing_commits.union(missing_tags)
+
+ self.objects_to_send = set([(w, None, False) for w in wants])
+
if progress is None:
self.progress = lambda x: None
else:
self._tagged = get_tagged and get_tagged() or {}
def add_todo(self, entries):
- self.objects_to_send.update([e for e in entries if not e[0] in self.sha_done])
-
- def parse_tree(self, tree):
- self.add_todo([(sha, name, not stat.S_ISDIR(mode)) for (mode, name, sha) in tree.entries() if not S_ISGITLINK(mode)])
-
- def parse_commit(self, commit):
- self.add_todo([(commit.tree, "", False)])
- self.add_todo([(p, None, False) for p in commit.parents])
-
- def parse_tag(self, tag):
- self.add_todo([(tag.object[1], None, False)])
+ self.objects_to_send.update([e for e in entries
+ if not e[0] in self.sha_done])
def next(self):
- if not self.objects_to_send:
- return None
- (sha, name, leaf) = self.objects_to_send.pop()
+ while True:
+ if not self.objects_to_send:
+ return None
+ (sha, name, leaf) = self.objects_to_send.pop()
+ if sha not in self.sha_done:
+ break
if not leaf:
o = self.object_store[sha]
if isinstance(o, Commit):
- self.parse_commit(o)
+ self.add_todo([(o.tree, "", False)])
elif isinstance(o, Tree):
- self.parse_tree(o)
+ self.add_todo([(s, n, not stat.S_ISDIR(m))
+ for n, m, s in o.iteritems()
+ if not S_ISGITLINK(m)])
elif isinstance(o, Tag):
- self.parse_tag(o)
+ self.add_todo([(o.object[1], None, False)])
if sha in self._tagged:
self.add_todo([(self._tagged[sha], None, True)])
self.sha_done.add(sha)
self.progress("counting objects: %d\r" % len(self.sha_done))
return (sha, name)
+ __next__ = next
+
class ObjectStoreGraphWalker(object):
- """Graph walker that finds out what commits are missing from an object store."""
+ """Graph walker that finds what commits are missing from an object store.
+
+ :ivar heads: Revisions without descendants in the local repo
+ :ivar get_parents: Function to retrieve parents in the local repo
+ """
def __init__(self, local_heads, get_parents):
"""Create a new instance.
self.parents = {}
def ack(self, sha):
- """Ack that a particular revision and its ancestors are present in the source."""
- if sha in self.heads:
- self.heads.remove(sha)
- if sha in self.parents:
- for p in self.parents[sha]:
- self.ack(p)
+ """Ack that a revision and its ancestors are present in the source."""
+ ancestors = set([sha])
+
+ # stop if we run out of heads to remove
+ while self.heads:
+ for a in ancestors:
+ if a in self.heads:
+ self.heads.remove(a)
+
+ # collect all ancestors
+ new_ancestors = set()
+ for a in ancestors:
+ ps = self.parents.get(a)
+ if ps is not None:
+ new_ancestors.update(ps)
+ self.parents[a] = None
+
+ # no more ancestors; stop
+ if not new_ancestors:
+ break
+
+ ancestors = new_ancestors
def next(self):
"""Iterate over ancestors of heads in the target."""
ret = self.heads.pop()
ps = self.get_parents(ret)
self.parents[ret] = ps
- self.heads.update(ps)
+ self.heads.update([p for p in ps if not p in self.parents])
return ret
return None
+
+ __next__ = next