1 # object_store.py -- Object store for git objects
2 # Copyright (C) 2008-2013 Jelmer Vernooij <jelmer@samba.org>
5 # Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
6 # General Public License as public by the Free Software Foundation; version 2.0
7 # or (at your option) any later version. You can redistribute it and/or
8 # modify it under the terms of either of these two licenses.
10 # Unless required by applicable law or agreed to in writing, software
11 # distributed under the License is distributed on an "AS IS" BASIS,
12 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 # See the License for the specific language governing permissions and
14 # limitations under the License.
16 # You should have received a copy of the licenses; if not, see
17 # <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
18 # and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
19 # License, Version 2.0.
23 """Git object store interfaces and implementation."""
25 from io import BytesIO
27 from itertools import chain
34 from dulwich.diff_tree import (
38 from dulwich.errors import (
41 from dulwich.file import GitFile
42 from dulwich.objects import (
54 from dulwich.pack import (
72 class BaseObjectStore(object):
73 """Object store interface."""
75 def determine_wants_all(self, refs):
76 return [sha for (ref, sha) in refs.items()
77 if sha not in self and not ref.endswith(b"^{}") and
    def iter_shas(self, shas):
        """Iterate over the objects for the specified shas.

        :param shas: Iterable object with SHAs
        :return: Object iterator
        """
        return ObjectStoreIterator(self, shas)
    def contains_loose(self, sha):
        """Check if a particular object is present by SHA1 and is loose.

        :param sha: SHA1 of the object to look for
        :return: True if the object is stored loose (not in a pack)
        """
        raise NotImplementedError(self.contains_loose)
    def contains_packed(self, sha):
        """Check if a particular object is present by SHA1 and is packed.

        :param sha: SHA1 of the object to look for
        :return: True if the object is stored in a pack file
        """
        raise NotImplementedError(self.contains_packed)
    def __contains__(self, sha):
        """Check if a particular object is present by SHA1.

        This method makes no distinction between loose and packed objects.
        """
        # Packed objects are checked first, then loose objects.
        return self.contains_packed(sha) or self.contains_loose(sha)
105 """Iterable of pack objects."""
106 raise NotImplementedError
    def get_raw(self, name):
        """Obtain the raw text for an object.

        :param name: sha for the object.
        :return: tuple with numeric type and object contents.
        """
        raise NotImplementedError(self.get_raw)
116 def __getitem__(self, sha):
117 """Obtain an object by SHA1."""
118 type_num, uncomp = self.get_raw(sha)
119 return ShaFile.from_raw_string(type_num, uncomp, sha=sha)
122 """Iterate over the SHAs that are present in this store."""
123 raise NotImplementedError(self.__iter__)
    def add_object(self, obj):
        """Add a single object to this object store.

        :param obj: Object to add
        """
        raise NotImplementedError(self.add_object)
    def add_objects(self, objects):
        """Add a set of objects to this object store.

        :param objects: Iterable over a list of (object, path) tuples
        """
        raise NotImplementedError(self.add_objects)
    def tree_changes(self, source, target, want_unchanged=False):
        """Find the differences between the contents of two trees

        :param source: SHA1 of the source tree
        :param target: SHA1 of the target tree
        :param want_unchanged: Whether unchanged files should be reported
        :return: Iterator over tuples with
            (oldpath, newpath), (oldmode, newmode), (oldsha, newsha)
        """
        # Delegate to diff_tree's tree_changes, flattening each TreeChange
        # object into the triple-of-pairs shape documented above.
        for change in tree_changes(self, source, target,
                                   want_unchanged=want_unchanged):
            yield ((change.old.path, change.new.path),
                   (change.old.mode, change.new.mode),
                   (change.old.sha, change.new.sha))
153 def iter_tree_contents(self, tree_id, include_trees=False):
154 """Iterate the contents of a tree and all subtrees.
156 Iteration is depth-first pre-order, as in e.g. os.walk.
158 :param tree_id: SHA1 of the tree.
159 :param include_trees: If True, include tree objects in the iteration.
160 :return: Iterator over TreeEntry namedtuples for all the objects in a
163 for entry, _ in walk_trees(self, tree_id, None):
164 if not stat.S_ISDIR(entry.mode) or include_trees:
167 def find_missing_objects(self, haves, wants, progress=None,
169 get_parents=lambda commit: commit.parents):
170 """Find the missing objects required for a set of revisions.
172 :param haves: Iterable over SHAs already in common.
173 :param wants: Iterable over SHAs of objects to fetch.
174 :param progress: Simple progress function that will be called with
175 updated progress strings.
176 :param get_tagged: Function that returns a dict of pointed-to sha ->
177 tag sha for including tags.
178 :param get_parents: Optional function for getting the parents of a
180 :return: Iterator over (sha, path) pairs.
182 finder = MissingObjectFinder(self, haves, wants, progress, get_tagged,
183 get_parents=get_parents)
184 return iter(finder.next, None)
186 def find_common_revisions(self, graphwalker):
187 """Find which revisions this store has in common using graphwalker.
189 :param graphwalker: A graphwalker object.
190 :return: List of SHAs that are in common
193 sha = next(graphwalker)
198 sha = next(graphwalker)
    def generate_pack_contents(self, have, want, progress=None):
        """Iterate over the contents of a pack file.

        :param have: List of SHA1s of objects that should not be sent
        :param want: List of SHA1s of objects that should be sent
        :param progress: Optional progress reporting method
        :return: Object iterator over the missing objects
        """
        return self.iter_shas(self.find_missing_objects(have, want, progress))
210 def peel_sha(self, sha):
211 """Peel all tags from a SHA.
213 :param sha: The object SHA to peel.
214 :return: The fully-peeled SHA1 of a tag object, after peeling all
215 intermediate tags; if the original ref does not point to a tag,
216 this will equal the original SHA1.
219 obj_class = object_class(obj.type_name)
220 while obj_class is Tag:
221 obj_class, sha = obj.object
225 def _collect_ancestors(self, heads, common=set(),
226 get_parents=lambda commit: commit.parents):
227 """Collect all ancestors of heads up to (excluding) those in common.
229 :param heads: commits to start from
230 :param common: commits to end at, or empty set to walk repository
232 :param get_parents: Optional function for getting the parents of a
234 :return: a tuple (A, B) where A - all commits reachable
235 from heads but not present in common, B - common (shared) elements
236 that are directly reachable from heads
246 elif e not in commits:
249 queue.extend(get_parents(cmt))
250 return (commits, bases)
253 """Close any files opened by this object store."""
254 # Default implementation is a NO-OP
257 class PackBasedObjectStore(BaseObjectStore):
260 self._pack_cache = {}
263 def alternates(self):
266 def contains_packed(self, sha):
267 """Check if a particular object is present by SHA1 and is packed.
269 This does not check alternates.
271 for pack in self.packs:
276 def __contains__(self, sha):
277 """Check if a particular object is present by SHA1.
279 This method makes no distinction between loose and packed objects.
281 if self.contains_packed(sha) or self.contains_loose(sha):
283 for alternate in self.alternates:
    def _pack_cache_stale(self):
        """Check whether the pack cache is stale.

        :return: True if the cached list of packs may be out of date
        """
        raise NotImplementedError(self._pack_cache_stale)
    def _add_known_pack(self, base_name, pack):
        """Add a newly appeared pack to the cache by path.

        :param base_name: Base name of the pack (cache key)
        :param pack: Pack object to cache
        """
        self._pack_cache[base_name] = pack
299 pack_cache = self._pack_cache
300 self._pack_cache = {}
302 (name, pack) = pack_cache.popitem()
307 """List with pack objects."""
308 if self._pack_cache is None or self._pack_cache_stale():
309 self._update_pack_cache()
311 return self._pack_cache.values()
313 def _iter_alternate_objects(self):
314 """Iterate over the SHAs of all the objects in alternate stores."""
315 for alternate in self.alternates:
316 for alternate_object in alternate:
317 yield alternate_object
    def _iter_loose_objects(self):
        """Iterate over the SHAs of all loose objects.

        :return: Iterator over SHA1s of loose objects
        """
        raise NotImplementedError(self._iter_loose_objects)
    def _get_loose_object(self, sha):
        """Return the loose object for sha, or None if not present."""
        raise NotImplementedError(self._get_loose_object)
    def _remove_loose_object(self, sha):
        """Remove the loose object with the given SHA from this store."""
        raise NotImplementedError(self._remove_loose_object)
329 def pack_loose_objects(self):
330 """Pack loose objects.
332 :return: Number of objects packed
335 for sha in self._iter_loose_objects():
336 objects.add((self._get_loose_object(sha), None))
337 self.add_objects(list(objects))
338 for obj, path in objects:
339 self._remove_loose_object(obj.id)
343 """Iterate over the SHAs that are present in this store."""
344 iterables = (list(self.packs) + [self._iter_loose_objects()] +
345 [self._iter_alternate_objects()])
346 return chain(*iterables)
    def contains_loose(self, sha):
        """Check if a particular object is present by SHA1 and is loose.

        This does not check alternates.
        """
        return self._get_loose_object(sha) is not None
355 def get_raw(self, name):
356 """Obtain the raw text for an object.
358 :param name: sha for the object.
359 :return: tuple with numeric type and object contents.
362 sha = hex_to_sha(name)
364 elif len(name) == 20:
368 raise AssertionError("Invalid object name %r" % name)
369 for pack in self.packs:
371 return pack.get_raw(sha)
375 hexsha = sha_to_hex(name)
376 ret = self._get_loose_object(hexsha)
378 return ret.type_num, ret.as_raw_string()
379 for alternate in self.alternates:
381 return alternate.get_raw(hexsha)
384 raise KeyError(hexsha)
386 def add_objects(self, objects):
387 """Add a set of objects to this object store.
389 :param objects: Iterable over (object, path) tuples, should support
391 :return: Pack object of the objects written.
393 if len(objects) == 0:
394 # Don't bother writing an empty pack file
396 f, commit, abort = self.add_pack()
398 write_pack_objects(f, objects)
406 class DiskObjectStore(PackBasedObjectStore):
407 """Git-style object store that exists on disk."""
409 def __init__(self, path):
410 """Open an object store.
412 :param path: Path of the object store.
414 super(DiskObjectStore, self).__init__()
416 self.pack_dir = os.path.join(self.path, PACKDIR)
417 self._pack_cache_time = 0
418 self._pack_cache = {}
419 self._alternates = None
422 return "<%s(%r)>" % (self.__class__.__name__, self.path)
425 def alternates(self):
426 if self._alternates is not None:
427 return self._alternates
428 self._alternates = []
429 for path in self._read_alternate_paths():
430 self._alternates.append(DiskObjectStore(path))
431 return self._alternates
433 def _read_alternate_paths(self):
435 f = GitFile(os.path.join(self.path, INFODIR, "alternates"), 'rb')
436 except (OSError, IOError) as e:
437 if e.errno == errno.ENOENT:
441 for l in f.readlines():
446 yield l.decode(sys.getfilesystemencoding())
448 yield os.path.join(self.path, l).decode(
449 sys.getfilesystemencoding())
451 def add_alternate_path(self, path):
452 """Add an alternate path to this object store.
455 os.mkdir(os.path.join(self.path, INFODIR))
457 if e.errno != errno.EEXIST:
459 alternates_path = os.path.join(self.path, INFODIR, "alternates")
460 with GitFile(alternates_path, 'wb') as f:
462 orig_f = open(alternates_path, 'rb')
463 except (OSError, IOError) as e:
464 if e.errno != errno.ENOENT:
468 f.write(orig_f.read())
469 f.write(path.encode(sys.getfilesystemencoding()) + b"\n")
471 if not os.path.isabs(path):
472 path = os.path.join(self.path, path)
473 self.alternates.append(DiskObjectStore(path))
475 def _update_pack_cache(self):
477 pack_dir_contents = os.listdir(self.pack_dir)
479 if e.errno == errno.ENOENT:
480 self._pack_cache_time = 0
484 self._pack_cache_time = max(
485 os.stat(self.pack_dir).st_mtime, time.time())
487 for name in pack_dir_contents:
488 if name.startswith("pack-") and name.endswith(".pack"):
489 # verify that idx exists first (otherwise the pack was not yet
491 idx_name = os.path.splitext(name)[0] + ".idx"
492 if idx_name in pack_dir_contents:
493 pack_name = name[:-len(".pack")]
494 pack_files.add(pack_name)
496 # Open newly appeared pack files
498 if f not in self._pack_cache:
499 self._pack_cache[f] = Pack(os.path.join(self.pack_dir, f))
500 # Remove disappeared pack files
501 for f in set(self._pack_cache) - pack_files:
502 self._pack_cache.pop(f).close()
504 def _pack_cache_stale(self):
506 return os.stat(self.pack_dir).st_mtime >= self._pack_cache_time
508 if e.errno == errno.ENOENT:
    def _get_shafile_path(self, sha):
        """Return the on-disk path for the loose object with the given SHA."""
        # Check from object dir
        return hex_to_filename(self.path, sha)
516 def _iter_loose_objects(self):
517 for base in os.listdir(self.path):
520 for rest in os.listdir(os.path.join(self.path, base)):
521 yield (base+rest).encode(sys.getfilesystemencoding())
523 def _get_loose_object(self, sha):
524 path = self._get_shafile_path(sha)
526 return ShaFile.from_path(path)
527 except (OSError, IOError) as e:
528 if e.errno == errno.ENOENT:
    def _remove_loose_object(self, sha):
        """Delete the loose object file for the given SHA from disk."""
        os.remove(self._get_shafile_path(sha))
535 def _get_pack_basepath(self, entries):
536 suffix = iter_sha1(entry[0] for entry in entries)
537 # TODO: Handle self.pack_dir being bytes
538 suffix = suffix.decode('ascii')
539 return os.path.join(self.pack_dir, "pack-" + suffix)
541 def _complete_thin_pack(self, f, path, copier, indexer):
542 """Move a specific file containing a pack into the pack directory.
544 :note: The file should be on the same file system as the
547 :param f: Open file object for the pack.
548 :param path: Path to the pack file.
549 :param copier: A PackStreamCopier to use for writing pack data.
550 :param indexer: A PackIndexer for indexing the pack.
552 entries = list(indexer)
554 # Update the header with the new number of objects.
556 write_pack_header(f, len(entries) + len(indexer.ext_refs()))
558 # Must flush before reading (http://bugs.python.org/issue3207)
561 # Rescan the rest of the pack, computing the SHA with the new header.
562 new_sha = compute_file_sha(f, end_ofs=-20)
564 # Must reposition before writing (http://bugs.python.org/issue3207)
565 f.seek(0, os.SEEK_CUR)
568 for ext_sha in indexer.ext_refs():
569 assert len(ext_sha) == 20
570 type_num, data = self.get_raw(ext_sha)
572 crc32 = write_pack_object(f, type_num, data, sha=new_sha)
573 entries.append((ext_sha, offset, crc32))
574 pack_sha = new_sha.digest()
580 pack_base_name = self._get_pack_basepath(entries)
581 if sys.platform == 'win32':
583 os.rename(path, pack_base_name + '.pack')
585 os.remove(pack_base_name + '.pack')
586 os.rename(path, pack_base_name + '.pack')
588 os.rename(path, pack_base_name + '.pack')
591 index_file = GitFile(pack_base_name + '.idx', 'wb')
593 write_pack_index_v2(index_file, entries, pack_sha)
598 # Add the pack to the store and return it.
599 final_pack = Pack(pack_base_name)
600 final_pack.check_length_and_checksum()
601 self._add_known_pack(pack_base_name, final_pack)
604 def add_thin_pack(self, read_all, read_some):
605 """Add a new thin pack to this object store.
607 Thin packs are packs that contain deltas with parents that exist
608 outside the pack. They should never be placed in the object store
609 directly, and always indexed and completed as they are copied.
611 :param read_all: Read function that blocks until the number of
612 requested bytes are read.
613 :param read_some: Read function that returns at least one byte, but may
614 not return the number of bytes requested.
615 :return: A Pack object pointing at the now-completed thin pack in the
616 objects/pack directory.
618 fd, path = tempfile.mkstemp(dir=self.path, prefix='tmp_pack_')
619 with os.fdopen(fd, 'w+b') as f:
620 indexer = PackIndexer(f, resolve_ext_ref=self.get_raw)
621 copier = PackStreamCopier(read_all, read_some, f,
624 return self._complete_thin_pack(f, path, copier, indexer)
626 def move_in_pack(self, path):
627 """Move a specific file containing a pack into the pack directory.
629 :note: The file should be on the same file system as the
632 :param path: Path to the pack file.
634 with PackData(path) as p:
635 entries = p.sorted_entries()
636 basename = self._get_pack_basepath(entries)
637 with GitFile(basename+".idx", "wb") as f:
638 write_pack_index_v2(f, entries, p.get_stored_checksum())
639 os.rename(path, basename + ".pack")
640 final_pack = Pack(basename)
641 self._add_known_pack(basename, final_pack)
645 """Add a new pack to this object store.
647 :return: Fileobject to write to, a commit function to
648 call when the pack is finished and an abort
651 fd, path = tempfile.mkstemp(dir=self.pack_dir, suffix=".pack")
652 f = os.fdopen(fd, 'wb')
657 if os.path.getsize(path) > 0:
658 return self.move_in_pack(path)
666 return f, commit, abort
668 def add_object(self, obj):
669 """Add a single object to this object store.
671 :param obj: Object to add
673 path = self._get_shafile_path(obj.id)
674 dir = os.path.dirname(path)
678 if e.errno != errno.EEXIST:
680 if os.path.exists(path):
681 return # Already there, no need to write again
682 with GitFile(path, 'wb') as f:
683 f.write(obj.as_legacy_object())
690 if e.errno != errno.EEXIST:
692 os.mkdir(os.path.join(path, "info"))
693 os.mkdir(os.path.join(path, PACKDIR))
697 class MemoryObjectStore(BaseObjectStore):
698 """Object store that keeps all objects in memory."""
701 super(MemoryObjectStore, self).__init__()
704 def _to_hexsha(self, sha):
708 return sha_to_hex(sha)
710 raise ValueError("Invalid sha %r" % (sha,))
    def contains_loose(self, sha):
        """Check if a particular object is present by SHA1 and is loose."""
        # _to_hexsha normalizes the key before the dict lookup.
        return self._to_hexsha(sha) in self._data
716 def contains_packed(self, sha):
717 """Check if a particular object is present by SHA1 and is packed."""
721 """Iterate over the SHAs that are present in this store."""
722 return iter(self._data.keys())
726 """List with pack objects."""
    def get_raw(self, name):
        """Obtain the raw text for an object.

        :param name: sha for the object.
        :return: tuple with numeric type and object contents.
        """
        obj = self[self._to_hexsha(name)]
        return obj.type_num, obj.as_raw_string()
738 def __getitem__(self, name):
739 return self._data[self._to_hexsha(name)].copy()
    def __delitem__(self, name):
        """Delete an object from this store, for testing only.

        :param name: hex or binary SHA of the object to remove
        """
        del self._data[self._to_hexsha(name)]
    def add_object(self, obj):
        """Add a single object to this object store.

        :param obj: Object to add
        """
        # Store a copy so later mutation of obj does not affect the store.
        self._data[obj.id] = obj.copy()
751 def add_objects(self, objects):
752 """Add a set of objects to this object store.
754 :param objects: Iterable over a list of (object, path) tuples
756 for obj, path in objects:
760 """Add a new pack to this object store.
762 Because this object store doesn't support packs, we extract and add the
765 :return: Fileobject to write to and a commit function to
766 call when the pack is finished.
771 p = PackData.from_file(BytesIO(f.getvalue()), f.tell())
773 for obj in PackInflater.for_pack_data(p, self.get_raw):
778 return f, commit, abort
780 def _complete_thin_pack(self, f, indexer):
781 """Complete a thin pack by adding external references.
783 :param f: Open file object for the pack.
784 :param indexer: A PackIndexer for indexing the pack.
786 entries = list(indexer)
788 # Update the header with the new number of objects.
790 write_pack_header(f, len(entries) + len(indexer.ext_refs()))
792 # Rescan the rest of the pack, computing the SHA with the new header.
793 new_sha = compute_file_sha(f, end_ofs=-20)
796 for ext_sha in indexer.ext_refs():
797 assert len(ext_sha) == 20
798 type_num, data = self.get_raw(ext_sha)
799 write_pack_object(f, type_num, data, sha=new_sha)
800 pack_sha = new_sha.digest()
803 def add_thin_pack(self, read_all, read_some):
804 """Add a new thin pack to this object store.
806 Thin packs are packs that contain deltas with parents that exist
807 outside the pack. Because this object store doesn't support packs, we
808 extract and add the individual objects.
810 :param read_all: Read function that blocks until the number of
811 requested bytes are read.
812 :param read_some: Read function that returns at least one byte, but may
813 not return the number of bytes requested.
815 f, commit, abort = self.add_pack()
817 indexer = PackIndexer(f, resolve_ext_ref=self.get_raw)
818 copier = PackStreamCopier(read_all, read_some, f,
821 self._complete_thin_pack(f, indexer)
class ObjectImporter(object):
    """Interface for importing objects."""

    def __init__(self, count):
        """Create a new ObjectImporter.

        :param count: Number of objects that's going to be imported.
        """

    def add_object(self, object):
        """Add an object; subclasses must implement this."""
        raise NotImplementedError(self.add_object)

    def finish(self, object):
        """Finish the import and write objects to disk."""
        raise NotImplementedError(self.finish)
class ObjectIterator(object):
    """Interface for iterating over objects."""

    def iterobjects(self):
        """Iterate over the objects; subclasses must implement this."""
        raise NotImplementedError(self.iterobjects)
855 class ObjectStoreIterator(ObjectIterator):
856 """ObjectIterator that works on top of an ObjectStore."""
858 def __init__(self, store, sha_iter):
859 """Create a new ObjectIterator.
861 :param store: Object store to retrieve from
862 :param sha_iter: Iterator over (sha, path) tuples
865 self.sha_iter = sha_iter
869 """Yield tuple with next object and path."""
870 for sha, path in self.itershas():
871 yield self.store[sha], path
873 def iterobjects(self):
874 """Iterate over just the objects."""
879 """Iterate over the SHAs."""
880 for sha in self._shas:
882 for sha in self.sha_iter:
883 self._shas.append(sha)
    def __contains__(self, needle):
        """Check if an object is present.

        :note: This checks if the object is present in
            the underlying object store, not if it would
            be yielded by the iterator.

        :param needle: SHA1 of the object to check for
        """
        return needle in self.store
    def __getitem__(self, key):
        """Find an object by SHA1.

        :note: This retrieves the object from the underlying
            object store. It will also succeed if the object would
            not be returned by the iterator.
        """
        return self.store[key]
907 """Return the number of objects."""
908 return len(list(self.itershas()))
def tree_lookup_path(lookup_obj, root_sha, path):
    """Look up an object in a Git tree.

    :param lookup_obj: Callback for retrieving object by SHA1
    :param root_sha: SHA1 of the root tree
    :param path: Path to lookup
    :return: A tuple of (mode, SHA) of the resulting path.
    """
    root = lookup_obj(root_sha)
    if not isinstance(root, Tree):
        raise NotTreeError(root_sha)
    return root.lookup_path(lookup_obj, path)
925 def _collect_filetree_revs(obj_store, tree_sha, kset):
926 """Collect SHA1s of files and directories for specified tree.
928 :param obj_store: Object store to get objects by SHA from
929 :param tree_sha: tree reference to walk
930 :param kset: set to fill with references to files and directories
932 filetree = obj_store[tree_sha]
933 for name, mode, sha in filetree.iteritems():
934 if not S_ISGITLINK(mode) and sha not in kset:
936 if stat.S_ISDIR(mode):
937 _collect_filetree_revs(obj_store, sha, kset)
940 def _split_commits_and_tags(obj_store, lst, ignore_unknown=False):
941 """Split object id list into three lists with commit, tag, and other SHAs.
943 Commits referenced by tags are included into commits
944 list as well. Only SHA1s known in this repository will get
945 through, and unless ignore_unknown argument is True, KeyError
946 is thrown for SHA1 missing in the repository
948 :param obj_store: Object store to get objects by SHA1 from
949 :param lst: Collection of commit and tag SHAs
950 :param ignore_unknown: True to skip SHA1 missing in the repository
952 :return: A tuple of (commits, tags, others) SHA1s
961 if not ignore_unknown:
964 if isinstance(o, Commit):
966 elif isinstance(o, Tag):
969 c, t, o = _split_commits_and_tags(
970 obj_store, [tagged], ignore_unknown=ignore_unknown)
976 return (commits, tags, others)
979 class MissingObjectFinder(object):
980 """Find the objects missing from another object store.
982 :param object_store: Object store containing at least all objects to be
984 :param haves: SHA1s of commits not to send (already present in target)
985 :param wants: SHA1s of commits to send
986 :param progress: Optional function to report progress to.
987 :param get_tagged: Function that returns a dict of pointed-to sha -> tag
988 sha for including tags.
989 :param get_parents: Optional function for getting the parents of a commit.
990 :param tagged: dict of pointed-to sha -> tag sha for including tags
993 def __init__(self, object_store, haves, wants, progress=None,
994 get_tagged=None, get_parents=lambda commit: commit.parents):
995 self.object_store = object_store
996 self._get_parents = get_parents
997 # process Commits and Tags differently
998 # Note, while haves may list commits/tags not available locally,
999 # and such SHAs would get filtered out by _split_commits_and_tags,
1000 # wants shall list only known SHAs, and otherwise
1001 # _split_commits_and_tags fails with KeyError
1002 have_commits, have_tags, have_others = (
1003 _split_commits_and_tags(object_store, haves, True))
1004 want_commits, want_tags, want_others = (
1005 _split_commits_and_tags(object_store, wants, False))
1006 # all_ancestors is a set of commits that shall not be sent
1007 # (complete repository up to 'haves')
1008 all_ancestors = object_store._collect_ancestors(
1009 have_commits, get_parents=self._get_parents)[0]
1010 # all_missing - complete set of commits between haves and wants
1011 # common - commits from all_ancestors we hit into while
1012 # traversing parent hierarchy of wants
1013 missing_commits, common_commits = object_store._collect_ancestors(
1014 want_commits, all_ancestors, get_parents=self._get_parents)
1015 self.sha_done = set()
1016 # Now, fill sha_done with commits and revisions of
1017 # files and directories known to be both locally
1018 # and on target. Thus these commits and files
1019 # won't get selected for fetch
1020 for h in common_commits:
1021 self.sha_done.add(h)
1022 cmt = object_store[h]
1023 _collect_filetree_revs(object_store, cmt.tree, self.sha_done)
1024 # record tags we have as visited, too
1026 self.sha_done.add(t)
1028 missing_tags = want_tags.difference(have_tags)
1029 missing_others = want_others.difference(have_others)
1030 # in fact, what we 'want' is commits, tags, and others
1031 # we've found missing
1032 wants = missing_commits.union(missing_tags)
1033 wants = wants.union(missing_others)
1035 self.objects_to_send = set([(w, None, False) for w in wants])
1037 if progress is None:
1038 self.progress = lambda x: None
1040 self.progress = progress
1041 self._tagged = get_tagged and get_tagged() or {}
    def add_todo(self, entries):
        """Queue entries for sending, skipping SHAs already processed.

        :param entries: iterable of (sha, name, leaf) tuples
        """
        self.objects_to_send.update([e for e in entries
                                     if not e[0] in self.sha_done])
1049 if not self.objects_to_send:
1051 (sha, name, leaf) = self.objects_to_send.pop()
1052 if sha not in self.sha_done:
1055 o = self.object_store[sha]
1056 if isinstance(o, Commit):
1057 self.add_todo([(o.tree, "", False)])
1058 elif isinstance(o, Tree):
1059 self.add_todo([(s, n, not stat.S_ISDIR(m))
1060 for n, m, s in o.iteritems()
1061 if not S_ISGITLINK(m)])
1062 elif isinstance(o, Tag):
1063 self.add_todo([(o.object[1], None, False)])
1064 if sha in self._tagged:
1065 self.add_todo([(self._tagged[sha], None, True)])
1066 self.sha_done.add(sha)
1067 self.progress(("counting objects: %d\r" %
1068 len(self.sha_done)).encode('ascii'))
1074 class ObjectStoreGraphWalker(object):
1075 """Graph walker that finds what commits are missing from an object store.
1077 :ivar heads: Revisions without descendants in the local repo
1078 :ivar get_parents: Function to retrieve parents in the local repo
1081 def __init__(self, local_heads, get_parents):
1082 """Create a new instance.
1084 :param local_heads: Heads to start search with
1085 :param get_parents: Function for finding the parents of a SHA1.
1087 self.heads = set(local_heads)
1088 self.get_parents = get_parents
1092 """Ack that a revision and its ancestors are present in the source."""
1094 raise ValueError("unexpected sha %r received" % sha)
1095 ancestors = set([sha])
1097 # stop if we run out of heads to remove
1101 self.heads.remove(a)
1103 # collect all ancestors
1104 new_ancestors = set()
1106 ps = self.parents.get(a)
1108 new_ancestors.update(ps)
1109 self.parents[a] = None
1111 # no more ancestors; stop
1112 if not new_ancestors:
1115 ancestors = new_ancestors
1118 """Iterate over ancestors of heads in the target."""
1120 ret = self.heads.pop()
1121 ps = self.get_parents(ret)
1122 self.parents[ret] = ps
1124 [p for p in ps if p not in self.parents])
1131 def commit_tree_changes(object_store, tree, changes):
1132 """Commit a specified set of changes to a tree structure.
1134 This will apply a set of changes on top of an existing tree, storing new
1135 objects in object_store.
1137 changes are a list of tuples with (path, mode, object_sha).
1138 Paths can be both blobs and trees. See the mode and
1139 object sha to None deletes the path.
1141 This method works especially well if there are only a small
1142 number of changes to a big tree. For a large number of changes
1143 to a large tree, use e.g. commit_tree.
1145 :param object_store: Object store to store new objects in
1146 and retrieve old ones from.
1147 :param tree: Original tree root
1148 :param changes: changes to apply
1149 :return: New tree root object
1151 # TODO(jelmer): Save up the objects and add them using .add_objects
1152 # rather than with individual calls to .add_object.
1154 for (path, new_mode, new_sha) in changes:
1156 (dirname, subpath) = path.split(b'/', 1)
1161 tree[path] = (new_mode, new_sha)
1163 nested_changes.setdefault(dirname, []).append(
1164 (subpath, new_mode, new_sha))
1165 for name, subchanges in nested_changes.items():
1167 orig_subtree = object_store[tree[name][1]]
1169 orig_subtree = Tree()
1170 subtree = commit_tree_changes(object_store, orig_subtree, subchanges)
1171 if len(subtree) == 0:
1174 tree[name] = (stat.S_IFDIR, subtree.id)
1175 object_store.add_object(tree)