1 # object_store.py -- Object store for git objects
2 # Copyright (C) 2008-2013 Jelmer Vernooij <jelmer@samba.org>
5 # This program is free software; you can redistribute it and/or
6 # modify it under the terms of the GNU General Public License
7 # as published by the Free Software Foundation; either version 2
# or (at your option) any later version of the License.
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU General Public License for more details.
15 # You should have received a copy of the GNU General Public License
16 # along with this program; if not, write to the Free Software
17 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
21 """Git object store interfaces and implementation."""
24 from io import BytesIO
26 from itertools import chain
32 from dulwich.diff_tree import (
36 from dulwich.errors import (
39 from dulwich.file import GitFile
40 from dulwich.objects import (
52 from dulwich.pack import (
70 class BaseObjectStore(object):
71 """Object store interface."""
73 def determine_wants_all(self, refs):
74 return [sha for (ref, sha) in refs.items()
75 if not sha in self and not ref.endswith(b"^{}") and
78 def iter_shas(self, shas):
79 """Iterate over the objects for the specified shas.
81 :param shas: Iterable object with SHAs
82 :return: Object iterator
84 return ObjectStoreIterator(self, shas)
86 def contains_loose(self, sha):
87 """Check if a particular object is present by SHA1 and is loose."""
88 raise NotImplementedError(self.contains_loose)
90 def contains_packed(self, sha):
91 """Check if a particular object is present by SHA1 and is packed."""
92 raise NotImplementedError(self.contains_packed)
94 def __contains__(self, sha):
95 """Check if a particular object is present by SHA1.
97 This method makes no distinction between loose and packed objects.
99 return self.contains_packed(sha) or self.contains_loose(sha)
103 """Iterable of pack objects."""
104 raise NotImplementedError
106 def get_raw(self, name):
107 """Obtain the raw text for an object.
109 :param name: sha for the object.
110 :return: tuple with numeric type and object contents.
112 raise NotImplementedError(self.get_raw)
114 def __getitem__(self, sha):
115 """Obtain an object by SHA1."""
116 type_num, uncomp = self.get_raw(sha)
117 return ShaFile.from_raw_string(type_num, uncomp, sha=sha)
120 """Iterate over the SHAs that are present in this store."""
121 raise NotImplementedError(self.__iter__)
123 def add_object(self, obj):
124 """Add a single object to this object store.
127 raise NotImplementedError(self.add_object)
129 def add_objects(self, objects):
130 """Add a set of objects to this object store.
132 :param objects: Iterable over a list of objects.
134 raise NotImplementedError(self.add_objects)
136 def tree_changes(self, source, target, want_unchanged=False):
137 """Find the differences between the contents of two trees
139 :param source: SHA1 of the source tree
140 :param target: SHA1 of the target tree
141 :param want_unchanged: Whether unchanged files should be reported
142 :return: Iterator over tuples with
143 (oldpath, newpath), (oldmode, newmode), (oldsha, newsha)
145 for change in tree_changes(self, source, target,
146 want_unchanged=want_unchanged):
147 yield ((change.old.path, change.new.path),
148 (change.old.mode, change.new.mode),
149 (change.old.sha, change.new.sha))
151 def iter_tree_contents(self, tree_id, include_trees=False):
152 """Iterate the contents of a tree and all subtrees.
154 Iteration is depth-first pre-order, as in e.g. os.walk.
156 :param tree_id: SHA1 of the tree.
157 :param include_trees: If True, include tree objects in the iteration.
158 :return: Iterator over TreeEntry namedtuples for all the objects in a
161 for entry, _ in walk_trees(self, tree_id, None):
162 if not stat.S_ISDIR(entry.mode) or include_trees:
165 def find_missing_objects(self, haves, wants, progress=None,
167 get_parents=lambda commit: commit.parents):
168 """Find the missing objects required for a set of revisions.
170 :param haves: Iterable over SHAs already in common.
171 :param wants: Iterable over SHAs of objects to fetch.
172 :param progress: Simple progress function that will be called with
173 updated progress strings.
174 :param get_tagged: Function that returns a dict of pointed-to sha -> tag
175 sha for including tags.
176 :param get_parents: Optional function for getting the parents of a commit.
177 :return: Iterator over (sha, path) pairs.
179 finder = MissingObjectFinder(self, haves, wants, progress, get_tagged, get_parents=get_parents)
180 return iter(finder.next, None)
182 def find_common_revisions(self, graphwalker):
183 """Find which revisions this store has in common using graphwalker.
185 :param graphwalker: A graphwalker object.
186 :return: List of SHAs that are in common
189 sha = next(graphwalker)
194 sha = next(graphwalker)
197 def generate_pack_contents(self, have, want, progress=None):
198 """Iterate over the contents of a pack file.
200 :param have: List of SHA1s of objects that should not be sent
201 :param want: List of SHA1s of objects that should be sent
202 :param progress: Optional progress reporting method
204 return self.iter_shas(self.find_missing_objects(have, want, progress))
206 def peel_sha(self, sha):
207 """Peel all tags from a SHA.
209 :param sha: The object SHA to peel.
210 :return: The fully-peeled SHA1 of a tag object, after peeling all
211 intermediate tags; if the original ref does not point to a tag, this
212 will equal the original SHA1.
215 obj_class = object_class(obj.type_name)
216 while obj_class is Tag:
217 obj_class, sha = obj.object
221 def _collect_ancestors(self, heads, common=set(),
222 get_parents=lambda commit: commit.parents):
223 """Collect all ancestors of heads up to (excluding) those in common.
225 :param heads: commits to start from
226 :param common: commits to end at, or empty set to walk repository
228 :param get_parents: Optional function for getting the parents of a commit.
229 :return: a tuple (A, B) where A - all commits reachable
230 from heads but not present in common, B - common (shared) elements
231 that are directly reachable from heads
241 elif e not in commits:
244 queue.extend(get_parents(cmt))
245 return (commits, bases)
248 """Close any files opened by this object store."""
249 # Default implementation is a NO-OP
252 class PackBasedObjectStore(BaseObjectStore):
255 self._pack_cache = {}
258 def alternates(self):
261 def contains_packed(self, sha):
262 """Check if a particular object is present by SHA1 and is packed.
264 This does not check alternates.
266 for pack in self.packs:
271 def __contains__(self, sha):
272 """Check if a particular object is present by SHA1.
274 This method makes no distinction between loose and packed objects.
276 if self.contains_packed(sha) or self.contains_loose(sha):
278 for alternate in self.alternates:
283 def _pack_cache_stale(self):
284 """Check whether the pack cache is stale."""
285 raise NotImplementedError(self._pack_cache_stale)
287 def _add_known_pack(self, base_name, pack):
288 """Add a newly appeared pack to the cache by path.
291 self._pack_cache[base_name] = pack
294 pack_cache = self._pack_cache
295 self._pack_cache = {}
297 (name, pack) = pack_cache.popitem()
302 """List with pack objects."""
303 if self._pack_cache is None or self._pack_cache_stale():
304 self._update_pack_cache()
306 return self._pack_cache.values()
308 def _iter_alternate_objects(self):
309 """Iterate over the SHAs of all the objects in alternate stores."""
310 for alternate in self.alternates:
311 for alternate_object in alternate:
312 yield alternate_object
314 def _iter_loose_objects(self):
315 """Iterate over the SHAs of all loose objects."""
316 raise NotImplementedError(self._iter_loose_objects)
    def _get_loose_object(self, sha):
        # Return the loose object for sha, or None if it is not present
        # (callers such as contains_loose rely on the None return).
        raise NotImplementedError(self._get_loose_object)
    def _remove_loose_object(self, sha):
        # Remove the loose object with the given SHA1 from this store.
        raise NotImplementedError(self._remove_loose_object)
324 def pack_loose_objects(self):
325 """Pack loose objects.
327 :return: Number of objects packed
330 for sha in self._iter_loose_objects():
331 objects.add((self._get_loose_object(sha), None))
332 self.add_objects(list(objects))
333 for obj, path in objects:
334 self._remove_loose_object(obj.id)
338 """Iterate over the SHAs that are present in this store."""
339 iterables = list(self.packs) + [self._iter_loose_objects()] + [self._iter_alternate_objects()]
340 return chain(*iterables)
342 def contains_loose(self, sha):
343 """Check if a particular object is present by SHA1 and is loose.
345 This does not check alternates.
347 return self._get_loose_object(sha) is not None
349 def get_raw(self, name):
350 """Obtain the raw text for an object.
352 :param name: sha for the object.
353 :return: tuple with numeric type and object contents.
356 sha = hex_to_sha(name)
358 elif len(name) == 20:
362 raise AssertionError("Invalid object name %r" % name)
363 for pack in self.packs:
365 return pack.get_raw(sha)
369 hexsha = sha_to_hex(name)
370 ret = self._get_loose_object(hexsha)
372 return ret.type_num, ret.as_raw_string()
373 for alternate in self.alternates:
375 return alternate.get_raw(hexsha)
378 raise KeyError(hexsha)
380 def add_objects(self, objects):
381 """Add a set of objects to this object store.
383 :param objects: Iterable over objects, should support __len__.
384 :return: Pack object of the objects written.
386 if len(objects) == 0:
387 # Don't bother writing an empty pack file
389 f, commit, abort = self.add_pack()
391 write_pack_objects(f, objects)
399 class DiskObjectStore(PackBasedObjectStore):
400 """Git-style object store that exists on disk."""
402 def __init__(self, path):
403 """Open an object store.
405 :param path: Path of the object store.
407 super(DiskObjectStore, self).__init__()
409 self.pack_dir = os.path.join(self.path, PACKDIR)
410 self._pack_cache_time = 0
411 self._pack_cache = {}
412 self._alternates = None
415 return "<%s(%r)>" % (self.__class__.__name__, self.path)
418 def alternates(self):
419 if self._alternates is not None:
420 return self._alternates
421 self._alternates = []
422 for path in self._read_alternate_paths():
423 self._alternates.append(DiskObjectStore(path))
424 return self._alternates
426 def _read_alternate_paths(self):
428 f = GitFile(os.path.join(self.path, INFODIR, "alternates"),
430 except (OSError, IOError) as e:
431 if e.errno == errno.ENOENT:
435 for l in f.readlines():
440 yield l.decode(sys.getfilesystemencoding())
442 yield os.path.join(self.path, l).decode(sys.getfilesystemencoding())
444 def add_alternate_path(self, path):
445 """Add an alternate path to this object store.
448 os.mkdir(os.path.join(self.path, INFODIR))
450 if e.errno != errno.EEXIST:
452 alternates_path = os.path.join(self.path, INFODIR, "alternates")
453 with GitFile(alternates_path, 'wb') as f:
455 orig_f = open(alternates_path, 'rb')
456 except (OSError, IOError) as e:
457 if e.errno != errno.ENOENT:
461 f.write(orig_f.read())
462 f.write(path.encode(sys.getfilesystemencoding()) + b"\n")
464 if not os.path.isabs(path):
465 path = os.path.join(self.path, path)
466 self.alternates.append(DiskObjectStore(path))
468 def _update_pack_cache(self):
470 pack_dir_contents = os.listdir(self.pack_dir)
472 if e.errno == errno.ENOENT:
473 self._pack_cache_time = 0
477 self._pack_cache_time = os.stat(self.pack_dir).st_mtime
479 for name in pack_dir_contents:
480 assert isinstance(name, basestring if sys.version_info[0] == 2 else str)
481 # TODO: verify that idx exists first
482 if name.startswith("pack-") and name.endswith(".pack"):
483 pack_files.add(name[:-len(".pack")])
485 # Open newly appeared pack files
487 if f not in self._pack_cache:
488 self._pack_cache[f] = Pack(os.path.join(self.pack_dir, f))
489 # Remove disappeared pack files
490 for f in set(self._pack_cache) - pack_files:
491 self._pack_cache.pop(f).close()
493 def _pack_cache_stale(self):
495 return os.stat(self.pack_dir).st_mtime > self._pack_cache_time
497 if e.errno == errno.ENOENT:
    def _get_shafile_path(self, sha):
        # Map a hex SHA to its loose-object file path under the object dir.
        # Check from object dir
        return hex_to_filename(self.path, sha)
505 def _iter_loose_objects(self):
506 for base in os.listdir(self.path):
509 for rest in os.listdir(os.path.join(self.path, base)):
510 yield (base+rest).encode(sys.getfilesystemencoding())
512 def _get_loose_object(self, sha):
513 path = self._get_shafile_path(sha)
515 return ShaFile.from_path(path)
516 except (OSError, IOError) as e:
517 if e.errno == errno.ENOENT:
    def _remove_loose_object(self, sha):
        # Delete the loose-object file for sha; raises OSError if absent.
        os.remove(self._get_shafile_path(sha))
524 def _get_pack_basepath(self, entries):
525 suffix = iter_sha1(entry[0] for entry in entries)
526 # TODO: Handle self.pack_dir being bytes
527 suffix = suffix.decode('ascii')
528 return os.path.join(self.pack_dir, "pack-" + suffix)
530 def _complete_thin_pack(self, f, path, copier, indexer):
531 """Move a specific file containing a pack into the pack directory.
533 :note: The file should be on the same file system as the
536 :param f: Open file object for the pack.
537 :param path: Path to the pack file.
538 :param copier: A PackStreamCopier to use for writing pack data.
539 :param indexer: A PackIndexer for indexing the pack.
541 entries = list(indexer)
543 # Update the header with the new number of objects.
545 write_pack_header(f, len(entries) + len(indexer.ext_refs()))
547 # Must flush before reading (http://bugs.python.org/issue3207)
550 # Rescan the rest of the pack, computing the SHA with the new header.
551 new_sha = compute_file_sha(f, end_ofs=-20)
553 # Must reposition before writing (http://bugs.python.org/issue3207)
554 f.seek(0, os.SEEK_CUR)
557 for ext_sha in indexer.ext_refs():
558 assert len(ext_sha) == 20
559 type_num, data = self.get_raw(ext_sha)
561 crc32 = write_pack_object(f, type_num, data, sha=new_sha)
562 entries.append((ext_sha, offset, crc32))
563 pack_sha = new_sha.digest()
569 pack_base_name = self._get_pack_basepath(entries)
570 os.rename(path, pack_base_name + '.pack')
573 index_file = GitFile(pack_base_name + '.idx', 'wb')
575 write_pack_index_v2(index_file, entries, pack_sha)
580 # Add the pack to the store and return it.
581 final_pack = Pack(pack_base_name)
582 final_pack.check_length_and_checksum()
583 self._add_known_pack(pack_base_name, final_pack)
586 def add_thin_pack(self, read_all, read_some):
587 """Add a new thin pack to this object store.
589 Thin packs are packs that contain deltas with parents that exist outside
590 the pack. They should never be placed in the object store directly, and
591 always indexed and completed as they are copied.
593 :param read_all: Read function that blocks until the number of requested
595 :param read_some: Read function that returns at least one byte, but may
596 not return the number of bytes requested.
597 :return: A Pack object pointing at the now-completed thin pack in the
598 objects/pack directory.
600 fd, path = tempfile.mkstemp(dir=self.path, prefix='tmp_pack_')
601 with os.fdopen(fd, 'w+b') as f:
602 indexer = PackIndexer(f, resolve_ext_ref=self.get_raw)
603 copier = PackStreamCopier(read_all, read_some, f,
606 return self._complete_thin_pack(f, path, copier, indexer)
608 def move_in_pack(self, path):
609 """Move a specific file containing a pack into the pack directory.
611 :note: The file should be on the same file system as the
614 :param path: Path to the pack file.
616 with PackData(path) as p:
617 entries = p.sorted_entries()
618 basename = self._get_pack_basepath(entries)
619 with GitFile(basename+".idx", "wb") as f:
620 write_pack_index_v2(f, entries, p.get_stored_checksum())
621 os.rename(path, basename + ".pack")
622 final_pack = Pack(basename)
623 self._add_known_pack(basename, final_pack)
627 """Add a new pack to this object store.
629 :return: Fileobject to write to, a commit function to
630 call when the pack is finished and an abort
633 fd, path = tempfile.mkstemp(dir=self.pack_dir, suffix=".pack")
634 f = os.fdopen(fd, 'wb')
638 if os.path.getsize(path) > 0:
639 return self.move_in_pack(path)
646 return f, commit, abort
648 def add_object(self, obj):
649 """Add a single object to this object store.
651 :param obj: Object to add
653 path = self._get_shafile_path(obj.id)
654 dir = os.path.dirname(path)
658 if e.errno != errno.EEXIST:
660 if os.path.exists(path):
661 return # Already there, no need to write again
662 with GitFile(path, 'wb') as f:
663 f.write(obj.as_legacy_object())
670 if e.errno != errno.EEXIST:
672 os.mkdir(os.path.join(path, "info"))
673 os.mkdir(os.path.join(path, PACKDIR))
677 class MemoryObjectStore(BaseObjectStore):
678 """Object store that keeps all objects in memory."""
681 super(MemoryObjectStore, self).__init__()
684 def _to_hexsha(self, sha):
688 return sha_to_hex(sha)
690 raise ValueError("Invalid sha %r" % (sha,))
692 def contains_loose(self, sha):
693 """Check if a particular object is present by SHA1 and is loose."""
694 return self._to_hexsha(sha) in self._data
696 def contains_packed(self, sha):
697 """Check if a particular object is present by SHA1 and is packed."""
701 """Iterate over the SHAs that are present in this store."""
702 return iter(self._data.keys())
706 """List with pack objects."""
709 def get_raw(self, name):
710 """Obtain the raw text for an object.
712 :param name: sha for the object.
713 :return: tuple with numeric type and object contents.
715 obj = self[self._to_hexsha(name)]
716 return obj.type_num, obj.as_raw_string()
    def __getitem__(self, name):
        """Return the object stored under the given (hex or binary) SHA1."""
        return self._data[self._to_hexsha(name)]
721 def __delitem__(self, name):
722 """Delete an object from this store, for testing only."""
723 del self._data[self._to_hexsha(name)]
725 def add_object(self, obj):
726 """Add a single object to this object store.
729 self._data[obj.id] = obj
731 def add_objects(self, objects):
732 """Add a set of objects to this object store.
734 :param objects: Iterable over a list of objects.
736 for obj, path in objects:
737 self._data[obj.id] = obj
740 """Add a new pack to this object store.
742 Because this object store doesn't support packs, we extract and add the
745 :return: Fileobject to write to and a commit function to
746 call when the pack is finished.
750 p = PackData.from_file(BytesIO(f.getvalue()), f.tell())
752 for obj in PackInflater.for_pack_data(p, self.get_raw):
753 self._data[obj.id] = obj
756 return f, commit, abort
758 def _complete_thin_pack(self, f, indexer):
759 """Complete a thin pack by adding external references.
761 :param f: Open file object for the pack.
762 :param indexer: A PackIndexer for indexing the pack.
764 entries = list(indexer)
766 # Update the header with the new number of objects.
768 write_pack_header(f, len(entries) + len(indexer.ext_refs()))
770 # Rescan the rest of the pack, computing the SHA with the new header.
771 new_sha = compute_file_sha(f, end_ofs=-20)
774 for ext_sha in indexer.ext_refs():
775 assert len(ext_sha) == 20
776 type_num, data = self.get_raw(ext_sha)
777 write_pack_object(f, type_num, data, sha=new_sha)
778 pack_sha = new_sha.digest()
781 def add_thin_pack(self, read_all, read_some):
782 """Add a new thin pack to this object store.
784 Thin packs are packs that contain deltas with parents that exist outside
785 the pack. Because this object store doesn't support packs, we extract
786 and add the individual objects.
788 :param read_all: Read function that blocks until the number of requested
790 :param read_some: Read function that returns at least one byte, but may
791 not return the number of bytes requested.
793 f, commit, abort = self.add_pack()
795 indexer = PackIndexer(f, resolve_ext_ref=self.get_raw)
796 copier = PackStreamCopier(read_all, read_some, f, delta_iter=indexer)
798 self._complete_thin_pack(f, indexer)
class ObjectImporter(object):
    """Interface for importing objects."""

    def __init__(self, count):
        """Create a new ObjectImporter.

        :param count: Number of objects that's going to be imported.
        """
        self.count = count

    def add_object(self, object):
        """Add an object."""
        raise NotImplementedError(self.add_object)

    def finish(self, object):
        """Finish the import and write objects to disk."""
        raise NotImplementedError(self.finish)
class ObjectIterator(object):
    """Interface for iterating over objects."""

    def iterobjects(self):
        """Iterate over the objects; subclasses must implement this."""
        raise NotImplementedError(self.iterobjects)
class ObjectStoreIterator(ObjectIterator):
    """ObjectIterator that works on top of an ObjectStore."""

    def __init__(self, store, sha_iter):
        """Create a new ObjectIterator.

        :param store: Object store to retrieve from
        :param sha_iter: Iterator over (sha, path) tuples
        """
        self.store = store
        self.sha_iter = sha_iter
        # Cache of (sha, path) tuples already pulled from sha_iter, so the
        # iterator can be replayed (e.g. by __len__).
        self._shas = []

    def __iter__(self):
        """Yield tuple with next object and path."""
        for sha, path in self.itershas():
            yield self.store[sha], path

    def iterobjects(self):
        """Iterate over just the objects."""
        for o, path in self:
            yield o

    def itershas(self):
        """Iterate over the SHAs."""
        for sha in self._shas:
            yield sha
        for sha in self.sha_iter:
            self._shas.append(sha)
            yield sha

    def __contains__(self, needle):
        """Check if an object is present.

        :note: This checks if the object is present in
            the underlying object store, not if it would
            be yielded by the iterator.

        :param needle: SHA1 of the object to check for
        """
        return needle in self.store

    def __getitem__(self, key):
        """Find an object by SHA1.

        :note: This retrieves the object from the underlying
            object store. It will also succeed if the object would
            not be returned by the iterator.
        """
        return self.store[key]

    def __len__(self):
        """Return the number of objects."""
        return len(list(self.itershas()))
def tree_lookup_path(lookup_obj, root_sha, path):
    """Look up an object in a Git tree.

    :param lookup_obj: Callback for retrieving object by SHA1
    :param root_sha: SHA1 of the root tree
    :param path: Path to lookup
    :return: A tuple of (mode, SHA) of the resulting path.
    """
    tree = lookup_obj(root_sha)
    if not isinstance(tree, Tree):
        raise NotTreeError(root_sha)
    return tree.lookup_path(lookup_obj, path)
def _collect_filetree_revs(obj_store, tree_sha, kset):
    """Collect SHA1s of files and directories for specified tree.

    :param obj_store: Object store to get objects by SHA from
    :param tree_sha: tree reference to walk
    :param kset: set to fill with references to files and directories
    """
    filetree = obj_store[tree_sha]
    for name, mode, sha in filetree.iteritems():
        if not S_ISGITLINK(mode) and sha not in kset:
            kset.add(sha)
            if stat.S_ISDIR(mode):
                # Recurse into subtrees.
                _collect_filetree_revs(obj_store, sha, kset)
def _split_commits_and_tags(obj_store, lst, ignore_unknown=False):
    """Split object id list into three lists with commit, tag, and other SHAs.

    Commits referenced by tags are included into commits
    list as well. Only SHA1s known in this repository will get
    through, and unless ignore_unknown argument is True, KeyError
    is thrown for SHA1 missing in the repository

    :param obj_store: Object store to get objects by SHA1 from
    :param lst: Collection of commit and tag SHAs
    :param ignore_unknown: True to skip SHA1 missing in the repository
        silently.
    :return: A tuple of (commits, tags, others) SHA1s
    """
    commits = set()
    tags = set()
    others = set()
    for e in lst:
        try:
            o = obj_store[e]
        except KeyError:
            if not ignore_unknown:
                raise
        else:
            if isinstance(o, Commit):
                commits.add(e)
            elif isinstance(o, Tag):
                tags.add(e)
                tagged = o.object[1]
                # Tags may point at commits (or other tags); fold the
                # pointed-to SHAs into the result recursively.
                c, t, o = _split_commits_and_tags(
                    obj_store, [tagged], ignore_unknown=ignore_unknown)
                commits |= c
                tags |= t
                others |= o
            else:
                others.add(e)
    return (commits, tags, others)
class MissingObjectFinder(object):
    """Find the objects missing from another object store.

    :param object_store: Object store containing at least all objects to be
        sent
    :param haves: SHA1s of commits not to send (already present in target)
    :param wants: SHA1s of commits to send
    :param progress: Optional function to report progress to.
    :param get_tagged: Function that returns a dict of pointed-to sha -> tag
        sha for including tags.
    :param get_parents: Optional function for getting the parents of a commit.
    :param tagged: dict of pointed-to sha -> tag sha for including tags
    """
970 def __init__(self, object_store, haves, wants, progress=None,
971 get_tagged=None, get_parents=lambda commit: commit.parents):
972 self.object_store = object_store
973 self._get_parents = get_parents
974 # process Commits and Tags differently
975 # Note, while haves may list commits/tags not available locally,
976 # and such SHAs would get filtered out by _split_commits_and_tags,
977 # wants shall list only known SHAs, and otherwise
978 # _split_commits_and_tags fails with KeyError
979 have_commits, have_tags, have_others = (
980 _split_commits_and_tags(object_store, haves, True))
981 want_commits, want_tags, want_others = (
982 _split_commits_and_tags(object_store, wants, False))
983 # all_ancestors is a set of commits that shall not be sent
984 # (complete repository up to 'haves')
985 all_ancestors = object_store._collect_ancestors(
986 have_commits, get_parents=self._get_parents)[0]
987 # all_missing - complete set of commits between haves and wants
988 # common - commits from all_ancestors we hit into while
989 # traversing parent hierarchy of wants
990 missing_commits, common_commits = object_store._collect_ancestors(
991 want_commits, all_ancestors, get_parents=self._get_parents)
992 self.sha_done = set()
993 # Now, fill sha_done with commits and revisions of
994 # files and directories known to be both locally
995 # and on target. Thus these commits and files
996 # won't get selected for fetch
997 for h in common_commits:
999 cmt = object_store[h]
1000 _collect_filetree_revs(object_store, cmt.tree, self.sha_done)
1001 # record tags we have as visited, too
1003 self.sha_done.add(t)
1005 missing_tags = want_tags.difference(have_tags)
1006 missing_others = want_others.difference(have_others)
1007 # in fact, what we 'want' is commits, tags, and others
1008 # we've found missing
1009 wants = missing_commits.union(missing_tags)
1010 wants = wants.union(missing_others)
1012 self.objects_to_send = set([(w, None, False) for w in wants])
1014 if progress is None:
1015 self.progress = lambda x: None
1017 self.progress = progress
1018 self._tagged = get_tagged and get_tagged() or {}
1020 def add_todo(self, entries):
1021 self.objects_to_send.update([e for e in entries
1022 if not e[0] in self.sha_done])
1026 if not self.objects_to_send:
1028 (sha, name, leaf) = self.objects_to_send.pop()
1029 if sha not in self.sha_done:
1032 o = self.object_store[sha]
1033 if isinstance(o, Commit):
1034 self.add_todo([(o.tree, "", False)])
1035 elif isinstance(o, Tree):
1036 self.add_todo([(s, n, not stat.S_ISDIR(m))
1037 for n, m, s in o.iteritems()
1038 if not S_ISGITLINK(m)])
1039 elif isinstance(o, Tag):
1040 self.add_todo([(o.object[1], None, False)])
1041 if sha in self._tagged:
1042 self.add_todo([(self._tagged[sha], None, True)])
1043 self.sha_done.add(sha)
1044 self.progress(("counting objects: %d\r" % len(self.sha_done)).encode('ascii'))
class ObjectStoreGraphWalker(object):
    """Graph walker that finds what commits are missing from an object store.

    :ivar heads: Revisions without descendants in the local repo
    :ivar get_parents: Function to retrieve parents in the local repo
    """
1057 def __init__(self, local_heads, get_parents):
1058 """Create a new instance.
1060 :param local_heads: Heads to start search with
1061 :param get_parents: Function for finding the parents of a SHA1.
1063 self.heads = set(local_heads)
1064 self.get_parents = get_parents
1068 """Ack that a revision and its ancestors are present in the source."""
1070 raise ValueError("unexpected sha %r received" % sha)
1071 ancestors = set([sha])
1073 # stop if we run out of heads to remove
1077 self.heads.remove(a)
1079 # collect all ancestors
1080 new_ancestors = set()
1082 ps = self.parents.get(a)
1084 new_ancestors.update(ps)
1085 self.parents[a] = None
1087 # no more ancestors; stop
1088 if not new_ancestors:
1091 ancestors = new_ancestors
1094 """Iterate over ancestors of heads in the target."""
1096 ret = self.heads.pop()
1097 ps = self.get_parents(ret)
1098 self.parents[ret] = ps
1099 self.heads.update([p for p in ps if not p in self.parents])