# object_store.py -- Object store for git objects
# Copyright (C) 2008-2013 Jelmer Vernooij <jelmer@samba.org>
#
# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
# General Public License as public by the Free Software Foundation; version 2.0
# or (at your option) any later version. You can redistribute it and/or
# modify it under the terms of either of these two licenses.
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# You should have received a copy of the licenses; if not, see
# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
# License, Version 2.0.
"""Git object store interfaces and implementation."""
from io import BytesIO
import errno
from itertools import chain
import os
import stat
import sys
import tempfile
import time

from dulwich.diff_tree import (
    tree_changes,
    walk_trees,
    )
from dulwich.errors import (
    NotTreeError,
    )
from dulwich.file import GitFile
from dulwich.objects import (
    Commit,
    ShaFile,
    Tag,
    Tree,
    ZERO_SHA,
    hex_to_sha,
    sha_to_hex,
    hex_to_filename,
    S_ISGITLINK,
    object_class,
    )
from dulwich.pack import (
    Pack,
    PackData,
    PackInflater,
    iter_sha1,
    write_pack_header,
    write_pack_index_v2,
    write_pack_object,
    write_pack_objects,
    compute_file_sha,
    PackIndexer,
    PackStreamCopier,
    )


INFODIR = 'info'
PACKDIR = 'pack'
class BaseObjectStore(object):
    """Object store interface."""

    def determine_wants_all(self, refs):
        """Return the SHAs worth fetching from a refs dict.

        Filters out SHAs already present, peeled-ref entries (``^{}``)
        and the all-zero SHA used for deleted refs.
        """
        return [sha for (ref, sha) in refs.items()
                if sha not in self and not ref.endswith(b"^{}") and
                not sha == ZERO_SHA]

    def iter_shas(self, shas):
        """Iterate over the objects for the specified shas.

        :param shas: Iterable object with SHAs
        :return: Object iterator
        """
        return ObjectStoreIterator(self, shas)

    def contains_loose(self, sha):
        """Check if a particular object is present by SHA1 and is loose."""
        raise NotImplementedError(self.contains_loose)

    def contains_packed(self, sha):
        """Check if a particular object is present by SHA1 and is packed."""
        raise NotImplementedError(self.contains_packed)

    def __contains__(self, sha):
        """Check if a particular object is present by SHA1.

        This method makes no distinction between loose and packed objects.
        """
        return self.contains_packed(sha) or self.contains_loose(sha)

    @property
    def packs(self):
        """Iterable of pack objects."""
        raise NotImplementedError

    def get_raw(self, name):
        """Obtain the raw text for an object.

        :param name: sha for the object.
        :return: tuple with numeric type and object contents.
        """
        raise NotImplementedError(self.get_raw)

    def __getitem__(self, sha):
        """Obtain an object by SHA1."""
        type_num, uncomp = self.get_raw(sha)
        return ShaFile.from_raw_string(type_num, uncomp, sha=sha)

    def __iter__(self):
        """Iterate over the SHAs that are present in this store."""
        raise NotImplementedError(self.__iter__)

    def add_object(self, obj):
        """Add a single object to this object store.

        :param obj: Object to add
        """
        raise NotImplementedError(self.add_object)

    def add_objects(self, objects):
        """Add a set of objects to this object store.

        :param objects: Iterable over a list of (object, path) tuples
        """
        raise NotImplementedError(self.add_objects)

    def tree_changes(self, source, target, want_unchanged=False):
        """Find the differences between the contents of two trees.

        :param source: SHA1 of the source tree
        :param target: SHA1 of the target tree
        :param want_unchanged: Whether unchanged files should be reported
        :return: Iterator over tuples with
            (oldpath, newpath), (oldmode, newmode), (oldsha, newsha)
        """
        for change in tree_changes(self, source, target,
                                   want_unchanged=want_unchanged):
            yield ((change.old.path, change.new.path),
                   (change.old.mode, change.new.mode),
                   (change.old.sha, change.new.sha))

    def iter_tree_contents(self, tree_id, include_trees=False):
        """Iterate the contents of a tree and all subtrees.

        Iteration is depth-first pre-order, as in e.g. os.walk.

        :param tree_id: SHA1 of the tree.
        :param include_trees: If True, include tree objects in the iteration.
        :return: Iterator over TreeEntry namedtuples for all the objects in a
            tree.
        """
        for entry, _ in walk_trees(self, tree_id, None):
            # Trees themselves are skipped unless explicitly requested.
            if not stat.S_ISDIR(entry.mode) or include_trees:
                yield entry

    def find_missing_objects(self, haves, wants, progress=None,
                             get_tagged=None,
                             get_parents=lambda commit: commit.parents):
        """Find the missing objects required for a set of revisions.

        :param haves: Iterable over SHAs already in common.
        :param wants: Iterable over SHAs of objects to fetch.
        :param progress: Simple progress function that will be called with
            updated progress strings.
        :param get_tagged: Function that returns a dict of pointed-to sha ->
            tag sha for including tags.
        :param get_parents: Optional function for getting the parents of a
            commit.
        :return: Iterator over (sha, path) pairs.
        """
        finder = MissingObjectFinder(self, haves, wants, progress, get_tagged,
                                     get_parents=get_parents)
        # finder.next returns None when exhausted; iter() stops on that
        # sentinel.
        return iter(finder.next, None)

    def find_common_revisions(self, graphwalker):
        """Find which revisions this store has in common using graphwalker.

        :param graphwalker: A graphwalker object.
        :return: List of SHAs that are in common
        """
        haves = []
        sha = next(graphwalker)
        while sha:
            if sha in self:
                haves.append(sha)
                graphwalker.ack(sha)
            sha = next(graphwalker)
        return haves

    def generate_pack_contents(self, have, want, progress=None):
        """Iterate over the contents of a pack file.

        :param have: List of SHA1s of objects that should not be sent
        :param want: List of SHA1s of objects that should be sent
        :param progress: Optional progress reporting method
        """
        return self.iter_shas(self.find_missing_objects(have, want, progress))

    def peel_sha(self, sha):
        """Peel all tags from a SHA.

        :param sha: The object SHA to peel.
        :return: The fully-peeled SHA1 of a tag object, after peeling all
            intermediate tags; if the original ref does not point to a tag,
            this will equal the original SHA1.
        """
        obj = self[sha]
        obj_class = object_class(obj.type_name)
        while obj_class is Tag:
            obj_class, sha = obj.object
            obj = self[sha]
        return obj

    def _collect_ancestors(self, heads, common=frozenset(),
                           get_parents=lambda commit: commit.parents):
        """Collect all ancestors of heads up to (excluding) those in common.

        :param heads: commits to start from
        :param common: commits to end at, or empty set to walk repository
            completely
        :param get_parents: Optional function for getting the parents of a
            commit.
        :return: a tuple (A, B) where A - all commits reachable
            from heads but not present in common, B - common (shared) elements
            that are directly reachable from heads
        """
        bases = set()
        commits = set()
        queue = []
        queue.extend(heads)
        while queue:
            e = queue.pop(0)
            if e in common:
                bases.add(e)
            elif e not in commits:
                commits.add(e)
                cmt = self[e]
                queue.extend(get_parents(cmt))
        return (commits, bases)

    def close(self):
        """Close any files opened by this object store."""
        # Default implementation is a NO-OP
class PackBasedObjectStore(BaseObjectStore):
    """Object store that keeps most of its objects in pack files."""

    def __init__(self):
        # Maps pack base name -> open Pack object.
        self._pack_cache = {}

    @property
    def alternates(self):
        """Alternate object stores to also consult; none by default."""
        return []

    def contains_packed(self, sha):
        """Check if a particular object is present by SHA1 and is packed.

        This does not check alternates.
        """
        for pack in self.packs:
            if sha in pack:
                return True
        return False

    def __contains__(self, sha):
        """Check if a particular object is present by SHA1.

        This method makes no distinction between loose and packed objects.
        """
        if self.contains_packed(sha) or self.contains_loose(sha):
            return True
        for alternate in self.alternates:
            if sha in alternate:
                return True
        return False

    def _pack_cache_stale(self):
        """Check whether the pack cache is stale."""
        raise NotImplementedError(self._pack_cache_stale)

    def _add_known_pack(self, base_name, pack):
        """Add a newly appeared pack to the cache by path.

        :param base_name: Base name (without extension) of the pack.
        :param pack: Pack object to cache.
        """
        self._pack_cache[base_name] = pack

    def _flush_pack_cache(self):
        # Swap in a fresh cache first so concurrent readers never see a
        # half-closed set of packs.
        pack_cache = self._pack_cache
        self._pack_cache = {}
        while pack_cache:
            (name, pack) = pack_cache.popitem()
            pack.close()

    def close(self):
        """Close all cached pack files."""
        self._flush_pack_cache()

    @property
    def packs(self):
        """List with pack objects."""
        if self._pack_cache is None or self._pack_cache_stale():
            self._update_pack_cache()
        return self._pack_cache.values()

    def _iter_alternate_objects(self):
        """Iterate over the SHAs of all the objects in alternate stores."""
        for alternate in self.alternates:
            for alternate_object in alternate:
                yield alternate_object

    def _iter_loose_objects(self):
        """Iterate over the SHAs of all loose objects."""
        raise NotImplementedError(self._iter_loose_objects)

    def _get_loose_object(self, sha):
        raise NotImplementedError(self._get_loose_object)

    def _remove_loose_object(self, sha):
        raise NotImplementedError(self._remove_loose_object)

    def _remove_pack(self, name):
        raise NotImplementedError(self._remove_pack)

    def pack_loose_objects(self):
        """Pack loose objects.

        :return: Number of objects packed
        """
        objects = set()
        for sha in self._iter_loose_objects():
            objects.add((self._get_loose_object(sha), None))
        self.add_objects(list(objects))
        for obj, path in objects:
            self._remove_loose_object(obj.id)
        return len(objects)

    def repack(self):
        """Repack the packs in this repository.

        Note that this implementation is fairly naive and currently keeps all
        objects in memory while it repacks.
        """
        loose_objects = set()
        for sha in self._iter_loose_objects():
            loose_objects.add(self._get_loose_object(sha))
        objects = {(obj, None) for obj in loose_objects}
        old_packs = {p.name(): p for p in self.packs}
        for name, pack in old_packs.items():
            objects.update((obj, None) for obj in pack.iterobjects())
        self._flush_pack_cache()

        # The name of the consolidated pack might match the name of a
        # pre-existing pack. Take care not to remove the newly created
        # consolidated pack.

        consolidated = self.add_objects(objects)
        old_packs.pop(consolidated.name(), None)

        for obj in loose_objects:
            self._remove_loose_object(obj.id)
        for name, pack in old_packs.items():
            self._remove_pack(pack)
        self._update_pack_cache()
        return len(objects)

    def __iter__(self):
        """Iterate over the SHAs that are present in this store."""
        iterables = (list(self.packs) + [self._iter_loose_objects()] +
                     [self._iter_alternate_objects()])
        return chain(*iterables)

    def contains_loose(self, sha):
        """Check if a particular object is present by SHA1 and is loose.

        This does not check alternates.
        """
        return self._get_loose_object(sha) is not None

    def get_raw(self, name):
        """Obtain the raw text for an object.

        :param name: sha for the object (hex string of length 40 or raw
            20-byte digest).
        :return: tuple with numeric type and object contents.
        :raise KeyError: If the object is not present anywhere.
        """
        if len(name) == 40:
            sha = hex_to_sha(name)
            hexsha = name
        elif len(name) == 20:
            sha = name
            hexsha = None
        else:
            raise AssertionError("Invalid object name %r" % name)
        for pack in self.packs:
            try:
                return pack.get_raw(sha)
            except KeyError:
                pass
        if hexsha is None:
            hexsha = sha_to_hex(name)
        ret = self._get_loose_object(hexsha)
        if ret is not None:
            return ret.type_num, ret.as_raw_string()
        for alternate in self.alternates:
            try:
                return alternate.get_raw(hexsha)
            except KeyError:
                pass
        raise KeyError(hexsha)

    def add_objects(self, objects):
        """Add a set of objects to this object store.

        :param objects: Iterable over (object, path) tuples, should support
            __len__.
        :return: Pack object of the objects written.
        """
        if len(objects) == 0:
            # Don't bother writing an empty pack file
            return
        f, commit, abort = self.add_pack()
        try:
            write_pack_objects(f, objects)
        except BaseException:
            abort()
            raise
        else:
            return commit()
class DiskObjectStore(PackBasedObjectStore):
    """Git-style object store that exists on disk."""

    def __init__(self, path):
        """Open an object store.

        :param path: Path of the object store.
        """
        super(DiskObjectStore, self).__init__()
        self.path = path
        self.pack_dir = os.path.join(self.path, PACKDIR)
        self._pack_cache_time = 0
        self._pack_cache = {}
        self._alternates = None

    def __repr__(self):
        return "<%s(%r)>" % (self.__class__.__name__, self.path)

    @property
    def alternates(self):
        if self._alternates is not None:
            return self._alternates
        self._alternates = []
        for path in self._read_alternate_paths():
            self._alternates.append(DiskObjectStore(path))
        return self._alternates

    def _read_alternate_paths(self):
        # Yields decoded alternate paths from objects/info/alternates,
        # resolving relative entries against this store's path.
        try:
            f = GitFile(os.path.join(self.path, INFODIR, "alternates"), 'rb')
        except (OSError, IOError) as e:
            if e.errno == errno.ENOENT:
                return
            raise
        with f:
            for l in f.readlines():
                l = l.rstrip(b"\n")
                if l.startswith(b"#"):
                    continue
                if os.path.isabs(l):
                    yield l.decode(sys.getfilesystemencoding())
                else:
                    yield os.path.join(self.path, l).decode(
                        sys.getfilesystemencoding())

    def add_alternate_path(self, path):
        """Add an alternate path to this object store.

        :param path: Path to the alternate object store, absolute or
            relative to this store's path.
        """
        try:
            os.mkdir(os.path.join(self.path, INFODIR))
        except OSError as e:
            if e.errno != errno.EEXIST:
                raise
        alternates_path = os.path.join(self.path, INFODIR, "alternates")
        with GitFile(alternates_path, 'wb') as f:
            try:
                orig_f = open(alternates_path, 'rb')
            except (OSError, IOError) as e:
                if e.errno != errno.ENOENT:
                    raise
            else:
                with orig_f:
                    # Preserve any pre-existing alternates.
                    f.write(orig_f.read())
            f.write(path.encode(sys.getfilesystemencoding()) + b"\n")

        if not os.path.isabs(path):
            path = os.path.join(self.path, path)
        self.alternates.append(DiskObjectStore(path))

    def _update_pack_cache(self):
        # Rescan the pack directory, opening new packs and closing ones that
        # have disappeared.
        try:
            pack_dir_contents = os.listdir(self.pack_dir)
        except OSError as e:
            if e.errno == errno.ENOENT:
                self._pack_cache_time = 0
                self.close()
                return
            raise
        self._pack_cache_time = max(
            os.stat(self.pack_dir).st_mtime, time.time())
        pack_files = set()
        for name in pack_dir_contents:
            if name.startswith("pack-") and name.endswith(".pack"):
                # verify that idx exists first (otherwise the pack was not yet
                # fully written)
                idx_name = os.path.splitext(name)[0] + ".idx"
                if idx_name in pack_dir_contents:
                    pack_name = name[:-len(".pack")]
                    pack_files.add(pack_name)

        # Open newly appeared pack files
        for f in pack_files:
            if f not in self._pack_cache:
                self._pack_cache[f] = Pack(os.path.join(self.pack_dir, f))
        # Remove disappeared pack files
        for f in set(self._pack_cache) - pack_files:
            self._pack_cache.pop(f).close()

    def _pack_cache_stale(self):
        try:
            return os.stat(self.pack_dir).st_mtime >= self._pack_cache_time
        except OSError as e:
            if e.errno == errno.ENOENT:
                return True
            raise

    def _get_shafile_path(self, sha):
        # Check from object dir
        return hex_to_filename(self.path, sha)

    def _iter_loose_objects(self):
        for base in os.listdir(self.path):
            # Loose object fan-out directories are two hex characters.
            if len(base) != 2:
                continue
            for rest in os.listdir(os.path.join(self.path, base)):
                yield (base+rest).encode(sys.getfilesystemencoding())

    def _get_loose_object(self, sha):
        path = self._get_shafile_path(sha)
        try:
            return ShaFile.from_path(path)
        except (OSError, IOError) as e:
            if e.errno == errno.ENOENT:
                return None
            raise

    def _remove_loose_object(self, sha):
        os.remove(self._get_shafile_path(sha))

    def _remove_pack(self, pack):
        os.remove(pack.data.path)
        os.remove(pack.index.path)

    def _get_pack_basepath(self, entries):
        suffix = iter_sha1(entry[0] for entry in entries)
        # TODO: Handle self.pack_dir being bytes
        suffix = suffix.decode('ascii')
        return os.path.join(self.pack_dir, "pack-" + suffix)

    def _complete_thin_pack(self, f, path, copier, indexer):
        """Move a specific file containing a pack into the pack directory.

        :note: The file should be on the same file system as the
            packs directory.

        :param f: Open file object for the pack.
        :param path: Path to the pack file.
        :param copier: A PackStreamCopier to use for writing pack data.
        :param indexer: A PackIndexer for indexing the pack.
        """
        entries = list(indexer)

        # Update the header with the new number of objects.
        f.seek(0)
        write_pack_header(f, len(entries) + len(indexer.ext_refs()))

        # Must flush before reading (http://bugs.python.org/issue3207)
        f.flush()

        # Rescan the rest of the pack, computing the SHA with the new header.
        new_sha = compute_file_sha(f, end_ofs=-20)

        # Must reposition before writing (http://bugs.python.org/issue3207)
        f.seek(0, os.SEEK_CUR)

        # Complete the pack.
        for ext_sha in indexer.ext_refs():
            assert len(ext_sha) == 20
            type_num, data = self.get_raw(ext_sha)
            offset = f.tell()
            crc32 = write_pack_object(f, type_num, data, sha=new_sha)
            entries.append((ext_sha, offset, crc32))
        pack_sha = new_sha.digest()
        f.write(pack_sha)
        f.close()

        # Move the pack in.
        entries.sort()
        pack_base_name = self._get_pack_basepath(entries)
        if sys.platform == 'win32':
            # Windows might have the target pack file lingering; remove it
            # first since os.rename will not overwrite there.
            try:
                os.rename(path, pack_base_name + '.pack')
            except WindowsError:
                os.remove(pack_base_name + '.pack')
                os.rename(path, pack_base_name + '.pack')
        else:
            os.rename(path, pack_base_name + '.pack')

        # Write the index.
        index_file = GitFile(pack_base_name + '.idx', 'wb')
        try:
            write_pack_index_v2(index_file, entries, pack_sha)
            index_file.close()
        finally:
            index_file.abort()

        # Add the pack to the store and return it.
        final_pack = Pack(pack_base_name)
        final_pack.check_length_and_checksum()
        self._add_known_pack(pack_base_name, final_pack)
        return final_pack

    def add_thin_pack(self, read_all, read_some):
        """Add a new thin pack to this object store.

        Thin packs are packs that contain deltas with parents that exist
        outside the pack. They should never be placed in the object store
        directly, and always indexed and completed as they are copied.

        :param read_all: Read function that blocks until the number of
            requested bytes are read.
        :param read_some: Read function that returns at least one byte, but may
            not return the number of bytes requested.
        :return: A Pack object pointing at the now-completed thin pack in the
            objects/pack directory.
        """
        fd, path = tempfile.mkstemp(dir=self.path, prefix='tmp_pack_')
        with os.fdopen(fd, 'w+b') as f:
            indexer = PackIndexer(f, resolve_ext_ref=self.get_raw)
            copier = PackStreamCopier(read_all, read_some, f,
                                      delta_iter=indexer)
            copier.verify()
            return self._complete_thin_pack(f, path, copier, indexer)

    def move_in_pack(self, path):
        """Move a specific file containing a pack into the pack directory.

        :note: The file should be on the same file system as the
            packs directory.

        :param path: Path to the pack file.
        """
        with PackData(path) as p:
            entries = p.sorted_entries()
            basename = self._get_pack_basepath(entries)
            with GitFile(basename+".idx", "wb") as f:
                write_pack_index_v2(f, entries, p.get_stored_checksum())
        os.rename(path, basename + ".pack")
        final_pack = Pack(basename)
        self._add_known_pack(basename, final_pack)
        return final_pack

    def add_pack(self):
        """Add a new pack to this object store.

        :return: Fileobject to write to, a commit function to
            call when the pack is finished and an abort
            function.
        """
        fd, path = tempfile.mkstemp(dir=self.pack_dir, suffix=".pack")
        f = os.fdopen(fd, 'wb')

        def commit():
            f.close()
            if os.path.getsize(path) > 0:
                return self.move_in_pack(path)
            else:
                # Nothing was written; just discard the temp file.
                os.remove(path)
                return None

        def abort():
            f.close()
            os.remove(path)
        return f, commit, abort

    def add_object(self, obj):
        """Add a single object to this object store.

        :param obj: Object to add
        """
        path = self._get_shafile_path(obj.id)
        dir = os.path.dirname(path)
        try:
            os.mkdir(dir)
        except OSError as e:
            if e.errno != errno.EEXIST:
                raise
        if os.path.exists(path):
            return  # Already there, no need to write again
        with GitFile(path, 'wb') as f:
            f.write(obj.as_legacy_object())

    @classmethod
    def init(cls, path):
        """Create the directory layout for a new object store at *path*."""
        try:
            os.mkdir(path)
        except OSError as e:
            if e.errno != errno.EEXIST:
                raise
        os.mkdir(os.path.join(path, "info"))
        os.mkdir(os.path.join(path, PACKDIR))
        return cls(path)
class MemoryObjectStore(BaseObjectStore):
    """Object store that keeps all objects in memory."""

    def __init__(self):
        super(MemoryObjectStore, self).__init__()
        # Maps hex sha -> ShaFile object.
        self._data = {}

    def _to_hexsha(self, sha):
        # Normalize a 20-byte binary sha to its 40-char hex form.
        if len(sha) == 40:
            return sha
        elif len(sha) == 20:
            return sha_to_hex(sha)
        else:
            raise ValueError("Invalid sha %r" % (sha,))

    def contains_loose(self, sha):
        """Check if a particular object is present by SHA1 and is loose."""
        return self._to_hexsha(sha) in self._data

    def contains_packed(self, sha):
        """Check if a particular object is present by SHA1 and is packed."""
        return False

    def __iter__(self):
        """Iterate over the SHAs that are present in this store."""
        return iter(self._data.keys())

    @property
    def packs(self):
        """List with pack objects."""
        return []

    def get_raw(self, name):
        """Obtain the raw text for an object.

        :param name: sha for the object.
        :return: tuple with numeric type and object contents.
        """
        obj = self[self._to_hexsha(name)]
        return obj.type_num, obj.as_raw_string()

    def __getitem__(self, name):
        # Return a copy so callers cannot mutate the stored object.
        return self._data[self._to_hexsha(name)].copy()

    def __delitem__(self, name):
        """Delete an object from this store, for testing only."""
        del self._data[self._to_hexsha(name)]

    def add_object(self, obj):
        """Add a single object to this object store.

        :param obj: Object to add
        """
        self._data[obj.id] = obj.copy()

    def add_objects(self, objects):
        """Add a set of objects to this object store.

        :param objects: Iterable over a list of (object, path) tuples
        """
        for obj, path in objects:
            self.add_object(obj)

    def add_pack(self):
        """Add a new pack to this object store.

        Because this object store doesn't support packs, we extract and add the
        individual objects.

        :return: Fileobject to write to and a commit function to
            call when the pack is finished.
        """
        f = BytesIO()

        def commit():
            p = PackData.from_file(BytesIO(f.getvalue()), f.tell())
            f.close()
            for obj in PackInflater.for_pack_data(p, self.get_raw):
                self.add_object(obj)

        def abort():
            pass
        return f, commit, abort

    def _complete_thin_pack(self, f, indexer):
        """Complete a thin pack by adding external references.

        :param f: Open file object for the pack.
        :param indexer: A PackIndexer for indexing the pack.
        """
        entries = list(indexer)

        # Update the header with the new number of objects.
        f.seek(0)
        write_pack_header(f, len(entries) + len(indexer.ext_refs()))

        # Rescan the rest of the pack, computing the SHA with the new header.
        new_sha = compute_file_sha(f, end_ofs=-20)

        # Complete the pack.
        for ext_sha in indexer.ext_refs():
            assert len(ext_sha) == 20
            type_num, data = self.get_raw(ext_sha)
            write_pack_object(f, type_num, data, sha=new_sha)
        pack_sha = new_sha.digest()
        f.write(pack_sha)

    def add_thin_pack(self, read_all, read_some):
        """Add a new thin pack to this object store.

        Thin packs are packs that contain deltas with parents that exist
        outside the pack. Because this object store doesn't support packs, we
        extract and add the individual objects.

        :param read_all: Read function that blocks until the number of
            requested bytes are read.
        :param read_some: Read function that returns at least one byte, but may
            not return the number of bytes requested.
        """
        f, commit, abort = self.add_pack()
        try:
            indexer = PackIndexer(f, resolve_ext_ref=self.get_raw)
            copier = PackStreamCopier(read_all, read_some, f,
                                      delta_iter=indexer)
            copier.verify()
            self._complete_thin_pack(f, indexer)
        except BaseException:
            abort()
            raise
        else:
            commit()
class ObjectImporter(object):
    """Interface for importing objects."""

    def __init__(self, count):
        """Create a new ObjectImporter.

        :param count: Number of objects that's going to be imported.
        """
        self.count = count

    def add_object(self, object):
        """Add an object."""
        raise NotImplementedError(self.add_object)

    def finish(self, object):
        """Finish the import and write objects to disk."""
        raise NotImplementedError(self.finish)
class ObjectIterator(object):
    """Interface for iterating over objects."""

    def iterobjects(self):
        """Iterate over the objects; must be provided by subclasses."""
        raise NotImplementedError(self.iterobjects)
class ObjectStoreIterator(ObjectIterator):
    """ObjectIterator that works on top of an ObjectStore."""

    def __init__(self, store, sha_iter):
        """Create a new ObjectIterator.

        :param store: Object store to retrieve from
        :param sha_iter: Iterator over (sha, path) tuples
        """
        self.store = store
        self.sha_iter = sha_iter
        # Cache of (sha, path) tuples already pulled from sha_iter, so the
        # iterator can be traversed more than once.
        self._shas = []

    def __iter__(self):
        """Yield tuple with next object and path."""
        for sha, path in self.itershas():
            yield self.store[sha], path

    def iterobjects(self):
        """Iterate over just the objects."""
        for o, path in self:
            yield o

    def itershas(self):
        """Iterate over the SHAs."""
        for sha in self._shas:
            yield sha
        for sha in self.sha_iter:
            self._shas.append(sha)
            yield sha

    def __contains__(self, needle):
        """Check if an object is present.

        :note: This checks if the object is present in
            the underlying object store, not if it would
            be yielded by the iterator.

        :param needle: SHA1 of the object to check for
        """
        return needle in self.store

    def __getitem__(self, key):
        """Find an object by SHA1.

        :note: This retrieves the object from the underlying
            object store. It will also succeed if the object would
            not be returned by the iterator.
        """
        return self.store[key]

    def __len__(self):
        """Return the number of objects."""
        return len(list(self.itershas()))
def tree_lookup_path(lookup_obj, root_sha, path):
    """Look up an object in a Git tree.

    :param lookup_obj: Callback for retrieving object by SHA1
    :param root_sha: SHA1 of the root tree
    :param path: Path to lookup
    :return: A tuple of (mode, SHA) of the resulting path.
    :raise NotTreeError: If the root object is not a tree.
    """
    tree = lookup_obj(root_sha)
    if not isinstance(tree, Tree):
        raise NotTreeError(root_sha)
    return tree.lookup_path(lookup_obj, path)
def _collect_filetree_revs(obj_store, tree_sha, kset):
    """Collect SHA1s of files and directories for specified tree.

    :param obj_store: Object store to get objects by SHA from
    :param tree_sha: tree reference to walk
    :param kset: set to fill with references to files and directories
    """
    filetree = obj_store[tree_sha]
    for name, mode, sha in filetree.iteritems():
        # Submodule (gitlink) entries live in another repository; skip them.
        if not S_ISGITLINK(mode) and sha not in kset:
            kset.add(sha)
            if stat.S_ISDIR(mode):
                _collect_filetree_revs(obj_store, sha, kset)
def _split_commits_and_tags(obj_store, lst, ignore_unknown=False):
    """Split object id list into three lists with commit, tag, and other SHAs.

    Commits referenced by tags are included into commits
    list as well. Only SHA1s known in this repository will get
    through, and unless ignore_unknown argument is True, KeyError
    is thrown for SHA1 missing in the repository

    :param obj_store: Object store to get objects by SHA1 from
    :param lst: Collection of commit and tag SHAs
    :param ignore_unknown: True to skip SHA1 missing in the repository
        silently.
    :return: A tuple of (commits, tags, others) SHA1s
    """
    commits = set()
    tags = set()
    others = set()
    for e in lst:
        try:
            o = obj_store[e]
        except KeyError:
            if not ignore_unknown:
                raise
        else:
            if isinstance(o, Commit):
                commits.add(e)
            elif isinstance(o, Tag):
                tags.add(e)
                tagged = o.object[1]
                # Recurse to classify what the tag points at; tags may be
                # nested.
                c, t, o = _split_commits_and_tags(
                    obj_store, [tagged], ignore_unknown=ignore_unknown)
                commits |= c
                tags |= t
                others |= o
            else:
                others.add(e)
    return (commits, tags, others)
class MissingObjectFinder(object):
    """Find the objects missing from another object store.

    :param object_store: Object store containing at least all objects to be
        sent
    :param haves: SHA1s of commits not to send (already present in target)
    :param wants: SHA1s of commits to send
    :param progress: Optional function to report progress to.
    :param get_tagged: Function that returns a dict of pointed-to sha -> tag
        sha for including tags.
    :param get_parents: Optional function for getting the parents of a commit.
    :param tagged: dict of pointed-to sha -> tag sha for including tags
    """

    def __init__(self, object_store, haves, wants, progress=None,
                 get_tagged=None, get_parents=lambda commit: commit.parents):
        self.object_store = object_store
        self._get_parents = get_parents
        # process Commits and Tags differently
        # Note, while haves may list commits/tags not available locally,
        # and such SHAs would get filtered out by _split_commits_and_tags,
        # wants shall list only known SHAs, and otherwise
        # _split_commits_and_tags fails with KeyError
        have_commits, have_tags, have_others = (
            _split_commits_and_tags(object_store, haves, True))
        want_commits, want_tags, want_others = (
            _split_commits_and_tags(object_store, wants, False))
        # all_ancestors is a set of commits that shall not be sent
        # (complete repository up to 'haves')
        all_ancestors = object_store._collect_ancestors(
            have_commits, get_parents=self._get_parents)[0]
        # all_missing - complete set of commits between haves and wants
        # common - commits from all_ancestors we hit into while
        # traversing parent hierarchy of wants
        missing_commits, common_commits = object_store._collect_ancestors(
            want_commits, all_ancestors, get_parents=self._get_parents)
        self.sha_done = set()
        # Now, fill sha_done with commits and revisions of
        # files and directories known to be both locally
        # and on target. Thus these commits and files
        # won't get selected for fetch
        for h in common_commits:
            self.sha_done.add(h)
            cmt = self.object_store[h]
            _collect_filetree_revs(object_store, cmt.tree, self.sha_done)
        # record tags we have as visited, too
        for t in have_tags:
            self.sha_done.add(t)

        missing_tags = want_tags.difference(have_tags)
        missing_others = want_others.difference(have_others)
        # in fact, what we 'want' is commits, tags, and others
        # we've found missing
        wants = missing_commits.union(missing_tags)
        wants = wants.union(missing_others)

        # Entries are (sha, path-name, leaf) tuples; leaf objects need no
        # further expansion.
        self.objects_to_send = set([(w, None, False) for w in wants])

        if progress is None:
            self.progress = lambda x: None
        else:
            self.progress = progress
        self._tagged = get_tagged and get_tagged() or {}

    def add_todo(self, entries):
        self.objects_to_send.update([e for e in entries
                                     if not e[0] in self.sha_done])

    def next(self):
        """Return the next missing (sha, name) pair, or None when done."""
        while True:
            if not self.objects_to_send:
                return None
            (sha, name, leaf) = self.objects_to_send.pop()
            if sha not in self.sha_done:
                break
        if not leaf:
            o = self.object_store[sha]
            if isinstance(o, Commit):
                self.add_todo([(o.tree, "", False)])
            elif isinstance(o, Tree):
                self.add_todo([(s, n, not stat.S_ISDIR(m))
                               for n, m, s in o.iteritems()
                               if not S_ISGITLINK(m)])
            elif isinstance(o, Tag):
                self.add_todo([(o.object[1], None, False)])
        if sha in self._tagged:
            self.add_todo([(self._tagged[sha], None, True)])
        self.sha_done.add(sha)
        self.progress(("counting objects: %d\r" %
                       len(self.sha_done)).encode('ascii'))
        return (sha, name)

    __next__ = next
class ObjectStoreGraphWalker(object):
    """Graph walker that finds what commits are missing from an object store.

    :ivar heads: Revisions without descendants in the local repo
    :ivar get_parents: Function to retrieve parents in the local repo
    """

    def __init__(self, local_heads, get_parents):
        """Create a new instance.

        :param local_heads: Heads to start search with
        :param get_parents: Function for finding the parents of a SHA1.
        """
        self.heads = set(local_heads)
        self.get_parents = get_parents
        # Maps sha -> list of parent shas, or None once acked/processed.
        self.parents = {}

    def ack(self, sha):
        """Ack that a revision and its ancestors are present in the source."""
        if len(sha) != 40:
            raise ValueError("unexpected sha %r received" % sha)
        ancestors = set([sha])

        # stop if we run out of heads to remove
        while self.heads:
            for a in ancestors:
                if a in self.heads:
                    self.heads.remove(a)

            # collect all ancestors
            new_ancestors = set()
            for a in ancestors:
                ps = self.parents.get(a)
                if ps is not None:
                    new_ancestors.update(ps)
                # Mark as acked so later walks stop here.
                self.parents[a] = None

            # no more ancestors; stop
            if not new_ancestors:
                break

            ancestors = new_ancestors

    def next(self):
        """Iterate over ancestors of heads in the target."""
        if self.heads:
            ret = self.heads.pop()
            ps = self.get_parents(ret)
            self.parents[ret] = ps
            self.heads.update(
                [p for p in ps if p not in self.parents])
            return ret
        return None

    __next__ = next
def commit_tree_changes(object_store, tree, changes):
    """Commit a specified set of changes to a tree structure.

    This will apply a set of changes on top of an existing tree, storing new
    objects in object_store.

    changes are a list of tuples with (path, mode, object_sha).
    Paths can be both blobs and trees. See the mode and
    object sha to None deletes the path.

    This method works especially well if there are only a small
    number of changes to a big tree. For a large number of changes
    to a large tree, use e.g. commit_tree.

    :param object_store: Object store to store new objects in
        and retrieve old ones from.
    :param tree: Original tree root
    :param changes: changes to apply
    :return: New tree root object
    """
    # TODO(jelmer): Save up the objects and add them using .add_objects
    # rather than with individual calls to .add_object.
    nested_changes = {}
    for (path, new_mode, new_sha) in changes:
        try:
            (dirname, subpath) = path.split(b'/', 1)
        except ValueError:
            # No slash: this change applies directly to this tree.
            if new_sha is None:
                del tree[path]
            else:
                tree[path] = (new_mode, new_sha)
        else:
            nested_changes.setdefault(dirname, []).append(
                (subpath, new_mode, new_sha))
    for name, subchanges in nested_changes.items():
        try:
            orig_subtree = object_store[tree[name][1]]
        except KeyError:
            orig_subtree = Tree()
        subtree = commit_tree_changes(object_store, orig_subtree, subchanges)
        if len(subtree) == 0:
            # Subtree became empty; remove its entry entirely.
            del tree[name]
        else:
            tree[name] = (stat.S_IFDIR, subtree.id)
    object_store.add_object(tree)
    return tree