1 # object_store.py -- Object store for git objects
2 # Copyright (C) 2008-2013 Jelmer Vernooij <jelmer@jelmer.uk>
5 # Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
# General Public License as published by the Free Software Foundation; version 2.0
7 # or (at your option) any later version. You can redistribute it and/or
8 # modify it under the terms of either of these two licenses.
10 # Unless required by applicable law or agreed to in writing, software
11 # distributed under the License is distributed on an "AS IS" BASIS,
12 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 # See the License for the specific language governing permissions and
14 # limitations under the License.
16 # You should have received a copy of the licenses; if not, see
17 # <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
18 # and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
19 # License, Version 2.0.
23 """Git object store interfaces and implementation."""
25 from io import BytesIO
27 from itertools import chain
34 from dulwich.diff_tree import (
38 from dulwich.errors import (
41 from dulwich.file import GitFile
42 from dulwich.objects import (
54 from dulwich.pack import (
68 from dulwich.refs import ANNOTATED_TAG_SUFFIX
74 class BaseObjectStore(object):
75 """Object store interface."""
    def determine_wants_all(self, refs):
        """Return the SHAs from *refs* that are worth fetching.

        Skips SHAs already present in this store and refs whose name ends
        with the annotated-tag peel suffix (``^{}``).

        :param refs: Dict mapping ref name -> SHA1
        :return: List of SHAs to fetch
        """
        # NOTE(review): the final condition of this comprehension is not
        # visible in this chunk.
        return [sha for (ref, sha) in refs.items()
                if sha not in self and
                not ref.endswith(ANNOTATED_TAG_SUFFIX) and
    def iter_shas(self, shas):
        """Iterate over the objects for the specified shas.

        :param shas: Iterable object with SHAs
        :return: Object iterator
        """
        return ObjectStoreIterator(self, shas)
    def contains_loose(self, sha):
        """Check if a particular object is present by SHA1 and is loose."""
        # Abstract: concrete stores must override.
        raise NotImplementedError(self.contains_loose)
    def contains_packed(self, sha):
        """Check if a particular object is present by SHA1 and is packed."""
        # Abstract: concrete stores must override.
        raise NotImplementedError(self.contains_packed)
    def __contains__(self, sha):
        """Check if a particular object is present by SHA1.

        This method makes no distinction between loose and packed objects.
        """
        return self.contains_packed(sha) or self.contains_loose(sha)
108 """Iterable of pack objects."""
109 raise NotImplementedError
    def get_raw(self, name):
        """Obtain the raw text for an object.

        :param name: sha for the object.
        :return: tuple with numeric type and object contents.
        """
        raise NotImplementedError(self.get_raw)
    def __getitem__(self, sha):
        """Obtain an object by SHA1."""
        # get_raw() returns (type_num, raw bytes); rebuild a ShaFile from it.
        type_num, uncomp = self.get_raw(sha)
        return ShaFile.from_raw_string(type_num, uncomp, sha=sha)
125 """Iterate over the SHAs that are present in this store."""
126 raise NotImplementedError(self.__iter__)
    def add_object(self, obj):
        """Add a single object to this object store.

        :param obj: Object to add
        """
        raise NotImplementedError(self.add_object)
    def add_objects(self, objects, progress=None):
        """Add a set of objects to this object store.

        :param objects: Iterable over a list of (object, path) tuples
        :param progress: Optional progress callback
        """
        raise NotImplementedError(self.add_objects)
    def add_pack_data(self, count, pack_data, progress=None):
        """Add pack data to this object store.

        :param count: Number of items to add
        :param pack_data: Iterator over pack data tuples
        :param progress: Optional progress callback
        """
        # NOTE(review): the empty-pack early return and the try/abort/commit
        # wrapping around write_pack_data are not visible in this chunk.
        # Don't bother writing an empty pack file
        f, commit, abort = self.add_pack()
        write_pack_data(f, count, pack_data, progress)
        except BaseException:
    def tree_changes(self, source, target, want_unchanged=False,
                     include_trees=False, change_type_same=False):
        """Find the differences between the contents of two trees.

        :param source: SHA1 of the source tree
        :param target: SHA1 of the target tree
        :param want_unchanged: Whether unchanged files should be reported
        :param include_trees: Whether to include trees
        :param change_type_same: Whether to report files changing
            type in the same entry.
        :return: Iterator over tuples with
            (oldpath, newpath), (oldmode, newmode), (oldsha, newsha)
        """
        # Delegates to diff_tree.tree_changes and flattens each TreeChange
        # into the documented tuple-of-tuples shape.
        for change in tree_changes(self, source, target,
                                   want_unchanged=want_unchanged,
                                   include_trees=include_trees,
                                   change_type_same=change_type_same):
            yield ((change.old.path, change.new.path),
                   (change.old.mode, change.new.mode),
                   (change.old.sha, change.new.sha))
    def iter_tree_contents(self, tree_id, include_trees=False):
        """Iterate the contents of a tree and all subtrees.

        Iteration is depth-first pre-order, as in e.g. os.walk.

        :param tree_id: SHA1 of the tree.
        :param include_trees: If True, include tree objects in the iteration.
        :return: Iterator over TreeEntry namedtuples for all the objects in a
            tree.
        """
        # NOTE(review): the yield statement for matching entries is not
        # visible in this chunk.
        for entry, _ in walk_trees(self, tree_id, None):
            if ((entry.mode is not None and
                 not stat.S_ISDIR(entry.mode)) or include_trees):
    def find_missing_objects(self, haves, wants, progress=None,
                             get_parents=lambda commit: commit.parents,
        # NOTE(review): the signature is incomplete in this chunk (the
        # get_tagged parameter and closing parenthesis are not visible).
        """Find the missing objects required for a set of revisions.

        :param haves: Iterable over SHAs already in common.
        :param wants: Iterable over SHAs of objects to fetch.
        :param progress: Simple progress function that will be called with
            updated progress strings.
        :param get_tagged: Function that returns a dict of pointed-to sha ->
            tag sha for including tags.
        :param get_parents: Optional function for getting the parents of a
            commit.
        :return: Iterator over (sha, path) pairs.
        """
        # iter(callable, sentinel): pull from finder.next until it yields None.
        finder = MissingObjectFinder(self, haves, wants, progress, get_tagged,
                                     get_parents=get_parents)
        return iter(finder.next, None)
    def find_common_revisions(self, graphwalker):
        """Find which revisions this store has in common using graphwalker.

        :param graphwalker: A graphwalker object.
        :return: List of SHAs that are in common
        """
        # NOTE(review): the accumulator list, the loop, and the ack calls are
        # not visible in this chunk; only the graphwalker advances remain.
        sha = next(graphwalker)
        sha = next(graphwalker)
    def generate_pack_contents(self, have, want, progress=None):
        """Iterate over the contents of a pack file.

        :param have: List of SHA1s of objects that should not be sent
        :param want: List of SHA1s of objects that should be sent
        :param progress: Optional progress reporting method
        :return: Iterator over (object, path) pairs to include in the pack
        """
        return self.iter_shas(self.find_missing_objects(have, want, progress))
    def generate_pack_data(self, have, want, progress=None, ofs_delta=True):
        """Generate pack data objects for a set of wants/haves.

        :param have: List of SHA1s of objects that should not be sent
        :param want: List of SHA1s of objects that should be sent
        :param ofs_delta: Whether OFS deltas can be included
        :param progress: Optional progress reporting method
        """
        # TODO(jelmer): More efficient implementation
        return pack_objects_to_data(
            self.generate_pack_contents(have, want, progress))
    def peel_sha(self, sha):
        """Peel all tags from a SHA.

        :param sha: The object SHA to peel.
        :return: The fully-peeled SHA1 of a tag object, after peeling all
            intermediate tags; if the original ref does not point to a tag,
            this will equal the original SHA1.
        """
        # NOTE(review): the initial and per-iteration object lookups and the
        # final return are not visible in this chunk.
        obj_class = object_class(obj.type_name)
        while obj_class is Tag:
            obj_class, sha = obj.object
    def _collect_ancestors(self, heads, common=set(),
                           get_parents=lambda commit: commit.parents):
        """Collect all ancestors of heads up to (excluding) those in common.

        :param heads: commits to start from
        :param common: commits to end at, or empty set to walk the whole
            repository
        :param get_parents: Optional function for getting the parents of a
            commit.
        :return: a tuple (A, B) where A - all commits reachable
            from heads but not present in common, B - common (shared) elements
            that are directly reachable from heads
        """
        # NOTE(review): `common=set()` is a shared mutable default — safe only
        # if never mutated; confirm. The BFS setup and most of the loop body
        # are not visible in this chunk.
        elif e not in commits:
            queue.extend(get_parents(cmt))
        return (commits, bases)
294 """Close any files opened by this object store."""
295 # Default implementation is a NO-OP
298 class PackBasedObjectStore(BaseObjectStore):
301 self._pack_cache = {}
304 def alternates(self):
    def contains_packed(self, sha):
        """Check if a particular object is present by SHA1 and is packed.

        This does not check alternates.
        """
        # NOTE(review): the loop body and final return are not visible here.
        for pack in self.packs:
    def __contains__(self, sha):
        """Check if a particular object is present by SHA1.

        This method makes no distinction between loose and packed objects.
        """
        # Unlike the base class, this also consults alternate stores.
        # NOTE(review): the returns inside both branches are not visible here.
        if self.contains_packed(sha) or self.contains_loose(sha):
        for alternate in self.alternates:
    def _pack_cache_stale(self):
        """Check whether the pack cache is stale."""
        # Abstract: subclasses decide how staleness is detected.
        raise NotImplementedError(self._pack_cache_stale)
    def _add_known_pack(self, base_name, pack):
        """Add a newly appeared pack to the cache by path.

        :param base_name: Base name of the pack (cache key)
        :param pack: Pack object to register
        """
        prev_pack = self._pack_cache.get(base_name)
        if prev_pack is not pack:
            self._pack_cache[base_name] = pack
            # NOTE(review): cleanup of the replaced pack is not visible here.
    def _flush_pack_cache(self):
        """Drop all cached packs, draining the old cache dict."""
        # NOTE(review): the loop draining pack_cache (and closing each pack)
        # is not fully visible in this chunk.
        pack_cache = self._pack_cache
        self._pack_cache = {}
        (name, pack) = pack_cache.popitem()
351 self._flush_pack_cache()
355 """List with pack objects."""
356 if self._pack_cache is None or self._pack_cache_stale():
357 self._update_pack_cache()
359 return self._pack_cache.values()
361 def _iter_alternate_objects(self):
362 """Iterate over the SHAs of all the objects in alternate stores."""
363 for alternate in self.alternates:
364 for alternate_object in alternate:
365 yield alternate_object
    def _iter_loose_objects(self):
        """Iterate over the SHAs of all loose objects."""
        # Abstract: concrete stores must override.
        raise NotImplementedError(self._iter_loose_objects)
    def _get_loose_object(self, sha):
        """Return the loose object for *sha*, or None if absent (abstract).

        The None-when-absent contract is relied on by contains_loose().
        """
        raise NotImplementedError(self._get_loose_object)
    def _remove_loose_object(self, sha):
        """Remove the loose object for *sha* (abstract)."""
        raise NotImplementedError(self._remove_loose_object)
    def _remove_pack(self, name):
        """Remove the given pack from this store (abstract)."""
        raise NotImplementedError(self._remove_pack)
    def pack_loose_objects(self):
        """Pack loose objects.

        :return: Number of objects packed
        """
        # NOTE(review): the initialization of `objects` and the final return
        # are not visible in this chunk.
        for sha in self._iter_loose_objects():
            objects.add((self._get_loose_object(sha), None))
        self.add_objects(list(objects))
        # Only remove the loose copies once they are safely in a pack.
        for obj, path in objects:
            self._remove_loose_object(obj.id)
        """Repack the packs in this repository.

        Note that this implementation is fairly naive and currently keeps all
        objects in memory while it repacks.
        """
        # NOTE(review): the `def repack(self):` line is not visible in this
        # chunk.
        loose_objects = set()
        for sha in self._iter_loose_objects():
            loose_objects.add(self._get_loose_object(sha))
        objects = {(obj, None) for obj in loose_objects}
        old_packs = {p.name(): p for p in self.packs}
        for name, pack in old_packs.items():
            objects.update((obj, None) for obj in pack.iterobjects())
        self._flush_pack_cache()

        # The name of the consolidated pack might match the name of a
        # pre-existing pack. Take care not to remove the newly created
        # consolidated pack.
        consolidated = self.add_objects(objects)
        old_packs.pop(consolidated.name(), None)

        # Delete the now-redundant loose objects and superseded packs.
        for obj in loose_objects:
            self._remove_loose_object(obj.id)
        for name, pack in old_packs.items():
            self._remove_pack(pack)
        self._update_pack_cache()
        """Iterate over the SHAs that are present in this store."""
        # NOTE(review): the `def __iter__(self):` line is not visible in this
        # chunk. Chains packs, loose objects, then alternates.
        iterables = (list(self.packs) + [self._iter_loose_objects()] +
                     [self._iter_alternate_objects()])
        return chain(*iterables)
    def contains_loose(self, sha):
        """Check if a particular object is present by SHA1 and is loose.

        This does not check alternates.
        """
        return self._get_loose_object(sha) is not None
    def get_raw(self, name):
        """Obtain the raw fulltext for an object.

        :param name: sha for the object.
        :return: tuple with numeric type and object contents.
        :raises KeyError: if the object is in neither this store nor any
            alternate
        """
        # NOTE(review): the length dispatch (hex vs binary sha), the
        # try/except around pack lookups, and the None check on the loose
        # object are not fully visible in this chunk.
        sha = hex_to_sha(name)
        elif len(name) == 20:
        raise AssertionError("Invalid object name %r" % name)
        for pack in self.packs:
                return pack.get_raw(sha)
        hexsha = sha_to_hex(name)
        ret = self._get_loose_object(hexsha)
            return ret.type_num, ret.as_raw_string()
        for alternate in self.alternates:
                return alternate.get_raw(hexsha)
        raise KeyError(hexsha)
    def add_objects(self, objects, progress=None):
        """Add a set of objects to this object store.

        :param objects: Iterable over (object, path) tuples, should support
            __len__.
        :return: Pack object of the objects written.
        """
        # NOTE(review): the trailing keyword argument(s) of this call are not
        # visible in this chunk.
        return self.add_pack_data(
            *pack_objects_to_data(objects),
478 class DiskObjectStore(PackBasedObjectStore):
479 """Git-style object store that exists on disk."""
    def __init__(self, path):
        """Open an object store.

        :param path: Path of the object store.
        """
        super(DiskObjectStore, self).__init__()
        # NOTE(review): self.path is read below but its assignment is not
        # visible in this chunk — confirm it is set before use.
        self.pack_dir = os.path.join(self.path, PACKDIR)
        self._pack_cache_time = 0
        self._pack_cache = {}
        # Lazily populated by the `alternates` property.
        self._alternates = None
494 return "<%s(%r)>" % (self.__class__.__name__, self.path)
497 def alternates(self):
498 if self._alternates is not None:
499 return self._alternates
500 self._alternates = []
501 for path in self._read_alternate_paths():
502 self._alternates.append(DiskObjectStore(path))
503 return self._alternates
    def _read_alternate_paths(self):
        """Yield filesystem paths listed in this store's info/alternates."""
        # NOTE(review): the try around the open, the ENOENT early-return, the
        # comment-line skip, and the else branch are not visible in this
        # chunk. Relative entries are resolved against self.path.
            f = GitFile(os.path.join(self.path, INFODIR, "alternates"), 'rb')
        except (OSError, IOError) as e:
            if e.errno == errno.ENOENT:
        for line in f.readlines():
            line = line.rstrip(b"\n")
            if os.path.isabs(line):
                yield line.decode(sys.getfilesystemencoding())
            yield os.path.join(self.path, line).decode(
                sys.getfilesystemencoding())
    def add_alternate_path(self, path):
        """Add an alternate path to this object store.

        :param path: Path of the alternate store to register
        """
        # NOTE(review): the try/except wrappers around mkdir and the original
        # file open are not visible in this chunk.
            os.mkdir(os.path.join(self.path, INFODIR))
            if e.errno != errno.EEXIST:
        # Rewrite the alternates file, preserving any existing entries.
        alternates_path = os.path.join(self.path, INFODIR, "alternates")
        with GitFile(alternates_path, 'wb') as f:
                orig_f = open(alternates_path, 'rb')
            except (OSError, IOError) as e:
                if e.errno != errno.ENOENT:
                f.write(orig_f.read())
            f.write(path.encode(sys.getfilesystemencoding()) + b"\n")
        if not os.path.isabs(path):
            path = os.path.join(self.path, path)
        self.alternates.append(DiskObjectStore(path))
    def _update_pack_cache(self):
        """Rescan the pack directory and sync the in-memory pack cache."""
        # NOTE(review): the try/except around listdir, the initialization of
        # `pack_files`, and the loop header opening new packs are not visible
        # in this chunk.
            pack_dir_contents = os.listdir(self.pack_dir)
            if e.errno == errno.ENOENT:
                self._pack_cache_time = 0
        self._pack_cache_time = max(
            os.stat(self.pack_dir).st_mtime, time.time())
        for name in pack_dir_contents:
            if name.startswith("pack-") and name.endswith(".pack"):
                # verify that idx exists first (otherwise the pack was not yet
                # fully written)
                idx_name = os.path.splitext(name)[0] + ".idx"
                if idx_name in pack_dir_contents:
                    pack_name = name[:-len(".pack")]
                    pack_files.add(pack_name)
        # Open newly appeared pack files
            if f not in self._pack_cache:
                self._pack_cache[f] = Pack(os.path.join(self.pack_dir, f))
        # Remove disappeared pack files
        for f in set(self._pack_cache) - pack_files:
            self._pack_cache.pop(f).close()
    def _pack_cache_stale(self):
        """Return whether the pack dir changed since the cache was built."""
        # NOTE(review): the try/except around the stat call is not visible in
        # this chunk; a missing pack dir appears to be handled via ENOENT.
            return os.stat(self.pack_dir).st_mtime >= self._pack_cache_time
            if e.errno == errno.ENOENT:
    def _get_shafile_path(self, sha):
        """Return the loose-object file path for the given hex sha."""
        # Check from object dir
        return hex_to_filename(self.path, sha)
    def _iter_loose_objects(self):
        """Yield hex SHAs of loose objects under the object directory."""
        # NOTE(review): the filter skipping non-fanout entries (e.g. 'pack',
        # 'info') is not visible in this chunk — confirm it exists.
        for base in os.listdir(self.path):
            for rest in os.listdir(os.path.join(self.path, base)):
                yield (base+rest).encode(sys.getfilesystemencoding())
    def _get_loose_object(self, sha):
        """Load the loose object for hex *sha* from disk.

        Missing files (ENOENT) are expected; other errors propagate.
        """
        # NOTE(review): the try line, the None return for ENOENT, and the
        # re-raise are not visible in this chunk.
        path = self._get_shafile_path(sha)
            return ShaFile.from_path(path)
        except (OSError, IOError) as e:
            if e.errno == errno.ENOENT:
    def _remove_loose_object(self, sha):
        """Delete the loose object file for hex *sha* from disk."""
        os.remove(self._get_shafile_path(sha))
607 def _remove_pack(self, pack):
608 os.remove(pack.data.path)
609 os.remove(pack.index.path)
611 def _get_pack_basepath(self, entries):
612 suffix = iter_sha1(entry[0] for entry in entries)
613 # TODO: Handle self.pack_dir being bytes
614 suffix = suffix.decode('ascii')
615 return os.path.join(self.pack_dir, "pack-" + suffix)
    def _complete_thin_pack(self, f, path, copier, indexer):
        """Move a specific file containing a pack into the pack directory.

        :note: The file should be on the same file system as the
            pack directory.

        :param f: Open file object for the pack.
        :param path: Path to the pack file.
        :param copier: A PackStreamCopier to use for writing pack data.
        :param indexer: A PackIndexer for indexing the pack.
        """
        # NOTE(review): several lines are not visible in this chunk: the
        # copier.verify() call, the seek to the header, the flush, the
        # assignment of `offset`, the try/finally around index writing, the
        # removal of the now-superseded temporary file, and the final return.
        entries = list(indexer)

        # Update the header with the new number of objects.
        write_pack_header(f, len(entries) + len(indexer.ext_refs()))

        # Must flush before reading (http://bugs.python.org/issue3207)

        # Rescan the rest of the pack, computing the SHA with the new header.
        new_sha = compute_file_sha(f, end_ofs=-20)

        # Must reposition before writing (http://bugs.python.org/issue3207)
        f.seek(0, os.SEEK_CUR)

        # Append the objects the thin pack referenced externally.
        for ext_sha in indexer.ext_refs():
            assert len(ext_sha) == 20
            type_num, data = self.get_raw(ext_sha)
            crc32 = write_pack_object(f, type_num, data, sha=new_sha)
            entries.append((ext_sha, offset, crc32))
        pack_sha = new_sha.digest()

        # Move the pack in.
        pack_base_name = self._get_pack_basepath(entries)
        target_pack = pack_base_name + '.pack'
        if sys.platform == 'win32':
            # Windows might have the target pack file lingering. Attempt
            # removal, silently passing if the target does not exist.
                os.remove(target_pack)
            except (IOError, OSError) as e:
                if e.errno != errno.ENOENT:
        os.rename(path, target_pack)

        # Write the index.
        index_file = GitFile(pack_base_name + '.idx', 'wb')
            write_pack_index_v2(index_file, entries, pack_sha)

        # Add the pack to the store and return it.
        final_pack = Pack(pack_base_name)
        final_pack.check_length_and_checksum()
        self._add_known_pack(pack_base_name, final_pack)
    def add_thin_pack(self, read_all, read_some):
        """Add a new thin pack to this object store.

        Thin packs are packs that contain deltas with parents that exist
        outside the pack. They should never be placed in the object store
        directly, and always indexed and completed as they are copied.

        :param read_all: Read function that blocks until the number of
            requested bytes are read.
        :param read_some: Read function that returns at least one byte, but may
            not return the number of bytes requested.
        :return: A Pack object pointing at the now-completed thin pack in the
            objects/pack directory.
        """
        # NOTE(review): the trailing argument of the PackStreamCopier call and
        # the copier.verify() step are not visible in this chunk.
        fd, path = tempfile.mkstemp(dir=self.path, prefix='tmp_pack_')
        with os.fdopen(fd, 'w+b') as f:
            indexer = PackIndexer(f, resolve_ext_ref=self.get_raw)
            copier = PackStreamCopier(read_all, read_some, f,
            return self._complete_thin_pack(f, path, copier, indexer)
    def move_in_pack(self, path):
        """Move a specific file containing a pack into the pack directory.

        :note: The file should be on the same file system as the
            pack directory.

        :param path: Path to the pack file.
        """
        # NOTE(review): several control-flow lines are not visible in this
        # chunk (the try around the cache lookup, the removal of the
        # duplicate file, the try before os.remove on win32, the re-raise,
        # and the final return of the Pack).
        with PackData(path) as p:
            entries = p.sorted_entries()
            basename = self._get_pack_basepath(entries)
        with GitFile(basename+".idx", "wb") as f:
            write_pack_index_v2(f, entries, p.get_stored_checksum())
        if self._pack_cache is None or self._pack_cache_stale():
            self._update_pack_cache()
            return self._pack_cache[basename]
        target_pack = basename + '.pack'
        if sys.platform == 'win32':
            # Windows might have the target pack file lingering. Attempt
            # removal, silently passing if the target does not exist.
                os.remove(target_pack)
            except (IOError, OSError) as e:
                if e.errno != errno.ENOENT:
        os.rename(path, target_pack)
        final_pack = Pack(basename)
        self._add_known_pack(basename, final_pack)
        """Add a new pack to this object store.

        :return: Fileobject to write to, a commit function to
            call when the pack is finished and an abort
            function.
        """
        # NOTE(review): the `def add_pack(self):` line and the definitions of
        # the nested commit() and abort() closures are not visible in this
        # chunk.
        fd, path = tempfile.mkstemp(dir=self.pack_dir, suffix=".pack")
        f = os.fdopen(fd, 'wb')
            if os.path.getsize(path) > 0:
                return self.move_in_pack(path)
        return f, commit, abort
    def add_object(self, obj):
        """Add a single object to this object store.

        :param obj: Object to add
        """
        # NOTE(review): the try/except os.makedirs wrapper is not visible in
        # this chunk; `dir` shadows the builtin but is local only.
        path = self._get_shafile_path(obj.id)
        dir = os.path.dirname(path)
            if e.errno != errno.EEXIST:
        if os.path.exists(path):
            return  # Already there, no need to write again
        with GitFile(path, 'wb') as f:
            f.write(obj.as_legacy_object())
784 if e.errno != errno.EEXIST:
786 os.mkdir(os.path.join(path, "info"))
787 os.mkdir(os.path.join(path, PACKDIR))
791 class MemoryObjectStore(BaseObjectStore):
792 """Object store that keeps all objects in memory."""
795 super(MemoryObjectStore, self).__init__()
    def _to_hexsha(self, sha):
        """Normalize *sha* to hex form; binary SHAs are converted.

        :raises ValueError: for values of unexpected length
        """
        # NOTE(review): the length dispatch branches are not visible in this
        # chunk.
            return sha_to_hex(sha)
            raise ValueError("Invalid sha %r" % (sha,))
    def contains_loose(self, sha):
        """Check if a particular object is present by SHA1 and is loose."""
        # Membership test against the in-memory dict, keyed by hex sha.
        return self._to_hexsha(sha) in self._data
    def contains_packed(self, sha):
        """Check if a particular object is present by SHA1 and is packed."""
        # NOTE(review): the return (presumably False — memory stores hold no
        # packs) is not visible in this chunk.
815 """Iterate over the SHAs that are present in this store."""
816 return iter(self._data.keys())
820 """List with pack objects."""
    def get_raw(self, name):
        """Obtain the raw text for an object.

        :param name: sha for the object.
        :return: tuple with numeric type and object contents.
        """
        obj = self[self._to_hexsha(name)]
        return obj.type_num, obj.as_raw_string()
    def __getitem__(self, name):
        """Return a copy of the stored object for *name*.

        A copy is returned so callers cannot mutate the stored object.
        """
        return self._data[self._to_hexsha(name)].copy()
    def __delitem__(self, name):
        """Delete an object from this store, for testing only."""
        del self._data[self._to_hexsha(name)]
    def add_object(self, obj):
        """Add a single object to this object store.

        :param obj: Object to add
        """
        # Store a copy so later mutation of obj does not affect the store.
        self._data[obj.id] = obj.copy()
    def add_objects(self, objects, progress=None):
        """Add a set of objects to this object store.

        :param objects: Iterable over a list of (object, path) tuples
        """
        # NOTE(review): the per-object add call in the loop body is not
        # visible in this chunk.
        for obj, path in objects:
        """Add a new pack to this object store.

        Because this object store doesn't support packs, we extract and add the
        individual objects.

        :return: Fileobject to write to and a commit function to
            call when the pack is finished.
        """
        # NOTE(review): the `def add_pack(self):` line, the buffer creation,
        # and the bodies of the commit/abort closures are not fully visible
        # in this chunk.
            p = PackData.from_file(BytesIO(f.getvalue()), f.tell())
            for obj in PackInflater.for_pack_data(p, self.get_raw):
        return f, commit, abort
    def _complete_thin_pack(self, f, indexer):
        """Complete a thin pack by adding external references.

        :param f: Open file object for the pack.
        :param indexer: A PackIndexer for indexing the pack.
        """
        # NOTE(review): the seek calls around the header rewrite and the
        # trailer write of pack_sha are not visible in this chunk.
        entries = list(indexer)

        # Update the header with the new number of objects.
        write_pack_header(f, len(entries) + len(indexer.ext_refs()))

        # Rescan the rest of the pack, computing the SHA with the new header.
        new_sha = compute_file_sha(f, end_ofs=-20)

        # Append objects the thin pack referenced externally.
        for ext_sha in indexer.ext_refs():
            assert len(ext_sha) == 20
            type_num, data = self.get_raw(ext_sha)
            write_pack_object(f, type_num, data, sha=new_sha)
        pack_sha = new_sha.digest()
    def add_thin_pack(self, read_all, read_some):
        """Add a new thin pack to this object store.

        Thin packs are packs that contain deltas with parents that exist
        outside the pack. Because this object store doesn't support packs, we
        extract and add the individual objects.

        :param read_all: Read function that blocks until the number of
            requested bytes are read.
        :param read_some: Read function that returns at least one byte, but may
            not return the number of bytes requested.
        """
        # NOTE(review): the try line, the trailing PackStreamCopier argument,
        # the copier.verify() step, and the abort/commit handling after the
        # except are not visible in this chunk.
        f, commit, abort = self.add_pack()
            indexer = PackIndexer(f, resolve_ext_ref=self.get_raw)
            copier = PackStreamCopier(read_all, read_some, f,
            self._complete_thin_pack(f, indexer)
        except BaseException:
class ObjectIterator(object):
    """Interface for iterating over objects."""

    def iterobjects(self):
        """Return an iterator over the objects; subclasses must override."""
        raise NotImplementedError(self.iterobjects)
930 class ObjectStoreIterator(ObjectIterator):
931 """ObjectIterator that works on top of an ObjectStore."""
    def __init__(self, store, sha_iter):
        """Create a new ObjectIterator.

        :param store: Object store to retrieve from
        :param sha_iter: Iterator over (sha, path) tuples
        """
        # NOTE(review): the assignments of self.store and the sha cache list
        # are not visible in this chunk.
        self.sha_iter = sha_iter
944 """Yield tuple with next object and path."""
945 for sha, path in self.itershas():
946 yield self.store[sha], path
    def iterobjects(self):
        """Iterate over just the objects."""
        # NOTE(review): the yielding loop body is not visible in this chunk.
        """Iterate over the SHAs."""
        # NOTE(review): the `def itershas(self):` line and the yield
        # statements are not visible in this chunk. Serves cached SHAs first,
        # then drains the underlying iterator while caching.
        for sha in self._shas:
        for sha in self.sha_iter:
            self._shas.append(sha)
    def __contains__(self, needle):
        """Check if an object is present.

        :note: This checks if the object is present in
            the underlying object store, not if it would
            be yielded by the iterator.

        :param needle: SHA1 of the object to check for
        """
        return needle in self.store
    def __getitem__(self, key):
        """Find an object by SHA1.

        :note: This retrieves the object from the underlying
            object store. It will also succeed if the object would
            not be returned by the iterator.
        """
        return self.store[key]
        """Return the number of objects."""
        # NOTE(review): the `def __len__(self):` line is not visible here.
        # O(n): materializes the full sha list on every call.
        return len(list(self.itershas()))
986 iter = self.itershas()
989 except StopIteration:
995 """Indicate whether this object has contents."""
996 return not self.empty()
def tree_lookup_path(lookup_obj, root_sha, path):
    """Look up an object in a Git tree.

    :param lookup_obj: Callback for retrieving object by SHA1
    :param root_sha: SHA1 of the root tree
    :param path: Path to lookup
    :return: A tuple of (mode, SHA) of the resulting path.
    :raises NotTreeError: if root_sha does not resolve to a Tree
    """
    tree = lookup_obj(root_sha)
    if not isinstance(tree, Tree):
        raise NotTreeError(root_sha)
    return tree.lookup_path(lookup_obj, path)
def _collect_filetree_revs(obj_store, tree_sha, kset):
    """Collect SHA1s of files and directories for specified tree.

    Submodule (gitlink) entries are skipped; subtrees are recursed into.

    :param obj_store: Object store to get objects by SHA from
    :param tree_sha: tree reference to walk
    :param kset: set to fill with references to files and directories
    """
    # NOTE(review): the line adding each sha to kset is not visible in this
    # chunk — confirm it precedes the recursion.
    filetree = obj_store[tree_sha]
    for name, mode, sha in filetree.iteritems():
        if not S_ISGITLINK(mode) and sha not in kset:
            if stat.S_ISDIR(mode):
                _collect_filetree_revs(obj_store, sha, kset)
def _split_commits_and_tags(obj_store, lst, ignore_unknown=False):
    """Split object id list into three lists with commit, tag, and other SHAs.

    Commits referenced by tags are included into commits
    list as well. Only SHA1s known in this repository will get
    through, and unless ignore_unknown argument is True, KeyError
    is thrown for SHA1 missing in the repository

    :param obj_store: Object store to get objects by SHA1 from
    :param lst: Collection of commit and tag SHAs
    :param ignore_unknown: True to skip SHA1 missing in the repository
    :return: A tuple of (commits, tags, others) SHA1s
    """
    # NOTE(review): the accumulator initialization, the loop over lst, the
    # try/except around the store lookup, and the branch bodies are not
    # visible in this chunk. Tags are followed recursively so their pointed-to
    # commits land in the commits set.
            if not ignore_unknown:
        if isinstance(o, Commit):
        elif isinstance(o, Tag):
            tagged = o.object[1]
            c, t, o = _split_commits_and_tags(
                obj_store, [tagged], ignore_unknown=ignore_unknown)
    return (commits, tags, others)
class MissingObjectFinder(object):
    """Find the objects missing from another object store.

    :param object_store: Object store containing at least all objects to be
        sent
    :param haves: SHA1s of commits not to send (already present in target)
    :param wants: SHA1s of commits to send
    :param progress: Optional function to report progress to.
    :param get_tagged: Function that returns a dict of pointed-to sha -> tag
        sha for including tags.
    :param get_parents: Optional function for getting the parents of a commit.
    :param tagged: dict of pointed-to sha -> tag sha for including tags
    """

    def __init__(self, object_store, haves, wants, progress=None,
                 get_tagged=None, get_parents=lambda commit: commit.parents):
        self.object_store = object_store
        self._get_parents = get_parents
        # process Commits and Tags differently
        # Note, while haves may list commits/tags not available locally,
        # and such SHAs would get filtered out by _split_commits_and_tags,
        # wants shall list only known SHAs, and otherwise
        # _split_commits_and_tags fails with KeyError
        have_commits, have_tags, have_others = (
            _split_commits_and_tags(object_store, haves, True))
        want_commits, want_tags, want_others = (
            _split_commits_and_tags(object_store, wants, False))
        # all_ancestors is a set of commits that shall not be sent
        # (complete repository up to 'haves')
        all_ancestors = object_store._collect_ancestors(
            have_commits, get_parents=self._get_parents)[0]
        # all_missing - complete set of commits between haves and wants
        # common - commits from all_ancestors we hit into while
        # traversing parent hierarchy of wants
        missing_commits, common_commits = object_store._collect_ancestors(
            want_commits, all_ancestors, get_parents=self._get_parents)
        self.sha_done = set()
        # Now, fill sha_done with commits and revisions of
        # files and directories known to be both locally
        # and on target. Thus these commits and files
        # won't get selected for fetch
        for h in common_commits:
            self.sha_done.add(h)
            cmt = object_store[h]
            _collect_filetree_revs(object_store, cmt.tree, self.sha_done)
        # record tags we have as visited, too
        # NOTE(review): the `for t in have_tags:` loop header is not visible
        # in this chunk.
            self.sha_done.add(t)
        missing_tags = want_tags.difference(have_tags)
        missing_others = want_others.difference(have_others)
        # in fact, what we 'want' is commits, tags, and others
        # we've found missing
        wants = missing_commits.union(missing_tags)
        wants = wants.union(missing_others)
        # Each queue entry is a (sha, name, leaf) triple.
        self.objects_to_send = set([(w, None, False) for w in wants])
        # NOTE(review): the `else:` between the two progress assignments is
        # not visible in this chunk.
        if progress is None:
            self.progress = lambda x: None
            self.progress = progress
        self._tagged = get_tagged and get_tagged() or {}
1131 def add_todo(self, entries):
1132 self.objects_to_send.update([e for e in entries
1133 if not e[0] in self.sha_done])
        # NOTE(review): the `def next(self):` line, the sentinel return for an
        # empty queue, and the `if not leaf:` guard are not visible in this
        # chunk.
        if not self.objects_to_send:
        (sha, name, leaf) = self.objects_to_send.pop()
        if sha not in self.sha_done:
            # Expand the object into further work items by type.
            o = self.object_store[sha]
            if isinstance(o, Commit):
                self.add_todo([(o.tree, "", False)])
            elif isinstance(o, Tree):
                self.add_todo([(s, n, not stat.S_ISDIR(m))
                               for n, m, s in o.iteritems()
                               if not S_ISGITLINK(m)])
            elif isinstance(o, Tag):
                self.add_todo([(o.object[1], None, False)])
            if sha in self._tagged:
                self.add_todo([(self._tagged[sha], None, True)])
            self.sha_done.add(sha)
            self.progress(("counting objects: %d\r" %
                           len(self.sha_done)).encode('ascii'))
1162 class ObjectStoreGraphWalker(object):
1163 """Graph walker that finds what commits are missing from an object store.
1165 :ivar heads: Revisions without descendants in the local repo
1166 :ivar get_parents: Function to retrieve parents in the local repo
    def __init__(self, local_heads, get_parents, shallow=None):
        """Create a new instance.

        :param local_heads: Heads to start search with
        :param get_parents: Function for finding the parents of a SHA1.
        :param shallow: Optional set of shallow commits — presumably
            defaulted elsewhere; the defaulting lines are not visible here.
        """
        self.heads = set(local_heads)
        self.get_parents = get_parents
        # NOTE(review): the initialization of self.parents is not visible in
        # this chunk; ack() and next() both rely on it.
        self.shallow = shallow
1183 """Ack that a revision and its ancestors are present in the source."""
1185 raise ValueError("unexpected sha %r received" % sha)
1186 ancestors = set([sha])
1188 # stop if we run out of heads to remove
1192 self.heads.remove(a)
1194 # collect all ancestors
1195 new_ancestors = set()
1197 ps = self.parents.get(a)
1199 new_ancestors.update(ps)
1200 self.parents[a] = None
1202 # no more ancestors; stop
1203 if not new_ancestors:
1206 ancestors = new_ancestors
1209 """Iterate over ancestors of heads in the target."""
1211 ret = self.heads.pop()
1212 ps = self.get_parents(ret)
1213 self.parents[ret] = ps
1215 [p for p in ps if p not in self.parents])
def commit_tree_changes(object_store, tree, changes):
    """Commit a specified set of changes to a tree structure.

    This will apply a set of changes on top of an existing tree, storing new
    objects in object_store.

    changes are a list of tuples with (path, mode, object_sha).
    Paths can be both blobs and trees. Setting the mode and
    object sha to None deletes the path.

    This method works especially well if there are only a small
    number of changes to a big tree. For a large number of changes
    to a large tree, use e.g. commit_tree.

    :param object_store: Object store to store new objects in
        and retrieve old ones from.
    :param tree: Original tree root
    :param changes: changes to apply
    :return: New tree root object
    """
    # TODO(jelmer): Save up the objects and add them using .add_objects
    # rather than with individual calls to .add_object.
    # NOTE(review): the initialization of nested_changes, the branch
    # conditions inside both loops, the deletion handling, and the final
    # return are not visible in this chunk.
    for (path, new_mode, new_sha) in changes:
            (dirname, subpath) = path.split(b'/', 1)
            tree[path] = (new_mode, new_sha)
            nested_changes.setdefault(dirname, []).append(
                (subpath, new_mode, new_sha))
    for name, subchanges in nested_changes.items():
            orig_subtree = object_store[tree[name][1]]
            orig_subtree = Tree()
        subtree = commit_tree_changes(object_store, orig_subtree, subchanges)
        if len(subtree) == 0:
            tree[name] = (stat.S_IFDIR, subtree.id)
    object_store.add_object(tree)
1270 class OverlayObjectStore(BaseObjectStore):
1271 """Object store that can overlay multiple object stores."""
    def __init__(self, bases, add_store=None):
        """Create an overlay; writes go to *add_store* when provided.

        :param bases: Object stores to overlay, searched in order
        :param add_store: Optional store that receives added objects
        """
        # NOTE(review): the assignment of self.bases is not visible in this
        # chunk.
        self.add_store = add_store
    def add_object(self, object):
        """Add a single object via the backing add_store.

        :raises NotImplementedError: when no writable store was provided
        """
        # `object` shadows the builtin, but renaming it could break keyword
        # callers, so it is kept.
        if self.add_store is None:
            raise NotImplementedError(self.add_object)
        return self.add_store.add_object(object)
1282 def add_objects(self, objects, progress=None):
1283 if self.add_store is None:
1284 raise NotImplementedError(self.add_object)
1285 return self.add_store.add_objects(objects, progress)
1290 for b in self.bases:
1296 for b in self.bases:
1298 if o_id not in done:
    def get_raw(self, sha_id):
        """Obtain the raw text for an object from the first base that has it.

        :raises KeyError: if no base store contains sha_id
        """
        # NOTE(review): the try/except around the per-base lookup is not
        # visible in this chunk.
        for b in self.bases:
                return b.get_raw(sha_id)
        raise KeyError(sha_id)
    def contains_packed(self, sha):
        """Check whether any base store has *sha* as a packed object."""
        # NOTE(review): the returns for the hit and miss cases are not
        # visible in this chunk.
        for b in self.bases:
            if b.contains_packed(sha):
    def contains_loose(self, sha):
        """Check whether any base store has *sha* as a loose object."""
        # NOTE(review): the returns for the hit and miss cases are not
        # visible in this chunk.
        for b in self.bases:
            if b.contains_loose(sha):
def read_packs_file(f):
    """Yield the packs listed in a packs file.

    :param f: File-like object opened in binary mode
    """
    # NOTE(review): the blank-line skip and the filter on the entry kind
    # (expected to be b"P") are not visible in this chunk.
    for line in f.read().splitlines():
        (kind, name) = line.split(b" ", 1)
        yield name.decode(sys.getfilesystemencoding())