1 # object_store.py -- Object store for git objects
2 # Copyright (C) 2008-2009 Jelmer Vernooij <jelmer@samba.org>
4 # This program is free software; you can redistribute it and/or
5 # modify it under the terms of the GNU General Public License
6 # as published by the Free Software Foundation; either version 2
7 # or (at your option) a later version of the License.
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU General Public License for more details.
14 # You should have received a copy of the GNU General Public License
15 # along with this program; if not, write to the Free Software
16 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
20 """Git object store interfaces and implementation."""
import errno
import itertools
import os
import stat
import tempfile

from dulwich.diff_tree import (
    tree_changes,
    walk_trees,
    )
from dulwich.errors import (
    NotTreeError,
    )
from dulwich.file import GitFile
from dulwich.objects import (
    Commit,
    ShaFile,
    Tag,
    Tree,
    ZERO_SHA,
    hex_to_sha,
    sha_to_hex,
    hex_to_filename,
    S_ISGITLINK,
    object_class,
    )
from dulwich.pack import (
    Pack,
    PackData,
    iter_sha1,
    write_pack_header,
    write_pack_index_v2,
    write_pack_object,
    write_pack_objects,
    compute_file_sha,
    PackIndexer,
    PackStreamCopier,
    )

# Name of the pack subdirectory inside the object store.
PACKDIR = 'pack'
class BaseObjectStore(object):
    """Object store interface."""

    def determine_wants_all(self, refs):
        """Return the SHAs from a refs dict that are worth fetching.

        Skips objects already present, peeled ref entries ("^{}") and
        zero SHAs.
        """
        return [sha for (ref, sha) in refs.iteritems()
                if not sha in self and not ref.endswith("^{}") and
                   not sha == ZERO_SHA]

    def iter_shas(self, shas):
        """Iterate over the objects for the specified shas.

        :param shas: Iterable object with SHAs
        :return: Object iterator
        """
        return ObjectStoreIterator(self, shas)

    def contains_loose(self, sha):
        """Check if a particular object is present by SHA1 and is loose."""
        raise NotImplementedError(self.contains_loose)

    def contains_packed(self, sha):
        """Check if a particular object is present by SHA1 and is packed."""
        raise NotImplementedError(self.contains_packed)

    def __contains__(self, sha):
        """Check if a particular object is present by SHA1.

        This method makes no distinction between loose and packed objects.
        """
        return self.contains_packed(sha) or self.contains_loose(sha)

    @property
    def packs(self):
        """Iterable of pack objects."""
        raise NotImplementedError

    def get_raw(self, name):
        """Obtain the raw text for an object.

        :param name: sha for the object.
        :return: tuple with numeric type and object contents.
        """
        raise NotImplementedError(self.get_raw)

    def __getitem__(self, sha):
        """Obtain an object by SHA1."""
        type_num, uncomp = self.get_raw(sha)
        return ShaFile.from_raw_string(type_num, uncomp)

    def __iter__(self):
        """Iterate over the SHAs that are present in this store."""
        raise NotImplementedError(self.__iter__)

    def add_object(self, obj):
        """Add a single object to this object store.

        :param obj: Object to add
        """
        raise NotImplementedError(self.add_object)

    def add_objects(self, objects):
        """Add a set of objects to this object store.

        :param objects: Iterable over a list of objects.
        """
        raise NotImplementedError(self.add_objects)

    def tree_changes(self, source, target, want_unchanged=False):
        """Find the differences between the contents of two trees.

        :param source: SHA1 of the source tree
        :param target: SHA1 of the target tree
        :param want_unchanged: Whether unchanged files should be reported
        :return: Iterator over tuples with
            (oldpath, newpath), (oldmode, newmode), (oldsha, newsha)
        """
        # The name shadows the module-level tree_changes import, which is
        # what the call below resolves to.
        for change in tree_changes(self, source, target,
                                   want_unchanged=want_unchanged):
            yield ((change.old.path, change.new.path),
                   (change.old.mode, change.new.mode),
                   (change.old.sha, change.new.sha))

    def iter_tree_contents(self, tree_id, include_trees=False):
        """Iterate the contents of a tree and all subtrees.

        Iteration is depth-first pre-order, as in e.g. os.walk.

        :param tree_id: SHA1 of the tree.
        :param include_trees: If True, include tree objects in the iteration.
        :return: Iterator over TreeEntry namedtuples for all the objects in a
            tree.
        """
        for entry, _ in walk_trees(self, tree_id, None):
            if not stat.S_ISDIR(entry.mode) or include_trees:
                yield entry

    def find_missing_objects(self, haves, wants, progress=None,
                             get_tagged=None):
        """Find the missing objects required for a set of revisions.

        :param haves: Iterable over SHAs already in common.
        :param wants: Iterable over SHAs of objects to fetch.
        :param progress: Simple progress function that will be called with
            updated progress strings.
        :param get_tagged: Function that returns a dict of pointed-to sha -> tag
            sha for including tags.
        :return: Iterator over (sha, path) pairs.
        """
        finder = MissingObjectFinder(self, haves, wants, progress, get_tagged)
        return iter(finder.next, None)

    def find_common_revisions(self, graphwalker):
        """Find which revisions this store has in common using graphwalker.

        :param graphwalker: A graphwalker object.
        :return: List of SHAs that are in common
        """
        haves = []
        sha = graphwalker.next()
        while sha:
            if sha in self:
                haves.append(sha)
                graphwalker.ack(sha)
            sha = graphwalker.next()
        return haves

    def get_graph_walker(self, heads):
        """Obtain a graph walker for this object store.

        :param heads: Local heads to start search with
        :return: GraphWalker object
        """
        return ObjectStoreGraphWalker(heads, lambda sha: self[sha].parents)

    def generate_pack_contents(self, have, want, progress=None):
        """Iterate over the contents of a pack file.

        :param have: List of SHA1s of objects that should not be sent
        :param want: List of SHA1s of objects that should be sent
        :param progress: Optional progress reporting method
        """
        return self.iter_shas(self.find_missing_objects(have, want, progress))

    def peel_sha(self, sha):
        """Peel all tags from a SHA.

        :param sha: The object SHA to peel.
        :return: The fully-peeled SHA1 of a tag object, after peeling all
            intermediate tags; if the original ref does not point to a tag, this
            will equal the original SHA1.
        """
        obj = self[sha]
        obj_class = object_class(obj.type_name)
        while obj_class is Tag:
            obj_class, sha = obj.object
            obj = self[sha]
        return obj
class PackBasedObjectStore(BaseObjectStore):
    """Object store backed by pack files plus loose objects."""

    def __init__(self):
        # Cache of Pack objects; None means "not loaded yet".
        self._pack_cache = None

    @property
    def alternates(self):
        """Alternate object stores to search after this one (none here)."""
        return []

    def contains_packed(self, sha):
        """Check if a particular object is present by SHA1 and is packed."""
        for pack in self.packs:
            if sha in pack:
                return True
        return False

    def _load_packs(self):
        raise NotImplementedError(self._load_packs)

    def _pack_cache_stale(self):
        """Check whether the pack cache is stale."""
        raise NotImplementedError(self._pack_cache_stale)

    def _add_known_pack(self, pack):
        """Add a newly appeared pack to the cache by path.

        :param pack: Pack object to register with the cache.
        """
        if self._pack_cache is not None:
            self._pack_cache.append(pack)

    @property
    def packs(self):
        """List with pack objects."""
        if self._pack_cache is None or self._pack_cache_stale():
            self._pack_cache = self._load_packs()
        return self._pack_cache

    def _iter_loose_objects(self):
        """Iterate over the SHAs of all loose objects."""
        raise NotImplementedError(self._iter_loose_objects)

    def _get_loose_object(self, sha):
        raise NotImplementedError(self._get_loose_object)

    def _remove_loose_object(self, sha):
        raise NotImplementedError(self._remove_loose_object)

    def pack_loose_objects(self):
        """Pack loose objects.

        :return: Number of objects packed
        """
        objects = set()
        for sha in self._iter_loose_objects():
            objects.add((self._get_loose_object(sha), None))
        self.add_objects(list(objects))
        for obj, path in objects:
            self._remove_loose_object(obj.id)
        return len(objects)

    def __iter__(self):
        """Iterate over the SHAs that are present in this store."""
        iterables = self.packs + [self._iter_loose_objects()]
        return itertools.chain(*iterables)

    def contains_loose(self, sha):
        """Check if a particular object is present by SHA1 and is loose."""
        return self._get_loose_object(sha) is not None

    def get_raw(self, name):
        """Obtain the raw text for an object.

        :param name: sha for the object (hex string of length 40 or binary
            of length 20).
        :return: tuple with numeric type and object contents.
        :raise KeyError: if the object is not found here or in any alternate.
        """
        if len(name) == 40:
            sha = hex_to_sha(name)
            hexsha = name
        elif len(name) == 20:
            sha = name
            hexsha = None
        else:
            raise AssertionError("Invalid object name %r" % name)
        for pack in self.packs:
            try:
                return pack.get_raw(sha)
            except KeyError:
                pass
        if hexsha is None:
            hexsha = sha_to_hex(name)
        ret = self._get_loose_object(hexsha)
        if ret is not None:
            return ret.type_num, ret.as_raw_string()
        for alternate in self.alternates:
            try:
                return alternate.get_raw(hexsha)
            except KeyError:
                pass
        raise KeyError(hexsha)

    def add_objects(self, objects):
        """Add a set of objects to this object store.

        :param objects: Iterable over objects, should support __len__.
        :return: Pack object of the objects written.
        """
        if len(objects) == 0:
            # Don't bother writing an empty pack file
            return
        f, commit = self.add_pack()
        write_pack_objects(f, objects)
        return commit()
338 class DiskObjectStore(PackBasedObjectStore):
339 """Git-style object store that exists on disk."""
341 def __init__(self, path):
342 """Open an object store.
344 :param path: Path of the object store.
346 super(DiskObjectStore, self).__init__()
348 self.pack_dir = os.path.join(self.path, PACKDIR)
349 self._pack_cache_time = 0
350 self._alternates = None
353 def alternates(self):
354 if self._alternates is not None:
355 return self._alternates
356 self._alternates = []
357 for path in self._read_alternate_paths():
358 self._alternates.append(DiskObjectStore(path))
359 return self._alternates
361 def _read_alternate_paths(self):
363 f = GitFile(os.path.join(self.path, "info", "alternates"),
365 except (OSError, IOError), e:
366 if e.errno == errno.ENOENT:
371 for l in f.readlines():
375 if not os.path.isabs(l):
382 def add_alternate_path(self, path):
383 """Add an alternate path to this object store.
386 os.mkdir(os.path.join(self.path, "info"))
388 if e.errno != errno.EEXIST:
390 alternates_path = os.path.join(self.path, "info/alternates")
391 f = GitFile(alternates_path, 'wb')
394 orig_f = open(alternates_path, 'rb')
395 except (OSError, IOError), e:
396 if e.errno != errno.ENOENT:
400 f.write(orig_f.read())
403 f.write("%s\n" % path)
406 self.alternates.append(DiskObjectStore(path))
408 def _load_packs(self):
411 self._pack_cache_time = os.stat(self.pack_dir).st_mtime
412 pack_dir_contents = os.listdir(self.pack_dir)
413 for name in pack_dir_contents:
414 # TODO: verify that idx exists first
415 if name.startswith("pack-") and name.endswith(".pack"):
416 filename = os.path.join(self.pack_dir, name)
417 pack_files.append((os.stat(filename).st_mtime, filename))
419 if e.errno == errno.ENOENT:
422 pack_files.sort(reverse=True)
423 suffix_len = len(".pack")
424 return [Pack(f[:-suffix_len]) for _, f in pack_files]
426 def _pack_cache_stale(self):
428 return os.stat(self.pack_dir).st_mtime > self._pack_cache_time
430 if e.errno == errno.ENOENT:
434 def _get_shafile_path(self, sha):
435 # Check from object dir
436 return hex_to_filename(self.path, sha)
438 def _iter_loose_objects(self):
439 for base in os.listdir(self.path):
442 for rest in os.listdir(os.path.join(self.path, base)):
445 def _get_loose_object(self, sha):
446 path = self._get_shafile_path(sha)
448 return ShaFile.from_path(path)
449 except (OSError, IOError), e:
450 if e.errno == errno.ENOENT:
454 def _remove_loose_object(self, sha):
455 os.remove(self._get_shafile_path(sha))
457 def _complete_thin_pack(self, f, path, copier, indexer):
458 """Move a specific file containing a pack into the pack directory.
460 :note: The file should be on the same file system as the
463 :param f: Open file object for the pack.
464 :param path: Path to the pack file.
465 :param copier: A PackStreamCopier to use for writing pack data.
466 :param indexer: A PackIndexer for indexing the pack.
468 entries = list(indexer)
470 # Update the header with the new number of objects.
472 write_pack_header(f, len(entries) + len(indexer.ext_refs()))
474 # Must flush before reading (http://bugs.python.org/issue3207)
477 # Rescan the rest of the pack, computing the SHA with the new header.
478 new_sha = compute_file_sha(f, end_ofs=-20)
480 # Must reposition before writing (http://bugs.python.org/issue3207)
481 f.seek(0, os.SEEK_CUR)
484 for ext_sha in indexer.ext_refs():
485 assert len(ext_sha) == 20
486 type_num, data = self.get_raw(ext_sha)
488 crc32 = write_pack_object(f, type_num, data, sha=new_sha)
489 entries.append((ext_sha, offset, crc32))
490 pack_sha = new_sha.digest()
496 pack_base_name = os.path.join(
497 self.pack_dir, 'pack-' + iter_sha1(e[0] for e in entries))
498 os.rename(path, pack_base_name + '.pack')
501 index_file = GitFile(pack_base_name + '.idx', 'wb')
503 write_pack_index_v2(index_file, entries, pack_sha)
508 # Add the pack to the store and return it.
509 final_pack = Pack(pack_base_name)
510 final_pack.check_length_and_checksum()
511 self._add_known_pack(final_pack)
514 def add_thin_pack(self, read_all, read_some):
515 """Add a new thin pack to this object store.
517 Thin packs are packs that contain deltas with parents that exist outside
518 the pack. They should never be placed in the object store directly, and
519 always indexed and completed as they are copied.
521 :param read_all: Read function that blocks until the number of requested
523 :param read_some: Read function that returns at least one byte, but may
524 not return the number of bytes requested.
525 :return: A Pack object pointing at the now-completed thin pack in the
526 objects/pack directory.
528 fd, path = tempfile.mkstemp(dir=self.path, prefix='tmp_pack_')
529 f = os.fdopen(fd, 'w+b')
532 indexer = PackIndexer(f, resolve_ext_ref=self.get_raw)
533 copier = PackStreamCopier(read_all, read_some, f,
536 return self._complete_thin_pack(f, path, copier, indexer)
540 def move_in_pack(self, path):
541 """Move a specific file containing a pack into the pack directory.
543 :note: The file should be on the same file system as the
546 :param path: Path to the pack file.
549 entries = p.sorted_entries()
550 basename = os.path.join(self.pack_dir,
551 "pack-%s" % iter_sha1(entry[0] for entry in entries))
552 f = GitFile(basename+".idx", "wb")
554 write_pack_index_v2(f, entries, p.get_stored_checksum())
558 os.rename(path, basename + ".pack")
559 final_pack = Pack(basename)
560 self._add_known_pack(final_pack)
564 """Add a new pack to this object store.
566 :return: Fileobject to write to and a commit function to
567 call when the pack is finished.
569 fd, path = tempfile.mkstemp(dir=self.pack_dir, suffix=".pack")
570 f = os.fdopen(fd, 'wb')
574 if os.path.getsize(path) > 0:
575 return self.move_in_pack(path)
581 def add_object(self, obj):
582 """Add a single object to this object store.
584 :param obj: Object to add
586 dir = os.path.join(self.path, obj.id[:2])
590 if e.errno != errno.EEXIST:
592 path = os.path.join(dir, obj.id[2:])
593 if os.path.exists(path):
594 return # Already there, no need to write again
595 f = GitFile(path, 'wb')
597 f.write(obj.as_legacy_object())
606 if e.errno != errno.EEXIST:
608 os.mkdir(os.path.join(path, "info"))
609 os.mkdir(os.path.join(path, PACKDIR))
class MemoryObjectStore(BaseObjectStore):
    """Object store that keeps all objects in memory."""

    def __init__(self):
        super(MemoryObjectStore, self).__init__()
        # Maps hex sha -> ShaFile object.
        self._data = {}

    def _to_hexsha(self, sha):
        """Normalize a 40-char hex or 20-byte binary sha to hex form."""
        if len(sha) == 40:
            return sha
        elif len(sha) == 20:
            return sha_to_hex(sha)
        else:
            raise ValueError("Invalid sha %r" % sha)

    def contains_loose(self, sha):
        """Check if a particular object is present by SHA1 and is loose."""
        return self._to_hexsha(sha) in self._data

    def contains_packed(self, sha):
        """Check if a particular object is present by SHA1 and is packed."""
        return False

    def __iter__(self):
        """Iterate over the SHAs that are present in this store."""
        return self._data.iterkeys()

    @property
    def packs(self):
        """List with pack objects."""
        return []

    def get_raw(self, name):
        """Obtain the raw text for an object.

        :param name: sha for the object.
        :return: tuple with numeric type and object contents.
        """
        obj = self[self._to_hexsha(name)]
        return obj.type_num, obj.as_raw_string()

    def __getitem__(self, name):
        return self._data[self._to_hexsha(name)]

    def __delitem__(self, name):
        """Delete an object from this store, for testing only."""
        del self._data[self._to_hexsha(name)]

    def add_object(self, obj):
        """Add a single object to this object store.

        :param obj: Object to add
        """
        self._data[obj.id] = obj

    def add_objects(self, objects):
        """Add a set of objects to this object store.

        :param objects: Iterable over a list of objects.
        """
        for obj, path in objects:
            self._data[obj.id] = obj
class ObjectImporter(object):
    """Interface for importing objects."""

    def __init__(self, count):
        """Create a new ObjectImporter.

        :param count: Number of objects that's going to be imported.
        """
        self.count = count

    def add_object(self, object):
        """Add an object."""
        raise NotImplementedError(self.add_object)

    def finish(self, object):
        """Finish the import and write objects to disk."""
        raise NotImplementedError(self.finish)
class ObjectIterator(object):
    """Interface for iterating over objects."""

    def iterobjects(self):
        raise NotImplementedError(self.iterobjects)
class ObjectStoreIterator(ObjectIterator):
    """ObjectIterator that works on top of an ObjectStore."""

    def __init__(self, store, sha_iter):
        """Create a new ObjectIterator.

        :param store: Object store to retrieve from
        :param sha_iter: Iterator over (sha, path) tuples
        """
        self.store = store
        self.sha_iter = sha_iter
        # Cache of (sha, path) tuples already drawn from sha_iter, so the
        # iterator can be traversed more than once.
        self._shas = []

    def __iter__(self):
        """Yield tuple with next object and path."""
        for sha, path in self.itershas():
            yield self.store[sha], path

    def iterobjects(self):
        """Iterate over just the objects."""
        for o, path in self:
            yield o

    def itershas(self):
        """Iterate over the SHAs."""
        for sha in self._shas:
            yield sha
        for sha in self.sha_iter:
            self._shas.append(sha)
            yield sha

    def __contains__(self, needle):
        """Check if an object is present.

        :note: This checks if the object is present in
            the underlying object store, not if it would
            be yielded by the iterator.

        :param needle: SHA1 of the object to check for
        """
        return needle in self.store

    def __getitem__(self, key):
        """Find an object by SHA1.

        :note: This retrieves the object from the underlying
            object store. It will also succeed if the object would
            not be returned by the iterator.
        """
        return self.store[key]

    def __len__(self):
        """Return the number of objects."""
        return len(list(self.itershas()))
def tree_lookup_path(lookup_obj, root_sha, path):
    """Look up an object in a Git tree.

    :param lookup_obj: Callback for retrieving object by SHA1
    :param root_sha: SHA1 of the root tree
    :param path: Path to lookup
    :return: A tuple of (mode, SHA) of the resulting path.
    :raise NotTreeError: if the object at root_sha is not a tree.
    """
    tree = lookup_obj(root_sha)
    if not isinstance(tree, Tree):
        raise NotTreeError(root_sha)
    return tree.lookup_path(lookup_obj, path)
class MissingObjectFinder(object):
    """Find the objects missing from another object store.

    :param object_store: Object store containing at least all objects to be
        sent
    :param haves: SHA1s of commits not to send (already present in target)
    :param wants: SHA1s of commits to send
    :param progress: Optional function to report progress to.
    :param get_tagged: Function that returns a dict of pointed-to sha -> tag
        sha for including tags.
    :param tagged: dict of pointed-to sha -> tag sha for including tags
    """

    def __init__(self, object_store, haves, wants, progress=None,
                 get_tagged=None):
        haves = set(haves)
        self.sha_done = haves
        self.objects_to_send = set([(w, None, False) for w in wants
                                    if not w in haves])
        self.object_store = object_store
        if progress is None:
            self.progress = lambda x: None
        else:
            self.progress = progress
        self._tagged = get_tagged and get_tagged() or {}

    def add_todo(self, entries):
        self.objects_to_send.update([e for e in entries
                                     if not e[0] in self.sha_done])

    def parse_tree(self, tree):
        # Submodule (gitlink) entries live in another repository; skip them.
        self.add_todo([(sha, name, not stat.S_ISDIR(mode))
                       for name, mode, sha in tree.iteritems()
                       if not S_ISGITLINK(mode)])

    def parse_commit(self, commit):
        self.add_todo([(commit.tree, "", False)])
        self.add_todo([(p, None, False) for p in commit.parents])

    def parse_tag(self, tag):
        self.add_todo([(tag.object[1], None, False)])

    def next(self):
        """Return the next missing (sha, name) pair, or None when done."""
        if not self.objects_to_send:
            return None
        (sha, name, leaf) = self.objects_to_send.pop()
        if sha not in self.sha_done:
            if not leaf:
                o = self.object_store[sha]
                if isinstance(o, Commit):
                    self.parse_commit(o)
                elif isinstance(o, Tree):
                    self.parse_tree(o)
                elif isinstance(o, Tag):
                    self.parse_tag(o)
            if sha in self._tagged:
                self.add_todo([(self._tagged[sha], None, True)])
            self.sha_done.add(sha)
            self.progress("counting objects: %d\r" % len(self.sha_done))
        return (sha, name)
class ObjectStoreGraphWalker(object):
    """Graph walker that finds what commits are missing from an object store.

    :ivar heads: Revisions without descendants in the local repo
    :ivar get_parents: Function to retrieve parents in the local repo
    """

    def __init__(self, local_heads, get_parents):
        """Create a new instance.

        :param local_heads: Heads to start search with
        :param get_parents: Function for finding the parents of a SHA1.
        """
        self.heads = set(local_heads)
        self.get_parents = get_parents
        # Maps sha -> list of parents, or None once acked/exhausted.
        self.parents = {}

    def ack(self, sha):
        """Ack that a revision and its ancestors are present in the source."""
        ancestors = set([sha])

        # stop if we run out of heads to remove
        while self.heads:
            for a in ancestors:
                if a in self.heads:
                    self.heads.remove(a)

            # collect all ancestors
            new_ancestors = set()
            for a in ancestors:
                ps = self.parents.get(a)
                if ps is not None:
                    new_ancestors.update(ps)
                self.parents[a] = None

            # no more ancestors; stop
            if not new_ancestors:
                break

            ancestors = new_ancestors

    def next(self):
        """Iterate over ancestors of heads in the target."""
        if self.heads:
            ret = self.heads.pop()
            ps = self.get_parents(ret)
            self.parents[ret] = ps
            self.heads.update([p for p in ps if not p in self.parents])
            return ret
        return None