1 # object_store.py -- Object store for git objects
2 # Copyright (C) 2008-2009 Jelmer Vernooij <jelmer@samba.org>
4 # This program is free software; you can redistribute it and/or
5 # modify it under the terms of the GNU General Public License
6 # as published by the Free Software Foundation; either version 2
7 # or (at your option) a later version of the License.
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU General Public License for more details.
14 # You should have received a copy of the GNU General Public License
15 # along with this program; if not, write to the Free Software
16 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
20 """Git object store interfaces and implementation."""
29 from dulwich.diff_tree import (
33 from dulwich.errors import (
36 from dulwich.file import GitFile
37 from dulwich.objects import (
49 from dulwich.pack import (
66 class BaseObjectStore(object):
67 """Object store interface."""
69 def determine_wants_all(self, refs):
70 return [sha for (ref, sha) in refs.iteritems()
71 if not sha in self and not ref.endswith("^{}") and
74 def iter_shas(self, shas):
75 """Iterate over the objects for the specified shas.
77 :param shas: Iterable object with SHAs
78 :return: Object iterator
80 return ObjectStoreIterator(self, shas)
82 def contains_loose(self, sha):
83 """Check if a particular object is present by SHA1 and is loose."""
84 raise NotImplementedError(self.contains_loose)
86 def contains_packed(self, sha):
87 """Check if a particular object is present by SHA1 and is packed."""
88 raise NotImplementedError(self.contains_packed)
90 def __contains__(self, sha):
91 """Check if a particular object is present by SHA1.
93 This method makes no distinction between loose and packed objects.
95 return self.contains_packed(sha) or self.contains_loose(sha)
99 """Iterable of pack objects."""
100 raise NotImplementedError
102 def get_raw(self, name):
103 """Obtain the raw text for an object.
105 :param name: sha for the object.
106 :return: tuple with numeric type and object contents.
108 raise NotImplementedError(self.get_raw)
110 def __getitem__(self, sha):
111 """Obtain an object by SHA1."""
112 type_num, uncomp = self.get_raw(sha)
113 return ShaFile.from_raw_string(type_num, uncomp)
116 """Iterate over the SHAs that are present in this store."""
117 raise NotImplementedError(self.__iter__)
119 def add_object(self, obj):
120 """Add a single object to this object store.
123 raise NotImplementedError(self.add_object)
125 def add_objects(self, objects):
126 """Add a set of objects to this object store.
128 :param objects: Iterable over a list of objects.
130 raise NotImplementedError(self.add_objects)
132 def tree_changes(self, source, target, want_unchanged=False):
133 """Find the differences between the contents of two trees
135 :param source: SHA1 of the source tree
136 :param target: SHA1 of the target tree
137 :param want_unchanged: Whether unchanged files should be reported
138 :return: Iterator over tuples with
139 (oldpath, newpath), (oldmode, newmode), (oldsha, newsha)
141 for change in tree_changes(self, source, target,
142 want_unchanged=want_unchanged):
143 yield ((change.old.path, change.new.path),
144 (change.old.mode, change.new.mode),
145 (change.old.sha, change.new.sha))
147 def iter_tree_contents(self, tree_id, include_trees=False):
148 """Iterate the contents of a tree and all subtrees.
150 Iteration is depth-first pre-order, as in e.g. os.walk.
152 :param tree_id: SHA1 of the tree.
153 :param include_trees: If True, include tree objects in the iteration.
154 :return: Iterator over TreeEntry namedtuples for all the objects in a
157 for entry, _ in walk_trees(self, tree_id, None):
158 if not stat.S_ISDIR(entry.mode) or include_trees:
161 def find_missing_objects(self, haves, wants, progress=None,
163 """Find the missing objects required for a set of revisions.
165 :param haves: Iterable over SHAs already in common.
166 :param wants: Iterable over SHAs of objects to fetch.
167 :param progress: Simple progress function that will be called with
168 updated progress strings.
169 :param get_tagged: Function that returns a dict of pointed-to sha -> tag
170 sha for including tags.
171 :return: Iterator over (sha, path) pairs.
173 finder = MissingObjectFinder(self, haves, wants, progress, get_tagged)
174 return iter(finder.next, None)
176 def find_common_revisions(self, graphwalker):
177 """Find which revisions this store has in common using graphwalker.
179 :param graphwalker: A graphwalker object.
180 :return: List of SHAs that are in common
183 sha = graphwalker.next()
188 sha = graphwalker.next()
191 def get_graph_walker(self, heads):
192 """Obtain a graph walker for this object store.
194 :param heads: Local heads to start search with
195 :return: GraphWalker object
197 return ObjectStoreGraphWalker(heads, lambda sha: self[sha].parents)
199 def generate_pack_contents(self, have, want, progress=None):
200 """Iterate over the contents of a pack file.
202 :param have: List of SHA1s of objects that should not be sent
203 :param want: List of SHA1s of objects that should be sent
204 :param progress: Optional progress reporting method
206 return self.iter_shas(self.find_missing_objects(have, want, progress))
208 def peel_sha(self, sha):
209 """Peel all tags from a SHA.
211 :param sha: The object SHA to peel.
212 :return: The fully-peeled SHA1 of a tag object, after peeling all
213 intermediate tags; if the original ref does not point to a tag, this
214 will equal the original SHA1.
217 obj_class = object_class(obj.type_name)
218 while obj_class is Tag:
219 obj_class, sha = obj.object
224 class PackBasedObjectStore(BaseObjectStore):
227 self._pack_cache = None
230 def alternates(self):
233 def contains_packed(self, sha):
234 """Check if a particular object is present by SHA1 and is packed."""
235 for pack in self.packs:
240 def __contains__(self, sha):
241 """Check if a particular object is present by SHA1 in the main or alternate stores.
243 This method makes no distinction between loose and packed objects.
245 if self.contains_packed(sha) or self.contains_loose(sha):
247 for alternate in self.alternates:
248 if alternate.contains_packed(sha) or alternate.contains_loose(sha):
252 def _load_packs(self):
253 raise NotImplementedError(self._load_packs)
255 def _pack_cache_stale(self):
256 """Check whether the pack cache is stale."""
257 raise NotImplementedError(self._pack_cache_stale)
259 def _add_known_pack(self, pack):
260 """Add a newly appeared pack to the cache by path.
263 if self._pack_cache is not None:
264 self._pack_cache.append(pack)
268 """List with pack objects."""
269 if self._pack_cache is None or self._pack_cache_stale():
270 self._pack_cache = self._load_packs()
271 return self._pack_cache
273 def _iter_alternate_objects(self):
274 """Iterate over the SHAs of all the objects in alternate stores."""
275 for alternate in self.alternates:
276 for alternate_object in alternate:
277 yield alternate_object
279 def _iter_loose_objects(self):
280 """Iterate over the SHAs of all loose objects."""
281 raise NotImplementedError(self._iter_loose_objects)
283 def _get_loose_object(self, sha):
284 raise NotImplementedError(self._get_loose_object)
286 def _remove_loose_object(self, sha):
287 raise NotImplementedError(self._remove_loose_object)
289 def pack_loose_objects(self):
290 """Pack loose objects.
292 :return: Number of objects packed
295 for sha in self._iter_loose_objects():
296 objects.add((self._get_loose_object(sha), None))
297 self.add_objects(list(objects))
298 for obj, path in objects:
299 self._remove_loose_object(obj.id)
303 """Iterate over the SHAs that are present in this store."""
304 iterables = self.packs + [self._iter_loose_objects()] + [self._iter_alternate_objects()]
305 return itertools.chain(*iterables)
307 def contains_loose(self, sha):
308 """Check if a particular object is present by SHA1 and is loose."""
309 return self._get_loose_object(sha) is not None
311 def get_raw(self, name):
312 """Obtain the raw text for an object.
314 :param name: sha for the object.
315 :return: tuple with numeric type and object contents.
318 sha = hex_to_sha(name)
320 elif len(name) == 20:
324 raise AssertionError("Invalid object name %r" % name)
325 for pack in self.packs:
327 return pack.get_raw(sha)
331 hexsha = sha_to_hex(name)
332 ret = self._get_loose_object(hexsha)
334 return ret.type_num, ret.as_raw_string()
335 for alternate in self.alternates:
337 return alternate.get_raw(hexsha)
340 raise KeyError(hexsha)
342 def add_objects(self, objects):
343 """Add a set of objects to this object store.
345 :param objects: Iterable over objects, should support __len__.
346 :return: Pack object of the objects written.
348 if len(objects) == 0:
349 # Don't bother writing an empty pack file
351 f, commit = self.add_pack()
352 write_pack_objects(f, objects)
356 class DiskObjectStore(PackBasedObjectStore):
357 """Git-style object store that exists on disk."""
359 def __init__(self, path):
360 """Open an object store.
362 :param path: Path of the object store.
364 super(DiskObjectStore, self).__init__()
366 self.pack_dir = os.path.join(self.path, PACKDIR)
367 self._pack_cache_time = 0
368 self._alternates = None
371 def alternates(self):
372 if self._alternates is not None:
373 return self._alternates
374 self._alternates = []
375 for path in self._read_alternate_paths():
376 self._alternates.append(DiskObjectStore(path))
377 return self._alternates
379 def _read_alternate_paths(self):
381 f = GitFile(os.path.join(self.path, "info", "alternates"),
383 except (OSError, IOError), e:
384 if e.errno == errno.ENOENT:
389 for l in f.readlines():
393 if not os.path.isabs(l):
400 def add_alternate_path(self, path):
401 """Add an alternate path to this object store.
404 os.mkdir(os.path.join(self.path, "info"))
406 if e.errno != errno.EEXIST:
408 alternates_path = os.path.join(self.path, "info/alternates")
409 f = GitFile(alternates_path, 'wb')
412 orig_f = open(alternates_path, 'rb')
413 except (OSError, IOError), e:
414 if e.errno != errno.ENOENT:
418 f.write(orig_f.read())
421 f.write("%s\n" % path)
424 self.alternates.append(DiskObjectStore(path))
426 def _load_packs(self):
429 self._pack_cache_time = os.stat(self.pack_dir).st_mtime
430 pack_dir_contents = os.listdir(self.pack_dir)
431 for name in pack_dir_contents:
432 # TODO: verify that idx exists first
433 if name.startswith("pack-") and name.endswith(".pack"):
434 filename = os.path.join(self.pack_dir, name)
435 pack_files.append((os.stat(filename).st_mtime, filename))
437 if e.errno == errno.ENOENT:
440 pack_files.sort(reverse=True)
441 suffix_len = len(".pack")
442 return [Pack(f[:-suffix_len]) for _, f in pack_files]
444 def _pack_cache_stale(self):
446 return os.stat(self.pack_dir).st_mtime > self._pack_cache_time
448 if e.errno == errno.ENOENT:
452 def _get_shafile_path(self, sha):
453 # Check from object dir
454 return hex_to_filename(self.path, sha)
456 def _iter_loose_objects(self):
457 for base in os.listdir(self.path):
460 for rest in os.listdir(os.path.join(self.path, base)):
463 def _get_loose_object(self, sha):
464 path = self._get_shafile_path(sha)
466 return ShaFile.from_path(path)
467 except (OSError, IOError), e:
468 if e.errno == errno.ENOENT:
472 def _remove_loose_object(self, sha):
473 os.remove(self._get_shafile_path(sha))
475 def _complete_thin_pack(self, f, path, copier, indexer):
476 """Move a specific file containing a pack into the pack directory.
478 :note: The file should be on the same file system as the
481 :param f: Open file object for the pack.
482 :param path: Path to the pack file.
483 :param copier: A PackStreamCopier to use for writing pack data.
484 :param indexer: A PackIndexer for indexing the pack.
486 entries = list(indexer)
488 # Update the header with the new number of objects.
490 write_pack_header(f, len(entries) + len(indexer.ext_refs()))
492 # Must flush before reading (http://bugs.python.org/issue3207)
495 # Rescan the rest of the pack, computing the SHA with the new header.
496 new_sha = compute_file_sha(f, end_ofs=-20)
498 # Must reposition before writing (http://bugs.python.org/issue3207)
499 f.seek(0, os.SEEK_CUR)
502 for ext_sha in indexer.ext_refs():
503 assert len(ext_sha) == 20
504 type_num, data = self.get_raw(ext_sha)
506 crc32 = write_pack_object(f, type_num, data, sha=new_sha)
507 entries.append((ext_sha, offset, crc32))
508 pack_sha = new_sha.digest()
514 pack_base_name = os.path.join(
515 self.pack_dir, 'pack-' + iter_sha1(e[0] for e in entries))
516 os.rename(path, pack_base_name + '.pack')
519 index_file = GitFile(pack_base_name + '.idx', 'wb')
521 write_pack_index_v2(index_file, entries, pack_sha)
526 # Add the pack to the store and return it.
527 final_pack = Pack(pack_base_name)
528 final_pack.check_length_and_checksum()
529 self._add_known_pack(final_pack)
532 def add_thin_pack(self, read_all, read_some):
533 """Add a new thin pack to this object store.
535 Thin packs are packs that contain deltas with parents that exist outside
536 the pack. They should never be placed in the object store directly, and
537 always indexed and completed as they are copied.
539 :param read_all: Read function that blocks until the number of requested
541 :param read_some: Read function that returns at least one byte, but may
542 not return the number of bytes requested.
543 :return: A Pack object pointing at the now-completed thin pack in the
544 objects/pack directory.
546 fd, path = tempfile.mkstemp(dir=self.path, prefix='tmp_pack_')
547 f = os.fdopen(fd, 'w+b')
550 indexer = PackIndexer(f, resolve_ext_ref=self.get_raw)
551 copier = PackStreamCopier(read_all, read_some, f,
554 return self._complete_thin_pack(f, path, copier, indexer)
558 def move_in_pack(self, path):
559 """Move a specific file containing a pack into the pack directory.
561 :note: The file should be on the same file system as the
564 :param path: Path to the pack file.
567 entries = p.sorted_entries()
568 basename = os.path.join(self.pack_dir,
569 "pack-%s" % iter_sha1(entry[0] for entry in entries))
570 f = GitFile(basename+".idx", "wb")
572 write_pack_index_v2(f, entries, p.get_stored_checksum())
576 os.rename(path, basename + ".pack")
577 final_pack = Pack(basename)
578 self._add_known_pack(final_pack)
582 """Add a new pack to this object store.
584 :return: Fileobject to write to and a commit function to
585 call when the pack is finished.
587 fd, path = tempfile.mkstemp(dir=self.pack_dir, suffix=".pack")
588 f = os.fdopen(fd, 'wb')
592 if os.path.getsize(path) > 0:
593 return self.move_in_pack(path)
599 def add_object(self, obj):
600 """Add a single object to this object store.
602 :param obj: Object to add
604 dir = os.path.join(self.path, obj.id[:2])
608 if e.errno != errno.EEXIST:
610 path = os.path.join(dir, obj.id[2:])
611 if os.path.exists(path):
612 return # Already there, no need to write again
613 f = GitFile(path, 'wb')
615 f.write(obj.as_legacy_object())
624 if e.errno != errno.EEXIST:
626 os.mkdir(os.path.join(path, "info"))
627 os.mkdir(os.path.join(path, PACKDIR))
631 class MemoryObjectStore(BaseObjectStore):
632 """Object store that keeps all objects in memory."""
635 super(MemoryObjectStore, self).__init__()
638 def _to_hexsha(self, sha):
642 return sha_to_hex(sha)
644 raise ValueError("Invalid sha %r" % (sha,))
646 def contains_loose(self, sha):
647 """Check if a particular object is present by SHA1 and is loose."""
648 return self._to_hexsha(sha) in self._data
650 def contains_packed(self, sha):
651 """Check if a particular object is present by SHA1 and is packed."""
655 """Iterate over the SHAs that are present in this store."""
656 return self._data.iterkeys()
660 """List with pack objects."""
663 def get_raw(self, name):
664 """Obtain the raw text for an object.
666 :param name: sha for the object.
667 :return: tuple with numeric type and object contents.
669 obj = self[self._to_hexsha(name)]
670 return obj.type_num, obj.as_raw_string()
672 def __getitem__(self, name):
673 return self._data[self._to_hexsha(name)]
675 def __delitem__(self, name):
676 """Delete an object from this store, for testing only."""
677 del self._data[self._to_hexsha(name)]
679 def add_object(self, obj):
680 """Add a single object to this object store.
683 self._data[obj.id] = obj
685 def add_objects(self, objects):
686 """Add a set of objects to this object store.
688 :param objects: Iterable over a list of objects.
690 for obj, path in objects:
691 self._data[obj.id] = obj
694 class ObjectImporter(object):
695 """Interface for importing objects."""
697 def __init__(self, count):
698 """Create a new ObjectImporter.
700 :param count: Number of objects that's going to be imported.
704 def add_object(self, object):
706 raise NotImplementedError(self.add_object)
708 def finish(self, object):
709 """Finish the import and write objects to disk."""
710 raise NotImplementedError(self.finish)
713 class ObjectIterator(object):
714 """Interface for iterating over objects."""
716 def iterobjects(self):
717 raise NotImplementedError(self.iterobjects)
720 class ObjectStoreIterator(ObjectIterator):
721 """ObjectIterator that works on top of an ObjectStore."""
723 def __init__(self, store, sha_iter):
724 """Create a new ObjectIterator.
726 :param store: Object store to retrieve from
727 :param sha_iter: Iterator over (sha, path) tuples
730 self.sha_iter = sha_iter
734 """Yield tuple with next object and path."""
735 for sha, path in self.itershas():
736 yield self.store[sha], path
738 def iterobjects(self):
739 """Iterate over just the objects."""
744 """Iterate over the SHAs."""
745 for sha in self._shas:
747 for sha in self.sha_iter:
748 self._shas.append(sha)
751 def __contains__(self, needle):
752 """Check if an object is present.
754 :note: This checks if the object is present in
755 the underlying object store, not if it would
756 be yielded by the iterator.
758 :param needle: SHA1 of the object to check for
760 return needle in self.store
762 def __getitem__(self, key):
763 """Find an object by SHA1.
765 :note: This retrieves the object from the underlying
766 object store. It will also succeed if the object would
767 not be returned by the iterator.
769 return self.store[key]
772 """Return the number of objects."""
773 return len(list(self.itershas()))
776 def tree_lookup_path(lookup_obj, root_sha, path):
777 """Look up an object in a Git tree.
779 :param lookup_obj: Callback for retrieving object by SHA1
780 :param root_sha: SHA1 of the root tree
781 :param path: Path to lookup
782 :return: A tuple of (mode, SHA) of the resulting path.
784 tree = lookup_obj(root_sha)
785 if not isinstance(tree, Tree):
786 raise NotTreeError(root_sha)
787 return tree.lookup_path(lookup_obj, path)
790 class MissingObjectFinder(object):
791 """Find the objects missing from another object store.
793 :param object_store: Object store containing at least all objects to be
795 :param haves: SHA1s of commits not to send (already present in target)
796 :param wants: SHA1s of commits to send
797 :param progress: Optional function to report progress to.
798 :param get_tagged: Function that returns a dict of pointed-to sha -> tag
799 sha for including tags.
800 :param tagged: dict of pointed-to sha -> tag sha for including tags
803 def __init__(self, object_store, haves, wants, progress=None,
806 self.sha_done = haves
807 self.objects_to_send = set([(w, None, False) for w in wants
809 self.object_store = object_store
811 self.progress = lambda x: None
813 self.progress = progress
814 self._tagged = get_tagged and get_tagged() or {}
816 def add_todo(self, entries):
817 self.objects_to_send.update([e for e in entries
818 if not e[0] in self.sha_done])
820 def parse_tree(self, tree):
821 self.add_todo([(sha, name, not stat.S_ISDIR(mode))
822 for name, mode, sha in tree.iteritems()
823 if not S_ISGITLINK(mode)])
825 def parse_commit(self, commit):
826 self.add_todo([(commit.tree, "", False)])
827 self.add_todo([(p, None, False) for p in commit.parents])
829 def parse_tag(self, tag):
830 self.add_todo([(tag.object[1], None, False)])
834 if not self.objects_to_send:
836 (sha, name, leaf) = self.objects_to_send.pop()
837 if sha not in self.sha_done:
840 o = self.object_store[sha]
841 if isinstance(o, Commit):
843 elif isinstance(o, Tree):
845 elif isinstance(o, Tag):
847 if sha in self._tagged:
848 self.add_todo([(self._tagged[sha], None, True)])
849 self.sha_done.add(sha)
850 self.progress("counting objects: %d\r" % len(self.sha_done))
854 class ObjectStoreGraphWalker(object):
855 """Graph walker that finds what commits are missing from an object store.
857 :ivar heads: Revisions without descendants in the local repo
858 :ivar get_parents: Function to retrieve parents in the local repo
861 def __init__(self, local_heads, get_parents):
862 """Create a new instance.
864 :param local_heads: Heads to start search with
865 :param get_parents: Function for finding the parents of a SHA1.
867 self.heads = set(local_heads)
868 self.get_parents = get_parents
872 """Ack that a revision and its ancestors are present in the source."""
873 ancestors = set([sha])
875 # stop if we run out of heads to remove
881 # collect all ancestors
882 new_ancestors = set()
884 ps = self.parents.get(a)
886 new_ancestors.update(ps)
887 self.parents[a] = None
889 # no more ancestors; stop
890 if not new_ancestors:
893 ancestors = new_ancestors
896 """Iterate over ancestors of heads in the target."""
898 ret = self.heads.pop()
899 ps = self.get_parents(ret)
900 self.parents[ret] = ps
901 self.heads.update([p for p in ps if not p in self.parents])