1 # object_store.py -- Object store for git objects
2 # Copyright (C) 2008-2009 Jelmer Vernooij <jelmer@samba.org>
4 # This program is free software; you can redistribute it and/or
5 # modify it under the terms of the GNU General Public License
6 # as published by the Free Software Foundation; either version 2
7 # or (at your option) a later version of the License.
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU General Public License for more details.
14 # You should have received a copy of the GNU General Public License
15 # along with this program; if not, write to the Free Software
16 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
20 """Git object store interfaces and implementation."""
23 from cStringIO import StringIO
31 from dulwich._compat import (
36 from dulwich.diff_tree import (
40 from dulwich.errors import (
43 from dulwich.file import GitFile
44 from dulwich.objects import (
56 from dulwich.pack import (
76 class BaseObjectStore(object):
77 """Object store interface."""
79 def determine_wants_all(self, refs):
80 return [sha for (ref, sha) in refs.iteritems()
81 if not sha in self and not ref.endswith("^{}") and
84 def iter_shas(self, shas):
85 """Iterate over the objects for the specified shas.
87 :param shas: Iterable object with SHAs
88 :return: Object iterator
90 return ObjectStoreIterator(self, shas)
92 def contains_loose(self, sha):
93 """Check if a particular object is present by SHA1 and is loose."""
94 raise NotImplementedError(self.contains_loose)
96 def contains_packed(self, sha):
97 """Check if a particular object is present by SHA1 and is packed."""
98 raise NotImplementedError(self.contains_packed)
100 def __contains__(self, sha):
101 """Check if a particular object is present by SHA1.
103 This method makes no distinction between loose and packed objects.
105 return self.contains_packed(sha) or self.contains_loose(sha)
109 """Iterable of pack objects."""
110 raise NotImplementedError
112 def get_raw(self, name):
113 """Obtain the raw text for an object.
115 :param name: sha for the object.
116 :return: tuple with numeric type and object contents.
118 raise NotImplementedError(self.get_raw)
120 def __getitem__(self, sha):
121 """Obtain an object by SHA1."""
122 type_num, uncomp = self.get_raw(sha)
123 return ShaFile.from_raw_string(type_num, uncomp)
126 """Iterate over the SHAs that are present in this store."""
127 raise NotImplementedError(self.__iter__)
129 def add_object(self, obj):
130 """Add a single object to this object store.
133 raise NotImplementedError(self.add_object)
135 def add_objects(self, objects):
136 """Add a set of objects to this object store.
138 :param objects: Iterable over a list of objects.
140 raise NotImplementedError(self.add_objects)
142 def tree_changes(self, source, target, want_unchanged=False):
143 """Find the differences between the contents of two trees
145 :param object_store: Object store to use for retrieving tree contents
146 :param tree: SHA1 of the root tree
147 :param want_unchanged: Whether unchanged files should be reported
148 :return: Iterator over tuples with
149 (oldpath, newpath), (oldmode, newmode), (oldsha, newsha)
151 for change in tree_changes(self, source, target,
152 want_unchanged=want_unchanged):
153 yield ((change.old.path, change.new.path),
154 (change.old.mode, change.new.mode),
155 (change.old.sha, change.new.sha))
157 def iter_tree_contents(self, tree_id, include_trees=False):
158 """Iterate the contents of a tree and all subtrees.
160 Iteration is depth-first pre-order, as in e.g. os.walk.
162 :param tree_id: SHA1 of the tree.
163 :param include_trees: If True, include tree objects in the iteration.
164 :return: Iterator over TreeEntry namedtuples for all the objects in a
167 for entry, _ in walk_trees(self, tree_id, None):
168 if not stat.S_ISDIR(entry.mode) or include_trees:
171 def find_missing_objects(self, haves, wants, progress=None,
173 """Find the missing objects required for a set of revisions.
175 :param haves: Iterable over SHAs already in common.
176 :param wants: Iterable over SHAs of objects to fetch.
177 :param progress: Simple progress function that will be called with
178 updated progress strings.
179 :param get_tagged: Function that returns a dict of pointed-to sha -> tag
180 sha for including tags.
181 :return: Iterator over (sha, path) pairs.
183 finder = MissingObjectFinder(self, haves, wants, progress, get_tagged)
184 return iter(finder.next, None)
186 def find_common_revisions(self, graphwalker):
187 """Find which revisions this store has in common using graphwalker.
189 :param graphwalker: A graphwalker object.
190 :return: List of SHAs that are in common
193 sha = graphwalker.next()
198 sha = graphwalker.next()
201 def get_graph_walker(self, heads):
202 """Obtain a graph walker for this object store.
204 :param heads: Local heads to start search with
205 :return: GraphWalker object
207 return ObjectStoreGraphWalker(heads, lambda sha: self[sha].parents)
209 def generate_pack_contents(self, have, want, progress=None):
210 """Iterate over the contents of a pack file.
212 :param have: List of SHA1s of objects that should not be sent
213 :param want: List of SHA1s of objects that should be sent
214 :param progress: Optional progress reporting method
216 return self.iter_shas(self.find_missing_objects(have, want, progress))
218 def peel_sha(self, sha):
219 """Peel all tags from a SHA.
221 :param sha: The object SHA to peel.
222 :return: The fully-peeled SHA1 of a tag object, after peeling all
223 intermediate tags; if the original ref does not point to a tag, this
224 will equal the original SHA1.
227 obj_class = object_class(obj.type_name)
228 while obj_class is Tag:
229 obj_class, sha = obj.object
234 class PackBasedObjectStore(BaseObjectStore):
237 self._pack_cache = None
239 def contains_packed(self, sha):
240 """Check if a particular object is present by SHA1 and is packed."""
241 for pack in self.packs:
246 def _load_packs(self):
247 raise NotImplementedError(self._load_packs)
249 def _pack_cache_stale(self):
250 """Check whether the pack cache is stale."""
251 raise NotImplementedError(self._pack_cache_stale)
253 def _add_known_pack(self, pack):
254 """Add a newly appeared pack to the cache by path.
257 if self._pack_cache is not None:
258 self._pack_cache.append(pack)
262 """List with pack objects."""
263 if self._pack_cache is None or self._pack_cache_stale():
264 self._pack_cache = self._load_packs()
265 return self._pack_cache
267 def _iter_loose_objects(self):
268 """Iterate over the SHAs of all loose objects."""
269 raise NotImplementedError(self._iter_loose_objects)
271 def _get_loose_object(self, sha):
272 raise NotImplementedError(self._get_loose_object)
274 def _remove_loose_object(self, sha):
275 raise NotImplementedError(self._remove_loose_object)
277 def pack_loose_objects(self):
278 """Pack loose objects.
280 :return: Number of objects packed
283 for sha in self._iter_loose_objects():
284 objects.add((self._get_loose_object(sha), None))
285 self.add_objects(list(objects))
286 for obj, path in objects:
287 self._remove_loose_object(obj.id)
291 """Iterate over the SHAs that are present in this store."""
292 iterables = self.packs + [self._iter_loose_objects()]
293 return itertools.chain(*iterables)
295 def contains_loose(self, sha):
296 """Check if a particular object is present by SHA1 and is loose."""
297 return self._get_loose_object(sha) is not None
299 def get_raw(self, name):
300 """Obtain the raw text for an object.
302 :param name: sha for the object.
303 :return: tuple with numeric type and object contents.
306 sha = hex_to_sha(name)
308 elif len(name) == 20:
312 raise AssertionError("Invalid object name %r" % name)
313 for pack in self.packs:
315 return pack.get_raw(sha)
319 hexsha = sha_to_hex(name)
320 ret = self._get_loose_object(hexsha)
322 return ret.type_num, ret.as_raw_string()
323 raise KeyError(hexsha)
325 def add_objects(self, objects):
326 """Add a set of objects to this object store.
328 :param objects: Iterable over objects, should support __len__.
329 :return: Pack object of the objects written.
331 if len(objects) == 0:
332 # Don't bother writing an empty pack file
334 f, commit = self.add_pack()
335 write_pack_objects(f, objects)
339 class DiskObjectStore(PackBasedObjectStore):
340 """Git-style object store that exists on disk."""
342 def __init__(self, path):
343 """Open an object store.
345 :param path: Path of the object store.
347 super(DiskObjectStore, self).__init__()
349 self.pack_dir = os.path.join(self.path, PACKDIR)
350 self._pack_cache_time = 0
352 def _load_packs(self):
355 self._pack_cache_time = os.stat(self.pack_dir).st_mtime
356 pack_dir_contents = os.listdir(self.pack_dir)
357 for name in pack_dir_contents:
358 # TODO: verify that idx exists first
359 if name.startswith("pack-") and name.endswith(".pack"):
360 filename = os.path.join(self.pack_dir, name)
361 pack_files.append((os.stat(filename).st_mtime, filename))
363 if e.errno == errno.ENOENT:
366 pack_files.sort(reverse=True)
367 suffix_len = len(".pack")
368 return [Pack(f[:-suffix_len]) for _, f in pack_files]
370 def _pack_cache_stale(self):
372 return os.stat(self.pack_dir).st_mtime > self._pack_cache_time
374 if e.errno == errno.ENOENT:
378 def _get_shafile_path(self, sha):
379 # Check from object dir
380 return hex_to_filename(self.path, sha)
382 def _iter_loose_objects(self):
383 for base in os.listdir(self.path):
386 for rest in os.listdir(os.path.join(self.path, base)):
389 def _get_loose_object(self, sha):
390 path = self._get_shafile_path(sha)
392 return ShaFile.from_path(path)
393 except (OSError, IOError), e:
394 if e.errno == errno.ENOENT:
398 def _remove_loose_object(self, sha):
399 os.remove(self._get_shafile_path(sha))
401 def _complete_thin_pack(self, f, path, copier, indexer):
402 """Move a specific file containing a pack into the pack directory.
404 :note: The file should be on the same file system as the
407 :param f: Open file object for the pack.
408 :param path: Path to the pack file.
409 :param copier: A PackStreamCopier to use for writing pack data.
410 :param indexer: A PackIndexer for indexing the pack.
412 entries = list(indexer)
414 # Update the header with the new number of objects.
416 write_pack_header(f, len(entries) + len(indexer.ext_refs()))
418 # Rescan the rest of the pack, computing the SHA with the new header.
419 new_sha = compute_file_sha(f, end_ofs=-20)
422 for ext_sha in indexer.ext_refs():
423 type_num, data = self.get_raw(ext_sha)
425 crc32 = write_pack_object(f, type_num, data, sha=new_sha)
426 entries.append((ext_sha, offset, crc32))
427 pack_sha = new_sha.digest()
433 pack_base_name = os.path.join(
434 self.pack_dir, 'pack-' + iter_sha1(e[0] for e in entries))
435 os.rename(path, pack_base_name + '.pack')
438 index_file = GitFile(pack_base_name + '.idx', 'wb')
440 write_pack_index_v2(index_file, entries, pack_sha)
445 # Add the pack to the store and return it.
446 final_pack = Pack(pack_base_name)
447 final_pack.check_length_and_checksum()
448 self._add_known_pack(final_pack)
451 def add_thin_pack(self, read_all, read_some):
452 """Add a new thin pack to this object store.
454 Thin packs are packs that contain deltas with parents that exist outside
455 the pack. They should never be placed in the object store directly, and
456 always indexed and completed as they are copied.
458 :param read_all: Read function that blocks until the number of requested
460 :param read_some: Read function that returns at least one byte, but may
461 not return the number of bytes requested.
462 :return: A Pack object pointing at the now-completed thin pack in the
463 objects/pack directory.
465 fd, path = tempfile.mkstemp(dir=self.path, prefix='tmp_pack_')
466 f = os.fdopen(fd, 'w+b')
469 indexer = PackIndexer(f, resolve_ext_ref=self.get_raw)
470 copier = PackStreamCopier(read_all, read_some, f,
473 return self._complete_thin_pack(f, path, copier, indexer)
477 def move_in_pack(self, path):
478 """Move a specific file containing a pack into the pack directory.
480 :note: The file should be on the same file system as the
483 :param path: Path to the pack file.
486 entries = p.sorted_entries()
487 basename = os.path.join(self.pack_dir,
488 "pack-%s" % iter_sha1(entry[0] for entry in entries))
489 f = GitFile(basename+".idx", "wb")
491 write_pack_index_v2(f, entries, p.get_stored_checksum())
495 os.rename(path, basename + ".pack")
496 final_pack = Pack(basename)
497 self._add_known_pack(final_pack)
501 """Add a new pack to this object store.
503 :return: Fileobject to write to and a commit function to
504 call when the pack is finished.
506 fd, path = tempfile.mkstemp(dir=self.pack_dir, suffix=".pack")
507 f = os.fdopen(fd, 'wb')
511 if os.path.getsize(path) > 0:
512 return self.move_in_pack(path)
518 def add_object(self, obj):
519 """Add a single object to this object store.
521 :param obj: Object to add
523 dir = os.path.join(self.path, obj.id[:2])
527 if e.errno != errno.EEXIST:
529 path = os.path.join(dir, obj.id[2:])
530 if os.path.exists(path):
531 return # Already there, no need to write again
532 f = GitFile(path, 'wb')
534 f.write(obj.as_legacy_object())
543 if e.errno != errno.EEXIST:
545 os.mkdir(os.path.join(path, "info"))
546 os.mkdir(os.path.join(path, PACKDIR))
550 class MemoryObjectStore(BaseObjectStore):
551 """Object store that keeps all objects in memory."""
554 super(MemoryObjectStore, self).__init__()
557 def contains_loose(self, sha):
558 """Check if a particular object is present by SHA1 and is loose."""
559 return sha in self._data
561 def contains_packed(self, sha):
562 """Check if a particular object is present by SHA1 and is packed."""
566 """Iterate over the SHAs that are present in this store."""
567 return self._data.iterkeys()
571 """List with pack objects."""
574 def get_raw(self, name):
575 """Obtain the raw text for an object.
577 :param name: sha for the object.
578 :return: tuple with numeric type and object contents.
581 return obj.type_num, obj.as_raw_string()
583 def __getitem__(self, name):
584 return self._data[name]
586 def __delitem__(self, name):
587 """Delete an object from this store, for testing only."""
590 def add_object(self, obj):
591 """Add a single object to this object store.
594 self._data[obj.id] = obj
596 def add_objects(self, objects):
597 """Add a set of objects to this object store.
599 :param objects: Iterable over a list of objects.
601 for obj, path in objects:
602 self._data[obj.id] = obj
605 class ObjectImporter(object):
606 """Interface for importing objects."""
608 def __init__(self, count):
609 """Create a new ObjectImporter.
611 :param count: Number of objects that's going to be imported.
615 def add_object(self, object):
617 raise NotImplementedError(self.add_object)
619 def finish(self, object):
620 """Finish the import and write objects to disk."""
621 raise NotImplementedError(self.finish)
624 class ObjectIterator(object):
625 """Interface for iterating over objects."""
627 def iterobjects(self):
628 raise NotImplementedError(self.iterobjects)
631 class ObjectStoreIterator(ObjectIterator):
632 """ObjectIterator that works on top of an ObjectStore."""
634 def __init__(self, store, sha_iter):
635 """Create a new ObjectIterator.
637 :param store: Object store to retrieve from
638 :param sha_iter: Iterator over (sha, path) tuples
641 self.sha_iter = sha_iter
645 """Yield tuple with next object and path."""
646 for sha, path in self.itershas():
647 yield self.store[sha], path
649 def iterobjects(self):
650 """Iterate over just the objects."""
655 """Iterate over the SHAs."""
656 for sha in self._shas:
658 for sha in self.sha_iter:
659 self._shas.append(sha)
662 def __contains__(self, needle):
663 """Check if an object is present.
665 :note: This checks if the object is present in
666 the underlying object store, not if it would
667 be yielded by the iterator.
669 :param needle: SHA1 of the object to check for
671 return needle in self.store
673 def __getitem__(self, key):
674 """Find an object by SHA1.
676 :note: This retrieves the object from the underlying
677 object store. It will also succeed if the object would
678 not be returned by the iterator.
680 return self.store[key]
683 """Return the number of objects."""
684 return len(list(self.itershas()))
687 def tree_lookup_path(lookup_obj, root_sha, path):
688 """Lookup an object in a Git tree.
690 :param lookup_obj: Callback for retrieving object by SHA1
691 :param root_sha: SHA1 of the root tree
692 :param path: Path to lookup
694 parts = path.split("/")
698 obj = lookup_obj(sha)
699 if not isinstance(obj, Tree):
700 raise NotTreeError(sha)
707 class MissingObjectFinder(object):
708 """Find the objects missing from another object store.
710 :param object_store: Object store containing at least all objects to be
712 :param haves: SHA1s of commits not to send (already present in target)
713 :param wants: SHA1s of commits to send
714 :param progress: Optional function to report progress to.
715 :param get_tagged: Function that returns a dict of pointed-to sha -> tag
716 sha for including tags.
717 :param tagged: dict of pointed-to sha -> tag sha for including tags
720 def __init__(self, object_store, haves, wants, progress=None,
723 self.sha_done = haves
724 self.objects_to_send = set([(w, None, False) for w in wants
726 self.object_store = object_store
728 self.progress = lambda x: None
730 self.progress = progress
731 self._tagged = get_tagged and get_tagged() or {}
733 def add_todo(self, entries):
734 self.objects_to_send.update([e for e in entries
735 if not e[0] in self.sha_done])
737 def parse_tree(self, tree):
738 self.add_todo([(sha, name, not stat.S_ISDIR(mode))
739 for name, mode, sha in tree.iteritems()
740 if not S_ISGITLINK(mode)])
742 def parse_commit(self, commit):
743 self.add_todo([(commit.tree, "", False)])
744 self.add_todo([(p, None, False) for p in commit.parents])
746 def parse_tag(self, tag):
747 self.add_todo([(tag.object[1], None, False)])
750 if not self.objects_to_send:
752 (sha, name, leaf) = self.objects_to_send.pop()
754 o = self.object_store[sha]
755 if isinstance(o, Commit):
757 elif isinstance(o, Tree):
759 elif isinstance(o, Tag):
761 if sha in self._tagged:
762 self.add_todo([(self._tagged[sha], None, True)])
763 self.sha_done.add(sha)
764 self.progress("counting objects: %d\r" % len(self.sha_done))
768 class ObjectStoreGraphWalker(object):
769 """Graph walker that finds what commits are missing from an object store.
771 :ivar heads: Revisions without descendants in the local repo
772 :ivar get_parents: Function to retrieve parents in the local repo
775 def __init__(self, local_heads, get_parents):
776 """Create a new instance.
778 :param local_heads: Heads to start search with
779 :param get_parents: Function for finding the parents of a SHA1.
781 self.heads = set(local_heads)
782 self.get_parents = get_parents
786 """Ack that a revision and its ancestors are present in the source."""
787 ancestors = set([sha])
789 # stop if we run out of heads to remove
795 # collect all ancestors
796 new_ancestors = set()
798 if a in self.parents:
799 new_ancestors.update(self.parents[a])
801 # no more ancestors; stop
802 if not new_ancestors:
805 ancestors = new_ancestors
808 """Iterate over ancestors of heads in the target."""
810 ret = self.heads.pop()
811 ps = self.get_parents(ret)
812 self.parents[ret] = ps
813 self.heads.update(ps)