1 # object_store.py -- Object store for git objects
2 # Copyright (C) 2008-2009 Jelmer Vernooij <jelmer@samba.org>
4 # This program is free software; you can redistribute it and/or
5 # modify it under the terms of the GNU General Public License
6 # as published by the Free Software Foundation; either version 2
7 # or (at your option) a later version of the License.
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU General Public License for more details.
14 # You should have received a copy of the GNU General Public License
15 # along with this program; if not, write to the Free Software
16 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
20 """Git object store interfaces and implementation."""
23 from cStringIO import StringIO
31 from dulwich._compat import (
36 from dulwich.diff_tree import (
40 from dulwich.errors import (
43 from dulwich.file import GitFile
44 from dulwich.objects import (
56 from dulwich.pack import (
77 class BaseObjectStore(object):
78 """Object store interface."""
80 def determine_wants_all(self, refs):
81 return [sha for (ref, sha) in refs.iteritems()
82 if not sha in self and not ref.endswith("^{}") and
85 def iter_shas(self, shas):
86 """Iterate over the objects for the specified shas.
88 :param shas: Iterable object with SHAs
89 :return: Object iterator
91 return ObjectStoreIterator(self, shas)
93 def contains_loose(self, sha):
94 """Check if a particular object is present by SHA1 and is loose."""
95 raise NotImplementedError(self.contains_loose)
97 def contains_packed(self, sha):
98 """Check if a particular object is present by SHA1 and is packed."""
99 raise NotImplementedError(self.contains_packed)
101 def __contains__(self, sha):
102 """Check if a particular object is present by SHA1.
104 This method makes no distinction between loose and packed objects.
106 return self.contains_packed(sha) or self.contains_loose(sha)
110 """Iterable of pack objects."""
111 raise NotImplementedError
113 def get_raw(self, name):
114 """Obtain the raw text for an object.
116 :param name: sha for the object.
117 :return: tuple with numeric type and object contents.
119 raise NotImplementedError(self.get_raw)
121 def __getitem__(self, sha):
122 """Obtain an object by SHA1."""
123 type_num, uncomp = self.get_raw(sha)
124 return ShaFile.from_raw_string(type_num, uncomp)
127 """Iterate over the SHAs that are present in this store."""
128 raise NotImplementedError(self.__iter__)
130 def add_object(self, obj):
131 """Add a single object to this object store.
134 raise NotImplementedError(self.add_object)
136 def add_objects(self, objects):
137 """Add a set of objects to this object store.
139 :param objects: Iterable over a list of objects.
141 raise NotImplementedError(self.add_objects)
143 def tree_changes(self, source, target, want_unchanged=False):
144 """Find the differences between the contents of two trees
146 :param object_store: Object store to use for retrieving tree contents
147 :param tree: SHA1 of the root tree
148 :param want_unchanged: Whether unchanged files should be reported
149 :return: Iterator over tuples with
150 (oldpath, newpath), (oldmode, newmode), (oldsha, newsha)
152 for change in tree_changes(self, source, target,
153 want_unchanged=want_unchanged):
154 yield ((change.old.path, change.new.path),
155 (change.old.mode, change.new.mode),
156 (change.old.sha, change.new.sha))
158 def iter_tree_contents(self, tree_id, include_trees=False):
159 """Iterate the contents of a tree and all subtrees.
161 Iteration is depth-first pre-order, as in e.g. os.walk.
163 :param tree_id: SHA1 of the tree.
164 :param include_trees: If True, include tree objects in the iteration.
165 :return: Iterator over TreeEntry namedtuples for all the objects in a
168 for entry, _ in walk_trees(self, tree_id, None):
169 if not stat.S_ISDIR(entry.mode) or include_trees:
172 def find_missing_objects(self, haves, wants, progress=None,
174 """Find the missing objects required for a set of revisions.
176 :param haves: Iterable over SHAs already in common.
177 :param wants: Iterable over SHAs of objects to fetch.
178 :param progress: Simple progress function that will be called with
179 updated progress strings.
180 :param get_tagged: Function that returns a dict of pointed-to sha -> tag
181 sha for including tags.
182 :return: Iterator over (sha, path) pairs.
184 finder = MissingObjectFinder(self, haves, wants, progress, get_tagged)
185 return iter(finder.next, None)
187 def find_common_revisions(self, graphwalker):
188 """Find which revisions this store has in common using graphwalker.
190 :param graphwalker: A graphwalker object.
191 :return: List of SHAs that are in common
194 sha = graphwalker.next()
199 sha = graphwalker.next()
202 def get_graph_walker(self, heads):
203 """Obtain a graph walker for this object store.
205 :param heads: Local heads to start search with
206 :return: GraphWalker object
208 return ObjectStoreGraphWalker(heads, lambda sha: self[sha].parents)
210 def generate_pack_contents(self, have, want, progress=None):
211 """Iterate over the contents of a pack file.
213 :param have: List of SHA1s of objects that should not be sent
214 :param want: List of SHA1s of objects that should be sent
215 :param progress: Optional progress reporting method
217 return self.iter_shas(self.find_missing_objects(have, want, progress))
219 def peel_sha(self, sha):
220 """Peel all tags from a SHA.
222 :param sha: The object SHA to peel.
223 :return: The fully-peeled SHA1 of a tag object, after peeling all
224 intermediate tags; if the original ref does not point to a tag, this
225 will equal the original SHA1.
228 obj_class = object_class(obj.type_name)
229 while obj_class is Tag:
230 obj_class, sha = obj.object
235 class PackBasedObjectStore(BaseObjectStore):
238 self._pack_cache = None
240 def contains_packed(self, sha):
241 """Check if a particular object is present by SHA1 and is packed."""
242 for pack in self.packs:
247 def _load_packs(self):
248 raise NotImplementedError(self._load_packs)
250 def _pack_cache_stale(self):
251 """Check whether the pack cache is stale."""
252 raise NotImplementedError(self._pack_cache_stale)
254 def _add_known_pack(self, pack):
255 """Add a newly appeared pack to the cache by path.
258 if self._pack_cache is not None:
259 self._pack_cache.append(pack)
263 """List with pack objects."""
264 if self._pack_cache is None or self._pack_cache_stale():
265 self._pack_cache = self._load_packs()
266 return self._pack_cache
268 def _iter_loose_objects(self):
269 """Iterate over the SHAs of all loose objects."""
270 raise NotImplementedError(self._iter_loose_objects)
272 def _get_loose_object(self, sha):
273 raise NotImplementedError(self._get_loose_object)
275 def _remove_loose_object(self, sha):
276 raise NotImplementedError(self._remove_loose_object)
278 def pack_loose_objects(self):
279 """Pack loose objects.
281 :return: Number of objects packed
284 for sha in self._iter_loose_objects():
285 objects.add((self._get_loose_object(sha), None))
286 self.add_objects(list(objects))
287 for obj, path in objects:
288 self._remove_loose_object(obj.id)
292 """Iterate over the SHAs that are present in this store."""
293 iterables = self.packs + [self._iter_loose_objects()]
294 return itertools.chain(*iterables)
296 def contains_loose(self, sha):
297 """Check if a particular object is present by SHA1 and is loose."""
298 return self._get_loose_object(sha) is not None
300 def get_raw(self, name):
301 """Obtain the raw text for an object.
303 :param name: sha for the object.
304 :return: tuple with numeric type and object contents.
307 sha = hex_to_sha(name)
309 elif len(name) == 20:
313 raise AssertionError("Invalid object name %r" % name)
314 for pack in self.packs:
316 return pack.get_raw(sha)
320 hexsha = sha_to_hex(name)
321 ret = self._get_loose_object(hexsha)
323 return ret.type_num, ret.as_raw_string()
324 raise KeyError(hexsha)
326 def add_objects(self, objects):
327 """Add a set of objects to this object store.
329 :param objects: Iterable over objects, should support __len__.
330 :return: Pack object of the objects written.
332 if len(objects) == 0:
333 # Don't bother writing an empty pack file
335 f, commit = self.add_pack()
336 write_pack_objects(f, objects)
340 class DiskObjectStore(PackBasedObjectStore):
341 """Git-style object store that exists on disk."""
343 def __init__(self, path):
344 """Open an object store.
346 :param path: Path of the object store.
348 super(DiskObjectStore, self).__init__()
350 self.pack_dir = os.path.join(self.path, PACKDIR)
351 self._pack_cache_time = 0
353 def _load_packs(self):
356 self._pack_cache_time = os.stat(self.pack_dir).st_mtime
357 pack_dir_contents = os.listdir(self.pack_dir)
358 for name in pack_dir_contents:
359 # TODO: verify that idx exists first
360 if name.startswith("pack-") and name.endswith(".pack"):
361 filename = os.path.join(self.pack_dir, name)
362 pack_files.append((os.stat(filename).st_mtime, filename))
364 if e.errno == errno.ENOENT:
367 pack_files.sort(reverse=True)
368 suffix_len = len(".pack")
369 return [Pack(f[:-suffix_len]) for _, f in pack_files]
371 def _pack_cache_stale(self):
373 return os.stat(self.pack_dir).st_mtime > self._pack_cache_time
375 if e.errno == errno.ENOENT:
379 def _get_shafile_path(self, sha):
380 # Check from object dir
381 return hex_to_filename(self.path, sha)
383 def _iter_loose_objects(self):
384 for base in os.listdir(self.path):
387 for rest in os.listdir(os.path.join(self.path, base)):
390 def _get_loose_object(self, sha):
391 path = self._get_shafile_path(sha)
393 return ShaFile.from_path(path)
394 except (OSError, IOError), e:
395 if e.errno == errno.ENOENT:
399 def _remove_loose_object(self, sha):
400 os.remove(self._get_shafile_path(sha))
402 def _complete_thin_pack(self, f, path, copier, indexer):
403 """Move a specific file containing a pack into the pack directory.
405 :note: The file should be on the same file system as the
408 :param f: Open file object for the pack.
409 :param path: Path to the pack file.
410 :param copier: A PackStreamCopier to use for writing pack data.
411 :param indexer: A PackIndexer for indexing the pack.
413 entries = list(indexer)
415 # Update the header with the new number of objects.
417 write_pack_header(f, len(entries) + len(indexer.ext_refs()))
419 # Rescan the rest of the pack, computing the SHA with the new header.
420 new_sha = compute_file_sha(f, end_ofs=-20)
423 for ext_sha in indexer.ext_refs():
424 type_num, data = self.get_raw(ext_sha)
426 crc32 = write_pack_object(f, type_num, data, sha=new_sha)
427 entries.append((ext_sha, offset, crc32))
428 pack_sha = new_sha.digest()
434 pack_base_name = os.path.join(
435 self.pack_dir, 'pack-' + iter_sha1(e[0] for e in entries))
436 os.rename(path, pack_base_name + '.pack')
439 index_file = GitFile(pack_base_name + '.idx', 'wb')
441 write_pack_index_v2(index_file, entries, pack_sha)
446 # Add the pack to the store and return it.
447 final_pack = Pack(pack_base_name)
448 final_pack.check_length_and_checksum()
449 self._add_known_pack(final_pack)
452 def add_thin_pack(self, read_all, read_some):
453 """Add a new thin pack to this object store.
455 Thin packs are packs that contain deltas with parents that exist outside
456 the pack. They should never be placed in the object store directly, and
457 always indexed and completed as they are copied.
459 :param read_all: Read function that blocks until the number of requested
461 :param read_some: Read function that returns at least one byte, but may
462 not return the number of bytes requested.
463 :return: A Pack object pointing at the now-completed thin pack in the
464 objects/pack directory.
466 fd, path = tempfile.mkstemp(dir=self.path, prefix='tmp_pack_')
467 f = os.fdopen(fd, 'w+b')
470 indexer = PackIndexer(f, resolve_ext_ref=self.get_raw)
471 copier = PackStreamCopier(read_all, read_some, f,
474 return self._complete_thin_pack(f, path, copier, indexer)
478 def move_in_pack(self, path):
479 """Move a specific file containing a pack into the pack directory.
481 :note: The file should be on the same file system as the
484 :param path: Path to the pack file.
487 entries = p.sorted_entries()
488 basename = os.path.join(self.pack_dir,
489 "pack-%s" % iter_sha1(entry[0] for entry in entries))
490 f = GitFile(basename+".idx", "wb")
492 write_pack_index_v2(f, entries, p.get_stored_checksum())
496 os.rename(path, basename + ".pack")
497 final_pack = Pack(basename)
498 self._add_known_pack(final_pack)
502 """Add a new pack to this object store.
504 :return: Fileobject to write to and a commit function to
505 call when the pack is finished.
507 fd, path = tempfile.mkstemp(dir=self.pack_dir, suffix=".pack")
508 f = os.fdopen(fd, 'wb')
512 if os.path.getsize(path) > 0:
513 return self.move_in_pack(path)
519 def add_object(self, obj):
520 """Add a single object to this object store.
522 :param obj: Object to add
524 dir = os.path.join(self.path, obj.id[:2])
528 if e.errno != errno.EEXIST:
530 path = os.path.join(dir, obj.id[2:])
531 if os.path.exists(path):
532 return # Already there, no need to write again
533 f = GitFile(path, 'wb')
535 f.write(obj.as_legacy_object())
544 if e.errno != errno.EEXIST:
546 os.mkdir(os.path.join(path, "info"))
547 os.mkdir(os.path.join(path, PACKDIR))
551 class MemoryObjectStore(BaseObjectStore):
552 """Object store that keeps all objects in memory."""
555 super(MemoryObjectStore, self).__init__()
558 def contains_loose(self, sha):
559 """Check if a particular object is present by SHA1 and is loose."""
560 return sha in self._data
562 def contains_packed(self, sha):
563 """Check if a particular object is present by SHA1 and is packed."""
567 """Iterate over the SHAs that are present in this store."""
568 return self._data.iterkeys()
572 """List with pack objects."""
575 def get_raw(self, name):
576 """Obtain the raw text for an object.
578 :param name: sha for the object.
579 :return: tuple with numeric type and object contents.
582 return obj.type_num, obj.as_raw_string()
584 def __getitem__(self, name):
585 return self._data[name]
587 def __delitem__(self, name):
588 """Delete an object from this store, for testing only."""
591 def add_object(self, obj):
592 """Add a single object to this object store.
595 self._data[obj.id] = obj
597 def add_objects(self, objects):
598 """Add a set of objects to this object store.
600 :param objects: Iterable over a list of objects.
602 for obj, path in objects:
603 self._data[obj.id] = obj
606 class ObjectImporter(object):
607 """Interface for importing objects."""
609 def __init__(self, count):
610 """Create a new ObjectImporter.
612 :param count: Number of objects that's going to be imported.
616 def add_object(self, object):
618 raise NotImplementedError(self.add_object)
620 def finish(self, object):
621 """Finish the import and write objects to disk."""
622 raise NotImplementedError(self.finish)
625 class ObjectIterator(object):
626 """Interface for iterating over objects."""
628 def iterobjects(self):
629 raise NotImplementedError(self.iterobjects)
632 class ObjectStoreIterator(ObjectIterator):
633 """ObjectIterator that works on top of an ObjectStore."""
635 def __init__(self, store, sha_iter):
636 """Create a new ObjectIterator.
638 :param store: Object store to retrieve from
639 :param sha_iter: Iterator over (sha, path) tuples
642 self.sha_iter = sha_iter
646 """Yield tuple with next object and path."""
647 for sha, path in self.itershas():
648 yield self.store[sha], path
650 def iterobjects(self):
651 """Iterate over just the objects."""
656 """Iterate over the SHAs."""
657 for sha in self._shas:
659 for sha in self.sha_iter:
660 self._shas.append(sha)
663 def __contains__(self, needle):
664 """Check if an object is present.
666 :note: This checks if the object is present in
667 the underlying object store, not if it would
668 be yielded by the iterator.
670 :param needle: SHA1 of the object to check for
672 return needle in self.store
674 def __getitem__(self, key):
675 """Find an object by SHA1.
677 :note: This retrieves the object from the underlying
678 object store. It will also succeed if the object would
679 not be returned by the iterator.
681 return self.store[key]
684 """Return the number of objects."""
685 return len(list(self.itershas()))
688 def tree_lookup_path(lookup_obj, root_sha, path):
689 """Lookup an object in a Git tree.
691 :param lookup_obj: Callback for retrieving object by SHA1
692 :param root_sha: SHA1 of the root tree
693 :param path: Path to lookup
695 parts = path.split("/")
699 obj = lookup_obj(sha)
700 if not isinstance(obj, Tree):
701 raise NotTreeError(sha)
708 class MissingObjectFinder(object):
709 """Find the objects missing from another object store.
711 :param object_store: Object store containing at least all objects to be
713 :param haves: SHA1s of commits not to send (already present in target)
714 :param wants: SHA1s of commits to send
715 :param progress: Optional function to report progress to.
716 :param get_tagged: Function that returns a dict of pointed-to sha -> tag
717 sha for including tags.
718 :param tagged: dict of pointed-to sha -> tag sha for including tags
721 def __init__(self, object_store, haves, wants, progress=None,
724 self.sha_done = haves
725 self.objects_to_send = set([(w, None, False) for w in wants
727 self.object_store = object_store
729 self.progress = lambda x: None
731 self.progress = progress
732 self._tagged = get_tagged and get_tagged() or {}
734 def add_todo(self, entries):
735 self.objects_to_send.update([e for e in entries
736 if not e[0] in self.sha_done])
738 def parse_tree(self, tree):
739 self.add_todo([(sha, name, not stat.S_ISDIR(mode))
740 for name, mode, sha in tree.iteritems()
741 if not S_ISGITLINK(mode)])
743 def parse_commit(self, commit):
744 self.add_todo([(commit.tree, "", False)])
745 self.add_todo([(p, None, False) for p in commit.parents])
747 def parse_tag(self, tag):
748 self.add_todo([(tag.object[1], None, False)])
751 if not self.objects_to_send:
753 (sha, name, leaf) = self.objects_to_send.pop()
755 o = self.object_store[sha]
756 if isinstance(o, Commit):
758 elif isinstance(o, Tree):
760 elif isinstance(o, Tag):
762 if sha in self._tagged:
763 self.add_todo([(self._tagged[sha], None, True)])
764 self.sha_done.add(sha)
765 self.progress("counting objects: %d\r" % len(self.sha_done))
769 class ObjectStoreGraphWalker(object):
770 """Graph walker that finds what commits are missing from an object store.
772 :ivar heads: Revisions without descendants in the local repo
773 :ivar get_parents: Function to retrieve parents in the local repo
776 def __init__(self, local_heads, get_parents):
777 """Create a new instance.
779 :param local_heads: Heads to start search with
780 :param get_parents: Function for finding the parents of a SHA1.
782 self.heads = set(local_heads)
783 self.get_parents = get_parents
787 """Ack that a revision and its ancestors are present in the source."""
788 ancestors = set([sha])
790 # stop if we run out of heads to remove
796 # collect all ancestors
797 new_ancestors = set()
799 if a in self.parents:
800 new_ancestors.update(self.parents[a])
802 # no more ancestors; stop
803 if not new_ancestors:
806 ancestors = new_ancestors
809 """Iterate over ancestors of heads in the target."""
811 ret = self.heads.pop()
812 ps = self.get_parents(ret)
813 self.parents[ret] = ps
814 self.heads.update(ps)