1 # object_store.py -- Object store for git objects
2 # Copyright (C) 2008-2009 Jelmer Vernooij <jelmer@samba.org>
4 # This program is free software; you can redistribute it and/or
5 # modify it under the terms of the GNU General Public License
6 # as published by the Free Software Foundation; either version 2
7 # or (at your option) a later version of the License.
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU General Public License for more details.
14 # You should have received a copy of the GNU General Public License
15 # along with this program; if not, write to the Free Software
16 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
20 """Git object store interfaces and implementation."""
31 from dulwich.errors import (
34 from dulwich.file import GitFile
35 from dulwich.objects import (
46 from dulwich.pack import (
61 class BaseObjectStore(object):
62 """Object store interface."""
64 def determine_wants_all(self, refs):
65 return [sha for (ref, sha) in refs.iteritems()
66 if not sha in self and not ref.endswith("^{}")]
68 def iter_shas(self, shas):
69 """Iterate over the objects for the specified shas.
71 :param shas: Iterable object with SHAs
72 :return: Object iterator
74 return ObjectStoreIterator(self, shas)
76 def contains_loose(self, sha):
77 """Check if a particular object is present by SHA1 and is loose."""
78 raise NotImplementedError(self.contains_loose)
80 def contains_packed(self, sha):
81 """Check if a particular object is present by SHA1 and is packed."""
82 raise NotImplementedError(self.contains_packed)
84 def __contains__(self, sha):
85 """Check if a particular object is present by SHA1.
87 This method makes no distinction between loose and packed objects.
89 return self.contains_packed(sha) or self.contains_loose(sha)
93 """Iterable of pack objects."""
94 raise NotImplementedError
96 def get_raw(self, name):
97 """Obtain the raw text for an object.
99 :param name: sha for the object.
100 :return: tuple with numeric type and object contents.
102 raise NotImplementedError(self.get_raw)
104 def __getitem__(self, sha):
105 """Obtain an object by SHA1."""
106 type_num, uncomp = self.get_raw(sha)
107 return ShaFile.from_raw_string(type_num, uncomp)
110 """Iterate over the SHAs that are present in this store."""
111 raise NotImplementedError(self.__iter__)
113 def add_object(self, obj):
114 """Add a single object to this object store.
117 raise NotImplementedError(self.add_object)
119 def add_objects(self, objects):
120 """Add a set of objects to this object store.
122 :param objects: Iterable over a list of objects.
124 raise NotImplementedError(self.add_objects)
126 def tree_changes(self, source, target, want_unchanged=False):
127 """Find the differences between the contents of two trees
129 :param object_store: Object store to use for retrieving tree contents
130 :param tree: SHA1 of the root tree
131 :param want_unchanged: Whether unchanged files should be reported
132 :return: Iterator over tuples with (oldpath, newpath), (oldmode, newmode), (oldsha, newsha)
134 todo = set([(source, target, "")])
136 (sid, tid, path) = todo.pop()
145 for name, oldmode, oldhexsha in stree.iteritems():
146 oldchildpath = posixpath.join(path, name)
148 (newmode, newhexsha) = ttree[name]
149 newchildpath = oldchildpath
154 if (want_unchanged or oldmode != newmode or
155 oldhexsha != newhexsha):
156 if stat.S_ISDIR(oldmode):
157 if newmode is None or stat.S_ISDIR(newmode):
158 todo.add((oldhexsha, newhexsha, oldchildpath))
160 # entry became a file
161 todo.add((oldhexsha, None, oldchildpath))
162 yield ((None, newchildpath), (None, newmode), (None, newhexsha))
164 if newmode is not None and stat.S_ISDIR(newmode):
166 yield ((oldchildpath, None), (oldmode, None), (oldhexsha, None))
167 todo.add((None, newhexsha, newchildpath))
169 yield ((oldchildpath, newchildpath), (oldmode, newmode), (oldhexsha, newhexsha))
171 for name, newmode, newhexsha in ttree.iteritems():
172 childpath = posixpath.join(path, name)
173 if not name in stree:
174 if not stat.S_ISDIR(newmode):
175 yield ((None, childpath), (None, newmode), (None, newhexsha))
177 todo.add((None, newhexsha, childpath))
179 def iter_tree_contents(self, tree_id, include_trees=False):
180 """Iterate the contents of a tree and all subtrees.
182 Iteration is depth-first pre-order, as in e.g. os.walk.
184 :param tree_id: SHA1 of the tree.
185 :param include_trees: If True, include tree objects in the iteration.
186 :yield: Tuples of (path, mode, hexhsa) for objects in a tree.
188 todo = [('', stat.S_IFDIR, tree_id)]
190 path, mode, hexsha = todo.pop()
191 is_subtree = stat.S_ISDIR(mode)
192 if not is_subtree or include_trees:
193 yield path, mode, hexsha
195 entries = reversed(list(self[hexsha].iteritems()))
196 for name, entry_mode, entry_hexsha in entries:
197 entry_path = posixpath.join(path, name)
198 todo.append((entry_path, entry_mode, entry_hexsha))
200 def find_missing_objects(self, haves, wants, progress=None,
202 """Find the missing objects required for a set of revisions.
204 :param haves: Iterable over SHAs already in common.
205 :param wants: Iterable over SHAs of objects to fetch.
206 :param progress: Simple progress function that will be called with
207 updated progress strings.
208 :param get_tagged: Function that returns a dict of pointed-to sha -> tag
209 sha for including tags.
210 :return: Iterator over (sha, path) pairs.
212 finder = MissingObjectFinder(self, haves, wants, progress, get_tagged)
213 return iter(finder.next, None)
215 def find_common_revisions(self, graphwalker):
216 """Find which revisions this store has in common using graphwalker.
218 :param graphwalker: A graphwalker object.
219 :return: List of SHAs that are in common
222 sha = graphwalker.next()
227 sha = graphwalker.next()
230 def get_graph_walker(self, heads):
231 """Obtain a graph walker for this object store.
233 :param heads: Local heads to start search with
234 :return: GraphWalker object
236 return ObjectStoreGraphWalker(heads, lambda sha: self[sha].parents)
238 def generate_pack_contents(self, have, want, progress=None):
239 """Iterate over the contents of a pack file.
241 :param have: List of SHA1s of objects that should not be sent
242 :param want: List of SHA1s of objects that should be sent
243 :param progress: Optional progress reporting method
245 return self.iter_shas(self.find_missing_objects(have, want, progress))
247 def peel_sha(self, sha):
248 """Peel all tags from a SHA.
250 :param sha: The object SHA to peel.
251 :return: The fully-peeled SHA1 of a tag object, after peeling all
252 intermediate tags; if the original ref does not point to a tag, this
253 will equal the original SHA1.
256 obj_class = object_class(obj.type_name)
257 while obj_class is Tag:
258 obj_class, sha = obj.object
263 class PackBasedObjectStore(BaseObjectStore):
266 self._pack_cache = None
268 def contains_packed(self, sha):
269 """Check if a particular object is present by SHA1 and is packed."""
270 for pack in self.packs:
275 def _load_packs(self):
276 raise NotImplementedError(self._load_packs)
278 def _pack_cache_stale(self):
279 """Check whether the pack cache is stale."""
280 raise NotImplementedError(self._pack_cache_stale)
282 def _add_known_pack(self, pack):
283 """Add a newly appeared pack to the cache by path.
286 if self._pack_cache is not None:
287 self._pack_cache.append(pack)
291 """List with pack objects."""
292 if self._pack_cache is None or self._pack_cache_stale():
293 self._pack_cache = self._load_packs()
294 return self._pack_cache
296 def _iter_loose_objects(self):
297 """Iterate over the SHAs of all loose objects."""
298 raise NotImplementedError(self._iter_loose_objects)
300 def _get_loose_object(self, sha):
301 raise NotImplementedError(self._get_loose_object)
303 def _remove_loose_object(self, sha):
304 raise NotImplementedError(self._remove_loose_object)
306 def pack_loose_objects(self):
307 """Pack loose objects.
309 :return: Number of objects packed
312 for sha in self._iter_loose_objects():
313 objects.add((self._get_loose_object(sha), None))
314 self.add_objects(objects)
315 for obj, path in objects:
316 self._remove_loose_object(obj.id)
320 """Iterate over the SHAs that are present in this store."""
321 iterables = self.packs + [self._iter_loose_objects()]
322 return itertools.chain(*iterables)
324 def contains_loose(self, sha):
325 """Check if a particular object is present by SHA1 and is loose."""
326 return self._get_loose_object(sha) is not None
328 def get_raw(self, name):
329 """Obtain the raw text for an object.
331 :param name: sha for the object.
332 :return: tuple with numeric type and object contents.
335 sha = hex_to_sha(name)
337 elif len(name) == 20:
342 for pack in self.packs:
344 return pack.get_raw(sha)
348 hexsha = sha_to_hex(name)
349 ret = self._get_loose_object(hexsha)
351 return ret.type_num, ret.as_raw_string()
352 raise KeyError(hexsha)
354 def add_objects(self, objects):
355 """Add a set of objects to this object store.
357 :param objects: Iterable over objects, should support __len__.
358 :return: Pack object of the objects written.
360 if len(objects) == 0:
361 # Don't bother writing an empty pack file
363 f, commit = self.add_pack()
364 write_pack_data(f, objects, len(objects))
368 class DiskObjectStore(PackBasedObjectStore):
369 """Git-style object store that exists on disk."""
371 def __init__(self, path):
372 """Open an object store.
374 :param path: Path of the object store.
376 super(DiskObjectStore, self).__init__()
378 self.pack_dir = os.path.join(self.path, PACKDIR)
379 self._pack_cache_time = 0
381 def _load_packs(self):
384 self._pack_cache_time = os.stat(self.pack_dir).st_mtime
385 pack_dir_contents = os.listdir(self.pack_dir)
386 for name in pack_dir_contents:
387 # TODO: verify that idx exists first
388 if name.startswith("pack-") and name.endswith(".pack"):
389 filename = os.path.join(self.pack_dir, name)
390 pack_files.append((os.stat(filename).st_mtime, filename))
392 if e.errno == errno.ENOENT:
395 pack_files.sort(reverse=True)
396 suffix_len = len(".pack")
397 return [Pack(f[:-suffix_len]) for _, f in pack_files]
399 def _pack_cache_stale(self):
401 return os.stat(self.pack_dir).st_mtime > self._pack_cache_time
403 if e.errno == errno.ENOENT:
407 def _get_shafile_path(self, sha):
408 # Check from object dir
409 return hex_to_filename(self.path, sha)
411 def _iter_loose_objects(self):
412 for base in os.listdir(self.path):
415 for rest in os.listdir(os.path.join(self.path, base)):
418 def _get_loose_object(self, sha):
419 path = self._get_shafile_path(sha)
421 return ShaFile.from_path(path)
422 except (OSError, IOError), e:
423 if e.errno == errno.ENOENT:
427 def _remove_loose_object(self, sha):
428 os.remove(self._get_shafile_path(sha))
430 def move_in_thin_pack(self, path):
431 """Move a specific file containing a pack into the pack directory.
433 :note: The file should be on the same file system as the
436 :param path: Path to the pack file.
438 data = ThinPackData(self.get_raw, path)
440 # Write index for the thin pack (do we really need this?)
441 temppath = os.path.join(self.pack_dir,
442 sha_to_hex(urllib2.randombytes(20))+".tempidx")
443 data.create_index_v2(temppath)
444 p = Pack.from_objects(data, load_pack_index(temppath))
446 # Write a full pack version
447 temppath = os.path.join(self.pack_dir,
448 sha_to_hex(urllib2.randombytes(20))+".temppack")
449 write_pack(temppath, ((o, None) for o in p.iterobjects()), len(p))
450 pack_sha = load_pack_index(temppath+".idx").objects_sha1()
451 newbasename = os.path.join(self.pack_dir, "pack-%s" % pack_sha)
452 os.rename(temppath+".pack", newbasename+".pack")
453 os.rename(temppath+".idx", newbasename+".idx")
454 final_pack = Pack(newbasename)
455 self._add_known_pack(final_pack)
458 def move_in_pack(self, path):
459 """Move a specific file containing a pack into the pack directory.
461 :note: The file should be on the same file system as the
464 :param path: Path to the pack file.
467 entries = p.sorted_entries()
468 basename = os.path.join(self.pack_dir,
469 "pack-%s" % iter_sha1(entry[0] for entry in entries))
470 f = GitFile(basename+".idx", "wb")
472 write_pack_index_v2(f, entries, p.get_stored_checksum())
476 os.rename(path, basename + ".pack")
477 final_pack = Pack(basename)
478 self._add_known_pack(final_pack)
481 def add_thin_pack(self):
482 """Add a new thin pack to this object store.
484 Thin packs are packs that contain deltas with parents that exist
487 fd, path = tempfile.mkstemp(dir=self.pack_dir, suffix=".pack")
488 f = os.fdopen(fd, 'wb')
492 if os.path.getsize(path) > 0:
493 return self.move_in_thin_pack(path)
499 """Add a new pack to this object store.
501 :return: Fileobject to write to and a commit function to
502 call when the pack is finished.
504 fd, path = tempfile.mkstemp(dir=self.pack_dir, suffix=".pack")
505 f = os.fdopen(fd, 'wb')
509 if os.path.getsize(path) > 0:
510 return self.move_in_pack(path)
515 def add_object(self, obj):
516 """Add a single object to this object store.
518 :param obj: Object to add
520 dir = os.path.join(self.path, obj.id[:2])
524 if e.errno != errno.EEXIST:
526 path = os.path.join(dir, obj.id[2:])
527 if os.path.exists(path):
528 return # Already there, no need to write again
529 f = GitFile(path, 'wb')
531 f.write(obj.as_legacy_object())
540 if e.errno != errno.EEXIST:
542 os.mkdir(os.path.join(path, "info"))
543 os.mkdir(os.path.join(path, PACKDIR))
547 class MemoryObjectStore(BaseObjectStore):
548 """Object store that keeps all objects in memory."""
551 super(MemoryObjectStore, self).__init__()
554 def contains_loose(self, sha):
555 """Check if a particular object is present by SHA1 and is loose."""
556 return sha in self._data
558 def contains_packed(self, sha):
559 """Check if a particular object is present by SHA1 and is packed."""
563 """Iterate over the SHAs that are present in this store."""
564 return self._data.iterkeys()
568 """List with pack objects."""
571 def get_raw(self, name):
572 """Obtain the raw text for an object.
574 :param name: sha for the object.
575 :return: tuple with numeric type and object contents.
577 return self[name].as_raw_string()
579 def __getitem__(self, name):
580 return self._data[name]
582 def add_object(self, obj):
583 """Add a single object to this object store.
586 self._data[obj.id] = obj
588 def add_objects(self, objects):
589 """Add a set of objects to this object store.
591 :param objects: Iterable over a list of objects.
593 for obj, path in objects:
594 self._data[obj.id] = obj
597 class ObjectImporter(object):
598 """Interface for importing objects."""
600 def __init__(self, count):
601 """Create a new ObjectImporter.
603 :param count: Number of objects that's going to be imported.
607 def add_object(self, object):
609 raise NotImplementedError(self.add_object)
611 def finish(self, object):
612 """Finish the import and write objects to disk."""
613 raise NotImplementedError(self.finish)
616 class ObjectIterator(object):
617 """Interface for iterating over objects."""
619 def iterobjects(self):
620 raise NotImplementedError(self.iterobjects)
623 class ObjectStoreIterator(ObjectIterator):
624 """ObjectIterator that works on top of an ObjectStore."""
626 def __init__(self, store, sha_iter):
627 """Create a new ObjectIterator.
629 :param store: Object store to retrieve from
630 :param sha_iter: Iterator over (sha, path) tuples
633 self.sha_iter = sha_iter
637 """Yield tuple with next object and path."""
638 for sha, path in self.itershas():
639 yield self.store[sha], path
641 def iterobjects(self):
642 """Iterate over just the objects."""
647 """Iterate over the SHAs."""
648 for sha in self._shas:
650 for sha in self.sha_iter:
651 self._shas.append(sha)
654 def __contains__(self, needle):
655 """Check if an object is present.
657 :note: This checks if the object is present in
658 the underlying object store, not if it would
659 be yielded by the iterator.
661 :param needle: SHA1 of the object to check for
663 return needle in self.store
665 def __getitem__(self, key):
666 """Find an object by SHA1.
668 :note: This retrieves the object from the underlying
669 object store. It will also succeed if the object would
670 not be returned by the iterator.
672 return self.store[key]
675 """Return the number of objects."""
676 return len(list(self.itershas()))
679 def tree_lookup_path(lookup_obj, root_sha, path):
680 """Lookup an object in a Git tree.
682 :param lookup_obj: Callback for retrieving object by SHA1
683 :param root_sha: SHA1 of the root tree
684 :param path: Path to lookup
686 parts = path.split("/")
690 obj = lookup_obj(sha)
691 if not isinstance(obj, Tree):
692 raise NotTreeError(sha)
699 class MissingObjectFinder(object):
700 """Find the objects missing from another object store.
702 :param object_store: Object store containing at least all objects to be
704 :param haves: SHA1s of commits not to send (already present in target)
705 :param wants: SHA1s of commits to send
706 :param progress: Optional function to report progress to.
707 :param get_tagged: Function that returns a dict of pointed-to sha -> tag
708 sha for including tags.
709 :param tagged: dict of pointed-to sha -> tag sha for including tags
712 def __init__(self, object_store, haves, wants, progress=None,
714 self.sha_done = set(haves)
715 self.objects_to_send = set([(w, None, False) for w in wants if w not in haves])
716 self.object_store = object_store
718 self.progress = lambda x: None
720 self.progress = progress
721 self._tagged = get_tagged and get_tagged() or {}
723 def add_todo(self, entries):
724 self.objects_to_send.update([e for e in entries if not e[0] in self.sha_done])
726 def parse_tree(self, tree):
727 self.add_todo([(sha, name, not stat.S_ISDIR(mode)) for (mode, name, sha) in tree.entries() if not S_ISGITLINK(mode)])
729 def parse_commit(self, commit):
730 self.add_todo([(commit.tree, "", False)])
731 self.add_todo([(p, None, False) for p in commit.parents])
733 def parse_tag(self, tag):
734 self.add_todo([(tag.object[1], None, False)])
737 if not self.objects_to_send:
739 (sha, name, leaf) = self.objects_to_send.pop()
741 o = self.object_store[sha]
742 if isinstance(o, Commit):
744 elif isinstance(o, Tree):
746 elif isinstance(o, Tag):
748 if sha in self._tagged:
749 self.add_todo([(self._tagged[sha], None, True)])
750 self.sha_done.add(sha)
751 self.progress("counting objects: %d\r" % len(self.sha_done))
755 class ObjectStoreGraphWalker(object):
756 """Graph walker that finds what commits are missing from an object store.
758 :ivar heads: Revisions without descendants in the local repo
759 :ivar get_parents: Function to retrieve parents in the local repo
762 def __init__(self, local_heads, get_parents):
763 """Create a new instance.
765 :param local_heads: Heads to start search with
766 :param get_parents: Function for finding the parents of a SHA1.
768 self.heads = set(local_heads)
769 self.get_parents = get_parents
773 """Ack that a revision and its ancestors are present in the source."""
774 ancestors = set([sha])
776 # stop if we run out of heads to remove
782 # collect all ancestors
783 new_ancestors = set()
785 if a in self.parents:
786 new_ancestors.update(self.parents[a])
788 # no more ancestors; stop
789 if not new_ancestors:
792 ancestors = new_ancestors
795 """Iterate over ancestors of heads in the target."""
797 ret = self.heads.pop()
798 ps = self.get_parents(ret)
799 self.parents[ret] = ps
800 self.heads.update(ps)