1 # object_store.py -- Object store for git objects
2 # Copyright (C) 2008-2009 Jelmer Vernooij <jelmer@samba.org>
4 # This program is free software; you can redistribute it and/or
5 # modify it under the terms of the GNU General Public License
6 # as published by the Free Software Foundation; either version 2
7 # or (at your option) a later version of the License.
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU General Public License for more details.
14 # You should have received a copy of the GNU General Public License
15 # along with this program; if not, write to the Free Software
16 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
20 """Git object store interfaces and implementation."""
30 from dulwich.diff_tree import (
34 from dulwich.errors import (
37 from dulwich.file import GitFile
38 from dulwich.objects import (
50 from dulwich.pack import (
65 class BaseObjectStore(object):
66 """Object store interface."""
68 def determine_wants_all(self, refs):
69 return [sha for (ref, sha) in refs.iteritems()
70 if not sha in self and not ref.endswith("^{}") and
73 def iter_shas(self, shas):
74 """Iterate over the objects for the specified shas.
76 :param shas: Iterable object with SHAs
77 :return: Object iterator
79 return ObjectStoreIterator(self, shas)
81 def contains_loose(self, sha):
82 """Check if a particular object is present by SHA1 and is loose."""
83 raise NotImplementedError(self.contains_loose)
85 def contains_packed(self, sha):
86 """Check if a particular object is present by SHA1 and is packed."""
87 raise NotImplementedError(self.contains_packed)
89 def __contains__(self, sha):
90 """Check if a particular object is present by SHA1.
92 This method makes no distinction between loose and packed objects.
94 return self.contains_packed(sha) or self.contains_loose(sha)
98 """Iterable of pack objects."""
99 raise NotImplementedError
101 def get_raw(self, name):
102 """Obtain the raw text for an object.
104 :param name: sha for the object.
105 :return: tuple with numeric type and object contents.
107 raise NotImplementedError(self.get_raw)
109 def __getitem__(self, sha):
110 """Obtain an object by SHA1."""
111 type_num, uncomp = self.get_raw(sha)
112 return ShaFile.from_raw_string(type_num, uncomp)
115 """Iterate over the SHAs that are present in this store."""
116 raise NotImplementedError(self.__iter__)
118 def add_object(self, obj):
119 """Add a single object to this object store.
122 raise NotImplementedError(self.add_object)
124 def add_objects(self, objects):
125 """Add a set of objects to this object store.
127 :param objects: Iterable over a list of objects.
129 raise NotImplementedError(self.add_objects)
131 def tree_changes(self, source, target, want_unchanged=False):
132 """Find the differences between the contents of two trees
134 :param object_store: Object store to use for retrieving tree contents
135 :param tree: SHA1 of the root tree
136 :param want_unchanged: Whether unchanged files should be reported
137 :return: Iterator over tuples with
138 (oldpath, newpath), (oldmode, newmode), (oldsha, newsha)
140 for change in tree_changes(self, source, target,
141 want_unchanged=want_unchanged):
142 yield ((change.old.path, change.new.path),
143 (change.old.mode, change.new.mode),
144 (change.old.sha, change.new.sha))
146 def iter_tree_contents(self, tree_id, include_trees=False):
147 """Iterate the contents of a tree and all subtrees.
149 Iteration is depth-first pre-order, as in e.g. os.walk.
151 :param tree_id: SHA1 of the tree.
152 :param include_trees: If True, include tree objects in the iteration.
153 :return: Iterator over TreeEntry namedtuples for all the objects in a
156 for entry, _ in walk_trees(self, tree_id, None):
157 if not stat.S_ISDIR(entry.mode) or include_trees:
160 def find_missing_objects(self, haves, wants, progress=None,
162 """Find the missing objects required for a set of revisions.
164 :param haves: Iterable over SHAs already in common.
165 :param wants: Iterable over SHAs of objects to fetch.
166 :param progress: Simple progress function that will be called with
167 updated progress strings.
168 :param get_tagged: Function that returns a dict of pointed-to sha -> tag
169 sha for including tags.
170 :return: Iterator over (sha, path) pairs.
172 finder = MissingObjectFinder(self, haves, wants, progress, get_tagged)
173 return iter(finder.next, None)
175 def find_common_revisions(self, graphwalker):
176 """Find which revisions this store has in common using graphwalker.
178 :param graphwalker: A graphwalker object.
179 :return: List of SHAs that are in common
182 sha = graphwalker.next()
187 sha = graphwalker.next()
190 def get_graph_walker(self, heads):
191 """Obtain a graph walker for this object store.
193 :param heads: Local heads to start search with
194 :return: GraphWalker object
196 return ObjectStoreGraphWalker(heads, lambda sha: self[sha].parents)
198 def generate_pack_contents(self, have, want, progress=None):
199 """Iterate over the contents of a pack file.
201 :param have: List of SHA1s of objects that should not be sent
202 :param want: List of SHA1s of objects that should be sent
203 :param progress: Optional progress reporting method
205 return self.iter_shas(self.find_missing_objects(have, want, progress))
207 def peel_sha(self, sha):
208 """Peel all tags from a SHA.
210 :param sha: The object SHA to peel.
211 :return: The fully-peeled SHA1 of a tag object, after peeling all
212 intermediate tags; if the original ref does not point to a tag, this
213 will equal the original SHA1.
216 obj_class = object_class(obj.type_name)
217 while obj_class is Tag:
218 obj_class, sha = obj.object
223 class PackBasedObjectStore(BaseObjectStore):
226 self._pack_cache = None
228 def contains_packed(self, sha):
229 """Check if a particular object is present by SHA1 and is packed."""
230 for pack in self.packs:
235 def _load_packs(self):
236 raise NotImplementedError(self._load_packs)
238 def _pack_cache_stale(self):
239 """Check whether the pack cache is stale."""
240 raise NotImplementedError(self._pack_cache_stale)
242 def _add_known_pack(self, pack):
243 """Add a newly appeared pack to the cache by path.
246 if self._pack_cache is not None:
247 self._pack_cache.append(pack)
251 """List with pack objects."""
252 if self._pack_cache is None or self._pack_cache_stale():
253 self._pack_cache = self._load_packs()
254 return self._pack_cache
256 def _iter_loose_objects(self):
257 """Iterate over the SHAs of all loose objects."""
258 raise NotImplementedError(self._iter_loose_objects)
260 def _get_loose_object(self, sha):
261 raise NotImplementedError(self._get_loose_object)
263 def _remove_loose_object(self, sha):
264 raise NotImplementedError(self._remove_loose_object)
266 def pack_loose_objects(self):
267 """Pack loose objects.
269 :return: Number of objects packed
272 for sha in self._iter_loose_objects():
273 objects.add((self._get_loose_object(sha), None))
274 self.add_objects(objects)
275 for obj, path in objects:
276 self._remove_loose_object(obj.id)
280 """Iterate over the SHAs that are present in this store."""
281 iterables = self.packs + [self._iter_loose_objects()]
282 return itertools.chain(*iterables)
284 def contains_loose(self, sha):
285 """Check if a particular object is present by SHA1 and is loose."""
286 return self._get_loose_object(sha) is not None
288 def get_raw(self, name):
289 """Obtain the raw text for an object.
291 :param name: sha for the object.
292 :return: tuple with numeric type and object contents.
295 sha = hex_to_sha(name)
297 elif len(name) == 20:
301 raise AssertionError("Invalid object name %r" % name)
302 for pack in self.packs:
304 return pack.get_raw(sha)
308 hexsha = sha_to_hex(name)
309 ret = self._get_loose_object(hexsha)
311 return ret.type_num, ret.as_raw_string()
312 raise KeyError(hexsha)
314 def add_objects(self, objects):
315 """Add a set of objects to this object store.
317 :param objects: Iterable over objects, should support __len__.
318 :return: Pack object of the objects written.
320 if len(objects) == 0:
321 # Don't bother writing an empty pack file
323 f, commit = self.add_pack()
324 write_pack_data(f, objects, len(objects))
328 class DiskObjectStore(PackBasedObjectStore):
329 """Git-style object store that exists on disk."""
331 def __init__(self, path):
332 """Open an object store.
334 :param path: Path of the object store.
336 super(DiskObjectStore, self).__init__()
338 self.pack_dir = os.path.join(self.path, PACKDIR)
339 self._pack_cache_time = 0
341 def _load_packs(self):
344 self._pack_cache_time = os.stat(self.pack_dir).st_mtime
345 pack_dir_contents = os.listdir(self.pack_dir)
346 for name in pack_dir_contents:
347 # TODO: verify that idx exists first
348 if name.startswith("pack-") and name.endswith(".pack"):
349 filename = os.path.join(self.pack_dir, name)
350 pack_files.append((os.stat(filename).st_mtime, filename))
352 if e.errno == errno.ENOENT:
355 pack_files.sort(reverse=True)
356 suffix_len = len(".pack")
357 return [Pack(f[:-suffix_len]) for _, f in pack_files]
359 def _pack_cache_stale(self):
361 return os.stat(self.pack_dir).st_mtime > self._pack_cache_time
363 if e.errno == errno.ENOENT:
367 def _get_shafile_path(self, sha):
368 # Check from object dir
369 return hex_to_filename(self.path, sha)
371 def _iter_loose_objects(self):
372 for base in os.listdir(self.path):
375 for rest in os.listdir(os.path.join(self.path, base)):
378 def _get_loose_object(self, sha):
379 path = self._get_shafile_path(sha)
381 return ShaFile.from_path(path)
382 except (OSError, IOError), e:
383 if e.errno == errno.ENOENT:
387 def _remove_loose_object(self, sha):
388 os.remove(self._get_shafile_path(sha))
390 def move_in_thin_pack(self, path):
391 """Move a specific file containing a pack into the pack directory.
393 :note: The file should be on the same file system as the
396 :param path: Path to the pack file.
398 data = ThinPackData(self.get_raw, path)
400 # Write index for the thin pack (do we really need this?)
401 temppath = os.path.join(self.pack_dir,
402 sha_to_hex(urllib2.randombytes(20))+".tempidx")
403 data.create_index_v2(temppath)
404 p = Pack.from_objects(data, load_pack_index(temppath))
407 # Write a full pack version
408 temppath = os.path.join(self.pack_dir,
409 sha_to_hex(urllib2.randombytes(20))+".temppack")
410 write_pack(temppath, ((o, None) for o in p.iterobjects()), len(p))
414 pack_sha = load_pack_index(temppath+".idx").objects_sha1()
415 newbasename = os.path.join(self.pack_dir, "pack-%s" % pack_sha)
416 os.rename(temppath+".pack", newbasename+".pack")
417 os.rename(temppath+".idx", newbasename+".idx")
418 final_pack = Pack(newbasename)
419 self._add_known_pack(final_pack)
422 def move_in_pack(self, path):
423 """Move a specific file containing a pack into the pack directory.
425 :note: The file should be on the same file system as the
428 :param path: Path to the pack file.
431 entries = p.sorted_entries()
432 basename = os.path.join(self.pack_dir,
433 "pack-%s" % iter_sha1(entry[0] for entry in entries))
434 f = GitFile(basename+".idx", "wb")
436 write_pack_index_v2(f, entries, p.get_stored_checksum())
440 os.rename(path, basename + ".pack")
441 final_pack = Pack(basename)
442 self._add_known_pack(final_pack)
445 def add_thin_pack(self):
446 """Add a new thin pack to this object store.
448 Thin packs are packs that contain deltas with parents that exist
451 fd, path = tempfile.mkstemp(dir=self.pack_dir, suffix=".pack")
452 f = os.fdopen(fd, 'wb')
456 if os.path.getsize(path) > 0:
457 return self.move_in_thin_pack(path)
464 """Add a new pack to this object store.
466 :return: Fileobject to write to and a commit function to
467 call when the pack is finished.
469 fd, path = tempfile.mkstemp(dir=self.pack_dir, suffix=".pack")
470 f = os.fdopen(fd, 'wb')
474 if os.path.getsize(path) > 0:
475 return self.move_in_pack(path)
481 def add_object(self, obj):
482 """Add a single object to this object store.
484 :param obj: Object to add
486 dir = os.path.join(self.path, obj.id[:2])
490 if e.errno != errno.EEXIST:
492 path = os.path.join(dir, obj.id[2:])
493 if os.path.exists(path):
494 return # Already there, no need to write again
495 f = GitFile(path, 'wb')
497 f.write(obj.as_legacy_object())
506 if e.errno != errno.EEXIST:
508 os.mkdir(os.path.join(path, "info"))
509 os.mkdir(os.path.join(path, PACKDIR))
513 class MemoryObjectStore(BaseObjectStore):
514 """Object store that keeps all objects in memory."""
517 super(MemoryObjectStore, self).__init__()
520 def contains_loose(self, sha):
521 """Check if a particular object is present by SHA1 and is loose."""
522 return sha in self._data
524 def contains_packed(self, sha):
525 """Check if a particular object is present by SHA1 and is packed."""
529 """Iterate over the SHAs that are present in this store."""
530 return self._data.iterkeys()
534 """List with pack objects."""
537 def get_raw(self, name):
538 """Obtain the raw text for an object.
540 :param name: sha for the object.
541 :return: tuple with numeric type and object contents.
543 return self[name].as_raw_string()
545 def __getitem__(self, name):
546 return self._data[name]
548 def __delitem__(self, name):
549 """Delete an object from this store, for testing only."""
552 def add_object(self, obj):
553 """Add a single object to this object store.
556 self._data[obj.id] = obj
558 def add_objects(self, objects):
559 """Add a set of objects to this object store.
561 :param objects: Iterable over a list of objects.
563 for obj, path in objects:
564 self._data[obj.id] = obj
567 class ObjectImporter(object):
568 """Interface for importing objects."""
570 def __init__(self, count):
571 """Create a new ObjectImporter.
573 :param count: Number of objects that's going to be imported.
577 def add_object(self, object):
579 raise NotImplementedError(self.add_object)
581 def finish(self, object):
582 """Finish the import and write objects to disk."""
583 raise NotImplementedError(self.finish)
586 class ObjectIterator(object):
587 """Interface for iterating over objects."""
589 def iterobjects(self):
590 raise NotImplementedError(self.iterobjects)
593 class ObjectStoreIterator(ObjectIterator):
594 """ObjectIterator that works on top of an ObjectStore."""
596 def __init__(self, store, sha_iter):
597 """Create a new ObjectIterator.
599 :param store: Object store to retrieve from
600 :param sha_iter: Iterator over (sha, path) tuples
603 self.sha_iter = sha_iter
607 """Yield tuple with next object and path."""
608 for sha, path in self.itershas():
609 yield self.store[sha], path
611 def iterobjects(self):
612 """Iterate over just the objects."""
617 """Iterate over the SHAs."""
618 for sha in self._shas:
620 for sha in self.sha_iter:
621 self._shas.append(sha)
624 def __contains__(self, needle):
625 """Check if an object is present.
627 :note: This checks if the object is present in
628 the underlying object store, not if it would
629 be yielded by the iterator.
631 :param needle: SHA1 of the object to check for
633 return needle in self.store
635 def __getitem__(self, key):
636 """Find an object by SHA1.
638 :note: This retrieves the object from the underlying
639 object store. It will also succeed if the object would
640 not be returned by the iterator.
642 return self.store[key]
645 """Return the number of objects."""
646 return len(list(self.itershas()))
649 def tree_lookup_path(lookup_obj, root_sha, path):
650 """Lookup an object in a Git tree.
652 :param lookup_obj: Callback for retrieving object by SHA1
653 :param root_sha: SHA1 of the root tree
654 :param path: Path to lookup
656 parts = path.split("/")
660 obj = lookup_obj(sha)
661 if not isinstance(obj, Tree):
662 raise NotTreeError(sha)
669 class MissingObjectFinder(object):
670 """Find the objects missing from another object store.
672 :param object_store: Object store containing at least all objects to be
674 :param haves: SHA1s of commits not to send (already present in target)
675 :param wants: SHA1s of commits to send
676 :param progress: Optional function to report progress to.
677 :param get_tagged: Function that returns a dict of pointed-to sha -> tag
678 sha for including tags.
679 :param tagged: dict of pointed-to sha -> tag sha for including tags
682 def __init__(self, object_store, haves, wants, progress=None,
685 self.sha_done = haves
686 self.objects_to_send = set([(w, None, False) for w in wants
688 self.object_store = object_store
690 self.progress = lambda x: None
692 self.progress = progress
693 self._tagged = get_tagged and get_tagged() or {}
695 def add_todo(self, entries):
696 self.objects_to_send.update([e for e in entries
697 if not e[0] in self.sha_done])
699 def parse_tree(self, tree):
700 self.add_todo([(sha, name, not stat.S_ISDIR(mode))
701 for mode, name, sha in tree.entries()
702 if not S_ISGITLINK(mode)])
704 def parse_commit(self, commit):
705 self.add_todo([(commit.tree, "", False)])
706 self.add_todo([(p, None, False) for p in commit.parents])
708 def parse_tag(self, tag):
709 self.add_todo([(tag.object[1], None, False)])
712 if not self.objects_to_send:
714 (sha, name, leaf) = self.objects_to_send.pop()
716 o = self.object_store[sha]
717 if isinstance(o, Commit):
719 elif isinstance(o, Tree):
721 elif isinstance(o, Tag):
723 if sha in self._tagged:
724 self.add_todo([(self._tagged[sha], None, True)])
725 self.sha_done.add(sha)
726 self.progress("counting objects: %d\r" % len(self.sha_done))
730 class ObjectStoreGraphWalker(object):
731 """Graph walker that finds what commits are missing from an object store.
733 :ivar heads: Revisions without descendants in the local repo
734 :ivar get_parents: Function to retrieve parents in the local repo
737 def __init__(self, local_heads, get_parents):
738 """Create a new instance.
740 :param local_heads: Heads to start search with
741 :param get_parents: Function for finding the parents of a SHA1.
743 self.heads = set(local_heads)
744 self.get_parents = get_parents
748 """Ack that a revision and its ancestors are present in the source."""
749 ancestors = set([sha])
751 # stop if we run out of heads to remove
757 # collect all ancestors
758 new_ancestors = set()
760 if a in self.parents:
761 new_ancestors.update(self.parents[a])
763 # no more ancestors; stop
764 if not new_ancestors:
767 ancestors = new_ancestors
770 """Iterate over ancestors of heads in the target."""
772 ret = self.heads.pop()
773 ps = self.get_parents(ret)
774 self.parents[ret] = ps
775 self.heads.update(ps)