1 # object_store.py -- Object store for git objects
2 # Copyright (C) 2008-2009 Jelmer Vernooij <jelmer@samba.org>
4 # This program is free software; you can redistribute it and/or
5 # modify it under the terms of the GNU General Public License
6 # as published by the Free Software Foundation; either version 2
7 # or (at your option) a later version of the License.
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU General Public License for more details.
14 # You should have received a copy of the GNU General Public License
15 # along with this program; if not, write to the Free Software
16 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
20 """Git object store interfaces and implementation."""
31 from dulwich.errors import (
34 from dulwich.file import GitFile
35 from dulwich.objects import (
44 from dulwich.pack import (
57 class BaseObjectStore(object):
58 """Object store interface."""
60 def determine_wants_all(self, refs):
61 return [sha for (ref, sha) in refs.iteritems() if not sha in self and not ref.endswith("^{}")]
63 def iter_shas(self, shas):
64 """Iterate over the objects for the specified shas.
66 :param shas: Iterable object with SHAs
67 :return: Object iterator
69 return ObjectStoreIterator(self, shas)
71 def contains_loose(self, sha):
72 """Check if a particular object is present by SHA1 and is loose."""
73 raise NotImplementedError(self.contains_loose)
75 def contains_packed(self, sha):
76 """Check if a particular object is present by SHA1 and is packed."""
77 raise NotImplementedError(self.contains_packed)
79 def __contains__(self, sha):
80 """Check if a particular object is present by SHA1.
82 This method makes no distinction between loose and packed objects.
84 return self.contains_packed(sha) or self.contains_loose(sha)
88 """Iterable of pack objects."""
89 raise NotImplementedError
91 def get_raw(self, name):
92 """Obtain the raw text for an object.
94 :param name: sha for the object.
95 :return: tuple with object type and object contents.
97 raise NotImplementedError(self.get_raw)
99 def __getitem__(self, sha):
100 """Obtain an object by SHA1."""
101 type, uncomp = self.get_raw(sha)
102 return ShaFile.from_raw_string(type, uncomp)
105 """Iterate over the SHAs that are present in this store."""
106 raise NotImplementedError(self.__iter__)
108 def add_object(self, obj):
109 """Add a single object to this object store.
112 raise NotImplementedError(self.add_object)
114 def add_objects(self, objects):
115 """Add a set of objects to this object store.
117 :param objects: Iterable over a list of objects.
119 raise NotImplementedError(self.add_objects)
121 def tree_changes(self, source, target, want_unchanged=False):
122 """Find the differences between the contents of two trees
124 :param object_store: Object store to use for retrieving tree contents
125 :param tree: SHA1 of the root tree
126 :param want_unchanged: Whether unchanged files should be reported
127 :return: Iterator over tuples with (oldpath, newpath), (oldmode, newmode), (oldsha, newsha)
129 todo = set([(source, target, "")])
131 (sid, tid, path) = todo.pop()
140 for name, oldmode, oldhexsha in stree.iteritems():
141 oldchildpath = posixpath.join(path, name)
143 (newmode, newhexsha) = ttree[name]
144 newchildpath = oldchildpath
149 if (want_unchanged or oldmode != newmode or
150 oldhexsha != newhexsha):
151 if stat.S_ISDIR(oldmode):
152 if newmode is None or stat.S_ISDIR(newmode):
153 todo.add((oldhexsha, newhexsha, oldchildpath))
155 # entry became a file
156 todo.add((oldhexsha, None, oldchildpath))
157 yield ((None, newchildpath), (None, newmode), (None, newhexsha))
159 if newmode is not None and stat.S_ISDIR(newmode):
161 yield ((oldchildpath, None), (oldmode, None), (oldhexsha, None))
162 todo.add((None, newhexsha, newchildpath))
164 yield ((oldchildpath, newchildpath), (oldmode, newmode), (oldhexsha, newhexsha))
166 for name, newmode, newhexsha in ttree.iteritems():
167 childpath = posixpath.join(path, name)
168 if not name in stree:
169 if not stat.S_ISDIR(newmode):
170 yield ((None, childpath), (None, newmode), (None, newhexsha))
172 todo.add((None, newhexsha, childpath))
174 def iter_tree_contents(self, tree):
175 """Yield (path, mode, hexsha) tuples for all non-Tree objects in a tree.
177 :param tree: SHA1 of the root of the tree
179 todo = set([(tree, "")])
181 (tid, tpath) = todo.pop()
183 for name, mode, hexsha in tree.iteritems():
184 path = posixpath.join(tpath, name)
185 if stat.S_ISDIR(mode):
186 todo.add((hexsha, path))
188 yield path, mode, hexsha
190 def find_missing_objects(self, haves, wants, progress=None,
192 """Find the missing objects required for a set of revisions.
194 :param haves: Iterable over SHAs already in common.
195 :param wants: Iterable over SHAs of objects to fetch.
196 :param progress: Simple progress function that will be called with
197 updated progress strings.
198 :param get_tagged: Function that returns a dict of pointed-to sha -> tag
199 sha for including tags.
200 :return: Iterator over (sha, path) pairs.
202 finder = MissingObjectFinder(self, haves, wants, progress, get_tagged)
203 return iter(finder.next, None)
205 def find_common_revisions(self, graphwalker):
206 """Find which revisions this store has in common using graphwalker.
208 :param graphwalker: A graphwalker object.
209 :return: List of SHAs that are in common
212 sha = graphwalker.next()
217 sha = graphwalker.next()
220 def get_graph_walker(self, heads):
221 """Obtain a graph walker for this object store.
223 :param heads: Local heads to start search with
224 :return: GraphWalker object
226 return ObjectStoreGraphWalker(heads, lambda sha: self[sha].parents)
228 def generate_pack_contents(self, have, want):
229 """Iterate over the contents of a pack file.
231 :param have: List of SHA1s of objects that should not be sent
232 :param want: List of SHA1s of objects that should be sent
234 return self.iter_shas(self.find_missing_objects(have, want))
237 class PackBasedObjectStore(BaseObjectStore):
240 self._pack_cache = None
242 def contains_packed(self, sha):
243 """Check if a particular object is present by SHA1 and is packed."""
244 for pack in self.packs:
249 def _load_packs(self):
250 raise NotImplementedError(self._load_packs)
252 def _pack_cache_stale(self):
253 """Check whether the pack cache is stale."""
254 raise NotImplementedError(self._pack_cache_stale)
256 def _add_known_pack(self, pack):
257 """Add a newly appeared pack to the cache by path.
260 if self._pack_cache is not None:
261 self._pack_cache.append(pack)
265 """List with pack objects."""
266 if self._pack_cache is None or self._pack_cache_stale():
267 self._pack_cache = self._load_packs()
268 return self._pack_cache
270 def _iter_loose_objects(self):
271 raise NotImplementedError(self._iter_loose_objects)
273 def _get_loose_object(self, sha):
274 raise NotImplementedError(self._get_loose_object)
277 """Iterate over the SHAs that are present in this store."""
278 iterables = self.packs + [self._iter_loose_objects()]
279 return itertools.chain(*iterables)
281 def contains_loose(self, sha):
282 """Check if a particular object is present by SHA1 and is loose."""
283 return self._get_loose_object(sha) is not None
285 def get_raw(self, name):
286 """Obtain the raw text for an object.
288 :param name: sha for the object.
289 :return: tuple with object type and object contents.
292 sha = hex_to_sha(name)
294 elif len(name) == 20:
299 for pack in self.packs:
301 return pack.get_raw(sha)
305 hexsha = sha_to_hex(name)
306 ret = self._get_loose_object(hexsha)
308 return ret.type, ret.as_raw_string()
309 raise KeyError(hexsha)
311 def add_objects(self, objects):
312 """Add a set of objects to this object store.
314 :param objects: Iterable over objects, should support __len__.
316 if len(objects) == 0:
317 # Don't bother writing an empty pack file
319 f, commit = self.add_pack()
320 write_pack_data(f, objects, len(objects))
324 class DiskObjectStore(PackBasedObjectStore):
325 """Git-style object store that exists on disk."""
327 def __init__(self, path):
328 """Open an object store.
330 :param path: Path of the object store.
332 super(DiskObjectStore, self).__init__()
334 self.pack_dir = os.path.join(self.path, PACKDIR)
335 self._pack_cache_time = 0
337 def _load_packs(self):
340 self._pack_cache_time = os.stat(self.pack_dir).st_mtime
341 pack_dir_contents = os.listdir(self.pack_dir)
342 for name in pack_dir_contents:
343 # TODO: verify that idx exists first
344 if name.startswith("pack-") and name.endswith(".pack"):
345 filename = os.path.join(self.pack_dir, name)
346 pack_files.append((os.stat(filename).st_mtime, filename))
348 if e.errno == errno.ENOENT:
351 pack_files.sort(reverse=True)
352 suffix_len = len(".pack")
353 return [Pack(f[:-suffix_len]) for _, f in pack_files]
355 def _pack_cache_stale(self):
357 return os.stat(self.pack_dir).st_mtime > self._pack_cache_time
359 if e.errno == errno.ENOENT:
363 def _get_shafile_path(self, sha):
366 # Check from object dir
367 return os.path.join(self.path, dir, file)
369 def _iter_loose_objects(self):
370 for base in os.listdir(self.path):
373 for rest in os.listdir(os.path.join(self.path, base)):
376 def _get_loose_object(self, sha):
377 path = self._get_shafile_path(sha)
379 return ShaFile.from_file(path)
381 if e.errno == errno.ENOENT:
385 def move_in_thin_pack(self, path):
386 """Move a specific file containing a pack into the pack directory.
388 :note: The file should be on the same file system as the
391 :param path: Path to the pack file.
393 data = PackData(path)
395 # Write index for the thin pack (do we really need this?)
396 temppath = os.path.join(self.pack_dir,
397 sha_to_hex(urllib2.randombytes(20))+".tempidx")
398 data.create_index_v2(temppath, self.get_raw)
399 p = Pack.from_objects(data, load_pack_index(temppath))
401 # Write a full pack version
402 temppath = os.path.join(self.pack_dir,
403 sha_to_hex(urllib2.randombytes(20))+".temppack")
404 write_pack(temppath, ((o, None) for o in p.iterobjects(self.get_raw)),
406 pack_sha = load_pack_index(temppath+".idx").objects_sha1()
407 newbasename = os.path.join(self.pack_dir, "pack-%s" % pack_sha)
408 os.rename(temppath+".pack", newbasename+".pack")
409 os.rename(temppath+".idx", newbasename+".idx")
410 self._add_known_pack(Pack(newbasename))
412 def move_in_pack(self, path):
413 """Move a specific file containing a pack into the pack directory.
415 :note: The file should be on the same file system as the
418 :param path: Path to the pack file.
421 entries = p.sorted_entries()
422 basename = os.path.join(self.pack_dir,
423 "pack-%s" % iter_sha1(entry[0] for entry in entries))
424 write_pack_index_v2(basename+".idx", entries, p.get_stored_checksum())
426 os.rename(path, basename + ".pack")
427 self._add_known_pack(Pack(basename))
429 def add_thin_pack(self):
430 """Add a new thin pack to this object store.
432 Thin packs are packs that contain deltas with parents that exist
435 fd, path = tempfile.mkstemp(dir=self.pack_dir, suffix=".pack")
436 f = os.fdopen(fd, 'wb')
440 if os.path.getsize(path) > 0:
441 self.move_in_thin_pack(path)
445 """Add a new pack to this object store.
447 :return: Fileobject to write to and a commit function to
448 call when the pack is finished.
450 fd, path = tempfile.mkstemp(dir=self.pack_dir, suffix=".pack")
451 f = os.fdopen(fd, 'wb')
455 if os.path.getsize(path) > 0:
456 self.move_in_pack(path)
459 def add_object(self, obj):
460 """Add a single object to this object store.
462 :param obj: Object to add
464 dir = os.path.join(self.path, obj.id[:2])
468 if e.errno != errno.EEXIST:
470 path = os.path.join(dir, obj.id[2:])
471 if os.path.exists(path):
472 return # Already there, no need to write again
473 f = GitFile(path, 'wb')
475 f.write(obj.as_legacy_object())
480 class MemoryObjectStore(BaseObjectStore):
481 """Object store that keeps all objects in memory."""
484 super(MemoryObjectStore, self).__init__()
487 def contains_loose(self, sha):
488 """Check if a particular object is present by SHA1 and is loose."""
489 return sha in self._data
491 def contains_packed(self, sha):
492 """Check if a particular object is present by SHA1 and is packed."""
496 """Iterate over the SHAs that are present in this store."""
497 return self._data.iterkeys()
501 """List with pack objects."""
504 def get_raw(self, name):
505 """Obtain the raw text for an object.
507 :param name: sha for the object.
508 :return: tuple with object type and object contents.
510 return self[name].as_raw_string()
512 def __getitem__(self, name):
513 return self._data[name]
515 def add_object(self, obj):
516 """Add a single object to this object store.
519 self._data[obj.id] = obj
521 def add_objects(self, objects):
522 """Add a set of objects to this object store.
524 :param objects: Iterable over a list of objects.
526 for obj, path in objects:
527 self._data[obj.id] = obj
530 class ObjectImporter(object):
531 """Interface for importing objects."""
533 def __init__(self, count):
534 """Create a new ObjectImporter.
536 :param count: Number of objects that's going to be imported.
540 def add_object(self, object):
542 raise NotImplementedError(self.add_object)
544 def finish(self, object):
545 """Finish the imoprt and write objects to disk."""
546 raise NotImplementedError(self.finish)
549 class ObjectIterator(object):
550 """Interface for iterating over objects."""
552 def iterobjects(self):
553 raise NotImplementedError(self.iterobjects)
556 class ObjectStoreIterator(ObjectIterator):
557 """ObjectIterator that works on top of an ObjectStore."""
559 def __init__(self, store, sha_iter):
560 """Create a new ObjectIterator.
562 :param store: Object store to retrieve from
563 :param sha_iter: Iterator over (sha, path) tuples
566 self.sha_iter = sha_iter
570 """Yield tuple with next object and path."""
571 for sha, path in self.itershas():
572 yield self.store[sha], path
574 def iterobjects(self):
575 """Iterate over just the objects."""
580 """Iterate over the SHAs."""
581 for sha in self._shas:
583 for sha in self.sha_iter:
584 self._shas.append(sha)
587 def __contains__(self, needle):
588 """Check if an object is present.
590 :note: This checks if the object is present in
591 the underlying object store, not if it would
592 be yielded by the iterator.
594 :param needle: SHA1 of the object to check for
596 return needle in self.store
598 def __getitem__(self, key):
599 """Find an object by SHA1.
601 :note: This retrieves the object from the underlying
602 object store. It will also succeed if the object would
603 not be returned by the iterator.
605 return self.store[key]
608 """Return the number of objects."""
609 return len(list(self.itershas()))
612 def tree_lookup_path(lookup_obj, root_sha, path):
613 """Lookup an object in a Git tree.
615 :param lookup_obj: Callback for retrieving object by SHA1
616 :param root_sha: SHA1 of the root tree
617 :param path: Path to lookup
619 parts = path.split("/")
623 obj = lookup_obj(sha)
624 if type(obj) is not Tree:
625 raise NotTreeError(sha)
632 class MissingObjectFinder(object):
633 """Find the objects missing from another object store.
635 :param object_store: Object store containing at least all objects to be
637 :param haves: SHA1s of commits not to send (already present in target)
638 :param wants: SHA1s of commits to send
639 :param progress: Optional function to report progress to.
640 :param get_tagged: Function that returns a dict of pointed-to sha -> tag
641 sha for including tags.
642 :param tagged: dict of pointed-to sha -> tag sha for including tags
645 def __init__(self, object_store, haves, wants, progress=None,
647 self.sha_done = set(haves)
648 self.objects_to_send = set([(w, None, False) for w in wants if w not in haves])
649 self.object_store = object_store
651 self.progress = lambda x: None
653 self.progress = progress
654 self._tagged = get_tagged and get_tagged() or {}
656 def add_todo(self, entries):
657 self.objects_to_send.update([e for e in entries if not e[0] in self.sha_done])
659 def parse_tree(self, tree):
660 self.add_todo([(sha, name, not stat.S_ISDIR(mode)) for (mode, name, sha) in tree.entries() if not S_ISGITLINK(mode)])
662 def parse_commit(self, commit):
663 self.add_todo([(commit.tree, "", False)])
664 self.add_todo([(p, None, False) for p in commit.parents])
666 def parse_tag(self, tag):
667 self.add_todo([(tag.object[1], None, False)])
670 if not self.objects_to_send:
672 (sha, name, leaf) = self.objects_to_send.pop()
674 o = self.object_store[sha]
675 if isinstance(o, Commit):
677 elif isinstance(o, Tree):
679 elif isinstance(o, Tag):
681 if sha in self._tagged:
682 self.add_todo([(self._tagged[sha], None, True)])
683 self.sha_done.add(sha)
684 self.progress("counting objects: %d\r" % len(self.sha_done))
688 class ObjectStoreGraphWalker(object):
689 """Graph walker that finds out what commits are missing from an object
692 :ivar heads: Revisions without descendants in the local repo
693 :ivar get_parents: Function to retrieve parents in the local repo
696 def __init__(self, local_heads, get_parents):
697 """Create a new instance.
699 :param local_heads: Heads to start search with
700 :param get_parents: Function for finding the parents of a SHA1.
702 self.heads = set(local_heads)
703 self.get_parents = get_parents
707 """Ack that a revision and its ancestors are present in the source."""
708 if sha in self.heads:
709 self.heads.remove(sha)
710 if sha in self.parents:
711 for p in self.parents[sha]:
715 """Iterate over ancestors of heads in the target."""
717 ret = self.heads.pop()
718 ps = self.get_parents(ret)
719 self.parents[ret] = ps
720 self.heads.update(ps)