1 # object_store.py -- Object store for git objects
2 # Copyright (C) 2008-2009 Jelmer Vernooij <jelmer@samba.org>
4 # This program is free software; you can redistribute it and/or
5 # modify it under the terms of the GNU General Public License
6 # as published by the Free Software Foundation; either version 2
7 # or (at your option) a later version of the License.
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU General Public License for more details.
14 # You should have received a copy of the GNU General Public License
15 # along with this program; if not, write to the Free Software
16 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
20 """Git object store interfaces and implementation."""
30 from dulwich.errors import (
33 from dulwich.file import GitFile
34 from dulwich.objects import (
43 from dulwich.pack import (
56 class BaseObjectStore(object):
57 """Object store interface."""
59 def determine_wants_all(self, refs):
60 return [sha for (ref, sha) in refs.iteritems() if not sha in self and not ref.endswith("^{}")]
62 def iter_shas(self, shas):
63 """Iterate over the objects for the specified shas.
65 :param shas: Iterable object with SHAs
66 :return: Object iterator
68 return ObjectStoreIterator(self, shas)
70 def contains_loose(self, sha):
71 """Check if a particular object is present by SHA1 and is loose."""
72 raise NotImplementedError(self.contains_loose)
74 def contains_packed(self, sha):
75 """Check if a particular object is present by SHA1 and is packed."""
76 raise NotImplementedError(self.contains_packed)
78 def __contains__(self, sha):
79 """Check if a particular object is present by SHA1.
81 This method makes no distinction between loose and packed objects.
83 return self.contains_packed(sha) or self.contains_loose(sha)
87 """Iterable of pack objects."""
88 raise NotImplementedError
90 def get_raw(self, name):
91 """Obtain the raw text for an object.
93 :param name: sha for the object.
94 :return: tuple with object type and object contents.
96 raise NotImplementedError(self.get_raw)
98 def __getitem__(self, sha):
99 """Obtain an object by SHA1."""
100 type, uncomp = self.get_raw(sha)
101 return ShaFile.from_raw_string(type, uncomp)
104 """Iterate over the SHAs that are present in this store."""
105 raise NotImplementedError(self.__iter__)
107 def add_object(self, obj):
108 """Add a single object to this object store.
111 raise NotImplementedError(self.add_object)
113 def add_objects(self, objects):
114 """Add a set of objects to this object store.
116 :param objects: Iterable over a list of objects.
118 raise NotImplementedError(self.add_objects)
120 def tree_changes(self, source, target, want_unchanged=False):
121 """Find the differences between the contents of two trees
123 :param object_store: Object store to use for retrieving tree contents
124 :param tree: SHA1 of the root tree
125 :param want_unchanged: Whether unchanged files should be reported
126 :return: Iterator over tuples with (oldpath, newpath), (oldmode, newmode), (oldsha, newsha)
128 todo = set([(source, target, "")])
130 (sid, tid, path) = todo.pop()
139 for name, oldmode, oldhexsha in stree.iteritems():
143 oldchildpath = "%s/%s" % (path, name)
145 (newmode, newhexsha) = ttree[name]
146 newchildpath = oldchildpath
151 if (want_unchanged or oldmode != newmode or
152 oldhexsha != newhexsha):
153 if stat.S_ISDIR(oldmode):
154 if newmode is None or stat.S_ISDIR(newmode):
155 todo.add((oldhexsha, newhexsha, oldchildpath))
157 # entry became a file
158 todo.add((oldhexsha, None, oldchildpath))
159 yield ((None, newchildpath), (None, newmode), (None, newhexsha))
161 if newmode is not None and stat.S_ISDIR(newmode):
163 yield ((oldchildpath, None), (oldmode, None), (oldhexsha, None))
164 todo.add((None, newhexsha, newchildpath))
166 yield ((oldchildpath, newchildpath), (oldmode, newmode), (oldhexsha, newhexsha))
168 for name, newmode, newhexsha in ttree.iteritems():
172 childpath = "%s/%s" % (path, name)
173 if not name in stree:
174 if not stat.S_ISDIR(newmode):
175 yield ((None, childpath), (None, newmode), (None, newhexsha))
177 todo.add((None, newhexsha, childpath))
179 def iter_tree_contents(self, tree):
180 """Yield (path, mode, hexsha) tuples for all non-Tree objects in a tree.
182 :param tree: SHA1 of the root of the tree
184 todo = set([(tree, "")])
186 (tid, tpath) = todo.pop()
188 for name, mode, hexsha in tree.iteritems():
192 path = "%s/%s" % (tpath, name)
193 if stat.S_ISDIR(mode):
194 todo.add((hexsha, path))
196 yield path, mode, hexsha
198 def find_missing_objects(self, haves, wants, progress=None):
199 """Find the missing objects required for a set of revisions.
201 :param haves: Iterable over SHAs already in common.
202 :param wants: Iterable over SHAs of objects to fetch.
203 :param progress: Simple progress function that will be called with
204 updated progress strings.
205 :return: Iterator over (sha, path) pairs.
207 return iter(MissingObjectFinder(self, haves, wants, progress).next, None)
209 def find_common_revisions(self, graphwalker):
210 """Find which revisions this store has in common using graphwalker.
212 :param graphwalker: A graphwalker object.
213 :return: List of SHAs that are in common
216 sha = graphwalker.next()
221 sha = graphwalker.next()
224 def get_graph_walker(self, heads):
225 """Obtain a graph walker for this object store.
227 :param heads: Local heads to start search with
228 :return: GraphWalker object
230 return ObjectStoreGraphWalker(heads, lambda sha: self[sha].parents)
232 def generate_pack_contents(self, have, want):
233 """Iterate over the contents of a pack file.
235 :param have: List of SHA1s of objects that should not be sent
236 :param want: List of SHA1s of objects that should be sent
238 return self.iter_shas(self.find_missing_objects(have, want))
241 class DiskObjectStore(BaseObjectStore):
242 """Git-style object store that exists on disk."""
244 def __init__(self, path):
245 """Open an object store.
247 :param path: Path of the object store.
250 self._pack_cache = None
251 self.pack_dir = os.path.join(self.path, PACKDIR)
253 def contains_loose(self, sha):
254 """Check if a particular object is present by SHA1 and is loose."""
255 return self._get_shafile(sha) is not None
257 def contains_packed(self, sha):
258 """Check if a particular object is present by SHA1 and is packed."""
259 for pack in self.packs:
265 """Iterate over the SHAs that are present in this store."""
266 iterables = self.packs + [self._iter_shafile_shas()]
267 return itertools.chain(*iterables)
271 """List with pack objects."""
272 if self._pack_cache is None:
273 self._pack_cache = self._load_packs()
274 return self._pack_cache
276 def _load_packs(self):
277 if not os.path.exists(self.pack_dir):
280 for name in os.listdir(self.pack_dir):
281 # TODO: verify that idx exists first
282 if name.startswith("pack-") and name.endswith(".pack"):
283 filename = os.path.join(self.pack_dir, name)
284 pack_files.append((os.stat(filename).st_mtime, filename))
285 pack_files.sort(reverse=True)
286 suffix_len = len(".pack")
287 return [Pack(f[:-suffix_len]) for _, f in pack_files]
289 def _add_known_pack(self, path):
290 """Add a newly appeared pack to the cache by path.
293 if self._pack_cache is not None:
294 self._pack_cache.append(Pack(path))
296 def _get_shafile_path(self, sha):
299 # Check from object dir
300 return os.path.join(self.path, dir, file)
302 def _iter_shafile_shas(self):
303 for base in os.listdir(self.path):
306 for rest in os.listdir(os.path.join(self.path, base)):
309 def _get_shafile(self, sha):
310 path = self._get_shafile_path(sha)
311 if os.path.exists(path):
312 return ShaFile.from_file(path)
315 def _add_shafile(self, sha, o):
316 dir = os.path.join(self.path, sha[:2])
317 if not os.path.isdir(dir):
319 path = os.path.join(dir, sha[2:])
320 if os.path.exists(path):
321 return # Already there, no need to write again
322 f = GitFile(path, 'wb')
324 f.write(o.as_legacy_object())
328 def get_raw(self, name):
329 """Obtain the raw text for an object.
331 :param name: sha for the object.
332 :return: tuple with object type and object contents.
335 sha = hex_to_sha(name)
337 elif len(name) == 20:
342 for pack in self.packs:
344 return pack.get_raw(sha)
348 hexsha = sha_to_hex(name)
349 ret = self._get_shafile(hexsha)
351 return ret.type, ret.as_raw_string()
352 raise KeyError(hexsha)
354 def move_in_thin_pack(self, path):
355 """Move a specific file containing a pack into the pack directory.
357 :note: The file should be on the same file system as the
360 :param path: Path to the pack file.
362 data = PackData(path)
364 # Write index for the thin pack (do we really need this?)
365 temppath = os.path.join(self.pack_dir,
366 sha_to_hex(urllib2.randombytes(20))+".tempidx")
367 data.create_index_v2(temppath, self.get_raw)
368 p = Pack.from_objects(data, load_pack_index(temppath))
370 # Write a full pack version
371 temppath = os.path.join(self.pack_dir,
372 sha_to_hex(urllib2.randombytes(20))+".temppack")
373 write_pack(temppath, ((o, None) for o in p.iterobjects(self.get_raw)),
375 pack_sha = load_pack_index(temppath+".idx").objects_sha1()
376 newbasename = os.path.join(self.pack_dir, "pack-%s" % pack_sha)
377 os.rename(temppath+".pack", newbasename+".pack")
378 os.rename(temppath+".idx", newbasename+".idx")
379 self._add_known_pack(newbasename)
381 def move_in_pack(self, path):
382 """Move a specific file containing a pack into the pack directory.
384 :note: The file should be on the same file system as the
387 :param path: Path to the pack file.
390 entries = p.sorted_entries()
391 basename = os.path.join(self.pack_dir,
392 "pack-%s" % iter_sha1(entry[0] for entry in entries))
393 write_pack_index_v2(basename+".idx", entries, p.get_stored_checksum())
395 os.rename(path, basename + ".pack")
396 self._add_known_pack(basename)
398 def add_thin_pack(self):
399 """Add a new thin pack to this object store.
401 Thin packs are packs that contain deltas with parents that exist
404 fd, path = tempfile.mkstemp(dir=self.pack_dir, suffix=".pack")
405 f = os.fdopen(fd, 'wb')
409 if os.path.getsize(path) > 0:
410 self.move_in_thin_pack(path)
414 """Add a new pack to this object store.
416 :return: Fileobject to write to and a commit function to
417 call when the pack is finished.
419 fd, path = tempfile.mkstemp(dir=self.pack_dir, suffix=".pack")
420 f = os.fdopen(fd, 'wb')
424 if os.path.getsize(path) > 0:
425 self.move_in_pack(path)
428 def add_object(self, obj):
429 """Add a single object to this object store.
431 :param obj: Object to add
433 self._add_shafile(obj.id, obj)
435 def add_objects(self, objects):
436 """Add a set of objects to this object store.
438 :param objects: Iterable over objects, should support __len__.
440 if len(objects) == 0:
441 # Don't bother writing an empty pack file
443 f, commit = self.add_pack()
444 write_pack_data(f, objects, len(objects))
448 class MemoryObjectStore(BaseObjectStore):
449 """Object store that keeps all objects in memory."""
452 super(MemoryObjectStore, self).__init__()
455 def contains_loose(self, sha):
456 """Check if a particular object is present by SHA1 and is loose."""
457 return sha in self._data
459 def contains_packed(self, sha):
460 """Check if a particular object is present by SHA1 and is packed."""
464 """Iterate over the SHAs that are present in this store."""
465 return self._data.iterkeys()
469 """List with pack objects."""
472 def get_raw(self, name):
473 """Obtain the raw text for an object.
475 :param name: sha for the object.
476 :return: tuple with object type and object contents.
478 return self[name].as_raw_string()
480 def __getitem__(self, name):
481 return self._data[name]
483 def add_object(self, obj):
484 """Add a single object to this object store.
487 self._data[obj.id] = obj
489 def add_objects(self, objects):
490 """Add a set of objects to this object store.
492 :param objects: Iterable over a list of objects.
494 for obj, path in objects:
495 self._data[obj.id] = obj
498 class ObjectImporter(object):
499 """Interface for importing objects."""
501 def __init__(self, count):
502 """Create a new ObjectImporter.
504 :param count: Number of objects that's going to be imported.
508 def add_object(self, object):
510 raise NotImplementedError(self.add_object)
512 def finish(self, object):
513 """Finish the imoprt and write objects to disk."""
514 raise NotImplementedError(self.finish)
517 class ObjectIterator(object):
518 """Interface for iterating over objects."""
520 def iterobjects(self):
521 raise NotImplementedError(self.iterobjects)
524 class ObjectStoreIterator(ObjectIterator):
525 """ObjectIterator that works on top of an ObjectStore."""
527 def __init__(self, store, sha_iter):
528 """Create a new ObjectIterator.
530 :param store: Object store to retrieve from
531 :param sha_iter: Iterator over (sha, path) tuples
534 self.sha_iter = sha_iter
538 """Yield tuple with next object and path."""
539 for sha, path in self.itershas():
540 yield self.store[sha], path
542 def iterobjects(self):
543 """Iterate over just the objects."""
548 """Iterate over the SHAs."""
549 for sha in self._shas:
551 for sha in self.sha_iter:
552 self._shas.append(sha)
555 def __contains__(self, needle):
556 """Check if an object is present.
558 :note: This checks if the object is present in
559 the underlying object store, not if it would
560 be yielded by the iterator.
562 :param needle: SHA1 of the object to check for
564 return needle in self.store
566 def __getitem__(self, key):
567 """Find an object by SHA1.
569 :note: This retrieves the object from the underlying
570 object store. It will also succeed if the object would
571 not be returned by the iterator.
573 return self.store[key]
576 """Return the number of objects."""
577 return len(list(self.itershas()))
580 def tree_lookup_path(lookup_obj, root_sha, path):
581 """Lookup an object in a Git tree.
583 :param lookup_obj: Callback for retrieving object by SHA1
584 :param root_sha: SHA1 of the root tree
585 :param path: Path to lookup
587 parts = path.split("/")
590 obj = lookup_obj(sha)
591 if type(obj) is not Tree:
592 raise NotTreeError(sha)
599 class MissingObjectFinder(object):
600 """Find the objects missing from another object store.
602 :param object_store: Object store containing at least all objects to be
604 :param haves: SHA1s of commits not to send (already present in target)
605 :param wants: SHA1s of commits to send
606 :param progress: Optional function to report progress to.
609 def __init__(self, object_store, haves, wants, progress=None):
610 self.sha_done = set(haves)
611 self.objects_to_send = set([(w, None, False) for w in wants if w not in haves])
612 self.object_store = object_store
614 self.progress = lambda x: None
616 self.progress = progress
618 def add_todo(self, entries):
619 self.objects_to_send.update([e for e in entries if not e[0] in self.sha_done])
621 def parse_tree(self, tree):
622 self.add_todo([(sha, name, not stat.S_ISDIR(mode)) for (mode, name, sha) in tree.entries() if not S_ISGITLINK(mode)])
624 def parse_commit(self, commit):
625 self.add_todo([(commit.tree, "", False)])
626 self.add_todo([(p, None, False) for p in commit.parents])
628 def parse_tag(self, tag):
629 self.add_todo([(tag.object[1], None, False)])
632 if not self.objects_to_send:
634 (sha, name, leaf) = self.objects_to_send.pop()
636 o = self.object_store[sha]
637 if isinstance(o, Commit):
639 elif isinstance(o, Tree):
641 elif isinstance(o, Tag):
643 self.sha_done.add(sha)
644 self.progress("counting objects: %d\r" % len(self.sha_done))
648 class ObjectStoreGraphWalker(object):
649 """Graph walker that finds out what commits are missing from an object store."""
651 def __init__(self, local_heads, get_parents):
652 """Create a new instance.
654 :param local_heads: Heads to start search with
655 :param get_parents: Function for finding the parents of a SHA1.
657 self.heads = set(local_heads)
658 self.get_parents = get_parents
662 """Ack that a particular revision and its ancestors are present in the source."""
663 if sha in self.heads:
664 self.heads.remove(sha)
665 if sha in self.parents:
666 for p in self.parents[sha]:
670 """Iterate over ancestors of heads in the target."""
672 ret = self.heads.pop()
673 ps = self.get_parents(ret)
674 self.parents[ret] = ps
675 self.heads.update(ps)