1 # object_store.py -- Object store for git objects
2 # Copyright (C) 2008-2009 Jelmer Vernooij <jelmer@samba.org>
4 # This program is free software; you can redistribute it and/or
5 # modify it under the terms of the GNU General Public License
6 # as published by the Free Software Foundation; either version 2
7 # or (at your option) a later version of the License.
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU General Public License for more details.
14 # You should have received a copy of the GNU General Public License
15 # along with this program; if not, write to the Free Software
16 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
20 """Git object store interfaces and implementation."""
29 from dulwich.errors import (
32 from dulwich.objects import (
41 from dulwich.pack import (
54 class BaseObjectStore(object):
55 """Object store interface."""
57 def determine_wants_all(self, refs):
58 return [sha for (ref, sha) in refs.iteritems() if not sha in self and not ref.endswith("^{}")]
60 def iter_shas(self, shas):
61 """Iterate over the objects for the specified shas.
63 :param shas: Iterable object with SHAs
64 :return: Object iterator
66 return ObjectStoreIterator(self, shas)
68 def __contains__(self, sha):
69 """Check if a particular object is present by SHA1."""
70 raise NotImplementedError(self.__contains__)
72 def get_raw(self, name):
73 """Obtain the raw text for an object.
75 :param name: sha for the object.
76 :return: tuple with object type and object contents.
78 raise NotImplementedError(self.get_raw)
80 def __getitem__(self, sha):
81 """Obtain an object by SHA1."""
82 type, uncomp = self.get_raw(sha)
83 return ShaFile.from_raw_string(type, uncomp)
86 """Iterate over the SHAs that are present in this store."""
87 raise NotImplementedError(self.__iter__)
89 def add_object(self, obj):
90 """Add a single object to this object store.
93 raise NotImplementedError(self.add_object)
95 def add_objects(self, objects):
96 """Add a set of objects to this object store.
98 :param objects: Iterable over a list of objects.
100 raise NotImplementedError(self.add_objects)
102 def find_missing_objects(self, haves, wants, progress=None):
103 """Find the missing objects required for a set of revisions.
105 :param haves: Iterable over SHAs already in common.
106 :param wants: Iterable over SHAs of objects to fetch.
107 :param progress: Simple progress function that will be called with
108 updated progress strings.
109 :return: Iterator over (sha, path) pairs.
111 return iter(MissingObjectFinder(self, haves, wants, progress).next, None)
113 def find_common_revisions(self, graphwalker):
114 """Find which revisions this store has in common using graphwalker.
116 :param graphwalker: A graphwalker object.
117 :return: List of SHAs that are in common
120 sha = graphwalker.next()
125 sha = graphwalker.next()
128 def get_graph_walker(self, heads):
129 """Obtain a graph walker for this object store.
131 :param heads: Local heads to start search with
132 :return: GraphWalker object
134 return ObjectStoreGraphWalker(heads, lambda sha: self[sha].parents)
136 def generate_pack_contents(self, have, want):
137 """Iterate over the contents of a pack file.
139 :param have: List of SHA1s of objects that should not be sent
140 :param want: List of SHA1s of objects that should be sent
142 return self.iter_shas(self.find_missing_objects(have, want))
145 class DiskObjectStore(BaseObjectStore):
146 """Git-style object store that exists on disk."""
148 def __init__(self, path):
149 """Open an object store.
151 :param path: Path of the object store.
154 self._pack_cache = None
155 self.pack_dir = os.path.join(self.path, PACKDIR)
157 def __contains__(self, sha):
158 """Check if a particular object is present by SHA1."""
159 for pack in self.packs:
162 ret = self._get_shafile(sha)
168 """Iterate over the SHAs that are present in this store."""
169 iterables = self.packs + [self._iter_shafile_shas()]
170 return itertools.chain(*iterables)
174 """List with pack objects."""
175 if self._pack_cache is None:
176 self._pack_cache = list(self._load_packs())
177 return self._pack_cache
179 def _load_packs(self):
180 if not os.path.exists(self.pack_dir):
182 for name in os.listdir(self.pack_dir):
183 if name.startswith("pack-") and name.endswith(".pack"):
184 yield Pack(os.path.join(self.pack_dir, name[:-len(".pack")]))
186 def _add_known_pack(self, path):
187 """Add a newly appeared pack to the cache by path.
190 if self._pack_cache is not None:
191 self._pack_cache.append(Pack(path))
193 def _get_shafile_path(self, sha):
196 # Check from object dir
197 return os.path.join(self.path, dir, file)
199 def _iter_shafile_shas(self):
200 for base in os.listdir(self.path):
203 for rest in os.listdir(os.path.join(self.path, base)):
206 def _get_shafile(self, sha):
207 path = self._get_shafile_path(sha)
208 if os.path.exists(path):
209 return ShaFile.from_file(path)
212 def _add_shafile(self, sha, o):
213 dir = os.path.join(self.path, sha[:2])
214 if not os.path.isdir(dir):
216 path = os.path.join(dir, sha[2:])
217 if os.path.exists(path):
218 return # Already there, no need to write again
221 f.write(o.as_legacy_object())
225 def get_raw(self, name):
226 """Obtain the raw text for an object.
228 :param name: sha for the object.
229 :return: tuple with object type and object contents.
232 sha = hex_to_sha(name)
234 elif len(name) == 20:
239 for pack in self.packs:
241 return pack.get_raw(sha)
245 hexsha = sha_to_hex(name)
246 ret = self._get_shafile(hexsha)
248 return ret.type, ret.as_raw_string()
249 raise KeyError(hexsha)
251 def move_in_thin_pack(self, path):
252 """Move a specific file containing a pack into the pack directory.
254 :note: The file should be on the same file system as the
257 :param path: Path to the pack file.
259 data = PackData(path)
261 # Write index for the thin pack (do we really need this?)
262 temppath = os.path.join(self.pack_dir,
263 sha_to_hex(urllib2.randombytes(20))+".tempidx")
264 data.create_index_v2(temppath, self.get_raw)
265 p = Pack.from_objects(data, load_pack_index(temppath))
267 # Write a full pack version
268 temppath = os.path.join(self.pack_dir,
269 sha_to_hex(urllib2.randombytes(20))+".temppack")
270 write_pack(temppath, ((o, None) for o in p.iterobjects(self.get_raw)),
272 pack_sha = load_pack_index(temppath+".idx").objects_sha1()
273 newbasename = os.path.join(self.pack_dir, "pack-%s" % pack_sha)
274 os.rename(temppath+".pack", newbasename+".pack")
275 os.rename(temppath+".idx", newbasename+".idx")
276 self._add_known_pack(newbasename)
278 def move_in_pack(self, path):
279 """Move a specific file containing a pack into the pack directory.
281 :note: The file should be on the same file system as the
284 :param path: Path to the pack file.
287 entries = p.sorted_entries()
288 basename = os.path.join(self.pack_dir,
289 "pack-%s" % iter_sha1(entry[0] for entry in entries))
290 write_pack_index_v2(basename+".idx", entries, p.get_stored_checksum())
291 os.rename(path, basename + ".pack")
292 self._add_known_pack(basename)
294 def add_thin_pack(self):
295 """Add a new thin pack to this object store.
297 Thin packs are packs that contain deltas with parents that exist
300 fd, path = tempfile.mkstemp(dir=self.pack_dir, suffix=".pack")
301 f = os.fdopen(fd, 'wb')
305 if os.path.getsize(path) > 0:
306 self.move_in_thin_pack(path)
310 """Add a new pack to this object store.
312 :return: Fileobject to write to and a commit function to
313 call when the pack is finished.
315 fd, path = tempfile.mkstemp(dir=self.pack_dir, suffix=".pack")
316 f = os.fdopen(fd, 'wb')
320 if os.path.getsize(path) > 0:
321 self.move_in_pack(path)
324 def add_object(self, obj):
325 """Add a single object to this object store.
327 :param obj: Object to add
329 self._add_shafile(obj.id, obj)
331 def add_objects(self, objects):
332 """Add a set of objects to this object store.
334 :param objects: Iterable over objects, should support __len__.
336 if len(objects) == 0:
337 # Don't bother writing an empty pack file
339 f, commit = self.add_pack()
340 write_pack_data(f, objects, len(objects))
344 class MemoryObjectStore(BaseObjectStore):
345 """Object store that keeps all objects in memory."""
348 super(MemoryObjectStore, self).__init__()
351 def __contains__(self, sha):
352 """Check if the object with a particular SHA is present."""
353 return sha in self._data
356 """Iterate over the SHAs that are present in this store."""
357 return self._data.iterkeys()
359 def get_raw(self, name):
360 """Obtain the raw text for an object.
362 :param name: sha for the object.
363 :return: tuple with object type and object contents.
365 return self[name].as_raw_string()
367 def __getitem__(self, name):
368 return self._data[name]
370 def add_object(self, obj):
371 """Add a single object to this object store.
374 self._data[obj.id] = obj
376 def add_objects(self, objects):
377 """Add a set of objects to this object store.
379 :param objects: Iterable over a list of objects.
381 for obj, path in objects:
382 self._data[obj.id] = obj
385 class ObjectImporter(object):
386 """Interface for importing objects."""
388 def __init__(self, count):
389 """Create a new ObjectImporter.
391 :param count: Number of objects that's going to be imported.
395 def add_object(self, object):
397 raise NotImplementedError(self.add_object)
399 def finish(self, object):
400 """Finish the imoprt and write objects to disk."""
401 raise NotImplementedError(self.finish)
404 class ObjectIterator(object):
405 """Interface for iterating over objects."""
407 def iterobjects(self):
408 raise NotImplementedError(self.iterobjects)
411 class ObjectStoreIterator(ObjectIterator):
412 """ObjectIterator that works on top of an ObjectStore."""
414 def __init__(self, store, sha_iter):
415 """Create a new ObjectIterator.
417 :param store: Object store to retrieve from
418 :param sha_iter: Iterator over (sha, path) tuples
421 self.sha_iter = sha_iter
425 """Yield tuple with next object and path."""
426 for sha, path in self.itershas():
427 yield self.store[sha], path
429 def iterobjects(self):
430 """Iterate over just the objects."""
435 """Iterate over the SHAs."""
436 for sha in self._shas:
438 for sha in self.sha_iter:
439 self._shas.append(sha)
442 def __contains__(self, needle):
443 """Check if an object is present.
445 :note: This checks if the object is present in
446 the underlying object store, not if it would
447 be yielded by the iterator.
449 :param needle: SHA1 of the object to check for
451 return needle in self.store
453 def __getitem__(self, key):
454 """Find an object by SHA1.
456 :note: This retrieves the object from the underlying
457 object store. It will also succeed if the object would
458 not be returned by the iterator.
460 return self.store[key]
463 """Return the number of objects."""
464 return len(list(self.itershas()))
467 def tree_lookup_path(lookup_obj, root_sha, path):
468 """Lookup an object in a Git tree.
470 :param lookup_obj: Callback for retrieving object by SHA1
471 :param root_sha: SHA1 of the root tree
472 :param path: Path to lookup
474 parts = path.split("/")
477 obj = lookup_obj(sha)
478 if type(obj) is not Tree:
479 raise NotTreeError(sha)
486 class MissingObjectFinder(object):
487 """Find the objects missing from another object store.
489 :param object_store: Object store containing at least all objects to be
491 :param haves: SHA1s of commits not to send (already present in target)
492 :param wants: SHA1s of commits to send
493 :param progress: Optional function to report progress to.
496 def __init__(self, object_store, haves, wants, progress=None):
497 self.sha_done = set(haves)
498 self.objects_to_send = set([(w, None, False) for w in wants if w not in haves])
499 self.object_store = object_store
501 self.progress = lambda x: None
503 self.progress = progress
505 def add_todo(self, entries):
506 self.objects_to_send.update([e for e in entries if not e[0] in self.sha_done])
508 def parse_tree(self, tree):
509 self.add_todo([(sha, name, not stat.S_ISDIR(mode)) for (mode, name, sha) in tree.entries() if not S_ISGITLINK(mode)])
511 def parse_commit(self, commit):
512 self.add_todo([(commit.tree, "", False)])
513 self.add_todo([(p, None, False) for p in commit.parents])
515 def parse_tag(self, tag):
516 self.add_todo([(tag.object[1], None, False)])
519 if not self.objects_to_send:
521 (sha, name, leaf) = self.objects_to_send.pop()
523 o = self.object_store[sha]
524 if isinstance(o, Commit):
526 elif isinstance(o, Tree):
528 elif isinstance(o, Tag):
530 self.sha_done.add(sha)
531 self.progress("counting objects: %d\r" % len(self.sha_done))
535 class ObjectStoreGraphWalker(object):
536 """Graph walker that finds out what commits are missing from an object store."""
538 def __init__(self, local_heads, get_parents):
539 """Create a new instance.
541 :param local_heads: Heads to start search with
542 :param get_parents: Function for finding the parents of a SHA1.
544 self.heads = set(local_heads)
545 self.get_parents = get_parents
549 """Ack that a particular revision and its ancestors are present in the source."""
550 if sha in self.heads:
551 self.heads.remove(sha)
552 if sha in self.parents:
553 for p in self.parents[sha]:
557 """Iterate over ancestors of heads in the target."""
559 ret = self.heads.pop()
560 ps = self.get_parents(ret)
561 self.parents[ret] = ps
562 self.heads.update(ps)