1 # object_store.py -- Object store for git objects
2 # Copyright (C) 2008-2009 Jelmer Vernooij <jelmer@samba.org>
4 # This program is free software; you can redistribute it and/or
5 # modify it under the terms of the GNU General Public License
6 # as published by the Free Software Foundation; either version 2
7 # or (at your option) a later version of the License.
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU General Public License for more details.
14 # You should have received a copy of the GNU General Public License
15 # along with this program; if not, write to the Free Software
16 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
20 """Git object store interfaces and implementation."""
29 from dulwich.errors import (
32 from dulwich.objects import (
40 from dulwich.pack import (
54 class BaseObjectStore(object):
55 """Object store interface."""
57 def determine_wants_all(self, refs):
58 return [sha for (ref, sha) in refs.iteritems() if not sha in self and not ref.endswith("^{}")]
    def iter_shas(self, shas):
        """Iterate over the objects for the specified shas.

        :param shas: Iterable object with SHAs
        :return: An ObjectStoreIterator yielding (object, path) pairs
        """
        return ObjectStoreIterator(self, shas)
    def __contains__(self, sha):
        """Check if a particular object is present by SHA1.

        :param sha: SHA1 of the object
        :raise NotImplementedError: always; concrete stores must override
        """
        raise NotImplementedError(self.__contains__)
    def get_raw(self, name):
        """Obtain the raw text for an object.

        :param name: sha for the object.
        :return: tuple with object type and object contents.
        :raise NotImplementedError: always; concrete stores must override
        """
        raise NotImplementedError(self.get_raw)
79 def __getitem__(self, sha):
80 """Obtain an object by SHA1."""
81 type, uncomp = self.get_raw(sha)
82 return ShaFile.from_raw_string(type, uncomp)
85 """Iterate over the SHAs that are present in this store."""
86 raise NotImplementedError(self.__iter__)
    def add_object(self, obj):
        """Add a single object to this object store.

        :param obj: Object to add
        :raise NotImplementedError: always; concrete stores must override
        """
        raise NotImplementedError(self.add_object)
    def add_objects(self, objects):
        """Add a set of objects to this object store.

        :param objects: Iterable over a list of objects.
        :raise NotImplementedError: always; concrete stores must override
        """
        raise NotImplementedError(self.add_objects)
    def find_missing_objects(self, haves, wants, progress=None):
        """Find the missing objects required for a set of revisions.

        :param haves: Iterable over SHAs already in common.
        :param wants: Iterable over SHAs of objects to fetch.
        :param progress: Simple progress function that will be called with
            updated progress strings.
        :return: Iterator over (sha, path) pairs.
        """
        # iter(callable, sentinel): keeps calling the finder's bound
        # .next method until it returns None, which the finder uses as
        # its exhaustion sentinel.
        return iter(MissingObjectFinder(self, haves, wants, progress).next, None)
    def get_commit_parents(self, sha):
        """Retrieve the parents of a commit.

        :param sha: SHA1 of the commit
        :return: List of parent SHA1s
        """
        return self[sha].parents
    def get_graph_walker(self, heads):
        """Obtain a graph walker for this object store.

        :param heads: Local heads to start search with
        :return: GraphWalker object
        """
        return ObjectStoreGraphWalker(heads, self.get_commit_parents)
129 class DiskObjectStore(BaseObjectStore):
130 """Git-style object store that exists on disk."""
132 def __init__(self, path):
133 """Open an object store.
135 :param path: Path of the object store.
138 self._pack_cache = None
139 self.pack_dir = os.path.join(self.path, PACKDIR)
141 def __contains__(self, sha):
142 """Check if a particular object is present by SHA1."""
143 for pack in self.packs:
146 ret = self._get_shafile(sha)
152 """Iterate over the SHAs that are present in this store."""
153 iterables = self.packs + [self._iter_shafile_shas()]
154 return itertools.chain(*iterables)
158 """List with pack objects."""
159 if self._pack_cache is None:
160 self._pack_cache = list(load_packs(self.pack_dir))
161 return self._pack_cache
163 def _add_known_pack(self, path):
164 """Add a newly appeared pack to the cache by path.
167 if self._pack_cache is not None:
168 self._pack_cache.append(Pack(path))
170 def _get_shafile_path(self, sha):
173 # Check from object dir
174 return os.path.join(self.path, dir, file)
176 def _iter_shafile_shas(self):
177 for base in os.listdir(self.path):
180 for rest in os.listdir(os.path.join(self.path, base)):
183 def _get_shafile(self, sha):
184 path = self._get_shafile_path(sha)
185 if os.path.exists(path):
186 return ShaFile.from_file(path)
189 def _add_shafile(self, sha, o):
190 dir = os.path.join(self.path, sha[:2])
191 if not os.path.isdir(dir):
193 path = os.path.join(dir, sha[2:])
196 f.write(o.as_legacy_object())
200 def get_raw(self, name):
201 """Obtain the raw text for an object.
203 :param name: sha for the object.
204 :return: tuple with object type and object contents.
207 sha = hex_to_sha(name)
209 elif len(name) == 20:
214 for pack in self.packs:
216 return pack.get_raw(sha)
220 hexsha = sha_to_hex(name)
221 ret = self._get_shafile(hexsha)
223 return ret.type, ret.as_raw_string()
224 raise KeyError(hexsha)
226 def move_in_thin_pack(self, path):
227 """Move a specific file containing a pack into the pack directory.
229 :note: The file should be on the same file system as the
232 :param path: Path to the pack file.
234 data = PackData(path)
236 # Write index for the thin pack (do we really need this?)
237 temppath = os.path.join(self.pack_dir,
238 sha_to_hex(urllib2.randombytes(20))+".tempidx")
239 data.create_index_v2(temppath, self.get_raw)
240 p = Pack.from_objects(data, load_pack_index(temppath))
242 # Write a full pack version
243 temppath = os.path.join(self.pack_dir,
244 sha_to_hex(urllib2.randombytes(20))+".temppack")
245 write_pack(temppath, ((o, None) for o in p.iterobjects(self.get_raw)),
247 pack_sha = load_pack_index(temppath+".idx").objects_sha1()
248 newbasename = os.path.join(self.pack_dir, "pack-%s" % pack_sha)
249 os.rename(temppath+".pack", newbasename+".pack")
250 os.rename(temppath+".idx", newbasename+".idx")
251 self._add_known_pack(newbasename)
253 def move_in_pack(self, path):
254 """Move a specific file containing a pack into the pack directory.
256 :note: The file should be on the same file system as the
259 :param path: Path to the pack file.
262 entries = p.sorted_entries()
263 basename = os.path.join(self.pack_dir,
264 "pack-%s" % iter_sha1(entry[0] for entry in entries))
265 write_pack_index_v2(basename+".idx", entries, p.get_stored_checksum())
266 os.rename(path, basename + ".pack")
267 self._add_known_pack(basename)
269 def add_thin_pack(self):
270 """Add a new thin pack to this object store.
272 Thin packs are packs that contain deltas with parents that exist
275 fd, path = tempfile.mkstemp(dir=self.pack_dir, suffix=".pack")
276 f = os.fdopen(fd, 'w')
280 if os.path.getsize(path) > 0:
281 self.move_in_thin_pack(path)
285 """Add a new pack to this object store.
287 :return: Fileobject to write to and a commit function to
288 call when the pack is finished.
290 fd, path = tempfile.mkstemp(dir=self.pack_dir, suffix=".pack")
291 f = os.fdopen(fd, 'w')
295 if os.path.getsize(path) > 0:
296 self.move_in_pack(path)
    def add_object(self, obj):
        """Add a single object to this object store.

        :param obj: Object to add; written out as a loose object file
        """
        self._add_shafile(obj.id, obj)
305 def add_objects(self, objects):
306 """Add a set of objects to this object store.
308 :param objects: Iterable over a list of objects.
310 if len(objects) == 0:
312 f, commit = self.add_pack()
313 write_pack_data(f, objects, len(objects))
317 class MemoryObjectStore(BaseObjectStore):
320 super(MemoryObjectStore, self).__init__()
    def __contains__(self, sha):
        """Check if a particular object is present by SHA1."""
        return sha in self._data
327 """Iterate over the SHAs that are present in this store."""
328 return self._data.iterkeys()
330 def get_raw(self, name):
331 """Obtain the raw text for an object.
333 :param name: sha for the object.
334 :return: tuple with object type and object contents.
336 return self[name].as_raw_string()
    def __getitem__(self, name):
        """Obtain an object by SHA1.

        :param name: SHA1 of the object
        :raise KeyError: if the object is not present
        """
        return self._data[name]
    def add_object(self, obj):
        """Add a single object to this object store.

        :param obj: Object to add
        """
        self._data[obj.id] = obj
347 def add_objects(self, objects):
348 """Add a set of objects to this object store.
350 :param objects: Iterable over a list of objects.
353 self._data[obj.id] = obj
356 class ObjectImporter(object):
357 """Interface for importing objects."""
359 def __init__(self, count):
360 """Create a new ObjectImporter.
362 :param count: Number of objects that's going to be imported.
    def add_object(self, object):
        """Add an object.

        :raise NotImplementedError: always; subclasses must override
        """
        raise NotImplementedError(self.add_object)
    def finish(self, object):
        """Finish the import and write objects to disk.

        :raise NotImplementedError: always; subclasses must override
        """
        raise NotImplementedError(self.finish)
class ObjectIterator(object):
    """Interface for iterating over objects."""

    def iterobjects(self):
        """Iterate over all objects.

        :raise NotImplementedError: always; subclasses must override
        """
        raise NotImplementedError(self.iterobjects)
382 class ObjectStoreIterator(ObjectIterator):
383 """ObjectIterator that works on top of an ObjectStore."""
385 def __init__(self, store, sha_iter):
387 self.sha_iter = sha_iter
391 for sha, path in self.itershas():
392 yield self.store[sha], path
394 def iterobjects(self):
399 for sha in self._shas:
401 for sha in self.sha_iter:
402 self._shas.append(sha)
    def __contains__(self, needle):
        """Check if an object is present.

        :param needle: SHA1 of the object to check for
        :note: checks the backing store, not just the iterated SHAs
        """
        return needle in self.store
    def __getitem__(self, key):
        """Find an object by SHA1.

        :param key: SHA1 of the object
        :return: The object from the backing store
        """
        return self.store[key]
417 """Return the number of objects."""
418 return len(list(self.itershas()))
421 def tree_lookup_path(lookup_obj, root_sha, path):
422 """Lookup an object in a Git tree.
424 :param lookup_obj: Callback for retrieving object by SHA1
425 :param root_sha: SHA1 of the root tree
426 :param path: Path to lookup
428 parts = path.split("/")
431 obj = lookup_obj(sha)
432 if type(obj) is not Tree:
433 raise NotTreeError(sha)
437 return lookup_obj(sha)
440 class MissingObjectFinder(object):
441 """Find the objects missing from another object store.
443 :param object_store: Object store containing at least all objects to be
445 :param wants: SHA1s of commits to send
446 :param progress: Optional function to report progress to.
449 def __init__(self, object_store, haves, wants, progress=None):
450 self.sha_done = set(haves)
451 self.objects_to_send = set([(w, None, False) for w in wants])
452 self.object_store = object_store
454 self.progress = lambda x: None
456 self.progress = progress
458 def add_todo(self, entries):
459 self.objects_to_send.update([e for e in entries if not e[0] in self.sha_done])
461 def parse_tree(self, tree):
462 self.add_todo([(sha, name, not stat.S_ISDIR(mode)) for (mode, name, sha) in tree.entries()])
464 def parse_commit(self, commit):
465 self.add_todo([(commit.tree, "", False)])
466 self.add_todo([(p, None, False) for p in commit.parents])
    def parse_tag(self, tag):
        """Queue the object a tag points at.

        :param tag: Tag object; tag.object appears to be a (type, sha)
            pair, with the SHA at index 1 — TODO confirm against the Tag
            class definition.
        """
        self.add_todo([(tag.object[1], None, False)])
472 if not self.objects_to_send:
474 (sha, name, leaf) = self.objects_to_send.pop()
476 o = self.object_store[sha]
477 if isinstance(o, Commit):
479 elif isinstance(o, Tree):
481 elif isinstance(o, Tag):
483 self.sha_done.add(sha)
484 self.progress("counting objects: %d\r" % len(self.sha_done))
488 class ObjectStoreGraphWalker(object):
489 """Graph walker that finds out what commits are missing from an object store."""
491 def __init__(self, local_heads, get_parents):
492 """Create a new instance.
494 :param local_heads: Heads to start search with
495 :param get_parents: Function for finding the parents of a SHA1.
497 self.heads = set(local_heads)
498 self.get_parents = get_parents
502 """Ack that a particular revision and its ancestors are present in the source."""
503 if sha in self.heads:
504 self.heads.remove(sha)
505 if sha in self.parents:
506 for p in self.parents[sha]:
510 """Iterate over ancestors of heads in the target."""
512 ret = self.heads.pop()
513 ps = self.get_parents(ret)
514 self.parents[ret] = ps
515 self.heads.update(ps)