# object_store.py -- Object store for git objects
# Copyright (C) 2008-2009 Jelmer Vernooij <jelmer@samba.org>
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# or (at your option) a later version of the License.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
# MA  02110-1301, USA.


"""Git object store interfaces and implementation."""
29 from dulwich.errors import (
32 from dulwich.objects import (
40 from dulwich.pack import (
class ObjectStore(object):
    """Object store backed by a directory on disk.

    Objects are looked up first in the packs found in ``pack_dir`` and
    then as loose files stored at ``<path>/<sha[:2]>/<sha[2:]>``.
    """

    def __init__(self, path):
        """Open an object store.

        :param path: Path of the object store.
        """
        self.path = path
        # Filled in lazily by the ``packs`` property.
        self._pack_cache = None
        self.pack_dir = os.path.join(self.path, PACKDIR)

    def determine_wants_all(self, refs):
        """Return the SHAs from ``refs`` that are not in this store yet.

        :param refs: Dictionary mapping ref names to SHAs.
        :return: List of SHAs that are missing here; refs whose name
            ends in ``^{}`` are skipped.
        """
        # items() rather than iteritems() so this works on any mapping.
        return [sha for (ref, sha) in refs.items()
                if sha not in self and not ref.endswith("^{}")]

    def iter_shas(self, shas):
        """Iterate over the objects for the specified shas.

        :param shas: Iterable object with SHAs
        :return: ObjectStoreIterator wrapping this store and ``shas``.
        """
        return ObjectStoreIterator(self, shas)

    def __contains__(self, sha):
        """Check if a particular object is present by SHA1."""
        for pack in self.packs:
            if sha in pack:
                return True
        return self._get_shafile(sha) is not None

    def __iter__(self):
        """Iterate over the SHAs that are present in this store."""
        iterables = self.packs + [self._iter_shafile_shas()]
        return itertools.chain(*iterables)

    @property
    def packs(self):
        """List with pack objects."""
        if self._pack_cache is None:
            self._pack_cache = list(load_packs(self.pack_dir))
        return self._pack_cache

    def _add_known_pack(self, path):
        """Add a newly appeared pack to the cache by path.

        :param path: Base path of the pack, as accepted by ``Pack``.
        """
        # If the cache was never loaded there is nothing to update; the
        # next access to ``packs`` lists the pack directory itself.
        if self._pack_cache is not None:
            self._pack_cache.append(Pack(path))

    def _get_shafile_path(self, sha):
        """Return the path of the loose object file for a hex sha."""
        prefix = sha[:2]
        rest = sha[2:]
        # Check from object dir
        return os.path.join(self.path, prefix, rest)

    def _iter_shafile_shas(self):
        """Iterate over the hex SHAs of all loose objects."""
        for base in os.listdir(self.path):
            # Loose objects live in two-character fan-out directories;
            # anything else in the object dir is not one of them.
            if len(base) != 2:
                continue
            for rest in os.listdir(os.path.join(self.path, base)):
                yield base + rest

    def _get_shafile(self, sha):
        """Load a loose object by hex sha, or return None if absent."""
        path = self._get_shafile_path(sha)
        if os.path.exists(path):
            return ShaFile.from_file(path)
        return None

    def _add_shafile(self, sha, o):
        """Write an object to disk as a loose object file.

        :param sha: Hex sha of the object.
        :param o: Object providing ``as_legacy_object()``.
        """
        dirpath = os.path.join(self.path, sha[:2])
        if not os.path.isdir(dirpath):
            os.mkdir(dirpath)
        path = os.path.join(dirpath, sha[2:])
        # The serialized object is binary data, so the file must not be
        # opened in text mode.
        f = open(path, 'wb')
        try:
            f.write(o.as_legacy_object())
        finally:
            f.close()

    def get_raw(self, name):
        """Obtain the raw text for an object.

        :param name: sha for the object, either 40 characters of hex or
            20 bytes of binary.
        :return: tuple with object type and object contents.
        :raise KeyError: If the object is in no pack and no loose file.
        :raise AssertionError: If ``name`` has any other length.
        """
        if len(name) == 40:
            sha = hex_to_sha(name)
            hexsha = name
        elif len(name) == 20:
            sha = name
            hexsha = None
        else:
            raise AssertionError
        for pack in self.packs:
            try:
                return pack.get_raw(sha)
            except KeyError:
                pass
        # Loose objects are named by hex sha; only convert when needed.
        if hexsha is None:
            hexsha = sha_to_hex(name)
        ret = self._get_shafile(hexsha)
        if ret is not None:
            return ret.type, ret.as_raw_string()
        raise KeyError(hexsha)

    def __getitem__(self, sha):
        """Return the parsed object for a sha (see ``get_raw``)."""
        obj_type, uncomp = self.get_raw(sha)
        return ShaFile.from_raw_string(obj_type, uncomp)

    def move_in_thin_pack(self, path):
        """Move a specific file containing a pack into the pack directory.

        :note: The file should be on the same file system as the
            packs directory.

        :param path: Path to the pack file.
        """
        data = PackData(path)

        # Write index for the thin pack (do we really need this?)
        # os.urandom is the documented source of random bytes.
        # NOTE(review): this temporary index does not appear to be
        # removed afterwards -- confirm whether it should be.
        temppath = os.path.join(self.pack_dir,
            sha_to_hex(os.urandom(20)) + ".tempidx")
        data.create_index_v2(temppath, self.get_raw)
        p = Pack.from_objects(data, load_pack_index(temppath))

        # Write a full pack version
        temppath = os.path.join(self.pack_dir,
            sha_to_hex(os.urandom(20)) + ".temppack")
        write_pack(temppath, ((o, None) for o in p.iterobjects(self.get_raw)),
                len(p))
        pack_sha = load_pack_index(temppath + ".idx").objects_sha1()
        newbasename = os.path.join(self.pack_dir, "pack-%s" % pack_sha)
        os.rename(temppath + ".pack", newbasename + ".pack")
        os.rename(temppath + ".idx", newbasename + ".idx")
        self._add_known_pack(newbasename)

    def move_in_pack(self, path):
        """Move a specific file containing a pack into the pack directory.

        :note: The file should be on the same file system as the
            packs directory.

        :param path: Path to the pack file.
        """
        p = PackData(path)
        entries = p.sorted_entries()
        basename = os.path.join(self.pack_dir,
            "pack-%s" % iter_sha1(entry[0] for entry in entries))
        write_pack_index_v2(basename + ".idx", entries, p.get_stored_checksum())
        os.rename(path, basename + ".pack")
        self._add_known_pack(basename)

    def _new_pack_file(self, move_in):
        """Create a temporary pack file plus a function that commits it.

        :param move_in: Callable invoked with the temporary path once a
            non-empty pack has been written to it.
        :return: Fileobject to write to and a commit function to
            call when the pack is finished.
        """
        fd, path = tempfile.mkstemp(dir=self.pack_dir, suffix=".pack")
        # Pack data is binary; text mode would corrupt it on platforms
        # that translate newlines.
        f = os.fdopen(fd, 'wb')

        def commit():
            # Hand Python's buffered data to the OS before syncing it,
            # otherwise fsync only covers what was already flushed.
            f.flush()
            os.fsync(fd)
            f.close()
            if os.path.getsize(path) > 0:
                move_in(path)
            else:
                # Nothing was written: do not leave an empty file behind.
                os.remove(path)
        return f, commit

    def add_thin_pack(self):
        """Add a new thin pack to this object store.

        Thin packs are packs that contain deltas with parents that exist
        in a different pack.

        :return: Fileobject to write to and a commit function to
            call when the pack is finished.
        """
        return self._new_pack_file(self.move_in_thin_pack)

    def add_pack(self):
        """Add a new pack to this object store.

        :return: Fileobject to write to and a commit function to
            call when the pack is finished.
        """
        return self._new_pack_file(self.move_in_pack)

    def add_object(self, obj):
        """Add a single object to this object store.

        :param obj: Object to add; stored as a loose file under ``obj.id``.
        """
        self._add_shafile(obj.id, obj)

    def add_objects(self, objects):
        """Add a set of objects to this object store.

        :param objects: Iterable over a list of objects.
        """
        # len() may be expensive for lazy containers, so ask only once.
        count = len(objects)
        if count == 0:
            return
        f, commit = self.add_pack()
        write_pack_data(f, objects, count)
        commit()

    def find_missing_objects(self, wants, graph_walker, progress=None):
        """Find the missing objects required for a set of revisions.

        :param wants: Iterable over SHAs of objects to fetch.
        :param graph_walker: Object that can iterate over the list of revisions
            to fetch and has an "ack" method that will be called to acknowledge
            that a revision is present.
        :param progress: Simple progress function that will be called with
            updated progress strings.
        :return: Iterator over (sha, path) pairs.
        """
        # The finder's next() returns None once nothing is left, which
        # the two-argument form of iter() uses as its stop sentinel.
        finder = MissingObjectFinder(self, wants, graph_walker, progress)
        return iter(finder.next, None)
class ObjectImporter(object):
    """Interface for importing objects."""

    def __init__(self, count):
        """Create a new ObjectImporter.

        :param count: Number of objects that's going to be imported.
        """
        self.count = count

    def add_object(self, object):
        """Add an object.

        :param object: Object to import.
        :raise NotImplementedError: Always; implementations override this.
        """
        raise NotImplementedError(self.add_object)

    def finish(self, object):
        """Finish the import and write objects to disk.

        :raise NotImplementedError: Always; implementations override this.
        """
        raise NotImplementedError(self.finish)
class ObjectIterator(object):
    """Interface for iterating over objects."""

    def iterobjects(self):
        """Iterate over the objects.

        :raise NotImplementedError: Always; implementations override this.
        """
        raise NotImplementedError(self.iterobjects)
294 class ObjectStoreIterator(ObjectIterator):
295 """ObjectIterator that works on top of an ObjectStore."""
297 def __init__(self, store, sha_iter):
299 self.sha_iter = sha_iter
303 for sha, path in self.itershas():
304 yield self.store[sha], path
306 def iterobjects(self):
311 for sha in self._shas:
313 for sha in self.sha_iter:
314 self._shas.append(sha)
317 def __contains__(self, needle):
318 """Check if an object is present.
320 :param needle: SHA1 of the object to check for
322 return needle in self.store
324 def __getitem__(self, key):
325 """Find an object by SHA1."""
326 return self.store[key]
329 """Return the number of objects."""
330 return len(list(self.itershas()))
def tree_lookup_path(lookup_obj, root_sha, path):
    """Lookup an object in a Git tree.

    :param lookup_obj: Callback for retrieving object by SHA1
    :param root_sha: SHA1 of the root tree
    :param path: Path to lookup
    :return: Whatever ``lookup_obj`` returns for the SHA found at ``path``.
    :raise NotTreeError: If an object visited on the way is not a Tree.
    """
    sha = root_sha
    for part in path.split("/"):
        obj = lookup_obj(sha)
        if not isinstance(obj, Tree):
            raise NotTreeError(sha)
        # Empty components (leading, trailing or doubled slashes) stay
        # on the current tree.
        if part == '':
            continue
        _mode, sha = obj[part]
    return lookup_obj(sha)
class MissingObjectFinder(object):
    """Find the objects missing from another object store.

    :param object_store: Object store containing at least all objects to be
        sent
    :param wants: SHA1s of commits to send
    :param graph_walker: graph walker object used to see what the remote
        repo has and misses
    :param progress: Optional function to report progress to.
    """

    def __init__(self, object_store, wants, graph_walker, progress=None):
        # SHAs that next() has already handed out.
        self.sha_done = set()
        # Pending (sha, name, is_leaf) entries; leaves are not parsed.
        self.objects_to_send = set([(w, None, False) for w in wants])
        self.object_store = object_store
        if progress is None:
            self.progress = lambda x: None
        else:
            self.progress = progress
        # Walk everything the graph walker offers and acknowledge the
        # revisions that are present in our store.
        ref = graph_walker.next()
        while ref:
            if ref in self.object_store:
                graph_walker.ack(ref)
            ref = graph_walker.next()

    def add_todo(self, entries):
        """Queue (sha, name, is_leaf) entries that were not sent yet."""
        self.objects_to_send.update(
            [e for e in entries if e[0] not in self.sha_done])

    def parse_tree(self, tree):
        """Queue every entry of a tree; non-directories are leaves."""
        self.add_todo([(sha, name, not stat.S_ISDIR(mode))
                       for (mode, name, sha) in tree.entries()])

    def parse_commit(self, commit):
        """Queue the tree and the parents of a commit."""
        self.add_todo([(commit.tree, "", False)])
        self.add_todo([(p, None, False) for p in commit.parents])

    def parse_tag(self, tag):
        """Queue the object a tag points at."""
        self.add_todo([(tag.object[1], None, False)])

    def next(self):
        """Return the next object to send.

        :return: ``(sha, name)`` tuple, or None once nothing is left.
        """
        while self.objects_to_send:
            (sha, name, leaf) = self.objects_to_send.pop()
            # The same sha can be queued under several names (or be
            # queued before it was marked done); emit it only once.
            if sha in self.sha_done:
                continue
            if not leaf:
                o = self.object_store[sha]
                if isinstance(o, Commit):
                    self.parse_commit(o)
                elif isinstance(o, Tree):
                    self.parse_tree(o)
                elif isinstance(o, Tag):
                    self.parse_tag(o)
            self.sha_done.add(sha)
            self.progress("counting objects: %d\r" % len(self.sha_done))
            return (sha, name)
        return None