New dulwich.pack.MemoryPackIndex class.
[jelmer/dulwich-libgit2.git] / dulwich / pack.py
index 5ab4dfbd18d9161a5a38f327204160c849e734e7..93ae488c22de6f683cc78a3212d071449a2e2c4d 100644 (file)
@@ -1,6 +1,6 @@
-# pack.py -- For dealing wih packed git objects.
+# pack.py -- For dealing with packed git objects.
 # Copyright (C) 2007 James Westby <jw+debian@jameswestby.net>
-# Copryight (C) 2008-2009 Jelmer Vernooij <jelmer@samba.org>
+# Copyright (C) 2008-2009 Jelmer Vernooij <jelmer@samba.org>
 #
 # This program is free software; you can redistribute it and/or
 # modify it under the terms of the GNU General Public License
@@ -35,6 +35,12 @@ try:
 except ImportError:
     from misc import defaultdict
 
+from cStringIO import (
+    StringIO,
+    )
+from collections import (
+    deque,
+    )
 import difflib
 from itertools import (
     chain,
@@ -59,15 +65,16 @@ from dulwich.file import GitFile
 from dulwich.lru_cache import (
     LRUSizeCache,
     )
+from dulwich.misc import (
+    make_sha,
+    SEEK_END,
+    )
 from dulwich.objects import (
     ShaFile,
     hex_to_sha,
     sha_to_hex,
     object_header,
     )
-from dulwich.misc import (
-    make_sha,
-    )
 
 supports_mmap_offset = (sys.version_info[0] >= 3 or
         (sys.version_info[0] == 2 and sys.version_info[1] >= 6))
@@ -123,6 +130,7 @@ def read_zlib_chunks(read_some, dec_size, buffer_size=4096):
     comp_len = fed - len(obj.unused_data)
     return ret, comp_len, obj.unused_data
 
+
 def iter_sha1(iter):
     """Return the hexdigest of the SHA1 over a set of names.
 
@@ -214,6 +222,111 @@ class PackIndex(object):
 
     Given a sha id of an object a pack index can tell you the location in the
     packfile of that object if it has it.
+    """
+
+    def __eq__(self, other):
+        if not isinstance(other, PackIndex):
+            return False
+
+        for (name1, _, _), (name2, _, _) in izip(self.iterentries(),
+                                                 other.iterentries()):
+            if name1 != name2:
+                return False
+        return True
+
+    def __ne__(self, other):
+        return not self.__eq__(other)
+
+    def __len__(self):
+        """Return the number of entries in this pack index."""
+        raise NotImplementedError(self.__len__)
+
+    def __iter__(self):
+        """Iterate over the SHAs in this pack."""
+        raise NotImplementedError(self.__iter__)
+
+    def iterentries(self):
+        """Iterate over the entries in this pack index.
+
+        :return: iterator over tuples with object name, offset in packfile and
+            crc32 checksum.
+        """
+        raise NotImplementedError(self.iterentries)
+
+    def get_pack_checksum(self):
+        """Return the SHA1 checksum stored for the corresponding packfile.
+
+        :return: 20-byte binary digest
+        """
+        raise NotImplementedError(self.get_pack_checksum)
+
    def object_index(self, sha):
        """Return the index in to the corresponding packfile for the object.

        Given the name of an object it will return the offset that object
        lives at within the corresponding pack file. If the pack file doesn't
        have the object then None will be returned.

        :param sha: Object name, either a 40-character hex string or a
            20-byte binary digest (distinguished purely by length).
        """
        if len(sha) == 40:
            sha = hex_to_sha(sha)
        return self._object_index(sha)

    def _object_index(self, sha):
        """See object_index.

        :param sha: A *binary* SHA string. (20 characters long)
        """
        raise NotImplementedError(self._object_index)
+
    def __iter__(self):
        """Iterate over the SHAs in this pack, as 40-character hex strings."""
        return imap(sha_to_hex, self._itersha())

    def objects_sha1(self):
        """Return the hex SHA1 over all the shas of all objects in this pack.

        :note: This is used for the filename of the pack.
        """
        return iter_sha1(self._itersha())

    def _itersha(self):
        """Yield all the SHA1's of the objects in the index, sorted.

        :return: Iterator over 20-byte binary SHA digests, in ascending order.
        """
        raise NotImplementedError(self._itersha)
+
+
class MemoryPackIndex(PackIndex):
    """Pack index that is stored entirely in memory."""

    def __init__(self, entries, pack_checksum=None):
        """Create a new MemoryPackIndex.

        :param entries: Sequence of name, idx, crc32 (sorted by name). Must
            be a real sequence, not a one-shot iterator: it is both iterated
            here and retained for later iteration.
        :param pack_checksum: Optional pack checksum
        """
        self._by_sha = {}
        for name, idx, crc32 in entries:
            self._by_sha[name] = idx
        self._entries = entries
        self._pack_checksum = pack_checksum

    def get_pack_checksum(self):
        """Return the stored pack checksum, or None if not known."""
        return self._pack_checksum

    def __len__(self):
        """Return the number of entries in this pack index."""
        return len(self._entries)

    def _object_index(self, sha):
        # _by_sha maps a binary SHA directly to its pack offset (see
        # __init__), so the offset is the value itself; the previous
        # ``[0]`` subscripted into the offset and was a bug.
        return self._by_sha[sha]

    def _itersha(self):
        # Iterate the (sorted) entry list rather than the dict: dict
        # iteration order is arbitrary, but _itersha is documented to
        # yield SHAs sorted (objects_sha1 depends on this).
        return (name for (name, _, _) in self._entries)

    def iterentries(self):
        """Iterate over (name, offset, crc32) tuples, sorted by name."""
        return iter(self._entries)
+
+
+class FilePackIndex(PackIndex):
+    """Pack index that is based on a file.
 
     To do the loop it opens the file, and indexes first 256 4 byte groups
     with the first byte of the sha id. The value in the four byte group indexed
@@ -242,20 +355,12 @@ class PackIndex(object):
             self._contents, self._size = (contents, size)
 
     def __eq__(self, other):
-        if not isinstance(other, PackIndex):
-            return False
-
-        if self._fan_out_table != other._fan_out_table:
+        # Quick optimization:
+        if (isinstance(other, FilePackIndex) and
+            self._fan_out_table != other._fan_out_table):
             return False
 
-        for (name1, _, _), (name2, _, _) in izip(self.iterentries(),
-                                                 other.iterentries()):
-            if name1 != name2:
-                return False
-        return True
-
-    def __ne__(self, other):
-        return not self.__eq__(other)
+        return super(FilePackIndex, self).__eq__(other)
 
     def close(self):
         self._file.close()
@@ -284,25 +389,15 @@ class PackIndex(object):
         """Unpack the crc32 checksum for the i-th object from the index file."""
         raise NotImplementedError(self._unpack_crc32_checksum)
 
-    def __iter__(self):
-        """Iterate over the SHAs in this pack."""
-        return imap(sha_to_hex, self._itersha())
-
     def _itersha(self):
         for i in range(len(self)):
             yield self._unpack_name(i)
 
-    def objects_sha1(self):
-        """Return the hex SHA1 over all the shas of all objects in this pack.
-
-        :note: This is used for the filename of the pack.
-        """
-        return iter_sha1(self._itersha())
-
     def iterentries(self):
         """Iterate over the entries in this pack index.
 
-        :yields: tuples with object name, offset in packfile and crc32 checksum.
+        :return: iterator over tuples with object name, offset in packfile and
+            crc32 checksum.
         """
         for i in range(len(self)):
             yield self._unpack_entry(i)
@@ -316,7 +411,6 @@ class PackIndex(object):
 
     def check(self):
         """Check that the stored checksum matches the actual checksum."""
-        # TODO: Check pack contents, too
         actual = self.calculate_checksum()
         stored = self.get_stored_checksum()
         if actual != stored:
@@ -343,17 +437,6 @@ class PackIndex(object):
         """
         return str(self._contents[-20:])
 
-    def object_index(self, sha):
-        """Return the index in to the corresponding packfile for the object.
-
-        Given the name of an object it will return the offset that object
-        lives at within the corresponding pack file. If the pack file doesn't
-        have the object then None will be returned.
-        """
-        if len(sha) == 40:
-            sha = hex_to_sha(sha)
-        return self._object_index(sha)
-
     def _object_index(self, sha):
         """See object_index.
 
@@ -372,11 +455,11 @@ class PackIndex(object):
         return self._unpack_offset(i)
 
 
-class PackIndex1(PackIndex):
-    """Version 1 Pack Index."""
+class PackIndex1(FilePackIndex):
+    """Version 1 Pack Index file."""
 
     def __init__(self, filename, file=None, contents=None, size=None):
-        PackIndex.__init__(self, filename, file, contents, size)
+        super(PackIndex1, self).__init__(filename, file, contents, size)
         self.version = 1
         self._fan_out_table = self._read_fan_out_table(0)
 
@@ -398,11 +481,11 @@ class PackIndex1(PackIndex):
         return None
 
 
-class PackIndex2(PackIndex):
-    """Version 2 Pack Index."""
+class PackIndex2(FilePackIndex):
+    """Version 2 Pack Index file."""
 
     def __init__(self, filename, file=None, contents=None, size=None):
-        PackIndex.__init__(self, filename, file, contents, size)
+        super(PackIndex2, self).__init__(filename, file, contents, size)
         assert self._contents[:4] == '\377tOc', "Not a v2 pack index file"
         (self.version, ) = unpack_from(">L", self._contents, 4)
         assert self.version == 2, "Version was %d" % self.version
@@ -433,6 +516,7 @@ def read_pack_header(read):
     """Read the header of a pack file.
 
     :param read: Read function
+    :return: Tuple with pack version and number of objects
     """
     header = read(12)
     assert header[:4] == "PACK"
@@ -494,6 +578,145 @@ def _compute_object_size((num, obj)):
     return chunks_length(obj)
 
 
class PackStreamReader(object):
    """Class to read a pack stream.

    The pack is read from a ReceivableProtocol using read() or recv() as
    appropriate, while maintaining a running SHA-1 of everything read except
    the final 20-byte pack trailer, for checksum verification.
    """

    def __init__(self, read_all, read_some=None):
        """Initialize the reader.

        :param read_all: Read callback that blocks until the requested
            number of bytes is available.
        :param read_some: Optional read callback that may return fewer bytes
            than requested; defaults to read_all when not given.
        """
        self.read_all = read_all
        if read_some is None:
            self.read_some = read_all
        else:
            self.read_some = read_some
        self.sha = make_sha()  # digest of all bytes read except the trailer
        self._offset = 0  # total bytes consumed from the underlying callbacks
        self._rbuf = StringIO()  # read-ahead bytes not yet handed to callers
        # trailer is a deque to avoid memory allocation on small reads
        self._trailer = deque()

    def _read(self, read, size):
        """Read up to size bytes using the given callback.

        As a side effect, update the verifier's hash, excluding the last 20
        bytes read (which may turn out to be the pack trailer).

        :param read: The read callback to read from.
        :param size: The maximum number of bytes to read; the particular
            behavior is callback-specific.
        :return: The data that was read.
        """
        data = read(size)

        # maintain a trailer of the last 20 bytes we've read
        n = len(data)
        self._offset += n
        tn = len(self._trailer)
        if n >= 20:
            # everything previously held back is now known not to be the
            # trailer; flush it all and keep only the last 20 bytes read
            to_pop = tn
            to_add = 20
        else:
            # keep the combined held-back window at no more than 20 bytes
            to_pop = max(n + tn - 20, 0)
            to_add = n
        for _ in xrange(to_pop):
            self.sha.update(self._trailer.popleft())
        self._trailer.extend(data[-to_add:])

        # hash everything but the trailer
        self.sha.update(data[:-to_add])
        return data

    def _buf_len(self):
        """Return the number of unread bytes in the read-ahead buffer."""
        buf = self._rbuf
        start = buf.tell()
        buf.seek(0, SEEK_END)
        end = buf.tell()
        buf.seek(start)
        return end - start

    @property
    def offset(self):
        # logical stream position: bytes consumed minus bytes still buffered
        return self._offset - self._buf_len()

    def read(self, size):
        """Read, blocking until size bytes are read."""
        buf_len = self._buf_len()
        if buf_len >= size:
            return self._rbuf.read(size)
        # drain the buffer, then block for the remainder
        buf_data = self._rbuf.read()
        self._rbuf = StringIO()
        return buf_data + self._read(self.read_all, size - buf_len)

    def recv(self, size):
        """Read up to size bytes, blocking until one byte is read."""
        buf_len = self._buf_len()
        if buf_len:
            # serve entirely from the buffer; do not mix in a fresh read
            data = self._rbuf.read(size)
            if size >= buf_len:
                self._rbuf = StringIO()
            return data
        return self._read(self.read_some, size)

    def __len__(self):
        # only valid once read_objects() has read the pack header
        return self._num_objects

    def read_objects(self):
        """Read the objects in this pack file.

        :return: Iterator over (type, uncompressed chunks, compressed length)
            tuples, one per object.
        :raise AssertionError: if there is an error in the pack format.
        :raise ChecksumMismatch: if the checksum of the pack contents does not
            match the checksum in the pack trailer.
        :raise zlib.error: if an error occurred during zlib decompression.
        :raise IOError: if an error occurred writing to the output file.
        """
        pack_version, self._num_objects = read_pack_header(self.read)
        for i in xrange(self._num_objects):
            type, uncomp, comp_len, unused = unpack_object(self.read, self.recv)
            yield type, uncomp, comp_len

            # prepend any unused data to current read buffer
            buf = StringIO()
            buf.write(unused)
            buf.write(self._rbuf.read())
            buf.seek(0)
            self._rbuf = buf

        # after the last object, _trailer holds exactly the stored pack SHA-1
        pack_sha = sha_to_hex(''.join([c for c in self._trailer]))
        calculated_sha = self.sha.hexdigest()
        if pack_sha != calculated_sha:
            raise ChecksumMismatch(pack_sha, calculated_sha)
+
+
class PackObjectIterator(object):
    """Iterator over (offset, type, object, crc32) tuples in a pack file.

    Walks the pack's file object by object, starting just past the header,
    optionally reporting progress as (current, total) after each object.
    """

    def __init__(self, pack, progress=None):
        self.i = 0
        self.offset = pack._header_size
        self.num = len(pack)
        self.map = pack._file
        self._progress = progress

    def __iter__(self):
        return self

    def __len__(self):
        return self.num

    def next(self):
        if self.i == self.num:
            raise StopIteration
        # Parse the object at the current offset, then rewind and CRC the
        # same span of raw (still-compressed) bytes.
        self.map.seek(self.offset)
        (obj_type, obj, total_size, _) = unpack_object(self.map.read)
        self.map.seek(self.offset)
        crc = zlib.crc32(self.map.read(total_size)) & 0xffffffff
        result = (self.offset, obj_type, obj, crc)
        self.offset += total_size
        if self._progress is not None:
            self._progress(self.i, self.num)
        self.i += 1
        return result
+
 def obj_sha(type, chunks):
     """Compute the SHA for a numeric type and object chunks."""
     sha = make_sha()
@@ -562,9 +785,6 @@ class PackData(object):
     def close(self):
         self._file.close()
 
-    def __del__(self):
-        self.close()
-
     def _get_size(self):
         if self._size is not None:
             return self._size
@@ -638,42 +858,14 @@ class PackData(object):
         return type, chunks
 
     def iterobjects(self, progress=None):
-
-        class ObjectIterator(object):
-
-            def __init__(self, pack):
-                self.i = 0
-                self.offset = pack._header_size
-                self.num = len(pack)
-                self.map = pack._file
-
-            def __iter__(self):
-                return self
-
-            def __len__(self):
-                return self.num
-
-            def next(self):
-                if self.i == self.num:
-                    raise StopIteration
-                self.map.seek(self.offset)
-                (type, obj, total_size, unused) = unpack_object(self.map.read)
-                self.map.seek(self.offset)
-                crc32 = zlib.crc32(self.map.read(total_size)) & 0xffffffff
-                ret = (self.offset, type, obj, crc32)
-                self.offset += total_size
-                if progress:
-                    progress(self.i, self.num)
-                self.i+=1
-                return ret
-        return ObjectIterator(self)
+        return PackObjectIterator(self, progress)
 
     def iterentries(self, progress=None):
         """Yield entries summarizing the contents of this pack.
 
-        :param progress: Progress function, called with current and total object
-            count.
-        :yields: tuples with (sha, offset, crc32)
+        :param progress: Progress function, called with current and total
+            object count.
+        :return: iterator of tuples with (sha, offset, crc32)
         """
         for offset, type, obj, crc32 in self.iterobjects(progress=progress):
             assert isinstance(offset, int)
@@ -685,8 +877,8 @@ class PackData(object):
     def sorted_entries(self, progress=None):
         """Return entries in this pack, sorted by SHA.
 
-        :param progress: Progress function, called with current and total object
-            count
+        :param progress: Progress function, called with current and total
+            object count
         :return: List of tuples with (sha, offset, crc32)
         """
         ret = list(self.iterentries(progress=progress))
@@ -698,18 +890,28 @@ class PackData(object):
 
         :param filename: Index filename.
         :param progress: Progress report function
+        :return: Checksum of index file
         """
         entries = self.sorted_entries(progress=progress)
-        write_pack_index_v1(filename, entries, self.calculate_checksum())
+        f = GitFile(filename, 'wb')
+        try:
+            return write_pack_index_v1(f, entries, self.calculate_checksum())
+        finally:
+            f.close()
 
     def create_index_v2(self, filename, progress=None):
         """Create a version 2 index file for this data file.
 
         :param filename: Index filename.
         :param progress: Progress report function
+        :return: Checksum of index file
         """
         entries = self.sorted_entries(progress=progress)
-        write_pack_index_v2(filename, entries, self.calculate_checksum())
+        f = GitFile(filename, 'wb')
+        try:
+            return write_pack_index_v2(f, entries, self.calculate_checksum())
+        finally:
+            f.close()
 
     def create_index(self, filename, progress=None,
                      version=2):
@@ -717,11 +919,12 @@ class PackData(object):
 
         :param filename: Index filename.
         :param progress: Progress report function
+        :return: Checksum of index file
         """
         if version == 1:
-            self.create_index_v1(filename, progress)
+            return self.create_index_v1(filename, progress)
         elif version == 2:
-            self.create_index_v2(filename, progress)
+            return self.create_index_v2(filename, progress)
         else:
             raise ValueError("unknown index format %d" % version)
 
@@ -756,9 +959,13 @@ class PackData(object):
 class ThinPackData(PackData):
     """PackData for thin packs, which require an ObjectStore for resolving."""
 
-    def __init__(self, store, *args, **kwargs):
+    def __init__(self, resolve_ext_ref, *args, **kwargs):
         super(ThinPackData, self).__init__(*args, **kwargs)
-        self.store = store
+        self.resolve_ext_ref = resolve_ext_ref
+
    @classmethod
    def from_file(cls, resolve_ext_ref, file, size):
        """Construct a ThinPackData from an already-open file.

        :param resolve_ext_ref: Callable used to resolve external object
            references (as consumed by get_ref).
        :param file: Open file-like object containing the pack data.
        :param size: Size of the pack data, in bytes.
        """
        # NOTE(review): str(file) is passed as the filename argument of
        # PackData.__init__ -- presumably only a display label, since the
        # data is read from `file` itself; confirm against PackData.
        return cls(resolve_ext_ref, str(file), file=file, size=size)
 
     def get_ref(self, sha):
         """Resolve a reference looking in both this pack and the store."""
@@ -770,7 +977,7 @@ class ThinPackData(PackData):
             # rewritten.
             return super(ThinPackData, self).get_ref(sha)
         except KeyError:
-            type, obj = self.store.get_raw(sha)
+            type, obj = self.resolve_ext_ref(sha)
             return None, type, obj
 
     def iterentries(self, progress=None):
@@ -918,6 +1125,7 @@ def write_pack(filename, objects, num_objects):
     :param filename: Path to the new pack file (without .pack extension)
     :param objects: Iterable over (object, path) tuples to write
     :param num_objects: Number of objects to write
+    :return: Tuple with checksum of pack file and index file
     """
     f = GitFile(filename + ".pack", 'wb')
     try:
@@ -925,14 +1133,28 @@ def write_pack(filename, objects, num_objects):
     finally:
         f.close()
     entries.sort()
-    write_pack_index_v2(filename + ".idx", entries, data_sum)
+    f = GitFile(filename + ".idx", 'wb')
+    try:
+        return data_sum, write_pack_index_v2(f, entries, data_sum)
+    finally:
+        f.close()
+
+
def write_pack_header(f, num_objects):
    """Write a pack header for the given number of objects.

    :param f: File-like object to write to
    :param num_objects: Number of objects that will follow in the pack
    """
    for chunk in ('PACK',                            # pack magic
                  struct.pack('>L', 2),              # pack format version 2
                  struct.pack('>L', num_objects)):   # object count
        f.write(chunk)
 
 
 def write_pack_data(f, objects, num_objects, window=10):
-    """Write a new pack file.
+    """Write a new pack data file.
 
-    :param filename: The filename of the new pack file.
-    :param objects: List of objects to write (tuples with object and path)
+    :param f: File to write to
+    :param objects: Iterable over (object, path) tuples to write
+    :param num_objects: Number of objects to write
+    :param window: Sliding window size for searching for deltas; currently
+                   unimplemented
     :return: List with (name, offset, crc32 checksum) entries, pack checksum
     """
     recency = list(objects)
@@ -952,9 +1174,7 @@ def write_pack_data(f, objects, num_objects, window=10):
     # Write the pack
     entries = []
     f = SHA1Writer(f)
-    f.write("PACK")               # Pack header
-    f.write(struct.pack(">L", 2)) # Pack version
-    f.write(struct.pack(">L", num_objects)) # Number of objects in pack
+    write_pack_header(f, num_objects)
     for o, path in recency:
         sha1 = o.sha().digest()
         orig_t = o.type_num
@@ -975,30 +1195,28 @@ def write_pack_data(f, objects, num_objects, window=10):
     return entries, f.write_sha()
 
 
-def write_pack_index_v1(filename, entries, pack_checksum):
+def write_pack_index_v1(f, entries, pack_checksum):
     """Write a new pack index file.
 
-    :param filename: The filename of the new pack index file.
+    :param f: A file-like object to write to
     :param entries: List of tuples with object name (sha), offset_in_pack,
         and crc32_checksum.
     :param pack_checksum: Checksum of the pack file.
+    :return: The SHA of the written index file
     """
-    f = GitFile(filename, 'wb')
-    try:
-        f = SHA1Writer(f)
-        fan_out_table = defaultdict(lambda: 0)
-        for (name, offset, entry_checksum) in entries:
-            fan_out_table[ord(name[0])] += 1
-        # Fan-out table
-        for i in range(0x100):
-            f.write(struct.pack(">L", fan_out_table[i]))
-            fan_out_table[i+1] += fan_out_table[i]
-        for (name, offset, entry_checksum) in entries:
-            f.write(struct.pack(">L20s", offset, name))
-        assert len(pack_checksum) == 20
-        f.write(pack_checksum)
-    finally:
-        f.close()
+    f = SHA1Writer(f)
+    fan_out_table = defaultdict(lambda: 0)
+    for (name, offset, entry_checksum) in entries:
+        fan_out_table[ord(name[0])] += 1
+    # Fan-out table
+    for i in range(0x100):
+        f.write(struct.pack(">L", fan_out_table[i]))
+        fan_out_table[i+1] += fan_out_table[i]
+    for (name, offset, entry_checksum) in entries:
+        f.write(struct.pack(">L20s", offset, name))
+    assert len(pack_checksum) == 20
+    f.write(pack_checksum)
+    return f.write_sha()
 
 
 def create_delta(base_buf, target_buf):
@@ -1126,38 +1344,36 @@ def apply_delta(src_buf, delta):
     return out
 
 
-def write_pack_index_v2(filename, entries, pack_checksum):
+def write_pack_index_v2(f, entries, pack_checksum):
     """Write a new pack index file.
 
-    :param filename: The filename of the new pack index file.
+    :param f: File-like object to write to
     :param entries: List of tuples with object name (sha), offset_in_pack, and
         crc32_checksum.
     :param pack_checksum: Checksum of the pack file.
+    :return: The SHA of the index file written
     """
-    f = GitFile(filename, 'wb')
-    try:
-        f = SHA1Writer(f)
-        f.write('\377tOc') # Magic!
-        f.write(struct.pack(">L", 2))
-        fan_out_table = defaultdict(lambda: 0)
-        for (name, offset, entry_checksum) in entries:
-            fan_out_table[ord(name[0])] += 1
-        # Fan-out table
-        for i in range(0x100):
-            f.write(struct.pack(">L", fan_out_table[i]))
-            fan_out_table[i+1] += fan_out_table[i]
-        for (name, offset, entry_checksum) in entries:
-            f.write(name)
-        for (name, offset, entry_checksum) in entries:
-            f.write(struct.pack(">L", entry_checksum))
-        for (name, offset, entry_checksum) in entries:
-            # FIXME: handle if MSBit is set in offset
-            f.write(struct.pack(">L", offset))
-        # FIXME: handle table for pack files > 8 Gb
-        assert len(pack_checksum) == 20
-        f.write(pack_checksum)
-    finally:
-        f.close()
+    f = SHA1Writer(f)
+    f.write('\377tOc') # Magic!
+    f.write(struct.pack(">L", 2))
+    fan_out_table = defaultdict(lambda: 0)
+    for (name, offset, entry_checksum) in entries:
+        fan_out_table[ord(name[0])] += 1
+    # Fan-out table
+    for i in range(0x100):
+        f.write(struct.pack(">L", fan_out_table[i]))
+        fan_out_table[i+1] += fan_out_table[i]
+    for (name, offset, entry_checksum) in entries:
+        f.write(name)
+    for (name, offset, entry_checksum) in entries:
+        f.write(struct.pack(">L", entry_checksum))
+    for (name, offset, entry_checksum) in entries:
+        # FIXME: handle if MSBit is set in offset
+        f.write(struct.pack(">L", offset))
+    # FIXME: handle table for pack files > 8 Gb
+    assert len(pack_checksum) == 20
+    f.write(pack_checksum)
+    return f.write_sha()
 
 
 class Pack(object):
@@ -1165,18 +1381,28 @@ class Pack(object):
 
     def __init__(self, basename):
         self._basename = basename
-        self._data_path = self._basename + ".pack"
-        self._idx_path = self._basename + ".idx"
         self._data = None
         self._idx = None
+        self._idx_path = self._basename + ".idx"
+        self._data_path = self._basename + ".pack"
+        self._data_load = lambda: PackData(self._data_path)
+        self._idx_load = lambda: load_pack_index(self._idx_path)
+
+    @classmethod
+    def from_lazy_objects(self, data_fn, idx_fn):
+        """Create a new pack object from callables to load pack data and 
+        index objects."""
+        ret = Pack("")
+        ret._data_load = data_fn
+        ret._idx_load = idx_fn
+        return ret
 
     @classmethod
     def from_objects(self, data, idx):
         """Create a new pack object from pack data and index objects."""
         ret = Pack("")
-        ret._data = data
-        ret._idx = idx
-        data.pack = ret
+        ret._data_load = lambda: data
+        ret._idx_load = lambda: idx
         return ret
 
     def name(self):
@@ -1187,7 +1413,7 @@ class Pack(object):
     def data(self):
         """The pack data object being used."""
         if self._data is None:
-            self._data = PackData(self._data_path)
+            self._data = self._data_load()
             self._data.pack = self
             assert len(self.index) == len(self._data)
             idx_stored_checksum = self.index.get_pack_checksum()
@@ -1204,7 +1430,7 @@ class Pack(object):
         :note: This may be an in-memory index
         """
         if self._idx is None:
-            self._idx = load_pack_index(self._idx_path)
+            self._idx = self._idx_load()
         return self._idx
 
     def close(self):
@@ -1233,6 +1459,9 @@ class Pack(object):
         """
         self.index.check()
         self.data.check()
+        for obj in self.iterobjects():
+            obj.check()
+        # TODO: object connectivity checks
 
     def get_stored_checksum(self):
         return self.data.get_stored_checksum()
@@ -1263,7 +1492,7 @@ class Pack(object):
         for offset, type, obj, crc32 in self.data.iterobjects():
             assert isinstance(offset, int)
             yield ShaFile.from_raw_chunks(
-                    *self.data.resolve_object(offset, type, obj))
+              *self.data.resolve_object(offset, type, obj))
 
 
 try: