New dulwich.pack.MemoryPackIndex class.
author Jelmer Vernooij <jelmer@samba.org>
Sun, 8 Aug 2010 17:16:18 +0000 (19:16 +0200)
committer Jelmer Vernooij <jelmer@samba.org>
Sun, 8 Aug 2010 17:16:18 +0000 (19:16 +0200)
NEWS
dulwich/pack.py
dulwich/tests/test_pack.py

diff --git a/NEWS b/NEWS
index 6af9851c8010aa4755ba6d0cbccadd24241890a7..9035100f5dfeeb096d7996ef77ec5d52afa91f7c 100644 (file)
--- a/NEWS
+++ b/NEWS
@@ -22,6 +22,8 @@
 
   * Initial work on a fastimport parser. (Jelmer Vernooij)
 
+  * New dulwich.pack.MemoryPackIndex class. (Jelmer Vernooij)
+
  TESTS
 
   * Use GitFile when modifying packed-refs in tests. (Dave Borowitz)
diff --git a/dulwich/pack.py b/dulwich/pack.py
index 1146155b4cc3c398e72343698d027525a133f5f7..93ae488c22de6f683cc78a3212d071449a2e2c4d 100644 (file)
--- a/dulwich/pack.py
+++ b/dulwich/pack.py
@@ -278,6 +278,52 @@ class PackIndex(object):
         """
         raise NotImplementedError(self._object_index)
 
+    def __iter__(self):
+        """Iterate over the hex SHAs of the objects in this pack."""
+        return imap(sha_to_hex, self._itersha())
+
+    def objects_sha1(self):
+        """Return the hex SHA1 over the SHAs of all objects in this pack.
+
+        :note: This is used for the filename of the pack.
+        """
+        return iter_sha1(self._itersha())  # SHAs come from _itersha()
+
+    def _itersha(self):
+        """Yield the SHA1s of all objects in the index, sorted (abstract)."""
+        raise NotImplementedError(self._itersha)  # subclasses must override
+
+
+class MemoryPackIndex(PackIndex):
+    """Pack index that is stored entirely in memory."""
+
+    def __init__(self, entries, pack_checksum=None):
+        """Create a new MemoryPackIndex.
+
+        :param entries: Sequence of (name, idx, crc32) tuples (sorted)
+        :param pack_checksum: Optional pack checksum
+        """
+        self._by_sha = dict((name, idx) for name, idx, _crc32 in entries)
+        self._entries = entries
+        self._pack_checksum = pack_checksum
+
+    def get_pack_checksum(self):
+        return self._pack_checksum
+
+    def __len__(self):
+        return len(self._entries)
+
+    def _object_index(self, sha):
+        # _by_sha maps name -> idx, so the value must not be subscripted.
+        return self._by_sha[sha]
+
+    def _itersha(self):
+        # Follow the (sorted) entries; dict iteration order is arbitrary.
+        return (name for name, _idx, _crc32 in self._entries)
+
+    def iterentries(self):
+        return iter(self._entries)
+
 
 class FilePackIndex(PackIndex):
     """Pack index that is based on a file.
@@ -310,7 +356,8 @@ class FilePackIndex(PackIndex):
 
     def __eq__(self, other):
         # Quick optimization:
-        if isinstance(other, FilePackIndex) and self._fan_out_table != other._fan_out_table:
+        if (isinstance(other, FilePackIndex) and
+            self._fan_out_table != other._fan_out_table):
             return False
 
         return super(FilePackIndex, self).__eq__(other)
@@ -342,21 +389,10 @@ class FilePackIndex(PackIndex):
         """Unpack the crc32 checksum for the i-th object from the index file."""
         raise NotImplementedError(self._unpack_crc32_checksum)
 
-    def __iter__(self):
-        """Iterate over the SHAs in this pack."""
-        return imap(sha_to_hex, self._itersha())
-
     def _itersha(self):
         for i in range(len(self)):
             yield self._unpack_name(i)
 
-    def objects_sha1(self):
-        """Return the hex SHA1 over all the shas of all objects in this pack.
-
-        :note: This is used for the filename of the pack.
-        """
-        return iter_sha1(self._itersha())
-
     def iterentries(self):
         """Iterate over the entries in this pack index.
 
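diff --git a/dulwich/tests/test_pack.py b/dulwich/tests/test_pack.py
index 129836248088d50b0d5da881ec4b6f6c41c9983c..c5249e2276ef8a12dba07b0dbb73cefad5eca34b 100644 (file)
--- a/dulwich/tests/test_pack.py
+++ b/dulwich/tests/test_pack.py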
@@ -38,6 +38,7 @@ from dulwich.objects import (
     Tree,
     )
 from dulwich.pack import (
+    MemoryPackIndex,
     Pack,
     PackData,
     ThinPackData,
@@ -312,42 +313,24 @@ pack_checksum = hex_to_sha('721980e866af9a5f93ad674144e1459b8ba3e7b7')
 
 class BaseTestPackIndexWriting(object):
 
-    def setUp(self):
-        self.tempdir = tempfile.mkdtemp()
-
-    def tearDown(self):
-        shutil.rmtree(self.tempdir)
-
     def assertSucceeds(self, func, *args, **kwargs):
         try:
             func(*args, **kwargs)
         except ChecksumMismatch, e:
             self.fail(e)
 
-    def writeIndex(self, filename, entries, pack_checksum):
-        # FIXME: Write to StringIO instead rather than hitting disk ?
-        f = GitFile(filename, "wb")
-        try:
-            self._write_fn(f, entries, pack_checksum)
-        finally:
-            f.close()
+    def index(self, filename, entries, pack_checksum):
+        raise NotImplementedError(self.index)
 
     def test_empty(self):
-        filename = os.path.join(self.tempdir, 'empty.idx')
-        self.writeIndex(filename, [], pack_checksum)
-        idx = load_pack_index(filename)
-        self.assertSucceeds(idx.check)
+        idx = self.index('empty.idx', [], pack_checksum)
         self.assertEquals(idx.get_pack_checksum(), pack_checksum)
         self.assertEquals(0, len(idx))
 
     def test_single(self):
         entry_sha = hex_to_sha('6f670c0fb53f9463760b7295fbb814e965fb20c8')
         my_entries = [(entry_sha, 178, 42)]
-        filename = os.path.join(self.tempdir, 'single.idx')
-        self.writeIndex(filename, my_entries, pack_checksum)
-        idx = load_pack_index(filename)
-        self.assertEquals(idx.version, self._expected_version)
-        self.assertSucceeds(idx.check)
+        idx = self.index('single.idx', my_entries, pack_checksum)
         self.assertEquals(idx.get_pack_checksum(), pack_checksum)
         self.assertEquals(1, len(idx))
         actual_entries = list(idx.iterentries())
@@ -363,32 +346,70 @@ class BaseTestPackIndexWriting(object):
                 self.assertTrue(actual_crc is None)
 
 
-class TestPackIndexWritingv1(TestCase, BaseTestPackIndexWriting):
+class BaseTestFilePackIndexWriting(BaseTestPackIndexWriting):
+    """Index-writing tests that round-trip through an on-disk index file."""
+    def setUp(self):
+        self.tempdir = tempfile.mkdtemp()
+
+    def tearDown(self):
+        shutil.rmtree(self.tempdir)
+
+    def index(self, filename, entries, pack_checksum):
+        path = os.path.join(self.tempdir, filename)
+        self.writeIndex(path, entries, pack_checksum)
+        idx = load_pack_index(path)
+        self.assertSucceeds(idx.check)  # fails the test on ChecksumMismatch
+        self.assertEquals(idx.version, self._expected_version)
+        return idx
+
+    def writeIndex(self, filename, entries, pack_checksum):
+        # FIXME: Write to StringIO instead of hitting disk?
+        f = GitFile(filename, "wb")
+        try:
+            self._write_fn(f, entries, pack_checksum)
+        finally:
+            f.close()
+
+
+class TestMemoryIndexWriting(TestCase, BaseTestPackIndexWriting):
+    """Run the shared pack index tests against MemoryPackIndex."""
+    def setUp(self):
+        TestCase.setUp(self)
+        self._has_crc32_checksum = True
+
+    def index(self, filename, entries, pack_checksum):
+        return MemoryPackIndex(entries, pack_checksum)  # filename is unused
+
+    def tearDown(self):
+        TestCase.tearDown(self)
+
+
+class TestPackIndexWritingv1(TestCase, BaseTestFilePackIndexWriting):
 
     def setUp(self):
         TestCase.setUp(self)
-        BaseTestPackIndexWriting.setUp(self)
+        BaseTestFilePackIndexWriting.setUp(self)
         self._has_crc32_checksum = False
         self._expected_version = 1
         self._write_fn = write_pack_index_v1
 
     def tearDown(self):
         TestCase.tearDown(self)
-        BaseTestPackIndexWriting.tearDown(self)
+        BaseTestFilePackIndexWriting.tearDown(self)
 
 
-class TestPackIndexWritingv2(TestCase, BaseTestPackIndexWriting):
+class TestPackIndexWritingv2(TestCase, BaseTestFilePackIndexWriting):
 
     def setUp(self):
         TestCase.setUp(self)
-        BaseTestPackIndexWriting.setUp(self)
+        BaseTestFilePackIndexWriting.setUp(self)
         self._has_crc32_checksum = True
         self._expected_version = 2
         self._write_fn = write_pack_index_v2
 
     def tearDown(self):
         TestCase.tearDown(self)
-        BaseTestPackIndexWriting.tearDown(self)
+        BaseTestFilePackIndexWriting.tearDown(self)
 
 
 class ReadZlibTests(TestCase):