Do simple pack checking during receive-pack.
authorDave Borowitz <dborowitz@google.com>
Tue, 9 Mar 2010 19:44:14 +0000 (11:44 -0800)
committerDave Borowitz <dborowitz@google.com>
Thu, 22 Apr 2010 21:35:46 +0000 (14:35 -0700)
Call obj.check() for each object in a pack. This requires passing the
Pack object back from the various commit callbacks.

In the future, we might consider not even moving the pack in until after
has been checked. At the moment, however, the only way to complete a
thin pack is to move it in, so we must do that first. This results in a
garbage pack if the check fails, which would have to be cleaned up by
an eventual GC. However, since only corrupt objects (not packs) can be
written to disk, with the SHA of their corrupt contents, the chances of
actually hitting a corrupt object in practice are very small.

Change-Id: Ib6c6ae2846f77f6a6cd4849b9608c29886258673

dulwich/object_store.py
dulwich/pack.py
dulwich/server.py

index 688ccd6b5f07e1ea1e27fb7ca2be678ae676837d..5c11a84cb51522aac8d496ea78b43d69e4f435f8 100644 (file)
@@ -315,13 +315,14 @@ class PackBasedObjectStore(BaseObjectStore):
         """Add a set of objects to this object store.
 
         :param objects: Iterable over objects, should support __len__.
         """Add a set of objects to this object store.
 
         :param objects: Iterable over objects, should support __len__.
+        :return: Pack object of the objects written.
         """
         if len(objects) == 0:
             # Don't bother writing an empty pack file
             return
         f, commit = self.add_pack()
         write_pack_data(f, objects, len(objects))
         """
         if len(objects) == 0:
             # Don't bother writing an empty pack file
             return
         f, commit = self.add_pack()
         write_pack_data(f, objects, len(objects))
-        commit()
+        return commit()
 
 
 class DiskObjectStore(PackBasedObjectStore):
 
 
 class DiskObjectStore(PackBasedObjectStore):
@@ -407,7 +408,9 @@ class DiskObjectStore(PackBasedObjectStore):
         newbasename = os.path.join(self.pack_dir, "pack-%s" % pack_sha)
         os.rename(temppath+".pack", newbasename+".pack")
         os.rename(temppath+".idx", newbasename+".idx")
         newbasename = os.path.join(self.pack_dir, "pack-%s" % pack_sha)
         os.rename(temppath+".pack", newbasename+".pack")
         os.rename(temppath+".idx", newbasename+".idx")
-        self._add_known_pack(Pack(newbasename))
+        final_pack = Pack(newbasename)
+        self._add_known_pack(final_pack)
+        return final_pack
 
     def move_in_pack(self, path):
         """Move a specific file containing a pack into the pack directory.
 
     def move_in_pack(self, path):
         """Move a specific file containing a pack into the pack directory.
@@ -424,7 +427,9 @@ class DiskObjectStore(PackBasedObjectStore):
         write_pack_index_v2(basename+".idx", entries, p.get_stored_checksum())
         p.close()
         os.rename(path, basename + ".pack")
         write_pack_index_v2(basename+".idx", entries, p.get_stored_checksum())
         p.close()
         os.rename(path, basename + ".pack")
-        self._add_known_pack(Pack(basename))
+        final_pack = Pack(basename)
+        self._add_known_pack(final_pack)
+        return final_pack
 
     def add_thin_pack(self):
         """Add a new thin pack to this object store.
 
     def add_thin_pack(self):
         """Add a new thin pack to this object store.
@@ -438,7 +443,9 @@ class DiskObjectStore(PackBasedObjectStore):
             os.fsync(fd)
             f.close()
             if os.path.getsize(path) > 0:
             os.fsync(fd)
             f.close()
             if os.path.getsize(path) > 0:
-                self.move_in_thin_pack(path)
+                return self.move_in_thin_pack(path)
+            else:
+                return None
         return f, commit
 
     def add_pack(self):
         return f, commit
 
     def add_pack(self):
@@ -453,7 +460,9 @@ class DiskObjectStore(PackBasedObjectStore):
             os.fsync(fd)
             f.close()
             if os.path.getsize(path) > 0:
             os.fsync(fd)
             f.close()
             if os.path.getsize(path) > 0:
-                self.move_in_pack(path)
+                return self.move_in_pack(path)
+            else:
+                return None
         return f, commit
 
     def add_object(self, obj):
         return f, commit
 
     def add_object(self, obj):
index 5ab4dfbd18d9161a5a38f327204160c849e734e7..52440a4affad5533c0be7ec6cb797a186eb5225e 100644 (file)
@@ -316,7 +316,6 @@ class PackIndex(object):
 
     def check(self):
         """Check that the stored checksum matches the actual checksum."""
 
     def check(self):
         """Check that the stored checksum matches the actual checksum."""
-        # TODO: Check pack contents, too
         actual = self.calculate_checksum()
         stored = self.get_stored_checksum()
         if actual != stored:
         actual = self.calculate_checksum()
         stored = self.get_stored_checksum()
         if actual != stored:
@@ -1233,6 +1232,9 @@ class Pack(object):
         """
         self.index.check()
         self.data.check()
         """
         self.index.check()
         self.data.check()
+        for obj in self.iterobjects():
+            obj.check()
+        # TODO: object connectivity checks
 
     def get_stored_checksum(self):
         return self.data.get_stored_checksum()
 
     def get_stored_checksum(self):
         return self.data.get_stored_checksum()
index 45fdc9091a2c384a76346b49e2ed98932bad2fea..2f1573332264cb62ff52f188978c52f67ac93bd1 100644 (file)
@@ -36,6 +36,7 @@ from dulwich.errors import (
     ApplyDeltaError,
     ChecksumMismatch,
     GitProtocolError,
     ApplyDeltaError,
     ChecksumMismatch,
     GitProtocolError,
+    ObjectFormatException,
     )
 from dulwich.misc import (
     make_sha,
     )
 from dulwich.misc import (
     make_sha,
@@ -643,48 +644,43 @@ class ReceivePackHandler(Handler):
     def _apply_pack(self, refs):
         f, commit = self.repo.object_store.add_thin_pack()
         all_exceptions = (IOError, OSError, ChecksumMismatch, ApplyDeltaError,
     def _apply_pack(self, refs):
         f, commit = self.repo.object_store.add_thin_pack()
         all_exceptions = (IOError, OSError, ChecksumMismatch, ApplyDeltaError,
-                          AssertionError, socket.error, zlib.error)
+                          AssertionError, socket.error, zlib.error,
+                          ObjectFormatException)
         status = []
         unpack_error = None
         # TODO: more informative error messages than just the exception string
         try:
             PackStreamVerifier(self.proto, f).verify()
         status = []
         unpack_error = None
         # TODO: more informative error messages than just the exception string
         try:
             PackStreamVerifier(self.proto, f).verify()
-        except all_exceptions, e:
-            unpack_error = str(e).replace('\n', '')
-        try:
-            commit()
-        except all_exceptions, e:
-            if not unpack_error:
-                unpack_error = str(e).replace('\n', '')
-
-        if unpack_error:
-            status.append(('unpack', unpack_error))
-        else:
+            p = commit()
+            if not p:
+                raise IOError('Failed to write pack')
+            p.check()
             status.append(('unpack', 'ok'))
             status.append(('unpack', 'ok'))
+        except all_exceptions, e:
+            status.append(('unpack', str(e).replace('\n', '')))
+            # The pack may still have been moved in, but it may contain broken
+            # objects. We trust a later GC to clean it up.
 
         for oldsha, sha, ref in refs:
 
         for oldsha, sha, ref in refs:
-            ref_error = None
+            ref_status = 'ok'
             try:
                 if sha == ZERO_SHA:
             try:
                 if sha == ZERO_SHA:
-                    if not self.has_capability('delete-refs'):
+                    if not delete_refs:
                         raise GitProtocolError(
                           'Attempted to delete refs without delete-refs '
                           'capability.')
                     try:
                         del self.repo.refs[ref]
                     except all_exceptions:
                         raise GitProtocolError(
                           'Attempted to delete refs without delete-refs '
                           'capability.')
                     try:
                         del self.repo.refs[ref]
                     except all_exceptions:
-                        ref_error = 'failed to delete'
+                        ref_status = 'failed to delete'
                 else:
                     try:
                         self.repo.refs[ref] = sha
                     except all_exceptions:
                 else:
                     try:
                         self.repo.refs[ref] = sha
                     except all_exceptions:
-                        ref_error = 'failed to write'
+                        ref_status = 'failed to write'
             except KeyError, e:
             except KeyError, e:
-                ref_error = 'bad ref'
-            if ref_error:
-                status.append((ref, ref_error))
-            else:
-                status.append((ref, 'ok'))
+                ref_status = 'bad ref'
+            status.append((ref, ref_status))
 
         return status
 
 
         return status