Only reload new pack files, and discard old ones when updating pack cache.

author    Jelmer Vernooij <jelmer@samba.org>  Sun, 20 Apr 2014 19:03:04 +0000 (21:03 +0200)
committer Jelmer Vernooij <jelmer@samba.org>  Sun, 20 Apr 2014 19:03:04 +0000 (21:03 +0200)

NEWS
dulwich/object_store.py
dulwich/tests/compat/server_utils.py
dulwich/tests/test_object_store.py
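
The commit replaces the old list-based pack cache, which was discarded and rebuilt from scratch whenever the pack directory changed, with a dict keyed by pack base name: a reload now opens only packs that newly appeared and closes only packs that disappeared. A minimal standalone sketch of that technique, assuming an open_pack callable as a stand-in for dulwich's Pack constructor (PackCache itself is illustrative, not dulwich API):

import os

class PackCache(object):
    """Illustrative sketch of the cache-by-base-name approach."""

    def __init__(self, pack_dir, open_pack):
        self.pack_dir = pack_dir
        self.open_pack = open_pack   # stand-in for Pack(basename)
        self._packs = {}             # pack base name -> open pack object

    def update(self):
        # Collect the base names of all pack-*.pack files on disk.
        on_disk = set()
        for name in os.listdir(self.pack_dir):
            if name.startswith("pack-") and name.endswith(".pack"):
                on_disk.add(name[:-len(".pack")])
        # Open only packs that appeared since the last update.
        for base in on_disk - set(self._packs):
            self._packs[base] = self.open_pack(
                os.path.join(self.pack_dir, base))
        # Close and drop packs that disappeared, instead of closing and
        # reopening everything as the old _load_packs() did.
        for base in set(self._packs) - on_disk:
            self._packs.pop(base).close()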

diff --git a/NEWS b/NEWS
index 6198cc8f871da850016a76b313f79bd92aee75b5..03826d395207c40dcf3ce5386d62ed3b9f4b74a5 100644
--- a/NEWS
+++ b/NEWS
    object already in the repository.
    (Damien Tournoud, #135)
 
+ * Stop leaking file handles during pack reload. (Damien Tournoud)
+
+ * Avoid reopening packs during pack cache reload. (Jelmer Vernooij)
+
  API CHANGES
 
   * Drop support for Python 2.6. (Jelmer Vernooij)
diff --git a/dulwich/object_store.py b/dulwich/object_store.py
index bb3117a06ffcb4c6296661ed8ccbc7c54acfbf71..014f5b16a0978966072187d575fec3237b90071e 100644
--- a/dulwich/object_store.py
+++ b/dulwich/object_store.py
@@ -251,7 +251,7 @@ class BaseObjectStore(object):
 class PackBasedObjectStore(BaseObjectStore):
 
     def __init__(self):
-        self._pack_cache = None
+        self._pack_cache = {}
 
     @property
     def alternates(self):
@@ -279,36 +279,30 @@ class PackBasedObjectStore(BaseObjectStore):
                 return True
         return False
 
-    def _load_packs(self):
-        raise NotImplementedError(self._load_packs)
-
     def _pack_cache_stale(self):
         """Check whether the pack cache is stale."""
         raise NotImplementedError(self._pack_cache_stale)
 
-    def _add_known_pack(self, pack):
+    def _add_known_pack(self, base_name, pack):
         """Add a newly appeared pack to the cache by path.
 
         """
-        if self._pack_cache is not None:
-            self._pack_cache.append(pack)
+        self._pack_cache[base_name] = pack
 
     def close(self):
         pack_cache = self._pack_cache
-        self._pack_cache = None
+        self._pack_cache = {}
         while pack_cache:
-            pack = pack_cache.pop()
+            (name, pack) = pack_cache.popitem()
             pack.close()
 
     @property
     def packs(self):
         """List with pack objects."""
-        if self._pack_cache is not None and self._pack_cache_stale():
-            self.close()
+        if self._pack_cache is None or self._pack_cache_stale():
+            self._update_pack_cache()
 
-        if self._pack_cache is None:
-            self._pack_cache = self._load_packs()
-        return self._pack_cache
+        return self._pack_cache.values()
 
     def _iter_alternate_objects(self):
         """Iterate over the SHAs of all the objects in alternate stores."""
@@ -413,6 +407,7 @@ class DiskObjectStore(PackBasedObjectStore):
         self.path = path
         self.pack_dir = os.path.join(self.path, PACKDIR)
         self._pack_cache_time = 0
+        self._pack_cache = {}
         self._alternates = None
 
     def __repr__(self):
@@ -478,31 +473,29 @@ class DiskObjectStore(PackBasedObjectStore):
             path = os.path.join(self.path, path)
         self.alternates.append(DiskObjectStore(path))
 
-    def _load_packs(self):
-        pack_files = []
+    def _update_pack_cache(self):
         try:
-            self._pack_cache_time = os.stat(self.pack_dir).st_mtime
             pack_dir_contents = os.listdir(self.pack_dir)
-            for name in pack_dir_contents:
-                # TODO: verify that idx exists first
-                if name.startswith("pack-") and name.endswith(".pack"):
-                    filename = os.path.join(self.pack_dir, name)
-                    pack_files.append((os.stat(filename).st_mtime, filename))
         except OSError as e:
             if e.errno == errno.ENOENT:
-                return []
-            raise
-        pack_files.sort(reverse=True)
-        suffix_len = len(".pack")
-        result = []
-        try:
-            for _, f in pack_files:
-                result.append(Pack(f[:-suffix_len]))
-        except:
-            for p in result:
-                p.close()
+                self._pack_cache_time = 0
+                self.close()
+                return
             raise
-        return result
+        self._pack_cache_time = os.stat(self.pack_dir).st_mtime
+        pack_files = set()
+        for name in pack_dir_contents:
+            # TODO: verify that idx exists first
+            if name.startswith("pack-") and name.endswith(".pack"):
+                pack_files.add(name[:-len(".pack")])
+
+        # Open newly appeared pack files
+        for f in pack_files:
+            if f not in self._pack_cache:
+                self._pack_cache[f] = Pack(os.path.join(self.pack_dir, f))
+        # Remove disappeared pack files
+        for f in set(self._pack_cache) - pack_files:
+            self._pack_cache.pop(f).close()
 
     def _pack_cache_stale(self):
         try:
@@ -589,7 +582,7 @@ class DiskObjectStore(PackBasedObjectStore):
         # Add the pack to the store and return it.
         final_pack = Pack(pack_base_name)
         final_pack.check_length_and_checksum()
-        self._add_known_pack(final_pack)
+        self._add_known_pack(pack_base_name, final_pack)
         return final_pack
 
     def add_thin_pack(self, read_all, read_some):
@@ -640,7 +633,7 @@ class DiskObjectStore(PackBasedObjectStore):
             p.close()
         os.rename(path, basename + ".pack")
         final_pack = Pack(basename)
-        self._add_known_pack(final_pack)
+        self._add_known_pack(basename, final_pack)
         return final_pack
 
     def add_pack(self):
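
With the object_store.py hunks above applied, repeated accesses to the packs property reuse already-open Pack objects across reloads. A hedged usage sketch; the repository path is hypothetical, and _pack_cache is an internal attribute inspected here purely for illustration:

from dulwich.object_store import DiskObjectStore

store = DiskObjectStore("/path/to/repo/.git/objects")  # hypothetical path

list(store.packs)                  # first access opens every pack on disk
before = dict(store._pack_cache)   # internal: base name -> open Pack

# ... an external `git repack` or fetch adds/removes pack files here ...

list(store.packs)                  # stale cache runs _update_pack_cache()
after = dict(store._pack_cache)

# Base names present in both snapshots map to the *same* open Pack
# objects; only new packs were opened, and vanished ones were closed.
store.close()                      # closes every Pack left in the cache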
diff --git a/dulwich/tests/compat/server_utils.py b/dulwich/tests/compat/server_utils.py
index be55cc1035e13822597cc863f3119c7846483539..a946a3fbd792161b0364f96347c3d962988e2459 100644
--- a/dulwich/tests/compat/server_utils.py
+++ b/dulwich/tests/compat/server_utils.py
@@ -130,7 +130,7 @@ class ServerTests(object):
         run_git_or_fail(['fetch', self.url(port)] + self.branch_args(),
                         cwd=self._old_repo.path)
         # flush the pack cache so any new packs are picked up
-        self._old_repo.object_store._pack_cache = None
+        self._old_repo.object_store._pack_cache_time = 0
         self.assertReposEqual(self._old_repo, self._new_repo)
 
     def test_fetch_from_dulwich_no_op(self):
@@ -144,7 +144,7 @@ class ServerTests(object):
         run_git_or_fail(['fetch', self.url(port)] + self.branch_args(),
                         cwd=self._old_repo.path)
         # flush the pack cache so any new packs are picked up
-        self._old_repo.object_store._pack_cache = None
+        self._old_repo.object_store._pack_cache_time = 0
         self.assertReposEqual(self._old_repo, self._new_repo)
 
     def test_clone_from_dulwich_empty(self):
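
The compat tests above can no longer flush the cache by assigning None, since the cache is now always a dict; instead they zero _pack_cache_time so that _pack_cache_stale() reports the cache as stale on the next packs access. A sketch of that staleness check as a free function; the body of DiskObjectStore._pack_cache_stale is elided in the hunks above, so this mirrors it on the assumption that it compares the pack directory's mtime against the recorded time:

import errno
import os

def pack_cache_stale(pack_dir, pack_cache_time):
    # Stale when the pack directory changed after the last cache update;
    # zeroing pack_cache_time therefore always forces a reload.
    try:
        return os.stat(pack_dir).st_mtime > pack_cache_time
    except OSError as e:
        if e.errno == errno.ENOENT:   # no pack directory yet: stale
            return True
        raise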
diff --git a/dulwich/tests/test_object_store.py b/dulwich/tests/test_object_store.py
index 3bb80649522e1edca3bc7e2022f70f073280def0..586c7d6ca47e5b49c7e5d20d6b694199333c93c3 100644
--- a/dulwich/tests/test_object_store.py
+++ b/dulwich/tests/test_object_store.py
@@ -329,12 +329,10 @@ class DiskObjectStoreTests(PackBasedObjectStoreTests, TestCase):
             self.assertEqual((Blob.type_num, 'more yummy data'),
                              o.get_raw(packed_blob_sha))
         finally:
-            # FIXME: DiskObjectStore should have close() which do the following:
-            for p in o._pack_cache or []:
-                p.close()
-
+            o.close()
             pack.close()
 
+
 class TreeLookupPathTests(TestCase):
 
     def setUp(self):