Bunch of file id improvements.
authorJelmer Vernooij <jelmer@samba.org>
Sun, 16 Jul 2006 20:31:55 +0000 (22:31 +0200)
committerJelmer Vernooij <jelmer@samba.org>
Sun, 16 Jul 2006 20:31:55 +0000 (22:31 +0200)
README
TODO [deleted file]
branch.py
fetch.py
fileids.py
logwalker.py
repository.py
tests/test_fileids.py
transport.py

diff --git a/README b/README
index 9d4dc828d932c266bfdead1834f231d5fbdf7fbd..a37cad50731a75da209d45a6b07fa6d06e858fa4 100644 (file)
--- a/README
+++ b/README
@@ -10,8 +10,8 @@ You will need at least version 0.9 of Bazaar-NG (currently unreleased).
 
 You also need a fairly recent version of the Python bindings to the 
 Subversion libraries. At the moment, the svn plugin only works with 
-Subversion 1.5 (trunk). I plan to submit patches against the python-subversion 
-package in Ubuntu Edgy later.
+Subversion 1.5 (trunk). I have submitted patches against the python-subversion 
+package in Ubuntu Edgy later (bug #51304 on Launchpad).
 
 == Features ==
 
@@ -87,8 +87,6 @@ In the future, I also hope to support:
    good shape. However, it is currently quite CPU-intensive for no good reason 
    and I hope to make a couple of improvements in that area.
 
-     * Keep on-disk cache of file-ids to path mappings
-
         * Override implementation of get_revision_delta(). Will speed up 'bzr log -v'
        
         * use svn_ra_replay() on systems that have Subversion 1.4. Saves a couple of roundtrips when fetching history.
diff --git a/TODO b/TODO
deleted file mode 100644 (file)
index d65e14b..0000000
--- a/TODO
+++ /dev/null
@@ -1,4 +0,0 @@
-- fix weird samba bug
-- small optimization for big repositories:
-  - if parent revision doesn't have ancestors set, there's no need to set 
-    anything for child
index 8fb2e3abf7feb71a9754e5685edb996d16a8219e..99ec3cb52c1f573048a9628b7cd75ba434410264 100644 (file)
--- a/branch.py
+++ b/branch.py
@@ -62,8 +62,8 @@ class SvnBranch(Branch):
         self._format = SvnBranchFormat()
         mutter("Connected to branch at %r" % self.branch_path)
         self._generate_revision_history(self.repository._latest_revnum)
-        #self.repository.get_fileid_map(self.repository._latest_revnum,
-        #                               self.branch_path)
+        self.repository.get_fileid_map(self.repository._latest_revnum,
+                                       self.branch_path)
 
     def check(self):
         """See Branch.Check.
index a59c3c416d4edfd815b0db4aab69a556b183f1ff..bc5f0d4defa9d156a1193a2c17f1e8384681dd04 100644 (file)
--- a/fetch.py
+++ b/fetch.py
@@ -46,6 +46,7 @@ class RevisionBuildEditor(svn.delta.Editor):
         self.inventory = prev_inventory.copy()
         self.revid = revid
         self.revnum = revnum
+        self.id_map = source.get_fileid_map(revnum, branch_path)
         self.source = source
         self.target = target
         self.transact = target.get_transaction()
@@ -113,7 +114,7 @@ class RevisionBuildEditor(svn.delta.Editor):
         relpath = self.relpath(path)
         if relpath is None:
             return ROOT_ID
-        file_id, revision_id = self.get_file_id(path, self.revnum)
+        file_id, revision_id = self.id_map[relpath]
 
         if copyfrom_path:
             base_file_id, base_revid = self.get_file_id(copyfrom_path, copyfrom_revnum)
@@ -207,7 +208,7 @@ class RevisionBuildEditor(svn.delta.Editor):
         actual_checksum = md5_strings(lines)
         assert checksum is None or checksum == actual_checksum
 
-        file_id, revision_id = self.get_file_id(path, self.revnum)
+        file_id, revision_id = self.id_map[relpath]
         file_weave = self.weave_store.get_weave_or_empty(file_id, self.transact)
         if not file_weave.has_version(revision_id):
             file_weave.add_lines(revision_id, self.file_parents, lines)
index fe11dc6e03f668b99b2f071be1a9f5687395d086..a1fca962aaf1dc5d147b17c581247548b7356907 100644 (file)
@@ -46,18 +46,27 @@ def generate_file_id(revid, path):
     (uuid, branch, revnum) = parse_svn_revision_id(revid)
     return generate_svn_file_id(uuid, revnum, branch, path)
 
-def copy_ids(base_map, orig_branch, map, dest_branch):
-    for p in base_map:
-        if p == orig_branch or p.startswith(orig_branch+"/"):
-            map[p.replace(orig_branch, dest_branch, 1)] = base_map[p]
-    return map
-
-def new_ids(base_map, orig_parent, map, dest_revid, dest_parent):
-    for p in base_map:
-        if p.startswith(orig_parent+"/"):
-            new_p = p.replace(orig_parent, dest_parent, 1)
-            map[new_p] = generate_file_id(dest_revid, new_p), dest_revid
-    return map
+
+def get_local_changes(paths, scheme, uuid):
+    new_paths = {}
+    names = paths.keys()
+    names.sort()
+    for p in names:
+        data = paths[p]
+        new_p = scheme.unprefix(p)[1]
+        if data[1] is not None:
+            (cbp, crp) = scheme.unprefix(data[1])
+
+            # Branch copy
+            if (crp == "" and new_p == ""):
+                data = ('M', None, None)
+            else:
+                data = (data[0], crp, generate_svn_revision_id(
+                    uuid, data[2], cbp))
+
+        new_paths[new_p] = data
+    return new_paths
+
 
 class FileIdMap(object):
     """ File id store. 
@@ -73,12 +82,16 @@ class FileIdMap(object):
                 access_mode='w', create=True)
 
     def save(self, revid, parent_revids, _map):
+        # FIXME: use VersionedFile.annotate_iter() to find versions?
         mutter('saving file id map for %r' % revid)
-        def createline((p, v)):
-            return "\t".join([p, v[0], v[1]]) + "\n"
+        def createline(p):
+            return "\t".join([p, _map[p][0], _map[p][1]]) + "\n"
+
+        keys = _map.keys()
+        keys.sort()
         
         self.cache_weave.add_lines(revid, parent_revids, 
-                                   map(createline, _map.items()))
+                                   map(createline, keys))
 
     def load(self, revid):
         map = {}
@@ -93,7 +106,7 @@ class FileIdMap(object):
         todo = []
         parent_revs = []
         map = {"": (ROOT_ID, None)} # No history -> empty map
-        for (bp, paths, rev) in self.follow_local_history(uuid,branch, revnum):
+        for (bp, paths, rev) in self._log.follow_history(branch, revnum):
             revid = generate_svn_revision_id(uuid, rev, bp)
             try:
                 map = self.load(revid)
@@ -111,12 +124,19 @@ class FileIdMap(object):
         todo.reverse()
 
         i = 0
-        for (revid, changes) in todo:
+        for (revid, global_changes) in todo:
+            changes = get_local_changes(global_changes, self._log.scheme,
+                                        uuid)
             mutter('generating file id map for %r' % revid)
             if pb is not None:
                 pb.update('generating file id map', i, len(todo))
 
-            map = self._apply_changes(map, revid, changes)
+            def find_children(path, revid):
+                (_, bp, revnum) = parse_svn_revision_id(revid)
+                for p in self._log.find_children(bp+"/"+path, revnum):
+                    yield self._log.scheme.unprefix(p)[1]
+
+            map = self._apply_changes(map, revid, changes, find_children)
             self.save(revid, parent_revs, map)
             parent_revs = [revid]
             i = i + 1
@@ -125,42 +145,10 @@ class FileIdMap(object):
             pb.clear()
         return map
 
-    def find_children(self, uuid, path, revnum):
-        (bp, rp) = self._log.scheme.unprefix(path)
-        map = self.get_map(uuid, revnum, bp)
-        for m in map:
-            if m.startswith(rp+"/"):
-                yield m[len(rp)+1:]
-
-    def follow_local_history(self, uuid, branch_path, revnum):
-        for (bp, paths, rev) in self._log.follow_history(branch_path, revnum):
-            new_paths = {}
-            names = paths.keys()
-            names.sort()
-            for p in names:
-                data = paths[p]
-                assert p.startswith(bp)
-                new_p = p[len(bp):].strip("/") # remove branch path
-                if data[1] is not None:
-                    (cbp, crp) = self._log.scheme.unprefix(data[1])
-                    # TODO: See if data[1]:data[2] is the same branch as 
-                    # the current branch. The current code doesn't handle
-                    # replaced branches very well
-                    related = (cbp == bp)
-
-                    if related:
-                        data = (data[0], crp, data[2])
-                    else:
-                        warn('unrelated, non-branch copy from %r:%d to %r' %
-                                (data[1], data[2], p))
-                        data = (data[0], None, None)
-
-                new_paths[new_p] = data
-            yield (bp, new_paths, rev)
 
 class SimpleFileIdMap(FileIdMap):
     @staticmethod
-    def _apply_changes(base_map, revid, changes):
+    def _apply_changes(base_map, revid, changes, find_children=None):
         map = copy(base_map)
 
         map[""] = (ROOT_ID, revid)
@@ -172,7 +160,7 @@ class SimpleFileIdMap(FileIdMap):
             if data[0] in ('D', 'R'):
                 assert map.has_key(p)
                 del map[p]
-                # FIXME: Delete all children of p
+                # Delete all children of p as well
                 for c in map.keys():
                     if c.startswith(p+"/"):
                         del map[c]
@@ -182,10 +170,14 @@ class SimpleFileIdMap(FileIdMap):
 
                 if not data[1] is None:
                     mutter('%r:%s copied from %r:%s' % (p, revid, data[1], data[2]))
-                    if p == "" and data[1] == "":
-                        map = copy_ids(base_map, cbp, map, bp)
+                    if find_children is None:
+                        warn('incomplete data for %r' % p)
                     else:
-                        map = new_ids(base_map, data[1], map, revid, p)
+                        mutter('find children of %r:%r' % (data[1], data[2]))
+                        for c in find_children(data[1], data[2]):
+                            mutter('child: %r -> %r' % (c, c.replace(data[1], p, 1)))
+                            map[c.replace(data[1], p, 1)] = generate_file_id(revid, c), revid
+
             elif data[0] == 'M':
                 if p == "":
                     map[p] = (ROOT_ID, "")
index 474efaaf5426dda2b0b2d34e33d4e3272fadb05a..3bd3de2b0a72f757df7755bc66dc10e3d0d1decf 100644 (file)
@@ -189,7 +189,27 @@ class LogWalker(object):
 
     
     def find_latest_change(self, path, revnum):
-        while revnum > 0 and not path in self.revisions[str(revnum)]['paths']:
+        while revnum > 0 and not self.touches_path(path, revnum):
             revnum = revnum - 1
         return revnum
 
+    def touches_path(self, path, revnum):
+        return (path in self.revisions[str(revnum)]['paths'])
+
+    def find_children(self, path, revnum):
+        # TODO: Find children by walking history, or use 
+        # cache?
+        mutter("svn ls -r %d '%r'" % (revnum, path))
+
+        try:
+            (dirents, _, _) = svn.ra.get_dir(
+                self.ra, path.encode('utf8'), revnum)
+        except SubversionException, (_, num):
+            if num == svn.core.SVN_ERR_FS_NOT_DIRECTORY:
+                return
+            raise
+
+        for p in dirents:
+            yield os.path.join(path, p)
+            for c in self.find_children(os.path.join(path, p), revnum):
+                yield c
index 148753a8964800d531ce9585903334ad14c7580f..e7729f9fe546322fdec1c910451f1c28e18e8830 100644 (file)
@@ -179,6 +179,7 @@ class SvnRepository(Repository):
 
         mutter("Connected to repository with UUID %s" % self.uuid)
 
+        mutter('svn latest-revnum')
         self._latest_revnum = svn.ra.get_latest_revnum(self.ra)
 
         self._log = logwalker.LogWalker(self.scheme, self.ra, 
@@ -241,11 +242,11 @@ See http://bazaar-vcs.org/BzrSvn for details.
 
         map = self.get_fileid_map(revnum, bp)
 
-        if not map.has_key(rp):
+        try:
+            return map[rp]
+        except KeyError:
             raise NoSuchFile(path=rp)
 
-        return map[rp]
-
     def all_revision_ids(self):
         raise NotImplementedError(self.all_revision_ids)
 
@@ -466,9 +467,10 @@ See http://bazaar-vcs.org/BzrSvn for details.
     def _cache_get_dir(self, path, revnum):
         assert path != None
         path = path.lstrip("/")
-        if self.dir_cache.has_key(path) and \
-           self.dir_cache[path].has_key(revnum):
-            return self.dir_cache[path][revnum]
+        try:
+           return self.dir_cache[path][revnum]
+        except KeyError:
+            pass
 
         mutter("svn ls -r %d '%r'" % (revnum, path))
 
@@ -512,10 +514,25 @@ See http://bazaar-vcs.org/BzrSvn for details.
         return None
 
     def _get_branch_proplist(self, path, revnum):
-        key = "%s:%d" % (path, revnum)
-        if not self.branchprop_cache.has_key(key):
-            self.branchprop_cache[key] = self._get_dir_proplist(path, revnum)
-        return self.branchprop_cache[key]
+        # Search backwards in the cache
+        for i in range(revnum):
+            i = revnum - i
+
+            key = "%s:%d" % (path, i)
+
+            try:
+                proplist = self.branchprop_cache[key]
+            except: 
+                pass
+
+            if self._log.touches_path(path, i):
+                proplist = self._get_dir_proplist(path, i)
+                break
+
+        for j in range(i, revnum-i):
+            self.branchprop_cache["%s:%d" % (path, j)] = proplist
+
+        return proplist
 
     def _get_branch_prop(self, path, revnum, name, default=None):
         props = self._get_branch_proplist(path, revnum)
@@ -539,9 +556,10 @@ See http://bazaar-vcs.org/BzrSvn for details.
         assert isinstance(path, basestring)
 
         props = self._get_dir_proplist(path, revnum)
-        if props.has_key(name):
+        try:
             return props[name]
-        return default
+        except KeyError:
+            return default
 
     def _get_file(self, path, revnum):
         (_, stream) = self._cache_get_file(path, revnum)
@@ -606,8 +624,10 @@ class SvnRepositoryRenaming(SvnRepository):
         assert isinstance(path, basestring)
         assert revnum >= 0
 
-        if self.path_map.has_key(revnum) and self.path_map[revnum].has_key(path):
+        try:
             return self.path_map[revnum][path]
+        except:
+            pass
 
         mutter('creating file id for %r:%d' % (path, revnum))
 
index 409aa0f6fa098fe83517fb9a7919de35abd3a647..1ef21098124ae02949e4ed8b63e313a15b478820 100644 (file)
@@ -147,3 +147,14 @@ class TestFileMapping(TestCase):
         self.assertEqual({'': ('TREE_ROOT', 'svn-v1:1@uuid-'), 
                                'foo': ('svn-v1:1@uuid--foo', 'svn-v1:1@uuid-')
                          }, map)
+
+    def test_copy(self):
+        map = self.apply_mappings(
+                {"svn-v1:1@uuid-": {"foo": ('A', None, None), 
+                                   "foo/blie": ('A', None, None),
+                                   "foo/bla": ('A', None, None)},
+                "svn-v1:2@uuid-": {"foob": ('A', 'foo', 1), 
+                                   "foob/bla": ('M', None, None)}
+                })
+        self.assertTrue(map.has_key("foob/bla"))
+        self.assertTrue(map.has_key("foob/blie"))
index 425112b4c68ddaea3994473deeb63fd77d4a15a7..f1b9db1758938804253c7d1b05a41a8fc5f926ab 100644 (file)
@@ -117,6 +117,7 @@ class SvnRaTransport(Transport):
 
         else:
             self.ra = ra
+            mutter('svn reparent %r' % self.svn_url)
             svn.ra.reparent(self.ra, self.svn_url.encode('utf8'))
 
     def has(self, relpath):