Fix bug in revid caching.
[jelmer/subvertpy.git] / fetch.py
index 56f2d18da39a70b125eabf7388012512964e2851..2723f97fb4fb2239e7ee4a62de5317c2985dcc62 100644 (file)
--- a/fetch.py
+++ b/fetch.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2005-2006 Jelmer Vernooij <jelmer@samba.org>
+# Copyright (C) 2005-2007 Jelmer Vernooij <jelmer@samba.org>
 
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
 # You should have received a copy of the GNU General Public License
 # along with this program; if not, write to the Free Software
 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+"""Fetching revisions from Subversion repositories in batches."""
 
 import bzrlib
-from bzrlib.decorators import needs_write_lock
 from bzrlib.inventory import Inventory
 import bzrlib.osutils as osutils
-from bzrlib.progress import ProgressBar
 from bzrlib.revision import Revision
 from bzrlib.repository import InterRepository
 from bzrlib.trace import mutter
+import bzrlib.ui as ui
 
 from copy import copy
 from cStringIO import StringIO
 import md5
 import os
 
-from svn.core import SubversionException, Pool
-import svn.core, svn.ra
+from svn.core import Pool
+import svn.core
 
+from fileids import generate_file_id
 from repository import (SvnRepository, SVN_PROP_BZR_MERGE, SVN_PROP_SVK_MERGE,
-                SVN_PROP_BZR_REVPROP_PREFIX, SvnRepositoryFormat)
+                SVN_PROP_BZR_PREFIX, SVN_PROP_BZR_REVISION_INFO, 
+                SvnRepositoryFormat, parse_revision_metadata)
 from tree import apply_txdelta_handler
 
 
@@ -41,28 +43,33 @@ def md5_strings(strings):
     map(s.update, strings)
     return s.hexdigest()
 
+
 class RevisionBuildEditor(svn.delta.Editor):
-    def __init__(self, source, target, branch_path, revnum, prev_inventory, revid, svn_revprops, id_map, parent_branch, parent_id_map):
+    """Implementation of the Subversion commit editor interface that builds a 
+    Bazaar revision.
+    """
+    def __init__(self, source, target, branch_path, prev_inventory, revid, 
+                 svn_revprops, id_map):
         self.branch_path = branch_path
+        self.old_inventory = prev_inventory
         self.inventory = copy(prev_inventory)
-        assert self.inventory.root is None or revnum > 0
         self.revid = revid
-        self.revnum = revnum
         self.id_map = id_map
-        self.parent_branch = parent_branch
-        self.parent_id_map = parent_id_map
         self.source = source
         self.target = target
         self.transact = target.get_transaction()
         self.weave_store = target.weave_store
-    
         self.dir_baserev = {}
-
         self._parent_ids = None
-        self._revprops = {}
+        self._revinfo = None
         self._svn_revprops = svn_revprops
+        self.pool = Pool()
 
     def _get_revision(self, revid):
+        """Creates the revision object.
+
+        :param revid: Revision id of the revision to create.
+        """
         if self._parent_ids is None:
             self._parent_ids = ""
 
@@ -71,8 +78,11 @@ class RevisionBuildEditor(svn.delta.Editor):
         # Commit SVN revision properties to a Revision object
         rev = Revision(revision_id=revid, parent_ids=parent_ids)
 
-        rev.timestamp = 1.0 * svn.core.secs_from_timestr(
-            self._svn_revprops[2], None) #date
+        if self._svn_revprops[2] is not None:
+            rev.timestamp = 1.0 * svn.core.secs_from_timestr(
+                self._svn_revprops[2], None) #date
+        else:
+            rev.timestamp = 0 # FIXME: Obtain repository creation time
         rev.timezone = None
 
         rev.committer = self._svn_revprops[0] # author
@@ -80,61 +90,92 @@ class RevisionBuildEditor(svn.delta.Editor):
             rev.committer = ""
         rev.message = self._svn_revprops[1] # message
 
-        rev.properties = self._revprops
+        if self._revinfo:
+            parse_revision_metadata(self._revinfo, rev)
+
         return rev
 
     def open_root(self, base_revnum, baton):
-        file_id, revision_id = self.id_map[""]
-        if self.inventory.root is None:
+        if self.old_inventory.root is None:
+            # First time the root is set
+            file_id = generate_file_id(self.source, self.revid, "")
             self.dir_baserev[file_id] = []
-            ie = self.inventory.add_path("", 'directory', file_id)
         else:
-            self.dir_baserev[file_id] = [self.inventory.revision_id]
-            ie = self.inventory[file_id]
+            assert self.old_inventory.root.revision is not None
+            if self.id_map.has_key(""):
+                file_id = self.id_map[""]
+            else:
+                file_id = self.old_inventory.root.file_id
+            self.dir_baserev[file_id] = [self.old_inventory.root.revision]
 
-        if ie is not None:
-            ie.revision = revision_id
+        if self.inventory.root is not None and \
+                file_id == self.inventory.root.file_id:
+            ie = self.inventory.root
+        else:
+            ie = self.inventory.add_path("", 'directory', file_id)
+        ie.revision = self.revid
         return file_id
 
-    def relpath(self, path):
-        return path.strip("/")
+    def _get_existing_id(self, parent_id, path):
+        if self.id_map.has_key(path):
+            return self.id_map[path]
+        return self._get_old_id(parent_id, path)
 
-    def delete_entry(self, path, revnum, parent_baton, pool):
-        del self.inventory[self.inventory.path2id(path)]
+    def _get_old_id(self, parent_id, old_path):
+        return self.old_inventory[parent_id].children[os.path.basename(old_path)].file_id
 
-    def close_directory(self, id):
-        revid = self.revid
+    def _get_new_id(self, parent_id, new_path):
+        if self.id_map.has_key(new_path):
+            return self.id_map[new_path]
+        return generate_file_id(self.source, self.revid, new_path)
 
-        if id != self.id_map[""][0]:
-            self.inventory[id].revision = revid
+    def delete_entry(self, path, revnum, parent_id, pool):
+        path = path.decode("utf-8")
+        del self.inventory[self._get_old_id(parent_id, path)]
 
-            file_weave = self.weave_store.get_weave_or_empty(id, self.transact)
-            if not file_weave.has_version(revid):
-                file_weave.add_lines(revid, self.dir_baserev[id], [])
+    def close_directory(self, id):
+        self.inventory[id].revision = self.revid
 
-    def add_directory(self, path, parent_baton, copyfrom_path, copyfrom_revnum, pool):
-        file_id, revision_id = self.id_map[path]
+        file_weave = self.weave_store.get_weave_or_empty(id, self.transact)
+        if not file_weave.has_version(self.revid):
+            file_weave.add_lines(self.revid, self.dir_baserev[id], [])
 
-        if copyfrom_path is not None:
-            base_file_id, base_revid = self.source.path_to_file_id(copyfrom_revnum, os.path.join(self.parent_branch, copyfrom_path))
-            if base_file_id == file_id: 
-                self.dir_baserev[file_id] = [base_revid]
-                ie = self.inventory[file_id]
-                ie.revision = revision_id
-                return file_id
+    def add_directory(self, path, parent_id, copyfrom_path, copyfrom_revnum, 
+                      pool):
+        path = path.decode("utf-8")
+        file_id = self._get_new_id(parent_id, path)
 
         self.dir_baserev[file_id] = []
         ie = self.inventory.add_path(path, 'directory', file_id)
-        ie.revision = revision_id
+        ie.revision = self.revid
 
         return file_id
 
-    def open_directory(self, path, parent_baton, base_revnum, pool):
-        return self.add_directory(path, parent_baton, path, base_revnum, pool)
+    def open_directory(self, path, parent_id, base_revnum, pool):
+        assert base_revnum >= 0
+        base_file_id = self._get_old_id(parent_id, path)
+        base_revid = self.old_inventory[base_file_id].revision
+        file_id = self._get_existing_id(parent_id, path)
+        if file_id == base_file_id:
+            self.dir_baserev[file_id] = [base_revid]
+            ie = self.inventory[file_id]
+        else:
+            # Replace if original was inside this branch
+            # change id of base_file_id to file_id
+            ie = self.inventory[base_file_id]
+            for name in ie.children:
+                ie.children[name].parent_id = file_id
+            # FIXME: Don't touch inventory internals
+            del self.inventory._byid[base_file_id]
+            self.inventory._byid[file_id] = ie
+            ie.file_id = file_id
+            self.dir_baserev[file_id] = []
+        ie.revision = self.revid
+        return file_id
 
     def change_dir_prop(self, id, name, value, pool):
         if name == SVN_PROP_BZR_MERGE:
-            if id != self.id_map[""][0]:
+            if id != self.inventory.root.file_id:
                 mutter('rogue %r on non-root directory' % SVN_PROP_BZR_MERGE)
                 return
             
@@ -144,8 +185,12 @@ class RevisionBuildEditor(svn.delta.Editor):
                 # Only set parents using svk:merge if no 
                 # bzr:merge set.
                 pass # FIXME 
-        elif name.startswith(SVN_PROP_BZR_REVPROP_PREFIX):
-            self._revprops[name[len(SVN_PROP_BZR_REVPROP_PREFIX):]] = value
+        elif name == SVN_PROP_BZR_REVISION_INFO:
+            if id != self.inventory.root.file_id:
+                mutter('rogue %r on non-root directory' % SVN_PROP_BZR_REVISION_INFO)
+                return
+            self._revinfo = value
         elif name in (svn.core.SVN_PROP_ENTRY_COMMITTED_DATE,
                       svn.core.SVN_PROP_ENTRY_COMMITTED_REV,
                       svn.core.SVN_PROP_ENTRY_LAST_AUTHOR,
@@ -155,12 +200,16 @@ class RevisionBuildEditor(svn.delta.Editor):
             pass
         elif name.startswith(svn.core.SVN_PROP_WC_PREFIX):
             pass
-        else:
+        elif (name.startswith(svn.core.SVN_PROP_PREFIX) or
+              name.startswith(SVN_PROP_BZR_PREFIX)):
             mutter('unsupported file property %r' % name)
 
     def change_file_prop(self, id, name, value, pool):
         if name == svn.core.SVN_PROP_EXECUTABLE: 
-            # Strange, you'd expect executable to match svn.core.SVN_PROP_EXECUTABLE_VALUE, but that's not how SVN behaves.
+            # You'd expect executable to match 
+            # svn.core.SVN_PROP_EXECUTABLE_VALUE, but that's not 
+            # how SVN behaves. It appears to consider the presence 
+            # of the property sufficient to mark it executable.
             self.is_executable = (value != None)
         elif (name == svn.core.SVN_PROP_SPECIAL):
             self.is_symlink = (value != None)
@@ -174,28 +223,31 @@ class RevisionBuildEditor(svn.delta.Editor):
             pass
         elif name.startswith(svn.core.SVN_PROP_WC_PREFIX):
             pass
-        else:
+        elif (name.startswith(svn.core.SVN_PROP_PREFIX) or
+              name.startswith(SVN_PROP_BZR_PREFIX)):
             mutter('unsupported file property %r' % name)
 
     def add_file(self, path, parent_id, copyfrom_path, copyfrom_revnum, baton):
+        path = path.decode("utf-8")
         self.is_symlink = False
         self.is_executable = None
         self.file_data = ""
         self.file_parents = []
         self.file_stream = None
+        self.file_id = self._get_new_id(parent_id, path)
         return path
 
     def open_file(self, path, parent_id, base_revnum, pool):
-        base_file_id, base_revid = self.source.path_to_file_id(base_revnum, os.path.join(self.parent_branch, path))
-        file_id, revid = self.id_map[path]
+        base_file_id = self._get_old_id(parent_id, path)
+        base_revid = self.old_inventory[base_file_id].revision
+        self.file_id = self._get_existing_id(parent_id, path)
         self.is_executable = None
-        mutter('bla; %s and %s' % (base_file_id, file_id))
-        mutter('inventory: %s' % self.inventory.entries())
         self.is_symlink = (self.inventory[base_file_id].kind == 'symlink')
-        file_weave = self.weave_store.get_weave_or_empty(base_file_id, self.transact)
+        file_weave = self.weave_store.get_weave_or_empty(base_file_id, 
+                                                         self.transact)
         self.file_data = file_weave.get_text(base_revid)
         self.file_stream = None
-        if file_id == base_file_id:
+        if self.file_id == base_file_id:
             self.file_parents = [base_revid]
         else:
             # Replace
@@ -214,18 +266,18 @@ class RevisionBuildEditor(svn.delta.Editor):
         actual_checksum = md5_strings(lines)
         assert checksum is None or checksum == actual_checksum
 
-        file_id, revision_id = self.id_map[path]
-        file_weave = self.weave_store.get_weave_or_empty(file_id, self.transact)
-        if not file_weave.has_version(revision_id):
-            file_weave.add_lines(revision_id, self.file_parents, lines)
+        file_weave = self.weave_store.get_weave_or_empty(self.file_id, 
+                                                         self.transact)
+        if not file_weave.has_version(self.revid):
+            file_weave.add_lines(self.revid, self.file_parents, lines)
 
-        if file_id in self.inventory:
-            ie = self.inventory[file_id]
+        if self.file_id in self.inventory:
+            ie = self.inventory[self.file_id]
         elif self.is_symlink:
-            ie = self.inventory.add_path(path, 'symlink', file_id)
+            ie = self.inventory.add_path(path, 'symlink', self.file_id)
         else:
-            ie = self.inventory.add_path(path, 'file', file_id)
-        ie.revision = revision_id
+            ie = self.inventory.add_path(path, 'file', self.file_id)
+        ie.revision = self.revid
 
         if self.is_symlink:
             ie.symlink_target = lines[0][len("link "):]
@@ -247,6 +299,7 @@ class RevisionBuildEditor(svn.delta.Editor):
             bzrlib.xml5.serializer_v5.write_inventory_to_string(
                 self.inventory))
         self.target.add_revision(self.revid, rev, self.inventory)
+        self.pool.destroy()
 
     def abort_edit(self):
         pass
@@ -254,38 +307,44 @@ class RevisionBuildEditor(svn.delta.Editor):
     def apply_textdelta(self, file_id, base_checksum):
         actual_checksum = md5.new(self.file_data).hexdigest(),
         assert (base_checksum is None or base_checksum == actual_checksum,
-            "base checksum mismatch: %r != %r" % (base_checksum, actual_checksum))
+            "base checksum mismatch: %r != %r" % (base_checksum, 
+                                                  actual_checksum))
         self.file_stream = StringIO()
-        return apply_txdelta_handler(StringIO(self.file_data), self.file_stream)
+        return apply_txdelta_handler(StringIO(self.file_data), 
+                                     self.file_stream, self.pool)
 
 
-class InterSvnRepository(InterRepository):
+class InterFromSvnRepository(InterRepository):
     """Svn to any repository actions."""
 
-    _matching_repo_format = SvnRepositoryFormat
-    """The format to test with."""
+    _matching_repo_format = SvnRepositoryFormat()
 
-    @needs_write_lock
-    def copy_content(self, revision_id=None, basis=None, pb=ProgressBar()):
-        """See InterRepository.copy_content."""
-        # Dictionary with paths as keys, revnums as values
+    @staticmethod
+    def _get_repo_format_to_test():
+        return None
 
-        # Loop over all the revnums until revision_id
-        # (or youngest_revnum) and call self.target.add_revision() 
-        # or self.target.add_inventory() each time
-        if revision_id is None:
-            path = None
-            until_revnum = self.source._latest_revnum
-        else:
-            (path, until_revnum) = self.source.parse_revision_id(revision_id)
+    def _find_all(self):
+        needed = []
+        parents = {}
+        for (branch, revnum) in self.source.follow_history(
+                                                self.source._latest_revnum):
+            mutter('br, revnum: %r, %r' % (branch, revnum))
+            revid = self.source.generate_revision_id(revnum, branch)
+            parents[revid] = self.source._mainline_revision_parent(branch, 
+                                                                   revnum)
 
-        repos_root = self.source.transport.get_repos_root()
-        
+            if not self.target.has_revision(revid):
+                needed.append(revid)
+        return (needed, parents)
+
+    def _find_until(self, revision_id):
         needed = []
         parents = {}
+        (path, until_revnum) = self.source.lookup_revision_id(revision_id)
+
         prev_revid = None
-        for (branch, changes, revnum) in \
-            self.source._log.follow_history(path, until_revnum):
+        for (branch, revnum) in self.source.follow_branch(path, 
+                                                          until_revnum):
             revid = self.source.generate_revision_id(revnum, branch)
 
             if prev_revid is not None:
@@ -294,95 +353,121 @@ class InterSvnRepository(InterRepository):
             prev_revid = revid
 
             if not self.target.has_revision(revid):
-                needed.append((branch, revnum, revid, changes))
+                needed.append(revid)
 
         parents[prev_revid] = None
+        return (needed, parents)
 
-        num = 0
-        needed.reverse()
-        prev_revid = None
-        transport = self.source.transport
-        for (branch, revnum, revid, changes) in needed:
-            if pb is not None:
-                pb.update('copying revision', num+1, len(needed)+1)
-            num += 1
-
-            parent_revid = parents[revid]
-
-            if parent_revid is not None:
-                (parent_branch, parent_revnum) = self.source.parse_revision_id(parent_revid)
-            else:
-                parent_revnum = 0
-                parent_branch = None
-
-            if parent_revid is None:
-                parent_id_map = self.source.get_fileid_map(0, "")
-                id_map = self.source.get_fileid_map(revnum, branch)
-                parent_inv = Inventory(parent_id_map[""][0])
-            elif prev_revid != parent_revid:
-                parent_id_map = self.source.get_fileid_map(parent_revnum, parent_branch)
-                id_map = self.source.get_fileid_map(revnum, branch)
-                parent_inv = self.target.get_inventory(parent_revid)
-            else:
-                parent_id_map = copy(id_map)
-                self.source.transform_fileid_map(self.source.uuid, 
-                                        revnum, branch, 
-                                        changes, id_map)
-                parent_inv = prev_inv
-
-            editor = RevisionBuildEditor(self.source, self.target, branch, 
-                                         revnum, parent_inv, revid, 
-                                     self.source._log.get_revision_info(revnum),
-                                     id_map, parent_branch, parent_id_map)
-
-            edit, edit_baton = svn.delta.make_editor(editor)
+    def copy_content(self, revision_id=None, basis=None, pb=None):
+        """See InterRepository.copy_content."""
+        # FIXME: Use basis
+        # Dictionary with paths as keys, revnums as values
 
-            if parent_branch is None:
-                transport.reparent(repos_root)
-            else:
-                transport.reparent("%s/%s" % (repos_root, parent_branch))
-            pool = Pool()
-            if parent_branch != branch:
-                mutter('svn switch %r:%r -> %r:%r' % 
-                               (parent_branch, parent_revnum, branch, revnum))
-                reporter, reporter_baton = transport.do_switch(
-                           revnum, "", True, 
-                           "%s/%s" % (repos_root, branch),
-                           edit, edit_baton, pool)
+        # Loop over all the revnums until revision_id
+        # (or youngest_revnum) and call self.target.add_revision() 
+        # or self.target.add_inventory() each time
+        needed = []
+        parents = {}
+        self.target.lock_read()
+        try:
+            if revision_id is None:
+                (needed, parents) = self._find_all()
             else:
-                mutter('svn update -r %r:%r %r' % 
-                               (parent_revnum, revnum, branch))
-                reporter, reporter_baton = transport.do_update(
-                           revnum, "", True, 
-                           edit, edit_baton, pool)
-
-            # Report status of existing paths
-            svn.ra.reporter2_invoke_set_path(reporter, reporter_baton, 
-                "", parent_revnum, False, None)
+                (needed, parents) = self._find_until(revision_id)
+        finally:
+            self.target.unlock()
 
-            transport.lock()
-            svn.ra.reporter2_invoke_finish_report(reporter, reporter_baton)
-            transport.unlock()
-
-            prev_inv = editor.inventory
-            prev_revid = revid
+        if len(needed) == 0:
+            # Nothing to fetch
+            return
 
-        if pb is not None:
-            pb.clear()
+        repos_root = self.source.transport.get_repos_root()
 
+        needed.reverse()
+        prev_revid = None
+        transport = self.source.transport
+        self.target.lock_write()
+        if pb is None:
+            pb = ui.ui_factory.nested_progress_bar()
+            nested_pb = pb
+        else:
+            nested_pb = None
+        num = 0
+        prev_inv = None
+        try:
+            for revid in needed:
+                (branch, revnum) = self.source.lookup_revision_id(revid)
+                pb.update('copying revision', num, len(needed))
+
+                parent_revid = parents[revid]
+
+                if parent_revid is None:
+                    parent_inv = Inventory(root_id=None)
+                elif prev_revid != parent_revid:
+                    parent_inv = self.target.get_inventory(parent_revid)
+                else:
+                    assert prev_inv is not None
+                    parent_inv = prev_inv
+
+                changes = self.source._log.get_revision_paths(revnum, branch)
+                renames = self.source.revision_fileid_renames(revid)
+                id_map = self.source.transform_fileid_map(self.source.uuid, 
+                                            revnum, branch, changes, renames)
+
+                editor = RevisionBuildEditor(self.source, self.target, branch, 
+                             parent_inv, revid, 
+                             self.source._log.get_revision_info(revnum),
+                             id_map)
+
+                pool = Pool()
+                edit, edit_baton = svn.delta.make_editor(editor, pool)
+
+                if parent_revid is None:
+                    transport.reparent("%s/%s" % (repos_root, branch))
+                    reporter = transport.do_update(
+                                   revnum, "", True, edit, edit_baton, pool)
+
+                    # Report status of existing paths
+                    reporter.set_path("", revnum, True, None, pool)
+                else:
+                    (parent_branch, parent_revnum) = \
+                            self.source.lookup_revision_id(parent_revid)
+                    transport.reparent("%s/%s" % (repos_root, parent_branch))
+
+                    if parent_branch != branch:
+                        switch_url = "%s/%s" % (repos_root, branch)
+                        reporter = transport.do_switch(
+                                   revnum, "", True, 
+                                   switch_url, edit, edit_baton, pool)
+                    else:
+                        reporter = transport.do_update(
+                                   revnum, "", True, edit, edit_baton, pool)
+
+                    # Report status of existing paths
+                    reporter.set_path("", parent_revnum, False, None, pool)
+
+                transport.lock()
+                reporter.finish_report(pool)
+                transport.unlock()
+
+                prev_inv = editor.inventory
+                prev_revid = revid
+                pool.destroy()
+                num += 1
+        finally:
+            self.target.unlock()
+            if nested_pb is not None:
+                nested_pb.finished()
         self.source.transport.reparent(repos_root)
 
-    @needs_write_lock
-    def fetch(self, revision_id=None, pb=ProgressBar()):
+    def fetch(self, revision_id=None, pb=None):
         """Fetch revisions. """
         self.copy_content(revision_id=revision_id, pb=pb)
 
     @staticmethod
     def is_compatible(source, target):
         """Be compatible with SvnRepository."""
+        mutter("Checking from %r %r" % (source, target))
         # FIXME: Also check target uses VersionedFile
-        mutter('test %r' % source)
         return isinstance(source, SvnRepository)
 
-
-