Fix bug in revid caching.
[jelmer/subvertpy.git] / fetch.py
index 190bbfb1e0aca84603ddc0619e239fea75b6dd50..2723f97fb4fb2239e7ee4a62de5317c2985dcc62 100644 (file)
--- a/fetch.py
+++ b/fetch.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2005-2006 Jelmer Vernooij <jelmer@samba.org>
+# Copyright (C) 2005-2007 Jelmer Vernooij <jelmer@samba.org>
 
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
 # You should have received a copy of the GNU General Public License
 # along with this program; if not, write to the Free Software
 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+"""Fetching revisions from Subversion repositories in batches."""
 
 import bzrlib
-from bzrlib.decorators import needs_write_lock
-from bzrlib.inventory import Inventory, ROOT_ID
+from bzrlib.inventory import Inventory
 import bzrlib.osutils as osutils
-from bzrlib.progress import ProgressBar
 from bzrlib.revision import Revision
 from bzrlib.repository import InterRepository
 from bzrlib.trace import mutter
+import bzrlib.ui as ui
 
 from copy import copy
 from cStringIO import StringIO
 import md5
 import os
 
-from svn.core import SubversionException, Pool
-import svn.core, svn.ra
+from svn.core import Pool
+import svn.core
 
 from fileids import generate_file_id
 from repository import (SvnRepository, SVN_PROP_BZR_MERGE, SVN_PROP_SVK_MERGE,
-                SVN_PROP_BZR_REVPROP_PREFIX, SvnRepositoryFormat)
+                SVN_PROP_BZR_PREFIX, SVN_PROP_BZR_REVISION_INFO, 
+                SvnRepositoryFormat, parse_revision_metadata)
 from tree import apply_txdelta_handler
 
 
@@ -42,29 +43,33 @@ def md5_strings(strings):
     map(s.update, strings)
     return s.hexdigest()
 
+
 class RevisionBuildEditor(svn.delta.Editor):
-    def __init__(self, source, target, branch_path, revnum, prev_inventory, revid, svn_revprops, id_map, parent_branch):
+    """Implementation of the Subversion commit editor interface that builds a 
+    Bazaar revision.
+    """
+    def __init__(self, source, target, branch_path, prev_inventory, revid, 
+                 svn_revprops, id_map):
         self.branch_path = branch_path
         self.old_inventory = prev_inventory
         self.inventory = copy(prev_inventory)
         self.revid = revid
-        self.revnum = revnum
         self.id_map = id_map
-        self.parent_branch = parent_branch
         self.source = source
         self.target = target
         self.transact = target.get_transaction()
         self.weave_store = target.weave_store
-    
         self.dir_baserev = {}
-
         self._parent_ids = None
-        self._revprops = {}
+        self._revinfo = None
         self._svn_revprops = svn_revprops
-
         self.pool = Pool()
 
     def _get_revision(self, revid):
+        """Creates the revision object.
+
+        :param revid: Revision id of the revision to create.
+        """
         if self._parent_ids is None:
             self._parent_ids = ""
 
@@ -73,8 +78,11 @@ class RevisionBuildEditor(svn.delta.Editor):
         # Commit SVN revision properties to a Revision object
         rev = Revision(revision_id=revid, parent_ids=parent_ids)
 
-        rev.timestamp = 1.0 * svn.core.secs_from_timestr(
-            self._svn_revprops[2], None) #date
+        if self._svn_revprops[2] is not None:
+            rev.timestamp = 1.0 * svn.core.secs_from_timestr(
+                self._svn_revprops[2], None) #date
+        else:
+            rev.timestamp = 0 # FIXME: Obtain repository creation time
         rev.timezone = None
 
         rev.committer = self._svn_revprops[0] # author
@@ -82,24 +90,35 @@ class RevisionBuildEditor(svn.delta.Editor):
             rev.committer = ""
         rev.message = self._svn_revprops[1] # message
 
-        rev.properties = self._revprops
+        if self._revinfo:
+            parse_revision_metadata(self._revinfo, rev)
+
         return rev
 
     def open_root(self, base_revnum, baton):
-        if self.inventory.revision_id is None:
-            self.dir_baserev[ROOT_ID] = []
+        if self.old_inventory.root is None:
+            # First time the root is set
+            file_id = generate_file_id(self.source, self.revid, "")
+            self.dir_baserev[file_id] = []
         else:
-            self.dir_baserev[ROOT_ID] = [self.inventory.revision_id]
-        self.inventory.revision_id = self.revid
-        return ROOT_ID
+            assert self.old_inventory.root.revision is not None
+            if self.id_map.has_key(""):
+                file_id = self.id_map[""]
+            else:
+                file_id = self.old_inventory.root.file_id
+            self.dir_baserev[file_id] = [self.old_inventory.root.revision]
 
-    def relpath(self, path):
-        return path.strip("/")
+        if self.inventory.root is not None and \
+                file_id == self.inventory.root.file_id:
+            ie = self.inventory.root
+        else:
+            ie = self.inventory.add_path("", 'directory', file_id)
+        ie.revision = self.revid
+        return file_id
 
     def _get_existing_id(self, parent_id, path):
         if self.id_map.has_key(path):
             return self.id_map[path]
-
         return self._get_old_id(parent_id, path)
 
     def _get_old_id(self, parent_id, old_path):
@@ -108,20 +127,22 @@ class RevisionBuildEditor(svn.delta.Editor):
     def _get_new_id(self, parent_id, new_path):
         if self.id_map.has_key(new_path):
             return self.id_map[new_path]
-        return generate_file_id(self.revid, new_path)
+        return generate_file_id(self.source, self.revid, new_path)
 
     def delete_entry(self, path, revnum, parent_id, pool):
+        path = path.decode("utf-8")
         del self.inventory[self._get_old_id(parent_id, path)]
 
     def close_directory(self, id):
-        if id != ROOT_ID:
-            self.inventory[id].revision = self.revid
+        self.inventory[id].revision = self.revid
 
-            file_weave = self.weave_store.get_weave_or_empty(id, self.transact)
-            if not file_weave.has_version(self.revid):
-                file_weave.add_lines(self.revid, self.dir_baserev[id], [])
+        file_weave = self.weave_store.get_weave_or_empty(id, self.transact)
+        if not file_weave.has_version(self.revid):
+            file_weave.add_lines(self.revid, self.dir_baserev[id], [])
 
-    def add_directory(self, path, parent_id, copyfrom_path, copyfrom_revnum, pool):
+    def add_directory(self, path, parent_id, copyfrom_path, copyfrom_revnum, 
+                      pool):
+        path = path.decode("utf-8")
         file_id = self._get_new_id(parent_id, path)
 
         self.dir_baserev[file_id] = []
@@ -154,7 +175,7 @@ class RevisionBuildEditor(svn.delta.Editor):
 
     def change_dir_prop(self, id, name, value, pool):
         if name == SVN_PROP_BZR_MERGE:
-            if id != ROOT_ID:
+            if id != self.inventory.root.file_id:
                 mutter('rogue %r on non-root directory' % SVN_PROP_BZR_MERGE)
                 return
             
@@ -164,8 +185,12 @@ class RevisionBuildEditor(svn.delta.Editor):
                 # Only set parents using svk:merge if no 
                 # bzr:merge set.
                 pass # FIXME 
-        elif name.startswith(SVN_PROP_BZR_REVPROP_PREFIX):
-            self._revprops[name[len(SVN_PROP_BZR_REVPROP_PREFIX):]] = value
+        elif name == SVN_PROP_BZR_REVISION_INFO:
+            if id != self.inventory.root.file_id:
+                mutter('rogue %r on non-root directory' % SVN_PROP_BZR_REVISION_INFO)
+                return
+            self._revinfo = value
         elif name in (svn.core.SVN_PROP_ENTRY_COMMITTED_DATE,
                       svn.core.SVN_PROP_ENTRY_COMMITTED_REV,
                       svn.core.SVN_PROP_ENTRY_LAST_AUTHOR,
@@ -175,12 +200,16 @@ class RevisionBuildEditor(svn.delta.Editor):
             pass
         elif name.startswith(svn.core.SVN_PROP_WC_PREFIX):
             pass
-        else:
+        elif (name.startswith(svn.core.SVN_PROP_PREFIX) or
+              name.startswith(SVN_PROP_BZR_PREFIX)):
             mutter('unsupported file property %r' % name)
 
     def change_file_prop(self, id, name, value, pool):
         if name == svn.core.SVN_PROP_EXECUTABLE: 
-            # Strange, you'd expect executable to match svn.core.SVN_PROP_EXECUTABLE_VALUE, but that's not how SVN behaves.
+            # You'd expect executable to match 
+            # svn.core.SVN_PROP_EXECUTABLE_VALUE, but that's not 
+            # how SVN behaves. It appears to consider the presence 
+            # of the property sufficient to mark it executable.
             self.is_executable = (value != None)
         elif (name == svn.core.SVN_PROP_SPECIAL):
             self.is_symlink = (value != None)
@@ -194,10 +223,12 @@ class RevisionBuildEditor(svn.delta.Editor):
             pass
         elif name.startswith(svn.core.SVN_PROP_WC_PREFIX):
             pass
-        else:
+        elif (name.startswith(svn.core.SVN_PROP_PREFIX) or
+              name.startswith(SVN_PROP_BZR_PREFIX)):
             mutter('unsupported file property %r' % name)
 
     def add_file(self, path, parent_id, copyfrom_path, copyfrom_revnum, baton):
+        path = path.decode("utf-8")
         self.is_symlink = False
         self.is_executable = None
         self.file_data = ""
@@ -212,7 +243,8 @@ class RevisionBuildEditor(svn.delta.Editor):
         self.file_id = self._get_existing_id(parent_id, path)
         self.is_executable = None
         self.is_symlink = (self.inventory[base_file_id].kind == 'symlink')
-        file_weave = self.weave_store.get_weave_or_empty(base_file_id, self.transact)
+        file_weave = self.weave_store.get_weave_or_empty(base_file_id, 
+                                                         self.transact)
         self.file_data = file_weave.get_text(base_revid)
         self.file_stream = None
         if self.file_id == base_file_id:
@@ -234,7 +266,8 @@ class RevisionBuildEditor(svn.delta.Editor):
         actual_checksum = md5_strings(lines)
         assert checksum is None or checksum == actual_checksum
 
-        file_weave = self.weave_store.get_weave_or_empty(self.file_id, self.transact)
+        file_weave = self.weave_store.get_weave_or_empty(self.file_id, 
+                                                         self.transact)
         if not file_weave.has_version(self.revid):
             file_weave.add_lines(self.revid, self.file_parents, lines)
 
@@ -274,41 +307,44 @@ class RevisionBuildEditor(svn.delta.Editor):
     def apply_textdelta(self, file_id, base_checksum):
         actual_checksum = md5.new(self.file_data).hexdigest(),
         assert (base_checksum is None or base_checksum == actual_checksum,
-            "base checksum mismatch: %r != %r" % (base_checksum, actual_checksum))
+            "base checksum mismatch: %r != %r" % (base_checksum, 
+                                                  actual_checksum))
         self.file_stream = StringIO()
-        return apply_txdelta_handler(StringIO(self.file_data), self.file_stream, self.pool)
+        return apply_txdelta_handler(StringIO(self.file_data), 
+                                     self.file_stream, self.pool)
 
 
-class InterSvnRepository(InterRepository):
+class InterFromSvnRepository(InterRepository):
     """Svn to any repository actions."""
 
     _matching_repo_format = SvnRepositoryFormat()
-    """The format to test with."""
 
-    @needs_write_lock
-    def copy_content(self, revision_id=None, basis=None, pb=ProgressBar()):
-        """See InterRepository.copy_content."""
-        # Dictionary with paths as keys, revnums as values
+    @staticmethod
+    def _get_repo_format_to_test():
+        return None
 
-        # Loop over all the revnums until revision_id
-        # (or youngest_revnum) and call self.target.add_revision() 
-        # or self.target.add_inventory() each time
-        if revision_id is None:
-            path = None
-            until_revnum = self.source._latest_revnum
-        else:
-            (path, until_revnum) = self.source.parse_revision_id(revision_id)
+    def _find_all(self):
+        needed = []
+        parents = {}
+        for (branch, revnum) in self.source.follow_history(
+                                                self.source._latest_revnum):
+            mutter('br, revnum: %r, %r' % (branch, revnum))
+            revid = self.source.generate_revision_id(revnum, branch)
+            parents[revid] = self.source._mainline_revision_parent(branch, 
+                                                                   revnum)
 
-        repos_root = self.source.transport.get_repos_root()
-        
+            if not self.target.has_revision(revid):
+                needed.append(revid)
+        return (needed, parents)
+
+    def _find_until(self, revision_id):
         needed = []
         parents = {}
+        (path, until_revnum) = self.source.lookup_revision_id(revision_id)
+
         prev_revid = None
-        if path is None:
-            it = self.source.follow_history(until_revnum)
-        else:
-            it = self.source.follow_branch_history(path, until_revnum)
-        for (branch, changes, revnum) in it:
+        for (branch, revnum) in self.source.follow_branch(path, 
+                                                          until_revnum):
             revid = self.source.generate_revision_id(revnum, branch)
 
             if prev_revid is not None:
@@ -317,87 +353,121 @@ class InterSvnRepository(InterRepository):
             prev_revid = revid
 
             if not self.target.has_revision(revid):
-                needed.append((branch, revnum, revid, changes))
+                needed.append(revid)
 
         parents[prev_revid] = None
+        return (needed, parents)
 
-        num = 0
-        needed.reverse()
-        prev_revid = None
-        transport = self.source.transport
-        for (branch, revnum, revid, changes) in needed:
-            if pb is not None:
-                pb.update('copying revision', num+1, len(needed)+1)
-            num += 1
-
-            parent_revid = parents[revid]
-
-            if parent_revid is not None:
-                (parent_branch, parent_revnum) = self.source.parse_revision_id(parent_revid)
-            else:
-                parent_revnum = 0
-                parent_branch = None
-
-            if parent_revid is None:
-                parent_inv = Inventory(ROOT_ID)
-            elif prev_revid != parent_revid:
-                parent_inv = self.target.get_inventory(parent_revid)
-            else:
-                parent_inv = prev_inv
-
-            id_map = self.source.transform_fileid_map(self.source.uuid, 
-                                        revnum, branch, changes)
-
-            editor = RevisionBuildEditor(self.source, self.target, branch, 
-                                         revnum, parent_inv, revid, 
-                                     self.source._log.get_revision_info(revnum),
-                                     id_map, parent_branch)
-
-            pool = Pool()
-            edit, edit_baton = svn.delta.make_editor(editor, pool)
+    def copy_content(self, revision_id=None, basis=None, pb=None):
+        """See InterRepository.copy_content."""
+        # FIXME: Use basis
+        # Dictionary with paths as keys, revnums as values
 
-            if parent_branch is None:
-                transport.reparent(repos_root)
-            else:
-                transport.reparent("%s/%s" % (repos_root, parent_branch))
-            if parent_branch != branch:
-                switch_url = "%s/%s" % (repos_root, branch)
-                reporter, reporter_baton = transport.do_switch(
-                           revnum, "", True, 
-                           switch_url, edit, edit_baton, pool)
+        # Loop over all the revnums until revision_id
+        # (or youngest_revnum) and call self.target.add_revision() 
+        # or self.target.add_inventory() each time
+        needed = []
+        parents = {}
+        self.target.lock_read()
+        try:
+            if revision_id is None:
+                (needed, parents) = self._find_all()
             else:
-                reporter, reporter_baton = transport.do_update(
-                           revnum, "", True, edit, edit_baton, pool)
-
-            # Report status of existing paths
-            svn.ra.reporter2_invoke_set_path(reporter, reporter_baton, 
-                "", parent_revnum, False, None, pool)
-
-            transport.lock()
-            svn.ra.reporter2_invoke_finish_report(reporter, reporter_baton, pool)
-            transport.unlock()
-
-            prev_inv = editor.inventory
-            prev_revid = revid
+                (needed, parents) = self._find_until(revision_id)
+        finally:
+            self.target.unlock()
 
-            pool.destroy()
+        if len(needed) == 0:
+            # Nothing to fetch
+            return
 
-        if pb is not None:
-            pb.clear()
+        repos_root = self.source.transport.get_repos_root()
 
+        needed.reverse()
+        prev_revid = None
+        transport = self.source.transport
+        self.target.lock_write()
+        if pb is None:
+            pb = ui.ui_factory.nested_progress_bar()
+            nested_pb = pb
+        else:
+            nested_pb = None
+        num = 0
+        prev_inv = None
+        try:
+            for revid in needed:
+                (branch, revnum) = self.source.lookup_revision_id(revid)
+                pb.update('copying revision', num, len(needed))
+
+                parent_revid = parents[revid]
+
+                if parent_revid is None:
+                    parent_inv = Inventory(root_id=None)
+                elif prev_revid != parent_revid:
+                    parent_inv = self.target.get_inventory(parent_revid)
+                else:
+                    assert prev_inv is not None
+                    parent_inv = prev_inv
+
+                changes = self.source._log.get_revision_paths(revnum, branch)
+                renames = self.source.revision_fileid_renames(revid)
+                id_map = self.source.transform_fileid_map(self.source.uuid, 
+                                            revnum, branch, changes, renames)
+
+                editor = RevisionBuildEditor(self.source, self.target, branch, 
+                             parent_inv, revid, 
+                             self.source._log.get_revision_info(revnum),
+                             id_map)
+
+                pool = Pool()
+                edit, edit_baton = svn.delta.make_editor(editor, pool)
+
+                if parent_revid is None:
+                    transport.reparent("%s/%s" % (repos_root, branch))
+                    reporter = transport.do_update(
+                                   revnum, "", True, edit, edit_baton, pool)
+
+                    # Report status of existing paths
+                    reporter.set_path("", revnum, True, None, pool)
+                else:
+                    (parent_branch, parent_revnum) = \
+                            self.source.lookup_revision_id(parent_revid)
+                    transport.reparent("%s/%s" % (repos_root, parent_branch))
+
+                    if parent_branch != branch:
+                        switch_url = "%s/%s" % (repos_root, branch)
+                        reporter = transport.do_switch(
+                                   revnum, "", True, 
+                                   switch_url, edit, edit_baton, pool)
+                    else:
+                        reporter = transport.do_update(
+                                   revnum, "", True, edit, edit_baton, pool)
+
+                    # Report status of existing paths
+                    reporter.set_path("", parent_revnum, False, None, pool)
+
+                transport.lock()
+                reporter.finish_report(pool)
+                transport.unlock()
+
+                prev_inv = editor.inventory
+                prev_revid = revid
+                pool.destroy()
+                num += 1
+        finally:
+            self.target.unlock()
+            if nested_pb is not None:
+                nested_pb.finished()
         self.source.transport.reparent(repos_root)
 
-    @needs_write_lock
-    def fetch(self, revision_id=None, pb=ProgressBar()):
+    def fetch(self, revision_id=None, pb=None):
         """Fetch revisions. """
         self.copy_content(revision_id=revision_id, pb=pb)
 
     @staticmethod
     def is_compatible(source, target):
         """Be compatible with SvnRepository."""
+        mutter("Checking from %r %r" % (source, target))
         # FIXME: Also check target uses VersionedFile
-        mutter('test %r' % source)
         return isinstance(source, SvnRepository)
 
-
-