Use locking and write groups properly, fixes compatibility with packs.
[jelmer/subvertpy.git] / fetch.py
index 134bef21da0ea860a80dd735461278707e7fb36f..bcde7d245b8d6ead5d19083a60ffb03300293c97 100644 (file)
--- a/fetch.py
+++ b/fetch.py
 # You should have received a copy of the GNU General Public License
 # along with this program; if not, write to the Free Software
 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+"""Fetching revisions from Subversion repositories in batches."""
 
 import bzrlib
-from bzrlib.inventory import Inventory, ROOT_ID
-import bzrlib.osutils as osutils
-from bzrlib.revision import Revision
+from bzrlib import osutils, ui, urlutils
+from bzrlib.inventory import Inventory
+from bzrlib.revision import Revision, NULL_REVISION
 from bzrlib.repository import InterRepository
 from bzrlib.trace import mutter
-from bzrlib.ui import ui_factory
 
 from copy import copy
 from cStringIO import StringIO
 import md5
-import os
 
-from svn.core import SubversionException, Pool
+from svn.core import Pool
 import svn.core
 
 from fileids import generate_file_id
-from repository import (SvnRepository, SVN_PROP_BZR_MERGE, SVN_PROP_SVK_MERGE,
-                SVN_PROP_BZR_REVPROP_PREFIX, SvnRepositoryFormat)
+from repository import (SvnRepository, SVN_PROP_BZR_ANCESTRY, 
+                SVN_PROP_SVK_MERGE, SVN_PROP_BZR_MERGE,
+                SVN_PROP_BZR_PREFIX, SVN_PROP_BZR_REVISION_INFO, 
+                SVN_PROP_BZR_BRANCHING_SCHEME, SVN_PROP_BZR_REVISION_ID,
+                SVN_PROP_BZR_FILEIDS, SvnRepositoryFormat, 
+                parse_revision_metadata, parse_merge_property)
 from tree import apply_txdelta_handler
 
 
 def md5_strings(strings):
+    """Return the MD5sum of the concatenation of strings.
+
+    :param strings: Strings to find the MD5sum of.
+    :return: MD5sum
+    """
     s = md5.new()
     map(s.update, strings)
     return s.hexdigest()
 
 
 class RevisionBuildEditor(svn.delta.Editor):
+    """Implementation of the Subversion commit editor interface that builds a 
+    Bazaar revision.
+    """
     def __init__(self, source, target, branch_path, prev_inventory, revid, 
-                 svn_revprops, id_map):
+                 svn_revprops, id_map, scheme):
+        """Set up the editor state and open a write group on the target.
+
+        :param source: Repository the revision is read from; used to compute
+            revision parents and to generate file ids.
+        :param target: Repository the revision is written to; supplies the
+            transaction, the weave store and the write group.
+        :param branch_path: Branch path, stored as-is.
+        :param prev_inventory: Inventory to start from; a copy of it is
+            edited, the original is kept as old_inventory.
+        :param revid: Revision id of the revision being built.
+        :param svn_revprops: Indexable revision properties, read elsewhere
+            in this class as [0] author, [1] message, [2] date.
+        :param id_map: Mapping from path to file id, consulted before
+            looking up or generating ids.
+        :param scheme: Presumably the branching scheme; its str() is
+            appended to the ancestry property name in change_dir_prop.
+        """
         self.branch_path = branch_path
         self.old_inventory = prev_inventory
         self.inventory = copy(prev_inventory)
         self.revid = revid
         self.id_map = id_map
+        self.scheme = scheme
         self.source = source
         self.target = target
         self.transact = target.get_transaction()
         self.weave_store = target.weave_store
         self.dir_baserev = {}
-        self._parent_ids = None
-        self._revprops = {}
+        self._bzr_merges = []
+        self._svk_merges = []
+        self._revinfo = None
         self._svn_revprops = svn_revprops
+        # Old paths already moved away by add_file/add_directory whose
+        # delete_entry call has not arrived yet.
+        self._premature_deletes = set()
         self.pool = Pool()
+        self.target.start_write_group()
 
     def _get_revision(self, revid):
-        if self._parent_ids is None:
-            self._parent_ids = ""
+        """Creates the revision object.
 
-        parent_ids = self.source.revision_parents(revid, self._parent_ids)
+        :param revid: Revision id of the revision to create.
+        """
+        parent_ids = self.source.revision_parents(revid, self._bzr_merges)
 
         # Commit SVN revision properties to a Revision object
         rev = Revision(revision_id=revid, parent_ids=parent_ids)
 
-        rev.timestamp = 1.0 * svn.core.secs_from_timestr(
-            self._svn_revprops[2], None) #date
+        if self._svn_revprops[2] is not None:
+            rev.timestamp = 1.0 * svn.core.secs_from_timestr(
+                self._svn_revprops[2], None) #date
+        else:
+            rev.timestamp = 0 # FIXME: Obtain repository creation time
         rev.timezone = None
 
         rev.committer = self._svn_revprops[0] # author
@@ -78,16 +97,31 @@ class RevisionBuildEditor(svn.delta.Editor):
             rev.committer = ""
         rev.message = self._svn_revprops[1] # message
 
-        rev.properties = self._revprops
+        if self._revinfo:
+            parse_revision_metadata(self._revinfo, rev)
+
         return rev
 
     def open_root(self, base_revnum, baton):
+        """Open the root directory, creating or reusing its inventory entry.
+
+        The root's previous revision (if any) is remembered in dir_baserev
+        and the entry is stamped with the revision being built.
+
+        :param base_revnum: Unused here.
+        :param baton: Unused here.
+        :return: File id of the root directory.
+        """
-        if self.inventory.revision_id is None:
-            self.dir_baserev[ROOT_ID] = []
+        if self.old_inventory.root is None:
+            # First time the root is set
+            file_id = generate_file_id(self.source, self.revid, "")
+            self.dir_baserev[file_id] = []
         else:
-            self.dir_baserev[ROOT_ID] = [self.inventory.revision_id]
-        self.inventory.revision_id = self.revid
-        return ROOT_ID
+            assert self.old_inventory.root.revision is not None
+            if self.id_map.has_key(""):
+                file_id = self.id_map[""]
+            else:
+                file_id = self.old_inventory.root.file_id
+            self.dir_baserev[file_id] = [self.old_inventory.root.revision]
+
+        if self.inventory.root is not None and \
+                file_id == self.inventory.root.file_id:
+            ie = self.inventory.root
+        else:
+            ie = self.inventory.add_path("", 'directory', file_id)
+        ie.revision = self.revid
+        return file_id
 
     def _get_existing_id(self, parent_id, path):
         if self.id_map.has_key(path):
@@ -95,31 +129,48 @@ class RevisionBuildEditor(svn.delta.Editor):
         return self._get_old_id(parent_id, path)
 
     def _get_old_id(self, parent_id, old_path):
+        """Return the file id old_path had in the previous inventory.
+
+        :param parent_id: File id of the parent directory in old_inventory.
+        :param old_path: Path whose basename names the child to look up.
+        """
-        return self.old_inventory[parent_id].children[os.path.basename(old_path)].file_id
+        return self.old_inventory[parent_id].children[urlutils.basename(old_path)].file_id
 
     def _get_new_id(self, parent_id, new_path):
+        """Return the file id to use for new_path.
+
+        An entry in id_map wins; otherwise an id is generated from the
+        source repository, this revision id and the path.
+
+        :param parent_id: Unused here.
+        :param new_path: Path to find a file id for.
+        """
         if self.id_map.has_key(new_path):
             return self.id_map[new_path]
-        return generate_file_id(self.revid, new_path)
+        return generate_file_id(self.source, self.revid, new_path)
 
     def delete_entry(self, path, revnum, parent_id, pool):
+        """Remove path from the inventory being built.
+
+        A path recorded in _premature_deletes was already moved away by
+        add_file/add_directory, so it is only ticked off here; anything
+        else is removed through remove_recursive_id.
+
+        :param path: UTF-8 encoded path of the entry to delete.
+        :param revnum: Unused here.
+        :param parent_id: File id of the parent directory.
+        :param pool: Unused here.
+        """
         path = path.decode("utf-8")
-        del self.inventory[self._get_old_id(parent_id, path)]
+        if path in self._premature_deletes:
+            self._premature_deletes.remove(path)
+        else:
+            self.inventory.remove_recursive_id(self._get_old_id(parent_id, path))
 
     def close_directory(self, id):
-        if id != ROOT_ID:
-            self.inventory[id].revision = self.revid
+        self.inventory[id].revision = self.revid
 
+        # Only record root if the target repository supports it
+        if self.target.supports_rich_root:
             file_weave = self.weave_store.get_weave_or_empty(id, self.transact)
             if not file_weave.has_version(self.revid):
                 file_weave.add_lines(self.revid, self.dir_baserev[id], [])
 
-    def add_directory(self, path, parent_id, copyfrom_path, copyfrom_revnum, pool):
+    def add_directory(self, path, parent_id, copyfrom_path, copyfrom_revnum, 
+                      pool):
+        """Add a directory at path, or move an existing one there.
+
+        When the file id chosen for path is already in the inventory, the
+        entry is renamed instead of added and its old path is recorded in
+        _premature_deletes so the later delete_entry becomes a no-op.
+
+        :param path: UTF-8 encoded path of the new directory.
+        :param parent_id: File id of the parent directory.
+        :param copyfrom_path: Path the directory was copied from, or None.
+        :param copyfrom_revnum: Unused here.
+        :param pool: Unused here.
+        :return: File id of the directory.
+        """
         path = path.decode("utf-8")
         file_id = self._get_new_id(parent_id, path)
 
         self.dir_baserev[file_id] = []
-        ie = self.inventory.add_path(path, 'directory', file_id)
+        if file_id in self.inventory:
+            # This directory was moved here from somewhere else, but the 
+            # other location hasn't been removed yet. 
+            if copyfrom_path is None:
+                # FIXME: This should never happen!
+                copyfrom_path = self.old_inventory.id2path(file_id)
+            assert copyfrom_path == self.old_inventory.id2path(file_id)
+            assert copyfrom_path not in self._premature_deletes
+            self._premature_deletes.add(copyfrom_path)
+            self.inventory.rename(file_id, parent_id, urlutils.basename(path))
+            ie = self.inventory[file_id]
+        else:
+            ie = self.inventory.add_path(path, 'directory', file_id)
         ie.revision = self.revid
 
         return file_id
@@ -147,19 +198,27 @@ class RevisionBuildEditor(svn.delta.Editor):
         return file_id
 
     def change_dir_prop(self, id, name, value, pool):
-        if name == SVN_PROP_BZR_MERGE:
-            if id != ROOT_ID:
-                mutter('rogue %r on non-root directory' % SVN_PROP_BZR_MERGE)
+        if name == SVN_PROP_BZR_BRANCHING_SCHEME:
+            if id != self.inventory.root.file_id:
+                mutter('rogue %r on non-root directory' % name)
+                return
+        elif name == SVN_PROP_BZR_ANCESTRY+str(self.scheme):
+            if id != self.inventory.root.file_id:
+                mutter('rogue %r on non-root directory' % name)
                 return
             
-            self._parent_ids = value.splitlines()[-1]
+            self._bzr_merges = parse_merge_property(value.splitlines()[-1])
+        elif (name.startswith(SVN_PROP_BZR_ANCESTRY) or 
+              name.startswith(SVN_PROP_BZR_REVISION_ID)):
+            pass
         elif name == SVN_PROP_SVK_MERGE:
-            if self._parent_ids is None:
-                # Only set parents using svk:merge if no 
-                # bzr:merge set.
-                pass # FIXME 
-        elif name.startswith(SVN_PROP_BZR_REVPROP_PREFIX):
-            self._revprops[name[len(SVN_PROP_BZR_REVPROP_PREFIX):]] = value
+            self._svk_merges = None # Force Repository.revision_parents() to look it up
+        elif name == SVN_PROP_BZR_REVISION_INFO:
+            if id != self.inventory.root.file_id:
+                mutter('rogue %r on non-root directory' % SVN_PROP_BZR_REVISION_INFO)
+                return
+            self._revinfo = value
         elif name in (svn.core.SVN_PROP_ENTRY_COMMITTED_DATE,
                       svn.core.SVN_PROP_ENTRY_COMMITTED_REV,
                       svn.core.SVN_PROP_ENTRY_LAST_AUTHOR,
@@ -169,8 +228,11 @@ class RevisionBuildEditor(svn.delta.Editor):
             pass
         elif name.startswith(svn.core.SVN_PROP_WC_PREFIX):
             pass
-        else:
-            mutter('unsupported file property %r' % name)
+        elif name in (SVN_PROP_BZR_MERGE, SVN_PROP_BZR_FILEIDS):
+            pass
+        elif (name.startswith(svn.core.SVN_PROP_PREFIX) or
+              name.startswith(SVN_PROP_BZR_PREFIX)):
+            mutter('unsupported dir property %r' % name)
 
     def change_file_prop(self, id, name, value, pool):
         if name == svn.core.SVN_PROP_EXECUTABLE: 
@@ -191,7 +253,8 @@ class RevisionBuildEditor(svn.delta.Editor):
             pass
         elif name.startswith(svn.core.SVN_PROP_WC_PREFIX):
             pass
-        else:
+        elif (name.startswith(svn.core.SVN_PROP_PREFIX) or
+              name.startswith(SVN_PROP_BZR_PREFIX)):
             mutter('unsupported file property %r' % name)
 
     def add_file(self, path, parent_id, copyfrom_path, copyfrom_revnum, baton):
@@ -202,6 +265,16 @@ class RevisionBuildEditor(svn.delta.Editor):
         self.file_parents = []
         self.file_stream = None
         self.file_id = self._get_new_id(parent_id, path)
+        if self.file_id in self.inventory:
+            # This file was moved here from somewhere else, but the 
+            # other location hasn't been removed yet. 
+            if copyfrom_path is None:
+                # FIXME: This should never happen!
+                copyfrom_path = self.old_inventory.id2path(self.file_id)
+            assert copyfrom_path == self.old_inventory.id2path(self.file_id)
+            assert copyfrom_path not in self._premature_deletes
+            self._premature_deletes.add(copyfrom_path)
+            self.inventory.rename(self.file_id, parent_id, urlutils.basename(path))
         return path
 
     def open_file(self, path, parent_id, base_revnum, pool):
@@ -210,7 +283,8 @@ class RevisionBuildEditor(svn.delta.Editor):
         self.file_id = self._get_existing_id(parent_id, path)
         self.is_executable = None
         self.is_symlink = (self.inventory[base_file_id].kind == 'symlink')
-        file_weave = self.weave_store.get_weave_or_empty(base_file_id, self.transact)
+        file_weave = self.weave_store.get_weave_or_empty(base_file_id, 
+                                                         self.transact)
         self.file_data = file_weave.get_text(base_revid)
         self.file_stream = None
         if self.file_id == base_file_id:
@@ -232,7 +306,8 @@ class RevisionBuildEditor(svn.delta.Editor):
         actual_checksum = md5_strings(lines)
         assert checksum is None or checksum == actual_checksum
 
-        file_weave = self.weave_store.get_weave_or_empty(self.file_id, self.transact)
+        file_weave = self.weave_store.get_weave_or_empty(self.file_id, 
+                                                         self.transact)
         if not file_weave.has_version(self.revid):
             file_weave.add_lines(self.revid, self.file_parents, lines)
 
@@ -258,12 +333,17 @@ class RevisionBuildEditor(svn.delta.Editor):
         self.file_stream = None
 
     def close_edit(self):
+        assert len(self._premature_deletes) == 0
         rev = self._get_revision(self.revid)
         self.inventory.revision_id = self.revid
         rev.inventory_sha1 = osutils.sha_string(
             bzrlib.xml5.serializer_v5.write_inventory_to_string(
                 self.inventory))
-        self.target.add_revision(self.revid, rev, self.inventory)
+        try:
+            self.target.add_revision(self.revid, rev, self.inventory)
+            self.target.commit_write_group()
+        except:
+            self.target.abort_write_group()
         self.pool.destroy()
 
     def abort_edit(self):
@@ -272,12 +352,14 @@ class RevisionBuildEditor(svn.delta.Editor):
     def apply_textdelta(self, file_id, base_checksum):
         actual_checksum = md5.new(self.file_data).hexdigest(),
         assert (base_checksum is None or base_checksum == actual_checksum,
-            "base checksum mismatch: %r != %r" % (base_checksum, actual_checksum))
+            "base checksum mismatch: %r != %r" % (base_checksum, 
+                                                  actual_checksum))
         self.file_stream = StringIO()
-        return apply_txdelta_handler(StringIO(self.file_data), self.file_stream, self.pool)
+        return apply_txdelta_handler(StringIO(self.file_data), 
+                                     self.file_stream, self.pool)
 
 
-class InterSvnRepository(InterRepository):
+class InterFromSvnRepository(InterRepository):
     """Svn to any repository actions."""
 
     _matching_repo_format = SvnRepositoryFormat()
@@ -287,26 +369,31 @@ class InterSvnRepository(InterRepository):
         return None
 
     def _find_all(self):
+        """Find all source revisions that are missing from the target.
+
+        :return: Tuple with the missing revision ids and a dictionary
+            mapping each of them to what _mainline_revision_parent
+            returns for it.
+        """
-        needed = []
         parents = {}
-        for (branch, revnum) in self.source.follow_history(
-                                                self.source._latest_revnum):
-            revid = self.source.generate_revision_id(revnum, branch)
-            parents[revid] = self.source._mainline_revision_parent(branch, revnum)
-
-            if not self.target.has_revision(revid):
-                needed.append(revid)
+        needed = filter(lambda x: not self.target.has_revision(x), 
+                        self.source.all_revision_ids())
+        for revid in needed:
+            (branch, revnum, scheme) = self.source.lookup_revision_id(revid)
+            parents[revid] = self.source._mainline_revision_parent(branch, 
+                                               revnum, scheme)
         return (needed, parents)
 
     def _find_until(self, revision_id):
+        """Find all missing revisions until revision_id
+
+        :param revision_id: Stop revision
+        :return: Tuple with revisions missing and a dictionary with 
+            parents for those revision.
+        """
         needed = []
         parents = {}
-        (path, until_revnum) = self.source.parse_revision_id(revision_id)
+        (path, until_revnum, scheme) = self.source.lookup_revision_id(
+                                                                    revision_id)
 
         prev_revid = None
         for (branch, revnum) in self.source.follow_branch(path, 
-                                                          until_revnum):
-            revid = self.source.generate_revision_id(revnum, branch)
+                                                          until_revnum, scheme):
+            revid = self.source.generate_revision_id(revnum, branch, str(scheme))
 
             if prev_revid is not None:
                 parents[prev_revid] = revid
@@ -319,15 +406,15 @@ class InterSvnRepository(InterRepository):
         parents[prev_revid] = None
         return (needed, parents)
 
-    def copy_content(self, revision_id=None, basis=None, pb=None):
+    def copy_content(self, revision_id=None, pb=None):
         """See InterRepository.copy_content."""
+        if revision_id == NULL_REVISION:
+            return
         # Dictionary with paths as keys, revnums as values
 
         # Loop over all the revnums until revision_id
         # (or youngest_revnum) and call self.target.add_revision() 
         # or self.target.add_inventory() each time
-        needed = []
-        parents = {}
         self.target.lock_read()
         try:
             if revision_id is None:
@@ -341,71 +428,76 @@ class InterSvnRepository(InterRepository):
             # Nothing to fetch
             return
 
-        repos_root = self.source.transport.get_repos_root()
+        repos_root = self.source.transport.get_svn_repos_root()
 
-        needed.reverse()
         prev_revid = None
         transport = self.source.transport
-        self.target.lock_write()
         if pb is None:
-            pb = ui_factory.nested_progress_bar()
+            pb = ui.ui_factory.nested_progress_bar()
             nested_pb = pb
         else:
             nested_pb = None
         num = 0
+        prev_inv = None
+        self.target.lock_write()
         try:
-            for revid in needed:
-                (branch, revnum) = self.source.parse_revision_id(revid)
+            for revid in reversed(needed):
+                (branch, revnum, scheme) = self.source.lookup_revision_id(revid)
                 pb.update('copying revision', num, len(needed))
 
                 parent_revid = parents[revid]
 
                 if parent_revid is None:
-                    parent_inv = Inventory()
+                    parent_inv = Inventory(root_id=None)
                 elif prev_revid != parent_revid:
                     parent_inv = self.target.get_inventory(parent_revid)
                 else:
+                    assert prev_inv is not None
                     parent_inv = prev_inv
 
                 changes = self.source._log.get_revision_paths(revnum, branch)
                 renames = self.source.revision_fileid_renames(revid)
                 id_map = self.source.transform_fileid_map(self.source.uuid, 
-                                            revnum, branch, changes, renames)
+                                      revnum, branch, changes, renames, scheme)
 
                 editor = RevisionBuildEditor(self.source, self.target, branch, 
-                                             parent_inv, revid, 
-                                         self.source._log.get_revision_info(revnum),
-                                         id_map)
+                             parent_inv, revid, 
+                             self.source._log.get_revision_info(revnum),
+                             id_map, scheme)
 
                 pool = Pool()
                 edit, edit_baton = svn.delta.make_editor(editor, pool)
 
                 if parent_revid is None:
-                    transport.reparent("%s/%s" % (repos_root, branch))
+                    branch_url = urlutils.join(repos_root, branch)
+                    transport.reparent(branch_url)
+                    assert transport.svn_url == branch_url.rstrip("/"), \
+                        "Expected %r, got %r" % (transport.svn_url, branch_url)
                     reporter = transport.do_update(
-                                   revnum, "", True, edit, edit_baton, pool)
+                                   revnum, True, edit, edit_baton, pool)
 
                     # Report status of existing paths
                     reporter.set_path("", revnum, True, None, pool)
                 else:
-                    (parent_branch, parent_revnum) = self.source.parse_revision_id(parent_revid)
-                    transport.reparent("%s/%s" % (repos_root, parent_branch))
+                    (parent_branch, parent_revnum, scheme) = \
+                            self.source.lookup_revision_id(parent_revid)
+                    transport.reparent(urlutils.join(repos_root, parent_branch))
 
                     if parent_branch != branch:
-                        switch_url = "%s/%s" % (repos_root, branch)
                         reporter = transport.do_switch(
-                                   revnum, "", True, 
-                                   switch_url, edit, edit_baton, pool)
+                                   revnum, True, 
+                                   urlutils.join(repos_root, branch), 
+                                   edit, edit_baton, pool)
                     else:
                         reporter = transport.do_update(
-                                   revnum, "", True, edit, edit_baton, pool)
+                                   revnum, True, edit, edit_baton, pool)
 
                     # Report status of existing paths
                     reporter.set_path("", parent_revnum, False, None, pool)
 
-                transport.lock()
+                lock = transport.lock_read(".")
                 reporter.finish_report(pool)
-                transport.unlock()
+                lock.unlock()
 
                 prev_inv = editor.inventory
                 prev_revid = revid
@@ -415,7 +507,7 @@ class InterSvnRepository(InterRepository):
             self.target.unlock()
             if nested_pb is not None:
                 nested_pb.finished()
-        self.source.transport.reparent(repos_root)
+        self.source.transport.reparent_root()
 
     def fetch(self, revision_id=None, pb=None):
         """Fetch revisions. """
@@ -425,5 +517,6 @@ class InterSvnRepository(InterRepository):
     def is_compatible(source, target):
         """Be compatible with SvnRepository."""
         # FIXME: Also check target uses VersionedFile
-        return isinstance(source, SvnRepository)
+        return isinstance(source, SvnRepository) and \
+                target.supports_rich_root()