# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+"""Fetching revisions from Subversion repositories in batches."""
import bzrlib
-from bzrlib.inventory import Inventory, ROOT_ID
-import bzrlib.osutils as osutils
-from bzrlib.revision import Revision
+from bzrlib import osutils, ui, urlutils
+from bzrlib.inventory import Inventory
+from bzrlib.revision import Revision, NULL_REVISION
from bzrlib.repository import InterRepository
from bzrlib.trace import mutter
-from bzrlib.ui import ui_factory
from copy import copy
from cStringIO import StringIO
import md5
-import os
-from svn.core import SubversionException, Pool
+from svn.core import Pool
import svn.core
from fileids import generate_file_id
-from repository import (SvnRepository, SVN_PROP_BZR_MERGE, SVN_PROP_SVK_MERGE,
- SVN_PROP_BZR_REVPROP_PREFIX, SvnRepositoryFormat)
+from repository import (SvnRepository, SVN_PROP_BZR_ANCESTRY,
+ SVN_PROP_SVK_MERGE, SVN_PROP_BZR_MERGE,
+ SVN_PROP_BZR_PREFIX, SVN_PROP_BZR_REVISION_INFO,
+ SVN_PROP_BZR_BRANCHING_SCHEME, SVN_PROP_BZR_REVISION_ID,
+ SVN_PROP_BZR_FILEIDS, SvnRepositoryFormat,
+ parse_revision_metadata, parse_merge_property)
from tree import apply_txdelta_handler
def md5_strings(strings):
    """Return the MD5sum of the concatenation of strings.

    :param strings: Strings to find the MD5sum of.
    :return: MD5sum as a hex string.
    """
    s = md5.new()
    # Feed each chunk in order; an explicit loop is clearer than
    # abusing map() for its side effects.
    for string in strings:
        s.update(string)
    return s.hexdigest()
class RevisionBuildEditor(svn.delta.Editor):
+ """Implementation of the Subversion commit editor interface that builds a
+ Bazaar revision.
+ """
def __init__(self, source, target, branch_path, prev_inventory, revid,
- svn_revprops, id_map):
+ svn_revprops, id_map, scheme):
self.branch_path = branch_path
self.old_inventory = prev_inventory
self.inventory = copy(prev_inventory)
self.revid = revid
self.id_map = id_map
+ self.scheme = scheme
self.source = source
self.target = target
self.transact = target.get_transaction()
self.weave_store = target.weave_store
self.dir_baserev = {}
- self._parent_ids = None
- self._revprops = {}
+ self._bzr_merges = []
+ self._svk_merges = []
+ self._revinfo = None
self._svn_revprops = svn_revprops
+ self._premature_deletes = set()
self.pool = Pool()
+ self.target.start_write_group()
def _get_revision(self, revid):
- if self._parent_ids is None:
- self._parent_ids = ""
+ """Creates the revision object.
- parent_ids = self.source.revision_parents(revid, self._parent_ids)
+ :param revid: Revision id of the revision to create.
+ """
+ parent_ids = self.source.revision_parents(revid, self._bzr_merges)
# Commit SVN revision properties to a Revision object
rev = Revision(revision_id=revid, parent_ids=parent_ids)
- rev.timestamp = 1.0 * svn.core.secs_from_timestr(
- self._svn_revprops[2], None) #date
+ if self._svn_revprops[2] is not None:
+ rev.timestamp = 1.0 * svn.core.secs_from_timestr(
+ self._svn_revprops[2], None) #date
+ else:
+ rev.timestamp = 0 # FIXME: Obtain repository creation time
rev.timezone = None
rev.committer = self._svn_revprops[0] # author
rev.committer = ""
rev.message = self._svn_revprops[1] # message
- rev.properties = self._revprops
+ if self._revinfo:
+ parse_revision_metadata(self._revinfo, rev)
+
return rev
def open_root(self, base_revnum, baton):
- if self.inventory.revision_id is None:
- self.dir_baserev[ROOT_ID] = []
+ if self.old_inventory.root is None:
+ # First time the root is set
+ file_id = generate_file_id(self.source, self.revid, "")
+ self.dir_baserev[file_id] = []
else:
- self.dir_baserev[ROOT_ID] = [self.inventory.revision_id]
- self.inventory.revision_id = self.revid
- return ROOT_ID
+ assert self.old_inventory.root.revision is not None
+ if self.id_map.has_key(""):
+ file_id = self.id_map[""]
+ else:
+ file_id = self.old_inventory.root.file_id
+ self.dir_baserev[file_id] = [self.old_inventory.root.revision]
+
+ if self.inventory.root is not None and \
+ file_id == self.inventory.root.file_id:
+ ie = self.inventory.root
+ else:
+ ie = self.inventory.add_path("", 'directory', file_id)
+ ie.revision = self.revid
+ return file_id
def _get_existing_id(self, parent_id, path):
if self.id_map.has_key(path):
return self._get_old_id(parent_id, path)
def _get_old_id(self, parent_id, old_path):
- return self.old_inventory[parent_id].children[os.path.basename(old_path)].file_id
+ return self.old_inventory[parent_id].children[urlutils.basename(old_path)].file_id
def _get_new_id(self, parent_id, new_path):
if self.id_map.has_key(new_path):
return self.id_map[new_path]
- return generate_file_id(self.revid, new_path)
+ return generate_file_id(self.source, self.revid, new_path)
def delete_entry(self, path, revnum, parent_id, pool):
path = path.decode("utf-8")
- del self.inventory[self._get_old_id(parent_id, path)]
+ if path in self._premature_deletes:
+ self._premature_deletes.remove(path)
+ else:
+ self.inventory.remove_recursive_id(self._get_old_id(parent_id, path))
def close_directory(self, id):
- if id != ROOT_ID:
- self.inventory[id].revision = self.revid
+ self.inventory[id].revision = self.revid
+ # Only record root if the target repository supports it
+ if self.target.supports_rich_root:
file_weave = self.weave_store.get_weave_or_empty(id, self.transact)
if not file_weave.has_version(self.revid):
file_weave.add_lines(self.revid, self.dir_baserev[id], [])
- def add_directory(self, path, parent_id, copyfrom_path, copyfrom_revnum, pool):
+ def add_directory(self, path, parent_id, copyfrom_path, copyfrom_revnum,
+ pool):
path = path.decode("utf-8")
file_id = self._get_new_id(parent_id, path)
self.dir_baserev[file_id] = []
- ie = self.inventory.add_path(path, 'directory', file_id)
+ if file_id in self.inventory:
+ # This directory was moved here from somewhere else, but the
+ # other location hasn't been removed yet.
+ if copyfrom_path is None:
+ # FIXME: This should never happen!
+ copyfrom_path = self.old_inventory.id2path(file_id)
+ assert copyfrom_path == self.old_inventory.id2path(file_id)
+ assert copyfrom_path not in self._premature_deletes
+ self._premature_deletes.add(copyfrom_path)
+ self.inventory.rename(file_id, parent_id, urlutils.basename(path))
+ ie = self.inventory[file_id]
+ else:
+ ie = self.inventory.add_path(path, 'directory', file_id)
ie.revision = self.revid
return file_id
return file_id
def change_dir_prop(self, id, name, value, pool):
- if name == SVN_PROP_BZR_MERGE:
- if id != ROOT_ID:
- mutter('rogue %r on non-root directory' % SVN_PROP_BZR_MERGE)
+ if name == SVN_PROP_BZR_BRANCHING_SCHEME:
+ if id != self.inventory.root.file_id:
+ mutter('rogue %r on non-root directory' % name)
+ return
+ elif name == SVN_PROP_BZR_ANCESTRY+str(self.scheme):
+ if id != self.inventory.root.file_id:
+ mutter('rogue %r on non-root directory' % name)
return
- self._parent_ids = value.splitlines()[-1]
+ self._bzr_merges = parse_merge_property(value.splitlines()[-1])
+ elif (name.startswith(SVN_PROP_BZR_ANCESTRY) or
+ name.startswith(SVN_PROP_BZR_REVISION_ID)):
+ pass
elif name == SVN_PROP_SVK_MERGE:
- if self._parent_ids is None:
- # Only set parents using svk:merge if no
- # bzr:merge set.
- pass # FIXME
- elif name.startswith(SVN_PROP_BZR_REVPROP_PREFIX):
- self._revprops[name[len(SVN_PROP_BZR_REVPROP_PREFIX):]] = value
+ self._svk_merges = None # Force Repository.revision_parents() to look it up
+ elif name == SVN_PROP_BZR_REVISION_INFO:
+ if id != self.inventory.root.file_id:
+ mutter('rogue %r on non-root directory' % SVN_PROP_BZR_REVISION_INFO)
+ return
+
+ self._revinfo = value
elif name in (svn.core.SVN_PROP_ENTRY_COMMITTED_DATE,
svn.core.SVN_PROP_ENTRY_COMMITTED_REV,
svn.core.SVN_PROP_ENTRY_LAST_AUTHOR,
pass
elif name.startswith(svn.core.SVN_PROP_WC_PREFIX):
pass
- else:
- mutter('unsupported file property %r' % name)
+ elif name in (SVN_PROP_BZR_MERGE, SVN_PROP_BZR_FILEIDS):
+ pass
+ elif (name.startswith(svn.core.SVN_PROP_PREFIX) or
+ name.startswith(SVN_PROP_BZR_PREFIX)):
+ mutter('unsupported dir property %r' % name)
def change_file_prop(self, id, name, value, pool):
if name == svn.core.SVN_PROP_EXECUTABLE:
pass
elif name.startswith(svn.core.SVN_PROP_WC_PREFIX):
pass
- else:
+ elif (name.startswith(svn.core.SVN_PROP_PREFIX) or
+ name.startswith(SVN_PROP_BZR_PREFIX)):
mutter('unsupported file property %r' % name)
def add_file(self, path, parent_id, copyfrom_path, copyfrom_revnum, baton):
self.file_parents = []
self.file_stream = None
self.file_id = self._get_new_id(parent_id, path)
+ if self.file_id in self.inventory:
+ # This file was moved here from somewhere else, but the
+ # other location hasn't been removed yet.
+ if copyfrom_path is None:
+ # FIXME: This should never happen!
+ copyfrom_path = self.old_inventory.id2path(self.file_id)
+ assert copyfrom_path == self.old_inventory.id2path(self.file_id)
+ assert copyfrom_path not in self._premature_deletes
+ self._premature_deletes.add(copyfrom_path)
+ self.inventory.rename(self.file_id, parent_id, urlutils.basename(path))
return path
def open_file(self, path, parent_id, base_revnum, pool):
self.file_id = self._get_existing_id(parent_id, path)
self.is_executable = None
self.is_symlink = (self.inventory[base_file_id].kind == 'symlink')
- file_weave = self.weave_store.get_weave_or_empty(base_file_id, self.transact)
+ file_weave = self.weave_store.get_weave_or_empty(base_file_id,
+ self.transact)
self.file_data = file_weave.get_text(base_revid)
self.file_stream = None
if self.file_id == base_file_id:
actual_checksum = md5_strings(lines)
assert checksum is None or checksum == actual_checksum
- file_weave = self.weave_store.get_weave_or_empty(self.file_id, self.transact)
+ file_weave = self.weave_store.get_weave_or_empty(self.file_id,
+ self.transact)
if not file_weave.has_version(self.revid):
file_weave.add_lines(self.revid, self.file_parents, lines)
self.file_stream = None
def close_edit(self):
+ assert len(self._premature_deletes) == 0
rev = self._get_revision(self.revid)
self.inventory.revision_id = self.revid
rev.inventory_sha1 = osutils.sha_string(
bzrlib.xml5.serializer_v5.write_inventory_to_string(
self.inventory))
- self.target.add_revision(self.revid, rev, self.inventory)
+ try:
+ self.target.add_revision(self.revid, rev, self.inventory)
+ self.target.commit_write_group()
+ except:
+ self.target.abort_write_group()
self.pool.destroy()
def abort_edit(self):
def apply_textdelta(self, file_id, base_checksum):
actual_checksum = md5.new(self.file_data).hexdigest(),
assert (base_checksum is None or base_checksum == actual_checksum,
- "base checksum mismatch: %r != %r" % (base_checksum, actual_checksum))
+ "base checksum mismatch: %r != %r" % (base_checksum,
+ actual_checksum))
self.file_stream = StringIO()
- return apply_txdelta_handler(StringIO(self.file_data), self.file_stream, self.pool)
+ return apply_txdelta_handler(StringIO(self.file_data),
+ self.file_stream, self.pool)
-class InterSvnRepository(InterRepository):
+class InterFromSvnRepository(InterRepository):
"""Svn to any repository actions."""
_matching_repo_format = SvnRepositoryFormat()
return None
def _find_all(self):
- needed = []
parents = {}
- for (branch, revnum) in self.source.follow_history(
- self.source._latest_revnum):
- revid = self.source.generate_revision_id(revnum, branch)
- parents[revid] = self.source._mainline_revision_parent(branch, revnum)
-
- if not self.target.has_revision(revid):
- needed.append(revid)
+ needed = filter(lambda x: not self.target.has_revision(x),
+ self.source.all_revision_ids())
+ for revid in needed:
+ (branch, revnum, scheme) = self.source.lookup_revision_id(revid)
+ parents[revid] = self.source._mainline_revision_parent(branch,
+ revnum, scheme)
return (needed, parents)
def _find_until(self, revision_id):
+ """Find all missing revisions until revision_id
+
+ :param revision_id: Stop revision
+ :return: Tuple with revisions missing and a dictionary with
+ parents for those revision.
+ """
needed = []
parents = {}
- (path, until_revnum) = self.source.parse_revision_id(revision_id)
+ (path, until_revnum, scheme) = self.source.lookup_revision_id(
+ revision_id)
prev_revid = None
for (branch, revnum) in self.source.follow_branch(path,
- until_revnum):
- revid = self.source.generate_revision_id(revnum, branch)
+ until_revnum, scheme):
+ revid = self.source.generate_revision_id(revnum, branch, str(scheme))
if prev_revid is not None:
parents[prev_revid] = revid
parents[prev_revid] = None
return (needed, parents)
- def copy_content(self, revision_id=None, basis=None, pb=None):
+ def copy_content(self, revision_id=None, pb=None):
"""See InterRepository.copy_content."""
+ if revision_id == NULL_REVISION:
+ return
# Dictionary with paths as keys, revnums as values
# Loop over all the revnums until revision_id
# (or youngest_revnum) and call self.target.add_revision()
# or self.target.add_inventory() each time
- needed = []
- parents = {}
self.target.lock_read()
try:
if revision_id is None:
# Nothing to fetch
return
- repos_root = self.source.transport.get_repos_root()
+ repos_root = self.source.transport.get_svn_repos_root()
- needed.reverse()
prev_revid = None
transport = self.source.transport
- self.target.lock_write()
if pb is None:
- pb = ui_factory.nested_progress_bar()
+ pb = ui.ui_factory.nested_progress_bar()
nested_pb = pb
else:
nested_pb = None
num = 0
+ prev_inv = None
+ self.target.lock_write()
try:
- for revid in needed:
- (branch, revnum) = self.source.parse_revision_id(revid)
+ for revid in reversed(needed):
+ (branch, revnum, scheme) = self.source.lookup_revision_id(revid)
pb.update('copying revision', num, len(needed))
parent_revid = parents[revid]
if parent_revid is None:
- parent_inv = Inventory()
+ parent_inv = Inventory(root_id=None)
elif prev_revid != parent_revid:
parent_inv = self.target.get_inventory(parent_revid)
else:
+ assert prev_inv is not None
parent_inv = prev_inv
changes = self.source._log.get_revision_paths(revnum, branch)
renames = self.source.revision_fileid_renames(revid)
id_map = self.source.transform_fileid_map(self.source.uuid,
- revnum, branch, changes, renames)
+ revnum, branch, changes, renames, scheme)
editor = RevisionBuildEditor(self.source, self.target, branch,
- parent_inv, revid,
- self.source._log.get_revision_info(revnum),
- id_map)
+ parent_inv, revid,
+ self.source._log.get_revision_info(revnum),
+ id_map, scheme)
pool = Pool()
edit, edit_baton = svn.delta.make_editor(editor, pool)
if parent_revid is None:
- transport.reparent("%s/%s" % (repos_root, branch))
+ branch_url = urlutils.join(repos_root, branch)
+ transport.reparent(branch_url)
+ assert transport.svn_url == branch_url.rstrip("/"), \
+ "Expected %r, got %r" % (transport.svn_url, branch_url)
reporter = transport.do_update(
- revnum, "", True, edit, edit_baton, pool)
+ revnum, True, edit, edit_baton, pool)
# Report status of existing paths
reporter.set_path("", revnum, True, None, pool)
else:
- (parent_branch, parent_revnum) = self.source.parse_revision_id(parent_revid)
- transport.reparent("%s/%s" % (repos_root, parent_branch))
+ (parent_branch, parent_revnum, scheme) = \
+ self.source.lookup_revision_id(parent_revid)
+ transport.reparent(urlutils.join(repos_root, parent_branch))
if parent_branch != branch:
- switch_url = "%s/%s" % (repos_root, branch)
reporter = transport.do_switch(
- revnum, "", True,
- switch_url, edit, edit_baton, pool)
+ revnum, True,
+ urlutils.join(repos_root, branch),
+ edit, edit_baton, pool)
else:
reporter = transport.do_update(
- revnum, "", True, edit, edit_baton, pool)
+ revnum, True, edit, edit_baton, pool)
# Report status of existing paths
reporter.set_path("", parent_revnum, False, None, pool)
- transport.lock()
+ lock = transport.lock_read(".")
reporter.finish_report(pool)
- transport.unlock()
+ lock.unlock()
prev_inv = editor.inventory
prev_revid = revid
self.target.unlock()
if nested_pb is not None:
nested_pb.finished()
- self.source.transport.reparent(repos_root)
+ self.source.transport.reparent_root()
def fetch(self, revision_id=None, pb=None):
"""Fetch revisions. """
def is_compatible(source, target):
"""Be compatible with SvnRepository."""
# FIXME: Also check target uses VersionedFile
- return isinstance(source, SvnRepository)
+ return isinstance(source, SvnRepository) and \
+ target.supports_rich_root()