1 # Copyright (C) 2005-2007 Jelmer Vernooij <jelmer@samba.org>
3 # This program is free software; you can redistribute it and/or modify
4 # it under the terms of the GNU General Public License as published by
5 # the Free Software Foundation; either version 2 of the License, or
6 # (at your option) any later version.
8 # This program is distributed in the hope that it will be useful,
9 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 # GNU General Public License for more details.
13 # You should have received a copy of the GNU General Public License
14 # along with this program; if not, write to the Free Software
15 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
16 """Fetching revisions from Subversion repositories in batches."""
19 from bzrlib import osutils, ui, urlutils
20 from bzrlib.inventory import Inventory
21 from bzrlib.revision import Revision, NULL_REVISION
22 from bzrlib.repository import InterRepository
23 from bzrlib.trace import mutter
25 from cStringIO import StringIO
30 from bzrlib.plugins.svn.errors import InvalidFileName
31 from mapping import (SVN_PROP_BZR_ANCESTRY, SVN_PROP_BZR_MERGE,
32 SVN_PROP_BZR_PREFIX, SVN_PROP_BZR_REVISION_INFO,
33 SVN_PROP_BZR_BRANCHING_SCHEME, SVN_PROP_BZR_REVISION_ID,
34 SVN_PROP_BZR_FILEIDS, SVN_REVPROP_BZR_SIGNATURE,
36 parse_revision_metadata)
37 from repository import (SvnRepository, SvnRepositoryFormat)
38 from svk import SVN_PROP_SVK_MERGE
39 from delta import apply_txdelta_handler
40 from tree import (parse_externals_description, inventory_add_external)
43 def _escape_commit_message(message):
44 """Replace xml-incompatible control characters."""
48 # FIXME: RBC 20060419 this should be done by the revision
49 # serialiser not by commit. Then we can also add an unescaper
50 # in the deserializer and start roundtripping revision messages
51 # precisely. See repository_implementations/test_repository.py
53 # Python strings can include characters that can't be
54 # represented in well-formed XML; escape characters that
55 # aren't listed in the XML specification
56 # (http://www.w3.org/TR/REC-xml/#NT-Char).
58 u'[^\x09\x0A\x0D\u0020-\uD7FF\uE000-\uFFFD]+',
59 lambda match: match.group(0).encode('unicode_escape'),
def md5_strings(strings):
    """Return the MD5sum of the concatenation of strings.

    :param strings: Strings to find the MD5sum of.
    :return: Hexadecimal MD5 digest of all strings joined together.
    """
    try:
        from hashlib import md5 as new_md5
    except ImportError:
        # Interpreters without hashlib only ship the legacy md5 module.
        from md5 import new as new_md5
    digest = new_md5()
    # Explicit loop rather than map(): map() is lazy on Python 3, so using
    # it purely for its side effect would never feed the digest.
    for chunk in strings:
        digest.update(chunk)
    return digest.hexdigest()
def check_filename(path):
    """Check that a path does not contain invalid characters.

    :param path: Path to check
    :raises InvalidFileName: if the path contains an invalid character.
    """
    assert isinstance(path, unicode)
    # NOTE(review): the condition guarding the raise is not visible in the
    # damaged source; the backslash test below is a reconstruction -- confirm
    # against the upstream file.
    if u"\\" in path:
        raise InvalidFileName(path)
class RevisionBuildEditor:
    """Implementation of the Subversion commit editor interface that builds a
    Bazaar revision.

    The class keeps the inventory that is being built; actual storage is
    delegated to the _store_directory, _store_file, _get_file_data and
    _finish_commit hooks, which raise NotImplementedError here.
    """

    def __init__(self, source, target):
        """Create an editor that copies from source into target.

        :param source: Repository the revisions are read from.
        :param target: Repository the revisions are written to; its
            get_transaction() result is kept as self.transact.
        """
        self.target = target
        self.source = source
        self.transact = target.get_transaction()

    def set_target_revision(self, target_revision):
        """Editor callback; the value is ignored."""
        pass

    def start_revision(self, revid, prev_inventory):
        """Reset the editor state for building revision revid.

        :param revid: Revision id of the revision about to be built.
        :param prev_inventory: Inventory the revision is based on. It is
            kept as old_inventory and a copy of it becomes the inventory
            that the editor callbacks modify.
        """
        self.revid = revid
        (self.branch_path, self.revnum, self.mapping) = \
            self.source.lookup_revision_id(revid)
        self.svn_revprops = \
            self.source._log._get_transport().revprop_list(self.revnum)
        changes = self.source._log.get_revision_paths(self.revnum,
                                                      self.branch_path)
        renames = self.source.revision_fileid_renames(
            self.branch_path, self.revnum, self.mapping,
            revprops=self.svn_revprops)
        self.id_map = self.source.transform_fileid_map(
            self.source.uuid, self.revnum, self.branch_path, changes,
            renames, self.mapping)
        self._bzr_merges = ()
        self._svk_merges = ()
        self._premature_deletes = set()
        self.old_inventory = prev_inventory
        self.inventory = prev_inventory.copy()
        self._branch_fileprops = {}
        self._start_revision()

    def _get_parent_ids(self):
        """Return the parent revision ids of the revision being built."""
        return self.source.revision_parents(self.revid,
                                            self._branch_fileprops)

    def _get_revision(self, revid):
        """Creates the revision object.

        :param revid: Revision id of the revision to create.
        :return: Tuple with the Revision object and the signature stored in
            the Subversion revision properties (None if there is none).
        """
        # Commit SVN revision properties to a Revision object
        rev = Revision(revision_id=revid, parent_ids=self._get_parent_ids())

        self.mapping.import_revision(self.svn_revprops,
                                     self._branch_fileprops, rev)

        signature = self.svn_revprops.get(SVN_REVPROP_BZR_SIGNATURE)

        return (rev, signature)

    def open_root(self, base_revnum):
        """Editor callback: open the root directory of the branch.

        :param base_revnum: Base revision number (unused).
        :return: DirectoryBuildEditor for the root directory.
        """
        if self.old_inventory.root is None:
            # First time the root is set
            old_file_id = None
            file_id = self.mapping.generate_file_id(
                self.source.uuid, self.revnum, self.branch_path, u"")
            file_parents = []
        else:
            assert self.old_inventory.root.revision is not None
            old_file_id = self.old_inventory.root.file_id
            if "" in self.id_map:
                file_id = self.id_map[""]
            else:
                file_id = old_file_id
            file_parents = [self.old_inventory.root.revision]

        if (self.inventory.root is not None and
                file_id == self.inventory.root.file_id):
            ie = self.inventory.root
        else:
            ie = self.inventory.add_path("", 'directory', file_id)
        ie.revision = self.revid
        return DirectoryBuildEditor(self, old_file_id, file_id, file_parents)

    def close(self):
        """Editor callback; nothing to do at this level."""
        pass

    def _store_directory(self, file_id, parents):
        """Hook: record a directory text. Must be provided by subclasses."""
        raise NotImplementedError(self._store_directory)

    def _get_file_data(self, file_id, revid):
        """Hook: return the stored text of a file. Subclass responsibility."""
        raise NotImplementedError(self._get_file_data)

    def _finish_commit(self):
        """Hook: write the finished revision. Subclass responsibility."""
        raise NotImplementedError(self._finish_commit)

    def abort(self):
        """Editor callback; nothing to clean up at this level."""
        pass

    def _start_revision(self):
        """Hook called at the end of start_revision(); no-op by default."""
        pass

    def _store_file(self, file_id, lines, parents):
        """Hook: record a file text. Must be provided by subclasses."""
        raise NotImplementedError(self._store_file)

    def _get_existing_id(self, old_parent_id, new_parent_id, path):
        """Return the file id for an already existing path.

        An entry in id_map takes precedence; otherwise the id is looked up
        by basename among the children of old_parent_id in the old inventory.
        """
        assert isinstance(path, unicode)
        assert isinstance(old_parent_id, str)
        assert isinstance(new_parent_id, str)
        if path in self.id_map:
            return self.id_map[path]
        siblings = self.old_inventory[old_parent_id].children
        return siblings[urlutils.basename(path)].file_id

    def _get_old_id(self, parent_id, old_path):
        """Return the file id old_path had in the old inventory."""
        assert isinstance(old_path, unicode)
        assert isinstance(parent_id, str)
        siblings = self.old_inventory[parent_id].children
        return siblings[urlutils.basename(old_path)].file_id

    def _get_new_id(self, parent_id, new_path):
        """Return the file id for a newly added path.

        An entry in id_map takes precedence; otherwise a new id is generated
        by the mapping.
        """
        assert isinstance(new_path, unicode)
        assert isinstance(parent_id, str)
        if new_path in self.id_map:
            return self.id_map[new_path]
        return self.mapping.generate_file_id(self.source.uuid, self.revnum,
                                             self.branch_path, new_path)

    def _rename(self, file_id, parent_id, path):
        """Move file_id below parent_id under the basename of path."""
        assert isinstance(path, unicode)
        assert isinstance(parent_id, str)
        new_name = urlutils.basename(path)
        entry = self.inventory[file_id]
        # Only rename if not right yet
        if entry.parent_id == parent_id and entry.name == new_name:
            return
        self.inventory.rename(file_id, parent_id, new_name)
class DirectoryBuildEditor:
    """Editor for a single directory of the revision being built."""

    def __init__(self, editor, old_id, new_id, parent_revids=None):
        """Create a directory editor.

        :param editor: The RevisionBuildEditor this directory belongs to.
        :param old_id: File id of the directory in the old inventory
            (None if it did not exist there).
        :param new_id: File id of the directory in the new inventory.
        :param parent_revids: Revisions to record as parents of the
            directory text; defaults to a fresh empty list.
        """
        self.editor = editor
        self.old_id = old_id
        self.new_id = new_id
        # A literal [] default would be one list shared by every instance.
        if parent_revids is None:
            parent_revids = []
        self.parent_revids = parent_revids

    def close(self):
        """Store the directory; closing the root finishes the commit."""
        self.editor.inventory[self.new_id].revision = self.editor.revid
        self.editor._store_directory(self.new_id, self.parent_revids)

        if self.new_id == self.editor.inventory.root.file_id:
            assert len(self.editor._premature_deletes) == 0
            self.editor._finish_commit()

    def add_directory(self, path, copyfrom_path=None, copyfrom_revnum=-1):
        """Editor callback: a directory was added at path.

        :param path: UTF-8 encoded path of the new directory.
        :param copyfrom_path: Path the directory was copied from, if any.
        :param copyfrom_revnum: Revision number it was copied from (unused).
        :return: DirectoryBuildEditor for the new directory.
        """
        assert isinstance(path, str)
        path = path.decode("utf-8")
        check_filename(path)
        file_id = self.editor._get_new_id(self.new_id, path)

        if file_id in self.editor.inventory:
            # This directory was moved here from somewhere else, but the
            # other location hasn't been removed yet.
            if copyfrom_path is None:
                # This should ideally never happen!
                copyfrom_path = self.editor.old_inventory.id2path(file_id)
                mutter('no copyfrom path set, assuming %r' % copyfrom_path)
            assert copyfrom_path == self.editor.old_inventory.id2path(file_id)
            assert copyfrom_path not in self.editor._premature_deletes
            self.editor._premature_deletes.add(copyfrom_path)
            self.editor._rename(file_id, self.new_id, path)
            ie = self.editor.inventory[file_id]
            old_file_id = file_id
        else:
            old_file_id = None
            ie = self.editor.inventory.add_path(path, 'directory', file_id)
        ie.revision = self.editor.revid

        return DirectoryBuildEditor(self.editor, old_file_id, file_id)

    def open_directory(self, path, base_revnum):
        """Editor callback: an existing directory is being changed.

        :param path: UTF-8 encoded path of the directory.
        :param base_revnum: Base revision number (only type-checked).
        :return: DirectoryBuildEditor for the directory.
        """
        assert isinstance(path, str)
        path = path.decode("utf-8")
        assert isinstance(base_revnum, int)
        base_file_id = self.editor._get_old_id(self.old_id, path)
        base_revid = self.editor.old_inventory[base_file_id].revision
        file_id = self.editor._get_existing_id(self.old_id, self.new_id, path)
        if file_id == base_file_id:
            file_parents = [base_revid]
            ie = self.editor.inventory[file_id]
        else:
            # Replace if original was inside this branch
            # change id of base_file_id to file_id
            ie = self.editor.inventory[base_file_id]
            for name in ie.children:
                ie.children[name].parent_id = file_id
            # FIXME: Don't touch inventory internals
            del self.editor.inventory._byid[base_file_id]
            self.editor.inventory._byid[file_id] = ie
            ie.file_id = file_id
            file_parents = []
        ie.revision = self.editor.revid
        return DirectoryBuildEditor(self.editor, base_file_id, file_id,
                                    file_parents)

    def change_prop(self, name, value):
        """Editor callback: property name on this directory changed.

        Properties set on the branch root are remembered in the editor's
        _branch_fileprops; merge related properties update the editor's
        merge state; other properties are ignored or logged.
        """
        at_root = (self.new_id == self.editor.inventory.root.file_id)
        if at_root:
            self.editor._branch_fileprops[name] = value

        if name == SVN_PROP_BZR_BRANCHING_SCHEME:
            if not at_root:
                mutter('rogue %r on non-root directory' % name)
                return
        elif name == SVN_PROP_BZR_ANCESTRY+str(self.editor.mapping.scheme):
            if not at_root:
                mutter('rogue %r on non-root directory' % name)
                return

            # Only the last line of the property is parsed.
            self.editor._bzr_merges = parse_merge_property(
                value.splitlines()[-1])
        elif (name.startswith(SVN_PROP_BZR_ANCESTRY) or
              name.startswith(SVN_PROP_BZR_REVISION_ID)):
            pass
        elif name == SVN_PROP_SVK_MERGE:
            # Force Repository.revision_parents() to look it up
            self.editor._svk_merges = None
        elif name == SVN_PROP_BZR_REVISION_INFO:
            if not at_root:
                mutter('rogue %r on non-root directory' %
                       SVN_PROP_BZR_REVISION_INFO)
                return
        elif name in (constants.PROP_ENTRY_COMMITTED_DATE,
                      constants.PROP_ENTRY_COMMITTED_REV,
                      constants.PROP_ENTRY_LAST_AUTHOR,
                      constants.PROP_ENTRY_LOCK_TOKEN,
                      constants.PROP_ENTRY_UUID,
                      constants.PROP_EXECUTABLE):
            pass
        elif name.startswith(constants.PROP_WC_PREFIX):
            pass
        elif name in (SVN_PROP_BZR_MERGE, SVN_PROP_BZR_FILEIDS):
            pass
        elif (name.startswith(constants.PROP_PREFIX) or
              name.startswith(SVN_PROP_BZR_PREFIX)):
            mutter('unsupported dir property %r' % name)

    def add_file(self, path, copyfrom_path=None, copyfrom_revnum=-1):
        """Editor callback: a file was added at path.

        :param path: UTF-8 encoded path of the new file.
        :param copyfrom_path: Path the file was copied from, if any.
        :param copyfrom_revnum: Revision number it was copied from (unused).
        :return: FileBuildEditor for the new file.
        """
        assert isinstance(path, str)
        path = path.decode("utf-8")
        check_filename(path)
        file_id = self.editor._get_new_id(self.new_id, path)
        if file_id in self.editor.inventory:
            # This file was moved here from somewhere else, but the
            # other location hasn't been removed yet.
            if copyfrom_path is None:
                # This should ideally never happen
                copyfrom_path = self.editor.old_inventory.id2path(file_id)
                mutter('no copyfrom path set, assuming %r' % copyfrom_path)
            assert copyfrom_path == self.editor.old_inventory.id2path(file_id)
            assert copyfrom_path not in self.editor._premature_deletes
            self.editor._premature_deletes.add(copyfrom_path)
            # No need to rename if it's already in the right spot
            self.editor._rename(file_id, self.new_id, path)
        return FileBuildEditor(self.editor, path, file_id)

    def open_file(self, path, base_revnum):
        """Editor callback: an existing file is being changed.

        :param path: UTF-8 encoded path of the file.
        :param base_revnum: Base revision number (unused).
        :return: FileBuildEditor primed with the old text of the file.
        """
        assert isinstance(path, str)
        path = path.decode("utf-8")
        base_file_id = self.editor._get_old_id(self.old_id, path)
        base_revid = self.editor.old_inventory[base_file_id].revision
        file_id = self.editor._get_existing_id(self.old_id, self.new_id, path)
        is_symlink = (self.editor.inventory[base_file_id].kind == 'symlink')
        file_data = self.editor._get_file_data(base_file_id, base_revid)
        if file_id == base_file_id:
            file_parents = [base_revid]
        else:
            # Replace
            del self.editor.inventory[base_file_id]
            file_parents = []
        return FileBuildEditor(self.editor, path, file_id,
                               file_parents, file_data, is_symlink=is_symlink)

    def delete_entry(self, path, revnum):
        """Editor callback: path was removed.

        Paths recorded as premature deletes (sources of a move that were
        already renamed) are only forgotten, together with everything
        below them; any other path is removed from the inventory.
        """
        assert isinstance(path, str)
        path = path.decode("utf-8")
        pending = self.editor._premature_deletes
        if path in pending:
            # Delete recursively
            pending.remove(path)
            prefix = "%s/" % path
            pending.difference_update(
                [p for p in pending if p.startswith(prefix)])
        else:
            self.editor.inventory.remove_recursive_id(
                self.editor._get_old_id(self.new_id, path))
class FileBuildEditor:
    """Editor for a single file of the revision being built."""

    def __init__(self, editor, path, file_id, file_parents=None, data="",
                 is_symlink=False):
        """Create a file editor.

        :param editor: The RevisionBuildEditor this file belongs to.
        :param path: Path of the file (unicode).
        :param file_id: File id of the file in the new inventory.
        :param file_parents: Revisions to record as parents of the text;
            defaults to a fresh empty list.
        :param data: Previous contents of the file.
        :param is_symlink: Whether the previous version was a symlink.
        """
        self.path = path
        self.editor = editor
        self.file_id = file_id
        self.file_data = data
        self.is_symlink = is_symlink
        # A literal [] default would be one list shared by every instance.
        if file_parents is None:
            file_parents = []
        self.file_parents = file_parents
        self.is_executable = None
        self.file_stream = None

    def apply_textdelta(self, base_checksum=None):
        """Editor callback: a text delta for the file follows.

        :param base_checksum: Expected MD5 of the previous contents, or
            None to skip the check.
        :return: Handler that applies delta windows to the old contents and
            writes the result to self.file_stream.
        """
        actual_checksum = md5_strings([self.file_data])
        # The message must follow the condition after a comma; wrapping both
        # in parentheses asserts a non-empty tuple, which never fails.
        assert base_checksum is None or base_checksum == actual_checksum, \
            "base checksum mismatch: %r != %r" % (base_checksum,
                                                  actual_checksum)
        self.file_stream = StringIO()
        return apply_txdelta_handler(self.file_data, self.file_stream)

    def change_prop(self, name, value):
        """Editor callback: property name on this file changed."""
        if name == constants.PROP_EXECUTABLE:
            # You'd expect executable to match
            # constants.PROP_EXECUTABLE_VALUE, but that's not
            # how SVN behaves. It appears to consider the presence
            # of the property sufficient to mark it executable.
            self.is_executable = (value is not None)
        elif name == constants.PROP_SPECIAL:
            self.is_symlink = (value is not None)
        elif name == constants.PROP_ENTRY_COMMITTED_REV:
            self.last_file_rev = int(value)
        elif name in (constants.PROP_ENTRY_COMMITTED_DATE,
                      constants.PROP_ENTRY_LAST_AUTHOR,
                      constants.PROP_ENTRY_LOCK_TOKEN,
                      constants.PROP_ENTRY_UUID,
                      constants.PROP_MIME_TYPE):
            pass
        elif name.startswith(constants.PROP_WC_PREFIX):
            pass
        elif name == constants.PROP_EXTERNALS:
            mutter('svn:externals property on file!')
        elif (name.startswith(constants.PROP_PREFIX) or
              name.startswith(SVN_PROP_BZR_PREFIX)):
            mutter('unsupported file property %r' % name)

    def close(self, checksum=None):
        """Editor callback: store the file text and update its entry.

        :param checksum: Expected MD5 of the new contents, or None to skip
            the check.
        """
        assert isinstance(self.path, unicode)
        if self.file_stream is not None:
            self.file_stream.seek(0)
            lines = osutils.split_lines(self.file_stream.read())
        else:
            # Data didn't change or file is new
            lines = osutils.split_lines(self.file_data)

        actual_checksum = md5_strings(lines)
        assert checksum is None or checksum == actual_checksum

        self.editor._store_file(self.file_id, lines, self.file_parents)

        inventory = self.editor.inventory
        if self.file_id in inventory:
            ie = inventory[self.file_id]
        elif self.is_symlink:
            ie = inventory.add_path(self.path, 'symlink', self.file_id)
        else:
            ie = inventory.add_path(self.path, 'file', self.file_id)
        ie.revision = self.editor.revid

        if self.is_symlink:
            ie.kind = 'symlink'
            # The text of a symlink is "link <target>".
            ie.symlink_target = lines[0][len("link "):]
            ie.text_sha1 = None
            ie.text_size = None
            ie.text_id = None
            ie.executable = False
        else:
            ie.text_sha1 = osutils.sha_strings(lines)
            ie.text_size = sum(len(line) for line in lines)
            if self.is_executable is not None:
                ie.executable = self.is_executable
class WeaveRevisionBuildEditor(RevisionBuildEditor):
    """Subversion commit editor that can write to a weave-based repository.
    """

    def __init__(self, source, target):
        """See RevisionBuildEditor.__init__."""
        RevisionBuildEditor.__init__(self, source, target)
        self.weave_store = target.weave_store
        # Defined up front so abort() is safe before any revision started.
        self._write_group_active = False

    def _start_revision(self):
        """Open a write group on the target for the new revision."""
        self._write_group_active = True
        self.target.start_write_group()

    def _store_directory(self, file_id, parents):
        """Add an empty text for the directory unless already present."""
        file_weave = self.weave_store.get_weave_or_empty(file_id,
                                                         self.transact)
        if not file_weave.has_version(self.revid):
            file_weave.add_lines(self.revid, parents, [])

    def _get_file_data(self, file_id, revid):
        """Return the text of file_id as stored for revision revid."""
        file_weave = self.weave_store.get_weave_or_empty(file_id,
                                                         self.transact)
        return file_weave.get_text(revid)

    def _store_file(self, file_id, lines, parents):
        """Add lines as the text of file_id unless already present."""
        file_weave = self.weave_store.get_weave_or_empty(file_id,
                                                         self.transact)
        if not file_weave.has_version(self.revid):
            file_weave.add_lines(self.revid, parents, lines)

    def _finish_commit(self):
        """Add revision, inventory and signature; commit the write group."""
        (rev, signature) = self._get_revision(self.revid)
        self.inventory.revision_id = self.revid
        # Escaping the commit message is really the task of the serialiser
        rev.message = _escape_commit_message(rev.message)
        rev.inventory_sha1 = osutils.sha_string(
            self.target.serialise_inventory(self.inventory))
        self.target.add_revision(self.revid, rev, self.inventory)
        if signature is not None:
            self.target.add_signature_text(self.revid, signature)
        self.target.commit_write_group()
        self._write_group_active = False

    def abort(self):
        """Abort the write group if one is still open."""
        if self._write_group_active:
            self.target.abort_write_group()
            self._write_group_active = False
class PackRevisionBuildEditor(WeaveRevisionBuildEditor):
    """Revision Build Editor for Subversion that is specific for the packs API.
    """

    def __init__(self, source, target):
        """See WeaveRevisionBuildEditor.__init__."""
        WeaveRevisionBuildEditor.__init__(self, source, target)

    def _add_text_to_weave(self, file_id, new_lines, parents):
        """Store new_lines as the text of file_id for the current revision.

        :return: Whatever the pack collection's _add_text_to_weave returns.
        """
        # NOTE(review): the final argument of this call is not visible in
        # the damaged source; random_revid=False is a reconstruction --
        # confirm against the upstream file.
        return self.target._packs._add_text_to_weave(
            file_id, self.revid, new_lines, parents, nostore_sha=None,
            random_revid=False)

    def _store_directory(self, file_id, parents):
        """Store an empty text for a directory."""
        self._add_text_to_weave(file_id, [], parents)

    def _store_file(self, file_id, lines, parents):
        """Store lines as the text of a file."""
        self._add_text_to_weave(file_id, lines, parents)
class CommitBuilderRevisionBuildEditor(RevisionBuildEditor):
    """Revision Build Editor for Subversion that uses the CommitBuilder API.

    Not implemented: constructing an instance raises NotImplementedError.
    """

    def __init__(self, source, target):
        """Initialise the base class, then raise NotImplementedError."""
        RevisionBuildEditor.__init__(self, source, target)
        raise NotImplementedError(self)
def get_revision_build_editor(repository):
    """Obtain a RevisionBuildEditor for a particular target repository.

    :param repository: Repository to obtain the buildeditor for.
    :return: Class object of class descending from RevisionBuildEditor
    """
    # Repositories exposing a _packs attribute get the pack-specific editor.
    if hasattr(repository, '_packs'):
        return PackRevisionBuildEditor
    return WeaveRevisionBuildEditor
class InterFromSvnRepository(InterRepository):
    """Svn to any repository actions."""

    _matching_repo_format = SvnRepositoryFormat()

    _supports_branches = True

    # Class-level default so _fetch_revision() also works when it is used
    # without a preceding fetch() call; fetch() resets it per run.
    _supports_replay = True

    @staticmethod
    def _get_repo_format_to_test():
        return None

    def _find_all(self):
        """Find all revisions from the source repository that are not
        yet in the target repository.

        :return: Tuple with the list of missing revision ids and a
            dictionary mapping each of them to its mainline parent.
        """
        # A real list is required: it is reversed in place below.
        needed = [revid for revid in self.source.all_revision_ids()
                  if not self.target.has_revision(revid)]
        parents = {}
        for revid in needed:
            (branch, revnum, mapping) = self.source.lookup_revision_id(revid)
            parents[revid] = self.source._mainline_revision_parent(
                branch, revnum, mapping)
        needed.reverse()
        return (needed, parents)

    def _find_branches(self, branches, find_ghosts=False):
        """Find the revisions missing for a number of branch tips.

        :param branches: Revision ids to fetch up to.
        :param find_ghosts: Passed on to _find_until().
        :return: Tuple with the missing revision ids (without duplicates,
            in the order found) and a dictionary with their parents.
        """
        set_needed = set()
        ret_needed = []
        ret_parents = {}
        for revid in branches:
            (needed, parents) = self._find_until(revid,
                                                 find_ghosts=find_ghosts)
            for rev in needed:
                if rev not in set_needed:
                    ret_needed.append(rev)
                    set_needed.add(rev)
            ret_parents.update(parents)
        return ret_needed, ret_parents

    def _find_until(self, revision_id, find_ghosts=False):
        """Find all missing revisions until revision_id

        :param revision_id: Stop revision
        :param find_ghosts: Find ghosts
        :return: Tuple with revisions missing and a dictionary with
            parents for those revision.
        """
        needed = []
        parents = {}

        prev_revid = None
        pb = ui.ui_factory.nested_progress_bar()
        try:
            for revid in self.source.iter_lhs_ancestry(revision_id, pb):
                if prev_revid is not None:
                    parents[prev_revid] = revid

                prev_revid = revid

                if not self.target.has_revision(revid):
                    needed.append(revid)
                elif not find_ghosts:
                    # Reached a revision the target already has.
                    break
        finally:
            pb.finished()

        parents[prev_revid] = None
        needed.reverse()
        return (needed, parents)

    def copy_content(self, revision_id=None, pb=None):
        """See InterRepository.copy_content."""
        self.fetch(revision_id, pb, find_ghosts=False)

    def _fetch_revision(self, editor, transport, repos_root, parent_revid):
        """Fetch one revision, using replay when supported.

        If replay raises NotImplementedError, replay is disabled on this
        object and the revision is fetched with update/switch.
        """
        if self._supports_replay:
            try:
                self._fetch_revision_replay(editor, transport, repos_root,
                                            parent_revid)
                return
            except NotImplementedError:
                self._supports_replay = False
        self._fetch_revision_update(editor, transport, repos_root,
                                    parent_revid)

    def _fetch_revision_replay(self, editor, transport, repos_root,
                               parent_revid):
        """Drive editor by replaying editor.revnum against its parent."""
        if parent_revid is not None:
            parent_revnum = self.source.lookup_revision_id(parent_revid)[1]
        else:
            parent_revnum = editor.revnum-1
        branch_url = urlutils.join(repos_root, editor.branch_path)
        transport.reparent(branch_url)
        lock = transport.lock_read(".")
        try:
            transport.replay(editor.revnum, parent_revnum, editor, True)
        finally:
            lock.unlock()

    def _fetch_revision_update(self, editor, transport, repos_root,
                               parent_revid):
        """Drive editor with an update (or switch) to editor.revnum."""
        if parent_revid is None:
            branch_url = urlutils.join(repos_root, editor.branch_path)
            transport.reparent(branch_url)
            assert transport.svn_url == branch_url.rstrip("/"), \
                "Expected %r, got %r" % (transport.svn_url, branch_url)
            reporter = transport.do_update(editor.revnum, True, editor)

            # Report status of existing paths
            reporter.set_path("", editor.revnum, True)
        else:
            (parent_branch, parent_revnum, mapping) = \
                self.source.lookup_revision_id(parent_revid)
            transport.reparent(urlutils.join(repos_root, parent_branch))

            if parent_branch != editor.branch_path:
                reporter = transport.do_switch(
                    editor.revnum, True,
                    urlutils.join(repos_root, editor.branch_path), editor)
            else:
                reporter = transport.do_update(editor.revnum, True, editor)

            # Report status of existing paths
            reporter.set_path("", parent_revnum, False)

        lock = transport.lock_read(".")
        try:
            # NOTE(review): the call that completes the report is not
            # visible in the damaged source; finish() is a reconstruction
            # -- confirm against the upstream file.
            reporter.finish()
        finally:
            # Release the lock even when finishing the report fails.
            lock.unlock()

    def _fetch_switch(self, revids, pb=None, lhs_parent=None):
        """Copy a set of related revisions using ra_switch.

        :param revids: List of revision ids of revisions to copy,
                       newest first.
        :param pb: Optional progress bar.
        :param lhs_parent: Dictionary mapping each revision id in revids to
            its left-hand parent (None when there is none).
        """
        repos_root = self.source.transport.get_svn_repos_root()

        prev_revid = None
        transport = self.source.transport
        if pb is None:
            pb = ui.ui_factory.nested_progress_bar()
            nested_pb = pb
        else:
            nested_pb = None
        num = 0
        prev_inv = None

        self.target.lock_write()
        revbuildklass = get_revision_build_editor(self.target)
        editor = revbuildklass(self.source, self.target)

        try:
            for revid in revids:
                pb.update('copying revision', num, len(revids))

                parent_revid = lhs_parent[revid]

                if parent_revid is None:
                    parent_inv = Inventory(root_id=None)
                elif prev_revid != parent_revid:
                    parent_inv = self.target.get_inventory(parent_revid)
                else:
                    # Parent is the revision just built; reuse its inventory.
                    parent_inv = prev_inv

                editor.start_revision(revid, parent_inv)

                try:
                    self._fetch_revision_update(editor, transport,
                                                repos_root, parent_revid)
                except:
                    # Clean up on any failure, then re-raise it unchanged.
                    editor.abort()
                    raise

                prev_inv = editor.inventory
                prev_revid = revid
                num += 1
        finally:
            self.target.unlock()
            if nested_pb is not None:
                nested_pb.finished()
        self.source.transport.reparent_root()

    def fetch(self, revision_id=None, pb=None, find_ghosts=False,
              branches=None):
        """Fetch revisions.

        :param revision_id: Revision to fetch up to; None fetches all
            revisions, NULL_REVISION fetches nothing.
        :param pb: Optional progress bar.
        :param find_ghosts: Passed on when looking for missing revisions.
        :param branches: Optional list of revision ids to fetch up to;
            takes precedence over revision_id.
        """
        if revision_id == NULL_REVISION:
            return

        self._supports_replay = True # assume replay supported by default
        # Dictionary with paths as keys, revnums as values

        # Loop over all the revnums until revision_id
        # (or youngest_revnum) and call self.target.add_revision()
        # or self.target.add_inventory() each time
        self.target.lock_read()
        try:
            if branches is not None:
                (needed, lhs_parent) = self._find_branches(branches,
                                                           find_ghosts)
            elif revision_id is None:
                (needed, lhs_parent) = self._find_all()
            else:
                (needed, lhs_parent) = self._find_until(revision_id,
                                                        find_ghosts)
        finally:
            self.target.unlock()

        if not needed:
            # Nothing to fetch
            return

        self._fetch_switch(needed, pb, lhs_parent)

    @staticmethod
    def is_compatible(source, target):
        """Be compatible with SvnRepository."""
        # FIXME: Also check target uses VersionedFile
        return (isinstance(source, SvnRepository) and
                target.supports_rich_root())