1 # Copyright (C) 2005-2007 Jelmer Vernooij <jelmer@samba.org>
3 # This program is free software; you can redistribute it and/or modify
4 # it under the terms of the GNU General Public License as published by
5 # the Free Software Foundation; either version 3 of the License, or
6 # (at your option) any later version.
8 # This program is distributed in the hope that it will be useful,
9 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 # GNU General Public License for more details.
13 # You should have received a copy of the GNU General Public License
14 # along with this program; if not, write to the Free Software
15 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
16 """Fetching revisions from Subversion repositories in batches."""
19 from bzrlib import osutils, ui, urlutils
20 from bzrlib.inventory import Inventory
21 from bzrlib.revision import Revision, NULL_REVISION
22 from bzrlib.repository import InterRepository
23 from bzrlib.trace import mutter
25 from cStringIO import StringIO
28 from bzrlib.plugins.svn.delta import apply_txdelta_handler
29 from bzrlib.plugins.svn import properties
30 from bzrlib.plugins.svn.errors import InvalidFileName
31 from bzrlib.plugins.svn.logwalker import lazy_dict
32 from bzrlib.plugins.svn.mapping import (SVN_PROP_BZR_MERGE,
33 SVN_PROP_BZR_PREFIX, SVN_PROP_BZR_REVISION_INFO,
34 SVN_PROP_BZR_REVISION_ID,
35 SVN_PROP_BZR_FILEIDS, SVN_REVPROP_BZR_SIGNATURE,
37 parse_revision_metadata)
38 from bzrlib.plugins.svn.repository import SvnRepository, SvnRepositoryFormat
39 from bzrlib.plugins.svn.svk import SVN_PROP_SVK_MERGE
40 from bzrlib.plugins.svn.tree import (parse_externals_description,
41 inventory_add_external)
44 def _escape_commit_message(message):
45 """Replace xml-incompatible control characters."""
49 # FIXME: RBC 20060419 this should be done by the revision
50 # serialiser not by commit. Then we can also add an unescaper
51 # in the deserializer and start roundtripping revision messages
52 # precisely. See repository_implementations/test_repository.py
54 # Python strings can include characters that can't be
55 # represented in well-formed XML; escape characters that
56 # aren't listed in the XML specification
57 # (http://www.w3.org/TR/REC-xml/#NT-Char).
59 u'[^\x09\x0A\x0D\u0020-\uD7FF\uE000-\uFFFD]+',
60 lambda match: match.group(0).encode('unicode_escape'),
65 def md5_strings(strings):
66 """Return the MD5sum of the concatenation of strings.
68 :param strings: Strings to find the MD5sum of.
72 map(s.update, strings)
76 def check_filename(path):
77 """Check that a path does not contain invalid characters.
79 :param path: Path to check
80 :raises InvalidFileName:
82 assert isinstance(path, unicode)
84 raise InvalidFileName(path)
87 class RevisionBuildEditor:
88 """Implementation of the Subversion commit editor interface that builds a
91 def __init__(self, source, target):
94 self.transact = target.get_transaction()
96 def set_target_revision(self, target_revision):
99 def start_revision(self, revid, prev_inventory, revmeta):
101 (self.branch_path, self.revnum, self.mapping) = self.source.lookup_revision_id(revid)
102 self.revmeta = revmeta
104 self.dir_baserev = {}
106 self._premature_deletes = set()
107 self.old_inventory = prev_inventory
108 self.inventory = prev_inventory.copy()
109 self._start_revision()
111 def _get_id_map(self):
112 if self._id_map is not None:
115 renames = self.mapping.import_fileid_map(self.revmeta.revprops, self.revmeta.fileprops)
116 self._id_map = self.source.transform_fileid_map(self.source.uuid,
117 self.revnum, self.branch_path, self.revmeta.paths, renames,
122 def _get_revision(self, revid):
123 """Creates the revision object.
125 :param revid: Revision id of the revision to create.
128 # Commit SVN revision properties to a Revision object
129 rev = Revision(revision_id=revid, parent_ids=self.revmeta.get_parent_ids(self.mapping))
131 self.mapping.import_revision(self.revmeta.revprops, self.revmeta.fileprops, rev)
133 signature = self.revmeta.revprops.get(SVN_REVPROP_BZR_SIGNATURE)
135 return (rev, signature)
137 def open_root(self, base_revnum):
138 if self.old_inventory.root is None:
139 # First time the root is set
141 file_id = self.mapping.generate_file_id(self.source.uuid, self.revnum, self.branch_path, u"")
144 assert self.old_inventory.root.revision is not None
145 old_file_id = self.old_inventory.root.file_id
146 file_id = self._get_id_map().get("", old_file_id)
147 file_parents = [self.old_inventory.root.revision]
149 if self.inventory.root is not None and \
150 file_id == self.inventory.root.file_id:
151 ie = self.inventory.root
153 ie = self.inventory.add_path("", 'directory', file_id)
154 ie.revision = self.revid
155 return DirectoryBuildEditor(self, old_file_id, file_id, file_parents)
160 def _store_directory(self, file_id, parents):
161 raise NotImplementedError(self._store_directory)
163 def _get_file_data(self, file_id, revid):
164 raise NotImplementedError(self._get_file_data)
166 def _finish_commit(self):
167 raise NotImplementedError(self._finish_commit)
172 def _start_revision(self):
175 def _store_file(self, file_id, lines, parents):
176 raise NotImplementedError(self._store_file)
178 def _get_existing_id(self, old_parent_id, new_parent_id, path):
179 assert isinstance(path, unicode)
180 assert isinstance(old_parent_id, str)
181 assert isinstance(new_parent_id, str)
182 ret = self._get_id_map().get(path)
185 return self.old_inventory[old_parent_id].children[urlutils.basename(path)].file_id
def _get_old_id(self, parent_id, old_path):
    """Find the file id an entry has in the previous inventory.

    :param parent_id: File id (a str) of the containing directory, looked
        up in self.old_inventory.
    :param old_path: Path of the entry as unicode; only its basename is
        used for the lookup.
    :return: The file_id of the parent's child with that basename.
    """
    assert isinstance(old_path, unicode)
    assert isinstance(parent_id, str)
    parent_entry = self.old_inventory[parent_id]
    siblings = parent_entry.children
    entry_name = urlutils.basename(old_path)
    return siblings[entry_name].file_id
192 def _get_new_id(self, parent_id, new_path):
193 assert isinstance(new_path, unicode)
194 assert isinstance(parent_id, str)
195 ret = self._get_id_map().get(new_path)
198 return self.mapping.generate_file_id(self.source.uuid, self.revnum,
199 self.branch_path, new_path)
201 def _rename(self, file_id, parent_id, path):
202 assert isinstance(path, unicode)
203 assert isinstance(parent_id, str)
204 # Only rename if not right yet
205 if (self.inventory[file_id].parent_id == parent_id and
206 self.inventory[file_id].name == urlutils.basename(path)):
208 self.inventory.rename(file_id, parent_id, urlutils.basename(path))
210 class DirectoryBuildEditor:
211 def __init__(self, editor, old_id, new_id, parent_revids=[]):
215 self.parent_revids = parent_revids
218 self.editor.inventory[self.new_id].revision = self.editor.revid
219 self.editor._store_directory(self.new_id, self.parent_revids)
221 if self.new_id == self.editor.inventory.root.file_id:
222 assert len(self.editor._premature_deletes) == 0
223 self.editor._finish_commit()
225 def add_directory(self, path, copyfrom_path=None, copyfrom_revnum=-1):
226 assert isinstance(path, str)
227 path = path.decode("utf-8")
229 file_id = self.editor._get_new_id(self.new_id, path)
231 if file_id in self.editor.inventory:
232 # This directory was moved here from somewhere else, but the
233 # other location hasn't been removed yet.
234 if copyfrom_path is None:
235 # This should ideally never happen!
236 copyfrom_path = self.editor.old_inventory.id2path(file_id)
237 mutter('no copyfrom path set, assuming %r' % copyfrom_path)
238 assert copyfrom_path == self.editor.old_inventory.id2path(file_id)
239 assert copyfrom_path not in self.editor._premature_deletes
240 self.editor._premature_deletes.add(copyfrom_path)
241 self.editor._rename(file_id, self.new_id, path)
242 ie = self.editor.inventory[file_id]
243 old_file_id = file_id
246 ie = self.editor.inventory.add_path(path, 'directory', file_id)
247 ie.revision = self.editor.revid
249 return DirectoryBuildEditor(self.editor, old_file_id, file_id)
251 def open_directory(self, path, base_revnum):
252 assert isinstance(path, str)
253 path = path.decode("utf-8")
254 assert isinstance(base_revnum, int)
255 base_file_id = self.editor._get_old_id(self.old_id, path)
256 base_revid = self.editor.old_inventory[base_file_id].revision
257 file_id = self.editor._get_existing_id(self.old_id, self.new_id, path)
258 if file_id == base_file_id:
259 file_parents = [base_revid]
260 ie = self.editor.inventory[file_id]
262 # Replace if original was inside this branch
263 # change id of base_file_id to file_id
264 ie = self.editor.inventory[base_file_id]
265 for name in ie.children:
266 ie.children[name].parent_id = file_id
267 # FIXME: Don't touch inventory internals
268 del self.editor.inventory._byid[base_file_id]
269 self.editor.inventory._byid[file_id] = ie
272 ie.revision = self.editor.revid
273 return DirectoryBuildEditor(self.editor, base_file_id, file_id,
276 def change_prop(self, name, value):
277 if self.new_id == self.editor.inventory.root.file_id:
278 # Replay lazy_dict, since it may be more expensive
279 if type(self.editor.revmeta.fileprops) != dict:
280 self.editor.revmeta.fileprops = {}
281 self.editor.revmeta.fileprops[name] = value
283 if name in (properties.PROP_ENTRY_COMMITTED_DATE,
284 properties.PROP_ENTRY_COMMITTED_REV,
285 properties.PROP_ENTRY_LAST_AUTHOR,
286 properties.PROP_ENTRY_LOCK_TOKEN,
287 properties.PROP_ENTRY_UUID,
288 properties.PROP_EXECUTABLE):
290 elif (name.startswith(properties.PROP_WC_PREFIX)):
292 elif name.startswith(properties.PROP_PREFIX):
293 mutter('unsupported dir property %r' % name)
295 def add_file(self, path, copyfrom_path=None, copyfrom_revnum=-1):
296 assert isinstance(path, str)
297 path = path.decode("utf-8")
299 file_id = self.editor._get_new_id(self.new_id, path)
300 if file_id in self.editor.inventory:
301 # This file was moved here from somewhere else, but the
302 # other location hasn't been removed yet.
303 if copyfrom_path is None:
304 # This should ideally never happen
305 copyfrom_path = self.editor.old_inventory.id2path(file_id)
306 mutter('no copyfrom path set, assuming %r' % copyfrom_path)
307 assert copyfrom_path == self.editor.old_inventory.id2path(file_id)
308 assert copyfrom_path not in self.editor._premature_deletes
309 self.editor._premature_deletes.add(copyfrom_path)
310 # No need to rename if it's already in the right spot
311 self.editor._rename(file_id, self.new_id, path)
312 return FileBuildEditor(self.editor, path, file_id)
314 def open_file(self, path, base_revnum):
315 assert isinstance(path, str)
316 path = path.decode("utf-8")
317 base_file_id = self.editor._get_old_id(self.old_id, path)
318 base_revid = self.editor.old_inventory[base_file_id].revision
319 file_id = self.editor._get_existing_id(self.old_id, self.new_id, path)
320 is_symlink = (self.editor.inventory[base_file_id].kind == 'symlink')
321 file_data = self.editor._get_file_data(base_file_id, base_revid)
322 if file_id == base_file_id:
323 file_parents = [base_revid]
326 del self.editor.inventory[base_file_id]
328 return FileBuildEditor(self.editor, path, file_id,
329 file_parents, file_data, is_symlink=is_symlink)
331 def delete_entry(self, path, revnum):
332 assert isinstance(path, str)
333 path = path.decode("utf-8")
334 if path in self.editor._premature_deletes:
336 self.editor._premature_deletes.remove(path)
337 for p in self.editor._premature_deletes.copy():
338 if p.startswith("%s/" % path):
339 self.editor._premature_deletes.remove(p)
341 self.editor.inventory.remove_recursive_id(self.editor._get_old_id(self.old_id, path))
343 class FileBuildEditor:
344 def __init__(self, editor, path, file_id, file_parents=[], data="",
348 self.file_id = file_id
349 self.file_data = data
350 self.is_symlink = is_symlink
351 self.file_parents = file_parents
352 self.is_executable = None
353 self.file_stream = None
355 def apply_textdelta(self, base_checksum=None):
356 actual_checksum = md5.new(self.file_data).hexdigest()
357 assert (base_checksum is None or base_checksum == actual_checksum,
358 "base checksum mismatch: %r != %r" % (base_checksum,
360 self.file_stream = StringIO()
361 return apply_txdelta_handler(self.file_data, self.file_stream)
363 def change_prop(self, name, value):
364 if name == properties.PROP_EXECUTABLE:
365 # You'd expect executable to match
366 # constants.PROP_EXECUTABLE_VALUE, but that's not
367 # how SVN behaves. It appears to consider the presence
368 # of the property sufficient to mark it executable.
369 self.is_executable = (value != None)
370 elif (name == properties.PROP_SPECIAL):
371 self.is_symlink = (value != None)
372 elif name == properties.PROP_ENTRY_COMMITTED_REV:
373 self.last_file_rev = int(value)
374 elif name in (properties.PROP_ENTRY_COMMITTED_DATE,
375 properties.PROP_ENTRY_LAST_AUTHOR,
376 properties.PROP_ENTRY_LOCK_TOKEN,
377 properties.PROP_ENTRY_UUID,
378 properties.PROP_MIME_TYPE):
380 elif name.startswith(properties.PROP_WC_PREFIX):
382 elif name == properties.PROP_EXTERNALS:
383 mutter('svn:externals property on file!')
384 elif (name.startswith(properties.PROP_PREFIX) or
385 name.startswith(SVN_PROP_BZR_PREFIX)):
386 mutter('unsupported file property %r' % name)
388 def close(self, checksum=None):
389 assert isinstance(self.path, unicode)
390 if self.file_stream is not None:
391 self.file_stream.seek(0)
392 lines = osutils.split_lines(self.file_stream.read())
394 # Data didn't change or file is new
395 lines = osutils.split_lines(self.file_data)
397 actual_checksum = md5_strings(lines)
398 assert checksum is None or checksum == actual_checksum
400 self.editor._store_file(self.file_id, lines, self.file_parents)
402 assert self.is_symlink in (True, False)
404 if self.file_id in self.editor.inventory:
405 del self.editor.inventory[self.file_id]
408 ie = self.editor.inventory.add_path(self.path, 'symlink', self.file_id)
409 ie.symlink_target = lines[0][len("link "):]
412 ie.executable = False
413 ie.revision = self.editor.revid
415 ie = self.editor.inventory.add_path(self.path, 'file', self.file_id)
416 ie.revision = self.editor.revid
418 ie.symlink_target = None
419 ie.text_sha1 = osutils.sha_strings(lines)
420 ie.text_size = sum(map(len, lines))
421 assert ie.text_size is not None
422 if self.is_executable is not None:
423 ie.executable = self.is_executable
425 self.file_stream = None
428 class WeaveRevisionBuildEditor(RevisionBuildEditor):
429 """Subversion commit editor that can write to a weave-based repository.
def __init__(self, source, target):
    """Initialise the base editor and keep the target's weave store.

    :param source: Passed through to RevisionBuildEditor.__init__.
    :param target: Passed through to RevisionBuildEditor.__init__; its
        weave_store attribute is remembered as self.weave_store.
    """
    RevisionBuildEditor.__init__(self, source, target)
    store = target.weave_store
    self.weave_store = store
435 def _start_revision(self):
436 self._write_group_active = True
437 self.target.start_write_group()
439 def _store_directory(self, file_id, parents):
440 file_weave = self.weave_store.get_weave_or_empty(file_id, self.transact)
441 if not file_weave.has_version(self.revid):
442 file_weave.add_lines(self.revid, parents, [])
444 def _get_file_data(self, file_id, revid):
445 file_weave = self.weave_store.get_weave_or_empty(file_id, self.transact)
446 return file_weave.get_text(revid)
448 def _store_file(self, file_id, lines, parents):
449 file_weave = self.weave_store.get_weave_or_empty(file_id, self.transact)
450 if not file_weave.has_version(self.revid):
451 file_weave.add_lines(self.revid, parents, lines)
def _finish_commit(self):
    """Add the finished revision to the target and commit the write group.

    The revision object and optional signature come from _get_revision.
    The inventory is stamped with the revision id, the commit message is
    escaped and inventory_sha1 is reset to None before the revision is
    added.  A signature text is added only when one was found.  The
    write-group flag is cleared only after the commit call returns.
    """
    revid = self.revid
    (revision, sig) = self._get_revision(revid)
    self.inventory.revision_id = self.revid
    # Escaping the commit message is really the task of the serialiser
    revision.message = _escape_commit_message(revision.message)
    revision.inventory_sha1 = None
    self.target.add_revision(self.revid, revision, self.inventory)
    if sig is not None:
        self.target.add_signature_text(self.revid, sig)
    self.target.commit_write_group()
    self._write_group_active = False
466 if self._write_group_active:
467 self.target.abort_write_group()
468 self._write_group_active = False
471 class PackRevisionBuildEditor(WeaveRevisionBuildEditor):
472 """Revision Build Editor for Subversion that is specific for the packs API.
def __init__(self, source, target):
    """Initialise purely through WeaveRevisionBuildEditor.__init__.

    :param source: Forwarded unchanged to the parent initialiser.
    :param target: Forwarded unchanged to the parent initialiser.
    """
    parent_init = WeaveRevisionBuildEditor.__init__
    parent_init(self, source, target)
477 def _add_text_to_weave(self, file_id, new_lines, parents):
478 return self.target._packs._add_text_to_weave(file_id,
479 self.revid, new_lines, parents, nostore_sha=None,
482 def _store_directory(self, file_id, parents):
483 self._add_text_to_weave(file_id, [], parents)
485 def _store_file(self, file_id, lines, parents):
486 self._add_text_to_weave(file_id, lines, parents)
489 class CommitBuilderRevisionBuildEditor(RevisionBuildEditor):
490 """Revision Build Editor for Subversion that uses the CommitBuilder API.
def __init__(self, source, target):
    """Run the base initialiser, then refuse to be constructed.

    :param source: Forwarded to RevisionBuildEditor.__init__.
    :param target: Forwarded to RevisionBuildEditor.__init__.
    :raises NotImplementedError: Always, after the base initialiser has
        run; the argument is this instance.
    """
    base_init = RevisionBuildEditor.__init__
    base_init(self, source, target)
    raise NotImplementedError(self)
def get_revision_build_editor(repository):
    """Pick the RevisionBuildEditor class that fits a target repository.

    A repository exposing a ``_packs`` attribute gets the pack-specific
    editor; any other repository gets the weave-based one.

    :param repository: Repository to obtain the buildeditor for.
    :return: Class object of class descending from RevisionBuildEditor
    """
    if not hasattr(repository, '_packs'):
        return WeaveRevisionBuildEditor
    return PackRevisionBuildEditor
508 class InterFromSvnRepository(InterRepository):
509 """Svn to any repository actions."""
511 _matching_repo_format = SvnRepositoryFormat()
513 _supports_branches = True
516 def _get_repo_format_to_test():
519 def _find_all(self, mapping, pb=None):
520 """Find all revisions from the source repository that are not
521 yet in the target repository.
525 graph = self.source.get_graph()
526 available_revs = set()
527 for revmeta in self.source.iter_all_changes(pb=pb):
528 revid = revmeta.get_revision_id(mapping)
529 available_revs.add(revid)
530 meta_map[revid] = revmeta
531 missing = available_revs.difference(self.target.has_revisions(available_revs))
532 needed = list(graph.iter_topo_order(missing))
533 parents = graph.get_parent_map(needed)
534 return [(revid, parents[revid][0], meta_map[revid]) for revid in needed]
536 def _find_branches(self, branches, find_ghosts=False, fetch_rhs_ancestry=False, pb=None):
539 for revid in branches:
541 pb.update("determining revisions to fetch", branches.index(revid), len(branches))
543 nestedpb = ui.ui_factory.nested_progress_bar()
544 for rev in self._find_until(revid, find_ghosts=find_ghosts, fetch_rhs_ancestry=False,
546 if rev[0] not in set_needed:
547 ret_needed.append(rev)
548 set_needed.add(rev[0])
553 def _find_until(self, revision_id, find_ghosts=False, fetch_rhs_ancestry=False, pb=None):
554 """Find all missing revisions until revision_id
556 :param revision_id: Stop revision
557 :param find_ghosts: Find ghosts
558 :param fetch_rhs_ancestry: Fetch right hand side ancestors
559 :return: Tuple with revisions missing and a dictionary with
560 parents for those revision.
567 def check_revid(revision_id):
569 (branch_path, revnum, mapping) = self.source.lookup_revision_id(revision_id)
570 for revmeta in self.source.iter_reverse_branch_changes(branch_path, revnum, mapping):
572 pb.update("determining revisions to fetch", revnum-revmeta.revnum, revnum)
573 revid = revmeta.get_revision_id(mapping)
574 lhs_parent[prev] = revid
575 meta_map[revid] = revmeta
576 if fetch_rhs_ancestry:
577 extra.update(revmeta.get_rhs_parents(mapping))
578 if not self.target.has_revision(revid):
580 elif not find_ghosts:
584 lhs_parent[prev] = NULL_REVISION
586 check_revid(revision_id)
589 if revid not in revs:
592 needed = [(revid, lhs_parent[revid], meta_map[revid]) for revid in reversed(revs)]
def copy_content(self, revision_id=None, pb=None):
    """See InterRepository.copy_content.

    Delegates to self.fetch with ghost finding switched off; whatever
    fetch returns is discarded.

    :param revision_id: Revision id handed on to fetch.
    :param pb: Progress bar handed on to fetch.
    """
    self.fetch(revision_id=revision_id, pb=pb, find_ghosts=False)
600 def _fetch_switch(self, repos_root, revids, pb=None):
601 """Copy a set of related revisions using svn.ra.switch.
603 :param revids: List of revision ids of revisions to copy,
605 :param pb: Optional progress bar.
609 pb = ui.ui_factory.nested_progress_bar()
616 self.target.lock_write()
617 revbuildklass = get_revision_build_editor(self.target)
618 editor = revbuildklass(self.source, self.target)
621 for (revid, parent_revid, revmeta) in revids:
622 pb.update('copying revision', num, len(revids))
624 assert parent_revid is not None
626 if parent_revid == NULL_REVISION:
627 parent_inv = Inventory(root_id=None)
628 elif prev_revid != parent_revid:
629 parent_inv = self.target.get_inventory(parent_revid)
631 parent_inv = prev_inv
633 editor.start_revision(revid, parent_inv, revmeta)
638 if parent_revid == NULL_REVISION:
639 branch_url = urlutils.join(repos_root,
642 conn = self.source.transport.connections.get(branch_url)
643 reporter = conn.do_update(editor.revnum, "", True,
647 # Report status of existing paths
648 reporter.set_path("", editor.revnum, True, None)
653 (parent_branch, parent_revnum, mapping) = \
654 self.source.lookup_revision_id(parent_revid)
655 conn = self.source.transport.connections.get(urlutils.join(repos_root, parent_branch))
657 if parent_branch != editor.branch_path:
658 reporter = conn.do_switch(editor.revnum, "", True,
659 urlutils.join(repos_root, editor.branch_path),
662 reporter = conn.do_update(editor.revnum, "", True, editor)
665 # Report status of existing paths
666 reporter.set_path("", parent_revnum, False, None)
674 self.source.transport.add_connection(conn)
679 prev_inv = editor.inventory
684 if nested_pb is not None:
687 def fetch(self, revision_id=None, pb=None, find_ghosts=False,
688 branches=None, fetch_rhs_ancestry=False):
689 """Fetch revisions. """
690 if revision_id == NULL_REVISION:
693 self._supports_replay = True # assume replay supported by default
694 # Dictionary with paths as keys, revnums as values
697 pb.update("determining revisions to fetch", 0, 2)
699 # Loop over all the revnums until revision_id
700 # (or youngest_revnum) and call self.target.add_revision()
701 # or self.target.add_inventory() each time
702 self.target.lock_read()
704 if branches is not None:
705 needed = self._find_branches(branches, find_ghosts, fetch_rhs_ancestry, pb=pb)
706 elif revision_id is None:
707 needed = self._find_all(self.source.get_mapping(), pb=pb)
709 needed = self._find_until(revision_id, find_ghosts, fetch_rhs_ancestry, pb=pb)
717 self._fetch_switch(self.source.transport.get_svn_repos_root(), needed, pb)
def is_compatible(source, target):
    """Be compatible with SvnRepository.

    Takes no ``self``; presumably declared as a static method on a line
    not visible here -- confirm against the full file.

    :param source: Candidate source repository.
    :param target: Candidate target repository.
    :return: False when source is not an SvnRepository, otherwise the
        result of target.supports_rich_root().
    """
    # FIXME: Also check target uses VersionedFile
    if not isinstance(source, SvnRepository):
        return False
    return target.supports_rich_root()