1 # Copyright (C) 2005-2007 Jelmer Vernooij <jelmer@samba.org>
3 # This program is free software; you can redistribute it and/or modify
4 # it under the terms of the GNU General Public License as published by
5 # the Free Software Foundation; either version 3 of the License, or
6 # (at your option) any later version.
8 # This program is distributed in the hope that it will be useful,
9 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 # GNU General Public License for more details.
13 # You should have received a copy of the GNU General Public License
14 # along with this program; if not, write to the Free Software
15 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
16 """Fetching revisions from Subversion repositories in batches."""
19 from bzrlib import osutils, ui, urlutils
20 from bzrlib.errors import NoSuchRevision
21 from bzrlib.inventory import Inventory
22 from bzrlib.revision import Revision, NULL_REVISION
23 from bzrlib.repository import InterRepository
24 from bzrlib.trace import mutter
26 from cStringIO import StringIO
29 from bzrlib.plugins.svn import properties
30 from bzrlib.plugins.svn.delta import apply_txdelta_handler
31 from bzrlib.plugins.svn.errors import InvalidFileName
32 from bzrlib.plugins.svn.logwalker import lazy_dict
33 from bzrlib.plugins.svn.mapping import (SVN_PROP_BZR_MERGE,
34 SVN_PROP_BZR_PREFIX, SVN_PROP_BZR_REVISION_INFO,
35 SVN_PROP_BZR_REVISION_ID,
36 SVN_PROP_BZR_FILEIDS, SVN_REVPROP_BZR_SIGNATURE,
38 parse_revision_metadata)
39 from bzrlib.plugins.svn.properties import parse_externals_description
40 from bzrlib.plugins.svn.repository import SvnRepository, SvnRepositoryFormat
41 from bzrlib.plugins.svn.svk import SVN_PROP_SVK_MERGE
42 from bzrlib.plugins.svn.transport import _url_escape_uri
43 from bzrlib.plugins.svn.tree import inventory_add_external
45 def _escape_commit_message(message):
46 """Replace xml-incompatible control characters."""
50 # FIXME: RBC 20060419 this should be done by the revision
51 # serialiser not by commit. Then we can also add an unescaper
52 # in the deserializer and start roundtripping revision messages
53 # precisely. See repository_implementations/test_repository.py
55 # Python strings can include characters that can't be
56 # represented in well-formed XML; escape characters that
57 # aren't listed in the XML specification
58 # (http://www.w3.org/TR/REC-xml/#NT-Char).
60 u'[^\x09\x0A\x0D\u0020-\uD7FF\uE000-\uFFFD]+',
61 lambda match: match.group(0).encode('unicode_escape'),
def md5_strings(strings):
    """Return the MD5sum of the concatenation of strings.

    :param strings: Iterable of byte strings to find the MD5sum of.
    :return: Hexadecimal MD5 digest of all the strings, fed in order.
    """
    try:
        from hashlib import md5 as new_md5
    except ImportError:
        # Python < 2.5 only ships the stand-alone md5 module.
        from md5 import new as new_md5
    digest = new_md5()
    # Explicit loop: map() must not be relied upon for side effects.
    for chunk in strings:
        digest.update(chunk)
    return digest.hexdigest()
def check_filename(path):
    """Check that a path does not contain invalid characters.

    :param path: Path to check (unicode)
    :raises InvalidFileName: if the path contains an unsupported character
    """
    assert isinstance(path, unicode)
    # NOTE(review): the condition line was missing from the reviewed copy of
    # this file; a backslash check is what upstream bzr-svn is believed to
    # use here - confirm before merging.
    if u"\\" in path:
        raise InvalidFileName(path)
class RevisionBuildEditor(object):
    """Implementation of the Subversion commit editor interface that builds a
    Bazaar revision.

    One instance is reused for many revisions: call start_revision() before
    driving the editor for each revision. Subclasses provide
    _start_revision() and _finish_commit() to store the result.

    NOTE(review): the reviewed copy of this file had lines dropped; simple
    control-flow lines (else/return/pass) were restored from context.
    """

    def __init__(self, source, target):
        """
        :param source: Subversion repository the revisions are read from.
        :param target: Repository being written to; its ``texts`` store
            receives the file and directory texts.
        """
        self.target = target
        self.source = source
        self.texts = target.texts

    def set_target_revision(self, revnum):
        """Editor callback: check the revision sent matches the one started."""
        assert self.revnum == revnum

    def start_revision(self, revid, prev_inventory, revmeta):
        """Prepare the editor for building a single revision.

        :param revid: Revision id of the revision about to be built.
        :param prev_inventory: Inventory of the left-hand parent; it is
            copied and the copy is edited in place.
        :param revmeta: Metadata object of the Subversion revision.
        """
        self.revid = revid
        (self.branch_path, self.revnum, self.mapping) = self.source.lookup_revision_id(revid)
        self.revmeta = revmeta
        # The file id map is computed lazily, see _get_id_map()
        self._id_map = None
        self.dir_baserev = {}
        # NOTE(review): restored from upstream; nothing in this file reads it.
        self._revinfo = None
        # Source paths of entries that were moved before their delete arrived
        self._premature_deletes = set()
        self.old_inventory = prev_inventory
        self.inventory = prev_inventory.copy()
        self._start_revision()

    def _get_id_map(self):
        """Return the path -> file id map for this revision, caching it."""
        if self._id_map is not None:
            return self._id_map

        self._id_map = self.source.transform_fileid_map(self.revmeta, self.mapping)

        return self._id_map

    def _get_revision(self, revid):
        """Creates the revision object.

        :param revid: Revision id of the revision to create.
        :return: Tuple of the Revision object and its signature text
            (None if the revision properties carry no signature).
        """
        # Commit SVN revision properties to a Revision object
        parent_ids = self.revmeta.get_parent_ids(self.mapping)
        if parent_ids == (NULL_REVISION,):
            # A revision whose only parent is the null revision has no parents
            parent_ids = ()
        assert NULL_REVISION not in parent_ids, "parents: %r" % parent_ids
        rev = Revision(revision_id=revid,
                       parent_ids=parent_ids)

        self.mapping.import_revision(self.revmeta.revprops, self.revmeta.fileprops,
                                     self.revmeta.repository.uuid, self.revmeta.branch_path,
                                     self.revmeta.revnum, rev)

        signature = self.revmeta.revprops.get(SVN_REVPROP_BZR_SIGNATURE)

        return (rev, signature)

    def open_root(self, base_revnum):
        """Editor callback: open the branch root.

        :param base_revnum: Base revision number (unused here).
        :return: DirectoryBuildEditor for the root directory.
        """
        if self.old_inventory.root is None:
            # First time the root is set
            old_file_id = None
            file_id = self.mapping.generate_file_id(self.source.uuid, self.revnum, self.branch_path, u"")
            file_parents = []
        else:
            assert self.old_inventory.root.revision is not None
            old_file_id = self.old_inventory.root.file_id
            file_id = self._get_id_map().get("", old_file_id)
            file_parents = [self.old_inventory.root.revision]

        if self.inventory.root is not None and \
                file_id == self.inventory.root.file_id:
            ie = self.inventory.root
        else:
            ie = self.inventory.add_path("", 'directory', file_id)
        ie.revision = self.revid
        return DirectoryBuildEditor(self, old_file_id, file_id, file_parents)

    def close(self):
        """Editor callback: nothing to do, the root directory's close()
        already finished the commit."""
        # NOTE(review): method restored from upstream; its lines were
        # missing from the reviewed copy.
        pass

    def _finish_commit(self):
        """Store the revision that was just built. Subclass hook."""
        raise NotImplementedError(self._finish_commit)

    def abort(self):
        """Editor callback: give up on the current revision. Subclass hook."""
        # NOTE(review): method restored from upstream; its lines were
        # missing from the reviewed copy.
        pass

    def _start_revision(self):
        """Hook called at the end of start_revision(). Subclass hook."""
        pass

    def _get_existing_id(self, old_parent_id, new_parent_id, path):
        """Find the file id of a path that already existed in the parent.

        The file id map takes precedence; otherwise the id is looked up by
        basename among the children of old_parent_id in the old inventory.
        """
        assert isinstance(path, unicode)
        assert isinstance(old_parent_id, str)
        assert isinstance(new_parent_id, str)
        ret = self._get_id_map().get(path)
        if ret is not None:
            return ret
        return self.old_inventory[old_parent_id].children[urlutils.basename(path)].file_id

    def _get_old_id(self, parent_id, old_path):
        """Return the file id old_path had in the old inventory."""
        assert isinstance(old_path, unicode)
        assert isinstance(parent_id, str)
        return self.old_inventory[parent_id].children[urlutils.basename(old_path)].file_id

    def _get_new_id(self, parent_id, new_path):
        """Return the file id for a newly added path.

        The file id map takes precedence; otherwise the mapping generates one.
        """
        assert isinstance(new_path, unicode)
        assert isinstance(parent_id, str)
        ret = self._get_id_map().get(new_path)
        if ret is not None:
            return ret
        return self.mapping.generate_file_id(self.source.uuid, self.revnum,
                                             self.branch_path, new_path)

    def _rename(self, file_id, parent_id, path):
        """Move file_id to path under parent_id unless it is already there."""
        assert isinstance(path, unicode)
        assert isinstance(parent_id, str)
        new_name = urlutils.basename(path)
        entry = self.inventory[file_id]
        # Only rename if not right yet
        if entry.parent_id == parent_id and entry.name == new_name:
            return
        self.inventory.rename(file_id, parent_id, new_name)
class DirectoryBuildEditor(object):
    """Editor for a single directory inside a RevisionBuildEditor session.

    NOTE(review): the reviewed copy of this file had lines dropped; simple
    control-flow lines (else/pass) and the close() header were restored
    from context.
    """

    def __init__(self, editor, old_id, new_id, parent_revids=None):
        """
        :param editor: The owning RevisionBuildEditor.
        :param old_id: File id of this directory in the old inventory
            (None if it did not exist there).
        :param new_id: File id of this directory in the new inventory.
        :param parent_revids: Revisions in which this directory was last
            changed; defaults to no parents.
        """
        self.editor = editor
        self.old_id = old_id
        self.new_id = new_id
        # A fresh list per instance; a shared [] default would leak state
        # between editors.
        if parent_revids is None:
            parent_revids = []
        self.parent_revids = parent_revids

    def close(self):
        """Editor callback: record the directory and, for the root, finish
        the commit."""
        self.editor.inventory[self.new_id].revision = self.editor.revid

        # Directories are stored as an empty text
        self.editor.texts.add_lines((self.new_id, self.editor.revid),
                 [(self.new_id, revid) for revid in self.parent_revids], [])

        if self.new_id == self.editor.inventory.root.file_id:
            # Every premature move must have been matched by a delete by now
            assert len(self.editor._premature_deletes) == 0
            self.editor._finish_commit()

    def _claim_moved_entry(self, file_id, path, copyfrom_path):
        """Move an entry that already exists in the inventory to path and
        remember that its delete at the old location is still to come."""
        old_path = self.editor.old_inventory.id2path(file_id)
        if copyfrom_path is None:
            # This should ideally never happen!
            copyfrom_path = old_path
            mutter('no copyfrom path set, assuming %r', copyfrom_path)
        assert copyfrom_path == old_path
        assert copyfrom_path not in self.editor._premature_deletes
        self.editor._premature_deletes.add(copyfrom_path)
        # _rename() is a no-op if the entry is already in the right spot
        self.editor._rename(file_id, self.new_id, path)

    def add_directory(self, path, copyfrom_path=None, copyfrom_revnum=-1):
        """Editor callback: add a directory.

        :param path: UTF-8 encoded path of the new directory.
        :param copyfrom_path: Path the directory was copied from, if any.
        :param copyfrom_revnum: Revision it was copied from (unused here).
        :return: DirectoryBuildEditor for the new directory.
        """
        assert isinstance(path, str)
        path = path.decode("utf-8")
        check_filename(path)
        file_id = self.editor._get_new_id(self.new_id, path)

        if file_id in self.editor.inventory:
            # This directory was moved here from somewhere else, but the
            # other location hasn't been removed yet.
            self._claim_moved_entry(file_id, path, copyfrom_path)
            ie = self.editor.inventory[file_id]
            old_file_id = file_id
        else:
            old_file_id = None
            ie = self.editor.inventory.add_path(path, 'directory', file_id)
        ie.revision = self.editor.revid

        return DirectoryBuildEditor(self.editor, old_file_id, file_id)

    def open_directory(self, path, base_revnum):
        """Editor callback: open an existing directory.

        :param path: UTF-8 encoded path of the directory.
        :param base_revnum: Base revision number; must not be negative.
        :return: DirectoryBuildEditor for the directory.
        """
        assert isinstance(path, str)
        path = path.decode("utf-8")
        assert base_revnum >= 0
        base_file_id = self.editor._get_old_id(self.old_id, path)
        base_revid = self.editor.old_inventory[base_file_id].revision
        file_id = self.editor._get_existing_id(self.old_id, self.new_id, path)
        if file_id == base_file_id:
            file_parents = [base_revid]
            ie = self.editor.inventory[file_id]
        else:
            # Replace if original was inside this branch
            # change id of base_file_id to file_id
            ie = self.editor.inventory[base_file_id]
            for name in ie.children:
                ie.children[name].parent_id = file_id
            # FIXME: Don't touch inventory internals
            del self.editor.inventory._byid[base_file_id]
            self.editor.inventory._byid[file_id] = ie
            ie.file_id = file_id
            file_parents = []
        ie.revision = self.editor.revid
        return DirectoryBuildEditor(self.editor, base_file_id, file_id,
                                    file_parents)

    def change_prop(self, name, value):
        """Editor callback: a property on this directory changed.

        Properties set on the branch root are also stored in the revision
        metadata's file properties.
        """
        if self.new_id == self.editor.inventory.root.file_id:
            # Replay lazy_dict, since it may be more expensive
            # (exact type check on purpose: anything that is not a plain
            # dict is replaced by one)
            if type(self.editor.revmeta.fileprops) is not dict:
                self.editor.revmeta.fileprops = {}
            self.editor.revmeta.fileprops[name] = value

        if name in (properties.PROP_ENTRY_COMMITTED_DATE,
                    properties.PROP_ENTRY_COMMITTED_REV,
                    properties.PROP_ENTRY_LAST_AUTHOR,
                    properties.PROP_ENTRY_LOCK_TOKEN,
                    properties.PROP_ENTRY_UUID,
                    properties.PROP_EXECUTABLE):
            # Known properties that carry no meaning for a directory
            pass
        elif name.startswith(properties.PROP_WC_PREFIX):
            pass
        elif name.startswith(properties.PROP_PREFIX):
            mutter('unsupported dir property %r', name)

    def add_file(self, path, copyfrom_path=None, copyfrom_revnum=-1):
        """Editor callback: add a file.

        :param path: UTF-8 encoded path of the new file.
        :param copyfrom_path: Path the file was copied from, if any.
        :param copyfrom_revnum: Revision it was copied from (unused here).
        :return: FileBuildEditor for the new file.
        """
        assert isinstance(path, str)
        path = path.decode("utf-8")
        check_filename(path)
        file_id = self.editor._get_new_id(self.new_id, path)
        if file_id in self.editor.inventory:
            # This file was moved here from somewhere else, but the
            # other location hasn't been removed yet.
            self._claim_moved_entry(file_id, path, copyfrom_path)
        return FileBuildEditor(self.editor, path, file_id)

    def open_file(self, path, base_revnum):
        """Editor callback: open an existing file for modification.

        :param path: UTF-8 encoded path of the file.
        :param base_revnum: Base revision number (unused here).
        :return: FileBuildEditor primed with the file's previous contents.
        """
        assert isinstance(path, str)
        path = path.decode("utf-8")
        base_file_id = self.editor._get_old_id(self.old_id, path)
        base_revid = self.editor.old_inventory[base_file_id].revision
        file_id = self.editor._get_existing_id(self.old_id, self.new_id, path)
        is_symlink = (self.editor.inventory[base_file_id].kind == 'symlink')
        record = self.editor.texts.get_record_stream([(base_file_id, base_revid)], 'unordered', True).next()
        file_data = record.get_bytes_as('fulltext')
        if file_id == base_file_id:
            file_parents = [base_revid]
        else:
            # Replace: the old entry goes away, the new one has no parents
            del self.editor.inventory[base_file_id]
            file_parents = []
        return FileBuildEditor(self.editor, path, file_id,
                               file_parents, file_data, is_symlink=is_symlink)

    def delete_entry(self, path, revnum):
        """Editor callback: delete a file or directory.

        :param path: UTF-8 encoded path of the entry to remove.
        :param revnum: Revision number (unused here).
        """
        assert isinstance(path, str)
        path = path.decode("utf-8")
        if path in self.editor._premature_deletes:
            # The entry was already moved away; just forget about it and
            # about everything that was recorded below it.
            self.editor._premature_deletes.remove(path)
            child_prefix = "%s/" % path
            for p in self.editor._premature_deletes.copy():
                if p.startswith(child_prefix):
                    self.editor._premature_deletes.remove(p)
        else:
            self.editor.inventory.remove_recursive_id(self.editor._get_old_id(self.old_id, path))
class FileBuildEditor(object):
    """Editor for a single file inside a RevisionBuildEditor session.

    NOTE(review): the reviewed copy of this file had lines dropped; the
    tail of the __init__ signature and simple control-flow lines
    (else/pass) were restored from context.
    """

    def __init__(self, editor, path, file_id, file_parents=None, data="",
                 is_symlink=False):
        """
        :param editor: The owning RevisionBuildEditor.
        :param path: Path of the file (unicode).
        :param file_id: File id of the file in the new inventory.
        :param file_parents: Revisions in which the file was last changed;
            defaults to no parents.
        :param data: Previous contents of the file, the delta base.
        :param is_symlink: Whether the previous version was a symlink.
        """
        self.path = path
        self.editor = editor
        self.file_id = file_id
        self.file_data = data
        self.is_symlink = is_symlink
        # None means "svn:special was not touched in this revision"
        self.is_special = None
        # A fresh list per instance; a shared [] default would leak state
        # between editors.
        if file_parents is None:
            file_parents = []
        self.file_parents = file_parents
        # None means "svn:executable was not touched in this revision"
        self.is_executable = None
        self.file_stream = None

    def apply_textdelta(self, base_checksum=None):
        """Editor callback: prepare to receive a text delta.

        :param base_checksum: Expected MD5 of the delta base, or None.
        :return: Handler the delta windows have to be fed to.
        """
        actual_checksum = md5_strings([self.file_data])
        # The condition and the message must be separate operands: a
        # parenthesised (condition, message) tuple is always true.
        assert base_checksum is None or base_checksum == actual_checksum, \
            "base checksum mismatch: %r != %r" % (base_checksum,
                                                  actual_checksum)
        self.file_stream = StringIO()
        return apply_txdelta_handler(self.file_data, self.file_stream)

    def change_prop(self, name, value):
        """Editor callback: a property on this file changed.

        :param name: Property name.
        :param value: New value, or None if the property was removed.
        """
        if name == properties.PROP_EXECUTABLE:
            # You'd expect executable to match
            # properties.PROP_EXECUTABLE_VALUE, but that's not
            # how SVN behaves. It appears to consider the presence
            # of the property sufficient to mark it executable.
            self.is_executable = (value is not None)
        elif name == properties.PROP_SPECIAL:
            self.is_special = (value is not None)
        elif name == properties.PROP_ENTRY_COMMITTED_REV:
            self.last_file_rev = int(value)
        elif name == properties.PROP_EXTERNALS:
            mutter('svn:externals property on file!')
        elif name in (properties.PROP_ENTRY_COMMITTED_DATE,
                      properties.PROP_ENTRY_LAST_AUTHOR,
                      properties.PROP_ENTRY_LOCK_TOKEN,
                      properties.PROP_ENTRY_UUID,
                      properties.PROP_MIME_TYPE):
            # Known properties that are deliberately ignored
            pass
        elif name.startswith(properties.PROP_WC_PREFIX):
            pass
        elif (name.startswith(properties.PROP_PREFIX) or
              name.startswith(SVN_PROP_BZR_PREFIX)):
            mutter('unsupported file property %r', name)

    def close(self, checksum=None):
        """Editor callback: store the file text and its inventory entry.

        :param checksum: Expected MD5 of the resulting text, or None.
        """
        assert isinstance(self.path, unicode)
        if self.file_stream is not None:
            self.file_stream.seek(0)
            lines = osutils.split_lines(self.file_stream.read())
        else:
            # Data didn't change or file is new
            lines = osutils.split_lines(self.file_data)

        actual_checksum = md5_strings(lines)
        assert checksum is None or checksum == actual_checksum

        self.editor.texts.add_lines((self.file_id, self.editor.revid),
                 [(self.file_id, revid) for revid in self.file_parents], lines)

        if self.is_special is not None:
            # Subversion stores a symlink as a special file whose contents
            # start with "link "
            self.is_symlink = (self.is_special and len(lines) > 0 and lines[0].startswith("link "))

        assert self.is_symlink in (True, False)

        if self.file_id in self.editor.inventory:
            if self.is_executable is None:
                # Executable bit untouched: carry it over from the old entry
                self.is_executable = self.editor.inventory[self.file_id].executable
            # The entry is re-added below, possibly with a different kind
            del self.editor.inventory[self.file_id]

        if self.is_symlink:
            ie = self.editor.inventory.add_path(self.path, 'symlink', self.file_id)
            ie.symlink_target = "".join(lines)[len("link "):]
            # NOTE(review): the next two lines were missing from the
            # reviewed copy and are restored from upstream - confirm.
            ie.text_sha1 = None
            ie.text_size = None
            ie.executable = False
            ie.revision = self.editor.revid
        else:
            ie = self.editor.inventory.add_path(self.path, 'file', self.file_id)
            ie.revision = self.editor.revid
            # NOTE(review): this line was missing from the reviewed copy
            # and is restored from upstream - confirm.
            ie.kind = 'file'
            ie.symlink_target = None
            ie.text_sha1 = osutils.sha_strings(lines)
            ie.text_size = sum(map(len, lines))
            assert ie.text_size is not None
            ie.executable = self.is_executable

        self.file_stream = None
class WeaveRevisionBuildEditor(RevisionBuildEditor):
    """Subversion commit editor that can write to a weave-based repository.

    Each revision is written inside its own write group on the target.
    """

    # Class-level default so that abort() is safe to call before the first
    # revision has been started.
    _write_group_active = False

    def _start_revision(self):
        """Open the write group the revision is stored in."""
        self._write_group_active = True
        self.target.start_write_group()

    def _finish_commit(self):
        """Add the revision, inventory and signature to the target and
        commit the write group."""
        (rev, signature) = self._get_revision(self.revid)
        self.inventory.revision_id = self.revid
        # Escaping the commit message is really the task of the serialiser
        rev.message = _escape_commit_message(rev.message)
        rev.inventory_sha1 = None
        self.target.add_revision(self.revid, rev, self.inventory)
        if signature is not None:
            self.target.add_signature_text(self.revid, signature)
        self.target.commit_write_group()
        self._write_group_active = False

    # NOTE(review): the header of this method was missing from the reviewed
    # copy; the name is restored from upstream - confirm.
    def abort(self):
        """Abort the write group if one is still open."""
        if self._write_group_active:
            self.target.abort_write_group()
            self._write_group_active = False
class CommitBuilderRevisionBuildEditor(RevisionBuildEditor):
    """Revision Build Editor for Subversion that uses the CommitBuilder API.

    Not implemented: constructing an instance always raises
    NotImplementedError.
    """

    def __init__(self, source, target):
        # Run the base initialiser first, then refuse to be used.
        super(CommitBuilderRevisionBuildEditor, self).__init__(source, target)
        raise NotImplementedError(self)
def get_revision_build_editor(repository):
    """Obtain a RevisionBuildEditor for a particular target repository.

    The repository argument is currently not inspected: the weave-based
    editor is returned for every target.

    :param repository: Repository to obtain the buildeditor for.
    :return: Class object of class descending from RevisionBuildEditor
    """
    editor_class = WeaveRevisionBuildEditor
    return editor_class
def report_inventory_contents(reporter, inv, revnum, start_empty):
    """Describe the contents of an inventory to a Subversion reporter.

    :param reporter: Reporter to send the paths to. It is finished on
        success and aborted if anything goes wrong.
    :param inv: Inventory whose paths are reported.
    :param revnum: Revision number every path is reported at.
    :param start_empty: Passed through to the reporter for every path.
    """
    try:
        reporter.set_path("", revnum, start_empty)

        # Report status of existing paths
        for path, entry in inv.iter_entries():
            # The root has already been reported above
            if path != "":
                reporter.set_path(path.encode("utf-8"), revnum, start_empty)
    except:
        # Always tell the reporter to give up; the error is re-raised.
        reporter.abort()
        raise
    reporter.finish()
class InterFromSvnRepository(InterRepository):
    """Svn to any repository actions.

    NOTE(review): the reviewed copy of this file had lines dropped;
    control-flow lines (try/finally/else/return), local initialisers and
    the staticmethod decorators were restored from context.
    """

    _matching_repo_format = SvnRepositoryFormat()

    _supports_branches = True

    @staticmethod
    def _get_repo_format_to_test():
        return None

    def _find_all(self, mapping, pb=None):
        """Find all revisions from the source repository that are not
        yet in the target repository.

        :param mapping: Mapping used to derive the revision ids.
        :param pb: Optional progress bar.
        :return: List of (revid, lhs parent revid, revmeta) tuples in
            topological order.
        """
        meta_map = {}
        graph = self.source.get_graph()
        available_revs = set()
        for revmeta in self.source.iter_all_changes(pb=pb):
            revid = revmeta.get_revision_id(mapping)
            available_revs.add(revid)
            meta_map[revid] = revmeta
        missing = available_revs.difference(self.target.has_revisions(available_revs))
        needed = list(graph.iter_topo_order(missing))
        parents = graph.get_parent_map(needed)
        return [(revid, parents[revid][0], meta_map[revid]) for revid in needed]

    def _find_branches(self, branches, find_ghosts=False, pb=None):
        """Find the revisions missing from the target for a set of branches.

        :param branches: Branches whose tips should become available.
        :param find_ghosts: Find ghosts
        :param pb: Optional progress bar.
        :return: List of (revid, lhs parent revid, revmeta) tuples, each
            revision listed once.
        """
        set_needed = set()
        ret_needed = list()
        # Shared between the branches so common ancestry is walked only once
        checked = set()
        for num, branch in enumerate(branches):
            if pb is not None:
                pb.update("determining revisions to fetch", num, len(branches))
            # Acquired outside the try block: if this fails there is no
            # progress bar to finish.
            nestedpb = ui.ui_factory.nested_progress_bar()
            try:
                for rev in self._find_until(branch.last_revision(), find_ghosts=find_ghosts,
                                            pb=nestedpb, checked=checked):
                    if rev[0] not in set_needed:
                        ret_needed.append(rev)
                        set_needed.add(rev[0])
            finally:
                nestedpb.finished()
        return ret_needed

    def _find_until(self, revision_id, find_ghosts=False, pb=None,
                    checked=None):
        """Find all missing revisions until revision_id

        :param revision_id: Stop revision
        :param find_ghosts: Find ghosts
        :param pb: Optional progress bar.
        :param checked: Set of revision ids whose ancestry has already been
            examined; it is updated in place.
        :return: List of (revid, lhs parent revid, revmeta) tuples for the
            missing revisions, in reverse order of discovery.
        """
        if checked is None:
            checked = set()
        if revision_id in checked:
            return []
        extra = set()
        revs = []
        meta_map = {}
        lhs_parent = {}

        def check_revid(revision_id):
            try:
                (branch_path, revnum, mapping) = self.source.lookup_revision_id(revision_id)
            except NoSuchRevision:
                return # Ghost
            for revmeta in self.source.iter_reverse_branch_changes(branch_path, revnum,
                                                                   to_revnum=0, mapping=mapping):
                if pb is not None:
                    pb.update("determining revisions to fetch", revnum-revmeta.revnum, revnum)
                revid = revmeta.get_revision_id(mapping)
                parent_ids = revmeta.get_parent_ids(mapping)
                lhs_parent[revid] = parent_ids[0]
                meta_map[revid] = revmeta
                if revid in checked:
                    # This revision (and its ancestry) has already been checked
                    break
                # Right-hand side parents are examined afterwards
                extra.update(parent_ids[1:])
                if not self.target.has_revision(revid):
                    revs.append(revid)
                elif not find_ghosts:
                    break
                checked.add(revid)

        check_revid(revision_id)

        for revid in extra:
            if revid not in revs:
                check_revid(revid)

        needed = [(revid, lhs_parent[revid], meta_map[revid]) for revid in reversed(revs)]

        return needed

    def copy_content(self, revision_id=None, pb=None):
        """See InterRepository.copy_content."""
        self.fetch(revision_id, pb, find_ghosts=False)

    def _fetch_replay(self, revids, pb=None):
        """Copy a set of related revisions using svn.ra.replay.

        :param revids: Revision ids to copy.
        :param pb: Optional progress bar
        """
        # Name this method itself: referring to a non-existent attribute
        # would raise AttributeError instead of NotImplementedError.
        raise NotImplementedError(self._fetch_replay)

    def _fetch_switch(self, repos_root, revids, pb=None):
        """Copy a set of related revisions using svn.ra.switch.

        :param repos_root: URL of the root of the Subversion repository.
        :param revids: List of (revid, parent revid, revmeta) tuples of the
            revisions to copy, in the order they have to be copied.
        :param pb: Optional progress bar.
        """
        prev_revid = None
        if pb is None:
            pb = ui.ui_factory.nested_progress_bar()
            nested_pb = pb
        else:
            nested_pb = None
        prev_inv = None

        revbuildklass = get_revision_build_editor(self.target)
        editor = revbuildklass(self.source, self.target)

        try:
            for num, (revid, parent_revid, revmeta) in enumerate(revids):
                assert revid != NULL_REVISION
                pb.update('copying revision', num, len(revids))

                assert parent_revid is not None and parent_revid != revid

                if parent_revid == NULL_REVISION:
                    parent_inv = Inventory(root_id=None)
                elif prev_revid != parent_revid:
                    parent_inv = self.target.get_inventory(parent_revid)
                else:
                    # The parent is the revision that was just built
                    parent_inv = prev_inv

                editor.start_revision(revid, parent_inv, revmeta)

                if parent_revid == NULL_REVISION:
                    parent_branch = editor.branch_path
                    parent_revnum = editor.revnum
                    start_empty = True
                else:
                    (parent_branch, parent_revnum, mapping) = \
                            self.source.lookup_revision_id(parent_revid)
                    start_empty = False

                try:
                    conn = None
                    try:
                        conn = self.source.transport.connections.get(urlutils.join(repos_root, parent_branch))

                        assert editor.revnum > parent_revnum or start_empty

                        if parent_branch != editor.branch_path:
                            reporter = conn.do_switch(editor.revnum, "", True,
                                _url_escape_uri(urlutils.join(repos_root, editor.branch_path)),
                                editor)
                        else:
                            reporter = conn.do_update(editor.revnum, "", True, editor)

                        report_inventory_contents(reporter, parent_inv, parent_revnum, start_empty)
                    finally:
                        # NOTE(review): the guard lines were missing from
                        # the reviewed copy; restored from upstream - confirm.
                        if conn is not None and not conn.busy:
                            self.source.transport.add_connection(conn)
                except:
                    # Let the editor clean up; the error is re-raised.
                    editor.abort()
                    raise

                prev_inv = editor.inventory
                prev_revid = revid
        finally:
            if nested_pb is not None:
                nested_pb.finished()

    def fetch(self, revision_id=None, pb=None, find_ghosts=False,
              branches=None):
        """Fetch revisions.

        :param revision_id: Revision to fetch up to; None fetches
            everything (unless branches is given).
        :param pb: Optional progress bar.
        :param find_ghosts: Find ghosts
        :param branches: Optional list of branches whose tips to fetch;
            takes precedence over revision_id.
        """
        if revision_id == NULL_REVISION:
            return

        if pb:
            pb.update("determining revisions to fetch", 0, 2)

        # Loop over all the revnums until revision_id
        # (or youngest_revnum) and call self.target.add_revision()
        # or self.target.add_inventory() each time
        self.target.lock_write()
        try:
            nested_pb = ui.ui_factory.nested_progress_bar()
            try:
                if branches is not None:
                    needed = self._find_branches(branches, find_ghosts,
                                pb=nested_pb)
                elif revision_id is None:
                    needed = self._find_all(self.source.get_mapping(), pb=nested_pb)
                else:
                    needed = self._find_until(revision_id, find_ghosts, pb=nested_pb)
            finally:
                nested_pb.finished()

            if not needed:
                # Nothing to fetch
                return

            self._fetch_switch(self.source.transport.get_svn_repos_root(), needed, pb)
        finally:
            self.target.unlock()

    @staticmethod
    def is_compatible(source, target):
        """Be compatible with SvnRepository."""
        # FIXME: Also check target uses VersionedFile
        return isinstance(source, SvnRepository) and target.supports_rich_root()