1 # Copyright (C) 2005-2007 Jelmer Vernooij <jelmer@samba.org>
3 # This program is free software; you can redistribute it and/or modify
4 # it under the terms of the GNU General Public License as published by
5 # the Free Software Foundation; either version 3 of the License, or
6 # (at your option) any later version.
8 # This program is distributed in the hope that it will be useful,
9 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 # GNU General Public License for more details.
13 # You should have received a copy of the GNU General Public License
14 # along with this program; if not, write to the Free Software
15 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
16 """Fetching revisions from Subversion repositories in batches."""
19 from bzrlib import osutils, ui, urlutils
20 from bzrlib.inventory import Inventory
21 from bzrlib.revision import Revision, NULL_REVISION
22 from bzrlib.repository import InterRepository
23 from bzrlib.trace import mutter
25 from cStringIO import StringIO
28 from bzrlib.plugins.svn import properties
29 from bzrlib.plugins.svn.delta import apply_txdelta_handler
30 from bzrlib.plugins.svn.errors import InvalidFileName
31 from bzrlib.plugins.svn.logwalker import lazy_dict
32 from bzrlib.plugins.svn.mapping import (SVN_PROP_BZR_MERGE,
33 SVN_PROP_BZR_PREFIX, SVN_PROP_BZR_REVISION_INFO,
34 SVN_PROP_BZR_REVISION_ID,
35 SVN_PROP_BZR_FILEIDS, SVN_REVPROP_BZR_SIGNATURE,
37 parse_revision_metadata)
38 from bzrlib.plugins.svn.repository import SvnRepository, SvnRepositoryFormat
39 from bzrlib.plugins.svn.svk import SVN_PROP_SVK_MERGE
40 from bzrlib.plugins.svn.tree import (parse_externals_description,
41 inventory_add_external)
43 def _escape_commit_message(message):
44 """Replace xml-incompatible control characters."""
48 # FIXME: RBC 20060419 this should be done by the revision
49 # serialiser not by commit. Then we can also add an unescaper
50 # in the deserializer and start roundtripping revision messages
51 # precisely. See repository_implementations/test_repository.py
53 # Python strings can include characters that can't be
54 # represented in well-formed XML; escape characters that
55 # aren't listed in the XML specification
56 # (http://www.w3.org/TR/REC-xml/#NT-Char).
58 u'[^\x09\x0A\x0D\u0020-\uD7FF\uE000-\uFFFD]+',
59 lambda match: match.group(0).encode('unicode_escape'),
def md5_strings(strings):
    """Return the MD5sum of the concatenation of strings.

    :param strings: Iterable of byte strings to find the MD5sum of; they are
        digested in iteration order.
    :return: Hexadecimal MD5 digest of the concatenated input.
    """
    try:
        from hashlib import md5 as new_md5
    except ImportError:
        # Interpreters that predate hashlib only ship the md5 module.
        from md5 import new as new_md5
    digest = new_md5()
    # Explicit loop rather than map(): map() is lazy on newer interpreters,
    # in which case nothing would ever be fed to the digest.
    for chunk in strings:
        digest.update(chunk)
    return digest.hexdigest()
def check_filename(path):
    """Check that a path does not contain invalid characters.

    :param path: Path to check (must be a unicode string)
    :raises InvalidFileName: if the path contains an invalid character
    """
    assert isinstance(path, unicode)
    # NOTE(review): the condition guarding this raise was not visible in the
    # reviewed copy of this file; a backslash test is assumed here --
    # confirm against the authoritative source before relying on it.
    if u"\\" in path:
        raise InvalidFileName(path)
class RevisionBuildEditor(object):
    """Implementation of the Subversion commit editor interface that builds a
    revision for the target repository.

    The editor is reused for several revisions: start_revision() resets the
    per-revision state, the directory/file editors then mutate
    self.inventory, and the root directory editor calls _finish_commit()
    when it is closed.  Subclasses supply _start_revision() and
    _finish_commit().
    """

    def __init__(self, source, target):
        """Create an editor copying from source into target.

        :param source: Repository the revisions are read from.
        :param target: Repository the revisions are written to.
        """
        self.target = target
        self.source = source
        self.transact = target.get_transaction()

    def set_target_revision(self, revnum):
        """Check that the announced revision number is the expected one."""
        assert self.revnum == revnum

    def start_revision(self, revid, prev_inventory, revmeta):
        """Reset the per-revision state and begin building revid.

        :param revid: Revision id of the revision to build.
        :param prev_inventory: Inventory the changes are applied on top of;
            it is copied, not modified.
        :param revmeta: Metadata object of the revision (revprops,
            fileprops, paths and parent ids are read from it).
        """
        self.revid = revid
        (self.branch_path, self.revnum, self.mapping) = \
            self.source.lookup_revision_id(revid)
        self.revmeta = revmeta
        self._id_map = None
        self.dir_baserev = {}
        self._premature_deletes = set()
        self.old_inventory = prev_inventory
        self.inventory = prev_inventory.copy()
        self._start_revision()

    def _get_id_map(self):
        """Return the path -> file id map of this revision, computing it on
        first use and caching it afterwards."""
        if self._id_map is not None:
            return self._id_map

        renames = self.mapping.import_fileid_map(self.revmeta.revprops,
                                                 self.revmeta.fileprops)
        # NOTE(review): the final argument was not visible in the reviewed
        # copy of this file; self.mapping is assumed -- confirm.
        self._id_map = self.source.transform_fileid_map(
            self.source.uuid, self.revnum, self.branch_path,
            self.revmeta.paths, renames, self.mapping)

        return self._id_map

    def _get_revision(self, revid):
        """Creates the revision object.

        :param revid: Revision id of the revision to create.
        :return: Tuple with the Revision object and the signature revision
            property (None when the property is not set).
        """
        # Commit SVN revision properties to a Revision object
        rev = Revision(revision_id=revid,
                       parent_ids=self.revmeta.get_parent_ids(self.mapping))

        self.mapping.import_revision(self.revmeta.revprops,
                                     self.revmeta.fileprops, rev)

        signature = self.revmeta.revprops.get(SVN_REVPROP_BZR_SIGNATURE)

        return (rev, signature)

    def open_root(self, base_revnum):
        """Open the tree root and return the editor for it.

        :param base_revnum: Base revision number (not used here).
        :return: DirectoryBuildEditor for the root directory.
        """
        if self.old_inventory.root is None:
            # First time the root is set
            old_file_id = None
            file_id = self.mapping.generate_file_id(
                self.source.uuid, self.revnum, self.branch_path, u"")
            file_parents = []
        else:
            assert self.old_inventory.root.revision is not None
            old_file_id = self.old_inventory.root.file_id
            file_id = self._get_id_map().get("", old_file_id)
            file_parents = [self.old_inventory.root.revision]

        if (self.inventory.root is not None and
                file_id == self.inventory.root.file_id):
            ie = self.inventory.root
        else:
            ie = self.inventory.add_path("", 'directory', file_id)
        ie.revision = self.revid
        return DirectoryBuildEditor(self, old_file_id, file_id, file_parents)

    def close(self):
        """Editor-interface hook; nothing to do at this level."""
        # NOTE(review): this hook was not visible in the reviewed copy of
        # this file; it is kept as a no-op -- confirm.
        pass

    def _finish_commit(self):
        """Store the finished revision; must be provided by subclasses."""
        raise NotImplementedError(self._finish_commit)

    def abort(self):
        """Editor-interface hook; subclasses release their resources here."""
        # NOTE(review): this hook was not visible in the reviewed copy of
        # this file; it is kept as a no-op -- confirm.
        pass

    def _start_revision(self):
        """Hook invoked at the end of start_revision(); no-op by default."""
        pass

    def _get_existing_id(self, old_parent_id, new_parent_id, path):
        """Return the file id for an existing path in the new revision.

        An entry in the id map wins; otherwise the id the path had below
        old_parent_id in the old inventory is reused.
        """
        assert isinstance(path, unicode)
        assert isinstance(old_parent_id, str)
        assert isinstance(new_parent_id, str)
        ret = self._get_id_map().get(path)
        if ret is not None:
            return ret
        old_parent = self.old_inventory[old_parent_id]
        return old_parent.children[urlutils.basename(path)].file_id

    def _get_old_id(self, parent_id, old_path):
        """Return the file id old_path had below parent_id in the old
        inventory."""
        assert isinstance(old_path, unicode)
        assert isinstance(parent_id, str)
        old_parent = self.old_inventory[parent_id]
        return old_parent.children[urlutils.basename(old_path)].file_id

    def _get_new_id(self, parent_id, new_path):
        """Return the file id for a path added in this revision.

        An entry in the id map wins; otherwise the mapping generates an id.
        """
        assert isinstance(new_path, unicode)
        assert isinstance(parent_id, str)
        ret = self._get_id_map().get(new_path)
        if ret is not None:
            return ret
        return self.mapping.generate_file_id(self.source.uuid, self.revnum,
                                             self.branch_path, new_path)

    def _rename(self, file_id, parent_id, path):
        """Move file_id below parent_id under the basename of path, unless
        it is already there."""
        assert isinstance(path, unicode)
        assert isinstance(parent_id, str)
        name = urlutils.basename(path)
        entry = self.inventory[file_id]
        # Only rename if not right yet
        if entry.parent_id == parent_id and entry.name == name:
            return
        self.inventory.rename(file_id, parent_id, name)
class DirectoryBuildEditor(object):
    """Editor for a single directory of the revision being built.

    Instances are created by RevisionBuildEditor.open_root() and by the
    add_directory()/open_directory() methods below; all state changes are
    applied to the inventory of the owning revision editor.
    """

    def __init__(self, editor, old_id, new_id, parent_revids=None):
        """
        :param editor: Owning RevisionBuildEditor.
        :param old_id: File id of the directory in the old inventory, or
            None if it did not exist there.
        :param new_id: File id of the directory in the new inventory.
        :param parent_revids: Revision ids used as text parents when the
            directory is recorded; defaults to no parents.
        """
        self.editor = editor
        self.old_id = old_id
        self.new_id = new_id
        # A fresh list per instance: a literal [] default would be shared
        # between every editor created without explicit parents.
        if parent_revids is None:
            parent_revids = []
        self.parent_revids = parent_revids

    def close(self):
        """Record the directory and, for the root, finish the commit."""
        self.editor.inventory[self.new_id].revision = self.editor.revid

        # Only record root if the target repository supports it
        self.editor.texts.add_lines(
            (self.new_id, self.editor.revid),
            [(self.new_id, revid) for revid in self.parent_revids], [])

        if self.new_id == self.editor.inventory.root.file_id:
            assert len(self.editor._premature_deletes) == 0
            self.editor._finish_commit()

    def _note_premature_delete(self, file_id, copyfrom_path):
        """Remember that file_id was re-added before its old location was
        deleted, so the pending delete_entry() call can be ignored."""
        if copyfrom_path is None:
            # This should ideally never happen!
            copyfrom_path = self.editor.old_inventory.id2path(file_id)
            mutter('no copyfrom path set, assuming %r', copyfrom_path)
        assert copyfrom_path == self.editor.old_inventory.id2path(file_id)
        assert copyfrom_path not in self.editor._premature_deletes
        self.editor._premature_deletes.add(copyfrom_path)

    def add_directory(self, path, copyfrom_path=None, copyfrom_revnum=-1):
        """Add a directory and return the editor for it.

        :param path: UTF-8 encoded path of the new directory.
        :param copyfrom_path: Path the directory was copied from, if any.
        :param copyfrom_revnum: Revision number of the copy source (unused).
        """
        assert isinstance(path, str)
        path = path.decode("utf-8")
        # NOTE(review): the call to check_filename() was not visible in the
        # reviewed copy of this file; it is assumed because the helper has
        # no other caller there -- confirm.
        check_filename(path)
        file_id = self.editor._get_new_id(self.new_id, path)

        if file_id in self.editor.inventory:
            # This directory was moved here from somewhere else, but the
            # other location hasn't been removed yet.
            self._note_premature_delete(file_id, copyfrom_path)
            self.editor._rename(file_id, self.new_id, path)
            ie = self.editor.inventory[file_id]
            old_file_id = file_id
        else:
            old_file_id = None
            ie = self.editor.inventory.add_path(path, 'directory', file_id)
        ie.revision = self.editor.revid

        return DirectoryBuildEditor(self.editor, old_file_id, file_id)

    def open_directory(self, path, base_revnum):
        """Open an existing directory and return the editor for it.

        :param path: UTF-8 encoded path of the directory.
        :param base_revnum: Base revision number; must not be negative.
        """
        assert isinstance(path, str)
        path = path.decode("utf-8")
        assert base_revnum >= 0
        base_file_id = self.editor._get_old_id(self.old_id, path)
        base_revid = self.editor.old_inventory[base_file_id].revision
        file_id = self.editor._get_existing_id(self.old_id, self.new_id, path)
        if file_id == base_file_id:
            file_parents = [base_revid]
            ie = self.editor.inventory[file_id]
        else:
            # Replace if original was inside this branch
            # change id of base_file_id to file_id
            ie = self.editor.inventory[base_file_id]
            for name in ie.children:
                ie.children[name].parent_id = file_id
            # FIXME: Don't touch inventory internals
            del self.editor.inventory._byid[base_file_id]
            self.editor.inventory._byid[file_id] = ie
            # NOTE(review): the two statements that follow were not visible
            # in the reviewed copy of this file; they are inferred from the
            # "change id" comment above and from the use of file_parents
            # below -- confirm.
            ie.file_id = file_id
            file_parents = []
        ie.revision = self.editor.revid
        return DirectoryBuildEditor(self.editor, base_file_id, file_id,
                                    file_parents)

    def change_prop(self, name, value):
        """Handle a property change on this directory.

        Properties set on the root are stored in the revision metadata's
        fileprops; apart from that, unsupported properties are only logged.
        """
        if self.new_id == self.editor.inventory.root.file_id:
            # Replay lazy_dict, since it may be more expensive
            # (exact type test on purpose: anything that is not a plain
            # dict is replaced by one)
            if type(self.editor.revmeta.fileprops) != dict:
                self.editor.revmeta.fileprops = {}
            self.editor.revmeta.fileprops[name] = value

        if name in (properties.PROP_ENTRY_COMMITTED_DATE,
                    properties.PROP_ENTRY_COMMITTED_REV,
                    properties.PROP_ENTRY_LAST_AUTHOR,
                    properties.PROP_ENTRY_LOCK_TOKEN,
                    properties.PROP_ENTRY_UUID,
                    properties.PROP_EXECUTABLE):
            pass
        elif name.startswith(properties.PROP_WC_PREFIX):
            pass
        elif name.startswith(properties.PROP_PREFIX):
            mutter('unsupported dir property %r', name)

    def add_file(self, path, copyfrom_path=None, copyfrom_revnum=-1):
        """Add a file and return the editor for it.

        :param path: UTF-8 encoded path of the new file.
        :param copyfrom_path: Path the file was copied from, if any.
        :param copyfrom_revnum: Revision number of the copy source (unused).
        """
        assert isinstance(path, str)
        path = path.decode("utf-8")
        # NOTE(review): assumed call, see add_directory() -- confirm.
        check_filename(path)
        file_id = self.editor._get_new_id(self.new_id, path)
        if file_id in self.editor.inventory:
            # This file was moved here from somewhere else, but the
            # other location hasn't been removed yet.
            self._note_premature_delete(file_id, copyfrom_path)
            # _rename() is a no-op if it's already in the right spot
            self.editor._rename(file_id, self.new_id, path)
        return FileBuildEditor(self.editor, path, file_id)

    def open_file(self, path, base_revnum):
        """Open an existing file and return the editor for it.

        The current text of the file is read from the text store so that a
        delta can be applied to it.

        :param path: UTF-8 encoded path of the file.
        :param base_revnum: Base revision number (unused).
        """
        assert isinstance(path, str)
        path = path.decode("utf-8")
        base_file_id = self.editor._get_old_id(self.old_id, path)
        base_revid = self.editor.old_inventory[base_file_id].revision
        file_id = self.editor._get_existing_id(self.old_id, self.new_id, path)
        is_symlink = (self.editor.inventory[base_file_id].kind == 'symlink')
        record = self.editor.texts.get_record_stream(
            [(base_file_id, base_revid)], 'unordered', True).next()
        file_data = record.get_bytes_as('fulltext')
        if file_id == base_file_id:
            file_parents = [base_revid]
        else:
            # The file gets a new id: drop the old entry, start a new history
            file_parents = []
            del self.editor.inventory[base_file_id]

        return FileBuildEditor(self.editor, path, file_id,
                               file_parents, file_data, is_symlink=is_symlink)

    def delete_entry(self, path, revnum):
        """Delete an entry, unless it was already re-added elsewhere.

        :param path: UTF-8 encoded path of the entry.
        :param revnum: Revision number (unused).
        """
        assert isinstance(path, str)
        path = path.decode("utf-8")
        if path in self.editor._premature_deletes:
            # The entry moved: forget it and everything recorded below it
            self.editor._premature_deletes.remove(path)
            for p in self.editor._premature_deletes.copy():
                if p.startswith("%s/" % path):
                    self.editor._premature_deletes.remove(p)
        else:
            self.editor.inventory.remove_recursive_id(
                self.editor._get_old_id(self.old_id, path))
class FileBuildEditor(object):
    """Editor for a single file of the revision being built.

    The new text is collected through apply_textdelta(), properties through
    change_prop(), and close() stores the text and the inventory entry.
    """

    def __init__(self, editor, path, file_id, file_parents=None, data="",
                 is_symlink=False):
        """
        :param editor: Owning RevisionBuildEditor.
        :param path: Path of the file (unicode).
        :param file_id: File id of the file in the new inventory.
        :param file_parents: Revision ids used as text parents; defaults to
            no parents.
        :param data: Current text of the file, used as delta base.
        :param is_symlink: Whether the file currently is a symlink.
        """
        self.path = path
        self.editor = editor
        self.file_id = file_id
        self.file_data = data
        self.is_symlink = is_symlink
        # A fresh list per instance: a literal [] default would be shared
        # between every editor created without explicit parents.
        if file_parents is None:
            file_parents = []
        self.file_parents = file_parents
        self.is_executable = None
        self.file_stream = None

    def apply_textdelta(self, base_checksum=None):
        """Prepare to receive a text delta against the current file data.

        :param base_checksum: Expected MD5 (hex) of the current file data,
            or None to skip the check.
        :return: Handler the delta windows are fed to; the resulting text
            is written to self.file_stream.
        """
        actual_checksum = md5_strings([self.file_data])
        # The condition and the message must be separate operands of the
        # assert statement: asserting the pair as one parenthesised tuple
        # is always true and would silently disable the check.
        assert base_checksum is None or base_checksum == actual_checksum, (
            "base checksum mismatch: %r != %r" % (base_checksum,
                                                  actual_checksum))
        self.file_stream = StringIO()
        return apply_txdelta_handler(self.file_data, self.file_stream)

    def change_prop(self, name, value):
        """Handle a property change on this file.

        :param name: Property name.
        :param value: New property value, or None if the property is
            removed.
        """
        if name == properties.PROP_EXECUTABLE:
            # You'd expect executable to match
            # properties.PROP_EXECUTABLE_VALUE, but that's not
            # how SVN behaves. It appears to consider the presence
            # of the property sufficient to mark it executable.
            self.is_executable = (value is not None)
        elif name == properties.PROP_SPECIAL:
            self.is_symlink = (value is not None)
        elif name == properties.PROP_ENTRY_COMMITTED_REV:
            self.last_file_rev = int(value)
        elif name == properties.PROP_EXTERNALS:
            mutter('svn:externals property on file!')
        elif name in (properties.PROP_ENTRY_COMMITTED_DATE,
                      properties.PROP_ENTRY_LAST_AUTHOR,
                      properties.PROP_ENTRY_LOCK_TOKEN,
                      properties.PROP_ENTRY_UUID,
                      properties.PROP_MIME_TYPE):
            pass
        elif name.startswith(properties.PROP_WC_PREFIX):
            pass
        elif (name.startswith(properties.PROP_PREFIX) or
              name.startswith(SVN_PROP_BZR_PREFIX)):
            mutter('unsupported file property %r', name)

    def close(self, checksum=None):
        """Store the text of the file and its inventory entry.

        :param checksum: Expected MD5 (hex) of the resulting text, or None
            to skip the check.
        """
        assert isinstance(self.path, unicode)
        if self.file_stream is not None:
            self.file_stream.seek(0)
            lines = osutils.split_lines(self.file_stream.read())
        else:
            # Data didn't change or file is new
            lines = osutils.split_lines(self.file_data)

        actual_checksum = md5_strings(lines)
        assert checksum is None or checksum == actual_checksum

        self.editor.texts.add_lines(
            (self.file_id, self.editor.revid),
            [(self.file_id, revid) for revid in self.file_parents], lines)

        assert self.is_symlink in (True, False)

        # The entry is always re-created, since its kind may have changed
        if self.file_id in self.editor.inventory:
            del self.editor.inventory[self.file_id]

        if self.is_symlink:
            ie = self.editor.inventory.add_path(self.path, 'symlink',
                                                self.file_id)
            # The stored text has the form "link <target>"
            ie.symlink_target = lines[0][len("link "):]
            # NOTE(review): the two text_* resets were not visible in the
            # reviewed copy of this file; they mirror the symlink_target
            # reset in the other branch -- confirm.
            ie.text_sha1 = None
            ie.text_size = None
            ie.executable = False
            ie.revision = self.editor.revid
        else:
            ie = self.editor.inventory.add_path(self.path, 'file',
                                                self.file_id)
            ie.revision = self.editor.revid
            ie.symlink_target = None
            ie.text_sha1 = osutils.sha_strings(lines)
            ie.text_size = sum(map(len, lines))
            assert ie.text_size is not None
            if self.is_executable is not None:
                ie.executable = self.is_executable

        self.file_stream = None
class WeaveRevisionBuildEditor(RevisionBuildEditor):
    """Subversion commit editor that can write to a weave-based repository.

    Each revision is stored inside its own write group on the target
    repository.
    """

    def __init__(self, source, target):
        RevisionBuildEditor.__init__(self, source, target)
        self.texts = target.texts
        # Defined up front so abort() is safe before any revision started.
        self._write_group_active = False

    def _start_revision(self):
        """Open the write group the revision is stored in."""
        self.target.start_write_group()
        # Only flag the group once it really exists, so that abort() never
        # tries to abort a group that failed to start.
        self._write_group_active = True

    def _finish_commit(self):
        """Add the revision, its inventory and its signature (if any) to
        the target repository and commit the write group."""
        (rev, signature) = self._get_revision(self.revid)
        self.inventory.revision_id = self.revid
        # Escaping the commit message is really the task of the serialiser
        rev.message = _escape_commit_message(rev.message)
        rev.inventory_sha1 = None
        self.target.add_revision(self.revid, rev, self.inventory)
        if signature is not None:
            self.target.add_signature_text(self.revid, signature)
        self.target.commit_write_group()
        self._write_group_active = False

    def abort(self):
        """Abort the write group of the revision in progress, if any."""
        # NOTE(review): the name of this method was not visible in the
        # reviewed copy of this file; "abort" is assumed -- confirm.
        if self._write_group_active:
            self.target.abort_write_group()
            self._write_group_active = False
class CommitBuilderRevisionBuildEditor(RevisionBuildEditor):
    """Revision Build Editor for Subversion that uses the CommitBuilder API.

    Not implemented yet: creating an instance always fails.
    """

    def __init__(self, source, target):
        """
        :raises NotImplementedError: always, after the base class has been
            initialised.
        """
        RevisionBuildEditor.__init__(self, source, target)
        raise NotImplementedError(self)
def get_revision_build_editor(repository):
    """Obtain a RevisionBuildEditor for a particular target repository.

    :param repository: Repository to obtain the buildeditor for (currently
        not inspected: the same class is returned for every repository).
    :return: Class object of class descending from RevisionBuildEditor
    """
    return WeaveRevisionBuildEditor
class InterFromSvnRepository(InterRepository):
    """Svn to any repository actions."""

    _matching_repo_format = SvnRepositoryFormat()

    _supports_branches = True

    @staticmethod
    def _get_repo_format_to_test():
        # NOTE(review): the body of this method was not visible in the
        # reviewed copy of this file; returning None is assumed -- confirm.
        return None

    def _find_all(self, mapping, pb=None):
        """Find all revisions from the source repository that are not
        yet in the target repository.

        :param mapping: Mapping used to derive the revision ids.
        :param pb: Optional progress bar.
        :return: List of (revid, left hand side parent revid, revmeta)
            tuples in topological order.
        """
        meta_map = {}
        graph = self.source.get_graph()
        available_revs = set()
        for revmeta in self.source.iter_all_changes(pb=pb):
            revid = revmeta.get_revision_id(mapping)
            available_revs.add(revid)
            meta_map[revid] = revmeta
        missing = available_revs.difference(
            self.target.has_revisions(available_revs))
        needed = list(graph.iter_topo_order(missing))
        parents = graph.get_parent_map(needed)
        return [(revid, parents[revid][0], meta_map[revid])
                for revid in needed]

    def _find_branches(self, branches, find_ghosts=False,
                       fetch_rhs_ancestry=False, pb=None):
        """Find the missing revisions of several branch tips.

        :param branches: Sequence of revision ids of the branch tips.
        :param find_ghosts: Find ghosts
        :param fetch_rhs_ancestry: Accepted for interface symmetry.
        :param pb: Optional progress bar.
        :return: List of (revid, left hand side parent revid, revmeta)
            tuples without duplicates, in the order they were found.
        """
        set_needed = set()
        ret_needed = []
        for index, revid in enumerate(branches):
            if pb:
                pb.update("determining revisions to fetch", index,
                          len(branches))
            # Created outside the try block, so the finally clause never
            # sees an unbound name.
            nestedpb = ui.ui_factory.nested_progress_bar()
            try:
                # NOTE(review): fetch_rhs_ancestry is not forwarded; False
                # is passed unconditionally, as in the reviewed copy of
                # this file -- confirm that this is intended.
                for rev in self._find_until(revid, find_ghosts=find_ghosts,
                                            fetch_rhs_ancestry=False,
                                            pb=nestedpb):
                    if rev[0] not in set_needed:
                        ret_needed.append(rev)
                        set_needed.add(rev[0])
            finally:
                nestedpb.finished()
        return ret_needed

    def _find_until(self, revision_id, find_ghosts=False,
                    fetch_rhs_ancestry=False, pb=None):
        """Find all missing revisions until revision_id

        :param revision_id: Stop revision
        :param find_ghosts: Find ghosts
        :param fetch_rhs_ancestry: Fetch right hand side ancestors
        :param pb: Optional progress bar.
        :return: List of (revid, left hand side parent revid, revmeta)
            tuples of the missing revisions, oldest first.
        """
        extra = set()
        revs = []
        meta_map = {}
        lhs_parent = {}

        def check_revid(revision_id):
            # Walk the branch history backwards from revision_id and
            # remember every revision the target does not have yet.
            prev = None
            (branch_path, revnum, mapping) = \
                self.source.lookup_revision_id(revision_id)
            for revmeta in self.source.iter_reverse_branch_changes(
                    branch_path, revnum, mapping):
                if pb:
                    pb.update("determining revisions to fetch",
                              revnum-revmeta.revnum, revnum)
                revid = revmeta.get_revision_id(mapping)
                lhs_parent[prev] = revid
                meta_map[revid] = revmeta
                if fetch_rhs_ancestry:
                    extra.update(revmeta.get_rhs_parents(mapping))
                if not self.target.has_revision(revid):
                    revs.append(revid)
                elif not find_ghosts:
                    # Present in the target: its ancestry is assumed to be
                    # present as well.
                    prev = None
                    break
                prev = revid
            lhs_parent[prev] = NULL_REVISION

        check_revid(revision_id)

        for revid in extra:
            if revid not in revs:
                check_revid(revid)

        return [(revid, lhs_parent[revid], meta_map[revid])
                for revid in reversed(revs)]

    def copy_content(self, revision_id=None, pb=None):
        """See InterRepository.copy_content."""
        self.fetch(revision_id, pb, find_ghosts=False)

    def _fetch_replay(self, revids, pb=None):
        """Copy a set of related revisions using svn.ra.replay.

        :param revids: Revision ids to copy.
        :param pb: Optional progress bar
        :raises NotImplementedError: always; this strategy is not
            implemented.
        """
        # Refer to this method itself: naming an attribute that does not
        # exist would raise AttributeError instead.
        raise NotImplementedError(self._fetch_replay)

    def _fetch_switch(self, repos_root, revids, pb=None):
        """Copy a set of related revisions using svn.ra.switch.

        :param repos_root: URL of the root of the source repository.
        :param revids: List of (revid, parent revid, revmeta) tuples of the
            revisions to copy, in the order they have to be copied.
        :param pb: Optional progress bar.
        """
        prev_revid = None
        if pb is None:
            pb = ui.ui_factory.nested_progress_bar()
            nested_pb = pb
        else:
            nested_pb = None
        num = 0
        prev_inv = None

        self.target.lock_write()
        try:
            # Inside the try block so the lock is released even when the
            # editor cannot be created.
            revbuildklass = get_revision_build_editor(self.target)
            editor = revbuildklass(self.source, self.target)

            for (revid, parent_revid, revmeta) in revids:
                pb.update('copying revision', num, len(revids))

                assert parent_revid is not None

                if parent_revid == NULL_REVISION:
                    parent_inv = Inventory(root_id=None)
                elif prev_revid != parent_revid:
                    parent_inv = self.target.get_inventory(parent_revid)
                else:
                    # Parent is the revision that was just copied
                    parent_inv = prev_inv

                editor.start_revision(revid, parent_inv, revmeta)

                if parent_revid == NULL_REVISION:
                    parent_branch = editor.branch_path
                    parent_revnum = editor.revnum
                    start_empty = True
                else:
                    (parent_branch, parent_revnum, mapping) = \
                        self.source.lookup_revision_id(parent_revid)
                    start_empty = False

                # NOTE(review): the error handling below (reporter.abort(),
                # editor.abort(), returning the connection, unlocking) was
                # only partly visible in the reviewed copy of this file --
                # confirm against the authoritative source.
                try:
                    conn = None
                    try:
                        conn = self.source.transport.connections.get(
                            urlutils.join(repos_root, parent_branch))

                        if parent_branch != editor.branch_path:
                            reporter = conn.do_switch(
                                editor.revnum, "", True,
                                urlutils.join(repos_root, editor.branch_path),
                                editor)
                        else:
                            reporter = conn.do_update(editor.revnum, "", True,
                                                      editor)

                        try:
                            # Report status of existing paths
                            reporter.set_path("", parent_revnum, start_empty)
                        except:
                            # Clean up, then re-raise unchanged
                            reporter.abort()
                            raise

                        reporter.finish()
                    finally:
                        if conn is not None:
                            self.source.transport.add_connection(conn)
                except:
                    # Clean up, then re-raise unchanged
                    editor.abort()
                    raise

                prev_inv = editor.inventory
                prev_revid = revid
                num += 1
        finally:
            self.target.unlock()
            if nested_pb is not None:
                nested_pb.finished()

    def fetch(self, revision_id=None, pb=None, find_ghosts=False,
              branches=None, fetch_rhs_ancestry=False):
        """Fetch revisions.

        :param revision_id: Revision to fetch up to; None fetches all
            revisions of the source repository.
        :param pb: Optional progress bar.
        :param find_ghosts: Find ghosts
        :param branches: Optional sequence of branch tip revision ids;
            takes precedence over revision_id.
        :param fetch_rhs_ancestry: Fetch right hand side ancestors
        """
        if revision_id == NULL_REVISION:
            return

        if pb:
            pb.update("determining revisions to fetch", 0, 2)

        # Determine which revisions are missing while holding a read lock
        # on the target; they are copied afterwards.
        self.target.lock_read()
        try:
            if branches is not None:
                needed = self._find_branches(branches, find_ghosts,
                                             fetch_rhs_ancestry, pb=pb)
            elif revision_id is None:
                needed = self._find_all(self.source.get_mapping(), pb=pb)
            else:
                needed = self._find_until(revision_id, find_ghosts,
                                          fetch_rhs_ancestry, pb=pb)
        finally:
            self.target.unlock()

        if len(needed) == 0:
            # Nothing to fetch
            return

        self._fetch_switch(self.source.transport.get_svn_repos_root(),
                           needed, pb)

    @staticmethod
    def is_compatible(source, target):
        """Be compatible with SvnRepository."""
        # FIXME: Also check target uses VersionedFile
        return (isinstance(source, SvnRepository) and
                target.supports_rich_root())