1 # Copyright (C) 2005-2007 Jelmer Vernooij <jelmer@samba.org>
3 # This program is free software; you can redistribute it and/or modify
4 # it under the terms of the GNU General Public License as published by
5 # the Free Software Foundation; either version 2 of the License, or
6 # (at your option) any later version.
8 # This program is distributed in the hope that it will be useful,
9 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 # GNU General Public License for more details.
13 # You should have received a copy of the GNU General Public License
14 # along with this program; if not, write to the Free Software
15 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
16 """Fetching revisions from Subversion repositories in batches."""
19 from bzrlib import osutils, ui, urlutils
20 from bzrlib.inventory import Inventory
21 from bzrlib.revision import Revision, NULL_REVISION
22 from bzrlib.repository import InterRepository
23 from bzrlib.trace import mutter
25 from cStringIO import StringIO
28 from svn.core import Pool
31 from fileids import generate_file_id
32 from repository import (SvnRepository, SVN_PROP_BZR_ANCESTRY,
33 SVN_PROP_SVK_MERGE, SVN_PROP_BZR_MERGE,
34 SVN_PROP_BZR_PREFIX, SVN_PROP_BZR_REVISION_INFO,
35 SVN_PROP_BZR_BRANCHING_SCHEME, SVN_PROP_BZR_REVISION_ID,
36 SVN_PROP_BZR_FILEIDS, SvnRepositoryFormat,
37 parse_revision_metadata, parse_merge_property)
38 from tree import apply_txdelta_handler
def md5_strings(strings):
    """Return the MD5sum of the concatenation of strings.

    :param strings: Iterable of byte strings to find the MD5sum of.
    :return: Hexadecimal MD5 digest of all the strings joined together.
    """
    # Imported locally so the block does not depend on the deprecated
    # ``md5`` module.
    import hashlib
    digest = hashlib.md5()
    # Explicit loop: map() used only for its side effects is lazy on
    # Python 3 and would silently skip every update() call.
    for chunk in strings:
        digest.update(chunk)
    return digest.hexdigest()
52 class RevisionBuildEditor(svn.delta.Editor):
53 """Implementation of the Subversion commit editor interface that builds a
# Constructor: stores a transaction object obtained from ``target``.
# NOTE(review): original lines 57-58 are missing from this extract; other
# methods read self.source and self.target, so they were presumably
# assigned there -- confirm against upstream before relying on this block.
56 def __init__(self, source, target):
59 self.transact = target.get_transaction()
# Prepare the editor for building revision ``revid`` on top of
# ``prev_inventory``: look up branch path, revision number and scheme,
# compute the file id map, reset the premature-delete set and start from a
# copy of the previous inventory, then call the _start_revision() hook.
# NOTE(review): original lines 62, 68-72 and 74 are missing from this
# extract (the transform_fileid_map() call below is unterminated). Other
# methods read self.revid, self.dir_baserev, self._bzr_merges,
# self._revinfo and self.pool, which are presumably initialised here --
# confirm against upstream.
61 def start_revision(self, revid, prev_inventory):
63 (self.branch_path, self.revnum, self.scheme) = self.source.lookup_revision_id(revid)
64 changes = self.source._log.get_revision_paths(self.revnum, self.branch_path)
65 renames = self.source.revision_fileid_renames(revid)
66 self.id_map = self.source.transform_fileid_map(self.source.uuid,
67 self.revnum, self.branch_path, changes, renames,
73 self._premature_deletes = set()
75 self.old_inventory = prev_inventory
76 self.inventory = prev_inventory.copy()
77 self._start_revision()
79 def _get_parent_ids(self):
80 return self.source.revision_parents(self.revid, self._bzr_merges)
# Build the bzrlib Revision object for ``revid`` from the Subversion
# revision info of self.revnum (index 0: author, 1: message, 2: date, per
# the inline comments below).
# NOTE(review): several short lines are missing from this extract: the
# docstring terminator, the ``else:`` before the timestamp fallback, the
# body of ``if rev.committer is None:``, whatever guards the
# parse_revision_metadata() call, and the tail of the method (presumably
# returning ``rev``, since _finish_commit() uses the result) -- restore
# them from upstream before relying on this block.
82 def _get_revision(self, revid):
83 """Creates the revision object.
85 :param revid: Revision id of the revision to create.
88 # Commit SVN revision properties to a Revision object
89 rev = Revision(revision_id=revid, parent_ids=self._get_parent_ids())
91 _svn_revprops = self.source._log.get_revision_info(self.revnum)
92 if _svn_revprops[2] is not None:
93 rev.timestamp = 1.0 * svn.core.secs_from_timestr(
94 _svn_revprops[2], None) #date
96 rev.timestamp = 0 # FIXME: Obtain repository creation time
99 rev.committer = _svn_revprops[0] # author
100 if rev.committer is None:
102 rev.message = _svn_revprops[1] # message
105 parse_revision_metadata(self._revinfo, rev)
# Editor callback for the tree root: chooses the root file id (freshly
# generated when the old inventory has no root, otherwise taken from
# id_map[""] or from the old root), records its base revisions in
# dir_baserev, and stamps the root inventory entry with self.revid.
# NOTE(review): the ``else:`` lines (original 114, 118, 125) and the tail
# of the method (original 128; presumably ``return file_id``, as the other
# directory callbacks receive a file id as their ``id`` argument) are
# missing from this extract -- confirm against upstream.
109 def open_root(self, base_revnum, baton):
110 if self.old_inventory.root is None:
111 # First time the root is set
112 file_id = generate_file_id(self.source, self.revid, "")
113 self.dir_baserev[file_id] = []
115 assert self.old_inventory.root.revision is not None
116 if self.id_map.has_key(""):
117 file_id = self.id_map[""]
119 file_id = self.old_inventory.root.file_id
120 self.dir_baserev[file_id] = [self.old_inventory.root.revision]
122 if self.inventory.root is not None and \
123 file_id == self.inventory.root.file_id:
124 ie = self.inventory.root
126 ie = self.inventory.add_path("", 'directory', file_id)
127 ie.revision = self.revid
130 def _get_existing_id(self, parent_id, path):
131 if self.id_map.has_key(path):
132 return self.id_map[path]
133 return self._get_old_id(parent_id, path)
def _get_old_id(self, parent_id, old_path):
    """Look up the file id ``old_path`` had in the previous inventory.

    :param parent_id: File id of the directory containing the entry.
    :param old_path: Path of the entry; only its basename is used.
    :return: File id of the child of ``parent_id`` with that basename.
    """
    siblings = self.old_inventory[parent_id].children
    entry_name = urlutils.basename(old_path)
    return siblings[entry_name].file_id
138 def _get_new_id(self, parent_id, new_path):
139 if self.id_map.has_key(new_path):
140 return self.id_map[new_path]
141 return generate_file_id(self.source, self.revid, new_path)
def _rename(self, file_id, parent_id, path):
    """Move ``file_id`` to ``path`` under ``parent_id`` unless already there.

    :param file_id: File id of the inventory entry to move.
    :param parent_id: File id of the destination directory.
    :param path: Destination path; only its basename is used.
    """
    # NOTE(review): the original's one-line ``if`` body is missing from the
    # extract; it is taken to be an early return, as its "Only rename if
    # not right yet" comment says.
    new_name = urlutils.basename(path)
    entry = self.inventory[file_id]
    if entry.parent_id != parent_id or entry.name != new_name:
        self.inventory.rename(file_id, parent_id, new_name)
# Editor callback for a deletion. A path that is queued in
# _premature_deletes is removed from that set, together with every queued
# path below it.
# NOTE(review): original lines 153 and 158 are missing from this extract.
# Line 158 is presumably the ``else:`` that restricts the inventory
# removal on the last line to paths that were NOT premature deletes --
# confirm against upstream.
150 def delete_entry(self, path, revnum, parent_id, pool):
151 path = path.decode("utf-8")
152 if path in self._premature_deletes:
154 self._premature_deletes.remove(path)
155 for p in self._premature_deletes.copy():
156 if p.startswith("%s/" % path):
157 self._premature_deletes.remove(p)
159 self.inventory.remove_recursive_id(self._get_old_id(parent_id, path))
def close_directory(self, id):
    """Finish a directory: stamp it with the current revision and store it.

    :param id: File id of the directory being closed.
    """
    entry = self.inventory[id]
    entry.revision = self.revid

    # Only record root if the target repository supports it
    base_revisions = self.dir_baserev[id]
    self._store_directory(id, base_revisions)
# Editor callback for an added directory: picks its file id, resets its
# base revisions, and either renames an entry that already carries that id
# (a move whose source has not been deleted yet; the source path is queued
# in _premature_deletes) or adds a new 'directory' inventory entry.
# NOTE(review): missing from this extract: the end of the signature
# (original line 168), the ``else:`` before add_path() (original 185) and
# the tail of the method (original 188-189, presumably ``return file_id``)
# -- confirm against upstream.
167 def add_directory(self, path, parent_id, copyfrom_path, copyfrom_revnum,
169 path = path.decode("utf-8")
170 file_id = self._get_new_id(parent_id, path)
172 self.dir_baserev[file_id] = []
173 if file_id in self.inventory:
174 # This directory was moved here from somewhere else, but the
175 # other location hasn't been removed yet.
176 if copyfrom_path is None:
177 # This should ideally never happen!
178 copyfrom_path = self.old_inventory.id2path(file_id)
179 mutter('no copyfrom path set, assuming %r' % copyfrom_path)
180 assert copyfrom_path == self.old_inventory.id2path(file_id)
181 assert copyfrom_path not in self._premature_deletes
182 self._premature_deletes.add(copyfrom_path)
183 self._rename(file_id, parent_id, path)
184 ie = self.inventory[file_id]
186 ie = self.inventory.add_path(path, 'directory', file_id)
187 ie.revision = self.revid
# Editor callback for an existing directory. When the directory keeps its
# old file id, its base revision is the old entry's revision; otherwise
# the old entry is re-registered under the new id with no base revisions.
# The entry is then stamped with self.revid.
# NOTE(review): missing from this extract: the ``else:`` before the
# "Replace" branch (original line 199), original line 208 inside that
# branch, and the tail of the method (original 211, presumably
# ``return file_id``) -- confirm against upstream.
191 def open_directory(self, path, parent_id, base_revnum, pool):
192 assert base_revnum >= 0
193 base_file_id = self._get_old_id(parent_id, path)
194 base_revid = self.old_inventory[base_file_id].revision
195 file_id = self._get_existing_id(parent_id, path)
196 if file_id == base_file_id:
197 self.dir_baserev[file_id] = [base_revid]
198 ie = self.inventory[file_id]
200 # Replace if original was inside this branch
201 # change id of base_file_id to file_id
202 ie = self.inventory[base_file_id]
203 for name in ie.children:
204 ie.children[name].parent_id = file_id
205 # FIXME: Don't touch inventory internals
206 del self.inventory._byid[base_file_id]
207 self.inventory._byid[file_id] = ie
209 self.dir_baserev[file_id] = []
210 ie.revision = self.revid
# Editor callback for a directory property change. Root-only bzr
# properties are logged as "rogue" when seen on another directory; the
# ancestry property for the current scheme updates self._bzr_merges from
# its last line, the svk merge property resets self._svk_merges, and the
# revision-info property is kept in self._revinfo. Unknown svn/bzr
# properties are logged as unsupported.
# NOTE(review): the statements that followed the mutter() calls and the
# bodies of the empty branches (original lines 217, 221, 226, 232, 241,
# 243, 245) are missing from this extract; they are presumably
# ``return``/``pass`` -- confirm against upstream.
213 def change_dir_prop(self, id, name, value, pool):
214 if name == SVN_PROP_BZR_BRANCHING_SCHEME:
215 if id != self.inventory.root.file_id:
216 mutter('rogue %r on non-root directory' % name)
218 elif name == SVN_PROP_BZR_ANCESTRY+str(self.scheme):
219 if id != self.inventory.root.file_id:
220 mutter('rogue %r on non-root directory' % name)
223 self._bzr_merges = parse_merge_property(value.splitlines()[-1])
224 elif (name.startswith(SVN_PROP_BZR_ANCESTRY) or
225 name.startswith(SVN_PROP_BZR_REVISION_ID)):
227 elif name == SVN_PROP_SVK_MERGE:
228 self._svk_merges = None # Force Repository.revision_parents() to look it up
229 elif name == SVN_PROP_BZR_REVISION_INFO:
230 if id != self.inventory.root.file_id:
231 mutter('rogue %r on non-root directory' % SVN_PROP_BZR_REVISION_INFO)
234 self._revinfo = value
235 elif name in (svn.core.SVN_PROP_ENTRY_COMMITTED_DATE,
236 svn.core.SVN_PROP_ENTRY_COMMITTED_REV,
237 svn.core.SVN_PROP_ENTRY_LAST_AUTHOR,
238 svn.core.SVN_PROP_ENTRY_LOCK_TOKEN,
239 svn.core.SVN_PROP_ENTRY_UUID,
240 svn.core.SVN_PROP_EXECUTABLE):
242 elif name.startswith(svn.core.SVN_PROP_WC_PREFIX):
244 elif name in (SVN_PROP_BZR_MERGE, SVN_PROP_BZR_FILEIDS):
246 elif (name.startswith(svn.core.SVN_PROP_PREFIX) or
247 name.startswith(SVN_PROP_BZR_PREFIX)):
248 mutter('unsupported dir property %r' % name)
def change_file_prop(self, id, name, value, pool):
    """Record a property change on the file currently being edited.

    :param id: File baton (not used here).
    :param name: Name of the property that changed.
    :param value: New property value; None is treated as "not set".
    :param pool: Subversion memory pool (not used here).
    """
    if name == svn.core.SVN_PROP_EXECUTABLE:
        # You'd expect executable to match
        # svn.core.SVN_PROP_EXECUTABLE_VALUE, but that's not
        # how SVN behaves. It appears to consider the presence
        # of the property sufficient to mark it executable.
        self.is_executable = value is not None
    elif name == svn.core.SVN_PROP_SPECIAL:
        self.is_symlink = value is not None
    elif name == svn.core.SVN_PROP_ENTRY_COMMITTED_REV:
        self.last_file_rev = int(value)
    elif name in (svn.core.SVN_PROP_ENTRY_COMMITTED_DATE,
                  svn.core.SVN_PROP_ENTRY_LAST_AUTHOR,
                  svn.core.SVN_PROP_ENTRY_LOCK_TOKEN,
                  svn.core.SVN_PROP_ENTRY_UUID,
                  svn.core.SVN_PROP_MIME_TYPE):
        # Entry metadata and the MIME type are deliberately ignored.
        pass
    elif name.startswith(svn.core.SVN_PROP_WC_PREFIX):
        # Working-copy properties are deliberately ignored.
        pass
    elif (name.startswith(svn.core.SVN_PROP_PREFIX) or
          name.startswith(SVN_PROP_BZR_PREFIX)):
        mutter('unsupported file property %r' % name)
# Editor callback for an added file: resets the per-file state (symlink
# and executable flags, parents, stream), picks the file id, and when that
# id already exists in the inventory treats the addition as a move whose
# source is queued in _premature_deletes.
# NOTE(review): original line 277 (between the is_executable and
# file_parents assignments; close_file() reads self.file_data, which is
# presumably reset there) and the tail of the method (original 293-294,
# presumably the returned file baton) are missing from this extract --
# confirm against upstream.
273 def add_file(self, path, parent_id, copyfrom_path, copyfrom_revnum, baton):
274 path = path.decode("utf-8")
275 self.is_symlink = False
276 self.is_executable = None
278 self.file_parents = []
279 self.file_stream = None
280 self.file_id = self._get_new_id(parent_id, path)
281 if self.file_id in self.inventory:
282 # This file was moved here from somewhere else, but the
283 # other location hasn't been removed yet.
284 if copyfrom_path is None:
285 # This should ideally never happen
286 copyfrom_path = self.old_inventory.id2path(self.file_id)
287 mutter('no copyfrom path set, assuming %r' % copyfrom_path)
288 assert copyfrom_path == self.old_inventory.id2path(self.file_id)
289 assert copyfrom_path not in self._premature_deletes
290 self._premature_deletes.add(copyfrom_path)
291 # No need to rename if it's already in the right spot
292 self._rename(self.file_id, parent_id, path)
# Editor callback for an existing file: loads the base text through
# _get_file_data(), resets the per-file state, and decides the file's
# parents -- the base revision when the file id is unchanged, none when
# the old entry is replaced (it is then deleted from the inventory).
# NOTE(review): the ``else:`` before the replace branch (original lines
# 305-306) and the tail of the method (original 309-310, presumably the
# returned file baton) are missing from this extract -- confirm against
# upstream.
295 def open_file(self, path, parent_id, base_revnum, pool):
296 base_file_id = self._get_old_id(parent_id, path)
297 base_revid = self.old_inventory[base_file_id].revision
298 self.file_id = self._get_existing_id(parent_id, path)
299 self.is_executable = None
300 self.is_symlink = (self.inventory[base_file_id].kind == 'symlink')
301 self.file_data = self._get_file_data(base_file_id, base_revid)
302 self.file_stream = None
303 if self.file_id == base_file_id:
304 self.file_parents = [base_revid]
307 del self.inventory[base_file_id]
308 self.file_parents = []
# Editor callback that finishes the current file: takes the new text from
# file_stream when a delta was applied (otherwise the unchanged
# file_data), checks it against ``checksum`` when one is given, stores it
# via _store_file(), and fills in the inventory entry (revision, then
# either the symlink target or sha1/size/executable bit).
# NOTE(review): missing from this extract: the ``else:`` lines (original
# 315, 328), the condition selecting the symlink branch and the rest of
# that branch (original 331-337), and blank/short lines 318, 321, 323,
# 342 -- restore from upstream before relying on this block.
311 def close_file(self, path, checksum):
312 if self.file_stream is not None:
313 self.file_stream.seek(0)
314 lines = osutils.split_lines(self.file_stream.read())
316 # Data didn't change or file is new
317 lines = osutils.split_lines(self.file_data)
319 actual_checksum = md5_strings(lines)
320 assert checksum is None or checksum == actual_checksum
322 self._store_file(self.file_id, lines, self.file_parents)
324 if self.file_id in self.inventory:
325 ie = self.inventory[self.file_id]
326 elif self.is_symlink:
327 ie = self.inventory.add_path(path, 'symlink', self.file_id)
329 ie = self.inventory.add_path(path, 'file', self.file_id)
330 ie.revision = self.revid
333 ie.symlink_target = lines[0][len("link "):]
338 ie.text_sha1 = osutils.sha_strings(lines)
339 ie.text_size = sum(map(len, lines))
340 if self.is_executable is not None:
341 ie.executable = self.is_executable
343 self.file_stream = None
# Editor callback that ends the edit: checks that every premature delete
# was matched by a real delete, then calls the _finish_commit() hook.
# NOTE(review): original line 348 is missing from this extract; it may
# have held a further statement (e.g. pool cleanup) -- confirm against
# upstream.
345 def close_edit(self):
346 assert len(self._premature_deletes) == 0
347 self._finish_commit()
def apply_textdelta(self, file_id, base_checksum):
    """Prepare to receive a text delta for the current file.

    :param file_id: File baton (not used here).
    :param base_checksum: Expected MD5 hex digest of the base text, or
        None to skip the check.
    :return: The result of apply_txdelta_handler() for the base text and a
        fresh output stream (kept in ``self.file_stream``).
    """
    # Imported locally so the block does not depend on the deprecated
    # ``md5`` module.
    import hashlib
    # No trailing comma here: it used to turn the digest into a 1-tuple,
    # and the assert was written on a parenthesised tuple, so the check
    # could never fail.
    actual_checksum = hashlib.md5(self.file_data).hexdigest()
    assert base_checksum is None or base_checksum == actual_checksum, \
        "base checksum mismatch: %r != %r" % (base_checksum,
                                              actual_checksum)
    self.file_stream = StringIO()
    return apply_txdelta_handler(StringIO(self.file_data),
                                 self.file_stream, self.pool)
359 def _store_file(self, file_id, lines, parents):
360 raise NotImplementedError(self._store_file)
362 def _store_directory(self, file_id, parents):
363 raise NotImplementedError(self._store_directory)
365 def _get_file_data(self, file_id, revid):
366 raise NotImplementedError(self._get_file_data)
368 def _finish_commit(self):
369 raise NotImplementedError(self._finish_commit)
# Abort hook of the base editor; WeaveRevisionBuildEditor overrides it to
# abort the target's write group.
# NOTE(review): the body (original line 372) is missing from this
# extract; presumably a no-op ``pass`` -- confirm against upstream.
371 def abort_edit(self):
# Hook called at the end of start_revision(); WeaveRevisionBuildEditor
# overrides it to open a write group on the target.
# NOTE(review): the body (original line 375) is missing from this
# extract; presumably a no-op ``pass`` -- confirm against upstream.
374 def _start_revision(self):
class WeaveRevisionBuildEditor(RevisionBuildEditor):
    """Subversion commit editor that can write to a weave-based repository.

    Texts go into the target's ``weave_store``; every revision is wrapped
    in a write group on the target repository.
    """

    def __init__(self, source, target):
        RevisionBuildEditor.__init__(self, source, target)
        self.weave_store = target.weave_store

    def _start_revision(self):
        """Open a write group on the target for the new revision."""
        self.target.start_write_group()

    def _store_directory(self, file_id, parents):
        """Record an empty text for a directory unless already present."""
        weave = self.weave_store.get_weave_or_empty(file_id, self.transact)
        if weave.has_version(self.revid):
            return
        weave.add_lines(self.revid, parents, [])

    def _get_file_data(self, file_id, revid):
        """Return the text of ``file_id`` as of ``revid`` from its weave."""
        weave = self.weave_store.get_weave_or_empty(file_id, self.transact)
        text = weave.get_text(revid)
        return text

    def _store_file(self, file_id, lines, parents):
        """Add ``lines`` to the file's weave unless already present."""
        weave = self.weave_store.get_weave_or_empty(file_id, self.transact)
        if weave.has_version(self.revid):
            return
        weave.add_lines(self.revid, parents, lines)

    def _finish_commit(self):
        """Add revision and inventory to the target, then commit the group."""
        revision = self._get_revision(self.revid)
        self.inventory.revision_id = self.revid
        serialised = self.target.serialise_inventory(self.inventory)
        revision.inventory_sha1 = osutils.sha_string(serialised)
        self.target.add_revision(self.revid, revision, self.inventory)
        self.target.commit_write_group()

    def abort_edit(self):
        """Abandon the write group opened by _start_revision()."""
        self.target.abort_write_group()
# Variant of WeaveRevisionBuildEditor that stores texts through the
# target's ``_packs._add_text_to_weave`` instead of the weave store.
# NOTE(review): the docstring terminator (original line 416) and the end
# of the _add_text_to_weave() call (original line 423: remaining arguments
# and closing parenthesis) are missing from this extract -- restore them
# from upstream before relying on this block.
414 class PackRevisionBuildEditor(WeaveRevisionBuildEditor):
415 """Revision Build Editor for Subversion that is specific for the packs API.
417 def __init__(self, source, target):
418 WeaveRevisionBuildEditor.__init__(self, source, target)
420 def _add_text_to_weave(self, file_id, new_lines, parents):
421 return self.target._packs._add_text_to_weave(file_id,
422 self.revid, new_lines, parents, nostore_sha=None,
# Directories are stored as empty texts.
425 def _store_directory(self, file_id, parents):
426 self._add_text_to_weave(file_id, [], parents)
428 def _store_file(self, file_id, lines, parents):
429 self._add_text_to_weave(file_id, lines, parents)
class CommitBuilderRevisionBuildEditor(RevisionBuildEditor):
    """Revision Build Editor for Subversion that uses the CommitBuilder API.

    Placeholder only: constructing an instance always raises
    NotImplementedError.
    """

    def __init__(self, source, target):
        # Run the base constructor first, then refuse to go any further.
        RevisionBuildEditor.__init__(self, source, target)
        raise NotImplementedError(self)
def get_revision_build_editor(repository):
    """Pick the RevisionBuildEditor class suited to a target repository.

    :param repository: Target repository.
    :return: PackRevisionBuildEditor when ``repository`` has a ``_packs``
        attribute, WeaveRevisionBuildEditor otherwise.
    """
    uses_packs = hasattr(repository, '_packs')
    if not uses_packs:
        return WeaveRevisionBuildEditor
    return PackRevisionBuildEditor
447 class InterFromSvnRepository(InterRepository):
448 """Svn to any repository actions."""
450 _matching_repo_format = SvnRepositoryFormat()
# NOTE(review): only the ``def`` line of this function survives in this
# extract. Its body (original line 454) and whatever preceded it
# (original 451-452; there is no ``self`` parameter, so presumably a
# @staticmethod decorator) are missing -- restore from upstream.
453 def _get_repo_format_to_test():
# NOTE(review): the ``def`` line of this method (original line 456) is
# missing from this extract; copy_content() calls self._find_all(), which
# is presumably this method. Also missing: the docstring terminator, the
# initialisation of ``parents``, the loop header that binds ``revid``
# (original 459-460, 463) and the end of the _mainline_revision_parent()
# call (original 466) -- restore from upstream.
# What survives: ``needed`` is every source revision id the target does
# not have, and ``parents`` maps a revision id to its mainline parent.
457 """Find all revisions from the source repository that are not
458 yet in the target repository.
461 needed = filter(lambda x: not self.target.has_revision(x),
462 self.source.all_revision_ids())
464 (branch, revnum, scheme) = self.source.lookup_revision_id(revid)
465 parents[revid] = self.source._mainline_revision_parent(branch,
467 return (needed, parents)
# Walk the branch history of ``revision_id`` via follow_branch() and
# record, for each revision met, the revision met next as its parent;
# the last revision met gets parent None.
# NOTE(review): missing from this extract: the docstring terminator, the
# initialisation of ``needed``, ``parents`` and ``prev_revid`` (original
# 475-481), the end of the lookup_revision_id() call, the update of
# ``prev_revid`` inside the loop (original 488-490) and the body of the
# has_revision() test (original 492, presumably appending to ``needed``)
# -- restore from upstream.
469 def _find_until(self, revision_id):
470 """Find all missing revisions until revision_id
472 :param revision_id: Stop revision
473 :return: Tuple with revisions missing and a dictionary with
474 parents for those revision.
478 (path, until_revnum, scheme) = self.source.lookup_revision_id(
482 for (branch, revnum) in self.source.follow_branch(path,
483 until_revnum, scheme):
484 revid = self.source.generate_revision_id(revnum, branch, str(scheme))
486 if prev_revid is not None:
487 parents[prev_revid] = revid
491 if not self.target.has_revision(revid):
494 parents[prev_revid] = None
495 return (needed, parents)
# Determine which revisions are needed (all of them when revision_id is
# None, otherwise those leading up to revision_id) while the target is
# read-locked, then copy them with _copy_revisions_switch().
# NOTE(review): missing from this extract: the body of the NULL_REVISION
# test (original line 500), the ``else:`` (original 510) and original
# lines 507 and 512-518, which presumably hold the try/finally that
# releases the read lock and an early exit when nothing is needed --
# restore from upstream.
497 def copy_content(self, revision_id=None, pb=None):
498 """See InterRepository.copy_content."""
499 if revision_id == NULL_REVISION:
501 # Dictionary with paths as keys, revnums as values
503 # Loop over all the revnums until revision_id
504 # (or youngest_revnum) and call self.target.add_revision()
505 # or self.target.add_inventory() each time
506 self.target.lock_read()
508 if revision_id is None:
509 (needed, lhs_parent) = self._find_all()
511 (needed, lhs_parent) = self._find_until(revision_id)
519 self._copy_revisions_switch(needed, pb, lhs_parent)
521 def _copy_revisions_replay(self, revids, pb=none):
522 """Copy a set of related revisions using svn.ra.replay.
524 :param revids: Revision ids to copy.
525 :param pb: Optional progress bar
527 raise NotImplementedError(self._copy_revisions_replay)
# Copy ``revids`` (iterated in reverse) into the target: for each
# revision, pick the parent inventory (empty when there is no parent,
# the previous iteration's inventory when the parent is the revision just
# copied, otherwise loaded from the target), start the build editor, and
# drive it with do_update()/do_switch() on the source transport.
# NOTE(review): large parts of this method are missing from this extract
# (the docstring terminator, the initialisation of ``prev_revid``,
# ``num``, ``prev_inv``, ``nested_pb`` and ``pool``, every ``try``/
# ``except``/``finally``/``else`` line, the tails of several calls, and
# the unlock/cleanup statements). Do not rely on the statement order
# shown here -- restore the block from upstream.
529 def _copy_revisions_switch(self, revids, pb=None, lhs_parent=None):
530 """Copy a set of related revisions using svn.ra.switch.
532 :param revids: List of revision ids of revisions to copy,
534 :param pb: Optional progress bar.
536 repos_root = self.source.transport.get_svn_repos_root()
539 transport = self.source.transport
541 pb = ui.ui_factory.nested_progress_bar()
548 self.target.lock_write()
549 revbuildklass = get_revision_build_editor(self.target)
550 editor = revbuildklass(self.source, self.target)
553 for revid in reversed(revids):
554 pb.update('copying revision', num, len(revids))
556 parent_revid = lhs_parent[revid]
558 if parent_revid is None:
559 parent_inv = Inventory(root_id=None)
560 elif prev_revid != parent_revid:
561 parent_inv = self.target.get_inventory(parent_revid)
563 parent_inv = prev_inv
565 editor.start_revision(revid, parent_inv)
570 if parent_revid is None:
571 branch_url = urlutils.join(repos_root,
573 transport.reparent(branch_url)
574 assert transport.svn_url == branch_url.rstrip("/"), \
575 "Expected %r, got %r" % (transport.svn_url, branch_url)
576 reporter = transport.do_update(editor.revnum, True,
579 # Report status of existing paths
580 reporter.set_path("", editor.revnum, True, None, pool)
582 (parent_branch, parent_revnum, scheme) = \
583 self.source.lookup_revision_id(parent_revid)
584 transport.reparent(urlutils.join(repos_root, parent_branch))
586 if parent_branch != editor.branch_path:
587 reporter = transport.do_switch(editor.revnum, True,
588 urlutils.join(repos_root, editor.branch_path),
591 reporter = transport.do_update(editor.revnum, True, editor)
593 # Report status of existing paths
594 reporter.set_path("", parent_revnum, False, None, pool)
596 lock = transport.lock_read(".")
597 reporter.finish_report(pool)
603 prev_inv = editor.inventory
609 if nested_pb is not None:
611 self.source.transport.reparent_root()
def fetch(self, revision_id=None, pb=None):
    """Fetch revisions by delegating to copy_content().

    :param revision_id: Passed through to copy_content().
    :param pb: Passed through to copy_content().
    """
    options = {'revision_id': revision_id, 'pb': pb}
    self.copy_content(**options)
def is_compatible(source, target):
    """Be compatible with SvnRepository.

    :param source: Candidate source repository.
    :param target: Candidate target repository.
    :return: False unless ``source`` is an SvnRepository; otherwise the
        result of ``target.supports_rich_root()``.
    """
    # FIXME: Also check target uses VersionedFile
    if not isinstance(source, SvnRepository):
        return False
    return target.supports_rich_root()