1 # Copyright (C) 2005-2007 Jelmer Vernooij <jelmer@samba.org>
3 # This program is free software; you can redistribute it and/or modify
4 # it under the terms of the GNU General Public License as published by
5 # the Free Software Foundation; either version 2 of the License, or
6 # (at your option) any later version.
8 # This program is distributed in the hope that it will be useful,
9 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 # GNU General Public License for more details.
13 # You should have received a copy of the GNU General Public License
14 # along with this program; if not, write to the Free Software
15 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
16 """Fetching revisions from Subversion repositories in batches."""
19 from bzrlib import osutils, ui, urlutils
20 from bzrlib.inventory import Inventory
21 from bzrlib.revision import Revision
22 from bzrlib.repository import InterRepository
23 from bzrlib.trace import mutter
26 from cStringIO import StringIO
29 from svn.core import Pool
32 from fileids import generate_file_id
33 from repository import (SvnRepository, SVN_PROP_BZR_ANCESTRY,
34 SVN_PROP_SVK_MERGE, SVN_PROP_BZR_MERGE,
35 SVN_PROP_BZR_PREFIX, SVN_PROP_BZR_REVISION_INFO,
36 SVN_PROP_BZR_BRANCHING_SCHEME, SVN_PROP_BZR_REVISION_ID,
37 SVN_PROP_BZR_FILEIDS, SvnRepositoryFormat,
38 parse_revision_metadata, parse_merge_property)
39 from tree import apply_txdelta_handler
def md5_strings(strings):
    """Return the MD5sum of the concatenation of strings.

    :param strings: Strings to find the MD5sum of.
    :return: MD5sum as a hexadecimal string.
    """
    try:
        from hashlib import md5 as new_md5
    except ImportError:
        # Python < 2.5 only ships the standalone md5 module.
        from md5 import new as new_md5
    digest = new_md5()
    # Feed the chunks with an explicit loop: map() used purely for its side
    # effects does nothing where map() is lazy.
    for chunk in strings:
        digest.update(chunk)
    return digest.hexdigest()
class RevisionBuildEditor(svn.delta.Editor):
    """Implementation of the Subversion commit editor interface that builds a
    Bazaar revision.

    The editor callbacks update a copy of the previous inventory and add
    texts to the target's weave store; close_edit() then adds the resulting
    revision and inventory to the target repository.
    """

    def __init__(self, source, target, branch_path, prev_inventory, revid,
                 svn_revprops, id_map, scheme):
        """Create an editor that builds revision `revid`.

        :param source: Repository the revision is read from.
        :param target: Repository the revision is written to.
        :param branch_path: Branch path of the revision.
        :param prev_inventory: Inventory this revision is based on.
        :param revid: Revision id of the revision to build.
        :param svn_revprops: Sequence indexed as (author, message, date).
        :param id_map: Mapping from path to file id for this revision.
        :param scheme: Branching scheme; its str() is used as the suffix
            of the ancestry property name.
        """
        self.branch_path = branch_path
        self.old_inventory = prev_inventory
        # Work on a copy so the previous inventory stays available for
        # looking up old file ids and paths.
        self.inventory = copy(prev_inventory)
        self.revid = revid
        self.id_map = id_map
        self.scheme = scheme
        self.source = source
        self.target = target
        self.transact = target.get_transaction()
        self.weave_store = target.weave_store
        # file id of a directory -> list of parent revisions for its text
        self.dir_baserev = {}
        self._revinfo = None
        self._bzr_merges = []
        self._svk_merges = []
        self._svn_revprops = svn_revprops
        # Old paths of entries that were moved before their removal was
        # reported by the driver.
        self._premature_deletes = set()
        self.pool = Pool()

    def _get_revision(self, revid):
        """Creates the revision object.

        :param revid: Revision id of the revision to create.
        :return: Revision object filled in from the svn revision properties.
        """
        parent_ids = self.source.revision_parents(revid, self._bzr_merges)

        # Commit SVN revision properties to a Revision object
        rev = Revision(revision_id=revid, parent_ids=parent_ids)

        if self._svn_revprops[2] is not None:
            rev.timestamp = 1.0 * svn.core.secs_from_timestr(
                self._svn_revprops[2], None)  # date
        else:
            rev.timestamp = 0  # FIXME: Obtain repository creation time

        # NOTE(review): timezone default assumed to be None - confirm.
        rev.timezone = None
        rev.committer = self._svn_revprops[0]  # author
        if rev.committer is None:
            rev.committer = ""
        rev.message = self._svn_revprops[1]  # message

        if self._revinfo:
            parse_revision_metadata(self._revinfo, rev)

        return rev

    def open_root(self, base_revnum, baton):
        """Open the branch root and return its file id."""
        if self.old_inventory.root is None:
            # First time the root is set
            file_id = generate_file_id(self.source, self.revid, "")
            self.dir_baserev[file_id] = []
        else:
            assert self.old_inventory.root.revision is not None
            if "" in self.id_map:
                file_id = self.id_map[""]
            else:
                file_id = self.old_inventory.root.file_id
            self.dir_baserev[file_id] = [self.old_inventory.root.revision]

        if (self.inventory.root is not None and
                file_id == self.inventory.root.file_id):
            ie = self.inventory.root
        else:
            ie = self.inventory.add_path("", 'directory', file_id)
        ie.revision = self.revid
        return file_id

    def _get_existing_id(self, parent_id, path):
        """Return the file id for `path`, preferring the id map over the
        id recorded in the old inventory."""
        if path in self.id_map:
            return self.id_map[path]
        return self._get_old_id(parent_id, path)

    def _get_old_id(self, parent_id, old_path):
        """Return the file id `old_path` had in the old inventory."""
        parent = self.old_inventory[parent_id]
        return parent.children[urlutils.basename(old_path)].file_id

    def _get_new_id(self, parent_id, new_path):
        """Return the file id for a newly added `new_path`, preferring the
        id map over a freshly generated id."""
        if new_path in self.id_map:
            return self.id_map[new_path]
        return generate_file_id(self.source, self.revid, new_path)

    def delete_entry(self, path, revnum, parent_id, pool):
        """Remove `path` from the inventory being built."""
        path = path.decode("utf-8")
        if path in self._premature_deletes:
            # The entry was already moved away by add_file/add_directory;
            # only forget the pending delete.
            self._premature_deletes.remove(path)
        else:
            self.inventory.remove_recursive_id(
                self._get_old_id(parent_id, path))

    def close_directory(self, id):
        """Stamp directory `id` with this revision and record its text."""
        self.inventory[id].revision = self.revid

        # Only record root if the target repository supports it.
        # supports_rich_root is a method and has to be called: testing the
        # bound method itself is always true.
        if (id != self.inventory.root.file_id or
                self.target.supports_rich_root()):
            file_weave = self.weave_store.get_weave_or_empty(id, self.transact)
            if not file_weave.has_version(self.revid):
                file_weave.add_lines(self.revid, self.dir_baserev[id], [])

    def add_directory(self, path, parent_id, copyfrom_path, copyfrom_revnum,
                      pool):
        """Add (or move in) a directory and return its file id."""
        path = path.decode("utf-8")
        file_id = self._get_new_id(parent_id, path)

        self.dir_baserev[file_id] = []
        if file_id in self.inventory:
            # This directory was moved here from somewhere else, but the
            # other location hasn't been removed yet.
            if copyfrom_path is None:
                # FIXME: This should never happen!
                copyfrom_path = self.old_inventory.id2path(file_id)
            assert copyfrom_path == self.old_inventory.id2path(file_id)
            assert copyfrom_path not in self._premature_deletes
            self._premature_deletes.add(copyfrom_path)
            self.inventory.rename(file_id, parent_id, urlutils.basename(path))
            ie = self.inventory[file_id]
        else:
            ie = self.inventory.add_path(path, 'directory', file_id)
        ie.revision = self.revid

        return file_id

    def open_directory(self, path, parent_id, base_revnum, pool):
        """Open an existing directory and return its file id."""
        assert base_revnum >= 0
        base_file_id = self._get_old_id(parent_id, path)
        base_revid = self.old_inventory[base_file_id].revision
        file_id = self._get_existing_id(parent_id, path)
        if file_id == base_file_id:
            self.dir_baserev[file_id] = [base_revid]
            ie = self.inventory[file_id]
        else:
            # Replace if original was inside this branch
            # change id of base_file_id to file_id
            ie = self.inventory[base_file_id]
            for name in ie.children:
                ie.children[name].parent_id = file_id
            # FIXME: Don't touch inventory internals
            del self.inventory._byid[base_file_id]
            self.inventory._byid[file_id] = ie
            # NOTE(review): the entry itself must carry the new id for the
            # id change to be complete - confirm against upstream.
            ie.file_id = file_id
            self.dir_baserev[file_id] = []
        ie.revision = self.revid
        return file_id

    def change_dir_prop(self, id, name, value, pool):
        """Handle a property change on directory `id`.

        bzr merge and revision-info properties are only honoured on the
        root directory; everything else is ignored or logged.
        """
        if name == SVN_PROP_BZR_BRANCHING_SCHEME:
            if id != self.inventory.root.file_id:
                mutter('rogue %r on non-root directory' % name)
                return
        elif name == SVN_PROP_BZR_ANCESTRY + str(self.scheme):
            if id != self.inventory.root.file_id:
                mutter('rogue %r on non-root directory' % name)
                return

            # Only the last line of the property is parsed.
            self._bzr_merges = parse_merge_property(value.splitlines()[-1])
        elif (name.startswith(SVN_PROP_BZR_ANCESTRY) or
              name.startswith(SVN_PROP_BZR_REVISION_ID)):
            pass
        elif name == SVN_PROP_SVK_MERGE:
            # Force Repository.revision_parents() to look it up
            self._svk_merges = None
        elif name == SVN_PROP_BZR_REVISION_INFO:
            if id != self.inventory.root.file_id:
                mutter('rogue %r on non-root directory' %
                       SVN_PROP_BZR_REVISION_INFO)
                return

            self._revinfo = value
        elif name in (svn.core.SVN_PROP_ENTRY_COMMITTED_DATE,
                      svn.core.SVN_PROP_ENTRY_COMMITTED_REV,
                      svn.core.SVN_PROP_ENTRY_LAST_AUTHOR,
                      svn.core.SVN_PROP_ENTRY_LOCK_TOKEN,
                      svn.core.SVN_PROP_ENTRY_UUID,
                      svn.core.SVN_PROP_EXECUTABLE):
            pass
        elif name.startswith(svn.core.SVN_PROP_WC_PREFIX):
            pass
        elif name in (SVN_PROP_BZR_MERGE, SVN_PROP_BZR_FILEIDS):
            pass
        elif (name.startswith(svn.core.SVN_PROP_PREFIX) or
              name.startswith(SVN_PROP_BZR_PREFIX)):
            mutter('unsupported dir property %r' % name)

    def change_file_prop(self, id, name, value, pool):
        """Handle a property change on the file currently being edited."""
        if name == svn.core.SVN_PROP_EXECUTABLE:
            # You'd expect executable to match
            # svn.core.SVN_PROP_EXECUTABLE_VALUE, but that's not
            # how SVN behaves. It appears to consider the presence
            # of the property sufficient to mark it executable.
            self.is_executable = (value is not None)
        elif name == svn.core.SVN_PROP_SPECIAL:
            self.is_symlink = (value is not None)
        elif name == svn.core.SVN_PROP_ENTRY_COMMITTED_REV:
            self.last_file_rev = int(value)
        elif name in (svn.core.SVN_PROP_ENTRY_COMMITTED_DATE,
                      svn.core.SVN_PROP_ENTRY_LAST_AUTHOR,
                      svn.core.SVN_PROP_ENTRY_LOCK_TOKEN,
                      svn.core.SVN_PROP_ENTRY_UUID,
                      svn.core.SVN_PROP_MIME_TYPE):
            pass
        elif name.startswith(svn.core.SVN_PROP_WC_PREFIX):
            pass
        elif (name.startswith(svn.core.SVN_PROP_PREFIX) or
              name.startswith(SVN_PROP_BZR_PREFIX)):
            mutter('unsupported file property %r' % name)

    def add_file(self, path, parent_id, copyfrom_path, copyfrom_revnum, baton):
        """Start a new (or moved-in) file; returns the decoded path, which
        close_file() receives back as its `path` argument."""
        path = path.decode("utf-8")
        self.is_symlink = False
        self.is_executable = None
        # No base text for a new file.
        self.file_data = ""
        self.file_parents = []
        self.file_stream = None
        self.file_id = self._get_new_id(parent_id, path)
        if self.file_id in self.inventory:
            # This file was moved here from somewhere else, but the
            # other location hasn't been removed yet.
            if copyfrom_path is None:
                # FIXME: This should never happen!
                copyfrom_path = self.old_inventory.id2path(self.file_id)
            assert copyfrom_path == self.old_inventory.id2path(self.file_id)
            assert copyfrom_path not in self._premature_deletes
            self._premature_deletes.add(copyfrom_path)
            self.inventory.rename(self.file_id, parent_id,
                                  urlutils.basename(path))
        return path

    def open_file(self, path, parent_id, base_revnum, pool):
        """Start editing an existing file, loading its base text."""
        base_file_id = self._get_old_id(parent_id, path)
        base_revid = self.old_inventory[base_file_id].revision
        self.file_id = self._get_existing_id(parent_id, path)
        self.is_executable = None
        self.is_symlink = (self.inventory[base_file_id].kind == 'symlink')
        file_weave = self.weave_store.get_weave_or_empty(base_file_id,
                                                         self.transact)
        self.file_data = file_weave.get_text(base_revid)
        self.file_stream = None
        if self.file_id == base_file_id:
            self.file_parents = [base_revid]
        else:
            # Replace: the file gets a new id, so drop the old entry and
            # start its history afresh.
            del self.inventory[base_file_id]
            self.file_parents = []
        return path

    def close_file(self, path, checksum):
        """Store the text of the current file and update its inventory entry.

        :param path: Path as returned by add_file()/open_file().
        :param checksum: Expected MD5 of the full text, or None.
        """
        if self.file_stream is not None:
            self.file_stream.seek(0)
            lines = osutils.split_lines(self.file_stream.read())
        else:
            # Data didn't change or file is new
            lines = osutils.split_lines(self.file_data)

        actual_checksum = md5_strings(lines)
        assert checksum is None or checksum == actual_checksum

        file_weave = self.weave_store.get_weave_or_empty(self.file_id,
                                                         self.transact)
        if not file_weave.has_version(self.revid):
            file_weave.add_lines(self.revid, self.file_parents, lines)

        if self.file_id in self.inventory:
            ie = self.inventory[self.file_id]
        elif self.is_symlink:
            ie = self.inventory.add_path(path, 'symlink', self.file_id)
        else:
            ie = self.inventory.add_path(path, 'file', self.file_id)
        ie.revision = self.revid

        if self.is_symlink:
            # The text of a svn symlink starts with "link "; the rest of
            # the first line is the target.
            ie.symlink_target = lines[0][len("link "):]
            # NOTE(review): clearing of the text fields for symlinks is
            # assumed - confirm against upstream.
            ie.text_sha1 = None
            ie.text_size = None
            ie.text_id = None
        else:
            ie.text_sha1 = osutils.sha_strings(lines)
            ie.text_size = sum(map(len, lines))
            if self.is_executable is not None:
                ie.executable = self.is_executable

        self.file_stream = None

    def close_edit(self):
        """Finish the edit: add the revision and inventory to the target."""
        assert len(self._premature_deletes) == 0
        rev = self._get_revision(self.revid)
        self.inventory.revision_id = self.revid
        rev.inventory_sha1 = osutils.sha_string(
            bzrlib.xml5.serializer_v5.write_inventory_to_string(
                self.inventory))
        self.target.add_revision(self.revid, rev, self.inventory)
        self.pool.destroy()

    def abort_edit(self):
        """Nothing to clean up when the edit is aborted."""
        pass

    def apply_textdelta(self, file_id, base_checksum):
        """Return a handler that applies a text delta to the base text.

        The result is written to self.file_stream, which close_file()
        reads back.

        :param base_checksum: Expected MD5 of the base text, or None.
        """
        # Plain string, not a 1-tuple: a stray trailing comma here made the
        # comparison below always fail to match.
        actual_checksum = md5_strings([self.file_data])
        # Condition and message must not be wrapped in one parenthesised
        # tuple, which is always true.
        assert base_checksum is None or base_checksum == actual_checksum, \
            "base checksum mismatch: %r != %r" % (base_checksum,
                                                  actual_checksum)
        self.file_stream = StringIO()
        return apply_txdelta_handler(StringIO(self.file_data),
                                     self.file_stream, self.pool)
class InterFromSvnRepository(InterRepository):
    """Svn to any repository actions."""

    _matching_repo_format = SvnRepositoryFormat()

    @staticmethod
    def _get_repo_format_to_test():
        return None

    def _find_all(self):
        """Find all revisions of the source that the target lacks.

        :return: Tuple with revisions missing and a dictionary with
            parents for those revision.
        """
        parents = {}
        needed = [revid for revid in self.source.all_revision_ids()
                  if not self.target.has_revision(revid)]
        for revid in needed:
            (branch, revnum, scheme) = self.source.lookup_revision_id(revid)
            parents[revid] = self.source._mainline_revision_parent(
                branch, revnum, scheme)
        return (needed, parents)

    def _find_until(self, revision_id):
        """Find all missing revisions until revision_id

        :param revision_id: Stop revision
        :return: Tuple with revisions missing and a dictionary with
            parents for those revision.
        """
        needed = []
        parents = {}
        (path, until_revnum, scheme) = self.source.lookup_revision_id(
            revision_id)

        prev_revid = None
        for (branch, revnum) in self.source.follow_branch(path,
                                                          until_revnum, scheme):
            revid = self.source.generate_revision_id(revnum, branch,
                                                     str(scheme))

            # Each revision yielded is recorded as the parent of the one
            # yielded just before it.
            if prev_revid is not None:
                parents[prev_revid] = revid

            prev_revid = revid

            if not self.target.has_revision(revid):
                needed.append(revid)

        # The last revision yielded has no parent; skip this when nothing
        # was yielded so no bogus None key is recorded.
        if prev_revid is not None:
            parents[prev_revid] = None
        return (needed, parents)

    def copy_content(self, revision_id=None, pb=None):
        """See InterRepository.copy_content."""
        # Loop over all the revnums until revision_id
        # (or youngest_revnum) and call self.target.add_revision()
        # or self.target.add_inventory() each time
        self.target.lock_read()
        try:
            if revision_id is None:
                (needed, parents) = self._find_all()
            else:
                (needed, parents) = self._find_until(revision_id)
        finally:
            self.target.unlock()

        if not needed:
            # Nothing to fetch
            return

        repos_root = self.source.transport.get_repos_root()

        prev_revid = None
        prev_inv = None
        transport = self.source.transport
        nested_pb = None
        num = 0
        self.target.lock_write()
        try:
            # Created inside the try so the write lock is released even if
            # the progress bar cannot be set up.
            if pb is None:
                pb = nested_pb = ui.ui_factory.nested_progress_bar()

            for revid in reversed(needed):
                (branch, revnum, scheme) = self.source.lookup_revision_id(
                    revid)
                pb.update('copying revision', num, len(needed))

                parent_revid = parents[revid]

                if parent_revid is None:
                    parent_inv = Inventory(root_id=None)
                elif prev_revid != parent_revid:
                    parent_inv = self.target.get_inventory(parent_revid)
                else:
                    # Parent is the revision built in the previous
                    # iteration; reuse its inventory.
                    assert prev_inv is not None
                    parent_inv = prev_inv

                changes = self.source._log.get_revision_paths(revnum, branch)
                renames = self.source.revision_fileid_renames(revid)
                id_map = self.source.transform_fileid_map(
                    self.source.uuid, revnum, branch, changes, renames,
                    scheme)

                editor = RevisionBuildEditor(
                    self.source, self.target, branch, parent_inv, revid,
                    self.source._log.get_revision_info(revnum),
                    id_map, scheme)

                pool = Pool()
                try:
                    edit, edit_baton = svn.delta.make_editor(editor, pool)

                    if parent_revid is None:
                        branch_url = urlutils.join(repos_root, branch)
                        transport.reparent(branch_url)
                        assert transport.svn_url == branch_url.rstrip("/"), \
                            "Expected %r, got %r" % (branch_url,
                                                     transport.svn_url)
                        reporter = transport.do_update(
                            revnum, True, edit, edit_baton, pool)

                        # Report status of existing paths
                        reporter.set_path("", revnum, True, None, pool)
                    else:
                        (parent_branch, parent_revnum, _parent_scheme) = \
                            self.source.lookup_revision_id(parent_revid)
                        transport.reparent(
                            urlutils.join(repos_root, parent_branch))

                        if parent_branch != branch:
                            reporter = transport.do_switch(
                                revnum, True,
                                urlutils.join(repos_root, branch),
                                edit, edit_baton, pool)
                        else:
                            reporter = transport.do_update(
                                revnum, True, edit, edit_baton, pool)

                        # Report status of existing paths
                        reporter.set_path("", parent_revnum, False, None,
                                          pool)

                    lock = transport.lock_read(".")
                    try:
                        reporter.finish_report(pool)
                    finally:
                        # Release the transport lock even if driving the
                        # editor fails.
                        lock.unlock()
                finally:
                    pool.destroy()

                prev_inv = editor.inventory
                prev_revid = revid
                num += 1
        finally:
            self.target.unlock()
            if nested_pb is not None:
                nested_pb.finished()
        self.source.transport.reparent_root()

    def fetch(self, revision_id=None, pb=None):
        """Fetch revisions. """
        self.copy_content(revision_id=revision_id, pb=pb)

    @staticmethod
    def is_compatible(source, target):
        """Be compatible with SvnRepository."""
        # FIXME: Also check target uses VersionedFile
        return (isinstance(source, SvnRepository) and
                target.supports_rich_root())