1 # Copyright (C) 2005-2007 Jelmer Vernooij <jelmer@samba.org>
3 # This program is free software; you can redistribute it and/or modify
4 # it under the terms of the GNU General Public License as published by
5 # the Free Software Foundation; either version 2 of the License, or
6 # (at your option) any later version.
8 # This program is distributed in the hope that it will be useful,
9 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 # GNU General Public License for more details.
13 # You should have received a copy of the GNU General Public License
14 # along with this program; if not, write to the Free Software
15 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
16 """Fetching revisions from Subversion repositories in batches."""
19 from bzrlib import osutils, ui, urlutils
20 from bzrlib.inventory import Inventory
21 from bzrlib.revision import Revision, NULL_REVISION
22 from bzrlib.repository import InterRepository
23 from bzrlib.trace import mutter
26 from cStringIO import StringIO
29 from svn.core import Pool
32 from fileids import generate_file_id
33 from repository import (SvnRepository, SVN_PROP_BZR_ANCESTRY,
34 SVN_PROP_SVK_MERGE, SVN_PROP_BZR_MERGE,
35 SVN_PROP_BZR_PREFIX, SVN_PROP_BZR_REVISION_INFO,
36 SVN_PROP_BZR_BRANCHING_SCHEME, SVN_PROP_BZR_REVISION_ID,
37 SVN_PROP_BZR_FILEIDS, SvnRepositoryFormat,
38 parse_revision_metadata, parse_merge_property)
39 from tree import apply_txdelta_handler
def md5_strings(strings):
    """Return the MD5sum of the concatenation of strings.

    :param strings: Strings to find the MD5sum of.
    :return: Hexadecimal MD5 digest of all the strings joined together.
    """
    # hashlib supersedes the deprecated ``md5`` module.  It is imported
    # here so the function does not rely on a module-level import.
    from hashlib import md5

    digest = md5()
    # An explicit loop instead of map(): map() used only for its side
    # effects is lazy on Python 3 and would never update the digest.
    for chunk in strings:
        digest.update(chunk)
    return digest.hexdigest()
53 class RevisionBuildEditor(svn.delta.Editor):
54 """Implementation of the Subversion commit editor interface that builds a
def __init__(self, source, target, branch_path, prev_inventory, revid,
             svn_revprops, id_map, scheme):
    """Set up an editor that builds a single revision.

    :param source: Repository the revision is read from; used to generate
        file ids and to look up the parents of the revision.
    :param target: Repository the revision is written to; supplies the
        transaction and the weave store.
    :param branch_path: Path of the branch that is being fetched.
    :param prev_inventory: Inventory of the parent revision.  A copy of it
        is edited into the inventory of the new revision.
    :param revid: Revision id of the revision that is being built.
    :param svn_revprops: Subversion revision properties, indexed as
        (author, message, date).
    :param id_map: Mapping from path to an explicitly assigned file id.
    :param scheme: Branching scheme; its string form is part of the name
        of the ancestry property that is looked for.
    """
    self.branch_path = branch_path
    self.old_inventory = prev_inventory
    self.inventory = copy(prev_inventory)
    # These are read by the editor callbacks, so they have to be kept.
    self.revid = revid
    self.id_map = id_map
    self.scheme = scheme
    self.source = source
    self.target = target
    self.transact = target.get_transaction()
    self.weave_store = target.weave_store
    # Directory file id -> parent revisions used when the directory is
    # added to its weave in close_directory().
    self.dir_baserev = {}
    # Filled in by change_dir_prop() when the matching properties are seen.
    self._bzr_merges = []
    # Not read anywhere in the visible part of this file.
    self._svk_merges = []
    self._revinfo = None
    self._svn_revprops = svn_revprops
    # Source paths of entries that were moved before their deletion was
    # reported; delete_entry() consumes them.
    self._premature_deletes = set()
    # Pool handed to the text delta handler in apply_textdelta().
    self.pool = Pool()
def _get_revision(self, revid):
    """Creates the revision object.

    :param revid: Revision id of the revision to create.
    :return: Revision carrying the parents, timestamp, committer and
        message derived from the Subversion revision properties.
    """
    parent_ids = self.source.revision_parents(revid, self._bzr_merges)

    # Commit SVN revision properties to a Revision object
    rev = Revision(revision_id=revid, parent_ids=parent_ids)

    svn_date = self._svn_revprops[2]  # date
    if svn_date is not None:
        rev.timestamp = 1.0 * svn.core.secs_from_timestr(svn_date, None)
    else:
        rev.timestamp = 0  # FIXME: Obtain repository creation time
    # No timezone information is taken from the revision properties.
    rev.timezone = None

    rev.committer = self._svn_revprops[0]  # author
    if rev.committer is None:
        # NOTE(review): the original fallback value is not visible in the
        # extracted source; an empty committer is assumed - confirm.
        rev.committer = ""
    rev.message = self._svn_revprops[1]  # message

    # _revinfo is only set when the revision-info property was seen on
    # the branch root, so there may be nothing to parse.
    if self._revinfo:
        parse_revision_metadata(self._revinfo, rev)

    return rev
def open_root(self, base_revnum, baton):
    """Open the root directory of the edit.

    :param base_revnum: Unused.
    :param baton: Unused.
    :return: File id of the root directory.  The other callbacks receive
        this value as their ``parent_id`` and use it to index the
        inventories.
    """
    old_root = self.old_inventory.root
    if old_root is None:
        # First time the root is set
        file_id = generate_file_id(self.source, self.revid, "")
        self.dir_baserev[file_id] = []
    else:
        assert old_root.revision is not None
        if "" in self.id_map:
            file_id = self.id_map[""]
        else:
            file_id = old_root.file_id
        self.dir_baserev[file_id] = [old_root.revision]

    new_root = self.inventory.root
    if new_root is not None and file_id == new_root.file_id:
        ie = new_root
    else:
        ie = self.inventory.add_path("", 'directory', file_id)
    ie.revision = self.revid
    return file_id
125 def _get_existing_id(self, parent_id, path):
126 if self.id_map.has_key(path):
127 return self.id_map[path]
128 return self._get_old_id(parent_id, path)
def _get_old_id(self, parent_id, old_path):
    """Return the file id a path had in the old inventory.

    :param parent_id: File id of the directory containing the path.
    :param old_path: Path to look up; only its last component is used.
    :return: File id of the matching child of ``parent_id``.
    """
    siblings = self.old_inventory[parent_id].children
    entry = siblings[urlutils.basename(old_path)]
    return entry.file_id
133 def _get_new_id(self, parent_id, new_path):
134 if self.id_map.has_key(new_path):
135 return self.id_map[new_path]
136 return generate_file_id(self.source, self.revid, new_path)
def delete_entry(self, path, revnum, parent_id, pool):
    """Remove an entry from the inventory that is being built.

    :param path: UTF-8 encoded path of the entry that was deleted.
    :param revnum: Unused.
    :param parent_id: File id of the directory containing the entry.
    :param pool: Unused.
    """
    path = path.decode("utf-8")
    if path in self._premature_deletes:
        # The entry was already moved to its new location by add_file() or
        # add_directory(); removing its id now would delete the moved entry.
        self._premature_deletes.remove(path)
    else:
        self.inventory.remove_recursive_id(self._get_old_id(parent_id, path))
def close_directory(self, id):
    """Finish a directory and record it for the new revision.

    :param id: File id of the directory that is being closed.
    """
    self.inventory[id].revision = self.revid

    # Only record root if the target repository supports it
    # supports_rich_root is a method (is_compatible() calls it); testing
    # the bare attribute would always be true.
    if self.target.supports_rich_root():
        file_weave = self.weave_store.get_weave_or_empty(id, self.transact)
        if not file_weave.has_version(self.revid):
            file_weave.add_lines(self.revid, self.dir_baserev[id], [])
def add_directory(self, path, parent_id, copyfrom_path, copyfrom_revnum,
                  pool):
    """Add a directory to the inventory that is being built.

    NOTE(review): the end of the signature is missing from the extracted
    source; the name of the last parameter is assumed - confirm.

    :param path: UTF-8 encoded path of the new directory.
    :param parent_id: File id of the directory containing it.
    :param copyfrom_path: Path the directory was copied from, or None.
    :param copyfrom_revnum: Unused.
    :param pool: Unused.
    :return: File id of the directory.
    """
    path = path.decode("utf-8")
    file_id = self._get_new_id(parent_id, path)

    self.dir_baserev[file_id] = []
    if file_id in self.inventory:
        # This directory was moved here from somewhere else, but the
        # other location hasn't been removed yet.
        if copyfrom_path is None:
            # FIXME: This should never happen!
            copyfrom_path = self.old_inventory.id2path(file_id)
        assert copyfrom_path == self.old_inventory.id2path(file_id)
        assert copyfrom_path not in self._premature_deletes
        # Remember the old location so that delete_entry() skips it.
        self._premature_deletes.add(copyfrom_path)
        self.inventory.rename(file_id, parent_id, urlutils.basename(path))
        ie = self.inventory[file_id]
    else:
        ie = self.inventory.add_path(path, 'directory', file_id)
    ie.revision = self.revid

    return file_id
def open_directory(self, path, parent_id, base_revnum, pool):
    """Open an existing directory for editing.

    :param path: UTF-8 encoded path of the directory.
    :param parent_id: File id of the directory containing it.
    :param base_revnum: Base revision number; must not be negative.
    :param pool: Unused.
    :return: File id of the directory in the new inventory.
    """
    assert base_revnum >= 0
    # Decode like add_directory() and delete_entry() do, so that all path
    # lookups use the same string type.
    path = path.decode("utf-8")
    base_file_id = self._get_old_id(parent_id, path)
    base_revid = self.old_inventory[base_file_id].revision
    file_id = self._get_existing_id(parent_id, path)
    if file_id == base_file_id:
        self.dir_baserev[file_id] = [base_revid]
        ie = self.inventory[file_id]
    else:
        # Replace if original was inside this branch
        # change id of base_file_id to file_id
        ie = self.inventory[base_file_id]
        for name in ie.children:
            ie.children[name].parent_id = file_id
        # FIXME: Don't touch inventory internals
        del self.inventory._byid[base_file_id]
        self.inventory._byid[file_id] = ie
        # The entry is now stored under file_id, so it has to report that
        # id as well.
        ie.file_id = file_id
        self.dir_baserev[file_id] = []
    ie.revision = self.revid
    return file_id
def change_dir_prop(self, id, name, value, pool):
    """Process a property change on a directory.

    The bzr ancestry and revision-info properties are recorded, but only
    when they are set on the root directory.  All other properties are
    ignored; unrecognised ones with the svn or bzr prefix are noted in the
    trace log.

    :param id: File id of the directory the property is set on.
    :param name: Name of the property.
    :param value: Value of the property.
    :param pool: Unused.
    """
    if name == SVN_PROP_BZR_BRANCHING_SCHEME:
        if id != self.inventory.root.file_id:
            mutter('rogue %r on non-root directory' % name)
            return
    elif name == SVN_PROP_BZR_ANCESTRY+str(self.scheme):
        if id != self.inventory.root.file_id:
            mutter('rogue %r on non-root directory' % name)
            # Do not let a stray property change the merges of the revision.
            return

        # Only the last line of the property is used.
        self._bzr_merges = parse_merge_property(value.splitlines()[-1])
    elif (name.startswith(SVN_PROP_BZR_ANCESTRY) or
          name.startswith(SVN_PROP_BZR_REVISION_ID)):
        # Set for a different branching scheme; nothing to do.
        pass
    elif name == SVN_PROP_SVK_MERGE:
        self._svk_merges = None # Force Repository.revision_parents() to look it up
    elif name == SVN_PROP_BZR_REVISION_INFO:
        if id != self.inventory.root.file_id:
            mutter('rogue %r on non-root directory' % SVN_PROP_BZR_REVISION_INFO)
            return

        self._revinfo = value
    elif name in (svn.core.SVN_PROP_ENTRY_COMMITTED_DATE,
                  svn.core.SVN_PROP_ENTRY_COMMITTED_REV,
                  svn.core.SVN_PROP_ENTRY_LAST_AUTHOR,
                  svn.core.SVN_PROP_ENTRY_LOCK_TOKEN,
                  svn.core.SVN_PROP_ENTRY_UUID,
                  svn.core.SVN_PROP_EXECUTABLE):
        pass
    elif name.startswith(svn.core.SVN_PROP_WC_PREFIX):
        pass
    elif name in (SVN_PROP_BZR_MERGE, SVN_PROP_BZR_FILEIDS):
        pass
    elif (name.startswith(svn.core.SVN_PROP_PREFIX) or
          name.startswith(SVN_PROP_BZR_PREFIX)):
        mutter('unsupported dir property %r' % name)
def change_file_prop(self, id, name, value, pool):
    """Process a property change on the file that is being edited.

    :param id: Unused.
    :param name: Name of the property.
    :param value: Value of the property; None marks the executable and
        special properties as not present.
    :param pool: Unused.
    """
    if name == svn.core.SVN_PROP_EXECUTABLE:
        # You'd expect executable to match
        # svn.core.SVN_PROP_EXECUTABLE_VALUE, but that's not
        # how SVN behaves. It appears to consider the presence
        # of the property sufficient to mark it executable.
        self.is_executable = (value is not None)
    elif name == svn.core.SVN_PROP_SPECIAL:
        self.is_symlink = (value is not None)
    elif name == svn.core.SVN_PROP_ENTRY_COMMITTED_REV:
        self.last_file_rev = int(value)
    elif name in (svn.core.SVN_PROP_ENTRY_COMMITTED_DATE,
                  svn.core.SVN_PROP_ENTRY_LAST_AUTHOR,
                  svn.core.SVN_PROP_ENTRY_LOCK_TOKEN,
                  svn.core.SVN_PROP_ENTRY_UUID,
                  svn.core.SVN_PROP_MIME_TYPE):
        pass
    elif name.startswith(svn.core.SVN_PROP_WC_PREFIX):
        pass
    elif (name.startswith(svn.core.SVN_PROP_PREFIX) or
          name.startswith(SVN_PROP_BZR_PREFIX)):
        mutter('unsupported file property %r' % name)
def add_file(self, path, parent_id, copyfrom_path, copyfrom_revnum, baton):
    """Start a new file and reset the per-file state.

    :param path: UTF-8 encoded path of the new file.
    :param parent_id: File id of the directory containing it.
    :param copyfrom_path: Path the file was copied from, or None.
    :param copyfrom_revnum: Unused.
    :param baton: Unused.
    :return: The decoded path; close_file() uses the value it is given as
        the path of the inventory entry.
    """
    path = path.decode("utf-8")
    self.is_symlink = False
    self.is_executable = None
    # A new file starts out empty.  close_file() and apply_textdelta()
    # both read file_data, so data of a previous file must not linger.
    self.file_data = ""
    self.file_parents = []
    self.file_stream = None
    self.file_id = self._get_new_id(parent_id, path)
    if self.file_id in self.inventory:
        # This file was moved here from somewhere else, but the
        # other location hasn't been removed yet.
        if copyfrom_path is None:
            # FIXME: This should never happen!
            copyfrom_path = self.old_inventory.id2path(self.file_id)
        assert copyfrom_path == self.old_inventory.id2path(self.file_id)
        assert copyfrom_path not in self._premature_deletes
        # Remember the old location so that delete_entry() skips it.
        self._premature_deletes.add(copyfrom_path)
        self.inventory.rename(self.file_id, parent_id, urlutils.basename(path))
    return path
def open_file(self, path, parent_id, base_revnum, pool):
    """Open an existing file and load its previous contents.

    :param path: UTF-8 encoded path of the file.
    :param parent_id: File id of the directory containing it.
    :param base_revnum: Unused.
    :param pool: Unused.
    :return: The decoded path; close_file() uses the value it is given as
        the path of the inventory entry.
    """
    # Decode like add_file() and delete_entry() do, so that all path
    # lookups use the same string type.
    path = path.decode("utf-8")
    base_file_id = self._get_old_id(parent_id, path)
    base_revid = self.old_inventory[base_file_id].revision
    self.file_id = self._get_existing_id(parent_id, path)
    self.is_executable = None
    self.is_symlink = (self.inventory[base_file_id].kind == 'symlink')
    file_weave = self.weave_store.get_weave_or_empty(base_file_id,
                                                     self.transact)
    # Base text that an incoming delta is applied to.
    self.file_data = file_weave.get_text(base_revid)
    self.file_stream = None
    if self.file_id == base_file_id:
        self.file_parents = [base_revid]
    else:
        # The file gets a new id: drop the old entry and start a history
        # without parents.
        del self.inventory[base_file_id]
        self.file_parents = []
    return path
def close_file(self, path, checksum):
    """Store the text of the current file and update its inventory entry.

    :param path: Path of the file; used when a new entry has to be added.
    :param checksum: Expected MD5sum of the complete text, or None to
        skip the check.
    """
    if self.file_stream is not None:
        self.file_stream.seek(0)
        lines = osutils.split_lines(self.file_stream.read())
    else:
        # Data didn't change or file is new
        lines = osutils.split_lines(self.file_data)

    actual_checksum = md5_strings(lines)
    assert checksum is None or checksum == actual_checksum

    file_weave = self.weave_store.get_weave_or_empty(self.file_id,
                                                     self.transact)
    if not file_weave.has_version(self.revid):
        file_weave.add_lines(self.revid, self.file_parents, lines)

    if self.file_id in self.inventory:
        ie = self.inventory[self.file_id]
    elif self.is_symlink:
        ie = self.inventory.add_path(path, 'symlink', self.file_id)
    else:
        ie = self.inventory.add_path(path, 'file', self.file_id)
    ie.revision = self.revid

    if self.is_symlink:
        # The text of a symlink is "link " followed by the target.
        ie.symlink_target = lines[0][len("link "):]
        # NOTE(review): a few lines are missing from the extracted source
        # at this point; confirm nothing else has to be set for symlinks.
    else:
        ie.text_sha1 = osutils.sha_strings(lines)
        ie.text_size = sum(len(line) for line in lines)
        if self.is_executable is not None:
            ie.executable = self.is_executable

    self.file_stream = None
def close_edit(self):
    """Finish the edit and store the revision in the target repository.

    The revision and its inventory are added inside a write group, which
    is aborted when adding them fails.
    """
    assert len(self._premature_deletes) == 0
    rev = self._get_revision(self.revid)
    self.inventory.revision_id = self.revid
    rev.inventory_sha1 = osutils.sha_string(
        bzrlib.xml5.serializer_v5.write_inventory_to_string(
            self.inventory))
    self.target.start_write_group()
    try:
        self.target.add_revision(self.revid, rev, self.inventory)
        self.target.commit_write_group()
    except:
        # Deliberately bare: the write group has to be aborted whatever
        # went wrong, and the error is re-raised for the caller.
        self.target.abort_write_group()
        raise
349 def abort_edit(self):
def apply_textdelta(self, file_id, base_checksum):
    """Prepare to receive a text delta for the current file.

    :param file_id: Unused.
    :param base_checksum: Expected MD5sum of the text the delta applies
        to, or None to skip the check.
    :return: Result of apply_txdelta_handler() for the current base text
        and a fresh output stream.
    """
    # hashlib supersedes the deprecated ``md5`` module.
    from hashlib import md5

    # No trailing comma here: it would turn the checksum into a 1-tuple
    # that never compares equal to base_checksum.
    actual_checksum = md5(self.file_data).hexdigest()
    # Condition and message must not be wrapped in one pair of
    # parentheses; a non-empty tuple is always true.
    assert base_checksum is None or base_checksum == actual_checksum, \
        "base checksum mismatch: %r != %r" % (base_checksum,
                                              actual_checksum)
    self.file_stream = StringIO()
    return apply_txdelta_handler(StringIO(self.file_data),
                                 self.file_stream, self.pool)
362 class InterFromSvnRepository(InterRepository):
363 """Svn to any repository actions."""
365 _matching_repo_format = SvnRepositoryFormat()
368 def _get_repo_format_to_test():
373 needed = filter(lambda x: not self.target.has_revision(x),
374 self.source.all_revision_ids())
376 (branch, revnum, scheme) = self.source.lookup_revision_id(revid)
377 parents[revid] = self.source._mainline_revision_parent(branch,
379 return (needed, parents)
381 def _find_until(self, revision_id):
382 """Find all missing revisions until revision_id
384 :param revision_id: Stop revision
385 :return: Tuple with revisions missing and a dictionary with
386 parents for those revision.
390 (path, until_revnum, scheme) = self.source.lookup_revision_id(
394 for (branch, revnum) in self.source.follow_branch(path,
395 until_revnum, scheme):
396 revid = self.source.generate_revision_id(revnum, branch, str(scheme))
398 if prev_revid is not None:
399 parents[prev_revid] = revid
403 if not self.target.has_revision(revid):
406 parents[prev_revid] = None
407 return (needed, parents)
def copy_content(self, revision_id=None, pb=None):
    """See InterRepository.copy_content.

    NOTE(review): many lines of this method are missing from the extracted
    source (lock releases, loop bookkeeping, call continuations).  They
    were reconstructed from the visible statements - confirm against the
    complete file.

    :param revision_id: Revision to fetch up to.  None fetches every
        revision the target is missing; NULL_REVISION fetches nothing.
    :param pb: Progress bar to report on.  When None, a nested progress
        bar is requested from the ui factory.
    """
    if revision_id == NULL_REVISION:
        return

    # Loop over all the revnums until revision_id
    # (or youngest_revnum) and call self.target.add_revision()
    # or self.target.add_inventory() each time
    self.target.lock_read()
    try:
        if revision_id is None:
            (needed, parents) = self._find_all()
        else:
            (needed, parents) = self._find_until(revision_id)
    finally:
        self.target.unlock()

    if len(needed) == 0:
        # Nothing to fetch
        return

    repos_root = self.source.transport.get_repos_root()

    prev_revid = None
    prev_inv = None
    transport = self.source.transport
    self.target.lock_write()
    if pb is None:
        pb = ui.ui_factory.nested_progress_bar()
        nested_pb = pb
    else:
        # The caller owns the progress bar; it is not finished here.
        nested_pb = None
    try:
        # The oldest revision is built first.
        for num, revid in enumerate(reversed(needed)):
            (branch, revnum, scheme) = self.source.lookup_revision_id(revid)
            pb.update('copying revision', num, len(needed))

            parent_revid = parents[revid]

            if parent_revid is None:
                parent_inv = Inventory(root_id=None)
            elif prev_revid != parent_revid:
                parent_inv = self.target.get_inventory(parent_revid)
            else:
                # The parent was built in the previous iteration, so its
                # inventory can be reused.
                assert prev_inv is not None
                parent_inv = prev_inv

            changes = self.source._log.get_revision_paths(revnum, branch)
            renames = self.source.revision_fileid_renames(revid)
            id_map = self.source.transform_fileid_map(self.source.uuid,
                revnum, branch, changes, renames, scheme)

            editor = RevisionBuildEditor(self.source, self.target, branch,
                parent_inv, revid,
                self.source._log.get_revision_info(revnum),
                id_map, scheme)

            pool = Pool()
            edit, edit_baton = svn.delta.make_editor(editor, pool)

            if parent_revid is None:
                branch_url = urlutils.join(repos_root, branch)
                transport.reparent(branch_url)
                assert transport.svn_url == branch_url.rstrip("/"), \
                    "Expected %r, got %r" % (branch_url.rstrip("/"),
                                             transport.svn_url)
                reporter = transport.do_update(
                    revnum, True, edit, edit_baton, pool)

                # Report status of existing paths
                reporter.set_path("", revnum, True, None, pool)
            else:
                (parent_branch, parent_revnum, scheme) = \
                    self.source.lookup_revision_id(parent_revid)
                transport.reparent(urlutils.join(repos_root, parent_branch))

                if parent_branch != branch:
                    # NOTE(review): the first arguments of this call are
                    # not visible; they are assumed to match do_update().
                    reporter = transport.do_switch(
                        revnum, True,
                        urlutils.join(repos_root, branch),
                        edit, edit_baton, pool)
                else:
                    reporter = transport.do_update(
                        revnum, True, edit, edit_baton, pool)

                # Report status of existing paths
                reporter.set_path("", parent_revnum, False, None, pool)

            lock = transport.lock_read(".")
            try:
                reporter.finish_report(pool)
            finally:
                lock.unlock()

            prev_inv = editor.inventory
            prev_revid = revid
            pool.destroy()
    finally:
        self.target.unlock()
        if nested_pb is not None:
            nested_pb.finished()
    self.source.transport.reparent_root()
def fetch(self, revision_id=None, pb=None):
    """Fetch revisions by handing both arguments on to copy_content().

    :param revision_id: Passed to copy_content() unchanged.
    :param pb: Passed to copy_content() unchanged.
    """
    options = {'revision_id': revision_id, 'pb': pb}
    self.copy_content(**options)
@staticmethod
def is_compatible(source, target):
    """Be compatible with SvnRepository.

    The function takes no ``self``, so it is declared as a static method;
    otherwise calling it on an instance would shift the arguments.

    :param source: Repository to fetch from.
    :param target: Repository to fetch into.
    :return: True if source is a SvnRepository and target supports rich
        roots.
    """
    # FIXME: Also check target uses VersionedFile
    if not isinstance(source, SvnRepository):
        return False
    return target.supports_rich_root()