1 #!/usr/bin/env python2.4
3 # Copyright (C) 2005 by Canonical Ltd
4 # Copyright (C) 2005-2006 by Jelmer Vernooij
6 # Written by Gustavo Niemeyer <gustavo@niemeyer.net>
7 # Bugfixes and additional features by Jelmer Vernooij <jelmer@samba.org>
9 # This program is free software; you can redistribute it and/or modify
10 # it under the terms of the GNU General Public License as published by
11 # the Free Software Foundation; either version 2 of the License, or
12 # (at your option) any later version.
14 # This program is distributed in the hope that it will be useful,
15 # but WITHOUT ANY WARRANTY; without even the implied warranty of
16 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 # GNU General Public License for more details.
19 # You should have received a copy of the GNU General Public License
20 # along with this program; if not, write to the Free Software
21 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
30 import svn.ra, svn.core, svn.repos
31 from svn.core import SubversionException
32 from cStringIO import StringIO
34 from bzrlib.plugin import load_plugins
37 logger = logging.getLogger("bzr")
38 logger.addHandler(logging.FileHandler("/dev/null"))
40 from bzrlib.bzrdir import BzrDir
41 from bzrlib.branch import Branch
42 import bzrlib.osutils as osutils
43 from bzrlib.progress import ProgressBar
44 from bzrlib.workingtree import WorkingTree
50 sys.setrecursionlimit(10000)
53 if hasattr(get_logger, "initialized"):
54 logger = logging.getLogger("svn2bzr")
56 get_logger.initialized = True
57 class Formatter(logging.Formatter):
58 def format(self, record):
59 if record.levelno != logging.INFO:
60 record.prefix = record.levelname.lower()+": "
63 return logging.Formatter.format(self, record)
64 formatter = Formatter("%(prefix)s%(message)s")
65 handler = logging.StreamHandler(sys.stderr)
66 handler.setFormatter(formatter)
67 #logger = logging.getLogger("bzr")
68 #logger.addHandler(handler)
69 #logger.setLevel(logging.ERROR)
70 logger = logging.getLogger("svn2bzr")
71 logger.addHandler(handler)
72 logger.setLevel(logging.INFO)
class Error(Exception):
    """Exception raised by this script for user-facing failures.

    svn2bzr() raises it, for example, when the output directory already
    exists or when the input file cannot be loaded as a dump file.
    """
78 class BranchCreator(object):
80 def __init__(self, ra, root=None, prefix=None, log=None, check=False):
83 self._root = os.path.realpath(root)
85 self._prefix = prefix.strip("/")
86 self._prefix_dir = self._prefix+"/"
89 self._prefix_dir = None
94 self._log = log or get_logger()
96 def _new_branch(self, branch, wt):
97 # Ugly, but let's wait until that API stabilizes. Right
98 # now branch.working_tree() will open the branch again.
99 self._log.debug("Creating new branch: %s" % branch.base)
101 config = bzrlib.config.LocationConfig(branch.base)
102 config.set_user_option('create_signatures', 'never')
def _remove_branch(self, branch):
    """Abstract hook: dispose of ``branch``.

    The base implementation does nothing useful; SingleBranchCreator and
    DynamicBranchCreator each supply their own version.

    Raises:
        NotImplementedError: always.
    """
    raise NotImplementedError()
def _get_branch(self, path):
    """Abstract hook: look up the branch responsible for ``path``.

    The base implementation is a placeholder; SingleBranchCreator and
    DynamicBranchCreator each supply their own version.

    Raises:
        NotImplementedError: always.
    """
    raise NotImplementedError()
def _get_all_branches(self):
    """Abstract hook: return every branch known to this creator.

    The base implementation is a placeholder; SingleBranchCreator and
    DynamicBranchCreator each supply their own version.

    Raises:
        NotImplementedError: always.
    """
    raise NotImplementedError()
113 def _get_branch_path(self, path):
114 path = self.unprefix(path)
115 if self.is_good(path):
116 branch = self._get_branch(path)
118 abspath = os.path.join(self._root, path)
119 return branch, branch.__wt.relpath(abspath)
def add_filter(self, include, regexp):
    """Register one include/exclude rule.

    ``regexp`` is compiled once here and stored, paired with the
    ``include`` flag, at the end of ``self._filter`` so rules keep the
    order in which they were added.
    """
    compiled = re.compile(regexp)
    rule = (include, compiled)
    self._filter.append(rule)
125 def is_good(self, path):
126 for include, pattern in self._filter:
127 if pattern.match(path):
131 def unprefix(self, path):
134 elif path == self._prefix:
136 elif path.startswith(self._prefix):
137 return path[len(self._prefix)+1:]
141 def add_file(self, path, content):
142 branch, path_branch = self._get_branch_path(path)
144 abspath = branch.__wt.abspath(path_branch)
145 self._log.debug("Adding file: %s" % abspath)
146 open(abspath, "w").write(content)
147 branch.__wt.add(path_branch)
148 self._changed[branch.__wt] = True
150 def change_file(self, path, content):
151 branch, path_branch = self._get_branch_path(path)
153 abspath = branch.__wt.abspath(path_branch)
154 self._log.debug("Changing file: %s" % abspath)
155 open(abspath, "w").write(content)
156 self._changed[branch.__wt] = True
158 def copy_file(self, orig_path, orig_revno, dest_path, content):
159 dest_branch, dest_path_branch = self._get_branch_path(dest_path)
161 abspath = dest_branch.__wt.abspath(dest_path_branch)
162 self._log.debug("Copying file: %s at %d to %s" %
163 (orig_path, orig_revno, abspath))
164 open(abspath, "w").write(content)
165 dest_branch.__wt.add(dest_path_branch)
166 self._changed[dest_branch.__wt] = True
168 def add_dir(self, path):
169 branch, path_branch = self._get_branch_path(path)
170 # The path test below checks if we got an empty path,
171 # which happens when adding the self._prefix directory itself,
172 # and shouldn't be considered since creating that directory
173 # must have been done by _get_branch().
174 if branch and path_branch:
175 # Due to filtering, the directory may be added
176 # without adding parent directories.
177 abspath = branch.__wt.abspath(path_branch)
178 self._log.debug("Adding dir: %s" % abspath)
179 if os.path.isdir(os.path.dirname(abspath)):
181 branch.__wt.add(path_branch)
183 path_parts = path_branch.split('/')
184 dir = branch._transport.local_abspath('.')
185 for part in path_parts:
186 dir = "%s/%s" % (dir, part)
187 if not os.path.isdir(dir):
189 branch.__wt.add(branch.__wt.relpath(dir))
190 self._changed[branch.__wt] = True
192 def copy_dir(self, orig_path, orig_revno, dest_path):
# Copies the contents of directory orig_path at revision orig_revno (read
# through self._ra) below dest_path: files are fetched and handed to
# add_file(), sub-directories are meant to be handled recursively.
# NOTE(review): several statements of this method (including the loop
# header that binds `name`) are not visible in this view; the notes below
# cover only the lines shown.
193 # Inside that method we cannot assume that dest_branch
194 # is a valid branch, since we may be interested just in
195 # part of the copy being made, for which a branch does
198 # To better understand what each path means, let's assume that
199 # a copy of "trunk/foo" is being made to "branches/mine/foo",
200 # "trunk" and "branches/mine" are different branches, and that
201 # "trunk/foo/bar" exists and is being copied during the current
204 # orig_path = "trunk/foo"
205 # dest_path = "branches/mine/foo"
206 # dest_path_branch = "foo"
207 # path = "trunk/foo/bar"
209 # copy_dest_path = "branches/mine/foo/bar"
# NOTE(review): dest_branch / dest_path_branch are not read by any of the
# statements visible below.
213 dest_branch, dest_path_branch = self._get_branch_path(dest_path)
214 (dirents, _, _) = svn.ra.get_dir2(self._ra, orig_path.encode('utf8'), orig_revno, 0)
217 path = "%s/%s" % (orig_path, name)
# tail is the part of path after "orig_path/", i.e. the entry name again.
218 tail = path[len(orig_path)+1:]
219 copy_dest_path = os.path.join(dest_path, tail)
# NOTE(review): path is passed un-encoded here, whereas get_dir2 above and
# get_file below receive .encode('utf8') -- confirm whether check_path
# needs the same encoding for non-ASCII names.
220 node_kind = svn.ra.check_path(self._ra, path, orig_revno)
221 if node_kind == svn.core.svn_node_file:
223 svn.ra.get_file(self._ra, path.encode('utf8'), orig_revno,
225 self.add_file(copy_dest_path, stream.read())
226 elif node_kind == svn.core.svn_node_dir:
# FIXME(review): copy_dir is declared as (orig_path, orig_revno, dest_path),
# here and in the DynamicBranchCreator override, but this recursive call
# passes a single argument, so it raises TypeError as soon as a nested
# directory is met. Presumably (path, orig_revno, copy_dest_path) was
# intended -- confirm and fix.
227 self.copy_dir(copy_dest_path)
230 def set_ignore_glob(self, path, globs):
231 from bzrlib.atomicfile import AtomicFile
232 branch, path_branch = self._get_branch_path(path)
235 self._log.debug("Ignoring out-of-branch ignore settings on %s" % path)
238 # Obtain list of existing ignores
239 ifn = branch.__wt.abspath('.bzrignore')
241 if os.path.exists(ifn):
243 existing = f.read().decode('utf-8').splitlines()
249 igns = list(existing)
251 # Figure out which elements are already there
253 if os.path.dirname(ign) == path_branch:
254 if os.path.basename(ign) in globs:
255 globs.remove(os.path.basename(ign))
257 self._log.debug("Removing ignore entry '%s'" % ign)
260 # The remaining items didn't exist yet
262 if ign.strip() != "":
263 entry = os.path.join(path_branch, ign)
264 self._log.debug("Adding ignore entry '%s'" % entry)
267 f = AtomicFile(ifn, 'wt')
270 f.write("%s\n" % i.encode('utf-8'))
273 if not branch.__wt.path2id('.bzrignore'):
274 branch.__wt.add('.bzrignore')
276 self._changed[branch.__wt] = True
278 def set_executable(self, path, executable):
279 branch, path_branch = self._get_branch_path(path)
281 self._log.debug("Ignoring out-of-branch executable settings on %s" % path)
284 abspath = branch.__wt.abspath(path_branch)
285 mode = os.stat(abspath).st_mode
290 os.chmod(abspath, mode)
291 self._changed[branch.__wt] = True
293 def copy(self, orig_path, orig_revno, dest_path, content):
294 node_kind = svn.ra.check_path(self._ra, orig_path, orig_revno)
295 if node_kind == svn.core.svn_node_dir:
296 self.copy_dir(orig_path, orig_revno, dest_path)
298 self.copy_file(orig_path, orig_revno, dest_path, content)
300 def move(self, orig_path, orig_revno, dest_path, content):
# Moves orig_path to dest_path. When the destination has no branch, or the
# two paths resolve to different branches, the move is carried out as
# remove() followed by copy(); the later statements rename within the
# working tree of orig_branch.
# NOTE(review): at least one line between the two parts (presumably an
# `else:`) is not visible in this view.
# NOTE(review): if this def sits inside `class BranchCreator` (indentation
# is not recoverable here), every `.__wt` below is name-mangled by Python
# to `._BranchCreator__wt`; code outside that class would not see the same
# attribute under the name `__wt`.
301 orig_branch, orig_path_branch = self._get_branch_path(orig_path)
302 dest_branch, dest_path_branch = self._get_branch_path(dest_path)
303 if not dest_branch or orig_branch != dest_branch:
304 self.remove(orig_path)
305 self.copy(orig_path, orig_revno, dest_path, content)
307 orig_abspath = orig_branch.__wt.abspath(orig_path_branch)
308 if not os.path.exists(orig_abspath):
# NOTE(review): `revert` is imported here, but the visible code calls the
# working-tree method `.revert(...)` instead; the import looks unused.
309 from bzrlib.transform import revert
310 # Was previously removed, as usual in svn.
311 orig_branch.__wt.revert([orig_path_branch], backups=False)
313 self._log.debug("Moving: %s to %s" %
315 dest_branch.__wt.abspath(dest_path_branch)))
316 orig_branch.__wt.rename_one(orig_path_branch, dest_path_branch)
# Flag the working tree as changed so that commit() includes it.
317 self._changed[orig_branch.__wt] = True
319 def remove(self, path):
320 branch, path_branch = self._get_branch_path(path)
322 abspath = branch.__wt.abspath(path_branch)
324 # Do we want to remove the branch or its content?
325 self._log.debug("Removing branch: %s" % abspath)
326 self._remove_branch(branch)
327 elif os.path.exists(abspath):
328 if os.path.isdir(abspath):
329 self._log.debug("Removing dir: %s" % abspath)
330 shutil.rmtree(abspath)
331 self.set_ignore_glob(abspath, [])
332 # If the directory parent is filtered, no one is
333 # taking care of it, so remove it as well.
334 abspath = os.path.dirname(abspath)
335 while abspath != branch._transport.local_abspath('.'):
336 relpath = abspath[len(branch._transport.local_abspath('.'))+1:]
337 if self.is_good(relpath):
343 elif os.path.isfile(abspath):
344 self._log.debug("Removing file: %s" % abspath)
346 branch.__wt.remove(path_branch)
347 self._changed[branch.__wt] = True
349 def check_uncommitted(self, wt):
350 from bzrlib.delta import compare_trees
352 delta = compare_trees(wt.basis_tree(), wt)
354 assert(delta.modified == [])
355 assert(delta.added == [])
356 assert(delta.removed == [])
357 assert(delta.renamed == [])
359 def commit(self, revno, message, committer, revprops, timestamp):
361 self._log.debug("Committing revision %d" % revno)
362 for wt in self._changed:
363 # TODO: Set revision id
364 wt.commit(message, committer=committer, timestamp=timestamp,
365 revprops=revprops, verbose=False)
367 self.check_uncommitted(wt)
369 self._log.debug("Nothing changed in revision %d" % revno)
370 self._revisions[revno] = revs = {}
371 for (path, branch) in self._branches.items():
372 revs[path] = (branch, branch.last_revision())
373 self._changed.clear()
# Reads the revision properties of `revno`, converts svn:date to a numeric
# timestamp and calls self.commit() with log message, author, revprops and
# timestamp.
# NOTE(review): the `def` line enclosing these statements is not visible in
# this view.
380 revprops = svn.ra.rev_proplist(self._ra, revno)
382 # Parse timestamps like 2005-09-23T17:52:33.719737Z
# time_tokens[0] is the part before the ".", time_tokens[1] the fractional
# digits followed by "Z" (for the sample above: "719737Z").
383 time_tokens = revprops["svn:date"].split(".")
384 parsed_time = time.strptime(time_tokens[0],
# NOTE(review): time.mktime() interprets the struct as *local* time, while
# the sample timestamp carries a "Z" suffix -- confirm whether
# calendar.timegm() (UTC) was intended.
386 timestamp = time.mktime(parsed_time)
# FIXME(review): time_tokens[1][:-1] is the digit string without the "Z"
# ("719737" for the sample), so float() yields 719737.0 and this adds that
# many whole seconds, not 0.719737. Presumably
# float("0." + time_tokens[1][:-1]) was intended -- confirm and fix.
387 timestamp += float(time_tokens[1][:-1])
# NOTE(review): svn:log falls back to "" when absent, but svn:author is
# indexed directly and raises KeyError for a revision without an author.
389 self.commit(revno, revprops.get("svn:log", ""),
390 committer=revprops["svn:author"],
391 revprops=revprops, timestamp=timestamp)
397 def rcvr(paths, revno, author, date, message, pool):
401 pb.update("fetching revision info", revno, latest_revnum)
405 latest_revnum = svn.ra.get_latest_revnum(self._ra)
409 svn.ra.get_log(self._ra, ['/'], 0, latest_revnum, 0, True, False, rcvr)
416 pb.update('processing revision', revno, latest_revnum)
417 paths = logs[revno].keys()
419 for node_path in paths:
420 node = logs[revno][node_path]
421 node_path = node_path.lstrip("/")
422 if self.unprefix(node_path) is None:
425 node_kind = svn.ra.check_path(self._ra, node_path.encode('utf8'), revno)
427 assert (node_kind in (svn.core.svn_node_file, svn.core.svn_node_dir) or (node.action == 'D' and node_kind == svn.core.svn_node_none))
428 assert node.action in ('A', 'M', 'D', 'R')
430 if node_kind == svn.core.svn_node_file:
432 (_, props) = svn.ra.get_file(self._ra, node_path.encode('utf8'), revno, stream)
434 content = stream.read()
435 elif node_kind == svn.core.svn_node_dir:
436 (_, _, props) = svn.ra.get_dir2(self._ra, node_path.encode('utf8'), revno, 0)
438 elif node_kind == svn.core.svn_node_none:
441 if node.action == 'D':
442 self.remove(node_path)
443 deleted[node_path] = True
445 elif node.action == 'A' or node.action == 'R':
447 if node.action == 'R':
448 self.remove(node_path)
450 if node.copyfrom_path:
451 copy_path = node.copyfrom_path.lstrip("/")
452 copy_revno = node.copyfrom_rev
454 if copy_path in deleted and copy_revno == revno-1:
455 self.move(copy_path, copy_revno, node_path, content)
456 elif node_kind == svn.core.svn_node_file:
457 self.copy_file(copy_path, copy_revno, node_path, content)
459 self.copy_dir(copy_path, copy_revno, node_path)
461 elif node_kind == svn.core.svn_node_file:
462 self.add_file(node_path, content)
464 elif node_kind == svn.core.svn_node_dir:
465 self.add_dir(node_path)
468 self.change_file(node_path, content)
470 if os.path.isfile(os.path.join(self._root, node_path)):
471 if props.has_key('svn:executable') and \
472 props['svn:executable'].strip() == '*':
473 self.set_executable(node_path, True)
475 self.set_executable(node_path, False)
477 if props.has_key('svn:ignore'):
478 self.set_ignore_glob(node_path, \
479 props['svn:ignore'].splitlines())
486 class SingleBranchCreator(BranchCreator):
488 def __init__(self, ra, root, prefix=None, log=None, check=False):
489 BranchCreator.__init__(self, ra, root, prefix, log, check)
492 def _remove_branch(self, branch):
494 shutil.rmtree(self._root)
496 def _get_branch(self, path):
498 wt = BzrDir.create_standalone_workingtree(self._root)
499 self._branch = wt.branch
500 self._new_branch(self._branch, wt)
503 def _get_all_branches(self):
504 if self._branch is None:
507 return [self._branch]
510 class DynamicBranchCreator(BranchCreator):
def __init__(self, ra, root, prefix=None, log=None, check=False):
    """Set up the common creator state, then create a shared repository.

    All arguments are forwarded unchanged to BranchCreator.__init__;
    afterwards a repository with ``shared=True`` is created at
    ``self._root``.
    """
    BranchCreator.__init__(self, ra, root=root, prefix=prefix, log=log,
                           check=check)
    repository_root = self._root
    BzrDir.create_repository(repository_root, shared=True)
517 def _remove_branch(self, branch):
# Moves the on-disk directory of `branch` into the attic area below
# self._root, reopens it from the new location, repoints the entries of
# self._revisions that referenced the old path and drops the path from
# self._branches.
# NOTE(review): some statements of this method are not visible in this
# view (between the `has_key` test and the tuple unpacking below).
518 # Retire a branch to the attic
# rel_path: location of the branch relative to self._root, without a
# trailing slash.
519 rel_path = branch._transport.local_abspath('.')[len(self._root)+1:].rstrip("/")
# FIXME(review): self._revisions.keys()[-1] is used as "the latest
# revision number". If _revisions is a plain dict (its construction is not
# visible here), key order is arbitrary in Python 2, so the suffix may not
# be the highest revno; max(self._revisions) would state the intent.
520 attic_branch = "%s-r%d" % (os.path.basename(rel_path), self._revisions.keys()[-1])
521 branch_top = os.path.join(self._root, DynamicBranchCreator.ATTICDIR, os.path.dirname(rel_path))
522 self._log.debug("Retiring %s to %s" % (rel_path, attic_branch))
523 if not os.path.isdir(branch_top):
524 os.makedirs(branch_top)
525 attic_path = os.path.join(branch_top, attic_branch)
526 shutil.move(branch._transport.local_abspath('.'), attic_path)
# Reopen branch and working tree from their new location and register them
# through _new_branch().
527 bzrdir = BzrDir.open(attic_path)
528 new_branch = bzrdir.open_branch()
529 self._new_branch(new_branch, bzrdir.open_workingtree())
531 # Set correct path for old revisions that used this branch
532 for revno in self._revisions:
533 if not self._revisions[revno].has_key(rel_path):
# Keep the recorded revision id `r`, swap in the relocated branch object.
536 (b, r) = self._revisions[revno][rel_path]
538 self._revisions[revno][rel_path] = (new_branch, r)
540 del self._branches[rel_path]
542 def _want_branch(self, path):
545 def _get_branch(self, path):
546 for (bp, branch) in self._branches.items():
547 if path == bp or path.startswith(bp+"/"):
def _get_all_branches(self):
    """Return the branch objects currently held in ``self._branches``."""
    tracked = self._branches
    return tracked.values()
553 def add_dir(self, path):
554 branch, path_branch = self._get_branch_path(path)
555 unpref_path = self.unprefix(path)
557 if self.is_good(unpref_path) and self._want_branch(unpref_path):
558 branch_path = os.path.join(self._root, unpref_path)
559 os.makedirs(branch_path)
560 branch = BzrDir.create_branch_convenience(branch_path)
561 self._branches[unpref_path] = branch
562 self._new_branch(branch, WorkingTree.open(branch_path))
564 BranchCreator.add_dir(self, path)
566 def copy_dir(self, orig_path, orig_revno, dest_path):
567 # unpref_dest_path can't be None because it was
568 # already filtered in run()
569 unpref_orig_path = self.unprefix(orig_path)
570 unpref_dest_path = self.unprefix(dest_path)
571 orig_abspath = os.path.join(self._root, unpref_orig_path)
572 if (unpref_orig_path is None or
573 not self._revisions[orig_revno].has_key(unpref_orig_path) or
574 self._get_branch(unpref_dest_path)):
577 BranchCreator.copy_dir(self, orig_path, orig_revno,
580 elif self.is_good(unpref_dest_path):
583 dest_abspath = os.path.join(self._root, unpref_dest_path)
584 (orig_branch, revid) = self._revisions[orig_revno][unpref_orig_path]
585 os.makedirs(dest_abspath)
586 bzrdir = orig_branch.bzrdir.sprout(url="file://%s" % dest_abspath, revision_id=revid)
587 branch = bzrdir.open_branch()
588 self._branches[unpref_dest_path] = branch
589 self._new_branch(branch, bzrdir.open_workingtree())
591 def remove(self, path):
# Removes `path`. When no branch covers the unprefixed path, the directory
# is deleted from disk and matching entries are dropped from
# self._branches; the final statement delegates to BranchCreator.remove().
# NOTE(review): a line just before that final call (presumably `else:`) is
# not visible in this view.
592 unpref_path = self.unprefix(path)
593 if not self._get_branch(unpref_path):
594 abspath = os.path.join(self._root, unpref_path)
595 if os.path.isdir(abspath):
596 shutil.rmtree(abspath)
# .keys() returns a list copy under Python 2, which is what makes deleting
# from the dict inside this loop safe.
597 for branch_path in self._branches.keys():
# NOTE(review): add_dir() and copy_dir() store self._branches keys as
# *unprefixed* paths, but this test uses the original `path`. With a
# non-empty prefix the two differ -- confirm whether unpref_path was
# intended here.
598 if branch_path.startswith(path+"/"):
599 del self._branches[branch_path]
601 BranchCreator.remove(self, path)
class TrunkBranchCreator(DynamicBranchCreator):
    """Dynamic creator that never makes a branch out of the layout roots.

    The only customisation is ``_want_branch``, which
    DynamicBranchCreator.add_dir consults before creating a branch for a
    newly added directory.
    """

    def _want_branch(self, path):
        """Return False for "", "tags" and "branches"; True otherwise."""
        if path in ("", "tags", "branches"):
            return False
        return True
# NOTE(review): `filter=[]` is a mutable default argument and shadows the
# builtin; the visible code only iterates over it, so it is harmless today.
610 def svn2bzr(url, output_dir, creator_class=None, prefix=None, filter=[], consistency_check=False):
# Converts the Subversion source `url` (a dump file, a local repository
# directory, or a URL) into output_dir using creator_class, which defaults
# to SingleBranchCreator. Raises Error when output_dir already exists or
# the dump file cannot be loaded.
# NOTE(review): several statements (the `try:` line, the call that runs
# the conversion, the cleanup guard) are not visible in this view.
612 if os.path.exists(output_dir):
613 raise Error, "%s already exists" % output_dir
615 if creator_class is None:
616 creator_class = SingleBranchCreator
# A plain file is treated as a dump: it is loaded into a temporary
# repository, which then becomes the source.
620 if os.path.isfile(url):
621 tmp_repos = tempfile.mkdtemp(prefix='bzr-svn-dump-')
622 repos = svn.repos.svn_repos_create(tmp_repos, '', '', None, None)
# NOTE(review): the file object from open(url) is never closed explicitly.
624 svn.repos.load_fs2(repos, open(url), StringIO(), svn.repos.load_uuid_default, '', 0, 0, None)
# FIXME(review): in Python 2, `except E, target` *binds* the exception to
# target. The tuple here is therefore an unpacking target, not a filter:
# every SubversionException is caught, and its first element is assigned to
# svn.core.SVN_ERR_STREAM_MALFORMED_DATA, overwriting the constant.
# Presumably the intent was to catch the exception, compare its error code
# with that constant and re-raise otherwise -- confirm and fix.
625 except SubversionException, (svn.core.SVN_ERR_STREAM_MALFORMED_DATA, _):
626 raise Error, "%s is not a dump file" % url
628 url = "file://%s" % tmp_repos
# A local directory is addressed through a file:// URL.
630 elif os.path.isdir(url):
631 url = "file://%s" % url
633 ra = svn.ra.open2(url.encode('utf8'), svn.ra.callbacks2_t(), None, None)
# Re-anchor the session at the repository root.
635 root = svn.ra.get_repos_root(ra)
637 svn.ra.reparent(ra, root)
639 creator = creator_class(ra, output_dir, prefix, check=consistency_check)
# Each filter entry is an (include_flag, regexp) pair.
641 for include, regexp in filter:
642 creator.add_filter(include, regexp)
# NOTE(review): tmp_repos is only assigned in the dump-file branch above;
# whatever guards this cleanup is not visible here -- confirm it cannot run
# with tmp_repos unbound.
647 osutils.rmtree(tmp_repos)
649 def append_filter(option, opt, value, parser):
# optparse callback shared by --include and --exclude: appends
# (is_include, value) to the list stored under option.dest on
# parser.values.
# NOTE(review): the statement executed when the type check below succeeds
# is not visible in this view (presumably it creates a fresh list).
650 lst = getattr(parser.values, option.dest)
# NOTE(review): `type(lst) is not list` also rejects list subclasses;
# isinstance(lst, list) is the usual spelling.
651 if type(lst) is not list:
653 setattr(parser.values, option.dest, lst)
# The first tuple element is True for --include and False for any other
# option string.
654 lst.append((opt == "--include", value))
658 parser = optparse.OptionParser("svn2bzr [options] "
659 "<dump file|svn-url> <output dir>",
660 version="%prog "+VERSION)
661 parser.defaults["filter"] = []
662 parser.add_option("--include", dest="filter", metavar="REGEXP",
663 type="string", action="callback", callback=append_filter,
664 help="paths matching the regular expression are "
665 "considered if no prior exclude matched")
666 parser.add_option("--exclude", dest="filter", metavar="REGEXP",
667 type="string", action="callback", callback=append_filter,
668 help="paths matching the regular expression are "
669 "discarded if no prior include matched")
670 parser.add_option("--prefix", metavar="PATH", type="string",
671 help="Subversion repository will be considered as if "
672 "it started at the given path")
673 parser.add_option("--scheme", metavar="SCHEME", type="string",
674 help="Subversion repository scheme (single or trunk, "
675 "default is single)",
677 parser.add_option("--check", action="store_true",
678 help="Enable extra consistency checks (slower)")
679 parser.add_option("--log", metavar="LEVEL",
680 help="set logging level to LEVEL (debug, info, "
681 "warning, error)", default="info")
682 opts, args = parser.parse_args()
692 bzrlib.user_encoding = 'utf8'
694 opts = parse_options()
696 if opts.scheme == "trunk":
697 creator_class = TrunkBranchCreator
699 creator_class = SingleBranchCreator
702 log.setLevel(logging.getLevelName(opts.log.upper()))
705 svn2bzr(opts.args[0], opts.args[1], creator_class,
706 opts.prefix, opts.filter, opts.check)
708 sys.exit("error: %s" % e)
709 except KeyboardInterrupt:
710 sys.exit("Interrupted")
712 if __name__ == "__main__":