1 # Copyright (C) 2006 Jelmer Vernooij <jelmer@samba.org>
3 # This program is free software; you can redistribute it and/or modify
4 # it under the terms of the GNU General Public License as published by
5 # the Free Software Foundation; either version 2 of the License, or
6 # (at your option) any later version.
8 # This program is distributed in the hope that it will be useful,
9 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 # GNU General Public License for more details.
13 # You should have received a copy of the GNU General Public License
14 # along with this program; if not, write to the Free Software
15 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 from bzrlib.errors import NoSuchRevision, BzrError, NotBranchError
18 from bzrlib.progress import ProgressBar, DummyProgress
19 from bzrlib.trace import mutter
23 from svn.core import SubversionException
24 from transport import SvnRaTransport
32 from pysqlite2 import dbapi2 as sqlite3
36 def _escape_commit_message(message):
37 """Replace xml-incompatible control characters."""
41 # FIXME: RBC 20060419 this should be done by the revision
42 # serialiser not by commit. Then we can also add an unescaper
43 # in the deserializer and start roundtripping revision messages
44 # precisely. See repository_implementations/test_repository.py
46 # Python strings can include characters that can't be
47 # represented in well-formed XML; escape characters that
48 # aren't listed in the XML specification
49 # (http://www.w3.org/TR/REC-xml/#NT-Char).
51 u'[^\x09\x0A\x0D\u0020-\uD7FF\uE000-\uFFFD]+',
52 lambda match: match.group(0).encode('unicode_escape'),
class NotSvnBranchPath(BzrError):
    """Error for a path that is not a branch path.

    The offending path is kept on the instance as ``branch_path`` and is
    interpolated into the message template ``_fmt``.
    """

    _fmt = "{%(branch_path)s} is not a valid Svn branch path"

    def __init__(self, branch_path):
        """Store the rejected path.

        :param branch_path: Path that was rejected.
        """
        BzrError.__init__(self)
        self.branch_path = branch_path
65 class LogWalker(object):
66 """Easy way to access the history of a Subversion repository."""
def __init__(self, scheme, transport=None, cache_db=None, last_revnum=None, pb=None):
    """Create a new instance and make sure the cache tables exist.

    :param scheme: Branching scheme to use.
    :param transport: SvnRaTransport to use to access the repository.
    :param cache_db: Optional sql database connection to use.
    :param last_revnum: Last known revnum in the repository. Will be
        determined if not specified.
    :param pb: Progress bar to report progress to.
    """
    assert isinstance(transport, SvnRaTransport)

    # The head revision is asked of the caller's transport, before it
    # is cloned for private use.
    if last_revnum is None:
        last_revnum = transport.get_latest_revnum()
    self.last_revnum = last_revnum

    self.transport = transport.clone()
    # NOTE(review): the scheme assignment and the cache_db branch below
    # are reconstructed from how the other methods use self.scheme and
    # self.db -- confirm against the original file.
    self.scheme = scheme

    if cache_db is not None:
        self.db = cache_db
    else:
        self.db = sqlite3.connect(":memory:")

    self.db.executescript("""
      create table if not exists revision(revno integer unique, author text, message text, date text);
      create unique index if not exists revision_revno on revision (revno);
      create table if not exists changed_path(rev integer, action text, path text, copyfrom_path text, copyfrom_rev integer);
      create index if not exists path_rev_path on changed_path(rev, path);
    """)
    self.db.commit()

    # MAX() yields NULL on an empty table; treat that as "nothing cached".
    newest = self.db.execute("SELECT MAX(revno) FROM revision").fetchone()[0]
    self.saved_revnum = newest or 0
104 def fetch_revisions(self, to_revnum, pb=None):
105 """Fetch information about all revisions in the remote repository
108 :param to_revnum: End of range to fetch information for
109 :param pb: Optional progress bar to use
111 def rcvr(orig_paths, rev, author, date, message, pool):
112 pb.update('fetching svn revision info', rev, to_revnum)
114 if orig_paths is None:
117 copyfrom_path = orig_paths[p].copyfrom_path
119 copyfrom_path = copyfrom_path.strip("/")
122 "insert into changed_path (rev, path, action, copyfrom_path, copyfrom_rev) values (?, ?, ?, ?, ?)",
123 (rev, p.strip("/"), orig_paths[p].action, copyfrom_path, orig_paths[p].copyfrom_rev))
125 if message is not None:
126 message = base64.b64encode(message)
128 self.db.execute("replace into revision (revno, author, date, message) values (?,?,?,?)", (rev, author, date, message))
130 self.saved_revnum = rev
132 to_revnum = max(self.last_revnum, to_revnum)
134 # Don't bother for only a few revisions
135 if abs(self.saved_revnum-to_revnum) < 10:
142 mutter('getting log %r:%r' % (self.saved_revnum, to_revnum))
143 self.transport.get_log(["/"], self.saved_revnum, to_revnum,
147 except SubversionException, (_, num):
148 if num == svn.core.SVN_ERR_FS_NO_SUCH_REVISION:
149 raise NoSuchRevision(branch=self,
150 revision="Revision number %d" % to_revnum)
154 def follow_history(self, branch_path, revnum):
155 """Return iterator over all the revisions between revnum and
156 0 that touch branch_path.
158 :param branch_path: Branch path to start reporting (in revnum)
159 :param revnum: Start revision.
163 if revnum == 0 and branch_path in (None, ""):
166 if not branch_path is None and not self.scheme.is_branch(branch_path):
167 raise NotSvnBranchPath(branch_path)
170 branch_path = branch_path.strip("/")
172 if revnum > self.saved_revnum:
173 self.fetch_revisions(revnum)
175 continue_revnum = None
176 for i in range(revnum+1):
182 if not (continue_revnum is None or continue_revnum == i):
185 continue_revnum = None
188 revpaths = self._get_revision_paths(i)
190 if (branch_path is None or
193 p.startswith(branch_path+"/")):
196 (bp, rp) = self.scheme.unprefix(p)
197 if not changed_paths.has_key(bp):
198 changed_paths[bp] = {}
199 changed_paths[bp][p] = revpaths[p]
200 except NotBranchError:
203 assert branch_path is None or len(changed_paths) <= 1
205 for bp in changed_paths:
206 yield (bp, changed_paths[bp], i)
208 if (not branch_path is None and
209 branch_path in revpaths and
210 not revpaths[branch_path][1] is None):
211 # In this revision, this branch was copied from
213 # FIXME: What if copyfrom_path is not a branch path?
214 continue_revnum = revpaths[branch_path][2]
215 branch_path = revpaths[branch_path][1]
217 def find_branches(self, revnum):
218 """Find all branches that were changed in the specified revision number.
220 :param revnum: Revision to search for branches.
222 created_branches = {}
224 if revnum > self.saved_revnum:
225 self.fetch_revisions(revnum)
227 for i in range(revnum+1):
229 paths = {'': ('A', None, None)}
231 paths = self._get_revision_paths(i)
233 if self.scheme.is_branch(p):
234 if paths[p][0] in ('R', 'D'):
235 del created_branches[p]
238 if paths[p][0] in ('A', 'R'):
239 created_branches[p] = i
241 for p in created_branches:
244 def _get_revision_paths(self, revnum):
246 for p, act, cf, cr in self.db.execute("select path, action, copyfrom_path, copyfrom_rev from changed_path where rev="+str(revnum)):
247 paths[p] = (act, cf, cr)
def get_revision_info(self, revnum, pb=None):
    """Obtain basic information for a specific revision.

    Revisions not yet in the cache are fetched first.

    :param revnum: Revision number.
    :param pb: Optional progress bar, handed on to fetch_revisions.
    :returns: Tuple with author, log message and date of the revision.
        The message is base64-decoded and XML-escaped; it is None when
        no message is stored for the revision.
    """
    if revnum > self.saved_revnum:
        self.fetch_revisions(revnum, pb)

    # Bind revnum as a parameter instead of concatenating it into SQL.
    (author, message, date) = self.db.execute(
        "select author, message, date from revision where revno=?",
        (revnum,)).fetchone()

    # NOTE(review): a revision stored without a message has NULL in the
    # cache; this guard is reconstructed -- confirm against the
    # original file.
    if message is None:
        return (author, None, date)
    return (author, _escape_commit_message(base64.b64decode(message)), date)
264 def find_latest_change(self, path, revnum):
265 """Find latest revision that touched path.
267 :param path: Path to check for changes
268 :param revnum: First revision to check
270 if revnum > self.saved_revnum:
271 self.fetch_revisions(revnum)
273 row = self.db.execute(
274 "select rev from changed_path where path='%s' and rev <= %d order by rev desc limit 1" % (path.strip("/"), revnum)).fetchone()
275 if row is None and path == "":
280 def touches_path(self, path, revnum):
281 """Check whether path was changed in specified revision.
283 :param path: Path to check
284 :param revnum: Revision to check
286 if revnum > self.saved_revnum:
287 self.fetch_revisions(revnum)
290 return (self.db.execute("select 1 from changed_path where path='%s' and rev=%d" % (path, revnum)).fetchone() is not None)
292 def find_children(self, path, revnum):
293 """Find all children of path in revnum."""
294 # TODO: Find children by walking history, or use
296 mutter("svn ls -r %d '%r'" % (revnum, path))
299 (dirents, _, _) = self.transport.get_dir(
300 "/" + path.encode('utf8'), revnum)
301 except SubversionException, (_, num):
302 if num == svn.core.SVN_ERR_FS_NOT_DIRECTORY:
307 yield os.path.join(path, p)
308 for c in self.find_children(os.path.join(path, p), revnum):
311 def get_previous(self, path, revnum):
312 """Return path,revnum pair specified pair was derived from.
314 :param path: Path to check
315 :param revnum: Revision to check
317 if revnum > self.saved_revnum:
318 self.fetch_revisions(revnum)
321 row = self.db.execute("select copyfrom_path, copyfrom_rev from changed_path where path='%s' and rev=%d" % (path, revnum)).fetchone()
323 return (path, revnum-1)