1 # Copyright (C) 2006 Jelmer Vernooij <jelmer@samba.org>
3 # This program is free software; you can redistribute it and/or modify
4 # it under the terms of the GNU General Public License as published by
5 # the Free Software Foundation; either version 2 of the License, or
6 # (at your option) any later version.
8 # This program is distributed in the hope that it will be useful,
9 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 # GNU General Public License for more details.
13 # You should have received a copy of the GNU General Public License
14 # along with this program; if not, write to the Free Software
15 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 from bzrlib.errors import NoSuchRevision, BzrError, NotBranchError
18 from bzrlib.progress import ProgressBar, DummyProgress
19 from bzrlib.trace import mutter
23 from svn.core import SubversionException, Pool
24 from transport import SvnRaTransport
32 from pysqlite2 import dbapi2 as sqlite3
36 def _escape_commit_message(message):
37 """Replace xml-incompatible control characters."""
41 # FIXME: RBC 20060419 this should be done by the revision
42 # serialiser not by commit. Then we can also add an unescaper
43 # in the deserializer and start roundtripping revision messages
44 # precisely. See repository_implementations/test_repository.py
46 # Python strings can include characters that can't be
47 # represented in well-formed XML; escape characters that
48 # aren't listed in the XML specification
49 # (http://www.w3.org/TR/REC-xml/#NT-Char).
51 u'[^\x09\x0A\x0D\u0020-\uD7FF\uE000-\uFFFD]+',
52 lambda match: match.group(0).encode('unicode_escape'),
class LogWalker(object):
    """Easy way to access the history of a Subversion repository.

    Revision metadata and changed paths are fetched from the repository on
    demand and kept in a sqlite database (tables ``revision`` and
    ``changed_path``), so that later queries can be answered from the cache.
    """

    def __init__(self, transport=None, cache_db=None, last_revnum=None):
        """Create a new instance.

        :param transport: SvnRaTransport to use to access the repository.
        :param cache_db: Optional sql database connection to use. An
            in-memory database is used if not set.
        :param last_revnum: Last known revnum in the repository. Will be
            determined if not specified.
        """
        assert isinstance(transport, SvnRaTransport)

        if last_revnum is None:
            last_revnum = transport.get_latest_revnum()

        self.last_revnum = last_revnum

        # Always talk to the repository root, whatever URL the caller's
        # transport was opened on.
        self.transport = SvnRaTransport(transport.get_repos_root())

        if cache_db is None:
            self.db = sqlite3.connect(":memory:")
        else:
            self.db = cache_db

        self.db.executescript("""
          create table if not exists revision(revno integer unique, author text, message text, date text);
          create unique index if not exists revision_revno on revision (revno);
          create table if not exists changed_path(rev integer, action text, path text, copyfrom_path text, copyfrom_rev integer);
          create index if not exists path_rev on changed_path(rev);
          create index if not exists path_rev_path on changed_path(rev, path);
        """)
        self.db.commit()

        # Highest revision already present in the cache (0 when empty).
        self.saved_revnum = self.db.execute("SELECT MAX(revno) FROM revision").fetchone()[0]
        if self.saved_revnum is None:
            self.saved_revnum = 0

    def fetch_revisions(self, to_revnum, pb=None):
        """Fetch information about all revisions in the remote repository
        up to to_revnum and store it in the cache.

        :param to_revnum: End of range to fetch information for
        :param pb: Optional progress bar to use. If not given, one is
            created (a dummy one when only a few revisions are missing).
        :raises NoSuchRevision: if the repository reports that the
            requested revision does not exist.
        """
        def rcvr(orig_paths, rev, author, date, message, pool):
            pb.update('fetching svn revision info', rev, to_revnum)

            if orig_paths is None:
                orig_paths = {}

            for p in orig_paths:
                copyfrom_path = orig_paths[p].copyfrom_path
                if copyfrom_path:
                    copyfrom_path = copyfrom_path.strip("/")

                self.db.execute(
                    "insert into changed_path (rev, path, action, copyfrom_path, copyfrom_rev) values (?, ?, ?, ?, ?)",
                    (rev, p.strip("/"), orig_paths[p].action, copyfrom_path, orig_paths[p].copyfrom_rev))

            # Messages are stored base64-encoded; get_revision_info()
            # decodes them again.
            if message is not None:
                message = base64.b64encode(message)

            self.db.execute("replace into revision (revno, author, date, message) values (?,?,?,?)", (rev, author, date, message))

            self.saved_revnum = rev

        to_revnum = max(self.last_revnum, to_revnum)

        if pb is None:
            # Don't bother for only a few revisions
            if abs(self.saved_revnum-to_revnum) < 10:
                pb = DummyProgress()
            else:
                pb = ProgressBar()

        pool = Pool()
        try:
            try:
                self.transport.get_log("/", self.saved_revnum, to_revnum,
                                       0, True, True, rcvr, pool)
            finally:
                pb.clear()
        except SubversionException as e:
            pool.destroy()
            _, num = e.args
            if num == svn.core.SVN_ERR_FS_NO_SUCH_REVISION:
                raise NoSuchRevision(branch=self,
                    revision="Revision number %d" % to_revnum)
            raise
        self.db.commit()
        pool.destroy()

    def follow_path(self, path, revnum):
        """Return iterator over all the revisions between revnum and
        0 named path or inside path.

        :param path: Branch path to start reporting (in revnum)
        :param revnum: Start revision.

        :return: An iterator that yields tuples with (path, paths, revnum)
            where paths is a dictionary with all changes that happened in
            path in revnum.
        """
        if revnum == 0 and path == "":
            return

        path = path.strip("/")

        while revnum >= 0:
            revpaths = self.get_revision_paths(revnum, path)

            if revpaths:
                yield (path, revpaths, revnum)

            if path in revpaths:
                (action, copyfrom_path, copyfrom_rev) = revpaths[path]
                if copyfrom_path is None:
                    if action in ('A', 'R'):
                        # this path didn't exist before this revision
                        return
                else:
                    # In this revision, this path was copied from
                    # somewhere else; continue with the copy source.
                    revnum = copyfrom_rev
                    path = copyfrom_path
                    continue
            revnum -= 1

    def get_revision_paths(self, revnum, path=None):
        """Obtain dictionary with all the changes in a particular revision.

        :param revnum: Subversion revision number
        :param path: optional path under which to return all entries
        :returns: dictionary with paths as keys and
            (action, copyfrom_path, copyfrom_rev) as values.
        """
        if revnum == 0:
            # Revision 0 only ever contains the (empty) repository root.
            return {'': ('A', None, -1)}

        if revnum > self.saved_revnum:
            self.fetch_revisions(revnum)

        query = "select path, action, copyfrom_path, copyfrom_rev from changed_path where rev=?"
        args = [revnum]
        if path is not None and path != "":
            # Escape LIKE metacharacters so that '%' and '_' occurring in
            # the path only match themselves.
            # NOTE: sqlite's LIKE is case-insensitive for ASCII by default,
            # so the prefix match itself is not case-exact.
            prefix = path.replace("\\", "\\\\")
            prefix = prefix.replace("%", "\\%").replace("_", "\\_")
            query += " and (path=? or path like ? escape '\\')"
            args.extend([path, prefix + "/%"])

        paths = {}
        for p, act, cf, cr in self.db.execute(query, args):
            paths[p] = (act, cf, cr)
        return paths

    def get_revision_info(self, revnum, pb=None):
        """Obtain basic information for a specific revision.

        :param revnum: Revision number.
        :param pb: Optional progress bar, passed on to fetch_revisions().
        :returns: Tuple with author, log message and date of the revision.
        """
        if revnum > self.saved_revnum:
            self.fetch_revisions(revnum, pb)
        (author, message, date) = self.db.execute(
            "select author, message, date from revision where revno=?",
            (revnum,)).fetchone()
        if message is None:
            # Revision without a log message; nothing to decode.
            return (author, None, date)
        return (author, _escape_commit_message(base64.b64decode(message)), date)

    def find_latest_change(self, path, revnum):
        """Find latest revision that touched path.

        :param path: Path to check for changes
        :param revnum: First revision to check
        :returns: Highest revision number <= revnum in which path itself
            was changed; 0 for the repository root if it was never changed.
        """
        if revnum > self.saved_revnum:
            self.fetch_revisions(revnum)

        row = self.db.execute(
            "select rev from changed_path where path=? and rev <= ? order by rev desc limit 1",
            (path.strip("/"), revnum)).fetchone()
        if row is None and path == "":
            return 0

        assert row is not None, "no latest change for %r:%d" % (path, revnum)

        return row[0]

    def touches_path(self, path, revnum):
        """Check whether path was changed in specified revision.

        :param path: Path to check
        :param revnum: Revision to check
        :returns: True if path itself was changed in revnum.
        """
        if revnum > self.saved_revnum:
            self.fetch_revisions(revnum)
        if revnum == 0:
            # Only the repository root exists in revision 0.
            return (path == "")
        return (self.db.execute(
            "select 1 from changed_path where path=? and rev=?",
            (path, revnum)).fetchone() is not None)

    def find_children(self, path, revnum):
        """Find all children of path in revnum.

        :param path: Directory to list
        :param revnum: Revision to list it in
        :returns: Iterator over the paths of all entries below path,
            recursively.
        """
        # TODO: Find children by walking history, or use the cache?
        try:
            (dirents, _, _) = self.transport.get_dir(
                path.lstrip("/").encode('utf8'), revnum, kind=True)
        except SubversionException as e:
            _, num = e.args
            if num == svn.core.SVN_ERR_FS_NOT_DIRECTORY:
                # path is not a directory, so it has no children.
                return
            raise

        for p in dirents:
            child = os.path.join(path, p)
            yield child
            # The node kind reported for an entry is not relied upon here
            # (some ra backends seem to return negative values for .kind),
            # so every entry is probed as a directory; anything that is
            # not one ends in the SVN_ERR_FS_NOT_DIRECTORY branch above.
            for c in self.find_children(child, revnum):
                yield c

    def get_previous(self, path, revnum):
        """Return path,revnum pair specified pair was derived from.

        :param path: Path to check
        :param revnum: Revision to check
        :returns: (copyfrom_path, copyfrom_rev) if path was copied in
            revnum, (None, -1) if it was newly added, and (path, revnum-1)
            otherwise.
        """
        if revnum > self.saved_revnum:
            self.fetch_revisions(revnum)
        if revnum == 0:
            return (None, -1)
        row = self.db.execute(
            "select action, copyfrom_path, copyfrom_rev from changed_path where path=? and rev=?",
            (path, revnum)).fetchone()
        if row is None:
            # path was not changed in this revision, so it is simply the
            # same path one revision earlier.
            return (path, revnum-1)
        if row[2] == -1:
            if row[0] == 'A':
                # Added without copy source: no predecessor.
                return (None, -1)
            return (path, revnum-1)
        return (row[1], row[2])