Add get_property_diff() function, another helper function in the logwalker and some...
[jelmer/subvertpy.git] / logwalker.py
1 # Copyright (C) 2006 Jelmer Vernooij <jelmer@samba.org>
2
3 # This program is free software; you can redistribute it and/or modify
4 # it under the terms of the GNU General Public License as published by
5 # the Free Software Foundation; either version 2 of the License, or
6 # (at your option) any later version.
7
8 # This program is distributed in the hope that it will be useful,
9 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11 # GNU General Public License for more details.
12
13 # You should have received a copy of the GNU General Public License
14 # along with this program; if not, write to the Free Software
15 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
17 from bzrlib.errors import NoSuchRevision, BzrError, NotBranchError
18 from bzrlib.progress import ProgressBar, DummyProgress
19 from bzrlib.trace import mutter
20
21 import os
22
23 from svn.core import SubversionException
24 from transport import SvnRaTransport
25 import svn.core
26
27 import base64
28
29 try:
30     import sqlite3
31 except ImportError:
32     from pysqlite2 import dbapi2 as sqlite3
33
# NOTE(review): nothing in the visible part of this module reads or writes
# this dictionary -- confirm whether anything else still depends on it.
shelves = {}
35
36 def _escape_commit_message(message):
37     """Replace xml-incompatible control characters."""
38     if message is None:
39         return None
40     import re
41     # FIXME: RBC 20060419 this should be done by the revision
42     # serialiser not by commit. Then we can also add an unescaper
43     # in the deserializer and start roundtripping revision messages
44     # precisely. See repository_implementations/test_repository.py
45     
46     # Python strings can include characters that can't be
47     # represented in well-formed XML; escape characters that
48     # aren't listed in the XML specification
49     # (http://www.w3.org/TR/REC-xml/#NT-Char).
50     message, _ = re.subn(
51         u'[^\x09\x0A\x0D\u0020-\uD7FF\uE000-\uFFFD]+',
52         lambda match: match.group(0).encode('unicode_escape'),
53         message)
54     return message
55
56
class NotSvnBranchPath(BzrError):
    """Error raised for a path that is not a valid Subversion branch path."""

    # Message template; branch_path is filled in from the instance attribute
    # of the same name.
    _fmt = "{%(branch_path)s} is not a valid Svn branch path"

    def __init__(self, branch_path):
        """Create the error.

        :param branch_path: The offending path.
        """
        BzrError.__init__(self)
        self.branch_path = branch_path
63
64
65 class LogWalker(object):
66     """Easy way to access the history of a Subversion repository."""
67     def __init__(self, scheme, transport=None, cache_db=None, last_revnum=None, pb=None):
68         """Create a new instance.
69
70         :param scheme:  Branching scheme to use.
71         :param transport:   SvnRaTransport to use to access the repository.
72         :param cache_db:    Optional sql database connection to use. Doesn't 
73                             cache if not set.
74         :param last_revnum: Last known revnum in the repository. Will be 
75                             determined if not specified.
76         :param pb:          Progress bar to report progress to.
77         """
78         assert isinstance(transport, SvnRaTransport)
79
80         if last_revnum is None:
81             last_revnum = transport.get_latest_revnum()
82
83         self.last_revnum = last_revnum
84
85         self.transport = transport.clone()
86         self.scheme = scheme
87
88         if cache_db is None:
89             self.db = sqlite3.connect(":memory:")
90         else:
91             self.db = cache_db
92
93         self.db.executescript("""
94           create table if not exists revision(revno integer unique, author text, message text, date text);
95           create unique index if not exists revision_revno on revision (revno);
96           create table if not exists changed_path(rev integer, action text, path text, copyfrom_path text, copyfrom_rev integer);
97           create index if not exists path_rev_path on changed_path(rev, path);
98         """)
99         self.db.commit()
100         self.saved_revnum = self.db.execute("SELECT MAX(revno) FROM revision").fetchone()[0]
101         if self.saved_revnum is None:
102             self.saved_revnum = 0
103
104     def fetch_revisions(self, to_revnum, pb=None):
105         """Fetch information about all revisions in the remote repository
106         until to_revnum.
107
108         :param to_revnum: End of range to fetch information for
109         :param pb: Optional progress bar to use
110         """
111         def rcvr(orig_paths, rev, author, date, message, pool):
112             pb.update('fetching svn revision info', rev, to_revnum)
113             paths = {}
114             if orig_paths is None:
115                 orig_paths = {}
116             for p in orig_paths:
117                 copyfrom_path = orig_paths[p].copyfrom_path
118                 if copyfrom_path:
119                     copyfrom_path = copyfrom_path.strip("/")
120
121                 self.db.execute(
122                      "insert into changed_path (rev, path, action, copyfrom_path, copyfrom_rev) values (?, ?, ?, ?, ?)", 
123                      (rev, p.strip("/"), orig_paths[p].action, copyfrom_path, orig_paths[p].copyfrom_rev))
124
125             if message is not None:
126                 message = base64.b64encode(message)
127
128             self.db.execute("replace into revision (revno, author, date, message) values (?,?,?,?)", (rev, author, date, message))
129
130             self.saved_revnum = rev
131
132         to_revnum = max(self.last_revnum, to_revnum)
133
134         # Don't bother for only a few revisions
135         if abs(self.saved_revnum-to_revnum) < 10:
136             pb = DummyProgress()
137         else:
138             pb = ProgressBar()
139
140         try:
141             try:
142                 mutter('getting log %r:%r' % (self.saved_revnum, to_revnum))
143                 self.transport.get_log(["/"], self.saved_revnum, to_revnum, 
144                                0, True, True, rcvr)
145             finally:
146                 pb.clear()
147         except SubversionException, (_, num):
148             if num == svn.core.SVN_ERR_FS_NO_SUCH_REVISION:
149                 raise NoSuchRevision(branch=self, 
150                     revision="Revision number %d" % to_revnum)
151             raise
152         self.db.commit()
153
154     def follow_history(self, branch_path, revnum):
155         """Return iterator over all the revisions between revnum and 
156         0 that touch branch_path.
157         
158         :param branch_path:   Branch path to start reporting (in revnum)
159         :param revnum:        Start revision.
160         """
161         assert revnum >= 0
162
163         if revnum == 0 and branch_path in (None, ""):
164             return
165
166         if not branch_path is None and not self.scheme.is_branch(branch_path):
167             raise NotSvnBranchPath(branch_path)
168
169         if branch_path:
170             branch_path = branch_path.strip("/")
171
172         if revnum > self.saved_revnum:
173             self.fetch_revisions(revnum)
174
175         continue_revnum = None
176         for i in range(revnum+1):
177             i = revnum - i
178
179             if i == 0:
180                 continue
181
182             if not (continue_revnum is None or continue_revnum == i):
183                 continue
184
185             continue_revnum = None
186
187             changed_paths = {}
188             revpaths = self._get_revision_paths(i)
189             for p in revpaths:
190                 if (branch_path is None or 
191                     p == branch_path or
192                     branch_path == "" or
193                     p.startswith(branch_path+"/")):
194
195                     try:
196                         (bp, rp) = self.scheme.unprefix(p)
197                         if not changed_paths.has_key(bp):
198                             changed_paths[bp] = {}
199                         changed_paths[bp][p] = revpaths[p]
200                     except NotBranchError:
201                         pass
202
203             assert branch_path is None or len(changed_paths) <= 1
204
205             for bp in changed_paths:
206                 yield (bp, changed_paths[bp], i)
207
208             if (not branch_path is None and 
209                 branch_path in revpaths and 
210                 not revpaths[branch_path][1] is None):
211                 # In this revision, this branch was copied from 
212                 # somewhere else
213                 # FIXME: What if copyfrom_path is not a branch path?
214                 continue_revnum = revpaths[branch_path][2]
215                 branch_path = revpaths[branch_path][1]
216
217     def find_branches(self, revnum):
218         """Find all branches that were changed in the specified revision number.
219
220         :param revnum: Revision to search for branches.
221         """
222         created_branches = {}
223
224         if revnum > self.saved_revnum:
225             self.fetch_revisions(revnum)
226
227         for i in range(revnum+1):
228             if i == 0:
229                 paths = {'': ('A', None, None)}
230             else:
231                 paths = self._get_revision_paths(i)
232             for p in paths:
233                 if self.scheme.is_branch(p):
234                     if paths[p][0] in ('R', 'D'):
235                         del created_branches[p]
236                         yield (p, i, False)
237
238                     if paths[p][0] in ('A', 'R'): 
239                         created_branches[p] = i
240
241         for p in created_branches:
242             yield (p, i, True)
243
244     def _get_revision_paths(self, revnum):
245         paths = {}
246         for p, act, cf, cr in self.db.execute("select path, action, copyfrom_path, copyfrom_rev from changed_path where rev="+str(revnum)):
247             paths[p] = (act, cf, cr)
248         return paths
249
250     def get_revision_info(self, revnum, pb=None):
251         """Obtain basic information for a specific revision.
252
253         :param revnum: Revision number.
254         :returns: Tuple with author, log message and date of the revision.
255         """
256         if revnum > self.saved_revnum:
257             self.fetch_revisions(revnum, pb)
258         (author, message, date) = self.db.execute("select author, message, date from revision where revno="+ str(revnum)).fetchone()
259         if author is None:
260             author = None
261         return (author, _escape_commit_message(base64.b64decode(message)), date)
262
263     
264     def find_latest_change(self, path, revnum):
265         """Find latest revision that touched path.
266
267         :param path: Path to check for changes
268         :param revnum: First revision to check
269         """
270         if revnum > self.saved_revnum:
271             self.fetch_revisions(revnum)
272
273         row = self.db.execute(
274              "select rev from changed_path where path='%s' and rev <= %d order by rev desc limit 1" % (path.strip("/"), revnum)).fetchone()
275         if row is None and path == "":
276             return 0
277
278         return row[0]
279
280     def touches_path(self, path, revnum):
281         """Check whether path was changed in specified revision.
282
283         :param path:  Path to check
284         :param revnum:  Revision to check
285         """
286         if revnum > self.saved_revnum:
287             self.fetch_revisions(revnum)
288         if revnum == 0:
289             return (path == "")
290         return (self.db.execute("select 1 from changed_path where path='%s' and rev=%d" % (path, revnum)).fetchone() is not None)
291
292     def find_children(self, path, revnum):
293         """Find all children of path in revnum."""
294         # TODO: Find children by walking history, or use 
295         # cache?
296         mutter("svn ls -r %d '%r'" % (revnum, path))
297
298         try:
299             (dirents, _, _) = self.transport.get_dir(
300                 "/" + path.encode('utf8'), revnum)
301         except SubversionException, (_, num):
302             if num == svn.core.SVN_ERR_FS_NOT_DIRECTORY:
303                 return
304             raise
305
306         for p in dirents:
307             yield os.path.join(path, p)
308             for c in self.find_children(os.path.join(path, p), revnum):
309                 yield c
310
311     def get_previous(self, path, revnum):
312         """Return path,revnum pair specified pair was derived from.
313
314         :param path:  Path to check
315         :param revnum:  Revision to check
316         """
317         if revnum > self.saved_revnum:
318             self.fetch_revisions(revnum)
319         if revnum == 0:
320             return (None, -1)
321         row = self.db.execute("select copyfrom_path, copyfrom_rev from changed_path where path='%s' and rev=%d" % (path, revnum)).fetchone()
322         if row[1] == -1:
323             return (path, revnum-1)
324         return row