Update notes about memory leaks.
[jelmer/subvertpy.git] / logwalker.py
1 # Copyright (C) 2006 Jelmer Vernooij <jelmer@samba.org>
2
3 # This program is free software; you can redistribute it and/or modify
4 # it under the terms of the GNU General Public License as published by
5 # the Free Software Foundation; either version 2 of the License, or
6 # (at your option) any later version.
7
8 # This program is distributed in the hope that it will be useful,
9 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11 # GNU General Public License for more details.
12
13 # You should have received a copy of the GNU General Public License
14 # along with this program; if not, write to the Free Software
15 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16 """Cache of the Subversion history log."""
17
18 from bzrlib import urlutils
19 from bzrlib.errors import NoSuchRevision
20 import bzrlib.ui as ui
21 from copy import copy
22
23 from svn.core import SubversionException, Pool
24 from transport import SvnRaTransport
25 import svn.core
26
27 from cache import sqlite3
28
29 LOG_CHUNK_LIMIT = 1000
30
class LogWalker(object):
    """Easy way to access the history of a Subversion repository.

    Changed-path information is stored in the ``changed_path`` table of a
    sqlite database.  The query methods first make sure the requested
    revision has been fetched into that table and then answer from it.
    """

    def __init__(self, transport, cache_db=None, limit=None):
        """Create a new instance.

        :param transport:   SvnRaTransport to use to access the repository.
        :param cache_db:    Optional sql database connection to use. An
                            in-memory database is used if not set.
        :param limit:       Optional limit passed to every get_log() call
                            made while fetching; LOG_CHUNK_LIMIT if not set.
        """
        assert isinstance(transport, SvnRaTransport)

        # Only the URL of the given transport is kept; the transport that is
        # actually used is opened lazily by _get_transport().
        self.url = transport.base
        self._transport = None

        if limit is not None:
            self._limit = limit
        else:
            self._limit = LOG_CHUNK_LIMIT

        if cache_db is None:
            self.db = sqlite3.connect(":memory:")
        else:
            self.db = cache_db

        self.db.executescript("""
          create table if not exists changed_path(rev integer, action text, path text, copyfrom_path text, copyfrom_rev integer);
          create index if not exists path_rev on changed_path(rev);
          create unique index if not exists path_rev_path on changed_path(rev, path);
          create unique index if not exists path_rev_path_action on changed_path(rev, path, action);
        """)
        self.db.commit()
        # Highest revision already present in the cache (0 for an empty one).
        self.saved_revnum = self.db.execute("SELECT MAX(rev) FROM changed_path").fetchone()[0]
        if self.saved_revnum is None:
            self.saved_revnum = 0

    def _get_transport(self):
        """Return the transport for self.url, opening it on first use."""
        if self._transport is None:
            self._transport = SvnRaTransport(self.url)
        return self._transport

    def fetch_revisions(self, to_revnum=None):
        """Fetch information about all revisions in the remote repository
        until to_revnum.

        Nothing is done if to_revnum is already covered by the cache.
        Otherwise everything up to the latest revision of the repository
        (or to_revnum, if that is higher) is requested.

        :param to_revnum: End of range to fetch information for. None means
            the latest revision in the repository.
        :raises NoSuchRevision: if Subversion reports
            SVN_ERR_FS_NO_SUCH_REVISION for the requested range.
        """
        import sys

        if to_revnum is not None and to_revnum <= self.saved_revnum:
            return
        latest_revnum = self._get_transport().get_latest_revnum()
        if to_revnum is None:
            to_revnum = latest_revnum
        else:
            to_revnum = max(latest_revnum, to_revnum)

        pb = ui.ui_factory.nested_progress_bar()

        def rcvr(log_entry, pool):
            pb.update('fetching svn revision info', log_entry.revision, to_revnum)
            orig_paths = log_entry.changed_paths
            if orig_paths is None:
                orig_paths = {}
            for p in orig_paths:
                copyfrom_path = orig_paths[p].copyfrom_path
                if copyfrom_path is not None:
                    copyfrom_path = copyfrom_path.strip("/")

                # "replace" so that re-fetching a revision that is already
                # cached does not violate the unique indexes.
                self.db.execute(
                     "replace into changed_path (rev, path, action, copyfrom_path, copyfrom_rev) values (?, ?, ?, ?, ?)",
                     (log_entry.revision, p.strip("/"), orig_paths[p].action, copyfrom_path, orig_paths[p].copyfrom_rev))
                # Work around nasty memory leak in Subversion
                orig_paths[p]._parent_pool.destroy()

            self.saved_revnum = log_entry.revision
            if self.saved_revnum % 1000 == 0:
                self.db.commit()

        try:
            try:
                # NOTE(review): every chunk starts again at saved_revnum, so
                # this presumably only makes progress when self._limit is
                # greater than 1 -- confirm against get_log() semantics.
                while self.saved_revnum < to_revnum:
                    pool = Pool()
                    try:
                        self._get_transport().get_log("", self.saved_revnum,
                                                 to_revnum, self._limit, True,
                                                 True, [], rcvr, pool)
                    finally:
                        # Release the pool even when get_log() fails.
                        pool.destroy()
            finally:
                pb.finished()
        except SubversionException:
            # Read the error code from the exception's args instead of
            # tuple-unpacking the exception in the except clause, which
            # would itself fail if args did not hold exactly two items.
            err_args = getattr(sys.exc_info()[1], "args", ())
            if len(err_args) > 1 and err_args[1] == svn.core.SVN_ERR_FS_NO_SUCH_REVISION:
                raise NoSuchRevision(branch=self,
                    revision="Revision number %d" % to_revnum)
            raise
        self.db.commit()

    def follow_path(self, path, revnum):
        """Return iterator over all the revisions between revnum and 0 named path or inside path.

        :param path:    Branch path to start reporting (in revnum)
        :param revnum:  Start revision.
        :return: An iterator that yields tuples with (path, paths, revnum)
            where paths is a dictionary with all changes that happened in path
            in revnum.
        """
        assert revnum >= 0

        if revnum == 0 and path == "":
            return

        # Changes to parent directories only matter for non-root paths.
        recurse = (path != "")

        path = path.strip("/")

        while revnum >= 0:
            assert revnum > 0 or path == ""
            revpaths = self.get_revision_paths(revnum, path, recurse=recurse)

            if revpaths != {}:
                yield (path, copy(revpaths), revnum)

            if path == "":
                revnum -= 1
                continue

            if path in revpaths:
                (action, copyfrom_path, copyfrom_rev) = revpaths[path]
                if copyfrom_path is None:
                    if action in ('A', 'R'):
                        # this path didn't exist before this revision
                        return
                else:
                    # In this revision, this path was copied from
                    # somewhere else
                    revnum = copyfrom_rev
                    path = copyfrom_path
                    assert path == "" or revnum > 0
                    continue
            revnum -= 1
            for p in sorted(revpaths):
                if path.startswith(p+"/") and revpaths[p][0] in ('A', 'R'):
                    # A parent directory was added or replaced: continue in
                    # the location it came from.
                    assert revpaths[p][1]
                    # Only the leading parent component is rewritten; later
                    # occurrences of the same text in path must stay as-is.
                    path = path.replace(p, revpaths[p][1], 1)
                    revnum = revpaths[p][2]
                    break

    def get_revision_paths(self, revnum, path=None, recurse=False):
        """Obtain dictionary with all the changes in a particular revision.

        :param revnum: Subversion revision number
        :param path: optional path under which to return all entries
        :param recurse: Report changes to parents as well
        :returns: dictionary with paths as keys and
                  (action, copyfrom_path, copyfrom_rev) as values.
        """

        if revnum == 0:
            assert path is None or path == ""
            return {'': ('A', None, -1)}

        self.fetch_revisions(revnum)

        query = "select path, action, copyfrom_path, copyfrom_rev from changed_path where rev=?"
        args = [revnum]
        if path is not None and path != "":
            # Bound parameters keep quotes in paths from breaking the query,
            # and substr() prefix tests match exactly, whereas LIKE would
            # treat '_' and '%' in a path as wildcards.
            prefix = path + "/"
            query += " and (path=? or substr(path, 1, length(?))=?"
            args.extend([path, prefix, prefix])
            if recurse:
                # Rows whose path is a parent directory of the given path.
                query += " or substr(?, 1, length(path) + 1)=(path || '/')"
                args.append(path)
            query += ")"

        paths = {}
        for p, act, cf, cr in self.db.execute(query, args):
            paths[p.encode("utf-8")] = (act, cf, cr)
        return paths

    def find_latest_change(self, path, revnum, include_parents=False,
                           include_children=False):
        """Find latest revision that touched path.

        :param path: Path to check for changes
        :param revnum: First revision to check
        :param include_parents: Also consider revisions in which a parent
            directory of path was added or replaced
        :param include_children: Also consider revisions that changed
            something below path
        :return: Revision number, 0 if nothing was found for the root, or
            None if nothing was found for any other path.
        """
        assert isinstance(path, basestring)
        assert isinstance(revnum, int) and revnum >= 0
        self.fetch_revisions(revnum)

        # Cached paths are stored without surrounding slashes.
        path = path.strip("/")

        conditions = ["path=?"]
        args = [path]
        if include_children:
            if path == "":
                # Everything is a child of the root.
                conditions.append("path IS NOT NULL")
            else:
                prefix = path + "/"
                conditions.append("substr(path, 1, length(?))=?")
                args.extend([prefix, prefix])
        if include_parents:
            conditions.append("(substr(?, 1, length(path) + 1)=(path || '/') AND (action = 'R' OR action = 'A'))")
            args.append(path)
        args.append(revnum)
        query = "SELECT rev FROM changed_path WHERE (%s) AND rev <= ? ORDER BY rev DESC LIMIT 1" % " OR ".join(conditions)

        row = self.db.execute(query, args).fetchone()
        if row is None and path == "":
            return 0

        if row is None:
            return None

        return row[0]

    def touches_path(self, path, revnum):
        """Check whether path was changed in specified revision.

        :param path:  Path to check
        :param revnum:  Revision to check
        :return: True if the cache has an entry for path in revnum. For
            revision 0 only the root ("") counts as changed.
        """
        self.fetch_revisions(revnum)
        if revnum == 0:
            return (path == "")
        row = self.db.execute(
            "select 1 from changed_path where path=? and rev=?",
            (path, revnum)).fetchone()
        return (row is not None)

    def find_children(self, path, revnum):
        """Find all children of path in revnum.

        :param path:  Path to check
        :param revnum:  Revision to check
        :return: List of paths reported below path; empty if path is a file.
        """
        # svn.delta is needed for the Editor base class below; import it
        # explicitly rather than relying on another module having done so.
        import svn.delta

        path = path.strip("/")
        transport = self._get_transport()
        ft = transport.check_path(path, revnum)
        if ft == svn.core.svn_node_file:
            return []
        assert ft == svn.core.svn_node_dir

        class TreeLister(svn.delta.Editor):
            """Editor that records every file and directory that is added."""
            def __init__(self, base):
                self.files = []
                self.base = base

            def set_target_revision(self, revnum):
                """See Editor.set_target_revision()."""
                pass

            def open_root(self, revnum, baton):
                """See Editor.open_root()."""
                # "path" is the stripped path given to find_children().
                return path

            def add_directory(self, path, parent_baton, copyfrom_path, copyfrom_revnum, pool):
                """See Editor.add_directory()."""
                self.files.append(urlutils.join(self.base, path))
                return path

            def change_dir_prop(self, id, name, value, pool):
                pass

            def change_file_prop(self, id, name, value, pool):
                pass

            def add_file(self, path, parent_id, copyfrom_path, copyfrom_revnum, baton):
                self.files.append(urlutils.join(self.base, path))
                return path

            def close_dir(self, id):
                pass

            def close_file(self, path, checksum):
                pass

            def close_edit(self):
                pass

            def abort_edit(self):
                pass

            def apply_textdelta(self, file_id, base_checksum):
                pass

        pool = Pool()
        editor = TreeLister(path)
        old_base = transport.base
        try:
            root_repos = transport.get_svn_repos_root()
            # Temporarily point the transport at the directory itself and
            # drive an update from an empty report into the listing editor.
            transport.reparent(urlutils.join(root_repos, path))
            reporter = transport.do_update(revnum, True, editor, pool)
            reporter.set_path("", revnum, True, None, pool)
            reporter.finish_report(pool)
        finally:
            transport.reparent(old_base)
            pool.destroy()
        return editor.files

    def get_previous(self, path, revnum):
        """Return path,revnum pair specified pair was derived from.

        The cache must hold an entry for path in revnum (unless revnum is
        0); the lookup fails otherwise.

        :param path:  Path to check
        :param revnum:  Revision to check
        :return: (None, -1) if there is no predecessor (revision 0, or the
            path was added without copy history), (path, revnum-1) for any
            other change without copy history, and otherwise the
            (copyfrom_path, copyfrom_rev) pair recorded for the change.
        """
        assert revnum >= 0
        self.fetch_revisions(revnum)
        if revnum == 0:
            return (None, -1)
        row = self.db.execute(
            "select action, copyfrom_path, copyfrom_rev from changed_path where path=? and rev=?",
            (path, revnum)).fetchone()
        if row[2] == -1:
            if row[0] == 'A':
                return (None, -1)
            return (path, revnum-1)
        return (row[1], row[2])
322             return (path, revnum-1)
323         return (row[1], row[2])