Ignore decode errors from utf-8 - Subversion guarantees to always
[jelmer/subvertpy.git] / logwalker.py
1 # Copyright (C) 2006 Jelmer Vernooij <jelmer@samba.org>
2
3 # This program is free software; you can redistribute it and/or modify
4 # it under the terms of the GNU General Public License as published by
5 # the Free Software Foundation; either version 2 of the License, or
6 # (at your option) any later version.
7
8 # This program is distributed in the hope that it will be useful,
9 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11 # GNU General Public License for more details.
12
13 # You should have received a copy of the GNU General Public License
14 # along with this program; if not, write to the Free Software
15 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
17 from bzrlib.errors import NoSuchRevision, BzrError, NotBranchError
18 from bzrlib.progress import ProgressBar, DummyProgress
19 from bzrlib.trace import mutter
20
21 import os
22 import shelve
23
24 from svn.core import SubversionException
25 import svn.ra
26
class NotSvnBranchPath(BzrError):
    """Error for a path that is not a valid Subversion branch path.

    The offending path is kept in the ``branch_path`` attribute.
    """

    def __init__(self, branch_path):
        """Create the error.

        :param branch_path: The path that was rejected.
        """
        # Interpolate the path here instead of handing it over as an extra
        # positional argument, which would leave the "%r" in the message
        # unformatted. The one-element tuple keeps this correct even if
        # branch_path itself is a tuple.
        BzrError.__init__(self,
                "%r is not a valid Svn branch path" % (branch_path,))
        self.branch_path = branch_path
33
34
class LogWalker(object):
    """Cache of Subversion revision log data, with helpers to walk it.

    Log entries are kept in ``self.revisions``, keyed by the revision number
    converted to a string. Each entry is a dict with the keys 'paths',
    'author', 'date' and 'message'; 'paths' maps a path (without leading or
    trailing slashes) to an ``(action, copyfrom_path, copyfrom_rev)`` tuple.
    """

    def __init__(self, scheme, ra=None, cache_dir=None, last_revnum=None,
                 repos_url=None, pb=None):
        """Create a new LogWalker and fetch any log data not yet cached.

        :param scheme: Branching scheme; must provide is_branch() and
            unprefix().
        :param ra: Open Subversion RA session. If None, a new session is
            opened for repos_url and reparented to the repository root.
        :param cache_dir: Directory in which to keep a shelve cache named
            'log'. If None, the cache is a plain in-memory dict.
        :param last_revnum: Newest revision to fetch log data for. Defaults
            to the latest revision reported by the RA session.
        :param repos_url: Repository URL; only used when ra is None.
        :param pb: Optional progress bar, handed on to fetch_revisions().
        """
        if ra is None:
            callbacks = svn.ra.callbacks2_t()
            ra = svn.ra.open2(repos_url.encode('utf8'), callbacks, None, None)
            root = svn.ra.get_repos_root(ra)
            if root != repos_url:
                svn.ra.reparent(ra, root.encode('utf8'))

        if last_revnum is None:
            last_revnum = svn.ra.get_latest_revnum(ra)

        self.ra = ra
        self.scheme = scheme

        # Try to load cache from file
        # NOTE(review): the shelve is never explicitly closed or synced in
        # this class - confirm that the owner of this object takes care of it.
        if cache_dir is not None:
            self.revisions = shelve.open(os.path.join(cache_dir, 'log'))
        else:
            self.revisions = {}

        # The cache is assumed to hold revisions 0..N without gaps, so the
        # number of entries gives the newest cached revision.
        self.saved_revnum = max(len(self.revisions) - 1, 0)
        self.last_revnum = self.saved_revnum

        if self.saved_revnum < last_revnum:
            self.fetch_revisions(self.saved_revnum, last_revnum, pb)

    def fetch_revisions(self, from_revnum, to_revnum, pb=None):
        """Fetch log data from the repository and store it in the cache.

        :param from_revnum: First revision to fetch.
        :param to_revnum: Last revision to fetch; becomes self.last_revnum
            on success.
        :param pb: Optional progress bar to report to. If None, a bar is
            created here and cleared again once fetching is done.
        :raises NoSuchRevision: If Subversion reports
            SVN_ERR_FS_NO_SUCH_REVISION. Other SubversionExceptions are
            re-raised unchanged.
        """
        own_pb = pb is None
        if own_pb:
            # Don't bother for only a few revisions
            if abs(from_revnum - to_revnum) < 10:
                pb = DummyProgress()
            else:
                pb = ProgressBar()

        def rcvr(orig_paths, rev, author, date, message, pool):
            # Log receiver callback: stores one revision in the cache.
            pb.update('fetching svn revision info', rev, to_revnum)
            paths = {}
            if orig_paths is None:
                orig_paths = {}
            for p in orig_paths:
                change = orig_paths[p]
                copyfrom_path = change.copyfrom_path
                if copyfrom_path:
                    copyfrom_path = copyfrom_path.strip("/")
                paths[p.strip("/")] = (change.action, copyfrom_path,
                                       change.copyfrom_rev)

            self.revisions[str(rev)] = {
                    'paths': paths,
                    'author': author,
                    'date': date,
                    'message': message,
                    }

        try:
            try:
                mutter('getting log %r:%r' % (from_revnum, to_revnum))
                svn.ra.get_log(self.ra, ["/"], from_revnum, to_revnum,
                               0, True, True, rcvr)
                self.last_revnum = to_revnum
            finally:
                # A bar supplied by the caller is left for the caller to
                # clean up.
                if own_pb:
                    pb.clear()
        except SubversionException as e:
            # The exception arguments are (message, error number).
            (_, num) = e.args
            if num == svn.core.SVN_ERR_FS_NO_SUCH_REVISION:
                raise NoSuchRevision(branch=self,
                    revision="Revision number %d" % to_revnum)
            raise

    def follow_history(self, branch_path, revnum):
        """Return iterator over the revisions that touch branch_path.

        Revisions are visited from revnum down to 1. When a revision records
        branch_path as a copy of another path, the walk continues along that
        path, starting at the revision it was copied from.

        :param branch_path: Branch path to follow, or None to report the
            changes on every branch.
        :param revnum: Revision number to start from (inclusive).
        :returns: Generator of (branch_path, changed_paths, revnum) tuples,
            where changed_paths maps each changed path to its
            (action, copyfrom_path, copyfrom_rev) tuple.
        :raises NotSvnBranchPath: If branch_path is not None and the scheme
            does not consider it a branch. As this is a generator, this is
            only raised once iteration starts.
        """
        assert revnum >= 0

        if branch_path is not None and not self.scheme.is_branch(branch_path):
            raise NotSvnBranchPath(branch_path)

        if branch_path:
            branch_path = branch_path.strip("/")

        if revnum > self.last_revnum:
            self.fetch_revisions(self.last_revnum, revnum)

        continue_revnum = None
        for i in range(revnum, 0, -1):
            # After a copy, skip ahead to the revision copied from.
            if continue_revnum is not None and continue_revnum != i:
                continue
            continue_revnum = None

            paths = self.revisions[str(i)]['paths']
            changed_paths = {}
            for p in paths:
                if (branch_path is None or
                    p == branch_path or
                    branch_path == "" or
                    p.startswith(branch_path + "/")):
                    try:
                        bp = self.scheme.unprefix(p)[0]
                    except NotBranchError:
                        # Path is not inside any branch; nothing to report.
                        continue
                    changed_paths.setdefault(bp, {})[p] = paths[p]

            assert branch_path is None or len(changed_paths) <= 1

            for bp in changed_paths:
                yield (bp, changed_paths[bp], i)

            if (branch_path is not None and
                branch_path in paths and
                paths[branch_path][1] is not None):
                # In this revision, this branch was copied from
                # somewhere else
                # FIXME: What if copyfrom_path is not a branch path?
                continue_revnum = paths[branch_path][2]
                branch_path = paths[branch_path][1]

    def find_branches(self, revnum):
        """Find the branches added or removed up to and including revnum.

        :param revnum: Last revision to scan (inclusive).
        :returns: Generator of (path, revnum, exists) tuples. A branch that
            is deleted or replaced is reported with the revision in which
            that happened and exists=False. Every branch still recorded as
            created after the scan is reported with the scanned revnum and
            exists=True.
        """
        if revnum > self.last_revnum:
            self.fetch_revisions(self.last_revnum, revnum)

        created_branches = {}

        for i in range(1, revnum + 1):
            paths = self.revisions[str(i)]['paths']
            for p in paths:
                if not self.scheme.is_branch(p):
                    continue
                action = paths[p][0]
                if action in ('R', 'D'):
                    # The creation may not have been recorded for this exact
                    # path (e.g. a parent directory was added instead), so
                    # do not insist on the key being present.
                    created_branches.pop(p, None)
                    yield (p, i, False)

                if action in ('A', 'R'):
                    created_branches[p] = i

        for p in created_branches:
            yield (p, revnum, True)

    def get_revision_info(self, revnum, pb=None):
        """Obtain basic information for a specific revision.

        :param revnum: Revision number.
        :param pb: Optional progress bar, used if log data has to be fetched.
        :returns: Tuple with author, log message, date and changed paths of
            the revision. Author and message are decoded from UTF-8,
            ignoring undecodable bytes; either is None if the revision does
            not have one.
        """
        if revnum > self.last_revnum:
            self.fetch_revisions(self.last_revnum, revnum, pb)
        rev = self.revisions[str(revnum)]

        author = rev['author']
        if author is not None:
            author = author.decode('utf-8', 'ignore')
        message = rev['message']
        if message is not None:
            message = message.decode('utf-8', 'ignore')

        return (author, message, rev['date'], rev['paths'])

    def follow_local_history(self, branch_path, revnum):
        """Like follow_history(), but with paths relative to the branch.

        :param branch_path: Branch path to follow.
        :param revnum: Revision number to start from (inclusive).
        :returns: Generator of (branch_path, changed_paths, revnum) tuples.
            The keys of changed_paths are relative to branch_path. Copy
            sources inside the same branch are made relative as well; copy
            information pointing anywhere else is replaced by None.
        """
        for (bp, paths, rev) in self.follow_history(branch_path, revnum):
            new_paths = {}
            for p, data in paths.items():
                assert p.startswith(bp)
                p = p[len(bp):].strip("/") # remove branch path
                if data[1] is not None:
                    try:
                        (cbp, crp) = self.scheme.unprefix(data[1])
                    except NotBranchError:
                        # Copied from a path outside any branch; treat it
                        # as unrelated to the current branch.
                        cbp = crp = None
                    # TODO: See if data[1]:data[2] is the same branch as
                    # the current branch. The current code doesn't handle
                    # replaced branches very well
                    related = (cbp == bp)

                    if related:
                        data = (data[0], crp, data[2])
                    else:
                        data = (data[0], None, None)
                        # FIXME: Add children of data[1] to new_paths

                new_paths[p] = data
            yield (bp, new_paths, rev)
209                 new_paths[p] = data
210             yield (bp, new_paths, rev)