Add note on svn versions.
[jelmer/subvertpy.git] / fileids.py
1 # Copyright (C) 2006-2007 Jelmer Vernooij <jelmer@samba.org>
2
3 # This program is free software; you can redistribute it and/or modify
4 # it under the terms of the GNU General Public License as published by
5 # the Free Software Foundation; either version 2 of the License, or
6 # (at your option) any later version.
7
8 # This program is distributed in the hope that it will be useful,
9 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11 # GNU General Public License for more details.
12
13 # You should have received a copy of the GNU General Public License
14 # along with this program; if not, write to the Free Software
15 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16 """Generation of file-ids."""
17
18 from bzrlib import osutils, ui
19 from bzrlib.errors import NotBranchError, RevisionNotPresent
20 from bzrlib.knit import KnitVersionedFile
21 from bzrlib.revision import NULL_REVISION
22 from bzrlib.trace import mutter
23
24 import urllib
25
26 import sha
27
28 from revids import escape_svn_path
29
def generate_svn_file_id(uuid, revnum, branch, path):
    """Build the file id of a file that was introduced in Subversion.

    The id normally looks like "<revnum>@<uuid>:<branch>:<path>", with the
    branch and path escaped by escape_svn_path(). When that string would be
    longer than 150 characters, the escaped path is replaced by the SHA-1
    hex digest of the UTF-8 encoded path and the last separator becomes
    ";" instead of ":".

    :param uuid: UUID of the repository
    :param revnum: Revision number at which the file was introduced.
    :param branch: Branch path of the branch in which the file was introduced.
    :param path: Original path of the file within the branch
    :return: The generated id, passed through osutils.safe_file_id()
    """
    escaped_branch = escape_svn_path(branch)
    file_id = "%d@%s:%s:%s" % (revnum, uuid, escaped_branch,
                               escape_svn_path(path))
    if len(file_id) > 150:
        # Too long to use verbatim: fall back to a digest of the path.
        path_digest = sha.new(path.encode('utf-8')).hexdigest()
        file_id = "%d@%s:%s;%s" % (revnum, uuid, escaped_branch, path_digest)
    assert isinstance(file_id, str)
    return osutils.safe_file_id(file_id)
45
46
def generate_file_id(repos, revid, path):
    """Create the file id for a path introduced in the given revision.

    :param repos: Repository; must provide lookup_revision_id() and uuid
    :param revid: Revision id of the revision that introduced the file
    :param path: Path of the file within its branch
    :return: File id as produced by generate_svn_file_id()
    """
    # lookup_revision_id() yields (branch path, revnum, <third item>);
    # the third item is not needed here.
    (branch_path, introduced_revnum, _unused) = repos.lookup_revision_id(revid)
    return generate_svn_file_id(repos.uuid, introduced_revnum, branch_path,
                                path)
50
51
def get_local_changes(paths, scheme, generate_revid, get_children=None):
    """Translate repository-wide changes into branch-relative changes.

    :param paths: Dict mapping a repository path to a tuple of
        (action, copyfrom path or None, copyfrom revnum)
    :param scheme: Branching scheme; its unprefix() splits a repository
        path into (branch path, path inside the branch)
    :param generate_revid: Callable (revnum, branch path, scheme name)
        returning a revision id
    :param get_children: Optional callable (path, revnum) returning the
        paths below a copy source that lies outside any known branch
    :return: Dict mapping a branch-relative path to a tuple of
        (action, copyfrom path inside its branch or None,
        copyfrom revision id, None or -1)
    """
    local_changes = {}
    for full_path in sorted(paths.keys()):
        entry = paths[full_path]
        local_path = scheme.unprefix(full_path)[1]
        copy_source = entry[1]

        if copy_source is None:
            # Not a copy: keep the entry untouched.
            local_changes[local_path] = entry
            continue

        try:
            (source_branch, source_path) = scheme.unprefix(copy_source)

            if source_path == "" and local_path == "":
                # Branch copy
                resolved = ('M', None, None)
            else:
                resolved = (entry[0], source_path, generate_revid(
                    entry[2], source_branch.encode("utf-8"), str(scheme)))
        except NotBranchError:
            # Copied from outside of a known branch
            # Make it look like the files were added in this revision
            if get_children is not None:
                for child in get_children(copy_source, entry[2]):
                    mutter('oops: %r child %r' % (copy_source, child))
                    relative = child[len(copy_source):].strip("/")
                    child_path = (local_path + "/" + relative).strip("/")
                    local_changes[child_path] = (entry[0], None, -1)
            resolved = (entry[0], None, -1)

        local_changes[local_path] = resolved
    return local_changes
78
79
# Version number of the cached file id map; it is embedded in the name of
# the knit ("fileidmap-v<N>") that FileIdMap creates on the cache transport.
FILEIDMAP_VERSION = 1
81
class FileIdMap(object):
    """File id store.

    Keeps, for each revision id, a map

    path -> (fileid, revid)

    where revid is the revision id recorded for that path by get_map().
    The maps are cached in a knit on the cache transport.

    Subclasses have to provide _apply_changes(new_file_id, changes,
    find_children), which returns a dict path -> fileid for the paths
    that received a new file id.
    """
    def __init__(self, repos, cache_transport):
        """Open (creating it if necessary) the file id map cache.

        :param repos: Repository the file ids belong to
        :param cache_transport: Transport on which the knit is stored
        """
        self.repos = repos
        self.idmap_knit = KnitVersionedFile("fileidmap-v%d" % FILEIDMAP_VERSION, cache_transport, create=True)

    def save(self, revid, parent_revids, _map):
        """Store the file id map of a revision in the cache.

        Each entry is written as one line of three tab-separated,
        URL-quoted fields: path, file id, revision id.

        :param revid: Revision id the map belongs to
        :param parent_revids: Revision ids of the parents (may be ghosts)
        :param _map: Dict path -> (fileid, revid)
        """
        mutter('saving file id map for %r' % revid)

        lines = []
        for filename in sorted(_map.keys()):
            entry = _map[filename]
            lines.append("%s\t%s\t%s\n" % (urllib.quote(filename),
                                           urllib.quote(entry[0]),
                                           urllib.quote(entry[1])))
        self.idmap_knit.add_lines_with_ghosts(revid, parent_revids, lines)

    def load(self, revid):
        """Read the cached file id map of a revision.

        :param revid: Revision id to load the map for
        :return: Dict path -> (fileid, revid)
        :raises: whatever idmap_knit.get_lines() raises when the revision
            is not cached (get_map() expects RevisionNotPresent)
        """
        id_map = {}
        for line in self.idmap_knit.get_lines(revid):
            (filename, file_id, changed_revid) = line.rstrip("\n").split("\t", 3)
            file_id = urllib.unquote(file_id)
            assert isinstance(file_id, str)
            id_map[urllib.unquote(filename)] = (file_id,
                                                urllib.unquote(changed_revid))

        return id_map

    def apply_changes(self, uuid, revnum, branch, global_changes, 
                      renames, scheme, find_children=None):
        """Change file id map to incorporate specified changes.

        :param uuid: UUID of repository changes happen in (currently unused)
        :param revnum: Revno for revision in which changes happened
        :param branch: Branch path where changes happened
        :param global_changes: Dict with global changes that happened
        :param renames: Mapping of path -> file id that overrides the
            generated file id for that path
        :param scheme: Branching scheme
        :param find_children: Optional callable (path, revnum) returning
            the paths below a copied directory
        :return: Result of _apply_changes(): dict path -> fileid
        """
        changes = get_local_changes(global_changes, scheme,
                    self.repos.generate_revision_id, find_children)
        if find_children is not None:
            def get_children(path, revid):
                # Resolve the copy source revision to a branch and revnum,
                # then report its children relative to that branch.
                (bp, rev, rev_scheme) = self.repos.lookup_revision_id(revid)
                for p in find_children(bp+"/"+path, rev):
                    yield rev_scheme.unprefix(p)[1]
        else:
            get_children = None

        revid = self.repos.generate_revision_id(revnum, branch, str(scheme))

        def new_file_id(x):
            if x in renames:
                return renames[x]
            return generate_file_id(self.repos, revid, x)

        return self._apply_changes(new_file_id, changes, get_children)

    def get_map(self, uuid, revnum, branch, renames_cb, scheme):
        """Make sure the map is up to date until revnum.

        Walks back through the branch history until a cached map is found,
        then replays the revisions that are not cached yet on top of it
        and stores the result in the cache.

        :param uuid: UUID of the repository
        :param revnum: Revision number to return the map for
        :param branch: Branch path
        :param renames_cb: Callable taking a revision id and returning a
            mapping path -> file id of overrides for that revision
        :param scheme: Branching scheme
        :return: Dict path -> (fileid, revid)
        """
        # First, find the last cached map
        todo = []
        next_parent_revs = []
        if revnum == 0:
            assert branch == ""
            return {"": (generate_svn_file_id(uuid, revnum, branch, ""), 
              self.repos.generate_revision_id(revnum, branch, str(scheme)))}

        # No history -> empty map
        id_map = {}
        for (bp, paths, rev) in self.repos.follow_branch_history(branch, 
                                             revnum, scheme):
            revid = self.repos.generate_revision_id(rev, bp.encode("utf-8"), 
                                                    str(scheme))
            try:
                id_map = self.load(revid)
            except RevisionNotPresent:
                todo.append((revid, paths))
            else:
                # found the nearest cached map
                next_parent_revs = [revid]
                break

        # target revision was present (or there was no history at all)
        if len(todo) == 0:
            return id_map

        if len(next_parent_revs) == 0:
            # Nothing cached anywhere in the history: start from scratch.
            if scheme.is_branch(""):
                id_map = {"": (generate_svn_file_id(uuid, 0, "", ""), NULL_REVISION)}
            else:
                id_map = {}

        pb = ui.ui_factory.nested_progress_bar()

        try:
            i = 1
            for (revid, global_changes) in reversed(todo):
                # Becomes non-empty as soon as the log had to be searched
                # for children. A list is used because a nested function
                # cannot rebind a variable of the enclosing function.
                expensive = []
                def log_find_children(path, rev):
                    expensive.append(True)
                    return self.repos._log.find_children(path, rev)
                changes = get_local_changes(global_changes, scheme,
                                            self.repos.generate_revision_id, 
                                            log_find_children)
                pb.update('generating file id map', i, len(todo))

                def find_children(path, source_revid):
                    (bp, rev, rev_scheme) = self.repos.lookup_revision_id(
                        source_revid)
                    for p in log_find_children(bp+"/"+path, rev):
                        yield rev_scheme.unprefix(p)[1]

                parent_revs = next_parent_revs

                renames = renames_cb(revid)

                def new_file_id(x):
                    if x in renames:
                        return renames[x]
                    return generate_file_id(self.repos, revid, x)
                
                revmap = self._apply_changes(new_file_id, changes, find_children)
                # Modified paths keep their existing file id but still
                # have to be recorded as changed in this revision.
                for p in changes:
                    if changes[p][0] == 'M' and p not in revmap:
                        revmap[p] = id_map[p][0]

                for x in revmap:
                    id_map[x] = (str(revmap[x]), revid)

                # Mark all parent paths as changed
                for p in revmap:
                    parts = p.split("/")
                    for j in range(1, len(parts)+1):
                        parent = "/".join(parts[0:len(parts)-j])
                        assert parent in id_map, "Parent item %s of %s doesn't exist in map" % (parent, p)
                        if id_map[parent][1] == revid:
                            # This parent (and thus everything above it)
                            # was already marked for this revision.
                            break
                        id_map[parent] = id_map[parent][0], revid
                        
                saved = False
                if i % 500 == 0 or expensive:
                    self.save(revid, parent_revs, id_map)
                    saved = True
                next_parent_revs = [revid]
                i += 1
        finally:
            pb.finished()
        # Always cache the map of the target revision.
        if not saved:
            self.save(revid, parent_revs, id_map)
        return id_map
228
229
class SimpleFileIdMap(FileIdMap):
    """File id map in which every added or replaced path gets a new id."""

    @staticmethod
    def _apply_changes(new_file_id, changes, find_children=None):
        """Assign file ids to the paths added or replaced by a revision.

        :param new_file_id: Callable returning the file id for a path
        :param changes: Dict path -> (action, copyfrom path, copyfrom rev)
        :param find_children: Optional callable (copyfrom path, copyfrom
            rev) returning the paths below a copy source
        :return: Dict path -> file id for all paths that got a new id
        """
        assigned = {}
        for target in sorted(changes.keys()):
            change = changes[target]

            # Only additions and replacements introduce new file ids.
            if change[0] not in ('A', 'R'):
                continue
            assigned[target] = new_file_id(target)

            copy_source = change[1]
            if copy_source is None:
                continue
            mutter('%r copied from %r:%s' % (target, copy_source, change[2]))

            if find_children is None:
                continue
            # Everything below a copied directory gets a new id as well.
            for child in find_children(copy_source, change[2]):
                moved = child.replace(copy_source, target+"/", 1)
                path = moved.replace("//", "/")
                assigned[path] = new_file_id(path)
                mutter('added mapping %r -> %r' % (path, assigned[path]))

        return assigned