Fix bug in revid caching.
[jelmer/subvertpy.git] / fetch.py
1 # Copyright (C) 2005-2007 Jelmer Vernooij <jelmer@samba.org>
2
3 # This program is free software; you can redistribute it and/or modify
4 # it under the terms of the GNU General Public License as published by
5 # the Free Software Foundation; either version 2 of the License, or
6 # (at your option) any later version.
7
8 # This program is distributed in the hope that it will be useful,
9 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11 # GNU General Public License for more details.
12
13 # You should have received a copy of the GNU General Public License
14 # along with this program; if not, write to the Free Software
15 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16 """Fetching revisions from Subversion repositories in batches."""
17
18 import bzrlib
19 from bzrlib.inventory import Inventory
20 import bzrlib.osutils as osutils
21 from bzrlib.revision import Revision
22 from bzrlib.repository import InterRepository
23 from bzrlib.trace import mutter
24 import bzrlib.ui as ui
25
26 from copy import copy
27 from cStringIO import StringIO
28 import md5
29 import os
30
31 from svn.core import Pool
32 import svn.core
33
34 from fileids import generate_file_id
35 from repository import (SvnRepository, SVN_PROP_BZR_MERGE, SVN_PROP_SVK_MERGE,
36                 SVN_PROP_BZR_PREFIX, SVN_PROP_BZR_REVISION_INFO, 
37                 SvnRepositoryFormat, parse_revision_metadata)
38 from tree import apply_txdelta_handler
39
40
def md5_strings(strings):
    """Return the hexadecimal MD5 digest of a sequence of strings.

    The strings are fed to the hash one after another, so the result is
    the digest of their concatenation.

    :param strings: Iterable of byte strings.
    :return: The MD5 digest as a hexadecimal string.
    """
    # hashlib supersedes the deprecated md5 module; fall back to the old
    # module on interpreters that do not ship hashlib.
    try:
        from hashlib import md5 as new_md5
    except ImportError:
        from md5 import new as new_md5

    digest = new_md5()
    # Explicit loop rather than map(): map() used for its side effects
    # builds a throwaway list and does nothing at all where it is lazy.
    for chunk in strings:
        digest.update(chunk)
    return digest.hexdigest()
45
46
class RevisionBuildEditor(svn.delta.Editor):
    """Implementation of the Subversion commit editor interface that builds a
    Bazaar revision.

    The editor starts from a copy of the parent inventory, applies the
    directory and file changes it is driven with, stores texts in the
    target's weave store and, in close_edit(), adds the resulting revision
    and inventory to the target repository.
    """
    def __init__(self, source, target, branch_path, prev_inventory, revid,
                 svn_revprops, id_map):
        """Create a new editor.

        :param source: Repository the revision is read from; used to
            generate file ids and to look up revision parents.
        :param target: Repository the revision is written to; must provide
            get_transaction(), weave_store and add_revision().
        :param branch_path: Path of the branch inside the source repository.
        :param prev_inventory: Inventory of the parent revision.
        :param revid: Revision id of the revision being built.
        :param svn_revprops: Sequence holding (author, message, date) at
            indexes 0, 1 and 2.
        :param id_map: Mapping of paths to file ids that overrides the
            ids that would otherwise be looked up or generated.
        """
        self.branch_path = branch_path
        self.old_inventory = prev_inventory
        self.inventory = copy(prev_inventory)
        self.revid = revid
        self.id_map = id_map
        self.source = source
        self.target = target
        self.transact = target.get_transaction()
        self.weave_store = target.weave_store
        # Maps directory file id -> list of parent revision ids used when
        # the directory is recorded in the weave store.
        self.dir_baserev = {}
        # Last line of the bzr:merge property, if one was seen.
        self._parent_ids = None
        # Raw value of the bzr revision-info property, if one was seen.
        self._revinfo = None
        self._svn_revprops = svn_revprops
        self.pool = Pool()

    def _get_revision(self, revid):
        """Creates the revision object.

        :param revid: Revision id of the revision to create.
        :return: Revision filled in from the Subversion revision
            properties and, if present, the bzr revision-info property.
        """
        if self._parent_ids is None:
            self._parent_ids = ""

        parent_ids = self.source.revision_parents(revid, self._parent_ids)

        # Commit SVN revision properties to a Revision object
        rev = Revision(revision_id=revid, parent_ids=parent_ids)

        date = self._svn_revprops[2]
        if date is not None:
            rev.timestamp = 1.0 * svn.core.secs_from_timestr(date, None)
        else:
            rev.timestamp = 0 # FIXME: Obtain repository creation time
        rev.timezone = None

        rev.committer = self._svn_revprops[0] # author
        if rev.committer is None:
            rev.committer = ""
        rev.message = self._svn_revprops[1] # message

        if self._revinfo:
            parse_revision_metadata(self._revinfo, rev)

        return rev

    def open_root(self, base_revnum, baton):
        """Start editing the branch root.

        :return: File id of the root directory, used as its baton.
        """
        if self.old_inventory.root is None:
            # First time the root is set
            file_id = generate_file_id(self.source, self.revid, "")
            self.dir_baserev[file_id] = []
        else:
            assert self.old_inventory.root.revision is not None
            if "" in self.id_map:
                file_id = self.id_map[""]
            else:
                file_id = self.old_inventory.root.file_id
            self.dir_baserev[file_id] = [self.old_inventory.root.revision]

        if self.inventory.root is not None and \
                file_id == self.inventory.root.file_id:
            ie = self.inventory.root
        else:
            ie = self.inventory.add_path("", 'directory', file_id)
        ie.revision = self.revid
        return file_id

    def _get_existing_id(self, parent_id, path):
        """Return the id for an existing path, preferring the id map."""
        if path in self.id_map:
            return self.id_map[path]
        return self._get_old_id(parent_id, path)

    def _get_old_id(self, parent_id, old_path):
        """Return the id a path had in the parent inventory.

        The entry is looked up by base name among the children of
        parent_id in the old inventory.
        """
        parent = self.old_inventory[parent_id]
        return parent.children[os.path.basename(old_path)].file_id

    def _get_new_id(self, parent_id, new_path):
        """Return the id for a newly added path.

        The id map is consulted first; otherwise a fresh id is generated.
        """
        if new_path in self.id_map:
            return self.id_map[new_path]
        return generate_file_id(self.source, self.revid, new_path)

    def delete_entry(self, path, revnum, parent_id, pool):
        """Remove the entry at path from the inventory being built."""
        path = path.decode("utf-8")
        del self.inventory[self._get_old_id(parent_id, path)]

    def close_directory(self, id):
        """Finish a directory: stamp it with this revision and make sure
        the weave store has an (empty) version for it.
        """
        self.inventory[id].revision = self.revid

        file_weave = self.weave_store.get_weave_or_empty(id, self.transact)
        if not file_weave.has_version(self.revid):
            file_weave.add_lines(self.revid, self.dir_baserev[id], [])

    def add_directory(self, path, parent_id, copyfrom_path, copyfrom_revnum,
                      pool):
        """Add a new directory to the inventory.

        :return: File id of the new directory, used as its baton.
        """
        path = path.decode("utf-8")
        file_id = self._get_new_id(parent_id, path)

        self.dir_baserev[file_id] = []
        ie = self.inventory.add_path(path, 'directory', file_id)
        ie.revision = self.revid

        return file_id

    def open_directory(self, path, parent_id, base_revnum, pool):
        """Open an existing directory for editing.

        :return: File id of the directory, used as its baton.
        """
        assert base_revnum >= 0
        # Decode like add_directory()/delete_entry() do, so that id map
        # and inventory lookups use the same kind of path everywhere.
        path = path.decode("utf-8")
        base_file_id = self._get_old_id(parent_id, path)
        base_revid = self.old_inventory[base_file_id].revision
        file_id = self._get_existing_id(parent_id, path)
        if file_id == base_file_id:
            self.dir_baserev[file_id] = [base_revid]
            ie = self.inventory[file_id]
        else:
            # Replace if original was inside this branch
            # change id of base_file_id to file_id
            ie = self.inventory[base_file_id]
            for name in ie.children:
                ie.children[name].parent_id = file_id
            # FIXME: Don't touch inventory internals
            del self.inventory._byid[base_file_id]
            self.inventory._byid[file_id] = ie
            ie.file_id = file_id
            self.dir_baserev[file_id] = []
        ie.revision = self.revid
        return file_id

    def change_dir_prop(self, id, name, value, pool):
        """Handle a property change on a directory.

        Only the bzr merge and revision-info properties on the branch
        root are recorded; other known properties are ignored and
        unknown svn:/bzr: properties are logged.

        :param id: File id of the directory.
        :param name: Property name.
        :param value: New property value; None is tolerated and treated
            like an empty value for the merge property.
        """
        if name == SVN_PROP_BZR_MERGE:
            if id != self.inventory.root.file_id:
                mutter('rogue %r on non-root directory' % SVN_PROP_BZR_MERGE)
                return

            # Only the last line is relevant for this revision. Guard
            # against a missing or empty value instead of crashing.
            merge_lines = []
            if value is not None:
                merge_lines = value.splitlines()
            if merge_lines:
                self._parent_ids = merge_lines[-1]
            else:
                self._parent_ids = ""
        elif name == SVN_PROP_SVK_MERGE:
            if self._parent_ids is None:
                # Only set parents using svk:merge if no
                # bzr:merge set.
                pass # FIXME
        elif name == SVN_PROP_BZR_REVISION_INFO:
            if id != self.inventory.root.file_id:
                mutter('rogue %r on non-root directory' % SVN_PROP_BZR_REVISION_INFO)
                return

            self._revinfo = value
        elif name in (svn.core.SVN_PROP_ENTRY_COMMITTED_DATE,
                      svn.core.SVN_PROP_ENTRY_COMMITTED_REV,
                      svn.core.SVN_PROP_ENTRY_LAST_AUTHOR,
                      svn.core.SVN_PROP_ENTRY_LOCK_TOKEN,
                      svn.core.SVN_PROP_ENTRY_UUID,
                      svn.core.SVN_PROP_EXECUTABLE):
            pass
        elif name.startswith(svn.core.SVN_PROP_WC_PREFIX):
            pass
        elif (name.startswith(svn.core.SVN_PROP_PREFIX) or
              name.startswith(SVN_PROP_BZR_PREFIX)):
            mutter('unsupported directory property %r' % name)

    def change_file_prop(self, id, name, value, pool):
        """Handle a property change on the file currently being edited.

        Records the executable and symlink flags and the last committed
        revision number; other known properties are ignored and unknown
        svn:/bzr: properties are logged.
        """
        if name == svn.core.SVN_PROP_EXECUTABLE:
            # You'd expect executable to match
            # svn.core.SVN_PROP_EXECUTABLE_VALUE, but that's not
            # how SVN behaves. It appears to consider the presence
            # of the property sufficient to mark it executable.
            self.is_executable = (value is not None)
        elif name == svn.core.SVN_PROP_SPECIAL:
            self.is_symlink = (value is not None)
        elif name == svn.core.SVN_PROP_ENTRY_COMMITTED_REV:
            self.last_file_rev = int(value)
        elif name in (svn.core.SVN_PROP_ENTRY_COMMITTED_DATE,
                      svn.core.SVN_PROP_ENTRY_LAST_AUTHOR,
                      svn.core.SVN_PROP_ENTRY_LOCK_TOKEN,
                      svn.core.SVN_PROP_ENTRY_UUID,
                      svn.core.SVN_PROP_MIME_TYPE):
            pass
        elif name.startswith(svn.core.SVN_PROP_WC_PREFIX):
            pass
        elif (name.startswith(svn.core.SVN_PROP_PREFIX) or
              name.startswith(SVN_PROP_BZR_PREFIX)):
            mutter('unsupported file property %r' % name)

    def add_file(self, path, parent_id, copyfrom_path, copyfrom_revnum, baton):
        """Start a new file and reset the per-file state.

        :return: The decoded path, used as the file baton.
        """
        path = path.decode("utf-8")
        self.is_symlink = False
        self.is_executable = None
        self.file_data = ""
        self.file_parents = []
        self.file_stream = None
        self.file_id = self._get_new_id(parent_id, path)
        return path

    def open_file(self, path, parent_id, base_revnum, pool):
        """Open an existing file and load its base text.

        :return: The decoded path, used as the file baton.
        """
        # Decode like add_file() does; the returned path is later handed
        # to close_file() and must match what the inventory stores.
        path = path.decode("utf-8")
        base_file_id = self._get_old_id(parent_id, path)
        base_revid = self.old_inventory[base_file_id].revision
        self.file_id = self._get_existing_id(parent_id, path)
        self.is_executable = None
        self.is_symlink = (self.inventory[base_file_id].kind == 'symlink')
        file_weave = self.weave_store.get_weave_or_empty(base_file_id,
                                                         self.transact)
        self.file_data = file_weave.get_text(base_revid)
        self.file_stream = None
        if self.file_id == base_file_id:
            self.file_parents = [base_revid]
        else:
            # Replace
            del self.inventory[base_file_id]
            self.file_parents = []
        return path

    def close_file(self, path, checksum):
        """Finish the current file: verify its checksum, store its text
        and update its inventory entry.

        :param path: File baton as returned by add_file()/open_file().
        :param checksum: Expected MD5 hex digest of the full text, or None
            to skip the check.
        """
        if self.file_stream is not None:
            self.file_stream.seek(0)
            lines = osutils.split_lines(self.file_stream.read())
        else:
            # Data didn't change or file is new
            lines = osutils.split_lines(self.file_data)

        actual_checksum = md5_strings(lines)
        assert checksum is None or checksum == actual_checksum

        file_weave = self.weave_store.get_weave_or_empty(self.file_id,
                                                         self.transact)
        if not file_weave.has_version(self.revid):
            file_weave.add_lines(self.revid, self.file_parents, lines)

        if self.file_id in self.inventory:
            ie = self.inventory[self.file_id]
        elif self.is_symlink:
            ie = self.inventory.add_path(path, 'symlink', self.file_id)
        else:
            ie = self.inventory.add_path(path, 'file', self.file_id)
        ie.revision = self.revid

        if self.is_symlink:
            # The first line is expected to start with "link "; the rest
            # of that line is the symlink target.
            ie.symlink_target = lines[0][len("link "):]
            ie.text_sha1 = None
            ie.text_size = None
            ie.text_id = None
        else:
            ie.text_sha1 = osutils.sha_strings(lines)
            ie.text_size = sum(map(len, lines))
            if self.is_executable is not None:
                ie.executable = self.is_executable

        self.file_stream = None

    def close_edit(self):
        """Finish the edit: add the built revision and inventory to the
        target repository and release the editor's pool.
        """
        rev = self._get_revision(self.revid)
        self.inventory.revision_id = self.revid
        rev.inventory_sha1 = osutils.sha_string(
            bzrlib.xml5.serializer_v5.write_inventory_to_string(
                self.inventory))
        self.target.add_revision(self.revid, rev, self.inventory)
        self.pool.destroy()

    def abort_edit(self):
        """Abort the edit. Nothing is undone here."""
        pass

    def apply_textdelta(self, file_id, base_checksum):
        """Prepare to receive a text delta for the current file.

        :param file_id: File baton.
        :param base_checksum: Expected MD5 hex digest of the base text, or
            None to skip the check.
        :return: Handler that applies the delta to the base text and
            writes the result to self.file_stream.
        """
        # No trailing comma here: it would turn the digest into a 1-tuple
        # that can never equal base_checksum.
        actual_checksum = md5.new(self.file_data).hexdigest()
        # "assert (cond, msg)" asserts a non-empty tuple and always
        # passes, so the condition and the message must not be wrapped
        # in parentheses together.
        assert base_checksum is None or base_checksum == actual_checksum, \
            "base checksum mismatch: %r != %r" % (base_checksum,
                                                  actual_checksum)
        self.file_stream = StringIO()
        return apply_txdelta_handler(StringIO(self.file_data),
                                     self.file_stream, self.pool)
315
316
class InterFromSvnRepository(InterRepository):
    """Svn to any repository actions."""

    _matching_repo_format = SvnRepositoryFormat()

    @staticmethod
    def _get_repo_format_to_test():
        return None

    def _find_all(self):
        """Find the revisions to fetch when no revision id was given.

        Walks the source history starting at its latest revision number.

        :return: Tuple (needed, parents): the revision ids missing from
            the target, in the order they were encountered, and a dict
            mapping every encountered revision id to its mainline parent.
        """
        needed = []
        parents = {}
        for (branch, revnum) in self.source.follow_history(
                                                self.source._latest_revnum):
            mutter('br, revnum: %r, %r' % (branch, revnum))
            revid = self.source.generate_revision_id(revnum, branch)
            parents[revid] = self.source._mainline_revision_parent(branch,
                                                                   revnum)

            if not self.target.has_revision(revid):
                needed.append(revid)
        return (needed, parents)

    def _find_until(self, revision_id):
        """Find the revisions to fetch to obtain revision_id.

        Each revision yielded by follow_branch() is recorded as the
        parent of the one yielded before it; the last one yielded gets
        None as its parent.

        :param revision_id: Revision id to fetch up to.
        :return: Tuple (needed, parents), shaped as in _find_all().
        """
        needed = []
        parents = {}
        (path, until_revnum) = self.source.lookup_revision_id(revision_id)

        prev_revid = None
        for (branch, revnum) in self.source.follow_branch(path,
                                                          until_revnum):
            revid = self.source.generate_revision_id(revnum, branch)

            if prev_revid is not None:
                parents[prev_revid] = revid

            prev_revid = revid

            if not self.target.has_revision(revid):
                needed.append(revid)

        parents[prev_revid] = None
        return (needed, parents)

    def _fetch_revision(self, revid, branch, revnum, parent_revid,
                        parent_inv, repos_root):
        """Replay a single Subversion revision into the target.

        :param revid: Revision id to create.
        :param branch: Branch path of the revision.
        :param revnum: Subversion revision number of the revision.
        :param parent_revid: Revision id of the parent, or None.
        :param parent_inv: Inventory of the parent revision.
        :param repos_root: URL of the repository root.
        :return: The inventory of the newly built revision.
        """
        transport = self.source.transport

        changes = self.source._log.get_revision_paths(revnum, branch)
        renames = self.source.revision_fileid_renames(revid)
        id_map = self.source.transform_fileid_map(self.source.uuid,
                                    revnum, branch, changes, renames)

        editor = RevisionBuildEditor(self.source, self.target, branch,
                     parent_inv, revid,
                     self.source._log.get_revision_info(revnum),
                     id_map)

        pool = Pool()
        try:
            edit, edit_baton = svn.delta.make_editor(editor, pool)

            if parent_revid is None:
                transport.reparent("%s/%s" % (repos_root, branch))
                reporter = transport.do_update(
                               revnum, "", True, edit, edit_baton, pool)

                # Report status of existing paths
                reporter.set_path("", revnum, True, None, pool)
            else:
                (parent_branch, parent_revnum) = \
                        self.source.lookup_revision_id(parent_revid)
                transport.reparent("%s/%s" % (repos_root, parent_branch))

                if parent_branch != branch:
                    switch_url = "%s/%s" % (repos_root, branch)
                    reporter = transport.do_switch(
                               revnum, "", True,
                               switch_url, edit, edit_baton, pool)
                else:
                    reporter = transport.do_update(
                               revnum, "", True, edit, edit_baton, pool)

                # Report status of existing paths
                reporter.set_path("", parent_revnum, False, None, pool)

            transport.lock()
            try:
                reporter.finish_report(pool)
            finally:
                # Never leave the transport locked if the report fails.
                transport.unlock()
        finally:
            # Release the per-revision pool on failure as well.
            pool.destroy()

        return editor.inventory

    def copy_content(self, revision_id=None, basis=None, pb=None):
        """See InterRepository.copy_content.

        :param revision_id: Revision id to fetch up to, or None to fetch
            everything found by walking the source history.
        :param basis: Currently unused.
        :param pb: Progress bar to report to; a nested progress bar is
            created (and finished) here when None.
        """
        # FIXME: Use basis

        # Loop over all the revnums until revision_id
        # (or youngest_revnum) and call self.target.add_revision()
        # or self.target.add_inventory() each time
        self.target.lock_read()
        try:
            if revision_id is None:
                (needed, parents) = self._find_all()
            else:
                (needed, parents) = self._find_until(revision_id)
        finally:
            self.target.unlock()

        if not needed:
            # Nothing to fetch
            return

        repos_root = self.source.transport.get_repos_root()

        # The revisions were collected newest first; fetch oldest first.
        needed.reverse()
        prev_revid = None
        prev_inv = None

        # Obtain the progress bar before taking the write lock so that a
        # failure to create it cannot leave the target locked.
        if pb is None:
            pb = ui.ui_factory.nested_progress_bar()
            nested_pb = pb
        else:
            nested_pb = None
        try:
            self.target.lock_write()
            try:
                for num, revid in enumerate(needed):
                    (branch, revnum) = self.source.lookup_revision_id(revid)
                    pb.update('copying revision', num, len(needed))

                    parent_revid = parents[revid]

                    if parent_revid is None:
                        parent_inv = Inventory(root_id=None)
                    elif prev_revid != parent_revid:
                        parent_inv = self.target.get_inventory(parent_revid)
                    else:
                        # The parent was built in the previous iteration;
                        # reuse its inventory instead of reading it back.
                        assert prev_inv is not None
                        parent_inv = prev_inv

                    prev_inv = self._fetch_revision(revid, branch, revnum,
                                                    parent_revid, parent_inv,
                                                    repos_root)
                    prev_revid = revid
            finally:
                try:
                    self.target.unlock()
                finally:
                    # Point the transport back at the repository root even
                    # when fetching failed half-way, or unlocking failed.
                    self.source.transport.reparent(repos_root)
        finally:
            if nested_pb is not None:
                nested_pb.finished()

    def fetch(self, revision_id=None, pb=None):
        """Fetch revisions. """
        self.copy_content(revision_id=revision_id, pb=pb)

    @staticmethod
    def is_compatible(source, target):
        """Be compatible with SvnRepository."""
        mutter("Checking from %r %r" % (source, target))
        # FIXME: Also check target uses VersionedFile
        return isinstance(source, SvnRepository)
473