Implement ListBranchingScheme.is_branch_parent().
[jelmer/subvertpy.git] / fetch.py
1 # Copyright (C) 2005-2007 Jelmer Vernooij <jelmer@samba.org>
2
3 # This program is free software; you can redistribute it and/or modify
4 # it under the terms of the GNU General Public License as published by
5 # the Free Software Foundation; either version 2 of the License, or
6 # (at your option) any later version.
7
8 # This program is distributed in the hope that it will be useful,
9 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11 # GNU General Public License for more details.
12
13 # You should have received a copy of the GNU General Public License
14 # along with this program; if not, write to the Free Software
15 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16 """Fetching revisions from Subversion repositories in batches."""
17
18 import bzrlib
19 from bzrlib import osutils, ui, urlutils
20 from bzrlib.inventory import Inventory
21 from bzrlib.revision import Revision, NULL_REVISION
22 from bzrlib.repository import InterRepository
23 from bzrlib.trace import mutter
24
25 from copy import copy
26 from cStringIO import StringIO
27 import md5
28
29 from svn.core import Pool
30 import svn.core
31
32 from fileids import generate_file_id
33 from repository import (SvnRepository, SVN_PROP_BZR_ANCESTRY, 
34                 SVN_PROP_SVK_MERGE, SVN_PROP_BZR_MERGE,
35                 SVN_PROP_BZR_PREFIX, SVN_PROP_BZR_REVISION_INFO, 
36                 SVN_PROP_BZR_BRANCHING_SCHEME, SVN_PROP_BZR_REVISION_ID,
37                 SVN_PROP_BZR_FILEIDS, SvnRepositoryFormat, 
38                 parse_revision_metadata, parse_merge_property)
39 from tree import apply_txdelta_handler
40
41
def md5_strings(strings):
    """Return the MD5sum of the concatenation of strings.

    The strings are fed to the digest one at a time, so the concatenation
    is never built in memory.

    :param strings: Iterable of (byte) strings to find the MD5sum of.
    :return: MD5sum, as a hexadecimal string
    """
    try:
        from hashlib import md5 as new_md5
    except ImportError:
        # Python < 2.5 has no hashlib; fall back to the old md5 module.
        from md5 import new as new_md5
    digest = new_md5()
    # Explicit loop rather than map(): map() is lazy on Python 3, so 
    # relying on it for side effects would hash nothing there.
    for chunk in strings:
        digest.update(chunk)
    return digest.hexdigest()
51
52
class RevisionBuildEditor(svn.delta.Editor):
    """Implementation of the Subversion commit editor interface that builds a 
    Bazaar revision.

    The editor callbacks update a copy of the previous inventory and add 
    texts to the target's weave store; close_edit() then adds the resulting 
    revision and inventory to the target repository.
    """
    def __init__(self, source, target, branch_path, prev_inventory, revid, 
                 svn_revprops, id_map, scheme):
        """Create a new editor.

        :param source: Source repository object; used to generate file ids 
            and to determine the parents of the revision.
        :param target: Repository object the revision is added to.
        :param branch_path: Path of the branch that is being fetched.
        :param prev_inventory: Inventory the changes are applied on top of. 
            It is copied; the original is only read.
        :param revid: Revision id of the revision that is being built.
        :param svn_revprops: Sequence of which items 0, 1 and 2 are read as 
            author, log message and date respectively.
        :param id_map: Mapping from paths to file ids. Entries in it take 
            precedence over file ids that are looked up or generated.
        :param scheme: Branching scheme; its string form selects which 
            ancestry property is honoured.
        """
        self.branch_path = branch_path
        self.old_inventory = prev_inventory
        self.inventory = copy(prev_inventory)
        self.revid = revid
        self.id_map = id_map
        self.scheme = scheme
        self.source = source
        self.target = target
        self.transact = target.get_transaction()
        self.weave_store = target.weave_store
        # Maps directory file id -> list of parent revisions for its weave
        self.dir_baserev = {}
        self._bzr_merges = []
        self._svk_merges = []
        self._revinfo = None
        self._svn_revprops = svn_revprops
        # Old paths of entries that were renamed before their delete_entry()
        # call arrived; that later delete has to be ignored.
        self._premature_deletes = set()
        self.pool = Pool()

    def _get_revision(self, revid):
        """Creates the revision object.

        :param revid: Revision id of the revision to create.
        :return: Revision object filled in from the Subversion revision 
            properties and, if present, the bzr revision info property.
        """
        parent_ids = self.source.revision_parents(revid, self._bzr_merges)

        # Commit SVN revision properties to a Revision object
        rev = Revision(revision_id=revid, parent_ids=parent_ids)

        if self._svn_revprops[2] is not None:
            rev.timestamp = 1.0 * svn.core.secs_from_timestr(
                self._svn_revprops[2], None) #date
        else:
            rev.timestamp = 0 # FIXME: Obtain repository creation time
        rev.timezone = None

        rev.committer = self._svn_revprops[0] # author
        if rev.committer is None:
            rev.committer = ""
        rev.message = self._svn_revprops[1] # message

        if self._revinfo:
            parse_revision_metadata(self._revinfo, rev)

        return rev

    def open_root(self, base_revnum, baton):
        """Start editing the root directory of the branch.

        :param base_revnum: Base revision number (unused).
        :param baton: Baton passed in by the caller (unused).
        :return: File id of the root, used as the directory baton.
        """
        if self.old_inventory.root is None:
            # First time the root is set
            file_id = generate_file_id(self.source, self.revid, "")
            self.dir_baserev[file_id] = []
        else:
            assert self.old_inventory.root.revision is not None
            if "" in self.id_map:
                file_id = self.id_map[""]
            else:
                file_id = self.old_inventory.root.file_id
            self.dir_baserev[file_id] = [self.old_inventory.root.revision]

        if self.inventory.root is not None and \
                file_id == self.inventory.root.file_id:
            ie = self.inventory.root
        else:
            ie = self.inventory.add_path("", 'directory', file_id)
        ie.revision = self.revid
        return file_id

    def _get_existing_id(self, parent_id, path):
        """Find the file id for a path that existed in the old inventory.

        :param parent_id: File id of the parent directory.
        :param path: Path to look up.
        :return: File id from the id map if the path is listed there, 
            otherwise the file id from the old inventory.
        """
        if path in self.id_map:
            return self.id_map[path]
        return self._get_old_id(parent_id, path)

    def _get_old_id(self, parent_id, old_path):
        """Look up the file id of a child of a directory in the old inventory.

        :param parent_id: File id of the parent directory.
        :param old_path: Path of the child; only its basename is used.
        :return: File id of the child.
        """
        return self.old_inventory[parent_id].children[urlutils.basename(old_path)].file_id

    def _get_new_id(self, parent_id, new_path):
        """Determine the file id for a newly added path.

        :param parent_id: File id of the parent directory (unused).
        :param new_path: Path that is being added.
        :return: File id from the id map if the path is listed there, 
            otherwise a freshly generated one.
        """
        if new_path in self.id_map:
            return self.id_map[new_path]
        return generate_file_id(self.source, self.revid, new_path)

    def delete_entry(self, path, revnum, parent_id, pool):
        """Remove an entry (and everything below it) from the inventory.

        :param path: UTF-8 encoded path of the entry.
        :param revnum: Revision number (unused).
        :param parent_id: File id of the parent directory.
        :param pool: Memory pool (unused).
        """
        path = path.decode("utf-8")
        if path in self._premature_deletes:
            # The entry was already moved away by add_file()/add_directory()
            self._premature_deletes.remove(path)
        else:
            self.inventory.remove_recursive_id(self._get_old_id(parent_id, path))

    def close_directory(self, id):
        """Finish a directory: stamp it with this revision and, for rich-root 
        targets, record it in the weave store.

        :param id: File id of the directory.
        """
        self.inventory[id].revision = self.revid

        # Only record root if the target repository supports it.
        # supports_rich_root is a method (is_compatible() calls it); testing
        # the bound method itself would always be true.
        if self.target.supports_rich_root():
            file_weave = self.weave_store.get_weave_or_empty(id, self.transact)
            if not file_weave.has_version(self.revid):
                file_weave.add_lines(self.revid, self.dir_baserev[id], [])

    def add_directory(self, path, parent_id, copyfrom_path, copyfrom_revnum, 
                      pool):
        """Add a directory to the inventory.

        :param path: UTF-8 encoded path of the new directory.
        :param parent_id: File id of the parent directory.
        :param copyfrom_path: Path the directory was copied from, or None.
        :param copyfrom_revnum: Revision the directory was copied from 
            (unused).
        :param pool: Memory pool (unused).
        :return: File id of the directory, used as the directory baton.
        """
        path = path.decode("utf-8")
        file_id = self._get_new_id(parent_id, path)

        self.dir_baserev[file_id] = []
        if file_id in self.inventory:
            # This directory was moved here from somewhere else, but the 
            # other location hasn't been removed yet. 
            if copyfrom_path is None:
                # FIXME: This should never happen!
                copyfrom_path = self.old_inventory.id2path(file_id)
            assert copyfrom_path == self.old_inventory.id2path(file_id)
            assert copyfrom_path not in self._premature_deletes
            self._premature_deletes.add(copyfrom_path)
            self.inventory.rename(file_id, parent_id, urlutils.basename(path))
            ie = self.inventory[file_id]
        else:
            ie = self.inventory.add_path(path, 'directory', file_id)
        ie.revision = self.revid

        return file_id

    def open_directory(self, path, parent_id, base_revnum, pool):
        """Start editing an existing directory.

        :param path: Path of the directory.
        :param parent_id: File id of the parent directory.
        :param base_revnum: Base revision number; must not be negative.
        :param pool: Memory pool (unused).
        :return: File id of the directory, used as the directory baton.
        """
        # NOTE(review): unlike add_directory(), path is not decoded from 
        # UTF-8 here before it is looked up in the id map - confirm whether
        # that is intended.
        assert base_revnum >= 0
        base_file_id = self._get_old_id(parent_id, path)
        base_revid = self.old_inventory[base_file_id].revision
        file_id = self._get_existing_id(parent_id, path)
        if file_id == base_file_id:
            self.dir_baserev[file_id] = [base_revid]
            ie = self.inventory[file_id]
        else:
            # Replace if original was inside this branch
            # change id of base_file_id to file_id
            ie = self.inventory[base_file_id]
            for name in ie.children:
                ie.children[name].parent_id = file_id
            # FIXME: Don't touch inventory internals
            del self.inventory._byid[base_file_id]
            self.inventory._byid[file_id] = ie
            ie.file_id = file_id
            self.dir_baserev[file_id] = []
        ie.revision = self.revid
        return file_id

    def change_dir_prop(self, id, name, value, pool):
        """Process a property change on a directory.

        Only the ancestry property for the current scheme, the svk merge 
        property and the revision info property have an effect; other 
        known properties are ignored, and unknown svn:/bzr: properties are 
        logged.

        :param id: File id of the directory.
        :param name: Name of the property.
        :param value: New value of the property.
        :param pool: Memory pool (unused).
        """
        if name == SVN_PROP_BZR_BRANCHING_SCHEME:
            if id != self.inventory.root.file_id:
                mutter('rogue %r on non-root directory' % name)
                return
        elif name == SVN_PROP_BZR_ANCESTRY+str(self.scheme):
            if id != self.inventory.root.file_id:
                mutter('rogue %r on non-root directory' % name)
                return
            
            # Only the last line of the property is parsed
            self._bzr_merges = parse_merge_property(value.splitlines()[-1])
        elif (name.startswith(SVN_PROP_BZR_ANCESTRY) or 
              name.startswith(SVN_PROP_BZR_REVISION_ID)):
            # Ancestry / revision id for another scheme
            pass
        elif name == SVN_PROP_SVK_MERGE:
            self._svk_merges = None # Force Repository.revision_parents() to look it up
        elif name == SVN_PROP_BZR_REVISION_INFO:
            if id != self.inventory.root.file_id:
                mutter('rogue %r on non-root directory' % SVN_PROP_BZR_REVISION_INFO)
                return
 
            self._revinfo = value
        elif name in (svn.core.SVN_PROP_ENTRY_COMMITTED_DATE,
                      svn.core.SVN_PROP_ENTRY_COMMITTED_REV,
                      svn.core.SVN_PROP_ENTRY_LAST_AUTHOR,
                      svn.core.SVN_PROP_ENTRY_LOCK_TOKEN,
                      svn.core.SVN_PROP_ENTRY_UUID,
                      svn.core.SVN_PROP_EXECUTABLE):
            pass
        elif name.startswith(svn.core.SVN_PROP_WC_PREFIX):
            pass
        elif name in (SVN_PROP_BZR_MERGE, SVN_PROP_BZR_FILEIDS):
            pass
        elif (name.startswith(svn.core.SVN_PROP_PREFIX) or
              name.startswith(SVN_PROP_BZR_PREFIX)):
            mutter('unsupported dir property %r' % name)

    def change_file_prop(self, id, name, value, pool):
        """Process a property change on the file currently being edited.

        :param id: File baton (unused).
        :param name: Name of the property.
        :param value: New value of the property, or None.
        :param pool: Memory pool (unused).
        """
        if name == svn.core.SVN_PROP_EXECUTABLE: 
            # You'd expect executable to match 
            # svn.core.SVN_PROP_EXECUTABLE_VALUE, but that's not 
            # how SVN behaves. It appears to consider the presence 
            # of the property sufficient to mark it executable.
            self.is_executable = (value is not None)
        elif (name == svn.core.SVN_PROP_SPECIAL):
            self.is_symlink = (value is not None)
        elif name == svn.core.SVN_PROP_ENTRY_COMMITTED_REV:
            self.last_file_rev = int(value)
        elif name in (svn.core.SVN_PROP_ENTRY_COMMITTED_DATE,
                      svn.core.SVN_PROP_ENTRY_LAST_AUTHOR,
                      svn.core.SVN_PROP_ENTRY_LOCK_TOKEN,
                      svn.core.SVN_PROP_ENTRY_UUID,
                      svn.core.SVN_PROP_MIME_TYPE):
            pass
        elif name.startswith(svn.core.SVN_PROP_WC_PREFIX):
            pass
        elif (name.startswith(svn.core.SVN_PROP_PREFIX) or
              name.startswith(SVN_PROP_BZR_PREFIX)):
            mutter('unsupported file property %r' % name)

    def add_file(self, path, parent_id, copyfrom_path, copyfrom_revnum, baton):
        """Start adding a new file and reset the per-file state.

        :param path: UTF-8 encoded path of the new file.
        :param parent_id: File id of the parent directory.
        :param copyfrom_path: Path the file was copied from, or None.
        :param copyfrom_revnum: Revision the file was copied from (unused).
        :param baton: Baton passed in by the caller (unused).
        :return: Decoded path, used as the file baton.
        """
        path = path.decode("utf-8")
        self.is_symlink = False
        self.is_executable = None
        self.file_data = ""
        self.file_parents = []
        self.file_stream = None
        self.file_id = self._get_new_id(parent_id, path)
        if self.file_id in self.inventory:
            # This file was moved here from somewhere else, but the 
            # other location hasn't been removed yet. 
            if copyfrom_path is None:
                # FIXME: This should never happen!
                copyfrom_path = self.old_inventory.id2path(self.file_id)
            assert copyfrom_path == self.old_inventory.id2path(self.file_id)
            assert copyfrom_path not in self._premature_deletes
            self._premature_deletes.add(copyfrom_path)
            self.inventory.rename(self.file_id, parent_id, urlutils.basename(path))
        return path

    def open_file(self, path, parent_id, base_revnum, pool):
        """Start editing an existing file and load its base text.

        :param path: Path of the file.
        :param parent_id: File id of the parent directory.
        :param base_revnum: Base revision number (unused).
        :param pool: Memory pool (unused).
        :return: The path, used as the file baton.
        """
        base_file_id = self._get_old_id(parent_id, path)
        base_revid = self.old_inventory[base_file_id].revision
        self.file_id = self._get_existing_id(parent_id, path)
        self.is_executable = None
        self.is_symlink = (self.inventory[base_file_id].kind == 'symlink')
        file_weave = self.weave_store.get_weave_or_empty(base_file_id, 
                                                         self.transact)
        self.file_data = file_weave.get_text(base_revid)
        self.file_stream = None
        if self.file_id == base_file_id:
            self.file_parents = [base_revid]
        else:
            # Replace
            del self.inventory[base_file_id]
            self.file_parents = []
        return path

    def close_file(self, path, checksum):
        """Finish the current file: store its text and update its inventory 
        entry.

        :param path: Path of the file (the file baton).
        :param checksum: Expected MD5 checksum of the full text, or None to 
            skip the check.
        """
        if self.file_stream is not None:
            self.file_stream.seek(0)
            lines = osutils.split_lines(self.file_stream.read())
        else:
            # Data didn't change or file is new
            lines = osutils.split_lines(self.file_data)

        actual_checksum = md5_strings(lines)
        assert checksum is None or checksum == actual_checksum

        file_weave = self.weave_store.get_weave_or_empty(self.file_id, 
                                                         self.transact)
        if not file_weave.has_version(self.revid):
            file_weave.add_lines(self.revid, self.file_parents, lines)

        if self.file_id in self.inventory:
            ie = self.inventory[self.file_id]
        elif self.is_symlink:
            ie = self.inventory.add_path(path, 'symlink', self.file_id)
        else:
            ie = self.inventory.add_path(path, 'file', self.file_id)
        ie.revision = self.revid

        if self.is_symlink:
            # The text of a symlink is "link " followed by the target
            ie.symlink_target = lines[0][len("link "):]
            ie.text_sha1 = None
            ie.text_size = None
            ie.text_id = None
        else:
            ie.text_sha1 = osutils.sha_strings(lines)
            ie.text_size = sum(map(len, lines))
            if self.is_executable is not None:
                ie.executable = self.is_executable

        self.file_stream = None

    def close_edit(self):
        """Finish the edit: add the revision and its inventory to the target.

        If adding the revision or committing the write group fails, the 
        write group is aborted and the error is propagated to the caller. 
        The memory pool is destroyed in either case.
        """
        assert len(self._premature_deletes) == 0
        rev = self._get_revision(self.revid)
        self.inventory.revision_id = self.revid
        rev.inventory_sha1 = osutils.sha_string(
            bzrlib.xml5.serializer_v5.write_inventory_to_string(
                self.inventory))
        self.target.start_write_group()
        try:
            try:
                self.target.add_revision(self.revid, rev, self.inventory)
                self.target.commit_write_group()
            except:
                # Roll back, but don't hide the failure: swallowing it 
                # would make a revision that was never stored look fetched.
                self.target.abort_write_group()
                raise
        finally:
            self.pool.destroy()

    def abort_edit(self):
        """Abort the edit. Nothing needs to be undone here."""
        pass

    def apply_textdelta(self, file_id, base_checksum):
        """Prepare to receive a text delta against the current base text.

        :param file_id: File baton (unused).
        :param base_checksum: Expected MD5 checksum of the base text, or 
            None to skip the check.
        :return: Handler that applies the delta to the base text and writes 
            the result to self.file_stream.
        """
        actual_checksum = md5.new(self.file_data).hexdigest()
        # Note: no parentheses around condition and message - asserting a 
        # (non-empty) tuple always succeeds.
        assert base_checksum is None or base_checksum == actual_checksum, \
            "base checksum mismatch: %r != %r" % (base_checksum, 
                                                  actual_checksum)
        self.file_stream = StringIO()
        return apply_txdelta_handler(StringIO(self.file_data), 
                                     self.file_stream, self.pool)
360
361
class InterFromSvnRepository(InterRepository):
    """Svn to any repository actions."""

    _matching_repo_format = SvnRepositoryFormat()

    @staticmethod
    def _get_repo_format_to_test():
        """Return the repository format to test with; always None."""
        return None

    def _find_all(self):
        """Find all revisions in the source that the target is missing.

        :return: Tuple with the list of missing revision ids and a 
            dictionary that maps each of them to its mainline parent.
        """
        parents = {}
        needed = [revid for revid in self.source.all_revision_ids()
                  if not self.target.has_revision(revid)]
        for revid in needed:
            (branch, revnum, scheme) = self.source.lookup_revision_id(revid)
            parents[revid] = self.source._mainline_revision_parent(branch, 
                                               revnum, scheme)
        return (needed, parents)

    def _find_until(self, revision_id):
        """Find all missing revisions until revision_id

        :param revision_id: Stop revision
        :return: Tuple with revisions missing and a dictionary with 
            parents for those revision.
        """
        needed = []
        parents = {}
        (path, until_revnum, scheme) = self.source.lookup_revision_id(
                                                                    revision_id)

        prev_revid = None
        for (branch, revnum) in self.source.follow_branch(path, 
                                                          until_revnum, scheme):
            revid = self.source.generate_revision_id(revnum, branch, str(scheme))

            # Each revision yielded is recorded as the parent of the one 
            # yielded just before it.
            if prev_revid is not None:
                parents[prev_revid] = revid

            prev_revid = revid

            if not self.target.has_revision(revid):
                needed.append(revid)

        # The last revision yielded has no parent
        parents[prev_revid] = None
        return (needed, parents)

    def copy_content(self, revision_id=None, pb=None):
        """See InterRepository.copy_content.

        :param revision_id: Revision id to fetch up to; None fetches all 
            revisions, NULL_REVISION fetches nothing.
        :param pb: Optional progress bar; a nested one is created (and 
            finished afterwards) if none is given.
        """
        if revision_id == NULL_REVISION:
            return

        # Loop over all the revnums until revision_id
        # (or youngest_revnum) and call self.target.add_revision() 
        # or self.target.add_inventory() each time
        self.target.lock_read()
        try:
            if revision_id is None:
                (needed, parents) = self._find_all()
            else:
                (needed, parents) = self._find_until(revision_id)
        finally:
            self.target.unlock()

        if not needed:
            # Nothing to fetch
            return

        repos_root = self.source.transport.get_repos_root()

        prev_revid = None
        transport = self.source.transport
        nested_pb = None
        num = 0
        prev_inv = None
        self.target.lock_write()
        try:
            # Created inside the try block so that the write lock is 
            # released even if creating the progress bar fails.
            if pb is None:
                nested_pb = ui.ui_factory.nested_progress_bar()
                pb = nested_pb

            for revid in reversed(needed):
                (branch, revnum, scheme) = self.source.lookup_revision_id(revid)
                pb.update('copying revision', num, len(needed))

                parent_revid = parents[revid]

                if parent_revid is None:
                    parent_inv = Inventory(root_id=None)
                elif prev_revid != parent_revid:
                    parent_inv = self.target.get_inventory(parent_revid)
                else:
                    # The parent is the revision that was just built; 
                    # reuse its inventory rather than reading it back.
                    assert prev_inv is not None
                    parent_inv = prev_inv

                changes = self.source._log.get_revision_paths(revnum, branch)
                renames = self.source.revision_fileid_renames(revid)
                id_map = self.source.transform_fileid_map(self.source.uuid, 
                                      revnum, branch, changes, renames, scheme)

                editor = RevisionBuildEditor(self.source, self.target, branch, 
                             parent_inv, revid, 
                             self.source._log.get_revision_info(revnum),
                             id_map, scheme)

                pool = Pool()
                edit, edit_baton = svn.delta.make_editor(editor, pool)

                if parent_revid is None:
                    branch_url = urlutils.join(repos_root, branch)
                    transport.reparent(branch_url)
                    assert transport.svn_url == branch_url.rstrip("/"), \
                        "Expected %r, got %r" % (branch_url, transport.svn_url)
                    reporter = transport.do_update(
                                   revnum, True, edit, edit_baton, pool)

                    # Report status of existing paths
                    reporter.set_path("", revnum, True, None, pool)
                else:
                    (parent_branch, parent_revnum, _parent_scheme) = \
                            self.source.lookup_revision_id(parent_revid)
                    transport.reparent(urlutils.join(repos_root, parent_branch))

                    if parent_branch != branch:
                        reporter = transport.do_switch(
                                   revnum, True, 
                                   urlutils.join(repos_root, branch), 
                                   edit, edit_baton, pool)
                    else:
                        reporter = transport.do_update(
                                   revnum, True, edit, edit_baton, pool)

                    # Report status of existing paths
                    reporter.set_path("", parent_revnum, False, None, pool)

                lock = transport.lock_read(".")
                try:
                    reporter.finish_report(pool)
                finally:
                    # Don't leave the transport locked if the report fails
                    lock.unlock()

                prev_inv = editor.inventory
                prev_revid = revid
                pool.destroy()
                num += 1
        finally:
            self.target.unlock()
            if nested_pb is not None:
                nested_pb.finished()
        self.source.transport.reparent_root()

    def fetch(self, revision_id=None, pb=None):
        """Fetch revisions. 

        :param revision_id: Revision id to fetch up to, or None for all.
        :param pb: Optional progress bar.
        """
        self.copy_content(revision_id=revision_id, pb=pb)

    @staticmethod
    def is_compatible(source, target):
        """Be compatible with SvnRepository.

        :param source: Candidate source repository.
        :param target: Candidate target repository.
        :return: True if source is a SvnRepository and the target supports 
            rich roots.
        """
        # FIXME: Also check target uses VersionedFile
        return isinstance(source, SvnRepository) and \
                target.supports_rich_root()
522