e02b50dd3f1a9c1895794eaa0d1ca73f7882140e
[jelmer/subvertpy.git] / fetch.py
1 # Copyright (C) 2005-2007 Jelmer Vernooij <jelmer@samba.org>
2
3 # This program is free software; you can redistribute it and/or modify
4 # it under the terms of the GNU General Public License as published by
5 # the Free Software Foundation; either version 2 of the License, or
6 # (at your option) any later version.
7
8 # This program is distributed in the hope that it will be useful,
9 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11 # GNU General Public License for more details.
12
13 # You should have received a copy of the GNU General Public License
14 # along with this program; if not, write to the Free Software
15 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16 """Fetching revisions from Subversion repositories in batches."""
17
18 import bzrlib
19 from bzrlib import osutils, ui, urlutils
20 from bzrlib.inventory import Inventory
21 from bzrlib.revision import Revision, NULL_REVISION
22 from bzrlib.repository import InterRepository
23 from bzrlib.trace import mutter
24
25 from cStringIO import StringIO
26 import md5
27
28 from svn.core import Pool
29 import svn.core
30
31 from fileids import generate_file_id
32 from repository import (SvnRepository, SVN_PROP_BZR_ANCESTRY, 
33                 SVN_PROP_SVK_MERGE, SVN_PROP_BZR_MERGE,
34                 SVN_PROP_BZR_PREFIX, SVN_PROP_BZR_REVISION_INFO, 
35                 SVN_PROP_BZR_BRANCHING_SCHEME, SVN_PROP_BZR_REVISION_ID,
36                 SVN_PROP_BZR_FILEIDS, SvnRepositoryFormat, 
37                 parse_revision_metadata, parse_merge_property)
38 from tree import apply_txdelta_handler
39
40
def md5_strings(strings):
    """Return the MD5sum of the concatenation of strings.

    The strings are fed to the digest one at a time, so the concatenation
    is never materialised in memory.

    :param strings: Iterable of (byte) strings to find the MD5sum of.
    :return: MD5sum, as a hexadecimal string
    """
    try:
        from hashlib import md5 as new_md5
    except ImportError:
        # Python < 2.5 has no hashlib; fall back to the old md5 module.
        from md5 import new as new_md5
    digest = new_md5()
    # Explicit loop rather than map(): the calls are made for their side
    # effect only, and a lazy map() would never run them.
    for chunk in strings:
        digest.update(chunk)
    return digest.hexdigest()
50
51
class RevisionBuildEditor(svn.delta.Editor):
    """Implementation of the Subversion commit editor interface that builds a
    Bazaar revision.

    The same editor instance can be driven for several revisions in a row:
    start_revision() resets the per-revision state before each drive.

    Storage is delegated to the hooks _store_file(), _store_directory(),
    _get_file_data() and _finish_commit(), which subclasses must implement.
    _start_revision() and abort_edit() are optional hooks.
    """
    def __init__(self, source, target):
        """Create a new editor.

        :param source: Repository the revisions are read from.
        :param target: Repository the revisions are written to; its current
            transaction is kept in self.transact.
        """
        self.target = target
        self.source = source
        self.transact = target.get_transaction()

    def start_revision(self, revid, prev_inventory):
        """Reset the editor state and prepare for building a revision.

        :param revid: Revision id of the revision that is going to be built.
        :param prev_inventory: Inventory the changes are applied against. It
            is kept as self.old_inventory; a copy of it becomes the
            inventory that is being built (self.inventory).
        """
        self.revid = revid
        (self.branch_path, self.revnum, self.scheme) = \
                self.source.lookup_revision_id(revid)
        changes = self.source._log.get_revision_paths(self.revnum,
                                                      self.branch_path)
        renames = self.source.revision_fileid_renames(revid)
        # Maps paths to the file ids they should get in this revision
        self.id_map = self.source.transform_fileid_map(self.source.uuid,
                              self.revnum, self.branch_path, changes, renames,
                              self.scheme)
        # Maps directory file ids to the parent revisions of their texts
        self.dir_baserev = {}
        self._revinfo = None
        self._bzr_merges = []
        self._svk_merges = []
        # Old paths of entries that were already moved to a new location
        # before the delete of their old location was reported
        self._premature_deletes = set()
        self.pool = Pool()
        self.old_inventory = prev_inventory
        self.inventory = prev_inventory.copy()
        self._start_revision()

    def _get_parent_ids(self):
        """Return the parent revision ids of the revision being built."""
        return self.source.revision_parents(self.revid, self._bzr_merges)

    def _get_revision(self, revid):
        """Creates the revision object.

        :param revid: Revision id of the revision to create.
        :return: Revision object filled in from the Subversion revision
            properties and, if present, the Bazaar revision info property.
        """

        # Commit SVN revision properties to a Revision object
        rev = Revision(revision_id=revid, parent_ids=self._get_parent_ids())

        # Tuple of (author, message, date)
        _svn_revprops = self.source._log.get_revision_info(self.revnum)
        if _svn_revprops[2] is not None:
            rev.timestamp = 1.0 * svn.core.secs_from_timestr(
                _svn_revprops[2], None) #date
        else:
            rev.timestamp = 0 # FIXME: Obtain repository creation time
        rev.timezone = None

        rev.committer = _svn_revprops[0] # author
        if rev.committer is None:
            rev.committer = ""
        rev.message = _svn_revprops[1] # message

        if self._revinfo:
            parse_revision_metadata(self._revinfo, rev)

        return rev

    def open_root(self, base_revnum, baton):
        """Open the root directory of the branch.

        :return: File id of the root, which is used as the directory baton.
        """
        if self.old_inventory.root is None:
            # First time the root is set
            file_id = generate_file_id(self.source, self.revid, "")
            self.dir_baserev[file_id] = []
        else:
            assert self.old_inventory.root.revision is not None
            if self.id_map.has_key(""):
                file_id = self.id_map[""]
            else:
                file_id = self.old_inventory.root.file_id
            self.dir_baserev[file_id] = [self.old_inventory.root.revision]

        if self.inventory.root is not None and \
                file_id == self.inventory.root.file_id:
            ie = self.inventory.root
        else:
            ie = self.inventory.add_path("", 'directory', file_id)
        ie.revision = self.revid
        return file_id

    def _get_existing_id(self, parent_id, path):
        """Find the file id for a path that already existed.

        An entry in the file id map takes precedence over the id in the
        old inventory.
        """
        if self.id_map.has_key(path):
            return self.id_map[path]
        return self._get_old_id(parent_id, path)

    def _get_old_id(self, parent_id, old_path):
        """Look up the file id of a path in the old inventory.

        :param parent_id: File id of the directory that contains the entry.
        :param old_path: Path of the entry; only its basename is used.
        """
        return self.old_inventory[parent_id].children[
                urlutils.basename(old_path)].file_id

    def _get_new_id(self, parent_id, new_path):
        """Find the file id for a newly added path.

        Uses the file id map if it has an entry for the path and generates
        a new file id otherwise.
        """
        if self.id_map.has_key(new_path):
            return self.id_map[new_path]
        return generate_file_id(self.source, self.revid, new_path)

    def _rename(self, file_id, parent_id, path):
        """Move file_id to the basename of path below parent_id, unless it
        is already there.
        """
        # Only rename if not right yet
        if (self.inventory[file_id].parent_id == parent_id and
            self.inventory[file_id].name == urlutils.basename(path)):
            return
        self.inventory.rename(file_id, parent_id, urlutils.basename(path))

    def delete_entry(self, path, revnum, parent_id, pool):
        """Remove an entry from the inventory that is being built.

        :param path: UTF-8 encoded path of the entry.
        :param parent_id: File id of the containing directory.
        """
        path = path.decode("utf-8")
        if path in self._premature_deletes:
            # The entry was already moved away from this path by
            # add_file()/add_directory(), so there is nothing left to
            # remove from the inventory. Just forget about the path and
            # about everything that was recorded below it.
            # Delete recursively
            self._premature_deletes.remove(path)
            for p in self._premature_deletes.copy():
                if p.startswith("%s/" % path):
                    self._premature_deletes.remove(p)
        else:
            self.inventory.remove_recursive_id(
                    self._get_old_id(parent_id, path))

    def close_directory(self, id):
        """Finish a directory: mark it as changed in this revision and hand
        it to the storage hook.

        :param id: File id of the directory, as returned when it was opened
            or added.
        """
        self.inventory[id].revision = self.revid

        # Let the subclass record the directory, with the revisions
        # collected when it was opened/added as parents.
        self._store_directory(id, self.dir_baserev[id])

    def add_directory(self, path, parent_id, copyfrom_path, copyfrom_revnum,
                      pool):
        """Add a directory to the inventory that is being built.

        :param path: UTF-8 encoded path of the new directory.
        :param parent_id: File id of the containing directory.
        :param copyfrom_path: Path the directory was copied from, or None.
        :return: File id of the directory, which is used as its baton.
        """
        path = path.decode("utf-8")
        file_id = self._get_new_id(parent_id, path)

        self.dir_baserev[file_id] = []
        if file_id in self.inventory:
            # This directory was moved here from somewhere else, but the
            # other location hasn't been removed yet.
            if copyfrom_path is None:
                # This should ideally never happen!
                copyfrom_path = self.old_inventory.id2path(file_id)
                mutter('no copyfrom path set, assuming %r' % copyfrom_path)
            assert copyfrom_path == self.old_inventory.id2path(file_id)
            assert copyfrom_path not in self._premature_deletes
            # Remember the old location so that the delete_entry() call
            # for it doesn't remove the entry that was just moved.
            self._premature_deletes.add(copyfrom_path)
            self._rename(file_id, parent_id, path)
            ie = self.inventory[file_id]
        else:
            ie = self.inventory.add_path(path, 'directory', file_id)
        ie.revision = self.revid

        return file_id

    def open_directory(self, path, parent_id, base_revnum, pool):
        """Open an existing directory for modification.

        :param path: Path of the directory (used as passed in, it is not
            decoded here).
        :param parent_id: File id of the containing directory.
        :return: File id of the directory, which is used as its baton.
        """
        assert base_revnum >= 0
        base_file_id = self._get_old_id(parent_id, path)
        base_revid = self.old_inventory[base_file_id].revision
        file_id = self._get_existing_id(parent_id, path)
        if file_id == base_file_id:
            self.dir_baserev[file_id] = [base_revid]
            ie = self.inventory[file_id]
        else:
            # Replace if original was inside this branch
            # change id of base_file_id to file_id
            ie = self.inventory[base_file_id]
            for name in ie.children:
                ie.children[name].parent_id = file_id
            # FIXME: Don't touch inventory internals
            del self.inventory._byid[base_file_id]
            self.inventory._byid[file_id] = ie
            ie.file_id = file_id
            # The directory got a new file id, so it has no text parents
            self.dir_baserev[file_id] = []
        ie.revision = self.revid
        return file_id

    def change_dir_prop(self, id, name, value, pool):
        """Process a property change on a directory.

        The merge and revision info properties are only honoured when set
        on the root of the branch; other known properties are ignored and
        unknown svn:/bzr: properties are logged.

        :param id: File id of the directory.
        :param name: Name of the property.
        :param value: New value of the property.
        """
        if name == SVN_PROP_BZR_BRANCHING_SCHEME:
            # Nothing is recorded for this property; it is merely not
            # expected anywhere but on the root.
            if id != self.inventory.root.file_id:
                mutter('rogue %r on non-root directory' % name)
                return
        elif name == SVN_PROP_BZR_ANCESTRY+str(self.scheme):
            if id != self.inventory.root.file_id:
                mutter('rogue %r on non-root directory' % name)
                return

            # Only the last line of the property is parsed as the merges
            # of this revision.
            self._bzr_merges = parse_merge_property(value.splitlines()[-1])
        elif (name.startswith(SVN_PROP_BZR_ANCESTRY) or
              name.startswith(SVN_PROP_BZR_REVISION_ID)):
            # Properties for other branching schemes
            pass
        elif name == SVN_PROP_SVK_MERGE:
            self._svk_merges = None # Force Repository.revision_parents() to look it up
        elif name == SVN_PROP_BZR_REVISION_INFO:
            if id != self.inventory.root.file_id:
                mutter('rogue %r on non-root directory' % SVN_PROP_BZR_REVISION_INFO)
                return

            self._revinfo = value
        elif name in (svn.core.SVN_PROP_ENTRY_COMMITTED_DATE,
                      svn.core.SVN_PROP_ENTRY_COMMITTED_REV,
                      svn.core.SVN_PROP_ENTRY_LAST_AUTHOR,
                      svn.core.SVN_PROP_ENTRY_LOCK_TOKEN,
                      svn.core.SVN_PROP_ENTRY_UUID,
                      svn.core.SVN_PROP_EXECUTABLE):
            pass
        elif name.startswith(svn.core.SVN_PROP_WC_PREFIX):
            pass
        elif name in (SVN_PROP_BZR_MERGE, SVN_PROP_BZR_FILEIDS):
            pass
        elif (name.startswith(svn.core.SVN_PROP_PREFIX) or
              name.startswith(SVN_PROP_BZR_PREFIX)):
            mutter('unsupported dir property %r' % name)

    def change_file_prop(self, id, name, value, pool):
        """Process a property change on the file that is currently open.

        :param name: Name of the property.
        :param value: New value of the property, None if it was removed.
        """
        if name == svn.core.SVN_PROP_EXECUTABLE:
            # You'd expect executable to match
            # svn.core.SVN_PROP_EXECUTABLE_VALUE, but that's not
            # how SVN behaves. It appears to consider the presence
            # of the property sufficient to mark it executable.
            self.is_executable = (value is not None)
        elif (name == svn.core.SVN_PROP_SPECIAL):
            self.is_symlink = (value is not None)
        elif name == svn.core.SVN_PROP_ENTRY_COMMITTED_REV:
            self.last_file_rev = int(value)
        elif name in (svn.core.SVN_PROP_ENTRY_COMMITTED_DATE,
                      svn.core.SVN_PROP_ENTRY_LAST_AUTHOR,
                      svn.core.SVN_PROP_ENTRY_LOCK_TOKEN,
                      svn.core.SVN_PROP_ENTRY_UUID,
                      svn.core.SVN_PROP_MIME_TYPE):
            pass
        elif name.startswith(svn.core.SVN_PROP_WC_PREFIX):
            pass
        elif (name.startswith(svn.core.SVN_PROP_PREFIX) or
              name.startswith(SVN_PROP_BZR_PREFIX)):
            mutter('unsupported file property %r' % name)

    def add_file(self, path, parent_id, copyfrom_path, copyfrom_revnum, baton):
        """Start a new file and reset the per-file state.

        The inventory entry for a really new file is only created in
        close_file(), once it is known whether it is a symlink.

        :param path: UTF-8 encoded path of the new file.
        :param parent_id: File id of the containing directory.
        :param copyfrom_path: Path the file was copied from, or None.
        :return: The decoded path.
        """
        path = path.decode("utf-8")
        self.is_symlink = False
        self.is_executable = None
        self.file_data = ""
        self.file_parents = []
        self.file_stream = None
        self.file_id = self._get_new_id(parent_id, path)
        if self.file_id in self.inventory:
            # This file was moved here from somewhere else, but the
            # other location hasn't been removed yet.
            if copyfrom_path is None:
                # This should ideally never happen
                copyfrom_path = self.old_inventory.id2path(self.file_id)
                mutter('no copyfrom path set, assuming %r' % copyfrom_path)
            assert copyfrom_path == self.old_inventory.id2path(self.file_id)
            assert copyfrom_path not in self._premature_deletes
            self._premature_deletes.add(copyfrom_path)
            # No need to rename if it's already in the right spot
            self._rename(self.file_id, parent_id, path)
        return path

    def open_file(self, path, parent_id, base_revnum, pool):
        """Open an existing file and load its current contents.

        :param path: Path of the file (used as passed in, it is not
            decoded here).
        :param parent_id: File id of the containing directory.
        :return: The path that was passed in.
        """
        base_file_id = self._get_old_id(parent_id, path)
        base_revid = self.old_inventory[base_file_id].revision
        self.file_id = self._get_existing_id(parent_id, path)
        self.is_executable = None
        self.is_symlink = (self.inventory[base_file_id].kind == 'symlink')
        # The old text is the base any text delta will be applied to
        self.file_data = self._get_file_data(base_file_id, base_revid)
        self.file_stream = None
        if self.file_id == base_file_id:
            self.file_parents = [base_revid]
        else:
            # Replace
            del self.inventory[base_file_id]
            self.file_parents = []
        return path

    def close_file(self, path, checksum):
        """Store the text of the current file and update its inventory entry.

        :param path: Path of the file, as returned by add_file()/open_file().
        :param checksum: Expected MD5 sum of the full text, or None if it
            should not be checked.
        """
        if self.file_stream is not None:
            self.file_stream.seek(0)
            lines = osutils.split_lines(self.file_stream.read())
        else:
            # Data didn't change or file is new
            lines = osutils.split_lines(self.file_data)

        actual_checksum = md5_strings(lines)
        assert checksum is None or checksum == actual_checksum

        self._store_file(self.file_id, lines, self.file_parents)

        if self.file_id in self.inventory:
            ie = self.inventory[self.file_id]
        elif self.is_symlink:
            ie = self.inventory.add_path(path, 'symlink', self.file_id)
        else:
            ie = self.inventory.add_path(path, 'file', self.file_id)
        ie.revision = self.revid

        if self.is_symlink:
            # The target is the first line of the text, minus the
            # leading "link " marker.
            ie.symlink_target = lines[0][len("link "):]
            ie.text_sha1 = None
            ie.text_size = None
            ie.text_id = None
        else:
            ie.text_sha1 = osutils.sha_strings(lines)
            ie.text_size = sum(map(len, lines))
            if self.is_executable is not None:
                ie.executable = self.is_executable

        self.file_stream = None

    def close_edit(self):
        """Finish the revision: commit it through _finish_commit() and
        release the pool created in start_revision().
        """
        # Every entry that was moved must have had its old path deleted
        assert len(self._premature_deletes) == 0
        self._finish_commit()
        self.pool.destroy()

    def apply_textdelta(self, file_id, base_checksum):
        """Prepare for receiving a text delta for the current file.

        :param base_checksum: Expected MD5 sum of the text the delta is
            applied against, or None if it should not be checked.
        :return: Handler that applies the delta windows to the current file
            data; the result ends up in self.file_stream.
        """
        actual_checksum = md5.new(self.file_data).hexdigest()
        # The message has to be outside of the parentheses: asserting a
        # (condition, message) tuple always succeeds.
        assert (base_checksum is None or
                base_checksum == actual_checksum), \
            "base checksum mismatch: %r != %r" % (base_checksum,
                                                  actual_checksum)
        self.file_stream = StringIO()
        return apply_txdelta_handler(StringIO(self.file_data),
                                     self.file_stream, self.pool)

    def _store_file(self, file_id, lines, parents):
        """Store a new text for a file. Must be implemented by subclasses.

        :param file_id: File id of the file.
        :param lines: Contents of the file, as a list of lines.
        :param parents: Revision ids of the parent texts.
        """
        raise NotImplementedError(self._store_file)

    def _store_directory(self, file_id, parents):
        """Record a directory. Must be implemented by subclasses.

        :param file_id: File id of the directory.
        :param parents: Revision ids of the parent versions.
        """
        raise NotImplementedError(self._store_directory)

    def _get_file_data(self, file_id, revid):
        """Return the text of a file in a particular revision. Must be
        implemented by subclasses.
        """
        raise NotImplementedError(self._get_file_data)

    def _finish_commit(self):
        """Store the revision that was built. Must be implemented by
        subclasses.
        """
        raise NotImplementedError(self._finish_commit)

    def abort_edit(self):
        """Called when building the revision failed. Does nothing by
        default.
        """
        pass

    def _start_revision(self):
        """Hook called at the end of start_revision(). Does nothing by
        default.
        """
        pass
376
377
class WeaveRevisionBuildEditor(RevisionBuildEditor):
    """Revision build editor that stores texts in the weave store of the
    target repository.
    """
    def __init__(self, source, target):
        """See RevisionBuildEditor.__init__."""
        RevisionBuildEditor.__init__(self, source, target)
        self.weave_store = target.weave_store

    def _start_revision(self):
        """Open a write group on the target for the new revision."""
        self.target.start_write_group()

    def _store_directory(self, file_id, parents):
        """Add an empty text for the directory, unless this revision is
        already present in its weave.
        """
        weave = self.weave_store.get_weave_or_empty(file_id, self.transact)
        if weave.has_version(self.revid):
            return
        weave.add_lines(self.revid, parents, [])

    def _get_file_data(self, file_id, revid):
        """Return the text of file_id in revision revid."""
        weave = self.weave_store.get_weave_or_empty(file_id, self.transact)
        text = weave.get_text(revid)
        return text

    def _store_file(self, file_id, lines, parents):
        """Add the lines as the text of the file for this revision, unless
        this revision is already present in its weave.
        """
        weave = self.weave_store.get_weave_or_empty(file_id, self.transact)
        if weave.has_version(self.revid):
            return
        weave.add_lines(self.revid, parents, lines)

    def _finish_commit(self):
        """Add the revision and its inventory to the target and commit the
        write group.
        """
        revision = self._get_revision(self.revid)
        self.inventory.revision_id = self.revid
        serialised = self.target.serialise_inventory(self.inventory)
        revision.inventory_sha1 = osutils.sha_string(serialised)
        self.target.add_revision(self.revid, revision, self.inventory)
        self.target.commit_write_group()

    def abort_edit(self):
        """Abort the write group that was opened for this revision."""
        self.target.abort_write_group()
412
413
class PackRevisionBuildEditor(WeaveRevisionBuildEditor):
    """Revision build editor that stores texts through the pack collection
    (target._packs) of the target repository.
    """
    def __init__(self, source, target):
        """See WeaveRevisionBuildEditor.__init__."""
        WeaveRevisionBuildEditor.__init__(self, source, target)

    def _add_text_to_weave(self, file_id, new_lines, parents):
        """Add a text for file_id in the revision that is being built.

        :param file_id: File id the text belongs to.
        :param new_lines: Contents, as a list of lines.
        :param parents: Revision ids of the parent texts.
        :return: Whatever the pack collection returns.
        """
        packs = self.target._packs
        return packs._add_text_to_weave(file_id, self.revid, new_lines,
                                        parents, nostore_sha=None,
                                        random_revid=False)

    def _store_directory(self, file_id, parents):
        """Record the directory as an empty text."""
        no_lines = []
        self._add_text_to_weave(file_id, no_lines, parents)

    def _store_file(self, file_id, lines, parents):
        """Record the lines as the new text of the file."""
        self._add_text_to_weave(file_id, lines, parents)
430
431
class CommitBuilderRevisionBuildEditor(RevisionBuildEditor):
    """Revision build editor on top of the CommitBuilder API.

    Placeholder only: instances can not be created yet.
    """
    def __init__(self, source, target):
        """Always raises NotImplementedError, after the base class has been
        initialised.
        """
        RevisionBuildEditor.__init__(self, source, target)
        # Not written yet
        raise NotImplementedError(self)
438
439
def get_revision_build_editor(repository):
    """Pick the RevisionBuildEditor class that fits a target repository.

    :param repository: Repository the revisions will be written to.
    :return: PackRevisionBuildEditor if the repository has a `_packs`
        attribute, WeaveRevisionBuildEditor otherwise. The class itself is
        returned, not an instance.
    """
    uses_packs = hasattr(repository, '_packs')
    if not uses_packs:
        return WeaveRevisionBuildEditor
    return PackRevisionBuildEditor
445
446
class InterFromSvnRepository(InterRepository):
    """Svn to any repository actions.

    Revisions are fetched by driving a RevisionBuildEditor (see
    get_revision_build_editor()) with the changes between each revision
    and its left-hand side parent.
    """

    _matching_repo_format = SvnRepositoryFormat()

    @staticmethod
    def _get_repo_format_to_test():
        return None

    def _find_all(self):
        """Find all revisions from the source repository that are not
        yet in the target repository.

        :return: Tuple with the list of missing revision ids and a
            dictionary mapping each of them to its mainline parent.
        """
        needed = [revid for revid in self.source.all_revision_ids()
                  if not self.target.has_revision(revid)]
        parents = {}
        for revid in needed:
            (branch, revnum, scheme) = self.source.lookup_revision_id(revid)
            parents[revid] = self.source._mainline_revision_parent(branch,
                                               revnum, scheme)
        return (needed, parents)

    def _find_until(self, revision_id):
        """Find all missing revisions until revision_id

        :param revision_id: Stop revision
        :return: Tuple with revisions missing and a dictionary with
            parents for those revision. The missing revisions are listed
            in the order in which the branch history is followed.
        """
        needed = []
        parents = {}
        (path, until_revnum, scheme) = self.source.lookup_revision_id(
                                                                    revision_id)

        prev_revid = None
        for (branch, revnum) in self.source.follow_branch(path,
                                                          until_revnum, scheme):
            revid = self.source.generate_revision_id(revnum, branch, str(scheme))

            # The revision seen in the previous iteration has the current
            # one as its parent.
            if prev_revid is not None:
                parents[prev_revid] = revid

            prev_revid = revid

            if not self.target.has_revision(revid):
                needed.append(revid)

        # The last revision that was found has no parent. Don't add a
        # bogus None key if no revisions were found at all.
        if prev_revid is not None:
            parents[prev_revid] = None
        return (needed, parents)

    def copy_content(self, revision_id=None, pb=None):
        """See InterRepository.copy_content."""
        if revision_id == NULL_REVISION:
            return
        # Dictionary with paths as keys, revnums as values

        # Loop over all the revnums until revision_id
        # (or youngest_revnum) and call self.target.add_revision()
        # or self.target.add_inventory() each time
        self.target.lock_read()
        try:
            if revision_id is None:
                (needed, lhs_parent) = self._find_all()
            else:
                (needed, lhs_parent) = self._find_until(revision_id)
        finally:
            self.target.unlock()

        if len(needed) == 0:
            # Nothing to fetch
            return

        self._copy_revisions_switch(needed, pb, lhs_parent)

    def _copy_revisions_replay(self, revids, pb=None):
        """Copy a set of related revisions using svn.ra.replay.

        Not implemented yet.

        :param revids: Revision ids to copy.
        :param pb: Optional progress bar
        """
        raise NotImplementedError(self._copy_revisions_replay)

    def _copy_revisions_switch(self, revids, pb=None, lhs_parent=None):
        """Copy a set of related revisions using svn.ra.switch.

        :param revids: List of revision ids of revisions to copy,
                       newest first.
        :param pb: Optional progress bar. If None, a nested progress bar
            is created and finished by this method.
        :param lhs_parent: Dictionary mapping each revision id in revids
            to the revision id of its left-hand side parent (None if it
            has none). Although it defaults to None, it has to be
            specified.
        """
        repos_root = self.source.transport.get_svn_repos_root()

        prev_revid = None
        transport = self.source.transport
        if pb is None:
            pb = ui.ui_factory.nested_progress_bar()
            nested_pb = pb
        else:
            nested_pb = None
        num = 0
        prev_inv = None

        try:
            self.target.lock_write()
            try:
                # Created while holding the write lock, but inside the
                # try block so that the lock is released if this fails.
                revbuildklass = get_revision_build_editor(self.target)
                editor = revbuildklass(self.source, self.target)

                # revids is newest first; parents have to be copied
                # before their children.
                for revid in reversed(revids):
                    pb.update('copying revision', num, len(revids))

                    parent_revid = lhs_parent[revid]

                    if parent_revid is None:
                        parent_inv = Inventory(root_id=None)
                    elif prev_revid != parent_revid:
                        parent_inv = self.target.get_inventory(parent_revid)
                    else:
                        # The parent is the revision that was just copied
                        parent_inv = prev_inv

                    editor.start_revision(revid, parent_inv)

                    # Bare except on purpose: the edit has to be aborted
                    # whatever goes wrong, and the error is re-raised.
                    try:
                        pool = Pool()

                        if parent_revid is None:
                            branch_url = urlutils.join(repos_root,
                                                       editor.branch_path)
                            transport.reparent(branch_url)
                            assert transport.svn_url == branch_url.rstrip("/"), \
                                "Expected %r, got %r" % (
                                    branch_url.rstrip("/"), transport.svn_url)
                            reporter = transport.do_update(editor.revnum, True,
                                                           editor, pool)

                            # Report status of existing paths
                            reporter.set_path("", editor.revnum, True, None,
                                              pool)
                        else:
                            (parent_branch, parent_revnum, scheme) = \
                                    self.source.lookup_revision_id(parent_revid)
                            transport.reparent(urlutils.join(repos_root,
                                                             parent_branch))

                            if parent_branch != editor.branch_path:
                                reporter = transport.do_switch(editor.revnum,
                                    True,
                                    urlutils.join(repos_root,
                                                  editor.branch_path),
                                    editor, pool)
                            else:
                                # Pass the pool here as well, like the
                                # other do_update() call does.
                                reporter = transport.do_update(editor.revnum,
                                                               True, editor,
                                                               pool)

                            # Report status of existing paths
                            reporter.set_path("", parent_revnum, False, None,
                                              pool)

                        lock = transport.lock_read(".")
                        try:
                            reporter.finish_report(pool)
                        finally:
                            lock.unlock()
                    except:
                        editor.abort_edit()
                        raise

                    prev_inv = editor.inventory
                    prev_revid = revid
                    pool.destroy()
                    num += 1
            finally:
                self.target.unlock()
        finally:
            if nested_pb is not None:
                nested_pb.finished()
        self.source.transport.reparent_root()

    def fetch(self, revision_id=None, pb=None):
        """Fetch revisions.

        :param revision_id: Revision to fetch up to, or None to fetch all
            revisions.
        :param pb: Optional progress bar.
        """
        self.copy_content(revision_id=revision_id, pb=pb)

    @staticmethod
    def is_compatible(source, target):
        """Be compatible with SvnRepository.

        :return: True if source is a SvnRepository and target supports
            rich roots.
        """
        # FIXME: Also check target uses VersionedFile
        return isinstance(source, SvnRepository) and target.supports_rich_root()
622