Handle corner cases when pulling pushed changes with directory renames.
fetch.py (jelmer/subvertpy.git)
# Copyright (C) 2005-2007 Jelmer Vernooij <jelmer@samba.org>

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
"""Fetching revisions from Subversion repositories in batches."""

import bzrlib
from bzrlib import osutils, ui, urlutils
from bzrlib.inventory import Inventory
from bzrlib.revision import Revision, NULL_REVISION
from bzrlib.repository import InterRepository
from bzrlib.trace import mutter

from cStringIO import StringIO
import md5

from svn.core import Pool
import svn.core
import svn.delta  # provides svn.delta.Editor, subclassed below

from fileids import generate_file_id
from repository import (SvnRepository, SVN_PROP_BZR_ANCESTRY,
                SVN_PROP_SVK_MERGE, SVN_PROP_BZR_MERGE,
                SVN_PROP_BZR_PREFIX, SVN_PROP_BZR_REVISION_INFO,
                SVN_PROP_BZR_BRANCHING_SCHEME, SVN_PROP_BZR_REVISION_ID,
                SVN_PROP_BZR_FILEIDS, SvnRepositoryFormat,
                parse_revision_metadata, parse_merge_property)
from tree import apply_txdelta_handler


def md5_strings(strings):
    """Return the MD5sum of the concatenation of strings.

    :param strings: Strings to find the MD5sum of.
    :return: MD5sum
    """
    s = md5.new()
    map(s.update, strings)
    return s.hexdigest()
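# Illustrative example (not from the original module): md5_strings(["foo", "bar"])
# yields the same digest as md5.new("foobar").hexdigest(), since the strings are
# hashed as one concatenated byte sequence.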


class RevisionBuildEditor(svn.delta.Editor):
    """Implementation of the Subversion commit editor interface that builds a
    Bazaar revision.
    """
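    # The Subversion delta editor drives this class through the usual
    # callback sequence: open_root(), add_/open_directory() and
    # add_/open_file() for changed paths, apply_textdelta() and close_file()
    # for file texts, close_directory() for each directory, and finally
    # close_edit(), which triggers _finish_commit() on the subclass.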
    def __init__(self, source, target, branch_path, prev_inventory, revid,
                 svn_revprops, id_map, scheme):
        self.branch_path = branch_path
        self.old_inventory = prev_inventory
        self.inventory = prev_inventory.copy()
        self.revid = revid
        self.id_map = id_map
        self.scheme = scheme
        self.source = source
        self.target = target
        self.transact = target.get_transaction()
        self.dir_baserev = {}
        self._bzr_merges = []
        self._svk_merges = []
        self._revinfo = None
        self._svn_revprops = svn_revprops
        self._premature_deletes = set()
        self.pool = Pool()

    def _get_parent_ids(self):
        return self.source.revision_parents(self.revid, self._bzr_merges)

    def _get_revision(self, revid):
        """Creates the revision object.

        :param revid: Revision id of the revision to create.
        """

        # Commit SVN revision properties to a Revision object
        rev = Revision(revision_id=revid, parent_ids=self._get_parent_ids())

        if self._svn_revprops[2] is not None:
            rev.timestamp = 1.0 * svn.core.secs_from_timestr(
                self._svn_revprops[2], None) #date
        else:
            rev.timestamp = 0 # FIXME: Obtain repository creation time
        rev.timezone = None

        rev.committer = self._svn_revprops[0] # author
        if rev.committer is None:
            rev.committer = ""
        rev.message = self._svn_revprops[1] # message

        if self._revinfo:
            parse_revision_metadata(self._revinfo, rev)

        return rev

    def open_root(self, base_revnum, baton):
        if self.old_inventory.root is None:
            # First time the root is set
            file_id = generate_file_id(self.source, self.revid, "")
            self.dir_baserev[file_id] = []
        else:
            assert self.old_inventory.root.revision is not None
            if self.id_map.has_key(""):
                file_id = self.id_map[""]
            else:
                file_id = self.old_inventory.root.file_id
            self.dir_baserev[file_id] = [self.old_inventory.root.revision]

        if self.inventory.root is not None and \
                file_id == self.inventory.root.file_id:
            ie = self.inventory.root
        else:
            ie = self.inventory.add_path("", 'directory', file_id)
        ie.revision = self.revid
        return file_id

    def _get_existing_id(self, parent_id, path):
        if self.id_map.has_key(path):
            return self.id_map[path]
        return self._get_old_id(parent_id, path)

    def _get_old_id(self, parent_id, old_path):
        return self.old_inventory[parent_id].children[urlutils.basename(old_path)].file_id

    def _get_new_id(self, parent_id, new_path):
        if self.id_map.has_key(new_path):
            return self.id_map[new_path]
        return generate_file_id(self.source, self.revid, new_path)

    def _rename(self, file_id, parent_id, path):
        # Only rename if the entry is not in the right place yet
        if (self.inventory[file_id].parent_id == parent_id and
            self.inventory[file_id].name == urlutils.basename(path)):
            return
        self.inventory.rename(file_id, parent_id, urlutils.basename(path))

    def delete_entry(self, path, revnum, parent_id, pool):
        path = path.decode("utf-8")
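        # If this path was already re-added elsewhere in this revision (a
        # copy-based rename), add_directory()/add_file() recorded its old
        # location in self._premature_deletes; in that case only drop the
        # bookkeeping entries (including children) instead of removing the
        # inventory entry again.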
        if path in self._premature_deletes:
            # Delete recursively
            self._premature_deletes.remove(path)
            for p in self._premature_deletes.copy():
                if p.startswith("%s/" % path):
                    self._premature_deletes.remove(p)
        else:
            self.inventory.remove_recursive_id(self._get_old_id(parent_id, path))

    def close_directory(self, id):
        self.inventory[id].revision = self.revid

        # Only record root if the target repository supports it
        self._store_directory(id, self.dir_baserev[id])

    def add_directory(self, path, parent_id, copyfrom_path, copyfrom_revnum,
                      pool):
        path = path.decode("utf-8")
        file_id = self._get_new_id(parent_id, path)

        self.dir_baserev[file_id] = []
        if file_id in self.inventory:
            # This directory was moved here from somewhere else, but the
            # other location hasn't been removed yet.
            if copyfrom_path is None:
                # This should ideally never happen!
                copyfrom_path = self.old_inventory.id2path(file_id)
                mutter('no copyfrom path set, assuming %r' % copyfrom_path)
            assert copyfrom_path == self.old_inventory.id2path(file_id)
            assert copyfrom_path not in self._premature_deletes
            self._premature_deletes.add(copyfrom_path)
            self._rename(file_id, parent_id, path)
            ie = self.inventory[file_id]
        else:
            ie = self.inventory.add_path(path, 'directory', file_id)
        ie.revision = self.revid

        return file_id

    def open_directory(self, path, parent_id, base_revnum, pool):
        assert base_revnum >= 0
        base_file_id = self._get_old_id(parent_id, path)
        base_revid = self.old_inventory[base_file_id].revision
        file_id = self._get_existing_id(parent_id, path)
        if file_id == base_file_id:
            self.dir_baserev[file_id] = [base_revid]
            ie = self.inventory[file_id]
        else:
            # Replace if original was inside this branch
            # change id of base_file_id to file_id
            ie = self.inventory[base_file_id]
            for name in ie.children:
                ie.children[name].parent_id = file_id
            # FIXME: Don't touch inventory internals
            del self.inventory._byid[base_file_id]
            self.inventory._byid[file_id] = ie
            ie.file_id = file_id
            self.dir_baserev[file_id] = []
        ie.revision = self.revid
        return file_id

    def change_dir_prop(self, id, name, value, pool):
        if name == SVN_PROP_BZR_BRANCHING_SCHEME:
            if id != self.inventory.root.file_id:
                mutter('rogue %r on non-root directory' % name)
                return
        elif name == SVN_PROP_BZR_ANCESTRY+str(self.scheme):
            if id != self.inventory.root.file_id:
                mutter('rogue %r on non-root directory' % name)
                return

            self._bzr_merges = parse_merge_property(value.splitlines()[-1])
        elif (name.startswith(SVN_PROP_BZR_ANCESTRY) or
              name.startswith(SVN_PROP_BZR_REVISION_ID)):
            pass
        elif name == SVN_PROP_SVK_MERGE:
            self._svk_merges = None # Force Repository.revision_parents() to look it up
        elif name == SVN_PROP_BZR_REVISION_INFO:
            if id != self.inventory.root.file_id:
                mutter('rogue %r on non-root directory' % SVN_PROP_BZR_REVISION_INFO)
                return

            self._revinfo = value
        elif name in (svn.core.SVN_PROP_ENTRY_COMMITTED_DATE,
                      svn.core.SVN_PROP_ENTRY_COMMITTED_REV,
                      svn.core.SVN_PROP_ENTRY_LAST_AUTHOR,
                      svn.core.SVN_PROP_ENTRY_LOCK_TOKEN,
                      svn.core.SVN_PROP_ENTRY_UUID,
                      svn.core.SVN_PROP_EXECUTABLE):
            pass
        elif name.startswith(svn.core.SVN_PROP_WC_PREFIX):
            pass
        elif name in (SVN_PROP_BZR_MERGE, SVN_PROP_BZR_FILEIDS):
            pass
        elif (name.startswith(svn.core.SVN_PROP_PREFIX) or
              name.startswith(SVN_PROP_BZR_PREFIX)):
            mutter('unsupported dir property %r' % name)

    def change_file_prop(self, id, name, value, pool):
        if name == svn.core.SVN_PROP_EXECUTABLE:
            # You'd expect executable to match
            # svn.core.SVN_PROP_EXECUTABLE_VALUE, but that's not
            # how SVN behaves. It appears to consider the presence
            # of the property sufficient to mark it executable.
            self.is_executable = (value is not None)
        elif (name == svn.core.SVN_PROP_SPECIAL):
            self.is_symlink = (value is not None)
        elif name == svn.core.SVN_PROP_ENTRY_COMMITTED_REV:
            self.last_file_rev = int(value)
        elif name in (svn.core.SVN_PROP_ENTRY_COMMITTED_DATE,
                      svn.core.SVN_PROP_ENTRY_LAST_AUTHOR,
                      svn.core.SVN_PROP_ENTRY_LOCK_TOKEN,
                      svn.core.SVN_PROP_ENTRY_UUID,
                      svn.core.SVN_PROP_MIME_TYPE):
            pass
        elif name.startswith(svn.core.SVN_PROP_WC_PREFIX):
            pass
        elif (name.startswith(svn.core.SVN_PROP_PREFIX) or
              name.startswith(SVN_PROP_BZR_PREFIX)):
            mutter('unsupported file property %r' % name)

    def add_file(self, path, parent_id, copyfrom_path, copyfrom_revnum, baton):
        path = path.decode("utf-8")
        self.is_symlink = False
        self.is_executable = None
        self.file_data = ""
        self.file_parents = []
        self.file_stream = None
        self.file_id = self._get_new_id(parent_id, path)
        if self.file_id in self.inventory:
            # This file was moved here from somewhere else, but the
            # other location hasn't been removed yet.
            if copyfrom_path is None:
                # This should ideally never happen
                copyfrom_path = self.old_inventory.id2path(self.file_id)
                mutter('no copyfrom path set, assuming %r' % copyfrom_path)
            assert copyfrom_path == self.old_inventory.id2path(self.file_id)
            assert copyfrom_path not in self._premature_deletes
            self._premature_deletes.add(copyfrom_path)
            # No need to rename if it's already in the right spot
            self._rename(self.file_id, parent_id, path)
        return path

    def open_file(self, path, parent_id, base_revnum, pool):
        base_file_id = self._get_old_id(parent_id, path)
        base_revid = self.old_inventory[base_file_id].revision
        self.file_id = self._get_existing_id(parent_id, path)
        self.is_executable = None
        self.is_symlink = (self.inventory[base_file_id].kind == 'symlink')
        self.file_data = self._get_file_data(base_file_id, base_revid)
        self.file_stream = None
        if self.file_id == base_file_id:
            self.file_parents = [base_revid]
        else:
            # Replace
            del self.inventory[base_file_id]
            self.file_parents = []
        return path

    def close_file(self, path, checksum):
        if self.file_stream is not None:
            self.file_stream.seek(0)
            lines = osutils.split_lines(self.file_stream.read())
        else:
            # Data didn't change or file is new
            lines = osutils.split_lines(self.file_data)

        actual_checksum = md5_strings(lines)
        assert checksum is None or checksum == actual_checksum

        self._store_file(self.file_id, lines, self.file_parents)

        if self.file_id in self.inventory:
            ie = self.inventory[self.file_id]
        elif self.is_symlink:
            ie = self.inventory.add_path(path, 'symlink', self.file_id)
        else:
            ie = self.inventory.add_path(path, 'file', self.file_id)
        ie.revision = self.revid

        if self.is_symlink:
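            # Subversion stores a symlink as a file whose text reads
            # "link TARGET" (with svn:special set), so strip the "link "
            # prefix to recover the target.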
            ie.symlink_target = lines[0][len("link "):]
            ie.text_sha1 = None
            ie.text_size = None
            ie.text_id = None
        else:
            ie.text_sha1 = osutils.sha_strings(lines)
            ie.text_size = sum(map(len, lines))
            if self.is_executable is not None:
                ie.executable = self.is_executable

        self.file_stream = None

    def close_edit(self):
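        # Every copy-based rename recorded in _premature_deletes should have
        # been matched by a delete_entry() call by now; anything left over
        # means a directory rename corner case was not handled.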
        assert len(self._premature_deletes) == 0
        self._finish_commit()
        self.pool.destroy()

    def apply_textdelta(self, file_id, base_checksum):
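        # Return a window handler that applies the incoming delta against the
        # base text (self.file_data) and writes the result to
        # self.file_stream, which close_file() reads back later.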
        actual_checksum = md5.new(self.file_data).hexdigest()
        assert base_checksum is None or base_checksum == actual_checksum, \
            "base checksum mismatch: %r != %r" % (base_checksum,
                                                  actual_checksum)
        self.file_stream = StringIO()
        return apply_txdelta_handler(StringIO(self.file_data),
                                     self.file_stream, self.pool)

    def _store_file(self, file_id, lines, parents):
        raise NotImplementedError(self._store_file)

    def _store_directory(self, file_id, parents):
        raise NotImplementedError(self._store_directory)

    def _get_file_data(self, file_id, revid):
        raise NotImplementedError(self._get_file_data)

    def _finish_commit(self):
        raise NotImplementedError(self._finish_commit)

    def abort_edit(self):
        pass


class WeaveRevisionBuildEditor(RevisionBuildEditor):
    """Subversion commit editor that can write to a weave-based repository.
    """
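    # A write group is started on the target repository in __init__ and
    # committed by _finish_commit(); abort_edit() aborts it if the replay
    # fails partway through.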
    def __init__(self, source, target, branch_path, prev_inventory, revid,
                 svn_revprops, id_map, scheme):
        RevisionBuildEditor.__init__(self, source, target,
            branch_path, prev_inventory, revid, svn_revprops, id_map, scheme)
        self.weave_store = target.weave_store
        self.target.start_write_group()

    def _store_directory(self, file_id, parents):
        file_weave = self.weave_store.get_weave_or_empty(file_id, self.transact)
        if not file_weave.has_version(self.revid):
            file_weave.add_lines(self.revid, parents, [])

    def _get_file_data(self, file_id, revid):
        file_weave = self.weave_store.get_weave_or_empty(file_id, self.transact)
        return file_weave.get_text(revid)

    def _store_file(self, file_id, lines, parents):
        file_weave = self.weave_store.get_weave_or_empty(file_id, self.transact)
        if not file_weave.has_version(self.revid):
            file_weave.add_lines(self.revid, parents, lines)

    def _finish_commit(self):
        rev = self._get_revision(self.revid)
        self.inventory.revision_id = self.revid
        rev.inventory_sha1 = osutils.sha_string(
                self.target.serialise_inventory(self.inventory))
        self.target.add_revision(self.revid, rev, self.inventory)
        self.target.commit_write_group()

    def abort_edit(self):
        self.target.abort_write_group()


class PackRevisionBuildEditor(WeaveRevisionBuildEditor):
    """Revision Build Editor for Subversion that is specific to the packs API.
    """
    def __init__(self, source, target, branch_path, prev_inventory, revid,
                 svn_revprops, id_map, scheme):
        WeaveRevisionBuildEditor.__init__(self, source, target,
            branch_path, prev_inventory, revid, svn_revprops, id_map, scheme)

    def _add_text_to_weave(self, file_id, new_lines, parents):
        return self.target._packs._add_text_to_weave(file_id,
            self.revid, new_lines, parents, nostore_sha=None,
            random_revid=False)

    def _store_directory(self, file_id, parents):
        self._add_text_to_weave(file_id, [], parents)

    def _store_file(self, file_id, lines, parents):
        self._add_text_to_weave(file_id, lines, parents)


class CommitBuilderRevisionBuildEditor(RevisionBuildEditor):
    """Revision Build Editor for Subversion that uses the CommitBuilder API.
    """
    def __init__(self, source, target, branch_path, prev_inventory, revid,
                 svn_revprops, id_map, scheme):
        RevisionBuildEditor.__init__(self, source, target,
            branch_path, prev_inventory, revid, svn_revprops, id_map, scheme)
        raise NotImplementedError(self)


class InterFromSvnRepository(InterRepository):
    """Svn to any repository actions."""

    _matching_repo_format = SvnRepositoryFormat()

    @staticmethod
    def _get_repo_format_to_test():
        return None

    def _find_all(self):
        parents = {}
        needed = filter(lambda x: not self.target.has_revision(x),
                        self.source.all_revision_ids())
        for revid in needed:
            (branch, revnum, scheme) = self.source.lookup_revision_id(revid)
            parents[revid] = self.source._mainline_revision_parent(branch,
                                               revnum, scheme)
        return (needed, parents)

    def _find_until(self, revision_id):
        """Find all missing revisions until revision_id.

        :param revision_id: Stop revision
        :return: Tuple with revisions missing and a dictionary with
            parents for those revisions.
        """
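        # follow_branch() yields (branch_path, revnum) pairs starting at
        # until_revnum and walking back in time, so each revision seen becomes
        # the mainline parent of the one seen just before it.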
        needed = []
        parents = {}
        (path, until_revnum, scheme) = self.source.lookup_revision_id(
                                                                    revision_id)

        prev_revid = None
        for (branch, revnum) in self.source.follow_branch(path,
                                                          until_revnum, scheme):
            revid = self.source.generate_revision_id(revnum, branch, str(scheme))

            if prev_revid is not None:
                parents[prev_revid] = revid

            prev_revid = revid

            if not self.target.has_revision(revid):
                needed.append(revid)

        parents[prev_revid] = None
        return (needed, parents)

    def copy_content(self, revision_id=None, pb=None):
        """See InterRepository.copy_content."""
        if revision_id == NULL_REVISION:
            return
        # Dictionary with paths as keys, revnums as values

        # Loop over all the revnums until revision_id
        # (or youngest_revnum) and call self.target.add_revision()
        # or self.target.add_inventory() each time
        self.target.lock_read()
        try:
            if revision_id is None:
                (needed, parents) = self._find_all()
            else:
                (needed, parents) = self._find_until(revision_id)
        finally:
            self.target.unlock()

        if len(needed) == 0:
            # Nothing to fetch
            return

        repos_root = self.source.transport.get_svn_repos_root()

        prev_revid = None
        transport = self.source.transport
        if pb is None:
            pb = ui.ui_factory.nested_progress_bar()
            nested_pb = pb
        else:
            nested_pb = None
        num = 0
        prev_inv = None
        self.target.lock_write()
        try:
            for revid in reversed(needed):
                (branch, revnum, scheme) = self.source.lookup_revision_id(revid)
                pb.update('copying revision', num, len(needed))

                parent_revid = parents[revid]

                if parent_revid is None:
                    parent_inv = Inventory(root_id=None)
                elif prev_revid != parent_revid:
                    parent_inv = self.target.get_inventory(parent_revid)
                else:
                    assert prev_inv is not None
                    parent_inv = prev_inv

                changes = self.source._log.get_revision_paths(revnum, branch)
                renames = self.source.revision_fileid_renames(revid)
                id_map = self.source.transform_fileid_map(self.source.uuid,
                                      revnum, branch, changes, renames, scheme)

                revbuildklass = WeaveRevisionBuildEditor
                if hasattr(self.target, '_packs'):
                    revbuildklass = PackRevisionBuildEditor

                editor = revbuildklass(self.source, self.target,
                             branch, parent_inv, revid,
                             self.source._log.get_revision_info(revnum),
                             id_map, scheme)

                try:
                    pool = Pool()
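                    # Replay strategy: with no parent revision, check the
                    # branch out from scratch (report an empty start path at
                    # revnum); otherwise report the parent revision instead,
                    # using do_switch() when the parent lives at a different
                    # branch path than the revision being fetched.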

                    if parent_revid is None:
                        branch_url = urlutils.join(repos_root, branch)
                        transport.reparent(branch_url)
                        assert transport.svn_url == branch_url.rstrip("/"), \
                            "Expected %r, got %r" % (branch_url.rstrip("/"), transport.svn_url)
                        reporter = transport.do_update(revnum, True, editor,
                                                       pool)

                        # Report status of existing paths
                        reporter.set_path("", revnum, True, None, pool)
                    else:
                        (parent_branch, parent_revnum, scheme) = \
                                self.source.lookup_revision_id(parent_revid)
                        transport.reparent(urlutils.join(repos_root, parent_branch))

                        if parent_branch != branch:
                            reporter = transport.do_switch(
                                       revnum, True,
                                       urlutils.join(repos_root, branch),
                                       editor, pool)
                        else:
                            reporter = transport.do_update(revnum, True, editor)

                        # Report status of existing paths
                        reporter.set_path("", parent_revnum, False, None, pool)

                    lock = transport.lock_read(".")
                    reporter.finish_report(pool)
                    lock.unlock()
                except:
                    editor.abort_edit()
                    raise

                prev_inv = editor.inventory
                prev_revid = revid
                pool.destroy()
                num += 1
        finally:
            self.target.unlock()
            if nested_pb is not None:
                nested_pb.finished()
        self.source.transport.reparent_root()

    def fetch(self, revision_id=None, pb=None):
        """Fetch revisions."""
        self.copy_content(revision_id=revision_id, pb=pb)

    @staticmethod
    def is_compatible(source, target):
        """Be compatible with SvnRepository."""
        # FIXME: Also check target uses VersionedFile
        return isinstance(source, SvnRepository) and \
                target.supports_rich_root()
