Release 0.4.3
[jelmer/subvertpy.git] / fetch.py
1 # Copyright (C) 2005-2007 Jelmer Vernooij <jelmer@samba.org>
2
3 # This program is free software; you can redistribute it and/or modify
4 # it under the terms of the GNU General Public License as published by
5 # the Free Software Foundation; either version 2 of the License, or
6 # (at your option) any later version.
7
8 # This program is distributed in the hope that it will be useful,
9 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11 # GNU General Public License for more details.
12
13 # You should have received a copy of the GNU General Public License
14 # along with this program; if not, write to the Free Software
15 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16 """Fetching revisions from Subversion repositories in batches."""
17
18 import bzrlib
19 from bzrlib import osutils, ui, urlutils
20 from bzrlib.inventory import Inventory
21 from bzrlib.revision import Revision
22 from bzrlib.repository import InterRepository
23 from bzrlib.trace import mutter
24
25 from copy import copy
26 from cStringIO import StringIO
27 import md5
28
29 from svn.core import Pool
30 import svn.core
31
32 from fileids import generate_file_id
33 from repository import (SvnRepository, SVN_PROP_BZR_ANCESTRY, 
34                 SVN_PROP_SVK_MERGE, SVN_PROP_BZR_MERGE,
35                 SVN_PROP_BZR_PREFIX, SVN_PROP_BZR_REVISION_INFO, 
36                 SVN_PROP_BZR_BRANCHING_SCHEME, SVN_PROP_BZR_REVISION_ID,
37                 SVN_PROP_BZR_FILEIDS, SvnRepositoryFormat, 
38                 parse_revision_metadata, parse_merge_property)
39 from tree import apply_txdelta_handler
40
41
def md5_strings(strings):
    """Return the MD5sum of the concatenation of strings.

    The strings are fed to the digest one at a time, so the concatenation
    is never materialised in memory.

    :param strings: Iterable of byte strings to find the MD5sum of.
    :return: MD5sum as a hexadecimal string.
    """
    # hashlib supersedes the deprecated md5 module; fall back to the old
    # module on interpreters that predate hashlib.
    try:
        from hashlib import md5 as new_md5
    except ImportError:
        from md5 import new as new_md5
    digest = new_md5()
    # Explicit loop: update() is called for its side effect only, so there
    # is no point in building (or lazily never evaluating) a map() result.
    for chunk in strings:
        digest.update(chunk)
    return digest.hexdigest()
51
52
class RevisionBuildEditor(svn.delta.Editor):
    """Implementation of the Subversion commit editor interface that builds a
    Bazaar revision.

    The editor starts from a copy of the parent inventory, updates that copy
    as directory and file callbacks arrive, stores texts in the target's
    weave store and, in close_edit(), adds the finished revision and
    inventory to the target repository.
    """
    def __init__(self, source, target, branch_path, prev_inventory, revid,
                 svn_revprops, id_map, scheme):
        """Create an editor that builds the revision ``revid``.

        :param source: Repository the revision comes from; used to generate
            file ids and to look up revision parents.
        :param target: Repository the revision is added to; its transaction
            and weave store are used to store texts.
        :param branch_path: Branch path of the revision (stored on the
            editor, not otherwise used by this class).
        :param prev_inventory: Inventory of the parent revision. It is kept
            as ``old_inventory`` and a copy of it is modified.
        :param revid: Revision id of the revision being built.
        :param svn_revprops: Indexable holding the author at [0], the log
            message at [1] and the date string (or None) at [2].
        :param id_map: Mapping from path to file id; entries in it take
            precedence over ids from the old inventory or generated ids.
        :param scheme: Branching scheme; its str() form selects which
            ancestry property is honoured in change_dir_prop().
        """
        self.branch_path = branch_path
        self.old_inventory = prev_inventory
        self.inventory = copy(prev_inventory)
        self.revid = revid
        self.id_map = id_map
        self.scheme = scheme
        self.source = source
        self.target = target
        self.transact = target.get_transaction()
        self.weave_store = target.weave_store
        # file id of a directory -> list of parent revisions for its weave
        self.dir_baserev = {}
        self._bzr_merges = []
        self._svk_merges = []
        self._revinfo = None
        self._svn_revprops = svn_revprops
        # Source paths of entries that were moved (via add_*) before the
        # matching delete_entry() call arrived.
        self._premature_deletes = set()
        self.pool = Pool()

    def _get_revision(self, revid):
        """Creates the revision object.

        :param revid: Revision id of the revision to create.
        :return: Revision filled in from the Subversion revision properties
            and, if present, the bzr revision-info property.
        """
        parent_ids = self.source.revision_parents(revid, self._bzr_merges)

        # Commit SVN revision properties to a Revision object
        rev = Revision(revision_id=revid, parent_ids=parent_ids)

        if self._svn_revprops[2] is not None:
            rev.timestamp = 1.0 * svn.core.secs_from_timestr(
                self._svn_revprops[2], None) #date
        else:
            rev.timestamp = 0 # FIXME: Obtain repository creation time
        rev.timezone = None

        rev.committer = self._svn_revprops[0] # author
        if rev.committer is None:
            rev.committer = ""
        rev.message = self._svn_revprops[1] # message

        if self._revinfo:
            parse_revision_metadata(self._revinfo, rev)

        return rev

    def open_root(self, base_revnum, baton):
        """Start editing the root directory.

        :return: File id of the root, used as the directory baton.
        """
        if self.old_inventory.root is None:
            # First time the root is set
            file_id = generate_file_id(self.source, self.revid, "")
            self.dir_baserev[file_id] = []
        else:
            assert self.old_inventory.root.revision is not None
            if "" in self.id_map:
                file_id = self.id_map[""]
            else:
                file_id = self.old_inventory.root.file_id
            self.dir_baserev[file_id] = [self.old_inventory.root.revision]

        if self.inventory.root is not None and \
                file_id == self.inventory.root.file_id:
            ie = self.inventory.root
        else:
            ie = self.inventory.add_path("", 'directory', file_id)
        ie.revision = self.revid
        return file_id

    def _get_existing_id(self, parent_id, path):
        """Return the file id for an existing path.

        The id map takes precedence; otherwise the id is looked up in the
        old inventory.
        """
        if path in self.id_map:
            return self.id_map[path]
        return self._get_old_id(parent_id, path)

    def _get_old_id(self, parent_id, old_path):
        """Return the file id that the basename of ``old_path`` had below
        ``parent_id`` in the old inventory.
        """
        return self.old_inventory[parent_id].children[urlutils.basename(old_path)].file_id

    def _get_new_id(self, parent_id, new_path):
        """Return the file id for a newly added path.

        The id map takes precedence; otherwise a fresh id is generated.
        """
        if new_path in self.id_map:
            return self.id_map[new_path]
        return generate_file_id(self.source, self.revid, new_path)

    def delete_entry(self, path, revnum, parent_id, pool):
        """Remove an entry, unless it was already moved away by an earlier
        add_file()/add_directory() call.
        """
        path = path.decode("utf-8")
        if path in self._premature_deletes:
            # The entry was renamed in place earlier; nothing left to remove.
            self._premature_deletes.remove(path)
        else:
            self.inventory.remove_recursive_id(self._get_old_id(parent_id, path))

    def close_directory(self, id):
        """Finish a directory: stamp it with the new revision id and, for
        rich-root targets, record it in the weave store.

        :param id: File id of the directory (the directory baton).
        """
        self.inventory[id].revision = self.revid

        # Only record directories in the weave store if the target
        # repository supports rich roots. supports_rich_root is a method
        # (is_compatible() calls it as well); testing the bound method
        # itself would always be true.
        if self.target.supports_rich_root():
            file_weave = self.weave_store.get_weave_or_empty(id, self.transact)
            if not file_weave.has_version(self.revid):
                file_weave.add_lines(self.revid, self.dir_baserev[id], [])

    def add_directory(self, path, parent_id, copyfrom_path, copyfrom_revnum,
                      pool):
        """Add a directory, or move it if its file id already exists.

        :return: File id of the directory, used as the directory baton.
        """
        path = path.decode("utf-8")
        file_id = self._get_new_id(parent_id, path)

        self.dir_baserev[file_id] = []
        if file_id in self.inventory:
            # This directory was moved here from somewhere else, but the
            # other location hasn't been removed yet.
            if copyfrom_path is None:
                # FIXME: This should never happen!
                copyfrom_path = self.old_inventory.id2path(file_id)
            assert copyfrom_path == self.old_inventory.id2path(file_id)
            assert copyfrom_path not in self._premature_deletes
            # Remember the old location so the later delete_entry() for it
            # is ignored.
            self._premature_deletes.add(copyfrom_path)
            self.inventory.rename(file_id, parent_id, urlutils.basename(path))
            ie = self.inventory[file_id]
        else:
            ie = self.inventory.add_path(path, 'directory', file_id)
        ie.revision = self.revid

        return file_id

    def open_directory(self, path, parent_id, base_revnum, pool):
        """Open an existing directory for modification.

        If the id map assigns the path a different file id than the old
        inventory did, the existing entry is re-keyed to the new id.

        :return: File id of the directory, used as the directory baton.
        """
        assert base_revnum >= 0
        # Decode like add_directory()/delete_entry() do, so that id map and
        # inventory lookups see the same kind of path everywhere.
        path = path.decode("utf-8")
        base_file_id = self._get_old_id(parent_id, path)
        base_revid = self.old_inventory[base_file_id].revision
        file_id = self._get_existing_id(parent_id, path)
        if file_id == base_file_id:
            self.dir_baserev[file_id] = [base_revid]
            ie = self.inventory[file_id]
        else:
            # Replace if original was inside this branch
            # change id of base_file_id to file_id
            ie = self.inventory[base_file_id]
            for name in ie.children:
                ie.children[name].parent_id = file_id
            # FIXME: Don't touch inventory internals
            del self.inventory._byid[base_file_id]
            self.inventory._byid[file_id] = ie
            ie.file_id = file_id
            self.dir_baserev[file_id] = []
        ie.revision = self.revid
        return file_id

    def change_dir_prop(self, id, name, value, pool):
        """Handle a property change on a directory.

        Only the ancestry property for the current scheme, the svk merge
        property and the revision-info property change editor state; known
        but irrelevant properties are ignored and other svn:/bzr: properties
        are logged as unsupported.

        :param id: File id of the directory (the directory baton).
        :param name: Property name.
        :param value: New property value.
        """
        if name == SVN_PROP_BZR_BRANCHING_SCHEME:
            if id != self.inventory.root.file_id:
                mutter('rogue %r on non-root directory' % name)
                return
        elif name == SVN_PROP_BZR_ANCESTRY+str(self.scheme):
            if id != self.inventory.root.file_id:
                mutter('rogue %r on non-root directory' % name)
                return

            # Only the last line of the property is parsed. An empty or
            # None value (presumably a deleted property - TODO confirm)
            # means "no merges" rather than crashing on splitlines()[-1].
            if value:
                self._bzr_merges = parse_merge_property(
                    value.splitlines()[-1])
            else:
                self._bzr_merges = []
        elif (name.startswith(SVN_PROP_BZR_ANCESTRY) or
              name.startswith(SVN_PROP_BZR_REVISION_ID)):
            # Ancestry for another scheme, or a revision id property
            pass
        elif name == SVN_PROP_SVK_MERGE:
            self._svk_merges = None # Force Repository.revision_parents() to look it up
        elif name == SVN_PROP_BZR_REVISION_INFO:
            if id != self.inventory.root.file_id:
                mutter('rogue %r on non-root directory' % SVN_PROP_BZR_REVISION_INFO)
                return

            self._revinfo = value
        elif name in (svn.core.SVN_PROP_ENTRY_COMMITTED_DATE,
                      svn.core.SVN_PROP_ENTRY_COMMITTED_REV,
                      svn.core.SVN_PROP_ENTRY_LAST_AUTHOR,
                      svn.core.SVN_PROP_ENTRY_LOCK_TOKEN,
                      svn.core.SVN_PROP_ENTRY_UUID,
                      svn.core.SVN_PROP_EXECUTABLE):
            pass
        elif name.startswith(svn.core.SVN_PROP_WC_PREFIX):
            pass
        elif name in (SVN_PROP_BZR_MERGE, SVN_PROP_BZR_FILEIDS):
            pass
        elif (name.startswith(svn.core.SVN_PROP_PREFIX) or
              name.startswith(SVN_PROP_BZR_PREFIX)):
            mutter('unsupported dir property %r' % name)

    def change_file_prop(self, id, name, value, pool):
        """Handle a property change on the file currently being edited.

        Records the executable and symlink state and the last committed
        revision number; other known properties are ignored and remaining
        svn:/bzr: properties are logged as unsupported.
        """
        if name == svn.core.SVN_PROP_EXECUTABLE:
            # You'd expect executable to match
            # svn.core.SVN_PROP_EXECUTABLE_VALUE, but that's not
            # how SVN behaves. It appears to consider the presence
            # of the property sufficient to mark it executable.
            self.is_executable = (value is not None)
        elif (name == svn.core.SVN_PROP_SPECIAL):
            self.is_symlink = (value is not None)
        elif name == svn.core.SVN_PROP_ENTRY_COMMITTED_REV:
            self.last_file_rev = int(value)
        elif name in (svn.core.SVN_PROP_ENTRY_COMMITTED_DATE,
                      svn.core.SVN_PROP_ENTRY_LAST_AUTHOR,
                      svn.core.SVN_PROP_ENTRY_LOCK_TOKEN,
                      svn.core.SVN_PROP_ENTRY_UUID,
                      svn.core.SVN_PROP_MIME_TYPE):
            pass
        elif name.startswith(svn.core.SVN_PROP_WC_PREFIX):
            pass
        elif (name.startswith(svn.core.SVN_PROP_PREFIX) or
              name.startswith(SVN_PROP_BZR_PREFIX)):
            mutter('unsupported file property %r' % name)

    def add_file(self, path, parent_id, copyfrom_path, copyfrom_revnum, baton):
        """Start a new file, or move it if its file id already exists.

        Resets the per-file state that close_file() consumes.

        :return: The decoded path, used as the file baton.
        """
        path = path.decode("utf-8")
        self.is_symlink = False
        self.is_executable = None
        self.file_data = ""
        self.file_parents = []
        self.file_stream = None
        self.file_id = self._get_new_id(parent_id, path)
        if self.file_id in self.inventory:
            # This file was moved here from somewhere else, but the
            # other location hasn't been removed yet.
            if copyfrom_path is None:
                # FIXME: This should never happen!
                copyfrom_path = self.old_inventory.id2path(self.file_id)
            assert copyfrom_path == self.old_inventory.id2path(self.file_id)
            assert copyfrom_path not in self._premature_deletes
            self._premature_deletes.add(copyfrom_path)
            self.inventory.rename(self.file_id, parent_id, urlutils.basename(path))
        return path

    def open_file(self, path, parent_id, base_revnum, pool):
        """Open an existing file for modification.

        Loads the base text from the weave store so that a text delta can
        be applied to it, and resets the per-file state.

        :return: The decoded path, used as the file baton.
        """
        # Decode like add_file() does; the returned path is later handed to
        # close_file(), which may add it to the inventory.
        path = path.decode("utf-8")
        base_file_id = self._get_old_id(parent_id, path)
        base_revid = self.old_inventory[base_file_id].revision
        self.file_id = self._get_existing_id(parent_id, path)
        self.is_executable = None
        self.is_symlink = (self.inventory[base_file_id].kind == 'symlink')
        file_weave = self.weave_store.get_weave_or_empty(base_file_id,
                                                         self.transact)
        self.file_data = file_weave.get_text(base_revid)
        self.file_stream = None
        if self.file_id == base_file_id:
            self.file_parents = [base_revid]
        else:
            # Replace: the old entry goes away, close_file() adds the new one
            del self.inventory[base_file_id]
            self.file_parents = []
        return path

    def close_file(self, path, checksum):
        """Finish the current file: store its text and update its inventory
        entry.

        :param path: Path of the file (the file baton).
        :param checksum: Expected MD5 of the resulting text, or None to skip
            the check.
        """
        if self.file_stream is not None:
            self.file_stream.seek(0)
            lines = osutils.split_lines(self.file_stream.read())
        else:
            # Data didn't change or file is new
            lines = osutils.split_lines(self.file_data)

        actual_checksum = md5_strings(lines)
        assert checksum is None or checksum == actual_checksum

        file_weave = self.weave_store.get_weave_or_empty(self.file_id,
                                                         self.transact)
        if not file_weave.has_version(self.revid):
            file_weave.add_lines(self.revid, self.file_parents, lines)

        if self.file_id in self.inventory:
            ie = self.inventory[self.file_id]
        elif self.is_symlink:
            ie = self.inventory.add_path(path, 'symlink', self.file_id)
        else:
            ie = self.inventory.add_path(path, 'file', self.file_id)
        ie.revision = self.revid

        if self.is_symlink:
            # The text of a symlink is "link <target>"; strip the prefix.
            ie.symlink_target = lines[0][len("link "):]
            ie.text_sha1 = None
            ie.text_size = None
            ie.text_id = None
        else:
            ie.text_sha1 = osutils.sha_strings(lines)
            ie.text_size = sum(map(len, lines))
            if self.is_executable is not None:
                ie.executable = self.is_executable

        self.file_stream = None

    def close_edit(self):
        """Finish the edit: build the revision and add it, together with
        the inventory, to the target repository.
        """
        # Every entry moved early must have seen its delete_entry() by now.
        assert len(self._premature_deletes) == 0
        rev = self._get_revision(self.revid)
        self.inventory.revision_id = self.revid
        rev.inventory_sha1 = osutils.sha_string(
            bzrlib.xml5.serializer_v5.write_inventory_to_string(
                self.inventory))
        self.target.add_revision(self.revid, rev, self.inventory)
        self.pool.destroy()

    def abort_edit(self):
        """Abort the edit; nothing is done."""
        pass

    def apply_textdelta(self, file_id, base_checksum):
        """Prepare to receive a text delta for the current file.

        :param file_id: File baton (unused here).
        :param base_checksum: Expected MD5 of the base text, or None to
            skip the check.
        :return: Delta handler that applies the delta to the base text and
            writes the result to ``self.file_stream``.
        """
        actual_checksum = md5.new(self.file_data).hexdigest()
        # Condition and message must not be wrapped in one parenthesised
        # tuple: a non-empty tuple is always true and the check never fires.
        assert base_checksum is None or base_checksum == actual_checksum, \
            "base checksum mismatch: %r != %r" % (base_checksum,
                                                  actual_checksum)
        self.file_stream = StringIO()
        return apply_txdelta_handler(StringIO(self.file_data),
                                     self.file_stream, self.pool)
355
356
class InterFromSvnRepository(InterRepository):
    """Svn to any repository actions."""

    _matching_repo_format = SvnRepositoryFormat()

    @staticmethod
    def _get_repo_format_to_test():
        """Return None: no repository format is offered for testing."""
        return None

    def _find_all(self):
        """Find all revisions in the source that the target is missing.

        :return: Tuple with revisions missing and a dictionary with
            the mainline parent of each of those revisions.
        """
        parents = {}
        needed = [revid for revid in self.source.all_revision_ids()
                  if not self.target.has_revision(revid)]
        for revid in needed:
            (branch, revnum, scheme) = self.source.lookup_revision_id(revid)
            parents[revid] = self.source._mainline_revision_parent(branch,
                                               revnum, scheme)
        return (needed, parents)

    def _find_until(self, revision_id):
        """Find all missing revisions until revision_id

        :param revision_id: Stop revision
        :return: Tuple with revisions missing and a dictionary with
            parents for those revision.
        """
        needed = []
        parents = {}
        (path, until_revnum, scheme) = self.source.lookup_revision_id(
                                                                    revision_id)

        prev_revid = None
        for (branch, revnum) in self.source.follow_branch(path,
                                                          until_revnum, scheme):
            revid = self.source.generate_revision_id(revnum, branch, str(scheme))

            # Each revision yielded is recorded as the parent of the one
            # yielded just before it.
            if prev_revid is not None:
                parents[prev_revid] = revid

            prev_revid = revid

            if not self.target.has_revision(revid):
                needed.append(revid)

        # The last revision yielded has no parent
        parents[prev_revid] = None
        return (needed, parents)

    def copy_content(self, revision_id=None, pb=None):
        """See InterRepository.copy_content.

        :param revision_id: Revision to fetch up to; None fetches every
            revision the target is missing.
        :param pb: Progress bar to report to; if None a nested progress bar
            is created and finished here.
        """
        # Loop over all the revnums until revision_id
        # (or youngest_revnum) and call self.target.add_revision()
        # or self.target.add_inventory() each time
        self.target.lock_read()
        try:
            if revision_id is None:
                (needed, parents) = self._find_all()
            else:
                (needed, parents) = self._find_until(revision_id)
        finally:
            self.target.unlock()

        if not needed:
            # Nothing to fetch
            return

        repos_root = self.source.transport.get_repos_root()

        prev_revid = None
        transport = self.source.transport
        if pb is None:
            pb = ui.ui_factory.nested_progress_bar()
            nested_pb = pb
        else:
            nested_pb = None
        num = 0
        prev_inv = None
        try:
            # Take the write lock only once everything that may fail
            # beforehand has been set up, and release it in a finally of
            # its own so it can never be leaked.
            self.target.lock_write()
            try:
                for revid in reversed(needed):
                    (branch, revnum, scheme) = \
                            self.source.lookup_revision_id(revid)
                    pb.update('copying revision', num, len(needed))

                    parent_revid = parents[revid]

                    if parent_revid is None:
                        parent_inv = Inventory(root_id=None)
                    elif prev_revid != parent_revid:
                        parent_inv = self.target.get_inventory(parent_revid)
                    else:
                        # The parent is the revision built in the previous
                        # iteration; reuse its inventory.
                        assert prev_inv is not None
                        parent_inv = prev_inv

                    changes = self.source._log.get_revision_paths(revnum,
                                                                  branch)
                    renames = self.source.revision_fileid_renames(revid)
                    id_map = self.source.transform_fileid_map(
                                          self.source.uuid, revnum, branch,
                                          changes, renames, scheme)

                    editor = RevisionBuildEditor(self.source, self.target,
                                 branch, parent_inv, revid,
                                 self.source._log.get_revision_info(revnum),
                                 id_map, scheme)

                    pool = Pool()
                    edit, edit_baton = svn.delta.make_editor(editor, pool)

                    if parent_revid is None:
                        branch_url = urlutils.join(repos_root, branch)
                        transport.reparent(branch_url)
                        assert transport.svn_url == branch_url.rstrip("/"), \
                            "Expected %r, got %r" % (branch_url.rstrip("/"),
                                                     transport.svn_url)
                        reporter = transport.do_update(
                                       revnum, True, edit, edit_baton, pool)

                        # Report status of existing paths
                        reporter.set_path("", revnum, True, None, pool)
                    else:
                        # The parent's scheme is not needed; a separate name
                        # keeps the loop's own scheme variable intact.
                        (parent_branch, parent_revnum, parent_scheme) = \
                                self.source.lookup_revision_id(parent_revid)
                        transport.reparent(urlutils.join(repos_root,
                                                         parent_branch))

                        if parent_branch != branch:
                            reporter = transport.do_switch(
                                       revnum, True,
                                       urlutils.join(repos_root, branch),
                                       edit, edit_baton, pool)
                        else:
                            reporter = transport.do_update(
                                       revnum, True, edit, edit_baton, pool)

                        # Report status of existing paths
                        reporter.set_path("", parent_revnum, False, None,
                                          pool)

                    lock = transport.lock_read(".")
                    try:
                        reporter.finish_report(pool)
                    finally:
                        # Release the transport lock even if the report
                        # (and the editor calls it triggers) fails.
                        lock.unlock()

                    prev_inv = editor.inventory
                    prev_revid = revid
                    pool.destroy()
                    num += 1
            finally:
                self.target.unlock()
        finally:
            if nested_pb is not None:
                nested_pb.finished()
        self.source.transport.reparent_root()

    def fetch(self, revision_id=None, pb=None):
        """Fetch revisions. """
        self.copy_content(revision_id=revision_id, pb=pb)

    @staticmethod
    def is_compatible(source, target):
        """Be compatible with SvnRepository."""
        # FIXME: Also check target uses VersionedFile
        return isinstance(source, SvnRepository) and \
                target.supports_rich_root()
515