Use pools to try to reduce the memory consumption.
[jelmer/subvertpy.git] / fetch.py
1 # Copyright (C) 2005-2006 Jelmer Vernooij <jelmer@samba.org>
2
3 # This program is free software; you can redistribute it and/or modify
4 # it under the terms of the GNU General Public License as published by
5 # the Free Software Foundation; either version 2 of the License, or
6 # (at your option) any later version.
7
8 # This program is distributed in the hope that it will be useful,
9 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11 # GNU General Public License for more details.
12
13 # You should have received a copy of the GNU General Public License
14 # along with this program; if not, write to the Free Software
15 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
17 import bzrlib
18 from bzrlib.decorators import needs_write_lock
19 from bzrlib.inventory import Inventory, ROOT_ID
20 import bzrlib.osutils as osutils
21 from bzrlib.progress import ProgressBar
22 from bzrlib.revision import Revision
23 from bzrlib.repository import InterRepository
24 from bzrlib.trace import mutter
25
26 from copy import copy
27 from cStringIO import StringIO
28 import md5
29 import os
30
31 from svn.core import SubversionException, Pool
32 import svn.core, svn.ra
33
34 from repository import (SvnRepository, SVN_PROP_BZR_MERGE, SVN_PROP_SVK_MERGE,
35                 SVN_PROP_BZR_REVPROP_PREFIX, SvnRepositoryFormat)
36 from tree import apply_txdelta_handler
37
38
def md5_strings(strings):
    """Return the hexadecimal MD5 digest of a sequence of strings.

    The digest is computed over the items in iteration order, which is
    equivalent to hashing their concatenation without building it.

    :param strings: Iterable of byte strings to feed into the digest.
    :return: The hex digest as a string.
    """
    try:
        from hashlib import md5 as new_md5
    except ImportError:
        # Interpreters without hashlib only ship the legacy md5 module.
        from md5 import new as new_md5

    digest = new_md5()
    # Explicit loop rather than map(): map() used only for side effects
    # builds a throwaway list, and does nothing at all where it is lazy.
    for chunk in strings:
        digest.update(chunk)
    return digest.hexdigest()
43
class RevisionBuildEditor(svn.delta.Editor):
    """Delta editor that stores one Subversion revision in a bzr repository.

    The editor callbacks update a copy of the previous inventory and add
    file and directory texts to the target's weave store. close_edit()
    then adds the resulting revision and inventory to the target.
    """

    def __init__(self, source, target, branch_path, revnum, prev_inventory, revid, svn_revprops, id_map, parent_branch, parent_id_map):
        """Create an editor for a single revision.

        :param source: Repository the revision is read from.
        :param target: Repository the revision is written to.
        :param branch_path: Branch path of the revision being built.
        :param revnum: Subversion revision number being built.
        :param prev_inventory: Inventory to start from; it is copied, not
            modified in place.
        :param revid: Revision id to assign to the new revision.
        :param svn_revprops: Sequence indexed as (author, message, date).
        :param id_map: Mapping of path -> (file_id, revision_id) for the
            revision being built.
        :param parent_branch: Branch path of the parent revision.
        :param parent_id_map: File id map of the parent revision.
        """
        self.branch_path = branch_path
        self.inventory = copy(prev_inventory)
        self.revid = revid
        self.revnum = revnum
        self.id_map = id_map
        self.parent_branch = parent_branch
        self.parent_id_map = parent_id_map
        self.source = source
        self.target = target
        self.transact = target.get_transaction()
        self.weave_store = target.weave_store

        # file id of a directory -> list of parent revision ids used when
        # the directory is added to its weave in close_directory().
        self.dir_baserev = {}

        # Filled in from directory properties seen on the root.
        self._parent_ids = None
        self._revprops = {}
        self._svn_revprops = svn_revprops

    def _get_revision(self, revid):
        """Build the Revision object for revid from the collected properties.

        :param revid: Revision id of the revision being built.
        :return: A Revision with parents, timestamp, committer, message
            and properties filled in.
        """
        if self._parent_ids is None:
            self._parent_ids = ""

        parent_ids = self.source.revision_parents(revid, self._parent_ids)

        # Commit SVN revision properties to a Revision object
        rev = Revision(revision_id=revid, parent_ids=parent_ids)

        rev.timestamp = 1.0 * svn.core.secs_from_timestr(
            self._svn_revprops[2], None) #date
        rev.timezone = None

        rev.committer = self._svn_revprops[0] # author
        if rev.committer is None:
            rev.committer = ""
        rev.message = self._svn_revprops[1] # message

        rev.properties = self._revprops
        return rev

    def open_root(self, base_revnum, baton):
        """Start editing at the root directory and return its baton.

        Records the previous inventory's revision id (if any) as the base
        revision of the root and stamps the inventory with the new revid.
        """
        if self.inventory.revision_id is None:
            self.dir_baserev[ROOT_ID] = []
        else:
            self.dir_baserev[ROOT_ID] = [self.inventory.revision_id]
        self.inventory.revision_id = self.revid
        return ROOT_ID

    def relpath(self, path):
        """Return path with leading and trailing slashes removed."""
        return path.strip("/")

    def delete_entry(self, path, revnum, parent_baton, pool):
        """Remove the inventory entry found at path."""
        del self.inventory[self.inventory.path2id(path)]

    def close_directory(self, id):
        """Finish a directory: stamp its revision and record it in its weave.

        The root directory is skipped.
        """
        revid = self.revid

        if id != ROOT_ID:
            self.inventory[id].revision = revid

            file_weave = self.weave_store.get_weave_or_empty(id, self.transact)
            if not file_weave.has_version(revid):
                file_weave.add_lines(revid, self.dir_baserev[id], [])

    def add_directory(self, path, parent_baton, copyfrom_path, copyfrom_revnum, pool):
        """Add (or, for a copy that keeps its file id, update) a directory.

        :return: The file id of the directory, used as its baton.
        """
        file_id, revision_id = self.id_map[path]

        if copyfrom_path is not None:
            base_file_id, base_revid = self.source.path_to_file_id(copyfrom_revnum, os.path.join(self.parent_branch, copyfrom_path))
            if base_file_id == file_id:
                # Same file id as the copy source: the entry already
                # exists, so only its revision needs updating.
                self.dir_baserev[file_id] = [base_revid]
                ie = self.inventory[file_id]
                ie.revision = revision_id
                return file_id

        self.dir_baserev[file_id] = []
        ie = self.inventory.add_path(path, 'directory', file_id)
        ie.revision = revision_id

        return file_id

    def open_directory(self, path, parent_baton, base_revnum, pool):
        """Open an existing directory.

        Handled as add_directory() with the directory itself, at
        base_revnum, as the copy source.
        """
        return self.add_directory(path, parent_baton, path, base_revnum, pool)

    def change_dir_prop(self, id, name, value, pool):
        """Process a property change on a directory.

        bzr merge and revision properties are recorded for the revision
        being built; entry and working-copy properties are ignored.
        """
        if name == SVN_PROP_BZR_MERGE:
            if id != ROOT_ID:
                mutter('rogue %r on non-root directory' % SVN_PROP_BZR_MERGE)
                return

            # Only the last line of the property is used.
            self._parent_ids = value.splitlines()[-1]
        elif name == SVN_PROP_SVK_MERGE:
            if self._parent_ids is None:
                # Only set parents using svk:merge if no
                # bzr:merge set.
                pass # FIXME
        elif name.startswith(SVN_PROP_BZR_REVPROP_PREFIX):
            self._revprops[name[len(SVN_PROP_BZR_REVPROP_PREFIX):]] = value
        elif name in (svn.core.SVN_PROP_ENTRY_COMMITTED_DATE,
                      svn.core.SVN_PROP_ENTRY_COMMITTED_REV,
                      svn.core.SVN_PROP_ENTRY_LAST_AUTHOR,
                      svn.core.SVN_PROP_ENTRY_LOCK_TOKEN,
                      svn.core.SVN_PROP_ENTRY_UUID,
                      svn.core.SVN_PROP_EXECUTABLE):
            pass
        elif name.startswith(svn.core.SVN_PROP_WC_PREFIX):
            pass
        else:
            # This handler deals with directories, so say so in the log.
            mutter('unsupported directory property %r' % name)

    def change_file_prop(self, id, name, value, pool):
        """Process a property change on the file currently being edited.

        Tracks the executable and symlink state; other entry and
        working-copy properties are ignored.
        """
        if name == svn.core.SVN_PROP_EXECUTABLE:
            # Strange, you'd expect executable to match svn.core.SVN_PROP_EXECUTABLE_VALUE, but that's not how SVN behaves.
            self.is_executable = (value is not None)
        elif (name == svn.core.SVN_PROP_SPECIAL):
            self.is_symlink = (value is not None)
        elif name == svn.core.SVN_PROP_ENTRY_COMMITTED_REV:
            self.last_file_rev = int(value)
        elif name in (svn.core.SVN_PROP_ENTRY_COMMITTED_DATE,
                      svn.core.SVN_PROP_ENTRY_LAST_AUTHOR,
                      svn.core.SVN_PROP_ENTRY_LOCK_TOKEN,
                      svn.core.SVN_PROP_ENTRY_UUID,
                      svn.core.SVN_PROP_MIME_TYPE):
            pass
        elif name.startswith(svn.core.SVN_PROP_WC_PREFIX):
            pass
        else:
            mutter('unsupported file property %r' % name)

    def add_file(self, path, parent_id, copyfrom_path, copyfrom_revnum, baton):
        """Begin a new file with empty contents and no parents.

        :return: The path, used as the file baton.
        """
        self.is_symlink = False
        self.is_executable = None
        self.file_data = ""
        self.file_parents = []
        self.file_stream = None
        return path

    def open_file(self, path, parent_id, base_revnum, pool):
        """Begin editing an existing file, loading its base text.

        :return: The path, used as the file baton.
        """
        base_file_id, base_revid = self.source.path_to_file_id(base_revnum, os.path.join(self.parent_branch, path))
        file_id, revid = self.id_map[path]
        self.is_executable = None
        self.is_symlink = (self.inventory[base_file_id].kind == 'symlink')
        file_weave = self.weave_store.get_weave_or_empty(base_file_id, self.transact)
        self.file_data = file_weave.get_text(base_revid)
        self.file_stream = None
        if file_id == base_file_id:
            self.file_parents = [base_revid]
        else:
            # Replace
            del self.inventory[base_file_id]
            self.file_parents = []
        return path

    def close_file(self, path, checksum):
        """Finish a file: store its text and update its inventory entry.

        :param path: File baton as returned by add_file()/open_file().
        :param checksum: Expected MD5 hex digest of the full text, or None
            to skip the check.
        """
        if self.file_stream is not None:
            self.file_stream.seek(0)
            lines = osutils.split_lines(self.file_stream.read())
        else:
            # Data didn't change or file is new
            lines = osutils.split_lines(self.file_data)

        actual_checksum = md5_strings(lines)
        assert checksum is None or checksum == actual_checksum

        file_id, revision_id = self.id_map[path]
        file_weave = self.weave_store.get_weave_or_empty(file_id, self.transact)
        if not file_weave.has_version(revision_id):
            file_weave.add_lines(revision_id, self.file_parents, lines)

        if file_id in self.inventory:
            ie = self.inventory[file_id]
        elif self.is_symlink:
            ie = self.inventory.add_path(path, 'symlink', file_id)
        else:
            ie = self.inventory.add_path(path, 'file', file_id)
        ie.revision = revision_id

        if self.is_symlink:
            # The target is whatever follows the "link " prefix on the
            # first line of the text.
            ie.symlink_target = lines[0][len("link "):]
            ie.text_sha1 = None
            ie.text_size = None
            ie.text_id = None
        else:
            ie.text_sha1 = osutils.sha_strings(lines)
            ie.text_size = sum(map(len, lines))
            if self.is_executable is not None:
                ie.executable = self.is_executable

        self.file_stream = None

    def close_edit(self):
        """Finish the edit by adding the revision and inventory to the target."""
        rev = self._get_revision(self.revid)
        self.inventory.revision_id = self.revid
        rev.inventory_sha1 = osutils.sha_string(
            bzrlib.xml5.serializer_v5.write_inventory_to_string(
                self.inventory))
        self.target.add_revision(self.revid, rev, self.inventory)

    def abort_edit(self):
        """Abort the edit; nothing needs to be undone here."""
        pass

    def apply_textdelta(self, file_id, base_checksum):
        """Return a handler that applies a text delta to the current file.

        The result of applying the delta is collected in self.file_stream.

        :param file_id: File baton.
        :param base_checksum: Expected MD5 hex digest of the base text, or
            None to skip the check.
        """
        # No trailing comma here: it would turn the digest into a 1-tuple
        # that never compares equal to base_checksum.
        actual_checksum = md5.new(self.file_data).hexdigest()
        # The condition and message must not be wrapped in one pair of
        # parentheses: asserting a non-empty tuple always succeeds.
        assert base_checksum is None or base_checksum == actual_checksum, \
            "base checksum mismatch: %r != %r" % (base_checksum, actual_checksum)
        self.file_stream = StringIO()
        return apply_txdelta_handler(StringIO(self.file_data), self.file_stream)
252
253
class InterSvnRepository(InterRepository):
    """Svn to any repository actions."""

    _matching_repo_format = SvnRepositoryFormat()
    """The format to test with."""

    def _collect_needed_revisions(self, path, until_revnum):
        """Walk the source history and find what the target is missing.

        :param path: Branch path to follow, or None.
        :param until_revnum: Revision number to start walking from.
        :return: Tuple (needed, parents). needed is a list of
            (branch, revnum, revid, changes) tuples, reversed relative to
            the order follow_history() yields them. parents maps each
            visited revision id to the revision id yielded after it, with
            None for the last one.
        """
        needed = []
        parents = {}
        prev_revid = None
        for (branch, changes, revnum) in \
            self.source._log.follow_history(path, until_revnum):
            revid = self.source.generate_revision_id(revnum, branch)

            if prev_revid is not None:
                parents[prev_revid] = revid

            prev_revid = revid

            if not self.target.has_revision(revid):
                needed.append((branch, revnum, revid, changes))

        parents[prev_revid] = None
        needed.reverse()
        return needed, parents

    # NOTE(review): the ProgressBar() default is created once, when the
    # class is defined, and shared by every call that omits pb. It is kept
    # as-is so the signature callers see does not change.
    @needs_write_lock
    def copy_content(self, revision_id=None, basis=None, pb=ProgressBar()):
        """See InterRepository.copy_content.

        Copies every revision up to revision_id (or the latest revision
        when it is None) that the target does not have yet, by replaying
        a Subversion update or switch through a RevisionBuildEditor.

        :param revision_id: Revision id to copy up to, or None.
        :param basis: Unused here.
        :param pb: Progress bar to report to, or None for no reporting.
        """
        if revision_id is None:
            path = None
            until_revnum = self.source._latest_revnum
        else:
            (path, until_revnum) = self.source.parse_revision_id(revision_id)

        repos_root = self.source.transport.get_repos_root()

        needed, parents = self._collect_needed_revisions(path, until_revnum)

        num = 0
        prev_revid = None
        transport = self.source.transport
        try:
            for (branch, revnum, revid, changes) in needed:
                if pb is not None:
                    pb.update('copying revision', num+1, len(needed)+1)
                num += 1

                parent_revid = parents[revid]

                if parent_revid is not None:
                    (parent_branch, parent_revnum) = self.source.parse_revision_id(parent_revid)
                else:
                    parent_revnum = 0
                    parent_branch = None

                if parent_revid is None:
                    parent_id_map = {"": (ROOT_ID, None)}
                    id_map = self.source.get_fileid_map(revnum, branch)
                    parent_inv = Inventory(ROOT_ID)
                elif prev_revid != parent_revid:
                    parent_id_map = self.source.get_fileid_map(parent_revnum, parent_branch)
                    id_map = self.source.get_fileid_map(revnum, branch)
                    parent_inv = self.target.get_inventory(parent_revid)
                else:
                    # The parent is the revision copied in the previous
                    # iteration: reuse its id map and inventory.
                    parent_id_map = copy(id_map)
                    self.source.transform_fileid_map(self.source.uuid,
                                            revnum, branch,
                                            changes, id_map)
                    parent_inv = prev_inv

                editor = RevisionBuildEditor(self.source, self.target, branch,
                                             revnum, parent_inv, revid,
                                         self.source._log.get_revision_info(revnum),
                                         id_map, parent_branch, parent_id_map)

                edit, edit_baton = svn.delta.make_editor(editor)

                if parent_branch is None:
                    transport.reparent(repos_root)
                else:
                    transport.reparent("%s/%s" % (repos_root, parent_branch))

                # One pool per revision, destroyed even when the fetch of
                # this revision fails, so its memory is always released.
                pool = Pool()
                try:
                    if parent_branch != branch:
                        switch_url = "%s/%s" % (repos_root, branch)
                        mutter('svn switch %r:%r -> %r:%r' %
                                       (parent_branch, parent_revnum, switch_url, revnum))
                        reporter, reporter_baton = transport.do_switch(
                                   revnum, "", True,
                                   switch_url,
                                   edit, edit_baton, pool)
                    else:
                        mutter('svn update -r %r:%r %r' %
                                       (parent_revnum, revnum, branch))
                        reporter, reporter_baton = transport.do_update(
                                   revnum, "", True,
                                   edit, edit_baton, pool)

                    # Report status of existing paths
                    svn.ra.reporter2_invoke_set_path(reporter, reporter_baton,
                        "", parent_revnum, False, None, pool)

                    transport.lock()
                    try:
                        svn.ra.reporter2_invoke_finish_report(reporter, reporter_baton, pool)
                    finally:
                        # Never leave the transport locked behind us.
                        transport.unlock()
                finally:
                    pool.destroy()

                prev_inv = editor.inventory
                prev_revid = revid
        finally:
            # Restore the progress bar and the transport's location even
            # when copying a revision failed part-way.
            if pb is not None:
                pb.clear()

            self.source.transport.reparent(repos_root)

    @needs_write_lock
    def fetch(self, revision_id=None, pb=ProgressBar()):
        """Fetch revisions.

        Thin wrapper around copy_content().

        :param revision_id: Revision id to fetch up to, or None.
        :param pb: Progress bar to report to, or None.
        """
        self.copy_content(revision_id=revision_id, pb=pb)

    @staticmethod
    def is_compatible(source, target):
        """Be compatible with SvnRepository."""
        # FIXME: Also check target uses VersionedFile
        mutter('test %r' % source)
        return isinstance(source, SvnRepository)
382
383
384