Check system and installed pysqlite's sqlite versions.
[jelmer/subvertpy.git] / repository.py
1 # Copyright (C) 2006 Jelmer Vernooij <jelmer@samba.org>
2
3 # This program is free software; you can redistribute it and/or modify
4 # it under the terms of the GNU General Public License as published by
5 # the Free Software Foundation; either version 2 of the License, or
6 # (at your option) any later version.
7
8 # This program is distributed in the hope that it will be useful,
9 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11 # GNU General Public License for more details.
12
13 # You should have received a copy of the GNU General Public License
14 # along with this program; if not, write to the Free Software
15 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16 """Subversion repository access."""
17
18 import bzrlib
19 from bzrlib.branch import BranchCheckResult
20 from bzrlib.errors import (InvalidRevisionId, NoSuchRevision, 
21                            NotBranchError, UninitializableFormat)
22 from bzrlib.inventory import Inventory
23 from bzrlib.lockable_files import LockableFiles, TransportLock
24 import bzrlib.osutils as osutils
25 from bzrlib.repository import Repository, RepositoryFormat
26 from bzrlib.revisiontree import RevisionTree
27 from bzrlib.revision import Revision, NULL_REVISION
28 from bzrlib.transport import Transport
29 from bzrlib.trace import mutter
30
31 from svn.core import SubversionException, Pool
32 import svn.core
33
34 import os
35
36 from branchprops import BranchPropertyList
37 from cache import create_cache_dir, sqlite3
38 import errors
39 import logwalker
40 from tree import SvnRevisionTree
41
# Version of the Subversion <-> Bazaar mapping. It is baked into every
# revision id through REVISION_ID_PREFIX.
MAPPING_VERSION = 2
REVISION_ID_PREFIX = "svn-v%d:" % MAPPING_VERSION

# Names of the bzr-specific Subversion properties; all of them share the
# 'bzr:' prefix.
SVN_PROP_BZR_PREFIX = 'bzr:'
SVN_PROP_BZR_MERGE = SVN_PROP_BZR_PREFIX + 'merge'
SVN_PROP_BZR_FILEIDS = SVN_PROP_BZR_PREFIX + 'file-ids'
SVN_PROP_BZR_REVPROP_PREFIX = SVN_PROP_BZR_PREFIX + 'revprop:'
SVN_REVPROP_BZR_SIGNATURE = SVN_PROP_BZR_PREFIX + 'gpg-signature'

# Property name used by SVK (not read anywhere in this module).
SVN_PROP_SVK_MERGE = 'svk:merge'
50
51
52 def escape_svn_path(id, unsafe="%/-\t \n"):
53     assert "%" in unsafe
54     r = [((c in unsafe) and ('%%%02x' % ord(c)) or c)
55          for c in id]
56     ret = ''.join(r)
57     if isinstance(ret, unicode):
58         return ret.encode("utf-8")
59     return ret
60
61
62 import urllib
63
# Inverse of escape_svn_path(): urllib.unquote decodes the %xx escapes.
unescape_svn_path = urllib.unquote
65
66
def parse_svn_revision_id(revid):
    """Parse an existing Subversion-based revision id.

    The expected layout is ``<REVISION_ID_PREFIX><revnum>@<uuid>-<path>``,
    where <path> is the branch path as escaped by escape_svn_path().

    :param revid: The revision id.
    :raises InvalidRevisionId: If the revision id does not start with
        REVISION_ID_PREFIX or is otherwise malformed (no "@" or "-"
        separator, non-numeric or negative revision number).
    :return: Tuple with uuid, branch path and revision number.
    """

    assert revid
    assert isinstance(revid, basestring)

    if not revid.startswith(REVISION_ID_PREFIX):
        raise InvalidRevisionId(revid, "")

    remainder = revid[len(REVISION_ID_PREFIX):]

    try:
        at = remainder.index("@")
        # The uuid may itself contain dashes, while escape_svn_path()
        # escapes "-" in the branch path by default, so the last dash
        # is the separator between the uuid and the path.
        dash = remainder.rindex("-")
        revnum = int(remainder[0:at])
    except ValueError:
        # Missing separator or non-numeric revision number.
        raise InvalidRevisionId(revid, "")

    if revnum < 0 or dash < at:
        raise InvalidRevisionId(revid, "")

    uuid = remainder[at+1:dash]
    branch_path = unescape_svn_path(remainder[dash+1:])
    return (uuid, branch_path, revnum)
91
92
def generate_svn_revision_id(uuid, revnum, path):
    """Build the unambiguous revision id for a Subversion revision.

    :param uuid: UUID of the repository.
    :param revnum: Subversion revision number (non-negative int).
    :param path: Branch path; leading and trailing slashes are ignored.

    :return: NULL_REVISION for revision 0, otherwise the new revision id.
    """
    assert isinstance(revnum, int)
    assert isinstance(path, basestring)
    assert revnum >= 0
    if revnum == 0:
        return NULL_REVISION
    escaped_path = escape_svn_path(path.strip("/"))
    id_fields = (REVISION_ID_PREFIX, revnum, uuid, escaped_path)
    return "%s%d@%s-%s" % id_fields
108
109
def svk_feature_to_revision_id(feature):
    """Create a revision id from a svk feature identifier.

    A feature identifier has the form ``<uuid>:<branch path>:<revnum>``.
    The uuid is split off from the left and the revision number from the
    right, so a branch path that itself contains ":" is handled.

    :param feature: The feature identifier as string.
    :raises ValueError: If the identifier has fewer than three
        colon-separated fields or the revision number is not numeric.
    :return: Matching revision id.
    """
    (uuid, rest) = feature.split(":", 1)
    (branch, revnum) = rest.rsplit(":", 1)
    return generate_svn_revision_id(uuid, int(revnum), branch.strip("/"))
118
119
def revision_id_to_svk_feature(revid):
    """Convert a revision id into the equivalent SVK feature identifier.

    :param revid: Revision id to convert.
    :return: String of the form ``<uuid>:/<branch path>:<revnum>``.
    """
    uuid, branch_path, revnum = parse_svn_revision_id(revid)
    feature_fields = (uuid, branch_path, revnum)
    return "%s:/%s:%d" % feature_fields
128
129
class SvnRepositoryFormat(RepositoryFormat):
    """Repository format object used for Subversion repositories."""

    rich_root_data = False

    def __init__(self):
        """Create the format and attach the matching SvnFormat."""
        super(SvnRepositoryFormat, self).__init__()
        # Imported here rather than at module level.
        # NOTE(review): presumably to avoid a circular import - confirm.
        import format as svn_format
        self._matchingbzrdir = svn_format.SvnFormat()

    def get_format_description(self):
        """Return the human-readable name of this format."""
        return "Subversion Repository"

    def initialize(self, url, shared=False, _internal=False):
        """Svn repositories cannot be created.

        :raises UninitializableFormat: Always.
        """
        raise UninitializableFormat(self)
144
# Maps cache file name -> open sqlite3 connection. Filled in by
# SvnRepository.__init__ so that a cache file is connected to only once.
cachedbs = {}
146
147 class SvnRepository(Repository):
148     """
149     Provides a simplified interface to a Subversion repository 
150     by using the RA (remote access) API from subversion
151     """
152     def __init__(self, bzrdir, transport):
153         from fileids import SimpleFileIdMap
154         _revision_store = None
155
156         assert isinstance(transport, Transport)
157
158         control_files = LockableFiles(transport, '', TransportLock)
159         Repository.__init__(self, SvnRepositoryFormat(), bzrdir, 
160             control_files, None, None, None)
161
162         self.transport = transport
163         self.uuid = transport.get_uuid()
164         self.base = transport.base
165         self.dir_cache = {}
166         self.scheme = bzrdir.scheme
167         self.pool = Pool()
168
169         assert self.base
170         assert self.uuid
171
172         cache_file = os.path.join(self.create_cache_dir(), 'cache-v1')
173         if not cachedbs.has_key(cache_file):
174             cachedbs[cache_file] = sqlite3.connect(cache_file)
175         self.cachedb = cachedbs[cache_file]
176
177         self._latest_revnum = transport.get_latest_revnum()
178         self._log = logwalker.LogWalker(transport=transport, 
179                                         cache_db=self.cachedb, 
180                                         last_revnum=self._latest_revnum)
181
182         self.branchprop_list = BranchPropertyList(self._log, self.cachedb)
183         self.fileid_map = SimpleFileIdMap(self, self.cachedb)
184
185     def set_branching_scheme(self, scheme):
186         self.scheme = scheme
187
188     def _warn_if_deprecated(self):
189         # This class isn't deprecated
190         pass
191
192     def __repr__(self):
193         return '%s(%r)' % (self.__class__.__name__, 
194                            self.base)
195
196     def create_cache_dir(self):
197         cache_dir = create_cache_dir()
198         dir = os.path.join(cache_dir, self.uuid)
199         if not os.path.exists(dir):
200             os.mkdir(dir)
201         return dir
202
203     def _check(self, revision_ids):
204         return BranchCheckResult(self)
205
206     def get_inventory(self, revision_id):
207         assert revision_id != None
208         return self.revision_tree(revision_id).inventory
209
210     def get_fileid_map(self, revnum, path):
211         return self.fileid_map.get_map(self.uuid, revnum, path,
212                                        self.revision_fileid_renames)
213
214     def transform_fileid_map(self, uuid, revnum, branch, changes, renames):
215         return self.fileid_map.apply_changes(uuid, revnum, branch, changes, 
216                                              renames)
217
218     def all_revision_ids(self):
219         for (bp, rev) in self.follow_history(self.transport.get_latest_revnum()):
220             yield self.generate_revision_id(rev, bp)
221
222     def get_inventory_weave(self):
223         raise NotImplementedError(self.get_inventory_weave)
224
225     def set_make_working_trees(self, new_value):
226         """See Repository.set_make_working_trees()."""
227         pass # FIXME: ignored, nowhere to store it... 
228
229     def make_working_trees(self):
230         return False
231
232     def get_ancestry(self, revision_id):
233         """See Repository.get_ancestry().
234         
235         Note: only the first bit is topologically ordered!
236         """
237         if revision_id is None: 
238             return [None]
239
240         (path, revnum) = self.parse_revision_id(revision_id)
241
242         ancestry = []
243
244         for l in self.branchprop_list.get_property(path, revnum, 
245                                     SVN_PROP_BZR_MERGE, "").splitlines():
246             ancestry.extend(l.split("\n"))
247
248         for (branch, rev) in self.follow_branch(path, revnum - 1):
249             ancestry.append(self.generate_revision_id(rev, branch))
250
251         ancestry.append(None)
252
253         ancestry.reverse()
254
255         return ancestry
256
257     def has_revision(self, revision_id):
258         if revision_id is None:
259             return True
260
261         try:
262             (path, revnum) = self.parse_revision_id(revision_id)
263         except NoSuchRevision:
264             return False
265
266         try:
267             return (svn.core.svn_node_none != self.transport.check_path(path.encode('utf8'), revnum))
268         except SubversionException, (_, num):
269             if num == svn.core.SVN_ERR_FS_NO_SUCH_REVISION:
270                 return False
271             raise
272
273     def revision_trees(self, revids):
274         for revid in revids:
275             yield self.revision_tree(revid)
276
277     def revision_tree(self, revision_id):
278         if revision_id is None:
279             revision_id = NULL_REVISION
280
281         if revision_id == NULL_REVISION:
282             inventory = Inventory()
283             inventory.revision_id = revision_id
284             return RevisionTree(self, inventory, revision_id)
285
286         return SvnRevisionTree(self, revision_id)
287
288     def revision_fileid_renames(self, revid):
289         (path, revnum) = self.parse_revision_id(revid)
290         items = self.branchprop_list.get_property_diff(path, revnum, 
291                                   SVN_PROP_BZR_FILEIDS).splitlines()
292         return dict(map(lambda x: x.split("\t"), items))
293
294     def _mainline_revision_parent(self, path, revnum):
295         assert isinstance(path, basestring)
296         assert isinstance(revnum, int)
297         for (branch, rev) in self.follow_branch(path, revnum):
298             if rev < revnum:
299                 return self.generate_revision_id(rev, branch)
300         return None
301
302     def revision_parents(self, revision_id, merged_data=None):
303         parent_ids = []
304         (branch, revnum) = self.parse_revision_id(revision_id)
305         mainline_parent = self._mainline_revision_parent(branch, revnum)
306         if mainline_parent is not None:
307             parent_ids.append(mainline_parent)
308             (parent_path, parent_revnum) = self.parse_revision_id(mainline_parent)
309         else:
310             parent_path = None
311
312         # if the branch didn't change, bzr:merge can't have changed
313         if not self._log.touches_path(branch, revnum):
314             return parent_ids
315        
316         if merged_data is None:
317             new_merge = self.branchprop_list.get_property(branch, revnum, 
318                                            SVN_PROP_BZR_MERGE, "").splitlines()
319
320             if len(new_merge) == 0 or parent_path is None:
321                 old_merge = ""
322             else:
323                 old_merge = self.branchprop_list.get_property(parent_path, parent_revnum, 
324                         SVN_PROP_BZR_MERGE, "").splitlines()
325
326             assert (len(old_merge) == len(new_merge) or 
327                     len(old_merge) + 1 == len(new_merge))
328
329             if len(old_merge) < len(new_merge):
330                 merged_data = new_merge[-1]
331             else:
332                 merged_data = ""
333
334         if ' ' in merged_data:
335             mutter('invalid revision id %r in merged property, skipping' % merged_data)
336             merged_data = ""
337
338         if merged_data != "":
339             parent_ids.extend(merged_data.split("\t"))
340
341         return parent_ids
342
343     def get_revision(self, revision_id):
344         """See Repository.get_revision."""
345         if not revision_id or not isinstance(revision_id, basestring):
346             raise InvalidRevisionId(revision_id=revision_id, branch=self)
347
348         (path, revnum) = self.parse_revision_id(revision_id)
349         
350         parent_ids = self.revision_parents(revision_id)
351
352         # Commit SVN revision properties to a Revision object
353         rev = Revision(revision_id=revision_id, parent_ids=parent_ids)
354
355         svn_props = self.branchprop_list.get_properties(path, revnum)
356         bzr_props = {}
357         for name in svn_props:
358             if not name.startswith(SVN_PROP_BZR_REVPROP_PREFIX):
359                 continue
360
361             bzr_props[name[len(SVN_PROP_BZR_REVPROP_PREFIX):]] = svn_props[name]
362
363         (rev.committer, rev.message, date) = self._log.get_revision_info(revnum)
364         if rev.committer is None:
365             rev.committer = ""
366
367         rev.timestamp = 1.0 * svn.core.secs_from_timestr(date, None)
368         rev.timezone = None
369         rev.properties = bzr_props
370         rev.inventory_sha1 = property(lambda: self.get_inventory_sha1(revision_id))
371
372         return rev
373
374     def get_revisions(self, revision_ids):
375         # TODO: More efficient implementation?
376         return map(self.get_revision, revision_ids)
377
378     def add_revision(self, rev_id, rev, inv=None, config=None):
379         raise NotImplementedError(self.add_revision)
380
381     def fileid_involved_between_revs(self, from_revid, to_revid):
382         raise NotImplementedError(self.fileid_involved_by_set)
383
384     def fileid_involved(self, last_revid=None):
385         raise NotImplementedError(self.fileid_involved)
386
387     def fileids_altered_by_revision_ids(self, revision_ids):
388         raise NotImplementedError(self.fileids_altered_by_revision_ids)
389
390     def fileid_involved_by_set(self, changes):
391         raise NotImplementedError(self.fileid_involved_by_set)
392
393     def generate_revision_id(self, revnum, path):
394         """Generate a unambiguous revision id. 
395         
396         :param revnum: Subversion revision number.
397         :param path: Branch path.
398
399         :return: New revision id.
400         """
401         return generate_svn_revision_id(self.uuid, revnum, path)
402
403     def parse_revision_id(self, revid):
404         """Parse an existing Subversion-based revision id.
405
406         :param revid: The revision id.
407         :raises: NoSuchRevision
408         :return: Tuple with branch path and revision number.
409         """
410
411         try:
412             (uuid, branch_path, revnum) = parse_svn_revision_id(revid)
413         except InvalidRevisionId:
414             raise NoSuchRevision(self, revid)
415
416         if uuid != self.uuid:
417             raise NoSuchRevision(self, revid)
418
419         return (branch_path, revnum)
420
421     def get_inventory_xml(self, revision_id):
422         return bzrlib.xml5.serializer_v5.write_inventory_to_string(
423             self.get_inventory(revision_id))
424
425     def get_inventory_sha1(self, revision_id):
426         return osutils.sha_string(self.get_inventory_xml(revision_id))
427
428     def get_revision_xml(self, revision_id):
429         return bzrlib.xml5.serializer_v5.write_revision_to_string(
430             self.get_revision(revision_id))
431
432     def follow_history(self, revnum):
433         while revnum > 0:
434             yielded_paths = []
435             paths = self._log.get_revision_paths(revnum)
436             for p in paths:
437                 try:
438                     bp = self.scheme.unprefix(p)[0]
439                     if not bp in yielded_paths:
440                         if not paths.has_key(bp) or paths[bp][0] != 'D':
441                             yield (bp, revnum)
442                         yielded_paths.append(bp)
443                 except NotBranchError:
444                     pass
445             revnum -= 1
446
447     def follow_branch(self, branch_path, revnum):
448         assert branch_path is not None
449         assert isinstance(revnum, int) and revnum >= 0
450         if not self.scheme.is_branch(branch_path):
451             raise errors.NotSvnBranchPath(branch_path, revnum)
452         branch_path = branch_path.strip("/")
453
454         while revnum > 0:
455             paths = self._log.get_revision_paths(revnum, branch_path)
456             if paths == {}:
457                 revnum -= 1
458                 continue
459             yield (branch_path, revnum)
460             # FIXME: what if one of the parents of branch_path was moved?
461             if (paths.has_key(branch_path) and 
462                 paths[branch_path][0] in ('R', 'A')):
463                 if paths[branch_path][1] is None:
464                     return
465                 if not self.scheme.is_branch(paths[branch_path][1]):
466                     # FIXME: if copyfrom_path is not a branch path, 
467                     # should simulate a reverse "split" of a branch
468                     # for now, just make it look like the branch ended here
469                     return
470                 revnum = paths[branch_path][2]
471                 branch_path = paths[branch_path][1]
472                 continue
473             revnum -= 1
474
475     def follow_branch_history(self, branch_path, revnum):
476         assert branch_path is not None
477         if not self.scheme.is_branch(branch_path):
478             raise errors.NotSvnBranchPath(branch_path, revnum)
479
480         for (bp, paths, revnum) in self._log.follow_path(branch_path, revnum):
481             # FIXME: what if one of the parents of branch_path was moved?
482             if (paths.has_key(bp) and 
483                 paths[bp][1] is not None and
484                 not self.scheme.is_branch(paths[bp][1])):
485                 # FIXME: if copyfrom_path is not a branch path, 
486                 # should simulate a reverse "split" of a branch
487                 # for now, just make it look like the branch ended here
488                 for c in self._log.find_children(paths[bp][1], paths[bp][2]):
489                     path = c.replace(paths[bp][1], bp+"/", 1).replace("//", "/")
490                     paths[path] = ('A', None, -1)
491                 paths[bp] = ('A', None, -1)
492
493                 yield (bp, paths, revnum)
494                 return
495                      
496             yield (bp, paths, revnum)
497
498     def has_signature_for_revision_id(self, revision_id):
499         # TODO: Retrieve from SVN_PROP_BZR_SIGNATURE 
500         return False # SVN doesn't store GPG signatures. Perhaps 
501                      # store in SVN revision property?
502
503     def get_signature_text(self, revision_id):
504         # TODO: Retrieve from SVN_PROP_BZR_SIGNATURE 
505         # SVN doesn't store GPG signatures
506         raise NoSuchRevision(self, revision_id)
507
508     def get_revision_graph(self, revision_id):
509         if revision_id == NULL_REVISION:
510             return {}
511
512         (path, revnum) = self.parse_revision_id(revision_id)
513
514         self._previous = revision_id
515         self._ancestry = {}
516         
517         for (branch, rev) in self.follow_branch(path, revnum - 1):
518             revid = self.generate_revision_id(rev, branch)
519             self._ancestry[self._previous] = [revid]
520             self._previous = revid
521
522         self._ancestry[self._previous] = []
523
524         return self._ancestry
525
526     def find_branches(self, revnum=None, pb=None):
527         """Find all branches that were changed in the specified revision number.
528
529         :param revnum: Revision to search for branches.
530         """
531         if revnum is None:
532             revnum = self.transport.get_latest_revnum()
533
534         created_branches = {}
535
536         for i in range(revnum+1):
537             if pb is not None:
538                 pb.update("finding branches", i, revnum+1)
539             paths = self._log.get_revision_paths(i)
540             names = paths.keys()
541             names.sort()
542             for p in names:
543                 if self.scheme.is_branch(p):
544                     if paths[p][0] in ('R', 'D'):
545                         del created_branches[p]
546                         yield (p, i, False)
547
548                     if paths[p][0] in ('A', 'R'): 
549                         created_branches[p] = i
550                 elif self.scheme.is_branch_parent(p):
551                     if paths[p][0] in ('R', 'D'):
552                         k = created_branches.keys()
553                         for c in k:
554                             if c.startswith(p+"/"):
555                                 del created_branches[c] 
556                                 yield (c, i, False)
557                     if paths[p][0] in ('A', 'R'):
558                         parents = [p]
559                         while parents:
560                             p = parents.pop()
561                             for c in self.transport.get_dir(p, i)[0].keys():
562                                 n = p+"/"+c
563                                 if self.scheme.is_branch(n):
564                                     created_branches[n] = i
565                                 elif self.scheme.is_branch_parent(n):
566                                     parents.append(n)
567
568         for p in created_branches:
569             j = self._log.find_latest_change(p, revnum, recurse=True)
570             if j is None:
571                 j = created_branches[p]
572             yield (p, j, True)
573
574     def is_shared(self):
575         """Return True if this repository is flagged as a shared repository."""
576         return True
577
578     def get_physical_lock_status(self):
579         return False
580
581     def get_commit_builder(self, branch, parents, config, timestamp=None, 
582                            timezone=None, committer=None, revprops=None, 
583                            revision_id=None):
584         if timestamp != None:
585             raise NotImplementedError(self.get_commit_builder, 
586                 "timestamp can not be user-specified for Subversion repositories")
587
588         if timezone != None:
589             raise NotImplementedError(self.get_commit_builder, 
590                 "timezone can not be user-specified for Subversion repositories")
591
592         if committer != None:
593             raise NotImplementedError(self.get_commit_builder, 
594                 "committer can not be user-specified for Subversion repositories")
595
596         if revision_id != None:
597             raise NotImplementedError(self.get_commit_builder, 
598                 "revision_id can not be user-specified for Subversion repositories")
599
600         from commit import SvnCommitBuilder
601         return SvnCommitBuilder(self, branch, parents, config, revprops)
602
603