Merge fix for 115026
[jelmer/subvertpy.git] / repository.py
index 8c9742151df675a6caad90a60c9edb7b3f5a2a51..aacecf6a0bb46d59a118d99b744b26ec2f6637d0 100644 (file)
@@ -18,7 +18,7 @@
 import bzrlib
 from bzrlib.branch import BranchCheckResult
 from bzrlib.errors import (InvalidRevisionId, NoSuchRevision, 
-                           NotBranchError, UninitializableFormat)
+                           NotBranchError, UninitializableFormat, BzrError)
 from bzrlib.inventory import Inventory
 from bzrlib.lockable_files import LockableFiles, TransportLock
 import bzrlib.osutils as osutils
@@ -26,6 +26,7 @@ from bzrlib.repository import Repository, RepositoryFormat
 from bzrlib.revisiontree import RevisionTree
 from bzrlib.revision import Revision, NULL_REVISION
 from bzrlib.transport import Transport
+from bzrlib.timestamp import unpack_highres_date, format_highres_date
 from bzrlib.trace import mutter
 
 from svn.core import SubversionException, Pool
@@ -37,74 +38,49 @@ from branchprops import BranchPropertyList
 from cache import create_cache_dir, sqlite3
 import errors
 import logwalker
+from revids import (generate_svn_revision_id, parse_svn_revision_id, 
+                    MAPPING_VERSION, RevidMap)
 from tree import SvnRevisionTree
 
-MAPPING_VERSION = 2
 SVN_PROP_BZR_PREFIX = 'bzr:'
-REVISION_ID_PREFIX = "svn-v%d:" % MAPPING_VERSION
 SVN_PROP_BZR_MERGE = 'bzr:merge'
+SVN_PROP_BZR_FILEIDS = 'bzr:file-ids'
 SVN_PROP_SVK_MERGE = 'svk:merge'
 SVN_PROP_BZR_FILEIDS = 'bzr:file-ids'
-SVN_PROP_BZR_REVPROP_PREFIX = 'bzr:revprop:'
+SVN_PROP_BZR_REVISION_INFO = 'bzr:revision-info'
 SVN_REVPROP_BZR_SIGNATURE = 'bzr:gpg-signature'
+SVN_PROP_BZR_REVISION_ID = 'bzr:revision-id-v%d' % MAPPING_VERSION
 
-
-def escape_svn_path(id, unsafe="%/-\t \n"):
-    assert "%" in unsafe
-    r = [((c in unsafe) and ('%%%02x' % ord(c)) or c)
-         for c in id]
-    ret = ''.join(r)
-    if isinstance(ret, unicode):
-        return ret.encode("utf-8")
-    return ret
-
-
-import urllib
-
-unescape_svn_path = urllib.unquote
-
-
-def parse_svn_revision_id(revid):
-    """Parse an existing Subversion-based revision id.
-
-    :param revid: The revision id.
-    :raises: InvalidRevisionId
-    :return: Tuple with uuid, branch path and revision number.
-    """
-
-    assert revid
-    assert isinstance(revid, basestring)
-
-    if not revid.startswith(REVISION_ID_PREFIX):
-        raise InvalidRevisionId(revid, "")
-
-    revid = revid[len(REVISION_ID_PREFIX):]
-
-    at = revid.index("@")
-    fash = revid.rindex("-")
-    uuid = revid[at+1:fash]
-
-    branch_path = unescape_svn_path(revid[fash+1:])
-    revnum = int(revid[0:at])
-    assert revnum >= 0
-    return (uuid, branch_path, revnum)
-
-
-def generate_svn_revision_id(uuid, revnum, path):
-    """Generate a unambiguous revision id. 
-    
-    :param uuid: UUID of the repository.
-    :param revnum: Subversion revision number.
-    :param path: Branch path.
-
-    :return: New revision id.
-    """
-    assert isinstance(revnum, int)
-    assert isinstance(path, basestring)
-    assert revnum >= 0
-    if revnum == 0:
-        return NULL_REVISION
-    return "%s%d@%s-%s" % (REVISION_ID_PREFIX, revnum, uuid, escape_svn_path(path.strip("/")))
+def parse_revision_metadata(text, rev):
+    in_properties = False
+    for l in text.splitlines():
+        try:
+            key, value = l.split(": ", 2)
+        except ValueError:
+            raise BzrError("Missing : in revision metadata")
+        if key == "committer":
+            rev.committer = str(value)
+        elif key == "timestamp":
+            (rev.timestamp, rev.timezone) = unpack_highres_date(value)
+        elif key == "properties":
+            in_properties = True
+        elif key[0] == "\t" and in_properties:
+            rev.properties[str(key[1:])] = str(value)
+        else:
+            raise BzrError("Invalid key %r" % key)
+
+def generate_revision_metadata(timestamp, timezone, committer, revprops):
+    assert timestamp is None or isinstance(timestamp, float)
+    text = ""
+    if timestamp is not None:
+        text += "timestamp: %s\n" % format_highres_date(timestamp, timezone) 
+    if committer is not None:
+        text += "committer: %s\n" % committer
+    if revprops is not None and revprops != {}:
+        text += "properties: \n"
+        for k, v in sorted(revprops.items()):
+            text += "\t%s: %s\n" % (k, v)
+    return text
 
 
 def svk_feature_to_revision_id(feature):
@@ -162,6 +138,7 @@ class SvnRepository(Repository):
         self.transport = transport
         self.uuid = transport.get_uuid()
         self.base = transport.base
+        self._serializer = None
         self.dir_cache = {}
         self.scheme = bzrdir.scheme
         self.pool = Pool()
@@ -181,6 +158,7 @@ class SvnRepository(Repository):
 
         self.branchprop_list = BranchPropertyList(self._log, self.cachedb)
         self.fileid_map = SimpleFileIdMap(self, self.cachedb)
+        self.revmap = RevidMap(self.cachedb)
 
     def set_branching_scheme(self, scheme):
         self.scheme = scheme
@@ -237,21 +215,20 @@ class SvnRepository(Repository):
         if revision_id is None: 
             return [None]
 
-        (path, revnum) = self.parse_revision_id(revision_id)
+        (path, revnum) = self.lookup_revision_id(revision_id)
 
-        ancestry = []
+        ancestry = [revision_id]
 
         for l in self.branchprop_list.get_property(path, revnum, 
                                     SVN_PROP_BZR_MERGE, "").splitlines():
             ancestry.extend(l.split("\n"))
 
-        for (branch, rev) in self.follow_branch(path, revnum - 1):
-            ancestry.append(self.generate_revision_id(rev, branch))
+        if revnum > 0:
+            for (branch, rev) in self.follow_branch(path, revnum - 1):
+                ancestry.append(self.generate_revision_id(rev, branch))
 
         ancestry.append(None)
-
         ancestry.reverse()
-
         return ancestry
 
     def has_revision(self, revision_id):
@@ -259,7 +236,7 @@ class SvnRepository(Repository):
             return True
 
         try:
-            (path, revnum) = self.parse_revision_id(revision_id)
+            (path, revnum) = self.lookup_revision_id(revision_id)
         except NoSuchRevision:
             return False
 
@@ -279,14 +256,14 @@ class SvnRepository(Repository):
             revision_id = NULL_REVISION
 
         if revision_id == NULL_REVISION:
-            inventory = Inventory()
+            inventory = Inventory(root_id=None)
             inventory.revision_id = revision_id
             return RevisionTree(self, inventory, revision_id)
 
         return SvnRevisionTree(self, revision_id)
 
     def revision_fileid_renames(self, revid):
-        (path, revnum) = self.parse_revision_id(revid)
+        (path, revnum) = self.lookup_revision_id(revid)
         items = self.branchprop_list.get_property_diff(path, revnum, 
                                   SVN_PROP_BZR_FILEIDS).splitlines()
         return dict(map(lambda x: x.split("\t"), items))
@@ -294,18 +271,31 @@ class SvnRepository(Repository):
     def _mainline_revision_parent(self, path, revnum):
         assert isinstance(path, basestring)
         assert isinstance(revnum, int)
-        for (branch, rev) in self.follow_branch(path, revnum):
-            if rev < revnum:
-                return self.generate_revision_id(rev, branch)
-        return None
+
+        if not self.scheme.is_branch(path) and \
+           not self.scheme.is_tag(path):
+            raise NoSuchRevision(self, self.generate_revision_id(revnum, path))
+
+        it = self.follow_branch(path, revnum)
+        # The first tuple returned should match the one specified.
+        # If it does not, the branch was not changed in the specified
+        # revision, so that (path, revnum) pair is not a valid revision.
+        if (path, revnum) != it.next():
+            raise NoSuchRevision(self, self.generate_revision_id(revnum, path))
+        try:
+            (branch, rev) = it.next()
+            return self.generate_revision_id(rev, branch)
+        except StopIteration:
+            # The specified revision was the first one in the branch
+            return None
 
     def revision_parents(self, revision_id, merged_data=None):
         parent_ids = []
-        (branch, revnum) = self.parse_revision_id(revision_id)
+        (branch, revnum) = self.lookup_revision_id(revision_id)
         mainline_parent = self._mainline_revision_parent(branch, revnum)
         if mainline_parent is not None:
             parent_ids.append(mainline_parent)
-            (parent_path, parent_revnum) = self.parse_revision_id(mainline_parent)
+            (parent_path, parent_revnum) = self.lookup_revision_id(mainline_parent)
         else:
             parent_path = None
 
@@ -345,28 +335,27 @@ class SvnRepository(Repository):
         if not revision_id or not isinstance(revision_id, basestring):
             raise InvalidRevisionId(revision_id=revision_id, branch=self)
 
-        (path, revnum) = self.parse_revision_id(revision_id)
+        (path, revnum) = self.lookup_revision_id(revision_id)
         
         parent_ids = self.revision_parents(revision_id)
 
         # Commit SVN revision properties to a Revision object
         rev = Revision(revision_id=revision_id, parent_ids=parent_ids)
 
-        svn_props = self.branchprop_list.get_properties(path, revnum)
-        bzr_props = {}
-        for name in svn_props:
-            if not name.startswith(SVN_PROP_BZR_REVPROP_PREFIX):
-                continue
-
-            bzr_props[name[len(SVN_PROP_BZR_REVPROP_PREFIX):]] = svn_props[name]
-
         (rev.committer, rev.message, date) = self._log.get_revision_info(revnum)
         if rev.committer is None:
             rev.committer = ""
 
-        rev.timestamp = 1.0 * svn.core.secs_from_timestr(date, None)
+        if date is not None:
+            rev.timestamp = 1.0 * svn.core.secs_from_timestr(date, None)
+        else:
+            rev.timestamp = 0.0 # FIXME: Obtain repository creation time
         rev.timezone = None
-        rev.properties = bzr_props
+        rev.properties = {}
+        parse_revision_metadata(
+                self.branchprop_list.get_property(path, revnum, 
+                     SVN_PROP_BZR_REVISION_INFO, ""), rev)
+
         rev.inventory_sha1 = property(lambda: self.get_inventory_sha1(revision_id))
 
         return rev
@@ -391,16 +380,28 @@ class SvnRepository(Repository):
         raise NotImplementedError(self.fileid_involved_by_set)
 
     def generate_revision_id(self, revnum, path):
-        """Generate a unambiguous revision id. 
+        """Generate an unambiguous revision id. 
         
         :param revnum: Subversion revision number.
         :param path: Branch path.
 
         :return: New revision id.
         """
-        return generate_svn_revision_id(self.uuid, revnum, path)
+        # Look in the cache to see if it already has a revision id
+        revid = self.revmap.lookup_branch_revnum(revnum, path)
+        if revid is not None:
+            return revid
+
+        revid = self.branchprop_list.get_property_diff(path, revnum, 
+                SVN_PROP_BZR_REVISION_ID).strip("\n")
+        if revid == "":
+            revid = generate_svn_revision_id(self.uuid, revnum, path)
+
+        self.revmap.insert_revid(revid, path, revnum, revnum, "undefined")
 
-    def parse_revision_id(self, revid):
+        return revid
+
+    def lookup_revision_id(self, revid):
         """Parse an existing Subversion-based revision id.
 
         :param revid: The revision id.
@@ -408,29 +409,82 @@ class SvnRepository(Repository):
         :return: Tuple with branch path and revision number.
         """
 
+        # Try a simple parse
         try:
             (uuid, branch_path, revnum) = parse_svn_revision_id(revid)
+            assert isinstance(branch_path, str)
+            if uuid == self.uuid:
+                return (branch_path, revnum)
         except InvalidRevisionId:
-            raise NoSuchRevision(self, revid)
-
-        if uuid != self.uuid:
-            raise NoSuchRevision(self, revid)
+            pass
 
-        return (branch_path, revnum)
+        # Check the record out of the revmap, if it exists
+        try:
+            (branch_path, min_revnum, max_revnum, \
+                    scheme) = self.revmap.lookup_revid(revid)
+            assert isinstance(branch_path, str)
+            # Entry already complete?
+            if min_revnum == max_revnum:
+                return (branch_path, min_revnum)
+        except NoSuchRevision:
+            # If there is no entry in the map, walk over all branches:
+            for (branch, revno, exists) in self.find_branches():
+                # Look at their bzr:revision-id-vX
+                revids = self.branchprop_list.get_property(branch, revno, 
+                        SVN_PROP_BZR_REVISION_ID, "")
+
+                # If there are any new entries that are not yet in the cache, 
+                # add them
+                for r in revids:
+                    self.revmap.insert_revid(revid, branch, 0, revno, 
+                            "undefined")
+
+                if revid in revids:
+                    break
+                
+            (branch_path, min_revnum, max_revnum, scheme) = self.revmap.lookup_revid(revid)
+            assert isinstance(branch_path, str)
+
+        # Find the branch property between min_revnum and max_revnum that 
+        # added revid
+        i = min_revnum
+        for (bp, rev) in self.follow_branch(branch_path, max_revnum):
+            if self.branchprop_list.get_property_diff(bp, rev, SVN_PROP_BZR_REVISION_ID).strip("\n") == revid:
+                self.revmap.insert_revid(revid, bp, rev, rev, "undefined")
+                return (bp, rev)
+
+        raise AssertionError("Revision id was added incorrectly")
 
     def get_inventory_xml(self, revision_id):
         return bzrlib.xml5.serializer_v5.write_inventory_to_string(
             self.get_inventory(revision_id))
 
+    """Get the sha1 for the XML representation of an inventory.
+
+    :param revision_id: Revision id of the inventory for which to return the 
+        SHA1.
+    :return: SHA1 of the inventory XML
+    """
     def get_inventory_sha1(self, revision_id):
         return osutils.sha_string(self.get_inventory_xml(revision_id))
 
+    """Return the XML representation of a revision.
+
+    :param revision_id: Revision for which to return the XML.
+    :return: XML string
+    """
     def get_revision_xml(self, revision_id):
         return bzrlib.xml5.serializer_v5.write_revision_to_string(
             self.get_revision(revision_id))
 
+    """Yield all the branches found between the start of history 
+    and a specified revision number.
+
+    :param revnum: Revision number up to which to search.
+    :return: iterator over (branch path, revnum) tuples in the range 0..revnum
+    """
     def follow_history(self, revnum):
-        while revnum > 0:
+        while revnum >= 0:
             yielded_paths = []
             paths = self._log.get_revision_paths(revnum)
             for p in paths:
@@ -438,31 +492,54 @@ class SvnRepository(Repository):
                     bp = self.scheme.unprefix(p)[0]
                     if not bp in yielded_paths:
                         if not paths.has_key(bp) or paths[bp][0] != 'D':
+                            assert revnum > 0 or bp == ""
                             yield (bp, revnum)
                         yielded_paths.append(bp)
                 except NotBranchError:
                     pass
             revnum -= 1
 
+    """Follow the history of a branch. Will yield all the 
+    left-hand side ancestors of a specified revision.
+    
+    :param branch_path: Subversion path to search.
+    :param revnum: Revision number in Subversion to start.
+    :return: iterator over (branch path, revnum) tuples of the ancestors
+    """
     def follow_branch(self, branch_path, revnum):
         assert branch_path is not None
         assert isinstance(revnum, int) and revnum >= 0
-        if not self.scheme.is_branch(branch_path):
+        if not self.scheme.is_branch(branch_path) and \
+           not self.scheme.is_tag(branch_path):
             raise errors.NotSvnBranchPath(branch_path, revnum)
         branch_path = branch_path.strip("/")
 
-        while revnum > 0:
-            paths = self._log.get_revision_paths(revnum, branch_path)
-            if paths == {}:
-                revnum -= 1
-                continue
-            yield (branch_path, revnum)
-            # FIXME: what if one of the parents of branch_path was moved?
+        while revnum >= 0:
+            paths = self._log.get_revision_paths(revnum)
+
+            yielded = False
+            # If something underneath branch_path changed, there is a 
+            # revision there, so yield it.
+            for p in paths:
+                if p.startswith(branch_path+"/") or branch_path == "":
+                    yield (branch_path, revnum)
+                    yielded = True
+                    break
+            
+            # If there are no special cases, just go try the 
+            # next revnum in history
+            revnum -= 1
+
+            # Make sure we get the right location for next time, if 
+            # the branch itself was copied
             if (paths.has_key(branch_path) and 
                 paths[branch_path][0] in ('R', 'A')):
+                if not yielded:
+                    yield (branch_path, revnum+1)
                 if paths[branch_path][1] is None:
                     return
-                if not self.scheme.is_branch(paths[branch_path][1]):
+                if not self.scheme.is_branch(paths[branch_path][1]) and \
+                   not self.scheme.is_tag(paths[branch_path][1]):
                     # FIXME: if copyfrom_path is not a branch path, 
                     # should simulate a reverse "split" of a branch
                     # for now, just make it look like the branch ended here
@@ -470,18 +547,38 @@ class SvnRepository(Repository):
                 revnum = paths[branch_path][2]
                 branch_path = paths[branch_path][1]
                 continue
-            revnum -= 1
-
+            
+            # Make sure we get the right location for the next time if 
+            # one of the parents changed
+
+            # Path names need to be sorted so the longer paths 
+            # override the shorter ones
+            path_names = paths.keys()
+            path_names.sort()
+            for p in path_names:
+                if branch_path.startswith(p+"/"):
+                    assert paths[p][1] is not None and paths[p][0] in ('A', 'R'), "Parent didn't exist yet, but child wasn't added !?"
+
+                    revnum = paths[p][2]
+                    branch_path = paths[p][1] + branch_path[len(p):]
+
+    """Return all the changes that happened in a branch 
+    located at branch_path in revision revnum.
+
+    :return: iterator that returns tuples with branch path, 
+    changed paths and revision number.
+    """
     def follow_branch_history(self, branch_path, revnum):
         assert branch_path is not None
-        if not self.scheme.is_branch(branch_path):
+        if not self.scheme.is_branch(branch_path) and \
+           not self.scheme.is_tag(branch_path):
             raise errors.NotSvnBranchPath(branch_path, revnum)
 
         for (bp, paths, revnum) in self._log.follow_path(branch_path, revnum):
-            # FIXME: what if one of the parents of branch_path was moved?
             if (paths.has_key(bp) and 
-                paths[bp][1] is not None and
-                not self.scheme.is_branch(paths[bp][1])):
+                paths[bp][1] is not None and 
+                not self.scheme.is_branch(paths[bp][1]) and
+                not self.scheme.is_tag(paths[bp][1])):
                 # FIXME: if copyfrom_path is not a branch path, 
                 # should simulate a reverse "split" of a branch
                 # for now, just make it look like the branch ended here
@@ -495,31 +592,54 @@ class SvnRepository(Repository):
                      
             yield (bp, paths, revnum)
 
+    """Check whether a signature exists for a particular revision id.
+
+    :param revision_id: Revision id for which the signatures should be looked up.
+    :return: False, as no signatures are stored for revisions in Subversion 
+        at the moment.
+    """
     def has_signature_for_revision_id(self, revision_id):
         # TODO: Retrieve from SVN_PROP_BZR_SIGNATURE 
         return False # SVN doesn't store GPG signatures. Perhaps 
                      # store in SVN revision property?
 
+    """Return the signature text for a particular revision.
+
+    :param revision_id: Id of the revision for which to return the signature.
+    :raises NoSuchRevision: Always
+    """
     def get_signature_text(self, revision_id):
         # TODO: Retrieve from SVN_PROP_BZR_SIGNATURE 
         # SVN doesn't store GPG signatures
         raise NoSuchRevision(self, revision_id)
 
-    def get_revision_graph(self, revision_id):
+    def _full_revision_graph(self):
+        graph = {}
+        for (branch, revnum) in self.follow_history(self._latest_revnum):
+            mutter('%r, %r' % (branch, revnum))
+            revid = self.generate_revision_id(revnum, branch)
+            graph[revid] = self.revision_parents(revid)
+        return graph
+
+    def get_revision_graph(self, revision_id=None):
         if revision_id == NULL_REVISION:
             return {}
 
-        (path, revnum) = self.parse_revision_id(revision_id)
+        if revision_id is None:
+            return self._full_revision_graph()
+
+        (path, revnum) = self.lookup_revision_id(revision_id)
 
-        self._previous = revision_id
+        _previous = revision_id
         self._ancestry = {}
         
-        for (branch, rev) in self.follow_branch(path, revnum - 1):
-            revid = self.generate_revision_id(rev, branch)
-            self._ancestry[self._previous] = [revid]
-            self._previous = revid
+        if revnum > 0:
+            for (branch, rev) in self.follow_branch(path, revnum - 1):
+                revid = self.generate_revision_id(rev, branch)
+                self._ancestry[_previous] = [revid]
+                _previous = revid
 
-        self._ancestry[self._previous] = []
+        self._ancestry[_previous] = []
 
         return self._ancestry
 
@@ -527,6 +647,7 @@ class SvnRepository(Repository):
         """Find all branches that were changed in the specified revision number.
 
         :param revnum: Revision to search for branches.
+        :return: iterator that returns tuples with (path, revision number, still exists)
         """
         if revnum is None:
             revnum = self.transport.get_latest_revnum()
@@ -540,14 +661,14 @@ class SvnRepository(Repository):
             names = paths.keys()
             names.sort()
             for p in names:
-                if self.scheme.is_branch(p):
+                if self.scheme.is_branch(p) or self.scheme.is_tag(p):
                     if paths[p][0] in ('R', 'D'):
                         del created_branches[p]
                         yield (p, i, False)
 
                     if paths[p][0] in ('A', 'R'): 
                         created_branches[p] = i
-                elif self.scheme.is_branch_parent(p):
+                elif self.scheme.is_branch_parent(p) or self.scheme.is_tag_parent(p):
                     if paths[p][0] in ('R', 'D'):
                         k = created_branches.keys()
                         for c in k:
@@ -560,9 +681,9 @@ class SvnRepository(Repository):
                             p = parents.pop()
                             for c in self.transport.get_dir(p, i)[0].keys():
                                 n = p+"/"+c
-                                if self.scheme.is_branch(n):
+                                if self.scheme.is_branch(n) or self.scheme.is_tag(n):
                                     created_branches[n] = i
-                                elif self.scheme.is_branch_parent(n):
+                                elif self.scheme.is_branch_parent(n) or self.scheme.is_tag_parent(n):
                                     parents.append(n)
 
         for p in created_branches:
@@ -581,23 +702,8 @@ class SvnRepository(Repository):
     def get_commit_builder(self, branch, parents, config, timestamp=None, 
                            timezone=None, committer=None, revprops=None, 
                            revision_id=None):
-        if timestamp != None:
-            raise NotImplementedError(self.get_commit_builder, 
-                "timestamp can not be user-specified for Subversion repositories")
-
-        if timezone != None:
-            raise NotImplementedError(self.get_commit_builder, 
-                "timezone can not be user-specified for Subversion repositories")
-
-        if committer != None:
-            raise NotImplementedError(self.get_commit_builder, 
-                "committer can not be user-specified for Subversion repositories")
-
-        if revision_id != None:
-            raise NotImplementedError(self.get_commit_builder, 
-                "revision_id can not be user-specified for Subversion repositories")
-
         from commit import SvnCommitBuilder
-        return SvnCommitBuilder(self, branch, parents, config, revprops)
+        return SvnCommitBuilder(self, branch, parents, config, timestamp, 
+                timezone, committer, revprops, revision_id)