Add smart HTTP support to dul-web.
[jelmer/dulwich-libgit2.git] / dulwich / server.py
index 6e370f9e0fbaeccf77d411c51745f1fc440729f8..2e19838d402790a49b86682b7a1525ac075b2b27 100644 (file)
 # MA  02110-1301, USA.
 
 
-"""Git smart network protocol server implementation."""
+"""Git smart network protocol server implementation.
+
+For more detailed implementation on the network protocol, see the
+Documentation/technical directory in the cgit distribution, and in particular:
+    Documentation/technical/protocol-capabilities.txt
+    Documentation/technical/pack-protocol.txt
+"""
 
 
 import collections
@@ -25,6 +31,8 @@ import SocketServer
 import tempfile
 
 from dulwich.errors import (
+    ApplyDeltaError,
+    ChecksumMismatch,
     GitProtocolError,
     )
 from dulwich.objects import (
@@ -38,6 +46,7 @@ from dulwich.protocol import (
     extract_want_line_capabilities,
     SINGLE_ACK,
     MULTI_ACK,
+    MULTI_ACK_DETAILED,
     ack_type,
     )
 from dulwich.repo import (
@@ -86,18 +95,55 @@ class GitBackend(Backend):
 
     def apply_pack(self, refs, read):
         f, commit = self.repo.object_store.add_thin_pack()
+        all_exceptions = (IOError, OSError, ChecksumMismatch, ApplyDeltaError)
+        status = []
+        unpack_error = None
+        # TODO: more informative error messages than just the exception string
+        try:
+            # TODO: decode the pack as we stream to avoid blocking reads beyond
+            # the end of data (when using HTTP/1.1 chunked encoding)
+            while True:
+                data = read(10240)
+                if not data:
+                    break
+                f.write(data)
+        except all_exceptions, e:
+            unpack_error = str(e).replace('\n', '')
         try:
-            f.write(read())
-        finally:
             commit()
+        except all_exceptions, e:
+            if not unpack_error:
+                unpack_error = str(e).replace('\n', '')
+
+        if unpack_error:
+            status.append(('unpack', unpack_error))
+        else:
+            status.append(('unpack', 'ok'))
 
         for oldsha, sha, ref in refs:
-            if ref == "0" * 40:
-                del self.repo.refs[ref]
+            # TODO: check refname
+            ref_error = None
+            try:
+                if ref == "0" * 40:
+                    try:
+                        del self.repo.refs[ref]
+                    except all_exceptions:
+                        ref_error = 'failed to delete'
+                else:
+                    try:
+                        self.repo.refs[ref] = sha
+                    except all_exceptions:
+                        ref_error = 'failed to write'
+            except KeyError, e:
+                ref_error = 'bad ref'
+            if ref_error:
+                status.append((ref, ref_error))
             else:
-                self.repo.refs[ref] = sha
+                status.append((ref, 'ok'))
+
 
         print "pack applied"
+        return status
 
 
 class Handler(object):
@@ -114,13 +160,17 @@ class Handler(object):
 class UploadPackHandler(Handler):
     """Protocol handler for uploading a pack to the server."""
 
-    def __init__(self, backend, read, write):
+    def __init__(self, backend, read, write,
+                 stateless_rpc=False, advertise_refs=False):
         Handler.__init__(self, backend, read, write)
         self._client_capabilities = None
         self._graph_walker = None
+        self.stateless_rpc = stateless_rpc
+        self.advertise_refs = advertise_refs
 
     def default_capabilities(self):
-        return ("multi_ack", "side-band-64k", "thin-pack", "ofs-delta")
+        return ("multi_ack_detailed", "multi_ack", "side-band-64k", "thin-pack",
+                "ofs-delta")
 
     def set_client_capabilities(self, caps):
         my_caps = self.default_capabilities()
@@ -137,47 +187,13 @@ class UploadPackHandler(Handler):
                                    set_client_capabilities)
 
     def handle(self):
-        def determine_wants(heads):
-            keys = heads.keys()
-            values = set(heads.itervalues())
-            if keys:
-                self.proto.write_pkt_line("%s %s\x00%s\n" % ( heads[keys[0]], keys[0], self.capabilities()))
-                for k in keys[1:]:
-                    self.proto.write_pkt_line("%s %s\n" % (heads[k], k))
-
-            # i'm done..
-            self.proto.write("0000")
-
-            # Now client will either send "0000", meaning that it doesnt want to pull.
-            # or it will start sending want want want commands
-            want = self.proto.read_pkt_line()
-            if want == None:
-                return []
-
-            want, self.client_capabilities = extract_want_line_capabilities(want)
-            graph_walker.set_ack_type(ack_type(self.client_capabilities))
-
-            want_revs = []
-            while want and want[:4] == 'want':
-                sha = want[5:45]
-                try:
-                    hex_to_sha(sha)
-                except (TypeError, AssertionError), e:
-                    raise GitProtocolError(e)
-
-                if sha not in values:
-                    raise GitProtocolError(
-                        'Client wants invalid object %s' % sha)
-                want_revs.append(sha)
-                want = self.proto.read_pkt_line()
-            graph_walker.set_wants(want_revs)
-            return want_revs
 
         progress = lambda x: self.proto.write_sideband(2, x)
         write = lambda x: self.proto.write_sideband(1, x)
 
-        graph_walker = ProtocolGraphWalker(self.backend.object_store, self.proto)
-        objects_iter = self.backend.fetch_objects(determine_wants, graph_walker, progress)
+        graph_walker = ProtocolGraphWalker(self)
+        objects_iter = self.backend.fetch_objects(
+          graph_walker.determine_wants, graph_walker, progress)
 
         # Do they want any objects?
         if len(objects_iter) == 0:
@@ -205,15 +221,69 @@ class ProtocolGraphWalker(object):
     call to set_ack_level() is required to set up the implementation, before any
     calls to next() or ack() are made.
     """
-    def __init__(self, object_store, proto):
-        self.store = object_store
-        self.proto = proto
+    def __init__(self, handler):
+        self.handler = handler
+        self.store = handler.backend.object_store
+        self.proto = handler.proto
+        self.stateless_rpc = handler.stateless_rpc
+        self.advertise_refs = handler.advertise_refs
         self._wants = []
         self._cached = False
         self._cache = []
         self._cache_index = 0
         self._impl = None
 
+    def determine_wants(self, heads):
+        """Determine the wants for a set of heads.
+
+        The given heads are advertised to the client, who then specifies which
+        refs he wants using 'want' lines. This portion of the protocol is the
+        same regardless of ack type, and in fact is used to set the ack type of
+        the ProtocolGraphWalker.
+
+        :param heads: a dict of refname->SHA1 to advertise
+        :return: a list of SHA1s requested by the client
+        """
+        if not heads:
+            raise GitProtocolError('No heads found')
+        values = set(heads.itervalues())
+        if self.advertise_refs or not self.stateless_rpc:
+            for i, (ref, sha) in enumerate(heads.iteritems()):
+                line = "%s %s" % (sha, ref)
+                if not i:
+                    line = "%s\x00%s" % (line, self.handler.capabilities())
+                self.proto.write_pkt_line("%s\n" % line)
+                # TODO: include peeled value of any tags
+
+            # i'm done..
+            self.proto.write_pkt_line(None)
+
+            if self.advertise_refs:
+                return []
+
+        # Now client will sending want want want commands
+        want = self.proto.read_pkt_line()
+        if not want:
+            return []
+        line, caps = extract_want_line_capabilities(want)
+        self.handler.client_capabilities = caps
+        self.set_ack_type(ack_type(caps))
+        command, sha = self._split_proto_line(line)
+
+        want_revs = []
+        while command != None:
+            if command != 'want':
+                raise GitProtocolError(
+                    'Protocol got unexpected command %s' % command)
+            if sha not in values:
+                raise GitProtocolError(
+                    'Client wants invalid object %s' % sha)
+            want_revs.append(sha)
+            command, sha = self.read_proto_line()
+
+        self.set_wants(want_revs)
+        return want_revs
+
     def ack(self, have_ref):
         return self._impl.ack(have_ref)
 
@@ -223,33 +293,42 @@ class ProtocolGraphWalker(object):
 
     def next(self):
         if not self._cached:
+            if not self._impl and self.stateless_rpc:
+                return None
             return self._impl.next()
         self._cache_index += 1
         if self._cache_index > len(self._cache):
             return None
         return self._cache[self._cache_index]
 
+    def _split_proto_line(self, line):
+        fields = line.rstrip('\n').split(' ', 1)
+        if len(fields) == 1 and fields[0] == 'done':
+            return ('done', None)
+        elif len(fields) == 2 and fields[0] in ('want', 'have'):
+            try:
+                hex_to_sha(fields[1])
+                return tuple(fields)
+            except (TypeError, AssertionError), e:
+                raise GitProtocolError(e)
+        raise GitProtocolError('Received invalid line from client:\n%s' % line)
+
     def read_proto_line(self):
         """Read a line from the wire.
 
         :return: a tuple having one of the following forms:
+            ('want', obj_id)
             ('have', obj_id)
             ('done', None)
             (None, None)  (for a flush-pkt)
+
+        :raise GitProtocolError: if the line cannot be parsed into one of the
+            possible return values.
         """
         line = self.proto.read_pkt_line()
         if not line:
             return (None, None)
-        fields = line.rstrip('\n').split(' ', 1)
-        if len(fields) == 1 and fields[0] == 'done':
-            return ('done', None)
-        if len(fields) == 2 and fields[0] == 'have':
-            try:
-                hex_to_sha(fields[1])
-                return fields
-            except (TypeError, AssertionError), e:
-                raise GitProtocolError(e)
-        raise GitProtocolError('Received invalid line from client:\n%s' % line)
+        return self._split_proto_line(line)
 
     def send_ack(self, sha, ack_type=''):
         if ack_type:
@@ -307,6 +386,7 @@ class ProtocolGraphWalker(object):
     def set_ack_type(self, ack_type):
         impl_classes = {
             MULTI_ACK: MultiAckGraphWalkerImpl,
+            MULTI_ACK_DETAILED: MultiAckDetailedGraphWalkerImpl,
             SINGLE_ACK: SingleAckGraphWalkerImpl,
             }
         self._impl = impl_classes[ack_type](self)
@@ -351,27 +431,83 @@ class MultiAckGraphWalkerImpl(object):
         # else we blind ack within next
 
     def next(self):
-        command, sha = self.walker.read_proto_line()
-        if command is None:
-            self.walker.send_nak()
-            return None
-        elif command == 'done':
-            # don't nak unless no common commits were found, even if not
-            # everything is satisfied
-            if self._common:
-                self.walker.send_ack(self._common[-1])
-            else:
+        while True:
+            command, sha = self.walker.read_proto_line()
+            if command is None:
                 self.walker.send_nak()
-            return None
-        elif command == 'have':
-            if self._found_base:
-                # blind ack
-                self.walker.send_ack(sha, 'continue')
-            return sha
+                # in multi-ack mode, a flush-pkt indicates the client wants to
+                # flush but more have lines are still coming
+                continue
+            elif command == 'done':
+                # don't nak unless no common commits were found, even if not
+                # everything is satisfied
+                if self._common:
+                    self.walker.send_ack(self._common[-1])
+                else:
+                    self.walker.send_nak()
+                return None
+            elif command == 'have':
+                if self._found_base:
+                    # blind ack
+                    self.walker.send_ack(sha, 'continue')
+                return sha
+
+
+class MultiAckDetailedGraphWalkerImpl(object):
+    """Graph walker implementation speaking the multi-ack-detailed protocol."""
+
+    def __init__(self, walker):
+        self.walker = walker
+        self._found_base = False
+        self._common = []
+
+    def ack(self, have_ref):
+        self._common.append(have_ref)
+        if not self._found_base:
+            self.walker.send_ack(have_ref, 'common')
+            if self.walker.all_wants_satisfied(self._common):
+                self._found_base = True
+                self.walker.send_ack(have_ref, 'ready')
+        # else we blind ack within next
+
+    def next(self):
+        while True:
+            command, sha = self.walker.read_proto_line()
+            if command is None:
+                self.walker.send_nak()
+                if self.walker.stateless_rpc:
+                    return None
+                continue
+            elif command == 'done':
+                # don't nak unless no common commits were found, even if not
+                # everything is satisfied
+                if self._common:
+                    self.walker.send_ack(self._common[-1])
+                else:
+                    self.walker.send_nak()
+                return None
+            elif command == 'have':
+                if self._found_base:
+                    # blind ack; can happen if the client has more requests
+                    # inflight
+                    self.walker.send_ack(sha, 'ready')
+                return sha
 
 
 class ReceivePackHandler(Handler):
-    """Protocol handler for downloading a pack to the client."""
+    """Protocol handler for downloading a pack from the client."""
+
+    def __init__(self, backend, read, write,
+                 stateless_rpc=False, advertise_refs=False):
+        Handler.__init__(self, backend, read, write)
+        self.stateless_rpc = stateless_rpc
+        self.advertise_refs = advertise_refs
+
+    def __init__(self, backend, read, write,
+                 stateless_rpc=False, advertise_refs=False):
+        Handler.__init__(self, backend, read, write)
+        self._stateless_rpc = stateless_rpc
+        self._advertise_refs = advertise_refs
 
     def default_capabilities(self):
         return ("report-status", "delete-refs")
@@ -379,15 +515,18 @@ class ReceivePackHandler(Handler):
     def handle(self):
         refs = self.backend.get_refs().items()
 
-        if refs:
-            self.proto.write_pkt_line("%s %s\x00%s\n" % (refs[0][1], refs[0][0], self.capabilities()))
-            for i in range(1, len(refs)):
-                ref = refs[i]
-                self.proto.write_pkt_line("%s %s\n" % (ref[1], ref[0]))
-        else:
-            self.proto.write_pkt_line("0000000000000000000000000000000000000000 capabilities^{} %s" % self.capabilities())
+        if self.advertise_refs or not self.stateless_rpc:
+            if refs:
+                self.proto.write_pkt_line("%s %s\x00%s\n" % (refs[0][1], refs[0][0], self.capabilities()))
+                for i in range(1, len(refs)):
+                    ref = refs[i]
+                    self.proto.write_pkt_line("%s %s\n" % (ref[1], ref[0]))
+            else:
+                self.proto.write_pkt_line("0000000000000000000000000000000000000000 capabilities^{} %s" % self.capabilities())
 
-        self.proto.write("0000")
+            self.proto.write("0000")
+            if self.advertise_refs:
+                return
 
         client_refs = []
         ref = self.proto.read_pkt_line()
@@ -404,11 +543,19 @@ class ReceivePackHandler(Handler):
             ref = self.proto.read_pkt_line()
 
         # backend can now deal with this refs and read a pack using self.read
-        self.backend.apply_pack(client_refs, self.proto.read)
-
-        # when we have read all the pack from the client, it assumes 
-        # everything worked OK.
-        # there is NO ack from the server before it reports victory.
+        status = self.backend.apply_pack(client_refs, self.proto.read)
+
+        # when we have read all the pack from the client, send a status report
+        # if the client asked for it
+        if 'report-status' in client_capabilities:
+            for name, msg in status:
+                if name == 'unpack':
+                    self.proto.write_pkt_line('unpack %s\n' % msg)
+                elif msg == 'ok':
+                    self.proto.write_pkt_line('ok %s\n' % name)
+                else:
+                    self.proto.write_pkt_line('ng %s %s\n' % (name, msg))
+            self.proto.write_pkt_line(None)
 
 
 class TCPGitRequestHandler(SocketServer.StreamRequestHandler):