Make dul-daemon a trivial wrapper around server functionality.
[jelmer/dulwich-libgit2.git] / dulwich / server.py
index 589aca9d44c8c2f357e15df393376652fc8d8d48..524a2223262b72272940a4a13ee4f9c12bb99246 100644 (file)
@@ -1,5 +1,5 @@
 # server.py -- Implementation of the server side git protocols
-# Copryight (C) 2008 John Carr <john.carr@unrouted.co.uk>
+# Copyright (C) 2008 John Carr <john.carr@unrouted.co.uk>
 #
 # This program is free software; you can redistribute it and/or
 # modify it under the terms of the GNU General Public License
@@ -27,32 +27,45 @@ Documentation/technical directory in the cgit distribution, and in particular:
 
 
 import collections
+import socket
 import SocketServer
+import sys
+import zlib
 
 from dulwich.errors import (
     ApplyDeltaError,
     ChecksumMismatch,
     GitProtocolError,
+    ObjectFormatException,
     )
+from dulwich import log_utils
 from dulwich.objects import (
     hex_to_sha,
     )
+from dulwich.pack import (
+    PackStreamReader,
+    write_pack_data,
+    )
 from dulwich.protocol import (
-    Protocol,
+    MULTI_ACK,
+    MULTI_ACK_DETAILED,
     ProtocolFile,
+    ReceivableProtocol,
+    SINGLE_ACK,
     TCP_GIT_PORT,
     ZERO_SHA,
+    ack_type,
     extract_capabilities,
     extract_want_line_capabilities,
-    SINGLE_ACK,
-    MULTI_ACK,
-    MULTI_ACK_DETAILED,
-    ack_type,
     )
-from dulwich.pack import (
-    write_pack_data,
+from dulwich.repo import (
+    Repo,
     )
 
+
+logger = log_utils.getLogger(__name__)
+
+
 class Backend(object):
     """A backend for the Git smart server implementation."""
 
@@ -102,6 +115,32 @@ class BackendRepo(object):
         raise NotImplementedError
 
 
+class PackStreamCopier(PackStreamReader):
+    """Class to verify a pack stream as it is being read.
+
+    The pack is read from a ReceivableProtocol using read() or recv() as
+    appropriate and written out to the given file-like object.
+    """
+
+    def __init__(self, read_all, read_some, outfile):
+        super(PackStreamCopier, self).__init__(read_all, read_some)
+        self.outfile = outfile
+
+    def _read(self, read, size):
+        data = super(PackStreamCopier, self)._read(read, size)
+        self.outfile.write(data)
+        return data
+
+    def verify(self):
+        """Verify a pack stream and write it to the output file.
+
+        See PackStreamReader.iterobjects for a list of exceptions this may
+        throw.
+        """
+        for _, _, _ in self.read_objects():
+            pass
+
+
 class DictBackend(Backend):
     """Trivial backend that looks up Git repositories in a dictionary."""
 
@@ -109,6 +148,7 @@ class DictBackend(Backend):
         self.repos = repos
 
     def open_repository(self, path):
+        logger.debug('Opening repository at %s', path)
         # FIXME: What to do in case there is no repo ?
         return self.repos[path]
 
@@ -116,9 +156,9 @@ class DictBackend(Backend):
 class Handler(object):
     """Smart protocol command handler base class."""
 
-    def __init__(self, backend, read, write):
+    def __init__(self, backend, proto):
         self.backend = backend
-        self.proto = Protocol(read, write)
+        self.proto = proto
         self._client_capabilities = None
 
     def capability_line(self):
@@ -146,6 +186,7 @@ class Handler(object):
                 raise GitProtocolError('Client does not support required '
                                        'capability %s.' % cap)
         self._client_capabilities = set(caps)
+        logger.info('Client capabilities: %s', caps)
 
     def has_capability(self, cap):
         if self._client_capabilities is None:
@@ -157,9 +198,9 @@ class Handler(object):
 class UploadPackHandler(Handler):
     """Protocol handler for uploading a pack to the server."""
 
-    def __init__(self, backend, args, read, write,
+    def __init__(self, backend, args, proto,
                  stateless_rpc=False, advertise_refs=False):
-        Handler.__init__(self, backend, read, write)
+        Handler.__init__(self, backend, proto)
         self.repo = backend.open_repository(args[0])
         self._graph_walker = None
         self.stateless_rpc = stateless_rpc
@@ -298,10 +339,10 @@ class ProtocolGraphWalker(object):
         while command != None:
             if command != 'want':
                 raise GitProtocolError(
-                    'Protocol got unexpected command %s' % command)
+                  'Protocol got unexpected command %s' % command)
             if sha not in values:
                 raise GitProtocolError(
-                    'Client wants invalid object %s' % sha)
+                  'Client wants invalid object %s' % sha)
             want_revs.append(sha)
             command, sha = self.read_proto_line()
 
@@ -409,10 +450,10 @@ class ProtocolGraphWalker(object):
 
     def set_ack_type(self, ack_type):
         impl_classes = {
-            MULTI_ACK: MultiAckGraphWalkerImpl,
-            MULTI_ACK_DETAILED: MultiAckDetailedGraphWalkerImpl,
-            SINGLE_ACK: SingleAckGraphWalkerImpl,
-            }
+          MULTI_ACK: MultiAckGraphWalkerImpl,
+          MULTI_ACK_DETAILED: MultiAckDetailedGraphWalkerImpl,
+          SINGLE_ACK: SingleAckGraphWalkerImpl,
+          }
         self._impl = impl_classes[ack_type](self)
 
 
@@ -521,9 +562,9 @@ class MultiAckDetailedGraphWalkerImpl(object):
 class ReceivePackHandler(Handler):
     """Protocol handler for downloading a pack from the client."""
 
-    def __init__(self, backend, args, read, write,
+    def __init__(self, backend, args, proto,
                  stateless_rpc=False, advertise_refs=False):
-        Handler.__init__(self, backend, read, write)
+        Handler.__init__(self, backend, proto)
         self.repo = backend.open_repository(args[0])
         self.stateless_rpc = stateless_rpc
         self.advertise_refs = advertise_refs
@@ -531,56 +572,45 @@ class ReceivePackHandler(Handler):
     def capabilities(self):
         return ("report-status", "delete-refs")
 
-    def _apply_pack(self, refs, read):
+    def _apply_pack(self, refs):
         f, commit = self.repo.object_store.add_thin_pack()
-        all_exceptions = (IOError, OSError, ChecksumMismatch, ApplyDeltaError)
+        all_exceptions = (IOError, OSError, ChecksumMismatch, ApplyDeltaError,
+                          AssertionError, socket.error, zlib.error,
+                          ObjectFormatException)
         status = []
-        unpack_error = None
         # TODO: more informative error messages than just the exception string
         try:
-            # TODO: decode the pack as we stream to avoid blocking reads beyond
-            # the end of data (when using HTTP/1.1 chunked encoding)
-            while True:
-                data = read(10240)
-                if not data:
-                    break
-                f.write(data)
-        except all_exceptions, e:
-            unpack_error = str(e).replace('\n', '')
-        try:
-            commit()
-        except all_exceptions, e:
-            if not unpack_error:
-                unpack_error = str(e).replace('\n', '')
-
-        if unpack_error:
-            status.append(('unpack', unpack_error))
-        else:
+            PackStreamCopier(self.proto.read, self.proto.recv, f).verify()
+            p = commit()
+            if not p:
+                raise IOError('Failed to write pack')
+            p.check()
             status.append(('unpack', 'ok'))
+        except all_exceptions, e:
+            status.append(('unpack', str(e).replace('\n', '')))
+            # The pack may still have been moved in, but it may contain broken
+            # objects. We trust a later GC to clean it up.
 
         for oldsha, sha, ref in refs:
-            ref_error = None
+            ref_status = 'ok'
             try:
                 if sha == ZERO_SHA:
-                    if not self.has_capability('delete-refs'):
+                    if not 'delete-refs' in self.capabilities():
                         raise GitProtocolError(
                           'Attempted to delete refs without delete-refs '
                           'capability.')
                     try:
                         del self.repo.refs[ref]
                     except all_exceptions:
-                        ref_error = 'failed to delete'
+                        ref_status = 'failed to delete'
                 else:
                     try:
                         self.repo.refs[ref] = sha
                     except all_exceptions:
-                        ref_error = 'failed to write'
+                        ref_status = 'failed to write'
             except KeyError, e:
-                ref_error = 'bad ref'
-            if ref_error:
-                status.append((ref, ref_error))
-            else:
-                status.append((ref, 'ok'))
+                ref_status = 'bad ref'
+            status.append((ref, ref_status))
 
         return status
 
@@ -619,7 +649,7 @@ class ReceivePackHandler(Handler):
             ref = self.proto.read_pkt_line()
 
         # backend can now deal with this refs and read a pack using self.read
-        status = self.repo._apply_pack(client_refs, self.proto.read)
+        status = self._apply_pack(client_refs)
 
         # when we have read all the pack from the client, send a status report
         # if the client asked for it
@@ -634,21 +664,28 @@ class ReceivePackHandler(Handler):
             self.proto.write_pkt_line(None)
 
 
+# Default handler classes for git services.
+DEFAULT_HANDLERS = {
+  'git-upload-pack': UploadPackHandler,
+  'git-receive-pack': ReceivePackHandler,
+  }
+
+
 class TCPGitRequestHandler(SocketServer.StreamRequestHandler):
 
+    def __init__(self, handlers, *args, **kwargs):
+        self.handlers = handlers and handlers or DEFAULT_HANDLERS
+        SocketServer.StreamRequestHandler.__init__(self, *args, **kwargs)
+
     def handle(self):
-        proto = Protocol(self.rfile.read, self.wfile.write)
+        proto = ReceivableProtocol(self.connection.recv, self.wfile.write)
         command, args = proto.read_cmd()
+        logger.info('Handling %s request, args=%s', command, args)
 
-        # switch case to handle the specific git command
-        if command == 'git-upload-pack':
-            cls = UploadPackHandler
-        elif command == 'git-receive-pack':
-            cls = ReceivePackHandler
-        else:
-            return
-
-        h = cls(self.server.backend, args, self.rfile.read, self.wfile.write)
+        cls = self.handlers.get(command, None)
+        if not callable(cls):
+            raise GitProtocolError('Invalid service %s' % command)
+        h = cls(self.server.backend, args, proto)
         h.handle()
 
 
@@ -657,6 +694,33 @@ class TCPGitServer(SocketServer.TCPServer):
     allow_reuse_address = True
     serve = SocketServer.TCPServer.serve_forever
 
-    def __init__(self, backend, listen_addr, port=TCP_GIT_PORT):
+    def _make_handler(self, *args, **kwargs):
+        return TCPGitRequestHandler(self.handlers, *args, **kwargs)
+
+    def __init__(self, backend, listen_addr, port=TCP_GIT_PORT, handlers=None):
         self.backend = backend
-        SocketServer.TCPServer.__init__(self, (listen_addr, port), TCPGitRequestHandler)
+        self.handlers = handlers
+        logger.info('Listening for TCP connections on %s:%d', listen_addr, port)
+        SocketServer.TCPServer.__init__(self, (listen_addr, port),
+                                        self._make_handler)
+
+    def verify_request(self, request, client_address):
+        logger.info('Handling request from %s', client_address)
+        return True
+
+    def handle_error(self, request, client_address):
+        logger.exception('Exception happened during processing of request '
+                         'from %s', client_address)
+
+
+def main(argv=sys.argv):
+    """Entry point for starting a TCP git server."""
+    if len(argv) > 1:
+        gitdir = argv[1]
+    else:
+        gitdir = '.'
+
+    log_utils.default_logging_config()
+    backend = DictBackend({'/': Repo(gitdir)})
+    server = TCPGitServer(backend, 'localhost')
+    server.serve_forever()