Working pass at an HTTP server, only dumb at the moment
author Dave Borowitz <dborowitz@google.com>
Thu, 28 Jan 2010 20:47:51 +0000 (12:47 -0800)
committer Dave Borowitz <dborowitz@google.com>
Tue, 9 Feb 2010 17:51:20 +0000 (09:51 -0800)
Change-Id: I92c883b2274252ad83bc044f87fc8608b006b6ab

bin/dul-web [new file with mode: 0644]
dulwich/object_store.py
dulwich/repo.py
dulwich/server.py
dulwich/web.py [new file with mode: 0644]
setup.py

diff --git a/bin/dul-web b/bin/dul-web
new file mode 100644 (file)
index 0000000..35a8ecb
--- /dev/null
@@ -0,0 +1,37 @@
+#!/usr/bin/python
+# dul-web - HTTP-based git server
+# Copyright (C) 2010 David Borowitz <dborowitz@google.com>
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; version 2
+# of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+# MA  02110-1301, USA.
+
+import os
+import sys
+from dulwich.repo import Repo
+from dulwich.server import GitBackend
+from dulwich.web import HTTPGitApplication
+from wsgiref.simple_server import make_server
+
+if __name__ == "__main__":
+    # Serve the repository named by the first command-line argument, or
+    # the current working directory when no argument is given.
+    args = sys.argv[1:]
+    if args:
+        gitdir = args[0]
+    else:
+        gitdir = os.getcwd()
+
+    # TODO: allow serving on other ports via command-line flag
+    port = 8000
+    app = HTTPGitApplication(GitBackend(Repo(gitdir)))
+    make_server('', port, app).serve_forever()
index cad989443e7f9745a0f287b9ca6ee7cb08cb1248..f95f8afc26fd39f160f7f094f076b1174a01a47f 100644 (file)
@@ -20,6 +20,7 @@
 """Git object store interfaces and implementation."""
 
 
+import errno
 import itertools
 import os
 import stat
@@ -269,15 +270,21 @@ class DiskObjectStore(BaseObjectStore):
     def packs(self):
         """List with pack objects."""
         if self._pack_cache is None:
-            self._pack_cache = list(self._load_packs())
+            self._pack_cache = self._load_packs()
         return self._pack_cache
 
     def _load_packs(self):
         if not os.path.exists(self.pack_dir):
-            return
+            return []
+        pack_files = []
         for name in os.listdir(self.pack_dir):
+            # TODO: verify that idx exists first
             if name.startswith("pack-") and name.endswith(".pack"):
-                yield Pack(os.path.join(self.pack_dir, name[:-len(".pack")]))
+                filename = os.path.join(self.pack_dir, name)
+                pack_files.append((os.stat(filename).st_mtime, filename))
+        pack_files.sort(reverse=True)
+        suffix_len = len(".pack")
+        return [Pack(f[:-suffix_len]) for _, f in pack_files]
 
     def _add_known_pack(self, path):
         """Add a newly appeared pack to the cache by path.
index 41581d91fbe8b7bb3ee19fba203604b6f5fd4893..dca20d34d9d26aefd093cd81b5510cfc12ca0d15 100644 (file)
@@ -522,6 +522,18 @@ class BaseRepo(object):
         self.object_store = object_store
         self.refs = refs
 
+    def get_named_file(self, path):
+        """Get a file from the control dir with a specific name.
+
+        Although the filename should be interpreted as a filename relative to
+        the control dir in a disk-backed Repo, the object returned need not be
+        pointing to a file in that location.
+
+        This base implementation always raises; subclasses are expected to
+        override it.
+
+        :param path: The path to the file, relative to the control dir.
+        :return: An open file object, or None if the file does not exist.
+        :raise NotImplementedError: always, in this base class.
+        """
+        raise NotImplementedError(self.get_named_file)
+
     def fetch(self, target, determine_wants=None, progress=None):
         """Fetch objects into another repository.
 
@@ -685,6 +697,23 @@ class Repo(BaseRepo):
         """Return the path of the control directory."""
         return self._controldir
 
+    def get_named_file(self, path):
+        """Open a named file that lives under the control directory.
+
+        The name is taken relative to the control dir of this disk-backed
+        Repo; any leading slashes are stripped before it is joined onto the
+        control dir path.
+
+        :param path: The path to the file, relative to the control dir.
+        :return: A file object opened for binary reading, or None if the
+            file does not exist.
+        """
+        full_path = os.path.join(self.controldir(), path.lstrip('/'))
+        try:
+            return open(full_path, 'rb')
+        except (IOError, OSError), e:
+            # Only a missing file maps to None; anything else propagates.
+            if e.errno != errno.ENOENT:
+                raise
+            return None
+
     def index_path(self):
         """Return path to the index file."""
         return os.path.join(self.controldir(), INDEX_FILENAME)
index eff09ac6caed2917b419a43250d533c14efbe4ff..1824cfd82b89dcbb2a4e2ddf5e33d0cf02544fe9 100644 (file)
@@ -120,10 +120,13 @@ class Handler(object):
 class UploadPackHandler(Handler):
     """Protocol handler for uploading a pack to the server."""
 
-    def __init__(self, backend, read, write):
+    def __init__(self, backend, read, write,
+                 stateless_rpc=False, advertise_refs=False):
         Handler.__init__(self, backend, read, write)
         self._client_capabilities = None
         self._graph_walker = None
+        self._stateless_rpc = stateless_rpc
+        self._advertise_refs = advertise_refs
 
     def default_capabilities(self):
         return ("multi_ack", "side-band-64k", "thin-pack", "ofs-delta")
@@ -402,6 +405,12 @@ class MultiAckGraphWalkerImpl(object):
 class ReceivePackHandler(Handler):
     """Protocol handler for downloading a pack to the client."""
 
+    def __init__(self, backend, read, write,
+                 stateless_rpc=False, advertise_refs=False):
+        """Create a receive-pack handler.
+
+        :param backend: Passed through to Handler.__init__.
+        :param read: Passed through to Handler.__init__.
+        :param write: Passed through to Handler.__init__.
+        :param stateless_rpc: Stored as self._stateless_rpc. Presumably marks
+            the handler as serving a single stateless (HTTP) exchange --
+            TODO confirm against the code that reads it.
+        :param advertise_refs: Stored as self._advertise_refs. Presumably
+            asks the handler to only advertise refs -- TODO confirm.
+        """
+        Handler.__init__(self, backend, read, write)
+        self._stateless_rpc = stateless_rpc
+        self._advertise_refs = advertise_refs
+
     def default_capabilities(self):
         return ("report-status", "delete-refs")
 
diff --git a/dulwich/web.py b/dulwich/web.py
new file mode 100644 (file)
index 0000000..4a47dbd
--- /dev/null
@@ -0,0 +1,275 @@
+# web.py -- WSGI smart-http server
+# Copyright (C) 2010 Google, Inc.
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; version 2
+# or (at your option) any later version of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+# MA  02110-1301, USA.
+
+"""HTTP server for dulwich that implements the git smart HTTP protocol."""
+
+from cStringIO import StringIO
+import cgi
+import os
+import re
+import time
+
+from dulwich.objects import (
+    Tag,
+    )
+from dulwich.repo import (
+    Repo,
+    )
+from dulwich.server import (
+    GitBackend,
+    ReceivePackHandler,
+    UploadPackHandler,
+    )
+
+# HTTP status lines, used as the status argument of HTTPGitRequest.respond().
+HTTP_OK = '200 OK'
+HTTP_NOT_FOUND = '404 Not Found'
+HTTP_FORBIDDEN = '403 Forbidden'
+
+
+def date_time_string(timestamp=None):
+    """Format a timestamp as an HTTP date string in GMT.
+
+    :param timestamp: Seconds since the epoch; defaults to the current time.
+    :return: A string such as 'Thu, 01 Jan 1970 00:00:00 GMT'.
+    """
+    # Based on BaseHTTPServer.py in python2.5
+    # Day and month names come from these fixed tables, so the output does
+    # not depend on the process locale.
+    weekdays = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']
+    months = [None,
+              'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
+              'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
+    if timestamp is None:
+        timestamp = time.time()
+    year, month, day, hh, mm, ss, wd = time.gmtime(timestamp)[:7]
+    return '%s, %02d %3s %4d %02d:%02d:%02d GMT' % (
+            weekdays[wd], day, months[month], year, hh, mm, ss)
+
+
+def send_file(req, f, content_type):
+    """Stream an open file-like object as the body of a response.
+
+    :param req: The HTTPGitRequest object to send output to.
+    :param f: An open file-like object to send, or None to produce a 404;
+        a non-None file is always closed before the generator finishes.
+    :param content_type: The MIME type for the file.
+    :yield: The contents of the file, in chunks of at most 10240 bytes.
+    """
+    if f is None:
+        yield req.not_found('File not found')
+        return
+    chunk_size = 10240
+    try:
+        req.respond(HTTP_OK, content_type)
+        chunk = f.read(chunk_size)
+        while chunk:
+            yield chunk
+            chunk = f.read(chunk_size)
+    except IOError:
+        yield req.not_found('Error reading file')
+    finally:
+        f.close()
+
+
+def get_text_file(req, backend, mat):
+    """Serve the control-dir file named by the matched path as text/plain.
+
+    The response is marked as non-cacheable.
+
+    :param req: The HTTPGitRequest for this request.
+    :param backend: Backend whose repo provides get_named_file().
+    :param mat: The regex match for the request path; the whole matched
+        text is used as the file name.
+    :return: The generator produced by send_file().
+    """
+    req.nocache()
+    named_file = backend.repo.get_named_file(mat.group())
+    return send_file(req, named_file, 'text/plain')
+
+
+def get_loose_object(req, backend, mat):
+    """Serve a single loose object.
+
+    :param req: The HTTPGitRequest for this request.
+    :param backend: Backend whose object_store is consulted.
+    :param mat: The regex match for the request path; groups 1 and 2 are
+        concatenated to form the object's hex sha.
+    :yield: The result of as_legacy_object() for the object, or a 404
+        message if the object is not loose or cannot be read.
+    """
+    sha = mat.group(1) + mat.group(2)
+    object_store = backend.object_store
+    if not object_store.contains_loose(sha):
+        yield req.not_found('Object not found')
+        return
+    try:
+        data = object_store[sha].as_legacy_object()
+    except IOError:
+        yield req.not_found('Error reading object')
+        # The 404 has been sent; stop here rather than falling through to
+        # a second response with no data to send.
+        return
+    req.cache_forever()
+    req.respond(HTTP_OK, 'application/x-git-loose-object')
+    yield data
+
+
+def get_pack_file(req, backend, mat):
+    """Serve the pack file named by the matched path.
+
+    The response is marked as cacheable forever.
+
+    :param req: The HTTPGitRequest for this request.
+    :param backend: Backend whose repo provides get_named_file().
+    :param mat: The regex match for the request path; the whole matched
+        text is used as the file name.
+    :return: The generator produced by send_file().
+    """
+    req.cache_forever()
+    # send_file() takes exactly (req, f, content_type).
+    return send_file(req, backend.repo.get_named_file(mat.group()),
+                     'application/x-git-packed-objects')
+
+
+def get_idx_file(req, backend, mat):
+    """Serve the pack index file named by the matched path.
+
+    The response is marked as cacheable forever.
+
+    :param req: The HTTPGitRequest for this request.
+    :param backend: Backend whose repo provides get_named_file().
+    :param mat: The regex match for the request path; the whole matched
+        text is used as the file name.
+    :return: The generator produced by send_file().
+    """
+    req.cache_forever()
+    # send_file() takes exactly (req, f, content_type).
+    return send_file(req, backend.repo.get_named_file(mat.group()),
+                     'application/x-git-packed-objects-toc')
+
+
+# Maps the service name given by a client (query parameter or POST path) to
+# the handler class that implements it.
+services = {'git-upload-pack': UploadPackHandler,
+            'git-receive-pack': ReceivePackHandler}
+def get_info_refs(req, backend, mat):
+    """Serve /info/refs for both smart and dumb clients.
+
+    When the query string names a service, the matching handler is run with
+    advertise_refs=True and its buffered output is sent, preceded by a
+    '# service=...' pkt-line. Otherwise a plain-text listing of refs is
+    produced, with an extra '^{}' line for each tag giving the object it
+    ultimately points at.
+
+    :param req: The HTTPGitRequest for this request.
+    :param backend: Backend providing get_refs() and repo.
+    :param mat: The regex match for the request path (unused).
+    :yield: The response body, or a 403 message for an unknown service.
+    """
+    # QUERY_STRING may be absent from the WSGI environ when it is empty.
+    params = cgi.parse_qs(req.environ.get('QUERY_STRING', ''))
+    service = params.get('service', [None])[0]
+    if service:
+        handler_cls = services.get(service, None)
+        if handler_cls is None:
+            yield req.forbidden('Unsupported service %s' % service)
+            # Stop after the 403; there is no handler class to run.
+            return
+        req.nocache()
+        req.respond(HTTP_OK, 'application/x-%s-advertisement' % service)
+        output = StringIO()
+        dummy_input = StringIO()  # GET request, handler doesn't need to read
+        handler = handler_cls(backend, dummy_input.read, output.write,
+                              stateless_rpc=True, advertise_refs=True)
+        handler.proto.write_pkt_line('# service=%s\n' % service)
+        handler.proto.write_pkt_line(None)
+        handler.handle()
+        yield output.getvalue()
+    else:
+        # non-smart fallback
+        # TODO: select_getanyfile() (see http-backend.c)
+        req.nocache()
+        req.respond(HTTP_OK, 'text/plain')
+        refs = backend.get_refs()
+        for name in sorted(refs.iterkeys()):
+            # get_refs() includes HEAD as a special case, but we don't want to
+            # advertise it
+            if name == 'HEAD':
+                continue
+            sha = refs[name]
+            o = backend.repo[sha]
+            if not o:
+                continue
+            yield '%s\t%s\n' % (sha, name)
+            if isinstance(o, Tag):
+                # Follow chains of tags down to the first non-tag object.
+                while isinstance(o, Tag):
+                    _, sha = o.object
+                    o = backend.repo[sha]
+                if not o:
+                    continue
+                yield '%s\t%s^{}\n' % (o.sha(), name)
+
+
+def get_info_packs(req, backend, mat):
+    """List the packs in the object store, one 'P pack-<name>.pack' per line.
+
+    The response is text/plain and marked as non-cacheable.
+
+    :param req: The HTTPGitRequest for this request.
+    :param backend: Backend whose object_store.packs is listed.
+    :param mat: The regex match for the request path (unused).
+    :yield: One line per pack.
+    """
+    req.nocache()
+    req.respond(HTTP_OK, 'text/plain')
+    store = backend.object_store
+    for p in store.packs:
+        pack_name = p.name()
+        yield 'P pack-%s.pack\n' % pack_name
+
+
+def handle_service_request(req, backend, mat):
+    """Run a smart-HTTP service for a POST request.
+
+    The service name is the matched path with leading slashes removed. The
+    handler reads from the request's wsgi.input and its output is buffered
+    and sent as a single chunk.
+
+    :param req: The HTTPGitRequest for this request.
+    :param backend: Backend passed to the handler.
+    :param mat: The regex match for the request path.
+    :yield: The handler's output, or a 403 message for an unknown service.
+    """
+    service = mat.group().lstrip('/')
+    handler_cls = services.get(service, None)
+    if handler_cls is None:
+        yield req.forbidden('Unsupported service %s' % service)
+        # Stop after the 403; there is no handler class to run.
+        return
+    req.nocache()
+    req.respond(HTTP_OK, 'application/x-%s-response' % service)
+
+    output = StringIO()
+    request_body = req.environ['wsgi.input']
+    handler = handler_cls(backend, request_body.read, output.write,
+                          stateless_rpc=True)
+    handler.handle()
+    yield output.getvalue()
+
+
+class HTTPGitRequest(object):
+    """State for one git HTTP request and its pending response.
+
+    :ivar environ: the WSGI environment for the request.
+    """
+
+    def __init__(self, environ, start_response):
+        self._start_response = start_response
+        self.environ = environ
+        self._headers = []
+        self._cache_headers = []
+
+    def add_header(self, name, value):
+        """Queue a (name, value) header for the response."""
+        header = (name, value)
+        self._headers.append(header)
+
+    def respond(self, status=HTTP_OK, content_type=None, headers=None):
+        """Start the response with the given status.
+
+        Headers are sent in this order: those already queued, then the
+        headers argument, then Content-Type (if given), then the cache
+        headers chosen by nocache() or cache_forever().
+        """
+        pending = self._headers
+        if headers:
+            pending.extend(headers)
+        if content_type:
+            pending.append(('Content-Type', content_type))
+        pending.extend(self._cache_headers)
+
+        self._start_response(status, pending)
+
+    def _respond_error(self, status, message):
+        """Drop any cache headers, start a text/plain response, return message."""
+        self._cache_headers = []
+        self.respond(status, 'text/plain')
+        return message
+
+    def not_found(self, message):
+        """Begin a HTTP 404 response and return the text of a message."""
+        return self._respond_error(HTTP_NOT_FOUND, message)
+
+    def forbidden(self, message):
+        """Begin a HTTP 403 response and return the text of a message."""
+        return self._respond_error(HTTP_FORBIDDEN, message)
+
+    def nocache(self):
+        """Select cache headers telling the client never to cache."""
+        self._cache_headers = [
+            ('Expires', 'Fri, 01 Jan 1980 00:00:00 GMT'),
+            ('Pragma', 'no-cache'),
+            ('Cache-Control', 'no-cache, max-age=0, must-revalidate'),
+            ]
+
+    def cache_forever(self):
+        """Select cache headers telling the client to cache forever.
+
+        The Expires value is built from the current time plus 31536000
+        seconds (365 days).
+        """
+        one_year = 31536000
+        now = time.time()
+        expires = now + one_year
+        self._cache_headers = [
+            ('Date', date_time_string(now)),
+            ('Expires', date_time_string(expires)),
+            ('Cache-Control', 'public, max-age=31536000'),
+            ]
+
+
+class HTTPGitApplication(object):
+    """Class encapsulating the state of a git WSGI application.
+
+    :ivar backend: the Backend object backing this application
+    """
+
+    # Maps (HTTP method, compiled path pattern) to the handler function.
+    # Patterns are anchored only at the end and are applied with search(),
+    # so any prefix before the matched suffix is accepted.
+    services = {
+        ('GET', re.compile('/HEAD$')): get_text_file,
+        ('GET', re.compile('/info/refs$')): get_info_refs,
+        ('GET', re.compile('/objects/info/alternates$')): get_text_file,
+        ('GET', re.compile('/objects/info/http-alternates$')): get_text_file,
+        ('GET', re.compile('/objects/info/packs$')): get_info_packs,
+        ('GET', re.compile('/objects/([0-9a-f]{2})/([0-9a-f]{38})$')): get_loose_object,
+        ('GET', re.compile('/objects/pack/pack-([0-9a-f]{40})\\.pack$')): get_pack_file,
+        ('GET', re.compile('/objects/pack/pack-([0-9a-f]{40})\\.idx$')): get_idx_file,
+
+        ('POST', re.compile('/git-upload-pack$')): handle_service_request,
+        ('POST', re.compile('/git-receive-pack$')): handle_service_request,
+    }
+
+    def __init__(self, backend):
+        self.backend = backend
+
+    def __call__(self, environ, start_response):
+        """WSGI entry point: dispatch to the first matching handler.
+
+        :param environ: The WSGI environment.
+        :param start_response: The WSGI start_response callable.
+        :return: An iterable of response body strings.
+        """
+        # PATH_INFO may be absent from the WSGI environ when it is empty.
+        path = environ.get('PATH_INFO', '')
+        method = environ['REQUEST_METHOD']
+        req = HTTPGitRequest(environ, start_response)
+        for (smethod, spath), handler in self.services.iteritems():
+            if smethod != method:
+                continue
+            mat = spath.search(path)
+            if mat:
+                return handler(req, self.backend, mat)
+        # Wrap the message in a list: WSGI expects an iterable of strings,
+        # and a bare string would be sent one character at a time.
+        return [req.not_found('Sorry, that method is not supported')]
index 7f87c018e83616e02b73c31db7bc57640b3c65f3..0b0f219e244aea0e19c183d67d2482684a8350a4 100755 (executable)
--- a/setup.py
+++ b/setup.py
@@ -51,7 +51,7 @@ setup(name='dulwich',
       in one of the Monty Python sketches.
       """,
       packages=['dulwich', 'dulwich.tests'],
-      scripts=['bin/dulwich', 'bin/dul-daemon'],
+      scripts=['bin/dulwich', 'bin/dul-daemon', 'bin/dul-web'],
       features = {'speedups': speedups},
       ext_modules = mandatory_ext_modules,
       )