1 # web.py -- WSGI smart-http server
2 # Copyright (C) 2010 Google, Inc.
3 # Copyright (C) 2012 Jelmer Vernooij <jelmer@samba.org>
5 # This program is free software; you can redistribute it and/or
6 # modify it under the terms of the GNU General Public License
7 # as published by the Free Software Foundation; version 2
8 # or (at your option) any later version of the License.
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU General Public License for more details.
15 # You should have received a copy of the GNU General Public License
16 # along with this program; if not, write to the Free Software
17 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
20 """HTTP server for dulwich that implements the git smart HTTP protocol."""
22 from io import BytesIO
30 from urlparse import parse_qs
32 from dulwich import log_utils
33 from dulwich.protocol import (
36 from dulwich.repo import (
39 from dulwich.server import (
43 generate_objects_info_packs,
47 logger = log_utils.getLogger(__name__)
52 HTTP_NOT_FOUND = '404 Not Found'
53 HTTP_FORBIDDEN = '403 Forbidden'
54 HTTP_ERROR = '500 Internal Server Error'
def date_time_string(timestamp=None):
    """Format a timestamp as an HTTP date string.

    :param timestamp: Seconds since the epoch. When None, the current time
        is used.
    :return: A string of the form 'Thu, 01 Jan 1970 00:00:00 GMT'.
    """
    # From BaseHTTPRequestHandler.date_time_string in BaseHTTPServer.py in the
    # Python 2.6.5 standard library, following modifications:
    #  - Made a global rather than an instance method.
    #  - weekdayname and monthname are renamed and locals rather than class
    #    variables.
    # Copyright (c) 2001-2010 Python Software Foundation; All Rights Reserved
    weekdays = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']
    # Index 0 is a placeholder: time.gmtime() numbers months starting at 1.
    months = [None,
              'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
              'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
    if timestamp is None:
        timestamp = time.time()
    # Only the first seven struct_time fields are needed for formatting.
    year, month, day, hh, mm, ss, wd = time.gmtime(timestamp)[:7]
    # The zone designator must be the literal "GMT"; the timestamp is
    # broken down with gmtime() so the value really is in GMT.
    return '%s, %02d %3s %4d %02d:%02d:%02d GMT' % (
        weekdays[wd], day, months[month], year, hh, mm, ss)
76 """Extract the URL prefix from a regex match.
78 :param mat: A regex match object.
79 :returns: The URL prefix, defined as the text before the match in the
80 original string. Normalized to start with one leading slash and end with
83 return '/' + mat.string[:mat.start()].strip('/')
def get_repo(backend, mat):
    """Open the repository addressed by a matched request URL.

    :param backend: Object providing open_repository(path).
    :param mat: A regex match object for the request path.
    :return: The result of backend.open_repository() for the URL prefix
        that precedes the match.
    """
    repo_path = url_prefix(mat)
    return backend.open_repository(repo_path)
91 def send_file(req, f, content_type):
92 """Send a file-like object to the request output.
94 :param req: The HTTPGitRequest object to send output to.
95 :param f: An open file-like object to send; will be closed.
96 :param content_type: The MIME type for the file.
97 :return: Iterator over the contents of the file, as chunks.
100 yield req.not_found('File not found')
103 req.respond(HTTP_OK, content_type)
112 yield req.error('Error reading file')
def _url_to_path(url):
    """Rewrite a '/'-separated URL path using the local OS path separator.

    :param url: A URL path string.
    :return: The same string with every '/' replaced by os.path.sep.
    """
    segments = url.split('/')
    return os.path.sep.join(segments)
122 def get_text_file(req, backend, mat):
124 path = _url_to_path(mat.group())
125 logger.info('Sending plain text file %s', path)
126 return send_file(req, get_repo(backend, mat).get_named_file(path),
130 def get_loose_object(req, backend, mat):
131 sha = mat.group(1) + mat.group(2)
132 logger.info('Sending loose object %s', sha)
133 object_store = get_repo(backend, mat).object_store
134 if not object_store.contains_loose(sha):
135 yield req.not_found('Object not found')
138 data = object_store[sha].as_legacy_object()
140 yield req.error('Error reading object')
143 req.respond(HTTP_OK, 'application/x-git-loose-object')
147 def get_pack_file(req, backend, mat):
149 path = _url_to_path(mat.group())
150 logger.info('Sending pack file %s', path)
151 return send_file(req, get_repo(backend, mat).get_named_file(path),
152 'application/x-git-packed-objects')
155 def get_idx_file(req, backend, mat):
157 path = _url_to_path(mat.group())
158 logger.info('Sending pack file %s', path)
159 return send_file(req, get_repo(backend, mat).get_named_file(path),
160 'application/x-git-packed-objects-toc')
163 def get_info_refs(req, backend, mat):
164 params = parse_qs(req.environ['QUERY_STRING'])
165 service = params.get('service', [None])[0]
166 if service and not req.dumb:
167 handler_cls = req.handlers.get(service, None)
168 if handler_cls is None:
169 yield req.forbidden('Unsupported service %s' % service)
172 write = req.respond(HTTP_OK, 'application/x-%s-advertisement' % service)
173 proto = ReceivableProtocol(BytesIO().read, write)
174 handler = handler_cls(backend, [url_prefix(mat)], proto,
175 http_req=req, advertise_refs=True)
176 handler.proto.write_pkt_line('# service=%s\n' % service)
177 handler.proto.write_pkt_line(None)
181 # TODO: select_getanyfile() (see http-backend.c)
183 req.respond(HTTP_OK, 'text/plain')
184 logger.info('Emulating dumb info/refs')
185 repo = get_repo(backend, mat)
186 for text in generate_info_refs(repo):
190 def get_info_packs(req, backend, mat):
192 req.respond(HTTP_OK, 'text/plain')
193 logger.info('Emulating dumb info/packs')
194 return generate_objects_info_packs(get_repo(backend, mat))
class _LengthLimitedFile(object):
    """Wrapper class to limit the length of reads from a file-like object.

    This is used to ensure EOF is read from the wsgi.input object once
    Content-Length bytes are read. This behavior is required by the WSGI spec
    but not implemented in wsgiref as of 2.5.
    """

    def __init__(self, input, max_bytes):
        """Wrap a file-like object.

        :param input: Object providing a read(size) method.
        :param max_bytes: Total number of bytes that may be read through
            this wrapper before it reports EOF.
        """
        self._input = input
        self._bytes_avail = max_bytes

    def read(self, size=-1):
        """Read up to size bytes without exceeding the remaining budget.

        :param size: Maximum number of bytes to return. None or any negative
            value means "everything that is still allowed".
        :return: The data read from the wrapped object, or an empty string
            once the budget is exhausted.
        """
        if self._bytes_avail <= 0:
            return ''
        # Clamp unbounded and oversized requests to the remaining budget so a
        # negative size can never read past the limit.
        if size is None or size < 0 or size > self._bytes_avail:
            size = self._bytes_avail
        data = self._input.read(size)
        # Charge what was actually delivered, not what was asked for: a short
        # read from the wrapped object must not shrink the readable body.
        self._bytes_avail -= len(data)
        return data

    # TODO: support more methods as necessary
220 def handle_service_request(req, backend, mat):
221 service = mat.group().lstrip('/')
222 logger.info('Handling service request for %s', service)
223 handler_cls = req.handlers.get(service, None)
224 if handler_cls is None:
225 yield req.forbidden('Unsupported service %s' % service)
228 write = req.respond(HTTP_OK, 'application/x-%s-result' % service)
229 proto = ReceivableProtocol(req.environ['wsgi.input'].read, write)
230 handler = handler_cls(backend, [url_prefix(mat)], proto, http_req=req)
234 class HTTPGitRequest(object):
235 """Class encapsulating the state of a single git HTTP request.
237 :ivar environ: the WSGI environment for the request.
240 def __init__(self, environ, start_response, dumb=False, handlers=None):
241 self.environ = environ
243 self.handlers = handlers
244 self._start_response = start_response
245 self._cache_headers = []
248 def add_header(self, name, value):
249 """Add a header to the response."""
250 self._headers.append((name, value))
252 def respond(self, status=HTTP_OK, content_type=None, headers=None):
253 """Begin a response with the given status and other headers."""
255 self._headers.extend(headers)
257 self._headers.append(('Content-Type', content_type))
258 self._headers.extend(self._cache_headers)
260 return self._start_response(status, self._headers)
262 def not_found(self, message):
263 """Begin a HTTP 404 response and return the text of a message."""
264 self._cache_headers = []
265 logger.info('Not found: %s', message)
266 self.respond(HTTP_NOT_FOUND, 'text/plain')
269 def forbidden(self, message):
270 """Begin a HTTP 403 response and return the text of a message."""
271 self._cache_headers = []
272 logger.info('Forbidden: %s', message)
273 self.respond(HTTP_FORBIDDEN, 'text/plain')
276 def error(self, message):
277 """Begin a HTTP 500 response and return the text of a message."""
278 self._cache_headers = []
279 logger.error('Error: %s', message)
280 self.respond(HTTP_ERROR, 'text/plain')
284 """Set the response to never be cached by the client."""
285 self._cache_headers = [
286 ('Expires', 'Fri, 01 Jan 1980 00:00:00 GMT'),
287 ('Pragma', 'no-cache'),
288 ('Cache-Control', 'no-cache, max-age=0, must-revalidate'),
291 def cache_forever(self):
292 """Set the response to be cached forever by the client."""
294 self._cache_headers = [
295 ('Date', date_time_string(now)),
296 ('Expires', date_time_string(now + 31536000)),
297 ('Cache-Control', 'public, max-age=31536000'),
class HTTPGitApplication(object):
    """Class encapsulating the state of a git WSGI application.

    :ivar backend: the Backend object backing this application
    """

    # Routing table: (HTTP method, compiled path pattern) -> handler function.
    # Patterns are anchored at the end only, so whatever precedes the match
    # in the request path is treated as the repository prefix.
    services = {
        ('GET', re.compile('/HEAD$')): get_text_file,
        ('GET', re.compile('/info/refs$')): get_info_refs,
        ('GET', re.compile('/objects/info/alternates$')): get_text_file,
        ('GET', re.compile('/objects/info/http-alternates$')): get_text_file,
        ('GET', re.compile('/objects/info/packs$')): get_info_packs,
        ('GET', re.compile('/objects/([0-9a-f]{2})/([0-9a-f]{38})$')): get_loose_object,
        ('GET', re.compile('/objects/pack/pack-([0-9a-f]{40})\\.pack$')): get_pack_file,
        ('GET', re.compile('/objects/pack/pack-([0-9a-f]{40})\\.idx$')): get_idx_file,

        ('POST', re.compile('/git-upload-pack$')): handle_service_request,
        ('POST', re.compile('/git-receive-pack$')): handle_service_request,
    }

    def __init__(self, backend, dumb=False, handlers=None, fallback_app=None):
        """Create the application.

        :param backend: the Backend object backing this application.
        :param dumb: passed through to every HTTPGitRequest.
        :param handlers: optional mapping merged over DEFAULT_HANDLERS.
        :param fallback_app: optional WSGI application invoked when no
            service matches the request.
        """
        self.backend = backend
        self.dumb = dumb
        # Copy the defaults so per-application overrides never leak into
        # the shared DEFAULT_HANDLERS mapping.
        self.handlers = dict(DEFAULT_HANDLERS)
        self.fallback_app = fallback_app
        if handlers is not None:
            self.handlers.update(handlers)

    def __call__(self, environ, start_response):
        """WSGI entry point: dispatch the request to the matching service.

        :param environ: the WSGI environment for the request.
        :param start_response: the WSGI start_response callable.
        :return: The response iterable produced by the selected handler,
            by the fallback application, or a one-element 404 body.
        """
        path = environ['PATH_INFO']
        method = environ['REQUEST_METHOD']
        req = HTTPGitRequest(environ, start_response, dumb=self.dumb,
                             handlers=self.handlers)
        # environ['QUERY_STRING'] has qs args
        handler = None
        mat = None
        # items() works on both Python 2 and 3 and avoids a second lookup.
        for (smethod, spath), candidate in self.services.items():
            if smethod != method:
                continue
            mat = spath.search(path)
            if mat:
                handler = candidate
                break

        if handler is None:
            if self.fallback_app is not None:
                return self.fallback_app(environ, start_response)
            # Wrap the body in a list: a bare string is a valid iterable but
            # would be sent to the client one character at a time.
            return [req.not_found('Sorry, that method is not supported')]

        return handler(req, self.backend, mat)
class GunzipFilter(object):
    """WSGI middleware that unzips gzip-encoded requests before
    passing on to the underlying application.
    """

    def __init__(self, application):
        """Wrap the given WSGI application."""
        self.app = application

    def __call__(self, environ, start_response):
        """Replace a gzip-encoded wsgi.input with a decompressing stream.

        Requests without ``Content-Encoding: gzip`` are passed through
        untouched.
        """
        if environ.get('HTTP_CONTENT_ENCODING', '') != 'gzip':
            return self.app(environ, start_response)

        body = environ['wsgi.input']
        if not hasattr(body, 'seek'):
            # The gzip implementation in the standard library of Python 2.x
            # requires the '.seek()' and '.tell()' methods to be available
            # on the input stream.  Read the data into a temporary file to
            # work around this limitation.
            spooled = tempfile.SpooledTemporaryFile(16 * 1024 * 1024)
            shutil.copyfileobj(body, spooled)
            spooled.seek(0)
            body = spooled

        environ['wsgi.input'] = gzip.GzipFile(filename=None, fileobj=body, mode='r')
        # The body is no longer encoded, and the declared length described
        # the compressed payload, so neither header applies downstream.
        del environ['HTTP_CONTENT_ENCODING']
        environ.pop('CONTENT_LENGTH', None)

        return self.app(environ, start_response)
class LimitedInputFilter(object):
    """WSGI middleware that limits the input length of a request to that
    specified in Content-Length.
    """

    def __init__(self, application):
        """Wrap the given WSGI application."""
        self.app = application

    def __call__(self, environ, start_response):
        """Cap wsgi.input at Content-Length bytes, then call the application."""
        # This is not necessary if this app is run from a conforming WSGI
        # server. Unfortunately, there's no way to tell that at this point.
        # TODO: git may use HTTP/1.1 chunked encoding instead of specifying
        # content-length
        declared = environ.get('CONTENT_LENGTH', '')
        if declared:
            stream = environ['wsgi.input']
            limit = int(declared)
            environ['wsgi.input'] = _LengthLimitedFile(stream, limit)
        return self.app(environ, start_response)
def make_wsgi_chain(*args, **kwargs):
    """Factory function to create an instance of HTTPGitApplication,
    correctly wrapped with needed middleware.

    All arguments are forwarded to HTTPGitApplication.

    :return: The application wrapped in LimitedInputFilter and then
        GunzipFilter, so GunzipFilter sees the request first.
    """
    git_app = HTTPGitApplication(*args, **kwargs)
    length_limited = LimitedInputFilter(git_app)
    return GunzipFilter(length_limited)
411 # The reference server implementation is based on wsgiref, which is not
412 # distributed with python 2.4. If wsgiref is not present, users will not be
413 # able to use the HTTP server without a little extra work.
415 from wsgiref.simple_server import (
421 class ServerHandlerLogger(ServerHandler):
422 """ServerHandler that uses dulwich's logger for logging exceptions."""
424 def log_exception(self, exc_info):
425 logger.exception('Exception happened during processing of request',
428 def log_message(self, format, *args):
429 logger.info(format, *args)
431 def log_error(self, *args):
434 class WSGIRequestHandlerLogger(WSGIRequestHandler):
435 """WSGIRequestHandler that uses dulwich's logger for logging exceptions."""
437 def log_exception(self, exc_info):
438 logger.exception('Exception happened during processing of request',
441 def log_message(self, format, *args):
442 logger.info(format, *args)
444 def log_error(self, *args):
448 """Handle a single HTTP request"""
450 self.raw_requestline = self.rfile.readline()
451 if not self.parse_request(): # An error code has been sent, just exit
454 handler = ServerHandlerLogger(
455 self.rfile, self.wfile, self.get_stderr(), self.get_environ()
457 handler.request_handler = self # backpointer for logging
458 handler.run(self.server.get_app())
460 class WSGIServerLogger(WSGIServer):
461 def handle_error(self, request, client_address):
462 """Handle an error. """
463 logger.exception('Exception happened during processing of request from %s' % str(client_address))
465 def main(argv=sys.argv):
466 """Entry point for starting an HTTP git server."""
472 # TODO: allow serving on other addresses/ports via command-line flag
476 log_utils.default_logging_config()
477 backend = DictBackend({'/': Repo(gitdir)})
478 app = make_wsgi_chain(backend)
479 server = make_server(listen_addr, port, app,
480 handler_class=WSGIRequestHandlerLogger,
481 server_class=WSGIServerLogger)
482 logger.info('Listening for HTTP connections on %s:%d', listen_addr,
484 server.serve_forever()
487 # No wsgiref found; don't provide the reference functionality, but leave
488 # the rest of the WSGI-based implementation.
489 def main(argv=sys.argv):
490 """Stub entry point for failing to start a server without wsgiref."""
492 'Sorry, the wsgiref module is required for dul-web.\n')
496 if __name__ == '__main__':