1 # web.py -- WSGI smart-http server
2 # Copyright (C) 2010 Google, Inc.
4 # This program is free software; you can redistribute it and/or
5 # modify it under the terms of the GNU General Public License
6 # as published by the Free Software Foundation; version 2
7 # or (at your option) any later version of the License.
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU General Public License for more details.
14 # You should have received a copy of the GNU General Public License
15 # along with this program; if not, write to the Free Software
16 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
19 """HTTP server for dulwich that implements the git smart HTTP protocol."""
21 from cStringIO import StringIO
28 from urlparse import parse_qs
30 from dulwich.misc import parse_qs
31 from dulwich import log_utils
32 from dulwich.protocol import (
35 from dulwich.repo import (
38 from dulwich.server import (
# Module-level logger obtained from dulwich's log_utils, followed by the HTTP
# status lines that the request helpers pass on to start_response.
# NOTE(review): HTTP_OK is used throughout this file but its definition is not
# visible in this listing -- confirm it is defined next to these constants.
44 logger = log_utils.getLogger(__name__)
49 HTTP_NOT_FOUND = '404 Not Found'
50 HTTP_FORBIDDEN = '403 Forbidden'
51 HTTP_ERROR = '500 Internal Server Error'
def date_time_string(timestamp=None):
    """Format a timestamp as an HTTP date string.

    :param timestamp: Seconds since the epoch; the current time is used when
        None.
    :return: A string such as 'Thu, 01 Jan 1970 00:00:00 GMT'.
    """
    # From BaseHTTPRequestHandler.date_time_string in BaseHTTPServer.py in the
    # Python 2.6.5 standard library, following modifications:
    #  - Made a global rather than an instance method.
    #  - weekdayname and monthname are renamed and locals rather than class
    #    variables.
    # Copyright (c) 2001-2010 Python Software Foundation; All Rights Reserved
    weekdays = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']
    # The leading None keeps the list 1-indexed, matching tm_mon (1..12).
    months = [None,
              'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
              'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
    if timestamp is None:
        timestamp = time.time()
    # Only the first seven struct_time fields are needed for the header.
    year, month, day, hh, mm, ss, wd = time.gmtime(timestamp)[:7]
    # The value is computed with gmtime(), so the zone label must read GMT
    # (it was previously misspelled, producing an invalid HTTP date).
    return '%s, %02d %3s %4d %02d:%02d:%02d GMT' % (
        weekdays[wd], day, months[month], year, hh, mm, ss)
def url_prefix(mat):
    """Extract the URL prefix from a regex match.

    :param mat: A regex match object.
    :returns: The URL prefix, defined as the text before the match in the
        original string. Normalized to start with one leading slash and end
        with no trailing slash.
    """
    # Everything in front of the matched service path names the repository;
    # stripping slashes on both sides and re-adding one gives a canonical form.
    return '/' + mat.string[:mat.start()].strip('/')
def get_repo(backend, mat):
    """Open the repository addressed by a matched request URL.

    :param backend: Object providing open_repository(path).
    :param mat: A regex match object for the request path.
    :return: Whatever backend.open_repository returns for the URL prefix.
    """
    prefix = url_prefix(mat)
    return backend.open_repository(prefix)
# send_file: generator that streams a file-like object as the response body.
# Visible behaviour: it can yield req.not_found('File not found'), it begins a
# successful response with req.respond(HTTP_OK, content_type), and it can yield
# req.error('Error reading file').
# NOTE(review): the conditions guarding these paths and the loop that reads the
# file are not visible in this listing (there are gaps between the numbered
# lines) -- confirm against the complete file.
88 def send_file(req, f, content_type):
89 """Send a file-like object to the request output.
91 :param req: The HTTPGitRequest object to send output to.
92 :param f: An open file-like object to send; will be closed.
93 :param content_type: The MIME type for the file.
94 :return: Iterator over the contents of the file, as chunks.
97 yield req.not_found('File not found')
100 req.respond(HTTP_OK, content_type)
109 yield req.error('Error reading file')
def _url_to_path(url):
    """Return url with every '/' replaced by the local OS path separator."""
    segments = url.split('/')
    return os.path.sep.join(segments)
# get_text_file: handler for a GET of a named file in the repository.
# The matched URL text (mat.group()) is converted with _url_to_path, the file
# is opened through the repository's get_named_file(), and the result is
# handed to send_file().
# NOTE(review): the statement between the def and the path assignment, and the
# last argument line of the send_file() call (the content type), are not
# visible in this listing -- confirm against the complete file.
119 def get_text_file(req, backend, mat):
121 path = _url_to_path(mat.group())
122 logger.info('Sending plain text file %s', path)
123 return send_file(req, get_repo(backend, mat).get_named_file(path),
# get_loose_object: generator handler that serves one loose object.
# The object id is the concatenation of the two regex groups of the match.
# Visible behaviour: yields req.not_found('Object not found') when the object
# store does not contain that id as a loose object, reads the payload with
# as_legacy_object(), can yield req.error('Error reading object'), and begins
# the successful response as 'application/x-git-loose-object'.
# NOTE(review): the control flow between these statements (early returns, the
# try/except around the read, the final yield of the data) is not visible in
# this listing -- confirm against the complete file.
127 def get_loose_object(req, backend, mat):
128 sha = mat.group(1) + mat.group(2)
129 logger.info('Sending loose object %s', sha)
130 object_store = get_repo(backend, mat).object_store
131 if not object_store.contains_loose(sha):
132 yield req.not_found('Object not found')
135 data = object_store[sha].as_legacy_object()
137 yield req.error('Error reading object')
140 req.respond(HTTP_OK, 'application/x-git-loose-object')
# get_pack_file: handler for a GET of a pack file.
# Converts the matched URL text to a path, opens it through the repository's
# get_named_file() and streams it with send_file() using the content type
# 'application/x-git-packed-objects'.
# NOTE(review): one statement between the def and the path assignment is not
# visible in this listing -- confirm against the complete file.
144 def get_pack_file(req, backend, mat):
146 path = _url_to_path(mat.group())
147 logger.info('Sending pack file %s', path)
148 return send_file(req, get_repo(backend, mat).get_named_file(path),
149 'application/x-git-packed-objects')
# get_idx_file: handler for a GET of a pack index file.
# Converts the matched URL text to a path, opens it through the repository's
# get_named_file() and streams it with send_file() using the content type
# 'application/x-git-packed-objects-toc'.
# NOTE(review): the log message below says "pack file" although this handler
# serves the index content type -- looks copied from get_pack_file; consider
# rewording the message.
# NOTE(review): one statement between the def and the path assignment is not
# visible in this listing -- confirm against the complete file.
152 def get_idx_file(req, backend, mat):
154 path = _url_to_path(mat.group())
155 logger.info('Sending pack file %s', path)
156 return send_file(req, get_repo(backend, mat).get_named_file(path),
157 'application/x-git-packed-objects-toc')
# get_info_refs: generator handler for GET .../info/refs.
# Reads the 'service' query parameter from QUERY_STRING. When a service is
# named and the request is not in dumb mode, the matching handler class is
# looked up in req.handlers (unknown services yield req.forbidden(...)), the
# response is started as 'application/x-<service>-advertisement', and a
# handler is built with stateless_rpc=True and advertise_refs=True after
# writing the '# service=<name>' packet line followed by a None packet.
# Otherwise the dumb listing is produced as text/plain: for each ref name in
# sorted order a '<sha>\t<name>' line, plus a '<peeled>\t<name>^{}' line when
# the peeled id differs.
# NOTE(review): several lines are not visible in this listing (the return
# after forbidden, the call that runs the handler, the start of the dumb
# branch, and how 'sha' is obtained and HEAD is skipped inside the loop) --
# confirm against the complete file.
160 def get_info_refs(req, backend, mat):
161 params = parse_qs(req.environ['QUERY_STRING'])
162 service = params.get('service', [None])[0]
163 if service and not req.dumb:
164 handler_cls = req.handlers.get(service, None)
165 if handler_cls is None:
166 yield req.forbidden('Unsupported service %s' % service)
169 write = req.respond(HTTP_OK, 'application/x-%s-advertisement' % service)
170 proto = ReceivableProtocol(StringIO().read, write)
171 handler = handler_cls(backend, [url_prefix(mat)], proto,
172 stateless_rpc=True, advertise_refs=True)
173 handler.proto.write_pkt_line('# service=%s\n' % service)
174 handler.proto.write_pkt_line(None)
178 # TODO: select_getanyfile() (see http-backend.c)
180 req.respond(HTTP_OK, 'text/plain')
181 logger.info('Emulating dumb info/refs')
182 repo = get_repo(backend, mat)
183 refs = repo.get_refs()
184 for name in sorted(refs.iterkeys()):
185 # get_refs() includes HEAD as a special case, but we don't want to
193 yield '%s\t%s\n' % (sha, name)
194 peeled_sha = repo.get_peeled(name)
195 if peeled_sha != sha:
196 yield '%s\t%s^{}\n' % (peeled_sha, name)
# get_info_packs: generator handler for GET .../objects/info/packs.
# Starts a text/plain response and yields one 'P pack-<name>.pack' line for
# every pack in the repository's object store.
# NOTE(review): one statement between the def and the respond() call is not
# visible in this listing -- confirm against the complete file.
199 def get_info_packs(req, backend, mat):
201 req.respond(HTTP_OK, 'text/plain')
202 logger.info('Emulating dumb info/packs')
203 for pack in get_repo(backend, mat).object_store.packs:
204 yield 'P pack-%s.pack\n' % pack.name()
class _LengthLimitedFile(object):
    """Wrapper class to limit the length of reads from a file-like object.

    This is used to ensure EOF is read from the wsgi.input object once
    Content-Length bytes are read. This behavior is required by the WSGI spec
    but not implemented in wsgiref as of 2.5.
    """

    def __init__(self, input, max_bytes):
        """Wrap a stream with a read budget.

        :param input: File-like object providing read(size).
        :param max_bytes: Total number of bytes that may be read through this
            wrapper.
        """
        self._input = input
        self._bytes_avail = max_bytes

    def read(self, size=-1):
        """Read at most size bytes without exceeding the remaining budget.

        :param size: Maximum number of bytes to read; None or any negative
            value means "everything that is left in the budget".
        :return: The data read, or an empty string once the budget is spent.
        """
        if self._bytes_avail <= 0:
            return ''
        # Treat None and every negative size as "read the rest". Only -1 used
        # to be recognised, so read(-2) enlarged the budget and read past the
        # limit from the underlying stream.
        if size is None or size < 0 or size > self._bytes_avail:
            size = self._bytes_avail
        data = self._input.read(size)
        # Charge what was actually delivered, so a short read from the
        # underlying stream does not discard part of the budget.
        self._bytes_avail -= len(data)
        return data

    # TODO: support more methods as necessary
# handle_service_request: generator handler for POST requests to a git service.
# The service name is the matched URL text without its leading slash. Unknown
# services yield req.forbidden(...). Otherwise the response is started as
# 'application/x-<service>-response', wsgi.input is wrapped in
# _LengthLimitedFile using CONTENT_LENGTH, and a handler is constructed over a
# ReceivableProtocol with stateless_rpc=True.
# NOTE(review): the return after forbidden, the guard around the
# _LengthLimitedFile wrapping and the call that runs the handler are not
# visible in this listing -- confirm against the complete file.
230 def handle_service_request(req, backend, mat):
231 service = mat.group().lstrip('/')
232 logger.info('Handling service request for %s', service)
233 handler_cls = req.handlers.get(service, None)
234 if handler_cls is None:
235 yield req.forbidden('Unsupported service %s' % service)
238 write = req.respond(HTTP_OK, 'application/x-%s-response' % service)
240 input = req.environ['wsgi.input']
241 # This is not necessary if this app is run from a conforming WSGI server.
242 # Unfortunately, there's no way to tell that at this point.
243 # TODO: git may use HTTP/1.1 chunked encoding instead of specifying
245 content_length = req.environ.get('CONTENT_LENGTH', '')
247 input = _LengthLimitedFile(input, int(content_length))
248 proto = ReceivableProtocol(input.read, write)
249 handler = handler_cls(backend, [url_prefix(mat)], proto, stateless_rpc=True)
# HTTPGitRequest: per-request state and response helpers.
# - __init__ stores the WSGI environ, the handlers mapping, the start_response
#   callable and an initially empty list of cache headers.
# - add_header appends a (name, value) pair to the pending header list.
# - respond extends the pending headers with any extra headers, the
#   Content-Type and the cache headers, then calls start_response(status,
#   headers) and returns its result.
# - not_found / forbidden / error clear the cache headers, log the message and
#   begin a text/plain 404 / 403 / 500 response respectively.
# - The no-cache helper and cache_forever replace the cache headers with
#   Expires/Pragma/Cache-Control or Date/Expires/Cache-Control (one year,
#   31536000 seconds) values.
# NOTE(review): several short lines are not visible in this listing (other
# attribute assignments in __init__, the guards in respond, the return
# statements of the error helpers, and the def line of the no-cache helper) --
# confirm against the complete file.
253 class HTTPGitRequest(object):
254 """Class encapsulating the state of a single git HTTP request.
256 :ivar environ: the WSGI environment for the request.
259 def __init__(self, environ, start_response, dumb=False, handlers=None):
260 self.environ = environ
262 self.handlers = handlers
263 self._start_response = start_response
264 self._cache_headers = []
267 def add_header(self, name, value):
268 """Add a header to the response."""
269 self._headers.append((name, value))
271 def respond(self, status=HTTP_OK, content_type=None, headers=None):
272 """Begin a response with the given status and other headers."""
274 self._headers.extend(headers)
276 self._headers.append(('Content-Type', content_type))
277 self._headers.extend(self._cache_headers)
279 return self._start_response(status, self._headers)
281 def not_found(self, message):
282 """Begin a HTTP 404 response and return the text of a message."""
283 self._cache_headers = []
284 logger.info('Not found: %s', message)
285 self.respond(HTTP_NOT_FOUND, 'text/plain')
288 def forbidden(self, message):
289 """Begin a HTTP 403 response and return the text of a message."""
290 self._cache_headers = []
291 logger.info('Forbidden: %s', message)
292 self.respond(HTTP_FORBIDDEN, 'text/plain')
295 def error(self, message):
296 """Begin a HTTP 500 response and return the text of a message."""
297 self._cache_headers = []
298 logger.error('Error: %s', message)
299 self.respond(HTTP_ERROR, 'text/plain')
303 """Set the response to never be cached by the client."""
304 self._cache_headers = [
305 ('Expires', 'Fri, 01 Jan 1980 00:00:00 GMT'),
306 ('Pragma', 'no-cache'),
307 ('Cache-Control', 'no-cache, max-age=0, must-revalidate'),
310 def cache_forever(self):
311 """Set the response to be cached forever by the client."""
313 self._cache_headers = [
314 ('Date', date_time_string(now)),
315 ('Expires', date_time_string(now + 31536000)),
316 ('Cache-Control', 'public, max-age=31536000'),
class HTTPGitApplication(object):
    """Class encapsulating the state of a git WSGI application.

    :ivar backend: the Backend object backing this application
    """

    # Routing table: (HTTP method, compiled path pattern) -> handler callable.
    # Patterns are anchored at the end of the path only, so whatever precedes
    # the match identifies the repository.
    services = {
        ('GET', re.compile(r'/HEAD$')): get_text_file,
        ('GET', re.compile(r'/info/refs$')): get_info_refs,
        ('GET', re.compile(r'/objects/info/alternates$')): get_text_file,
        ('GET', re.compile(r'/objects/info/http-alternates$')): get_text_file,
        ('GET', re.compile(r'/objects/info/packs$')): get_info_packs,
        ('GET', re.compile(r'/objects/([0-9a-f]{2})/([0-9a-f]{38})$')):
            get_loose_object,
        ('GET', re.compile(r'/objects/pack/pack-([0-9a-f]{40})\.pack$')):
            get_pack_file,
        ('GET', re.compile(r'/objects/pack/pack-([0-9a-f]{40})\.idx$')):
            get_idx_file,

        ('POST', re.compile(r'/git-upload-pack$')): handle_service_request,
        ('POST', re.compile(r'/git-receive-pack$')): handle_service_request,
    }

    def __init__(self, backend, dumb=False, handlers=None):
        """Create the application.

        :param backend: Backend object used to open repositories.
        :param dumb: Flag passed on to every HTTPGitRequest.
        :param handlers: Optional mapping that overrides or extends
            DEFAULT_HANDLERS.
        """
        self.backend = backend
        self.dumb = dumb
        # Copy the defaults so per-application overrides never leak into the
        # shared DEFAULT_HANDLERS mapping.
        self.handlers = dict(DEFAULT_HANDLERS)
        if handlers is not None:
            self.handlers.update(handlers)

    def __call__(self, environ, start_response):
        """WSGI entry point: dispatch to the service matching method and path.

        :param environ: The WSGI environment.
        :param start_response: The WSGI start_response callable.
        :return: An iterable of body strings.
        """
        path = environ['PATH_INFO']
        method = environ['REQUEST_METHOD']
        req = HTTPGitRequest(environ, start_response, dumb=self.dumb,
                             handlers=self.handlers)
        # environ['QUERY_STRING'] has qs args
        handler = None
        mat = None
        for (smethod, spath), candidate in self.services.items():
            if smethod != method:
                continue
            mat = spath.search(path)
            if mat:
                handler = candidate
                break
        if handler is None:
            # Wrap the message in a list: a bare string is a valid iterable,
            # but the server would then send the body one character at a time.
            return [req.not_found('Sorry, that method is not supported')]
        return handler(req, self.backend, mat)
366 # The reference server implementation is based on wsgiref, which is not
367 # distributed with python 2.4. If wsgiref is not present, users will not be able
368 # to use the HTTP server without a little extra work.
370 from wsgiref.simple_server import (
# HTTPGitRequestHandler: WSGIRequestHandler subclass that sends the handler's
# logging hooks to this module's logger -- log_exception goes to
# logger.exception and log_message goes to logger.info.
# NOTE(review): the remaining arguments of the logger.exception call and the
# body of log_error are not visible in this listing -- confirm against the
# complete file.
375 class HTTPGitRequestHandler(WSGIRequestHandler):
376 """Handler that uses dulwich's logger for logging exceptions."""
378 def log_exception(self, exc_info):
379 logger.exception('Exception happened during processing of request',
382 def log_message(self, format, *args):
383 logger.info(format, *args)
385 def log_error(self, *args):
# main: command-line entry point used when wsgiref is importable.
# Configures default logging, builds a DictBackend that maps '/' to a Repo for
# gitdir, wraps it in an HTTPGitApplication, creates a wsgiref server with
# HTTPGitRequestHandler as the handler class, logs the listening address and
# serves forever.
# NOTE(review): how gitdir, listen_addr and port are derived is not visible in
# this listing -- confirm against the complete file.
389 def main(argv=sys.argv):
390 """Entry point for starting an HTTP git server."""
396 # TODO: allow serving on other addresses/ports via command-line flag
400 log_utils.default_logging_config()
401 backend = DictBackend({'/': Repo(gitdir)})
402 app = HTTPGitApplication(backend)
403 server = make_server(listen_addr, port, app,
404 handler_class=HTTPGitRequestHandler)
405 logger.info('Listening for HTTP connections on %s:%d', listen_addr,
407 server.serve_forever()
410 # No wsgiref found; don't provide the reference functionality, but leave the
411 # rest of the WSGI-based implementation.
# Fallback main used when wsgiref could not be imported: it only reports on
# stderr that the module is required.
# NOTE(review): this definition may continue past the last visible line (for
# example with an exit status) -- confirm against the complete file.
412 def main(argv=sys.argv):
413 """Stub entry point for failing to start a server without wsgiref."""
414 sys.stderr.write('Sorry, the wsgiref module is required for dul-web.\n')