1 # web.py -- WSGI smart-http server
2 # Copyright (C) 2010 Google, Inc.
4 # This program is free software; you can redistribute it and/or
5 # modify it under the terms of the GNU General Public License
6 # as published by the Free Software Foundation; version 2
7 # or (at your option) any later version of the License.
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU General Public License for more details.
14 # You should have received a copy of the GNU General Public License
15 # along with this program; if not, write to the Free Software
16 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
19 """HTTP server for dulwich that implements the git smart HTTP protocol."""
21 from cStringIO import StringIO
26 from urlparse import parse_qs
28 from dulwich.misc import parse_qs
29 from dulwich.protocol import (
32 from dulwich.server import (
38 HTTP_NOT_FOUND = '404 Not Found'
39 HTTP_FORBIDDEN = '403 Forbidden'
def date_time_string(timestamp=None):
    """Format a timestamp as an HTTP date string.

    Based on BaseHTTPServer.py in python2.5.

    :param timestamp: Seconds since the epoch. When None, the current time
        is used.
    :returns: A string of the form 'Thu, 01 Jan 1970 00:00:00 GMT'.
    """
    weekdays = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']
    # Index 0 is a placeholder: time.gmtime() reports months as 1..12.
    months = [None,
              'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
              'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
    if timestamp is None:
        timestamp = time.time()
    # Only the first seven fields are needed (yday and isdst are unused).
    year, month, day, hh, mm, ss, wd = time.gmtime(timestamp)[:7]
    # The zone designator must be the literal 'GMT'; the value is computed
    # with gmtime(), so it is already expressed in that zone.
    return '%s, %02d %3s %4d %02d:%02d:%02d GMT' % (
        weekdays[wd], day, months[month], year, hh, mm, ss)
def url_prefix(mat):
    """Return the part of the URL that precedes a regex match.

    :param mat: A regex match object.
    :returns: The text of the matched-against string that comes before the
        match, with leading and trailing slashes stripped and exactly one
        leading slash prepended.
    """
    searched = mat.string
    before_match = searched[:mat.start()]
    return '/' + before_match.strip('/')
def get_repo(backend, mat):
    """Open the repository addressed by a URL regex match.

    :param backend: Object providing an open_repository() method.
    :param mat: A regex match object for the request path.
    :returns: The result of backend.open_repository() for the URL prefix
        that precedes the match.
    """
    repo_path = url_prefix(mat)
    return backend.open_repository(repo_path)
71 def send_file(req, f, content_type):
72 """Send a file-like object to the request output.
74 :param req: The HTTPGitRequest object to send output to.
75 :param f: An open file-like object to send; will be closed.
76 :param content_type: The MIME type for the file.
77 :yield: The contents of the file.
80 yield req.not_found('File not found')
84 req.respond(HTTP_OK, content_type)
91 yield req.not_found('Error reading file')
96 def get_text_file(req, backend, mat):
98 return send_file(req, get_repo(backend, mat).get_named_file(mat.group()),
102 def get_loose_object(req, backend, mat):
103 sha = mat.group(1) + mat.group(2)
104 object_store = get_repo(backend, mat).object_store
105 if not object_store.contains_loose(sha):
106 yield req.not_found('Object not found')
109 data = object_store[sha].as_legacy_object()
111 yield req.not_found('Error reading object')
113 req.respond(HTTP_OK, 'application/x-git-loose-object')
117 def get_pack_file(req, backend, mat):
119 return send_file(req, get_repo(backend, mat).get_named_file(mat.group()),
120 'application/x-git-packed-objects')
123 def get_idx_file(req, backend, mat):
125 return send_file(req, get_repo(backend, mat).get_named_file(mat.group()),
126 'application/x-git-packed-objects-toc')
# Handler class for each service name; used when a caller passes no
# services mapping of its own.
default_services = {
    'git-upload-pack': UploadPackHandler,
    'git-receive-pack': ReceivePackHandler,
}
131 def get_info_refs(req, backend, mat, services=None):
133 services = default_services
134 params = parse_qs(req.environ['QUERY_STRING'])
135 service = params.get('service', [None])[0]
136 if service and not req.dumb:
137 handler_cls = services.get(service, None)
138 if handler_cls is None:
139 yield req.forbidden('Unsupported service %s' % service)
142 req.respond(HTTP_OK, 'application/x-%s-advertisement' % service)
144 proto = ReceivableProtocol(StringIO().read, output.write)
145 handler = handler_cls(backend, [url_prefix(mat)], proto,
146 stateless_rpc=True, advertise_refs=True)
147 handler.proto.write_pkt_line('# service=%s\n' % service)
148 handler.proto.write_pkt_line(None)
150 yield output.getvalue()
153 # TODO: select_getanyfile() (see http-backend.c)
155 req.respond(HTTP_OK, 'text/plain')
156 repo = get_repo(backend, mat)
157 refs = repo.get_refs()
158 for name in sorted(refs.iterkeys()):
159 # get_refs() includes HEAD as a special case, but we don't want to
167 yield '%s\t%s\n' % (sha, name)
168 peeled_sha = repo.get_peeled(name)
169 if peeled_sha != sha:
170 yield '%s\t%s^{}\n' % (peeled_sha, name)
173 def get_info_packs(req, backend, mat):
175 req.respond(HTTP_OK, 'text/plain')
176 for pack in get_repo(backend, mat).object_store.packs:
177 yield 'P pack-%s.pack\n' % pack.name()
180 class _LengthLimitedFile(object):
181 """Wrapper class to limit the length of reads from a file-like object.
183 This is used to ensure EOF is read from the wsgi.input object once
184 Content-Length bytes are read. This behavior is required by the WSGI spec
185 but not implemented in wsgiref as of 2.5.
187 def __init__(self, input, max_bytes):
189 self._bytes_avail = max_bytes
191 def read(self, size=-1):
192 if self._bytes_avail <= 0:
194 if size == -1 or size > self._bytes_avail:
195 size = self._bytes_avail
196 self._bytes_avail -= size
197 return self._input.read(size)
199 # TODO: support more methods as necessary
202 def handle_service_request(req, backend, mat, services=None):
204 services = default_services
205 service = mat.group().lstrip('/')
206 handler_cls = services.get(service, None)
207 if handler_cls is None:
208 yield req.forbidden('Unsupported service %s' % service)
211 req.respond(HTTP_OK, 'application/x-%s-response' % service)
214 input = req.environ['wsgi.input']
215 # This is not necessary if this app is run from a conforming WSGI server.
216 # Unfortunately, there's no way to tell that at this point.
217 # TODO: git may use HTTP/1.1 chunked encoding instead of specifying
219 if 'CONTENT_LENGTH' in req.environ:
220 input = _LengthLimitedFile(input, int(req.environ['CONTENT_LENGTH']))
221 proto = ReceivableProtocol(input.read, output.write)
222 handler = handler_cls(backend, [url_prefix(mat)], proto, stateless_rpc=True)
224 yield output.getvalue()
227 class HTTPGitRequest(object):
228 """Class encapsulating the state of a single git HTTP request.
230 :ivar environ: the WSGI environment for the request.
233 def __init__(self, environ, start_response, dumb=False):
234 self.environ = environ
236 self._start_response = start_response
237 self._cache_headers = []
240 def add_header(self, name, value):
241 """Add a header to the response."""
242 self._headers.append((name, value))
244 def respond(self, status=HTTP_OK, content_type=None, headers=None):
245 """Begin a response with the given status and other headers."""
247 self._headers.extend(headers)
249 self._headers.append(('Content-Type', content_type))
250 self._headers.extend(self._cache_headers)
252 self._start_response(status, self._headers)
254 def not_found(self, message):
255 """Begin a HTTP 404 response and return the text of a message."""
256 self._cache_headers = []
257 self.respond(HTTP_NOT_FOUND, 'text/plain')
260 def forbidden(self, message):
261 """Begin a HTTP 403 response and return the text of a message."""
262 self._cache_headers = []
263 self.respond(HTTP_FORBIDDEN, 'text/plain')
267 """Set the response to never be cached by the client."""
268 self._cache_headers = [
269 ('Expires', 'Fri, 01 Jan 1980 00:00:00 GMT'),
270 ('Pragma', 'no-cache'),
271 ('Cache-Control', 'no-cache, max-age=0, must-revalidate'),
274 def cache_forever(self):
275 """Set the response to be cached forever by the client."""
277 self._cache_headers = [
278 ('Date', date_time_string(now)),
279 ('Expires', date_time_string(now + 31536000)),
280 ('Cache-Control', 'public, max-age=31536000'),
class HTTPGitApplication(object):
    """Class encapsulating the state of a git WSGI application.

    :ivar backend: the Backend object backing this application
    :ivar dumb: flag handed to every HTTPGitRequest created by this
        application
    """

    # Maps (HTTP method, compiled path regex) to the function that handles
    # requests whose method is equal and whose path matches the regex.
    services = {
        ('GET', re.compile('/HEAD$')): get_text_file,
        ('GET', re.compile('/info/refs$')): get_info_refs,
        ('GET', re.compile('/objects/info/alternates$')): get_text_file,
        ('GET', re.compile('/objects/info/http-alternates$')): get_text_file,
        ('GET', re.compile('/objects/info/packs$')): get_info_packs,
        ('GET', re.compile('/objects/([0-9a-f]{2})/([0-9a-f]{38})$')): get_loose_object,
        ('GET', re.compile('/objects/pack/pack-([0-9a-f]{40})\\.pack$')): get_pack_file,
        ('GET', re.compile('/objects/pack/pack-([0-9a-f]{40})\\.idx$')): get_idx_file,

        ('POST', re.compile('/git-upload-pack$')): handle_service_request,
        ('POST', re.compile('/git-receive-pack$')): handle_service_request,
    }

    def __init__(self, backend, dumb=False):
        """Create the application.

        :param backend: the Backend object passed to every handler.
        :param dumb: flag forwarded to each HTTPGitRequest.
        """
        self.backend = backend
        self.dumb = dumb

    def __call__(self, environ, start_response):
        """WSGI entry point: dispatch the request to a matching handler.

        :param environ: the WSGI environment for the request.
        :param start_response: the WSGI start_response callable.
        :returns: An iterable of response body chunks.
        """
        path = environ['PATH_INFO']
        method = environ['REQUEST_METHOD']
        req = HTTPGitRequest(environ, start_response, self.dumb)
        # environ['QUERY_STRING'] has qs args
        handler = None
        mat = None
        for (smethod, spath), candidate in self.services.iteritems():
            if smethod != method:
                continue
            mat = spath.search(path)
            if mat:
                handler = candidate
                break
        if handler is None:
            # not_found() returns the message text. Wrap it in a list so the
            # WSGI server sends it as one chunk instead of iterating over the
            # string one character at a time.
            return [req.not_found('Sorry, that method is not supported')]
        return handler(req, self.backend, mat)