1 # server.py -- Implementation of the server side git protocols
2 # Copyright (C) 2008 John Carr <john.carr@unrouted.co.uk>
4 # This program is free software; you can redistribute it and/or
5 # modify it under the terms of the GNU General Public License
6 # as published by the Free Software Foundation; version 2
7 # or (at your option) any later version of the License.
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU General Public License for more details.
14 # You should have received a copy of the GNU General Public License
15 # along with this program; if not, write to the Free Software
16 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
20 """Git smart network protocol server implementation.
22 For more detailed implementation on the network protocol, see the
23 Documentation/technical directory in the cgit distribution, and in particular:
24 Documentation/technical/protocol-capabilities.txt
25 Documentation/technical/pack-protocol.txt
32 from dulwich.errors import (
37 from dulwich.objects import (
40 from dulwich.protocol import (
46 extract_want_line_capabilities,
52 from dulwich.pack import (
class Backend(object):
    """A backend for the Git smart server implementation.

    Subclasses map a client-supplied path to a repository object.
    """

    def open_repository(self, path):
        """Open the repository at a path.

        :param path: Path identifying the repository to open
        :raise NotImplementedError: always; subclasses must override this
        """
        raise NotImplementedError(self.open_repository)
class BackendRepo(object):
    """Repository abstraction used by the Git server.

    Eventually this should become just a subset of Repo.
    """

    def get_refs(self):
        """Get all the refs in the repository.

        :return: dict of name -> sha
        :raise NotImplementedError: always; subclasses must override this
        """
        raise NotImplementedError

    def get_peeled(self, name):
        """Return the cached peeled value of a ref, if available.

        :param name: Name of the ref to peel
        :return: The peeled value of the ref. If the ref is known not to
            point to a tag, this will be the SHA the ref refers to. If the
            ref may point to a tag, but no cached information is available,
            None is returned. This base implementation always returns None.
        """
        return None

    def apply_pack(self, refs, read, delete_refs=True):
        """Import a set of changes into a repository and update the refs.

        :param refs: list of tuple(name, sha)
        :param read: callback to read from the incoming pack
        :param delete_refs: whether to allow deleting refs
        :raise NotImplementedError: always; subclasses must override this
        """
        raise NotImplementedError

    def fetch_objects(self, determine_wants, graph_walker, progress,
                      get_tagged=None):
        """Yield the objects required for a list of commits.

        :param determine_wants: callback used to work out what the client
            wants
        :param graph_walker: graph walker used for the have/ack negotiation
        :param progress: is a callback to send progress messages to the client
        :param get_tagged: Function that returns a dict of pointed-to sha ->
            tag sha for including tags.
        :raise NotImplementedError: always; subclasses must override this
        """
        raise NotImplementedError
class GitBackendRepo(BackendRepo):
    """BackendRepo implementation that wraps an existing repository object.

    The wrapped object's ``refs``, ``object_store``, ``fetch_objects`` and
    ``get_refs`` are re-exposed directly on this instance.
    """

    def __init__(self, repo):
        """Wrap a repository.

        :param repo: object providing ``refs``, ``object_store``,
            ``fetch_objects`` and ``get_refs``
        """
        self.repo = repo
        self.refs = self.repo.refs
        self.object_store = self.repo.object_store
        self.fetch_objects = self.repo.fetch_objects
        self.get_refs = self.repo.get_refs

    def apply_pack(self, refs, read, delete_refs=True):
        """Read an incoming thin pack and apply the requested ref updates.

        :param refs: iterable of (oldsha, sha, ref) triples; a ``sha`` equal
            to ZERO_SHA requests deletion of ``ref``
        :param read: callback ``read(size)`` returning pack data, and an
            empty value once the pack has been fully read
        :param delete_refs: whether deleting refs is allowed
        :return: list of (name, message) tuples. The first entry is
            ``('unpack', 'ok')`` or ``('unpack', <error text>)``; it is
            followed by one ``(ref, 'ok')`` or ``(ref, <error text>)`` entry
            per requested update.
        :raise GitProtocolError: if a deletion is requested while
            ``delete_refs`` is false
        """
        f, commit = self.repo.object_store.add_thin_pack()
        all_exceptions = (IOError, OSError, ChecksumMismatch, ApplyDeltaError)
        status = []
        unpack_error = None
        # TODO: more informative error messages than just the exception string
        try:
            # TODO: decode the pack as we stream to avoid blocking reads beyond
            # the end of data (when using HTTP/1.1 chunked encoding)
            while True:
                data = read(10240)
                if not data:
                    break
                f.write(data)
        except all_exceptions as e:
            unpack_error = str(e).replace('\n', '')
        try:
            commit()
        except all_exceptions as e:
            # Keep the first failure: an error while reading is more useful
            # than the follow-on error from committing a truncated pack.
            if not unpack_error:
                unpack_error = str(e).replace('\n', '')

        if unpack_error:
            status.append(('unpack', unpack_error))
        else:
            status.append(('unpack', 'ok'))

        for oldsha, sha, ref in refs:
            ref_error = None
            try:
                if sha == ZERO_SHA:
                    if not delete_refs:
                        raise GitProtocolError(
                            'Attempted to delete refs without delete-refs '
                            'capability.')
                    try:
                        del self.repo.refs[ref]
                    except all_exceptions:
                        ref_error = 'failed to delete'
                else:
                    try:
                        self.repo.refs[ref] = sha
                    except all_exceptions:
                        ref_error = 'failed to write'
            except KeyError:
                ref_error = 'bad ref'
            if ref_error:
                status.append((ref, ref_error))
            else:
                status.append((ref, 'ok'))

        return status
class DictBackend(Backend):
    """Trivial backend that looks up Git repositories in a dictionary."""

    def __init__(self, repos):
        """Create the backend.

        :param repos: mapping of path -> repository object
        """
        self.repos = repos

    def open_repository(self, path):
        """Return the repository registered under ``path``.

        :param path: key to look up in the repository mapping
        :return: the repository object stored for ``path``
        """
        # FIXME: What to do in case there is no repo ?
        # For now the mapping's own lookup error propagates to the caller.
        return self.repos[path]
class Handler(object):
    """Smart protocol command handler base class."""

    def __init__(self, backend, read, write):
        """Create a handler.

        :param backend: backend used to open repositories
        :param read: read callback handed to the Protocol instance
        :param write: write callback handed to the Protocol instance
        """
        self.backend = backend
        self.proto = Protocol(read, write)
        # None until set_client_capabilities() has been called.
        self._client_capabilities = None

    def capability_line(self):
        """Return the advertised capabilities joined by single spaces."""
        return " ".join(self.capabilities())

    def capabilities(self):
        """Return the capabilities this handler advertises.

        :raise NotImplementedError: always; subclasses must override this
        """
        raise NotImplementedError(self.capabilities)

    def innocuous_capabilities(self):
        """Return capabilities a client may request even if not advertised."""
        return ("include-tag", "thin-pack", "no-progress", "ofs-delta")

    def required_capabilities(self):
        """Return a list of capabilities that we require the client to have."""
        return []

    def set_client_capabilities(self, caps):
        """Validate and record the capabilities requested by the client.

        :param caps: collection of capability names sent by the client
        :raise GitProtocolError: if the client asks for a capability that is
            neither advertised nor innocuous, or lacks a required capability
        """
        allowable_caps = set(self.innocuous_capabilities())
        allowable_caps.update(self.capabilities())
        for cap in caps:
            if cap not in allowable_caps:
                raise GitProtocolError('Client asked for capability %s that '
                                       'was not advertised.' % cap)
        for cap in self.required_capabilities():
            if cap not in caps:
                raise GitProtocolError('Client does not support required '
                                       'capability %s.' % cap)
        self._client_capabilities = set(caps)

    def has_capability(self, cap):
        """Check whether the client requested a capability.

        :param cap: capability name
        :return: True if the client requested ``cap``
        :raise GitProtocolError: if called before the client capabilities
            have been set
        """
        if self._client_capabilities is None:
            raise GitProtocolError('Server attempted to access capability %s '
                                   'before asking client' % cap)
        return cap in self._client_capabilities
class UploadPackHandler(Handler):
    """Protocol handler for the upload-pack command.

    Negotiates wants/haves with the client and writes the resulting pack
    data back over the sideband channel.
    """

    def __init__(self, backend, args, read, write,
                 stateless_rpc=False, advertise_refs=False):
        """Create the handler.

        :param backend: backend used to open the repository
        :param args: command arguments; ``args[0]`` is the repository path
        :param read: read callback for the underlying protocol
        :param write: write callback for the underlying protocol
        :param stateless_rpc: stored for use by the graph walker
        :param advertise_refs: stored for use by the graph walker
        """
        Handler.__init__(self, backend, read, write)
        self.repo = backend.open_repository(args[0])
        self._graph_walker = None
        self.stateless_rpc = stateless_rpc
        self.advertise_refs = advertise_refs

    def capabilities(self):
        """Return the capabilities advertised to the client."""
        return ("multi_ack_detailed", "multi_ack", "side-band-64k",
                "thin-pack", "ofs-delta", "no-progress", "include-tag")

    def required_capabilities(self):
        """Return the capabilities the client must request."""
        return ("side-band-64k", "thin-pack", "ofs-delta")

    def progress(self, message):
        """Send a progress message on sideband channel 2.

        Nothing is sent if the client requested ``no-progress``.

        :param message: text to send
        """
        if self.has_capability("no-progress"):
            return
        self.proto.write_sideband(2, message)

    def get_tagged(self, refs=None, repo=None):
        """Get a dict of peeled values of tags to their original tag shas.

        :param refs: dict of refname -> sha of possible tags; defaults to all
            of the backend's refs.
        :param repo: optional Repo instance for getting peeled refs; defaults
            to the backend's repo, if available
        :return: dict of peeled_sha -> tag_sha, where tag_sha is the sha of a
            tag whose peeled value is peeled_sha. Empty if the client did not
            request ``include-tag`` or no Repo is available.
        """
        if not self.has_capability("include-tag"):
            return {}
        if refs is None:
            refs = self.repo.get_refs()
        if repo is None:
            repo = getattr(self.repo, "repo", None)
            if repo is None:
                # Bail if we don't have a Repo available; this is ok since
                # clients must be able to handle if the server doesn't include
                # all relevant tags.
                # TODO: fix behavior when missing
                return {}
        tagged = {}
        for name, sha in refs.iteritems():
            peeled_sha = repo.get_peeled(name)
            if peeled_sha != sha:
                tagged[peeled_sha] = sha
        return tagged

    def handle(self):
        """Run the negotiation and send the requested objects as a pack."""
        def write(data):
            # Pack data goes out on sideband channel 1.
            return self.proto.write_sideband(1, data)

        graph_walker = ProtocolGraphWalker(self, self.repo.object_store,
                                           self.repo.get_peeled)
        objects_iter = self.repo.fetch_objects(
            graph_walker.determine_wants, graph_walker, self.progress,
            get_tagged=self.get_tagged)

        # Do they want any objects?
        if len(objects_iter) == 0:
            return

        self.progress("dul-daemon says what\n")
        self.progress("counting objects: %d, done.\n" % len(objects_iter))
        write_pack_data(ProtocolFile(None, write), objects_iter,
                        len(objects_iter))
        self.progress("how was that, then?\n")
        # we are done
        self.proto.write("0000")
class ProtocolGraphWalker(object):
    """A graph walker that knows the git protocol.

    As a graph walker, this class implements ack(), next(), and reset(). It
    also contains some base methods for interacting with the wire and walking
    the commit tree.

    The work of determining which acks to send is passed on to the
    implementation instance stored in _impl. The reason for this is that we do
    not know at object creation time what ack level the protocol requires. A
    call to set_ack_type() is required to set up the implementation, before
    any calls to next() or ack() are made.
    """

    def __init__(self, handler, object_store, get_peeled):
        """Create the walker.

        :param handler: handler providing ``proto``, ``stateless_rpc``,
            ``advertise_refs``, ``capability_line()`` and
            ``set_client_capabilities()``
        :param object_store: mapping of sha -> object used when walking
        :param get_peeled: function mapping a ref name to its peeled sha
        """
        self.handler = handler
        self.store = object_store
        self.get_peeled = get_peeled
        self.proto = handler.proto
        self.stateless_rpc = handler.stateless_rpc
        self.advertise_refs = handler.advertise_refs
        self._wants = []
        self._cached = False
        self._cache = []
        self._cache_index = 0
        self._impl = None

    def determine_wants(self, heads):
        """Determine the wants for a set of heads.

        The given heads are advertised to the client, who then specifies which
        refs he wants using 'want' lines. This portion of the protocol is the
        same regardless of ack type, and in fact is used to set the ack type of
        the ProtocolGraphWalker.

        :param heads: a dict of refname->SHA1 to advertise
        :return: a list of SHA1s requested by the client
        :raise GitProtocolError: if there are no heads, or the client sends
            an unexpected command or asks for a sha that was not advertised
        """
        if not heads:
            raise GitProtocolError('No heads found')
        values = set(heads.itervalues())
        if self.advertise_refs or not self.stateless_rpc:
            for i, (ref, sha) in enumerate(heads.iteritems()):
                line = "%s %s" % (sha, ref)
                if not i:
                    # Capabilities ride on the first advertised ref only.
                    line = "%s\x00%s" % (line, self.handler.capability_line())
                self.proto.write_pkt_line("%s\n" % line)
                peeled_sha = self.get_peeled(ref)
                if peeled_sha != sha:
                    self.proto.write_pkt_line('%s %s^{}\n' %
                                              (peeled_sha, ref))

            # End of the advertisement.
            self.proto.write_pkt_line(None)

            if self.advertise_refs:
                return []

        # The client now sends its 'want' lines.
        want = self.proto.read_pkt_line()
        if not want:
            return []
        line, caps = extract_want_line_capabilities(want)
        self.handler.set_client_capabilities(caps)
        self.set_ack_type(ack_type(caps))
        command, sha = self._split_proto_line(line)

        want_revs = []
        while command is not None:
            if command != 'want':
                raise GitProtocolError(
                    'Protocol got unexpected command %s' % command)
            if sha not in values:
                raise GitProtocolError(
                    'Client wants invalid object %s' % sha)
            want_revs.append(sha)
            command, sha = self.read_proto_line()

        self.set_wants(want_revs)
        return want_revs

    def ack(self, have_ref):
        """Acknowledge a have via the active ack implementation."""
        return self._impl.ack(have_ref)

    def reset(self):
        """Switch to replaying the cache, starting from its first entry."""
        self._cached = True
        self._cache_index = 0

    def next(self):
        """Return the next have sha, or None when there are no more.

        Before reset() the value comes from the ack implementation; after
        reset() entries are replayed from the cache in order.
        """
        if not self._cached:
            if not self._impl and self.stateless_rpc:
                return None
            return self._impl.next()
        # Read the current entry before advancing so that the first cached
        # entry is returned and the last one does not index out of range.
        if self._cache_index >= len(self._cache):
            return None
        entry = self._cache[self._cache_index]
        self._cache_index += 1
        return entry

    def _split_proto_line(self, line):
        """Split a protocol line into a (command, sha) tuple.

        :param line: raw line from the client
        :return: ('done', None), or ('want'|'have', sha)
        :raise GitProtocolError: if the line is not one of those forms or the
            sha is rejected by hex_to_sha
        """
        fields = line.rstrip('\n').split(' ', 1)
        if len(fields) == 1 and fields[0] == 'done':
            return ('done', None)
        elif len(fields) == 2 and fields[0] in ('want', 'have'):
            try:
                hex_to_sha(fields[1])
                return tuple(fields)
            except (TypeError, AssertionError) as e:
                raise GitProtocolError(e)
        raise GitProtocolError('Received invalid line from client:\n%s' % line)

    def read_proto_line(self):
        """Read a line from the wire.

        :return: a tuple having one of the following forms:
            ('want', obj_id)
            ('have', obj_id)
            ('done', None)
            (None, None)  (for a flush-pkt)

        :raise GitProtocolError: if the line cannot be parsed into one of the
            possible return values.
        """
        line = self.proto.read_pkt_line()
        if not line:
            return (None, None)
        return self._split_proto_line(line)

    def send_ack(self, sha, ack_type=''):
        """Write an ACK line for ``sha``, with an optional ack type suffix."""
        if ack_type:
            ack_type = ' %s' % ack_type
        self.proto.write_pkt_line('ACK %s%s\n' % (sha, ack_type))

    def send_nak(self):
        """Write a NAK line."""
        self.proto.write_pkt_line('NAK\n')

    def set_wants(self, wants):
        """Record the shas the client wants."""
        self._wants = wants

    def _is_satisfied(self, haves, want, earliest):
        """Check whether a want is satisfied by a set of haves.

        A want, typically a branch tip, is "satisfied" only if there exists a
        path back from that want to one of the haves.

        :param haves: A set of commits we know the client has.
        :param want: The want to check satisfaction for.
        :param earliest: A timestamp beyond which the search for haves will be
            terminated, presumably because we're searching too far down the
            wrong branch.
        :return: True if a have is reachable from the want, else False
        """
        o = self.store[want]
        pending = collections.deque([o])
        while pending:
            commit = pending.popleft()
            if commit.id in haves:
                return True
            if not getattr(commit, 'get_parents', None):
                # non-commit wants are assumed to be satisfied
                continue
            for parent in commit.get_parents():
                parent_obj = self.store[parent]
                # TODO: handle parents with later commit times than children
                if parent_obj.commit_time >= earliest:
                    pending.append(parent_obj)
        return False

    def all_wants_satisfied(self, haves):
        """Check whether all the current wants are satisfied by a set of haves.

        :param haves: A set of commits we know the client has.
        :note: Wants are specified with set_wants rather than passed in since
            in the current interface they are determined outside this class.
        :return: True if every want is satisfied, else False
        """
        haves = set(haves)
        if haves:
            earliest = min([self.store[h].commit_time for h in haves])
        else:
            # No haves: there is no cut-off time to derive, so do not prune.
            earliest = 0
        for want in self._wants:
            if not self._is_satisfied(haves, want, earliest):
                return False
        return True

    def set_ack_type(self, ack_type):
        """Install the ack implementation matching ``ack_type``.

        :param ack_type: one of SINGLE_ACK, MULTI_ACK or MULTI_ACK_DETAILED
        """
        impl_classes = {
            MULTI_ACK: MultiAckGraphWalkerImpl,
            MULTI_ACK_DETAILED: MultiAckDetailedGraphWalkerImpl,
            SINGLE_ACK: SingleAckGraphWalkerImpl,
            }
        self._impl = impl_classes[ack_type](self)
class SingleAckGraphWalkerImpl(object):
    """Graph walker implementation that speaks the single-ack protocol."""

    def __init__(self, walker):
        """Create the implementation.

        :param walker: walker providing ``read_proto_line()``, ``send_ack()``
            and ``send_nak()``
        """
        self.walker = walker
        self._sent_ack = False

    def ack(self, have_ref):
        """Acknowledge ``have_ref``; only the first call sends an ACK."""
        if not self._sent_ack:
            self.walker.send_ack(have_ref)
            self._sent_ack = True

    def next(self):
        """Read the next line and return the have sha it carries.

        :return: the sha from a 'have' line, or None on flush/'done' (a NAK
            is sent first if nothing has been ACKed yet)
        """
        command, sha = self.walker.read_proto_line()
        if command in (None, 'done'):
            if not self._sent_ack:
                self.walker.send_nak()
            return None
        elif command == 'have':
            return sha
class MultiAckGraphWalkerImpl(object):
    """Graph walker implementation that speaks the multi-ack protocol."""

    def __init__(self, walker):
        """Create the implementation.

        :param walker: walker providing ``read_proto_line()``, ``send_ack()``,
            ``send_nak()`` and ``all_wants_satisfied()``
        """
        self.walker = walker
        self._found_base = False
        self._common = []

    def ack(self, have_ref):
        """Record a common commit and ACK it until a base has been found."""
        self._common.append(have_ref)
        if not self._found_base:
            self.walker.send_ack(have_ref, 'continue')
            if self.walker.all_wants_satisfied(self._common):
                self._found_base = True
        # else we blind ack within next

    def next(self):
        """Read lines until a 'have' or 'done' arrives.

        :return: the sha from a 'have' line, or None after 'done'
        """
        while True:
            command, sha = self.walker.read_proto_line()
            if command is None:
                self.walker.send_nak()
                # in multi-ack mode, a flush-pkt indicates the client wants to
                # flush but more have lines are still coming
                continue
            elif command == 'done':
                # don't nak unless no common commits were found, even if not
                # everything is satisfied
                if self._common:
                    self.walker.send_ack(self._common[-1])
                else:
                    self.walker.send_nak()
                return None
            elif command == 'have':
                if self._found_base:
                    # blind ack
                    self.walker.send_ack(sha, 'continue')
                return sha
class MultiAckDetailedGraphWalkerImpl(object):
    """Graph walker implementation speaking the multi-ack-detailed protocol."""

    def __init__(self, walker):
        """Create the implementation.

        :param walker: walker providing ``read_proto_line()``, ``send_ack()``,
            ``send_nak()``, ``all_wants_satisfied()`` and ``stateless_rpc``
        """
        self.walker = walker
        self._found_base = False
        self._common = []

    def ack(self, have_ref):
        """Record a common commit, sending 'common' and, once satisfied,
        'ready' ACKs."""
        self._common.append(have_ref)
        if not self._found_base:
            self.walker.send_ack(have_ref, 'common')
            if self.walker.all_wants_satisfied(self._common):
                self._found_base = True
                self.walker.send_ack(have_ref, 'ready')
        # else we blind ack within next

    def next(self):
        """Read lines until a 'have' or 'done' arrives.

        :return: the sha from a 'have' line; None after 'done', or after a
            flush-pkt when the walker is in stateless-RPC mode
        """
        while True:
            command, sha = self.walker.read_proto_line()
            if command is None:
                self.walker.send_nak()
                if self.walker.stateless_rpc:
                    return None
                continue
            elif command == 'done':
                # don't nak unless no common commits were found, even if not
                # everything is satisfied
                if self._common:
                    self.walker.send_ack(self._common[-1])
                else:
                    self.walker.send_nak()
                return None
            elif command == 'have':
                if self._found_base:
                    # blind ack; can happen if the client has more requests
                    # inflight
                    self.walker.send_ack(sha, 'ready')
                return sha
class ReceivePackHandler(Handler):
    """Protocol handler for the receive-pack command.

    Advertises the repository's refs, reads the client's ref updates and
    pack, and optionally reports the status of each update.
    """

    def __init__(self, backend, args, read, write,
                 stateless_rpc=False, advertise_refs=False):
        """Create the handler.

        :param backend: backend used to open the repository
        :param args: command arguments; ``args[0]`` is the repository path
        :param read: read callback for the underlying protocol
        :param write: write callback for the underlying protocol
        :param stateless_rpc: if true, refs are only advertised when
            ``advertise_refs`` is also true
        :param advertise_refs: if true, stop after advertising the refs
        """
        Handler.__init__(self, backend, read, write)
        self.repo = backend.open_repository(args[0])
        self.stateless_rpc = stateless_rpc
        self.advertise_refs = advertise_refs

    def capabilities(self):
        """Return the capabilities advertised to the client."""
        return ("report-status", "delete-refs")

    def handle(self):
        """Run one receive-pack exchange with the client."""
        refs = list(self.repo.get_refs().items())

        if self.advertise_refs or not self.stateless_rpc:
            if refs:
                # Capabilities are attached to the first advertised ref.
                first_name, first_sha = refs[0]
                self.proto.write_pkt_line(
                    "%s %s\x00%s\n" % (first_sha, first_name,
                                       self.capability_line()))
                for name, sha in refs[1:]:
                    self.proto.write_pkt_line("%s %s\n" % (sha, name))
            else:
                # No refs: advertise the capabilities on a placeholder entry.
                # They must follow a NUL, exactly as on a real first ref.
                self.proto.write_pkt_line("%s capabilities^{}\x00%s" % (
                    ZERO_SHA, self.capability_line()))

            self.proto.write("0000")
            if self.advertise_refs:
                return

        client_refs = []
        ref = self.proto.read_pkt_line()

        # If ref is None then the client doesn't want to send us anything.
        if ref is None:
            return

        ref, caps = extract_capabilities(ref)
        self.set_client_capabilities(caps)

        # client will now send us a list of (oldsha, newsha, ref)
        while ref:
            client_refs.append(ref.split())
            ref = self.proto.read_pkt_line()

        # The backend can now process these refs and read the pack through
        # the protocol's read callback.
        status = self.repo.apply_pack(client_refs, self.proto.read,
                                      self.has_capability('delete-refs'))

        # when we have read all the pack from the client, send a status report
        # if the client asked for it
        if self.has_capability('report-status'):
            for name, msg in status:
                if name == 'unpack':
                    self.proto.write_pkt_line('unpack %s\n' % msg)
                elif msg == 'ok':
                    self.proto.write_pkt_line('ok %s\n' % name)
                else:
                    self.proto.write_pkt_line('ng %s %s\n' % (name, msg))
            self.proto.write_pkt_line(None)
class TCPGitRequestHandler(SocketServer.StreamRequestHandler):
    """Stream request handler that dispatches one git command per connection."""

    def handle(self):
        """Read the requested command and run the matching protocol handler.

        Commands other than git-upload-pack and git-receive-pack are ignored.
        """
        proto = Protocol(self.rfile.read, self.wfile.write)
        command, args = proto.read_cmd()

        # switch case to handle the specific git command
        if command == 'git-upload-pack':
            cls = UploadPackHandler
        elif command == 'git-receive-pack':
            cls = ReceivePackHandler
        else:
            return

        h = cls(self.server.backend, args, self.rfile.read, self.wfile.write)
        h.handle()
class TCPGitServer(SocketServer.TCPServer):
    """TCP server that serves git requests through TCPGitRequestHandler."""

    allow_reuse_address = True
    # Convenience alias so callers can write server.serve().
    serve = SocketServer.TCPServer.serve_forever

    def __init__(self, backend, listen_addr, port=TCP_GIT_PORT):
        """Create the server.

        :param backend: backend made available to request handlers as
            ``self.server.backend``
        :param listen_addr: address to bind to
        :param port: port to bind to; defaults to TCP_GIT_PORT
        """
        self.backend = backend
        SocketServer.TCPServer.__init__(self, (listen_addr, port),
                                        TCPGitRequestHandler)