1 # protocol.py -- Shared parts of the git protocols
2 # Copyright (C) 2008 John Carr <john.carr@unrouted.co.uk>
3 # Copyright (C) 2008-2012 Jelmer Vernooij <jelmer@samba.org>
5 # This program is free software; you can redistribute it and/or
6 # modify it under the terms of the GNU General Public License
7 # as published by the Free Software Foundation; version 2
8 # or (at your option) any later version of the License.
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU General Public License for more details.
15 # You should have received a copy of the GNU General Public License
16 # along with this program; if not, write to the Free Software
17 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
20 """Generic functions for talking the git smart server protocol."""
22 from io import BytesIO
28 from dulwich.errors import (
39 MULTI_ACK_DETAILED = 2
42 class ProtocolFile(object):
43 """A dummy file for network ops that expect file-like objects."""
45 def __init__(self, read, write):
57 """Wrap data in a pkt-line.
59 :param data: The data to wrap, as a str or None.
60 :return: The data prefixed with its length in pkt-line format; if data was
61 None, returns the flush-pkt ('0000').
65 return '%04x%s' % (len(data) + 4, data)
68 class Protocol(object):
69 """Class for interacting with a remote git process over the wire.
71 Parts of the git wire protocol use 'pkt-lines' to communicate. A pkt-line
72 consists of the length of the line as a 4-byte hex string, followed by the
73 payload data. The length includes the 4-byte header. The special line '0000'
74 indicates the end of a section of input and is called a 'flush-pkt'.
76 For details on the pkt-line format, see the cgit distribution:
77 Documentation/technical/protocol-common.txt
80 def __init__(self, read, write, report_activity=None):
83 self.report_activity = report_activity
84 self._readahead = None
86 def read_pkt_line(self):
87 """Reads a pkt-line from the remote git process.
89 This method may read from the readahead buffer; see unread_pkt_line.
91 :return: The next string from the stream, without the length prefix, or
92 None for a flush-pkt ('0000').
94 if self._readahead is None:
97 read = self._readahead.read
98 self._readahead = None
103 raise HangupException()
104 size = int(sizestr, 16)
106 if self.report_activity:
107 self.report_activity(4, 'read')
109 if self.report_activity:
110 self.report_activity(size, 'read')
112 except socket.error as e:
113 raise GitProtocolError(e)
116 """Test whether the protocol stream has reached EOF.
118 Note that this refers to the actual stream EOF and not just a flush-pkt.
120 :return: True if the stream is at EOF, False otherwise.
123 next_line = self.read_pkt_line()
124 except HangupException:
126 self.unread_pkt_line(next_line)
129 def unread_pkt_line(self, data):
130 """Unread a single line of data into the readahead buffer.
132 This method can be used to unread a single pkt-line into a fixed
135 :param data: The data to unread, without the length prefix.
136 :raise ValueError: If more than one pkt-line is unread.
138 if self._readahead is not None:
139 raise ValueError('Attempted to unread multiple pkt-lines.')
140 self._readahead = BytesIO(pkt_line(data))
142 def read_pkt_seq(self):
143 """Read a sequence of pkt-lines from the remote git process.
145 :return: Yields each line of data up to but not including the next flush-pkt.
147 pkt = self.read_pkt_line()
150 pkt = self.read_pkt_line()
152 def write_pkt_line(self, line):
153 """Sends a pkt-line to the remote git process.
155 :param line: A string containing the data to send, without the length
159 line = pkt_line(line)
161 if self.report_activity:
162 self.report_activity(len(line), 'write')
163 except socket.error as e:
164 raise GitProtocolError(e)
166 def write_file(self):
167 """Return a writable file-like object for this protocol."""
169 class ProtocolFile(object):
171 def __init__(self, proto):
175 def write(self, data):
176 self._proto.write(data)
177 self._offset += len(data)
185 return ProtocolFile(self)
187 def write_sideband(self, channel, blob):
188 """Write multiplexed data to the sideband.
190 :param channel: An int specifying the channel to write to.
191 :param blob: A blob of data (as a string) to send on this channel.
193 # a pktline can be a max of 65520. a sideband line can therefore be
195 # WTF: Why have the len in ASCII, but the channel in binary.
197 self.write_pkt_line("%s%s" % (chr(channel), blob[:65515]))
200 def send_cmd(self, cmd, *args):
201 """Send a command and some arguments to a git server.
203 Only used for the TCP git protocol (git://).
205 :param cmd: The remote service to access.
206 :param args: List of arguments to send to remove service.
208 self.write_pkt_line("%s %s" % (cmd, "".join(["%s\0" % a for a in args])))
211 """Read a command and some arguments from the git client
213 Only used for the TCP git protocol (git://).
215 :return: A tuple of (command, [list of arguments]).
217 line = self.read_pkt_line()
218 splice_at = line.find(" ")
219 cmd, args = line[:splice_at], line[splice_at+1:]
220 assert args[-1] == "\x00"
221 return cmd, args[:-1].split(chr(0))
224 _RBUFSIZE = 8192 # Default read buffer size.
227 class ReceivableProtocol(Protocol):
228 """Variant of Protocol that allows reading up to a size without blocking.
230 This class has a recv() method that behaves like socket.recv() in addition
233 If you want to read n bytes from the wire and block until exactly n bytes
234 (or EOF) are read, use read(n). If you want to read at most n bytes from the
235 wire but don't care if you get less, use recv(n). Note that recv(n) will
236 still block until at least one byte is read.
239 def __init__(self, recv, write, report_activity=None, rbufsize=_RBUFSIZE):
240 super(ReceivableProtocol, self).__init__(self.read, write,
243 self._rbuf = BytesIO()
244 self._rbufsize = rbufsize
246 def read(self, size):
247 # From _fileobj.read in socket.py in the Python 2.6.5 standard library,
248 # with the following modifications:
249 # - omit the size <= 0 branch
250 # - seek back to start rather than 0 in case some buffer has been
252 # - use SEEK_END instead of the magic number.
253 # Copyright (c) 2001-2010 Python Software Foundation; All Rights Reserved
254 # Licensed under the Python Software Foundation License.
255 # TODO: see if buffer is more efficient than cBytesIO.
258 # Our use of BytesIO rather than lists of string objects returned by
259 # recv() minimizes memory usage and fragmentation that occurs when
260 # rbufsize is large compared to the typical return value of recv().
263 buf.seek(0, SEEK_END)
264 # buffer may have been partially consumed by recv()
265 buf_len = buf.tell() - start
267 # Already have size bytes in our buffer? Extract and return.
270 self._rbuf = BytesIO()
271 self._rbuf.write(buf.read())
275 self._rbuf = BytesIO() # reset _rbuf. we consume it via buf.
277 left = size - buf_len
278 # recv() will malloc the amount of memory given as its
279 # parameter even though it often returns much less data
280 # than that. The returned data string is short lived
281 # as we copy it into a BytesIO and free it. This avoids
282 # fragmentation issues on many platforms.
283 data = self._recv(left)
287 if n == size and not buf_len:
288 # Shortcut. Avoid buffer data copies when:
289 # - We have no data in our buffer.
291 # - Our call to recv returned exactly the
292 # number of bytes we were asked to read.
296 del data # explicit free
298 assert n <= left, "_recv(%d) returned %d bytes" % (left, n)
301 del data # explicit free
302 #assert buf_len == buf.tell()
306 def recv(self, size):
311 buf.seek(0, SEEK_END)
315 left = buf_len - start
317 # only read from the wire if our read buffer is exhausted
318 data = self._recv(self._rbufsize)
319 if len(data) == size:
320 # shortcut: skip the buffer if we read exactly size bytes
325 del data # explicit free
327 return buf.read(size)
330 def extract_capabilities(text):
331 """Extract a capabilities list from a string, if present.
333 :param text: String to extract from
334 :return: Tuple with text with capabilities removed and list of capabilities
338 text, capabilities = text.rstrip().split("\0")
339 return (text, capabilities.strip().split(" "))
342 def extract_want_line_capabilities(text):
343 """Extract a capabilities list from a want line, if present.
345 Note that want lines have capabilities separated from the rest of the line
346 by a space instead of a null byte. Thus want lines have the form:
348 want obj-id cap1 cap2 ...
350 :param text: Want line to extract from
351 :return: Tuple with text with capabilities removed and list of capabilities
353 split_text = text.rstrip().split(" ")
354 if len(split_text) < 3:
356 return (" ".join(split_text[:2]), split_text[2:])
359 def ack_type(capabilities):
360 """Extract the ack type from a capabilities list."""
361 if 'multi_ack_detailed' in capabilities:
362 return MULTI_ACK_DETAILED
363 elif 'multi_ack' in capabilities:
368 class BufferedPktLineWriter(object):
369 """Writer that wraps its data in pkt-lines and has an independent buffer.
371 Consecutive calls to write() wrap the data in a pkt-line and then buffer it
372 until enough lines have been written such that their total length (including
373 length prefix) reaches the buffer size.
376 def __init__(self, write, bufsize=65515):
377 """Initialize the BufferedPktLineWriter.
379 :param write: A write callback for the underlying writer.
380 :param bufsize: The internal buffer size, including length prefixes.
383 self._bufsize = bufsize
384 self._wbuf = BytesIO()
387 def write(self, data):
388 """Write data, wrapping it in a pkt-line."""
389 line = pkt_line(data)
391 over = self._buflen + line_len - self._bufsize
393 start = line_len - over
394 self._wbuf.write(line[:start])
399 self._wbuf.write(saved)
400 self._buflen += len(saved)
403 """Flush all data from the buffer."""
404 data = self._wbuf.getvalue()
408 self._wbuf = BytesIO()
411 class PktLineParser(object):
412 """Packet line parser that hands completed packets off to a callback.
415 def __init__(self, handle_pkt):
416 self.handle_pkt = handle_pkt
417 self._readahead = BytesIO()
419 def parse(self, data):
420 """Parse a fragment of data and call back for any completed packets.
422 self._readahead.write(data)
423 buf = self._readahead.getvalue()
427 size = int(buf[:4], 16)
429 self.handle_pkt(None)
431 elif size <= len(buf):
432 self.handle_pkt(buf[4:size])
436 self._readahead = BytesIO()
437 self._readahead.write(buf)
440 """Read back any unused data."""
441 return self._readahead.getvalue()