Merge various pack improvements from Dave.

author Jelmer Vernooij <jelmer@samba.org>

Fri, 16 Apr 2010 19:49:17 +0000 (21:49 +0200)

committer Jelmer Vernooij <jelmer@samba.org>

Fri, 16 Apr 2010 19:49:17 +0000 (21:49 +0200)
author Jelmer Vernooij <jelmer@samba.org>
Fri, 16 Apr 2010 19:49:17 +0000 (21:49 +0200)
committer Jelmer Vernooij <jelmer@samba.org>
Fri, 16 Apr 2010 19:49:17 +0000 (21:49 +0200)
diff --git a/dulwich/pack.py b/dulwich/pack.py

index 791d1c434fdf3b1eb6ec9d2239d7978807d83cda..2b2789141b32bb9497b0f6348da6dfda6ae35116 100644 (file)
--- a/dulwich/pack.py
+++ b/dulwich/pack.py
@@ -1,17 +1,17 @@
  # pack.py -- For dealing wih packed git objects.
  # Copyright (C) 2007 James Westby <jw+debian@jameswestby.net>
  # Copryight (C) 2008-2009 Jelmer Vernooij <jelmer@samba.org>
-# 
+#
  # This program is free software; you can redistribute it and/or
  # modify it under the terms of the GNU General Public License
  # as published by the Free Software Foundation; version 2
  # of the License or (at your option) a later version.
-# 
+#
  # This program is distributed in the hope that it will be useful,
  # but WITHOUT ANY WARRANTY; without even the implied warranty of
  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  # GNU General Public License for more details.
-# 
+#
  # You should have received a copy of the GNU General Public License
  # along with this program; if not, write to the Free Software
  # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
@@ -75,7 +75,7 @@ supports_mmap_offset = (sys.version_info[0] >= 3 or
  
  def take_msb_bytes(read):
      """Read bytes marked with most significant bit.
-    
+
      :param read: Read function
      """
      ret = []
@@ -84,29 +84,42 @@ def take_msb_bytes(read):
      return ret
  
  
-def read_zlib_chunks(read, buffer_size=4096):
-    """Read chunks of zlib data from a buffer.
-    
-    :param read: Read function
-    :return: Tuple with list of chunks, length of 
-        compressed data length and unused read data
+def read_zlib_chunks(read_some, dec_size, buffer_size=4096):
+    """Read zlib data from a buffer.
+
+    This function requires that the buffer have additional data following the
+    compressed data, which is guaranteed to be the case for git pack files.
+
+    :param read_some: Read function that returns at least one byte, but may
+        return less than the requested size
+    :param dec_size: Expected size of the decompressed buffer
+    :param buffer_size: Size of the read buffer
+    :return: Tuple with list of chunks, length of compressed data length and
+        and unused read data.
+    :raise zlib.error: if a decompression error occurred.
      """
+    if dec_size <= -1:
+        raise ValueError("non-negative zlib data stream size expected")
      obj = zlib.decompressobj()
      ret = []
      fed = 0
+    size = 0
      while obj.unused_data == "":
-        add = read(buffer_size)
-        if len(add) < buffer_size:
-            add += "Z"
+        add = read_some(buffer_size)
+        if not add:
+            raise zlib.error("EOF before end of zlib stream")
          fed += len(add)
-        ret.append(obj.decompress(add))
-    comp_len = fed-len(obj.unused_data)
+        decomp = obj.decompress(add)
+        size += len(decomp)
+        ret.append(decomp)
+    if size != dec_size:
+        raise zlib.error("decompressed data does not match expected size")
+    comp_len = fed - len(obj.unused_data)
      return ret, comp_len, obj.unused_data
  
-
  def iter_sha1(iter):
      """Return the hexdigest of the SHA1 over a set of names.
-    
+
      :param iter: Iterator over string objects
      :return: 40-byte hex sha1 digest
      """
@@ -120,6 +133,7 @@ def load_pack_index(path):
      """Load an index file by path.
  
      :param filename: Path to the index file
+    :return: A PackIndex loaded from the given path
      """
      f = GitFile(path, 'rb')
      return load_pack_index_file(path, f)
@@ -149,6 +163,7 @@ def load_pack_index_file(path, f):
  
      :param path: Path for the index file
      :param f: File-like object
+    :return: A PackIndex loaded from the given file
      """
      contents, size = _load_file_contents(f)
      if contents[:4] == '\377tOc':
@@ -164,7 +179,7 @@ def load_pack_index_file(path, f):
  
  def bisect_find_sha(start, end, sha, unpack_name):
      """Find a SHA in a data blob with sorted SHAs.
-    
+
      :param start: Start index of range to search
      :param end: End index of range to search
      :param sha: Sha to find
@@ -187,10 +202,10 @@ def bisect_find_sha(start, end, sha, unpack_name):
  
  class PackIndex(object):
      """An index in to a packfile.
-  
+
      Given a sha id of an object a pack index can tell you the location in the
      packfile of that object if it has it.
-  
+
      To do the loop it opens the file, and indexes first 256 4 byte groups
      with the first byte of the sha id. The value in the four byte group indexed
      is the end of the group that shares the same starting byte. Subtract one
@@ -198,10 +213,10 @@ class PackIndex(object):
      The values are sorted by sha id within the group, so do the math to find
      the start and end offset and then bisect in to find if the value is present.
      """
-  
+
      def __init__(self, filename, file=None, contents=None, size=None):
          """Create a pack index object.
-    
+
          Provide it with the name of the index file to consider, and it will map
          it whenever required.
          """
@@ -216,14 +231,14 @@ class PackIndex(object):
              self._contents, self._size = _load_file_contents(file, size)
          else:
              self._contents, self._size = (contents, size)
-  
+
      def __eq__(self, other):
          if not isinstance(other, PackIndex):
              return False
-    
+
          if self._fan_out_table != other._fan_out_table:
              return False
-    
+
          for (name1, _, _), (name2, _, _) in izip(self.iterentries(),
                                                   other.iterentries()):
              if name1 != name2:
@@ -232,25 +247,25 @@ class PackIndex(object):
  
      def __ne__(self, other):
          return not self.__eq__(other)
-  
+
      def close(self):
          self._file.close()
-  
+
      def __len__(self):
          """Return the number of entries in this pack index."""
          return self._fan_out_table[-1]
-  
+
      def _unpack_entry(self, i):
          """Unpack the i-th entry in the index file.
-    
-        :return: Tuple with object name (SHA), offset in pack file and 
-              CRC32 checksum (if known)."""
+
+        :return: Tuple with object name (SHA), offset in pack file and CRC32
+            checksum (if known)."""
          raise NotImplementedError(self._unpack_entry)
-  
+
      def _unpack_name(self, i):
          """Unpack the i-th name from the index file."""
          raise NotImplementedError(self._unpack_name)
-  
+
      def _unpack_offset(self, i):
          """Unpack the i-th object offset from the index file."""
          raise NotImplementedError(self._unpack_offset)
@@ -258,43 +273,43 @@ class PackIndex(object):
      def _unpack_crc32_checksum(self, i):
          """Unpack the crc32 checksum for the i-th object from the index file."""
          raise NotImplementedError(self._unpack_crc32_checksum)
-  
+
      def __iter__(self):
          """Iterate over the SHAs in this pack."""
          return imap(sha_to_hex, self._itersha())
-  
+
      def _itersha(self):
          for i in range(len(self)):
              yield self._unpack_name(i)
-  
+
      def objects_sha1(self):
          """Return the hex SHA1 over all the shas of all objects in this pack.
-        
+
          :note: This is used for the filename of the pack.
          """
          return iter_sha1(self._itersha())
-  
+
      def iterentries(self):
          """Iterate over the entries in this pack index.
-       
+
          Will yield tuples with object name, offset in packfile and crc32
          checksum.
          """
          for i in range(len(self)):
              yield self._unpack_entry(i)
-  
+
      def _read_fan_out_table(self, start_offset):
          ret = []
          for i in range(0x100):
-            ret.append(struct.unpack(">L",
-                self._contents[start_offset+i*4:start_offset+(i+1)*4])[0])
+            fanout_entry = self._contents[start_offset+i*4:start_offset+(i+1)*4]
+            ret.append(struct.unpack(">L", fanout_entry)[0])
          return ret
-  
+
      def check(self):
          """Check that the stored checksum matches the actual checksum."""
          # TODO: Check pack contents, too
          return self.calculate_checksum() == self.get_stored_checksum()
-  
+
      def calculate_checksum(self):
          """Calculate the SHA1 checksum over this pack index.
  
@@ -304,21 +319,21 @@ class PackIndex(object):
  
      def get_pack_checksum(self):
          """Return the SHA1 checksum stored for the corresponding packfile.
-        
+
          :return: 20-byte binary digest
          """
          return str(self._contents[-40:-20])
-  
+
      def get_stored_checksum(self):
          """Return the SHA1 checksum stored for this index.
-        
+
          :return: 20-byte binary digest
          """
          return str(self._contents[-20:])
-  
+
      def object_index(self, sha):
          """Return the index in to the corresponding packfile for the object.
-    
+
          Given the name of an object it will return the offset that object
          lives at within the corresponding pack file. If the pack file doesn't
          have the object then None will be returned.
@@ -326,10 +341,10 @@ class PackIndex(object):
          if len(sha) == 40:
              sha = hex_to_sha(sha)
          return self._object_index(sha)
-  
+
      def _object_index(self, sha):
          """See object_index.
-        
+
          :param sha: A *binary* SHA string. (20 characters long)_
          """
          assert len(sha) == 20
@@ -343,7 +358,6 @@ class PackIndex(object):
          if i is None:
              raise KeyError(sha)
          return self._unpack_offset(i)
-            
  
  
  class PackIndex1(PackIndex):
@@ -355,22 +369,22 @@ class PackIndex1(PackIndex):
          self._fan_out_table = self._read_fan_out_table(0)
  
      def _unpack_entry(self, i):
-        (offset, name) = unpack_from(">L20s", self._contents, 
-            (0x100 * 4) + (i * 24))
+        (offset, name) = unpack_from(">L20s", self._contents,
+                                     (0x100 * 4) + (i * 24))
          return (name, offset, None)
- 
+
      def _unpack_name(self, i):
          offset = (0x100 * 4) + (i * 24) + 4
          return self._contents[offset:offset+20]
-  
+
      def _unpack_offset(self, i):
          offset = (0x100 * 4) + (i * 24)
          return unpack_from(">L", self._contents, offset)[0]
-  
+
      def _unpack_crc32_checksum(self, i):
          # Not stored in v1 index files
-        return None 
-  
+        return None
+
  
  class PackIndex2(PackIndex):
      """Version 2 Pack Index."""
@@ -383,32 +397,32 @@ class PackIndex2(PackIndex):
          self._fan_out_table = self._read_fan_out_table(8)
          self._name_table_offset = 8 + 0x100 * 4
          self._crc32_table_offset = self._name_table_offset + 20 * len(self)
-        self._pack_offset_table_offset = self._crc32_table_offset + 4 * len(self)
+        self._pack_offset_table_offset = (self._crc32_table_offset +
+                                          4 * len(self))
  
      def _unpack_entry(self, i):
-        return (self._unpack_name(i), self._unpack_offset(i), 
+        return (self._unpack_name(i), self._unpack_offset(i),
                  self._unpack_crc32_checksum(i))
- 
+
      def _unpack_name(self, i):
          offset = self._name_table_offset + i * 20
          return self._contents[offset:offset+20]
-  
+
      def _unpack_offset(self, i):
          offset = self._pack_offset_table_offset + i * 4
          return unpack_from(">L", self._contents, offset)[0]
-  
+
      def _unpack_crc32_checksum(self, i):
-        return unpack_from(">L", self._contents, 
+        return unpack_from(">L", self._contents,
                            self._crc32_table_offset + i * 4)[0]
-  
  
  
-def read_pack_header(f):
+def read_pack_header(read):
      """Read the header of a pack file.
  
-    :param f: File-like object to read from
+    :param read: Read function
      """
-    header = f.read(12)
+    header = read(12)
      assert header[:4] == "PACK"
      (version,) = unpack_from(">L", header, 4)
      assert version in (2, 3), "Version was %d" % version
@@ -420,20 +434,25 @@ def chunks_length(chunks):
      return sum(imap(len, chunks))
  
  
-def unpack_object(read):
+def unpack_object(read_all, read_some=None):
      """Unpack a Git object.
  
-    :return: tuple with type, uncompressed data as chunks, compressed size and 
-        tail data
+    :param read_all: Read function that blocks until the number of requested
+        bytes are read.
+    :param read_some: Read function that returns at least one byte, but may not
+        return the number of bytes requested.
+    :return: tuple with type, uncompressed data, compressed size and tail data.
      """
-    bytes = take_msb_bytes(read)
+    if read_some is None:
+        read_some = read_all
+    bytes = take_msb_bytes(read_all)
      type = (bytes[0] >> 4) & 0x07
      size = bytes[0] & 0x0f
      for i, byte in enumerate(bytes[1:]):
          size += (byte & 0x7f) << ((i * 7) + 4)
      raw_base = len(bytes)
      if type == 6: # offset delta
-        bytes = take_msb_bytes(read)
+        bytes = take_msb_bytes(read_all)
          raw_base += len(bytes)
          assert not (bytes[-1] & 0x80)
          delta_base_offset = bytes[0] & 0x7f
@@ -441,24 +460,23 @@ def unpack_object(read):
              delta_base_offset += 1
              delta_base_offset <<= 7
              delta_base_offset += (byte & 0x7f)
-        uncomp, comp_len, unused = read_zlib_chunks(read)
+        uncomp, comp_len, unused = read_zlib_chunks(read_some, size)
          assert size == chunks_length(uncomp)
          return type, (delta_base_offset, uncomp), comp_len+raw_base, unused
      elif type == 7: # ref delta
-        basename = read(20)
+        basename = read_all(20)
          raw_base += 20
-        uncomp, comp_len, unused = read_zlib_chunks(read)
+        uncomp, comp_len, unused = read_zlib_chunks(read_some, size)
          assert size == chunks_length(uncomp)
          return type, (basename, uncomp), comp_len+raw_base, unused
      else:
-        uncomp, comp_len, unused = read_zlib_chunks(read)
+        uncomp, comp_len, unused = read_zlib_chunks(read_some, size)
          assert chunks_length(uncomp) == size
          return type, uncomp, comp_len+raw_base, unused
  
  
  def _compute_object_size((num, obj)):
-    """Compute the size of a unresolved object for use with LRUSizeCache.
-    """
+    """Compute the size of a unresolved object for use with LRUSizeCache."""
      if num in (6, 7):
          return chunks_length(obj[1])
      return chunks_length(obj)
@@ -466,38 +484,37 @@ def _compute_object_size((num, obj)):
  
  class PackData(object):
      """The data contained in a packfile.
-  
+
      Pack files can be accessed both sequentially for exploding a pack, and
      directly with the help of an index to retrieve a specific object.
-  
+
      The objects within are either complete or a delta aginst another.
-  
+
      The header is variable length. If the MSB of each byte is set then it
      indicates that the subsequent byte is still part of the header.
      For the first byte the next MS bits are the type, which tells you the type
      of object, and whether it is a delta. The LS byte is the lowest bits of the
      size. For each subsequent byte the LS 7 bits are the next MS bits of the
      size, i.e. the last byte of the header contains the MS bits of the size.
-  
+
      For the complete objects the data is stored as zlib deflated data.
      The size in the header is the uncompressed object size, so to uncompress
      you need to just keep feeding data to zlib until you get an object back,
      or it errors on bad data. This is done here by just giving the complete
      buffer from the start of the deflated object on. This is bad, but until I
      get mmap sorted out it will have to do.
-  
+
      Currently there are no integrity checks done. Also no attempt is made to
      try and detect the delta case, or a request for an object at the wrong
      position.  It will all just throw a zlib or KeyError.
      """
-  
+
      def __init__(self, filename, file=None, size=None):
-        """Create a PackData object that represents the pack in the given
-        filename.
-    
+        """Create a PackData object representing the pack in the given filename.
+
          The file must exist and stay readable until the object is disposed of.
          It must also stay the same size. It will be mapped whenever needed.
-    
+
          Currently there is a restriction on the size of the pack as the python
          mmap implementation is flawed.
          """
@@ -508,8 +525,8 @@ class PackData(object):
              self._file = GitFile(self._filename, 'rb')
          else:
              self._file = file
-        (version, self._num_objects) = read_pack_header(self._file)
-        self._offset_cache = LRUSizeCache(1024*1024*20, 
+        (version, self._num_objects) = read_pack_header(self._file.read)
+        self._offset_cache = LRUSizeCache(1024*1024*20,
              compute_size=_compute_object_size)
  
      @classmethod
@@ -527,13 +544,16 @@ class PackData(object):
          if self._size is not None:
              return self._size
          self._size = os.path.getsize(self._filename)
-        assert self._size >= self._header_size, "%s is too small for a packfile (%d < %d)" % (self._filename, self._size, self._header_size)
+        if self._size < self._header_size:
+            errmsg = ("%s is too small for a packfile (%d < %d)" %
+                      (self._filename, self._size, self._header_size))
+            raise AssertionError(errmsg)
          return self._size
-  
+
      def __len__(self):
          """Returns the number of objects in this pack."""
          return self._num_objects
-  
+
      def calculate_checksum(self):
          """Calculate the checksum for this pack.
  
@@ -550,7 +570,7 @@ class PackData(object):
  
      def resolve_object(self, offset, type, obj, get_ref, get_offset=None):
          """Resolve an object, possibly resolving deltas when necessary.
-        
+
          :return: Tuple with object type and contents.
          """
          if type not in (6, 7): # Not a delta
@@ -558,7 +578,7 @@ class PackData(object):
  
          if get_offset is None:
              get_offset = self.get_object_at
-      
+
          if type == 6: # offset delta
              (delta_offset, delta) = obj
              assert isinstance(delta_offset, int)
@@ -574,15 +594,15 @@ class PackData(object):
              assert type != 6
              base_offset = None
          type, base_chunks = self.resolve_object(base_offset, type, base_obj,
-            get_ref)
+                                                get_ref)
          if base_offset is not None:
              self._offset_cache[base_offset] = type, base_chunks
          return (type, apply_delta(base_chunks, delta))
-  
+
      def iterobjects(self, progress=None):
  
          class ObjectIterator(object):
-            
+
              def __init__(self, pack):
                  self.i = 0
                  self.offset = pack._header_size
@@ -594,7 +614,7 @@ class PackData(object):
  
              def __len__(self):
                  return self.num
-            
+
              def next(self):
                  if self.i == self.num:
                      raise StopIteration
@@ -609,7 +629,7 @@ class PackData(object):
                  self.i+=1
                  return ret
          return ObjectIterator(self)
-  
+
      def iterentries(self, ext_resolve_ref=None, progress=None):
          """Yield entries summarizing the contents of this pack.
  
@@ -624,7 +644,7 @@ class PackData(object):
          postponed = defaultdict(list)
          class Postpone(Exception):
              """Raised to postpone delta resolving."""
-          
+
          def get_ref_text(sha):
              assert len(sha) == 20
              if sha in found:
@@ -653,20 +673,20 @@ class PackData(object):
                  extra.extend(postponed.get(sha, []))
          if postponed:
              raise KeyError([sha_to_hex(h) for h in postponed.keys()])
-  
+
      def sorted_entries(self, resolve_ext_ref=None, progress=None):
          """Return entries in this pack, sorted by SHA.
  
-        :param ext_resolve_ref: Optional function to resolve base
+        :param resolve_ext_ref: Optional function to resolve base
              objects (in case this is a thin pack)
          :param progress: Progress function, called with current and
-            total object count.
+            total object count
          :return: List of tuples with (sha, offset, crc32)
          """
          ret = list(self.iterentries(resolve_ext_ref, progress=progress))
          ret.sort()
          return ret
-  
+
      def create_index_v1(self, filename, resolve_ext_ref=None, progress=None):
          """Create a version 1 file for this data file.
  
@@ -677,7 +697,7 @@ class PackData(object):
          """
          entries = self.sorted_entries(resolve_ext_ref, progress=progress)
          write_pack_index_v1(filename, entries, self.calculate_checksum())
-  
+
      def create_index_v2(self, filename, resolve_ext_ref=None, progress=None):
          """Create a version 2 index file for this data file.
  
@@ -704,19 +724,19 @@ class PackData(object):
              self.create_index_v2(filename, resolve_ext_ref, progress)
          else:
              raise ValueError("unknown index format %d" % version)
-  
+
      def get_stored_checksum(self):
          """Return the expected checksum stored in this pack."""
          self._file.seek(self._get_size()-20)
          return self._file.read(20)
-  
+
      def check(self):
          """Check the consistency of this pack."""
          return (self.calculate_checksum() == self.get_stored_checksum())
-  
+
      def get_object_at(self, offset):
          """Given an offset in to the packfile return the object that is there.
-    
+
          Using the associated index the location of an object can be looked up,
          and then the packfile can be asked directly for that object using this
          function.
@@ -731,8 +751,7 @@ class PackData(object):
  
  
  class SHA1Reader(object):
-    """Wrapper around a file-like object that remembers the SHA1 of 
-    the data read from it."""
+    """Wrapper around a file-like object that remembers the SHA1 of its data."""
  
      def __init__(self, f):
          self.f = f
@@ -756,9 +775,8 @@ class SHA1Reader(object):
  
  
  class SHA1Writer(object):
-    """Wrapper around a file-like object that remembers the SHA1 of 
-    the data written to it."""
-    
+    """Wrapper around a file-like object that remembers the SHA1 of its data."""
+
      def __init__(self, f):
          self.f = f
          self.sha1 = make_sha("")
@@ -786,7 +804,8 @@ def write_pack_object(f, type, object):
      """Write pack object to a file.
  
      :param f: File to write to
-    :param o: Object to write
+    :param type: Numeric type of the object
+    :param object: Object to write
      :return: Tuple with offset at which the object was written, and crc32
      """
      offset = f.tell()
@@ -851,8 +870,8 @@ def write_pack_data(f, objects, num_objects, window=10):
      for obj, path in recency:
          magic.append( (obj.type_num, path, 1, -obj.raw_length(), obj) )
      magic.sort()
-    # Build a map of objects and their index in magic - so we can find preceeding objects
-    # to diff against
+    # Build a map of objects and their index in magic - so we can find
+    # preceeding objects to diff against
      offs = {}
      for i in range(len(magic)):
          offs[magic[i][4]] = i
@@ -968,7 +987,7 @@ def create_delta(base_buf, target_buf):
  
  def apply_delta(src_buf, delta):
      """Based on the similar function in git's patch-delta.c.
-    
+
      :param src_buf: Source buffer
      :param delta: Delta instructions
      """
@@ -999,17 +1018,17 @@ def apply_delta(src_buf, delta):
          if cmd & 0x80:
              cp_off = 0
              for i in range(4):
-                if cmd & (1 << i): 
+                if cmd & (1 << i):
                      x = ord(delta[index])
                      index += 1
                      cp_off |= x << (i * 8)
              cp_size = 0
              for i in range(3):
-                if cmd & (1 << (4+i)): 
+                if cmd & (1 << (4+i)):
                      x = ord(delta[index])
                      index += 1
                      cp_size |= x << (i * 8)
-            if cp_size == 0: 
+            if cp_size == 0:
                  cp_size = 0x10000
              if (cp_off + cp_size < cp_size or
                  cp_off + cp_size > src_size or
@@ -1021,7 +1040,7 @@ def apply_delta(src_buf, delta):
              index += cmd
          else:
              raise ApplyDeltaError("Invalid opcode 0")
-    
+
      if index != delta_length:
          raise ApplyDeltaError("delta not empty: %r" % delta[index:])
  
@@ -1094,7 +1113,7 @@ class Pack(object):
              idx_stored_checksum = self.index.get_pack_checksum()
              data_stored_checksum = self._data.get_stored_checksum()
              if idx_stored_checksum != data_stored_checksum:
-                raise ChecksumMismatch(sha_to_hex(idx_stored_checksum), 
+                raise ChecksumMismatch(sha_to_hex(idx_stored_checksum),
                                         sha_to_hex(data_stored_checksum))
          return self._data
  
diff --git a/dulwich/protocol.py b/dulwich/protocol.py

index 5525f62a0fd1bdf8a35df4bc4ef7a4c18502de8a..0d83bb814e41b11c61895724648792dfafc82c77 100644 (file)
--- a/dulwich/protocol.py
+++ b/dulwich/protocol.py
@@ -19,6 +19,8 @@
  
  """Generic functions for talking the git smart server protocol."""
  
+from cStringIO import StringIO
+import os
  import socket
  
  from dulwich.errors import (
@@ -162,6 +164,112 @@ class Protocol(object):
          return cmd, args[:-1].split(chr(0))
  
  
+_RBUFSIZE = 8192  # Default read buffer size.
+
+
+class ReceivableProtocol(Protocol):
+    """Variant of Protocol that allows reading up to a size without blocking.
+
+    This class has a recv() method that behaves like socket.recv() in addition
+    to a read() method.
+
+    If you want to read n bytes from the wire and block until exactly n bytes
+    (or EOF) are read, use read(n). If you want to read at most n bytes from the
+    wire but don't care if you get less, use recv(n). Note that recv(n) will
+    still block until at least one byte is read.
+    """
+
+    def __init__(self, recv, write, report_activity=None, rbufsize=_RBUFSIZE):
+        super(ReceivableProtocol, self).__init__(self.read, write,
+                                                report_activity)
+        self._recv = recv
+        self._rbuf = StringIO()
+        self._rbufsize = rbufsize
+
+    def read(self, size):
+        # From _fileobj.read in socket.py in the Python 2.6.5 standard library,
+        # with the following modifications:
+        #  - omit the size <= 0 branch
+        #  - seek back to start rather than 0 in case some buffer has been
+        #    consumed.
+        #  - use os.SEEK_END instead of the magic number.
+        # Copyright (c) 2001-2010 Python Software Foundation; All Rights Reserved
+        # Licensed under the Python Software Foundation License.
+        # TODO: see if buffer is more efficient than cStringIO.
+        assert size > 0
+
+        # Our use of StringIO rather than lists of string objects returned by
+        # recv() minimizes memory usage and fragmentation that occurs when
+        # rbufsize is large compared to the typical return value of recv().
+        buf = self._rbuf
+        start = buf.tell()
+        buf.seek(0, os.SEEK_END)
+        # buffer may have been partially consumed by recv()
+        buf_len = buf.tell() - start
+        if buf_len >= size:
+            # Already have size bytes in our buffer?  Extract and return.
+            buf.seek(start)
+            rv = buf.read(size)
+            self._rbuf = StringIO()
+            self._rbuf.write(buf.read())
+            self._rbuf.seek(0)
+            return rv
+
+        self._rbuf = StringIO()  # reset _rbuf.  we consume it via buf.
+        while True:
+            left = size - buf_len
+            # recv() will malloc the amount of memory given as its
+            # parameter even though it often returns much less data
+            # than that.  The returned data string is short lived
+            # as we copy it into a StringIO and free it.  This avoids
+            # fragmentation issues on many platforms.
+            data = self._recv(left)
+            if not data:
+                break
+            n = len(data)
+            if n == size and not buf_len:
+                # Shortcut.  Avoid buffer data copies when:
+                # - We have no data in our buffer.
+                # AND
+                # - Our call to recv returned exactly the
+                #   number of bytes we were asked to read.
+                return data
+            if n == left:
+                buf.write(data)
+                del data  # explicit free
+                break
+            assert n <= left, "_recv(%d) returned %d bytes" % (left, n)
+            buf.write(data)
+            buf_len += n
+            del data  # explicit free
+            #assert buf_len == buf.tell()
+        buf.seek(start)
+        return buf.read()
+
+    def recv(self, size):
+        assert size > 0
+
+        buf = self._rbuf
+        start = buf.tell()
+        buf.seek(0, os.SEEK_END)
+        buf_len = buf.tell()
+        buf.seek(start)
+
+        left = buf_len - start
+        if not left:
+            # only read from the wire if our read buffer is exhausted
+            data = self._recv(self._rbufsize)
+            if len(data) == size:
+                # shortcut: skip the buffer if we read exactly size bytes
+                return data
+            buf = StringIO()
+            buf.write(data)
+            buf.seek(0)
+            del data  # explicit free
+            self._rbuf = buf
+        return buf.read(size)
+
+
  def extract_capabilities(text):
      """Extract a capabilities list from a string, if present.
  
@@ -171,7 +279,7 @@ def extract_capabilities(text):
      if not "\0" in text:
          return text, []
      text, capabilities = text.rstrip().split("\0")
-    return (text, capabilities.split(" "))
+    return (text, capabilities.strip().split(" "))
  
  
  def extract_want_line_capabilities(text):
diff --git a/dulwich/repo.py b/dulwich/repo.py

index a69760e45837ed8bb479f721b43666e8844d960f..067554fbccc8711421e8452c59b1c5b032c3abd2 100644 (file)
--- a/dulwich/repo.py
+++ b/dulwich/repo.py
@@ -684,6 +684,8 @@ class BaseRepo(object):
          :return: iterator over objects, with __len__ implemented
          """
          wants = determine_wants(self.get_refs())
+        if not wants:
+            return []
          haves = self.object_store.find_common_revisions(graph_walker)
          return self.object_store.iter_shas(
              self.object_store.find_missing_objects(haves, wants, progress,
diff --git a/dulwich/server.py b/dulwich/server.py

index 589aca9d44c8c2f357e15df393376652fc8d8d48..c08a2d5744a41b1a2dbdc3f1c1c620528ef184ce 100644 (file)
--- a/dulwich/server.py
+++ b/dulwich/server.py
@@ -27,19 +27,27 @@ Documentation/technical directory in the cgit distribution, and in particular:
  
  
  import collections
+from cStringIO import StringIO
+import socket
  import SocketServer
+import zlib
  
  from dulwich.errors import (
      ApplyDeltaError,
      ChecksumMismatch,
      GitProtocolError,
      )
+from dulwich.misc import (
+    make_sha,
+    )
  from dulwich.objects import (
      hex_to_sha,
+    sha_to_hex,
      )
  from dulwich.protocol import (
-    Protocol,
      ProtocolFile,
+    Protocol,
+    ReceivableProtocol,
      TCP_GIT_PORT,
      ZERO_SHA,
      extract_capabilities,
@@ -50,6 +58,8 @@ from dulwich.protocol import (
      ack_type,
      )
  from dulwich.pack import (
+    read_pack_header,
+    unpack_object,
      write_pack_data,
      )
  
@@ -102,6 +112,105 @@ class BackendRepo(object):
          raise NotImplementedError
  
  
+class PackStreamVerifier(object):
+    """Class to verify a pack stream as it is being read.
+
+    The pack is read from a ReceivableProtocol using read() or recv() as
+    appropriate and written out to the given file-like object.
+    """
+
+    def __init__(self, proto, outfile):
+        self.proto = proto
+        self.outfile = outfile
+        self.sha = make_sha()
+        self._rbuf = StringIO()
+        # trailer is a deque to avoid memory allocation on small reads
+        self._trailer = collections.deque()
+
+    def _read(self, read, size):
+        """Read up to size bytes using the given callback.
+
+        As a side effect, update the verifier's hash (excluding the last 20
+        bytes read) and write through to the output file.
+
+        :param read: The read callback to read from.
+        :param size: The maximum number of bytes to read; the particular
+            behavior is callback-specific.
+        """
+        data = read(size)
+
+        # maintain a trailer of the last 20 bytes we've read
+        n = len(data)
+        tn = len(self._trailer)
+        if n >= 20:
+            to_pop = tn
+            to_add = 20
+        else:
+            to_pop = max(n + tn - 20, 0)
+            to_add = n
+        for _ in xrange(to_pop):
+            self.sha.update(self._trailer.popleft())
+        self._trailer.extend(data[-to_add:])
+
+        # hash everything but the trailer
+        self.sha.update(data[:-to_add])
+        self.outfile.write(data)
+        return data
+
+    def _buf_len(self):
+        buf = self._rbuf
+        start = buf.tell()
+        buf.seek(0, 2)
+        end = buf.tell()
+        buf.seek(start)
+        return end - start
+
+    def read(self, size):
+        """Read, blocking until size bytes are read."""
+        buf_len = self._buf_len()
+        if buf_len >= size:
+            return self._rbuf.read(size)
+        buf_data = self._rbuf.read()
+        self._rbuf = StringIO()
+        return buf_data + self._read(self.proto.read, size - buf_len)
+
+    def recv(self, size):
+        """Read up to size bytes, blocking until one byte is read."""
+        buf_len = self._buf_len()
+        if buf_len:
+            data = self._rbuf.read(size)
+            if size >= buf_len:
+                self._rbuf = StringIO()
+            return data
+        return self._read(self.proto.recv, size)
+
+    def verify(self):
+        """Verify a pack stream and write it to the output file.
+
+        :raise AssertionError: if there is an error in the pack format.
+        :raise ChecksumMismatch: if the checksum of the pack contents does not
+            match the checksum in the pack trailer.
+        :raise socket.error: if an error occurred reading from the socket.
+        :raise zlib.error: if an error occurred during zlib decompression.
+        :raise IOError: if an error occurred writing to the output file.
+        """
+        _, num_objects = read_pack_header(self.read)
+        for i in xrange(num_objects):
+            type, _, _, unused = unpack_object(self.read, self.recv)
+
+            # prepend any unused data to current read buffer
+            buf = StringIO()
+            buf.write(unused)
+            buf.write(self._rbuf.read())
+            buf.seek(0)
+            self._rbuf = buf
+
+        pack_sha = sha_to_hex(''.join([c for c in self._trailer]))
+        calculated_sha = self.sha.hexdigest()
+        if pack_sha != calculated_sha:
+            raise ChecksumMismatch(pack_sha, calculated_sha)
+
+
  class DictBackend(Backend):
      """Trivial backend that looks up Git repositories in a dictionary."""
  
@@ -116,9 +225,9 @@ class DictBackend(Backend):
  class Handler(object):
      """Smart protocol command handler base class."""
  
-    def __init__(self, backend, read, write):
+    def __init__(self, backend, proto):
          self.backend = backend
-        self.proto = Protocol(read, write)
+        self.proto = proto
          self._client_capabilities = None
  
      def capability_line(self):
@@ -157,9 +266,9 @@ class Handler(object):
  class UploadPackHandler(Handler):
      """Protocol handler for uploading a pack to the server."""
  
-    def __init__(self, backend, args, read, write,
+    def __init__(self, backend, args, proto,
                   stateless_rpc=False, advertise_refs=False):
-        Handler.__init__(self, backend, read, write)
+        Handler.__init__(self, backend, proto)
          self.repo = backend.open_repository(args[0])
          self._graph_walker = None
          self.stateless_rpc = stateless_rpc
@@ -521,9 +630,9 @@ class MultiAckDetailedGraphWalkerImpl(object):
  class ReceivePackHandler(Handler):
      """Protocol handler for downloading a pack from the client."""
  
-    def __init__(self, backend, args, read, write,
+    def __init__(self, backend, args, proto,
                   stateless_rpc=False, advertise_refs=False):
-        Handler.__init__(self, backend, read, write)
+        Handler.__init__(self, backend, proto)
          self.repo = backend.open_repository(args[0])
          self.stateless_rpc = stateless_rpc
          self.advertise_refs = advertise_refs
@@ -531,20 +640,14 @@ class ReceivePackHandler(Handler):
      def capabilities(self):
          return ("report-status", "delete-refs")
  
-    def _apply_pack(self, refs, read):
+    def _apply_pack(self, refs):
          f, commit = self.repo.object_store.add_thin_pack()
          all_exceptions = (IOError, OSError, ChecksumMismatch, ApplyDeltaError)
          status = []
          unpack_error = None
          # TODO: more informative error messages than just the exception string
          try:
-            # TODO: decode the pack as we stream to avoid blocking reads beyond
-            # the end of data (when using HTTP/1.1 chunked encoding)
-            while True:
-                data = read(10240)
-                if not data:
-                    break
-                f.write(data)
+            PackStreamVerifier(self.proto, f).verify()
          except all_exceptions, e:
              unpack_error = str(e).replace('\n', '')
          try:
@@ -619,7 +722,7 @@ class ReceivePackHandler(Handler):
              ref = self.proto.read_pkt_line()
  
          # backend can now deal with this refs and read a pack using self.read
-        status = self.repo._apply_pack(client_refs, self.proto.read)
+        status = self._apply_pack(client_refs)
  
          # when we have read all the pack from the client, send a status report
          # if the client asked for it
@@ -637,7 +740,7 @@ class ReceivePackHandler(Handler):
  class TCPGitRequestHandler(SocketServer.StreamRequestHandler):
  
      def handle(self):
-        proto = Protocol(self.rfile.read, self.wfile.write)
+        proto = ReceivableProtocol(self.connection.recv, self.wfile.write)
          command, args = proto.read_cmd()
  
          # switch case to handle the specific git command
@@ -648,7 +751,7 @@ class TCPGitRequestHandler(SocketServer.StreamRequestHandler):
          else:
              return
  
-        h = cls(self.server.backend, args, self.rfile.read, self.wfile.write)
+        h = cls(self.server.backend, args, proto)
          h.handle()
  
  
diff --git a/dulwich/tests/compat/test_server.py b/dulwich/tests/compat/test_server.py

index 2f58a9c95c08c52f1fc56aca73d46c16848b78be..22d329dc01089b7d62bd65321ee213bd31443962 100644 (file)
--- a/dulwich/tests/compat/test_server.py
+++ b/dulwich/tests/compat/test_server.py
@@ -78,4 +78,5 @@ class GitServerTestCase(ServerTests, CompatTestCase):
          return port
  
      def test_push_to_dulwich(self):
+        # TODO(dborowitz): enable after merging thin pack fixes.
          raise TestSkipped('Skipping push test due to known deadlock bug.')
diff --git a/dulwich/tests/test_pack.py b/dulwich/tests/test_pack.py

index 1484ccb4663e6803daa9e4a09f7b0b5384ff9c86..dfdf23954c09733133ca1f0fe51b04d904d4666f 100644 (file)
--- a/dulwich/tests/test_pack.py
+++ b/dulwich/tests/test_pack.py
@@ -24,6 +24,7 @@
  from cStringIO import StringIO
  import os
  import unittest
+import zlib
  
  from dulwich.objects import (
      Tree,
@@ -277,11 +278,67 @@ class TestPackIndexWritingv2(unittest.TestCase, BaseTestPackIndexWriting):
          self._expected_version = 2
          self._write_fn = write_pack_index_v2
  
-TEST_COMP1 = """\x78\x9c\x9d\x8e\xc1\x0a\xc2\x30\x10\x44\xef\xf9\x8a\xbd\xa9\x08\x92\x86\xb4\x26\x20\xe2\xd9\x83\x78\xf2\xbe\x49\x37\xb5\xa5\x69\xca\x36\xf5\xfb\x4d\xfd\x04\x67\x6e\x33\xcc\xf0\x32\x13\x81\xc6\x16\x8d\xa9\xbd\xad\x6c\xe3\x8a\x03\x4a\x73\xd6\xda\xd5\xa6\x51\x2e\x58\x65\x6c\x13\xbc\x94\x4a\xcc\xc8\x34\x65\x78\xa4\x89\x04\xae\xf9\x9d\x18\xee\x34\x46\x62\x78\x11\x4f\x29\xf5\x03\x5c\x86\x5f\x70\x5b\x30\x3a\x3c\x25\xee\xae\x50\xa9\xf2\x60\xa4\xaa\x34\x1c\x65\x91\xf0\x29\xc6\x3e\x67\xfa\x6f\x2d\x9e\x9c\x3e\x7d\x4b\xc0\x34\x8f\xe8\x29\x6e\x48\xa1\xa0\xc4\x88\xf3\xfe\xb0\x5b\x20\x85\xb0\x50\x06\xe4\x6e\xdd\xca\xd3\x17\x26\xfa\x49\x23"""
  
+class ReadZlibTests(unittest.TestCase):
  
-class ZlibTests(unittest.TestCase):
+    decomp = (
+      'tree 4ada885c9196b6b6fa08744b5862bf92896fc002\n'
+      'parent None\n'
+      'author Jelmer Vernooij <jelmer@samba.org> 1228980214 +0000\n'
+      'committer Jelmer Vernooij <jelmer@samba.org> 1228980214 +0000\n'
+      '\n'
+      "Provide replacement for mmap()'s offset argument.")
+    comp = zlib.compress(decomp)
+    extra = 'nextobject'
+
+    def setUp(self):
+        self.read = StringIO(self.comp + self.extra).read
+
+    def test_decompress_size(self):
+        good_decomp_len = len(self.decomp)
+        self.assertRaises(ValueError, read_zlib_chunks, self.read, -1)
+        self.assertRaises(zlib.error, read_zlib_chunks, self.read,
+                          good_decomp_len - 1)
+        self.assertRaises(zlib.error, read_zlib_chunks, self.read,
+                          good_decomp_len + 1)
+
+    def test_decompress_truncated(self):
+        read = StringIO(self.comp[:10]).read
+        self.assertRaises(zlib.error, read_zlib_chunks, read, len(self.decomp))
+
+        read = StringIO(self.comp).read
+        self.assertRaises(zlib.error, read_zlib_chunks, read, len(self.decomp))
+
+    def test_decompress_empty(self):
+        comp = zlib.compress('')
+        read = StringIO(comp + self.extra).read
+        decomp, comp_len, unused_data = read_zlib_chunks(read, 0)
+        self.assertEqual('', ''.join(decomp))
+        self.assertEqual(len(comp), comp_len)
+        self.assertNotEquals('', unused_data)
+        self.assertEquals(self.extra, unused_data + read())
+
+    def _do_decompress_test(self, buffer_size):
+        decomp, comp_len, unused_data = read_zlib_chunks(
+          self.read, len(self.decomp), buffer_size=buffer_size)
+        self.assertEquals(self.decomp, ''.join(decomp))
+        self.assertEquals(len(self.comp), comp_len)
+        self.assertNotEquals('', unused_data)
+        self.assertEquals(self.extra, unused_data + self.read())
  
      def test_simple_decompress(self):
-        self.assertEquals((["tree 4ada885c9196b6b6fa08744b5862bf92896fc002\nparent None\nauthor Jelmer Vernooij <jelmer@samba.org> 1228980214 +0000\ncommitter Jelmer Vernooij <jelmer@samba.org> 1228980214 +0000\n\nProvide replacement for mmap()'s offset argument."], 158, 'Z'), 
-        read_zlib_chunks(StringIO(TEST_COMP1).read, 229))
+        self._do_decompress_test(4096)
+
+    # These buffer sizes are not intended to be realistic, but rather simulate
+    # larger buffer sizes that may end at various places.
+    def test_decompress_buffer_size_1(self):
+        self._do_decompress_test(1)
+
+    def test_decompress_buffer_size_2(self):
+        self._do_decompress_test(2)
+
+    def test_decompress_buffer_size_3(self):
+        self._do_decompress_test(3)
+
+    def test_decompress_buffer_size_4(self):
+        self._do_decompress_test(4)
diff --git a/dulwich/tests/test_protocol.py b/dulwich/tests/test_protocol.py

index 008453eefd16e9c12489a9750225cb733309edec..7039456d88db00f356a843aac28b3d403c5a4bef 100644 (file)
--- a/dulwich/tests/test_protocol.py
+++ b/dulwich/tests/test_protocol.py
@@ -20,11 +20,12 @@
  """Tests for the smart protocol utility functions."""
  
  
-from cStringIO import StringIO
+from StringIO import StringIO
  from unittest import TestCase
  
  from dulwich.protocol import (
      Protocol,
+    ReceivableProtocol,
      extract_capabilities,
      extract_want_line_capabilities,
      ack_type,
@@ -33,12 +34,7 @@ from dulwich.protocol import (
      MULTI_ACK_DETAILED,
      )
  
-class ProtocolTests(TestCase):
-
-    def setUp(self):
-        self.rout = StringIO()
-        self.rin = StringIO()
-        self.proto = Protocol(self.rin.read, self.rout.write)
+class BaseProtocolTests(object):
  
      def test_write_pkt_line_none(self):
          self.proto.write_pkt_line(None)
@@ -82,6 +78,93 @@ class ProtocolTests(TestCase):
          self.assertRaises(AssertionError, self.proto.read_cmd)
  
  
+class ProtocolTests(BaseProtocolTests, TestCase):
+
+    def setUp(self):
+        TestCase.setUp(self)
+        self.rout = StringIO()
+        self.rin = StringIO()
+        self.proto = Protocol(self.rin.read, self.rout.write)
+
+
+class ReceivableStringIO(StringIO):
+    """StringIO with socket-like recv semantics for testing."""
+
+    def recv(self, size):
+        # fail fast if no bytes are available; in a real socket, this would
+        # block forever
+        if self.tell() == len(self.getvalue()):
+            raise AssertionError("Blocking read past end of socket")
+        if size == 1:
+            return self.read(1)
+        # calls shouldn't return quite as much as asked for
+        return self.read(size - 1)
+
+
+class ReceivableProtocolTests(BaseProtocolTests, TestCase):
+
+    def setUp(self):
+        TestCase.setUp(self)
+        self.rout = StringIO()
+        self.rin = ReceivableStringIO()
+        self.proto = ReceivableProtocol(self.rin.recv, self.rout.write)
+        self.proto._rbufsize = 8
+
+    def test_recv(self):
+        all_data = "1234567" * 10  # not a multiple of bufsize
+        self.rin.write(all_data)
+        self.rin.seek(0)
+        data = ""
+        # We ask for 8 bytes each time and actually read 7, so it should take
+        # exactly 10 iterations.
+        for _ in xrange(10):
+            data += self.proto.recv(10)
+        # any more reads would block
+        self.assertRaises(AssertionError, self.proto.recv, 10)
+        self.assertEquals(all_data, data)
+
+    def test_recv_read(self):
+        all_data = "1234567"  # recv exactly in one call
+        self.rin.write(all_data)
+        self.rin.seek(0)
+        self.assertEquals("1234", self.proto.recv(4))
+        self.assertEquals("567", self.proto.read(3))
+        self.assertRaises(AssertionError, self.proto.recv, 10)
+
+    def test_read_recv(self):
+        all_data = "12345678abcdefg"
+        self.rin.write(all_data)
+        self.rin.seek(0)
+        self.assertEquals("1234", self.proto.read(4))
+        self.assertEquals("5678abc", self.proto.recv(8))
+        self.assertEquals("defg", self.proto.read(4))
+        self.assertRaises(AssertionError, self.proto.recv, 10)
+
+    def test_mixed(self):
+        # arbitrary non-repeating string
+        all_data = ",".join(str(i) for i in xrange(100))
+        self.rin.write(all_data)
+        self.rin.seek(0)
+        data = ""
+
+        for i in xrange(1, 100):
+            data += self.proto.recv(i)
+            # if we get to the end, do a non-blocking read instead of blocking
+            if len(data) + i > len(all_data):
+                data += self.proto.recv(i)
+                # ReceivableStringIO leaves off the last byte unless we ask
+                # nicely
+                data += self.proto.recv(1)
+                break
+            else:
+                data += self.proto.read(i)
+        else:
+            # didn't break, something must have gone wrong
+            self.fail()
+
+        self.assertEquals(all_data, data)
+
+
  class CapabilitiesTestCase(TestCase):
  
      def test_plain(self):
diff --git a/dulwich/tests/test_server.py b/dulwich/tests/test_server.py

index e13ed2268e56f010b11c39a6ff7c19df3161f85d..e551f5b00dc9c96c4ba4586cb26937250557255d 100644 (file)
--- a/dulwich/tests/test_server.py
+++ b/dulwich/tests/test_server.py
@@ -79,7 +79,7 @@ class TestProto(object):
  class HandlerTestCase(TestCase):
  
      def setUp(self):
-        self._handler = Handler(Backend(), None, None)
+        self._handler = Handler(Backend(), None)
          self._handler.capabilities = lambda: ('cap1', 'cap2', 'cap3')
          self._handler.required_capabilities = lambda: ('cap2',)
  
diff --git a/dulwich/tests/test_web.py b/dulwich/tests/test_web.py

index 17bf499dff32f2ad8a9e33a61cd67927e35b7448..c9d7d7015a3ba3f3ff873a7c2f71c7c66a339f60 100644 (file)
--- a/dulwich/tests/test_web.py
+++ b/dulwich/tests/test_web.py
@@ -153,28 +153,16 @@ class DumbHandlersTestCase(WebTestCase):
  
  class SmartHandlersTestCase(WebTestCase):
  
-    class TestProtocol(object):
-        def __init__(self, handler):
-            self._handler = handler
-
-        def write_pkt_line(self, line):
-            if line is None:
-                self._handler.write('flush-pkt\n')
-            else:
-                self._handler.write('pkt-line: %s' % line)
-
      class _TestUploadPackHandler(object):
-        def __init__(self, backend, args, read, write, stateless_rpc=False,
+        def __init__(self, backend, args, proto, stateless_rpc=False,
                       advertise_refs=False):
              self.args = args
-            self.read = read
-            self.write = write
-            self.proto = SmartHandlersTestCase.TestProtocol(self)
+            self.proto = proto
              self.stateless_rpc = stateless_rpc
              self.advertise_refs = advertise_refs
  
          def handle(self):
-            self.write('handled input: %s' % self.read())
+            self.proto.write('handled input: %s' % self.proto.recv(1024))
  
      def _MakeHandler(self, *args, **kwargs):
          self._handler = self._TestUploadPackHandler(*args, **kwargs)
@@ -222,8 +210,8 @@ class SmartHandlersTestCase(WebTestCase):
          mat = re.search('.*', '/git-upload-pack')
          output = ''.join(get_info_refs(self._req, 'backend', mat,
                                         services=self.services()))
-        self.assertEquals(('pkt-line: # service=git-upload-pack\n'
-                           'flush-pkt\n'
+        self.assertEquals(('001e# service=git-upload-pack\n'
+                           '0000'
                             # input is ignored by the handler
                             'handled input: '), output)
          self.assertTrue(self._handler.advertise_refs)
diff --git a/dulwich/web.py b/dulwich/web.py

index 28af518c29c981081c16e55abab7bd56b9e2b64e..c075a992996947554a1b720fa6dcdb72c19711ec 100644 (file)
--- a/dulwich/web.py
+++ b/dulwich/web.py
@@ -26,6 +26,9 @@ try:
      from urlparse import parse_qs
  except ImportError:
      from dulwich.misc import parse_qs
+from dulwich.protocol import (
+    ReceivableProtocol,
+    )
  from dulwich.server import (
      ReceivePackHandler,
      UploadPackHandler,
@@ -138,9 +141,8 @@ def get_info_refs(req, backend, mat, services=None):
          req.nocache()
          req.respond(HTTP_OK, 'application/x-%s-advertisement' % service)
          output = StringIO()
-        dummy_input = StringIO()  # GET request, handler doesn't need to read
-        handler = handler_cls(backend, [url_prefix(mat)],
-                              dummy_input.read, output.write,
+        proto = ReceivableProtocol(StringIO().read, output.write)
+        handler = handler_cls(backend, [url_prefix(mat)], proto,
                                stateless_rpc=True, advertise_refs=True)
          handler.proto.write_pkt_line('# service=%s\n' % service)
          handler.proto.write_pkt_line(None)
@@ -216,8 +218,8 @@ def handle_service_request(req, backend, mat, services=None):
      # content-length
      if 'CONTENT_LENGTH' in req.environ:
          input = _LengthLimitedFile(input, int(req.environ['CONTENT_LENGTH']))
-    handler = handler_cls(backend, [url_prefix(mat)], input.read, output.write,
-                          stateless_rpc=True)
+    proto = ReceivableProtocol(input.read, output.write)
+    handler = handler_cls(backend, [url_prefix(mat)], proto, stateless_rpc=True)
      handler.handle()
      yield output.getvalue()
author	Jelmer Vernooij <jelmer@samba.org>
	Fri, 16 Apr 2010 19:49:17 +0000 (21:49 +0200)
committer	Jelmer Vernooij <jelmer@samba.org>
	Fri, 16 Apr 2010 19:49:17 +0000 (21:49 +0200)
dulwich/pack.py		patch \| blob \| history
dulwich/protocol.py		patch \| blob \| history
dulwich/repo.py		patch \| blob \| history
dulwich/server.py		patch \| blob \| history
dulwich/tests/compat/test_server.py		patch \| blob \| history
dulwich/tests/test_pack.py		patch \| blob \| history
dulwich/tests/test_protocol.py		patch \| blob \| history
dulwich/tests/test_server.py		patch \| blob \| history
dulwich/tests/test_web.py		patch \| blob \| history
dulwich/web.py		patch \| blob \| history