Remove authors of trivial contributions or who have given permission to relicense...
[jelmer/dulwich.git] / dulwich / client.py
index 33977747fef7ac7470c1c0d4c8c7af60bada1504..fb15f3fea9858ab9261a22b6a467f2a754338cb3 100644 (file)
@@ -1,5 +1,5 @@
 # client.py -- Implementation of the server side git protocols
-# Copyright (C) 2008-2009 Jelmer Vernooij <jelmer@samba.org>
+# Copyright (C) 2008-2013 Jelmer Vernooij <jelmer@samba.org>
 # Copyright (C) 2008 John Carr
 #
 # This program is free software; you can redistribute it and/or
@@ -38,12 +38,21 @@ Known capabilities that are not supported:
 
 __docformat__ = 'restructuredText'
 
-from cStringIO import StringIO
+from contextlib import closing
+from io import BytesIO, BufferedReader
+import dulwich
 import select
+import shlex
 import socket
 import subprocess
-import urllib2
-import urlparse
+import sys
+
+try:
+    import urllib2
+    import urlparse
+except ImportError:
+    import urllib.request as urllib2
+    import urllib.parse as urlparse
 
 from dulwich.errors import (
     GitProtocolError,
@@ -53,8 +62,22 @@ from dulwich.errors import (
     )
 from dulwich.protocol import (
     _RBUFSIZE,
+    CAPABILITY_DELETE_REFS,
+    CAPABILITY_MULTI_ACK,
+    CAPABILITY_MULTI_ACK_DETAILED,
+    CAPABILITY_OFS_DELTA,
+    CAPABILITY_REPORT_STATUS,
+    CAPABILITY_SIDE_BAND_64K,
+    CAPABILITY_THIN_PACK,
+    COMMAND_DONE,
+    COMMAND_HAVE,
+    COMMAND_WANT,
+    SIDE_BAND_CHANNEL_DATA,
+    SIDE_BAND_CHANNEL_PROGRESS,
+    SIDE_BAND_CHANNEL_FATAL,
     PktLineParser,
     Protocol,
+    ProtocolFile,
     TCP_GIT_PORT,
     ZERO_SHA,
     extract_capabilities,
@@ -62,25 +85,24 @@ from dulwich.protocol import (
 from dulwich.pack import (
     write_pack_objects,
     )
+from dulwich.refs import (
+    read_info_refs,
+    )
 
 
-# Python 2.6.6 included these in urlparse.uses_netloc upstream. Do
-# monkeypatching to enable similar behaviour in earlier Pythons:
-for scheme in ('git', 'git+ssh'):
-    if scheme not in urlparse.uses_netloc:
-        urlparse.uses_netloc.append(scheme)
-
 def _fileno_can_read(fileno):
     """Check if a file descriptor is readable."""
     return len(select.select([fileno], [], [], 0)[0]) > 0
 
-COMMON_CAPABILITIES = ['ofs-delta', 'side-band-64k']
-FETCH_CAPABILITIES = ['thin-pack', 'multi_ack', 'multi_ack_detailed'] + COMMON_CAPABILITIES
-SEND_CAPABILITIES = ['report-status'] + COMMON_CAPABILITIES
+COMMON_CAPABILITIES = [CAPABILITY_OFS_DELTA, CAPABILITY_SIDE_BAND_64K]
+FETCH_CAPABILITIES = ([CAPABILITY_THIN_PACK, CAPABILITY_MULTI_ACK,
+                       CAPABILITY_MULTI_ACK_DETAILED] +
+                      COMMON_CAPABILITIES)
+SEND_CAPABILITIES = [CAPABILITY_REPORT_STATUS] + COMMON_CAPABILITIES
 
 
 class ReportStatusParser(object):
-    """Handle status as reported by servers with the 'report-status' capability.
+    """Handle status as reported by servers with 'report-status' capability.
     """
 
     def __init__(self):
@@ -95,27 +117,27 @@ class ReportStatusParser(object):
         :raise SendPackError: Raised when the server could not unpack
         :raise UpdateRefsError: Raised when refs could not be updated
         """
-        if self._pack_status not in ('unpack ok', None):
+        if self._pack_status not in (b'unpack ok', None):
             raise SendPackError(self._pack_status)
         if not self._ref_status_ok:
             ref_status = {}
             ok = set()
             for status in self._ref_statuses:
-                if ' ' not in status:
+                if b' ' not in status:
                     # malformed response, move on to the next one
                     continue
-                status, ref = status.split(' ', 1)
+                status, ref = status.split(b' ', 1)
 
-                if status == 'ng':
-                    if ' ' in ref:
-                        ref, status = ref.split(' ', 1)
+                if status == b'ng':
+                    if b' ' in ref:
+                        ref, status = ref.split(b' ', 1)
                 else:
                     ok.add(ref)
                 ref_status[ref] = status
-            raise UpdateRefsError('%s failed to update' %
-                                  ', '.join([ref for ref in ref_status
-                                             if ref not in ok]),
-                                  ref_status=ref_status)
+            # TODO(jelmer): don't assume encoding of refs is ascii.
+            raise UpdateRefsError(', '.join([
+                ref.decode('ascii') for ref in ref_status if ref not in ok]) +
+                ' failed to update', ref_status=ref_status)
 
     def handle_packet(self, pkt):
         """Handle a packet.
@@ -133,7 +155,7 @@ class ReportStatusParser(object):
         else:
             ref_status = pkt.strip()
             self._ref_statuses.append(ref_status)
-            if not ref_status.startswith('ok '):
+            if not ref_status.startswith(b'ok '):
                 self._ref_status_ok = False
 
 
@@ -142,8 +164,8 @@ def read_pkt_refs(proto):
     refs = {}
     # Receive refs from server
     for pkt in proto.read_pkt_seq():
-        (sha, ref) = pkt.rstrip('\n').split(None, 1)
-        if sha == 'ERR':
+        (sha, ref) = pkt.rstrip(b'\n').split(None, 1)
+        if sha == b'ERR':
             raise GitProtocolError(ref)
         if server_capabilities is None:
             (ref, server_capabilities) = extract_capabilities(ref)
@@ -154,14 +176,6 @@ def read_pkt_refs(proto):
     return refs, set(server_capabilities)
 
 
-def read_info_refs(f):
-    ret = {}
-    for l in f.readlines():
-        (sha, name) = l.rstrip("\r\n").split("\t", 1)
-        ret[name] = sha
-    return ret
-
-
 # TODO(durin42): this doesn't correctly degrade if the server doesn't
 # support some capabilities. This should work properly with servers
 # that don't support multi_ack.
@@ -182,16 +196,18 @@ class GitClient(object):
         self._fetch_capabilities = set(FETCH_CAPABILITIES)
         self._send_capabilities = set(SEND_CAPABILITIES)
         if not thin_packs:
-            self._fetch_capabilities.remove('thin-pack')
+            self._fetch_capabilities.remove(CAPABILITY_THIN_PACK)
 
     def send_pack(self, path, determine_wants, generate_pack_contents,
-                  progress=None):
+                  progress=None, write_pack=write_pack_objects):
         """Upload a pack to a remote repository.
 
         :param path: Repository path
-        :param generate_pack_contents: Function that can return a sequence of the
-            shas of the objects to upload.
+        :param generate_pack_contents: Function that can return a sequence of
+            the shas of the objects to upload.
         :param progress: Optional progress function
+        :param write_pack: Function called with (file, iterable of objects) to
+            write the objects returned by generate_pack_contents to the server.
 
         :raises SendPackError: if server rejects the pack data
         :raises UpdateRefsError: if the server supports report-status
@@ -211,10 +227,12 @@ class GitClient(object):
         """
         if determine_wants is None:
             determine_wants = target.object_store.determine_wants_all
+
         f, commit, abort = target.object_store.add_pack()
         try:
-            result = self.fetch_pack(path, determine_wants,
-                    target.get_graph_walker(), f.write, progress)
+            result = self.fetch_pack(
+                path, determine_wants, target.get_graph_walker(), f.write,
+                progress)
         except:
             abort()
             raise
@@ -233,9 +251,16 @@ class GitClient(object):
         """
         raise NotImplementedError(self.fetch_pack)
 
+    def get_refs(self, path):
+        """Retrieve the current refs from a git smart server.
+
+        :param path: Path to the repo to fetch from.
+        """
+        raise NotImplementedError(self.get_refs)
+
     def _parse_status_report(self, proto):
         unpack = proto.read_pkt_line().strip()
-        if unpack != 'unpack ok':
+        if unpack != b'unpack ok':
             st = True
             # flush remaining error data
             while st is not None:
@@ -247,7 +272,7 @@ class GitClient(object):
         while ref_status:
             ref_status = ref_status.strip()
             statuses.append(ref_status)
-            if not ref_status.startswith('ok '):
+            if not ref_status.startswith(b'ok '):
                 errs = True
             ref_status = proto.read_pkt_line()
 
@@ -255,20 +280,20 @@ class GitClient(object):
             ref_status = {}
             ok = set()
             for status in statuses:
-                if ' ' not in status:
+                if b' ' not in status:
                     # malformed response, move on to the next one
                     continue
-                status, ref = status.split(' ', 1)
+                status, ref = status.split(b' ', 1)
 
-                if status == 'ng':
-                    if ' ' in ref:
-                        ref, status = ref.split(' ', 1)
+                if status == b'ng':
+                    if b' ' in ref:
+                        ref, status = ref.split(b' ', 1)
                 else:
                     ok.add(ref)
                 ref_status[ref] = status
-            raise UpdateRefsError('%s failed to update' %
-                                  ', '.join([ref for ref in ref_status
-                                             if ref not in ok]),
+            raise UpdateRefsError(', '.join([ref for ref in ref_status
+                                             if ref not in ok]) +
+                                             b' failed to update',
                                   ref_status=ref_status)
 
     def _read_side_band64k_data(self, proto, channel_callbacks):
@@ -281,7 +306,7 @@ class GitClient(object):
             handlers to use. None for a callback discards channel data.
         """
         for pkt in proto.read_pkt_seq():
-            channel = ord(pkt[0])
+            channel = ord(pkt[:1])
             pkt = pkt[1:]
             try:
                 cb = channel_callbacks[channel]
@@ -291,7 +316,8 @@ class GitClient(object):
                 if cb is not None:
                     cb(pkt)
 
-    def _handle_receive_pack_head(self, proto, capabilities, old_refs, new_refs):
+    def _handle_receive_pack_head(self, proto, capabilities, old_refs,
+                                  new_refs):
         """Handle the head of a 'git-receive-pack' request.
 
         :param proto: Protocol object to read from
@@ -304,18 +330,18 @@ class GitClient(object):
         have = [x for x in old_refs.values() if not x == ZERO_SHA]
         sent_capabilities = False
 
-        for refname in set(new_refs.keys() + old_refs.keys()):
+        all_refs = set(new_refs.keys()).union(set(old_refs.keys()))
+        for refname in all_refs:
             old_sha1 = old_refs.get(refname, ZERO_SHA)
             new_sha1 = new_refs.get(refname, ZERO_SHA)
 
             if old_sha1 != new_sha1:
                 if sent_capabilities:
-                    proto.write_pkt_line('%s %s %s' % (old_sha1, new_sha1,
-                                                            refname))
+                    proto.write_pkt_line(old_sha1 + b' ' + new_sha1 + b' ' + refname)
                 else:
                     proto.write_pkt_line(
-                      '%s %s %s\0%s' % (old_sha1, new_sha1, refname,
-                                        ' '.join(capabilities)))
+                        old_sha1 + b' ' + new_sha1 + b' ' + refname + b'\0' +
+                        b' '.join(capabilities))
                     sent_capabilities = True
             if new_sha1 not in have and new_sha1 != ZERO_SHA:
                 want.append(new_sha1)
@@ -329,24 +355,20 @@ class GitClient(object):
         :param capabilities: List of negotiated capabilities
         :param progress: Optional progress reporting function
         """
-        if "side-band-64k" in capabilities:
+        if b"side-band-64k" in capabilities:
             if progress is None:
                 progress = lambda x: None
-            channel_callbacks = { 2: progress }
-            if 'report-status' in capabilities:
+            channel_callbacks = {2: progress}
+            if CAPABILITY_REPORT_STATUS in capabilities:
                 channel_callbacks[1] = PktLineParser(
                     self._report_status_parser.handle_packet).parse
             self._read_side_band64k_data(proto, channel_callbacks)
         else:
-            if 'report-status' in capabilities:
+            if CAPABILITY_REPORT_STATUS in capabilities:
                 for pkt in proto.read_pkt_seq():
                     self._report_status_parser.handle_packet(pkt)
         if self._report_status_parser is not None:
             self._report_status_parser.check()
-        # wait for EOF before returning
-        data = proto.read()
-        if data:
-            raise SendPackError('Unexpected response %r' % data)
 
     def _handle_upload_pack_head(self, proto, capabilities, graph_walker,
                                  wants, can_read):
@@ -359,30 +381,29 @@ class GitClient(object):
         :param can_read: function that returns a boolean that indicates
             whether there is extra graph data to read on proto
         """
-        assert isinstance(wants, list) and type(wants[0]) == str
-        proto.write_pkt_line('want %s %s\n' % (
-            wants[0], ' '.join(capabilities)))
+        assert isinstance(wants, list) and isinstance(wants[0], bytes)
+        proto.write_pkt_line(COMMAND_WANT + b' ' + wants[0] + b' ' + b' '.join(capabilities) + b'\n')
         for want in wants[1:]:
-            proto.write_pkt_line('want %s\n' % want)
+            proto.write_pkt_line(COMMAND_WANT + b' ' + want + b'\n')
         proto.write_pkt_line(None)
-        have = graph_walker.next()
+        have = next(graph_walker)
         while have:
-            proto.write_pkt_line('have %s\n' % have)
+            proto.write_pkt_line(COMMAND_HAVE + b' ' + have + b'\n')
             if can_read():
                 pkt = proto.read_pkt_line()
-                parts = pkt.rstrip('\n').split(' ')
-                if parts[0] == 'ACK':
+                parts = pkt.rstrip(b'\n').split(b' ')
+                if parts[0] == b'ACK':
                     graph_walker.ack(parts[1])
-                    if parts[2] in ('continue', 'common'):
+                    if parts[2] in (b'continue', b'common'):
                         pass
-                    elif parts[2] == 'ready':
+                    elif parts[2] == b'ready':
                         break
                     else:
                         raise AssertionError(
                             "%s not in ('continue', 'ready', 'common)" %
                             parts[2])
-            have = graph_walker.next()
-        proto.write_pkt_line('done\n')
+            have = next(graph_walker)
+        proto.write_pkt_line(COMMAND_DONE + b'\n')
 
     def _handle_upload_pack_tail(self, proto, capabilities, graph_walker,
                                  pack_data, progress=None, rbufsize=_RBUFSIZE):
@@ -397,26 +418,25 @@ class GitClient(object):
         """
         pkt = proto.read_pkt_line()
         while pkt:
-            parts = pkt.rstrip('\n').split(' ')
-            if parts[0] == 'ACK':
-                graph_walker.ack(pkt.split(' ')[1])
+            parts = pkt.rstrip(b'\n').split(b' ')
+            if parts[0] == b'ACK':
+                graph_walker.ack(parts[1])
             if len(parts) < 3 or parts[2] not in (
-                    'ready', 'continue', 'common'):
+                    b'ready', b'continue', b'common'):
                 break
             pkt = proto.read_pkt_line()
-        if "side-band-64k" in capabilities:
+        if CAPABILITY_SIDE_BAND_64K in capabilities:
             if progress is None:
                 # Just ignore progress data
                 progress = lambda x: None
-            self._read_side_band64k_data(proto, {1: pack_data, 2: progress})
-            # wait for EOF before returning
-            data = proto.read()
-            if data:
-                raise Exception('Unexpected response %r' % data)
+            self._read_side_band64k_data(proto, {
+                SIDE_BAND_CHANNEL_DATA: pack_data,
+                SIDE_BAND_CHANNEL_PROGRESS: progress}
+            )
         else:
             while True:
                 data = proto.read(rbufsize)
-                if data == "":
+                if data == b"":
                     break
                 pack_data(data)
 
@@ -439,80 +459,73 @@ class TraditionalGitClient(GitClient):
         raise NotImplementedError()
 
     def send_pack(self, path, determine_wants, generate_pack_contents,
-                  progress=None):
+                  progress=None, write_pack=write_pack_objects):
         """Upload a pack to a remote repository.
 
         :param path: Repository path
-        :param generate_pack_contents: Function that can return a sequence of the
-            shas of the objects to upload.
+        :param generate_pack_contents: Function that can return a sequence of
+            the shas of the objects to upload.
         :param progress: Optional callback called with progress updates
+        :param write_pack: Function called with (file, iterable of objects) to
+            write the objects returned by generate_pack_contents to the server.
 
         :raises SendPackError: if server rejects the pack data
         :raises UpdateRefsError: if the server supports report-status
                                  and rejects ref updates
         """
-        proto, unused_can_read = self._connect('receive-pack', path)
-        old_refs, server_capabilities = read_pkt_refs(proto)
-        negotiated_capabilities = self._send_capabilities & server_capabilities
-
-        if 'report-status' in negotiated_capabilities:
-            self._report_status_parser = ReportStatusParser()
-        report_status_parser = self._report_status_parser
-
-        try:
-            new_refs = orig_new_refs = determine_wants(dict(old_refs))
-        except:
-            proto.write_pkt_line(None)
-            raise
-
-        if not 'delete-refs' in server_capabilities:
-            # Server does not support deletions. Fail later.
-            def remove_del(pair):
-                if pair[1] == ZERO_SHA:
-                    if 'report-status' in negotiated_capabilities:
-                        report_status_parser._ref_statuses.append(
-                            'ng %s remote does not support deleting refs'
-                            % pair[1])
-                        report_status_parser._ref_status_ok = False
-                    return False
-                else:
-                    return True
-
-            new_refs = dict(
-                filter(
-                    remove_del,
-                    [(ref, sha) for ref, sha in new_refs.iteritems()]))
-
-        if new_refs is None:
-            proto.write_pkt_line(None)
-            return old_refs
+        proto, unused_can_read = self._connect(b'receive-pack', path)
+        with proto:
+            old_refs, server_capabilities = read_pkt_refs(proto)
+            negotiated_capabilities = self._send_capabilities & server_capabilities
 
-        if len(new_refs) == 0 and len(orig_new_refs):
-            # NOOP - Original new refs filtered out by policy
-            proto.write_pkt_line(None)
-            if self._report_status_parser is not None:
-                self._report_status_parser.check()
-            return old_refs
+            if CAPABILITY_REPORT_STATUS in negotiated_capabilities:
+                self._report_status_parser = ReportStatusParser()
+            report_status_parser = self._report_status_parser
 
-        (have, want) = self._handle_receive_pack_head(proto,
-            negotiated_capabilities, old_refs, new_refs)
-        if not want and old_refs == new_refs:
+            try:
+                new_refs = orig_new_refs = determine_wants(dict(old_refs))
+            except:
+                proto.write_pkt_line(None)
+                raise
+
+            if not CAPABILITY_DELETE_REFS in server_capabilities:
+                # Server does not support deletions. Fail later.
+                new_refs = dict(orig_new_refs)
+                for ref, sha in orig_new_refs.items():
+                    if sha == ZERO_SHA:
+                        if CAPABILITY_REPORT_STATUS in negotiated_capabilities:
+                            report_status_parser._ref_statuses.append(
+                                b'ng ' + sha + b' remote does not support deleting refs')
+                            report_status_parser._ref_status_ok = False
+                        del new_refs[ref]
+
+            if new_refs is None:
+                proto.write_pkt_line(None)
+                return old_refs
+
+            if len(new_refs) == 0 and len(orig_new_refs):
+                # NOOP - Original new refs filtered out by policy
+                proto.write_pkt_line(None)
+                if report_status_parser is not None:
+                    report_status_parser.check()
+                return old_refs
+
+            (have, want) = self._handle_receive_pack_head(
+                proto, negotiated_capabilities, old_refs, new_refs)
+            if not want and old_refs == new_refs:
+                return new_refs
+            objects = generate_pack_contents(have, want)
+
+            dowrite = len(objects) > 0
+            dowrite = dowrite or any(old_refs.get(ref) != sha
+                                     for (ref, sha) in new_refs.items()
+                                     if sha != ZERO_SHA)
+            if dowrite:
+                write_pack(proto.write_file(), objects)
+
+            self._handle_receive_pack_tail(
+                proto, negotiated_capabilities, progress)
             return new_refs
-        objects = generate_pack_contents(have, want)
-        if len(objects) > 0:
-            entries, sha = write_pack_objects(proto.write_file(), objects)
-        elif len(set(new_refs.values()) - set([ZERO_SHA])) > 0:
-            # Check for valid create/update refs
-            filtered_new_refs = \
-                dict([(ref, sha) for ref, sha in new_refs.iteritems()
-                     if sha != ZERO_SHA])
-            if len(set(filtered_new_refs.iteritems()) -
-                    set(old_refs.iteritems())) > 0:
-                entries, sha = write_pack_objects(proto.write_file(), objects)
-
-        self._handle_receive_pack_tail(proto, negotiated_capabilities,
-            progress)
-        return new_refs
 
     def fetch_pack(self, path, determine_wants, graph_walker, pack_data,
                    progress=None):
@@ -523,47 +536,62 @@ class TraditionalGitClient(GitClient):
         :param pack_data: Callback called for each bit of data in the pack
         :param progress: Callback for progress reports (strings)
         """
-        proto, can_read = self._connect('upload-pack', path)
-        refs, server_capabilities = read_pkt_refs(proto)
-        negotiated_capabilities = self._fetch_capabilities & server_capabilities
+        proto, can_read = self._connect(b'upload-pack', path)
+        with proto:
+            refs, server_capabilities = read_pkt_refs(proto)
+            negotiated_capabilities = (
+                self._fetch_capabilities & server_capabilities)
 
-        if refs is None:
-            proto.write_pkt_line(None)
+            if refs is None:
+                proto.write_pkt_line(None)
+                return refs
+
+            try:
+                wants = determine_wants(refs)
+            except:
+                proto.write_pkt_line(None)
+                raise
+            if wants is not None:
+                wants = [cid for cid in wants if cid != ZERO_SHA]
+            if not wants:
+                proto.write_pkt_line(None)
+                return refs
+            self._handle_upload_pack_head(
+                proto, negotiated_capabilities, graph_walker, wants, can_read)
+            self._handle_upload_pack_tail(
+                proto, negotiated_capabilities, graph_walker, pack_data, progress)
             return refs
 
-        try:
-            wants = determine_wants(refs)
-        except:
-            proto.write_pkt_line(None)
-            raise
-        if wants is not None:
-            wants = [cid for cid in wants if cid != ZERO_SHA]
-        if not wants:
-            proto.write_pkt_line(None)
+    def get_refs(self, path):
+        """Retrieve the current refs from a git smart server."""
+        # stock `git ls-remote` uses upload-pack
+        proto, _ = self._connect(b'upload-pack', path)
+        with proto:
+            refs, _ = read_pkt_refs(proto)
             return refs
-        self._handle_upload_pack_head(proto, negotiated_capabilities,
-            graph_walker, wants, can_read)
-        self._handle_upload_pack_tail(proto, negotiated_capabilities,
-            graph_walker, pack_data, progress)
-        return refs
 
-    def archive(self, path, committish, write_data, progress=None):
-        proto, can_read = self._connect('upload-archive', path)
-        proto.write_pkt_line("argument %s" % committish)
-        proto.write_pkt_line(None)
-        pkt = proto.read_pkt_line()
-        if pkt == "NACK\n":
-            return
-        elif pkt == "ACK\n":
-            pass
-        elif pkt.startswith("ERR "):
-            raise GitProtocolError(pkt[4:].rstrip("\n"))
-        else:
-            raise AssertionError("invalid response %r" % pkt)
-        ret = proto.read_pkt_line()
-        if ret is not None:
-            raise AssertionError("expected pkt tail")
-        self._read_side_band64k_data(proto, {1: write_data, 2: progress})
+    def archive(self, path, committish, write_data, progress=None,
+                write_error=None):
+        proto, can_read = self._connect(b'upload-archive', path)
+        with proto:
+            proto.write_pkt_line(b"argument " + committish)
+            proto.write_pkt_line(None)
+            pkt = proto.read_pkt_line()
+            if pkt == b"NACK\n":
+                return
+            elif pkt == b"ACK\n":
+                pass
+            elif pkt.startswith(b"ERR "):
+                raise GitProtocolError(pkt[4:].rstrip(b"\n"))
+            else:
+                raise AssertionError("invalid response %r" % pkt)
+            ret = proto.read_pkt_line()
+            if ret is not None:
+                raise AssertionError("expected pkt tail")
+            self._read_side_band64k_data(proto, {
+                SIDE_BAND_CHANNEL_DATA: write_data,
+                SIDE_BAND_CHANNEL_PROGRESS: progress,
+                SIDE_BAND_CHANNEL_FATAL: write_error})
 
 
 class TCPGitClient(TraditionalGitClient):
@@ -577,8 +605,12 @@ class TCPGitClient(TraditionalGitClient):
         TraditionalGitClient.__init__(self, *args, **kwargs)
 
     def _connect(self, cmd, path):
-        sockaddrs = socket.getaddrinfo(self._host, self._port,
-            socket.AF_UNSPEC, socket.SOCK_STREAM)
+        if type(cmd) is not bytes:
+            raise TypeError(path)
+        if type(path) is not bytes:
+            raise TypeError(path)
+        sockaddrs = socket.getaddrinfo(
+            self._host, self._port, socket.AF_UNSPEC, socket.SOCK_STREAM)
         s = None
         err = socket.error("no address found for %s" % self._host)
         for (family, socktype, proto, canonname, sockaddr) in sockaddrs:
@@ -587,7 +619,7 @@ class TCPGitClient(TraditionalGitClient):
             try:
                 s.connect(sockaddr)
                 break
-            except socket.error, err:
+            except socket.error as err:
                 if s is not None:
                     s.close()
                 s = None
@@ -597,11 +629,17 @@ class TCPGitClient(TraditionalGitClient):
         rfile = s.makefile('rb', -1)
         # 0 means unbuffered
         wfile = s.makefile('wb', 0)
-        proto = Protocol(rfile.read, wfile.write,
+        def close():
+            rfile.close()
+            wfile.close()
+            s.close()
+
+        proto = Protocol(rfile.read, wfile.write, close,
                          report_activity=self._report_activity)
-        if path.startswith("/~"):
+        if path.startswith(b"/~"):
             path = path[1:]
-        proto.send_cmd('git-%s' % cmd, path, 'host=%s' % self._host)
+        # TODO(jelmer): Alternative to ascii?
+        proto.send_cmd(b'git-' + cmd, path, b'host=' + self._host.encode('ascii'))
         return proto, lambda: _fileno_can_read(s)
 
 
@@ -610,7 +648,10 @@ class SubprocessWrapper(object):
 
     def __init__(self, proc):
         self.proc = proc
-        self.read = proc.stdout.read
+        if sys.version_info[0] == 2:
+            self.read = proc.stdout.read
+        else:
+            self.read = BufferedReader(proc.stdout).read
         self.write = proc.stdin.write
 
     def can_read(self):
@@ -618,16 +659,34 @@ class SubprocessWrapper(object):
             from msvcrt import get_osfhandle
             from win32pipe import PeekNamedPipe
             handle = get_osfhandle(self.proc.stdout.fileno())
-            return PeekNamedPipe(handle, 0)[2] != 0
+            data, total_bytes_avail, msg_bytes_left = PeekNamedPipe(handle, 0)
+            return total_bytes_avail != 0
         else:
             return _fileno_can_read(self.proc.stdout.fileno())
 
     def close(self):
         self.proc.stdin.close()
         self.proc.stdout.close()
+        if self.proc.stderr:
+            self.proc.stderr.close()
         self.proc.wait()
 
 
+def find_git_command():
+    """Find command to run for system Git (usually C Git).
+    """
+    if sys.platform == 'win32': # support .exe, .bat and .cmd
+        try: # to avoid overhead
+            import win32api
+        except ImportError: # run through cmd.exe with some overhead
+            return ['cmd', '/c', 'git']
+        else:
+            status, git = win32api.FindExecutable('git')
+            return [git]
+    else:
+        return ['git']
+
+
 class SubprocessGitClient(TraditionalGitClient):
     """Git client that talks to a server using a subprocess."""
 
@@ -639,17 +698,125 @@ class SubprocessGitClient(TraditionalGitClient):
             del kwargs['stderr']
         TraditionalGitClient.__init__(self, *args, **kwargs)
 
+    git_command = None
+
     def _connect(self, service, path):
+        if type(service) is not bytes:
+            raise TypeError(path)
+        if type(path) is not bytes:
+            raise TypeError(path)
         import subprocess
-        argv = ['git', service, path]
+        if self.git_command is None:
+            git_command = find_git_command()
+        argv = git_command + [service.decode('ascii'), path]
         p = SubprocessWrapper(
             subprocess.Popen(argv, bufsize=0, stdin=subprocess.PIPE,
                              stdout=subprocess.PIPE,
                              stderr=self._stderr))
-        return Protocol(p.read, p.write,
+        return Protocol(p.read, p.write, p.close,
                         report_activity=self._report_activity), p.can_read
 
 
+class LocalGitClient(GitClient):
+    """Git Client that just uses a local Repo."""
+
+    def __init__(self, thin_packs=True, report_activity=None):
+        """Create a new LocalGitClient instance.
+
+        :param path: Path to the local repository
+        :param thin_packs: Whether or not thin packs should be retrieved
+        :param report_activity: Optional callback for reporting transport
+            activity.
+        """
+        self._report_activity = report_activity
+        # Ignore the thin_packs argument
+
+    def send_pack(self, path, determine_wants, generate_pack_contents,
+                  progress=None, write_pack=write_pack_objects):
+        """Upload a pack to a remote repository.
+
+        :param path: Repository path
+        :param generate_pack_contents: Function that can return a sequence of
+            the shas of the objects to upload.
+        :param progress: Optional progress function
+        :param write_pack: Function called with (file, iterable of objects) to
+            write the objects returned by generate_pack_contents to the server.
+
+        :raises SendPackError: if server rejects the pack data
+        :raises UpdateRefsError: if the server supports report-status
+                                 and rejects ref updates
+        """
+        from dulwich.repo import Repo
+
+        with closing(Repo(path)) as target:
+            old_refs = target.get_refs()
+            new_refs = determine_wants(dict(old_refs))
+
+            have = [sha1 for sha1 in old_refs.values() if sha1 != ZERO_SHA]
+            want = []
+            all_refs = set(new_refs.keys()).union(set(old_refs.keys()))
+            for refname in all_refs:
+                old_sha1 = old_refs.get(refname, ZERO_SHA)
+                new_sha1 = new_refs.get(refname, ZERO_SHA)
+                if new_sha1 not in have and new_sha1 != ZERO_SHA:
+                    want.append(new_sha1)
+
+            if not want and old_refs == new_refs:
+                return new_refs
+
+            target.object_store.add_objects(generate_pack_contents(have, want))
+
+            for name, sha in new_refs.items():
+                target.refs[name] = sha
+
+        return new_refs
+
+    def fetch(self, path, target, determine_wants=None, progress=None):
+        """Fetch into a target repository.
+
+        :param path: Path to fetch from
+        :param target: Target repository to fetch into
+        :param determine_wants: Optional function to determine what refs
+            to fetch
+        :param progress: Optional progress function
+        :return: remote refs as dictionary
+        """
+        from dulwich.repo import Repo
+        with closing(Repo(path)) as r:
+            return r.fetch(target, determine_wants=determine_wants,
+                           progress=progress)
+
+    def fetch_pack(self, path, determine_wants, graph_walker, pack_data,
+                   progress=None):
+        """Retrieve a pack from a git smart server.
+
+        :param determine_wants: Callback that returns list of commits to fetch
+        :param graph_walker: Object with next() and ack().
+        :param pack_data: Callback called for each bit of data in the pack
+        :param progress: Callback for progress reports (strings)
+        """
+        from dulwich.repo import Repo
+        with closing(Repo(path)) as r:
+            objects_iter = r.fetch_objects(determine_wants, graph_walker, progress)
+
+            # Did the process short-circuit (e.g. in a stateless RPC call)? Note
+            # that the client still expects a 0-object pack in most cases.
+            if objects_iter is None:
+                return
+            write_pack_objects(ProtocolFile(None, pack_data), objects_iter)
+
+    def get_refs(self, path):
+        """Retrieve the current refs from a git smart server."""
+        from dulwich.repo import Repo
+
+        with closing(Repo(path)) as target:
+            return target.get_refs()
+
+
+# What Git client to use for local access
+default_local_git_client_cls = LocalGitClient
+
+
 class SSHVendor(object):
     """A client side SSH implementation."""
 
@@ -667,7 +834,7 @@ class SSHVendor(object):
         with the remote command.
 
         :param host: Host name
-        :param command: Command to run
+        :param command: Command to run (as argv array)
         :param username: Optional ame of user to log in as
         :param port: Optional SSH port to use
         """
@@ -678,6 +845,10 @@ class SubprocessSSHVendor(SSHVendor):
     """SSH vendor that shells out to the local 'ssh' command."""
 
     def run_command(self, host, command, username=None, port=None):
+        if (type(command) is not list or
+            not all([isinstance(b, bytes) for b in command])):
+            raise TypeError(command)
+
         import subprocess
         #FIXME: This has no way to deal with passwords..
         args = ['ssh', '-x']
@@ -715,7 +886,8 @@ else:
 
             # Start
             if self.should_monitor:
-                self.monitor_thread = threading.Thread(target=self.monitor_stderr)
+                self.monitor_thread = threading.Thread(
+                    target=self.monitor_stderr)
                 self.monitor_thread.start()
 
         def monitor_stderr(self):
@@ -772,14 +944,16 @@ else:
             self.channel.close()
             self.stop_monitoring()
 
-        def __del__(self):
-            self.close()
-
     class ParamikoSSHVendor(object):
 
-        def run_command(self, host, command, username=None, port=None,
-                progress_stderr=None, **kwargs):
+        def __init__(self):
+            self.ssh_kwargs = {}
 
+        def run_command(self, host, command, username=None, port=None,
+                        progress_stderr=None):
+            if (type(command) is not list or
+                not all([isinstance(b, bytes) for b in command])):
+                raise TypeError(command)
             # Paramiko needs an explicit port. None is not valid
             if port is None:
                 port = 22
@@ -788,16 +962,17 @@ else:
 
             policy = paramiko.client.MissingHostKeyPolicy()
             client.set_missing_host_key_policy(policy)
-            client.connect(host, username=username, port=port, **kwargs)
+            client.connect(host, username=username, port=port,
+                           **self.ssh_kwargs)
 
             # Open SSH session
             channel = client.get_transport().open_session()
 
             # Run commands
-            apply(channel.exec_command, command)
+            channel.exec_command(subprocess.list2cmdline(command))
 
-            return ParamikoWrapper(client, channel,
-                    progress_stderr=progress_stderr)
+            return ParamikoWrapper(
+                client, channel, progress_stderr=progress_stderr)
 
 
 # Can be overridden by users
@@ -814,32 +989,74 @@ class SSHGitClient(TraditionalGitClient):
         self.alternative_paths = {}
 
     def _get_cmd_path(self, cmd):
-        return self.alternative_paths.get(cmd, 'git-%s' % cmd)
+        cmd = self.alternative_paths.get(cmd, b'git-' + cmd)
+        assert isinstance(cmd, bytes)
+        if sys.version_info[:2] <= (2, 6):
+            return shlex.split(cmd)
+        else:
+            # TODO(jelmer): Don't decode/encode here
+            return [x.encode('ascii') for x in shlex.split(cmd.decode('ascii'))]
 
     def _connect(self, cmd, path):
-        if path.startswith("/~"):
+        if type(cmd) is not bytes:
+            raise TypeError(path)
+        if type(path) is not bytes:
+            raise TypeError(path)
+        if path.startswith(b"/~"):
             path = path[1:]
+        argv = self._get_cmd_path(cmd) + [path]
         con = get_ssh_vendor().run_command(
-            self.host, ["%s '%s'" % (self._get_cmd_path(cmd), path)],
-            port=self.port, username=self.username)
-        return (Protocol(con.read, con.write, report_activity=self._report_activity),
+            self.host, argv, port=self.port, username=self.username)
+        return (Protocol(con.read, con.write, con.close,
+                         report_activity=self._report_activity),
                 con.can_read)
 
 
+def default_user_agent_string():
+    return "dulwich/%s" % ".".join([str(x) for x in dulwich.__version__])
+
+
+def default_urllib2_opener(config):
+    if config is not None:
+        proxy_server = config.get("http", "proxy")
+    else:
+        proxy_server = None
+    handlers = []
+    if proxy_server is not None:
+        handlers.append(urllib2.ProxyHandler({"http": proxy_server}))
+    opener = urllib2.build_opener(*handlers)
+    if config is not None:
+        user_agent = config.get("http", "useragent")
+    else:
+        user_agent = None
+    if user_agent is None:
+        user_agent = default_user_agent_string()
+    opener.addheaders = [('User-agent', user_agent)]
+    return opener
+
+
 class HttpGitClient(GitClient):
 
-    def __init__(self, base_url, dumb=None, *args, **kwargs):
+    def __init__(self, base_url, dumb=None, opener=None, config=None, *args,
+                 **kwargs):
         self.base_url = base_url.rstrip("/") + "/"
         self.dumb = dumb
+        if opener is None:
+            self.opener = default_urllib2_opener(config)
+        else:
+            self.opener = opener
         GitClient.__init__(self, *args, **kwargs)
 
+    def __repr__(self):
+        return "%s(%r, dumb=%r)" % (type(self).__name__, self.base_url, self.dumb)
+
     def _get_url(self, path):
         return urlparse.urljoin(self.base_url, path).rstrip("/") + "/"
 
     def _http_request(self, url, headers={}, data=None):
         req = urllib2.Request(url, headers=headers, data=data)
         try:
-            resp = self._perform(req)
+            resp = self.opener.open(req)
         except urllib2.HTTPError as e:
             if e.code == 404:
                 raise NotGitRepository()
@@ -847,35 +1064,28 @@ class HttpGitClient(GitClient):
                 raise GitProtocolError("unexpected http response %d" % e.code)
         return resp
 
-    def _perform(self, req):
-        """Perform a HTTP request.
-
-        This is provided so subclasses can provide their own version.
-
-        :param req: urllib2.Request instance
-        :return: matching response
-        """
-        return urllib2.urlopen(req)
-
     def _discover_references(self, service, url):
         assert url[-1] == "/"
         url = urlparse.urljoin(url, "info/refs")
         headers = {}
-        if self.dumb != False:
+        if self.dumb is not False:
             url += "?service=%s" % service
             headers["Content-Type"] = "application/x-%s-request" % service
         resp = self._http_request(url, headers)
-        self.dumb = (not resp.info().gettype().startswith("application/x-git-"))
-        if not self.dumb:
-            proto = Protocol(resp.read, None)
-            # The first line should mention the service
-            pkts = list(proto.read_pkt_seq())
-            if pkts != [('# service=%s\n' % service)]:
-                raise GitProtocolError(
-                    "unexpected first line %r from smart server" % pkts)
-            return read_pkt_refs(proto)
-        else:
-            return read_info_refs(resp), set()
+        try:
+            self.dumb = (not resp.info().gettype().startswith("application/x-git-"))
+            if not self.dumb:
+                proto = Protocol(resp.read, None)
+                # The first line should mention the service
+                pkts = list(proto.read_pkt_seq())
+                if pkts != [('# service=%s\n' % service)]:
+                    raise GitProtocolError(
+                        "unexpected first line %r from smart server" % pkts)
+                return read_pkt_refs(proto)
+            else:
+                return read_info_refs(resp), set()
+        finally:
+            resp.close()
 
     def _smart_request(self, service, url, data):
         assert url[-1] == "/"
@@ -888,13 +1098,15 @@ class HttpGitClient(GitClient):
         return resp
 
     def send_pack(self, path, determine_wants, generate_pack_contents,
-                  progress=None):
+                  progress=None, write_pack=write_pack_objects):
         """Upload a pack to a remote repository.
 
         :param path: Repository path
-        :param generate_pack_contents: Function that can return a sequence of the
-            shas of the objects to upload.
+        :param generate_pack_contents: Function that can return a sequence of
+            the shas of the objects to upload.
         :param progress: Optional progress function
+        :param write_pack: Function called with (file, iterable of objects) to
+            write the objects returned by generate_pack_contents to the server.
 
         :raises SendPackError: if server rejects the pack data
         :raises UpdateRefsError: if the server supports report-status
@@ -902,10 +1114,10 @@ class HttpGitClient(GitClient):
         """
         url = self._get_url(path)
         old_refs, server_capabilities = self._discover_references(
-            "git-receive-pack", url)
+            b"git-receive-pack", url)
         negotiated_capabilities = self._send_capabilities & server_capabilities
 
-        if 'report-status' in negotiated_capabilities:
+        if CAPABILITY_REPORT_STATUS in negotiated_capabilities:
             self._report_status_parser = ReportStatusParser()
 
         new_refs = determine_wants(dict(old_refs))
@@ -913,7 +1125,7 @@ class HttpGitClient(GitClient):
             return old_refs
         if self.dumb:
             raise NotImplementedError(self.fetch_pack)
-        req_data = StringIO()
+        req_data = BytesIO()
         req_proto = Protocol(None, req_data.write)
         (have, want) = self._handle_receive_pack_head(
             req_proto, negotiated_capabilities, old_refs, new_refs)
@@ -921,13 +1133,17 @@ class HttpGitClient(GitClient):
             return new_refs
         objects = generate_pack_contents(have, want)
         if len(objects) > 0:
-            entries, sha = write_pack_objects(req_proto.write_file(), objects)
-        resp = self._smart_request("git-receive-pack", url,
-            data=req_data.getvalue())
-        resp_proto = Protocol(resp.read, None)
-        self._handle_receive_pack_tail(resp_proto, negotiated_capabilities,
-            progress)
-        return new_refs
+            write_pack(req_proto.write_file(), objects)
+        resp = self._smart_request(b"git-receive-pack", url,
+                                   data=req_data.getvalue())
+        try:
+            resp_proto = Protocol(resp.read, None)
+            self._handle_receive_pack_tail(resp_proto, negotiated_capabilities,
+                progress)
+            return new_refs
+        finally:
+            resp.close()
+
 
     def fetch_pack(self, path, determine_wants, graph_walker, pack_data,
                    progress=None):
@@ -941,7 +1157,7 @@ class HttpGitClient(GitClient):
         """
         url = self._get_url(path)
         refs, server_capabilities = self._discover_references(
-            "git-upload-pack", url)
+            b"git-upload-pack", url)
         negotiated_capabilities = self._fetch_capabilities & server_capabilities
         wants = determine_wants(refs)
         if wants is not None:
@@ -950,29 +1166,40 @@ class HttpGitClient(GitClient):
             return refs
         if self.dumb:
             raise NotImplementedError(self.send_pack)
-        req_data = StringIO()
+        req_data = BytesIO()
         req_proto = Protocol(None, req_data.write)
-        self._handle_upload_pack_head(req_proto,
-            negotiated_capabilities, graph_walker, wants,
+        self._handle_upload_pack_head(
+            req_proto, negotiated_capabilities, graph_walker, wants,
             lambda: False)
-        resp = self._smart_request("git-upload-pack", url,
-            data=req_data.getvalue())
-        resp_proto = Protocol(resp.read, None)
-        self._handle_upload_pack_tail(resp_proto, negotiated_capabilities,
-            graph_walker, pack_data, progress)
+        resp = self._smart_request(
+            b"git-upload-pack", url, data=req_data.getvalue())
+        try:
+            resp_proto = Protocol(resp.read, None)
+            self._handle_upload_pack_tail(resp_proto, negotiated_capabilities,
+                graph_walker, pack_data, progress)
+            return refs
+        finally:
+            resp.close()
+
+    def get_refs(self, path):
+        """Retrieve the current refs from a git smart server."""
+        url = self._get_url(path)
+        refs, _ = self._discover_references(
+            b"git-upload-pack", url)
         return refs
 
 
-def get_transport_and_path(uri, **kwargs):
-    """Obtain a git client from a URI or path.
+def get_transport_and_path_from_url(url, config=None, **kwargs):
+    """Obtain a git client from a URL.
 
-    :param uri: URI or path
+    :param url: URL to open
+    :param config: Optional config object
     :param thin_packs: Whether or not thin packs should be retrieved
     :param report_activity: Optional callback for reporting transport
         activity.
     :return: Tuple with client instance and relative path.
     """
-    parsed = urlparse.urlparse(uri)
+    parsed = urlparse.urlparse(url)
     if parsed.scheme == 'git':
         return (TCPGitClient(parsed.hostname, port=parsed.port, **kwargs),
                 parsed.path)
@@ -983,18 +1210,44 @@ def get_transport_and_path(uri, **kwargs):
         return SSHGitClient(parsed.hostname, port=parsed.port,
                             username=parsed.username, **kwargs), path
     elif parsed.scheme in ('http', 'https'):
-        return HttpGitClient(urlparse.urlunparse(parsed), **kwargs), parsed.path
+        return HttpGitClient(urlparse.urlunparse(parsed), config=config,
+                **kwargs), parsed.path
+    elif parsed.scheme == 'file':
+        return default_local_git_client_cls(**kwargs), parsed.path
+
+    raise ValueError("unknown scheme '%s'" % parsed.scheme)
+
 
-    if parsed.scheme and not parsed.netloc:
+def get_transport_and_path(location, **kwargs):
+    """Obtain a git client from a URL.
+
+    :param location: URL or path
+    :param config: Optional config object
+    :param thin_packs: Whether or not thin packs should be retrieved
+    :param report_activity: Optional callback for reporting transport
+        activity.
+    :return: Tuple with client instance and relative path.
+    """
+    # First, try to parse it as a URL
+    try:
+        return get_transport_and_path_from_url(location, **kwargs)
+    except ValueError:
+        pass
+
+    if (sys.platform == 'win32' and
+            location[0].isalpha() and location[1:3] == ':\\'):
+        # Windows local path
+        return default_local_git_client_cls(**kwargs), location
+
+    if ':' in location and not '@' in location:
         # SSH with no user@, zero or one leading slash.
-        return SSHGitClient(parsed.scheme, **kwargs), parsed.path
-    elif parsed.scheme:
-        raise ValueError('Unknown git protocol scheme: %s' % parsed.scheme)
-    elif '@' in parsed.path and ':' in parsed.path:
+        (hostname, path) = location.split(':')
+        return SSHGitClient(hostname, **kwargs), path
+    elif '@' in location and ':' in location:
         # SSH with user@host:foo.
-        user_host, path = parsed.path.split(':')
+        user_host, path = location.split(':')
         user, host = user_host.rsplit('@')
         return SSHGitClient(host, username=user, **kwargs), path
 
     # Otherwise, assume it's a local path.
-    return SubprocessGitClient(**kwargs), uri
+    return default_local_git_client_cls(**kwargs), location