Support the http.sslVerify and http.sslCAInfo configuration options.
[jelmer/dulwich.git] / dulwich / client.py
1 # client.py -- Implementation of the client side git protocols
2 # Copyright (C) 2008-2013 Jelmer Vernooij <jelmer@samba.org>
3 #
4 # Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
5 # General Public License as published by the Free Software Foundation; version 2.0
6 # or (at your option) any later version. You can redistribute it and/or
7 # modify it under the terms of either of these two licenses.
8 #
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
14 #
15 # You should have received a copy of the licenses; if not, see
16 # <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
17 # and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
18 # License, Version 2.0.
19 #
20
21 """Client side support for the Git protocol.
22
23 The Dulwich client supports the following capabilities:
24
25  * thin-pack
26  * multi_ack_detailed
27  * multi_ack
28  * side-band-64k
29  * ofs-delta
30  * quiet
31  * report-status
32  * delete-refs
33
34 Known capabilities that are not supported:
35
36  * shallow
37  * no-progress
38  * include-tag
39 """
40
41 from contextlib import closing
42 from io import BytesIO, BufferedReader
43 import gzip
44 import select
45 import socket
46 import subprocess
47 import sys
48
49 try:
50     from urllib import quote as urlquote
51     from urllib import unquote as urlunquote
52 except ImportError:
53     from urllib.parse import quote as urlquote
54     from urllib.parse import unquote as urlunquote
55
56 try:
57     import urllib2
58     import urlparse
59 except ImportError:
60     import urllib.request as urllib2
61     import urllib.parse as urlparse
62
63 import urllib3
64 import urllib3.util
65
66 import dulwich
67 from dulwich.errors import (
68     GitProtocolError,
69     NotGitRepository,
70     SendPackError,
71     UpdateRefsError,
72     )
73 from dulwich.protocol import (
74     _RBUFSIZE,
75     agent_string,
76     capability_agent,
77     extract_capability_names,
78     CAPABILITY_AGENT,
79     CAPABILITY_DELETE_REFS,
80     CAPABILITY_MULTI_ACK,
81     CAPABILITY_MULTI_ACK_DETAILED,
82     CAPABILITY_OFS_DELTA,
83     CAPABILITY_QUIET,
84     CAPABILITY_REPORT_STATUS,
85     CAPABILITY_SYMREF,
86     CAPABILITY_SIDE_BAND_64K,
87     CAPABILITY_THIN_PACK,
88     CAPABILITIES_REF,
89     KNOWN_RECEIVE_CAPABILITIES,
90     KNOWN_UPLOAD_CAPABILITIES,
91     COMMAND_DONE,
92     COMMAND_HAVE,
93     COMMAND_WANT,
94     SIDE_BAND_CHANNEL_DATA,
95     SIDE_BAND_CHANNEL_PROGRESS,
96     SIDE_BAND_CHANNEL_FATAL,
97     PktLineParser,
98     Protocol,
99     ProtocolFile,
100     TCP_GIT_PORT,
101     ZERO_SHA,
102     extract_capabilities,
103     parse_capability,
104     )
105 from dulwich.pack import (
106     write_pack_data,
107     write_pack_objects,
108     )
109 from dulwich.refs import (
110     read_info_refs,
111     )
112
113
def _fileno_can_read(fileno):
    """Report whether a file descriptor has data ready to be read.

    The descriptor is polled with a zero timeout, so this never blocks.

    :param fileno: File descriptor (or object accepted by select.select)
    :return: True if select() reports the descriptor as readable
    """
    readable, _, _ = select.select([fileno], [], [], 0)
    return bool(readable)
117
118
def _win32_peek_avail(handle):
    """Wrapper around PeekNamedPipe to check how many bytes are available.

    Nothing is consumed from the pipe: the call is made with no buffer and
    only the "total bytes available" output is used.

    :param handle: Windows handle of the pipe to inspect
    :raise OSError: if PeekNamedPipe reports failure
    :return: Number of bytes that can currently be read from the pipe
    """
    # Imported lazily because windll only exists on Windows.
    from ctypes import WinError, byref, wintypes, windll
    c_avail = wintypes.DWORD()
    c_message = wintypes.DWORD()
    success = windll.kernel32.PeekNamedPipe(
        handle, None, 0, None, byref(c_avail),
        byref(c_message))
    if not success:
        # ctypes.wintypes does not expose GetLastError on Python 3, so
        # looking it up there would raise AttributeError and hide the real
        # failure. WinError() builds an OSError from the last error code.
        raise WinError()
    return c_avail.value
130
131
# Capabilities requested for both fetching and sending.
COMMON_CAPABILITIES = [
    CAPABILITY_OFS_DELTA,
    CAPABILITY_SIDE_BAND_64K,
]
# Default fetch capabilities; GitClient seeds _fetch_capabilities from these.
UPLOAD_CAPABILITIES = [
    CAPABILITY_THIN_PACK,
    CAPABILITY_MULTI_ACK,
    CAPABILITY_MULTI_ACK_DETAILED,
] + COMMON_CAPABILITIES
# Default send capabilities; GitClient seeds _send_capabilities from these.
RECEIVE_CAPABILITIES = [CAPABILITY_REPORT_STATUS] + COMMON_CAPABILITIES
136
137
class ReportStatusParser(object):
    """Handle status as reported by servers with 'report-status' capability.

    Packets are fed in one at a time through handle_packet(). The first
    packet is recorded as the unpack status, every later one as a per-ref
    status, and a flush packet (None) ends the report. check() then turns
    any recorded failure into an exception.
    """

    def __init__(self):
        self._done = False
        self._pack_status = None
        self._ref_status_ok = True
        self._ref_statuses = []

    def check(self):
        """Check if there were any errors and, if so, raise exceptions.

        :raise SendPackError: Raised when the server could not unpack
        :raise UpdateRefsError: Raised when refs could not be updated
        """
        if self._pack_status not in (b'unpack ok', None):
            raise SendPackError(self._pack_status)
        if self._ref_status_ok:
            return

        ref_status = {}
        ok = set()
        for line in self._ref_statuses:
            if b' ' not in line:
                # malformed response, move on to the next one
                continue
            status, ref = line.split(b' ', 1)

            if status == b'ng':
                # "ng <ref> <reason>": keep the reason as the ref's status.
                if b' ' in ref:
                    ref, status = ref.split(b' ', 1)
            else:
                ok.add(ref)
            ref_status[ref] = status

        # Ref names are arbitrary bytes. Decode leniently so that a
        # non-ASCII name cannot raise UnicodeDecodeError here and mask the
        # UpdateRefsError we are about to report.
        failed = [
            refname.decode('utf-8', 'replace') for refname in ref_status
            if refname not in ok]
        raise UpdateRefsError(
            ', '.join(failed) + ' failed to update', ref_status=ref_status)

    def handle_packet(self, pkt):
        """Handle a packet.

        :param pkt: Packet contents as bytes, or None for a flush packet
        :raise GitProtocolError: Raised when packets are received after a
            flush packet.
        """
        if self._done:
            raise GitProtocolError("received more data after status report")
        if pkt is None:
            self._done = True
            return
        if self._pack_status is None:
            # The first packet of the report is the unpack status.
            self._pack_status = pkt.strip()
            return
        ref_status = pkt.strip()
        self._ref_statuses.append(ref_status)
        if not ref_status.startswith(b'ok '):
            self._ref_status_ok = False
195
196
def read_pkt_refs(proto):
    """Read the ref advertisement sent by a server.

    :param proto: Protocol object to read pkt-lines from
    :raise GitProtocolError: if the server sends an ERR packet
    :return: Tuple with a dict mapping ref names to shas and a set with
        the capabilities announced by the server
    """
    advertised = {}
    capabilities = None
    # Receive refs from server
    for line in proto.read_pkt_seq():
        sha, ref = line.rstrip(b'\n').split(None, 1)
        if sha == b'ERR':
            raise GitProtocolError(ref)
        if capabilities is None:
            # Capabilities are split off the first advertised line only.
            ref, capabilities = extract_capabilities(ref)
        advertised[ref] = sha

    if not advertised:
        return {}, set([])
    if advertised == {CAPABILITIES_REF: ZERO_SHA}:
        # Only the capabilities placeholder was sent: no real refs.
        advertised = {}
    return advertised, set(capabilities)
214
215
class FetchPackResult(object):
    """Result of a fetch-pack operation.

    For backwards compatibility the object still answers the dictionary
    operations listed in _FORWARDED_ATTRS (and the container protocol) by
    forwarding them to ``refs``, emitting a DeprecationWarning each time.

    :var refs: Dictionary with all remote refs
    :var symrefs: Dictionary with remote symrefs
    :var agent: User agent string
    """

    _FORWARDED_ATTRS = [
            'clear', 'copy', 'fromkeys', 'get', 'has_key', 'items',
            'iteritems', 'iterkeys', 'itervalues', 'keys', 'pop', 'popitem',
            'setdefault', 'update', 'values', 'viewitems', 'viewkeys',
            'viewvalues']

    def __init__(self, refs, symrefs, agent):
        self.refs = refs
        self.symrefs = symrefs
        self.agent = agent

    def _warn_deprecated(self):
        import warnings
        # stacklevel=3 attributes the warning to the caller of the public
        # method that invoked this helper.
        warnings.warn(
            "Use FetchPackResult.refs instead.",
            DeprecationWarning, stacklevel=3)

    def _equals(self, other):
        """Compare with other; NotImplemented for unsupported types."""
        if isinstance(other, dict):
            return (self.refs == other)
        if not isinstance(other, FetchPackResult):
            # Let Python try the reflected comparison instead of failing
            # with AttributeError on objects that have no .refs.
            return NotImplemented
        return (self.refs == other.refs and
                self.symrefs == other.symrefs and
                self.agent == other.agent)

    def __eq__(self, other):
        if isinstance(other, dict):
            self._warn_deprecated()
        return self._equals(other)

    def __ne__(self, other):
        # Python 2 does not derive != from ==, so define it explicitly.
        if isinstance(other, dict):
            self._warn_deprecated()
        outcome = self._equals(other)
        if outcome is NotImplemented:
            return outcome
        return not outcome

    def __contains__(self, name):
        self._warn_deprecated()
        return name in self.refs

    def __getitem__(self, name):
        self._warn_deprecated()
        return self.refs[name]

    def __len__(self):
        self._warn_deprecated()
        return len(self.refs)

    def __iter__(self):
        self._warn_deprecated()
        return iter(self.refs)

    def __getattribute__(self, name):
        # Deprecated dict-style attribute access is forwarded to refs.
        if name in type(self)._FORWARDED_ATTRS:
            self._warn_deprecated()
            return getattr(self.refs, name)
        return super(FetchPackResult, self).__getattribute__(name)

    def __repr__(self):
        return "%s(%r, %r, %r)" % (
                self.__class__.__name__, self.refs, self.symrefs, self.agent)
274
275
276 # TODO(durin42): this doesn't correctly degrade if the server doesn't
277 # support some capabilities. This should work properly with servers
278 # that don't support multi_ack.
class GitClient(object):
    """Git smart server client.

    Base class with the transport-independent parts of the protocol.
    get_url(), from_parsedurl(), send_pack(), fetch_pack() and get_refs()
    raise NotImplementedError here and are meant to be provided by
    subclasses.
    """

    def __init__(self, thin_packs=True, report_activity=None, quiet=False):
        """Create a new GitClient instance.

        :param thin_packs: Whether or not thin packs should be retrieved
        :param report_activity: Optional callback for reporting transport
            activity.
        :param quiet: If true, also request the 'quiet' capability when
            sending packs
        """
        self._report_activity = report_activity
        self._report_status_parser = None
        self._fetch_capabilities = set(UPLOAD_CAPABILITIES)
        self._fetch_capabilities.add(capability_agent())
        self._send_capabilities = set(RECEIVE_CAPABILITIES)
        self._send_capabilities.add(capability_agent())
        if quiet:
            self._send_capabilities.add(CAPABILITY_QUIET)
        if not thin_packs:
            self._fetch_capabilities.remove(CAPABILITY_THIN_PACK)

    def get_url(self, path):
        """Retrieves full url to given path.

        :param path: Repository path (as string)
        :return: Url to path (as string)
        """
        raise NotImplementedError(self.get_url)

    @classmethod
    def from_parsedurl(cls, parsedurl, **kwargs):
        """Create an instance of this client from a urlparse.parsed object.

        :param parsedurl: Result of urlparse.urlparse()
        :return: A `GitClient` object
        """
        raise NotImplementedError(cls.from_parsedurl)

    def send_pack(self, path, update_refs, generate_pack_data,
                  progress=None):
        """Upload a pack to a remote repository.

        :param path: Repository path (as bytestring)
        :param update_refs: Function to determine changes to remote refs.
            Receive dict with existing remote refs, returns dict with
            changed refs (name -> sha, where sha=ZERO_SHA for deletions)
        :param generate_pack_data: Function that can return a tuple
            with number of objects and list of pack data to include
        :param progress: Optional progress function

        :raises SendPackError: if server rejects the pack data
        :raises UpdateRefsError: if the server supports report-status
                                 and rejects ref updates
        :return: new_refs dictionary containing the changes that were made
            {refname: new_ref}, including deleted refs.
        """
        raise NotImplementedError(self.send_pack)

    def fetch(self, path, target, determine_wants=None, progress=None):
        """Fetch into a target repository.

        :param path: Path to fetch from (as bytestring)
        :param target: Target repository to fetch into
        :param determine_wants: Optional function to determine what refs
            to fetch. Receives dictionary of name->sha, should return
            list of shas to fetch. Defaults to all shas.
        :param progress: Optional progress function
        :return: Dictionary with all remote refs (not just those fetched)
        """
        if determine_wants is None:
            determine_wants = target.object_store.determine_wants_all
        if CAPABILITY_THIN_PACK in self._fetch_capabilities:
            # TODO(jelmer): Avoid reading entire file into memory and
            # only processing it after the whole file has been fetched.
            f = BytesIO()

            def commit():
                # Only hand the buffer over if something was written to it.
                if f.tell():
                    f.seek(0)
                    target.object_store.add_thin_pack(f.read, None)

            def abort():
                pass
        else:
            f, commit, abort = target.object_store.add_pack()
        try:
            result = self.fetch_pack(
                path, determine_wants, target.get_graph_walker(), f.write,
                progress)
        except BaseException:
            abort()
            raise
        else:
            commit()
        return result

    def fetch_pack(self, path, determine_wants, graph_walker, pack_data,
                   progress=None):
        """Retrieve a pack from a git smart server.

        :param path: Remote path to fetch from
        :param determine_wants: Function determine what refs
            to fetch. Receives dictionary of name->sha, should return
            list of shas to fetch.
        :param graph_walker: Object with next() and ack().
        :param pack_data: Callback called for each bit of data in the pack
        :param progress: Callback for progress reports (strings)
        :return: FetchPackResult object
        """
        raise NotImplementedError(self.fetch_pack)

    def get_refs(self, path):
        """Retrieve the current refs from a git smart server.

        :param path: Path to the repo to fetch from. (as bytestring)
        """
        raise NotImplementedError(self.get_refs)

    def _parse_status_report(self, proto):
        """Read a status report from proto and raise if it reports failure.

        :param proto: Protocol object to read from
        :raise SendPackError: if the first line is not 'unpack ok'
        :raise UpdateRefsError: if any ref status does not start with 'ok '
        """
        unpack = proto.read_pkt_line().strip()
        if unpack != b'unpack ok':
            st = True
            # flush remaining error data
            while st is not None:
                st = proto.read_pkt_line()
            raise SendPackError(unpack)
        statuses = []
        errs = False
        ref_status = proto.read_pkt_line()
        while ref_status:
            ref_status = ref_status.strip()
            statuses.append(ref_status)
            if not ref_status.startswith(b'ok '):
                errs = True
            ref_status = proto.read_pkt_line()

        if errs:
            ref_status = {}
            ok = set()
            for status in statuses:
                if b' ' not in status:
                    # malformed response, move on to the next one
                    continue
                status, ref = status.split(b' ', 1)

                if status == b'ng':
                    if b' ' in ref:
                        ref, status = ref.split(b' ', 1)
                else:
                    ok.add(ref)
                ref_status[ref] = status
            # Ref names are bytes. Decode them before joining, since
            # str.join() rejects bytes on Python 3 and text cannot be
            # concatenated with a bytes suffix.
            raise UpdateRefsError(', '.join([
                refname.decode('utf-8', 'replace') for refname in ref_status
                if refname not in ok]) +
                ' failed to update', ref_status=ref_status)

    def _read_side_band64k_data(self, proto, channel_callbacks):
        """Read per-channel data.

        This requires the side-band-64k capability.

        :param proto: Protocol object to read from
        :param channel_callbacks: Dictionary mapping channels to packet
            handlers to use. None for a callback discards channel data.
        :raise AssertionError: if data arrives on a channel that is not a
            key of channel_callbacks
        """
        for pkt in proto.read_pkt_seq():
            # The first byte of each packet is the channel number.
            channel = ord(pkt[:1])
            pkt = pkt[1:]
            try:
                cb = channel_callbacks[channel]
            except KeyError:
                raise AssertionError('Invalid sideband channel %d' % channel)
            else:
                if cb is not None:
                    cb(pkt)

    def _handle_receive_pack_head(self, proto, capabilities, old_refs,
                                  new_refs):
        """Handle the head of a 'git-receive-pack' request.

        :param proto: Protocol object to read from
        :param capabilities: List of negotiated capabilities
        :param old_refs: Old refs, as received from the server
        :param new_refs: Refs to change
        :raise TypeError: if a ref name or sha is not a bytestring
        :return: (have, want) tuple
        """
        want = []
        have = [x for x in old_refs.values() if x != ZERO_SHA]
        sent_capabilities = False

        for refname in new_refs:
            if not isinstance(refname, bytes):
                raise TypeError('refname is not a bytestring: %r' % refname)
            old_sha1 = old_refs.get(refname, ZERO_SHA)
            if not isinstance(old_sha1, bytes):
                raise TypeError('old sha1 for %s is not a bytestring: %r' %
                                (refname, old_sha1))
            new_sha1 = new_refs.get(refname, ZERO_SHA)
            if not isinstance(new_sha1, bytes):
                raise TypeError('new sha1 for %s is not a bytestring: %r' %
                                (refname, new_sha1))

            if old_sha1 != new_sha1:
                if sent_capabilities:
                    proto.write_pkt_line(old_sha1 + b' ' + new_sha1 + b' ' +
                                         refname)
                else:
                    # Capabilities are appended to the first command only.
                    proto.write_pkt_line(
                        old_sha1 + b' ' + new_sha1 + b' ' + refname + b'\0' +
                        b' '.join(capabilities))
                    sent_capabilities = True
            if new_sha1 not in have and new_sha1 != ZERO_SHA:
                want.append(new_sha1)
        proto.write_pkt_line(None)
        return (have, want)

    def _negotiate_receive_pack_capabilities(self, server_capabilities):
        """Pick the send capabilities that the server also announced.

        :param server_capabilities: Set of capabilities sent by the server
        :return: Set with the capabilities to use
        """
        negotiated_capabilities = (
            self._send_capabilities & server_capabilities)
        unknown_capabilities = (  # noqa: F841
            extract_capability_names(server_capabilities) -
            KNOWN_RECEIVE_CAPABILITIES)
        # TODO(jelmer): warn about unknown capabilities
        return negotiated_capabilities

    def _handle_receive_pack_tail(self, proto, capabilities, progress=None):
        """Handle the tail of a 'git-receive-pack' request.

        :param proto: Protocol object to read from
        :param capabilities: List of negotiated capabilities
        :param progress: Optional progress reporting function
        """
        if CAPABILITY_SIDE_BAND_64K in capabilities:
            if progress is None:
                def progress(x):
                    pass
            # Channel 2 goes to the progress callback; channel 1 is parsed
            # as pkt-lines and fed to the status parser when report-status
            # was negotiated.
            channel_callbacks = {2: progress}
            if CAPABILITY_REPORT_STATUS in capabilities:
                channel_callbacks[1] = PktLineParser(
                    self._report_status_parser.handle_packet).parse
            self._read_side_band64k_data(proto, channel_callbacks)
        else:
            if CAPABILITY_REPORT_STATUS in capabilities:
                for pkt in proto.read_pkt_seq():
                    self._report_status_parser.handle_packet(pkt)
        if self._report_status_parser is not None:
            self._report_status_parser.check()

    def _negotiate_upload_pack_capabilities(self, server_capabilities):
        """Pick fetch capabilities and extract symrefs and agent.

        :param server_capabilities: Set of capabilities sent by the server
        :return: Tuple with (negotiated capabilities, dict of symrefs,
            agent value or None)
        """
        unknown_capabilities = (  # noqa: F841
            extract_capability_names(server_capabilities) -
            KNOWN_UPLOAD_CAPABILITIES)
        # TODO(jelmer): warn about unknown capabilities
        symrefs = {}
        agent = None
        for capability in server_capabilities:
            k, v = parse_capability(capability)
            if k == CAPABILITY_SYMREF:
                (src, dst) = v.split(b':', 1)
                symrefs[src] = dst
            if k == CAPABILITY_AGENT:
                agent = v

        negotiated_capabilities = (
            self._fetch_capabilities & server_capabilities)
        return (negotiated_capabilities, symrefs, agent)

    def _handle_upload_pack_head(self, proto, capabilities, graph_walker,
                                 wants, can_read):
        """Handle the head of a 'git-upload-pack' request.

        :param proto: Protocol object to read from
        :param capabilities: List of negotiated capabilities
        :param graph_walker: GraphWalker instance to call .ack() on
        :param wants: List of commits to fetch
        :param can_read: function that returns a boolean that indicates
            whether there is extra graph data to read on proto
        """
        assert isinstance(wants, list) and isinstance(wants[0], bytes)
        # Capabilities are sent along with the first want line only.
        proto.write_pkt_line(COMMAND_WANT + b' ' + wants[0] + b' ' +
                             b' '.join(capabilities) + b'\n')
        for want in wants[1:]:
            proto.write_pkt_line(COMMAND_WANT + b' ' + want + b'\n')
        proto.write_pkt_line(None)
        have = next(graph_walker)
        while have:
            proto.write_pkt_line(COMMAND_HAVE + b' ' + have + b'\n')
            if can_read():
                pkt = proto.read_pkt_line()
                parts = pkt.rstrip(b'\n').split(b' ')
                if parts[0] == b'ACK':
                    graph_walker.ack(parts[1])
                    if parts[2] in (b'continue', b'common'):
                        pass
                    elif parts[2] == b'ready':
                        break
                    else:
                        raise AssertionError(
                            "%s not in ('continue', 'ready', 'common')" %
                            parts[2])
            have = next(graph_walker)
        proto.write_pkt_line(COMMAND_DONE + b'\n')

    def _handle_upload_pack_tail(self, proto, capabilities, graph_walker,
                                 pack_data, progress=None, rbufsize=_RBUFSIZE):
        """Handle the tail of a 'git-upload-pack' request.

        :param proto: Protocol object to read from
        :param capabilities: List of negotiated capabilities
        :param graph_walker: GraphWalker instance to call .ack() on
        :param pack_data: Function to call with pack data
        :param progress: Optional progress reporting function
        :param rbufsize: Read buffer size
        """
        pkt = proto.read_pkt_line()
        while pkt:
            parts = pkt.rstrip(b'\n').split(b' ')
            if parts[0] == b'ACK':
                graph_walker.ack(parts[1])
            # Keep reading only while the server sends ACKs that carry a
            # ready/continue/common status.
            if len(parts) < 3 or parts[2] not in (
                    b'ready', b'continue', b'common'):
                break
            pkt = proto.read_pkt_line()
        if CAPABILITY_SIDE_BAND_64K in capabilities:
            if progress is None:
                # Just ignore progress data

                def progress(x):
                    pass
            self._read_side_band64k_data(proto, {
                SIDE_BAND_CHANNEL_DATA: pack_data,
                SIDE_BAND_CHANNEL_PROGRESS: progress}
            )
        else:
            # No side-band: everything up to EOF is raw pack data.
            while True:
                data = proto.read(rbufsize)
                if data == b"":
                    break
                pack_data(data)
618                 pack_data(data)
619
620
class TraditionalGitClient(GitClient):
    """Traditional Git client.

    Implements send_pack(), fetch_pack(), get_refs() and archive() on top
    of a Protocol object obtained from _connect(), which subclasses must
    provide.
    """

    DEFAULT_ENCODING = 'utf-8'

    def __init__(self, path_encoding=DEFAULT_ENCODING, **kwargs):
        """Create a new TraditionalGitClient.

        :param path_encoding: Encoding stored as the remote path encoding
        :param kwargs: Passed on to GitClient.__init__
        """
        self._remote_path_encoding = path_encoding
        super(TraditionalGitClient, self).__init__(**kwargs)

    def _connect(self, cmd, path):
        """Create a connection to the server.

        This method is abstract - concrete implementations should
        implement their own variant which connects to the server and
        returns an initialized Protocol object with the service ready
        for use and a can_read function which may be used to see if
        reads would block.

        :param cmd: The git service name to which we should connect.
        :param path: The path we should pass to the service. (as bytestring)
        """
        raise NotImplementedError()

    def send_pack(self, path, update_refs, generate_pack_data,
                  progress=None):
        """Upload a pack to a remote repository.

        :param path: Repository path (as bytestring)
        :param update_refs: Function to determine changes to remote refs.
            Receive dict with existing remote refs, returns dict with
            changed refs (name -> sha, where sha=ZERO_SHA for deletions)
        :param generate_pack_data: Function that can return a tuple with
            number of objects and pack data to upload.
        :param progress: Optional callback called with progress updates

        :raises SendPackError: if server rejects the pack data
        :raises UpdateRefsError: if the server supports report-status
                                 and rejects ref updates
        :return: new_refs dictionary containing the changes that were made
            {refname: new_ref}, including deleted refs.
        """
        proto, unused_can_read = self._connect(b'receive-pack', path)
        with proto:
            old_refs, server_capabilities = read_pkt_refs(proto)
            negotiated_capabilities = \
                self._negotiate_receive_pack_capabilities(server_capabilities)
            if CAPABILITY_REPORT_STATUS in negotiated_capabilities:
                self._report_status_parser = ReportStatusParser()
            else:
                # Drop any parser left over from an earlier call so that
                # its old results are not checked again for this push.
                self._report_status_parser = None
            report_status_parser = self._report_status_parser

            try:
                new_refs = orig_new_refs = update_refs(dict(old_refs))
            except BaseException:
                proto.write_pkt_line(None)
                raise

            # Must come before anything iterates over the result.
            if new_refs is None:
                proto.write_pkt_line(None)
                return old_refs

            if CAPABILITY_DELETE_REFS not in server_capabilities:
                # Server does not support deletions. Fail later.
                new_refs = dict(orig_new_refs)
                for ref, sha in orig_new_refs.items():
                    if sha == ZERO_SHA:
                        if CAPABILITY_REPORT_STATUS in negotiated_capabilities:
                            # Report against the ref name; sha is always
                            # ZERO_SHA here and would not identify the ref.
                            report_status_parser._ref_statuses.append(
                                b'ng ' + ref +
                                b' remote does not support deleting refs')
                            report_status_parser._ref_status_ok = False
                        del new_refs[ref]

            if len(new_refs) == 0 and len(orig_new_refs):
                # NOOP - Original new refs filtered out by policy
                proto.write_pkt_line(None)
                if report_status_parser is not None:
                    report_status_parser.check()
                return old_refs

            (have, want) = self._handle_receive_pack_head(
                proto, negotiated_capabilities, old_refs, new_refs)
            if (not want and
                    set(new_refs.items()).issubset(set(old_refs.items()))):
                # Nothing to send and every requested ref already matches.
                return new_refs
            pack_data_count, pack_data = generate_pack_data(
                have, want,
                ofs_delta=(CAPABILITY_OFS_DELTA in negotiated_capabilities))

            # Write a pack if there are objects to send, or if any
            # non-deleted ref differs from what the server has.
            dowrite = bool(pack_data_count)
            dowrite = dowrite or any(old_refs.get(ref) != sha
                                     for (ref, sha) in new_refs.items()
                                     if sha != ZERO_SHA)
            if dowrite:
                write_pack_data(proto.write_file(), pack_data_count, pack_data)

            self._handle_receive_pack_tail(
                proto, negotiated_capabilities, progress)
            return new_refs

    def fetch_pack(self, path, determine_wants, graph_walker, pack_data,
                   progress=None):
        """Retrieve a pack from a git smart server.

        :param path: Remote path to fetch from
        :param determine_wants: Function determine what refs
            to fetch. Receives dictionary of name->sha, should return
            list of shas to fetch.
        :param graph_walker: Object with next() and ack().
        :param pack_data: Callback called for each bit of data in the pack
        :param progress: Callback for progress reports (strings)
        :return: FetchPackResult object
        """
        proto, can_read = self._connect(b'upload-pack', path)
        with proto:
            refs, server_capabilities = read_pkt_refs(proto)
            negotiated_capabilities, symrefs, agent = (
                    self._negotiate_upload_pack_capabilities(
                            server_capabilities))

            if refs is None:
                proto.write_pkt_line(None)
                return FetchPackResult(refs, symrefs, agent)

            try:
                wants = determine_wants(refs)
            except BaseException:
                proto.write_pkt_line(None)
                raise
            if wants is not None:
                wants = [cid for cid in wants if cid != ZERO_SHA]
            if not wants:
                # Nothing to fetch: end the conversation with a flush.
                proto.write_pkt_line(None)
                return FetchPackResult(refs, symrefs, agent)
            self._handle_upload_pack_head(
                proto, negotiated_capabilities, graph_walker, wants, can_read)
            self._handle_upload_pack_tail(
                proto, negotiated_capabilities, graph_walker, pack_data,
                progress)
            return FetchPackResult(refs, symrefs, agent)

    def get_refs(self, path):
        """Retrieve the current refs from a git smart server.

        :param path: Path to the repo to fetch from
        :return: Dictionary mapping ref names to shas
        """
        # stock `git ls-remote` uses upload-pack
        proto, _ = self._connect(b'upload-pack', path)
        with proto:
            refs, _ = read_pkt_refs(proto)
            proto.write_pkt_line(None)
            return refs

    def archive(self, path, committish, write_data, progress=None,
                write_error=None):
        """Request an archive through the 'upload-archive' service.

        Returns without calling any callback if the server answers NACK.

        :param path: Repository path passed to _connect
        :param committish: Value sent as the single 'argument' line
            (as bytestring)
        :param write_data: Callback called with data-channel packets
        :param progress: Optional callback for progress-channel packets;
            None discards them
        :param write_error: Optional callback for fatal-channel packets;
            None discards them
        :raise GitProtocolError: if the server answers with an ERR packet
        :raise AssertionError: on any other unexpected response
        """
        proto, can_read = self._connect(b'upload-archive', path)
        with proto:
            proto.write_pkt_line(b"argument " + committish)
            proto.write_pkt_line(None)
            pkt = proto.read_pkt_line()
            if pkt == b"NACK\n":
                return
            elif pkt == b"ACK\n":
                pass
            elif pkt.startswith(b"ERR "):
                raise GitProtocolError(pkt[4:].rstrip(b"\n"))
            else:
                raise AssertionError("invalid response %r" % pkt)
            # The ACK must be followed by a flush packet.
            ret = proto.read_pkt_line()
            if ret is not None:
                raise AssertionError("expected pkt tail")
            self._read_side_band64k_data(proto, {
                SIDE_BAND_CHANNEL_DATA: write_data,
                SIDE_BAND_CHANNEL_PROGRESS: progress,
                SIDE_BAND_CHANNEL_FATAL: write_error})
792
793
class TCPGitClient(TraditionalGitClient):
    """A Git Client that works over TCP directly (i.e. git://)."""

    def __init__(self, host, port=None, **kwargs):
        """Create a client for ``host``.

        :param host: Host name to connect to
        :param port: TCP port; TCP_GIT_PORT is used when None
        :param kwargs: Passed through to the parent class constructor
        """
        if port is None:
            port = TCP_GIT_PORT
        self._host = host
        self._port = port
        super(TCPGitClient, self).__init__(**kwargs)

    @classmethod
    def from_parsedurl(cls, parsedurl, **kwargs):
        """Build a client from the hostname and port of a parsed URL."""
        return cls(parsedurl.hostname, port=parsedurl.port, **kwargs)

    def get_url(self, path):
        """Return the git:// URL for ``path`` on this client's host.

        The port is only included when it differs from TCP_GIT_PORT.
        """
        netloc = self._host
        if self._port is not None and self._port != TCP_GIT_PORT:
            netloc += ":%d" % self._port
        return urlparse.urlunsplit(("git", netloc, path, '', ''))

    def _connect(self, cmd, path):
        """Open a TCP connection and send the initial service request.

        :param cmd: Service name as bytes, without the ``git-`` prefix
        :param path: Repository path; encoded with the remote path encoding
            when it is not already bytes
        :raises TypeError: if ``cmd`` is not bytes
        :raises socket.error: if no resolved address could be connected to
        :return: Tuple of (Protocol, callable reporting whether the socket
            can be read)
        """
        if not isinstance(cmd, bytes):
            raise TypeError(cmd)
        if not isinstance(path, bytes):
            path = path.encode(self._remote_path_encoding)
        sockaddrs = socket.getaddrinfo(
            self._host, self._port, socket.AF_UNSPEC, socket.SOCK_STREAM)
        s = None
        err = socket.error("no address found for %s" % self._host)
        for (family, socktype, sockproto, canonname, sockaddr) in sockaddrs:
            s = socket.socket(family, socktype, sockproto)
            s.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)
            try:
                s.connect(sockaddr)
                break
            except socket.error as e:
                # Python 3 unbinds the ``as`` target when the except clause
                # ends, so the error has to be copied to a name that is
                # still bound when it is re-raised below.
                err = e
                s.close()
                s = None
        if s is None:
            raise err
        # -1 means system default buffering
        rfile = s.makefile('rb', -1)
        # 0 means unbuffered
        wfile = s.makefile('wb', 0)

        def close():
            rfile.close()
            wfile.close()
            s.close()

        proto = Protocol(rfile.read, wfile.write, close,
                         report_activity=self._report_activity)
        if path.startswith(b"/~"):
            path = path[1:]
        # TODO(jelmer): Alternative to ascii?
        proto.send_cmd(
            b'git-' + cmd, path, b'host=' + self._host.encode('ascii'))
        return proto, lambda: _fileno_can_read(s)
853
854
class SubprocessWrapper(object):
    """A socket-like object that talks to a subprocess via pipes."""

    def __init__(self, proc):
        """Wrap ``proc``, exposing ``read`` and ``write`` on its pipes.

        :param proc: Process object providing ``stdin``, ``stdout``,
            ``stderr`` and ``wait()``
        """
        self.proc = proc
        running_py2 = sys.version_info[0] == 2
        # On Python 3 reads go through a BufferedReader around stdout.
        reader = proc.stdout if running_py2 else BufferedReader(proc.stdout)
        self.read = reader.read
        self.write = proc.stdin.write

    def can_read(self):
        """Report whether the subprocess' stdout has data available."""
        if sys.platform != 'win32':
            return _fileno_can_read(self.proc.stdout.fileno())
        from msvcrt import get_osfhandle
        handle = get_osfhandle(self.proc.stdout.fileno())
        return _win32_peek_avail(handle) != 0

    def close(self):
        """Close the subprocess' pipes and wait for it to exit."""
        proc = self.proc
        proc.stdin.close()
        proc.stdout.close()
        # stderr is only closed when the process object has one.
        if proc.stderr:
            proc.stderr.close()
        proc.wait()
880
881
def find_git_command():
    """Find command to run for system Git (usually C Git).

    :return: argv prefix (list of strings) used to invoke git
    """
    if sys.platform != 'win32':
        return ['git']

    # On Windows, support .exe, .bat and .cmd
    try:  # to avoid overhead
        import win32api
    except ImportError:  # run through cmd.exe with some overhead
        return ['cmd', '/c', 'git']

    _status, git_path = win32api.FindExecutable('git')
    return [git_path]
895
896
class SubprocessGitClient(TraditionalGitClient):
    """Git client that talks to a server using a subprocess."""

    # Optional argv prefix (list) used to invoke git; when None the command
    # is looked up with find_git_command() on each connection.
    git_command = None

    def __init__(self, **kwargs):
        """Create a new SubprocessGitClient.

        :param stderr: Optional keyword; passed as ``stderr`` to
            ``subprocess.Popen`` for the spawned git process
        :param kwargs: Remaining keywords go to the parent constructor
        """
        self._connection = None
        # 'stderr' is consumed here and must not reach the parent class.
        self._stderr = kwargs.pop('stderr', None)
        super(SubprocessGitClient, self).__init__(**kwargs)

    @classmethod
    def from_parsedurl(cls, parsedurl, **kwargs):
        """Build a client; nothing from the parsed URL is used."""
        return cls(**kwargs)

    def _connect(self, service, path):
        """Spawn ``git <service> <path>`` and wrap its pipes in a Protocol.

        :param service: Service name as bytes
        :param path: Repository path; decoded with the remote path encoding
            when given as bytes
        :raises TypeError: if ``service`` is not bytes
        :return: Tuple of (Protocol, can_read callable)
        """
        if not isinstance(service, bytes):
            raise TypeError(service)
        if isinstance(path, bytes):
            path = path.decode(self._remote_path_encoding)
        # Honour an explicitly configured command; previously the local name
        # was only bound when the attribute was None, so setting
        # ``git_command`` raised UnboundLocalError.
        git_command = self.git_command
        if git_command is None:
            git_command = find_git_command()
        argv = git_command + [service.decode('ascii'), path]
        p = SubprocessWrapper(
            subprocess.Popen(argv, bufsize=0, stdin=subprocess.PIPE,
                             stdout=subprocess.PIPE,
                             stderr=self._stderr))
        return Protocol(p.read, p.write, p.close,
                        report_activity=self._report_activity), p.can_read
928
929
class LocalGitClient(GitClient):
    """Git Client that just uses a local Repo."""

    def __init__(self, thin_packs=True, report_activity=None, config=None):
        """Create a new LocalGitClient instance.

        :param thin_packs: Accepted but ignored
        :param report_activity: Optional callback for reporting transport
            activity.
        :param config: Accepted but not used by this constructor
        """
        # Only the activity callback is kept; the other arguments are
        # dropped.
        self._report_activity = report_activity

    def get_url(self, path):
        """Return the file:// URL for ``path``."""
        url_parts = ('file', '', path, '', '')
        return urlparse.urlunsplit(url_parts)

    @classmethod
    def from_parsedurl(cls, parsedurl, **kwargs):
        """Build a client; nothing from the parsed URL is used."""
        return cls(**kwargs)

    @classmethod
    def _open_repo(cls, path):
        """Open the repository at ``path`` wrapped in ``closing``.

        Non-``str`` paths are decoded with the filesystem encoding first.
        """
        from dulwich.repo import Repo
        if isinstance(path, str):
            repo_path = path
        else:
            repo_path = path.decode(sys.getfilesystemencoding())
        return closing(Repo(repo_path))

    def send_pack(self, path, update_refs, generate_pack_data,
                  progress=None):
        """Upload a pack to a local repository and update its refs.

        :param path: Repository path (as bytestring)
        :param update_refs: Function to determine changes to remote refs.
            Receive dict with existing remote refs, returns dict with
            changed refs (name -> sha, where sha=ZERO_SHA for deletions)
        :param generate_pack_data: Function called with the have and want
            lists (and ``ofs_delta=True``); its result is unpacked into the
            arguments of the object store's ``add_pack_data``
        :param progress: Optional progress function; it is told about refs
            that could not be set or removed
        :return: new_refs dictionary containing the changes that were made
            {refname: new_ref}, including deleted refs.
        """
        def _discard(message):
            pass

        report = progress or _discard

        with self._open_repo(path) as target:
            old_refs = target.get_refs()
            new_refs = update_refs(dict(old_refs))

            have = [sha for sha in old_refs.values() if sha != ZERO_SHA]
            want = []
            for candidate in new_refs.values():
                if candidate == ZERO_SHA:
                    continue
                if candidate in have or candidate in want:
                    continue
                want.append(candidate)

            # Nothing to transfer and no ref differs from what is there.
            if (not want and
                    set(new_refs.items()) <= set(old_refs.items())):
                return new_refs

            pack_args = generate_pack_data(have, want, ofs_delta=True)
            target.object_store.add_pack_data(*pack_args)

            for refname, new_sha1 in new_refs.items():
                old_sha1 = old_refs.get(refname, ZERO_SHA)
                if new_sha1 == ZERO_SHA:
                    removed = target.refs.remove_if_equals(refname, old_sha1)
                    if not removed:
                        report('unable to remove %s' % refname)
                else:
                    updated = target.refs.set_if_equals(
                        refname, old_sha1, new_sha1)
                    if not updated:
                        report('unable to set %s to %s' %
                               (refname, new_sha1))

        return new_refs

    def fetch(self, path, target, determine_wants=None, progress=None):
        """Fetch into a target repository.

        :param path: Path to fetch from (as bytestring)
        :param target: Target repository to fetch into
        :param determine_wants: Optional function determine what refs
            to fetch; passed on to the source repository's ``fetch``
        :param progress: Optional progress function
        :return: FetchPackResult object
        """
        with self._open_repo(path) as source:
            fetched_refs = source.fetch(
                target, determine_wants=determine_wants, progress=progress)
            symrefs = source.refs.get_symrefs()
            return FetchPackResult(fetched_refs, symrefs, agent_string())

    def fetch_pack(self, path, determine_wants, graph_walker, pack_data,
                   progress=None):
        """Retrieve a pack from the local repository at ``path``.

        :param path: Path of the repository to fetch from
        :param determine_wants: Function determine what refs
            to fetch. Receives dictionary of name->sha, should return
            list of shas to fetch.
        :param graph_walker: Object with next() and ack().
        :param pack_data: Callback called for each bit of data in the pack
        :param progress: Callback for progress reports (strings)
        :return: FetchPackResult object
        """
        with self._open_repo(path) as source:
            objects_iter = source.fetch_objects(
                determine_wants, graph_walker, progress)
            symrefs = source.refs.get_symrefs()
            agent = agent_string()

            # Did the process short-circuit (e.g. in a stateless RPC call)?
            # Note that the client still expects a 0-object pack in most
            # cases.
            if objects_iter is None:
                return FetchPackResult(None, symrefs, agent)

            pack_sink = ProtocolFile(None, pack_data)
            write_pack_objects(pack_sink, objects_iter)
            return FetchPackResult(source.get_refs(), symrefs, agent)

    def get_refs(self, path):
        """Retrieve the current refs of the local repository at ``path``."""
        with self._open_repo(path) as repo:
            return repo.get_refs()
1060
1061
# Git client class to use for local (file-based) repository access
default_local_git_client_cls = LocalGitClient
1064
1065
class SSHVendor(object):
    """A client side SSH implementation."""

    def connect_ssh(self, host, command, username=None, port=None,
                    password=None, key_filename=None):
        """Deprecated alias of :meth:`run_command`.

        Emits a DeprecationWarning and forwards every argument unchanged.
        """
        # This function was deprecated in 0.9.1
        import warnings
        message = (
            "SSHVendor.connect_ssh has been renamed to SSHVendor.run_command")
        warnings.warn(message, DeprecationWarning)
        forwarded = dict(username=username, port=port, password=password,
                         key_filename=key_filename)
        return self.run_command(host, command, **forwarded)

    def run_command(self, host, command, username=None, port=None,
                    password=None, key_filename=None):
        """Connect to an SSH server.

        Run a command remotely and return a file-like object for interaction
        with the remote command. This base implementation always raises
        NotImplementedError; subclasses provide the behaviour.

        :param host: Host name
        :param command: Command to run (as argv array)
        :param username: Optional name of user to log in as
        :param port: Optional SSH port to use
        :param password: Optional ssh password for login or private key
        :param key_filename: Optional path to private keyfile
        """
        raise NotImplementedError(self.run_command)
1094
1095
class StrangeHostname(Exception):
    """Refusing to connect to strange SSH hostname."""

    def __init__(self, hostname):
        # The offending hostname becomes the exception's only argument.
        Exception.__init__(self, hostname)
1101
1102
class SubprocessSSHVendor(SSHVendor):
    """SSH vendor that shells out to the local 'ssh' command."""

    def run_command(self, host, command, username=None, port=None,
                    password=None, key_filename=None):
        """Run ``command`` on ``host`` through the local ``ssh`` binary.

        :param host: Host name
        :param command: Command passed to ssh as its last argument
        :param username: Optional user name, prefixed to the host as
            ``user@host``
        :param port: Optional port, passed with ``-p``
        :param password: Not supported by this vendor
        :param key_filename: Optional key file, passed with ``-i``
        :raises NotImplementedError: if ``password`` is given
        :raises StrangeHostname: if the resulting host argument starts
            with ``-``
        :return: SubprocessWrapper around the spawned ssh process
        """
        if password:
            raise NotImplementedError(
                "You can't set password or passphrase for ssh key "
                "with SubprocessSSHVendor, use ParamikoSSHVendor instead"
            )

        argv = ['ssh', '-x']
        if port:
            argv += ['-p', str(port)]
        if key_filename:
            argv += ['-i', str(key_filename)]

        destination = '%s@%s' % (username, host) if username else host
        # Refuse anything ssh would read as a command line option.
        if destination.startswith('-'):
            raise StrangeHostname(hostname=destination)
        argv += [destination, command]

        pipe = subprocess.PIPE
        proc = subprocess.Popen(argv, bufsize=0, stdin=pipe, stdout=pipe)
        return SubprocessWrapper(proc)
1133
1134
class PuttySSHVendor(SSHVendor):
    """SSH vendor that shells out to the local 'putty' command."""

    def run_command(self, host, command, username=None, port=None,
                    password=None, key_filename=None):
        """Run ``command`` on ``host`` through the local ``putty`` binary.

        :param host: Host name
        :param command: Command passed to putty as its last argument
        :param username: Optional user name, prefixed to the host as
            ``user@host``
        :param port: Optional port, passed with ``-P``
        :param password: Optional password, passed with ``-pw`` (a warning
            is emitted because it shows up in the process list)
        :param key_filename: Optional key file, passed with ``-i``
        :raises NotImplementedError: if both ``password`` and
            ``key_filename`` are given
        :raises StrangeHostname: if the resulting host argument starts
            with ``-``
        :return: SubprocessWrapper around the spawned putty process
        """
        if password and key_filename:
            raise NotImplementedError(
                "You can't set passphrase for ssh key "
                "with PuttySSHVendor, use ParamikoSSHVendor instead"
            )

        executable = 'putty.exe' if sys.platform == 'win32' else 'putty'
        argv = [executable, '-ssh']

        if password:
            import warnings
            warnings.warn(
                "Invoking Putty with a password exposes the password in the "
                "process list.")
            argv += ['-pw', str(password)]
        if port:
            argv += ['-P', str(port)]
        if key_filename:
            argv += ['-i', str(key_filename)]

        destination = '%s@%s' % (username, host) if username else host
        # Refuse anything putty would read as a command line option.
        if destination.startswith('-'):
            raise StrangeHostname(hostname=destination)
        argv += [destination, command]

        pipe = subprocess.PIPE
        proc = subprocess.Popen(argv, bufsize=0, stdin=pipe, stdout=pipe)
        return SubprocessWrapper(proc)
1175
1176
def ParamikoSSHVendor(**kwargs):
    """Deprecated shim for ``dulwich.contrib.paramiko_vendor``.

    Emits a DeprecationWarning, then builds the relocated vendor class with
    the given keyword arguments and returns the instance.
    """
    import warnings
    warnings.warn(
        "ParamikoSSHVendor has been moved to dulwich.contrib.paramiko_vendor.",
        DeprecationWarning)
    # Imported lazily, after the warning has been issued.
    from dulwich.contrib import paramiko_vendor
    return paramiko_vendor.ParamikoSSHVendor(**kwargs)
1184
1185
# Called without arguments by SSHGitClient when no vendor is passed in.
# Can be overridden by users
get_ssh_vendor = SubprocessSSHVendor
1188
1189
class SSHGitClient(TraditionalGitClient):
    """Git client that runs the remote git commands through an SSH vendor."""

    def __init__(self, host, port=None, username=None, vendor=None,
                 config=None, password=None, key_filename=None, **kwargs):
        """Create a new SSHGitClient.

        :param host: Host name to connect to
        :param port: Optional SSH port
        :param username: Optional user name to log in as
        :param vendor: Optional SSH vendor object; when None the module
            level ``get_ssh_vendor()`` is called to obtain one
        :param config: Accepted but not used by this constructor
        :param password: Optional password handed to the vendor
        :param key_filename: Optional key file handed to the vendor
        :param kwargs: Passed through to the parent class constructor
        """
        self.host = host
        self.port = port
        self.username = username
        self.password = password
        self.key_filename = key_filename
        super(SSHGitClient, self).__init__(**kwargs)
        # Maps a command name (bytes) to the remote path to run instead of
        # the default ``git-<cmd>``.
        self.alternative_paths = {}
        self.ssh_vendor = get_ssh_vendor() if vendor is None else vendor

    def get_url(self, path):
        """Return the ssh:// URL for ``path`` on this client's host."""
        netloc = self.host
        if self.port is not None:
            netloc = netloc + (":%d" % self.port)

        if self.username is not None:
            quoted_user = urlquote(self.username, '@/:')
            netloc = quoted_user + "@" + netloc

        return urlparse.urlunsplit(('ssh', netloc, path, '', ''))

    @classmethod
    def from_parsedurl(cls, parsedurl, **kwargs):
        """Build a client from the host, port and user of a parsed URL."""
        return cls(host=parsedurl.hostname, port=parsedurl.port,
                   username=parsedurl.username, **kwargs)

    def _get_cmd_path(self, cmd):
        """Return the remote executable (bytes) to run for ``cmd``."""
        default_path = b'git-' + cmd
        cmd = self.alternative_paths.get(cmd, default_path)
        assert isinstance(cmd, bytes)
        return cmd

    def _connect(self, cmd, path):
        """Run the remote command for ``cmd`` on ``path`` via the vendor.

        :param cmd: Command name as bytes
        :param path: Repository path; decoded with the remote path encoding
            when given as bytes
        :raises TypeError: if ``cmd`` is not bytes
        :return: Tuple of (Protocol, can_read callable)
        """
        if not isinstance(cmd, bytes):
            raise TypeError(cmd)
        if isinstance(path, bytes):
            path = path.decode(self._remote_path_encoding)
        if path.startswith("/~"):
            path = path[1:]

        remote_cmd = self._get_cmd_path(cmd).decode(
            self._remote_path_encoding)
        # The path is wrapped in single quotes for the remote side.
        argv = remote_cmd + " '" + path + "'"
        con = self.ssh_vendor.run_command(
            self.host, argv, port=self.port, username=self.username,
            password=self.password, key_filename=self.key_filename)
        proto = Protocol(con.read, con.write, con.close,
                         report_activity=self._report_activity)
        return (proto, con.can_read)
1241
1242
def default_user_agent_string():
    """Return the default User-agent value, built from dulwich's version."""
    # Start user agent with "git/", because GitHub requires this. :-( See
    # https://github.com/jelmer/dulwich/issues/562 for details.
    version = ".".join(map(str, dulwich.__version__))
    return "git/dulwich/" + version
1247
1248
def default_urllib3_manager(config, **override_kwargs):
    """Return `urllib3` connection pool manager.

    Honour detected proxy configurations, as well as the `http.useragent`,
    `http.sslVerify` and `http.sslCAInfo` settings.

    :param config: `dulwich.config.ConfigDict` instance with Git
        configuration, or None.
    :param override_kwargs: Additional arguments for the urllib3 manager;
        these take precedence over values derived from `config`.
    :return: `urllib3.ProxyManager` instance for proxy configurations,
        `urllib3.PoolManager` otherwise.
    """
    proxy_server = user_agent = None
    ca_certs = ssl_verify = None

    if config is not None:
        try:
            proxy_server = config.get(b"http", b"proxy")
        except KeyError:
            pass
        try:
            user_agent = config.get(b"http", b"useragent")
        except KeyError:
            pass

        # TODO(jelmer): Support per-host settings
        try:
            ssl_verify = config.get_boolean(b"http", b"sslVerify")
        except KeyError:
            ssl_verify = True

        try:
            # http.sslCAInfo is a path to a certificate bundle, not a
            # boolean, so it has to be read as a plain value.
            ca_certs = config.get(b"http", b"sslCAInfo")
        except KeyError:
            ca_certs = None

    if user_agent is None:
        user_agent = default_user_agent_string()

    headers = {"User-agent": user_agent}

    # Verification is only switched off on an explicit False; anything else
    # (including no configuration at all) keeps it on.
    kwargs = {
        'cert_reqs': 'CERT_NONE' if ssl_verify is False else 'CERT_REQUIRED',
    }

    if ca_certs is not None:
        kwargs['ca_certs'] = ca_certs
    kwargs.update(override_kwargs)

    # Try really hard to find a SSL certificate path
    if 'ca_certs' not in kwargs and kwargs.get('cert_reqs') != 'CERT_NONE':
        try:
            import certifi
        except ImportError:
            pass
        else:
            kwargs['ca_certs'] = certifi.where()

    if proxy_server is not None:
        # `urllib3` requires a `str` object in both Python 2 and 3, while
        # `ConfigDict` coerces entries to `bytes` on Python 3. Compensate.
        if not isinstance(proxy_server, str):
            proxy_server = proxy_server.decode()
        manager = urllib3.ProxyManager(proxy_server, headers=headers,
                                       **kwargs)
    else:
        manager = urllib3.PoolManager(headers=headers, **kwargs)

    return manager
1321
1322
class HttpGitClient(GitClient):
    """Git client that talks to a server over HTTP(S) using urllib3."""

    def __init__(self, base_url, dumb=None, pool_manager=None, config=None,
                 username=None, password=None, **kwargs):
        """Create a new HttpGitClient.

        :param base_url: Base URL of the server; stored with exactly one
            trailing "/"
        :param dumb: Whether the server is known to be dumb (True), known
            to be smart (False) or not known yet (None); updated by
            reference discovery
        :param pool_manager: Optional pool manager to use; when None one is
            created with ``default_urllib3_manager(config)``
        :param config: Git configuration handed to
            ``default_urllib3_manager``
        :param username: Optional user name for HTTP basic authentication
        :param password: Optional password for HTTP basic authentication
        :param kwargs: Passed through to ``GitClient.__init__``
        """
        self._base_url = base_url.rstrip("/") + "/"
        self._username = username
        self._password = password
        self.dumb = dumb
        self.headers = {}

        if pool_manager is None:
            self.pool_manager = default_urllib3_manager(config)
        else:
            self.pool_manager = pool_manager

        if username is not None:
            # No escaping needed: ":" is not allowed in username:
            # https://tools.ietf.org/html/rfc2617#section-2
            # A missing password is sent as empty rather than as the
            # literal text "None".
            credentials = "%s:%s" % (
                username, password if password is not None else "")
            basic_auth = urllib3.util.make_headers(basic_auth=credentials)
            self.pool_manager.headers.update(basic_auth)

        GitClient.__init__(self, **kwargs)

    def get_url(self, path):
        """Return the URL for ``path`` without a trailing slash."""
        return self._get_url(path).rstrip("/")

    @classmethod
    def from_parsedurl(cls, parsedurl, **kwargs):
        """Build a client from a parsed URL.

        User name and password are taken from the URL (unquoted) and
        removed from the base URL handed to the constructor.
        """
        auth, host = urllib2.splituser(parsedurl.netloc)
        password = parsedurl.password
        if password is not None:
            password = urlunquote(password)
        username = parsedurl.username
        if username is not None:
            username = urlunquote(username)
        # TODO(jelmer): This also strips the username
        parsedurl = parsedurl._replace(netloc=host)
        return cls(urlparse.urlunparse(parsedurl),
                   password=password, username=username, **kwargs)

    def __repr__(self):
        return "%s(%r, dumb=%r)" % (
            type(self).__name__, self._base_url, self.dumb)

    def _get_url(self, path):
        """Join ``path`` onto the base URL, with one trailing slash."""
        if not isinstance(path, str):
            # TODO(jelmer): this is unrelated to the local filesystem;
            # This is not necessarily the right encoding to decode the path
            # with.
            path = path.decode(sys.getfilesystemencoding())
        return urlparse.urljoin(self._base_url, path).rstrip("/") + "/"

    def _http_request(self, url, headers=None, data=None,
                      allow_compression=False):
        """Perform HTTP request.

        A GET is issued when ``data`` is None, a POST otherwise.

        :param url: Request URL.
        :param headers: Optional custom headers to override defaults.
        :param data: Request data.
        :param allow_compression: Allow GZipped communication.
        :raises NotGitRepository: if the server answers with status 404
        :raises GitProtocolError: on any other status than 200
        :return: Tuple (`response`, `read`), where response is an `urllib3`
            response object with additional `content_type` and
            `redirect_location` properties, and `read` is a consumable read
            method for the response data.
        """
        req_headers = self.pool_manager.headers.copy()
        if headers is not None:
            req_headers.update(headers)
        req_headers["Pragma"] = "no-cache"
        if allow_compression:
            req_headers["Accept-Encoding"] = "gzip"
        else:
            req_headers["Accept-Encoding"] = "identity"

        if data is None:
            resp = self.pool_manager.request("GET", url, headers=req_headers)
        else:
            resp = self.pool_manager.request("POST", url, headers=req_headers,
                                             body=data)

        if resp.status == 404:
            raise NotGitRepository()
        elif resp.status != 200:
            raise GitProtocolError("unexpected http resp %d for %s" %
                                   (resp.status, url))

        # TODO: Optimization available by adding `preload_content=False` to the
        # request and just passing the `read` method on instead of going via
        # `BytesIO`, if we can guarantee that the entire response is consumed
        # before issuing the next to still allow for connection reuse from the
        # pool.
        if resp.getheader("Content-Encoding") == "gzip":
            read = gzip.GzipFile(fileobj=BytesIO(resp.data)).read
        else:
            read = BytesIO(resp.data).read

        resp.content_type = resp.getheader("Content-Type")
        resp.redirect_location = resp.get_redirect_location()

        return resp, read

    def _discover_references(self, service, base_url):
        """Fetch ``info/refs`` and work out whether the server is smart.

        Sets ``self.dumb`` from the content type of the response.

        :param service: Service name as bytes (e.g. b"git-upload-pack")
        :param base_url: Repository URL; must end with "/"
        :raises GitProtocolError: on an unexpected redirect target or a
            malformed smart response
        :return: Tuple of (refs, server capabilities, base URL), where the
            base URL reflects a redirect if one was reported
        """
        assert base_url[-1] == "/"
        tail = "info/refs"
        headers = {"Accept": "*/*"}
        # The service parameter is what asks for the smart protocol, so it
        # is only left out once the server is known to be dumb.
        if self.dumb is not True:
            tail += "?service=%s" % service.decode('ascii')
        url = urlparse.urljoin(base_url, tail)
        resp, read = self._http_request(url, headers, allow_compression=True)

        if resp.redirect_location:
            # Something changed (redirect!), so let's update the base URL
            if not resp.redirect_location.endswith(tail):
                raise GitProtocolError(
                        "Redirected from URL %s to URL %s without %s" % (
                            url, resp.redirect_location, tail))
            base_url = resp.redirect_location[:-len(tail)]

        try:
            self.dumb = not resp.content_type.startswith("application/x-git-")
            if not self.dumb:
                proto = Protocol(read, None)
                # The first line should mention the service
                try:
                    [pkt] = list(proto.read_pkt_seq())
                except ValueError:
                    raise GitProtocolError(
                        "unexpected number of packets received")
                if pkt.rstrip(b'\n') != (b'# service=' + service):
                    raise GitProtocolError(
                        "unexpected first line %r from smart server" % pkt)
                return read_pkt_refs(proto) + (base_url, )
            else:
                return read_info_refs(resp), set(), base_url
        finally:
            resp.close()

    def _smart_request(self, service, url, data):
        """POST ``data`` to the smart ``service`` endpoint below ``url``.

        :param service: Service name as str (e.g. "git-upload-pack")
        :param url: Repository URL; must end with "/"
        :param data: Request body
        :raises GitProtocolError: if the response content type is not the
            result type of the service
        :return: Tuple (response, read) as returned by ``_http_request``
        """
        assert url[-1] == "/"
        url = urlparse.urljoin(url, service)
        result_content_type = "application/x-%s-result" % service
        headers = {
            "Content-Type": "application/x-%s-request" % service,
            "Accept": result_content_type,
            "Content-Length": str(len(data)),
        }
        resp, read = self._http_request(url, headers, data)
        if resp.content_type != result_content_type:
            raise GitProtocolError("Invalid content-type from server: %s"
                                   % resp.content_type)
        return resp, read

    def send_pack(self, path, update_refs, generate_pack_data,
                  progress=None):
        """Upload a pack to a remote repository.

        :param path: Repository path (as bytestring)
        :param update_refs: Function to determine changes to remote refs.
            Receive dict with existing remote refs, returns dict with
            changed refs (name -> sha, where sha=ZERO_SHA for deletions)
        :param generate_pack_data: Function that can return a tuple
            with number of elements and pack data to upload.
        :param progress: Optional progress function

        :raises SendPackError: if server rejects the pack data
        :raises UpdateRefsError: if the server supports report-status
                                 and rejects ref updates
        :raises NotImplementedError: if the server is dumb
        :return: new_refs dictionary containing the changes that were made
            {refname: new_ref}, including deleted refs.
        """
        url = self._get_url(path)
        old_refs, server_capabilities, url = self._discover_references(
            b"git-receive-pack", url)
        negotiated_capabilities = self._negotiate_receive_pack_capabilities(
                server_capabilities)
        negotiated_capabilities.add(capability_agent())

        if CAPABILITY_REPORT_STATUS in negotiated_capabilities:
            self._report_status_parser = ReportStatusParser()

        new_refs = update_refs(dict(old_refs))
        if new_refs is None:
            # Determine wants function is aborting the push.
            return old_refs
        if self.dumb:
            raise NotImplementedError(self.send_pack)
        req_data = BytesIO()
        req_proto = Protocol(None, req_data.write)
        (have, want) = self._handle_receive_pack_head(
            req_proto, negotiated_capabilities, old_refs, new_refs)
        if not want and set(new_refs.items()).issubset(set(old_refs.items())):
            return new_refs
        pack_data_count, pack_data = generate_pack_data(
                have, want,
                ofs_delta=(CAPABILITY_OFS_DELTA in negotiated_capabilities))
        if pack_data_count:
            write_pack_data(req_proto.write_file(), pack_data_count, pack_data)
        resp, read = self._smart_request("git-receive-pack", url,
                                         data=req_data.getvalue())
        try:
            resp_proto = Protocol(read, None)
            self._handle_receive_pack_tail(
                resp_proto, negotiated_capabilities, progress)
            return new_refs
        finally:
            resp.close()

    def fetch_pack(self, path, determine_wants, graph_walker, pack_data,
                   progress=None):
        """Retrieve a pack from a git smart server.

        :param path: Repository path (as bytestring)
        :param determine_wants: Callback that returns list of commits to fetch
        :param graph_walker: Object with next() and ack().
        :param pack_data: Callback called for each bit of data in the pack
        :param progress: Callback for progress reports (strings)
        :raises NotImplementedError: if objects are wanted from a dumb
            server
        :return: FetchPackResult object
        """
        url = self._get_url(path)
        refs, server_capabilities, url = self._discover_references(
            b"git-upload-pack", url)
        negotiated_capabilities, symrefs, agent = (
                self._negotiate_upload_pack_capabilities(
                        server_capabilities))
        wants = determine_wants(refs)
        if wants is not None:
            wants = [cid for cid in wants if cid != ZERO_SHA]
        if not wants:
            return FetchPackResult(refs, symrefs, agent)
        if self.dumb:
            raise NotImplementedError(self.fetch_pack)
        req_data = BytesIO()
        req_proto = Protocol(None, req_data.write)
        self._handle_upload_pack_head(
                req_proto, negotiated_capabilities, graph_walker, wants,
                lambda: False)
        resp, read = self._smart_request(
            "git-upload-pack", url, data=req_data.getvalue())
        try:
            resp_proto = Protocol(read, None)
            self._handle_upload_pack_tail(
                resp_proto, negotiated_capabilities, graph_walker, pack_data,
                progress)
            return FetchPackResult(refs, symrefs, agent)
        finally:
            resp.close()

    def get_refs(self, path):
        """Retrieve the current refs from a git smart server."""
        url = self._get_url(path)
        refs, _, _ = self._discover_references(
            b"git-upload-pack", url)
        return refs
1576
1577
def get_transport_and_path_from_url(url, config=None, **kwargs):
    """Build the git client matching the scheme of a URL.

    :param url: URL to open (a unicode string)
    :param config: Optional config object; only handed to the HTTP(S) client
    :param thin_packs: Whether or not thin packs should be retrieved
    :param report_activity: Optional callback for reporting transport
        activity.
    :return: Tuple with client instance and relative path.
    :raise ValueError: if the URL scheme is not one of git, git+ssh, ssh,
        http, https or file
    """
    parsed = urlparse.urlparse(url)
    scheme = parsed.scheme

    # Pick the client for the scheme; every branch shares the same return.
    if scheme == 'git':
        client = TCPGitClient.from_parsedurl(parsed, **kwargs)
    elif scheme in ('git+ssh', 'ssh'):
        client = SSHGitClient.from_parsedurl(parsed, **kwargs)
    elif scheme in ('http', 'https'):
        client = HttpGitClient.from_parsedurl(
            parsed, config=config, **kwargs)
    elif scheme == 'file':
        client = default_local_git_client_cls.from_parsedurl(
            parsed, **kwargs)
    else:
        raise ValueError("unknown scheme '%s'" % scheme)

    return client, parsed.path
1602
1603
def get_transport_and_path(location, **kwargs):
    """Obtain a git client from a URL or path.

    The location is first tried as a URL. If its scheme is not recognised it
    is interpreted as a Windows local path (on win32 only), then as an
    scp-style ``[user@]host:path`` SSH location, and finally as a local path.

    :param location: URL or path (a string)
    :param config: Optional config object
    :param thin_packs: Whether or not thin packs should be retrieved
    :param report_activity: Optional callback for reporting transport
        activity.
    :return: Tuple with client instance and relative path.
    """
    # First, try to parse it as a URL
    try:
        return get_transport_and_path_from_url(location, **kwargs)
    except ValueError:
        pass

    # Slice instead of indexing so that an empty location does not raise
    # IndexError here and instead falls through to the local-path case, as
    # it does on every other platform.
    if (sys.platform == 'win32' and
            location[:1].isalpha() and location[1:3] == ':\\'):
        # Windows local path
        return default_local_git_client_cls(**kwargs), location

    if ':' in location and '@' not in location:
        # SSH with no user@, zero or one leading slash.
        (hostname, path) = location.split(':', 1)
        return SSHGitClient(hostname, **kwargs), path
    elif ':' in location:
        # SSH with user@host:foo.
        user_host, path = location.split(':', 1)
        if '@' in user_host:
            user, host = user_host.rsplit('@', 1)
        else:
            # The '@' only occurs after the first colon, i.e. in the path.
            user = None
            host = user_host
        return SSHGitClient(host, username=user, **kwargs), path

    # Otherwise, assume it's a local path.
    return default_local_git_client_cls(**kwargs), location