Apply some lib2to3.fixes.fix_idioms changes (excluding the list(x).sort() -> sorted(x) ones).
[jelmer/dulwich.git] / dulwich / client.py
1 # client.py -- Implementation of the client side of the git protocols
2 # Copyright (C) 2008-2013 Jelmer Vernooij <jelmer@samba.org>
3 # Copyright (C) 2008 John Carr
4 #
5 # This program is free software; you can redistribute it and/or
6 # modify it under the terms of the GNU General Public License
7 # as published by the Free Software Foundation; either version 2
8 # or (at your option) a later version of the License.
9 #
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 # GNU General Public License for more details.
14 #
15 # You should have received a copy of the GNU General Public License
16 # along with this program; if not, write to the Free Software
17 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
18 # MA  02110-1301, USA.
19
20 """Client side support for the Git protocol.
21
22 The Dulwich client supports the following capabilities:
23
24  * thin-pack
25  * multi_ack_detailed
26  * multi_ack
27  * side-band-64k
28  * ofs-delta
29  * report-status
30  * delete-refs
31
32 Known capabilities that are not supported:
33
34  * shallow
35  * no-progress
36  * include-tag
37 """
38
39 __docformat__ = 'restructuredText'
40
41 from cStringIO import StringIO
42 import dulwich
43 import select
44 import socket
45 import subprocess
46 import urllib2
47 import urlparse
48
49 from dulwich.errors import (
50     GitProtocolError,
51     NotGitRepository,
52     SendPackError,
53     UpdateRefsError,
54     )
55 from dulwich.protocol import (
56     _RBUFSIZE,
57     PktLineParser,
58     Protocol,
59     ProtocolFile,
60     TCP_GIT_PORT,
61     ZERO_SHA,
62     extract_capabilities,
63     )
64 from dulwich.pack import (
65     write_pack_objects,
66     )
67 from dulwich.refs import (
68     read_info_refs,
69     )
70
71
# Python 2.6.6 added these schemes to urlparse.uses_netloc upstream.
# Register them by hand so that earlier Pythons parse git:// and
# git+ssh:// URLs the same way.
for scheme in ('git', 'git+ssh'):
    if scheme in urlparse.uses_netloc:
        continue
    urlparse.uses_netloc.append(scheme)
77
def _fileno_can_read(fileno):
    """Report whether a file descriptor has data ready to be read.

    The descriptor is polled with a zero timeout, so this never blocks.

    :param fileno: File descriptor (or object accepted by select())
    :return: True if select() lists the descriptor as readable
    """
    readable, _, _ = select.select([fileno], [], [], 0)
    return len(readable) > 0
81
# Capabilities requested for both fetching and sending.
COMMON_CAPABILITIES = ['ofs-delta', 'side-band-64k']
# Capabilities requested when fetching (intersected with what the
# server advertises before use).
FETCH_CAPABILITIES = (
    ['thin-pack', 'multi_ack', 'multi_ack_detailed'] + COMMON_CAPABILITIES)
# Capabilities requested when sending.
SEND_CAPABILITIES = ['report-status'] + COMMON_CAPABILITIES
85
86
class ReportStatusParser(object):
    """Collect and interpret a 'report-status' response from a server.

    Packets are fed in one at a time through handle_packet(); check()
    then turns any recorded failure into an exception.
    """

    def __init__(self):
        self._done = False
        self._pack_status = None
        self._ref_status_ok = True
        self._ref_statuses = []

    def check(self):
        """Raise an exception if the collected report contains errors.

        :raise SendPackError: Raised when the server could not unpack
        :raise UpdateRefsError: Raised when refs could not be updated
        """
        pack_status = self._pack_status
        if pack_status is not None and pack_status != 'unpack ok':
            raise SendPackError(pack_status)
        if self._ref_status_ok:
            return

        results = {}
        succeeded = set()
        for line in self._ref_statuses:
            if ' ' not in line:
                # malformed response, move on to the next one
                continue
            verdict, name = line.split(' ', 1)
            if verdict != 'ng':
                succeeded.add(name)
            elif ' ' in name:
                # "ng <ref> <reason>": record the reason as the status
                name, verdict = name.split(' ', 1)
            results[name] = verdict
        failed = [name for name in results if name not in succeeded]
        raise UpdateRefsError('%s failed to update' % ', '.join(failed),
                              ref_status=results)

    def handle_packet(self, pkt):
        """Record one packet of the status report.

        The first packet is stored as the pack status; every later one is
        stored as a ref status. None marks the end of the report.

        :param pkt: Packet contents, or None for a flush packet
        :raise GitProtocolError: Raised when packets are received after a
            flush packet.
        """
        if self._done:
            raise GitProtocolError("received more data after status report")
        if pkt is None:
            self._done = True
            return
        line = pkt.strip()
        if self._pack_status is None:
            self._pack_status = line
            return
        self._ref_statuses.append(line)
        if not line.startswith('ok '):
            self._ref_status_ok = False
143
144
def read_pkt_refs(proto):
    """Read the refs a server sends, together with its capabilities.

    :param proto: Protocol object providing read_pkt_seq()
    :return: (refs, capabilities) where refs maps ref name to sha and
        capabilities is a set; (None, empty set) if no refs were received
    :raise GitProtocolError: if the server sends an 'ERR' packet
    """
    capabilities = None
    advertised = {}
    # Receive refs from server
    for line in proto.read_pkt_seq():
        sha, name = line.rstrip('\n').split(None, 1)
        if sha == 'ERR':
            raise GitProtocolError(name)
        if capabilities is None:
            # Capabilities are split off the first packet that gets here
            name, capabilities = extract_capabilities(name)
        advertised[name] = sha

    if not advertised:
        return None, set([])
    return advertised, set(capabilities)
160
161
162 # TODO(durin42): this doesn't correctly degrade if the server doesn't
163 # support some capabilities. This should work properly with servers
164 # that don't support multi_ack.
class GitClient(object):
    """Git smart server client.

    Holds the transport-independent parts of the protocol exchange;
    send_pack() and fetch_pack() must be provided by subclasses.
    """

    def __init__(self, thin_packs=True, report_activity=None):
        """Create a new GitClient instance.

        :param thin_packs: Whether or not thin packs should be retrieved
        :param report_activity: Optional callback for reporting transport
            activity.
        """
        self._report_activity = report_activity
        self._report_status_parser = None
        self._fetch_capabilities = set(FETCH_CAPABILITIES)
        self._send_capabilities = set(SEND_CAPABILITIES)
        if not thin_packs:
            self._fetch_capabilities.remove('thin-pack')

    def send_pack(self, path, determine_wants, generate_pack_contents,
                  progress=None):
        """Upload a pack to a remote repository.

        :param path: Repository path
        :param determine_wants: Function that receives the remote refs
            and returns the refs as they should be after the upload
        :param generate_pack_contents: Function that can return a sequence of the
            shas of the objects to upload.
        :param progress: Optional progress function

        :raises SendPackError: if server rejects the pack data
        :raises UpdateRefsError: if the server supports report-status
                                 and rejects ref updates
        """
        raise NotImplementedError(self.send_pack)

    def fetch(self, path, target, determine_wants=None, progress=None):
        """Fetch into a target repository.

        :param path: Path to fetch from
        :param target: Target repository to fetch into
        :param determine_wants: Optional function to determine what refs
            to fetch
        :param progress: Optional progress function
        :return: remote refs as dictionary
        """
        if determine_wants is None:
            determine_wants = target.object_store.determine_wants_all
        f, commit, abort = target.object_store.add_pack()
        try:
            result = self.fetch_pack(path, determine_wants,
                    target.get_graph_walker(), f.write, progress)
        except:
            # Deliberately catches everything: whatever interrupted the
            # transfer, the pack being added is aborted before re-raising.
            abort()
            raise
        else:
            commit()
        return result

    def fetch_pack(self, path, determine_wants, graph_walker, pack_data,
                   progress=None):
        """Retrieve a pack from a git smart server.

        :param path: Path of the repository to fetch from
        :param determine_wants: Callback that returns list of commits to fetch
        :param graph_walker: Object with next() and ack().
        :param pack_data: Callback called for each bit of data in the pack
        :param progress: Callback for progress reports (strings)
        """
        raise NotImplementedError(self.fetch_pack)

    def _parse_status_report(self, proto):
        """Read a status report directly from proto and raise on errors.

        :param proto: Protocol object to read from
        :raise SendPackError: if the first line is not 'unpack ok'
        :raise UpdateRefsError: if any ref status does not start with 'ok '
        """
        unpack = proto.read_pkt_line().strip()
        if unpack != 'unpack ok':
            st = True
            # flush remaining error data
            while st is not None:
                st = proto.read_pkt_line()
            raise SendPackError(unpack)
        statuses = []
        errs = False
        ref_status = proto.read_pkt_line()
        while ref_status:
            ref_status = ref_status.strip()
            statuses.append(ref_status)
            if not ref_status.startswith('ok '):
                errs = True
            ref_status = proto.read_pkt_line()

        if errs:
            ref_status = {}
            ok = set()
            for status in statuses:
                if ' ' not in status:
                    # malformed response, move on to the next one
                    continue
                status, ref = status.split(' ', 1)

                if status == 'ng':
                    if ' ' in ref:
                        ref, status = ref.split(' ', 1)
                else:
                    ok.add(ref)
                ref_status[ref] = status
            raise UpdateRefsError('%s failed to update' %
                                  ', '.join([ref for ref in ref_status
                                             if ref not in ok]),
                                  ref_status=ref_status)

    def _read_side_band64k_data(self, proto, channel_callbacks):
        """Read per-channel data.

        This requires the side-band-64k capability.

        :param proto: Protocol object to read from
        :param channel_callbacks: Dictionary mapping channels to packet
            handlers to use. None for a callback discards channel data.
        :raise AssertionError: if data arrives on a channel that has no
            entry in channel_callbacks
        """
        for pkt in proto.read_pkt_seq():
            # The first byte of every packet is the channel number
            channel = ord(pkt[0])
            pkt = pkt[1:]
            try:
                cb = channel_callbacks[channel]
            except KeyError:
                raise AssertionError('Invalid sideband channel %d' % channel)
            else:
                if cb is not None:
                    cb(pkt)

    def _handle_receive_pack_head(self, proto, capabilities, old_refs, new_refs):
        """Handle the head of a 'git-receive-pack' request.

        Writes one update line per ref whose sha changes; the capabilities
        are appended to the first such line only.

        :param proto: Protocol object to read from
        :param capabilities: List of negotiated capabilities
        :param old_refs: Old refs, as received from the server
        :param new_refs: New refs
        :return: (have, want) tuple
        """
        want = []
        have = [x for x in old_refs.values() if not x == ZERO_SHA]
        # Set copy of 'have' so the membership test below is not a list scan
        have_lookup = set(have)
        sent_capabilities = False

        # list() keeps this working when keys() does not return a list
        for refname in set(list(new_refs.keys()) + list(old_refs.keys())):
            old_sha1 = old_refs.get(refname, ZERO_SHA)
            new_sha1 = new_refs.get(refname, ZERO_SHA)

            if old_sha1 != new_sha1:
                if sent_capabilities:
                    proto.write_pkt_line('%s %s %s' % (old_sha1, new_sha1,
                                                            refname))
                else:
                    proto.write_pkt_line(
                      '%s %s %s\0%s' % (old_sha1, new_sha1, refname,
                                        ' '.join(capabilities)))
                    sent_capabilities = True
            if new_sha1 not in have_lookup and new_sha1 != ZERO_SHA:
                want.append(new_sha1)
        proto.write_pkt_line(None)
        return (have, want)

    def _handle_receive_pack_tail(self, proto, capabilities, progress=None):
        """Handle the tail of a 'git-receive-pack' request.

        :param proto: Protocol object to read from
        :param capabilities: List of negotiated capabilities
        :param progress: Optional progress reporting function
        :raise SendPackError: if data follows the expected response, or
            (via the status parser) if the server could not unpack
        :raise UpdateRefsError: via the status parser, if refs were rejected
        """
        if "side-band-64k" in capabilities:
            if progress is None:
                progress = lambda x: None
            channel_callbacks = { 2: progress }
            if 'report-status' in capabilities:
                # The status report arrives as pkt-lines inside channel 1
                channel_callbacks[1] = PktLineParser(
                    self._report_status_parser.handle_packet).parse
            self._read_side_band64k_data(proto, channel_callbacks)
        else:
            if 'report-status' in capabilities:
                for pkt in proto.read_pkt_seq():
                    self._report_status_parser.handle_packet(pkt)
        if self._report_status_parser is not None:
            self._report_status_parser.check()
        # wait for EOF before returning
        data = proto.read()
        if data:
            raise SendPackError('Unexpected response %r' % data)

    def _handle_upload_pack_head(self, proto, capabilities, graph_walker,
                                 wants, can_read):
        """Handle the head of a 'git-upload-pack' request.

        :param proto: Protocol object to read from
        :param capabilities: List of negotiated capabilities
        :param graph_walker: GraphWalker instance to call .ack() on
        :param wants: List of commits to fetch
        :param can_read: function that returns a boolean that indicates
            whether there is extra graph data to read on proto
        """
        assert isinstance(wants, list) and isinstance(wants[0], str)
        # Capabilities are sent along with the first want line only
        proto.write_pkt_line('want %s %s\n' % (
            wants[0], ' '.join(capabilities)))
        for want in wants[1:]:
            proto.write_pkt_line('want %s\n' % want)
        proto.write_pkt_line(None)
        have = graph_walker.next()
        while have:
            proto.write_pkt_line('have %s\n' % have)
            if can_read():
                pkt = proto.read_pkt_line()
                parts = pkt.rstrip('\n').split(' ')
                if parts[0] == 'ACK':
                    graph_walker.ack(parts[1])
                    if parts[2] in ('continue', 'common'):
                        pass
                    elif parts[2] == 'ready':
                        break
                    else:
                        raise AssertionError(
                            "%s not in ('continue', 'ready', 'common')" %
                            parts[2])
            have = graph_walker.next()
        proto.write_pkt_line('done\n')

    def _handle_upload_pack_tail(self, proto, capabilities, graph_walker,
                                 pack_data, progress=None, rbufsize=_RBUFSIZE):
        """Handle the tail of a 'git-upload-pack' request.

        :param proto: Protocol object to read from
        :param capabilities: List of negotiated capabilities
        :param graph_walker: GraphWalker instance to call .ack() on
        :param pack_data: Function to call with pack data
        :param progress: Optional progress reporting function
        :param rbufsize: Read buffer size
        """
        pkt = proto.read_pkt_line()
        while pkt:
            parts = pkt.rstrip('\n').split(' ')
            if parts[0] == 'ACK':
                # Use the stripped field: splitting the raw packet would
                # leave the newline on the sha of a two-field "ACK <sha>"
                graph_walker.ack(parts[1])
            if len(parts) < 3 or parts[2] not in (
                    'ready', 'continue', 'common'):
                break
            pkt = proto.read_pkt_line()
        if "side-band-64k" in capabilities:
            if progress is None:
                # Just ignore progress data
                progress = lambda x: None
            self._read_side_band64k_data(proto, {1: pack_data, 2: progress})
            # wait for EOF before returning
            data = proto.read()
            if data:
                raise Exception('Unexpected response %r' % data)
        else:
            while True:
                data = proto.read(rbufsize)
                if data == "":
                    break
                pack_data(data)
419
420
class TraditionalGitClient(GitClient):
    """Traditional Git client."""

    def _connect(self, cmd, path):
        """Create a connection to the server.

        This method is abstract - concrete implementations should
        implement their own variant which connects to the server and
        returns an initialized Protocol object with the service ready
        for use and a can_read function which may be used to see if
        reads would block.

        :param cmd: The git service name to which we should connect.
        :param path: The path we should pass to the service.
        """
        raise NotImplementedError()

    def send_pack(self, path, determine_wants, generate_pack_contents,
                  progress=None):
        """Upload a pack to a remote repository.

        :param path: Repository path
        :param determine_wants: Function that receives a copy of the
            remote refs and returns the refs as they should be after the
            upload, or None to send nothing
        :param generate_pack_contents: Function that can return a sequence of the
            shas of the objects to upload.
        :param progress: Optional callback called with progress updates
        :return: the new refs, or the old refs if nothing was sent

        :raises SendPackError: if server rejects the pack data
        :raises UpdateRefsError: if the server supports report-status
                                 and rejects ref updates
        """
        proto, unused_can_read = self._connect('receive-pack', path)
        old_refs, server_capabilities = read_pkt_refs(proto)
        negotiated_capabilities = self._send_capabilities & server_capabilities

        if 'report-status' in negotiated_capabilities:
            self._report_status_parser = ReportStatusParser()
        else:
            # Never reuse a parser left over from an earlier push
            self._report_status_parser = None
        report_status_parser = self._report_status_parser

        try:
            new_refs = orig_new_refs = determine_wants(dict(old_refs))
        except:
            # Deliberately catches everything: the exchange is closed with
            # a flush packet before the error propagates.
            proto.write_pkt_line(None)
            raise

        # Checked before new_refs is iterated below
        if new_refs is None:
            proto.write_pkt_line(None)
            return old_refs

        if 'delete-refs' not in server_capabilities:
            # Server does not support deletions. Fail later.
            kept_refs = {}
            for ref, sha in new_refs.iteritems():
                if sha != ZERO_SHA:
                    kept_refs[ref] = sha
                elif report_status_parser is not None:
                    # Recorded under the ref name so that check() reports
                    # which ref could not be deleted
                    report_status_parser._ref_statuses.append(
                        'ng %s remote does not support deleting refs'
                        % ref)
                    report_status_parser._ref_status_ok = False
            new_refs = kept_refs

        if len(new_refs) == 0 and len(orig_new_refs):
            # NOOP - Original new refs filtered out by policy
            proto.write_pkt_line(None)
            if self._report_status_parser is not None:
                self._report_status_parser.check()
            return old_refs

        (have, want) = self._handle_receive_pack_head(proto,
            negotiated_capabilities, old_refs, new_refs)
        if not want and old_refs == new_refs:
            return new_refs
        objects = generate_pack_contents(have, want)
        if len(objects) > 0:
            write_pack_objects(proto.write_file(), objects)
        elif len(set(new_refs.values()) - set([ZERO_SHA])) > 0:
            # Check for valid create/update refs
            filtered_new_refs = \
                dict([(ref, sha) for ref, sha in new_refs.iteritems()
                     if sha != ZERO_SHA])
            if len(set(filtered_new_refs.iteritems()) -
                    set(old_refs.iteritems())) > 0:
                # Refs change but there are no objects to send: a pack is
                # still written
                write_pack_objects(proto.write_file(), objects)

        self._handle_receive_pack_tail(proto, negotiated_capabilities,
            progress)
        return new_refs

    def fetch_pack(self, path, determine_wants, graph_walker, pack_data,
                   progress=None):
        """Retrieve a pack from a git smart server.

        :param path: Path passed to the 'upload-pack' service
        :param determine_wants: Callback that returns list of commits to fetch
        :param graph_walker: Object with next() and ack().
        :param pack_data: Callback called for each bit of data in the pack
        :param progress: Callback for progress reports (strings)
        :return: the refs received from the server (None if there were none)
        """
        proto, can_read = self._connect('upload-pack', path)
        refs, server_capabilities = read_pkt_refs(proto)
        negotiated_capabilities = self._fetch_capabilities & server_capabilities

        if refs is None:
            proto.write_pkt_line(None)
            return refs

        try:
            wants = determine_wants(refs)
        except:
            # Close the exchange with a flush packet, then propagate
            proto.write_pkt_line(None)
            raise
        if wants is not None:
            wants = [cid for cid in wants if cid != ZERO_SHA]
        if not wants:
            proto.write_pkt_line(None)
            return refs
        self._handle_upload_pack_head(proto, negotiated_capabilities,
            graph_walker, wants, can_read)
        self._handle_upload_pack_tail(proto, negotiated_capabilities,
            graph_walker, pack_data, progress)
        return refs

    def archive(self, path, committish, write_data, progress=None):
        """Request an archive through the 'upload-archive' service.

        :param path: Path passed to the service
        :param committish: Sent to the server as the single 'argument'
        :param write_data: Callback for data received on channel 1
        :param progress: Optional callback for data received on channel 2
        :raise GitProtocolError: if the server answers with an 'ERR' line
        :raise AssertionError: if the response is not understood
        """
        proto, can_read = self._connect('upload-archive', path)
        proto.write_pkt_line("argument %s" % committish)
        proto.write_pkt_line(None)
        pkt = proto.read_pkt_line()
        if pkt == "NACK\n":
            return
        elif pkt == "ACK\n":
            pass
        elif pkt is not None and pkt.startswith("ERR "):
            raise GitProtocolError(pkt[4:].rstrip("\n"))
        else:
            # Also reached for a None packet, which has no startswith()
            raise AssertionError("invalid response %r" % pkt)
        ret = proto.read_pkt_line()
        if ret is not None:
            raise AssertionError("expected pkt tail")
        self._read_side_band64k_data(proto, {1: write_data, 2: progress})
564
565
class TCPGitClient(TraditionalGitClient):
    """A Git Client that works over TCP directly (i.e. git://)."""

    def __init__(self, host, port=None, *args, **kwargs):
        """Create a new TCPGitClient instance.

        :param host: Host to connect to
        :param port: Port to connect to; TCP_GIT_PORT when None
        Remaining arguments are passed on to TraditionalGitClient.
        """
        if port is None:
            port = TCP_GIT_PORT
        self._host = host
        self._port = port
        TraditionalGitClient.__init__(self, *args, **kwargs)

    def _connect(self, cmd, path):
        """Open a TCP connection and start the requested git service.

        Every address returned by getaddrinfo() is tried in turn until one
        connects.

        :param cmd: Service name; sent to the server as 'git-<cmd>'
        :param path: Repository path to pass to the service
        :return: (Protocol, can_read) tuple
        :raise socket.error: if no address could be connected to
        """
        sockaddrs = socket.getaddrinfo(self._host, self._port,
            socket.AF_UNSPEC, socket.SOCK_STREAM)
        s = None
        err = socket.error("no address found for %s" % self._host)
        for (family, socktype, protonum, canonname, sockaddr) in sockaddrs:
            s = None
            try:
                s = socket.socket(family, socktype, protonum)
                s.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)
                s.connect(sockaddr)
                break
            except socket.error as e:
                # Copied to 'err' explicitly: the name bound by 'as' is not
                # available after the except block on Python 3.
                err = e
                if s is not None:
                    s.close()
                s = None
        if s is None:
            raise err
        # -1 means system default buffering
        rfile = s.makefile('rb', -1)
        # 0 means unbuffered
        wfile = s.makefile('wb', 0)
        proto = Protocol(rfile.read, wfile.write,
                         report_activity=self._report_activity)
        if path.startswith("/~"):
            path = path[1:]
        proto.send_cmd('git-%s' % cmd, path, 'host=%s' % self._host)
        return proto, lambda: _fileno_can_read(s)
603
604
class SubprocessWrapper(object):
    """Socket-like facade over the stdin/stdout pipes of a subprocess."""

    def __init__(self, proc):
        """Wrap a process object.

        :param proc: Process object with stdin and stdout pipes
        """
        self.proc = proc
        # read()/write() are the pipe methods themselves
        self.write = proc.stdin.write
        self.read = proc.stdout.read

    def can_read(self):
        """Return whether the subprocess's stdout has data available."""
        if not subprocess.mswindows:
            return _fileno_can_read(self.proc.stdout.fileno())
        # Windows branch: the pipe is peeked through its OS handle
        from msvcrt import get_osfhandle
        from win32pipe import PeekNamedPipe
        handle = get_osfhandle(self.proc.stdout.fileno())
        return PeekNamedPipe(handle, 0)[2] != 0

    def close(self):
        """Close both pipes, then wait for the subprocess to exit."""
        proc = self.proc
        proc.stdin.close()
        proc.stdout.close()
        proc.wait()
626
627
class SubprocessGitClient(TraditionalGitClient):
    """Git client that reaches the server by spawning a 'git' subprocess."""

    def __init__(self, *args, **kwargs):
        """Create a new SubprocessGitClient instance.

        An optional 'stderr' keyword argument is kept for the subprocess;
        everything else is passed on to TraditionalGitClient.
        """
        self._connection = None
        # Taken out of kwargs so the base class never sees it
        self._stderr = kwargs.pop('stderr', None)
        TraditionalGitClient.__init__(self, *args, **kwargs)

    def _connect(self, service, path):
        """Run 'git <service> <path>' and wrap its pipes in a Protocol.

        :param service: The git service to run
        :param path: The path passed to the service
        :return: (Protocol, can_read) tuple
        """
        import subprocess
        child = subprocess.Popen(['git', service, path], bufsize=0,
                                 stdin=subprocess.PIPE,
                                 stdout=subprocess.PIPE,
                                 stderr=self._stderr)
        wrapper = SubprocessWrapper(child)
        proto = Protocol(wrapper.read, wrapper.write,
                         report_activity=self._report_activity)
        return proto, wrapper.can_read
648
649
class LocalGitClient(GitClient):
    """Git client that works directly on a local Repo."""

    def __init__(self, thin_packs=True, report_activity=None):
        """Create a new LocalGitClient instance.

        GitClient.__init__ is not called; only the activity callback is
        stored.

        :param thin_packs: Accepted but ignored
        :param report_activity: Optional callback for reporting transport
            activity.
        """
        # thin_packs is ignored
        self._report_activity = report_activity

    def send_pack(self, path, determine_wants, generate_pack_contents,
                  progress=None):
        """Upload a pack to a remote repository (not implemented here).

        :param path: Repository path
        :param determine_wants: Unused; present for interface compatibility
        :param generate_pack_contents: Function that can return a sequence of the
            shas of the objects to upload.
        :param progress: Optional progress function

        :raises NotImplementedError: always
        """
        raise NotImplementedError(self.send_pack)

    def fetch(self, path, target, determine_wants=None, progress=None):
        """Fetch into a target repository.

        :param path: Path of the local repository to fetch from
        :param target: Target repository to fetch into
        :param determine_wants: Optional function to determine what refs
            to fetch
        :param progress: Optional progress function
        :return: whatever Repo.fetch() returns for the source repository
        """
        from dulwich.repo import Repo
        source = Repo(path)
        return source.fetch(target, determine_wants=determine_wants,
                            progress=progress)

    def fetch_pack(self, path, determine_wants, graph_walker, pack_data,
                   progress=None):
        """Write a pack of objects from a local repository to a callback.

        :param path: Path of the local repository to read from
        :param determine_wants: Callback that returns list of commits to fetch
        :param graph_walker: Object with next() and ack().
        :param pack_data: Callback called for each bit of data in the pack
        :param progress: Callback for progress reports (strings)
        """
        from dulwich.repo import Repo
        source = Repo(path)
        objects_iter = source.fetch_objects(determine_wants, graph_walker,
                                            progress)

        # fetch_objects() may short-circuit (e.g. in a stateless RPC call)
        # and return None; nothing is written then. Note that the client
        # still expects a 0-object pack in most cases.
        if objects_iter is not None:
            write_pack_objects(ProtocolFile(None, pack_data), objects_iter)
711
712
# Git client class to use for local access. SubprocessGitClient reaches
# the repository by spawning the 'git' executable.
default_local_git_client_cls = SubprocessGitClient
715
class SSHVendor(object):
    """A client side SSH implementation."""

    def connect_ssh(self, host, command, username=None, port=None):
        """Deprecated name for run_command(); warns and delegates to it."""
        from warnings import warn
        warn(
            "SSHVendor.connect_ssh has been renamed to SSHVendor.run_command",
            DeprecationWarning)
        return self.run_command(host, command, username=username, port=port)

    def run_command(self, host, command, username=None, port=None):
        """Connect to an SSH server.

        Run a command remotely and return a file-like object for interaction
        with the remote command.

        :param host: Host name
        :param command: Command to run
        :param username: Optional name of user to log in as
        :param port: Optional SSH port to use
        :raise NotImplementedError: always; subclasses provide the
            implementation
        """
        raise NotImplementedError(self.run_command)
738
739
class SubprocessSSHVendor(SSHVendor):
    """SSH vendor that shells out to the local 'ssh' command."""

    def run_command(self, host, command, username=None, port=None):
        """Run a command on a remote host through the 'ssh' executable.

        :param host: Host name
        :param command: Command to run, as a list of arguments appended
            to the ssh command line
        :param username: Optional name of user to log in as
        :param port: Optional SSH port to use
        :return: SubprocessWrapper around the spawned ssh process
        """
        import subprocess
        #FIXME: This has no way to deal with passwords..
        argv = ['ssh', '-x']
        if port is not None:
            argv += ['-p', str(port)]
        if username is None:
            destination = host
        else:
            destination = '%s@%s' % (username, host)
        argv.append(destination)
        child = subprocess.Popen(argv + command,
                                 stdin=subprocess.PIPE,
                                 stdout=subprocess.PIPE)
        return SubprocessWrapper(child)
756
757
758 try:
759     import paramiko
760 except ImportError:
761     pass
762 else:
763     import threading
764
765     class ParamikoWrapper(object):
766         STDERR_READ_N = 2048  # 2k
767
768         def __init__(self, client, channel, progress_stderr=None):
769             self.client = client
770             self.channel = channel
771             self.progress_stderr = progress_stderr
772             self.should_monitor = bool(progress_stderr) or True
773             self.monitor_thread = None
774             self.stderr = ''
775
776             # Channel must block
777             self.channel.setblocking(True)
778
779             # Start
780             if self.should_monitor:
781                 self.monitor_thread = threading.Thread(target=self.monitor_stderr)
782                 self.monitor_thread.start()
783
784         def monitor_stderr(self):
785             while self.should_monitor:
786                 # Block and read
787                 data = self.read_stderr(self.STDERR_READ_N)
788
789                 # Socket closed
790                 if not data:
791                     self.should_monitor = False
792                     break
793
794                 # Emit data
795                 if self.progress_stderr:
796                     self.progress_stderr(data)
797
798                 # Append to buffer
799                 self.stderr += data
800
801         def stop_monitoring(self):
802             # Stop StdErr thread
803             if self.should_monitor:
804                 self.should_monitor = False
805                 self.monitor_thread.join()
806
807                 # Get left over data
808                 data = self.channel.in_stderr_buffer.empty()
809                 self.stderr += data
810
811         def can_read(self):
812             return self.channel.recv_ready()
813
814         def write(self, data):
815             return self.channel.sendall(data)
816
817         def read_stderr(self, n):
818             return self.channel.recv_stderr(n)
819
820         def read(self, n=None):
821             data = self.channel.recv(n)
822             data_len = len(data)
823
824             # Closed socket
825             if not data:
826                 return
827
828             # Read more if needed
829             if n and data_len < n:
830                 diff_len = n - data_len
831                 return data + self.read(diff_len)
832             return data
833
834         def close(self):
835             self.channel.close()
836             self.stop_monitoring()
837
838         def __del__(self):
839             self.close()
840
841     class ParamikoSSHVendor(object):
842
843         def __init__(self):
844             self.ssh_kwargs = {}
845
846         def run_command(self, host, command, username=None, port=None,
847                 progress_stderr=None):
848
849             # Paramiko needs an explicit port. None is not valid
850             if port is None:
851                 port = 22
852
853             client = paramiko.SSHClient()
854
855             policy = paramiko.client.MissingHostKeyPolicy()
856             client.set_missing_host_key_policy(policy)
857             client.connect(host, username=username, port=port,
858                            **self.ssh_kwargs)
859
860             # Open SSH session
861             channel = client.get_transport().open_session()
862
863             # Run commands
864             channel.exec_command(*command)
865
866             return ParamikoWrapper(client, channel,
867                     progress_stderr=progress_stderr)
868
869
# Factory for the SSH vendor used by SSHGitClient: it is called without
# arguments and must return an object providing run_command().
# Can be overridden by users
get_ssh_vendor = SubprocessSSHVendor
872
873
class SSHGitClient(TraditionalGitClient):
    """Git client that runs the git commands on a remote host over SSH.

    The connection itself is made by the vendor returned by get_ssh_vendor().
    """

    def __init__(self, host, port=None, username=None, *args, **kwargs):
        """Create a new SSH client.

        :param host: Host name to connect to
        :param port: Optional SSH port to use
        :param username: Optional name of user to log in as

        Remaining arguments are passed on to TraditionalGitClient.
        """
        self.host = host
        self.port = port
        self.username = username
        TraditionalGitClient.__init__(self, *args, **kwargs)
        # Maps a command name to the remote executable to run instead of
        # the default 'git-<command>'.
        self.alternative_paths = {}

    def _get_cmd_path(self, cmd):
        """Return the remote executable to run for a git command name."""
        return self.alternative_paths.get(cmd, 'git-%s' % cmd)

    def _connect(self, cmd, path):
        """Start a git command on the remote host.

        :param cmd: Command name, looked up with _get_cmd_path()
        :param path: Repository path on the remote host
        :return: Tuple of a Protocol talking to the remote command and a
            function telling whether data can be read from it
        """
        if path.startswith("/~"):
            path = path[1:]
        # The path is sent inside single quotes. Escape embedded single
        # quotes (close the quote, emit \', reopen) so that the path cannot
        # terminate the quoting and smuggle in extra shell syntax.
        quoted_path = path.replace("'", "'\\''")
        con = get_ssh_vendor().run_command(
            self.host, ["%s '%s'" % (self._get_cmd_path(cmd), quoted_path)],
            port=self.port, username=self.username)
        return (Protocol(con.read, con.write, report_activity=self._report_activity),
                con.can_read)
894
895
def default_user_agent_string():
    """Return the default User-agent value: "dulwich/" plus dotted version."""
    dotted_version = ".".join(map(str, dulwich.__version__))
    return "dulwich/%s" % dotted_version
898
899
def default_urllib2_opener(config):
    """Build the default urllib2 opener for HTTP requests.

    :param config: Optional config object with a get(section, name) method.
        Its "http" section is consulted for "proxy" (proxy server used for
        http requests) and "useragent" (User-agent header value).
    :return: urllib2 opener with the proxy handler (if any) installed and
        the User-agent header set
    """
    def _http_setting(name):
        # A missing config and an unset value both mean "not configured".
        # config.get() may signal an unset value by raising KeyError.
        if config is None:
            return None
        try:
            return config.get("http", name)
        except KeyError:
            return None

    proxy_server = _http_setting("proxy")
    handlers = []
    if proxy_server is not None:
        handlers.append(urllib2.ProxyHandler({"http": proxy_server}))
    opener = urllib2.build_opener(*handlers)

    user_agent = _http_setting("useragent")
    if user_agent is None:
        user_agent = default_user_agent_string()
    opener.addheaders = [('User-agent', user_agent)]
    return opener
917
918
class HttpGitClient(GitClient):
    """Git client for repositories served over HTTP(S).

    Refs can be discovered from both smart and dumb servers; sending and
    fetching packs is only implemented for smart servers.
    """

    def __init__(self, base_url, dumb=None, opener=None, config=None, *args, **kwargs):
        """Create a new HTTP client.

        :param base_url: Base URL that repository paths are joined onto
        :param dumb: True if the server is known to be dumb, False if it is
            known to be smart, None to detect this on the first request
        :param opener: Optional urllib2 opener to use for requests
        :param config: Optional config object used to build the default
            opener when no opener is given

        Remaining arguments are passed on to GitClient.
        """
        self.base_url = base_url.rstrip("/") + "/"
        self.dumb = dumb
        if opener is None:
            self.opener = default_urllib2_opener(config)
        else:
            self.opener = opener
        GitClient.__init__(self, *args, **kwargs)

    def _get_url(self, path):
        """Return the URL for a repository path, with a trailing slash."""
        return urlparse.urljoin(self.base_url, path).rstrip("/") + "/"

    def _http_request(self, url, headers=None, data=None):
        """Perform a HTTP request.

        :param url: URL to request
        :param headers: Optional dictionary of request headers
        :param data: Optional request body
        :return: Response object returned by the opener
        :raises NotGitRepository: if the server answers with a 404
        :raises GitProtocolError: for any other HTTP error
        """
        if headers is None:
            headers = {}
        req = urllib2.Request(url, headers=headers, data=data)
        try:
            return self.opener.open(req)
        except urllib2.HTTPError as e:
            if e.code == 404:
                raise NotGitRepository()
            # Every other HTTP error is fatal; never fall through without
            # a response to return.
            raise GitProtocolError("unexpected http response %d" % e.code)

    def _discover_references(self, service, url):
        """Retrieve the refs of a remote repository.

        Also records in self.dumb whether the server answered as a dumb
        server.

        :param service: Service name, e.g. "git-upload-pack"
        :param url: Repository URL, ending in a slash
        :return: Tuple with the refs and the set of server capabilities
            (always empty for a dumb server)
        :raises GitProtocolError: if a smart server's response does not
            start with the expected service announcement
        """
        assert url[-1] == "/"
        url = urlparse.urljoin(url, "info/refs")
        headers = {}
        # Ask for the smart protocol unless the server is known to be dumb.
        # This must also happen once the server has been detected as smart
        # (self.dumb is False), otherwise a second request on this client
        # would get the dumb response.
        if not self.dumb:
            url += "?service=%s" % service
            headers["Content-Type"] = "application/x-%s-request" % service
        resp = self._http_request(url, headers)
        self.dumb = (not resp.info().gettype().startswith("application/x-git-"))
        if not self.dumb:
            proto = Protocol(resp.read, None)
            # The first line should mention the service
            pkts = list(proto.read_pkt_seq())
            if pkts != [('# service=%s\n' % service)]:
                raise GitProtocolError(
                    "unexpected first line %r from smart server" % pkts)
            return read_pkt_refs(proto)
        else:
            return read_info_refs(resp), set()

    def _smart_request(self, service, url, data):
        """Send a request to a smart server's service endpoint.

        :param service: Service name, e.g. "git-upload-pack"
        :param url: Repository URL, ending in a slash
        :param data: Request body
        :return: Response object
        :raises GitProtocolError: if the response does not have the result
            content type of the service
        """
        assert url[-1] == "/"
        url = urlparse.urljoin(url, service)
        headers = {"Content-Type": "application/x-%s-request" % service}
        resp = self._http_request(url, headers, data)
        if resp.info().gettype() != ("application/x-%s-result" % service):
            raise GitProtocolError("Invalid content-type from server: %s"
                % resp.info().gettype())
        return resp

    def send_pack(self, path, determine_wants, generate_pack_contents,
                  progress=None):
        """Upload a pack to a remote repository.

        :param path: Repository path
        :param determine_wants: Callback that is given a dictionary with the
            remote refs and returns the refs the remote should have, or None
            to change nothing
        :param generate_pack_contents: Function that can return a sequence of the
            shas of the objects to upload.
        :param progress: Optional progress function
        :return: The new refs, or the old remote refs if determine_wants
            returned None

        :raises SendPackError: if server rejects the pack data
        :raises UpdateRefsError: if the server supports report-status
                                 and rejects ref updates
        :raises NotImplementedError: if the server is a dumb server
        """
        url = self._get_url(path)
        old_refs, server_capabilities = self._discover_references(
            "git-receive-pack", url)
        negotiated_capabilities = self._send_capabilities & server_capabilities

        if 'report-status' in negotiated_capabilities:
            self._report_status_parser = ReportStatusParser()

        new_refs = determine_wants(dict(old_refs))
        if new_refs is None:
            return old_refs
        if self.dumb:
            raise NotImplementedError(self.send_pack)
        # The whole request is buffered in memory and sent in one go
        req_data = StringIO()
        req_proto = Protocol(None, req_data.write)
        (have, want) = self._handle_receive_pack_head(
            req_proto, negotiated_capabilities, old_refs, new_refs)
        if not want and old_refs == new_refs:
            return new_refs
        objects = generate_pack_contents(have, want)
        if len(objects) > 0:
            write_pack_objects(req_proto.write_file(), objects)
        resp = self._smart_request("git-receive-pack", url,
            data=req_data.getvalue())
        resp_proto = Protocol(resp.read, None)
        self._handle_receive_pack_tail(resp_proto, negotiated_capabilities,
            progress)
        return new_refs

    def fetch_pack(self, path, determine_wants, graph_walker, pack_data,
                   progress=None):
        """Retrieve a pack from a git smart server.

        :param path: Repository path
        :param determine_wants: Callback that returns list of commits to fetch
        :param graph_walker: Object with next() and ack().
        :param pack_data: Callback called for each bit of data in the pack
        :param progress: Callback for progress reports (strings)
        :return: Dictionary with the refs of the remote repository
        :raises NotImplementedError: if something has to be fetched from a
            dumb server
        """
        url = self._get_url(path)
        refs, server_capabilities = self._discover_references(
            "git-upload-pack", url)
        negotiated_capabilities = self._fetch_capabilities & server_capabilities
        wants = determine_wants(refs)
        if wants is not None:
            wants = [cid for cid in wants if cid != ZERO_SHA]
        if not wants:
            return refs
        if self.dumb:
            raise NotImplementedError(self.fetch_pack)
        # The whole request is buffered in memory and sent in one go
        req_data = StringIO()
        req_proto = Protocol(None, req_data.write)
        self._handle_upload_pack_head(req_proto,
            negotiated_capabilities, graph_walker, wants,
            lambda: False)
        resp = self._smart_request("git-upload-pack", url,
            data=req_data.getvalue())
        resp_proto = Protocol(resp.read, None)
        self._handle_upload_pack_tail(resp_proto, negotiated_capabilities,
            graph_walker, pack_data, progress)
        return refs
1048
1049
def get_transport_and_path_from_url(url, config=None, **kwargs):
    """Obtain a git client from a URL.

    The client class is chosen from the URL scheme: 'git', 'git+ssh',
    'http'/'https' or 'file'.

    :param url: URL to open
    :param config: Optional config object (only handed to the HTTP client)
    :param thin_packs: Whether or not thin packs should be retrieved
    :param report_activity: Optional callback for reporting transport
        activity.
    :return: Tuple with client instance and relative path.
    :raises ValueError: if the URL scheme is not one of the above
    """
    parts = urlparse.urlparse(url)
    scheme = parts.scheme

    if scheme == 'git':
        tcp_client = TCPGitClient(parts.hostname, port=parts.port, **kwargs)
        return (tcp_client, parts.path)

    if scheme == 'git+ssh':
        # Drop a single leading slash from the path
        if parts.path.startswith('/'):
            ssh_path = parts.path[1:]
        else:
            ssh_path = parts.path
        ssh_client = SSHGitClient(parts.hostname, port=parts.port,
                                  username=parts.username, **kwargs)
        return ssh_client, ssh_path

    if scheme in ('http', 'https'):
        http_client = HttpGitClient(urlparse.urlunparse(parts), config=config,
                                    **kwargs)
        return http_client, parts.path

    if scheme == 'file':
        local_client = default_local_git_client_cls(**kwargs)
        return local_client, parts.path

    raise ValueError("unknown scheme '%s'" % scheme)
1077
1078
def get_transport_and_path(location, **kwargs):
    """Obtain a git client from a URL or path.

    The location is first tried as a URL. If its scheme is not recognised,
    a location containing a ':' is treated as an scp-style SSH location
    ("host:path" or "user@host:path") and anything else as a local path.

    :param location: URL or path
    :param config: Optional config object
    :param thin_packs: Whether or not thin packs should be retrieved
    :param report_activity: Optional callback for reporting transport
        activity.
    :return: Tuple with client instance and relative path.
    """
    # First, try to parse it as a URL
    try:
        return get_transport_and_path_from_url(location, **kwargs)
    except ValueError:
        pass

    if ':' in location:
        # Only the first ':' separates the host from the path; the path
        # itself may contain further ':' or '@' characters.
        user_host, path = location.split(':', 1)
        if '@' in user_host:
            # SSH with user@host:foo. The user name may contain '@', so
            # split on the last one.
            user, host = user_host.rsplit('@', 1)
            return SSHGitClient(host, username=user, **kwargs), path
        # SSH with no user@, zero or one leading slash.
        return SSHGitClient(user_host, **kwargs), path

    # Otherwise, assume it's a local path.
    return default_local_git_client_cls(**kwargs), location