Switched `default_local_git_client_cls` to `LocalGitClient`.
[jelmer/dulwich.git] / dulwich / client.py
1 # client.py -- Implementation of the client side git protocols
2 # Copyright (C) 2008-2013 Jelmer Vernooij <jelmer@samba.org>
3 # Copyright (C) 2008 John Carr
4 #
5 # This program is free software; you can redistribute it and/or
6 # modify it under the terms of the GNU General Public License
7 # as published by the Free Software Foundation; either version 2
8 # or (at your option) a later version of the License.
9 #
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 # GNU General Public License for more details.
14 #
15 # You should have received a copy of the GNU General Public License
16 # along with this program; if not, write to the Free Software
17 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
18 # MA  02110-1301, USA.
19
20 """Client side support for the Git protocol.
21
22 The Dulwich client supports the following capabilities:
23
24  * thin-pack
25  * multi_ack_detailed
26  * multi_ack
27  * side-band-64k
28  * ofs-delta
29  * report-status
30  * delete-refs
31
32 Known capabilities that are not supported:
33
34  * shallow
35  * no-progress
36  * include-tag
37 """
38
39 __docformat__ = 'restructuredText'
40
41 from contextlib import closing
42 from io import BytesIO, BufferedReader
43 import dulwich
44 import select
45 import socket
46 import subprocess
47 import sys
48
49 try:
50     import urllib2
51     import urlparse
52 except ImportError:
53     import urllib.request as urllib2
54     import urllib.parse as urlparse
55
56 from dulwich.errors import (
57     GitProtocolError,
58     NotGitRepository,
59     SendPackError,
60     UpdateRefsError,
61     )
62 from dulwich.protocol import (
63     _RBUFSIZE,
64     CAPABILITY_DELETE_REFS,
65     CAPABILITY_MULTI_ACK,
66     CAPABILITY_MULTI_ACK_DETAILED,
67     CAPABILITY_OFS_DELTA,
68     CAPABILITY_REPORT_STATUS,
69     CAPABILITY_SIDE_BAND_64K,
70     CAPABILITY_THIN_PACK,
71     COMMAND_DONE,
72     COMMAND_HAVE,
73     COMMAND_WANT,
74     SIDE_BAND_CHANNEL_DATA,
75     SIDE_BAND_CHANNEL_PROGRESS,
76     SIDE_BAND_CHANNEL_FATAL,
77     PktLineParser,
78     Protocol,
79     ProtocolFile,
80     TCP_GIT_PORT,
81     ZERO_SHA,
82     extract_capabilities,
83     )
84 from dulwich.pack import (
85     write_pack_objects,
86     )
87 from dulwich.refs import (
88     read_info_refs,
89     )
90
91
def _fileno_can_read(fileno):
    """Tell whether data is waiting to be read, without blocking.

    :param fileno: A file descriptor, or any object that select.select()
        accepts in its read list.
    :return: True if select() reports it readable right now, else False.
    """
    # A timeout of 0 makes select() poll and return immediately.
    readable, _writable, _errored = select.select([fileno], [], [], 0)
    return bool(readable)
95
# Capabilities requested for both fetching and sending.
COMMON_CAPABILITIES = [CAPABILITY_OFS_DELTA, CAPABILITY_SIDE_BAND_64K]
# Capabilities requested when fetching (upload-pack).
FETCH_CAPABILITIES = [
    CAPABILITY_THIN_PACK,
    CAPABILITY_MULTI_ACK,
    CAPABILITY_MULTI_ACK_DETAILED,
] + COMMON_CAPABILITIES
# Capabilities requested when sending (receive-pack).
SEND_CAPABILITIES = [CAPABILITY_REPORT_STATUS] + COMMON_CAPABILITIES
101
102
class ReportStatusParser(object):
    """Collect and evaluate a 'report-status' response from a server.

    Packets are fed in one at a time through handle_packet(); afterwards
    check() turns any reported failure into an exception.
    """

    def __init__(self):
        # Becomes True once the terminating flush packet (None) was seen.
        self._done = False
        # First packet of the report (stripped), e.g. b'unpack ok'.
        self._pack_status = None
        # Cleared as soon as a ref line does not start with b'ok '.
        self._ref_status_ok = True
        # Stripped per-ref status lines, in arrival order.
        self._ref_statuses = []

    def check(self):
        """Raise if the collected report describes a failure.

        :raise SendPackError: Raised when the server could not unpack
        :raise UpdateRefsError: Raised when refs could not be updated
        """
        if self._pack_status not in (b'unpack ok', None):
            raise SendPackError(self._pack_status)
        if self._ref_status_ok:
            return

        outcome_by_ref = {}
        succeeded = set()
        for line in self._ref_statuses:
            if b' ' not in line:
                # malformed response, move on to the next one
                continue
            outcome, ref = line.split(b' ', 1)
            if outcome != b'ng':
                succeeded.add(ref)
            elif b' ' in ref:
                # "ng <ref> <reason>": keep the reason as the ref's status.
                ref, outcome = ref.split(b' ', 1)
            outcome_by_ref[ref] = outcome

        # TODO(jelmer): don't assume encoding of refs is ascii.
        failed = [ref.decode('ascii') for ref in outcome_by_ref
                  if ref not in succeeded]
        raise UpdateRefsError(
            ', '.join(failed) + ' failed to update',
            ref_status=outcome_by_ref)

    def handle_packet(self, pkt):
        """Consume one packet of the status report.

        The first packet is stored as the pack status, later ones as ref
        statuses, and None (a flush packet) ends the report.

        :param pkt: Packet payload, or None for a flush packet
        :raise GitProtocolError: Raised when packets are received after a
            flush packet.
        """
        if self._done:
            raise GitProtocolError("received more data after status report")
        if pkt is None:
            self._done = True
        elif self._pack_status is None:
            self._pack_status = pkt.strip()
        else:
            line = pkt.strip()
            self._ref_statuses.append(line)
            if not line.startswith(b'ok '):
                self._ref_status_ok = False
159
160
def read_pkt_refs(proto):
    """Read a sequence of "<sha> <ref>" packets into a refs dictionary.

    Capabilities are extracted from the ref field of the first packet.

    :param proto: Object providing read_pkt_seq()
    :return: Tuple (refs, capabilities): a dict mapping ref name to sha
        and a set of capabilities, or (None, empty set) when no packets
        were received.
    :raise GitProtocolError: When a packet's first field is b'ERR'; the
        rest of that packet is used as the error message.
    """
    refs = {}
    server_capabilities = None
    for pkt in proto.read_pkt_seq():
        sha, ref = pkt.rstrip(b'\n').split(None, 1)
        if sha == b'ERR':
            raise GitProtocolError(ref)
        if server_capabilities is None:
            # Only the first packet is passed through extract_capabilities.
            ref, server_capabilities = extract_capabilities(ref)
        refs[ref] = sha

    if not refs:
        return None, set()
    return refs, set(server_capabilities)
176
177
178 # TODO(durin42): this doesn't correctly degrade if the server doesn't
179 # support some capabilities. This should work properly with servers
180 # that don't support multi_ack.
class GitClient(object):
    """Git smart server client.

    Base class for the concrete clients. send_pack() and fetch_pack() are
    abstract here; the ``_handle_*`` and ``_read_*`` helpers implement the
    protocol exchanges that subclasses build on.
    """

    def __init__(self, thin_packs=True, report_activity=None):
        """Create a new GitClient instance.

        :param thin_packs: Whether or not thin packs should be retrieved
        :param report_activity: Optional callback for reporting transport
            activity.
        """
        self._report_activity = report_activity
        # Set by subclasses once report-status has been negotiated.
        self._report_status_parser = None
        self._fetch_capabilities = set(FETCH_CAPABILITIES)
        self._send_capabilities = set(SEND_CAPABILITIES)
        if not thin_packs:
            self._fetch_capabilities.remove(CAPABILITY_THIN_PACK)

    def send_pack(self, path, determine_wants, generate_pack_contents,
                  progress=None, write_pack=write_pack_objects):
        """Upload a pack to a remote repository.

        :param path: Repository path
        :param determine_wants: Callback that is given the remote refs and
            returns the refs the remote should end up with
        :param generate_pack_contents: Function that can return a sequence of
            the shas of the objects to upload.
        :param progress: Optional progress function
        :param write_pack: Function called with (file, iterable of objects) to
            write the objects returned by generate_pack_contents to the server.

        :raises SendPackError: if server rejects the pack data
        :raises UpdateRefsError: if the server supports report-status
                                 and rejects ref updates
        """
        raise NotImplementedError(self.send_pack)

    def fetch(self, path, target, determine_wants=None, progress=None):
        """Fetch into a target repository.

        :param path: Path to fetch from
        :param target: Target repository to fetch into
        :param determine_wants: Optional function to determine what refs
            to fetch; defaults to the target object store's
            determine_wants_all
        :param progress: Optional progress function
        :return: remote refs as dictionary
        """
        if determine_wants is None:
            determine_wants = target.object_store.determine_wants_all
        f, commit, abort = target.object_store.add_pack()
        try:
            result = self.fetch_pack(
                path, determine_wants, target.get_graph_walker(), f.write,
                progress)
        except:
            # Deliberately broad: whatever went wrong, discard the partial
            # pack before letting the exception propagate.
            abort()
            raise
        else:
            commit()
        return result

    def fetch_pack(self, path, determine_wants, graph_walker, pack_data,
                   progress=None):
        """Retrieve a pack from a git smart server.

        :param path: Path to fetch from
        :param determine_wants: Callback that returns list of commits to fetch
        :param graph_walker: Object with next() and ack().
        :param pack_data: Callback called for each bit of data in the pack
        :param progress: Callback for progress reports (strings)
        """
        raise NotImplementedError(self.fetch_pack)

    def _parse_status_report(self, proto):
        """Read a report-status response directly from proto and check it.

        :param proto: Protocol object to read from
        :raise SendPackError: if the first line is not b'unpack ok'
        :raise UpdateRefsError: if any ref status line does not start with
            b'ok '
        """
        unpack = proto.read_pkt_line().strip()
        if unpack != b'unpack ok':
            st = True
            # flush remaining error data
            while st is not None:
                st = proto.read_pkt_line()
            raise SendPackError(unpack)
        statuses = []
        errs = False
        ref_status = proto.read_pkt_line()
        while ref_status:
            ref_status = ref_status.strip()
            statuses.append(ref_status)
            if not ref_status.startswith(b'ok '):
                errs = True
            ref_status = proto.read_pkt_line()

        if errs:
            ref_status = {}
            ok = set()
            for status in statuses:
                if b' ' not in status:
                    # malformed response, move on to the next one
                    continue
                status, ref = status.split(b' ', 1)

                if status == b'ng':
                    if b' ' in ref:
                        ref, status = ref.split(b' ', 1)
                else:
                    ok.add(ref)
                ref_status[ref] = status
            # Refs are bytes; decode them so the message is built from text
            # only, the same way ReportStatusParser.check() does it.
            # TODO(jelmer): don't assume encoding of refs is ascii.
            raise UpdateRefsError(
                ', '.join([ref.decode('ascii') for ref in ref_status
                           if ref not in ok]) +
                ' failed to update',
                ref_status=ref_status)

    def _read_side_band64k_data(self, proto, channel_callbacks):
        """Read per-channel data.

        This requires the side-band-64k capability.

        :param proto: Protocol object to read from
        :param channel_callbacks: Dictionary mapping channels to packet
            handlers to use. None for a callback discards channel data.
        :raise AssertionError: if data arrives on a channel that has no
            entry in channel_callbacks
        """
        for pkt in proto.read_pkt_seq():
            # The first byte is the channel number, the rest is payload.
            channel = ord(pkt[:1])
            pkt = pkt[1:]
            try:
                cb = channel_callbacks[channel]
            except KeyError:
                raise AssertionError('Invalid sideband channel %d' % channel)
            else:
                if cb is not None:
                    cb(pkt)

    def _handle_receive_pack_head(self, proto, capabilities, old_refs,
                                  new_refs):
        """Handle the head of a 'git-receive-pack' request.

        Writes one update line per ref whose value changes (capabilities
        are appended to the first such line) followed by a flush packet.

        :param proto: Protocol object to write to
        :param capabilities: List of negotiated capabilities
        :param old_refs: Old refs, as received from the server
        :param new_refs: New refs
        :return: (have, want) tuple
        """
        want = []
        have = [x for x in old_refs.values() if not x == ZERO_SHA]
        sent_capabilities = False

        all_refs = set(new_refs.keys()).union(set(old_refs.keys()))
        for refname in all_refs:
            old_sha1 = old_refs.get(refname, ZERO_SHA)
            new_sha1 = new_refs.get(refname, ZERO_SHA)

            if old_sha1 != new_sha1:
                if sent_capabilities:
                    proto.write_pkt_line(
                        old_sha1 + b' ' + new_sha1 + b' ' + refname)
                else:
                    proto.write_pkt_line(
                        old_sha1 + b' ' + new_sha1 + b' ' + refname + b'\0' +
                        b' '.join(capabilities))
                    sent_capabilities = True
            if new_sha1 not in have and new_sha1 != ZERO_SHA:
                want.append(new_sha1)
        proto.write_pkt_line(None)
        return (have, want)

    def _handle_receive_pack_tail(self, proto, capabilities, progress=None):
        """Handle the tail of a 'git-receive-pack' request.

        Reads the server's response, feeding report-status packets to
        self._report_status_parser when that capability was negotiated, and
        finally lets the parser raise for any reported failure.

        :param proto: Protocol object to read from
        :param capabilities: List of negotiated capabilities
        :param progress: Optional progress reporting function
        """
        if b"side-band-64k" in capabilities:
            if progress is None:
                progress = lambda x: None
            channel_callbacks = {2: progress}
            if CAPABILITY_REPORT_STATUS in capabilities:
                # The report arrives as pkt-lines embedded in channel 1.
                channel_callbacks[1] = PktLineParser(
                    self._report_status_parser.handle_packet).parse
            self._read_side_band64k_data(proto, channel_callbacks)
        else:
            if CAPABILITY_REPORT_STATUS in capabilities:
                for pkt in proto.read_pkt_seq():
                    self._report_status_parser.handle_packet(pkt)
        if self._report_status_parser is not None:
            self._report_status_parser.check()

    def _handle_upload_pack_head(self, proto, capabilities, graph_walker,
                                 wants, can_read):
        """Handle the head of a 'git-upload-pack' request.

        Sends the want lines, then have lines for every commit the graph
        walker yields, and finally the done command.

        :param proto: Protocol object to read from
        :param capabilities: List of negotiated capabilities
        :param graph_walker: GraphWalker instance to call .ack() on
        :param wants: List of commits to fetch
        :param can_read: function that returns a boolean that indicates
            whether there is extra graph data to read on proto
        """
        assert isinstance(wants, list) and isinstance(wants[0], bytes)
        # Capabilities travel on the first want line only.
        proto.write_pkt_line(
            COMMAND_WANT + b' ' + wants[0] + b' ' +
            b' '.join(capabilities) + b'\n')
        for want in wants[1:]:
            proto.write_pkt_line(COMMAND_WANT + b' ' + want + b'\n')
        proto.write_pkt_line(None)
        have = next(graph_walker)
        while have:
            proto.write_pkt_line(COMMAND_HAVE + b' ' + have + b'\n')
            if can_read():
                pkt = proto.read_pkt_line()
                parts = pkt.rstrip(b'\n').split(b' ')
                if parts[0] == b'ACK':
                    graph_walker.ack(parts[1])
                    if parts[2] in (b'continue', b'common'):
                        pass
                    elif parts[2] == b'ready':
                        break
                    else:
                        raise AssertionError(
                            "%s not in ('continue', 'ready', 'common')" %
                            parts[2])
            have = next(graph_walker)
        proto.write_pkt_line(COMMAND_DONE + b'\n')

    def _handle_upload_pack_tail(self, proto, capabilities, graph_walker,
                                 pack_data, progress=None, rbufsize=_RBUFSIZE):
        """Handle the tail of a 'git-upload-pack' request.

        :param proto: Protocol object to read from
        :param capabilities: List of negotiated capabilities
        :param graph_walker: GraphWalker instance to call .ack() on
        :param pack_data: Function to call with pack data
        :param progress: Optional progress reporting function
        :param rbufsize: Read buffer size
        """
        pkt = proto.read_pkt_line()
        while pkt:
            parts = pkt.rstrip(b'\n').split(b' ')
            if parts[0] == b'ACK':
                graph_walker.ack(parts[1])
            # Stop at the first line that lacks a recognised status word.
            if len(parts) < 3 or parts[2] not in (
                    b'ready', b'continue', b'common'):
                break
            pkt = proto.read_pkt_line()
        if CAPABILITY_SIDE_BAND_64K in capabilities:
            if progress is None:
                # Just ignore progress data
                progress = lambda x: None
            self._read_side_band64k_data(proto, {
                SIDE_BAND_CHANNEL_DATA: pack_data,
                SIDE_BAND_CHANNEL_PROGRESS: progress}
            )
        else:
            # No side band: everything that follows is raw pack data.
            while True:
                data = proto.read(rbufsize)
                if data == b"":
                    break
                pack_data(data)
433
434
class TraditionalGitClient(GitClient):
    """Traditional Git client.

    Implements send_pack(), fetch_pack() and archive() on top of a
    Protocol object obtained from the abstract _connect() method.
    """

    def _connect(self, cmd, path):
        """Create a connection to the server.

        This method is abstract - concrete implementations should
        implement their own variant which connects to the server and
        returns an initialized Protocol object with the service ready
        for use and a can_read function which may be used to see if
        reads would block.

        :param cmd: The git service name to which we should connect.
        :param path: The path we should pass to the service.
        """
        raise NotImplementedError()

    def send_pack(self, path, determine_wants, generate_pack_contents,
                  progress=None, write_pack=write_pack_objects):
        """Upload a pack to a remote repository.

        :param path: Repository path
        :param determine_wants: Callback that is given a copy of the
            remote refs and returns the refs the remote should end up
            with, or None to send nothing
        :param generate_pack_contents: Function that can return a sequence of
            the shas of the objects to upload.
        :param progress: Optional callback called with progress updates
        :param write_pack: Function called with (file, iterable of objects) to
            write the objects returned by generate_pack_contents to the server.
        :return: The refs that were sent, or the unchanged remote refs
            when nothing was sent

        :raises SendPackError: if server rejects the pack data
        :raises UpdateRefsError: if the server supports report-status
                                 and rejects ref updates
        """
        proto, unused_can_read = self._connect(b'receive-pack', path)
        with proto:
            old_refs, server_capabilities = read_pkt_refs(proto)
            negotiated_capabilities = (
                self._send_capabilities & server_capabilities)

            if CAPABILITY_REPORT_STATUS in negotiated_capabilities:
                self._report_status_parser = ReportStatusParser()
            report_status_parser = self._report_status_parser

            try:
                new_refs = orig_new_refs = determine_wants(dict(old_refs))
            except:
                # Deliberately broad: end the conversation with a flush
                # packet before re-raising whatever the callback raised.
                proto.write_pkt_line(None)
                raise

            # This must come before the delete-refs filtering below, which
            # copies and iterates the result and would fail on None.
            if new_refs is None:
                proto.write_pkt_line(None)
                return old_refs

            if CAPABILITY_DELETE_REFS not in server_capabilities:
                # Server does not support deletions. Fail later.
                new_refs = dict(orig_new_refs)
                for ref, sha in orig_new_refs.items():
                    if sha == ZERO_SHA:
                        if CAPABILITY_REPORT_STATUS in negotiated_capabilities:
                            # Same "ng <ref> <reason>" shape that
                            # ReportStatusParser.check() parses, so the
                            # failure is reported under the ref's name.
                            report_status_parser._ref_statuses.append(
                                b'ng ' + ref +
                                b' remote does not support deleting refs')
                            report_status_parser._ref_status_ok = False
                        del new_refs[ref]

            if len(new_refs) == 0 and len(orig_new_refs):
                # NOOP - Original new refs filtered out by policy
                proto.write_pkt_line(None)
                if report_status_parser is not None:
                    report_status_parser.check()
                return old_refs

            (have, want) = self._handle_receive_pack_head(
                proto, negotiated_capabilities, old_refs, new_refs)
            if not want and old_refs == new_refs:
                return new_refs
            objects = generate_pack_contents(have, want)

            # Write a pack when there are objects to send, or when any ref
            # is being set to a new non-zero value.
            dowrite = len(objects) > 0
            dowrite = dowrite or any(old_refs.get(ref) != sha
                                     for (ref, sha) in new_refs.items()
                                     if sha != ZERO_SHA)
            if dowrite:
                write_pack(proto.write_file(), objects)

            self._handle_receive_pack_tail(
                proto, negotiated_capabilities, progress)
            return new_refs

    def fetch_pack(self, path, determine_wants, graph_walker, pack_data,
                   progress=None):
        """Retrieve a pack from a git smart server.

        :param path: Path to fetch from
        :param determine_wants: Callback that returns list of commits to fetch
        :param graph_walker: Object with next() and ack().
        :param pack_data: Callback called for each bit of data in the pack
        :param progress: Callback for progress reports (strings)
        :return: The refs read from the server (None if it sent none)
        """
        proto, can_read = self._connect(b'upload-pack', path)
        with proto:
            refs, server_capabilities = read_pkt_refs(proto)
            negotiated_capabilities = (
                self._fetch_capabilities & server_capabilities)

            if refs is None:
                proto.write_pkt_line(None)
                return refs

            try:
                wants = determine_wants(refs)
            except:
                # Deliberately broad: send a flush packet, then re-raise.
                proto.write_pkt_line(None)
                raise
            if wants is not None:
                wants = [cid for cid in wants if cid != ZERO_SHA]
            if not wants:
                proto.write_pkt_line(None)
                return refs
            self._handle_upload_pack_head(
                proto, negotiated_capabilities, graph_walker, wants, can_read)
            self._handle_upload_pack_tail(
                proto, negotiated_capabilities, graph_walker, pack_data,
                progress)
            return refs

    def archive(self, path, committish, write_data, progress=None,
                write_error=None):
        """Request an archive through the 'upload-archive' service.

        :param path: Repository path
        :param committish: Committish to archive, sent as an "argument"
            line
        :param write_data: Callback for side-band data packets
        :param progress: Optional callback for side-band progress packets
        :param write_error: Optional callback for side-band fatal packets
        :raise GitProtocolError: if the server answers with an ERR line
        :raise AssertionError: on any other unexpected response
        """
        proto, can_read = self._connect(b'upload-archive', path)
        with proto:
            proto.write_pkt_line(b"argument " + committish)
            proto.write_pkt_line(None)
            pkt = proto.read_pkt_line()
            if pkt == b"NACK\n":
                return
            elif pkt == b"ACK\n":
                pass
            elif pkt.startswith(b"ERR "):
                raise GitProtocolError(pkt[4:].rstrip(b"\n"))
            else:
                raise AssertionError("invalid response %r" % pkt)
            # A flush packet must follow the ACK.
            ret = proto.read_pkt_line()
            if ret is not None:
                raise AssertionError("expected pkt tail")
            self._read_side_band64k_data(proto, {
                SIDE_BAND_CHANNEL_DATA: write_data,
                SIDE_BAND_CHANNEL_PROGRESS: progress,
                SIDE_BAND_CHANNEL_FATAL: write_error})
578
579
class TCPGitClient(TraditionalGitClient):
    """A Git Client that works over TCP directly (i.e. git://)."""

    def __init__(self, host, port=None, *args, **kwargs):
        """Create a new TCPGitClient instance.

        :param host: Host name to connect to
        :param port: Optional port; defaults to TCP_GIT_PORT
        Remaining arguments are passed on to TraditionalGitClient.
        """
        if port is None:
            port = TCP_GIT_PORT
        self._host = host
        self._port = port
        TraditionalGitClient.__init__(self, *args, **kwargs)

    def _connect(self, cmd, path):
        """Open a TCP connection and request a git service on it.

        Every address returned by getaddrinfo() is tried in turn until
        one connects.

        :param cmd: Service name as bytes, without the b'git-' prefix
        :param path: Repository path as bytes
        :return: Tuple (Protocol, can_read function)
        :raise socket.error: the last connection error, or a "no address
            found" error when there was nothing to try
        """
        sockaddrs = socket.getaddrinfo(
            self._host, self._port, socket.AF_UNSPEC, socket.SOCK_STREAM)
        s = None
        err = socket.error("no address found for %s" % self._host)
        for (family, socktype, protonum, canonname, sockaddr) in sockaddrs:
            s = socket.socket(family, socktype, protonum)
            s.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)
            try:
                s.connect(sockaddr)
                break
            except socket.error as e:
                # Python 3 unbinds the "as" name when the except block
                # ends, so copy the exception to a name that survives
                # until the raise below.
                err = e
                if s is not None:
                    s.close()
                s = None
        if s is None:
            raise err
        # -1 means system default buffering
        rfile = s.makefile('rb', -1)
        # 0 means unbuffered
        wfile = s.makefile('wb', 0)

        def close():
            rfile.close()
            wfile.close()
            s.close()

        proto = Protocol(rfile.read, wfile.write, close,
                         report_activity=self._report_activity)
        if path.startswith(b"/~"):
            path = path[1:]
        # The host parameter is concatenated with bytes, so encode a text
        # host name first.
        host = self._host
        if not isinstance(host, bytes):
            host = host.encode('ascii')
        proto.send_cmd(b'git-' + cmd, path, b'host=' + host)
        return proto, lambda: _fileno_can_read(s)
622
623
class SubprocessWrapper(object):
    """A socket-like object that talks to a subprocess via pipes."""

    def __init__(self, proc):
        """Wrap a process object.

        :param proc: Process object with stdin, stdout and stderr
            attributes (stderr may be None) and a wait() method
        """
        self.proc = proc
        if sys.version_info[0] == 2:
            self.read = proc.stdout.read
        else:
            self.read = BufferedReader(proc.stdout).read
        self.write = proc.stdin.write

    def can_read(self):
        """Return whether the subprocess's stdout has data waiting.

        NOTE(review): on Python 3 reads go through a BufferedReader, while
        this check looks at the underlying descriptor only, so data already
        held in that buffer is not taken into account.
        """
        # sys.platform is used for the check because subprocess.mswindows
        # does not exist on current Python 3 releases.
        if sys.platform == 'win32':
            from msvcrt import get_osfhandle
            from win32pipe import PeekNamedPipe
            handle = get_osfhandle(self.proc.stdout.fileno())
            data, total_bytes_avail, msg_bytes_left = PeekNamedPipe(handle, 0)
            return total_bytes_avail != 0
        else:
            return _fileno_can_read(self.proc.stdout.fileno())

    def close(self):
        """Close the subprocess's pipes and wait for it to exit."""
        self.proc.stdin.close()
        self.proc.stdout.close()
        if self.proc.stderr:
            self.proc.stderr.close()
        self.proc.wait()
651
652
def find_git_command():
    """Return the argv prefix used to invoke the system Git (usually C Git).

    :return: List of command-line words to which git arguments can be
        appended.
    """
    if sys.platform != 'win32':
        return ['git']

    # On Windows git may be a .exe, .bat or .cmd.
    try:
        # Looking the executable up directly avoids the cmd.exe overhead.
        import win32api
    except ImportError:
        # Without win32api, run through cmd.exe with some overhead.
        return ['cmd', '/c', 'git']
    status, git = win32api.FindExecutable('git')
    return [git]
666
667
class SubprocessGitClient(TraditionalGitClient):
    """Git client that talks to a server using a subprocess."""

    def __init__(self, *args, **kwargs):
        """Create a new SubprocessGitClient instance.

        :param stderr: Optional keyword argument handed to
            subprocess.Popen as the child's stderr
        Remaining arguments are passed on to TraditionalGitClient.
        """
        self._connection = None
        # Remove 'stderr' before delegating: the base class does not
        # accept it.
        self._stderr = kwargs.pop('stderr', None)
        TraditionalGitClient.__init__(self, *args, **kwargs)

    # Command (list of argv words) used to run git. When None, the result
    # of find_git_command() is used.
    git_command = None

    def _connect(self, service, path):
        """Spawn ``git <service> <path>`` and wrap its pipes in a Protocol.

        :param service: Service name as bytes, e.g. b'upload-pack'
        :param path: Repository path handed to the git subcommand
        :return: Tuple (Protocol, can_read function)
        """
        git_command = self.git_command
        if git_command is None:
            git_command = find_git_command()
        argv = list(git_command) + [service.decode('ascii'), path]
        p = SubprocessWrapper(
            subprocess.Popen(argv, bufsize=0, stdin=subprocess.PIPE,
                             stdout=subprocess.PIPE,
                             stderr=self._stderr))
        return Protocol(p.read, p.write, p.close,
                        report_activity=self._report_activity), p.can_read
693
694
class LocalGitClient(GitClient):
    """Git Client that just uses a local Repo."""

    def __init__(self, thin_packs=True, report_activity=None):
        """Create a new LocalGitClient instance.

        :param thin_packs: Accepted for signature compatibility; ignored
        :param report_activity: Optional callback for reporting transport
            activity.
        """
        # thin_packs is intentionally not used.
        self._report_activity = report_activity

    def send_pack(self, path, determine_wants, generate_pack_contents,
                  progress=None, write_pack=write_pack_objects):
        """Upload objects and ref updates to the repository at path.

        :param path: Repository path
        :param determine_wants: Callback that is given a copy of the
            repository's refs and returns the new refs
        :param generate_pack_contents: Function that can return a sequence of
            the shas of the objects to upload.
        :param progress: Optional progress function
        :param write_pack: Function called with (file, iterable of objects) to
            write the objects returned by generate_pack_contents to the server.
        :return: The new refs, as returned by determine_wants

        :raises SendPackError: if server rejects the pack data
        :raises UpdateRefsError: if the server supports report-status
                                 and rejects ref updates
        """
        from dulwich.repo import Repo

        with closing(Repo(path)) as target:
            old_refs = target.get_refs()
            new_refs = determine_wants(dict(old_refs))

            have = [sha for sha in old_refs.values() if sha != ZERO_SHA]
            want = []
            every_ref = set(new_refs.keys()).union(set(old_refs.keys()))
            for refname in every_ref:
                wanted_sha = new_refs.get(refname, ZERO_SHA)
                if wanted_sha != ZERO_SHA and wanted_sha not in have:
                    want.append(wanted_sha)

            # Nothing to transfer and no ref changes: done.
            if not want and old_refs == new_refs:
                return new_refs

            pack_contents = generate_pack_contents(have, want)
            target.object_store.add_objects(pack_contents)

            for refname, sha in new_refs.items():
                target.refs[refname] = sha

        return new_refs

    def fetch(self, path, target, determine_wants=None, progress=None):
        """Fetch from the repository at path into a target repository.

        :param path: Path to fetch from
        :param target: Target repository to fetch into
        :param determine_wants: Optional function to determine what refs
            to fetch
        :param progress: Optional progress function
        :return: remote refs as dictionary
        """
        from dulwich.repo import Repo

        with closing(Repo(path)) as source:
            return source.fetch(
                target, determine_wants=determine_wants, progress=progress)

    def fetch_pack(self, path, determine_wants, graph_walker, pack_data,
                   progress=None):
        """Write a pack of objects from the repository at path.

        :param path: Path of the repository to read from
        :param determine_wants: Callback that returns list of commits to fetch
        :param graph_walker: Object with next() and ack().
        :param pack_data: Callback called for each bit of data in the pack
        :param progress: Callback for progress reports (strings)
        """
        from dulwich.repo import Repo

        with closing(Repo(path)) as source:
            objects_iter = source.fetch_objects(
                determine_wants, graph_walker, progress)

            # Did the process short-circuit (e.g. in a stateless RPC call)?
            # Note that the client still expects a 0-object pack in most
            # cases.
            if objects_iter is not None:
                write_pack_objects(
                    ProtocolFile(None, pack_data), objects_iter)
782
783
# What Git client to use for local access: this class is instantiated for
# 'file' URLs and for plain local paths by the get_transport_and_path*()
# helpers in this module.
default_local_git_client_cls = LocalGitClient
786
787
class SSHVendor(object):
    """A client side SSH implementation.

    Subclasses implement :meth:`run_command`; :meth:`connect_ssh` is kept
    only as a deprecated alias for it.
    """

    def connect_ssh(self, host, command, username=None, port=None):
        """Deprecated alias for :meth:`run_command`.

        Emits a ``DeprecationWarning`` and forwards all arguments.
        """
        import warnings
        # stacklevel=2 attributes the warning to the code calling
        # connect_ssh() rather than to this shim.
        warnings.warn(
            "SSHVendor.connect_ssh has been renamed to SSHVendor.run_command",
            DeprecationWarning, stacklevel=2)
        return self.run_command(host, command, username=username, port=port)

    def run_command(self, host, command, username=None, port=None):
        """Connect to an SSH server.

        Run a command remotely and return a file-like object for interaction
        with the remote command.

        :param host: Host name
        :param command: Command to run
        :param username: Optional name of user to log in as
        :param port: Optional SSH port to use
        :raises NotImplementedError: always; subclasses must override this
        """
        raise NotImplementedError(self.run_command)
810
811
class SubprocessSSHVendor(SSHVendor):
    """SSH vendor that shells out to the local 'ssh' command."""

    def run_command(self, host, command, username=None, port=None):
        """Run ``command`` on ``host`` through the local ``ssh`` binary.

        :param host: Host name
        :param command: Command to run, as a list of arguments appended to
            the ssh command line
        :param username: Optional name of user to log in as
        :param port: Optional SSH port to use
        :return: SubprocessWrapper around the spawned ssh process
        :raises ValueError: if the destination would start with '-' and
            therefore be parsed by ssh as a command line option
        """
        # FIXME: This has no way to deal with passwords..
        args = ['ssh', '-x']
        if port is not None:
            args.extend(['-p', str(port)])
        if username is not None:
            host = '%s@%s' % (username, host)
        # The destination may come from an untrusted URL; a leading dash
        # would turn it into an ssh option (e.g. -oProxyCommand=...).
        if host.startswith('-'):
            raise ValueError(
                "refusing ssh destination starting with '-': %r" % (host,))
        args.append(host)
        proc = subprocess.Popen(args + command,
                                stdin=subprocess.PIPE,
                                stdout=subprocess.PIPE)
        return SubprocessWrapper(proc)
828
829
830 try:
831     import paramiko
832 except ImportError:
833     pass
834 else:
835     import threading
836
837     class ParamikoWrapper(object):
838         STDERR_READ_N = 2048  # 2k
839
840         def __init__(self, client, channel, progress_stderr=None):
841             self.client = client
842             self.channel = channel
843             self.progress_stderr = progress_stderr
844             self.should_monitor = bool(progress_stderr) or True
845             self.monitor_thread = None
846             self.stderr = ''
847
848             # Channel must block
849             self.channel.setblocking(True)
850
851             # Start
852             if self.should_monitor:
853                 self.monitor_thread = threading.Thread(
854                     target=self.monitor_stderr)
855                 self.monitor_thread.start()
856
857         def monitor_stderr(self):
858             while self.should_monitor:
859                 # Block and read
860                 data = self.read_stderr(self.STDERR_READ_N)
861
862                 # Socket closed
863                 if not data:
864                     self.should_monitor = False
865                     break
866
867                 # Emit data
868                 if self.progress_stderr:
869                     self.progress_stderr(data)
870
871                 # Append to buffer
872                 self.stderr += data
873
874         def stop_monitoring(self):
875             # Stop StdErr thread
876             if self.should_monitor:
877                 self.should_monitor = False
878                 self.monitor_thread.join()
879
880                 # Get left over data
881                 data = self.channel.in_stderr_buffer.empty()
882                 self.stderr += data
883
884         def can_read(self):
885             return self.channel.recv_ready()
886
887         def write(self, data):
888             return self.channel.sendall(data)
889
890         def read_stderr(self, n):
891             return self.channel.recv_stderr(n)
892
893         def read(self, n=None):
894             data = self.channel.recv(n)
895             data_len = len(data)
896
897             # Closed socket
898             if not data:
899                 return
900
901             # Read more if needed
902             if n and data_len < n:
903                 diff_len = n - data_len
904                 return data + self.read(diff_len)
905             return data
906
907         def close(self):
908             self.channel.close()
909             self.stop_monitoring()
910
911     class ParamikoSSHVendor(object):
912
913         def __init__(self):
914             self.ssh_kwargs = {}
915
916         def run_command(self, host, command, username=None, port=None,
917                         progress_stderr=None):
918
919             # Paramiko needs an explicit port. None is not valid
920             if port is None:
921                 port = 22
922
923             client = paramiko.SSHClient()
924
925             policy = paramiko.client.MissingHostKeyPolicy()
926             client.set_missing_host_key_policy(policy)
927             client.connect(host, username=username, port=port,
928                            **self.ssh_kwargs)
929
930             # Open SSH session
931             channel = client.get_transport().open_session()
932
933             # Run commands
934             channel.exec_command(*command)
935
936             return ParamikoWrapper(
937                 client, channel, progress_stderr=progress_stderr)
938
939
# Factory called without arguments by SSHGitClient._connect() to obtain the
# SSH vendor whose run_command() is used. Can be overridden by users.
get_ssh_vendor = SubprocessSSHVendor
942
943
class SSHGitClient(TraditionalGitClient):
    """Git client that reaches the remote through an SSH vendor."""

    def __init__(self, host, port=None, username=None, *args, **kwargs):
        """Remember the SSH endpoint; other arguments go to the base class.

        :param host: Host name to connect to
        :param port: Optional SSH port
        :param username: Optional name of user to log in as
        """
        self.host, self.port, self.username = host, port, username
        TraditionalGitClient.__init__(self, *args, **kwargs)
        # Maps a command name to the remote program to run instead of the
        # default "git-<command>".
        self.alternative_paths = {}

    def _get_cmd_path(self, cmd):
        """Return the remote program to run for ``cmd`` (ASCII bytes)."""
        name = cmd.decode('ascii')
        fallback = 'git-' + name
        return self.alternative_paths.get(name, fallback)

    def _connect(self, cmd, path):
        """Start ``cmd`` for ``path`` on the remote host.

        :return: Tuple of (Protocol wrapping the connection, can_read
            callable of the connection)
        """
        # "/~..." paths are passed on without their leading slash.
        remote_path = path[1:] if path.startswith("/~") else path
        vendor = get_ssh_vendor()
        argv = [self._get_cmd_path(cmd), remote_path]
        con = vendor.run_command(
            self.host, argv, port=self.port, username=self.username)
        proto = Protocol(con.read, con.write, con.close,
                         report_activity=self._report_activity)
        return (proto, con.can_read)
966
967
def default_user_agent_string():
    """Return "dulwich/" followed by the dotted ``dulwich.__version__``."""
    dotted_version = ".".join(map(str, dulwich.__version__))
    return "dulwich/%s" % dotted_version
970
971
def default_urllib2_opener(config):
    """Build the default urllib2 opener used for HTTP access.

    :param config: Optional config object; its "http" section is consulted
        for "proxy" and "useragent". May be None.
    :return: An opener with a proxy handler (when a proxy is configured)
        and a User-agent header set
    """
    def http_setting(name):
        # A setting counts as unset when there is no config, when get()
        # returns None, or when get() raises KeyError for a missing option.
        if config is None:
            return None
        try:
            return config.get("http", name)
        except KeyError:
            return None

    proxy_server = http_setting("proxy")
    handlers = []
    if proxy_server is not None:
        handlers.append(urllib2.ProxyHandler({"http": proxy_server}))
    opener = urllib2.build_opener(*handlers)

    user_agent = http_setting("useragent")
    if user_agent is None:
        user_agent = default_user_agent_string()
    opener.addheaders = [('User-agent', user_agent)]
    return opener
989
990
class HttpGitClient(GitClient):
    """Git client for the HTTP transport.

    ``dumb`` records what is known about the server: None means not yet
    known, False means it answered with a smart response, True means it
    answered like a dumb server. Sending and fetching packs is only
    implemented for smart servers.
    """

    def __init__(self, base_url, dumb=None, opener=None, config=None, *args,
                 **kwargs):
        """Create a new client.

        :param base_url: Base URL of the server; normalised to end in "/"
        :param dumb: Whether the server is known to be dumb (None: unknown)
        :param opener: Optional urllib2 opener; a default one is built from
            ``config`` when omitted
        :param config: Optional config object used for the default opener
        """
        self.base_url = base_url.rstrip("/") + "/"
        self.dumb = dumb
        if opener is None:
            self.opener = default_urllib2_opener(config)
        else:
            self.opener = opener
        GitClient.__init__(self, *args, **kwargs)

    def __repr__(self):
        return "%s(%r, dumb=%r)" % (
            type(self).__name__, self.base_url, self.dumb)

    def _get_url(self, path):
        """Return the URL for ``path`` under the base URL, ending in "/"."""
        return urlparse.urljoin(self.base_url, path).rstrip("/") + "/"

    def _http_request(self, url, headers=None, data=None):
        """Open ``url`` and return the response object.

        :param url: URL to request
        :param headers: Optional dictionary of request headers
        :param data: Optional request body
        :raises NotGitRepository: if the server answers 404
        :raises GitProtocolError: for any other HTTP error response
        """
        if headers is None:
            headers = {}
        req = urllib2.Request(url, headers=headers, data=data)
        try:
            return self.opener.open(req)
        except urllib2.HTTPError as e:
            if e.code == 404:
                raise NotGitRepository()
            # Every HTTPError is a failure; never fall through without a
            # response object to return.
            raise GitProtocolError("unexpected http response %d" % e.code)

    def _discover_references(self, service, url):
        """Fetch info/refs and detect whether the server is smart or dumb.

        Updates ``self.dumb`` from the response content type.

        :param service: Name of the service to discover refs for
        :param url: Repository URL, ending in "/"
        :return: Tuple of (refs, server capabilities); the capabilities are
            an empty set for a dumb server
        :raises GitProtocolError: if a smart response does not start with
            the expected service announcement
        """
        assert url[-1] == "/"
        url = urlparse.urljoin(url, "info/refs")
        headers = {}
        # Ask for the smart protocol unless the server is already known to
        # be dumb. (Skipping this for known-smart servers would make them
        # answer with a dumb response on every later request.)
        if self.dumb is not True:
            url += "?service=%s" % service
            headers["Content-Type"] = "application/x-%s-request" % service
        resp = self._http_request(url, headers)
        try:
            self.dumb = (
                not resp.info().gettype().startswith("application/x-git-"))
            if not self.dumb:
                proto = Protocol(resp.read, None)
                # The first line should mention the service
                pkts = list(proto.read_pkt_seq())
                if pkts != [('# service=%s\n' % service)]:
                    raise GitProtocolError(
                        "unexpected first line %r from smart server" % pkts)
                return read_pkt_refs(proto)
            else:
                return read_info_refs(resp), set()
        finally:
            resp.close()

    def _smart_request(self, service, url, data):
        """Send a smart-protocol request and return the open response.

        The caller is responsible for closing the returned response.

        :param service: Name of the service to invoke
        :param url: Repository URL, ending in "/"
        :param data: Request body
        :raises GitProtocolError: if the response content type is not the
            result type for ``service``
        """
        assert url[-1] == "/"
        url = urlparse.urljoin(url, service)
        headers = {"Content-Type": "application/x-%s-request" % service}
        resp = self._http_request(url, headers, data)
        content_type = resp.info().gettype()
        if content_type != ("application/x-%s-result" % service):
            # Nobody else will see this response, so release it here.
            resp.close()
            raise GitProtocolError("Invalid content-type from server: %s"
                                   % content_type)
        return resp

    def send_pack(self, path, determine_wants, generate_pack_contents,
                  progress=None, write_pack=write_pack_objects):
        """Upload a pack to a remote repository.

        :param path: Repository path
        :param determine_wants: Function called with a copy of the remote
            refs; returns the desired new refs, or None to do nothing
        :param generate_pack_contents: Function that can return a sequence of
            the shas of the objects to upload.
        :param progress: Optional progress function
        :param write_pack: Function called with (file, iterable of objects) to
            write the objects returned by generate_pack_contents to the server.
        :return: The new refs, or the old refs if determine_wants returned
            None

        :raises SendPackError: if server rejects the pack data
        :raises UpdateRefsError: if the server supports report-status
                                 and rejects ref updates
        :raises NotImplementedError: if the server is a dumb server
        """
        url = self._get_url(path)
        old_refs, server_capabilities = self._discover_references(
            b"git-receive-pack", url)
        negotiated_capabilities = self._send_capabilities & server_capabilities

        if CAPABILITY_REPORT_STATUS in negotiated_capabilities:
            self._report_status_parser = ReportStatusParser()

        new_refs = determine_wants(dict(old_refs))
        if new_refs is None:
            return old_refs
        if self.dumb:
            raise NotImplementedError(self.send_pack)
        # The whole request is buffered in memory before it is sent.
        req_data = BytesIO()
        req_proto = Protocol(None, req_data.write)
        (have, want) = self._handle_receive_pack_head(
            req_proto, negotiated_capabilities, old_refs, new_refs)
        if not want and old_refs == new_refs:
            return new_refs
        objects = generate_pack_contents(have, want)
        if len(objects) > 0:
            write_pack(req_proto.write_file(), objects)
        resp = self._smart_request(b"git-receive-pack", url,
                                   data=req_data.getvalue())
        try:
            resp_proto = Protocol(resp.read, None)
            self._handle_receive_pack_tail(
                resp_proto, negotiated_capabilities, progress)
            return new_refs
        finally:
            resp.close()

    def fetch_pack(self, path, determine_wants, graph_walker, pack_data,
                   progress=None):
        """Retrieve a pack from a git smart server.

        :param path: Repository path
        :param determine_wants: Callback that returns list of commits to fetch
        :param graph_walker: Object with next() and ack().
        :param pack_data: Callback called for each bit of data in the pack
        :param progress: Callback for progress reports (strings)
        :return: Dictionary with the refs of the remote repository
        :raises NotImplementedError: if the server is a dumb server
        """
        url = self._get_url(path)
        refs, server_capabilities = self._discover_references(
            b"git-upload-pack", url)
        negotiated_capabilities = (
            self._fetch_capabilities & server_capabilities)
        wants = determine_wants(refs)
        if wants is not None:
            wants = [cid for cid in wants if cid != ZERO_SHA]
        if not wants:
            return refs
        if self.dumb:
            raise NotImplementedError(self.fetch_pack)
        # The whole request is buffered in memory before it is sent.
        req_data = BytesIO()
        req_proto = Protocol(None, req_data.write)
        self._handle_upload_pack_head(
            req_proto, negotiated_capabilities, graph_walker, wants,
            lambda: False)
        resp = self._smart_request(
            b"git-upload-pack", url, data=req_data.getvalue())
        try:
            resp_proto = Protocol(resp.read, None)
            self._handle_upload_pack_tail(
                resp_proto, negotiated_capabilities, graph_walker, pack_data,
                progress)
            return refs
        finally:
            resp.close()
1136
1137
def get_transport_and_path_from_url(url, config=None, **kwargs):
    """Obtain a git client from a URL.

    Supported schemes are git, git+ssh, ssh, http, https and file.

    :param url: URL to open
    :param config: Optional config object (used by the HTTP client only)
    :param thin_packs: Whether or not thin packs should be retrieved
    :param report_activity: Optional callback for reporting transport
        activity.
    :return: Tuple with client instance and relative path.
    :raises ValueError: if the URL scheme is not supported
    """
    parsed = urlparse.urlparse(url)
    if parsed.scheme == 'git':
        return (TCPGitClient(parsed.hostname, port=parsed.port, **kwargs),
                parsed.path)
    elif parsed.scheme in ('git+ssh', 'ssh'):
        # Plain ssh:// URLs are handled like git+ssh:// ones.
        path = parsed.path
        if path.startswith('/'):
            path = parsed.path[1:]
        return SSHGitClient(parsed.hostname, port=parsed.port,
                            username=parsed.username, **kwargs), path
    elif parsed.scheme in ('http', 'https'):
        return HttpGitClient(urlparse.urlunparse(parsed), config=config,
                             **kwargs), parsed.path
    elif parsed.scheme == 'file':
        return default_local_git_client_cls(**kwargs), parsed.path

    raise ValueError("unknown scheme '%s'" % parsed.scheme)
1165
1166
def get_transport_and_path(location, **kwargs):
    """Obtain a git client from a URL or path.

    The location is first tried as a URL. Otherwise "host:path" and
    "user@host:path" are treated as SSH locations, and anything else as a
    local path.

    :param location: URL or path
    :param config: Optional config object
    :param thin_packs: Whether or not thin packs should be retrieved
    :param report_activity: Optional callback for reporting transport
        activity.
    :return: Tuple with client instance and relative path.
    """
    # First, try to parse it as a URL
    try:
        return get_transport_and_path_from_url(location, **kwargs)
    except ValueError:
        pass

    # Slicing (rather than indexing) keeps an empty location from raising.
    if (sys.platform == 'win32' and
            location[:1].isalpha() and location[1:3] == ':\\'):
        # Windows local path
        return default_local_git_client_cls(**kwargs), location

    if ':' in location:
        # Split on the first colon only, so the path itself may contain
        # further colons.
        user_host, path = location.split(':', 1)
        if '@' in user_host:
            # SSH with user@host:foo. The last '@' separates user and host.
            user, host = user_host.rsplit('@', 1)
            return SSHGitClient(host, username=user, **kwargs), path
        # SSH with no user@, zero or one leading slash.
        return SSHGitClient(user_host, **kwargs), path

    # Otherwise, assume it's a local path.
    return default_local_git_client_cls(**kwargs), location