- Added set_io_timeout(), which sets all the timeout-dependent
[rsync.git] / io.c
diff --git a/io.c b/io.c
index c9691ed18f6f6668a41028e8d8b74f5d3c6eeb82..90ff04f1a29ed736d0d9d8774f8643a4372e9f84 100644 (file)
--- a/io.c
+++ b/io.c
 /** If no timeout is specified then use a 60 second select timeout */
 #define SELECT_TIMEOUT 60
 
-static int io_multiplexing_out;
-static int io_multiplexing_in;
-static int multiplex_in_fd = -1;
-static int multiplex_out_fd = -1;
-static time_t last_io;
-static int no_flush;
-
 extern int bwlimit;
+extern size_t bwlimit_writemax;
 extern int verbose;
 extern int io_timeout;
+extern int allowed_lull;
 extern int am_server;
 extern int am_daemon;
 extern int am_sender;
+extern int am_generator;
+extern int eol_nulls;
+extern int read_batch;
+extern int csum_length;
+extern int checksum_seed;
+extern int protocol_version;
+extern int remove_sent_files;
+extern int preserve_hard_links;
+extern char *filesfrom_host;
 extern struct stats stats;
-
+extern struct file_list *the_file_list;
 
 const char phase_unknown[] = "unknown";
+int ignore_timeout = 0;
+int batch_fd = -1;
+int batch_gen_fd = -1;
 
 /**
  * The connection might be dropped at some point; perhaps because the
@@ -73,12 +80,22 @@ const char phase_unknown[] = "unknown";
 const char *io_write_phase = phase_unknown;
 const char *io_read_phase = phase_unknown;
 
-/** Ignore EOF errors while reading a module listing if the remote
-    version is 24 or less. */
-int kludge_around_eof = False;
+/* Ignore an EOF error if non-zero. See whine_about_eof(). */
+int kluge_around_eof = 0;
 
 int msg_fd_in = -1;
 int msg_fd_out = -1;
+int sock_f_in = -1;
+int sock_f_out = -1;
+
+static int io_multiplexing_out;
+static int io_multiplexing_in;
+static time_t last_io_in;
+static time_t last_io_out;
+static int no_flush;
+
+static int write_batch_monitor_in = -1;
+static int write_batch_monitor_out = -1;
 
 static int io_filesfrom_f_in = -1;
 static int io_filesfrom_f_out = -1;
@@ -86,16 +103,21 @@ static char io_filesfrom_buf[2048];
 static char *io_filesfrom_bp;
 static char io_filesfrom_lastchar;
 static int io_filesfrom_buflen;
+static size_t contiguous_write_len = 0;
+static int select_timeout = SELECT_TIMEOUT;
 
 static void read_loop(int fd, char *buf, size_t len);
 
-struct redo_list {
-       struct redo_list *next;
-       int num;
+struct flist_ndx_item {
+       struct flist_ndx_item *next;
+       int ndx;
 };
 
-static struct redo_list *redo_list_head;
-static struct redo_list *redo_list_tail;
+struct flist_ndx_list {
+       struct flist_ndx_item *head, *tail;
+};
+
+static struct flist_ndx_list redo_list, hlink_list;
 
 struct msg_list {
        struct msg_list *next;
@@ -106,55 +128,93 @@ struct msg_list {
 static struct msg_list *msg_list_head;
 static struct msg_list *msg_list_tail;
 
-static void redo_list_add(int num)
+static void flist_ndx_push(struct flist_ndx_list *lp, int ndx)
 {
-       struct redo_list *rl;
-
-       if (!(rl = new(struct redo_list)))
-               exit_cleanup(RERR_MALLOC);
-       rl->next = NULL;
-       rl->num = num;
-       if (redo_list_tail)
-               redo_list_tail->next = rl;
+       struct flist_ndx_item *item;
+
+       if (!(item = new(struct flist_ndx_item)))
+               out_of_memory("flist_ndx_push");
+       item->next = NULL;
+       item->ndx = ndx;
+       if (lp->tail)
+               lp->tail->next = item;
        else
-               redo_list_head = rl;
-       redo_list_tail = rl;
+               lp->head = item;
+       lp->tail = item;
+}
+
+static int flist_ndx_pop(struct flist_ndx_list *lp) /* Remove the head item of lp and return its ndx, or -1 if lp is empty. */
+{
+       struct flist_ndx_item *next;
+       int ndx;
+
+       if (!lp->head)
+               return -1; /* nothing queued */
+
+       ndx = lp->head->ndx;
+       next = lp->head->next; /* save the successor before the head item is freed */
+       free(lp->head);
+       lp->head = next;
+       if (!next)
+               lp->tail = NULL; /* list is now empty, so the tail must not keep pointing at the freed item */
+
+       return ndx;
 }
 
 static void check_timeout(void)
 {
        time_t t;
 
-       if (!io_timeout)
+       if (!io_timeout || ignore_timeout)
                return;
 
-       if (!last_io) {
-               last_io = time(NULL);
+       if (!last_io_in) {
+               last_io_in = time(NULL);
                return;
        }
 
        t = time(NULL);
 
-       if (last_io && io_timeout && (t-last_io) >= io_timeout) {
+       if (t - last_io_in >= io_timeout) {
                if (!am_server && !am_daemon) {
-                       rprintf(FERROR, "io timeout after %d seconds - exiting\n",
-                               (int)(t-last_io));
+                       rprintf(FERROR, "io timeout after %d seconds -- exiting\n",
+                               (int)(t-last_io_in));
                }
                exit_cleanup(RERR_TIMEOUT);
        }
 }
 
-/** Setup the fd used to receive MSG_* messages.  Only needed when
- * we're the generator because the sender and receiver both use the
- * multiplexed I/O setup. */
+/* Note the fds used for the main socket (which might really be a pipe
+ * for a local transfer, but we can ignore that). */
+void io_set_sock_fds(int f_in, int f_out)
+{
+       sock_f_in = f_in;
+       sock_f_out = f_out;
+}
+
+void set_io_timeout(int secs) /* Set io_timeout to secs and recompute the values derived from it (select_timeout, allowed_lull). */
+{
+       io_timeout = secs;
+
+       if (!io_timeout || io_timeout > SELECT_TIMEOUT) /* no timeout, or one longer than SELECT_TIMEOUT: select waits are capped at SELECT_TIMEOUT */
+               select_timeout = SELECT_TIMEOUT;
+       else
+               select_timeout = io_timeout;
+
+       allowed_lull = read_batch ? 0 : (io_timeout + 1) / 2; /* half the timeout, rounded up; forced to 0 when read_batch is set */
+}
+
+/* Setup the fd used to receive MSG_* messages.  Only needed during the
+ * early stages of being a local sender (up through the sending of the
+ * file list) or when we're the generator (to fetch the messages from
+ * the receiver). */
 void set_msg_fd_in(int fd)
 {
        msg_fd_in = fd;
 }
 
-/** Setup the fd used to send our MSG_* messages.  Only needed when
- * we're the receiver because the generator and the sender both use
- * the multiplexed I/O setup. */
+/* Setup the fd used to send our MSG_* messages.  Only needed when
+ * we're the receiver (to send our messages to the generator). */
 void set_msg_fd_out(int fd)
 {
        msg_fd_out = fd;
@@ -167,10 +227,10 @@ static void msg_list_add(int code, char *buf, int len)
        struct msg_list *ml;
 
        if (!(ml = new(struct msg_list)))
-               exit_cleanup(RERR_MALLOC);
+               out_of_memory("msg_list_add");
        ml->next = NULL;
        if (!(ml->buf = new_array(char, len+4)))
-               exit_cleanup(RERR_MALLOC);
+               out_of_memory("msg_list_add");
        SIVAL(ml->buf, 0, ((code+MPLEX_BASE)<<24) | len);
        memcpy(ml->buf+4, buf, len);
        ml->len = len+4;
@@ -183,21 +243,27 @@ static void msg_list_add(int code, char *buf, int len)
 
 void send_msg(enum msgcode code, char *buf, int len)
 {
+       if (msg_fd_out < 0) {
+               io_multiplex_write(code, buf, len);
+               return;
+       }
        msg_list_add(code, buf, len);
        msg_list_push(NORMAL_FLUSH);
 }
 
-/** Read a message from the MSG_* fd and dispatch it.  This is only
- * called by the generator. */
+/* Read a message from the MSG_* fd and handle it.  This is called either
+ * during the early stages of being a local sender (up through the sending
+ * of the file list) or when we're the generator (to fetch the messages
+ * from the receiver). */
 static void read_msg_fd(void)
 {
-       char buf[200];
+       char buf[2048];
        size_t n;
        int fd = msg_fd_in;
        int tag, len;
 
-       /* Temporarily disable msg_fd_in.  This is needed because we
-        * may call a write routine that could try to call us back. */
+       /* Temporarily disable msg_fd_in.  This is needed to avoid looping back
+        * to this routine from writefd_unbuffered(). */
        msg_fd_in = -1;
 
        read_loop(fd, buf, 4);
@@ -208,19 +274,38 @@ static void read_msg_fd(void)
 
        switch (tag) {
        case MSG_DONE:
-               if (len != 0) {
+               if (len != 0 || !am_generator) {
                        rprintf(FERROR, "invalid message %d:%d\n", tag, len);
                        exit_cleanup(RERR_STREAMIO);
                }
-               redo_list_add(-1);
+               flist_ndx_push(&redo_list, -1);
                break;
        case MSG_REDO:
-               if (len != 4) {
+               if (len != 4 || !am_generator) {
                        rprintf(FERROR, "invalid message %d:%d\n", tag, len);
                        exit_cleanup(RERR_STREAMIO);
                }
                read_loop(fd, buf, 4);
-               redo_list_add(IVAL(buf,0));
+               flist_ndx_push(&redo_list, IVAL(buf,0));
+               break;
+       case MSG_DELETED:
+               if (len >= (int)sizeof buf || !am_generator) {
+                       rprintf(FERROR, "invalid message %d:%d\n", tag, len);
+                       exit_cleanup(RERR_STREAMIO);
+               }
+               read_loop(fd, buf, len);
+               io_multiplex_write(MSG_DELETED, buf, len);
+               break;
+       case MSG_SUCCESS:
+               if (len != 4 || !am_generator) {
+                       rprintf(FERROR, "invalid message %d:%d\n", tag, len);
+                       exit_cleanup(RERR_STREAMIO);
+               }
+               read_loop(fd, buf, len);
+               if (remove_sent_files)
+                       io_multiplex_write(MSG_SUCCESS, buf, len);
+               if (preserve_hard_links)
+                       flist_ndx_push(&hlink_list, IVAL(buf,0));
                break;
        case MSG_INFO:
        case MSG_ERROR:
@@ -244,7 +329,7 @@ static void read_msg_fd(void)
 
 /* Try to push messages off the list onto the wire.  If we leave with more
  * to do, return 0.  On error, return -1.  If everything flushed, return 1.
- * This is only called by the receiver. */
+ * This is only active in the receiver. */
 int msg_list_push(int flush_it_all)
 {
        static int written = 0;
@@ -266,7 +351,7 @@ int msg_list_push(int flush_it_all)
                                return 0;
                        FD_ZERO(&fds);
                        FD_SET(msg_fd_out, &fds);
-                       tv.tv_sec = io_timeout ? io_timeout : SELECT_TIMEOUT;
+                       tv.tv_sec = select_timeout;
                        tv.tv_usec = 0;
                        if (!select(msg_fd_out+1, NULL, &fds, NULL, &tv))
                                check_timeout();
@@ -282,22 +367,22 @@ int msg_list_push(int flush_it_all)
        return 1;
 }
 
-int get_redo_num(void)
+int get_redo_num(int itemizing, enum logcode code)
 {
-       struct redo_list *next;
-       int num;
-
-       while (!redo_list_head)
+       while (1) {
+               if (hlink_list.head)
+                       check_for_finished_hlinks(itemizing, code);
+               if (redo_list.head)
+                       break;
                read_msg_fd();
+       }
 
-       num = redo_list_head->num;
-       next = redo_list_head->next;
-       free(redo_list_head);
-       redo_list_head = next;
-       if (!next)
-               redo_list_tail = NULL;
+       return flist_ndx_pop(&redo_list);
+}
 
-       return num;
+int get_hlink_num(void) /* Pop and return the next index queued on hlink_list (flist_ndx_pop() returns -1 for an empty list). */
+{
+       return flist_ndx_pop(&hlink_list);
 }
 
 /**
@@ -319,36 +404,33 @@ void io_set_filesfrom_fds(int f_in, int f_out)
        io_filesfrom_buflen = 0;
 }
 
-/**
- * It's almost always an error to get an EOF when we're trying to read
- * from the network, because the protocol is self-terminating.
+/* It's almost always an error to get an EOF when we're trying to read from the
+ * network, because the protocol is (for the most part) self-terminating.
  *
- * However, there is one unfortunate cases where it is not, which is
- * rsync <2.4.6 sending a list of modules on a server, since the list
- * is terminated by closing the socket. So, for the section of the
- * program where that is a problem (start_socket_client),
- * kludge_around_eof is True and we just exit.
- */
-static void whine_about_eof(void)
+ * There is one case for the receiver when it is at the end of the transfer
+ * (hanging around reading any keep-alive packets that might come its way): if
+ * the sender dies before the generator's kill-signal comes through, we can end
+ * up here needing to loop until the kill-signal arrives.  In this situation,
+ * kluge_around_eof will be < 0.
+ *
+ * There is another case for older protocol versions (< 24) where the module
+ * listing was not terminated, so we must ignore an EOF error in that case and
+ * exit.  In this situation, kluge_around_eof will be > 0. */
+static void whine_about_eof(int fd)
 {
-       if (kludge_around_eof)
-               exit_cleanup(0);
-       else {
-               rprintf(FERROR, RSYNC_NAME ": connection unexpectedly closed "
-                       "(%.0f bytes read so far)\n",
-                       (double)stats.total_read);
-
-               exit_cleanup(RERR_STREAMIO);
+       if (kluge_around_eof && fd == sock_f_in) {
+               int i;
+               if (kluge_around_eof > 0)
+                       exit_cleanup(0);
+               /* If we're still here after 10 seconds, exit with an error. */
+               for (i = 10*1000/20; i--; )
+                       msleep(20);
        }
-}
-
 
-static void die_from_readerr(int err)
-{
-       /* this prevents us trying to write errors on a dead socket */
-       io_multiplexing_close();
+       rprintf(FERROR, RSYNC_NAME ": connection unexpectedly closed "
+               "(%.0f bytes received so far) [%s]\n",
+               (double)stats.total_read, who_am_i());
 
-       rsyserr(FERROR, err, "read error");
        exit_cleanup(RERR_STREAMIO);
 }
 
@@ -374,15 +456,16 @@ static int read_timeout(int fd, char *buf, size_t len)
                /* until we manage to read *something* */
                fd_set r_fds, w_fds;
                struct timeval tv;
-               int fd_count = fd+1;
+               int maxfd = fd;
                int count;
 
                FD_ZERO(&r_fds);
+               FD_ZERO(&w_fds);
                FD_SET(fd, &r_fds);
-               if (msg_fd_in >= 0) {
-                       FD_SET(msg_fd_in, &r_fds);
-                       if (msg_fd_in >= fd_count)
-                               fd_count = msg_fd_in+1;
+               if (msg_list_head) {
+                       FD_SET(msg_fd_out, &w_fds);
+                       if (msg_fd_out > maxfd)
+                               maxfd = msg_fd_out;
                }
                if (io_filesfrom_f_out >= 0) {
                        int new_fd;
@@ -395,37 +478,29 @@ static int read_timeout(int fd, char *buf, size_t len)
                                        new_fd = -1;
                                }
                        } else {
-                               FD_ZERO(&w_fds);
                                FD_SET(io_filesfrom_f_out, &w_fds);
                                new_fd = io_filesfrom_f_out;
                        }
-                       if (new_fd >= fd_count)
-                               fd_count = new_fd+1;
+                       if (new_fd > maxfd)
+                               maxfd = new_fd;
                }
 
-               tv.tv_sec = io_timeout?io_timeout:SELECT_TIMEOUT;
+               tv.tv_sec = select_timeout;
                tv.tv_usec = 0;
 
                errno = 0;
 
-               count = select(fd_count, &r_fds,
-                              io_filesfrom_buflen? &w_fds : NULL,
-                              NULL, &tv);
-
-               if (count == 0) {
-                       msg_list_push(NORMAL_FLUSH);
-                       check_timeout();
-               }
+               count = select(maxfd + 1, &r_fds, &w_fds, NULL, &tv);
 
                if (count <= 0) {
-                       if (errno == EBADF) {
+                       if (errno == EBADF)
                                exit_cleanup(RERR_SOCKETIO);
-                       }
+                       check_timeout();
                        continue;
                }
 
-               if (msg_fd_in >= 0 && FD_ISSET(msg_fd_in, &r_fds))
-                       read_msg_fd();
+               if (msg_list_head && FD_ISSET(msg_fd_out, &w_fds))
+                       msg_list_push(NORMAL_FLUSH);
 
                if (io_filesfrom_f_out >= 0) {
                        if (io_filesfrom_buflen) {
@@ -455,7 +530,6 @@ static int read_timeout(int fd, char *buf, size_t len)
                                                io_filesfrom_buflen = io_filesfrom_lastchar? 2 : 1;
                                                io_filesfrom_f_in = -1;
                                        } else {
-                                               extern int eol_nulls;
                                                if (!eol_nulls) {
                                                        char *s = io_filesfrom_buf + l;
                                                        /* Transform CR and/or LF into '\0' */
@@ -491,26 +565,31 @@ static int read_timeout(int fd, char *buf, size_t len)
                        }
                }
 
-               if (!FD_ISSET(fd, &r_fds)) continue;
+               if (!FD_ISSET(fd, &r_fds))
+                       continue;
 
                n = read(fd, buf, len);
 
-               if (n > 0) {
-                       buf += n;
-                       len -= n;
-                       ret += n;
-                       if (io_timeout)
-                               last_io = time(NULL);
-                       continue;
-               } else if (n == 0) {
-                       whine_about_eof();
-                       return -1; /* doesn't return */
-               } else if (n < 0) {
+               if (n <= 0) {
+                       if (n == 0)
+                               whine_about_eof(fd); /* Doesn't return. */
                        if (errno == EINTR || errno == EWOULDBLOCK
                            || errno == EAGAIN)
                                continue;
-                       die_from_readerr(errno);
+
+                       /* Don't write errors on a dead socket. */
+                       if (fd == sock_f_in)
+                               close_multiplexing_out();
+                       rsyserr(FERROR, errno, "read error");
+                       exit_cleanup(RERR_STREAMIO);
                }
+
+               buf += n;
+               len -= n;
+               ret += n;
+
+               if (fd == sock_f_in && io_timeout)
+                       last_io_in = time(NULL);
        }
 
        return ret;
@@ -524,10 +603,7 @@ int read_filesfrom_line(int fd, char *fname)
 {
        char ch, *s, *eob = fname + MAXPATHLEN - 1;
        int cnt;
-       extern int io_timeout;
-       extern int eol_nulls;
-       extern char *remote_filesfrom_file;
-       int reading_remotely = remote_filesfrom_file != NULL;
+       int reading_remotely = filesfrom_host != NULL;
        int nulls = eol_nulls || reading_remotely;
 
   start:
@@ -540,7 +616,7 @@ int read_filesfrom_line(int fd, char *fname)
                        fd_set fds;
                        FD_ZERO(&fds);
                        FD_SET(fd, &fds);
-                       tv.tv_sec = io_timeout? io_timeout : SELECT_TIMEOUT;
+                       tv.tv_sec = select_timeout;
                        tv.tv_usec = 0;
                        if (!select(fd+1, &fds, NULL, NULL, &tv))
                                check_timeout();
@@ -567,6 +643,64 @@ int read_filesfrom_line(int fd, char *fname)
 }
 
 
+static char *iobuf_out;
+static int iobuf_out_cnt;
+
+void io_start_buffering_out(void) /* Allocate the IO_BUFFER_SIZE-byte output buffer and zero its count; a no-op if it already exists. */
+{
+       if (iobuf_out)
+               return; /* already allocated */
+       if (!(iobuf_out = new_array(char, IO_BUFFER_SIZE)))
+               out_of_memory("io_start_buffering_out");
+       iobuf_out_cnt = 0;
+}
+
+
+static char *iobuf_in;
+static size_t iobuf_in_siz;
+
+void io_start_buffering_in(void) /* Allocate the input buffer (2 * IO_BUFFER_SIZE bytes); a no-op if it already exists. */
+{
+       if (iobuf_in)
+               return; /* already allocated */
+       iobuf_in_siz = 2 * IO_BUFFER_SIZE;
+       if (!(iobuf_in = new_array(char, iobuf_in_siz)))
+               out_of_memory("io_start_buffering_in");
+}
+
+
+void io_end_buffering(void) /* Call io_flush(NORMAL_FLUSH), then release the output buffer unless io_multiplexing_out is set. */
+{
+       io_flush(NORMAL_FLUSH);
+       if (!io_multiplexing_out) { /* NOTE(review): presumably multiplexed output still needs the buffer -- confirm */
+               free(iobuf_out);
+               iobuf_out = NULL; /* lets io_start_buffering_out() allocate a fresh buffer later */
+       }
+}
+
+
+void maybe_flush_socket(void) /* Call io_flush() when buffered output exists and last_io_out is at least 5 seconds old. */
+{
+       if (iobuf_out && iobuf_out_cnt && time(NULL) - last_io_out >= 5)
+               io_flush(NORMAL_FLUSH);
+}
+
+
+void maybe_send_keepalive(void) /* Once last_io_out is at least allowed_lull seconds old, make sure something gets written to the socket. */
+{
+       if (time(NULL) - last_io_out >= allowed_lull) {
+               if (!iobuf_out || !iobuf_out_cnt) { /* no buffered output is pending, so generate some */
+                       if (protocol_version < 29)
+                               return; /* there's nothing we can do */
+                       write_int(sock_f_out, the_file_list->count); /* NOTE(review): presumably an index/flag pair the peer treats as a no-op -- confirm */
+                       write_shortint(sock_f_out, ITEM_IS_NEW);
+               }
+               if (iobuf_out)
+                       io_flush(NORMAL_FLUSH);
+       }
+}
+
+
 /**
  * Continue trying to read len bytes - don't return until len has been
  * read.
@@ -588,33 +722,31 @@ static void read_loop(int fd, char *buf, size_t len)
  *
  * Never returns <= 0.
  */
-static int read_unbuffered(int fd, char *buf, size_t len)
+static int readfd_unbuffered(int fd, char *buf, size_t len)
 {
        static size_t remaining;
+       static size_t iobuf_in_ndx;
+       size_t msg_bytes;
        int tag, ret = 0;
-       char line[1024];
-       static char *buffer;
-       static size_t bufferIdx = 0;
-       static size_t bufferSz;
+#if MAXPATHLEN < 4096
+       char line[4096+1024];
+#else
+       char line[MAXPATHLEN+1024];
+#endif
 
-       if (fd != multiplex_in_fd)
+       if (!iobuf_in || fd != sock_f_in)
                return read_timeout(fd, buf, len);
 
        if (!io_multiplexing_in && remaining == 0) {
-               if (!buffer) {
-                       bufferSz = 2 * IO_BUFFER_SIZE;
-                       buffer   = new_array(char, bufferSz);
-                       if (!buffer) out_of_memory("read_unbuffered");
-               }
-               remaining = read_timeout(fd, buffer, bufferSz);
-               bufferIdx = 0;
+               remaining = read_timeout(fd, iobuf_in, iobuf_in_siz);
+               iobuf_in_ndx = 0;
        }
 
        while (ret == 0) {
                if (remaining) {
                        len = MIN(len, remaining);
-                       memcpy(buf, buffer + bufferIdx, len);
-                       bufferIdx += len;
+                       memcpy(buf, iobuf_in + iobuf_in_ndx, len);
+                       iobuf_in_ndx += len;
                        remaining -= len;
                        ret = len;
                        break;
@@ -623,32 +755,56 @@ static int read_unbuffered(int fd, char *buf, size_t len)
                read_loop(fd, line, 4);
                tag = IVAL(line, 0);
 
-               remaining = tag & 0xFFFFFF;
+               msg_bytes = tag & 0xFFFFFF;
                tag = (tag >> 24) - MPLEX_BASE;
 
                switch (tag) {
                case MSG_DATA:
-                       if (!buffer || remaining > bufferSz) {
-                               buffer = realloc_array(buffer, char, remaining);
-                               if (!buffer) out_of_memory("read_unbuffered");
-                               bufferSz = remaining;
+                       if (msg_bytes > iobuf_in_siz) {
+                               if (!(iobuf_in = realloc_array(iobuf_in, char,
+                                                              msg_bytes)))
+                                       out_of_memory("readfd_unbuffered");
+                               iobuf_in_siz = msg_bytes;
+                       }
+                       read_loop(fd, iobuf_in, msg_bytes);
+                       remaining = msg_bytes;
+                       iobuf_in_ndx = 0;
+                       break;
+               case MSG_DELETED:
+                       if (msg_bytes >= sizeof line)
+                               goto overflow;
+                       read_loop(fd, line, msg_bytes);
+                       line[msg_bytes] = '\0';
+                       /* A directory name was sent with the trailing null */
+                       if (msg_bytes > 0 && !line[msg_bytes-1])
+                               log_delete(line, S_IFDIR);
+                       else
+                               log_delete(line, S_IFREG);
+                       break;
+               case MSG_SUCCESS:
+                       if (msg_bytes != 4) {
+                               rprintf(FERROR, "invalid multi-message %d:%ld [%s]\n",
+                                       tag, (long)msg_bytes, who_am_i());
+                               exit_cleanup(RERR_STREAMIO);
                        }
-                       read_loop(fd, buffer, remaining);
-                       bufferIdx = 0;
+                       read_loop(fd, line, msg_bytes);
+                       successful_send(IVAL(line, 0));
                        break;
                case MSG_INFO:
                case MSG_ERROR:
-                       if (remaining >= sizeof line) {
-                               rprintf(FERROR, "multiplexing overflow %d:%ld\n\n",
-                                       tag, (long)remaining);
+                       if (msg_bytes >= sizeof line) {
+                           overflow:
+                               rprintf(FERROR,
+                                       "multiplexing overflow %d:%ld [%s]\n",
+                                       tag, (long)msg_bytes, who_am_i());
                                exit_cleanup(RERR_STREAMIO);
                        }
-                       read_loop(fd, line, remaining);
-                       rwrite((enum logcode)tag, line, remaining);
-                       remaining = 0;
+                       read_loop(fd, line, msg_bytes);
+                       rwrite((enum logcode)tag, line, msg_bytes);
                        break;
                default:
-                       rprintf(FERROR, "unexpected tag %d\n", tag);
+                       rprintf(FERROR, "unexpected tag %d [%s]\n",
+                               tag, who_am_i());
                        exit_cleanup(RERR_STREAMIO);
                }
        }
@@ -672,11 +828,25 @@ static void readfd(int fd, char *buffer, size_t N)
        size_t total = 0;
 
        while (total < N) {
-               ret = read_unbuffered(fd, buffer + total, N-total);
+               ret = readfd_unbuffered(fd, buffer + total, N-total);
                total += ret;
        }
 
-       stats.total_read += total;
+       if (fd == write_batch_monitor_in) {
+               if ((size_t)write(batch_fd, buffer, total) != total)
+                       exit_cleanup(RERR_FILEIO);
+       }
+
+       if (fd == sock_f_in)
+               stats.total_read += total;
+}
+
+
+int read_shortint(int f) /* Read two bytes from f and return them as a 16-bit value, low byte first. */
+{
+       uchar b[2];
+       readfd(f, (char *)b, 2);
+       return (b[1] << 8) + b[0]; /* b[0] is the least-significant byte */
 }
 
 
@@ -687,7 +857,8 @@ int32 read_int(int f)
 
        readfd(f,b,4);
        ret = IVAL(b,0);
-       if (ret == (int32)0xffffffff) return -1;
+       if (ret == (int32)0xffffffff)
+               return -1;
        return ret;
 }
 
@@ -697,12 +868,11 @@ int64 read_longint(int f)
        char b[8];
        ret = read_int(f);
 
-       if ((int32)ret != (int32)0xffffffff) {
+       if ((int32)ret != (int32)0xffffffff)
                return ret;
-       }
 
-#ifdef NO_INT64
-       rprintf(FERROR,"Integer overflow - attempted 64 bit offset\n");
+#if SIZEOF_INT64 < 8
+       rprintf(FERROR, "Integer overflow: attempted 64-bit offset\n");
        exit_cleanup(RERR_UNSUPPORTED);
 #else
        readfd(f,b,8);
@@ -719,17 +889,78 @@ void read_buf(int f,char *buf,size_t len)
 
 void read_sbuf(int f,char *buf,size_t len)
 {
-       read_buf(f,buf,len);
-       buf[len] = 0;
+       readfd(f, buf, len);
+       buf[len] = '\0';
 }
 
-unsigned char read_byte(int f)
+uchar read_byte(int f)
 {
-       unsigned char c;
-       read_buf(f, (char *)&c, 1);
+       uchar c;
+       readfd(f, (char *)&c, 1);
        return c;
 }
 
+int read_vstring(int f, char *buf, int bufsize) /* Read a length-prefixed string from f into buf and NUL-terminate it; returns the length, or -1 if it will not fit in bufsize. */
+{
+       int len = read_byte(f);
+
+       if (len & 0x80) /* high bit set: the length is two bytes, this one (minus the flag bit) being the high byte */
+               len = (len & ~0x80) * 0x100 + read_byte(f);
+
+       if (len >= bufsize) { /* need room for len bytes plus the terminating NUL */
+               rprintf(FERROR, "over-long vstring received (%d > %d)\n",
+                       len, bufsize - 1);
+               return -1; /* the string's bytes are left unread on f */
+       }
+
+       if (len)
+               readfd(f, buf, len);
+       buf[len] = '\0';
+       return len;
+}
+
+/* Populate a sum_struct with values from the socket.  This is
+ * called by both the sender and the receiver. */
+void read_sum_head(int f, struct sum_struct *sum) /* Fill count, blength, s2length and remainder from f; exits with RERR_PROTOCOL on an out-of-range value. */
+{
+       sum->count = read_int(f);
+       sum->blength = read_int(f);
+       if (sum->blength < 0 || sum->blength > MAX_BLOCK_SIZE) {
+               rprintf(FERROR, "Invalid block length %ld [%s]\n",
+                       (long)sum->blength, who_am_i());
+               exit_cleanup(RERR_PROTOCOL);
+       }
+       sum->s2length = protocol_version < 27 ? csum_length : (int)read_int(f); /* before protocol 27 the length is not read from f; csum_length is used instead */
+       if (sum->s2length < 0 || sum->s2length > MD4_SUM_LENGTH) {
+               rprintf(FERROR, "Invalid checksum length %d [%s]\n",
+                       sum->s2length, who_am_i());
+               exit_cleanup(RERR_PROTOCOL);
+       }
+       sum->remainder = read_int(f);
+       if (sum->remainder < 0 || sum->remainder > sum->blength) { /* the remainder may not exceed the block length */
+               rprintf(FERROR, "Invalid remainder length %ld [%s]\n",
+                       (long)sum->remainder, who_am_i());
+               exit_cleanup(RERR_PROTOCOL);
+       }
+}
+
+/* Send the values from a sum_struct over the socket.  Set sum to
+ * NULL if there are no checksums to send.  This is called by both
+ * the generator and the sender. */
+void write_sum_head(int f, struct sum_struct *sum) /* Write count, blength, (protocol >= 27 only) s2length, and remainder to f. */
+{
+       static struct sum_struct null_sum; /* static storage, so every field is zero */
+
+       if (sum == NULL)
+               sum = &null_sum; /* a NULL sum is sent as an all-zero header */
+
+       write_int(f, sum->count);
+       write_int(f, sum->blength);
+       if (protocol_version >= 27) /* s2length is only sent for protocol 27 and later */
+               write_int(f, sum->s2length);
+       write_int(f, sum->remainder);
+}
+
 
 /**
  * Sleep after writing to limit I/O bandwidth usage.
@@ -739,81 +970,109 @@ unsigned char read_byte(int f)
  * use a bit less bandwidth than specified, because it doesn't make up
  * for slow periods.  But arguably this is a feature.  In addition, we
  * ought to take the time used to write the data into account.
+ *
+ * During some phases of big transfers (file FOO is uptodate) this is
+ * called with a small bytes_written every time.  As the kernel has to
+ * round small waits up to guarantee that we actually wait at least the
+ * requested number of microseconds, this can become grossly inaccurate.
+ * We therefore keep track of the bytes we've written over time and only
+ * sleep when the accumulated delay is at least 1 tenth of a second.
  **/
 static void sleep_for_bwlimit(int bytes_written)
 {
-       struct timeval tv;
+       static struct timeval prior_tv; /* timestamp saved by the previous call */
+       static long total_written = 0; /* bytes written but not yet slept for */
+       struct timeval tv, start_tv;
+       long elapsed_usec, sleep_usec;
+
+#define ONE_SEC        1000000L /* # of microseconds in a second */
 
        if (!bwlimit)
                return;
 
-       assert(bytes_written > 0);
-       assert(bwlimit > 0);
+       total_written += bytes_written; /* add this write to the backlog */
 
-       tv.tv_usec = bytes_written * 1000 / bwlimit;
-       tv.tv_sec  = tv.tv_usec / 1000000;
-       tv.tv_usec = tv.tv_usec % 1000000;
+       gettimeofday(&start_tv, NULL);
+       if (prior_tv.tv_sec) { /* zero until an earlier call has stored a time */
+               elapsed_usec = (start_tv.tv_sec - prior_tv.tv_sec) * ONE_SEC
+                            + (start_tv.tv_usec - prior_tv.tv_usec);
+               total_written -= elapsed_usec * bwlimit / (ONE_SEC/1024); /* credit the time that has passed */
+               if (total_written < 0)
+                       total_written = 0; /* idle time is not banked */
+       }
+
+       sleep_usec = total_written * (ONE_SEC/1024) / bwlimit; /* arithmetic treats bwlimit as 1024-byte units per second */
+       if (sleep_usec < ONE_SEC / 10) { /* under 1/10 sec: keep accumulating */
+               prior_tv = start_tv;
+               return;
+       }
 
+       tv.tv_sec  = sleep_usec / ONE_SEC;
+       tv.tv_usec = sleep_usec % ONE_SEC;
        select(0, NULL, NULL, NULL, &tv);
+
+       gettimeofday(&prior_tv, NULL);
+       elapsed_usec = (prior_tv.tv_sec - start_tv.tv_sec) * ONE_SEC
+                    + (prior_tv.tv_usec - start_tv.tv_usec);
+       total_written = (sleep_usec - elapsed_usec) * bwlimit / (ONE_SEC/1024); /* carry the under/over-sleep forward (may be negative) */
 }
 
 
-/**
- * Write len bytes to the file descriptor @p fd.
+/* Write len bytes to the file descriptor fd, looping as necessary to get
+ * the job done and also (in certain circumstances) reading any data on
+ * msg_fd_in to avoid deadlock.
  *
  * This function underlies the multiplexing system.  The body of the
- * application never calls this function directly.
- **/
+ * application never calls this function directly. */
 static void writefd_unbuffered(int fd,char *buf,size_t len)
 {
-       size_t total = 0;
+       size_t n, total = 0;
        fd_set w_fds, r_fds;
-       int fd_count, count;
+       int maxfd, count, ret, using_r_fds;
        struct timeval tv;
 
-       msg_list_push(NORMAL_FLUSH);
-
        no_flush++;
 
        while (total < len) {
                FD_ZERO(&w_fds);
                FD_SET(fd,&w_fds);
-               fd_count = fd;
+               maxfd = fd;
 
-               if (msg_fd_in >= 0) {
+               if (msg_fd_in >= 0 && len-total >= contiguous_write_len) {
                        FD_ZERO(&r_fds);
                        FD_SET(msg_fd_in,&r_fds);
-                       if (msg_fd_in > fd_count)
-                               fd_count = msg_fd_in;
-               }
+                       if (msg_fd_in > maxfd)
+                               maxfd = msg_fd_in;
+                       using_r_fds = 1;
+               } else
+                       using_r_fds = 0;
 
-               tv.tv_sec = io_timeout?io_timeout:SELECT_TIMEOUT;
+               tv.tv_sec = select_timeout;
                tv.tv_usec = 0;
 
                errno = 0;
-               count = select(fd_count+1, msg_fd_in >= 0 ? &r_fds : NULL,
+               count = select(maxfd + 1, using_r_fds ? &r_fds : NULL,
                               &w_fds, NULL, &tv);
 
-               if (count == 0) {
-                       msg_list_push(NORMAL_FLUSH);
-                       check_timeout();
-               }
-
                if (count <= 0) {
-                       if (errno == EBADF) {
+                       if (count < 0 && errno == EBADF)
                                exit_cleanup(RERR_SOCKETIO);
-                       }
+                       check_timeout();
                        continue;
                }
 
-               if (msg_fd_in >= 0 && FD_ISSET(msg_fd_in, &r_fds))
+               if (using_r_fds && FD_ISSET(msg_fd_in, &r_fds))
                        read_msg_fd();
 
-               if (FD_ISSET(fd, &w_fds)) {
-                       int ret;
-                       size_t n = len-total;
-                       ret = write(fd,buf+total,n);
+               if (!FD_ISSET(fd, &w_fds))
+                       continue;
+
+               n = len - total;
+               if (bwlimit && n > bwlimit_writemax)
+                       n = bwlimit_writemax;
+               ret = write(fd, buf + total, n);
 
+               if (ret <= 0) {
                        if (ret < 0) {
                                if (errno == EINTR)
                                        continue;
@@ -823,22 +1082,29 @@ static void writefd_unbuffered(int fd,char *buf,size_t len)
                                }
                        }
 
-                       if (ret <= 0) {
-                               /* Don't try to write errors back
-                                * across the stream */
-                               io_multiplexing_close();
-                               rsyserr(FERROR, errno,
-                                       "writefd_unbuffered failed to write %ld bytes: phase \"%s\"",
-                                       (long) len, io_write_phase);
-                               exit_cleanup(RERR_STREAMIO);
+                       /* Don't try to write errors back across the stream. */
+                       if (fd == sock_f_out)
+                               close_multiplexing_out();
+                       rsyserr(FERROR, errno,
+                               "writefd_unbuffered failed to write %ld bytes: phase \"%s\" [%s]",
+                               (long)len, io_write_phase, who_am_i());
+                       /* If the other side is sending us error messages, try
+                        * to grab any messages they sent before they died. */
+                       while (fd == sock_f_out && io_multiplexing_in) {
+                               set_io_timeout(30);
+                               ignore_timeout = 0;
+                               readfd_unbuffered(sock_f_in, io_filesfrom_buf,
+                                                 sizeof io_filesfrom_buf);
                        }
+                       exit_cleanup(RERR_STREAMIO);
+               }
 
-                       sleep_for_bwlimit(ret);
-
-                       total += ret;
+               total += ret;
 
-                       if (io_timeout)
-                               last_io = time(NULL);
+               if (fd == sock_f_out) {
+                       if (io_timeout || am_generator)
+                               last_io_out = time(NULL);
+                       sleep_for_bwlimit(ret);
                }
        }
 
@@ -846,102 +1112,100 @@ static void writefd_unbuffered(int fd,char *buf,size_t len)
 }
 
 
-static char *io_buffer;
-static int io_buffer_count;
-
-void io_start_buffering_out(int fd)
-{
-       if (io_buffer) return;
-       multiplex_out_fd = fd;
-       io_buffer = new_array(char, IO_BUFFER_SIZE);
-       if (!io_buffer) out_of_memory("writefd");
-       io_buffer_count = 0;
-}
-
-void io_start_buffering_in(int fd)
-{
-       multiplex_in_fd = fd;
-}
-
 /**
  * Write an message to a multiplexed stream. If this fails then rsync
  * exits.
  **/
-static void mplex_write(int fd, enum msgcode code, char *buf, size_t len)
+static void mplex_write(enum msgcode code, char *buf, size_t len)
 {
        char buffer[4096];
        size_t n = len;
 
        SIVAL(buffer, 0, ((MPLEX_BASE + (int)code)<<24) + len);
 
-       if (n > (sizeof buffer - 4)) {
+       /* When the generator reads messages from the msg_fd_in pipe, it can
+        * cause output to occur down the socket.  Setting contiguous_write_len
+        * prevents the reading of msg_fd_in once we actually start to write
+        * this sequence of data (though we might read it before the start). */
+       if (am_generator && msg_fd_in >= 0)
+               contiguous_write_len = len + 4; /* payload plus the 4-byte header */
+
+       if (n > sizeof buffer - 4) /* first write carries at most 4092 payload bytes */
                n = sizeof buffer - 4;
-       }
 
        memcpy(&buffer[4], buf, n);
-       writefd_unbuffered(fd, buffer, n+4);
+       writefd_unbuffered(sock_f_out, buffer, n+4); /* header + first chunk */
 
        len -= n;
        buf += n;
 
-       if (len) {
-               writefd_unbuffered(fd, buf, len);
-       }
+       if (len)
+               writefd_unbuffered(sock_f_out, buf, len); /* the rest, straight from the caller's buffer */
+
+       if (am_generator && msg_fd_in >= 0)
+               contiguous_write_len = 0; /* sequence complete */
 }
 
 
 void io_flush(int flush_it_all)
 {
-       int fd = multiplex_out_fd;
-
        msg_list_push(flush_it_all);
 
-       if (!io_buffer_count || no_flush)
+       if (!iobuf_out_cnt || no_flush) /* nothing buffered, or no_flush is set */
                return;
 
        if (io_multiplexing_out)
-               mplex_write(fd, MSG_DATA, io_buffer, io_buffer_count);
+               mplex_write(MSG_DATA, iobuf_out, iobuf_out_cnt); /* send as one MSG_DATA message */
        else
-               writefd_unbuffered(fd, io_buffer, io_buffer_count);
-       io_buffer_count = 0;
+               writefd_unbuffered(sock_f_out, iobuf_out, iobuf_out_cnt); /* send the buffered bytes as-is */
+       iobuf_out_cnt = 0; /* the output buffer is empty again */
 }
 
 
-void io_end_buffering(void)
+static void writefd(int fd,char *buf,size_t len)
 {
-       io_flush(NORMAL_FLUSH);
-       if (!io_multiplexing_out) {
-               free(io_buffer);
-               io_buffer = NULL;
+       if (fd == msg_fd_out) { /* msg_fd_out must not be written through this path */
+               rprintf(FERROR, "Internal error: wrong write used in receiver.\n");
+               exit_cleanup(RERR_PROTOCOL);
        }
-}
 
-static void writefd(int fd,char *buf,size_t len)
-{
-       stats.total_written += len;
+       if (fd == sock_f_out) /* only socket output is counted in the stats */
+               stats.total_written += len;
 
-       msg_list_push(NORMAL_FLUSH);
+       if (fd == write_batch_monitor_out) { /* also copy monitored output to batch_fd */
+               if ((size_t)write(batch_fd, buf, len) != len) /* NOTE(review): a short write or EINTR is fatal here */
+                       exit_cleanup(RERR_FILEIO);
+       }
 
-       if (!io_buffer || fd != multiplex_out_fd) {
+       if (!iobuf_out || fd != sock_f_out) { /* no output buffer, or not the socket: write directly */
                writefd_unbuffered(fd, buf, len);
                return;
        }
 
        while (len) {
-               int n = MIN((int) len, IO_BUFFER_SIZE-io_buffer_count);
+               int n = MIN((int)len, IO_BUFFER_SIZE - iobuf_out_cnt); /* as much as still fits in the buffer */
                if (n > 0) {
-                       memcpy(io_buffer+io_buffer_count, buf, n);
+                       memcpy(iobuf_out+iobuf_out_cnt, buf, n);
                        buf += n;
                        len -= n;
-                       io_buffer_count += n;
+                       iobuf_out_cnt += n;
                }
 
-               if (io_buffer_count == IO_BUFFER_SIZE)
+               if (iobuf_out_cnt == IO_BUFFER_SIZE) /* buffer full: push it out */
                        io_flush(NORMAL_FLUSH);
        }
 }
 
 
+/* Send the low 16 bits of x as two bytes, least-significant byte first. */
+void write_shortint(int f, int x)
+{
+       uchar bytes[2];
+
+       bytes[1] = (uchar)(x >> 8);
+       bytes[0] = (uchar)x;
+       writefd(f, (char *)bytes, sizeof bytes);
+}
+
+
 void write_int(int f,int32 x)
 {
        char b[4];
@@ -971,8 +1235,8 @@ void write_longint(int f, int64 x)
                return;
        }
 
-#ifdef NO_INT64
-       rprintf(FERROR,"Integer overflow - attempted 64 bit offset\n");
+#if SIZEOF_INT64 < 8
+       rprintf(FERROR, "Integer overflow: attempted 64-bit offset\n");
        exit_cleanup(RERR_UNSUPPORTED);
 #else
        write_int(f, (int32)0xFFFFFFFF);
@@ -989,17 +1253,35 @@ void write_buf(int f,char *buf,size_t len)
 }
 
 /** Write a string to the connection */
-static void write_sbuf(int f,char *buf)
+void write_sbuf(int f, char *buf)
 {
-       write_buf(f, buf, strlen(buf));
+       writefd(f, buf, strlen(buf)); /* the terminating NUL is not sent */
 }
 
-
-void write_byte(int f,unsigned char c)
+void write_byte(int f, uchar c)
 {
-       write_buf(f,(char *)&c,1);
+       writefd(f, (char *)&c, 1); /* send the single byte c */
 }
 
+/* Send len bytes of str preceded by a variable-size length prefix.
+ * Lengths up to 0x7F use a single length byte; lengths up to 0x7FFF use
+ * two, the first holding the high bits with its top bit set and the
+ * second the low byte.  A length outside 0..0x7FFF is reported and the
+ * run ends via exit_cleanup(RERR_PROTOCOL). */
+void write_vstring(int f, char *str, int len)
+{
+       uchar lenbuf[3], *lb = lenbuf;
+
+       /* A negative len would slip past the over-long check below and
+        * then turn into an enormous size_t when handed to writefd(). */
+       if (len < 0) {
+               rprintf(FERROR,
+                       "attempting to send negative-length vstring (%d)\n",
+                       len);
+               exit_cleanup(RERR_PROTOCOL);
+       }
+
+       if (len > 0x7F) {
+               if (len > 0x7FFF) {
+                       rprintf(FERROR,
+                               "attempting to send over-long vstring (%d > %d)\n",
+                               len, 0x7FFF);
+                       exit_cleanup(RERR_PROTOCOL);
+               }
+               *lb++ = len / 0x100 + 0x80;
+       }
+       *lb = len; /* low byte of the length */
+
+       writefd(f, (char*)lenbuf, lb - lenbuf + 1);
+       if (len)
+               writefd(f, str, len);
+}
 
 
 /**
@@ -1038,43 +1320,72 @@ void io_printf(int fd, const char *format, ...)
        len = vsnprintf(buf, sizeof buf, format, ap);
        va_end(ap);
 
-       if (len < 0) exit_cleanup(RERR_STREAMIO);
+       if (len < 0)
+               exit_cleanup(RERR_STREAMIO);
 
        write_sbuf(fd, buf);
 }
 
 
 /** Setup for multiplexing a MSG_* stream with the data stream. */
-void io_start_multiplex_out(int fd)
+void io_start_multiplex_out(void)
 {
-       multiplex_out_fd = fd;
        io_flush(NORMAL_FLUSH);
-       io_start_buffering_out(fd);
+       io_start_buffering_out(); /* called before multiplexed output is switched on */
        io_multiplexing_out = 1;
 }
 
 /** Setup for multiplexing a MSG_* stream with the data stream. */
-void io_start_multiplex_in(int fd)
+void io_start_multiplex_in(void)
 {
-       multiplex_in_fd = fd;
        io_flush(NORMAL_FLUSH);
+       io_start_buffering_in(); /* called before multiplexed input is switched on */
        io_multiplexing_in = 1;
 }
 
 /** Write an message to the multiplexed data stream. */
 int io_multiplex_write(enum msgcode code, char *buf, size_t len)
 {
-       if (!io_multiplexing_out) return 0;
+       if (!io_multiplexing_out)
+               return 0; /* not multiplexing: nothing was sent */
 
        io_flush(NORMAL_FLUSH);
        stats.total_written += (len+4);
-       mplex_write(multiplex_out_fd, code, buf, len);
+       mplex_write(code, buf, len); /* mplex_write() always targets sock_f_out */
        return 1;
 }
 
+void close_multiplexing_in(void)
+{
+       io_multiplexing_in = 0; /* stop input multiplexing */
+}
+
 /** Stop output multiplexing. */
-void io_multiplexing_close(void)
+void close_multiplexing_out(void) /* output-side counterpart of close_multiplexing_in() */
 {
        io_multiplexing_out = 0;
 }
 
+/* Write the batch-file preamble to batch_fd (stream flags, protocol
+ * version, checksum seed) and then start monitoring fd for batch
+ * writing: as the monitored output fd when we are the sender, as the
+ * monitored input fd otherwise. */
+void start_write_batch(int fd)
+{
+       int *monitor_fd;
+
+       write_stream_flags(batch_fd);
+
+       /* Monitoring is only switched on at this point, after some of the
+        * conversation has already happened, so that the batch file holds
+        * a canonical record that does not depend on whether a daemon was
+        * involved in the early exchange. */
+       write_int(batch_fd, protocol_version);
+       write_int(batch_fd, checksum_seed);
+
+       monitor_fd = am_sender ? &write_batch_monitor_out
+                              : &write_batch_monitor_in;
+       *monitor_fd = fd;
+}
+
+/* Stop batch monitoring by resetting both monitor fds to -1. */
+void stop_write_batch(void)
+{
+       write_batch_monitor_in = write_batch_monitor_out = -1;
+}