Make compression-level handling generic.
[rsync.git] / token.c
1 /*
2  * Routines used by the file-transfer code.
3  *
4  * Copyright (C) 1996 Andrew Tridgell
5  * Copyright (C) 1996 Paul Mackerras
6  * Copyright (C) 2003-2020 Wayne Davison
7  *
8  * This program is free software; you can redistribute it and/or modify
9  * it under the terms of the GNU General Public License as published by
10  * the Free Software Foundation; either version 3 of the License, or
11  * (at your option) any later version.
12  *
13  * This program is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16  * GNU General Public License for more details.
17  *
18  * You should have received a copy of the GNU General Public License along
19  * with this program; if not, visit the http://fsf.org website.
20  */
21
22 #include "rsync.h"
23 #include "itypes.h"
24 #include <zlib.h>
25
/* Option and protocol settings defined outside this file. */
26 extern int do_compression;
27 extern int protocol_version;
28 extern int module_id;
29 extern int do_compression_level;
30 extern char *skip_compress;
31
/* Use Z_SYNC_FLUSH as the history-insertion flush mode when the zlib.h in
 * use does not define Z_INSERT_ONLY. */
32 #ifndef Z_INSERT_ONLY
33 #define Z_INSERT_ONLY Z_SYNC_FLUSH
34 #endif
35
/* compression_level is the level picked for the current file by
 * set_compression(); per_file_default_level is the level given to files
 * that match no skip pattern (assigned in init_set_compression()). */
36 static int compression_level, per_file_default_level;
37
/* One node of the tree of no-compress filename suffixes.  Each node holds a
 * single letter: "sibling" links the alternatives for the same position
 * (add_suffix() keeps that list in ascending letter order), "child" leads
 * to the letters that may follow, and "word_end" is non-zero when a
 * complete suffix ends on this node. */
38 struct suffix_tree {
39         struct suffix_tree *sibling;
40         struct suffix_tree *child;
41         char letter, word_end;
42 };
43
/* Lowercased wildcard patterns stored back to back as NUL-terminated
 * strings and ended by an empty string.  NULL until init_set_compression()
 * has run. */
44 static char *match_list;
/* Root of the suffix tree (NULL when no suffix has been added). */
45 static struct suffix_tree *suftree;
46
/* Normalize do_compression_level for the selected compression method.
 *
 * A level equal to the method's "off" value switches compression off
 * (do_compression becomes CPRES_NONE).  An unspecified level becomes the
 * method's default, and any other value is silently clamped into the
 * method's valid range. */
void init_compression_level(void)
{
        int lowest, highest, fallback, disabled;

        switch (do_compression) {
        case CPRES_ZLIB:
        case CPRES_ZLIBX:
                lowest = 1;
                highest = Z_BEST_COMPRESSION;
                /* Z_DEFAULT_COMPRESSION is -1, so spell out the real default. */
                fallback = 6;
                disabled = Z_NO_COMPRESSION;
                if (do_compression_level == Z_DEFAULT_COMPRESSION)
                        do_compression_level = fallback;
                break;
        default: /* paranoia to prevent missing case values */
                exit_cleanup(RERR_UNSUPPORTED);
        }

        if (do_compression_level == disabled) {
                do_compression = CPRES_NONE;
                return;
        }

        /* No errors or warnings here -- just force the value to be valid. */
        if (do_compression_level == CLVL_NOT_SPECIFIED) {
                do_compression_level = fallback;
                return;
        }
        if (do_compression_level < lowest)
                do_compression_level = lowest;
        else if (do_compression_level > highest)
                do_compression_level = highest;
}
78
79 static void add_suffix(struct suffix_tree **prior, char ltr, const char *str)
80 {
81         struct suffix_tree *node, *newnode;
82
83         if (ltr == '[') {
84                 const char *after = strchr(str, ']');
85                 /* Treat "[foo" and "[]" as having a literal '['. */
86                 if (after && after++ != str+1) {
87                         while ((ltr = *str++) != ']')
88                                 add_suffix(prior, ltr, after);
89                         return;
90                 }
91         }
92
93         for (node = *prior; node; prior = &node->sibling, node = node->sibling) {
94                 if (node->letter == ltr) {
95                         if (*str)
96                                 add_suffix(&node->child, *str, str+1);
97                         else
98                                 node->word_end = 1;
99                         return;
100                 }
101                 if (node->letter > ltr)
102                         break;
103         }
104         if (!(newnode = new(struct suffix_tree)))
105                 out_of_memory("add_suffix");
106         newnode->sibling = node;
107         newnode->child = NULL;
108         newnode->letter = ltr;
109         *prior = newnode;
110         if (*str) {
111                 add_suffix(&newnode->child, *str, str+1);
112                 newnode->word_end = 0;
113         } else
114                 newnode->word_end = 1;
115 }
116
117 static void add_nocompress_suffixes(const char *str)
118 {
119         char *buf, *t;
120         const char *f = str;
121
122         if (!(buf = new_array(char, strlen(f) + 1)))
123                 out_of_memory("add_nocompress_suffixes");
124
125         while (*f) {
126                 if (*f == '/') {
127                         f++;
128                         continue;
129                 }
130
131                 t = buf;
132                 do {
133                         if (isUpper(f))
134                                 *t++ = toLower(f);
135                         else
136                                 *t++ = *f;
137                 } while (*++f != '/' && *f);
138                 *t++ = '\0';
139
140                 add_suffix(&suftree, *buf, buf+1);
141         }
142
143         free(buf);
144 }
145
146 static void init_set_compression(void)
147 {
148         const char *f;
149         char *t, *start;
150
151         if (skip_compress)
152                 add_nocompress_suffixes(skip_compress);
153
154         /* A non-daemon transfer skips the default suffix list if the
155          * user specified --skip-compress. */
156         if (skip_compress && module_id < 0)
157                 f = "";
158         else
159                 f = lp_dont_compress(module_id);
160
161         if (!(match_list = t = new_array(char, strlen(f) + 2)))
162                 out_of_memory("set_compression");
163
164         per_file_default_level = do_compression_level;
165
166         while (*f) {
167                 if (*f == ' ') {
168                         f++;
169                         continue;
170                 }
171
172                 start = t;
173                 do {
174                         if (isUpper(f))
175                                 *t++ = toLower(f);
176                         else
177                                 *t++ = *f;
178                 } while (*++f != ' ' && *f);
179                 *t++ = '\0';
180
181                 if (t - start == 1+1 && *start == '*') {
182                         /* Optimize a match-string of "*". */
183                         *match_list = '\0';
184                         suftree = NULL;
185                         per_file_default_level = 0;
186                         break;
187                 }
188
189                 /* Move *.foo items into the stuffix tree. */
190                 if (*start == '*' && start[1] == '.' && start[2]
191                  && !strpbrk(start+2, ".?*")) {
192                         add_suffix(&suftree, start[2], start+3);
193                         t = start;
194                 }
195         }
196         *t++ = '\0';
197 }
198
199 /* determine the compression level based on a wildcard filename list */
200 void set_compression(const char *fname)
201 {
202         const struct suffix_tree *node;
203         const char *s;
204         char ltr;
205
206         if (!do_compression)
207                 return;
208
209         if (!match_list)
210                 init_set_compression();
211
212         compression_level = per_file_default_level;
213
214         if (!*match_list && !suftree)
215                 return;
216
217         if ((s = strrchr(fname, '/')) != NULL)
218                 fname = s + 1;
219
220         for (s = match_list; *s; s += strlen(s) + 1) {
221                 if (iwildmatch(s, fname)) {
222                         compression_level = 0;
223                         return;
224                 }
225         }
226
227         if (!(node = suftree) || !(s = strrchr(fname, '.'))
228          || s == fname || !(ltr = *++s))
229                 return;
230
231         while (1) {
232                 if (isUpper(&ltr))
233                         ltr = toLower(&ltr);
234                 while (node->letter != ltr) {
235                         if (node->letter > ltr)
236                                 return;
237                         if (!(node = node->sibling))
238                                 return;
239                 }
240                 if ((ltr = *++s) == '\0') {
241                         if (node->word_end)
242                                 compression_level = 0;
243                         return;
244                 }
245                 if (!(node = node->child))
246                         return;
247         }
248 }
249
250 /* non-compressing recv token */
251 static int32 simple_recv_token(int f, char **data)
252 {
253         static int32 residue;
254         static char *buf;
255         int32 n;
256
257         if (!buf) {
258                 buf = new_array(char, CHUNK_SIZE);
259                 if (!buf)
260                         out_of_memory("simple_recv_token");
261         }
262
263         if (residue == 0) {
264                 int32 i = read_int(f);
265                 if (i <= 0)
266                         return i;
267                 residue = i;
268         }
269
270         *data = buf;
271         n = MIN(CHUNK_SIZE,residue);
272         residue -= n;
273         read_buf(f,buf,n);
274         return n;
275 }
276
277 /* non-compressing send token */
278 static void simple_send_token(int f, int32 token, struct map_struct *buf,
279                               OFF_T offset, int32 n)
280 {
281         if (n > 0) {
282                 int32 len = 0;
283                 while (len < n) {
284                         int32 n1 = MIN(CHUNK_SIZE, n-len);
285                         write_int(f, n1);
286                         write_buf(f, map_ptr(buf, offset+len, n1), n1);
287                         len += n1;
288                 }
289         }
290         /* a -2 token means to send data only and no token */
291         if (token != -2)
292                 write_int(f, -(token+1));
293 }
294
295 /* Flag bytes in compressed stream are encoded as follows: */
296 #define END_FLAG        0       /* that's all folks */
297 #define TOKEN_LONG      0x20    /* followed by 32-bit token number */
298 #define TOKENRUN_LONG   0x21    /* ditto with 16-bit run count */
299 #define DEFLATED_DATA   0x40    /* + 6-bit high len, then low len byte */
300 #define TOKEN_REL       0x80    /* + 6-bit relative token number */
301 #define TOKENRUN_REL    0xc0    /* ditto with 16-bit run count */
302
303 #define MAX_DATA_COUNT  16383   /* fit 14 bit count into 2 bytes with flags */
304
305 /* zlib.h says that if we want to be able to compress something in a single
306  * call, avail_out must be at least 0.1% larger than avail_in plus 12 bytes.
307  * We'll add in 0.1%+16, just to be safe (and we'll avoid floating point,
308  * to ensure that this is a compile-time value). */
309 #define AVAIL_OUT_SIZE(avail_in_size) ((avail_in_size)*1001/1000+16)
310
311 /* For coding runs of tokens */
/* last_token is the token given to the previous send_deflated_token() call;
 * the value -1 makes the next call (re)initialize the deflate stream.
 * run_start is the first token of the run being collected and last_run_end
 * is the last token of the run written most recently (the base for the
 * relative token encodings). */
312 static int32 last_token = -1;
313 static int32 run_start;
314 static int32 last_run_end;
315
316 /* Deflation state */
317 static z_stream tx_strm;
318
319 /* Output buffer */
320 static char *obuf;
321
322 /* We want obuf to be able to hold both MAX_DATA_COUNT+2 bytes as well as
323  * AVAIL_OUT_SIZE(CHUNK_SIZE) bytes, so make sure that it's large enough. */
324 #if MAX_DATA_COUNT+2 > AVAIL_OUT_SIZE(CHUNK_SIZE)
325 #define OBUF_SIZE       (MAX_DATA_COUNT+2)
326 #else
327 #define OBUF_SIZE       AVAIL_OUT_SIZE(CHUNK_SIZE)
328 #endif
329
330 /* Send a deflated token */
/*
 * Parameters: f is the descriptor handed to write_byte()/write_int()/
 * write_buf(); token is the matched block number, with -1 marking end of
 * file and -2 meaning "data only, no token"; buf/offset/nb describe the
 * literal data (fetched with map_ptr()) that precedes the token; toklen is
 * the length of the matched block's data, which follows the literal data
 * in buf and is only fed into the compressor's history.
 *
 * Consecutive tokens are collected into a run that is written lazily, by
 * the first later call that cannot extend it.
 */
331 static void
332 send_deflated_token(int f, int32 token, struct map_struct *buf, OFF_T offset,
333                     int32 nb, int32 toklen)
334 {
335         static int init_done, flush_pending;
336         int32 n, r;
337
            /* last_token is -1 before the first call and again after a call
             * that sent the end-of-file token, so this branch runs once at
             * the start of every file. */
338         if (last_token == -1) {
339                 /* initialization */
340                 if (!init_done) {
341                         tx_strm.next_in = NULL;
342                         tx_strm.zalloc = NULL;
343                         tx_strm.zfree = NULL;
344                         if (deflateInit2(&tx_strm, compression_level,
345                                          Z_DEFLATED, -15, 8,
346                                          Z_DEFAULT_STRATEGY) != Z_OK) {
347                                 rprintf(FERROR, "compression init failed\n");
348                                 exit_cleanup(RERR_PROTOCOL);
349                         }
350                         if ((obuf = new_array(char, OBUF_SIZE)) == NULL)
351                                 out_of_memory("send_deflated_token");
352                         init_done = 1;
353                 } else
354                         deflateReset(&tx_strm);
355                 last_run_end = 0;
356                 run_start = token;
357                 flush_pending = 0;
358         } else if (last_token == -2) {
                    /* The previous call carried data only, so there is no
                     * pending run to write; just start a new one. */
359                 run_start = token;
360         } else if (nb != 0 || token != last_token + 1
361                    || token >= run_start + 65536) {
362                 /* output previous run */
                    /* r is the distance from the end of the last written
                     * run; n is the run length minus one. */
363                 r = run_start - last_run_end;
364                 n = last_token - run_start;
365                 if (r >= 0 && r <= 63) {
366                         write_byte(f, (n==0? TOKEN_REL: TOKENRUN_REL) + r);
367                 } else {
368                         write_byte(f, (n==0? TOKEN_LONG: TOKENRUN_LONG));
369                         write_int(f, run_start);
370                 }
371                 if (n != 0) {
                            /* 16-bit run count, low byte first */
372                         write_byte(f, n);
373                         write_byte(f, n >> 8);
374                 }
375                 last_run_end = last_token;
376                 run_start = token;
377         }
378
379         last_token = token;
380
            /* flush_pending is set when the previous call deflated data
             * without requesting a sync flush (token == -2). */
381         if (nb != 0 || flush_pending) {
382                 /* deflate the data starting at offset */
383                 int flush = Z_NO_FLUSH;
384                 tx_strm.avail_in = 0;
385                 tx_strm.avail_out = 0;
386                 do {
387                         if (tx_strm.avail_in == 0 && nb != 0) {
388                                 /* give it some more input */
389                                 n = MIN(nb, CHUNK_SIZE);
390                                 tx_strm.next_in = (Bytef *)
391                                         map_ptr(buf, offset, n);
392                                 tx_strm.avail_in = n;
393                                 nb -= n;
394                                 offset += n;
395                         }
396                         if (tx_strm.avail_out == 0) {
                                    /* The first two bytes of obuf are kept
                                     * free for the DEFLATED_DATA header. */
397                                 tx_strm.next_out = (Bytef *)(obuf + 2);
398                                 tx_strm.avail_out = MAX_DATA_COUNT;
399                                 if (flush != Z_NO_FLUSH) {
400                                         /*
401                                          * We left the last 4 bytes in the
402                                          * buffer, in case they are the
403                                          * last 4.  Move them to the front.
404                                          */
405                                         memcpy(tx_strm.next_out,
406                                                obuf+MAX_DATA_COUNT-2, 4);
407                                         tx_strm.next_out += 4;
408                                         tx_strm.avail_out -= 4;
409                                 }
410                         }
                            /* Once all input has been handed over, ask for
                             * a sync flush unless this is a data-only call. */
411                         if (nb == 0 && token != -2)
412                                 flush = Z_SYNC_FLUSH;
413                         r = deflate(&tx_strm, flush);
414                         if (r != Z_OK) {
415                                 rprintf(FERROR, "deflate returned %d\n", r);
416                                 exit_cleanup(RERR_STREAMIO);
417                         }
418                         if (nb == 0 || tx_strm.avail_out == 0) {
419                                 n = MAX_DATA_COUNT - tx_strm.avail_out;
420                                 if (flush != Z_NO_FLUSH) {
421                                         /*
422                                          * We have to trim off the last 4
423                                          * bytes of output when flushing
424                                          * (they are just 0, 0, ff, ff).
425                                          */
426                                         n -= 4;
427                                 }
428                                 if (n > 0) {
                                            /* header: flag plus the high 6
                                             * bits of n, then the low byte */
429                                         obuf[0] = DEFLATED_DATA + (n >> 8);
430                                         obuf[1] = n;
431                                         write_buf(f, obuf, n+2);
432                                 }
433                         }
434                 } while (nb != 0 || tx_strm.avail_out == 0);
435                 flush_pending = token == -2;
436         }
437
438         if (token == -1) {
439                 /* end of file - clean up */
440                 write_byte(f, END_FLAG);
441         } else if (token != -2 && do_compression == CPRES_ZLIB) {
442                 /* Add the data in the current block to the compressor's
443                  * history and hash table. */
444                 do {
445                         /* Break up long sections in the same way that
446                          * see_deflate_token() does. */
447                         int32 n1 = toklen > 0xffff ? 0xffff : toklen;
448                         toklen -= n1;
449                         tx_strm.next_in = (Bytef *)map_ptr(buf, offset, n1);
450                         tx_strm.avail_in = n1;
451                         if (protocol_version >= 31) /* Newer protocols avoid a data-duplicating bug */
452                                 offset += n1;
                            /* Whatever deflate() puts in obuf here is never
                             * written to f. */
453                         tx_strm.next_out = (Bytef *) obuf;
454                         tx_strm.avail_out = AVAIL_OUT_SIZE(CHUNK_SIZE);
455                         r = deflate(&tx_strm, Z_INSERT_ONLY);
456                         if (r != Z_OK || tx_strm.avail_in != 0) {
457                                 rprintf(FERROR, "deflate on token returned %d (%d bytes left)\n",
458                                         r, tx_strm.avail_in);
459                                 exit_cleanup(RERR_STREAMIO);
460                         }
461                 } while (toklen > 0);
462         }
463 }
464
465 /* tells us what the receiver is in the middle of doing */
/* r_init: the inflate stream must be set up or reset; r_idle: ready to read
 * the next flag byte; r_running: handing out the tokens of a run;
 * r_inflating: compressed input is waiting in cbuf; r_inflated: cbuf has
 * been consumed and the end of the deflated data still has to be checked. */
466 static enum { r_init, r_idle, r_running, r_inflating, r_inflated } recv_state;
467
468 /* for inflating stuff */
/* cbuf receives one DEFLATED_DATA chunk (MAX_DATA_COUNT bytes are
 * allocated); dbuf receives inflate() output (AVAIL_OUT_SIZE(CHUNK_SIZE)
 * bytes are allocated).  Both are allocated by recv_deflated_token(). */
469 static z_stream rx_strm;
470 static char *cbuf;
471 static char *dbuf;
472
473 /* for decoding runs of tokens */
/* rx_token is the most recently decoded token number; rx_run counts the
 * tokens still to be handed out from the current run. */
474 static int32 rx_token;
475 static int32 rx_run;
476
477 /* Receive a deflated token and inflate it */
/*
 * Returns a positive byte count when inflated literal data is available
 * (*data is pointed at dbuf), 0 when END_FLAG ends the file, and
 * -1 - token_number when a block token was decoded.  Progress is kept in
 * recv_state and the rx_* statics between calls.
 */
478 static int32 recv_deflated_token(int f, char **data)
479 {
480         static int init_done;
481         static int32 saved_flag;
482         int32 n, flag;
483         int r;
484
485         for (;;) {
486                 switch (recv_state) {
487                 case r_init:
                            /* Allocate on first use; afterwards only reset
                             * the stream. */
488                         if (!init_done) {
489                                 rx_strm.next_out = NULL;
490                                 rx_strm.zalloc = NULL;
491                                 rx_strm.zfree = NULL;
492                                 if (inflateInit2(&rx_strm, -15) != Z_OK) {
493                                         rprintf(FERROR, "inflate init failed\n");
494                                         exit_cleanup(RERR_PROTOCOL);
495                                 }
496                                 if (!(cbuf = new_array(char, MAX_DATA_COUNT))
497                                     || !(dbuf = new_array(char, AVAIL_OUT_SIZE(CHUNK_SIZE))))
498                                         out_of_memory("recv_deflated_token");
499                                 init_done = 1;
500                         } else {
501                                 inflateReset(&rx_strm);
502                         }
503                         recv_state = r_idle;
504                         rx_token = 0;
505                         break;
506
507                 case r_idle:
508                 case r_inflated:
                            /* Reuse a flag byte that was read earlier but
                             * had to wait while pending output was returned. */
509                         if (saved_flag) {
510                                 flag = saved_flag & 0xff;
511                                 saved_flag = 0;
512                         } else
513                                 flag = read_byte(f);
514                         if ((flag & 0xC0) == DEFLATED_DATA) {
                                    /* 14-bit length: low 6 bits of the flag
                                     * are the high part, next byte the low */
515                                 n = ((flag & 0x3f) << 8) + read_byte(f);
516                                 read_buf(f, cbuf, n);
517                                 rx_strm.next_in = (Bytef *)cbuf;
518                                 rx_strm.avail_in = n;
519                                 recv_state = r_inflating;
520                                 break;
521                         }
522                         if (recv_state == r_inflated) {
523                                 /* check previous inflated stuff ended correctly */
524                                 rx_strm.avail_in = 0;
525                                 rx_strm.next_out = (Bytef *)dbuf;
526                                 rx_strm.avail_out = AVAIL_OUT_SIZE(CHUNK_SIZE);
527                                 r = inflate(&rx_strm, Z_SYNC_FLUSH);
528                                 n = AVAIL_OUT_SIZE(CHUNK_SIZE) - rx_strm.avail_out;
529                                 /*
530                                  * Z_BUF_ERROR just means no progress was
531                                  * made, i.e. the decompressor didn't have
532                                  * any pending output for us.
533                                  */
534                                 if (r != Z_OK && r != Z_BUF_ERROR) {
535                                         rprintf(FERROR, "inflate flush returned %d (%d bytes)\n",
536                                                 r, n);
537                                         exit_cleanup(RERR_STREAMIO);
538                                 }
539                                 if (n != 0 && r != Z_BUF_ERROR) {
540                                         /* have to return some more data and
541                                            save the flag for later. */
                                            /* 0x10000 is added so that a saved
                                             * flag of 0 (END_FLAG) still tests
                                             * as set above. */
542                                         saved_flag = flag + 0x10000;
543                                         *data = dbuf;
544                                         return n;
545                                 }
546                                 /*
547                                  * At this point the decompressor should
548                                  * be expecting to see the 0, 0, ff, ff bytes.
549                                  */
550                                 if (!inflateSyncPoint(&rx_strm)) {
551                                         rprintf(FERROR, "decompressor lost sync!\n");
552                                         exit_cleanup(RERR_STREAMIO);
553                                 }
                                    /* Feed in the four bytes that the sender
                                     * trimmed off its flushed output. */
554                                 rx_strm.avail_in = 4;
555                                 rx_strm.next_in = (Bytef *)cbuf;
556                                 cbuf[0] = cbuf[1] = 0;
557                                 cbuf[2] = cbuf[3] = 0xff;
558                                 inflate(&rx_strm, Z_SYNC_FLUSH);
559                                 recv_state = r_idle;
560                         }
561                         if (flag == END_FLAG) {
562                                 /* that's all folks */
563                                 recv_state = r_init;
564                                 return 0;
565                         }
566
567                         /* here we have a token of some kind */
568                         if (flag & TOKEN_REL) {
                                    /* Relative form: the low 6 bits are
                                     * added to rx_token; shifting by 6
                                     * leaves bit 0 set only for
                                     * TOKENRUN_REL. */
569                                 rx_token += flag & 0x3f;
570                                 flag >>= 6;
571                         } else
572                                 rx_token = read_int(f);
                            /* Bit 0 is also set in TOKENRUN_LONG, so it
                             * marks a run in both forms. */
573                         if (flag & 1) {
                                    /* 16-bit run count, low byte first */
574                                 rx_run = read_byte(f);
575                                 rx_run += read_byte(f) << 8;
576                                 recv_state = r_running;
577                         }
578                         return -1 - rx_token;
579
580                 case r_inflating:
581                         rx_strm.next_out = (Bytef *)dbuf;
582                         rx_strm.avail_out = AVAIL_OUT_SIZE(CHUNK_SIZE);
583                         r = inflate(&rx_strm, Z_NO_FLUSH);
584                         n = AVAIL_OUT_SIZE(CHUNK_SIZE) - rx_strm.avail_out;
585                         if (r != Z_OK) {
586                                 rprintf(FERROR, "inflate returned %d (%d bytes)\n", r, n);
587                                 exit_cleanup(RERR_STREAMIO);
588                         }
                            /* All of cbuf has been consumed; output may
                             * still be pending inside the decompressor. */
589                         if (rx_strm.avail_in == 0)
590                                 recv_state = r_inflated;
591                         if (n != 0) {
592                                 *data = dbuf;
593                                 return n;
594                         }
595                         break;
596
597                 case r_running:
                            /* Hand out the next token of the run. */
598                         ++rx_token;
599                         if (--rx_run == 0)
600                                 recv_state = r_idle;
601                         return -1 - rx_token;
602                 }
603         }
604 }
605
606 /*
607  * put the data corresponding to a token that we've just returned
608  * from recv_deflated_token into the decompressor's history buffer.
609  */
/* buf/len describe the matched block's data.  It is pushed through
 * inflate() as stored blocks of at most 0xffff bytes, each one preceded by
 * a hand-built 5-byte header.  The output lands in dbuf and is not used. */
610 static void see_deflate_token(char *buf, int32 len)
611 {
612         int r;
613         int32 blklen;
614         unsigned char hdr[5];
615
616         rx_strm.avail_in = 0;
617         blklen = 0;
618         hdr[0] = 0;
619         do {
620                 if (rx_strm.avail_in == 0 && len != 0) {
                            /* blklen == 0 means the next thing to feed is a
                             * header; otherwise it is that header's data. */
621                         if (blklen == 0) {
622                                 /* Give it a fake stored-block header. */
623                                 rx_strm.next_in = (Bytef *)hdr;
624                                 rx_strm.avail_in = 5;
625                                 blklen = len;
626                                 if (blklen > 0xffff)
627                                         blklen = 0xffff;
                                    /* length, low byte first, followed by
                                     * its bitwise complement */
628                                 hdr[1] = blklen;
629                                 hdr[2] = blklen >> 8;
630                                 hdr[3] = ~hdr[1];
631                                 hdr[4] = ~hdr[2];
632                         } else {
633                                 rx_strm.next_in = (Bytef *)buf;
634                                 rx_strm.avail_in = blklen;
635                                 if (protocol_version >= 31) /* Newer protocols avoid a data-duplicating bug */
636                                         buf += blklen;
637                                 len -= blklen;
638                                 blklen = 0;
639                         }
640                 }
641                 rx_strm.next_out = (Bytef *)dbuf;
642                 rx_strm.avail_out = AVAIL_OUT_SIZE(CHUNK_SIZE);
643                 r = inflate(&rx_strm, Z_SYNC_FLUSH);
644                 if (r != Z_OK && r != Z_BUF_ERROR) {
645                         rprintf(FERROR, "inflate (token) returned %d\n", r);
646                         exit_cleanup(RERR_STREAMIO);
647                 }
                    /* Repeat while input remains or the output buffer was
                     * filled completely. */
648         } while (len || rx_strm.avail_out == 0);
649 }
650
651 /**
652  * Transmit a verbatim buffer of length @p n followed by a token.
653  * If token == -1 then we have reached EOF
654  * If n == 0 then don't send a buffer
655  */
656 void send_token(int f, int32 token, struct map_struct *buf, OFF_T offset,
657                 int32 n, int32 toklen)
658 {
659         if (!do_compression)
660                 simple_send_token(f, token, buf, offset, n);
661         else
662                 send_deflated_token(f, token, buf, offset, n, toklen);
663 }
664
665 /*
666  * receive a token or buffer from the other end. If the return value is >0 then
667  * it is a data buffer of that length, and *data will point at the data.
668  * if the return value is -i then it represents token i-1
669  * if the return value is 0 then the end has been reached
670  */
671 int32 recv_token(int f, char **data)
672 {
673         int tok;
674
675         if (!do_compression)
676                 tok = simple_recv_token(f,data);
677         else /* CPRES_ZLIB & CPRES_ZLIBX */
678                 tok = recv_deflated_token(f, data);
679         return tok;
680 }
681
682 /*
683  * look at the data corresponding to a token, if necessary
684  */
685 void see_token(char *data, int32 toklen)
686 {
687         if (do_compression == CPRES_ZLIB)
688                 see_deflate_token(data, toklen);
689 }