A relative source path starts in the --source-cd=DIR directory.
[rsync-patches.git] / gzip-rsyncable-checksum.diff
1 NOTE: this patch is for _gzip_ (specifically, it modifies the zlib 1.2.2 sources)!
2
3 Kevin Day's version of the gzip-rsyncable patch that uses the rsync
4 checksum algorithm.
5
6 --- original-1.2.2/deflate.c    2004-09-15 10:28:14.000000000 -0700
7 +++ rsyncable-1.2.2/deflate.c   2005-02-17 14:37:14.660957200 -0700
8 @@ -98,6 +98,10 @@
9                              int length));
10  #endif
11  
12 +local void rsync_roll(deflate_state *s, unsigned start, unsigned num);
13 +local void rsync_roll_noop(deflate_state *s, unsigned start, unsigned num);
14 +local void rsync_roll2(deflate_state *s, unsigned start, unsigned num);
15 +
16  /* ===========================================================================
17   * Local data
18   */
19 @@ -115,6 +119,39 @@
20   * See deflate.c for comments about the MIN_MATCH+1.
21   */
22  
23 +
24 +
25 +/* 
26 +       Valid values for RSYNC_DEFAULT_CHECKSUM_TYPE are:
27 +
28 +       Z_RSYNCABLE_OFF
29 +       Z_RSYNCABLE_SIMPLESUM
30 +       Z_RSYNCABLE_RSSUM
31 +*/
32 +
33 +#ifndef RSYNC_DEFAULT_CHECKSUM_TYPE
34 +#      define RSYNC_DEFAULT_CHECKSUM_TYPE Z_RSYNCABLE_RSSUM
35 +#endif
36 +
37 +#ifndef RSYNC_DEFAULT_WINDOW_SIZE
38 +#      define RSYNC_DEFAULT_WINDOW_SIZE 30
39 +#endif
40 +
41 +#ifndef RSYNC_DEFAULT_RESET_BLOCK_SIZE
42 +#      define RSYNC_DEFAULT_RESET_BLOCK_SIZE 4096 
43 +#endif
44 +
45 +#ifndef RSYNC_RESET_MAGIC_VALUE
46 +#      define RSYNC_RESET_MAGIC_VALUE 0
47 +#endif
48 +
49 +#define RSYNC_SUM_MATCH(s) ((s)->rsync_sum % (s)->rsync_reset_block_size == RSYNC_RESET_MAGIC_VALUE)
50 +/* Whether window sum matches magic value, i.e. rsync_sum modulo rsync_reset_block_size equals RSYNC_RESET_MAGIC_VALUE; rsync_reset_block_size must be nonzero */
51 +
52 +/* Global rsync mode control variable: nonzero (the default) enables the rsync checksum rolling and chunk-boundary flushes; zero disables them */
53 +int zlib_rsync = 1 ;
54 +
55 +
56  /* Values for max_lazy_match, good_match and max_chain_length, depending on
57   * the desired pack level (0..9). The values given below have been tuned to
58   * exclude worst case performance for pathological files. Better values may be
59 @@ -212,6 +249,36 @@
60      /* To do: ignore strm->next_in if we use it as window */
61  }
62  
63 +int ZEXPORT deflateSetRsyncParameters_(strm, checksum_type, window_size, reset_block_size)
64 +    z_streamp strm;        /* initialized deflate stream to configure */
65 +    int  checksum_type;    /* Z_RSYNCABLE_SIMPLESUM or Z_RSYNCABLE_RSSUM; anything else disables rolling */
66 +    ulg window_size;       /* bytes covered by the rolling checksum; 0 = RSYNC_DEFAULT_WINDOW_SIZE */
67 +    ulg reset_block_size;  /* modulus used by RSYNC_SUM_MATCH; 0 = same as the window size */
68 +{
69 +    /* Selects the roll function and resets all rolling state; returns Z_OK, or Z_STREAM_ERROR for a null stream/state. */
70 +    deflate_state *s;
71 +    if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR;
72 +    s = (deflate_state *)strm->state;
73 +    switch (checksum_type) {
74 +    case Z_RSYNCABLE_SIMPLESUM:
75 +        s->rsync_rollfunction = rsync_roll;
76 +        break;
77 +    case Z_RSYNCABLE_RSSUM:
78 +        s->rsync_rollfunction = rsync_roll2;
79 +        break;
80 +    default:
81 +        s->rsync_rollfunction = rsync_roll_noop;
82 +    }
83 +    /* NOTE(review): a zero reset_block_size falls back to the window size, not RSYNC_DEFAULT_RESET_BLOCK_SIZE -- confirm intended */
84 +    s->rsync_window_size = window_size != 0 ? window_size : RSYNC_DEFAULT_WINDOW_SIZE;
85 +    s->rsync_reset_block_size = reset_block_size != 0 ? reset_block_size : s->rsync_window_size;
86 +    /* 0xFFFFFFFFUL marks "no chunk boundary pending"; the sums restart from zero */
87 +    s->rsync_chunk_end = 0xFFFFFFFFUL;
88 +    s->rsync_sum = 0;
89 +    s->rsync_s1 = s->rsync_s2 = 0;
90 +    return Z_OK;
91 +}
92 +
93  /* ========================================================================= */
94  int ZEXPORT deflateInit2_(strm, level, method, windowBits, memLevel, strategy,
95                    version, stream_size)
96 @@ -307,9 +374,13 @@
97      s->strategy = strategy;
98      s->method = (Byte)method;
99  
100 +       deflateSetRsyncParameters_(strm, RSYNC_DEFAULT_CHECKSUM_TYPE, RSYNC_DEFAULT_WINDOW_SIZE, RSYNC_DEFAULT_RESET_BLOCK_SIZE);
101 +
102      return deflateReset(strm);
103  }
104  
105 +
106 +
107  /* ========================================================================= */
108  int ZEXPORT deflateSetDictionary (strm, dictionary, dictLength)
109      z_streamp strm;
110 @@ -841,6 +912,13 @@
111  #ifdef ASMV
112      match_init(); /* initialize the asm code */
113  #endif
114 +
115 +    /* rsync params */
116 +    s->rsync_chunk_end = 0xFFFFFFFFUL;
117 +    s->rsync_sum = 0;
118 +       s->rsync_s1 = 0;
119 +       s->rsync_s2 = 0;
120 +
121  }
122  
123  #ifndef FASTEST
124 @@ -1123,6 +1201,8 @@
125              zmemcpy(s->window, s->window+wsize, (unsigned)wsize);
126              s->match_start -= wsize;
127              s->strstart    -= wsize; /* we now have strstart >= MAX_DIST */
128 +            if (s->rsync_chunk_end != 0xFFFFFFFFUL)
129 +                s->rsync_chunk_end -= wsize;
130              s->block_start -= (long) wsize;
131  
132              /* Slide the hash table (could be avoided with 32 bit values
133 @@ -1184,15 +1264,98 @@
134      } while (s->lookahead < MIN_LOOKAHEAD && s->strm->avail_in != 0);
135  }
136  
137 +local void rsync_roll(s, start, num)
138 +    deflate_state *s;   /* stream state holding the window and the rolling sum */
139 +    unsigned start;     /* index in s->window of the first byte to roll in */
140 +    unsigned num;       /* number of bytes to roll in, beginning at start */
141 +{
142 +    unsigned i;
143 +    /* Simple-sum variant: rsync_sum is a running byte sum over a sliding span of rsync_window_size bytes. */
144 +    if (start < s->rsync_window_size) {
145 +       /* before window fills: only accumulate; nothing is subtracted and no match is tested */
146 +       for (i = start; i < s->rsync_window_size; i++) {
147 +           if (i == start + num) return;
148 +           s->rsync_sum += (ulg)s->window[i];
149 +       }
150 +       num -= (s->rsync_window_size - start);
151 +       start = s->rsync_window_size;
152 +    }
153 +
154 +    /* buffer after window full: slide the span forward one byte per iteration */
155 +    for (i = start; i < start+num; i++) {
156 +       /* New character in */
157 +       s->rsync_sum += (ulg)s->window[i];
158 +       /* Old character out */
159 +       s->rsync_sum -= (ulg)s->window[i - s->rsync_window_size];
160 +       if (s->rsync_chunk_end == 0xFFFFFFFFUL /* no boundary pending yet, so only the first match is recorded */
161 +            && RSYNC_SUM_MATCH(s))
162 +           s->rsync_chunk_end = i;
163 +    }
164 +}
165 +
164 +local void rsync_roll_noop(s, start, num)
165 +    deflate_state *s;   /* unused */
166 +    unsigned start;     /* unused */
167 +    unsigned num;       /* unused */
168 +{   /* intentionally empty: installed as the roll function when the checksum type is neither SIMPLESUM nor RSSUM */
169 +}
172 +
173 +/* Implements the 2 part rsync checksum (s1 = byte sum, s2 = sum of the successive s1 values, each masked to 16 bits),
174 + instead of a simple summation checksum.  start/num give the index and count of the s->window bytes to roll in;
175 + rsync_chunk_end is set to the first index where RSYNC_SUM_MATCH holds while no boundary is pending. */
176 +local void rsync_roll2(deflate_state *s, unsigned start, unsigned num)
177 +{
178 +    unsigned i;
179 +
180 +    if (start < s->rsync_window_size) {
181 +               /* before window fills: accumulate only; rsync_sum is not updated and no match is tested */
182 +               for (i = start; i < s->rsync_window_size; i++) {
183 +                       if (i == start + num) return;
184 +                       s->rsync_s1 = (s->rsync_s1 + (ulg)s->window[i]) & 0xffff;
185 +                       s->rsync_s2 = (s->rsync_s2 + s->rsync_s1) & 0xffff;
186 +               }
187 +               num -= (s->rsync_window_size - start);
188 +               start = s->rsync_window_size;
189 +    }
190 +
191 +    /* buffer after window full */
192 +    for (i = start; i < start+num; i++) {
193 +               /* Old character out */
194 +               /* the departing byte is part of each of the rsync_window_size s1 values folded into s2, hence the multiply */
195 +               s->rsync_s1 = (s->rsync_s1 - (ulg)s->window[i - s->rsync_window_size]) & 0xffff;
196 +               s->rsync_s2 = (s->rsync_s2 - s->rsync_window_size * (ulg)s->window[i - s->rsync_window_size]) & 0xffff;
197 +
198 +               /* New character in */
199 +               s->rsync_s1 = (s->rsync_s1 + (ulg)s->window[i]) & 0xffff;
200 +               s->rsync_s2 = (s->rsync_s2 + s->rsync_s1) & 0xffff;
201 +
202 +               /* add the two together for the match calculation */
203 +               s->rsync_sum = s->rsync_s1 + s->rsync_s2;
204 +
205 +               /* 0xFFFFFFFFUL means no boundary is pending, so only the first match is recorded */
206 +               if (s->rsync_chunk_end == 0xFFFFFFFFUL
207 +                       && RSYNC_SUM_MATCH(s)){
208 +                       s->rsync_chunk_end = i;
209 +               }
210 +    }
211 +}
212 +
213 +/* ===========================================================================
214 + * Pass num window bytes starting at start to the stream's rsync_rollfunction, which may set rsync_chunk_end
215 + * if window sum matches magic value.  Expands to a no-op when the global zlib_rsync is zero. */
216 +#define RSYNC_ROLL(s, start, num) \
217 +   do { if (zlib_rsync) (s)->rsync_rollfunction((s), (start), (num)); } while(0)
218 +
219  /* ===========================================================================
220   * Flush the current block, with given end-of-file flag.
221   * IN assertion: strstart is set to the end of the current match.
222   */
223 -#define FLUSH_BLOCK_ONLY(s, eof) { \
224 +#define FLUSH_BLOCK_ONLY(s, eof, pad) { \
225     _tr_flush_block(s, (s->block_start >= 0L ? \
226                     (charf *)&s->window[(unsigned)s->block_start] : \
227                     (charf *)Z_NULL), \
228                  (ulg)((long)s->strstart - s->block_start), \
229 +                (pad), \
230                  (eof)); \
231     s->block_start = s->strstart; \
232     flush_pending(s->strm); \
233 @@ -1200,8 +1363,8 @@
234  }
235  
236  /* Same but force premature exit if necessary. */
237 -#define FLUSH_BLOCK(s, eof) { \
238 -   FLUSH_BLOCK_ONLY(s, eof); \
239 +#define FLUSH_BLOCK(s, eof, pad) { \
240 +   FLUSH_BLOCK_ONLY(s, eof, pad); \
241     if (s->strm->avail_out == 0) return (eof) ? finish_started : need_more; \
242  }
243  
244 @@ -1252,16 +1415,16 @@
245              /* strstart == 0 is possible when wraparound on 16-bit machine */
246              s->lookahead = (uInt)(s->strstart - max_start);
247              s->strstart = (uInt)max_start;
248 -            FLUSH_BLOCK(s, 0);
249 +            FLUSH_BLOCK(s, 0, 0);
250          }
251          /* Flush if we may have to slide, otherwise block_start may become
252           * negative and the data will be gone:
253           */
254          if (s->strstart - (uInt)s->block_start >= MAX_DIST(s)) {
255 -            FLUSH_BLOCK(s, 0);
256 +            FLUSH_BLOCK(s, 0, 0);
257          }
258      }
259 -    FLUSH_BLOCK(s, flush == Z_FINISH);
260 +    FLUSH_BLOCK(s, flush == Z_FINISH, 0);
261      return flush == Z_FINISH ? finish_done : block_done;
262  }
263  
264 @@ -1330,6 +1493,7 @@
265  
266              s->lookahead -= s->match_length;
267  
268 +            RSYNC_ROLL(s, s->strstart, s->match_length);
269              /* Insert new strings in the hash table only if the match length
270               * is not too large. This saves time but degrades compression.
271               */
272 @@ -1363,12 +1527,17 @@
273              /* No match, output a literal byte */
274              Tracevv((stderr,"%c", s->window[s->strstart]));
275              _tr_tally_lit (s, s->window[s->strstart], bflush);
276 +            RSYNC_ROLL(s, s->strstart, 1);
277              s->lookahead--;
278              s->strstart++;
279          }
280 -        if (bflush) FLUSH_BLOCK(s, 0);
281 +       if (zlib_rsync && s->strstart > s->rsync_chunk_end) {
282 +           s->rsync_chunk_end = 0xFFFFFFFFUL;
283 +           bflush = 2;
284 +       }
285 +        if (bflush) FLUSH_BLOCK(s, 0, bflush-1);
286      }
287 -    FLUSH_BLOCK(s, flush == Z_FINISH);
288 +    FLUSH_BLOCK(s, flush == Z_FINISH, bflush-1);
289      return flush == Z_FINISH ? finish_done : block_done;
290  }
291  
292 @@ -1457,6 +1626,7 @@
293               */
294              s->lookahead -= s->prev_length-1;
295              s->prev_length -= 2;
296 +            RSYNC_ROLL(s, s->strstart, s->prev_length+1);
297              do {
298                  if (++s->strstart <= max_insert) {
299                      INSERT_STRING(s, s->strstart, hash_head);
300 @@ -1466,7 +1636,11 @@
301              s->match_length = MIN_MATCH-1;
302              s->strstart++;
303  
304 -            if (bflush) FLUSH_BLOCK(s, 0);
305 +            if (zlib_rsync && s->strstart > s->rsync_chunk_end) {
306 +                s->rsync_chunk_end = 0xFFFFFFFFUL;
307 +                bflush = 2;
308 +            } 
309 +            if (bflush) FLUSH_BLOCK(s, 0, bflush-1);
310  
311          } else if (s->match_available) {
312              /* If there was no match at the previous position, output a
313 @@ -1475,9 +1649,14 @@
314               */
315              Tracevv((stderr,"%c", s->window[s->strstart-1]));
316              _tr_tally_lit(s, s->window[s->strstart-1], bflush);
317 +            if (zlib_rsync && s->strstart > s->rsync_chunk_end) {
318 +                s->rsync_chunk_end = 0xFFFFFFFFUL;
319 +               bflush = 2;
320 +            } 
321              if (bflush) {
322 -                FLUSH_BLOCK_ONLY(s, 0);
323 +                FLUSH_BLOCK_ONLY(s, 0, bflush-1);
324              }
325 +            RSYNC_ROLL(s, s->strstart, 1);
326              s->strstart++;
327              s->lookahead--;
328              if (s->strm->avail_out == 0) return need_more;
329 @@ -1485,7 +1664,14 @@
330              /* There is no previous match to compare with, wait for
331               * the next step to decide.
332               */
333 +            if (zlib_rsync && s->strstart > s->rsync_chunk_end) {
334 +                /* Reset huffman tree */
335 +                s->rsync_chunk_end = 0xFFFFFFFFUL;
336 +                bflush = 2;
337 +                FLUSH_BLOCK(s, 0, bflush-1);
338 +            } 
339              s->match_available = 1;
340 +            RSYNC_ROLL(s, s->strstart, 1);
341              s->strstart++;
342              s->lookahead--;
343          }
344 @@ -1496,7 +1682,7 @@
345          _tr_tally_lit(s, s->window[s->strstart-1], bflush);
346          s->match_available = 0;
347      }
348 -    FLUSH_BLOCK(s, flush == Z_FINISH);
349 +    FLUSH_BLOCK(s, flush == Z_FINISH, bflush-1);
350      return flush == Z_FINISH ? finish_done : block_done;
351  }
352  #endif /* FASTEST */
353 diff -Naur original-1.2.2/deflate.h rsyncable-1.2.2/deflate.h
354 --- original-1.2.2/deflate.h    2004-02-24 07:38:44.000000000 -0700
355 +++ rsyncable-1.2.2/deflate.h   2005-02-17 13:46:12.056551200 -0700
356 @@ -254,6 +254,17 @@
357       * are always zero.
358       */
359  
360 +    ulg rsync_sum;      /* rolling sum of rsync window */
361 +    ulg rsync_chunk_end; /* next rsync sequence point */
362 +       ulg rsync_window_size; /* the number of bytes used in computing the rolling checksum */
363 +       ulg rsync_reset_block_size; /* the compressed stream will be reset approximately every 'rsync_reset_block_size' bytes */
364 +       ulg rsync_s1; /* part 1 of the checksum for use with checksum type Z_RSYNCABLE_RSSUM*/
365 +       ulg rsync_s2; /* part 2 of the checksum for use with checksum type Z_RSYNCABLE_RSSUM*/
366 +
367 +       /* the function that should be called for performing the rsyncable checksum roll */
368 +       void (*rsync_rollfunction)(struct internal_state*s , unsigned start, unsigned num);
369 +
370 +
371  } FAR deflate_state;
372  
373  /* Output a byte on the stream.
374 @@ -276,7 +287,7 @@
375  void _tr_init         OF((deflate_state *s));
376  int  _tr_tally        OF((deflate_state *s, unsigned dist, unsigned lc));
377  void _tr_flush_block  OF((deflate_state *s, charf *buf, ulg stored_len,
378 -                          int eof));
379 +                          int pad, int eof));
380  void _tr_align        OF((deflate_state *s));
381  void _tr_stored_block OF((deflate_state *s, charf *buf, ulg stored_len,
382                            int eof));
383 diff -Naur original-1.2.2/minigzip.c rsyncable-1.2.2/minigzip.c
384 --- original-1.2.2/minigzip.c   2003-11-04 18:19:26.000000000 -0700
385 +++ rsyncable-1.2.2/minigzip.c  2005-02-17 13:11:35.472851600 -0700
386 @@ -215,7 +215,7 @@
387      }
388      gz_compress(in, out);
389  
390 -    unlink(file);
391 +    //unlink(file);
392  }
393  
394  
395 @@ -236,7 +236,10 @@
396      if (len > SUFFIX_LEN && strcmp(file+len-SUFFIX_LEN, GZ_SUFFIX) == 0) {
397          infile = file;
398          outfile = buf;
399 -        outfile[len-3] = '\0';
400 +        outfile[len-3] = '.';
401 +        outfile[len-2] = 'u';
402 +        outfile[len-1] = 'z';
403 +        outfile[len-0] = '\0';
404      } else {
405          outfile = file;
406          infile = buf;
407 @@ -255,7 +258,7 @@
408  
409      gz_uncompress(in, out);
410  
411 -    unlink(infile);
412 +    //unlink(infile);
413  }
414  
415  
416 diff -Naur original-1.2.2/trees.c rsyncable-1.2.2/trees.c
417 --- original-1.2.2/trees.c      2004-02-24 07:36:38.000000000 -0700
418 +++ rsyncable-1.2.2/trees.c     2005-02-17 13:09:38.768435100 -0700
419 @@ -918,10 +918,11 @@
420   * Determine the best encoding for the current block: dynamic trees, static
421   * trees or store, and output the encoded block to the zip file.
422   */
423 -void _tr_flush_block(s, buf, stored_len, eof)
424 +void _tr_flush_block(s, buf, stored_len, pad, eof)
425      deflate_state *s;
426      charf *buf;       /* input block, or NULL if too old */
427      ulg stored_len;   /* length of input block */
428 +    int pad;          /* pad output to byte boundary */
429      int eof;          /* true if this is the last block for a file */
430  {
431      ulg opt_lenb, static_lenb; /* opt_len and static_len in bytes */
432 @@ -1009,6 +1010,12 @@
433  #ifdef DEBUG
434          s->compressed_len += 7;  /* align on byte boundary */
435  #endif
436 +#ifdef DEBUG
437 +    } else if (pad && (s->compressed_len % 8) != 0) {
438 +#else
439 +    } else if (pad) {
440 +#endif
441 +        _tr_stored_block(s, buf, 0, eof);
442      }
443      Tracev((stderr,"\ncomprlen %lu(%lu) ", s->compressed_len>>3,
444             s->compressed_len-7*eof));
445 diff -Naur original-1.2.2/zlib.def rsyncable-1.2.2/zlib.def
446 --- original-1.2.2/zlib.def     1969-12-31 17:00:00.000000000 -0700
447 +++ rsyncable-1.2.2/zlib.def    2005-02-17 14:01:48.972258000 -0700
448 @@ -0,0 +1,61 @@
449 +LIBRARY
450 +; zlib data compression library
451 +
452 +EXPORTS
453 +; basic functions
454 +    zlibVersion
455 +    deflate
456 +    deflateEnd
457 +    inflate
458 +    inflateEnd
459 +; advanced functions
460 +    deflateSetDictionary
461 +    deflateCopy
462 +    deflateReset
463 +    deflateParams
464 +    deflateBound
465 +    deflatePrime
466 +    inflateSetDictionary
467 +    inflateSync
468 +    inflateCopy
469 +    inflateReset
470 +    inflateBack
471 +    inflateBackEnd
472 +    zlibCompileFlags
473 +; utility functions
474 +    compress
475 +    compress2
476 +    compressBound
477 +    uncompress
478 +    gzopen
479 +    gzdopen
480 +    gzsetparams
481 +    gzread
482 +    gzwrite
483 +    gzprintf
484 +    gzputs
485 +    gzgets
486 +    gzputc
487 +    gzgetc
488 +    gzungetc
489 +    gzflush
490 +    gzseek
491 +    gzrewind
492 +    gztell
493 +    gzeof
494 +    gzclose
495 +    gzerror
496 +    gzclearerr
497 +; checksum functions
498 +    adler32
499 +    crc32
500 +; various hacks, don't look :)
501 +    deflateInit_
502 +    deflateInit2_
503 +    inflateInit_
504 +    inflateInit2_
505 +    inflateBackInit_
506 +    inflateSyncPoint
507 +    get_crc_table
508 +    zError
509 +       deflateSetRsyncParameters_
510 \ No newline at end of file
511 diff -Naur original-1.2.2/zlib.h rsyncable-1.2.2/zlib.h
512 --- original-1.2.2/zlib.h       2004-10-03 22:57:26.000000000 -0700
513 +++ rsyncable-1.2.2/zlib.h      2005-02-17 14:02:11.753362200 -0700
514 @@ -179,6 +179,13 @@
515  
516  #define Z_NULL  0  /* for initializing zalloc, zfree, opaque */
517  
518 +
519 +/* Constants used for selecting Rsyncable checksum type */
520 +#define Z_RSYNCABLE_OFF                        0
521 +#define Z_RSYNCABLE_SIMPLESUM  1
522 +#define Z_RSYNCABLE_RSSUM              2
523 +
524 +
525  #define zlib_version zlibVersion()
526  /* for compatibility with versions < 1.0.2 */
527  
528 @@ -1185,6 +1192,17 @@
529          ZLIB_VERSION, sizeof(z_stream))
530  
531  
532 +
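533 +/* deflateSetRsyncParameters sets the rsyncable parameters on a deflate stream.
534 +       These parameters MUST be set immediately after the stream is created, and before
535 +       any data is written to the stream.  checksum_type is one of the Z_RSYNCABLE_* constants;
536 +       a window_size of 0 selects the built-in default, and a reset_block_size of 0 selects the window size. */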
537 +ZEXTERN int ZEXPORT deflateSetRsyncParameters_ OF((z_stream FAR *strm, int checksum_type, unsigned long window_size, unsigned long reset_block_size));
538 +
539 +#define deflateSetRsyncParameters(strm, checksum_type, window_size, reset_block_size) \
540 +        deflateSetRsyncParameters_((strm), (checksum_type), (window_size), (reset_block_size))
541 +
542 +
543  #if !defined(ZUTIL_H) && !defined(NO_DUMMY_DECL)
544      struct internal_state {int dummy;}; /* hack for buggy compilers */
545  #endif
546 @@ -1193,6 +1211,10 @@
547  ZEXTERN int            ZEXPORT inflateSyncPoint OF((z_streamp z));
548  ZEXTERN const uLongf * ZEXPORT get_crc_table    OF((void));
549  
550 +/* Global rsync mode control variable */
551 +extern int zlib_rsync;
552 +
553 +
554  #ifdef __cplusplus
555  }
556  #endif