NOTE: this patch is for _gzip_!
Kevin Day's version of the gzip-rsyncable patch that uses the rsync
checksum algorithm.
--- gzip-1.2.2/deflate.c 2004-09-15 10:28:14.000000000 -0700
+++ rsyncable/deflate.c 2005-02-17 14:37:14.660957200 -0700
@@ -98,6 +98,10 @@
int length));
#endif
+local void rsync_roll OF((deflate_state *s, unsigned start, unsigned num)); /* roller for Z_RSYNCABLE_SIMPLESUM */
+local void rsync_roll_noop OF((deflate_state *s, unsigned start, unsigned num)); /* roller for any other checksum type */
+local void rsync_roll2 OF((deflate_state *s, unsigned start, unsigned num)); /* roller for Z_RSYNCABLE_RSSUM */
+
/* ===========================================================================
* Local data
*/
@@ -115,6 +119,39 @@
* See deflate.c for comments about the MIN_MATCH+1.
*/
+
+
+/*
+ Compile-time defaults for the rsyncable support; each #ifndef guard below lets
+ the build predefine its own value. Valid values for RSYNC_DEFAULT_CHECKSUM_TYPE are:
+ Z_RSYNCABLE_OFF
+ Z_RSYNCABLE_SIMPLESUM
+ Z_RSYNCABLE_RSSUM
+*/
+
+#ifndef RSYNC_DEFAULT_CHECKSUM_TYPE
+# define RSYNC_DEFAULT_CHECKSUM_TYPE Z_RSYNCABLE_RSSUM
+#endif
+
+#ifndef RSYNC_DEFAULT_WINDOW_SIZE
+# define RSYNC_DEFAULT_WINDOW_SIZE 30 /* bytes covered by the rolling checksum */
+#endif
+
+#ifndef RSYNC_DEFAULT_RESET_BLOCK_SIZE
+# define RSYNC_DEFAULT_RESET_BLOCK_SIZE 4096 /* modulus used by RSYNC_SUM_MATCH */
+#endif
+
+#ifndef RSYNC_RESET_MAGIC_VALUE
+# define RSYNC_RESET_MAGIC_VALUE 0 /* remainder that RSYNC_SUM_MATCH looks for */
+#endif
+
+#define RSYNC_SUM_MATCH(s) ((s)->rsync_sum % (s)->rsync_reset_block_size == RSYNC_RESET_MAGIC_VALUE)
+/* Whether window sum matches magic value: rsync_sum modulo rsync_reset_block_size equals RSYNC_RESET_MAGIC_VALUE */
+
+/* Global rsync mode control variable: while zero, RSYNC_ROLL and the chunk-end checks do nothing */
+int zlib_rsync = 1 ;
+
+
/* Values for max_lazy_match, good_match and max_chain_length, depending on
* the desired pack level (0..9). The values given below have been tuned to
* exclude worst case performance for pathological files. Better values may be
@@ -212,6 +249,36 @@
/* To do: ignore strm->next_in if we use it as window */
}
+/* Choose the roll function and sizes for a stream and clear its rolling state; Z_OK, or Z_STREAM_ERROR if strm or its state is missing. */
+int ZEXPORT deflateSetRsyncParameters_(strm, checksum_type, window_size, reset_block_size)
+    z_streamp strm;        /* stream whose deflate state is configured */
+    int checksum_type;     /* Z_RSYNCABLE_* constant; any other value installs the no-op roller */
+    ulg window_size;       /* bytes in the rolling window; 0 selects RSYNC_DEFAULT_WINDOW_SIZE */
+    ulg reset_block_size;  /* modulus of the boundary test; 0 selects the window size */
+{
+    deflate_state *s;
+
+    if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR;
+    s = strm->state;
+    switch(checksum_type){
+    case Z_RSYNCABLE_SIMPLESUM:
+        s->rsync_rollfunction = rsync_roll;
+        break;
+    case Z_RSYNCABLE_RSSUM:
+        s->rsync_rollfunction = rsync_roll2;
+        break;
+    default:
+        s->rsync_rollfunction = rsync_roll_noop;
+    }
+    s->rsync_window_size = window_size != 0 ? window_size : RSYNC_DEFAULT_WINDOW_SIZE;
+    s->rsync_reset_block_size = reset_block_size != 0 ? reset_block_size : s->rsync_window_size;
+    s->rsync_chunk_end = 0xFFFFFFFFUL; /* sentinel: no chunk boundary pending */
+    s->rsync_sum = 0;
+    s->rsync_s1 = 0;
+    s->rsync_s2 = 0;
+    return Z_OK;
+}
+
/* ========================================================================= */
int ZEXPORT deflateInit2_(strm, level, method, windowBits, memLevel, strategy,
version, stream_size)
@@ -307,9 +374,13 @@
s->strategy = strategy;
s->method = (Byte)method;
+ deflateSetRsyncParameters_(strm, RSYNC_DEFAULT_CHECKSUM_TYPE, RSYNC_DEFAULT_WINDOW_SIZE, RSYNC_DEFAULT_RESET_BLOCK_SIZE);
+
return deflateReset(strm);
}
+
+
/* ========================================================================= */
int ZEXPORT deflateSetDictionary (strm, dictionary, dictLength)
z_streamp strm;
@@ -841,6 +912,13 @@
#ifdef ASMV
match_init(); /* initialize the asm code */
#endif
+
+ /* rsync params */
+ s->rsync_chunk_end = 0xFFFFFFFFUL;
+ s->rsync_sum = 0;
+ s->rsync_s1 = 0;
+ s->rsync_s2 = 0;
+
}
#ifndef FASTEST
@@ -1123,6 +1201,8 @@
zmemcpy(s->window, s->window+wsize, (unsigned)wsize);
s->match_start -= wsize;
s->strstart -= wsize; /* we now have strstart >= MAX_DIST */
+ if (s->rsync_chunk_end != 0xFFFFFFFFUL)
+ s->rsync_chunk_end -= wsize;
s->block_start -= (long) wsize;
/* Slide the hash table (could be avoided with 32 bit values
@@ -1184,15 +1264,98 @@
} while (s->lookahead < MIN_LOOKAHEAD && s->strm->avail_in != 0);
}
+/* Simple-sum roller: advance the rolling byte sum over num bytes of the
+ * window starting at start, noting the first position where it matches. */
+local void rsync_roll(s, start, num)
+    deflate_state *s;
+    unsigned start;
+    unsigned num;
+{
+    ulg span = s->rsync_window_size;   /* bytes covered by the rolling sum */
+    unsigned stop = start + num;       /* one past the last byte to roll in */
+    unsigned pos = start;
+
+    /* Warm-up: positions below the window size only accumulate; nothing
+     * leaves the sum yet and no boundary test is made. */
+    while (pos < span) {
+        if (pos == stop) return;
+        s->rsync_sum += (ulg)s->window[pos];
+        pos++;
+    }
+
+    /* Steady state: one byte enters, the byte one window back leaves. */
+    for (; pos < stop; pos++) {
+        s->rsync_sum += (ulg)s->window[pos];
+        s->rsync_sum -= (ulg)s->window[pos - span];
+        /* Record only the first match; 0xFFFFFFFFUL means none pending. */
+        if (s->rsync_chunk_end != 0xFFFFFFFFUL) continue;
+        if (RSYNC_SUM_MATCH(s)) s->rsync_chunk_end = pos;
+    }
+}
+
+local void rsync_roll_noop(s, start, num) /* roll function installed when the checksum type is neither SIMPLESUM nor RSSUM */
+ deflate_state *s;
+ unsigned start;
+ unsigned num;
+{ /* intentionally empty: no checksum is rolled, so rsync_chunk_end is never set here */
+}
+
+/*
+ Implements the 2 part rsync checksum, instead of a simple summation checksum:
+ s1 sums the window bytes and s2 sums the successive s1 values, both kept to 16 bits.
+*/
+local void rsync_roll2(s, start, num)
+    deflate_state *s;
+    unsigned start;
+    unsigned num;
+{
+    unsigned i;
+
+    if (start < s->rsync_window_size) {
+        /* before window fills: bytes only enter, no boundary test is made */
+        for (i = start; i < s->rsync_window_size; i++) {
+            if (i == start + num) return;
+            s->rsync_s1 = (s->rsync_s1 + (ulg)s->window[i]) & 0xffff;
+            s->rsync_s2 = (s->rsync_s2 + s->rsync_s1) & 0xffff;
+        }
+        num -= (s->rsync_window_size - start);
+        start = s->rsync_window_size;
+    }
+
+    /* buffer after window full */
+    for (i = start; i < start+num; i++) {
+        /* Old character out: s1 drops the byte, s2 drops window-size times the byte */
+        s->rsync_s1 = (s->rsync_s1 - (ulg)s->window[i - s->rsync_window_size]) & 0xffff;
+        s->rsync_s2 = (s->rsync_s2 - s->rsync_window_size * (ulg)s->window[i - s->rsync_window_size]) & 0xffff;
+        /* New character in */
+        s->rsync_s1 = (s->rsync_s1 + (ulg)s->window[i]) & 0xffff;
+        s->rsync_s2 = (s->rsync_s2 + s->rsync_s1) & 0xffff;
+        /* add the two together for the match calculation */
+        s->rsync_sum = s->rsync_s1 + s->rsync_s2;
+        /* keep only the first match; 0xFFFFFFFFUL means no boundary is pending */
+        if (s->rsync_chunk_end == 0xFFFFFFFFUL
+            && RSYNC_SUM_MATCH(s)){
+            s->rsync_chunk_end = i;
+        }
+    }
+}
+
+/* ===========================================================================
+ * Run the stream's roll function over num window bytes from start; does nothing while zlib_rsync is zero.
+ */
+#define RSYNC_ROLL(s, start, num) \
+  do { if (zlib_rsync != 0) (*(s)->rsync_rollfunction)((s), (start), (num)); } while (0)
+
/* ===========================================================================
* Flush the current block, with given end-of-file flag.
* IN assertion: strstart is set to the end of the current match.
*/
-#define FLUSH_BLOCK_ONLY(s, eof) { \
+#define FLUSH_BLOCK_ONLY(s, eof, pad) { \
_tr_flush_block(s, (s->block_start >= 0L ? \
(charf *)&s->window[(unsigned)s->block_start] : \
(charf *)Z_NULL), \
(ulg)((long)s->strstart - s->block_start), \
+ (pad), \
(eof)); \
s->block_start = s->strstart; \
flush_pending(s->strm); \
@@ -1200,8 +1363,8 @@
}
/* Same but force premature exit if necessary. */
-#define FLUSH_BLOCK(s, eof) { \
- FLUSH_BLOCK_ONLY(s, eof); \
+#define FLUSH_BLOCK(s, eof, pad) { \
+ FLUSH_BLOCK_ONLY(s, eof, pad); \
if (s->strm->avail_out == 0) return (eof) ? finish_started : need_more; \
}
@@ -1252,16 +1415,16 @@
/* strstart == 0 is possible when wraparound on 16-bit machine */
s->lookahead = (uInt)(s->strstart - max_start);
s->strstart = (uInt)max_start;
- FLUSH_BLOCK(s, 0);
+ FLUSH_BLOCK(s, 0, 0);
}
/* Flush if we may have to slide, otherwise block_start may become
* negative and the data will be gone:
*/
if (s->strstart - (uInt)s->block_start >= MAX_DIST(s)) {
- FLUSH_BLOCK(s, 0);
+ FLUSH_BLOCK(s, 0, 0);
}
}
- FLUSH_BLOCK(s, flush == Z_FINISH);
+ FLUSH_BLOCK(s, flush == Z_FINISH, 0);
return flush == Z_FINISH ? finish_done : block_done;
}
@@ -1330,6 +1493,7 @@
s->lookahead -= s->match_length;
+ RSYNC_ROLL(s, s->strstart, s->match_length);
/* Insert new strings in the hash table only if the match length
* is not too large. This saves time but degrades compression.
*/
@@ -1363,12 +1527,17 @@
/* No match, output a literal byte */
Tracevv((stderr,"%c", s->window[s->strstart]));
_tr_tally_lit (s, s->window[s->strstart], bflush);
+ RSYNC_ROLL(s, s->strstart, 1);
s->lookahead--;
s->strstart++;
}
- if (bflush) FLUSH_BLOCK(s, 0);
+ if (zlib_rsync && s->strstart > s->rsync_chunk_end) {
+ s->rsync_chunk_end = 0xFFFFFFFFUL;
+ bflush = 2;
+ }
+ if (bflush) FLUSH_BLOCK(s, 0, bflush-1);
}
- FLUSH_BLOCK(s, flush == Z_FINISH);
+ FLUSH_BLOCK(s, flush == Z_FINISH, bflush-1);
return flush == Z_FINISH ? finish_done : block_done;
}
@@ -1457,6 +1626,7 @@
*/
s->lookahead -= s->prev_length-1;
s->prev_length -= 2;
+ RSYNC_ROLL(s, s->strstart, s->prev_length+1);
do {
if (++s->strstart <= max_insert) {
INSERT_STRING(s, s->strstart, hash_head);
@@ -1466,7 +1636,11 @@
s->match_length = MIN_MATCH-1;
s->strstart++;
- if (bflush) FLUSH_BLOCK(s, 0);
+ if (zlib_rsync && s->strstart > s->rsync_chunk_end) {
+ s->rsync_chunk_end = 0xFFFFFFFFUL;
+ bflush = 2;
+ }
+ if (bflush) FLUSH_BLOCK(s, 0, bflush-1);
} else if (s->match_available) {
/* If there was no match at the previous position, output a
@@ -1475,9 +1649,14 @@
*/
Tracevv((stderr,"%c", s->window[s->strstart-1]));
_tr_tally_lit(s, s->window[s->strstart-1], bflush);
+ if (zlib_rsync && s->strstart > s->rsync_chunk_end) {
+ s->rsync_chunk_end = 0xFFFFFFFFUL;
+ bflush = 2;
+ }
if (bflush) {
- FLUSH_BLOCK_ONLY(s, 0);
+ FLUSH_BLOCK_ONLY(s, 0, bflush-1);
}
+ RSYNC_ROLL(s, s->strstart, 1);
s->strstart++;
s->lookahead--;
if (s->strm->avail_out == 0) return need_more;
@@ -1485,7 +1664,14 @@
/* There is no previous match to compare with, wait for
* the next step to decide.
*/
+ if (zlib_rsync && s->strstart > s->rsync_chunk_end) {
+ /* Reset huffman tree */
+ s->rsync_chunk_end = 0xFFFFFFFFUL;
+ bflush = 2;
+ FLUSH_BLOCK(s, 0, bflush-1);
+ }
s->match_available = 1;
+ RSYNC_ROLL(s, s->strstart, 1);
s->strstart++;
s->lookahead--;
}
@@ -1496,7 +1682,7 @@
_tr_tally_lit(s, s->window[s->strstart-1], bflush);
s->match_available = 0;
}
- FLUSH_BLOCK(s, flush == Z_FINISH);
+ FLUSH_BLOCK(s, flush == Z_FINISH, bflush-1);
return flush == Z_FINISH ? finish_done : block_done;
}
#endif /* FASTEST */
--- gzip-1.2.2/deflate.h 2004-02-24 07:38:44.000000000 -0700
+++ rsyncable/deflate.h 2005-02-17 13:46:12.056551200 -0700
@@ -254,6 +254,17 @@
* are always zero.
*/
+ ulg rsync_sum; /* rolling sum of rsync window; the value tested by RSYNC_SUM_MATCH */
+ ulg rsync_chunk_end; /* next rsync sequence point (a window position), or 0xFFFFFFFFUL when none is pending */
+ ulg rsync_window_size; /* the number of bytes used in computing the rolling checksum */
+ ulg rsync_reset_block_size; /* the compressed stream will be reset approximately every 'rsync_reset_block_size' bytes */
+ ulg rsync_s1; /* part 1 of the checksum for use with checksum type Z_RSYNCABLE_RSSUM: sum of the window bytes, masked to 16 bits */
+ ulg rsync_s2; /* part 2 of the checksum for use with checksum type Z_RSYNCABLE_RSSUM: sum of the successive s1 values, masked to 16 bits */
+
+ /* the function that should be called for performing the rsyncable checksum roll; chosen from the checksum type by deflateSetRsyncParameters_ */
+ void (*rsync_rollfunction)(struct internal_state*s , unsigned start, unsigned num);
+
+
} FAR deflate_state;
/* Output a byte on the stream.
@@ -276,7 +287,7 @@
void _tr_init OF((deflate_state *s));
int _tr_tally OF((deflate_state *s, unsigned dist, unsigned lc));
void _tr_flush_block OF((deflate_state *s, charf *buf, ulg stored_len,
- int eof));
+ int pad, int eof));
void _tr_align OF((deflate_state *s));
void _tr_stored_block OF((deflate_state *s, charf *buf, ulg stored_len,
int eof));
--- gzip-1.2.2/minigzip.c 2003-11-04 18:19:26.000000000 -0700
+++ rsyncable/minigzip.c 2005-02-17 13:11:35.472851600 -0700
@@ -215,7 +215,7 @@
}
gz_compress(in, out);
- unlink(file);
+ //unlink(file);
}
@@ -236,7 +236,10 @@
if (len > SUFFIX_LEN && strcmp(file+len-SUFFIX_LEN, GZ_SUFFIX) == 0) {
infile = file;
outfile = buf;
- outfile[len-3] = '\0';
+ outfile[len-3] = '.';
+ outfile[len-2] = 'u';
+ outfile[len-1] = 'z';
+ outfile[len-0] = '\0';
} else {
outfile = file;
infile = buf;
@@ -255,7 +258,7 @@
gz_uncompress(in, out);
- unlink(infile);
+ //unlink(infile);
}
--- gzip-1.2.2/trees.c 2004-02-24 07:36:38.000000000 -0700
+++ rsyncable/trees.c 2005-02-17 13:09:38.768435100 -0700
@@ -918,10 +918,11 @@
* Determine the best encoding for the current block: dynamic trees, static
* trees or store, and output the encoded block to the zip file.
*/
-void _tr_flush_block(s, buf, stored_len, eof)
+void _tr_flush_block(s, buf, stored_len, pad, eof)
deflate_state *s;
charf *buf; /* input block, or NULL if too old */
ulg stored_len; /* length of input block */
+ int pad; /* pad output to byte boundary */
int eof; /* true if this is the last block for a file */
{
ulg opt_lenb, static_lenb; /* opt_len and static_len in bytes */
@@ -1009,6 +1010,12 @@
#ifdef DEBUG
s->compressed_len += 7; /* align on byte boundary */
#endif
+#ifdef DEBUG
+ } else if (pad && (s->compressed_len % 8) != 0) {
+#else
+ } else if (pad) {
+#endif
+ _tr_stored_block(s, buf, 0, eof);
}
Tracev((stderr,"\ncomprlen %lu(%lu) ", s->compressed_len>>3,
s->compressed_len-7*eof));
--- gzip-1.2.2/zlib.def 1969-12-31 17:00:00.000000000 -0700
+++ rsyncable/zlib.def 2005-02-17 14:01:48.972258000 -0700
@@ -0,0 +1,61 @@
+LIBRARY
+; zlib data compression library
+
+EXPORTS
+; basic functions
+ zlibVersion
+ deflate
+ deflateEnd
+ inflate
+ inflateEnd
+; advanced functions
+ deflateSetDictionary
+ deflateCopy
+ deflateReset
+ deflateParams
+ deflateBound
+ deflatePrime
+ inflateSetDictionary
+ inflateSync
+ inflateCopy
+ inflateReset
+ inflateBack
+ inflateBackEnd
+ zlibCompileFlags
+; utility functions
+ compress
+ compress2
+ compressBound
+ uncompress
+ gzopen
+ gzdopen
+ gzsetparams
+ gzread
+ gzwrite
+ gzprintf
+ gzputs
+ gzgets
+ gzputc
+ gzgetc
+ gzungetc
+ gzflush
+ gzseek
+ gzrewind
+ gztell
+ gzeof
+ gzclose
+ gzerror
+ gzclearerr
+; checksum functions
+ adler32
+ crc32
+; various hacks, don't look :)
+ deflateInit_
+ deflateInit2_
+ inflateInit_
+ inflateInit2_
+ inflateBackInit_
+ inflateSyncPoint
+ get_crc_table
+ zError
+ deflateSetRsyncParameters_
\ No newline at end of file
--- gzip-1.2.2/zlib.h 2004-10-03 22:57:26.000000000 -0700
+++ rsyncable/zlib.h 2005-02-17 14:02:11.753362200 -0700
@@ -179,6 +179,13 @@
#define Z_NULL 0 /* for initializing zalloc, zfree, opaque */
+
+/* Constants used for selecting Rsyncable checksum type (the checksum_type argument of deflateSetRsyncParameters) */
+#define Z_RSYNCABLE_OFF 0 /* no rolling checksum is computed */
+#define Z_RSYNCABLE_SIMPLESUM 1 /* simple summation of the window bytes */
+#define Z_RSYNCABLE_RSSUM 2 /* two-part rsync checksum */
+
+
#define zlib_version zlibVersion()
/* for compatibility with versions < 1.0.2 */
@@ -1185,6 +1192,17 @@
ZLIB_VERSION, sizeof(z_stream))
+
+/* deflateSetRsyncParameters allows for setting rsyncable parameters on a stream. checksum_type is one of the
+ Z_RSYNCABLE_* constants; a window_size of 0 selects the built-in default and a reset_block_size of 0 selects the window size.
+ These parameters MUST be set immediately after the stream is created, and before any data is written to the stream.
+ */
+ZEXTERN int ZEXPORT deflateSetRsyncParameters_ OF((z_stream FAR *strm, int checksum_type, unsigned long window_size, unsigned long reset_block_size));
+
+#define deflateSetRsyncParameters(strm, checksum_type, window_size, reset_block_size) \
+ deflateSetRsyncParameters_((strm), (checksum_type), (window_size), (reset_block_size))
+
+
#if !defined(ZUTIL_H) && !defined(NO_DUMMY_DECL)
struct internal_state {int dummy;}; /* hack for buggy compilers */
#endif
@@ -1193,6 +1211,10 @@
ZEXTERN int ZEXPORT inflateSyncPoint OF((z_streamp z));
ZEXTERN const uLongf * ZEXPORT get_crc_table OF((void));
+/* Global rsync mode control variable: nonzero enables the rolling checksum and chunk-boundary flushes, zero disables them */
+extern int zlib_rsync;
+
+
#ifdef __cplusplus
}
#endif
syntax highlighted by Code2HTML, v. 0.9.1