summaryrefslogtreecommitdiff
path: root/xdelta3
diff options
context:
space:
mode:
authorjosh.macdonald <jmacd@users.noreply.github.com>2007-02-12 03:05:48 +0000
committerjosh.macdonald <jmacd@users.noreply.github.com>2007-02-12 03:05:48 +0000
commit792f7c0cf9cb987e3d2a0501e1b228836ec798e7 (patch)
tree40b2cbb40ceb3b9e07e62ac7f64c3bcf400c8f67 /xdelta3
parentff760f81e367b84b8357f55721ad8e24cf086ef4 (diff)
Fix and recomment xd3_srcwin_move_point() after changes in SVN 127 to use src->blksize as the advance size.
Diffstat (limited to 'xdelta3')
-rw-r--r--xdelta3/xdelta3-main.h2
-rw-r--r--xdelta3/xdelta3.c101
2 files changed, 58 insertions, 45 deletions
diff --git a/xdelta3/xdelta3-main.h b/xdelta3/xdelta3-main.h
index 7ccd63b..50cbb67 100644
--- a/xdelta3/xdelta3-main.h
+++ b/xdelta3/xdelta3-main.h
@@ -1998,7 +1998,7 @@ main_open_output (xd3_stream *stream, main_file *ofile)
1998 return ret; 1998 return ret;
1999 } 1999 }
2000 2000
2001 if (option_verbose > 1) { XPR(NT "open output: %s\n", ofile->filename); } 2001 if (option_verbose > 1) { XPR(NT "output file: %s\n", ofile->filename); }
2002 } 2002 }
2003 2003
2004#if EXTERNAL_COMPRESSION 2004#if EXTERNAL_COMPRESSION
diff --git a/xdelta3/xdelta3.c b/xdelta3/xdelta3.c
index cc11f5b..b3fd2ef 100644
--- a/xdelta3/xdelta3.c
+++ b/xdelta3/xdelta3.c
@@ -4186,9 +4186,15 @@ xd3_srcwin_setup (xd3_stream *stream)
4186 return 0; 4186 return 0;
4187} 4187}
4188 4188
4189/* Called at every entrance to the string-match loop and each time 4189/* This function computes more source checksums to advance the window.
4190 * stream->input_position the value returned as *next_move_point. 4190 * Called at every entrance to the string-match loop and each time
4191 * This function computes more source checksums to advance the window. */ 4191 * stream->input_position reaches the value returned as
4192 * *next_move_point. NB: this is one of the most expensive functions
4193 * in this code and also the most critical for good compression.
4194 *
4195 * TODO: really would like a good test for this logic. how?
4196 * TODO: optimize the inner loop
4197 */
4192static int 4198static int
4193xd3_srcwin_move_point (xd3_stream *stream, usize_t *next_move_point) 4199xd3_srcwin_move_point (xd3_stream *stream, usize_t *next_move_point)
4194{ 4200{
@@ -4201,21 +4207,23 @@ xd3_srcwin_move_point (xd3_stream *stream, usize_t *next_move_point)
4201 return 0; 4207 return 0;
4202 } 4208 }
4203 4209
4204 /* Begin by advancing at twice the input rate, up to half the maximum window size. */ 4210 /* Begin by advancing at twice the input rate, up to half the
4211 * maximum window size. */
4205 logical_input_cksum_pos = min((stream->total_in + stream->input_position) * 2, 4212 logical_input_cksum_pos = min((stream->total_in + stream->input_position) * 2,
4206 (stream->total_in + stream->input_position) + 4213 (stream->total_in + stream->input_position) +
4207 (stream->srcwin_maxsz / 2)); 4214 (stream->srcwin_maxsz / 2));
4208 4215
4209 /* If srcwin_cksum_pos is already greater, wait until the difference is met. */ 4216 /* If srcwin_cksum_pos is already greater, wait until the difference
4217 * is met. */
4210 if (stream->srcwin_cksum_pos > logical_input_cksum_pos) 4218 if (stream->srcwin_cksum_pos > logical_input_cksum_pos)
4211 { 4219 {
4212 *next_move_point = stream->input_position + 4220 *next_move_point = stream->input_position +
4213 (usize_t)(stream->srcwin_cksum_pos - logical_input_cksum_pos); 4221 (usize_t)(stream->srcwin_cksum_pos - logical_input_cksum_pos);
4214 goto finished; 4222 return 0;
4215 } 4223 }
4216 4224
4217 /* If the stream has matched beyond the srcwin_cksum_pos (good), we shouldn't 4225 /* A long match may have extended past srcwin_cksum_pos. Don't
4218 * begin reading so far back. */ 4226 * start checksumming already-matched source data. */
4219 if (stream->maxsrcaddr > stream->srcwin_cksum_pos) 4227 if (stream->maxsrcaddr > stream->srcwin_cksum_pos)
4220 { 4228 {
4221 stream->srcwin_cksum_pos = stream->maxsrcaddr; 4229 stream->srcwin_cksum_pos = stream->maxsrcaddr;
@@ -4226,19 +4234,20 @@ xd3_srcwin_move_point (xd3_stream *stream, usize_t *next_move_point)
4226 logical_input_cksum_pos = stream->srcwin_cksum_pos; 4234 logical_input_cksum_pos = stream->srcwin_cksum_pos;
4227 } 4235 }
4228 4236
4229 /* Advance at least one source block. For the command-line, this 4237 /* Advance at least one source block. With the command-line
4230 * means we read to EOF for files smaller than the source window. 4238 * defaults this means:
4231 * Note that this term (stream->src->blksize) is used again below.
4232 * 4239 *
4233 * TODO: This is possibly a post-3.0o performance regression. I 4240 * if (src->size <= srcwin_maxsz), index the entire source at once
4234 * have examples of files that perform better w/ xdelta1. Note that 4241 * using the position of the first non-match. This is good for
4235 * xdelta1 fills its cksum table in reverse so that earlier matches 4242 * small inputs, especially when the content may have moved anywhere
4236 * are preferred. Note that xdelta3 is designed to avoid 4243 * in the file (e.g., tar files).
4237 * unnecessary computation, which this might be if we have long 4244 *
4238 * matches ahead. OTOH, sometimes tar scrambles a file such that you 4245 * if (src->size > srcwin_maxsz), index at least one block (which
4239 * copy from the end to the beginning right away, and this logic at 4246 * the command-line sets to 1/32 of srcwin_maxsz) ahead of the
4240 * least helps those cases. Used to use 16K here, instead of 4247 * logical position. This is good for different reasons: when a
4241 * stream->src->blksize. 4248 * long match spanning several source blocks is encountered, this
4249 * avoids computing checksums for those blocks. If the data can
4250 * move anywhere, this is bad.
4242 */ 4251 */
4243 logical_input_cksum_pos += stream->src->blksize; 4252 logical_input_cksum_pos += stream->src->blksize;
4244 4253
@@ -4251,19 +4260,19 @@ xd3_srcwin_move_point (xd3_stream *stream, usize_t *next_move_point)
4251 stream->srcwin_cksum_pos < stream->src->size) 4260 stream->srcwin_cksum_pos < stream->src->size)
4252 { 4261 {
4253 xoff_t blkno = stream->srcwin_cksum_pos / stream->src->blksize; 4262 xoff_t blkno = stream->srcwin_cksum_pos / stream->src->blksize;
4254 usize_t blkoff = stream->srcwin_cksum_pos % stream->src->blksize; 4263 ssize_t oldpos = stream->srcwin_cksum_pos % stream->src->blksize;
4255 usize_t onblk = xd3_bytes_on_srcblk (stream->src, blkno); 4264 ssize_t blkpos = xd3_bytes_on_srcblk (stream->src, blkno);
4256 int ret; 4265 int ret;
4257 4266
4258 if (blkoff + stream->smatcher.large_look > onblk) 4267 if (oldpos + stream->smatcher.large_look > blkpos)
4259 { 4268 {
4260 /* Next block */
4261 stream->srcwin_cksum_pos = (blkno + 1) * stream->src->blksize; 4269 stream->srcwin_cksum_pos = (blkno + 1) * stream->src->blksize;
4262 continue; 4270 continue;
4263 } 4271 }
4264 4272
4265 if ((ret = xd3_getblk (stream, blkno))) 4273 if ((ret = xd3_getblk (stream, blkno)))
4266 { 4274 {
4275 /* TOOFARBACK should never occur here, since we read forward. */
4267 if (ret == XD3_TOOFARBACK) 4276 if (ret == XD3_TOOFARBACK)
4268 { 4277 {
4269 ret = XD3_INTERNAL; 4278 ret = XD3_INTERNAL;
@@ -4271,43 +4280,47 @@ xd3_srcwin_move_point (xd3_stream *stream, usize_t *next_move_point)
4271 return ret; 4280 return ret;
4272 } 4281 }
4273 4282
4274 /* Note: I experimented with rewriting this block to use 4283 /* This inserts checksums for the entire block, in reverse,
4275 * LARGE_CKSUM_UPDATE() instead of recalculating the cksum every 4284 * starting from the end of the block. This logic does not test
4276 * N bytes. It seemed to make performance worse (except 4285 * stream->srcwin_cksum_pos because it always advances it to the
4277 * obviously for step == 1, not worth extra complexity). 4286 * start of the next block.
4278 * 4287 *
4279 * Prior to 3.0p, this loop inserted cksums forwards, now it 4288 * oldpos is the srcwin_cksum_pos within this block. blkpos is
4280 * goes in reverse like xdelta1. */ 4289 * the number of bytes available. Each iteration inspects
4281 while (onblk >= stream->smatcher.large_step && 4290 * large_look bytes then steps back large_step bytes. The
4282 onblk >= stream->smatcher.large_look) 4291 * if-stmt above ensures at least one large_look of data. */
4283 { 4292 blkpos -= stream->smatcher.large_look;
4284 onblk -= stream->smatcher.large_step;
4285 4293
4286 uint32_t cksum = xd3_lcksum (stream->src->curblk + onblk, 4294 do
4295 {
4296 uint32_t cksum = xd3_lcksum (stream->src->curblk + blkpos,
4287 stream->smatcher.large_look); 4297 stream->smatcher.large_look);
4288 usize_t hval = xd3_checksum_hash (& stream->large_hash, cksum); 4298 usize_t hval = xd3_checksum_hash (& stream->large_hash, cksum);
4289 4299
4290 /* TODO: can you simplify this computation? */ 4300 stream->large_table[hval] =
4291 stream->large_table[hval] = (stream->src->blksize * blkno + onblk) + HASH_CKOFFSET; 4301 (usize_t) ((stream->src->blksize * blkno) +
4302 (xoff_t)(blkpos + HASH_CKOFFSET));
4292 4303
4293 IF_DEBUG (stream->large_ckcnt += 1); 4304 IF_DEBUG (stream->large_ckcnt += 1);
4305
4306 blkpos -= stream->smatcher.large_step;
4294 } 4307 }
4308 while (blkpos >= oldpos);
4295 4309
4296 stream->srcwin_cksum_pos = (blkno + 1) * stream->src->blksize; 4310 stream->srcwin_cksum_pos = (blkno + 1) * stream->src->blksize;
4297 } 4311 }
4298 4312
4299 if (stream->srcwin_cksum_pos > stream->src->size) 4313 if (stream->srcwin_cksum_pos >= stream->src->size)
4300 { 4314 {
4301 /* We do this so the xd3_source_cksum_offset() function, which 4315 /* This invariant is needed for xd3_source_cksum_offset() */
4302 * uses the high/low 32-bits of srcwin_cksum_pos, is guaranteed
4303 * never to return a position > src->size */
4304 stream->srcwin_cksum_pos = stream->src->size; 4316 stream->srcwin_cksum_pos = stream->src->size;
4317 *next_move_point = USIZE_T_MAX;
4318 return 0;
4305 } 4319 }
4306 4320
4307 finished:
4308 /* How long until this function should be called again. */ 4321 /* How long until this function should be called again. */
4309 /* TODO: Ack, this logic is underflowing the usize_t */ 4322 XD3_ASSERT(stream->srcwin_cksum_pos >= logical_input_cksum_pos);
4310 *next_move_point = stream->input_position + 4323 *next_move_point = stream->input_position + 1 +
4311 (usize_t)(stream->srcwin_cksum_pos - logical_input_cksum_pos); 4324 (usize_t)(stream->srcwin_cksum_pos - logical_input_cksum_pos);
4312 return 0; 4325 return 0;
4313} 4326}