diff options
author | josh.macdonald <jmacd@users.noreply.github.com> | 2007-02-12 03:05:48 +0000 |
---|---|---|
committer | josh.macdonald <jmacd@users.noreply.github.com> | 2007-02-12 03:05:48 +0000 |
commit | 792f7c0cf9cb987e3d2a0501e1b228836ec798e7 (patch) | |
tree | 40b2cbb40ceb3b9e07e62ac7f64c3bcf400c8f67 /xdelta3 | |
parent | ff760f81e367b84b8357f55721ad8e24cf086ef4 (diff) |
Fix and recomment xd3_srcwin_move_point() after changes in SVN 127 to use src->blksize as the advance size.
Diffstat (limited to 'xdelta3')
-rw-r--r-- | xdelta3/xdelta3-main.h | 2 | ||||
-rw-r--r-- | xdelta3/xdelta3.c | 101 |
2 files changed, 58 insertions, 45 deletions
diff --git a/xdelta3/xdelta3-main.h b/xdelta3/xdelta3-main.h index 7ccd63b..50cbb67 100644 --- a/xdelta3/xdelta3-main.h +++ b/xdelta3/xdelta3-main.h | |||
@@ -1998,7 +1998,7 @@ main_open_output (xd3_stream *stream, main_file *ofile) | |||
1998 | return ret; | 1998 | return ret; |
1999 | } | 1999 | } |
2000 | 2000 | ||
2001 | if (option_verbose > 1) { XPR(NT "open output: %s\n", ofile->filename); } | 2001 | if (option_verbose > 1) { XPR(NT "output file: %s\n", ofile->filename); } |
2002 | } | 2002 | } |
2003 | 2003 | ||
2004 | #if EXTERNAL_COMPRESSION | 2004 | #if EXTERNAL_COMPRESSION |
diff --git a/xdelta3/xdelta3.c b/xdelta3/xdelta3.c index cc11f5b..b3fd2ef 100644 --- a/xdelta3/xdelta3.c +++ b/xdelta3/xdelta3.c | |||
@@ -4186,9 +4186,15 @@ xd3_srcwin_setup (xd3_stream *stream) | |||
4186 | return 0; | 4186 | return 0; |
4187 | } | 4187 | } |
4188 | 4188 | ||
4189 | /* Called at every entrance to the string-match loop and each time | 4189 | /* This function computes more source checksums to advance the window. |
4190 | * stream->input_position the value returned as *next_move_point. | 4190 | * Called at every entrance to the string-match loop and each time |
4191 | * This function computes more source checksums to advance the window. */ | 4191 | * stream->input_position reaches the value returned as |
4192 | * *next_move_point. NB: this is one of the most expensive functions | ||
4193 | * in this code and also the most critical for good compression. | ||
4194 | * | ||
4195 | * TODO: really would like a good test for this logic. how? | ||
4196 | * TODO: optimize the inner loop | ||
4197 | */ | ||
4192 | static int | 4198 | static int |
4193 | xd3_srcwin_move_point (xd3_stream *stream, usize_t *next_move_point) | 4199 | xd3_srcwin_move_point (xd3_stream *stream, usize_t *next_move_point) |
4194 | { | 4200 | { |
@@ -4201,21 +4207,23 @@ xd3_srcwin_move_point (xd3_stream *stream, usize_t *next_move_point) | |||
4201 | return 0; | 4207 | return 0; |
4202 | } | 4208 | } |
4203 | 4209 | ||
4204 | /* Begin by advancing at twice the input rate, up to half the maximum window size. */ | 4210 | /* Begin by advancing at twice the input rate, up to half the |
4211 | * maximum window size. */ | ||
4205 | logical_input_cksum_pos = min((stream->total_in + stream->input_position) * 2, | 4212 | logical_input_cksum_pos = min((stream->total_in + stream->input_position) * 2, |
4206 | (stream->total_in + stream->input_position) + | 4213 | (stream->total_in + stream->input_position) + |
4207 | (stream->srcwin_maxsz / 2)); | 4214 | (stream->srcwin_maxsz / 2)); |
4208 | 4215 | ||
4209 | /* If srcwin_cksum_pos is already greater, wait until the difference is met. */ | 4216 | /* If srcwin_cksum_pos is already greater, wait until the difference |
4217 | * is met. */ | ||
4210 | if (stream->srcwin_cksum_pos > logical_input_cksum_pos) | 4218 | if (stream->srcwin_cksum_pos > logical_input_cksum_pos) |
4211 | { | 4219 | { |
4212 | *next_move_point = stream->input_position + | 4220 | *next_move_point = stream->input_position + |
4213 | (usize_t)(stream->srcwin_cksum_pos - logical_input_cksum_pos); | 4221 | (usize_t)(stream->srcwin_cksum_pos - logical_input_cksum_pos); |
4214 | goto finished; | 4222 | return 0; |
4215 | } | 4223 | } |
4216 | 4224 | ||
4217 | /* If the stream has matched beyond the srcwin_cksum_pos (good), we shouldn't | 4225 | /* A long match may have extended past srcwin_cksum_pos. Don't |
4218 | * begin reading so far back. */ | 4226 | * start checksumming already-matched source data. */ |
4219 | if (stream->maxsrcaddr > stream->srcwin_cksum_pos) | 4227 | if (stream->maxsrcaddr > stream->srcwin_cksum_pos) |
4220 | { | 4228 | { |
4221 | stream->srcwin_cksum_pos = stream->maxsrcaddr; | 4229 | stream->srcwin_cksum_pos = stream->maxsrcaddr; |
@@ -4226,19 +4234,20 @@ xd3_srcwin_move_point (xd3_stream *stream, usize_t *next_move_point) | |||
4226 | logical_input_cksum_pos = stream->srcwin_cksum_pos; | 4234 | logical_input_cksum_pos = stream->srcwin_cksum_pos; |
4227 | } | 4235 | } |
4228 | 4236 | ||
4229 | /* Advance at least one source block. For the command-line, this | 4237 | /* Advance at least one source block. With the command-line |
4230 | * means we read to EOF for files smaller than the source window. | 4238 | * defaults this means: |
4231 | * Note that this term (stream->src->blksize) is used again below. | ||
4232 | * | 4239 | * |
4233 | * TODO: This is possibly a post-3.0o performance regression. I | 4240 | * if (src->size <= srcwin_maxsz), index the entire source at once |
4234 | * have examples of files that perform better w/ xdelta1. Note that | 4241 | * using the position of the first non-match. This is good for |
4235 | * xdelta1 fills its cksum table in reverse so that earlier matches | 4242 | * small inputs, especially when the content may have moved anywhere |
4236 | * are preferred. Note that xdelta3 is designed to avoid | 4243 | * in the file (e.g., tar files). |
4237 | * unnecessary computation, which this might be if we have long | 4244 | * |
4238 | * matches ahead. OTOH, sometimes tar scrambles a file such that you | 4245 | * if (src->size > srcwin_maxsz), index at least one block (which |
4239 | * copy from the end to the beginning right away, and this logic at | 4246 | * the command-line sets to 1/32 of srcwin_maxsz) ahead of the |
4240 | * least helps those cases. Used to use 16K here, instead of | 4247 | * logical position. This is good for different reasons: when a |
4241 | * stream->src->blksize. | 4248 | * long match spanning several source blocks is encountered, this |
4249 | * avoids computing checksums for those blocks. If the data can | ||
4250 | * move anywhere, this is bad. | ||
4242 | */ | 4251 | */ |
4243 | logical_input_cksum_pos += stream->src->blksize; | 4252 | logical_input_cksum_pos += stream->src->blksize; |
4244 | 4253 | ||
@@ -4251,19 +4260,19 @@ xd3_srcwin_move_point (xd3_stream *stream, usize_t *next_move_point) | |||
4251 | stream->srcwin_cksum_pos < stream->src->size) | 4260 | stream->srcwin_cksum_pos < stream->src->size) |
4252 | { | 4261 | { |
4253 | xoff_t blkno = stream->srcwin_cksum_pos / stream->src->blksize; | 4262 | xoff_t blkno = stream->srcwin_cksum_pos / stream->src->blksize; |
4254 | usize_t blkoff = stream->srcwin_cksum_pos % stream->src->blksize; | 4263 | ssize_t oldpos = stream->srcwin_cksum_pos % stream->src->blksize; |
4255 | usize_t onblk = xd3_bytes_on_srcblk (stream->src, blkno); | 4264 | ssize_t blkpos = xd3_bytes_on_srcblk (stream->src, blkno); |
4256 | int ret; | 4265 | int ret; |
4257 | 4266 | ||
4258 | if (blkoff + stream->smatcher.large_look > onblk) | 4267 | if (oldpos + stream->smatcher.large_look > blkpos) |
4259 | { | 4268 | { |
4260 | /* Next block */ | ||
4261 | stream->srcwin_cksum_pos = (blkno + 1) * stream->src->blksize; | 4269 | stream->srcwin_cksum_pos = (blkno + 1) * stream->src->blksize; |
4262 | continue; | 4270 | continue; |
4263 | } | 4271 | } |
4264 | 4272 | ||
4265 | if ((ret = xd3_getblk (stream, blkno))) | 4273 | if ((ret = xd3_getblk (stream, blkno))) |
4266 | { | 4274 | { |
4275 | /* TOOFARBACK should never occur here, since we read forward. */ | ||
4267 | if (ret == XD3_TOOFARBACK) | 4276 | if (ret == XD3_TOOFARBACK) |
4268 | { | 4277 | { |
4269 | ret = XD3_INTERNAL; | 4278 | ret = XD3_INTERNAL; |
@@ -4271,43 +4280,47 @@ xd3_srcwin_move_point (xd3_stream *stream, usize_t *next_move_point) | |||
4271 | return ret; | 4280 | return ret; |
4272 | } | 4281 | } |
4273 | 4282 | ||
4274 | /* Note: I experimented with rewriting this block to use | 4283 | /* This inserts checksums for the entire block, in reverse, |
4275 | * LARGE_CKSUM_UPDATE() instead of recalculating the cksum every | 4284 | * starting from the end of the block. This logic does not test |
4276 | * N bytes. It seemed to make performance worse (except | 4285 | * stream->srcwin_cksum_pos because it always advances it to the |
4277 | * obviously for step == 1, not worth extra complexity). | 4286 | * start of the next block. |
4278 | * | 4287 | * |
4279 | * Prior to 3.0p, this loop inserted cksums forwards, now it | 4288 | * oldpos is the srcwin_cksum_pos within this block. blkpos is |
4280 | * goes in reverse like xdelta1. */ | 4289 | * the number of bytes available. Each iteration inspects |
4281 | while (onblk >= stream->smatcher.large_step && | 4290 | * large_look bytes then steps back large_step bytes. The |
4282 | onblk >= stream->smatcher.large_look) | 4291 | * if-stmt above ensures at least one large_look of data. */ |
4283 | { | 4292 | blkpos -= stream->smatcher.large_look; |
4284 | onblk -= stream->smatcher.large_step; | ||
4285 | 4293 | ||
4286 | uint32_t cksum = xd3_lcksum (stream->src->curblk + onblk, | 4294 | do |
4295 | { | ||
4296 | uint32_t cksum = xd3_lcksum (stream->src->curblk + blkpos, | ||
4287 | stream->smatcher.large_look); | 4297 | stream->smatcher.large_look); |
4288 | usize_t hval = xd3_checksum_hash (& stream->large_hash, cksum); | 4298 | usize_t hval = xd3_checksum_hash (& stream->large_hash, cksum); |
4289 | 4299 | ||
4290 | /* TODO: can you simplify this computation? */ | 4300 | stream->large_table[hval] = |
4291 | stream->large_table[hval] = (stream->src->blksize * blkno + onblk) + HASH_CKOFFSET; | 4301 | (usize_t) ((stream->src->blksize * blkno) + |
4302 | (xoff_t)(blkpos + HASH_CKOFFSET)); | ||
4292 | 4303 | ||
4293 | IF_DEBUG (stream->large_ckcnt += 1); | 4304 | IF_DEBUG (stream->large_ckcnt += 1); |
4305 | |||
4306 | blkpos -= stream->smatcher.large_step; | ||
4294 | } | 4307 | } |
4308 | while (blkpos >= oldpos); | ||
4295 | 4309 | ||
4296 | stream->srcwin_cksum_pos = (blkno + 1) * stream->src->blksize; | 4310 | stream->srcwin_cksum_pos = (blkno + 1) * stream->src->blksize; |
4297 | } | 4311 | } |
4298 | 4312 | ||
4299 | if (stream->srcwin_cksum_pos > stream->src->size) | 4313 | if (stream->srcwin_cksum_pos >= stream->src->size) |
4300 | { | 4314 | { |
4301 | /* We do this so the xd3_source_cksum_offset() function, which | 4315 | /* This invariant is needed for xd3_source_cksum_offset() */ |
4302 | * uses the high/low 32-bits of srcwin_cksum_pos, is guaranteed | ||
4303 | * never to return a position > src->size */ | ||
4304 | stream->srcwin_cksum_pos = stream->src->size; | 4316 | stream->srcwin_cksum_pos = stream->src->size; |
4317 | *next_move_point = USIZE_T_MAX; | ||
4318 | return 0; | ||
4305 | } | 4319 | } |
4306 | 4320 | ||
4307 | finished: | ||
4308 | /* How long until this function should be called again. */ | 4321 | /* How long until this function should be called again. */ |
4309 | /* TODO: Ack, this logic is underflowing the usize_t */ | 4322 | XD3_ASSERT(stream->srcwin_cksum_pos >= logical_input_cksum_pos); |
4310 | *next_move_point = stream->input_position + | 4323 | *next_move_point = stream->input_position + 1 + |
4311 | (usize_t)(stream->srcwin_cksum_pos - logical_input_cksum_pos); | 4324 | (usize_t)(stream->srcwin_cksum_pos - logical_input_cksum_pos); |
4312 | return 0; | 4325 | return 0; |
4313 | } | 4326 | } |