From 4be58a9a9ab199ef2b3765f57e649398e8dd5890 Mon Sep 17 00:00:00 2001 From: "josh.macdonald" Date: Tue, 13 Feb 2007 02:44:36 +0000 Subject: Adds xd3_flags for setting compression level in xd3_encode_memory(). --- xdelta3/xdelta3-main.h | 6 +++- xdelta3/xdelta3-regtest.py | 81 +++++++++++++++++++++++----------------------- xdelta3/xdelta3-test.py | 22 +++++++++++-- xdelta3/xdelta3.c | 44 ++++++++++++++++++------- xdelta3/xdelta3.h | 24 ++++++++++---- xdelta3/xdelta3.swig | 28 ++++++++++++++-- 6 files changed, 139 insertions(+), 66 deletions(-) diff --git a/xdelta3/xdelta3-main.h b/xdelta3/xdelta3-main.h index 50cbb67..44f51ee 100644 --- a/xdelta3/xdelta3-main.h +++ b/xdelta3/xdelta3-main.h @@ -2454,7 +2454,11 @@ main_input (xd3_cmd cmd, { XPR(NT "compression level: %d\n", option_level); } - if (option_level == 0) { stream_flags |= XD3_NOCOMPRESS; } + if (option_level == 0) + { + stream_flags |= XD3_NOCOMPRESS; + config.smatch_cfg = XD3_SMATCH_FASTEST; + } else if (option_level == 1) { config.smatch_cfg = XD3_SMATCH_FASTEST; } else if (option_level <= 5) { config.smatch_cfg = XD3_SMATCH_FAST; } else if (option_level == 6) { config.smatch_cfg = XD3_SMATCH_DEFAULT; } diff --git a/xdelta3/xdelta3-regtest.py b/xdelta3/xdelta3-regtest.py index 928a5f0..8aa03ac 100755 --- a/xdelta3/xdelta3-regtest.py +++ b/xdelta3/xdelta3-regtest.py @@ -16,12 +16,12 @@ # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -# Under construction. +# TODO: Test IOPT (1.5 vs. greedy) -# TODO: This is really part test, part performance evaluation suite, and -# really incomplete. +# TODO: Start testing window sizes -# TODO: Test IOPT (1.5 vs. greedy) +# TODO: Note: xd3_encode_memory is underperforming the command-line +# at run-speed tests (due to excess memory allocation?). Fix. 
import os, sys, math, re, time, types, array, random import xdelta3main @@ -85,7 +85,7 @@ def INPUT_SPEC(rand): RCSDIR = '/tmp/PRCS_read_copy' #RCSDIR = 'G:/jmacd/PRCS' -SAMPLEDIR = "C:/sample_data/WESNOTH_tmp/tar' +SAMPLEDIR = "C:/sample_data/WESNOTH_tmp/tar" TMPDIR = '/tmp/xd3regtest.%d' % os.getpid() @@ -260,14 +260,15 @@ class RcsFile: os.stat(self.Verf(v+1)).st_size < MIN_SIZE: continue - result = TimeRun(runnable.Runner(self.Verf(v), - self.Vstr(v), - self.Verf(v+1), - self.Vstr(v+1))) - print 'testing %s %s: ideal %.3f%%: time %.7f: in %u trials' % \ + runnable.SetInputs(self.Verf(v), + self.Vstr(v), + self.Verf(v+1), + self.Vstr(v+1)) + result = TimedTest(runnable) + print 'testing %s %s: ratio %.3f%%: time %.7f: in %u trials' % \ (os.path.basename(self.fname), self.Vstr(v+1), - result.r1.ideal, + result.r1.ratio, result.time.mean, result.trials) ntrials.append(result) @@ -370,9 +371,11 @@ class Bucks: f.write("%.1f %.1f %.1f %d\n" % (i[0],i[1],i[2],i[3])) # # -class TimeRun: +class TimedTest: def __init__(self,runnable, - skip_trials=SKIP_TRIALS,min_trials=MIN_TRIALS,max_trials=MAX_TRIALS, + skip_trials=SKIP_TRIALS, + min_trials=MIN_TRIALS, + max_trials=MAX_TRIALS, min_stddev_pct=MIN_STDDEV_PCT): min_trials = min(min_trials,max_trials) @@ -474,7 +477,6 @@ def RunCommand(args): raise CommandError(args, 'exited %d' % p) def RunCommandIO(args,infn,outfn): - #print "run command io", args p = os.fork() if p == 0: os.dup2(os.open(infn,os.O_RDONLY),0) @@ -488,7 +490,6 @@ def RunCommandIO(args,infn,outfn): def RunXdelta3(args): try: - #print 'RUN', args xdelta3main.main(args) except Exception, e: raise CommandError(args, "xdelta3.main exception") @@ -500,15 +501,12 @@ class GzipInfo: class Xdelta3Info: def __init__(self,target,delta): - # TODO: bug is fixed - self.extcomp = 0 # TODO: I removed some code that called printhdr - self.hdrsize = 0 # to compute these, but printhdr uses stdout (now) self.tgtsize = os.stat(target).st_size self.dsize = 
os.stat(delta).st_size if self.tgtsize > 0: - self.ideal = 100.0 * self.dsize / self.tgtsize; + self.ratio = 100.0 * self.dsize / self.tgtsize; else: - self.ideal = 0.0 + self.ratio = 0.0 class Xdelta3ModInfo: def __init__(self,target,delta): @@ -523,20 +521,20 @@ class Xdelta3ModInfo: self.tgtsize = len(target) self.dsize = len(delta) if self.tgtsize > 0: - self.ideal = 100.0 * self.dsize / self.tgtsize; + self.ratio = 100.0 * self.dsize / self.tgtsize; else: - self.ideal = 0.0 + self.ratio = 0.0 class Xdelta3Pair: - def __init__(self): + def __init__(self, extra): self.type = 'xdelta3' self.decode_args = '-dqf' self.encode_args = '-eqf' - self.extra = [] + self.extra = extra self.presrc = '-s' self.canrep = 1 - def Runner(self,old,oldv,new,newv): + def SetInputs(self,old,oldv,new,newv): self.old = old self.oldv = oldv self.new = new @@ -770,7 +768,7 @@ class RandomTester: return (TMPDIR + "/big.1", TMPDIR + "/big.2") - def RandomBigRun(self, f1, f2): + def RandomFileTest(self, f1, f2): config = None if len(self.old_configs) > 0: config = self.old_configs[0] @@ -781,9 +779,9 @@ class RandomTester: config = self.RandomConfig() #end - runner = Xdelta3Pair() - runner.extra = [ '-C', ','.join([str(x) for x in config]) ] - result = TimeRun(runner.Runner(f1, 1, f2, 2)) + runner = Xdelta3Pair([ '-C', ','.join([str(x) for x in config]) ]) + runner.SetInputs(f1, 1, f2, 2) + result = TimedTest(runner) tr = RandomTestResult(self.round_num, config, @@ -900,22 +898,24 @@ class RandomTester: else: stars = ' *' print 'Score: %0.6f %s (%.1f%s%s)' % \ - (test.score, test, s / len(all_r), stars, (len(all_r) > 2) and (' in %d' % len(all_r)) or "") + (test.score, test, s / len(all_r), stars, + (len(all_r) > 2) and + (' in %d' % len(all_r)) or "") #end return r #end #end +# This tests the raw speed of 0-byte inputs def RunSpeed(): - # TODO: Start testing window sizes for L in Decimals(MAX_RUN): SetFileSize(RUNFILE, L) - trx = TimeRun(Xdelta3Run1(RUNFILE)) + trx = 
TimedTest(Xdelta3Run1(RUNFILE)) ReportSpeed(L,trx,'xdelta3') - trm = TimeRun(Xdelta3Mod1(RUNFILE)) + trm = TimedTest(Xdelta3Mod1(RUNFILE)) ReportSpeed(L,trm,'module ') - trg = TimeRun(GzipRun1(RUNFILE)) + trg = TimedTest(GzipRun1(RUNFILE)) ReportSpeed(L,trg,'gzip ') #end #end @@ -924,15 +924,14 @@ if __name__ == "__main__": try: RunCommand(['rm', '-rf', TMPDIR]) os.mkdir(TMPDIR) - #rcsf = Test() - configs = [] - # This tests pairwise (date-ordered) performance - #rcsf.PairsByDate(Xdelta3Pair()) - - # This tests the raw speed of 0-byte inputs RunSpeed() + # This tests pairwise (date-ordered) performance + #rcsf = Test() + #rcsf.PairsByDate(Xdelta3Pair([])) + + configs = [] while 0: #f1 = '/tmp/big.1' @@ -944,7 +943,7 @@ if __name__ == "__main__": f2 = '/tmp/WESNOTH_tmp/wesnoth-1.1.13.tar' #f1 = '/tmp/big.1' #f2 = '/tmp/big.2' - test.RandomBigRun(f1, f2) + test.RandomFileTest(f1, f2) #end configs = test.ScoreTests() diff --git a/xdelta3/xdelta3-test.py b/xdelta3/xdelta3-test.py index 7ad25dd..2c37030 100755 --- a/xdelta3/xdelta3-test.py +++ b/xdelta3/xdelta3-test.py @@ -33,7 +33,7 @@ assert len(patch) < len(source) print 'encode: adler32 ...' result, patch_adler32 = xdelta3.xd3_encode_memory(target, source, 50, - 1<<10) + xdelta3.XD3_ADLER32) assert result == 0 assert len(patch_adler32) < len(source) @@ -41,7 +41,7 @@ assert len(patch_adler32) > len(patch) print 'encode: secondary ...' 
result, patch_djw = xdelta3.xd3_encode_memory(target, source, 50, - 1<<5) + xdelta3.XD3_SEC_DJW) assert result == 0 # secondary compression doesn't help @@ -111,7 +111,7 @@ for corrupt_pos in range(len(patch_adler32)): # without adler32 verification, the error may be in the data section which # in this case is 6 bytes 'target' result, corrupt = xdelta3.xd3_decode_memory(input, source, len(target), - 1<<11) + xdelta3.XD3_ADLER32_NOVER) if result == 0: noverify_count = noverify_count + 1 #print "got %s" % corrupt @@ -125,6 +125,22 @@ result, target2 = xdelta3.xd3_decode_memory(zdata, None, len(target)) assert result == 0 assert target == target2 +# Test compression level setting via flags. assumes a 9 byte checksum +# and that level 9 steps 2, level 1 steps 15: +# 01234567890123456789012345678901 +# level 1 only indexes 2 checksums "abcdefghi" and "ABCDEFGHI" +# outputs 43 vs. 23 bytes +print 'encode: compression level ...' + +source = '_la_la_abcdefghi_la_la_ABCDEFGHI' +target = 'la_la_ABCDEFGH__la_la_abcdefgh__' + +result1, level1 = xdelta3.xd3_encode_memory(target, source, 50, xdelta3.XD3_COMPLEVEL_1) +result9, level9 = xdelta3.xd3_encode_memory(target, source, 50, xdelta3.XD3_COMPLEVEL_9) + +assert result1 == 0 and result9 == 0 +assert len(level1) > len(level9) + # # diff --git a/xdelta3/xdelta3.c b/xdelta3/xdelta3.c index b3fd2ef..32dc8b0 100644 --- a/xdelta3/xdelta3.c +++ b/xdelta3/xdelta3.c @@ -94,7 +94,7 @@ challenge. Search in this file for "black magic", a heuristic. 3. STREAM ALIGNMENT. Stream alignment is needed to compress large - inputs in constant space. TODO: redocument + inputs in constant space. See xd3_srcwin_move_point(). 4. WINDOW SELECTION. When the IOPT buffer flushes, in the first call to xd3_iopt_finish_encoding containing any kind of copy instruction, @@ -779,7 +779,7 @@ static const xd3_sec_type djw_sec_type = * allowing to vary the distribution of single- and * double-instructions and change the number of near and same copy * modes. 
More exotic tables are only possible by extending this - * code. TODO: experiment with a double-copy instruction. + * code. * * For performance reasons, both the parametrized and non-parametrized * versions of xd3_choose_instruction remain. The parametrized @@ -2543,6 +2543,28 @@ xd3_config_stream(xd3_stream *stream, return XD3_INTERNAL; } + if (config->smatch_cfg == XD3_SMATCH_DEFAULT && + (stream->flags & XD3_COMPLEVEL_MASK) != 0) + { + int level = (stream->flags & XD3_COMPLEVEL_MASK) >> XD3_COMPLEVEL_SHIFT; + + switch (level) + { + case 1: case 2: + IF_BUILD_FASTEST(*smatcher = __smatcher_fastest; + break;) + case 3: case 4: case 5: + IF_BUILD_FAST(*smatcher = __smatcher_fast; + break;) + case 6: + IF_BUILD_DEFAULT(*smatcher = __smatcher_default; + break;) + default: + IF_BUILD_SLOW(*smatcher = __smatcher_slow; + break;) + } + } + return 0; } @@ -3520,7 +3542,7 @@ xd3_encode_init (xd3_stream *stream) * first call to string_match--that way identical or short inputs require no table * allocation. */ - // TODO: experiments have to be done!!! + /* TODO: need to experiment w/ XD3_DEFAULT_SPREVSZ and large hash functions */ if (large_comp) { usize_t hash_values = (stream->srcwin_maxsz / stream->smatcher.large_step); @@ -3532,8 +3554,7 @@ xd3_encode_init (xd3_stream *stream) if (small_comp) { - /* Hard-coded, keeps table small because small matches become inefficient. - * TODO: verify this stuff. */ + /* Hard-coded, keeps table small because small matches become inefficient. */ usize_t hash_values = min(stream->winsize, XD3_DEFAULT_SPREVSZ); xd3_size_hashtable (stream, @@ -3931,7 +3952,7 @@ xd3_process_memory (int is_encode, if (is_encode) { - /* TODO: for large inputs, limit window size ... */ + /* TODO: for large inputs, limit window size, need to select a default ...
*/ config.srcwin_maxsz = source_size; config.winsize = min(input_size, (usize_t) (1<<20)); } @@ -4117,7 +4138,8 @@ xd3_source_cksum_offset(xd3_stream *stream, usize_t low) return low; } - // This should not be >= because srcwin_cksum_pos is the next position to index + /* This should not be >= because srcwin_cksum_pos is the next + * position to index. */ if (low > sr) { return (--s0 << 32) | low; } @@ -4564,8 +4586,6 @@ xd3_source_extend_match (xd3_stream *stream) usize_t total = stream->match_fwd + stream->match_back; /* Correct the variables to remove match_back from the equation. */ - // IT'S A BUG! - usize_t target_position = stream->input_position - stream->match_back; usize_t match_length = stream->match_back + stream->match_fwd; xoff_t match_position = stream->match_srcpos - stream->match_back; @@ -4589,13 +4609,13 @@ xd3_source_extend_match (xd3_stream *stream) if (match_end > stream->match_maxaddr) { - // Note: per-window + /* Note: per-window */ stream->match_maxaddr = match_end; } if (match_end > stream->maxsrcaddr) { - // Note: across windows + /* Note: across windows */ stream->maxsrcaddr = match_end; } @@ -4927,7 +4947,7 @@ XD3_TEMPLATE(xd3_string_match_) (xd3_stream *stream) int run_l; int ret; usize_t match_length; - usize_t match_offset; // Note: "may be unused" warnings are bogus (due to min_match test) + usize_t match_offset; /* "may be unused" warnings are bogus (due to min_match test) */ usize_t next_move_point; /* If there will be no compression due to settings or short input, skip it entirely. */ diff --git a/xdelta3/xdelta3.h b/xdelta3/xdelta3.h index 8c11000..cee63cf 100644 --- a/xdelta3/xdelta3.h +++ b/xdelta3/xdelta3.h @@ -333,17 +333,27 @@ typedef enum * only search the source, not the target. */ XD3_BEGREEDY = (1 << 14), /* disable the "1.5-pass algorithm", instead use * greedy matching. Greedy is off by default. 
*/ + + /* 4 bits to set the compression level the same as the command-line + * setting -1 through -9 (-0 corresponds to the XD3_NOCOMPRESS flag, + * and is independent of compression level). This is for + * convenience, especially with xd3_encode_memory(). */ + + XD3_COMPLEVEL_SHIFT = 20, /* bits 20 - 23 */ + XD3_COMPLEVEL_MASK = (0xF << XD3_COMPLEVEL_SHIFT), + XD3_COMPLEVEL_1 = (1 << XD3_COMPLEVEL_SHIFT), + XD3_COMPLEVEL_3 = (3 << XD3_COMPLEVEL_SHIFT), + XD3_COMPLEVEL_6 = (6 << XD3_COMPLEVEL_SHIFT), + XD3_COMPLEVEL_9 = (9 << XD3_COMPLEVEL_SHIFT), + } xd3_flags; -/* The values of this enumeration are set in xd3_config using the smatch_cfg variable. It - * can be set to slow, fast, soft, or default. The fast and slow setting uses preset, - * hardcoded parameters and the soft setting is accompanied by user-supplied parameters. - * If the user supplies 'default' the code selects one of the available string matchers. - * Due to compile-time settings (see XD3_SLOW_SMATCHER, XD3_FAST_SMATCHER, - * XD3_SOFT_SMATCHER variables), not all options may be available. */ +/* The values of this enumeration are set in xd3_config using the + * smatch_cfg variable. It can be set to default, slow, fast, etc., + * and soft. */ typedef enum { - XD3_SMATCH_DEFAULT = 0, + XD3_SMATCH_DEFAULT = 0, /* Flags may contain XD3_COMPLEVEL bits, else default. */ XD3_SMATCH_SLOW = 1, XD3_SMATCH_FAST = 2, XD3_SMATCH_FASTEST = 3, diff --git a/xdelta3/xdelta3.swig b/xdelta3/xdelta3.swig index 696974f..2ef4306 100644 --- a/xdelta3/xdelta3.swig +++ b/xdelta3/xdelta3.swig @@ -64,6 +64,30 @@ int xd3_decode_memory (const char *input, unsigned int max_output, int flags); +int xd3_main_cmdline (int ARGC, char **ARGV); - -int xd3_main_cmdline (int ARGC, char **ARGV); +/* Is this the right way?
*/ +enum { + /*XD3_JUST_HDR,*/ + /*XD3_SKIP_WINDOW,*/ + /*XD3_SKIP_EMIT,*/ + /*XD3_FLUSH,*/ + XD3_SEC_DJW, + XD3_SEC_FGK, + /*XD3_SEC_TYPE,*/ + XD3_SEC_NODATA, + XD3_SEC_NOINST, + XD3_SEC_NOADDR, + /*XD3_SEC_OTHER,*/ + XD3_ADLER32, + XD3_ADLER32_NOVER, + XD3_ALT_CODE_TABLE, + XD3_NOCOMPRESS, + XD3_BEGREEDY, + XD3_COMPLEVEL_SHIFT, + XD3_COMPLEVEL_MASK, + XD3_COMPLEVEL_1, + XD3_COMPLEVEL_3, + XD3_COMPLEVEL_6, + XD3_COMPLEVEL_9, +}; -- cgit v1.2.3