summaryrefslogtreecommitdiff
path: root/xdelta3
diff options
context:
space:
mode:
Diffstat (limited to 'xdelta3')
-rwxr-xr-xxdelta3/examples/small_page_test.c4
-rw-r--r--xdelta3/xdelta3-cfgs.h30
-rw-r--r--xdelta3/xdelta3-main.h40
-rwxr-xr-xxdelta3/xdelta3-regtest.py121
-rw-r--r--xdelta3/xdelta3-test.h17
-rw-r--r--xdelta3/xdelta3.c317
-rw-r--r--xdelta3/xdelta3.h15
-rw-r--r--xdelta3/xdelta3.prj24
8 files changed, 173 insertions, 395 deletions
diff --git a/xdelta3/examples/small_page_test.c b/xdelta3/examples/small_page_test.c
index 714fd6e..8a4532b 100755
--- a/xdelta3/examples/small_page_test.c
+++ b/xdelta3/examples/small_page_test.c
@@ -12,10 +12,6 @@
12// SPACE_MAX of 32K is sufficient for most inputs with XD3_COMPLEVEL_1 12// SPACE_MAX of 32K is sufficient for most inputs with XD3_COMPLEVEL_1
13// XD3_COMPLEVEL_9 requires about 4x more space than XD3_COMPLEVEL_1 13// XD3_COMPLEVEL_9 requires about 4x more space than XD3_COMPLEVEL_1
14 14
15// TODO: experimental PROMOTE code in the encoder is using 16 bytes
16// per entry in a table, where 4 bytes are sufficient w/o this
17// (unimpressive) feature. Remove PROMOTE code.
18
19#include "xdelta3.h" 15#include "xdelta3.h"
20#include "xdelta3.c" 16#include "xdelta3.c"
21 17
diff --git a/xdelta3/xdelta3-cfgs.h b/xdelta3/xdelta3-cfgs.h
index c48df9f..1474d12 100644
--- a/xdelta3/xdelta3-cfgs.h
+++ b/xdelta3/xdelta3-cfgs.h
@@ -28,11 +28,8 @@
28#define SLOOK stream->smatcher.small_look 28#define SLOOK stream->smatcher.small_look
29#define SCHAIN stream->smatcher.small_chain 29#define SCHAIN stream->smatcher.small_chain
30#define SLCHAIN stream->smatcher.small_lchain 30#define SLCHAIN stream->smatcher.small_lchain
31#define SSMATCH stream->smatcher.ssmatch
32#define TRYLAZY stream->smatcher.try_lazy
33#define MAXLAZY stream->smatcher.max_lazy 31#define MAXLAZY stream->smatcher.max_lazy
34#define LONGENOUGH stream->smatcher.long_enough 32#define LONGENOUGH stream->smatcher.long_enough
35#define PROMOTE stream->smatcher.promote
36 33
37#define SOFTCFG 1 34#define SOFTCFG 1
38#include "xdelta3.c" 35#include "xdelta3.c"
@@ -44,11 +41,8 @@
44#undef LSTEP 41#undef LSTEP
45#undef SCHAIN 42#undef SCHAIN
46#undef SLCHAIN 43#undef SLCHAIN
47#undef SSMATCH
48#undef TRYLAZY
49#undef MAXLAZY 44#undef MAXLAZY
50#undef LONGENOUGH 45#undef LONGENOUGH
51#undef PROMOTE
52#endif 46#endif
53 47
54#define SOFTCFG 0 48#define SOFTCFG 0
@@ -63,11 +57,8 @@
63#define SLOOK 4 57#define SLOOK 4
64#define SCHAIN 1 58#define SCHAIN 1
65#define SLCHAIN 1 59#define SLCHAIN 1
66#define SSMATCH 0
67#define TRYLAZY 1
68#define MAXLAZY 18 60#define MAXLAZY 18
69#define LONGENOUGH 18 61#define LONGENOUGH 18
70#define PROMOTE 0
71 62
72#include "xdelta3.c" 63#include "xdelta3.c"
73 64
@@ -77,11 +68,8 @@
77#undef LSTEP 68#undef LSTEP
78#undef SCHAIN 69#undef SCHAIN
79#undef SLCHAIN 70#undef SLCHAIN
80#undef SSMATCH
81#undef TRYLAZY
82#undef MAXLAZY 71#undef MAXLAZY
83#undef LONGENOUGH 72#undef LONGENOUGH
84#undef PROMOTE
85#endif 73#endif
86 74
87/****************************************************************************************** 75/******************************************************************************************
@@ -94,11 +82,8 @@
94#define SLOOK 4 82#define SLOOK 4
95#define SCHAIN 4 83#define SCHAIN 4
96#define SLCHAIN 1 84#define SLCHAIN 1
97#define SSMATCH 0
98#define TRYLAZY 1
99#define MAXLAZY 18 85#define MAXLAZY 18
100#define LONGENOUGH 35 86#define LONGENOUGH 35
101#define PROMOTE 0
102 87
103#include "xdelta3.c" 88#include "xdelta3.c"
104 89
@@ -108,11 +93,8 @@
108#undef LSTEP 93#undef LSTEP
109#undef SCHAIN 94#undef SCHAIN
110#undef SLCHAIN 95#undef SLCHAIN
111#undef SSMATCH
112#undef TRYLAZY
113#undef MAXLAZY 96#undef MAXLAZY
114#undef LONGENOUGH 97#undef LONGENOUGH
115#undef PROMOTE
116#endif 98#endif
117 99
118/****************************************************************************************** 100/******************************************************************************************
@@ -125,11 +107,8 @@
125#define SLOOK 4 107#define SLOOK 4
126#define SCHAIN 44 108#define SCHAIN 44
127#define SLCHAIN 13 109#define SLCHAIN 13
128#define SSMATCH 0
129#define TRYLAZY 1
130#define MAXLAZY 90 110#define MAXLAZY 90
131#define LONGENOUGH 70 111#define LONGENOUGH 70
132#define PROMOTE 0
133 112
134#include "xdelta3.c" 113#include "xdelta3.c"
135 114
@@ -139,11 +118,8 @@
139#undef LSTEP 118#undef LSTEP
140#undef SCHAIN 119#undef SCHAIN
141#undef SLCHAIN 120#undef SLCHAIN
142#undef SSMATCH
143#undef TRYLAZY
144#undef MAXLAZY 121#undef MAXLAZY
145#undef LONGENOUGH 122#undef LONGENOUGH
146#undef PROMOTE
147#endif 123#endif
148 124
149/****************************************************************************************** 125/******************************************************************************************
@@ -156,11 +132,8 @@
156#define SLOOK 4 132#define SLOOK 4
157#define SCHAIN 8 133#define SCHAIN 8
158#define SLCHAIN 2 134#define SLCHAIN 2
159#define SSMATCH 0
160#define TRYLAZY 1
161#define MAXLAZY 36 135#define MAXLAZY 36
162#define LONGENOUGH 70 136#define LONGENOUGH 70
163#define PROMOTE 0
164 137
165#include "xdelta3.c" 138#include "xdelta3.c"
166 139
@@ -170,9 +143,6 @@
170#undef LSTEP 143#undef LSTEP
171#undef SCHAIN 144#undef SCHAIN
172#undef SLCHAIN 145#undef SLCHAIN
173#undef SSMATCH
174#undef TRYLAZY
175#undef MAXLAZY 146#undef MAXLAZY
176#undef LONGENOUGH 147#undef LONGENOUGH
177#undef PROMOTE
178#endif 148#endif
diff --git a/xdelta3/xdelta3-main.h b/xdelta3/xdelta3-main.h
index 3b7d11a..d33fd57 100644
--- a/xdelta3/xdelta3-main.h
+++ b/xdelta3/xdelta3-main.h
@@ -84,7 +84,7 @@ const char* xd3_mainerror(int err_num);
84#define PRINTHDR_SPECIAL -4378291 84#define PRINTHDR_SPECIAL -4378291
85 85
86/* The number of soft-config variables. */ 86/* The number of soft-config variables. */
87#define XD3_SOFTCFG_VARCNT 10 87#define XD3_SOFTCFG_VARCNT 7
88 88
89/* this is used as in XPR(NT XD3_LIB_ERRMSG (stream, ret)) to print an error message 89/* this is used as in XPR(NT XD3_LIB_ERRMSG (stream, ret)) to print an error message
90 * from the library. */ 90 * from the library. */
@@ -338,7 +338,6 @@ main_config (void)
338{ 338{
339 main_version (); 339 main_version ();
340 340
341 /* TODO: This needs cleaning up */
342 P(RINT "EXTERNAL_COMPRESSION=%d\n", EXTERNAL_COMPRESSION); 341 P(RINT "EXTERNAL_COMPRESSION=%d\n", EXTERNAL_COMPRESSION);
343 P(RINT "GENERIC_ENCODE_TABLES=%d\n", GENERIC_ENCODE_TABLES); 342 P(RINT "GENERIC_ENCODE_TABLES=%d\n", GENERIC_ENCODE_TABLES);
344 P(RINT "GENERIC_ENCODE_TABLES_COMPUTE=%d\n", GENERIC_ENCODE_TABLES_COMPUTE); 343 P(RINT "GENERIC_ENCODE_TABLES_COMPUTE=%d\n", GENERIC_ENCODE_TABLES_COMPUTE);
@@ -367,7 +366,6 @@ main_config (void)
367static void 366static void
368reset_defaults(void) 367reset_defaults(void)
369{ 368{
370 // TODO: this is dumb, use an object
371 option_stdout = 0; 369 option_stdout = 0;
372 option_force = 0; 370 option_force = 0;
373 option_verbose = 0; 371 option_verbose = 0;
@@ -720,20 +718,22 @@ main_file_close (main_file *xfile)
720static void 718static void
721main_file_cleanup (main_file *xfile) 719main_file_cleanup (main_file *xfile)
722{ 720{
723 /* TODO: inconsistent code style here */ 721 if (main_file_isopen (xfile))
724 if (main_file_isopen (xfile)) { 722 {
725 main_file_close (xfile); 723 main_file_close (xfile);
726 } 724 }
727 725
728 if (xfile->snprintf_buf != NULL) { 726 if (xfile->snprintf_buf != NULL)
729 main_free(xfile->snprintf_buf); 727 {
730 xfile->snprintf_buf = NULL; 728 main_free(xfile->snprintf_buf);
731 } 729 xfile->snprintf_buf = NULL;
730 }
732 731
733 if (xfile->filename_copy != NULL) { 732 if (xfile->filename_copy != NULL)
734 main_free(xfile->filename_copy); 733 {
735 xfile->filename_copy = NULL; 734 main_free(xfile->filename_copy);
736 } 735 xfile->filename_copy = NULL;
736 }
737} 737}
738 738
739static int 739static int
@@ -795,7 +795,6 @@ main_file_stat (main_file *xfile, xoff_t *size, int err_ifnoseek)
795 } else { 795 } else {
796 *size = li.QuadPart; 796 *size = li.QuadPart;
797 } 797 }
798 // TODO: check err_ifnoseek
799#else 798#else
800 struct stat sbuf; 799 struct stat sbuf;
801 if (fstat (XFNO (xfile), & sbuf) < 0) 800 if (fstat (XFNO (xfile), & sbuf) < 0)
@@ -2457,11 +2456,8 @@ main_input (xd3_cmd cmd,
2457 config.smatcher_soft.small_look = values[2]; 2456 config.smatcher_soft.small_look = values[2];
2458 config.smatcher_soft.small_chain = values[3]; 2457 config.smatcher_soft.small_chain = values[3];
2459 config.smatcher_soft.small_lchain = values[4]; 2458 config.smatcher_soft.small_lchain = values[4];
2460 config.smatcher_soft.ssmatch = values[5]; 2459 config.smatcher_soft.max_lazy = values[5];
2461 config.smatcher_soft.try_lazy = values[6]; 2460 config.smatcher_soft.long_enough = values[6];
2462 config.smatcher_soft.max_lazy = values[7];
2463 config.smatcher_soft.long_enough = values[8];
2464 config.smatcher_soft.promote = values[9];
2465 } 2461 }
2466 else 2462 else
2467 { 2463 {
@@ -2604,7 +2600,7 @@ main_input (xd3_cmd cmd,
2604 } 2600 }
2605 2601
2606 /* Check if we have no source name and need one. */ 2602 /* Check if we have no source name and need one. */
2607 /* TODO: this doesn't fire due to cpyblocks_ calculation check */ 2603 /* TODO: this doesn't fire due to cpyblocks_ calculation check? */
2608 if (need_src && ! recv_src) 2604 if (need_src && ! recv_src)
2609 { 2605 {
2610 XPR(NT "input requires a source file, use -s\n"); 2606 XPR(NT "input requires a source file, use -s\n");
diff --git a/xdelta3/xdelta3-regtest.py b/xdelta3/xdelta3-regtest.py
index 1248584..5c7b6bf 100755
--- a/xdelta3/xdelta3-regtest.py
+++ b/xdelta3/xdelta3-regtest.py
@@ -16,13 +16,21 @@
16# along with this program; if not, write to the Free Software 16# along with this program; if not, write to the Free Software
17# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 17# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 18
19# TODO: Test 1.5 vs. greedy 19# TODO: test 1.5 vs. greedy
20# TODO: Compare w/ bsdiff and generate more summary 20# TODO: generate a graph
21 21
22import os, sys, math, re, time, types, array, random 22import os, sys, math, re, time, types, array, random
23import xdelta3main 23import xdelta3main
24import xdelta3 24import xdelta3
25 25
26#RCSDIR = '/mnt/polaroid/Polaroid/orbit_linux/home/jmacd/PRCS'
27RCSDIR = '/tmp/PRCS_read_copy/prcs'
28SAMPLEDIR = "/tmp/WESNOTH_tmp/diff"
29
30#RCSDIR = 'G:/jmacd/PRCS/prcs/b'
31#SAMPLEDIR = "C:/sample_data/Wesnoth/tar"
32
33#
26MIN_SIZE = 0 34MIN_SIZE = 0
27 35
28TIME_TOO_SHORT = 0.050 36TIME_TOO_SHORT = 0.050
@@ -32,7 +40,7 @@ MIN_TRIALS = 3
32MAX_TRIALS = 15 40MAX_TRIALS = 15
33 41
34# 10 = fast 1.5 = slow 42# 10 = fast 1.5 = slow
35MIN_STDDEV_PCT = 10 43MIN_STDDEV_PCT = 1.5
36 44
37# How many results per round 45# How many results per round
38MAX_RESULTS = 4 46MAX_RESULTS = 4
@@ -48,73 +56,76 @@ MIN_RUN = 1000 * 1000 * 1
48MAX_RUN = 1000 * 1000 * 10 56MAX_RUN = 1000 * 1000 * 10
49 57
50# Testwide defaults 58# Testwide defaults
51ALL_ARGS = [ '-f' ] 59ALL_ARGS = [
60 # -v
61 ]
52 62
53# The first 10 args go to -C 63# The first 7 args go to -C
54SOFT_CONFIG_CNT = 10 64SOFT_CONFIG_CNT = 7
55 65
56CONFIG_ORDER = [ 'large_look', 66CONFIG_ORDER = [ 'large_look',
57 'large_step', 67 'large_step',
58 'small_look', 68 'small_look',
59 'small_chain', 69 'small_chain',
60 'small_lchain', 70 'small_lchain',
61 'ssmatch',
62 'trylazy',
63 'max_lazy', 71 'max_lazy',
64 'long_enough', 72 'long_enough',
65 'promote', 73
74 # > SOFT_CONFIG_CNT
75 'nocompress',
66 'winsize', 76 'winsize',
67 'srcwinsize', 77 'srcwinsize',
68 'sprevsz', 78 'sprevsz',
69 'iopt', 79 'iopt',
70 80 'djw',
71 # TODO: nocompress, djw (boolean flags)
72 ] 81 ]
73 82
74CONFIG_ARGMAP = { 83CONFIG_ARGMAP = {
75 'winsize' : '-W', 84 'winsize' : '-W',
76 'srcwinsize' : '-B', 85 'srcwinsize' : '-B',
77 'sprevsz' : '-P', 86 'sprevsz' : '-P',
78 'iopt' : '-I', 87 'iopt' : '-I',
88 'nocompress' : '-N',
89 'djw' : '-Sdjw'
79 } 90 }
80 91
81def INPUT_SPEC(rand): 92def INPUT_SPEC(rand):
82 return { 93 return {
83 # computational costs
84 'large_look' : lambda d: rand.choice([9]),
85 'large_step' : lambda d: rand.choice([15]),
86 94
87 'small_chain' : lambda d: rand.choice([1]), 95 # Time/space costs.
88 'small_lchain' : lambda d: rand.choice([1]),
89 96
97 # -C 1,2,3,4,5,6,7
98 'large_look' : lambda d: rand.choice([9]),
99 'large_step' : lambda d: rand.choice([3, 15]),
100 'small_chain' : lambda d: rand.choice([1, 2]),
101 'small_lchain' : lambda d: rand.choice([1]),
90 'max_lazy' : lambda d: rand.choice([18]), 102 'max_lazy' : lambda d: rand.choice([18]),
91 'long_enough' : lambda d: rand.choice([18]), 103 'long_enough' : lambda d: rand.choice([18]),
92
93 'small_look' : lambda d: rand.choice([4]), 104 'small_look' : lambda d: rand.choice([4]),
94 'ssmatch' : lambda d: rand.choice([0]),
95 105
96 'promote' : lambda d: rand.choice([0]), 106 # -N
107 'nocompress' : lambda d: rand.choice(['false']),
97 108
98 'trylazy' : lambda d: rand.choice([1]), 109 # -S djw
110 'djw' : lambda d: rand.choice(['false']),
99 111
100 # memory costs 112 # Mmemory costs.
101 'iopt' : lambda d: 0, # unlimited
102 113
114 # -W
103 'winsize' : lambda d: 8 * (1<<20), 115 'winsize' : lambda d: 8 * (1<<20),
116
117 # -B
104 'srcwinsize' : lambda d: 64 * (1<<20), 118 'srcwinsize' : lambda d: 64 * (1<<20),
105 119
106 'sprevsz' : lambda d: 1 * (1<<18), # only powers of two 120 # -I 0 is unlimited
107 } 121 'iopt' : lambda d: 0,
108 122
109# 123 # -P only powers of two
110# 124 'sprevsz' : lambda d: rand.choice([x * (1<<16) for x in [4]]),
111#RCSDIR = '/mnt/polaroid/Polaroid/orbit_linux/home/jmacd/PRCS' 125 }
112RCSDIR = '/tmp/PRCS_read_copy/prcs' 126#end
113SAMPLEDIR = "/tmp/WESNOTH_tmp/tar"
114
115#RCSDIR = 'G:/jmacd/PRCS/prcs/b'
116#SAMPLEDIR = "C:/sample_data/Wesnoth/tar"
117 127
128#
118TMPDIR = '/tmp/xd3regtest.%d' % os.getpid() 129TMPDIR = '/tmp/xd3regtest.%d' % os.getpid()
119 130
120RUNFILE = os.path.join(TMPDIR, 'run') 131RUNFILE = os.path.join(TMPDIR, 'run')
@@ -139,7 +150,7 @@ RE_HDRSZ = re.compile('VCDIFF header size: +(\\d+)')
139RE_EXTCOMP = re.compile('XDELTA ext comp.*') 150RE_EXTCOMP = re.compile('XDELTA ext comp.*')
140 151
141def c2s(c): 152def c2s(c):
142 return ' '.join(['%02d' % x for x in c]) 153 return ' '.join(['%s' % x for x in c])
143#end 154#end
144 155
145def SumList(l): 156def SumList(l):
@@ -470,8 +481,6 @@ class Xdelta1Runner:
470 #end 481 #end
471#end 482#end
472 483
473# TODO: cleanup below this line
474
475# exceptions 484# exceptions
476class SkipRcsException: 485class SkipRcsException:
477 def __init__(self,reason): 486 def __init__(self,reason):
@@ -771,14 +780,13 @@ def GetTestRcsFiles():
771 return rcsf 780 return rcsf
772#end 781#end
773 782
774# TODO: cleanup below this line
775
776# 783#
777class RandomTestResult: 784class RandomTestResult:
778 def __init__(self, round, config, runtime, compsize): 785 def __init__(self, round, config, runtime, compsize, decodetime):
779 self.round = round 786 self.round = round
780 self.myconfig = config 787 self.myconfig = config
781 self.runtime = runtime 788 self.runtime = runtime
789 self.decodetime = decodetime
782 self.compsize = compsize 790 self.compsize = compsize
783 self.score = None 791 self.score = None
784 self.time_pos = None 792 self.time_pos = None
@@ -787,10 +795,11 @@ class RandomTestResult:
787 #end 795 #end
788 796
789 def __str__(self): 797 def __str__(self):
790 return 'time %.6f%s size %d%s << %s >>' % ( 798 return 'time %.6f%s size %d%s << %s >> %.6f' % (
791 self.time(), ((self.time_pos != None) and (" (%s)" % self.time_pos) or ""), 799 self.time(), ((self.time_pos != None) and (" (%s)" % self.time_pos) or ""),
792 self.size(), ((self.size_pos != None) and (" (%s)" % self.size_pos) or ""), 800 self.size(), ((self.size_pos != None) and (" (%s)" % self.size_pos) or ""),
793 c2s(self.config())) 801 c2s(self.config()),
802 self.decodetime)
794 #end 803 #end
795 804
796 def time(self): 805 def time(self):
@@ -853,12 +862,6 @@ class RandomTester:
853 config.append(val) 862 config.append(val)
854 #end 863 #end
855 864
856 if map['small_chain'] < map['small_lchain']:
857 return None
858
859 if map['large_look'] < map['small_look']:
860 return None
861
862 for r in self.results: 865 for r in self.results:
863 if c2s(r.config()) == c2s(config): 866 if c2s(r.config()) == c2s(config):
864 return None 867 return None
@@ -873,7 +876,13 @@ class RandomTester:
873 for i in range(SOFT_CONFIG_CNT, len(CONFIG_ORDER)): 876 for i in range(SOFT_CONFIG_CNT, len(CONFIG_ORDER)):
874 key = CONFIG_ARGMAP[CONFIG_ORDER[i]] 877 key = CONFIG_ARGMAP[CONFIG_ORDER[i]]
875 val = config[i] 878 val = config[i]
876 args.append('%s=%s' % (key, val)) 879 if val == 'true' or val == 'false':
880 if val == 'true':
881 args.append('%s' % key)
882 #end
883 else:
884 args.append('%s=%s' % (key, val))
885 #end
877 #end 886 #end
878 return args 887 return args
879 #end 888 #end
@@ -924,7 +933,8 @@ class RandomTester:
924 tr = RandomTestResult(self.round_num, 933 tr = RandomTestResult(self.round_num,
925 config, 934 config,
926 result.encode_time.mean, 935 result.encode_time.mean,
927 result.encode_size) 936 result.encode_size,
937 result.decode_time.mean)
928 938
929 self.results.append(tr) 939 self.results.append(tr)
930 940
@@ -1060,7 +1070,6 @@ def RunRandomRcsTest(rcsf):
1060#end 1070#end
1061 1071
1062def RunSampleTest(d, files): 1072def RunSampleTest(d, files):
1063 # TODO: consolidate w/ the above
1064 print 'testing %s with %d files' % (d, len(files)) 1073 print 'testing %s with %d files' % (d, len(files))
1065 configs = [] 1074 configs = []
1066 while len(files) > 1: 1075 while len(files) > 1:
@@ -1106,6 +1115,11 @@ if __name__ == "__main__":
1106 #RunSpeedTest() 1115 #RunSpeedTest()
1107 1116
1108 #rcsf = GetTestRcsFiles() 1117 #rcsf = GetTestRcsFiles()
1118 #RunRandomRcsTest(rcsf)
1119
1120 RunSampleDataTest()
1121
1122 # Other
1109 1123
1110 #x3r = rcsf.AllPairsByDate(Xdelta3RunClass(['-9'])) 1124 #x3r = rcsf.AllPairsByDate(Xdelta3RunClass(['-9']))
1111 #x3r = rcsf.AllPairsByDate(Xdelta3RunClass(['-9', '-S', 'djw'])) 1125 #x3r = rcsf.AllPairsByDate(Xdelta3RunClass(['-9', '-S', 'djw']))
@@ -1117,9 +1131,6 @@ if __name__ == "__main__":
1117 #x1r = rcsf.AllPairsByDate(Xdelta1RunClass()) 1131 #x1r = rcsf.AllPairsByDate(Xdelta1RunClass())
1118 #ReportPairs('xdelta1', x1r) 1132 #ReportPairs('xdelta1', x1r)
1119 1133
1120 #RunRandomRcsTest(rcsf)
1121 RunSampleDataTest()
1122
1123 except CommandError: 1134 except CommandError:
1124 pass 1135 pass
1125 else: 1136 else:
diff --git a/xdelta3/xdelta3-test.h b/xdelta3/xdelta3-test.h
index f4d5972..248691c 100644
--- a/xdelta3/xdelta3-test.h
+++ b/xdelta3/xdelta3-test.h
@@ -693,11 +693,8 @@ test_compress_text (xd3_stream *stream,
693 cfg.smatcher_soft.small_look = 4; 693 cfg.smatcher_soft.small_look = 4;
694 cfg.smatcher_soft.small_chain = 128; 694 cfg.smatcher_soft.small_chain = 128;
695 cfg.smatcher_soft.small_lchain = 16; 695 cfg.smatcher_soft.small_lchain = 16;
696 cfg.smatcher_soft.ssmatch = 0;
697 cfg.smatcher_soft.try_lazy = 1;
698 cfg.smatcher_soft.max_lazy = 8; 696 cfg.smatcher_soft.max_lazy = 8;
699 cfg.smatcher_soft.long_enough = 128; 697 cfg.smatcher_soft.long_enough = 128;
700 cfg.smatcher_soft.promote = 0;
701 698
702 xd3_config_stream (stream, & cfg); 699 xd3_config_stream (stream, & cfg);
703 700
@@ -1959,9 +1956,7 @@ typedef struct _string_match_test string_match_test;
1959typedef enum 1956typedef enum
1960{ 1957{
1961 SM_NONE = 0, 1958 SM_NONE = 0,
1962 SM_SSMATCH = (1 << 0),
1963 SM_LAZY = (1 << 1), 1959 SM_LAZY = (1 << 1),
1964 SM_PROMOTE = (1 << 2),
1965} string_match_flags; 1960} string_match_flags;
1966 1961
1967struct _string_match_test 1962struct _string_match_test
@@ -1985,7 +1980,7 @@ static const string_match_test match_tests[] =
1985 1980
1986 /* simple promotion: the third copy address depends on promotion */ 1981 /* simple promotion: the third copy address depends on promotion */
1987 { "ABCDEF_ABCDEF^ABCDEF", SM_NONE, "C7/6@0 C14/6@7" }, 1982 { "ABCDEF_ABCDEF^ABCDEF", SM_NONE, "C7/6@0 C14/6@7" },
1988 { "ABCDEF_ABCDEF^ABCDEF", SM_PROMOTE, "C7/6@0 C14/6@0" }, 1983 /* { "ABCDEF_ABCDEF^ABCDEF", SM_PROMOTE, "C7/6@0 C14/6@0" }, forgotten */
1989 1984
1990 /* simple lazy: there is a better copy starting with "23 X" than "123 " */ 1985 /* simple lazy: there is a better copy starting with "23 X" than "123 " */
1991 { "123 23 XYZ 123 XYZ", SM_NONE, "C11/4@0" }, 1986 { "123 23 XYZ 123 XYZ", SM_NONE, "C11/4@0" },
@@ -2018,7 +2013,7 @@ static const string_match_test match_tests[] =
2018 2013
2019 /* ssmatch test */ 2014 /* ssmatch test */
2020 { "ABCDE___ABCDE*** BCDE***", SM_NONE, "C8/5@0 C17/4@1" }, 2015 { "ABCDE___ABCDE*** BCDE***", SM_NONE, "C8/5@0 C17/4@1" },
2021 { "ABCDE___ABCDE*** BCDE***", SM_SSMATCH, "C8/5@0 C17/7@9" }, 2016 /*{ "ABCDE___ABCDE*** BCDE***", SM_SSMATCH, "C8/5@0 C17/7@9" }, forgotten */
2022}; 2017};
2023 2018
2024static int 2019static int
@@ -2043,11 +2038,8 @@ test_string_matching (xd3_stream *stream, int ignore)
2043 config.smatcher_soft.small_look = 4; 2038 config.smatcher_soft.small_look = 4;
2044 config.smatcher_soft.small_chain = 10; 2039 config.smatcher_soft.small_chain = 10;
2045 config.smatcher_soft.small_lchain = 10; 2040 config.smatcher_soft.small_lchain = 10;
2046 config.smatcher_soft.max_lazy = 10; 2041 config.smatcher_soft.max_lazy = (test->flags & SM_LAZY) ? 10 : 0;
2047 config.smatcher_soft.long_enough = 10; 2042 config.smatcher_soft.long_enough = 10;
2048 config.smatcher_soft.ssmatch = (test->flags & SM_SSMATCH) && 1;
2049 config.smatcher_soft.try_lazy = (test->flags & SM_LAZY) && 1;
2050 config.smatcher_soft.promote = (test->flags & SM_PROMOTE) && 1;
2051 2043
2052 if ((ret = xd3_config_stream (stream, & config))) { return ret; } 2044 if ((ret = xd3_config_stream (stream, & config))) { return ret; }
2053 if ((ret = xd3_encode_init (stream))) { return ret; } 2045 if ((ret = xd3_encode_init (stream))) { return ret; }
@@ -2122,11 +2114,8 @@ test_iopt_flush_instructions (xd3_stream *stream, int ignore)
2122 config.smatcher_soft.small_look = 4; 2114 config.smatcher_soft.small_look = 4;
2123 config.smatcher_soft.small_chain = 128; 2115 config.smatcher_soft.small_chain = 128;
2124 config.smatcher_soft.small_lchain = 16; 2116 config.smatcher_soft.small_lchain = 16;
2125 config.smatcher_soft.ssmatch = 0;
2126 config.smatcher_soft.try_lazy = 1;
2127 config.smatcher_soft.max_lazy = 8; 2117 config.smatcher_soft.max_lazy = 8;
2128 config.smatcher_soft.long_enough = 128; 2118 config.smatcher_soft.long_enough = 128;
2129 config.smatcher_soft.promote = 0;
2130 2119
2131 if ((ret = xd3_config_stream (stream, & config))) { return ret; } 2120 if ((ret = xd3_config_stream (stream, & config))) { return ret; }
2132 2121
diff --git a/xdelta3/xdelta3.c b/xdelta3/xdelta3.c
index b6ac778..c0e7880 100644
--- a/xdelta3/xdelta3.c
+++ b/xdelta3/xdelta3.c
@@ -61,12 +61,7 @@
61 copies within the TARGET. Small matching, which is more expensive, 61 copies within the TARGET. Small matching, which is more expensive,
62 usually dominates the large STRING-MATCH costs in this code - the 62 usually dominates the large STRING-MATCH costs in this code - the
63 more exhaustive the search, the better the results. Either of the 63 more exhaustive the search, the better the results. Either of the
64 two string-matching mechanisms may be disabled. Currently, large 64 two string-matching mechanisms may be disabled.
65 checksums are only performed in the source file, if present, and
66 small checksums are performed only in the left-over target input.
67 However, small matches are possible in the source file too, with a
68 range of possibilities. [I've seen a paper on this subject, but
69 I lost it.]
70 65
71 2. INSTRUCTION SELECTION. The IOPT buffer here represents a queue 66 2. INSTRUCTION SELECTION. The IOPT buffer here represents a queue
72 used to store overlapping copy instructions. There are two possible 67 used to store overlapping copy instructions. There are two possible
@@ -486,11 +481,9 @@ IF_BUILD_DEFAULT(static const xd3_smatcher __smatcher_default;)
486#define SMALL_HASH_DEBUG2(s,inp) \ 481#define SMALL_HASH_DEBUG2(s,inp) \
487 XD3_ASSERT (debug_hval == xd3_checksum_hash (& (s)->small_hash, \ 482 XD3_ASSERT (debug_hval == xd3_checksum_hash (& (s)->small_hash, \
488 xd3_scksum ((inp), (s)->smatcher.small_look))) 483 xd3_scksum ((inp), (s)->smatcher.small_look)))
489#define SMALL_HASH_STATS(x) x
490#else 484#else
491#define SMALL_HASH_DEBUG1(s,inp) 485#define SMALL_HASH_DEBUG1(s,inp)
492#define SMALL_HASH_DEBUG2(s,inp) 486#define SMALL_HASH_DEBUG2(s,inp)
493#define SMALL_HASH_STATS(x)
494#endif /* XD3_DEBUG */ 487#endif /* XD3_DEBUG */
495 488
496/* Update the run-length state */ 489/* Update the run-length state */
@@ -1169,6 +1162,7 @@ static usize_t __alternate_code_table_compressed_size;
1169int xd3_compute_code_table_encoding (xd3_stream *in_stream, const xd3_dinst *code_table, 1162int xd3_compute_code_table_encoding (xd3_stream *in_stream, const xd3_dinst *code_table,
1170 uint8_t *comp_string, usize_t *comp_string_size) 1163 uint8_t *comp_string, usize_t *comp_string_size)
1171{ 1164{
1165 /* TODO: use xd3_encode_memory() */
1172 uint8_t dflt_string[CODE_TABLE_STRING_SIZE]; 1166 uint8_t dflt_string[CODE_TABLE_STRING_SIZE];
1173 uint8_t code_string[CODE_TABLE_STRING_SIZE]; 1167 uint8_t code_string[CODE_TABLE_STRING_SIZE];
1174 xd3_stream stream; 1168 xd3_stream stream;
@@ -1194,11 +1188,8 @@ int xd3_compute_code_table_encoding (xd3_stream *in_stream, const xd3_dinst *cod
1194 config.smatcher_soft.small_look = 4; 1188 config.smatcher_soft.small_look = 4;
1195 config.smatcher_soft.small_chain = CODE_TABLE_STRING_SIZE; 1189 config.smatcher_soft.small_chain = CODE_TABLE_STRING_SIZE;
1196 config.smatcher_soft.small_lchain = CODE_TABLE_STRING_SIZE; 1190 config.smatcher_soft.small_lchain = CODE_TABLE_STRING_SIZE;
1197 config.smatcher_soft.ssmatch = 1;
1198 config.smatcher_soft.try_lazy = 1;
1199 config.smatcher_soft.max_lazy = CODE_TABLE_STRING_SIZE; 1191 config.smatcher_soft.max_lazy = CODE_TABLE_STRING_SIZE;
1200 config.smatcher_soft.long_enough = CODE_TABLE_STRING_SIZE; 1192 config.smatcher_soft.long_enough = CODE_TABLE_STRING_SIZE;
1201 config.smatcher_soft.promote = 1;
1202 1193
1203 if ((ret = xd3_config_stream (& stream, & config))) { goto fail; } 1194 if ((ret = xd3_config_stream (& stream, & config))) { goto fail; }
1204 1195
@@ -2503,11 +2494,7 @@ xd3_config_stream(xd3_stream *stream,
2503 smatcher->name = __smatcher_soft.name; 2494 smatcher->name = __smatcher_soft.name;
2504 if (smatcher->large_look < MIN_MATCH || 2495 if (smatcher->large_look < MIN_MATCH ||
2505 smatcher->large_step < 1 || 2496 smatcher->large_step < 1 ||
2506 smatcher->small_look < MIN_MATCH || 2497 smatcher->small_look < MIN_MATCH)
2507 smatcher->small_chain < 1 ||
2508 smatcher->large_look < smatcher->small_look ||
2509 smatcher->small_chain < smatcher->small_lchain ||
2510 (smatcher->small_lchain == 0 && smatcher->try_lazy))
2511 { 2498 {
2512 stream->msg = "invalid soft string-match config"; 2499 stream->msg = "invalid soft string-match config";
2513 return XD3_INVALID; 2500 return XD3_INVALID;
@@ -3581,22 +3568,6 @@ xd3_encode_init (xd3_stream *stream)
3581 return ENOMEM; 3568 return ENOMEM;
3582} 3569}
3583 3570
3584#if XD3_DEBUG
3585static int
3586xd3_check_sprevlist (xd3_stream *stream)
3587{
3588 int i;
3589 for (i = 0; i < stream->sprevsz; i += 1)
3590 {
3591 xd3_slist *l = & stream->small_prev[i];
3592
3593 XD3_ASSERT (l->prev->next == l);
3594 XD3_ASSERT (l->next->prev == l);
3595 }
3596 return 1;
3597}
3598#endif
3599
3600/* Called after the ENC_POSTOUT state, this puts the output buffers back into separate 3571/* Called after the ENC_POSTOUT state, this puts the output buffers back into separate
3601 * lists and re-initializes some variables. (The output lists were spliced together 3572 * lists and re-initializes some variables. (The output lists were spliced together
3602 * during the ENC_FLUSH state.) */ 3573 * during the ENC_FLUSH state.) */
@@ -3606,8 +3577,6 @@ xd3_encode_reset (xd3_stream *stream)
3606 int i; 3577 int i;
3607 xd3_output *olist; 3578 xd3_output *olist;
3608 3579
3609 XD3_ASSERT (stream->small_prev == NULL || xd3_check_sprevlist (stream));
3610
3611 IF_DEBUG (stream->n_emit = 0); 3580 IF_DEBUG (stream->n_emit = 0);
3612 stream->avail_in = 0; 3581 stream->avail_in = 0;
3613 stream->small_reset = 1; 3582 stream->small_reset = 1;
@@ -3930,9 +3899,6 @@ xd3_process_memory (int is_encode,
3930 return XD3_INTERNAL; 3899 return XD3_INTERNAL;
3931 } 3900 }
3932 3901
3933 /* TODO: for small inputs, the xd3_stream could be configured to allocate MUCH less
3934 * memory. Most of the allocations sizes are defaulted (see xdelta3.h), and assume
3935 * large inputs. */
3936 memset (& stream, 0, sizeof (stream)); 3902 memset (& stream, 0, sizeof (stream));
3937 memset (& config, 0, sizeof (config)); 3903 memset (& config, 0, sizeof (config));
3938 3904
@@ -3940,9 +3906,10 @@ xd3_process_memory (int is_encode,
3940 3906
3941 if (is_encode) 3907 if (is_encode)
3942 { 3908 {
3943 /* TODO: for large inputs, limit window size, need to select a default ... */
3944 config.srcwin_maxsz = source_size; 3909 config.srcwin_maxsz = source_size;
3945 config.winsize = min(input_size, (usize_t) (1<<20)); 3910 config.winsize = min(input_size, (usize_t) (1<<20));
3911 config.sprevsz = min(input_size, XD3_DEFAULT_SPREVSZ);
3912 config.iopt_size = min(input_size / 32, XD3_DEFAULT_IOPT_SIZE);
3946 } 3913 }
3947 3914
3948 if ((ret = xd3_config_stream (&stream, &config)) != 0) 3915 if ((ret = xd3_config_stream (&stream, &config)) != 0)
@@ -4056,6 +4023,14 @@ xd3_string_match_init (xd3_stream *stream)
4056 const int DO_SMALL = ! (stream->flags & XD3_NOCOMPRESS); 4023 const int DO_SMALL = ! (stream->flags & XD3_NOCOMPRESS);
4057 const int DO_LARGE = (stream->src != NULL); 4024 const int DO_LARGE = (stream->src != NULL);
4058 4025
4026 if (DO_LARGE && stream->large_table == NULL)
4027 {
4028 if ((stream->large_table = xd3_alloc0 (stream, stream->large_hash.size, sizeof (usize_t))) == NULL)
4029 {
4030 return ENOMEM;
4031 }
4032 }
4033
4059 if (DO_SMALL) 4034 if (DO_SMALL)
4060 { 4035 {
4061 /* Subsequent calls can return immediately after checking reset. */ 4036 /* Subsequent calls can return immediately after checking reset. */
@@ -4063,7 +4038,7 @@ xd3_string_match_init (xd3_stream *stream)
4063 { 4038 {
4064 /* The target hash table is reinitialized once per window. */ 4039 /* The target hash table is reinitialized once per window. */
4065 /* TODO: This would not have to be reinitialized if absolute offsets 4040 /* TODO: This would not have to be reinitialized if absolute offsets
4066 * were being stored. */ 4041 * were being stored, as we would do for VCD_TARGET encoding. */
4067 if (stream->small_reset) 4042 if (stream->small_reset)
4068 { 4043 {
4069 stream->small_reset = 0; 4044 stream->small_reset = 0;
@@ -4081,31 +4056,15 @@ xd3_string_match_init (xd3_stream *stream)
4081 } 4056 }
4082 4057
4083 /* If there is a previous table needed. */ 4058 /* If there is a previous table needed. */
4084 if (stream->smatcher.small_chain > 1) 4059 if (stream->smatcher.small_lchain > 1 ||
4060 stream->smatcher.small_chain > 1)
4085 { 4061 {
4086 xd3_slist *p, *m;
4087
4088 if ((stream->small_prev = xd3_alloc (stream, 4062 if ((stream->small_prev = xd3_alloc (stream,
4089 stream->sprevsz, 4063 stream->sprevsz,
4090 sizeof (xd3_slist))) == NULL) 4064 sizeof (xd3_slist))) == NULL)
4091 { 4065 {
4092 return ENOMEM; 4066 return ENOMEM;
4093 } 4067 }
4094
4095 /* Initialize circular lists. */
4096 for (p = stream->small_prev, m = stream->small_prev + stream->sprevsz; p != m; p += 1)
4097 {
4098 p->next = p;
4099 p->prev = p;
4100 }
4101 }
4102 }
4103
4104 if (DO_LARGE && stream->large_table == NULL)
4105 {
4106 if ((stream->large_table = xd3_alloc0 (stream, stream->large_hash.size, sizeof (usize_t))) == NULL)
4107 {
4108 return ENOMEM;
4109 } 4068 }
4110 } 4069 }
4111 4070
@@ -4648,67 +4607,22 @@ xd3_source_extend_match (xd3_stream *stream)
4648 return 0; 4607 return 0;
4649} 4608}
4650 4609
4651/* Update the small hash. Values in the small_table are offset by HASH_CKOFFSET (1) to 4610/* Update the small hash. Values in the small_table are offset by
4652 * distinguish empty buckets the zero offset. This maintains the previous linked lists. 4611 * HASH_CKOFFSET (1) to distinguish empty buckets from real offsets. */
4653 * If owrite is true then this entry is replacing the existing record, otherwise it is
4654 * merely being called to promote the existing record in the hash bucket (for the same
4655 * address cache). */
4656static void 4612static void
4657xd3_scksum_insert (xd3_stream *stream, usize_t inx, usize_t scksum, usize_t pos) 4613xd3_scksum_insert (xd3_stream *stream,
4614 usize_t inx,
4615 usize_t scksum,
4616 usize_t pos)
4658{ 4617{
4659 /* If we are maintaining previous links. */ 4618 /* If we are maintaining previous duplicates. */
4660 if (stream->small_prev) 4619 if (stream->small_prev)
4661 { 4620 {
4662 usize_t last_pos = stream->small_table[inx]; 4621 usize_t last_pos = stream->small_table[inx];
4663 xd3_slist *pos_list = & stream->small_prev[pos & stream->sprevmask]; 4622 xd3_slist *pos_list = & stream->small_prev[pos & stream->sprevmask];
4664 xd3_slist *prev = pos_list->prev;
4665 xd3_slist *next = pos_list->next;
4666
4667 /* Assert link structure, update pos, cksum */
4668 XD3_ASSERT (prev->next == pos_list);
4669 XD3_ASSERT (next->prev == pos_list);
4670 pos_list->pos = pos;
4671 pos_list->scksum = scksum;
4672
4673 /* Subtract HASH_CKOFFSET and test for a previous offset. */
4674 if (last_pos-- != 0)
4675 {
4676 xd3_slist *last_list = & stream->small_prev[last_pos & stream->sprevmask];
4677 xd3_slist *last_next;
4678
4679 /* Verify existing entry. */
4680 SMALL_HASH_DEBUG1 (stream, stream->next_in + last_pos);
4681 SMALL_HASH_DEBUG2 (stream, stream->next_in + pos);
4682
4683 /* The two positions (mod sprevsz) may have the same checksum, making the old
4684 * and new entries the same. That is why the removal step is not before the
4685 * above if-stmt. */
4686 if (last_list != pos_list)
4687 {
4688 /* Remove current position from any list it may belong to. */
4689 next->prev = prev;
4690 prev->next = next;
4691
4692 /* The ordinary case, add current position to last_list. */
4693 last_next = last_list->next;
4694
4695 pos_list->next = last_next;
4696 pos_list->prev = last_list;
4697
4698 last_next->prev = pos_list;
4699 last_list->next = pos_list;
4700 }
4701 }
4702 else
4703 {
4704 /* Remove current position from any list it may belong to. */
4705 next->prev = prev;
4706 prev->next = next;
4707 4623
4708 /* Re-initialize current position. */ 4624 /* Note last_pos is offset by HASH_CKOFFSET. */
4709 pos_list->next = pos_list; 4625 pos_list->last_pos = last_pos;
4710 pos_list->prev = pos_list;
4711 }
4712 } 4626 }
4713 4627
4714 /* Enter the new position into the hash bucket. */ 4628 /* Enter the new position into the hash bucket. */
@@ -4736,62 +4650,42 @@ xd3_check_smatch (const uint8_t *ref0, const uint8_t *inp0,
4736} 4650}
4737#endif /* XD3_DEBUG */ 4651#endif /* XD3_DEBUG */
4738 4652
4739/* When the hash table indicates a possible small string match, it calls this routine to 4653/* When the hash table indicates a possible small string match, it
4740 * find the best match. The first matching position is taken from the small_table, 4654 * calls this routine to find the best match. The first matching
4741 * HASH_CKOFFSET is subtracted to get the actual position. After checking that match, if 4655 * position is taken from the small_table, HASH_CKOFFSET is subtracted
4742 * previous linked lists are in use (because stream->smatcher.small_chain > 1), previous matches 4656 * to get the actual position. After checking that match, if previous
4743 * are tested searching for the longest match. If (stream->min_match > MIN_MATCH) then a lazy 4657 * linked lists are in use (because stream->smatcher.small_chain > 1),
4744 * match is in effect. 4658 * previous matches are tested searching for the longest match. If
4745 * 4659 * (stream->min_match > MIN_MATCH) then a lazy match is in effect.
4746 * OPT: This is by far the most expensive function. The slowdown is in part due to the data
4747 * structure it maintains, which is relatively more expensive than it needs to be (in
4748 * comparison to zlib) in order to support the PROMOTE decision, which is to prefer the
4749 * most recently used matching address of a certain string to aid the VCDIFF same cache.
4750 *
4751 * --- TODO: declare the PROMOTE experiment a failure -- remove the extra LIST --
4752 * 4660 *
4753 * Weak reasoning? it's time to modularize this routine...? Let's say the PROMOTE 4661 * TODO: This is the second most-expensive function, after
4754 * feature supported by this slow data structure contributes around 2% improvement in 4662 * xd3_srcwin_move_point().
4755 * compressed size, is there a better code table that doesn't use the SAME address cache,
4756 * for which the speedup-discount could produce a better encoding?
4757 */ 4663 */
4758static /*inline*/ usize_t 4664static usize_t
4759xd3_smatch (xd3_stream *stream, usize_t base, usize_t scksum, usize_t *match_offset) 4665xd3_smatch (xd3_stream *stream,
4666 usize_t base,
4667 usize_t scksum,
4668 usize_t *match_offset)
4760{ 4669{
4761 usize_t cmp_len; 4670 usize_t cmp_len;
4762 usize_t match_length = 0; 4671 usize_t match_length = 0;
4763 usize_t chain = (stream->min_match == MIN_MATCH ? 4672 usize_t chain = (stream->min_match == MIN_MATCH ?
4764 stream->smatcher.small_chain : 4673 stream->smatcher.small_chain :
4765 stream->smatcher.small_lchain); 4674 stream->smatcher.small_lchain);
4766 xd3_slist *current = NULL;
4767 xd3_slist *first = NULL;
4768 const uint8_t *inp_max = stream->next_in + stream->avail_in; 4675 const uint8_t *inp_max = stream->next_in + stream->avail_in;
4769 const uint8_t *inp; 4676 const uint8_t *inp;
4770 const uint8_t *ref; 4677 const uint8_t *ref;
4771 4678
4772 SMALL_HASH_STATS (usize_t search_cnt = 0);
4773 SMALL_HASH_DEBUG1 (stream, stream->next_in + stream->input_position); 4679 SMALL_HASH_DEBUG1 (stream, stream->next_in + stream->input_position);
4774 SMALL_HASH_STATS (stream->sh_searches += 1);
4775 4680
4776 XD3_ASSERT (stream->min_match + stream->input_position <= stream->avail_in); 4681 XD3_ASSERT (stream->min_match + stream->input_position <= stream->avail_in);
4777 4682
4778 base -= HASH_CKOFFSET; 4683 base -= HASH_CKOFFSET;
4779 4684
4780 /* Initialize the chain. */
4781 if (stream->small_prev != NULL)
4782 {
4783 first = current = & stream->small_prev[base & stream->sprevmask];
4784
4785 /* Check if current->pos is correct. */
4786 if (current->pos != base) { goto done; }
4787 }
4788
4789 again: 4685 again:
4790 4686
4791 SMALL_HASH_STATS (search_cnt += 1); 4687 /* For small matches, we can always go to the end-of-input because
4792 4688 * the matching position must be less than the input position. */
4793 /* For small matches, we can always go to the end-of-input because the matching position
4794 * must be less than the input position. */
4795 XD3_ASSERT (base < stream->input_position); 4689 XD3_ASSERT (base < stream->input_position);
4796 4690
4797 ref = stream->next_in + base; 4691 ref = stream->next_in + base;
@@ -4809,7 +4703,8 @@ xd3_smatch (xd3_stream *stream, usize_t base, usize_t scksum, usize_t *match_off
4809 cmp_len = inp - (stream->next_in + stream->input_position); 4703 cmp_len = inp - (stream->next_in + stream->input_position);
4810 4704
4811 /* Verify correctness */ 4705 /* Verify correctness */
4812 XD3_ASSERT (xd3_check_smatch (stream->next_in + base, stream->next_in + stream->input_position, 4706 XD3_ASSERT (xd3_check_smatch (stream->next_in + base,
4707 stream->next_in + stream->input_position,
4813 inp_max, cmp_len)); 4708 inp_max, cmp_len));
4814 4709
4815 /* Update longest match */ 4710 /* Update longest match */
@@ -4818,38 +4713,39 @@ xd3_smatch (xd3_stream *stream, usize_t base, usize_t scksum, usize_t *match_off
4818 ( match_length) = cmp_len; 4713 ( match_length) = cmp_len;
4819 (*match_offset) = base; 4714 (*match_offset) = base;
4820 4715
4821 /* Stop if we match the entire input or discover a long_enough match. */ 4716 /* Stop if we match the entire input or have a long_enough match. */
4822 if (inp == inp_max || cmp_len >= stream->smatcher.long_enough) 4717 if (inp == inp_max || cmp_len >= stream->smatcher.long_enough)
4823 { 4718 {
4824 goto done; 4719 goto done;
4825 } 4720 }
4826 } 4721 }
4827 4722
4828 /* If we have not reached the chain limit, see if there is another previous position. */ 4723 /* If we have not reached the chain limit, see if there is another
4829 if (current) 4724 previous position. */
4725 while (--chain != 0)
4830 { 4726 {
4831 while (--chain != 0) 4727 /* Calculate the previous offset. */
4728 usize_t last_pos = stream->small_prev[base & stream->sprevmask].last_pos;
4729
4730 if (last_pos == 0)
4832 { 4731 {
4833 /* Calculate the next base offset. */ 4732 break;
4834 current = current->prev;
4835 base = current->pos;
4836
4837 /* Stop if the next position was the first. Stop if the position is wrong
4838 * (because the lists are not re-initialized across input windows). Skip if the
4839 * scksum is wrong. */
4840 if (current != first && base < stream->input_position)
4841 {
4842 if (current->scksum != scksum)
4843 {
4844 continue;
4845 }
4846 goto again;
4847 }
4848 } 4733 }
4734
4735 last_pos -= HASH_CKOFFSET;
4736 base = last_pos;
4737
4738 /* Stop if the position is wrong (because the lists are not
4739 * re-initialized across input windows). */
4740 if (base < stream->input_position)
4741 {
4742 goto again;
4743 }
4744
4745 break;
4849 } 4746 }
4850 4747
4851 done: 4748 done:
4852 SMALL_HASH_STATS (stream->sh_compares += search_cnt);
4853 return match_length; 4749 return match_length;
4854} 4750}
4855 4751
@@ -4903,8 +4799,7 @@ xd3_verify_run_state (xd3_stream *stream,
4903 Templates 4799 Templates
4904 ******************************************************************************************/ 4800 ******************************************************************************************/
4905 4801
4906/* Template macros: less than 30 lines work. the template parameters appear as, e.g., 4802/* Template macros */
4907 * SLOOK, MIN_MATCH, TRYLAZY, etc. */
4908#define XD3_TEMPLATE(x) XD3_TEMPLATE2(x,TEMPLATE) 4803#define XD3_TEMPLATE(x) XD3_TEMPLATE2(x,TEMPLATE)
4909#define XD3_TEMPLATE2(x,n) XD3_TEMPLATE3(x,n) 4804#define XD3_TEMPLATE2(x,n) XD3_TEMPLATE3(x,n)
4910#define XD3_TEMPLATE3(x,n) x ## n 4805#define XD3_TEMPLATE3(x,n) x ## n
@@ -4918,10 +4813,9 @@ static const xd3_smatcher XD3_TEMPLATE(__smatcher_) =
4918 XD3_STRINGIFY(TEMPLATE), 4813 XD3_STRINGIFY(TEMPLATE),
4919 XD3_TEMPLATE(xd3_string_match_), 4814 XD3_TEMPLATE(xd3_string_match_),
4920#if SOFTCFG == 1 4815#if SOFTCFG == 1
4921 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 4816 0, 0, 0, 0, 0, 0, 0
4922#else 4817#else
4923 LLOOK, LSTEP, SLOOK, SCHAIN, SLCHAIN, SSMATCH, TRYLAZY, MAXLAZY, 4818 LLOOK, LSTEP, SLOOK, SCHAIN, SLCHAIN, MAXLAZY, LONGENOUGH
4924 LONGENOUGH, PROMOTE
4925#endif 4819#endif
4926}; 4820};
4927 4821
@@ -4999,13 +4893,13 @@ XD3_TEMPLATE(xd3_string_match_) (xd3_stream *stream)
4999 * checks that LEN is shorter than MAXLAZY and that there is enough leftover data to 4893 * checks that LEN is shorter than MAXLAZY and that there is enough leftover data to
5000 * consider lazy matching. "Enough" is set to 2 since the next match will start at the 4894 * consider lazy matching. "Enough" is set to 2 since the next match will start at the
5001 * next offset, it must match two extra characters. */ 4895 * next offset, it must match two extra characters. */
5002#define TRYLAZYLEN(LEN,POS,MAX) ((TRYLAZY && (LEN) < MAXLAZY) && ((POS) + (LEN) <= (MAX) - 2)) 4896#define TRYLAZYLEN(LEN,POS,MAX) ((MAXLAZY) > 0 && (LEN) < (MAXLAZY) && (POS) + (LEN) <= (MAX) - 2)
5003 4897
5004 /* HANDLELAZY: This statement is called each time an instruciton is emitted (three 4898 /* HANDLELAZY: This statement is called each time an instruciton is emitted (three
5005 * cases). If the instruction is large enough, the loop is restarted, otherwise lazy 4899 * cases). If the instruction is large enough, the loop is restarted, otherwise lazy
5006 * matching may ensue. */ 4900 * matching may ensue. */
5007#define HANDLELAZY(mlen) \ 4901#define HANDLELAZY(mlen) \
5008 if (TRYLAZYLEN ((mlen), stream->input_position, stream->avail_in)) \ 4902 if (TRYLAZYLEN ((mlen), (stream->input_position), (stream->avail_in))) \
5009 { stream->min_match = (mlen) + LEAST_MATCH_INCR; goto updateone; } \ 4903 { stream->min_match = (mlen) + LEAST_MATCH_INCR; goto updateone; } \
5010 else \ 4904 else \
5011 { stream->input_position += (mlen); goto restartloop; } 4905 { stream->input_position += (mlen); goto restartloop; }
@@ -5104,13 +4998,6 @@ XD3_TEMPLATE(xd3_string_match_) (xd3_stream *stream)
5104 /* Insert a hash for this string. */ 4998 /* Insert a hash for this string. */
5105 xd3_scksum_insert (stream, sinx, scksum, stream->input_position); 4999 xd3_scksum_insert (stream, sinx, scksum, stream->input_position);
5106 5000
5107 /* Promote the previous match address to head of the hash bucket. This is
5108 * intended to improve the same cache hit rate. */
5109 if (match_length != 0 && PROMOTE)
5110 {
5111 xd3_scksum_insert (stream, sinx, scksum, match_offset);
5112 }
5113
5114 /* Maybe output a COPY instruction */ 5001 /* Maybe output a COPY instruction */
5115 if (unlikely (match_length >= stream->min_match)) 5002 if (unlikely (match_length >= stream->min_match))
5116 { 5003 {
@@ -5132,67 +5019,7 @@ XD3_TEMPLATE(xd3_string_match_) (xd3_stream *stream)
5132 /* address */ match_offset, 5019 /* address */ match_offset,
5133 /* is_source */ 0))) { return ret; } 5020 /* is_source */ 0))) { return ret; }
5134 5021
5135 /* SSMATCH option: search small matches: continue the incremental checksum 5022 /* Copy instruction. */
5136 * through the matched material. Only if not lazy matching. */
5137 if (SSMATCH && !TRYLAZYLEN (match_length, stream->input_position, stream->avail_in))
5138 {
5139 usize_t avail = stream->avail_in - SLOOK - stream->input_position;
5140 usize_t ml_m1 = match_length - 1;
5141 usize_t right;
5142 int aincr;
5143
5144 IF_DEBUG (usize_t nposi = stream->input_position + match_length);
5145
5146 /* Avail is the last offset we can compute an incremental cksum. If the
5147 * match length exceeds that offset then we are finished performing
5148 * incremental updates after this step. */
5149 if (ml_m1 < avail)
5150 {
5151 right = ml_m1;
5152 aincr = 1;
5153 }
5154 else
5155 {
5156 right = avail;
5157 aincr = 0;
5158 }
5159
5160 /* Compute incremental checksums within the match. */
5161 while (right > 0)
5162 {
5163 SMALL_CKSUM_UPDATE (scksum, inp, SLOOK);
5164 if (DO_LARGE && (stream->input_position + LLOOK < stream->avail_in)) {
5165 LARGE_CKSUM_UPDATE (lcksum, inp, LLOOK);
5166 }
5167
5168 inp += 1;
5169 stream->input_position += 1;
5170 right -= 1;
5171 sinx = xd3_checksum_hash (& stream->small_hash, scksum);
5172
5173 IF_DEBUG (xd3_verify_small_state (stream, inp, scksum));
5174
5175 xd3_scksum_insert (stream, sinx, scksum, stream->input_position);
5176 }
5177
5178 if (aincr)
5179 {
5180 /* Keep searching... */
5181 if (DO_RUN) { run_l = xd3_comprun (inp+1, SLOOK-1, & run_c); }
5182 XD3_ASSERT (nposi == stream->input_position + 1);
5183 XD3_ASSERT (stream->input_position + SLOOK < stream->avail_in);
5184 stream->min_match = MIN_MATCH;
5185 goto updatesure;
5186 }
5187 else
5188 {
5189 /* Not enough input for another match. */
5190 XD3_ASSERT (stream->input_position + SLOOK >= stream->avail_in);
5191 goto loopnomore;
5192 }
5193 }
5194
5195 /* Else case: copy instruction, but no SSMATCH. */
5196 HANDLELAZY (match_length); 5023 HANDLELAZY (match_length);
5197 } 5024 }
5198 } 5025 }
@@ -5213,8 +5040,6 @@ XD3_TEMPLATE(xd3_string_match_) (xd3_stream *stream)
5213 goto loopnomore; 5040 goto loopnomore;
5214 } 5041 }
5215 5042
5216 updatesure:
5217
5218 /* Compute next RUN, CKSUM */ 5043 /* Compute next RUN, CKSUM */
5219 if (DO_RUN) { NEXTRUN (inp[SLOOK]); } 5044 if (DO_RUN) { NEXTRUN (inp[SLOOK]); }
5220 if (DO_SMALL) { SMALL_CKSUM_UPDATE (scksum, inp, SLOOK); } 5045 if (DO_SMALL) { SMALL_CKSUM_UPDATE (scksum, inp, SLOOK); }
diff --git a/xdelta3/xdelta3.h b/xdelta3/xdelta3.h
index 6204d3f..1f20c10 100644
--- a/xdelta3/xdelta3.h
+++ b/xdelta3/xdelta3.h
@@ -71,7 +71,7 @@
71 * larger this buffer, the longer a forced xd3_srcwin_setup() decision is held off. 71 * larger this buffer, the longer a forced xd3_srcwin_setup() decision is held off.
72 * Setting this value to 0 causes an unlimited buffer to be used. */ 72 * Setting this value to 0 causes an unlimited buffer to be used. */
73#ifndef XD3_DEFAULT_IOPT_SIZE 73#ifndef XD3_DEFAULT_IOPT_SIZE
74#define XD3_DEFAULT_IOPT_SIZE (1U<<12) 74#define XD3_DEFAULT_IOPT_SIZE (1U<<15)
75#endif 75#endif
76 76
77/* The maximum distance backward to search for small matches */ 77/* The maximum distance backward to search for small matches */
@@ -529,11 +529,8 @@ struct _xd3_smatcher
529 uint small_look; 529 uint small_look;
530 uint small_chain; 530 uint small_chain;
531 uint small_lchain; 531 uint small_lchain;
532 uint ssmatch;
533 uint try_lazy;
534 uint max_lazy; 532 uint max_lazy;
535 uint long_enough; 533 uint long_enough;
536 uint promote;
537}; 534};
538 535
539/* hash table size & power-of-two hash function. */ 536/* hash table size & power-of-two hash function. */
@@ -544,13 +541,10 @@ struct _xd3_hash_cfg
544 usize_t mask; 541 usize_t mask;
545}; 542};
546 543
547/* a hash-chain link in the small match table, embedded with position and checksum */ 544/* the sprev list */
548struct _xd3_slist 545struct _xd3_slist
549{ 546{
550 xd3_slist *next; 547 usize_t last_pos;
551 xd3_slist *prev;
552 usize_t pos;
553 usize_t scksum;
554}; 548};
555 549
556/* a decoder section (data, inst, or addr). there is an optimization to avoid copying 550/* a decoder section (data, inst, or addr). there is an optimization to avoid copying
@@ -815,9 +809,6 @@ struct _xd3_stream
815 usize_t i_slots_used; 809 usize_t i_slots_used;
816 810
817#if XD3_DEBUG 811#if XD3_DEBUG
818 usize_t sh_searches;
819 usize_t sh_compares;
820
821 usize_t large_ckcnt; 812 usize_t large_ckcnt;
822 813
823 /* memory usage */ 814 /* memory usage */
diff --git a/xdelta3/xdelta3.prj b/xdelta3/xdelta3.prj
index 10c9f77..3fa7b32 100644
--- a/xdelta3/xdelta3.prj
+++ b/xdelta3/xdelta3.prj
@@ -1,11 +1,11 @@
1;; -*- Prcs -*- 1;; -*- Prcs -*-
2(Created-By-Prcs-Version 1 3 4) 2(Created-By-Prcs-Version 1 3 4)
3(Project-Description "") 3(Project-Description "")
4(Project-Version xdelta3 0 19) 4(Project-Version xdelta3 0 27)
5(Parent-Version xdelta3 0 18) 5(Parent-Version xdelta3 0 26)
6(Version-Log "pre-3.0p") 6(Version-Log "raise default iopt_size, happy")
7(New-Version-Log "") 7(New-Version-Log "")
8(Checkin-Time "Sat, 17 Feb 2007 04:08:43 -0800") 8(Checkin-Time "Sun, 18 Feb 2007 03:09:06 -0800")
9(Checkin-Login jmacd) 9(Checkin-Login jmacd)
10(Populate-Ignore ("\\.svn")) 10(Populate-Ignore ("\\.svn"))
11(Project-Keywords 11(Project-Keywords
@@ -14,20 +14,20 @@
14(Files 14(Files
15 (COPYING (xdelta3/b/29_COPYING 1.1 744)) 15 (COPYING (xdelta3/b/29_COPYING 1.1 744))
16 16
17 (xdelta3-cfgs.h (xdelta3/9_xdelta3-cf 1.5 744)) 17 (xdelta3-cfgs.h (xdelta3/9_xdelta3-cf 1.8 744))
18 (xdelta3-decode.h (xdelta3/b/30_xdelta3-de 1.5 744)) 18 (xdelta3-decode.h (xdelta3/b/30_xdelta3-de 1.5 744))
19 (xdelta3-djw.h (xdelta3/8_xdelta3-dj 1.6 744)) 19 (xdelta3-djw.h (xdelta3/8_xdelta3-dj 1.6 744))
20 (xdelta3-fgk.h (xdelta3/7_xdelta3-fg 1.3 744)) 20 (xdelta3-fgk.h (xdelta3/7_xdelta3-fg 1.3 744))
21 (xdelta3-list.h (xdelta3/6_xdelta3-li 1.3 744)) 21 (xdelta3-list.h (xdelta3/6_xdelta3-li 1.3 744))
22 (xdelta3-main.h (xdelta3/5_xdelta3-ma 1.12 744)) 22 (xdelta3-main.h (xdelta3/5_xdelta3-ma 1.17 744))
23 (xdelta3-python.h (xdelta3/4_xdelta3-py 1.5 744)) 23 (xdelta3-python.h (xdelta3/4_xdelta3-py 1.5 744))
24 (xdelta3-regtest.py (xdelta3/10_xdelta3-re 1.11 744)) 24 (xdelta3-regtest.py (xdelta3/10_xdelta3-re 1.17 744))
25 (xdelta3-second.h (xdelta3/3_xdelta3-se 1.4 744)) 25 (xdelta3-second.h (xdelta3/3_xdelta3-se 1.4 744))
26 (xdelta3-test.h (xdelta3/2_xdelta3-te 1.10 744)) 26 (xdelta3-test.h (xdelta3/2_xdelta3-te 1.13 744))
27 (xdelta3.c (xdelta3/16_xdelta3.c 1.11 744)) 27 (xdelta3.c (xdelta3/16_xdelta3.c 1.16 744))
28 (xdelta3.h (xdelta3/1_xdelta3.h 1.9 744)) 28 (xdelta3.h (xdelta3/1_xdelta3.h 1.14 744))
29 29
30 (Makefile (xdelta3/0_Makefile 1.10 744)) 30 (Makefile (xdelta3/0_Makefile 1.11 744))
31 (setup.py (xdelta3/11_setup.py 1.5 744)) 31 (setup.py (xdelta3/11_setup.py 1.5 744))
32 32
33 (draft-korn-vcdiff.txt (xdelta3/b/22_draft-korn 1.1 744)) 33 (draft-korn-vcdiff.txt (xdelta3/b/22_draft-korn 1.1 744))
@@ -45,7 +45,7 @@
45 (xdelta3-test.py (xdelta3/b/28_xdelta3-te 1.3 744)) 45 (xdelta3-test.py (xdelta3/b/28_xdelta3-te 1.3 744))
46 46
47 (examples/Makefile (xdelta3/b/34_Makefile 1.1 744)) 47 (examples/Makefile (xdelta3/b/34_Makefile 1.1 744))
48 (examples/small_page_test.c (xdelta3/b/35_small_page 1.1 744)) 48 (examples/small_page_test.c (xdelta3/b/35_small_page 1.2 744))
49) 49)
50(Merge-Parents) 50(Merge-Parents)
51(New-Merge-Parents) 51(New-Merge-Parents)