summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorjosh.macdonald <jmacd@users.noreply.github.com>2007-03-06 08:34:52 +0000
committerjosh.macdonald <jmacd@users.noreply.github.com>2007-03-06 08:34:52 +0000
commitca80197380e9e5c2add57329d0dbf79893034b6b (patch)
tree1ceb324e9f792b9736fec1bf3f3e59ecade1e764
parent350d4f60d226eb8b370e7c0b8b1157a83baa8570 (diff)
regtest is not quite summarizing correctly
-rw-r--r--xdelta3/Makefile12
-rw-r--r--xdelta3/plot.sh12
-rwxr-xr-xxdelta3/xdelta3-regtest.py241
3 files changed, 183 insertions, 82 deletions
diff --git a/xdelta3/Makefile b/xdelta3/Makefile
index 94bb698..12e680c 100644
--- a/xdelta3/Makefile
+++ b/xdelta3/Makefile
@@ -35,11 +35,11 @@ TARGETS = xdelta3-debug \
35 35
36PYTHON = python 36PYTHON = python
37 37
38#SWIGTGT = xdelta3module.so 38SWIGTGT = xdelta3module.so
39SWIGTGT = xdelta3module.dll 39#SWIGTGT = xdelta3module.dll
40 40
41#PYTGT = build/lib.linux-i686-2.4/xdelta3main.so 41PYTGT = build/lib.linux-i686-2.4/xdelta3main.so
42PYTGT = build/lib.cygwin-1.5.24-i686-2.4/xdelta3main.dll 42#PYTGT = build/lib.cygwin-1.5.24-i686-2.4/xdelta3main.dll
43 43
44EXTRA = Makefile COPYING linkxd3lib.c badcopy.c xdelta3.swig \ 44EXTRA = Makefile COPYING linkxd3lib.c badcopy.c xdelta3.swig \
45 draft-korn-vcdiff.txt xdelta3.vcproj badcopy.vcproj \ 45 draft-korn-vcdiff.txt xdelta3.vcproj badcopy.vcproj \
@@ -60,7 +60,9 @@ SWIG_FLAGS = -DXD3_DEBUG=0 \
60REL=0p 60REL=0p
61RELDIR = xdelta3$(REL) 61RELDIR = xdelta3$(REL)
62 62
63all: xdelta3-debug xdelta3 $(PYTGT) $(SWIGTGT) 63all: xdelta3-debug xdelta3
64
65all-py: all $(PYTGT) $(SWIGTGT)
64 66
65all-targets: $(TARGETS) 67all-targets: $(TARGETS)
66 68
diff --git a/xdelta3/plot.sh b/xdelta3/plot.sh
index d5f8dab..8370ae7 100644
--- a/xdelta3/plot.sh
+++ b/xdelta3/plot.sh
@@ -2,11 +2,15 @@
2 2
3G=/usr/bin/gnuplot 3G=/usr/bin/gnuplot
4 4
5D=./output_dir
6
5I=$1 7I=$1
6O=$2 8O=$D/$2
7 9
8$G > $O <<EOF 10$G > $O <<EOF
9set terminal jpeg 11
12#set terminal jpeg
13set terminal png
10 14
11f(x) = 1331000 + 30000 * (1 / (x - 2.45)) 15f(x) = 1331000 + 30000 * (1 / (x - 2.45))
12 16
@@ -14,6 +18,8 @@ f(x) = 1331000 + 30000 * (1 / (x - 2.45))
14# plot sin(x), cos(x) 18# plot sin(x), cos(x)
15# , f(x) 19# , f(x)
16 20
17plot "$I" using 1:2, f(x) 21plot "$I" using 1:2
18 22
19EOF 23EOF
24
25mv "$I" "$D"
diff --git a/xdelta3/xdelta3-regtest.py b/xdelta3/xdelta3-regtest.py
index a1704e7..0df2065 100755
--- a/xdelta3/xdelta3-regtest.py
+++ b/xdelta3/xdelta3-regtest.py
@@ -17,18 +17,17 @@
17# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 17# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 18
19# TODO: test 1.5 vs. greedy 19# TODO: test 1.5 vs. greedy
20# TODO: generate a graph
21 20
22import os, sys, math, re, time, types, array, random 21import os, sys, math, re, time, types, array, random
23import xdelta3main 22import xdelta3main
24import xdelta3 23import xdelta3
25 24
26#RCSDIR = '/mnt/polaroid/Polaroid/orbit_linux/home/jmacd/PRCS' 25#RCSDIR = '/mnt/polaroid/Polaroid/orbit_linux/home/jmacd/PRCS'
27#RCSDIR = '/tmp/PRCS_read_copy' 26RCSDIR = '/tmp/PRCS_read_copy/prcs'
28#SAMPLEDIR = "/tmp/WESNOTH_tmp/diff" 27SAMPLEDIR = "/tmp/WESNOTH_tmp/diff"
29 28
30RCSDIR = 'G:/jmacd/PRCS/prcs/b' 29#RCSDIR = 'G:/jmacd/PRCS/prcs/b'
31SAMPLEDIR = "C:/sample_data/Wesnoth/tar" 30#SAMPLEDIR = "C:/sample_data/Wesnoth/tar"
32 31
33# 32#
34MIN_SIZE = 0 33MIN_SIZE = 0
@@ -45,8 +44,8 @@ SKIP_DECODE = 1
45MIN_STDDEV_PCT = 1.5 44MIN_STDDEV_PCT = 1.5
46 45
47# How many results per round 46# How many results per round
48MAX_RESULTS = 10 47MAX_RESULTS = 40
49TEST_ROUNDS = 50 48TEST_ROUNDS = 500
50KEEP_P = (0.5) 49KEEP_P = (0.5)
51 50
52# For RCS testing, what percent to select 51# For RCS testing, what percent to select
@@ -155,7 +154,7 @@ RE_DATE = re.compile('date: ([^;]+);.*')
155RE_HDRSZ = re.compile('VCDIFF header size: +(\\d+)') 154RE_HDRSZ = re.compile('VCDIFF header size: +(\\d+)')
156RE_EXTCOMP = re.compile('XDELTA ext comp.*') 155RE_EXTCOMP = re.compile('XDELTA ext comp.*')
157 156
158def c2s(c): 157def c2str(c):
159 return ' '.join(['%s' % x for x in c]) 158 return ' '.join(['%s' % x for x in c])
160#end 159#end
161 160
@@ -689,6 +688,7 @@ class RcsFinder:
689 self.rcsfiles = [] 688 self.rcsfiles = []
690 self.others = [] 689 self.others = []
691 self.skipped = [] 690 self.skipped = []
691 self.biground = 0
692 #end 692 #end
693 693
694 def Scan1(self,dir): 694 def Scan1(self,dir):
@@ -777,7 +777,6 @@ class RcsFinder:
777 continue 777 continue
778 population.append(file) 778 population.append(file)
779 #end 779 #end
780 testkey = ''
781 f1sz = 0 780 f1sz = 0
782 f2sz = 0 781 f2sz = 0
783 fcount = int(len(population) * FILE_P) 782 fcount = int(len(population) * FILE_P)
@@ -790,8 +789,10 @@ class RcsFinder:
790 r1, r2 = rand.sample(xrange(0, len(file.versions)), 2) 789 r1, r2 = rand.sample(xrange(0, len(file.versions)), 2)
791 f1sz += file.AppendVersion(f1, r1) 790 f1sz += file.AppendVersion(f1, r1)
792 f2sz += file.AppendVersion(f2, r2) 791 f2sz += file.AppendVersion(f2, r2)
793 testkey = testkey + '%s,%s,%s ' % (file.fname, file.Vstr(r1), file.Vstr(r2)) 792 #m.update('%s,%s,%s ' % (file.fname[len(RCSDIR):], file.Vstr(r1), file.Vstr(r2)))
794 #end 793 #end
794 testkey = 'rcs%d' % self.biground
795 self.biground = self.biground + 1
795 796
796 print 'source %u bytes; target %u bytes; %s' % (f1sz, f2sz, testkey) 797 print 'source %u bytes; target %u bytes; %s' % (f1sz, f2sz, testkey)
797 f1.close() 798 f1.close()
@@ -875,23 +876,28 @@ def ConfigToArgs(config):
875 876
876# 877#
877class RandomTest: 878class RandomTest:
878 def __init__(self, tnum, tinput, config): 879 def __init__(self, tnum, tinput, config, syntuple = None):
879 self.mytinput = tinput[2] 880 self.mytinput = tinput[2]
880 self.myconfig = config 881 self.myconfig = config
881 self.tnum = tnum 882 self.tnum = tnum
882 883
883 args = ConfigToArgs(config) 884 if syntuple:
884 result = TimedTest(tinput[1], tinput[0], Xdelta3Runner(args)) 885 self.runtime = syntuple[0]
886 self.compsize = syntuple[1]
887 self.decodetime = None
888 else:
889 args = ConfigToArgs(config)
890 result = TimedTest(tinput[1], tinput[0], Xdelta3Runner(args))
891
892 self.runtime = result.encode_time.mean
893 self.compsize = result.encode_size
894 self.decodetime = result.decode_time.mean
895 #end
885 896
886 self.runtime = result.encode_time.mean
887 self.decodetime = result.decode_time.mean
888 self.compsize = result.encode_size
889 self.score = None 897 self.score = None
890 self.time_pos = None 898 self.time_pos = None
891 self.size_pos = None 899 self.size_pos = None
892 self.score_pos = None 900 self.score_pos = None
893
894 print 'Test %d: %s' % (tnum, self)
895 #end 901 #end
896 902
897 def __str__(self): 903 def __str__(self):
@@ -902,7 +908,7 @@ class RandomTest:
902 return 'time %.6f%s size %d%s << %s >>%s' % ( 908 return 'time %.6f%s size %d%s << %s >>%s' % (
903 self.time(), ((self.time_pos != None) and (" (%s)" % self.time_pos) or ""), 909 self.time(), ((self.time_pos != None) and (" (%s)" % self.time_pos) or ""),
904 self.size(), ((self.size_pos != None) and (" (%s)" % self.size_pos) or ""), 910 self.size(), ((self.size_pos != None) and (" (%s)" % self.size_pos) or ""),
905 c2s(self.config()), 911 c2str(self.config()),
906 decodestr) 912 decodestr)
907 #end 913 #end
908 914
@@ -938,10 +944,10 @@ def PosInAlist(l, e):
938 944
939# Generates a set of num_results test configurations, given the list of 945# Generates a set of num_results test configurations, given the list of
940# retest-configs. 946# retest-configs.
941def RandomTestConfigs(rand, inputs, num_results): 947def RandomTestConfigs(rand, input_configs, num_results):
942 948
943 outputs = inputs[:] 949 outputs = input_configs[:]
944 have_set = dict([(c2s(i), i) for i in inputs]) 950 have_set = dict([(c,c) for c in input_configs])
945 951
946 # Compute a random configuration 952 # Compute a random configuration
947 def RandomConfig(): 953 def RandomConfig():
@@ -951,20 +957,17 @@ def RandomTestConfigs(rand, inputs, num_results):
951 val = cmap[key] = (INPUT_SPEC(rand)[key])(cmap) 957 val = cmap[key] = (INPUT_SPEC(rand)[key])(cmap)
952 config.append(val) 958 config.append(val)
953 #end 959 #end
954 return config 960 return tuple(config)
955 #end 961 #end
956 962
957 while len(outputs) < num_results: 963 while len(outputs) < num_results:
958 newc = None 964 newc = None
959 for i in xrange(10): 965 for i in xrange(10):
960 c = RandomConfig() 966 c = RandomConfig()
961 s = c2s(c) 967 if have_set.has_key(c):
962 if have_set.has_key(s):
963 print 'continue with %s' % c
964 continue 968 continue
965 #end 969 #end
966 print 'added %s' % c 970 have_set[c] = c
967 have_set[s] = c
968 newc = c 971 newc = c
969 break 972 break
970 if newc is None: 973 if newc is None:
@@ -982,10 +985,11 @@ def RunTestLoop(rand, generator, rounds):
982 for rnum in xrange(rounds): 985 for rnum in xrange(rounds):
983 configs = RandomTestConfigs(rand, configs, MAX_RESULTS) 986 configs = RandomTestConfigs(rand, configs, MAX_RESULTS)
984 tinput = generator(rand) 987 tinput = generator(rand)
985 print 'running test %s' % tinput[2]
986 tests = [] 988 tests = []
987 for x in xrange(len(configs)): 989 for x in xrange(len(configs)):
988 tests.append(RandomTest(x, tinput, configs[x])) 990 t = RandomTest(x, tinput, configs[x])
991 print 'Test %d: %s' % (x, t)
992 tests.append(t)
989 #end 993 #end
990 results = ScoreTests(tests) 994 results = ScoreTests(tests)
991 GraphResults('expt%d' % rnum, results) 995 GraphResults('expt%d' % rnum, results)
@@ -996,43 +1000,8 @@ def RunTestLoop(rand, generator, rounds):
996 #end 1000 #end
997#end 1001#end
998 1002
999def GraphResults(desc, results):
1000 f = open("data-%s.csv" % desc, "w")
1001 for r in results:
1002 f.write("%0.9f\t%d\t# %s\n" % (r.time(), r.size(), r))
1003 #end
1004 f.close()
1005 os.system("./plot.sh data-%s.csv plot-%s.jpg" % (desc, desc))
1006#end
1007
1008def GraphSummary(desc, results):
1009 results_by_input = {}
1010 this_tinput = results[0].tinput()
1011
1012 # all test results in this set share at least the current tinput.
1013 # find the set of all tinputs
1014 for rtest in results:
1015 s = c2s(rtest.config())
1016 all = test_state_xxx[s]
1017
1018 print '%s have %d results for %s' % (desc, len(all), s)
1019
1020 for atest in all:
1021 if not results_by_input.has_key(atest.tinput()):
1022 results_by_input[atest.tinput()] = [atest]
1023 else:
1024 results_by_input[atest.tinput()].append(atest)
1025 #end
1026 #end
1027 #end
1028
1029 for testkey, rlist in results_by_input.items():
1030
1031 #end
1032#end
1033
1034# TODO: cleanup 1003# TODO: cleanup
1035test_state_xxx = {} 1004test_all_config_results = {}
1036 1005
1037def ScoreTests(results): 1006def ScoreTests(results):
1038 scored = [] 1007 scored = []
@@ -1063,17 +1032,15 @@ def ScoreTests(results):
1063 best_by_size = [] 1032 best_by_size = []
1064 best_by_time = [] 1033 best_by_time = []
1065 1034
1066 print 'Worst: %s' % scored[len(scored)-1][1]
1067
1068 pos = 0 1035 pos = 0
1069 for (score, test) in scored: 1036 for (score, test) in scored:
1070 pos += 1 1037 pos += 1
1071 test.score_pos = pos 1038 test.score_pos = pos
1072 c = c2s(test.config()) 1039 c = test.config()
1073 if not test_state_xxx.has_key(c): 1040 if not test_all_config_results.has_key(c):
1074 test_state_xxx[c] = [test] 1041 test_all_config_results[c] = [test]
1075 else: 1042 else:
1076 test_state_xxx[c].append(test) 1043 test_all_config_results[c].append(test)
1077 #end 1044 #end
1078 #end 1045 #end
1079 1046
@@ -1085,9 +1052,9 @@ def ScoreTests(results):
1085 #end 1052 #end
1086 1053
1087 for test in scored: 1054 for test in scored:
1088 c = c2s(test.config()) 1055 c = test.config()
1089 s = 0.0 1056 s = 0.0
1090 all_r = test_state_xxx[c] 1057 all_r = test_all_config_results[c]
1091 for t in all_r: 1058 for t in all_r:
1092 s += float(t.score_pos) 1059 s += float(t.score_pos)
1093 #end 1060 #end
@@ -1108,6 +1075,132 @@ def ScoreTests(results):
1108 return scored 1075 return scored
1109#end 1076#end
1110 1077
1078def GraphResults(desc, results):
1079 f = open("data-%s.csv" % desc, "w")
1080 for r in results:
1081 f.write("%0.9f\t%d\t# %s\n" % (r.time(), r.size(), r))
1082 #end
1083 f.close()
1084 os.system("./plot.sh data-%s.csv plot-%s.jpg" % (desc, desc))
1085#end
1086
1087def GraphSummary(desc, results_ignore):
1088 test_population = 0
1089 config_ordered = []
1090
1091 # drops duplicate test/config pairs (TODO: don't retest them)
1092 for config, cresults in test_all_config_results.items():
1093 input_config_map = {}
1094 uniq = []
1095 for test in cresults:
1096 assert test.config() == config
1097 test_population += 1
1098 key = test.tinput()
1099 if not input_config_map.has_key(key):
1100 input_config_map[key] = {}
1101 #end
1102 if input_config_map[key].has_key(config):
1103 print 'skipping repeat test %s vs. %s' % (input_config_map[key][config], test)
1104 continue
1105 #end
1106 input_config_map[key][config] = test
1107 uniq.append(test)
1108 #end
1109 config_ordered.append(uniq)
1110 #end
1111
1112 # sort configs descending by number of tests
1113 config_ordered.sort(lambda x, y: len(y) - len(x))
1114
1115 print 'population: %d: %d configs max %d results' % \
1116 (test_population,
1117 len(config_ordered),
1118 len(config_ordered[0]))
1119
1120 if config_ordered[0] == 1:
1121 return
1122 #end
1123
1124 # a map from test-key to test-list w/ various configs
1125 input_set = {}
1126 osize = len(config_ordered)
1127
1128 for i in xrange(len(config_ordered)):
1129 config = config_ordered[i][0].config()
1130 config_tests = config_ordered[i]
1131
1132 print '%s has %d tested inputs' % (config, len(config_tests))
1133
1134 if len(input_set) == 0:
1135 input_set = dict([(t.tinput(), [t]) for t in config_tests])
1136 continue
1137 #end
1138
1139 # a map from test-key to test-list w/ various configs
1140 update_set = {}
1141 for r in config_tests:
1142 t = r.tinput()
1143 if input_set.has_key(t):
1144 update_set[t] = input_set[t] + [r]
1145 else:
1146 #print 'config %s does not have test %s' % (config, t)
1147 pass
1148 #end
1149 #end
1150
1151 if len(update_set) <= 1:
1152 break
1153 #end
1154
1155 input_set = update_set
1156
1157 # continue if there are more w/ the same number of inputs
1158 if i < (len(config_ordered) - 1) and \
1159 len(config_ordered[i + 1]) == len(config_tests):
1160 print 'b %s' % i
1161 continue
1162 #end
1163
1164 # synthesize results for multi-test inputs
1165 config_num = None
1166
1167 # map of config to sum(various test-keys)
1168 smap = {}
1169 for (key, tests) in input_set.items():
1170 if config_num == None:
1171 config_num = len(tests)
1172 smap = dict([(r.config(),
1173 (r.time(),
1174 r.size()))
1175 for r in tests])
1176 else:
1177 assert config_num == len(tests)
1178 smap = dict([(r.config(),
1179 (smap[r.config()][0] + r.time(),
1180 smap[r.config()][1] + r.size()))
1181 for r in tests])
1182 #end
1183 #end
1184
1185 if config_num == 1:
1186 print 'c %s' % smap
1187 continue
1188 #end
1189
1190 summary = '%s-%d' % (desc, len(input_set))
1191 print "what?", osize, len(input_set), input_set
1192 assert len(input_set) < osize
1193 osize = len(input_set)
1194
1195 print 'generate %s w/ %d configs' % (summary, config_num)
1196 syn = [RandomTest(0, (None, None, summary), config,
1197 syntuple = (smap[config][0], smap[config][1]))
1198 for config in smap.keys()]
1199 syn = ScoreTests(syn)
1200 GraphResults(summary, syn)
1201 #end
1202#end
1203
1111if __name__ == "__main__": 1204if __name__ == "__main__":
1112 try: 1205 try:
1113 RunCommand(['rm', '-rf', TMPDIR]) 1206 RunCommand(['rm', '-rf', TMPDIR])