diff options
Diffstat (limited to 'xdelta3/xdelta3-regtest.py')
-rwxr-xr-x | xdelta3/xdelta3-regtest.py | 241 |
1 files changed, 167 insertions, 74 deletions
diff --git a/xdelta3/xdelta3-regtest.py b/xdelta3/xdelta3-regtest.py index a1704e7..0df2065 100755 --- a/xdelta3/xdelta3-regtest.py +++ b/xdelta3/xdelta3-regtest.py | |||
@@ -17,18 +17,17 @@ | |||
17 | # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | 17 | # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
18 | 18 | ||
19 | # TODO: test 1.5 vs. greedy | 19 | # TODO: test 1.5 vs. greedy |
20 | # TODO: generate a graph | ||
21 | 20 | ||
22 | import os, sys, math, re, time, types, array, random | 21 | import os, sys, math, re, time, types, array, random |
23 | import xdelta3main | 22 | import xdelta3main |
24 | import xdelta3 | 23 | import xdelta3 |
25 | 24 | ||
26 | #RCSDIR = '/mnt/polaroid/Polaroid/orbit_linux/home/jmacd/PRCS' | 25 | #RCSDIR = '/mnt/polaroid/Polaroid/orbit_linux/home/jmacd/PRCS' |
27 | #RCSDIR = '/tmp/PRCS_read_copy' | 26 | RCSDIR = '/tmp/PRCS_read_copy/prcs' |
28 | #SAMPLEDIR = "/tmp/WESNOTH_tmp/diff" | 27 | SAMPLEDIR = "/tmp/WESNOTH_tmp/diff" |
29 | 28 | ||
30 | RCSDIR = 'G:/jmacd/PRCS/prcs/b' | 29 | #RCSDIR = 'G:/jmacd/PRCS/prcs/b' |
31 | SAMPLEDIR = "C:/sample_data/Wesnoth/tar" | 30 | #SAMPLEDIR = "C:/sample_data/Wesnoth/tar" |
32 | 31 | ||
33 | # | 32 | # |
34 | MIN_SIZE = 0 | 33 | MIN_SIZE = 0 |
@@ -45,8 +44,8 @@ SKIP_DECODE = 1 | |||
45 | MIN_STDDEV_PCT = 1.5 | 44 | MIN_STDDEV_PCT = 1.5 |
46 | 45 | ||
47 | # How many results per round | 46 | # How many results per round |
48 | MAX_RESULTS = 10 | 47 | MAX_RESULTS = 40 |
49 | TEST_ROUNDS = 50 | 48 | TEST_ROUNDS = 500 |
50 | KEEP_P = (0.5) | 49 | KEEP_P = (0.5) |
51 | 50 | ||
52 | # For RCS testing, what percent to select | 51 | # For RCS testing, what percent to select |
@@ -155,7 +154,7 @@ RE_DATE = re.compile('date: ([^;]+);.*') | |||
155 | RE_HDRSZ = re.compile('VCDIFF header size: +(\\d+)') | 154 | RE_HDRSZ = re.compile('VCDIFF header size: +(\\d+)') |
156 | RE_EXTCOMP = re.compile('XDELTA ext comp.*') | 155 | RE_EXTCOMP = re.compile('XDELTA ext comp.*') |
157 | 156 | ||
158 | def c2s(c): | 157 | def c2str(c): |
159 | return ' '.join(['%s' % x for x in c]) | 158 | return ' '.join(['%s' % x for x in c]) |
160 | #end | 159 | #end |
161 | 160 | ||
@@ -689,6 +688,7 @@ class RcsFinder: | |||
689 | self.rcsfiles = [] | 688 | self.rcsfiles = [] |
690 | self.others = [] | 689 | self.others = [] |
691 | self.skipped = [] | 690 | self.skipped = [] |
691 | self.biground = 0 | ||
692 | #end | 692 | #end |
693 | 693 | ||
694 | def Scan1(self,dir): | 694 | def Scan1(self,dir): |
@@ -777,7 +777,6 @@ class RcsFinder: | |||
777 | continue | 777 | continue |
778 | population.append(file) | 778 | population.append(file) |
779 | #end | 779 | #end |
780 | testkey = '' | ||
781 | f1sz = 0 | 780 | f1sz = 0 |
782 | f2sz = 0 | 781 | f2sz = 0 |
783 | fcount = int(len(population) * FILE_P) | 782 | fcount = int(len(population) * FILE_P) |
@@ -790,8 +789,10 @@ class RcsFinder: | |||
790 | r1, r2 = rand.sample(xrange(0, len(file.versions)), 2) | 789 | r1, r2 = rand.sample(xrange(0, len(file.versions)), 2) |
791 | f1sz += file.AppendVersion(f1, r1) | 790 | f1sz += file.AppendVersion(f1, r1) |
792 | f2sz += file.AppendVersion(f2, r2) | 791 | f2sz += file.AppendVersion(f2, r2) |
793 | testkey = testkey + '%s,%s,%s ' % (file.fname, file.Vstr(r1), file.Vstr(r2)) | 792 | #m.update('%s,%s,%s ' % (file.fname[len(RCSDIR):], file.Vstr(r1), file.Vstr(r2))) |
794 | #end | 793 | #end |
794 | testkey = 'rcs%d' % self.biground | ||
795 | self.biground = self.biground + 1 | ||
795 | 796 | ||
796 | print 'source %u bytes; target %u bytes; %s' % (f1sz, f2sz, testkey) | 797 | print 'source %u bytes; target %u bytes; %s' % (f1sz, f2sz, testkey) |
797 | f1.close() | 798 | f1.close() |
@@ -875,23 +876,28 @@ def ConfigToArgs(config): | |||
875 | 876 | ||
876 | # | 877 | # |
877 | class RandomTest: | 878 | class RandomTest: |
878 | def __init__(self, tnum, tinput, config): | 879 | def __init__(self, tnum, tinput, config, syntuple = None): |
879 | self.mytinput = tinput[2] | 880 | self.mytinput = tinput[2] |
880 | self.myconfig = config | 881 | self.myconfig = config |
881 | self.tnum = tnum | 882 | self.tnum = tnum |
882 | 883 | ||
883 | args = ConfigToArgs(config) | 884 | if syntuple: |
884 | result = TimedTest(tinput[1], tinput[0], Xdelta3Runner(args)) | 885 | self.runtime = syntuple[0] |
886 | self.compsize = syntuple[1] | ||
887 | self.decodetime = None | ||
888 | else: | ||
889 | args = ConfigToArgs(config) | ||
890 | result = TimedTest(tinput[1], tinput[0], Xdelta3Runner(args)) | ||
891 | |||
892 | self.runtime = result.encode_time.mean | ||
893 | self.compsize = result.encode_size | ||
894 | self.decodetime = result.decode_time.mean | ||
895 | #end | ||
885 | 896 | ||
886 | self.runtime = result.encode_time.mean | ||
887 | self.decodetime = result.decode_time.mean | ||
888 | self.compsize = result.encode_size | ||
889 | self.score = None | 897 | self.score = None |
890 | self.time_pos = None | 898 | self.time_pos = None |
891 | self.size_pos = None | 899 | self.size_pos = None |
892 | self.score_pos = None | 900 | self.score_pos = None |
893 | |||
894 | print 'Test %d: %s' % (tnum, self) | ||
895 | #end | 901 | #end |
896 | 902 | ||
897 | def __str__(self): | 903 | def __str__(self): |
@@ -902,7 +908,7 @@ class RandomTest: | |||
902 | return 'time %.6f%s size %d%s << %s >>%s' % ( | 908 | return 'time %.6f%s size %d%s << %s >>%s' % ( |
903 | self.time(), ((self.time_pos != None) and (" (%s)" % self.time_pos) or ""), | 909 | self.time(), ((self.time_pos != None) and (" (%s)" % self.time_pos) or ""), |
904 | self.size(), ((self.size_pos != None) and (" (%s)" % self.size_pos) or ""), | 910 | self.size(), ((self.size_pos != None) and (" (%s)" % self.size_pos) or ""), |
905 | c2s(self.config()), | 911 | c2str(self.config()), |
906 | decodestr) | 912 | decodestr) |
907 | #end | 913 | #end |
908 | 914 | ||
@@ -938,10 +944,10 @@ def PosInAlist(l, e): | |||
938 | 944 | ||
939 | # Generates a set of num_results test configurations, given the list of | 945 | # Generates a set of num_results test configurations, given the list of |
940 | # retest-configs. | 946 | # retest-configs. |
941 | def RandomTestConfigs(rand, inputs, num_results): | 947 | def RandomTestConfigs(rand, input_configs, num_results): |
942 | 948 | ||
943 | outputs = inputs[:] | 949 | outputs = input_configs[:] |
944 | have_set = dict([(c2s(i), i) for i in inputs]) | 950 | have_set = dict([(c,c) for c in input_configs]) |
945 | 951 | ||
946 | # Compute a random configuration | 952 | # Compute a random configuration |
947 | def RandomConfig(): | 953 | def RandomConfig(): |
@@ -951,20 +957,17 @@ def RandomTestConfigs(rand, inputs, num_results): | |||
951 | val = cmap[key] = (INPUT_SPEC(rand)[key])(cmap) | 957 | val = cmap[key] = (INPUT_SPEC(rand)[key])(cmap) |
952 | config.append(val) | 958 | config.append(val) |
953 | #end | 959 | #end |
954 | return config | 960 | return tuple(config) |
955 | #end | 961 | #end |
956 | 962 | ||
957 | while len(outputs) < num_results: | 963 | while len(outputs) < num_results: |
958 | newc = None | 964 | newc = None |
959 | for i in xrange(10): | 965 | for i in xrange(10): |
960 | c = RandomConfig() | 966 | c = RandomConfig() |
961 | s = c2s(c) | 967 | if have_set.has_key(c): |
962 | if have_set.has_key(s): | ||
963 | print 'continue with %s' % c | ||
964 | continue | 968 | continue |
965 | #end | 969 | #end |
966 | print 'added %s' % c | 970 | have_set[c] = c |
967 | have_set[s] = c | ||
968 | newc = c | 971 | newc = c |
969 | break | 972 | break |
970 | if newc is None: | 973 | if newc is None: |
@@ -982,10 +985,11 @@ def RunTestLoop(rand, generator, rounds): | |||
982 | for rnum in xrange(rounds): | 985 | for rnum in xrange(rounds): |
983 | configs = RandomTestConfigs(rand, configs, MAX_RESULTS) | 986 | configs = RandomTestConfigs(rand, configs, MAX_RESULTS) |
984 | tinput = generator(rand) | 987 | tinput = generator(rand) |
985 | print 'running test %s' % tinput[2] | ||
986 | tests = [] | 988 | tests = [] |
987 | for x in xrange(len(configs)): | 989 | for x in xrange(len(configs)): |
988 | tests.append(RandomTest(x, tinput, configs[x])) | 990 | t = RandomTest(x, tinput, configs[x]) |
991 | print 'Test %d: %s' % (x, t) | ||
992 | tests.append(t) | ||
989 | #end | 993 | #end |
990 | results = ScoreTests(tests) | 994 | results = ScoreTests(tests) |
991 | GraphResults('expt%d' % rnum, results) | 995 | GraphResults('expt%d' % rnum, results) |
@@ -996,43 +1000,8 @@ def RunTestLoop(rand, generator, rounds): | |||
996 | #end | 1000 | #end |
997 | #end | 1001 | #end |
998 | 1002 | ||
999 | def GraphResults(desc, results): | ||
1000 | f = open("data-%s.csv" % desc, "w") | ||
1001 | for r in results: | ||
1002 | f.write("%0.9f\t%d\t# %s\n" % (r.time(), r.size(), r)) | ||
1003 | #end | ||
1004 | f.close() | ||
1005 | os.system("./plot.sh data-%s.csv plot-%s.jpg" % (desc, desc)) | ||
1006 | #end | ||
1007 | |||
1008 | def GraphSummary(desc, results): | ||
1009 | results_by_input = {} | ||
1010 | this_tinput = results[0].tinput() | ||
1011 | |||
1012 | # all test results in this set share at least the current tinput. | ||
1013 | # find the set of all tinputs | ||
1014 | for rtest in results: | ||
1015 | s = c2s(rtest.config()) | ||
1016 | all = test_state_xxx[s] | ||
1017 | |||
1018 | print '%s have %d results for %s' % (desc, len(all), s) | ||
1019 | |||
1020 | for atest in all: | ||
1021 | if not results_by_input.has_key(atest.tinput()): | ||
1022 | results_by_input[atest.tinput()] = [atest] | ||
1023 | else: | ||
1024 | results_by_input[atest.tinput()].append(atest) | ||
1025 | #end | ||
1026 | #end | ||
1027 | #end | ||
1028 | |||
1029 | for testkey, rlist in results_by_input.items(): | ||
1030 | |||
1031 | #end | ||
1032 | #end | ||
1033 | |||
1034 | # TODO: cleanup | 1003 | # TODO: cleanup |
1035 | test_state_xxx = {} | 1004 | test_all_config_results = {} |
1036 | 1005 | ||
1037 | def ScoreTests(results): | 1006 | def ScoreTests(results): |
1038 | scored = [] | 1007 | scored = [] |
@@ -1063,17 +1032,15 @@ def ScoreTests(results): | |||
1063 | best_by_size = [] | 1032 | best_by_size = [] |
1064 | best_by_time = [] | 1033 | best_by_time = [] |
1065 | 1034 | ||
1066 | print 'Worst: %s' % scored[len(scored)-1][1] | ||
1067 | |||
1068 | pos = 0 | 1035 | pos = 0 |
1069 | for (score, test) in scored: | 1036 | for (score, test) in scored: |
1070 | pos += 1 | 1037 | pos += 1 |
1071 | test.score_pos = pos | 1038 | test.score_pos = pos |
1072 | c = c2s(test.config()) | 1039 | c = test.config() |
1073 | if not test_state_xxx.has_key(c): | 1040 | if not test_all_config_results.has_key(c): |
1074 | test_state_xxx[c] = [test] | 1041 | test_all_config_results[c] = [test] |
1075 | else: | 1042 | else: |
1076 | test_state_xxx[c].append(test) | 1043 | test_all_config_results[c].append(test) |
1077 | #end | 1044 | #end |
1078 | #end | 1045 | #end |
1079 | 1046 | ||
@@ -1085,9 +1052,9 @@ def ScoreTests(results): | |||
1085 | #end | 1052 | #end |
1086 | 1053 | ||
1087 | for test in scored: | 1054 | for test in scored: |
1088 | c = c2s(test.config()) | 1055 | c = test.config() |
1089 | s = 0.0 | 1056 | s = 0.0 |
1090 | all_r = test_state_xxx[c] | 1057 | all_r = test_all_config_results[c] |
1091 | for t in all_r: | 1058 | for t in all_r: |
1092 | s += float(t.score_pos) | 1059 | s += float(t.score_pos) |
1093 | #end | 1060 | #end |
@@ -1108,6 +1075,132 @@ def ScoreTests(results): | |||
1108 | return scored | 1075 | return scored |
1109 | #end | 1076 | #end |
1110 | 1077 | ||
1078 | def GraphResults(desc, results): | ||
1079 | f = open("data-%s.csv" % desc, "w") | ||
1080 | for r in results: | ||
1081 | f.write("%0.9f\t%d\t# %s\n" % (r.time(), r.size(), r)) | ||
1082 | #end | ||
1083 | f.close() | ||
1084 | os.system("./plot.sh data-%s.csv plot-%s.jpg" % (desc, desc)) | ||
1085 | #end | ||
1086 | |||
1087 | def GraphSummary(desc, results_ignore): | ||
1088 | test_population = 0 | ||
1089 | config_ordered = [] | ||
1090 | |||
1091 | # drops duplicate test/config pairs (TODO: don't retest them) | ||
1092 | for config, cresults in test_all_config_results.items(): | ||
1093 | input_config_map = {} | ||
1094 | uniq = [] | ||
1095 | for test in cresults: | ||
1096 | assert test.config() == config | ||
1097 | test_population += 1 | ||
1098 | key = test.tinput() | ||
1099 | if not input_config_map.has_key(key): | ||
1100 | input_config_map[key] = {} | ||
1101 | #end | ||
1102 | if input_config_map[key].has_key(config): | ||
1103 | print 'skipping repeat test %s vs. %s' % (input_config_map[key][config], test) | ||
1104 | continue | ||
1105 | #end | ||
1106 | input_config_map[key][config] = test | ||
1107 | uniq.append(test) | ||
1108 | #end | ||
1109 | config_ordered.append(uniq) | ||
1110 | #end | ||
1111 | |||
1112 | # sort configs descending by number of tests | ||
1113 | config_ordered.sort(lambda x, y: len(y) - len(x)) | ||
1114 | |||
1115 | print 'population: %d: %d configs max %d results' % \ | ||
1116 | (test_population, | ||
1117 | len(config_ordered), | ||
1118 | len(config_ordered[0])) | ||
1119 | |||
1120 | if config_ordered[0] == 1: | ||
1121 | return | ||
1122 | #end | ||
1123 | |||
1124 | # a map from test-key to test-list w/ various configs | ||
1125 | input_set = {} | ||
1126 | osize = len(config_ordered) | ||
1127 | |||
1128 | for i in xrange(len(config_ordered)): | ||
1129 | config = config_ordered[i][0].config() | ||
1130 | config_tests = config_ordered[i] | ||
1131 | |||
1132 | print '%s has %d tested inputs' % (config, len(config_tests)) | ||
1133 | |||
1134 | if len(input_set) == 0: | ||
1135 | input_set = dict([(t.tinput(), [t]) for t in config_tests]) | ||
1136 | continue | ||
1137 | #end | ||
1138 | |||
1139 | # a map from test-key to test-list w/ various configs | ||
1140 | update_set = {} | ||
1141 | for r in config_tests: | ||
1142 | t = r.tinput() | ||
1143 | if input_set.has_key(t): | ||
1144 | update_set[t] = input_set[t] + [r] | ||
1145 | else: | ||
1146 | #print 'config %s does not have test %s' % (config, t) | ||
1147 | pass | ||
1148 | #end | ||
1149 | #end | ||
1150 | |||
1151 | if len(update_set) <= 1: | ||
1152 | break | ||
1153 | #end | ||
1154 | |||
1155 | input_set = update_set | ||
1156 | |||
1157 | # continue if there are more w/ the same number of inputs | ||
1158 | if i < (len(config_ordered) - 1) and \ | ||
1159 | len(config_ordered[i + 1]) == len(config_tests): | ||
1160 | print 'b %s' % i | ||
1161 | continue | ||
1162 | #end | ||
1163 | |||
1164 | # synthesize results for multi-test inputs | ||
1165 | config_num = None | ||
1166 | |||
1167 | # map of config to sum(various test-keys) | ||
1168 | smap = {} | ||
1169 | for (key, tests) in input_set.items(): | ||
1170 | if config_num == None: | ||
1171 | config_num = len(tests) | ||
1172 | smap = dict([(r.config(), | ||
1173 | (r.time(), | ||
1174 | r.size())) | ||
1175 | for r in tests]) | ||
1176 | else: | ||
1177 | assert config_num == len(tests) | ||
1178 | smap = dict([(r.config(), | ||
1179 | (smap[r.config()][0] + r.time(), | ||
1180 | smap[r.config()][1] + r.size())) | ||
1181 | for r in tests]) | ||
1182 | #end | ||
1183 | #end | ||
1184 | |||
1185 | if config_num == 1: | ||
1186 | print 'c %s' % smap | ||
1187 | continue | ||
1188 | #end | ||
1189 | |||
1190 | summary = '%s-%d' % (desc, len(input_set)) | ||
1191 | print "what?", osize, len(input_set), input_set | ||
1192 | assert len(input_set) < osize | ||
1193 | osize = len(input_set) | ||
1194 | |||
1195 | print 'generate %s w/ %d configs' % (summary, config_num) | ||
1196 | syn = [RandomTest(0, (None, None, summary), config, | ||
1197 | syntuple = (smap[config][0], smap[config][1])) | ||
1198 | for config in smap.keys()] | ||
1199 | syn = ScoreTests(syn) | ||
1200 | GraphResults(summary, syn) | ||
1201 | #end | ||
1202 | #end | ||
1203 | |||
1111 | if __name__ == "__main__": | 1204 | if __name__ == "__main__": |
1112 | try: | 1205 | try: |
1113 | RunCommand(['rm', '-rf', TMPDIR]) | 1206 | RunCommand(['rm', '-rf', TMPDIR]) |