summaryrefslogtreecommitdiff
path: root/xdelta3/testing/xdelta3-regtest.py
diff options
context:
space:
mode:
authorjosh.macdonald <jmacd@users.noreply.github.com>2009-10-24 01:13:46 +0000
committerjosh.macdonald <jmacd@users.noreply.github.com>2009-10-24 01:13:46 +0000
commit8681251a19d6a845a725fba60eb0a4c9fa3c375c (patch)
tree4bf7e0ec84e4d4516958a3cd52c594f828f729cd /xdelta3/testing/xdelta3-regtest.py
parent80c9e15bc657b623c19cda6af2778dacd906be81 (diff)
Move and re-invigorate the regtest, fix 1 harmless compiler warning
Diffstat (limited to 'xdelta3/testing/xdelta3-regtest.py')
-rwxr-xr-xxdelta3/testing/xdelta3-regtest.py1225
1 files changed, 1225 insertions, 0 deletions
diff --git a/xdelta3/testing/xdelta3-regtest.py b/xdelta3/testing/xdelta3-regtest.py
new file mode 100755
index 0000000..3c5bfd6
--- /dev/null
+++ b/xdelta3/testing/xdelta3-regtest.py
@@ -0,0 +1,1225 @@
1#!/usr/bin/python2.5
2# xdelta 3 - delta compression tools and library
3# Copyright (C) 2003, 2006, 2007, 2008. Joshua P. MacDonald
4#
5# This program is free software; you can redistribute it and/or modify
6# it under the terms of the GNU General Public License as published by
7# the Free Software Foundation; either version 2 of the License, or
8# (at your option) any later version.
9#
10# This program is distributed in the hope that it will be useful,
11# but WITHOUT ANY WARRANTY; without even the implied warranty of
12# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13# GNU General Public License for more details.
14#
15# You should have received a copy of the GNU General Public License
16# along with this program; if not, write to the Free Software
17# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18
19# TODO: test 1.5 vs. greedy
20
21import os, sys, math, re, time, types, array, random
22import xdelta3
23import xdelta3main
24
# Locations of test data.  The commented-out values are alternative
# locations; the last uncommented RCSDIR assignment is the one in effect.
#RCSDIR = '/mnt/polaroid/Polaroid/orbit_linux/home/jmacd/PRCS'
#RCSDIR = '/tmp/PRCS_read_copy'
#SAMPLEDIR = "/tmp/WESNOTH_tmp/diff"

#RCSDIR = 'G:/jmacd/PRCS_copy'
#SAMPLEDIR = "C:/sample_data/Wesnoth/tar"

#RCSDIR = '/Users/jmacd/src/ftp.kernel.org/pub/scm/linux/kernel/bkcvs/linux-2.4/net/x25'
RCSDIR = '/Users/jmacd/src/ftp.kernel.org/pub/scm/linux/kernel/bkcvs/linux-2.4/fs'
RCSDIR = '/Users/jmacd/src/ftp.kernel.org'

# Checked-out versions smaller than this many bytes are not timed.
MIN_SIZE = 0

TIME_TOO_SHORT = 0.050

# Trial counts used by TimedTest: leading trials that are not measured,
# then the minimum and maximum number of measured trials.
SKIP_TRIALS = 2
MIN_TRIALS = 3
MAX_TRIALS = 15

# Measuring stops once the standard deviation is below this percentage
# of the mean.  10 = fast 1.5 = slow
MIN_STDDEV_PCT = 1.5

# How many results per round
MAX_RESULTS = 500
TEST_ROUNDS = 500
# Fraction of MAX_RESULTS whose configurations are re-tested next round.
KEEP_P = 0.5

# For RCS testing, what percent to select
FILE_P = 0.50

# For run-speed tests
MIN_RUN = 1 * 1000 * 1000
MAX_RUN = 10 * 1000 * 1000

# Testwide defaults
ALL_ARGS = ['-vv']

# The first 7 args go to -C
SOFT_CONFIG_CNT = 7

# Order of the values inside a configuration tuple.
CONFIG_ORDER = [
    'large_look',
    'large_step',
    'small_look',
    'small_chain',
    'small_lchain',
    'max_lazy',
    'long_enough',

    # > SOFT_CONFIG_CNT
    'nocompress',
    'winsize',
    'srcwinsize',
    'sprevsz',
    'iopt',
    'djw',
    'altcode',
]

# Command-line flag for every configuration key past SOFT_CONFIG_CNT.
CONFIG_ARGMAP = {
    'winsize': '-W',
    'srcwinsize': '-B',
    'sprevsz': '-P',
    'iopt': '-I',
    'nocompress': '-N',
    'djw': '-Sdjw',
    'altcode': '-T',
}
95
def INPUT_SPEC(rand):
    """Return a dict mapping each configuration key to a value generator.

    Every generator takes one argument (the dict of values chosen so far,
    which none of them currently reads) and returns a value for its key.
    Generators built with pick() draw from `rand` on every call, even when
    only one option is listed.
    """
    def pick(options):
        return lambda d: rand.choice(options)

    def fixed(value):
        return lambda d: value

    return {
        # Time/space costs:

        # -C 1,2,3,4,5,6,7
        'large_look': pick([9, 10, 11, 12]),
        'large_step': pick([25, 26, 27, 28, 29, 30]),
        'small_look': pick([4]),
        'small_chain': pick([1]),
        'small_lchain': pick([1]),
        'max_lazy': pick([4, 5, 6, 7, 8, 9, 10]),

        # Note: long_enough only refers to small matching and has no
        # effect if small_chain == 1.
        'long_enough': pick([4]),

        # -N
        'nocompress': pick(['false']),

        # -T
        'altcode': pick(['false']),

        # -S djw
        'djw': pick(['false']),

        # Memory costs:

        # -W
        'winsize': fixed(8 * (1 << 20)),

        # -B
        'srcwinsize': fixed(64 * (1 << 20)),

        # -I 0 is unlimited
        'iopt': fixed(0),

        # -P only powers of two
        'sprevsz': pick([4 * (1 << 16)]),
    }
#end
137
138#
# Per-process scratch directory and the work files created inside it.
TMPDIR = '/tmp/xd3regtest.%d' % os.getpid()

RUNFILE = os.path.join(TMPDIR, 'run')
DFILE = os.path.join(TMPDIR, 'output')
RFILE = os.path.join(TMPDIR, 'recon')

# States of the rlog-output parser in RcsFile (values 0..3).
(HEAD_STATE, BAR_STATE, REV_STATE, DATE_STATE) = range(4)

# RCS file names matching this pattern are left out of the big test files.
IGNORE_FILENAME = re.compile(r'.*\.(gif|jpg).*')

# rcs output
RE_TOTREV = re.compile(r'total revisions: (\d+)')
RE_BAR = re.compile('----------------------------')
RE_REV = re.compile('revision (.+)')
RE_DATE = re.compile('date: ([^;]+);.*')
# xdelta output
RE_HDRSZ = re.compile(r'VCDIFF header size: +(\d+)')
RE_EXTCOMP = re.compile('XDELTA ext comp.*')
def c2str(c):
    """Return the items of config `c` formatted with %s, joined by spaces."""
    words = []
    for item in c:
        words.append('%s' % item)
    return ' '.join(words)
#end
165
def SumList(l):
    """Return the sum of the numbers in `l`.

    An empty list sums to 0; the previous reduce()-based version raised
    TypeError there because it had no initial value.
    """
    return sum(l)
#end
169
# Summary statistics over a list of numbers: total, mean, sample standard
# deviation (s), quartile picks q0..q4, (q3-q1)/2 ("semi-interquartile
# range") and spread, plus a one-line text rendering in .str.
class StatList:
    def __init__(self,l,desc):
        """Sort `l` in place and compute the statistics.

        `l` must hold at least two values (asserted); `desc` is the label
        that starts the .str summary.
        """
        n = len(l)
        assert(n > 1)
        l.sort()

        total = SumList(l)
        mean = total / float(n)
        squares = [(x-mean) * (x - mean) for x in l]

        self.cnt = n
        self.l = l
        self.total = total
        self.mean = mean
        # n-1 in the denominator: sample standard deviation.
        self.s = math.sqrt(SumList(squares) / float(n-1))

        # Quartiles are picked by rounding a fractional position to an
        # index; only the q3 index is clamped to the last element.
        self.q0 = l[0]
        self.q1 = l[int(n/4.0+0.5)]
        self.q2 = l[int(n/2.0+0.5)]
        self.q3 = l[min(n-1,int((3.0*n)/4.0+0.5))]
        # q4 is the largest value plus one, so spread is max - min + 1.
        self.q4 = l[n-1]+1
        self.siqr = (self.q3-self.q1)/2.0
        self.spread = (self.q4-self.q0)

        fields = (desc, self.total, self.mean, self.s, self.q2, self.siqr, self.spread)
        self.str = '%s %d; mean %d; sdev %d; q2 %d; .5(q3-q1) %.1f; spread %d' % fields
    #end
#end
193
def RunCommand(args, ok = (0,)):
    """Run `args` (program name first) and wait for it to finish.

    `ok` is the collection of acceptable os.spawnvp() results.  The default
    is an immutable tuple rather than a shared mutable list.
    Raises CommandError when the result is not in `ok`.
    """
    #print 'run command %s' % (' '.join(args))
    status = os.spawnvp(os.P_WAIT, args[0], args)
    if status not in ok:
        raise CommandError(args, 'exited %d' % status)
#end
201
def RunCommandIO(args,infn,outfn):
    """Run `args` with stdin read from `infn` and stdout written to `outfn`.

    `outfn` is created if needed and truncated.  Raises CommandError when
    the child does not exit normally or exits with a non-zero status.
    """
    pid = os.fork()
    if pid == 0:
        # Child.  If opening a file or the exec itself fails, the exception
        # must not travel up the stack: that would leave a second copy of
        # the test program running.  os._exit() ends the child at once.
        try:
            os.dup2(os.open(infn,os.O_RDONLY),0)
            os.dup2(os.open(outfn,os.O_CREAT|os.O_TRUNC|os.O_WRONLY),1)
            os.execvp(args[0], args)
        finally:
            os._exit(127)

    status = os.waitpid(pid,0)[1]
    # The exit status only means something after a normal exit.
    if not os.WIFEXITED(status):
        raise CommandError(args, 'terminated abnormally (wait status %d)' % status)
    code = os.WEXITSTATUS(status)
    if code != 0:
        raise CommandError(args, 'exited %d' % code)
#end
216
class TimedTest:
    """Time the encoding and decoding of one target/source pair.

    `runnable` must provide Encode(target, source, output),
    Decode(input, source, output), EncodeSize(output) and
    Verify(target, recon).  Construction runs the whole experiment and
    leaves the results in:

      encode_time, decode_time -- StatList of wall-clock seconds
      encode_size              -- runnable.EncodeSize(DFILE)
    """

    def __init__(self, target, source, runnable,
                 skip_trials = SKIP_TRIALS,
                 min_trials = MIN_TRIALS,
                 max_trials = MAX_TRIALS,
                 min_stddev_pct = MIN_STDDEV_PCT):
        self.target = target
        self.source = source
        self.runnable = runnable

        self.skip_trials = skip_trials
        self.min_trials = min(min_trials, max_trials)
        self.max_trials = max_trials
        self.min_stddev_pct = min_stddev_pct

        self.encode_time = self.DoTest(DFILE,
                                       lambda x: x.Encode(self.target, self.source, DFILE))
        self.encode_size = runnable.EncodeSize(DFILE)

        self.decode_time = self.DoTest(RFILE,
                                       lambda x: x.Decode(DFILE, self.source, RFILE))

        # verify
        runnable.Verify(self.target, RFILE)
    #end

    def DoTest(self, fname, func):
        """Call func(self.runnable) repeatedly; return a StatList of times.

        `fname` is removed before every trial.  The first skip_trials runs
        are not recorded.  Measuring stops when the standard deviation is
        below min_stddev_pct percent of the mean, or when max_trials
        measured runs have been made.
        """
        trials = 0
        measured = []
        # At least one measured trial is needed, whatever min_trials says.
        needed = self.skip_trials + max(self.min_trials, 1)

        while 1:
            try:
                os.remove(fname)
            except OSError:
                pass

            start_time = time.time()
            start_clock = time.clock()

            func(self.runnable)

            total_clock = (time.clock() - start_clock)
            total_time = (time.time() - start_time)

            # Floor at a tiny positive value so the mean is never zero.
            elap_time = max(total_time, 0.0000001)
            elap_clock = max(total_clock, 0.0000001)

            trials = trials + 1

            # skip some of the first trials
            if trials > self.skip_trials:
                measured.append((elap_clock, elap_time))
                #print 'measurement total: %.1f ms' % (total_time * 1000.0)

            # at least so many
            if trials < needed:
                #print 'continue: need more trials: %d' % trials
                continue

            # StatList needs two or more samples.  With too few, duplicate
            # what there is and stop.  The first condition is the original
            # rule; the length check also covers e.g. skip_trials=2 with
            # min_trials=1, which used to trip StatList's assertion.
            done = 0
            if self.skip_trials + self.min_trials <= 2 or len(measured) < 2:
                measured = measured + measured
                done = 1

            # compute %variance
            time_stat = StatList([x[1] for x in measured], 'elap time')
            sp = float(time_stat.s) / float(time_stat.mean)

            # what if MAX_TRIALS is exceeded?
            too_many = (trials - self.skip_trials) >= self.max_trials
            good = (100.0 * sp) < self.min_stddev_pct
            if done or too_many or good:
                #clock = StatList([x[0] for x in measured], 'elap clock')
                return time_stat
    #end
#end
301
def Decimals(start, end):
    """Return start, 2*start, ..., 9*start, then the same for 10*start, ...

    The list ends with the first power-of-ten multiple of `start` that is
    >= `end`, which is included.
    """
    values = []
    magnitude = start
    while True:
        values.extend(range(magnitude, magnitude * 10, magnitude))
        next_magnitude = magnitude * 10
        if next_magnitude >= end:
            values.append(next_magnitude)
            return values
        magnitude = next_magnitude
#end
314
# Raw speed test: for every length from Decimals(MIN_RUN, MAX_RUN), sizes
# RUNFILE with SetFileSize and times xdelta3 at three window sizes, the
# in-memory SWIG API and gzip.  (The original note called these "0-byte
# inputs"; presumably zero-filled inputs is meant.)
def RunSpeedTest():
    window_runs = [(1<<20, '1MB '),
                   (1<<19, '512k'),
                   (1<<18, '256k')]

    for L in Decimals(MIN_RUN, MAX_RUN):
        SetFileSize(RUNFILE, L)

        for (winsize, label) in window_runs:
            runner = Xdelta3Runner(['-W', str(winsize)])
            ReportSpeed(L, TimedTest(RUNFILE, None, runner), label)

        ReportSpeed(L, TimedTest(RUNFILE, None, Xdelta3Mod1(RUNFILE)), 'swig')
        ReportSpeed(L, TimedTest(RUNFILE, None, GzipRun1()), 'gzip')
#end
336
def SetFileSize(F,L):
    """Create file `F` if needed and truncate or extend it to `L` bytes.

    Raises IOError if the file does not have the requested size afterwards.
    """
    fd = os.open(F, os.O_CREAT | os.O_WRONLY)
    try:
        os.ftruncate(fd,L)
        # An explicit check instead of assert, which python -O strips.
        actual = os.fstat(fd).st_size
        if actual != L:
            raise IOError('%s: size is %d after ftruncate to %d' % (F, actual, L))
    finally:
        # Close the descriptor even when one of the calls above fails.
        os.close(fd)
#end
343
def ReportSpeed(L,tr,desc):
    """Print one result line for TimedTest `tr` on an input of length `L`.

    Mean encode and decode times are converted from seconds to
    milliseconds.
    """
    encode_ms = tr.encode_time.mean * 1000.0
    decode_ms = tr.decode_time.mean * 1000.0
    line = '%s run length %u: size %u: time %.3f ms: decode %.3f ms' % \
           (desc, L, tr.encode_size, encode_ms, decode_ms)
    print(line)
#end
351
class Xdelta3RunClass:
    """Factory for Xdelta3Runner objects that share extra arguments."""

    def __init__(self, extra):
        # Extra command-line arguments passed to every runner.
        self.extra = extra
    #end

    def __str__(self):
        # The extra arguments joined by spaces; used in report lines.
        return ' '.join(self.extra)
    #end

    def New(self):
        """Return a new Xdelta3Runner using self.extra."""
        return Xdelta3Runner(self.extra)
    #end
#end
365
class Xdelta3Runner:
    """Runs xdelta3 through the xdelta3 module's xd3_main_cmdline()."""

    def __init__(self, extra):
        # Extra arguments; Encode() uses them, Decode() does not.
        self.extra = extra
    #end

    def Encode(self, target, source, output):
        """Encode `target` into `output`, against `source` when given."""
        args = ALL_ARGS + self.extra + ['-e']
        if source:
            args += ['-s', source]
        args += [target, output]
        self.Main(args)
    #end

    def Decode(self, input, source, output):
        """Decode `input` into `output`, against `source` when given."""
        args = ALL_ARGS + ['-d']
        if source:
            args += ['-s', source]
        args += [input, output]
        self.Main(args)
    #end

    def Verify(self, target, recon):
        """Compare the two files with cmp; a difference raises CommandError."""
        RunCommand(('cmp', target, recon))
    #end

    def EncodeSize(self, output):
        """Return the size of file `output` in bytes."""
        return os.stat(output).st_size
    #end

    def Main(self, args):
        """Call xd3_main_cmdline(args); report any Exception as CommandError."""
        try:
            #print 'Run %s' % (' '.join(args))
            xdelta3.xd3_main_cmdline(args)
        except Exception:
            err = sys.exc_info()[1]
            raise CommandError(args, "xdelta3.main exception: %s" % err)
    #end
#end
411
class Xdelta3Mod1:
    """Runner that uses the in-memory API of the xdelta3 module.

    The target is read once at construction.  The path arguments of
    Encode/Decode/Verify/EncodeSize exist only to match the interface of
    the other runners and are ignored.
    """

    def __init__(self, file):
        # Binary mode: the data is handed to the codec unchanged.  The
        # handle is closed explicitly instead of being left to the GC.
        f = open(file, 'rb')
        try:
            self.target_data = f.read()
        finally:
            f.close()
    #end

    def Encode(self, ignore1, ignore2, ignore3):
        """Encode target_data in memory; keep the result in self.encoded."""
        r1, encoded = xdelta3.xd3_encode_memory(self.target_data, None, 1000000, 1<<10)
        if r1 != 0:
            raise CommandError('memory', 'encode failed: %s' % r1)
        self.encoded = encoded
    #end

    def Decode(self, ignore1, ignore2, ignore3):
        """Decode self.encoded in memory; keep the result in self.decoded."""
        r2, data1 = xdelta3.xd3_decode_memory(self.encoded, None, len(self.target_data))
        if r2 != 0:
            # Report the decoder's own result.  This used to name r1,
            # which is not defined here and raised NameError instead.
            raise CommandError('memory', 'decode failed: %s' % r2)
        self.decoded = data1
    #end

    def Verify(self, ignore1, ignore2):
        """Raise CommandError unless the decoded data equals the target."""
        if self.target_data != self.decoded:
            raise CommandError('memory', 'bad decode')
    #end

    def EncodeSize(self, ignore1):
        """Return the length of the encoded data."""
        return len(self.encoded)
    #end
#end
443
class GzipRun1:
    """Runner that compresses with the gzip command; takes no source file."""

    def Encode(self, target, source, output):
        """Write gzip -cf of `target` to `output`; `source` must be None."""
        assert source == None
        RunCommandIO(['gzip', '-cf'], target, output)
    #end

    def Decode(self, input, source, output):
        """Write gzip -dcf of `input` to `output`; `source` must be None."""
        assert source == None
        RunCommandIO(['gzip', '-dcf'], input, output)
    #end

    def Verify(self, target, recon):
        """Compare the two files with cmp; a difference raises CommandError."""
        RunCommand(('cmp', target, recon))
    #end

    def EncodeSize(self, output):
        """Return the size of file `output` in bytes."""
        size = os.stat(output).st_size
        return size
    #end
#end
463
class Xdelta1RunClass:
    """Factory for Xdelta1Runner objects."""

    def __str__(self):
        # Name used in report lines.
        return 'xdelta1'
    #end

    def New(self):
        """Return a new Xdelta1Runner."""
        runner = Xdelta1Runner()
        return runner
    #end
#end
473
class Xdelta1Runner:
    """Runner that uses the xdelta1 command; a source file is required."""

    def Encode(self, target, source, output):
        """Run "xdelta1 delta" from `source` to `target` into `output`."""
        assert source != None
        # Note: for dumb historical reasons, xdelta1 returns 1 or 0, so
        # both exit codes are accepted here.
        RunCommand(['xdelta1', 'delta', '-q', source, target, output], [0, 1])
    #end

    def Decode(self, input, source, output):
        """Run "xdelta1 patch" of `input` on `source` into `output`."""
        assert source != None
        RunCommand(['xdelta1', 'patch', '-q', input, source, output])
    #end

    def Verify(self, target, recon):
        """Compare the two files with cmp; a difference raises CommandError."""
        RunCommand(('cmp', target, recon))
    #end

    def EncodeSize(self, output):
        """Return the size of file `output` in bytes."""
        size = os.stat(output).st_size
        return size
    #end
#end
496
497# exceptions
class SkipRcsException(Exception):
    """Raised to skip an RCS file; `reason` holds the explanation.

    Derives from Exception so it is a proper exception class rather than
    a bare class that merely gets raised.
    """
    def __init__(self,reason):
        Exception.__init__(self, reason)
        self.reason = reason
    #end
#end
503
class NotEnoughVersions(Exception):
    """Raised when an RCS file has fewer than two revisions to compare.

    Derives from Exception so it is a proper exception class.
    """
    def __init__(self):
        Exception.__init__(self)
    #end
#end
509
class CommandError(Exception):
    """Raised when an external command or a test step fails.

    `cmd` is a string or a tuple/list of words, which is joined with
    spaces; `str` is the failure description.  The failure is printed at
    construction, as before, and both values are now also kept on the
    instance as .cmd and .reason.
    """
    def __init__(self,cmd,str):
        if isinstance(cmd, (tuple, list)):
            # join() also copes with an empty sequence, unlike the
            # reduce() call it replaces.
            cmd = ' '.join(['%s' % word for word in cmd])
        Exception.__init__(self, cmd, str)
        self.cmd = cmd
        self.reason = str
        # The doubled space reproduces the original two-argument print.
        print('command was:  %s' % (cmd,))
        print('command failed:  %s' % (str,))
        print('have fun debugging')
    #end
#end
521
class RcsVersion:
    """One revision of an RCS file.

    `vstr` is the revision string.  The `date` attribute is not set here;
    RcsFile.Date assigns it, and it must exist before two versions are
    compared.
    """
    def __init__(self,vstr):
        self.vstr = vstr
    #end
    def __cmp__(self,other):
        # Order by date: -1, 0 or 1, as cmp() would return.
        mine, theirs = self.date, other.date
        return (mine > theirs) - (mine < theirs)
    #end
    def __str__(self):
        return str(self.vstr)
    #end
#end
533
class RcsFile:
    """One RCS ",v" file: its rlog summary and access to its revisions.

    Attributes set while parsing: `totrev` (revision count reported by
    rlog), `versions` (list of RcsVersion, sorted by date after Sum1())
    and `rcssize` (size of the ,v file in bytes).
    """

    def __init__(self, fname):
        self.fname = fname
        self.versions = []
        self.state = HEAD_STATE
        # Default so that Sum1() and its callers can test totrev even when
        # the rlog output never yields a "total revisions" line.
        self.totrev = 0
    #end

    def SetTotRev(self,s):
        """Parser action: record the total revision count."""
        self.totrev = int(s)
    #end

    def Rev(self,s):
        """Parser action: start a new revision with revision string `s`."""
        self.rev = RcsVersion(s)
        if len(self.versions) >= self.totrev:
            raise SkipRcsException('too many versions (in log messages)')
        self.versions.append(self.rev)
    #end

    def Date(self,s):
        """Parser action: set the date of the revision started last."""
        self.rev.date = s
    #end

    def Match(self, line, state, rx, gp, newstate, f):
        """Try one parser transition; return 1 if taken, else None.

        The transition is taken when the parser is in `state` and `rx`
        matches `line`.  Then `f`, if not None, is called with group `gp`
        of the match, and the parser moves to `newstate`.
        """
        if state != self.state:
            return None
        m = rx.match(line)
        if not m:
            return None
        if f:
            f(m.group(gp))
        self.state = newstate
        return 1
    #end

    def Sum1Rlog(self):
        """Run rlog on the file and feed its output through the parser.

        Raises CommandError if rlog reports a non-zero status.
        """
        # (state, pattern, next state, action), tried in this order; at
        # most one transition is taken per line.
        transitions = [
            (HEAD_STATE, RE_TOTREV, BAR_STATE, self.SetTotRev),
            (BAR_STATE, RE_BAR, REV_STATE, None),
            (REV_STATE, RE_REV, DATE_STATE, self.Rev),
            (DATE_STATE, RE_DATE, BAR_STATE, self.Date),
        ]
        # NOTE(review): the file name goes into a shell command unquoted;
        # names containing spaces or shell metacharacters will not work.
        f = os.popen('rlog '+self.fname, "r")
        status = None
        try:
            l = f.readline()
            while l:
                for (state, rx, newstate, action) in transitions:
                    if self.Match(l, state, rx, 1, newstate, action):
                        break
                l = f.readline()
        finally:
            status = f.close()
        if status is not None:
            # close() returns an integer status, which cannot itself be
            # raised (the old code tried to).
            raise CommandError('rlog ' + self.fname,
                               'exited with status %s' % status)
    #end

    def Sum1(self):
        """Record the ,v size and parse the rlog summary.

        Raises SkipRcsException when the number of parsed revisions does
        not equal the total that rlog reported.
        """
        st = os.stat(self.fname)
        self.rcssize = st.st_size
        self.Sum1Rlog()
        if self.totrev != len(self.versions):
            raise SkipRcsException('wrong version count')
        self.versions.sort()
    #end

    def Checkout(self,n):
        """Check out version `n` into Verf(n); record its size in vsize.

        Raises CommandError if co exits with a non-zero status.
        """
        # Imported here because the file's import block does not list it.
        import subprocess
        v = self.versions[n]
        cmd = ['co', '-ko', '-p%s' % v.vstr, self.fname]
        # communicate() closes stdin and drains stdout and stderr together,
        # so a chatty stderr cannot stall the pipe, and the exit status is
        # available afterwards.  os.popen3 offered neither: close() on its
        # stream never reports the status, so failures went unnoticed.
        proc = subprocess.Popen(cmd,
                                stdin=subprocess.PIPE,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE)
        (data, estr) = proc.communicate()
        out = open(self.Verf(n), "wb")
        try:
            out.write(data)
        finally:
            out.close()
        v.vsize = len(data)
        if proc.returncode != 0:
            raise CommandError(cmd, 'checkout failed: %s\n%s\n%s' % (v.vstr, self.fname, estr))
    #end

    def Vdate(self,n):
        """Return the date of version `n`."""
        return self.versions[n].date
    #end

    def Vstr(self,n):
        """Return the revision string of version `n`."""
        return self.versions[n].vstr
    #end

    def Verf(self,n):
        """Return the scratch-file path used for a checkout of version `n`."""
        return os.path.join(TMPDIR, 'input.%d' % n)
    #end

    def FilePairsByDate(self, runclass):
        """Time runclass.New() on every pair of consecutive versions.

        Version v is the source and version v+1 the target.  Returns the
        list of TimedTest results.  Raises NotEnoughVersions when the file
        has fewer than two revisions.
        """
        if self.totrev < 2:
            raise NotEnoughVersions()
        self.Checkout(0)
        ntrials = []
        for v in range(0,self.totrev-1):
            # Version v-1 is no longer needed once the pair (v, v+1) is
            # reached.  The test used to be "v > 1", which never removed
            # the checkout of version 0.
            if v > 0:
                os.remove(self.Verf(v-1))
            self.Checkout(v+1)
            if os.stat(self.Verf(v)).st_size < MIN_SIZE or \
               os.stat(self.Verf(v+1)).st_size < MIN_SIZE:
                continue

            result = TimedTest(self.Verf(v+1),
                               self.Verf(v),
                               runclass.New())
            ntrials.append(result)

        os.remove(self.Verf(self.totrev-1))
        os.remove(self.Verf(self.totrev-2))
        return ntrials
    #end

    def AppendVersion(self, f, n):
        """Check out version `n`, append it to open file `f`; return its length."""
        self.Checkout(n)
        rf = open(self.Verf(n), "r")
        try:
            data = rf.read()
        finally:
            rf.close()
        f.write(data)
        return len(data)
    #end
685
class RcsFinder:
    """Finds RCS ",v" files under a directory and builds test inputs.

    Crawl() fills `subdirs`, `rcsfiles` (RcsFile objects) and `others`;
    Summarize() moves files that cannot be used into `skipped`.
    """

    def __init__(self):
        self.subdirs = []
        self.rcsfiles = []
        self.others = []
        self.skipped = []
        # Counter used to name the results of MakeBigFiles.
        self.biground = 0
    #end

    def Scan1(self,dir):
        """Sort the entries of `dir` into the three lists.

        Returns the list of subdirectories found in `dir`.
        """
        found_dirs = []
        found_rcs = []
        found_others = []
        for name in os.listdir(dir):
            path = os.path.join(dir, name)
            if os.path.isdir(path):
                found_dirs.append(path)
            elif name.endswith(",v"):
                found_rcs.append(RcsFile(path))
            else:
                found_others.append(path)
        self.subdirs = self.subdirs + found_dirs
        self.rcsfiles = self.rcsfiles + found_rcs
        self.others = self.others + found_others
        return found_dirs
    #end

    def Crawl(self, dir):
        """Scan `dir` and everything below it, breadth first."""
        pending = [dir]
        while pending:
            current = pending.pop(0)
            pending.extend(self.Scan1(current))
    #end

    def Summarize(self):
        """Run Sum1() on every file; keep only those with two or more revisions."""
        usable = []
        for rf in self.rcsfiles:
            try:
                rf.Sum1()
                if rf.totrev < 2:
                    raise SkipRcsException('too few versions (< 2)')
            except SkipRcsException:
                #print 'skipping file %s: %s' % (rf.fname, e.reason)
                self.skipped.append(rf)
            else:
                usable.append(rf)
        self.rcsfiles = usable
    #end

    def AllPairsByDate(self, runclass):
        """Run FilePairsByDate on every file and report the totals.

        Files that raise SkipRcsException or NotEnoughVersions are dropped
        from self.rcsfiles.  Returns the combined result list.
        """
        results = []
        kept = []
        for rf in self.rcsfiles:
            try:
                results = results + rf.FilePairsByDate(runclass)
            except SkipRcsException:
                print('file %s has compressed versions: skipping' % (rf.fname))
            except NotEnoughVersions:
                print('testing %s on %s: not enough versions' % (runclass, rf.fname))
            else:
                kept.append(rf)
        self.rcsfiles = kept
        self.ReportPairs(runclass, results)
        return results
    #end

    def ReportPairs(self, name, results):
        """Print total mean encode time, decode time and encoded size."""
        encode_time = sum([r.encode_time.mean for r in results])
        decode_time = sum([r.decode_time.mean for r in results])
        encode_size = sum([r.encode_size for r in results])
        print('%s rcs: encode %.2f s: decode %.2f s: size %d' % \
              (name, encode_time, decode_time, encode_size))
    #end

    def MakeBigFiles(self, rand):
        """Build two files, each the concatenation of one version per file.

        A FILE_P fraction of the files with two or more versions is
        sampled; for every sampled file not matching IGNORE_FILENAME two
        distinct versions are picked, the first appended to big.1 and the
        second to big.2.  Returns (path of big.1, path of big.2, test key).
        """
        path1 = TMPDIR + "/big.1"
        path2 = TMPDIR + "/big.2"
        f1 = open(path1, "w")
        f2 = open(path2, "w")
        population = [rf for rf in self.rcsfiles if len(rf.versions) >= 2]
        f1sz = 0
        f2sz = 0
        fcount = int(len(population) * FILE_P)
        assert fcount > 0
        for rf in rand.sample(population, fcount):
            if IGNORE_FILENAME.match(rf.fname) is not None:
                continue
            r1, r2 = rand.sample(range(0, len(rf.versions)), 2)
            f1sz += rf.AppendVersion(f1, r1)
            f2sz += rf.AppendVersion(f2, r2)
        testkey = 'rcs%d' % self.biground
        self.biground = self.biground + 1

        print('%s; source %u bytes; target %u bytes' % (testkey, f1sz, f2sz))
        f1.close()
        f2.close()
        return (path1, path2, testkey)
    #end

    def Generator(self):
        """Return a callable taking `rand` and returning MakeBigFiles(rand)."""
        return self.MakeBigFiles
    #end
#end
810
# find a set of RCS files for testing
def GetTestRcsFiles():
    """Crawl RCSDIR, summarize the RCS files found and return the RcsFinder.

    Raises CommandError when the crawl finds no RCS files at all.
    """
    rcsf = RcsFinder()
    rcsf.Crawl(RCSDIR)
    if len(rcsf.rcsfiles) == 0:
        raise CommandError('', 'no RCS files')
    rcsf.Summarize()
    print("rcsfiles: rcsfiles %d; subdirs %d; others %d; skipped %d" % (len(rcsf.rcsfiles),
                                                                          len(rcsf.subdirs),
                                                                          len(rcsf.others),
                                                                          len(rcsf.skipped)))
    # StatList requires two or more values; Summarize() may leave fewer,
    # in which case the statistics lines are left out.
    if len(rcsf.rcsfiles) > 1:
        print(StatList([x.rcssize for x in rcsf.rcsfiles], "rcssize").str)
        print(StatList([x.totrev for x in rcsf.rcsfiles], "totrev").str)
    return rcsf
#end
827
class SampleDataTest:
    """Test inputs taken from directories of sample files.

    Each directory in `dirs`, and every directory below it, is visited
    breadth first.  The files of one directory are sorted by path and each
    adjacent pair becomes a (source, target, 'source-target') tuple in
    self.pairs.
    """

    def __init__(self, dirs):
        self.pairs = []
        while dirs:
            d, dirs = dirs[0], dirs[1:]
            files = []
            for e in os.listdir(d):
                p = os.path.join(d, e)
                if os.path.isdir(p):
                    dirs.append(p)
                else:
                    files.append(p)
            files.sort()
            for (first, second) in zip(files[:-1], files[1:]):
                self.pairs.append((first, second, '%s-%s' % (first, second)))
    #end

    def Generator(self):
        """Return a callable taking `rand` that picks one pair at random."""
        return lambda rand: rand.choice(self.pairs)
    #end
#end
858
# configs are represented as a list of values,
# program takes a list of strings:
def ConfigToArgs(config):
    """Turn a configuration (values in CONFIG_ORDER order) into arguments.

    The first SOFT_CONFIG_CNT values become one comma-separated -C
    argument.  Of the rest, 'true' adds the bare flag, 'false' adds
    nothing and any other value adds 'flag=value'.
    """
    soft = ','.join([str(x) for x in config[0:SOFT_CONFIG_CNT]])
    args = ['-C', soft]
    for i in range(SOFT_CONFIG_CNT, len(CONFIG_ORDER)):
        flag = CONFIG_ARGMAP[CONFIG_ORDER[i]]
        val = config[i]
        if val == 'true':
            args.append('%s' % flag)
        elif val != 'false':
            args.append('%s=%s' % (flag, val))
    return args
#end
877
878#
class RandomTest:
    """The result of testing one configuration on one input.

    `tinput` is a (source, target, key) tuple.  With `syntuple` None the
    test is run through TimedTest; otherwise `syntuple` is a
    (runtime, compsize) pair recorded as a synthesized result and nothing
    is run.

    `score`, `time_pos`, `size_pos` and `score_pos` start out as None and
    are filled in by ScoreTests.  `score` is a plain attribute: the former
    score() method was always hidden by the instance attribute of the same
    name and has been removed.
    """

    def __init__(self, tnum, tinput, config, syntuple = None):
        self.mytinput = tinput[2]
        self.myconfig = config
        self.tnum = tnum

        if syntuple is not None:
            self.runtime = syntuple[0]
            self.compsize = syntuple[1]
            self.decodetime = None
        else:
            args = ConfigToArgs(config)
            result = TimedTest(tinput[1], tinput[0], Xdelta3Runner(args))

            self.runtime = result.encode_time.mean
            self.compsize = result.encode_size
            self.decodetime = result.decode_time.mean

        self.score = None
        self.time_pos = None
        self.size_pos = None
        self.score_pos = None
    #end

    def __str__(self):
        # Positions are shown in parentheses once they have been assigned.
        if self.time_pos is not None:
            time_note = " (%s)" % self.time_pos
        else:
            time_note = ""
        if self.size_pos is not None:
            size_note = " (%s)" % self.size_pos
        else:
            size_note = ""
        decodestr = ' %s' % self.decodetime
        return 'time %.6f%s size %d%s << %s >>%s' % (
            self.time(), time_note,
            self.size(), size_note,
            c2str(self.config()),
            decodestr)
    #end

    def time(self):
        """Return the encode time."""
        return self.runtime
    #end

    def size(self):
        """Return the encoded size."""
        return self.compsize
    #end

    def config(self):
        """Return the configuration this result belongs to."""
        return self.myconfig
    #end

    def tinput(self):
        """Return the key of the test input."""
        return self.mytinput
    #end
#end
933
def PosInAlist(l, e):
    """Return the index of the first pair in `l` whose second item == `e`.

    Returns -1 when there is none.
    """
    for (index, pair) in enumerate(l):
        if pair[1] == e:
            return index
    return -1
#end
942
# Generates a set of num_results test configurations, given the list of
# retest-configs.
def RandomTestConfigs(rand, input_configs, num_results):
    """Return a sorted list made of `input_configs` plus new random configs.

    New configurations are drawn until the list holds `num_results`
    entries.  The search stops early, with a message, when 100 draws in a
    row only produce configurations that are already present.
    """
    outputs = input_configs[:]
    seen = set(input_configs)

    # Compute a random configuration
    def RandomConfig():
        spec = INPUT_SPEC(rand)
        chosen = {}
        values = []
        for key in CONFIG_ORDER:
            # Each generator is handed the values chosen so far.
            value = spec[key](chosen)
            chosen[key] = value
            values.append(value)
        return tuple(values)

    while len(outputs) < num_results:
        fresh = None
        for attempt in range(100):
            candidate = RandomConfig()
            if candidate not in seen:
                seen.add(candidate)
                fresh = candidate
                break
        if fresh is None:
            print('stopped looking for configs at %d' % len(outputs))
            break
        outputs.append(fresh)

    outputs.sort()
    return outputs
#end
980
def RunTestLoop(rand, generator, rounds):
    """Run `rounds` rounds of random-configuration tests.

    Each round tops the configuration list up to MAX_RESULTS, obtains one
    test input from generator(rand), tests and scores every configuration,
    and files the results in test_all_config_results.  The configurations
    of the first int(MAX_RESULTS * KEEP_P) results are carried over into
    the next round.
    """
    configs = []
    for rnum in range(rounds):
        configs = RandomTestConfigs(rand, configs, MAX_RESULTS)
        tinput = generator(rand)

        tests = []
        for (x, config) in enumerate(configs):
            t = RandomTest(x, tinput, config)
            print('Round %d test %d: %s' % (rnum, x, t))
            tests.append(t)
        results = ScoreTests(tests)

        for r in results:
            test_all_config_results.setdefault(r.config(), []).append(r)

        #GraphResults('expt%d' % rnum, results)
        #GraphSummary('sum%d' % rnum, results)

        # re-test some fraction
        keep = int(MAX_RESULTS * KEEP_P)
        configs = [r.config() for r in results[0:keep]]
#end
1010
# TODO: cleanup
# Maps a configuration tuple to the list of results recorded for it.
# Filled by RunTestLoop and read by GraphSummary.
test_all_config_results = dict()
1013
def ScoreTests(results):
    """Score the tests in `results`; return them sorted by score.

    Every test gets its `score` (smaller is better), `score_pos` (counted
    from 1) and `size_pos` / `time_pos` (indexes, counted from 0, in the
    lists sorted by size and by time).  One 'H-Score' line is printed per
    test.
    """
    t_min = float(min([test.time() for test in results]))
    #t_max = float(max([test.time() for test in results]))
    s_min = float(min([test.size() for test in results]))
    #s_max = float(max([test.size() for test in results]))

    # Hyperbolic function. Smaller scores still better
    red = 0.999  # minimum factors for each dimension are 1/1000

    by_score = []
    by_time = []
    by_size = []
    for test in results:
        test.score = ((test.size() - s_min * red) *
                      (test.time() - t_min * red))
        by_score.append((test.score, test))
        by_time.append((test.time(), test))
        by_size.append((test.size(), test))

    by_score.sort()
    by_time.sort()
    by_size.sort()

    ranked = []
    for (index, (score, test)) in enumerate(by_score):
        test.score_pos = index + 1
        ranked.append(test)

    for test in ranked:
        test.size_pos = PosInAlist(by_size, test)
        test.time_pos = PosInAlist(by_time, test)

    for test in ranked:
        print('H-Score: %0.9f %s' % (test.score, test))

    return ranked
#end
1064
def GraphResults(desc, results):
    """Write time and size of every result to data-<desc>.csv and plot it.

    The values on a line are separated by tabs and followed by the result
    as a "#" comment.  ./plot.sh is then run to produce plot-<desc>.jpg.
    """
    datafile = "data-%s.csv" % desc
    out = open(datafile, "w")
    for result in results:
        row = "%0.9f\t%d\t# %s\n" % (result.time(), result.size(), result)
        out.write(row)
    out.close()
    os.system("./plot.sh data-%s.csv plot-%s.jpg" % (desc, desc))
#end
1073
def GraphSummary(desc, results_ignore):
    """Summarize test_all_config_results across test inputs.

    Configurations are ordered by the number of distinct inputs they were
    tested on.  For each group of configurations that share a set of
    inputs, the per-configuration time and size are summed over those
    inputs and scored with ScoreTests as synthesized RandomTest results.
    `results_ignore` is not used.
    """
    test_population = 0
    config_ordered = []

    # drops duplicate test/config pairs (TODO: don't retest them)
    for config, cresults in test_all_config_results.items():
        seen_inputs = {}
        uniq = []
        for test in cresults:
            assert test.config() == config
            test_population += 1
            key = test.tinput()
            if key in seen_inputs:
                print('skipping repeat test %s vs. %s' % (seen_inputs[key], test))
                continue
            seen_inputs[key] = test
            uniq.append(test)
        config_ordered.append(uniq)

    # sort configs descending by number of tests (the sort is stable, so
    # configs with equal counts keep their relative order)
    config_ordered.sort(key=len, reverse=True)

    # Number of results of the most-tested config; 0 when nothing has
    # been recorded (indexing config_ordered[0] used to raise IndexError).
    most_results = 0
    if config_ordered:
        most_results = len(config_ordered[0])

    print('population %d: %d configs %d results' % \
          (test_population,
           len(config_ordered),
           most_results))

    # Nothing to combine unless some config has two or more results.  This
    # used to compare the list itself with 1, which is never true.
    if most_results <= 1:
        return

    # a map from test-key to test-list w/ various configs
    input_set = {}
    osize = len(config_ordered)

    for i in range(len(config_ordered)):
        config_tests = config_ordered[i]

        #print '%s has %d tested inputs' % (config, len(config_tests))

        if len(input_set) == 0:
            input_set = dict([(t.tinput(), [t]) for t in config_tests])
            continue

        # a map from test-key to test-list w/ various configs; inputs this
        # config was not tested on drop out
        update_set = {}
        for r in config_tests:
            t = r.tinput()
            if t in input_set:
                update_set[t] = input_set[t] + [r]

        if len(update_set) <= 1:
            break

        input_set = update_set

        # continue if there are more w/ the same number of inputs
        if i < (len(config_ordered) - 1) and \
           len(config_ordered[i + 1]) == len(config_tests):
            continue

        # synthesize results for multi-test inputs
        config_num = None

        # map of config to sum(various test-keys)
        smap = {}
        for (key, tests) in input_set.items():
            if config_num is None:
                # config_num should be the same in all elements
                config_num = len(tests)
                smap = dict([(r.config(),
                              (r.time(),
                               r.size()))
                             for r in tests])
            else:
                # compute the per-config sum of time/size
                assert config_num == len(tests)
                smap = dict([(r.config(),
                              (smap[r.config()][0] + r.time(),
                               smap[r.config()][1] + r.size()))
                             for r in tests])

        if config_num == 1:
            continue

        if len(input_set) == osize:
            break

        summary = '%s-%d' % (desc, len(input_set))
        osize = len(input_set)

        print('generate %s w/ %d configs' % (summary, config_num))
        syn = [RandomTest(0, (None, None, summary), config,
                          syntuple = (smap[config][0], smap[config][1]))
               for config in smap.keys()]
        # Called for the H-Score lines that ScoreTests prints.
        syn = ScoreTests(syn)
        #print 'smap is %s' % (smap,)
        #print 'syn is %s' % (' and '.join([str(x) for x in syn]))
        #GraphResults(summary, syn)
#end
1194
def _RunRegtest():
    """Recreate TMPDIR and run the random-configuration test loop."""
    RunCommand(['rm', '-rf', TMPDIR])
    os.mkdir(TMPDIR)

    rcsf = GetTestRcsFiles()
    generator = rcsf.Generator()

    #sample = SampleDataTest([SAMPLEDIR])
    #generator = sample.Generator()

    rand = random.Random(135135135135135)
    RunTestLoop(rand, generator, TEST_ROUNDS)

    #RunSpeedTest()

    #x3r = rcsf.AllPairsByDate(Xdelta3RunClass(['-1', '-3', '-6']))
    #x3r = rcsf.AllPairsByDate(Xdelta3RunClass(['-9']))
    #x3r = rcsf.AllPairsByDate(Xdelta3RunClass(['-9', '-S', 'djw']))
    #x3r = rcsf.AllPairsByDate(Xdelta3RunClass(['-1', '-S', 'djw']))
    #x3r = rcsf.AllPairsByDate(Xdelta3RunClass(['-9', '-T']))

    #x1r = rcsf.AllPairsByDate(Xdelta1RunClass())
#end

if __name__ == "__main__":
    try:
        _RunRegtest()
    except CommandError:
        # CommandError has already printed the failure.  TMPDIR is only
        # removed after a successful run, so it is still there here.
        pass
    else:
        RunCommand(['rm', '-rf', TMPDIR])
#end