summaryrefslogtreecommitdiff
path: root/xdelta3
diff options
context:
space:
mode:
authorjosh.macdonald <jmacd@users.noreply.github.com>2007-02-13 19:07:33 +0000
committerjosh.macdonald <jmacd@users.noreply.github.com>2007-02-13 19:07:33 +0000
commit990289208d555a760499623e30682d6fe19216e7 (patch)
tree480dce4835b2aba58cb9b5d1cbd097a962156c7e /xdelta3
parent4be58a9a9ab199ef2b3765f57e649398e8dd5890 (diff)
Cleanup -regtest.py. Comment on crash in -python.h
Diffstat (limited to 'xdelta3')
-rw-r--r--xdelta3/xdelta3-python.h1
-rwxr-xr-xxdelta3/xdelta3-regtest.py878
2 files changed, 503 insertions, 376 deletions
diff --git a/xdelta3/xdelta3-python.h b/xdelta3/xdelta3-python.h
index a91b7f2..4805b17 100644
--- a/xdelta3/xdelta3-python.h
+++ b/xdelta3/xdelta3-python.h
@@ -51,6 +51,7 @@ PyObject *xdelta3_main_cmdline (PyObject *self, PyObject *args)
51 PyObject *s; 51 PyObject *s;
52 if ((s = PyList_GetItem (o, i-1)) == NULL) { goto cleanup; } 52 if ((s = PyList_GetItem (o, i-1)) == NULL) { goto cleanup; }
53 ps = PyString_AsString (s); 53 ps = PyString_AsString (s);
54 /* TODO: ps is NULL if s is not a string, crashes the interpreter */
54 argv[i] = ps; 55 argv[i] = ps;
55 } 56 }
56 57
diff --git a/xdelta3/xdelta3-regtest.py b/xdelta3/xdelta3-regtest.py
index 8aa03ac..1dc6c0a 100755
--- a/xdelta3/xdelta3-regtest.py
+++ b/xdelta3/xdelta3-regtest.py
@@ -16,12 +16,9 @@
16# along with this program; if not, write to the Free Software 16# along with this program; if not, write to the Free Software
17# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 17# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 18
19# TODO: Test IOPT (1.5 vs. greedy)
20
21# TODO: Start testing window sizes 19# TODO: Start testing window sizes
22 20# TODO: Test 1.5 vs. greedy
23# TODO: Note: xd3_encode_memory is underperforming the command-line 21# TODO: Compare w/ bsdiff and generate more summary
24# at run-speed tests (due to excess memory allocation?). Fix.
25 22
26import os, sys, math, re, time, types, array, random 23import os, sys, math, re, time, types, array, random
27import xdelta3main 24import xdelta3main
@@ -36,15 +33,13 @@ SKIP_TRIALS = 2
36MIN_TRIALS = 3 33MIN_TRIALS = 3
37MAX_TRIALS = 15 34MAX_TRIALS = 15
38 35
39#SKIP_TRIALS = 0 36MIN_STDDEV_PCT = 1.5
40#MIN_TRIALS = 1
41#MAX_TRIALS = 1
42 37
43MIN_STDDEV_PCT = 1.5 # stop 38MIN_RUN = 1000 * 1000 * 1
44MAX_RUN = 1000 * 1000 * 10 39MAX_RUN = 1000 * 1000 * 10
45 40
46# How many results per round 41# How many results per round
47MAX_RESULTS = 100 42MAX_RESULTS = 10
48KEEP_P = (0.5) 43KEEP_P = (0.5)
49FAST_P = (0.0) 44FAST_P = (0.0)
50SLOW_P = (0.0) 45SLOW_P = (0.0)
@@ -82,15 +77,15 @@ def INPUT_SPEC(rand):
82# 77#
83# 78#
84#RCSDIR = '/mnt/polaroid/Polaroid/orbit_linux/home/jmacd/PRCS' 79#RCSDIR = '/mnt/polaroid/Polaroid/orbit_linux/home/jmacd/PRCS'
85RCSDIR = '/tmp/PRCS_read_copy' 80#RCSDIR = '/tmp/PRCS_read_copy'
86#RCSDIR = 'G:/jmacd/PRCS'
87 81
88SAMPLEDIR = "C:/sample_data/WESNOTH_tmp/tar" 82RCSDIR = 'G:/jmacd/PRCS/prcs/b'
83
84SAMPLEDIR = "C:/sample_data/Wesnoth/tar"
89 85
90TMPDIR = '/tmp/xd3regtest.%d' % os.getpid() 86TMPDIR = '/tmp/xd3regtest.%d' % os.getpid()
91 87
92RUNFILE = os.path.join(TMPDIR, 'run') 88RUNFILE = os.path.join(TMPDIR, 'run')
93HFILE = os.path.join(TMPDIR, 'hdr')
94DFILE = os.path.join(TMPDIR, 'output') 89DFILE = os.path.join(TMPDIR, 'output')
95RFILE = os.path.join(TMPDIR, 'recon') 90RFILE = os.path.join(TMPDIR, 'recon')
96 91
@@ -99,7 +94,7 @@ BAR_STATE = 1
99REV_STATE = 2 94REV_STATE = 2
100DATE_STATE = 3 95DATE_STATE = 3
101 96
102# 97#
103IGNORE_FILENAME = re.compile('.*\\.(gif|jpg).*') 98IGNORE_FILENAME = re.compile('.*\\.(gif|jpg).*')
104 99
105# rcs output 100# rcs output
@@ -126,52 +121,397 @@ testwide_encode_args = [
126 121
127def c2s(c): 122def c2s(c):
128 return ' '.join(['%02d' % x for x in c]) 123 return ' '.join(['%02d' % x for x in c])
124#end
125
126def SumList(l):
127 return reduce(lambda x,y: x+y, l)
128#end
129
130# returns (total, mean, stddev, q2 (median),
131# (q3-q1)/2 ("semi-interquartile range"), max-min (spread))
132class StatList:
133 def __init__(self,l,desc,hist=0):
134 cnt = len(l)
135 assert(cnt > 1)
136 l.sort()
137 self.cnt = cnt
138 self.l = l
139 self.total = SumList(l)
140 self.mean = self.total / float(self.cnt)
141 self.s = math.sqrt(SumList([(x-self.mean) * (x - self.mean) for x in l]) / float(self.cnt-1))
142 self.q0 = l[0]
143 self.q1 = l[int(self.cnt/4.0+0.5)]
144 self.q2 = l[int(self.cnt/2.0+0.5)]
145 self.q3 = l[min(self.cnt-1,int((3.0*self.cnt)/4.0+0.5))]
146 self.q4 = l[self.cnt-1]+1
147 self.siqr = (self.q3-self.q1)/2.0;
148 self.spread = (self.q4-self.q0)
149 self.str = '%s %d; mean %d; sdev %d; q2 %d; .5(q3-q1) %.1f; spread %d' % \
150 (desc, self.total, self.mean, self.s, self.q2, self.siqr, self.spread)
151 #end
152#end
153
154def RunCommand(args, ok = [0]):
155 #print 'run command %s' % (' '.join(args))
156 p = os.spawnvp(os.P_WAIT, args[0], args)
157 if p not in ok:
158 raise CommandError(args, 'exited %d' % p)
159 #end
160#end
161
162def RunCommandIO(args,infn,outfn):
163 p = os.fork()
164 if p == 0:
165 os.dup2(os.open(infn,os.O_RDONLY),0)
166 os.dup2(os.open(outfn,os.O_CREAT|os.O_TRUNC|os.O_WRONLY),1)
167 os.execvp(args[0], args)
168 else:
169 s = os.waitpid(p,0)
170 o = os.WEXITSTATUS(s[1])
171 if not os.WIFEXITED(s[1]) or o != 0:
172 raise CommandError(args, 'exited %d' % o)
173 #end
174 #end
175#end
176
177class TimedTest:
178 def __init__(self, target, source, runnable,
179 skip_trials = SKIP_TRIALS,
180 min_trials = MIN_TRIALS,
181 max_trials = MAX_TRIALS,
182 min_stddev_pct = MIN_STDDEV_PCT):
183 self.target = target
184 self.source = source
185 self.runnable = runnable
186
187 self.skip_trials = skip_trials
188 self.min_trials = min(min_trials, max_trials)
189 self.max_trials = max_trials
190 self.min_stddev_pct = min_stddev_pct
191
192 self.encode_time = self.DoTest(DFILE,
193 lambda x: x.Encode(self.target, self.source, DFILE))
194 self.encode_size = runnable.EncodeSize(DFILE)
195 self.decode_time = self.DoTest(RFILE,
196 lambda x: x.Decode(DFILE, self.source, RFILE))
197
198 # verify
199 runnable.Verify(self.target, RFILE)
200 #end
201
202 def DoTest(self, fname, func):
203 trials = 0
204 measured = []
205
206 while 1:
207 try:
208 os.remove(fname)
209 except OSError:
210 pass
211
212 start_time = time.time()
213 start_clock = time.clock()
214
215 func(self.runnable)
216
217 total_clock = (time.clock() - start_clock)
218 total_time = (time.time() - start_time)
219
220 elap_time = max(total_time, 0.0000001)
221 elap_clock = max(total_clock, 0.0000001)
222
223 trials = trials + 1
224
225 # skip some of the first trials
226 if trials > self.skip_trials:
227 measured.append((elap_clock, elap_time))
228 #print 'measurement total: %.1f ms' % (total_time * 1000.0)
229
230 # at least so many
231 if trials < (self.skip_trials + self.min_trials):
232 #print 'continue: need more trials: %d' % trials
233 continue
234
235 # compute %variance
236 done = 0
237 if self.skip_trials + self.min_trials <= 2:
238 measured = measured + measured;
239 done = 1
240 #end
241
242 time_stat = StatList([x[1] for x in measured], 'elap time')
243 sp = float(time_stat.s) / float(time_stat.mean)
244
245 # what if MAX_TRIALS is exceeded?
246 too_many = (trials - self.skip_trials) >= self.max_trials
247 good = (100.0 * sp) < self.min_stddev_pct
248 if done or too_many or good:
249 trials = trials - self.skip_trials
250 if not done and not good:
251 #print 'too many trials: %d' % trials
252 pass
253 #clock = StatList([x[0] for x in measured], 'elap clock')
254 return time_stat
255 #end
256 #end
257 #end
258#end
259
260def Decimals(start, end):
261 l = []
262 step = start
263 while 1:
264 r = range(step, step * 10, step)
265 l = l + r
266 if step * 10 >= end:
267 l.append(step * 10)
268 break
269 step = step * 10
270 return l
271#end
272
273# This tests the raw speed of 0-byte inputs
274def RunSpeedTest():
275 for L in Decimals(MIN_RUN, MAX_RUN):
276 SetFileSize(RUNFILE, L)
277
278 trx = TimedTest(RUNFILE, None, Xdelta3Runner(['-W', str(1<<20)]))
279 ReportSpeed(L, trx, '1MB ')
280
281 trx = TimedTest(RUNFILE, None, Xdelta3Runner(['-W', str(1<<19)]))
282 ReportSpeed(L, trx, '512k')
283
284 trx = TimedTest(RUNFILE, None, Xdelta3Runner(['-W', str(1<<18)]))
285 ReportSpeed(L, trx, '256k')
286
287 trm = TimedTest(RUNFILE, None, Xdelta3Mod1(RUNFILE))
288 ReportSpeed(L, trm, 'swig')
289
290 trg = TimedTest(RUNFILE, None, GzipRun1())
291 ReportSpeed(L,trg,'gzip')
292 #end
293#end
294
295def SetFileSize(F,L):
296 fd = os.open(F, os.O_CREAT | os.O_WRONLY)
297 os.ftruncate(fd,L)
298 assert os.fstat(fd).st_size == L
299 os.close(fd)
300#end
301
302def ReportSpeed(L,tr,desc):
303 print '%s run length %u: size %u: time %.3f ms: decode %.3f ms' % \
304 (desc, L,
305 tr.encode_size,
306 tr.encode_time.mean * 1000.0,
307 tr.decode_time.mean * 1000.0)
308#end
309
310class Xdelta3RunClass:
311 def __init__(self, extra):
312 self.extra = extra
313 #end
314
315 def __str__(self):
316 return 'xdelta3'
317 #end
318
319 def New(self):
320 return Xdelta3Runner(self.extra)
321 #end
322#end
323
324class Xdelta3Runner:
325 def __init__(self, extra):
326 self.extra = extra
327 #end
328
329 def Encode(self, target, source, output):
330 args = (testwide_encode_args +
331 self.extra +
332 ['-eqf'])
333 if source:
334 args.append('-s')
335 args.append(source)
336 #end
337 args = args + [target, output]
338 self.Main(args)
339 #end
340
341 def Decode(self, input, source, output):
342 args = ['-dqf']
343 if source:
344 args.append('-s')
345 args.append(source)
346 #end
347 args = args + [input, output]
348 self.Main(args)
349 #end
350
351 def Verify(self, target, recon):
352 RunCommand(('cmp', target, recon))
353 #end
354
355 def EncodeSize(self, output):
356 return os.stat(output).st_size
357 #end
358
359 def Main(self, args):
360 try:
361 xdelta3main.main(args)
362 except Exception, e:
363 raise CommandError(args, "xdelta3.main exception")
364 #end
365 #end
366#end
367
368class Xdelta3Mod1:
369 def __init__(self, file):
370 self.target_data = open(file, 'r').read()
371 #end
372
373 def Encode(self, ignore1, ignore2, ignore3):
374 r1, encoded = xdelta3.xd3_encode_memory(self.target_data, None, 1000000, 1<<10)
375 if r1 != 0:
376 raise CommandError('memory', 'encode failed: %s' % r1)
377 #end
378 self.encoded = encoded
379 #end
380
381 def Decode(self, ignore1, ignore2, ignore3):
382 r2, data1 = xdelta3.xd3_decode_memory(self.encoded, None, len(self.target_data))
383 if r2 != 0:
384 raise CommandError('memory', 'decode failed: %s' % r1)
385 #end
386 self.decoded = data1
387 #end
388
389 def Verify(self, ignore1, ignore2):
390 if self.target_data != self.decoded:
391 raise CommandError('memory', 'bad decode')
392 #end
393 #end
394
395 def EncodeSize(self, ignore1):
396 return len(self.encoded)
397 #end
398#end
399
400class GzipRun1:
401 def Encode(self, target, source, output):
402 assert source == None
403 RunCommandIO(['gzip', '-cf'], target, output)
404 #end
405
406 def Decode(self, input, source, output):
407 assert source == None
408 RunCommandIO(['gzip', '-dcf'], input, output)
409 #end
410
411 def Verify(self, target, recon):
412 RunCommand(('cmp', target, recon))
413 #end
414
415 def EncodeSize(self, output):
416 return os.stat(output).st_size
417 #end
418#end
419
420class Xdelta1RunClass:
421 def __str__(self):
422 return 'xdelta1'
423 #end
424
425 def New(self):
426 return Xdelta1Runner()
427 #end
428#end
429
430class Xdelta1Runner:
431 def Encode(self, target, source, output):
432 assert source != None
433 args = ['xdelta1', 'delta', '-q', source, target, output]
434 RunCommand(args, [0, 1])
435 #end
436
437 def Decode(self, input, source, output):
438 assert source != None
439 args = ['xdelta1', 'patch', '-q', input, source, output]
440 # Note: for dumb historical reasons, xdelta1 returns 1 or 0
441 RunCommand(args)
442 #end
443
444 def Verify(self, target, recon):
445 RunCommand(('cmp', target, recon))
446 #end
447
448 def EncodeSize(self, output):
449 return os.stat(output).st_size
450 #end
451#end
452
453# TODO: cleanup below this line
129 454
130#
131# exceptions 455# exceptions
132class SkipRcsException: 456class SkipRcsException:
133 def __init__(self,reason): 457 def __init__(self,reason):
134 self.reason = reason 458 self.reason = reason
459 #end
460#end
461
135class NotEnoughVersions: 462class NotEnoughVersions:
136 def __init__(self): 463 def __init__(self):
137 pass 464 pass
465 #end
466#end
467
138class CommandError: 468class CommandError:
139 def __init__(self,cmd,str): 469 def __init__(self,cmd,str):
140 if type(cmd) is types.TupleType or \ 470 if type(cmd) is types.TupleType or \
141 type(cmd) is types.ListType: 471 type(cmd) is types.ListType:
142 cmd = reduce(lambda x,y: '%s %s' % (x,y),cmd) 472 cmd = reduce(lambda x,y: '%s %s' % (x,y),cmd)
473 #end
143 print 'command was: ',cmd 474 print 'command was: ',cmd
144 print 'command failed: ',str 475 print 'command failed: ',str
145 print 'have fun debugging' 476 print 'have fun debugging'
146# 477 #end
147# one version 478#end
479
148class RcsVersion: 480class RcsVersion:
149 def __init__(self,vstr): 481 def __init__(self,vstr):
150 self.vstr = vstr 482 self.vstr = vstr
483 #end
151 def __cmp__(self,other): 484 def __cmp__(self,other):
152 return cmp(self.date, other.date) 485 return cmp(self.date, other.date)
486 #end
153 def __str__(self): 487 def __str__(self):
154 return str(self.vstr) 488 return str(self.vstr)
155# 489 #end
156# one rcsfile 490#end
491
157class RcsFile: 492class RcsFile:
158 493
159 def __init__(self, fname): 494 def __init__(self, fname):
160 self.fname = fname 495 self.fname = fname
161 self.versions = [] 496 self.versions = []
162 self.state = HEAD_STATE 497 self.state = HEAD_STATE
498 #end
163 499
164 def SetTotRev(self,s): 500 def SetTotRev(self,s):
165 self.totrev = int(s) 501 self.totrev = int(s)
502 #end
166 503
167 def Rev(self,s): 504 def Rev(self,s):
168 self.rev = RcsVersion(s) 505 self.rev = RcsVersion(s)
169 if len(self.versions) >= self.totrev: 506 if len(self.versions) >= self.totrev:
170 raise SkipRcsException('too many versions (in log messages)') 507 raise SkipRcsException('too many versions (in log messages)')
508 #end
171 self.versions.append(self.rev) 509 self.versions.append(self.rev)
510 #end
172 511
173 def Date(self,s): 512 def Date(self,s):
174 self.rev.date = s 513 self.rev.date = s
514 #end
175 515
176 def Match(self, line, state, rx, gp, newstate, f): 516 def Match(self, line, state, rx, gp, newstate, f):
177 if state == self.state: 517 if state == self.state:
@@ -179,9 +519,13 @@ class RcsFile:
179 if m: 519 if m:
180 if f: 520 if f:
181 f(m.group(gp)) 521 f(m.group(gp))
522 #end
182 self.state = newstate 523 self.state = newstate
183 return 1 524 return 1
525 #end
526 #end
184 return None 527 return None
528 #end
185 529
186 def Sum1Rlog(self): 530 def Sum1Rlog(self):
187 f = os.popen('rlog '+self.fname, "r") 531 f = os.popen('rlog '+self.fname, "r")
@@ -195,13 +539,14 @@ class RcsFile:
195 pass 539 pass
196 elif self.Match(l, DATE_STATE, RE_DATE, 1, BAR_STATE, self.Date): 540 elif self.Match(l, DATE_STATE, RE_DATE, 1, BAR_STATE, self.Date):
197 pass 541 pass
542 #end
198 l = f.readline() 543 l = f.readline()
544 #end
199 c = f.close() 545 c = f.close()
200 if c != None: 546 if c != None:
201 raise c 547 raise c
202 #print '%s versions %d' % (self.fname, len(self.versions)) 548 #end
203 #for v in self.versions: 549 #end
204 # v.Print()
205 550
206 def Sum1(self): 551 def Sum1(self):
207 st = os.stat(self.fname) 552 st = os.stat(self.fname)
@@ -209,7 +554,9 @@ class RcsFile:
209 self.Sum1Rlog() 554 self.Sum1Rlog()
210 if self.totrev != len(self.versions): 555 if self.totrev != len(self.versions):
211 raise SkipRcsException('wrong version count') 556 raise SkipRcsException('wrong version count')
557 #end
212 self.versions.sort() 558 self.versions.sort()
559 #end
213 560
214 def Checkout(self,n): 561 def Checkout(self,n):
215 v = self.versions[n] 562 v = self.versions[n]
@@ -225,57 +572,72 @@ class RcsFile:
225 total = total + len(buf) 572 total = total + len(buf)
226 out.write(buf) 573 out.write(buf)
227 buf = stream.read() 574 buf = stream.read()
575 #end
228 v.vsize = total 576 v.vsize = total
229 estr = '' 577 estr = ''
230 buf = err.read() 578 buf = err.read()
231 while buf: 579 while buf:
232 estr = estr + buf 580 estr = estr + buf
233 buf = err.read() 581 buf = err.read()
582 #end
234 if stream.close(): 583 if stream.close():
235 raise CommandError(cmd, 'checkout failed: %s\n%s\n%s' % (v.vstr, self.fname, estr)) 584 raise CommandError(cmd, 'checkout failed: %s\n%s\n%s' % (v.vstr, self.fname, estr))
585 #end
236 out.close() 586 out.close()
237 err.close() 587 err.close()
588 #end
238 589
239 def Vdate(self,n): 590 def Vdate(self,n):
240 return self.versions[n].date 591 return self.versions[n].date
592 #end
241 593
242 def Vstr(self,n): 594 def Vstr(self,n):
243 return self.versions[n].vstr 595 return self.versions[n].vstr
596 #end
244 597
245 def Verf(self,n): 598 def Verf(self,n):
246 return os.path.join(TMPDIR, 'input.%d' % n) 599 return os.path.join(TMPDIR, 'input.%d' % n)
600 #end
247 601
248 def PairsByDate(self,runnable): 602 def FilePairsByDate(self, runclass):
249 if self.totrev < 2: 603 if self.totrev < 2:
250 raise NotEnoughVersions() 604 raise NotEnoughVersions()
605 #end
251 self.Checkout(0) 606 self.Checkout(0)
252 ntrials = [] 607 ntrials = []
253 if self.totrev < 2: 608 if self.totrev < 2:
254 return vtrials 609 return vtrials
610 #end
255 for v in range(0,self.totrev-1): 611 for v in range(0,self.totrev-1):
256 if v > 1: 612 if v > 1:
257 os.remove(self.Verf(v-1)) 613 os.remove(self.Verf(v-1))
614 #end
258 self.Checkout(v+1) 615 self.Checkout(v+1)
259 if os.stat(self.Verf(v)).st_size < MIN_SIZE or \ 616 if os.stat(self.Verf(v)).st_size < MIN_SIZE or \
260 os.stat(self.Verf(v+1)).st_size < MIN_SIZE: 617 os.stat(self.Verf(v+1)).st_size < MIN_SIZE:
261 continue 618 continue
619 #end
620
621 result = TimedTest(self.Verf(v+1),
622 self.Verf(v),
623 runclass.New())
262 624
263 runnable.SetInputs(self.Verf(v), 625 target_size = os.stat(self.Verf(v+1)).st_size
264 self.Vstr(v), 626
265 self.Verf(v+1), 627 print '%s %s %s: %.2f%% encode %.3f ms: decode %.3f ms' % \
266 self.Vstr(v+1)) 628 (runclass,
267 result = TimedTest(runnable) 629 os.path.basename(self.fname),
268 print 'testing %s %s: ratio %.3f%%: time %.7f: in %u trials' % \
269 (os.path.basename(self.fname),
270 self.Vstr(v+1), 630 self.Vstr(v+1),
271 result.r1.ratio, 631 target_size > 0 and (100.0 * result.encode_size / target_size) or 0,
272 result.time.mean, 632 result.encode_time.mean * 1000.0,
273 result.trials) 633 result.decode_time.mean * 1000.0)
274 ntrials.append(result) 634 ntrials.append(result)
635 #end
275 636
276 os.remove(self.Verf(self.totrev-1)) 637 os.remove(self.Verf(self.totrev-1))
277 os.remove(self.Verf(self.totrev-2)) 638 os.remove(self.Verf(self.totrev-2))
278 return ntrials 639 return ntrials
640 #end
279 641
280 def AppendVersion(self, f, n): 642 def AppendVersion(self, f, n):
281 self.Checkout(n) 643 self.Checkout(n)
@@ -284,15 +646,15 @@ class RcsFile:
284 f.write(data) 646 f.write(data)
285 rf.close() 647 rf.close()
286 return len(data) 648 return len(data)
649 #end
287 650
288#
289# This class recursively scans a directory for rcsfiles
290class RcsFinder: 651class RcsFinder:
291 def __init__(self): 652 def __init__(self):
292 self.subdirs = [] 653 self.subdirs = []
293 self.rcsfiles = [] 654 self.rcsfiles = []
294 self.others = [] 655 self.others = []
295 self.skipped = [] 656 self.skipped = []
657 #end
296 658
297 def Scan1(self,dir): 659 def Scan1(self,dir):
298 dents = os.listdir(dir) 660 dents = os.listdir(dir)
@@ -307,16 +669,21 @@ class RcsFinder:
307 rcsfiles.append(RcsFile(full)) 669 rcsfiles.append(RcsFile(full))
308 else: 670 else:
309 others.append(full) 671 others.append(full)
672 #end
673 #end
310 self.subdirs = self.subdirs + subdirs 674 self.subdirs = self.subdirs + subdirs
311 self.rcsfiles = self.rcsfiles + rcsfiles 675 self.rcsfiles = self.rcsfiles + rcsfiles
312 self.others = self.others + others 676 self.others = self.others + others
313 return subdirs 677 return subdirs
678 #end
314 679
315 def Crawl(self, dir): 680 def Crawl(self, dir):
316 subdirs = [dir] 681 subdirs = [dir]
317 while subdirs: 682 while subdirs:
318 s1 = self.Scan1(subdirs[0]) 683 s1 = self.Scan1(subdirs[0])
319 subdirs = subdirs[1:] + s1 684 subdirs = subdirs[1:] + s1
685 #end
686 #end
320 687
321 def Summarize(self): 688 def Summarize(self):
322 good = [] 689 good = []
@@ -325,259 +692,39 @@ class RcsFinder:
325 rf.Sum1() 692 rf.Sum1()
326 if rf.totrev < 2: 693 if rf.totrev < 2:
327 raise SkipRcsException('too few versions (< 2)') 694 raise SkipRcsException('too few versions (< 2)')
695 #end
328 except SkipRcsException, e: 696 except SkipRcsException, e:
329 #print 'skipping file %s: %s' % (rf.fname, e.reason) 697 #print 'skipping file %s: %s' % (rf.fname, e.reason)
330 self.skipped.append(rf) 698 self.skipped.append(rf)
331 else: 699 else:
332 good.append(rf) 700 good.append(rf)
701 #end
333 self.rcsfiles = good 702 self.rcsfiles = good
703 #end
334 704
335 def PairsByDate(self,runnable): 705 def AllPairsByDate(self,runclass):
336 allvtrials = [] 706 results = []
337 good = [] 707 good = []
338 for rf in self.rcsfiles: 708 for rf in self.rcsfiles:
339 print 'testing %s on %s with %d versions' % (runnable.type, rf.fname, rf.totrev)
340 try: 709 try:
341 allvtrials.append(rf.PairsByDate(runnable)) 710 results = results + rf.FilePairsByDate(runclass)
342 except SkipRcsException: 711 except SkipRcsException:
343 print 'file %s has compressed versions: skipping' % (rf.fname) 712 print 'file %s has compressed versions: skipping' % (rf.fname)
344 except NotEnoughVersions: 713 except NotEnoughVersions:
345 print 'testing %s on %s: not enough versions' % (runnable.type, rf.fname) 714 print 'testing %s on %s: not enough versions' % (runclass, rf.fname)
346 else: 715 else:
347 good.append(rf) 716 good.append(rf)
717 #end
348 self.rcsfiles = good 718 self.rcsfiles = good
349 return allvtrials 719 return results
350# 720 #end
351#
352class Bucks:
353 def __init__(self,low,high):
354 self.low = low
355 self.high = high
356 self.spread = high - low
357 self.bucks = []
358 for i in range(0,HIST_SIZE):
359 self.bucks.append([low+(self.spread * (i+0.0) / float(HIST_SIZE)),
360 low+(self.spread * (i+0.5) / float(HIST_SIZE)),
361 low+(self.spread * (i+1.0) / float(HIST_SIZE)),
362 0])
363 def Add(self, x):
364 assert(x>=self.low)
365 assert(x<self.high)
366 t = self.bucks[int((x-self.low)/float(self.spread)*HIST_SIZE)]
367 t[3] = t[3] + 1
368 def Print(self, f):
369 for i in self.bucks:
370 # gnuplot -persist "plot %s using 2:4
371 f.write("%.1f %.1f %.1f %d\n" % (i[0],i[1],i[2],i[3]))
372#
373#
374class TimedTest:
375 def __init__(self,runnable,
376 skip_trials=SKIP_TRIALS,
377 min_trials=MIN_TRIALS,
378 max_trials=MAX_TRIALS,
379 min_stddev_pct=MIN_STDDEV_PCT):
380
381 min_trials = min(min_trials,max_trials)
382 self.trials = 0
383 self.measured = []
384 self.r1 = None
385 while 1:
386 try:
387 os.remove(DFILE)
388 os.remove(RFILE)
389 except OSError:
390 pass
391
392 start_time = time.time()
393 start_clock = time.clock()
394
395 result = runnable.Run(self.trials)
396
397 if self.r1 == None:
398 self.r1 = result
399
400 total_clock = (time.clock() - start_clock)
401 total_time = (time.time() - start_time)
402
403 elap_time = max((total_time), 0.000001)
404 elap_clock = max((total_clock), 0.000001)
405
406 self.trials = self.trials + 1
407
408 # skip some of the first trials
409 if self.trials > skip_trials:
410 self.measured.append((elap_clock,elap_time))
411 #print 'measurement total: %.1f ms' % (total_time * 1000.0)
412
413 # at least so many
414 if self.trials < (skip_trials + min_trials):
415 #print 'continue: need more trials: %d' % self.trials
416 continue
417
418 # compute %variance
419 done = 0
420 if skip_trials + min_trials <= 2:
421 done = 1
422 self.measured = self.measured + self.measured;
423
424 self.time = StatList([x[1] for x in self.measured], 'elap time')
425 sp = float(self.time.s) / float(self.time.mean)
426
427 # what if MAX_TRIALS is exceeded?
428 too_many = (self.trials-skip_trials) >= max_trials
429 good = (100.0 * sp) < min_stddev_pct
430 if done or too_many or good:
431 self.trials = self.trials - skip_trials
432 if not done and not good:
433 #print 'too many trials: %d' % self.trials
434 pass
435 self.clock = StatList([x[0] for x in self.measured], 'elap clock')
436 return
437#
438#
439#
440def SumList(l):
441 return reduce(lambda x,y: x+y, l)
442#
443# returns (total, mean, stddev, q2 (median),
444# (q3-q1)/2 ("semi-interquartile range"), max-min (spread))
445class StatList:
446 def __init__(self,l,desc,hist=0):
447 cnt = len(l)
448 assert(cnt > 1)
449 l.sort()
450 self.cnt = cnt
451 self.l = l
452 self.total = SumList(l)
453 self.mean = self.total / float(self.cnt)
454 self.s = math.sqrt(SumList([(x-self.mean) * (x - self.mean) for x in l]) / float(self.cnt-1))
455 self.q0 = l[0]
456 self.q1 = l[int(self.cnt/4.0+0.5)]
457 self.q2 = l[int(self.cnt/2.0+0.5)]
458 self.q3 = l[min(self.cnt-1,int((3.0*self.cnt)/4.0+0.5))]
459 self.q4 = l[self.cnt-1]+1
460 self.hf = "./%s.hist" % desc
461 self.siqr = (self.q3-self.q1)/2.0;
462 self.spread = (self.q4-self.q0)
463 self.str = '%s %d; mean %d; sdev %d; q2 %d; .5(q3-q1) %.1f; spread %d' % \
464 (desc, self.total, self.mean, self.s, self.q2, self.siqr, self.spread)
465 if hist:
466 f = open(self.hf, "w")
467 self.bucks = Bucks(self.q0,self.q4)
468 for i in l:
469 self.bucks.Add(i)
470 self.bucks.Print(f)
471 f.close()
472
473def RunCommand(args):
474 #print "run command", args
475 p = os.spawnvp(os.P_WAIT, args[0], args)
476 if p != 0:
477 raise CommandError(args, 'exited %d' % p)
478
479def RunCommandIO(args,infn,outfn):
480 p = os.fork()
481 if p == 0:
482 os.dup2(os.open(infn,os.O_RDONLY),0)
483 os.dup2(os.open(outfn,os.O_CREAT|os.O_TRUNC|os.O_WRONLY),1)
484 os.execvp(args[0], args)
485 else:
486 s = os.waitpid(p,0)
487 o = os.WEXITSTATUS(s[1])
488 if not os.WIFEXITED(s[1]) or o != 0:
489 raise CommandError(args, 'exited %d' % o)
490 721
491def RunXdelta3(args): 722def GetTestRcsFiles():
492 try:
493 xdelta3main.main(args)
494 except Exception, e:
495 raise CommandError(args, "xdelta3.main exception")
496
497class GzipInfo:
498 def __init__(self,target,delta):
499 self.tgtsize = os.stat(target).st_size
500 self.dsize = os.stat(delta).st_size
501
502class Xdelta3Info:
503 def __init__(self,target,delta):
504 self.tgtsize = os.stat(target).st_size
505 self.dsize = os.stat(delta).st_size
506 if self.tgtsize > 0:
507 self.ratio = 100.0 * self.dsize / self.tgtsize;
508 else:
509 self.ratio = 0.0
510
511class Xdelta3ModInfo:
512 def __init__(self,target,delta):
513 #tmp = open(DFILE, 'w')
514 #tmp.write(patch)
515 #tmp.close()
516 #r3 = xdelta3.xd3_main_cmdline(['xdelta3', 'printhdr', DFILE, RFILE])
517 #if r3 != 0:
518 # raise CommandError('memory', 'print failed: %s' % r3)
519 #hdr = open(RFILE, 'r').read()
520 #print hdr
521 self.tgtsize = len(target)
522 self.dsize = len(delta)
523 if self.tgtsize > 0:
524 self.ratio = 100.0 * self.dsize / self.tgtsize;
525 else:
526 self.ratio = 0.0
527
528class Xdelta3Pair:
529 def __init__(self, extra):
530 self.type = 'xdelta3'
531 self.decode_args = '-dqf'
532 self.encode_args = '-eqf'
533 self.extra = extra
534 self.presrc = '-s'
535 self.canrep = 1
536
537 def SetInputs(self,old,oldv,new,newv):
538 self.old = old
539 self.oldv = oldv
540 self.new = new
541 self.newv = newv
542 return self
543
544 def Run(self,trial):
545
546 encode_args = self.extra + \
547 testwide_encode_args + \
548 [self.encode_args,
549 self.presrc,
550 self.old,
551 self.new,
552 DFILE]
553
554 decode_args = [self.decode_args,
555 self.presrc,
556 self.old,
557 DFILE,
558 RFILE]
559 try:
560 RunXdelta3(encode_args)
561 if trial > 0:
562 return None
563 self.dinfo = Xdelta3Info(self.new,DFILE)
564 if self.dinfo.extcomp:
565 raise SkipRcsException('ext comp')
566 RunXdelta3(decode_args)
567 RunCommand(('cmp',
568 self.new,
569 RFILE))
570 return self.dinfo
571 except CommandError:
572 print 'encode args: %s' % ' '.join(encode_args)
573 print 'decode args: %s' % ' '.join(decode_args)
574 raise CommandError("Run failed")
575
576def Test():
577 rcsf = RcsFinder() 723 rcsf = RcsFinder()
578 rcsf.Crawl(RCSDIR) 724 rcsf.Crawl(RCSDIR)
579 if len(rcsf.rcsfiles) == 0: 725 if len(rcsf.rcsfiles) == 0:
580 sys.exit(1) 726 raise CommandError('', 'no RCS files')
727 #end
581 rcsf.Summarize() 728 rcsf.Summarize()
582 print "rcsfiles: rcsfiles %d; subdirs %d; others %d; skipped %d" % (len(rcsf.rcsfiles), 729 print "rcsfiles: rcsfiles %d; subdirs %d; others %d; skipped %d" % (len(rcsf.rcsfiles),
583 len(rcsf.subdirs), 730 len(rcsf.subdirs),
@@ -586,66 +733,11 @@ def Test():
586 print StatList([x.rcssize for x in rcsf.rcsfiles], "rcssize", 1).str 733 print StatList([x.rcssize for x in rcsf.rcsfiles], "rcssize", 1).str
587 print StatList([x.totrev for x in rcsf.rcsfiles], "totrev", 1).str 734 print StatList([x.totrev for x in rcsf.rcsfiles], "totrev", 1).str
588 return rcsf 735 return rcsf
736#end
589 737
590def Decimals(max): 738# TODO: cleanup below this line
591 l = [0]
592 step = 1
593 while 1:
594 r = range(step, step * 10, step)
595 l = l + r
596 if step * 10 >= max:
597 l.append(step * 10)
598 break
599 step = step * 10
600 return l
601
602class Xdelta3Run1:
603 def __init__(self,file):
604 self.file = file
605 def Run(self,trial):
606 RunXdelta3(testwide_encode_args +
607 ['-efqW', str(1<<20), self.file, DFILE])
608 if trial > 0:
609 return None
610 return Xdelta3Info(self.file,DFILE)
611
612class Xdelta3Mod1:
613 def __init__(self,file):
614 self.data = open(file, 'r').read()
615 def Run(self,trial):
616 r1, patch = xdelta3.xd3_encode_memory(self.data, None, 1000000, 1<<10)
617 if r1 != 0:
618 raise CommandError('memory', 'encode failed: %s' % r1)
619 if trial > 0:
620 return None
621 r2, data1 = xdelta3.xd3_decode_memory(patch, None, len(self.data))
622 if r2 != 0:
623 raise CommandError('memory', 'decode failed: %s' % r1)
624 if self.data != data1:
625 raise CommandError('memory', 'bad output: %s' % self.data, data1)
626 return Xdelta3ModInfo(self.data,patch)
627
628class GzipRun1:
629 def __init__(self,file):
630 self.file = file
631 self.canrep = 0
632 def Run(self,trial):
633 RunCommandIO(['gzip', '-cf'], self.file, DFILE)
634 if trial > 0:
635 return None
636 return GzipInfo(self.file,DFILE)
637
638def SetFileSize(F,L):
639 fd = os.open(F, os.O_CREAT | os.O_WRONLY)
640 os.ftruncate(fd,L)
641 assert(os.fstat(fd).st_size == L)
642 os.close(fd)
643
644def ReportSpeed(L,tr,desc):
645 print '%s 0-run length %u: dsize %u: time %.3f ms: encode %.0f B/sec: in %u trials' % \
646 (desc, L, tr.r1.dsize, tr.time.mean * 1000.0,
647 ((L+tr.r1.dsize) / tr.time.mean), tr.trials)
648 739
740#
649class RandomTestResult: 741class RandomTestResult:
650 def __init__(self, round, config, runtime, compsize): 742 def __init__(self, round, config, runtime, compsize):
651 self.round = round 743 self.round = round
@@ -762,7 +854,7 @@ class RandomTester:
762 f2sz += file.AppendVersion(f2, r2) 854 f2sz += file.AppendVersion(f2, r2)
763 #end 855 #end
764 856
765 print 'from %u; to %u' % (f1sz, f2sz) 857 print 'source %u bytes; target %u bytes' % (f1sz, f2sz)
766 f1.close() 858 f1.close()
767 f2.close() 859 f2.close()
768 return (TMPDIR + "/big.1", 860 return (TMPDIR + "/big.1",
@@ -779,22 +871,17 @@ class RandomTester:
779 config = self.RandomConfig() 871 config = self.RandomConfig()
780 #end 872 #end
781 873
782 runner = Xdelta3Pair([ '-C', ','.join([str(x) for x in config]) ]) 874 args = [ '-C', ','.join([str(x) for x in config]) ]
783 runner.SetInputs(f1, 1, f2, 2) 875 result = TimedTest(f2, f1, Xdelta3Runner(args))
784 result = TimedTest(runner)
785 876
786 tr = RandomTestResult(self.round_num, 877 tr = RandomTestResult(self.round_num,
787 config, 878 config,
788 result.time.mean, 879 result.encode_time.mean,
789 result.r1.dsize) 880 result.encode_size)
790 881
791 self.results.append(tr) 882 self.results.append(tr)
792 883
793 print 'Test %d: %s in %u trials' % \ 884 print 'Test %d: %s' % (self.trial_num, tr)
794 (self.trial_num,
795 tr,
796 result.trials)
797
798 self.trial_num += 1 885 self.trial_num += 1
799 return 886 return
800 #end 887 #end
@@ -902,22 +989,74 @@ class RandomTester:
902 (len(all_r) > 2) and 989 (len(all_r) > 2) and
903 (' in %d' % len(all_r)) or "") 990 (' in %d' % len(all_r)) or "")
904 #end 991 #end
905 992
906 return r 993 return r
907 #end 994 #end
908#end 995#end
909 996
910# This tests the raw speed of 0-byte inputs 997def RunRandomRcsTest(rcsf):
911def RunSpeed(): 998 configs = []
912 for L in Decimals(MAX_RUN): 999 while 1:
913 SetFileSize(RUNFILE, L) 1000 test = RandomTester(configs)
914 trx = TimedTest(Xdelta3Run1(RUNFILE)) 1001 f1, f2 = test.MakeBigFiles(rcsf)
915 ReportSpeed(L,trx,'xdelta3') 1002 while not test.HasEnoughResults():
916 trm = TimedTest(Xdelta3Mod1(RUNFILE)) 1003 test.RandomFileTest(f1, f2)
917 ReportSpeed(L,trm,'module ') 1004 #end
918 trg = TimedTest(GzipRun1(RUNFILE)) 1005 configs = test.ScoreTests()
919 ReportSpeed(L,trg,'gzip ') 1006 test.Reset()
1007 #end
1008#end
1009
1010def RunSampleTest(d, files):
1011 # TODO: consolidate w/ the above
1012 print 'testing %s with %d files' % (d, len(files))
1013 configs = []
1014 while len(files) > 1:
1015 test = RandomTester(configs)
1016 f1 = files[0]
1017 f2 = files[1]
1018 while not test.HasEnoughResults():
1019 test.RandomFileTest(f1, f2)
1020 #end
1021 configs = test.ScoreTests()
1022 test.Reset()
1023 files = files[1:]
1024 #end
1025#end
1026
1027def RunSampleDataTest():
1028 dirs = [SAMPLEDIR]
1029 while dirs:
1030 d = dirs[0]
1031 dirs = dirs[1:]
1032 l = os.listdir(d)
1033 files = []
1034 for e in l:
1035 p = os.path.join(d, e)
1036 if os.path.isdir(p):
1037 dirs.append(p)
1038 else:
1039 files.append(p)
1040 #end
1041 #end
1042 if files:
1043 files.sort()
1044 RunSampleTest(d, files)
1045 #end
1046 #end
1047#end
1048
1049def ReportPairs(name, results):
1050 encode_time = 0
1051 decode_time = 0
1052 encode_size = 0
1053 for r in results:
1054 encode_time += r.encode_time.mean
1055 decode_time += r.decode_time.mean
1056 encode_size += r.encode_size
920 #end 1057 #end
1058 print '%s rcs: encode %.2f s: decode %.2f s: size %d' % \
1059 (name, encode_time, decode_time, encode_size)
921#end 1060#end
922 1061
923if __name__ == "__main__": 1062if __name__ == "__main__":
@@ -925,34 +1064,21 @@ if __name__ == "__main__":
925 RunCommand(['rm', '-rf', TMPDIR]) 1064 RunCommand(['rm', '-rf', TMPDIR])
926 os.mkdir(TMPDIR) 1065 os.mkdir(TMPDIR)
927 1066
928 RunSpeed() 1067 #RunSpeedTest()
929
930 # This tests pairwise (date-ordered) performance
931 #rcsf = Test()
932 #rcsf.PairsByDate(Xdelta3Pair([]))
933
934 configs = []
935
936 while 0:
937 #f1 = '/tmp/big.1'
938 #f2 = '/tmp/big.2'
939 test = RandomTester(configs)
940 #f1, f2 = test.MakeBigFiles(rcsf)
941 while not test.HasEnoughResults():
942 f1 = '/tmp/WESNOTH_tmp/wesnoth-1.1.12.tar'
943 f2 = '/tmp/WESNOTH_tmp/wesnoth-1.1.13.tar'
944 #f1 = '/tmp/big.1'
945 #f2 = '/tmp/big.2'
946 test.RandomFileTest(f1, f2)
947 #end
948 configs = test.ScoreTests()
949 1068
950 #test.Reset() 1069 rcsf = GetTestRcsFiles()
951 test.results = test.results[0:len(configs)] 1070
952 configs = [] 1071 x3r = rcsf.AllPairsByDate(Xdelta3RunClass(['-9', '-S', 'djw']))
953 #break 1072 ReportPairs('xd3 -9', x3r)
954 #end 1073
955 #end 1074 #x3r = rcsf.AllPairsByDate(Xdelta3RunClass([]))
1075 #ReportPairs('xdelta3', x3r)
1076
1077 #x1r = rcsf.AllPairsByDate(Xdelta1RunClass())
1078 #ReportPairs('xdelta1', x1r)
1079
1080 #RunRandomRcsTest(rcsf)
1081 #RunSampleDataTest()
956 1082
957 except CommandError: 1083 except CommandError:
958 pass 1084 pass