diff options
author | josh.macdonald <jmacd@users.noreply.github.com> | 2007-11-26 12:15:18 +0000 |
---|---|---|
committer | josh.macdonald <jmacd@users.noreply.github.com> | 2007-11-26 12:15:18 +0000 |
commit | ce6a092d2ac6723acd4de0b89a7832a027722027 (patch) | |
tree | 1b2dda028daf9a690a6ba971cd936aa0a985c9f5 /xdelta3/examples/checksum_test.cc | |
parent | d8d2ab4c809485480727ee7a943163f272d22b11 (diff) |
The rabin-karp checksum looks better in testing but doesn't really
seem to improve things in practice. Removed HASH_PRIME.
Diffstat (limited to 'xdelta3/examples/checksum_test.cc')
-rw-r--r-- | xdelta3/examples/checksum_test.cc | 163 |
1 files changed, 112 insertions, 51 deletions
diff --git a/xdelta3/examples/checksum_test.cc b/xdelta3/examples/checksum_test.cc index 40e2df1..15c6087 100644 --- a/xdelta3/examples/checksum_test.cc +++ b/xdelta3/examples/checksum_test.cc | |||
@@ -48,12 +48,6 @@ struct plain { | |||
48 | } | 48 | } |
49 | }; | 49 | }; |
50 | 50 | ||
51 | struct permute { | ||
52 | int operator()(const uint8_t &c) { | ||
53 | return __single_hash[c]; | ||
54 | } | ||
55 | }; | ||
56 | |||
57 | template <typename Word> | 51 | template <typename Word> |
58 | struct hhash { // take "h" of the high-bits as a hash value for this | 52 | struct hhash { // take "h" of the high-bits as a hash value for this |
59 | // checksum, which are the most "distant" in terms of the | 53 | // checksum, which are the most "distant" in terms of the |
@@ -152,6 +146,34 @@ struct rabin_karp { | |||
152 | Word incr_state; | 146 | Word incr_state; |
153 | }; | 147 | }; |
154 | 148 | ||
149 | MEMBER | ||
150 | struct adler32_cksum { | ||
151 | typedef Word word_type; | ||
152 | typedef Permute permute_type; | ||
153 | typedef Hash hash_type; | ||
154 | |||
155 | enum { cksum_size = CksumSize, | ||
156 | cksum_skip = CksumSkip, | ||
157 | compaction = Compaction, | ||
158 | }; | ||
159 | |||
160 | Word step(const uint8_t *ptr) { | ||
161 | return xd3_lcksum (ptr, cksum_size); | ||
162 | } | ||
163 | |||
164 | Word state0(const uint8_t *ptr) { | ||
165 | incr_state = step(ptr); | ||
166 | return incr_state; | ||
167 | } | ||
168 | |||
169 | Word incr(const uint8_t *ptr) { | ||
170 | LARGE_CKSUM_UPDATE(incr_state, ptr - 1, cksum_size); | ||
171 | return incr_state; | ||
172 | } | ||
173 | |||
174 | Word incr_state; | ||
175 | }; | ||
176 | |||
155 | // TESTS | 177 | // TESTS |
156 | 178 | ||
157 | template <typename Word> | 179 | template <typename Word> |
@@ -222,12 +244,15 @@ struct test_result_base { | |||
222 | } | 244 | } |
223 | virtual void reset() = 0; | 245 | virtual void reset() = 0; |
224 | virtual void print() = 0; | 246 | virtual void print() = 0; |
225 | virtual void print_accum() = 0; | ||
226 | virtual void get(const uint8_t* buf, const int buf_size, int iters) = 0; | 247 | virtual void get(const uint8_t* buf, const int buf_size, int iters) = 0; |
227 | virtual void stat() = 0; | 248 | virtual void stat() = 0; |
228 | virtual int count() = 0; | 249 | virtual int count() = 0; |
229 | virtual int dups() = 0; | 250 | virtual int dups() = 0; |
230 | virtual double uniqueness() = 0; | 251 | virtual double uniqueness() = 0; |
252 | virtual double fullness() = 0; | ||
253 | virtual double collisions() = 0; | ||
254 | virtual double coverage() = 0; | ||
255 | virtual double compression() = 0; | ||
231 | virtual double time() = 0; | 256 | virtual double time() = 0; |
232 | virtual double score() = 0; | 257 | virtual double score() = 0; |
233 | virtual void set_score(double min_dups_frac, double min_time) = 0; | 258 | virtual void set_score(double min_dups_frac, double min_time) = 0; |
@@ -345,6 +370,18 @@ struct test_result : public test_result_base { | |||
345 | return (double) h_buckets_full / (1 << h_bits); | 370 | return (double) h_buckets_full / (1 << h_bits); |
346 | } | 371 | } |
347 | 372 | ||
373 | double collisions() { | ||
374 | return (double) colls() / fstats.unique; | ||
375 | } | ||
376 | |||
377 | double coverage() { | ||
378 | return (double) h_buckets_full / uniqueness() / count(); | ||
379 | } | ||
380 | |||
381 | double compression() { | ||
382 | return 1.0 - coverage(); | ||
383 | } | ||
384 | |||
348 | double time() { | 385 | double time() { |
349 | return (double) accum_millis / accum_iters; | 386 | return (double) accum_millis / accum_iters; |
350 | } | 387 | } |
@@ -353,8 +390,9 @@ struct test_result : public test_result_base { | |||
353 | return h_score; | 390 | return h_score; |
354 | } | 391 | } |
355 | 392 | ||
356 | void set_score(double min_uniqueness, double min_time) { | 393 | void set_score(double min_compression, double min_time) { |
357 | h_score = time(); | 394 | h_score = (compression() - 0.99 * min_compression) |
395 | * (time() - 0.99 * min_time); | ||
358 | } | 396 | } |
359 | 397 | ||
360 | double total_time() { | 398 | double total_time() { |
@@ -381,29 +419,21 @@ struct test_result : public test_result_base { | |||
381 | accum_size += test_size; | 419 | accum_size += test_size; |
382 | } | 420 | } |
383 | 421 | ||
384 | void print_accum() { | ||
385 | printf("%s: (%u#%u) count %u dups %0.2f%% %.4f MB/s score %0.6f\n", | ||
386 | test_name, | ||
387 | cksum_size, | ||
388 | cksum_skip, | ||
389 | total_count(), | ||
390 | 100.0 * total_dups() / total_count(), | ||
391 | 0.001 * accum_size / time(), | ||
392 | h_score); | ||
393 | } | ||
394 | |||
395 | void print() { | 422 | void print() { |
396 | if (fstats.count != count()) { | 423 | if (fstats.count != count()) { |
397 | fprintf(stderr, "internal error: %d != %d\n", fstats.count, count()); | 424 | fprintf(stderr, "internal error: %d != %d\n", fstats.count, count()); |
398 | abort(); | 425 | abort(); |
399 | } | 426 | } |
400 | printf("%s: (%u#%u) count %u uniq %0.2f%% full %0.4f%% of 1<<%d @ %.4f MB/s %u iters\n", | 427 | printf("%s: (%u#%u) count %u uniq %0.2f%% full %u (%0.4f%% coll %0.4f%%) covers %0.2f%% w/ 2^%d @ %.4f MB/s %u iters\n", |
401 | test_name, | 428 | test_name, |
402 | cksum_size, | 429 | cksum_size, |
403 | cksum_skip, | 430 | cksum_skip, |
404 | count(), | 431 | count(), |
405 | 100.0 * uniqueness(), | 432 | 100.0 * uniqueness(), |
433 | h_buckets_full, | ||
406 | 100.0 * fullness(), | 434 | 100.0 * fullness(), |
435 | 100.0 * collisions(), | ||
436 | 100.0 * coverage(), | ||
407 | h_bits, | 437 | h_bits, |
408 | 0.001 * accum_iters * test_size / accum_millis, | 438 | 0.001 * accum_iters * test_size / accum_millis, |
409 | accum_iters); | 439 | accum_iters); |
@@ -457,6 +487,7 @@ struct test_result : public test_result_base { | |||
457 | 487 | ||
458 | void get(const uint8_t* buf, const int buf_size, int test_iters) { | 488 | void get(const uint8_t* buf, const int buf_size, int test_iters) { |
459 | rabin_karp<SELF> test; | 489 | rabin_karp<SELF> test; |
490 | //adler32_cksum<SELF> test; | ||
460 | hash_type hash; | 491 | hash_type hash; |
461 | const uint8_t *ptr; | 492 | const uint8_t *ptr; |
462 | const uint8_t *end; | 493 | const uint8_t *end; |
@@ -545,6 +576,17 @@ struct test_result : public test_result_base { | |||
545 | } | 576 | } |
546 | }; | 577 | }; |
547 | 578 | ||
579 | template <typename Word> | ||
580 | void print_array(const char *tname) { | ||
581 | printf("static const %s hash_multiplier[64] = {\n", tname); | ||
582 | Word p = 1; | ||
583 | for (int i = 0; i < 64; i++) { | ||
584 | printf(" %uU,\n", p); | ||
585 | p *= good_word<Word>(); | ||
586 | } | ||
587 | printf("};\n", tname); | ||
588 | } | ||
589 | |||
548 | int main(int argc, char** argv) { | 590 | int main(int argc, char** argv) { |
549 | int i; | 591 | int i; |
550 | uint8_t *buf = NULL; | 592 | uint8_t *buf = NULL; |
@@ -556,21 +598,42 @@ int main(int argc, char** argv) { | |||
556 | return 1; | 598 | return 1; |
557 | } | 599 | } |
558 | 600 | ||
601 | //print_array<uint32_t>("uint32_t"); | ||
602 | |||
559 | #define TEST(T,Z,S,P,H,C) test_result<T,Z,S,P,H<T>,C> \ | 603 | #define TEST(T,Z,S,P,H,C) test_result<T,Z,S,P,H<T>,C> \ |
560 | _ ## T ## _ ## Z ## _ ## S ## _ ## P ## _ ## H ## _ ## C \ | 604 | _ ## T ## _ ## Z ## _ ## S ## _ ## P ## _ ## H ## _ ## C \ |
561 | (#T "_" #Z "_" #S "_" #P "_" #H "_" #C) | 605 | (#T "_" #Z "_" #S "_" #P "_" #H "_" #C) |
562 | 606 | ||
607 | #if 0 | ||
608 | |||
609 | TEST(uint32_t, 4, SKIP, plain, hhash, 0); /* x */ \ | ||
610 | TEST(uint32_t, 4, SKIP, plain, hhash, 1); /* x */ \ | ||
611 | TEST(uint32_t, 4, SKIP, plain, hhash, 2); /* x */ \ | ||
612 | TEST(uint32_t, 4, SKIP, plain, hhash, 3); /* x */ \ | ||
613 | |||
614 | #endif | ||
615 | |||
563 | #define TESTS(SKIP) \ | 616 | #define TESTS(SKIP) \ |
617 | TEST(uint32_t, 9, SKIP, plain, hhash, 0); /* x */ \ | ||
618 | TEST(uint32_t, 9, SKIP, plain, hhash, 1); /* x */ \ | ||
619 | TEST(uint32_t, 9, SKIP, plain, hhash, 2); /* x */ \ | ||
620 | TEST(uint32_t, 9, SKIP, plain, hhash, 3) | ||
621 | |||
622 | #define TESTS_ALL(SKIP) \ | ||
564 | TEST(uint32_t, 3, SKIP, plain, hhash, 0); \ | 623 | TEST(uint32_t, 3, SKIP, plain, hhash, 0); \ |
565 | TEST(uint32_t, 3, SKIP, plain, hhash, 1); \ | 624 | TEST(uint32_t, 3, SKIP, plain, hhash, 1); \ |
566 | TEST(uint32_t, 4, SKIP, plain, hhash, 0); /* x */ \ | 625 | TEST(uint32_t, 4, SKIP, plain, hhash, 0); /* x */ \ |
567 | TEST(uint32_t, 4, SKIP, plain, hhash, 1); /* x */ \ | 626 | TEST(uint32_t, 4, SKIP, plain, hhash, 1); /* x */ \ |
627 | TEST(uint32_t, 4, SKIP, plain, hhash, 2); /* x */ \ | ||
628 | TEST(uint32_t, 4, SKIP, plain, hhash, 3); /* x */ \ | ||
568 | TEST(uint32_t, 5, SKIP, plain, hhash, 0); \ | 629 | TEST(uint32_t, 5, SKIP, plain, hhash, 0); \ |
569 | TEST(uint32_t, 5, SKIP, plain, hhash, 1); \ | 630 | TEST(uint32_t, 5, SKIP, plain, hhash, 1); \ |
570 | TEST(uint32_t, 8, SKIP, plain, hhash, 0); \ | 631 | TEST(uint32_t, 8, SKIP, plain, hhash, 0); \ |
571 | TEST(uint32_t, 8, SKIP, plain, hhash, 1); \ | 632 | TEST(uint32_t, 8, SKIP, plain, hhash, 1); \ |
572 | TEST(uint32_t, 9, SKIP, plain, hhash, 0); /* x */ \ | 633 | TEST(uint32_t, 9, SKIP, plain, hhash, 0); /* x */ \ |
573 | TEST(uint32_t, 9, SKIP, plain, hhash, 1); /* x */ \ | 634 | TEST(uint32_t, 9, SKIP, plain, hhash, 1); /* x */ \ |
635 | TEST(uint32_t, 9, SKIP, plain, hhash, 2); /* x */ \ | ||
636 | TEST(uint32_t, 9, SKIP, plain, hhash, 3); /* x */ \ | ||
574 | TEST(uint32_t, 11, SKIP, plain, hhash, 0); /* x */ \ | 637 | TEST(uint32_t, 11, SKIP, plain, hhash, 0); /* x */ \ |
575 | TEST(uint32_t, 11, SKIP, plain, hhash, 1); /* x */ \ | 638 | TEST(uint32_t, 11, SKIP, plain, hhash, 1); /* x */ \ |
576 | TEST(uint32_t, 13, SKIP, plain, hhash, 0); \ | 639 | TEST(uint32_t, 13, SKIP, plain, hhash, 0); \ |
@@ -584,24 +647,22 @@ int main(int argc, char** argv) { | |||
584 | TEST(uint32_t, 34, SKIP, plain, hhash, 0); \ | 647 | TEST(uint32_t, 34, SKIP, plain, hhash, 0); \ |
585 | TEST(uint32_t, 34, SKIP, plain, hhash, 1); \ | 648 | TEST(uint32_t, 34, SKIP, plain, hhash, 1); \ |
586 | TEST(uint32_t, 55, SKIP, plain, hhash, 0); \ | 649 | TEST(uint32_t, 55, SKIP, plain, hhash, 0); \ |
587 | TEST(uint32_t, 55, SKIP, plain, hhash, 1); \ | 650 | TEST(uint32_t, 55, SKIP, plain, hhash, 1) |
588 | TEST(uint32_t, 89, SKIP, plain, hhash, 0); \ | ||
589 | TEST(uint32_t, 89, SKIP, plain, hhash, 1) | ||
590 | 651 | ||
591 | TESTS(1); // * | 652 | TESTS(1); // * |
592 | TESTS(2); // * | 653 | // TESTS(2); // * |
593 | TESTS(3); // * | 654 | // TESTS(3); // * |
594 | TESTS(5); // * | 655 | // TESTS(5); // * |
595 | TESTS(8); // * | 656 | // TESTS(8); // * |
596 | TESTS(9); | 657 | // TESTS(9); |
597 | TESTS(11); | 658 | // TESTS(11); |
598 | TESTS(13); // * | 659 | // TESTS(13); // * |
599 | TESTS(15); | 660 | TESTS(15); |
600 | TESTS(16); | 661 | // TESTS(16); |
601 | TESTS(21); // * | 662 | // TESTS(21); // * |
602 | TESTS(34); // * | 663 | // TESTS(34); // * |
603 | TESTS(55); // * | 664 | // TESTS(55); // * |
604 | TESTS(89); // * | 665 | // TESTS(89); // * |
605 | 666 | ||
606 | for (i = 1; i < argc; i++) { | 667 | for (i = 1; i < argc; i++) { |
607 | if ((ret = read_whole_file(argv[i], | 668 | if ((ret = read_whole_file(argv[i], |
@@ -614,7 +675,7 @@ int main(int argc, char** argv) { | |||
614 | argv[i], buf_len); | 675 | argv[i], buf_len); |
615 | 676 | ||
616 | double min_time = -1.0; | 677 | double min_time = -1.0; |
617 | double min_uniqueness = 0.0; | 678 | double min_compression = 0.0; |
618 | 679 | ||
619 | for (vector<test_result_base*>::iterator i = all_tests.begin(); | 680 | for (vector<test_result_base*>::iterator i = all_tests.begin(); |
620 | i != all_tests.end(); ++i) { | 681 | i != all_tests.end(); ++i) { |
@@ -633,7 +694,7 @@ int main(int argc, char** argv) { | |||
633 | test->stat(); | 694 | test->stat(); |
634 | 695 | ||
635 | if (min_time < 0.0) { | 696 | if (min_time < 0.0) { |
636 | min_uniqueness = test->uniqueness(); | 697 | min_compression = test->compression(); |
637 | min_time = test->time(); | 698 | min_time = test->time(); |
638 | } | 699 | } |
639 | 700 | ||
@@ -641,26 +702,26 @@ int main(int argc, char** argv) { | |||
641 | min_time = test->time(); | 702 | min_time = test->time(); |
642 | } | 703 | } |
643 | 704 | ||
644 | if (min_uniqueness > test->uniqueness()) { | 705 | if (min_compression > test->compression()) { |
645 | min_uniqueness = test->uniqueness(); | 706 | min_compression = test->compression(); |
646 | } | 707 | } |
647 | 708 | ||
648 | test->print(); | 709 | test->print(); |
649 | } | 710 | } |
650 | 711 | ||
651 | for (vector<test_result_base*>::iterator i = all_tests.begin(); | 712 | // for (vector<test_result_base*>::iterator i = all_tests.begin(); |
652 | i != all_tests.end(); ++i) { | 713 | // i != all_tests.end(); ++i) { |
653 | test_result_base *test = *i; | 714 | // test_result_base *test = *i; |
654 | test->set_score(min_uniqueness, min_time); | 715 | // test->set_score(min_compression, min_time); |
655 | } | 716 | // } |
656 | 717 | ||
657 | sort(all_tests.begin(), all_tests.end(), compare_h()); | 718 | // sort(all_tests.begin(), all_tests.end(), compare_h()); |
658 | 719 | ||
659 | for (vector<test_result_base*>::iterator i = all_tests.begin(); | 720 | // for (vector<test_result_base*>::iterator i = all_tests.begin(); |
660 | i != all_tests.end(); ++i) { | 721 | // i != all_tests.end(); ++i) { |
661 | test_result_base *test = *i; | 722 | // test_result_base *test = *i; |
662 | test->print_accum(); | 723 | // test->print(); |
663 | } | 724 | // } |
664 | 725 | ||
665 | free(buf); | 726 | free(buf); |
666 | buf = NULL; | 727 | buf = NULL; |