summaryrefslogtreecommitdiff
path: root/xdelta3/examples/checksum_test.cc
diff options
context:
space:
mode:
authorjosh.macdonald <jmacd@users.noreply.github.com>2007-11-26 12:15:18 +0000
committerjosh.macdonald <jmacd@users.noreply.github.com>2007-11-26 12:15:18 +0000
commitce6a092d2ac6723acd4de0b89a7832a027722027 (patch)
tree1b2dda028daf9a690a6ba971cd936aa0a985c9f5 /xdelta3/examples/checksum_test.cc
parentd8d2ab4c809485480727ee7a943163f272d22b11 (diff)
The rabin-karp checksum looks better in testing but doesn't really
seem to improve things in practice. Removed HASH_PRIME.
Diffstat (limited to 'xdelta3/examples/checksum_test.cc')
-rw-r--r--xdelta3/examples/checksum_test.cc163
1 files changed, 112 insertions, 51 deletions
diff --git a/xdelta3/examples/checksum_test.cc b/xdelta3/examples/checksum_test.cc
index 40e2df1..15c6087 100644
--- a/xdelta3/examples/checksum_test.cc
+++ b/xdelta3/examples/checksum_test.cc
@@ -48,12 +48,6 @@ struct plain {
48 } 48 }
49}; 49};
50 50
51struct permute {
52 int operator()(const uint8_t &c) {
53 return __single_hash[c];
54 }
55};
56
57template <typename Word> 51template <typename Word>
58struct hhash { // take "h" of the high-bits as a hash value for this 52struct hhash { // take "h" of the high-bits as a hash value for this
59 // checksum, which are the most "distant" in terms of the 53 // checksum, which are the most "distant" in terms of the
@@ -152,6 +146,34 @@ struct rabin_karp {
152 Word incr_state; 146 Word incr_state;
153}; 147};
154 148
149MEMBER
150struct adler32_cksum {
151 typedef Word word_type;
152 typedef Permute permute_type;
153 typedef Hash hash_type;
154
155 enum { cksum_size = CksumSize,
156 cksum_skip = CksumSkip,
157 compaction = Compaction,
158 };
159
160 Word step(const uint8_t *ptr) {
161 return xd3_lcksum (ptr, cksum_size);
162 }
163
164 Word state0(const uint8_t *ptr) {
165 incr_state = step(ptr);
166 return incr_state;
167 }
168
169 Word incr(const uint8_t *ptr) {
170 LARGE_CKSUM_UPDATE(incr_state, ptr - 1, cksum_size);
171 return incr_state;
172 }
173
174 Word incr_state;
175};
176
155// TESTS 177// TESTS
156 178
157template <typename Word> 179template <typename Word>
@@ -222,12 +244,15 @@ struct test_result_base {
222 } 244 }
223 virtual void reset() = 0; 245 virtual void reset() = 0;
224 virtual void print() = 0; 246 virtual void print() = 0;
225 virtual void print_accum() = 0;
226 virtual void get(const uint8_t* buf, const int buf_size, int iters) = 0; 247 virtual void get(const uint8_t* buf, const int buf_size, int iters) = 0;
227 virtual void stat() = 0; 248 virtual void stat() = 0;
228 virtual int count() = 0; 249 virtual int count() = 0;
229 virtual int dups() = 0; 250 virtual int dups() = 0;
230 virtual double uniqueness() = 0; 251 virtual double uniqueness() = 0;
252 virtual double fullness() = 0;
253 virtual double collisions() = 0;
254 virtual double coverage() = 0;
255 virtual double compression() = 0;
231 virtual double time() = 0; 256 virtual double time() = 0;
232 virtual double score() = 0; 257 virtual double score() = 0;
233 virtual void set_score(double min_dups_frac, double min_time) = 0; 258 virtual void set_score(double min_dups_frac, double min_time) = 0;
@@ -345,6 +370,18 @@ struct test_result : public test_result_base {
345 return (double) h_buckets_full / (1 << h_bits); 370 return (double) h_buckets_full / (1 << h_bits);
346 } 371 }
347 372
373 double collisions() {
374 return (double) colls() / fstats.unique;
375 }
376
377 double coverage() {
378 return (double) h_buckets_full / uniqueness() / count();
379 }
380
381 double compression() {
382 return 1.0 - coverage();
383 }
384
348 double time() { 385 double time() {
349 return (double) accum_millis / accum_iters; 386 return (double) accum_millis / accum_iters;
350 } 387 }
@@ -353,8 +390,9 @@ struct test_result : public test_result_base {
353 return h_score; 390 return h_score;
354 } 391 }
355 392
356 void set_score(double min_uniqueness, double min_time) { 393 void set_score(double min_compression, double min_time) {
357 h_score = time(); 394 h_score = (compression() - 0.99 * min_compression)
395 * (time() - 0.99 * min_time);
358 } 396 }
359 397
360 double total_time() { 398 double total_time() {
@@ -381,29 +419,21 @@ struct test_result : public test_result_base {
381 accum_size += test_size; 419 accum_size += test_size;
382 } 420 }
383 421
384 void print_accum() {
385 printf("%s: (%u#%u) count %u dups %0.2f%% %.4f MB/s score %0.6f\n",
386 test_name,
387 cksum_size,
388 cksum_skip,
389 total_count(),
390 100.0 * total_dups() / total_count(),
391 0.001 * accum_size / time(),
392 h_score);
393 }
394
395 void print() { 422 void print() {
396 if (fstats.count != count()) { 423 if (fstats.count != count()) {
397 fprintf(stderr, "internal error: %d != %d\n", fstats.count, count()); 424 fprintf(stderr, "internal error: %d != %d\n", fstats.count, count());
398 abort(); 425 abort();
399 } 426 }
400 printf("%s: (%u#%u) count %u uniq %0.2f%% full %0.4f%% of 1<<%d @ %.4f MB/s %u iters\n", 427 printf("%s: (%u#%u) count %u uniq %0.2f%% full %u (%0.4f%% coll %0.4f%%) covers %0.2f%% w/ 2^%d @ %.4f MB/s %u iters\n",
401 test_name, 428 test_name,
402 cksum_size, 429 cksum_size,
403 cksum_skip, 430 cksum_skip,
404 count(), 431 count(),
405 100.0 * uniqueness(), 432 100.0 * uniqueness(),
433 h_buckets_full,
406 100.0 * fullness(), 434 100.0 * fullness(),
435 100.0 * collisions(),
436 100.0 * coverage(),
407 h_bits, 437 h_bits,
408 0.001 * accum_iters * test_size / accum_millis, 438 0.001 * accum_iters * test_size / accum_millis,
409 accum_iters); 439 accum_iters);
@@ -457,6 +487,7 @@ struct test_result : public test_result_base {
457 487
458 void get(const uint8_t* buf, const int buf_size, int test_iters) { 488 void get(const uint8_t* buf, const int buf_size, int test_iters) {
459 rabin_karp<SELF> test; 489 rabin_karp<SELF> test;
490 //adler32_cksum<SELF> test;
460 hash_type hash; 491 hash_type hash;
461 const uint8_t *ptr; 492 const uint8_t *ptr;
462 const uint8_t *end; 493 const uint8_t *end;
@@ -545,6 +576,17 @@ struct test_result : public test_result_base {
545 } 576 }
546}; 577};
547 578
579template <typename Word>
580void print_array(const char *tname) {
581 printf("static const %s hash_multiplier[64] = {\n", tname);
582 Word p = 1;
583 for (int i = 0; i < 64; i++) {
584 printf(" %uU,\n", p);
585 p *= good_word<Word>();
586 }
587 printf("};\n", tname);
588}
589
548int main(int argc, char** argv) { 590int main(int argc, char** argv) {
549 int i; 591 int i;
550 uint8_t *buf = NULL; 592 uint8_t *buf = NULL;
@@ -556,21 +598,42 @@ int main(int argc, char** argv) {
556 return 1; 598 return 1;
557 } 599 }
558 600
601 //print_array<uint32_t>("uint32_t");
602
559#define TEST(T,Z,S,P,H,C) test_result<T,Z,S,P,H<T>,C> \ 603#define TEST(T,Z,S,P,H,C) test_result<T,Z,S,P,H<T>,C> \
560 _ ## T ## _ ## Z ## _ ## S ## _ ## P ## _ ## H ## _ ## C \ 604 _ ## T ## _ ## Z ## _ ## S ## _ ## P ## _ ## H ## _ ## C \
561 (#T "_" #Z "_" #S "_" #P "_" #H "_" #C) 605 (#T "_" #Z "_" #S "_" #P "_" #H "_" #C)
562 606
607#if 0
608
609 TEST(uint32_t, 4, SKIP, plain, hhash, 0); /* x */ \
610 TEST(uint32_t, 4, SKIP, plain, hhash, 1); /* x */ \
611 TEST(uint32_t, 4, SKIP, plain, hhash, 2); /* x */ \
612 TEST(uint32_t, 4, SKIP, plain, hhash, 3); /* x */ \
613
614#endif
615
563#define TESTS(SKIP) \ 616#define TESTS(SKIP) \
617 TEST(uint32_t, 9, SKIP, plain, hhash, 0); /* x */ \
618 TEST(uint32_t, 9, SKIP, plain, hhash, 1); /* x */ \
619 TEST(uint32_t, 9, SKIP, plain, hhash, 2); /* x */ \
620 TEST(uint32_t, 9, SKIP, plain, hhash, 3)
621
622#define TESTS_ALL(SKIP) \
564 TEST(uint32_t, 3, SKIP, plain, hhash, 0); \ 623 TEST(uint32_t, 3, SKIP, plain, hhash, 0); \
565 TEST(uint32_t, 3, SKIP, plain, hhash, 1); \ 624 TEST(uint32_t, 3, SKIP, plain, hhash, 1); \
566 TEST(uint32_t, 4, SKIP, plain, hhash, 0); /* x */ \ 625 TEST(uint32_t, 4, SKIP, plain, hhash, 0); /* x */ \
567 TEST(uint32_t, 4, SKIP, plain, hhash, 1); /* x */ \ 626 TEST(uint32_t, 4, SKIP, plain, hhash, 1); /* x */ \
627 TEST(uint32_t, 4, SKIP, plain, hhash, 2); /* x */ \
628 TEST(uint32_t, 4, SKIP, plain, hhash, 3); /* x */ \
568 TEST(uint32_t, 5, SKIP, plain, hhash, 0); \ 629 TEST(uint32_t, 5, SKIP, plain, hhash, 0); \
569 TEST(uint32_t, 5, SKIP, plain, hhash, 1); \ 630 TEST(uint32_t, 5, SKIP, plain, hhash, 1); \
570 TEST(uint32_t, 8, SKIP, plain, hhash, 0); \ 631 TEST(uint32_t, 8, SKIP, plain, hhash, 0); \
571 TEST(uint32_t, 8, SKIP, plain, hhash, 1); \ 632 TEST(uint32_t, 8, SKIP, plain, hhash, 1); \
572 TEST(uint32_t, 9, SKIP, plain, hhash, 0); /* x */ \ 633 TEST(uint32_t, 9, SKIP, plain, hhash, 0); /* x */ \
573 TEST(uint32_t, 9, SKIP, plain, hhash, 1); /* x */ \ 634 TEST(uint32_t, 9, SKIP, plain, hhash, 1); /* x */ \
635 TEST(uint32_t, 9, SKIP, plain, hhash, 2); /* x */ \
636 TEST(uint32_t, 9, SKIP, plain, hhash, 3); /* x */ \
574 TEST(uint32_t, 11, SKIP, plain, hhash, 0); /* x */ \ 637 TEST(uint32_t, 11, SKIP, plain, hhash, 0); /* x */ \
575 TEST(uint32_t, 11, SKIP, plain, hhash, 1); /* x */ \ 638 TEST(uint32_t, 11, SKIP, plain, hhash, 1); /* x */ \
576 TEST(uint32_t, 13, SKIP, plain, hhash, 0); \ 639 TEST(uint32_t, 13, SKIP, plain, hhash, 0); \
@@ -584,24 +647,22 @@ int main(int argc, char** argv) {
584 TEST(uint32_t, 34, SKIP, plain, hhash, 0); \ 647 TEST(uint32_t, 34, SKIP, plain, hhash, 0); \
585 TEST(uint32_t, 34, SKIP, plain, hhash, 1); \ 648 TEST(uint32_t, 34, SKIP, plain, hhash, 1); \
586 TEST(uint32_t, 55, SKIP, plain, hhash, 0); \ 649 TEST(uint32_t, 55, SKIP, plain, hhash, 0); \
587 TEST(uint32_t, 55, SKIP, plain, hhash, 1); \ 650 TEST(uint32_t, 55, SKIP, plain, hhash, 1)
588 TEST(uint32_t, 89, SKIP, plain, hhash, 0); \
589 TEST(uint32_t, 89, SKIP, plain, hhash, 1)
590 651
591 TESTS(1); // * 652 TESTS(1); // *
592 TESTS(2); // * 653// TESTS(2); // *
593 TESTS(3); // * 654// TESTS(3); // *
594 TESTS(5); // * 655// TESTS(5); // *
595 TESTS(8); // * 656// TESTS(8); // *
596 TESTS(9); 657// TESTS(9);
597 TESTS(11); 658// TESTS(11);
598 TESTS(13); // * 659// TESTS(13); // *
599 TESTS(15); 660 TESTS(15);
600 TESTS(16); 661// TESTS(16);
601 TESTS(21); // * 662// TESTS(21); // *
602 TESTS(34); // * 663// TESTS(34); // *
603 TESTS(55); // * 664// TESTS(55); // *
604 TESTS(89); // * 665// TESTS(89); // *
605 666
606 for (i = 1; i < argc; i++) { 667 for (i = 1; i < argc; i++) {
607 if ((ret = read_whole_file(argv[i], 668 if ((ret = read_whole_file(argv[i],
@@ -614,7 +675,7 @@ int main(int argc, char** argv) {
614 argv[i], buf_len); 675 argv[i], buf_len);
615 676
616 double min_time = -1.0; 677 double min_time = -1.0;
617 double min_uniqueness = 0.0; 678 double min_compression = 0.0;
618 679
619 for (vector<test_result_base*>::iterator i = all_tests.begin(); 680 for (vector<test_result_base*>::iterator i = all_tests.begin();
620 i != all_tests.end(); ++i) { 681 i != all_tests.end(); ++i) {
@@ -633,7 +694,7 @@ int main(int argc, char** argv) {
633 test->stat(); 694 test->stat();
634 695
635 if (min_time < 0.0) { 696 if (min_time < 0.0) {
636 min_uniqueness = test->uniqueness(); 697 min_compression = test->compression();
637 min_time = test->time(); 698 min_time = test->time();
638 } 699 }
639 700
@@ -641,26 +702,26 @@ int main(int argc, char** argv) {
641 min_time = test->time(); 702 min_time = test->time();
642 } 703 }
643 704
644 if (min_uniqueness > test->uniqueness()) { 705 if (min_compression > test->compression()) {
645 min_uniqueness = test->uniqueness(); 706 min_compression = test->compression();
646 } 707 }
647 708
648 test->print(); 709 test->print();
649 } 710 }
650 711
651 for (vector<test_result_base*>::iterator i = all_tests.begin(); 712// for (vector<test_result_base*>::iterator i = all_tests.begin();
652 i != all_tests.end(); ++i) { 713// i != all_tests.end(); ++i) {
653 test_result_base *test = *i; 714// test_result_base *test = *i;
654 test->set_score(min_uniqueness, min_time); 715// test->set_score(min_compression, min_time);
655 } 716// }
656 717
657 sort(all_tests.begin(), all_tests.end(), compare_h()); 718// sort(all_tests.begin(), all_tests.end(), compare_h());
658 719
659 for (vector<test_result_base*>::iterator i = all_tests.begin(); 720// for (vector<test_result_base*>::iterator i = all_tests.begin();
660 i != all_tests.end(); ++i) { 721// i != all_tests.end(); ++i) {
661 test_result_base *test = *i; 722// test_result_base *test = *i;
662 test->print_accum(); 723// test->print();
663 } 724// }
664 725
665 free(buf); 726 free(buf);
666 buf = NULL; 727 buf = NULL;