summaryrefslogtreecommitdiff
path: root/xdelta3/testing/checksum_test.cc
diff options
context:
space:
mode:
Diffstat (limited to 'xdelta3/testing/checksum_test.cc')
-rw-r--r--xdelta3/testing/checksum_test.cc230
1 files changed, 175 insertions, 55 deletions
diff --git a/xdelta3/testing/checksum_test.cc b/xdelta3/testing/checksum_test.cc
index b9a4b0b..353d705 100644
--- a/xdelta3/testing/checksum_test.cc
+++ b/xdelta3/testing/checksum_test.cc
@@ -7,6 +7,16 @@
7#include <map> 7#include <map>
8#include <algorithm> 8#include <algorithm>
9 9
10extern "C" {
11uint32_t xd3_large32_cksum_old (xd3_hash_cfg *cfg, const uint8_t *base, const usize_t look);
12uint32_t xd3_large32_cksum_update_old (xd3_hash_cfg *cfg, uint32_t cksum,
13 const uint8_t *base, const usize_t look);
14
15uint64_t xd3_large64_cksum_old (xd3_hash_cfg *cfg, const uint8_t *base, const usize_t look);
16uint64_t xd3_large64_cksum_update_old (xd3_hash_cfg *cfg, uint64_t cksum,
17 const uint8_t *base, const usize_t look);
18}
19
10using std::list; 20using std::list;
11using std::map; 21using std::map;
12using std::vector; 22using std::vector;
@@ -24,6 +34,14 @@ uint64_t good_64bit_values[] = {
24 7664345821815920749ULL, // ... 34 7664345821815920749ULL, // ...
25}; 35};
26 36
37void print_header() {
38 static int hdr_cnt = 0;
39 if (hdr_cnt++ % 20 == 0) {
40 printf("Name\t\t\t\tConf\t\tCount\tUniq\tFull\tCover\tColls"
41 "\tMB/s\tIters\t#Colls\n");
42 }
43}
44
27struct true_type { }; 45struct true_type { };
28struct false_type { }; 46struct false_type { };
29 47
@@ -51,13 +69,6 @@ struct hhash { // shift "s" bits leaving the high bits as a hash value for
51}; 69};
52 70
53template <typename Word> 71template <typename Word>
54struct nonhash {
55 Word operator()(const Word t, const Word h, const Word mask) {
56 return (t >> h) & mask;
57 }
58};
59
60template <typename Word>
61Word good_word(); 72Word good_word();
62 73
63template<> 74template<>
@@ -92,14 +103,18 @@ struct cksum_params {
92MEMBER 103MEMBER
93struct rabin_karp : public cksum_params<SELF> { 104struct rabin_karp : public cksum_params<SELF> {
94 // (a^cksum_size-1 c_0) + (a^cksum_size-2 c_1) ... 105 // (a^cksum_size-1 c_0) + (a^cksum_size-2 c_1) ...
95 rabin_karp() { 106 rabin_karp()
96 multiplier = good_word<Word>(); 107 : powers(make_powers()),
97 powers = new Word[CksumSize]; 108 product(powers[0] * good_word<Word>()),
98 powers[CksumSize - 1] = 1; 109 incr_state(0) { }
110
111 static Word* make_powers() {
112 Word *p = new Word[CksumSize];
113 p[CksumSize - 1] = 1;
99 for (int i = CksumSize - 2; i >= 0; i--) { 114 for (int i = CksumSize - 2; i >= 0; i--) {
100 powers[i] = powers[i + 1] * multiplier; 115 p[i] = p[i + 1] * good_word<Word>();
101 } 116 }
102 product = powers[0] * multiplier; 117 return p;
103 } 118 }
104 119
105 ~rabin_karp() { 120 ~rabin_karp() {
@@ -120,21 +135,69 @@ struct rabin_karp : public cksum_params<SELF> {
120 } 135 }
121 136
122 Word incr(const uint8_t *ptr) { 137 Word incr(const uint8_t *ptr) {
123 incr_state = multiplier * incr_state - 138 incr_state = good_word<Word>() * incr_state -
124 product * (ptr[-1]) + (ptr[CksumSize - 1]); 139 product * (ptr[-1]) + (ptr[CksumSize - 1]);
125 return incr_state; 140 return incr_state;
126 } 141 }
127 142
128 Word *powers; 143 const Word *const powers;
129 Word product; 144 const Word product;
130 Word multiplier; 145 Word incr_state;
131 Word incr_state; 146};
147
148MEMBER
149struct with_stream : public cksum_params<SELF> {
150 xd3_stream stream;
151
152 with_stream()
153 {
154 xd3_config cfg;
155 memset (&stream, 0, sizeof (stream));
156 xd3_init_config (&cfg, 0);
157 cfg.smatch_cfg = XD3_SMATCH_SOFT;
158 cfg.smatcher_soft.large_look = CksumSize;
159 cfg.smatcher_soft.large_step = CksumSkip;
160 cfg.smatcher_soft.small_look = 4;
161 cfg.smatcher_soft.small_chain = 4;
162 cfg.smatcher_soft.small_lchain = 4;
163 cfg.smatcher_soft.max_lazy = 4;
164 cfg.smatcher_soft.long_enough = 4;
165 CHECK_EQ(0, xd3_config_stream (&stream, &cfg));
166
167 CHECK_EQ(0, xd3_size_hashtable (&stream,
168 1<<10 /* ignored */,
169 stream.smatcher.large_look,
170 & stream.large_hash));
171 }
172 ~with_stream()
173 {
174 xd3_free_stream (&stream);
175 }
176};
177
178MEMBER
179struct large64_cksum : public with_stream<SELF> {
180 Word step(const uint8_t *ptr) {
181 return xd3_large64_cksum (&this->stream.large_hash, ptr, CksumSize);
182 }
183
184 Word state0(const uint8_t *ptr) {
185 incr_state = step(ptr);
186 return incr_state;
187 }
188
189 Word incr(const uint8_t *ptr) {
190 incr_state = xd3_large64_cksum_update (&this->stream.large_hash, incr_state, ptr - 1, CksumSize);
191 return incr_state;
192 }
193
194 Word incr_state;
132}; 195};
133 196
134MEMBER 197MEMBER
135struct large_cksum : public cksum_params<SELF> { 198struct large64_cksum_old : public with_stream<SELF> {
136 Word step(const uint8_t *ptr) { 199 Word step(const uint8_t *ptr) {
137 return xd3_large_cksum (ptr, CksumSize); 200 return xd3_large64_cksum_old (&this->stream.large_hash, ptr, CksumSize);
138 } 201 }
139 202
140 Word state0(const uint8_t *ptr) { 203 Word state0(const uint8_t *ptr) {
@@ -143,13 +206,67 @@ struct large_cksum : public cksum_params<SELF> {
143 } 206 }
144 207
145 Word incr(const uint8_t *ptr) { 208 Word incr(const uint8_t *ptr) {
146 incr_state = xd3_large_cksum_update (incr_state, ptr - 1, CksumSize); 209 incr_state = xd3_large64_cksum_update_old (&this->stream.large_hash, incr_state, ptr - 1, CksumSize);
147 return incr_state; 210 return incr_state;
148 } 211 }
149 212
150 Word incr_state; 213 Word incr_state;
151}; 214};
152 215
216MEMBER
217struct large32_cksum : public with_stream<SELF> {
218 Word step(const uint8_t *ptr) {
219 return xd3_large32_cksum (&this->stream.large_hash, ptr, CksumSize);
220 }
221
222 Word state0(const uint8_t *ptr) {
223 incr_state = step(ptr);
224 return incr_state;
225 }
226
227 Word incr(const uint8_t *ptr) {
228 incr_state = xd3_large32_cksum_update (&this->stream.large_hash, incr_state, ptr - 1, CksumSize);
229 return incr_state;
230 }
231
232 Word incr_state;
233};
234
235MEMBER
236struct large32_cksum_old : public with_stream<SELF> {
237 Word step(const uint8_t *ptr) {
238 return xd3_large32_cksum_old (&this->stream.large_hash, ptr, CksumSize);
239 }
240
241 Word state0(const uint8_t *ptr) {
242 incr_state = step(ptr);
243 return incr_state;
244 }
245
246 Word incr(const uint8_t *ptr) {
247 incr_state = xd3_large32_cksum_update_old (&this->stream.large_hash, incr_state, ptr - 1, CksumSize);
248 return incr_state;
249 }
250
251 Word incr_state;
252};
253
254MEMBER
255struct large_cksum
256 : std::conditional<std::is_same<Word, uint32_t>::value,
257 large32_cksum<SELF>,
258 large64_cksum<SELF>>::type
259{
260};
261
262MEMBER
263struct large_cksum_old
264 : std::conditional<std::is_same<Word, uint32_t>::value,
265 large32_cksum_old<SELF>,
266 large64_cksum_old<SELF>>::type
267{
268};
269
153// TESTS 270// TESTS
154 271
155template <typename Word> 272template <typename Word>
@@ -193,13 +310,21 @@ struct file_stats {
193 310
194 ptr_list &pl = table[word]; 311 ptr_list &pl = table[word];
195 312
313 int collisions = 0;
196 for (ptr_iterator p_i = pl.begin(); 314 for (ptr_iterator p_i = pl.begin();
197 p_i != pl.end(); 315 p_i != pl.end();
198 ++p_i) { 316 ++p_i) {
199 if (memcmp(*p_i, ptr, cksum_size) == 0) { 317 if (memcmp(*p_i, ptr, cksum_size) == 0) {
200 return; 318 return;
201 } 319 }
320 collisions++;
202 } 321 }
322 if (collisions >= 10000)
323 {
324 fprintf(stderr, "Something is not right, lots of collisions=%d\n",
325 collisions);
326 abort();
327 }
203 328
204 unique++; 329 unique++;
205 pl.push_back(ptr); 330 pl.push_back(ptr);
@@ -320,7 +445,7 @@ struct test_result : public test_result_base {
320 /* Fraction of distinct strings of length cksum_size which are not 445 /* Fraction of distinct strings of length cksum_size which are not
321 * represented in the hash table. */ 446 * represented in the hash table. */
322 double collisions() { 447 double collisions() {
323 return (fstats.unique - fstats.unique_values) / fstats.unique; 448 return (fstats.unique - fstats.unique_values) / (double) fstats.unique;
324 } 449 }
325 usize_t colls() { 450 usize_t colls() {
326 return (fstats.unique - fstats.unique_values); 451 return (fstats.unique - fstats.unique_values);
@@ -375,13 +500,8 @@ struct test_result : public test_result_base {
375 fprintf(stderr, "internal error: %d != %d\n", fstats.count, count()); 500 fprintf(stderr, "internal error: %d != %d\n", fstats.count, count());
376 abort(); 501 abort();
377 } 502 }
378 { 503 print_header();
379 static int hdr_cnt = 0; 504 printf("%s\t%d/%d 2^%d\t%u\t%0.4f\t%.4f\t%.4f\t%.1e\t%.2f\t%u\t%u\n",
380 if (hdr_cnt++ % 20 == 0) {
381 printf("Name\tConf\tCount\tUniq\tFull\tColl\tCov\tMB/s\tIters\n");
382 }
383 }
384 printf("%s\t%d/%d/1<<%d\t%u\t%0.2f\t%.2f\t%.2f\t%.2f\t%.4f\t%u\n",
385 test_name, 505 test_name,
386 Checksum::cksum_size, 506 Checksum::cksum_size,
387 Checksum::cksum_skip, 507 Checksum::cksum_skip,
@@ -389,10 +509,11 @@ struct test_result : public test_result_base {
389 count(), 509 count(),
390 uniqueness(), 510 uniqueness(),
391 fullness(), 511 fullness(),
392 collisions(),
393 coverage(), 512 coverage(),
513 collisions(),
394 0.001 * accum_iters * test_size / accum_millis, 514 0.001 * accum_iters * test_size / accum_millis,
395 accum_iters); 515 accum_iters,
516 colls());
396 } 517 }
397 518
398 usize_t size_log2 (usize_t slots) { 519 usize_t size_log2 (usize_t slots) {
@@ -445,8 +566,8 @@ struct test_result : public test_result_base {
445 const uint8_t *ptr; 566 const uint8_t *ptr;
446 const uint8_t *end; 567 const uint8_t *end;
447 usize_t periods; 568 usize_t periods;
448 int last_offset; 569 int64_t last_offset;
449 int stop; 570 int64_t stop;
450 571
451 test_size = buf_size; 572 test_size = buf_size;
452 last_offset = buf_size - Checksum::cksum_size; 573 last_offset = buf_size - Checksum::cksum_size;
@@ -472,7 +593,7 @@ struct test_result : public test_result_base {
472 } else { 593 } else {
473 ptr = buf + last_offset; 594 ptr = buf + last_offset;
474 end = buf + stop; 595 end = buf + stop;
475 596
476 for (; ptr != end; ptr -= Checksum::cksum_skip) { 597 for (; ptr != end; ptr -= Checksum::cksum_skip) {
477 fstats.update(hash(cksum.step(ptr), s_bits, s_mask), ptr); 598 fstats.update(hash(cksum.step(ptr), s_bits, s_mask), ptr);
478 } 599 }
@@ -503,7 +624,6 @@ struct test_result : public test_result_base {
503 } 624 }
504 625
505 if (Checksum::cksum_skip == 1) { 626 if (Checksum::cksum_skip == 1) {
506
507 new_table(n_incrs); 627 new_table(n_incrs);
508 628
509 for (usize_t i = 0; i < test_iters; i++) { 629 for (usize_t i = 0; i < test_iters; i++) {
@@ -516,7 +636,7 @@ struct test_result : public test_result_base {
516 636
517 for (; ptr != end; ptr++) { 637 for (; ptr != end; ptr++) {
518 typename Checksum::word_type w = cksum.incr(ptr); 638 typename Checksum::word_type w = cksum.incr(ptr);
519 assert(w == cksum.step(ptr)); 639 CHECK_EQ(w, cksum.step(ptr));
520 set_table_bit(hash(w, s_bits, s_mask)); 640 set_table_bit(hash(w, s_bits, s_mask));
521 } 641 }
522 } 642 }
@@ -586,30 +706,30 @@ int main(int argc, char** argv) {
586 return 1; 706 return 1;
587 } 707 }
588 708
589#define TEST(T,Z,S,H,C) \ 709#define TEST(T,Z,S,C) \
590 test_result<rabin_karp<T,Z,S,H<T>,C>> \ 710 test_result<rabin_karp<T,Z,S,hhash<T>,C>> \
591 _rka_ ## T ## _ ## Z ## _ ## S ## _ ## H ## _ ## C \ 711 _rka_ ## T ## _ ## Z ## _ ## S ## _ ## C \
592 ("rka_" #T "_" #Z "_" #S "_" #H "_" #C); \ 712 ("rka_" #T "_" #Z "_" #S "_" #C); \
593 test_result<large_cksum<T,Z,S,H<T>,C>> \ 713 test_result<large_cksum<T,Z,S,hhash<T>,C>> \
594 _xck_ ## T ## _ ## Z ## _ ## S ## _ ## H ## _ ## C \ 714 _xck_ ## T ## _ ## Z ## _ ## S ## _ ## C \
595 ("xck_" #T "_" #Z "_" #S "_" #H "_" #C) 715 ("xck_" #T "_" #Z "_" #S "_" #C); \
596 716 test_result<large_cksum_old<T,Z,S,hhash<T>,C>> \
717 _old_ ## T ## _ ## Z ## _ ## S ## _ ## C \
718 ("old_" #T "_" #Z "_" #S "_" #C)
597 719
598#define TESTS(SIZE, SKIP) \ 720#define TESTS(SIZE, SKIP) \
599 TEST(uint32_t, SIZE, SKIP, nonhash, 1); \ 721 TEST(uint32_t, SIZE, SKIP, 1); \
600 TEST(uint32_t, SIZE, SKIP, nonhash, 2); \ 722 TEST(uint32_t, SIZE, SKIP, 2); \
601 TEST(uint32_t, SIZE, SKIP, hhash, 1); \ 723 TEST(uint64_t, SIZE, SKIP, 1); \
602 TEST(uint32_t, SIZE, SKIP, hhash, 2); \ 724 TEST(uint64_t, SIZE, SKIP, 2)
603 TEST(uint64_t, SIZE, SKIP, nonhash, 1); \
604 TEST(uint64_t, SIZE, SKIP, nonhash, 2); \
605 TEST(uint64_t, SIZE, SKIP, hhash, 1); \
606 TEST(uint64_t, SIZE, SKIP, hhash, 2)
607 725
726 TESTS(9, 1);
608 TESTS(9, 9); 727 TESTS(9, 9);
609 // TESTS(9, 15); 728 TESTS(15, 1);
610 TESTS(15, 15); 729 TESTS(15, 15);
730 TESTS(127, 1);
611 TESTS(127, 127); 731 TESTS(127, 127);
612 // TESTS(127, 211); 732 TESTS(211, 1);
613 TESTS(211, 211); 733 TESTS(211, 211);
614 734
615 for (i = 1; i < argc; i++) { 735 for (i = 1; i < argc; i++) {
@@ -630,7 +750,7 @@ int main(int argc, char** argv) {
630 test_result_base *test = *iter; 750 test_result_base *test = *iter;
631 test->reset(); 751 test->reset();
632 752
633 usize_t iters = 100; 753 usize_t iters = 200;
634 long start_test = get_millisecs_now(); 754 long start_test = get_millisecs_now();
635 755
636 do { 756 do {