summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rwxr-xr-xxdelta3/examples/Makefile2
-rw-r--r--xdelta3/examples/checksum_test.cc417
2 files changed, 284 insertions, 135 deletions
diff --git a/xdelta3/examples/Makefile b/xdelta3/examples/Makefile
index abdb6be..f1caa57 100755
--- a/xdelta3/examples/Makefile
+++ b/xdelta3/examples/Makefile
@@ -1,5 +1,5 @@
1#CFLAGS = -g -Wall -I.. -DXD3_DEBUG=1 1#CFLAGS = -g -Wall -I.. -DXD3_DEBUG=1
2CFLAGS = -O3 -Wall -I.. -DXD3_DEBUG=0 -fno-builtin 2CFLAGS = -O3 -Wall -I.. -DXD3_DEBUG=0 -fno-builtin -DNDEBUG=1
3# -pg 3# -pg
4 4
5SOURCES = small_page_test.c encode_decode_test.c speed_test.c 5SOURCES = small_page_test.c encode_decode_test.c speed_test.c
diff --git a/xdelta3/examples/checksum_test.cc b/xdelta3/examples/checksum_test.cc
index a48c6ef..6a696cf 100644
--- a/xdelta3/examples/checksum_test.cc
+++ b/xdelta3/examples/checksum_test.cc
@@ -11,26 +11,22 @@ extern "C" {
11using std::list; 11using std::list;
12using std::map; 12using std::map;
13 13
14// Need gcc4
15// template <typename T, int TestCklen>
16// struct cksum_params {
17// typedef T cksum_type;
18// enum { test_cklen = TestCklen };
19// };
20
21
22// MLCG parameters 14// MLCG parameters
15// a, a*
23uint32_t good_32bit_values[] = { 16uint32_t good_32bit_values[] = {
24// 741103597U, 887987685U, 17 1597334677U, // ...
25 1597334677U, 18 741103597U, 887987685U,
26}; 19};
27 20
28// a, a* 21// a, a*
29uint64_t good_64bit_values[] = { 22uint64_t good_64bit_values[] = {
30// 1181783497276652981ULL, 4292484099903637661ULL, 23 1181783497276652981ULL, 4292484099903637661ULL,
31 7664345821815920749ULL, 24 7664345821815920749ULL, // ...
32}; 25};
33 26
27struct true_type { };
28struct false_type { };
29
34template <typename Word> 30template <typename Word>
35int bitsof(); 31int bitsof();
36 32
@@ -44,7 +40,7 @@ int bitsof<uint64_t>() {
44 return 64; 40 return 64;
45} 41}
46 42
47struct noperm { 43struct no_permute {
48 int operator()(const uint8_t &c) { 44 int operator()(const uint8_t &c) {
49 return c; 45 return c;
50 } 46 }
@@ -57,6 +53,13 @@ struct permute {
57}; 53};
58 54
59template <typename Word> 55template <typename Word>
56struct both {
57 Word operator()(const Word& t, const int &bits, const int &mask) {
58 return (t >> bits) ^ (t & mask);
59 }
60};
61
62template <typename Word>
60Word good_word(); 63Word good_word();
61 64
62template<> 65template<>
@@ -87,15 +90,38 @@ size_log2 (int slots)
87 return bits; 90 return bits;
88} 91}
89 92
90template <typename Word, int CksumSize, int CksumSkip, typename Permute> 93// CLASSES
94
95#define SELF Word, CksumSize, CksumSkip, Permute, Hash
96#define MEMBER template <typename Word, \
97 int CksumSize, \
98 int CksumSkip, \
99 typename Permute, \
100 typename Hash>
101
102MEMBER
103struct cksum_params {
104 typedef Word word_type;
105 typedef Permute permute_type;
106 typedef Hash hash_type;
107
108 enum { cksum_size = CksumSize,
109 cksum_skip = CksumSkip,
110 };
111};
112
113
114MEMBER
91struct rabin_karp { 115struct rabin_karp {
92 typedef Word word_type; 116 typedef Word word_type;
93 typedef Permute permute_type; 117 typedef Permute permute_type;
118 typedef Hash hash_type;
94 119
95 enum { cksum_size = CksumSize, 120 enum { cksum_size = CksumSize,
96 cksum_skip = CksumSkip, 121 cksum_skip = CksumSkip,
97 }; 122 };
98 123
124 // In this code, we use (a^cksum_size-1 c_0) + (a^cksum_size-2 c_1) ...
99 rabin_karp() { 125 rabin_karp() {
100 multiplier = good_word<Word>(); 126 multiplier = good_word<Word>();
101 powers = new Word[cksum_size]; 127 powers = new Word[cksum_size];
@@ -106,6 +132,10 @@ struct rabin_karp {
106 product = powers[0] * multiplier; 132 product = powers[0] * multiplier;
107 } 133 }
108 134
135 ~rabin_karp() {
136 delete [] powers;
137 }
138
109 Word step(const uint8_t *ptr) { 139 Word step(const uint8_t *ptr) {
110 Word h = 0; 140 Word h = 0;
111 for (int i = 0; i < cksum_size; i++) { 141 for (int i = 0; i < cksum_size; i++) {
@@ -115,21 +145,75 @@ struct rabin_karp {
115 } 145 }
116 146
117 Word state0(const uint8_t *ptr) { 147 Word state0(const uint8_t *ptr) {
118 state = step(ptr); 148 incr_state = step(ptr);
119 return state; 149 return incr_state;
120 } 150 }
121 151
122 Word incr(const uint8_t *ptr) { 152 Word incr(const uint8_t *ptr) {
123 state = state - 153 incr_state = multiplier * incr_state -
124 product * permute_type()(ptr[-1]) + 154 product * permute_type()(ptr[-1]) +
125 permute_type()(ptr[cksum_size - 1]); 155 permute_type()(ptr[cksum_size - 1]);
126 return state; 156 return incr_state;
127 } 157 }
128 158
129 Word *powers; 159 Word *powers;
130 Word product; 160 Word product;
131 Word multiplier; 161 Word multiplier;
132 Word state; 162 Word incr_state;
163};
164
165// TESTS
166
167template <typename Word>
168struct file_stats {
169 typedef list<const uint8_t*> ptr_list;
170 typedef Word word_type;
171 typedef map<word_type, ptr_list> table_type;
172 typedef typename table_type::iterator table_iterator;
173 typedef typename ptr_list::iterator ptr_iterator;
174
175 int cksum_size;
176 int cksum_skip;
177 int unique;
178 int unique_values;
179 int count;
180 table_type table;
181
182 file_stats(int size, int skip)
183 : cksum_size(size),
184 cksum_skip(skip),
185 unique(0),
186 unique_values(0),
187 count(0) {
188 }
189
190 void update(const word_type &word, const uint8_t *ptr) {
191 table_iterator t_i = table.find(word);
192
193 count++;
194
195 if (t_i == table.end()) {
196 table.insert(make_pair(word, ptr_list()));
197 }
198
199 ptr_list &pl = table[word];
200
201 for (ptr_iterator p_i = pl.begin();
202 p_i != pl.end();
203 ++p_i) {
204 if (memcmp(*p_i, ptr, cksum_size) == 0) {
205 return;
206 }
207 }
208
209 unique++;
210 pl.push_back(ptr);
211 }
212
213 void freeze() {
214 table.clear();
215 unique_values = table.size();
216 }
133}; 217};
134 218
135struct test_result_base; 219struct test_result_base;
@@ -139,126 +223,202 @@ static list<test_result_base*> all_tests;
139struct test_result_base { 223struct test_result_base {
140 virtual ~test_result_base() { 224 virtual ~test_result_base() {
141 } 225 }
226 virtual void reset() = 0;
142 virtual void print() = 0; 227 virtual void print() = 0;
143 virtual void get(const uint8_t* buf, const int buf_size) = 0; 228 virtual void get(const uint8_t* buf, const int buf_size, int iters) = 0;
144}; 229};
145 230
146template<typename T> 231MEMBER
147struct test_result : public test_result_base { 232struct test_result : public test_result_base {
233 typedef Word word_type;
234 typedef Permute permute_type;
235 typedef Hash hash_type;
236
237 enum { cksum_size = CksumSize,
238 cksum_skip = CksumSkip,
239 };
240
241 const char *test_name;
242 file_stats<Word> fstats;
243 int test_size;
244 int test_iters;
148 int n_steps; 245 int n_steps;
149 int n_incrs; 246 int n_incrs;
150 int s_bits; 247 int s_bits;
151 int s_mask; 248 int s_mask;
152 int h_bits;
153 int t_entries; 249 int t_entries;
154 double step_fill; 250 int h_bits;
155 double incr_fill; 251 double h_fill;
156 long start_step, end_step; 252 double l_fill;
157 long start_incr, end_incr; 253 char *hash_table;
158 254 long start_test, end_test;
159 test_result() { 255
256 test_result(const char *name)
257 : test_name(name),
258 fstats(cksum_size, cksum_skip),
259 hash_table(NULL) {
160 all_tests.push_back(this); 260 all_tests.push_back(this);
161 } 261 }
162 262
263 ~test_result() {
264 reset();
265 }
266
267 void reset() {
268 test_size = -1;
269 test_iters = -1;
270 n_steps = -1;
271 n_incrs = -1;
272 s_bits = -1;
273 s_mask = -1;
274 t_entries = -1;
275 h_bits = -1;
276 h_fill = 0.0;
277 l_fill = 0.0;
278 if (hash_table) {
279 delete(hash_table);
280 hash_table = NULL;
281 }
282 }
283
284 int count() {
285 if (cksum_skip == 1) {
286 return n_incrs;
287 } else {
288 return n_steps;
289 }
290 }
291
163 void print() { 292 void print() {
164 fprintf(stderr, 293
165 "cksum size %u: skip %u: %u steps: %u incrs: " 294 printf("%s: (%u#%u) count %u dups %0.2f%% coll %0.2f%% fill %0.2f%% heff %0.2f%% %.4f MB/s\n",
166 "s_fill %0.2f%% i_fill %0.2f%%\n", 295 test_name,
167 T::cksum_size, 296 cksum_size,
168 T::cksum_skip, 297 cksum_skip,
169 n_steps, 298 count(),
170 n_incrs, 299 100.0 * (fstats.count - fstats.unique) / fstats.count,
171 100.0 * step_fill, 300 100.0 * (fstats.unique - fstats.unique_values) / fstats.unique,
172 100.0 * incr_fill); 301 100.0 * h_fill,
302 100.0 * l_fill,
303 0.001 * test_iters * test_size / (end_test - start_test));
173 } 304 }
174 305
175 int* new_table(int entries) { 306 void new_table(int entries) {
176 t_entries = entries; 307 t_entries = entries;
177 h_bits = size_log2(entries); 308 h_bits = size_log2(entries);
178 s_bits = bitsof<typename T::word_type>() - h_bits; 309 s_bits = bitsof<word_type>() - h_bits;
179 s_mask = (1 << h_bits) - 1; 310 s_mask = (1 << h_bits) - 1;
180 311
181 int n = 1 << h_bits; 312 int n = 1 << h_bits;
182 int *t = new int[n]; 313 hash_table = new char[n / 8];
183 memset(t, 0, sizeof(int) * n); 314 memset(hash_table, 0, n / 8);
184 return t;
185 } 315 }
186 316
187 double summarize_table(int* table) { 317 int get_table_bit(int i) {
318 return hash_table[i/8] & (1 << i%8);
319 }
320
321 int set_table_bit(int i) {
322 return hash_table[i/8] |= (1 << i%8);
323 }
324
325 void summarize_table() {
188 int n = 1 << h_bits; 326 int n = 1 << h_bits;
189 int f = 0; 327 int f = 0;
190 for (int i = 0; i < n; i++) { 328 for (int i = 0; i < n; i++) {
191 if (table[i] != 0) { 329 if (get_table_bit(i)) {
192 f++; 330 f++;
193 } 331 }
194 } 332 }
195 delete [] table; 333 h_fill = (double) f / (double) t_entries;
196 return (double) f / (double) t_entries; 334 l_fill = (double) f / (double) fstats.unique;
197 } 335 }
198 336
199 void get(const uint8_t* buf, const int buf_size) { 337 void get(const uint8_t* buf, const int buf_size, int iters) {
338 rabin_karp<SELF> test;
339 hash_type hash;
200 const uint8_t *ptr; 340 const uint8_t *ptr;
201 const uint8_t *end; 341 const uint8_t *end;
202 int last_offset; 342 int last_offset;
203 int periods; 343 int periods;
204 int stop; 344 int stop;
205 int *hash_table;
206 345
207 last_offset = buf_size - T::cksum_size; 346 test_size = buf_size;
347 test_iters = iters;
348 last_offset = buf_size - cksum_size;
208 349
209 if (last_offset < 0) { 350 if (last_offset < 0) {
210 periods = 0; 351 periods = 0;
211 n_steps = 0; 352 n_steps = 0;
212 n_incrs = 0; 353 n_incrs = 0;
213 stop = -T::cksum_size; 354 stop = -cksum_size;
214 } else { 355 } else {
215 periods = last_offset / T::cksum_skip; 356 periods = last_offset / cksum_skip;
216 n_steps = periods; 357 n_steps = periods + 1;
217 n_incrs = last_offset; 358 n_incrs = last_offset;
218 stop = last_offset - (periods + 1) * T::cksum_skip; 359 stop = last_offset - (periods + 1) * cksum_skip;
219 } 360 }
220 361
221 hash_table = new_table(n_steps); 362 // Compute file stats
222 363 if (cksum_skip == 1) {
223 start_step = get_millisecs_now(); 364 for (int i = 0; i <= buf_size - cksum_size; i++) {
365 fstats.update(hash(test.step(buf + i), s_bits, s_mask), buf + i);
366 }
367 } else {
368 ptr = buf + last_offset;
369 end = buf + stop;
370
371 for (; ptr != end; ptr -= cksum_skip) {
372 fstats.update(hash(test.step(ptr), s_bits, s_mask), ptr);
373 }
374 }
375 fstats.freeze();
224 376
225 ptr = buf + last_offset; 377 start_test = get_millisecs_now();
226 end = buf + stop;
227 378
228 T t; 379 if (cksum_skip != 1) {
229 typename T::word_type w; 380 new_table(n_steps);
230 381
231 for (; ptr != end; ptr -= T::cksum_skip) { 382 for (int i = 0; i < iters; i++) {
232 w = t.step(ptr); 383 ptr = buf + last_offset;
233 ++hash_table[(w >> s_bits) ^ (w & s_mask)]; 384 end = buf + stop;
234 }
235 385
236 end_step = get_millisecs_now(); 386 for (; ptr != end; ptr -= cksum_skip) {
387 set_table_bit(hash(test.step(ptr), s_bits, s_mask));
388 }
389 }
237 390
238 step_fill = summarize_table(hash_table); 391 summarize_table();
239 hash_table = new_table(n_incrs); 392 }
240 393
241 stop = buf_size - T::cksum_size + 1; 394 stop = buf_size - cksum_size + 1;
242 if (stop < 0) { 395 if (stop < 0) {
243 stop = 0; 396 stop = 0;
244 } 397 }
245 398
246 start_incr = end_step; 399 if (cksum_skip == 1) {
247 400
248 ptr = buf; 401 new_table(n_incrs);
249 end = buf + stop;
250 if (ptr != end) {
251 w = t.state0(ptr++);
252 ++hash_table[(w >> s_bits) ^ (w & s_mask)];
253 }
254 for (; ptr != end; ptr++) {
255 w = t.incr(ptr);
256 ++hash_table[(w >> s_bits) ^ (w & s_mask)];
257 }
258 402
259 end_incr = get_millisecs_now(); 403 for (int i = 0; i < iters; i++) {
404 ptr = buf;
405 end = buf + stop;
260 406
261 incr_fill = summarize_table(hash_table); 407 if (ptr != end) {
408 set_table_bit(hash(test.state0(ptr++), s_bits, s_mask));
409 }
410
411 for (; ptr != end; ptr++) {
412 Word w = test.incr(ptr);
413 assert(w == test.step(ptr));
414 set_table_bit(hash(w, s_bits, s_mask));
415 }
416 }
417
418 summarize_table();
419 }
420
421 end_test = get_millisecs_now();
262 } 422 }
263}; 423};
264 424
@@ -273,57 +433,41 @@ int main(int argc, char** argv) {
273 return 1; 433 return 1;
274 } 434 }
275 435
276 test_result<rabin_karp<uint32_t, 4, 1, noperm> > small_1_cksum; 436#define TEST(T,Z,S,P,H) test_result<T,Z,S,P,H<T> > \
277 test_result<rabin_karp<uint32_t, 4, 4, noperm> > small_4_cksum; 437 _ ## T ## Z ## S ## P ## H (#T "_" #Z "_" #S "_" #P "_" #H)
278 test_result<rabin_karp<uint32_t, 9, 1, noperm> > large_1_cksum; 438
279 test_result<rabin_karp<uint32_t, 9, 2, noperm> > large_2_cksum; 439 TEST(uint32_t, 4, 1, no_permute, both);
280 test_result<rabin_karp<uint32_t, 9, 3, noperm> > large_3_cksum; 440 TEST(uint32_t, 4, 2, no_permute, both);
281 test_result<rabin_karp<uint32_t, 9, 5, noperm> > large_5_cksum; 441 TEST(uint32_t, 4, 3, no_permute, both);
282 test_result<rabin_karp<uint32_t, 9, 6, noperm> > large_6_cksum; 442 TEST(uint32_t, 4, 4, no_permute, both);
283 test_result<rabin_karp<uint32_t, 9, 7, noperm> > large_7_cksum; 443
284 test_result<rabin_karp<uint32_t, 9, 8, noperm> > large_8_cksum; 444 TEST(uint32_t, 7, 15, no_permute, both);
285 test_result<rabin_karp<uint32_t, 9, 15, noperm> > large_15_cksum; 445 TEST(uint32_t, 7, 55, no_permute, both);
286 test_result<rabin_karp<uint32_t, 9, 26, noperm> > large_26_cksum; 446
287 test_result<rabin_karp<uint32_t, 9, 55, noperm> > large_55_cksum; 447 TEST(uint32_t, 9, 1, no_permute, both);
288 448 TEST(uint32_t, 9, 2, no_permute, both);
289 test_result<rabin_karp<uint32_t, 4, 1, permute> > small_1_cksum_p; 449 TEST(uint32_t, 9, 3, no_permute, both);
290 test_result<rabin_karp<uint32_t, 4, 4, permute> > small_4_cksum_p; 450 TEST(uint32_t, 9, 4, no_permute, both);
291 test_result<rabin_karp<uint32_t, 9, 1, permute> > large_1_cksum_p; 451 TEST(uint32_t, 9, 5, no_permute, both);
292 test_result<rabin_karp<uint32_t, 9, 2, permute> > large_2_cksum_p; 452 TEST(uint32_t, 9, 6, no_permute, both);
293 test_result<rabin_karp<uint32_t, 9, 3, permute> > large_3_cksum_p; 453 TEST(uint32_t, 9, 7, no_permute, both);
294 test_result<rabin_karp<uint32_t, 9, 5, permute> > large_5_cksum_p; 454 TEST(uint32_t, 9, 8, no_permute, both);
295 test_result<rabin_karp<uint32_t, 9, 6, permute> > large_6_cksum_p; 455 TEST(uint32_t, 9, 15, no_permute, both);
296 test_result<rabin_karp<uint32_t, 9, 7, permute> > large_7_cksum_p; 456 TEST(uint32_t, 9, 26, no_permute, both);
297 test_result<rabin_karp<uint32_t, 9, 8, permute> > large_8_cksum_p; 457 TEST(uint32_t, 9, 55, no_permute, both);
298 test_result<rabin_karp<uint32_t, 9, 15, permute> > large_15_cksum_p; 458
299 test_result<rabin_karp<uint32_t, 9, 26, permute> > large_26_cksum_p; 459 TEST(uint32_t, 10, 14, no_permute, both);
300 test_result<rabin_karp<uint32_t, 9, 55, permute> > large_55_cksum_p; 460 TEST(uint32_t, 10, 25, no_permute, both);
301 461 TEST(uint32_t, 10, 54, no_permute, both);
302 test_result<rabin_karp<uint64_t, 4, 1, noperm> > small_1_cksum_64; 462
303 test_result<rabin_karp<uint64_t, 4, 4, noperm> > small_4_cksum_64; 463 TEST(uint32_t, 11, 13, no_permute, both);
304 test_result<rabin_karp<uint64_t, 9, 1, noperm> > large_1_cksum_64; 464 TEST(uint32_t, 11, 24, no_permute, both);
305 test_result<rabin_karp<uint64_t, 9, 2, noperm> > large_2_cksum_64; 465 TEST(uint32_t, 11, 53, no_permute, both);
306 test_result<rabin_karp<uint64_t, 9, 3, noperm> > large_3_cksum_64; 466
307 test_result<rabin_karp<uint64_t, 9, 5, noperm> > large_5_cksum_64; 467 TEST(uint32_t, 16, 16, no_permute, both);
308 test_result<rabin_karp<uint64_t, 9, 6, noperm> > large_6_cksum_64; 468 TEST(uint32_t, 16, 31, no_permute, both);
309 test_result<rabin_karp<uint64_t, 9, 7, noperm> > large_7_cksum_64; 469 TEST(uint32_t, 16, 32, no_permute, both);
310 test_result<rabin_karp<uint64_t, 9, 8, noperm> > large_8_cksum_64; 470 TEST(uint32_t, 16, 48, no_permute, both);
311 test_result<rabin_karp<uint64_t, 9, 15, noperm> > large_15_cksum_64;
312 test_result<rabin_karp<uint64_t, 9, 26, noperm> > large_26_cksum_64;
313 test_result<rabin_karp<uint64_t, 9, 55, noperm> > large_55_cksum_64;
314
315 test_result<rabin_karp<uint64_t, 4, 1, permute> > small_1_cksum_p_64;
316 test_result<rabin_karp<uint64_t, 4, 4, permute> > small_4_cksum_p_64;
317 test_result<rabin_karp<uint64_t, 9, 1, permute> > large_1_cksum_p_64;
318 test_result<rabin_karp<uint64_t, 9, 2, permute> > large_2_cksum_p_64;
319 test_result<rabin_karp<uint64_t, 9, 3, permute> > large_3_cksum_p_64;
320 test_result<rabin_karp<uint64_t, 9, 5, permute> > large_5_cksum_p_64;
321 test_result<rabin_karp<uint64_t, 9, 6, permute> > large_6_cksum_p_64;
322 test_result<rabin_karp<uint64_t, 9, 7, permute> > large_7_cksum_p_64;
323 test_result<rabin_karp<uint64_t, 9, 8, permute> > large_8_cksum_p_64;
324 test_result<rabin_karp<uint64_t, 9, 15, permute> > large_15_cksum_p_64;
325 test_result<rabin_karp<uint64_t, 9, 26, permute> > large_26_cksum_p_64;
326 test_result<rabin_karp<uint64_t, 9, 55, permute> > large_55_cksum_p_64;
327 471
328 for (i = 1; i < argc; i++) { 472 for (i = 1; i < argc; i++) {
329 if ((ret = read_whole_file(argv[i], 473 if ((ret = read_whole_file(argv[i],
@@ -332,11 +476,16 @@ int main(int argc, char** argv) {
332 return 1; 476 return 1;
333 } 477 }
334 478
335 fprintf(stderr, "file %s is %u bytes\n", argv[i], buf_len); 479 int target = 100 << 20;
480 int iters = (target / buf_len) + 1;
481
482 fprintf(stderr, "file %s is %u bytes %u iters\n",
483 argv[i], buf_len, iters);
336 484
337 for (list<test_result_base*>::iterator i = all_tests.begin(); 485 for (list<test_result_base*>::iterator i = all_tests.begin();
338 i != all_tests.end(); ++i) { 486 i != all_tests.end(); ++i) {
339 (*i)->get(buf, buf_len); 487 (*i)->reset();
488 (*i)->get(buf, buf_len, iters);
340 (*i)->print(); 489 (*i)->print();
341 } 490 }
342 491