diff options
author | josh.macdonald <jmacd@users.noreply.github.com> | 2007-11-13 04:02:15 +0000 |
---|---|---|
committer | josh.macdonald <jmacd@users.noreply.github.com> | 2007-11-13 04:02:15 +0000 |
commit | ba57c64d224ed4feb37ff7005e33f6a7d70cbbd9 (patch) | |
tree | fecf5dfdec9d4a49d5e81df5608f4e027dc7a8fd /xdelta3/examples | |
parent | a4b5480a34e217e93e147f460b47e27741d809c1 (diff) |
Diffstat (limited to 'xdelta3/examples')
-rw-r--r-- | xdelta3/examples/checksum_test.cc | 327 |
1 files changed, 298 insertions, 29 deletions
diff --git a/xdelta3/examples/checksum_test.cc b/xdelta3/examples/checksum_test.cc index 821291a..a48c6ef 100644 --- a/xdelta3/examples/checksum_test.cc +++ b/xdelta3/examples/checksum_test.cc | |||
@@ -5,49 +5,263 @@ extern "C" { | |||
5 | #include <assert.h> | 5 | #include <assert.h> |
6 | } | 6 | } |
7 | 7 | ||
8 | // template <typename T, int Cklen> | 8 | #include <list> |
9 | #include <map> | ||
10 | |||
11 | using std::list; | ||
12 | using std::map; | ||
13 | |||
14 | // Need gcc4 | ||
15 | // template <typename T, int TestCklen> | ||
9 | // struct cksum_params { | 16 | // struct cksum_params { |
10 | // typedef T cksum_type; | 17 | // typedef T cksum_type; |
11 | // enum { cklen = Cklen }; | 18 | // enum { test_cklen = TestCklen }; |
12 | // }; | 19 | // }; |
13 | 20 | ||
14 | template <int Cklen> | 21 | |
15 | struct rabin_karp { | 22 | // MLCG parameters |
16 | enum { cklen = Cklen, }; | 23 | uint32_t good_32bit_values[] = { |
24 | // 741103597U, 887987685U, | ||
25 | 1597334677U, | ||
17 | }; | 26 | }; |
18 | 27 | ||
19 | template<typename T> | 28 | // a, a* |
20 | struct test_result { | 29 | uint64_t good_64bit_values[] = { |
30 | // 1181783497276652981ULL, 4292484099903637661ULL, | ||
31 | 7664345821815920749ULL, | ||
32 | }; | ||
21 | 33 | ||
22 | int n_data; | 34 | template <typename Word> |
35 | int bitsof(); | ||
23 | 36 | ||
24 | test_result() | 37 | template<> |
25 | : n_data(0) { | 38 | int bitsof<uint32_t>() { |
26 | } | 39 | return 32; |
40 | } | ||
27 | 41 | ||
28 | void print() { | 42 | template<> |
29 | fprintf(stderr, "cklen %u: %u results\n", T::cklen, n_data); | 43 | int bitsof<uint64_t>() { |
30 | } | 44 | return 64; |
45 | } | ||
31 | 46 | ||
32 | void add(uint8_t* ptr) { | 47 | struct noperm { |
33 | n_data++; | 48 | int operator()(const uint8_t &c) { |
34 | } | 49 | return c; |
50 | } | ||
35 | }; | 51 | }; |
36 | 52 | ||
37 | typedef rabin_karp<4> small_cksum; | 53 | struct permute { |
38 | typedef rabin_karp<9> large_cksum; | 54 | int operator()(const uint8_t &c) { |
55 | return __single_hash[c]; | ||
56 | } | ||
57 | }; | ||
39 | 58 | ||
40 | template<typename T> | 59 | template <typename Word> |
41 | void test(uint8_t* buf, usize_t buf_len) { | 60 | Word good_word(); |
42 | test_result<T> result; | ||
43 | 61 | ||
44 | for (usize_t i = 0; i < buf_len - T::cklen; i++) { | 62 | template<> |
45 | result.add(buf + i); | 63 | uint32_t good_word<uint32_t>() { |
46 | } | 64 | return good_32bit_values[0]; |
65 | } | ||
47 | 66 | ||
48 | result.print(); | 67 | template<> |
68 | uint64_t good_word<uint64_t>() { | ||
69 | return good_64bit_values[0]; | ||
49 | } | 70 | } |
50 | 71 | ||
72 | int | ||
73 | size_log2 (int slots) | ||
74 | { | ||
75 | int bits = 31; | ||
76 | int i; | ||
77 | |||
78 | for (i = 3; i <= bits; i += 1) | ||
79 | { | ||
80 | if (slots <= (1 << i)) | ||
81 | { | ||
82 | bits = i; | ||
83 | break; | ||
84 | } | ||
85 | } | ||
86 | |||
87 | return bits; | ||
88 | } | ||
89 | |||
90 | template <typename Word, int CksumSize, int CksumSkip, typename Permute> | ||
91 | struct rabin_karp { | ||
92 | typedef Word word_type; | ||
93 | typedef Permute permute_type; | ||
94 | |||
95 | enum { cksum_size = CksumSize, | ||
96 | cksum_skip = CksumSkip, | ||
97 | }; | ||
98 | |||
99 | rabin_karp() { | ||
100 | multiplier = good_word<Word>(); | ||
101 | powers = new Word[cksum_size]; | ||
102 | powers[cksum_size - 1] = 1; | ||
103 | for (int i = cksum_size - 2; i >= 0; i--) { | ||
104 | powers[i] = powers[i + 1] * multiplier; | ||
105 | } | ||
106 | product = powers[0] * multiplier; | ||
107 | } | ||
108 | |||
109 | Word step(const uint8_t *ptr) { | ||
110 | Word h = 0; | ||
111 | for (int i = 0; i < cksum_size; i++) { | ||
112 | h += permute_type()(ptr[i]) * powers[i]; | ||
113 | } | ||
114 | return h; | ||
115 | } | ||
116 | |||
117 | Word state0(const uint8_t *ptr) { | ||
118 | state = step(ptr); | ||
119 | return state; | ||
120 | } | ||
121 | |||
122 | Word incr(const uint8_t *ptr) { | ||
123 | state = state - | ||
124 | product * permute_type()(ptr[-1]) + | ||
125 | permute_type()(ptr[cksum_size - 1]); | ||
126 | return state; | ||
127 | } | ||
128 | |||
129 | Word *powers; | ||
130 | Word product; | ||
131 | Word multiplier; | ||
132 | Word state; | ||
133 | }; | ||
134 | |||
135 | struct test_result_base; | ||
136 | |||
137 | static list<test_result_base*> all_tests; | ||
138 | |||
139 | struct test_result_base { | ||
140 | virtual ~test_result_base() { | ||
141 | } | ||
142 | virtual void print() = 0; | ||
143 | virtual void get(const uint8_t* buf, const int buf_size) = 0; | ||
144 | }; | ||
145 | |||
146 | template<typename T> | ||
147 | struct test_result : public test_result_base { | ||
148 | int n_steps; | ||
149 | int n_incrs; | ||
150 | int s_bits; | ||
151 | int s_mask; | ||
152 | int h_bits; | ||
153 | int t_entries; | ||
154 | double step_fill; | ||
155 | double incr_fill; | ||
156 | long start_step, end_step; | ||
157 | long start_incr, end_incr; | ||
158 | |||
159 | test_result() { | ||
160 | all_tests.push_back(this); | ||
161 | } | ||
162 | |||
163 | void print() { | ||
164 | fprintf(stderr, | ||
165 | "cksum size %u: skip %u: %u steps: %u incrs: " | ||
166 | "s_fill %0.2f%% i_fill %0.2f%%\n", | ||
167 | T::cksum_size, | ||
168 | T::cksum_skip, | ||
169 | n_steps, | ||
170 | n_incrs, | ||
171 | 100.0 * step_fill, | ||
172 | 100.0 * incr_fill); | ||
173 | } | ||
174 | |||
175 | int* new_table(int entries) { | ||
176 | t_entries = entries; | ||
177 | h_bits = size_log2(entries); | ||
178 | s_bits = bitsof<typename T::word_type>() - h_bits; | ||
179 | s_mask = (1 << h_bits) - 1; | ||
180 | |||
181 | int n = 1 << h_bits; | ||
182 | int *t = new int[n]; | ||
183 | memset(t, 0, sizeof(int) * n); | ||
184 | return t; | ||
185 | } | ||
186 | |||
187 | double summarize_table(int* table) { | ||
188 | int n = 1 << h_bits; | ||
189 | int f = 0; | ||
190 | for (int i = 0; i < n; i++) { | ||
191 | if (table[i] != 0) { | ||
192 | f++; | ||
193 | } | ||
194 | } | ||
195 | delete [] table; | ||
196 | return (double) f / (double) t_entries; | ||
197 | } | ||
198 | |||
199 | void get(const uint8_t* buf, const int buf_size) { | ||
200 | const uint8_t *ptr; | ||
201 | const uint8_t *end; | ||
202 | int last_offset; | ||
203 | int periods; | ||
204 | int stop; | ||
205 | int *hash_table; | ||
206 | |||
207 | last_offset = buf_size - T::cksum_size; | ||
208 | |||
209 | if (last_offset < 0) { | ||
210 | periods = 0; | ||
211 | n_steps = 0; | ||
212 | n_incrs = 0; | ||
213 | stop = -T::cksum_size; | ||
214 | } else { | ||
215 | periods = last_offset / T::cksum_skip; | ||
216 | n_steps = periods; | ||
217 | n_incrs = last_offset; | ||
218 | stop = last_offset - (periods + 1) * T::cksum_skip; | ||
219 | } | ||
220 | |||
221 | hash_table = new_table(n_steps); | ||
222 | |||
223 | start_step = get_millisecs_now(); | ||
224 | |||
225 | ptr = buf + last_offset; | ||
226 | end = buf + stop; | ||
227 | |||
228 | T t; | ||
229 | typename T::word_type w; | ||
230 | |||
231 | for (; ptr != end; ptr -= T::cksum_skip) { | ||
232 | w = t.step(ptr); | ||
233 | ++hash_table[(w >> s_bits) ^ (w & s_mask)]; | ||
234 | } | ||
235 | |||
236 | end_step = get_millisecs_now(); | ||
237 | |||
238 | step_fill = summarize_table(hash_table); | ||
239 | hash_table = new_table(n_incrs); | ||
240 | |||
241 | stop = buf_size - T::cksum_size + 1; | ||
242 | if (stop < 0) { | ||
243 | stop = 0; | ||
244 | } | ||
245 | |||
246 | start_incr = end_step; | ||
247 | |||
248 | ptr = buf; | ||
249 | end = buf + stop; | ||
250 | if (ptr != end) { | ||
251 | w = t.state0(ptr++); | ||
252 | ++hash_table[(w >> s_bits) ^ (w & s_mask)]; | ||
253 | } | ||
254 | for (; ptr != end; ptr++) { | ||
255 | w = t.incr(ptr); | ||
256 | ++hash_table[(w >> s_bits) ^ (w & s_mask)]; | ||
257 | } | ||
258 | |||
259 | end_incr = get_millisecs_now(); | ||
260 | |||
261 | incr_fill = summarize_table(hash_table); | ||
262 | } | ||
263 | }; | ||
264 | |||
51 | int main(int argc, char** argv) { | 265 | int main(int argc, char** argv) { |
52 | int i; | 266 | int i; |
53 | uint8_t *buf = NULL; | 267 | uint8_t *buf = NULL; |
@@ -55,10 +269,62 @@ int main(int argc, char** argv) { | |||
55 | int ret; | 269 | int ret; |
56 | 270 | ||
57 | if (argc <= 1) { | 271 | if (argc <= 1) { |
58 | fprintf(stderr, "usage: %s file ...", argv[0]); | 272 | fprintf(stderr, "usage: %s file ...\n", argv[0]); |
59 | return 1; | 273 | return 1; |
60 | } | 274 | } |
61 | 275 | ||
276 | test_result<rabin_karp<uint32_t, 4, 1, noperm> > small_1_cksum; | ||
277 | test_result<rabin_karp<uint32_t, 4, 4, noperm> > small_4_cksum; | ||
278 | test_result<rabin_karp<uint32_t, 9, 1, noperm> > large_1_cksum; | ||
279 | test_result<rabin_karp<uint32_t, 9, 2, noperm> > large_2_cksum; | ||
280 | test_result<rabin_karp<uint32_t, 9, 3, noperm> > large_3_cksum; | ||
281 | test_result<rabin_karp<uint32_t, 9, 5, noperm> > large_5_cksum; | ||
282 | test_result<rabin_karp<uint32_t, 9, 6, noperm> > large_6_cksum; | ||
283 | test_result<rabin_karp<uint32_t, 9, 7, noperm> > large_7_cksum; | ||
284 | test_result<rabin_karp<uint32_t, 9, 8, noperm> > large_8_cksum; | ||
285 | test_result<rabin_karp<uint32_t, 9, 15, noperm> > large_15_cksum; | ||
286 | test_result<rabin_karp<uint32_t, 9, 26, noperm> > large_26_cksum; | ||
287 | test_result<rabin_karp<uint32_t, 9, 55, noperm> > large_55_cksum; | ||
288 | |||
289 | test_result<rabin_karp<uint32_t, 4, 1, permute> > small_1_cksum_p; | ||
290 | test_result<rabin_karp<uint32_t, 4, 4, permute> > small_4_cksum_p; | ||
291 | test_result<rabin_karp<uint32_t, 9, 1, permute> > large_1_cksum_p; | ||
292 | test_result<rabin_karp<uint32_t, 9, 2, permute> > large_2_cksum_p; | ||
293 | test_result<rabin_karp<uint32_t, 9, 3, permute> > large_3_cksum_p; | ||
294 | test_result<rabin_karp<uint32_t, 9, 5, permute> > large_5_cksum_p; | ||
295 | test_result<rabin_karp<uint32_t, 9, 6, permute> > large_6_cksum_p; | ||
296 | test_result<rabin_karp<uint32_t, 9, 7, permute> > large_7_cksum_p; | ||
297 | test_result<rabin_karp<uint32_t, 9, 8, permute> > large_8_cksum_p; | ||
298 | test_result<rabin_karp<uint32_t, 9, 15, permute> > large_15_cksum_p; | ||
299 | test_result<rabin_karp<uint32_t, 9, 26, permute> > large_26_cksum_p; | ||
300 | test_result<rabin_karp<uint32_t, 9, 55, permute> > large_55_cksum_p; | ||
301 | |||
302 | test_result<rabin_karp<uint64_t, 4, 1, noperm> > small_1_cksum_64; | ||
303 | test_result<rabin_karp<uint64_t, 4, 4, noperm> > small_4_cksum_64; | ||
304 | test_result<rabin_karp<uint64_t, 9, 1, noperm> > large_1_cksum_64; | ||
305 | test_result<rabin_karp<uint64_t, 9, 2, noperm> > large_2_cksum_64; | ||
306 | test_result<rabin_karp<uint64_t, 9, 3, noperm> > large_3_cksum_64; | ||
307 | test_result<rabin_karp<uint64_t, 9, 5, noperm> > large_5_cksum_64; | ||
308 | test_result<rabin_karp<uint64_t, 9, 6, noperm> > large_6_cksum_64; | ||
309 | test_result<rabin_karp<uint64_t, 9, 7, noperm> > large_7_cksum_64; | ||
310 | test_result<rabin_karp<uint64_t, 9, 8, noperm> > large_8_cksum_64; | ||
311 | test_result<rabin_karp<uint64_t, 9, 15, noperm> > large_15_cksum_64; | ||
312 | test_result<rabin_karp<uint64_t, 9, 26, noperm> > large_26_cksum_64; | ||
313 | test_result<rabin_karp<uint64_t, 9, 55, noperm> > large_55_cksum_64; | ||
314 | |||
315 | test_result<rabin_karp<uint64_t, 4, 1, permute> > small_1_cksum_p_64; | ||
316 | test_result<rabin_karp<uint64_t, 4, 4, permute> > small_4_cksum_p_64; | ||
317 | test_result<rabin_karp<uint64_t, 9, 1, permute> > large_1_cksum_p_64; | ||
318 | test_result<rabin_karp<uint64_t, 9, 2, permute> > large_2_cksum_p_64; | ||
319 | test_result<rabin_karp<uint64_t, 9, 3, permute> > large_3_cksum_p_64; | ||
320 | test_result<rabin_karp<uint64_t, 9, 5, permute> > large_5_cksum_p_64; | ||
321 | test_result<rabin_karp<uint64_t, 9, 6, permute> > large_6_cksum_p_64; | ||
322 | test_result<rabin_karp<uint64_t, 9, 7, permute> > large_7_cksum_p_64; | ||
323 | test_result<rabin_karp<uint64_t, 9, 8, permute> > large_8_cksum_p_64; | ||
324 | test_result<rabin_karp<uint64_t, 9, 15, permute> > large_15_cksum_p_64; | ||
325 | test_result<rabin_karp<uint64_t, 9, 26, permute> > large_26_cksum_p_64; | ||
326 | test_result<rabin_karp<uint64_t, 9, 55, permute> > large_55_cksum_p_64; | ||
327 | |||
62 | for (i = 1; i < argc; i++) { | 328 | for (i = 1; i < argc; i++) { |
63 | if ((ret = read_whole_file(argv[i], | 329 | if ((ret = read_whole_file(argv[i], |
64 | & buf, | 330 | & buf, |
@@ -68,8 +334,11 @@ int main(int argc, char** argv) { | |||
68 | 334 | ||
69 | fprintf(stderr, "file %s is %u bytes\n", argv[i], buf_len); | 335 | fprintf(stderr, "file %s is %u bytes\n", argv[i], buf_len); |
70 | 336 | ||
71 | test<small_cksum>(buf, buf_len); | 337 | for (list<test_result_base*>::iterator i = all_tests.begin(); |
72 | test<large_cksum>(buf, buf_len); | 338 | i != all_tests.end(); ++i) { |
339 | (*i)->get(buf, buf_len); | ||
340 | (*i)->print(); | ||
341 | } | ||
73 | 342 | ||
74 | free(buf); | 343 | free(buf); |
75 | buf = NULL; | 344 | buf = NULL; |