From ba57c64d224ed4feb37ff7005e33f6a7d70cbbd9 Mon Sep 17 00:00:00 2001 From: "josh.macdonald" Date: Tue, 13 Nov 2007 04:02:15 +0000 Subject: --- xdelta3/examples/checksum_test.cc | 327 ++++++++++++++++++++++++++++++++++---- xdelta3/xdelta3-hash.h | 10 +- xdelta3/xdelta3-list.h | 212 ++++++++++++------------ xdelta3/xdelta3-test.h | 11 +- xdelta3/xdelta3.c | 11 +- 5 files changed, 422 insertions(+), 149 deletions(-) diff --git a/xdelta3/examples/checksum_test.cc b/xdelta3/examples/checksum_test.cc index 821291a..a48c6ef 100644 --- a/xdelta3/examples/checksum_test.cc +++ b/xdelta3/examples/checksum_test.cc @@ -5,49 +5,263 @@ extern "C" { #include } -// template +#include +#include + +using std::list; +using std::map; + +// Need gcc4 +// template // struct cksum_params { // typedef T cksum_type; -// enum { cklen = Cklen }; +// enum { test_cklen = TestCklen }; // }; -template -struct rabin_karp { - enum { cklen = Cklen, }; + +// MLCG parameters +uint32_t good_32bit_values[] = { +// 741103597U, 887987685U, + 1597334677U, }; -template -struct test_result { +// a, a* +uint64_t good_64bit_values[] = { +// 1181783497276652981ULL, 4292484099903637661ULL, + 7664345821815920749ULL, +}; - int n_data; +template +int bitsof(); - test_result() - : n_data(0) { - } +template<> +int bitsof() { + return 32; +} - void print() { - fprintf(stderr, "cklen %u: %u results\n", T::cklen, n_data); - } +template<> +int bitsof() { + return 64; +} - void add(uint8_t* ptr) { - n_data++; - } +struct noperm { + int operator()(const uint8_t &c) { + return c; + } }; -typedef rabin_karp<4> small_cksum; -typedef rabin_karp<9> large_cksum; +struct permute { + int operator()(const uint8_t &c) { + return __single_hash[c]; + } +}; -template -void test(uint8_t* buf, usize_t buf_len) { - test_result result; +template +Word good_word(); - for (usize_t i = 0; i < buf_len - T::cklen; i++) { - result.add(buf + i); - } +template<> +uint32_t good_word() { + return good_32bit_values[0]; +} - result.print(); +template<> +uint64_t good_word() { + return good_64bit_values[0]; } +int +size_log2 (int slots) +{ + int bits = 31; + int i; + + for (i = 3; i <= bits; i += 1) + { + if (slots <= (1 << i)) + { + bits = i; + break; + } + } + + return bits; +} + +template +struct rabin_karp { + typedef Word word_type; + typedef Permute permute_type; + + enum { cksum_size = CksumSize, + cksum_skip = CksumSkip, + }; + + rabin_karp() { + multiplier = good_word(); + powers = new Word[cksum_size]; + powers[cksum_size - 1] = 1; + for (int i = cksum_size - 2; i >= 0; i--) { + powers[i] = powers[i + 1] * multiplier; + } + product = powers[0] * multiplier; + } + + Word step(const uint8_t *ptr) { + Word h = 0; + for (int i = 0; i < cksum_size; i++) { + h += permute_type()(ptr[i]) * powers[i]; + } + return h; + } + + Word state0(const uint8_t *ptr) { + state = step(ptr); + return state; + } + + Word incr(const uint8_t *ptr) { + state = state - + product * permute_type()(ptr[-1]) + + permute_type()(ptr[cksum_size - 1]); + return state; + } + + Word *powers; + Word product; + Word multiplier; + Word state; +}; + +struct test_result_base; + +static list all_tests; + +struct test_result_base { + virtual ~test_result_base() { + } + virtual void print() = 0; + virtual void get(const uint8_t* buf, const int buf_size) = 0; +}; + +template +struct test_result : public test_result_base { + int n_steps; + int n_incrs; + int s_bits; + int s_mask; + int h_bits; + int t_entries; + double step_fill; + double incr_fill; + long start_step, end_step; + long start_incr, end_incr; + + test_result() { + all_tests.push_back(this); + } + + void print() { + fprintf(stderr, + "cksum size %u: skip %u: %u steps: %u incrs: " + "s_fill %0.2f%% i_fill %0.2f%%\n", + T::cksum_size, + T::cksum_skip, + n_steps, + n_incrs, + 100.0 * step_fill, + 100.0 * incr_fill); + } + + int* new_table(int entries) { + t_entries = entries; + h_bits = size_log2(entries); + s_bits = bitsof() - h_bits; + s_mask = (1 << h_bits) - 1; + + int n = 1 << h_bits; + int *t = new int[n]; + memset(t, 0, sizeof(int) * n); + return t; + } + + double summarize_table(int* table) { + int n = 1 << h_bits; + int f = 0; + for (int i = 0; i < n; i++) { + if (table[i] != 0) { + f++; + } + } + delete [] table; + return (double) f / (double) t_entries; + } + + void get(const uint8_t* buf, const int buf_size) { + const uint8_t *ptr; + const uint8_t *end; + int last_offset; + int periods; + int stop; + int *hash_table; + + last_offset = buf_size - T::cksum_size; + + if (last_offset < 0) { + periods = 0; + n_steps = 0; + n_incrs = 0; + stop = -T::cksum_size; + } else { + periods = last_offset / T::cksum_skip; + n_steps = periods; + n_incrs = last_offset; + stop = last_offset - (periods + 1) * T::cksum_skip; + } + + hash_table = new_table(n_steps); + + start_step = get_millisecs_now(); + + ptr = buf + last_offset; + end = buf + stop; + + T t; + typename T::word_type w; + + for (; ptr != end; ptr -= T::cksum_skip) { + w = t.step(ptr); + ++hash_table[(w >> s_bits) ^ (w & s_mask)]; + } + + end_step = get_millisecs_now(); + + step_fill = summarize_table(hash_table); + hash_table = new_table(n_incrs); + + stop = buf_size - T::cksum_size + 1; + if (stop < 0) { + stop = 0; + } + + start_incr = end_step; + + ptr = buf; + end = buf + stop; + if (ptr != end) { + w = t.state0(ptr++); + ++hash_table[(w >> s_bits) ^ (w & s_mask)]; + } + for (; ptr != end; ptr++) { + w = t.incr(ptr); + ++hash_table[(w >> s_bits) ^ (w & s_mask)]; + } + + end_incr = get_millisecs_now(); + + incr_fill = summarize_table(hash_table); + } +}; + int main(int argc, char** argv) { int i; uint8_t *buf = NULL; @@ -55,10 +269,62 @@ int main(int argc, char** argv) { int ret; if (argc <= 1) { - fprintf(stderr, "usage: %s file ...", argv[0]); + fprintf(stderr, "usage: %s file ...\n", argv[0]); return 1; } + test_result > small_1_cksum; + test_result > small_4_cksum; + test_result > large_1_cksum; + test_result > large_2_cksum; + test_result > large_3_cksum; + test_result > large_5_cksum; + test_result > large_6_cksum; + test_result > large_7_cksum; + test_result > large_8_cksum; + test_result > large_15_cksum; + test_result > large_26_cksum; + test_result > large_55_cksum; + + test_result > small_1_cksum_p; + test_result > small_4_cksum_p; + test_result > large_1_cksum_p; + test_result > large_2_cksum_p; + test_result > large_3_cksum_p; + test_result > large_5_cksum_p; + test_result > large_6_cksum_p; + test_result > large_7_cksum_p; + test_result > large_8_cksum_p; + test_result > large_15_cksum_p; + test_result > large_26_cksum_p; + test_result > large_55_cksum_p; + + test_result > small_1_cksum_64; + test_result > small_4_cksum_64; + test_result > large_1_cksum_64; + test_result > large_2_cksum_64; + test_result > large_3_cksum_64; + test_result > large_5_cksum_64; + test_result > large_6_cksum_64; + test_result > large_7_cksum_64; + test_result > large_8_cksum_64; + test_result > large_15_cksum_64; + test_result > large_26_cksum_64; + test_result > large_55_cksum_64; + + test_result > small_1_cksum_p_64; + test_result > small_4_cksum_p_64; + test_result > large_1_cksum_p_64; + test_result > large_2_cksum_p_64; + test_result > large_3_cksum_p_64; + test_result > large_5_cksum_p_64; + test_result > large_6_cksum_p_64; + test_result > large_7_cksum_p_64; + test_result > large_8_cksum_p_64; + test_result > large_15_cksum_p_64; + test_result > large_26_cksum_p_64; + test_result > large_55_cksum_p_64; + for (i = 1; i < argc; i++) { if ((ret = read_whole_file(argv[i], & buf, @@ -68,8 +334,11 @@ int main(int argc, char** argv) { fprintf(stderr, "file %s is %u bytes\n", argv[i], buf_len); - test(buf, buf_len); - test(buf, buf_len); + for (list::iterator i = all_tests.begin(); + i != all_tests.end(); ++i) { + (*i)->get(buf, buf_len); + (*i)->print(); + } free(buf); buf = NULL; diff --git a/xdelta3/xdelta3-hash.h b/xdelta3/xdelta3-hash.h index a1cc9d8..5d12804 100644 --- a/xdelta3/xdelta3-hash.h +++ b/xdelta3/xdelta3-hash.h @@ -77,8 +77,8 @@ static const uint16_t __single_hash[256] = { - /* Random numbers generated using SLIB's pseudo-random number generator. This hashes - * the input alphabet. */ + /* Random numbers generated using SLIB's pseudo-random number generator. + * This hashes the input alphabet. */ 0xbcd1, 0xbb65, 0x42c2, 0xdffe, 0x9666, 0x431b, 0x8504, 0xeb46, 0x6379, 0xd460, 0xcf14, 0x53cf, 0xdb51, 0xdb08, 0x12c8, 0xf602, 0xe766, 0x2394, 0x250d, 0xdcbb, 0xa678, 0x02af, 0xa5c6, 0x7ea6, @@ -208,9 +208,9 @@ xd3_size_hashtable (xd3_stream *stream, usize_t slots, xd3_hash_cfg *cfg) { - /* initialize ctable: the number of hash buckets is computed from the table of primes or - * the nearest power-of-two, in both cases rounding down in favor of using less - * memory. */ + /* initialize ctable: the number of hash buckets is computed from the table + * of primes or the nearest power-of-two, in both cases rounding down in + * favor of using less memory. */ #if HASH_PRIME usize_t i; diff --git a/xdelta3/xdelta3-list.h b/xdelta3/xdelta3-list.h index 8d49e45..3c0df5e 100644 --- a/xdelta3/xdelta3-list.h +++ b/xdelta3/xdelta3-list.h @@ -19,112 +19,112 @@ #ifndef __XDELTA3_LIST__ #define __XDELTA3_LIST__ -#define XD3_MAKELIST(LTYPE,ETYPE,LNAME) \ - \ -static inline ETYPE* \ -LTYPE ## _entry (LTYPE* l) \ -{ \ - return (ETYPE*) ((char*) l - (unsigned long) &((ETYPE*) 0)->LNAME); \ -} \ - \ -static inline void \ -LTYPE ## _init (LTYPE *l) \ -{ \ - l->next = l; \ - l->prev = l; \ -} \ - \ -static inline void \ -LTYPE ## _add (LTYPE *prev, LTYPE *next, LTYPE *ins) \ -{ \ - next->prev = ins; \ - prev->next = ins; \ - ins->next = next; \ - ins->prev = prev; \ -} \ - \ -static inline void \ -LTYPE ## _push_back (LTYPE *l, ETYPE *i) \ -{ \ - LTYPE ## _add (l->prev, l, & i->LNAME); \ -} \ - \ -static inline void \ -LTYPE ## _del (LTYPE *next, \ - LTYPE *prev) \ -{ \ - next->prev = prev; \ - prev->next = next; \ -} \ - \ -static inline ETYPE* \ -LTYPE ## _remove (ETYPE *f) \ -{ \ - LTYPE *i = f->LNAME.next; \ - LTYPE ## _del (f->LNAME.next, f->LNAME.prev); \ - return LTYPE ## _entry (i); \ -} \ - \ -static inline ETYPE* \ -LTYPE ## _pop_back (LTYPE *l) \ -{ \ - LTYPE *i = l->prev; \ - LTYPE ## _del (i->next, i->prev); \ - return LTYPE ## _entry (i); \ -} \ - \ -static inline ETYPE* \ -LTYPE ## _pop_front (LTYPE *l) \ -{ \ - LTYPE *i = l->next; \ - LTYPE ## _del (i->next, i->prev); \ - return LTYPE ## _entry (i); \ -} \ - \ -static inline int \ -LTYPE ## _empty (LTYPE *l) \ -{ \ - return l == l->next; \ -} \ - \ -static inline ETYPE* \ -LTYPE ## _front (LTYPE *f) \ -{ \ - return LTYPE ## _entry (f->next); \ -} \ - \ -static inline ETYPE* \ -LTYPE ## _back (LTYPE *f) \ -{ \ - return LTYPE ## _entry (f->prev); \ -} \ - \ -static inline int \ -LTYPE ## _end (LTYPE *f, ETYPE *i) \ -{ \ - return f == & i->LNAME; \ -} \ - \ -static inline ETYPE* \ -LTYPE ## _next (ETYPE *f) \ -{ \ - return LTYPE ## _entry (f->LNAME.next); \ -} \ - \ -static inline int \ -LTYPE ## _length (LTYPE *l) \ -{ \ - LTYPE *p; \ - int c = 0; \ - \ - for (p = l->next; p != l; p = p->next) \ - { \ - c += 1; \ - } \ - \ - return c; \ -} \ - \ +#define XD3_MAKELIST(LTYPE,ETYPE,LNAME) \ + \ +static inline ETYPE* \ +LTYPE ## _entry (LTYPE* l) \ +{ \ + return (ETYPE*) ((char*) l - (unsigned long) &((ETYPE*) 0)->LNAME); \ +} \ + \ +static inline void \ +LTYPE ## _init (LTYPE *l) \ +{ \ + l->next = l; \ + l->prev = l; \ +} \ + \ +static inline void \ +LTYPE ## _add (LTYPE *prev, LTYPE *next, LTYPE *ins) \ +{ \ + next->prev = ins; \ + prev->next = ins; \ + ins->next = next; \ + ins->prev = prev; \ +} \ + \ +static inline void \ +LTYPE ## _push_back (LTYPE *l, ETYPE *i) \ +{ \ + LTYPE ## _add (l->prev, l, & i->LNAME); \ +} \ + \ +static inline void \ +LTYPE ## _del (LTYPE *next, \ + LTYPE *prev) \ +{ \ + next->prev = prev; \ + prev->next = next; \ +} \ + \ +static inline ETYPE* \ +LTYPE ## _remove (ETYPE *f) \ +{ \ + LTYPE *i = f->LNAME.next; \ + LTYPE ## _del (f->LNAME.next, f->LNAME.prev); \ + return LTYPE ## _entry (i); \ +} \ + \ +static inline ETYPE* \ +LTYPE ## _pop_back (LTYPE *l) \ +{ \ + LTYPE *i = l->prev; \ + LTYPE ## _del (i->next, i->prev); \ + return LTYPE ## _entry (i); \ +} \ + \ +static inline ETYPE* \ +LTYPE ## _pop_front (LTYPE *l) \ +{ \ + LTYPE *i = l->next; \ + LTYPE ## _del (i->next, i->prev); \ + return LTYPE ## _entry (i); \ +} \ + \ +static inline int \ +LTYPE ## _empty (LTYPE *l) \ +{ \ + return l == l->next; \ +} \ + \ +static inline ETYPE* \ +LTYPE ## _front (LTYPE *f) \ +{ \ + return LTYPE ## _entry (f->next); \ +} \ + \ +static inline ETYPE* \ +LTYPE ## _back (LTYPE *f) \ +{ \ + return LTYPE ## _entry (f->prev); \ +} \ + \ +static inline int \ +LTYPE ## _end (LTYPE *f, ETYPE *i) \ +{ \ + return f == & i->LNAME; \ +} \ + \ +static inline ETYPE* \ +LTYPE ## _next (ETYPE *f) \ +{ \ + return LTYPE ## _entry (f->LNAME.next); \ +} \ + \ +static inline usize_t \ +LTYPE ## _length (LTYPE *l) \ +{ \ + LTYPE *p; \ + int c = 0; \ + \ + for (p = l->next; p != l; p = p->next) \ + { \ + c += 1; \ + } \ + \ + return c; \ +} \ + \ typedef int unused_ ## LTYPE #endif diff --git a/xdelta3/xdelta3-test.h b/xdelta3/xdelta3-test.h index c01f2e1..ee32565 100644 --- a/xdelta3/xdelta3-test.h +++ b/xdelta3/xdelta3-test.h @@ -2392,18 +2392,19 @@ xd3_selftest (void) DO_TEST (iopt_flush_instructions, 0, 0); DO_TEST (source_cksum_offset, 0, 0); - IF_DJW (DO_TEST (secondary_huff, 0, DJW_MAX_GROUPS)); - IF_FGK (DO_TEST (secondary_fgk, 0, 1)); - DO_TEST (decompress_single_bit_error, 0, 3); DO_TEST (decompress_single_bit_error, XD3_ADLER32, 3); IF_FGK (DO_TEST (decompress_single_bit_error, XD3_SEC_FGK, 3)); - IF_DJW (DO_TEST (decompress_single_bit_error, XD3_SEC_DJW, 18)); + IF_DJW (DO_TEST (decompress_single_bit_error, XD3_SEC_DJW, 17)); /* There are many expected non-failures for ALT_CODE_TABLE because * not all of the instruction codes are used. */ - IF_GENCODETBL (DO_TEST (decompress_single_bit_error, XD3_ALT_CODE_TABLE, 224)); + IF_GENCODETBL ( + DO_TEST (decompress_single_bit_error, XD3_ALT_CODE_TABLE, 224)); + + IF_DJW (DO_TEST (secondary_huff, 0, DJW_MAX_GROUPS)); + IF_FGK (DO_TEST (secondary_fgk, 0, 1)); #ifndef WIN32 DO_TEST (force_behavior, 0, 0); diff --git a/xdelta3/xdelta3.c b/xdelta3/xdelta3.c index 0f739f2..723a62a 100644 --- a/xdelta3/xdelta3.c +++ b/xdelta3/xdelta3.c @@ -2656,8 +2656,8 @@ xd3_set_appheader (xd3_stream *stream, static int xd3_iopt_check (xd3_stream *stream) { - int ul = xd3_rlist_length (& stream->iopt_used); - int fl = xd3_rlist_length (& stream->iopt_free); + usize_t ul = xd3_rlist_length (& stream->iopt_used); + usize_t fl = xd3_rlist_length (& stream->iopt_free); return (ul + fl + (stream->iout ? 1 : 0)) == stream->iopt_size; } @@ -3525,7 +3525,10 @@ xd3_encode_init (xd3_stream *stream) if (small_comp) { - usize_t hash_values = min(stream->winsize, stream->sprevsz); + /* TODO: This is under devel: used to have min(sprevsz) here, which sort + * of makes sense, but observed fast performance w/ larger tables, which + * also sort of makes sense. @@@ */ + usize_t hash_values = stream->winsize; xd3_size_hashtable (stream, hash_values, @@ -4579,7 +4582,7 @@ static int xd3_check_smatch (const uint8_t *ref0, const uint8_t *inp0, const uint8_t *inp_max, usize_t cmp_len) { - int i; + usize_t i; for (i = 0; i < cmp_len; i += 1) { -- cgit v1.2.3