diff options
Diffstat (limited to 'xdelta3/xdelta3.c')
-rw-r--r-- | xdelta3/xdelta3.c | 317 |
1 files changed, 71 insertions, 246 deletions
diff --git a/xdelta3/xdelta3.c b/xdelta3/xdelta3.c index b6ac778..c0e7880 100644 --- a/xdelta3/xdelta3.c +++ b/xdelta3/xdelta3.c | |||
@@ -61,12 +61,7 @@ | |||
61 | copies within the TARGET. Small matching, which is more expensive, | 61 | copies within the TARGET. Small matching, which is more expensive, |
62 | usually dominates the large STRING-MATCH costs in this code - the | 62 | usually dominates the large STRING-MATCH costs in this code - the |
63 | more exhaustive the search, the better the results. Either of the | 63 | more exhaustive the search, the better the results. Either of the |
64 | two string-matching mechanisms may be disabled. Currently, large | 64 | two string-matching mechanisms may be disabled. |
65 | checksums are only performed in the source file, if present, and | ||
66 | small checksums are performed only in the left-over target input. | ||
67 | However, small matches are possible in the source file too, with a | ||
68 | range of possibilities. [I've seen a paper on this subject, but | ||
69 | I lost it.] | ||
70 | 65 | ||
71 | 2. INSTRUCTION SELECTION. The IOPT buffer here represents a queue | 66 | 2. INSTRUCTION SELECTION. The IOPT buffer here represents a queue |
72 | used to store overlapping copy instructions. There are two possible | 67 | used to store overlapping copy instructions. There are two possible |
@@ -486,11 +481,9 @@ IF_BUILD_DEFAULT(static const xd3_smatcher __smatcher_default;) | |||
486 | #define SMALL_HASH_DEBUG2(s,inp) \ | 481 | #define SMALL_HASH_DEBUG2(s,inp) \ |
487 | XD3_ASSERT (debug_hval == xd3_checksum_hash (& (s)->small_hash, \ | 482 | XD3_ASSERT (debug_hval == xd3_checksum_hash (& (s)->small_hash, \ |
488 | xd3_scksum ((inp), (s)->smatcher.small_look))) | 483 | xd3_scksum ((inp), (s)->smatcher.small_look))) |
489 | #define SMALL_HASH_STATS(x) x | ||
490 | #else | 484 | #else |
491 | #define SMALL_HASH_DEBUG1(s,inp) | 485 | #define SMALL_HASH_DEBUG1(s,inp) |
492 | #define SMALL_HASH_DEBUG2(s,inp) | 486 | #define SMALL_HASH_DEBUG2(s,inp) |
493 | #define SMALL_HASH_STATS(x) | ||
494 | #endif /* XD3_DEBUG */ | 487 | #endif /* XD3_DEBUG */ |
495 | 488 | ||
496 | /* Update the run-length state */ | 489 | /* Update the run-length state */ |
@@ -1169,6 +1162,7 @@ static usize_t __alternate_code_table_compressed_size; | |||
1169 | int xd3_compute_code_table_encoding (xd3_stream *in_stream, const xd3_dinst *code_table, | 1162 | int xd3_compute_code_table_encoding (xd3_stream *in_stream, const xd3_dinst *code_table, |
1170 | uint8_t *comp_string, usize_t *comp_string_size) | 1163 | uint8_t *comp_string, usize_t *comp_string_size) |
1171 | { | 1164 | { |
1165 | /* TODO: use xd3_encode_memory() */ | ||
1172 | uint8_t dflt_string[CODE_TABLE_STRING_SIZE]; | 1166 | uint8_t dflt_string[CODE_TABLE_STRING_SIZE]; |
1173 | uint8_t code_string[CODE_TABLE_STRING_SIZE]; | 1167 | uint8_t code_string[CODE_TABLE_STRING_SIZE]; |
1174 | xd3_stream stream; | 1168 | xd3_stream stream; |
@@ -1194,11 +1188,8 @@ int xd3_compute_code_table_encoding (xd3_stream *in_stream, const xd3_dinst *cod | |||
1194 | config.smatcher_soft.small_look = 4; | 1188 | config.smatcher_soft.small_look = 4; |
1195 | config.smatcher_soft.small_chain = CODE_TABLE_STRING_SIZE; | 1189 | config.smatcher_soft.small_chain = CODE_TABLE_STRING_SIZE; |
1196 | config.smatcher_soft.small_lchain = CODE_TABLE_STRING_SIZE; | 1190 | config.smatcher_soft.small_lchain = CODE_TABLE_STRING_SIZE; |
1197 | config.smatcher_soft.ssmatch = 1; | ||
1198 | config.smatcher_soft.try_lazy = 1; | ||
1199 | config.smatcher_soft.max_lazy = CODE_TABLE_STRING_SIZE; | 1191 | config.smatcher_soft.max_lazy = CODE_TABLE_STRING_SIZE; |
1200 | config.smatcher_soft.long_enough = CODE_TABLE_STRING_SIZE; | 1192 | config.smatcher_soft.long_enough = CODE_TABLE_STRING_SIZE; |
1201 | config.smatcher_soft.promote = 1; | ||
1202 | 1193 | ||
1203 | if ((ret = xd3_config_stream (& stream, & config))) { goto fail; } | 1194 | if ((ret = xd3_config_stream (& stream, & config))) { goto fail; } |
1204 | 1195 | ||
@@ -2503,11 +2494,7 @@ xd3_config_stream(xd3_stream *stream, | |||
2503 | smatcher->name = __smatcher_soft.name; | 2494 | smatcher->name = __smatcher_soft.name; |
2504 | if (smatcher->large_look < MIN_MATCH || | 2495 | if (smatcher->large_look < MIN_MATCH || |
2505 | smatcher->large_step < 1 || | 2496 | smatcher->large_step < 1 || |
2506 | smatcher->small_look < MIN_MATCH || | 2497 | smatcher->small_look < MIN_MATCH) |
2507 | smatcher->small_chain < 1 || | ||
2508 | smatcher->large_look < smatcher->small_look || | ||
2509 | smatcher->small_chain < smatcher->small_lchain || | ||
2510 | (smatcher->small_lchain == 0 && smatcher->try_lazy)) | ||
2511 | { | 2498 | { |
2512 | stream->msg = "invalid soft string-match config"; | 2499 | stream->msg = "invalid soft string-match config"; |
2513 | return XD3_INVALID; | 2500 | return XD3_INVALID; |
@@ -3581,22 +3568,6 @@ xd3_encode_init (xd3_stream *stream) | |||
3581 | return ENOMEM; | 3568 | return ENOMEM; |
3582 | } | 3569 | } |
3583 | 3570 | ||
3584 | #if XD3_DEBUG | ||
3585 | static int | ||
3586 | xd3_check_sprevlist (xd3_stream *stream) | ||
3587 | { | ||
3588 | int i; | ||
3589 | for (i = 0; i < stream->sprevsz; i += 1) | ||
3590 | { | ||
3591 | xd3_slist *l = & stream->small_prev[i]; | ||
3592 | |||
3593 | XD3_ASSERT (l->prev->next == l); | ||
3594 | XD3_ASSERT (l->next->prev == l); | ||
3595 | } | ||
3596 | return 1; | ||
3597 | } | ||
3598 | #endif | ||
3599 | |||
3600 | /* Called after the ENC_POSTOUT state, this puts the output buffers back into separate | 3571 | /* Called after the ENC_POSTOUT state, this puts the output buffers back into separate |
3601 | * lists and re-initializes some variables. (The output lists were spliced together | 3572 | * lists and re-initializes some variables. (The output lists were spliced together |
3602 | * during the ENC_FLUSH state.) */ | 3573 | * during the ENC_FLUSH state.) */ |
@@ -3606,8 +3577,6 @@ xd3_encode_reset (xd3_stream *stream) | |||
3606 | int i; | 3577 | int i; |
3607 | xd3_output *olist; | 3578 | xd3_output *olist; |
3608 | 3579 | ||
3609 | XD3_ASSERT (stream->small_prev == NULL || xd3_check_sprevlist (stream)); | ||
3610 | |||
3611 | IF_DEBUG (stream->n_emit = 0); | 3580 | IF_DEBUG (stream->n_emit = 0); |
3612 | stream->avail_in = 0; | 3581 | stream->avail_in = 0; |
3613 | stream->small_reset = 1; | 3582 | stream->small_reset = 1; |
@@ -3930,9 +3899,6 @@ xd3_process_memory (int is_encode, | |||
3930 | return XD3_INTERNAL; | 3899 | return XD3_INTERNAL; |
3931 | } | 3900 | } |
3932 | 3901 | ||
3933 | /* TODO: for small inputs, the xd3_stream could be configured to allocate MUCH less | ||
3934 | * memory. Most of the allocations sizes are defaulted (see xdelta3.h), and assume | ||
3935 | * large inputs. */ | ||
3936 | memset (& stream, 0, sizeof (stream)); | 3902 | memset (& stream, 0, sizeof (stream)); |
3937 | memset (& config, 0, sizeof (config)); | 3903 | memset (& config, 0, sizeof (config)); |
3938 | 3904 | ||
@@ -3940,9 +3906,10 @@ xd3_process_memory (int is_encode, | |||
3940 | 3906 | ||
3941 | if (is_encode) | 3907 | if (is_encode) |
3942 | { | 3908 | { |
3943 | /* TODO: for large inputs, limit window size, need to select a default ... */ | ||
3944 | config.srcwin_maxsz = source_size; | 3909 | config.srcwin_maxsz = source_size; |
3945 | config.winsize = min(input_size, (usize_t) (1<<20)); | 3910 | config.winsize = min(input_size, (usize_t) (1<<20)); |
3911 | config.sprevsz = min(input_size, XD3_DEFAULT_SPREVSZ); | ||
3912 | config.iopt_size = min(input_size / 32, XD3_DEFAULT_IOPT_SIZE); | ||
3946 | } | 3913 | } |
3947 | 3914 | ||
3948 | if ((ret = xd3_config_stream (&stream, &config)) != 0) | 3915 | if ((ret = xd3_config_stream (&stream, &config)) != 0) |
@@ -4056,6 +4023,14 @@ xd3_string_match_init (xd3_stream *stream) | |||
4056 | const int DO_SMALL = ! (stream->flags & XD3_NOCOMPRESS); | 4023 | const int DO_SMALL = ! (stream->flags & XD3_NOCOMPRESS); |
4057 | const int DO_LARGE = (stream->src != NULL); | 4024 | const int DO_LARGE = (stream->src != NULL); |
4058 | 4025 | ||
4026 | if (DO_LARGE && stream->large_table == NULL) | ||
4027 | { | ||
4028 | if ((stream->large_table = xd3_alloc0 (stream, stream->large_hash.size, sizeof (usize_t))) == NULL) | ||
4029 | { | ||
4030 | return ENOMEM; | ||
4031 | } | ||
4032 | } | ||
4033 | |||
4059 | if (DO_SMALL) | 4034 | if (DO_SMALL) |
4060 | { | 4035 | { |
4061 | /* Subsequent calls can return immediately after checking reset. */ | 4036 | /* Subsequent calls can return immediately after checking reset. */ |
@@ -4063,7 +4038,7 @@ xd3_string_match_init (xd3_stream *stream) | |||
4063 | { | 4038 | { |
4064 | /* The target hash table is reinitialized once per window. */ | 4039 | /* The target hash table is reinitialized once per window. */ |
4065 | /* TODO: This would not have to be reinitialized if absolute offsets | 4040 | /* TODO: This would not have to be reinitialized if absolute offsets |
4066 | * were being stored. */ | 4041 | * were being stored, as we would do for VCD_TARGET encoding. */ |
4067 | if (stream->small_reset) | 4042 | if (stream->small_reset) |
4068 | { | 4043 | { |
4069 | stream->small_reset = 0; | 4044 | stream->small_reset = 0; |
@@ -4081,31 +4056,15 @@ xd3_string_match_init (xd3_stream *stream) | |||
4081 | } | 4056 | } |
4082 | 4057 | ||
4083 | /* If there is a previous table needed. */ | 4058 | /* If there is a previous table needed. */ |
4084 | if (stream->smatcher.small_chain > 1) | 4059 | if (stream->smatcher.small_lchain > 1 || |
4060 | stream->smatcher.small_chain > 1) | ||
4085 | { | 4061 | { |
4086 | xd3_slist *p, *m; | ||
4087 | |||
4088 | if ((stream->small_prev = xd3_alloc (stream, | 4062 | if ((stream->small_prev = xd3_alloc (stream, |
4089 | stream->sprevsz, | 4063 | stream->sprevsz, |
4090 | sizeof (xd3_slist))) == NULL) | 4064 | sizeof (xd3_slist))) == NULL) |
4091 | { | 4065 | { |
4092 | return ENOMEM; | 4066 | return ENOMEM; |
4093 | } | 4067 | } |
4094 | |||
4095 | /* Initialize circular lists. */ | ||
4096 | for (p = stream->small_prev, m = stream->small_prev + stream->sprevsz; p != m; p += 1) | ||
4097 | { | ||
4098 | p->next = p; | ||
4099 | p->prev = p; | ||
4100 | } | ||
4101 | } | ||
4102 | } | ||
4103 | |||
4104 | if (DO_LARGE && stream->large_table == NULL) | ||
4105 | { | ||
4106 | if ((stream->large_table = xd3_alloc0 (stream, stream->large_hash.size, sizeof (usize_t))) == NULL) | ||
4107 | { | ||
4108 | return ENOMEM; | ||
4109 | } | 4068 | } |
4110 | } | 4069 | } |
4111 | 4070 | ||
@@ -4648,67 +4607,22 @@ xd3_source_extend_match (xd3_stream *stream) | |||
4648 | return 0; | 4607 | return 0; |
4649 | } | 4608 | } |
4650 | 4609 | ||
4651 | /* Update the small hash. Values in the small_table are offset by HASH_CKOFFSET (1) to | 4610 | /* Update the small hash. Values in the small_table are offset by |
4652 | * distinguish empty buckets the zero offset. This maintains the previous linked lists. | 4611 | * HASH_CKOFFSET (1) to distinguish empty buckets from real offsets. */ |
4653 | * If owrite is true then this entry is replacing the existing record, otherwise it is | ||
4654 | * merely being called to promote the existing record in the hash bucket (for the same | ||
4655 | * address cache). */ | ||
4656 | static void | 4612 | static void |
4657 | xd3_scksum_insert (xd3_stream *stream, usize_t inx, usize_t scksum, usize_t pos) | 4613 | xd3_scksum_insert (xd3_stream *stream, |
4614 | usize_t inx, | ||
4615 | usize_t scksum, | ||
4616 | usize_t pos) | ||
4658 | { | 4617 | { |
4659 | /* If we are maintaining previous links. */ | 4618 | /* If we are maintaining previous duplicates. */ |
4660 | if (stream->small_prev) | 4619 | if (stream->small_prev) |
4661 | { | 4620 | { |
4662 | usize_t last_pos = stream->small_table[inx]; | 4621 | usize_t last_pos = stream->small_table[inx]; |
4663 | xd3_slist *pos_list = & stream->small_prev[pos & stream->sprevmask]; | 4622 | xd3_slist *pos_list = & stream->small_prev[pos & stream->sprevmask]; |
4664 | xd3_slist *prev = pos_list->prev; | ||
4665 | xd3_slist *next = pos_list->next; | ||
4666 | |||
4667 | /* Assert link structure, update pos, cksum */ | ||
4668 | XD3_ASSERT (prev->next == pos_list); | ||
4669 | XD3_ASSERT (next->prev == pos_list); | ||
4670 | pos_list->pos = pos; | ||
4671 | pos_list->scksum = scksum; | ||
4672 | |||
4673 | /* Subtract HASH_CKOFFSET and test for a previous offset. */ | ||
4674 | if (last_pos-- != 0) | ||
4675 | { | ||
4676 | xd3_slist *last_list = & stream->small_prev[last_pos & stream->sprevmask]; | ||
4677 | xd3_slist *last_next; | ||
4678 | |||
4679 | /* Verify existing entry. */ | ||
4680 | SMALL_HASH_DEBUG1 (stream, stream->next_in + last_pos); | ||
4681 | SMALL_HASH_DEBUG2 (stream, stream->next_in + pos); | ||
4682 | |||
4683 | /* The two positions (mod sprevsz) may have the same checksum, making the old | ||
4684 | * and new entries the same. That is why the removal step is not before the | ||
4685 | * above if-stmt. */ | ||
4686 | if (last_list != pos_list) | ||
4687 | { | ||
4688 | /* Remove current position from any list it may belong to. */ | ||
4689 | next->prev = prev; | ||
4690 | prev->next = next; | ||
4691 | |||
4692 | /* The ordinary case, add current position to last_list. */ | ||
4693 | last_next = last_list->next; | ||
4694 | |||
4695 | pos_list->next = last_next; | ||
4696 | pos_list->prev = last_list; | ||
4697 | |||
4698 | last_next->prev = pos_list; | ||
4699 | last_list->next = pos_list; | ||
4700 | } | ||
4701 | } | ||
4702 | else | ||
4703 | { | ||
4704 | /* Remove current position from any list it may belong to. */ | ||
4705 | next->prev = prev; | ||
4706 | prev->next = next; | ||
4707 | 4623 | ||
4708 | /* Re-initialize current position. */ | 4624 | /* Note last_pos is offset by HASH_CKOFFSET. */ |
4709 | pos_list->next = pos_list; | 4625 | pos_list->last_pos = last_pos; |
4710 | pos_list->prev = pos_list; | ||
4711 | } | ||
4712 | } | 4626 | } |
4713 | 4627 | ||
4714 | /* Enter the new position into the hash bucket. */ | 4628 | /* Enter the new position into the hash bucket. */ |
@@ -4736,62 +4650,42 @@ xd3_check_smatch (const uint8_t *ref0, const uint8_t *inp0, | |||
4736 | } | 4650 | } |
4737 | #endif /* XD3_DEBUG */ | 4651 | #endif /* XD3_DEBUG */ |
4738 | 4652 | ||
4739 | /* When the hash table indicates a possible small string match, it calls this routine to | 4653 | /* When the hash table indicates a possible small string match, it |
4740 | * find the best match. The first matching position is taken from the small_table, | 4654 | * calls this routine to find the best match. The first matching |
4741 | * HASH_CKOFFSET is subtracted to get the actual position. After checking that match, if | 4655 | * position is taken from the small_table, HASH_CKOFFSET is subtracted |
4742 | * previous linked lists are in use (because stream->smatcher.small_chain > 1), previous matches | 4656 | * to get the actual position. After checking that match, if previous |
4743 | * are tested searching for the longest match. If (stream->min_match > MIN_MATCH) then a lazy | 4657 | * linked lists are in use (because stream->smatcher.small_chain > 1), |
4744 | * match is in effect. | 4658 | * previous matches are tested searching for the longest match. If |
4745 | * | 4659 | * (stream->min_match > MIN_MATCH) then a lazy match is in effect. |
4746 | * OPT: This is by far the most expensive function. The slowdown is in part due to the data | ||
4747 | * structure it maintains, which is relatively more expensive than it needs to be (in | ||
4748 | * comparison to zlib) in order to support the PROMOTE decision, which is to prefer the | ||
4749 | * most recently used matching address of a certain string to aid the VCDIFF same cache. | ||
4750 | * | ||
4751 | * --- TODO: declare the PROMOTE experiment a failure -- remove the extra LIST -- | ||
4752 | * | 4660 | * |
4753 | * Weak reasoning? it's time to modularize this routine...? Let's say the PROMOTE | 4661 | * TODO: This is the second most-expensive function, after |
4754 | * feature supported by this slow data structure contributes around 2% improvement in | 4662 | * xd3_srcwin_move_point(). |
4755 | * compressed size, is there a better code table that doesn't use the SAME address cache, | ||
4756 | * for which the speedup-discount could produce a better encoding? | ||
4757 | */ | 4663 | */ |
4758 | static /*inline*/ usize_t | 4664 | static usize_t |
4759 | xd3_smatch (xd3_stream *stream, usize_t base, usize_t scksum, usize_t *match_offset) | 4665 | xd3_smatch (xd3_stream *stream, |
4666 | usize_t base, | ||
4667 | usize_t scksum, | ||
4668 | usize_t *match_offset) | ||
4760 | { | 4669 | { |
4761 | usize_t cmp_len; | 4670 | usize_t cmp_len; |
4762 | usize_t match_length = 0; | 4671 | usize_t match_length = 0; |
4763 | usize_t chain = (stream->min_match == MIN_MATCH ? | 4672 | usize_t chain = (stream->min_match == MIN_MATCH ? |
4764 | stream->smatcher.small_chain : | 4673 | stream->smatcher.small_chain : |
4765 | stream->smatcher.small_lchain); | 4674 | stream->smatcher.small_lchain); |
4766 | xd3_slist *current = NULL; | ||
4767 | xd3_slist *first = NULL; | ||
4768 | const uint8_t *inp_max = stream->next_in + stream->avail_in; | 4675 | const uint8_t *inp_max = stream->next_in + stream->avail_in; |
4769 | const uint8_t *inp; | 4676 | const uint8_t *inp; |
4770 | const uint8_t *ref; | 4677 | const uint8_t *ref; |
4771 | 4678 | ||
4772 | SMALL_HASH_STATS (usize_t search_cnt = 0); | ||
4773 | SMALL_HASH_DEBUG1 (stream, stream->next_in + stream->input_position); | 4679 | SMALL_HASH_DEBUG1 (stream, stream->next_in + stream->input_position); |
4774 | SMALL_HASH_STATS (stream->sh_searches += 1); | ||
4775 | 4680 | ||
4776 | XD3_ASSERT (stream->min_match + stream->input_position <= stream->avail_in); | 4681 | XD3_ASSERT (stream->min_match + stream->input_position <= stream->avail_in); |
4777 | 4682 | ||
4778 | base -= HASH_CKOFFSET; | 4683 | base -= HASH_CKOFFSET; |
4779 | 4684 | ||
4780 | /* Initialize the chain. */ | ||
4781 | if (stream->small_prev != NULL) | ||
4782 | { | ||
4783 | first = current = & stream->small_prev[base & stream->sprevmask]; | ||
4784 | |||
4785 | /* Check if current->pos is correct. */ | ||
4786 | if (current->pos != base) { goto done; } | ||
4787 | } | ||
4788 | |||
4789 | again: | 4685 | again: |
4790 | 4686 | ||
4791 | SMALL_HASH_STATS (search_cnt += 1); | 4687 | /* For small matches, we can always go to the end-of-input because |
4792 | 4688 | * the matching position must be less than the input position. */ | |
4793 | /* For small matches, we can always go to the end-of-input because the matching position | ||
4794 | * must be less than the input position. */ | ||
4795 | XD3_ASSERT (base < stream->input_position); | 4689 | XD3_ASSERT (base < stream->input_position); |
4796 | 4690 | ||
4797 | ref = stream->next_in + base; | 4691 | ref = stream->next_in + base; |
@@ -4809,7 +4703,8 @@ xd3_smatch (xd3_stream *stream, usize_t base, usize_t scksum, usize_t *match_off | |||
4809 | cmp_len = inp - (stream->next_in + stream->input_position); | 4703 | cmp_len = inp - (stream->next_in + stream->input_position); |
4810 | 4704 | ||
4811 | /* Verify correctness */ | 4705 | /* Verify correctness */ |
4812 | XD3_ASSERT (xd3_check_smatch (stream->next_in + base, stream->next_in + stream->input_position, | 4706 | XD3_ASSERT (xd3_check_smatch (stream->next_in + base, |
4707 | stream->next_in + stream->input_position, | ||
4813 | inp_max, cmp_len)); | 4708 | inp_max, cmp_len)); |
4814 | 4709 | ||
4815 | /* Update longest match */ | 4710 | /* Update longest match */ |
@@ -4818,38 +4713,39 @@ xd3_smatch (xd3_stream *stream, usize_t base, usize_t scksum, usize_t *match_off | |||
4818 | ( match_length) = cmp_len; | 4713 | ( match_length) = cmp_len; |
4819 | (*match_offset) = base; | 4714 | (*match_offset) = base; |
4820 | 4715 | ||
4821 | /* Stop if we match the entire input or discover a long_enough match. */ | 4716 | /* Stop if we match the entire input or have a long_enough match. */ |
4822 | if (inp == inp_max || cmp_len >= stream->smatcher.long_enough) | 4717 | if (inp == inp_max || cmp_len >= stream->smatcher.long_enough) |
4823 | { | 4718 | { |
4824 | goto done; | 4719 | goto done; |
4825 | } | 4720 | } |
4826 | } | 4721 | } |
4827 | 4722 | ||
4828 | /* If we have not reached the chain limit, see if there is another previous position. */ | 4723 | /* If we have not reached the chain limit, see if there is another |
4829 | if (current) | 4724 | previous position. */ |
4725 | while (--chain != 0) | ||
4830 | { | 4726 | { |
4831 | while (--chain != 0) | 4727 | /* Calculate the previous offset. */ |
4728 | usize_t last_pos = stream->small_prev[base & stream->sprevmask].last_pos; | ||
4729 | |||
4730 | if (last_pos == 0) | ||
4832 | { | 4731 | { |
4833 | /* Calculate the next base offset. */ | 4732 | break; |
4834 | current = current->prev; | ||
4835 | base = current->pos; | ||
4836 | |||
4837 | /* Stop if the next position was the first. Stop if the position is wrong | ||
4838 | * (because the lists are not re-initialized across input windows). Skip if the | ||
4839 | * scksum is wrong. */ | ||
4840 | if (current != first && base < stream->input_position) | ||
4841 | { | ||
4842 | if (current->scksum != scksum) | ||
4843 | { | ||
4844 | continue; | ||
4845 | } | ||
4846 | goto again; | ||
4847 | } | ||
4848 | } | 4733 | } |
4734 | |||
4735 | last_pos -= HASH_CKOFFSET; | ||
4736 | base = last_pos; | ||
4737 | |||
4738 | /* Stop if the position is wrong (because the lists are not | ||
4739 | * re-initialized across input windows). */ | ||
4740 | if (base < stream->input_position) | ||
4741 | { | ||
4742 | goto again; | ||
4743 | } | ||
4744 | |||
4745 | break; | ||
4849 | } | 4746 | } |
4850 | 4747 | ||
4851 | done: | 4748 | done: |
4852 | SMALL_HASH_STATS (stream->sh_compares += search_cnt); | ||
4853 | return match_length; | 4749 | return match_length; |
4854 | } | 4750 | } |
4855 | 4751 | ||
@@ -4903,8 +4799,7 @@ xd3_verify_run_state (xd3_stream *stream, | |||
4903 | Templates | 4799 | Templates |
4904 | ******************************************************************************************/ | 4800 | ******************************************************************************************/ |
4905 | 4801 | ||
4906 | /* Template macros: less than 30 lines work. the template parameters appear as, e.g., | 4802 | /* Template macros */ |
4907 | * SLOOK, MIN_MATCH, TRYLAZY, etc. */ | ||
4908 | #define XD3_TEMPLATE(x) XD3_TEMPLATE2(x,TEMPLATE) | 4803 | #define XD3_TEMPLATE(x) XD3_TEMPLATE2(x,TEMPLATE) |
4909 | #define XD3_TEMPLATE2(x,n) XD3_TEMPLATE3(x,n) | 4804 | #define XD3_TEMPLATE2(x,n) XD3_TEMPLATE3(x,n) |
4910 | #define XD3_TEMPLATE3(x,n) x ## n | 4805 | #define XD3_TEMPLATE3(x,n) x ## n |
@@ -4918,10 +4813,9 @@ static const xd3_smatcher XD3_TEMPLATE(__smatcher_) = | |||
4918 | XD3_STRINGIFY(TEMPLATE), | 4813 | XD3_STRINGIFY(TEMPLATE), |
4919 | XD3_TEMPLATE(xd3_string_match_), | 4814 | XD3_TEMPLATE(xd3_string_match_), |
4920 | #if SOFTCFG == 1 | 4815 | #if SOFTCFG == 1 |
4921 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 | 4816 | 0, 0, 0, 0, 0, 0, 0 |
4922 | #else | 4817 | #else |
4923 | LLOOK, LSTEP, SLOOK, SCHAIN, SLCHAIN, SSMATCH, TRYLAZY, MAXLAZY, | 4818 | LLOOK, LSTEP, SLOOK, SCHAIN, SLCHAIN, MAXLAZY, LONGENOUGH |
4924 | LONGENOUGH, PROMOTE | ||
4925 | #endif | 4819 | #endif |
4926 | }; | 4820 | }; |
4927 | 4821 | ||
@@ -4999,13 +4893,13 @@ XD3_TEMPLATE(xd3_string_match_) (xd3_stream *stream) | |||
4999 | * checks that LEN is shorter than MAXLAZY and that there is enough leftover data to | 4893 | * checks that LEN is shorter than MAXLAZY and that there is enough leftover data to |
5000 | * consider lazy matching. "Enough" is set to 2 since the next match will start at the | 4894 | * consider lazy matching. "Enough" is set to 2 since the next match will start at the |
5001 | * next offset, it must match two extra characters. */ | 4895 | * next offset, it must match two extra characters. */ |
5002 | #define TRYLAZYLEN(LEN,POS,MAX) ((TRYLAZY && (LEN) < MAXLAZY) && ((POS) + (LEN) <= (MAX) - 2)) | 4896 | #define TRYLAZYLEN(LEN,POS,MAX) ((MAXLAZY) > 0 && (LEN) < (MAXLAZY) && (POS) + (LEN) <= (MAX) - 2) |
5003 | 4897 | ||
5004 | /* HANDLELAZY: This statement is called each time an instruciton is emitted (three | 4898 | /* HANDLELAZY: This statement is called each time an instruciton is emitted (three |
5005 | * cases). If the instruction is large enough, the loop is restarted, otherwise lazy | 4899 | * cases). If the instruction is large enough, the loop is restarted, otherwise lazy |
5006 | * matching may ensue. */ | 4900 | * matching may ensue. */ |
5007 | #define HANDLELAZY(mlen) \ | 4901 | #define HANDLELAZY(mlen) \ |
5008 | if (TRYLAZYLEN ((mlen), stream->input_position, stream->avail_in)) \ | 4902 | if (TRYLAZYLEN ((mlen), (stream->input_position), (stream->avail_in))) \ |
5009 | { stream->min_match = (mlen) + LEAST_MATCH_INCR; goto updateone; } \ | 4903 | { stream->min_match = (mlen) + LEAST_MATCH_INCR; goto updateone; } \ |
5010 | else \ | 4904 | else \ |
5011 | { stream->input_position += (mlen); goto restartloop; } | 4905 | { stream->input_position += (mlen); goto restartloop; } |
@@ -5104,13 +4998,6 @@ XD3_TEMPLATE(xd3_string_match_) (xd3_stream *stream) | |||
5104 | /* Insert a hash for this string. */ | 4998 | /* Insert a hash for this string. */ |
5105 | xd3_scksum_insert (stream, sinx, scksum, stream->input_position); | 4999 | xd3_scksum_insert (stream, sinx, scksum, stream->input_position); |
5106 | 5000 | ||
5107 | /* Promote the previous match address to head of the hash bucket. This is | ||
5108 | * intended to improve the same cache hit rate. */ | ||
5109 | if (match_length != 0 && PROMOTE) | ||
5110 | { | ||
5111 | xd3_scksum_insert (stream, sinx, scksum, match_offset); | ||
5112 | } | ||
5113 | |||
5114 | /* Maybe output a COPY instruction */ | 5001 | /* Maybe output a COPY instruction */ |
5115 | if (unlikely (match_length >= stream->min_match)) | 5002 | if (unlikely (match_length >= stream->min_match)) |
5116 | { | 5003 | { |
@@ -5132,67 +5019,7 @@ XD3_TEMPLATE(xd3_string_match_) (xd3_stream *stream) | |||
5132 | /* address */ match_offset, | 5019 | /* address */ match_offset, |
5133 | /* is_source */ 0))) { return ret; } | 5020 | /* is_source */ 0))) { return ret; } |
5134 | 5021 | ||
5135 | /* SSMATCH option: search small matches: continue the incremental checksum | 5022 | /* Copy instruction. */ |
5136 | * through the matched material. Only if not lazy matching. */ | ||
5137 | if (SSMATCH && !TRYLAZYLEN (match_length, stream->input_position, stream->avail_in)) | ||
5138 | { | ||
5139 | usize_t avail = stream->avail_in - SLOOK - stream->input_position; | ||
5140 | usize_t ml_m1 = match_length - 1; | ||
5141 | usize_t right; | ||
5142 | int aincr; | ||
5143 | |||
5144 | IF_DEBUG (usize_t nposi = stream->input_position + match_length); | ||
5145 | |||
5146 | /* Avail is the last offset we can compute an incremental cksum. If the | ||
5147 | * match length exceeds that offset then we are finished performing | ||
5148 | * incremental updates after this step. */ | ||
5149 | if (ml_m1 < avail) | ||
5150 | { | ||
5151 | right = ml_m1; | ||
5152 | aincr = 1; | ||
5153 | } | ||
5154 | else | ||
5155 | { | ||
5156 | right = avail; | ||
5157 | aincr = 0; | ||
5158 | } | ||
5159 | |||
5160 | /* Compute incremental checksums within the match. */ | ||
5161 | while (right > 0) | ||
5162 | { | ||
5163 | SMALL_CKSUM_UPDATE (scksum, inp, SLOOK); | ||
5164 | if (DO_LARGE && (stream->input_position + LLOOK < stream->avail_in)) { | ||
5165 | LARGE_CKSUM_UPDATE (lcksum, inp, LLOOK); | ||
5166 | } | ||
5167 | |||
5168 | inp += 1; | ||
5169 | stream->input_position += 1; | ||
5170 | right -= 1; | ||
5171 | sinx = xd3_checksum_hash (& stream->small_hash, scksum); | ||
5172 | |||
5173 | IF_DEBUG (xd3_verify_small_state (stream, inp, scksum)); | ||
5174 | |||
5175 | xd3_scksum_insert (stream, sinx, scksum, stream->input_position); | ||
5176 | } | ||
5177 | |||
5178 | if (aincr) | ||
5179 | { | ||
5180 | /* Keep searching... */ | ||
5181 | if (DO_RUN) { run_l = xd3_comprun (inp+1, SLOOK-1, & run_c); } | ||
5182 | XD3_ASSERT (nposi == stream->input_position + 1); | ||
5183 | XD3_ASSERT (stream->input_position + SLOOK < stream->avail_in); | ||
5184 | stream->min_match = MIN_MATCH; | ||
5185 | goto updatesure; | ||
5186 | } | ||
5187 | else | ||
5188 | { | ||
5189 | /* Not enough input for another match. */ | ||
5190 | XD3_ASSERT (stream->input_position + SLOOK >= stream->avail_in); | ||
5191 | goto loopnomore; | ||
5192 | } | ||
5193 | } | ||
5194 | |||
5195 | /* Else case: copy instruction, but no SSMATCH. */ | ||
5196 | HANDLELAZY (match_length); | 5023 | HANDLELAZY (match_length); |
5197 | } | 5024 | } |
5198 | } | 5025 | } |
@@ -5213,8 +5040,6 @@ XD3_TEMPLATE(xd3_string_match_) (xd3_stream *stream) | |||
5213 | goto loopnomore; | 5040 | goto loopnomore; |
5214 | } | 5041 | } |
5215 | 5042 | ||
5216 | updatesure: | ||
5217 | |||
5218 | /* Compute next RUN, CKSUM */ | 5043 | /* Compute next RUN, CKSUM */ |
5219 | if (DO_RUN) { NEXTRUN (inp[SLOOK]); } | 5044 | if (DO_RUN) { NEXTRUN (inp[SLOOK]); } |
5220 | if (DO_SMALL) { SMALL_CKSUM_UPDATE (scksum, inp, SLOOK); } | 5045 | if (DO_SMALL) { SMALL_CKSUM_UPDATE (scksum, inp, SLOOK); } |