summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorjosh.macdonald <jmacd@users.noreply.github.com>2007-10-28 10:08:29 +0000
committerjosh.macdonald <jmacd@users.noreply.github.com>2007-10-28 10:08:29 +0000
commit7c16f576f4c8e292a93b5855e05eda5fedfba31a (patch)
treefd9ad8dc3cf8e7f62e5e6c35e087c54abd3f39c1
parentf920533ee0105ea7cde76fff2db37bb8fe40f0ca (diff)
ENC_FLUSH -> ENC_INSTR
-rw-r--r--xdelta3/xdelta3.c298
-rw-r--r--xdelta3/xdelta3.h25
2 files changed, 190 insertions, 133 deletions
diff --git a/xdelta3/xdelta3.c b/xdelta3/xdelta3.c
index faf4dd7..5a79bf4 100644
--- a/xdelta3/xdelta3.c
+++ b/xdelta3/xdelta3.c
@@ -2000,7 +2000,7 @@ xd3_alloc_cache (xd3_stream *stream)
2000 return 0; 2000 return 0;
2001} 2001}
2002 2002
2003static void 2003void
2004xd3_init_cache (xd3_addr_cache* acache) 2004xd3_init_cache (xd3_addr_cache* acache)
2005{ 2005{
2006 if (acache->s_near > 0) 2006 if (acache->s_near > 0)
@@ -2746,9 +2746,10 @@ xd3_iopt_free_nonadd (xd3_stream *stream, xd3_rinst *i)
2746 } 2746 }
2747} 2747}
2748 2748
2749/* When an instruction is ready to flush from the iopt buffer, this function is called to 2749/* When an instruction is ready to flush from the iopt buffer, this
2750 * produce an encoding. It writes the instruction plus size, address, and data to the 2750 * function is called to produce an encoding. It writes the
2751 * various encoding sections. */ 2751 * instruction plus size, address, and data to the various encoding
2752 * sections. */
2752static int 2753static int
2753xd3_iopt_finish_encoding (xd3_stream *stream, xd3_rinst *inst) 2754xd3_iopt_finish_encoding (xd3_stream *stream, xd3_rinst *inst)
2754{ 2755{
@@ -2767,9 +2768,10 @@ xd3_iopt_finish_encoding (xd3_stream *stream, xd3_rinst *inst)
2767 2768
2768 if (src != NULL) 2769 if (src != NULL)
2769 { 2770 {
2770 /* If there is a source copy, the source must have its source window decided 2771 /* If there is a source copy, the source must have its
2771 * before we can encode. This can be bad -- we have to make this decision 2772 * source window decided before we can encode. This can
2772 * even if no source matches have been found. */ 2773 * be bad -- we have to make this decision even if no
2774 * source matches have been found. */
2773 if (stream->srcwin_decided == 0) 2775 if (stream->srcwin_decided == 0)
2774 { 2776 {
2775 if ((ret = xd3_srcwin_setup (stream))) { return ret; } 2777 if ((ret = xd3_srcwin_setup (stream))) { return ret; }
@@ -2881,8 +2883,9 @@ xd3_iopt_finish_encoding (xd3_stream *stream, xd3_rinst *inst)
2881 return 0; 2883 return 0;
2882} 2884}
2883 2885
2884/* This possibly encodes an add instruction, iadd, which must remain on the stack until 2886/* This possibly encodes an add instruction, iadd, which must remain
2885 * the following call to xd3_iopt_finish_encoding. */ 2887 * on the stack until the following call to
2888 * xd3_iopt_finish_encoding. */
2886static int 2889static int
2887xd3_iopt_add (xd3_stream *stream, usize_t pos, xd3_rinst *iadd) 2890xd3_iopt_add (xd3_stream *stream, usize_t pos, xd3_rinst *iadd)
2888{ 2891{
@@ -2901,8 +2904,9 @@ xd3_iopt_add (xd3_stream *stream, usize_t pos, xd3_rinst *iadd)
2901 return 0; 2904 return 0;
2902} 2905}
2903 2906
2904/* This function calls xd3_iopt_finish_encoding to finish encoding an instruction, and it 2907/* This function calls xd3_iopt_finish_encoding to finish encoding an
2905 * may also produce an add instruction for an unmatched region. */ 2908 * instruction, and it may also produce an add instruction for an
2909 * unmatched region. */
2906static int 2910static int
2907xd3_iopt_add_encoding (xd3_stream *stream, xd3_rinst *inst) 2911xd3_iopt_add_encoding (xd3_stream *stream, xd3_rinst *inst)
2908{ 2912{
@@ -2936,9 +2940,10 @@ xd3_iopt_add_finalize (xd3_stream *stream)
2936 return 0; 2940 return 0;
2937} 2941}
2938 2942
2939/* Compact the instruction buffer by choosing the best non-overlapping instructions when 2943/* Compact the instruction buffer by choosing the best non-overlapping
2940 * lazy string-matching. There are no ADDs in the iopt buffer because those are 2944 * instructions when lazy string-matching. There are no ADDs in the
2941 * synthesized in xd3_iopt_add_encoding and during xd3_iopt_add_finalize. */ 2945 * iopt buffer because those are synthesized in xd3_iopt_add_encoding
2946 * and during xd3_iopt_add_finalize. */
2942static int 2947static int
2943xd3_iopt_flush_instructions (xd3_stream *stream, int force) 2948xd3_iopt_flush_instructions (xd3_stream *stream, int force)
2944{ 2949{
@@ -2955,9 +2960,9 @@ xd3_iopt_flush_instructions (xd3_stream *stream, int force)
2955 2960
2956 XD3_ASSERT (xd3_iopt_check (stream)); 2961 XD3_ASSERT (xd3_iopt_check (stream));
2957 2962
2958 /* Note: once tried to skip this step if it's possible to assert there are no 2963 /* Note: once tried to skip this step if it's possible to assert
2959 * overlapping instructions. Doesn't work because xd3_opt_erase leaves overlapping 2964 * there are no overlapping instructions. Doesn't work because
2960 * instructions. */ 2965 * xd3_opt_erase leaves overlapping instructions. */
2961 while (! xd3_rlist_end (& stream->iopt_used, r1) && 2966 while (! xd3_rlist_end (& stream->iopt_used, r1) &&
2962 ! xd3_rlist_end (& stream->iopt_used, r2 = xd3_rlist_next (r1))) 2967 ! xd3_rlist_end (& stream->iopt_used, r2 = xd3_rlist_next (r1)))
2963 { 2968 {
@@ -2995,8 +3000,9 @@ xd3_iopt_flush_instructions (xd3_stream *stream, int force)
2995 r2moff = r2end - r1end; 3000 r2moff = r2end - r1end;
2996 gap = r2end - r1->pos; 3001 gap = r2end - r1->pos;
2997 3002
2998 /* If the two matches overlap almost entirely, choose the better match and discard 3003 /* If the two matches overlap almost entirely, choose the better
2999 * the other. This heuristic is BLACK MAGIC. Havesomething better? */ 3004 * match and discard the other. This heuristic is BLACK MAGIC.
3005 * Havesomething better? */
3000 if (gap < 2*MIN_MATCH || r2moff <= 2 || r2off <= 2) 3006 if (gap < 2*MIN_MATCH || r2moff <= 2 || r2off <= 2)
3001 { 3007 {
3002 /* Only one match should be used, choose the longer one. */ 3008 /* Only one match should be used, choose the longer one. */
@@ -3014,16 +3020,16 @@ xd3_iopt_flush_instructions (xd3_stream *stream, int force)
3014 } 3020 }
3015 else 3021 else
3016 { 3022 {
3017 /* Shorten one of the instructions -- could be optimized based on the address 3023 /* Shorten one of the instructions -- could be optimized
3018 * cache. */ 3024 * based on the address cache. */
3019 usize_t average; 3025 usize_t average;
3020 usize_t newsize; 3026 usize_t newsize;
3021 usize_t adjust1; 3027 usize_t adjust1;
3022 3028
3023 XD3_ASSERT (r1end > r2->pos && r2end > r1->pos); 3029 XD3_ASSERT (r1end > r2->pos && r2end > r1->pos);
3024 3030
3025 /* Try to balance the length of both instructions, but avoid making both longer 3031 /* Try to balance the length of both instructions, but avoid
3026 * than MAX_MATCH_SPLIT . */ 3032 * making both longer than MAX_MATCH_SPLIT . */
3027 average = (gap) / 2; 3033 average = (gap) / 2;
3028 newsize = min (MAX_MATCH_SPLIT, gap - average); 3034 newsize = min (MAX_MATCH_SPLIT, gap - average);
3029 3035
@@ -3081,8 +3087,8 @@ xd3_iopt_flush_instructions (xd3_stream *stream, int force)
3081 3087
3082 XD3_ASSERT (xd3_iopt_check (stream)); 3088 XD3_ASSERT (xd3_iopt_check (stream));
3083 3089
3084 /* If forcing, pick instructions until the list is empty, otherwise this empties 50% of 3090 /* If forcing, pick instructions until the list is empty, otherwise
3085 * the queue. */ 3091 * this empties 50% of the queue. */
3086 for (flushed = 0; ! xd3_rlist_empty (& stream->iopt_used); ) 3092 for (flushed = 0; ! xd3_rlist_empty (& stream->iopt_used); )
3087 { 3093 {
3088 xd3_rinst *renc = xd3_rlist_pop_front (& stream->iopt_used); 3094 xd3_rinst *renc = xd3_rlist_pop_front (& stream->iopt_used);
@@ -3098,9 +3104,9 @@ xd3_iopt_flush_instructions (xd3_stream *stream, int force)
3098 break; 3104 break;
3099 } 3105 }
3100 3106
3101 /* If there are only two instructions remaining, break, because they were 3107 /* If there are only two instructions remaining, break,
3102 * not optimized. This means there were more than 50% eliminated by the 3108 * because they were not optimized. This means there were
3103 * loop above. */ 3109 * more than 50% eliminated by the loop above. */
3104 r1 = xd3_rlist_front (& stream->iopt_used); 3110 r1 = xd3_rlist_front (& stream->iopt_used);
3105 if (xd3_rlist_end(& stream->iopt_used, r1) || 3111 if (xd3_rlist_end(& stream->iopt_used, r1) ||
3106 xd3_rlist_end(& stream->iopt_used, r2 = xd3_rlist_next (r1)) || 3112 xd3_rlist_end(& stream->iopt_used, r2 = xd3_rlist_next (r1)) ||
@@ -3156,10 +3162,11 @@ xd3_iopt_get_slot (xd3_stream *stream, xd3_rinst** iptr)
3156 return 0; 3162 return 0;
3157} 3163}
3158 3164
3159/* A copy is about to be emitted that extends backwards to POS, therefore it may 3165/* A copy is about to be emitted that extends backwards to POS,
3160 * completely cover some existing instructions in the buffer. If an instruction is 3166 * therefore it may completely cover some existing instructions in the
3161 * completely covered by this new match, erase it. If the new instruction is covered by 3167 * buffer. If an instruction is completely covered by this new match,
3162 * the previous one, return 1 to skip it. */ 3168 * erase it. If the new instruction is covered by the previous one,
3169 * return 1 to skip it. */
3163static void 3170static void
3164xd3_iopt_erase (xd3_stream *stream, usize_t pos, usize_t size) 3171xd3_iopt_erase (xd3_stream *stream, usize_t pos, usize_t size)
3165{ 3172{
@@ -3167,21 +3174,22 @@ xd3_iopt_erase (xd3_stream *stream, usize_t pos, usize_t size)
3167 { 3174 {
3168 xd3_rinst *r = xd3_rlist_back (& stream->iopt_used); 3175 xd3_rinst *r = xd3_rlist_back (& stream->iopt_used);
3169 3176
3170 /* Verify that greedy is working. The previous instruction should end before the 3177 /* Verify that greedy is working. The previous instruction
3171 * new one begins. */ 3178 * should end before the new one begins. */
3172 XD3_ASSERT ((stream->flags & XD3_BEGREEDY) == 0 || (r->pos + r->size <= pos)); 3179 XD3_ASSERT ((stream->flags & XD3_BEGREEDY) == 0 || (r->pos + r->size <= pos));
3173 /* Verify that min_match is working. The previous instruction should end before the 3180 /* Verify that min_match is working. The previous instruction
3174 * new one ends. */ 3181 * should end before the new one ends. */
3175 XD3_ASSERT ((stream->flags & XD3_BEGREEDY) != 0 || (r->pos + r->size < pos + size)); 3182 XD3_ASSERT ((stream->flags & XD3_BEGREEDY) != 0 || (r->pos + r->size < pos + size));
3176 3183
3177 /* See if the last instruction starts before the new instruction. If so, there is 3184 /* See if the last instruction starts before the new
3178 * nothing to erase. */ 3185 * instruction. If so, there is nothing to erase. */
3179 if (r->pos < pos) 3186 if (r->pos < pos)
3180 { 3187 {
3181 return; 3188 return;
3182 } 3189 }
3183 3190
3184 /* Otherwise, the new instruction covers the old one, delete it and repeat. */ 3191 /* Otherwise, the new instruction covers the old one, delete it
3192 and repeat. */
3185 xd3_rlist_remove (r); 3193 xd3_rlist_remove (r);
3186 xd3_rlist_push_back (& stream->iopt_free, r); 3194 xd3_rlist_push_back (& stream->iopt_free, r);
3187 --stream->i_slots_used; 3195 --stream->i_slots_used;
@@ -3204,9 +3212,9 @@ xd3_iopt_last_matched (xd3_stream *stream)
3204 return r->pos + r->size; 3212 return r->pos + r->size;
3205} 3213}
3206 3214
3207/****************************************************************************************** 3215/*********************************************************
3208 Emit routines 3216 Emit routines
3209 ******************************************************************************************/ 3217 ***********************************************************/
3210 3218
3211static int 3219static int
3212xd3_emit_single (xd3_stream *stream, xd3_rinst *single, uint code) 3220xd3_emit_single (xd3_stream *stream, xd3_rinst *single, uint code)
@@ -3273,10 +3281,11 @@ xd3_emit_run (xd3_stream *stream, usize_t pos, usize_t size, uint8_t run_c)
3273 return 0; 3281 return 0;
3274} 3282}
3275 3283
3276/* This enters a potential copy instruction into the iopt buffer. The position argument 3284/* This enters a potential copy instruction into the iopt buffer. The
3277 * is relative to the target window.. */ 3285 * position argument is relative to the target window.. */
3278static inline int 3286int
3279xd3_found_match (xd3_stream *stream, usize_t pos, usize_t size, xoff_t addr, int is_source) 3287xd3_found_match (xd3_stream *stream, usize_t pos,
3288 usize_t size, xoff_t addr, int is_source)
3280{ 3289{
3281 xd3_rinst* ri; 3290 xd3_rinst* ri;
3282 int ret; 3291 int ret;
@@ -3557,15 +3566,15 @@ xd3_encode_init_buffers (xd3_stream *stream)
3557} 3566}
3558 3567
3559/* This function allocates all memory initially used by the encoder. */ 3568/* This function allocates all memory initially used by the encoder. */
3560static int 3569int
3561xd3_encode_init (xd3_stream *stream) 3570xd3_encode_init (xd3_stream *stream)
3562{ 3571{
3563 int large_comp = (stream->src != NULL); 3572 int large_comp = (stream->src != NULL);
3564 int small_comp = ! (stream->flags & XD3_NOCOMPRESS); 3573 int small_comp = ! (stream->flags & XD3_NOCOMPRESS);
3565 3574
3566 /* Memory allocations for checksum tables are delayed until xd3_string_match_init in the 3575 /* Memory allocations for checksum tables are delayed until
3567 * first call to string_match--that way identical or short inputs require no table 3576 * xd3_string_match_init in the first call to string_match--that way
3568 * allocation. */ 3577 * identical or short inputs require no table allocation. */
3569 if (large_comp) 3578 if (large_comp)
3570 { 3579 {
3571 usize_t hash_values = (stream->srcwin_maxsz / stream->smatcher.large_step); 3580 usize_t hash_values = (stream->srcwin_maxsz / stream->smatcher.large_step);
@@ -3608,9 +3617,9 @@ xd3_encode_init (xd3_stream *stream)
3608 return ENOMEM; 3617 return ENOMEM;
3609} 3618}
3610 3619
3611/* Called after the ENC_POSTOUT state, this puts the output buffers back into separate 3620/* Called after the ENC_POSTOUT state, this puts the output buffers
3612 * lists and re-initializes some variables. (The output lists were spliced together 3621 * back into separate lists and re-initializes some variables. (The
3613 * during the ENC_FLUSH state.) */ 3622 * output lists were spliced together during the ENC_FLUSH state.) */
3614static void 3623static void
3615xd3_encode_reset (xd3_stream *stream) 3624xd3_encode_reset (xd3_stream *stream)
3616{ 3625{
@@ -3674,21 +3683,24 @@ xd3_encode_input (xd3_stream *stream)
3674 3683
3675 case ENC_INPUT: 3684 case ENC_INPUT:
3676 3685
3677 /* If there is no input yet, just return. This checks for next_in == NULL, not 3686 /* If there is no input yet, just return. This checks for
3678 * avail_in == 0 since zero bytes is a valid input. There is an assertion in 3687 * next_in == NULL, not avail_in == 0 since zero bytes is a
3679 * xd3_avail_input() that next_in != NULL for this reason. By returning right away 3688 * valid input. There is an assertion in xd3_avail_input() that
3680 * we avoid creating an input buffer before the caller has supplied its first data. 3689 * next_in != NULL for this reason. By returning right away we
3681 * It is possible for xd3_avail_input to be called both before and after the first 3690 * avoid creating an input buffer before the caller has supplied
3682 * call to xd3_encode_input(). */ 3691 * its first data. It is possible for xd3_avail_input to be
3692 * called both before and after the first call to
3693 * xd3_encode_input(). */
3683 if (stream->next_in == NULL) 3694 if (stream->next_in == NULL)
3684 { 3695 {
3685 return XD3_INPUT; 3696 return XD3_INPUT;
3686 } 3697 }
3687 3698
3688 enc_flush: 3699 enc_flush:
3689 /* See if we should buffer the input: either if there is already a leftover buffer, 3700 /* See if we should buffer the input: either if there is already
3690 * or if the input is short of winsize without flush. The label at this point is 3701 * a leftover buffer, or if the input is short of winsize
3691 * reached by a goto below, when there is leftover input after postout. */ 3702 * without flush. The label at this point is reached by a goto
3703 * below, when there is leftover input after postout. */
3692 if ((stream->buf_leftover != NULL) || 3704 if ((stream->buf_leftover != NULL) ||
3693 (stream->avail_in < stream->winsize && ! (stream->flags & XD3_FLUSH))) 3705 (stream->avail_in < stream->winsize && ! (stream->flags & XD3_FLUSH)))
3694 { 3706 {
@@ -3717,14 +3729,17 @@ xd3_encode_input (xd3_stream *stream)
3717 switch (stream->match_state) 3729 switch (stream->match_state)
3718 { 3730 {
3719 case MATCH_TARGET: 3731 case MATCH_TARGET:
3720 /* Try matching forward at the start of the target. This is entered the 3732 /* Try matching forward at the start of the target.
3721 * first time through, to check for a perfect match, and whenever there is a 3733 * This is entered the first time through, to check for
3722 * source match that extends to the end of the previous window. The 3734 * a perfect match, and whenever there is a source match
3723 * match_srcpos field is initially zero and later set during 3735 * that extends to the end of the previous window. The
3724 * xd3_source_extend_match. */ 3736 * match_srcpos field is initially zero and later set
3737 * during xd3_source_extend_match. */
3738
3725 if (stream->avail_in > 0) 3739 if (stream->avail_in > 0)
3726 { 3740 {
3727 /* This call can't fail because the source window is unrestricted. */ 3741 /* This call can't fail because the source window is
3742 unrestricted. */
3728 ret = xd3_source_match_setup (stream, stream->match_srcpos); 3743 ret = xd3_source_match_setup (stream, stream->match_srcpos);
3729 XD3_ASSERT (ret == 0); 3744 XD3_ASSERT (ret == 0);
3730 stream->match_state = MATCH_FORWARD; 3745 stream->match_state = MATCH_FORWARD;
@@ -3748,9 +3763,10 @@ xd3_encode_input (xd3_stream *stream)
3748 } 3763 }
3749 3764
3750 case MATCH_SEARCHING: 3765 case MATCH_SEARCHING:
3751 /* Continue string matching. (It's possible that the initial match 3766 /* Continue string matching. (It's possible that the
3752 * continued through the entire input, in which case we're still in 3767 * initial match continued through the entire input, in
3753 * MATCH_FORWARD and should remain so for the next input window.) */ 3768 * which case we're still in MATCH_FORWARD and should
3769 * remain so for the next input window.) */
3754 break; 3770 break;
3755 } 3771 }
3756 } 3772 }
@@ -3762,8 +3778,14 @@ xd3_encode_input (xd3_stream *stream)
3762 return ret; 3778 return ret;
3763 } 3779 }
3764 3780
3765 /* Flush the instrution buffer, then possibly add one more instruction, then emit 3781 stream->enc_state = ENC_INSTR;
3766 * the header. */ 3782
3783 case ENC_INSTR:
3784 /* Note: Jump here to encode VCDIFF deltas w/o using this
3785 * string-matching code. */
3786
3787 /* Flush the instrution buffer, then possibly add one more
3788 * instruction, then emit the header. */
3767 if ((ret = xd3_iopt_flush_instructions (stream, 1)) || 3789 if ((ret = xd3_iopt_flush_instructions (stream, 1)) ||
3768 (ret = xd3_iopt_add_finalize (stream))) 3790 (ret = xd3_iopt_add_finalize (stream)))
3769 { 3791 {
@@ -3773,7 +3795,8 @@ xd3_encode_input (xd3_stream *stream)
3773 stream->enc_state = ENC_FLUSH; 3795 stream->enc_state = ENC_FLUSH;
3774 3796
3775 case ENC_FLUSH: 3797 case ENC_FLUSH:
3776 /* Note: main_recode_func() bypasses string-matching by setting ENC_FLUSH. */ 3798 /* Note: main_recode_func() bypasses string-matching by setting
3799 * ENC_FLUSH. */
3777 if ((ret = xd3_emit_hdr (stream))) 3800 if ((ret = xd3_emit_hdr (stream)))
3778 { 3801 {
3779 return ret; 3802 return ret;
@@ -3782,9 +3805,9 @@ xd3_encode_input (xd3_stream *stream)
3782 /* Begin output. */ 3805 /* Begin output. */
3783 stream->enc_current = HDR_HEAD (stream); 3806 stream->enc_current = HDR_HEAD (stream);
3784 3807
3785 /* Chain all the outputs together. After doing this, it looks as if there is only 3808 /* Chain all the outputs together. After doing this, it looks
3786 * one section. The other enc_heads are set to NULL to avoid freeing them more than 3809 * as if there is only one section. The other enc_heads are set
3787 * once. */ 3810 * to NULL to avoid freeing them more than once. */
3788 for (i = 1; i < ENC_SECTS; i += 1) 3811 for (i = 1; i < ENC_SECTS; i += 1)
3789 { 3812 {
3790 stream->enc_tails[i-1]->next_page = stream->enc_heads[i]; 3813 stream->enc_tails[i-1]->next_page = stream->enc_heads[i];
@@ -3798,9 +3821,9 @@ xd3_encode_input (xd3_stream *stream)
3798 stream->avail_out = stream->enc_current->next; 3821 stream->avail_out = stream->enc_current->next;
3799 stream->total_out += (xoff_t) stream->avail_out; 3822 stream->total_out += (xoff_t) stream->avail_out;
3800 3823
3801 /* If there is any output in this buffer, return it, otherwise fall through to 3824 /* If there is any output in this buffer, return it, otherwise
3802 * handle the next buffer or finish the window after all buffers have been 3825 * fall through to handle the next buffer or finish the window
3803 * output. */ 3826 * after all buffers have been output. */
3804 if (stream->avail_out > 0) 3827 if (stream->avail_out > 0)
3805 { 3828 {
3806 /* This is the only place xd3_encode returns XD3_OUTPUT */ 3829 /* This is the only place xd3_encode returns XD3_OUTPUT */
@@ -3849,9 +3872,9 @@ xd3_encode_input (xd3_stream *stream)
3849} 3872}
3850#endif /* XD3_ENCODER */ 3873#endif /* XD3_ENCODER */
3851 3874
3852/****************************************************************************************** 3875/*****************************************************************
3853 Client convenience functions 3876 Client convenience functions
3854 ******************************************************************************************/ 3877 ******************************************************************/
3855 3878
3856static int 3879static int
3857xd3_process_stream (int is_encode, 3880xd3_process_stream (int is_encode,
@@ -4067,13 +4090,14 @@ xd3_encode_memory (const uint8_t *input,
4067#endif 4090#endif
4068 4091
4069 4092
4070/****************************************************************************************** 4093/*************************************************************
4071 String matching helpers 4094 String matching helpers
4072 ******************************************************************************************/ 4095 *************************************************************/
4073 4096
4074#if XD3_ENCODER 4097#if XD3_ENCODER
4075/* Do the initial xd3_string_match() checksum table setup. Allocations are delayed until 4098/* Do the initial xd3_string_match() checksum table setup.
4076 * first use to avoid allocation sometimes (e.g., perfect matches, zero-length inputs). */ 4099 * Allocations are delayed until first use to avoid allocation
4100 * sometimes (e.g., perfect matches, zero-length inputs). */
4077static int 4101static int
4078xd3_string_match_init (xd3_stream *stream) 4102xd3_string_match_init (xd3_stream *stream)
4079{ 4103{
@@ -4160,9 +4184,9 @@ xd3_source_cksum_offset(xd3_stream *stream, usize_t low)
4160} 4184}
4161#endif 4185#endif
4162 4186
4163/* This function sets up the stream->src fields srcbase, srclen. The call is delayed 4187/* This function sets up the stream->src fields srcbase, srclen. The
4164 * until these values are needed to encode a copy address. At this point the decision has 4188 * call is delayed until these values are needed to encode a copy
4165 * to be made. */ 4189 * address. At this point the decision has to be made. */
4166static int 4190static int
4167xd3_srcwin_setup (xd3_stream *stream) 4191xd3_srcwin_setup (xd3_stream *stream)
4168{ 4192{
@@ -4175,17 +4199,19 @@ xd3_srcwin_setup (xd3_stream *stream)
4175 /* Avoid repeating this call. */ 4199 /* Avoid repeating this call. */
4176 stream->srcwin_decided = 1; 4200 stream->srcwin_decided = 1;
4177 4201
4178 /* If the stream is flushing, then the iopt buffer was able to contain the complete 4202 /* If the stream is flushing, then the iopt buffer was able to
4179 * encoding. If no copies were issued no source window is actually needed. This 4203 * contain the complete encoding. If no copies were issued no
4180 * prevents the VCDIFF header from including source base/len. xd3_emit_hdr checks 4204 * source window is actually needed. This prevents the VCDIFF
4181 * for srclen == 0. */ 4205 * header from including source base/len. xd3_emit_hdr checks for
4182 if (stream->enc_state == ENC_FLUSH && stream->match_maxaddr == 0) 4206 * srclen == 0. */
4207 if (stream->enc_state == ENC_INSTR && stream->match_maxaddr == 0)
4183 { 4208 {
4184 goto done; 4209 goto done;
4185 } 4210 }
4186 4211
4187 /* Check for overflow, srclen is usize_t - this can't happen unless XD3_DEFAULT_SRCBACK 4212 /* Check for overflow, srclen is usize_t - this can't happen unless
4188 * and related parameters are extreme - should use smaller windows. */ 4213 * XD3_DEFAULT_SRCBACK and related parameters are extreme - should
4214 * use smaller windows. */
4189 length = stream->match_maxaddr - stream->match_minaddr; 4215 length = stream->match_maxaddr - stream->match_minaddr;
4190 4216
4191 xoff_t x = (xoff_t) USIZE_T_MAX; 4217 xoff_t x = (xoff_t) USIZE_T_MAX;
@@ -4195,9 +4221,9 @@ xd3_srcwin_setup (xd3_stream *stream)
4195 return XD3_INTERNAL; 4221 return XD3_INTERNAL;
4196 } 4222 }
4197 4223
4198 /* If ENC_FLUSH, then we know the exact source window to use because no more copies can 4224 /* If ENC_INSTR, then we know the exact source window to use because
4199 * be issued. */ 4225 * no more copies can be issued. */
4200 if (stream->enc_state == ENC_FLUSH) 4226 if (stream->enc_state == ENC_INSTR)
4201 { 4227 {
4202 src->srcbase = stream->match_minaddr; 4228 src->srcbase = stream->match_minaddr;
4203 src->srclen = (usize_t) length; 4229 src->srclen = (usize_t) length;
@@ -4205,8 +4231,9 @@ xd3_srcwin_setup (xd3_stream *stream)
4205 goto done; 4231 goto done;
4206 } 4232 }
4207 4233
4208 /* Otherwise, we have to make a guess. More copies may still be issued, but we have to 4234 /* Otherwise, we have to make a guess. More copies may still be
4209 * decide the source window base and length now. */ 4235 * issued, but we have to decide the source window base and length
4236 * now. */
4210 src->srcbase = stream->match_minaddr; 4237 src->srcbase = stream->match_minaddr;
4211 src->srclen = max ((usize_t) length, stream->avail_in + (stream->avail_in >> 2)); 4238 src->srclen = max ((usize_t) length, stream->avail_in + (stream->avail_in >> 2));
4212 if (src->size < src->srcbase + (xoff_t) src->srclen) 4239 if (src->size < src->srcbase + (xoff_t) src->srclen)
@@ -4217,16 +4244,18 @@ xd3_srcwin_setup (xd3_stream *stream)
4217 4244
4218 XD3_ASSERT (src->srclen); 4245 XD3_ASSERT (src->srclen);
4219 done: 4246 done:
4220 /* Set the taroff. This convenience variable is used even when stream->src == NULL. */ 4247 /* Set the taroff. This convenience variable is used even when
4248 stream->src == NULL. */
4221 stream->taroff = src->srclen; 4249 stream->taroff = src->srclen;
4222 return 0; 4250 return 0;
4223} 4251}
4224 4252
4225/* Sets the bounding region for a newly discovered source match, prior to calling 4253/* Sets the bounding region for a newly discovered source match, prior
4226 * xd3_source_extend_match(). This sets the match_maxfwd, match_maxback variables. Note: 4254 * to calling xd3_source_extend_match(). This sets the match_maxfwd,
4227 * srcpos is an absolute position (xoff_t) but the match_maxfwd, match_maxback variables 4255 * match_maxback variables. Note: srcpos is an absolute position
4228 * are usize_t. Returns 0 if the setup succeeds, or 1 if the source position lies outside 4256 * (xoff_t) but the match_maxfwd, match_maxback variables are usize_t.
4229 * an already-decided srcbase/srclen window. */ 4257 * Returns 0 if the setup succeeds, or 1 if the source position lies
4258 * outside an already-decided srcbase/srclen window. */
4230static int 4259static int
4231xd3_source_match_setup (xd3_stream *stream, xoff_t srcpos) 4260xd3_source_match_setup (xd3_stream *stream, xoff_t srcpos)
4232{ 4261{
@@ -4238,19 +4267,22 @@ xd3_source_match_setup (xd3_stream *stream, xoff_t srcpos)
4238 stream->match_back = 0; 4267 stream->match_back = 0;
4239 stream->match_fwd = 0; 4268 stream->match_fwd = 0;
4240 4269
4241 /* Going backwards, the 1.5-pass algorithm allows some already-matched input may be 4270 /* Going backwards, the 1.5-pass algorithm allows some
4242 * covered by a longer source match. The greedy algorithm does not allow this. */ 4271 * already-matched input may be covered by a longer source match.
4272 * The greedy algorithm does not allow this. */
4243 if (stream->flags & XD3_BEGREEDY) 4273 if (stream->flags & XD3_BEGREEDY)
4244 { 4274 {
4245 /* The greedy algorithm allows backward matching to the last matched position. */ 4275 /* The greedy algorithm allows backward matching to the last
4276 matched position. */
4246 greedy_or_not = xd3_iopt_last_matched (stream); 4277 greedy_or_not = xd3_iopt_last_matched (stream);
4247 } 4278 }
4248 else 4279 else
4249 { 4280 {
4250 /* The 1.5-pass algorithm allows backward matching to go back as far as the 4281 /* The 1.5-pass algorithm allows backward matching to go back as
4251 * unencoded offset, which is updated as instructions pass out of the iopt buffer. 4282 * far as the unencoded offset, which is updated as instructions
4252 * If this (default) is chosen, it means xd3_iopt_erase may be called to eliminate 4283 * pass out of the iopt buffer. If this (default) is chosen, it
4253 * instructions when a covering source match is found. */ 4284 * means xd3_iopt_erase may be called to eliminate instructions
4285 * when a covering source match is found. */
4254 greedy_or_not = stream->unencoded_offset; 4286 greedy_or_not = stream->unencoded_offset;
4255 } 4287 }
4256 4288
@@ -4264,13 +4296,14 @@ xd3_source_match_setup (xd3_stream *stream, xoff_t srcpos)
4264 XD3_ASSERT (stream->avail_in > stream->input_position); 4296 XD3_ASSERT (stream->avail_in > stream->input_position);
4265 stream->match_maxfwd = stream->avail_in - stream->input_position; 4297 stream->match_maxfwd = stream->avail_in - stream->input_position;
4266 4298
4267 /* Now we take the source position into account. It depends whether the srclen/srcbase 4299 /* Now we take the source position into account. It depends whether
4268 * have been decided yet. */ 4300 * the srclen/srcbase have been decided yet. */
4269 if (stream->srcwin_decided == 0) 4301 if (stream->srcwin_decided == 0)
4270 { 4302 {
4271 /* Unrestricted case: the match can cover the entire source, 0--src->size. We 4303 /* Unrestricted case: the match can cover the entire source,
4272 * compare the usize_t match_maxfwd/match_maxback against the xoff_t src->size/srcpos values 4304 * 0--src->size. We compare the usize_t
4273 * and take the min. */ 4305 * match_maxfwd/match_maxback against the xoff_t
4306 * src->size/srcpos values and take the min. */
4274 xoff_t srcavail; 4307 xoff_t srcavail;
4275 4308
4276 if (srcpos < (xoff_t) stream->match_maxback) 4309 if (srcpos < (xoff_t) stream->match_maxback)
@@ -4323,23 +4356,29 @@ xd3_source_match_setup (xd3_stream *stream, xoff_t srcpos)
4323 return 1; 4356 return 1;
4324} 4357}
4325 4358
4326/* This function expands the source match backward and forward. It is reentrant, since 4359/* This function expands the source match backward and forward. It is
4327 * xd3_getblk may return XD3_GETSRCBLK, so most variables are kept in xd3_stream. There 4360 * reentrant, since xd3_getblk may return XD3_GETSRCBLK, so most
4328 * are two callers of this function, the string_matching routine when a checksum match is 4361 * variables are kept in xd3_stream. There are two callers of this
4329 * discovered, and xd3_encode_input whenever a continuing (or initial) match is suspected. 4362 * function, the string_matching routine when a checksum match is
4330 * The two callers do different things with the input_position, thus this function leaves 4363 * discovered, and xd3_encode_input whenever a continuing (or initial)
4331 * that variable untouched. If a match is taken the resulting stream->match_fwd is left 4364 * match is suspected. The two callers do different things with the
4365 * input_position, thus this function leaves that variable untouched.
4366 * If a match is taken the resulting stream->match_fwd is left
4332 * non-zero. */ 4367 * non-zero. */
4333static int 4368static int
4334xd3_source_extend_match (xd3_stream *stream) 4369xd3_source_extend_match (xd3_stream *stream)
4335{ 4370{
4336 int ret; 4371 int ret;
4337 xd3_source *src = stream->src; 4372 xd3_source *src = stream->src;
4338 xoff_t matchoff; /* matchoff is the current right/left-boundary of the source match being tested. */ 4373 xoff_t matchoff; /* matchoff is the current right/left-boundary of
4339 usize_t streamoff; /* streamoff is the current right/left-boundary of the input match being tested. */ 4374 the source match being tested. */
4340 xoff_t tryblk; /* tryblk, tryoff are the block, offset position of matchoff */ 4375 usize_t streamoff; /* streamoff is the current right/left-boundary
4376 of the input match being tested. */
4377 xoff_t tryblk; /* tryblk, tryoff are the block, offset position
4378 of matchoff */
4341 usize_t tryoff; 4379 usize_t tryoff;
4342 usize_t tryrem; /* tryrem is the number of matchable bytes on the source block */ 4380 usize_t tryrem; /* tryrem is the number of matchable bytes on the
4381 source block */
4343 4382
4344 XD3_ASSERT (src != NULL); 4383 XD3_ASSERT (src != NULL);
4345 4384
@@ -4376,7 +4415,8 @@ xd3_source_extend_match (xd3_stream *stream)
4376 } 4415 }
4377 4416
4378 /* OPT: This code can be optimized. */ 4417 /* OPT: This code can be optimized. */
4379 for (tryrem = min (tryoff, stream->match_maxback - stream->match_back); 4418 for (tryrem = min (tryoff, stream->match_maxback -
4419 stream->match_back);
4380 tryrem != 0; 4420 tryrem != 0;
4381 tryrem -= 1, stream->match_back += 1) 4421 tryrem -= 1, stream->match_back += 1)
4382 { 4422 {
diff --git a/xdelta3/xdelta3.h b/xdelta3/xdelta3.h
index 19f316d..ce90cd7 100644
--- a/xdelta3/xdelta3.h
+++ b/xdelta3/xdelta3.h
@@ -423,10 +423,11 @@ typedef enum {
423 ENC_INIT = 0, /* xd3_encode_input has never been called. */ 423 ENC_INIT = 0, /* xd3_encode_input has never been called. */
424 ENC_INPUT = 1, /* waiting for xd3_avail_input () to be called. */ 424 ENC_INPUT = 1, /* waiting for xd3_avail_input () to be called. */
425 ENC_SEARCH = 2, /* currently searching for matches. */ 425 ENC_SEARCH = 2, /* currently searching for matches. */
426 ENC_FLUSH = 3, /* currently emitting output. */ 426 ENC_INSTR = 3, /* currently formatting output. */
427 ENC_POSTOUT = 4, /* after an output section. */ 427 ENC_FLUSH = 4, /* currently emitting output. */
428 ENC_POSTWIN = 5, /* after all output sections. */ 428 ENC_POSTOUT = 5, /* after an output section. */
429 ENC_ABORTED = 6, /* abort. */ 429 ENC_POSTWIN = 6, /* after all output sections. */
430 ENC_ABORTED = 7, /* abort. */
430} xd3_encode_state; 431} xd3_encode_state;
431 432
432/* The xd3_decode_input state machine steps through these states in 433/* The xd3_decode_input state machine steps through these states in
@@ -1023,6 +1024,22 @@ int xd3_get_appheader (xd3_stream *stream,
1023 * data. */ 1024 * data. */
1024int xd3_decoder_needs_source (xd3_stream *stream); 1025int xd3_decoder_needs_source (xd3_stream *stream);
1025 1026
1027
1028/* To generate a VCDIFF encoded delta with xd3_encode_init() from
1029 * another format, use:
1030 *
1031 * xd3_encode_init() -- initialze encoder state
1032 * xd3_init_cache() -- reset VCDIFF address cache
1033 * xd3_found_match() -- to report a copy instruction
1034 *
1035 * set stream->enc_state to ENC_INSTR and call xd3_encode_input as usual.
1036 */
1037int xd3_encode_init (xd3_stream *stream);
1038void xd3_init_cache (xd3_addr_cache* acache);
1039int xd3_found_match (xd3_stream *stream,
1040 usize_t pos, usize_t size,
1041 xoff_t addr, int is_source);
1042
1026/* Gives an error string for xdelta3-speficic errors, returns NULL for 1043/* Gives an error string for xdelta3-speficic errors, returns NULL for
1027 system errors */ 1044 system errors */
1028const char* xd3_strerror (int ret); 1045const char* xd3_strerror (int ret);