summaryrefslogtreecommitdiff
path: root/xdelta3/xdelta3.c
diff options
context:
space:
mode:
Diffstat (limited to 'xdelta3/xdelta3.c')
-rwxr-xr-xxdelta3/xdelta3.c6022
1 files changed, 6022 insertions, 0 deletions
diff --git a/xdelta3/xdelta3.c b/xdelta3/xdelta3.c
new file mode 100755
index 0000000..fb9a09f
--- /dev/null
+++ b/xdelta3/xdelta3.c
@@ -0,0 +1,6022 @@
1/* xdelta 3 - delta compression tools and library
2 * Copyright (C) 2001, 2003, 2004, 2005, 2006. Joshua P. MacDonald
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17
18 -------------------------------------------------------------------
19
20 Xdelta 3
21
22 The goal of this library is to to implement both the (stand-alone)
23 data-compression and delta-compression aspects of VCDIFF encoding, and
24 to support a programming interface that works like Zlib
25 (http://www.gzip.org/zlib.html). See RFC3284: The VCDIFF Generic
26 Differencing and Compression Data Format.
27
28 VCDIFF is a unified encoding that combines data-compression and
29 delta-encoding ("differencing").
30
31 VCDIFF has a detailed byte-code instruction set with many features.
32 The instruction format supports an immediate size operand for small
33 COPYs and ADDs (e.g., under 18 bytes). There are also instruction
34 "modes", which are used to compress COPY addresses by using two
35 address caches. An instruction mode refers to slots in the NEAR
36 and SAME caches for recent addresses. NEAR remembers the
37 previous 4 (by default) COPY addresses, and SAME catches
38 frequent re-uses of the same address using a 3-way (by default)
39 256-entry associative cache of [ADDR mod 256], the encoded byte.
40 A hit in the NEAR/SAME cache requires 0/1 ADDR bytes.
41
42 VCDIFF has a default instruction table, but an alternate
43 instruction tables may themselves be be delta-compressed and
44 included in the encoding header. This allows even more freedom.
45 There are 9 instruction modes in the default code table, 4 near, 3
46 same, VCD_SELF (absolute encoding) and VCD_HERE (relative to the
47 current position).
48
49 ----------------------------------------------------------------------
50
51 Algorithms
52
53 Aside from the details of encoding and decoding, there are a bunch
54 of algorithms needed.
55
56 1. STRING-MATCH. A two-level fingerprinting approach is used. A
57 single loop computes the two checksums -- small and large -- at
58 successive offsets in the TARGET file. The large checksum is more
59 accurate and is used to discover SOURCE matches, which are
60 potentially very long. The small checksum is used to discover
61 copies within the TARGET. Small matching, which is more expensive,
62 usually dominates the large STRING-MATCH costs in this code - the
63 more exhaustive the search, the better the results. Either of the
64 two string-matching mechanisms may be disabled. Currently, large
65 checksums are only performed in the source file, if present, and
66 small checksums are performed only in the left-over target input.
67 However, small matches are possible in the source file too, with a
68 range of possibilities. [I've seen a paper on this subject, but
69 I lost it.]
70
71 2. INSTRUCTION SELECTION. The IOPT buffer here represents a queue
72 used to store overlapping copy instructions. There are two possible
73 optimizations that go beyond a greedy search. Both of these fall
74 into the category of "non-greedy matching" optimizations.
75
76 The first optimization stems from backward SOURCE-COPY matching.
77 When a new SOURCE-COPY instruction covers a previous instruction in
78 the target completely, it is erased from the queue. Randal Burns
79 originally analyzed these algorithms and did a lot of related work
80 (\cite the 1.5-pass algorithm).
81
82 The second optimization comes by the encoding of common very-small
83 COPY and ADD instructions, for which there are special DOUBLE-code
84 instructions, which code two instructions in a single byte.
85
86 The cost of bad instruction-selection overhead is relatively high
87 for data-compression, relative to delta-compression, so this second
88 optimization is fairly important. With "lazy" matching (the name
89 used in Zlib for a similar optimization), the string-match
90 algorithm searches after a match for potential overlapping copy
91 instructions. In Xdelta and by default, VCDIFF, the minimum match
92 size is 4 bytes, whereas Zlib searches with a 3-byte minimum. This
93 feature, combined with double instructions, provides a nice
94 challenge. Search in this file for "black magic", a heuristic.
95
96 3. STREAM ALIGNMENT. Stream alignment is needed to compress large
97 inputs in constant space. TODO: redocument
98
99 4. WINDOW SELECTION. When the IOPT buffer flushes, in the first call
100 to xd3_iopt_finish_encoding containing any kind of copy instruction,
101 the parameters of the source window must be decided: the offset into
102 the source and the length of the window. Since the IOPT buffer is
103 finite, the program may be forced to fix these values before knowing
104 the best offset/length. XD3_DEFAULT_SRCBACK limits the length, but a
105 smaller length is preferred because all target copies are addressed
106 after source copies in the VCDIFF address space. Picking too large a
107 source window means larger address encoding.
108
109 If the IOPT buffer is filling easily, perhaps the target window is
110 too large. In any case, a decision is made (though an alternative is
111 to emit the sub-window right away, to reduce the winsize
112 automatically - not implemented, another alternative is to grow the
113 IOPT buffer, it is after all bounded in size by winsize.)
114
115 The algorithm is in xd3_srcwin_setup.
116
117 5. SECONDARY COMPRESSION. VCDIFF supports a secondary encoding to
118 be applied to the individual sections of the data format, which are
119 ADDRess, INSTruction, and DATA. Several secondary compressor
120 variations are implemented here, although none is standardized yet.
121
122 One is an adaptive huffman algorithm -- the FGK algorithm (Faller,
123 Gallager, and Knuth, 1985). This compressor is extremely slow.
124
125 The other is a simple static Huffman routine, which is the base
126 case of a semi-adaptive scheme published by D.J. Wheeler and first
127 widely used in bzip2 (by Julian Seward). This is a very
128 interesting algorithm, originally published in nearly cryptic form
129 by D.J. Wheeler. !!!NOTE!!! Because these are not standardized, the
130 -S option (no secondary compression) remains on by default.
131 ftp://ftp.cl.cam.ac.uk/users/djw3/bred3.{c,ps}
132 --------------------------------------------------------------------
133
134 Other Features
135
136 1. USER CONVENIENCE
137
138 For user convenience, it is essential to recognize Gzip-compressed
139 files and automatically Gzip-decompress them prior to
140 delta-compression (or else no delta-compression will be achieved
141 unless the user manually decompresses the inputs). The compressed
142 represention competes with Xdelta, and this must be hidden from the
143 command-line user interface. The Xdelta-1.x encoding was simple, not
144 compressed itself, so Xdelta-1.x uses Zlib internally to compress the
145 representation.
146
147 This implementation supports external compression, which implements
148 the necessary fork() and pipe() mechanics. There is a tricky step
149 involved to support automatic detection of a compressed input in a
150 non-seekable input. First you read a bit of the input to detect
151 magic headers. When a compressed format is recognized, exec() the
152 external compression program and create a second child process to
153 copy the original input stream. [Footnote: There is a difficulty
154 related to using Gzip externally. It is not possible to decompress
155 and recompress a Gzip file transparently. If FILE.GZ had a
156 cryptographic signature, then, after: (1) Gzip-decompression, (2)
157 Xdelta-encoding, (3) Gzip-compression the signature could be
158 broken. The only way to solve this problem is to guess at Gzip's
159 compression level or control it by other means. I recommend that
160 specific implementations of any compression scheme store
161 information needed to exactly re-compress the input, that way
162 external compression is transparent - however, this won't happen
163 here until it has stabilized.]
164
165 2. APPLICATION-HEADER
166
167 This feature was introduced in RFC3284. It allows any application
168 to include a header within the VCDIFF file format. This allows
169 general inter-application data exchange with support for
170 application-specific extensions to communicate metadata.
171
172 3. VCDIFF CHECKSUM
173
174 An optional checksum value is included with each window, which can
175 be used to validate the final result. This verifies the correct source
176 file was used for decompression as well as the obvious advantage:
177 checking the implementation (and underlying) correctness.
178
179 4. LIGHT WEIGHT
180
181 The code makes efforts to avoid copying data more than necessary.
182 The code delays many initialization tasks until the first use, it
183 optimizes for identical (perfectly matching) inputs. It does not
184 compute any checksums until the first lookup misses. Memory usage
185 is reduced. String-matching is templatized (by slightly gross use
186 of CPP) to hard-code alternative compile-time defaults. The code
187 has few outside dependencies.
188 ----------------------------------------------------------------------
189
190 The default rfc3284 instruction table:
191 (see RFC for the explanation)
192
193 TYPE SIZE MODE TYPE SIZE MODE INDEX
194 --------------------------------------------------------------------
195 1. Run 0 0 Noop 0 0 0
196 2. Add 0, [1,17] 0 Noop 0 0 [1,18]
197 3. Copy 0, [4,18] 0 Noop 0 0 [19,34]
198 4. Copy 0, [4,18] 1 Noop 0 0 [35,50]
199 5. Copy 0, [4,18] 2 Noop 0 0 [51,66]
200 6. Copy 0, [4,18] 3 Noop 0 0 [67,82]
201 7. Copy 0, [4,18] 4 Noop 0 0 [83,98]
202 8. Copy 0, [4,18] 5 Noop 0 0 [99,114]
203 9. Copy 0, [4,18] 6 Noop 0 0 [115,130]
204 10. Copy 0, [4,18] 7 Noop 0 0 [131,146]
205 11. Copy 0, [4,18] 8 Noop 0 0 [147,162]
206 12. Add [1,4] 0 Copy [4,6] 0 [163,174]
207 13. Add [1,4] 0 Copy [4,6] 1 [175,186]
208 14. Add [1,4] 0 Copy [4,6] 2 [187,198]
209 15. Add [1,4] 0 Copy [4,6] 3 [199,210]
210 16. Add [1,4] 0 Copy [4,6] 4 [211,222]
211 17. Add [1,4] 0 Copy [4,6] 5 [223,234]
212 18. Add [1,4] 0 Copy 4 6 [235,238]
213 19. Add [1,4] 0 Copy 4 7 [239,242]
214 20. Add [1,4] 0 Copy 4 8 [243,246]
215 21. Copy 4 [0,8] Add 1 0 [247,255]
216 --------------------------------------------------------------------
217
218 Reading the source: Overview
219
220 This file includes itself in several passes to macro-expand certain
221 sections with variable forms. Just read ahead, there's only a
222 little confusion. I know this sounds ugly, but hard-coding some of
223 the string-matching parameters results in a 10-15% increase in
224 string-match performance. The only time this hurts is when you have
225 unbalanced #if/endifs.
226
227 A single compilation unit tames the Makefile. In short, this is to
228 allow the above-described hack without an explodingMakefile. The
229 single compilation unit includes the core library features,
230 configurable string-match templates, optional main() command-line
231 tool, misc optional features, and a regression test. Features are
232 controled with CPP #defines, see Makefile.am.
233
234 The initial __XDELTA3_C_HEADER_PASS__ starts first, the INLINE and
235 TEMPLATE sections follow. Easy stuff first, hard stuff last.
236
237 Optional features include:
238
239 xdelta3-main.h The command-line interface, external compression
240 support, POSIX-specific, info & VCDIFF-debug tools.
241 xdelta3-second.h The common secondary compression routines.
242 xdelta3-djw.h The semi-adaptive huffman secondary encoder.
243 xdelta3-fgk.h The adaptive huffman secondary encoder.
244 xdelta3-test.h The unit test covers major algorithms,
245 encoding and decoding. There are single-bit
246 error decoding tests. There are 32/64-bit file size
247 boundary tests. There are command-line tests.
248 There are compression tests. There are external
249 compression tests. There are string-matching tests.
250 There should be more tests...
251
252 Additional headers include:
253
254 xdelta3.h The public header file.
255 xdelta3-cfgs.h The default settings for default, built-in
256 encoders. These are hard-coded at
257 compile-time. There is also a single
258 soft-coded string matcher for experimenting
259 with arbitrary values.
260 xdelta3-list.h A cyclic list template
261
262 Misc little debug utilities:
263
264 badcopy.c Randomly modifies an input file based on two
265 parameters: (1) the probability that a byte in
266 the file is replaced with a pseudo-random value,
267 and (2) the mean change size. Changes are
268 generated using an expoential distribution
269 which approximates the expected error_prob
270 distribution.
271 show.c Prints an offset/length segment from a file.
272 testh.c Checks that xdelta3.h is can be #included
273 --------------------------------------------------------------------
274
275 This file itself is unusually large. I hope to defend this layout
276 with lots of comments. Everything in this file is related to
277 encoding and decoding. I like it all together - the template stuff
278 is just a hack. */
279
280#ifndef __XDELTA3_C_HEADER_PASS__
281#define __XDELTA3_C_HEADER_PASS__
282
283#include <errno.h>
284#include <string.h>
285
286#include "xdelta3.h"
287
288/******************************************************************************************
289 STATIC CONFIGURATION
290 ******************************************************************************************/
291
292#ifndef XD3_MAIN /* the main application */
293#define XD3_MAIN 0
294#endif
295
296#ifndef VCDIFF_TOOLS
297#define VCDIFF_TOOLS XD3_MAIN
298#endif
299
300#ifndef SECONDARY_FGK /* one from the algorithm preservation department: */
301#define SECONDARY_FGK 0 /* adaptive Huffman routines */
302#endif
303
304#ifndef SECONDARY_DJW /* semi-adaptive/static Huffman for the eventual */
305#define SECONDARY_DJW 0 /* standardization, off by default until such time. */
306#endif
307
308#ifndef GENERIC_ENCODE_TABLES /* These three are the RFC-spec'd app-specific */
309#define GENERIC_ENCODE_TABLES 0 /* code features. This is tested but not recommended */
310#endif /* unless there's a real application. */
311#ifndef GENERIC_ENCODE_TABLES_COMPUTE
312#define GENERIC_ENCODE_TABLES_COMPUTE 0
313#endif
314#ifndef GENERIC_ENCODE_TABLES_COMPUTE_PRINT
315#define GENERIC_ENCODE_TABLES_COMPUTE_PRINT 0
316#endif
317
318#if XD3_USE_LARGEFILE64 /* How does everyone else do this? */
319#define Q "q"
320#else
321#define Q
322#endif
323
324#if XD3_ENCODER
325#define IF_ENCODER(x) x
326#else
327#define IF_ENCODER(x)
328#endif
329
330/******************************************************************************************/
331
332typedef enum {
333
334 /* header indicator bits */
335 VCD_SECONDARY = (1 << 0), /* uses secondary compressor */
336 VCD_CODETABLE = (1 << 1), /* supplies code table data */
337 VCD_APPHEADER = (1 << 2), /* supplies application data */
338 VCD_INVHDR = ~7U,
339
340 /* window indicator bits */
341 VCD_SOURCE = (1 << 0), /* copy window in source file */
342 VCD_TARGET = (1 << 1), /* copy window in target file */
343 VCD_ADLER32 = (1 << 2), /* has adler32 checksum */
344 VCD_INVWIN = ~7U,
345
346 VCD_SRCORTGT = VCD_SOURCE | VCD_TARGET,
347
348 /* delta indicator bits */
349 VCD_DATACOMP = (1 << 0),
350 VCD_INSTCOMP = (1 << 1),
351 VCD_ADDRCOMP = (1 << 2),
352 VCD_INVDEL = ~0x7U,
353
354} xd3_indicator;
355
356typedef enum {
357 VCD_DJW_ID = 1,
358 VCD_FGK_ID = 16, /* !!!Note: these are not a standard IANA-allocated ID!!! */
359} xd3_secondary_ids;
360
361typedef enum {
362 SEC_NOFLAGS = 0,
363 SEC_COUNT_FREQS = (1 << 0), /* OPT: Not implemented: Could eliminate first pass of Huffman... */
364} xd3_secondary_flags;
365
366typedef enum {
367 DATA_SECTION, /* These indicate which section to the secondary compressor. */
368 INST_SECTION, /* The header section is not compressed, therefore not listed here. */
369 ADDR_SECTION,
370} xd3_section_type;
371
372typedef enum
373{
374 XD3_NOOP = 0,
375 XD3_ADD = 1,
376 XD3_RUN = 2,
377 XD3_CPY = 3, /* XD3_CPY rtypes are represented as (XD3_CPY + copy-mode value) */
378} xd3_rtype;
379
380/******************************************************************************************/
381
382#include "xdelta3-list.h"
383
384XD3_MAKELIST(xd3_rlist, xd3_rinst, link);
385
386/******************************************************************************************/
387
388#ifndef unlikely /* The unlikely macro - any good? */
389#if defined(__GNUC__) && __GNUC__ >= 3
390#define unlikely(x) __builtin_expect((x),0)
391#define likely(x) __builtin_expect((x),1)
392#else
393#define unlikely(x) (x)
394#define likely(x) (x)
395#endif
396#endif
397
398#define SECONDARY_MIN_SAVINGS 2 /* Secondary compression has to save at least this many bytes. */
399#define SECONDARY_MIN_INPUT 10 /* Secondary compression needs at least this many bytes. */
400
401#define VCDIFF_MAGIC1 0xd6 /* 1st file byte */
402#define VCDIFF_MAGIC2 0xc3 /* 2nd file byte */
403#define VCDIFF_MAGIC3 0xc4 /* 3rd file byte */
404#define VCDIFF_VERSION 0x00 /* 4th file byte */
405
406#define VCD_SELF 0 /* 1st address mode */
407#define VCD_HERE 1 /* 2nd address mode */
408
409#define CODE_TABLE_STRING_SIZE (6 * 256) /* Should fit a code table string. */
410#define CODE_TABLE_VCDIFF_SIZE (6 * 256) /* Should fit a compressed code table string */
411
412#define SECONDARY_ANY (SECONDARY_DJW || SECONDARY_FGK) /* True if any secondary compressor is used. */
413
414#define ALPHABET_SIZE 256 /* Used in test code--size of the secondary compressor alphabet. */
415
416#define HASH_PRIME 0 /* Old hashing experiments */
417#define HASH_PERMUTE 1
418#define ARITH_SMALL_CKSUM 1
419
420#define HASH_CKOFFSET 1U /* Table entries distinguish "no-entry" from offset 0 using this offset. */
421
422#define MIN_SMALL_LOOK 2U /* Match-optimization stuff. */
423#define MIN_LARGE_LOOK 2U
424#define MIN_MATCH_OFFSET 1U
425#define MAX_MATCH_SPLIT 18U /* VCDIFF code table: 18 is the default limit for direct-coded ADD sizes */
426
427#define LEAST_MATCH_INCR 0 /* The least number of bytes an overlapping match must beat
428 * the preceding match by. This is a bias for the lazy
429 * match optimization. A non-zero value means that an
430 * adjacent match has to be better by more than the step
431 * between them. 0. */
432
433#define MIN_MATCH 4U /* VCDIFF code table: MIN_MATCH=4 */
434#define MIN_ADD 1U /* 1 */
435#define MIN_RUN 8U /* The shortest run, if it is shorter than this an immediate
436 * add/copy will be just as good. ADD1/COPY6 = 1I+1D+1A bytes,
437 * RUN18 = 1I+1D+1A. */
438
439#define MAX_MODES 9 /* Maximum number of nodes used for compression--does not limit decompression. */
440
441#define ENC_SECTS 4 /* Number of separate output sections. */
442
443#define HDR_TAIL(s) (stream->enc_tails[0])
444#define DATA_TAIL(s) (stream->enc_tails[1])
445#define INST_TAIL(s) (stream->enc_tails[2])
446#define ADDR_TAIL(s) (stream->enc_tails[3])
447
448#define HDR_HEAD(s) (stream->enc_heads[0])
449#define DATA_HEAD(s) (stream->enc_heads[1])
450#define INST_HEAD(s) (stream->enc_heads[2])
451#define ADDR_HEAD(s) (stream->enc_heads[3])
452
453#define SIZEOF_ARRAY(x) (sizeof(x) / sizeof(x[0]))
454
455#define TOTAL_MODES(x) (2+(x)->acache.s_same+(x)->acache.s_near)
456
457/* Template instances. */
458#if XD3_BUILD_SLOW
459#define IF_BUILD_SLOW(x) x
460#else
461#define IF_BUILD_SLOW(x)
462#endif
463#if XD3_BUILD_FAST
464#define IF_BUILD_FAST(x) x
465#else
466#define IF_BUILD_FAST(x)
467#endif
468#if XD3_BUILD_SOFT
469#define IF_BUILD_SOFT(x) x
470#else
471#define IF_BUILD_SOFT(x)
472#endif
473
474IF_BUILD_SOFT(static const xd3_smatcher __smatcher_soft;)
475IF_BUILD_FAST(static const xd3_smatcher __smatcher_fast;)
476IF_BUILD_SLOW(static const xd3_smatcher __smatcher_slow;)
477
478#if XD3_DEBUG
479#define SMALL_HASH_DEBUG1(s,inp) \
480 usize_t debug_hval = xd3_checksum_hash (& (s)->small_hash, \
481 xd3_scksum ((inp), (s)->small_look))
482#define SMALL_HASH_DEBUG2(s,inp) \
483 XD3_ASSERT (debug_hval == xd3_checksum_hash (& (s)->small_hash, \
484 xd3_scksum ((inp), (s)->small_look)))
485#define SMALL_HASH_STATS(x) x
486#else
487#define SMALL_HASH_DEBUG1(s,inp)
488#define SMALL_HASH_DEBUG2(s,inp)
489#define SMALL_HASH_STATS(x)
490#endif /* XD3_DEBUG */
491
492/* Config fields: three structures contain these variables, so this is non-typed. */
493#define XD3_COPY_CONFIG_FIELDS(dst,src) \
494 do { \
495 (dst)->large_look = (src)->large_look; \
496 (dst)->large_step = (src)->large_step; \
497 (dst)->small_look = (src)->small_look; \
498 (dst)->small_chain = (src)->small_chain; \
499 (dst)->small_lchain = (src)->small_lchain; \
500 (dst)->ssmatch = (src)->ssmatch; \
501 (dst)->try_lazy = (src)->try_lazy; \
502 (dst)->max_lazy = (src)->max_lazy; \
503 (dst)->long_enough = (src)->long_enough; \
504 (dst)->promote = (src)->promote; \
505 } while (0)
506
507/* Update the run-length state */
508#define NEXTRUN(c) do { if ((c) == run_c) { run_l += 1; } else { run_c = (c); run_l = 1; } } while (0)
509
510/* Update the checksum state. */
511#define LARGE_CKSUM_UPDATE(cksum,base,look) \
512 do { \
513 uint32_t old_c = PERMUTE((base)[0]); \
514 uint32_t new_c = PERMUTE((base)[(look)]); \
515 uint32_t low = (((cksum) & 0xffff) - old_c + new_c) & 0xffff; \
516 uint32_t high = (((cksum) >> 16) - (old_c * (look)) + low) & 0xffff; \
517 (cksum) = (high << 16) | low; \
518 } while (0)
519
520/* Multiply and add hash function */
521#if ARITH_SMALL_CKSUM
522#define SMALL_CKSUM_UPDATE(cksum,base,look) (cksum) = ((*(unsigned long*)(base+1)) * 71143)
523#else
524#define SMALL_CKSUM_UPDATE LARGE_CKSUM_UPDATE
525#endif
526
527/* Consume N bytes of input, only used by the decoder. */
528#define DECODE_INPUT(n) \
529 do { \
530 stream->total_in += (xoff_t) (n); \
531 stream->avail_in -= (n); \
532 stream->next_in += (n); \
533 } while (0)
534
535/* This CPP-conditional stuff can be cleaned up... */
536#if XD3_DEBUG
537#define IF_DEBUG(x) x
538#define DEBUG_ARG(x) , x
539#else
540#define IF_DEBUG(x)
541#define DEBUG_ARG(x)
542#endif
543#if XD3_DEBUG > 1
544#define IF_DEBUG1(x) x
545#else
546#define IF_DEBUG1(x)
547#endif
548#if REGRESSION_TEST
549#define IF_REGRESSION(x) x
550#else
551#define IF_REGRESSION(x)
552#endif
553
554/******************************************************************************************/
555
556#if XD3_ENCODER
557static void* xd3_alloc0 (xd3_stream *stream,
558 usize_t elts,
559 usize_t size);
560
561
562static xd3_output* xd3_alloc_output (xd3_stream *stream,
563 xd3_output *old_output);
564
565
566
567static void xd3_free_output (xd3_stream *stream,
568 xd3_output *output);
569
570static int xd3_emit_byte (xd3_stream *stream,
571 xd3_output **outputp,
572 uint8_t code);
573
574static int xd3_emit_bytes (xd3_stream *stream,
575 xd3_output **outputp,
576 const uint8_t *base,
577 usize_t size);
578
579static int xd3_emit_double (xd3_stream *stream, xd3_rinst *first, xd3_rinst *second, uint code);
580static int xd3_emit_single (xd3_stream *stream, xd3_rinst *single, uint code);
581
582static usize_t xd3_sizeof_output (xd3_output *output);
583
584static int xd3_source_match_setup (xd3_stream *stream, xoff_t srcpos);
585static int xd3_source_extend_match (xd3_stream *stream);
586static int xd3_srcwin_setup (xd3_stream *stream);
587static int xd3_srcwin_move_point (xd3_stream *stream, usize_t *next_move_point);
588static usize_t xd3_iopt_last_matched (xd3_stream *stream);
589static int xd3_emit_uint32_t (xd3_stream *stream, xd3_output **output, uint32_t num);
590
591#endif /* XD3_ENCODER */
592
593static int xd3_decode_allocate (xd3_stream *stream, usize_t size,
594 uint8_t **copied1, usize_t *alloc1,
595 uint8_t **copied2, usize_t *alloc2);
596
597static void xd3_compute_code_table_string (const xd3_dinst *code_table, uint8_t *str);
598static void* xd3_alloc (xd3_stream *stream, usize_t elts, usize_t size);
599static void xd3_free (xd3_stream *stream, void *ptr);
600
601static int xd3_read_uint32_t (xd3_stream *stream, const uint8_t **inpp,
602 const uint8_t *max, uint32_t *valp);
603
604#if REGRESSION_TEST
605static int xd3_selftest (void);
606#endif
607
608/******************************************************************************************/
609
610#define UINT32_OFLOW_MASK 0xfe000000U
611#define UINT64_OFLOW_MASK 0xfe00000000000000ULL
612
613#define UINT32_MAX 4294967295U
614#define UINT64_MAX 18446744073709551615ULL
615
616#if SIZEOF_USIZE_T == 4
617#define USIZE_T_MAX UINT32_MAX
618#define xd3_decode_size xd3_decode_uint32_t
619#define xd3_emit_size xd3_emit_uint32_t
620#define xd3_sizeof_size xd3_sizeof_uint32_t
621#define xd3_read_size xd3_read_uint32_t
622#elif SIZEOF_USIZE_T == 8
623#define USIZE_T_MAX UINT64_MAX
624#define xd3_decode_size xd3_decode_uint64_t
625#define xd3_emit_size xd3_emit_uint64_t
626#define xd3_sizeof_size xd3_sizeof_uint64_t
627#define xd3_read_size xd3_read_uint64_t
628#endif
629
630#if SIZEOF_XOFF_T == 4
631#define XOFF_T_MAX UINT32_MAX
632#define xd3_decode_offset xd3_decode_uint32_t
633//#define xd3_emit_offset xd3_emit_uint32_t
634//#define xd3_sizeof_offset xd3_sizeof_uint32_t
635//#define xd3_read_offset xd3_read_uint32_t
636#elif SIZEOF_XOFF_T == 8
637#define XOFF_T_MAX UINT64_MAX
638#define xd3_decode_offset xd3_decode_uint64_t
639//#define xd3_emit_offset xd3_emit_uint64_t
640//#define xd3_sizeof_offset xd3_sizeof_uint64_t
641//#define xd3_read_offset xd3_read_uint64_t
642#endif
643
644#define USIZE_T_OVERFLOW(a,b) ((USIZE_T_MAX - (usize_t) (a)) < (usize_t) (b))
645#define XOFF_T_OVERFLOW(a,b) ((XOFF_T_MAX - (xoff_t) (a)) < (xoff_t) (b))
646
647const char* xd3_strerror (int ret)
648{
649 switch (ret)
650 {
651 case XD3_INPUT: return "XD3_INPUT";
652 case XD3_OUTPUT: return "XD3_OUTPUT";
653 case XD3_GETSRCBLK: return "XD3_GETSRCBLK";
654 case XD3_GOTHEADER: return "XD3_GOTHEADER";
655 case XD3_WINSTART: return "XD3_WINSTART";
656 case XD3_WINFINISH: return "XD3_WINFINISH";
657 }
658 return strerror (ret);
659}
660
661/******************************************************************************************/
662
663#if SECONDARY_ANY == 0
664#define IF_SEC(x)
665#define IF_NSEC(x) x
666#else /* yuck */
667#define IF_SEC(x) x
668#define IF_NSEC(x)
669#include "xdelta3-second.h"
670#endif /* SECONDARY_ANY */
671
672#if SECONDARY_FGK
673#include "xdelta3-fgk.h"
674
675static const xd3_sec_type fgk_sec_type =
676{
677 VCD_FGK_ID,
678 "FGK Adaptive Huffman",
679 SEC_NOFLAGS,
680 (xd3_sec_stream* (*)()) fgk_alloc,
681 (void (*)()) fgk_destroy,
682 (void (*)()) fgk_init,
683 (int (*)()) xd3_decode_fgk,
684 IF_ENCODER((int (*)()) xd3_encode_fgk)
685};
686
687#define IF_FGK(x) x
688#define FGK_CASE(s) \
689 s->sec_type = & fgk_sec_type; \
690 break;
691#else
692#define IF_FGK(x)
693#define FGK_CASE(s) \
694 s->msg = "unavailable secondary compressor: FGK Adaptive Huffman"; \
695 return EINVAL;
696#endif
697
698#if SECONDARY_DJW
699#include "xdelta3-djw.h"
700
701static const xd3_sec_type djw_sec_type =
702{
703 VCD_DJW_ID,
704 "Static Huffman",
705 SEC_COUNT_FREQS,
706 (xd3_sec_stream* (*)()) djw_alloc,
707 (void (*)()) djw_destroy,
708 (void (*)()) djw_init,
709 (int (*)()) xd3_decode_huff,
710 IF_ENCODER((int (*)()) xd3_encode_huff)
711};
712
713#define IF_DJW(x) x
714#define DJW_CASE(s) \
715 s->sec_type = & djw_sec_type; \
716 break;
717#else
718#define IF_DJW(x)
719#define DJW_CASE(s) \
720 s->msg = "unavailable secondary compressor: DJW Static Huffman"; \
721 return EINVAL;
722#endif
723
724/******************************************************************************************/
725
726/* Abbreviate frequently referenced fields. */
727#define max_in stream->avail_in
728#define pos_in stream->input_position
729#define min_match stream->min_match
730
731/* Process the inline pass. */
732#define __XDELTA3_C_INLINE_PASS__
733#include "xdelta3.c"
734#undef __XDELTA3_C_INLINE_PASS__
735
736/* Process template passes - this includes xdelta3.c several times. */
737#define __XDELTA3_C_TEMPLATE_PASS__
738#include "xdelta3-cfgs.h"
739#undef __XDELTA3_C_TEMPLATE_PASS__
740
741#undef max_in
742#undef pos_in
743#undef min_match
744
745#if XD3_MAIN || PYTHON_MODULE
746#include "xdelta3-main.h"
747#endif
748
749#if REGRESSION_TEST
750#include "xdelta3-test.h"
751#endif
752
753#if PYTHON_MODULE
754#include "xdelta3-python.h"
755#endif
756
757#endif /* __XDELTA3_C_HEADER_PASS__ */
758#ifdef __XDELTA3_C_INLINE_PASS__
759
760/******************************************************************************************
761 Instruction tables
762 ******************************************************************************************/
763
764/* The following code implements a parametrized description of the
765 * code table given above for a few reasons. It is not necessary for
766 * implementing the standard, to support compression with variable
767 * tables, so an implementation is only required to know the default
768 * code table to begin decompression. (If the encoder uses an
769 * alternate table, the table is included in compressed form inside
770 * the VCDIFF file.)
771 *
772 * Before adding variable-table support there were two functions which
773 * were hard-coded to the default table above.
774 * xd3_compute_default_table() would create the default table by
775 * filling a 256-elt array of xd3_dinst values. The corresponding
776 * function, xd3_choose_instruction(), would choose an instruction
777 * based on the hard-coded parameters of the default code table.
778 *
779 * Notes: The parametrized code table description here only generates
780 * tables of a certain regularity similar to the default table by
781 * allowing to vary the distribution of single- and
782 * double-instructions and change the number of near and same copy
783 * modes. More exotic tables are only possible by extending this
784 * code, but a detailed experiment would need to be carried out first,
785 * probably using separate code. I would like to experiment with a
786 * double-copy instruction, for example.
787 *
788 * For performance reasons, both the parametrized and non-parametrized
789 * versions of xd3_choose_instruction remain. The parametrized
790 * version is only needed for testing multi-table decoding support.
791 * If ever multi-table encoding is required, this can be optimized by
792 * compiling static functions for each table.
793 */
794
795/* The XD3_CHOOSE_INSTRUCTION calls xd3_choose_instruction with the
796 * table description when GENERIC_ENCODE_TABLES are in use. The
797 * IF_GENCODETBL macro enables generic-code-table specific code. */
798#if GENERIC_ENCODE_TABLES
799#define XD3_CHOOSE_INSTRUCTION(stream,prev,inst) xd3_choose_instruction (stream->code_table_desc, prev, inst)
800#define IF_GENCODETBL(x) x
801#else
802#define XD3_CHOOSE_INSTRUCTION(stream,prev,inst) xd3_choose_instruction (prev, inst)
803#define IF_GENCODETBL(x)
804#endif
805
806/* This structure maintains information needed by
807 * xd3_choose_instruction to compute the code for a double instruction
808 * by first indexing an array of code_table_sizes by copy mode, then
809 * using (offset + (muliplier * X)) */
810struct _xd3_code_table_sizes {
811 uint8_t cpy_max;
812 uint8_t offset;
813 uint8_t mult;
814};
815
816/* This contains a complete description of a code table. */
817struct _xd3_code_table_desc
818{
819 /* Assumes a single RUN instruction */
820 /* Assumes that MIN_MATCH is 4 */
821
822 uint8_t add_sizes; /* Number of immediate-size single adds (default 17) */
823 uint8_t near_modes; /* Number of near copy modes (default 4) */
824 uint8_t same_modes; /* Number of same copy modes (default 3) */
825 uint8_t cpy_sizes; /* Number of immediate-size single copies (default 15) */
826
827 uint8_t addcopy_add_max; /* Maximum add size for an add-copy double instruction, all modes (default 4) */
828 uint8_t addcopy_near_cpy_max; /* Maximum cpy size for an add-copy double instruction, up through VCD_NEAR modes (default 6) */
829 uint8_t addcopy_same_cpy_max; /* Maximum cpy size for an add-copy double instruction, VCD_SAME modes (default 4) */
830
831 uint8_t copyadd_add_max; /* Maximum add size for a copy-add double instruction, all modes (default 1) */
832 uint8_t copyadd_near_cpy_max; /* Maximum cpy size for a copy-add double instruction, up through VCD_NEAR modes (default 4) */
833 uint8_t copyadd_same_cpy_max; /* Maximum cpy size for a copy-add double instruction, VCD_SAME modes (default 4) */
834
835 xd3_code_table_sizes addcopy_max_sizes[MAX_MODES];
836 xd3_code_table_sizes copyadd_max_sizes[MAX_MODES];
837};
838
839/* The rfc3284 code table is represented: */
840static const xd3_code_table_desc __rfc3284_code_table_desc = {
841 17, /* add sizes */
842 4, /* near modes */
843 3, /* same modes */
844 15, /* copy sizes */
845
846 4, /* add-copy max add */
847 6, /* add-copy max cpy, near */
848 4, /* add-copy max cpy, same */
849
850 1, /* copy-add max add */
851 4, /* copy-add max cpy, near */
852 4, /* copy-add max cpy, same */
853
854 /* addcopy */
855 { {6,163,3},{6,175,3},{6,187,3},{6,199,3},{6,211,3},{6,223,3},{4,235,1},{4,239,1},{4,243,1} },
856 /* copyadd */
857 { {4,247,1},{4,248,1},{4,249,1},{4,250,1},{4,251,1},{4,252,1},{4,253,1},{4,254,1},{4,255,1} },
858};
859
860#if GENERIC_ENCODE_TABLES
861/* An alternate code table for testing (5 near, 0 same):
862 *
863 * TYPE SIZE MODE TYPE SIZE MODE INDEX
864 * ---------------------------------------------------------------
865 * 1. Run 0 0 Noop 0 0 0
866 * 2. Add 0, [1,23] 0 Noop 0 0 [1,24]
867 * 3. Copy 0, [4,20] 0 Noop 0 0 [25,42]
868 * 4. Copy 0, [4,20] 1 Noop 0 0 [43,60]
869 * 5. Copy 0, [4,20] 2 Noop 0 0 [61,78]
870 * 6. Copy 0, [4,20] 3 Noop 0 0 [79,96]
871 * 7. Copy 0, [4,20] 4 Noop 0 0 [97,114]
872 * 8. Copy 0, [4,20] 5 Noop 0 0 [115,132]
873 * 9. Copy 0, [4,20] 6 Noop 0 0 [133,150]
874 * 10. Add [1,4] 0 Copy [4,6] 0 [151,162]
875 * 11. Add [1,4] 0 Copy [4,6] 1 [163,174]
876 * 12. Add [1,4] 0 Copy [4,6] 2 [175,186]
877 * 13. Add [1,4] 0 Copy [4,6] 3 [187,198]
878 * 14. Add [1,4] 0 Copy [4,6] 4 [199,210]
879 * 15. Add [1,4] 0 Copy [4,6] 5 [211,222]
880 * 16. Add [1,4] 0 Copy [4,6] 6 [223,234]
881 * 17. Copy 4 [0,6] Add [1,3] 0 [235,255]
882 * --------------------------------------------------------------- */
883static const xd3_code_table_desc __alternate_code_table_desc = {
884 23, /* add sizes */
885 5, /* near modes */
886 0, /* same modes */
887 17, /* copy sizes */
888
889 4, /* add-copy max add */
890 6, /* add-copy max cpy, near */
891 0, /* add-copy max cpy, same */
892
893 3, /* copy-add max add */
894 4, /* copy-add max cpy, near */
895 0, /* copy-add max cpy, same */
896
897 /* addcopy */
898 { {6,151,3},{6,163,3},{6,175,3},{6,187,3},{6,199,3},{6,211,3},{6,223,3},{0,0,0},{0,0,0} },
899 /* copyadd */
900 { {4,235,1},{4,238,1},{4,241,1},{4,244,1},{4,247,1},{4,250,1},{4,253,1},{0,0,0},{0,0,0} },
901};
902#endif
903
904/* Computes code table entries of TBL using the specified description. */
905static void
906xd3_build_code_table (const xd3_code_table_desc *desc, xd3_dinst *tbl)
907{
908 int size1, size2, mode;
909 int cpy_modes = 2 + desc->near_modes + desc->same_modes;
910 xd3_dinst *d = tbl;
911
912 (d++)->type1 = XD3_RUN;
913 (d++)->type1 = XD3_ADD;
914
915 for (size1 = 1; size1 <= desc->add_sizes; size1 += 1, d += 1)
916 {
917 d->type1 = XD3_ADD;
918 d->size1 = size1;
919 }
920
921 for (mode = 0; mode < cpy_modes; mode += 1)
922 {
923 (d++)->type1 = XD3_CPY + mode;
924
925 for (size1 = MIN_MATCH; size1 < MIN_MATCH + desc->cpy_sizes; size1 += 1, d += 1)
926 {
927 d->type1 = XD3_CPY + mode;
928 d->size1 = size1;
929 }
930 }
931
932 for (mode = 0; mode < cpy_modes; mode += 1)
933 {
934 for (size1 = 1; size1 <= desc->addcopy_add_max; size1 += 1)
935 {
936 int max = (mode < 2 + desc->near_modes) ? desc->addcopy_near_cpy_max : desc->addcopy_same_cpy_max;
937
938 for (size2 = MIN_MATCH; size2 <= max; size2 += 1, d += 1)
939 {
940 d->type1 = XD3_ADD;
941 d->size1 = size1;
942 d->type2 = XD3_CPY + mode;
943 d->size2 = size2;
944 }
945 }
946 }
947
948 for (mode = 0; mode < cpy_modes; mode += 1)
949 {
950 int max = (mode < 2 + desc->near_modes) ? desc->copyadd_near_cpy_max : desc->copyadd_same_cpy_max;
951
952 for (size1 = MIN_MATCH; size1 <= max; size1 += 1)
953 {
954 for (size2 = 1; size2 <= desc->copyadd_add_max; size2 += 1, d += 1)
955 {
956 d->type1 = XD3_CPY + mode;
957 d->size1 = size1;
958 d->type2 = XD3_ADD;
959 d->size2 = size2;
960 }
961 }
962 }
963
964 XD3_ASSERT (d - tbl == 256);
965}
966
967/* This function generates the static default code table. */
968static const xd3_dinst*
969xd3_rfc3284_code_table (void)
970{
971 static xd3_dinst __rfc3284_code_table[256];
972
973 if (__rfc3284_code_table[0].type1 != XD3_RUN)
974 {
975 xd3_build_code_table (& __rfc3284_code_table_desc, __rfc3284_code_table);
976 }
977
978 return __rfc3284_code_table;
979}
980
981#if XD3_ENCODER
982#if GENERIC_ENCODE_TABLES
983/* This function generates the alternate code table. */
984static const xd3_dinst*
985xd3_alternate_code_table (void)
986{
987 static xd3_dinst __alternate_code_table[256];
988
989 if (__alternate_code_table[0].type1 != XD3_RUN)
990 {
991 xd3_build_code_table (& __alternate_code_table_desc, __alternate_code_table);
992 }
993
994 return __alternate_code_table;
995}
996
997/* This function computes the ideal second instruction INST based on preceding instruction
998 * PREV. If it is possible to issue a double instruction based on this pair it sets
999 * PREV->code2, otherwise it sets INST->code1. */
1000static void
1001xd3_choose_instruction (const xd3_code_table_desc *desc, xd3_rinst *prev, xd3_rinst *inst)
1002{
1003 switch (inst->type)
1004 {
1005 case XD3_RUN:
1006 /* The 0th instruction is RUN */
1007 inst->code1 = 0;
1008 break;
1009
1010 case XD3_ADD:
1011
1012 if (inst->size > desc->add_sizes)
1013 {
1014 /* The first instruction is non-immediate ADD */
1015 inst->code1 = 1;
1016 }
1017 else
1018 {
1019 /* The following ADD_SIZES instructions are immediate ADDs */
1020 inst->code1 = 1 + inst->size;
1021
1022 /* Now check for a possible COPY-ADD double instruction */
1023 if (prev != NULL)
1024 {
1025 int prev_mode = prev->type - XD3_CPY;
1026
1027 /* If previous is a copy. Note: as long as the previous is not a RUN
1028 * instruction, it should be a copy because it cannot be an add. This check
1029 * is more clear. */
1030 if (prev_mode >= 0 && inst->size <= desc->copyadd_add_max)
1031 {
1032 const xd3_code_table_sizes *sizes = & desc->copyadd_max_sizes[prev_mode];
1033
1034 /* This check and the inst->size-<= above are == in the default table. */
1035 if (prev->size <= sizes->cpy_max)
1036 {
1037 /* The second and third exprs are 0 in the default table. */
1038 prev->code2 = sizes->offset + (sizes->mult * (prev->size - MIN_MATCH)) + (inst->size - MIN_ADD);
1039 }
1040 }
1041 }
1042 }
1043 break;
1044
1045 default:
1046 {
1047 int mode = inst->type - XD3_CPY;
1048
1049 /* The large copy instruction is offset by the run, large add, and immediate adds,
1050 * then multipled by the number of immediate copies plus one (the large copy)
1051 * (i.e., if there are 15 immediate copy instructions then there are 16 copy
1052 * instructions per mode). */
1053 inst->code1 = 2 + desc->add_sizes + (1 + desc->cpy_sizes) * mode;
1054
1055 /* Now if the copy is short enough for an immediate instruction. */
1056 if (inst->size < MIN_MATCH + desc->cpy_sizes)
1057 {
1058 inst->code1 += inst->size + 1 - MIN_MATCH;
1059
1060 /* Now check for a possible ADD-COPY double instruction. */
1061 if ( (prev != NULL) &&
1062 (prev->type == XD3_ADD) &&
1063 (prev->size <= desc->addcopy_add_max) )
1064 {
1065 const xd3_code_table_sizes *sizes = & desc->addcopy_max_sizes[mode];
1066
1067 if (inst->size <= sizes->cpy_max)
1068 {
1069 prev->code2 = sizes->offset + (sizes->mult * (prev->size - MIN_ADD)) + (inst->size - MIN_MATCH);
1070 }
1071 }
1072 }
1073 }
1074 }
1075}
1076#else /* GENERIC_ENCODE_TABLES */
1077
1078/* This version of xd3_choose_instruction is hard-coded for the default table. */
1079static void
1080xd3_choose_instruction (/* const xd3_code_table_desc *desc,*/ xd3_rinst *prev, xd3_rinst *inst)
1081{
1082 switch (inst->type)
1083 {
1084 case XD3_RUN:
1085 inst->code1 = 0;
1086 break;
1087
1088 case XD3_ADD:
1089 inst->code1 = 1;
1090
1091 if (inst->size <= 17)
1092 {
1093 inst->code1 += inst->size;
1094
1095 if ( (inst->size == 1) &&
1096 (prev != NULL) &&
1097 (prev->size == 4) &&
1098 (prev->type >= XD3_CPY) )
1099 {
1100 prev->code2 = 247 + (prev->type - XD3_CPY);
1101 }
1102 }
1103
1104 break;
1105
1106 default:
1107 {
1108 int mode = inst->type - XD3_CPY;
1109
1110 XD3_ASSERT (inst->type >= XD3_CPY && inst->type < 12);
1111
1112 inst->code1 = 19 + 16 * mode;
1113
1114 if (inst->size <= 18)
1115 {
1116 inst->code1 += inst->size - 3;
1117
1118 if ( (prev != NULL) &&
1119 (prev->type == XD3_ADD) &&
1120 (prev->size <= 4) )
1121 {
1122 if ( (inst->size <= 6) &&
1123 (mode <= 5) )
1124 {
1125 prev->code2 = 163 + (mode * 12) + (3 * (prev->size - 1)) + (inst->size - 4);
1126
1127 XD3_ASSERT (prev->code2 <= 234);
1128 }
1129 else if ( (inst->size == 4) &&
1130 (mode >= 6) )
1131 {
1132 prev->code2 = 235 + ((mode - 6) * 4) + (prev->size - 1);
1133
1134 XD3_ASSERT (prev->code2 <= 246);
1135 }
1136 }
1137 }
1138
1139 XD3_ASSERT (inst->code1 <= 162);
1140 }
1141 break;
1142 }
1143}
1144#endif /* GENERIC_ENCODE_TABLES */
1145
1146/******************************************************************************************
1147 Instruction table encoder/decoder
1148 ******************************************************************************************/
1149
1150#if GENERIC_ENCODE_TABLES
1151#if GENERIC_ENCODE_TABLES_COMPUTE == 0
1152
1153/* In this case, we hard-code the result of compute_code_table_encoding for each alternate
1154 * code table, presuming that saves time/space. This has been 131 bytes, but secondary
1155 * compression was turned off. */
1156static const uint8_t __alternate_code_table_compressed[178] =
1157{0xd6,0xc3,0xc4,0x00,0x00,0x01,0x8a,0x6f,0x40,0x81,0x27,0x8c,0x00,0x00,0x4a,0x4a,0x0d,0x02,0x01,0x03,
11580x01,0x03,0x00,0x01,0x00,0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,
11590x0f,0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x00,0x01,0x01,0x01,0x02,0x02,0x02,0x03,0x03,0x03,0x04,
11600x04,0x04,0x04,0x00,0x04,0x05,0x06,0x01,0x02,0x03,0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x05,0x05,0x05,
11610x06,0x06,0x06,0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x00,0x02,0x00,0x18,0x13,0x63,0x00,0x1b,0x00,0x54,
11620x00,0x15,0x23,0x6f,0x00,0x28,0x13,0x54,0x00,0x15,0x01,0x1a,0x31,0x23,0x6c,0x0d,0x23,0x48,0x00,0x15,
11630x93,0x6f,0x00,0x28,0x04,0x23,0x51,0x04,0x32,0x00,0x2b,0x00,0x12,0x00,0x12,0x00,0x12,0x00,0x12,0x00,
11640x12,0x00,0x12,0x53,0x57,0x9c,0x07,0x43,0x6f,0x00,0x34,0x00,0x0c,0x00,0x0c,0x00,0x0c,0x00,0x0c,0x00,
11650x0c,0x00,0x0c,0x00,0x15,0x00,0x82,0x6f,0x00,0x15,0x12,0x0c,0x00,0x03,0x03,0x00,0x06,0x00,};
1166
1167static int
1168xd3_compute_alternate_table_encoding (xd3_stream *stream, const uint8_t **data, usize_t *size)
1169{
1170 (*data) = __alternate_code_table_compressed;
1171 (*size) = sizeof (__alternate_code_table_compressed);
1172 return 0;
1173}
1174
1175#else
1176
1177/* The alternate code table will be computed and stored here. */
1178static uint8_t __alternate_code_table_compressed[CODE_TABLE_VCDIFF_SIZE];
1179static usize_t __alternate_code_table_compressed_size;
1180
1181/* This function generates a delta describing the code table for encoding within a VCDIFF
1182 * file. This function is NOT thread safe because it is only intended that this function
1183 * is used to generate statically-compiled strings. */
1184int xd3_compute_code_table_encoding (xd3_stream *in_stream, const xd3_dinst *code_table,
1185 uint8_t *comp_string, usize_t *comp_string_size)
1186{
1187 uint8_t dflt_string[CODE_TABLE_STRING_SIZE];
1188 uint8_t code_string[CODE_TABLE_STRING_SIZE];
1189 xd3_stream stream;
1190 xd3_source source;
1191 xd3_config config;
1192 int ret;
1193
1194 memset (& source, 0, sizeof (source));
1195
1196 xd3_compute_code_table_string (xd3_rfc3284_code_table (), dflt_string);
1197 xd3_compute_code_table_string (code_table, code_string);
1198
1199 /* Use DJW secondary compression if it is on by default. This saves about 20 bytes. */
1200 xd3_init_config (& config, XD3_FLUSH | (SECONDARY_DJW ? XD3_SEC_DJW : 0));
1201
1202 /* Be exhaustive. */
1203 config.sprevsz = 1<<11;
1204 config.memsize = CODE_TABLE_STRING_SIZE * 10;
1205
1206 config.large_look = 4;
1207 config.large_step = 1;
1208 config.small_look = 4;
1209 config.small_chain = CODE_TABLE_STRING_SIZE;
1210 config.small_lchain = CODE_TABLE_STRING_SIZE;
1211 config.ssmatch = 1;
1212 config.try_lazy = 1;
1213 config.max_lazy = CODE_TABLE_STRING_SIZE;
1214 config.long_enough = CODE_TABLE_STRING_SIZE;
1215 config.promote = 1;
1216 config.srcwin_size = CODE_TABLE_STRING_SIZE;
1217 config.srcwin_maxsz = CODE_TABLE_STRING_SIZE;
1218
1219 if ((ret = xd3_config_stream (& stream, & config))) { goto fail; }
1220
1221 source.size = CODE_TABLE_STRING_SIZE;
1222 source.blksize = CODE_TABLE_STRING_SIZE;
1223 source.onblk = CODE_TABLE_STRING_SIZE;
1224 source.name = "";
1225 source.curblk = dflt_string;
1226 source.curblkno = 0;
1227
1228 if ((ret = xd3_set_source (& stream, & source))) { goto fail; }
1229
1230 if ((ret = xd3_encode_completely (& stream, code_string, CODE_TABLE_STRING_SIZE,
1231 comp_string, comp_string_size, CODE_TABLE_VCDIFF_SIZE))) { goto fail; }
1232
1233 fail:
1234
1235 in_stream->msg = stream.msg;
1236 xd3_free_stream (& stream);
1237 return ret;
1238}
1239
1240/* Compute a delta between alternate and rfc3284 tables. As soon as another alternate
1241 * table is added, this code should become generic. For now there is only one alternate
1242 * table for testing. */
1243static int
1244xd3_compute_alternate_table_encoding (xd3_stream *stream, const uint8_t **data, usize_t *size)
1245{
1246 int ret;
1247
1248 if (__alternate_code_table_compressed[0] == 0)
1249 {
1250 if ((ret = xd3_compute_code_table_encoding (stream, xd3_alternate_code_table (),
1251 __alternate_code_table_compressed,
1252 & __alternate_code_table_compressed_size)))
1253 {
1254 return ret;
1255 }
1256
1257 /* During development of a new code table, enable this variable to print the new
1258 * static contents and determine its size. At run time the table will be filled in
1259 * appropriately, but at least it should have the proper size beforehand. */
1260#if GENERIC_ENCODE_TABLES_COMPUTE_PRINT
1261 {
1262 int i;
1263
1264 P(RINT, "\nstatic const usize_t __alternate_code_table_compressed_size = %u;\n",
1265 __alternate_code_table_compressed_size);
1266
1267 P(RINT, "static const uint8_t __alternate_code_table_compressed[%u] =\n{",
1268 __alternate_code_table_compressed_size);
1269
1270 for (i = 0; i < __alternate_code_table_compressed_size; i += 1)
1271 {
1272 P(RINT, "0x%02x,", __alternate_code_table_compressed[i]);
1273 if ((i % 20) == 19) { P(RINT, "\n"); }
1274 }
1275
1276 P(RINT, "};\n");
1277 }
1278#endif
1279 }
1280
1281 (*data) = __alternate_code_table_compressed;
1282 (*size) = __alternate_code_table_compressed_size;
1283
1284 return 0;
1285}
1286#endif /* GENERIC_ENCODE_TABLES_COMPUTE != 0 */
1287#endif /* GENERIC_ENCODE_TABLES */
1288
1289#endif /* XD3_ENCODER */
1290
1291/* This function generates the 1536-byte string specified in sections 5.4 and 7 of
1292 * rfc3284, which is used to represent a code table within a VCDIFF file. */
1293void xd3_compute_code_table_string (const xd3_dinst *code_table, uint8_t *str)
1294{
1295 int i, s;
1296
1297 XD3_ASSERT (CODE_TABLE_STRING_SIZE == 6 * 256);
1298
1299 for (s = 0; s < 6; s += 1)
1300 {
1301 for (i = 0; i < 256; i += 1)
1302 {
1303 switch (s)
1304 {
1305 case 0: *str++ = (code_table[i].type1 >= XD3_CPY ? XD3_CPY : code_table[i].type1); break;
1306 case 1: *str++ = (code_table[i].type2 >= XD3_CPY ? XD3_CPY : code_table[i].type2); break;
1307 case 2: *str++ = (code_table[i].size1); break;
1308 case 3: *str++ = (code_table[i].size2); break;
1309 case 4: *str++ = (code_table[i].type1 >= XD3_CPY ? code_table[i].type1 - XD3_CPY : 0); break;
1310 case 5: *str++ = (code_table[i].type2 >= XD3_CPY ? code_table[i].type2 - XD3_CPY : 0); break;
1311 }
1312 }
1313 }
1314}
1315
1316/* This function translates the code table string into the internal representation. The
1317 * stream's near and same-modes should already be set. */
1318static int
1319xd3_apply_table_string (xd3_stream *stream, const uint8_t *code_string)
1320{
1321 int i, s;
1322 int modes = TOTAL_MODES (stream);
1323 xd3_dinst *code_table;
1324
1325 if ((code_table = stream->code_table_alloc = xd3_alloc (stream, sizeof (xd3_dinst), 256)) == NULL)
1326 {
1327 return ENOMEM;
1328 }
1329
1330 for (s = 0; s < 6; s += 1)
1331 {
1332 for (i = 0; i < 256; i += 1)
1333 {
1334 switch (s)
1335 {
1336 case 0:
1337 if (*code_string > XD3_CPY)
1338 {
1339 stream->msg = "invalid code-table opcode";
1340 return EINVAL;
1341 }
1342 code_table[i].type1 = *code_string++;
1343 break;
1344 case 1:
1345 if (*code_string > XD3_CPY)
1346 {
1347 stream->msg = "invalid code-table opcode";
1348 return EINVAL;
1349 }
1350 code_table[i].type2 = *code_string++;
1351 break;
1352 case 2:
1353 if (*code_string != 0 && code_table[i].type1 == XD3_NOOP)
1354 {
1355 stream->msg = "invalid code-table size";
1356 return EINVAL;
1357 }
1358 code_table[i].size1 = *code_string++;
1359 break;
1360 case 3:
1361 if (*code_string != 0 && code_table[i].type2 == XD3_NOOP)
1362 {
1363 stream->msg = "invalid code-table size";
1364 return EINVAL;
1365 }
1366 code_table[i].size2 = *code_string++;
1367 break;
1368 case 4:
1369 if (*code_string >= modes)
1370 {
1371 stream->msg = "invalid code-table mode";
1372 return EINVAL;
1373 }
1374 if (*code_string != 0 && code_table[i].type1 != XD3_CPY)
1375 {
1376 stream->msg = "invalid code-table mode";
1377 return EINVAL;
1378 }
1379 code_table[i].type1 += *code_string++;
1380 break;
1381 case 5:
1382 if (*code_string >= modes)
1383 {
1384 stream->msg = "invalid code-table mode";
1385 return EINVAL;
1386 }
1387 if (*code_string != 0 && code_table[i].type2 != XD3_CPY)
1388 {
1389 stream->msg = "invalid code-table mode";
1390 return EINVAL;
1391 }
1392 code_table[i].type2 += *code_string++;
1393 break;
1394 }
1395 }
1396 }
1397
1398 stream->code_table = code_table;
1399 return 0;
1400}
1401
1402/* This function applies a code table delta and returns an actual code table. */
1403static int
1404xd3_apply_table_encoding (xd3_stream *in_stream, const uint8_t *data, usize_t size)
1405{
1406 uint8_t dflt_string[CODE_TABLE_STRING_SIZE];
1407 uint8_t code_string[CODE_TABLE_STRING_SIZE];
1408 usize_t code_size;
1409 xd3_stream stream;
1410 xd3_source source;
1411 int ret;
1412
1413 /* The default code table string can be cached if alternate code tables ever become
1414 * popular. */
1415 xd3_compute_code_table_string (xd3_rfc3284_code_table (), dflt_string);
1416
1417 source.size = CODE_TABLE_STRING_SIZE;
1418 source.blksize = CODE_TABLE_STRING_SIZE;
1419 source.onblk = CODE_TABLE_STRING_SIZE;
1420 source.name = "rfc3284 code table";
1421 source.curblk = dflt_string;
1422 source.curblkno = 0;
1423
1424 if ((ret = xd3_config_stream (& stream, NULL)) ||
1425 (ret = xd3_set_source (& stream, & source)) ||
1426 (ret = xd3_decode_completely (& stream, data, size, code_string, & code_size, sizeof (code_string))))
1427 {
1428 in_stream->msg = stream.msg;
1429 goto fail;
1430 }
1431
1432 if (code_size != sizeof (code_string))
1433 {
1434 stream.msg = "corrupt code-table encoding";
1435 ret = EINVAL;
1436 goto fail;
1437 }
1438
1439 if ((ret = xd3_apply_table_string (in_stream, code_string))) { goto fail; }
1440
1441 fail:
1442
1443 xd3_free_stream (& stream);
1444 return ret;
1445}
1446
1447/******************************************************************************************
1448 Permute stuff
1449 ******************************************************************************************/
1450
1451#if HASH_PERMUTE == 0
1452#define PERMUTE(x) (x)
1453#else
1454#define PERMUTE(x) (__single_hash[(uint)x])
1455
1456static const uint16_t __single_hash[256] =
1457{
1458 /* Random numbers generated using SLIB's pseudo-random number generator. This hashes
1459 * the input alphabet. */
1460 0xbcd1, 0xbb65, 0x42c2, 0xdffe, 0x9666, 0x431b, 0x8504, 0xeb46,
1461 0x6379, 0xd460, 0xcf14, 0x53cf, 0xdb51, 0xdb08, 0x12c8, 0xf602,
1462 0xe766, 0x2394, 0x250d, 0xdcbb, 0xa678, 0x02af, 0xa5c6, 0x7ea6,
1463 0xb645, 0xcb4d, 0xc44b, 0xe5dc, 0x9fe6, 0x5b5c, 0x35f5, 0x701a,
1464 0x220f, 0x6c38, 0x1a56, 0x4ca3, 0xffc6, 0xb152, 0x8d61, 0x7a58,
1465 0x9025, 0x8b3d, 0xbf0f, 0x95a3, 0xe5f4, 0xc127, 0x3bed, 0x320b,
1466 0xb7f3, 0x6054, 0x333c, 0xd383, 0x8154, 0x5242, 0x4e0d, 0x0a94,
1467 0x7028, 0x8689, 0x3a22, 0x0980, 0x1847, 0xb0f1, 0x9b5c, 0x4176,
1468 0xb858, 0xd542, 0x1f6c, 0x2497, 0x6a5a, 0x9fa9, 0x8c5a, 0x7743,
1469 0xa8a9, 0x9a02, 0x4918, 0x438c, 0xc388, 0x9e2b, 0x4cad, 0x01b6,
1470 0xab19, 0xf777, 0x365f, 0x1eb2, 0x091e, 0x7bf8, 0x7a8e, 0x5227,
1471 0xeab1, 0x2074, 0x4523, 0xe781, 0x01a3, 0x163d, 0x3b2e, 0x287d,
1472 0x5e7f, 0xa063, 0xb134, 0x8fae, 0x5e8e, 0xb7b7, 0x4548, 0x1f5a,
1473 0xfa56, 0x7a24, 0x900f, 0x42dc, 0xcc69, 0x02a0, 0x0b22, 0xdb31,
1474 0x71fe, 0x0c7d, 0x1732, 0x1159, 0xcb09, 0xe1d2, 0x1351, 0x52e9,
1475 0xf536, 0x5a4f, 0xc316, 0x6bf9, 0x8994, 0xb774, 0x5f3e, 0xf6d6,
1476 0x3a61, 0xf82c, 0xcc22, 0x9d06, 0x299c, 0x09e5, 0x1eec, 0x514f,
1477 0x8d53, 0xa650, 0x5c6e, 0xc577, 0x7958, 0x71ac, 0x8916, 0x9b4f,
1478 0x2c09, 0x5211, 0xf6d8, 0xcaaa, 0xf7ef, 0x287f, 0x7a94, 0xab49,
1479 0xfa2c, 0x7222, 0xe457, 0xd71a, 0x00c3, 0x1a76, 0xe98c, 0xc037,
1480 0x8208, 0x5c2d, 0xdfda, 0xe5f5, 0x0b45, 0x15ce, 0x8a7e, 0xfcad,
1481 0xaa2d, 0x4b5c, 0xd42e, 0xb251, 0x907e, 0x9a47, 0xc9a6, 0xd93f,
1482 0x085e, 0x35ce, 0xa153, 0x7e7b, 0x9f0b, 0x25aa, 0x5d9f, 0xc04d,
1483 0x8a0e, 0x2875, 0x4a1c, 0x295f, 0x1393, 0xf760, 0x9178, 0x0f5b,
1484 0xfa7d, 0x83b4, 0x2082, 0x721d, 0x6462, 0x0368, 0x67e2, 0x8624,
1485 0x194d, 0x22f6, 0x78fb, 0x6791, 0xb238, 0xb332, 0x7276, 0xf272,
1486 0x47ec, 0x4504, 0xa961, 0x9fc8, 0x3fdc, 0xb413, 0x007a, 0x0806,
1487 0x7458, 0x95c6, 0xccaa, 0x18d6, 0xe2ae, 0x1b06, 0xf3f6, 0x5050,
1488 0xc8e8, 0xf4ac, 0xc04c, 0xf41c, 0x992f, 0xae44, 0x5f1b, 0x1113,
1489 0x1738, 0xd9a8, 0x19ea, 0x2d33, 0x9698, 0x2fe9, 0x323f, 0xcde2,
1490 0x6d71, 0xe37d, 0xb697, 0x2c4f, 0x4373, 0x9102, 0x075d, 0x8e25,
1491 0x1672, 0xec28, 0x6acb, 0x86cc, 0x186e, 0x9414, 0xd674, 0xd1a5
1492};
1493#endif
1494
1495/******************************************************************************************
1496 Ctable stuff
1497 ******************************************************************************************/
1498
1499#if HASH_PRIME
1500static const usize_t __primes[] =
1501{
1502 11, 19, 37, 73, 109,
1503 163, 251, 367, 557, 823,
1504 1237, 1861, 2777, 4177, 6247,
1505 9371, 14057, 21089, 31627, 47431,
1506 71143, 106721, 160073, 240101, 360163,
1507 540217, 810343, 1215497, 1823231, 2734867,
1508 4102283, 6153409, 9230113, 13845163, 20767711,
1509 31151543, 46727321, 70090921, 105136301, 157704401,
1510 236556601, 354834919, 532252367, 798378509, 1197567719,
1511 1796351503
1512};
1513
1514static const usize_t __nprimes = SIZEOF_ARRAY (__primes);
1515#endif
1516
1517static INLINE uint32_t
1518xd3_checksum_hash (const xd3_hash_cfg *cfg, const uint32_t cksum)
1519{
1520#if HASH_PRIME
1521 /* If the table is prime compute the modulus. */
1522 return (cksum % cfg->size);
1523#else
1524 /* If the table is power-of-two compute the mask.*/
1525 return (cksum ^ (cksum >> cfg->shift)) & cfg->mask;
1526#endif
1527}
1528
1529/******************************************************************************************
1530 Create the hash table.
1531 ******************************************************************************************/
1532
1533static INLINE void
1534xd3_swap_uint8p (uint8_t** p1, uint8_t** p2)
1535{
1536 uint8_t *t = (*p1);
1537 (*p1) = (*p2);
1538 (*p2) = t;
1539}
1540
1541static INLINE void
1542xd3_swap_usize_t (usize_t* p1, usize_t* p2)
1543{
1544 usize_t t = (*p1);
1545 (*p1) = (*p2);
1546 (*p2) = t;
1547}
1548
1549/* It's not constant time, but it computes the log. */
1550static int
1551xd3_check_pow2 (usize_t value, usize_t *logof)
1552{
1553 usize_t x = 1;
1554 usize_t nolog;
1555 if (logof == NULL) {
1556 logof = &nolog;
1557 }
1558
1559 *logof = 0;
1560
1561 for (; x != 0; x <<= 1, *logof += 1)
1562 {
1563 if (x == value)
1564 {
1565 return 0;
1566 }
1567 }
1568
1569 return EINVAL;
1570}
1571
1572static usize_t
1573xd3_round_blksize (usize_t sz, usize_t blksz)
1574{
1575 usize_t mod = sz & (blksz-1);
1576
1577 XD3_ASSERT (xd3_check_pow2 (blksz, NULL) == 0);
1578
1579 return mod ? (sz + (blksz - mod)) : sz;
1580}
1581
1582#if XD3_ENCODER
1583#if !HASH_PRIME
1584static usize_t
1585xd3_size_log2 (usize_t slots)
1586{
1587 int bits = 28; /* This should not be an unreasonable limit. */
1588 int i;
1589
1590 for (i = 3; i <= bits; i += 1)
1591 {
1592 if (slots < (1 << i))
1593 {
1594 bits = i-1;
1595 break;
1596 }
1597 }
1598
1599 return bits;
1600}
1601#endif
1602
1603static void
1604xd3_size_hashtable (xd3_stream *stream,
1605 usize_t space,
1606 xd3_hash_cfg *cfg)
1607{
1608 usize_t slots = space / sizeof (usize_t);
1609
1610 /* initialize ctable: the number of hash buckets is computed from the table of primes or
1611 * the nearest power-of-two, in both cases rounding down in favor of using less
1612 * memory. */
1613
1614#if HASH_PRIME
1615 usize_t i;
1616
1617 cfg->size = __primes[__nprimes-1];
1618
1619 for (i = 1; i < __nprimes; i += 1)
1620 {
1621 if (slots < __primes[i])
1622 {
1623 cfg->size = __primes[i-1];
1624 break;
1625 }
1626 }
1627#else
1628 int bits = xd3_size_log2 (slots);
1629
1630 cfg->size = (1 << bits);
1631 cfg->mask = (cfg->size - 1);
1632 cfg->shift = min (32 - bits, 16);
1633#endif
1634}
1635#endif
1636
1637/******************************************************************************************
1638 Cksum function
1639 ******************************************************************************************/
1640
1641/* OPT: It turns out that the compiler can't unroll the loop as well as you can by hand. */
1642static INLINE uint32_t
1643xd3_lcksum (const uint8_t *seg, const int ln)
1644{
1645 int i = 0;
1646 uint32_t low = 0;
1647 uint32_t high = 0;
1648
1649 for (; i < ln; i += 1)
1650 {
1651 low += PERMUTE(*seg++);
1652 high += low;
1653 }
1654
1655 return ((high & 0xffff) << 16) | (low & 0xffff);
1656}
1657
1658#if ARITH_SMALL_CKSUM
1659static INLINE usize_t
1660xd3_scksum (const uint8_t *seg, const int ln)
1661{
1662 usize_t c;
1663 /* The -1 is because UPDATE operates on seg[1..ln] */
1664 SMALL_CKSUM_UPDATE (c,(seg-1),ln);
1665 return c;
1666}
1667#else
1668#define xd3_scksum(seg,ln) xd3_lcksum(seg,ln)
1669#endif
1670
1671/******************************************************************************************
1672 Adler32 stream function: code copied from Zlib, defined in RFC1950
1673 ******************************************************************************************/
1674
1675#define A32_BASE 65521L /* Largest prime smaller than 2^16 */
1676#define A32_NMAX 5552 /* NMAX is the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1 */
1677
1678#define A32_DO1(buf,i) {s1 += buf[i]; s2 += s1;}
1679#define A32_DO2(buf,i) A32_DO1(buf,i); A32_DO1(buf,i+1);
1680#define A32_DO4(buf,i) A32_DO2(buf,i); A32_DO2(buf,i+2);
1681#define A32_DO8(buf,i) A32_DO4(buf,i); A32_DO4(buf,i+4);
1682#define A32_DO16(buf) A32_DO8(buf,0); A32_DO8(buf,8);
1683
1684static unsigned long adler32 (unsigned long adler, const uint8_t *buf, usize_t len)
1685{
1686 unsigned long s1 = adler & 0xffff;
1687 unsigned long s2 = (adler >> 16) & 0xffff;
1688 int k;
1689
1690 while (len > 0)
1691 {
1692 k = (len < A32_NMAX) ? len : A32_NMAX;
1693 len -= k;
1694
1695 while (k >= 16)
1696 {
1697 A32_DO16(buf);
1698 buf += 16;
1699 k -= 16;
1700 }
1701
1702 if (k != 0)
1703 {
1704 do
1705 {
1706 s1 += *buf++;
1707 s2 += s1;
1708 }
1709 while (--k);
1710 }
1711
1712 s1 %= A32_BASE;
1713 s2 %= A32_BASE;
1714 }
1715
1716 return (s2 << 16) | s1;
1717}
1718
1719/******************************************************************************************
1720 Run-length function
1721 ******************************************************************************************/
1722
1723static INLINE int
1724xd3_comprun (const uint8_t *seg, int slook, uint8_t *run_cp)
1725{
1726 int i;
1727 int run_l = 0;
1728 uint8_t run_c = 0;
1729
1730 for (i = 0; i < slook; i += 1)
1731 {
1732 NEXTRUN(seg[i]);
1733 }
1734
1735 (*run_cp) = run_c;
1736
1737 return run_l;
1738}
1739
1740/******************************************************************************************
1741 Basic encoder/decoder functions
1742 ******************************************************************************************/
1743
1744static int
1745xd3_decode_byte (xd3_stream *stream, uint *val)
1746{
1747 if (stream->avail_in == 0)
1748 {
1749 stream->msg = "further input required";
1750 return XD3_INPUT;
1751 }
1752
1753 (*val) = stream->next_in[0];
1754
1755 DECODE_INPUT (1);
1756 return 0;
1757}
1758
1759static int
1760xd3_decode_bytes (xd3_stream *stream, uint8_t *buf, usize_t *pos, usize_t size)
1761{
1762 usize_t want;
1763 usize_t take;
1764
1765 /* Note: The case where (*pos == size) happens when a zero-length appheader or code
1766 * table is transmitted, but there is nothing in the standard against that. */
1767
1768 while (*pos < size)
1769 {
1770 if (stream->avail_in == 0)
1771 {
1772 stream->msg = "further input required";
1773 return XD3_INPUT;
1774 }
1775
1776 want = size - *pos;
1777 take = min (want, stream->avail_in);
1778
1779 memcpy (buf + *pos, stream->next_in, take);
1780
1781 DECODE_INPUT (take);
1782 (*pos) += take;
1783 }
1784
1785 return 0;
1786}
1787
1788#if XD3_ENCODER
1789static int
1790xd3_emit_byte (xd3_stream *stream,
1791 xd3_output **outputp,
1792 uint8_t code)
1793{
1794 xd3_output *output = (*outputp);
1795
1796 if (output->next == output->avail)
1797 {
1798 xd3_output *aoutput;
1799
1800 if ((aoutput = xd3_alloc_output (stream, output)) == NULL)
1801 {
1802 return ENOMEM;
1803 }
1804
1805 output = (*outputp) = aoutput;
1806 }
1807
1808 output->base[output->next++] = code;
1809
1810 return 0;
1811}
1812
1813static int
1814xd3_emit_bytes (xd3_stream *stream,
1815 xd3_output **outputp,
1816 const uint8_t *base,
1817 usize_t size)
1818{
1819 xd3_output *output = (*outputp);
1820
1821 do
1822 {
1823 usize_t take;
1824
1825 if (output->next == output->avail)
1826 {
1827 xd3_output *aoutput;
1828
1829 if ((aoutput = xd3_alloc_output (stream, output)) == NULL)
1830 {
1831 return ENOMEM;
1832 }
1833
1834 output = (*outputp) = aoutput;
1835 }
1836
1837 take = min (output->avail - output->next, size);
1838
1839 memcpy (output->base + output->next, base, take);
1840
1841 output->next += take;
1842 size -= take;
1843 base += take;
1844 }
1845 while (size > 0);
1846
1847 return 0;
1848}
1849#endif /* XD3_ENCODER */
1850
1851/******************************************************************************************
1852 Integer encoder/decoder functions
1853 ******************************************************************************************/
1854
1855#define DECODE_INTEGER_TYPE(PART,OFLOW) \
1856 while (stream->avail_in != 0) \
1857 { \
1858 uint next = stream->next_in[0]; \
1859 \
1860 DECODE_INPUT(1); \
1861 \
1862 if (PART & OFLOW) \
1863 { \
1864 stream->msg = "overflow in decode_integer"; \
1865 return EINVAL; \
1866 } \
1867 \
1868 PART = (PART << 7) | (next & 127); \
1869 \
1870 if ((next & 128) == 0) \
1871 { \
1872 (*val) = PART; \
1873 PART = 0; \
1874 return 0; \
1875 } \
1876 } \
1877 \
1878 stream->msg = "further input required"; \
1879 return XD3_INPUT
1880
1881#define READ_INTEGER_TYPE(TYPE, OFLOW) \
1882 TYPE val = 0; \
1883 const uint8_t *inp = (*inpp); \
1884 uint next; \
1885 \
1886 do \
1887 { \
1888 if (inp == max) \
1889 { \
1890 stream->msg = "end-of-input in read_integer"; \
1891 return EINVAL; \
1892 } \
1893 \
1894 if (val & OFLOW) \
1895 { \
1896 stream->msg = "overflow in read_intger"; \
1897 return EINVAL; \
1898 } \
1899 \
1900 next = (*inp++); \
1901 val = (val << 7) | (next & 127); \
1902 } \
1903 while (next & 128); \
1904 \
1905 (*valp) = val; \
1906 (*inpp) = inp; \
1907 \
1908 return 0
1909
1910#define EMIT_INTEGER_TYPE() \
1911 /* max 64-bit value in base-7 encoding is 9.1 bytes */ \
1912 uint8_t buf[10]; \
1913 usize_t bufi = 10; \
1914 \
1915 XD3_ASSERT (num >= 0); \
1916 \
1917 /* This loop performs division and turns on all MSBs. */ \
1918 do \
1919 { \
1920 buf[--bufi] = (num & 127) | 128; \
1921 num >>= 7; \
1922 } \
1923 while (num != 0); \
1924 \
1925 /* Turn off MSB of the last byte. */ \
1926 buf[9] &= 127; \
1927 \
1928 XD3_ASSERT (bufi >= 0); \
1929 \
1930 return xd3_emit_bytes (stream, output, buf + bufi, 10 - bufi)
1931
1932#define IF_SIZEOF32(x) if (num < (1U << (7 * (x)))) return (x);
1933#define IF_SIZEOF64(x) if (num < (1ULL << (7 * (x)))) return (x);
1934
1935#if USE_UINT32
1936static uint
1937xd3_sizeof_uint32_t (uint32_t num)
1938{
1939 IF_SIZEOF32(1);
1940 IF_SIZEOF32(2);
1941 IF_SIZEOF32(3);
1942 IF_SIZEOF32(4);
1943
1944 return 5;
1945}
1946
1947static int
1948xd3_decode_uint32_t (xd3_stream *stream, uint32_t *val)
1949{ DECODE_INTEGER_TYPE (stream->dec_32part, UINT32_OFLOW_MASK); }
1950static int
1951xd3_read_uint32_t (xd3_stream *stream, const uint8_t **inpp, const uint8_t *max, uint32_t *valp)
1952{ READ_INTEGER_TYPE (uint32_t, UINT32_OFLOW_MASK); }
1953#if XD3_ENCODER
1954static int
1955xd3_emit_uint32_t (xd3_stream *stream, xd3_output **output, uint32_t num)
1956{ EMIT_INTEGER_TYPE (); }
1957#endif
1958#endif
1959
1960#if USE_UINT64
1961/* We only ever decode offsets, but the other three are part of the regression test
1962 * anyway. */
1963static int
1964xd3_decode_uint64_t (xd3_stream *stream, uint64_t *val)
1965{ DECODE_INTEGER_TYPE (stream->dec_64part, UINT64_OFLOW_MASK); }
1966#if REGRESSION_TEST
1967#if XD3_ENCODER
1968static int
1969xd3_emit_uint64_t (xd3_stream *stream, xd3_output **output, uint64_t num)
1970{ EMIT_INTEGER_TYPE (); }
1971#endif
1972static int
1973xd3_read_uint64_t (xd3_stream *stream, const uint8_t **inpp, const uint8_t *max, uint64_t *valp)
1974{ READ_INTEGER_TYPE (uint64_t, UINT64_OFLOW_MASK); }
1975
1976static uint
1977xd3_sizeof_uint64_t (uint64_t num)
1978{
1979 IF_SIZEOF64(1);
1980 IF_SIZEOF64(2);
1981 IF_SIZEOF64(3);
1982 IF_SIZEOF64(4);
1983 IF_SIZEOF64(5);
1984 IF_SIZEOF64(6);
1985 IF_SIZEOF64(7);
1986 IF_SIZEOF64(8);
1987 IF_SIZEOF64(9);
1988
1989 return 10;
1990}
1991#endif
1992#endif
1993
1994/******************************************************************************************
1995 Debug instruction statistics
1996 ******************************************************************************************/
1997
1998#if XD3_DEBUG
1999static void
2000xd3_count_inst (xd3_stream *stream, uint code)
2001{
2002 IF_DEBUG1 ({
2003 if (stream->i_freqs == NULL &&
2004 (stream->i_freqs = xd3_alloc0 (stream, sizeof (stream->i_freqs[0]), 256)) == NULL) { abort (); }
2005
2006 stream->i_freqs[code] += 1;
2007 });
2008 stream->n_ibytes += 1;
2009}
2010
2011static void
2012xd3_count_mode (xd3_stream *stream, uint mode)
2013{
2014 IF_DEBUG1 ({
2015 if (stream->i_modes == NULL &&
2016 (stream->i_modes = xd3_alloc0 (stream, sizeof (stream->i_modes[0]), TOTAL_MODES (stream))) == NULL) { abort (); }
2017 stream->i_modes[mode] += 1;
2018 });
2019}
2020
2021static void
2022xd3_count_size (xd3_stream *stream, usize_t size)
2023{
2024 IF_DEBUG1({
2025 if (stream->i_sizes == NULL &&
2026 (stream->i_sizes = xd3_alloc0 (stream, sizeof (stream->i_sizes[0]), 64)) == NULL) { abort (); }
2027
2028 if (size < 64) { stream->i_sizes[size] += 1; }
2029 });
2030 stream->n_sbytes += xd3_sizeof_size (size);
2031}
2032#endif
2033
2034/******************************************************************************************
2035 Address cache stuff
2036 ******************************************************************************************/
2037
2038static int
2039xd3_alloc_cache (xd3_stream *stream)
2040{
2041 if (((stream->acache.s_near > 0) &&
2042 (stream->acache.near_array = xd3_alloc (stream, stream->acache.s_near, sizeof (usize_t))) == NULL) ||
2043 ((stream->acache.s_same > 0) &&
2044 (stream->acache.same_array = xd3_alloc (stream, stream->acache.s_same * 256, sizeof (usize_t))) == NULL))
2045 {
2046 return ENOMEM;
2047 }
2048
2049 return 0;
2050}
2051
2052static void
2053xd3_init_cache (xd3_addr_cache* acache)
2054{
2055 if (acache->s_near > 0)
2056 {
2057 memset (acache->near_array, 0, acache->s_near * sizeof (usize_t));
2058 acache->next_slot = 0;
2059 }
2060
2061 if (acache->s_same > 0)
2062 {
2063 memset (acache->same_array, 0, acache->s_same * 256 * sizeof (usize_t));
2064 }
2065}
2066
2067static void
2068xd3_update_cache (xd3_addr_cache* acache, usize_t addr)
2069{
2070 if (acache->s_near > 0)
2071 {
2072 acache->near_array[acache->next_slot] = addr;
2073 acache->next_slot = (acache->next_slot + 1) % acache->s_near;
2074 }
2075
2076 if (acache->s_same > 0)
2077 {
2078 acache->same_array[addr % (acache->s_same*256)] = addr;
2079 }
2080}
2081
2082#if XD3_ENCODER
2083/* OPT: this gets called a lot, can it be optimized? */
2084static int
2085xd3_encode_address (xd3_stream *stream, usize_t addr, usize_t here, uint8_t* mode)
2086{
2087 usize_t d, bestd;
2088 int i, bestm, ret;
2089 xd3_addr_cache* acache = & stream->acache;
2090
2091#define SMALLEST_INT(x) do { if (((x) & ~127) == 0) { goto good; } } while (0)
2092
2093 /* Attempt to find the address mode that yields the smallest integer value for "d", the
2094 * encoded address value, thereby minimizing the encoded size of the address. */
2095 bestd = addr;
2096 bestm = VCD_SELF;
2097
2098 XD3_ASSERT (addr < here);
2099
2100 SMALLEST_INT (bestd);
2101
2102 if ((d = here-addr) < bestd)
2103 {
2104 bestd = d;
2105 bestm = VCD_HERE;
2106
2107 SMALLEST_INT (bestd);
2108 }
2109
2110 for (i = 0; i < acache->s_near; i += 1)
2111 {
2112 d = addr - acache->near_array[i];
2113
2114 if (d >= 0 && d < bestd)
2115 {
2116 bestd = d;
2117 bestm = i+2; /* 2 counts the VCD_SELF, VCD_HERE modes */
2118
2119 SMALLEST_INT (bestd);
2120 }
2121 }
2122
2123 if (acache->s_same > 0 && acache->same_array[d = addr%(acache->s_same*256)] == addr)
2124 {
2125 bestd = d%256;
2126 bestm = acache->s_near + 2 + d/256; /* 2 + s_near offsets past the VCD_NEAR modes */
2127
2128 if ((ret = xd3_emit_byte (stream, & ADDR_TAIL (stream), bestd))) { return ret; }
2129 }
2130 else
2131 {
2132 good:
2133
2134 if ((ret = xd3_emit_size (stream, & ADDR_TAIL (stream), bestd))) { return ret; }
2135 }
2136
2137 xd3_update_cache (acache, addr);
2138
2139 IF_DEBUG (xd3_count_mode (stream, bestm));
2140
2141 (*mode) += bestm;
2142
2143 return 0;
2144}
2145#endif
2146
2147static int
2148xd3_decode_address (xd3_stream *stream, usize_t here, uint mode, const uint8_t **inpp, const uint8_t *max, uint32_t *valp)
2149{
2150 int ret;
2151 uint same_start = 2 + stream->acache.s_near;
2152
2153 if (mode < same_start)
2154 {
2155 if ((ret = xd3_read_size (stream, inpp, max, valp))) { return ret; }
2156
2157 switch (mode)
2158 {
2159 case VCD_SELF:
2160 break;
2161 case VCD_HERE:
2162 (*valp) = here - (*valp);
2163 break;
2164 default:
2165 (*valp) += stream->acache.near_array[mode - 2];
2166 break;
2167 }
2168 }
2169 else
2170 {
2171 if (*inpp == max)
2172 {
2173 stream->msg = "address underflow";
2174 return EINVAL;
2175 }
2176
2177 mode -= same_start;
2178
2179 (*valp) = stream->acache.same_array[mode*256 + (**inpp)];
2180
2181 (*inpp) += 1;
2182 }
2183
2184 xd3_update_cache (& stream->acache, *valp);
2185
2186 return 0;
2187}
2188
2189/******************************************************************************************
2190 Alloc/free
2191 ******************************************************************************************/
2192
2193static void*
2194__xd3_alloc_func (void* opaque, usize_t items, usize_t size)
2195{
2196 return malloc (items * size);
2197}
2198
2199static void
2200__xd3_free_func (void* opaque, void* address)
2201{
2202 free (address);
2203}
2204
2205static void*
2206xd3_alloc (xd3_stream *stream,
2207 usize_t elts,
2208 usize_t size)
2209{
2210 void *a = stream->alloc (stream->opaque, elts, size);
2211
2212 if (a != NULL)
2213 {
2214 IF_DEBUG (stream->alloc_cnt += 1);
2215 }
2216 else
2217 {
2218 stream->msg = "out of memory";
2219 }
2220
2221 return a;
2222}
2223
2224static void
2225xd3_free (xd3_stream *stream,
2226 void *ptr)
2227{
2228 if (ptr != NULL)
2229 {
2230 IF_DEBUG (stream->free_cnt += 1);
2231 XD3_ASSERT (stream->free_cnt <= stream->alloc_cnt);
2232 stream->free (stream->opaque, ptr);
2233 }
2234}
2235
2236#if XD3_ENCODER
2237static void*
2238xd3_alloc0 (xd3_stream *stream,
2239 usize_t elts,
2240 usize_t size)
2241{
2242 void *a = xd3_alloc (stream, elts, size);
2243
2244 if (a != NULL)
2245 {
2246 memset (a, 0, elts * size);
2247 }
2248
2249 return a;
2250}
2251
2252static xd3_output*
2253xd3_alloc_output (xd3_stream *stream,
2254 xd3_output *old_output)
2255{
2256 xd3_output *output;
2257 uint8_t *base;
2258
2259 if (stream->enc_free != NULL)
2260 {
2261 output = stream->enc_free;
2262 stream->enc_free = output->next_page;
2263 }
2264 else
2265 {
2266 if ((output = xd3_alloc (stream, 1, sizeof (xd3_output))) == NULL)
2267 {
2268 return NULL;
2269 }
2270
2271 if ((base = xd3_alloc (stream, XD3_ALLOCSIZE, sizeof (uint8_t))) == NULL)
2272 {
2273 xd3_free (stream, output);
2274 return NULL;
2275 }
2276
2277 output->base = base;
2278 output->avail = XD3_ALLOCSIZE;
2279 }
2280
2281 output->next = 0;
2282
2283 if (old_output)
2284 {
2285 old_output->next_page = output;
2286 }
2287
2288 output->next_page = NULL;
2289
2290 return output;
2291}
2292
2293static usize_t
2294xd3_sizeof_output (xd3_output *output)
2295{
2296 usize_t s = 0;
2297
2298 for (; output; output = output->next_page)
2299 {
2300 s += output->next;
2301 }
2302
2303 return s;
2304}
2305
2306static void
2307xd3_freelist_output (xd3_stream *stream,
2308 xd3_output *output)
2309{
2310 xd3_output *tmp;
2311
2312 while (output)
2313 {
2314 tmp = output;
2315 output = output->next_page;
2316
2317 tmp->next = 0;
2318 tmp->next_page = stream->enc_free;
2319 stream->enc_free = tmp;
2320 }
2321}
2322
2323static void
2324xd3_free_output (xd3_stream *stream,
2325 xd3_output *output)
2326{
2327 xd3_output *next;
2328
2329 again:
2330 if (output == NULL)
2331 {
2332 return;
2333 }
2334
2335 next = output->next_page;
2336
2337 xd3_free (stream, output->base);
2338 xd3_free (stream, output);
2339
2340 output = next;
2341 goto again;
2342}
2343#endif /* XD3_ENCODER */
2344
2345void
2346xd3_free_stream (xd3_stream *stream)
2347{
2348
2349 xd3_free (stream, stream->large_table);
2350 xd3_free (stream, stream->small_table);
2351 xd3_free (stream, stream->small_prev);
2352 xd3_free (stream, stream->iopt.buffer);
2353
2354#if XD3_ENCODER
2355 {
2356 int i;
2357 for (i = 0; i < ENC_SECTS; i += 1)
2358 {
2359 xd3_free_output (stream, stream->enc_heads[i]);
2360 }
2361 xd3_free_output (stream, stream->enc_free);
2362 }
2363#endif
2364
2365 xd3_free (stream, stream->acache.near_array);
2366 xd3_free (stream, stream->acache.same_array);
2367
2368 xd3_free (stream, stream->inst_sect.copied1);
2369 xd3_free (stream, stream->addr_sect.copied1);
2370 xd3_free (stream, stream->data_sect.copied1);
2371
2372 xd3_free (stream, stream->dec_buffer);
2373 xd3_free (stream, (uint8_t*) stream->dec_lastwin);
2374
2375 xd3_free (stream, stream->buf_in);
2376 xd3_free (stream, stream->dec_appheader);
2377 xd3_free (stream, stream->dec_codetbl);
2378 xd3_free (stream, stream->code_table_alloc);
2379
2380#if SECONDARY_ANY
2381 xd3_free (stream, stream->inst_sect.copied2);
2382 xd3_free (stream, stream->addr_sect.copied2);
2383 xd3_free (stream, stream->data_sect.copied2);
2384
2385 if (stream->sec_type != NULL)
2386 {
2387 stream->sec_type->destroy (stream, stream->sec_stream_d);
2388 stream->sec_type->destroy (stream, stream->sec_stream_i);
2389 stream->sec_type->destroy (stream, stream->sec_stream_a);
2390 }
2391#endif
2392
2393 IF_DEBUG (xd3_free (stream, stream->i_freqs));
2394 IF_DEBUG (xd3_free (stream, stream->i_modes));
2395 IF_DEBUG (xd3_free (stream, stream->i_sizes));
2396
2397 XD3_ASSERT (stream->alloc_cnt == stream->free_cnt);
2398
2399 memset (stream, 0, sizeof (xd3_stream));
2400}
2401
2402#if (XD3_DEBUG || VCDIFF_TOOLS)
2403static const char*
2404xd3_rtype_to_string (xd3_rtype type, int print_mode)
2405{
2406 switch (type)
2407 {
2408 case XD3_NOOP:
2409 return "NOOP ";
2410 case XD3_RUN:
2411 return "RUN ";
2412 case XD3_ADD:
2413 return "ADD ";
2414 default: break;
2415 }
2416 if (! print_mode)
2417 {
2418 return "CPY ";
2419 }
2420 switch (type)
2421 {
2422 case XD3_CPY + 0: return "CPY_0";
2423 case XD3_CPY + 1: return "CPY_1";
2424 case XD3_CPY + 2: return "CPY_2";
2425 case XD3_CPY + 3: return "CPY_3";
2426 case XD3_CPY + 4: return "CPY_4";
2427 case XD3_CPY + 5: return "CPY_5";
2428 case XD3_CPY + 6: return "CPY_6";
2429 case XD3_CPY + 7: return "CPY_7";
2430 case XD3_CPY + 8: return "CPY_8";
2431 case XD3_CPY + 9: return "CPY_9";
2432 default: return "CPY>9";
2433 }
2434}
2435#endif
2436
2437/******************************************************************************************
2438 Stream configuration
2439 ******************************************************************************************/
2440
2441int
2442xd3_config_stream(xd3_stream *stream,
2443 xd3_config *config)
2444{
2445 int ret;
2446 xd3_config defcfg;
2447 const xd3_smatcher* smatcher;
2448
2449 if (config == NULL)
2450 {
2451 config = & defcfg;
2452 memset (config, 0, sizeof (*config));
2453 }
2454
2455 /* Initial setup: no error checks yet */
2456 memset (stream, 0, sizeof (*stream));
2457
2458 stream->memsize = config->memsize ? config->memsize : XD3_DEFAULT_MEMSIZE;
2459 stream->winsize = config->winsize ? config->winsize : XD3_DEFAULT_WINSIZE;
2460 stream->sprevsz = config->sprevsz ? config->sprevsz : XD3_DEFAULT_SPREVSZ;
2461 stream->srcwin_size = config->srcwin_size ? config->srcwin_size : XD3_DEFAULT_START_CKSUM_ADVANCE;
2462 stream->srcwin_maxsz = config->srcwin_maxsz ? config->srcwin_maxsz : XD3_DEFAULT_MAX_CKSUM_ADVANCE;
2463 stream->iopt_size = config->iopt_size ? config->iopt_size : XD3_DEFAULT_IOPT_SIZE;
2464 stream->getblk = config->getblk;
2465 stream->alloc = config->alloc ? config->alloc : __xd3_alloc_func;
2466 stream->free = config->freef ? config->freef : __xd3_free_func;
2467 stream->opaque = config->opaque;
2468 stream->flags = config->flags;
2469
2470 XD3_ASSERT (stream->winsize > 0);
2471
2472 /* Secondary setup. */
2473 stream->sec_data = config->sec_data;
2474 stream->sec_inst = config->sec_inst;
2475 stream->sec_addr = config->sec_addr;
2476
2477 stream->sec_data.data_type = DATA_SECTION;
2478 stream->sec_inst.data_type = INST_SECTION;
2479 stream->sec_addr.data_type = ADDR_SECTION;
2480
2481 /* Check static sizes. */
2482 if (sizeof (usize_t) != SIZEOF_USIZE_T ||
2483 sizeof (xoff_t) != SIZEOF_XOFF_T ||
2484 (ret = xd3_check_pow2(XD3_ALLOCSIZE, NULL)))
2485 {
2486 stream->msg = "incorrect compilation: wrong integer sizes";
2487 return EINVAL;
2488 }
2489
2490 /* Check/set secondary compressor. */
2491 switch (stream->flags & XD3_SEC_TYPE)
2492 {
2493 case 0:
2494 if (stream->flags & XD3_SEC_OTHER)
2495 {
2496 stream->msg = "XD3_SEC flags require a secondary compressor type";
2497 return EINVAL;
2498 }
2499 break;
2500 case XD3_SEC_FGK:
2501 FGK_CASE (stream);
2502 case XD3_SEC_DJW:
2503 DJW_CASE (stream);
2504 default:
2505 stream->msg = "too many secondary compressor types set";
2506 return EINVAL;
2507 }
2508
2509 /* Check/set encoder code table. */
2510 switch (stream->flags & XD3_ALT_CODE_TABLE) {
2511 case 0:
2512 stream->code_table_desc = & __rfc3284_code_table_desc;
2513 stream->code_table_func = xd3_rfc3284_code_table;
2514 break;
2515#if GENERIC_ENCODE_TABLES
2516 case XD3_ALT_CODE_TABLE:
2517 stream->code_table_desc = & __alternate_code_table_desc;
2518 stream->code_table_func = xd3_alternate_code_table;
2519 stream->comp_table_func = xd3_compute_alternate_table_encoding;
2520 break;
2521#endif
2522 default:
2523 stream->msg = "alternate code table support was not compiled";
2524 return EINVAL;
2525 }
2526
2527 /* Check sprevsz */
2528 if (config->small_chain == 1)
2529 {
2530 stream->sprevsz = 0;
2531 }
2532 else
2533 {
2534 if ((ret = xd3_check_pow2 (stream->sprevsz, NULL)))
2535 {
2536 stream->msg = "sprevsz is required to be a power of two";
2537 return EINVAL;
2538 }
2539
2540 stream->sprevmask = stream->sprevsz - 1;
2541 }
2542
2543 /* Default scanner settings. */
2544 switch (config->smatch_cfg)
2545 {
2546 IF_BUILD_SOFT(case XD3_SMATCH_SOFT:
2547 smatcher = & __smatcher_soft; break;
2548
2549 if (config->large_look < MIN_MATCH ||
2550 config->large_step < 1 ||
2551 config->small_look < MIN_MATCH ||
2552 config->small_chain < 1 ||
2553 config->large_look < config->small_look ||
2554 config->small_chain < config->small_lchain ||
2555 (config->small_lchain == 0 && config->try_lazy) ||
2556 config->srcwin_size < stream->large_look ||
2557 config->srcwin_maxsz < stream->srcwin_size)
2558 {
2559 stream->msg = "invalid soft string-match config";
2560 return EINVAL;
2561 }
2562 break;)
2563
2564 IF_BUILD_SLOW(case XD3_SMATCH_DEFAULT:)
2565 IF_BUILD_SLOW(case XD3_SMATCH_SLOW: smatcher = & __smatcher_slow; break;)
2566 IF_BUILD_FAST(case XD3_SMATCH_FAST: smatcher = & __smatcher_fast; break;)
2567 default:
2568 stream->msg = "invalid string match config type";
2569 return EINVAL;
2570 }
2571
2572 stream->string_match = smatcher->string_match;
2573 XD3_ASSERT(stream->string_match);
2574
2575 XD3_COPY_CONFIG_FIELDS (stream, smatcher);
2576
2577 /* If it is a soft config, the smatcher fields didn't set anything, copy from config
2578 * instead. */
2579 if (stream->large_look == 0)
2580 {
2581 XD3_COPY_CONFIG_FIELDS (stream, config);
2582 }
2583
2584 IF_DEBUG1 (P(RINT "[stream cfg] llook %u lstep %u slook %u\n",
2585 stream->large_look, stream->large_step, stream->small_look));
2586 return 0;
2587}
2588
2589/******************************************************************************************
2590 Getblk interface
2591 ******************************************************************************************/
2592
2593/* This function interfaces with the client getblk function, checks its results, etc. */
2594static int
2595xd3_getblk (xd3_stream *stream/*, xd3_source *source*/, xoff_t blkno)
2596{
2597 int ret;
2598 xd3_source *source = stream->src;
2599
2600 if (blkno >= source->blocks)
2601 {
2602 stream->msg = "source file too short";
2603 return EINVAL;
2604 }
2605
2606 if (blkno != source->curblkno || source->curblk == NULL)
2607 {
2608 XD3_ASSERT (source->curblk != NULL || blkno != source->curblkno);
2609
2610 source->getblkno = blkno;
2611
2612 if (stream->getblk == NULL)
2613 {
2614 stream->msg = "getblk source input";
2615 return XD3_GETSRCBLK;
2616 }
2617 else if ((ret = stream->getblk (stream, source, blkno)) != 0)
2618 {
2619 stream->msg = "getblk failed";
2620 return ret;
2621 }
2622
2623 XD3_ASSERT (source->curblk != NULL);
2624 }
2625
2626 if (source->onblk != xd3_bytes_on_srcblk (source, blkno))
2627 {
2628 stream->msg = "getblk returned short block";
2629 return EINVAL;
2630 }
2631
2632 return 0;
2633}
2634
2635/******************************************************************************************
2636 Stream open/close
2637 ******************************************************************************************/
2638
2639int
2640xd3_set_source (xd3_stream *stream,
2641 xd3_source *src)
2642{
2643 xoff_t blk_num;
2644 xoff_t tail_size;
2645
2646 IF_DEBUG1 (P(RINT "[set source] size %"Q"u\n", src->size));
2647
2648 if (src == NULL || src->size < stream->large_look) { return 0; }
2649
2650 stream->src = src;
2651 blk_num = src->size / src->blksize;
2652 tail_size = src->size % src->blksize;
2653 src->blocks = blk_num + (tail_size > 0);
2654 src->srclen = 0;
2655 src->srcbase = 0;
2656
2657 return 0;
2658}
2659
2660void
2661xd3_abort_stream (xd3_stream *stream)
2662{
2663 stream->dec_state = DEC_ABORTED;
2664 stream->enc_state = ENC_ABORTED;
2665}
2666
2667int
2668xd3_close_stream (xd3_stream *stream)
2669{
2670 if (stream->enc_state != 0 && stream->enc_state != ENC_ABORTED)
2671 {
2672 /* If encoding, should be ready for more input but not actually have any. */
2673 if (stream->enc_state != ENC_INPUT || stream->avail_in != 0)
2674 {
2675 stream->msg = "encoding is incomplete";
2676 return EINVAL;
2677 }
2678 }
2679 else
2680 {
2681 switch (stream->dec_state)
2682 {
2683 case DEC_VCHEAD:
2684 case DEC_WININD:
2685 /* TODO: Address the zero-byte ambiguity. Does the encoder emit a window or
2686 * not? If so, then catch an error here. If not, need another routine to say
2687 * decode_at_least_one_if_empty. */
2688 case DEC_ABORTED:
2689 break;
2690 default:
2691 /* If decoding, should be ready for the next window. */
2692 stream->msg = "EOF in decode";
2693 return EINVAL;
2694 }
2695 }
2696
2697 return 0;
2698}
2699
2700/******************************************************************************************
2701 Application header
2702 ******************************************************************************************/
2703
2704int
2705xd3_get_appheader (xd3_stream *stream,
2706 uint8_t **data,
2707 usize_t *size)
2708{
2709 if (stream->dec_state < DEC_WININD)
2710 {
2711 stream->msg = "application header not available";
2712 return EINVAL;
2713 }
2714
2715 (*data) = stream->dec_appheader;
2716 (*size) = stream->dec_appheadsz;
2717 return 0;
2718}
2719
2720#if XD3_ENCODER
2721void
2722xd3_set_appheader (xd3_stream *stream,
2723 const uint8_t *data,
2724 usize_t size)
2725{
2726 stream->enc_appheader = data;
2727 stream->enc_appheadsz = size;
2728}
2729
2730/******************************************************************************************
2731 Encoder stuff
2732 ******************************************************************************************/
2733
2734#if XD3_DEBUG
2735static int
2736xd3_iopt_check (xd3_stream *stream)
2737{
2738 int ul = xd3_rlist_length (& stream->iopt.used);
2739 int fl = xd3_rlist_length (& stream->iopt.free);
2740
2741 return (ul + fl + (stream->iout ? 1 : 0)) == stream->iopt_size;
2742}
2743#endif
2744
2745static xd3_rinst*
2746xd3_iopt_free (xd3_stream *stream, xd3_rinst *i)
2747{
2748 xd3_rinst *n = xd3_rlist_remove (i);
2749 xd3_rlist_push_back (& stream->iopt.free, i);
2750 return n;
2751}
2752
2753static void
2754xd3_iopt_free_nonadd (xd3_stream *stream, xd3_rinst *i)
2755{
2756 if (i->type != XD3_ADD)
2757 {
2758 xd3_rlist_push_back (& stream->iopt.free, i);
2759 }
2760}
2761
2762/* When an instruction is ready to flush from the iopt buffer, this function is called to
2763 * produce an encoding. It writes the instruction plus size, address, and data to the
2764 * various encoding sections. */
2765static int
2766xd3_iopt_finish_encoding (xd3_stream *stream, xd3_rinst *inst)
2767{
2768 int ret;
2769
2770 /* Check for input overflow. */
2771 XD3_ASSERT (inst->pos + inst->size <= stream->avail_in);
2772
2773 switch (inst->type)
2774 {
2775 case XD3_CPY:
2776 {
2777 /* the address may have an offset if there is a source window. */
2778 usize_t addr;
2779 xd3_source *src = stream->src;
2780
2781 if (src != NULL)
2782 {
2783 /* If there is a source copy, the source must have its source window decided
2784 * before we can encode. This can be bad -- we have to make this decision
2785 * even if no source matches have been found. */
2786 if (stream->srcwin_decided == 0)
2787 {
2788 if ((ret = xd3_srcwin_setup (stream))) { return ret; }
2789 }
2790
2791 /* xtra field indicates the copy is from the source */
2792 if (inst->xtra)
2793 {
2794 XD3_ASSERT (inst->addr >= src->srcbase);
2795 XD3_ASSERT (inst->addr + inst->size <= src->srcbase + src->srclen);
2796 addr = (inst->addr - src->srcbase);
2797 }
2798 else
2799 {
2800 /* with source window: target copy address is offset by taroff. */
2801 addr = stream->taroff + (usize_t) inst->addr;
2802 }
2803 }
2804 else
2805 {
2806 addr = (usize_t) inst->addr;
2807 }
2808
2809 XD3_ASSERT (inst->size >= MIN_MATCH);
2810
2811 /* the "here" position is always offset by taroff */
2812 if ((ret = xd3_encode_address (stream, addr, inst->pos + stream->taroff, & inst->type)))
2813 {
2814 return ret;
2815 }
2816
2817 IF_DEBUG (stream->n_cpy += 1);
2818 IF_DEBUG (stream->l_cpy += inst->size);
2819
2820 IF_DEBUG1 ({
2821 static int cnt;
2822 P(RINT "[iopt copy:%d] pos %"Q"u-%"Q"u addr %"Q"u-%"Q"u size %u\n",
2823 cnt++,
2824 stream->total_in + inst->pos,
2825 stream->total_in + inst->pos + inst->size,
2826 inst->addr, inst->addr + inst->size, inst->size);
2827 });
2828 break;
2829 }
2830 case XD3_RUN:
2831 {
2832 XD3_ASSERT (inst->size >= MIN_MATCH);
2833
2834 if ((ret = xd3_emit_byte (stream, & DATA_TAIL (stream), inst->xtra))) { return ret; }
2835
2836 IF_DEBUG (stream->n_run += 1);
2837 IF_DEBUG (stream->l_run += inst->size);
2838 IF_DEBUG (stream->n_dbytes += 1);
2839
2840 IF_DEBUG1 ({
2841 static int cnt;
2842 P(RINT "[iopt run:%d] pos %"Q"u size %u\n", cnt++, stream->total_in + inst->pos, inst->size);
2843 });
2844 break;
2845 }
2846 case XD3_ADD:
2847 {
2848 if ((ret = xd3_emit_bytes (stream, & DATA_TAIL (stream),
2849 stream->next_in + inst->pos, inst->size))) { return ret; }
2850
2851 IF_DEBUG (stream->n_add += 1);
2852 IF_DEBUG (stream->l_add += inst->size);
2853 IF_DEBUG (stream->n_dbytes += inst->size);
2854
2855 IF_DEBUG1 ({
2856 static int cnt;
2857 P(RINT "[iopt add:%d] pos %"Q"u size %u\n", cnt++, stream->total_in + inst->pos, inst->size);
2858 });
2859
2860 break;
2861 }
2862 }
2863
2864 /* This is the only place stream->unencoded_offset is incremented. */
2865 XD3_ASSERT (stream->unencoded_offset == inst->pos);
2866 stream->unencoded_offset += inst->size;
2867
2868 IF_DEBUG (stream->n_emit += inst->size);
2869
2870 inst->code2 = 0;
2871
2872 XD3_CHOOSE_INSTRUCTION (stream, stream->iout, inst);
2873
2874 if (stream->iout != NULL)
2875 {
2876 if (stream->iout->code2 != 0)
2877 {
2878 if ((ret = xd3_emit_double (stream, stream->iout, inst, stream->iout->code2))) { return ret; }
2879
2880 xd3_iopt_free_nonadd (stream, stream->iout);
2881 xd3_iopt_free_nonadd (stream, inst);
2882 stream->iout = NULL;
2883 return 0;
2884 }
2885 else
2886 {
2887 if ((ret = xd3_emit_single (stream, stream->iout, stream->iout->code1))) { return ret; }
2888
2889 xd3_iopt_free_nonadd (stream, stream->iout);
2890 }
2891 }
2892
2893 stream->iout = inst;
2894
2895 return 0;
2896}
2897
2898/* This possibly encodes an add instruction, iadd, which must remain on the stack until
2899 * the following call to xd3_iopt_finish_encoding. */
2900static int
2901xd3_iopt_add (xd3_stream *stream, usize_t pos, xd3_rinst *iadd)
2902{
2903 int ret;
2904 usize_t off = stream->unencoded_offset;
2905
2906 if (pos > off)
2907 {
2908 iadd->type = XD3_ADD;
2909 iadd->pos = off;
2910 iadd->size = pos - off;
2911
2912 if ((ret = xd3_iopt_finish_encoding (stream, iadd))) { return ret; }
2913 }
2914
2915 return 0;
2916}
2917
2918/* This function calls xd3_iopt_finish_encoding to finish encoding an instruction, and it
2919 * may also produce an add instruction for an unmatched region. */
2920static int
2921xd3_iopt_add_encoding (xd3_stream *stream, xd3_rinst *inst)
2922{
2923 int ret;
2924 xd3_rinst iadd;
2925
2926 if ((ret = xd3_iopt_add (stream, inst->pos, & iadd))) { return ret; }
2927
2928 if ((ret = xd3_iopt_finish_encoding (stream, inst))) { return ret; }
2929
2930 return 0;
2931}
2932
2933/* Generates a final add instruction to encode the remaining input. */
2934static int
2935xd3_iopt_add_finalize (xd3_stream *stream)
2936{
2937 int ret;
2938 xd3_rinst iadd;
2939
2940 if ((ret = xd3_iopt_add (stream, stream->avail_in, & iadd))) { return ret; }
2941
2942 if (stream->iout)
2943 {
2944 if ((ret = xd3_emit_single (stream, stream->iout, stream->iout->code1))) { return ret; }
2945
2946 xd3_iopt_free_nonadd (stream, stream->iout);
2947 stream->iout = NULL;
2948 }
2949
2950 return 0;
2951}
2952
2953/* Compact the instruction buffer by choosing the best non-overlapping instructions when
2954 * lazy string-matching. There are no ADDs in the iopt buffer because those are
2955 * synthesized in xd3_iopt_add_encoding and during xd3_iopt_add_finalize. */
2956static int
2957xd3_iopt_flush_instructions (xd3_stream *stream, int force)
2958{
2959 xd3_rinst *r1 = xd3_rlist_front (& stream->iopt.used);
2960 xd3_rinst *r2;
2961 xd3_rinst *r3;
2962 usize_t r1end;
2963 usize_t r2end;
2964 usize_t r2off;
2965 usize_t r2moff;
2966 usize_t gap;
2967 usize_t flushed;
2968 int ret;
2969
2970 XD3_ASSERT (xd3_iopt_check (stream));
2971
2972 /* Note: once tried to skip this step if it's possible to assert there are no
2973 * overlapping instructions. Doesn't work because xd3_opt_erase leaves overlapping
2974 * instructions. */
2975 while (! xd3_rlist_end (& stream->iopt.used, r1) &&
2976 ! xd3_rlist_end (& stream->iopt.used, r2 = xd3_rlist_next (r1)))
2977 {
2978 r1end = r1->pos + r1->size;
2979
2980 /* If the instructions do not overlap, continue. */
2981 if (r1end <= r2->pos)
2982 {
2983 r1 = r2;
2984 continue;
2985 }
2986
2987 r2end = r2->pos + r2->size;
2988
2989 /* The min_match adjustments prevent this. */
2990 XD3_ASSERT (r2end > (r1end + LEAST_MATCH_INCR));
2991
2992 /* If r3 is available... */
2993 if (! xd3_rlist_end (& stream->iopt.used, r3 = xd3_rlist_next (r2)))
2994 {
2995 /* If r3 starts before r1 finishes or just about, r2 is irrelevant */
2996 if (r3->pos <= r1end + 1)
2997 {
2998 xd3_iopt_free (stream, r2);
2999 continue;
3000 }
3001 }
3002 else if (! force)
3003 {
3004 /* Unless force, end the loop when r3 is not available. */
3005 break;
3006 }
3007
3008 r2off = r2->pos - r1->pos;
3009 r2moff = r2end - r1end;
3010 gap = r2end - r1->pos;
3011
3012 /* If the two matches overlap almost entirely, choose the better match and discard
3013 * the other. This heuristic is BLACK MAGIC. Havesomething better? */
3014 if (gap < 2*MIN_MATCH || r2moff <= 2 || r2off <= 2)
3015 {
3016 /* Only one match should be used, choose the longer one. */
3017 if (r1->size < r2->size)
3018 {
3019 xd3_iopt_free (stream, r1);
3020 r1 = r2;
3021 }
3022 else
3023 {
3024 /* We are guaranteed that r1 does not overlap now, so advance past r2 */
3025 r1 = xd3_iopt_free (stream, r2);
3026 }
3027 continue;
3028 }
3029 else
3030 {
3031 /* Shorten one of the instructions -- could be optimized based on the address
3032 * cache. */
3033 usize_t average;
3034 usize_t newsize;
3035 usize_t adjust1;
3036
3037 XD3_ASSERT (r1end > r2->pos && r2end > r1->pos);
3038
3039 /* Try to balance the length of both instructions, but avoid making both longer
3040 * than MAX_MATCH_SPLIT . */
3041 average = (gap) / 2;
3042 newsize = min (MAX_MATCH_SPLIT, gap - average);
3043
3044 /* Should be possible to simplify this code. */
3045 if (newsize > r1->size)
3046 {
3047 /* shorten r2 */
3048 adjust1 = r1end - r2->pos;
3049 }
3050 else if (newsize > r2->size)
3051 {
3052 /* shorten r1 */
3053 adjust1 = r1end - r2->pos;
3054
3055 XD3_ASSERT (r1->size > adjust1);
3056
3057 r1->size -= adjust1;
3058
3059 /* don't shorten r2 */
3060 adjust1 = 0;
3061 }
3062 else
3063 {
3064 /* shorten r1 */
3065 adjust1 = r1->size - newsize;
3066
3067 if (r2->pos > r1end - adjust1)
3068 {
3069 adjust1 -= r2->pos - (r1end - adjust1);
3070 }
3071
3072 XD3_ASSERT (r1->size > adjust1);
3073
3074 r1->size -= adjust1;
3075
3076 /* shorten r2 */
3077 XD3_ASSERT (r1->pos + r1->size >= r2->pos);
3078
3079 adjust1 = r1->pos + r1->size - r2->pos;
3080 }
3081
3082 /* Fallthrough above if-else, shorten r2 */
3083 XD3_ASSERT (r2->size > adjust1);
3084
3085 r2->size -= adjust1;
3086 r2->pos += adjust1;
3087 r2->addr += adjust1;
3088
3089 XD3_ASSERT (r1->size >= MIN_MATCH);
3090 XD3_ASSERT (r2->size >= MIN_MATCH);
3091
3092 r1 = r2;
3093 }
3094 }
3095
3096 XD3_ASSERT (xd3_iopt_check (stream));
3097
3098 /* If forcing, pick instructions until the list is empty, otherwise this empties 50% of
3099 * the queue. */
3100 for (flushed = 0; ! xd3_rlist_empty (& stream->iopt.used); )
3101 {
3102 xd3_rinst *renc = xd3_rlist_pop_front (& stream->iopt.used);
3103 if ((ret = xd3_iopt_add_encoding (stream, renc)))
3104 {
3105 return ret;
3106 }
3107
3108 if (! force)
3109 {
3110 if (++flushed > stream->iopt_size / 2)
3111 {
3112 break;
3113 }
3114
3115 /* If there are only two instructions remaining, break, because they were
3116 * not optimized. This means there were more than 50% eliminated by the
3117 * loop above. */
3118 r1 = xd3_rlist_front (& stream->iopt.used);
3119 if (xd3_rlist_end(& stream->iopt.used, r1) ||
3120 xd3_rlist_end(& stream->iopt.used, r2 = xd3_rlist_next (r1)) ||
3121 xd3_rlist_end(& stream->iopt.used, r3 = xd3_rlist_next (r2)))
3122 {
3123 break;
3124 }
3125 }
3126 }
3127
3128 XD3_ASSERT (xd3_iopt_check (stream));
3129
3130 XD3_ASSERT (!force || xd3_rlist_length (& stream->iopt.used) == 0);
3131
3132 return 0;
3133}
3134
3135static int
3136xd3_iopt_get_slot (xd3_stream *stream, xd3_rinst** iptr)
3137{
3138 xd3_rinst *i;
3139 int ret;
3140
3141 if (xd3_rlist_empty (& stream->iopt.free))
3142 {
3143 if ((ret = xd3_iopt_flush_instructions (stream, 0))) { return ret; }
3144
3145 XD3_ASSERT (! xd3_rlist_empty (& stream->iopt.free));
3146 }
3147
3148 i = xd3_rlist_pop_back (& stream->iopt.free);
3149
3150 xd3_rlist_push_back (& stream->iopt.used, i);
3151
3152 (*iptr) = i;
3153
3154 return 0;
3155}
3156
3157/* A copy is about to be emitted that extends backwards to POS, therefore it may
3158 * completely cover some existing instructions in the buffer. If an instruction is
3159 * completely covered by this new match, erase it. If the new instruction is covered by
3160 * the previous one, return 1 to skip it. */
3161static void
3162xd3_iopt_erase (xd3_stream *stream, usize_t pos, usize_t size)
3163{
3164 while (! xd3_rlist_empty (& stream->iopt.used))
3165 {
3166 xd3_rinst *r = xd3_rlist_back (& stream->iopt.used);
3167
3168 /* Verify that greedy is working. The previous instruction should end before the
3169 * new one begins. */
3170 XD3_ASSERT ((stream->flags & XD3_BEGREEDY) == 0 || (r->pos + r->size <= pos));
3171 /* Verify that min_match is working. The previous instruction should end before the
3172 * new one ends. */
3173 XD3_ASSERT ((stream->flags & XD3_BEGREEDY) != 0 || (r->pos + r->size < pos + size));
3174
3175 /* See if the last instruction starts before the new instruction. If so, there is
3176 * nothing to erase. */
3177 if (r->pos < pos)
3178 {
3179 return;
3180 }
3181
3182 /* Otherwise, the new instruction covers the old one, delete it and repeat. */
3183 xd3_rlist_remove (r);
3184 xd3_rlist_push_back (& stream->iopt.free, r);
3185 }
3186}
3187
3188/* This function tells the last matched input position. */
3189static usize_t
3190xd3_iopt_last_matched (xd3_stream *stream)
3191{
3192 xd3_rinst *r;
3193
3194 if (xd3_rlist_empty (& stream->iopt.used))
3195 {
3196 return 0;
3197 }
3198
3199 r = xd3_rlist_back (& stream->iopt.used);
3200
3201 return r->pos + r->size;
3202}
3203
3204/******************************************************************************************
3205 Emit routines
3206 ******************************************************************************************/
3207
3208static int
3209xd3_emit_single (xd3_stream *stream, xd3_rinst *single, uint code)
3210{
3211 int has_size = stream->code_table[code].size1 == 0;
3212 int ret;
3213
3214 IF_DEBUG1 (P(RINT "[emit1] %u %s (%u) code %u\n",
3215 single->pos,
3216 xd3_rtype_to_string (single->type, 0),
3217 single->size,
3218 code));
3219
3220 if ((ret = xd3_emit_byte (stream, & INST_TAIL (stream), code))) { return ret; }
3221
3222 if (has_size)
3223 {
3224 if ((ret = xd3_emit_size (stream, & INST_TAIL (stream), single->size))) { return ret; }
3225
3226 IF_DEBUG (xd3_count_size (stream, single->size));
3227 }
3228
3229 IF_DEBUG (xd3_count_inst (stream, code));
3230
3231 return 0;
3232}
3233
3234static int
3235xd3_emit_double (xd3_stream *stream, xd3_rinst *first, xd3_rinst *second, uint code)
3236{
3237 int ret;
3238
3239 /* All double instructions use fixed sizes, so all we need to do is output the
3240 * instruction code, no sizes. */
3241 XD3_ASSERT (stream->code_table[code].size1 != 0 &&
3242 stream->code_table[code].size2 != 0);
3243
3244 if ((ret = xd3_emit_byte (stream, & INST_TAIL (stream), code))) { return ret; }
3245
3246 IF_DEBUG1 (P(RINT "[emit2]: %u %s (%u) %s (%u) code %u\n",
3247 first->pos,
3248 xd3_rtype_to_string (first->type, 0),
3249 first->size,
3250 xd3_rtype_to_string (second->type, 0),
3251 second->size,
3252 code));
3253
3254 IF_DEBUG (xd3_count_inst (stream, code));
3255
3256 return 0;
3257}
3258
3259/* This enters a potential run instruction into the iopt buffer. The position argument is
3260 * relative to the target window. */
3261static INLINE int
3262xd3_emit_run (xd3_stream *stream, usize_t pos, usize_t size, uint8_t run_c)
3263{
3264 xd3_rinst* ri;
3265 int ret;
3266
3267 XD3_ASSERT (pos + size <= stream->avail_in);
3268
3269 if ((ret = xd3_iopt_get_slot (stream, & ri))) { return ret; }
3270
3271 ri->type = XD3_RUN;
3272 ri->xtra = run_c;
3273 ri->pos = pos;
3274 ri->size = size;
3275
3276 return 0;
3277}
3278
3279/* This enters a potential copy instruction into the iopt buffer. The position argument
3280 * is relative to the target window.. */
3281static INLINE int
3282xd3_found_match (xd3_stream *stream, usize_t pos, usize_t size, xoff_t addr, int is_source)
3283{
3284 xd3_rinst* ri;
3285 int ret;
3286
3287 XD3_ASSERT (pos + size <= stream->avail_in);
3288
3289 if ((ret = xd3_iopt_get_slot (stream, & ri))) { return ret; }
3290
3291 ri->type = XD3_CPY;
3292 ri->xtra = is_source;
3293 ri->pos = pos;
3294 ri->size = size;
3295 ri->addr = addr;
3296
3297 return 0;
3298}
3299
3300static int
3301xd3_emit_hdr (xd3_stream *stream)
3302{
3303 int ret;
3304 int use_secondary = stream->sec_type != NULL;
3305 int use_adler32 = stream->flags & XD3_ADLER32;
3306 int vcd_source = xd3_encoder_used_source (stream);
3307 uint win_ind = 0;
3308 uint del_ind = 0;
3309 usize_t enc_len;
3310 usize_t tgt_len;
3311 usize_t data_len;
3312 usize_t inst_len;
3313 usize_t addr_len;
3314
3315 XD3_ASSERT (stream->n_emit == stream->avail_in);
3316
3317 if (stream->current_window == 0)
3318 {
3319 uint hdr_ind = 0;
3320 int use_appheader = stream->enc_appheader != NULL;
3321 int use_gencodetbl = GENERIC_ENCODE_TABLES && (stream->code_table_desc != & __rfc3284_code_table_desc);
3322
3323 if (use_secondary) { hdr_ind |= VCD_SECONDARY; }
3324 if (use_gencodetbl) { hdr_ind |= VCD_CODETABLE; }
3325 if (use_appheader) { hdr_ind |= VCD_APPHEADER; }
3326
3327 if ((ret = xd3_emit_byte (stream, & HDR_TAIL (stream), VCDIFF_MAGIC1)) != 0 ||
3328 (ret = xd3_emit_byte (stream, & HDR_TAIL (stream), VCDIFF_MAGIC2)) != 0 ||
3329 (ret = xd3_emit_byte (stream, & HDR_TAIL (stream), VCDIFF_MAGIC3)) != 0 ||
3330 (ret = xd3_emit_byte (stream, & HDR_TAIL (stream), VCDIFF_VERSION)) != 0 ||
3331 (ret = xd3_emit_byte (stream, & HDR_TAIL (stream), hdr_ind)) != 0)
3332 {
3333 return ret;
3334 }
3335
3336 /* Secondary compressor ID */
3337#if SECONDARY_ANY
3338 if (use_secondary && (ret = xd3_emit_byte (stream, & HDR_TAIL (stream), stream->sec_type->id))) { return ret; }
3339#endif
3340
3341 /* Compressed code table */
3342 if (use_gencodetbl)
3343 {
3344 usize_t code_table_size;
3345 const uint8_t *code_table_data;
3346
3347 if ((ret = stream->comp_table_func (stream, & code_table_data, & code_table_size))) { return ret; }
3348
3349 if ((ret = xd3_emit_size (stream, & HDR_TAIL (stream), code_table_size + 2)) ||
3350 (ret = xd3_emit_byte (stream, & HDR_TAIL (stream), stream->code_table_desc->near_modes)) ||
3351 (ret = xd3_emit_byte (stream, & HDR_TAIL (stream), stream->code_table_desc->same_modes)) ||
3352 (ret = xd3_emit_bytes (stream, & HDR_TAIL (stream), code_table_data, code_table_size))) { return ret; }
3353 }
3354
3355 /* Application header */
3356 if (use_appheader)
3357 {
3358 if ((ret = xd3_emit_size (stream, & HDR_TAIL (stream), stream->enc_appheadsz)) ||
3359 (ret = xd3_emit_bytes (stream, & HDR_TAIL (stream), stream->enc_appheader, stream->enc_appheadsz)))
3360 {
3361 return ret;
3362 }
3363 }
3364 }
3365
3366 /* try to compress this window */
3367#if SECONDARY_ANY
3368 if (use_secondary)
3369 {
3370 int data_sec = 0;
3371 int inst_sec = 0;
3372 int addr_sec = 0;
3373
3374# define ENCODE_SECONDARY_SECTION(UPPER,LOWER) \
3375 ((stream->flags & XD3_SEC_NO ## UPPER) == 0 && \
3376 (ret = xd3_encode_secondary (stream, & UPPER ## _HEAD (stream), & UPPER ## _TAIL (stream), \
3377 & xd3_sec_ ## LOWER (stream), \
3378 & stream->sec_ ## LOWER, & LOWER ## _sec)))
3379
3380 if (ENCODE_SECONDARY_SECTION (DATA, data) ||
3381 ENCODE_SECONDARY_SECTION (INST, inst) ||
3382 ENCODE_SECONDARY_SECTION (ADDR, addr))
3383 {
3384 return ret;
3385 }
3386
3387 del_ind |= (data_sec ? VCD_DATACOMP : 0);
3388 del_ind |= (inst_sec ? VCD_INSTCOMP : 0);
3389 del_ind |= (addr_sec ? VCD_ADDRCOMP : 0);
3390 }
3391#endif
3392
3393 /* if (vcd_target) { win_ind |= VCD_TARGET; } */
3394 if (vcd_source) { win_ind |= VCD_SOURCE; }
3395 if (use_adler32) { win_ind |= VCD_ADLER32; }
3396
3397 /* window indicator */
3398 if ((ret = xd3_emit_byte (stream, & HDR_TAIL (stream), win_ind))) { return ret; }
3399
3400 /* source window */
3401 if (vcd_source)
3402 {
3403 /* or (vcd_target) { ... } */
3404 if ((ret = xd3_emit_size (stream, & HDR_TAIL (stream), stream->src->srclen)) ||
3405 (ret = xd3_emit_size (stream, & HDR_TAIL (stream), stream->src->srcbase))) { return ret; }
3406 }
3407
3408 tgt_len = stream->avail_in;
3409 data_len = xd3_sizeof_output (DATA_HEAD (stream));
3410 inst_len = xd3_sizeof_output (INST_HEAD (stream));
3411 addr_len = xd3_sizeof_output (ADDR_HEAD (stream));
3412
3413 /* The enc_len field is redundent... doh! */
3414 enc_len = (1 + (xd3_sizeof_size (tgt_len) +
3415 xd3_sizeof_size (data_len) +
3416 xd3_sizeof_size (inst_len) +
3417 xd3_sizeof_size (addr_len)) +
3418 data_len +
3419 inst_len +
3420 addr_len +
3421 (use_adler32 ? 4 : 0));
3422
3423 if ((ret = xd3_emit_size (stream, & HDR_TAIL (stream), enc_len)) ||
3424 (ret = xd3_emit_size (stream, & HDR_TAIL (stream), tgt_len)) ||
3425 (ret = xd3_emit_byte (stream, & HDR_TAIL (stream), del_ind)) ||
3426 (ret = xd3_emit_size (stream, & HDR_TAIL (stream), data_len)) ||
3427 (ret = xd3_emit_size (stream, & HDR_TAIL (stream), inst_len)) ||
3428 (ret = xd3_emit_size (stream, & HDR_TAIL (stream), addr_len)))
3429 {
3430 return ret;
3431 }
3432
3433 if (use_adler32)
3434 {
3435 uint8_t send[4];
3436 uint32_t a32 = adler32 (1L, stream->next_in, stream->avail_in);
3437
3438 send[0] = (a32 >> 24);
3439 send[1] = (a32 >> 16);
3440 send[2] = (a32 >> 8);
3441 send[3] = (a32 & 0xff);
3442
3443 if ((ret = xd3_emit_bytes (stream, & HDR_TAIL (stream), send, 4))) { return ret; }
3444 }
3445
3446 return 0;
3447}
3448
3449/******************************************************************************************
3450 Encode routines
3451 ******************************************************************************************/
3452
3453static int
3454xd3_encode_buffer_leftover (xd3_stream *stream)
3455{
3456 usize_t take;
3457 usize_t room;
3458
3459 /* Allocate the buffer. */
3460 if (stream->buf_in == NULL && (stream->buf_in = xd3_alloc (stream, stream->winsize, 1)) == NULL)
3461 {
3462 return ENOMEM;
3463 }
3464
3465 /* Take leftover input first. */
3466 if (stream->buf_leftover != NULL)
3467 {
3468 XD3_ASSERT (stream->buf_avail == 0);
3469 XD3_ASSERT (stream->buf_leftavail < stream->winsize);
3470
3471 IF_DEBUG1 (P(RINT "[leftover] previous %u avail %u\n", stream->buf_leftavail, stream->avail_in));
3472
3473 memcpy (stream->buf_in, stream->buf_leftover, stream->buf_leftavail);
3474
3475 stream->buf_leftover = NULL;
3476 stream->buf_avail = stream->buf_leftavail;
3477 }
3478
3479 /* Copy into the buffer. */
3480 room = stream->winsize - stream->buf_avail;
3481 take = min (room, stream->avail_in);
3482
3483 memcpy (stream->buf_in + stream->buf_avail, stream->next_in, take);
3484
3485 stream->buf_avail += take;
3486
3487 if (take < stream->avail_in)
3488 {
3489 /* Buffer is full */
3490 stream->buf_leftover = stream->next_in + take;
3491 stream->buf_leftavail = stream->avail_in - take;
3492
3493 IF_DEBUG1 (P(RINT "[leftover] take %u remaining %u\n", take, stream->buf_leftavail));
3494 }
3495 else if ((stream->buf_avail < stream->winsize) && !(stream->flags & XD3_FLUSH))
3496 {
3497 /* Buffer has space */
3498 IF_DEBUG1 (P(RINT "[leftover] %u emptied\n", take));
3499 return XD3_INPUT;
3500 }
3501
3502 /* Use the buffer: */
3503 stream->next_in = stream->buf_in;
3504 stream->avail_in = stream->buf_avail;
3505 stream->buf_avail = 0;
3506
3507 return 0;
3508}
3509
3510/* This function allocates all memory initially used by the encoder. */
3511static int
3512xd3_encode_init (xd3_stream *stream)
3513{
3514 int i;
3515 int large_comp = (stream->src != NULL);
3516 int small_comp = ! (stream->flags & XD3_NOCOMPRESS);
3517 /*int small_prev = (stream->small_chain > 1);*/
3518 int space_fact = (large_comp + small_comp);
3519 int memsize = stream->memsize;
3520
3521 /* Memory allocations for checksum tables are delayed until xd3_string_match_init in the
3522 * first call to string_match--that way identical or short inputs require no table
3523 * allocation. */
3524 if (large_comp)
3525 {
3526 xd3_size_hashtable (stream, memsize / space_fact, & stream->large_hash);
3527 }
3528
3529 if (small_comp)
3530 {
3531 xd3_size_hashtable (stream, memsize / space_fact, & stream->small_hash);
3532 }
3533
3534 for (i = 0; i < ENC_SECTS; i += 1)
3535 {
3536 if ((stream->enc_heads[i] = stream->enc_tails[i] =
3537 xd3_alloc_output (stream, NULL)) == NULL)
3538 {
3539 goto fail;
3540 }
3541 }
3542
3543 /* iopt buffer */
3544 xd3_rlist_init (& stream->iopt.used);
3545 xd3_rlist_init (& stream->iopt.free);
3546
3547 if ((stream->iopt.buffer = xd3_alloc (stream, sizeof (xd3_rinst), stream->iopt_size)) == NULL)
3548 {
3549 goto fail;
3550 }
3551
3552 for (i = 0; i < stream->iopt_size; i += 1)
3553 {
3554 xd3_rlist_push_back (& stream->iopt.free, & stream->iopt.buffer[i]);
3555 }
3556
3557 XD3_ASSERT (xd3_rlist_length (& stream->iopt.free) == stream->iopt_size);
3558 XD3_ASSERT (xd3_rlist_length (& stream->iopt.used) == 0);
3559
3560 /* address cache, code table */
3561 stream->acache.s_near = stream->code_table_desc->near_modes;
3562 stream->acache.s_same = stream->code_table_desc->same_modes;
3563 stream->code_table = stream->code_table_func ();
3564
3565 return xd3_alloc_cache (stream);
3566
3567 fail:
3568
3569 return ENOMEM;
3570}
3571
3572#if XD3_DEBUG
3573static int
3574xd3_check_sprevlist (xd3_stream *stream)
3575{
3576 int i;
3577 for (i = 0; i < stream->sprevsz; i += 1)
3578 {
3579 xd3_slist *l = & stream->small_prev[i];
3580
3581 XD3_ASSERT (l->prev->next == l);
3582 XD3_ASSERT (l->next->prev == l);
3583 }
3584 return 1;
3585}
3586#endif
3587
3588/* Called after the ENC_POSTOUT state, this puts the output buffers back into separate
3589 * lists and re-initializes some variables. (The output lists were spliced together
3590 * during the ENC_FLUSH state.) */
3591static void
3592xd3_encode_reset (xd3_stream *stream)
3593{
3594 int i;
3595 xd3_output *olist;
3596
3597 XD3_ASSERT (stream->small_prev == NULL || xd3_check_sprevlist (stream));
3598
3599 IF_DEBUG (stream->n_emit = 0);
3600 stream->avail_in = 0;
3601 stream->small_reset = 1;
3602
3603 if (stream->src != NULL)
3604 {
3605 stream->src->srcbase = 0;
3606 stream->src->srclen = 0;
3607 stream->srcwin_decided = 0;
3608 stream->match_minaddr = 0;
3609 stream->match_maxaddr = 0;
3610 stream->taroff = 0;
3611 }
3612
3613 /* Reset output chains. */
3614 olist = stream->enc_heads[0];
3615
3616 for (i = 0; i < ENC_SECTS; i += 1)
3617 {
3618 XD3_ASSERT (olist != NULL);
3619
3620 stream->enc_heads[i] = olist;
3621 stream->enc_tails[i] = olist;
3622 olist = olist->next_page;
3623
3624 stream->enc_heads[i]->next = 0;
3625 stream->enc_heads[i]->next_page = NULL;
3626
3627 stream->enc_tails[i]->next_page = NULL;
3628 stream->enc_tails[i] = stream->enc_heads[i];
3629 }
3630
3631 xd3_freelist_output (stream, olist);
3632}
3633
3634/* The main encoding routine. */
3635int
3636xd3_encode_input (xd3_stream *stream)
3637{
3638 int ret, i;
3639
3640 if (stream->dec_state != 0)
3641 {
3642 stream->msg = "encoder/decoder transition";
3643 return EINVAL;
3644 }
3645
3646 switch (stream->enc_state)
3647 {
3648 case ENC_INIT:
3649 /* Only reached on first time through: memory setup. */
3650 if ((ret = xd3_encode_init (stream))) { return ret; }
3651
3652 stream->enc_state = ENC_INPUT;
3653
3654 case ENC_INPUT:
3655
3656 /* If there is no input yet, just return. This checks for next_in == NULL, not
3657 * avail_in == 0 since zero bytes is a valid input. There is an assertion in
3658 * xd3_avail_input() that next_in != NULL for this reason. By returning right away
3659 * we avoid creating an input buffer before the caller has supplied its first data.
3660 * It is possible for xd3_avail_input to be called both before and after the first
3661 * call to xd3_encode_input(). */
3662 if (stream->next_in == NULL)
3663 {
3664 return XD3_INPUT;
3665 }
3666
3667 enc_flush:
3668 /* See if we should buffer the input: either if there is already a leftover buffer,
3669 * or if the input is short of winsize without flush. The label at this point is
3670 * reached by a goto below, when there is leftover input after postout. */
3671 if ((stream->buf_leftover != NULL) ||
3672 (stream->avail_in < stream->winsize && ! (stream->flags & XD3_FLUSH)))
3673 {
3674 if ((ret = xd3_encode_buffer_leftover (stream))) { return ret; }
3675 }
3676
3677 /* Initalize the address cache before each window. */
3678 xd3_init_cache (& stream->acache);
3679
3680 pos_in = 0;
3681 min_match = MIN_MATCH;
3682 stream->unencoded_offset = 0;
3683
3684 stream->enc_state = ENC_SEARCH;
3685
3686 IF_DEBUG1 (P(RINT "[input window:%"Q"u] input bytes %u offset %"Q"u\n",
3687 stream->current_window, stream->avail_in, stream->total_in));
3688
3689 return XD3_WINSTART;
3690
3691 case ENC_SEARCH:
3692
3693 /* Reentrant matching. */
3694 if (stream->src != NULL)
3695 {
3696 switch (stream->match_state)
3697 {
3698 case MATCH_TARGET:
3699 /* Try matching forward at the start of the target. This is entered the
3700 * first time through, to check for a perfect match, and whenever there is a
3701 * source match that extends to the end of the previous window. The
3702 * match_srcpos field is initially zero and later set during
3703 * xd3_source_extend_match. */
3704 if (stream->avail_in > 0) {
3705 /* This call can't fail because the source window is unrestricted. */
3706 ret = xd3_source_match_setup (stream, stream->match_srcpos);
3707 XD3_ASSERT (ret == 0);
3708 stream->match_state = MATCH_FORWARD;
3709 } else {
3710 stream->match_state = MATCH_SEARCHING;
3711 }
3712 XD3_ASSERT (stream->match_fwd == 0);
3713
3714 case MATCH_FORWARD:
3715 case MATCH_BACKWARD:
3716 if (stream->avail_in != 0)
3717 {
3718 if ((ret = xd3_source_extend_match (stream)) != 0)
3719 {
3720 return ret;
3721 }
3722
3723 stream->input_position += stream->match_fwd;
3724 }
3725
3726 case MATCH_SEARCHING:
3727 /* Continue string matching. (It's possible that the initial match
3728 * continued through the entire input, in which case we're still in
3729 * MATCH_FORWARD and should remain so for the next input window.) */
3730 break;
3731 }
3732 }
3733
3734 /* String matching... */
3735 if (stream->avail_in != 0 &&
3736 (ret = stream->string_match (stream)))
3737 {
3738 return ret;
3739 }
3740
3741 /* Flush the instrution buffer, then possibly add one more instruction, then emit
3742 * the header. */
3743 stream->enc_state = ENC_FLUSH;
3744 if ((ret = xd3_iopt_flush_instructions (stream, 1)) ||
3745 (ret = xd3_iopt_add_finalize (stream)) ||
3746 (ret = xd3_emit_hdr (stream)))
3747 {
3748 return ret;
3749 }
3750
3751 /* Begin output. */
3752 stream->enc_current = HDR_HEAD (stream);
3753
3754 /* Chain all the outputs together. After doing this, it looks as if there is only
3755 * one section. The other enc_heads are set to NULL to avoid freeing them more than
3756 * once. */
3757 for (i = 1; i < ENC_SECTS; i += 1)
3758 {
3759 stream->enc_tails[i-1]->next_page = stream->enc_heads[i];
3760 stream->enc_heads[i] = NULL;
3761 }
3762
3763 enc_output:
3764
3765 stream->enc_state = ENC_POSTOUT;
3766 stream->next_out = stream->enc_current->base;
3767 stream->avail_out = stream->enc_current->next;
3768 stream->total_out += (xoff_t) stream->avail_out;
3769
3770 /* If there is any output in this buffer, return it, otherwise fall through to
3771 * handle the next buffer or finish the window after all buffers have been
3772 * output. */
3773 if (stream->avail_out > 0)
3774 {
3775 /* This is the only place xd3_encode returns XD3_OUTPUT */
3776 return XD3_OUTPUT;
3777 }
3778
3779 case ENC_POSTOUT:
3780
3781 if (stream->avail_out != 0)
3782 {
3783 stream->msg = "missed call to consume output";
3784 return EINVAL;
3785 }
3786
3787 /* Continue outputting one buffer at a time, until the next is NULL. */
3788 if ((stream->enc_current = stream->enc_current->next_page) != NULL)
3789 {
3790 goto enc_output;
3791 }
3792
3793 stream->total_in += (xoff_t) stream->avail_in;
3794 stream->enc_state = ENC_POSTWIN;
3795
3796 return XD3_WINFINISH;
3797
3798 case ENC_POSTWIN:
3799
3800 xd3_encode_reset (stream);
3801
3802 stream->current_window += 1;
3803 stream->enc_state = ENC_INPUT;
3804
3805 /* If there is leftover input to flush, repeat. */
3806 if ((stream->buf_leftover != NULL) && (stream->flags & XD3_FLUSH))
3807 {
3808 goto enc_flush;
3809 }
3810
3811 /* Ready for more input. */
3812 return XD3_INPUT;
3813
3814 default:
3815 stream->msg = "invalid state";
3816 return EINVAL;
3817 }
3818}
3819#endif /* XD3_ENCODER */
3820
3821/******************************************************************************************
3822 Client convenience functions
3823 ******************************************************************************************/
3824
3825/* This function invokes either encode or decode to and from in-memory arrays. The output array
3826 * must be large enough to hold the output or else ENOSPC is returned. */
3827static int
3828xd3_process_completely (xd3_stream *stream,
3829 int (*func) (xd3_stream *),
3830 int close_stream,
3831 const uint8_t *input,
3832 usize_t input_size,
3833 uint8_t *output,
3834 usize_t *output_size,
3835 usize_t avail_size)
3836{
3837 (*output_size) = 0;
3838
3839 stream->flags |= XD3_FLUSH;
3840
3841 xd3_avail_input (stream, input, input_size);
3842
3843 for (;;)
3844 {
3845 int ret;
3846 switch((ret = func (stream)))
3847 {
3848 case XD3_OUTPUT: { /* memcpy below */ break; }
3849 case XD3_INPUT: { /* this means EOF */ goto done; }
3850 case XD3_GOTHEADER: { /* ignore */ continue; }
3851 case XD3_WINSTART: { /* ignore */ continue; }
3852 case XD3_WINFINISH: { /* ignore */ continue; }
3853 case XD3_GETSRCBLK:
3854 {
3855 stream->msg = "stream requires source input";
3856 return EINVAL;
3857 }
3858 case 0: /* there is no plain "success" return for xd3_encode/decode */
3859 XD3_ASSERT (ret != 0);
3860 default:
3861 return ret;
3862 }
3863
3864 if (*output_size + stream->avail_out > avail_size)
3865 {
3866 stream->msg = "insufficient output space";
3867 return ENOSPC;
3868 }
3869
3870 memcpy (output + *output_size, stream->next_out, stream->avail_out);
3871
3872 *output_size += stream->avail_out;
3873
3874 xd3_consume_output (stream);
3875 }
3876 done:
3877 return (close_stream == 0) ? 0 : xd3_close_stream (stream);
3878}
3879
3880int
3881xd3_decode_completely (xd3_stream *stream,
3882 const uint8_t *input,
3883 usize_t input_size,
3884 uint8_t *output,
3885 usize_t *output_size,
3886 usize_t avail_size)
3887{
3888 return xd3_process_completely (stream, & xd3_decode_input, 1,
3889 input, input_size,
3890 output, output_size, avail_size);
3891}
3892
3893#if XD3_ENCODER
3894int
3895xd3_encode_completely (xd3_stream *stream,
3896 const uint8_t *input,
3897 usize_t input_size,
3898 uint8_t *output,
3899 usize_t *output_size,
3900 usize_t avail_size)
3901{
3902 return xd3_process_completely (stream, & xd3_encode_input, 1,
3903 input, input_size,
3904 output, output_size, avail_size);
3905}
3906#endif
3907
3908/******************************************************************************************
3909 DECODE stuff
3910 ******************************************************************************************/
3911
3912/* Return true if the caller must provide a source. Theoretically, this has to be checked
3913 * after every window. It could be that the first window requires no source, but the
3914 * second window does. In practice? */
3915int xd3_decoder_needs_source (xd3_stream *stream)
3916{
3917 return stream->dec_win_ind & VCD_SOURCE;
3918}
3919
3920/* Initialize the decoder for a new window. The dec_tgtlen value is preserved across
3921 * successive window decodings, and the update to dec_winstart is delayed until a new
3922 * window actually starts. This is to avoid throwing an error due to overflow until the
3923 * last possible moment. This makes it possible to encode exactly 4GB through a 32-bit
3924 * encoder. */
3925static int
3926xd3_decode_init_window (xd3_stream *stream)
3927{
3928 stream->dec_cpylen = 0;
3929 stream->dec_cpyoff = 0;
3930 stream->dec_cksumbytes = 0;
3931
3932 xd3_init_cache (& stream->acache);
3933
3934 return 0;
3935}
3936
3937/* Allocates buffer space for the target window and possibly the VCD_TARGET copy-window.
3938 * Also sets the base of the two copy segments. */
3939static int
3940xd3_decode_setup_buffers (xd3_stream *stream)
3941{
3942 /* If VCD_TARGET is set then the previous buffer may be reused. */
3943 if (stream->dec_win_ind & VCD_TARGET)
3944 {
3945 /* But this implementation only supports copying from the last target window. If the
3946 * offset is outside that range, it can't be done. */
3947 if (stream->dec_cpyoff < stream->dec_laststart)
3948 {
3949 stream->msg = "unsupported VCD_TARGET offset";
3950 return EINVAL;
3951 }
3952
3953 /* See if the two windows are the same. This indicates the first time VCD_TARGET is
3954 * used. This causes a second buffer to be allocated, after that the two are
3955 * swapped in the DEC_FINISH case. */
3956 if (stream->dec_lastwin == stream->next_out)
3957 {
3958 stream->next_out = NULL;
3959 stream->space_out = 0;
3960 }
3961
3962 stream->dec_cpyaddrbase = stream->dec_lastwin + (usize_t) (stream->dec_cpyoff - stream->dec_laststart);
3963 }
3964
3965 /* See if the current output window is large enough. */
3966 if (stream->space_out < stream->dec_tgtlen)
3967 {
3968 xd3_free (stream, stream->dec_buffer);
3969
3970 stream->space_out = xd3_round_blksize (stream->dec_tgtlen, XD3_ALLOCSIZE);
3971
3972 if ((stream->dec_buffer = xd3_alloc (stream, stream->space_out, 1)) == NULL)
3973 {
3974 return ENOMEM;
3975 }
3976
3977 stream->next_out = stream->dec_buffer;
3978 }
3979
3980 /* dec_tgtaddrbase refers to an invalid base address, but it is always used with a
3981 * sufficiently large instruction offset (i.e., beyond the copy window). This condition
3982 * is enforced by xd3_decode_output_halfinst. */
3983 stream->dec_tgtaddrbase = stream->next_out - stream->dec_cpylen;
3984
3985 return 0;
3986}
3987
3988static int
3989xd3_decode_allocate (xd3_stream *stream,
3990 usize_t size,
3991 uint8_t **copied1,
3992 usize_t *alloc1,
3993 uint8_t **copied2,
3994 usize_t *alloc2)
3995{
3996 if (*copied1 != NULL && *alloc1 < size)
3997 {
3998 xd3_free (stream, *copied1);
3999 *copied1 = NULL;
4000 }
4001
4002 if (*copied1 == NULL)
4003 {
4004#if SECONDARY_ANY
4005 /* Borrow from the secondary compressor's allocation. */
4006 if (copied2 != NULL && *copied2 != NULL && *alloc2 < size)
4007 {
4008 *copied1 = *copied2;
4009 *alloc1 = *alloc2;
4010 *copied2 = NULL;
4011 *alloc2 = 0;
4012 }
4013 else
4014#endif
4015 {
4016 *alloc1 = xd3_round_blksize (size, XD3_ALLOCSIZE);
4017
4018 if ((*copied1 = xd3_alloc (stream, *alloc1, 1)) == NULL)
4019 {
4020 return ENOMEM;
4021 }
4022 }
4023 }
4024
4025 return 0;
4026}
4027
4028static int
4029xd3_decode_section (xd3_stream *stream,
4030 xd3_desect *section,
4031 xd3_decode_state nstate,
4032 int copy)
4033{
4034 XD3_ASSERT (section->pos <= section->size);
4035 XD3_ASSERT (stream->dec_state != nstate);
4036
4037 if (section->pos < section->size)
4038 {
4039 usize_t sect_take;
4040
4041 if (stream->avail_in == 0)
4042 {
4043 return XD3_INPUT;
4044 }
4045
4046 if ((copy == 0) && (section->pos == 0))
4047 {
4048 /* No allocation/copy needed */
4049 section->buf = stream->next_in;
4050 sect_take = section->size;
4051 }
4052 else
4053 {
4054 usize_t sect_need = section->size - section->pos;
4055
4056 /* Allocate and copy */
4057 sect_take = min (sect_need, stream->avail_in);
4058
4059 if (section->pos == 0)
4060 {
4061 int ret;
4062
4063 if ((ret = xd3_decode_allocate (stream,
4064 section->size,
4065 & section->copied1,
4066 & section->alloc1,
4067 & section->copied2,
4068 & section->alloc2))) { return ret; }
4069
4070 section->buf = section->copied1;
4071 }
4072
4073 memcpy (section->copied1 + section->pos,
4074 stream->next_in,
4075 sect_take);
4076 }
4077
4078 section->pos += sect_take;
4079
4080 stream->dec_winbytes += sect_take;
4081
4082 DECODE_INPUT (sect_take);
4083 }
4084
4085 if (section->pos < section->size)
4086 {
4087 stream->msg = "further input required";
4088 return XD3_INPUT;
4089 }
4090
4091 XD3_ASSERT (section->pos == section->size);
4092
4093 stream->dec_state = nstate;
4094 section->buf_max = section->buf + section->size;
4095 section->pos = 0;
4096 return 0;
4097}
4098
4099/* Decode the size and address for half of an instruction (i.e., a single opcode). This
4100 * updates the stream->dec_position, which are bytes already output prior to processing
4101 * this instruction. Perform bounds checking for sizes and copy addresses, which uses the
4102 * dec_position (which is why these checks are done here). */
4103static int
4104xd3_decode_parse_halfinst (xd3_stream *stream, xd3_hinst *inst)
4105{
4106 int ret;
4107
4108 /* If the size from the instruction table is zero then read a size value. */
4109 if ((inst->size == 0) &&
4110 (ret = xd3_read_size (stream,
4111 & stream->inst_sect.buf,
4112 stream->inst_sect.buf_max,
4113
4114 & inst->size)))
4115 {
4116 return EINVAL;
4117 }
4118
4119 /* For copy instructions, read address. */
4120 if (inst->type >= XD3_CPY)
4121 {
4122 IF_DEBUG1 ({
4123 static int cnt = 0;
4124 P(RINT "DECODE:%u: COPY at %"Q"u (winoffset %u) size %u winaddr %u\n",
4125 cnt++,
4126 stream->total_out + (stream->dec_position - stream->dec_cpylen),
4127 (stream->dec_position - stream->dec_cpylen),
4128 inst->size,
4129 inst->addr);
4130 });
4131
4132 if ((ret = xd3_decode_address (stream,
4133 stream->dec_position,
4134 inst->type - XD3_CPY,
4135 & stream->addr_sect.buf,
4136 stream->addr_sect.buf_max,
4137 & inst->addr)))
4138 {
4139 return ret;
4140 }
4141
4142 /* Cannot copy an address before it is filled-in. */
4143 if (inst->addr >= stream->dec_position)
4144 {
4145 stream->msg = "address too large";
4146 return EINVAL;
4147 }
4148
4149 /* Check: a VCD_TARGET or VCD_SOURCE copy cannot exceed the remaining buffer space
4150 * in its own segment. */
4151 if (inst->addr < stream->dec_cpylen && inst->addr + inst->size > stream->dec_cpylen)
4152 {
4153 stream->msg = "size too large";
4154 return EINVAL;
4155 }
4156 }
4157 else
4158 {
4159 IF_DEBUG1 ({
4160 if (inst->type == XD3_ADD)
4161 {
4162 static int cnt;
4163 P(RINT "DECODE:%d: ADD at %"Q"u (winoffset %u) size %u\n",
4164 cnt++,
4165 stream->total_out + stream->dec_position - stream->dec_cpylen,
4166 stream->dec_position - stream->dec_cpylen,
4167 inst->size);
4168 }
4169 else
4170 {
4171 static int cnt;
4172 XD3_ASSERT (inst->type == XD3_RUN);
4173 P(RINT "DECODE:%d: RUN at %"Q"u (winoffset %u) size %u\n",
4174 cnt++,
4175 stream->total_out + stream->dec_position - stream->dec_cpylen,
4176 stream->dec_position - stream->dec_cpylen,
4177 inst->size);
4178 }
4179 });
4180 }
4181
4182 /* Check: The instruction will not overflow the output buffer. */
4183 if (stream->dec_position + inst->size > stream->dec_maxpos)
4184 {
4185 stream->msg = "size too large";
4186 return EINVAL;
4187 }
4188
4189 stream->dec_position += inst->size;
4190 return 0;
4191}
4192
4193/* Decode a single opcode and then decode the two half-instructions. */
4194static int
4195xd3_decode_instruction (xd3_stream *stream)
4196{
4197 int ret;
4198 const xd3_dinst *inst;
4199
4200 if (stream->inst_sect.buf == stream->inst_sect.buf_max)
4201 {
4202 stream->msg = "instruction underflow";
4203 return EINVAL;
4204 }
4205
4206 inst = &stream->code_table[*stream->inst_sect.buf++];
4207
4208 stream->dec_current1.type = inst->type1;
4209 stream->dec_current2.type = inst->type2;
4210 stream->dec_current1.size = inst->size1;
4211 stream->dec_current2.size = inst->size2;
4212
4213 /* For each instruction with a real operation, decode the corresponding size and
4214 * addresses if necessary. Assume a code-table may have NOOP in either position,
4215 * although this is unlikely. */
4216 if (inst->type1 != XD3_NOOP && (ret = xd3_decode_parse_halfinst (stream, & stream->dec_current1)))
4217 {
4218 return ret;
4219 }
4220 if (inst->type2 != XD3_NOOP && (ret = xd3_decode_parse_halfinst (stream, & stream->dec_current2)))
4221 {
4222 return ret;
4223 }
4224 return 0;
4225}
4226
4227/* Output the result of a single half-instruction. OPT: This the decoder hotspot. */
4228static int
4229xd3_decode_output_halfinst (xd3_stream *stream, xd3_hinst *inst)
4230{
4231 /* To make this reentrant, set take = min (inst->size, available space)... */
4232 usize_t take = inst->size;
4233
4234 XD3_ASSERT (inst->type != XD3_NOOP);
4235
4236 switch (inst->type)
4237 {
4238 case XD3_RUN:
4239 {
4240 /* Only require a single data byte. */
4241 if (stream->data_sect.buf == stream->data_sect.buf_max)
4242 {
4243 stream->msg = "data underflow";
4244 return EINVAL;
4245 }
4246
4247 /* TUNE: Probably want to eliminate memset/memcpy here */
4248 memset (stream->next_out + stream->avail_out,
4249 stream->data_sect.buf[0],
4250 take);
4251
4252 stream->data_sect.buf += 1;
4253 stream->avail_out += take;
4254 inst->type = XD3_NOOP;
4255 break;
4256 }
4257 case XD3_ADD:
4258 {
4259 /* Require at least TAKE data bytes. */
4260 if (stream->data_sect.buf + take > stream->data_sect.buf_max)
4261 {
4262 stream->msg = "data underflow";
4263 return EINVAL;
4264 }
4265
4266 memcpy (stream->next_out + stream->avail_out,
4267 stream->data_sect.buf,
4268 take);
4269
4270 stream->data_sect.buf += take;
4271 stream->avail_out += take;
4272 inst->type = XD3_NOOP;
4273 break;
4274 }
4275 default:
4276 {
4277 usize_t i;
4278 const uint8_t *src;
4279 uint8_t *dst;
4280
4281 /* See if it copies from the VCD_TARGET/VCD_SOURCE window or the target window.
4282 * Out-of-bounds checks for the addresses and sizes are performed in
4283 * xd3_decode_parse_halfinst. */
4284 if (inst->addr < stream->dec_cpylen)
4285 {
4286 if (stream->dec_win_ind & VCD_TARGET)
4287 {
4288 /* For VCD_TARGET we know the entire range is in-memory, as established by
4289 * decode_setup_buffers. */
4290 src = stream->dec_cpyaddrbase + inst->addr;
4291 inst->type = XD3_NOOP;
4292 inst->size = 0;
4293 }
4294 else
4295 {
4296 /* In this case we have to read a source block, which could return control
4297 * to the caller. We need to know the first block number needed for this
4298 * copy. */
4299 xd3_source *source;
4300 xoff_t block;
4301 usize_t blkoff;
4302 usize_t blksize;
4303 int ret;
4304
4305 more:
4306
4307 source = stream->src;
4308 block = source->cpyoff_blocks;
4309 blkoff = source->cpyoff_blkoff + inst->addr;
4310 blksize = source->blksize;
4311
4312 while (blkoff >= blksize)
4313 {
4314 block += 1;
4315 blkoff -= blksize;
4316 }
4317
4318 if ((ret = xd3_getblk (stream, block)))
4319 {
4320 /* could be a XD3_GETSRCBLK failure. */
4321 return ret;
4322 }
4323
4324 src = source->curblk + blkoff;
4325
4326 /* This block either contains enough data or the source file is
4327 * short. */
4328 if ((source->onblk != blksize) && (blkoff + take > source->onblk))
4329 {
4330 stream->msg = "source file too short";
4331 return EINVAL;
4332
4333 }
4334
4335 XD3_ASSERT (blkoff != blksize);
4336
4337 if (blkoff + take <= blksize)
4338 {
4339 inst->type = XD3_NOOP;
4340 inst->size = 0;
4341 }
4342 else
4343 {
4344 /* This block doesn't contain all the data, modify the instruction, do
4345 * not set to XD3_NOOP. */
4346 take = blksize - blkoff;
4347 inst->size -= take;
4348 inst->addr += take;
4349 }
4350 }
4351 }
4352 else
4353 {
4354 /* For a target-window copy, we know the entire range is in-memory. The
4355 * dec_tgtaddrbase is negatively offset by dec_cpylen because the addresses
4356 * start beyond that point. */
4357 src = stream->dec_tgtaddrbase + inst->addr;
4358 inst->type = XD3_NOOP;
4359 inst->size = 0;
4360 }
4361
4362 dst = stream->next_out + stream->avail_out;
4363
4364 stream->avail_out += take;
4365
4366 /* Can't just memcpy here due to possible overlap. */
4367 for (i = take; i != 0; i -= 1)
4368 {
4369 *dst++ = *src++;
4370 }
4371
4372 take = inst->size;
4373
4374 /* If there is more to copy, call getblk again. */
4375 if (inst->type != XD3_NOOP)
4376 {
4377 XD3_ASSERT (take > 0);
4378 goto more;
4379 }
4380 else
4381 {
4382 XD3_ASSERT (take == 0);
4383 }
4384 }
4385 }
4386
4387 return 0;
4388}
4389
4390static int
4391xd3_decode_finish_window (xd3_stream *stream)
4392{
4393 stream->dec_winbytes = 0;
4394 stream->dec_state = DEC_FINISH;
4395
4396 stream->data_sect.pos = 0;
4397 stream->inst_sect.pos = 0;
4398 stream->addr_sect.pos = 0;
4399
4400 return XD3_OUTPUT;
4401}
4402
4403static int
4404xd3_decode_sections (xd3_stream *stream)
4405{
4406 usize_t need, more, take;
4407 int copy, ret;
4408
4409 if ((stream->flags & XD3_JUST_HDR) != 0)
4410 {
4411 /* Nothing left to do. */
4412 return xd3_decode_finish_window (stream);
4413 }
4414
4415 /* To avoid copying, need this much data available */
4416 need = (stream->inst_sect.size +
4417 stream->addr_sect.size +
4418 stream->data_sect.size);
4419
4420 /* The window may be entirely processed. */
4421 XD3_ASSERT (stream->dec_winbytes <= need);
4422
4423 /* Compute how much more input is needed. */
4424 more = (need - stream->dec_winbytes);
4425
4426 /* How much to consume. */
4427 take = min (more, stream->avail_in);
4428
4429 /* See if the input is completely available, to avoid copy. */
4430 copy = (take != more);
4431
4432 /* If the window is skipped... */
4433 if ((stream->flags & XD3_SKIP_WINDOW) != 0)
4434 {
4435 /* Skip the available input. */
4436 DECODE_INPUT (take);
4437
4438 stream->dec_winbytes += take;
4439
4440 if (copy)
4441 {
4442 stream->msg = "further input required";
4443 return XD3_INPUT;
4444 }
4445
4446 return xd3_decode_finish_window (stream);
4447 }
4448
4449 /* Process all but the DATA section. */
4450 switch (stream->dec_state)
4451 {
4452 default:
4453 stream->msg = "internal error";
4454 return EINVAL;
4455
4456 case DEC_DATA:
4457 if ((ret = xd3_decode_section (stream, & stream->data_sect, DEC_INST, copy))) { return ret; }
4458 case DEC_INST:
4459 if ((ret = xd3_decode_section (stream, & stream->inst_sect, DEC_ADDR, copy))) { return ret; }
4460 case DEC_ADDR:
4461 if ((ret = xd3_decode_section (stream, & stream->addr_sect, DEC_EMIT, copy))) { return ret; }
4462 }
4463
4464 XD3_ASSERT (stream->dec_winbytes == need);
4465
4466#if SECONDARY_ANY
4467#define DECODE_SECONDARY_SECTION(UPPER,LOWER) \
4468 ((stream->dec_del_ind & VCD_ ## UPPER ## COMP) && \
4469 (ret = xd3_decode_secondary (stream, & stream-> LOWER ## _sect, \
4470 & xd3_sec_ ## LOWER (stream))))
4471
4472 if (DECODE_SECONDARY_SECTION (DATA, data) ||
4473 DECODE_SECONDARY_SECTION (INST, inst) ||
4474 DECODE_SECONDARY_SECTION (ADDR, addr))
4475 {
4476 return ret;
4477 }
4478#endif
4479
4480 if (stream->flags & XD3_SKIP_EMIT)
4481 {
4482 return xd3_decode_finish_window (stream);
4483 }
4484
4485 /* OPT: A possible optimization is to avoid allocating memory in decode_setup_buffers
4486 * and to avoid a large memcpy when the window consists of a single VCD_SOURCE copy
4487 * instruction. The only potential problem is if the following window is a VCD_TARGET,
4488 * then you need to remember... */
4489 if ((ret = xd3_decode_setup_buffers (stream))) { return ret; }
4490
4491 return 0;
4492}
4493
4494static int
4495xd3_decode_emit (xd3_stream *stream)
4496{
4497 int ret;
4498
4499 /* Produce output: originally structured to allow reentrant code that fills as much of
4500 * the output buffer as possible, but VCDIFF semantics allows to copy from anywhere from
4501 * the target window, so instead allocate a sufficiently sized buffer after the target
4502 * window length is decoded.
4503 *
4504 * This code still needs to be reentrant to allow XD3_GETSRCBLK to return control. This
4505 * is handled by setting the stream->dec_currentN instruction types to XD3_NOOP after
4506 * they have been processed. */
4507 XD3_ASSERT (! (stream->flags & XD3_SKIP_EMIT));
4508 XD3_ASSERT (stream->avail_out == 0);
4509 XD3_ASSERT (stream->dec_tgtlen <= stream->space_out);
4510
4511 while (stream->inst_sect.buf != stream->inst_sect.buf_max)
4512 {
4513 /* Decode next instruction pair. */
4514 if ((stream->dec_current1.type == XD3_NOOP) &&
4515 (stream->dec_current2.type == XD3_NOOP) &&
4516 (ret = xd3_decode_instruction (stream))) { return ret; }
4517
4518 /* Output for each instruction. */
4519 if ((stream->dec_current1.type != XD3_NOOP) &&
4520 (ret = xd3_decode_output_halfinst (stream, & stream->dec_current1))) { return ret; }
4521
4522 if ((stream->dec_current2.type != XD3_NOOP) &&
4523 (ret = xd3_decode_output_halfinst (stream, & stream->dec_current2))) { return ret; }
4524 }
4525
4526 if (stream->avail_out != stream->dec_tgtlen)
4527 {
4528 IF_DEBUG1 (P(RINT "AVAIL_OUT(%d) != DEC_TGTLEN(%d)\n", stream->avail_out, stream->dec_tgtlen));
4529 stream->msg = "wrong window length";
4530 return EINVAL;
4531 }
4532
4533 if (stream->data_sect.buf != stream->data_sect.buf_max)
4534 {
4535 stream->msg = "extra data section";
4536 return EINVAL;
4537 }
4538
4539 if (stream->addr_sect.buf != stream->addr_sect.buf_max)
4540 {
4541 stream->msg = "extra address section";
4542 return EINVAL;
4543 }
4544
4545 /* OPT: Should cksum computation be combined with the above loop? */
4546 if ((stream->dec_win_ind & VCD_ADLER32) != 0 &&
4547 (stream->flags & XD3_ADLER32_NOVER) == 0)
4548 {
4549 uint32_t a32 = adler32 (1L, stream->next_out, stream->avail_out);
4550
4551 if (a32 != stream->dec_adler32)
4552 {
4553 stream->msg = "target window checksum mismatch";
4554 return EINVAL;
4555 }
4556 }
4557
4558 /* Finished with a window. */
4559 return xd3_decode_finish_window (stream);
4560}
4561
4562int
4563xd3_decode_input (xd3_stream *stream)
4564{
4565 int ret;
4566
4567 if (stream->enc_state != 0)
4568 {
4569 stream->msg = "encoder/decoder transition";
4570 return EINVAL;
4571 }
4572
4573#define BYTE_CASE(expr,x,nstate) \
4574 do { \
4575 if ( (expr) && \
4576 ((ret = xd3_decode_byte (stream, & (x))) != 0) ) { return ret; } \
4577 stream->dec_state = (nstate); \
4578 } while (0)
4579
4580#define OFFSET_CASE(expr,x,nstate) \
4581 do { \
4582 if ( (expr) && \
4583 ((ret = xd3_decode_offset (stream, & (x))) != 0) ) { return ret; } \
4584 stream->dec_state = (nstate); \
4585 } while (0)
4586
4587#define SIZE_CASE(expr,x,nstate) \
4588 do { \
4589 if ( (expr) && \
4590 ((ret = xd3_decode_size (stream, & (x))) != 0) ) { return ret; } \
4591 stream->dec_state = (nstate); \
4592 } while (0)
4593
4594#define SRCORTGT(x) (((x) & VCD_SRCORTGT) == VCD_SOURCE || \
4595 ((x) & VCD_SRCORTGT) == VCD_TARGET)
4596
4597 switch (stream->dec_state)
4598 {
4599 case DEC_VCHEAD:
4600 {
4601 if ((ret = xd3_decode_bytes (stream, stream->dec_magic, & stream->dec_magicbytes, 4))) { return ret; }
4602
4603 if (stream->dec_magic[0] != VCDIFF_MAGIC1 ||
4604 stream->dec_magic[1] != VCDIFF_MAGIC2 ||
4605 stream->dec_magic[2] != VCDIFF_MAGIC3)
4606 {
4607 stream->msg = "not a VCDIFF input";
4608 return EINVAL;
4609 }
4610
4611 if (stream->dec_magic[3] != 0)
4612 {
4613 stream->msg = "VCDIFF input version > 0 is not supported";
4614 return EINVAL;
4615 }
4616
4617 stream->dec_state = DEC_HDRIND;
4618 }
4619 case DEC_HDRIND:
4620 {
4621 if ((ret = xd3_decode_byte (stream, & stream->dec_hdr_ind))) { return ret; }
4622
4623 if ((stream->dec_hdr_ind & VCD_INVHDR) != 0)
4624 {
4625 stream->msg = "unrecognized header indicator bits set";
4626 return EINVAL;
4627 }
4628
4629 stream->dec_state = DEC_SECONDID;
4630 }
4631
4632 case DEC_SECONDID:
4633 /* Secondary compressor ID: only if VCD_SECONDARY is set */
4634 if ((stream->dec_hdr_ind & VCD_SECONDARY) != 0)
4635 {
4636 BYTE_CASE (1, stream->dec_secondid, DEC_TABLEN);
4637
4638 switch (stream->dec_secondid)
4639 {
4640 case VCD_FGK_ID:
4641 FGK_CASE (stream);
4642 case VCD_DJW_ID:
4643 DJW_CASE (stream);
4644 default:
4645 stream->msg = "unknown secondary compressor ID";
4646 return EINVAL;
4647 }
4648 }
4649
4650 case DEC_TABLEN:
4651 /* Length of code table data: only if VCD_CODETABLE is set */
4652 SIZE_CASE ((stream->dec_hdr_ind & VCD_CODETABLE) != 0, stream->dec_codetblsz, DEC_NEAR);
4653
4654 /* The codetblsz counts the two NEAR/SAME bytes */
4655 if ((stream->dec_hdr_ind & VCD_CODETABLE) != 0) {
4656 if (stream->dec_codetblsz <= 2) {
4657 stream->msg = "invalid code table size";
4658 return ENOMEM;
4659 }
4660 stream->dec_codetblsz -= 2;
4661 }
4662 case DEC_NEAR:
4663 /* Near modes: only if VCD_CODETABLE is set */
4664 BYTE_CASE((stream->dec_hdr_ind & VCD_CODETABLE) != 0, stream->acache.s_near, DEC_SAME);
4665 case DEC_SAME:
4666 /* Same modes: only if VCD_CODETABLE is set */
4667 BYTE_CASE((stream->dec_hdr_ind & VCD_CODETABLE) != 0, stream->acache.s_same, DEC_TABDAT);
4668 case DEC_TABDAT:
4669 /* Compressed code table data */
4670
4671 if ((stream->dec_hdr_ind & VCD_CODETABLE) != 0)
4672 {
4673 /* Get the code table data. */
4674 if ((stream->dec_codetbl == NULL) &&
4675 (stream->dec_codetbl = xd3_alloc (stream, stream->dec_codetblsz, 1)) == NULL) { return ENOMEM; }
4676
4677 if ((ret = xd3_decode_bytes (stream, stream->dec_codetbl, & stream->dec_codetblbytes, stream->dec_codetblsz)))
4678 {
4679 return ret;
4680 }
4681
4682 if ((ret = xd3_apply_table_encoding (stream, stream->dec_codetbl, stream->dec_codetblbytes)))
4683 {
4684 return ret;
4685 }
4686 }
4687 else
4688 {
4689 /* Use the default table. */
4690 stream->acache.s_near = __rfc3284_code_table_desc.near_modes;
4691 stream->acache.s_same = __rfc3284_code_table_desc.same_modes;
4692 stream->code_table = xd3_rfc3284_code_table ();
4693 }
4694
4695 if ((ret = xd3_alloc_cache (stream))) { return ret; }
4696
4697 stream->dec_state = DEC_APPLEN;
4698
4699 case DEC_APPLEN:
4700 /* Length of application data */
4701 SIZE_CASE((stream->dec_hdr_ind & VCD_APPHEADER) != 0, stream->dec_appheadsz, DEC_APPDAT);
4702
4703 case DEC_APPDAT:
4704 /* Application data */
4705 if (stream->dec_hdr_ind & VCD_APPHEADER)
4706 {
4707 /* Note: we add an additional byte for padding, to allow 0-termination. */
4708 if ((stream->dec_appheader == NULL) &&
4709 (stream->dec_appheader = xd3_alloc (stream, stream->dec_appheadsz+1, 1)) == NULL) { return ENOMEM; }
4710
4711 stream->dec_appheader[stream->dec_appheadsz] = 0;
4712
4713 if ((ret = xd3_decode_bytes (stream, stream->dec_appheader, & stream->dec_appheadbytes, stream->dec_appheadsz)))
4714 {
4715 return ret;
4716 }
4717 }
4718
4719 stream->dec_hdrsize = stream->total_in;
4720 stream->dec_state = DEC_WININD;
4721
4722 case DEC_WININD:
4723 {
4724 /* Start of a window: the window indicator */
4725
4726 if ((ret = xd3_decode_byte (stream, & stream->dec_win_ind))) { return ret; }
4727
4728 stream->current_window = stream->dec_window_count;
4729
4730 if (XOFF_T_OVERFLOW (stream->dec_winstart, stream->dec_tgtlen))
4731 {
4732 stream->msg = "decoder file offset overflow";
4733 return EINVAL;
4734 }
4735
4736 stream->dec_winstart += stream->dec_tgtlen;
4737
4738 if ((stream->dec_win_ind & VCD_INVWIN) != 0)
4739 {
4740 stream->msg = "unrecognized window indicator bits set";
4741 return EINVAL;
4742 }
4743
4744 if ((ret = xd3_decode_init_window (stream))) { return ret; }
4745
4746 stream->dec_state = DEC_CPYLEN;
4747
4748 IF_DEBUG1 (P(RINT "--------- TARGET WINDOW %"Q"u ------------------\n", stream->current_window));
4749 }
4750
4751 case DEC_CPYLEN:
4752 /* Copy window length: only if VCD_SOURCE or VCD_TARGET is set */
4753 SIZE_CASE(SRCORTGT (stream->dec_win_ind), stream->dec_cpylen, DEC_CPYOFF);
4754
4755 /* Set the initial, logical decoder position (HERE address) in dec_position. This
4756 * is set to just after the source/copy window, as we are just about to output the
4757 * first byte of target window. */
4758 stream->dec_position = stream->dec_cpylen;
4759
4760 case DEC_CPYOFF:
4761 /* Copy window offset: only if VCD_SOURCE or VCD_TARGET is set */
4762 OFFSET_CASE(SRCORTGT (stream->dec_win_ind), stream->dec_cpyoff, DEC_ENCLEN);
4763
4764 /* Copy offset and copy length may not overflow. */
4765 if (XOFF_T_OVERFLOW (stream->dec_cpyoff, stream->dec_cpylen))
4766 {
4767 stream->msg = "decoder copy window overflows a file offset";
4768 return EINVAL;
4769 }
4770
4771 /* Check copy window bounds: VCD_TARGET window may not exceed current position. */
4772 if ((stream->dec_win_ind & VCD_TARGET) &&
4773 (stream->dec_cpyoff + (xoff_t) stream->dec_cpylen > stream->dec_winstart))
4774 {
4775 stream->msg = "VCD_TARGET window out of bounds";
4776 return EINVAL;
4777 }
4778
4779 case DEC_ENCLEN:
4780 /* Length of the delta encoding */
4781 SIZE_CASE(1, stream->dec_enclen, DEC_TGTLEN);
4782 case DEC_TGTLEN:
4783 /* Length of target window */
4784 SIZE_CASE(1, stream->dec_tgtlen, DEC_DELIND);
4785
4786 /* Set the maximum decoder position, beyond which we should not decode any data.
4787 * This is the maximum value for dec_position. This may not exceed the size of a
4788 * usize_t. */
4789 if (USIZE_T_OVERFLOW (stream->dec_cpylen, stream->dec_tgtlen))
4790 {
4791 stream->msg = "decoder target window overflows a usize_t";
4792 return EINVAL;
4793 }
4794
4795 /* Check for malicious files. */
4796 if (stream->dec_tgtlen > XD3_HARDMAXWINSIZE)
4797 {
4798 stream->msg = "hard window size exceeded";
4799 return EINVAL;
4800 }
4801
4802 stream->dec_maxpos = stream->dec_cpylen + stream->dec_tgtlen;
4803
4804 case DEC_DELIND:
4805 /* Delta indicator */
4806 BYTE_CASE(1, stream->dec_del_ind, DEC_DATALEN);
4807
4808 if ((stream->dec_del_ind & VCD_INVDEL) != 0)
4809 {
4810 stream->msg = "unrecognized delta indicator bits set";
4811 return EINVAL;
4812 }
4813
4814 /* Delta indicator is only used with secondary compression. */
4815 if ((stream->dec_del_ind != 0) && (stream->sec_type == NULL))
4816 {
4817 stream->msg = "invalid delta indicator bits set";
4818 return EINVAL;
4819 }
4820
4821 /* Section lengths */
4822 case DEC_DATALEN:
4823 SIZE_CASE(1, stream->data_sect.size, DEC_INSTLEN);
4824 case DEC_INSTLEN:
4825 SIZE_CASE(1, stream->inst_sect.size, DEC_ADDRLEN);
4826 case DEC_ADDRLEN:
4827 SIZE_CASE(1, stream->addr_sect.size, DEC_CKSUM);
4828
4829 case DEC_CKSUM:
4830 /* Window checksum. */
4831 if ((stream->dec_win_ind & VCD_ADLER32) != 0)
4832 {
4833 int i;
4834
4835 if ((ret = xd3_decode_bytes (stream, stream->dec_cksum, & stream->dec_cksumbytes, 4))) { return ret; }
4836
4837 for (i = 0; i < 4; i += 1)
4838 {
4839 stream->dec_adler32 = (stream->dec_adler32 << 8) | stream->dec_cksum[i];
4840 }
4841 }
4842
4843 stream->dec_state = DEC_DATA;
4844
4845 /* Check dec_enclen for redundency, otherwise it is not really used. */
4846 {
4847 usize_t enclen_check = (1 + (xd3_sizeof_size (stream->dec_tgtlen) +
4848 xd3_sizeof_size (stream->data_sect.size) +
4849 xd3_sizeof_size (stream->inst_sect.size) +
4850 xd3_sizeof_size (stream->addr_sect.size)) +
4851 stream->data_sect.size +
4852 stream->inst_sect.size +
4853 stream->addr_sect.size +
4854 ((stream->dec_win_ind & VCD_ADLER32) ? 4 : 0));
4855
4856 if (stream->dec_enclen != enclen_check)
4857 {
4858 stream->msg = "incorrect encoding length (redundent)";
4859 return EINVAL;
4860 }
4861 }
4862
4863 /* Returning here gives the application a chance to inspect the header, skip the
4864 * window, etc. */
4865 if (stream->current_window == 0) { return XD3_GOTHEADER; }
4866 else { return XD3_WINSTART; }
4867
4868 case DEC_DATA:
4869 case DEC_INST:
4870 case DEC_ADDR:
4871 /* Next read the three sections. */
4872 if ((ret = xd3_decode_sections (stream))) { return ret; }
4873
4874 case DEC_EMIT:
4875
4876 /* To speed VCD_SOURCE block-address calculations, the source cpyoff_blocks and
4877 * cpyoff_blkoff are pre-computed. */
4878 if (stream->dec_win_ind & VCD_SOURCE)
4879 {
4880 xd3_source *src = stream->src;
4881
4882 if (src == NULL)
4883 {
4884 stream->msg = "source input required";
4885 return EINVAL;
4886 }
4887
4888 src->cpyoff_blocks = stream->dec_cpyoff / src->blksize;
4889 src->cpyoff_blkoff = stream->dec_cpyoff % src->blksize;
4890 }
4891
4892 /* xd3_decode_emit returns XD3_OUTPUT on every success. */
4893 if ((ret = xd3_decode_emit (stream)) == XD3_OUTPUT)
4894 {
4895 stream->total_out += (xoff_t) stream->avail_out;
4896 }
4897
4898 return ret;
4899
4900 case DEC_FINISH:
4901 {
4902 if (stream->dec_win_ind & VCD_TARGET)
4903 {
4904 if (stream->dec_lastwin == NULL)
4905 {
4906 stream->dec_lastwin = stream->next_out;
4907 stream->dec_lastspace = stream->space_out;
4908 }
4909 else
4910 {
4911 xd3_swap_uint8p (& stream->dec_lastwin, & stream->next_out);
4912 xd3_swap_usize_t (& stream->dec_lastspace, & stream->space_out);
4913 }
4914 }
4915
4916 stream->dec_lastlen = stream->dec_tgtlen;
4917 stream->dec_laststart = stream->dec_winstart;
4918 stream->dec_window_count += 1;
4919
4920 /* Note: the updates to dec_winstart & current_window are deferred until after the
4921 * next DEC_WININD byte is read. */
4922 stream->dec_state = DEC_WININD;
4923 return XD3_WINFINISH;
4924 }
4925
4926 default:
4927 stream->msg = "invalid state";
4928 return EINVAL;
4929 }
4930}
4931
4932/******************************************************************************************
4933 String matching helpers
4934 ******************************************************************************************/
4935
4936#if XD3_ENCODER
4937/* Do the initial xd3_string_match() checksum table setup. Allocations are delayed until
4938 * first use to avoid allocation sometimes (e.g., perfect matches, zero-length inputs). */
4939static int
4940xd3_string_match_init (xd3_stream *stream)
4941{
4942 const int DO_SMALL = ! (stream->flags & XD3_NOCOMPRESS);
4943 const int DO_LARGE = (stream->src != NULL);
4944
4945 if (DO_SMALL)
4946 {
4947 /* Subsequent calls can return immediately after checking reset. */
4948 if (stream->small_table != NULL)
4949 {
4950 /* The target hash table is reinitialized once per window. */
4951 if (stream->small_reset)
4952 {
4953 stream->small_reset = 0;
4954 memset (stream->small_table, 0, sizeof (usize_t) * stream->small_hash.size);
4955 }
4956
4957 return 0;
4958 }
4959
4960 if ((stream->small_table = xd3_alloc0 (stream, stream->small_hash.size, sizeof (usize_t))) == NULL)
4961 {
4962 return ENOMEM;
4963 }
4964
4965 /* If there is a previous table needed. */
4966 if (stream->small_chain > 1)
4967 {
4968 xd3_slist *p, *m;
4969
4970 if ((stream->small_prev = xd3_alloc (stream, stream->sprevsz, sizeof (xd3_slist))) == NULL)
4971 {
4972 return ENOMEM;
4973 }
4974
4975 /* Initialize circular lists. */
4976 for (p = stream->small_prev, m = stream->small_prev + stream->sprevsz; p != m; p += 1)
4977 {
4978 p->next = p;
4979 p->prev = p;
4980 }
4981 }
4982 }
4983
4984 if (DO_LARGE && stream->large_table == NULL)
4985 {
4986 if ((stream->large_table = xd3_alloc0 (stream, stream->large_hash.size, sizeof (usize_t))) == NULL)
4987 {
4988 return ENOMEM;
4989 }
4990 }
4991
4992 return 0;
4993}
4994
4995/* Called at every entrance to the string-match loop and each time
4996 * stream->input_position the value returned as *next_move_point.
4997 * This function computes more source checksums to advance the window. */
4998static int
4999xd3_srcwin_move_point (xd3_stream *stream, usize_t *next_move_point)
5000{
5001 // The input offset at which the source should ideally be scanned
5002 xoff_t logical_input_cksum_pos = stream->total_in + pos_in + stream->srcwin_size;
5003
5004 if (stream->srcwin_cksum_pos >= stream->src->size)
5005 {
5006 *next_move_point = USIZE_T_MAX;
5007 return 0;
5008 }
5009
5010 if (stream->srcwin_cksum_pos > logical_input_cksum_pos)
5011 {
5012 *next_move_point = stream->srcwin_cksum_pos - logical_input_cksum_pos;
5013 return 0;
5014 }
5015
5016 IF_DEBUG1 (P(RINT "[move_p1] size=%d T=%"Q"d S=%"Q"d\n", stream->srcwin_size,
5017 stream->total_in + pos_in, stream->srcwin_cksum_pos));
5018
5019 *next_move_point = pos_in + stream->srcwin_size;
5020
5021 if (stream->srcwin_cksum_pos == 0)
5022 {
5023 // Two windows to start with
5024 logical_input_cksum_pos += stream->srcwin_size;
5025 }
5026 else
5027 {
5028 // Otherwise double and add
5029 stream->srcwin_size = min(stream->srcwin_maxsz, stream->srcwin_size * 2);
5030 logical_input_cksum_pos += stream->srcwin_size;
5031 }
5032
5033 while (stream->srcwin_cksum_pos < logical_input_cksum_pos &&
5034 stream->srcwin_cksum_pos < stream->src->size)
5035 {
5036 xoff_t blkno = stream->srcwin_cksum_pos / stream->src->blksize;
5037 usize_t blkoff = stream->srcwin_cksum_pos % stream->src->blksize;
5038 usize_t onblk = xd3_bytes_on_srcblk (stream->src, blkno);
5039 int ret;
5040
5041 if (blkoff + stream->large_look >= onblk)
5042 {
5043 /* Next block */
5044 stream->srcwin_cksum_pos = (blkno * stream->src->blksize) + onblk;
5045 continue;
5046 }
5047
5048 if ((ret = xd3_getblk (stream, blkno)))
5049 {
5050 return ret;
5051 }
5052
5053 usize_t diff = logical_input_cksum_pos - stream->srcwin_cksum_pos;
5054
5055 onblk = min(onblk, diff + blkoff + stream->large_look);
5056
5057 while (blkoff + stream->large_look <= onblk)
5058 {
5059 uint32_t cksum = xd3_lcksum (stream->src->curblk + blkoff, stream->large_look);
5060 usize_t hval = xd3_checksum_hash (& stream->large_hash, cksum);
5061
5062 stream->large_table[hval] = stream->srcwin_cksum_pos + HASH_CKOFFSET;
5063
5064 blkoff += stream->large_step;
5065 stream->srcwin_cksum_pos += stream->large_step;
5066 IF_DEBUG (stream->large_ckcnt += 1);
5067 }
5068 }
5069
5070 IF_DEBUG1 (P(RINT "[move_p2] size=%d T=%"Q"d S=%"Q"d next_move=%d\n", stream->srcwin_size,
5071 stream->total_in + pos_in, stream->srcwin_cksum_pos, *next_move_point));
5072
5073 return 0;
5074}
5075
5076/* This function sets up the stream->src fields srcbase, srclen. The call is delayed
5077 * until these values are needed to encode a copy address. At this point the decision has
5078 * to be made. */
5079static int
5080xd3_srcwin_setup (xd3_stream *stream)
5081{
5082 xd3_source *src = stream->src;
5083 xoff_t length;
5084
5085 IF_DEBUG1 (P(RINT "[srcwin setup:%"Q"u] iopt buffer %s\n",
5086 stream->current_window,
5087 stream->enc_state < ENC_FLUSH ? "overflow" : "fit"));
5088
5089 /* Check the undecided state. */
5090 XD3_ASSERT (src->srclen == 0 && src->srcbase == 0);
5091
5092 /* Avoid repeating this call. */
5093 stream->srcwin_decided = 1;
5094
5095 /* If the stream is flushing, then the iopt buffer was able to contain the complete
5096 * encoding. If no copies were issued no source window is actually needed. This
5097 * prevents the VCDIFF header from including source base/len. xd3_emit_hdr checks
5098 * for srclen == 0. */
5099 if (stream->enc_state == ENC_FLUSH && stream->match_maxaddr == 0)
5100 {
5101 goto done;
5102 }
5103
5104 /* Check for overflow, srclen is usize_t - this can't happen unless XD3_DEFAULT_SRCBACK
5105 * and related parameters are extreme - should use smaller windows. */
5106 length = stream->match_maxaddr - stream->match_minaddr;
5107
5108 if (length > (xoff_t) USIZE_T_MAX)
5109 {
5110 stream->msg = "source window length overflow (not 64bit)";
5111 return EINVAL;
5112 }
5113
5114 /* If ENC_FLUSH, then we know the exact source window to use because no more copies can
5115 * be issued. */
5116 if (stream->enc_state == ENC_FLUSH)
5117 {
5118 src->srcbase = stream->match_minaddr;
5119 src->srclen = (usize_t) length;
5120 XD3_ASSERT (src->srclen);
5121 goto done;
5122 }
5123
5124 /* Otherwise, we have to make a guess. More copies may still be issued, but we have to
5125 * decide the source window base and length now. */
5126 src->srcbase = stream->match_minaddr;
5127 src->srclen = max ((usize_t) length, stream->avail_in + (stream->avail_in >> 2));
5128 if (src->size < src->srcbase + (xoff_t) src->srclen)
5129 {
5130 /* Could reduce srcbase, as well. */
5131 src->srclen = src->size - src->srcbase;
5132 }
5133
5134 XD3_ASSERT (src->srclen);
5135 done:
5136 IF_DEBUG1 (P(RINT "[srcwin setup:%"Q"u] base %"Q"u size %u\n",
5137 stream->current_window,
5138 src->srcbase,
5139 src->srclen));
5140 /* Set the taroff. This convenience variable is used even when stream->src == NULL. */
5141 stream->taroff = src->srclen;
5142 return 0;
5143}
5144
5145/* Sets the bounding region for a newly discovered source match, prior to calling
5146 * xd3_source_extend_match(). This sets the match_maxfwd, match_maxback variables. Note:
5147 * srcpos is an absolute position (xoff_t) but the match_maxfwd, match_maxback variables
5148 * are usize_t. Returns 0 if the setup succeeds, or 1 if the source position lies outside
5149 * an already-decided srcbase/srclen window. */
5150static int
5151xd3_source_match_setup (xd3_stream *stream, xoff_t srcpos)
5152{
5153 xd3_source *src = stream->src;
5154 usize_t greedy_or_not;
5155 xoff_t farthest_src;
5156
5157 stream->match_maxback = 0;
5158 stream->match_maxfwd = 0;
5159 stream->match_back = 0;
5160 stream->match_fwd = 0;
5161
5162 farthest_src = max(stream->srcwin_cksum_pos, srcpos);
5163
5164 XD3_ASSERT (stream->srcwin_maxsz > src->blksize);
5165
5166 /* This prevents the encoder from seeking back more than srcwin_maxsz. Using
5167 * srcwin_maxsz is incorrect. TODO: Possibly an new option here, how far back to
5168 * seek? */
5169 if (max_in == 0 ||
5170 farthest_src - srcpos > stream->srcwin_maxsz - src->blksize)
5171 {
5172 goto bad; // TODO! Note: this prevents catching the TODO/bug below
5173 }
5174
5175 /* TODO: check for boundary crossing */
5176
5177 /* Going backwards, the 1.5-pass algorithm allows some already-matched input may be
5178 * covered by a longer source match. The greedy algorithm does not allow this. */
5179 if (stream->flags & XD3_BEGREEDY)
5180 {
5181 /* The greedy algorithm allows backward matching to the last matched position. */
5182 greedy_or_not = xd3_iopt_last_matched (stream);
5183 }
5184 else
5185 {
5186 /* The 1.5-pass algorithm allows backward matching to go back as far as the
5187 * unencoded offset, which is updated as instructions pass out of the iopt buffer.
5188 * If this (default) is chosen, it means xd3_iopt_erase may be called to eliminate
5189 * instructions when a covering source match is found. */
5190 greedy_or_not = stream->unencoded_offset;
5191 }
5192
5193 /* Backward target match limit. */
5194 XD3_ASSERT (pos_in >= greedy_or_not);
5195 stream->match_maxback = pos_in - greedy_or_not;
5196
5197 /* Forward target match limit. */
5198 XD3_ASSERT (max_in > pos_in);
5199 stream->match_maxfwd = max_in - pos_in;
5200
5201 /* Now we take the source position into account. It depends whether the srclen/srcbase
5202 * have been decided yet. */
5203 if (stream->srcwin_decided == 0)
5204 {
5205 /* Unrestricted case: the match can cover the entire source, 0--src->size. We
5206 * compare the usize_t match_maxfwd/match_maxback against the xoff_t src->size/srcpos values
5207 * and take the min. */
5208 xoff_t srcavail;
5209
5210 if (srcpos < (xoff_t) stream->match_maxback)
5211 {
5212 stream->match_maxback = srcpos;
5213 }
5214
5215 srcavail = src->size - srcpos;
5216 if (srcavail < (xoff_t) stream->match_maxfwd)
5217 {
5218 stream->match_maxfwd = srcavail;
5219 }
5220
5221 goto good;
5222 }
5223
5224 /* Decided some source window. */
5225 XD3_ASSERT (src->srclen > 0);
5226
5227 /* Restricted case: fail if the srcpos lies outside the source window */
5228 if ((srcpos < src->srcbase) || (srcpos > (src->srcbase + (xoff_t) src->srclen)))
5229 {
5230 goto bad;
5231 }
5232 else
5233 {
5234 usize_t srcavail;
5235
5236 srcavail = (usize_t) (srcpos - src->srcbase);
5237 if (srcavail < stream->match_maxback)
5238 {
5239 stream->match_maxback = srcavail;
5240 }
5241
5242 srcavail = (usize_t) (src->srcbase + (xoff_t) src->srclen - srcpos);
5243 if (srcavail < stream->match_maxfwd) {
5244 stream->match_maxfwd = srcavail;
5245 }
5246
5247 goto good;
5248 }
5249
5250 good:
5251 stream->match_state = MATCH_BACKWARD;
5252 stream->match_srcpos = srcpos;
5253 return 0;
5254
5255 bad:
5256 stream->match_state = MATCH_SEARCHING;
5257 return 1;
5258}
5259
5260/* This function expands the source match backward and forward. It is reentrant, since
5261 * xd3_getblk may return XD3_GETSRCBLK, so most variables are kept in xd3_stream. There
5262 * are two callers of this function, the string_matching routine when a checksum match is
5263 * discovered, and xd3_encode_input whenever a continuing (or initial) match is suspected.
5264 * The two callers do different things with the input_position, thus this function leaves
5265 * that variable untouched. If a match is taken the resulting stream->match_fwd is left
5266 * non-zero. */
5267static int
5268xd3_source_extend_match (xd3_stream *stream)
5269{
5270 int ret;
5271 xd3_source *src = stream->src;
5272 xoff_t matchoff; /* matchoff is the current right/left-boundary of the source match being tested. */
5273 usize_t streamoff; /* streamoff is the current right/left-boundary of the input match being tested. */
5274 xoff_t tryblk; /* tryblk, tryoff are the block, offset position of matchoff */
5275 usize_t tryoff;
5276 usize_t tryrem; /* tryrem is the number of matchable bytes on the source block */
5277
5278 XD3_ASSERT (src != NULL);
5279
5280 /* Does it make sense to compute backward match AFTER forward match? */
5281 if (stream->match_state == MATCH_BACKWARD)
5282 {
5283 /* Note: this code is practically duplicated below, substituting
5284 * match_fwd/match_back and direction. Consolidate? */
5285 matchoff = stream->match_srcpos - stream->match_back;
5286 streamoff = pos_in - stream->match_back;
5287 tryblk = matchoff / src->blksize;
5288 tryoff = matchoff % src->blksize;
5289
5290 /* this loops backward over source blocks */
5291 while (stream->match_back < stream->match_maxback)
5292 {
5293 /* see if we're backing across a source block boundary */
5294 if (tryoff == 0)
5295 {
5296 tryoff = src->blksize;
5297 tryblk -= 1;
5298 }
5299
5300 if ((ret = xd3_getblk (stream, tryblk)))
5301 {
5302 /* could be a XD3_GETSRCBLK failure. */
5303 return ret;
5304 }
5305
5306 /* OPT: This code can be optimized. */
5307 for (tryrem = min (tryoff, stream->match_maxback - stream->match_back);
5308 tryrem != 0;
5309 tryrem -= 1, stream->match_back += 1)
5310 {
5311 if (src->curblk[tryoff-1] != stream->next_in[streamoff-1])
5312 {
5313 goto doneback;
5314 }
5315
5316 tryoff -= 1;
5317 streamoff -= 1;
5318 }
5319 }
5320
5321 doneback:
5322 stream->match_state = MATCH_FORWARD;
5323 }
5324
5325 XD3_ASSERT (stream->match_state == MATCH_FORWARD);
5326
5327 matchoff = stream->match_srcpos + stream->match_fwd;
5328 streamoff = pos_in + stream->match_fwd;
5329 tryblk = matchoff / src->blksize;
5330 tryoff = matchoff % src->blksize;
5331
5332 /* Note: practically the same code as backwards case above: same comments */
5333 while (stream->match_fwd < stream->match_maxfwd)
5334 {
5335 if ((ret = xd3_getblk (stream, tryblk)))
5336 {
5337 return ret;
5338 }
5339
5340 /* There's a good speedup for doing word comparions: see zlib. */
5341 for (tryrem = min(stream->match_maxfwd - stream->match_fwd,
5342 src->blksize - tryoff);
5343 tryrem != 0;
5344 tryrem -= 1, stream->match_fwd += 1)
5345 {
5346 if (src->curblk[tryoff] != stream->next_in[streamoff])
5347 {
5348 goto donefwd;
5349 }
5350
5351 tryoff += 1;
5352 streamoff += 1;
5353 }
5354
5355 if (tryoff == src->blksize)
5356 {
5357 tryoff = 0;
5358 tryblk += 1;
5359 }
5360 }
5361
5362 donefwd:
5363 stream->match_state = MATCH_SEARCHING;
5364
5365 /* Now decide whether to take the match. There are several ways to answer this
5366 * question and this is likely the best answer. There is currently an assertion
5367 * in xd3_iopt_erase that checks whether min_match works. This variable maintains
5368 * that every match exceeds the end of the previous match. However, it is
5369 * possible that match_back allows us to find a match that goes a long way back
5370 * but not enough forward. We could try an alternate approach, which might help
5371 * or it might just be extra complexity: eliminate the next match_fwd >= min_match
5372 * test and call xd3_iopt_erase right away. Erase instructions as far as it goes
5373 * back, then either remember what was deleted and re-insert it, or count on the
5374 * string-matching algorithm to find that match again. I think it is more
5375 * worthwhile to implement large_hash duplicates. */
5376 if (stream->match_fwd < min_match)
5377 {
5378 stream->match_fwd = 0;
5379 }
5380 else
5381 {
5382 usize_t total = stream->match_fwd + stream->match_back;
5383 xoff_t match_end;
5384
5385 /* Correct the variables to remove match_back from the equation. */
5386 stream->input_position -= stream->match_back;
5387 stream->match_srcpos -= stream->match_back;
5388 stream->match_fwd += stream->match_back;
5389 match_end = stream->match_srcpos + stream->match_fwd;
5390
5391 /* At this point we may have to erase any iopt-buffer instructions that are
5392 * fully covered by a backward-extending copy. */
5393 if (stream->match_back > 0)
5394 {
5395 xd3_iopt_erase (stream, pos_in, total);
5396 }
5397
5398 stream->match_back = 0;
5399
5400 /* Update ranges. The first source match occurs with both values set to 0. */
5401 if (stream->match_maxaddr == 0 ||
5402 stream->match_srcpos < stream->match_minaddr)
5403 {
5404 stream->match_minaddr = stream->match_srcpos;
5405 }
5406
5407 if (match_end > stream->match_maxaddr)
5408 {
5409 stream->match_maxaddr = match_end;
5410 }
5411
5412 IF_DEBUG1 ({
5413 static int x = 0;
5414 P(RINT "[source match:%d] <inp %"Q"u %"Q"u> <src %"Q"u %"Q"u> (%s) [ %u bytes ]\n",
5415 x++,
5416 stream->total_in + pos_in,
5417 stream->total_in + pos_in + stream->match_fwd,
5418 stream->match_srcpos,
5419 stream->match_srcpos + stream->match_fwd,
5420 (stream->total_in + stream->input_position == stream->match_srcpos) ? "same" : "diff",
5421 stream->match_fwd);
5422 });
5423
5424 if ((ret = xd3_found_match (stream,
5425 /* decoder position */ pos_in,
5426 /* length */ stream->match_fwd,
5427 /* address */ stream->match_srcpos,
5428 /* is_source */ 1)))
5429 {
5430 return ret;
5431 }
5432
5433 // TODO: ideally, we would update srcwin_cksum_pos to avoid computing checksums in
5434 // the middle of an already-discovered long match.
5435
5436 /* If the match ends with the available input: */
5437 if (pos_in + stream->match_fwd == max_in)
5438 {
5439 /* Setup continuing match for the next window. */
5440 stream->match_state = MATCH_TARGET;
5441 stream->match_srcpos += stream->match_fwd;
5442 }
5443 }
5444
5445 return 0;
5446}
5447
5448/* Update the small hash. Values in the small_table are offset by HASH_CKOFFSET (1) to
5449 * distinguish empty buckets the zero offset. This maintains the previous linked lists.
5450 * If owrite is true then this entry is replacing the existing record, otherwise it is
5451 * merely being called to promote the existing record in the hash bucket (for the same
5452 * address cache). */
5453static void
5454xd3_scksum_insert (xd3_stream *stream, usize_t inx, usize_t scksum, usize_t pos)
5455{
5456 /* If we are maintaining previous links. */
5457 if (stream->small_prev)
5458 {
5459 usize_t last_pos = stream->small_table[inx];
5460 xd3_slist *pos_list = & stream->small_prev[pos & stream->sprevmask];
5461 xd3_slist *prev = pos_list->prev;
5462 xd3_slist *next = pos_list->next;
5463
5464 /* Assert link structure, update pos, cksum */
5465 XD3_ASSERT (prev->next == pos_list);
5466 XD3_ASSERT (next->prev == pos_list);
5467 pos_list->pos = pos;
5468 pos_list->scksum = scksum;
5469
5470 /* Subtract HASH_CKOFFSET and test for a previous offset. */
5471 if (last_pos-- != 0)
5472 {
5473 xd3_slist *last_list = & stream->small_prev[last_pos & stream->sprevmask];
5474 xd3_slist *last_next;
5475
5476 /* Verify existing entry. */
5477 SMALL_HASH_DEBUG1 (stream, stream->next_in + last_pos);
5478 SMALL_HASH_DEBUG2 (stream, stream->next_in + pos);
5479
5480 /* The two positions (mod sprevsz) may have the same checksum, making the old
5481 * and new entries the same. That is why the removal step is not before the
5482 * above if-stmt. */
5483 if (last_list != pos_list)
5484 {
5485 /* Remove current position from any list it may belong to. */
5486 next->prev = prev;
5487 prev->next = next;
5488
5489 /* The ordinary case, add current position to last_list. */
5490 last_next = last_list->next;
5491
5492 pos_list->next = last_next;
5493 pos_list->prev = last_list;
5494
5495 last_next->prev = pos_list;
5496 last_list->next = pos_list;
5497 }
5498 }
5499 else
5500 {
5501 /* Remove current position from any list it may belong to. */
5502 next->prev = prev;
5503 prev->next = next;
5504
5505 /* Re-initialize current position. */
5506 pos_list->next = pos_list;
5507 pos_list->prev = pos_list;
5508 }
5509 }
5510
5511 /* Enter the new position into the hash bucket. */
5512 stream->small_table[inx] = pos + HASH_CKOFFSET;
5513}
5514
5515#if XD3_DEBUG
5516static int
5517xd3_check_smatch (const uint8_t *ref0, const uint8_t *inp0,
5518 const uint8_t *inp_max, usize_t cmp_len)
5519{
5520 int i;
5521
5522 for (i = 0; i < cmp_len; i += 1)
5523 {
5524 XD3_ASSERT (ref0[i] == inp0[i]);
5525 }
5526
5527 if (inp0 + cmp_len < inp_max)
5528 {
5529 XD3_ASSERT (inp0[i] != ref0[i]);
5530 }
5531
5532 return 1;
5533}
5534#endif /* XD3_DEBUG */
5535
5536/* When the hash table indicates a possible small string match, it calls this routine to
5537 * find the best match. The first matching position is taken from the small_table,
5538 * HASH_CKOFFSET is subtracted to get the actual position. After checking that match, if
5539 * previous linked lists are in use (because stream->small_chain > 1), previous matches
5540 * are tested searching for the longest match. If (min_match > MIN_MATCH) then a lazy
5541 * match is in effect.
5542 *
5543 * OPT: This is by far the most expensive function. The slowdown is in part due to the data
5544 * structure it maintains, which is relatively more expensive than it needs to be (in
5545 * comparison to zlib) in order to support the PROMOTE decision, which is to prefer the
5546 * most recently used matching address of a certain string to aid the VCDIFF same cache.
5547 *
5548 * Weak reasoning? it's time to modularize this routine...? Let's say the PROMOTE
5549 * feature supported by this slow data structure contributes around 2% improvement in
5550 * compressed size, is there a better code table that doesn't use the SAME address cache,
5551 * for which the speedup-discount could produce a better encoding?
5552 */
5553static /*inline*/ usize_t
5554xd3_smatch (xd3_stream *stream, usize_t base, usize_t scksum, usize_t *match_offset)
5555{
5556 usize_t cmp_len;
5557 usize_t match_length = 0;
5558 usize_t chain = (min_match == MIN_MATCH ?
5559 stream->small_chain :
5560 stream->small_lchain);
5561 xd3_slist *current = NULL;
5562 xd3_slist *first = NULL;
5563 const uint8_t *inp_max = stream->next_in + max_in;
5564 const uint8_t *inp;
5565 const uint8_t *ref;
5566
5567 SMALL_HASH_STATS (usize_t search_cnt = 0);
5568 SMALL_HASH_DEBUG1 (stream, stream->next_in + pos_in);
5569 SMALL_HASH_STATS (stream->sh_searches += 1);
5570
5571 XD3_ASSERT (min_match + pos_in <= max_in);
5572
5573 base -= HASH_CKOFFSET;
5574
5575 /* Initialize the chain. */
5576 if (stream->small_prev != NULL)
5577 {
5578 first = current = & stream->small_prev[base & stream->sprevmask];
5579
5580 /* Check if current->pos is correct. */
5581 if (current->pos != base) { goto done; }
5582 }
5583
5584 again:
5585
5586 SMALL_HASH_STATS (search_cnt += 1);
5587
5588 /* For small matches, we can always go to the end-of-input because the matching position
5589 * must be less than the input position. */
5590 XD3_ASSERT (base < pos_in);
5591
5592 ref = stream->next_in + base;
5593 inp = stream->next_in + pos_in;
5594
5595 SMALL_HASH_DEBUG2 (stream, ref);
5596
5597 /* Expand potential match forward. */
5598 while (inp < inp_max && *inp == *ref)
5599 {
5600 ++inp;
5601 ++ref;
5602 }
5603
5604 cmp_len = inp - (stream->next_in + pos_in);
5605
5606 /* Verify correctness */
5607 XD3_ASSERT (xd3_check_smatch (stream->next_in + base, stream->next_in + pos_in,
5608 inp_max, cmp_len));
5609
5610 /* Update longest match */
5611 if (cmp_len > match_length)
5612 {
5613 ( match_length) = cmp_len;
5614 (*match_offset) = base;
5615
5616 /* Stop if we match the entire input or discover a long_enough match. */
5617 if (inp == inp_max || cmp_len >= stream->long_enough)
5618 {
5619 goto done;
5620 }
5621 }
5622
5623 /* If we have not reached the chain limit, see if there is another previous position. */
5624 if (current)
5625 {
5626 while (--chain != 0)
5627 {
5628 /* Calculate the next base offset. */
5629 current = current->prev;
5630 base = current->pos;
5631
5632 /* Stop if the next position was the first. Stop if the position is wrong
5633 * (because the lists are not re-initialized across input windows). Skip if the
5634 * scksum is wrong. */
5635 if (current != first && base < pos_in)
5636 {
5637 if (current->scksum != scksum)
5638 {
5639 continue;
5640 }
5641 goto again;
5642 }
5643 }
5644 }
5645
5646 done:
5647 SMALL_HASH_STATS (stream->sh_compares += search_cnt);
5648 return match_length;
5649}
5650
5651#if XD3_DEBUG
5652static void
5653xd3_verify_small_state (xd3_stream *stream,
5654 const uint8_t *inp,
5655 uint32_t x_cksum)
5656{
5657 uint32_t cksum = xd3_scksum (inp, stream->small_look);
5658
5659 XD3_ASSERT (cksum == x_cksum);
5660}
5661
5662static void
5663xd3_verify_large_state (xd3_stream *stream,
5664 const uint8_t *inp,
5665 uint32_t x_cksum)
5666{
5667 uint32_t cksum = xd3_lcksum (inp, stream->large_look);
5668
5669 XD3_ASSERT (cksum == x_cksum);
5670}
5671
5672static void
5673xd3_verify_run_state (xd3_stream *stream,
5674 const uint8_t *inp,
5675 int x_run_l,
5676 uint8_t x_run_c)
5677{
5678 int slook = stream->small_look;
5679 uint8_t run_c;
5680 int run_l = xd3_comprun (inp, slook, &run_c);
5681
5682 XD3_ASSERT (run_l == 0 || run_c == x_run_c);
5683 XD3_ASSERT (x_run_l > slook || run_l == x_run_l);
5684}
5685#endif /* XD3_DEBUG */
5686#endif /* XD3_ENCODER */
5687
5688/******************************************************************************************
5689 TEMPLATE pass
5690 ******************************************************************************************/
5691
5692#endif /* __XDELTA3_C_INLINE_PASS__ */
5693#ifdef __XDELTA3_C_TEMPLATE_PASS__
5694
5695#if XD3_ENCODER
5696
5697/******************************************************************************************
5698 Templates
5699 ******************************************************************************************/
5700
5701/* Template macros: less than 30 lines work. the template parameters appear as, e.g.,
5702 * SLOOK, MIN_MATCH, TRYLAZY, etc. */
5703#define XD3_TEMPLATE(x) XD3_TEMPLATE2(x,TEMPLATE)
5704#define XD3_TEMPLATE2(x,n) XD3_TEMPLATE3(x,n)
5705#define XD3_TEMPLATE3(x,n) x ## n
5706#define XD3_STRINGIFY(x) XD3_STRINGIFY2(x)
5707#define XD3_STRINGIFY2(x) #x
5708
5709static int XD3_TEMPLATE(xd3_string_match_) (xd3_stream *stream);
5710
5711static const xd3_smatcher XD3_TEMPLATE(__smatcher_) =
5712{
5713 XD3_STRINGIFY(TEMPLATE),
5714 XD3_TEMPLATE(xd3_string_match_),
5715#if SOFTCFG == 1
5716 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
5717#else
5718 LLOOK, LSTEP, SLOOK, SCHAIN, SLCHAIN, SSMATCH, TRYLAZY, MAXLAZY,
5719 LONGENOUGH, PROMOTE
5720#endif
5721};
5722
5723static int
5724XD3_TEMPLATE(xd3_string_match_) (xd3_stream *stream)
5725{
5726 /* TODO config: These next three variables should be statically compliled in various
5727 * scan_cfg configurations? */
5728 const int DO_SMALL = ! (stream->flags & XD3_NOCOMPRESS);
5729 const int DO_LARGE = (stream->src != NULL);
5730 const int DO_RUN = (1);
5731
5732 const uint8_t *inp;
5733 uint32_t scksum = 0;
5734 uint32_t lcksum = 0;
5735 usize_t sinx;
5736 usize_t linx;
5737 uint8_t run_c;
5738 int run_l;
5739 int ret;
5740 usize_t match_length;
5741 usize_t match_offset; // Note: "may be unused" warnings are bogus
5742 usize_t next_move_point;
5743
5744 /* If there will be no compression due to settings or short input, skip it entirely. */
5745 if (! (DO_SMALL || DO_LARGE || DO_RUN) || pos_in + SLOOK > max_in) { goto loopnomore; }
5746
5747 if ((ret = xd3_string_match_init (stream))) { return ret; }
5748
5749 /* The restartloop label is reached when the incremental loop state needs to be
5750 * reset. */
5751 restartloop:
5752
5753 /* If there is not enough input remaining for any kind of match, skip it. */
5754 if (pos_in + SLOOK > max_in) { goto loopnomore; }
5755
5756 IF_DEBUG1 ({
5757 static int x = 0;
5758 P(RINT "[string match:%d] pos_in %d; \n",
5759 x++, pos_in);
5760 });
5761
5762 /* Now reset the incremental loop state: */
5763
5764 /* The min_match variable is updated to avoid matching the same lazy match over and over
5765 * again. For example, if you find a (small) match of length 9 at one position, you
5766 * will likely find a match of length 8 at the next position. */
5767 min_match = MIN_MATCH;
5768
5769 /* The current input byte. */
5770 inp = stream->next_in + pos_in;
5771
5772 /* Small match state. */
5773 if (DO_SMALL)
5774 {
5775 scksum = xd3_scksum (inp, SLOOK);
5776 }
5777
5778 /* Run state. */
5779 if (DO_RUN)
5780 {
5781 run_l = xd3_comprun (inp, SLOOK, & run_c);
5782 }
5783
5784 /* Large match state. We continue the loop even after not enough bytes for LLOOK
5785 * remain, so always check pos_in in DO_LARGE code. */
5786 if (DO_LARGE && (pos_in + LLOOK <= max_in))
5787 {
5788 /* Source window: next_move_point is the point that pos_in must reach before
5789 * computing more source checksum. */
5790 if ((ret = xd3_srcwin_move_point (stream, & next_move_point)))
5791 {
5792 return ret;
5793 }
5794
5795 lcksum = xd3_lcksum (inp, LLOOK);
5796 }
5797
5798 /* TRYLAZYLEN: True if a certain length match should be followed by lazy search. This
5799 * checks that LEN is shorter than MAXLAZY and that there is enough leftover data to
5800 * consider lazy matching. "Enough" is set to 2 since the next match will start at the
5801 * next offset, it must match two extra characters. */
5802#define TRYLAZYLEN(LEN,POS,MAX) ((TRYLAZY && (LEN) < MAXLAZY) && ((POS) + (LEN) <= (MAX) - 2))
5803
5804 /* HANDLELAZY: This statement is called each time an instruciton is emitted (three
5805 * cases). If the instruction is large enough, the loop is restarted, otherwise lazy
5806 * matching may ensue. */
5807#define HANDLELAZY(mlen) \
5808 if (TRYLAZYLEN ((mlen), pos_in, max_in)) \
5809 { min_match = (mlen) + LEAST_MATCH_INCR; goto updateone; } \
5810 else \
5811 { pos_in += (mlen); goto restartloop; }
5812
5813 /* Now loop over one input byte at a time until a match is found... */
5814 for (;; inp += 1, pos_in += 1)
5815 {
5816 /* Now we try three kinds of string match in order of expense:
5817 * run, large match, small match. */
5818
5819 /* Expand the start of a RUN. The test for (run_l == SLOOK) avoids repeating this
5820 * check when we pass through a run area performing lazy matching. The run is only
5821 * expanded once when the min_match is first reached. If lazy matching is
5822 * performed, the run_l variable will remain inconsistent until the first
5823 * non-running input character is reached, at which time the run_l may then again
5824 * grow to SLOOK. */
5825 if (DO_RUN && run_l == SLOOK)
5826 {
5827 usize_t max_len = max_in - pos_in;
5828
5829 IF_DEBUG (xd3_verify_run_state (stream, inp, run_l, run_c));
5830
5831 while (run_l < max_len && inp[run_l] == run_c) { run_l += 1; }
5832
5833 /* Output a RUN instruction. */
5834 if (run_l >= min_match && run_l >= MIN_RUN)
5835 {
5836 if ((ret = xd3_emit_run (stream, pos_in, run_l, run_c))) { return ret; }
5837
5838 HANDLELAZY (run_l);
5839 }
5840 }
5841
5842 /* If there is enough input remaining. */
5843 if (DO_LARGE && (pos_in + LLOOK <= max_in))
5844 {
5845 if ((pos_in >= next_move_point) &&
5846 (ret = xd3_srcwin_move_point (stream, & next_move_point)))
5847 {
5848 return ret;
5849 }
5850
5851 linx = xd3_checksum_hash (& stream->large_hash, lcksum);
5852
5853 IF_DEBUG (xd3_verify_large_state (stream, inp, lcksum));
5854
5855 /* Note: To handle large checksum duplicates, this code should be rearranged to
5856 * resemble the small_match case more. But how much of the code can be truly
5857 * shared? The main difference is the need for xd3_source_extend_match to work
5858 * outside of xd3_string_match, in the case where inputs are identical. */
5859 if (unlikely (stream->large_table[linx] != 0))
5860 {
5861 /* the match_setup will fail if the source window has been decided and the
5862 * match lies outside it. You could consider forcing a window at this point
5863 * to permit a new source window. */
5864 if (xd3_source_match_setup (stream, stream->large_table[linx] - HASH_CKOFFSET) == 0)
5865 {
5866 if ((ret = xd3_source_extend_match (stream))) { return ret; }
5867
5868 /* Update stream position. match_fwd is zero if no match. */
5869 if (stream->match_fwd > 0)
5870 {
5871 HANDLELAZY (stream->match_fwd);
5872 }
5873 }
5874 }
5875 }
5876
5877 /* Small matches. */
5878 if (DO_SMALL)
5879 {
5880 sinx = xd3_checksum_hash (& stream->small_hash, scksum);
5881
5882 /* Verify incremental state in debugging mode. */
5883 IF_DEBUG (xd3_verify_small_state (stream, inp, scksum));
5884
5885 /* Search for the longest match */
5886 if (unlikely (stream->small_table[sinx] != 0))
5887 {
5888 match_length = xd3_smatch (stream,
5889 stream->small_table[sinx],
5890 scksum,
5891 & match_offset);
5892 }
5893 else
5894 {
5895 match_length = 0;
5896 }
5897
5898 /* Insert a hash for this string. */
5899 xd3_scksum_insert (stream, sinx, scksum, pos_in);
5900
5901 /* Promote the previous match address to head of the hash bucket. This is
5902 * intended to improve the same cache hit rate. */
5903 if (match_length != 0 && PROMOTE)
5904 {
5905 xd3_scksum_insert (stream, sinx, scksum, match_offset);
5906 }
5907
5908 /* Maybe output a COPY instruction */
5909 if (unlikely (match_length >= min_match))
5910 {
5911 IF_DEBUG1 ({
5912 static int x = 0;
5913 P(RINT "[target match:%d] <inp %u %u> <cpy %u %u> (-%d) [ %u bytes ]\n",
5914 x++,
5915 pos_in,
5916 pos_in + match_length,
5917 match_offset,
5918 match_offset + match_length,
5919 pos_in - match_offset,
5920 match_length);
5921 });
5922
5923 if ((ret = xd3_found_match (stream,
5924 /* decoder position */ pos_in,
5925 /* length */ match_length,
5926 /* address */ match_offset,
5927 /* is_source */ 0))) { return ret; }
5928
5929 /* SSMATCH option: search small matches: continue the incremental checksum
5930 * through the matched material. Only if not lazy matching. */
5931 if (SSMATCH && !TRYLAZYLEN (match_length, pos_in, max_in))
5932 {
5933 usize_t avail = max_in - SLOOK - pos_in;
5934 usize_t ml_m1 = match_length - 1;
5935 usize_t right;
5936 int aincr;
5937
5938 IF_DEBUG (usize_t nposi = pos_in + match_length);
5939
5940 /* Avail is the last offset we can compute an incremental cksum. If the
5941 * match length exceeds that offset then we are finished performing
5942 * incremental updates after this step. */
5943 if (ml_m1 < avail)
5944 {
5945 right = ml_m1;
5946 aincr = 1;
5947 }
5948 else
5949 {
5950 right = avail;
5951 aincr = 0;
5952 }
5953
5954 /* Compute incremental checksums within the match. */
5955 while (right > 0)
5956 {
5957 SMALL_CKSUM_UPDATE (scksum, inp, SLOOK);
5958 if (DO_LARGE && (pos_in + LLOOK < max_in)) {
5959 LARGE_CKSUM_UPDATE (lcksum, inp, LLOOK);
5960 }
5961
5962 inp += 1;
5963 pos_in += 1;
5964 right -= 1;
5965 sinx = xd3_checksum_hash (& stream->small_hash, scksum);
5966
5967 IF_DEBUG (xd3_verify_small_state (stream, inp, scksum));
5968
5969 xd3_scksum_insert (stream, sinx, scksum, pos_in);
5970 }
5971
5972 if (aincr)
5973 {
5974 /* Keep searching... */
5975 if (DO_RUN) { run_l = xd3_comprun (inp+1, SLOOK-1, & run_c); }
5976 XD3_ASSERT (nposi == pos_in + 1);
5977 XD3_ASSERT (pos_in + SLOOK < max_in);
5978 min_match = MIN_MATCH;
5979 goto updatesure;
5980 }
5981 else
5982 {
5983 /* Not enough input for another match. */
5984 XD3_ASSERT (pos_in + SLOOK >= max_in);
5985 goto loopnomore;
5986 }
5987 }
5988
5989 /* Else case: copy instruction, but no SSMATCH. */
5990 HANDLELAZY (match_length);
5991 }
5992 }
5993
5994 /* The logic above prevents excess work during lazy matching by increasing min_match
5995 * to avoid smaller matches. Each time we advance pos_in by one, the minimum match
5996 * shortens as well. */
5997 if (min_match > MIN_MATCH)
5998 {
5999 min_match -= 1;
6000 }
6001
6002 updateone:
6003
6004 /* See if there are no more incremental cksums to compute. */
6005 if (pos_in + SLOOK == max_in)
6006 {
6007 goto loopnomore;
6008 }
6009
6010 updatesure:
6011
6012 /* Compute next RUN, CKSUM */
6013 if (DO_RUN) { NEXTRUN (inp[SLOOK]); }
6014 if (DO_SMALL) { SMALL_CKSUM_UPDATE (scksum, inp, SLOOK); }
6015 if (DO_LARGE && (pos_in + LLOOK < max_in)) { LARGE_CKSUM_UPDATE (lcksum, inp, LLOOK); }
6016 }
6017
6018 loopnomore:
6019 return 0;
6020}
6021#endif /* XD3_ENCODER */
6022#endif /* __XDELTA3_C_TEMPLATE_PASS__ */