diff options
Diffstat (limited to 'xdelta3/xdelta3.c')
-rwxr-xr-x | xdelta3/xdelta3.c | 6022 |
1 files changed, 6022 insertions, 0 deletions
diff --git a/xdelta3/xdelta3.c b/xdelta3/xdelta3.c new file mode 100755 index 0000000..fb9a09f --- /dev/null +++ b/xdelta3/xdelta3.c | |||
@@ -0,0 +1,6022 @@ | |||
1 | /* xdelta 3 - delta compression tools and library | ||
2 | * Copyright (C) 2001, 2003, 2004, 2005, 2006. Joshua P. MacDonald | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify | ||
5 | * it under the terms of the GNU General Public License as published by | ||
6 | * the Free Software Foundation; either version 2 of the License, or | ||
7 | * (at your option) any later version. | ||
8 | * | ||
9 | * This program is distributed in the hope that it will be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write to the Free Software | ||
16 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
17 | |||
18 | ------------------------------------------------------------------- | ||
19 | |||
20 | Xdelta 3 | ||
21 | |||
22 | The goal of this library is to to implement both the (stand-alone) | ||
23 | data-compression and delta-compression aspects of VCDIFF encoding, and | ||
24 | to support a programming interface that works like Zlib | ||
25 | (http://www.gzip.org/zlib.html). See RFC3284: The VCDIFF Generic | ||
26 | Differencing and Compression Data Format. | ||
27 | |||
28 | VCDIFF is a unified encoding that combines data-compression and | ||
29 | delta-encoding ("differencing"). | ||
30 | |||
31 | VCDIFF has a detailed byte-code instruction set with many features. | ||
32 | The instruction format supports an immediate size operand for small | ||
33 | COPYs and ADDs (e.g., under 18 bytes). There are also instruction | ||
34 | "modes", which are used to compress COPY addresses by using two | ||
35 | address caches. An instruction mode refers to slots in the NEAR | ||
36 | and SAME caches for recent addresses. NEAR remembers the | ||
37 | previous 4 (by default) COPY addresses, and SAME catches | ||
38 | frequent re-uses of the same address using a 3-way (by default) | ||
39 | 256-entry associative cache of [ADDR mod 256], the encoded byte. | ||
40 | A hit in the NEAR/SAME cache requires 0/1 ADDR bytes. | ||
41 | |||
42 | VCDIFF has a default instruction table, but an alternate | ||
43 | instruction tables may themselves be be delta-compressed and | ||
44 | included in the encoding header. This allows even more freedom. | ||
45 | There are 9 instruction modes in the default code table, 4 near, 3 | ||
46 | same, VCD_SELF (absolute encoding) and VCD_HERE (relative to the | ||
47 | current position). | ||
48 | |||
49 | ---------------------------------------------------------------------- | ||
50 | |||
51 | Algorithms | ||
52 | |||
53 | Aside from the details of encoding and decoding, there are a bunch | ||
54 | of algorithms needed. | ||
55 | |||
56 | 1. STRING-MATCH. A two-level fingerprinting approach is used. A | ||
57 | single loop computes the two checksums -- small and large -- at | ||
58 | successive offsets in the TARGET file. The large checksum is more | ||
59 | accurate and is used to discover SOURCE matches, which are | ||
60 | potentially very long. The small checksum is used to discover | ||
61 | copies within the TARGET. Small matching, which is more expensive, | ||
62 | usually dominates the large STRING-MATCH costs in this code - the | ||
63 | more exhaustive the search, the better the results. Either of the | ||
64 | two string-matching mechanisms may be disabled. Currently, large | ||
65 | checksums are only performed in the source file, if present, and | ||
66 | small checksums are performed only in the left-over target input. | ||
67 | However, small matches are possible in the source file too, with a | ||
68 | range of possibilities. [I've seen a paper on this subject, but | ||
69 | I lost it.] | ||
70 | |||
71 | 2. INSTRUCTION SELECTION. The IOPT buffer here represents a queue | ||
72 | used to store overlapping copy instructions. There are two possible | ||
73 | optimizations that go beyond a greedy search. Both of these fall | ||
74 | into the category of "non-greedy matching" optimizations. | ||
75 | |||
76 | The first optimization stems from backward SOURCE-COPY matching. | ||
77 | When a new SOURCE-COPY instruction covers a previous instruction in | ||
78 | the target completely, it is erased from the queue. Randal Burns | ||
79 | originally analyzed these algorithms and did a lot of related work | ||
80 | (\cite the 1.5-pass algorithm). | ||
81 | |||
82 | The second optimization comes by the encoding of common very-small | ||
83 | COPY and ADD instructions, for which there are special DOUBLE-code | ||
84 | instructions, which code two instructions in a single byte. | ||
85 | |||
86 | The cost of bad instruction-selection overhead is relatively high | ||
87 | for data-compression, relative to delta-compression, so this second | ||
88 | optimization is fairly important. With "lazy" matching (the name | ||
89 | used in Zlib for a similar optimization), the string-match | ||
90 | algorithm searches after a match for potential overlapping copy | ||
91 | instructions. In Xdelta and by default, VCDIFF, the minimum match | ||
92 | size is 4 bytes, whereas Zlib searches with a 3-byte minimum. This | ||
93 | feature, combined with double instructions, provides a nice | ||
94 | challenge. Search in this file for "black magic", a heuristic. | ||
95 | |||
96 | 3. STREAM ALIGNMENT. Stream alignment is needed to compress large | ||
97 | inputs in constant space. TODO: redocument | ||
98 | |||
99 | 4. WINDOW SELECTION. When the IOPT buffer flushes, in the first call | ||
100 | to xd3_iopt_finish_encoding containing any kind of copy instruction, | ||
101 | the parameters of the source window must be decided: the offset into | ||
102 | the source and the length of the window. Since the IOPT buffer is | ||
103 | finite, the program may be forced to fix these values before knowing | ||
104 | the best offset/length. XD3_DEFAULT_SRCBACK limits the length, but a | ||
105 | smaller length is preferred because all target copies are addressed | ||
106 | after source copies in the VCDIFF address space. Picking too large a | ||
107 | source window means larger address encoding. | ||
108 | |||
109 | If the IOPT buffer is filling easily, perhaps the target window is | ||
110 | too large. In any case, a decision is made (though an alternative is | ||
111 | to emit the sub-window right away, to reduce the winsize | ||
112 | automatically - not implemented, another alternative is to grow the | ||
113 | IOPT buffer, it is after all bounded in size by winsize.) | ||
114 | |||
115 | The algorithm is in xd3_srcwin_setup. | ||
116 | |||
117 | 5. SECONDARY COMPRESSION. VCDIFF supports a secondary encoding to | ||
118 | be applied to the individual sections of the data format, which are | ||
119 | ADDRess, INSTruction, and DATA. Several secondary compressor | ||
120 | variations are implemented here, although none is standardized yet. | ||
121 | |||
122 | One is an adaptive huffman algorithm -- the FGK algorithm (Faller, | ||
123 | Gallager, and Knuth, 1985). This compressor is extremely slow. | ||
124 | |||
125 | The other is a simple static Huffman routine, which is the base | ||
126 | case of a semi-adaptive scheme published by D.J. Wheeler and first | ||
127 | widely used in bzip2 (by Julian Seward). This is a very | ||
128 | interesting algorithm, originally published in nearly cryptic form | ||
129 | by D.J. Wheeler. !!!NOTE!!! Because these are not standardized, the | ||
130 | -S option (no secondary compression) remains on by default. | ||
131 | ftp://ftp.cl.cam.ac.uk/users/djw3/bred3.{c,ps} | ||
132 | -------------------------------------------------------------------- | ||
133 | |||
134 | Other Features | ||
135 | |||
136 | 1. USER CONVENIENCE | ||
137 | |||
138 | For user convenience, it is essential to recognize Gzip-compressed | ||
139 | files and automatically Gzip-decompress them prior to | ||
140 | delta-compression (or else no delta-compression will be achieved | ||
141 | unless the user manually decompresses the inputs). The compressed | ||
142 | represention competes with Xdelta, and this must be hidden from the | ||
143 | command-line user interface. The Xdelta-1.x encoding was simple, not | ||
144 | compressed itself, so Xdelta-1.x uses Zlib internally to compress the | ||
145 | representation. | ||
146 | |||
147 | This implementation supports external compression, which implements | ||
148 | the necessary fork() and pipe() mechanics. There is a tricky step | ||
149 | involved to support automatic detection of a compressed input in a | ||
150 | non-seekable input. First you read a bit of the input to detect | ||
151 | magic headers. When a compressed format is recognized, exec() the | ||
152 | external compression program and create a second child process to | ||
153 | copy the original input stream. [Footnote: There is a difficulty | ||
154 | related to using Gzip externally. It is not possible to decompress | ||
155 | and recompress a Gzip file transparently. If FILE.GZ had a | ||
156 | cryptographic signature, then, after: (1) Gzip-decompression, (2) | ||
157 | Xdelta-encoding, (3) Gzip-compression the signature could be | ||
158 | broken. The only way to solve this problem is to guess at Gzip's | ||
159 | compression level or control it by other means. I recommend that | ||
160 | specific implementations of any compression scheme store | ||
161 | information needed to exactly re-compress the input, that way | ||
162 | external compression is transparent - however, this won't happen | ||
163 | here until it has stabilized.] | ||
164 | |||
165 | 2. APPLICATION-HEADER | ||
166 | |||
167 | This feature was introduced in RFC3284. It allows any application | ||
168 | to include a header within the VCDIFF file format. This allows | ||
169 | general inter-application data exchange with support for | ||
170 | application-specific extensions to communicate metadata. | ||
171 | |||
172 | 3. VCDIFF CHECKSUM | ||
173 | |||
174 | An optional checksum value is included with each window, which can | ||
175 | be used to validate the final result. This verifies the correct source | ||
176 | file was used for decompression as well as the obvious advantage: | ||
177 | checking the implementation (and underlying) correctness. | ||
178 | |||
179 | 4. LIGHT WEIGHT | ||
180 | |||
181 | The code makes efforts to avoid copying data more than necessary. | ||
182 | The code delays many initialization tasks until the first use, it | ||
183 | optimizes for identical (perfectly matching) inputs. It does not | ||
184 | compute any checksums until the first lookup misses. Memory usage | ||
185 | is reduced. String-matching is templatized (by slightly gross use | ||
186 | of CPP) to hard-code alternative compile-time defaults. The code | ||
187 | has few outside dependencies. | ||
188 | ---------------------------------------------------------------------- | ||
189 | |||
190 | The default rfc3284 instruction table: | ||
191 | (see RFC for the explanation) | ||
192 | |||
193 | TYPE SIZE MODE TYPE SIZE MODE INDEX | ||
194 | -------------------------------------------------------------------- | ||
195 | 1. Run 0 0 Noop 0 0 0 | ||
196 | 2. Add 0, [1,17] 0 Noop 0 0 [1,18] | ||
197 | 3. Copy 0, [4,18] 0 Noop 0 0 [19,34] | ||
198 | 4. Copy 0, [4,18] 1 Noop 0 0 [35,50] | ||
199 | 5. Copy 0, [4,18] 2 Noop 0 0 [51,66] | ||
200 | 6. Copy 0, [4,18] 3 Noop 0 0 [67,82] | ||
201 | 7. Copy 0, [4,18] 4 Noop 0 0 [83,98] | ||
202 | 8. Copy 0, [4,18] 5 Noop 0 0 [99,114] | ||
203 | 9. Copy 0, [4,18] 6 Noop 0 0 [115,130] | ||
204 | 10. Copy 0, [4,18] 7 Noop 0 0 [131,146] | ||
205 | 11. Copy 0, [4,18] 8 Noop 0 0 [147,162] | ||
206 | 12. Add [1,4] 0 Copy [4,6] 0 [163,174] | ||
207 | 13. Add [1,4] 0 Copy [4,6] 1 [175,186] | ||
208 | 14. Add [1,4] 0 Copy [4,6] 2 [187,198] | ||
209 | 15. Add [1,4] 0 Copy [4,6] 3 [199,210] | ||
210 | 16. Add [1,4] 0 Copy [4,6] 4 [211,222] | ||
211 | 17. Add [1,4] 0 Copy [4,6] 5 [223,234] | ||
212 | 18. Add [1,4] 0 Copy 4 6 [235,238] | ||
213 | 19. Add [1,4] 0 Copy 4 7 [239,242] | ||
214 | 20. Add [1,4] 0 Copy 4 8 [243,246] | ||
215 | 21. Copy 4 [0,8] Add 1 0 [247,255] | ||
216 | -------------------------------------------------------------------- | ||
217 | |||
218 | Reading the source: Overview | ||
219 | |||
220 | This file includes itself in several passes to macro-expand certain | ||
221 | sections with variable forms. Just read ahead, there's only a | ||
222 | little confusion. I know this sounds ugly, but hard-coding some of | ||
223 | the string-matching parameters results in a 10-15% increase in | ||
224 | string-match performance. The only time this hurts is when you have | ||
225 | unbalanced #if/endifs. | ||
226 | |||
227 | A single compilation unit tames the Makefile. In short, this is to | ||
228 | allow the above-described hack without an explodingMakefile. The | ||
229 | single compilation unit includes the core library features, | ||
230 | configurable string-match templates, optional main() command-line | ||
231 | tool, misc optional features, and a regression test. Features are | ||
232 | controled with CPP #defines, see Makefile.am. | ||
233 | |||
234 | The initial __XDELTA3_C_HEADER_PASS__ starts first, the INLINE and | ||
235 | TEMPLATE sections follow. Easy stuff first, hard stuff last. | ||
236 | |||
237 | Optional features include: | ||
238 | |||
239 | xdelta3-main.h The command-line interface, external compression | ||
240 | support, POSIX-specific, info & VCDIFF-debug tools. | ||
241 | xdelta3-second.h The common secondary compression routines. | ||
242 | xdelta3-djw.h The semi-adaptive huffman secondary encoder. | ||
243 | xdelta3-fgk.h The adaptive huffman secondary encoder. | ||
244 | xdelta3-test.h The unit test covers major algorithms, | ||
245 | encoding and decoding. There are single-bit | ||
246 | error decoding tests. There are 32/64-bit file size | ||
247 | boundary tests. There are command-line tests. | ||
248 | There are compression tests. There are external | ||
249 | compression tests. There are string-matching tests. | ||
250 | There should be more tests... | ||
251 | |||
252 | Additional headers include: | ||
253 | |||
254 | xdelta3.h The public header file. | ||
255 | xdelta3-cfgs.h The default settings for default, built-in | ||
256 | encoders. These are hard-coded at | ||
257 | compile-time. There is also a single | ||
258 | soft-coded string matcher for experimenting | ||
259 | with arbitrary values. | ||
260 | xdelta3-list.h A cyclic list template | ||
261 | |||
262 | Misc little debug utilities: | ||
263 | |||
264 | badcopy.c Randomly modifies an input file based on two | ||
265 | parameters: (1) the probability that a byte in | ||
266 | the file is replaced with a pseudo-random value, | ||
267 | and (2) the mean change size. Changes are | ||
268 | generated using an expoential distribution | ||
269 | which approximates the expected error_prob | ||
270 | distribution. | ||
271 | show.c Prints an offset/length segment from a file. | ||
272 | testh.c Checks that xdelta3.h is can be #included | ||
273 | -------------------------------------------------------------------- | ||
274 | |||
275 | This file itself is unusually large. I hope to defend this layout | ||
276 | with lots of comments. Everything in this file is related to | ||
277 | encoding and decoding. I like it all together - the template stuff | ||
278 | is just a hack. */ | ||
279 | |||
280 | #ifndef __XDELTA3_C_HEADER_PASS__ | ||
281 | #define __XDELTA3_C_HEADER_PASS__ | ||
282 | |||
283 | #include <errno.h> | ||
284 | #include <string.h> | ||
285 | |||
286 | #include "xdelta3.h" | ||
287 | |||
288 | /****************************************************************************************** | ||
289 | STATIC CONFIGURATION | ||
290 | ******************************************************************************************/ | ||
291 | |||
292 | #ifndef XD3_MAIN /* the main application */ | ||
293 | #define XD3_MAIN 0 | ||
294 | #endif | ||
295 | |||
296 | #ifndef VCDIFF_TOOLS | ||
297 | #define VCDIFF_TOOLS XD3_MAIN | ||
298 | #endif | ||
299 | |||
300 | #ifndef SECONDARY_FGK /* one from the algorithm preservation department: */ | ||
301 | #define SECONDARY_FGK 0 /* adaptive Huffman routines */ | ||
302 | #endif | ||
303 | |||
304 | #ifndef SECONDARY_DJW /* semi-adaptive/static Huffman for the eventual */ | ||
305 | #define SECONDARY_DJW 0 /* standardization, off by default until such time. */ | ||
306 | #endif | ||
307 | |||
308 | #ifndef GENERIC_ENCODE_TABLES /* These three are the RFC-spec'd app-specific */ | ||
309 | #define GENERIC_ENCODE_TABLES 0 /* code features. This is tested but not recommended */ | ||
310 | #endif /* unless there's a real application. */ | ||
311 | #ifndef GENERIC_ENCODE_TABLES_COMPUTE | ||
312 | #define GENERIC_ENCODE_TABLES_COMPUTE 0 | ||
313 | #endif | ||
314 | #ifndef GENERIC_ENCODE_TABLES_COMPUTE_PRINT | ||
315 | #define GENERIC_ENCODE_TABLES_COMPUTE_PRINT 0 | ||
316 | #endif | ||
317 | |||
318 | #if XD3_USE_LARGEFILE64 /* How does everyone else do this? */ | ||
319 | #define Q "q" | ||
320 | #else | ||
321 | #define Q | ||
322 | #endif | ||
323 | |||
324 | #if XD3_ENCODER | ||
325 | #define IF_ENCODER(x) x | ||
326 | #else | ||
327 | #define IF_ENCODER(x) | ||
328 | #endif | ||
329 | |||
330 | /******************************************************************************************/ | ||
331 | |||
332 | typedef enum { | ||
333 | |||
334 | /* header indicator bits */ | ||
335 | VCD_SECONDARY = (1 << 0), /* uses secondary compressor */ | ||
336 | VCD_CODETABLE = (1 << 1), /* supplies code table data */ | ||
337 | VCD_APPHEADER = (1 << 2), /* supplies application data */ | ||
338 | VCD_INVHDR = ~7U, | ||
339 | |||
340 | /* window indicator bits */ | ||
341 | VCD_SOURCE = (1 << 0), /* copy window in source file */ | ||
342 | VCD_TARGET = (1 << 1), /* copy window in target file */ | ||
343 | VCD_ADLER32 = (1 << 2), /* has adler32 checksum */ | ||
344 | VCD_INVWIN = ~7U, | ||
345 | |||
346 | VCD_SRCORTGT = VCD_SOURCE | VCD_TARGET, | ||
347 | |||
348 | /* delta indicator bits */ | ||
349 | VCD_DATACOMP = (1 << 0), | ||
350 | VCD_INSTCOMP = (1 << 1), | ||
351 | VCD_ADDRCOMP = (1 << 2), | ||
352 | VCD_INVDEL = ~0x7U, | ||
353 | |||
354 | } xd3_indicator; | ||
355 | |||
356 | typedef enum { | ||
357 | VCD_DJW_ID = 1, | ||
358 | VCD_FGK_ID = 16, /* !!!Note: these are not a standard IANA-allocated ID!!! */ | ||
359 | } xd3_secondary_ids; | ||
360 | |||
361 | typedef enum { | ||
362 | SEC_NOFLAGS = 0, | ||
363 | SEC_COUNT_FREQS = (1 << 0), /* OPT: Not implemented: Could eliminate first pass of Huffman... */ | ||
364 | } xd3_secondary_flags; | ||
365 | |||
366 | typedef enum { | ||
367 | DATA_SECTION, /* These indicate which section to the secondary compressor. */ | ||
368 | INST_SECTION, /* The header section is not compressed, therefore not listed here. */ | ||
369 | ADDR_SECTION, | ||
370 | } xd3_section_type; | ||
371 | |||
372 | typedef enum | ||
373 | { | ||
374 | XD3_NOOP = 0, | ||
375 | XD3_ADD = 1, | ||
376 | XD3_RUN = 2, | ||
377 | XD3_CPY = 3, /* XD3_CPY rtypes are represented as (XD3_CPY + copy-mode value) */ | ||
378 | } xd3_rtype; | ||
379 | |||
380 | /******************************************************************************************/ | ||
381 | |||
382 | #include "xdelta3-list.h" | ||
383 | |||
384 | XD3_MAKELIST(xd3_rlist, xd3_rinst, link); | ||
385 | |||
386 | /******************************************************************************************/ | ||
387 | |||
388 | #ifndef unlikely /* The unlikely macro - any good? */ | ||
389 | #if defined(__GNUC__) && __GNUC__ >= 3 | ||
390 | #define unlikely(x) __builtin_expect((x),0) | ||
391 | #define likely(x) __builtin_expect((x),1) | ||
392 | #else | ||
393 | #define unlikely(x) (x) | ||
394 | #define likely(x) (x) | ||
395 | #endif | ||
396 | #endif | ||
397 | |||
398 | #define SECONDARY_MIN_SAVINGS 2 /* Secondary compression has to save at least this many bytes. */ | ||
399 | #define SECONDARY_MIN_INPUT 10 /* Secondary compression needs at least this many bytes. */ | ||
400 | |||
401 | #define VCDIFF_MAGIC1 0xd6 /* 1st file byte */ | ||
402 | #define VCDIFF_MAGIC2 0xc3 /* 2nd file byte */ | ||
403 | #define VCDIFF_MAGIC3 0xc4 /* 3rd file byte */ | ||
404 | #define VCDIFF_VERSION 0x00 /* 4th file byte */ | ||
405 | |||
406 | #define VCD_SELF 0 /* 1st address mode */ | ||
407 | #define VCD_HERE 1 /* 2nd address mode */ | ||
408 | |||
409 | #define CODE_TABLE_STRING_SIZE (6 * 256) /* Should fit a code table string. */ | ||
410 | #define CODE_TABLE_VCDIFF_SIZE (6 * 256) /* Should fit a compressed code table string */ | ||
411 | |||
412 | #define SECONDARY_ANY (SECONDARY_DJW || SECONDARY_FGK) /* True if any secondary compressor is used. */ | ||
413 | |||
414 | #define ALPHABET_SIZE 256 /* Used in test code--size of the secondary compressor alphabet. */ | ||
415 | |||
416 | #define HASH_PRIME 0 /* Old hashing experiments */ | ||
417 | #define HASH_PERMUTE 1 | ||
418 | #define ARITH_SMALL_CKSUM 1 | ||
419 | |||
420 | #define HASH_CKOFFSET 1U /* Table entries distinguish "no-entry" from offset 0 using this offset. */ | ||
421 | |||
422 | #define MIN_SMALL_LOOK 2U /* Match-optimization stuff. */ | ||
423 | #define MIN_LARGE_LOOK 2U | ||
424 | #define MIN_MATCH_OFFSET 1U | ||
425 | #define MAX_MATCH_SPLIT 18U /* VCDIFF code table: 18 is the default limit for direct-coded ADD sizes */ | ||
426 | |||
427 | #define LEAST_MATCH_INCR 0 /* The least number of bytes an overlapping match must beat | ||
428 | * the preceding match by. This is a bias for the lazy | ||
429 | * match optimization. A non-zero value means that an | ||
430 | * adjacent match has to be better by more than the step | ||
431 | * between them. 0. */ | ||
432 | |||
433 | #define MIN_MATCH 4U /* VCDIFF code table: MIN_MATCH=4 */ | ||
434 | #define MIN_ADD 1U /* 1 */ | ||
435 | #define MIN_RUN 8U /* The shortest run, if it is shorter than this an immediate | ||
436 | * add/copy will be just as good. ADD1/COPY6 = 1I+1D+1A bytes, | ||
437 | * RUN18 = 1I+1D+1A. */ | ||
438 | |||
439 | #define MAX_MODES 9 /* Maximum number of nodes used for compression--does not limit decompression. */ | ||
440 | |||
441 | #define ENC_SECTS 4 /* Number of separate output sections. */ | ||
442 | |||
443 | #define HDR_TAIL(s) (stream->enc_tails[0]) | ||
444 | #define DATA_TAIL(s) (stream->enc_tails[1]) | ||
445 | #define INST_TAIL(s) (stream->enc_tails[2]) | ||
446 | #define ADDR_TAIL(s) (stream->enc_tails[3]) | ||
447 | |||
448 | #define HDR_HEAD(s) (stream->enc_heads[0]) | ||
449 | #define DATA_HEAD(s) (stream->enc_heads[1]) | ||
450 | #define INST_HEAD(s) (stream->enc_heads[2]) | ||
451 | #define ADDR_HEAD(s) (stream->enc_heads[3]) | ||
452 | |||
453 | #define SIZEOF_ARRAY(x) (sizeof(x) / sizeof(x[0])) | ||
454 | |||
455 | #define TOTAL_MODES(x) (2+(x)->acache.s_same+(x)->acache.s_near) | ||
456 | |||
457 | /* Template instances. */ | ||
458 | #if XD3_BUILD_SLOW | ||
459 | #define IF_BUILD_SLOW(x) x | ||
460 | #else | ||
461 | #define IF_BUILD_SLOW(x) | ||
462 | #endif | ||
463 | #if XD3_BUILD_FAST | ||
464 | #define IF_BUILD_FAST(x) x | ||
465 | #else | ||
466 | #define IF_BUILD_FAST(x) | ||
467 | #endif | ||
468 | #if XD3_BUILD_SOFT | ||
469 | #define IF_BUILD_SOFT(x) x | ||
470 | #else | ||
471 | #define IF_BUILD_SOFT(x) | ||
472 | #endif | ||
473 | |||
474 | IF_BUILD_SOFT(static const xd3_smatcher __smatcher_soft;) | ||
475 | IF_BUILD_FAST(static const xd3_smatcher __smatcher_fast;) | ||
476 | IF_BUILD_SLOW(static const xd3_smatcher __smatcher_slow;) | ||
477 | |||
478 | #if XD3_DEBUG | ||
479 | #define SMALL_HASH_DEBUG1(s,inp) \ | ||
480 | usize_t debug_hval = xd3_checksum_hash (& (s)->small_hash, \ | ||
481 | xd3_scksum ((inp), (s)->small_look)) | ||
482 | #define SMALL_HASH_DEBUG2(s,inp) \ | ||
483 | XD3_ASSERT (debug_hval == xd3_checksum_hash (& (s)->small_hash, \ | ||
484 | xd3_scksum ((inp), (s)->small_look))) | ||
485 | #define SMALL_HASH_STATS(x) x | ||
486 | #else | ||
487 | #define SMALL_HASH_DEBUG1(s,inp) | ||
488 | #define SMALL_HASH_DEBUG2(s,inp) | ||
489 | #define SMALL_HASH_STATS(x) | ||
490 | #endif /* XD3_DEBUG */ | ||
491 | |||
492 | /* Config fields: three structures contain these variables, so this is non-typed. */ | ||
493 | #define XD3_COPY_CONFIG_FIELDS(dst,src) \ | ||
494 | do { \ | ||
495 | (dst)->large_look = (src)->large_look; \ | ||
496 | (dst)->large_step = (src)->large_step; \ | ||
497 | (dst)->small_look = (src)->small_look; \ | ||
498 | (dst)->small_chain = (src)->small_chain; \ | ||
499 | (dst)->small_lchain = (src)->small_lchain; \ | ||
500 | (dst)->ssmatch = (src)->ssmatch; \ | ||
501 | (dst)->try_lazy = (src)->try_lazy; \ | ||
502 | (dst)->max_lazy = (src)->max_lazy; \ | ||
503 | (dst)->long_enough = (src)->long_enough; \ | ||
504 | (dst)->promote = (src)->promote; \ | ||
505 | } while (0) | ||
506 | |||
507 | /* Update the run-length state */ | ||
508 | #define NEXTRUN(c) do { if ((c) == run_c) { run_l += 1; } else { run_c = (c); run_l = 1; } } while (0) | ||
509 | |||
510 | /* Update the checksum state. */ | ||
511 | #define LARGE_CKSUM_UPDATE(cksum,base,look) \ | ||
512 | do { \ | ||
513 | uint32_t old_c = PERMUTE((base)[0]); \ | ||
514 | uint32_t new_c = PERMUTE((base)[(look)]); \ | ||
515 | uint32_t low = (((cksum) & 0xffff) - old_c + new_c) & 0xffff; \ | ||
516 | uint32_t high = (((cksum) >> 16) - (old_c * (look)) + low) & 0xffff; \ | ||
517 | (cksum) = (high << 16) | low; \ | ||
518 | } while (0) | ||
519 | |||
520 | /* Multiply and add hash function */ | ||
521 | #if ARITH_SMALL_CKSUM | ||
522 | #define SMALL_CKSUM_UPDATE(cksum,base,look) (cksum) = ((*(unsigned long*)(base+1)) * 71143) | ||
523 | #else | ||
524 | #define SMALL_CKSUM_UPDATE LARGE_CKSUM_UPDATE | ||
525 | #endif | ||
526 | |||
527 | /* Consume N bytes of input, only used by the decoder. */ | ||
528 | #define DECODE_INPUT(n) \ | ||
529 | do { \ | ||
530 | stream->total_in += (xoff_t) (n); \ | ||
531 | stream->avail_in -= (n); \ | ||
532 | stream->next_in += (n); \ | ||
533 | } while (0) | ||
534 | |||
535 | /* This CPP-conditional stuff can be cleaned up... */ | ||
536 | #if XD3_DEBUG | ||
537 | #define IF_DEBUG(x) x | ||
538 | #define DEBUG_ARG(x) , x | ||
539 | #else | ||
540 | #define IF_DEBUG(x) | ||
541 | #define DEBUG_ARG(x) | ||
542 | #endif | ||
543 | #if XD3_DEBUG > 1 | ||
544 | #define IF_DEBUG1(x) x | ||
545 | #else | ||
546 | #define IF_DEBUG1(x) | ||
547 | #endif | ||
548 | #if REGRESSION_TEST | ||
549 | #define IF_REGRESSION(x) x | ||
550 | #else | ||
551 | #define IF_REGRESSION(x) | ||
552 | #endif | ||
553 | |||
554 | /******************************************************************************************/ | ||
555 | |||
556 | #if XD3_ENCODER | ||
557 | static void* xd3_alloc0 (xd3_stream *stream, | ||
558 | usize_t elts, | ||
559 | usize_t size); | ||
560 | |||
561 | |||
562 | static xd3_output* xd3_alloc_output (xd3_stream *stream, | ||
563 | xd3_output *old_output); | ||
564 | |||
565 | |||
566 | |||
567 | static void xd3_free_output (xd3_stream *stream, | ||
568 | xd3_output *output); | ||
569 | |||
570 | static int xd3_emit_byte (xd3_stream *stream, | ||
571 | xd3_output **outputp, | ||
572 | uint8_t code); | ||
573 | |||
574 | static int xd3_emit_bytes (xd3_stream *stream, | ||
575 | xd3_output **outputp, | ||
576 | const uint8_t *base, | ||
577 | usize_t size); | ||
578 | |||
579 | static int xd3_emit_double (xd3_stream *stream, xd3_rinst *first, xd3_rinst *second, uint code); | ||
580 | static int xd3_emit_single (xd3_stream *stream, xd3_rinst *single, uint code); | ||
581 | |||
582 | static usize_t xd3_sizeof_output (xd3_output *output); | ||
583 | |||
584 | static int xd3_source_match_setup (xd3_stream *stream, xoff_t srcpos); | ||
585 | static int xd3_source_extend_match (xd3_stream *stream); | ||
586 | static int xd3_srcwin_setup (xd3_stream *stream); | ||
587 | static int xd3_srcwin_move_point (xd3_stream *stream, usize_t *next_move_point); | ||
588 | static usize_t xd3_iopt_last_matched (xd3_stream *stream); | ||
589 | static int xd3_emit_uint32_t (xd3_stream *stream, xd3_output **output, uint32_t num); | ||
590 | |||
591 | #endif /* XD3_ENCODER */ | ||
592 | |||
593 | static int xd3_decode_allocate (xd3_stream *stream, usize_t size, | ||
594 | uint8_t **copied1, usize_t *alloc1, | ||
595 | uint8_t **copied2, usize_t *alloc2); | ||
596 | |||
597 | static void xd3_compute_code_table_string (const xd3_dinst *code_table, uint8_t *str); | ||
598 | static void* xd3_alloc (xd3_stream *stream, usize_t elts, usize_t size); | ||
599 | static void xd3_free (xd3_stream *stream, void *ptr); | ||
600 | |||
601 | static int xd3_read_uint32_t (xd3_stream *stream, const uint8_t **inpp, | ||
602 | const uint8_t *max, uint32_t *valp); | ||
603 | |||
604 | #if REGRESSION_TEST | ||
605 | static int xd3_selftest (void); | ||
606 | #endif | ||
607 | |||
608 | /******************************************************************************************/ | ||
609 | |||
610 | #define UINT32_OFLOW_MASK 0xfe000000U | ||
611 | #define UINT64_OFLOW_MASK 0xfe00000000000000ULL | ||
612 | |||
613 | #define UINT32_MAX 4294967295U | ||
614 | #define UINT64_MAX 18446744073709551615ULL | ||
615 | |||
616 | #if SIZEOF_USIZE_T == 4 | ||
617 | #define USIZE_T_MAX UINT32_MAX | ||
618 | #define xd3_decode_size xd3_decode_uint32_t | ||
619 | #define xd3_emit_size xd3_emit_uint32_t | ||
620 | #define xd3_sizeof_size xd3_sizeof_uint32_t | ||
621 | #define xd3_read_size xd3_read_uint32_t | ||
622 | #elif SIZEOF_USIZE_T == 8 | ||
623 | #define USIZE_T_MAX UINT64_MAX | ||
624 | #define xd3_decode_size xd3_decode_uint64_t | ||
625 | #define xd3_emit_size xd3_emit_uint64_t | ||
626 | #define xd3_sizeof_size xd3_sizeof_uint64_t | ||
627 | #define xd3_read_size xd3_read_uint64_t | ||
628 | #endif | ||
629 | |||
630 | #if SIZEOF_XOFF_T == 4 | ||
631 | #define XOFF_T_MAX UINT32_MAX | ||
632 | #define xd3_decode_offset xd3_decode_uint32_t | ||
633 | //#define xd3_emit_offset xd3_emit_uint32_t | ||
634 | //#define xd3_sizeof_offset xd3_sizeof_uint32_t | ||
635 | //#define xd3_read_offset xd3_read_uint32_t | ||
636 | #elif SIZEOF_XOFF_T == 8 | ||
637 | #define XOFF_T_MAX UINT64_MAX | ||
638 | #define xd3_decode_offset xd3_decode_uint64_t | ||
639 | //#define xd3_emit_offset xd3_emit_uint64_t | ||
640 | //#define xd3_sizeof_offset xd3_sizeof_uint64_t | ||
641 | //#define xd3_read_offset xd3_read_uint64_t | ||
642 | #endif | ||
643 | |||
644 | #define USIZE_T_OVERFLOW(a,b) ((USIZE_T_MAX - (usize_t) (a)) < (usize_t) (b)) | ||
645 | #define XOFF_T_OVERFLOW(a,b) ((XOFF_T_MAX - (xoff_t) (a)) < (xoff_t) (b)) | ||
646 | |||
647 | const char* xd3_strerror (int ret) | ||
648 | { | ||
649 | switch (ret) | ||
650 | { | ||
651 | case XD3_INPUT: return "XD3_INPUT"; | ||
652 | case XD3_OUTPUT: return "XD3_OUTPUT"; | ||
653 | case XD3_GETSRCBLK: return "XD3_GETSRCBLK"; | ||
654 | case XD3_GOTHEADER: return "XD3_GOTHEADER"; | ||
655 | case XD3_WINSTART: return "XD3_WINSTART"; | ||
656 | case XD3_WINFINISH: return "XD3_WINFINISH"; | ||
657 | } | ||
658 | return strerror (ret); | ||
659 | } | ||
660 | |||
661 | /******************************************************************************************/ | ||
662 | |||
663 | #if SECONDARY_ANY == 0 | ||
664 | #define IF_SEC(x) | ||
665 | #define IF_NSEC(x) x | ||
666 | #else /* yuck */ | ||
667 | #define IF_SEC(x) x | ||
668 | #define IF_NSEC(x) | ||
669 | #include "xdelta3-second.h" | ||
670 | #endif /* SECONDARY_ANY */ | ||
671 | |||
672 | #if SECONDARY_FGK | ||
673 | #include "xdelta3-fgk.h" | ||
674 | |||
675 | static const xd3_sec_type fgk_sec_type = | ||
676 | { | ||
677 | VCD_FGK_ID, | ||
678 | "FGK Adaptive Huffman", | ||
679 | SEC_NOFLAGS, | ||
680 | (xd3_sec_stream* (*)()) fgk_alloc, | ||
681 | (void (*)()) fgk_destroy, | ||
682 | (void (*)()) fgk_init, | ||
683 | (int (*)()) xd3_decode_fgk, | ||
684 | IF_ENCODER((int (*)()) xd3_encode_fgk) | ||
685 | }; | ||
686 | |||
687 | #define IF_FGK(x) x | ||
688 | #define FGK_CASE(s) \ | ||
689 | s->sec_type = & fgk_sec_type; \ | ||
690 | break; | ||
691 | #else | ||
692 | #define IF_FGK(x) | ||
693 | #define FGK_CASE(s) \ | ||
694 | s->msg = "unavailable secondary compressor: FGK Adaptive Huffman"; \ | ||
695 | return EINVAL; | ||
696 | #endif | ||
697 | |||
698 | #if SECONDARY_DJW | ||
699 | #include "xdelta3-djw.h" | ||
700 | |||
701 | static const xd3_sec_type djw_sec_type = | ||
702 | { | ||
703 | VCD_DJW_ID, | ||
704 | "Static Huffman", | ||
705 | SEC_COUNT_FREQS, | ||
706 | (xd3_sec_stream* (*)()) djw_alloc, | ||
707 | (void (*)()) djw_destroy, | ||
708 | (void (*)()) djw_init, | ||
709 | (int (*)()) xd3_decode_huff, | ||
710 | IF_ENCODER((int (*)()) xd3_encode_huff) | ||
711 | }; | ||
712 | |||
713 | #define IF_DJW(x) x | ||
714 | #define DJW_CASE(s) \ | ||
715 | s->sec_type = & djw_sec_type; \ | ||
716 | break; | ||
717 | #else | ||
718 | #define IF_DJW(x) | ||
719 | #define DJW_CASE(s) \ | ||
720 | s->msg = "unavailable secondary compressor: DJW Static Huffman"; \ | ||
721 | return EINVAL; | ||
722 | #endif | ||
723 | |||
724 | /******************************************************************************************/ | ||
725 | |||
726 | /* Abbreviate frequently referenced fields. */ | ||
727 | #define max_in stream->avail_in | ||
728 | #define pos_in stream->input_position | ||
729 | #define min_match stream->min_match | ||
730 | |||
731 | /* Process the inline pass. */ | ||
732 | #define __XDELTA3_C_INLINE_PASS__ | ||
733 | #include "xdelta3.c" | ||
734 | #undef __XDELTA3_C_INLINE_PASS__ | ||
735 | |||
736 | /* Process template passes - this includes xdelta3.c several times. */ | ||
737 | #define __XDELTA3_C_TEMPLATE_PASS__ | ||
738 | #include "xdelta3-cfgs.h" | ||
739 | #undef __XDELTA3_C_TEMPLATE_PASS__ | ||
740 | |||
741 | #undef max_in | ||
742 | #undef pos_in | ||
743 | #undef min_match | ||
744 | |||
745 | #if XD3_MAIN || PYTHON_MODULE | ||
746 | #include "xdelta3-main.h" | ||
747 | #endif | ||
748 | |||
749 | #if REGRESSION_TEST | ||
750 | #include "xdelta3-test.h" | ||
751 | #endif | ||
752 | |||
753 | #if PYTHON_MODULE | ||
754 | #include "xdelta3-python.h" | ||
755 | #endif | ||
756 | |||
757 | #endif /* __XDELTA3_C_HEADER_PASS__ */ | ||
758 | #ifdef __XDELTA3_C_INLINE_PASS__ | ||
759 | |||
760 | /****************************************************************************************** | ||
761 | Instruction tables | ||
762 | ******************************************************************************************/ | ||
763 | |||
764 | /* The following code implements a parametrized description of the | ||
765 | * code table given above for a few reasons. It is not necessary for | ||
766 | * implementing the standard, to support compression with variable | ||
767 | * tables, so an implementation is only required to know the default | ||
768 | * code table to begin decompression. (If the encoder uses an | ||
769 | * alternate table, the table is included in compressed form inside | ||
770 | * the VCDIFF file.) | ||
771 | * | ||
772 | * Before adding variable-table support there were two functions which | ||
773 | * were hard-coded to the default table above. | ||
774 | * xd3_compute_default_table() would create the default table by | ||
775 | * filling a 256-elt array of xd3_dinst values. The corresponding | ||
776 | * function, xd3_choose_instruction(), would choose an instruction | ||
777 | * based on the hard-coded parameters of the default code table. | ||
778 | * | ||
779 | * Notes: The parametrized code table description here only generates | ||
780 | * tables of a certain regularity similar to the default table by | ||
781 | * allowing to vary the distribution of single- and | ||
782 | * double-instructions and change the number of near and same copy | ||
783 | * modes. More exotic tables are only possible by extending this | ||
784 | * code, but a detailed experiment would need to be carried out first, | ||
785 | * probably using separate code. I would like to experiment with a | ||
786 | * double-copy instruction, for example. | ||
787 | * | ||
788 | * For performance reasons, both the parametrized and non-parametrized | ||
789 | * versions of xd3_choose_instruction remain. The parametrized | ||
790 | * version is only needed for testing multi-table decoding support. | ||
791 | * If ever multi-table encoding is required, this can be optimized by | ||
792 | * compiling static functions for each table. | ||
793 | */ | ||
794 | |||
795 | /* The XD3_CHOOSE_INSTRUCTION calls xd3_choose_instruction with the | ||
796 | * table description when GENERIC_ENCODE_TABLES are in use. The | ||
797 | * IF_GENCODETBL macro enables generic-code-table specific code. */ | ||
798 | #if GENERIC_ENCODE_TABLES | ||
799 | #define XD3_CHOOSE_INSTRUCTION(stream,prev,inst) xd3_choose_instruction (stream->code_table_desc, prev, inst) | ||
800 | #define IF_GENCODETBL(x) x | ||
801 | #else | ||
802 | #define XD3_CHOOSE_INSTRUCTION(stream,prev,inst) xd3_choose_instruction (prev, inst) | ||
803 | #define IF_GENCODETBL(x) | ||
804 | #endif | ||
805 | |||
806 | /* This structure maintains information needed by | ||
807 | * xd3_choose_instruction to compute the code for a double instruction | ||
808 | * by first indexing an array of code_table_sizes by copy mode, then | ||
809 | * using (offset + (muliplier * X)) */ | ||
810 | struct _xd3_code_table_sizes { | ||
811 | uint8_t cpy_max; | ||
812 | uint8_t offset; | ||
813 | uint8_t mult; | ||
814 | }; | ||
815 | |||
816 | /* This contains a complete description of a code table. */ | ||
817 | struct _xd3_code_table_desc | ||
818 | { | ||
819 | /* Assumes a single RUN instruction */ | ||
820 | /* Assumes that MIN_MATCH is 4 */ | ||
821 | |||
822 | uint8_t add_sizes; /* Number of immediate-size single adds (default 17) */ | ||
823 | uint8_t near_modes; /* Number of near copy modes (default 4) */ | ||
824 | uint8_t same_modes; /* Number of same copy modes (default 3) */ | ||
825 | uint8_t cpy_sizes; /* Number of immediate-size single copies (default 15) */ | ||
826 | |||
827 | uint8_t addcopy_add_max; /* Maximum add size for an add-copy double instruction, all modes (default 4) */ | ||
828 | uint8_t addcopy_near_cpy_max; /* Maximum cpy size for an add-copy double instruction, up through VCD_NEAR modes (default 6) */ | ||
829 | uint8_t addcopy_same_cpy_max; /* Maximum cpy size for an add-copy double instruction, VCD_SAME modes (default 4) */ | ||
830 | |||
831 | uint8_t copyadd_add_max; /* Maximum add size for a copy-add double instruction, all modes (default 1) */ | ||
832 | uint8_t copyadd_near_cpy_max; /* Maximum cpy size for a copy-add double instruction, up through VCD_NEAR modes (default 4) */ | ||
833 | uint8_t copyadd_same_cpy_max; /* Maximum cpy size for a copy-add double instruction, VCD_SAME modes (default 4) */ | ||
834 | |||
835 | xd3_code_table_sizes addcopy_max_sizes[MAX_MODES]; | ||
836 | xd3_code_table_sizes copyadd_max_sizes[MAX_MODES]; | ||
837 | }; | ||
838 | |||
839 | /* The rfc3284 code table is represented: */ | ||
840 | static const xd3_code_table_desc __rfc3284_code_table_desc = { | ||
841 | 17, /* add sizes */ | ||
842 | 4, /* near modes */ | ||
843 | 3, /* same modes */ | ||
844 | 15, /* copy sizes */ | ||
845 | |||
846 | 4, /* add-copy max add */ | ||
847 | 6, /* add-copy max cpy, near */ | ||
848 | 4, /* add-copy max cpy, same */ | ||
849 | |||
850 | 1, /* copy-add max add */ | ||
851 | 4, /* copy-add max cpy, near */ | ||
852 | 4, /* copy-add max cpy, same */ | ||
853 | |||
854 | /* addcopy */ | ||
855 | { {6,163,3},{6,175,3},{6,187,3},{6,199,3},{6,211,3},{6,223,3},{4,235,1},{4,239,1},{4,243,1} }, | ||
856 | /* copyadd */ | ||
857 | { {4,247,1},{4,248,1},{4,249,1},{4,250,1},{4,251,1},{4,252,1},{4,253,1},{4,254,1},{4,255,1} }, | ||
858 | }; | ||
859 | |||
860 | #if GENERIC_ENCODE_TABLES | ||
861 | /* An alternate code table for testing (5 near, 0 same): | ||
862 | * | ||
863 | * TYPE SIZE MODE TYPE SIZE MODE INDEX | ||
864 | * --------------------------------------------------------------- | ||
865 | * 1. Run 0 0 Noop 0 0 0 | ||
866 | * 2. Add 0, [1,23] 0 Noop 0 0 [1,24] | ||
867 | * 3. Copy 0, [4,20] 0 Noop 0 0 [25,42] | ||
868 | * 4. Copy 0, [4,20] 1 Noop 0 0 [43,60] | ||
869 | * 5. Copy 0, [4,20] 2 Noop 0 0 [61,78] | ||
870 | * 6. Copy 0, [4,20] 3 Noop 0 0 [79,96] | ||
871 | * 7. Copy 0, [4,20] 4 Noop 0 0 [97,114] | ||
872 | * 8. Copy 0, [4,20] 5 Noop 0 0 [115,132] | ||
873 | * 9. Copy 0, [4,20] 6 Noop 0 0 [133,150] | ||
874 | * 10. Add [1,4] 0 Copy [4,6] 0 [151,162] | ||
875 | * 11. Add [1,4] 0 Copy [4,6] 1 [163,174] | ||
876 | * 12. Add [1,4] 0 Copy [4,6] 2 [175,186] | ||
877 | * 13. Add [1,4] 0 Copy [4,6] 3 [187,198] | ||
878 | * 14. Add [1,4] 0 Copy [4,6] 4 [199,210] | ||
879 | * 15. Add [1,4] 0 Copy [4,6] 5 [211,222] | ||
880 | * 16. Add [1,4] 0 Copy [4,6] 6 [223,234] | ||
881 | * 17. Copy 4 [0,6] Add [1,3] 0 [235,255] | ||
882 | * --------------------------------------------------------------- */ | ||
883 | static const xd3_code_table_desc __alternate_code_table_desc = { | ||
884 | 23, /* add sizes */ | ||
885 | 5, /* near modes */ | ||
886 | 0, /* same modes */ | ||
887 | 17, /* copy sizes */ | ||
888 | |||
889 | 4, /* add-copy max add */ | ||
890 | 6, /* add-copy max cpy, near */ | ||
891 | 0, /* add-copy max cpy, same */ | ||
892 | |||
893 | 3, /* copy-add max add */ | ||
894 | 4, /* copy-add max cpy, near */ | ||
895 | 0, /* copy-add max cpy, same */ | ||
896 | |||
897 | /* addcopy */ | ||
898 | { {6,151,3},{6,163,3},{6,175,3},{6,187,3},{6,199,3},{6,211,3},{6,223,3},{0,0,0},{0,0,0} }, | ||
899 | /* copyadd */ | ||
900 | { {4,235,1},{4,238,1},{4,241,1},{4,244,1},{4,247,1},{4,250,1},{4,253,1},{0,0,0},{0,0,0} }, | ||
901 | }; | ||
902 | #endif | ||
903 | |||
904 | /* Computes code table entries of TBL using the specified description. */ | ||
905 | static void | ||
906 | xd3_build_code_table (const xd3_code_table_desc *desc, xd3_dinst *tbl) | ||
907 | { | ||
908 | int size1, size2, mode; | ||
909 | int cpy_modes = 2 + desc->near_modes + desc->same_modes; | ||
910 | xd3_dinst *d = tbl; | ||
911 | |||
912 | (d++)->type1 = XD3_RUN; | ||
913 | (d++)->type1 = XD3_ADD; | ||
914 | |||
915 | for (size1 = 1; size1 <= desc->add_sizes; size1 += 1, d += 1) | ||
916 | { | ||
917 | d->type1 = XD3_ADD; | ||
918 | d->size1 = size1; | ||
919 | } | ||
920 | |||
921 | for (mode = 0; mode < cpy_modes; mode += 1) | ||
922 | { | ||
923 | (d++)->type1 = XD3_CPY + mode; | ||
924 | |||
925 | for (size1 = MIN_MATCH; size1 < MIN_MATCH + desc->cpy_sizes; size1 += 1, d += 1) | ||
926 | { | ||
927 | d->type1 = XD3_CPY + mode; | ||
928 | d->size1 = size1; | ||
929 | } | ||
930 | } | ||
931 | |||
932 | for (mode = 0; mode < cpy_modes; mode += 1) | ||
933 | { | ||
934 | for (size1 = 1; size1 <= desc->addcopy_add_max; size1 += 1) | ||
935 | { | ||
936 | int max = (mode < 2 + desc->near_modes) ? desc->addcopy_near_cpy_max : desc->addcopy_same_cpy_max; | ||
937 | |||
938 | for (size2 = MIN_MATCH; size2 <= max; size2 += 1, d += 1) | ||
939 | { | ||
940 | d->type1 = XD3_ADD; | ||
941 | d->size1 = size1; | ||
942 | d->type2 = XD3_CPY + mode; | ||
943 | d->size2 = size2; | ||
944 | } | ||
945 | } | ||
946 | } | ||
947 | |||
948 | for (mode = 0; mode < cpy_modes; mode += 1) | ||
949 | { | ||
950 | int max = (mode < 2 + desc->near_modes) ? desc->copyadd_near_cpy_max : desc->copyadd_same_cpy_max; | ||
951 | |||
952 | for (size1 = MIN_MATCH; size1 <= max; size1 += 1) | ||
953 | { | ||
954 | for (size2 = 1; size2 <= desc->copyadd_add_max; size2 += 1, d += 1) | ||
955 | { | ||
956 | d->type1 = XD3_CPY + mode; | ||
957 | d->size1 = size1; | ||
958 | d->type2 = XD3_ADD; | ||
959 | d->size2 = size2; | ||
960 | } | ||
961 | } | ||
962 | } | ||
963 | |||
964 | XD3_ASSERT (d - tbl == 256); | ||
965 | } | ||
966 | |||
967 | /* This function generates the static default code table. */ | ||
968 | static const xd3_dinst* | ||
969 | xd3_rfc3284_code_table (void) | ||
970 | { | ||
971 | static xd3_dinst __rfc3284_code_table[256]; | ||
972 | |||
973 | if (__rfc3284_code_table[0].type1 != XD3_RUN) | ||
974 | { | ||
975 | xd3_build_code_table (& __rfc3284_code_table_desc, __rfc3284_code_table); | ||
976 | } | ||
977 | |||
978 | return __rfc3284_code_table; | ||
979 | } | ||
980 | |||
981 | #if XD3_ENCODER | ||
982 | #if GENERIC_ENCODE_TABLES | ||
983 | /* This function generates the alternate code table. */ | ||
984 | static const xd3_dinst* | ||
985 | xd3_alternate_code_table (void) | ||
986 | { | ||
987 | static xd3_dinst __alternate_code_table[256]; | ||
988 | |||
989 | if (__alternate_code_table[0].type1 != XD3_RUN) | ||
990 | { | ||
991 | xd3_build_code_table (& __alternate_code_table_desc, __alternate_code_table); | ||
992 | } | ||
993 | |||
994 | return __alternate_code_table; | ||
995 | } | ||
996 | |||
997 | /* This function computes the ideal second instruction INST based on preceding instruction | ||
998 | * PREV. If it is possible to issue a double instruction based on this pair it sets | ||
999 | * PREV->code2, otherwise it sets INST->code1. */ | ||
1000 | static void | ||
1001 | xd3_choose_instruction (const xd3_code_table_desc *desc, xd3_rinst *prev, xd3_rinst *inst) | ||
1002 | { | ||
1003 | switch (inst->type) | ||
1004 | { | ||
1005 | case XD3_RUN: | ||
1006 | /* The 0th instruction is RUN */ | ||
1007 | inst->code1 = 0; | ||
1008 | break; | ||
1009 | |||
1010 | case XD3_ADD: | ||
1011 | |||
1012 | if (inst->size > desc->add_sizes) | ||
1013 | { | ||
1014 | /* The first instruction is non-immediate ADD */ | ||
1015 | inst->code1 = 1; | ||
1016 | } | ||
1017 | else | ||
1018 | { | ||
1019 | /* The following ADD_SIZES instructions are immediate ADDs */ | ||
1020 | inst->code1 = 1 + inst->size; | ||
1021 | |||
1022 | /* Now check for a possible COPY-ADD double instruction */ | ||
1023 | if (prev != NULL) | ||
1024 | { | ||
1025 | int prev_mode = prev->type - XD3_CPY; | ||
1026 | |||
1027 | /* If previous is a copy. Note: as long as the previous is not a RUN | ||
1028 | * instruction, it should be a copy because it cannot be an add. This check | ||
1029 | * is more clear. */ | ||
1030 | if (prev_mode >= 0 && inst->size <= desc->copyadd_add_max) | ||
1031 | { | ||
1032 | const xd3_code_table_sizes *sizes = & desc->copyadd_max_sizes[prev_mode]; | ||
1033 | |||
1034 | /* This check and the inst->size-<= above are == in the default table. */ | ||
1035 | if (prev->size <= sizes->cpy_max) | ||
1036 | { | ||
1037 | /* The second and third exprs are 0 in the default table. */ | ||
1038 | prev->code2 = sizes->offset + (sizes->mult * (prev->size - MIN_MATCH)) + (inst->size - MIN_ADD); | ||
1039 | } | ||
1040 | } | ||
1041 | } | ||
1042 | } | ||
1043 | break; | ||
1044 | |||
1045 | default: | ||
1046 | { | ||
1047 | int mode = inst->type - XD3_CPY; | ||
1048 | |||
1049 | /* The large copy instruction is offset by the run, large add, and immediate adds, | ||
1050 | * then multipled by the number of immediate copies plus one (the large copy) | ||
1051 | * (i.e., if there are 15 immediate copy instructions then there are 16 copy | ||
1052 | * instructions per mode). */ | ||
1053 | inst->code1 = 2 + desc->add_sizes + (1 + desc->cpy_sizes) * mode; | ||
1054 | |||
1055 | /* Now if the copy is short enough for an immediate instruction. */ | ||
1056 | if (inst->size < MIN_MATCH + desc->cpy_sizes) | ||
1057 | { | ||
1058 | inst->code1 += inst->size + 1 - MIN_MATCH; | ||
1059 | |||
1060 | /* Now check for a possible ADD-COPY double instruction. */ | ||
1061 | if ( (prev != NULL) && | ||
1062 | (prev->type == XD3_ADD) && | ||
1063 | (prev->size <= desc->addcopy_add_max) ) | ||
1064 | { | ||
1065 | const xd3_code_table_sizes *sizes = & desc->addcopy_max_sizes[mode]; | ||
1066 | |||
1067 | if (inst->size <= sizes->cpy_max) | ||
1068 | { | ||
1069 | prev->code2 = sizes->offset + (sizes->mult * (prev->size - MIN_ADD)) + (inst->size - MIN_MATCH); | ||
1070 | } | ||
1071 | } | ||
1072 | } | ||
1073 | } | ||
1074 | } | ||
1075 | } | ||
1076 | #else /* GENERIC_ENCODE_TABLES */ | ||
1077 | |||
1078 | /* This version of xd3_choose_instruction is hard-coded for the default table. */ | ||
1079 | static void | ||
1080 | xd3_choose_instruction (/* const xd3_code_table_desc *desc,*/ xd3_rinst *prev, xd3_rinst *inst) | ||
1081 | { | ||
1082 | switch (inst->type) | ||
1083 | { | ||
1084 | case XD3_RUN: | ||
1085 | inst->code1 = 0; | ||
1086 | break; | ||
1087 | |||
1088 | case XD3_ADD: | ||
1089 | inst->code1 = 1; | ||
1090 | |||
1091 | if (inst->size <= 17) | ||
1092 | { | ||
1093 | inst->code1 += inst->size; | ||
1094 | |||
1095 | if ( (inst->size == 1) && | ||
1096 | (prev != NULL) && | ||
1097 | (prev->size == 4) && | ||
1098 | (prev->type >= XD3_CPY) ) | ||
1099 | { | ||
1100 | prev->code2 = 247 + (prev->type - XD3_CPY); | ||
1101 | } | ||
1102 | } | ||
1103 | |||
1104 | break; | ||
1105 | |||
1106 | default: | ||
1107 | { | ||
1108 | int mode = inst->type - XD3_CPY; | ||
1109 | |||
1110 | XD3_ASSERT (inst->type >= XD3_CPY && inst->type < 12); | ||
1111 | |||
1112 | inst->code1 = 19 + 16 * mode; | ||
1113 | |||
1114 | if (inst->size <= 18) | ||
1115 | { | ||
1116 | inst->code1 += inst->size - 3; | ||
1117 | |||
1118 | if ( (prev != NULL) && | ||
1119 | (prev->type == XD3_ADD) && | ||
1120 | (prev->size <= 4) ) | ||
1121 | { | ||
1122 | if ( (inst->size <= 6) && | ||
1123 | (mode <= 5) ) | ||
1124 | { | ||
1125 | prev->code2 = 163 + (mode * 12) + (3 * (prev->size - 1)) + (inst->size - 4); | ||
1126 | |||
1127 | XD3_ASSERT (prev->code2 <= 234); | ||
1128 | } | ||
1129 | else if ( (inst->size == 4) && | ||
1130 | (mode >= 6) ) | ||
1131 | { | ||
1132 | prev->code2 = 235 + ((mode - 6) * 4) + (prev->size - 1); | ||
1133 | |||
1134 | XD3_ASSERT (prev->code2 <= 246); | ||
1135 | } | ||
1136 | } | ||
1137 | } | ||
1138 | |||
1139 | XD3_ASSERT (inst->code1 <= 162); | ||
1140 | } | ||
1141 | break; | ||
1142 | } | ||
1143 | } | ||
1144 | #endif /* GENERIC_ENCODE_TABLES */ | ||
1145 | |||
1146 | /****************************************************************************************** | ||
1147 | Instruction table encoder/decoder | ||
1148 | ******************************************************************************************/ | ||
1149 | |||
1150 | #if GENERIC_ENCODE_TABLES | ||
1151 | #if GENERIC_ENCODE_TABLES_COMPUTE == 0 | ||
1152 | |||
1153 | /* In this case, we hard-code the result of compute_code_table_encoding for each alternate | ||
1154 | * code table, presuming that saves time/space. This has been 131 bytes, but secondary | ||
1155 | * compression was turned off. */ | ||
1156 | static const uint8_t __alternate_code_table_compressed[178] = | ||
1157 | {0xd6,0xc3,0xc4,0x00,0x00,0x01,0x8a,0x6f,0x40,0x81,0x27,0x8c,0x00,0x00,0x4a,0x4a,0x0d,0x02,0x01,0x03, | ||
1158 | 0x01,0x03,0x00,0x01,0x00,0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e, | ||
1159 | 0x0f,0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x00,0x01,0x01,0x01,0x02,0x02,0x02,0x03,0x03,0x03,0x04, | ||
1160 | 0x04,0x04,0x04,0x00,0x04,0x05,0x06,0x01,0x02,0x03,0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x05,0x05,0x05, | ||
1161 | 0x06,0x06,0x06,0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x00,0x02,0x00,0x18,0x13,0x63,0x00,0x1b,0x00,0x54, | ||
1162 | 0x00,0x15,0x23,0x6f,0x00,0x28,0x13,0x54,0x00,0x15,0x01,0x1a,0x31,0x23,0x6c,0x0d,0x23,0x48,0x00,0x15, | ||
1163 | 0x93,0x6f,0x00,0x28,0x04,0x23,0x51,0x04,0x32,0x00,0x2b,0x00,0x12,0x00,0x12,0x00,0x12,0x00,0x12,0x00, | ||
1164 | 0x12,0x00,0x12,0x53,0x57,0x9c,0x07,0x43,0x6f,0x00,0x34,0x00,0x0c,0x00,0x0c,0x00,0x0c,0x00,0x0c,0x00, | ||
1165 | 0x0c,0x00,0x0c,0x00,0x15,0x00,0x82,0x6f,0x00,0x15,0x12,0x0c,0x00,0x03,0x03,0x00,0x06,0x00,}; | ||
1166 | |||
1167 | static int | ||
1168 | xd3_compute_alternate_table_encoding (xd3_stream *stream, const uint8_t **data, usize_t *size) | ||
1169 | { | ||
1170 | (*data) = __alternate_code_table_compressed; | ||
1171 | (*size) = sizeof (__alternate_code_table_compressed); | ||
1172 | return 0; | ||
1173 | } | ||
1174 | |||
1175 | #else | ||
1176 | |||
1177 | /* The alternate code table will be computed and stored here. */ | ||
1178 | static uint8_t __alternate_code_table_compressed[CODE_TABLE_VCDIFF_SIZE]; | ||
1179 | static usize_t __alternate_code_table_compressed_size; | ||
1180 | |||
1181 | /* This function generates a delta describing the code table for encoding within a VCDIFF | ||
1182 | * file. This function is NOT thread safe because it is only intended that this function | ||
1183 | * is used to generate statically-compiled strings. */ | ||
1184 | int xd3_compute_code_table_encoding (xd3_stream *in_stream, const xd3_dinst *code_table, | ||
1185 | uint8_t *comp_string, usize_t *comp_string_size) | ||
1186 | { | ||
1187 | uint8_t dflt_string[CODE_TABLE_STRING_SIZE]; | ||
1188 | uint8_t code_string[CODE_TABLE_STRING_SIZE]; | ||
1189 | xd3_stream stream; | ||
1190 | xd3_source source; | ||
1191 | xd3_config config; | ||
1192 | int ret; | ||
1193 | |||
1194 | memset (& source, 0, sizeof (source)); | ||
1195 | |||
1196 | xd3_compute_code_table_string (xd3_rfc3284_code_table (), dflt_string); | ||
1197 | xd3_compute_code_table_string (code_table, code_string); | ||
1198 | |||
1199 | /* Use DJW secondary compression if it is on by default. This saves about 20 bytes. */ | ||
1200 | xd3_init_config (& config, XD3_FLUSH | (SECONDARY_DJW ? XD3_SEC_DJW : 0)); | ||
1201 | |||
1202 | /* Be exhaustive. */ | ||
1203 | config.sprevsz = 1<<11; | ||
1204 | config.memsize = CODE_TABLE_STRING_SIZE * 10; | ||
1205 | |||
1206 | config.large_look = 4; | ||
1207 | config.large_step = 1; | ||
1208 | config.small_look = 4; | ||
1209 | config.small_chain = CODE_TABLE_STRING_SIZE; | ||
1210 | config.small_lchain = CODE_TABLE_STRING_SIZE; | ||
1211 | config.ssmatch = 1; | ||
1212 | config.try_lazy = 1; | ||
1213 | config.max_lazy = CODE_TABLE_STRING_SIZE; | ||
1214 | config.long_enough = CODE_TABLE_STRING_SIZE; | ||
1215 | config.promote = 1; | ||
1216 | config.srcwin_size = CODE_TABLE_STRING_SIZE; | ||
1217 | config.srcwin_maxsz = CODE_TABLE_STRING_SIZE; | ||
1218 | |||
1219 | if ((ret = xd3_config_stream (& stream, & config))) { goto fail; } | ||
1220 | |||
1221 | source.size = CODE_TABLE_STRING_SIZE; | ||
1222 | source.blksize = CODE_TABLE_STRING_SIZE; | ||
1223 | source.onblk = CODE_TABLE_STRING_SIZE; | ||
1224 | source.name = ""; | ||
1225 | source.curblk = dflt_string; | ||
1226 | source.curblkno = 0; | ||
1227 | |||
1228 | if ((ret = xd3_set_source (& stream, & source))) { goto fail; } | ||
1229 | |||
1230 | if ((ret = xd3_encode_completely (& stream, code_string, CODE_TABLE_STRING_SIZE, | ||
1231 | comp_string, comp_string_size, CODE_TABLE_VCDIFF_SIZE))) { goto fail; } | ||
1232 | |||
1233 | fail: | ||
1234 | |||
1235 | in_stream->msg = stream.msg; | ||
1236 | xd3_free_stream (& stream); | ||
1237 | return ret; | ||
1238 | } | ||
1239 | |||
1240 | /* Compute a delta between alternate and rfc3284 tables. As soon as another alternate | ||
1241 | * table is added, this code should become generic. For now there is only one alternate | ||
1242 | * table for testing. */ | ||
1243 | static int | ||
1244 | xd3_compute_alternate_table_encoding (xd3_stream *stream, const uint8_t **data, usize_t *size) | ||
1245 | { | ||
1246 | int ret; | ||
1247 | |||
1248 | if (__alternate_code_table_compressed[0] == 0) | ||
1249 | { | ||
1250 | if ((ret = xd3_compute_code_table_encoding (stream, xd3_alternate_code_table (), | ||
1251 | __alternate_code_table_compressed, | ||
1252 | & __alternate_code_table_compressed_size))) | ||
1253 | { | ||
1254 | return ret; | ||
1255 | } | ||
1256 | |||
1257 | /* During development of a new code table, enable this variable to print the new | ||
1258 | * static contents and determine its size. At run time the table will be filled in | ||
1259 | * appropriately, but at least it should have the proper size beforehand. */ | ||
1260 | #if GENERIC_ENCODE_TABLES_COMPUTE_PRINT | ||
1261 | { | ||
1262 | int i; | ||
1263 | |||
1264 | P(RINT, "\nstatic const usize_t __alternate_code_table_compressed_size = %u;\n", | ||
1265 | __alternate_code_table_compressed_size); | ||
1266 | |||
1267 | P(RINT, "static const uint8_t __alternate_code_table_compressed[%u] =\n{", | ||
1268 | __alternate_code_table_compressed_size); | ||
1269 | |||
1270 | for (i = 0; i < __alternate_code_table_compressed_size; i += 1) | ||
1271 | { | ||
1272 | P(RINT, "0x%02x,", __alternate_code_table_compressed[i]); | ||
1273 | if ((i % 20) == 19) { P(RINT, "\n"); } | ||
1274 | } | ||
1275 | |||
1276 | P(RINT, "};\n"); | ||
1277 | } | ||
1278 | #endif | ||
1279 | } | ||
1280 | |||
1281 | (*data) = __alternate_code_table_compressed; | ||
1282 | (*size) = __alternate_code_table_compressed_size; | ||
1283 | |||
1284 | return 0; | ||
1285 | } | ||
1286 | #endif /* GENERIC_ENCODE_TABLES_COMPUTE != 0 */ | ||
1287 | #endif /* GENERIC_ENCODE_TABLES */ | ||
1288 | |||
1289 | #endif /* XD3_ENCODER */ | ||
1290 | |||
1291 | /* This function generates the 1536-byte string specified in sections 5.4 and 7 of | ||
1292 | * rfc3284, which is used to represent a code table within a VCDIFF file. */ | ||
1293 | void xd3_compute_code_table_string (const xd3_dinst *code_table, uint8_t *str) | ||
1294 | { | ||
1295 | int i, s; | ||
1296 | |||
1297 | XD3_ASSERT (CODE_TABLE_STRING_SIZE == 6 * 256); | ||
1298 | |||
1299 | for (s = 0; s < 6; s += 1) | ||
1300 | { | ||
1301 | for (i = 0; i < 256; i += 1) | ||
1302 | { | ||
1303 | switch (s) | ||
1304 | { | ||
1305 | case 0: *str++ = (code_table[i].type1 >= XD3_CPY ? XD3_CPY : code_table[i].type1); break; | ||
1306 | case 1: *str++ = (code_table[i].type2 >= XD3_CPY ? XD3_CPY : code_table[i].type2); break; | ||
1307 | case 2: *str++ = (code_table[i].size1); break; | ||
1308 | case 3: *str++ = (code_table[i].size2); break; | ||
1309 | case 4: *str++ = (code_table[i].type1 >= XD3_CPY ? code_table[i].type1 - XD3_CPY : 0); break; | ||
1310 | case 5: *str++ = (code_table[i].type2 >= XD3_CPY ? code_table[i].type2 - XD3_CPY : 0); break; | ||
1311 | } | ||
1312 | } | ||
1313 | } | ||
1314 | } | ||
1315 | |||
1316 | /* This function translates the code table string into the internal representation. The | ||
1317 | * stream's near and same-modes should already be set. */ | ||
1318 | static int | ||
1319 | xd3_apply_table_string (xd3_stream *stream, const uint8_t *code_string) | ||
1320 | { | ||
1321 | int i, s; | ||
1322 | int modes = TOTAL_MODES (stream); | ||
1323 | xd3_dinst *code_table; | ||
1324 | |||
1325 | if ((code_table = stream->code_table_alloc = xd3_alloc (stream, sizeof (xd3_dinst), 256)) == NULL) | ||
1326 | { | ||
1327 | return ENOMEM; | ||
1328 | } | ||
1329 | |||
1330 | for (s = 0; s < 6; s += 1) | ||
1331 | { | ||
1332 | for (i = 0; i < 256; i += 1) | ||
1333 | { | ||
1334 | switch (s) | ||
1335 | { | ||
1336 | case 0: | ||
1337 | if (*code_string > XD3_CPY) | ||
1338 | { | ||
1339 | stream->msg = "invalid code-table opcode"; | ||
1340 | return EINVAL; | ||
1341 | } | ||
1342 | code_table[i].type1 = *code_string++; | ||
1343 | break; | ||
1344 | case 1: | ||
1345 | if (*code_string > XD3_CPY) | ||
1346 | { | ||
1347 | stream->msg = "invalid code-table opcode"; | ||
1348 | return EINVAL; | ||
1349 | } | ||
1350 | code_table[i].type2 = *code_string++; | ||
1351 | break; | ||
1352 | case 2: | ||
1353 | if (*code_string != 0 && code_table[i].type1 == XD3_NOOP) | ||
1354 | { | ||
1355 | stream->msg = "invalid code-table size"; | ||
1356 | return EINVAL; | ||
1357 | } | ||
1358 | code_table[i].size1 = *code_string++; | ||
1359 | break; | ||
1360 | case 3: | ||
1361 | if (*code_string != 0 && code_table[i].type2 == XD3_NOOP) | ||
1362 | { | ||
1363 | stream->msg = "invalid code-table size"; | ||
1364 | return EINVAL; | ||
1365 | } | ||
1366 | code_table[i].size2 = *code_string++; | ||
1367 | break; | ||
1368 | case 4: | ||
1369 | if (*code_string >= modes) | ||
1370 | { | ||
1371 | stream->msg = "invalid code-table mode"; | ||
1372 | return EINVAL; | ||
1373 | } | ||
1374 | if (*code_string != 0 && code_table[i].type1 != XD3_CPY) | ||
1375 | { | ||
1376 | stream->msg = "invalid code-table mode"; | ||
1377 | return EINVAL; | ||
1378 | } | ||
1379 | code_table[i].type1 += *code_string++; | ||
1380 | break; | ||
1381 | case 5: | ||
1382 | if (*code_string >= modes) | ||
1383 | { | ||
1384 | stream->msg = "invalid code-table mode"; | ||
1385 | return EINVAL; | ||
1386 | } | ||
1387 | if (*code_string != 0 && code_table[i].type2 != XD3_CPY) | ||
1388 | { | ||
1389 | stream->msg = "invalid code-table mode"; | ||
1390 | return EINVAL; | ||
1391 | } | ||
1392 | code_table[i].type2 += *code_string++; | ||
1393 | break; | ||
1394 | } | ||
1395 | } | ||
1396 | } | ||
1397 | |||
1398 | stream->code_table = code_table; | ||
1399 | return 0; | ||
1400 | } | ||
1401 | |||
1402 | /* This function applies a code table delta and returns an actual code table. */ | ||
1403 | static int | ||
1404 | xd3_apply_table_encoding (xd3_stream *in_stream, const uint8_t *data, usize_t size) | ||
1405 | { | ||
1406 | uint8_t dflt_string[CODE_TABLE_STRING_SIZE]; | ||
1407 | uint8_t code_string[CODE_TABLE_STRING_SIZE]; | ||
1408 | usize_t code_size; | ||
1409 | xd3_stream stream; | ||
1410 | xd3_source source; | ||
1411 | int ret; | ||
1412 | |||
1413 | /* The default code table string can be cached if alternate code tables ever become | ||
1414 | * popular. */ | ||
1415 | xd3_compute_code_table_string (xd3_rfc3284_code_table (), dflt_string); | ||
1416 | |||
1417 | source.size = CODE_TABLE_STRING_SIZE; | ||
1418 | source.blksize = CODE_TABLE_STRING_SIZE; | ||
1419 | source.onblk = CODE_TABLE_STRING_SIZE; | ||
1420 | source.name = "rfc3284 code table"; | ||
1421 | source.curblk = dflt_string; | ||
1422 | source.curblkno = 0; | ||
1423 | |||
1424 | if ((ret = xd3_config_stream (& stream, NULL)) || | ||
1425 | (ret = xd3_set_source (& stream, & source)) || | ||
1426 | (ret = xd3_decode_completely (& stream, data, size, code_string, & code_size, sizeof (code_string)))) | ||
1427 | { | ||
1428 | in_stream->msg = stream.msg; | ||
1429 | goto fail; | ||
1430 | } | ||
1431 | |||
1432 | if (code_size != sizeof (code_string)) | ||
1433 | { | ||
1434 | stream.msg = "corrupt code-table encoding"; | ||
1435 | ret = EINVAL; | ||
1436 | goto fail; | ||
1437 | } | ||
1438 | |||
1439 | if ((ret = xd3_apply_table_string (in_stream, code_string))) { goto fail; } | ||
1440 | |||
1441 | fail: | ||
1442 | |||
1443 | xd3_free_stream (& stream); | ||
1444 | return ret; | ||
1445 | } | ||
1446 | |||
1447 | /****************************************************************************************** | ||
1448 | Permute stuff | ||
1449 | ******************************************************************************************/ | ||
1450 | |||
1451 | #if HASH_PERMUTE == 0 | ||
1452 | #define PERMUTE(x) (x) | ||
1453 | #else | ||
1454 | #define PERMUTE(x) (__single_hash[(uint)x]) | ||
1455 | |||
1456 | static const uint16_t __single_hash[256] = | ||
1457 | { | ||
1458 | /* Random numbers generated using SLIB's pseudo-random number generator. This hashes | ||
1459 | * the input alphabet. */ | ||
1460 | 0xbcd1, 0xbb65, 0x42c2, 0xdffe, 0x9666, 0x431b, 0x8504, 0xeb46, | ||
1461 | 0x6379, 0xd460, 0xcf14, 0x53cf, 0xdb51, 0xdb08, 0x12c8, 0xf602, | ||
1462 | 0xe766, 0x2394, 0x250d, 0xdcbb, 0xa678, 0x02af, 0xa5c6, 0x7ea6, | ||
1463 | 0xb645, 0xcb4d, 0xc44b, 0xe5dc, 0x9fe6, 0x5b5c, 0x35f5, 0x701a, | ||
1464 | 0x220f, 0x6c38, 0x1a56, 0x4ca3, 0xffc6, 0xb152, 0x8d61, 0x7a58, | ||
1465 | 0x9025, 0x8b3d, 0xbf0f, 0x95a3, 0xe5f4, 0xc127, 0x3bed, 0x320b, | ||
1466 | 0xb7f3, 0x6054, 0x333c, 0xd383, 0x8154, 0x5242, 0x4e0d, 0x0a94, | ||
1467 | 0x7028, 0x8689, 0x3a22, 0x0980, 0x1847, 0xb0f1, 0x9b5c, 0x4176, | ||
1468 | 0xb858, 0xd542, 0x1f6c, 0x2497, 0x6a5a, 0x9fa9, 0x8c5a, 0x7743, | ||
1469 | 0xa8a9, 0x9a02, 0x4918, 0x438c, 0xc388, 0x9e2b, 0x4cad, 0x01b6, | ||
1470 | 0xab19, 0xf777, 0x365f, 0x1eb2, 0x091e, 0x7bf8, 0x7a8e, 0x5227, | ||
1471 | 0xeab1, 0x2074, 0x4523, 0xe781, 0x01a3, 0x163d, 0x3b2e, 0x287d, | ||
1472 | 0x5e7f, 0xa063, 0xb134, 0x8fae, 0x5e8e, 0xb7b7, 0x4548, 0x1f5a, | ||
1473 | 0xfa56, 0x7a24, 0x900f, 0x42dc, 0xcc69, 0x02a0, 0x0b22, 0xdb31, | ||
1474 | 0x71fe, 0x0c7d, 0x1732, 0x1159, 0xcb09, 0xe1d2, 0x1351, 0x52e9, | ||
1475 | 0xf536, 0x5a4f, 0xc316, 0x6bf9, 0x8994, 0xb774, 0x5f3e, 0xf6d6, | ||
1476 | 0x3a61, 0xf82c, 0xcc22, 0x9d06, 0x299c, 0x09e5, 0x1eec, 0x514f, | ||
1477 | 0x8d53, 0xa650, 0x5c6e, 0xc577, 0x7958, 0x71ac, 0x8916, 0x9b4f, | ||
1478 | 0x2c09, 0x5211, 0xf6d8, 0xcaaa, 0xf7ef, 0x287f, 0x7a94, 0xab49, | ||
1479 | 0xfa2c, 0x7222, 0xe457, 0xd71a, 0x00c3, 0x1a76, 0xe98c, 0xc037, | ||
1480 | 0x8208, 0x5c2d, 0xdfda, 0xe5f5, 0x0b45, 0x15ce, 0x8a7e, 0xfcad, | ||
1481 | 0xaa2d, 0x4b5c, 0xd42e, 0xb251, 0x907e, 0x9a47, 0xc9a6, 0xd93f, | ||
1482 | 0x085e, 0x35ce, 0xa153, 0x7e7b, 0x9f0b, 0x25aa, 0x5d9f, 0xc04d, | ||
1483 | 0x8a0e, 0x2875, 0x4a1c, 0x295f, 0x1393, 0xf760, 0x9178, 0x0f5b, | ||
1484 | 0xfa7d, 0x83b4, 0x2082, 0x721d, 0x6462, 0x0368, 0x67e2, 0x8624, | ||
1485 | 0x194d, 0x22f6, 0x78fb, 0x6791, 0xb238, 0xb332, 0x7276, 0xf272, | ||
1486 | 0x47ec, 0x4504, 0xa961, 0x9fc8, 0x3fdc, 0xb413, 0x007a, 0x0806, | ||
1487 | 0x7458, 0x95c6, 0xccaa, 0x18d6, 0xe2ae, 0x1b06, 0xf3f6, 0x5050, | ||
1488 | 0xc8e8, 0xf4ac, 0xc04c, 0xf41c, 0x992f, 0xae44, 0x5f1b, 0x1113, | ||
1489 | 0x1738, 0xd9a8, 0x19ea, 0x2d33, 0x9698, 0x2fe9, 0x323f, 0xcde2, | ||
1490 | 0x6d71, 0xe37d, 0xb697, 0x2c4f, 0x4373, 0x9102, 0x075d, 0x8e25, | ||
1491 | 0x1672, 0xec28, 0x6acb, 0x86cc, 0x186e, 0x9414, 0xd674, 0xd1a5 | ||
1492 | }; | ||
1493 | #endif | ||
1494 | |||
1495 | /****************************************************************************************** | ||
1496 | Ctable stuff | ||
1497 | ******************************************************************************************/ | ||
1498 | |||
1499 | #if HASH_PRIME | ||
1500 | static const usize_t __primes[] = | ||
1501 | { | ||
1502 | 11, 19, 37, 73, 109, | ||
1503 | 163, 251, 367, 557, 823, | ||
1504 | 1237, 1861, 2777, 4177, 6247, | ||
1505 | 9371, 14057, 21089, 31627, 47431, | ||
1506 | 71143, 106721, 160073, 240101, 360163, | ||
1507 | 540217, 810343, 1215497, 1823231, 2734867, | ||
1508 | 4102283, 6153409, 9230113, 13845163, 20767711, | ||
1509 | 31151543, 46727321, 70090921, 105136301, 157704401, | ||
1510 | 236556601, 354834919, 532252367, 798378509, 1197567719, | ||
1511 | 1796351503 | ||
1512 | }; | ||
1513 | |||
1514 | static const usize_t __nprimes = SIZEOF_ARRAY (__primes); | ||
1515 | #endif | ||
1516 | |||
1517 | static INLINE uint32_t | ||
1518 | xd3_checksum_hash (const xd3_hash_cfg *cfg, const uint32_t cksum) | ||
1519 | { | ||
1520 | #if HASH_PRIME | ||
1521 | /* If the table is prime compute the modulus. */ | ||
1522 | return (cksum % cfg->size); | ||
1523 | #else | ||
1524 | /* If the table is power-of-two compute the mask.*/ | ||
1525 | return (cksum ^ (cksum >> cfg->shift)) & cfg->mask; | ||
1526 | #endif | ||
1527 | } | ||
1528 | |||
1529 | /****************************************************************************************** | ||
1530 | Create the hash table. | ||
1531 | ******************************************************************************************/ | ||
1532 | |||
1533 | static INLINE void | ||
1534 | xd3_swap_uint8p (uint8_t** p1, uint8_t** p2) | ||
1535 | { | ||
1536 | uint8_t *t = (*p1); | ||
1537 | (*p1) = (*p2); | ||
1538 | (*p2) = t; | ||
1539 | } | ||
1540 | |||
1541 | static INLINE void | ||
1542 | xd3_swap_usize_t (usize_t* p1, usize_t* p2) | ||
1543 | { | ||
1544 | usize_t t = (*p1); | ||
1545 | (*p1) = (*p2); | ||
1546 | (*p2) = t; | ||
1547 | } | ||
1548 | |||
1549 | /* It's not constant time, but it computes the log. */ | ||
1550 | static int | ||
1551 | xd3_check_pow2 (usize_t value, usize_t *logof) | ||
1552 | { | ||
1553 | usize_t x = 1; | ||
1554 | usize_t nolog; | ||
1555 | if (logof == NULL) { | ||
1556 | logof = &nolog; | ||
1557 | } | ||
1558 | |||
1559 | *logof = 0; | ||
1560 | |||
1561 | for (; x != 0; x <<= 1, *logof += 1) | ||
1562 | { | ||
1563 | if (x == value) | ||
1564 | { | ||
1565 | return 0; | ||
1566 | } | ||
1567 | } | ||
1568 | |||
1569 | return EINVAL; | ||
1570 | } | ||
1571 | |||
1572 | static usize_t | ||
1573 | xd3_round_blksize (usize_t sz, usize_t blksz) | ||
1574 | { | ||
1575 | usize_t mod = sz & (blksz-1); | ||
1576 | |||
1577 | XD3_ASSERT (xd3_check_pow2 (blksz, NULL) == 0); | ||
1578 | |||
1579 | return mod ? (sz + (blksz - mod)) : sz; | ||
1580 | } | ||
1581 | |||
1582 | #if XD3_ENCODER | ||
1583 | #if !HASH_PRIME | ||
1584 | static usize_t | ||
1585 | xd3_size_log2 (usize_t slots) | ||
1586 | { | ||
1587 | int bits = 28; /* This should not be an unreasonable limit. */ | ||
1588 | int i; | ||
1589 | |||
1590 | for (i = 3; i <= bits; i += 1) | ||
1591 | { | ||
1592 | if (slots < (1 << i)) | ||
1593 | { | ||
1594 | bits = i-1; | ||
1595 | break; | ||
1596 | } | ||
1597 | } | ||
1598 | |||
1599 | return bits; | ||
1600 | } | ||
1601 | #endif | ||
1602 | |||
1603 | static void | ||
1604 | xd3_size_hashtable (xd3_stream *stream, | ||
1605 | usize_t space, | ||
1606 | xd3_hash_cfg *cfg) | ||
1607 | { | ||
1608 | usize_t slots = space / sizeof (usize_t); | ||
1609 | |||
1610 | /* initialize ctable: the number of hash buckets is computed from the table of primes or | ||
1611 | * the nearest power-of-two, in both cases rounding down in favor of using less | ||
1612 | * memory. */ | ||
1613 | |||
1614 | #if HASH_PRIME | ||
1615 | usize_t i; | ||
1616 | |||
1617 | cfg->size = __primes[__nprimes-1]; | ||
1618 | |||
1619 | for (i = 1; i < __nprimes; i += 1) | ||
1620 | { | ||
1621 | if (slots < __primes[i]) | ||
1622 | { | ||
1623 | cfg->size = __primes[i-1]; | ||
1624 | break; | ||
1625 | } | ||
1626 | } | ||
1627 | #else | ||
1628 | int bits = xd3_size_log2 (slots); | ||
1629 | |||
1630 | cfg->size = (1 << bits); | ||
1631 | cfg->mask = (cfg->size - 1); | ||
1632 | cfg->shift = min (32 - bits, 16); | ||
1633 | #endif | ||
1634 | } | ||
1635 | #endif | ||
1636 | |||
1637 | /****************************************************************************************** | ||
1638 | Cksum function | ||
1639 | ******************************************************************************************/ | ||
1640 | |||
1641 | /* OPT: It turns out that the compiler can't unroll the loop as well as you can by hand. */ | ||
1642 | static INLINE uint32_t | ||
1643 | xd3_lcksum (const uint8_t *seg, const int ln) | ||
1644 | { | ||
1645 | int i = 0; | ||
1646 | uint32_t low = 0; | ||
1647 | uint32_t high = 0; | ||
1648 | |||
1649 | for (; i < ln; i += 1) | ||
1650 | { | ||
1651 | low += PERMUTE(*seg++); | ||
1652 | high += low; | ||
1653 | } | ||
1654 | |||
1655 | return ((high & 0xffff) << 16) | (low & 0xffff); | ||
1656 | } | ||
1657 | |||
1658 | #if ARITH_SMALL_CKSUM | ||
1659 | static INLINE usize_t | ||
1660 | xd3_scksum (const uint8_t *seg, const int ln) | ||
1661 | { | ||
1662 | usize_t c; | ||
1663 | /* The -1 is because UPDATE operates on seg[1..ln] */ | ||
1664 | SMALL_CKSUM_UPDATE (c,(seg-1),ln); | ||
1665 | return c; | ||
1666 | } | ||
1667 | #else | ||
1668 | #define xd3_scksum(seg,ln) xd3_lcksum(seg,ln) | ||
1669 | #endif | ||
1670 | |||
1671 | /****************************************************************************************** | ||
1672 | Adler32 stream function: code copied from Zlib, defined in RFC1950 | ||
1673 | ******************************************************************************************/ | ||
1674 | |||
1675 | #define A32_BASE 65521L /* Largest prime smaller than 2^16 */ | ||
1676 | #define A32_NMAX 5552 /* NMAX is the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1 */ | ||
1677 | |||
1678 | #define A32_DO1(buf,i) {s1 += buf[i]; s2 += s1;} | ||
1679 | #define A32_DO2(buf,i) A32_DO1(buf,i); A32_DO1(buf,i+1); | ||
1680 | #define A32_DO4(buf,i) A32_DO2(buf,i); A32_DO2(buf,i+2); | ||
1681 | #define A32_DO8(buf,i) A32_DO4(buf,i); A32_DO4(buf,i+4); | ||
1682 | #define A32_DO16(buf) A32_DO8(buf,0); A32_DO8(buf,8); | ||
1683 | |||
1684 | static unsigned long adler32 (unsigned long adler, const uint8_t *buf, usize_t len) | ||
1685 | { | ||
1686 | unsigned long s1 = adler & 0xffff; | ||
1687 | unsigned long s2 = (adler >> 16) & 0xffff; | ||
1688 | int k; | ||
1689 | |||
1690 | while (len > 0) | ||
1691 | { | ||
1692 | k = (len < A32_NMAX) ? len : A32_NMAX; | ||
1693 | len -= k; | ||
1694 | |||
1695 | while (k >= 16) | ||
1696 | { | ||
1697 | A32_DO16(buf); | ||
1698 | buf += 16; | ||
1699 | k -= 16; | ||
1700 | } | ||
1701 | |||
1702 | if (k != 0) | ||
1703 | { | ||
1704 | do | ||
1705 | { | ||
1706 | s1 += *buf++; | ||
1707 | s2 += s1; | ||
1708 | } | ||
1709 | while (--k); | ||
1710 | } | ||
1711 | |||
1712 | s1 %= A32_BASE; | ||
1713 | s2 %= A32_BASE; | ||
1714 | } | ||
1715 | |||
1716 | return (s2 << 16) | s1; | ||
1717 | } | ||
1718 | |||
1719 | /****************************************************************************************** | ||
1720 | Run-length function | ||
1721 | ******************************************************************************************/ | ||
1722 | |||
1723 | static INLINE int | ||
1724 | xd3_comprun (const uint8_t *seg, int slook, uint8_t *run_cp) | ||
1725 | { | ||
1726 | int i; | ||
1727 | int run_l = 0; | ||
1728 | uint8_t run_c = 0; | ||
1729 | |||
1730 | for (i = 0; i < slook; i += 1) | ||
1731 | { | ||
1732 | NEXTRUN(seg[i]); | ||
1733 | } | ||
1734 | |||
1735 | (*run_cp) = run_c; | ||
1736 | |||
1737 | return run_l; | ||
1738 | } | ||
1739 | |||
1740 | /****************************************************************************************** | ||
1741 | Basic encoder/decoder functions | ||
1742 | ******************************************************************************************/ | ||
1743 | |||
1744 | static int | ||
1745 | xd3_decode_byte (xd3_stream *stream, uint *val) | ||
1746 | { | ||
1747 | if (stream->avail_in == 0) | ||
1748 | { | ||
1749 | stream->msg = "further input required"; | ||
1750 | return XD3_INPUT; | ||
1751 | } | ||
1752 | |||
1753 | (*val) = stream->next_in[0]; | ||
1754 | |||
1755 | DECODE_INPUT (1); | ||
1756 | return 0; | ||
1757 | } | ||
1758 | |||
1759 | static int | ||
1760 | xd3_decode_bytes (xd3_stream *stream, uint8_t *buf, usize_t *pos, usize_t size) | ||
1761 | { | ||
1762 | usize_t want; | ||
1763 | usize_t take; | ||
1764 | |||
1765 | /* Note: The case where (*pos == size) happens when a zero-length appheader or code | ||
1766 | * table is transmitted, but there is nothing in the standard against that. */ | ||
1767 | |||
1768 | while (*pos < size) | ||
1769 | { | ||
1770 | if (stream->avail_in == 0) | ||
1771 | { | ||
1772 | stream->msg = "further input required"; | ||
1773 | return XD3_INPUT; | ||
1774 | } | ||
1775 | |||
1776 | want = size - *pos; | ||
1777 | take = min (want, stream->avail_in); | ||
1778 | |||
1779 | memcpy (buf + *pos, stream->next_in, take); | ||
1780 | |||
1781 | DECODE_INPUT (take); | ||
1782 | (*pos) += take; | ||
1783 | } | ||
1784 | |||
1785 | return 0; | ||
1786 | } | ||
1787 | |||
1788 | #if XD3_ENCODER | ||
1789 | static int | ||
1790 | xd3_emit_byte (xd3_stream *stream, | ||
1791 | xd3_output **outputp, | ||
1792 | uint8_t code) | ||
1793 | { | ||
1794 | xd3_output *output = (*outputp); | ||
1795 | |||
1796 | if (output->next == output->avail) | ||
1797 | { | ||
1798 | xd3_output *aoutput; | ||
1799 | |||
1800 | if ((aoutput = xd3_alloc_output (stream, output)) == NULL) | ||
1801 | { | ||
1802 | return ENOMEM; | ||
1803 | } | ||
1804 | |||
1805 | output = (*outputp) = aoutput; | ||
1806 | } | ||
1807 | |||
1808 | output->base[output->next++] = code; | ||
1809 | |||
1810 | return 0; | ||
1811 | } | ||
1812 | |||
1813 | static int | ||
1814 | xd3_emit_bytes (xd3_stream *stream, | ||
1815 | xd3_output **outputp, | ||
1816 | const uint8_t *base, | ||
1817 | usize_t size) | ||
1818 | { | ||
1819 | xd3_output *output = (*outputp); | ||
1820 | |||
1821 | do | ||
1822 | { | ||
1823 | usize_t take; | ||
1824 | |||
1825 | if (output->next == output->avail) | ||
1826 | { | ||
1827 | xd3_output *aoutput; | ||
1828 | |||
1829 | if ((aoutput = xd3_alloc_output (stream, output)) == NULL) | ||
1830 | { | ||
1831 | return ENOMEM; | ||
1832 | } | ||
1833 | |||
1834 | output = (*outputp) = aoutput; | ||
1835 | } | ||
1836 | |||
1837 | take = min (output->avail - output->next, size); | ||
1838 | |||
1839 | memcpy (output->base + output->next, base, take); | ||
1840 | |||
1841 | output->next += take; | ||
1842 | size -= take; | ||
1843 | base += take; | ||
1844 | } | ||
1845 | while (size > 0); | ||
1846 | |||
1847 | return 0; | ||
1848 | } | ||
1849 | #endif /* XD3_ENCODER */ | ||
1850 | |||
1851 | /****************************************************************************************** | ||
1852 | Integer encoder/decoder functions | ||
1853 | ******************************************************************************************/ | ||
1854 | |||
1855 | #define DECODE_INTEGER_TYPE(PART,OFLOW) \ | ||
1856 | while (stream->avail_in != 0) \ | ||
1857 | { \ | ||
1858 | uint next = stream->next_in[0]; \ | ||
1859 | \ | ||
1860 | DECODE_INPUT(1); \ | ||
1861 | \ | ||
1862 | if (PART & OFLOW) \ | ||
1863 | { \ | ||
1864 | stream->msg = "overflow in decode_integer"; \ | ||
1865 | return EINVAL; \ | ||
1866 | } \ | ||
1867 | \ | ||
1868 | PART = (PART << 7) | (next & 127); \ | ||
1869 | \ | ||
1870 | if ((next & 128) == 0) \ | ||
1871 | { \ | ||
1872 | (*val) = PART; \ | ||
1873 | PART = 0; \ | ||
1874 | return 0; \ | ||
1875 | } \ | ||
1876 | } \ | ||
1877 | \ | ||
1878 | stream->msg = "further input required"; \ | ||
1879 | return XD3_INPUT | ||
1880 | |||
1881 | #define READ_INTEGER_TYPE(TYPE, OFLOW) \ | ||
1882 | TYPE val = 0; \ | ||
1883 | const uint8_t *inp = (*inpp); \ | ||
1884 | uint next; \ | ||
1885 | \ | ||
1886 | do \ | ||
1887 | { \ | ||
1888 | if (inp == max) \ | ||
1889 | { \ | ||
1890 | stream->msg = "end-of-input in read_integer"; \ | ||
1891 | return EINVAL; \ | ||
1892 | } \ | ||
1893 | \ | ||
1894 | if (val & OFLOW) \ | ||
1895 | { \ | ||
1896 | stream->msg = "overflow in read_intger"; \ | ||
1897 | return EINVAL; \ | ||
1898 | } \ | ||
1899 | \ | ||
1900 | next = (*inp++); \ | ||
1901 | val = (val << 7) | (next & 127); \ | ||
1902 | } \ | ||
1903 | while (next & 128); \ | ||
1904 | \ | ||
1905 | (*valp) = val; \ | ||
1906 | (*inpp) = inp; \ | ||
1907 | \ | ||
1908 | return 0 | ||
1909 | |||
1910 | #define EMIT_INTEGER_TYPE() \ | ||
1911 | /* max 64-bit value in base-7 encoding is 9.1 bytes */ \ | ||
1912 | uint8_t buf[10]; \ | ||
1913 | usize_t bufi = 10; \ | ||
1914 | \ | ||
1915 | XD3_ASSERT (num >= 0); \ | ||
1916 | \ | ||
1917 | /* This loop performs division and turns on all MSBs. */ \ | ||
1918 | do \ | ||
1919 | { \ | ||
1920 | buf[--bufi] = (num & 127) | 128; \ | ||
1921 | num >>= 7; \ | ||
1922 | } \ | ||
1923 | while (num != 0); \ | ||
1924 | \ | ||
1925 | /* Turn off MSB of the last byte. */ \ | ||
1926 | buf[9] &= 127; \ | ||
1927 | \ | ||
1928 | XD3_ASSERT (bufi >= 0); \ | ||
1929 | \ | ||
1930 | return xd3_emit_bytes (stream, output, buf + bufi, 10 - bufi) | ||
1931 | |||
1932 | #define IF_SIZEOF32(x) if (num < (1U << (7 * (x)))) return (x); | ||
1933 | #define IF_SIZEOF64(x) if (num < (1ULL << (7 * (x)))) return (x); | ||
1934 | |||
1935 | #if USE_UINT32 | ||
1936 | static uint | ||
1937 | xd3_sizeof_uint32_t (uint32_t num) | ||
1938 | { | ||
1939 | IF_SIZEOF32(1); | ||
1940 | IF_SIZEOF32(2); | ||
1941 | IF_SIZEOF32(3); | ||
1942 | IF_SIZEOF32(4); | ||
1943 | |||
1944 | return 5; | ||
1945 | } | ||
1946 | |||
1947 | static int | ||
1948 | xd3_decode_uint32_t (xd3_stream *stream, uint32_t *val) | ||
1949 | { DECODE_INTEGER_TYPE (stream->dec_32part, UINT32_OFLOW_MASK); } | ||
1950 | static int | ||
1951 | xd3_read_uint32_t (xd3_stream *stream, const uint8_t **inpp, const uint8_t *max, uint32_t *valp) | ||
1952 | { READ_INTEGER_TYPE (uint32_t, UINT32_OFLOW_MASK); } | ||
1953 | #if XD3_ENCODER | ||
1954 | static int | ||
1955 | xd3_emit_uint32_t (xd3_stream *stream, xd3_output **output, uint32_t num) | ||
1956 | { EMIT_INTEGER_TYPE (); } | ||
1957 | #endif | ||
1958 | #endif | ||
1959 | |||
1960 | #if USE_UINT64 | ||
1961 | /* We only ever decode offsets, but the other three are part of the regression test | ||
1962 | * anyway. */ | ||
1963 | static int | ||
1964 | xd3_decode_uint64_t (xd3_stream *stream, uint64_t *val) | ||
1965 | { DECODE_INTEGER_TYPE (stream->dec_64part, UINT64_OFLOW_MASK); } | ||
1966 | #if REGRESSION_TEST | ||
1967 | #if XD3_ENCODER | ||
1968 | static int | ||
1969 | xd3_emit_uint64_t (xd3_stream *stream, xd3_output **output, uint64_t num) | ||
1970 | { EMIT_INTEGER_TYPE (); } | ||
1971 | #endif | ||
1972 | static int | ||
1973 | xd3_read_uint64_t (xd3_stream *stream, const uint8_t **inpp, const uint8_t *max, uint64_t *valp) | ||
1974 | { READ_INTEGER_TYPE (uint64_t, UINT64_OFLOW_MASK); } | ||
1975 | |||
1976 | static uint | ||
1977 | xd3_sizeof_uint64_t (uint64_t num) | ||
1978 | { | ||
1979 | IF_SIZEOF64(1); | ||
1980 | IF_SIZEOF64(2); | ||
1981 | IF_SIZEOF64(3); | ||
1982 | IF_SIZEOF64(4); | ||
1983 | IF_SIZEOF64(5); | ||
1984 | IF_SIZEOF64(6); | ||
1985 | IF_SIZEOF64(7); | ||
1986 | IF_SIZEOF64(8); | ||
1987 | IF_SIZEOF64(9); | ||
1988 | |||
1989 | return 10; | ||
1990 | } | ||
1991 | #endif | ||
1992 | #endif | ||
1993 | |||
1994 | /****************************************************************************************** | ||
1995 | Debug instruction statistics | ||
1996 | ******************************************************************************************/ | ||
1997 | |||
1998 | #if XD3_DEBUG | ||
1999 | static void | ||
2000 | xd3_count_inst (xd3_stream *stream, uint code) | ||
2001 | { | ||
2002 | IF_DEBUG1 ({ | ||
2003 | if (stream->i_freqs == NULL && | ||
2004 | (stream->i_freqs = xd3_alloc0 (stream, sizeof (stream->i_freqs[0]), 256)) == NULL) { abort (); } | ||
2005 | |||
2006 | stream->i_freqs[code] += 1; | ||
2007 | }); | ||
2008 | stream->n_ibytes += 1; | ||
2009 | } | ||
2010 | |||
2011 | static void | ||
2012 | xd3_count_mode (xd3_stream *stream, uint mode) | ||
2013 | { | ||
2014 | IF_DEBUG1 ({ | ||
2015 | if (stream->i_modes == NULL && | ||
2016 | (stream->i_modes = xd3_alloc0 (stream, sizeof (stream->i_modes[0]), TOTAL_MODES (stream))) == NULL) { abort (); } | ||
2017 | stream->i_modes[mode] += 1; | ||
2018 | }); | ||
2019 | } | ||
2020 | |||
2021 | static void | ||
2022 | xd3_count_size (xd3_stream *stream, usize_t size) | ||
2023 | { | ||
2024 | IF_DEBUG1({ | ||
2025 | if (stream->i_sizes == NULL && | ||
2026 | (stream->i_sizes = xd3_alloc0 (stream, sizeof (stream->i_sizes[0]), 64)) == NULL) { abort (); } | ||
2027 | |||
2028 | if (size < 64) { stream->i_sizes[size] += 1; } | ||
2029 | }); | ||
2030 | stream->n_sbytes += xd3_sizeof_size (size); | ||
2031 | } | ||
2032 | #endif | ||
2033 | |||
2034 | /****************************************************************************************** | ||
2035 | Address cache stuff | ||
2036 | ******************************************************************************************/ | ||
2037 | |||
2038 | static int | ||
2039 | xd3_alloc_cache (xd3_stream *stream) | ||
2040 | { | ||
2041 | if (((stream->acache.s_near > 0) && | ||
2042 | (stream->acache.near_array = xd3_alloc (stream, stream->acache.s_near, sizeof (usize_t))) == NULL) || | ||
2043 | ((stream->acache.s_same > 0) && | ||
2044 | (stream->acache.same_array = xd3_alloc (stream, stream->acache.s_same * 256, sizeof (usize_t))) == NULL)) | ||
2045 | { | ||
2046 | return ENOMEM; | ||
2047 | } | ||
2048 | |||
2049 | return 0; | ||
2050 | } | ||
2051 | |||
2052 | static void | ||
2053 | xd3_init_cache (xd3_addr_cache* acache) | ||
2054 | { | ||
2055 | if (acache->s_near > 0) | ||
2056 | { | ||
2057 | memset (acache->near_array, 0, acache->s_near * sizeof (usize_t)); | ||
2058 | acache->next_slot = 0; | ||
2059 | } | ||
2060 | |||
2061 | if (acache->s_same > 0) | ||
2062 | { | ||
2063 | memset (acache->same_array, 0, acache->s_same * 256 * sizeof (usize_t)); | ||
2064 | } | ||
2065 | } | ||
2066 | |||
2067 | static void | ||
2068 | xd3_update_cache (xd3_addr_cache* acache, usize_t addr) | ||
2069 | { | ||
2070 | if (acache->s_near > 0) | ||
2071 | { | ||
2072 | acache->near_array[acache->next_slot] = addr; | ||
2073 | acache->next_slot = (acache->next_slot + 1) % acache->s_near; | ||
2074 | } | ||
2075 | |||
2076 | if (acache->s_same > 0) | ||
2077 | { | ||
2078 | acache->same_array[addr % (acache->s_same*256)] = addr; | ||
2079 | } | ||
2080 | } | ||
2081 | |||
2082 | #if XD3_ENCODER | ||
2083 | /* OPT: this gets called a lot, can it be optimized? */ | ||
2084 | static int | ||
2085 | xd3_encode_address (xd3_stream *stream, usize_t addr, usize_t here, uint8_t* mode) | ||
2086 | { | ||
2087 | usize_t d, bestd; | ||
2088 | int i, bestm, ret; | ||
2089 | xd3_addr_cache* acache = & stream->acache; | ||
2090 | |||
2091 | #define SMALLEST_INT(x) do { if (((x) & ~127) == 0) { goto good; } } while (0) | ||
2092 | |||
2093 | /* Attempt to find the address mode that yields the smallest integer value for "d", the | ||
2094 | * encoded address value, thereby minimizing the encoded size of the address. */ | ||
2095 | bestd = addr; | ||
2096 | bestm = VCD_SELF; | ||
2097 | |||
2098 | XD3_ASSERT (addr < here); | ||
2099 | |||
2100 | SMALLEST_INT (bestd); | ||
2101 | |||
2102 | if ((d = here-addr) < bestd) | ||
2103 | { | ||
2104 | bestd = d; | ||
2105 | bestm = VCD_HERE; | ||
2106 | |||
2107 | SMALLEST_INT (bestd); | ||
2108 | } | ||
2109 | |||
2110 | for (i = 0; i < acache->s_near; i += 1) | ||
2111 | { | ||
2112 | d = addr - acache->near_array[i]; | ||
2113 | |||
2114 | if (d >= 0 && d < bestd) | ||
2115 | { | ||
2116 | bestd = d; | ||
2117 | bestm = i+2; /* 2 counts the VCD_SELF, VCD_HERE modes */ | ||
2118 | |||
2119 | SMALLEST_INT (bestd); | ||
2120 | } | ||
2121 | } | ||
2122 | |||
2123 | if (acache->s_same > 0 && acache->same_array[d = addr%(acache->s_same*256)] == addr) | ||
2124 | { | ||
2125 | bestd = d%256; | ||
2126 | bestm = acache->s_near + 2 + d/256; /* 2 + s_near offsets past the VCD_NEAR modes */ | ||
2127 | |||
2128 | if ((ret = xd3_emit_byte (stream, & ADDR_TAIL (stream), bestd))) { return ret; } | ||
2129 | } | ||
2130 | else | ||
2131 | { | ||
2132 | good: | ||
2133 | |||
2134 | if ((ret = xd3_emit_size (stream, & ADDR_TAIL (stream), bestd))) { return ret; } | ||
2135 | } | ||
2136 | |||
2137 | xd3_update_cache (acache, addr); | ||
2138 | |||
2139 | IF_DEBUG (xd3_count_mode (stream, bestm)); | ||
2140 | |||
2141 | (*mode) += bestm; | ||
2142 | |||
2143 | return 0; | ||
2144 | } | ||
2145 | #endif | ||
2146 | |||
2147 | static int | ||
2148 | xd3_decode_address (xd3_stream *stream, usize_t here, uint mode, const uint8_t **inpp, const uint8_t *max, uint32_t *valp) | ||
2149 | { | ||
2150 | int ret; | ||
2151 | uint same_start = 2 + stream->acache.s_near; | ||
2152 | |||
2153 | if (mode < same_start) | ||
2154 | { | ||
2155 | if ((ret = xd3_read_size (stream, inpp, max, valp))) { return ret; } | ||
2156 | |||
2157 | switch (mode) | ||
2158 | { | ||
2159 | case VCD_SELF: | ||
2160 | break; | ||
2161 | case VCD_HERE: | ||
2162 | (*valp) = here - (*valp); | ||
2163 | break; | ||
2164 | default: | ||
2165 | (*valp) += stream->acache.near_array[mode - 2]; | ||
2166 | break; | ||
2167 | } | ||
2168 | } | ||
2169 | else | ||
2170 | { | ||
2171 | if (*inpp == max) | ||
2172 | { | ||
2173 | stream->msg = "address underflow"; | ||
2174 | return EINVAL; | ||
2175 | } | ||
2176 | |||
2177 | mode -= same_start; | ||
2178 | |||
2179 | (*valp) = stream->acache.same_array[mode*256 + (**inpp)]; | ||
2180 | |||
2181 | (*inpp) += 1; | ||
2182 | } | ||
2183 | |||
2184 | xd3_update_cache (& stream->acache, *valp); | ||
2185 | |||
2186 | return 0; | ||
2187 | } | ||
2188 | |||
2189 | /****************************************************************************************** | ||
2190 | Alloc/free | ||
2191 | ******************************************************************************************/ | ||
2192 | |||
2193 | static void* | ||
2194 | __xd3_alloc_func (void* opaque, usize_t items, usize_t size) | ||
2195 | { | ||
2196 | return malloc (items * size); | ||
2197 | } | ||
2198 | |||
2199 | static void | ||
2200 | __xd3_free_func (void* opaque, void* address) | ||
2201 | { | ||
2202 | free (address); | ||
2203 | } | ||
2204 | |||
2205 | static void* | ||
2206 | xd3_alloc (xd3_stream *stream, | ||
2207 | usize_t elts, | ||
2208 | usize_t size) | ||
2209 | { | ||
2210 | void *a = stream->alloc (stream->opaque, elts, size); | ||
2211 | |||
2212 | if (a != NULL) | ||
2213 | { | ||
2214 | IF_DEBUG (stream->alloc_cnt += 1); | ||
2215 | } | ||
2216 | else | ||
2217 | { | ||
2218 | stream->msg = "out of memory"; | ||
2219 | } | ||
2220 | |||
2221 | return a; | ||
2222 | } | ||
2223 | |||
2224 | static void | ||
2225 | xd3_free (xd3_stream *stream, | ||
2226 | void *ptr) | ||
2227 | { | ||
2228 | if (ptr != NULL) | ||
2229 | { | ||
2230 | IF_DEBUG (stream->free_cnt += 1); | ||
2231 | XD3_ASSERT (stream->free_cnt <= stream->alloc_cnt); | ||
2232 | stream->free (stream->opaque, ptr); | ||
2233 | } | ||
2234 | } | ||
2235 | |||
2236 | #if XD3_ENCODER | ||
2237 | static void* | ||
2238 | xd3_alloc0 (xd3_stream *stream, | ||
2239 | usize_t elts, | ||
2240 | usize_t size) | ||
2241 | { | ||
2242 | void *a = xd3_alloc (stream, elts, size); | ||
2243 | |||
2244 | if (a != NULL) | ||
2245 | { | ||
2246 | memset (a, 0, elts * size); | ||
2247 | } | ||
2248 | |||
2249 | return a; | ||
2250 | } | ||
2251 | |||
2252 | static xd3_output* | ||
2253 | xd3_alloc_output (xd3_stream *stream, | ||
2254 | xd3_output *old_output) | ||
2255 | { | ||
2256 | xd3_output *output; | ||
2257 | uint8_t *base; | ||
2258 | |||
2259 | if (stream->enc_free != NULL) | ||
2260 | { | ||
2261 | output = stream->enc_free; | ||
2262 | stream->enc_free = output->next_page; | ||
2263 | } | ||
2264 | else | ||
2265 | { | ||
2266 | if ((output = xd3_alloc (stream, 1, sizeof (xd3_output))) == NULL) | ||
2267 | { | ||
2268 | return NULL; | ||
2269 | } | ||
2270 | |||
2271 | if ((base = xd3_alloc (stream, XD3_ALLOCSIZE, sizeof (uint8_t))) == NULL) | ||
2272 | { | ||
2273 | xd3_free (stream, output); | ||
2274 | return NULL; | ||
2275 | } | ||
2276 | |||
2277 | output->base = base; | ||
2278 | output->avail = XD3_ALLOCSIZE; | ||
2279 | } | ||
2280 | |||
2281 | output->next = 0; | ||
2282 | |||
2283 | if (old_output) | ||
2284 | { | ||
2285 | old_output->next_page = output; | ||
2286 | } | ||
2287 | |||
2288 | output->next_page = NULL; | ||
2289 | |||
2290 | return output; | ||
2291 | } | ||
2292 | |||
2293 | static usize_t | ||
2294 | xd3_sizeof_output (xd3_output *output) | ||
2295 | { | ||
2296 | usize_t s = 0; | ||
2297 | |||
2298 | for (; output; output = output->next_page) | ||
2299 | { | ||
2300 | s += output->next; | ||
2301 | } | ||
2302 | |||
2303 | return s; | ||
2304 | } | ||
2305 | |||
2306 | static void | ||
2307 | xd3_freelist_output (xd3_stream *stream, | ||
2308 | xd3_output *output) | ||
2309 | { | ||
2310 | xd3_output *tmp; | ||
2311 | |||
2312 | while (output) | ||
2313 | { | ||
2314 | tmp = output; | ||
2315 | output = output->next_page; | ||
2316 | |||
2317 | tmp->next = 0; | ||
2318 | tmp->next_page = stream->enc_free; | ||
2319 | stream->enc_free = tmp; | ||
2320 | } | ||
2321 | } | ||
2322 | |||
2323 | static void | ||
2324 | xd3_free_output (xd3_stream *stream, | ||
2325 | xd3_output *output) | ||
2326 | { | ||
2327 | xd3_output *next; | ||
2328 | |||
2329 | again: | ||
2330 | if (output == NULL) | ||
2331 | { | ||
2332 | return; | ||
2333 | } | ||
2334 | |||
2335 | next = output->next_page; | ||
2336 | |||
2337 | xd3_free (stream, output->base); | ||
2338 | xd3_free (stream, output); | ||
2339 | |||
2340 | output = next; | ||
2341 | goto again; | ||
2342 | } | ||
2343 | #endif /* XD3_ENCODER */ | ||
2344 | |||
2345 | void | ||
2346 | xd3_free_stream (xd3_stream *stream) | ||
2347 | { | ||
2348 | |||
2349 | xd3_free (stream, stream->large_table); | ||
2350 | xd3_free (stream, stream->small_table); | ||
2351 | xd3_free (stream, stream->small_prev); | ||
2352 | xd3_free (stream, stream->iopt.buffer); | ||
2353 | |||
2354 | #if XD3_ENCODER | ||
2355 | { | ||
2356 | int i; | ||
2357 | for (i = 0; i < ENC_SECTS; i += 1) | ||
2358 | { | ||
2359 | xd3_free_output (stream, stream->enc_heads[i]); | ||
2360 | } | ||
2361 | xd3_free_output (stream, stream->enc_free); | ||
2362 | } | ||
2363 | #endif | ||
2364 | |||
2365 | xd3_free (stream, stream->acache.near_array); | ||
2366 | xd3_free (stream, stream->acache.same_array); | ||
2367 | |||
2368 | xd3_free (stream, stream->inst_sect.copied1); | ||
2369 | xd3_free (stream, stream->addr_sect.copied1); | ||
2370 | xd3_free (stream, stream->data_sect.copied1); | ||
2371 | |||
2372 | xd3_free (stream, stream->dec_buffer); | ||
2373 | xd3_free (stream, (uint8_t*) stream->dec_lastwin); | ||
2374 | |||
2375 | xd3_free (stream, stream->buf_in); | ||
2376 | xd3_free (stream, stream->dec_appheader); | ||
2377 | xd3_free (stream, stream->dec_codetbl); | ||
2378 | xd3_free (stream, stream->code_table_alloc); | ||
2379 | |||
2380 | #if SECONDARY_ANY | ||
2381 | xd3_free (stream, stream->inst_sect.copied2); | ||
2382 | xd3_free (stream, stream->addr_sect.copied2); | ||
2383 | xd3_free (stream, stream->data_sect.copied2); | ||
2384 | |||
2385 | if (stream->sec_type != NULL) | ||
2386 | { | ||
2387 | stream->sec_type->destroy (stream, stream->sec_stream_d); | ||
2388 | stream->sec_type->destroy (stream, stream->sec_stream_i); | ||
2389 | stream->sec_type->destroy (stream, stream->sec_stream_a); | ||
2390 | } | ||
2391 | #endif | ||
2392 | |||
2393 | IF_DEBUG (xd3_free (stream, stream->i_freqs)); | ||
2394 | IF_DEBUG (xd3_free (stream, stream->i_modes)); | ||
2395 | IF_DEBUG (xd3_free (stream, stream->i_sizes)); | ||
2396 | |||
2397 | XD3_ASSERT (stream->alloc_cnt == stream->free_cnt); | ||
2398 | |||
2399 | memset (stream, 0, sizeof (xd3_stream)); | ||
2400 | } | ||
2401 | |||
2402 | #if (XD3_DEBUG || VCDIFF_TOOLS) | ||
2403 | static const char* | ||
2404 | xd3_rtype_to_string (xd3_rtype type, int print_mode) | ||
2405 | { | ||
2406 | switch (type) | ||
2407 | { | ||
2408 | case XD3_NOOP: | ||
2409 | return "NOOP "; | ||
2410 | case XD3_RUN: | ||
2411 | return "RUN "; | ||
2412 | case XD3_ADD: | ||
2413 | return "ADD "; | ||
2414 | default: break; | ||
2415 | } | ||
2416 | if (! print_mode) | ||
2417 | { | ||
2418 | return "CPY "; | ||
2419 | } | ||
2420 | switch (type) | ||
2421 | { | ||
2422 | case XD3_CPY + 0: return "CPY_0"; | ||
2423 | case XD3_CPY + 1: return "CPY_1"; | ||
2424 | case XD3_CPY + 2: return "CPY_2"; | ||
2425 | case XD3_CPY + 3: return "CPY_3"; | ||
2426 | case XD3_CPY + 4: return "CPY_4"; | ||
2427 | case XD3_CPY + 5: return "CPY_5"; | ||
2428 | case XD3_CPY + 6: return "CPY_6"; | ||
2429 | case XD3_CPY + 7: return "CPY_7"; | ||
2430 | case XD3_CPY + 8: return "CPY_8"; | ||
2431 | case XD3_CPY + 9: return "CPY_9"; | ||
2432 | default: return "CPY>9"; | ||
2433 | } | ||
2434 | } | ||
2435 | #endif | ||
2436 | |||
2437 | /****************************************************************************************** | ||
2438 | Stream configuration | ||
2439 | ******************************************************************************************/ | ||
2440 | |||
2441 | int | ||
2442 | xd3_config_stream(xd3_stream *stream, | ||
2443 | xd3_config *config) | ||
2444 | { | ||
2445 | int ret; | ||
2446 | xd3_config defcfg; | ||
2447 | const xd3_smatcher* smatcher; | ||
2448 | |||
2449 | if (config == NULL) | ||
2450 | { | ||
2451 | config = & defcfg; | ||
2452 | memset (config, 0, sizeof (*config)); | ||
2453 | } | ||
2454 | |||
2455 | /* Initial setup: no error checks yet */ | ||
2456 | memset (stream, 0, sizeof (*stream)); | ||
2457 | |||
2458 | stream->memsize = config->memsize ? config->memsize : XD3_DEFAULT_MEMSIZE; | ||
2459 | stream->winsize = config->winsize ? config->winsize : XD3_DEFAULT_WINSIZE; | ||
2460 | stream->sprevsz = config->sprevsz ? config->sprevsz : XD3_DEFAULT_SPREVSZ; | ||
2461 | stream->srcwin_size = config->srcwin_size ? config->srcwin_size : XD3_DEFAULT_START_CKSUM_ADVANCE; | ||
2462 | stream->srcwin_maxsz = config->srcwin_maxsz ? config->srcwin_maxsz : XD3_DEFAULT_MAX_CKSUM_ADVANCE; | ||
2463 | stream->iopt_size = config->iopt_size ? config->iopt_size : XD3_DEFAULT_IOPT_SIZE; | ||
2464 | stream->getblk = config->getblk; | ||
2465 | stream->alloc = config->alloc ? config->alloc : __xd3_alloc_func; | ||
2466 | stream->free = config->freef ? config->freef : __xd3_free_func; | ||
2467 | stream->opaque = config->opaque; | ||
2468 | stream->flags = config->flags; | ||
2469 | |||
2470 | XD3_ASSERT (stream->winsize > 0); | ||
2471 | |||
2472 | /* Secondary setup. */ | ||
2473 | stream->sec_data = config->sec_data; | ||
2474 | stream->sec_inst = config->sec_inst; | ||
2475 | stream->sec_addr = config->sec_addr; | ||
2476 | |||
2477 | stream->sec_data.data_type = DATA_SECTION; | ||
2478 | stream->sec_inst.data_type = INST_SECTION; | ||
2479 | stream->sec_addr.data_type = ADDR_SECTION; | ||
2480 | |||
2481 | /* Check static sizes. */ | ||
2482 | if (sizeof (usize_t) != SIZEOF_USIZE_T || | ||
2483 | sizeof (xoff_t) != SIZEOF_XOFF_T || | ||
2484 | (ret = xd3_check_pow2(XD3_ALLOCSIZE, NULL))) | ||
2485 | { | ||
2486 | stream->msg = "incorrect compilation: wrong integer sizes"; | ||
2487 | return EINVAL; | ||
2488 | } | ||
2489 | |||
2490 | /* Check/set secondary compressor. */ | ||
2491 | switch (stream->flags & XD3_SEC_TYPE) | ||
2492 | { | ||
2493 | case 0: | ||
2494 | if (stream->flags & XD3_SEC_OTHER) | ||
2495 | { | ||
2496 | stream->msg = "XD3_SEC flags require a secondary compressor type"; | ||
2497 | return EINVAL; | ||
2498 | } | ||
2499 | break; | ||
2500 | case XD3_SEC_FGK: | ||
2501 | FGK_CASE (stream); | ||
2502 | case XD3_SEC_DJW: | ||
2503 | DJW_CASE (stream); | ||
2504 | default: | ||
2505 | stream->msg = "too many secondary compressor types set"; | ||
2506 | return EINVAL; | ||
2507 | } | ||
2508 | |||
2509 | /* Check/set encoder code table. */ | ||
2510 | switch (stream->flags & XD3_ALT_CODE_TABLE) { | ||
2511 | case 0: | ||
2512 | stream->code_table_desc = & __rfc3284_code_table_desc; | ||
2513 | stream->code_table_func = xd3_rfc3284_code_table; | ||
2514 | break; | ||
2515 | #if GENERIC_ENCODE_TABLES | ||
2516 | case XD3_ALT_CODE_TABLE: | ||
2517 | stream->code_table_desc = & __alternate_code_table_desc; | ||
2518 | stream->code_table_func = xd3_alternate_code_table; | ||
2519 | stream->comp_table_func = xd3_compute_alternate_table_encoding; | ||
2520 | break; | ||
2521 | #endif | ||
2522 | default: | ||
2523 | stream->msg = "alternate code table support was not compiled"; | ||
2524 | return EINVAL; | ||
2525 | } | ||
2526 | |||
2527 | /* Check sprevsz */ | ||
2528 | if (config->small_chain == 1) | ||
2529 | { | ||
2530 | stream->sprevsz = 0; | ||
2531 | } | ||
2532 | else | ||
2533 | { | ||
2534 | if ((ret = xd3_check_pow2 (stream->sprevsz, NULL))) | ||
2535 | { | ||
2536 | stream->msg = "sprevsz is required to be a power of two"; | ||
2537 | return EINVAL; | ||
2538 | } | ||
2539 | |||
2540 | stream->sprevmask = stream->sprevsz - 1; | ||
2541 | } | ||
2542 | |||
2543 | /* Default scanner settings. */ | ||
2544 | switch (config->smatch_cfg) | ||
2545 | { | ||
2546 | IF_BUILD_SOFT(case XD3_SMATCH_SOFT: | ||
2547 | smatcher = & __smatcher_soft; break; | ||
2548 | |||
2549 | if (config->large_look < MIN_MATCH || | ||
2550 | config->large_step < 1 || | ||
2551 | config->small_look < MIN_MATCH || | ||
2552 | config->small_chain < 1 || | ||
2553 | config->large_look < config->small_look || | ||
2554 | config->small_chain < config->small_lchain || | ||
2555 | (config->small_lchain == 0 && config->try_lazy) || | ||
2556 | config->srcwin_size < stream->large_look || | ||
2557 | config->srcwin_maxsz < stream->srcwin_size) | ||
2558 | { | ||
2559 | stream->msg = "invalid soft string-match config"; | ||
2560 | return EINVAL; | ||
2561 | } | ||
2562 | break;) | ||
2563 | |||
2564 | IF_BUILD_SLOW(case XD3_SMATCH_DEFAULT:) | ||
2565 | IF_BUILD_SLOW(case XD3_SMATCH_SLOW: smatcher = & __smatcher_slow; break;) | ||
2566 | IF_BUILD_FAST(case XD3_SMATCH_FAST: smatcher = & __smatcher_fast; break;) | ||
2567 | default: | ||
2568 | stream->msg = "invalid string match config type"; | ||
2569 | return EINVAL; | ||
2570 | } | ||
2571 | |||
2572 | stream->string_match = smatcher->string_match; | ||
2573 | XD3_ASSERT(stream->string_match); | ||
2574 | |||
2575 | XD3_COPY_CONFIG_FIELDS (stream, smatcher); | ||
2576 | |||
2577 | /* If it is a soft config, the smatcher fields didn't set anything, copy from config | ||
2578 | * instead. */ | ||
2579 | if (stream->large_look == 0) | ||
2580 | { | ||
2581 | XD3_COPY_CONFIG_FIELDS (stream, config); | ||
2582 | } | ||
2583 | |||
2584 | IF_DEBUG1 (P(RINT "[stream cfg] llook %u lstep %u slook %u\n", | ||
2585 | stream->large_look, stream->large_step, stream->small_look)); | ||
2586 | return 0; | ||
2587 | } | ||
2588 | |||
2589 | /****************************************************************************************** | ||
2590 | Getblk interface | ||
2591 | ******************************************************************************************/ | ||
2592 | |||
2593 | /* This function interfaces with the client getblk function, checks its results, etc. */ | ||
2594 | static int | ||
2595 | xd3_getblk (xd3_stream *stream/*, xd3_source *source*/, xoff_t blkno) | ||
2596 | { | ||
2597 | int ret; | ||
2598 | xd3_source *source = stream->src; | ||
2599 | |||
2600 | if (blkno >= source->blocks) | ||
2601 | { | ||
2602 | stream->msg = "source file too short"; | ||
2603 | return EINVAL; | ||
2604 | } | ||
2605 | |||
2606 | if (blkno != source->curblkno || source->curblk == NULL) | ||
2607 | { | ||
2608 | XD3_ASSERT (source->curblk != NULL || blkno != source->curblkno); | ||
2609 | |||
2610 | source->getblkno = blkno; | ||
2611 | |||
2612 | if (stream->getblk == NULL) | ||
2613 | { | ||
2614 | stream->msg = "getblk source input"; | ||
2615 | return XD3_GETSRCBLK; | ||
2616 | } | ||
2617 | else if ((ret = stream->getblk (stream, source, blkno)) != 0) | ||
2618 | { | ||
2619 | stream->msg = "getblk failed"; | ||
2620 | return ret; | ||
2621 | } | ||
2622 | |||
2623 | XD3_ASSERT (source->curblk != NULL); | ||
2624 | } | ||
2625 | |||
2626 | if (source->onblk != xd3_bytes_on_srcblk (source, blkno)) | ||
2627 | { | ||
2628 | stream->msg = "getblk returned short block"; | ||
2629 | return EINVAL; | ||
2630 | } | ||
2631 | |||
2632 | return 0; | ||
2633 | } | ||
2634 | |||
2635 | /****************************************************************************************** | ||
2636 | Stream open/close | ||
2637 | ******************************************************************************************/ | ||
2638 | |||
2639 | int | ||
2640 | xd3_set_source (xd3_stream *stream, | ||
2641 | xd3_source *src) | ||
2642 | { | ||
2643 | xoff_t blk_num; | ||
2644 | xoff_t tail_size; | ||
2645 | |||
2646 | IF_DEBUG1 (P(RINT "[set source] size %"Q"u\n", src->size)); | ||
2647 | |||
2648 | if (src == NULL || src->size < stream->large_look) { return 0; } | ||
2649 | |||
2650 | stream->src = src; | ||
2651 | blk_num = src->size / src->blksize; | ||
2652 | tail_size = src->size % src->blksize; | ||
2653 | src->blocks = blk_num + (tail_size > 0); | ||
2654 | src->srclen = 0; | ||
2655 | src->srcbase = 0; | ||
2656 | |||
2657 | return 0; | ||
2658 | } | ||
2659 | |||
2660 | void | ||
2661 | xd3_abort_stream (xd3_stream *stream) | ||
2662 | { | ||
2663 | stream->dec_state = DEC_ABORTED; | ||
2664 | stream->enc_state = ENC_ABORTED; | ||
2665 | } | ||
2666 | |||
2667 | int | ||
2668 | xd3_close_stream (xd3_stream *stream) | ||
2669 | { | ||
2670 | if (stream->enc_state != 0 && stream->enc_state != ENC_ABORTED) | ||
2671 | { | ||
2672 | /* If encoding, should be ready for more input but not actually have any. */ | ||
2673 | if (stream->enc_state != ENC_INPUT || stream->avail_in != 0) | ||
2674 | { | ||
2675 | stream->msg = "encoding is incomplete"; | ||
2676 | return EINVAL; | ||
2677 | } | ||
2678 | } | ||
2679 | else | ||
2680 | { | ||
2681 | switch (stream->dec_state) | ||
2682 | { | ||
2683 | case DEC_VCHEAD: | ||
2684 | case DEC_WININD: | ||
2685 | /* TODO: Address the zero-byte ambiguity. Does the encoder emit a window or | ||
2686 | * not? If so, then catch an error here. If not, need another routine to say | ||
2687 | * decode_at_least_one_if_empty. */ | ||
2688 | case DEC_ABORTED: | ||
2689 | break; | ||
2690 | default: | ||
2691 | /* If decoding, should be ready for the next window. */ | ||
2692 | stream->msg = "EOF in decode"; | ||
2693 | return EINVAL; | ||
2694 | } | ||
2695 | } | ||
2696 | |||
2697 | return 0; | ||
2698 | } | ||
2699 | |||
2700 | /****************************************************************************************** | ||
2701 | Application header | ||
2702 | ******************************************************************************************/ | ||
2703 | |||
2704 | int | ||
2705 | xd3_get_appheader (xd3_stream *stream, | ||
2706 | uint8_t **data, | ||
2707 | usize_t *size) | ||
2708 | { | ||
2709 | if (stream->dec_state < DEC_WININD) | ||
2710 | { | ||
2711 | stream->msg = "application header not available"; | ||
2712 | return EINVAL; | ||
2713 | } | ||
2714 | |||
2715 | (*data) = stream->dec_appheader; | ||
2716 | (*size) = stream->dec_appheadsz; | ||
2717 | return 0; | ||
2718 | } | ||
2719 | |||
2720 | #if XD3_ENCODER | ||
2721 | void | ||
2722 | xd3_set_appheader (xd3_stream *stream, | ||
2723 | const uint8_t *data, | ||
2724 | usize_t size) | ||
2725 | { | ||
2726 | stream->enc_appheader = data; | ||
2727 | stream->enc_appheadsz = size; | ||
2728 | } | ||
2729 | |||
2730 | /****************************************************************************************** | ||
2731 | Encoder stuff | ||
2732 | ******************************************************************************************/ | ||
2733 | |||
2734 | #if XD3_DEBUG | ||
2735 | static int | ||
2736 | xd3_iopt_check (xd3_stream *stream) | ||
2737 | { | ||
2738 | int ul = xd3_rlist_length (& stream->iopt.used); | ||
2739 | int fl = xd3_rlist_length (& stream->iopt.free); | ||
2740 | |||
2741 | return (ul + fl + (stream->iout ? 1 : 0)) == stream->iopt_size; | ||
2742 | } | ||
2743 | #endif | ||
2744 | |||
2745 | static xd3_rinst* | ||
2746 | xd3_iopt_free (xd3_stream *stream, xd3_rinst *i) | ||
2747 | { | ||
2748 | xd3_rinst *n = xd3_rlist_remove (i); | ||
2749 | xd3_rlist_push_back (& stream->iopt.free, i); | ||
2750 | return n; | ||
2751 | } | ||
2752 | |||
2753 | static void | ||
2754 | xd3_iopt_free_nonadd (xd3_stream *stream, xd3_rinst *i) | ||
2755 | { | ||
2756 | if (i->type != XD3_ADD) | ||
2757 | { | ||
2758 | xd3_rlist_push_back (& stream->iopt.free, i); | ||
2759 | } | ||
2760 | } | ||
2761 | |||
2762 | /* When an instruction is ready to flush from the iopt buffer, this function is called to | ||
2763 | * produce an encoding. It writes the instruction plus size, address, and data to the | ||
2764 | * various encoding sections. */ | ||
2765 | static int | ||
2766 | xd3_iopt_finish_encoding (xd3_stream *stream, xd3_rinst *inst) | ||
2767 | { | ||
2768 | int ret; | ||
2769 | |||
2770 | /* Check for input overflow. */ | ||
2771 | XD3_ASSERT (inst->pos + inst->size <= stream->avail_in); | ||
2772 | |||
2773 | switch (inst->type) | ||
2774 | { | ||
2775 | case XD3_CPY: | ||
2776 | { | ||
2777 | /* the address may have an offset if there is a source window. */ | ||
2778 | usize_t addr; | ||
2779 | xd3_source *src = stream->src; | ||
2780 | |||
2781 | if (src != NULL) | ||
2782 | { | ||
2783 | /* If there is a source copy, the source must have its source window decided | ||
2784 | * before we can encode. This can be bad -- we have to make this decision | ||
2785 | * even if no source matches have been found. */ | ||
2786 | if (stream->srcwin_decided == 0) | ||
2787 | { | ||
2788 | if ((ret = xd3_srcwin_setup (stream))) { return ret; } | ||
2789 | } | ||
2790 | |||
2791 | /* xtra field indicates the copy is from the source */ | ||
2792 | if (inst->xtra) | ||
2793 | { | ||
2794 | XD3_ASSERT (inst->addr >= src->srcbase); | ||
2795 | XD3_ASSERT (inst->addr + inst->size <= src->srcbase + src->srclen); | ||
2796 | addr = (inst->addr - src->srcbase); | ||
2797 | } | ||
2798 | else | ||
2799 | { | ||
2800 | /* with source window: target copy address is offset by taroff. */ | ||
2801 | addr = stream->taroff + (usize_t) inst->addr; | ||
2802 | } | ||
2803 | } | ||
2804 | else | ||
2805 | { | ||
2806 | addr = (usize_t) inst->addr; | ||
2807 | } | ||
2808 | |||
2809 | XD3_ASSERT (inst->size >= MIN_MATCH); | ||
2810 | |||
2811 | /* the "here" position is always offset by taroff */ | ||
2812 | if ((ret = xd3_encode_address (stream, addr, inst->pos + stream->taroff, & inst->type))) | ||
2813 | { | ||
2814 | return ret; | ||
2815 | } | ||
2816 | |||
2817 | IF_DEBUG (stream->n_cpy += 1); | ||
2818 | IF_DEBUG (stream->l_cpy += inst->size); | ||
2819 | |||
2820 | IF_DEBUG1 ({ | ||
2821 | static int cnt; | ||
2822 | P(RINT "[iopt copy:%d] pos %"Q"u-%"Q"u addr %"Q"u-%"Q"u size %u\n", | ||
2823 | cnt++, | ||
2824 | stream->total_in + inst->pos, | ||
2825 | stream->total_in + inst->pos + inst->size, | ||
2826 | inst->addr, inst->addr + inst->size, inst->size); | ||
2827 | }); | ||
2828 | break; | ||
2829 | } | ||
2830 | case XD3_RUN: | ||
2831 | { | ||
2832 | XD3_ASSERT (inst->size >= MIN_MATCH); | ||
2833 | |||
2834 | if ((ret = xd3_emit_byte (stream, & DATA_TAIL (stream), inst->xtra))) { return ret; } | ||
2835 | |||
2836 | IF_DEBUG (stream->n_run += 1); | ||
2837 | IF_DEBUG (stream->l_run += inst->size); | ||
2838 | IF_DEBUG (stream->n_dbytes += 1); | ||
2839 | |||
2840 | IF_DEBUG1 ({ | ||
2841 | static int cnt; | ||
2842 | P(RINT "[iopt run:%d] pos %"Q"u size %u\n", cnt++, stream->total_in + inst->pos, inst->size); | ||
2843 | }); | ||
2844 | break; | ||
2845 | } | ||
2846 | case XD3_ADD: | ||
2847 | { | ||
2848 | if ((ret = xd3_emit_bytes (stream, & DATA_TAIL (stream), | ||
2849 | stream->next_in + inst->pos, inst->size))) { return ret; } | ||
2850 | |||
2851 | IF_DEBUG (stream->n_add += 1); | ||
2852 | IF_DEBUG (stream->l_add += inst->size); | ||
2853 | IF_DEBUG (stream->n_dbytes += inst->size); | ||
2854 | |||
2855 | IF_DEBUG1 ({ | ||
2856 | static int cnt; | ||
2857 | P(RINT "[iopt add:%d] pos %"Q"u size %u\n", cnt++, stream->total_in + inst->pos, inst->size); | ||
2858 | }); | ||
2859 | |||
2860 | break; | ||
2861 | } | ||
2862 | } | ||
2863 | |||
2864 | /* This is the only place stream->unencoded_offset is incremented. */ | ||
2865 | XD3_ASSERT (stream->unencoded_offset == inst->pos); | ||
2866 | stream->unencoded_offset += inst->size; | ||
2867 | |||
2868 | IF_DEBUG (stream->n_emit += inst->size); | ||
2869 | |||
2870 | inst->code2 = 0; | ||
2871 | |||
2872 | XD3_CHOOSE_INSTRUCTION (stream, stream->iout, inst); | ||
2873 | |||
2874 | if (stream->iout != NULL) | ||
2875 | { | ||
2876 | if (stream->iout->code2 != 0) | ||
2877 | { | ||
2878 | if ((ret = xd3_emit_double (stream, stream->iout, inst, stream->iout->code2))) { return ret; } | ||
2879 | |||
2880 | xd3_iopt_free_nonadd (stream, stream->iout); | ||
2881 | xd3_iopt_free_nonadd (stream, inst); | ||
2882 | stream->iout = NULL; | ||
2883 | return 0; | ||
2884 | } | ||
2885 | else | ||
2886 | { | ||
2887 | if ((ret = xd3_emit_single (stream, stream->iout, stream->iout->code1))) { return ret; } | ||
2888 | |||
2889 | xd3_iopt_free_nonadd (stream, stream->iout); | ||
2890 | } | ||
2891 | } | ||
2892 | |||
2893 | stream->iout = inst; | ||
2894 | |||
2895 | return 0; | ||
2896 | } | ||
2897 | |||
2898 | /* This possibly encodes an add instruction, iadd, which must remain on the stack until | ||
2899 | * the following call to xd3_iopt_finish_encoding. */ | ||
2900 | static int | ||
2901 | xd3_iopt_add (xd3_stream *stream, usize_t pos, xd3_rinst *iadd) | ||
2902 | { | ||
2903 | int ret; | ||
2904 | usize_t off = stream->unencoded_offset; | ||
2905 | |||
2906 | if (pos > off) | ||
2907 | { | ||
2908 | iadd->type = XD3_ADD; | ||
2909 | iadd->pos = off; | ||
2910 | iadd->size = pos - off; | ||
2911 | |||
2912 | if ((ret = xd3_iopt_finish_encoding (stream, iadd))) { return ret; } | ||
2913 | } | ||
2914 | |||
2915 | return 0; | ||
2916 | } | ||
2917 | |||
2918 | /* This function calls xd3_iopt_finish_encoding to finish encoding an instruction, and it | ||
2919 | * may also produce an add instruction for an unmatched region. */ | ||
2920 | static int | ||
2921 | xd3_iopt_add_encoding (xd3_stream *stream, xd3_rinst *inst) | ||
2922 | { | ||
2923 | int ret; | ||
2924 | xd3_rinst iadd; | ||
2925 | |||
2926 | if ((ret = xd3_iopt_add (stream, inst->pos, & iadd))) { return ret; } | ||
2927 | |||
2928 | if ((ret = xd3_iopt_finish_encoding (stream, inst))) { return ret; } | ||
2929 | |||
2930 | return 0; | ||
2931 | } | ||
2932 | |||
2933 | /* Generates a final add instruction to encode the remaining input. */ | ||
2934 | static int | ||
2935 | xd3_iopt_add_finalize (xd3_stream *stream) | ||
2936 | { | ||
2937 | int ret; | ||
2938 | xd3_rinst iadd; | ||
2939 | |||
2940 | if ((ret = xd3_iopt_add (stream, stream->avail_in, & iadd))) { return ret; } | ||
2941 | |||
2942 | if (stream->iout) | ||
2943 | { | ||
2944 | if ((ret = xd3_emit_single (stream, stream->iout, stream->iout->code1))) { return ret; } | ||
2945 | |||
2946 | xd3_iopt_free_nonadd (stream, stream->iout); | ||
2947 | stream->iout = NULL; | ||
2948 | } | ||
2949 | |||
2950 | return 0; | ||
2951 | } | ||
2952 | |||
2953 | /* Compact the instruction buffer by choosing the best non-overlapping instructions when | ||
2954 | * lazy string-matching. There are no ADDs in the iopt buffer because those are | ||
2955 | * synthesized in xd3_iopt_add_encoding and during xd3_iopt_add_finalize. */ | ||
2956 | static int | ||
2957 | xd3_iopt_flush_instructions (xd3_stream *stream, int force) | ||
2958 | { | ||
2959 | xd3_rinst *r1 = xd3_rlist_front (& stream->iopt.used); | ||
2960 | xd3_rinst *r2; | ||
2961 | xd3_rinst *r3; | ||
2962 | usize_t r1end; | ||
2963 | usize_t r2end; | ||
2964 | usize_t r2off; | ||
2965 | usize_t r2moff; | ||
2966 | usize_t gap; | ||
2967 | usize_t flushed; | ||
2968 | int ret; | ||
2969 | |||
2970 | XD3_ASSERT (xd3_iopt_check (stream)); | ||
2971 | |||
2972 | /* Note: once tried to skip this step if it's possible to assert there are no | ||
2973 | * overlapping instructions. Doesn't work because xd3_opt_erase leaves overlapping | ||
2974 | * instructions. */ | ||
2975 | while (! xd3_rlist_end (& stream->iopt.used, r1) && | ||
2976 | ! xd3_rlist_end (& stream->iopt.used, r2 = xd3_rlist_next (r1))) | ||
2977 | { | ||
2978 | r1end = r1->pos + r1->size; | ||
2979 | |||
2980 | /* If the instructions do not overlap, continue. */ | ||
2981 | if (r1end <= r2->pos) | ||
2982 | { | ||
2983 | r1 = r2; | ||
2984 | continue; | ||
2985 | } | ||
2986 | |||
2987 | r2end = r2->pos + r2->size; | ||
2988 | |||
2989 | /* The min_match adjustments prevent this. */ | ||
2990 | XD3_ASSERT (r2end > (r1end + LEAST_MATCH_INCR)); | ||
2991 | |||
2992 | /* If r3 is available... */ | ||
2993 | if (! xd3_rlist_end (& stream->iopt.used, r3 = xd3_rlist_next (r2))) | ||
2994 | { | ||
2995 | /* If r3 starts before r1 finishes or just about, r2 is irrelevant */ | ||
2996 | if (r3->pos <= r1end + 1) | ||
2997 | { | ||
2998 | xd3_iopt_free (stream, r2); | ||
2999 | continue; | ||
3000 | } | ||
3001 | } | ||
3002 | else if (! force) | ||
3003 | { | ||
3004 | /* Unless force, end the loop when r3 is not available. */ | ||
3005 | break; | ||
3006 | } | ||
3007 | |||
3008 | r2off = r2->pos - r1->pos; | ||
3009 | r2moff = r2end - r1end; | ||
3010 | gap = r2end - r1->pos; | ||
3011 | |||
3012 | /* If the two matches overlap almost entirely, choose the better match and discard | ||
3013 | * the other. This heuristic is BLACK MAGIC. Havesomething better? */ | ||
3014 | if (gap < 2*MIN_MATCH || r2moff <= 2 || r2off <= 2) | ||
3015 | { | ||
3016 | /* Only one match should be used, choose the longer one. */ | ||
3017 | if (r1->size < r2->size) | ||
3018 | { | ||
3019 | xd3_iopt_free (stream, r1); | ||
3020 | r1 = r2; | ||
3021 | } | ||
3022 | else | ||
3023 | { | ||
3024 | /* We are guaranteed that r1 does not overlap now, so advance past r2 */ | ||
3025 | r1 = xd3_iopt_free (stream, r2); | ||
3026 | } | ||
3027 | continue; | ||
3028 | } | ||
3029 | else | ||
3030 | { | ||
3031 | /* Shorten one of the instructions -- could be optimized based on the address | ||
3032 | * cache. */ | ||
3033 | usize_t average; | ||
3034 | usize_t newsize; | ||
3035 | usize_t adjust1; | ||
3036 | |||
3037 | XD3_ASSERT (r1end > r2->pos && r2end > r1->pos); | ||
3038 | |||
3039 | /* Try to balance the length of both instructions, but avoid making both longer | ||
3040 | * than MAX_MATCH_SPLIT . */ | ||
3041 | average = (gap) / 2; | ||
3042 | newsize = min (MAX_MATCH_SPLIT, gap - average); | ||
3043 | |||
3044 | /* Should be possible to simplify this code. */ | ||
3045 | if (newsize > r1->size) | ||
3046 | { | ||
3047 | /* shorten r2 */ | ||
3048 | adjust1 = r1end - r2->pos; | ||
3049 | } | ||
3050 | else if (newsize > r2->size) | ||
3051 | { | ||
3052 | /* shorten r1 */ | ||
3053 | adjust1 = r1end - r2->pos; | ||
3054 | |||
3055 | XD3_ASSERT (r1->size > adjust1); | ||
3056 | |||
3057 | r1->size -= adjust1; | ||
3058 | |||
3059 | /* don't shorten r2 */ | ||
3060 | adjust1 = 0; | ||
3061 | } | ||
3062 | else | ||
3063 | { | ||
3064 | /* shorten r1 */ | ||
3065 | adjust1 = r1->size - newsize; | ||
3066 | |||
3067 | if (r2->pos > r1end - adjust1) | ||
3068 | { | ||
3069 | adjust1 -= r2->pos - (r1end - adjust1); | ||
3070 | } | ||
3071 | |||
3072 | XD3_ASSERT (r1->size > adjust1); | ||
3073 | |||
3074 | r1->size -= adjust1; | ||
3075 | |||
3076 | /* shorten r2 */ | ||
3077 | XD3_ASSERT (r1->pos + r1->size >= r2->pos); | ||
3078 | |||
3079 | adjust1 = r1->pos + r1->size - r2->pos; | ||
3080 | } | ||
3081 | |||
3082 | /* Fallthrough above if-else, shorten r2 */ | ||
3083 | XD3_ASSERT (r2->size > adjust1); | ||
3084 | |||
3085 | r2->size -= adjust1; | ||
3086 | r2->pos += adjust1; | ||
3087 | r2->addr += adjust1; | ||
3088 | |||
3089 | XD3_ASSERT (r1->size >= MIN_MATCH); | ||
3090 | XD3_ASSERT (r2->size >= MIN_MATCH); | ||
3091 | |||
3092 | r1 = r2; | ||
3093 | } | ||
3094 | } | ||
3095 | |||
3096 | XD3_ASSERT (xd3_iopt_check (stream)); | ||
3097 | |||
3098 | /* If forcing, pick instructions until the list is empty, otherwise this empties 50% of | ||
3099 | * the queue. */ | ||
3100 | for (flushed = 0; ! xd3_rlist_empty (& stream->iopt.used); ) | ||
3101 | { | ||
3102 | xd3_rinst *renc = xd3_rlist_pop_front (& stream->iopt.used); | ||
3103 | if ((ret = xd3_iopt_add_encoding (stream, renc))) | ||
3104 | { | ||
3105 | return ret; | ||
3106 | } | ||
3107 | |||
3108 | if (! force) | ||
3109 | { | ||
3110 | if (++flushed > stream->iopt_size / 2) | ||
3111 | { | ||
3112 | break; | ||
3113 | } | ||
3114 | |||
3115 | /* If there are only two instructions remaining, break, because they were | ||
3116 | * not optimized. This means there were more than 50% eliminated by the | ||
3117 | * loop above. */ | ||
3118 | r1 = xd3_rlist_front (& stream->iopt.used); | ||
3119 | if (xd3_rlist_end(& stream->iopt.used, r1) || | ||
3120 | xd3_rlist_end(& stream->iopt.used, r2 = xd3_rlist_next (r1)) || | ||
3121 | xd3_rlist_end(& stream->iopt.used, r3 = xd3_rlist_next (r2))) | ||
3122 | { | ||
3123 | break; | ||
3124 | } | ||
3125 | } | ||
3126 | } | ||
3127 | |||
3128 | XD3_ASSERT (xd3_iopt_check (stream)); | ||
3129 | |||
3130 | XD3_ASSERT (!force || xd3_rlist_length (& stream->iopt.used) == 0); | ||
3131 | |||
3132 | return 0; | ||
3133 | } | ||
3134 | |||
3135 | static int | ||
3136 | xd3_iopt_get_slot (xd3_stream *stream, xd3_rinst** iptr) | ||
3137 | { | ||
3138 | xd3_rinst *i; | ||
3139 | int ret; | ||
3140 | |||
3141 | if (xd3_rlist_empty (& stream->iopt.free)) | ||
3142 | { | ||
3143 | if ((ret = xd3_iopt_flush_instructions (stream, 0))) { return ret; } | ||
3144 | |||
3145 | XD3_ASSERT (! xd3_rlist_empty (& stream->iopt.free)); | ||
3146 | } | ||
3147 | |||
3148 | i = xd3_rlist_pop_back (& stream->iopt.free); | ||
3149 | |||
3150 | xd3_rlist_push_back (& stream->iopt.used, i); | ||
3151 | |||
3152 | (*iptr) = i; | ||
3153 | |||
3154 | return 0; | ||
3155 | } | ||
3156 | |||
3157 | /* A copy is about to be emitted that extends backwards to POS, therefore it may | ||
3158 | * completely cover some existing instructions in the buffer. If an instruction is | ||
3159 | * completely covered by this new match, erase it. If the new instruction is covered by | ||
3160 | * the previous one, return 1 to skip it. */ | ||
3161 | static void | ||
3162 | xd3_iopt_erase (xd3_stream *stream, usize_t pos, usize_t size) | ||
3163 | { | ||
3164 | while (! xd3_rlist_empty (& stream->iopt.used)) | ||
3165 | { | ||
3166 | xd3_rinst *r = xd3_rlist_back (& stream->iopt.used); | ||
3167 | |||
3168 | /* Verify that greedy is working. The previous instruction should end before the | ||
3169 | * new one begins. */ | ||
3170 | XD3_ASSERT ((stream->flags & XD3_BEGREEDY) == 0 || (r->pos + r->size <= pos)); | ||
3171 | /* Verify that min_match is working. The previous instruction should end before the | ||
3172 | * new one ends. */ | ||
3173 | XD3_ASSERT ((stream->flags & XD3_BEGREEDY) != 0 || (r->pos + r->size < pos + size)); | ||
3174 | |||
3175 | /* See if the last instruction starts before the new instruction. If so, there is | ||
3176 | * nothing to erase. */ | ||
3177 | if (r->pos < pos) | ||
3178 | { | ||
3179 | return; | ||
3180 | } | ||
3181 | |||
3182 | /* Otherwise, the new instruction covers the old one, delete it and repeat. */ | ||
3183 | xd3_rlist_remove (r); | ||
3184 | xd3_rlist_push_back (& stream->iopt.free, r); | ||
3185 | } | ||
3186 | } | ||
3187 | |||
3188 | /* This function tells the last matched input position. */ | ||
3189 | static usize_t | ||
3190 | xd3_iopt_last_matched (xd3_stream *stream) | ||
3191 | { | ||
3192 | xd3_rinst *r; | ||
3193 | |||
3194 | if (xd3_rlist_empty (& stream->iopt.used)) | ||
3195 | { | ||
3196 | return 0; | ||
3197 | } | ||
3198 | |||
3199 | r = xd3_rlist_back (& stream->iopt.used); | ||
3200 | |||
3201 | return r->pos + r->size; | ||
3202 | } | ||
3203 | |||
3204 | /****************************************************************************************** | ||
3205 | Emit routines | ||
3206 | ******************************************************************************************/ | ||
3207 | |||
3208 | static int | ||
3209 | xd3_emit_single (xd3_stream *stream, xd3_rinst *single, uint code) | ||
3210 | { | ||
3211 | int has_size = stream->code_table[code].size1 == 0; | ||
3212 | int ret; | ||
3213 | |||
3214 | IF_DEBUG1 (P(RINT "[emit1] %u %s (%u) code %u\n", | ||
3215 | single->pos, | ||
3216 | xd3_rtype_to_string (single->type, 0), | ||
3217 | single->size, | ||
3218 | code)); | ||
3219 | |||
3220 | if ((ret = xd3_emit_byte (stream, & INST_TAIL (stream), code))) { return ret; } | ||
3221 | |||
3222 | if (has_size) | ||
3223 | { | ||
3224 | if ((ret = xd3_emit_size (stream, & INST_TAIL (stream), single->size))) { return ret; } | ||
3225 | |||
3226 | IF_DEBUG (xd3_count_size (stream, single->size)); | ||
3227 | } | ||
3228 | |||
3229 | IF_DEBUG (xd3_count_inst (stream, code)); | ||
3230 | |||
3231 | return 0; | ||
3232 | } | ||
3233 | |||
3234 | static int | ||
3235 | xd3_emit_double (xd3_stream *stream, xd3_rinst *first, xd3_rinst *second, uint code) | ||
3236 | { | ||
3237 | int ret; | ||
3238 | |||
3239 | /* All double instructions use fixed sizes, so all we need to do is output the | ||
3240 | * instruction code, no sizes. */ | ||
3241 | XD3_ASSERT (stream->code_table[code].size1 != 0 && | ||
3242 | stream->code_table[code].size2 != 0); | ||
3243 | |||
3244 | if ((ret = xd3_emit_byte (stream, & INST_TAIL (stream), code))) { return ret; } | ||
3245 | |||
3246 | IF_DEBUG1 (P(RINT "[emit2]: %u %s (%u) %s (%u) code %u\n", | ||
3247 | first->pos, | ||
3248 | xd3_rtype_to_string (first->type, 0), | ||
3249 | first->size, | ||
3250 | xd3_rtype_to_string (second->type, 0), | ||
3251 | second->size, | ||
3252 | code)); | ||
3253 | |||
3254 | IF_DEBUG (xd3_count_inst (stream, code)); | ||
3255 | |||
3256 | return 0; | ||
3257 | } | ||
3258 | |||
3259 | /* This enters a potential run instruction into the iopt buffer. The position argument is | ||
3260 | * relative to the target window. */ | ||
3261 | static INLINE int | ||
3262 | xd3_emit_run (xd3_stream *stream, usize_t pos, usize_t size, uint8_t run_c) | ||
3263 | { | ||
3264 | xd3_rinst* ri; | ||
3265 | int ret; | ||
3266 | |||
3267 | XD3_ASSERT (pos + size <= stream->avail_in); | ||
3268 | |||
3269 | if ((ret = xd3_iopt_get_slot (stream, & ri))) { return ret; } | ||
3270 | |||
3271 | ri->type = XD3_RUN; | ||
3272 | ri->xtra = run_c; | ||
3273 | ri->pos = pos; | ||
3274 | ri->size = size; | ||
3275 | |||
3276 | return 0; | ||
3277 | } | ||
3278 | |||
3279 | /* This enters a potential copy instruction into the iopt buffer. The position argument | ||
3280 | * is relative to the target window.. */ | ||
3281 | static INLINE int | ||
3282 | xd3_found_match (xd3_stream *stream, usize_t pos, usize_t size, xoff_t addr, int is_source) | ||
3283 | { | ||
3284 | xd3_rinst* ri; | ||
3285 | int ret; | ||
3286 | |||
3287 | XD3_ASSERT (pos + size <= stream->avail_in); | ||
3288 | |||
3289 | if ((ret = xd3_iopt_get_slot (stream, & ri))) { return ret; } | ||
3290 | |||
3291 | ri->type = XD3_CPY; | ||
3292 | ri->xtra = is_source; | ||
3293 | ri->pos = pos; | ||
3294 | ri->size = size; | ||
3295 | ri->addr = addr; | ||
3296 | |||
3297 | return 0; | ||
3298 | } | ||
3299 | |||
3300 | static int | ||
3301 | xd3_emit_hdr (xd3_stream *stream) | ||
3302 | { | ||
3303 | int ret; | ||
3304 | int use_secondary = stream->sec_type != NULL; | ||
3305 | int use_adler32 = stream->flags & XD3_ADLER32; | ||
3306 | int vcd_source = xd3_encoder_used_source (stream); | ||
3307 | uint win_ind = 0; | ||
3308 | uint del_ind = 0; | ||
3309 | usize_t enc_len; | ||
3310 | usize_t tgt_len; | ||
3311 | usize_t data_len; | ||
3312 | usize_t inst_len; | ||
3313 | usize_t addr_len; | ||
3314 | |||
3315 | XD3_ASSERT (stream->n_emit == stream->avail_in); | ||
3316 | |||
3317 | if (stream->current_window == 0) | ||
3318 | { | ||
3319 | uint hdr_ind = 0; | ||
3320 | int use_appheader = stream->enc_appheader != NULL; | ||
3321 | int use_gencodetbl = GENERIC_ENCODE_TABLES && (stream->code_table_desc != & __rfc3284_code_table_desc); | ||
3322 | |||
3323 | if (use_secondary) { hdr_ind |= VCD_SECONDARY; } | ||
3324 | if (use_gencodetbl) { hdr_ind |= VCD_CODETABLE; } | ||
3325 | if (use_appheader) { hdr_ind |= VCD_APPHEADER; } | ||
3326 | |||
3327 | if ((ret = xd3_emit_byte (stream, & HDR_TAIL (stream), VCDIFF_MAGIC1)) != 0 || | ||
3328 | (ret = xd3_emit_byte (stream, & HDR_TAIL (stream), VCDIFF_MAGIC2)) != 0 || | ||
3329 | (ret = xd3_emit_byte (stream, & HDR_TAIL (stream), VCDIFF_MAGIC3)) != 0 || | ||
3330 | (ret = xd3_emit_byte (stream, & HDR_TAIL (stream), VCDIFF_VERSION)) != 0 || | ||
3331 | (ret = xd3_emit_byte (stream, & HDR_TAIL (stream), hdr_ind)) != 0) | ||
3332 | { | ||
3333 | return ret; | ||
3334 | } | ||
3335 | |||
3336 | /* Secondary compressor ID */ | ||
3337 | #if SECONDARY_ANY | ||
3338 | if (use_secondary && (ret = xd3_emit_byte (stream, & HDR_TAIL (stream), stream->sec_type->id))) { return ret; } | ||
3339 | #endif | ||
3340 | |||
3341 | /* Compressed code table */ | ||
3342 | if (use_gencodetbl) | ||
3343 | { | ||
3344 | usize_t code_table_size; | ||
3345 | const uint8_t *code_table_data; | ||
3346 | |||
3347 | if ((ret = stream->comp_table_func (stream, & code_table_data, & code_table_size))) { return ret; } | ||
3348 | |||
3349 | if ((ret = xd3_emit_size (stream, & HDR_TAIL (stream), code_table_size + 2)) || | ||
3350 | (ret = xd3_emit_byte (stream, & HDR_TAIL (stream), stream->code_table_desc->near_modes)) || | ||
3351 | (ret = xd3_emit_byte (stream, & HDR_TAIL (stream), stream->code_table_desc->same_modes)) || | ||
3352 | (ret = xd3_emit_bytes (stream, & HDR_TAIL (stream), code_table_data, code_table_size))) { return ret; } | ||
3353 | } | ||
3354 | |||
3355 | /* Application header */ | ||
3356 | if (use_appheader) | ||
3357 | { | ||
3358 | if ((ret = xd3_emit_size (stream, & HDR_TAIL (stream), stream->enc_appheadsz)) || | ||
3359 | (ret = xd3_emit_bytes (stream, & HDR_TAIL (stream), stream->enc_appheader, stream->enc_appheadsz))) | ||
3360 | { | ||
3361 | return ret; | ||
3362 | } | ||
3363 | } | ||
3364 | } | ||
3365 | |||
3366 | /* try to compress this window */ | ||
3367 | #if SECONDARY_ANY | ||
3368 | if (use_secondary) | ||
3369 | { | ||
3370 | int data_sec = 0; | ||
3371 | int inst_sec = 0; | ||
3372 | int addr_sec = 0; | ||
3373 | |||
3374 | # define ENCODE_SECONDARY_SECTION(UPPER,LOWER) \ | ||
3375 | ((stream->flags & XD3_SEC_NO ## UPPER) == 0 && \ | ||
3376 | (ret = xd3_encode_secondary (stream, & UPPER ## _HEAD (stream), & UPPER ## _TAIL (stream), \ | ||
3377 | & xd3_sec_ ## LOWER (stream), \ | ||
3378 | & stream->sec_ ## LOWER, & LOWER ## _sec))) | ||
3379 | |||
3380 | if (ENCODE_SECONDARY_SECTION (DATA, data) || | ||
3381 | ENCODE_SECONDARY_SECTION (INST, inst) || | ||
3382 | ENCODE_SECONDARY_SECTION (ADDR, addr)) | ||
3383 | { | ||
3384 | return ret; | ||
3385 | } | ||
3386 | |||
3387 | del_ind |= (data_sec ? VCD_DATACOMP : 0); | ||
3388 | del_ind |= (inst_sec ? VCD_INSTCOMP : 0); | ||
3389 | del_ind |= (addr_sec ? VCD_ADDRCOMP : 0); | ||
3390 | } | ||
3391 | #endif | ||
3392 | |||
3393 | /* if (vcd_target) { win_ind |= VCD_TARGET; } */ | ||
3394 | if (vcd_source) { win_ind |= VCD_SOURCE; } | ||
3395 | if (use_adler32) { win_ind |= VCD_ADLER32; } | ||
3396 | |||
3397 | /* window indicator */ | ||
3398 | if ((ret = xd3_emit_byte (stream, & HDR_TAIL (stream), win_ind))) { return ret; } | ||
3399 | |||
3400 | /* source window */ | ||
3401 | if (vcd_source) | ||
3402 | { | ||
3403 | /* or (vcd_target) { ... } */ | ||
3404 | if ((ret = xd3_emit_size (stream, & HDR_TAIL (stream), stream->src->srclen)) || | ||
3405 | (ret = xd3_emit_size (stream, & HDR_TAIL (stream), stream->src->srcbase))) { return ret; } | ||
3406 | } | ||
3407 | |||
3408 | tgt_len = stream->avail_in; | ||
3409 | data_len = xd3_sizeof_output (DATA_HEAD (stream)); | ||
3410 | inst_len = xd3_sizeof_output (INST_HEAD (stream)); | ||
3411 | addr_len = xd3_sizeof_output (ADDR_HEAD (stream)); | ||
3412 | |||
3413 | /* The enc_len field is redundent... doh! */ | ||
3414 | enc_len = (1 + (xd3_sizeof_size (tgt_len) + | ||
3415 | xd3_sizeof_size (data_len) + | ||
3416 | xd3_sizeof_size (inst_len) + | ||
3417 | xd3_sizeof_size (addr_len)) + | ||
3418 | data_len + | ||
3419 | inst_len + | ||
3420 | addr_len + | ||
3421 | (use_adler32 ? 4 : 0)); | ||
3422 | |||
3423 | if ((ret = xd3_emit_size (stream, & HDR_TAIL (stream), enc_len)) || | ||
3424 | (ret = xd3_emit_size (stream, & HDR_TAIL (stream), tgt_len)) || | ||
3425 | (ret = xd3_emit_byte (stream, & HDR_TAIL (stream), del_ind)) || | ||
3426 | (ret = xd3_emit_size (stream, & HDR_TAIL (stream), data_len)) || | ||
3427 | (ret = xd3_emit_size (stream, & HDR_TAIL (stream), inst_len)) || | ||
3428 | (ret = xd3_emit_size (stream, & HDR_TAIL (stream), addr_len))) | ||
3429 | { | ||
3430 | return ret; | ||
3431 | } | ||
3432 | |||
3433 | if (use_adler32) | ||
3434 | { | ||
3435 | uint8_t send[4]; | ||
3436 | uint32_t a32 = adler32 (1L, stream->next_in, stream->avail_in); | ||
3437 | |||
3438 | send[0] = (a32 >> 24); | ||
3439 | send[1] = (a32 >> 16); | ||
3440 | send[2] = (a32 >> 8); | ||
3441 | send[3] = (a32 & 0xff); | ||
3442 | |||
3443 | if ((ret = xd3_emit_bytes (stream, & HDR_TAIL (stream), send, 4))) { return ret; } | ||
3444 | } | ||
3445 | |||
3446 | return 0; | ||
3447 | } | ||
3448 | |||
3449 | /****************************************************************************************** | ||
3450 | Encode routines | ||
3451 | ******************************************************************************************/ | ||
3452 | |||
3453 | static int | ||
3454 | xd3_encode_buffer_leftover (xd3_stream *stream) | ||
3455 | { | ||
3456 | usize_t take; | ||
3457 | usize_t room; | ||
3458 | |||
3459 | /* Allocate the buffer. */ | ||
3460 | if (stream->buf_in == NULL && (stream->buf_in = xd3_alloc (stream, stream->winsize, 1)) == NULL) | ||
3461 | { | ||
3462 | return ENOMEM; | ||
3463 | } | ||
3464 | |||
3465 | /* Take leftover input first. */ | ||
3466 | if (stream->buf_leftover != NULL) | ||
3467 | { | ||
3468 | XD3_ASSERT (stream->buf_avail == 0); | ||
3469 | XD3_ASSERT (stream->buf_leftavail < stream->winsize); | ||
3470 | |||
3471 | IF_DEBUG1 (P(RINT "[leftover] previous %u avail %u\n", stream->buf_leftavail, stream->avail_in)); | ||
3472 | |||
3473 | memcpy (stream->buf_in, stream->buf_leftover, stream->buf_leftavail); | ||
3474 | |||
3475 | stream->buf_leftover = NULL; | ||
3476 | stream->buf_avail = stream->buf_leftavail; | ||
3477 | } | ||
3478 | |||
3479 | /* Copy into the buffer. */ | ||
3480 | room = stream->winsize - stream->buf_avail; | ||
3481 | take = min (room, stream->avail_in); | ||
3482 | |||
3483 | memcpy (stream->buf_in + stream->buf_avail, stream->next_in, take); | ||
3484 | |||
3485 | stream->buf_avail += take; | ||
3486 | |||
3487 | if (take < stream->avail_in) | ||
3488 | { | ||
3489 | /* Buffer is full */ | ||
3490 | stream->buf_leftover = stream->next_in + take; | ||
3491 | stream->buf_leftavail = stream->avail_in - take; | ||
3492 | |||
3493 | IF_DEBUG1 (P(RINT "[leftover] take %u remaining %u\n", take, stream->buf_leftavail)); | ||
3494 | } | ||
3495 | else if ((stream->buf_avail < stream->winsize) && !(stream->flags & XD3_FLUSH)) | ||
3496 | { | ||
3497 | /* Buffer has space */ | ||
3498 | IF_DEBUG1 (P(RINT "[leftover] %u emptied\n", take)); | ||
3499 | return XD3_INPUT; | ||
3500 | } | ||
3501 | |||
3502 | /* Use the buffer: */ | ||
3503 | stream->next_in = stream->buf_in; | ||
3504 | stream->avail_in = stream->buf_avail; | ||
3505 | stream->buf_avail = 0; | ||
3506 | |||
3507 | return 0; | ||
3508 | } | ||
3509 | |||
3510 | /* This function allocates all memory initially used by the encoder. */ | ||
3511 | static int | ||
3512 | xd3_encode_init (xd3_stream *stream) | ||
3513 | { | ||
3514 | int i; | ||
3515 | int large_comp = (stream->src != NULL); | ||
3516 | int small_comp = ! (stream->flags & XD3_NOCOMPRESS); | ||
3517 | /*int small_prev = (stream->small_chain > 1);*/ | ||
3518 | int space_fact = (large_comp + small_comp); | ||
3519 | int memsize = stream->memsize; | ||
3520 | |||
3521 | /* Memory allocations for checksum tables are delayed until xd3_string_match_init in the | ||
3522 | * first call to string_match--that way identical or short inputs require no table | ||
3523 | * allocation. */ | ||
3524 | if (large_comp) | ||
3525 | { | ||
3526 | xd3_size_hashtable (stream, memsize / space_fact, & stream->large_hash); | ||
3527 | } | ||
3528 | |||
3529 | if (small_comp) | ||
3530 | { | ||
3531 | xd3_size_hashtable (stream, memsize / space_fact, & stream->small_hash); | ||
3532 | } | ||
3533 | |||
3534 | for (i = 0; i < ENC_SECTS; i += 1) | ||
3535 | { | ||
3536 | if ((stream->enc_heads[i] = stream->enc_tails[i] = | ||
3537 | xd3_alloc_output (stream, NULL)) == NULL) | ||
3538 | { | ||
3539 | goto fail; | ||
3540 | } | ||
3541 | } | ||
3542 | |||
3543 | /* iopt buffer */ | ||
3544 | xd3_rlist_init (& stream->iopt.used); | ||
3545 | xd3_rlist_init (& stream->iopt.free); | ||
3546 | |||
3547 | if ((stream->iopt.buffer = xd3_alloc (stream, sizeof (xd3_rinst), stream->iopt_size)) == NULL) | ||
3548 | { | ||
3549 | goto fail; | ||
3550 | } | ||
3551 | |||
3552 | for (i = 0; i < stream->iopt_size; i += 1) | ||
3553 | { | ||
3554 | xd3_rlist_push_back (& stream->iopt.free, & stream->iopt.buffer[i]); | ||
3555 | } | ||
3556 | |||
3557 | XD3_ASSERT (xd3_rlist_length (& stream->iopt.free) == stream->iopt_size); | ||
3558 | XD3_ASSERT (xd3_rlist_length (& stream->iopt.used) == 0); | ||
3559 | |||
3560 | /* address cache, code table */ | ||
3561 | stream->acache.s_near = stream->code_table_desc->near_modes; | ||
3562 | stream->acache.s_same = stream->code_table_desc->same_modes; | ||
3563 | stream->code_table = stream->code_table_func (); | ||
3564 | |||
3565 | return xd3_alloc_cache (stream); | ||
3566 | |||
3567 | fail: | ||
3568 | |||
3569 | return ENOMEM; | ||
3570 | } | ||
3571 | |||
3572 | #if XD3_DEBUG | ||
3573 | static int | ||
3574 | xd3_check_sprevlist (xd3_stream *stream) | ||
3575 | { | ||
3576 | int i; | ||
3577 | for (i = 0; i < stream->sprevsz; i += 1) | ||
3578 | { | ||
3579 | xd3_slist *l = & stream->small_prev[i]; | ||
3580 | |||
3581 | XD3_ASSERT (l->prev->next == l); | ||
3582 | XD3_ASSERT (l->next->prev == l); | ||
3583 | } | ||
3584 | return 1; | ||
3585 | } | ||
3586 | #endif | ||
3587 | |||
3588 | /* Called after the ENC_POSTOUT state, this puts the output buffers back into separate | ||
3589 | * lists and re-initializes some variables. (The output lists were spliced together | ||
3590 | * during the ENC_FLUSH state.) */ | ||
3591 | static void | ||
3592 | xd3_encode_reset (xd3_stream *stream) | ||
3593 | { | ||
3594 | int i; | ||
3595 | xd3_output *olist; | ||
3596 | |||
3597 | XD3_ASSERT (stream->small_prev == NULL || xd3_check_sprevlist (stream)); | ||
3598 | |||
3599 | IF_DEBUG (stream->n_emit = 0); | ||
3600 | stream->avail_in = 0; | ||
3601 | stream->small_reset = 1; | ||
3602 | |||
3603 | if (stream->src != NULL) | ||
3604 | { | ||
3605 | stream->src->srcbase = 0; | ||
3606 | stream->src->srclen = 0; | ||
3607 | stream->srcwin_decided = 0; | ||
3608 | stream->match_minaddr = 0; | ||
3609 | stream->match_maxaddr = 0; | ||
3610 | stream->taroff = 0; | ||
3611 | } | ||
3612 | |||
3613 | /* Reset output chains. */ | ||
3614 | olist = stream->enc_heads[0]; | ||
3615 | |||
3616 | for (i = 0; i < ENC_SECTS; i += 1) | ||
3617 | { | ||
3618 | XD3_ASSERT (olist != NULL); | ||
3619 | |||
3620 | stream->enc_heads[i] = olist; | ||
3621 | stream->enc_tails[i] = olist; | ||
3622 | olist = olist->next_page; | ||
3623 | |||
3624 | stream->enc_heads[i]->next = 0; | ||
3625 | stream->enc_heads[i]->next_page = NULL; | ||
3626 | |||
3627 | stream->enc_tails[i]->next_page = NULL; | ||
3628 | stream->enc_tails[i] = stream->enc_heads[i]; | ||
3629 | } | ||
3630 | |||
3631 | xd3_freelist_output (stream, olist); | ||
3632 | } | ||
3633 | |||
3634 | /* The main encoding routine. */ | ||
3635 | int | ||
3636 | xd3_encode_input (xd3_stream *stream) | ||
3637 | { | ||
3638 | int ret, i; | ||
3639 | |||
3640 | if (stream->dec_state != 0) | ||
3641 | { | ||
3642 | stream->msg = "encoder/decoder transition"; | ||
3643 | return EINVAL; | ||
3644 | } | ||
3645 | |||
3646 | switch (stream->enc_state) | ||
3647 | { | ||
3648 | case ENC_INIT: | ||
3649 | /* Only reached on first time through: memory setup. */ | ||
3650 | if ((ret = xd3_encode_init (stream))) { return ret; } | ||
3651 | |||
3652 | stream->enc_state = ENC_INPUT; | ||
3653 | |||
3654 | case ENC_INPUT: | ||
3655 | |||
3656 | /* If there is no input yet, just return. This checks for next_in == NULL, not | ||
3657 | * avail_in == 0 since zero bytes is a valid input. There is an assertion in | ||
3658 | * xd3_avail_input() that next_in != NULL for this reason. By returning right away | ||
3659 | * we avoid creating an input buffer before the caller has supplied its first data. | ||
3660 | * It is possible for xd3_avail_input to be called both before and after the first | ||
3661 | * call to xd3_encode_input(). */ | ||
3662 | if (stream->next_in == NULL) | ||
3663 | { | ||
3664 | return XD3_INPUT; | ||
3665 | } | ||
3666 | |||
3667 | enc_flush: | ||
3668 | /* See if we should buffer the input: either if there is already a leftover buffer, | ||
3669 | * or if the input is short of winsize without flush. The label at this point is | ||
3670 | * reached by a goto below, when there is leftover input after postout. */ | ||
3671 | if ((stream->buf_leftover != NULL) || | ||
3672 | (stream->avail_in < stream->winsize && ! (stream->flags & XD3_FLUSH))) | ||
3673 | { | ||
3674 | if ((ret = xd3_encode_buffer_leftover (stream))) { return ret; } | ||
3675 | } | ||
3676 | |||
3677 | /* Initalize the address cache before each window. */ | ||
3678 | xd3_init_cache (& stream->acache); | ||
3679 | |||
3680 | pos_in = 0; | ||
3681 | min_match = MIN_MATCH; | ||
3682 | stream->unencoded_offset = 0; | ||
3683 | |||
3684 | stream->enc_state = ENC_SEARCH; | ||
3685 | |||
3686 | IF_DEBUG1 (P(RINT "[input window:%"Q"u] input bytes %u offset %"Q"u\n", | ||
3687 | stream->current_window, stream->avail_in, stream->total_in)); | ||
3688 | |||
3689 | return XD3_WINSTART; | ||
3690 | |||
3691 | case ENC_SEARCH: | ||
3692 | |||
3693 | /* Reentrant matching. */ | ||
3694 | if (stream->src != NULL) | ||
3695 | { | ||
3696 | switch (stream->match_state) | ||
3697 | { | ||
3698 | case MATCH_TARGET: | ||
3699 | /* Try matching forward at the start of the target. This is entered the | ||
3700 | * first time through, to check for a perfect match, and whenever there is a | ||
3701 | * source match that extends to the end of the previous window. The | ||
3702 | * match_srcpos field is initially zero and later set during | ||
3703 | * xd3_source_extend_match. */ | ||
3704 | if (stream->avail_in > 0) { | ||
3705 | /* This call can't fail because the source window is unrestricted. */ | ||
3706 | ret = xd3_source_match_setup (stream, stream->match_srcpos); | ||
3707 | XD3_ASSERT (ret == 0); | ||
3708 | stream->match_state = MATCH_FORWARD; | ||
3709 | } else { | ||
3710 | stream->match_state = MATCH_SEARCHING; | ||
3711 | } | ||
3712 | XD3_ASSERT (stream->match_fwd == 0); | ||
3713 | |||
3714 | case MATCH_FORWARD: | ||
3715 | case MATCH_BACKWARD: | ||
3716 | if (stream->avail_in != 0) | ||
3717 | { | ||
3718 | if ((ret = xd3_source_extend_match (stream)) != 0) | ||
3719 | { | ||
3720 | return ret; | ||
3721 | } | ||
3722 | |||
3723 | stream->input_position += stream->match_fwd; | ||
3724 | } | ||
3725 | |||
3726 | case MATCH_SEARCHING: | ||
3727 | /* Continue string matching. (It's possible that the initial match | ||
3728 | * continued through the entire input, in which case we're still in | ||
3729 | * MATCH_FORWARD and should remain so for the next input window.) */ | ||
3730 | break; | ||
3731 | } | ||
3732 | } | ||
3733 | |||
3734 | /* String matching... */ | ||
3735 | if (stream->avail_in != 0 && | ||
3736 | (ret = stream->string_match (stream))) | ||
3737 | { | ||
3738 | return ret; | ||
3739 | } | ||
3740 | |||
3741 | /* Flush the instrution buffer, then possibly add one more instruction, then emit | ||
3742 | * the header. */ | ||
3743 | stream->enc_state = ENC_FLUSH; | ||
3744 | if ((ret = xd3_iopt_flush_instructions (stream, 1)) || | ||
3745 | (ret = xd3_iopt_add_finalize (stream)) || | ||
3746 | (ret = xd3_emit_hdr (stream))) | ||
3747 | { | ||
3748 | return ret; | ||
3749 | } | ||
3750 | |||
3751 | /* Begin output. */ | ||
3752 | stream->enc_current = HDR_HEAD (stream); | ||
3753 | |||
3754 | /* Chain all the outputs together. After doing this, it looks as if there is only | ||
3755 | * one section. The other enc_heads are set to NULL to avoid freeing them more than | ||
3756 | * once. */ | ||
3757 | for (i = 1; i < ENC_SECTS; i += 1) | ||
3758 | { | ||
3759 | stream->enc_tails[i-1]->next_page = stream->enc_heads[i]; | ||
3760 | stream->enc_heads[i] = NULL; | ||
3761 | } | ||
3762 | |||
3763 | enc_output: | ||
3764 | |||
3765 | stream->enc_state = ENC_POSTOUT; | ||
3766 | stream->next_out = stream->enc_current->base; | ||
3767 | stream->avail_out = stream->enc_current->next; | ||
3768 | stream->total_out += (xoff_t) stream->avail_out; | ||
3769 | |||
3770 | /* If there is any output in this buffer, return it, otherwise fall through to | ||
3771 | * handle the next buffer or finish the window after all buffers have been | ||
3772 | * output. */ | ||
3773 | if (stream->avail_out > 0) | ||
3774 | { | ||
3775 | /* This is the only place xd3_encode returns XD3_OUTPUT */ | ||
3776 | return XD3_OUTPUT; | ||
3777 | } | ||
3778 | |||
3779 | case ENC_POSTOUT: | ||
3780 | |||
3781 | if (stream->avail_out != 0) | ||
3782 | { | ||
3783 | stream->msg = "missed call to consume output"; | ||
3784 | return EINVAL; | ||
3785 | } | ||
3786 | |||
3787 | /* Continue outputting one buffer at a time, until the next is NULL. */ | ||
3788 | if ((stream->enc_current = stream->enc_current->next_page) != NULL) | ||
3789 | { | ||
3790 | goto enc_output; | ||
3791 | } | ||
3792 | |||
3793 | stream->total_in += (xoff_t) stream->avail_in; | ||
3794 | stream->enc_state = ENC_POSTWIN; | ||
3795 | |||
3796 | return XD3_WINFINISH; | ||
3797 | |||
3798 | case ENC_POSTWIN: | ||
3799 | |||
3800 | xd3_encode_reset (stream); | ||
3801 | |||
3802 | stream->current_window += 1; | ||
3803 | stream->enc_state = ENC_INPUT; | ||
3804 | |||
3805 | /* If there is leftover input to flush, repeat. */ | ||
3806 | if ((stream->buf_leftover != NULL) && (stream->flags & XD3_FLUSH)) | ||
3807 | { | ||
3808 | goto enc_flush; | ||
3809 | } | ||
3810 | |||
3811 | /* Ready for more input. */ | ||
3812 | return XD3_INPUT; | ||
3813 | |||
3814 | default: | ||
3815 | stream->msg = "invalid state"; | ||
3816 | return EINVAL; | ||
3817 | } | ||
3818 | } | ||
3819 | #endif /* XD3_ENCODER */ | ||
3820 | |||
3821 | /****************************************************************************************** | ||
3822 | Client convenience functions | ||
3823 | ******************************************************************************************/ | ||
3824 | |||
3825 | /* This function invokes either encode or decode to and from in-memory arrays. The output array | ||
3826 | * must be large enough to hold the output or else ENOSPC is returned. */ | ||
3827 | static int | ||
3828 | xd3_process_completely (xd3_stream *stream, | ||
3829 | int (*func) (xd3_stream *), | ||
3830 | int close_stream, | ||
3831 | const uint8_t *input, | ||
3832 | usize_t input_size, | ||
3833 | uint8_t *output, | ||
3834 | usize_t *output_size, | ||
3835 | usize_t avail_size) | ||
3836 | { | ||
3837 | (*output_size) = 0; | ||
3838 | |||
3839 | stream->flags |= XD3_FLUSH; | ||
3840 | |||
3841 | xd3_avail_input (stream, input, input_size); | ||
3842 | |||
3843 | for (;;) | ||
3844 | { | ||
3845 | int ret; | ||
3846 | switch((ret = func (stream))) | ||
3847 | { | ||
3848 | case XD3_OUTPUT: { /* memcpy below */ break; } | ||
3849 | case XD3_INPUT: { /* this means EOF */ goto done; } | ||
3850 | case XD3_GOTHEADER: { /* ignore */ continue; } | ||
3851 | case XD3_WINSTART: { /* ignore */ continue; } | ||
3852 | case XD3_WINFINISH: { /* ignore */ continue; } | ||
3853 | case XD3_GETSRCBLK: | ||
3854 | { | ||
3855 | stream->msg = "stream requires source input"; | ||
3856 | return EINVAL; | ||
3857 | } | ||
3858 | case 0: /* there is no plain "success" return for xd3_encode/decode */ | ||
3859 | XD3_ASSERT (ret != 0); | ||
3860 | default: | ||
3861 | return ret; | ||
3862 | } | ||
3863 | |||
3864 | if (*output_size + stream->avail_out > avail_size) | ||
3865 | { | ||
3866 | stream->msg = "insufficient output space"; | ||
3867 | return ENOSPC; | ||
3868 | } | ||
3869 | |||
3870 | memcpy (output + *output_size, stream->next_out, stream->avail_out); | ||
3871 | |||
3872 | *output_size += stream->avail_out; | ||
3873 | |||
3874 | xd3_consume_output (stream); | ||
3875 | } | ||
3876 | done: | ||
3877 | return (close_stream == 0) ? 0 : xd3_close_stream (stream); | ||
3878 | } | ||
3879 | |||
3880 | int | ||
3881 | xd3_decode_completely (xd3_stream *stream, | ||
3882 | const uint8_t *input, | ||
3883 | usize_t input_size, | ||
3884 | uint8_t *output, | ||
3885 | usize_t *output_size, | ||
3886 | usize_t avail_size) | ||
3887 | { | ||
3888 | return xd3_process_completely (stream, & xd3_decode_input, 1, | ||
3889 | input, input_size, | ||
3890 | output, output_size, avail_size); | ||
3891 | } | ||
3892 | |||
3893 | #if XD3_ENCODER | ||
3894 | int | ||
3895 | xd3_encode_completely (xd3_stream *stream, | ||
3896 | const uint8_t *input, | ||
3897 | usize_t input_size, | ||
3898 | uint8_t *output, | ||
3899 | usize_t *output_size, | ||
3900 | usize_t avail_size) | ||
3901 | { | ||
3902 | return xd3_process_completely (stream, & xd3_encode_input, 1, | ||
3903 | input, input_size, | ||
3904 | output, output_size, avail_size); | ||
3905 | } | ||
3906 | #endif | ||
3907 | |||
3908 | /****************************************************************************************** | ||
3909 | DECODE stuff | ||
3910 | ******************************************************************************************/ | ||
3911 | |||
3912 | /* Return true if the caller must provide a source. Theoretically, this has to be checked | ||
3913 | * after every window. It could be that the first window requires no source, but the | ||
3914 | * second window does. In practice? */ | ||
3915 | int xd3_decoder_needs_source (xd3_stream *stream) | ||
3916 | { | ||
3917 | return stream->dec_win_ind & VCD_SOURCE; | ||
3918 | } | ||
3919 | |||
3920 | /* Initialize the decoder for a new window. The dec_tgtlen value is preserved across | ||
3921 | * successive window decodings, and the update to dec_winstart is delayed until a new | ||
3922 | * window actually starts. This is to avoid throwing an error due to overflow until the | ||
3923 | * last possible moment. This makes it possible to encode exactly 4GB through a 32-bit | ||
3924 | * encoder. */ | ||
3925 | static int | ||
3926 | xd3_decode_init_window (xd3_stream *stream) | ||
3927 | { | ||
3928 | stream->dec_cpylen = 0; | ||
3929 | stream->dec_cpyoff = 0; | ||
3930 | stream->dec_cksumbytes = 0; | ||
3931 | |||
3932 | xd3_init_cache (& stream->acache); | ||
3933 | |||
3934 | return 0; | ||
3935 | } | ||
3936 | |||
3937 | /* Allocates buffer space for the target window and possibly the VCD_TARGET copy-window. | ||
3938 | * Also sets the base of the two copy segments. */ | ||
3939 | static int | ||
3940 | xd3_decode_setup_buffers (xd3_stream *stream) | ||
3941 | { | ||
3942 | /* If VCD_TARGET is set then the previous buffer may be reused. */ | ||
3943 | if (stream->dec_win_ind & VCD_TARGET) | ||
3944 | { | ||
3945 | /* But this implementation only supports copying from the last target window. If the | ||
3946 | * offset is outside that range, it can't be done. */ | ||
3947 | if (stream->dec_cpyoff < stream->dec_laststart) | ||
3948 | { | ||
3949 | stream->msg = "unsupported VCD_TARGET offset"; | ||
3950 | return EINVAL; | ||
3951 | } | ||
3952 | |||
3953 | /* See if the two windows are the same. This indicates the first time VCD_TARGET is | ||
3954 | * used. This causes a second buffer to be allocated, after that the two are | ||
3955 | * swapped in the DEC_FINISH case. */ | ||
3956 | if (stream->dec_lastwin == stream->next_out) | ||
3957 | { | ||
3958 | stream->next_out = NULL; | ||
3959 | stream->space_out = 0; | ||
3960 | } | ||
3961 | |||
3962 | stream->dec_cpyaddrbase = stream->dec_lastwin + (usize_t) (stream->dec_cpyoff - stream->dec_laststart); | ||
3963 | } | ||
3964 | |||
3965 | /* See if the current output window is large enough. */ | ||
3966 | if (stream->space_out < stream->dec_tgtlen) | ||
3967 | { | ||
3968 | xd3_free (stream, stream->dec_buffer); | ||
3969 | |||
3970 | stream->space_out = xd3_round_blksize (stream->dec_tgtlen, XD3_ALLOCSIZE); | ||
3971 | |||
3972 | if ((stream->dec_buffer = xd3_alloc (stream, stream->space_out, 1)) == NULL) | ||
3973 | { | ||
3974 | return ENOMEM; | ||
3975 | } | ||
3976 | |||
3977 | stream->next_out = stream->dec_buffer; | ||
3978 | } | ||
3979 | |||
3980 | /* dec_tgtaddrbase refers to an invalid base address, but it is always used with a | ||
3981 | * sufficiently large instruction offset (i.e., beyond the copy window). This condition | ||
3982 | * is enforced by xd3_decode_output_halfinst. */ | ||
3983 | stream->dec_tgtaddrbase = stream->next_out - stream->dec_cpylen; | ||
3984 | |||
3985 | return 0; | ||
3986 | } | ||
3987 | |||
3988 | static int | ||
3989 | xd3_decode_allocate (xd3_stream *stream, | ||
3990 | usize_t size, | ||
3991 | uint8_t **copied1, | ||
3992 | usize_t *alloc1, | ||
3993 | uint8_t **copied2, | ||
3994 | usize_t *alloc2) | ||
3995 | { | ||
3996 | if (*copied1 != NULL && *alloc1 < size) | ||
3997 | { | ||
3998 | xd3_free (stream, *copied1); | ||
3999 | *copied1 = NULL; | ||
4000 | } | ||
4001 | |||
4002 | if (*copied1 == NULL) | ||
4003 | { | ||
4004 | #if SECONDARY_ANY | ||
4005 | /* Borrow from the secondary compressor's allocation. */ | ||
4006 | if (copied2 != NULL && *copied2 != NULL && *alloc2 < size) | ||
4007 | { | ||
4008 | *copied1 = *copied2; | ||
4009 | *alloc1 = *alloc2; | ||
4010 | *copied2 = NULL; | ||
4011 | *alloc2 = 0; | ||
4012 | } | ||
4013 | else | ||
4014 | #endif | ||
4015 | { | ||
4016 | *alloc1 = xd3_round_blksize (size, XD3_ALLOCSIZE); | ||
4017 | |||
4018 | if ((*copied1 = xd3_alloc (stream, *alloc1, 1)) == NULL) | ||
4019 | { | ||
4020 | return ENOMEM; | ||
4021 | } | ||
4022 | } | ||
4023 | } | ||
4024 | |||
4025 | return 0; | ||
4026 | } | ||
4027 | |||
4028 | static int | ||
4029 | xd3_decode_section (xd3_stream *stream, | ||
4030 | xd3_desect *section, | ||
4031 | xd3_decode_state nstate, | ||
4032 | int copy) | ||
4033 | { | ||
4034 | XD3_ASSERT (section->pos <= section->size); | ||
4035 | XD3_ASSERT (stream->dec_state != nstate); | ||
4036 | |||
4037 | if (section->pos < section->size) | ||
4038 | { | ||
4039 | usize_t sect_take; | ||
4040 | |||
4041 | if (stream->avail_in == 0) | ||
4042 | { | ||
4043 | return XD3_INPUT; | ||
4044 | } | ||
4045 | |||
4046 | if ((copy == 0) && (section->pos == 0)) | ||
4047 | { | ||
4048 | /* No allocation/copy needed */ | ||
4049 | section->buf = stream->next_in; | ||
4050 | sect_take = section->size; | ||
4051 | } | ||
4052 | else | ||
4053 | { | ||
4054 | usize_t sect_need = section->size - section->pos; | ||
4055 | |||
4056 | /* Allocate and copy */ | ||
4057 | sect_take = min (sect_need, stream->avail_in); | ||
4058 | |||
4059 | if (section->pos == 0) | ||
4060 | { | ||
4061 | int ret; | ||
4062 | |||
4063 | if ((ret = xd3_decode_allocate (stream, | ||
4064 | section->size, | ||
4065 | & section->copied1, | ||
4066 | & section->alloc1, | ||
4067 | & section->copied2, | ||
4068 | & section->alloc2))) { return ret; } | ||
4069 | |||
4070 | section->buf = section->copied1; | ||
4071 | } | ||
4072 | |||
4073 | memcpy (section->copied1 + section->pos, | ||
4074 | stream->next_in, | ||
4075 | sect_take); | ||
4076 | } | ||
4077 | |||
4078 | section->pos += sect_take; | ||
4079 | |||
4080 | stream->dec_winbytes += sect_take; | ||
4081 | |||
4082 | DECODE_INPUT (sect_take); | ||
4083 | } | ||
4084 | |||
4085 | if (section->pos < section->size) | ||
4086 | { | ||
4087 | stream->msg = "further input required"; | ||
4088 | return XD3_INPUT; | ||
4089 | } | ||
4090 | |||
4091 | XD3_ASSERT (section->pos == section->size); | ||
4092 | |||
4093 | stream->dec_state = nstate; | ||
4094 | section->buf_max = section->buf + section->size; | ||
4095 | section->pos = 0; | ||
4096 | return 0; | ||
4097 | } | ||
4098 | |||
4099 | /* Decode the size and address for half of an instruction (i.e., a single opcode). This | ||
4100 | * updates the stream->dec_position, which are bytes already output prior to processing | ||
4101 | * this instruction. Perform bounds checking for sizes and copy addresses, which uses the | ||
4102 | * dec_position (which is why these checks are done here). */ | ||
4103 | static int | ||
4104 | xd3_decode_parse_halfinst (xd3_stream *stream, xd3_hinst *inst) | ||
4105 | { | ||
4106 | int ret; | ||
4107 | |||
4108 | /* If the size from the instruction table is zero then read a size value. */ | ||
4109 | if ((inst->size == 0) && | ||
4110 | (ret = xd3_read_size (stream, | ||
4111 | & stream->inst_sect.buf, | ||
4112 | stream->inst_sect.buf_max, | ||
4113 | |||
4114 | & inst->size))) | ||
4115 | { | ||
4116 | return EINVAL; | ||
4117 | } | ||
4118 | |||
4119 | /* For copy instructions, read address. */ | ||
4120 | if (inst->type >= XD3_CPY) | ||
4121 | { | ||
4122 | IF_DEBUG1 ({ | ||
4123 | static int cnt = 0; | ||
4124 | P(RINT "DECODE:%u: COPY at %"Q"u (winoffset %u) size %u winaddr %u\n", | ||
4125 | cnt++, | ||
4126 | stream->total_out + (stream->dec_position - stream->dec_cpylen), | ||
4127 | (stream->dec_position - stream->dec_cpylen), | ||
4128 | inst->size, | ||
4129 | inst->addr); | ||
4130 | }); | ||
4131 | |||
4132 | if ((ret = xd3_decode_address (stream, | ||
4133 | stream->dec_position, | ||
4134 | inst->type - XD3_CPY, | ||
4135 | & stream->addr_sect.buf, | ||
4136 | stream->addr_sect.buf_max, | ||
4137 | & inst->addr))) | ||
4138 | { | ||
4139 | return ret; | ||
4140 | } | ||
4141 | |||
4142 | /* Cannot copy an address before it is filled-in. */ | ||
4143 | if (inst->addr >= stream->dec_position) | ||
4144 | { | ||
4145 | stream->msg = "address too large"; | ||
4146 | return EINVAL; | ||
4147 | } | ||
4148 | |||
4149 | /* Check: a VCD_TARGET or VCD_SOURCE copy cannot exceed the remaining buffer space | ||
4150 | * in its own segment. */ | ||
4151 | if (inst->addr < stream->dec_cpylen && inst->addr + inst->size > stream->dec_cpylen) | ||
4152 | { | ||
4153 | stream->msg = "size too large"; | ||
4154 | return EINVAL; | ||
4155 | } | ||
4156 | } | ||
4157 | else | ||
4158 | { | ||
4159 | IF_DEBUG1 ({ | ||
4160 | if (inst->type == XD3_ADD) | ||
4161 | { | ||
4162 | static int cnt; | ||
4163 | P(RINT "DECODE:%d: ADD at %"Q"u (winoffset %u) size %u\n", | ||
4164 | cnt++, | ||
4165 | stream->total_out + stream->dec_position - stream->dec_cpylen, | ||
4166 | stream->dec_position - stream->dec_cpylen, | ||
4167 | inst->size); | ||
4168 | } | ||
4169 | else | ||
4170 | { | ||
4171 | static int cnt; | ||
4172 | XD3_ASSERT (inst->type == XD3_RUN); | ||
4173 | P(RINT "DECODE:%d: RUN at %"Q"u (winoffset %u) size %u\n", | ||
4174 | cnt++, | ||
4175 | stream->total_out + stream->dec_position - stream->dec_cpylen, | ||
4176 | stream->dec_position - stream->dec_cpylen, | ||
4177 | inst->size); | ||
4178 | } | ||
4179 | }); | ||
4180 | } | ||
4181 | |||
4182 | /* Check: The instruction will not overflow the output buffer. */ | ||
4183 | if (stream->dec_position + inst->size > stream->dec_maxpos) | ||
4184 | { | ||
4185 | stream->msg = "size too large"; | ||
4186 | return EINVAL; | ||
4187 | } | ||
4188 | |||
4189 | stream->dec_position += inst->size; | ||
4190 | return 0; | ||
4191 | } | ||
4192 | |||
4193 | /* Decode a single opcode and then decode the two half-instructions. */ | ||
4194 | static int | ||
4195 | xd3_decode_instruction (xd3_stream *stream) | ||
4196 | { | ||
4197 | int ret; | ||
4198 | const xd3_dinst *inst; | ||
4199 | |||
4200 | if (stream->inst_sect.buf == stream->inst_sect.buf_max) | ||
4201 | { | ||
4202 | stream->msg = "instruction underflow"; | ||
4203 | return EINVAL; | ||
4204 | } | ||
4205 | |||
4206 | inst = &stream->code_table[*stream->inst_sect.buf++]; | ||
4207 | |||
4208 | stream->dec_current1.type = inst->type1; | ||
4209 | stream->dec_current2.type = inst->type2; | ||
4210 | stream->dec_current1.size = inst->size1; | ||
4211 | stream->dec_current2.size = inst->size2; | ||
4212 | |||
4213 | /* For each instruction with a real operation, decode the corresponding size and | ||
4214 | * addresses if necessary. Assume a code-table may have NOOP in either position, | ||
4215 | * although this is unlikely. */ | ||
4216 | if (inst->type1 != XD3_NOOP && (ret = xd3_decode_parse_halfinst (stream, & stream->dec_current1))) | ||
4217 | { | ||
4218 | return ret; | ||
4219 | } | ||
4220 | if (inst->type2 != XD3_NOOP && (ret = xd3_decode_parse_halfinst (stream, & stream->dec_current2))) | ||
4221 | { | ||
4222 | return ret; | ||
4223 | } | ||
4224 | return 0; | ||
4225 | } | ||
4226 | |||
4227 | /* Output the result of a single half-instruction. OPT: This the decoder hotspot. */ | ||
4228 | static int | ||
4229 | xd3_decode_output_halfinst (xd3_stream *stream, xd3_hinst *inst) | ||
4230 | { | ||
4231 | /* To make this reentrant, set take = min (inst->size, available space)... */ | ||
4232 | usize_t take = inst->size; | ||
4233 | |||
4234 | XD3_ASSERT (inst->type != XD3_NOOP); | ||
4235 | |||
4236 | switch (inst->type) | ||
4237 | { | ||
4238 | case XD3_RUN: | ||
4239 | { | ||
4240 | /* Only require a single data byte. */ | ||
4241 | if (stream->data_sect.buf == stream->data_sect.buf_max) | ||
4242 | { | ||
4243 | stream->msg = "data underflow"; | ||
4244 | return EINVAL; | ||
4245 | } | ||
4246 | |||
4247 | /* TUNE: Probably want to eliminate memset/memcpy here */ | ||
4248 | memset (stream->next_out + stream->avail_out, | ||
4249 | stream->data_sect.buf[0], | ||
4250 | take); | ||
4251 | |||
4252 | stream->data_sect.buf += 1; | ||
4253 | stream->avail_out += take; | ||
4254 | inst->type = XD3_NOOP; | ||
4255 | break; | ||
4256 | } | ||
4257 | case XD3_ADD: | ||
4258 | { | ||
4259 | /* Require at least TAKE data bytes. */ | ||
4260 | if (stream->data_sect.buf + take > stream->data_sect.buf_max) | ||
4261 | { | ||
4262 | stream->msg = "data underflow"; | ||
4263 | return EINVAL; | ||
4264 | } | ||
4265 | |||
4266 | memcpy (stream->next_out + stream->avail_out, | ||
4267 | stream->data_sect.buf, | ||
4268 | take); | ||
4269 | |||
4270 | stream->data_sect.buf += take; | ||
4271 | stream->avail_out += take; | ||
4272 | inst->type = XD3_NOOP; | ||
4273 | break; | ||
4274 | } | ||
4275 | default: | ||
4276 | { | ||
4277 | usize_t i; | ||
4278 | const uint8_t *src; | ||
4279 | uint8_t *dst; | ||
4280 | |||
4281 | /* See if it copies from the VCD_TARGET/VCD_SOURCE window or the target window. | ||
4282 | * Out-of-bounds checks for the addresses and sizes are performed in | ||
4283 | * xd3_decode_parse_halfinst. */ | ||
4284 | if (inst->addr < stream->dec_cpylen) | ||
4285 | { | ||
4286 | if (stream->dec_win_ind & VCD_TARGET) | ||
4287 | { | ||
4288 | /* For VCD_TARGET we know the entire range is in-memory, as established by | ||
4289 | * decode_setup_buffers. */ | ||
4290 | src = stream->dec_cpyaddrbase + inst->addr; | ||
4291 | inst->type = XD3_NOOP; | ||
4292 | inst->size = 0; | ||
4293 | } | ||
4294 | else | ||
4295 | { | ||
4296 | /* In this case we have to read a source block, which could return control | ||
4297 | * to the caller. We need to know the first block number needed for this | ||
4298 | * copy. */ | ||
4299 | xd3_source *source; | ||
4300 | xoff_t block; | ||
4301 | usize_t blkoff; | ||
4302 | usize_t blksize; | ||
4303 | int ret; | ||
4304 | |||
4305 | more: | ||
4306 | |||
4307 | source = stream->src; | ||
4308 | block = source->cpyoff_blocks; | ||
4309 | blkoff = source->cpyoff_blkoff + inst->addr; | ||
4310 | blksize = source->blksize; | ||
4311 | |||
4312 | while (blkoff >= blksize) | ||
4313 | { | ||
4314 | block += 1; | ||
4315 | blkoff -= blksize; | ||
4316 | } | ||
4317 | |||
4318 | if ((ret = xd3_getblk (stream, block))) | ||
4319 | { | ||
4320 | /* could be a XD3_GETSRCBLK failure. */ | ||
4321 | return ret; | ||
4322 | } | ||
4323 | |||
4324 | src = source->curblk + blkoff; | ||
4325 | |||
4326 | /* This block either contains enough data or the source file is | ||
4327 | * short. */ | ||
4328 | if ((source->onblk != blksize) && (blkoff + take > source->onblk)) | ||
4329 | { | ||
4330 | stream->msg = "source file too short"; | ||
4331 | return EINVAL; | ||
4332 | |||
4333 | } | ||
4334 | |||
4335 | XD3_ASSERT (blkoff != blksize); | ||
4336 | |||
4337 | if (blkoff + take <= blksize) | ||
4338 | { | ||
4339 | inst->type = XD3_NOOP; | ||
4340 | inst->size = 0; | ||
4341 | } | ||
4342 | else | ||
4343 | { | ||
4344 | /* This block doesn't contain all the data, modify the instruction, do | ||
4345 | * not set to XD3_NOOP. */ | ||
4346 | take = blksize - blkoff; | ||
4347 | inst->size -= take; | ||
4348 | inst->addr += take; | ||
4349 | } | ||
4350 | } | ||
4351 | } | ||
4352 | else | ||
4353 | { | ||
4354 | /* For a target-window copy, we know the entire range is in-memory. The | ||
4355 | * dec_tgtaddrbase is negatively offset by dec_cpylen because the addresses | ||
4356 | * start beyond that point. */ | ||
4357 | src = stream->dec_tgtaddrbase + inst->addr; | ||
4358 | inst->type = XD3_NOOP; | ||
4359 | inst->size = 0; | ||
4360 | } | ||
4361 | |||
4362 | dst = stream->next_out + stream->avail_out; | ||
4363 | |||
4364 | stream->avail_out += take; | ||
4365 | |||
4366 | /* Can't just memcpy here due to possible overlap. */ | ||
4367 | for (i = take; i != 0; i -= 1) | ||
4368 | { | ||
4369 | *dst++ = *src++; | ||
4370 | } | ||
4371 | |||
4372 | take = inst->size; | ||
4373 | |||
4374 | /* If there is more to copy, call getblk again. */ | ||
4375 | if (inst->type != XD3_NOOP) | ||
4376 | { | ||
4377 | XD3_ASSERT (take > 0); | ||
4378 | goto more; | ||
4379 | } | ||
4380 | else | ||
4381 | { | ||
4382 | XD3_ASSERT (take == 0); | ||
4383 | } | ||
4384 | } | ||
4385 | } | ||
4386 | |||
4387 | return 0; | ||
4388 | } | ||
4389 | |||
4390 | static int | ||
4391 | xd3_decode_finish_window (xd3_stream *stream) | ||
4392 | { | ||
4393 | stream->dec_winbytes = 0; | ||
4394 | stream->dec_state = DEC_FINISH; | ||
4395 | |||
4396 | stream->data_sect.pos = 0; | ||
4397 | stream->inst_sect.pos = 0; | ||
4398 | stream->addr_sect.pos = 0; | ||
4399 | |||
4400 | return XD3_OUTPUT; | ||
4401 | } | ||
4402 | |||
4403 | static int | ||
4404 | xd3_decode_sections (xd3_stream *stream) | ||
4405 | { | ||
4406 | usize_t need, more, take; | ||
4407 | int copy, ret; | ||
4408 | |||
4409 | if ((stream->flags & XD3_JUST_HDR) != 0) | ||
4410 | { | ||
4411 | /* Nothing left to do. */ | ||
4412 | return xd3_decode_finish_window (stream); | ||
4413 | } | ||
4414 | |||
4415 | /* To avoid copying, need this much data available */ | ||
4416 | need = (stream->inst_sect.size + | ||
4417 | stream->addr_sect.size + | ||
4418 | stream->data_sect.size); | ||
4419 | |||
4420 | /* The window may be entirely processed. */ | ||
4421 | XD3_ASSERT (stream->dec_winbytes <= need); | ||
4422 | |||
4423 | /* Compute how much more input is needed. */ | ||
4424 | more = (need - stream->dec_winbytes); | ||
4425 | |||
4426 | /* How much to consume. */ | ||
4427 | take = min (more, stream->avail_in); | ||
4428 | |||
4429 | /* See if the input is completely available, to avoid copy. */ | ||
4430 | copy = (take != more); | ||
4431 | |||
4432 | /* If the window is skipped... */ | ||
4433 | if ((stream->flags & XD3_SKIP_WINDOW) != 0) | ||
4434 | { | ||
4435 | /* Skip the available input. */ | ||
4436 | DECODE_INPUT (take); | ||
4437 | |||
4438 | stream->dec_winbytes += take; | ||
4439 | |||
4440 | if (copy) | ||
4441 | { | ||
4442 | stream->msg = "further input required"; | ||
4443 | return XD3_INPUT; | ||
4444 | } | ||
4445 | |||
4446 | return xd3_decode_finish_window (stream); | ||
4447 | } | ||
4448 | |||
4449 | /* Process all but the DATA section. */ | ||
4450 | switch (stream->dec_state) | ||
4451 | { | ||
4452 | default: | ||
4453 | stream->msg = "internal error"; | ||
4454 | return EINVAL; | ||
4455 | |||
4456 | case DEC_DATA: | ||
4457 | if ((ret = xd3_decode_section (stream, & stream->data_sect, DEC_INST, copy))) { return ret; } | ||
4458 | case DEC_INST: | ||
4459 | if ((ret = xd3_decode_section (stream, & stream->inst_sect, DEC_ADDR, copy))) { return ret; } | ||
4460 | case DEC_ADDR: | ||
4461 | if ((ret = xd3_decode_section (stream, & stream->addr_sect, DEC_EMIT, copy))) { return ret; } | ||
4462 | } | ||
4463 | |||
4464 | XD3_ASSERT (stream->dec_winbytes == need); | ||
4465 | |||
4466 | #if SECONDARY_ANY | ||
4467 | #define DECODE_SECONDARY_SECTION(UPPER,LOWER) \ | ||
4468 | ((stream->dec_del_ind & VCD_ ## UPPER ## COMP) && \ | ||
4469 | (ret = xd3_decode_secondary (stream, & stream-> LOWER ## _sect, \ | ||
4470 | & xd3_sec_ ## LOWER (stream)))) | ||
4471 | |||
4472 | if (DECODE_SECONDARY_SECTION (DATA, data) || | ||
4473 | DECODE_SECONDARY_SECTION (INST, inst) || | ||
4474 | DECODE_SECONDARY_SECTION (ADDR, addr)) | ||
4475 | { | ||
4476 | return ret; | ||
4477 | } | ||
4478 | #endif | ||
4479 | |||
4480 | if (stream->flags & XD3_SKIP_EMIT) | ||
4481 | { | ||
4482 | return xd3_decode_finish_window (stream); | ||
4483 | } | ||
4484 | |||
4485 | /* OPT: A possible optimization is to avoid allocating memory in decode_setup_buffers | ||
4486 | * and to avoid a large memcpy when the window consists of a single VCD_SOURCE copy | ||
4487 | * instruction. The only potential problem is if the following window is a VCD_TARGET, | ||
4488 | * then you need to remember... */ | ||
4489 | if ((ret = xd3_decode_setup_buffers (stream))) { return ret; } | ||
4490 | |||
4491 | return 0; | ||
4492 | } | ||
4493 | |||
4494 | static int | ||
4495 | xd3_decode_emit (xd3_stream *stream) | ||
4496 | { | ||
4497 | int ret; | ||
4498 | |||
4499 | /* Produce output: originally structured to allow reentrant code that fills as much of | ||
4500 | * the output buffer as possible, but VCDIFF semantics allows to copy from anywhere from | ||
4501 | * the target window, so instead allocate a sufficiently sized buffer after the target | ||
4502 | * window length is decoded. | ||
4503 | * | ||
4504 | * This code still needs to be reentrant to allow XD3_GETSRCBLK to return control. This | ||
4505 | * is handled by setting the stream->dec_currentN instruction types to XD3_NOOP after | ||
4506 | * they have been processed. */ | ||
4507 | XD3_ASSERT (! (stream->flags & XD3_SKIP_EMIT)); | ||
4508 | XD3_ASSERT (stream->avail_out == 0); | ||
4509 | XD3_ASSERT (stream->dec_tgtlen <= stream->space_out); | ||
4510 | |||
4511 | while (stream->inst_sect.buf != stream->inst_sect.buf_max) | ||
4512 | { | ||
4513 | /* Decode next instruction pair. */ | ||
4514 | if ((stream->dec_current1.type == XD3_NOOP) && | ||
4515 | (stream->dec_current2.type == XD3_NOOP) && | ||
4516 | (ret = xd3_decode_instruction (stream))) { return ret; } | ||
4517 | |||
4518 | /* Output for each instruction. */ | ||
4519 | if ((stream->dec_current1.type != XD3_NOOP) && | ||
4520 | (ret = xd3_decode_output_halfinst (stream, & stream->dec_current1))) { return ret; } | ||
4521 | |||
4522 | if ((stream->dec_current2.type != XD3_NOOP) && | ||
4523 | (ret = xd3_decode_output_halfinst (stream, & stream->dec_current2))) { return ret; } | ||
4524 | } | ||
4525 | |||
4526 | if (stream->avail_out != stream->dec_tgtlen) | ||
4527 | { | ||
4528 | IF_DEBUG1 (P(RINT "AVAIL_OUT(%d) != DEC_TGTLEN(%d)\n", stream->avail_out, stream->dec_tgtlen)); | ||
4529 | stream->msg = "wrong window length"; | ||
4530 | return EINVAL; | ||
4531 | } | ||
4532 | |||
4533 | if (stream->data_sect.buf != stream->data_sect.buf_max) | ||
4534 | { | ||
4535 | stream->msg = "extra data section"; | ||
4536 | return EINVAL; | ||
4537 | } | ||
4538 | |||
4539 | if (stream->addr_sect.buf != stream->addr_sect.buf_max) | ||
4540 | { | ||
4541 | stream->msg = "extra address section"; | ||
4542 | return EINVAL; | ||
4543 | } | ||
4544 | |||
4545 | /* OPT: Should cksum computation be combined with the above loop? */ | ||
4546 | if ((stream->dec_win_ind & VCD_ADLER32) != 0 && | ||
4547 | (stream->flags & XD3_ADLER32_NOVER) == 0) | ||
4548 | { | ||
4549 | uint32_t a32 = adler32 (1L, stream->next_out, stream->avail_out); | ||
4550 | |||
4551 | if (a32 != stream->dec_adler32) | ||
4552 | { | ||
4553 | stream->msg = "target window checksum mismatch"; | ||
4554 | return EINVAL; | ||
4555 | } | ||
4556 | } | ||
4557 | |||
4558 | /* Finished with a window. */ | ||
4559 | return xd3_decode_finish_window (stream); | ||
4560 | } | ||
4561 | |||
4562 | int | ||
4563 | xd3_decode_input (xd3_stream *stream) | ||
4564 | { | ||
4565 | int ret; | ||
4566 | |||
4567 | if (stream->enc_state != 0) | ||
4568 | { | ||
4569 | stream->msg = "encoder/decoder transition"; | ||
4570 | return EINVAL; | ||
4571 | } | ||
4572 | |||
4573 | #define BYTE_CASE(expr,x,nstate) \ | ||
4574 | do { \ | ||
4575 | if ( (expr) && \ | ||
4576 | ((ret = xd3_decode_byte (stream, & (x))) != 0) ) { return ret; } \ | ||
4577 | stream->dec_state = (nstate); \ | ||
4578 | } while (0) | ||
4579 | |||
4580 | #define OFFSET_CASE(expr,x,nstate) \ | ||
4581 | do { \ | ||
4582 | if ( (expr) && \ | ||
4583 | ((ret = xd3_decode_offset (stream, & (x))) != 0) ) { return ret; } \ | ||
4584 | stream->dec_state = (nstate); \ | ||
4585 | } while (0) | ||
4586 | |||
4587 | #define SIZE_CASE(expr,x,nstate) \ | ||
4588 | do { \ | ||
4589 | if ( (expr) && \ | ||
4590 | ((ret = xd3_decode_size (stream, & (x))) != 0) ) { return ret; } \ | ||
4591 | stream->dec_state = (nstate); \ | ||
4592 | } while (0) | ||
4593 | |||
4594 | #define SRCORTGT(x) (((x) & VCD_SRCORTGT) == VCD_SOURCE || \ | ||
4595 | ((x) & VCD_SRCORTGT) == VCD_TARGET) | ||
4596 | |||
4597 | switch (stream->dec_state) | ||
4598 | { | ||
4599 | case DEC_VCHEAD: | ||
4600 | { | ||
4601 | if ((ret = xd3_decode_bytes (stream, stream->dec_magic, & stream->dec_magicbytes, 4))) { return ret; } | ||
4602 | |||
4603 | if (stream->dec_magic[0] != VCDIFF_MAGIC1 || | ||
4604 | stream->dec_magic[1] != VCDIFF_MAGIC2 || | ||
4605 | stream->dec_magic[2] != VCDIFF_MAGIC3) | ||
4606 | { | ||
4607 | stream->msg = "not a VCDIFF input"; | ||
4608 | return EINVAL; | ||
4609 | } | ||
4610 | |||
4611 | if (stream->dec_magic[3] != 0) | ||
4612 | { | ||
4613 | stream->msg = "VCDIFF input version > 0 is not supported"; | ||
4614 | return EINVAL; | ||
4615 | } | ||
4616 | |||
4617 | stream->dec_state = DEC_HDRIND; | ||
4618 | } | ||
4619 | case DEC_HDRIND: | ||
4620 | { | ||
4621 | if ((ret = xd3_decode_byte (stream, & stream->dec_hdr_ind))) { return ret; } | ||
4622 | |||
4623 | if ((stream->dec_hdr_ind & VCD_INVHDR) != 0) | ||
4624 | { | ||
4625 | stream->msg = "unrecognized header indicator bits set"; | ||
4626 | return EINVAL; | ||
4627 | } | ||
4628 | |||
4629 | stream->dec_state = DEC_SECONDID; | ||
4630 | } | ||
4631 | |||
4632 | case DEC_SECONDID: | ||
4633 | /* Secondary compressor ID: only if VCD_SECONDARY is set */ | ||
4634 | if ((stream->dec_hdr_ind & VCD_SECONDARY) != 0) | ||
4635 | { | ||
4636 | BYTE_CASE (1, stream->dec_secondid, DEC_TABLEN); | ||
4637 | |||
4638 | switch (stream->dec_secondid) | ||
4639 | { | ||
4640 | case VCD_FGK_ID: | ||
4641 | FGK_CASE (stream); | ||
4642 | case VCD_DJW_ID: | ||
4643 | DJW_CASE (stream); | ||
4644 | default: | ||
4645 | stream->msg = "unknown secondary compressor ID"; | ||
4646 | return EINVAL; | ||
4647 | } | ||
4648 | } | ||
4649 | |||
4650 | case DEC_TABLEN: | ||
4651 | /* Length of code table data: only if VCD_CODETABLE is set */ | ||
4652 | SIZE_CASE ((stream->dec_hdr_ind & VCD_CODETABLE) != 0, stream->dec_codetblsz, DEC_NEAR); | ||
4653 | |||
4654 | /* The codetblsz counts the two NEAR/SAME bytes */ | ||
4655 | if ((stream->dec_hdr_ind & VCD_CODETABLE) != 0) { | ||
4656 | if (stream->dec_codetblsz <= 2) { | ||
4657 | stream->msg = "invalid code table size"; | ||
4658 | return ENOMEM; | ||
4659 | } | ||
4660 | stream->dec_codetblsz -= 2; | ||
4661 | } | ||
4662 | case DEC_NEAR: | ||
4663 | /* Near modes: only if VCD_CODETABLE is set */ | ||
4664 | BYTE_CASE((stream->dec_hdr_ind & VCD_CODETABLE) != 0, stream->acache.s_near, DEC_SAME); | ||
4665 | case DEC_SAME: | ||
4666 | /* Same modes: only if VCD_CODETABLE is set */ | ||
4667 | BYTE_CASE((stream->dec_hdr_ind & VCD_CODETABLE) != 0, stream->acache.s_same, DEC_TABDAT); | ||
4668 | case DEC_TABDAT: | ||
4669 | /* Compressed code table data */ | ||
4670 | |||
4671 | if ((stream->dec_hdr_ind & VCD_CODETABLE) != 0) | ||
4672 | { | ||
4673 | /* Get the code table data. */ | ||
4674 | if ((stream->dec_codetbl == NULL) && | ||
4675 | (stream->dec_codetbl = xd3_alloc (stream, stream->dec_codetblsz, 1)) == NULL) { return ENOMEM; } | ||
4676 | |||
4677 | if ((ret = xd3_decode_bytes (stream, stream->dec_codetbl, & stream->dec_codetblbytes, stream->dec_codetblsz))) | ||
4678 | { | ||
4679 | return ret; | ||
4680 | } | ||
4681 | |||
4682 | if ((ret = xd3_apply_table_encoding (stream, stream->dec_codetbl, stream->dec_codetblbytes))) | ||
4683 | { | ||
4684 | return ret; | ||
4685 | } | ||
4686 | } | ||
4687 | else | ||
4688 | { | ||
4689 | /* Use the default table. */ | ||
4690 | stream->acache.s_near = __rfc3284_code_table_desc.near_modes; | ||
4691 | stream->acache.s_same = __rfc3284_code_table_desc.same_modes; | ||
4692 | stream->code_table = xd3_rfc3284_code_table (); | ||
4693 | } | ||
4694 | |||
4695 | if ((ret = xd3_alloc_cache (stream))) { return ret; } | ||
4696 | |||
4697 | stream->dec_state = DEC_APPLEN; | ||
4698 | |||
4699 | case DEC_APPLEN: | ||
4700 | /* Length of application data */ | ||
4701 | SIZE_CASE((stream->dec_hdr_ind & VCD_APPHEADER) != 0, stream->dec_appheadsz, DEC_APPDAT); | ||
4702 | |||
4703 | case DEC_APPDAT: | ||
4704 | /* Application data */ | ||
4705 | if (stream->dec_hdr_ind & VCD_APPHEADER) | ||
4706 | { | ||
4707 | /* Note: we add an additional byte for padding, to allow 0-termination. */ | ||
4708 | if ((stream->dec_appheader == NULL) && | ||
4709 | (stream->dec_appheader = xd3_alloc (stream, stream->dec_appheadsz+1, 1)) == NULL) { return ENOMEM; } | ||
4710 | |||
4711 | stream->dec_appheader[stream->dec_appheadsz] = 0; | ||
4712 | |||
4713 | if ((ret = xd3_decode_bytes (stream, stream->dec_appheader, & stream->dec_appheadbytes, stream->dec_appheadsz))) | ||
4714 | { | ||
4715 | return ret; | ||
4716 | } | ||
4717 | } | ||
4718 | |||
4719 | stream->dec_hdrsize = stream->total_in; | ||
4720 | stream->dec_state = DEC_WININD; | ||
4721 | |||
4722 | case DEC_WININD: | ||
4723 | { | ||
4724 | /* Start of a window: the window indicator */ | ||
4725 | |||
4726 | if ((ret = xd3_decode_byte (stream, & stream->dec_win_ind))) { return ret; } | ||
4727 | |||
4728 | stream->current_window = stream->dec_window_count; | ||
4729 | |||
4730 | if (XOFF_T_OVERFLOW (stream->dec_winstart, stream->dec_tgtlen)) | ||
4731 | { | ||
4732 | stream->msg = "decoder file offset overflow"; | ||
4733 | return EINVAL; | ||
4734 | } | ||
4735 | |||
4736 | stream->dec_winstart += stream->dec_tgtlen; | ||
4737 | |||
4738 | if ((stream->dec_win_ind & VCD_INVWIN) != 0) | ||
4739 | { | ||
4740 | stream->msg = "unrecognized window indicator bits set"; | ||
4741 | return EINVAL; | ||
4742 | } | ||
4743 | |||
4744 | if ((ret = xd3_decode_init_window (stream))) { return ret; } | ||
4745 | |||
4746 | stream->dec_state = DEC_CPYLEN; | ||
4747 | |||
4748 | IF_DEBUG1 (P(RINT "--------- TARGET WINDOW %"Q"u ------------------\n", stream->current_window)); | ||
4749 | } | ||
4750 | |||
4751 | case DEC_CPYLEN: | ||
4752 | /* Copy window length: only if VCD_SOURCE or VCD_TARGET is set */ | ||
4753 | SIZE_CASE(SRCORTGT (stream->dec_win_ind), stream->dec_cpylen, DEC_CPYOFF); | ||
4754 | |||
4755 | /* Set the initial, logical decoder position (HERE address) in dec_position. This | ||
4756 | * is set to just after the source/copy window, as we are just about to output the | ||
4757 | * first byte of target window. */ | ||
4758 | stream->dec_position = stream->dec_cpylen; | ||
4759 | |||
4760 | case DEC_CPYOFF: | ||
4761 | /* Copy window offset: only if VCD_SOURCE or VCD_TARGET is set */ | ||
4762 | OFFSET_CASE(SRCORTGT (stream->dec_win_ind), stream->dec_cpyoff, DEC_ENCLEN); | ||
4763 | |||
4764 | /* Copy offset and copy length may not overflow. */ | ||
4765 | if (XOFF_T_OVERFLOW (stream->dec_cpyoff, stream->dec_cpylen)) | ||
4766 | { | ||
4767 | stream->msg = "decoder copy window overflows a file offset"; | ||
4768 | return EINVAL; | ||
4769 | } | ||
4770 | |||
4771 | /* Check copy window bounds: VCD_TARGET window may not exceed current position. */ | ||
4772 | if ((stream->dec_win_ind & VCD_TARGET) && | ||
4773 | (stream->dec_cpyoff + (xoff_t) stream->dec_cpylen > stream->dec_winstart)) | ||
4774 | { | ||
4775 | stream->msg = "VCD_TARGET window out of bounds"; | ||
4776 | return EINVAL; | ||
4777 | } | ||
4778 | |||
4779 | case DEC_ENCLEN: | ||
4780 | /* Length of the delta encoding */ | ||
4781 | SIZE_CASE(1, stream->dec_enclen, DEC_TGTLEN); | ||
4782 | case DEC_TGTLEN: | ||
4783 | /* Length of target window */ | ||
4784 | SIZE_CASE(1, stream->dec_tgtlen, DEC_DELIND); | ||
4785 | |||
4786 | /* Set the maximum decoder position, beyond which we should not decode any data. | ||
4787 | * This is the maximum value for dec_position. This may not exceed the size of a | ||
4788 | * usize_t. */ | ||
4789 | if (USIZE_T_OVERFLOW (stream->dec_cpylen, stream->dec_tgtlen)) | ||
4790 | { | ||
4791 | stream->msg = "decoder target window overflows a usize_t"; | ||
4792 | return EINVAL; | ||
4793 | } | ||
4794 | |||
4795 | /* Check for malicious files. */ | ||
4796 | if (stream->dec_tgtlen > XD3_HARDMAXWINSIZE) | ||
4797 | { | ||
4798 | stream->msg = "hard window size exceeded"; | ||
4799 | return EINVAL; | ||
4800 | } | ||
4801 | |||
4802 | stream->dec_maxpos = stream->dec_cpylen + stream->dec_tgtlen; | ||
4803 | |||
4804 | case DEC_DELIND: | ||
4805 | /* Delta indicator */ | ||
4806 | BYTE_CASE(1, stream->dec_del_ind, DEC_DATALEN); | ||
4807 | |||
4808 | if ((stream->dec_del_ind & VCD_INVDEL) != 0) | ||
4809 | { | ||
4810 | stream->msg = "unrecognized delta indicator bits set"; | ||
4811 | return EINVAL; | ||
4812 | } | ||
4813 | |||
4814 | /* Delta indicator is only used with secondary compression. */ | ||
4815 | if ((stream->dec_del_ind != 0) && (stream->sec_type == NULL)) | ||
4816 | { | ||
4817 | stream->msg = "invalid delta indicator bits set"; | ||
4818 | return EINVAL; | ||
4819 | } | ||
4820 | |||
4821 | /* Section lengths */ | ||
4822 | case DEC_DATALEN: | ||
4823 | SIZE_CASE(1, stream->data_sect.size, DEC_INSTLEN); | ||
4824 | case DEC_INSTLEN: | ||
4825 | SIZE_CASE(1, stream->inst_sect.size, DEC_ADDRLEN); | ||
4826 | case DEC_ADDRLEN: | ||
4827 | SIZE_CASE(1, stream->addr_sect.size, DEC_CKSUM); | ||
4828 | |||
4829 | case DEC_CKSUM: | ||
4830 | /* Window checksum. */ | ||
4831 | if ((stream->dec_win_ind & VCD_ADLER32) != 0) | ||
4832 | { | ||
4833 | int i; | ||
4834 | |||
4835 | if ((ret = xd3_decode_bytes (stream, stream->dec_cksum, & stream->dec_cksumbytes, 4))) { return ret; } | ||
4836 | |||
4837 | for (i = 0; i < 4; i += 1) | ||
4838 | { | ||
4839 | stream->dec_adler32 = (stream->dec_adler32 << 8) | stream->dec_cksum[i]; | ||
4840 | } | ||
4841 | } | ||
4842 | |||
4843 | stream->dec_state = DEC_DATA; | ||
4844 | |||
4845 | /* Check dec_enclen for redundency, otherwise it is not really used. */ | ||
4846 | { | ||
4847 | usize_t enclen_check = (1 + (xd3_sizeof_size (stream->dec_tgtlen) + | ||
4848 | xd3_sizeof_size (stream->data_sect.size) + | ||
4849 | xd3_sizeof_size (stream->inst_sect.size) + | ||
4850 | xd3_sizeof_size (stream->addr_sect.size)) + | ||
4851 | stream->data_sect.size + | ||
4852 | stream->inst_sect.size + | ||
4853 | stream->addr_sect.size + | ||
4854 | ((stream->dec_win_ind & VCD_ADLER32) ? 4 : 0)); | ||
4855 | |||
4856 | if (stream->dec_enclen != enclen_check) | ||
4857 | { | ||
4858 | stream->msg = "incorrect encoding length (redundent)"; | ||
4859 | return EINVAL; | ||
4860 | } | ||
4861 | } | ||
4862 | |||
4863 | /* Returning here gives the application a chance to inspect the header, skip the | ||
4864 | * window, etc. */ | ||
4865 | if (stream->current_window == 0) { return XD3_GOTHEADER; } | ||
4866 | else { return XD3_WINSTART; } | ||
4867 | |||
4868 | case DEC_DATA: | ||
4869 | case DEC_INST: | ||
4870 | case DEC_ADDR: | ||
4871 | /* Next read the three sections. */ | ||
4872 | if ((ret = xd3_decode_sections (stream))) { return ret; } | ||
4873 | |||
4874 | case DEC_EMIT: | ||
4875 | |||
4876 | /* To speed VCD_SOURCE block-address calculations, the source cpyoff_blocks and | ||
4877 | * cpyoff_blkoff are pre-computed. */ | ||
4878 | if (stream->dec_win_ind & VCD_SOURCE) | ||
4879 | { | ||
4880 | xd3_source *src = stream->src; | ||
4881 | |||
4882 | if (src == NULL) | ||
4883 | { | ||
4884 | stream->msg = "source input required"; | ||
4885 | return EINVAL; | ||
4886 | } | ||
4887 | |||
4888 | src->cpyoff_blocks = stream->dec_cpyoff / src->blksize; | ||
4889 | src->cpyoff_blkoff = stream->dec_cpyoff % src->blksize; | ||
4890 | } | ||
4891 | |||
4892 | /* xd3_decode_emit returns XD3_OUTPUT on every success. */ | ||
4893 | if ((ret = xd3_decode_emit (stream)) == XD3_OUTPUT) | ||
4894 | { | ||
4895 | stream->total_out += (xoff_t) stream->avail_out; | ||
4896 | } | ||
4897 | |||
4898 | return ret; | ||
4899 | |||
4900 | case DEC_FINISH: | ||
4901 | { | ||
4902 | if (stream->dec_win_ind & VCD_TARGET) | ||
4903 | { | ||
4904 | if (stream->dec_lastwin == NULL) | ||
4905 | { | ||
4906 | stream->dec_lastwin = stream->next_out; | ||
4907 | stream->dec_lastspace = stream->space_out; | ||
4908 | } | ||
4909 | else | ||
4910 | { | ||
4911 | xd3_swap_uint8p (& stream->dec_lastwin, & stream->next_out); | ||
4912 | xd3_swap_usize_t (& stream->dec_lastspace, & stream->space_out); | ||
4913 | } | ||
4914 | } | ||
4915 | |||
4916 | stream->dec_lastlen = stream->dec_tgtlen; | ||
4917 | stream->dec_laststart = stream->dec_winstart; | ||
4918 | stream->dec_window_count += 1; | ||
4919 | |||
4920 | /* Note: the updates to dec_winstart & current_window are deferred until after the | ||
4921 | * next DEC_WININD byte is read. */ | ||
4922 | stream->dec_state = DEC_WININD; | ||
4923 | return XD3_WINFINISH; | ||
4924 | } | ||
4925 | |||
4926 | default: | ||
4927 | stream->msg = "invalid state"; | ||
4928 | return EINVAL; | ||
4929 | } | ||
4930 | } | ||
4931 | |||
4932 | /****************************************************************************************** | ||
4933 | String matching helpers | ||
4934 | ******************************************************************************************/ | ||
4935 | |||
4936 | #if XD3_ENCODER | ||
4937 | /* Do the initial xd3_string_match() checksum table setup. Allocations are delayed until | ||
4938 | * first use to avoid allocation sometimes (e.g., perfect matches, zero-length inputs). */ | ||
4939 | static int | ||
4940 | xd3_string_match_init (xd3_stream *stream) | ||
4941 | { | ||
4942 | const int DO_SMALL = ! (stream->flags & XD3_NOCOMPRESS); | ||
4943 | const int DO_LARGE = (stream->src != NULL); | ||
4944 | |||
4945 | if (DO_SMALL) | ||
4946 | { | ||
4947 | /* Subsequent calls can return immediately after checking reset. */ | ||
4948 | if (stream->small_table != NULL) | ||
4949 | { | ||
4950 | /* The target hash table is reinitialized once per window. */ | ||
4951 | if (stream->small_reset) | ||
4952 | { | ||
4953 | stream->small_reset = 0; | ||
4954 | memset (stream->small_table, 0, sizeof (usize_t) * stream->small_hash.size); | ||
4955 | } | ||
4956 | |||
4957 | return 0; | ||
4958 | } | ||
4959 | |||
4960 | if ((stream->small_table = xd3_alloc0 (stream, stream->small_hash.size, sizeof (usize_t))) == NULL) | ||
4961 | { | ||
4962 | return ENOMEM; | ||
4963 | } | ||
4964 | |||
4965 | /* If there is a previous table needed. */ | ||
4966 | if (stream->small_chain > 1) | ||
4967 | { | ||
4968 | xd3_slist *p, *m; | ||
4969 | |||
4970 | if ((stream->small_prev = xd3_alloc (stream, stream->sprevsz, sizeof (xd3_slist))) == NULL) | ||
4971 | { | ||
4972 | return ENOMEM; | ||
4973 | } | ||
4974 | |||
4975 | /* Initialize circular lists. */ | ||
4976 | for (p = stream->small_prev, m = stream->small_prev + stream->sprevsz; p != m; p += 1) | ||
4977 | { | ||
4978 | p->next = p; | ||
4979 | p->prev = p; | ||
4980 | } | ||
4981 | } | ||
4982 | } | ||
4983 | |||
4984 | if (DO_LARGE && stream->large_table == NULL) | ||
4985 | { | ||
4986 | if ((stream->large_table = xd3_alloc0 (stream, stream->large_hash.size, sizeof (usize_t))) == NULL) | ||
4987 | { | ||
4988 | return ENOMEM; | ||
4989 | } | ||
4990 | } | ||
4991 | |||
4992 | return 0; | ||
4993 | } | ||
4994 | |||
4995 | /* Called at every entrance to the string-match loop and each time | ||
4996 | * stream->input_position the value returned as *next_move_point. | ||
4997 | * This function computes more source checksums to advance the window. */ | ||
4998 | static int | ||
4999 | xd3_srcwin_move_point (xd3_stream *stream, usize_t *next_move_point) | ||
5000 | { | ||
5001 | // The input offset at which the source should ideally be scanned | ||
5002 | xoff_t logical_input_cksum_pos = stream->total_in + pos_in + stream->srcwin_size; | ||
5003 | |||
5004 | if (stream->srcwin_cksum_pos >= stream->src->size) | ||
5005 | { | ||
5006 | *next_move_point = USIZE_T_MAX; | ||
5007 | return 0; | ||
5008 | } | ||
5009 | |||
5010 | if (stream->srcwin_cksum_pos > logical_input_cksum_pos) | ||
5011 | { | ||
5012 | *next_move_point = stream->srcwin_cksum_pos - logical_input_cksum_pos; | ||
5013 | return 0; | ||
5014 | } | ||
5015 | |||
5016 | IF_DEBUG1 (P(RINT "[move_p1] size=%d T=%"Q"d S=%"Q"d\n", stream->srcwin_size, | ||
5017 | stream->total_in + pos_in, stream->srcwin_cksum_pos)); | ||
5018 | |||
5019 | *next_move_point = pos_in + stream->srcwin_size; | ||
5020 | |||
5021 | if (stream->srcwin_cksum_pos == 0) | ||
5022 | { | ||
5023 | // Two windows to start with | ||
5024 | logical_input_cksum_pos += stream->srcwin_size; | ||
5025 | } | ||
5026 | else | ||
5027 | { | ||
5028 | // Otherwise double and add | ||
5029 | stream->srcwin_size = min(stream->srcwin_maxsz, stream->srcwin_size * 2); | ||
5030 | logical_input_cksum_pos += stream->srcwin_size; | ||
5031 | } | ||
5032 | |||
5033 | while (stream->srcwin_cksum_pos < logical_input_cksum_pos && | ||
5034 | stream->srcwin_cksum_pos < stream->src->size) | ||
5035 | { | ||
5036 | xoff_t blkno = stream->srcwin_cksum_pos / stream->src->blksize; | ||
5037 | usize_t blkoff = stream->srcwin_cksum_pos % stream->src->blksize; | ||
5038 | usize_t onblk = xd3_bytes_on_srcblk (stream->src, blkno); | ||
5039 | int ret; | ||
5040 | |||
5041 | if (blkoff + stream->large_look >= onblk) | ||
5042 | { | ||
5043 | /* Next block */ | ||
5044 | stream->srcwin_cksum_pos = (blkno * stream->src->blksize) + onblk; | ||
5045 | continue; | ||
5046 | } | ||
5047 | |||
5048 | if ((ret = xd3_getblk (stream, blkno))) | ||
5049 | { | ||
5050 | return ret; | ||
5051 | } | ||
5052 | |||
5053 | usize_t diff = logical_input_cksum_pos - stream->srcwin_cksum_pos; | ||
5054 | |||
5055 | onblk = min(onblk, diff + blkoff + stream->large_look); | ||
5056 | |||
5057 | while (blkoff + stream->large_look <= onblk) | ||
5058 | { | ||
5059 | uint32_t cksum = xd3_lcksum (stream->src->curblk + blkoff, stream->large_look); | ||
5060 | usize_t hval = xd3_checksum_hash (& stream->large_hash, cksum); | ||
5061 | |||
5062 | stream->large_table[hval] = stream->srcwin_cksum_pos + HASH_CKOFFSET; | ||
5063 | |||
5064 | blkoff += stream->large_step; | ||
5065 | stream->srcwin_cksum_pos += stream->large_step; | ||
5066 | IF_DEBUG (stream->large_ckcnt += 1); | ||
5067 | } | ||
5068 | } | ||
5069 | |||
5070 | IF_DEBUG1 (P(RINT "[move_p2] size=%d T=%"Q"d S=%"Q"d next_move=%d\n", stream->srcwin_size, | ||
5071 | stream->total_in + pos_in, stream->srcwin_cksum_pos, *next_move_point)); | ||
5072 | |||
5073 | return 0; | ||
5074 | } | ||
5075 | |||
5076 | /* This function sets up the stream->src fields srcbase, srclen. The call is delayed | ||
5077 | * until these values are needed to encode a copy address. At this point the decision has | ||
5078 | * to be made. */ | ||
5079 | static int | ||
5080 | xd3_srcwin_setup (xd3_stream *stream) | ||
5081 | { | ||
5082 | xd3_source *src = stream->src; | ||
5083 | xoff_t length; | ||
5084 | |||
5085 | IF_DEBUG1 (P(RINT "[srcwin setup:%"Q"u] iopt buffer %s\n", | ||
5086 | stream->current_window, | ||
5087 | stream->enc_state < ENC_FLUSH ? "overflow" : "fit")); | ||
5088 | |||
5089 | /* Check the undecided state. */ | ||
5090 | XD3_ASSERT (src->srclen == 0 && src->srcbase == 0); | ||
5091 | |||
5092 | /* Avoid repeating this call. */ | ||
5093 | stream->srcwin_decided = 1; | ||
5094 | |||
5095 | /* If the stream is flushing, then the iopt buffer was able to contain the complete | ||
5096 | * encoding. If no copies were issued no source window is actually needed. This | ||
5097 | * prevents the VCDIFF header from including source base/len. xd3_emit_hdr checks | ||
5098 | * for srclen == 0. */ | ||
5099 | if (stream->enc_state == ENC_FLUSH && stream->match_maxaddr == 0) | ||
5100 | { | ||
5101 | goto done; | ||
5102 | } | ||
5103 | |||
5104 | /* Check for overflow, srclen is usize_t - this can't happen unless XD3_DEFAULT_SRCBACK | ||
5105 | * and related parameters are extreme - should use smaller windows. */ | ||
5106 | length = stream->match_maxaddr - stream->match_minaddr; | ||
5107 | |||
5108 | if (length > (xoff_t) USIZE_T_MAX) | ||
5109 | { | ||
5110 | stream->msg = "source window length overflow (not 64bit)"; | ||
5111 | return EINVAL; | ||
5112 | } | ||
5113 | |||
5114 | /* If ENC_FLUSH, then we know the exact source window to use because no more copies can | ||
5115 | * be issued. */ | ||
5116 | if (stream->enc_state == ENC_FLUSH) | ||
5117 | { | ||
5118 | src->srcbase = stream->match_minaddr; | ||
5119 | src->srclen = (usize_t) length; | ||
5120 | XD3_ASSERT (src->srclen); | ||
5121 | goto done; | ||
5122 | } | ||
5123 | |||
5124 | /* Otherwise, we have to make a guess. More copies may still be issued, but we have to | ||
5125 | * decide the source window base and length now. */ | ||
5126 | src->srcbase = stream->match_minaddr; | ||
5127 | src->srclen = max ((usize_t) length, stream->avail_in + (stream->avail_in >> 2)); | ||
5128 | if (src->size < src->srcbase + (xoff_t) src->srclen) | ||
5129 | { | ||
5130 | /* Could reduce srcbase, as well. */ | ||
5131 | src->srclen = src->size - src->srcbase; | ||
5132 | } | ||
5133 | |||
5134 | XD3_ASSERT (src->srclen); | ||
5135 | done: | ||
5136 | IF_DEBUG1 (P(RINT "[srcwin setup:%"Q"u] base %"Q"u size %u\n", | ||
5137 | stream->current_window, | ||
5138 | src->srcbase, | ||
5139 | src->srclen)); | ||
5140 | /* Set the taroff. This convenience variable is used even when stream->src == NULL. */ | ||
5141 | stream->taroff = src->srclen; | ||
5142 | return 0; | ||
5143 | } | ||
5144 | |||
5145 | /* Sets the bounding region for a newly discovered source match, prior to calling | ||
5146 | * xd3_source_extend_match(). This sets the match_maxfwd, match_maxback variables. Note: | ||
5147 | * srcpos is an absolute position (xoff_t) but the match_maxfwd, match_maxback variables | ||
5148 | * are usize_t. Returns 0 if the setup succeeds, or 1 if the source position lies outside | ||
5149 | * an already-decided srcbase/srclen window. */ | ||
5150 | static int | ||
5151 | xd3_source_match_setup (xd3_stream *stream, xoff_t srcpos) | ||
5152 | { | ||
5153 | xd3_source *src = stream->src; | ||
5154 | usize_t greedy_or_not; | ||
5155 | xoff_t farthest_src; | ||
5156 | |||
5157 | stream->match_maxback = 0; | ||
5158 | stream->match_maxfwd = 0; | ||
5159 | stream->match_back = 0; | ||
5160 | stream->match_fwd = 0; | ||
5161 | |||
5162 | farthest_src = max(stream->srcwin_cksum_pos, srcpos); | ||
5163 | |||
5164 | XD3_ASSERT (stream->srcwin_maxsz > src->blksize); | ||
5165 | |||
5166 | /* This prevents the encoder from seeking back more than srcwin_maxsz. Using | ||
5167 | * srcwin_maxsz is incorrect. TODO: Possibly an new option here, how far back to | ||
5168 | * seek? */ | ||
5169 | if (max_in == 0 || | ||
5170 | farthest_src - srcpos > stream->srcwin_maxsz - src->blksize) | ||
5171 | { | ||
5172 | goto bad; // TODO! Note: this prevents catching the TODO/bug below | ||
5173 | } | ||
5174 | |||
5175 | /* TODO: check for boundary crossing */ | ||
5176 | |||
5177 | /* Going backwards, the 1.5-pass algorithm allows some already-matched input may be | ||
5178 | * covered by a longer source match. The greedy algorithm does not allow this. */ | ||
5179 | if (stream->flags & XD3_BEGREEDY) | ||
5180 | { | ||
5181 | /* The greedy algorithm allows backward matching to the last matched position. */ | ||
5182 | greedy_or_not = xd3_iopt_last_matched (stream); | ||
5183 | } | ||
5184 | else | ||
5185 | { | ||
5186 | /* The 1.5-pass algorithm allows backward matching to go back as far as the | ||
5187 | * unencoded offset, which is updated as instructions pass out of the iopt buffer. | ||
5188 | * If this (default) is chosen, it means xd3_iopt_erase may be called to eliminate | ||
5189 | * instructions when a covering source match is found. */ | ||
5190 | greedy_or_not = stream->unencoded_offset; | ||
5191 | } | ||
5192 | |||
5193 | /* Backward target match limit. */ | ||
5194 | XD3_ASSERT (pos_in >= greedy_or_not); | ||
5195 | stream->match_maxback = pos_in - greedy_or_not; | ||
5196 | |||
5197 | /* Forward target match limit. */ | ||
5198 | XD3_ASSERT (max_in > pos_in); | ||
5199 | stream->match_maxfwd = max_in - pos_in; | ||
5200 | |||
5201 | /* Now we take the source position into account. It depends whether the srclen/srcbase | ||
5202 | * have been decided yet. */ | ||
5203 | if (stream->srcwin_decided == 0) | ||
5204 | { | ||
5205 | /* Unrestricted case: the match can cover the entire source, 0--src->size. We | ||
5206 | * compare the usize_t match_maxfwd/match_maxback against the xoff_t src->size/srcpos values | ||
5207 | * and take the min. */ | ||
5208 | xoff_t srcavail; | ||
5209 | |||
5210 | if (srcpos < (xoff_t) stream->match_maxback) | ||
5211 | { | ||
5212 | stream->match_maxback = srcpos; | ||
5213 | } | ||
5214 | |||
5215 | srcavail = src->size - srcpos; | ||
5216 | if (srcavail < (xoff_t) stream->match_maxfwd) | ||
5217 | { | ||
5218 | stream->match_maxfwd = srcavail; | ||
5219 | } | ||
5220 | |||
5221 | goto good; | ||
5222 | } | ||
5223 | |||
5224 | /* Decided some source window. */ | ||
5225 | XD3_ASSERT (src->srclen > 0); | ||
5226 | |||
5227 | /* Restricted case: fail if the srcpos lies outside the source window */ | ||
5228 | if ((srcpos < src->srcbase) || (srcpos > (src->srcbase + (xoff_t) src->srclen))) | ||
5229 | { | ||
5230 | goto bad; | ||
5231 | } | ||
5232 | else | ||
5233 | { | ||
5234 | usize_t srcavail; | ||
5235 | |||
5236 | srcavail = (usize_t) (srcpos - src->srcbase); | ||
5237 | if (srcavail < stream->match_maxback) | ||
5238 | { | ||
5239 | stream->match_maxback = srcavail; | ||
5240 | } | ||
5241 | |||
5242 | srcavail = (usize_t) (src->srcbase + (xoff_t) src->srclen - srcpos); | ||
5243 | if (srcavail < stream->match_maxfwd) { | ||
5244 | stream->match_maxfwd = srcavail; | ||
5245 | } | ||
5246 | |||
5247 | goto good; | ||
5248 | } | ||
5249 | |||
5250 | good: | ||
5251 | stream->match_state = MATCH_BACKWARD; | ||
5252 | stream->match_srcpos = srcpos; | ||
5253 | return 0; | ||
5254 | |||
5255 | bad: | ||
5256 | stream->match_state = MATCH_SEARCHING; | ||
5257 | return 1; | ||
5258 | } | ||
5259 | |||
5260 | /* This function expands the source match backward and forward. It is reentrant, since | ||
5261 | * xd3_getblk may return XD3_GETSRCBLK, so most variables are kept in xd3_stream. There | ||
5262 | * are two callers of this function, the string_matching routine when a checksum match is | ||
5263 | * discovered, and xd3_encode_input whenever a continuing (or initial) match is suspected. | ||
5264 | * The two callers do different things with the input_position, thus this function leaves | ||
5265 | * that variable untouched. If a match is taken the resulting stream->match_fwd is left | ||
5266 | * non-zero. */ | ||
5267 | static int | ||
5268 | xd3_source_extend_match (xd3_stream *stream) | ||
5269 | { | ||
5270 | int ret; | ||
5271 | xd3_source *src = stream->src; | ||
5272 | xoff_t matchoff; /* matchoff is the current right/left-boundary of the source match being tested. */ | ||
5273 | usize_t streamoff; /* streamoff is the current right/left-boundary of the input match being tested. */ | ||
5274 | xoff_t tryblk; /* tryblk, tryoff are the block, offset position of matchoff */ | ||
5275 | usize_t tryoff; | ||
5276 | usize_t tryrem; /* tryrem is the number of matchable bytes on the source block */ | ||
5277 | |||
5278 | XD3_ASSERT (src != NULL); | ||
5279 | |||
5280 | /* Does it make sense to compute backward match AFTER forward match? */ | ||
5281 | if (stream->match_state == MATCH_BACKWARD) | ||
5282 | { | ||
5283 | /* Note: this code is practically duplicated below, substituting | ||
5284 | * match_fwd/match_back and direction. Consolidate? */ | ||
5285 | matchoff = stream->match_srcpos - stream->match_back; | ||
5286 | streamoff = pos_in - stream->match_back; | ||
5287 | tryblk = matchoff / src->blksize; | ||
5288 | tryoff = matchoff % src->blksize; | ||
5289 | |||
5290 | /* this loops backward over source blocks */ | ||
5291 | while (stream->match_back < stream->match_maxback) | ||
5292 | { | ||
5293 | /* see if we're backing across a source block boundary */ | ||
5294 | if (tryoff == 0) | ||
5295 | { | ||
5296 | tryoff = src->blksize; | ||
5297 | tryblk -= 1; | ||
5298 | } | ||
5299 | |||
5300 | if ((ret = xd3_getblk (stream, tryblk))) | ||
5301 | { | ||
5302 | /* could be a XD3_GETSRCBLK failure. */ | ||
5303 | return ret; | ||
5304 | } | ||
5305 | |||
5306 | /* OPT: This code can be optimized. */ | ||
5307 | for (tryrem = min (tryoff, stream->match_maxback - stream->match_back); | ||
5308 | tryrem != 0; | ||
5309 | tryrem -= 1, stream->match_back += 1) | ||
5310 | { | ||
5311 | if (src->curblk[tryoff-1] != stream->next_in[streamoff-1]) | ||
5312 | { | ||
5313 | goto doneback; | ||
5314 | } | ||
5315 | |||
5316 | tryoff -= 1; | ||
5317 | streamoff -= 1; | ||
5318 | } | ||
5319 | } | ||
5320 | |||
5321 | doneback: | ||
5322 | stream->match_state = MATCH_FORWARD; | ||
5323 | } | ||
5324 | |||
5325 | XD3_ASSERT (stream->match_state == MATCH_FORWARD); | ||
5326 | |||
5327 | matchoff = stream->match_srcpos + stream->match_fwd; | ||
5328 | streamoff = pos_in + stream->match_fwd; | ||
5329 | tryblk = matchoff / src->blksize; | ||
5330 | tryoff = matchoff % src->blksize; | ||
5331 | |||
5332 | /* Note: practically the same code as backwards case above: same comments */ | ||
5333 | while (stream->match_fwd < stream->match_maxfwd) | ||
5334 | { | ||
5335 | if ((ret = xd3_getblk (stream, tryblk))) | ||
5336 | { | ||
5337 | return ret; | ||
5338 | } | ||
5339 | |||
5340 | /* There's a good speedup for doing word comparions: see zlib. */ | ||
5341 | for (tryrem = min(stream->match_maxfwd - stream->match_fwd, | ||
5342 | src->blksize - tryoff); | ||
5343 | tryrem != 0; | ||
5344 | tryrem -= 1, stream->match_fwd += 1) | ||
5345 | { | ||
5346 | if (src->curblk[tryoff] != stream->next_in[streamoff]) | ||
5347 | { | ||
5348 | goto donefwd; | ||
5349 | } | ||
5350 | |||
5351 | tryoff += 1; | ||
5352 | streamoff += 1; | ||
5353 | } | ||
5354 | |||
5355 | if (tryoff == src->blksize) | ||
5356 | { | ||
5357 | tryoff = 0; | ||
5358 | tryblk += 1; | ||
5359 | } | ||
5360 | } | ||
5361 | |||
5362 | donefwd: | ||
5363 | stream->match_state = MATCH_SEARCHING; | ||
5364 | |||
5365 | /* Now decide whether to take the match. There are several ways to answer this | ||
5366 | * question and this is likely the best answer. There is currently an assertion | ||
5367 | * in xd3_iopt_erase that checks whether min_match works. This variable maintains | ||
5368 | * that every match exceeds the end of the previous match. However, it is | ||
5369 | * possible that match_back allows us to find a match that goes a long way back | ||
5370 | * but not enough forward. We could try an alternate approach, which might help | ||
5371 | * or it might just be extra complexity: eliminate the next match_fwd >= min_match | ||
5372 | * test and call xd3_iopt_erase right away. Erase instructions as far as it goes | ||
5373 | * back, then either remember what was deleted and re-insert it, or count on the | ||
5374 | * string-matching algorithm to find that match again. I think it is more | ||
5375 | * worthwhile to implement large_hash duplicates. */ | ||
5376 | if (stream->match_fwd < min_match) | ||
5377 | { | ||
5378 | stream->match_fwd = 0; | ||
5379 | } | ||
5380 | else | ||
5381 | { | ||
5382 | usize_t total = stream->match_fwd + stream->match_back; | ||
5383 | xoff_t match_end; | ||
5384 | |||
5385 | /* Correct the variables to remove match_back from the equation. */ | ||
5386 | stream->input_position -= stream->match_back; | ||
5387 | stream->match_srcpos -= stream->match_back; | ||
5388 | stream->match_fwd += stream->match_back; | ||
5389 | match_end = stream->match_srcpos + stream->match_fwd; | ||
5390 | |||
5391 | /* At this point we may have to erase any iopt-buffer instructions that are | ||
5392 | * fully covered by a backward-extending copy. */ | ||
5393 | if (stream->match_back > 0) | ||
5394 | { | ||
5395 | xd3_iopt_erase (stream, pos_in, total); | ||
5396 | } | ||
5397 | |||
5398 | stream->match_back = 0; | ||
5399 | |||
5400 | /* Update ranges. The first source match occurs with both values set to 0. */ | ||
5401 | if (stream->match_maxaddr == 0 || | ||
5402 | stream->match_srcpos < stream->match_minaddr) | ||
5403 | { | ||
5404 | stream->match_minaddr = stream->match_srcpos; | ||
5405 | } | ||
5406 | |||
5407 | if (match_end > stream->match_maxaddr) | ||
5408 | { | ||
5409 | stream->match_maxaddr = match_end; | ||
5410 | } | ||
5411 | |||
5412 | IF_DEBUG1 ({ | ||
5413 | static int x = 0; | ||
5414 | P(RINT "[source match:%d] <inp %"Q"u %"Q"u> <src %"Q"u %"Q"u> (%s) [ %u bytes ]\n", | ||
5415 | x++, | ||
5416 | stream->total_in + pos_in, | ||
5417 | stream->total_in + pos_in + stream->match_fwd, | ||
5418 | stream->match_srcpos, | ||
5419 | stream->match_srcpos + stream->match_fwd, | ||
5420 | (stream->total_in + stream->input_position == stream->match_srcpos) ? "same" : "diff", | ||
5421 | stream->match_fwd); | ||
5422 | }); | ||
5423 | |||
5424 | if ((ret = xd3_found_match (stream, | ||
5425 | /* decoder position */ pos_in, | ||
5426 | /* length */ stream->match_fwd, | ||
5427 | /* address */ stream->match_srcpos, | ||
5428 | /* is_source */ 1))) | ||
5429 | { | ||
5430 | return ret; | ||
5431 | } | ||
5432 | |||
5433 | // TODO: ideally, we would update srcwin_cksum_pos to avoid computing checksums in | ||
5434 | // the middle of an already-discovered long match. | ||
5435 | |||
5436 | /* If the match ends with the available input: */ | ||
5437 | if (pos_in + stream->match_fwd == max_in) | ||
5438 | { | ||
5439 | /* Setup continuing match for the next window. */ | ||
5440 | stream->match_state = MATCH_TARGET; | ||
5441 | stream->match_srcpos += stream->match_fwd; | ||
5442 | } | ||
5443 | } | ||
5444 | |||
5445 | return 0; | ||
5446 | } | ||
5447 | |||
5448 | /* Update the small hash. Values in the small_table are offset by HASH_CKOFFSET (1) to | ||
5449 | * distinguish empty buckets the zero offset. This maintains the previous linked lists. | ||
5450 | * If owrite is true then this entry is replacing the existing record, otherwise it is | ||
5451 | * merely being called to promote the existing record in the hash bucket (for the same | ||
5452 | * address cache). */ | ||
5453 | static void | ||
5454 | xd3_scksum_insert (xd3_stream *stream, usize_t inx, usize_t scksum, usize_t pos) | ||
5455 | { | ||
5456 | /* If we are maintaining previous links. */ | ||
5457 | if (stream->small_prev) | ||
5458 | { | ||
5459 | usize_t last_pos = stream->small_table[inx]; | ||
5460 | xd3_slist *pos_list = & stream->small_prev[pos & stream->sprevmask]; | ||
5461 | xd3_slist *prev = pos_list->prev; | ||
5462 | xd3_slist *next = pos_list->next; | ||
5463 | |||
5464 | /* Assert link structure, update pos, cksum */ | ||
5465 | XD3_ASSERT (prev->next == pos_list); | ||
5466 | XD3_ASSERT (next->prev == pos_list); | ||
5467 | pos_list->pos = pos; | ||
5468 | pos_list->scksum = scksum; | ||
5469 | |||
5470 | /* Subtract HASH_CKOFFSET and test for a previous offset. */ | ||
5471 | if (last_pos-- != 0) | ||
5472 | { | ||
5473 | xd3_slist *last_list = & stream->small_prev[last_pos & stream->sprevmask]; | ||
5474 | xd3_slist *last_next; | ||
5475 | |||
5476 | /* Verify existing entry. */ | ||
5477 | SMALL_HASH_DEBUG1 (stream, stream->next_in + last_pos); | ||
5478 | SMALL_HASH_DEBUG2 (stream, stream->next_in + pos); | ||
5479 | |||
5480 | /* The two positions (mod sprevsz) may have the same checksum, making the old | ||
5481 | * and new entries the same. That is why the removal step is not before the | ||
5482 | * above if-stmt. */ | ||
5483 | if (last_list != pos_list) | ||
5484 | { | ||
5485 | /* Remove current position from any list it may belong to. */ | ||
5486 | next->prev = prev; | ||
5487 | prev->next = next; | ||
5488 | |||
5489 | /* The ordinary case, add current position to last_list. */ | ||
5490 | last_next = last_list->next; | ||
5491 | |||
5492 | pos_list->next = last_next; | ||
5493 | pos_list->prev = last_list; | ||
5494 | |||
5495 | last_next->prev = pos_list; | ||
5496 | last_list->next = pos_list; | ||
5497 | } | ||
5498 | } | ||
5499 | else | ||
5500 | { | ||
5501 | /* Remove current position from any list it may belong to. */ | ||
5502 | next->prev = prev; | ||
5503 | prev->next = next; | ||
5504 | |||
5505 | /* Re-initialize current position. */ | ||
5506 | pos_list->next = pos_list; | ||
5507 | pos_list->prev = pos_list; | ||
5508 | } | ||
5509 | } | ||
5510 | |||
5511 | /* Enter the new position into the hash bucket. */ | ||
5512 | stream->small_table[inx] = pos + HASH_CKOFFSET; | ||
5513 | } | ||
5514 | |||
5515 | #if XD3_DEBUG | ||
5516 | static int | ||
5517 | xd3_check_smatch (const uint8_t *ref0, const uint8_t *inp0, | ||
5518 | const uint8_t *inp_max, usize_t cmp_len) | ||
5519 | { | ||
5520 | int i; | ||
5521 | |||
5522 | for (i = 0; i < cmp_len; i += 1) | ||
5523 | { | ||
5524 | XD3_ASSERT (ref0[i] == inp0[i]); | ||
5525 | } | ||
5526 | |||
5527 | if (inp0 + cmp_len < inp_max) | ||
5528 | { | ||
5529 | XD3_ASSERT (inp0[i] != ref0[i]); | ||
5530 | } | ||
5531 | |||
5532 | return 1; | ||
5533 | } | ||
5534 | #endif /* XD3_DEBUG */ | ||
5535 | |||
5536 | /* When the hash table indicates a possible small string match, it calls this routine to | ||
5537 | * find the best match. The first matching position is taken from the small_table, | ||
5538 | * HASH_CKOFFSET is subtracted to get the actual position. After checking that match, if | ||
5539 | * previous linked lists are in use (because stream->small_chain > 1), previous matches | ||
5540 | * are tested searching for the longest match. If (min_match > MIN_MATCH) then a lazy | ||
5541 | * match is in effect. | ||
5542 | * | ||
5543 | * OPT: This is by far the most expensive function. The slowdown is in part due to the data | ||
5544 | * structure it maintains, which is relatively more expensive than it needs to be (in | ||
5545 | * comparison to zlib) in order to support the PROMOTE decision, which is to prefer the | ||
5546 | * most recently used matching address of a certain string to aid the VCDIFF same cache. | ||
5547 | * | ||
5548 | * Weak reasoning? it's time to modularize this routine...? Let's say the PROMOTE | ||
5549 | * feature supported by this slow data structure contributes around 2% improvement in | ||
5550 | * compressed size, is there a better code table that doesn't use the SAME address cache, | ||
5551 | * for which the speedup-discount could produce a better encoding? | ||
5552 | */ | ||
5553 | static /*inline*/ usize_t | ||
5554 | xd3_smatch (xd3_stream *stream, usize_t base, usize_t scksum, usize_t *match_offset) | ||
5555 | { | ||
5556 | usize_t cmp_len; | ||
5557 | usize_t match_length = 0; | ||
5558 | usize_t chain = (min_match == MIN_MATCH ? | ||
5559 | stream->small_chain : | ||
5560 | stream->small_lchain); | ||
5561 | xd3_slist *current = NULL; | ||
5562 | xd3_slist *first = NULL; | ||
5563 | const uint8_t *inp_max = stream->next_in + max_in; | ||
5564 | const uint8_t *inp; | ||
5565 | const uint8_t *ref; | ||
5566 | |||
5567 | SMALL_HASH_STATS (usize_t search_cnt = 0); | ||
5568 | SMALL_HASH_DEBUG1 (stream, stream->next_in + pos_in); | ||
5569 | SMALL_HASH_STATS (stream->sh_searches += 1); | ||
5570 | |||
5571 | XD3_ASSERT (min_match + pos_in <= max_in); | ||
5572 | |||
5573 | base -= HASH_CKOFFSET; | ||
5574 | |||
5575 | /* Initialize the chain. */ | ||
5576 | if (stream->small_prev != NULL) | ||
5577 | { | ||
5578 | first = current = & stream->small_prev[base & stream->sprevmask]; | ||
5579 | |||
5580 | /* Check if current->pos is correct. */ | ||
5581 | if (current->pos != base) { goto done; } | ||
5582 | } | ||
5583 | |||
5584 | again: | ||
5585 | |||
5586 | SMALL_HASH_STATS (search_cnt += 1); | ||
5587 | |||
5588 | /* For small matches, we can always go to the end-of-input because the matching position | ||
5589 | * must be less than the input position. */ | ||
5590 | XD3_ASSERT (base < pos_in); | ||
5591 | |||
5592 | ref = stream->next_in + base; | ||
5593 | inp = stream->next_in + pos_in; | ||
5594 | |||
5595 | SMALL_HASH_DEBUG2 (stream, ref); | ||
5596 | |||
5597 | /* Expand potential match forward. */ | ||
5598 | while (inp < inp_max && *inp == *ref) | ||
5599 | { | ||
5600 | ++inp; | ||
5601 | ++ref; | ||
5602 | } | ||
5603 | |||
5604 | cmp_len = inp - (stream->next_in + pos_in); | ||
5605 | |||
5606 | /* Verify correctness */ | ||
5607 | XD3_ASSERT (xd3_check_smatch (stream->next_in + base, stream->next_in + pos_in, | ||
5608 | inp_max, cmp_len)); | ||
5609 | |||
5610 | /* Update longest match */ | ||
5611 | if (cmp_len > match_length) | ||
5612 | { | ||
5613 | ( match_length) = cmp_len; | ||
5614 | (*match_offset) = base; | ||
5615 | |||
5616 | /* Stop if we match the entire input or discover a long_enough match. */ | ||
5617 | if (inp == inp_max || cmp_len >= stream->long_enough) | ||
5618 | { | ||
5619 | goto done; | ||
5620 | } | ||
5621 | } | ||
5622 | |||
5623 | /* If we have not reached the chain limit, see if there is another previous position. */ | ||
5624 | if (current) | ||
5625 | { | ||
5626 | while (--chain != 0) | ||
5627 | { | ||
5628 | /* Calculate the next base offset. */ | ||
5629 | current = current->prev; | ||
5630 | base = current->pos; | ||
5631 | |||
5632 | /* Stop if the next position was the first. Stop if the position is wrong | ||
5633 | * (because the lists are not re-initialized across input windows). Skip if the | ||
5634 | * scksum is wrong. */ | ||
5635 | if (current != first && base < pos_in) | ||
5636 | { | ||
5637 | if (current->scksum != scksum) | ||
5638 | { | ||
5639 | continue; | ||
5640 | } | ||
5641 | goto again; | ||
5642 | } | ||
5643 | } | ||
5644 | } | ||
5645 | |||
5646 | done: | ||
5647 | SMALL_HASH_STATS (stream->sh_compares += search_cnt); | ||
5648 | return match_length; | ||
5649 | } | ||
5650 | |||
5651 | #if XD3_DEBUG | ||
5652 | static void | ||
5653 | xd3_verify_small_state (xd3_stream *stream, | ||
5654 | const uint8_t *inp, | ||
5655 | uint32_t x_cksum) | ||
5656 | { | ||
5657 | uint32_t cksum = xd3_scksum (inp, stream->small_look); | ||
5658 | |||
5659 | XD3_ASSERT (cksum == x_cksum); | ||
5660 | } | ||
5661 | |||
5662 | static void | ||
5663 | xd3_verify_large_state (xd3_stream *stream, | ||
5664 | const uint8_t *inp, | ||
5665 | uint32_t x_cksum) | ||
5666 | { | ||
5667 | uint32_t cksum = xd3_lcksum (inp, stream->large_look); | ||
5668 | |||
5669 | XD3_ASSERT (cksum == x_cksum); | ||
5670 | } | ||
5671 | |||
5672 | static void | ||
5673 | xd3_verify_run_state (xd3_stream *stream, | ||
5674 | const uint8_t *inp, | ||
5675 | int x_run_l, | ||
5676 | uint8_t x_run_c) | ||
5677 | { | ||
5678 | int slook = stream->small_look; | ||
5679 | uint8_t run_c; | ||
5680 | int run_l = xd3_comprun (inp, slook, &run_c); | ||
5681 | |||
5682 | XD3_ASSERT (run_l == 0 || run_c == x_run_c); | ||
5683 | XD3_ASSERT (x_run_l > slook || run_l == x_run_l); | ||
5684 | } | ||
5685 | #endif /* XD3_DEBUG */ | ||
5686 | #endif /* XD3_ENCODER */ | ||
5687 | |||
5688 | /****************************************************************************************** | ||
5689 | TEMPLATE pass | ||
5690 | ******************************************************************************************/ | ||
5691 | |||
5692 | #endif /* __XDELTA3_C_INLINE_PASS__ */ | ||
5693 | #ifdef __XDELTA3_C_TEMPLATE_PASS__ | ||
5694 | |||
5695 | #if XD3_ENCODER | ||
5696 | |||
5697 | /****************************************************************************************** | ||
5698 | Templates | ||
5699 | ******************************************************************************************/ | ||
5700 | |||
5701 | /* Template macros: less than 30 lines work. the template parameters appear as, e.g., | ||
5702 | * SLOOK, MIN_MATCH, TRYLAZY, etc. */ | ||
5703 | #define XD3_TEMPLATE(x) XD3_TEMPLATE2(x,TEMPLATE) | ||
5704 | #define XD3_TEMPLATE2(x,n) XD3_TEMPLATE3(x,n) | ||
5705 | #define XD3_TEMPLATE3(x,n) x ## n | ||
5706 | #define XD3_STRINGIFY(x) XD3_STRINGIFY2(x) | ||
5707 | #define XD3_STRINGIFY2(x) #x | ||
5708 | |||
5709 | static int XD3_TEMPLATE(xd3_string_match_) (xd3_stream *stream); | ||
5710 | |||
5711 | static const xd3_smatcher XD3_TEMPLATE(__smatcher_) = | ||
5712 | { | ||
5713 | XD3_STRINGIFY(TEMPLATE), | ||
5714 | XD3_TEMPLATE(xd3_string_match_), | ||
5715 | #if SOFTCFG == 1 | ||
5716 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 | ||
5717 | #else | ||
5718 | LLOOK, LSTEP, SLOOK, SCHAIN, SLCHAIN, SSMATCH, TRYLAZY, MAXLAZY, | ||
5719 | LONGENOUGH, PROMOTE | ||
5720 | #endif | ||
5721 | }; | ||
5722 | |||
5723 | static int | ||
5724 | XD3_TEMPLATE(xd3_string_match_) (xd3_stream *stream) | ||
5725 | { | ||
5726 | /* TODO config: These next three variables should be statically compliled in various | ||
5727 | * scan_cfg configurations? */ | ||
5728 | const int DO_SMALL = ! (stream->flags & XD3_NOCOMPRESS); | ||
5729 | const int DO_LARGE = (stream->src != NULL); | ||
5730 | const int DO_RUN = (1); | ||
5731 | |||
5732 | const uint8_t *inp; | ||
5733 | uint32_t scksum = 0; | ||
5734 | uint32_t lcksum = 0; | ||
5735 | usize_t sinx; | ||
5736 | usize_t linx; | ||
5737 | uint8_t run_c; | ||
5738 | int run_l; | ||
5739 | int ret; | ||
5740 | usize_t match_length; | ||
5741 | usize_t match_offset; // Note: "may be unused" warnings are bogus | ||
5742 | usize_t next_move_point; | ||
5743 | |||
5744 | /* If there will be no compression due to settings or short input, skip it entirely. */ | ||
5745 | if (! (DO_SMALL || DO_LARGE || DO_RUN) || pos_in + SLOOK > max_in) { goto loopnomore; } | ||
5746 | |||
5747 | if ((ret = xd3_string_match_init (stream))) { return ret; } | ||
5748 | |||
5749 | /* The restartloop label is reached when the incremental loop state needs to be | ||
5750 | * reset. */ | ||
5751 | restartloop: | ||
5752 | |||
5753 | /* If there is not enough input remaining for any kind of match, skip it. */ | ||
5754 | if (pos_in + SLOOK > max_in) { goto loopnomore; } | ||
5755 | |||
5756 | IF_DEBUG1 ({ | ||
5757 | static int x = 0; | ||
5758 | P(RINT "[string match:%d] pos_in %d; \n", | ||
5759 | x++, pos_in); | ||
5760 | }); | ||
5761 | |||
5762 | /* Now reset the incremental loop state: */ | ||
5763 | |||
5764 | /* The min_match variable is updated to avoid matching the same lazy match over and over | ||
5765 | * again. For example, if you find a (small) match of length 9 at one position, you | ||
5766 | * will likely find a match of length 8 at the next position. */ | ||
5767 | min_match = MIN_MATCH; | ||
5768 | |||
5769 | /* The current input byte. */ | ||
5770 | inp = stream->next_in + pos_in; | ||
5771 | |||
5772 | /* Small match state. */ | ||
5773 | if (DO_SMALL) | ||
5774 | { | ||
5775 | scksum = xd3_scksum (inp, SLOOK); | ||
5776 | } | ||
5777 | |||
5778 | /* Run state. */ | ||
5779 | if (DO_RUN) | ||
5780 | { | ||
5781 | run_l = xd3_comprun (inp, SLOOK, & run_c); | ||
5782 | } | ||
5783 | |||
5784 | /* Large match state. We continue the loop even after not enough bytes for LLOOK | ||
5785 | * remain, so always check pos_in in DO_LARGE code. */ | ||
5786 | if (DO_LARGE && (pos_in + LLOOK <= max_in)) | ||
5787 | { | ||
5788 | /* Source window: next_move_point is the point that pos_in must reach before | ||
5789 | * computing more source checksum. */ | ||
5790 | if ((ret = xd3_srcwin_move_point (stream, & next_move_point))) | ||
5791 | { | ||
5792 | return ret; | ||
5793 | } | ||
5794 | |||
5795 | lcksum = xd3_lcksum (inp, LLOOK); | ||
5796 | } | ||
5797 | |||
5798 | /* TRYLAZYLEN: True if a certain length match should be followed by lazy search. This | ||
5799 | * checks that LEN is shorter than MAXLAZY and that there is enough leftover data to | ||
5800 | * consider lazy matching. "Enough" is set to 2 since the next match will start at the | ||
5801 | * next offset, it must match two extra characters. */ | ||
5802 | #define TRYLAZYLEN(LEN,POS,MAX) ((TRYLAZY && (LEN) < MAXLAZY) && ((POS) + (LEN) <= (MAX) - 2)) | ||
5803 | |||
5804 | /* HANDLELAZY: This statement is called each time an instruciton is emitted (three | ||
5805 | * cases). If the instruction is large enough, the loop is restarted, otherwise lazy | ||
5806 | * matching may ensue. */ | ||
5807 | #define HANDLELAZY(mlen) \ | ||
5808 | if (TRYLAZYLEN ((mlen), pos_in, max_in)) \ | ||
5809 | { min_match = (mlen) + LEAST_MATCH_INCR; goto updateone; } \ | ||
5810 | else \ | ||
5811 | { pos_in += (mlen); goto restartloop; } | ||
5812 | |||
5813 | /* Now loop over one input byte at a time until a match is found... */ | ||
5814 | for (;; inp += 1, pos_in += 1) | ||
5815 | { | ||
5816 | /* Now we try three kinds of string match in order of expense: | ||
5817 | * run, large match, small match. */ | ||
5818 | |||
5819 | /* Expand the start of a RUN. The test for (run_l == SLOOK) avoids repeating this | ||
5820 | * check when we pass through a run area performing lazy matching. The run is only | ||
5821 | * expanded once when the min_match is first reached. If lazy matching is | ||
5822 | * performed, the run_l variable will remain inconsistent until the first | ||
5823 | * non-running input character is reached, at which time the run_l may then again | ||
5824 | * grow to SLOOK. */ | ||
5825 | if (DO_RUN && run_l == SLOOK) | ||
5826 | { | ||
5827 | usize_t max_len = max_in - pos_in; | ||
5828 | |||
5829 | IF_DEBUG (xd3_verify_run_state (stream, inp, run_l, run_c)); | ||
5830 | |||
5831 | while (run_l < max_len && inp[run_l] == run_c) { run_l += 1; } | ||
5832 | |||
5833 | /* Output a RUN instruction. */ | ||
5834 | if (run_l >= min_match && run_l >= MIN_RUN) | ||
5835 | { | ||
5836 | if ((ret = xd3_emit_run (stream, pos_in, run_l, run_c))) { return ret; } | ||
5837 | |||
5838 | HANDLELAZY (run_l); | ||
5839 | } | ||
5840 | } | ||
5841 | |||
5842 | /* If there is enough input remaining. */ | ||
5843 | if (DO_LARGE && (pos_in + LLOOK <= max_in)) | ||
5844 | { | ||
5845 | if ((pos_in >= next_move_point) && | ||
5846 | (ret = xd3_srcwin_move_point (stream, & next_move_point))) | ||
5847 | { | ||
5848 | return ret; | ||
5849 | } | ||
5850 | |||
5851 | linx = xd3_checksum_hash (& stream->large_hash, lcksum); | ||
5852 | |||
5853 | IF_DEBUG (xd3_verify_large_state (stream, inp, lcksum)); | ||
5854 | |||
5855 | /* Note: To handle large checksum duplicates, this code should be rearranged to | ||
5856 | * resemble the small_match case more. But how much of the code can be truly | ||
5857 | * shared? The main difference is the need for xd3_source_extend_match to work | ||
5858 | * outside of xd3_string_match, in the case where inputs are identical. */ | ||
5859 | if (unlikely (stream->large_table[linx] != 0)) | ||
5860 | { | ||
5861 | /* the match_setup will fail if the source window has been decided and the | ||
5862 | * match lies outside it. You could consider forcing a window at this point | ||
5863 | * to permit a new source window. */ | ||
5864 | if (xd3_source_match_setup (stream, stream->large_table[linx] - HASH_CKOFFSET) == 0) | ||
5865 | { | ||
5866 | if ((ret = xd3_source_extend_match (stream))) { return ret; } | ||
5867 | |||
5868 | /* Update stream position. match_fwd is zero if no match. */ | ||
5869 | if (stream->match_fwd > 0) | ||
5870 | { | ||
5871 | HANDLELAZY (stream->match_fwd); | ||
5872 | } | ||
5873 | } | ||
5874 | } | ||
5875 | } | ||
5876 | |||
5877 | /* Small matches. */ | ||
5878 | if (DO_SMALL) | ||
5879 | { | ||
5880 | sinx = xd3_checksum_hash (& stream->small_hash, scksum); | ||
5881 | |||
5882 | /* Verify incremental state in debugging mode. */ | ||
5883 | IF_DEBUG (xd3_verify_small_state (stream, inp, scksum)); | ||
5884 | |||
5885 | /* Search for the longest match */ | ||
5886 | if (unlikely (stream->small_table[sinx] != 0)) | ||
5887 | { | ||
5888 | match_length = xd3_smatch (stream, | ||
5889 | stream->small_table[sinx], | ||
5890 | scksum, | ||
5891 | & match_offset); | ||
5892 | } | ||
5893 | else | ||
5894 | { | ||
5895 | match_length = 0; | ||
5896 | } | ||
5897 | |||
5898 | /* Insert a hash for this string. */ | ||
5899 | xd3_scksum_insert (stream, sinx, scksum, pos_in); | ||
5900 | |||
5901 | /* Promote the previous match address to head of the hash bucket. This is | ||
5902 | * intended to improve the same cache hit rate. */ | ||
5903 | if (match_length != 0 && PROMOTE) | ||
5904 | { | ||
5905 | xd3_scksum_insert (stream, sinx, scksum, match_offset); | ||
5906 | } | ||
5907 | |||
5908 | /* Maybe output a COPY instruction */ | ||
5909 | if (unlikely (match_length >= min_match)) | ||
5910 | { | ||
5911 | IF_DEBUG1 ({ | ||
5912 | static int x = 0; | ||
5913 | P(RINT "[target match:%d] <inp %u %u> <cpy %u %u> (-%d) [ %u bytes ]\n", | ||
5914 | x++, | ||
5915 | pos_in, | ||
5916 | pos_in + match_length, | ||
5917 | match_offset, | ||
5918 | match_offset + match_length, | ||
5919 | pos_in - match_offset, | ||
5920 | match_length); | ||
5921 | }); | ||
5922 | |||
5923 | if ((ret = xd3_found_match (stream, | ||
5924 | /* decoder position */ pos_in, | ||
5925 | /* length */ match_length, | ||
5926 | /* address */ match_offset, | ||
5927 | /* is_source */ 0))) { return ret; } | ||
5928 | |||
5929 | /* SSMATCH option: search small matches: continue the incremental checksum | ||
5930 | * through the matched material. Only if not lazy matching. */ | ||
5931 | if (SSMATCH && !TRYLAZYLEN (match_length, pos_in, max_in)) | ||
5932 | { | ||
5933 | usize_t avail = max_in - SLOOK - pos_in; | ||
5934 | usize_t ml_m1 = match_length - 1; | ||
5935 | usize_t right; | ||
5936 | int aincr; | ||
5937 | |||
5938 | IF_DEBUG (usize_t nposi = pos_in + match_length); | ||
5939 | |||
5940 | /* Avail is the last offset we can compute an incremental cksum. If the | ||
5941 | * match length exceeds that offset then we are finished performing | ||
5942 | * incremental updates after this step. */ | ||
5943 | if (ml_m1 < avail) | ||
5944 | { | ||
5945 | right = ml_m1; | ||
5946 | aincr = 1; | ||
5947 | } | ||
5948 | else | ||
5949 | { | ||
5950 | right = avail; | ||
5951 | aincr = 0; | ||
5952 | } | ||
5953 | |||
5954 | /* Compute incremental checksums within the match. */ | ||
5955 | while (right > 0) | ||
5956 | { | ||
5957 | SMALL_CKSUM_UPDATE (scksum, inp, SLOOK); | ||
5958 | if (DO_LARGE && (pos_in + LLOOK < max_in)) { | ||
5959 | LARGE_CKSUM_UPDATE (lcksum, inp, LLOOK); | ||
5960 | } | ||
5961 | |||
5962 | inp += 1; | ||
5963 | pos_in += 1; | ||
5964 | right -= 1; | ||
5965 | sinx = xd3_checksum_hash (& stream->small_hash, scksum); | ||
5966 | |||
5967 | IF_DEBUG (xd3_verify_small_state (stream, inp, scksum)); | ||
5968 | |||
5969 | xd3_scksum_insert (stream, sinx, scksum, pos_in); | ||
5970 | } | ||
5971 | |||
5972 | if (aincr) | ||
5973 | { | ||
5974 | /* Keep searching... */ | ||
5975 | if (DO_RUN) { run_l = xd3_comprun (inp+1, SLOOK-1, & run_c); } | ||
5976 | XD3_ASSERT (nposi == pos_in + 1); | ||
5977 | XD3_ASSERT (pos_in + SLOOK < max_in); | ||
5978 | min_match = MIN_MATCH; | ||
5979 | goto updatesure; | ||
5980 | } | ||
5981 | else | ||
5982 | { | ||
5983 | /* Not enough input for another match. */ | ||
5984 | XD3_ASSERT (pos_in + SLOOK >= max_in); | ||
5985 | goto loopnomore; | ||
5986 | } | ||
5987 | } | ||
5988 | |||
5989 | /* Else case: copy instruction, but no SSMATCH. */ | ||
5990 | HANDLELAZY (match_length); | ||
5991 | } | ||
5992 | } | ||
5993 | |||
5994 | /* The logic above prevents excess work during lazy matching by increasing min_match | ||
5995 | * to avoid smaller matches. Each time we advance pos_in by one, the minimum match | ||
5996 | * shortens as well. */ | ||
5997 | if (min_match > MIN_MATCH) | ||
5998 | { | ||
5999 | min_match -= 1; | ||
6000 | } | ||
6001 | |||
6002 | updateone: | ||
6003 | |||
6004 | /* See if there are no more incremental cksums to compute. */ | ||
6005 | if (pos_in + SLOOK == max_in) | ||
6006 | { | ||
6007 | goto loopnomore; | ||
6008 | } | ||
6009 | |||
6010 | updatesure: | ||
6011 | |||
6012 | /* Compute next RUN, CKSUM */ | ||
6013 | if (DO_RUN) { NEXTRUN (inp[SLOOK]); } | ||
6014 | if (DO_SMALL) { SMALL_CKSUM_UPDATE (scksum, inp, SLOOK); } | ||
6015 | if (DO_LARGE && (pos_in + LLOOK < max_in)) { LARGE_CKSUM_UPDATE (lcksum, inp, LLOOK); } | ||
6016 | } | ||
6017 | |||
6018 | loopnomore: | ||
6019 | return 0; | ||
6020 | } | ||
6021 | #endif /* XD3_ENCODER */ | ||
6022 | #endif /* __XDELTA3_C_TEMPLATE_PASS__ */ | ||