summary | refs | log | tree | commit | diff
path: root/xdelta3
diff options
context:
space:
mode:
author dotdotisdead <dotdotisdead@a3eca27d-f21b-0410-9b4a-6511e771f64e> 2006-08-27 18:39:38 +0000
committer dotdotisdead <dotdotisdead@a3eca27d-f21b-0410-9b4a-6511e771f64e> 2006-08-27 18:39:38 +0000
commit a6f202275ec093b9f8948d77b9783f0820f930d8 (patch)
tree 95974d74bd0a6577e80ee2eb917cd852ce6ba011 /xdelta3
Source snapshot... broken.
Diffstat (limited to 'xdelta3')
-rwxr-xr-x xdelta3/.gdb_history 256
-rwxr-xr-x xdelta3/.xdelta3.prcs_aux 84
-rwxr-xr-x xdelta3/COPYING 340
-rwxr-xr-x xdelta3/ChangeLog 6
-rwxr-xr-x xdelta3/Makefile 111
-rwxr-xr-x xdelta3/RELEASE.NOTES 3
-rwxr-xr-x xdelta3/badcopy.c 111
-rwxr-xr-x xdelta3/draft-korn-vcdiff.txt 1322
-rwxr-xr-x xdelta3/junk.py 11
-rwxr-xr-x xdelta3/linkxd3lib.c 47
-rwxr-xr-x xdelta3/rcs_junk.cc 1861
-rwxr-xr-x xdelta3/setup.py 33
-rwxr-xr-x xdelta3/show.c 41
-rwxr-xr-x xdelta3/testh.c 1
-rwxr-xr-x xdelta3/www/xdelta3-api-guide.html 212
-rwxr-xr-x xdelta3/www/xdelta3-cmdline.html 166
-rwxr-xr-x xdelta3/www/xdelta3.css 69
-rwxr-xr-x xdelta3/www/xdelta3.html 89
-rwxr-xr-x xdelta3/xdelta3-cfgs.h 118
-rwxr-xr-x xdelta3/xdelta3-djw.h 1917
-rwxr-xr-x xdelta3/xdelta3-fgk.h 851
-rwxr-xr-x xdelta3/xdelta3-list.h 130
-rwxr-xr-x xdelta3/xdelta3-main.h 2923
-rwxr-xr-x xdelta3/xdelta3-python.h 86
-rwxr-xr-x xdelta3/xdelta3-regtest.py 596
-rwxr-xr-x xdelta3/xdelta3-second.h 363
-rwxr-xr-x xdelta3/xdelta3-test.h 2229
-rwxr-xr-x xdelta3/xdelta3.c 6022
-rwxr-xr-x xdelta3/xdelta3.h 1029
-rwxr-xr-x xdelta3/xdelta3.prj 133
30 files changed, 21160 insertions, 0 deletions
diff --git a/xdelta3/.gdb_history b/xdelta3/.gdb_history
new file mode 100755
index 0000000..72410e8
--- /dev/null
+++ b/xdelta3/.gdb_history
@@ -0,0 +1,256 @@
1run test
2i[
3up
4print tpos
5print recon_size
6break xdelta3-test.h:2323
7run
8s
9s
10n
11print input_size
12up
13up
14print delta
15print delta_size
16run -vv -f -s ~/Desktop/hello.c ~/Desktop/world.c hw
17up
18down
19break xdelta3-main.h:2252
20run
21s
22n
23c
24c
25c
26run -vv -f -s ~/Desktop/hello.c ~/Desktop/world.c hw
27run -vv -f -s testcase/6/source testcase/6/target
28break xdelta3.c:5792
29run
30up
31updown
32break xdelta3.c:3837
33run
34s
35n
36print matchoff
37print streamoff
38print tryblk
39print tryoff
40n
41n
42n
43print stream->match_maxfwd
44print stream->match_fwd
45print str->cublk
46print str->curblk
47print src->curblk
48print tryoff
49n
50n
51print tryoff
52n
53print tryoff
54print src->curblk[21]
55print stream->next_in[21]
56print src->curblk
57print stream->next_in
58break xdelta3.c:5726
59c
60n
61print stream->match_fwd
62n
63n
64n
65n
66n
67n
68s
69n
70n
71n
72n
73n
74n
75n
76step 1
77step 1
78run -s testcase/3/source.doc testcase/3/target.doc
79break xdelta3.c:2697
80run
81p blkno
82p source->blocks
83up
84run -s testcase/3/source.doc testcase/3/target.doc out
85run -s testcase/3/source.doc testcase/3/target.doc > /dev/null
86break xdelta3.c:5095
87run
88p logical_input_cksum_pos
89p stream->input_pos
90p stream->input_position
91p stream->srcwin_size
92p stream->total_in
93n
94p logical_input_cksum_pos
95p stream-srcwin_cksum_pos
96p stream->srcwin_cksum_pos
97n
98n
99n
100p stream->srcwin_size
101n
102n
103p blkno
104p blkoff
105p onblk
106n
107n
108n
109break xdelta3.c:5114
110c
111n
112k
113y
114step 1
115d
116break xdelta3.c:5103
117break xdelta3.c:5097
118run
119n
120p logical_input_cksum_pos
121p stream->srcwin_size
122c
123n
124p stream->srcwin_cksum_pos
125run
126n
127c
128fin
129up
130down
131c
132up
133break xdelta3.c:5131
134c
135n
136p diff
137p onblk
138n
139p onblk
140p blkoff
141p blkoff
142p stream->large_look
143k
144y
145c
146k
147break xdelta3.c:5103
148run -s testcase/3/source.doc testcase/3/target.doc
149n
150o ibbkj
151p onblk
152n
153p blkoff
154p onblk
155n
156p blkoff
157break xdelta3.c:5119
158c
159n
160p stream->input_position
161p stream->srcwin_cksum_pos
162p stream->stream->srcwin_size
163p stream->srcwin_size
164p logical_input_cksum_pos
165p *next_move_point
166c
167n
168p stream->input_position
169p logical_input_cksum_pos
170p logical_input_cksum_pos
171p stream->srcwin_cksum_pos
172d
173c
174run -s testcase/3/source.doc testcase/3/target.doc -o /tmp/foo12
175run -s testcase/3/source.doc testcase/3/target.doc > /dev/null
176run -vv -s testcase/3/source.doc testcase/3/target.doc > /dev/null
177k
178y
179run -vv -s testcase/3/source.doc testcase/3/target.doc
180run -vv -s testcase/3/source.doc testcase/3/target.doc
181run -vv -s testcase/3/source.doc testcase/3/target.doc > /dev/null
182up
183n
184fin
185n
186break xdelta3.c:5119
187c
188c
189p stream->srcwin_cksum_pos
190p logical_input_cksum_pos
191p stream->total_iun
192p stream->total_i
193p stream->total_in
194p stream->srcwin_cksum_pos
195k
196run -vv -s testcase/3/source.doc testcase/3/target.doc /tmp/fdsfd
197kill
198run -vv -s testcase/3/source.doc testcase/3/target.doc > /dev/null
199run -vv -s testcase/3/source.doc testcase/3/target.doc > /dev/null
200y
201run
202run -vv -s testcase/3/source.doc testcase/3/target.doc > /dev/null
203run
204run -vv -s testcase/3/source.doc testcase/3/target.doc > /dev/null
205n
206fin
207n
208p blkoff
209p stream->large_look
210p onblk
211break xdelta3.c:5122
212c
213n
214p blkno
215p stream->srcwin_cksum_pos
216up
217down
218p blkno
219p blkno * (1 <<18)
220p stream->srcwin_cksum_pos
221c
222p blkno * (1 <<18)
223p blkno
224c
225n
226n
227p onblk
228n
229p onblk
230p diff
231p stream->srcwin_cksum_pos
232n
233c
234n
235p stream->srcwin_cksum_pos
236p blkno
237c
238n
239p blkno
240c
241n
242c
243n
244p stream->srcwin_cksum_pos
245p logical_input_cksum_pos
246n
247run -vv -s testcase/3/source.doc testcase/3/target.doc
248run -vv -s testcase/3/source.doc testcase/3/target.doc > /dev/null
249up
250break xdelta3.c:5123
251c
252break xdelta3.c:5097
253c
254p stream->srcwin_cksum_pos
255p stream->logical_input_pos
256p logical_input_cksum_pos
diff --git a/xdelta3/.xdelta3.prcs_aux b/xdelta3/.xdelta3.prcs_aux
new file mode 100755
index 0000000..b30689c
--- /dev/null
+++ b/xdelta3/.xdelta3.prcs_aux
@@ -0,0 +1,84 @@
1;; This file is automatically generated, editing may cause PRCS to do
2;; REALLY bad things.
3(Created-By-Prcs-Version 1 3 3)
4(www/xdelta3.css 938 1085949140 b/26_xdelta3.cs 1.3)
5(analyze_pfx.py 1422 1022037044 12_analyze_pf 1.1)
6(badcopy.c 2622 1047759845 20_badcopy.c 1.1)
7(analyze_clen.py 1342 1021753567 14_analyze_cl 1.1)
8(save.regtest.bug1/input.4 19022 1055471779 b/16_input.4 1.1)
9(save.regtest.bug11/input.0 4 1055554284 b/8_input.0 1.1)
10(save.regtest.bug8/input.0 203756 1055518432 28_input.0 1.1)
11(save.regtest.bug1/input.5 21597 1055471779 b/17_input.5 1.1)
12(save.regtest.bug6/input.20 1235 1055474005 39_input.20 1.1)
13(save.regtest.bug11/input.1 10 1055554284 b/7_input.1 1.1)
14(xdelta3-list.h 9892 1052598762 6_xdelta3-li 1.1)
15(save.regtest.bug8/input.1 203756 1055518493 29_input.1 1.1)
16(save.regtest.bug7/recon 51200 1055515262 36_recon 1.1)
17(save.regtest.bug6/input.21 952 1055474005 38_input.21 1.1)
18(save.regtest.bug6/recon 952 1055480638 37_recon 1.1)
19(save.regtest.bug11/recon.x 10 1055554520 b/5_recon.x 1.1)
20(vcdiff.ps 131548 1014968851 b/19_vcdiff.ps 1.1)
21(www/Xdelta3.html 3200 1058668417 b/24_Xdelta3.ht 1.1)
22(priorities.txt 339 1057496665 b/18_priorities 1.1)
23(draft-korn-vcdiff.txt 60706 1018424758 b/22_draft-korn 1.1)
24(dead.code 72096 1085893991 b/21_dead.code 1.2)
25(linkxd3lib.c 1113 1056324075 19_linkxd3lib 1.1)
26(www/xdelta3.html 4708 1085952599 b/24_Xdelta3.ht 1.4)
27(xdelta3-second.h 8228 1057405215 3_xdelta3-se 1.1)
28(testh.c 21 1042671351 17_testh.c 1.1)
29(save.regtest.bug6/input.0 920 1055474005 40_input.0 1.1)
30(save.regtest.bug12/output.x 705 1055556257 b/1_output.x 1.1)
31(save.regtest.bug8/core 1159168 1055529025 23_core 1.1)
32(save.regtest.bug9/foo,v 123233 1055532021 21_foo,vx 1.1)
33(xdelta3.c 201721 1085893369 16_xdelta3.c 1.3)
34(xdelta3-cfgs.h 2701 1057695639 9_xdelta3-cf 1.1)
35(save.regtest.bug10/input.0 53274 1055532189 b/14_input.0 1.1)
36(xdelta3-regtest.py 17976 1085947234 10_xdelta3-re 1.3)
37(save.regtest.bug10/input.1 74663 1055532189 b/13_input.1 1.1)
38(save.regtest.bug4/input.0 7571 1055461840 45_input.0 1.1)
39(save.regtest.bug12/xd3regtest.27181/output 2336 1055566927 b/0_output 1.1)
40(save.regtest.bug4/input.1 11312 1055461840 44_input.1 1.1)
41(save.regtest.bug7/core 1146880 1055522004 30_core 1.1)
42(xdelta3-main.h 79350 1085950532 5_xdelta3-ma 1.3)
43(xdelta3.h 41796 1084138546 1_xdelta3.h 1.2)
44(rcs_junk.cc 36315 1055086755 15_rcs_junk.c 1.1)
45(www/xdelta3-cmdline.html 5234 1085953288 b/25_xdelta3-cm 1.2)
46(save.regtest.bug7/input.0 7571 1055515262 34_input.0 1.1)
47(save.regtest.bug7/output 14276 1055515262 35_output 1.1)
48(save.regtest.bug2/input.0 2296 1055471815 49_input.0 1.1)
49(save.regtest.bug7/input.1 11312 1055515262 33_input.1 1.1)
50(www/xdelta3-api-guide.html 7553 1085953324 b/23_Xdelta3-ap 1.4)
51(save.regtest.bug8/input.0.xz 70595 1055518500 27_input.0.xz 1.1)
52(save.regtest.bug2/input.1 2521 1055471815 48_input.1 1.1)
53(save.regtest.bug9/foo2,v 123233 1055532069 22_foo2,vx 1.1)
54(xdelta3-python.h 1466 1055671733 4_xdelta3-py 1.1)
55(save.regtest.bug10/output 48388 1055532189 b/15_output 1.1)
56(save.regtest.bug10/output.x 48408 1055533319 b/12_output.x 1.1)
57(save.regtest.bug7/recon.x 11312 1055517553 31_recon.x 1.1)
58(save.regtest.bug11/recon 10 1055554284 b/10_recon 1.1)
59(save.regtest.bug8/input.1.xz 89734 1055518503 26_input.1.xz 1.1)
60(xdelta3-test.h 71959 1084138350 2_xdelta3-te 1.2)
61(save.regtest.bug10/recon.x 74655 1055533328 b/11_recon.x 1.1)
62(save.regtest.bug12/input.0 280 1055555649 b/3_input.0 1.1)
63(save.regtest.bug12/input.1 1155 1055555649 b/2_input.1 1.1)
64(draft-vcdiff-huffman.txt 2935 1021721074 b/20_draft-vcdi 1.1)
65(save.regtest.bug8/output.x.right 48221 1055520912 24_output.x.r 1.1)
66(save.regtest.bug5/input.0 7571 1055471668 43_input.0 1.1)
67(save.regtest.bug11/output.x 45 1055554519 b/6_output.x 1.1)
68(save.regtest.bug12/xd3regtest.27181/input.0 2521 1055566927 51_input.0 1.1)
69(save.regtest.bug5/input.1 11312 1055471668 42_input.1 1.1)
70(save.regtest.bug12/xd3regtest.27181/input.1 2296 1055566927 50_input.1 1.1)
71(setup.py 626 1055562104 11_setup.py 1.1)
72(Makefile 3840 1085893399 0_Makefile 1.3)
73(save.regtest.bug3/input.0 732 1055471934 47_input.0 1.1)
74(save.regtest.bug6/output 69 1055480559 41_output 1.1)
75(xdelta3-fgk.h 21496 1057610026 7_xdelta3-fg 1.1)
76(save.regtest.bug3/input.1 271 1055471934 46_input.1 1.1)
77(www/Xdelta3-api.html 6128 1058674572 b/23_Xdelta3-ap 1.1)
78(save.regtest.bug12/output 39 1055555649 b/4_output 1.1)
79(analyze_gp.py 7442 1022750342 13_analyze_gp 1.1)
80(save.regtest.bug11/output 45 1055554284 b/9_output 1.1)
81(save.regtest.bug7/output.x 14296 1055517870 32_output.x 1.1)
82(save.regtest.bug8/output.x 48225 1055530557 25_output.x 1.1)
83(show.c 647 1043318861 18_show.c 1.1)
84(xdelta3-djw.h 51152 1057610015 8_xdelta3-dj 1.1)
diff --git a/xdelta3/COPYING b/xdelta3/COPYING
new file mode 100755
index 0000000..5b6e7c6
--- /dev/null
+++ b/xdelta3/COPYING
@@ -0,0 +1,340 @@
1 GNU GENERAL PUBLIC LICENSE
2 Version 2, June 1991
3
4 Copyright (C) 1989, 1991 Free Software Foundation, Inc.
5 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
6 Everyone is permitted to copy and distribute verbatim copies
7 of this license document, but changing it is not allowed.
8
9 Preamble
10
11 The licenses for most software are designed to take away your
12freedom to share and change it. By contrast, the GNU General Public
13License is intended to guarantee your freedom to share and change free
14software--to make sure the software is free for all its users. This
15General Public License applies to most of the Free Software
16Foundation's software and to any other program whose authors commit to
17using it. (Some other Free Software Foundation software is covered by
18the GNU Library General Public License instead.) You can apply it to
19your programs, too.
20
21 When we speak of free software, we are referring to freedom, not
22price. Our General Public Licenses are designed to make sure that you
23have the freedom to distribute copies of free software (and charge for
24this service if you wish), that you receive source code or can get it
25if you want it, that you can change the software or use pieces of it
26in new free programs; and that you know you can do these things.
27
28 To protect your rights, we need to make restrictions that forbid
29anyone to deny you these rights or to ask you to surrender the rights.
30These restrictions translate to certain responsibilities for you if you
31distribute copies of the software, or if you modify it.
32
33 For example, if you distribute copies of such a program, whether
34gratis or for a fee, you must give the recipients all the rights that
35you have. You must make sure that they, too, receive or can get the
36source code. And you must show them these terms so they know their
37rights.
38
39 We protect your rights with two steps: (1) copyright the software, and
40(2) offer you this license which gives you legal permission to copy,
41distribute and/or modify the software.
42
43 Also, for each author's protection and ours, we want to make certain
44that everyone understands that there is no warranty for this free
45software. If the software is modified by someone else and passed on, we
46want its recipients to know that what they have is not the original, so
47that any problems introduced by others will not reflect on the original
48authors' reputations.
49
50 Finally, any free program is threatened constantly by software
51patents. We wish to avoid the danger that redistributors of a free
52program will individually obtain patent licenses, in effect making the
53program proprietary. To prevent this, we have made it clear that any
54patent must be licensed for everyone's free use or not licensed at all.
55
56 The precise terms and conditions for copying, distribution and
57modification follow.
58
59 GNU GENERAL PUBLIC LICENSE
60 TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
61
62 0. This License applies to any program or other work which contains
63a notice placed by the copyright holder saying it may be distributed
64under the terms of this General Public License. The "Program", below,
65refers to any such program or work, and a "work based on the Program"
66means either the Program or any derivative work under copyright law:
67that is to say, a work containing the Program or a portion of it,
68either verbatim or with modifications and/or translated into another
69language. (Hereinafter, translation is included without limitation in
70the term "modification".) Each licensee is addressed as "you".
71
72Activities other than copying, distribution and modification are not
73covered by this License; they are outside its scope. The act of
74running the Program is not restricted, and the output from the Program
75is covered only if its contents constitute a work based on the
76Program (independent of having been made by running the Program).
77Whether that is true depends on what the Program does.
78
79 1. You may copy and distribute verbatim copies of the Program's
80source code as you receive it, in any medium, provided that you
81conspicuously and appropriately publish on each copy an appropriate
82copyright notice and disclaimer of warranty; keep intact all the
83notices that refer to this License and to the absence of any warranty;
84and give any other recipients of the Program a copy of this License
85along with the Program.
86
87You may charge a fee for the physical act of transferring a copy, and
88you may at your option offer warranty protection in exchange for a fee.
89
90 2. You may modify your copy or copies of the Program or any portion
91of it, thus forming a work based on the Program, and copy and
92distribute such modifications or work under the terms of Section 1
93above, provided that you also meet all of these conditions:
94
95 a) You must cause the modified files to carry prominent notices
96 stating that you changed the files and the date of any change.
97
98 b) You must cause any work that you distribute or publish, that in
99 whole or in part contains or is derived from the Program or any
100 part thereof, to be licensed as a whole at no charge to all third
101 parties under the terms of this License.
102
103 c) If the modified program normally reads commands interactively
104 when run, you must cause it, when started running for such
105 interactive use in the most ordinary way, to print or display an
106 announcement including an appropriate copyright notice and a
107 notice that there is no warranty (or else, saying that you provide
108 a warranty) and that users may redistribute the program under
109 these conditions, and telling the user how to view a copy of this
110 License. (Exception: if the Program itself is interactive but
111 does not normally print such an announcement, your work based on
112 the Program is not required to print an announcement.)
113
114These requirements apply to the modified work as a whole. If
115identifiable sections of that work are not derived from the Program,
116and can be reasonably considered independent and separate works in
117themselves, then this License, and its terms, do not apply to those
118sections when you distribute them as separate works. But when you
119distribute the same sections as part of a whole which is a work based
120on the Program, the distribution of the whole must be on the terms of
121this License, whose permissions for other licensees extend to the
122entire whole, and thus to each and every part regardless of who wrote it.
123
124Thus, it is not the intent of this section to claim rights or contest
125your rights to work written entirely by you; rather, the intent is to
126exercise the right to control the distribution of derivative or
127collective works based on the Program.
128
129In addition, mere aggregation of another work not based on the Program
130with the Program (or with a work based on the Program) on a volume of
131a storage or distribution medium does not bring the other work under
132the scope of this License.
133
134 3. You may copy and distribute the Program (or a work based on it,
135under Section 2) in object code or executable form under the terms of
136Sections 1 and 2 above provided that you also do one of the following:
137
138 a) Accompany it with the complete corresponding machine-readable
139 source code, which must be distributed under the terms of Sections
140 1 and 2 above on a medium customarily used for software interchange; or,
141
142 b) Accompany it with a written offer, valid for at least three
143 years, to give any third party, for a charge no more than your
144 cost of physically performing source distribution, a complete
145 machine-readable copy of the corresponding source code, to be
146 distributed under the terms of Sections 1 and 2 above on a medium
147 customarily used for software interchange; or,
148
149 c) Accompany it with the information you received as to the offer
150 to distribute corresponding source code. (This alternative is
151 allowed only for noncommercial distribution and only if you
152 received the program in object code or executable form with such
153 an offer, in accord with Subsection b above.)
154
155The source code for a work means the preferred form of the work for
156making modifications to it. For an executable work, complete source
157code means all the source code for all modules it contains, plus any
158associated interface definition files, plus the scripts used to
159control compilation and installation of the executable. However, as a
160special exception, the source code distributed need not include
161anything that is normally distributed (in either source or binary
162form) with the major components (compiler, kernel, and so on) of the
163operating system on which the executable runs, unless that component
164itself accompanies the executable.
165
166If distribution of executable or object code is made by offering
167access to copy from a designated place, then offering equivalent
168access to copy the source code from the same place counts as
169distribution of the source code, even though third parties are not
170compelled to copy the source along with the object code.
171
172 4. You may not copy, modify, sublicense, or distribute the Program
173except as expressly provided under this License. Any attempt
174otherwise to copy, modify, sublicense or distribute the Program is
175void, and will automatically terminate your rights under this License.
176However, parties who have received copies, or rights, from you under
177this License will not have their licenses terminated so long as such
178parties remain in full compliance.
179
180 5. You are not required to accept this License, since you have not
181signed it. However, nothing else grants you permission to modify or
182distribute the Program or its derivative works. These actions are
183prohibited by law if you do not accept this License. Therefore, by
184modifying or distributing the Program (or any work based on the
185Program), you indicate your acceptance of this License to do so, and
186all its terms and conditions for copying, distributing or modifying
187the Program or works based on it.
188
189 6. Each time you redistribute the Program (or any work based on the
190Program), the recipient automatically receives a license from the
191original licensor to copy, distribute or modify the Program subject to
192these terms and conditions. You may not impose any further
193restrictions on the recipients' exercise of the rights granted herein.
194You are not responsible for enforcing compliance by third parties to
195this License.
196
197 7. If, as a consequence of a court judgment or allegation of patent
198infringement or for any other reason (not limited to patent issues),
199conditions are imposed on you (whether by court order, agreement or
200otherwise) that contradict the conditions of this License, they do not
201excuse you from the conditions of this License. If you cannot
202distribute so as to satisfy simultaneously your obligations under this
203License and any other pertinent obligations, then as a consequence you
204may not distribute the Program at all. For example, if a patent
205license would not permit royalty-free redistribution of the Program by
206all those who receive copies directly or indirectly through you, then
207the only way you could satisfy both it and this License would be to
208refrain entirely from distribution of the Program.
209
210If any portion of this section is held invalid or unenforceable under
211any particular circumstance, the balance of the section is intended to
212apply and the section as a whole is intended to apply in other
213circumstances.
214
215It is not the purpose of this section to induce you to infringe any
216patents or other property right claims or to contest validity of any
217such claims; this section has the sole purpose of protecting the
218integrity of the free software distribution system, which is
219implemented by public license practices. Many people have made
220generous contributions to the wide range of software distributed
221through that system in reliance on consistent application of that
222system; it is up to the author/donor to decide if he or she is willing
223to distribute software through any other system and a licensee cannot
224impose that choice.
225
226This section is intended to make thoroughly clear what is believed to
227be a consequence of the rest of this License.
228
229 8. If the distribution and/or use of the Program is restricted in
230certain countries either by patents or by copyrighted interfaces, the
231original copyright holder who places the Program under this License
232may add an explicit geographical distribution limitation excluding
233those countries, so that distribution is permitted only in or among
234countries not thus excluded. In such case, this License incorporates
235the limitation as if written in the body of this License.
236
237 9. The Free Software Foundation may publish revised and/or new versions
238of the General Public License from time to time. Such new versions will
239be similar in spirit to the present version, but may differ in detail to
240address new problems or concerns.
241
242Each version is given a distinguishing version number. If the Program
243specifies a version number of this License which applies to it and "any
244later version", you have the option of following the terms and conditions
245either of that version or of any later version published by the Free
246Software Foundation. If the Program does not specify a version number of
247this License, you may choose any version ever published by the Free Software
248Foundation.
249
250 10. If you wish to incorporate parts of the Program into other free
251programs whose distribution conditions are different, write to the author
252to ask for permission. For software which is copyrighted by the Free
253Software Foundation, write to the Free Software Foundation; we sometimes
254make exceptions for this. Our decision will be guided by the two goals
255of preserving the free status of all derivatives of our free software and
256of promoting the sharing and reuse of software generally.
257
258 NO WARRANTY
259
260 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
261FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
262OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
263PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
264OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
265MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
266TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
267PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
268REPAIR OR CORRECTION.
269
270 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
271WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
272REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
273INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
274OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
275TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
276YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
277PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
278POSSIBILITY OF SUCH DAMAGES.
279
280 END OF TERMS AND CONDITIONS
281
282 How to Apply These Terms to Your New Programs
283
284 If you develop a new program, and you want it to be of the greatest
285possible use to the public, the best way to achieve this is to make it
286free software which everyone can redistribute and change under these terms.
287
288 To do so, attach the following notices to the program. It is safest
289to attach them to the start of each source file to most effectively
290convey the exclusion of warranty; and each file should have at least
291the "copyright" line and a pointer to where the full notice is found.
292
293 <one line to give the program's name and a brief idea of what it does.>
294 Copyright (C) <year> <name of author>
295
296 This program is free software; you can redistribute it and/or modify
297 it under the terms of the GNU General Public License as published by
298 the Free Software Foundation; either version 2 of the License, or
299 (at your option) any later version.
300
301 This program is distributed in the hope that it will be useful,
302 but WITHOUT ANY WARRANTY; without even the implied warranty of
303 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
304 GNU General Public License for more details.
305
306 You should have received a copy of the GNU General Public License
307 along with this program; if not, write to the Free Software
308 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
309
310
311Also add information on how to contact you by electronic and paper mail.
312
313If the program is interactive, make it output a short notice like this
314when it starts in an interactive mode:
315
316 Gnomovision version 69, Copyright (C) year name of author
317 Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
318 This is free software, and you are welcome to redistribute it
319 under certain conditions; type `show c' for details.
320
321The hypothetical commands `show w' and `show c' should show the appropriate
322parts of the General Public License. Of course, the commands you use may
323be called something other than `show w' and `show c'; they could even be
324mouse-clicks or menu items--whatever suits your program.
325
326You should also get your employer (if you work as a programmer) or your
327school, if any, to sign a "copyright disclaimer" for the program, if
328necessary. Here is a sample; alter the names:
329
330 Yoyodyne, Inc., hereby disclaims all copyright interest in the program
331 `Gnomovision' (which makes passes at compilers) written by James Hacker.
332
333 <signature of Ty Coon>, 1 April 1989
334 Ty Coon, President of Vice
335
336This General Public License does not permit incorporating your program into
337proprietary programs. If your program is a subroutine library, you may
338consider it more useful to permit linking proprietary applications with the
339library. If this is what you want to do, use the GNU Library General
340Public License instead of this License.
diff --git a/xdelta3/ChangeLog b/xdelta3/ChangeLog
new file mode 100755
index 0000000..d7bd042
--- /dev/null
+++ b/xdelta3/ChangeLog
@@ -0,0 +1,6 @@
12006-07-02 Joshua MacDonald <jmacd@google.com>
2
3 * xdelta3.c (xd3_iopt_flush_instructions): Fixed a bug in which
4 flush_instructions clears more than half of the instructions and thus
5 encodes the last two, which may still overlap.
6
diff --git a/xdelta3/Makefile b/xdelta3/Makefile
new file mode 100755
index 0000000..0ef4722
--- /dev/null
+++ b/xdelta3/Makefile
@@ -0,0 +1,111 @@
1##
2PYTHON = python
3PYTGT = build/temp.linux-i686-2.3/xdelta3.so
4
5TARGETS = xdelta3 xdelta3-64 xdelta3-everything \
6 xdelta3-Opg xdelta3-64-O xdelta3-Op xdelta3-O \
7 xdelta3-decoder xdelta3-decoder-nomain.o \
8 $(PYTGT) \
9 xdelta3-nosec.o xdelta3-all.o xdelta3-fgk.o xdelta3-djw.o \
10 xdelta3-noext xdelta3-tools xdelta3-tune \
11 xdelta3-notools
12
13SOURCES = xdelta3.c xdelta3.h xdelta3-fgk.h xdelta3-djw.h xdelta3-list.h xdelta3-test.h \
14 xdelta3-main.h xdelta3-cfgs.h xdelta3-second.h xdelta3-python.h
15
16PYFILES = xdelta3-regtest.py setup.py
17
18EXTRA = Makefile COPYING linkxd3lib.c badcopy.c www RELEASE.NOTES
19
20REL = 0f
21RELDIR = xdelta3$(REL)
22
23all: $(TARGETS)
24
25tar:
26 tar -czf /tmp/$(RELDIR)-tmp.tar.gz $(SOURCES) $(PYFILES) $(EXTRA)
27 rm -rf /tmp/$(RELDIR)
28 mkdir /tmp/$(RELDIR)
29 (cd /tmp/$(RELDIR) && tar -xzf ../$(RELDIR)-tmp.tar.gz)
30 tar -czf ./$(RELDIR).tar.gz -C /tmp $(RELDIR)
31 +tar -tzf ./$(RELDIR).tar.gz
32 rm -rf /tmp/$(RELDIR)
33
34clean:
35 rm -f $(TARGETS) xdtest.* core *.flc
36
37$(PYTGT): $(SOURCES)
38 $(PYTHON) setup.py install --compile --force
39
40xdelta3: $(SOURCES)
41 $(CC) -g -Wall -Wshadow xdelta3.c -o xdelta3 -DXD3_MAIN=1 -DGENERIC_ENCODE_TABLES=1 \
42 -DXD3_USE_LARGEFILE64=1 -DREGRESSION_TEST=1 -DXD3_DEBUG=2 -DSECONDARY_DJW=1 -lm
43
44xdelta3-decoder: $(SOURCES)
45 $(CC) -O2 -Wall -Wshadow xdelta3.c \
46 -DXD3_ENCODER=0 -DXD3_MAIN=1 -DSECONDARY_FGK=0 -DSECONDARY_DJW=0 \
47 -DXD3_POSIX=0 -DEXTERNAL_COMPRESSION=0 -DVCDIFF_TOOLS=0 \
48 -o xdelta3-decoder
49 strip xdelta3-decoder
50
51xdelta3-decoder-nomain.o: $(SOURCES) linkxd3lib.c
52 $(CC) -O2 -Wall -Wshadow xdelta3.c linkxd3lib.c \
53 -DXD3_ENCODER=0 -DSECONDARY_FGK=0 -DSECONDARY_DJW=0 \
54 -o xdelta3-decoder-nomain.o
55 strip xdelta3-decoder-nomain.o
56
57xdelta3-O: $(SOURCES)
58 $(CC) -g -O2 -Wall -Wshadow xdelta3.c -o xdelta3-O -DXD3_MAIN=1 -DSECONDARY_DJW=1 -DREGRESSION_TEST=1 -lm
59
60xdelta3-O++: $(SOURCES)
61 $(CXX) -g -O2 -Wall -Wshadow xdelta3.c -o xdelta3-O++ -DXD3_MAIN=1 -DSECONDARY_DJW=1 -DREGRESSION_TEST=1 -lm
62
63xdelta3-Op: $(SOURCES)
64 $(CC) -g -O2 -Wall -Wshadow xdelta3.c -o xdelta3-Op -DXD3_POSIX=1 -DXD3_MAIN=1 -DREGRESSION_TEST=1 -lm
65
66xdelta3-64: $(SOURCES)
67 $(CC) -g -Wall -Wshadow xdelta3.c -o xdelta3-64 -DXD3_POSIX=1 -DXD3_MAIN=1 -DREGRESSION_TEST=1 \
68 -DXD3_DEBUG=0 -DXD3_USE_LARGEFILE64=1 -lm
69
70xdelta3-64-O: $(SOURCES)
71 $(CC) -O2 -Wall -Wshadow xdelta3.c -o xdelta3-64-O -DXD3_POSIX=1 -DXD3_MAIN=1 \
72 -DXD3_USE_LARGEFILE64=1 -lm
73
74xdelta3-everything: $(SOURCES)
75 $(CC) -g -Wall -Wshadow xdelta3.c -o xdelta3-everything \
76 -DXD3_MAIN=1 -DVCDIFF_TOOLS=1 -DREGRESSION_TEST=1 \
77 -DSECONDARY_FGK=1 -DSECONDARY_DJW=1 \
78 -DGENERIC_ENCODE_TABLES=1 \
79 -DGENERIC_ENCODE_TABLES_COMPUTE=1 \
80 -DXD3_POSIX=1 \
81 -DEXTERNAL_COMPRESSION=1 \
82 -DXD3_DEBUG=1 -lm
83
84xdelta3-tune: $(SOURCES)
85 $(CC) -O2 -Wall -Wshadow xdelta3.c -o xdelta3-tune -DXD3_MAIN=1 \
86 -DSECONDARY_FGK=1 -DSECONDARY_DJW=1 -DTUNE_HUFFMAN=1
87
88xdelta3-Opg: $(SOURCES)
89 $(CC) -pg -g -O3 -Wall -Wshadow xdelta3.c -o xdelta3-Opg -DXD3_MAIN=1 \
90 -DSECONDARY_DJW=1 -DXD3_POSIX=1 -DXD3_USE_LARGEFILE64=1
91
92xdelta3-nosec.o: $(SOURCES)
93 $(CC) -O2 -Wall -Wshadow -c xdelta3.c -DSECONDARY_FGK=0 -DSECONDARY_DJW=0 -o xdelta3-nosec.o
94
95xdelta3-all.o: $(SOURCES)
96 $(CC) -O2 -Wall -Wshadow -c xdelta3.c -DSECONDARY_FGK=1 -DSECONDARY_DJW=1 -o xdelta3-all.o
97
98xdelta3-fgk.o: $(SOURCES)
99 $(CC) -O2 -Wall -Wshadow -c xdelta3.c -DSECONDARY_FGK=1 -DSECONDARY_DJW=0 -o xdelta3-fgk.o
100
101xdelta3-djw.o: $(SOURCES)
102 $(CC) -O2 -Wall -Wshadow -c xdelta3.c -DSECONDARY_FGK=0 -DSECONDARY_DJW=1 -o xdelta3-djw.o
103
104xdelta3-noext: $(SOURCES)
105 $(CC) -O2 -Wall -Wshadow xdelta3.c -DXD3_MAIN=1 -DEXTERNAL_COMPRESSION=0 -o xdelta3-noext
106
107xdelta3-tools: $(SOURCES)
108 $(CC) -O2 -Wall -Wshadow xdelta3.c -DXD3_MAIN=1 -o xdelta3-tools
109
110xdelta3-notools: $(SOURCES)
111 $(CC) -O2 -Wall -Wshadow xdelta3.c -DXD3_MAIN=1 -DVCDIFF_TOOLS=0 -o xdelta3-notools
diff --git a/xdelta3/RELEASE.NOTES b/xdelta3/RELEASE.NOTES
new file mode 100755
index 0000000..a4af327
--- /dev/null
+++ b/xdelta3/RELEASE.NOTES
@@ -0,0 +1,3 @@
12006-05-13 Joshua MacDonald <joshua.macdonald@gmail.com>
2
3 * xdelta 3.0e: Performance and bug fixes.
diff --git a/xdelta3/badcopy.c b/xdelta3/badcopy.c
new file mode 100755
index 0000000..c42e2b5
--- /dev/null
+++ b/xdelta3/badcopy.c
@@ -0,0 +1,111 @@
1#include <stdio.h>
2#include <stdlib.h>
3#include <math.h>
4
5#define BUFSZ (1 << 22)
6
7typedef unsigned int usize_t;
8
9double error_prob = 0.0001;
10usize_t mean_change = 100;
11usize_t total_change = 0;
12usize_t total_size = 0;
13usize_t max_change = 0;
14usize_t num_change = 0;
15
16int last_end = 0;
17
18static int
19edist (usize_t mean, usize_t max)
20{
21 double mean_d = mean;
22 double erand = log (1.0 / drand48 ());
23 usize_t x = (usize_t) (mean_d * erand + 0.5);
24
25 return (x < max) ? (x > 0 ? x : 1) : max;
26}
27
28void modify (char *buf, usize_t size)
29{
30 usize_t bufpos = 0, j;
31
32 last_end = 0;
33
34 for (;; /* bufpos and j are incremented in the inner loop */)
35 {
36 /* The size of the next modification. */
37 usize_t next_size = edist (mean_change, 1 << 31);
38 /* The expected interval of such a change. */
39 double expect_interval = ((double) next_size * (1.0 - error_prob)) / error_prob;
40 /* The number of bytes until the next modification. */
41 usize_t next_mod = edist (expect_interval, 1 << 31);
42
43 if (next_size + next_mod + bufpos > size) { break; }
44
45 if (max_change < next_size) { max_change = next_size; }
46
47 bufpos += next_mod;
48
49 fprintf (stderr, "COPY: %u-%u (%u)\n", total_size + last_end, total_size + bufpos, bufpos - last_end);
50
51 fprintf (stderr, "ADD: %u-%u (%u) is change %u\n", total_size + bufpos , total_size + bufpos + next_size, next_size, num_change);
52
53 total_change += next_size;
54 num_change += 1;
55
56 for (j = 0; j < next_size; j += 1, bufpos += 1)
57 {
58 buf[bufpos] = lrand48 () >> 3;
59 }
60
61 last_end = bufpos;
62 }
63
64 fprintf (stderr, "COPY: %u-%u (%u)\n", total_size + last_end, total_size + size, size - last_end);
65
66 total_size += size;
67}
68
69int main(int argc, char **argv)
70{
71 char buf[BUFSZ];
72 int c, ret;
73
74 if (argc > 3)
75 {
76 fprintf (stderr, "usage: badcopy [byte_error_prob [mean_error_size]]\n");
77 return 1;
78 }
79
80 if (argc > 2) { mean_change = atoi (argv[2]); }
81 if (argc > 1) { error_prob = atof (argv[1]); }
82
83 if (error_prob < 0.0 || error_prob > 1.0)
84 {
85 fprintf (stderr, "warning: error probability out of range\n");
86 return 1;
87 }
88
89 do
90 {
91 c = fread (buf, 1, BUFSZ, stdin);
92
93 if (c == 0) { break; }
94
95 modify (buf, c);
96
97 ret = fwrite (buf, 1, c, stdout);
98 }
99 while (c == BUFSZ);
100
101 if ((ret = fclose (stdout)))
102 {
103 perror ("fclose");
104 return 1;
105 }
106
107 fprintf (stderr, "add_prob %f; %u adds; total_change %u of %u bytes; add percentage %f; max add size %u\n",
108 error_prob, num_change, total_change, total_size, (double) total_change / (double) total_size, max_change);
109
110 return 0;
111}
diff --git a/xdelta3/draft-korn-vcdiff.txt b/xdelta3/draft-korn-vcdiff.txt
new file mode 100755
index 0000000..1487deb
--- /dev/null
+++ b/xdelta3/draft-korn-vcdiff.txt
@@ -0,0 +1,1322 @@
1 David G. Korn, AT&T Labs
2 Joshua P. MacDonald, UC Berkeley
3 Jeffrey C. Mogul, Compaq WRL
4Internet-Draft Kiem-Phong Vo, AT&T Labs
5Expires: 09 November 2002 09 November 2001
6
7
8 The VCDIFF Generic Differencing and Compression Data Format
9
10 draft-korn-vcdiff-06.txt
11
12
13
14Status of this Memo
15
16 This document is an Internet-Draft and is in full conformance
17 with all provisions of Section 10 of RFC2026.
18
19 Internet-Drafts are working documents of the Internet Engineering
20 Task Force (IETF), its areas, and its working groups. Note that
21 other groups may also distribute working documents as
22 Internet-Drafts.
23
24 Internet-Drafts are draft documents valid for a maximum of six
25 months and may be updated, replaced, or obsoleted by other
26 documents at any time. It is inappropriate to use Internet-
27 Drafts as reference material or to cite them other than as
28 "work in progress."
29
30 The list of current Internet-Drafts can be accessed at
31 http://www.ietf.org/ietf/1id-abstracts.txt
32
33 The list of Internet-Draft Shadow Directories can be accessed at
34 http://www.ietf.org/shadow.html.
35
36
37Abstract
38
39 This memo describes a general, efficient and portable data format
40 suitable for encoding compressed and/or differencing data so that
41 they can be easily transported among computers.
42
43
44Table of Contents:
45
46 1. EXECUTIVE SUMMARY ............................................ 2
47 2. CONVENTIONS .................................................. 3
48 3. DELTA INSTRUCTIONS ........................................... 4
49 4. DELTA FILE ORGANIZATION ...................................... 5
50 5. DELTA INSTRUCTION ENCODING ................................... 9
51 6. DECODING A TARGET WINDOW ..................................... 14
52 7. APPLICATION-DEFINED CODE TABLES .............................. 16
53 8. PERFORMANCE .................................................. 16
54 9. FURTHER ISSUES ............................................... 17
55 10. SUMMARY ...................................................... 18
56 11. ACKNOWLEDGEMENTS ............................................. 18
57 12. SECURITY CONSIDERATIONS ...................................... 18
58 13. SOURCE CODE AVAILABILITY ..................................... 18
59 14. INTELLECTUAL PROPERTY RIGHTS ................................. 18
60 15. IANA CONSIDERATIONS .......................................... 19
61 16. REFERENCES ................................................... 19
62 17. AUTHOR'S ADDRESS ............................................. 20
63
64
651. EXECUTIVE SUMMARY
66
67 Compression and differencing techniques can greatly improve storage
68 and transmission of files and file versions. Since files are often
69 transported across machines with distinct architectures and performance
70 characteristics, such data should be encoded in a form that is portable
71 and can be decoded with little or no knowledge of the encoders.
72 This document describes Vcdiff, a compact portable encoding format
73 designed for these purposes.
74
75 Data differencing is the process of computing a compact and invertible
76 encoding of a "target file" given a "source file". Data compression
77 is similar but without the use of source data. The UNIX utilities diff,
78 compress, and gzip are well-known examples of data differencing and
79 compression tools. For data differencing, the computed encoding is
80 called a "delta file", and, for data compression, it is called
81 a "compressed file". Delta and compressed files are good for storage
82 and transmission as they are often smaller than the originals.
83
84 Data differencing and data compression are traditionally treated
85 as distinct types of data processing. However, as shown in the Vdelta
86 technique by Korn and Vo [1], compression can be thought of as a special
87 case of differencing in which the source data is empty. The basic idea
88 is to unify the string parsing scheme used in the Lempel-Ziv'77 style
89 compressors [2], and the block-move technique of Tichy [3]. Loosely
90 speaking, this works as follows:
91
92 a. Concatenate source and target data.
93 b. Parse the data from left to right as in LZ'77 but
94 make sure that a parsed segment starts the target data.
95 c. Start to output when reaching target data.
96
97 Parsing is based on string matching algorithms such as suffix trees [4]
98 or hashing with different time and space performance characteristics.
99 Vdelta uses a fast string matching algorithm that requires less memory
100 than other techniques [5,6]. However, even with this algorithm, the
101 memory requirement can still be prohibitive for large files. A common
102 way to deal with memory limitation is to partition an input file into
103 chunks called "windows" and process them separately. Here, except for
104 unpublished work by Vo, little has been done on designing effective
105 windowing schemes. Current techniques, including Vdelta, simply use
106 source and target windows with corresponding addresses across source
107 and target files.
108
109 String matching and windowing algorithms have large influence on the
110 compression rate of delta and compressed files. However, it is desirable
111 to have a portable encoding format that is independent of such algorithms.
112 This enables construction of client-server applications in which a server
113 may serve clients with unknown computing characteristics. Unfortunately,
114 all current differencing and compressing tools, including Vdelta, fall
115 short in this respect. Their storage formats are closely intertwined
116 with the implemented string matching and/or windowing algorithms.
117
118 The encoding format Vcdiff proposed here addresses the above issues.
119 Vcdiff achieves the below characteristics:
120
121 Output compactness:
122 The basic encoding format compactly represents compressed or delta
123 files. Applications can further extend the basic encoding format
124 with "secondary encoders" to achieve more compression.
125
126 Data portability:
127 The basic encoding format is free from machine byte order and
128 word size issues. This allows data to be encoded on one machine
129 and decoded on a different machine with different architecture.
130
131 Algorithm genericity:
132 The decoding algorithm is independent from string matching and
133 windowing algorithms. This allows competition among implementations
134 of the encoder while keeping the same decoder.
135
136 Decoding efficiency:
137 Except for secondary encoder issues, the decoding algorithm runs
138 in time proportional to the size of the target file and uses space
139 proportional to the maximal window size. Vcdiff differs from more
140 conventional compressors in that it uses only byte-aligned
141 data, thus avoiding bit-level operations, which improves
142 decoding speed at the slight cost of compression efficiency.
143
144 The Vcdiff data format and the algorithms for decoding data shall be
145 described next. Since Vcdiff treats compression as a special case of
146 differencing, we shall use the term "delta file" to indicate the
147 compressed output for both cases.
148
149
1502. CONVENTIONS
151
152 The basic data unit is a byte. For portability, Vcdiff shall limit
153 a byte to its lower eight bits even on machines with larger bytes.
154 The bits in a byte are ordered from right to left so that the least
155 significant bit (LSB) has value 1, and the most significant bit (MSB),
156 has value 128.
157
158 For purposes of exposition in this document, we adopt the convention
159 that the LSB is numbered 0, and the MSB is numbered 7. Bit numbers
160 never appear in the encoded format itself.
161
162 Vcdiff encodes unsigned integer values using a portable variable-sized
163 format (originally introduced in the Sfio library [7]). This encoding
164 treats an integer as a number in base 128. Then, each digit in this
165 representation is encoded in the lower seven bits of a byte. Except for
166 the least significant byte, other bytes have their most significant bit
167 turned on to indicate that there are still more digits in the encoding.
168 The two key properties of this integer encoding that are beneficial
169 to a data compression format are:
170
171 a. The encoding is portable among systems using 8-bit bytes, and
172 b. Small values are encoded compactly.
173
174 For example, consider the value 123456789 which can be represented with
175 four 7-bit digits whose values are 58, 111, 26, 21 in order from most
176 to least significant. Below is the 8-bit byte encoding of these digits.
177 Note that the MSBs of 58, 111 and 26 are on.
178
179 +-------------------------------------------+
180 | 10111010 | 11101111 | 10011010 | 00010101 |
181 +-------------------------------------------+
182 MSB+58 MSB+111 MSB+26 0+21
183
184
185 Henceforth, the terms "byte" and "integer" will refer to a byte and an
186 unsigned integer as described.
187
188
189 From time to time, algorithms are exhibited to clarify the descriptions
190 of parts of the Vcdiff format. On such occasions, the C language will be
191 used to make precise the algorithms. The C code shown in this
192 document is meant for clarification only, and is not part of the
193 actual specification of the Vcdiff format.
194
195 In this specification, the key words "MUST", "MUST NOT",
196 "SHOULD", "SHOULD NOT", and "MAY" are to be interpreted as
197 described in RFC2119 [12].
198
199
2003. DELTA INSTRUCTIONS
201
202 A large target file is partitioned into non-overlapping sections
203 called "target windows". These target windows are processed separately
204 and sequentially based on their order in the target file.
205
206 A target window T of length t may be compared against some source data
207 segment S of length s. By construction, this source data segment S
208 comes either from the source file, if one is used, or from a part of
209 the target file earlier than T. In this way, during decoding, S is
210 completely known when T is being decoded.
211
212 The choices of T, t, S and s are made by some window selection algorithm
213 which can greatly affect the size of the encoding. However, as seen later,
214 these choices are encoded so that no knowledge of the window selection
215 algorithm is needed during decoding.
216
217 Assume that S[j] represents the jth byte in S, and T[k] represents
218 the kth byte in T. Then, for the delta instructions, we treat the data
219 windows S and T as substrings of a superstring U formed by concatenating
220 them like this:
221
222 S[0]S[1]...S[s-1]T[0]T[1]...T[t-1]
223
224 The "address" of a byte in S or T is referred to by its location in U.
225 For example, the address of T[k] is s+k.
226
227 The instructions to encode and direct the reconstruction of a target
228 window are called delta instructions. There are three types:
229
230 ADD: This instruction has two arguments, a size x and a sequence of
231 x bytes to be copied.
232 COPY: This instruction has two arguments, a size x and an address p
233 in the string U. The arguments specify the substring of U that
234 must be copied. We shall assert that such a substring must be
235 entirely contained in either S or T.
236 RUN: This instruction has two arguments, a size x and a byte b that
237 will be repeated x times.
238
239 Below are example source and target windows and the delta instructions
240 that encode the target window in terms of the source window.
241
242 a b c d e f g h i j k l m n o p
243 a b c d w x y z e f g h e f g h e f g h e f g h z z z z
244
245 COPY 4, 0
246 ADD 4, w x y z
247 COPY 4, 4
248 COPY 12, 24
249 RUN 4, z
250
251
252 Thus, the first letter 'a' in the target window is at location 16
253 in the superstring. Note that the fourth instruction, "COPY 12, 24",
254 copies data from T itself since address 24 is position 8 in T.
255 This instruction also shows that it is fine to overlap the data to be
256 copied with the data being copied from as long as the latter starts
257 earlier. This enables efficient encoding of periodic sequences,
258 i.e., sequences with regularly repeated subsequences. The RUN instruction
259 is a compact way to encode a sequence repeating the same byte even though
260 such a sequence can be thought of as a periodic sequence with period 1.
261
262 To reconstruct the target window, one simply processes one delta
263 instruction at a time and copies the data either from the source window
264 or from the target window being reconstructed, based on the type of the
265 instruction and the associated address, if any.
266
267
2684. DELTA FILE ORGANIZATION
269
270 A Vcdiff delta file starts with a Header section followed by a sequence
271 of Window sections. The Header section includes magic bytes to identify
272 the file type, and information concerning data processing beyond the
273 basic encoding format. The Window sections encode the target windows.
274
275 Below is the overall organization of a delta file. The indented items
276 refine the ones immediately above them. An item in square brackets may
277 or may not be present in the file depending on the information encoded
278 in the Indicator byte above it.
279
280 Header
281 Header1 - byte
282 Header2 - byte
283 Header3 - byte
284 Header4 - byte
285 Hdr_Indicator - byte
286 [Secondary compressor ID] - byte
287
288[@@@ Why is compressor ID not an integer? ]
289[@@@ If we aren't defining any secondary compressors yet, then it seems
290that defining the [Secondary compressor ID] and the corresponding
291VCD_DECOMPRESS Hdr_Indicator bit in this draft has no real value. An
292implementation of this specification won't be able to decode a VCDIFF
293encoded with this option if it doesn't know about any secondary
294compressors. It seems that you should specify the bits related to
295secondary compressors once you have defined the first secondary
296compressor. I can imagine a secondary-compressor might want to supply
297extra information, such as a dictionary of some kind, in which case
298this speculative treatment wouldn't go far enough.]
299
300 [Length of code table data] - integer
301 [Code table data]
302 Size of near cache - byte
303 Size of same cache - byte
304 Compressed code table data
305 Window1
306 Win_Indicator - byte
307 [Source segment size] - integer
308 [Source segment position] - integer
309 The delta encoding of the target window
310 Length of the delta encoding - integer
311 The delta encoding
312 Size of the target window - integer
313 Delta_Indicator - byte
314 Length of data for ADDs and RUNs - integer
315 Length of instructions and sizes - integer
316 Length of addresses for COPYs - integer
317 Data section for ADDs and RUNs - array of bytes
318 Instructions and sizes section - array of bytes
319 Addresses section for COPYs - array of bytes
320 Window2
321 ...
322
323
324
3254.1 The Header Section
326
327 Each delta file starts with a header section organized as below.
328 Note the convention that square-brackets enclose optional items.
329
330 Header1 - byte = 0xD6
331 Header2 - byte = 0xC3
332 Header3 - byte = 0xC4
333
334HMMM
335
3360xD6
3370xC3
3380xC4
339
340 Header4 - byte
341 Hdr_Indicator - byte
342 [Secondary compressor ID] - byte
343 [Length of code table data] - integer
344 [Code table data]
345
346 The first three Header bytes are the ASCII characters 'V', 'C' and 'D'
347 with their most significant bits turned on (in hexadecimal, the values
348 are 0xD6, 0xC3, and 0xC4). The fourth Header byte is currently set to
349 zero. In the future, it might be used to indicate the version of Vcdiff.
350
351 The Hdr_Indicator byte shows if there are any initialization data
352 required to aid in the reconstruction of data in the Window sections.
353 This byte MAY have non-zero values for either, both, or neither of
354 the two bits VCD_DECOMPRESS and VCD_CODETABLE below:
355
356 7 6 5 4 3 2 1 0
357 +-+-+-+-+-+-+-+-+
358 | | | | | | | | |
359 +-+-+-+-+-+-+-+-+
360 ^ ^
361 | |
362 | +-- VCD_DECOMPRESS
363 +---- VCD_CODETABLE
364
365 If bit 0 (VCD_DECOMPRESS) is non-zero, this indicates that a secondary
366 compressor may have been used to further compress certain parts of the
367 delta encoding data as described in Sections 4.3 and 6. In that case,
368 the ID of the secondary compressor is given next. If this bit is zero,
369 the compressor ID byte is not included.
370
371[@@@ If we aren't defining any secondary compressors yet, then it seems
372this bit has no real value yet..]
373
374 If bit 1 (VCD_CODETABLE) is non-zero, this indicates that an
375 application-defined code table is to be used for decoding the delta
376 instructions. This table itself is compressed. The length of the data
377 comprising this compressed code table and the data follow next. Section 7
378 discusses application-defined code tables. If this bit is zero, the code
379 table data length and the code table data are not included.
380
381 If both bits are set, then the compressor ID byte is included
382 before the code table data length and the code table data.
383
384
3854.2 The Format of a Window Section
386
387 Each Window section is organized as follows:
388
389 Win_Indicator - byte
390 [Source segment length] - integer
391 [Source segment position] - integer
392 The delta encoding of the target window
393
394
395 Below are the details of the various items:
396
397[@@@ Here, I want to replace the Win_Indicator with a source-count,
398followed by source-count length/position pairs?]
399
400 Win_Indicator:
401 This byte is a set of bits, as shown:
402
403 7 6 5 4 3 2 1 0
404 +-+-+-+-+-+-+-+-+
405 | | | | | | | | |
406 +-+-+-+-+-+-+-+-+
407 ^ ^
408 | |
409 | +-- VCD_SOURCE
410 +---- VCD_TARGET
411
412
413 If bit 0 (VCD_SOURCE) is non-zero, this indicates that a segment
414 of data from the "source" file was used as the corresponding
415 source window of data to encode the target window. The decoder
416 will use this same source data segment to decode the target window.
417
418 If bit 1 (VCD_TARGET) is non-zero, this indicates that a segment
419 of data from the "target" file was used as the corresponding
420 source window of data to encode the target window. As above, this
421 same source data segment is used to decode the target window.
422
423 The Win_Indicator byte MUST NOT have more than one of the bits
424 set (non-zero). It MAY have none of these bits set.
425
426 If one of these bits is set, the byte is followed by two
427 integers to indicate respectively the length and position of
428 the source data segment in the relevant file. If the
429 indicator byte is zero, the target window was compressed
430 by itself without comparing against another data segment,
431 and these two integers are not included.
432
433 The delta encoding of the target window:
434 This contains the delta encoding of the target window either
435 in terms of the source data segment (i.e., VCD_SOURCE
436 or VCD_TARGET was set) or by itself if no source window
437 is specified. This data format is discussed next.
438
439
4404.3 The Delta Encoding of a Target Window
441
442 The delta encoding of a target window is organized as follows:
443
444 Length of the delta encoding - integer
445 The delta encoding
446 Length of the target window - integer
447 Delta_Indicator - byte
448 Length of data for ADDs and RUNs - integer
449 Length of instructions section - integer
450 Length of addresses for COPYs - integer
451 Data section for ADDs and RUNs - array of bytes
452 Instructions and sizes section - array of bytes
453 Addresses section for COPYs - array of bytes
454
455
456 Length of the delta encoding:
457 This integer gives the total number of remaining bytes that
458 comprise data of the delta encoding for this target window.
459
460 The delta encoding:
461 This contains the data representing the delta encoding which
462 is described next.
463
464 Length of the target window:
465 This integer indicates the actual size of the target window
466 after decompression. A decoder can use this value to allocate
467 memory to store the uncompressed data.
468
469 Delta_Indicator:
470 This byte is a set of bits, as shown:
471
472 7 6 5 4 3 2 1 0
473 +-+-+-+-+-+-+-+-+
474 | | | | | | | | |
475 +-+-+-+-+-+-+-+-+
476 ^ ^ ^
477 | | |
478 | | +-- VCD_DATACOMP
479 | +---- VCD_INSTCOMP
480 +------ VCD_ADDRCOMP
481
482 VCD_DATACOMP: bit value 1.
483 VCD_INSTCOMP: bit value 2.
484 VCD_ADDRCOMP: bit value 4.
485
486 As discussed, the delta encoding consists of COPY, ADD and RUN
487 instructions. The ADD and RUN instructions have accompanying
488 unmatched data (that is, data that does not specifically match
489 any data in the source window or in some earlier part of the
490 target window) and the COPY instructions have addresses of where
491 the matches occur. OPTIONALLY, these types of data MAY be further
492 compressed using a secondary compressor. Thus, Vcdiff separates
493 the encoding of the delta instructions into three parts:
494
495 a. The unmatched data in the ADD and RUN instructions,
496 b. The delta instructions and accompanying sizes, and
497 c. The addresses of the COPY instructions.
498
499 If the bit VCD_DECOMPRESS (Section 4.1) was on, each of these
500 sections may have been compressed using the specified secondary
501 compressor. The bit positions 0 (VCD_DATACOMP), 1 (VCD_INSTCOMP),
502 and 2 (VCD_ADDRCOMP) respectively indicate, if non-zero, that
503 the corresponding parts are compressed. Then, these parts MUST
504 be decompressed before decoding the delta instructions.
505
506 Length of data for ADDs and RUNs:
507 This is the length (in bytes) of the section of data storing
508 the unmatched data accompanying the ADD and RUN instructions.
509
510 Length of instructions section:
511 This is the length (in bytes) of the delta instructions and
512 accompanying sizes.
513
514 Length of addresses for COPYs:
515 This is the length (in bytes) of the section storing
516 the addresses of the COPY instructions.
517
518 Data section for ADDs and RUNs:
519 This sequence of bytes encodes the unmatched data for the ADD
520 and RUN instructions.
521
522 Instructions and sizes section:
523 This sequence of bytes encodes the instructions and their sizes.
524
525 Addresses section for COPYs:
526 This sequence of bytes encodes the addresses of the COPY
527 instructions.
528
529
5305. DELTA INSTRUCTION ENCODING
531
532 The delta instructions described in Section 3 represent the results of
533 string matching. For many data differencing applications in which the
534 changes between source and target data are small, any straightforward
535 representation of these instructions would be adequate. However, for
536 applications including data compression, it is important to encode
537 these instructions well to achieve good compression rates. From our
538 experience, the following observations can be made:
539
540 a. The addresses in COPY instructions are locations of matches and
541 often occur close by or even exactly equal to one another. This is
542 because data in local regions are often replicated with minor changes.
543 In turn, this means that coding a newly matched address against some
544 set of recently matched addresses can be beneficial.
545
546 b. The matches are often short in length and separated by small amounts
547 of unmatched data. That is, the lengths of COPY and ADD instructions
548 are often small. This is particularly true of binary data such as
549 executable files or structured data such as HTML or XML. In such cases,
550 compression can be improved by combining the encoding of the sizes
551 and the instruction types as well as combining the encoding of adjacent
552 delta instructions with sufficiently small data sizes.
553
554 The below subsections discuss how the Vcdiff data format provides
555 mechanisms enabling encoders to use the above observations to improve
556 compression rates.
557
558
5595.1 Address Encoding Modes of COPY Instructions
560
561 As mentioned earlier, addresses of COPY instructions often occur close
562 to one another or are exactly equal. To take advantage of this phenomenon
563 and encode addresses of COPY instructions more efficiently, the Vcdiff
564 data format supports the use of two different types of address caches.
565 Both the encoder and decoder maintain these caches, so that the decoder's
566 caches remain synchronized with the encoder's caches.
567
568 a. A "near" cache is an array with "s_near" slots, each containing an
569 address used for encoding addresses nearby to previously encoded
570 addresses (in the positive direction only). The near cache also
571 maintains a "next_slot" index to the near cache. New entries to the
572 near cache are always inserted in the next_slot index, which maintains
573 a circular buffer of the s_near most recent addresses.
574
575 b. A "same" cache is an array with s_same*256 slots, each
576 containing an address. The same cache maintains a hash table of recent
577 addresses used for repeated encoding of the exact same address.
578
579
580 By default, the parameters s_near and s_same are respectively set to 4
581 and 3. An encoder MAY modify these values, but then it MUST encode the
582 new values in the encoding itself, as discussed in Section 7, so that
583 the decoder can properly set up its own caches.
584
585 At the start of processing a target window, an implementation
586 (encoder or decoder) initializes all of the slots in both caches
587 to zero. The next_slot pointer of the near cache is set
588 to point to slot zero.
589
590 Each time a COPY instruction is processed by the encoder or
591 decoder, the implementation's caches are updated as follows, where
592 "addr" is the address in the COPY instruction.
593
594 a. The slot in the near cache referenced by the next_slot
595 index is set to addr. The next_slot index is then incremented
596 modulo s_near.
597
598 b. The slot in the same cache whose index is addr%(s_same*256)
599 is set to addr. [We use the C notations of % for modulo and
600 * for multiplication.]
601
602
6035.2 Example code for maintaining caches
604
605 To make clear the above description, below are example cache data
606 structures and algorithms to initialize and update them:
607
608 typedef struct _cache_s
609 {
610 int* near; /* array of size s_near */
611 int s_near;
612 int next_slot; /* the circular index for near */
613 int* same; /* array of size s_same*256 */
614 int s_same;
615 } Cache_t;
616
617 cache_init(Cache_t* ka)
618 {
619 int i;
620
621 ka->next_slot = 0;
622 for(i = 0; i < ka->s_near; ++i)
623 ka->near[i] = 0;
624
625 for(i = 0; i < ka->s_same*256; ++i)
626 ka->same[i] = 0;
627 }
628
629 cache_update(Cache_t* ka, int addr)
630 {
631 if(ka->s_near > 0)
632 { ka->near[ka->next_slot] = addr;
633 ka->next_slot = (ka->next_slot + 1) % ka->s_near;
634 }
635
636 if(ka->s_same > 0)
637 ka->same[addr % (ka->s_same*256)] = addr;
638 }
639
640
6415.3 Encoding of COPY instruction addresses
642
643 The address of a COPY instruction is encoded using different modes
644 depending on the type of cached address used, if any.
645
646 Let "addr" be the address of a COPY instruction to be decoded and "here"
647 be the current location in the target data (i.e., the start of the data
648 about to be encoded or decoded). Let near[j] be the jth element in
649 the near cache, and same[k] be the kth element in the same cache.
650 Below are the possible address modes:
651
652 VCD_SELF: This mode has value 0. The address was encoded by itself
653 as an integer.
654
655 VCD_HERE: This mode has value 1. The address was encoded as
656 the integer value "here - addr".
657
658 Near modes: The "near modes" are in the range [2,s_near+1]. Let m
659 be the mode of the address encoding. The address was encoded
660 as the integer value "addr - near[m-2]".
661
662 Same modes: The "same modes" are in the range
663 [s_near+2,s_near+s_same+1]. Let m be the mode of the encoding.
664 The address was encoded as a single byte b such that
665 "addr == same[(m - (s_near+2))*256 + b]".
666
667
6685.3 Example code for encoding and decoding of COPY instruction addresses
669
670 We show example algorithms below to demonstrate use of address modes more
671 clearly. The encoder has freedom to choose address modes; the sample
672 addr_encode() algorithm merely shows one way of picking the address
673 mode. The decoding algorithm addr_decode() will uniquely decode addresses
674 regardless of the encoder's algorithm choice.
675
676 Note that the address caches are updated immediately after an address is
677 encoded or decoded. In this way, the decoder is always synchronized with
678 the encoder.
679
680 int addr_encode(Cache_t* ka, int addr, int here, int* mode)
681 {
682 int i, d, bestd, bestm;
683
684 /* Attempt to find the address mode that yields the
685 * smallest integer value for "d", the encoded address
686 * value, thereby minimizing the encoded size of the
687 * address. */
688
689 bestd = addr; bestm = VCD_SELF; /* VCD_SELF == 0 */
690
691 if((d = here-addr) < bestd)
692 { bestd = d; bestm = VCD_HERE; } /* VCD_HERE == 1 */
693
694 for(i = 0; i < ka->s_near; ++i)
695 if((d = addr - ka->near[i]) >= 0 && d < bestd)
696 { bestd = d; bestm = i+2; }
697
698 if(ka->s_same > 0 && ka->same[d = addr%(ka->s_same*256)] == addr)
699 { bestd = d%256; bestm = ka->s_near + 2 + d/256; }
700
701 cache_update(ka,addr);
702
703 *mode = bestm; /* this returns the address encoding mode */
704 return bestd; /* this returns the encoded address */
705 }
706
707 Note that the addr_encode() algorithm chooses the best address mode using a
708 local optimization, but that may not lead to the best encoding efficiency
709 because different modes lead to different instruction encodings, as described below.
710
711 The functions addrint() and addrbyte() used in addr_decode() obtain from
712 the "Addresses section for COPYs" (Section 4.3) an integer or a byte,
713 respectively. These utilities will not be described here. We simply
714 recall that an integer is represented as a compact variable-sized string
715 of bytes as described in Section 2 (i.e., base 128).
716
717 int addr_decode(Cache_t* ka, int here, int mode)
718 { int addr, m;
719
720 if(mode == VCD_SELF)
721 addr = addrint();
722 else if(mode == VCD_HERE)
723 addr = here - addrint();
724 else if((m = mode - 2) >= 0 && m < ka->s_near) /* near cache */
725 addr = ka->near[m] + addrint();
726 else /* same cache */
727 { m = mode - (2 + ka->s_near);
728 addr = ka->same[m*256 + addrbyte()];
729 }
730
731 cache_update(ka, addr);
732
733 return addr;
734 }
735
736
7375.4 Instruction Codes
738
739 As noted, the data sizes associated with delta instructions are often
740 small. Thus, compression efficiency can be improved by combining the sizes
741 and instruction types in a single encoding, as well as by combining certain
742 pairs of adjacent delta instructions. Effective choices of when to perform
743 such combinations depend on many factors including the data being processed
744 and the string matching algorithm in use. For example, if many COPY
745 instructions have the same data sizes, it may be worthwhile to encode these
746 instructions more compactly than others.
747
748 The Vcdiff data format is designed so that a decoder does not need to be
749 aware of the choices made in encoding algorithms. This is achieved with the
750 notion of an "instruction code table" containing 256 entries. Each entry
751 defines either a single delta instruction or a pair of instructions that
752 have been combined. Note that the code table itself only exists in main
753 memory, not in the delta file (unless using an application-defined code
754 table, described in Section 7). The encoded data simply includes the index
755 of each instruction and, since there are only 256 indices, each index
756 can be represented as a single byte.
757
758 Each instruction code entry contains six fields, each of which
759 is a single byte with unsigned value:
760
761 +-----------------------------------------------+
762 | inst1 | size1 | mode1 | inst2 | size2 | mode2 |
763 +-----------------------------------------------+
764
765@@@ could be more compact
766
767 Each triple (inst,size,mode) defines a delta instruction. The meanings
768 of these fields are as follows:
769
770 inst: An "inst" field can have one of the four values: NOOP (0), ADD (1),
771 RUN (2) or COPY (3) to indicate the instruction types. NOOP means
772 that no instruction is specified. In this case, both the corresponding
773 size and mode fields will be zero.
774
775 size: A "size" field is zero or positive. A value zero means that the
776 size associated with the instruction is encoded separately as
777 an integer in the "Instructions and sizes section" (Section 6).
778 A positive value for "size" defines the actual data size.
779 Note that since the size is restricted to a byte, the maximum
780 value for any instruction with size implicitly defined in the code
781 table is 255.
782
783 mode: A "mode" field is significant only when the associated delta
784 instruction is a COPY. It defines the mode used to encode the
785 associated addresses. For other instructions, this is always zero.
786
787
7885.5 The Code Table
789
790 Following the discussions on address modes and instruction code tables,
791 we define a "Code Table" to have the data below:
792
793 s_near: the size of the near cache,
794 s_same: the size of the same cache,
795 i_code: the 256-entry instruction code table.
796
797 Vcdiff itself defines a "default code table" in which s_near is 4
798 and s_same is 3. Thus, there are 9 address modes for a COPY instruction.
799 The first two are VCD_SELF (0) and VCD_HERE (1). Modes 2, 3, 4 and 5
800 are for addresses coded against the near cache. And, modes 6, 7 and 8
801 are for addresses coded against the same cache.
802
803 The default instruction code table is depicted below, in a compact
804 representation that we use only for descriptive purposes. See section 7
805 for the specification of how an instruction code table is represented
806 in the Vcdiff encoding format. In the depiction, a zero value for
807 size indicates that the size is separately coded. The mode of non-COPY
808 instructions is represented as 0 even though it is not used.
809
810
811 TYPE SIZE MODE TYPE SIZE MODE INDEX
812 ---------------------------------------------------------------
813 1. RUN 0 0 NOOP 0 0 0
814 2. ADD 0, [1,17] 0 NOOP 0 0 [1,18]
815 3. COPY 0, [4,18] 0 NOOP 0 0 [19,34]
816 4. COPY 0, [4,18] 1 NOOP 0 0 [35,50]
817 5. COPY 0, [4,18] 2 NOOP 0 0 [51,66]
818 6. COPY 0, [4,18] 3 NOOP 0 0 [67,82]
819 7. COPY 0, [4,18] 4 NOOP 0 0 [83,98]
820 8. COPY 0, [4,18] 5 NOOP 0 0 [99,114]
821 9. COPY 0, [4,18] 6 NOOP 0 0 [115,130]
822 10. COPY 0, [4,18] 7 NOOP 0 0 [131,146]
823 11. COPY 0, [4,18] 8 NOOP 0 0 [147,162]
824 12. ADD [1,4] 0 COPY [4,6] 0 [163,174]
825 13. ADD [1,4] 0 COPY [4,6] 1 [175,186]
826 14. ADD [1,4] 0 COPY [4,6] 2 [187,198]
827 15. ADD [1,4] 0 COPY [4,6] 3 [199,210]
828 16. ADD [1,4] 0 COPY [4,6] 4 [211,222]
829 17. ADD [1,4] 0 COPY [4,6] 5 [223,234]
830 18. ADD [1,4] 0 COPY 4 6 [235,238]
831 19. ADD [1,4] 0 COPY 4 7 [239,242]
832 20. ADD [1,4] 0 COPY 4 8 [243,246]
833 21. COPY 4 [0,8] ADD 1 0 [247,255]
834 ---------------------------------------------------------------
835
836 In the above depiction, each numbered line represents one or more
837 entries in the actual instruction code table (recall that an entry in
838 the instruction code table may represent up to two combined delta
839 instructions.) The last column ("INDEX") shows which index value or
840 range of index values of the entries covered by that line. The notation
841 [i,j] means values from i through j, inclusive. The first 6 columns of
842 a line in the depiction describe the pairs of instructions used for
843 the corresponding index value(s).
844
845 If a line in the depiction includes a column entry using the [i,j]
846 notation, this means that the line is instantiated for each value
847 in the range from i to j, inclusive. The notation "0, [i,j]" means
848 that the line is instantiated for the value 0 and for each value
849 in the range from i to j, inclusive.
850
851 If a line in the depiction includes more than one entry using the [i,j]
852 notation, implying a "nested loop" to convert the line to a range of
853 table entries, the first such [i,j] range specifies the outer loop,
854 and the second specifies the inner loop.
855
856 The below examples should make clear the above description:
857
858 Line 1 shows the single RUN instruction with index 0. As the size field
859 is 0, this RUN instruction always has its actual size encoded separately.
860
861 Line 2 shows the 18 single ADD instructions. The ADD instruction with
862 size field 0 (i.e., the actual size is coded separately) has index 1.
863 ADD instructions with sizes from 1 to 17 use code indices 2 to 18 and
864 their sizes are as given (so they will not be separately encoded.)
865
866 Following the single ADD instructions are the single COPY instructions
867 ordered by their address encoding modes. For example, line 11 shows the
868 COPY instructions with mode 8, i.e., the last of the same cache.
869 In this case, the COPY instruction with size field 0 has index 147.
870 Again, the actual size of this instruction will be coded separately.
871
872 Lines 12 to 21 show the pairs of instructions that are combined together.
873 For example, line 12 depicts the 12 entries in which an ADD instruction
874 is combined with an immediately following COPY instruction. The entries
875 with indices 163, 164, 165 represent the pairs in which the ADD
876 instructions all have size 1 while the COPY instructions have mode
877 0 (VCD_SELF) and sizes 4, 5 and 6 respectively.
878
879 The last line, line 21, shows the nine instruction pairs where the first
880 instruction is a COPY and the second is an ADD. In this case, all COPY
881 instructions have size 4 with mode ranging from 0 to 8 and all the ADD
882 instructions have size 1. Thus, the entry with largest index 255
883 combines a COPY instruction of size 4 and mode 8 with an ADD instruction
884 of size 1.
885
886 The choice of the minimum size 4 for COPY instructions in the default code
887 table was made from experiments that showed that excluding small matches
888 (less than 4 bytes long) improved the compression rates.
889
890
8916. DECODING A TARGET WINDOW
892
893 Section 4.3 discusses that the delta instructions and associated data
894 are encoded in three arrays of bytes:
895
896 Data section for ADDs and RUNs,
897 Instructions and sizes section, and
898 Addresses section for COPYs.
899
900
901 Further, these data sections may have been further compressed by some
902 secondary compressor. Assume that any such compressed data has been
903 decompressed, so that we now have three arrays:
904
905 inst: bytes coding the instructions and sizes.
906 data: unmatched data associated with ADDs and RUNs.
907 addr: bytes coding the addresses of COPYs.
908
909 These arrays are organized as follows:
910
911 inst:
912 a sequence of (index, [size1], [size2]) tuples, where "index"
913 is an index into the instruction code table, and size1 and size2
914 are integers that MAY or MAY NOT be included in the tuple as
915 follows. The entry with the given "index" in the instruction
916 code table potentially defines two delta instructions. If the
917 first delta instruction is not a VCD_NOOP and its size is zero,
918 then size1 MUST be present. Otherwise, size1 MUST be omitted and
919 the size of the instruction (if it is not VCD_NOOP) is as defined
920 in the table. The presence or absence of size2 is defined
921 similarly with respect to the second delta instruction.
922
923 data:
924 a sequence of data values, encoded as bytes.
925
926 addr:
927 a sequence of address values. Addresses are normally encoded as
928 integers as described in Section 2 (i.e., base 128).
929 Since the same cache emits addresses in the range [0,255],
930 however, same cache addresses are always encoded as a
931 single byte.
932
933 To summarize, each tuple in the "inst" array includes an index to some
934 entry in the instruction code table that determines:
935
936 a. Whether one or two instructions were encoded and their types.
937
938 b. If the instructions have their sizes encoded separately, these
939 sizes will follow, in order, in the tuple.
940
941 c. If the instructions have accompanying data, i.e., ADDs or RUNs,
942 their data will be in the array "data".
943
944 d. Similarly, if the instructions are COPYs, the coded addresses are
945 found in the array "addr".
946
947 The decoding procedure simply processes the arrays by reading one code
948 index at a time, looking up the corresponding instruction code entry,
949 then consuming the respective sizes, data and addresses following the
950 directions in this entry. In other words, the decoder maintains an implicit
951 next-element pointer for each array; "consuming" an instruction tuple,
952 data, or address value implies incrementing the associated pointer.
953
954 For example, if during the processing of the target window, the next
955 unconsumed tuple in the inst array has index value 19, then the first
956 instruction is a COPY, whose size is found as the immediately following
957 integer in the inst array. Since the mode of this COPY instruction is
958 VCD_SELF, the corresponding address is found by consuming the next
959 integer in the addr array. The data array is left intact. As the second
960 instruction for code index 19 is a NOOP, this tuple is finished.
961
962
9637. APPLICATION-DEFINED CODE TABLES
964
965 Although the default code table used in Vcdiff is good for general
966 purpose encoders, there are times when other code tables may perform
967 better. For example, to code a file with many identical segments of data,
968 it may be advantageous to have a COPY instruction with the specific size
969 of these data segments so that the instruction can be encoded in a single
970 byte. Such a special code table MUST then be encoded in the delta file
971 so that the decoder can reconstruct it before decoding the data.
972
973 Vcdiff allows an application-defined code table to be specified
974 in a delta file with the following data:
975
976 Size of near cache - byte
977 Size of same cache - byte
978 Compressed code table data
979
980 The "compressed code table data" encodes the delta between the default
981 code table (source) and the new code table (target) in the same manner as
982 described in Section 4.3 for encoding a target window in terms of a
983 source window. This delta is computed using the following steps:
984
985 a. Convert the new instruction code table into a string, "code", of
986 1536 bytes using the below steps in order:
987
988 i. Add in order the 256 bytes representing the types of the first
989 instructions in the instruction pairs.
990 ii. Add in order the 256 bytes representing the types of the second
991 instructions in the instruction pairs.
992 iii. Add in order the 256 bytes representing the sizes of the first
993 instructions in the instruction pairs.
994 iv. Add in order the 256 bytes representing the sizes of the second
995 instructions in the instruction pairs.
996 v. Add in order the 256 bytes representing the modes of the first
997 instructions in the instruction pairs.
998 vi. Add in order the 256 bytes representing the modes of the second
999 instructions in the instruction pairs.
1000
1001 b. Similarly, convert the default instruction code table into
1002 a string "dflt".
1003
1004 c. Treat the string "code" as a target window and "dflt" as the
1005 corresponding source data and apply an encoding algorithm to
1006 compute the delta encoding of "code" in terms of "dflt".
1007 This computation MUST use the default code table for encoding
1008 the delta instructions.
1009
1010 The decoder can then reverse the above steps to decode the compressed
1011 table data using the method of Section 6, employing the default code
1012 table, to generate the new code table. Note that the decoder does not
1013 need to know anything about the details of the encoding algorithm used
1014 in step (c). The decoder is still able to decode the new code table
1015 because the Vcdiff format is independent from the choice of encoding
1016 algorithm, and because the encoder in step (c) uses the known, default
1017 code table.
1018
1019
10208. PERFORMANCE
1021
1022 The encoding format is compact. For compression only, using the LZ-77
1023 string parsing strategy and without any secondary compressors, the typical
1024 compression rate is better than Unix compress and close to gzip. For
1025 differencing, the data format is better than all known methods in
1026 terms of its stated goal, which is primarily decoding speed and
1027 encoding efficiency.
1028
1029 We compare the performance of compress, gzip and Vcdiff using the
1030 archives of three versions of the Gnu C compiler, gcc-2.95.1.tar,
1031 gcc-2.95.2.tar and gcc-2.95.3.tar. The experiments were done on an
1032 SGI-MIPS3, 400MHZ. Gzip was used at its default compression level.
1033 Vcdiff timings were done using the Vcodex/Vcdiff software (Section 13).
1034 As string and window matching typically dominates the computation during
1035 compression, the Vcdiff compression times were directly due to the
1036 algorithms used in the Vcodex/Vcdiff software. However, the decompression
1037 times should be generic and representative of any good implementation
1038 of the Vcdiff data format. Timing was done by running each program
1039 three times and taking the average of the total cpu+system times.
1040
1041 Below are the different Vcdiff runs:
1042
1043 Vcdiff: vcdiff is used as compressor only.
1044
1045 Vcdiff-d: vcdiff is used as a differencer only. That is, it only
1046 compares target data against source data. Since the files
1047 involved are large, they are broken into windows. In this
1048 case, each target window starting at some file offset in
1049 the target file is compared against a source window with
1050 the same file offset (in the source file). The source
1051 window is also slightly larger than the target window
1052 to increase matching opportunities. The -d option also gives
1053 a hint to the string matching algorithm of Vcdiff that
1054 the two files are very similar with long stretches of matches.
1055 The algorithm takes advantage of this to minimize its
1056 processing of source data and save time.
1057
1058 Vcdiff-dc: This is similar to Vcdiff-d but vcdiff can also compare
1059 target data against target data as applicable. Thus, vcdiff
1060 both computes differences and compresses data. The windowing
1061 algorithm is the same as above. However, the above hint is
1062 rescinded in this case.
1063
1064 Vcdiff-dcs: This is similar to Vcdiff-dc but the windowing algorithm
1065 uses a content-based heuristic to select source data segments
1066 that are more likely to match with a given target window.
1067 Thus, the source data segment selected for a target window
1068 often will not be aligned with the file offsets of this
1069 target window.
1070
1071
1072 gcc-2.95.1 gcc-2.95.2 compression decompression
1073 raw size 55746560 55797760
1074 compress - 19939390 13.85s 7.09s
1075 gzip - 12973443 42.99s 5.35s
1076 Vcdiff - 15358786 20.04s 4.65s
1077 Vcdiff-d - 100971 10.93s 1.92s
1078 Vcdiff-dc - 97246 20.03s 1.84s
1079 Vcdiff-dcs - 256445 44.81s 1.84s
1080
1081 TABLE 1. Compressing gcc-2.95.2.tar given gcc-2.95.1
1082
1083
1084 TABLE 1 shows the raw sizes of gcc-2.95.1.tar and gcc-2.95.2.tar and the
1085 sizes of the compressed results. As a pure compressor, the compression
1086 rate for Vcdiff is worse than gzip and better than compress. The last
1087 three rows show that when two file versions are very similar, differencing
1088 can have dramatically good compression rates. Vcdiff-d and Vcdiff-dc use
1089 the same simple window selection method but Vcdiff-dc also does compression
1090 so its output is slightly smaller. Vcdiff-dcs uses a heuristic based on
1091 data content to search for source data that likely will match a given target
1092 window. Although it does a good job, the heuristic did not always find the
1093 best matches which are given by the simple algorithm of Vcdiff-d. As a
1094 result, the output size is slightly larger. Note also that there is a large
1095 cost in computing matching windows this way. Finally, the compression time
1096 of Vcdiff-d is nearly half of that of Vcdiff-dc. It is tempting to conclude
1097 that the compression feature causes the additional time in Vcdiff-dc
1098 relative to Vcdiff-d. However, this is not the case. The hint given to
1099 the Vcdiff string matching algorithm that the two files are likely to
1100 have very long stretches of matches helps the algorithm to minimize
1101 processing of the "source data", thus saving half the time. However, as we
1102 shall see below, when this hint is wrong, the result is an even longer time.
1103
1104
1105 gcc-2.95.2 gcc-2.95.3 compression decompression
1106 raw size 55797760 55787520
1107 compress - 19939453 13.54s 7.00s
1108 gzip - 12998097 42.63s 5.62s
1109 Vcdiff - 15371737 20.09s 4.74s
1110 Vcdiff-d - 26383849 71.41s 6.41s
1111 Vcdiff-dc - 14461203 42.48s 4.82s
1112 Vcdiff-dcs - 1248543 61.18s 1.99s
1113
1114 TABLE 2. Compressing gcc-2.95.3.tar given gcc-2.95.2
1115
1116
1117 TABLE 2 shows the raw sizes of gcc-2.95.2.tar and gcc-2.95.3.tar and
1118 the sizes of the compressed results. In this case, the tar file of
1119 gcc-2.95.3 is rearranged in a way that makes the straightforward method
1120 of matching file offsets for source and target windows fail. As a
1121 result, Vcdiff-d performs rather dismally both in time and output size.
1122 The large time for Vcdiff-d is directly due to the fact that the string
1123 matching algorithm has to work much harder to find matches when the hint
1124 that two files have long matching stretches fails to hold. On the other
1125 hand, Vcdiff-dc does both differencing and compression resulting in good
1126 output size. Finally, the window searching heuristic used in Vcdiff-dcs is
1127 effective in finding the right matching source windows for target windows
1128 resulting in a small output size. This shows why the data format needs to
1129 have a way to specify matching windows to gain performance. Finally,
1130 we note that the decoding times are always good regardless of how
1131 the string matching or window searching algorithms perform.
1132
1133
11349. FURTHER ISSUES
1135
1136 This document does not address a few issues:
1137
1138 Secondary compressors:
1139 As discussed in Section 4.3, certain sections in the delta encoding
1140 of a window may be further compressed by a secondary compressor.
1141 In our experience, the basic Vcdiff format is adequate for most
1142 purposes so that secondary compressors are seldom needed. In
1143 particular, for normal use of data differencing where the files to
1144 be compared have long stretches of matches, much of the gain in
1145 compression rate is already achieved by normal string matching.
1146 Thus, the use of secondary compressors is seldom needed in this case.
1147 However, for applications beyond differencing of such nearly identical
1148 files, secondary compressors may be needed to achieve maximal
1149 compressed results.
1150
1151 Therefore, we recommend leaving the Vcdiff data format defined
1152 as in this document so that the use of secondary compressors
1153 can be implemented when they become needed in the future.
1154 The formats of the compressed data via such compressors or any
1155 compressors that may be defined in the future are left open to
1156 their implementations. These could include Huffman encoding,
1157 arithmetic encoding, and splay tree encoding [8,9].
1158
1159 Large file system vs. small file system:
1160 As discussed in Section 4, a target window in a large file may be
1161 compared against some source window in another file or in the same
1162 file (from some earlier part). In that case, the file offset of the
1163 source window is specified as a variable-sized integer in the delta
1164 encoding. There is a possibility that the encoding was computed on
1165 a system supporting much larger files than the system on which
1166 the data may be decoded (e.g., 64-bit file systems vs. 32-bit file
1167 systems). In that case, some target data may not be recoverable.
1168 This problem could afflict any compression format, and ought
1169 to be resolved with a generic negotiation mechanism in the
1170 appropriate protocol(s).
1171
1172
117310. SUMMARY
1174
1175 We have described Vcdiff, a general and portable encoding format for
1176 compression and differencing. The format is good in that it allows
1177 implementing a decoder without knowledge of the encoders. Further,
1178 ignoring the use of secondary compressors not defined within the format,
1179 the decoding algorithm runs in linear time and requires working space
1180 proportional to window sizes.
1181
1182
1183
118411. ACKNOWLEDGEMENTS
1185
1186 Thanks are due to Balachander Krishnamurthy, Jeff Mogul and Arthur Van Hoff
1187 who provided much encouragement to publicize Vcdiff. In particular, Jeff
1188 helped clarify the description of the data format presented here.
1189
1190
1191
119212. SECURITY CONSIDERATIONS
1193
1194 Vcdiff only provides a format to encode compressed and differenced data.
1195 It does not address any issues concerning how such data are, in fact,
1196 stored in a given file system or the run-time memory of a computer system.
1197 Therefore, we do not anticipate any security issues with respect to Vcdiff.
1198
1199
1200
120113. SOURCE CODE AVAILABILITY
1202
1203 Vcdiff is implemented as a data transforming method in Phong Vo's
1204 Vcodex library. AT&T Corp. has made the source code for Vcodex available
1205 for anyone to use to transmit data via HTTP/1.1 Delta Encoding [10,11].
1206 The source code and accompanying license are accessible at the URL below:
1207
1208 http://www.research.att.com/sw/tools
1209
1210
121114. INTELLECTUAL PROPERTY RIGHTS
1212
1213 The IETF has been notified of intellectual property rights claimed in
1214 regard to some or all of the specification contained in this
1215 document. For more information consult the online list of claimed
1216 rights, at <http://www.ietf.org/ipr.html>.
1217
1218 The IETF takes no position regarding the validity or scope of any
1219 intellectual property or other rights that might be claimed to
1220 pertain to the implementation or use of the technology described in
1221 this document or the extent to which any license under such rights
1222 might or might not be available; neither does it represent that it
1223 has made any effort to identify any such rights. Information on the
1224 IETF's procedures with respect to rights in standards-track and
1225 standards-related documentation can be found in BCP-11. Copies of
1226 claims of rights made available for publication and any assurances of
1227 licenses to be made available, or the result of an attempt made to
1228 obtain a general license or permission for the use of such
1229 proprietary rights by implementors or users of this specification can
1230 be obtained from the IETF Secretariat.
1231
1232
1233
123415. IANA CONSIDERATIONS
1235
1236 The Internet Assigned Numbers Authority (IANA) administers the number
1237 space for Secondary Compressor ID values. Values and their meaning
1238 must be documented in an RFC or other peer-reviewed, permanent, and
1239 readily available reference, in sufficient detail so that
1240 interoperability between independent implementations is possible.
1241 Subject to these constraints, name assignments are First Come, First
1242 Served - see RFC2434 [13]. Legal ID values are in the range 1..255.
1243
1244 This document does not define any values in this number space.
1245
1246
124716. REFERENCES
1248
1249 [1] D.G. Korn and K.P. Vo, Vdelta: Differencing and Compression,
1250 Practical Reusable Unix Software, Editor B. Krishnamurthy,
1251 John Wiley & Sons, Inc., 1995.
1252
1253 [2] J. Ziv and A. Lempel, A Universal Algorithm for Sequential Data
1254 Compression, IEEE Trans. on Information Theory, 23(3):337-343, 1977.
1255
1256 [3] W. Tichy, The String-to-String Correction Problem with Block Moves,
1257 ACM Transactions on Computer Systems, 2(4):309-321, November 1984.
1258
1259 [4] E.M. McCreight, A Space-Economical Suffix Tree Construction
1260 Algorithm, Journal of the ACM, 23:262-272, 1976.
1261
1262 [5] J.J. Hunt, K.P. Vo, W. Tichy, An Empirical Study of Delta Algorithms,
1263 IEEE Software Configuration and Maintenance Workshop, 1996.
1264
1265 [6] J.J. Hunt, K.P. Vo, W. Tichy, Delta Algorithms: An Empirical Analysis,
1266 ACM Trans. on Software Engineering and Methodology, 7:192-214, 1998.
1267
1268 [7] D.G. Korn, K.P. Vo, Sfio: A buffered I/O Library,
1269 Proc. of the Summer '91 Usenix Conference, 1991.
1270
1271 [8] D. W. Jones, Application of Splay Trees to Data Compression,
1272 CACM, 31(8):996-1007.
1273
1274 [9] M. Nelson, J. Gailly, The Data Compression Book, ISBN 1-55851-434-1,
1275 M&T Books, New York, NY, 1995.
1276
1277 [10] J.C. Mogul, F. Douglis, A. Feldmann, and B. Krishnamurthy,
1278 Potential benefits of delta encoding and data compression for HTTP,
1279 SIGCOMM '97, Cannes, France, 1997.
1280
1281 [11] J.C. Mogul, B. Krishnamurthy, F. Douglis, A. Feldmann,
1282 Y. Goland, and A. Van Hoff, Delta Encoding in HTTP,
1283 IETF, draft-mogul-http-delta-10, 2001.
1284
1285 [12] S. Bradner, Key words for use in RFCs to Indicate Requirement Levels,
1286 RFC 2119, March 1997.
1287
1288 [13] T. Narten, H. Alvestrand, Guidelines for Writing an IANA
1289 Considerations Section in RFCs, RFC2434, October 1998.
1290
1291
1292
129317. AUTHORS' ADDRESSES
1294
1295 Kiem-Phong Vo (main contact)
1296 AT&T Labs, Room D223
1297 180 Park Avenue
1298 Florham Park, NJ 07932
1299 Email: kpv@research.att.com
1300 Phone: 1 973 360 8630
1301
1302 David G. Korn
1303 AT&T Labs, Room D237
1304 180 Park Avenue
1305 Florham Park, NJ 07932
1306 Email: dgk@research.att.com
1307 Phone: 1 973 360 8602
1308
1309 Jeffrey C. Mogul
1310 Western Research Laboratory
1311 Compaq Computer Corporation
1312 250 University Avenue
1313 Palo Alto, California, 94305, U.S.A.
1314 Email: JeffMogul@acm.org
1315 Phone: 1 650 617 3304 (email preferred)
1316
1317 Joshua P. MacDonald
1318 Computer Science Division
1319 University of California, Berkeley
1320 345 Soda Hall
1321 Berkeley, CA 94720
1322 Email: jmacd@cs.berkeley.edu
diff --git a/xdelta3/junk.py b/xdelta3/junk.py
new file mode 100755
index 0000000..384951e
--- /dev/null
+++ b/xdelta3/junk.py
@@ -0,0 +1,11 @@
1#!/usr/bin/python
2
3bytes = ''
4
5for x in range(0, 250):
6 bytes = bytes + ('%c%c%c%c=' % (x, x+1, x+2, x+3))
7
8for x in range(0, 250):
9 bytes = bytes + ('%c' % x)
10
11print bytes
diff --git a/xdelta3/linkxd3lib.c b/xdelta3/linkxd3lib.c
new file mode 100755
index 0000000..d605fa6
--- /dev/null
+++ b/xdelta3/linkxd3lib.c
@@ -0,0 +1,47 @@
1#include "xdelta3.h"
2
/* Global sink written by use(); presumably it exists so the results of the
 * library calls in main() are observably consumed -- TODO confirm intent. */
extern int VVV;

int VVV;

/* Store R in the global sink VVV. */
void
use (int r)
{
  VVV = r;
}
11
12int main() {
13 xd3_config config;
14 xd3_stream stream;
15 xd3_source source;
16
17 xd3_init_config (& config, 0);
18 use (xd3_config_stream (&stream, &config));
19 use (xd3_close_stream (&stream));
20 xd3_abort_stream (&stream);
21 xd3_free_stream (&stream);
22
23 xd3_avail_input (& stream, NULL, 0);
24 xd3_consume_output (& stream);
25
26 use (xd3_bytes_on_srcblk (& source, 0));
27 use (xd3_set_source (& stream, & source));
28 xd3_set_flags (& stream, 0);
29
30 use (xd3_decode_completely (& stream, NULL, 0, NULL, NULL, 0));
31 use (xd3_decode_input (&stream));
32 use (xd3_decoder_needs_source (& stream));
33 use (xd3_get_appheader (& stream, NULL, NULL));
34
35 use ((int) xd3_errstring (& stream));
36 use ((int) xd3_strerror (0));
37
38#if XD3_ENCODER
39 use (xd3_encode_input (&stream));
40 use (xd3_encode_completely (& stream, NULL, 0, NULL, NULL, 0));
41 use (xd3_set_appheader (& stream));
42 use (xd3_encoder_used_source (& stream));
43 use (xd3_encoder_srcbase (& stream));
44 use (xd3_encoder_srclen (& stream));
45#endif
46 return 0;
47}
diff --git a/xdelta3/rcs_junk.cc b/xdelta3/rcs_junk.cc
new file mode 100755
index 0000000..ac49644
--- /dev/null
+++ b/xdelta3/rcs_junk.cc
@@ -0,0 +1,1861 @@
/* Forward declarations: short names for the structures defined below. */
typedef struct _RcsWalker    RcsWalker;
typedef struct _RcsFile      RcsFile;
typedef struct _RcsVersion   RcsVersion;
typedef struct _RcsStats     RcsStats;
typedef struct _IntStat      IntStat;
typedef struct _DblStat      DblStat;
typedef struct _BinCounter   BinCounter;
typedef struct _ConfigOption ConfigOption;
9
10struct _RcsWalker {
11 void* (* initialize) (void);
12 int (* finalize) (RcsStats* stats, void* data);
13 int (* onefile) (RcsFile* rcs, RcsStats* stats, void* data);
14 int (* dateorder) (RcsFile* rcs, RcsVersion* v, void* data);
15 int (* delta_orig) (RcsFile* rcs, RcsVersion* from, RcsVersion *to, void* data);
16 int (* delta_date) (RcsFile* rcs, RcsVersion* from, RcsVersion *to, void* data);
17 int min_versions;
18 int max_versions;
19 gboolean write_files;
20};
21
22struct _RcsVersion {
23 RcsFile *rcs;
24 time_t date;
25 int dateseq;
26 int chain_length;
27 char *vname;
28 off_t size;
29 int cc;
30 guint8* segment;
31 char *filename;
32 RcsVersion *parent;
33 GSList *children;
34 guint on_trunk : 1;
35};
36
37struct _RcsFile {
38 char *filename;
39 char *copyname;
40 char *headname;
41
42 int version_count;
43 int forward_count;
44 int reverse_count;
45 int branch_count;
46
47 RcsVersion *versions;
48 RcsVersion **versions_date;
49
50 RcsVersion *head_version;
51 RcsVersion *root_version;
52
53 off_t total_size;
54
55 guint atflag : 1;
56};
57
58struct _RcsStats {
59 BinCounter *avg_version_size;
60 IntStat* version_stat;
61 IntStat* forward_stat;
62 IntStat* reverse_stat;
63 IntStat* branch_stat;
64 IntStat* unencoded_stat;
65 IntStat* literal_stat;
66};
67
68struct _IntStat {
69 const char* name;
70 int count;
71 long long sum;
72 long long min;
73 long long max;
74
75 GArray *values;
76};
77
78struct _DblStat {
79 const char* name;
80 int count;
81 double sum;
82 double min;
83 double max;
84
85 GArray *values;
86};
87
88struct _BinCounter {
89 const char *name;
90 GPtrArray *bins;
91};
92
/* Whether an option takes a value.  Only CO_None is interpreted by the
 * parser code visible in this file (the option is then a bare flag). */
typedef enum _ConfigArgument {
  CO_Required,
  CO_Optional,
  CO_None
} ConfigArgument;
100
/* Storage type behind ConfigOption.value; selects how a value string is
 * converted by the parser. */
typedef enum _ConfigOptionType {
  CD_Bool,
  CD_Int32,
  CD_Double,
  CD_String
} ConfigOptionType;
109
/* NOTE(review): not interpreted by the code visible here; the names suggest
 * it controls how an option contributes to output naming -- confirm. */
typedef enum _ConfigStyle {
  CS_Ignore,
  CS_UseAsFile,
  CS_Use
} ConfigStyle;
117
118struct _ConfigOption {
119 const char *name;
120 const char *abbrev;
121 ConfigStyle style;
122 ConfigArgument arg;
123 ConfigOptionType type;
124 void *value;
125 gboolean found;
126};
127
128/* RCS inspection stuff
129 */
130
131void rcswalk_init (void);
132int rcswalk (RcsWalker *walker, const char* copy_base);
133void rcswalk_report (RcsStats* stats);
134
135IntStat* stat_int_new (const char* name);
136void stat_int_add_item (IntStat* stat, long long v);
137void stat_int_report (IntStat* stat);
138
139DblStat* stat_dbl_new (const char* name);
140void stat_dbl_add_item (DblStat* stat, double v);
141void stat_dbl_report (DblStat* stat);
142
143BinCounter* stat_bincount_new (const char* name);
144void stat_bincount_add_item (BinCounter* bc, int bin, double val);
145void stat_bincount_report (BinCounter* bc);
146
147/* Experiment configuration stuff
148 */
149
150void config_register (ConfigOption *opts, int nopts);
151int config_parse (const char* config_file);
152int config_done (void);
153void config_help (void);
154void config_set_string (const char* var, const char* val);
155int config_clear_dir (const char* dir);
156int config_create_dir (const char* dir);
157FILE* config_output (const char* fmt, ...);
158
159#ifdef __cplusplus
160}
161#endif
162
163#endif
164#include "rcswalk.h"
165#include "edsio.h"
166#include <stdio.h>
167#include <stdlib.h>
168#include <string.h>
169#include <sys/types.h>
170#include <sys/stat.h>
171#include <sys/wait.h>
172#include <fcntl.h>
173#include <errno.h>
174#include <dirent.h>
175#include <unistd.h>
176#include <math.h>
177
178#undef BUFSIZE
179#define BUFSIZE (1<<14)
180
181char *tmp_file_1;
182gboolean tmp_file_1_free = TRUE;
183char *tmp_file_2;
184gboolean tmp_file_2_free = TRUE;
185
186int skip_count;
187int small_count;
188int large_count;
189int process_count;
190
191extern time_t str2time (char const *, time_t, long);
192
193static guint8 readbuf[BUFSIZE];
194
195static const char* rcswalk_input_dir = NULL;
196static const char* config_output_base = NULL;
197static const char* config_output_dir = NULL;
198static const char* rcswalk_experiment = NULL;
199
200static ConfigOption rcswalk_options[] = {
201 { "rcswalk_experiment", "ex", CS_Use, CO_Required, CD_String, & rcswalk_experiment },
202 { "rcs_input_dir", "id", CS_UseAsFile, CO_Required, CD_String, & rcswalk_input_dir }
203};
204
205static ConfigOption config_options[] = {
206 { "config_output_base", "ob", CS_Ignore, CO_Required, CD_String, & config_output_base }
207};
208
209
210void
211rcswalk_free_segment (RcsVersion *v)
212{
213 if (v->segment)
214 g_free (v->segment);
215
216 if (v->filename == tmp_file_1)
217 tmp_file_1_free = TRUE;
218 else if (v->filename == tmp_file_2)
219 tmp_file_2_free = TRUE;
220 else if (v->filename)
221 g_free (v->filename);
222
223 v->segment = NULL;
224 v->filename = NULL;
225}
226
227int
228rcswalk_checkout (RcsFile* rcs, RcsWalker* walker, RcsVersion *v)
229{
230 FILE* out;
231 char cmdbuf[1024];
232 int nread;
233 int alloc = BUFSIZE;
234 int pos = 0;
235
236 sprintf (cmdbuf, "co -ko -p%s %s 2>/dev/null\n", v->vname, rcs->filename);
237
238 g_assert (! v->segment);
239
240 v->segment = g_malloc (alloc);
241
242 if (! (out = popen (cmdbuf, "r")))
243 {
244 g_warning ("popen failed: %s: %s", cmdbuf, g_strerror (errno));
245 return errno;
246 }
247
248 for (;;)
249 {
250 nread = fread (readbuf, 1, BUFSIZE, out);
251
252 if (nread == 0)
253 break;
254
255 if (nread < 0)
256 {
257 g_warning ("fread failed: %s", g_strerror (errno));
258 return errno;
259 }
260
261 if (pos + nread > alloc)
262 {
263 alloc *= 2;
264 v->segment = g_realloc (v->segment, alloc);
265 }
266
267 memcpy (v->segment + pos, readbuf, nread);
268
269 pos += nread;
270 }
271
272 if (pclose (out) < 0)
273 {
274 g_warning ("pclose failed");
275 return errno;
276 }
277
278 v->size = pos;
279
280 if (walker->write_files)
281 {
282 char* file = NULL;
283
284 if (! file && tmp_file_1_free)
285 {
286 file = tmp_file_1;
287 tmp_file_1_free = FALSE;
288 }
289
290 if (! file && tmp_file_2_free)
291 {
292 file = tmp_file_2;
293 tmp_file_2_free = FALSE;
294 }
295
296 g_assert (file);
297
298 v->filename = file;
299
300 if (! (out = fopen (file, "w")))
301 {
302 g_warning ("fopen failed: %s\n", file);
303 return errno;
304 }
305
306 if (fwrite (v->segment, v->size, 1, out) != 1)
307 {
308 g_warning ("fwrite failed: %s\n", file);
309 return errno;
310 }
311
312 if (fclose (out) < 0)
313 {
314 g_warning ("fclose failed: %s\n", file);
315 return errno;
316 }
317 }
318
319 return 0;
320}
321
322int
323rcswalk_delta_date (RcsFile* rcs, RcsWalker* walker, void* data)
324{
325 int i;
326 int ret;
327 RcsVersion *vf = NULL;
328 RcsVersion *vt = NULL;
329
330 for (i = 0; i < (rcs->version_count-1); i += 1)
331 {
332 vf = rcs->versions_date[i+1];
333 vt = rcs->versions_date[i];
334
335 if (! vt->segment && (ret = rcswalk_checkout (rcs, walker, vt))) {
336 return ret;
337 }
338
339 if ((ret = rcswalk_checkout (rcs, walker, vf))) {
340 return ret;
341 }
342
343 if ((ret = walker->delta_date (rcs, vf, vt, data))) {
344 return ret;
345 }
346
347 rcswalk_free_segment (vt);
348 }
349
350 if (vf) rcswalk_free_segment (vf);
351 if (vt) rcswalk_free_segment (vt);
352
353 return 0;
354}
355
356int
357rcswalk_delta_orig (RcsFile* rcs, RcsWalker* walker, RcsVersion* version, int *count, void* data)
358{
359 int ret;
360 GSList *c;
361 RcsVersion *child;
362
363 for (c = version->children; c; c = c->next)
364 {
365 gboolean reverse;
366
367 child = c->data;
368
369 if (! version->segment)
370 {
371 if ((ret = rcswalk_checkout (rcs, walker, version))) {
372 return ret;
373 }
374 }
375
376 if ((ret = rcswalk_checkout (rcs, walker, child))) {
377 return ret;
378 }
379
380 reverse = version->on_trunk && child->on_trunk;
381
382 (* count) += 1;
383
384 if ((ret = walker->delta_orig (rcs, reverse ? child : version, reverse ? version : child, data))) {
385 return ret;
386 }
387
388 rcswalk_free_segment (version);
389
390 if ((ret = rcswalk_delta_orig (rcs, walker, child, count, data))) {
391 return ret;
392 }
393 }
394
395 rcswalk_free_segment (version);
396 return 0;
397}
398
399int
400rcswalk_dateorder (RcsFile* rcs, RcsWalker *walker, RcsStats *stats, void* data)
401{
402 int i, ret;
403
404 for (i = 0; i < rcs->version_count; i += 1)
405 {
406 RcsVersion *v = rcs->versions_date[i];
407
408 if ((ret = rcswalk_checkout (rcs, walker, v))) {
409 return ret;
410 }
411
412 stat_bincount_add_item (stats->avg_version_size, i, v->size);
413
414 if ((ret = walker->dateorder (rcs, v, data))) {
415 return ret;
416 }
417
418 rcswalk_free_segment (v);
419 }
420
421 return 0;
422}
423
424gboolean
425rcswalk_match (char** line_p, char* str)
426{
427 int len = strlen (str);
428
429 if (strncmp (*line_p, str, len) == 0)
430 {
431 (*line_p) += len;
432 return TRUE;
433 }
434
435 return FALSE;
436}
437
438void
439rcswalk_find_parent (RcsFile *rcs, GHashTable* hash, RcsVersion *v)
440{
441 char *lastdot;
442 char mbuf[1024];
443 int lastn;
444 RcsVersion *p;
445
446 strcpy (mbuf, v->vname);
447
448 if (! (lastdot = strchr (mbuf, '.')))
449 abort ();
450
451 if (! (lastdot = strchr (lastdot+1, '.')))
452 v->on_trunk = TRUE;
453
454 lastdot = strrchr (mbuf, '.');
455 lastn = atoi (lastdot + 1);
456
457 do
458 {
459 if (lastn == 1)
460 {
461 (*lastdot) = 0;
462
463 if (strcmp (mbuf, "1") == 0)
464 {
465 /* Assuming the first version is always "1.1".
466 */
467 rcs->root_version = v;
468 return;
469 }
470 else if (! (lastdot = strrchr (mbuf, '.')))
471 {
472 int i = 1;
473 int br = atoi (mbuf) - 1;
474 RcsVersion *p2 = NULL;
475
476 /* Now we have something like "2.1" and need to
477 * search for the highest "1.x" version.
478 */
479
480 do
481 {
482 sprintf (mbuf, "%d.%d", br, i++);
483 p = p2;
484 }
485 while ((p2 = g_hash_table_lookup (hash, mbuf)));
486
487 if (p == NULL)
488 {
489 rcs->root_version = v;
490 return;
491 }
492
493 break;
494 }
495 else
496 {
497 /* 1.2.3.1 => 1.2 */
498 (*lastdot) = 0;
499 lastdot = strrchr (mbuf, '.');
500 lastn = atoi (lastdot + 1);
501 }
502 }
503 else
504 {
505 lastn -= 1;
506 sprintf (lastdot, ".%d", lastn);
507 }
508 }
509 while (! (p = g_hash_table_lookup (hash, mbuf)));
510
511 g_assert (p);
512
513 v->parent = p;
514
515 p->children = g_slist_prepend (p->children, v);
516}
517
518int
519rcswalk_traverse_graph (RcsFile* rcs, RcsVersion* version, RcsVersion *parent)
520{
521 GSList *c;
522 int distance = -1;
523
524 version->cc = g_slist_length (version->children);
525
526 if (version->cc > 1)
527 rcs->branch_count += (version->cc - 1);
528
529 if (parent)
530 {
531 /* Insure that there is proper date ordering. */
532 if (version->date <= parent->date)
533 version->date = parent->date + 1;
534
535 if (parent->on_trunk && version->on_trunk)
536 rcs->reverse_count += 1;
537 else
538 rcs->forward_count += 1;
539 }
540
541 for (c = version->children; c; c = c->next)
542 {
543 int c_dist = rcswalk_traverse_graph (rcs, c->data, version);
544
545 distance = MAX (distance, c_dist);
546 }
547
548 if (version == rcs->head_version)
549 distance = 0;
550
551 if (distance >= 0)
552 {
553 version->chain_length = distance;
554
555 return distance + 1;
556 }
557
558 return -1;
559}
560
561void
562rcswalk_compute_chain_length (RcsFile* rcs, RcsVersion* version, RcsVersion *parent)
563{
564 GSList *c;
565
566 if (! parent)
567 {
568 g_assert (version->chain_length >= 0);
569 }
570 else if (version->chain_length < 0)
571 {
572 version->chain_length = parent->chain_length + 1;
573 }
574
575 for (c = version->children; c; c = c->next)
576 {
577 rcswalk_compute_chain_length (rcs, c->data, version);
578 }
579}
580
581int
582rcswalk_date_compare (const void* a, const void* b)
583{
584 RcsVersion **ra = (void*) a;
585 RcsVersion **rb = (void*) b;
586
587 return (*ra)->date - (*rb)->date;
588}
589
590int
591rcswalk_build_graph (RcsFile* rcs)
592{
593 GHashTable* hash = g_hash_table_new (g_str_hash, g_str_equal);
594 int i;
595
596 for (i = 0; i < rcs->version_count; i += 1)
597 g_hash_table_insert (hash, rcs->versions[i].vname, rcs->versions + i);
598
599 for (i = 0; i < rcs->version_count; i += 1)
600 {
601 RcsVersion *v = rcs->versions + i;
602
603 v->chain_length = -1;
604 v->rcs = rcs;
605
606 rcswalk_find_parent (rcs, hash, v);
607 }
608
609 rcs->head_version = g_hash_table_lookup (hash, rcs->headname);
610
611 rcswalk_traverse_graph (rcs, rcs->root_version, NULL);
612
613 rcswalk_compute_chain_length (rcs, rcs->root_version, NULL);
614
615 for (i = 0; i < rcs->version_count; i += 1)
616 rcs->versions_date[i] = rcs->versions + i;
617
618 qsort (rcs->versions_date, rcs->version_count, sizeof (RcsVersion*), & rcswalk_date_compare);
619
620 for (i = 0; i < rcs->version_count; i += 1)
621 {
622 RcsVersion *v = rcs->versions_date[i];
623
624 v->dateseq = i;
625 }
626
627 g_hash_table_destroy (hash);
628
629 return 0;
630}
631
632#define HEAD_STATE 0
633#define BAR_STATE 1
634#define REV_STATE 2
635#define DATE_STATE 3
636
637int
638rcswalk_load (RcsFile *rcs, gboolean *skip)
639{
640 FILE* rlog;
641 char cmdbuf[1024];
642 char oneline[1024], *oneline_p;
643 char rbuf[1024];
644 int version_i = 0, ret;
645 int read_state = HEAD_STATE;
646
647 sprintf (cmdbuf, "rlog %s", rcs->filename);
648
649 if (! (rlog = popen (cmdbuf, "r")))
650 {
651 g_warning ("popen failed: %s", cmdbuf);
652 return errno;
653 }
654
655 rcs->headname = NULL;
656
657 while (fgets (oneline, 1024, rlog))
658 {
659 oneline_p = oneline;
660
661 if (read_state == HEAD_STATE && rcswalk_match (& oneline_p, "total revisions: "))
662 {
663 if (sscanf (oneline_p, "%d", & rcs->version_count) != 1)
664 goto badscan;
665
666 rcs->versions = g_new0 (RcsVersion, rcs->version_count);
667 rcs->versions_date = g_new (RcsVersion*, rcs->version_count);
668 read_state = BAR_STATE;
669 }
670 else if (read_state == HEAD_STATE && rcswalk_match (& oneline_p, "head: "))
671 {
672 if (sscanf (oneline_p, "%s", rbuf) != 1)
673 goto badscan;
674
675 rcs->headname = g_strdup (rbuf);
676 read_state = HEAD_STATE; /* no change */
677 }
678 else if (read_state == BAR_STATE && rcswalk_match (& oneline_p, "----------------------------"))
679 {
680 read_state = REV_STATE;
681 }
682 else if (read_state == REV_STATE && rcswalk_match (& oneline_p, "revision "))
683 {
684 if (version_i >= rcs->version_count)
685 {
686 /* jkh likes to insert the rlog of one RCS file into the log
687 * message of another, and this can confuse things. Why, oh why,
688 * doesn't rlog have an option to not print the log?
689 */
690 fprintf (stderr, "rcswalk: too many versions: skipping file %s\n", rcs->filename);
691 *skip = TRUE;
692 skip_count += 1;
693 pclose (rlog);
694 return 0;
695 }
696
697 if (sscanf (oneline_p, "%s", rbuf) != 1)
698 goto badscan;
699
700 rcs->versions[version_i].vname = g_strdup (rbuf);
701 read_state = DATE_STATE;
702
703 g_assert (rcs->versions[version_i].vname);
704 }
705 else if (read_state == DATE_STATE && rcswalk_match (& oneline_p, "date: "))
706 {
707 char* semi = strchr (oneline_p, ';');
708
709 if (! semi)
710 goto badscan;
711
712 strncpy (rbuf, oneline_p, semi - oneline_p);
713
714 rbuf[semi - oneline_p] = 0;
715
716 rcs->versions[version_i].date = str2time (rbuf, 0, 0);
717
718 version_i += 1;
719 read_state = BAR_STATE;
720 }
721 }
722
723 if (! rcs->headname)
724 {
725 fprintf (stderr, "rcswalk: no head version: skipping file %s\n", rcs->filename);
726 *skip = TRUE;
727 skip_count += 1;
728 pclose (rlog);
729 return 0;
730 }
731
732 if (pclose (rlog) < 0)
733 {
734 g_warning ("pclose failed: %s", cmdbuf);
735 return errno;
736 }
737
738 if ((ret = rcswalk_build_graph (rcs))) {
739 return ret;
740 }
741
742 return 0;
743
744 badscan:
745
746 pclose (rlog);
747
748 g_warning ("rlog syntax error");
749 return -1;
750}
751
752void
753rcswalk_free (RcsFile* rcs)
754{
755 int i;
756
757 for (i = 0; i < rcs->version_count; i += 1)
758 {
759 g_free (rcs->versions[i].vname);
760 g_slist_free (rcs->versions[i].children);
761 }
762
763 g_free (rcs->filename);
764 g_free (rcs->headname);
765 g_free (rcs->versions);
766 g_free (rcs->versions_date);
767 g_free (rcs);
768}
769
770int
771rcswalk_one (char* rcsfile, char* copyfile, RcsWalker* walker, RcsStats* stats, void* data)
772{
773 RcsFile* rcs;
774 int i, ret;
775 long long maxsize = 0;
776 gboolean skip = FALSE;
777
778 rcs = g_new0 (RcsFile, 1);
779
780 rcs->filename = g_strdup (rcsfile);
781 rcs->copyname = copyfile;
782
783 if ((ret = rcswalk_load (rcs, & skip))) {
784 return ret;
785 }
786
787 if (walker->min_versions > rcs->version_count)
788 {
789 small_count += 1;
790 skip = TRUE;
791 }
792
793 if (walker->max_versions < rcs->version_count)
794 {
795 large_count += 1;
796 skip = TRUE;
797 }
798
799 if (! skip)
800 {
801 process_count += 1;
802
803 if (walker->dateorder && (ret = rcswalk_dateorder (rcs, walker, stats, data))) {
804 return ret;
805 }
806
807 if (walker->delta_orig)
808 {
809 int count = 0;
810
811 if ((ret = rcswalk_delta_orig (rcs, walker, rcs->root_version, & count, data))) {
812 return ret;
813 }
814
815 g_assert (count == (rcs->version_count - 1));
816 }
817
818 if (walker->delta_date && (ret = rcswalk_delta_date (rcs, walker, data))) {
819 return ret;
820 }
821
822 for (i = 0; i < rcs->version_count; i += 1)
823 {
824 rcs->total_size += rcs->versions[i].size;
825 maxsize = MAX (rcs->versions[i].size, maxsize);
826 }
827
828 stat_int_add_item (stats->version_stat, rcs->version_count);
829 stat_int_add_item (stats->forward_stat, rcs->forward_count);
830 stat_int_add_item (stats->reverse_stat, rcs->reverse_count);
831 stat_int_add_item (stats->branch_stat, rcs->branch_count);
832 stat_int_add_item (stats->unencoded_stat, rcs->total_size);
833 stat_int_add_item (stats->literal_stat, maxsize);
834
835 if (walker->onefile && (ret = walker->onefile (rcs, stats, data))) {
836 return ret;
837 }
838 }
839
840 rcswalk_free (rcs);
841
842 return 0;
843}
844
845int
846rcswalk_dir (const char* dir, RcsWalker* walker, RcsStats* stats, void* data, const char* copy_dir)
847{
848 int ret;
849 DIR* thisdir;
850 struct dirent* ent;
851
852 if (copy_dir && (ret = config_create_dir (copy_dir))) {
853 return ret;
854 }
855
856 if (! (thisdir = opendir (dir)))
857 {
858 g_warning ("opendir failed: %s", dir);
859 return errno;
860 }
861
862 while ((ent = readdir (thisdir)))
863 {
864 char* name = ent->d_name;
865 int len;
866 struct stat buf;
867 char* fullname;
868 char* copyname = NULL;
869
870 if (strcmp (name, ".") == 0)
871 continue;
872
873 if (strcmp (name, "..") == 0)
874 continue;
875
876 len = strlen (name);
877
878 fullname = g_strdup_printf ("%s/%s", dir, name);
879
880 if (copy_dir)
881 copyname = g_strdup_printf ("%s/%s", copy_dir, name);
882
883 if (len > 2 && strcmp (name + len - 2, ",v") == 0)
884 {
885 if ((ret = rcswalk_one (fullname, copyname, walker, stats, data))) {
886 goto abort;
887 }
888 }
889 else
890 {
891 if (stat (fullname, & buf) < 0)
892 {
893 g_warning ("stat failed: %s\n", fullname);
894 goto abort;
895 }
896
897 if (S_ISDIR (buf.st_mode))
898 {
899 if ((ret = rcswalk_dir (fullname, walker, stats, data, copyname))) {
900 goto abort;
901 }
902 }
903 }
904
905 g_free (fullname);
906
907 if (copyname)
908 g_free (copyname);
909 }
910
911 if (closedir (thisdir) < 0)
912 {
913 g_warning ("closedir failed: %s", dir);
914 return errno;
915 }
916
917 return 0;
918
919 abort:
920
921 if (thisdir)
922 closedir (thisdir);
923
924 return -1;
925}
926
927void
928rcswalk_init (void)
929{
930 config_register (rcswalk_options, ARRAY_SIZE (rcswalk_options));
931}
932
933int
934rcswalk (RcsWalker *walker, const char* copy_base)
935{
936 void* data = NULL;
937 RcsStats stats;
938 int ret;
939
940 skip_count = 0;
941 small_count = 0;
942 process_count = 0;
943 large_count = 0;
944
945 memset (& stats, 0, sizeof (stats));
946
947 stats.avg_version_size = stat_bincount_new ("AvgVersionSize"); /* @@@ leak */
948 stats.version_stat = stat_int_new ("Version"); /* @@@ leak */
949 stats.forward_stat = stat_int_new ("Forward"); /* @@@ leak */
950 stats.reverse_stat = stat_int_new ("Reverse"); /* @@@ leak */
951 stats.branch_stat = stat_int_new ("Branch"); /* @@@ leak */
952 stats.unencoded_stat = stat_int_new ("Unencoded"); /* @@@ leak */
953 stats.literal_stat = stat_int_new ("Literal"); /* @@@ leak */
954
955 tmp_file_1 = g_strdup_printf ("%s/rcs1.%d", g_get_tmp_dir (), (int) getpid ());
956 tmp_file_2 = g_strdup_printf ("%s/rcs2.%d", g_get_tmp_dir (), (int) getpid ());
957
958 if (walker->initialize)
959 data = walker->initialize ();
960
961 if ((ret = rcswalk_dir (rcswalk_input_dir, walker, & stats, data, copy_base))) {
962 return ret;
963 }
964
965 if (walker->finalize)
966 {
967 if ((ret = walker->finalize (& stats, data))) {
968 return ret;
969 }
970 }
971
972 unlink (tmp_file_1);
973 unlink (tmp_file_2);
974
975 fprintf (stderr, "rcswalk: processed %d files: too small %d; too large: %d; damaged: %d\n", process_count, small_count, large_count, skip_count);
976
977 return 0;
978}
979
980/* Statistics
981 */
982
983void
984rcswalk_report (RcsStats* set)
985{
986 stat_bincount_report (set->avg_version_size);
987 stat_int_report (set->version_stat);
988 stat_int_report (set->forward_stat);
989 stat_int_report (set->reverse_stat);
990 stat_int_report (set->branch_stat);
991 stat_int_report (set->unencoded_stat);
992 stat_int_report (set->literal_stat);
993}
994
995/* Int stat
996 */
997IntStat*
998stat_int_new (const char* name)
999{
1000 IntStat* s = g_new0 (IntStat, 1);
1001
1002 s->name = name;
1003 s->values = g_array_new (FALSE, FALSE, sizeof (long long));
1004
1005 return s;
1006}
1007
1008void
1009stat_int_add_item (IntStat* stat, long long v)
1010{
1011 if (! stat->count)
1012 stat->min = v;
1013 stat->count += 1;
1014 stat->min = MIN (v, stat->min);
1015 stat->max = MAX (v, stat->max);
1016 stat->sum += v;
1017
1018 g_array_append_val (stat->values, v);
1019}
1020
1021double
1022stat_int_stddev (IntStat *stat)
1023{
1024 double f = 0;
1025 double m = (double) stat->sum / (double) stat->count;
1026 double v;
1027 int i;
1028
1029 for (i = 0; i < stat->count; i += 1)
1030 {
1031 long long x = g_array_index (stat->values, long long, i);
1032
1033 f += (m - (double) x) * (m - (double) x);
1034 }
1035
1036 v = f / (double) stat->count;
1037
1038 return sqrt (v);
1039}
1040
/* qsort() comparator for long long values, ascending.
 *
 * Returns -1, 0 or 1.  The operands are compared rather than subtracted:
 * the difference of two long longs does not fit in the int return value
 * and could come out with the wrong sign (or as 0 for unequal values). */
int
ll_comp (const void* a, const void* b)
{
  long long lhs = * (const long long*) a;
  long long rhs = * (const long long*) b;

  if (lhs < rhs)
    return -1;

  if (lhs > rhs)
    return 1;

  return 0;
}
1048
1049void
1050stat_int_histogram (IntStat *stat)
1051{
1052 int i, consec;
1053 long long cum = 0;
1054
1055 FILE* p_out;
1056 FILE* s_out;
1057
1058 if (! (p_out = config_output ("%s.pop.hist", stat->name)))
1059 abort ();
1060
1061 if (! (s_out = config_output ("%s.sum.hist", stat->name)))
1062 abort ();
1063
1064 qsort (stat->values->data, stat->count, sizeof (long long), ll_comp);
1065
1066 for (i = 0; i < stat->count; i += consec)
1067 {
1068 long long ix = g_array_index (stat->values, long long, i);
1069
1070 for (consec = 1; (i+consec) < stat->count; consec += 1)
1071 {
1072 long long jx = g_array_index (stat->values, long long, i+consec);
1073
1074 if (ix != jx)
1075 break;
1076 }
1077
1078 cum += consec * g_array_index (stat->values, long long, i);
1079
1080 fprintf (p_out, "%qd, %0.3f\n", g_array_index (stat->values, long long, i), (double) (i+consec) / (double) stat->count);
1081 fprintf (s_out, "%qd, %0.3f\n", g_array_index (stat->values, long long, i), (double) cum / (double) stat->sum);
1082 }
1083
1084 if (fclose (p_out) < 0 || fclose (s_out) < 0)
1085 {
1086 g_error ("fclose failed\n");
1087 }
1088}
1089
1090void
1091stat_int_report (IntStat* stat)
1092{
1093 FILE* out;
1094
1095 if (! (out = config_output ("%s.stat", stat->name)))
1096 abort ();
1097
1098 fprintf (out, "Name: %s\n", stat->name);
1099 fprintf (out, "Count: %d\n", stat->count);
1100 fprintf (out, "Min: %qd\n", stat->min);
1101 fprintf (out, "Max: %qd\n", stat->max);
1102 fprintf (out, "Sum: %qd\n", stat->sum);
1103 fprintf (out, "Mean: %0.2f\n", (double) stat->sum / (double) stat->count);
1104 fprintf (out, "Stddev: %0.2f\n", stat_int_stddev (stat));
1105
1106 if (fclose (out) < 0)
1107 g_error ("fclose failed");
1108
1109 stat_int_histogram (stat);
1110}
1111
1112/* Dbl stat
1113 */
1114
1115DblStat*
1116stat_dbl_new (const char* name)
1117{
1118 DblStat* s = g_new0 (DblStat, 1);
1119
1120 s->name = name;
1121 s->values = g_array_new (FALSE, FALSE, sizeof (double));
1122
1123 return s;
1124}
1125
1126void
1127stat_dbl_add_item (DblStat* stat, double v)
1128{
1129 if (! stat->count)
1130 stat->min = v;
1131 stat->count += 1;
1132 stat->min = MIN (v, stat->min);
1133 stat->max = MAX (v, stat->max);
1134 stat->sum += v;
1135
1136 g_array_append_val (stat->values, v);
1137}
1138
1139double
1140stat_dbl_stddev (DblStat *stat)
1141{
1142 double f = 0;
1143 double m = stat->sum / stat->count;
1144 double v;
1145 int i;
1146
1147 for (i = 0; i < stat->count; i += 1)
1148 {
1149 double x = g_array_index (stat->values, double, i);
1150
1151 f += (m - x) * (m - x);
1152 }
1153
1154 v = f / stat->count;
1155
1156 return sqrt (v);
1157}
1158
/* qsort() comparator for doubles, ascending: returns 1, -1 or 0 according
 * to the sign of (*a - *b). */
int
dbl_comp (const void* a, const void* b)
{
  double diff = * (const double*) a - * (const double*) b;

  return (diff > 0.0) - (diff < 0.0);
}
1173
1174void
1175stat_dbl_histogram (DblStat *stat)
1176{
1177 int i, consec;
1178 double cum = 0.0;
1179
1180 FILE* p_out;
1181 FILE* s_out;
1182
1183 if (! (p_out = config_output ("%s.pop.hist", stat->name)))
1184 abort ();
1185
1186 if (! (s_out = config_output ("%s.sum.hist", stat->name)))
1187 abort ();
1188
1189 qsort (stat->values->data, stat->count, sizeof (double), dbl_comp);
1190
1191 for (i = 0; i < stat->count; i += consec)
1192 {
1193 double ix = g_array_index (stat->values, double, i);
1194
1195 for (consec = 1; (i+consec) < stat->count; consec += 1)
1196 {
1197 double jx = g_array_index (stat->values, double, i+consec);
1198
1199 if (ix != jx)
1200 break;
1201 }
1202
1203 cum += ((double) consec) * g_array_index (stat->values, double, i);
1204
1205 fprintf (p_out, "%0.6f, %0.3f\n", g_array_index (stat->values, double, i), (double) (i+consec) / (double) stat->count);
1206 fprintf (s_out, "%0.6f, %0.3f\n", g_array_index (stat->values, double, i), cum / stat->sum);
1207 }
1208
1209 if (fclose (p_out) < 0 || fclose (s_out) < 0)
1210 {
1211 g_error ("fclose failed\n");
1212 }
1213}
1214
1215void
1216stat_dbl_report (DblStat* stat)
1217{
1218 FILE* out;
1219
1220 if (! (out = config_output ("%s.stat", stat->name)))
1221 abort ();
1222
1223 fprintf (out, "Name: %s\n", stat->name);
1224 fprintf (out, "Count: %d\n", stat->count);
1225 fprintf (out, "Min: %0.6f\n", stat->min);
1226 fprintf (out, "Max: %0.6f\n", stat->max);
1227 fprintf (out, "Sum: %0.6f\n", stat->sum);
1228 fprintf (out, "Mean: %0.6f\n", stat->sum / stat->count);
1229 fprintf (out, "Stddev: %0.6f\n", stat_dbl_stddev (stat));
1230
1231 if (fclose (out) < 0)
1232 g_error ("fclose failed");
1233
1234 stat_dbl_histogram (stat);
1235}
1236
1237/* Bincount
1238 */
1239BinCounter*
1240stat_bincount_new (const char* name)
1241{
1242 BinCounter* bc = g_new0 (BinCounter, 1);
1243
1244 bc->name = name;
1245 bc->bins = g_ptr_array_new ();
1246
1247 return bc;
1248}
1249
1250void
1251stat_bincount_add_item (BinCounter* bc, int bin, double val)
1252{
1253 GArray* one;
1254 int last;
1255
1256 if (bin >= bc->bins->len)
1257 {
1258 g_ptr_array_set_size (bc->bins, bin+1);
1259 }
1260
1261 if (! (one = bc->bins->pdata[bin]))
1262 {
1263 one = bc->bins->pdata[bin] = g_array_new (FALSE, TRUE, sizeof (double));
1264 }
1265
1266 g_assert (one);
1267
1268 last = one->len;
1269
1270 g_array_set_size (one, last + 1);
1271
1272 g_array_index (one, double, last) = val;
1273}
1274
1275void
1276stat_bincount_report (BinCounter* bc)
1277{
1278 FILE *avg_out;
1279 FILE *raw_out;
1280 int i;
1281
1282 if (! (avg_out = config_output ("%s.avg", bc->name)))
1283 abort ();
1284
1285 if (! (raw_out = config_output ("%s.raw", bc->name)))
1286 abort ();
1287
1288 for (i = 0; i < bc->bins->len; i += 1)
1289 {
1290 GArray* one = bc->bins->pdata[i];
1291
1292 double sum = 0.0;
1293 int j;
1294
1295 for (j = 0; j < one->len; j += 1)
1296 {
1297 double d = g_array_index (one, double, j);
1298
1299 sum += d;
1300
1301 fprintf (raw_out, "%e ", d);
1302 }
1303
1304 fprintf (raw_out, "\n");
1305 fprintf (avg_out, "%e %d\n", sum / one->len, one->len);
1306 }
1307
1308 if (fclose (avg_out) < 0)
1309 g_error ("fclose failed");
1310
1311 if (fclose (raw_out) < 0)
1312 g_error ("fclose failed");
1313}
1314
/* Config stuff
 */

/* Make sure directory DIRNAME exists, creating it (mode 0777, subject to
 * the umask) if nothing exists at that path.
 *
 * Returns 0 when the directory exists or was created, the mkdir() errno
 * when creation fails, and ENOTDIR when the path exists but is not a
 * directory.  (After a successful stat() errno carries no error, so it
 * cannot be used as the result in that case.)
 */
int
config_create_dir (const char* dirname)
{
  struct stat buf;

  if (stat (dirname, & buf) < 0)
    {
      if (mkdir (dirname, 0777) < 0)
        {
          int err = errno;   /* capture before fprintf can change it */

          fprintf (stderr, "mkdir failed: %s\n", dirname);
          return err ? err : -1;
        }

      return 0;
    }

  if (! S_ISDIR (buf.st_mode))
    {
      fprintf (stderr, "not a directory: %s\n", dirname);
      return ENOTDIR;
    }

  return 0;
}
1342
/* Remove DIR and everything below it by running "rm -rf <dir>".  A NULL
 * DIR is a no-op.
 *
 * Returns 0 (the result of the command is not inspected), or ENOMEM when
 * the command string cannot be allocated.  The command is built in a
 * buffer sized for DIR, so long paths cannot overflow it.
 *
 * NOTE(review): DIR is passed to the shell unquoted; a path containing
 * spaces or shell metacharacters will be misinterpreted.  Callers must only
 * pass trusted paths.
 */
int
config_clear_dir (const char* dir)
{
  static const char prefix[] = "rm -rf ";
  char *cmd;

  if (! dir)
    return 0;

  /* sizeof (prefix) already counts the terminating NUL. */
  cmd = malloc (sizeof (prefix) + strlen (dir));

  if (! cmd)
    {
      fprintf (stderr, "config_clear_dir: out of memory\n");
      return ENOMEM;
    }

  strcpy (cmd, prefix);
  strcat (cmd, dir);

  system (cmd);

  free (cmd);

  return 0;
}
1357
1358static ConfigOption all_options[64];
1359static int option_count;
1360
1361void
1362config_init ()
1363{
1364 static gboolean once = FALSE;
1365 if (! once)
1366 {
1367 once = TRUE;
1368 config_register (config_options, ARRAY_SIZE (config_options));
1369 }
1370}
1371
1372void
1373config_register (ConfigOption *opts, int nopts)
1374{
1375 int i;
1376
1377 config_init ();
1378
1379 for (i = 0; i < nopts; i += 1)
1380 {
1381 all_options[option_count++] = opts[i];
1382 }
1383}
1384
1385void
1386config_set_string (const char* var, const char* val)
1387{
1388 int i;
1389
1390 for (i = 0; i < option_count; i += 1)
1391 {
1392 ConfigOption *opt = all_options + i;
1393
1394 if (strcmp (opt->name, var) == 0)
1395 {
1396 (* (const char**) opt->value) = val;
1397 opt->found = TRUE;
1398 return;
1399 }
1400 }
1401}
1402
1403int
1404config_parse (const char* config_file)
1405{
1406 FILE *in;
1407 char oname[1024], value[1024];
1408 int i;
1409
1410 if (! (in = fopen (config_file, "r")))
1411 {
1412 fprintf (stderr, "fopen failed: %s\n", config_file);
1413 return errno;
1414 }
1415
1416 for (;;)
1417 {
1418 ConfigOption *opt = NULL;
1419
1420 if (fscanf (in, "%s", oname) != 1)
1421 break;
1422
1423 for (i = 0; i < option_count; i += 1)
1424 {
1425 if (strcmp (oname, all_options[i].name) == 0)
1426 {
1427 opt = all_options + i;
1428 break;
1429 }
1430 }
1431
1432 if (opt && opt->arg == CO_None)
1433 {
1434 (* (gboolean*) opt->value) = TRUE;
1435 opt->found = TRUE;
1436 continue;
1437 }
1438
1439 if (fscanf (in, "%s", value) != 1)
1440 {
1441 fprintf (stderr, "no value for option: %s; file: %s\n", oname, config_file);
1442 goto abort;
1443 }
1444
1445 if (! opt)
1446 {
1447 /*fprintf (stderr, "unrecognized option: %s\n", oname);*/
1448 continue;
1449 }
1450
1451 switch (opt->type)
1452 {
1453 case CD_Bool:
1454
1455 if (strcasecmp (value, "yes") == 0 ||
1456 strcasecmp (value, "true") == 0 ||
1457 strcmp (value, "1") == 0 ||
1458 strcasecmp (value, "on") == 0)
1459 {
1460 ((gboolean*) opt->value) = TRUE;
1461 }
1462 else
1463 {
1464 ((gboolean*) opt->value) = FALSE;
1465 }
1466
1467 break;
1468 case CD_Int32:
1469
1470 if (sscanf (value, "%d", (gint32*) opt->value) != 1)
1471 {
1472 fprintf (stderr, "parse error for option: %s; file: %s\n", oname, config_file);
1473 goto abort;
1474 }
1475
1476 break;
1477 case CD_Double:
1478
1479 if (sscanf (value, "%lf", (double*) opt->value) != 1)
1480 {
1481 fprintf (stderr, "parse error for option: %s; file: %s\n", oname, config_file);
1482 goto abort;
1483 }
1484
1485 break;
1486 case CD_String:
1487
1488 (* (const char**) opt->value) = g_strdup (value);
1489
1490 break;
1491 }
1492
1493 opt->found = TRUE;
1494 }
1495
1496 fclose (in);
1497
1498 return 0;
1499
1500 abort:
1501
1502 fclose (in);
1503
1504 return -1;
1505}
1506
1507int
1508config_compute_output_dir ()
1509{
1510 char tmp[1024];
1511 char buf[1024];
1512 int i;
1513 gboolean last = FALSE;
1514
1515 buf[0] = 0;
1516
1517 for (i = 0; i < option_count; i += 1)
1518 {
1519 ConfigOption *opt = all_options + i;
1520
1521 if (opt->style == CS_Ignore)
1522 continue;
1523
1524 if (! opt->found)
1525 continue;
1526
1527 if (last)
1528 strcat (buf, ",");
1529
1530 last = TRUE;
1531
1532 strcat (buf, opt->abbrev);
1533 strcat (buf, "=");
1534
1535 switch (opt->type)
1536 {
1537 case CD_Bool:
1538
1539 if (* (gboolean*) opt->value)
1540 strcat (buf, "true");
1541 else
1542 strcat (buf, "false");
1543
1544 break;
1545 case CD_Int32:
1546
1547 sprintf (tmp, "%d", (* (gint32*) opt->value));
1548 strcat (buf, tmp);
1549
1550 break;
1551 case CD_Double:
1552
1553 sprintf (tmp, "%0.2f", (* (double*) opt->value));
1554 strcat (buf, tmp);
1555
1556 break;
1557 case CD_String:
1558
1559 if (opt->style == CS_UseAsFile)
1560 {
1561 const char* str = (* (const char**) opt->value);
1562 const char* ls = strrchr (str, '/');
1563
1564 strcat (buf, ls ? (ls + 1) : str);
1565 }
1566 else
1567 {
1568 strcat (buf, (* (const char**) opt->value));
1569 }
1570
1571 break;
1572 }
1573 }
1574
1575 config_output_dir = g_strdup_printf ("%s/%s", config_output_base, buf);
1576
1577 return 0;
1578}
1579
1580int
1581config_done (void)
1582{
1583 int i, ret;
1584 FILE *out;
1585
1586 for (i = 0; i < option_count; i += 1)
1587 {
1588 ConfigOption *opt = all_options + i;
1589
1590 if (! opt->found && opt->arg == CO_Required)
1591 {
1592 fprintf (stderr, "required option not found: %s\n", all_options[i].name);
1593 return -1;
1594 }
1595 }
1596
1597 if ((ret = config_compute_output_dir ())) {
1598 return ret;
1599 }
1600
1601 if ((ret = config_clear_dir (config_output_dir))) {
1602 return ret;
1603 }
1604
1605 if ((ret = config_create_dir (config_output_dir))) {
1606 return ret;
1607 }
1608
1609 if (! (out = config_output ("Options")))
1610 abort ();
1611
1612 for (i = 0; i < option_count; i += 1)
1613 {
1614 ConfigOption *opt = all_options + i;
1615
1616 fprintf (out, "option: %s; value: ", all_options[i].name);
1617
1618 switch (opt->type)
1619 {
1620 case CD_Bool:
1621
1622 fprintf (out, "%s", (* (gboolean*) opt->value) ? "TRUE" : "FALSE");
1623
1624 break;
1625 case CD_Int32:
1626
1627 fprintf (out, "%d", (* (gint32*) opt->value));
1628
1629 break;
1630 case CD_Double:
1631
1632 fprintf (out, "%0.2f", (* (double*) opt->value));
1633
1634 break;
1635 case CD_String:
1636
1637 fprintf (out, "%s", (* (const char**) opt->value));
1638
1639 break;
1640 }
1641
1642 fprintf (out, "\n");
1643 }
1644
1645 if (fclose (out))
1646 {
1647 fprintf (stderr, "fclose failed\n");
1648 return errno;
1649 }
1650
1651 return 0;
1652}
1653
1654const char*
1655config_help_arg (ConfigOption *opt)
1656{
1657 switch (opt->arg)
1658 {
1659 case CO_Required:
1660 return "required";
1661 case CO_Optional:
1662 return "optional";
1663 case CO_None:
1664 return "no value";
1665 }
1666
1667 return "unknown";
1668}
1669
1670const char*
1671config_help_type (ConfigOption *opt)
1672{
1673 switch (opt->arg)
1674 {
1675 case CO_None:
1676 return "boolean";
1677 default:
1678 break;
1679 }
1680
1681 switch (opt->type)
1682 {
1683 case CD_Bool:
1684 return "boolean";
1685 case CD_Int32:
1686 return "int";
1687 case CD_Double:
1688 return "double";
1689 case CD_String:
1690 return "string";
1691 }
1692
1693 return "unknown";
1694}
1695
1696void
1697config_help (void)
1698{
1699 int i;
1700
1701 fprintf (stderr, "Expecting the following options in one or more config files on the command line:\n");
1702
1703 for (i = 0; i < option_count; i += 1)
1704 {
1705 ConfigOption *opt = all_options + i;
1706
1707 fprintf (stderr, "%s: %s %s\n",
1708 opt->name,
1709 config_help_arg (opt),
1710 config_help_type (opt));
1711 }
1712}
1713
1714FILE*
1715config_output (const char* format, ...)
1716{
1717 gchar *buffer;
1718 gchar *file;
1719 va_list args;
1720 FILE *f;
1721
1722 va_start (args, format);
1723 buffer = g_strdup_vprintf (format, args);
1724 va_end (args);
1725
1726 file = g_strdup_printf ("%s/%s", config_output_dir, buffer);
1727
1728 if (! (f = fopen (file, "w")))
1729 g_error ("fopen failed: %s\n", buffer);
1730
1731 g_free (file);
1732
1733 g_free (buffer);
1734
1735 return f;
1736}
1737
1738
1739#include <edsio.h>
1740#include <edsiostdio.h>
1741#include <ctype.h>
1742#include "xdfs.h"
1743
1744/* Warning: very cheesy!
1745 */
1746
1747#ifdef DEBUG_EXTRACT
1748 FileHandle *fh2 = handle_read_file (filename);
1749
1750 guint8* debug_buf = g_malloc (buflen);
1751
1752 if (! handle_read (fh2, debug_buf, buflen))
1753 g_error ("read failed");
1754#endif
1755
1756gboolean
1757rcs_count (const char* filename, guint *encoded_size)
1758{
1759 char *readbuf0, *readbuf;
1760 gboolean in_string = FALSE;
1761 gboolean in_text = FALSE;
1762 guint string_start = 0;
1763 guint string_end = 0;
1764 guint current_pos = 0;
1765 /*char *current_delta = NULL;*/
1766 FileHandle *fh = handle_read_file (filename);
1767 guint buflen = handle_length (fh);
1768
1769 (* encoded_size) = 0;
1770
1771 readbuf0 = g_new (guint8, buflen);
1772
1773 for (;;)
1774 {
1775 int c = handle_gets (fh, readbuf0, buflen);
1776
1777 readbuf = readbuf0;
1778
1779 if (c < 0)
1780 break;
1781
1782 if (strncmp (readbuf, "text", 4) == 0)
1783 in_text = TRUE;
1784
1785 if (! in_string && readbuf[0] == '@')
1786 {
1787 string_start = current_pos + 1;
1788 in_string = TRUE;
1789 readbuf += 1;
1790 }
1791
1792 current_pos += c;
1793
1794 if (in_string)
1795 {
1796 while ((readbuf = strchr (readbuf, '@')))
1797 {
1798 if (readbuf[1] == '@')
1799 {
1800 string_start += 1; /* @@@ bogus, just counting. */
1801 readbuf += 2;
1802 continue;
1803 }
1804
1805 in_string = FALSE;
1806 break;
1807 }
1808
1809 string_end = current_pos - 2;
1810
1811 if (in_text && ! in_string)
1812 {
1813 in_text = FALSE;
1814
1815 /*g_free (current_delta);
1816 current_delta = NULL;*/
1817
1818 (* encoded_size) += (string_end - string_start);
1819 }
1820
1821 continue;
1822 }
1823
1824 if (isdigit (readbuf[0]))
1825 {
1826#if 0
1827 (* strchr (readbuf, '\n')) = 0;
1828 if (current_delta)
1829 g_free (current_delta);
1830 current_delta = g_strdup (readbuf);
1831#endif
1832 }
1833 }
1834
1835 handle_close (fh);
1836
1837 g_free (readbuf0);
1838
1839#if 0
1840 if (current_delta)
1841 g_free (current_delta);
1842#endif
1843
1844 return TRUE;
1845}
1846
1847#if 0
1848int
1849main (int argc, char** argv)
1850{
1851 guint size;
1852
1853 if (argc != 2)
1854 g_error ("usage: %s RCS_file\n", argv[0]);
1855
1856 if (! rcs_count (argv[1], &size))
1857 g_error ("rcs_parse failed");
1858
1859 return 0;
1860}
1861#endif
diff --git a/xdelta3/setup.py b/xdelta3/setup.py
new file mode 100755
index 0000000..9d717e8
--- /dev/null
+++ b/xdelta3/setup.py
@@ -0,0 +1,33 @@
1# xdelta 3 - delta compression tools and library
2# Copyright (C) 2004 and onward. Joshua P. MacDonald
3#
4# This program is free software; you can redistribute it and/or modify
5# it under the terms of the GNU General Public License as published by
6# the Free Software Foundation; either version 2 of the License, or
7# (at your option) any later version.
8#
9# This program is distributed in the hope that it will be useful,
10# but WITHOUT ANY WARRANTY; without even the implied warranty of
11# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12# GNU General Public License for more details.
13#
14# You should have received a copy of the GNU General Public License
15# along with this program; if not, write to the Free Software
16# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17#
18#
19from distutils.core import setup, Extension
20
# Preprocessor symbols enabled (each defined to 1) when building the
# extension module from xdelta3.c.
_ENABLED_MACROS = [
    'PYTHON_MODULE',
    'XD3_POSIX',
    'REGRESSION_TEST',
    'XD3_DEBUG',
    'EXTCOMP',
    'VCDIFF_TOOLS',
    'XD3_USE_LARGEFILE64',
]

xdelta3_ext = Extension(
    'xdelta3',
    ['xdelta3.c'],
    define_macros=[(macro, 1) for macro in _ENABLED_MACROS],
)

setup(
    name='xdelta3',
    version='pre',
    ext_modules=[xdelta3_ext],
)
diff --git a/xdelta3/show.c b/xdelta3/show.c
new file mode 100755
index 0000000..f53f2ca
--- /dev/null
+++ b/xdelta3/show.c
@@ -0,0 +1,41 @@
1#include <stdio.h>
2#include <stdlib.h>
3
4#define BUFSZ (1 << 22)
5
/* Parse TEXT as a non-negative decimal count.  Returns 0 and stores the
 * value in *OUT on success, -1 on an empty, malformed or negative input. */
static int
parse_count (const char *text, long *out)
{
  char *end;
  long val = strtol (text, &end, 10);

  if (end == text || *end != '\0' || val < 0)
    return -1;

  *out = val;
  return 0;
}

/* usage: show offset bytes
 *
 * Skip OFFSET bytes of standard input, then print the next BYTES bytes
 * as two-digit hex on stderr, followed by a newline.  Exits with status
 * 1 on bad arguments or when the input ends early; the original kept
 * looping at EOF and printed the EOF marker as data. */
int main(int argc, char **argv)
{
  int c;
  long offset;
  long bytes;

  if (argc != 3 ||
      parse_count (argv[1], &offset) < 0 ||
      parse_count (argv[2], &bytes) < 0)
    {
      fprintf (stderr, "usage: show offset bytes\n");
      return 1;
    }

  for (; offset != 0; offset -= 1)
    {
      if ((c = fgetc (stdin)) == EOF)
        {
          fprintf (stderr, "EOF before offset\n");
          return 1;
        }
    }

  for (; bytes != 0; bytes -= 1)
    {
      if ((c = fgetc (stdin)) == EOF)
        {
          fprintf (stderr, "\nEOF before offset + bytes\n");
          return 1;
        }

      fprintf (stderr, "%02x", c);
    }

  fprintf (stderr, "\n");
  return 0;
}
diff --git a/xdelta3/testh.c b/xdelta3/testh.c
new file mode 100755
index 0000000..1be01df
--- /dev/null
+++ b/xdelta3/testh.c
@@ -0,0 +1 @@
#include "xdelta3.h"
diff --git a/xdelta3/www/xdelta3-api-guide.html b/xdelta3/www/xdelta3-api-guide.html
new file mode 100755
index 0000000..b3513ea
--- /dev/null
+++ b/xdelta3/www/xdelta3-api-guide.html
@@ -0,0 +1,212 @@
1<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
2<html>
3<head>
4 <title>Xdelta3 API guide (BETA)</title>
5 <meta http-equiv="content-type" content="text/html; charset=ISO-8859-1">
6 <link rel="stylesheet" type="text/css" href="xdelta3.css"/>
7</head>
8<body>
9
10<!-- $Format: "$WWWLeftNavBar$" $ --!>
11<table cellpadding="20px" width=700> <tr> <td class="leftbdr" valign=top height=600 width=100> <div class="leftbody"> <h1>Xdelta</h1> <a href="xdelta3.html">overview</a><br> <a href="xdelta3-cmdline.html">command&nbsp;line</a><br> <a href="xdelta3-api-guide.html">api&nbsp;guide</a><br> <br><a href="http://xdelta.org">xdelta.org</a></h2> </div> </td> <td valign=top width=500>
12
13
14<!-- Copyright (C) 2003 and onward. Joshua P. MacDonald --!>
15
16<h1>api guide</h1>
17
18<p>This guide intends to give you a quick start to the Xdelta3 programming
19interface. This is not a complete reference, the comments inside source file
20<code>xdelta3.h</code> and the command-line application,
21<code>xdelta3-main.h</code> offer more complete information.</p>
22
23<p>Have you read the <a href="xdelta3-cmdline.html">command-line interface</a>?</p>
24
25<h1>stream interface</h1>
26
27<p>
28To begin with, there are three external structures, only two of which are
29discussed here. The <code>xd3_stream</code> struct plays the main role, one
30of these contains the state necessary to encode or decode one stream of data.
31An <code>xd3_source</code> struct maintains state about the (optional) source
32file, against which differences are computed. The third structure,
33<code>xd3_config</code> deals with configuring various encoder parameters.</p>
34
35<p>
36At a glance, the interface resembles Zlib. The program puts data in, which
37the xd3_stream consumes. After computing over the data, the xd3_stream in
38turn generates output for the application to consume, or it requests more
39input. The xd3_stream also issues requests to the application to read a block
40of source data. The request to read a source block may be handled in one of
41two ways, according to application preference. If a <code>xd3_getblk</code>
42callback function is provided, the application handler will be called from
43within the library, suspending computation until the request completes. If no
44callback function is provided the library returns a special code
45(XD3_GETSRCBLK), allowing the application to issue the request and resume
46computation whenever it likes. In both cases, the xd3_source struct contains
47the requested block number and a place to store the result.</p>
48
49<h1>setup</h1>
50<p>The code to declare and initialize the xd3_stream:</p>
51<div class="example">
52<pre>
53int ret;
54xd3_stream stream;
55xd3_config config;
56
57xd3_init_config (&config, 0 /* flags */);
58config.winsize = 32768;
59ret = xd3_config_stream (&stream, &config);
60
61if (ret != 0) { /* error */ }
62</pre>
63</div>
64
65<p>
66<code>xd3_init_config()</code> initializes the <code>xd3_config</code> struct
67with default values. Many settings remain undocumented in the beta release.
68The most relevant setting, <code>xd3_config.winsize</code>, sets the encoder
69window size. The encoder allocates a buffer of this size if the program
70supplies input in smaller units (unless the <code>XD3_FLUSH</code> flag is
71set). <code>xd3_config_stream()</code> initializes the <code>xd3_stream</code>
72object with the supplied configuration.
73</p>
74
75<h1>setting the source</h1>
76<p>
77The stream is ready for input at this point, though for encoding the source
78data must be supplied now. To declare and initialize the xd3_source:</p>
79
80<div class="example">
81<pre>
82xd3_source source;
83void *IO_handle = ...;
84
85source.name = "...";
86source.size = file_size;
87source.ioh= IO_handle;
88source.blksize= 32768;
89source.curblkno = (xoff_t) -1;
90source.curblk = NULL;
91
92ret = xd3_set_source (&stream, &source);
93
94if (ret != 0) { /* error */ }
95</pre>
96</div>
97
98<p>
99The decoder sets source data in the same manner, but it may delay this step
100until the application header has been received (<code>XD3_GOTHEADER</code>).
101The application can also check whether source data is required for decoding
102with the <code>xd3_decoder_needs_source()</code>.</p>
103
104<p>
105<code>xd3_source.blksize</code> determines the block size used for requesting
106source blocks. If the first source block (or the entire source) is already in
107memory, set <code>curblkno</code> to 0 and <code>curblk</code> to that block
108of data.</p>
109
110<h1>input/output loop</h1>
111
112<p>The stream is now ready for input, which the application provides by
113calling <code>xd3_avail_input()</code>. The application initiates
114encoding or decoding at this point by calling one of two functions:</p>
115
116<div class="example">
117<pre>
118int xd3_encode_input (xd3_stream *stream)
119int xd3_decode_input (xd3_stream *stream)
120</pre>
121</div>
122
123<p>Unless there is an error, these routines return one of six result
124codes which the application must handle. In many cases, all or most
125of the handler code is shared between encoding and decoding. The
126codes are:</p>
127
128<ul>
129<li> <code>XD3_INPUT</code>: The stream is ready for (or requires) more input. The
130application should call xd3_avail_input when (if) more data is
131available.
132
133<li> <code>XD3_OUTPUT</code>: The stream has pending output. The application
134should write or otherwise consume the block of data found in the
135xd3_stream fields <code>next_out</code> and <code>avail_out</code>,
136then call <code>xd3_consume_output</code>.
137
138<li> <code>XD3_GETSRCBLK</code>: The stream is requesting a source block be read,
139as described above. This is only ever returned if the xd3_getblk
140callback was not provided.
141
142<li> <code>XD3_GOTHEADER</code>: This decoder-specific code indicates that the
143first VCDIFF window header has been received. This gives the
144application a chance to inspect the application header before
145decoding the first window.
146
147<li> <code>XD3_WINSTART</code>: This is returned by both encoder and decoder prior to
148processing a window. For encoding, this code is returned once there is enough
149available input. For decoding, this is returned following each window header
150(except the first, when XD3_GOTHEADER is returned instead).
151
152<li> <code>XD3_WINFINISH</code>: This is returned when the output from a single
153window has been fully consumed.
154</ul>
155
156<p>An application could be structured something like this:</p>
157
158<div class="example">
159<pre>
160do {
161 read (&indata, &insize);
162 if (reached_EOF) {
163 xd3_set_flags (&stream, XD3_FLUSH);
164 }
165 xd3_avail_input (&stream, indata, insize);
166process:
167 ret = xd3_xxcode_input (&stream);
168 switch (ret) {
169 case XD3_INPUT:
170 continue;
171 case XD3_OUTPUT:
172 /* write data */
173 goto process;
174 case XD3_GETSRCBLK:
175 /* set source block */
176 goto process;
177 case XD3_GOTHEADER:
178 case XD3_WINSTART:
179 case XD3_WINFINISH:
180 /* no action necessary */
181 goto process;
182 default:
183 /* error */
184 }
185} while (! reached_EOF);
186</pre>
187</div>
188
189<p>
190All that remains is to close the stream and free its resources. The
191<code>xd3_close_stream()</code> checks several error conditions but otherwise
192involves no input or output. The <code>xd3_free_stream()</code> routine frees
193all memory allocated by the stream.</p>
194
195<h1>misc</h1>
196
197<p>
198There are two convenience functions for encoding to and decoding from
199in-memory buffers. See the <code>xd3_encode_completely</code> and
200<code>xd3_decode_completely</code> interfaces.</p>
201
202<p>
203There are two routines to get and set the application header. When
204encoding, the application header must be set before the first
205<code>XD3_WINSTART</code>. When decoding, the application header is available
206after the first <code>XD3_GOTHEADER</code>.</p>
207
208</td>
209</tr>
210</table>
211</body>
212</html>
diff --git a/xdelta3/www/xdelta3-cmdline.html b/xdelta3/www/xdelta3-cmdline.html
new file mode 100755
index 0000000..cabb547
--- /dev/null
+++ b/xdelta3/www/xdelta3-cmdline.html
@@ -0,0 +1,166 @@
1<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
2<html>
3<head>
4 <title>Xdelta3 command-line guide (BETA)</title>
5 <meta http-equiv="content-type" content="text/html; charset=ISO-8859-1">
6 <link rel="stylesheet" type="text/css" href="xdelta3.css"/>
7</head>
8<body>
9
10<!-- $Format: "$WWWLeftNavBar$" $ --!>
11<table cellpadding="20px" width=700> <tr> <td class="leftbdr" valign=top height=600 width=100> <div class="leftbody"> <h1>Xdelta</h1> <a href="xdelta3.html">overview</a><br> <a href="xdelta3-cmdline.html">command&nbsp;line</a><br> <a href="xdelta3-api-guide.html">api&nbsp;guide</a><br> <br><a href="http://xdelta.org">xdelta.org</a></h2> </div> </td> <td valign=top width=500>
12
13
14<!-- Copyright (C) 2003 and onward. Joshua P. MacDonald --!>
15
16<h1>command-line guide</h1>
17
18<code>xdelta3</code> can be run with syntax similar, but not identical, to gzip;
19it requires you to specify the output file in most cases, rather than applying
20any default filename extensions. These are cases that resemble gzip:<p>
21
22<div class="example">
23<pre>
24xdelta3 -c file_to_compress > delta_file
25xdelta3 -dc delta_file > file_uncompressed
26</pre>
27</div>
28<p>
29
30The <code>-c</code> option says to write to the standard output. The
31<code>-d</code> option says to decode. The default action is to encode (also
32specified by <code>-e</code>). <code>xdelta3</code> also supports long
33command names, these two commands are equivalent to the ones above:<p>
34
35<div class="example">
36<pre>
37xdelta3 encode file_to_compress > delta_file
38xdelta3 decode delta_file > file_uncompressed
39</pre>
40</div>
41<p>
42
43<code>xdelta3</code> has the notion of a default filename for decoding. If
44you specified a file name during the encode step, it is used as the default
45for decoding. The <code>-s</code> option specifies a <em>source file</em> for
46delta-compression.<p>
47
48<div class="example">
49<pre>
50xdelta3 -s source_file target_file delta_file
51xdelta3 -d delta_file
52</pre>
53</div>
54<p>
55
56The second line above fills in "source_file" and "target_file" as the input
57and output filenames. Without the <code>-f</code> option,
58<code>xdelta3</code> will not overwrite an existing file. When there are no
59default filenames (e.g., in decode), standard input and standard output are
60used. In the example below, the default source filename is applied in
61decoding.
62<p>
63
64<div class="example">
65<pre>
66cat target_file | xdelta3 -s source_file > delta_file
67xdelta3 -d < delta_file > target_file.1
68</pre>
69</div>
70<p>
71
72<code>xdelta3</code> recognizes externally compressed inputs, so the following
73command produces the expected results:<p>
74
75<div class="example">
76<pre>
77xdelta3 -s beta2.tar.gz beta3.tar.gz beta3.tar.gz.xd
78xdelta3 -ds beta2.tar.gz beta3.tar.gz.xd beta3.tar.gz.1
79</pre>
80</div>
81<p>
82
83You can avoid the intermediate file and use <code>xdelta3</code> together
84with a <code>tar</code>-pipeline.
85
86<div class="example">
87<pre>
88tar -cf - beta3 | xdelta3 -s beta2.tar > beta3.tar.xd
89xdelta3 -d beta3.tar.xd | tar -xf -
90</pre>
91</div>
92<p>
93
94<code>xdelta3</code> can print various information about a compressed file with
95the "printhdr" command. The "printhdrs" command prints information about each
96<em>window</em> of the encoding. The "printdelta" command prints the actual
97encoding for each window, in human-readable format.<p>
98
99<div class="example">
100<pre>
101# xdelta3 printdelta delta_file
102VCDIFF version: 0
103VCDIFF header size: 5
104VCDIFF header indicator: none
105VCDIFF secondary compressor: none
106VCDIFF window number: 0
107VCDIFF window indicator: VCD_SOURCE VCD_ADLER32
108VCDIFF adler32 checksum: 48BFADB6
109VCDIFF copy window length: 2813
110VCDIFF copy window offset: 0
111VCDIFF delta encoding length: 93
112VCDIFF target window length: 2903
113VCDIFF data section length: 72
114VCDIFF inst section length: 8
115VCDIFF addr section length: 3
116 Offset Code Type1 Size1 @Addr1 + Type2 Size2 @Addr2
117 000000 019 CPY_0 1535 @0
118 001535 001 ADD 72
119 001607 019 CPY_0 1296 @1517
120</pre>
121</div>
122<br>
123<p>
124
125
126<h1>xdelta3 -h</h1>
127
128<pre>
129usage: xdelta3 [command/options] [input [output]]
130commands are:
131 encode encodes the input
132 decode decodes the input
133 config prints xdelta3 configuration
134 test run the builtin tests
135special commands for VCDIFF inputs:
136 printhdr print information about the first window
137 printhdrs print information about all windows
138 printdelta print information about the entire delta
139options are:
140 -c use stdout instead of default
141 -d same as decode command
142 -e same as encode command
143 -f force overwrite
144 -n disable checksum (encode/decode)
145 -D disable external decompression (encode/decode)
146 -R disable external recompression (decode)
147 -N disable small string-matching compression
148 -S [djw|fgk] disable/enable secondary compression
149 -A [apphead] disable/provide application header
150 -s source source file to copy from (if any)
151 -B blksize source file block size
152 -W winsize input window buffer size
153 -v be verbose (max 2)
154 -q be quiet
155 -h show help
156 -V show version
157 -P repeat count (for profiling)
158</pre>
159<p>
160
161</td>
162</tr>
163</table>
164
165</body>
166</html>
diff --git a/xdelta3/www/xdelta3.css b/xdelta3/www/xdelta3.css
new file mode 100755
index 0000000..269b1c9
--- /dev/null
+++ b/xdelta3/www/xdelta3.css
@@ -0,0 +1,69 @@
1body {
2 margin-top: 15px;
3 margin-left: 15px;
4 background-color:#b0b0b0;
5 color:#204080;
6 font-family: serif;
7 word-spacing: 0.5pt;
8 text-indent: 0pt;
9}
10
11A:visited {
12 color: #204080;
13}
14A:link {
15 color: #102040;
16}
17h1 {
18 color: #103060;
19 font-size: 150%;
20}
21
22h2 {
23 color: #103060;
24 font-size: 80%;
25}
26
27code, pre {
28 font-family: monospace;
29}
30
31pre {
32 color: #102040;
33}
34
35code {
36 color:#0060c0;
37}
38
39.example {
40 margin-right: 20px;
41 margin-left: 20px;
42
43 padding-left: 20px;
44 padding-right: 20px;
45 padding-top: 0px;
46 padding-bottom: 0px;
47
48 background-color: #808080;
49 border-style: solid;
50 border-width: 1px;
51 border-color: #000000;
52}
53
54.leftbdr {
55 font-family: sans-serif;
56 color: #103060;
57 background-color: #606060;
58 border-style: solid;
59 border-width: 1px;
60 border-color: #000000;
61}
62.leftbody A:visited {
63 color: #102040;
64 text-decoration: none;
65}
66.leftbody A:link {
67 color: #102040;
68 text-decoration: none;
69}
diff --git a/xdelta3/www/xdelta3.html b/xdelta3/www/xdelta3.html
new file mode 100755
index 0000000..3bddfd9
--- /dev/null
+++ b/xdelta3/www/xdelta3.html
@@ -0,0 +1,89 @@
1<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
2<html>
3<head>
4 <title>Xdelta3 delta compression library (BETA)</title>
5 <meta http-equiv="content-type" content="text/html; charset=ISO-8859-1">
6 <link rel="stylesheet" type="text/css" href="xdelta3.css"/>
7</head>
8<body>
9
10<!-- $Format: "$WWWLeftNavBar$" $ --!>
11<table cellpadding="20px" width=700> <tr> <td class="leftbdr" valign=top height=600 width=100> <div class="leftbody"> <h1>Xdelta</h1> <a href="xdelta3.html">overview</a><br> <a href="xdelta3-cmdline.html">command&nbsp;line</a><br> <a href="xdelta3-api-guide.html">api&nbsp;guide</a><br> <br><a href="http://xdelta.org">xdelta.org</a></h2> </div> </td> <td valign=top width=500>
12
13<!-- Copyright (C) 2003 and onward. Joshua P. MacDonald --!>
14
15<h1>version three?</h1>
16
17Xdelta3 is the third and latest release of Xdelta, which is a set of tools and
18APIs for reading and writing compressed <em>deltas</em>. Deltas encode the
19differences between two versions of a document. This release features a
20completely new compression engine, several algorithmic improvements, a fully
21programmable interface modelled after zlib, in addition to a command-line
22utility, use of the RFC3284 (VCDIFF) encoding, a python extension, and now
2364-bit support.<p>
24
25Xdelta3 is <em>tiny</em>. A minimal, fully functional VCDIFF decoder library
26pipes in at 16KB. The command-line utility complete with encoder/decoder
27tools, external compression support, and the <code>djw</code> secondary
28compression routines, is just under 60KB, slightly larger than a
29<code>gzip</code> executable.<p>
30
31Xdelta3 has few dependencies because it's capable of stand-alone file
32compression (i.e., what zlib and gzip do). The stand-alone compression of
33Xdelta3/VCDIFF is 10-20% worse than <code>gzip</code>, you may view this as
34paying for the convenience-cost of having a single encoding, tool, and api
35designed to do both <em>data-compression</em> and <em>differencing</em> at
36once.<p>
37
38The Xdelta3 command-line tool, <code>xdelta3</code>, supports several
39convenience routines. Delta compression works when the two inputs are
40similar, but often we would like to compute the difference between two
41compressed documents. <code>xdelta3</code> has (optional) support to
42recognize externally compressed inputs and process them correctly. This
43support is facilitated, in part, using the VCDIFF <em>application header</em>
44field to store <code>xdelta3</code> meta-data, which includes the original
45file names (if any) and codes to indicate whether the inputs were externally
46compressed. Applications may provide their own application header.<p>
47
48<h1>what are version one and version two?</h1>
49
50Many shortcomings in the Xdelta1.x release are fixed in its replacement,
51Xdelta3. Xdelta1 used both a simplistic compression algorithm and a
52simplistic encoding. For example, Xdelta1 compresses the entire document at
53once and thus uses memory proportional to the input size.<p>
54
55The Xdelta1 compression engine made no attempt to find matching strings
56smaller than say 16 or 32 bytes, and the encoding does not attempt to
57efficiently encode the <code>COPY</code> and <code>ADD</code> instructions
58which constitute a delta. For documents with highly similar data, however,
59these techniques degrade performance by a relatively insignificant amount.
60(Xdelta1.x compresses the delta with Zlib to improve matters, but this
61dependency stinks.)<p>
62
63Despite leaving much to be desired, Xdelta1 showed that you can do well
64without great complexity; as it turns out, the particulars of the compression
65engine are relatively insignificant compared to the difficulty of
66programming an application that uses delta-compression. Better solve that
67first.<p>
68
69What we want are <em>systems</em> that manage compressed storage and network
70communication. The second major release, Xdelta2, addresses these issues.
71Xdelta2 features a storage interface -- part database and part file system --
72which allows indexing and labeling compressed documents. The feature set is
73similar to RCS. The Xdelta2 interface supports efficient algorithms for
74<em>extracting</em> deltas between any pair of versions in storage. The
75extraction technique also does not rely on hierarchy or centralizing the
76namespace, making the techniques ideal for peer-to-peer communication and
77proxy architectures. I am grateful to Mihut Ionescu for implementing the
78Xproxy HTTP delta-compressing proxy system based on this interface and
79studying the benefits of delta-compression in that context. Xdelta2 stressed
80the Xdelta1 compression engine beyond its limits; so Xdelta3 is designed as
81the ideal replacement. The Xdelta2 techniques are yet to be ported to the new
82implementation.<p>
83
84</td>
85</tr>
86</table>
87
88</body>
89</html>
diff --git a/xdelta3/xdelta3-cfgs.h b/xdelta3/xdelta3-cfgs.h
new file mode 100755
index 0000000..329f3e9
--- /dev/null
+++ b/xdelta3/xdelta3-cfgs.h
@@ -0,0 +1,118 @@
1/* xdelta 3 - delta compression tools and library
2 * Copyright (C) 2001 and onward. Joshua P. MacDonald
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 */
18
19/******************************************************************************************
20 SOFT string matcher
21 ******************************************************************************************/
22
/* SOFT configuration: when XD3_BUILD_SOFT is enabled, every matcher parameter macro is
 * mapped onto a field of `stream`, so the values are read at run time rather than being
 * compile-time constants.  xdelta3.c is then included again with TEMPLATE set to `soft`
 * and SOFTCFG set to 1 for the duration of that inclusion; afterwards all parameter
 * macros are undefined so that the next configuration can define its own.
 * NOTE(review): presumably xdelta3.c uses TEMPLATE to name the generated matcher --
 * confirm in xdelta3.c. */
23#if XD3_BUILD_SOFT
24
25#define TEMPLATE soft
26#define LLOOK stream->large_look
27#define LSTEP stream->large_step
28#define SLOOK stream->small_look
29#define SCHAIN stream->small_chain
30#define SLCHAIN stream->small_lchain
31#define SSMATCH stream->ssmatch
32#define TRYLAZY stream->try_lazy
33#define MAXLAZY stream->max_lazy
34#define LONGENOUGH stream->long_enough
35#define PROMOTE stream->promote
36
/* SOFTCFG is 1 only while xdelta3.c is included for this configuration. */
37#define SOFTCFG 1
38#include "xdelta3.c"
39#undef SOFTCFG
40
/* Clean up so that the parameter names can be redefined by the next section. */
41#undef TEMPLATE
42#undef LLOOK
43#undef SLOOK
44#undef LSTEP
45#undef SCHAIN
46#undef SLCHAIN
47#undef SSMATCH
48#undef TRYLAZY
49#undef MAXLAZY
50#undef LONGENOUGH
51#undef PROMOTE
52#endif
53
54#define SOFTCFG 0
55
56/******************************************************************************************
57 FAST string matcher
58 ******************************************************************************************/
/* FAST configuration: when XD3_BUILD_FAST is enabled, the matcher parameters are fixed
 * compile-time constants and xdelta3.c is included again with TEMPLATE set to `fast`.
 * Compared with the SLOW section of this file, the small chain lengths are shorter and
 * TRYLAZY/MAXLAZY are 0.  All parameter macros are undefined again after the inclusion. */
59#if XD3_BUILD_FAST
60#define TEMPLATE fast
61#define LLOOK 32
62#define LSTEP 32
63#define SLOOK 4
64
65#define SCHAIN 2 // For testcase/3, this produces miserable performance
66#define SLCHAIN 2 // with these values != 1 and large input window size
67
68#define SSMATCH 1
69#define TRYLAZY 0
70#define MAXLAZY 0
71#define LONGENOUGH 64
72#define PROMOTE 0
73
74#include "xdelta3.c"
75
/* Clean up so that the parameter names can be redefined by the next section. */
76#undef TEMPLATE
77#undef LLOOK
78#undef SLOOK
79#undef LSTEP
80#undef SCHAIN
81#undef SLCHAIN
82#undef SSMATCH
83#undef TRYLAZY
84#undef MAXLAZY
85#undef LONGENOUGH
86#undef PROMOTE
87#endif
88
89/******************************************************************************************
90 SLOW string matcher
91 ******************************************************************************************/
/* SLOW configuration: when XD3_BUILD_SLOW is enabled, the matcher parameters are fixed
 * compile-time constants and xdelta3.c is included again with TEMPLATE set to `slow`.
 * Compared with the FAST section of this file, the chain lengths are longer (128/16),
 * TRYLAZY is 1 with MAXLAZY 8, and SSMATCH is 0.  All parameter macros are undefined
 * again after the inclusion. */
92#if XD3_BUILD_SLOW
93#define TEMPLATE slow
94#define LLOOK 64
95#define LSTEP 64 // TODO
96#define SLOOK 4
97#define SCHAIN 128
98#define SLCHAIN 16
99#define SSMATCH 0
100#define TRYLAZY 1
101#define MAXLAZY 8
102#define LONGENOUGH 128
103#define PROMOTE 0
104
105#include "xdelta3.c"
106
/* Clean up the parameter macros defined for this configuration. */
107#undef TEMPLATE
108#undef LLOOK
109#undef SLOOK
110#undef LSTEP
111#undef SCHAIN
112#undef SLCHAIN
113#undef SSMATCH
114#undef TRYLAZY
115#undef MAXLAZY
116#undef LONGENOUGH
117#undef PROMOTE
118#endif
diff --git a/xdelta3/xdelta3-djw.h b/xdelta3/xdelta3-djw.h
new file mode 100755
index 0000000..90f58e2
--- /dev/null
+++ b/xdelta3/xdelta3-djw.h
@@ -0,0 +1,1917 @@
1/* xdelta 3 - delta compression tools and library
2 * Copyright (C) 2002 and onward. Joshua P. MacDonald
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 */
18
19#ifndef _XDELTA3_DJW_H_
20#define _XDELTA3_DJW_H_
21
22/* The following people deserve much credit for the algorithms and techniques contained in
23 * this file:
24
25 Julian Seward
26 Bzip2 sources, implementation of the multi-table Huffman technique.
27
28 Jean-loup Gailly and Mark Adler and L. Peter Deutsch
29 Zlib source code, RFC 1951
30
31 Daniel S. Hirschberg and Debra A. LeLewer
32 "Efficient Decoding of Prefix Codes"
33 Communications of the ACM, April 1990 33(4).
34
35 David J. Wheeler
36 Program bred3.c, bexp3 and accompanying documents bred3.ps, huff.ps.
37 This contains the idea behind the multi-table Huffman and 1-2 coding techniques.
38 ftp://ftp.cl.cam.ac.uk/users/djw3/
39
40*/
41
42/* OPT: during the multi-table iteration, pick the worst-overall performing table and
43 * replace it with exactly the frequencies of the worst-overall performing sector or
44 * N-worst performing sectors. */
45
46/* REF: See xdfs-0.222 and xdfs-0.226 for some old experiments with the Bzip prefix coding
47 * strategy. xdfs-0.256 contains the last of the other-format tests, including RFC1950
48 * and the RFC1950+MTF tests. */
49
/* Maximum length of an alphabet code.  This must not exceed 20: the code-length MTF
 * table is seeded from 0, djw_encode_12basic[] and djw_encode_12extra[], which together
 * contain only the lengths 0-20, and the DJW_EXTRA_CODE_BITS-wide field written by
 * djw_encode_prefix can announce at most DJW_EXTRA_CODES (15) optional code lengths.
 * A larger limit lets djw_build_prefix produce lengths the prefix encoder cannot
 * represent. */
#define DJW_MAX_CODELEN 20
51
52#define DJW_TOTAL_CODES (DJW_MAX_CODELEN+2) /* [RUN_0, RUN_1, 1-DJW_MAX_CODELEN] */
53
54#define RUN_0 0 /* Symbols used in MTF+1/2 coding. */
55#define RUN_1 1
56
57#define DJW_BASIC_CODES 5 /* Number of code lengths always encoded (djw_encode_basic array) */
58#define DJW_RUN_CODES 2 /* Number of run codes */
59#define DJW_EXTRA_12OFFSET 7 /* Offset of extra codes */
60#define DJW_EXTRA_CODES 15 /* Number of optionally encoded code lengths (djw_encode_extra array) */
61#define DJW_EXTRA_CODE_BITS 4 /* Number of bits to code [0-DJW_EXTRA_CODES] */
62
63#define DJW_MAX_GROUPS 8 /* Max number of group coding tables */
64#define DJW_GROUP_BITS 3 /* Number of bits to code [1-DJW_MAX_GROUPS] */
65
66#define DJW_SECTORSZ_MULT 5 /* Multiplier for encoded sectorsz */
67#define DJW_SECTORSZ_BITS 5 /* Number of bits to code group size */
68#define DJW_SECTORSZ_MAX ((1 << DJW_SECTORSZ_BITS) * DJW_SECTORSZ_MULT)
69
70#define DJW_MAX_ITER 6 /* Maximum number of iterations to find group tables. */
71#define DJW_MIN_IMPROVEMENT 20 /* Minimum number of bits an iteration must reduce coding by. */
72
73#define DJW_MAX_CLCLEN 15 /* Maximum code length of a prefix code length */
74#define DJW_CLCLEN_BITS 4 /* Number of bits to code [0-DJW_MAX_CLCLEN] */
75
76#define DJW_MAX_GBCLEN 7 /* Maximum code length of a group selector */
77#define DJW_GBCLEN_BITS 3 /* Number of bits to code [0-DJW_MAX_GBCLEN]
78 * @!@ Actually, should never have zero code lengths here, or
79 * else a group went unused. Write a test for this: if a group
80 * goes unused, eliminate it? */
81
82#define EFFICIENCY_BITS 16 /* It has to save at least this many bits... */
83
84typedef struct _djw_stream djw_stream;
85typedef struct _djw_heapen djw_heapen;
86typedef struct _djw_prefix djw_prefix;
87typedef uint32_t djw_weight;
88
89/* To enable Huffman tuning code... */
90#ifndef TUNE_HUFFMAN
91#define TUNE_HUFFMAN 0
92#endif
93
94#if TUNE_HUFFMAN == 0
95#define xd3_real_encode_huff xd3_encode_huff
96#define IF_TUNE(x)
97#define IF_NTUNE(x) x
98#else
99static uint xd3_bitsof_output (xd3_output *output, bit_state *bstate);
100#define IF_TUNE(x) x
101#define IF_NTUNE(x)
102static djw_weight tune_freq[DJW_TOTAL_CODES];
103static uint8_t tune_clen[DJW_MAX_GROUPS][ALPHABET_SIZE];
104static usize_t tune_prefix_bits;
105static usize_t tune_select_bits;
106static usize_t tune_encode_bits;
107#endif
/* One node of the prefix tree built by djw_build_prefix: either a leaf standing for an
 * alphabet symbol or an internal node joining two subtrees.  Nodes are referred to by
 * their index in the `ents` array; index 0 is a sentinel entry. */
108struct _djw_heapen
109{
110 uint32_t depth; /* 0 for a leaf; 1 + the larger child depth for an internal node. */
111 uint32_t freq; /* Weight: the symbol frequency, or the sum of the two children. */
112 uint32_t parent; /* Index of the parent entry; 0 while the node has no parent. */
113};
114
/* Working buffers for coding a sequence of small symbols (code lengths or group
 * selectors) with move-to-front plus 1/2 run coding.  djw_compute_mtf_1_2 reads
 * symbol[0..scount) and writes mtfsym[0..mcount). */
115struct _djw_prefix
116{
117 usize_t scount; /* Number of valid entries in symbol[]. */
118 uint8_t *symbol; /* Input symbols, caller-provided storage. */
119 usize_t mcount; /* Number of entries written to mtfsym[]. */
120 uint8_t *mtfsym; /* Output: RUN_0/RUN_1 codes and MTF positions offset by RUN_1. */
121 uint8_t *repcnt; /* NOTE(review): not referenced by the code visible here -- confirm use. */
122};
123
/* Per-stream state of the secondary compressor.  It currently carries no data; the
 * single member only keeps the struct non-empty. */
124struct _djw_stream
125{
126 int unused; /* Placeholder, never read or written in the code visible here. */
127};
128
129/* Each Huffman table consists of 256 "code length" (CLEN) codes, which are themselves
130 * Huffman coded after eliminating repeats and move-to-front coding. The prefix consists
131 * of all the CLEN codes in djw_encode_basic plus a 4-bit value stating how many of the
132 * djw_encode_extra codes are actually coded (the rest are presumed zero, or unused CLEN
133 * codes).
134 *
135 * The values of these two arrays were arrived at by studying the distribution of min
136 * and max clen over a collection of DATA, INST, and ADDR inputs. The goal is to specify
137 * the order of djw_extra_codes that is most likely to minimize the number of extra codes
138 * that must be encoded.
139 *
140 * Results: 158896 sections were counted by compressing files (window size 512K) listed
141 * with: `find / -type f ( -user jmacd -o -perm +444 )`
142 *
143 * The distribution of CLEN codes for each efficient invocation of the secondary
144 * compressor (taking the best number of groups/sector size) was recorded. Then we look at
145 * the distribution of min and max clen values, counting the number of times the value
146 * C_low is less than the min and C_high is greater than the max. Values >= C_high and <=
147 * C_low will not have their lengths coded. The results are sorted and the least likely
148 * 15 are placed into the djw_encode_extra[] array in order. These values are used as
149 * the initial MTF ordering.
150
151 clow[1] = 155119
152 clow[2] = 140325
153 clow[3] = 84072
154 ---
155 clow[4] = 7225
156 clow[5] = 1093
157 clow[6] = 215
158 ---
159 chigh[4] = 1
160 chigh[5] = 30
161 chigh[6] = 218
162 chigh[7] = 2060
163 chigh[8] = 13271
164 ---
165 chigh[9] = 39463
166 chigh[10] = 77360
167 chigh[11] = 118298
168 chigh[12] = 141360
169 chigh[13] = 154086
170 chigh[14] = 157967
171 chigh[15] = 158603
172 chigh[16] = 158864
173 chigh[17] = 158893
174 chigh[18] = 158895
175 chigh[19] = 158896
176 chigh[20] = 158896
177
178*/
179
/* Code lengths that are only optionally transmitted, in the order in which
 * djw_init_clen_mtf_1_2 appends them to the initial MTF table (after 0 and the
 * djw_encode_12basic[] lengths). */
180static const uint8_t djw_encode_12extra[DJW_EXTRA_CODES] =
181 {
182 9, 10, 3, 11, 2, 12, 13, 1, 14, 15, 16, 17, 18, 19, 20
183 };
184
/* Code lengths placed directly after 0 at the front of the initial MTF table by
 * djw_init_clen_mtf_1_2. */
185static const uint8_t djw_encode_12basic[DJW_BASIC_CODES] =
186 {
187 4, 5, 6, 7, 8,
188 };
189
190/*********************************************************************/
191/* DECLS */
192/*********************************************************************/
193
194static djw_stream* djw_alloc (xd3_stream *stream /*, int alphabet_size */);
195static void djw_init (djw_stream *h);
196static void djw_destroy (xd3_stream *stream,
197 djw_stream *h);
198
199#if XD3_ENCODER
200static int xd3_encode_huff (xd3_stream *stream,
201 djw_stream *sec_stream,
202 xd3_output *input,
203 xd3_output *output,
204 xd3_sec_cfg *cfg);
205#endif
206
207static int xd3_decode_huff (xd3_stream *stream,
208 djw_stream *sec_stream,
209 const uint8_t **input,
210 const uint8_t *const input_end,
211 uint8_t **output,
212 const uint8_t *const output_end);
213
214/*********************************************************************/
215/* HUFFMAN */
216/*********************************************************************/
217
218static djw_stream*
219djw_alloc (xd3_stream *stream)
220{
221 return xd3_alloc (stream, sizeof (djw_stream), 1);
222}
223
224static void
225djw_init (djw_stream *h)
226{
227 /* Fields are initialized prior to use. */
228}
229
230static void
231djw_destroy (xd3_stream *stream,
232 djw_stream *h)
233{
234 xd3_free (stream, h);
235}
236
237
238/*********************************************************************/
239/* HEAP */
240/*********************************************************************/
241
242static INLINE int
243heap_less (const djw_heapen *a, const djw_heapen *b)
244{
245 return a->freq < b->freq ||
246 (a->freq == b->freq &&
247 a->depth < b->depth);
248}
249
250static INLINE void
251heap_insert (uint *heap, const djw_heapen *ents, uint p, const uint e)
252{
253 /* Insert ents[e] into next slot heap[p] */
254 uint pp = p/2; /* P's parent */
255
256 while (heap_less (& ents[e], & ents[heap[pp]]))
257 {
258 heap[p] = heap[pp];
259 p = pp;
260 pp = p/2;
261 }
262
263 heap[p] = e;
264}
265
266static INLINE djw_heapen*
267heap_extract (uint *heap, const djw_heapen *ents, uint heap_last)
268{
269 uint smallest = heap[1];
270 uint p, pc, t;
271
272 /* Caller decrements heap_last, so heap_last+1 is the replacement elt. */
273 heap[1] = heap[heap_last+1];
274
275 /* Re-heapify */
276 for (p = 1; ; p = pc)
277 {
278 pc = p*2;
279
280 /* Reached bottom of heap */
281 if (pc > heap_last) { break; }
282
283 /* See if second child is smaller. */
284 if (pc < heap_last && heap_less (& ents[heap[pc+1]], & ents[heap[pc]])) { pc += 1; }
285
286 /* If pc is not smaller than p, heap property re-established. */
287 if (! heap_less (& ents[heap[pc]], & ents[heap[p]])) { break; }
288
289 t = heap[pc];
290 heap[pc] = heap[p];
291 heap[p] = t;
292 }
293
294 return (djw_heapen*) & ents[smallest];
295}
296
#if XD3_DEBUG
/* Debug-only consistency check: asserts that no element in slots 1..HEAP_LAST sorts
 * before its parent (slot 1 is compared against the sentinel in slot 0). */
static void
heap_check (uint *heap, djw_heapen *ents, uint heap_last)
{
  uint child = 1;

  while (child <= heap_last)
    {
      XD3_ASSERT (! heap_less (& ents[heap[child]], & ents[heap[child/2]]));
      child += 1;
    }
}
#endif
309
310/*********************************************************************/
311/* MTF, 1/2 */
312/*********************************************************************/
313
314static INLINE usize_t
315djw_update_mtf (uint8_t *mtf, usize_t mtf_i)
316{
317 int k;
318 usize_t sym = mtf[mtf_i];
319
320 for (k = mtf_i; k != 0; k -= 1) { mtf[k] = mtf[k-1]; }
321
322 mtf[0] = sym;
323 return sym;
324}
325
326static INLINE void
327djw_update_1_2 (int *mtf_run, usize_t *mtf_i, uint8_t *mtfsym, djw_weight *freq)
328{
329 int code;
330
331 do
332 {
333 /* Offset by 1, since any number of RUN_ symbols implies run>0... */
334 *mtf_run -= 1;
335
336 code = (*mtf_run & 1) ? RUN_1 : RUN_0;
337
338 mtfsym[(*mtf_i)++] = code;
339 freq[code] += 1;
340 *mtf_run >>= 1;
341 }
342 while (*mtf_run >= 1);
343
344 *mtf_run = 0;
345}
346
347static void
348djw_init_clen_mtf_1_2 (uint8_t *clmtf)
349{
350 int i, cl_i = 0;
351
352 clmtf[cl_i++] = 0;
353 for (i = 0; i < DJW_BASIC_CODES; i += 1) { clmtf[cl_i++] = djw_encode_12basic[i]; }
354 for (i = 0; i < DJW_EXTRA_CODES; i += 1) { clmtf[cl_i++] = djw_encode_12extra[i]; }
355}
356
357/*********************************************************************/
358/* PREFIX CODES */
359/*********************************************************************/
360#if XD3_ENCODER
361static usize_t
362djw_build_prefix (const djw_weight *freq, uint8_t *clen, int asize, int maxlen)
363{
364 /* Heap with 0th entry unused, prefix tree with up to ALPHABET_SIZE-1 internal nodes,
365 * never more than ALPHABET_SIZE entries actually in the heap (minimum weight subtrees
366 * during prefix construction). First ALPHABET_SIZE entries are the actual symbols,
367 * next ALPHABET_SIZE-1 are internal nodes. */
368 djw_heapen ents[ALPHABET_SIZE * 2];
369 uint heap[ALPHABET_SIZE + 1];
370
371 uint heap_last; /* Index of the last _valid_ heap entry. */
372 uint ents_size; /* Number of entries, including 0th fake entry */
373 int overflow; /* Number of code lengths that overflow */
374 uint32_t total_bits;
375 int i;
376
377 IF_DEBUG (uint32_t first_bits = 0);
378
379 /* Insert real symbol frequences. */
380 for (i = 0; i < asize; i += 1)
381 {
382 ents[i+1].freq = freq[i];
383 }
384
385 again:
386
387 /* The loop is re-entered each time an overflow occurs. Re-initialize... */
388 heap_last = 0;
389 ents_size = 1;
390 overflow = 0;
391 total_bits = 0;
392
393 /* 0th entry terminates the while loop in heap_insert (its the parent of the smallest
394 * element, always less-than) */
395 heap[0] = 0;
396 ents[0].depth = 0;
397 ents[0].freq = 0;
398
399 /* Initial heap. */
400 for (i = 0; i < asize; i += 1, ents_size += 1)
401 {
402 ents[ents_size].depth = 0;
403 ents[ents_size].parent = 0;
404
405 if (ents[ents_size].freq != 0)
406 {
407 heap_insert (heap, ents, ++heap_last, ents_size);
408 }
409 }
410
411 IF_DEBUG (heap_check (heap, ents, heap_last));
412
413 /* Must be at least one symbol, or else we can't get here. */
414 XD3_ASSERT (heap_last != 0);
415
416 /* If there is only one symbol, fake a second to prevent zero-length codes. */
417 if (unlikely (heap_last == 1))
418 {
419 /* Pick either the first or last symbol. */
420 int s = freq[0] ? asize-1 : 0;
421 ents[s+1].freq = 1;
422 goto again;
423 }
424
425 /* Build prefix tree. */
426 while (heap_last > 1)
427 {
428 djw_heapen *h1 = heap_extract (heap, ents, --heap_last);
429 djw_heapen *h2 = heap_extract (heap, ents, --heap_last);
430
431 ents[ents_size].freq = h1->freq + h2->freq;
432 ents[ents_size].depth = 1 + max (h1->depth, h2->depth);
433 ents[ents_size].parent = 0;
434
435 h1->parent = h2->parent = ents_size;
436
437 heap_insert (heap, ents, ++heap_last, ents_size++);
438
439 IF_DEBUG (heap_check (heap, ents, heap_last));
440 }
441
442 /* Now compute prefix code lengths, counting parents. */
443 for (i = 1; i < asize+1; i += 1)
444 {
445 int b = 0;
446
447 if (ents[i].freq != 0)
448 {
449 int p = i;
450
451 while ((p = ents[p].parent) != 0) { b += 1; }
452
453 if (b > maxlen) { overflow = 1; }
454
455 total_bits += b * freq[i-1];
456 }
457
458 /* clen is 0-origin, unlike ents. */
459 clen[i-1] = b;
460 }
461
462 IF_DEBUG (if (first_bits == 0) first_bits = total_bits);
463
464 if (! overflow)
465 {
466 IF_DEBUG (if (first_bits != total_bits)
467 {
468 P(RINT "code length overflow changed %d bits\n", total_bits - first_bits);
469 });
470 return total_bits;
471 }
472
473 /* OPT: There is a non-looping way to fix overflow shown in zlib, but this is easier
474 * (for now), as done in bzip2. */
475 for (i = 1; i < asize+1; i += 1)
476 {
477 ents[i].freq = ents[i].freq / 2 + 1;
478 }
479
480 goto again;
481}
482
483static void
484djw_build_codes (uint *codes, const uint8_t *clen, int asize DEBUG_ARG (int abs_max))
485{
486 int i, l;
487 int min_clen = DJW_MAX_CODELEN;
488 int max_clen = 0;
489 uint code = 0;
490
491 for (i = 0; i < asize; i += 1)
492 {
493 if (clen[i] > 0 && clen[i] < min_clen)
494 {
495 min_clen = clen[i];
496 }
497
498 max_clen = max (max_clen, (int) clen[i]);
499 }
500
501 XD3_ASSERT (max_clen <= abs_max);
502
503 for (l = min_clen; l <= max_clen; l += 1)
504 {
505 for (i = 0; i < asize; i += 1)
506 {
507 if (clen[i] == l) { codes[i] = code++; }
508 }
509
510 code <<= 1;
511 }
512}
513
514/*********************************************************************/
515/* MOVE-TO-FRONT */
516/*********************************************************************/
517static void
518djw_compute_mtf_1_2 (djw_prefix *prefix,
519 uint8_t *mtf,
520 djw_weight *freq_out, /* freak out! */
521 usize_t nsym)
522{
523 int i, j, k;
524 usize_t sym;
525 usize_t size = prefix->scount;
526 usize_t mtf_i = 0;
527 int mtf_run = 0;
528
529 memset (freq_out, 0, sizeof (freq_out[0]) * (nsym+1));
530
531 for (i = 0; i < size; )
532 {
533 /* OPT: Bzip optimizes this algorithm a little by effectively checking j==0 before
534 * the MTF update. */
535 sym = prefix->symbol[i++];
536
537 for (j = 0; mtf[j] != sym; j += 1) { }
538
539 XD3_ASSERT (j < nsym);
540
541 for (k = j; k >= 1; k -= 1) { mtf[k] = mtf[k-1]; }
542
543 mtf[0] = sym;
544
545 if (j == 0)
546 {
547 mtf_run += 1;
548 continue;
549 }
550
551 if (mtf_run > 0)
552 {
553 djw_update_1_2 (& mtf_run, & mtf_i, prefix->mtfsym, freq_out);
554 }
555
556 /* Non-zero symbols are offset by RUN_1 */
557 prefix->mtfsym[mtf_i++] = j+RUN_1;
558 freq_out[j+RUN_1] += 1;
559 }
560
561 if (mtf_run > 0)
562 {
563 djw_update_1_2 (& mtf_run, & mtf_i, prefix->mtfsym, freq_out);
564 }
565
566 prefix->mcount = mtf_i;
567}
568
569static usize_t
570djw_count_freqs (djw_weight *freq, xd3_output *input)
571{
572 xd3_output *in;
573 usize_t size = 0;
574
575 memset (freq, 0, sizeof (freq[0]) * ALPHABET_SIZE);
576
577 /* Freqency counting. OPT: can be accomplished beforehand. */
578 for (in = input; in; in = in->next_page)
579 {
580 const uint8_t *p = in->base;
581 const uint8_t *p_max = p + in->next;
582
583 size += in->next;
584
585 do { freq[*p++] += 1; } while (p < p_max);
586 }
587
588 IF_DEBUG1 ({int i;
589 P(RINT "freqs: ");
590 for (i = 0; i < ALPHABET_SIZE; i += 1) { P(RINT "%u ", freq[i]); }
591 P(RINT "\n");});
592
593 return size;
594}
595
596static void
597djw_compute_multi_prefix (int groups,
598 uint8_t clen[DJW_MAX_GROUPS][ALPHABET_SIZE],
599 djw_prefix *prefix)
600{
601 int gp, i;
602
603 prefix->scount = ALPHABET_SIZE;
604 memcpy (prefix->symbol, clen[0], ALPHABET_SIZE);
605
606 for (gp = 1; gp < groups; gp += 1)
607 {
608 for (i = 0; i < ALPHABET_SIZE; i += 1)
609 {
610 if (clen[gp][i] == 0)
611 {
612 continue;
613 }
614
615 prefix->symbol[prefix->scount++] = clen[gp][i];
616 }
617 }
618}
619
620static void
621djw_compute_prefix_1_2 (djw_prefix *prefix, djw_weight *freq)
622{
623 uint8_t clmtf[DJW_MAX_CODELEN+1];
624
625 djw_init_clen_mtf_1_2 (clmtf);
626
627 djw_compute_mtf_1_2 (prefix, clmtf, freq, DJW_MAX_CODELEN+1);
628}
629
630static int
631djw_encode_prefix (xd3_stream *stream,
632 xd3_output **output,
633 bit_state *bstate,
634 djw_prefix *prefix)
635{
636 int ret, i;
637 uint num_to_encode;
638 djw_weight clfreq[DJW_TOTAL_CODES];
639 uint8_t clclen[DJW_TOTAL_CODES];
640 uint clcode[DJW_TOTAL_CODES];
641
642 IF_TUNE (memset (clfreq, 0, sizeof (clfreq)));
643
644 /* Move-to-front encode prefix symbols, count frequencies */
645 djw_compute_prefix_1_2 (prefix, clfreq);
646
647 /* Compute codes */
648 djw_build_prefix (clfreq, clclen, DJW_TOTAL_CODES, DJW_MAX_CLCLEN);
649 djw_build_codes (clcode, clclen, DJW_TOTAL_CODES DEBUG_ARG (DJW_MAX_CLCLEN));
650
651 /* Compute number of extra codes beyond basic ones for this template. */
652 num_to_encode = DJW_TOTAL_CODES;
653 while (num_to_encode > DJW_EXTRA_12OFFSET && clclen[num_to_encode-1] == 0) { num_to_encode -= 1; }
654 XD3_ASSERT (num_to_encode - DJW_EXTRA_12OFFSET < (1 << DJW_EXTRA_CODE_BITS));
655
656 /* Encode: # of extra codes */
657 if ((ret = xd3_encode_bits (stream, output, bstate, DJW_EXTRA_CODE_BITS,
658 num_to_encode - DJW_EXTRA_12OFFSET))) { return ret; }
659
660 /* Encode: MTF code lengths */
661 for (i = 0; i < num_to_encode; i += 1)
662 {
663 if ((ret = xd3_encode_bits (stream, output, bstate, DJW_CLCLEN_BITS, clclen[i]))) { return ret; }
664 }
665
666 /* Encode: CLEN code lengths */
667 for (i = 0; i < prefix->mcount; i += 1)
668 {
669 usize_t mtf_sym = prefix->mtfsym[i];
670 usize_t bits = clclen[mtf_sym];
671 usize_t code = clcode[mtf_sym];
672
673 if ((ret = xd3_encode_bits (stream, output, bstate, bits, code))) { return ret; }
674 }
675
676 IF_TUNE (memcpy (tune_freq, clfreq, sizeof (clfreq)));
677
678 return 0;
679}
680
681static void
682djw_compute_selector_1_2 (djw_prefix *prefix,
683 usize_t groups,
684 djw_weight *gbest_freq)
685{
686 uint8_t grmtf[DJW_MAX_GROUPS];
687 usize_t i;
688
689 for (i = 0; i < groups; i += 1) { grmtf[i] = i; }
690
691 djw_compute_mtf_1_2 (prefix, grmtf, gbest_freq, groups);
692}
693
694static int
695xd3_encode_howmany_groups (xd3_stream *stream,
696 xd3_sec_cfg *cfg,
697 usize_t input_size,
698 usize_t *ret_groups,
699 usize_t *ret_sector_size)
700{
701 usize_t cfg_groups = 0;
702 usize_t cfg_sector_size = 0;
703 usize_t sugg_groups = 0;
704 usize_t sugg_sector_size = 0;
705
706 if (cfg->ngroups != 0)
707 {
708 if (cfg->ngroups < 0 || cfg->ngroups > DJW_MAX_GROUPS)
709 {
710 stream->msg = "invalid secondary encoder group number";
711 return EINVAL;
712 }
713
714 cfg_groups = cfg->ngroups;
715 }
716
717 if (cfg->sector_size != 0)
718 {
719 if (cfg->sector_size < DJW_SECTORSZ_MULT || cfg->sector_size > DJW_SECTORSZ_MAX || (cfg->sector_size % DJW_SECTORSZ_MULT) != 0)
720 {
721 stream->msg = "invalid secondary encoder sector size";
722 return EINVAL;
723 }
724
725 cfg_sector_size = cfg->sector_size;
726 }
727
728 if (cfg_groups == 0 || cfg_sector_size == 0)
729 {
730 /* These values were found empirically using xdelta3-tune around version
731 * xdfs-0.256. */
732 switch (cfg->data_type)
733 {
734 case DATA_SECTION:
735 if (input_size < 1000) { sugg_groups = 1; sugg_sector_size = 0; }
736 else if (input_size < 4000) { sugg_groups = 2; sugg_sector_size = 10; }
737 else if (input_size < 7000) { sugg_groups = 3; sugg_sector_size = 10; }
738 else if (input_size < 10000) { sugg_groups = 4; sugg_sector_size = 10; }
739 else if (input_size < 25000) { sugg_groups = 5; sugg_sector_size = 10; }
740 else if (input_size < 50000) { sugg_groups = 7; sugg_sector_size = 20; }
741 else if (input_size < 100000) { sugg_groups = 8; sugg_sector_size = 30; }
742 else { sugg_groups = 8; sugg_sector_size = 70; }
743 break;
744 case INST_SECTION:
745 if (input_size < 7000) { sugg_groups = 1; sugg_sector_size = 0; }
746 else if (input_size < 10000) { sugg_groups = 2; sugg_sector_size = 50; }
747 else if (input_size < 25000) { sugg_groups = 3; sugg_sector_size = 50; }
748 else if (input_size < 50000) { sugg_groups = 6; sugg_sector_size = 40; }
749 else if (input_size < 100000) { sugg_groups = 8; sugg_sector_size = 40; }
750 else { sugg_groups = 8; sugg_sector_size = 40; }
751 break;
752 case ADDR_SECTION:
753 if (input_size < 9000) { sugg_groups = 1; sugg_sector_size = 0; }
754 else if (input_size < 25000) { sugg_groups = 2; sugg_sector_size = 130; }
755 else if (input_size < 50000) { sugg_groups = 3; sugg_sector_size = 130; }
756 else if (input_size < 100000) { sugg_groups = 5; sugg_sector_size = 130; }
757 else { sugg_groups = 7; sugg_sector_size = 130; }
758 break;
759 }
760
761 if (cfg_groups == 0)
762 {
763 cfg_groups = sugg_groups;
764 }
765
766 if (cfg_sector_size == 0)
767 {
768 cfg_sector_size = sugg_sector_size;
769 }
770 }
771
772 if (cfg_groups != 1 && cfg_sector_size == 0)
773 {
774 switch (cfg->data_type)
775 {
776 case DATA_SECTION:
777 cfg_sector_size = 20;
778 break;
779 case INST_SECTION:
780 cfg_sector_size = 50;
781 break;
782 case ADDR_SECTION:
783 cfg_sector_size = 130;
784 break;
785 }
786 }
787
788 (*ret_groups) = cfg_groups;
789 (*ret_sector_size) = cfg_sector_size;
790
791 XD3_ASSERT (cfg_groups > 0 && cfg_groups <= DJW_MAX_GROUPS);
792 XD3_ASSERT (cfg_groups == 1 || (cfg_sector_size >= DJW_SECTORSZ_MULT && cfg_sector_size <= DJW_SECTORSZ_MAX));
793
794 return 0;
795}
796
797static int
798xd3_real_encode_huff (xd3_stream *stream,
799 djw_stream *h,
800 xd3_output *input,
801 xd3_output *output,
802 xd3_sec_cfg *cfg)
803{
804 int ret;
805 usize_t groups, sector_size;
806 bit_state bstate = BIT_STATE_ENCODE_INIT;
807 xd3_output *in;
808 int encode_bits;
809 usize_t input_bits;
810 usize_t input_bytes;
811 usize_t initial_offset = output->next;
812 djw_weight real_freq[ALPHABET_SIZE];
813 uint8_t *gbest = NULL; /* Dynamic allocations: could put these in djw_stream. */
814 uint8_t *gbest_mtf = NULL;
815
816 input_bytes = djw_count_freqs (real_freq, input);
817 input_bits = input_bytes * 8;
818
819 XD3_ASSERT (input_bytes > 0);
820
821 if ((ret = xd3_encode_howmany_groups (stream, cfg, input_bytes, & groups, & sector_size)))
822 {
823 return ret;
824 }
825
826 if (0)
827 {
828 regroup:
829 /* Sometimes we dynamically decide there are too many groups. Arrive here. */
830 output->next = initial_offset;
831 xd3_bit_state_encode_init (& bstate);
832 }
833
834 /* Encode: # of groups (3 bits) */
835 if ((ret = xd3_encode_bits (stream, & output, & bstate, DJW_GROUP_BITS, groups-1))) { goto failure; }
836
837 if (groups == 1)
838 {
839 /* Single Huffman group. */
840 uint code[ALPHABET_SIZE]; /* Codes */
841 IF_TUNE (uint8_t *clen = tune_clen[0];)
842 IF_NTUNE (uint8_t clen[ALPHABET_SIZE];)
843 uint8_t prefix_mtfsym[ALPHABET_SIZE];
844 djw_prefix prefix;
845
846 encode_bits =
847 djw_build_prefix (real_freq, clen, ALPHABET_SIZE, DJW_MAX_CODELEN);
848 djw_build_codes (code, clen, ALPHABET_SIZE DEBUG_ARG (DJW_MAX_CODELEN));
849
850 if (encode_bits + EFFICIENCY_BITS >= input_bits && ! cfg->inefficient) { goto nosecond; }
851
852 /* Encode: prefix */
853 prefix.mtfsym = prefix_mtfsym;
854 prefix.symbol = clen;
855 prefix.scount = ALPHABET_SIZE;
856
857 if ((ret = djw_encode_prefix (stream, & output, & bstate, & prefix))) { goto failure; }
858
859 if (encode_bits + (8 * output->next) + EFFICIENCY_BITS >= input_bits && ! cfg->inefficient) { goto nosecond; }
860
861 IF_TUNE (tune_prefix_bits = xd3_bitsof_output (output, & bstate));
862 IF_TUNE (tune_select_bits = 0);
863 IF_TUNE (tune_encode_bits = encode_bits);
864
865 /* Encode: data */
866 for (in = input; in; in = in->next_page)
867 {
868 const uint8_t *p = in->base;
869 const uint8_t *p_max = p + in->next;
870
871 do
872 {
873 usize_t sym = *p++;
874 usize_t bits = clen[sym];
875
876 IF_DEBUG (encode_bits -= bits);
877
878 if ((ret = xd3_encode_bits (stream, & output, & bstate, bits, code[sym]))) { goto failure; }
879 }
880 while (p < p_max);
881 }
882
883 XD3_ASSERT (encode_bits == 0);
884 }
885 else
886 {
887 /* DJW Huffman */
888 djw_weight evolve_freq[DJW_MAX_GROUPS][ALPHABET_SIZE];
889#if TUNE_HUFFMAN == 0
890 uint8_t evolve_clen[DJW_MAX_GROUPS][ALPHABET_SIZE];
891#else
892#define evolve_clen tune_clen
893#endif
894 djw_weight left = input_bytes;
895 int gp;
896 int niter = 0;
897 usize_t select_bits;
898 usize_t sym1 = 0, sym2 = 0, s;
899 usize_t gcost[DJW_MAX_GROUPS];
900 uint gbest_code[DJW_MAX_GROUPS+1];
901 uint8_t gbest_clen[DJW_MAX_GROUPS+1];
902 usize_t gbest_max = 1 + (input_bytes - 1) / sector_size;
903 int best_bits = 0;
904 usize_t gbest_no;
905 usize_t gpcnt;
906 const uint8_t *p;
907 IF_DEBUG1 (usize_t gcount[DJW_MAX_GROUPS]);
908
909 /* Encode: sector size (5 bits) */
910 if ((ret = xd3_encode_bits (stream, & output, & bstate,
911 DJW_SECTORSZ_BITS, (sector_size/DJW_SECTORSZ_MULT)-1))) { goto failure; }
912
913 /* Dynamic allocation. */
914 if (gbest == NULL) { gbest = xd3_alloc (stream, gbest_max, 1); }
915 if (gbest_mtf == NULL) { gbest_mtf = xd3_alloc (stream, gbest_max, 1); }
916
917 /* OPT: Some of the inner loops can be optimized, as shown in bzip2 */
918
919 /* Generate initial code length tables. */
920 for (gp = 0; gp < groups; gp += 1)
921 {
922 djw_weight sum = 0;
923 djw_weight goal = left / (groups - gp);
924
925 IF_DEBUG1 (usize_t nz = 0);
926
927 /* Due to the single-code granularity of this distribution, it may be that we
928 * can't generate a distribution for each group. In that case subtract one
929 * gropu and try again. If (inefficient), we're testing group behavior, so
930 * don't mess things up. */
931 if (goal == 0 && !cfg->inefficient)
932 {
933 IF_DEBUG1 (P(RINT "too many groups (%u), dropping one\n", groups));
934 groups -= 1;
935 goto regroup;
936 }
937
938 /* Sum == goal is possible when (cfg->inefficient)... */
939 while (sum < goal)
940 {
941 XD3_ASSERT (sym2 < ALPHABET_SIZE);
942 IF_DEBUG1 (nz += real_freq[sym2] != 0);
943 sum += real_freq[sym2++];
944 }
945
946 IF_DEBUG1(P(RINT "group %u has symbols %u..%u (%u non-zero) (%u/%u = %.3f)\n",
947 gp, sym1, sym2, nz, sum, input_bytes, sum / (double)input_bytes););
948
949 for (s = 0; s < ALPHABET_SIZE; s += 1)
950 {
951 evolve_clen[gp][s] = (s >= sym1 && s <= sym2) ? 1 : 16;
952 }
953
954 left -= sum;
955 sym1 = sym2+1;
956 }
957
958 repeat:
959
960 niter += 1;
961 gbest_no = 0;
962 memset (evolve_freq, 0, sizeof (evolve_freq[0]) * groups);
963 IF_DEBUG1 (memset (gcount, 0, sizeof (gcount[0]) * groups));
964
965 /* For each input page (loop is irregular to allow non-pow2-size group size. */
966 in = input;
967 p = in->base;
968
969 /* For each group-size sector. */
970 do
971 {
972 const uint8_t *p0 = p;
973 xd3_output *in0 = in;
974 usize_t best = 0;
975 usize_t winner = 0;
976
977 /* Select best group for each sector, update evolve_freq. */
978 memset (gcost, 0, sizeof (gcost[0]) * groups);
979
980 /* For each byte in sector. */
981 for (gpcnt = 0; gpcnt < sector_size; gpcnt += 1)
982 {
983 /* For each group. */
984 for (gp = 0; gp < groups; gp += 1)
985 {
986 gcost[gp] += evolve_clen[gp][*p];
987 }
988
989 /* Check end-of-input-page. */
990# define GP_PAGE() \
991 if (++p - in->base == in->next) \
992 { \
993 in = in->next_page; \
994 if (in == NULL) { break; } \
995 p = in->base; \
996 }
997
998 GP_PAGE ();
999 }
1000
1001 /* Find min cost group for this sector */
1002 best = -1U;
1003 for (gp = 0; gp < groups; gp += 1)
1004 {
1005 if (gcost[gp] < best) { best = gcost[gp]; winner = gp; }
1006 }
1007
1008 gbest[gbest_no++] = winner;
1009 IF_DEBUG1 (gcount[winner] += 1);
1010
1011 p = p0;
1012 in = in0;
1013
1014 /* Update group frequencies. */
1015 for (gpcnt = 0; gpcnt < sector_size; gpcnt += 1)
1016 {
1017 evolve_freq[winner][*p] += 1;
1018
1019 GP_PAGE ();
1020 }
1021 }
1022 while (in != NULL);
1023
1024 XD3_ASSERT (gbest_no == gbest_max);
1025
1026 /* Recompute code lengths. */
1027 encode_bits = 0;
1028 for (gp = 0; gp < groups; gp += 1)
1029 {
1030 int i;
1031 uint8_t evolve_zero[ALPHABET_SIZE];
1032 int any_zeros = 0;
1033
1034 memset (evolve_zero, 0, sizeof (evolve_zero));
1035
1036 /* Cannot allow a zero clen when the real frequency is non-zero. Note: this
1037 * means we are going to encode a fairly long code for these unused entries. An
1038 * improvement would be to implement a NOTUSED code for when these are actually
1039 * zero, but this requires another data structure (evolve_zero) since we don't
1040 * know when evolve_freq[i] == 0... Briefly tested, looked worse. */
1041 for (i = 0; i < ALPHABET_SIZE; i += 1)
1042 {
1043 if (evolve_freq[gp][i] == 0 && real_freq[i] != 0)
1044 {
1045 evolve_freq[gp][i] = 1;
1046 evolve_zero[i] = 1;
1047 any_zeros = 1;
1048 }
1049 }
1050
1051 encode_bits += djw_build_prefix (evolve_freq[gp], evolve_clen[gp], ALPHABET_SIZE, DJW_MAX_CODELEN);
1052
1053 /* The above faking of frequencies does not matter for the last iteration, but
1054 * we don't know when that is yet. However, it also breaks the encode_bits
1055 * computation. Necessary for accuracy, and for the (encode_bits==0) assert
1056 * after all bits are output. */
1057 if (any_zeros)
1058 {
1059 IF_DEBUG1 (usize_t save_total = encode_bits);
1060
1061 for (i = 0; i < ALPHABET_SIZE; i += 1)
1062 {
1063 if (evolve_zero[i]) { encode_bits -= evolve_clen[gp][i]; }
1064 }
1065
1066 IF_DEBUG1 (P(RINT "evolve_zero reduced %u bits in group %u\n", save_total - encode_bits, gp));
1067 }
1068 }
1069
1070 IF_DEBUG1(
1071 P(RINT "pass %u total bits: %u group uses: ", niter, encode_bits);
1072 for (gp = 0; gp < groups; gp += 1) { P(RINT "%u ", gcount[gp]); }
1073 P(RINT "\n"););
1074
1075 /* End iteration. (The following assertion proved invalid.) */
1076 /*XD3_ASSERT (niter == 1 || best_bits >= encode_bits);*/
1077
1078 IF_DEBUG1 (if (niter > 1 && best_bits < encode_bits) {
1079 P(RINT "iteration lost %u bits\n", encode_bits - best_bits); });
1080
1081 if (niter == 1 || (niter < DJW_MAX_ITER && (best_bits - encode_bits) >= DJW_MIN_IMPROVEMENT))
1082 {
1083 best_bits = encode_bits;
1084 goto repeat;
1085 }
1086
1087 /* Efficiency check. */
1088 if (encode_bits + EFFICIENCY_BITS >= input_bits && ! cfg->inefficient) { goto nosecond; }
1089
1090 IF_DEBUG1 (P(RINT "djw compression: %u -> %0.3f\n", input_bytes, encode_bits / 8.0));
1091
1092 /* Encode: prefix */
1093 {
1094 uint8_t prefix_symbol[DJW_MAX_GROUPS * ALPHABET_SIZE];
1095 uint8_t prefix_mtfsym[DJW_MAX_GROUPS * ALPHABET_SIZE];
1096 uint8_t prefix_repcnt[DJW_MAX_GROUPS * ALPHABET_SIZE];
1097 djw_prefix prefix;
1098
1099 prefix.symbol = prefix_symbol;
1100 prefix.mtfsym = prefix_mtfsym;
1101 prefix.repcnt = prefix_repcnt;
1102
1103 djw_compute_multi_prefix (groups, evolve_clen, & prefix);
1104 if ((ret = djw_encode_prefix (stream, & output, & bstate, & prefix))) { goto failure; }
1105 }
1106
1107 /* Encode: selector frequencies */
1108 {
1109 djw_weight gbest_freq[DJW_MAX_GROUPS+1];
1110 djw_prefix gbest_prefix;
1111 usize_t i;
1112
1113 gbest_prefix.scount = gbest_no;
1114 gbest_prefix.symbol = gbest;
1115 gbest_prefix.mtfsym = gbest_mtf;
1116
1117 djw_compute_selector_1_2 (& gbest_prefix, groups, gbest_freq);
1118
1119 select_bits =
1120 djw_build_prefix (gbest_freq, gbest_clen, groups+1, DJW_MAX_GBCLEN);
1121 djw_build_codes (gbest_code, gbest_clen, groups+1 DEBUG_ARG (DJW_MAX_GBCLEN));
1122
1123 IF_TUNE (tune_prefix_bits = xd3_bitsof_output (output, & bstate));
1124 IF_TUNE (tune_select_bits = select_bits);
1125 IF_TUNE (tune_encode_bits = encode_bits);
1126
1127 for (i = 0; i < groups+1; i += 1)
1128 {
1129 if ((ret = xd3_encode_bits (stream, & output, & bstate, DJW_GBCLEN_BITS, gbest_clen[i]))) { goto failure; }
1130 }
1131
1132 for (i = 0; i < gbest_prefix.mcount; i += 1)
1133 {
1134 usize_t gp_mtf = gbest_mtf[i];
1135 usize_t gp_sel_bits = gbest_clen[gp_mtf];
1136 usize_t gp_sel_code = gbest_code[gp_mtf];
1137
1138 XD3_ASSERT (gp_mtf < groups+1);
1139
1140 if ((ret = xd3_encode_bits (stream, & output, & bstate, gp_sel_bits, gp_sel_code))) { goto failure; }
1141
1142 IF_DEBUG (select_bits -= gp_sel_bits);
1143 }
1144
1145 XD3_ASSERT (select_bits == 0);
1146 }
1147
1148 /* Efficiency check. */
1149 if (encode_bits + select_bits + (8 * output->next) + EFFICIENCY_BITS >= input_bits && ! cfg->inefficient) { goto nosecond; }
1150
1151 /* Encode: data */
1152 {
1153 uint evolve_code[DJW_MAX_GROUPS][ALPHABET_SIZE];
1154 usize_t sector = 0;
1155
1156 /* Build code tables for each group. */
1157 for (gp = 0; gp < groups; gp += 1)
1158 {
1159 djw_build_codes (evolve_code[gp], evolve_clen[gp], ALPHABET_SIZE DEBUG_ARG (DJW_MAX_CODELEN));
1160 }
1161
1162 /* Now loop over the input. */
1163 in = input;
1164 p = in->base;
1165
1166 do
1167 {
1168 /* For each sector. */
1169 usize_t gp_best = gbest[sector];
1170 uint *gp_codes = evolve_code[gp_best];
1171 uint8_t *gp_clens = evolve_clen[gp_best];
1172
1173 XD3_ASSERT (sector < gbest_no);
1174
1175 sector += 1;
1176
1177 /* Encode the sector data. */
1178 for (gpcnt = 0; gpcnt < sector_size; gpcnt += 1)
1179 {
1180 usize_t sym = *p;
1181 usize_t bits = gp_clens[sym];
1182 usize_t code = gp_codes[sym];
1183
1184 IF_DEBUG (encode_bits -= bits);
1185
1186 if ((ret = xd3_encode_bits (stream, & output, & bstate, bits, code))) { goto failure; }
1187
1188 GP_PAGE ();
1189 }
1190 }
1191 while (in != NULL);
1192
1193 XD3_ASSERT (select_bits == 0);
1194 XD3_ASSERT (encode_bits == 0);
1195
1196#undef evolve_clen
1197 }
1198 }
1199
1200 ret = xd3_flush_bits (stream, & output, & bstate);
1201
1202 if (0)
1203 {
1204 nosecond:
1205 stream->msg = "secondary compression was inefficient";
1206 ret = XD3_NOSECOND;
1207 }
1208
1209 failure:
1210
1211 xd3_free (stream, gbest);
1212 xd3_free (stream, gbest_mtf);
1213 return ret;
1214}
1215#endif /* XD3_ENCODER */
1216
1217/*********************************************************************/
1218/* DECODE */
1219/*********************************************************************/
1220
1221static void
1222djw_build_decoder (xd3_stream *stream,
1223 usize_t asize,
1224 usize_t abs_max,
1225 const uint8_t *clen,
1226 uint8_t *inorder,
1227 uint *base,
1228 uint *limit,
1229 uint *min_clenp,
1230 uint *max_clenp)
1231{
1232 int i, l;
1233 const uint8_t *ci;
1234 uint nr_clen [DJW_MAX_CODELEN+1];
1235 uint tmp_base[DJW_MAX_CODELEN+1];
1236 int min_clen;
1237 int max_clen;
1238
1239 /* Assumption: the two temporary arrays are large enough to hold abs_max. */
1240 XD3_ASSERT (abs_max <= DJW_MAX_CODELEN);
1241
1242 /* This looks something like the start of zlib's inftrees.c */
1243 memset (nr_clen, 0, sizeof (nr_clen[0]) * (abs_max+1));
1244
1245 /* Count number of each code length */
1246 i = asize;
1247 ci = clen;
1248 do
1249 {
1250 /* Caller _must_ check that values are in-range. Most of the time
1251 * the caller decodes a specific number of bits, which imply the max value, and the
1252 * other time the caller decodes a huffman value, which must be in-range. Therefore,
1253 * its an assertion and this function cannot otherwise fail. */
1254 XD3_ASSERT (*ci <= abs_max);
1255
1256 nr_clen[*ci++]++;
1257 }
1258 while (--i != 0);
1259
1260 /* Compute min, max. */
1261 for (i = 1; i <= abs_max; i += 1) { if (nr_clen[i]) { break; } }
1262 min_clen = i;
1263 for (i = abs_max; i != 0; i -= 1) { if (nr_clen[i]) { break; } }
1264 max_clen = i;
1265
1266 /* Fill the BASE, LIMIT table. */
1267 tmp_base[min_clen] = 0;
1268 base[min_clen] = 0;
1269 limit[min_clen] = nr_clen[min_clen] - 1;
1270 for (i = min_clen + 1; i <= max_clen; i += 1)
1271 {
1272 uint last_limit = ((limit[i-1] + 1) << 1);
1273 tmp_base[i] = tmp_base[i-1] + nr_clen[i-1];
1274 limit[i] = last_limit + nr_clen[i] - 1;
1275 base[i] = last_limit - tmp_base[i];
1276 }
1277
1278 /* Fill the inorder array, canonically ordered codes. */
1279 ci = clen;
1280 for (i = 0; i < asize; i += 1)
1281 {
1282 if ((l = *ci++) != 0)
1283 {
1284 inorder[tmp_base[l]++] = i;
1285 }
1286 }
1287
1288 *min_clenp = min_clen;
1289 *max_clenp = max_clen;
1290}
1291
1292static INLINE int
1293djw_decode_symbol (xd3_stream *stream,
1294 bit_state *bstate,
1295 const uint8_t **input,
1296 const uint8_t *input_end,
1297 const uint8_t *inorder,
1298 const uint *base,
1299 const uint *limit,
1300 uint min_clen,
1301 uint max_clen,
1302 usize_t *sym,
1303 usize_t max_sym)
1304{
1305 usize_t code = 0;
1306 usize_t bits = 0;
1307
1308 /* OPT: Supposedly a small lookup table improves speed here... */
1309
1310 /* Code outline is similar to xd3_decode_bits... */
1311 if (bstate->cur_mask == 0x100) { goto next_byte; }
1312
1313 for (;;)
1314 {
1315 do
1316 {
1317 if (bits == max_clen) { goto corrupt; }
1318
1319 bits += 1;
1320 code = (code << 1);
1321
1322 if (bstate->cur_byte & bstate->cur_mask) { code |= 1; }
1323
1324 IF_DEBUG1 (P(RINT "%u", (bstate->cur_byte & bstate->cur_mask) && 1));
1325
1326 bstate->cur_mask <<= 1;
1327
1328 if (bits >= min_clen && code <= limit[bits]) { goto done; }
1329 }
1330 while (bstate->cur_mask != 0x100);
1331
1332 next_byte:
1333
1334 if (*input == input_end)
1335 {
1336 stream->msg = "secondary decoder end of input";
1337 return EINVAL;
1338 }
1339
1340 bstate->cur_byte = *(*input)++;
1341 bstate->cur_mask = 1;
1342 }
1343
1344 done:
1345
1346 if (base[bits] <= code)
1347 {
1348 usize_t offset = code - base[bits];
1349
1350 if (offset <= max_sym)
1351 {
1352 IF_DEBUG1 (P(RINT " (%u) ", bits));
1353 *sym = inorder[offset];
1354 return 0;
1355 }
1356 }
1357
1358 corrupt:
1359 stream->msg = "secondary decoder invalid code";
1360 return EINVAL;
1361}
1362
1363static int
1364djw_decode_clclen (xd3_stream *stream,
1365 bit_state *bstate,
1366 const uint8_t **input,
1367 const uint8_t *input_end,
1368 uint8_t *cl_inorder,
1369 uint *cl_base,
1370 uint *cl_limit,
1371 uint *cl_minlen,
1372 uint *cl_maxlen,
1373 uint8_t *cl_mtf)
1374{
1375 int ret;
1376 uint8_t cl_clen[DJW_TOTAL_CODES];
1377 usize_t num_codes, value;
1378 int i;
1379
1380 /* How many extra code lengths to encode. */
1381 if ((ret = xd3_decode_bits (stream, bstate, input, input_end, DJW_EXTRA_CODE_BITS, & num_codes))) { return ret; }
1382
1383 num_codes += DJW_EXTRA_12OFFSET;
1384
1385 /* Read num_codes. */
1386 for (i = 0; i < num_codes; i += 1)
1387 {
1388 if ((ret = xd3_decode_bits (stream, bstate, input, input_end, DJW_CLCLEN_BITS, & value))) { return ret; }
1389
1390 cl_clen[i] = value;
1391 }
1392
1393 /* Set the rest to zero. */
1394 for (; i < DJW_TOTAL_CODES; i += 1) { cl_clen[i] = 0; }
1395
1396 /* No need to check for in-range clen values, because: */
1397 XD3_ASSERT (1 << DJW_CLCLEN_BITS == DJW_MAX_CLCLEN + 1);
1398
1399 /* Build the code-length decoder. */
1400 djw_build_decoder (stream, DJW_TOTAL_CODES, DJW_MAX_CLCLEN,
1401 cl_clen, cl_inorder, cl_base, cl_limit, cl_minlen, cl_maxlen);
1402
1403 /* Initialize the MTF state. */
1404 djw_init_clen_mtf_1_2 (cl_mtf);
1405
1406 return 0;
1407}
1408
1409static INLINE int
1410djw_decode_1_2 (xd3_stream *stream,
1411 bit_state *bstate,
1412 const uint8_t **input,
1413 const uint8_t *input_end,
1414 const uint8_t *inorder,
1415 const uint *base,
1416 const uint *limit,
1417 const uint *minlen,
1418 const uint *maxlen,
1419 uint8_t *mtfvals,
1420 usize_t elts,
1421 usize_t skip_offset,
1422 uint8_t *values)
1423{
1424 usize_t n = 0, rep = 0, mtf = 0, s = 0;
1425 int ret;
1426
1427 while (n < elts)
1428 {
1429 /* Special case inside generic code: CLEN only: If not the first group, we already
1430 * know the zero frequencies. */
1431 if (skip_offset != 0 && n >= skip_offset && values[n-skip_offset] == 0)
1432 {
1433 values[n++] = 0;
1434 continue;
1435 }
1436
1437 /* Repeat last symbol. */
1438 if (rep != 0)
1439 {
1440 values[n++] = mtfvals[0];
1441 rep -= 1;
1442 continue;
1443 }
1444
1445 /* Symbol following last repeat code. */
1446 if (mtf != 0)
1447 {
1448 usize_t sym = djw_update_mtf (mtfvals, mtf);
1449 values[n++] = sym;
1450 mtf = 0;
1451 continue;
1452 }
1453
1454 /* Decode next symbol/repeat code. */
1455 if ((ret = djw_decode_symbol (stream, bstate, input, input_end,
1456 inorder, base, limit, *minlen, *maxlen,
1457 & mtf, DJW_TOTAL_CODES))) { return ret; }
1458
1459 if (mtf <= RUN_1)
1460 {
1461 /* Repetition. */
1462 rep = ((mtf + 1) << s);
1463 mtf = 0;
1464 s += 1;
1465 }
1466 else
1467 {
1468 /* Remove the RUN_1 MTF offset. */
1469 mtf -= 1;
1470 s = 0;
1471 }
1472 }
1473
1474 /* If (rep != 0) there were too many codes received. */
1475 if (rep != 0)
1476 {
1477 stream->msg = "secondary decoder invalid repeat code";
1478 return EINVAL;
1479 }
1480
1481 return 0;
1482}
1483
1484static INLINE int
1485djw_decode_prefix (xd3_stream *stream,
1486 bit_state *bstate,
1487 const uint8_t **input,
1488 const uint8_t *input_end,
1489 const uint8_t *cl_inorder,
1490 const uint *cl_base,
1491 const uint *cl_limit,
1492 const uint *cl_minlen,
1493 const uint *cl_maxlen,
1494 uint8_t *cl_mtf,
1495 usize_t groups,
1496 uint8_t *clen)
1497{
1498 return djw_decode_1_2 (stream, bstate, input, input_end,
1499 cl_inorder, cl_base, cl_limit, cl_minlen, cl_maxlen, cl_mtf,
1500 ALPHABET_SIZE * groups, ALPHABET_SIZE, clen);
1501}
1502
1503static int
1504xd3_decode_huff (xd3_stream *stream,
1505 djw_stream *h,
1506 const uint8_t **input_pos,
1507 const uint8_t *const input_end,
1508 uint8_t **output_pos,
1509 const uint8_t *const output_end)
1510{
1511 const uint8_t *input = *input_pos;
1512 uint8_t *output = *output_pos;
1513 bit_state bstate = BIT_STATE_DECODE_INIT;
1514 uint8_t *sel_group = NULL;
1515 usize_t groups, gp;
1516 usize_t output_bytes = (output_end - output);
1517 usize_t sector_size;
1518 usize_t sectors;
1519 int ret;
1520
1521 /* Invalid input. */
1522 if (output_bytes == 0)
1523 {
1524 stream->msg = "secondary decoder invalid input";
1525 return EINVAL;
1526 }
1527
1528 /* Decode: number of groups */
1529 if ((ret = xd3_decode_bits (stream, & bstate, & input, input_end, DJW_GROUP_BITS, & groups))) { goto fail; }
1530
1531 groups += 1;
1532
1533 if (groups > 1)
1534 {
1535 /* Decode: group size */
1536 if ((ret = xd3_decode_bits (stream, & bstate, & input, input_end, DJW_SECTORSZ_BITS, & sector_size))) { goto fail; }
1537
1538 sector_size = (sector_size + 1) * DJW_SECTORSZ_MULT;
1539 }
1540 else
1541 {
1542 /* Default for groups == 1 */
1543 sector_size = output_bytes;
1544 }
1545
1546 sectors = 1 + (output_bytes - 1) / sector_size;
1547
1548 /* @!@ In the case of groups==1, lots of extra stack space gets used here. Could
1549 * dynamically allocate this memory, which would help with excess parameter passing,
1550 * too. Passing too many parameters in this file, simplify it! */
1551
1552 /* Outer scope: per-group symbol decoder tables. */
1553 {
1554 uint8_t inorder[DJW_MAX_GROUPS][ALPHABET_SIZE];
1555 uint base [DJW_MAX_GROUPS][DJW_MAX_CODELEN+1];
1556 uint limit [DJW_MAX_GROUPS][DJW_MAX_CODELEN+1];
1557 uint minlen [DJW_MAX_GROUPS];
1558 uint maxlen [DJW_MAX_GROUPS];
1559
1560 /* Nested scope: code length decoder tables. */
1561 {
1562 uint8_t clen [DJW_MAX_GROUPS][ALPHABET_SIZE];
1563 uint8_t cl_inorder[DJW_TOTAL_CODES];
1564 uint cl_base [DJW_MAX_CLCLEN+1];
1565 uint cl_limit [DJW_MAX_CLCLEN+1];
1566 uint8_t cl_mtf [DJW_TOTAL_CODES];
1567 uint cl_minlen;
1568 uint cl_maxlen;
1569
1570 /* Compute the code length decoder. */
1571 if ((ret = djw_decode_clclen (stream, & bstate, & input, input_end,
1572 cl_inorder, cl_base, cl_limit, & cl_minlen,
1573 & cl_maxlen, cl_mtf))) { goto fail; }
1574
1575 /* Now decode each group decoder. */
1576 if ((ret = djw_decode_prefix (stream, & bstate, & input, input_end,
1577 cl_inorder, cl_base, cl_limit,
1578 & cl_minlen, & cl_maxlen, cl_mtf,
1579 groups, clen[0]))) { goto fail; }
1580
1581 /* Prepare the actual decoding tables. */
1582 for (gp = 0; gp < groups; gp += 1)
1583 {
1584 djw_build_decoder (stream, ALPHABET_SIZE, DJW_MAX_CODELEN,
1585 clen[gp], inorder[gp], base[gp], limit[gp],
1586 & minlen[gp], & maxlen[gp]);
1587 }
1588 }
1589
1590 /* Decode: selector clens. */
1591 {
1592 uint8_t sel_inorder[DJW_MAX_GROUPS+1];
1593 uint sel_base [DJW_MAX_GBCLEN+1];
1594 uint sel_limit [DJW_MAX_GBCLEN+1];
1595 uint8_t sel_mtf [DJW_MAX_GROUPS+1];
1596 uint sel_minlen;
1597 uint sel_maxlen;
1598
1599 /* Setup group selection. */
1600 if (groups > 1)
1601 {
1602 uint8_t sel_clen[DJW_MAX_GROUPS+1];
1603
1604 for (gp = 0; gp < groups+1; gp += 1)
1605 {
1606 usize_t value;
1607
1608 if ((ret = xd3_decode_bits (stream, & bstate, & input, input_end, DJW_GBCLEN_BITS, & value))) { goto fail; }
1609
1610 sel_clen[gp] = value;
1611 sel_mtf[gp] = gp;
1612 }
1613
1614 if ((sel_group = xd3_alloc (stream, sectors, 1)) == NULL) { ret = ENOMEM; goto fail; }
1615
1616 djw_build_decoder (stream, groups+1, DJW_MAX_GBCLEN, sel_clen,
1617 sel_inorder, sel_base, sel_limit, & sel_minlen, & sel_maxlen);
1618
1619 if ((ret = djw_decode_1_2 (stream, & bstate, & input, input_end,
1620 sel_inorder, sel_base, sel_limit, & sel_minlen, & sel_maxlen, sel_mtf,
1621 sectors, 0, sel_group))) { goto fail; }
1622 }
1623
1624 /* Now decode each sector. */
1625 {
1626 uint8_t *gp_inorder = inorder[0]; /* Initialize for (groups==1) case. */
1627 uint *gp_base = base[0];
1628 uint *gp_limit = limit[0];
1629 uint gp_minlen = minlen[0];
1630 uint gp_maxlen = maxlen[0];
1631 usize_t c;
1632
1633 for (c = 0; c < sectors; c += 1)
1634 {
1635 usize_t n;
1636
1637 if (groups >= 2)
1638 {
1639 gp = sel_group[c];
1640
1641 XD3_ASSERT (gp < groups);
1642
1643 gp_inorder = inorder[gp];
1644 gp_base = base[gp];
1645 gp_limit = limit[gp];
1646 gp_minlen = minlen[gp];
1647 gp_maxlen = maxlen[gp];
1648 }
1649
1650 XD3_ASSERT (output_end - output > 0);
1651
1652 /* Decode next sector. */
1653 n = min (sector_size, (usize_t) (output_end - output));
1654
1655 do
1656 {
1657 usize_t sym;
1658
1659 if ((ret = djw_decode_symbol (stream, & bstate, & input, input_end,
1660 gp_inorder, gp_base, gp_limit, gp_minlen, gp_maxlen,
1661 & sym, ALPHABET_SIZE))) { goto fail; }
1662
1663 *output++ = sym;
1664 }
1665 while (--n);
1666 }
1667 }
1668 }
1669 }
1670
1671 IF_REGRESSION (if ((ret = xd3_test_clean_bits (stream, & bstate))) { goto fail; });
1672 XD3_ASSERT (ret == 0);
1673
1674 fail:
1675 xd3_free (stream, sel_group);
1676
1677 (*input_pos) = input;
1678 (*output_pos) = output;
1679 return ret;
1680}
1681
1682/*********************************************************************/
1683/* TUNING */
1684/*********************************************************************/
1685
1686#if TUNE_HUFFMAN && XD3_ENCODER
1687#include <stdio.h>
1688#include "xdelta3-fgk.h"
1689
1690static uint
1691xd3_bitsof_output (xd3_output *output, bit_state *bstate)
1692{
1693 uint x = 0;
1694 uint m = bstate->cur_mask;
1695
1696 while (m != 1)
1697 {
1698 x += 1;
1699 m >>= 1;
1700 }
1701
1702 return x + 8 * xd3_sizeof_output (output);
1703}
1704
1705static const char* xd3_sect_type (xd3_section_type type)
1706{
1707 switch (type)
1708 {
1709 case DATA_SECTION: return "DATA";
1710 case INST_SECTION: return "INST";
1711 case ADDR_SECTION: return "ADDR";
1712 }
1713 abort ();
1714}
1715
1716static int
1717xd3_encode_huff (xd3_stream *stream,
1718 djw_stream *h,
1719 xd3_output *input,
1720 xd3_output *unused_output,
1721 xd3_sec_cfg *cfg)
1722{
1723 int ret = 0;
1724 int input_size = xd3_sizeof_output (input);
1725 static int hdr = 0;
1726 const char *sect_type = xd3_sect_type (cfg->data_type);
1727 xd3_output *output;
1728 usize_t output_size;
1729
1730 if (hdr == 0) { hdr = 1; P(RINT "____ SECT INSZ SECTORSZ GPNO OUTSZ PREFIX SELECT ENCODE\n"); }
1731
1732 P(RINT "SECTION %s %u\n", sect_type, input_size);
1733
1734 {
1735 int gp, i;
1736 int best_size = 99999999;
1737 usize_t best_prefix = 0, best_select = 0, best_encode = 0, best_sector_size = 0;
1738 int best_gpno = -1;
1739 const char *t12 = "12";
1740 usize_t clen_count[DJW_MAX_CODELEN+1];
1741 djw_weight best_freq[DJW_TOTAL_CODES];
1742
1743 for (cfg->ngroups = 1; cfg->ngroups <= /*1*/ DJW_MAX_GROUPS; cfg->ngroups += 1)
1744 {
1745 for (cfg->sector_size = 10; cfg->sector_size <= DJW_SECTORSZ_MAX; cfg->sector_size += 10)
1746 {
1747 output = xd3_alloc_output (stream, NULL);
1748
1749 if ((ret = xd3_real_encode_huff (stream, h, input, output, cfg))) { goto fail; }
1750
1751 output_size = xd3_sizeof_output (output);
1752
1753 if (output_size < best_size)
1754 {
1755 best_size = output_size;
1756 best_gpno = cfg->ngroups;
1757 best_prefix = tune_prefix_bits;
1758 best_select = tune_select_bits;
1759 best_encode = tune_encode_bits;
1760 best_sector_size = cfg->sector_size;
1761 memset (clen_count, 0, sizeof (clen_count));
1762
1763 for (gp = 0; gp < cfg->ngroups; gp += 1)
1764 {
1765 for (i = 0; i < ALPHABET_SIZE; i += 1)
1766 {
1767 clen_count[tune_clen[gp][i]] += 1;
1768 }
1769 }
1770
1771 memcpy (best_freq, tune_freq, sizeof (tune_freq));
1772
1773 XD3_ASSERT (sizeof (tune_freq) == sizeof (mtf_freq));
1774 }
1775
1776 if (1)
1777 {
1778 P(RINT "COMP%s %u %u %u %u %u %u\n",
1779 t12, cfg->ngroups, cfg->sector_size,
1780 output_size, tune_prefix_bits, tune_select_bits, tune_encode_bits);
1781 }
1782 else
1783 {
1784 fail:
1785 P(RINT "COMP%s %u %u %u %u %u %u\n",
1786 t12, cfg->ngroups, cfg->sector_size,
1787 input_size, 0, 0, 0);
1788 }
1789
1790 xd3_free_output (stream, output);
1791
1792 XD3_ASSERT (ret == 0 || ret == XD3_NOSECOND);
1793
1794 if (cfg->ngroups == 1) { break; }
1795 }
1796 }
1797
1798 if (best_gpno > 0)
1799 {
1800 P(RINT "BEST%s %u %u %u %u %u %u\n",
1801 t12, best_gpno, best_sector_size,
1802 best_size, best_prefix, best_select, best_encode);
1803
1804#if 0
1805 P(RINT "CLEN%s ", t12);
1806 for (i = 1; i <= DJW_MAX_CODELEN; i += 1)
1807 {
1808 P(RINT "%u ", clen_count[i]);
1809 }
1810 P(RINT "\n");
1811
1812 P(RINT "FREQ%s ", t12);
1813 for (i = 0; i < DJW_TOTAL_CODES; i += 1)
1814 {
1815 P(RINT "%u ", tune_freq[i]);
1816 }
1817 P(RINT "\n");
1818#endif
1819 }
1820 }
1821
1822 /* Compare to split single-table windows. */
1823 {
1824 int parts, i;
1825
1826 cfg->ngroups = 1;
1827
1828 for (parts = 2; parts <= DJW_MAX_GROUPS; parts += 1)
1829 {
1830 usize_t part_size = input_size / parts;
1831 xd3_output *inp = input, *partin, *partin_head;
1832 usize_t off = 0;
1833 usize_t part_total = 0;
1834
1835 if (part_size < 1000) { break; }
1836
1837 for (i = 0; i < parts; i += 1)
1838 {
1839 usize_t inc;
1840
1841 partin = partin_head = xd3_alloc_output (stream, NULL);
1842 output = xd3_alloc_output (stream, NULL);
1843
1844 for (inc = 0; ((i < parts-1) && inc < part_size) ||
1845 ((i == parts-1) && inp != NULL); )
1846 {
1847 usize_t take;
1848
1849 if (i < parts-1)
1850 {
1851 take = min (part_size - inc, inp->next - off);
1852 }
1853 else
1854 {
1855 take = inp->next - off;
1856 }
1857
1858 ret = xd3_emit_bytes (stream, & partin, inp->base + off, take);
1859
1860 off += take;
1861 inc += take;
1862
1863 if (off == inp->next)
1864 {
1865 inp = inp->next_page;
1866 off = 0;
1867 }
1868 }
1869
1870 ret = xd3_real_encode_huff (stream, h, partin_head, output, cfg);
1871
1872 part_total += xd3_sizeof_output (output);
1873
1874 xd3_free_output (stream, partin_head);
1875 xd3_free_output (stream, output);
1876
1877 XD3_ASSERT (ret == 0 || ret == XD3_NOSECOND);
1878
1879 if (ret == XD3_NOSECOND)
1880 {
1881 break;
1882 }
1883 }
1884
1885 if (ret != XD3_NOSECOND)
1886 {
1887 P(RINT "PART %u %u\n", parts, part_total);
1888 }
1889 }
1890 }
1891
1892 /* Compare to FGK */
1893 {
1894 fgk_stream *fgk = fgk_alloc (stream);
1895
1896 fgk_init (fgk);
1897
1898 output = xd3_alloc_output (stream, NULL);
1899
1900 ret = xd3_encode_fgk (stream, fgk, input, output, NULL);
1901
1902 output_size = xd3_sizeof_output (output);
1903 xd3_free_output (stream, output);
1904 fgk_destroy (stream, fgk);
1905
1906 XD3_ASSERT (ret == 0);
1907
1908 P(RINT "FGK %u\n", output_size);
1909 }
1910
1911 P(RINT "END_SECTION %s %u\n", sect_type, input_size);
1912
1913 return 0;
1914}
1915#endif
1916
1917#endif
diff --git a/xdelta3/xdelta3-fgk.h b/xdelta3/xdelta3-fgk.h
new file mode 100755
index 0000000..a19d65c
--- /dev/null
+++ b/xdelta3/xdelta3-fgk.h
@@ -0,0 +1,851 @@
1/* xdelta 3 - delta compression tools and library
2 * Copyright (C) 2002 and onward. Joshua P. MacDonald
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 */
18
19/* For demonstration purposes only.
20 */
21
22#ifndef _XDELTA3_FGK_h_
23#define _XDELTA3_FGK_h_
24
25/* An implementation of the FGK algorithm described by D.E. Knuth in "Dynamic Huffman
26 * Coding" in Journal of Algorithms 6. */
27
28/* A 32bit counter (fgk_weight) is used as the frequency counter for nodes in the huffman
29 * tree. @!@ Need to test for overflow and/or reset stats. */
30
31typedef struct _fgk_stream fgk_stream; /* coder state: node array, block array, bit buffer */
32typedef struct _fgk_node fgk_node; /* one tree node (leaf or internal) */
33typedef struct _fgk_block fgk_block; /* leader record for a group of nodes, or a free-list link */
34typedef unsigned int fgk_bit; /* a single code bit; fgk_decode_bit asserts it is 0 or 1 */
35typedef uint32_t fgk_weight; /* frequency counter stored in each node */
36
/* One slot of h->block_array.  A slot handed out by fgk_make_block holds the leader node
 * of a block; a slot on the free list (headed by h->free_block) holds the next free slot.
 * The two uses share storage, so only one member is meaningful at a time. */
37struct _fgk_block {
38 union {
39 fgk_node *un_leader; /* in use: written by fgk_make_block, fgk_move_right, fgk_promote */
40 fgk_block *un_freeptr; /* free: next free slot, NULL at the end of the list */
41 } un;
42};
43
/* Shorthand for the two union members. */
44#define block_leader un.un_leader
45#define block_freeptr un.un_freeptr
46
47/* The code can also support fixed huffman encoding/decoding. */
48#define IS_ADAPTIVE 1
49
50/* weight is a count of the number of times this element has been seen in the current
51 * encoding/decoding. parent, right_child, and left_child are pointers defining the tree
52 * structure. right and left point to neighbors in an ordered sequence of
53 * weights. The left child of a node is always guaranteed to have weight not greater than
54 * its sibling. my_block->block_leader points to the element with the same weight as itself
55 * which is closest to the next increasing weight block.
 *
 * Elements that have not been seen yet (weight 0) are kept in a doubly linked list that
 * reuses left_child/right_child as the previous/next links (see fgk_init_node,
 * fgk_find_nth_zero and fgk_eliminate_zero). */
56struct _fgk_node
57{
58 fgk_weight weight; /* occurrence count; 0 means not seen yet */
59 fgk_node *parent; /* tree parent */
60 fgk_node *left_child; /* tree child, or previous zero-list element */
61 fgk_node *right_child; /* tree child, or next zero-list element */
62 fgk_node *left; /* left neighbor in the weight-ordered sequence */
63 fgk_node *right; /* right neighbor in the weight-ordered sequence */
64 fgk_block *my_block; /* block record this node belongs to; NULL after fgk_init_node */
65};
66
67/* alphabet_size is a count of the number of possible leaves in the huffman tree. The
68 * number of total nodes counting internal nodes is ((2 * alphabet_size) - 1).
69 * zero_freq_count is the number of elements remaining which have zero frequency.
70 * zero_freq_exp and zero_freq_rem satisfy the equation zero_freq_count = 2^zero_freq_exp +
71 * zero_freq_rem. root_node is the root of the tree, which is initialized to a node with
72 * zero frequency and contains the 0th such element. free_node contains a pointer to the
73 * next available fgk_node space. alphabet contains all the elements and is indexed by N.
74 * remaining_zeros points to the head of the list of zeros. */
75struct _fgk_stream
76{
77 int alphabet_size;
78 int zero_freq_count;
79 int zero_freq_exp;
80 int zero_freq_rem;
81 int coded_depth; /* number of valid entries in coded_bits */
82
83 int total_nodes; /* length of the alphabet array */
84 int total_blocks; /* length of block_array */
85
86 fgk_bit *coded_bits; /* bit buffer with alphabet_size entries (see fgk_alloc) */
87
88 fgk_block *block_array; /* storage for every fgk_block */
89 fgk_block *free_block; /* head of the free-block list */
90
91 fgk_node *decode_ptr; /* decoder's current position in the tree */
92 fgk_node *remaining_zeros;
93 fgk_node *alphabet;
94 fgk_node *root_node;
95 fgk_node *free_node;
96};
97
98/*********************************************************************/
99/* Encoder */
100/*********************************************************************/
101
102static fgk_stream* fgk_alloc (xd3_stream *stream /*, int alphabet_size */);
103static void fgk_init (fgk_stream *h);
104static int fgk_encode_data (fgk_stream *h,
105 int n);
106static INLINE fgk_bit fgk_get_encoded_bit (fgk_stream *h);
107
108static int xd3_encode_fgk (xd3_stream *stream,
109 fgk_stream *sec_stream,
110 xd3_output *input,
111 xd3_output *output,
112 xd3_sec_cfg *cfg);
113
114/*********************************************************************/
115/* Decoder */
116/*********************************************************************/
117
118static INLINE int fgk_decode_bit (fgk_stream *h,
119 fgk_bit b);
120static int fgk_decode_data (fgk_stream *h);
121static void fgk_destroy (xd3_stream *stream,
122 fgk_stream *h);
123
124static int xd3_decode_fgk (xd3_stream *stream,
125 fgk_stream *sec_stream,
126 const uint8_t **input,
127 const uint8_t *const input_end,
128 uint8_t **output,
129 const uint8_t *const output_end);
130
131/*********************************************************************/
132/* Private */
133/*********************************************************************/
134
135static unsigned int fgk_find_nth_zero (fgk_stream *h, int n);
136static int fgk_nth_zero (fgk_stream *h, int n);
137static void fgk_update_tree (fgk_stream *h, int n);
138static fgk_node* fgk_increase_zero_weight (fgk_stream *h, int n);
139static void fgk_eliminate_zero (fgk_stream* h, fgk_node *node);
140static void fgk_move_right (fgk_stream *h, fgk_node *node);
141static void fgk_promote (fgk_stream *h, fgk_node *node);
142static void fgk_init_node (fgk_node *node, int i, int size);
143static fgk_block* fgk_make_block (fgk_stream *h, fgk_node *l);
144static void fgk_free_block (fgk_stream *h, fgk_block *b);
145static void fgk_factor_remaining (fgk_stream *h);
146static INLINE void fgk_swap_ptrs (fgk_node **one, fgk_node **two);
147
148/*********************************************************************/
149/* Basic Routines */
150/*********************************************************************/
151
152/* returns an initialized huffman encoder for an alphabet with the
153 * given size. returns NULL if enough memory cannot be allocated */
154static fgk_stream* fgk_alloc (xd3_stream *stream /*, int alphabet_size0 */)
155{
156 int alphabet_size0 = ALPHABET_SIZE;
157 fgk_stream *h;
158
159 if ((h = (fgk_stream*) xd3_alloc (stream, 1, sizeof (fgk_stream))) == NULL)
160 {
161 return NULL;
162 }
163
164 h->total_nodes = (2 * alphabet_size0) - 1;
165 h->total_blocks = (2 * h->total_nodes);
166 h->alphabet = (fgk_node*) xd3_alloc (stream, h->total_nodes, sizeof (fgk_node));
167 h->block_array = (fgk_block*) xd3_alloc (stream, h->total_blocks, sizeof (fgk_block));
168 h->coded_bits = (fgk_bit*) xd3_alloc (stream, alphabet_size0, sizeof (fgk_bit));
169
170 if (h->coded_bits == NULL ||
171 h->alphabet == NULL ||
172 h->block_array == NULL)
173 {
174 fgk_destroy (stream, h);
175 return NULL;
176 }
177
178 h->alphabet_size = alphabet_size0;
179
180 return h;
181}
182
183static void fgk_init (fgk_stream *h)
184{
185 int i;
186
187 h->root_node = h->alphabet;
188 h->decode_ptr = h->root_node;
189 h->free_node = h->alphabet + h->alphabet_size;
190 h->remaining_zeros = h->alphabet;
191 h->coded_depth = 0;
192 h->zero_freq_count = h->alphabet_size + 2;
193
194 /* after two calls to factor_remaining, zero_freq_count == alphabet_size */
195 fgk_factor_remaining(h); /* set ZFE and ZFR */
196 fgk_factor_remaining(h); /* set ZFDB according to prev state */
197
198 IF_DEBUG (memset (h->alphabet, 0, sizeof (h->alphabet[0]) * h->total_nodes));
199
200 for (i = 0; i < h->total_blocks-1; i += 1)
201 {
202 h->block_array[i].block_freeptr = &h->block_array[i + 1];
203 }
204
205 h->block_array[h->total_blocks - 1].block_freeptr = NULL;
206 h->free_block = h->block_array;
207
208 /* Zero frequency nodes are inserted in the first alphabet_size
209 * positions, with Value, weight, and a pointer to the next zero
210 * frequency node. */
211 for (i = h->alphabet_size - 1; i >= 0; i -= 1)
212 {
213 fgk_init_node (h->alphabet + i, i, h->alphabet_size);
214 }
215}
216
217static void fgk_swap_ptrs(fgk_node **one, fgk_node **two)
218{
219 fgk_node *tmp = *one;
220 *one = *two;
221 *two = tmp;
222}
223
224/* Takes huffman transmitter h and n, the nth elt in the alphabet, and
225 * returns the number of required to encode n. */
226static int fgk_encode_data (fgk_stream* h, int n)
227{
228 fgk_node *target_ptr = h->alphabet + n;
229
230 XD3_ASSERT (n < h->alphabet_size);
231
232 h->coded_depth = 0;
233
234 /* First encode the binary representation of the nth remaining
235 * zero frequency element in reverse such that bit, which will be
236 * encoded from h->coded_depth down to 0 will arrive in increasing
237 * order following the tree path. If there is only one left, it
238 * is not neccesary to encode these bits. */
239 if (IS_ADAPTIVE && target_ptr->weight == 0)
240 {
241 unsigned int where, shift;
242 int bits;
243
244 where = fgk_find_nth_zero(h, n);
245 shift = 1;
246
247 if (h->zero_freq_rem == 0)
248 {
249 bits = h->zero_freq_exp;
250 }
251 else
252 {
253 bits = h->zero_freq_exp + 1;
254 }
255
256 while (bits > 0)
257 {
258 h->coded_bits[h->coded_depth++] = (shift & where) && 1;
259
260 bits -= 1;
261 shift <<= 1;
262 };
263
264 target_ptr = h->remaining_zeros;
265 }
266
267 /* The path from root to node is filled into coded_bits in reverse so
268 * that it is encoded in the right order */
269 while (target_ptr != h->root_node)
270 {
271 h->coded_bits[h->coded_depth++] = (target_ptr->parent->right_child == target_ptr);
272
273 target_ptr = target_ptr->parent;
274 }
275
276 if (IS_ADAPTIVE)
277 {
278 fgk_update_tree(h, n);
279 }
280
281 return h->coded_depth;
282}
283
284/* Should be called as many times as fgk_encode_data returns.
285 */
286static INLINE fgk_bit fgk_get_encoded_bit (fgk_stream *h)
287{
288 XD3_ASSERT (h->coded_depth > 0);
289
290 return h->coded_bits[--h->coded_depth];
291}
292
293/* This procedure updates the tree after alphabet[n] has been encoded
294 * or decoded.
295 */
296static void fgk_update_tree (fgk_stream *h, int n)
297{
298 fgk_node *incr_node;
299
300 if (h->alphabet[n].weight == 0)
301 {
302 incr_node = fgk_increase_zero_weight (h, n);
303 }
304 else
305 {
306 incr_node = h->alphabet + n;
307 }
308
309 while (incr_node != h->root_node)
310 {
311 fgk_move_right (h, incr_node);
312 fgk_promote (h, incr_node);
313 incr_node->weight += 1; /* incr the parent */
314 incr_node = incr_node->parent; /* repeat */
315 }
316
317 h->root_node->weight += 1;
318}
319
/* Exchanges move_fwd with the leader of its block (move_fwd->my_block->block_leader):
 * the two nodes trade places in the left/right ordered sequence and trade parents, and
 * move_fwd is then recorded as the block leader.  The parameter h is not used. */
320static void fgk_move_right (fgk_stream *h, fgk_node *move_fwd)
321{
322 fgk_node **fwd_par_ptr, **back_par_ptr;
323 fgk_node *move_back, *tmp;
324
325 move_back = move_fwd->my_block->block_leader;
326
 /* No exchange if move_fwd already leads its block, if the leader is move_fwd's own
  * parent, or if move_fwd has zero weight. */
327 if (move_fwd == move_back ||
328 move_fwd->parent == move_back ||
329 move_fwd->weight == 0)
330 {
331 return;
332 }
333
 /* Re-point the outer neighbours first.  move_back->right is dereferenced without a
  * NULL check.  NOTE(review): presumably a leader reaching this point always has a
  * right neighbour -- confirm. */
334 move_back->right->left = move_fwd;
335
336 if (move_fwd->left)
337 {
338 move_fwd->left->right = move_back;
339 }
340
 /* Exchange the right links.  tmp == move_back means the two nodes are adjacent, so
  * move_back's right neighbour becomes move_fwd itself. */
341 tmp = move_fwd->right;
342 move_fwd->right = move_back->right;
343
344 if (tmp == move_back)
345 {
346 move_back->right = move_fwd;
347 }
348 else
349 {
350 tmp->left = move_back;
351 move_back->right = tmp;
352 }
353
 /* Exchange the left links, with the same adjacency special case. */
354 tmp = move_back->left;
355 move_back->left = move_fwd->left;
356
357 if (tmp == move_fwd)
358 {
359 move_fwd->left = move_back;
360 }
361 else
362 {
363 tmp->right = move_fwd;
364 move_fwd->left = tmp;
365 }
366
 /* Locate the child slot in each parent that currently points at the node. */
367 if (move_fwd->parent->right_child == move_fwd)
368 {
369 fwd_par_ptr = &move_fwd->parent->right_child;
370 }
371 else
372 {
373 fwd_par_ptr = &move_fwd->parent->left_child;
374 }
375
376 if (move_back->parent->right_child == move_back)
377 {
378 back_par_ptr = &move_back->parent->right_child;
379 }
380 else
381 {
382 back_par_ptr = &move_back->parent->left_child;
383 }
384
 /* Swap the parent pointers and the contents of the two child slots found above. */
385 fgk_swap_ptrs (&move_fwd->parent, &move_back->parent);
386 fgk_swap_ptrs (fwd_par_ptr, back_par_ptr);
387
388 move_fwd->my_block->block_leader = move_fwd;
389}
390
391/* Shifts node, the leader of its block, into the next block. */
/* Only block membership is changed here; node->weight is not modified.  The caller
 * fgk_update_tree increments the weight right after this returns, which is why the
 * tests below compare node->weight against (my_right->weight - 1). */
392static void fgk_promote (fgk_stream *h, fgk_node *node)
393{
394 fgk_node *my_left, *my_right;
395 fgk_block *cur_block;
396
397 my_right = node->right;
398 my_left = node->left;
399 cur_block = node->my_block;
400
 /* Zero-weight nodes are left alone. */
401 if (node->weight == 0)
402 {
403 return;
404 }
405
406 /* if left is right child, parent of remaining zeros case (?), means parent
407 * has same weight as right child. */
408 if (my_left == node->right_child &&
409 node->left_child &&
410 node->left_child->weight == 0)
411 {
412 XD3_ASSERT (node->left_child == h->remaining_zeros);
413 XD3_ASSERT (node->right_child->weight == (node->weight+1)); /* child weight was already incremented */
414
 /* If the right neighbour's weight is exactly one higher and it is not the root,
  * release the current block and put both node and its left neighbour into the
  * right neighbour's block.  Otherwise nothing changes. */
415 if (node->weight == (my_right->weight - 1) && my_right != h->root_node)
416 {
417 fgk_free_block (h, cur_block);
418 node->my_block = my_right->my_block;
419 my_left->my_block = my_right->my_block;
420 }
421
422 return;
423 }
424
 /* Nothing to do when the left neighbour is the head of the zero list. */
425 if (my_left == h->remaining_zeros)
426 {
427 return;
428 }
429
430 /* true if not the leftmost node */
 /* The left neighbour takes over as leader of the block node is leaving; if it is in a
  * different block, the block node is leaving is returned to the free list. */
431 if (my_left->my_block == cur_block)
432 {
433 my_left->my_block->block_leader = my_left;
434 }
435 else
436 {
437 fgk_free_block (h, cur_block);
438 }
439
440 /* node->parent != my_right */
 /* Join the right neighbour's block when its weight is exactly one higher and it is not
  * the root; otherwise node starts a new block of its own. */
441 if ((node->weight == (my_right->weight - 1)) && (my_right != h->root_node))
442 {
443 node->my_block = my_right->my_block;
444 }
445 else
446 {
447 node->my_block = fgk_make_block (h, node);
448 }
449}
450
451/* When an element is seen the first time this is called to remove it from the list of
452 * zero weight elements and introduce a new internal node to the tree. */
/* Returns alphabet + n in every case.  No weight is incremented here; the caller
 * fgk_update_tree does that while walking up from the returned node. */
453static fgk_node* fgk_increase_zero_weight (fgk_stream *h, int n)
454{
455 fgk_node *this_zero, *new_internal, *zero_ptr;
456
457 this_zero = h->alphabet + n;
458
459 if (h->zero_freq_count == 1)
460 {
461 /* this is the last one */
 /* No internal node is created: the element keeps its tree position, joins its
  * right neighbour's block if that neighbour has weight 1 (otherwise gets a new
  * block), and the zero list becomes empty. */
462 this_zero->right_child = NULL;
463
464 if (this_zero->right->weight == 1)
465 {
466 this_zero->my_block = this_zero->right->my_block;
467 }
468 else
469 {
470 this_zero->my_block = fgk_make_block (h, this_zero);
471 }
472
473 h->remaining_zeros = NULL;
474
475 return this_zero;
476 }
477
478 zero_ptr = h->remaining_zeros;
479
 /* Take the next unused node as the new internal node.  It inherits the zero-list
  * head's parent and right neighbour, starts at weight 0, and has this_zero as both
  * its right child and its left neighbour. */
480 new_internal = h->free_node++;
481
482 new_internal->parent = zero_ptr->parent;
483 new_internal->right = zero_ptr->right;
484 new_internal->weight = 0;
485 new_internal->right_child = this_zero;
486 new_internal->left = this_zero;
487
488 if (h->remaining_zeros == h->root_node)
489 {
490 /* This is the first element to be coded */
491 h->root_node = new_internal;
492 this_zero->my_block = fgk_make_block (h, this_zero);
493 new_internal->my_block = fgk_make_block (h, new_internal);
494 }
495 else
496 {
 /* Put new_internal where the zero-list head was: fix the right neighbour's left
  * link and the parent's child slot. */
497 new_internal->right->left = new_internal;
498
499 if (zero_ptr->parent->right_child == zero_ptr)
500 {
501 zero_ptr->parent->right_child = new_internal;
502 }
503 else
504 {
505 zero_ptr->parent->left_child = new_internal;
506 }
507
 /* Share the right neighbour's block when it has weight 1, else open a new block;
  * this_zero goes into the same block as new_internal. */
508 if (new_internal->right->weight == 1)
509 {
510 new_internal->my_block = new_internal->right->my_block;
511 }
512 else
513 {
514 new_internal->my_block = fgk_make_block (h, new_internal);
515 }
516
517 this_zero->my_block = new_internal->my_block;
518 }
519
 /* Unlink this_zero from the zero list.  This can change h->remaining_zeros, so the
  * head is read again below rather than reusing zero_ptr. */
520 fgk_eliminate_zero (h, this_zero);
521
522 new_internal->left_child = h->remaining_zeros;
523
 /* this_zero becomes a leaf under new_internal, sitting between the zero-list head
  * (left) and new_internal (right) in the ordered sequence. */
524 this_zero->right = new_internal;
525 this_zero->left = h->remaining_zeros;
526 this_zero->parent = new_internal;
527 this_zero->left_child = NULL;
528 this_zero->right_child = NULL;
529
530 h->remaining_zeros->parent = new_internal;
531 h->remaining_zeros->right = this_zero;
532
533 return this_zero;
534}
535
536/* When a zero frequency element is encoded, it is followed by the binary representation
537 * of the index into the remaining elements. Sets a cache to the element before it so
538 * that it can be removed without calling this procedure again. */
539static unsigned int fgk_find_nth_zero (fgk_stream* h, int n)
540{
541 fgk_node *target_ptr = h->alphabet + n;
542 fgk_node *head_ptr = h->remaining_zeros;
543 unsigned int idx = 0;
544
545 while (target_ptr != head_ptr)
546 {
547 head_ptr = head_ptr->right_child;
548 idx += 1;
549 }
550
551 return idx;
552}
553
554/* Splices node out of the list of zeros. */
555static void fgk_eliminate_zero (fgk_stream* h, fgk_node *node)
556{
557 if (h->zero_freq_count == 1)
558 {
559 return;
560 }
561
562 fgk_factor_remaining(h);
563
564 if (node->left_child == NULL)
565 {
566 h->remaining_zeros = h->remaining_zeros->right_child;
567 h->remaining_zeros->left_child = NULL;
568 }
569 else if (node->right_child == NULL)
570 {
571 node->left_child->right_child = NULL;
572 }
573 else
574 {
575 node->right_child->left_child = node->left_child;
576 node->left_child->right_child = node->right_child;
577 }
578}
579
580static void fgk_init_node (fgk_node *node, int i, int size)
581{
582 if (i < size - 1)
583 {
584 node->right_child = node + 1;
585 }
586 else
587 {
588 node->right_child = NULL;
589 }
590
591 if (i >= 1)
592 {
593 node->left_child = node - 1;
594 }
595 else
596 {
597 node->left_child = NULL;
598 }
599
600 node->weight = 0;
601 node->parent = NULL;
602 node->right = NULL;
603 node->left = NULL;
604 node->my_block = NULL;
605}
606
607/* The data structure used is an array of blocks, which are unions of free pointers and
608 * huffnode pointers. free blocks are a linked list of free blocks, the front of which is
609 * h->free_block. The used blocks are pointers to the head of each block. */
610static fgk_block* fgk_make_block (fgk_stream *h, fgk_node* lead)
611{
612 fgk_block *ret = h->free_block;
613
614 XD3_ASSERT (h->free_block != NULL);
615
616 h->free_block = h->free_block->block_freeptr;
617
618 ret->block_leader = lead;
619
620 return ret;
621}
622
623/* Restores the block to the front of the free list. */
624static void fgk_free_block (fgk_stream *h, fgk_block *b)
625{
626 b->block_freeptr = h->free_block;
627 h->free_block = b;
628}
629
630/* sets zero_freq_count, zero_freq_rem, and zero_freq_exp to satsity the equation given
631 * above. */
632static void fgk_factor_remaining (fgk_stream *h)
633{
634 unsigned int i;
635
636 i = (--h->zero_freq_count);
637 h->zero_freq_exp = 0;
638
639 while (i > 1)
640 {
641 h->zero_freq_exp += 1;
642 i >>= 1;
643 }
644
645 i = 1 << h->zero_freq_exp;
646
647 h->zero_freq_rem = h->zero_freq_count - i;
648}
649
650/* receives a bit at a time and returns true when a complete code has
651 * been received.
652 */
653static int INLINE fgk_decode_bit (fgk_stream* h, fgk_bit b)
654{
655 XD3_ASSERT (b == 1 || b == 0);
656
657 if (IS_ADAPTIVE && h->decode_ptr->weight == 0)
658 {
659 int bitsreq;
660
661 if (h->zero_freq_rem == 0)
662 {
663 bitsreq = h->zero_freq_exp;
664 }
665 else
666 {
667 bitsreq = h->zero_freq_exp + 1;
668 }
669
670 h->coded_bits[h->coded_depth] = b;
671 h->coded_depth += 1;
672
673 return h->coded_depth >= bitsreq;
674 }
675 else
676 {
677 if (b)
678 {
679 h->decode_ptr = h->decode_ptr->right_child;
680 }
681 else
682 {
683 h->decode_ptr = h->decode_ptr->left_child;
684 }
685
686 if (h->decode_ptr->left_child == NULL)
687 {
688 /* If the weight is non-zero, finished. */
689 if (h->decode_ptr->weight != 0)
690 {
691 return 1;
692 }
693
694 /* zero_freq_count is dropping to 0, finished. */
695 return h->zero_freq_count == 1;
696 }
697 else
698 {
699 return 0;
700 }
701 }
702}
703
704static int fgk_nth_zero (fgk_stream* h, int n)
705{
706 fgk_node *ret = h->remaining_zeros;
707
708 /* ERROR: if during this loop (ret->right_child == NULL) then the encoder's zero count
709 * is too high. Could return an error code now, but is probably unnecessary overhead,
710 * since the caller should check integrity anyway. */
711 for (; n != 0 && ret->right_child != NULL; n -= 1)
712 {
713 ret = ret->right_child;
714 }
715
716 return ret - h->alphabet;
717}
718
719/* once fgk_decode_bit returns 1, this retrieves an index into the
720 * alphabet otherwise this returns 0, indicating more bits are
721 * required.
722 */
723static int fgk_decode_data (fgk_stream* h)
724{
725 unsigned int elt = h->decode_ptr - h->alphabet;
726
727 if (IS_ADAPTIVE && h->decode_ptr->weight == 0) {
728 int i;
729 unsigned int n = 0;
730
731 for (i = 0; i < h->coded_depth - 1; i += 1)
732 {
733 n |= h->coded_bits[i];
734 n <<= 1;
735 }
736
737 n |= h->coded_bits[i];
738 elt = fgk_nth_zero(h, n);
739 }
740
741 h->coded_depth = 0;
742
743 if (IS_ADAPTIVE)
744 {
745 fgk_update_tree(h, elt);
746 }
747
748 h->decode_ptr = h->root_node;
749
750 return elt;
751}
752
753static void fgk_destroy (xd3_stream *stream,
754 fgk_stream *h)
755{
756 if (h != NULL)
757 {
758 IF_DEBUG1({
759 int i;
760 for (i = 0; i < ALPHABET_SIZE; i += 1)
761 {
762 XP(OF, "freq[%u] = %u\n", i, h->alphabet[i].weight);
763 }
764 });
765
766 xd3_free (stream, h->alphabet);
767 xd3_free (stream, h->coded_bits);
768 xd3_free (stream, h->block_array);
769 xd3_free (stream, h);
770 }
771}
772
773/*********************************************************************/
774/* Xdelta */
775/*********************************************************************/
776
777static int
778xd3_encode_fgk (xd3_stream *stream, fgk_stream *sec_stream, xd3_output *input, xd3_output *output, xd3_sec_cfg *cfg)
779{
780 bit_state bstate = BIT_STATE_ENCODE_INIT;
781 xd3_output *cur_page;
782 int ret;
783
784 /* OPT: quit compression early if it looks bad */
785 for (cur_page = input; cur_page; cur_page = cur_page->next_page)
786 {
787 const uint8_t *inp = cur_page->base;
788 const uint8_t *inp_max = inp + cur_page->next;
789
790 while (inp < inp_max)
791 {
792 usize_t bits = fgk_encode_data (sec_stream, *inp++);
793
794 while (bits--)
795 {
796 if ((ret = xd3_encode_bit (stream, & output, & bstate, fgk_get_encoded_bit (sec_stream)))) { return ret; }
797 }
798 }
799 }
800
801 return xd3_flush_bits (stream, & output, & bstate);
802}
803
804static int
805xd3_decode_fgk (xd3_stream *stream,
806 fgk_stream *sec_stream,
807 const uint8_t **input_pos,
808 const uint8_t *const input_max,
809 uint8_t **output_pos,
810 const uint8_t *const output_max)
811{
812 bit_state bstate;
813 uint8_t *output = *output_pos;
814 const uint8_t *input = *input_pos;
815
816 for (;;)
817 {
818 if (input == input_max)
819 {
820 stream->msg = "secondary decoder end of input";
821 return EINVAL;
822 }
823
824 bstate.cur_byte = *input++;
825
826 for (bstate.cur_mask = 1; bstate.cur_mask != 0x100; bstate.cur_mask <<= 1)
827 {
828 int done = fgk_decode_bit (sec_stream, (bstate.cur_byte & bstate.cur_mask) && 1);
829
830 if (! done) { continue; }
831
832 *output++ = fgk_decode_data (sec_stream);
833
834 if (unlikely (output == output_max))
835 {
836 /* During regression testing: */
837 IF_REGRESSION ({
838 int ret;
839 bstate.cur_mask <<= 1;
840 if ((ret = xd3_test_clean_bits (stream, & bstate))) { return ret; }
841 });
842
843 (*output_pos) = output;
844 (*input_pos) = input;
845 return 0;
846 }
847 }
848 }
849}
850
851#endif /* _XDELTA3_FGK_ */
diff --git a/xdelta3/xdelta3-list.h b/xdelta3/xdelta3-list.h
new file mode 100755
index 0000000..64a2582
--- /dev/null
+++ b/xdelta3/xdelta3-list.h
@@ -0,0 +1,130 @@
1/* xdelta 3 - delta compression tools and library
2 * Copyright (C) 2002 and onward. Joshua P. MacDonald
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 */
18
19#ifndef __XDELTA3_LIST__
20#define __XDELTA3_LIST__
21
/* XD3_MAKELIST (LTYPE, ETYPE, LNAME) generates a family of static inline helpers for a
 * circular doubly linked list.  LTYPE is the link type (it must have next and prev
 * members), ETYPE is the element type, and LNAME is the name of the LTYPE member
 * embedded in ETYPE.  A list is represented by a head link that points to itself when
 * the list is empty.
 *
 *   LTYPE_entry      element containing a given link
 *   LTYPE_init       make a head an empty list
 *   LTYPE_add        insert a link between two adjacent links
 *   LTYPE_push_back  append an element
 *   LTYPE_del        join two links, dropping whatever was between them
 *   LTYPE_remove     unlink an element; returns the element that followed it
 *   LTYPE_pop_back   unlink and return the last element
 *   LTYPE_pop_front  unlink and return the first element
 *   LTYPE_empty      true if the head links to itself
 *   LTYPE_front      first element
 *   LTYPE_back       last element
 *   LTYPE_end        true if the element's link is the head itself
 *   LTYPE_next       element after a given element
 *   LTYPE_length     number of links other than the head
 *
 * The trailing typedef lets the invocation be followed by a semicolon. */
#define XD3_MAKELIST(LTYPE,ETYPE,LNAME) \
 \
static inline ETYPE* \
LTYPE ## _entry (LTYPE* link) \
{ \
  return (ETYPE*) ((char*) link - (unsigned long) &((ETYPE*) 0)->LNAME); \
} \
 \
static inline void \
LTYPE ## _init (LTYPE *head) \
{ \
  head->next = head; \
  head->prev = head; \
} \
 \
static inline void \
LTYPE ## _add (LTYPE *before, LTYPE *after, LTYPE *fresh) \
{ \
  after->prev = fresh; \
  before->next = fresh; \
  fresh->next = after; \
  fresh->prev = before; \
} \
 \
static inline void \
LTYPE ## _push_back (LTYPE *head, ETYPE *elt) \
{ \
  LTYPE ## _add (head->prev, head, & elt->LNAME); \
} \
 \
static inline void \
LTYPE ## _del (LTYPE *after, \
               LTYPE *before) \
{ \
  after->prev = before; \
  before->next = after; \
} \
 \
static inline ETYPE* \
LTYPE ## _remove (ETYPE *elt) \
{ \
  LTYPE *following = elt->LNAME.next; \
  LTYPE ## _del (elt->LNAME.next, elt->LNAME.prev); \
  return LTYPE ## _entry (following); \
} \
 \
static inline ETYPE* \
LTYPE ## _pop_back (LTYPE *head) \
{ \
  LTYPE *last = head->prev; \
  LTYPE ## _del (last->next, last->prev); \
  return LTYPE ## _entry (last); \
} \
 \
static inline ETYPE* \
LTYPE ## _pop_front (LTYPE *head) \
{ \
  LTYPE *first = head->next; \
  LTYPE ## _del (first->next, first->prev); \
  return LTYPE ## _entry (first); \
} \
 \
static inline int \
LTYPE ## _empty (LTYPE *head) \
{ \
  return head == head->next; \
} \
 \
static inline ETYPE* \
LTYPE ## _front (LTYPE *head) \
{ \
  return LTYPE ## _entry (head->next); \
} \
 \
static inline ETYPE* \
LTYPE ## _back (LTYPE *head) \
{ \
  return LTYPE ## _entry (head->prev); \
} \
 \
static inline int \
LTYPE ## _end (LTYPE *head, ETYPE *elt) \
{ \
  return head == & elt->LNAME; \
} \
 \
static inline ETYPE* \
LTYPE ## _next (ETYPE *elt) \
{ \
  return LTYPE ## _entry (elt->LNAME.next); \
} \
 \
static inline int \
LTYPE ## _length (LTYPE *head) \
{ \
  LTYPE *walk = head->next; \
  int count = 0; \
 \
  while (walk != head) \
    { \
      count += 1; \
      walk = walk->next; \
    } \
 \
  return count; \
} \
 \
typedef int unused_ ## LTYPE
129
130#endif
diff --git a/xdelta3/xdelta3-main.h b/xdelta3/xdelta3-main.h
new file mode 100755
index 0000000..29469c3
--- /dev/null
+++ b/xdelta3/xdelta3-main.h
@@ -0,0 +1,2923 @@
1/* xdelta 3 - delta compression tools and library
2 * Copyright (C) 2001 and onward. Joshua P. MacDonald
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 */
18
19/* This is all the extra stuff you need for convenience to users in a command line
20 * application. It contains these major components:
21 *
22 * 1. VCDIFF tools
23 * 2. external compression support (this is POSIX-specific).
24 * 3. a general read/write loop that handles all of the Xdelta decode/encode/VCDIFF-print
25 * functions
26 * 4. command-line interpreter
27 * 5. an Xdelta application header which stores default filename, external compression settings
28 * 6. output/error printing
29 * 7. basic file support and OS interface
30 */
31
32/* Definite TODO list:
33 * 1. do exact gzip-like filename, stdout handling. make a .xz extension, refuse
34 * to encode to stdout without -cf, etc.
35 * 2. Allow the user to add a comment string to the app header without disturbing the default
36 * behavior.
37 * 3. Define zero-length window behavior
38 * 4. Separate getopt() code from main and make flags modular, implement help.
39 * 5. Catch up on related research!
40 */
41
42/* Nice idea TODO list:
43 *
44 * 1. Should probably have a write buffer (option)?
45 * 2. Add a reporting function for displaying progress, warning messages out of the library.
46 * 3. Add WIN32 support in addition to XD3_POSIX/XD3_STDIO. Should almost compile under windows
47 * with XD3_STDIO, but not quite (e.g., stat()?).
48 * 4. Update-in-place, partial-encoding per the latest RFC: see "Wishful TODO" comments below
49 */
50
51/* On error handling and printing:
52 *
53 * The xdelta library sets stream->msg to indicate what condition caused an internal
54 * failure, but many failures originate here and are printed here. The return convention
55 * is 0 for success, as throughout Xdelta code, but special attention is required here for
56 * the operating system calls with different error handling. See the main_file_* routines.
57 * All errors in this file have a message printed at the time of occurrence. Since some of
58 * these calls occur within calls to the library, the error may end up being printed again
59 * with a more general error message.
60 */
61
62/******************************************************************************************/
63
64#ifndef XD3_POSIX
65#define XD3_POSIX 0
66#endif
67#ifndef XD3_STDIO
68#define XD3_STDIO 0
69#endif
70#ifndef XD3_WIN32
71#define XD3_WIN32 0
72#endif
73
74/* XPRINTX (used by main) prefixes an "xdelta3: " to the output. */
75#if 0 && XD3_DEBUG
76#define XPR fprintf (stderr, "xdelta3[%u]: ", getpid()); fprintf
77#define NT stderr,
78#else
79#define XPR fprintf
80#define NT stderr, "xdelta3: "
81#endif
82
83#define VC fprintf
84#define OUT vcout,
85
86/* If none are set, default to posix. */
87#if (XD3_POSIX + XD3_STDIO + XD3_WIN32) == 0
88#undef XD3_POSIX
89#define XD3_POSIX 1
90#endif
91
92/* Handle externally-compressed inputs. */
93#ifndef EXTERNAL_COMPRESSION
94#define EXTERNAL_COMPRESSION 1
95#endif
96
97#define PRINTHDR_SPECIAL -4378291
98
99#define PIPE_BUFSIZE (usize_t)(1 << 12)
100#define MIN_BUFSIZE (usize_t)(1 << 12)
101
102/* The number of soft-config variables. Update as field count changes! */
103#define XD3_SOFTCFG_VARCNT 10
104
105/* this is used as in XPR(NT XD3_LIB_ERRMSG (stream, ret)) to print an error message
106 * from the library. */
107#define XD3_LIB_ERRMSG(stream, ret) "%s: %s\n", xd3_errstring (stream), xd3_strerror (ret)
108
109#include <stdio.h> /* fprintf */
110#include <unistd.h> /* lots */
111
112#if XD3_POSIX
113#include <unistd.h> /* close, read, write... */
114#include <sys/types.h>
115#include <fcntl.h>
116#endif
117
118#include <sys/time.h> /* gettimeofday() */
119#include <sys/stat.h> /* stat() and fstat() */
120
121/******************************************************************************************
122 ENUMS and TYPES
123 ******************************************************************************************/
124
125/* These flags (mainly pertaining to main_read() operations) are set in the
126 * main_file->flags variable. All are related to external decompression support.
127 *
128 * RD_FIRST causes the external decompression check when the input is first read.
129 *
130 * RD_NONEXTERNAL disables external decompression for reading a compressed input, in the
131 * case of Xdelta inputs. Note: Xdelta is supported as an external compression type,
132 * which is the reason for this flag. An example to justify this is: to create a
133 * delta between two files that are VCDIFF-compressed. Two external Xdelta decoders are
134 * run to supply decompressed source and target inputs to the Xdelta encoder. */
/* Bit flags stored in main_file->flags. */
typedef enum
{
  RD_FIRST       = 0x1,   /* bit 0 */
  RD_NONEXTERNAL = 0x2,   /* bit 1 */
} xd3_read_flags;
140
/* Values for main_file->mode: whether the file was opened for reading or writing. */
typedef enum
{
  XO_READ,        /* == 0 */
  XO_WRITE,       /* == 1 */
} main_file_modes;
147
/* The top-level commands.  For example, CMD_PRINTHDR is the "xdelta printhdr" command.
 * CMD_ENCODE only exists when the encoder is compiled in (XD3_ENCODER), so the numeric
 * values of the commands that follow it depend on that setting. */
typedef enum
{
  CMD_NONE = 0,

  /* VCDIFF printing commands. */
  CMD_PRINTHDR,
  CMD_PRINTHDRS,
  CMD_PRINTDELTA,

#if XD3_ENCODER
  CMD_ENCODE,
#endif

  CMD_DECODE,
  CMD_TEST,
  CMD_CONFIG,
} xd3_cmd;
162
163#if XD3_ENCODER
164#define CMD_DEFAULT CMD_ENCODE
165#define IS_ENCODE(cmd) (cmd == CMD_ENCODE)
166#else
167#define CMD_DEFAULT CMD_DECODE
168#define IS_ENCODE(cmd) (0)
169#endif
170
171typedef struct _main_file main_file;
172typedef struct _main_extcomp main_extcomp;
173typedef struct _main_blklru main_blklru;
174typedef struct _main_blklru_list main_blklru_list;
175
176/* The main_file object supports abstract system calls like open, close, read, write, seek,
177 * stat. The program uses these to represent both seekable files and non-seekable files.
178 * Source files must be seekable, but the target input and any output file do not require
179 * seekability.
180 */
181struct _main_file
182{
183#if XD3_STDIO
184 FILE *file;
185#elif XD3_POSIX
186 int file;
187#endif
188
189 int mode; /* XO_READ and XO_WRITE */
190 const char *filename; /* File name or /dev/stdin, /dev/stdout, /dev/stderr. */
191 const char *realname; /* File name or /dev/stdin, /dev/stdout, /dev/stderr. */
192 const main_extcomp *compressor; /* External compression struct. */
193 int flags; /* RD_FIRST or RD_NONEXTERNAL */
194 xoff_t nread; /* for input position */
195 xoff_t nwrite; /* for output position */
196};
197
/* Describes one external compression program: the commands used to recompress and
 * decompress, a short identifier, and the magic bytes used to detect its output.  See
 * the extcomp_types table for examples. */
struct _main_extcomp
{
  /* Recompression command and its options. */
  const char *recomp_cmdname;
  const char *recomp_options;

  /* Decompression command and its options. */
  const char *decomp_cmdname;
  const char *decomp_options;

  const char *ident;       /* Short identifier for this compressor. */
  const char *magic;       /* Leading bytes that identify a compressed input. */
  int         magic_size;  /* Number of bytes in magic. */
  int         flags;       /* Read flags for this compressor (e.g. RD_NONEXTERNAL). */
};
213
214/* This file implements a small LRU of source blocks. For encoding purposes,
215 * we prevent paging in blocks we've already scanned in the source (return
216 * XD3_NOTAVAIL). */
217struct _main_blklru_list
218{
219 main_blklru_list *next;
220 main_blklru_list *prev;
221};
222
223struct _main_blklru
224{
225 uint8_t *blk;
226 xoff_t blkno;
227 main_blklru_list link;
228};
229
230/* ... represented as a list (no cache index). */
231XD3_MAKELIST(main_blklru_list,main_blklru,link);
232
233/* Program options: various command line flags and options. */
234static int option_stdout = 0;
235static int option_force = 0;
236static int option_verbose = 0;
237static int option_quiet = 0;
238static int option_level = 6;
239static int option_use_appheader = 1;
240static uint8_t* option_appheader = NULL;
241static int option_use_secondary = /* until-standardized, leave this off */ 0;
242static char* option_secondary = NULL;
243static int option_use_checksum = 1;
244static int option_use_altcodetable = 0;
245static char* option_smatch_config = NULL;
246static int option_no_compress = 0;
247static int option_no_output = 0; /* go through the motions, but do not open or write output */
248static const char *option_source_filename = NULL;
249
250static usize_t option_winsize = XD3_DEFAULT_WINSIZE;
251static usize_t option_srcwinsz = XD3_DEFAULT_SRCWINSZ;
252
253/* Wishful TODO: Support should probably be for partial deltas & update-in-place deltas,
254 * following the latest draft RFC specs partial deltas [the changes have moderate
255 * complexity]. The following flags implement primitive controls to skip sections
256 * of the input & output, mainly for debugging purposes. */
257
258/* DECODE-ONLY: Skips processing windows up to first_window and past last_window using the
259 * XD3_SKIP_WINDOW flag, but main_ still reads/parses every window. TODO: make it
260 * meaningful for encode, etc... */
261/*static xoff_t option_first_window = 0;*/
262/*static xoff_t option_last_window = XOFF_T_MAX;*/
263
264/* ENCODE-ONLY: Seeks to first_offset, EOF at last_offset, done entirely in this main_
265 * routines, so the library actually sees a shortened input. TODO: implement this for
266 * decode, implement proper partial deltas, works with external compression?, works with
267 * non-seekable inputs?, change ranges, etc... */
268/*static xoff_t option_first_offset = 0;*/
269/*static xoff_t option_last_offset = XOFF_T_MAX;*/
270
271/* This controls the number of times main repeats itself, only for profiling. */
272static int option_profile_cnt = 0;
273
274/* These variables are suppressed to avoid their use w/o support. main() warns
275 * appropriately. */
276#if EXTERNAL_COMPRESSION
277static int option_decompress_inputs = 1;
278static int option_recompress_outputs = 1;
279#endif
280
281/* This is for comparing "printdelta" output without attention to
282 * copy-instruction modes, useful for reverse engineering. */
283#if VCDIFF_TOOLS
284static int option_print_cpymode = 1;
285#endif
286
287/* Static variables */
288IF_DEBUG(static int main_mallocs = 0;)
289
290static char* program_name = NULL;
291static uint8_t* appheader_used = NULL;
292static uint8_t* main_bdata = NULL;
293
294/* The LRU: obviously this is shared by all callers. */
295static int lru_size = 0;
296static main_blklru *lru = NULL; /* array of lru_size elts */
297static main_blklru_list lru_list;
298static main_blklru_list lru_free;
299static int do_not_lru = 0; /* set to avoid lru, instead discard oldest */
300
301static int lru_hits = 0;
302static int lru_misses = 0;
303static int lru_filled = 0;
304
305/* Hacks for VCDIFF tools */
306static int allow_fake_source = 0;
307
308/* This array of compressor types is compiled even if EXTERNAL_COMPRESSION is false just so
309 * the program knows the mapping of IDENT->NAME. */
310static main_extcomp extcomp_types[] =
311{
312 /* The entry for xdelta must be first because the program_name is set here. */
313 { "xdelta3", "-cfq", "xdelta3", "-dcfq", "X", "\xd6\xc3\xc4", 3, RD_NONEXTERNAL },
314 { "bzip2", "-cf", "bzip2", "-dcf", "B", "BZh", 3, 0 },
315 { "gzip", "-cf", "gzip", "-dcf", "G", "\037\213", 2, 0 },
316 { "compress", "-cf", "uncompress", "-cf", "Z", "\037\235", 2, 0 },
317};
318
319static void main_get_appheader (xd3_stream *stream, main_file *output, main_file *sfile);
320
321static int main_help (void);
322
323static int
324main_version (void)
325{
326 P(RINT "VERSION=3_PRERFC_0\n");
327 return EXIT_SUCCESS;
328}
329
330static int
331main_config (void)
332{
333 main_version ();
334 /* Compile-time */
335 P(RINT "VCDIFF_TOOLS=%d\n", VCDIFF_TOOLS);
336 P(RINT "REGRESSION_TEST=%d\n", REGRESSION_TEST);
337 P(RINT "SECONDARY_FGK=%d\n", SECONDARY_FGK);
338 P(RINT "SECONDARY_DJW=%d\n", SECONDARY_DJW);
339 P(RINT "GENERIC_ENCODE_TABLES=%d\n", GENERIC_ENCODE_TABLES);
340 P(RINT "GENERIC_ENCODE_TABLES_COMPUTE=%d\n", GENERIC_ENCODE_TABLES_COMPUTE);
341 P(RINT "EXTERNAL_COMPRESSION=%d\n", EXTERNAL_COMPRESSION);
342 P(RINT "XD3_POSIX=%d\n", XD3_POSIX);
343 P(RINT "XD3_DEBUG=%d\n", XD3_DEBUG);
344 P(RINT "XD3_USE_LARGEFILE64=%d\n", XD3_USE_LARGEFILE64);
345 P(RINT "XD3_ENCODER=%d\n", XD3_ENCODER);
346 /* Runtime sizes */
347 P(RINT "XD3_DEFAULT_WINSIZE=%d\n", XD3_DEFAULT_WINSIZE);
348 P(RINT "XD3_DEFAULT_SRCBLKSZ=%d\n", XD3_DEFAULT_SRCBLKSZ);
349 P(RINT "XD3_DEFAULT_SRCWINSZ=%d\n", XD3_DEFAULT_SRCWINSZ);
350 P(RINT "XD3_DEFAULT_MEMSIZE=%d\n", XD3_DEFAULT_MEMSIZE);
351 P(RINT "XD3_ALLOCSIZE=%d\n", XD3_ALLOCSIZE);
352 P(RINT "XD3_HARDMAXWINSIZE=%d\n", XD3_HARDMAXWINSIZE);
353 P(RINT "XD3_NODECOMPRESSSIZE=%d\n", XD3_NODECOMPRESSSIZE);
354 P(RINT "XD3_DEFAULT_IOPT_SIZE=%d\n", XD3_DEFAULT_IOPT_SIZE);
355 P(RINT "XD3_DEFAULT_SPREVSZ=%d\n", XD3_DEFAULT_SPREVSZ);
356
357 return EXIT_SUCCESS;
358}
359
360static void*
361main_malloc1 (usize_t size)
362{
363 void* r = malloc (size);
364 if (r == NULL) { XPR(NT "malloc: %s\n", xd3_strerror (ENOMEM)); }
365 else if (option_verbose > 2) { XPR(NT "malloc: %u\n", size); }
366 return r;
367}
368
369static void*
370main_malloc (usize_t size)
371{
372 void *r = main_malloc1 (size);
373 if (r) { IF_DEBUG (main_mallocs += 1); }
374 return r;
375}
376
377static void*
378main_alloc (void *opaque,
379 usize_t items,
380 usize_t size)
381{
382 return main_malloc1 (items * size);
383}
384
385static void
386main_free (void **ptr)
387{
388 if (*ptr)
389 {
390 IF_DEBUG (main_mallocs -= 1);
391 free (*ptr);
392 (*ptr) = NULL;
393 }
394}
395
/* Deallocation callback: releases PTR with free().  OPAQUE is unused. */
static void
main_free1 (void *opaque, void *ptr)
{
  (void) opaque;

  free (ptr);
}
401
402/* This ensures that (ret = errno) always indicates failure, in case errno was
403 * accidentally not set. If this prints there's a bug somewhere. */
404static int
405get_errno (void)
406{
407 if (errno == 0)
408 {
409 XPR(NT "you found a bug: expected errno != 0\n");
410 errno = EINVAL;
411 }
412 return errno;
413}
414
/* Return the current gettimeofday() time expressed in milliseconds. */
static long
get_millisecs_now (void)
{
  struct timeval now;
  long ms;

  gettimeofday (& now, NULL);

  ms = (now.tv_sec) * 1000L + (now.tv_usec) / 1000;
  return ms;
}
424
/* Return the number of milliseconds elapsed since the previous call, never less than 1.
 * The previous timestamp lives in a function-local static, which is zero-initialized
 * before the first call. */
static long
get_millisecs_since (void)
{
  static struct timeval last;   /* timestamp recorded by the previous call */
  struct timeval now;
  double elapsed;

  gettimeofday (& now, NULL);

  /* Accumulate in microseconds, then scale down to milliseconds. */
  elapsed = (now.tv_sec - last.tv_sec) * 1e6;
  elapsed += (now.tv_usec - last.tv_usec);
  elapsed /= 1000;

  last = now;

  return max ((long)elapsed, 1L);
}
444
445static char*
446main_format_bcnt (xoff_t r, char *buf)
447{
448 static const char* fmts[] = { "B", "KB", "MB", "GB" };
449 int i;
450
451 for (i = 0; i < SIZEOF_ARRAY(fmts); i += 1)
452 {
453 if (r < 10 * 1e3 || i == -1 + SIZEOF_ARRAY(fmts))
454 {
455 sprintf (buf, "%"Q"u %s", r, fmts[i]);
456 break;
457 }
458 r /= 1000;
459 }
460 return buf;
461}
462
463static char*
464main_format_rate (xoff_t bytes, long millis, char *buf)
465{
466 xoff_t r = 1.0 * bytes / (1.0 * millis / 1000.0);
467 static char lbuf[32];
468
469 main_format_bcnt (r, lbuf);
470 sprintf (buf, "%s/sec", lbuf);
471 return buf;
472}
473
/* Format a duration of MILLIS milliseconds into BUF and return BUF:
 *   under 1 second    -> "<n> ms"
 *   under 10 seconds  -> "<n.n> sec"
 *   otherwise         -> "<n> sec"
 *
 * MILLIS is a signed long, so it is printed with %ld; the previous %lu was a mismatched
 * conversion and rendered negative values as huge unsigned numbers. */
static char*
main_format_millis (long millis, char *buf)
{
  if (millis < 1000)       { sprintf (buf, "%ld ms", millis); }
  else if (millis < 10000) { sprintf (buf, "%.1f sec", millis / 1000.0); }
  else                     { sprintf (buf, "%ld sec", millis / 1000L); }
  return buf;
}
482
483/* A safe version of strtol for xoff_t. */
484static int
485main_strtoxoff (const char* s, xoff_t *xo, char which)
486{
487 char *e;
488 xoff_t x;
489
490 XD3_ASSERT(s && *s != 0);
491
492 {
493 /* Should check LONG_MIN, LONG_MAX, LLONG_MIN, LLONG_MAX? */
494#if SIZEOF_XOFF_T == 4
495 long xx = strtol (s, &e, 0);
496#else
497 long long xx = strtoll (s, &e, 0);
498#endif
499
500 if (xx < 0)
501 {
502 XPR(NT "-%c: negative integer: %s\n", which, s);
503 return EXIT_FAILURE;
504 }
505
506 x = xx;
507 }
508
509 if (*e != 0)
510 {
511 XPR(NT "-%c: invalid integer: %s\n", which, s);
512 return EXIT_FAILURE;
513 }
514
515 (*xo) = x;
516 return 0;
517}
518
519static int
520main_atou (const char* arg, usize_t *xo, usize_t low, char which)
521{
522 xoff_t x;
523 int ret;
524
525 if ((ret = main_strtoxoff (arg, & x, which))) { return ret; }
526
527 if (x > USIZE_T_MAX || x < low)
528 {
529 XPR(NT "-%c: minimum value: %u", which, low);
530 return EXIT_FAILURE;
531 }
532 (*xo) = x;
533 return 0;
534}
535
536/******************************************************************************************
537 FILE BASICS
538 ******************************************************************************************/
539
540/* With all the variation in file system-call semantics, arguments, return values and
541 * error-handling for the POSIX and STDIO file APIs, the insides of these functions make
542 * me sick, which is why these wrappers exist. */
543
544#define XOPEN_OPNAME (xfile->mode == XO_READ ? "read" : "write")
545#define XOPEN_STDIO (xfile->mode == XO_READ ? "rb" : "wb")
546#define XOPEN_POSIX (xfile->mode == XO_READ ? O_RDONLY : O_WRONLY | O_CREAT | O_TRUNC)
547#define XOPEN_MODE (xfile->mode == XO_READ ? 0 : 0666)
548
549#define XF_ERROR(op, name, ret) XPR(NT "file %s failed: %s: %s: %s\n", (op), XOPEN_OPNAME, (name), xd3_strerror (ret))
550
551#if XD3_STDIO
552#define XFNO(f) fileno(f->file)
553#define XSTDOUT_XF(f) { (f)->file = stdout; (f)->filename = "/dev/stdout"; }
554#define XSTDERR_XF(f) { (f)->file = stderr; (f)->filename = "/dev/stderr"; }
555#define XSTDIN_XF(f) { (f)->file = stdin; (f)->filename = "/dev/stdin"; }
556
557#elif XD3_POSIX
558#define XFNO(f) f->file
559#define XSTDOUT_XF(f) { (f)->file = STDOUT_FILENO; (f)->filename = "/dev/stdout"; }
560#define XSTDERR_XF(f) { (f)->file = STDERR_FILENO; (f)->filename = "/dev/stderr"; }
561#define XSTDIN_XF(f) { (f)->file = STDIN_FILENO; (f)->filename = "/dev/stdin"; }
562#endif
563
564static void
565main_file_init (main_file *xfile)
566{
567 memset (xfile, 0, sizeof (*xfile));
568
569#if XD3_POSIX
570 xfile->file = -1;
571#endif
572}
573
574static int
575main_file_isopen (main_file *xfile)
576{
577#if XD3_STDIO
578 return xfile->file != NULL;
579
580#elif XD3_POSIX
581 return xfile->file != -1;
582#endif
583}
584
585static int
586main_file_close (main_file *xfile)
587{
588 int ret = 0;
589
590 if (! main_file_isopen (xfile))
591 {
592 return 0;
593 }
594
595#if XD3_STDIO
596 ret = fclose (xfile->file);
597 xfile->file = NULL;
598
599#elif XD3_POSIX
600 ret = close (xfile->file);
601 xfile->file = -1;
602#endif
603
604 if (ret != 0) { XF_ERROR ("close", xfile->filename, ret = get_errno ()); }
605 return ret;
606}
607
608static int
609main_file_open (main_file *xfile, const char* name, int mode)
610{
611 int ret = 0;
612
613 xfile->mode = mode;
614
615 XD3_ASSERT (! main_file_isopen (xfile));
616
617#if XD3_STDIO
618 xfile->file = fopen (name, XOPEN_STDIO);
619
620 ret = (xfile->file == NULL) ? get_errno () : 0;
621
622#elif XD3_POSIX
623 if ((ret = open (name, XOPEN_POSIX, XOPEN_MODE)) < 0)
624 {
625 ret = get_errno ();
626 }
627 else
628 {
629 xfile->file = ret;
630 ret = 0;
631 }
632#endif
633 if (ret) { XF_ERROR ("open", name, ret); }
634 else { xfile->realname = name; xfile->nread = 0; }
635 return ret;
636}
637
638static int
639main_file_stat (main_file *xfile, xoff_t *size, int err_ifnoseek)
640{
641 int ret;
642 struct stat sbuf;
643
644 XD3_ASSERT (main_file_isopen (xfile));
645
646 if (fstat (XFNO (xfile), & sbuf) < 0)
647 {
648 ret = get_errno ();
649 if (err_ifnoseek) { XF_ERROR ("stat", xfile->filename, ret); }
650 return ret;
651 }
652
653 if (! S_ISREG (sbuf.st_mode))
654 {
655 if (err_ifnoseek) { XPR(NT "source file must be seekable: %s\n", xfile->filename); }
656 return ESPIPE;
657 }
658
659 (*size) = sbuf.st_size;
660 return 0;
661}
662
663static int
664main_file_exists (main_file *xfile)
665{
666 struct stat sbuf;
667 return stat (xfile->filename, & sbuf) == 0 && S_ISREG (sbuf.st_mode);
668}
669
670#if (XD3_POSIX || EXTERNAL_COMPRESSION)
671/* POSIX-generic code takes a function pointer to read() or write(). This calls the
672 * function repeatedly until the buffer is full or EOF. The NREAD parameter is not
673 * set for write, NULL is passed. Return is signed, < 0 indicate errors, otherwise
674 * byte count. */
675typedef int (xd3_posix_func) (int fd, uint8_t *buf, usize_t size);
676
677static int
678xd3_posix_io (int fd, uint8_t *buf, usize_t size, xd3_posix_func *func, usize_t *nread)
679{
680 int ret;
681 usize_t nproc = 0;
682
683 while (nproc < size)
684 {
685 int result = (*func) (fd, buf + nproc, size - nproc);
686
687 if (result < 0)
688 {
689 ret = get_errno ();
690 if (ret != EAGAIN && ret != EINTR)
691 {
692 return ret;
693 }
694 result = 0;
695 }
696
697 if (nread != NULL && result == 0) { break; }
698
699 nproc += result;
700 }
701 if (nread != NULL) { (*nread) = nproc; }
702 return 0;
703}
704#endif
705
706/* POSIX is unbuffered, while STDIO is buffered. main_file_read() should always be called
707 * on blocks. */
708static int
709main_file_read (main_file *ifile,
710 uint8_t *buf,
711 usize_t size,
712 usize_t *nread,
713 const char *msg)
714{
715 int ret = 0;
716
717#if XD3_STDIO
718 usize_t result;
719
720 result = fread (buf, 1, size, ifile->file);
721
722 if (result < size && ferror (ifile->file))
723 {
724 ret = get_errno ();
725 }
726 else
727 {
728 *nread = result;
729 }
730
731#elif XD3_POSIX
732 ret = xd3_posix_io (ifile->file, buf, size, (xd3_posix_func*) &read, nread);
733#endif
734
735 if (ret)
736 {
737 XPR(NT "%s: %s: %s\n", msg, ifile->filename, xd3_strerror (ret));
738 }
739 else
740 {
741 if (option_verbose > 2) { XPR(NT "main read: %s: %u\n", ifile->filename, (*nread)); }
742 ifile->nread += (*nread);
743 }
744
745 return ret;
746}
747
748static int
749main_file_write (main_file *ofile, uint8_t *buf, usize_t size, const char *msg)
750{
751 int ret = 0;
752
753#if XD3_STDIO
754 usize_t result;
755
756 result = fwrite (buf, 1, size, ofile->file);
757
758 if (result != size) { ret = get_errno (); }
759
760#elif XD3_POSIX
761 ret = xd3_posix_io (ofile->file, buf, size, (xd3_posix_func*) &write, NULL);
762#endif
763
764 if (ret)
765 {
766 XPR(NT "%s: %s: %s\n", msg, ofile->filename, xd3_strerror (ret));
767 }
768 else
769 {
770 if (option_verbose > 2) { XPR(NT "main write: %s: %u\n", ofile->filename, size); }
771 ofile->nwrite += size;
772 }
773
774 return ret;
775}
776
777static int
778main_file_seek (main_file *xfile, xoff_t pos)
779{
780 int ret = 0;
781
782#if XD3_STDIO
783 if (fseek (xfile->file, pos, SEEK_SET) != 0) { ret = get_errno (); }
784#else
785 if (lseek (xfile->file, pos, SEEK_SET) != pos) { ret = get_errno (); }
786#endif
787
788 if (ret)
789 {
790 XPR(NT "seek failed: %s: %s\n", xfile->filename, xd3_strerror (ret));
791 }
792
793 return ret;
794}
795
796/******************************************************************************************
797 VCDIFF TOOLS
798 ******************************************************************************************/
799
800#if VCDIFF_TOOLS
801/* This function prints a single VCDIFF window, mainly for debugging purposes. */
802static int
803main_print_window (xd3_stream* stream, FILE *vcout)
804{
805 int ret;
806 usize_t size = 0;
807
808 VC(OUT " Offset Code Type1 Size1 @Addr1 + Type2 Size2 @Addr2\n");
809
810 while (stream->inst_sect.buf < stream->inst_sect.buf_max)
811 {
812 uint code = stream->inst_sect.buf[0];
813
814 if ((ret = xd3_decode_instruction (stream))) { return ret; }
815
816 VC(OUT " %06"Q"u %03u %s %3u", stream->dec_winstart + size, code,
817 xd3_rtype_to_string (stream->dec_current1.type, option_print_cpymode),
818 stream->dec_current1.size);
819
820 if (stream->dec_current1.type != XD3_NOOP)
821 {
822 size += stream->dec_current1.size;
823 if (stream->dec_current1.type >= XD3_CPY)
824 {
825 VC(OUT " @%-6u", stream->dec_current1.addr);
826 }
827 else
828 {
829 VC(OUT " ");
830 }
831 }
832
833 if (stream->dec_current2.type != XD3_NOOP)
834 {
835 size += stream->dec_current2.size;
836 VC(OUT " %s %3u",
837 xd3_rtype_to_string (stream->dec_current2.type, option_print_cpymode),
838 stream->dec_current2.size);
839
840 if (stream->dec_current2.type >= XD3_CPY)
841 {
842 VC(OUT " @%-6u", stream->dec_current2.addr);
843 }
844 }
845
846 VC(OUT "\n");
847 }
848
849 if (stream->dec_tgtlen != size && (stream->flags & XD3_SKIP_WINDOW) == 0)
850 {
851 XPR(NT "target window size inconsistency");
852 return EINVAL;
853 }
854
855 if (stream->dec_position != stream->dec_maxpos)
856 {
857 XPR(NT "target window position inconsistency");
858 return EINVAL;
859 }
860
861 if (stream->addr_sect.buf != stream->addr_sect.buf_max)
862 {
863 XPR(NT "address section inconsistency");
864 return EINVAL;
865 }
866
867 IF_DEBUG (VC(OUT "SIZE=%u TGTLEN=%u\n", size, stream->dec_tgtlen));
868
869 return 0;
870}
871
872static void
873main_print_vcdiff_file (main_file *file, const char *type, FILE *vcout)
874{
875 if (file->filename) { VC(OUT "XDELTA filename (%s): %s\n", type, file->filename); }
876 if (file->compressor) { VC(OUT "XDELTA ext comp (%s): %s\n", type, file->compressor->recomp_cmdname); }
877}
878
879/* This function prints a VCDIFF input, mainly for debugging purposes. */
880static int
881main_print_func (xd3_stream* stream, main_file *xfile)
882{
883 int ret;
884 FILE *vcout;
885#if XD3_POSIX
886 if (! (vcout = fdopen (dup(xfile->file), "w")))
887 {
888 ret = get_errno ();
889 XPR(NT "fdopen: %s: %s\n", xfile->filename, xd3_strerror (ret));
890 return ret;
891 }
892#elif XD3_STDIO
893 vcout = xfile->file;
894#endif
895 XD3_ASSERT (vcout);
896 if (stream->dec_winstart == 0)
897 {
898 VC(OUT "VCDIFF version: 0\n");
899
900 VC(OUT "VCDIFF header size: %d\n", stream->dec_hdrsize);
901 VC(OUT "VCDIFF header indicator: ");
902 if ((stream->dec_hdr_ind & VCD_SECONDARY) != 0) VC(OUT "VCD_SECONDARY ");
903 if ((stream->dec_hdr_ind & VCD_CODETABLE) != 0) VC(OUT "VCD_CODETABLE ");
904 if ((stream->dec_hdr_ind & VCD_APPHEADER) != 0) VC(OUT "VCD_APPHEADER ");
905 if (stream->dec_hdr_ind == 0) VC(OUT "none");
906 VC(OUT "\n");
907
908 IF_SEC(VC(OUT "VCDIFF secondary compressor: %s\n", stream->sec_type ? stream->sec_type->name : "none"));
909 IF_NSEC(VC(OUT "VCDIFF secondary compressor: unsupported\n"));
910
911 if (stream->dec_hdr_ind & VCD_APPHEADER)
912 {
913 uint8_t *apphead;
914 usize_t appheadsz;
915 ret = xd3_get_appheader (stream, & apphead, & appheadsz);
916
917 if (ret == 0 && appheadsz > 0)
918 {
919 int sq = option_quiet;
920 main_file o, s;
921 XD3_ASSERT (apphead != NULL);
922 VC(OUT "VCDIFF application header: ");
923 fwrite (apphead, 1, appheadsz, vcout);
924 VC(OUT "\n");
925
926 main_file_init (& o);
927 main_file_init (& s);
928 option_quiet = 1;
929 main_get_appheader (stream, & o, & s);
930 option_quiet = sq;
931 main_print_vcdiff_file (& o, "output", vcout);
932 main_print_vcdiff_file (& s, "source", vcout);
933 }
934 }
935 }
936 else
937 {
938 VC(OUT "\n");
939 }
940
941 VC(OUT "VCDIFF window number: %"Q"u\n", stream->current_window);
942 VC(OUT "VCDIFF window indicator: ");
943 if ((stream->dec_win_ind & VCD_SOURCE) != 0) VC(OUT "VCD_SOURCE ");
944 if ((stream->dec_win_ind & VCD_TARGET) != 0) VC(OUT "VCD_TARGET ");
945 if ((stream->dec_win_ind & VCD_ADLER32) != 0) VC(OUT "VCD_ADLER32 ");
946 if (stream->dec_win_ind == 0) VC(OUT "none");
947 VC(OUT "\n");
948
949 if ((stream->dec_win_ind & VCD_ADLER32) != 0)
950 {
951 VC(OUT "VCDIFF adler32 checksum: %08X\n", stream->dec_adler32);
952 }
953
954 if (stream->dec_del_ind != 0)
955 {
956 VC(OUT "VCDIFF delta indicator: ");
957 if ((stream->dec_del_ind & VCD_DATACOMP) != 0) VC(OUT "VCD_DATACOMP ");
958 if ((stream->dec_del_ind & VCD_INSTCOMP) != 0) VC(OUT "VCD_INSTCOMP ");
959 if ((stream->dec_del_ind & VCD_ADDRCOMP) != 0) VC(OUT "VCD_ADDRCOMP ");
960 if (stream->dec_del_ind == 0) VC(OUT "none");
961 VC(OUT "\n");
962 }
963
964 if (stream->dec_winstart != 0)
965 {
966 VC(OUT "VCDIFF window at offset: %"Q"u\n", stream->dec_winstart);
967 }
968
969 if (SRCORTGT (stream->dec_win_ind))
970 {
971 VC(OUT "VCDIFF copy window length: %u\n", stream->dec_cpylen);
972 VC(OUT "VCDIFF copy window offset: %"Q"u\n", stream->dec_cpyoff);
973 }
974
975 VC(OUT "VCDIFF delta encoding length: %u\n", stream->dec_enclen);
976 VC(OUT "VCDIFF target window length: %u\n", stream->dec_tgtlen);
977
978 VC(OUT "VCDIFF data section length: %u\n", stream->data_sect.size);
979 VC(OUT "VCDIFF inst section length: %u\n", stream->inst_sect.size);
980 VC(OUT "VCDIFF addr section length: %u\n", stream->addr_sect.size);
981
982 ret = 0;
983 if ((stream->flags & XD3_JUST_HDR) != 0)
984 {
985 /* Print a header -- finished! */
986 ret = PRINTHDR_SPECIAL;
987 }
988 else if ((stream->flags & XD3_SKIP_WINDOW) == 0)
989 {
990 ret = main_print_window (stream, vcout);
991 }
992
993 fclose (vcout);
994 return ret;
995}
996#endif /* VCDIFF_TOOLS */
997
998/******************************************************************************************
999 Input decompression, output recompression
1000 ******************************************************************************************/
1001
1002#if EXTERNAL_COMPRESSION
1003/* This is tricky POSIX-specific code with lots of fork(), pipe(), dup(), waitpid(), and
1004 * exec() business. Most of this code originated in PRCS1, which did automatic
1005 * package-file decompression. It works with both XD3_POSIX and XD3_STDIO file
1006 * disciplines.
1007 *
1008 * To automatically detect compressed inputs requires a child process to reconstruct the
1009 * input stream, which was advanced in order to detect compression, because it may not be
1010 * seekable. In other words, the main program reads part of the input stream, and if it
1011 * detects a compressed input it then forks a pipe copier process, which copies the
1012 * first-read block out of the main-program's memory, then streams the remaining
1013 * compressed input into the input-decompression pipe.
1014 */
1015
1016#include <unistd.h>
1017#include <sys/stat.h>
1018#include <sys/wait.h>
1019
1020/* Remember which pipe FD is which. */
1021#define PIPE_READ_FD 0
1022#define PIPE_WRITE_FD 1
1023
1024static pid_t ext_subprocs[2];
1025static const char* ext_tmpfile = NULL;
1026
1027/* Like write(), but makes repeated calls to empty the buffer. */
1028static int
1029main_pipe_write (int outfd, const uint8_t *exist_buf, usize_t remain)
1030{
1031 int ret;
1032
1033 if ((ret = xd3_posix_io (outfd, (uint8_t*) exist_buf, remain, (xd3_posix_func*) &write, NULL)))
1034 {
1035 XPR(NT "pipe write failed: %s", xd3_strerror (ret));
1036 return ret;
1037 }
1038
1039 return 0;
1040}
1041
1042/* A simple error-reporting waitpid interface. */
1043static int
1044main_waitpid_check(pid_t pid)
1045{
1046 int status;
1047 int ret = 0;
1048
1049 if (waitpid (pid, & status, 0) < 0)
1050 {
1051 ret = get_errno ();
1052 XPR(NT "compression subprocess: wait: %s\n", xd3_strerror (ret));
1053 }
1054 else if (! WIFEXITED (status))
1055 {
1056 ret = ECHILD;
1057 XPR(NT "compression subprocess: signal %d\n",
1058 WIFSIGNALED (status) ? WTERMSIG (status) : WSTOPSIG (status));
1059 }
1060 else if (WEXITSTATUS (status) != 0)
1061 {
1062 ret = ECHILD;
1063 XPR(NT "compression subprocess: exit %d\n", WEXITSTATUS (status));
1064 }
1065
1066 return ret;
1067}
1068
1069/* Wait for any existing child processes to check for abnormal exit. */
1070static int
1071main_external_compression_finish (void)
1072{
1073 int i;
1074 int ret;
1075
1076 for (i = 0; i < 2; i += 1)
1077 {
1078 if (! ext_subprocs[i]) { continue; }
1079
1080 if ((ret = main_waitpid_check (ext_subprocs[i])))
1081 {
1082 return ret;
1083 }
1084 }
1085
1086 return 0;
1087}
1088
1089/* This runs as a forked process of main_input_decompress_setup() to copy input to the
1090 * decompression process. First, the available input is copied out of the existing
1091 * buffer, then the buffer is reused to continue reading from the compressed input
1092 * file. */
1093static int
1094main_pipe_copier (uint8_t *pipe_buf,
1095 usize_t pipe_bufsize,
1096 usize_t nread,
1097 main_file *ifile,
1098 int outfd)
1099{
1100 int ret;
1101
1102 for (;;)
1103 {
1104 if (nread > 0 && (ret = main_pipe_write (outfd, pipe_buf, nread)))
1105 {
1106 return ret;
1107 }
1108
1109 if (nread < pipe_bufsize)
1110 {
1111 break;
1112 }
1113
1114 if ((ret = main_file_read (ifile, pipe_buf, pipe_bufsize, & nread, "pipe read failed")) < 0)
1115 {
1116 return ret;
1117 }
1118 }
1119
1120 return 0;
1121}
1122
1123/* This function is called after we have read some amount of data from the input file and
1124 * detected a compressed input. Here we start a decompression subprocess by forking
1125 * twice. The first process runs the decompression command, the second process copies
1126 * data to the input of the first.
 *
 * decomp        - supplies the command name and options passed to execlp(); also stored
 *                 in ifile->compressor on success
 * ifile         - the compressed input; the parent closes it and then replaces its
 *                 `file' member with the read end of the decompressor's output pipe
 * input_buf,
 * input_bufsize,
 * nread         - forwarded to the final main_file_read(), which reads the first block
 *                 of decompressed data
 * pipe_buf,
 * pipe_bufsize,
 * pipe_avail    - forwarded to main_pipe_copier(): the buffer, its capacity and the
 *                 number of already-read bytes it holds
 *
 * Returns the result of the final main_file_read() on success, otherwise the errno
 * value of the call that failed during setup. */
1127static int
1128main_input_decompress_setup (const main_extcomp *decomp,
1129 main_file *ifile,
1130 uint8_t *input_buf,
1131 usize_t input_bufsize,
1132 uint8_t *pipe_buf,
1133 usize_t pipe_bufsize,
1134 usize_t pipe_avail,
1135 usize_t *nread)
1136{
1137 int outpipefd[2], inpipefd[2]; /* The two pipes: input and output file descriptors. */
1138 int input_fd = -1; /* The resulting input_fd (output of decompression). */
1139 pid_t decomp_id, copier_id; /* The two subprocs. */
1140 int ret;
1141
 /* Start with every descriptor marked invalid: pipe_cleanup closes all of them
  * unconditionally and may be reached before the pipes exist. */
1142 outpipefd[0] = outpipefd[1] = -1;
1143 inpipefd[0] = inpipefd[1] = -1;
1144
1145 if (pipe (outpipefd) || pipe (inpipefd))
1146 {
1147 XPR(NT "pipe failed: %s\n", xd3_strerror (ret = get_errno ()));
1148 goto pipe_cleanup;
1149 }
1150
1151 if ((decomp_id = fork ()) < 0)
1152 {
1153 XPR(NT "fork failed: %s\n", xd3_strerror (ret = get_errno ()));
1154 goto pipe_cleanup;
1155 }
1156
1157 /* The first child runs the decompression process: */
1158 if (decomp_id == 0)
1159 {
1160 /* Setup pipes: write to the outpipe, read from the inpipe. */
 /* The chain short-circuits at the first failing step.  execlp() only returns on
  * failure, so the message and _exit(127) below run only when setup or exec
  * failed. */
1161 if (dup2 (outpipefd[PIPE_WRITE_FD], STDOUT_FILENO) < 0 ||
1162 dup2 (inpipefd[PIPE_READ_FD], STDIN_FILENO) < 0 ||
1163 close (outpipefd[PIPE_READ_FD]) ||
1164 close (outpipefd[PIPE_WRITE_FD]) ||
1165 close (inpipefd[PIPE_READ_FD]) ||
1166 close (inpipefd[PIPE_WRITE_FD]) ||
1167 execlp (decomp->decomp_cmdname, decomp->decomp_cmdname, decomp->decomp_options, NULL))
1168 {
1169 XPR(NT "child process %s failed to execute: %s\n", decomp->decomp_cmdname, xd3_strerror (get_errno ()));
1170 }
1171
1172 _exit (127);
1173 }
1174
 /* Recorded so that main_external_compression_finish() can wait for the child. */
1175 ext_subprocs[0] = decomp_id;
1176
1177 if ((copier_id = fork ()) < 0)
1178 {
1179 XPR(NT "fork failed: %s\n", xd3_strerror (ret = get_errno ()));
1180 goto pipe_cleanup;
1181 }
1182
1183 /* The second child runs the copier process: */
1184 if (copier_id == 0)
1185 {
1186 int exitval = 0;
1187
 /* The copier only writes into the decompressor's input pipe, so it drops the read
  * end first.  NOTE(review): it does not close its copies of the outpipefd
  * descriptors; they stay open until the copier exits -- confirm that is intended.
  * NOTE(review): main_pipe_copier() returns its own error code, so the errno printed
  * below may not describe the actual failure. */
1188 if (close (inpipefd[PIPE_READ_FD]) ||
1189 main_pipe_copier (pipe_buf, pipe_bufsize, pipe_avail, ifile, inpipefd[PIPE_WRITE_FD]) ||
1190 close (inpipefd[PIPE_WRITE_FD]))
1191 {
1192 XPR(NT "child copier process failed: %s\n", xd3_strerror (get_errno ()));
1193 exitval = 1;
1194 }
1195
1196 _exit (exitval);
1197 }
1198
1199 ext_subprocs[1] = copier_id;
1200
1201 /* The parent closes both pipes after duplicating the output of decompression. */
1202 input_fd = dup (outpipefd[PIPE_READ_FD]);
1203
 /* The parent also closes the compressed input file here; from now on only the
  * copier child reads it. */
1204 if (input_fd < 0 ||
1205 main_file_close (ifile) ||
1206 close (outpipefd[PIPE_READ_FD]) ||
1207 close (outpipefd[PIPE_WRITE_FD]) ||
1208 close (inpipefd[PIPE_READ_FD]) ||
1209 close (inpipefd[PIPE_WRITE_FD]))
1210 {
1211 XPR(NT "dup/close failed: %s\n", xd3_strerror (ret = get_errno ()));
1212 goto pipe_cleanup;
1213 }
1214
1215#if XD3_STDIO
1216 /* Note: fdopen() acquires the fd, closes it when finished. */
1217 if ((ifile->file = fdopen (input_fd, "r")) == NULL)
1218 {
1219 XPR(NT "fdopen failed: %s\n", xd3_strerror (ret = get_errno ()));
1220 goto pipe_cleanup;
1221 }
1222
1223#elif XD3_POSIX
1224 ifile->file = input_fd;
1225#endif
1226
1227 ifile->compressor = decomp;
1228
1229 /* Now the input file is decompressed. */
1230 return main_file_read (ifile, input_buf, input_bufsize, nread, "input decompression failed");
1231
 /* Error exit.  NOTE(review): when reached from the dup/close or fdopen failure paths,
  * some of these descriptors have already been closed above and are closed a second
  * time here. */
1232 pipe_cleanup:
1233 close (input_fd);
1234 close (outpipefd[PIPE_READ_FD]);
1235 close (outpipefd[PIPE_WRITE_FD]);
1236 close (inpipefd[PIPE_READ_FD]);
1237 close (inpipefd[PIPE_WRITE_FD]);
1238 return ret;
1239}
1240
1241
1242/* This routine is called when the first buffer of input data is read by the main program
1243 * (unless input decompression is disabled by command-line option). If it recognizes the
1244 * magic number of a known input type it invokes decompression.
1245 *
1246 * Skips decompression if the decompression type or the file type is RD_NONEXTERNAL.
1247 *
1248 * Behaves exactly like main_file_read, otherwise.
1249 *
1250 * This function uses a separate buffer to read the first small block of input. If a
1251 * compressed input is detected, the separate buffer is passed to the pipe copier. This
1252 * avoids using the same size buffer in both cases. */
1253static int
1254main_decompress_input_check (main_file *ifile,
1255 uint8_t *input_buf,
1256 usize_t input_size,
1257 usize_t *nread)
1258{
1259 int i;
1260 int ret;
1261 uint8_t check_buf[PIPE_BUFSIZE];
1262 usize_t check_nread;
1263
1264 if ((ret = main_file_read (ifile, check_buf, min (input_size, PIPE_BUFSIZE), & check_nread, "input read failed")))
1265 {
1266 return ret;
1267 }
1268
1269 for (i = 0; i < SIZEOF_ARRAY (extcomp_types); i += 1)
1270 {
1271 const main_extcomp *decomp = & extcomp_types[i];
1272
1273 if ((check_nread > decomp->magic_size) &&
1274 /* The following expr skips decompression if we are trying to read a VCDIFF
1275 * input and that is the magic number. */
1276 !((decomp->flags & RD_NONEXTERNAL) && (ifile->flags & RD_NONEXTERNAL)) &&
1277 memcmp (check_buf, decomp->magic, decomp->magic_size) == 0)
1278 {
1279 if (! option_quiet)
1280 {
1281 XPR(NT "%s | %s %s\n",
1282 ifile->filename,
1283 decomp->decomp_cmdname,
1284 decomp->decomp_options);
1285 }
1286
1287 return main_input_decompress_setup (decomp, ifile,
1288 input_buf, input_size,
1289 check_buf, PIPE_BUFSIZE,
1290 check_nread, nread);
1291 }
1292 }
1293
1294 /* Now read the rest of the input block. */
1295 (*nread) = 0;
1296
1297 if (check_nread == PIPE_BUFSIZE)
1298 {
1299 ret = main_file_read (ifile, input_buf + PIPE_BUFSIZE, input_size - PIPE_BUFSIZE, nread, "input read failed");
1300 }
1301
1302 memcpy (input_buf, check_buf, check_nread);
1303
1304 (*nread) += check_nread;
1305
1306 return 0;
1307}
1308
1309/* This is called when the source file needs to be decompressed. We fork/exec a
1310 * decompression command with the proper input and output to a temporary file. */
1311static int
1312main_decompress_source (main_file *sfile, xd3_source *source)
1313{
1314 const main_extcomp *decomp = sfile->compressor;
1315 pid_t decomp_id; /* One subproc. */
1316 int input_fd = -1;
1317 int output_fd = -1;
1318 int ret;
1319 char *tmpname = NULL;
1320 char *tmpdir = getenv ("TMPDIR");
1321 static const char tmpl[] = "/xd3src.XXXXXX";
1322
1323 /* Make a template for mkstmp() */
1324 if (tmpdir == NULL) { tmpdir = "/tmp"; }
1325 if ((tmpname = main_malloc (strlen (tmpdir) + sizeof (tmpl) + 1)) == NULL) { return ENOMEM; }
1326 sprintf (tmpname, "%s%s", tmpdir, tmpl);
1327
1328 XD3_ASSERT (ext_tmpfile == NULL);
1329 ext_tmpfile = tmpname;
1330
1331 /* Open the output FD. */
1332 if ((output_fd = mkstemp (tmpname)) < 0)
1333 {
1334 XPR(NT "mkstemp failed: %s: %s", tmpname, xd3_strerror (ret = get_errno ()));
1335 goto cleanup;
1336 }
1337
1338 /* Copy the input FD, reset file position. */
1339 XD3_ASSERT (main_file_isopen (sfile));
1340#if XD3_STDIO
1341 if ((input_fd = dup (fileno (sfile->file))) < 0)
1342 {
1343 XPR(NT "dup failed: %s", xd3_strerror (ret = get_errno ()));
1344 goto cleanup;
1345 }
1346 main_file_close (sfile);
1347 sfile->file = NULL;
1348#elif XD3_POSIX
1349 input_fd = sfile->file;
1350 sfile->file = -1;
1351#endif
1352
1353 if ((ret = lseek (input_fd, SEEK_SET, 0)) != 0)
1354 {
1355 XPR(NT "lseek failed: : %s", xd3_strerror (ret = get_errno ()));
1356 goto cleanup;
1357 }
1358
1359 if ((decomp_id = fork ()) < 0)
1360 {
1361 XPR(NT "fork failed: %s", xd3_strerror (ret = get_errno ()));
1362 goto cleanup;
1363 }
1364
1365 /* The child runs the decompression process: */
1366 if (decomp_id == 0)
1367 {
1368 /* Setup pipes: write to the output file, read from the pipe. */
1369 if (dup2 (input_fd, STDIN_FILENO) < 0 ||
1370 dup2 (output_fd, STDOUT_FILENO) < 0 ||
1371 execlp (decomp->decomp_cmdname, decomp->decomp_cmdname, decomp->decomp_options, NULL))
1372 {
1373 XPR(NT "child process %s failed to execute: %s\n",
1374 decomp->decomp_cmdname, xd3_strerror (get_errno ()));
1375 }
1376
1377 _exit (127);
1378 }
1379
1380 close (input_fd);
1381 close (output_fd);
1382 input_fd = -1;
1383 output_fd = -1;
1384
1385 /* Then wait for completion. */
1386 if ((ret = main_waitpid_check (decomp_id)))
1387 {
1388 goto cleanup;
1389 }
1390
1391 /* Open/stat the decompressed source file. */
1392 if ((ret = main_file_open (sfile, tmpname, XO_READ))) { goto cleanup; }
1393 if ((ret = main_file_stat (sfile, & source->size, 1))) { goto cleanup; }
1394 return 0;
1395
1396 cleanup:
1397 close (input_fd);
1398 close (output_fd);
1399 if (tmpname) { free (tmpname); }
1400 ext_tmpfile = NULL;
1401 return ret;
1402}
1403
1404/* Initiate re-compression of the output stream. This is easier than input decompression
1405 * because we know beforehand that the stream will be compressed, whereas the input has
1406 * already been read when we decide it should be decompressed. Thus, it only requires one
1407 * subprocess and one pipe. */
1408static int
1409main_recompress_output (main_file *ofile)
1410{
1411 pid_t recomp_id; /* One subproc. */
1412 int pipefd[2]; /* One pipe. */
1413 int output_fd = -1;
1414 int ret;
1415 const main_extcomp *recomp = ofile->compressor;
1416
1417 pipefd[0] = pipefd[1] = -1;
1418
1419 if (pipe (pipefd))
1420 {
1421 XPR(NT "pipe failed: %s\n", xd3_strerror (ret = get_errno ()));
1422 goto pipe_cleanup;
1423 }
1424
1425 if ((recomp_id = fork ()) < 0)
1426 {
1427 XPR(NT "fork failed: %s\n", xd3_strerror (ret = get_errno ()));
1428 goto pipe_cleanup;
1429 }
1430
1431 /* The child runs the recompression process: */
1432 if (recomp_id == 0)
1433 {
1434 /* Setup pipes: write to the output file, read from the pipe. */
1435 if (dup2 (XFNO (ofile), STDOUT_FILENO) < 0 ||
1436 dup2 (pipefd[PIPE_READ_FD], STDIN_FILENO) < 0 ||
1437 close (pipefd[PIPE_READ_FD]) ||
1438 close (pipefd[PIPE_WRITE_FD]) ||
1439 execlp (recomp->recomp_cmdname, recomp->recomp_cmdname, recomp->recomp_options, NULL))
1440 {
1441 XPR(NT "child process %s failed to execute: %s\n", recomp->recomp_cmdname, xd3_strerror (get_errno ()));
1442 }
1443
1444 _exit (127);
1445 }
1446
1447 ext_subprocs[0] = recomp_id;
1448
1449 /* The parent closes both pipes after duplicating the output-fd for writing to the
1450 * compression pipe. */
1451 output_fd = dup (pipefd[PIPE_WRITE_FD]);
1452
1453 if (output_fd < 0 ||
1454 main_file_close (ofile) ||
1455 close (pipefd[PIPE_READ_FD]) ||
1456 close (pipefd[PIPE_WRITE_FD]))
1457 {
1458 XPR(NT "close failed: %s\n", xd3_strerror (ret = get_errno ()));
1459 goto pipe_cleanup;
1460 }
1461
1462#if XD3_STDIO
1463 /* Note: fdopen() acquires the fd, closes it when finished. */
1464 if ((ofile->file = fdopen (output_fd, "w")) == NULL)
1465 {
1466 XPR(NT "fdopen failed: %s\n", xd3_strerror (ret = get_errno ()));
1467 goto pipe_cleanup;
1468 }
1469
1470#elif XD3_POSIX
1471 ofile->file = output_fd;
1472#endif
1473
1474 /* Now the output file will be compressed. */
1475 return 0;
1476
1477 pipe_cleanup:
1478 close (output_fd);
1479 close (pipefd[PIPE_READ_FD]);
1480 close (pipefd[PIPE_WRITE_FD]);
1481 return ret;
1482}
1483#endif /* EXTERNAL_COMPRESSION */
1484
1485/* Identify the compressor that was used based on its ident string, which is passed in the
1486 * application header. */
1487static const main_extcomp*
1488main_ident_compressor (const char *ident)
1489{
1490 int i;
1491
1492 for (i = 0; i < SIZEOF_ARRAY (extcomp_types); i += 1)
1493 {
1494 if (strcmp (extcomp_types[i].ident, ident) == 0)
1495 {
1496 return & extcomp_types[i];
1497 }
1498 }
1499
1500 return NULL;
1501}
1502
1503/* Return the main_extcomp record to use for this identifier, if possible. */
1504static const main_extcomp*
1505main_get_compressor (const char *ident)
1506{
1507 const main_extcomp *ext = main_ident_compressor (ident);
1508
1509 if (ext == NULL)
1510 {
1511 if (! option_quiet)
1512 {
1513 XPR(NT "warning: cannot recompress output: "
1514 "unrecognized external compression ID: %s\n", ident);
1515 }
1516 return NULL;
1517 }
1518 else if (! EXTERNAL_COMPRESSION)
1519 {
1520 if (! option_quiet)
1521 {
1522 XPR(NT "warning: external support not compiled: "
1523 "original input was compressed: %s\n", ext->recomp_cmdname);
1524 }
1525 return NULL;
1526 }
1527 else
1528 {
1529 return ext;
1530 }
1531}
1532
1533/******************************************************************************************
1534 APPLICATION HEADER
1535 ******************************************************************************************/
1536
1537#if XD3_ENCODER
/* Reduce a file name to the form stored in the application header:
 *   NULL                              -> ""
 *   /dev/stdin, /dev/stdout, /dev/stderr -> "-"
 *   anything else                     -> the part after the last '/', or the whole
 *                                        string when it contains no '/'
 * The result points either at a string literal or into X; nothing is allocated. */
static const char*
main_apphead_string (const char* x)
{
  static const char *const std_streams[] = { "/dev/stdin", "/dev/stdout", "/dev/stderr" };
  const char *last_slash;
  size_t k;

  if (x == NULL)
    {
      return "";
    }

  for (k = 0; k < sizeof (std_streams) / sizeof (std_streams[0]); k += 1)
    {
      if (strcmp (x, std_streams[k]) == 0)
        {
          return "-";
        }
    }

  last_slash = strrchr (x, '/');

  if (last_slash != NULL)
    {
      return last_slash + 1;
    }

  return x;
}
1551
1552static int
1553main_set_appheader (xd3_stream *stream, main_file *input, main_file *sfile)
1554{
1555 /* The user may disable the application header. Once the appheader is set, this
1556 * disables setting it again. */
1557 if (appheader_used || ! option_use_appheader) { return 0; }
1558
1559 /* The user may specify the application header, otherwise format the default header. */
1560 if (option_appheader)
1561 {
1562 appheader_used = option_appheader;
1563 }
1564 else
1565 {
1566 const char *iname;
1567 const char *icomp;
1568 const char *sname;
1569 const char *scomp;
1570 int len;
1571
1572 iname = main_apphead_string (input->filename);
1573 icomp = (input->compressor == NULL) ? "" : input->compressor->ident;
1574 len = strlen (iname) + strlen (icomp) + 2;
1575
1576 if (sfile->filename != NULL)
1577 {
1578 sname = main_apphead_string (sfile->filename);
1579 scomp = (sfile->compressor == NULL) ? "" : sfile->compressor->ident;
1580 len += strlen (sname) + strlen (scomp) + 2;
1581 }
1582 else
1583 {
1584 sname = scomp = "";
1585 }
1586
1587 if ((appheader_used = main_malloc (len)) == NULL)
1588 {
1589 return ENOMEM;
1590 }
1591
1592 if (sfile->filename == NULL)
1593 {
1594 sprintf ((char*)appheader_used, "%s/%s", iname, icomp);
1595 }
1596 else
1597 {
1598 sprintf ((char*)appheader_used, "%s/%s/%s/%s", iname, icomp, sname, scomp);
1599 }
1600 }
1601
1602 xd3_set_appheader (stream, appheader_used, strlen ((char*)appheader_used));
1603
1604 return 0;
1605}
1606#endif
1607
1608static void
1609main_get_appheader_params (main_file *file, char **parsed, int output, const char *type)
1610{
1611 /* Set the filename if it was not specified. If output, option_stdout (-c) overrides. */
1612 if (file->filename == NULL && ! (output && option_stdout) && strcmp (parsed[0], "-") != 0)
1613 {
1614 file->filename = parsed[0];
1615
1616 if (! option_quiet)
1617 {
1618 XPR(NT "using default %s filename: %s\n", type, file->filename);
1619 }
1620 }
1621
1622 /* Set the compressor, initiate de/recompression later. */
1623 if (file->compressor == NULL && *parsed[1] != 0)
1624 {
1625 file->compressor = main_get_compressor (parsed[1]);
1626 }
1627}
1628
1629static void
1630main_get_appheader (xd3_stream *stream, main_file *output, main_file *sfile)
1631{
1632 uint8_t *apphead;
1633 usize_t appheadsz;
1634 int ret;
1635
1636 /* The user may disable the application header. Once the appheader is set, this
1637 * disables setting it again. */
1638 if (! option_use_appheader) { return; }
1639
1640 ret = xd3_get_appheader (stream, & apphead, & appheadsz);
1641
1642 /* Ignore failure, it only means we haven't received a header yet. */
1643 if (ret != 0) { return; }
1644
1645 if (appheadsz > 0)
1646 {
1647 char *start = (char*)apphead;
1648 char *slash;
1649 int place = 0;
1650 char *parsed[4];
1651
1652 memset (parsed, 0, sizeof (parsed));
1653
1654 while ((slash = strchr (start, '/')) != NULL)
1655 {
1656 *slash = 0;
1657 parsed[place++] = start;
1658 start = slash + 1;
1659 }
1660
1661 parsed[place++] = start;
1662
1663 /* First take the output parameters. */
1664 if (place == 2 || place == 4)
1665 {
1666 main_get_appheader_params (output, parsed, 1, "output");
1667 }
1668
1669 /* Then take the source parameters. */
1670 if (place == 4)
1671 {
1672 main_get_appheader_params (sfile, parsed+2, 0, "source");
1673 }
1674 }
1675
1676 option_use_appheader = 0;
1677 return;
1678}
1679
1680/******************************************************************************************
1681 Main I/O routines
1682 ******************************************************************************************/
1683
1684/* This function acts like the above except it may also try to recognize a compressed
1685 * input when the first buffer of data is read. The EXTERNAL_COMPRESSION code is called
1686 * to search for magic numbers. */
1687static int
1688main_read_primary_input (main_file *ifile,
1689 uint8_t *buf,
1690 usize_t size,
1691 usize_t *nread)
1692{
1693#if EXTERNAL_COMPRESSION
1694 if (option_decompress_inputs && ifile->flags & RD_FIRST)
1695 {
1696 ifile->flags &= ~RD_FIRST;
1697
1698 return main_decompress_input_check (ifile, buf, size, nread);
1699 }
1700#endif
1701
1702 return main_file_read (ifile, buf, size, nread, "input read failed");
1703}
1704
1705/* This function simply writes the stream output buffer, if there is any. This is used
1706 * for both encode and decode commands. (The VCDIFF tools use main_print_func()). */
1707static int
1708main_write_output (xd3_stream* stream, main_file *ofile)
1709{
1710 int ret;
1711
1712 if (stream->avail_out > 0 && (ret = main_file_write (ofile, stream->next_out, stream->avail_out, "write failed")))
1713 {
1714 return ret;
1715 }
1716
1717 return 0;
1718}
1719
1720/* Open the main output file, sets a default file name, initiate recompression. This
1721 * function is expected to fprint any error messages. */
1722static int
1723main_open_output (xd3_stream *stream, main_file *ofile)
1724{
1725 int ret;
1726
1727 if (ofile->filename == NULL)
1728 {
1729 XSTDOUT_XF (ofile);
1730
1731 if (option_verbose > 1) { XPR(NT "using standard output: %s\n", ofile->filename); }
1732 }
1733 else
1734 {
1735 /* Stat the file to check for overwrite. */
1736 if (option_force == 0 && main_file_exists (ofile))
1737 {
1738 XPR(NT "to overwrite output file specify -f: %s\n", ofile->filename);
1739 return EEXIST;
1740 }
1741
1742 if ((ret = main_file_open (ofile, ofile->filename, XO_WRITE)))
1743 {
1744 return ret;
1745 }
1746
1747 if (option_verbose > 1) { XPR(NT "open output: %s\n", ofile->filename); }
1748 }
1749
1750#if EXTERNAL_COMPRESSION
1751 /* Do output recompression. */
1752 if (ofile->compressor != NULL && option_recompress_outputs == 1)
1753 {
1754 if (! option_quiet)
1755 {
1756 XPR(NT "%s %s | %s\n",
1757 ofile->compressor->recomp_cmdname,
1758 ofile->compressor->recomp_options,
1759 ofile->filename);
1760 }
1761
1762 if ((ret = main_recompress_output (ofile)))
1763 {
1764 return ret;
1765 }
1766 }
1767#endif
1768
1769 return 0;
1770}
1771
1772/* This is called at different times for encoding and decoding. The encoder calls it
1773 * immediately, the decoder delays until the application header is received.
 *
 * Opens and stats the source file (unless allow_fake_source is set), fills in SOURCE,
 * allocates the block cache, optionally detects and decompresses a compressed source,
 * and finally hands SOURCE to the stream with xd3_set_source().
 *
 * stream - the stream to attach the source to
 * cmd    - the command being run; only tested with IS_ENCODE()
 * sfile  - the source file; sfile->filename names it
 * source - source->blksize must already be set by the caller; the other fields are
 *          filled in here
 *
 * Returns 0 on success.  Failures return the error of the failing file operation,
 * ENOMEM, EFBIG, or EXIT_FAILURE when xd3_set_source() fails. */
1774static int
1775main_set_source (xd3_stream *stream, int cmd, main_file *sfile, xd3_source *source)
1776{
1777 int ret, i;
1778
1779 /* Open it, check for seekability, set required xd3_source fields. */
 /* With allow_fake_source no file is opened at all: the file record is filled in by
  * hand and the size is reported as UINT64_MAX. */
1780 if (allow_fake_source)
1781 {
1782 sfile->mode = XO_READ;
1783 sfile->realname = sfile->filename;
1784 sfile->nread = 0;
1785 source->size = UINT64_MAX;
1786 }
1787 else if ((ret = main_file_open (sfile, sfile->filename, XO_READ)) ||
1788 (ret = main_file_stat (sfile, & source->size, 1)))
1789 {
1790 return ret;
1791 }
1792
 /* curblkno == (xoff_t) -1 together with curblk == NULL marks "no current block". */
1793 source->name = sfile->filename;
1794 source->ioh = sfile;
1795 source->curblkno = (xoff_t) -1;
1796 source->curblk = NULL;
1797
1798 /* Source block LRU init. */
1799 main_blklru_list_init (& lru_list);
1800 main_blklru_list_init (& lru_free);
1801
 /* Never cache more than the source file holds. */
1802 option_srcwinsz = min(source->size, (xoff_t) option_srcwinsz);
1803
1804 if (option_verbose > 1) { XPR(NT "source window size: %u\n", option_srcwinsz); }
1805 if (option_verbose > 1) { XPR(NT "source block size: %u\n", source->blksize); }
1806
 /* Number of cache blocks: enough to cover the source window, plus one. */
1807 lru_size = (option_srcwinsz / source->blksize) + 1;
1808
1809 XD3_ASSERT(lru_size <= 128); /* TODO: fix performance here */
1810
1811 if ((lru = main_malloc (sizeof (main_blklru) * lru_size)) == NULL)
1812 {
1813 return ENOMEM;
1814 }
1815
 /* Every cache entry starts out empty (blkno == -1) and on the free list.
  * NOTE(review): an allocation failure part-way through returns without releasing the
  * entries allocated so far -- presumably cleaned up elsewhere; confirm. */
1816 for (i = 0; i < lru_size; i += 1)
1817 {
1818 lru[i].blkno = (xoff_t) -1;
1819
1820 if ((lru[i].blk = main_malloc (source->blksize)) == NULL)
1821 {
1822 return ENOMEM;
1823 }
1824
1825 main_blklru_list_push_back (& lru_free, & lru[i]);
1826 }
1827
1828#if EXTERNAL_COMPRESSION
1829 if (option_decompress_inputs)
1830 {
1831 if (IS_ENCODE (cmd))
1832 {
1833 usize_t nread;
1834
1835 source->curblk = lru[0].blk;
1836
1837 /* If encoding, read the first block now to check for decompression. */
1838 if ((ret = main_file_read (sfile, (uint8_t*) source->curblk, source->blksize, & nread, "source read failed")))
1839 {
1840 return ret;
1841 }
1842
1843 /* Check known magic numbers. */
1844 for (i = 0; i < SIZEOF_ARRAY (extcomp_types); i += 1)
1845 {
1846 const main_extcomp *decomp = & extcomp_types[i];
1847
1848 if ((nread > decomp->magic_size) && memcmp (source->curblk, decomp->magic, decomp->magic_size) == 0)
1849 {
1850 sfile->compressor = decomp;
1851 break;
1852 }
1853 }
1854
1855 /* If no decompression, the current buffer is now a valid source->curblock. */
 /* In that case lru[0] moves from the free list to the in-use list as block 0.
  * NOTE(review): when a compressor IS detected, source->curblk is left pointing at
  * lru[0].blk while curblkno stays -1 -- confirm this is harmless. */
1856 if (sfile->compressor == NULL)
1857 {
1858 main_blklru_list_remove (& lru[0]);
1859 main_blklru_list_push_back (& lru_list, & lru[0]);
1860
1861 lru[0].blkno = 0;
1862 source->curblkno = 0;
1863 source->onblk = nread;
1864
1865 if (option_verbose > 1)
1866 {
1867 XPR(NT "source block 0 read (not compressed)\n");
1868 }
1869 }
1870 }
1871
1872 /* In either the encoder or decoder, start decompression. */
 /* For the decoder the compressor is not detected in this function; it must have been
  * set on SFILE before the call (presumably from the application header). */
1873 if (sfile->compressor)
1874 {
 /* Remember the compressed size: main_decompress_source() overwrites source->size. */
1875 xoff_t osize = source->size;
1876
1877 if (osize > XD3_NODECOMPRESSSIZE)
1878 {
1879 XPR(NT "source file too large for external decompression: %s: %"Q"u\n",
1880 sfile->filename, osize);
1881 return EFBIG;
1882 }
1883
1884 if ((ret = main_decompress_source (sfile, source)))
1885 {
1886 return ret;
1887 }
1888
1889 if (! option_quiet)
1890 {
1891 char s1[32], s2[32];
1892 XPR(NT "%s | %s %s => %s %.1f%% [ %s , %s ]\n",
1893 sfile->filename,
1894 sfile->compressor->decomp_cmdname,
1895 sfile->compressor->decomp_options,
1896 sfile->realname,
1897 100.0 * source->size / osize,
1898 main_format_bcnt (osize, s1),
1899 main_format_bcnt (source->size, s2));
1900 }
1901 }
1902 }
1903#endif
1904
1905 if (option_verbose > 1) { XPR(NT "source file: %s: %"Q"u bytes\n", sfile->realname, source->size); }
1906
1907 if ((ret = xd3_set_source (stream, source)))
1908 {
1909 XPR(NT XD3_LIB_ERRMSG (stream, ret));
1910 return EXIT_FAILURE;
1911 }
1912
1913 return 0;
1914}
1915
1916/******************************************************************************************
1917 Source routines
1918 ******************************************************************************************/
1919
1920/* This is the callback for reading a block of source. This function is blocking and it
1921 * implements a small LRU.
1922 *
1923 * Note that it is possible for main_input() to handle getblk requests in a non-blocking
1924 * manner. If the callback is NULL then the caller of xd3_*_input() must handle the
1925 * XD3_GETSRCBLK return value and fill the source in the same way. See xd3_getblk for
1926 * details. To see an example of non-blocking getblk, see xdelta-test.h.
 *
 * stream - not used by this function
 * source - the source being read; source->ioh is the main_file to read from.  On
 *          success curblk, curblkno and onblk describe the requested block.
 * blkno  - number of the block to load
 *
 * Returns 0 on success, the error of the failing seek or read, or EINVAL when the
 * number of bytes read differs from the expected block length. */
1927static int
1928main_getblk_func (xd3_stream *stream,
1929 xd3_source *source,
1930 xoff_t blkno)
1931{
1932 xoff_t pos = blkno * source->blksize;
1933 main_file *sfile = (main_file*) source->ioh;
1934 main_blklru *blru = NULL;
1935 usize_t onblk = xd3_bytes_on_srcblk (source, blkno);
1936 usize_t nread;
1937 int ret;
1938 int i;
1939
 /* Fake source: hand back the first cache buffer for every block without reading
  * anything from a file. */
1940 if (allow_fake_source)
1941 {
1942 source->curblkno = blkno;
1943 source->onblk = onblk;
1944 source->curblk = lru[0].blk;
1945 return 0;
1946 }
1947
1948 if (do_not_lru)
1949 {
1950 /* Direct lookup assumes sequential scan w/o skipping blocks. */
 /* Each block number maps to the fixed slot blkno % lru_size.  The assertion below
  * checks that the slot is either empty or holds the block exactly lru_size blocks
  * earlier. */
1951 int idx = blkno % lru_size;
1952 if (lru[idx].blkno == blkno)
1953 {
1954 source->curblkno = blkno;
1955 source->onblk = onblk;
1956 source->curblk = lru[idx].blk;
1957 lru_hits += 1;
1958 return 0;
1959 }
1960 XD3_ASSERT (lru[idx].blkno == -1LL ||
1961 lru[idx].blkno == blkno - lru_size);
1962 }
1963 else
1964 {
1965 /* Sequential search through LRU. */
 /* On a hit the entry is moved to the back of lru_list, so the front of the list is
  * the least recently used entry. */
1966 for (i = 0; i < lru_size; i += 1)
1967 {
1968 if (lru[i].blkno == blkno)
1969 {
1970 main_blklru_list_remove (& lru[i]);
1971 main_blklru_list_push_back (& lru_list, & lru[i]);
1972
1973 source->curblkno = blkno;
1974 source->onblk = onblk;
1975 source->curblk = lru[i].blk;
1976 lru_hits += 1;
1977 return 0;
1978 }
1979 }
1980 }
1981
 /* Cache miss: pick the buffer to fill.  An unused entry is preferred; otherwise an
  * in-use entry is evicted (the fixed slot when do_not_lru is set, else the front of
  * lru_list).  lru_misses counts evictions only; lru_filled counts every fill.
  * NOTE(review): in do_not_lru mode an entry popped from lru_free is not necessarily
  * lru[blkno % lru_size], which the direct lookup above assumes -- confirm.
  * NOTE(review): if both lists were empty, blru would stay NULL and be dereferenced
  * below; this looks unreachable while every entry is on one of the lists -- confirm. */
1982 if (! main_blklru_list_empty (& lru_free))
1983 {
1984 blru = main_blklru_list_pop_front (& lru_free);
1985 }
1986 else if (! main_blklru_list_empty (& lru_list))
1987 {
1988 if (do_not_lru) {
1989 blru = & lru[blkno % lru_size];
1990 main_blklru_list_remove(blru);
1991 } else {
1992 blru = main_blklru_list_pop_front (& lru_list);
1993 }
1994 lru_misses += 1;
1995 }
1996
1997 lru_filled += 1;
1998
 /* NOTE(review): on the three error returns below, blru has already been taken off its
  * list and is not put back. */
1999 if ((ret = main_file_seek (sfile, pos)))
2000 {
2001 return ret;
2002 }
2003
2004 if ((ret = main_file_read (sfile, (uint8_t*) blru->blk, source->blksize,
2005 & nread, "source read failed")))
2006 {
2007 return ret;
2008 }
2009
 /* The block length was derived from the recorded source size; a different count means
  * the file no longer matches that size. */
2010 if (nread != onblk)
2011 {
2012 XPR(NT "source file size change: %s\n", sfile->filename);
2013 return EINVAL;
2014 }
2015
2016 main_blklru_list_push_back (& lru_list, blru);
2017
2018 if (option_verbose > 1)
2019 {
 /* blru->blkno still holds the previous occupant here; -1 means the entry was unused. */
2020 if (blru->blkno != -1LL)
2021 {
2022 XPR(NT "source block %"Q"u ejects %"Q"u (lru_hits=%u, lru_misses=%u, lru_filled=%u)\n",
2023 blkno, blru->blkno, lru_hits, lru_misses, lru_filled);
2024 }
2025 else
2026 {
2027 XPR(NT "source block %"Q"u read (lru_hits=%u, lru_misses=%u, lru_filled=%u)\n",
2028 blkno, lru_hits, lru_misses, lru_filled);
2029 }
2030 }
2031
2032 blru->blkno = blkno;
2033 source->curblk = blru->blk;
2034 source->curblkno = blkno;
2035 source->onblk = onblk;
2036
2037 return 0;
2038}
2039
2040/******************************************************************************************
2041 Main routines
2042 ******************************************************************************************/
2043
2044/* This is a generic input function. It calls the xd3_encode_input or xd3_decode_input
2045 * functions and makes calls to the various input handling routines above, which
2046 * coordinate external decompression.
2047 *
2048 * TODO config: Still need options for the at least: smatch config, memsize, sprevsz,
2049 * XD3_SEC_* flags, greedy/1.5
2050 */
2051static int
2052main_input (xd3_cmd cmd,
2053 main_file *ifile,
2054 main_file *ofile,
2055 main_file *sfile)
2056{
2057 int ret;
2058 xd3_stream stream;
2059 usize_t nread;
2060 int stream_flags = 0;
2061 xd3_config config;
2062 xd3_source source;
2063 xoff_t last_total_in = 0;
2064 xoff_t last_total_out = 0;
2065 long start_time;
2066
2067 int (*input_func) (xd3_stream*);
2068 int (*output_func) (xd3_stream*, main_file *);
2069
2070 memset (& source, 0, sizeof (source));
2071 memset (& config, 0, sizeof (config));
2072
2073 config.alloc = main_alloc;
2074 config.freef = main_free1;
2075 config.sec_data.ngroups = 1;
2076 config.sec_addr.ngroups = 1;
2077 config.sec_inst.ngroups = 1;
2078
2079 /* main_input setup. */
2080 switch ((int) cmd)
2081 {
2082#if VCDIFF_TOOLS
2083 if (1) { case CMD_PRINTHDR: stream_flags = XD3_JUST_HDR; }
2084 else if (1) { case CMD_PRINTHDRS: stream_flags = XD3_SKIP_WINDOW; }
2085 else { case CMD_PRINTDELTA: stream_flags = XD3_SKIP_EMIT; }
2086 ifile->flags |= RD_NONEXTERNAL;
2087 input_func = xd3_decode_input;
2088 output_func = main_print_func;
2089 stream_flags |= XD3_ADLER32_NOVER;
2090 break;
2091#endif
2092#if XD3_ENCODER
2093 case CMD_ENCODE:
2094 input_func = xd3_encode_input;
2095 output_func = main_write_output;
2096
2097 if (option_use_checksum) { stream_flags |= XD3_ADLER32; }
2098 if (option_use_secondary)
2099 {
2100 /* The default secondary compressor is DJW, if it's compiled, being used, etc. */
2101 if (option_secondary == NULL)
2102 {
2103 if (SECONDARY_DJW) { stream_flags |= XD3_SEC_DJW; }
2104 }
2105 else
2106 {
2107 if (strcmp (option_secondary, "fgk") == 0 && SECONDARY_FGK)
2108 {
2109 stream_flags |= XD3_SEC_FGK;
2110 }
2111 else if (strcmp (option_secondary, "djw") == 0 && SECONDARY_DJW)
2112 {
2113 stream_flags |= XD3_SEC_DJW;
2114 }
2115 else
2116 {
2117 XPR(NT "unrecognized secondary compressor type: %s\n", option_secondary);
2118 return EXIT_FAILURE;
2119 }
2120 }
2121 }
2122 if (option_no_compress) { stream_flags |= XD3_NOCOMPRESS; }
2123 if (option_use_altcodetable) { stream_flags |= XD3_ALT_CODE_TABLE; }
2124 if (option_smatch_config)
2125 {
2126 char *s = option_smatch_config, *e;
2127 int values[XD3_SOFTCFG_VARCNT];
2128 int got;
2129
2130 config.smatch_cfg = XD3_SMATCH_SOFT;
2131
2132 for (got = 0; got < XD3_SOFTCFG_VARCNT; got += 1, s = e + 1)
2133 {
2134 values[got] = strtol (s, &e, 10);
2135
2136 if ((values[got] < 0) ||
2137 (e == s) ||
2138 (got < XD3_SOFTCFG_VARCNT-1 && *e == 0) ||
2139 (got == XD3_SOFTCFG_VARCNT-1 && *e != 0))
2140 {
2141 XPR(NT "invalid string match specifier (-C)\n");
2142 return EXIT_FAILURE;
2143 }
2144 }
2145
2146 config.large_look = values[0];
2147 config.large_step = values[1];
2148 config.small_look = values[2];
2149 config.small_chain = values[3];
2150 config.small_lchain = values[4];
2151 config.ssmatch = values[5];
2152 config.try_lazy = values[6];
2153 config.max_lazy = values[7];
2154 config.long_enough = values[8];
2155 config.promote = values[9];
2156 config.srcwin_size = values[10];
2157 config.srcwin_maxsz = values[11];
2158 }
2159 else if (option_level < 5) { config.smatch_cfg = XD3_SMATCH_FAST; }
2160 else { config.smatch_cfg = XD3_SMATCH_SLOW; }
2161 break;
2162#endif
2163 case CMD_DECODE:
2164 if (option_use_checksum == 0) { stream_flags |= XD3_ADLER32_NOVER; }
2165 stream_flags = 0;
2166 ifile->flags |= RD_NONEXTERNAL;
2167 input_func = xd3_decode_input;
2168 output_func = main_write_output;
2169 break;
2170 default:
2171 XPR(NT "internal error\n");
2172 return EXIT_FAILURE;
2173 }
2174
2175 start_time = get_millisecs_now ();
2176
2177 /* allocate an input buffer. min(file_size, option_winsize) */
2178 {
2179 xoff_t input_size = 0;
2180 config.winsize = option_winsize;
2181 if (main_file_stat (ifile, & input_size, 0) == 0)
2182 {
2183 config.winsize = min (input_size, (xoff_t) option_winsize);
2184 }
2185 config.winsize = xd3_round_blksize (config.winsize, MIN_BUFSIZE);
2186 config.winsize = max (config.winsize, MIN_BUFSIZE);
2187 }
2188 {
2189 /* Source blocksize is not user-settable, only option_srcwinsz is,
2190 * which determines the number of blocks. */
2191 source.blksize = XD3_DEFAULT_SRCBLKSZ;
2192 option_srcwinsz = xd3_round_blksize(option_srcwinsz, MIN_BUFSIZE);
2193 option_srcwinsz = max(option_srcwinsz, MIN_BUFSIZE);
2194 config.srcwin_maxsz = option_srcwinsz;
2195 }
2196
2197 if (option_verbose > 1) { XPR(NT "input buffer size: %u\n", config.winsize); }
2198
2199 if ((main_bdata = main_malloc (config.winsize)) == NULL)
2200 {
2201 return EXIT_FAILURE;
2202 }
2203
2204 config.getblk = main_getblk_func;
2205 config.flags = stream_flags;
2206
2207 if ((ret = xd3_config_stream (& stream, & config)))
2208 {
2209 XPR(NT XD3_LIB_ERRMSG (& stream, ret));
2210 return EXIT_FAILURE;
2211 }
2212
2213 if (IS_ENCODE (cmd))
2214 {
2215 /* When encoding, open the source file, possibly decompress it. The decoder delays
2216 * this step until XD3_GOTHEADER. */
2217 if (sfile->filename != NULL && (ret = main_set_source (& stream, cmd, sfile, & source)))
2218 {
2219 return EXIT_FAILURE;
2220 }
2221 }
2222
2223 /*XD3_ASSERT (option_first_offset <= option_last_offset);*/
2224 /*XD3_ASSERT (option_first_window <= option_last_window);*/
2225
2226 /*if (option_first_offset != 0 && (ret = main_file_seek (ifile, option_first_offset)))
2227 {
2228 return EXIT_FAILURE;
2229 }*/
2230
2231 /* This times each window. */
2232 get_millisecs_since ();
2233
2234 /* Main input loop. */
2235 do
2236 {
2237 xoff_t input_offset;
2238 xoff_t input_remain;
2239 usize_t try_read;
2240
2241 input_offset = ifile->nread;
2242 /*XD3_ASSERT (input_offset <= option_last_offset);*/
2243
2244 input_remain = /*option_last_offset*/ XOFF_T_MAX - input_offset;
2245
2246 try_read = (usize_t) min ((xoff_t) config.winsize, input_remain);
2247
2248 if ((ret = main_read_primary_input (ifile, main_bdata, try_read, & nread)))
2249 {
2250 return EXIT_FAILURE;
2251 }
2252
2253 /* If we've reached EOF tell the stream to flush. */
2254 if (nread < try_read)
2255 {
2256 stream_flags |= XD3_FLUSH;
2257 xd3_set_flags (& stream, stream_flags);
2258 }
2259
2260#if XD3_ENCODER
2261 /* After the first main_read_primary_input completes, we know all the information
2262 * needed to encode the application header. */
2263 if (cmd == CMD_ENCODE && (ret = main_set_appheader (& stream, ifile, sfile)))
2264 {
2265 return EXIT_FAILURE;
2266 }
2267#endif
2268 xd3_avail_input (& stream, main_bdata, nread);
2269
2270 /* If we read zero bytes after encoding at least one window... */
2271 if (nread == 0 && stream.current_window > 0) {
2272 break;
2273 }
2274
2275 again:
2276 ret = input_func (& stream);
2277 /*if (option_verbose > 1) { XPR(NT XD3_LIB_ERRMSG (& stream, ret)); }*/
2278
2279 switch (ret)
2280 {
2281 case XD3_INPUT:
2282 continue;
2283
2284 case XD3_GOTHEADER:
2285 {
2286 XD3_ASSERT (stream.current_window == 0);
2287
2288 /* Need to process the appheader as soon as possible. It may contain a
2289 * suggested default filename/decompression routine for the ofile, and it may
2290 * contain default/decompression routine for the sources. */
2291 if (cmd == CMD_DECODE)
2292 {
2293 int have_src = sfile->filename != NULL;
2294 int need_src = xd3_decoder_needs_source (& stream);
2295 int recv_src;
2296
2297 /* May need to set the sfile->filename if none was given. */
2298 main_get_appheader (& stream, ofile, sfile);
2299
2300 recv_src = sfile->filename != NULL;
2301
2302 /* Check if the user expected a source to be required although it was not. */
2303 if (have_src && ! need_src && ! option_quiet)
2304 {
2305 XPR(NT "warning: output window %"Q"u does not copy source\n", stream.current_window);
2306 }
2307
2308 /* Check if we have no source name and need one. */
2309 /* TODO: this doesn't fire due to cpyblocks_ calculation check */
2310 if (need_src && ! recv_src)
2311 {
2312 XPR(NT "input requires a source file, use -s\n");
2313 return EXIT_FAILURE;
2314 }
2315
2316 /* Now open the source file. */
2317 if (need_src && (ret = main_set_source (& stream, cmd, sfile, & source)))
2318 {
2319 return EXIT_FAILURE;
2320 }
2321 }
2322 else if (cmd == CMD_PRINTHDR ||
2323 cmd == CMD_PRINTHDRS ||
2324 cmd == CMD_PRINTDELTA)
2325 {
2326 if (xd3_decoder_needs_source (& stream) && sfile->filename == NULL)
2327 {
2328 allow_fake_source = 1;
2329 sfile->filename = "<placeholder>";
2330 main_set_source (& stream, cmd, sfile, & source);
2331 }
2332 }
2333 }
2334 /* FALLTHROUGH */
2335 case XD3_WINSTART:
2336 {
2337 /* Set or unset XD3_SKIP_WINDOW. */
2338 /*if (stream.current_window < option_first_window || stream.current_window > option_last_window)
2339 { stream_flags |= XD3_SKIP_WINDOW; }
2340 else
2341 { stream_flags &= ~XD3_SKIP_WINDOW; }*/
2342
2343 xd3_set_flags (& stream, stream_flags);
2344 goto again;
2345 }
2346
2347 case XD3_OUTPUT:
2348 {
2349 if (option_no_output == 0/* &&
2350 stream.current_window >= option_first_window &&
2351 stream.current_window <= option_last_window*/)
2352 {
2353 /* Defer opening the output file until the stream produces its first
2354 * output for both encoder and decoder, this way we delay long enough for
2355 * the decoder to receive the application header. (Or longer if there are
2356 * skipped windows, but I can't think of any reason not to delay open.) */
2357
2358 if (! main_file_isopen (ofile) && (ret = main_open_output (& stream, ofile)) != 0)
2359 {
2360 return EXIT_FAILURE;
2361 }
2362 if ((ret = output_func (& stream, ofile)) && (ret != PRINTHDR_SPECIAL))
2363 {
2364 return EXIT_FAILURE;
2365 }
2366 if (ret == PRINTHDR_SPECIAL)
2367 {
2368 xd3_abort_stream (& stream);
2369 ret = EXIT_SUCCESS;
2370 goto done;
2371 }
2372 ret = 0;
2373 }
2374
2375 xd3_consume_output (& stream);
2376 goto again;
2377 }
2378
2379 case XD3_WINFINISH:
2380 {
2381 if (IS_ENCODE (cmd) || cmd == CMD_DECODE)
2382 {
2383 int used_source = xd3_encoder_used_source (& stream);
2384
2385 if (! option_quiet && IS_ENCODE (cmd) && main_file_isopen (sfile) && ! used_source)
2386 {
2387 XPR(NT "warning: input position %"Q"u no source copies\n",
2388 stream.current_window * source.blksize);
2389 }
2390
2391 if (option_verbose)
2392 {
2393 char rrateavg[32], wrateavg[32], tm[32];
2394 char rdb[32], wdb[32], sb[32];
2395 char trdb[32], twdb[32], tsb[32];
2396 char srcbuf[48], tsrcbuf[48];
2397 long millis = get_millisecs_since ();
2398 usize_t this_read = stream.total_in - last_total_in;
2399 usize_t this_write = stream.total_out - last_total_out;
2400 last_total_in = stream.total_in;
2401 last_total_out = stream.total_out;
2402
2403 tsrcbuf[0] = srcbuf[0] = 0;
2404 if (used_source)
2405 {
2406 sprintf (srcbuf, ": src %s", main_format_bcnt (xd3_encoder_srclen (& stream), sb));
2407 sprintf (tsrcbuf, ": src %s", main_format_bcnt (stream.srcwin_cksum_pos, tsb));
2408 }
2409 /*if (stream.current_window >= option_first_window &&
2410 stream.current_window <= option_last_window)*/
2411 {
2412 XPR(NT "%"Q"u: in %s (%s): out %s (%s)%s: total in %s: out %s%s: %s\n",
2413 stream.current_window,
2414 main_format_bcnt (this_read, rdb),
2415 main_format_rate (this_read, millis, rrateavg),
2416 main_format_bcnt (this_write, wdb),
2417 main_format_rate (this_write, millis, wrateavg),
2418 srcbuf,
2419 main_format_bcnt (stream.total_in, trdb),
2420 main_format_bcnt (stream.total_out, twdb),
2421 tsrcbuf,
2422 main_format_millis (millis, tm));
2423 }
2424 }
2425 }
2426 goto again;
2427 }
2428
2429 default:
2430 /* input_func() error */
2431 XPR(NT XD3_LIB_ERRMSG (& stream, ret));
2432 return EXIT_FAILURE;
2433 }
2434 }
2435 while (nread == config.winsize);
2436done:
2437 /* Close the inputs. (ifile must be open, sfile may be open) */
2438 main_file_close (ifile);
2439 main_file_close (sfile);
2440
2441 /* If output file is not open yet because of delayed-open, it means we never encountered
2442 * a window in the delta, but it could have had a VCDIFF header? TODO: solve this
2443 * elsewhere. For now, it prints "nothing to output" below, but the check doesn't
2444 * happen in case of option_no_output. */
2445 if (! option_no_output)
2446 {
2447 if (! main_file_isopen (ofile))
2448 {
2449 XPR(NT "nothing to output: %s\n", ifile->filename);
2450 return EXIT_FAILURE;
2451 }
2452
2453 /* Have to close the output before calling main_external_compression_finish, or else it hangs. */
2454 if (main_file_close (ofile) != 0)
2455 {
2456 return EXIT_FAILURE;
2457 }
2458 }
2459
2460 if ((ret = xd3_close_stream (& stream)))
2461 {
2462 XPR(NT XD3_LIB_ERRMSG (& stream, ret));
2463 return EXIT_FAILURE;
2464 }
2465
2466#if EXTERNAL_COMPRESSION
2467 if ((ret = main_external_compression_finish ())) { return EXIT_FAILURE; }
2468#endif
2469
2470 xd3_free_stream (& stream);
2471
2472 if (option_verbose)
2473 {
2474 char tm[32];
2475 long end_time = get_millisecs_now ();
2476 XPR(NT "command finished in %s\n", main_format_millis (end_time - start_time, tm));
2477 }
2478 if (option_verbose > 1)
2479 {
2480 XPR(NT "input bytes: %"Q"u\n", ifile->nread);
2481 XPR(NT "output bytes: %"Q"u\n", ofile->nwrite);
2482 }
2483
2484 return EXIT_SUCCESS;
2485}
2486
2487/* free memory before exit, reset single-use variables. */
2488static void
2489main_cleanup (void)
2490{
2491 int i;
2492
2493 if (option_appheader) { appheader_used = NULL; }
2494
2495 main_free ((void**) & appheader_used);
2496 main_free ((void**) & main_bdata);
2497
2498#if EXTERNAL_COMPRESSION
2499 main_free ((void**) & ext_tmpfile);
2500#endif
2501
2502 for (i = 0; lru && i < lru_size; i += 1)
2503 {
2504 main_free ((void**) & lru[i].blk);
2505 }
2506
2507 main_free ((void**) & lru);
2508
2509 lru_hits = 0;
2510 lru_misses = 0;
2511 lru_filled = 0;
2512
2513 XD3_ASSERT (main_mallocs == 0);
2514}
2515
2516int
2517#if PYTHON_MODULE
2518xd3_main_cmdline (int argc, char **argv)
2519#else
2520main (int argc, char **argv)
2521#endif
2522{
2523 xd3_cmd cmd;
2524 main_file ifile;
2525 main_file ofile;
2526 main_file sfile;
2527 static char *flags = "0123456789cdefhnqvDJNRTVs:B:C:E:F:L:O:P:W:A::S::";
2528 int my_optind;
2529 char *my_optarg;
2530 char *my_optstr;
2531 char *sfilename;
2532 int orig_argc = argc;
2533 char **orig_argv = argv;
2534 int ret;
2535
2536 go: /* Go. */
2537 cmd = CMD_NONE;
2538 sfilename = NULL;
2539 my_optind = 1;
2540 argv = orig_argv;
2541 argc = orig_argc;
2542 program_name = argv[0];
2543 extcomp_types[0].recomp_cmdname = program_name;
2544 extcomp_types[0].decomp_cmdname = program_name;
2545 takearg:
2546 my_optarg = NULL;
2547 my_optstr = argv[my_optind];
2548 /* This doesn't use getopt() because it makes trouble for -P & python which reenter
2549 * main() and thus care about freeing all memory. I never had much trust for getopt
2550 * anyway, it's too opaque. This implements a fairly standard non-long-option getopt
2551 * with support for named operations (e.g., "xdelta3 [encode|decode|printhdr...] < in >
2552 * out"). I'll probably add long options at some point. See TODO. */
2553 if (my_optstr)
2554 {
2555 if (*my_optstr == '-') { my_optstr += 1; }
2556 else if (cmd == CMD_NONE) { goto nonflag; }
2557 else { my_optstr = NULL; }
2558 }
2559 while (my_optstr)
2560 {
2561 char *s;
2562 my_optarg = NULL;
2563 if ((ret = *my_optstr++) == 0) { my_optind += 1; goto takearg; }
2564
2565 /* Option handling: first check for one ':' following the option in flags, then
2566 * check for two. The syntax allows:
2567 *
2568 * 1. -Afoo defines optarg="foo"
2569 * 2. -A foo defines optarg="foo"
2570 * 3. -A "" defines optarg="" (allows optional empty-string)
2571 * 4. -A [EOA or -moreargs] error (mandatory case)
2572 * 5. -A [EOA -moreargs] defines optarg=NULL (optional case)
2573 * 6. -A=foo defines optarg="foo"
2574 * 7. -A= defines optarg="" (mandatory case)
2575 * 8. -A= defines optarg=NULL (optional case)
2576 *
2577 * See tests in test_command_line_arguments().
2578 */
2579 s = strchr (flags, ret);
2580 if (s && s[1] && s[1] == ':')
2581 {
2582 int eqcase = 0;
2583 int option = s[2] && s[2] == ':';
2584
2585 /* Case 1, set optarg to the remaining characters. */
2586 my_optarg = my_optstr;
2587 my_optstr = "";
2588
2589 /* Case 2-5 */
2590 if (*my_optarg == 0)
2591 {
2592 /* Condition 4-5 */
2593 int have_arg = my_optind < (argc - 1) && *argv[my_optind+1] != '-';
2594
2595 if (! have_arg)
2596 {
2597 if (! option)
2598 {
2599 /* Case 4 */
2600 XPR(NT "-%c: requires an argument\n", ret);
2601 ret = EXIT_FAILURE;
2602 goto cleanup;
2603 }
2604 /* Case 5. */
2605 my_optarg = NULL;
2606 }
2607 else
2608 {
2609 /* Case 2-3. */
2610 my_optarg = argv[++my_optind];
2611 }
2612 }
2613 /* Case 6-8. */
2614 else if (*my_optarg == '=')
2615 {
2616 /* Remove the = in all cases. */
2617 my_optarg += 1;
2618 eqcase = 1;
2619
2620 if (option && *my_optarg == 0)
2621 {
2622 /* Case 8. */
2623 my_optarg = NULL;
2624 }
2625 }
2626 }
2627
2628 switch (ret)
2629 {
2630 /* case: if no '-' was found, maybe check for a command name. */
2631 nonflag:
2632 if (strcmp (my_optstr, "decode") == 0) { cmd = CMD_DECODE; }
2633 else if (strcmp (my_optstr, "encode") == 0)
2634 {
2635#if XD3_ENCODER
2636 cmd = CMD_ENCODE;
2637#else
2638 XPR(NT "encoder support not compiled\n");
2639 return EXIT_FAILURE;
2640#endif
2641 }
2642 else if (strcmp (my_optstr, "config") == 0) { cmd = CMD_CONFIG; }
2643#if REGRESSION_TEST
2644 else if (strcmp (my_optstr, "test") == 0) { cmd = CMD_TEST; }
2645#endif
2646#if VCDIFF_TOOLS
2647 else if (strcmp (my_optstr, "printhdr") == 0) { cmd = CMD_PRINTHDR; }
2648 else if (strcmp (my_optstr, "printhdrs") == 0) { cmd = CMD_PRINTHDRS; }
2649 else if (strcmp (my_optstr, "printdelta") == 0) { cmd = CMD_PRINTDELTA; }
2650#endif
2651
2652 /* If no option was found and still no command, let the default command be
2653 * encode. The remaining args are treated as filenames. */
2654 if (cmd == CMD_NONE)
2655 {
2656 cmd = CMD_DEFAULT;
2657 my_optstr = NULL;
2658 break;
2659 }
2660 else
2661 {
2662 /* But if we find a command name, continue the getopt loop. */
2663 my_optind += 1;
2664 goto takearg;
2665 }
2666
2667 /* gzip-like options */
2668 case '0': case '1': case '2': case '3': case '4':
2669 case '5': case '6': case '7': case '8': case '9':
2670 option_level = ret - '0';
2671 break;
2672 case 'f': option_force = 1; break;
2673 case 'v': option_verbose += 1; option_quiet = 0; break;
2674 case 'q': option_quiet = 1; option_verbose = 0; break;
2675 case 'c': option_stdout = 1; break;
2676 case 'd':
2677 if (cmd == CMD_NONE) { cmd = CMD_DECODE; }
2678 else { ret = main_help (); goto exit; }
2679 break;
2680 case 'e':
2681#if XD3_ENCODER
2682 if (cmd == CMD_NONE) { cmd = CMD_ENCODE; }
2683 else { ret = main_help (); goto exit; }
2684 break;
2685#else
2686 XPR(NT "encoder support not compiled\n");
2687 return EXIT_FAILURE;
2688#endif
2689
2690 //case 'F': if ((ret = main_strtoxoff (my_optarg, & option_first_window, 'F'))) { goto exit; } break;
2691 //case 'L': if ((ret = main_strtoxoff (my_optarg, & option_last_window, 'L'))) { goto exit; } break;
2692 //case 'O': if ((ret = main_strtoxoff (my_optarg, & option_first_offset, 'O'))) { goto exit; } break;
2693 //case 'E': if ((ret = main_strtoxoff (my_optarg, & option_last_offset, 'E'))) { goto exit; } break;
2694
2695 case 'P':
2696 /* only set profile count once, since... */
2697 if (option_profile_cnt == 0)
2698 {
2699 if ((ret = main_atou(my_optarg, (usize_t*) & option_profile_cnt, 0, 'P'))) { goto exit; }
2700
2701 if (option_profile_cnt <= 0)
2702 {
2703 ret = EXIT_SUCCESS;
2704 goto exit;
2705 }
2706 }
2707 break;
2708
2709 case 'n': option_use_checksum = 0; break;
2710 case 'N': option_no_compress = 1; break;
2711 case 'T': option_use_altcodetable = 1; break;
2712 case 'C': option_smatch_config = my_optarg; break;
2713 case 'J': option_no_output = 1; break;
2714 case 'S': if (my_optarg == NULL) { option_use_secondary = 0; }
2715 else { option_use_secondary = 1; option_secondary = my_optarg; } break;
2716 case 'A': if (my_optarg == NULL) { option_use_appheader = 0; }
2717 else { option_appheader = (uint8_t*) my_optarg; } break;
2718 case 'B': if ((ret = main_atou (my_optarg, & option_srcwinsz, MIN_BUFSIZE, 'B'))) { goto exit; } break;
2719 case 'W': if ((ret = main_atou (my_optarg, & option_winsize, MIN_BUFSIZE, 'W'))) { goto exit; } break;
2720 case 'D':
2721#if EXTERNAL_COMPRESSION == 0
2722 if (! option_quiet)
2723 {
2724 XPR(NT "warning: -D option ignored, "
2725 "external compression support was not compiled\n");
2726 }
2727#else
2728 option_decompress_inputs = 0;
2729#endif
2730 break;
2731 case 'R':
2732#if EXTERNAL_COMPRESSION == 0
2733 if (! option_quiet)
2734 {
2735 XPR(NT "warning: -R option ignored, "
2736 "external compression support was not compiled\n");
2737 }
2738#else
2739 option_recompress_outputs = 0;
2740#endif
2741 break;
2742 case 's':
2743 if (sfilename != NULL)
2744 {
2745 XPR(NT "specify only one source file\n");
2746 goto cleanup;
2747 }
2748
2749 sfilename = my_optarg;
2750 break;
2751
2752 case 'V':
2753 ret = main_version (); goto exit;
2754 default:
2755 ret = main_help (); goto exit;
2756 }
2757 }
2758
2759 option_source_filename = sfilename;
2760
2761 /* In case there were no arguments, set the default command. */
2762 if (cmd == CMD_NONE) { cmd = CMD_DEFAULT; }
2763
2764 argc -= my_optind;
2765 argv += my_optind;
2766
2767 /* There may be up to two more arguments. */
2768 if (argc > 2)
2769 {
2770 XPR(NT "too many filenames: %s ...\n", argv[2]);
2771 ret = EXIT_FAILURE;
2772 goto cleanup;
2773 }
2774
2775 if (option_verbose > 1)
2776 {
2777 int l = 1;
2778 int i;
2779 char buf[1024];
2780 for (i = 0; i < orig_argc; i += 1)
2781 {
2782 l += strlen (orig_argv[i]) + 1;
2783 }
2784 buf[0] = 0;
2785 for (i = 0; i < orig_argc; i += 1)
2786 {
2787 strcat (buf, orig_argv[i]);
2788 strcat (buf, " ");
2789 }
2790 XPR(NT "command line: %s\n", buf);
2791 }
2792
2793 main_file_init (& ifile);
2794 main_file_init (& ofile);
2795 main_file_init (& sfile);
2796
2797 ifile.flags = RD_FIRST;
2798 sfile.flags = RD_FIRST;
2799 sfile.filename = option_source_filename;
2800
2801 /* The infile takes the next argument, if there is one. But if not, infile is set to
2802 * stdin. */
2803 if (argc > 0)
2804 {
2805 ifile.filename = argv[0];
2806
2807 if ((ret = main_file_open (& ifile, ifile.filename, XO_READ)))
2808 {
2809 goto cleanup;
2810 }
2811 }
2812 else
2813 {
2814 XSTDIN_XF (& ifile);
2815 }
2816
2817 /* The ofile takes the following argument, if there is one. But if not, it is left NULL
2818 * until the application header is processed. It will be set in main_open_output. */
2819 if (argc > 1)
2820 {
2821 /* Check for conflicting arguments. */
2822 if (option_stdout && ! option_quiet)
2823 {
2824 XPR(NT "warning: -c option overrides output filename: %s\n", argv[1]);
2825 }
2826
2827 if (! option_stdout) { ofile.filename = argv[1]; }
2828 }
2829
2830 switch (cmd)
2831 {
2832 case CMD_PRINTHDR:
2833 case CMD_PRINTHDRS:
2834 case CMD_PRINTDELTA:
2835#if XD3_ENCODER
2836 case CMD_ENCODE:
2837 if (cmd == CMD_ENCODE)
2838 {
2839 do_not_lru = 1;
2840 }
2841#endif
2842 case CMD_DECODE:
2843 ret = main_input (cmd, & ifile, & ofile, & sfile);
2844 break;
2845
2846#if REGRESSION_TEST
2847 case CMD_TEST:
2848 ret = xd3_selftest ();
2849 break;
2850#endif
2851
2852 case CMD_CONFIG:
2853 ret = main_config ();
2854 break;
2855
2856 default:
2857 ret = main_help ();
2858 break;
2859 }
2860
2861#if EXTERNAL_COMPRESSION
2862 if (ext_tmpfile != NULL) { unlink (ext_tmpfile); }
2863#endif
2864
2865 if (0)
2866 {
2867 cleanup:
2868 ret = EXIT_FAILURE;
2869 exit:
2870 (void)0;
2871 }
2872
2873 main_cleanup ();
2874
2875 if (--option_profile_cnt > 0 && ret == EXIT_SUCCESS) { goto go; }
2876
2877 return ret;
2878}
2879
2880static int
2881main_help (void)
2882{
2883 /* Not all options are shown, yet: 0-9, l J T C P F L O E
2884 * Remember to update www/xdelta3-cmdline.html
2885 */
2886
2887 main_version ();
2888 P(RINT "usage: xdelta3 [command/options] [input [output]]\n");
2889 P(RINT "commands are:\n");
2890 P(RINT " encode encodes the input%s\n", XD3_ENCODER ? "" : " [Not compiled]");
2891 P(RINT " decode decodes the input\n");
2892 P(RINT " config prints xdelta3 configuration\n");
2893#if REGRESSION_TEST
2894 P(RINT " test run the builtin tests\n");
2895#endif
2896#if VCDIFF_TOOLS
2897 P(RINT "special commands for VCDIFF inputs:\n");
2898 P(RINT " printhdr print information about the first window\n");
2899 P(RINT " printhdrs print information about all windows\n");
2900 P(RINT " printdelta print information about the entire delta\n");
2901#endif
2902 P(RINT "options are:\n");
2903 P(RINT " -c use stdout instead of default\n");
2904 P(RINT " -d same as decode command\n");
2905 P(RINT " -e same as encode command\n");
2906 P(RINT " -f force overwrite\n");
2907 P(RINT " -n disable checksum (encode/decode)\n");
2908 P(RINT " -D disable external decompression (encode/decode)\n");
2909 P(RINT " -R disable external recompression (decode)\n");
2910 P(RINT " -N disable small string-matching compression\n");
2911 P(RINT " -S [djw|fgk] disable/enable secondary compression\n");
2912 P(RINT " -A [apphead] disable/provide application header\n");
2913 P(RINT " -s source source file to copy from (if any)\n");
2914 P(RINT " -B blksize source file block size\n");
2915 P(RINT " -W winsize input window buffer size\n");
2916 P(RINT " -v be verbose (max 2)\n");
2917 P(RINT " -q be quiet\n");
2918 P(RINT " -h show help\n");
2919 P(RINT " -V show version\n");
2920 P(RINT " -P repeat count (for profiling)\n");
2921
2922 return EXIT_FAILURE;
2923}
diff --git a/xdelta3/xdelta3-python.h b/xdelta3/xdelta3-python.h
new file mode 100755
index 0000000..cfd6095
--- /dev/null
+++ b/xdelta3/xdelta3-python.h
@@ -0,0 +1,86 @@
1/* xdelta 3 - delta compression tools and library
2 * Copyright (C) 2003 and onward. Joshua P. MacDonald
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 */
18
19#include "Python.h"
20
21static PyObject *pyxd3_error;
22
23/* spam: xdelta3.main([string,list,...]) */
24PyObject *xdelta3_main_cmdline (PyObject *self, PyObject *args)
25{
26 int ret, i, nargs;
27 char **argv = NULL;
28 int argc = 0;
29 PyObject *result = NULL;
30 PyObject *o;
31
32 if (! PyArg_ParseTuple (args, "O", &o)
33 || ! PyList_Check (o))
34 {
35 goto cleanup;
36 }
37
38 argc = PyList_Size (o);
39 nargs = argc + 2;
40
41 if (! (argv = malloc (sizeof(argv[0]) * nargs)))
42 {
43 PyErr_NoMemory ();
44 goto cleanup;
45 }
46 memset (argv, 0, sizeof(argv[0]) * nargs);
47
48 for (i = 1; i < nargs-1; i += 1)
49 {
50 char *ps;
51 PyObject *s;
52 if ((s = PyList_GetItem (o, i-1)) == NULL) { goto cleanup; }
53 ps = PyString_AsString (s);
54 argv[i] = ps;
55 }
56
57 ret = xd3_main_cmdline (argc+1, argv);
58
59 if (ret == 0)
60 {
61 result = Py_BuildValue ("i", ret);
62 }
63 else
64 {
65 PyErr_SetString (pyxd3_error, "failed :(");
66 }
67 cleanup:
68 if (argv)
69 {
70 free (argv);
71 }
72 return result;
73}
74static PyMethodDef xdelta3_methods[] = {
75 { "main", xdelta3_main_cmdline, METH_VARARGS, "xdelta3 main()" },
76 { NULL, NULL }
77};
78
79DL_EXPORT(void) initxdelta3 (void)
80{
81 PyObject *m, *d;
82 m = Py_InitModule ("xdelta3", xdelta3_methods);
83 d = PyModule_GetDict (m);
84 pyxd3_error = PyErr_NewException ("xdelta3.error", NULL, NULL);
85 PyDict_SetItemString (d, "error", pyxd3_error);
86}
diff --git a/xdelta3/xdelta3-regtest.py b/xdelta3/xdelta3-regtest.py
new file mode 100755
index 0000000..f3313a4
--- /dev/null
+++ b/xdelta3/xdelta3-regtest.py
@@ -0,0 +1,596 @@
1#!/usr/bin/python2.3
2# xdelta 3 - delta compression tools and library
3# Copyright (C) 2003 and onward. Joshua P. MacDonald
4#
5# This program is free software; you can redistribute it and/or modify
6# it under the terms of the GNU General Public License as published by
7# the Free Software Foundation; either version 2 of the License, or
8# (at your option) any later version.
9#
10# This program is distributed in the hope that it will be useful,
11# but WITHOUT ANY WARRANTY; without even the implied warranty of
12# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13# GNU General Public License for more details.
14#
15# You should have received a copy of the GNU General Public License
16# along with this program; if not, write to the Free Software
17# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18
19# Under construction.
20
21# TODO: This is really part test, part performance evaluation suite, and
22# really incomplete.
23
24import os, sys, math, re, time, types, array
25import xdelta3
26
HIST_SIZE = 10 # the number of buckets
MIN_SIZE = 0   # version files smaller than this many bytes are not tested

# a run finishing faster than this (seconds) is a candidate for more reps
TIME_TOO_SHORT = 0.050

MIN_REPS, MAX_REPS = 1, 1

# Trial counts.  The values in effect give one quick pass; the fuller
# settings they replace are SKIP_TRIALS = 1, MIN_TRIALS = 3, MAX_TRIALS = 15.
SKIP_TRIALS, MIN_TRIALS, MAX_TRIALS = 0, 1, 1

MIN_STDDEV_PCT = 1.5 # stop
MAX_RUN = 1000 * 1000 * 10

XD3CMD = './xdelta3-64'
#XD3CMD = './xdelta3'

# kind:
PYEXT, FORK = 1, 0

#
#
RCSDIR = '/Volumes/LACIE120RAID/orbit_linux/home/jmacd/PRCS/prcs/b'

# per-process scratch directory and the files kept inside it
TMPDIR = '/tmp/xd3regtest.%d' % os.getpid()

RUNFILE = os.path.join(TMPDIR, 'run')
HFILE = os.path.join(TMPDIR, 'hdr')
DFILE = os.path.join(TMPDIR, 'output')
RFILE = os.path.join(TMPDIR, 'recon')

# states of the rlog output parser
HEAD_STATE, BAR_STATE, REV_STATE, DATE_STATE = 0, 1, 2, 3

# rcs output
RE_TOTREV = re.compile('total revisions: (\\d+)')
RE_BAR = re.compile('----------------------------')
RE_REV = re.compile('revision (.+)')
RE_DATE = re.compile('date: ([^;]+);.*')
# xdelta output
RE_HDRSZ = re.compile('VCDIFF header size: +(\\d+)')
RE_EXTCOMP = re.compile('XDELTA ext comp.*')
76
77#
78# exceptions
class SkipRcsException(Exception):
    """Raised when an RCS file cannot be used for testing and should be skipped.

    The explanation is kept in `reason` and is also passed to Exception, so
    it shows up in tracebacks.
    """
    def __init__(self,reason):
        Exception.__init__(self, reason)
        self.reason = reason
class NotEnoughVersions(Exception):
    """Raised when an RCS file has too few revisions to form a pair."""
    def __init__(self):
        Exception.__init__(self)
class CommandError(Exception):
    """Raised when an external command fails.

    cmd -- the command, either a string or a tuple/list of words; a
           sequence is joined with single spaces (an empty one gives '').
    str -- description of the failure.

    The command text and the description are printed as soon as the error is
    constructed and are kept in `cmd` and `msg`.
    """
    def __init__(self,cmd,str):
        if type(cmd) in (tuple, list):
            cmd = ' '.join(['%s' % (word,) for word in cmd])
        Exception.__init__(self, str)
        self.cmd = cmd
        self.msg = str
        print('command was:  %s' % (cmd,))
        print('command failed:  %s' % (str,))
        print('have fun debugging')
93#
94# one version
class RcsVersion:
    """One revision of an RCS file.

    `vstr` is the revision string given at construction.  `date` is not set
    here; __cmp__ and Print read it, so it must be assigned first.
    """

    def __init__(self, vstr):
        self.vstr = vstr

    def __cmp__(self, other):
        # versions order by their date attribute
        return cmp(self.date, other.date)

    def Print(self):
        print('%s %s' % (self.vstr, self.date))
102#
103# one rcsfile
class RcsFile:
    """One RCS file (",v") and the revisions listed by `rlog` for it.

    Sum1() fills in `totrev` and `versions` from the rlog output;
    PairsByDate() then checks out successive revisions into TMPDIR and times
    the runnable on each adjacent pair.
    """

    def __init__(self, fname):
        self.fname = fname
        self.versions = []
        self.state = HEAD_STATE

    def SetTotRev(self,s):
        self.totrev = int(s)

    def Rev(self,s):
        self.rev = RcsVersion(s)
        if len(self.versions) >= self.totrev:
            raise SkipRcsException('too many versions (in log messages)')
        self.versions.append(self.rev)

    def Date(self,s):
        self.rev.date = s

    def Match(self, line, state, rx, gp, newstate, f):
        """Try one parser transition.

        If the parser is in `state` and `rx` matches `line`, pass group `gp`
        of the match to `f` (when f is given), move to `newstate` and return
        1.  Otherwise change nothing and return None.
        """
        if state == self.state:
            m = rx.match(line)
            if m:
                if f:
                    f(m.group(gp))
                self.state = newstate
                return 1
        return None

    def Sum1Rlog(self):
        """Run rlog on the file and collect its revisions and their dates."""
        f = os.popen('rlog '+self.fname, "r")
        l = f.readline()
        while l:
            if self.Match(l, HEAD_STATE, RE_TOTREV, 1, BAR_STATE, self.SetTotRev):
                pass
            elif self.Match(l, BAR_STATE, RE_BAR, 1, REV_STATE, None):
                pass
            elif self.Match(l, REV_STATE, RE_REV, 1, DATE_STATE, self.Rev):
                pass
            elif self.Match(l, DATE_STATE, RE_DATE, 1, BAR_STATE, self.Date):
                pass
            l = f.readline()
        c = f.close()
        if c != None:
            # close() returned an exit status, which cannot itself be raised.
            raise CommandError('rlog '+self.fname, 'rlog exited with status %s' % (c,))
        #print '%s versions %d' % (self.fname, len(self.versions))
        #for v in self.versions:
        #    v.Print()

    def Sum1(self):
        """Record the file size, parse the rlog output and sort the versions."""
        st = os.stat(self.fname)
        self.rcssize = st.st_size
        self.Sum1Rlog()
        if self.totrev != len(self.versions):
            raise SkipRcsException('wrong version count')
        self.versions.sort()

    def Checkout(self,n):
        """Write version n to Verf(n) with `co -p` and record its size."""
        v = self.versions[n]
        out = open(self.Verf(n), "w")
        try:
            cmd = 'co -ko -p%s %s' % (v.vstr, self.fname)
            total = 0
            (inf,
             stream,
             err) = os.popen3(cmd, "r")
            try:
                inf.close()
                buf = stream.read()
                while buf:
                    total = total + len(buf)
                    out.write(buf)
                    buf = stream.read()
                v.vsize = total
                estr = ''
                buf = err.read()
                while buf:
                    estr = estr + buf
                    buf = err.read()
                if stream.close():
                    raise CommandError(cmd, 'checkout failed: %s\n%s\n%s' % (v.vstr, self.fname, estr))
            finally:
                err.close()
        finally:
            # close the output even when the checkout fails
            out.close()

    def Vdate(self,n):
        return self.versions[n].date

    def Vstr(self,n):
        return self.versions[n].vstr

    def Verf(self,n):
        return os.path.join(TMPDIR, 'input.%d' % n)

    def PairsByDate(self,runnable):
        """Time the runnable on every adjacent pair of versions.

        Returns the list of TimeRun results.  Raises NotEnoughVersions when
        there are fewer than two revisions.
        """
        if self.totrev < 2:
            raise NotEnoughVersions()
        self.Checkout(0)
        ntrials = []
        for v in range(0,self.totrev-1):
            # Version v-1 was only needed by the previous pair; removing it
            # from v == 1 onward means input.0 is cleaned up as well.
            if v > 0:
                os.remove(self.Verf(v-1))
            self.Checkout(v+1)
            if os.stat(self.Verf(v)).st_size < MIN_SIZE or \
               os.stat(self.Verf(v+1)).st_size < MIN_SIZE:
                continue

            result = TimeRun(runnable.Runner(self.Verf(v),
                                             self.Vstr(v),
                                             self.Verf(v+1),
                                             self.Vstr(v+1)))
            print('testing %s %s: ideal %.3f%%: time %.7f: in %u/%u trials' % \
                  (os.path.basename(self.fname),
                   self.Vstr(v+1),
                   result.r1.ideal,
                   result.time.mean,
                   result.trials,
                   result.reps))
            ntrials.append(result)

        # the last two checkouts are still on disk
        os.remove(self.Verf(self.totrev-1))
        os.remove(self.Verf(self.totrev-2))
        return ntrials
226#
227# This class recursively scans a directory for rcsfiles
class RcsFinder:
    """Collects the RCS files (names ending in ",v") found under a directory.

    Crawl() fills `subdirs`, `rcsfiles` and `others`; Summarize() moves
    unusable files to `skipped`; PairsByDate() runs the timing over what is
    left.
    """

    def __init__(self):
        self.subdirs = []
        self.rcsfiles = []
        self.others = []
        self.skipped = []

    def Scan1(self,dir):
        """Classify the entries of one directory; return its subdirectories."""
        found_dirs = []
        found_rcs = []
        found_other = []
        for name in os.listdir(dir):
            path = os.path.join(dir, name)
            if os.path.isdir(path):
                found_dirs.append(path)
            elif name.endswith(",v"):
                found_rcs.append(RcsFile(path))
            else:
                found_other.append(path)
        self.subdirs = self.subdirs + found_dirs
        self.rcsfiles = self.rcsfiles + found_rcs
        self.others = self.others + found_other
        return found_dirs

    def Crawl(self, dir):
        """Scan dir and everything below it, breadth first."""
        pending = [dir]
        while pending:
            current = pending[0]
            pending = pending[1:] + self.Scan1(current)

    def Summarize(self):
        """Run Sum1 on every file; keep those with at least two versions."""
        usable = []
        for rf in self.rcsfiles:
            try:
                rf.Sum1()
                if rf.totrev < 2:
                    raise SkipRcsException('too few versions (< 2)')
            except SkipRcsException:
                #print 'skipping file %s: %s' % (rf.fname, e.reason)
                self.skipped.append(rf)
                continue
            usable.append(rf)
        self.rcsfiles = usable

    def PairsByDate(self,runnable):
        """Time the runnable on every kept file; return the per-file results."""
        results = []
        kept = []
        for rf in self.rcsfiles:
            print('testing %s on %s with %d versions' % (runnable.type, rf.fname, rf.totrev))
            try:
                results.append(rf.PairsByDate(runnable))
            except SkipRcsException:
                print('file %s has compressed versions: skipping' % (rf.fname))
                continue
            except NotEnoughVersions:
                print('testing %s on %s: not enough versions' % (runnable.type, rf.fname))
                continue
            kept.append(rf)
        self.rcsfiles = kept
        return results
288#
289#
class Bucks:
    """A histogram of HIST_SIZE equal-width buckets over [low, high).

    Each bucket is a list [lower edge, midpoint, upper edge, count].
    """

    def __init__(self,low,high):
        self.low = low
        self.high = high
        self.spread = high - low
        self.bucks = [[low+(self.spread * (n+0.0) / float(HIST_SIZE)),
                       low+(self.spread * (n+0.5) / float(HIST_SIZE)),
                       low+(self.spread * (n+1.0) / float(HIST_SIZE)),
                       0]
                      for n in range(0,HIST_SIZE)]

    def Add(self, x):
        """Count x in its bucket; x must lie in [low, high)."""
        assert(x>=self.low)
        assert(x<self.high)
        slot = int((x-self.low)/float(self.spread)*HIST_SIZE)
        bucket = self.bucks[slot]
        bucket[3] = bucket[3] + 1

    def Print(self, f):
        """Write one line per bucket to f: the three edges and the count."""
        for bucket in self.bucks:
            # gnuplot -persist "plot %s using 2:4
            f.write("%.1f %.1f %.1f %d\n" % (bucket[0],bucket[1],bucket[2],bucket[3]))
310#
311#
class TimeRun:
    """Time runnable.Run(trial, reps) repeatedly until the results are stable.

    The constructor does all the work.  On return the instance holds:
      r1     -- the first value returned by runnable.Run
      reps   -- repetitions per trial finally used
      trials -- number of measured trials (skipped ones excluded)
      time   -- StatList of per-repetition wall-clock times
      clock  -- StatList of per-repetition time.clock() times

    When set_reps is true and the runnable allows it (runnable.canrep),
    reps is raised until one trial takes at least TIME_TOO_SHORT seconds
    or max_reps is reached.  The first skip_trials trials are discarded.
    Measuring stops when the standard deviation, as a percentage of the
    mean wall time, drops below min_stddev_pct, when max_trials measured
    trials have been taken, or immediately when skip_trials + min_trials
    is 2 or less.
    """
    def __init__(self,runnable,set_reps=1,reps=MIN_REPS,max_reps=MAX_REPS,
                 skip_trials=SKIP_TRIALS,min_trials=MIN_TRIALS,max_trials=MAX_TRIALS,
                 min_stddev_pct=MIN_STDDEV_PCT):

        min_trials = min(min_trials,max_trials)
        self.trials = 0
        self.measured = []
        self.r1 = None
        self.reps = reps
        while 1:
            # Remove stale outputs one at a time: a missing DFILE must not
            # stop RFILE from being removed.
            for stale in (DFILE, RFILE):
                try:
                    os.remove(stale)
                except OSError:
                    pass

            start_time = time.time()
            start_clock = time.clock()

            result = runnable.Run(self.trials, self.reps)

            if self.r1 is None:
                self.r1 = result

            total_clock = (time.clock() - start_clock)
            total_time = (time.time() - start_time)

            elap_time = max((total_time) / self.reps, 0.000001)
            elap_clock = max((total_clock) / self.reps, 0.000001)

            #print 'trial: %d' % self.trials
            if set_reps and runnable.canrep and total_time < TIME_TOO_SHORT and self.reps < max_reps:
                # total_time can be 0.0 on a coarse timer; clamp the divisor
                # the same way elap_time is clamped to avoid ZeroDivisionError.
                scaled = int(self.reps * TIME_TOO_SHORT / max(total_time, 0.000001))
                self.reps = max(self.reps+1,scaled)
                self.reps = min(self.reps,max_reps)
                #print 'continue: need more reps: %d' % self.reps
                continue

            self.trials = self.trials + 1

            # skip some of the first trials
            if self.trials > skip_trials:
                self.measured.append((elap_clock,elap_time))
                #print 'measurement total: %.1f ms' % (total_time * 1000.0)

            # at least so many
            if self.trials < (skip_trials + min_trials):
                #print 'continue: need more trials: %d' % self.trials
                continue

            # compute %variance
            done = 0
            if skip_trials + min_trials <= 2:
                done = 1
                # StatList requires more than one sample, so duplicate them.
                self.measured = self.measured + self.measured

            self.time = StatList([x[1] for x in self.measured], 'elap time')
            sp = float(self.time.s) / float(self.time.mean)

            # what if MAX_TRIALS is exceeded?
            too_many = (self.trials-skip_trials) >= max_trials
            good = (100.0 * sp) < min_stddev_pct
            if done or too_many or good:
                self.trials = self.trials - skip_trials
                if not done and not good:
                    #print 'too many trials: %d' % self.trials
                    pass
                self.clock = StatList([x[0] for x in self.measured], 'elap clock')
                return
381#
382#
383#
def SumList(l):
    """Return the left-to-right `+` fold of the items of l.

    Raises TypeError for an empty sequence, since there is no initial value.
    """
    items = iter(l)
    try:
        total = next(items)
    except StopIteration:
        raise TypeError('reduce() of empty sequence with no initial value')
    for item in items:
        total = total + item
    return total
386#
387# returns (total, mean, stddev, q2 (median),
388# (q3-q1)/2 ("semi-interquartile range"), max-min (spread))
class StatList:
    """Summary statistics of a list of numbers.

    l    -- the samples; must hold more than one item and is sorted in place
    desc -- label used in the summary string and in the histogram file name
    hist -- when true, write a histogram of the samples to "./<desc>.hist"

    Attributes: cnt, l, total, mean, s (sample standard deviation),
    q0..q4 (quantile picks from the sorted list; q4 is the maximum plus 1
    so that it can serve as an exclusive histogram upper bound),
    siqr ((q3-q1)/2), spread (q4-q0), str (one-line summary) and hf.
    """
    def __init__(self,l,desc,hist=0):
        cnt = len(l)
        assert(cnt > 1)
        l.sort()
        self.cnt = cnt
        self.l = l
        self.total = SumList(l)
        self.mean = self.total / float(self.cnt)
        self.s = math.sqrt(SumList([(x-self.mean) * (x - self.mean) for x in l]) / float(self.cnt-1))
        self.q0 = l[0]
        self.q1 = l[int(self.cnt/4.0+0.5)]
        self.q2 = l[int(self.cnt/2.0+0.5)]
        self.q3 = l[min(self.cnt-1,int((3.0*self.cnt)/4.0+0.5))]
        self.q4 = l[self.cnt-1]+1
        self.hf = "./%s.hist" % desc
        self.siqr = (self.q3-self.q1)/2.0
        self.spread = (self.q4-self.q0)
        self.str = '%s %d; mean %d; sdev %d; q2 %d; .5(q3-q1) %.1f; spread %d' % \
                   (desc, self.total, self.mean, self.s, self.q2, self.siqr, self.spread)
        if hist:
            f = open(self.hf, "w")
            try:
                self.bucks = Bucks(self.q0,self.q4)
                for i in l:
                    self.bucks.Add(i)
                self.bucks.Print(f)
            finally:
                # Close the file even if bucketing or writing fails.
                f.close()
416
def RunCommand(args):
    """Run args (argv list, looked up on PATH) and wait for it.

    Raises CommandError when os.spawnvp reports a non-zero result.
    """
    #print "run command", args
    status = os.spawnvp(os.P_WAIT, args[0], args)
    if status == 0:
        return
    raise CommandError(args, 'exited %d' % status)
422
def RunCommandIO(args,infn,outfn):
    """Run args with stdin read from infn and stdout written to outfn.

    Forks, redirects descriptors 0 and 1 in the child and execs args[0].
    The parent waits and raises CommandError unless the child exited
    normally with status 0.
    """
    #print "run command io", args
    p = os.fork()
    if p == 0:
        try:
            os.dup2(os.open(infn,os.O_RDONLY),0)
            os.dup2(os.open(outfn,os.O_CREAT|os.O_TRUNC|os.O_WRONLY),1)
            os.execvp(args[0], args)
        finally:
            # Only reached when the redirection or the exec failed.  The
            # child must never fall back into the parent's code, so leave
            # immediately with a non-zero status.
            os._exit(127)
    else:
        s = os.waitpid(p,0)
        # WEXITSTATUS is only meaningful once WIFEXITED is true.
        if not os.WIFEXITED(s[1]):
            raise CommandError(args, 'terminated abnormally (status %d)' % s[1])
        o = os.WEXITSTATUS(s[1])
        if o != 0:
            raise CommandError(args, 'exited %d' % o)
435
def RunXdelta3(args,kind=FORK):
    """Run xdelta3 with the given argument list.

    The external-command path is switched off by a constant, so `kind` is
    currently not consulted and xdelta3.main(args) is always called
    in-process.  Any exception from it is re-raised as CommandError.
    """
    use_fork = 0 # kind == FORK:
    if use_fork:
        RunCommand([XD3CMD] + args)
        return
    try:
        xdelta3.main(args)
    except Exception:
        raise CommandError(args, "xdelta3.main exception")
444
class GzipInfo:
    """Sizes, in bytes, of a target file and of its compressed output."""
    def __init__(self,target,delta):
        # tgtsize: size of the uncompressed input; dsize: size of the output
        self.dsize = os.path.getsize(delta)
        self.tgtsize = os.path.getsize(target)
449
class Xdelta3Info:
    """Facts about a delta file, gathered from "xdelta3 printhdr".

    Attributes:
      tgtsize -- size of `target` in bytes
      dsize   -- size of `delta` in bytes
      ideal   -- 100 * dsize / tgtsize, or 0.0 for an empty target
      hdrsize -- integer captured by RE_HDRSZ from the printhdr output
      extcomp -- 1 if any printhdr line matches RE_EXTCOMP, else 0

    Raises CommandError when no header size is found in the output.
    """
    def __init__(self,target,delta):
        # Keep the argument list so that it can be reported on failure.
        cmd = ['printhdr',
               '-f',
               delta,
               HFILE]
        RunXdelta3(cmd)
        self.extcomp = 0
        self.hdrsize = 0
        self.tgtsize = os.stat(target).st_size
        self.dsize = os.stat(delta).st_size
        if self.tgtsize > 0:
            self.ideal = 100.0 * self.dsize / self.tgtsize
        else:
            self.ideal = 0.0
        o = open(HFILE, "r")
        try:
            l = o.readline()
            while l:
                #print l.strip()
                m = RE_HDRSZ.match(l)
                if m:
                    self.hdrsize = int(m.group(1))
                m = RE_EXTCOMP.match(l)
                if m:
                    #print 'EXTCOMP', m.group(0)
                    self.extcomp = 1
                l = o.readline()
        finally:
            # Close the header dump on every path, including the error below.
            o.close()
        if self.hdrsize == 0:
            raise CommandError(cmd, 'no hdrsize')
479
class Xdelta3Pair:
    """Runnable that encodes one (old, new) file pair with xdelta3.

    On the first trial it also decodes the delta and compares the result
    with the new file using `cmp`.
    """
    def __init__(self):
        self.type = 'xdelta3'
        self.canrep = 1
        self.presrc = '-s'
        self.encode_args = '-eqf'
        self.decode_args = '-dqf'

    def Runner(self,old,oldv,new,newv):
        """Record the file pair (and version labels) to run on; returns self."""
        self.old, self.oldv = old, oldv
        self.new, self.newv = new, newv
        return self

    def Run(self,trial,reps):
        """Encode new against old into DFILE, repeating `reps` times via -P.

        Returns None for trial > 0.  For the first trial returns the
        Xdelta3Info of the delta after a decode-and-compare round trip;
        raises SkipRcsException('ext comp') when the info reports
        external compression.
        """
        encode_cmd = ['-P', '%d' % reps,
                      self.encode_args,
                      self.presrc, self.old,
                      self.new,
                      DFILE]
        RunXdelta3(encode_cmd)
        if trial > 0:
            return None
        self.dinfo = Xdelta3Info(self.new,DFILE)
        if self.dinfo.extcomp:
            raise SkipRcsException('ext comp')
        decode_cmd = [self.decode_args,
                      self.presrc, self.old,
                      DFILE,
                      RFILE]
        RunXdelta3(decode_cmd)
        RunCommand(('cmp', self.new, RFILE))
        return self.dinfo
517
def Test():
    """Crawl RCSDIR for RCS files, print summary statistics, then run the
    xdelta3 pair test over them.  Exits with status 1 when the crawl
    finds no RCS files."""
    finder = RcsFinder()
    finder.Crawl(RCSDIR)
    if not finder.rcsfiles:
        sys.exit(1)
    finder.Summarize()
    counts = (len(finder.rcsfiles),
              len(finder.subdirs),
              len(finder.others),
              len(finder.skipped))
    print("rcsfiles: rcsfiles %d; subdirs %d; others %d; skipped %d" % counts)
    print(StatList([x.rcssize for x in finder.rcsfiles], "rcssize", 1).str)
    print(StatList([x.totrev for x in finder.rcsfiles], "totrev", 1).str)
    pairs = finder.PairsByDate(Xdelta3Pair())
531
def Decimals(max):
    """Return 0 followed by 1..9 times each power of ten, up to a final
    power of ten that is >= max.

    Example: Decimals(100) == [0, 1, ..., 9, 10, 20, ..., 90, 100].
    """
    l = [0]
    step = 1
    while 1:
        # extend() accepts any iterable, so this does not depend on range()
        # returning a list, and it avoids rebuilding the list on each pass.
        l.extend(range(step, step * 10, step))
        if step * 10 >= max:
            l.append(step * 10)
            break
        step = step * 10
    return l
543
class Xdelta3Run1:
    """Runnable that encodes a single file with xdelta3 (no source file)."""
    def __init__(self,file,kind,reps=0):
        self.kind = kind
        self.canrep = 1
        self.reps = reps
        self.file = file

    def Run(self,trial,reps):
        """Encode self.file into DFILE; returns Xdelta3Info on trial 0, else None.

        A non-zero self.reps overrides the requested count, in which case
        the caller must have asked for exactly one repetition.
        """
        count = reps
        if self.reps:
            assert(reps == 1)
            count = self.reps
        RunXdelta3(['-P', '%d' % count, '-efq', self.file, DFILE],kind=self.kind)
        if trial > 0:
            return None
        return Xdelta3Info(self.file,DFILE)
558
class GzipRun1:
    """Runnable that compresses a single file with "gzip -cf" into DFILE."""
    def __init__(self,file):
        self.canrep = 0
        self.file = file

    def Run(self,trial,reps):
        """Compress once (reps must be 1); returns GzipInfo on trial 0, else None."""
        assert(reps == 1)
        RunCommandIO(['gzip', '-cf'], self.file, DFILE)
        if not (trial > 0):
            return GzipInfo(self.file,DFILE)
        return None
569
def SetFileSize(F,L):
    """Create file F if needed and set its length to exactly L bytes."""
    fd = os.open(F, os.O_CREAT | os.O_WRONLY)
    try:
        os.ftruncate(fd,L)
        assert(os.fstat(fd).st_size == L)
    finally:
        # Release the descriptor even when truncation or the check fails.
        os.close(fd)
575
def ReportSpeed(L,tr,desc):
    """Print one result line for a run of length L timed by TimeRun `tr`.

    The rate is (L + delta size) divided by the mean elapsed time.
    """
    mean = tr.time.mean
    fields = (desc, L, tr.r1.dsize, mean * 1000.0, ((L+tr.r1.dsize) / mean), tr.trials, tr.reps)
    print('%s 0-run length %u: dsize %u: time %.3f ms: encode %.0f B/sec: in %ux%u trials' % fields)
579
def RunSpeed():
    """For each length from Decimals(MAX_RUN), resize RUNFILE to that
    length, then time and report xdelta3 followed by gzip on it."""
    for L in Decimals(MAX_RUN):
        SetFileSize(RUNFILE, L)
        xd3_timing = TimeRun(Xdelta3Run1(RUNFILE,kind=PYEXT))
        ReportSpeed(L,xd3_timing,'xdelta3')
        gzip_timing = TimeRun(GzipRun1(RUNFILE))
        ReportSpeed(L,gzip_timing,'gzip   ')
587
if __name__ == "__main__":
    # Run both test phases inside TMPDIR.  The directory is removed only
    # when no CommandError occurred; after a CommandError it is left behind.
    finished = False
    try:
        os.mkdir(TMPDIR)
        Test()
        RunSpeed()
        finished = True
    except CommandError:
        pass
    if finished:
        RunCommand(['rm', '-rf', TMPDIR])
diff --git a/xdelta3/xdelta3-second.h b/xdelta3/xdelta3-second.h
new file mode 100755
index 0000000..89287f0
--- /dev/null
+++ b/xdelta3/xdelta3-second.h
@@ -0,0 +1,363 @@
1/* xdelta 3 - delta compression tools and library
2 * Copyright (C) 2002 and onward. Joshua P. MacDonald
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 */
18
19#ifndef _XDELTA3_SECOND_H_
20#define _XDELTA3_SECOND_H_
21
22/******************************************************************************************
23 Secondary compression
24 ******************************************************************************************/
25
26#define xd3_sec_data(s) ((s)->sec_stream_d)
27#define xd3_sec_inst(s) ((s)->sec_stream_i)
28#define xd3_sec_addr(s) ((s)->sec_stream_a)
29
30struct _xd3_sec_type
31{
32 int id;
33 const char *name;
34 xd3_secondary_flags flags;
35
36 /* xd3_sec_stream is opaque to the generic code */
37 xd3_sec_stream* (*alloc) (xd3_stream *stream);
38 void (*destroy) (xd3_stream *stream,
39 xd3_sec_stream *sec);
40 void (*init) (xd3_sec_stream *sec);
41 int (*decode) (xd3_stream *stream,
42 xd3_sec_stream *sec_stream,
43 const uint8_t **input,
44 const uint8_t *input_end,
45 uint8_t **output,
46 const uint8_t *output_end);
47#if XD3_ENCODER
48 int (*encode) (xd3_stream *stream,
49 xd3_sec_stream *sec_stream,
50 xd3_output *input,
51 xd3_output *output,
52 xd3_sec_cfg *cfg);
53#endif
54};
55
56#define BIT_STATE_ENCODE_INIT { 0, 1 }
57#define BIT_STATE_DECODE_INIT { 0, 0x100 }
58
59typedef struct _bit_state bit_state;
60struct _bit_state
61{
62 usize_t cur_byte;
63 usize_t cur_mask;
64};
65
66static INLINE void xd3_bit_state_encode_init (bit_state *bits)
67{
68 bits->cur_byte = 0;
69 bits->cur_mask = 1;
70}
71
72static INLINE int xd3_decode_bits (xd3_stream *stream,
73 bit_state *bits,
74 const uint8_t **input,
75 const uint8_t *input_max,
76 usize_t nbits,
77 usize_t *valuep)
78{
79 usize_t value = 0;
80 usize_t vmask = 1 << nbits;
81
82 if (bits->cur_mask == 0x100) { goto next_byte; }
83
84 for (;;)
85 {
86 do
87 {
88 vmask >>= 1;
89
90 if (bits->cur_byte & bits->cur_mask)
91 {
92 value |= vmask;
93 }
94
95 IF_DEBUG1 (P(RINT "[dbits] %u", (bits->cur_byte & bits->cur_mask) && 1));
96
97 bits->cur_mask <<= 1;
98
99 if (vmask == 1) { goto done; }
100 }
101 while (bits->cur_mask != 0x100);
102
103 next_byte:
104
105 if (*input == input_max)
106 {
107 stream->msg = "secondary decoder end of input";
108 return EINVAL;
109 }
110
111 bits->cur_byte = *(*input)++;
112 bits->cur_mask = 1;
113 }
114
115 done:
116
117 (*valuep) = value;
118 return 0;
119}
120
121static INLINE int xd3_decode_bit (xd3_stream *stream,
122 bit_state *bits,
123 const uint8_t **input,
124 const uint8_t *input_max,
125 usize_t *valuep)
126{
127 if (bits->cur_mask == 0x100)
128 {
129 if (*input == input_max)
130 {
131 stream->msg = "secondary decoder end of input";
132 return EINVAL;
133 }
134
135 bits->cur_byte = *(*input)++;
136 bits->cur_mask = 1;
137 }
138
139 *valuep = (bits->cur_byte & bits->cur_mask) && 1;
140
141 IF_DEBUG1 (P(RINT "[dbit] %u", (bits->cur_byte & bits->cur_mask) && 1));
142
143 bits->cur_mask <<= 1;
144
145 return 0;
146}
147
148#if REGRESSION_TEST
/* There may be extra bits at the end of secondary decompression; this function checks
 * that none of them is non-zero.  This is overly strict, but it helps pass the
 * single-bit-error regression test. */
152static int
153xd3_test_clean_bits (xd3_stream *stream, bit_state *bits)
154{
155 for (; bits->cur_mask != 0x100; bits->cur_mask <<= 1)
156 {
157 if (bits->cur_byte & bits->cur_mask)
158 {
159 stream->msg = "secondary decoder garbage";
160 return EINVAL;
161 }
162 }
163
164 return 0;
165}
166#endif
167
168static xd3_sec_stream*
169xd3_get_secondary (xd3_stream *stream, xd3_sec_stream **sec_streamp)
170{
171 xd3_sec_stream *sec_stream;
172
173 if ((sec_stream = *sec_streamp) == NULL)
174 {
175 if ((*sec_streamp = stream->sec_type->alloc (stream)) == NULL)
176 {
177 return NULL;
178 }
179
180 sec_stream = *sec_streamp;
181
182 /* If cuumulative stats, init once. */
183 stream->sec_type->init (sec_stream);
184 }
185
186 return sec_stream;
187}
188
189static int
190xd3_decode_secondary (xd3_stream *stream,
191 xd3_desect *sect,
192 xd3_sec_stream **sec_streamp)
193{
194 xd3_sec_stream *sec_stream;
195 uint32_t dec_size;
196 uint8_t *out_used;
197 int ret;
198
199 if ((sec_stream = xd3_get_secondary (stream, sec_streamp)) == NULL) { return ENOMEM; }
200
201 /* Decode the size, allocate the buffer. */
202 if ((ret = xd3_read_size (stream, & sect->buf, sect->buf_max, & dec_size)) ||
203 (ret = xd3_decode_allocate (stream, dec_size, & sect->copied2, & sect->alloc2, NULL, NULL)))
204 {
205 return ret;
206 }
207
208 out_used = sect->copied2;
209
210 if ((ret = stream->sec_type->decode (stream, sec_stream,
211 & sect->buf, sect->buf_max,
212 & out_used, out_used + dec_size))) { return ret; }
213
214 if (sect->buf != sect->buf_max)
215 {
216 stream->msg = "secondary decoder finished with unused input";
217 return EINVAL;
218 }
219
220 if (out_used != sect->copied2 + dec_size)
221 {
222 stream->msg = "secondary decoder short output";
223 return EINVAL;
224 }
225
226 sect->buf = sect->copied2;
227 sect->buf_max = sect->copied2 + dec_size;
228
229 return 0;
230}
231
232#if XD3_ENCODER
233/* OPT: Should these be inline? */
234static INLINE int xd3_encode_bit (xd3_stream *stream,
235 xd3_output **output,
236 bit_state *bits,
237 int bit)
238{
239 int ret;
240
241 if (bit)
242 {
243 bits->cur_byte |= bits->cur_mask;
244 }
245
246 IF_DEBUG1 (P(RINT "[ebit] %u", bit && 1));
247
248 /* OPT: Might help to buffer more than 8 bits at once. */
249 if (bits->cur_mask == 0x80)
250 {
251 if ((ret = xd3_emit_byte (stream, output, bits->cur_byte)) != 0) { return ret; }
252
253 bits->cur_mask = 1;
254 bits->cur_byte = 0;
255 }
256 else
257 {
258 bits->cur_mask <<= 1;
259 }
260
261 return 0;
262}
263
264static INLINE int xd3_flush_bits (xd3_stream *stream,
265 xd3_output **output,
266 bit_state *bits)
267{
268 return (bits->cur_mask == 1) ? 0 : xd3_emit_byte (stream, output, bits->cur_byte);
269}
270
271static INLINE int xd3_encode_bits (xd3_stream *stream,
272 xd3_output **output,
273 bit_state *bits,
274 usize_t nbits,
275 usize_t value)
276{
277 int ret;
278 usize_t mask = 1 << nbits;
279
280 XD3_ASSERT (nbits > 0);
281 XD3_ASSERT (nbits < sizeof (usize_t) * 8);
282 XD3_ASSERT (value < mask);
283
284 do
285 {
286 mask >>= 1;
287
288 if ((ret = xd3_encode_bit (stream, output, bits, value & mask))) { return ret; }
289 }
290 while (mask != 1);
291
292 return 0;
293}
294
295static int
296xd3_encode_secondary (xd3_stream *stream,
297 xd3_output **head,
298 xd3_output **tail,
299 xd3_sec_stream **sec_streamp,
300 xd3_sec_cfg *cfg,
301 int *did_it)
302{
303 xd3_sec_stream *sec_stream;
304 xd3_output *tmp_head;
305 xd3_output *tmp_tail;
306
307 usize_t comp_size;
308 usize_t orig_size;
309
310 int ret;
311
312 orig_size = xd3_sizeof_output (*head);
313
314 if (orig_size < SECONDARY_MIN_INPUT) { return 0; }
315
316 if ((sec_stream = xd3_get_secondary (stream, sec_streamp)) == NULL) { return ENOMEM; }
317
318 tmp_head = xd3_alloc_output (stream, NULL);
319
320 /* Encode the size, encode the data. @@ Encoding the size makes it simpler, but is a
321 * little gross. Should not need the entire section in contiguous memory, but it is
322 * much easier this way. */
323 if ((ret = xd3_emit_size (stream, & tmp_head, orig_size)) ||
324 (ret = stream->sec_type->encode (stream, sec_stream, *head, tmp_head, cfg))) { goto getout; }
325
326 /* If the secondary compressor determines its no good, it returns XD3_NOSECOND. */
327
328 /* Setup tmp_tail, comp_size */
329 tmp_tail = tmp_head;
330 comp_size = tmp_head->next;
331
332 while (tmp_tail->next_page != NULL)
333 {
334 tmp_tail = tmp_tail->next_page;
335 comp_size += tmp_tail->next;
336 }
337
338 XD3_ASSERT (comp_size == xd3_sizeof_output (tmp_head));
339 XD3_ASSERT (tmp_tail != NULL);
340
341 if (comp_size < (orig_size - SECONDARY_MIN_SAVINGS))
342 {
343 IF_DEBUG1(P(RINT "secondary saved %u bytes: %u -> %u (%0.2f%%)\n",
344 orig_size - comp_size, orig_size, comp_size,
345 (double) comp_size / (double) orig_size));
346
347 xd3_free_output (stream, *head);
348
349 *head = tmp_head;
350 *tail = tmp_tail;
351 *did_it = 1;
352 }
353 else
354 {
355 getout:
356 if (ret == XD3_NOSECOND) { ret = 0; }
357 xd3_free_output (stream, tmp_head);
358 }
359
360 return ret;
361}
362#endif /* XD3_ENCODER */
363#endif /* _XDELTA3_SECOND_H_ */
diff --git a/xdelta3/xdelta3-test.h b/xdelta3/xdelta3-test.h
new file mode 100755
index 0000000..198d440
--- /dev/null
+++ b/xdelta3/xdelta3-test.h
@@ -0,0 +1,2229 @@
1/* xdelta 3 - delta compression tools and library
2 * Copyright (C) 2001, 2003, 2004, 2005, 2006. Joshua P. MacDonald
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 */
18
19#include <math.h>
20#include <sys/wait.h>
21
22#define MSG_IS(x) (stream->msg != NULL && strcmp ((x), stream->msg) == 0)
23
24static const usize_t TWO_MEGS_AND_DELTA = (2 << 20) + (1 << 10);
25static const usize_t ADDR_CACHE_ROUNDS = 10000;
26
27static const usize_t TEST_FILE_MEAN = 16384;
28static const double TEST_ADD_MEAN = 16;
29static const double TEST_ADD_MAX = 256;
30static const double TEST_ADD_RATIO = 0.1;
31static const double TEST_EPSILON = 0.5;
32
33static char TEST_TARGET_FILE[32];
34static char TEST_SOURCE_FILE[32];
35static char TEST_DELTA_FILE[32];
36static char TEST_RECON_FILE[32];
37static char TEST_RECON2_FILE[32];
38static char TEST_COPY_FILE[32];
39
40static int TESTBUFSIZE = 1024 * 16;
41
42static int test_exponential_dist (usize_t mean, usize_t max);
43
44/* TODO
45 *
46 * 1. Test state changes: that config is called before open, open called before
47 * encode/decode..., close incomplete works, invalid options, consume_output always
48 * called, no mixing of encode/decode, etc.
49 *
50 * 2. Test window selection, window alignment, 1.5 pass alg vs. greedy
51 */
52
53/******************************************************************************************
54 TEST HELPERS
55 ******************************************************************************************/
56
57static void DOT (void) { P(RINT "."); }
58static int do_cmd (xd3_stream *stream, const char *buf)
59{
60 int ret;
61 if ((ret = system (buf)) != 0)
62 {
63 if (WIFEXITED (ret))
64 {
65 stream->msg = "command exited non-zero";
66 }
67 else
68 {
69 stream->msg = "abnormal command termination";
70 }
71 return EINVAL;
72 }
73 DOT ();
74 return 0;
75}
76static int do_fail (xd3_stream *stream, const char *buf)
77{
78 int ret;
79 ret = system (buf);
80 if (! WIFEXITED (ret) || WEXITSTATUS (ret) != 1)
81 {
82 stream->msg = "command should have not succeeded";
83 P(RINT "command was %s", buf);
84 return EINVAL;
85 }
86 DOT ();
87 return 0;
88}
89
90static int
91test_exponential_dist (usize_t mean, usize_t max)
92{
93 double mean_d = mean;
94 double erand = log (1.0 / drand48 ());
95 usize_t x = (usize_t) (mean_d * erand + 0.5);
96
97 return min (x, max);
98}
99
100/* Test that the exponential distribution actually produces its mean. */
101static int
102test_random_numbers (xd3_stream *stream, int ignore)
103{
104 int i;
105 usize_t sum = 0;
106 usize_t mean = 50;
107 usize_t n_rounds = 10000;
108 double average, error;
109 double allowed_error = 1.0;
110
111 for (i = 0; i < n_rounds; i += 1)
112 {
113 sum += test_exponential_dist (mean, USIZE_T_MAX);
114 }
115
116 average = (double) sum / (double) n_rounds;
117 error = average - (double) mean;
118
119 if (error < allowed_error && error > -allowed_error)
120 {
121 /*P(RINT "error is %f\n", error);*/
122 return 0;
123 }
124
125 stream->msg = "random distribution looks broken";
126 return EINVAL;
127}
128
129static int
130test_setup (void)
131{
132 static int x = 0;
133 x++;
134 //P(RINT "test setup: %d", x);
135 sprintf (TEST_TARGET_FILE, "/tmp/xdtest.target.%d", x);
136 sprintf (TEST_SOURCE_FILE, "/tmp/xdtest.source.%d", x);
137 sprintf (TEST_DELTA_FILE, "/tmp/xdtest.delta.%d", x);
138 sprintf (TEST_RECON_FILE, "/tmp/xdtest.recon.%d", x);
139 sprintf (TEST_RECON2_FILE, "/tmp/xdtest.recon2.%d", x);
140 sprintf (TEST_COPY_FILE, "/tmp/xdtest.copy.%d", x);
141 return 0;
142}
143
144static void
145test_unlink (char* file)
146{
147 while (unlink (file) != 0)
148 {
149 if (errno == ENOENT)
150 {
151 break;
152 }
153 char buf[TESTBUFSIZE];
154 sprintf (buf, "rm -f %s", file);
155 system (buf);
156 }
157}
158
159static void
160test_cleanup (void)
161{
162 static int x = 0;
163 x++;
164 //P(RINT "test cleanup: %d", x);
165 test_unlink (TEST_TARGET_FILE);
166 test_unlink (TEST_SOURCE_FILE);
167 test_unlink (TEST_DELTA_FILE);
168 test_unlink (TEST_RECON_FILE);
169 test_unlink (TEST_RECON2_FILE);
170 test_unlink (TEST_COPY_FILE);
171}
172
173static int
174test_make_inputs (xd3_stream *stream, xoff_t *ss_out, xoff_t *ts_out)
175{
176 usize_t ts = (lrand48 () % TEST_FILE_MEAN) + TEST_FILE_MEAN;
177 usize_t ss = (lrand48 () % TEST_FILE_MEAN) + TEST_FILE_MEAN;
178 uint8_t *buf = malloc (ts + ss), *sbuf = buf /*, *tbuf = buf + ss*/;
179 usize_t sadd = 0, sadd_max = ss * TEST_ADD_RATIO;
180 FILE *tf /*, *sf*/;
181 usize_t i, j;
182 int ret;
183
184 if (buf == NULL) { return ENOMEM; }
185
186 if ((tf = fopen (TEST_TARGET_FILE, "w")) == NULL)
187 {
188 stream->msg = "write failed";
189 ret = get_errno ();
190 goto failure;
191 }
192
193 /* Then modify the data to produce copies, everything not copied is an add. The
194 * following logic produces the TEST_ADD_RATIO. The variable SADD contains the number
195 * of adds so far, which should not exceed SADD_MAX. */
196 for (i = 0; i < ss; )
197 {
198 usize_t left = ss - i;
199 usize_t next = test_exponential_dist (TEST_ADD_MEAN, TEST_ADD_MAX);
200 usize_t add_left = sadd_max - sadd;
201 double add_prob = (left == 0) ? 0 : (add_left / left);
202
203 next = min (left, next);
204
205 if (i > 0 && (next > add_left || drand48 () >= add_prob))
206 {
207 /* Copy */
208 usize_t offset = lrand48 () % i;
209
210 for (j = 0; j < next; j += 1)
211 {
212 sbuf[i++] = sbuf[offset + j];
213 }
214 }
215 else
216 {
217 /* Add */
218 for (j = 0; j < next; j += 1)
219 {
220 sbuf[i++] = lrand48 ();
221 }
222 }
223 }
224
225 if ((fwrite (sbuf, 1, ss, tf) != ss))
226 {
227 stream->msg = "write failed";
228 ret = get_errno ();
229 goto failure;
230 }
231
232 if ((ret = fclose (tf)) /* || (ret = fclose (sf))*/)
233 {
234 stream->msg = "close failed";
235 ret = get_errno ();
236 goto failure;
237 }
238
239 if (ts_out) { (*ts_out) = ts; }
240 if (ss_out) { (*ss_out) = ss; }
241
242 failure:
243 free (buf);
244 return ret;
245}
246
247static int
248compare_files (xd3_stream *stream, const char* tgt, const char *rec)
249{
250 FILE *orig, *recons;
251 uint8_t obuf[TESTBUFSIZE], rbuf[TESTBUFSIZE];
252 int offset = 0;
253 int i;
254 int oc, rc;
255
256 if ((orig = fopen (tgt, "r")) == NULL ||
257 (recons = fopen (rec, "r")) == NULL)
258 {
259 stream->msg = "read failed";
260 return get_errno ();
261 }
262
263 for (;;)
264 {
265 oc = fread (obuf, 1, TESTBUFSIZE, orig);
266 rc = fread (rbuf, 1, TESTBUFSIZE, recons);
267
268 if (oc < 0 || rc < 0)
269 {
270 stream->msg = "read failed";
271 return get_errno ();
272 }
273
274 if (oc != rc)
275 {
276 stream->msg = "compare files: different length";
277 return EINVAL;
278 }
279
280 if (oc == 0)
281 {
282 break;
283 }
284
285 for (i = 0; i < oc; i += 1)
286 {
287 if (obuf[i] != rbuf[i])
288 {
289 stream->msg = "compare files: different values";
290 return EINVAL;
291 }
292 }
293
294 offset += oc;
295 }
296
297 fclose (orig);
298 fclose (recons);
299 return 0;
300}
301
302static int
303test_save_copy (const char *origname)
304{
305 char buf[TESTBUFSIZE];
306 int ret;
307
308 sprintf (buf, "cp -f %s %s", origname, TEST_COPY_FILE);
309
310 if ((ret = system (buf)) != 0)
311 {
312 return EINVAL;
313 }
314
315 return 0;
316}
317
318static int
319test_file_size (const char* file, xoff_t *size)
320{
321 struct stat sbuf;
322 int ret;
323
324 if (stat (file, & sbuf) < 0)
325 {
326 ret = get_errno ();
327 P(RINT "xdelta3: stat failed: %s: %s\n", file, strerror (ret));
328 return ret;
329 }
330
331 if (! S_ISREG (sbuf.st_mode))
332 {
333 ret = EINVAL;
334 P(RINT "xdelta3: not a regular file: %s: %s\n", file, strerror (ret));
335 return ret;
336 }
337
338 (*size) = sbuf.st_size;
339 return 0;
340}
341
342/******************************************************************************************
343 READ OFFSET
344 ******************************************************************************************/
345
/* Common test for read_integer errors: encodes a 64-bit value and then attempts to read
 * it as a 32-bit value.  If TRUNTO is non-zero, attempts to get errors by shortening the
 * input by TRUNTO or more bytes; otherwise the read should overflow.  Expects EINVAL and MSG. */
349static int
350test_read_integer_error (xd3_stream *stream, int trunto, const char *msg)
351{
352 uint64_t eval = (uint64_t) UINT32_MAX + 1ULL;
353 uint32_t rval;
354 xd3_output *buf = NULL;
355 const uint8_t *max;
356 const uint8_t *inp;
357 int ret;
358
359 buf = xd3_alloc_output (stream, buf);
360
361 if ((ret = xd3_emit_uint64_t (stream, & buf, eval)))
362 {
363 goto fail;
364 }
365
366 again:
367
368 inp = buf->base;
369 max = buf->base + buf->next - trunto;
370
371 if ((ret = xd3_read_uint32_t (stream, & inp, max, & rval)) != EINVAL || !MSG_IS (msg))
372 {
373 ret = EINVAL;
374 }
375 else if (trunto && trunto < buf->next)
376 {
377 trunto += 1;
378 goto again;
379 }
380 else
381 {
382 ret = 0;
383 }
384
385 fail:
386 xd3_free_output (stream, buf);
387 return ret;
388}
389
390/* Test integer overflow using the above routine. */
391static int
392test_decode_integer_overflow (xd3_stream *stream, int unused)
393{
394 return test_read_integer_error (stream, 0, "overflow in read_intger");
395}
396
397/* Test integer EOI using the above routine. */
398static int
399test_decode_integer_end_of_input (xd3_stream *stream, int unused)
400{
401 return test_read_integer_error (stream, 1, "end-of-input in read_integer");
402}
403
404/* Test that emit_integer/decode_integer/sizeof_integer/read_integer work on correct
405 * inputs. Tests powers of (2^7), plus or minus, up to the maximum value. */
/* Expands to a complete function body that round-trips integers of TYPE.
 * It uses a variable named `stream` from the enclosing function and ends
 * with "return ret" without a semicolon, which the invocation supplies.
 *
 * Test values: for every shift i = 0, 7, 14, ... below the bit width of
 * TYPE, the three values (ONE << i) - ONE, (ONE << i) and (ONE << i) + ONE,
 * followed by MAX - ONE and MAX.
 *
 * First loop: each value is emitted into rbuf (reset for every value) and
 * appended to dbuf; the emitted length must equal xd3_sizeof_TYPE and
 * xd3_read_TYPE must return the same value.
 * Second loop: dbuf is installed as the stream input and every value is
 * read back in order with xd3_decode_TYPE; no input may be left over.
 *
 * ret is 0 on success, EINVAL on a mismatch, or the failing call's error. */
#define TEST_ENCODE_DECODE_INTEGER(TYPE,ONE,MAX)                                \
  xd3_output *rbuf = NULL;                                                      \
  xd3_output *dbuf = NULL;                                                      \
  TYPE values[64];                                                              \
  int nvalues = 0;                                                              \
  int i, ret = 0;                                                               \
                                                                                \
  for (i = 0; i < (sizeof (TYPE) * 8); i += 7)                                  \
    {                                                                           \
      values[nvalues++] = (ONE << i) - ONE;                                     \
      values[nvalues++] = (ONE << i);                                           \
      values[nvalues++] = (ONE << i) + ONE;                                     \
    }                                                                           \
                                                                                \
  values[nvalues++] = MAX-ONE;                                                  \
  values[nvalues++] = MAX;                                                      \
                                                                                \
  rbuf = xd3_alloc_output (stream, rbuf);                                       \
  dbuf = xd3_alloc_output (stream, dbuf);                                       \
                                                                                \
  for (i = 0; i < nvalues; i += 1)                                              \
    {                                                                           \
      const uint8_t *max;                                                       \
      const uint8_t *inp;                                                       \
      TYPE val;                                                                 \
                                                                                \
      DOT ();                                                                   \
      rbuf->next = 0;                                                           \
                                                                                \
      if ((ret = xd3_emit_ ## TYPE (stream, & rbuf, values[i])) ||              \
	  (ret = xd3_emit_ ## TYPE (stream, & dbuf, values[i])))                \
	{                                                                       \
	  goto fail;                                                            \
	}                                                                       \
                                                                                \
      inp = rbuf->base;                                                         \
      max = rbuf->base + rbuf->next;                                            \
                                                                                \
      if (rbuf->next != xd3_sizeof_ ## TYPE (values[i]))                        \
	{                                                                       \
	  ret = EINVAL;                                                         \
	  goto fail;                                                            \
	}                                                                       \
                                                                                \
      if ((ret = xd3_read_ ## TYPE (stream, & inp, max, & val)))                \
	{                                                                       \
	  goto fail;                                                            \
	}                                                                       \
                                                                                \
      if (val != values[i])                                                     \
	{                                                                       \
	  ret = EINVAL;                                                         \
	  goto fail;                                                            \
	}                                                                       \
                                                                                \
      DOT ();                                                                   \
    }                                                                           \
                                                                                \
  stream->next_in  = dbuf->base;                                                \
  stream->avail_in = dbuf->next;                                                \
                                                                                \
  for (i = 0; i < nvalues; i += 1)                                              \
    {                                                                           \
      TYPE val;                                                                 \
                                                                                \
      if ((ret = xd3_decode_ ## TYPE (stream, & val)))                          \
        {                                                                       \
          goto fail;                                                            \
        }                                                                       \
                                                                                \
      if (val != values[i])                                                     \
        {                                                                       \
          ret = EINVAL;                                                         \
          goto fail;                                                            \
        }                                                                       \
    }                                                                           \
                                                                                \
  if (stream->avail_in != 0)                                                    \
    {                                                                           \
      ret = EINVAL;                                                             \
      goto fail;                                                                \
    }                                                                           \
                                                                                \
 fail:                                                                          \
  xd3_free_output (stream, rbuf);                                               \
  xd3_free_output (stream, dbuf);                                               \
                                                                                \
  return ret
494
/* Round-trip test for 32-bit integers; the whole body comes from
 * TEST_ENCODE_DECODE_INTEGER with ONE = 1U and MAX = UINT32_MAX. */
static int
test_encode_decode_uint32_t (xd3_stream *stream, int unused)
{
  TEST_ENCODE_DECODE_INTEGER(uint32_t,1U,UINT32_MAX);
}
500
/* Round-trip test for 64-bit integers; the whole body comes from
 * TEST_ENCODE_DECODE_INTEGER with ONE = 1ULL and MAX = UINT64_MAX. */
static int
test_encode_decode_uint64_t (xd3_stream *stream, int unused)
{
  TEST_ENCODE_DECODE_INTEGER(uint64_t,1ULL,UINT64_MAX);
}
506
507static int
508test_usize_t_overflow (xd3_stream *stream, int unused)
509{
510 if (USIZE_T_OVERFLOW (0, 0)) { goto fail; }
511 if (USIZE_T_OVERFLOW (USIZE_T_MAX, 0)) { goto fail; }
512 if (USIZE_T_OVERFLOW (0, USIZE_T_MAX)) { goto fail; }
513 if (USIZE_T_OVERFLOW (USIZE_T_MAX / 2, 0)) { goto fail; }
514 if (USIZE_T_OVERFLOW (USIZE_T_MAX / 2, USIZE_T_MAX / 2)) { goto fail; }
515 if (USIZE_T_OVERFLOW (USIZE_T_MAX / 2, USIZE_T_MAX / 2 + 1)) { goto fail; }
516
517 if (! USIZE_T_OVERFLOW (USIZE_T_MAX, 1)) { goto fail; }
518 if (! USIZE_T_OVERFLOW (1, USIZE_T_MAX)) { goto fail; }
519 if (! USIZE_T_OVERFLOW (USIZE_T_MAX / 2 + 1, USIZE_T_MAX / 2 + 1)) { goto fail; }
520
521 return 0;
522
523 fail:
524 stream->msg = "incorrect overflow computation";
525 return EINVAL;
526}
527
528/******************************************************************************************
529 Address cache
530 ******************************************************************************************/
531
532static int
533test_address_cache (xd3_stream *stream, int unused)
534{
535 int ret, i;
536 usize_t offset;
537 usize_t *addrs;
538 uint8_t *big_buf, *buf_max;
539 const uint8_t *buf;
540 xd3_output *outp;
541 uint8_t *modes;
542 int mode_counts[16];
543
544 stream->acache.s_near = stream->code_table_desc->near_modes;
545 stream->acache.s_same = stream->code_table_desc->same_modes;
546
547 if ((ret = xd3_encode_init (stream))) { return ret; }
548
549 addrs = xd3_alloc (stream, sizeof (usize_t), ADDR_CACHE_ROUNDS);
550 modes = xd3_alloc (stream, sizeof (uint8_t), ADDR_CACHE_ROUNDS);
551
552 memset (mode_counts, 0, sizeof (mode_counts));
553 memset (modes, 0, ADDR_CACHE_ROUNDS);
554
555 addrs[0] = 0;
556
557 srand48 (0x9f73f7fc);
558
559 /* First pass: encode addresses */
560 xd3_init_cache (& stream->acache);
561
562 for (offset = 1; offset < ADDR_CACHE_ROUNDS; offset += 1)
563 {
564 double p;
565 usize_t addr;
566 usize_t prev_i;
567 usize_t nearby;
568
569 p = drand48 ();
570 prev_i = lrand48 () % offset;
571 nearby = (lrand48 () % 256) % offset, 1;
572 nearby = max (1U, nearby);
573
574 if (p < 0.1) { addr = addrs[offset-nearby]; }
575 else if (p < 0.4) { addr = min (addrs[prev_i] + nearby, offset-1); }
576 else { addr = prev_i; }
577
578 if ((ret = xd3_encode_address (stream, addr, offset, & modes[offset]))) { return ret; }
579
580 addrs[offset] = addr;
581 mode_counts[modes[offset]] += 1;
582 }
583
584 /* Copy addresses into a contiguous buffer. */
585 big_buf = xd3_alloc (stream, xd3_sizeof_output (ADDR_HEAD (stream)), 1);
586
587 for (offset = 0, outp = ADDR_HEAD (stream); outp != NULL; offset += outp->next, outp = outp->next_page)
588 {
589 memcpy (big_buf + offset, outp->base, outp->next);
590 }
591
592 buf_max = big_buf + offset;
593 buf = big_buf;
594
595 /* Second pass: decode addresses */
596 xd3_init_cache (& stream->acache);
597
598 for (offset = 1; offset < ADDR_CACHE_ROUNDS; offset += 1)
599 {
600 usize_t addr;
601
602 if ((ret = xd3_decode_address (stream, offset, modes[offset], & buf, buf_max, & addr))) { return ret; }
603
604 if (addr != addrs[offset])
605 {
606 stream->msg = "incorrect decoded address";
607 return EINVAL;
608 }
609 }
610
611 /* Check that every byte, mode was used. */
612 if (buf != buf_max)
613 {
614 stream->msg = "address bytes not used";
615 return EINVAL;
616 }
617
618 for (i = 0; i < (2 + stream->acache.s_same + stream->acache.s_near); i += 1)
619 {
620 if (mode_counts[i] == 0)
621 {
622 stream->msg = "address mode not used";
623 return EINVAL;
624 }
625 }
626
627 xd3_free (stream, modes);
628 xd3_free (stream, addrs);
629 xd3_free (stream, big_buf);
630
631 return 0;
632}
633
634/******************************************************************************************
635 Encode and decode with single bit error
636 ******************************************************************************************/
637
/* Sample input for the single-bit-error tests: sixteen rows of sixteen
 * characters (the last row is fifteen characters plus the terminating NUL),
 * so sizeof (test_text) is 256.  Every row must keep its full width,
 * including the space padding in the seventh row.
 *
 * It compresses from 256 to around 185 bytes.
 * Avoids matching addresses that are a single-bit difference.
 * Avoids matching address 0. */
static const uint8_t test_text[] =
"this is a story\n"
"abouttttttttttt\n"
"- his is a stor\n"
"- about nothing "
" all. boutique -"
"his story is a -"
"about           "
"what happens all"
" the time what -"
"am I ttttttt the"
" person said, so"
" what, per son -"
" gory story is -"
" about nothing -"
"tttttt to test -"
"his sto nothing";

/* Application header attached by test_compress_text() and verified by
 * test_decompress_text(); the terminating NUL is part of the header. */
static const uint8_t test_apphead[] = "header test";
660
661static int
662test_compress_text (xd3_stream *stream,
663 uint8_t *encoded,
664 usize_t *encoded_size)
665{
666 int ret;
667 xd3_config cfg;
668 int flags = stream->flags;
669
670 stream->flags |= XD3_FLUSH;
671
672 (*encoded_size) = 0;
673
674 xd3_set_appheader (stream, test_apphead, sizeof (test_apphead));
675
676 if ((ret = xd3_encode_completely (stream, test_text, sizeof (test_text),
677 encoded, encoded_size, 4*sizeof (test_text)))) { goto fail; }
678
679 if ((ret = xd3_close_stream (stream))) { goto fail; }
680
681 fail:
682 xd3_free_stream (stream);
683 xd3_init_config (& cfg, flags);
684 xd3_config_stream (stream, & cfg);
685 return ret;
686}
687
688static int
689test_decompress_text (xd3_stream *stream, uint8_t *enc, usize_t enc_size, usize_t test_desize)
690{
691 xd3_config cfg;
692 char decoded[sizeof (test_text)];
693 uint8_t *apphead;
694 usize_t apphead_size;
695 usize_t decoded_size;
696 const char *msg;
697 int ret;
698 usize_t pos = 0;
699 int flags = stream->flags;
700 usize_t take;
701
702 input:
703 /* Test decoding test_desize input bytes at a time */
704 take = min (enc_size - pos, test_desize);
705 XD3_ASSERT (take > 0);
706
707 xd3_avail_input (stream, enc + pos, take);
708 again:
709 ret = xd3_decode_input (stream);
710
711 pos += take;
712 take = 0;
713
714 switch (ret)
715 {
716 case XD3_OUTPUT:
717 break;
718 case XD3_WINSTART:
719 case XD3_GOTHEADER:
720 goto again;
721 case XD3_INPUT:
722 if (pos < enc_size) { goto input; }
723 /* else fallthrough */
724 case XD3_WINFINISH:
725 default:
726 goto fail;
727 }
728
729 XD3_ASSERT (ret == XD3_OUTPUT);
730 XD3_ASSERT (pos == enc_size);
731
732 if (stream->avail_out != sizeof (test_text))
733 {
734 stream->msg = "incorrect output size";
735 ret = EINVAL;
736 goto fail;
737 }
738
739 decoded_size = stream->avail_out;
740 memcpy (decoded, stream->next_out, stream->avail_out);
741
742 xd3_consume_output (stream);
743
744 if ((ret = xd3_get_appheader (stream, & apphead, & apphead_size))) { goto fail; }
745
746 if (apphead_size != sizeof (test_apphead) || memcmp (apphead, test_apphead, sizeof (test_apphead)) != 0)
747 {
748 stream->msg = "incorrect appheader";
749 ret = EINVAL;
750 goto fail;
751 }
752
753 if ((ret = xd3_decode_input (stream)) != XD3_WINFINISH ||
754 (ret = xd3_close_stream (stream)) != 0)
755 {
756 goto fail;
757 }
758
759 if (decoded_size != sizeof (test_text) || memcmp (decoded, test_text, sizeof (test_text)) != 0)
760 {
761 stream->msg = "incorrect output text";
762 ret = EIO;
763 }
764
765 fail:
766 msg = stream->msg;
767 xd3_free_stream (stream);
768 xd3_init_config (& cfg, flags);
769 xd3_config_stream (stream, & cfg);
770 stream->msg = msg;
771
772 return ret;
773}
774
775static int
776test_decompress_single_bit_error (xd3_stream *stream, int expected_non_failures)
777{
778 int ret;
779 int i;
780 uint8_t encoded[4*sizeof (test_text)]; /* make room for alt code table */
781 usize_t encoded_size;
782 int non_failures = 0;
783 int cksum = (stream->flags & XD3_ADLER32) != 0;
784
785#if 1
786#define TEST_FAILURES()
787#else
788 /* For checking non-failure cases by hand, enable this macro and run xdelta printdelta
789 * with print_cpymode enabled. Every non-failure should change a copy address mode,
790 * which doesn't cause a failure because the address cache starts out with all zeros.
791
792 ./xdelta3 test
793 for i in test_text.xz.*; do ./xdelta3 printdelta $i > $i.out; diff $i.out test_text.xz.0.out; done
794
795 */
796 system ("rm -rf test_text.*");
797 {
798 char buf[64];
799 FILE *f;
800 sprintf (buf, "test_text");
801 f = fopen (buf, "w");
802 fwrite (test_text,1,sizeof (test_text),f);
803 fclose (f);
804 }
805#define TEST_FAILURES() \
806 do { \
807 char buf[64]; \
808 FILE *f; \
809 sprintf (buf, "test_text.xz.%d", non_failures); \
810 f = fopen (buf, "w"); \
811 fwrite (encoded,1,encoded_size,f); \
812 fclose (f); \
813 } while (0)
814#endif
815
816 stream->sec_data.inefficient = 1;
817 stream->sec_inst.inefficient = 1;
818 stream->sec_addr.inefficient = 1;
819
820 /* Encode text, test correct input */
821 if ((ret = test_compress_text (stream, encoded, & encoded_size)))
822 {
823 /*stream->msg = "without error: encode failure";*/
824 return ret;
825 }
826 if ((ret = test_decompress_text (stream, encoded, encoded_size, sizeof (test_text) / 4)))
827 {
828 /*stream->msg = "without error: decode failure";*/
829 return ret;
830 }
831
832 TEST_FAILURES();
833
834 for (i = 0; i < encoded_size*8; i += 1)
835 {
836 /* Single bit error. */
837 encoded[i/8] ^= 1 << (i%8);
838
839 if ((ret = test_decompress_text (stream, encoded, encoded_size, sizeof (test_text))) == 0)
840 {
841 non_failures += 1;
842 /*P(RINT "%u[%u] non-failure %u\n", i/8, i%8, non_failures);*/
843 TEST_FAILURES();
844 }
845 else
846 {
847 /*P(RINT "%u[%u] failure: %s\n", i/8, i%8, stream->msg);*/
848 }
849
850 /* decompress_text returns EIO when the final memcmp() fails, but that
851 * should never happen with checksumming on. */
852 if (cksum && ret == EIO)
853 {
854 /*P(RINT "%u[%u] cksum mismatch\n", i/8, i%8);*/
855 stream->msg = "checksum mismatch";
856 return EINVAL;
857 }
858
859 /* Undo single bit error. */
860 encoded[i/8] ^= 1 << (i%8);
861 }
862
863 /* Test correct input again */
864 if ((ret = test_decompress_text (stream, encoded, encoded_size, 1)))
865 {
866 /*stream->msg = "without error: decode failure";*/
867 return ret;
868 }
869
870 /* Check expected non-failures */
871 if (non_failures != expected_non_failures)
872 {
873 P(RINT "non-failures %u; expected %u", non_failures, expected_non_failures);
874 stream->msg = "incorrect";
875 return EINVAL;
876 }
877
878 DOT ();
879
880 return 0;
881}
882
883/******************************************************************************************
884 Secondary compression tests
885 ******************************************************************************************/
886
887#if SECONDARY_ANY
888typedef int (*sec_dist_func) (xd3_stream *stream, xd3_output *data);
889
890static int sec_dist_func1 (xd3_stream *stream, xd3_output *data);
891static int sec_dist_func2 (xd3_stream *stream, xd3_output *data);
892static int sec_dist_func3 (xd3_stream *stream, xd3_output *data);
893static int sec_dist_func4 (xd3_stream *stream, xd3_output *data);
894static int sec_dist_func5 (xd3_stream *stream, xd3_output *data);
895static int sec_dist_func6 (xd3_stream *stream, xd3_output *data);
896static int sec_dist_func7 (xd3_stream *stream, xd3_output *data);
897static int sec_dist_func8 (xd3_stream *stream, xd3_output *data);
898static int sec_dist_func9 (xd3_stream *stream, xd3_output *data);
899
900static sec_dist_func sec_dists[] =
901{
902 sec_dist_func1,
903 sec_dist_func2,
904 sec_dist_func3,
905 sec_dist_func4,
906 sec_dist_func5,
907 sec_dist_func6,
908 sec_dist_func7,
909 sec_dist_func8,
910 sec_dist_func9,
911};
912
913/* Test ditsribution: 100 bytes of the same character (13). */
914static int
915sec_dist_func1 (xd3_stream *stream, xd3_output *data)
916{
917 int i, ret;
918 for (i = 0; i < 100; i += 1)
919 {
920 if ((ret = xd3_emit_byte (stream, & data, 13))) { return ret; }
921 }
922 return 0;
923}
924
925/* Test ditsribution: uniform covering half the alphabet. */
926static int
927sec_dist_func2 (xd3_stream *stream, xd3_output *data)
928{
929 int i, ret;
930 for (i = 0; i < ALPHABET_SIZE; i += 1)
931 {
932 if ((ret = xd3_emit_byte (stream, & data, i%(ALPHABET_SIZE/2)))) { return ret; }
933 }
934 return 0;
935}
936
937/* Test ditsribution: uniform covering the entire alphabet. */
938static int
939sec_dist_func3 (xd3_stream *stream, xd3_output *data)
940{
941 int i, ret;
942 for (i = 0; i < ALPHABET_SIZE; i += 1)
943 {
944 if ((ret = xd3_emit_byte (stream, & data, i%ALPHABET_SIZE))) { return ret; }
945 }
946 return 0;
947}
948
949/* Test distribution: An exponential distribution covering half the alphabet */
950static int
951sec_dist_func4 (xd3_stream *stream, xd3_output *data)
952{
953 int i, ret, x;
954 for (i = 0; i < ALPHABET_SIZE*20; i += 1)
955 {
956 x = test_exponential_dist (10, ALPHABET_SIZE/2);
957 if ((ret = xd3_emit_byte (stream, & data, x))) { return ret; }
958 }
959 return 0;
960}
961
962/* Test distribution: An exponential distribution covering the entire alphabet */
963static int
964sec_dist_func5 (xd3_stream *stream, xd3_output *data)
965{
966 int i, ret, x;
967 for (i = 0; i < ALPHABET_SIZE*20; i += 1)
968 {
969 x = test_exponential_dist (10, ALPHABET_SIZE-1);
970 if ((ret = xd3_emit_byte (stream, & data, x))) { return ret; }
971 }
972 return 0;
973}
974
975/* Test distribution: An uniform random distribution covering half the alphabet */
976static int
977sec_dist_func6 (xd3_stream *stream, xd3_output *data)
978{
979 int i, ret, x;
980 for (i = 0; i < ALPHABET_SIZE*20; i += 1)
981 {
982 x = lrand48 () % (ALPHABET_SIZE/2);
983 if ((ret = xd3_emit_byte (stream, & data, x))) { return ret; }
984 }
985 return 0;
986}
987
988/* Test distribution: An uniform random distribution covering the entire alphabet */
989static int
990sec_dist_func7 (xd3_stream *stream, xd3_output *data)
991{
992 int i, ret, x;
993 for (i = 0; i < ALPHABET_SIZE*20; i += 1)
994 {
995 x = lrand48 () % ALPHABET_SIZE;
996 if ((ret = xd3_emit_byte (stream, & data, x))) { return ret; }
997 }
998 return 0;
999}
1000
1001/* Test distribution: A small number of frequent characters, difficult to divide into many
1002 * groups */
1003static int
1004sec_dist_func8 (xd3_stream *stream, xd3_output *data)
1005{
1006 int i, ret;
1007 for (i = 0; i < ALPHABET_SIZE*5; i += 1)
1008 {
1009 if ((ret = xd3_emit_byte (stream, & data, 0))) { return ret; }
1010 if ((ret = xd3_emit_byte (stream, & data, 64))) { return ret; }
1011 if ((ret = xd3_emit_byte (stream, & data, 128))) { return ret; }
1012 if ((ret = xd3_emit_byte (stream, & data, 255))) { return ret; }
1013 }
1014 return 0;
1015}
1016
1017/* Test distribution: One that causes many FGK block promotions (found a bug) */
1018static int
1019sec_dist_func9 (xd3_stream *stream, xd3_output *data)
1020{
1021 int i, ret;
1022
1023 int ramp = 0;
1024 int rcount = 0;
1025 int prom = 0;
1026 int pcount = 0;
1027
1028 /* 200 was long enough to trigger it--only when stricter checking that counted all
1029 * blocks was turned on, but it seems I deleted this code. (missing fgk_free_block on
1030 * line 398). */
1031 for (i = 0; i < ALPHABET_SIZE*200; i += 1)
1032 {
1033 repeat:
1034 if (ramp < ALPHABET_SIZE)
1035 {
1036 /* Initially Nth symbol has (N+1) frequency */
1037 if (rcount <= ramp)
1038 {
1039 rcount += 1;
1040 if ((ret = xd3_emit_byte (stream, & data, ramp))) { return ret; }
1041 continue;
1042 }
1043
1044 ramp += 1;
1045 rcount = 0;
1046 goto repeat;
1047 }
1048
1049 /* Thereafter, promote least freq to max freq */
1050 if (pcount == ALPHABET_SIZE)
1051 {
1052 pcount = 0;
1053 prom = (prom + 1) % ALPHABET_SIZE;
1054 }
1055
1056 pcount += 1;
1057 if ((ret = xd3_emit_byte (stream, & data, prom))) { return ret; }
1058 }
1059
1060 return 0;
1061}
1062
1063static int
1064test_secondary_decode (xd3_stream *stream,
1065 const xd3_sec_type *sec,
1066 usize_t input_size,
1067 usize_t compress_size,
1068 const uint8_t *dec_input,
1069 const uint8_t *dec_correct,
1070 uint8_t *dec_output)
1071{
1072 int ret;
1073 xd3_sec_stream *dec_stream;
1074 const uint8_t *dec_input_used, *dec_input_end;
1075 uint8_t *dec_output_used, *dec_output_end;
1076
1077 if ((dec_stream = sec->alloc (stream)) == NULL) { return ENOMEM; }
1078
1079 sec->init (dec_stream);
1080
1081 dec_input_used = dec_input;
1082 dec_input_end = dec_input + compress_size;
1083
1084 dec_output_used = dec_output;
1085 dec_output_end = dec_output + input_size;
1086
1087 if ((ret = sec->decode (stream, dec_stream,
1088 & dec_input_used, dec_input_end,
1089 & dec_output_used, dec_output_end)))
1090 {
1091 goto fail;
1092 }
1093
1094 if (dec_input_used != dec_input_end)
1095 {
1096 stream->msg = "unused input";
1097 ret = EINVAL;
1098 goto fail;
1099 }
1100
1101 if (dec_output_used != dec_output_end)
1102 {
1103 stream->msg = "unfinished output";
1104 ret = EINVAL;
1105 goto fail;
1106 }
1107
1108 if (memcmp (dec_output, dec_correct, input_size) != 0)
1109 {
1110 stream->msg = "incorrect output";
1111 ret = EINVAL;
1112 goto fail;
1113 }
1114
1115 fail:
1116 sec->destroy (stream, dec_stream);
1117 return ret;
1118}
1119
1120static int
1121test_secondary (xd3_stream *stream, const xd3_sec_type *sec, int groups)
1122{
1123 int test_i, ret;
1124 xd3_output *in_head, *out_head, *p;
1125 usize_t p_off, input_size, compress_size;
1126 uint8_t *dec_input = NULL, *dec_output = NULL, *dec_correct = NULL;
1127 xd3_sec_stream *enc_stream;
1128 xd3_sec_cfg cfg;
1129
1130 memset (& cfg, 0, sizeof (cfg));
1131
1132 cfg.inefficient = 1;
1133
1134 for (cfg.ngroups = 1; cfg.ngroups <= groups; cfg.ngroups += 1)
1135 {
1136 P(RINT "\n...");
1137 for (test_i = 0; test_i < SIZEOF_ARRAY (sec_dists); test_i += 1)
1138 {
1139 srand48 (0x84687674);
1140
1141 in_head = xd3_alloc_output (stream, NULL);
1142 out_head = xd3_alloc_output (stream, NULL);
1143 enc_stream = sec->alloc (stream);
1144 dec_input = NULL;
1145 dec_output = NULL;
1146 dec_correct = NULL;
1147
1148 if (in_head == NULL || out_head == NULL || enc_stream == NULL) { goto nomem; }
1149
1150 if ((ret = sec_dists[test_i] (stream, in_head))) { goto fail; }
1151
1152 sec->init (enc_stream);
1153
1154 /* Encode data */
1155 if ((ret = sec->encode (stream, enc_stream, in_head, out_head, & cfg)))
1156 {
1157 P(RINT "test %u: encode: %s", test_i, stream->msg);
1158 goto fail;
1159 }
1160
1161 /* Calculate sizes, allocate contiguous arrays for decoding */
1162 input_size = xd3_sizeof_output (in_head);
1163 compress_size = xd3_sizeof_output (out_head);
1164
1165 P(RINT "%.3f", 8.0 * (double) compress_size / (double) input_size);
1166
1167 if ((dec_input = xd3_alloc (stream, compress_size, 1)) == NULL ||
1168 (dec_output = xd3_alloc (stream, input_size, 1)) == NULL ||
1169 (dec_correct = xd3_alloc (stream, input_size, 1)) == NULL) { goto nomem; }
1170
1171 /* Fill the compressed data array */
1172 for (p_off = 0, p = out_head; p != NULL; p_off += p->next, p = p->next_page)
1173 {
1174 memcpy (dec_input + p_off, p->base, p->next);
1175 }
1176
1177 XD3_ASSERT (p_off == compress_size);
1178
1179 /* Fill the input data array */
1180 for (p_off = 0, p = in_head; p != NULL; p_off += p->next, p = p->next_page)
1181 {
1182 memcpy (dec_correct + p_off, p->base, p->next);
1183 }
1184
1185 XD3_ASSERT (p_off == input_size);
1186
1187 if ((ret = test_secondary_decode (stream, sec, input_size, compress_size, dec_input, dec_correct, dec_output)))
1188 {
1189 P(RINT "test %u: decode: %s", test_i, stream->msg);
1190 goto fail;
1191 }
1192
1193 /* Single-bit error test, only cover the first 10 bytes. Some non-failures are
1194 * expected in the Huffman case: Changing the clclen array, for example, may not
1195 * harm the decoding. Really looking for faults here. */
1196 {
1197 int i;
1198 int bytes = min (compress_size, 10U);
1199 for (i = 0; i < bytes * 8; i += 1)
1200 {
1201 dec_input[i/8] ^= 1 << (i%8);
1202
1203 if ((ret = test_secondary_decode (stream, sec, input_size, compress_size, dec_input, dec_correct, dec_output)) == 0)
1204 {
1205 /*P(RINT "test %u: decode single-bit [%u/%u] error non-failure", test_i, i/8, i%8);*/
1206 }
1207
1208 dec_input[i/8] ^= 1 << (i%8);
1209
1210 if ((i % (2*bytes)) == (2*bytes)-1)
1211 {
1212 DOT ();
1213 }
1214 }
1215 ret = 0;
1216 }
1217
1218 if (0) { nomem: ret = ENOMEM; }
1219
1220 fail:
1221 sec->destroy (stream, enc_stream);
1222 xd3_free_output (stream, in_head);
1223 xd3_free_output (stream, out_head);
1224 xd3_free (stream, dec_input);
1225 xd3_free (stream, dec_output);
1226 xd3_free (stream, dec_correct);
1227
1228 if (ret != 0) { return ret; }
1229 }
1230 }
1231
1232 return 0;
1233}
1234
1235IF_FGK (static int test_secondary_fgk (xd3_stream *stream, int gp) { return test_secondary (stream, & fgk_sec_type, gp); })
1236IF_DJW (static int test_secondary_huff (xd3_stream *stream, int gp) { return test_secondary (stream, & djw_sec_type, gp); })
1237#endif
1238
1239/******************************************************************************************
1240 TEST INSTRUCTION TABLE
1241 ******************************************************************************************/
1242
1243/* Test that xd3_choose_instruction() does the right thing for its code table. */
1244static int
1245test_choose_instruction (xd3_stream *stream, int ignore)
1246{
1247 int i;
1248
1249 stream->code_table = (*stream->code_table_func) ();
1250
1251 for (i = 0; i < 256; i += 1)
1252 {
1253 const xd3_dinst *d = stream->code_table + i;
1254 xd3_rinst prev, inst;
1255
1256 XD3_ASSERT (d->type1 > 0);
1257
1258 memset (& prev, 0, sizeof (prev));
1259 memset (& inst, 0, sizeof (inst));
1260
1261 if (d->type2 == 0)
1262 {
1263 inst.type = d->type1;
1264
1265 if ((inst.size = d->size1) == 0)
1266 {
1267 inst.size = TESTBUFSIZE;
1268 }
1269
1270 XD3_CHOOSE_INSTRUCTION (stream, NULL, & inst);
1271
1272 if (inst.code2 != 0 || inst.code1 != i)
1273 {
1274 stream->msg = "wrong single instruction";
1275 return EINVAL;
1276 }
1277 }
1278 else
1279 {
1280 prev.type = d->type1;
1281 prev.size = d->size1;
1282 inst.type = d->type2;
1283 inst.size = d->size2;
1284
1285 XD3_CHOOSE_INSTRUCTION (stream, & prev, & inst);
1286
1287 if (prev.code2 != i)
1288 {
1289 stream->msg = "wrong double instruction";
1290 return EINVAL;
1291 }
1292 }
1293 }
1294
1295 return 0;
1296}
1297
1298/******************************************************************************************
1299 TEST INSTRUCTION TABLE CODING
1300 ******************************************************************************************/
1301
1302#if GENERIC_ENCODE_TABLES
1303/* Test that encoding and decoding a code table works */
1304static int
1305test_encode_code_table (xd3_stream *stream, int ignore)
1306{
1307 int ret;
1308 const uint8_t *comp_data;
1309 usize_t comp_size;
1310
1311 if ((ret = xd3_compute_alternate_table_encoding (stream, & comp_data, & comp_size)))
1312 {
1313 return ret;
1314 }
1315
1316 stream->acache.s_near = __alternate_code_table_desc.near_modes;
1317 stream->acache.s_same = __alternate_code_table_desc.same_modes;
1318
1319 if ((ret = xd3_apply_table_encoding (stream, comp_data, comp_size)))
1320 {
1321 return ret;
1322 }
1323
1324 if (memcmp (stream->code_table, xd3_alternate_code_table (), sizeof (xd3_dinst) * 256) != 0)
1325 {
1326 stream->msg = "wrong code table reconstruction";
1327 return EINVAL;
1328 }
1329
1330 return 0;
1331}
1332#endif
1333
1334/******************************************************************************************
1335 64BIT STREAMING
1336 ******************************************************************************************/
1337
1338/* This test encodes and decodes a series of 1 megabyte windows, each containing a long
1339 * run of zeros along with a single xoff_t size record to indicate the sequence. */
1340static int
1341test_streaming (xd3_stream *in_stream, uint8_t *encbuf, uint8_t *decbuf, uint8_t *delbuf, usize_t megs)
1342{
1343 xd3_stream estream, dstream;
1344 int ret;
1345 usize_t i, delsize, decsize;
1346
1347 if ((ret = xd3_config_stream (& estream, NULL)) ||
1348 (ret = xd3_config_stream (& dstream, NULL)))
1349 {
1350 goto fail;
1351 }
1352
1353 for (i = 0; i < megs; i += 1)
1354 {
1355 ((usize_t*) encbuf)[0] = i;
1356
1357 if ((i % 200) == 199) { DOT (); }
1358
1359 if ((ret = xd3_process_completely (& estream, xd3_encode_input, 0,
1360 encbuf, 1 << 20,
1361 delbuf, & delsize, 1 << 10)))
1362 {
1363 in_stream->msg = estream.msg;
1364 goto fail;
1365 }
1366
1367 if ((ret = xd3_process_completely (& dstream, xd3_decode_input, 0,
1368 delbuf, delsize,
1369 decbuf, & decsize, 1 << 20)))
1370 {
1371 in_stream->msg = dstream.msg;
1372 goto fail;
1373 }
1374
1375 if (decsize != 1 << 20 ||
1376 memcmp (encbuf, decbuf, 1 << 20) != 0)
1377 {
1378 in_stream->msg = "wrong result";
1379 ret = EINVAL;
1380 goto fail;
1381 }
1382 }
1383
1384 if ((ret = xd3_close_stream (& estream)) ||
1385 (ret = xd3_close_stream (& dstream)))
1386 {
1387 goto fail;
1388 }
1389
1390 fail:
1391 xd3_free_stream (& estream);
1392 xd3_free_stream (& dstream);
1393 return ret;
1394}
1395
1396/* Run tests of data streaming of over and around 4GB of data. */
1397static int
1398test_compressed_stream_overflow (xd3_stream *stream, int ignore)
1399{
1400 int ret;
1401 uint8_t *buf;
1402
1403 if ((buf = malloc (TWO_MEGS_AND_DELTA)) == NULL) { return ENOMEM; }
1404
1405 memset (buf, 0, TWO_MEGS_AND_DELTA);
1406
1407 /* Test overflow of a 32-bit file offset. */
1408 if (SIZEOF_XOFF_T == 4)
1409 {
1410 ret = test_streaming (stream, buf, buf + (1 << 20), buf + (2 << 20), (1 << 12) + 1);
1411
1412 if (ret == EINVAL && MSG_IS ("decoder file offset overflow"))
1413 {
1414 ret = 0;
1415 }
1416 else
1417 {
1418 stream->msg = "expected overflow condition";
1419 ret = EINVAL;
1420 goto fail;
1421 }
1422 }
1423
1424 /* Test transfer of exactly 32bits worth of data. */
1425 if ((ret = test_streaming (stream, buf, buf + (1 << 20), buf + (2 << 20), 1 << 12))) { goto fail; }
1426
1427 fail:
1428 free (buf);
1429 return ret;
1430}
1431
1432/******************************************************************************************
1433 COMMAND LINE
1434 ******************************************************************************************/
1435
1436/* For each pair of command templates in the array below, test that encoding and decoding
1437 * commands work. Also check for the expected size delta, which should be approximately
1438 * TEST_ADD_RATIO times the file size created by test_make_inputs. Due to differences in
1439 * the application header, it is suppressed (-A) so that all delta files are the same. */
1440static int
1441test_command_line_arguments (xd3_stream *stream, int ignore)
1442{
1443 int i, ret;
1444
1445 static const char* cmdpairs[] =
1446 {
1447 /* standard input, output */
1448 "%s -A < %s > %s", "%s -d < %s > %s",
1449 "%s -A -e < %s > %s", "%s -d < %s > %s",
1450 "%s -A= encode < %s > %s", "%s decode < %s > %s",
1451 "%s -A -q encode < %s > %s", "%s -qdq < %s > %s",
1452
1453 /* file input, standard output */
1454 "%s -A= %s > %s", "%s -d %s > %s",
1455 "%s -A -e %s > %s", "%s -d %s > %s",
1456 "%s encode -A= %s > %s", "%s decode %s > %s",
1457
1458 /* file input, output */
1459 "%s -A= %s %s", "%s -d %s %s",
1460 "%s -A -e %s %s", "%s -d %s %s",
1461 "%s -A= encode %s %s", "%s decode %s %s",
1462
1463 /* option placement */
1464 "%s -A -f %s %s", "%s -f -d %s %s",
1465 "%s -e -A= %s %s", "%s -d -f %s %s",
1466 "%s -f encode -A= %s %s", "%s -f decode -f %s %s",
1467 };
1468
1469 char ecmd[128], dcmd[128];
1470 int pairs = SIZEOF_ARRAY (cmdpairs) / 2;
1471 xoff_t tsize;
1472 xoff_t dsize;
1473 double ratio;
1474
1475 srand48 (0x89162337);
1476
1477 for (i = 0; i < pairs; i += 1)
1478 {
1479 sprintf (ecmd, cmdpairs[2*i], program_name, TEST_TARGET_FILE, TEST_DELTA_FILE);
1480 sprintf (dcmd, cmdpairs[2*i+1], program_name, TEST_DELTA_FILE, TEST_RECON_FILE);
1481
1482 test_setup ();
1483 if ((ret = test_make_inputs (stream, NULL, & tsize))) { return ret; }
1484
1485 /* Encode and decode. */
1486 if ((ret = system (ecmd)) != 0)
1487 {
1488 P(RINT "xdelta3: command was: %s\n", ecmd);
1489 stream->msg = "encode cmd failed";
1490 return EINVAL;
1491 }
1492
1493 if ((ret = system (dcmd)) != 0)
1494 {
1495 stream->msg = "decode cmd failed";
1496 return EINVAL;
1497 }
1498
1499 /* Compare the target file. */
1500 if ((ret = compare_files (stream, TEST_TARGET_FILE, TEST_RECON_FILE)))
1501 {
1502 return ret;
1503 }
1504
1505 if (i == 0)
1506 {
1507 /* The first time through, check the compression ratio and save a copy of the
1508 * delta. */
1509 if ((ret = test_save_copy (TEST_DELTA_FILE)))
1510 {
1511 stream->msg = "copy failed";
1512 return ret;
1513 }
1514
1515 if ((ret = test_file_size (TEST_DELTA_FILE, & dsize)))
1516 {
1517 return ret;
1518 }
1519
1520 ratio = (double) dsize / (double) tsize;
1521
1522 /* Check that it is not too small, not too large. */
1523 if (ratio >= TEST_ADD_RATIO + TEST_EPSILON)
1524 {
1525 P(RINT "xdelta3: test encode with size ratio %.3f, expected < %.3f\n",
1526 ratio, TEST_ADD_RATIO + TEST_EPSILON);
1527 stream->msg = "strange encoding";
1528 return EINVAL;
1529 }
1530
1531 if (ratio <= TEST_ADD_RATIO - TEST_EPSILON)
1532 {
1533 P(RINT "xdelta3: test encode with size ratio %.3f, expected > %.3f\n",
1534 ratio, TEST_ADD_RATIO - TEST_EPSILON);
1535 stream->msg = "strange encoding";
1536 return EINVAL;
1537 }
1538
1539 /* Also check that compare_files works. The delta and original should not be
1540 * identical. */
1541 if ((ret = compare_files (stream, TEST_DELTA_FILE, TEST_TARGET_FILE)) == 0)
1542 {
1543 stream->msg = "broken compare_files";
1544 return EINVAL;
1545 }
1546 }
1547 else
1548 {
1549 /* In subsequent passes, verify that the copy and delta are the same. */
1550 if ((ret = compare_files (stream, TEST_COPY_FILE, TEST_DELTA_FILE)))
1551 {
1552 return ret;
1553 }
1554 }
1555
1556 test_cleanup ();
1557 DOT ();
1558 }
1559
1560 return 0;
1561}
1562
1563/******************************************************************************************
1564 EXTERNAL I/O DECOMPRESSION/RECOMPRESSION
1565 ******************************************************************************************/
1566
1567#if EXTERNAL_COMPRESSION
1568/* This performs one step of the test_externally_compressed_io function described below.
1569 * It builds a pipe containing both Xdelta and external compression/decompression that
1570 * should not modify the data passing through. */
1571static int
1572test_compressed_pipe (xd3_stream *stream, main_extcomp *ext, char* buf,
1573 const char* comp_options, const char* decomp_options,
1574 int do_ext_recomp, const char* msg)
1575{
1576 int ret;
1577 char decomp_buf[TESTBUFSIZE];
1578
1579 if (do_ext_recomp)
1580 {
1581 sprintf (decomp_buf, " | %s %s", ext->decomp_cmdname, ext->decomp_options);
1582 }
1583 else
1584 {
1585 decomp_buf[0] = 0;
1586 }
1587
1588 sprintf (buf, "%s %s < %s | %s %s | %s %s%s > %s",
1589 ext->recomp_cmdname, ext->recomp_options,
1590 TEST_TARGET_FILE,
1591 program_name, comp_options,
1592 program_name, decomp_options,
1593 decomp_buf,
1594 TEST_RECON_FILE);
1595
1596 if ((ret = system (buf)) != 0)
1597 {
1598 stream->msg = msg;
1599 return EINVAL;
1600 }
1601
1602 if ((ret = compare_files (stream, TEST_TARGET_FILE, TEST_RECON_FILE)))
1603 {
1604 return EINVAL;
1605 }
1606
1607 DOT ();
1608 return 0;
1609}
1610
1611/* We want to test that a pipe such as:
1612 *
1613 * --> | gzip -cf | xdelta3 -cf | xdelta3 -dcf | gzip -dcf | -->
1614 *
1615 * is transparent, i.e., does not modify the stream of data. However, we also want to
1616 * verify that at the center the data is properly compressed, i.e., that we do not just
1617 * have a re-compressed gzip format, that we have an VCDIFF format. We do this in two
1618 * steps. First test the above pipe, then test with suppressed output recompression
1619 * (-D). The result should be the original input:
1620 *
1621 * --> | gzip -cf | xdelta3 -cf | xdelta3 -Ddcf | -->
1622 *
1623 * Finally we want to test that -D also disables input decompression:
1624 *
1625 * --> | gzip -cf | xdelta3 -Dcf | xdelta3 -Ddcf | gzip -dcf | -->
1626 */
1627static int
1628test_externally_compressed_io (xd3_stream *stream, int ignore)
1629{
1630 int i, ret;
1631 char buf[TESTBUFSIZE];
1632
1633 srand48 (0x91723913);
1634
1635 if ((ret = test_make_inputs (stream, NULL, NULL))) { return ret; }
1636
1637 for (i = 0; i < SIZEOF_ARRAY (extcomp_types); i += 1)
1638 {
1639 main_extcomp *ext = & extcomp_types[i];
1640
1641 /* Test for the existence of the external command first, if not skip. */
1642 sprintf (buf, "%s %s < /dev/null > /dev/null", ext->recomp_cmdname, ext->recomp_options);
1643
1644 if ((ret = system (buf)) != 0)
1645 {
1646 P(RINT "%s=0", ext->recomp_cmdname);
1647 continue;
1648 }
1649
1650 if ((ret = test_compressed_pipe (stream, ext, buf, "-cfq", "-dcfq", 1,
1651 "compression failed: identity pipe")) ||
1652 (ret = test_compressed_pipe (stream, ext, buf, "-cfq", "-Rdcfq", 0,
1653 "compression failed: without recompression")) ||
1654 (ret = test_compressed_pipe (stream, ext, buf, "-Dcfq", "-Rdcfq", 1,
1655 "compression failed: without decompression")))
1656 {
1657 return ret;
1658 }
1659 }
1660
1661 return 0;
1662}
1663
1664/* This tests the proper functioning of external decompression for source files. The
1665 * source and target files are identical and compressed by gzip. Decoding such a delta
1666 * with recompression disabled (-R) should produce the original, uncompressed
1667 * source/target file. Then it checks with output recompression enabled--in this case the
1668 * output should be a compressed copy of the original source/target file. Then it checks
1669 * that encoding with decompression disabled works--the compressed files are identical and
1670 * decoding them should always produce a compressed output, regardless of -R since the
1671 * encoded delta file had decompression disabled..
1672 */
1673static int
1674test_source_decompression (xd3_stream *stream, int ignore)
1675{
1676 int ret;
1677 char buf[TESTBUFSIZE];
1678 const main_extcomp *ext;
1679
1680 srand48 (0x9ff56acb);
1681
1682 test_setup ();
1683 if ((ret = test_make_inputs (stream, NULL, NULL))) { return ret; }
1684
1685 /* Use gzip. */
1686 if ((ext = main_get_compressor ("G")) == NULL) { P(RINT "skipped"); return 0; }
1687
1688 /* Save an uncompressed copy. */
1689 if ((ret = test_save_copy (TEST_TARGET_FILE))) { return ret; }
1690
1691 /* Compress the target. */
1692 sprintf (buf, "%s %s < %s > %s", ext->recomp_cmdname, ext->recomp_options, TEST_TARGET_FILE, TEST_SOURCE_FILE);
1693 if ((ret = do_cmd (stream, buf))) { return ret; }
1694
1695 /* Copy back to the source. */
1696 sprintf (buf, "cp -f %s %s", TEST_SOURCE_FILE, TEST_TARGET_FILE);
1697 if ((ret = do_cmd (stream, buf))) { return ret; }
1698
1699 /* Now the two identical files are compressed. Delta-encode the target, with decompression. */
1700 sprintf (buf, "%s -eq -s%s %s %s", program_name, TEST_SOURCE_FILE, TEST_TARGET_FILE, TEST_DELTA_FILE);
1701 if ((ret = do_cmd (stream, buf))) { return ret; }
1702
1703 /* Decode the delta file with recompression disabled, should get an uncompressed file
1704 * out. */
1705 sprintf (buf, "%s -dq -R -s%s %s %s", program_name, TEST_SOURCE_FILE, TEST_DELTA_FILE, TEST_RECON_FILE);
1706 if ((ret = do_cmd (stream, buf))) { return ret; }
1707 if ((ret = compare_files (stream, TEST_COPY_FILE, TEST_RECON_FILE))) { return ret; }
1708
1709 /* Decode the delta file with recompression, should get a compressed file out. But we
1710 * can't compare compressed files directly. */
1711 sprintf (buf, "%s -dqf -s%s %s %s", program_name, TEST_SOURCE_FILE, TEST_DELTA_FILE, TEST_RECON_FILE);
1712 if ((ret = do_cmd (stream, buf))) { return ret; }
1713 sprintf (buf, "%s %s < %s > %s", ext->decomp_cmdname, ext->decomp_options, TEST_RECON_FILE, TEST_RECON2_FILE);
1714 if ((ret = do_cmd (stream, buf))) { return ret; }
1715 if ((ret = compare_files (stream, TEST_COPY_FILE, TEST_RECON2_FILE))) { return ret; }
1716
1717 /* Encode with decompression disabled */
1718 sprintf (buf, "%s -feqD -s%s %s %s", program_name, TEST_SOURCE_FILE, TEST_TARGET_FILE, TEST_DELTA_FILE);
1719 if ((ret = do_cmd (stream, buf))) { return ret; }
1720
1721 /* Decode the delta file with recompression enabled, it doesn't matter, should get the
1722 * compressed file out. */
1723 sprintf (buf, "%s -fdq -s%s %s %s", program_name, TEST_SOURCE_FILE, TEST_DELTA_FILE, TEST_RECON_FILE);
1724 if ((ret = do_cmd (stream, buf))) { return ret; }
1725 if ((ret = compare_files (stream, TEST_TARGET_FILE, TEST_RECON_FILE))) { return ret; }
1726
1727 /* Try again with recompression disabled, it doesn't make a difference. */
1728 sprintf (buf, "%s -fqRd -s%s %s %s", program_name, TEST_SOURCE_FILE, TEST_DELTA_FILE, TEST_RECON_FILE);
1729 if ((ret = do_cmd (stream, buf))) { return ret; }
1730 if ((ret = compare_files (stream, TEST_TARGET_FILE, TEST_RECON_FILE))) { return ret; }
1731 test_cleanup();
1732 return 0;
1733}
1734#endif
1735
1736/******************************************************************************************
1737 FORCE, STDOUT
1738 ******************************************************************************************/
1739
1740/* This tests that output will not overwrite an existing file unless -f was specified.
1741 * The test is for encoding (the same code handles it for decoding). */
1742static int
1743test_force_behavior (xd3_stream *stream, int ignore)
1744{
1745 int ret;
1746 char buf[128];
1747
1748 /* Create empty target file */
1749 test_setup ();
1750 sprintf (buf, "cp /dev/null %s", TEST_TARGET_FILE);
1751 if ((ret = do_cmd (stream, buf))) { return ret; }
1752
1753 /* Encode to delta file */
1754 sprintf (buf, "%s -e %s %s", program_name, TEST_TARGET_FILE, TEST_DELTA_FILE);
1755 if ((ret = do_cmd (stream, buf))) { return ret; }
1756
1757 /* Encode again, should fail. */
1758 sprintf (buf, "%s -e %s %s 2> /dev/null", program_name, TEST_TARGET_FILE, TEST_DELTA_FILE);
1759 if ((ret = do_fail (stream, buf))) { return ret; }
1760
1761 /* Force it, should succeed. */
1762 sprintf (buf, "%s -ef %s %s", program_name, TEST_TARGET_FILE, TEST_DELTA_FILE);
1763 if ((ret = do_cmd (stream, buf))) { return ret; }
1764 test_cleanup();
1765 return 0;
1766}
1767
1768/* This checks the proper operation of the -c flag. When specified the default output
1769 * becomes stdout, otherwise the input must be provided (encode) or it may be defaulted
1770 * (decode w/ app header). */
1771static int
1772test_stdout_behavior (xd3_stream *stream, int ignore)
1773{
1774 int ret;
1775 char buf[128];
1776
1777 test_setup();
1778 sprintf (buf, "cp /dev/null %s", TEST_TARGET_FILE);
1779 if ((ret = do_cmd (stream, buf))) { return ret; }
1780
1781 /* Without -c, encode writes to delta file */
1782 sprintf (buf, "%s -e %s %s", program_name, TEST_TARGET_FILE, TEST_DELTA_FILE);
1783 if ((ret = do_cmd (stream, buf))) { return ret; }
1784
1785 /* With -c, encode writes to stdout */
1786 sprintf (buf, "%s -e -c %s > %s", program_name, TEST_TARGET_FILE, TEST_DELTA_FILE);
1787 if ((ret = do_cmd (stream, buf))) { return ret; }
1788
1789 /* Without -c, decode writes to target file name, but it fails because the file exists. */
1790 sprintf (buf, "%s -d %s 2> /dev/null", program_name, TEST_DELTA_FILE);
1791 if ((ret = do_fail (stream, buf))) { return ret; }
1792
1793 /* With -c, decode writes to stdout */
1794 sprintf (buf, "%s -d -c %s > /dev/null", program_name, TEST_DELTA_FILE);
1795 if ((ret = do_cmd (stream, buf))) { return ret; }
1796 test_cleanup();
1797
1798 return 0;
1799}
1800
1801/* This tests that the no-output flag (-J) works. */
1802static int
1803test_no_output (xd3_stream *stream, int ignore)
1804{
1805 int ret;
1806 char buf[TESTBUFSIZE];
1807
1808 test_setup ();
1809 if ((ret = test_make_inputs (stream, NULL, NULL))) { return ret; }
1810
1811 /* Try no_output encode w/out unwritable output file */
1812 sprintf (buf, "%s -e %s /dont_run_xdelta3_test_as_root 2> /dev/null", program_name, TEST_TARGET_FILE);
1813 if ((ret = do_fail (stream, buf))) { return ret; }
1814 sprintf (buf, "%s -J -e %s /dont_run_xdelta3_test_as_root", program_name, TEST_TARGET_FILE);
1815 if ((ret = do_cmd (stream, buf))) { return ret; }
1816
1817 /* Now really write the delta to test decode no-output */
1818 sprintf (buf, "%s -e %s %s", program_name, TEST_TARGET_FILE, TEST_DELTA_FILE);
1819 if ((ret = do_cmd (stream, buf))) { return ret; }
1820
1821 sprintf (buf, "%s -d %s /dont_run_xdelta3_test_as_root 2> /dev/null", program_name, TEST_DELTA_FILE);
1822 if ((ret = do_fail (stream, buf))) { return ret; }
1823 sprintf (buf, "%s -J -d %s /dont_run_xdelta3_test_as_root", program_name, TEST_DELTA_FILE);
1824 if ((ret = do_cmd (stream, buf))) { return ret; }
1825 test_cleanup ();
1826 return 0;
1827}
1828
1829/******************************************************************************************
1830 Source identical optimization
1831 ******************************************************************************************/
1832
1833/* Computing a delta should be fastest when the two inputs are identical, this checks it.
1834 * The library is called to compute a delta between a 10000 byte file, 1000 byte winsize,
1835 * 500 byte source blocksize. The same buffer is used for both source and target. */
1836static int
1837test_identical_behavior (xd3_stream *stream, int ignore)
1838{
1839#define IDB_TGTSZ 10000
1840#define IDB_BLKSZ 500
1841#define IDB_WINSZ 1000
1842#define IDB_DELSZ 1000
1843#define IDB_WINCNT (IDB_TGTSZ / IDB_WINSZ)
1844
1845 int ret, i;
1846 uint8_t buf[IDB_TGTSZ];
1847 uint8_t del[IDB_DELSZ];
1848 uint8_t rec[IDB_TGTSZ];
1849 xd3_source source;
1850 int encwin = 0;
1851 usize_t delpos = 0, recsize;
1852 xd3_config config;
1853
1854 for (i = 0; i < IDB_TGTSZ; i += 1) { buf[i] = lrand48 (); }
1855
1856 stream->winsize = IDB_WINSZ;
1857
1858 source.size = IDB_TGTSZ;
1859 source.blksize = IDB_BLKSZ;
1860 source.name = "";
1861 source.curblk = NULL;
1862 source.curblkno = -1;
1863
1864 if ((ret = xd3_set_source (stream, & source))) { goto fail; }
1865
1866 /* Compute an delta between identical source and targets. */
1867 for (;;)
1868 {
1869 ret = xd3_encode_input (stream);
1870
1871 if (ret == XD3_INPUT)
1872 {
1873 if (encwin == IDB_WINCNT-1) { break; }
1874 xd3_avail_input (stream, buf + (IDB_WINSZ * encwin), IDB_WINSZ);
1875 encwin += 1;
1876 continue;
1877 }
1878
1879 if (ret == XD3_GETSRCBLK)
1880 {
1881 source.curblkno = source.getblkno;
1882 source.onblk = IDB_BLKSZ;
1883 source.curblk = buf + source.getblkno * IDB_BLKSZ;
1884 continue;
1885 }
1886
1887 if (ret == XD3_WINSTART) { continue; }
1888 if (ret == XD3_WINFINISH) { continue; }
1889
1890 if (ret != XD3_OUTPUT) { goto fail; }
1891
1892 XD3_ASSERT (delpos + stream->avail_out <= IDB_DELSZ);
1893
1894 memcpy (del + delpos, stream->next_out, stream->avail_out);
1895
1896 delpos += stream->avail_out;
1897
1898 xd3_consume_output (stream);
1899 }
1900
1901 /* Reset. */
1902 source.blksize = IDB_TGTSZ;
1903 source.onblk = IDB_TGTSZ;
1904 source.curblk = buf;
1905 source.curblkno = 0;
1906
1907 if ((ret = xd3_close_stream (stream))) { goto fail; }
1908 xd3_free_stream (stream);
1909 xd3_init_config (& config, 0);
1910 if ((ret = xd3_config_stream (stream, & config))) { goto fail; }
1911 if ((ret = xd3_set_source (stream, & source))) { goto fail; }
1912
1913 /* Decode. */
1914 if ((ret = xd3_decode_completely (stream, del, delpos, rec, & recsize, IDB_TGTSZ))) { goto fail; }
1915
1916 /* Check result size and data. */
1917 if (recsize != IDB_TGTSZ) { stream->msg = "wrong size reconstruction"; goto fail; }
1918 if (memcmp (rec, buf, IDB_TGTSZ) != 0) { stream->msg = "wrong data reconstruction"; goto fail; }
1919
1920 /* Check that there was one copy per window. */
1921 IF_DEBUG (if (stream->n_cpy != IDB_WINCNT ||
1922 stream->n_add != 0 ||
1923 stream->n_run != 0) { stream->msg = "wrong copy count"; goto fail; });
1924
1925 /* Check that no checksums were computed because the initial match was presumed. */
1926 IF_DEBUG (if (stream->large_ckcnt != 0) { stream->msg = "wrong checksum behavior"; goto fail; });
1927
1928 ret = 0;
1929 fail:
1930 return ret;
1931}
1932
1933/******************************************************************************************
1934 String matching test
1935 ******************************************************************************************/
1936
/* Check particular matching behaviors by calling xd3_string_match_soft directly with
 * specific arguments. */

/* Bit flags selecting which optional matcher features a test case turns on. */
typedef enum
{
  SM_NONE    = 0,
  SM_SSMATCH = 1 << 0,
  SM_LAZY    = 1 << 1,
  SM_PROMOTE = 1 << 2
} string_match_flags;

/* One string-matching test case. */
typedef struct _string_match_test
{
  const char *input;    /* text handed to the matcher */
  int         flags;    /* OR of string_match_flags values */
  const char *result;   /* expected instructions, as text */
} string_match_test;
1955
1956static const string_match_test match_tests[] =
1957{
1958 /* nothing */
1959 { "1234567890", SM_NONE, "" },
1960
1961 /* basic run, copy */
1962 { "11111111112323232323", SM_NONE, "R0/10 C12/8@10" },
1963
1964 /* no run smaller than MIN_RUN=8 */
1965 { "1111111", SM_NONE, "C1/6@0" },
1966 { "11111111", SM_NONE, "R0/8" },
1967
1968 /* simple promotion: the third copy address depends on promotion */
1969 { "ABCDEF_ABCDEF^ABCDEF", SM_NONE, "C7/6@0 C14/6@7" },
1970 { "ABCDEF_ABCDEF^ABCDEF", SM_PROMOTE, "C7/6@0 C14/6@0" },
1971
1972 /* simple lazy: there is a better copy starting with "23 X" than "123 " */
1973 { "123 23 XYZ 123 XYZ", SM_NONE, "C11/4@0" },
1974 { "123 23 XYZ 123 XYZ", SM_LAZY, "C11/4@0 C12/6@4" },
1975
1976 /* trylazy: no lazy matches unless there are at least two characters beyond the first
1977 * match */
1978 { "2123_121212", SM_LAZY, "C7/4@5" },
1979 { "2123_1212123", SM_LAZY, "C7/4@5" },
1980 { "2123_1212123_", SM_LAZY, "C7/4@5 C8/5@0" },
1981
1982 /* trylazy: no lazy matches if the copy is >= MAXLAZY=10 */
1983 { "2123_121212123_", SM_LAZY, "C7/6@5 C10/5@0" },
1984 { "2123_12121212123_", SM_LAZY, "C7/8@5 C12/5@0" },
1985 { "2123_1212121212123_", SM_LAZY, "C7/10@5" },
1986
1987 /* lazy run: check a run overlapped by a longer copy */
1988 { "11111112 111111112 1", SM_LAZY, "C1/6@0 R9/8 C10/10@0" },
1989
1990 /* lazy match: match_length,run_l >= min_match tests, shouldn't get any copies within
1991 * the run, no run within the copy */
1992 { "^________^________ ", SM_LAZY, "R1/8 C9/9@0" },
1993
1994 /* chain depth: it only goes back 10. this checks that the 10th match hits and the 11th
1995 * misses. */
1996 { "1234 1234_1234-1234=1234+1234[1234]1234{1234}1234<1234 ", SM_NONE,
1997 "C5/4@0 C10/4@5 C15/4@10 C20/4@15 C25/4@20 C30/4@25 C35/4@30 C40/4@35 C45/4@40 C50/5@0" },
1998 { "1234 1234_1234-1234=1234+1234[1234]1234{1234}1234<1234>1234 ", SM_NONE,
1999 "C5/4@0 C10/4@5 C15/4@10 C20/4@15 C25/4@20 C30/4@25 C35/4@30 C40/4@35 C45/4@40 C50/4@45 C55/4@50" },
2000
2001 /* ssmatch test */
2002 { "ABCDE___ABCDE*** BCDE***", SM_NONE, "C8/5@0 C17/4@1" },
2003 { "ABCDE___ABCDE*** BCDE***", SM_SSMATCH, "C8/5@0 C17/7@9" },
2004};
2005
2006static int
2007test_string_matching (xd3_stream *stream, int ignore)
2008{
2009 int i, ret;
2010 xd3_config config;
2011 char rbuf[TESTBUFSIZE];
2012
2013 for (i = 0; i < SIZEOF_ARRAY (match_tests); i += 1)
2014 {
2015 const string_match_test *test = & match_tests[i];
2016 char *rptr = rbuf;
2017 usize_t len = strlen (test->input);
2018
2019 xd3_free_stream (stream);
2020 xd3_init_config (& config, 0);
2021
2022 config.smatch_cfg = XD3_SMATCH_SOFT;
2023 config.large_look = 4;
2024 config.large_step = 4;
2025 config.small_look = 4;
2026 config.small_chain = 10;
2027 config.small_lchain = 10;
2028 config.max_lazy = 10;
2029 config.long_enough = 10;
2030 config.ssmatch = (test->flags & SM_SSMATCH) && 1;
2031 config.try_lazy = (test->flags & SM_LAZY) && 1;
2032 config.promote = (test->flags & SM_PROMOTE) && 1;
2033
2034 if ((ret = xd3_config_stream (stream, & config))) { return ret; }
2035 if ((ret = xd3_encode_init (stream))) { return ret; }
2036
2037 xd3_avail_input (stream, (uint8_t*)test->input, len);
2038
2039 if ((ret = stream->string_match (stream))) { return ret; }
2040
2041 *rptr = 0;
2042 while (! xd3_rlist_empty (& stream->iopt.used))
2043 {
2044 xd3_rinst *inst = xd3_rlist_pop_front (& stream->iopt.used);
2045
2046 switch (inst->type)
2047 {
2048 case XD3_RUN: *rptr++ = 'R'; break;
2049 case XD3_CPY: *rptr++ = 'C'; break;
2050 default: XD3_ASSERT (0);
2051 }
2052
2053 sprintf (rptr, "%d/%d", inst->pos, inst->size);
2054 rptr += strlen (rptr);
2055
2056 if (inst->type == XD3_CPY)
2057 {
2058 *rptr++ = '@';
2059 sprintf (rptr, "%"Q"d", inst->addr);
2060 rptr += strlen (rptr);
2061 }
2062
2063 *rptr++ = ' ';
2064
2065 xd3_rlist_push_back (& stream->iopt.free, inst);
2066 }
2067
2068 if (rptr != rbuf)
2069 {
2070 rptr -= 1; *rptr = 0;
2071 }
2072
2073 if (strcmp (rbuf, test->result) != 0)
2074 {
2075 P(RINT "test %u: expected %s: got %s", i, test->result, rbuf);
2076 stream->msg = "wrong result";
2077 return EINVAL;
2078 }
2079 }
2080
2081 return 0;
2082}
2083
2084/******************************************************************************************
2085 Source window advance, update
2086 ******************************************************************************************/
2087
2088/*
2089 * This is a test for many overlapping instructions. It must be a lazy
2090 * matcher.
2091 */
2092static int
2093test_iopt_flush_instructions (xd3_stream *stream, int ignore)
2094{
2095 int ret, i, tpos = 0;
2096 usize_t delta_size, recon_size;
2097 xd3_config config;
2098 uint8_t target[TESTBUFSIZE];
2099 uint8_t delta[TESTBUFSIZE];
2100 uint8_t recon[TESTBUFSIZE];
2101
2102 xd3_free_stream (stream);
2103 xd3_init_config (& config, 0);
2104
2105 config.smatch_cfg = XD3_SMATCH_SOFT;
2106 config.large_look = 16;
2107 config.large_step = 16;
2108 config.small_look = 4;
2109 config.small_chain = 128;
2110 config.small_lchain = 16;
2111 config.ssmatch = 0;
2112 config.try_lazy = 1;
2113 config.max_lazy = 8;
2114 config.long_enough = 128;
2115 config.promote = 0;
2116
2117 if ((ret = xd3_config_stream (stream, & config))) { return ret; }
2118
2119 for (i = 1; i < 250; i++)
2120 {
2121 target[tpos++] = i;
2122 target[tpos++] = i+1;
2123 target[tpos++] = i+2;
2124 target[tpos++] = i+3;
2125 target[tpos++] = 0;
2126 }
2127 for (i = 1; i < 253; i++)
2128 {
2129 target[tpos++] = i;
2130 }
2131
2132 if ((ret = xd3_encode_completely (stream, target, tpos,
2133 delta, & delta_size, sizeof (delta))))
2134 {
2135 return ret;
2136 }
2137
2138 xd3_free_stream(stream);
2139 if ((ret = xd3_config_stream (stream, & config))) { return ret; }
2140
2141 if ((ret = xd3_decode_completely (stream, delta, delta_size,
2142 recon, & recon_size, sizeof (recon))))
2143 {
2144 return ret;
2145 }
2146
2147 XD3_ASSERT(tpos == recon_size);
2148 XD3_ASSERT(memcmp(target, recon, recon_size) == 0);
2149
2150 return 0;
2151}
2152
2153/******************************************************************************************
2154 TEST MAIN
2155 ******************************************************************************************/
2156
2157static int
2158xd3_selftest (void)
2159{
2160#define DO_TEST(fn,flags,arg) \
2161 do { \
2162 xd3_stream stream; \
2163 xd3_config config; \
2164 xd3_init_config (& config, flags); \
2165 P(RINT "xdelta3: testing " #fn "%s...", \
2166 flags ? (" (" #flags ")") : ""); \
2167 if ((ret = xd3_config_stream (& stream, & config) == 0) && \
2168 (ret = test_ ## fn (& stream, arg)) == 0) { \
2169 P(RINT " success\n"); \
2170 } else { \
2171 P(RINT " failed: %s: %s\n", xd3_errstring (& stream), \
2172 xd3_strerror (ret)); } \
2173 xd3_free_stream (& stream); \
2174 if (ret != 0) { goto failure; } \
2175 } while (0)
2176
2177 int ret;
2178
2179 DO_TEST (random_numbers, 0, 0);
2180 DO_TEST (decode_integer_end_of_input, 0, 0);
2181 DO_TEST (decode_integer_overflow, 0, 0);
2182 DO_TEST (encode_decode_uint32_t, 0, 0);
2183 DO_TEST (encode_decode_uint64_t, 0, 0);
2184 DO_TEST (usize_t_overflow, 0, 0);
2185
2186 DO_TEST (address_cache, 0, 0);
2187 IF_GENCODETBL (DO_TEST (address_cache, XD3_ALT_CODE_TABLE, 0));
2188
2189 DO_TEST (string_matching, 0, 0);
2190
2191 DO_TEST (choose_instruction, 0, 0);
2192 IF_GENCODETBL (DO_TEST (choose_instruction, XD3_ALT_CODE_TABLE, 0));
2193 IF_GENCODETBL (DO_TEST (encode_code_table, 0, 0));
2194
2195 DO_TEST (identical_behavior, 0, 0);
2196 DO_TEST (iopt_flush_instructions, 0, 0);
2197
2198 IF_DJW (DO_TEST (secondary_huff, 0, DJW_MAX_GROUPS));
2199 IF_FGK (DO_TEST (secondary_fgk, 0, 1));
2200
2201 DO_TEST (decompress_single_bit_error, 0, 3);
2202 DO_TEST (decompress_single_bit_error, XD3_ADLER32, 3);
2203
2204 IF_FGK (DO_TEST (decompress_single_bit_error, XD3_SEC_FGK, 3));
2205 IF_DJW (DO_TEST (decompress_single_bit_error, XD3_SEC_DJW, 8));
2206
2207 /* There are many expected non-failures for ALT_CODE_TABLE because not all of the
2208 * instruction codes are used. */
2209 IF_GENCODETBL (DO_TEST (decompress_single_bit_error, XD3_ALT_CODE_TABLE, 224));
2210
2211 DO_TEST (compressed_stream_overflow, 0, 0);
2212
2213 /* The following tests have random failures on my OSX box.
2214 */
2215 DO_TEST (force_behavior, 0, 0);
2216 DO_TEST (stdout_behavior, 0, 0);
2217 DO_TEST (no_output, 0, 0);
2218 DO_TEST (command_line_arguments, 0, 0);
2219
2220#if EXTERNAL_COMPRESSION
2221 DO_TEST (source_decompression, 0, 0);
2222 DO_TEST (externally_compressed_io, 0, 0);
2223#endif
2224
2225failure:
2226 test_cleanup ();
2227 return ret == 0 ? EXIT_SUCCESS : EXIT_FAILURE;
2228#undef DO_TEST
2229}
diff --git a/xdelta3/xdelta3.c b/xdelta3/xdelta3.c
new file mode 100755
index 0000000..fb9a09f
--- /dev/null
+++ b/xdelta3/xdelta3.c
@@ -0,0 +1,6022 @@
1/* xdelta 3 - delta compression tools and library
2 * Copyright (C) 2001, 2003, 2004, 2005, 2006. Joshua P. MacDonald
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17
18 -------------------------------------------------------------------
19
20 Xdelta 3
21
22 The goal of this library is to implement both the (stand-alone)
23 data-compression and delta-compression aspects of VCDIFF encoding, and
24 to support a programming interface that works like Zlib
25 (http://www.gzip.org/zlib.html). See RFC3284: The VCDIFF Generic
26 Differencing and Compression Data Format.
27
28 VCDIFF is a unified encoding that combines data-compression and
29 delta-encoding ("differencing").
30
31 VCDIFF has a detailed byte-code instruction set with many features.
32 The instruction format supports an immediate size operand for small
33 COPYs and ADDs (e.g., under 18 bytes). There are also instruction
34 "modes", which are used to compress COPY addresses by using two
35 address caches. An instruction mode refers to slots in the NEAR
36 and SAME caches for recent addresses. NEAR remembers the
37 previous 4 (by default) COPY addresses, and SAME catches
38 frequent re-uses of the same address using a 3-way (by default)
39 256-entry associative cache of [ADDR mod 256], the encoded byte.
40 A hit in the NEAR/SAME cache requires 0/1 ADDR bytes.
41
42 VCDIFF has a default instruction table, but alternate
43 instruction tables may themselves be delta-compressed and
44 included in the encoding header. This allows even more freedom.
45 There are 9 instruction modes in the default code table, 4 near, 3
46 same, VCD_SELF (absolute encoding) and VCD_HERE (relative to the
47 current position).
48
49 ----------------------------------------------------------------------
50
51 Algorithms
52
53 Aside from the details of encoding and decoding, there are a bunch
54 of algorithms needed.
55
56 1. STRING-MATCH. A two-level fingerprinting approach is used. A
57 single loop computes the two checksums -- small and large -- at
58 successive offsets in the TARGET file. The large checksum is more
59 accurate and is used to discover SOURCE matches, which are
60 potentially very long. The small checksum is used to discover
61 copies within the TARGET. Small matching, which is more expensive,
62 usually dominates the large STRING-MATCH costs in this code - the
63 more exhaustive the search, the better the results. Either of the
64 two string-matching mechanisms may be disabled. Currently, large
65 checksums are only performed in the source file, if present, and
66 small checksums are performed only in the left-over target input.
67 However, small matches are possible in the source file too, with a
68 range of possibilities. [I've seen a paper on this subject, but
69 I lost it.]
70
71 2. INSTRUCTION SELECTION. The IOPT buffer here represents a queue
72 used to store overlapping copy instructions. There are two possible
73 optimizations that go beyond a greedy search. Both of these fall
74 into the category of "non-greedy matching" optimizations.
75
76 The first optimization stems from backward SOURCE-COPY matching.
77 When a new SOURCE-COPY instruction covers a previous instruction in
78 the target completely, it is erased from the queue. Randal Burns
79 originally analyzed these algorithms and did a lot of related work
80 (\cite the 1.5-pass algorithm).
81
82 The second optimization comes by the encoding of common very-small
83 COPY and ADD instructions, for which there are special DOUBLE-code
84 instructions, which code two instructions in a single byte.
85
86 The cost of bad instruction-selection overhead is relatively high
87 for data-compression, relative to delta-compression, so this second
88 optimization is fairly important. With "lazy" matching (the name
89 used in Zlib for a similar optimization), the string-match
90 algorithm searches after a match for potential overlapping copy
91 instructions. In Xdelta and by default, VCDIFF, the minimum match
92 size is 4 bytes, whereas Zlib searches with a 3-byte minimum. This
93 feature, combined with double instructions, provides a nice
94 challenge. Search in this file for "black magic", a heuristic.
95
96 3. STREAM ALIGNMENT. Stream alignment is needed to compress large
97 inputs in constant space. TODO: redocument
98
99 4. WINDOW SELECTION. When the IOPT buffer flushes, in the first call
100 to xd3_iopt_finish_encoding containing any kind of copy instruction,
101 the parameters of the source window must be decided: the offset into
102 the source and the length of the window. Since the IOPT buffer is
103 finite, the program may be forced to fix these values before knowing
104 the best offset/length. XD3_DEFAULT_SRCBACK limits the length, but a
105 smaller length is preferred because all target copies are addressed
106 after source copies in the VCDIFF address space. Picking too large a
107 source window means larger address encoding.
108
109 If the IOPT buffer is filling easily, perhaps the target window is
110 too large. In any case, a decision is made (though an alternative is
111 to emit the sub-window right away, to reduce the winsize
112 automatically - not implemented, another alternative is to grow the
113 IOPT buffer, it is after all bounded in size by winsize.)
114
115 The algorithm is in xd3_srcwin_setup.
116
117 5. SECONDARY COMPRESSION. VCDIFF supports a secondary encoding to
118 be applied to the individual sections of the data format, which are
119 ADDRess, INSTruction, and DATA. Several secondary compressor
120 variations are implemented here, although none is standardized yet.
121
122 One is an adaptive huffman algorithm -- the FGK algorithm (Faller,
123 Gallager, and Knuth, 1985). This compressor is extremely slow.
124
125 The other is a simple static Huffman routine, which is the base
126 case of a semi-adaptive scheme published by D.J. Wheeler and first
127 widely used in bzip2 (by Julian Seward). This is a very
128 interesting algorithm, originally published in nearly cryptic form
129 by D.J. Wheeler. !!!NOTE!!! Because these are not standardized, the
130 -S option (no secondary compression) remains on by default.
131 ftp://ftp.cl.cam.ac.uk/users/djw3/bred3.{c,ps}
132 --------------------------------------------------------------------
133
134 Other Features
135
136 1. USER CONVENIENCE
137
138 For user convenience, it is essential to recognize Gzip-compressed
139 files and automatically Gzip-decompress them prior to
140 delta-compression (or else no delta-compression will be achieved
141 unless the user manually decompresses the inputs). The compressed
142 representation competes with Xdelta, and this must be hidden from the
143 command-line user interface. The Xdelta-1.x encoding was simple, not
144 compressed itself, so Xdelta-1.x uses Zlib internally to compress the
145 representation.
146
147 This implementation supports external compression, which implements
148 the necessary fork() and pipe() mechanics. There is a tricky step
149 involved to support automatic detection of a compressed input in a
150 non-seekable input. First you read a bit of the input to detect
151 magic headers. When a compressed format is recognized, exec() the
152 external compression program and create a second child process to
153 copy the original input stream. [Footnote: There is a difficulty
154 related to using Gzip externally. It is not possible to decompress
155 and recompress a Gzip file transparently. If FILE.GZ had a
156 cryptographic signature, then, after: (1) Gzip-decompression, (2)
157 Xdelta-encoding, (3) Gzip-compression the signature could be
158 broken. The only way to solve this problem is to guess at Gzip's
159 compression level or control it by other means. I recommend that
160 specific implementations of any compression scheme store
161 information needed to exactly re-compress the input, that way
162 external compression is transparent - however, this won't happen
163 here until it has stabilized.]
164
165 2. APPLICATION-HEADER
166
167 This feature was introduced in RFC3284. It allows any application
168 to include a header within the VCDIFF file format. This allows
169 general inter-application data exchange with support for
170 application-specific extensions to communicate metadata.
171
172 3. VCDIFF CHECKSUM
173
174 An optional checksum value is included with each window, which can
175 be used to validate the final result. This verifies the correct source
176 file was used for decompression as well as the obvious advantage:
177 checking the implementation (and underlying) correctness.
178
179 4. LIGHT WEIGHT
180
181 The code makes efforts to avoid copying data more than necessary.
182 The code delays many initialization tasks until the first use, it
183 optimizes for identical (perfectly matching) inputs. It does not
184 compute any checksums until the first lookup misses. Memory usage
185 is reduced. String-matching is templatized (by slightly gross use
186 of CPP) to hard-code alternative compile-time defaults. The code
187 has few outside dependencies.
188 ----------------------------------------------------------------------
189
190 The default rfc3284 instruction table:
191 (see RFC for the explanation)
192
193 TYPE SIZE MODE TYPE SIZE MODE INDEX
194 --------------------------------------------------------------------
195 1. Run 0 0 Noop 0 0 0
196 2. Add 0, [1,17] 0 Noop 0 0 [1,18]
197 3. Copy 0, [4,18] 0 Noop 0 0 [19,34]
198 4. Copy 0, [4,18] 1 Noop 0 0 [35,50]
199 5. Copy 0, [4,18] 2 Noop 0 0 [51,66]
200 6. Copy 0, [4,18] 3 Noop 0 0 [67,82]
201 7. Copy 0, [4,18] 4 Noop 0 0 [83,98]
202 8. Copy 0, [4,18] 5 Noop 0 0 [99,114]
203 9. Copy 0, [4,18] 6 Noop 0 0 [115,130]
204 10. Copy 0, [4,18] 7 Noop 0 0 [131,146]
205 11. Copy 0, [4,18] 8 Noop 0 0 [147,162]
206 12. Add [1,4] 0 Copy [4,6] 0 [163,174]
207 13. Add [1,4] 0 Copy [4,6] 1 [175,186]
208 14. Add [1,4] 0 Copy [4,6] 2 [187,198]
209 15. Add [1,4] 0 Copy [4,6] 3 [199,210]
210 16. Add [1,4] 0 Copy [4,6] 4 [211,222]
211 17. Add [1,4] 0 Copy [4,6] 5 [223,234]
212 18. Add [1,4] 0 Copy 4 6 [235,238]
213 19. Add [1,4] 0 Copy 4 7 [239,242]
214 20. Add [1,4] 0 Copy 4 8 [243,246]
215 21. Copy 4 [0,8] Add 1 0 [247,255]
216 --------------------------------------------------------------------
217
218 Reading the source: Overview
219
220 This file includes itself in several passes to macro-expand certain
221 sections with variable forms. Just read ahead, there's only a
222 little confusion. I know this sounds ugly, but hard-coding some of
223 the string-matching parameters results in a 10-15% increase in
224 string-match performance. The only time this hurts is when you have
225 unbalanced #if/endifs.
226
227 A single compilation unit tames the Makefile. In short, this is to
228 allow the above-described hack without an exploding Makefile. The
229 single compilation unit includes the core library features,
230 configurable string-match templates, optional main() command-line
231 tool, misc optional features, and a regression test. Features are
232 controlled with CPP #defines, see Makefile.am.
233
234 The initial __XDELTA3_C_HEADER_PASS__ starts first, the INLINE and
235 TEMPLATE sections follow. Easy stuff first, hard stuff last.
236
237 Optional features include:
238
239 xdelta3-main.h The command-line interface, external compression
240 support, POSIX-specific, info & VCDIFF-debug tools.
241 xdelta3-second.h The common secondary compression routines.
242 xdelta3-djw.h The semi-adaptive huffman secondary encoder.
243 xdelta3-fgk.h The adaptive huffman secondary encoder.
244 xdelta3-test.h The unit test covers major algorithms,
245 encoding and decoding. There are single-bit
246 error decoding tests. There are 32/64-bit file size
247 boundary tests. There are command-line tests.
248 There are compression tests. There are external
249 compression tests. There are string-matching tests.
250 There should be more tests...
251
252 Additional headers include:
253
254 xdelta3.h The public header file.
255 xdelta3-cfgs.h The default settings for default, built-in
256 encoders. These are hard-coded at
257 compile-time. There is also a single
258 soft-coded string matcher for experimenting
259 with arbitrary values.
260 xdelta3-list.h A cyclic list template
261
262 Misc little debug utilities:
263
264 badcopy.c Randomly modifies an input file based on two
265 parameters: (1) the probability that a byte in
266 the file is replaced with a pseudo-random value,
267 and (2) the mean change size. Changes are
268 generated using an exponential distribution
269 which approximates the expected error_prob
270 distribution.
271 show.c Prints an offset/length segment from a file.
272 testh.c Checks that xdelta3.h can be #included
273 --------------------------------------------------------------------
274
275 This file itself is unusually large. I hope to defend this layout
276 with lots of comments. Everything in this file is related to
277 encoding and decoding. I like it all together - the template stuff
278 is just a hack. */
279
280#ifndef __XDELTA3_C_HEADER_PASS__
281#define __XDELTA3_C_HEADER_PASS__
282
283#include <errno.h>
284#include <string.h>
285
286#include "xdelta3.h"
287
288/******************************************************************************************
289 STATIC CONFIGURATION
290 ******************************************************************************************/
291
292#ifndef XD3_MAIN /* the main application */
293#define XD3_MAIN 0
294#endif
295
296#ifndef VCDIFF_TOOLS
297#define VCDIFF_TOOLS XD3_MAIN
298#endif
299
300#ifndef SECONDARY_FGK /* one from the algorithm preservation department: */
301#define SECONDARY_FGK 0 /* adaptive Huffman routines */
302#endif
303
304#ifndef SECONDARY_DJW /* semi-adaptive/static Huffman for the eventual */
305#define SECONDARY_DJW 0 /* standardization, off by default until such time. */
306#endif
307
308#ifndef GENERIC_ENCODE_TABLES /* These three are the RFC-spec'd app-specific */
309#define GENERIC_ENCODE_TABLES 0 /* code features. This is tested but not recommended */
310#endif /* unless there's a real application. */
311#ifndef GENERIC_ENCODE_TABLES_COMPUTE
312#define GENERIC_ENCODE_TABLES_COMPUTE 0
313#endif
314#ifndef GENERIC_ENCODE_TABLES_COMPUTE_PRINT
315#define GENERIC_ENCODE_TABLES_COMPUTE_PRINT 0
316#endif
317
318#if XD3_USE_LARGEFILE64 /* How does everyone else do this? */
319#define Q "q"
320#else
321#define Q
322#endif
323
324#if XD3_ENCODER
325#define IF_ENCODER(x) x
326#else
327#define IF_ENCODER(x)
328#endif
329
330/******************************************************************************************/
331
/* Bit values used in the three VCDIFF indicator bytes (header, window and
 * delta).  The three groups reuse the same low bits, so a value is only
 * meaningful for the indicator byte it belongs to.  The VCD_INV* masks
 * select every bit outside the three low ones. */
typedef enum
{
  /* Header indicator. */
  VCD_SECONDARY = 0x01,   /* uses secondary compressor */
  VCD_CODETABLE = 0x02,   /* supplies code table data */
  VCD_APPHEADER = 0x04,   /* supplies application data */
  VCD_INVHDR    = ~7U,

  /* Window indicator. */
  VCD_SOURCE    = 0x01,   /* copy window in source file */
  VCD_TARGET    = 0x02,   /* copy window in target file */
  VCD_ADLER32   = 0x04,   /* has adler32 checksum */
  VCD_INVWIN    = ~7U,

  /* Either kind of copy window. */
  VCD_SRCORTGT  = (VCD_SOURCE | VCD_TARGET),

  /* Delta indicator. */
  VCD_DATACOMP  = 0x01,
  VCD_INSTCOMP  = 0x02,
  VCD_ADDRCOMP  = 0x04,
  VCD_INVDEL    = ~0x7U,

} xd3_indicator;
355
/* Identifiers of the secondary compressors.
 * !!!Note: these are not a standard IANA-allocated ID!!! */
typedef enum
{
  VCD_DJW_ID = 1,
  VCD_FGK_ID = 16,
} xd3_secondary_ids;
360
/* Capability flags advertised by a secondary compressor. */
typedef enum
{
  SEC_NOFLAGS     = 0,
  /* OPT: Not implemented: Could eliminate first pass of Huffman... */
  SEC_COUNT_FREQS = 0x01,
} xd3_secondary_flags;
365
/* Tells the secondary compressor which section it is working on.  The
 * header section is not compressed, therefore it is not listed here. */
typedef enum
{
  DATA_SECTION = 0,
  INST_SECTION = 1,
  ADDR_SECTION = 2,
} xd3_section_type;
371
/* Instruction types.  Copy instructions carry their copy mode in the
 * type: a copy is represented as (XD3_CPY + copy-mode value), so every
 * value >= XD3_CPY is a copy. */
typedef enum {
  XD3_NOOP = 0,
  XD3_ADD  = 1,
  XD3_RUN  = 2,
  XD3_CPY  = 3,
} xd3_rtype;
379
380/******************************************************************************************/
381
382#include "xdelta3-list.h"
383
384XD3_MAKELIST(xd3_rlist, xd3_rinst, link);
385
386/******************************************************************************************/
387
/* Branch-prediction hints.  With GCC 3 or later they expand to
 * __builtin_expect; elsewhere they are plain pass-throughs.
 *
 * Each macro is guarded on its own: a build that already supplies one of
 * them (for example only `unlikely') still gets the other one defined
 * here, instead of being left without it. */
#if defined(__GNUC__) && __GNUC__ >= 3
#  ifndef unlikely
#    define unlikely(x) __builtin_expect((x),0)
#  endif
#  ifndef likely
#    define likely(x)   __builtin_expect((x),1)
#  endif
#else
#  ifndef unlikely
#    define unlikely(x) (x)
#  endif
#  ifndef likely
#    define likely(x)   (x)
#  endif
#endif
397
/* Secondary compression has to save at least this many bytes. */
#define SECONDARY_MIN_SAVINGS 2
/* Secondary compression needs at least this many bytes. */
#define SECONDARY_MIN_INPUT   10

/* The four bytes that open a VCDIFF file, in order. */
#define VCDIFF_MAGIC1  0xd6
#define VCDIFF_MAGIC2  0xc3
#define VCDIFF_MAGIC3  0xc4
#define VCDIFF_VERSION 0x00

/* The first two address modes. */
#define VCD_SELF 0
#define VCD_HERE 1

/* Should fit a code table string. */
#define CODE_TABLE_STRING_SIZE (6 * 256)
/* Should fit a compressed code table string. */
#define CODE_TABLE_VCDIFF_SIZE (6 * 256)

/* True if any secondary compressor is used. */
#define SECONDARY_ANY (SECONDARY_DJW || SECONDARY_FGK)

/* Used in test code--size of the secondary compressor alphabet. */
#define ALPHABET_SIZE 256

/* Old hashing experiments. */
#define HASH_PRIME        0
#define HASH_PERMUTE      1
#define ARITH_SMALL_CKSUM 1

/* Table entries distinguish "no-entry" from offset 0 using this offset. */
#define HASH_CKOFFSET 1U

/* Match-optimization stuff. */
#define MIN_SMALL_LOOK   2U
#define MIN_LARGE_LOOK   2U
#define MIN_MATCH_OFFSET 1U
/* VCDIFF code table: 18 is the default limit for direct-coded ADD sizes. */
#define MAX_MATCH_SPLIT  18U

/* The least number of bytes an overlapping match must beat the preceding
 * match by.  This is a bias for the lazy match optimization.  A non-zero
 * value means that an adjacent match has to be better by more than the
 * step between them. */
#define LEAST_MATCH_INCR 0

/* VCDIFF code table: MIN_MATCH=4. */
#define MIN_MATCH 4U
#define MIN_ADD   1U
/* The shortest run; if a run is shorter than this an immediate add/copy
 * will be just as good.  ADD1/COPY6 = 1I+1D+1A bytes, RUN18 = 1I+1D+1A. */
#define MIN_RUN   8U

/* Maximum number of modes used for compression--does not limit
 * decompression. */
#define MAX_MODES 9

/* Number of separate output sections. */
#define ENC_SECTS 4
442
/* Accessors for the four encoder output sections (header, data,
 * instruction, address) of stream S.  Index 0..3 matches ENC_SECTS.
 *
 * The macros expand their argument S; they used to ignore it and silently
 * pick up whatever variable named `stream' was visible at the call site.
 * Call sites are expected to pass the stream pointer. */
#define HDR_TAIL(s)  ((s)->enc_tails[0])
#define DATA_TAIL(s) ((s)->enc_tails[1])
#define INST_TAIL(s) ((s)->enc_tails[2])
#define ADDR_TAIL(s) ((s)->enc_tails[3])

#define HDR_HEAD(s)  ((s)->enc_heads[0])
#define DATA_HEAD(s) ((s)->enc_heads[1])
#define INST_HEAD(s) ((s)->enc_heads[2])
#define ADDR_HEAD(s) ((s)->enc_heads[3])

/* Number of elements in a true array X (not a pointer). */
#define SIZEOF_ARRAY(x) (sizeof(x) / sizeof((x)[0]))

/* Total number of copy modes: the two fixed modes plus the same and near
 * modes configured in the address cache of X. */
#define TOTAL_MODES(x) (2+(x)->acache.s_same+(x)->acache.s_near)
456
457/* Template instances. */
458#if XD3_BUILD_SLOW
459#define IF_BUILD_SLOW(x) x
460#else
461#define IF_BUILD_SLOW(x)
462#endif
463#if XD3_BUILD_FAST
464#define IF_BUILD_FAST(x) x
465#else
466#define IF_BUILD_FAST(x)
467#endif
468#if XD3_BUILD_SOFT
469#define IF_BUILD_SOFT(x) x
470#else
471#define IF_BUILD_SOFT(x)
472#endif
473
474IF_BUILD_SOFT(static const xd3_smatcher __smatcher_soft;)
475IF_BUILD_FAST(static const xd3_smatcher __smatcher_fast;)
476IF_BUILD_SLOW(static const xd3_smatcher __smatcher_slow;)
477
478#if XD3_DEBUG
479#define SMALL_HASH_DEBUG1(s,inp) \
480 usize_t debug_hval = xd3_checksum_hash (& (s)->small_hash, \
481 xd3_scksum ((inp), (s)->small_look))
482#define SMALL_HASH_DEBUG2(s,inp) \
483 XD3_ASSERT (debug_hval == xd3_checksum_hash (& (s)->small_hash, \
484 xd3_scksum ((inp), (s)->small_look)))
485#define SMALL_HASH_STATS(x) x
486#else
487#define SMALL_HASH_DEBUG1(s,inp)
488#define SMALL_HASH_DEBUG2(s,inp)
489#define SMALL_HASH_STATS(x)
490#endif /* XD3_DEBUG */
491
492/* Config fields: three structures contain these variables, so this is non-typed. */
493#define XD3_COPY_CONFIG_FIELDS(dst,src) \
494 do { \
495 (dst)->large_look = (src)->large_look; \
496 (dst)->large_step = (src)->large_step; \
497 (dst)->small_look = (src)->small_look; \
498 (dst)->small_chain = (src)->small_chain; \
499 (dst)->small_lchain = (src)->small_lchain; \
500 (dst)->ssmatch = (src)->ssmatch; \
501 (dst)->try_lazy = (src)->try_lazy; \
502 (dst)->max_lazy = (src)->max_lazy; \
503 (dst)->long_enough = (src)->long_enough; \
504 (dst)->promote = (src)->promote; \
505 } while (0)
506
507/* Update the run-length state */
508#define NEXTRUN(c) do { if ((c) == run_c) { run_l += 1; } else { run_c = (c); run_l = 1; } } while (0)
509
510/* Update the checksum state. */
511#define LARGE_CKSUM_UPDATE(cksum,base,look) \
512 do { \
513 uint32_t old_c = PERMUTE((base)[0]); \
514 uint32_t new_c = PERMUTE((base)[(look)]); \
515 uint32_t low = (((cksum) & 0xffff) - old_c + new_c) & 0xffff; \
516 uint32_t high = (((cksum) >> 16) - (old_c * (look)) + low) & 0xffff; \
517 (cksum) = (high << 16) | low; \
518 } while (0)
519
520/* Multiply and add hash function */
521#if ARITH_SMALL_CKSUM
522#define SMALL_CKSUM_UPDATE(cksum,base,look) (cksum) = ((*(unsigned long*)(base+1)) * 71143)
523#else
524#define SMALL_CKSUM_UPDATE LARGE_CKSUM_UPDATE
525#endif
526
527/* Consume N bytes of input, only used by the decoder. */
528#define DECODE_INPUT(n) \
529 do { \
530 stream->total_in += (xoff_t) (n); \
531 stream->avail_in -= (n); \
532 stream->next_in += (n); \
533 } while (0)
534
535/* This CPP-conditional stuff can be cleaned up... */
536#if XD3_DEBUG
537#define IF_DEBUG(x) x
538#define DEBUG_ARG(x) , x
539#else
540#define IF_DEBUG(x)
541#define DEBUG_ARG(x)
542#endif
543#if XD3_DEBUG > 1
544#define IF_DEBUG1(x) x
545#else
546#define IF_DEBUG1(x)
547#endif
548#if REGRESSION_TEST
549#define IF_REGRESSION(x) x
550#else
551#define IF_REGRESSION(x)
552#endif
553
554/******************************************************************************************/
555
556#if XD3_ENCODER
557static void* xd3_alloc0 (xd3_stream *stream,
558 usize_t elts,
559 usize_t size);
560
561
562static xd3_output* xd3_alloc_output (xd3_stream *stream,
563 xd3_output *old_output);
564
565
566
567static void xd3_free_output (xd3_stream *stream,
568 xd3_output *output);
569
570static int xd3_emit_byte (xd3_stream *stream,
571 xd3_output **outputp,
572 uint8_t code);
573
574static int xd3_emit_bytes (xd3_stream *stream,
575 xd3_output **outputp,
576 const uint8_t *base,
577 usize_t size);
578
579static int xd3_emit_double (xd3_stream *stream, xd3_rinst *first, xd3_rinst *second, uint code);
580static int xd3_emit_single (xd3_stream *stream, xd3_rinst *single, uint code);
581
582static usize_t xd3_sizeof_output (xd3_output *output);
583
584static int xd3_source_match_setup (xd3_stream *stream, xoff_t srcpos);
585static int xd3_source_extend_match (xd3_stream *stream);
586static int xd3_srcwin_setup (xd3_stream *stream);
587static int xd3_srcwin_move_point (xd3_stream *stream, usize_t *next_move_point);
588static usize_t xd3_iopt_last_matched (xd3_stream *stream);
589static int xd3_emit_uint32_t (xd3_stream *stream, xd3_output **output, uint32_t num);
590
591#endif /* XD3_ENCODER */
592
593static int xd3_decode_allocate (xd3_stream *stream, usize_t size,
594 uint8_t **copied1, usize_t *alloc1,
595 uint8_t **copied2, usize_t *alloc2);
596
597static void xd3_compute_code_table_string (const xd3_dinst *code_table, uint8_t *str);
598static void* xd3_alloc (xd3_stream *stream, usize_t elts, usize_t size);
599static void xd3_free (xd3_stream *stream, void *ptr);
600
601static int xd3_read_uint32_t (xd3_stream *stream, const uint8_t **inpp,
602 const uint8_t *max, uint32_t *valp);
603
604#if REGRESSION_TEST
605static int xd3_selftest (void);
606#endif
607
608/******************************************************************************************/
609
610#define UINT32_OFLOW_MASK 0xfe000000U
611#define UINT64_OFLOW_MASK 0xfe00000000000000ULL
612
/* Fallback integer limits.  <stdint.h> defines the same names with a
 * different token sequence, so only define them when they are missing. */
#ifndef UINT32_MAX
#define UINT32_MAX 4294967295U
#endif
#ifndef UINT64_MAX
#define UINT64_MAX 18446744073709551615ULL
#endif
615
616#if SIZEOF_USIZE_T == 4
617#define USIZE_T_MAX UINT32_MAX
618#define xd3_decode_size xd3_decode_uint32_t
619#define xd3_emit_size xd3_emit_uint32_t
620#define xd3_sizeof_size xd3_sizeof_uint32_t
621#define xd3_read_size xd3_read_uint32_t
622#elif SIZEOF_USIZE_T == 8
623#define USIZE_T_MAX UINT64_MAX
624#define xd3_decode_size xd3_decode_uint64_t
625#define xd3_emit_size xd3_emit_uint64_t
626#define xd3_sizeof_size xd3_sizeof_uint64_t
627#define xd3_read_size xd3_read_uint64_t
628#endif
629
630#if SIZEOF_XOFF_T == 4
631#define XOFF_T_MAX UINT32_MAX
632#define xd3_decode_offset xd3_decode_uint32_t
633//#define xd3_emit_offset xd3_emit_uint32_t
634//#define xd3_sizeof_offset xd3_sizeof_uint32_t
635//#define xd3_read_offset xd3_read_uint32_t
636#elif SIZEOF_XOFF_T == 8
637#define XOFF_T_MAX UINT64_MAX
638#define xd3_decode_offset xd3_decode_uint64_t
639//#define xd3_emit_offset xd3_emit_uint64_t
640//#define xd3_sizeof_offset xd3_sizeof_uint64_t
641//#define xd3_read_offset xd3_read_uint64_t
642#endif
643
644#define USIZE_T_OVERFLOW(a,b) ((USIZE_T_MAX - (usize_t) (a)) < (usize_t) (b))
645#define XOFF_T_OVERFLOW(a,b) ((XOFF_T_MAX - (xoff_t) (a)) < (xoff_t) (b))
646
647const char* xd3_strerror (int ret)
648{
649 switch (ret)
650 {
651 case XD3_INPUT: return "XD3_INPUT";
652 case XD3_OUTPUT: return "XD3_OUTPUT";
653 case XD3_GETSRCBLK: return "XD3_GETSRCBLK";
654 case XD3_GOTHEADER: return "XD3_GOTHEADER";
655 case XD3_WINSTART: return "XD3_WINSTART";
656 case XD3_WINFINISH: return "XD3_WINFINISH";
657 }
658 return strerror (ret);
659}
660
661/******************************************************************************************/
662
663#if SECONDARY_ANY == 0
664#define IF_SEC(x)
665#define IF_NSEC(x) x
666#else /* yuck */
667#define IF_SEC(x) x
668#define IF_NSEC(x)
669#include "xdelta3-second.h"
670#endif /* SECONDARY_ANY */
671
672#if SECONDARY_FGK
673#include "xdelta3-fgk.h"
674
675static const xd3_sec_type fgk_sec_type =
676{
677 VCD_FGK_ID,
678 "FGK Adaptive Huffman",
679 SEC_NOFLAGS,
680 (xd3_sec_stream* (*)()) fgk_alloc,
681 (void (*)()) fgk_destroy,
682 (void (*)()) fgk_init,
683 (int (*)()) xd3_decode_fgk,
684 IF_ENCODER((int (*)()) xd3_encode_fgk)
685};
686
687#define IF_FGK(x) x
688#define FGK_CASE(s) \
689 s->sec_type = & fgk_sec_type; \
690 break;
691#else
692#define IF_FGK(x)
693#define FGK_CASE(s) \
694 s->msg = "unavailable secondary compressor: FGK Adaptive Huffman"; \
695 return EINVAL;
696#endif
697
698#if SECONDARY_DJW
699#include "xdelta3-djw.h"
700
701static const xd3_sec_type djw_sec_type =
702{
703 VCD_DJW_ID,
704 "Static Huffman",
705 SEC_COUNT_FREQS,
706 (xd3_sec_stream* (*)()) djw_alloc,
707 (void (*)()) djw_destroy,
708 (void (*)()) djw_init,
709 (int (*)()) xd3_decode_huff,
710 IF_ENCODER((int (*)()) xd3_encode_huff)
711};
712
713#define IF_DJW(x) x
714#define DJW_CASE(s) \
715 s->sec_type = & djw_sec_type; \
716 break;
717#else
718#define IF_DJW(x)
719#define DJW_CASE(s) \
720 s->msg = "unavailable secondary compressor: DJW Static Huffman"; \
721 return EINVAL;
722#endif
723
724/******************************************************************************************/
725
726/* Abbreviate frequently referenced fields. */
727#define max_in stream->avail_in
728#define pos_in stream->input_position
729#define min_match stream->min_match
730
731/* Process the inline pass. */
732#define __XDELTA3_C_INLINE_PASS__
733#include "xdelta3.c"
734#undef __XDELTA3_C_INLINE_PASS__
735
736/* Process template passes - this includes xdelta3.c several times. */
737#define __XDELTA3_C_TEMPLATE_PASS__
738#include "xdelta3-cfgs.h"
739#undef __XDELTA3_C_TEMPLATE_PASS__
740
741#undef max_in
742#undef pos_in
743#undef min_match
744
745#if XD3_MAIN || PYTHON_MODULE
746#include "xdelta3-main.h"
747#endif
748
749#if REGRESSION_TEST
750#include "xdelta3-test.h"
751#endif
752
753#if PYTHON_MODULE
754#include "xdelta3-python.h"
755#endif
756
757#endif /* __XDELTA3_C_HEADER_PASS__ */
758#ifdef __XDELTA3_C_INLINE_PASS__
759
760/******************************************************************************************
761 Instruction tables
762 ******************************************************************************************/
763
764/* The following code implements a parametrized description of the
765 * code table given above for a few reasons. It is not necessary for
766 * implementing the standard, to support compression with variable
767 * tables, so an implementation is only required to know the default
768 * code table to begin decompression. (If the encoder uses an
769 * alternate table, the table is included in compressed form inside
770 * the VCDIFF file.)
771 *
772 * Before adding variable-table support there were two functions which
773 * were hard-coded to the default table above.
774 * xd3_compute_default_table() would create the default table by
775 * filling a 256-elt array of xd3_dinst values. The corresponding
776 * function, xd3_choose_instruction(), would choose an instruction
777 * based on the hard-coded parameters of the default code table.
778 *
779 * Notes: The parametrized code table description here only generates
780 * tables of a certain regularity similar to the default table by
781 * allowing to vary the distribution of single- and
782 * double-instructions and change the number of near and same copy
783 * modes. More exotic tables are only possible by extending this
784 * code, but a detailed experiment would need to be carried out first,
785 * probably using separate code. I would like to experiment with a
786 * double-copy instruction, for example.
787 *
788 * For performance reasons, both the parametrized and non-parametrized
789 * versions of xd3_choose_instruction remain. The parametrized
790 * version is only needed for testing multi-table decoding support.
791 * If ever multi-table encoding is required, this can be optimized by
792 * compiling static functions for each table.
793 */
794
795/* The XD3_CHOOSE_INSTRUCTION calls xd3_choose_instruction with the
796 * table description when GENERIC_ENCODE_TABLES are in use. The
797 * IF_GENCODETBL macro enables generic-code-table specific code. */
798#if GENERIC_ENCODE_TABLES
799#define XD3_CHOOSE_INSTRUCTION(stream,prev,inst) xd3_choose_instruction (stream->code_table_desc, prev, inst)
800#define IF_GENCODETBL(x) x
801#else
802#define XD3_CHOOSE_INSTRUCTION(stream,prev,inst) xd3_choose_instruction (prev, inst)
803#define IF_GENCODETBL(x)
804#endif
805
/* Per-copy-mode parameters that xd3_choose_instruction uses to compute
 * the code of a double instruction.  An array of these is indexed by copy
 * mode, and the code is then formed as (offset + (multiplier * X)). */
struct _xd3_code_table_sizes {
  /* Largest copy size that can take part in the double instruction. */
  uint8_t cpy_max;
  /* Code of the first double instruction for this copy mode. */
  uint8_t offset;
  /* Multiplier applied to the size of the first instruction of the pair. */
  uint8_t mult;
};
815
816/* This contains a complete description of a code table. */
817struct _xd3_code_table_desc
818{
819 /* Assumes a single RUN instruction */
820 /* Assumes that MIN_MATCH is 4 */
821
822 uint8_t add_sizes; /* Number of immediate-size single adds (default 17) */
823 uint8_t near_modes; /* Number of near copy modes (default 4) */
824 uint8_t same_modes; /* Number of same copy modes (default 3) */
825 uint8_t cpy_sizes; /* Number of immediate-size single copies (default 15) */
826
827 uint8_t addcopy_add_max; /* Maximum add size for an add-copy double instruction, all modes (default 4) */
828 uint8_t addcopy_near_cpy_max; /* Maximum cpy size for an add-copy double instruction, up through VCD_NEAR modes (default 6) */
829 uint8_t addcopy_same_cpy_max; /* Maximum cpy size for an add-copy double instruction, VCD_SAME modes (default 4) */
830
831 uint8_t copyadd_add_max; /* Maximum add size for a copy-add double instruction, all modes (default 1) */
832 uint8_t copyadd_near_cpy_max; /* Maximum cpy size for a copy-add double instruction, up through VCD_NEAR modes (default 4) */
833 uint8_t copyadd_same_cpy_max; /* Maximum cpy size for a copy-add double instruction, VCD_SAME modes (default 4) */
834
835 xd3_code_table_sizes addcopy_max_sizes[MAX_MODES];
836 xd3_code_table_sizes copyadd_max_sizes[MAX_MODES];
837};
838
839/* The rfc3284 code table is represented: */
840static const xd3_code_table_desc __rfc3284_code_table_desc = {
841 17, /* add sizes */
842 4, /* near modes */
843 3, /* same modes */
844 15, /* copy sizes */
845
846 4, /* add-copy max add */
847 6, /* add-copy max cpy, near */
848 4, /* add-copy max cpy, same */
849
850 1, /* copy-add max add */
851 4, /* copy-add max cpy, near */
852 4, /* copy-add max cpy, same */
853
854 /* addcopy */
855 { {6,163,3},{6,175,3},{6,187,3},{6,199,3},{6,211,3},{6,223,3},{4,235,1},{4,239,1},{4,243,1} },
856 /* copyadd */
857 { {4,247,1},{4,248,1},{4,249,1},{4,250,1},{4,251,1},{4,252,1},{4,253,1},{4,254,1},{4,255,1} },
858};
859
860#if GENERIC_ENCODE_TABLES
861/* An alternate code table for testing (5 near, 0 same):
862 *
863 * TYPE SIZE MODE TYPE SIZE MODE INDEX
864 * ---------------------------------------------------------------
865 * 1. Run 0 0 Noop 0 0 0
866 * 2. Add 0, [1,23] 0 Noop 0 0 [1,24]
867 * 3. Copy 0, [4,20] 0 Noop 0 0 [25,42]
868 * 4. Copy 0, [4,20] 1 Noop 0 0 [43,60]
869 * 5. Copy 0, [4,20] 2 Noop 0 0 [61,78]
870 * 6. Copy 0, [4,20] 3 Noop 0 0 [79,96]
871 * 7. Copy 0, [4,20] 4 Noop 0 0 [97,114]
872 * 8. Copy 0, [4,20] 5 Noop 0 0 [115,132]
873 * 9. Copy 0, [4,20] 6 Noop 0 0 [133,150]
874 * 10. Add [1,4] 0 Copy [4,6] 0 [151,162]
875 * 11. Add [1,4] 0 Copy [4,6] 1 [163,174]
876 * 12. Add [1,4] 0 Copy [4,6] 2 [175,186]
877 * 13. Add [1,4] 0 Copy [4,6] 3 [187,198]
878 * 14. Add [1,4] 0 Copy [4,6] 4 [199,210]
879 * 15. Add [1,4] 0 Copy [4,6] 5 [211,222]
880 * 16. Add [1,4] 0 Copy [4,6] 6 [223,234]
881 * 17. Copy 4 [0,6] Add [1,3] 0 [235,255]
882 * --------------------------------------------------------------- */
883static const xd3_code_table_desc __alternate_code_table_desc = {
884 23, /* add sizes */
885 5, /* near modes */
886 0, /* same modes */
887 17, /* copy sizes */
888
889 4, /* add-copy max add */
890 6, /* add-copy max cpy, near */
891 0, /* add-copy max cpy, same */
892
893 3, /* copy-add max add */
894 4, /* copy-add max cpy, near */
895 0, /* copy-add max cpy, same */
896
897 /* addcopy */
898 { {6,151,3},{6,163,3},{6,175,3},{6,187,3},{6,199,3},{6,211,3},{6,223,3},{0,0,0},{0,0,0} },
899 /* copyadd */
900 { {4,235,1},{4,238,1},{4,241,1},{4,244,1},{4,247,1},{4,250,1},{4,253,1},{0,0,0},{0,0,0} },
901};
902#endif
903
904/* Computes code table entries of TBL using the specified description. */
905static void
906xd3_build_code_table (const xd3_code_table_desc *desc, xd3_dinst *tbl)
907{
908 int size1, size2, mode;
909 int cpy_modes = 2 + desc->near_modes + desc->same_modes;
910 xd3_dinst *d = tbl;
911
912 (d++)->type1 = XD3_RUN;
913 (d++)->type1 = XD3_ADD;
914
915 for (size1 = 1; size1 <= desc->add_sizes; size1 += 1, d += 1)
916 {
917 d->type1 = XD3_ADD;
918 d->size1 = size1;
919 }
920
921 for (mode = 0; mode < cpy_modes; mode += 1)
922 {
923 (d++)->type1 = XD3_CPY + mode;
924
925 for (size1 = MIN_MATCH; size1 < MIN_MATCH + desc->cpy_sizes; size1 += 1, d += 1)
926 {
927 d->type1 = XD3_CPY + mode;
928 d->size1 = size1;
929 }
930 }
931
932 for (mode = 0; mode < cpy_modes; mode += 1)
933 {
934 for (size1 = 1; size1 <= desc->addcopy_add_max; size1 += 1)
935 {
936 int max = (mode < 2 + desc->near_modes) ? desc->addcopy_near_cpy_max : desc->addcopy_same_cpy_max;
937
938 for (size2 = MIN_MATCH; size2 <= max; size2 += 1, d += 1)
939 {
940 d->type1 = XD3_ADD;
941 d->size1 = size1;
942 d->type2 = XD3_CPY + mode;
943 d->size2 = size2;
944 }
945 }
946 }
947
948 for (mode = 0; mode < cpy_modes; mode += 1)
949 {
950 int max = (mode < 2 + desc->near_modes) ? desc->copyadd_near_cpy_max : desc->copyadd_same_cpy_max;
951
952 for (size1 = MIN_MATCH; size1 <= max; size1 += 1)
953 {
954 for (size2 = 1; size2 <= desc->copyadd_add_max; size2 += 1, d += 1)
955 {
956 d->type1 = XD3_CPY + mode;
957 d->size1 = size1;
958 d->type2 = XD3_ADD;
959 d->size2 = size2;
960 }
961 }
962 }
963
964 XD3_ASSERT (d - tbl == 256);
965}
966
967/* This function generates the static default code table. */
968static const xd3_dinst*
969xd3_rfc3284_code_table (void)
970{
971 static xd3_dinst __rfc3284_code_table[256];
972
973 if (__rfc3284_code_table[0].type1 != XD3_RUN)
974 {
975 xd3_build_code_table (& __rfc3284_code_table_desc, __rfc3284_code_table);
976 }
977
978 return __rfc3284_code_table;
979}
980
981#if XD3_ENCODER
982#if GENERIC_ENCODE_TABLES
983/* This function generates the alternate code table. */
984static const xd3_dinst*
985xd3_alternate_code_table (void)
986{
987 static xd3_dinst __alternate_code_table[256];
988
989 if (__alternate_code_table[0].type1 != XD3_RUN)
990 {
991 xd3_build_code_table (& __alternate_code_table_desc, __alternate_code_table);
992 }
993
994 return __alternate_code_table;
995}
996
997/* This function computes the ideal second instruction INST based on preceding instruction
998 * PREV. If it is possible to issue a double instruction based on this pair it sets
999 * PREV->code2, otherwise it sets INST->code1. */
1000static void
1001xd3_choose_instruction (const xd3_code_table_desc *desc, xd3_rinst *prev, xd3_rinst *inst)
1002{
1003 switch (inst->type)
1004 {
1005 case XD3_RUN:
1006 /* The 0th instruction is RUN */
1007 inst->code1 = 0;
1008 break;
1009
1010 case XD3_ADD:
1011
1012 if (inst->size > desc->add_sizes)
1013 {
1014 /* The first instruction is non-immediate ADD */
1015 inst->code1 = 1;
1016 }
1017 else
1018 {
1019 /* The following ADD_SIZES instructions are immediate ADDs */
1020 inst->code1 = 1 + inst->size;
1021
1022 /* Now check for a possible COPY-ADD double instruction */
1023 if (prev != NULL)
1024 {
1025 int prev_mode = prev->type - XD3_CPY;
1026
1027 /* If previous is a copy. Note: as long as the previous is not a RUN
1028 * instruction, it should be a copy because it cannot be an add. This check
1029 * is more clear. */
1030 if (prev_mode >= 0 && inst->size <= desc->copyadd_add_max)
1031 {
1032 const xd3_code_table_sizes *sizes = & desc->copyadd_max_sizes[prev_mode];
1033
1034 /* This check and the inst->size-<= above are == in the default table. */
1035 if (prev->size <= sizes->cpy_max)
1036 {
1037 /* The second and third exprs are 0 in the default table. */
1038 prev->code2 = sizes->offset + (sizes->mult * (prev->size - MIN_MATCH)) + (inst->size - MIN_ADD);
1039 }
1040 }
1041 }
1042 }
1043 break;
1044
1045 default:
1046 {
1047 int mode = inst->type - XD3_CPY;
1048
1049 /* The large copy instruction is offset by the run, large add, and immediate adds,
1050 * then multipled by the number of immediate copies plus one (the large copy)
1051 * (i.e., if there are 15 immediate copy instructions then there are 16 copy
1052 * instructions per mode). */
1053 inst->code1 = 2 + desc->add_sizes + (1 + desc->cpy_sizes) * mode;
1054
1055 /* Now if the copy is short enough for an immediate instruction. */
1056 if (inst->size < MIN_MATCH + desc->cpy_sizes)
1057 {
1058 inst->code1 += inst->size + 1 - MIN_MATCH;
1059
1060 /* Now check for a possible ADD-COPY double instruction. */
1061 if ( (prev != NULL) &&
1062 (prev->type == XD3_ADD) &&
1063 (prev->size <= desc->addcopy_add_max) )
1064 {
1065 const xd3_code_table_sizes *sizes = & desc->addcopy_max_sizes[mode];
1066
1067 if (inst->size <= sizes->cpy_max)
1068 {
1069 prev->code2 = sizes->offset + (sizes->mult * (prev->size - MIN_ADD)) + (inst->size - MIN_MATCH);
1070 }
1071 }
1072 }
1073 }
1074 }
1075}
1076#else /* GENERIC_ENCODE_TABLES */
1077
1078/* This version of xd3_choose_instruction is hard-coded for the default table. */
1079static void
1080xd3_choose_instruction (/* const xd3_code_table_desc *desc,*/ xd3_rinst *prev, xd3_rinst *inst)
1081{
1082 switch (inst->type)
1083 {
1084 case XD3_RUN:
1085 inst->code1 = 0;
1086 break;
1087
1088 case XD3_ADD:
1089 inst->code1 = 1;
1090
1091 if (inst->size <= 17)
1092 {
1093 inst->code1 += inst->size;
1094
1095 if ( (inst->size == 1) &&
1096 (prev != NULL) &&
1097 (prev->size == 4) &&
1098 (prev->type >= XD3_CPY) )
1099 {
1100 prev->code2 = 247 + (prev->type - XD3_CPY);
1101 }
1102 }
1103
1104 break;
1105
1106 default:
1107 {
1108 int mode = inst->type - XD3_CPY;
1109
1110 XD3_ASSERT (inst->type >= XD3_CPY && inst->type < 12);
1111
1112 inst->code1 = 19 + 16 * mode;
1113
1114 if (inst->size <= 18)
1115 {
1116 inst->code1 += inst->size - 3;
1117
1118 if ( (prev != NULL) &&
1119 (prev->type == XD3_ADD) &&
1120 (prev->size <= 4) )
1121 {
1122 if ( (inst->size <= 6) &&
1123 (mode <= 5) )
1124 {
1125 prev->code2 = 163 + (mode * 12) + (3 * (prev->size - 1)) + (inst->size - 4);
1126
1127 XD3_ASSERT (prev->code2 <= 234);
1128 }
1129 else if ( (inst->size == 4) &&
1130 (mode >= 6) )
1131 {
1132 prev->code2 = 235 + ((mode - 6) * 4) + (prev->size - 1);
1133
1134 XD3_ASSERT (prev->code2 <= 246);
1135 }
1136 }
1137 }
1138
1139 XD3_ASSERT (inst->code1 <= 162);
1140 }
1141 break;
1142 }
1143}
1144#endif /* GENERIC_ENCODE_TABLES */
1145
1146/******************************************************************************************
1147 Instruction table encoder/decoder
1148 ******************************************************************************************/
1149
1150#if GENERIC_ENCODE_TABLES
1151#if GENERIC_ENCODE_TABLES_COMPUTE == 0
1152
1153/* In this case, we hard-code the result of compute_code_table_encoding for each alternate
1154 * code table, presuming that saves time/space. This has been 131 bytes, but secondary
1155 * compression was turned off. */
1156static const uint8_t __alternate_code_table_compressed[178] =
1157{0xd6,0xc3,0xc4,0x00,0x00,0x01,0x8a,0x6f,0x40,0x81,0x27,0x8c,0x00,0x00,0x4a,0x4a,0x0d,0x02,0x01,0x03,
11580x01,0x03,0x00,0x01,0x00,0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,
11590x0f,0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x00,0x01,0x01,0x01,0x02,0x02,0x02,0x03,0x03,0x03,0x04,
11600x04,0x04,0x04,0x00,0x04,0x05,0x06,0x01,0x02,0x03,0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x05,0x05,0x05,
11610x06,0x06,0x06,0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x00,0x02,0x00,0x18,0x13,0x63,0x00,0x1b,0x00,0x54,
11620x00,0x15,0x23,0x6f,0x00,0x28,0x13,0x54,0x00,0x15,0x01,0x1a,0x31,0x23,0x6c,0x0d,0x23,0x48,0x00,0x15,
11630x93,0x6f,0x00,0x28,0x04,0x23,0x51,0x04,0x32,0x00,0x2b,0x00,0x12,0x00,0x12,0x00,0x12,0x00,0x12,0x00,
11640x12,0x00,0x12,0x53,0x57,0x9c,0x07,0x43,0x6f,0x00,0x34,0x00,0x0c,0x00,0x0c,0x00,0x0c,0x00,0x0c,0x00,
11650x0c,0x00,0x0c,0x00,0x15,0x00,0x82,0x6f,0x00,0x15,0x12,0x0c,0x00,0x03,0x03,0x00,0x06,0x00,};
1166
1167static int
1168xd3_compute_alternate_table_encoding (xd3_stream *stream, const uint8_t **data, usize_t *size)
1169{
1170 (*data) = __alternate_code_table_compressed;
1171 (*size) = sizeof (__alternate_code_table_compressed);
1172 return 0;
1173}
1174
1175#else
1176
1177/* The alternate code table will be computed and stored here. */
1178static uint8_t __alternate_code_table_compressed[CODE_TABLE_VCDIFF_SIZE];
1179static usize_t __alternate_code_table_compressed_size;
1180
1181/* This function generates a delta describing the code table for encoding within a VCDIFF
1182 * file. This function is NOT thread safe because it is only intended that this function
1183 * is used to generate statically-compiled strings. */
1184int xd3_compute_code_table_encoding (xd3_stream *in_stream, const xd3_dinst *code_table,
1185 uint8_t *comp_string, usize_t *comp_string_size)
1186{
1187 uint8_t dflt_string[CODE_TABLE_STRING_SIZE];
1188 uint8_t code_string[CODE_TABLE_STRING_SIZE];
1189 xd3_stream stream;
1190 xd3_source source;
1191 xd3_config config;
1192 int ret;
1193
1194 memset (& source, 0, sizeof (source));
1195
1196 xd3_compute_code_table_string (xd3_rfc3284_code_table (), dflt_string);
1197 xd3_compute_code_table_string (code_table, code_string);
1198
1199 /* Use DJW secondary compression if it is on by default. This saves about 20 bytes. */
1200 xd3_init_config (& config, XD3_FLUSH | (SECONDARY_DJW ? XD3_SEC_DJW : 0));
1201
1202 /* Be exhaustive. */
1203 config.sprevsz = 1<<11;
1204 config.memsize = CODE_TABLE_STRING_SIZE * 10;
1205
1206 config.large_look = 4;
1207 config.large_step = 1;
1208 config.small_look = 4;
1209 config.small_chain = CODE_TABLE_STRING_SIZE;
1210 config.small_lchain = CODE_TABLE_STRING_SIZE;
1211 config.ssmatch = 1;
1212 config.try_lazy = 1;
1213 config.max_lazy = CODE_TABLE_STRING_SIZE;
1214 config.long_enough = CODE_TABLE_STRING_SIZE;
1215 config.promote = 1;
1216 config.srcwin_size = CODE_TABLE_STRING_SIZE;
1217 config.srcwin_maxsz = CODE_TABLE_STRING_SIZE;
1218
1219 if ((ret = xd3_config_stream (& stream, & config))) { goto fail; }
1220
1221 source.size = CODE_TABLE_STRING_SIZE;
1222 source.blksize = CODE_TABLE_STRING_SIZE;
1223 source.onblk = CODE_TABLE_STRING_SIZE;
1224 source.name = "";
1225 source.curblk = dflt_string;
1226 source.curblkno = 0;
1227
1228 if ((ret = xd3_set_source (& stream, & source))) { goto fail; }
1229
1230 if ((ret = xd3_encode_completely (& stream, code_string, CODE_TABLE_STRING_SIZE,
1231 comp_string, comp_string_size, CODE_TABLE_VCDIFF_SIZE))) { goto fail; }
1232
1233 fail:
1234
1235 in_stream->msg = stream.msg;
1236 xd3_free_stream (& stream);
1237 return ret;
1238}
1239
1240/* Compute a delta between alternate and rfc3284 tables. As soon as another alternate
1241 * table is added, this code should become generic. For now there is only one alternate
1242 * table for testing. */
1243static int
1244xd3_compute_alternate_table_encoding (xd3_stream *stream, const uint8_t **data, usize_t *size)
1245{
1246 int ret;
1247
1248 if (__alternate_code_table_compressed[0] == 0)
1249 {
1250 if ((ret = xd3_compute_code_table_encoding (stream, xd3_alternate_code_table (),
1251 __alternate_code_table_compressed,
1252 & __alternate_code_table_compressed_size)))
1253 {
1254 return ret;
1255 }
1256
1257 /* During development of a new code table, enable this variable to print the new
1258 * static contents and determine its size. At run time the table will be filled in
1259 * appropriately, but at least it should have the proper size beforehand. */
1260#if GENERIC_ENCODE_TABLES_COMPUTE_PRINT
1261 {
1262 int i;
1263
1264 P(RINT, "\nstatic const usize_t __alternate_code_table_compressed_size = %u;\n",
1265 __alternate_code_table_compressed_size);
1266
1267 P(RINT, "static const uint8_t __alternate_code_table_compressed[%u] =\n{",
1268 __alternate_code_table_compressed_size);
1269
1270 for (i = 0; i < __alternate_code_table_compressed_size; i += 1)
1271 {
1272 P(RINT, "0x%02x,", __alternate_code_table_compressed[i]);
1273 if ((i % 20) == 19) { P(RINT, "\n"); }
1274 }
1275
1276 P(RINT, "};\n");
1277 }
1278#endif
1279 }
1280
1281 (*data) = __alternate_code_table_compressed;
1282 (*size) = __alternate_code_table_compressed_size;
1283
1284 return 0;
1285}
1286#endif /* GENERIC_ENCODE_TABLES_COMPUTE != 0 */
1287#endif /* GENERIC_ENCODE_TABLES */
1288
1289#endif /* XD3_ENCODER */
1290
1291/* This function generates the 1536-byte string specified in sections 5.4 and 7 of
1292 * rfc3284, which is used to represent a code table within a VCDIFF file. */
1293void xd3_compute_code_table_string (const xd3_dinst *code_table, uint8_t *str)
1294{
1295 int i, s;
1296
1297 XD3_ASSERT (CODE_TABLE_STRING_SIZE == 6 * 256);
1298
1299 for (s = 0; s < 6; s += 1)
1300 {
1301 for (i = 0; i < 256; i += 1)
1302 {
1303 switch (s)
1304 {
1305 case 0: *str++ = (code_table[i].type1 >= XD3_CPY ? XD3_CPY : code_table[i].type1); break;
1306 case 1: *str++ = (code_table[i].type2 >= XD3_CPY ? XD3_CPY : code_table[i].type2); break;
1307 case 2: *str++ = (code_table[i].size1); break;
1308 case 3: *str++ = (code_table[i].size2); break;
1309 case 4: *str++ = (code_table[i].type1 >= XD3_CPY ? code_table[i].type1 - XD3_CPY : 0); break;
1310 case 5: *str++ = (code_table[i].type2 >= XD3_CPY ? code_table[i].type2 - XD3_CPY : 0); break;
1311 }
1312 }
1313 }
1314}
1315
1316/* This function translates the code table string into the internal representation. The
1317 * stream's near and same-modes should already be set. */
1318static int
1319xd3_apply_table_string (xd3_stream *stream, const uint8_t *code_string)
1320{
1321 int i, s;
1322 int modes = TOTAL_MODES (stream);
1323 xd3_dinst *code_table;
1324
1325 if ((code_table = stream->code_table_alloc = xd3_alloc (stream, sizeof (xd3_dinst), 256)) == NULL)
1326 {
1327 return ENOMEM;
1328 }
1329
1330 for (s = 0; s < 6; s += 1)
1331 {
1332 for (i = 0; i < 256; i += 1)
1333 {
1334 switch (s)
1335 {
1336 case 0:
1337 if (*code_string > XD3_CPY)
1338 {
1339 stream->msg = "invalid code-table opcode";
1340 return EINVAL;
1341 }
1342 code_table[i].type1 = *code_string++;
1343 break;
1344 case 1:
1345 if (*code_string > XD3_CPY)
1346 {
1347 stream->msg = "invalid code-table opcode";
1348 return EINVAL;
1349 }
1350 code_table[i].type2 = *code_string++;
1351 break;
1352 case 2:
1353 if (*code_string != 0 && code_table[i].type1 == XD3_NOOP)
1354 {
1355 stream->msg = "invalid code-table size";
1356 return EINVAL;
1357 }
1358 code_table[i].size1 = *code_string++;
1359 break;
1360 case 3:
1361 if (*code_string != 0 && code_table[i].type2 == XD3_NOOP)
1362 {
1363 stream->msg = "invalid code-table size";
1364 return EINVAL;
1365 }
1366 code_table[i].size2 = *code_string++;
1367 break;
1368 case 4:
1369 if (*code_string >= modes)
1370 {
1371 stream->msg = "invalid code-table mode";
1372 return EINVAL;
1373 }
1374 if (*code_string != 0 && code_table[i].type1 != XD3_CPY)
1375 {
1376 stream->msg = "invalid code-table mode";
1377 return EINVAL;
1378 }
1379 code_table[i].type1 += *code_string++;
1380 break;
1381 case 5:
1382 if (*code_string >= modes)
1383 {
1384 stream->msg = "invalid code-table mode";
1385 return EINVAL;
1386 }
1387 if (*code_string != 0 && code_table[i].type2 != XD3_CPY)
1388 {
1389 stream->msg = "invalid code-table mode";
1390 return EINVAL;
1391 }
1392 code_table[i].type2 += *code_string++;
1393 break;
1394 }
1395 }
1396 }
1397
1398 stream->code_table = code_table;
1399 return 0;
1400}
1401
1402/* This function applies a code table delta and returns an actual code table. */
1403static int
1404xd3_apply_table_encoding (xd3_stream *in_stream, const uint8_t *data, usize_t size)
1405{
1406 uint8_t dflt_string[CODE_TABLE_STRING_SIZE];
1407 uint8_t code_string[CODE_TABLE_STRING_SIZE];
1408 usize_t code_size;
1409 xd3_stream stream;
1410 xd3_source source;
1411 int ret;
1412
1413 /* The default code table string can be cached if alternate code tables ever become
1414 * popular. */
1415 xd3_compute_code_table_string (xd3_rfc3284_code_table (), dflt_string);
1416
1417 source.size = CODE_TABLE_STRING_SIZE;
1418 source.blksize = CODE_TABLE_STRING_SIZE;
1419 source.onblk = CODE_TABLE_STRING_SIZE;
1420 source.name = "rfc3284 code table";
1421 source.curblk = dflt_string;
1422 source.curblkno = 0;
1423
1424 if ((ret = xd3_config_stream (& stream, NULL)) ||
1425 (ret = xd3_set_source (& stream, & source)) ||
1426 (ret = xd3_decode_completely (& stream, data, size, code_string, & code_size, sizeof (code_string))))
1427 {
1428 in_stream->msg = stream.msg;
1429 goto fail;
1430 }
1431
1432 if (code_size != sizeof (code_string))
1433 {
1434 stream.msg = "corrupt code-table encoding";
1435 ret = EINVAL;
1436 goto fail;
1437 }
1438
1439 if ((ret = xd3_apply_table_string (in_stream, code_string))) { goto fail; }
1440
1441 fail:
1442
1443 xd3_free_stream (& stream);
1444 return ret;
1445}
1446
1447/******************************************************************************************
1448 Permute stuff
1449 ******************************************************************************************/
1450
1451#if HASH_PERMUTE == 0
1452#define PERMUTE(x) (x)
1453#else
1454#define PERMUTE(x) (__single_hash[(uint)x])
1455
1456static const uint16_t __single_hash[256] =
1457{
1458 /* Random numbers generated using SLIB's pseudo-random number generator. This hashes
1459 * the input alphabet. */
1460 0xbcd1, 0xbb65, 0x42c2, 0xdffe, 0x9666, 0x431b, 0x8504, 0xeb46,
1461 0x6379, 0xd460, 0xcf14, 0x53cf, 0xdb51, 0xdb08, 0x12c8, 0xf602,
1462 0xe766, 0x2394, 0x250d, 0xdcbb, 0xa678, 0x02af, 0xa5c6, 0x7ea6,
1463 0xb645, 0xcb4d, 0xc44b, 0xe5dc, 0x9fe6, 0x5b5c, 0x35f5, 0x701a,
1464 0x220f, 0x6c38, 0x1a56, 0x4ca3, 0xffc6, 0xb152, 0x8d61, 0x7a58,
1465 0x9025, 0x8b3d, 0xbf0f, 0x95a3, 0xe5f4, 0xc127, 0x3bed, 0x320b,
1466 0xb7f3, 0x6054, 0x333c, 0xd383, 0x8154, 0x5242, 0x4e0d, 0x0a94,
1467 0x7028, 0x8689, 0x3a22, 0x0980, 0x1847, 0xb0f1, 0x9b5c, 0x4176,
1468 0xb858, 0xd542, 0x1f6c, 0x2497, 0x6a5a, 0x9fa9, 0x8c5a, 0x7743,
1469 0xa8a9, 0x9a02, 0x4918, 0x438c, 0xc388, 0x9e2b, 0x4cad, 0x01b6,
1470 0xab19, 0xf777, 0x365f, 0x1eb2, 0x091e, 0x7bf8, 0x7a8e, 0x5227,
1471 0xeab1, 0x2074, 0x4523, 0xe781, 0x01a3, 0x163d, 0x3b2e, 0x287d,
1472 0x5e7f, 0xa063, 0xb134, 0x8fae, 0x5e8e, 0xb7b7, 0x4548, 0x1f5a,
1473 0xfa56, 0x7a24, 0x900f, 0x42dc, 0xcc69, 0x02a0, 0x0b22, 0xdb31,
1474 0x71fe, 0x0c7d, 0x1732, 0x1159, 0xcb09, 0xe1d2, 0x1351, 0x52e9,
1475 0xf536, 0x5a4f, 0xc316, 0x6bf9, 0x8994, 0xb774, 0x5f3e, 0xf6d6,
1476 0x3a61, 0xf82c, 0xcc22, 0x9d06, 0x299c, 0x09e5, 0x1eec, 0x514f,
1477 0x8d53, 0xa650, 0x5c6e, 0xc577, 0x7958, 0x71ac, 0x8916, 0x9b4f,
1478 0x2c09, 0x5211, 0xf6d8, 0xcaaa, 0xf7ef, 0x287f, 0x7a94, 0xab49,
1479 0xfa2c, 0x7222, 0xe457, 0xd71a, 0x00c3, 0x1a76, 0xe98c, 0xc037,
1480 0x8208, 0x5c2d, 0xdfda, 0xe5f5, 0x0b45, 0x15ce, 0x8a7e, 0xfcad,
1481 0xaa2d, 0x4b5c, 0xd42e, 0xb251, 0x907e, 0x9a47, 0xc9a6, 0xd93f,
1482 0x085e, 0x35ce, 0xa153, 0x7e7b, 0x9f0b, 0x25aa, 0x5d9f, 0xc04d,
1483 0x8a0e, 0x2875, 0x4a1c, 0x295f, 0x1393, 0xf760, 0x9178, 0x0f5b,
1484 0xfa7d, 0x83b4, 0x2082, 0x721d, 0x6462, 0x0368, 0x67e2, 0x8624,
1485 0x194d, 0x22f6, 0x78fb, 0x6791, 0xb238, 0xb332, 0x7276, 0xf272,
1486 0x47ec, 0x4504, 0xa961, 0x9fc8, 0x3fdc, 0xb413, 0x007a, 0x0806,
1487 0x7458, 0x95c6, 0xccaa, 0x18d6, 0xe2ae, 0x1b06, 0xf3f6, 0x5050,
1488 0xc8e8, 0xf4ac, 0xc04c, 0xf41c, 0x992f, 0xae44, 0x5f1b, 0x1113,
1489 0x1738, 0xd9a8, 0x19ea, 0x2d33, 0x9698, 0x2fe9, 0x323f, 0xcde2,
1490 0x6d71, 0xe37d, 0xb697, 0x2c4f, 0x4373, 0x9102, 0x075d, 0x8e25,
1491 0x1672, 0xec28, 0x6acb, 0x86cc, 0x186e, 0x9414, 0xd674, 0xd1a5
1492};
1493#endif
1494
1495/******************************************************************************************
1496 Ctable stuff
1497 ******************************************************************************************/
1498
#if HASH_PRIME
/* Candidate hash table sizes, in increasing order.  xd3_size_hashtable() selects from
 * this list. */
static const usize_t __primes[] =
{
          11,         19,         37,         73,        109,
         163,        251,        367,        557,        823,
        1237,       1861,       2777,       4177,       6247,
        9371,      14057,      21089,      31627,      47431,
       71143,     106721,     160073,     240101,     360163,
      540217,     810343,    1215497,    1823231,    2734867,
     4102283,    6153409,    9230113,   13845163,   20767711,
    31151543,   46727321,   70090921,  105136301,  157704401,
   236556601,  354834919,  532252367,  798378509, 1197567719,
  1796351503
};

/* Number of entries in __primes. */
static const usize_t __nprimes = SIZEOF_ARRAY (__primes);
#endif
1516
1517static INLINE uint32_t
1518xd3_checksum_hash (const xd3_hash_cfg *cfg, const uint32_t cksum)
1519{
1520#if HASH_PRIME
1521 /* If the table is prime compute the modulus. */
1522 return (cksum % cfg->size);
1523#else
1524 /* If the table is power-of-two compute the mask.*/
1525 return (cksum ^ (cksum >> cfg->shift)) & cfg->mask;
1526#endif
1527}
1528
1529/******************************************************************************************
1530 Create the hash table.
1531 ******************************************************************************************/
1532
1533static INLINE void
1534xd3_swap_uint8p (uint8_t** p1, uint8_t** p2)
1535{
1536 uint8_t *t = (*p1);
1537 (*p1) = (*p2);
1538 (*p2) = t;
1539}
1540
1541static INLINE void
1542xd3_swap_usize_t (usize_t* p1, usize_t* p2)
1543{
1544 usize_t t = (*p1);
1545 (*p1) = (*p2);
1546 (*p2) = t;
1547}
1548
1549/* It's not constant time, but it computes the log. */
1550static int
1551xd3_check_pow2 (usize_t value, usize_t *logof)
1552{
1553 usize_t x = 1;
1554 usize_t nolog;
1555 if (logof == NULL) {
1556 logof = &nolog;
1557 }
1558
1559 *logof = 0;
1560
1561 for (; x != 0; x <<= 1, *logof += 1)
1562 {
1563 if (x == value)
1564 {
1565 return 0;
1566 }
1567 }
1568
1569 return EINVAL;
1570}
1571
1572static usize_t
1573xd3_round_blksize (usize_t sz, usize_t blksz)
1574{
1575 usize_t mod = sz & (blksz-1);
1576
1577 XD3_ASSERT (xd3_check_pow2 (blksz, NULL) == 0);
1578
1579 return mod ? (sz + (blksz - mod)) : sz;
1580}
1581
1582#if XD3_ENCODER
1583#if !HASH_PRIME
1584static usize_t
1585xd3_size_log2 (usize_t slots)
1586{
1587 int bits = 28; /* This should not be an unreasonable limit. */
1588 int i;
1589
1590 for (i = 3; i <= bits; i += 1)
1591 {
1592 if (slots < (1 << i))
1593 {
1594 bits = i-1;
1595 break;
1596 }
1597 }
1598
1599 return bits;
1600}
1601#endif
1602
1603static void
1604xd3_size_hashtable (xd3_stream *stream,
1605 usize_t space,
1606 xd3_hash_cfg *cfg)
1607{
1608 usize_t slots = space / sizeof (usize_t);
1609
1610 /* initialize ctable: the number of hash buckets is computed from the table of primes or
1611 * the nearest power-of-two, in both cases rounding down in favor of using less
1612 * memory. */
1613
1614#if HASH_PRIME
1615 usize_t i;
1616
1617 cfg->size = __primes[__nprimes-1];
1618
1619 for (i = 1; i < __nprimes; i += 1)
1620 {
1621 if (slots < __primes[i])
1622 {
1623 cfg->size = __primes[i-1];
1624 break;
1625 }
1626 }
1627#else
1628 int bits = xd3_size_log2 (slots);
1629
1630 cfg->size = (1 << bits);
1631 cfg->mask = (cfg->size - 1);
1632 cfg->shift = min (32 - bits, 16);
1633#endif
1634}
1635#endif
1636
1637/******************************************************************************************
1638 Cksum function
1639 ******************************************************************************************/
1640
1641/* OPT: It turns out that the compiler can't unroll the loop as well as you can by hand. */
1642static INLINE uint32_t
1643xd3_lcksum (const uint8_t *seg, const int ln)
1644{
1645 int i = 0;
1646 uint32_t low = 0;
1647 uint32_t high = 0;
1648
1649 for (; i < ln; i += 1)
1650 {
1651 low += PERMUTE(*seg++);
1652 high += low;
1653 }
1654
1655 return ((high & 0xffff) << 16) | (low & 0xffff);
1656}
1657
#if ARITH_SMALL_CKSUM
/* Small checksum of the LN bytes at SEG, as produced by SMALL_CKSUM_UPDATE. */
static INLINE usize_t
xd3_scksum (const uint8_t *seg, const int ln)
{
  usize_t c;

  /* UPDATE operates on seg[1..ln], so the pointer handed to it is backed up by one. */
  SMALL_CKSUM_UPDATE (c, (seg - 1), ln);

  return c;
}
#else
/* Without the arithmetic small checksum, the small checksum is the large one. */
#define xd3_scksum(seg,ln) xd3_lcksum(seg,ln)
#endif
1670
1671/******************************************************************************************
1672 Adler32 stream function: code copied from Zlib, defined in RFC1950
1673 ******************************************************************************************/
1674
#define A32_BASE 65521L /* Largest prime smaller than 2^16 */
#define A32_NMAX 5552   /* NMAX is the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1 */

/* Unrolled accumulation steps.  Each expects running sums named s1 and s2 in the
 * enclosing scope.  Arguments are parenthesized and every macro is a single
 * do/while(0) statement, so it is safe as the body of an unbraced if/else or loop;
 * each use must be followed by a semicolon. */
#define A32_DO1(buf,i)  do { s1 += (buf)[(i)]; s2 += s1; } while (0)
#define A32_DO2(buf,i)  do { A32_DO1(buf,(i)); A32_DO1(buf,(i)+1); } while (0)
#define A32_DO4(buf,i)  do { A32_DO2(buf,(i)); A32_DO2(buf,(i)+2); } while (0)
#define A32_DO8(buf,i)  do { A32_DO4(buf,(i)); A32_DO4(buf,(i)+4); } while (0)
#define A32_DO16(buf)   do { A32_DO8(buf,0); A32_DO8(buf,8); } while (0)
1683
1684static unsigned long adler32 (unsigned long adler, const uint8_t *buf, usize_t len)
1685{
1686 unsigned long s1 = adler & 0xffff;
1687 unsigned long s2 = (adler >> 16) & 0xffff;
1688 int k;
1689
1690 while (len > 0)
1691 {
1692 k = (len < A32_NMAX) ? len : A32_NMAX;
1693 len -= k;
1694
1695 while (k >= 16)
1696 {
1697 A32_DO16(buf);
1698 buf += 16;
1699 k -= 16;
1700 }
1701
1702 if (k != 0)
1703 {
1704 do
1705 {
1706 s1 += *buf++;
1707 s2 += s1;
1708 }
1709 while (--k);
1710 }
1711
1712 s1 %= A32_BASE;
1713 s2 %= A32_BASE;
1714 }
1715
1716 return (s2 << 16) | s1;
1717}
1718
1719/******************************************************************************************
1720 Run-length function
1721 ******************************************************************************************/
1722
1723static INLINE int
1724xd3_comprun (const uint8_t *seg, int slook, uint8_t *run_cp)
1725{
1726 int i;
1727 int run_l = 0;
1728 uint8_t run_c = 0;
1729
1730 for (i = 0; i < slook; i += 1)
1731 {
1732 NEXTRUN(seg[i]);
1733 }
1734
1735 (*run_cp) = run_c;
1736
1737 return run_l;
1738}
1739
1740/******************************************************************************************
1741 Basic encoder/decoder functions
1742 ******************************************************************************************/
1743
1744static int
1745xd3_decode_byte (xd3_stream *stream, uint *val)
1746{
1747 if (stream->avail_in == 0)
1748 {
1749 stream->msg = "further input required";
1750 return XD3_INPUT;
1751 }
1752
1753 (*val) = stream->next_in[0];
1754
1755 DECODE_INPUT (1);
1756 return 0;
1757}
1758
1759static int
1760xd3_decode_bytes (xd3_stream *stream, uint8_t *buf, usize_t *pos, usize_t size)
1761{
1762 usize_t want;
1763 usize_t take;
1764
1765 /* Note: The case where (*pos == size) happens when a zero-length appheader or code
1766 * table is transmitted, but there is nothing in the standard against that. */
1767
1768 while (*pos < size)
1769 {
1770 if (stream->avail_in == 0)
1771 {
1772 stream->msg = "further input required";
1773 return XD3_INPUT;
1774 }
1775
1776 want = size - *pos;
1777 take = min (want, stream->avail_in);
1778
1779 memcpy (buf + *pos, stream->next_in, take);
1780
1781 DECODE_INPUT (take);
1782 (*pos) += take;
1783 }
1784
1785 return 0;
1786}
1787
1788#if XD3_ENCODER
1789static int
1790xd3_emit_byte (xd3_stream *stream,
1791 xd3_output **outputp,
1792 uint8_t code)
1793{
1794 xd3_output *output = (*outputp);
1795
1796 if (output->next == output->avail)
1797 {
1798 xd3_output *aoutput;
1799
1800 if ((aoutput = xd3_alloc_output (stream, output)) == NULL)
1801 {
1802 return ENOMEM;
1803 }
1804
1805 output = (*outputp) = aoutput;
1806 }
1807
1808 output->base[output->next++] = code;
1809
1810 return 0;
1811}
1812
1813static int
1814xd3_emit_bytes (xd3_stream *stream,
1815 xd3_output **outputp,
1816 const uint8_t *base,
1817 usize_t size)
1818{
1819 xd3_output *output = (*outputp);
1820
1821 do
1822 {
1823 usize_t take;
1824
1825 if (output->next == output->avail)
1826 {
1827 xd3_output *aoutput;
1828
1829 if ((aoutput = xd3_alloc_output (stream, output)) == NULL)
1830 {
1831 return ENOMEM;
1832 }
1833
1834 output = (*outputp) = aoutput;
1835 }
1836
1837 take = min (output->avail - output->next, size);
1838
1839 memcpy (output->base + output->next, base, take);
1840
1841 output->next += take;
1842 size -= take;
1843 base += take;
1844 }
1845 while (size > 0);
1846
1847 return 0;
1848}
1849#endif /* XD3_ENCODER */
1850
1851/******************************************************************************************
1852 Integer encoder/decoder functions
1853 ******************************************************************************************/
1854
/* Function body for the resumable integer decoders.  It expects `stream' and `val' in
 * the enclosing function.  PART is the accumulator that survives between calls and
 * OFLOW masks the bits that must be clear before shifting in another 7-bit group.
 *
 * Each input byte contributes its low 7 bits; a clear high bit marks the last byte,
 * at which point the value is stored in *val, PART is reset and 0 is returned.
 * Returns EINVAL on overflow and XD3_INPUT when the input runs out first. */
#define DECODE_INTEGER_TYPE(PART,OFLOW) \
  for (; stream->avail_in != 0; ) \
    { \
      const uint group = stream->next_in[0]; \
      \
      DECODE_INPUT(1); \
      \
      if (PART & OFLOW) \
        { \
          stream->msg = "overflow in decode_integer"; \
          return EINVAL; \
        } \
      \
      PART = (PART << 7) | (group & 127); \
      \
      if ((group & 128) != 0) \
        { \
          continue; \
        } \
      \
      (*val) = PART; \
      PART = 0; \
      return 0; \
    } \
  \
  stream->msg = "further input required"; \
  return XD3_INPUT
1880
/* Function body for the in-memory integer readers.  It expects `stream', `inpp',
 * `max' and `valp' in the enclosing function.  TYPE is the integer type being read
 * and OFLOW masks the bits that must be clear before shifting in another 7-bit group.
 *
 * Bytes are read from *inpp up to (not including) max.  On success the value is stored
 * in *valp, *inpp is advanced past the integer and 0 is returned.  On end-of-input or
 * overflow stream->msg is set, EINVAL is returned and *inpp is left unchanged. */
#define READ_INTEGER_TYPE(TYPE, OFLOW) \
  TYPE val = 0; \
  const uint8_t *inp = (*inpp); \
  uint next; \
  \
  do \
    { \
      if (inp == max) \
        { \
          stream->msg = "end-of-input in read_integer"; \
          return EINVAL; \
        } \
      \
      if (val & OFLOW) \
        { \
          stream->msg = "overflow in read_integer"; \
          return EINVAL; \
        } \
      \
      next = (*inp++); \
      val = (val << 7) | (next & 127); \
    } \
  while (next & 128); \
  \
  (*valp) = val; \
  (*inpp) = inp; \
  \
  return 0
1909
/* Function body for the integer emitters.  It expects `stream', `output' and an
 * unsigned `num' in the enclosing function; `num' is consumed.  The value is written
 * most-significant group first, 7 bits per byte, with the high bit set on every byte
 * except the last.  Returns the result of xd3_emit_bytes(). */
#define EMIT_INTEGER_TYPE() \
  /* a 64-bit value at 7 bits per byte needs at most ceil(64/7) = 10 bytes */ \
  uint8_t buf[10]; \
  usize_t bufi = 10; \
  \
  /* This loop performs division and turns on all MSBs. */ \
  do \
    { \
      buf[--bufi] = (num & 127) | 128; \
      num >>= 7; \
    } \
  while (num != 0); \
  \
  /* Turn off MSB of the last byte. */ \
  buf[9] &= 127; \
  \
  /* bufi is unsigned, so running off the front of buf would show up as a wrapped, */ \
  /* very large index rather than a negative one. */ \
  XD3_ASSERT (bufi < 10); \
  \
  return xd3_emit_bytes (stream, output, buf + bufi, 10 - bufi)
1931
/* Return X from the enclosing function if `num' fits in X groups of 7 bits.  Each is
 * wrapped in do/while(0) so it is a single statement that cannot capture a following
 * `else'; every use must be followed by a semicolon. */
#define IF_SIZEOF32(x) do { if (num < (1U   << (7 * (x)))) return (x); } while (0)
#define IF_SIZEOF64(x) do { if (num < (1ULL << (7 * (x)))) return (x); } while (0)
1934
1935#if USE_UINT32
1936static uint
1937xd3_sizeof_uint32_t (uint32_t num)
1938{
1939 IF_SIZEOF32(1);
1940 IF_SIZEOF32(2);
1941 IF_SIZEOF32(3);
1942 IF_SIZEOF32(4);
1943
1944 return 5;
1945}
1946
1947static int
1948xd3_decode_uint32_t (xd3_stream *stream, uint32_t *val)
1949{ DECODE_INTEGER_TYPE (stream->dec_32part, UINT32_OFLOW_MASK); }
1950static int
1951xd3_read_uint32_t (xd3_stream *stream, const uint8_t **inpp, const uint8_t *max, uint32_t *valp)
1952{ READ_INTEGER_TYPE (uint32_t, UINT32_OFLOW_MASK); }
1953#if XD3_ENCODER
1954static int
1955xd3_emit_uint32_t (xd3_stream *stream, xd3_output **output, uint32_t num)
1956{ EMIT_INTEGER_TYPE (); }
1957#endif
1958#endif
1959
1960#if USE_UINT64
1961/* We only ever decode offsets, but the other three are part of the regression test
1962 * anyway. */
1963static int
1964xd3_decode_uint64_t (xd3_stream *stream, uint64_t *val)
1965{ DECODE_INTEGER_TYPE (stream->dec_64part, UINT64_OFLOW_MASK); }
1966#if REGRESSION_TEST
#if XD3_ENCODER
/* Append the encoding of the 64-bit integer NUM to the output chain at *OUTPUT. */
static int
xd3_emit_uint64_t (xd3_stream *stream, xd3_output **output, uint64_t num)
{
  EMIT_INTEGER_TYPE ();
}
#endif
1972static int
1973xd3_read_uint64_t (xd3_stream *stream, const uint8_t **inpp, const uint8_t *max, uint64_t *valp)
1974{ READ_INTEGER_TYPE (uint64_t, UINT64_OFLOW_MASK); }
1975
1976static uint
1977xd3_sizeof_uint64_t (uint64_t num)
1978{
1979 IF_SIZEOF64(1);
1980 IF_SIZEOF64(2);
1981 IF_SIZEOF64(3);
1982 IF_SIZEOF64(4);
1983 IF_SIZEOF64(5);
1984 IF_SIZEOF64(6);
1985 IF_SIZEOF64(7);
1986 IF_SIZEOF64(8);
1987 IF_SIZEOF64(9);
1988
1989 return 10;
1990}
1991#endif
1992#endif
1993
1994/******************************************************************************************
1995 Debug instruction statistics
1996 ******************************************************************************************/
1997
1998#if XD3_DEBUG
1999static void
2000xd3_count_inst (xd3_stream *stream, uint code)
2001{
2002 IF_DEBUG1 ({
2003 if (stream->i_freqs == NULL &&
2004 (stream->i_freqs = xd3_alloc0 (stream, sizeof (stream->i_freqs[0]), 256)) == NULL) { abort (); }
2005
2006 stream->i_freqs[code] += 1;
2007 });
2008 stream->n_ibytes += 1;
2009}
2010
2011static void
2012xd3_count_mode (xd3_stream *stream, uint mode)
2013{
2014 IF_DEBUG1 ({
2015 if (stream->i_modes == NULL &&
2016 (stream->i_modes = xd3_alloc0 (stream, sizeof (stream->i_modes[0]), TOTAL_MODES (stream))) == NULL) { abort (); }
2017 stream->i_modes[mode] += 1;
2018 });
2019}
2020
2021static void
2022xd3_count_size (xd3_stream *stream, usize_t size)
2023{
2024 IF_DEBUG1({
2025 if (stream->i_sizes == NULL &&
2026 (stream->i_sizes = xd3_alloc0 (stream, sizeof (stream->i_sizes[0]), 64)) == NULL) { abort (); }
2027
2028 if (size < 64) { stream->i_sizes[size] += 1; }
2029 });
2030 stream->n_sbytes += xd3_sizeof_size (size);
2031}
2032#endif
2033
2034/******************************************************************************************
2035 Address cache stuff
2036 ******************************************************************************************/
2037
2038static int
2039xd3_alloc_cache (xd3_stream *stream)
2040{
2041 if (((stream->acache.s_near > 0) &&
2042 (stream->acache.near_array = xd3_alloc (stream, stream->acache.s_near, sizeof (usize_t))) == NULL) ||
2043 ((stream->acache.s_same > 0) &&
2044 (stream->acache.same_array = xd3_alloc (stream, stream->acache.s_same * 256, sizeof (usize_t))) == NULL))
2045 {
2046 return ENOMEM;
2047 }
2048
2049 return 0;
2050}
2051
2052static void
2053xd3_init_cache (xd3_addr_cache* acache)
2054{
2055 if (acache->s_near > 0)
2056 {
2057 memset (acache->near_array, 0, acache->s_near * sizeof (usize_t));
2058 acache->next_slot = 0;
2059 }
2060
2061 if (acache->s_same > 0)
2062 {
2063 memset (acache->same_array, 0, acache->s_same * 256 * sizeof (usize_t));
2064 }
2065}
2066
2067static void
2068xd3_update_cache (xd3_addr_cache* acache, usize_t addr)
2069{
2070 if (acache->s_near > 0)
2071 {
2072 acache->near_array[acache->next_slot] = addr;
2073 acache->next_slot = (acache->next_slot + 1) % acache->s_near;
2074 }
2075
2076 if (acache->s_same > 0)
2077 {
2078 acache->same_array[addr % (acache->s_same*256)] = addr;
2079 }
2080}
2081
2082#if XD3_ENCODER
2083/* OPT: this gets called a lot, can it be optimized? */
2084static int
2085xd3_encode_address (xd3_stream *stream, usize_t addr, usize_t here, uint8_t* mode)
2086{
2087 usize_t d, bestd;
2088 int i, bestm, ret;
2089 xd3_addr_cache* acache = & stream->acache;
2090
2091#define SMALLEST_INT(x) do { if (((x) & ~127) == 0) { goto good; } } while (0)
2092
2093 /* Attempt to find the address mode that yields the smallest integer value for "d", the
2094 * encoded address value, thereby minimizing the encoded size of the address. */
2095 bestd = addr;
2096 bestm = VCD_SELF;
2097
2098 XD3_ASSERT (addr < here);
2099
2100 SMALLEST_INT (bestd);
2101
2102 if ((d = here-addr) < bestd)
2103 {
2104 bestd = d;
2105 bestm = VCD_HERE;
2106
2107 SMALLEST_INT (bestd);
2108 }
2109
2110 for (i = 0; i < acache->s_near; i += 1)
2111 {
2112 d = addr - acache->near_array[i];
2113
2114 if (d >= 0 && d < bestd)
2115 {
2116 bestd = d;
2117 bestm = i+2; /* 2 counts the VCD_SELF, VCD_HERE modes */
2118
2119 SMALLEST_INT (bestd);
2120 }
2121 }
2122
2123 if (acache->s_same > 0 && acache->same_array[d = addr%(acache->s_same*256)] == addr)
2124 {
2125 bestd = d%256;
2126 bestm = acache->s_near + 2 + d/256; /* 2 + s_near offsets past the VCD_NEAR modes */
2127
2128 if ((ret = xd3_emit_byte (stream, & ADDR_TAIL (stream), bestd))) { return ret; }
2129 }
2130 else
2131 {
2132 good:
2133
2134 if ((ret = xd3_emit_size (stream, & ADDR_TAIL (stream), bestd))) { return ret; }
2135 }
2136
2137 xd3_update_cache (acache, addr);
2138
2139 IF_DEBUG (xd3_count_mode (stream, bestm));
2140
2141 (*mode) += bestm;
2142
2143 return 0;
2144}
2145#endif
2146
2147static int
2148xd3_decode_address (xd3_stream *stream, usize_t here, uint mode, const uint8_t **inpp, const uint8_t *max, uint32_t *valp)
2149{
2150 int ret;
2151 uint same_start = 2 + stream->acache.s_near;
2152
2153 if (mode < same_start)
2154 {
2155 if ((ret = xd3_read_size (stream, inpp, max, valp))) { return ret; }
2156
2157 switch (mode)
2158 {
2159 case VCD_SELF:
2160 break;
2161 case VCD_HERE:
2162 (*valp) = here - (*valp);
2163 break;
2164 default:
2165 (*valp) += stream->acache.near_array[mode - 2];
2166 break;
2167 }
2168 }
2169 else
2170 {
2171 if (*inpp == max)
2172 {
2173 stream->msg = "address underflow";
2174 return EINVAL;
2175 }
2176
2177 mode -= same_start;
2178
2179 (*valp) = stream->acache.same_array[mode*256 + (**inpp)];
2180
2181 (*inpp) += 1;
2182 }
2183
2184 xd3_update_cache (& stream->acache, *valp);
2185
2186 return 0;
2187}
2188
2189/******************************************************************************************
2190 Alloc/free
2191 ******************************************************************************************/
2192
2193static void*
2194__xd3_alloc_func (void* opaque, usize_t items, usize_t size)
2195{
2196 return malloc (items * size);
2197}
2198
/* free()-backed release function matching __xd3_alloc_func.  OPAQUE is not used. */
static void
__xd3_free_func (void* opaque, void* address)
{
  (void) opaque;

  free (address);
}
2204
2205static void*
2206xd3_alloc (xd3_stream *stream,
2207 usize_t elts,
2208 usize_t size)
2209{
2210 void *a = stream->alloc (stream->opaque, elts, size);
2211
2212 if (a != NULL)
2213 {
2214 IF_DEBUG (stream->alloc_cnt += 1);
2215 }
2216 else
2217 {
2218 stream->msg = "out of memory";
2219 }
2220
2221 return a;
2222}
2223
2224static void
2225xd3_free (xd3_stream *stream,
2226 void *ptr)
2227{
2228 if (ptr != NULL)
2229 {
2230 IF_DEBUG (stream->free_cnt += 1);
2231 XD3_ASSERT (stream->free_cnt <= stream->alloc_cnt);
2232 stream->free (stream->opaque, ptr);
2233 }
2234}
2235
2236#if XD3_ENCODER
2237static void*
2238xd3_alloc0 (xd3_stream *stream,
2239 usize_t elts,
2240 usize_t size)
2241{
2242 void *a = xd3_alloc (stream, elts, size);
2243
2244 if (a != NULL)
2245 {
2246 memset (a, 0, elts * size);
2247 }
2248
2249 return a;
2250}
2251
2252static xd3_output*
2253xd3_alloc_output (xd3_stream *stream,
2254 xd3_output *old_output)
2255{
2256 xd3_output *output;
2257 uint8_t *base;
2258
2259 if (stream->enc_free != NULL)
2260 {
2261 output = stream->enc_free;
2262 stream->enc_free = output->next_page;
2263 }
2264 else
2265 {
2266 if ((output = xd3_alloc (stream, 1, sizeof (xd3_output))) == NULL)
2267 {
2268 return NULL;
2269 }
2270
2271 if ((base = xd3_alloc (stream, XD3_ALLOCSIZE, sizeof (uint8_t))) == NULL)
2272 {
2273 xd3_free (stream, output);
2274 return NULL;
2275 }
2276
2277 output->base = base;
2278 output->avail = XD3_ALLOCSIZE;
2279 }
2280
2281 output->next = 0;
2282
2283 if (old_output)
2284 {
2285 old_output->next_page = output;
2286 }
2287
2288 output->next_page = NULL;
2289
2290 return output;
2291}
2292
2293static usize_t
2294xd3_sizeof_output (xd3_output *output)
2295{
2296 usize_t s = 0;
2297
2298 for (; output; output = output->next_page)
2299 {
2300 s += output->next;
2301 }
2302
2303 return s;
2304}
2305
2306static void
2307xd3_freelist_output (xd3_stream *stream,
2308 xd3_output *output)
2309{
2310 xd3_output *tmp;
2311
2312 while (output)
2313 {
2314 tmp = output;
2315 output = output->next_page;
2316
2317 tmp->next = 0;
2318 tmp->next_page = stream->enc_free;
2319 stream->enc_free = tmp;
2320 }
2321}
2322
2323static void
2324xd3_free_output (xd3_stream *stream,
2325 xd3_output *output)
2326{
2327 xd3_output *next;
2328
2329 again:
2330 if (output == NULL)
2331 {
2332 return;
2333 }
2334
2335 next = output->next_page;
2336
2337 xd3_free (stream, output->base);
2338 xd3_free (stream, output);
2339
2340 output = next;
2341 goto again;
2342}
2343#endif /* XD3_ENCODER */
2344
2345void
2346xd3_free_stream (xd3_stream *stream)
2347{
2348
2349 xd3_free (stream, stream->large_table);
2350 xd3_free (stream, stream->small_table);
2351 xd3_free (stream, stream->small_prev);
2352 xd3_free (stream, stream->iopt.buffer);
2353
2354#if XD3_ENCODER
2355 {
2356 int i;
2357 for (i = 0; i < ENC_SECTS; i += 1)
2358 {
2359 xd3_free_output (stream, stream->enc_heads[i]);
2360 }
2361 xd3_free_output (stream, stream->enc_free);
2362 }
2363#endif
2364
2365 xd3_free (stream, stream->acache.near_array);
2366 xd3_free (stream, stream->acache.same_array);
2367
2368 xd3_free (stream, stream->inst_sect.copied1);
2369 xd3_free (stream, stream->addr_sect.copied1);
2370 xd3_free (stream, stream->data_sect.copied1);
2371
2372 xd3_free (stream, stream->dec_buffer);
2373 xd3_free (stream, (uint8_t*) stream->dec_lastwin);
2374
2375 xd3_free (stream, stream->buf_in);
2376 xd3_free (stream, stream->dec_appheader);
2377 xd3_free (stream, stream->dec_codetbl);
2378 xd3_free (stream, stream->code_table_alloc);
2379
2380#if SECONDARY_ANY
2381 xd3_free (stream, stream->inst_sect.copied2);
2382 xd3_free (stream, stream->addr_sect.copied2);
2383 xd3_free (stream, stream->data_sect.copied2);
2384
2385 if (stream->sec_type != NULL)
2386 {
2387 stream->sec_type->destroy (stream, stream->sec_stream_d);
2388 stream->sec_type->destroy (stream, stream->sec_stream_i);
2389 stream->sec_type->destroy (stream, stream->sec_stream_a);
2390 }
2391#endif
2392
2393 IF_DEBUG (xd3_free (stream, stream->i_freqs));
2394 IF_DEBUG (xd3_free (stream, stream->i_modes));
2395 IF_DEBUG (xd3_free (stream, stream->i_sizes));
2396
2397 XD3_ASSERT (stream->alloc_cnt == stream->free_cnt);
2398
2399 memset (stream, 0, sizeof (xd3_stream));
2400}
2401
2402#if (XD3_DEBUG || VCDIFF_TOOLS)
2403static const char*
2404xd3_rtype_to_string (xd3_rtype type, int print_mode)
2405{
2406 switch (type)
2407 {
2408 case XD3_NOOP:
2409 return "NOOP ";
2410 case XD3_RUN:
2411 return "RUN ";
2412 case XD3_ADD:
2413 return "ADD ";
2414 default: break;
2415 }
2416 if (! print_mode)
2417 {
2418 return "CPY ";
2419 }
2420 switch (type)
2421 {
2422 case XD3_CPY + 0: return "CPY_0";
2423 case XD3_CPY + 1: return "CPY_1";
2424 case XD3_CPY + 2: return "CPY_2";
2425 case XD3_CPY + 3: return "CPY_3";
2426 case XD3_CPY + 4: return "CPY_4";
2427 case XD3_CPY + 5: return "CPY_5";
2428 case XD3_CPY + 6: return "CPY_6";
2429 case XD3_CPY + 7: return "CPY_7";
2430 case XD3_CPY + 8: return "CPY_8";
2431 case XD3_CPY + 9: return "CPY_9";
2432 default: return "CPY>9";
2433 }
2434}
2435#endif
2436
2437/******************************************************************************************
2438 Stream configuration
2439 ******************************************************************************************/
2440
/* Initializes STREAM from CONFIG.  When CONFIG is NULL a zero-filled local xd3_config is
 * used instead.  STREAM is zeroed first; each size setting is then taken from CONFIG or,
 * when that field is zero, from the matching XD3_DEFAULT_* constant, and the alloc/free
 * callbacks default to __xd3_alloc_func / __xd3_free_func.  The failure paths visible
 * here store a message in stream->msg and return EINVAL; returns 0 on success. */
2441int
2442xd3_config_stream(xd3_stream *stream,
2443 xd3_config *config)
2444{
2445 int ret;
2446 xd3_config defcfg;
2447 const xd3_smatcher* smatcher;
2448
2449 if (config == NULL)
2450 {
2451 config = & defcfg;
2452 memset (config, 0, sizeof (*config));
2453 }
2454
2455 /* Initial setup: no error checks yet */
2456 memset (stream, 0, sizeof (*stream));
2457
2458 stream->memsize = config->memsize ? config->memsize : XD3_DEFAULT_MEMSIZE;
2459 stream->winsize = config->winsize ? config->winsize : XD3_DEFAULT_WINSIZE;
2460 stream->sprevsz = config->sprevsz ? config->sprevsz : XD3_DEFAULT_SPREVSZ;
2461 stream->srcwin_size = config->srcwin_size ? config->srcwin_size : XD3_DEFAULT_START_CKSUM_ADVANCE;
2462 stream->srcwin_maxsz = config->srcwin_maxsz ? config->srcwin_maxsz : XD3_DEFAULT_MAX_CKSUM_ADVANCE;
2463 stream->iopt_size = config->iopt_size ? config->iopt_size : XD3_DEFAULT_IOPT_SIZE;
2464 stream->getblk = config->getblk;
2465 stream->alloc = config->alloc ? config->alloc : __xd3_alloc_func;
2466 stream->free = config->freef ? config->freef : __xd3_free_func;
2467 stream->opaque = config->opaque;
2468 stream->flags = config->flags;
2469
2470 XD3_ASSERT (stream->winsize > 0);
2471
2472 /* Secondary setup. */
2473 stream->sec_data = config->sec_data;
2474 stream->sec_inst = config->sec_inst;
2475 stream->sec_addr = config->sec_addr;
2476
 /* The data_type tags are assigned after the copy, so they override whatever the
  * caller's config held in those fields. */
2477 stream->sec_data.data_type = DATA_SECTION;
2478 stream->sec_inst.data_type = INST_SECTION;
2479 stream->sec_addr.data_type = ADDR_SECTION;
2480
2481 /* Check static sizes. */
2482 if (sizeof (usize_t) != SIZEOF_USIZE_T ||
2483 sizeof (xoff_t) != SIZEOF_XOFF_T ||
2484 (ret = xd3_check_pow2(XD3_ALLOCSIZE, NULL)))
2485 {
2486 stream->msg = "incorrect compilation: wrong integer sizes";
2487 return EINVAL;
2488 }
2489
2490 /* Check/set secondary compressor. */
 /* NOTE(review): FGK_CASE and DJW_CASE are macros defined outside this chunk.  No
  * break follows them here, so presumably each macro ends its own case -- confirm. */
2491 switch (stream->flags & XD3_SEC_TYPE)
2492 {
2493 case 0:
2494 if (stream->flags & XD3_SEC_OTHER)
2495 {
2496 stream->msg = "XD3_SEC flags require a secondary compressor type";
2497 return EINVAL;
2498 }
2499 break;
2500 case XD3_SEC_FGK:
2501 FGK_CASE (stream);
2502 case XD3_SEC_DJW:
2503 DJW_CASE (stream);
2504 default:
2505 stream->msg = "too many secondary compressor types set";
2506 return EINVAL;
2507 }
2508
2509 /* Check/set encoder code table. */
2510 switch (stream->flags & XD3_ALT_CODE_TABLE) {
2511 case 0:
2512 stream->code_table_desc = & __rfc3284_code_table_desc;
2513 stream->code_table_func = xd3_rfc3284_code_table;
2514 break;
2515#if GENERIC_ENCODE_TABLES
2516 case XD3_ALT_CODE_TABLE:
2517 stream->code_table_desc = & __alternate_code_table_desc;
2518 stream->code_table_func = xd3_alternate_code_table;
2519 stream->comp_table_func = xd3_compute_alternate_table_encoding;
2520 break;
2521#endif
2522 default:
2523 stream->msg = "alternate code table support was not compiled";
2524 return EINVAL;
2525 }
2526
2527 /* Check sprevsz */
 /* With small_chain == 1 the sprevsz setting is discarded (forced to 0).  Otherwise it
  * must be a power of two, and sprevsz - 1 is stored as the mask. */
2528 if (config->small_chain == 1)
2529 {
2530 stream->sprevsz = 0;
2531 }
2532 else
2533 {
2534 if ((ret = xd3_check_pow2 (stream->sprevsz, NULL)))
2535 {
2536 stream->msg = "sprevsz is required to be a power of two";
2537 return EINVAL;
2538 }
2539
2540 stream->sprevmask = stream->sprevsz - 1;
2541 }
2542
2543 /* Default scanner settings. */
 /* NOTE(review): in the XD3_SMATCH_SOFT case the "break" written on the same line as
  * the smatcher assignment leaves the switch before the validation that follows it, so
  * the "invalid soft string-match config" check looks unreachable.  That check also
  * reads stream->large_look, which is still zero at this point.  Confirm whether the
  * validation was disabled on purpose before re-enabling it. */
2544 switch (config->smatch_cfg)
2545 {
2546 IF_BUILD_SOFT(case XD3_SMATCH_SOFT:
2547 smatcher = & __smatcher_soft; break;
2548
2549 if (config->large_look < MIN_MATCH ||
2550 config->large_step < 1 ||
2551 config->small_look < MIN_MATCH ||
2552 config->small_chain < 1 ||
2553 config->large_look < config->small_look ||
2554 config->small_chain < config->small_lchain ||
2555 (config->small_lchain == 0 && config->try_lazy) ||
2556 config->srcwin_size < stream->large_look ||
2557 config->srcwin_maxsz < stream->srcwin_size)
2558 {
2559 stream->msg = "invalid soft string-match config";
2560 return EINVAL;
2561 }
2562 break;)
2563
2564 IF_BUILD_SLOW(case XD3_SMATCH_DEFAULT:)
2565 IF_BUILD_SLOW(case XD3_SMATCH_SLOW: smatcher = & __smatcher_slow; break;)
2566 IF_BUILD_FAST(case XD3_SMATCH_FAST: smatcher = & __smatcher_fast; break;)
2567 default:
2568 stream->msg = "invalid string match config type";
2569 return EINVAL;
2570 }
2571
 /* Every case that reaches this point has assigned smatcher; the default case returns. */
2572 stream->string_match = smatcher->string_match;
2573 XD3_ASSERT(stream->string_match);
2574
2575 XD3_COPY_CONFIG_FIELDS (stream, smatcher);
2576
2577 /* If it is a soft config, the smatcher fields didn't set anything, copy from config
2578 * instead. */
2579 if (stream->large_look == 0)
2580 {
2581 XD3_COPY_CONFIG_FIELDS (stream, config);
2582 }
2583
2584 IF_DEBUG1 (P(RINT "[stream cfg] llook %u lstep %u slook %u\n",
2585 stream->large_look, stream->large_step, stream->small_look));
2586 return 0;
2587}
2588
2589/******************************************************************************************
2590 Getblk interface
2591 ******************************************************************************************/
2592
2593/* This function interfaces with the client getblk function, checks its results, etc. */
2594static int
2595xd3_getblk (xd3_stream *stream/*, xd3_source *source*/, xoff_t blkno)
2596{
2597 int ret;
2598 xd3_source *source = stream->src;
2599
2600 if (blkno >= source->blocks)
2601 {
2602 stream->msg = "source file too short";
2603 return EINVAL;
2604 }
2605
2606 if (blkno != source->curblkno || source->curblk == NULL)
2607 {
2608 XD3_ASSERT (source->curblk != NULL || blkno != source->curblkno);
2609
2610 source->getblkno = blkno;
2611
2612 if (stream->getblk == NULL)
2613 {
2614 stream->msg = "getblk source input";
2615 return XD3_GETSRCBLK;
2616 }
2617 else if ((ret = stream->getblk (stream, source, blkno)) != 0)
2618 {
2619 stream->msg = "getblk failed";
2620 return ret;
2621 }
2622
2623 XD3_ASSERT (source->curblk != NULL);
2624 }
2625
2626 if (source->onblk != xd3_bytes_on_srcblk (source, blkno))
2627 {
2628 stream->msg = "getblk returned short block";
2629 return EINVAL;
2630 }
2631
2632 return 0;
2633}
2634
2635/******************************************************************************************
2636 Stream open/close
2637 ******************************************************************************************/
2638
2639int
2640xd3_set_source (xd3_stream *stream,
2641 xd3_source *src)
2642{
2643 xoff_t blk_num;
2644 xoff_t tail_size;
2645
2646 IF_DEBUG1 (P(RINT "[set source] size %"Q"u\n", src->size));
2647
2648 if (src == NULL || src->size < stream->large_look) { return 0; }
2649
2650 stream->src = src;
2651 blk_num = src->size / src->blksize;
2652 tail_size = src->size % src->blksize;
2653 src->blocks = blk_num + (tail_size > 0);
2654 src->srclen = 0;
2655 src->srcbase = 0;
2656
2657 return 0;
2658}
2659
2660void
2661xd3_abort_stream (xd3_stream *stream)
2662{
2663 stream->dec_state = DEC_ABORTED;
2664 stream->enc_state = ENC_ABORTED;
2665}
2666
2667int
2668xd3_close_stream (xd3_stream *stream)
2669{
2670 if (stream->enc_state != 0 && stream->enc_state != ENC_ABORTED)
2671 {
2672 /* If encoding, should be ready for more input but not actually have any. */
2673 if (stream->enc_state != ENC_INPUT || stream->avail_in != 0)
2674 {
2675 stream->msg = "encoding is incomplete";
2676 return EINVAL;
2677 }
2678 }
2679 else
2680 {
2681 switch (stream->dec_state)
2682 {
2683 case DEC_VCHEAD:
2684 case DEC_WININD:
2685 /* TODO: Address the zero-byte ambiguity. Does the encoder emit a window or
2686 * not? If so, then catch an error here. If not, need another routine to say
2687 * decode_at_least_one_if_empty. */
2688 case DEC_ABORTED:
2689 break;
2690 default:
2691 /* If decoding, should be ready for the next window. */
2692 stream->msg = "EOF in decode";
2693 return EINVAL;
2694 }
2695 }
2696
2697 return 0;
2698}
2699
2700/******************************************************************************************
2701 Application header
2702 ******************************************************************************************/
2703
2704int
2705xd3_get_appheader (xd3_stream *stream,
2706 uint8_t **data,
2707 usize_t *size)
2708{
2709 if (stream->dec_state < DEC_WININD)
2710 {
2711 stream->msg = "application header not available";
2712 return EINVAL;
2713 }
2714
2715 (*data) = stream->dec_appheader;
2716 (*size) = stream->dec_appheadsz;
2717 return 0;
2718}
2719
2720#if XD3_ENCODER
2721void
2722xd3_set_appheader (xd3_stream *stream,
2723 const uint8_t *data,
2724 usize_t size)
2725{
2726 stream->enc_appheader = data;
2727 stream->enc_appheadsz = size;
2728}
2729
2730/******************************************************************************************
2731 Encoder stuff
2732 ******************************************************************************************/
2733
2734#if XD3_DEBUG
2735static int
2736xd3_iopt_check (xd3_stream *stream)
2737{
2738 int ul = xd3_rlist_length (& stream->iopt.used);
2739 int fl = xd3_rlist_length (& stream->iopt.free);
2740
2741 return (ul + fl + (stream->iout ? 1 : 0)) == stream->iopt_size;
2742}
2743#endif
2744
2745static xd3_rinst*
2746xd3_iopt_free (xd3_stream *stream, xd3_rinst *i)
2747{
2748 xd3_rinst *n = xd3_rlist_remove (i);
2749 xd3_rlist_push_back (& stream->iopt.free, i);
2750 return n;
2751}
2752
2753static void
2754xd3_iopt_free_nonadd (xd3_stream *stream, xd3_rinst *i)
2755{
2756 if (i->type != XD3_ADD)
2757 {
2758 xd3_rlist_push_back (& stream->iopt.free, i);
2759 }
2760}
2761
2762/* When an instruction is ready to flush from the iopt buffer, this function is called to
2763 * produce an encoding. It writes the instruction plus size, address, and data to the
2764 * various encoding sections.
 *
 * INST is the instruction to encode.  Its address (copies) or data bytes (run/add) are
 * written immediately, but its opcode is held back in stream->iout so that it can be
 * combined with the following instruction into a double-instruction code.  The opcode
 * emitted by a call is therefore that of the previously held instruction, if any.
 *
 * Returns 0 on success or the first non-zero result of a helper. */
2765static int
2766xd3_iopt_finish_encoding (xd3_stream *stream, xd3_rinst *inst)
2767{
2768 int ret;
2769
2770 /* Check for input overflow. */
2771 XD3_ASSERT (inst->pos + inst->size <= stream->avail_in);
2772
2773 switch (inst->type)
2774 {
2775 case XD3_CPY:
2776 {
2777 /* the address may have an offset if there is a source window. */
2778 usize_t addr;
2779 xd3_source *src = stream->src;
2780
2781 if (src != NULL)
2782 {
2783 /* If there is a source copy, the source must have its source window decided
2784 * before we can encode. This can be bad -- we have to make this decision
2785 * even if no source matches have been found. */
2786 if (stream->srcwin_decided == 0)
2787 {
2788 if ((ret = xd3_srcwin_setup (stream))) { return ret; }
2789 }
2790
2791 /* xtra field indicates the copy is from the source */
2792 if (inst->xtra)
2793 {
2794 XD3_ASSERT (inst->addr >= src->srcbase);
2795 XD3_ASSERT (inst->addr + inst->size <= src->srcbase + src->srclen);
2796 addr = (inst->addr - src->srcbase);
2797 }
2798 else
2799 {
2800 /* with source window: target copy address is offset by taroff. */
2801 addr = stream->taroff + (usize_t) inst->addr;
2802 }
2803 }
2804 else
2805 {
2806 addr = (usize_t) inst->addr;
2807 }
2808
2809 XD3_ASSERT (inst->size >= MIN_MATCH);
2810
2811 /* the "here" position is always offset by taroff */
 /* NOTE(review): inst->type is passed by address, so xd3_encode_address presumably
  * rewrites it to the chosen copy mode -- confirm against its definition. */
2812 if ((ret = xd3_encode_address (stream, addr, inst->pos + stream->taroff, & inst->type)))
2813 {
2814 return ret;
2815 }
2816
2817 IF_DEBUG (stream->n_cpy += 1);
2818 IF_DEBUG (stream->l_cpy += inst->size);
2819
2820 IF_DEBUG1 ({
2821 static int cnt;
2822 P(RINT "[iopt copy:%d] pos %"Q"u-%"Q"u addr %"Q"u-%"Q"u size %u\n",
2823 cnt++,
2824 stream->total_in + inst->pos,
2825 stream->total_in + inst->pos + inst->size,
2826 inst->addr, inst->addr + inst->size, inst->size);
2827 });
2828 break;
2829 }
2830 case XD3_RUN:
2831 {
2832 XD3_ASSERT (inst->size >= MIN_MATCH);
2833
 /* For a run, xtra holds the repeated byte; it is the only data byte written. */
2834 if ((ret = xd3_emit_byte (stream, & DATA_TAIL (stream), inst->xtra))) { return ret; }
2835
2836 IF_DEBUG (stream->n_run += 1);
2837 IF_DEBUG (stream->l_run += inst->size);
2838 IF_DEBUG (stream->n_dbytes += 1);
2839
2840 IF_DEBUG1 ({
2841 static int cnt;
2842 P(RINT "[iopt run:%d] pos %"Q"u size %u\n", cnt++, stream->total_in + inst->pos, inst->size);
2843 });
2844 break;
2845 }
2846 case XD3_ADD:
2847 {
 /* An add copies its literal bytes straight from the input window. */
2848 if ((ret = xd3_emit_bytes (stream, & DATA_TAIL (stream),
2849 stream->next_in + inst->pos, inst->size))) { return ret; }
2850
2851 IF_DEBUG (stream->n_add += 1);
2852 IF_DEBUG (stream->l_add += inst->size);
2853 IF_DEBUG (stream->n_dbytes += inst->size);
2854
2855 IF_DEBUG1 ({
2856 static int cnt;
2857 P(RINT "[iopt add:%d] pos %"Q"u size %u\n", cnt++, stream->total_in + inst->pos, inst->size);
2858 });
2859
2860 break;
2861 }
2862 }
2863
2864 /* This is the only place stream->unencoded_offset is incremented. */
2865 XD3_ASSERT (stream->unencoded_offset == inst->pos);
2866 stream->unencoded_offset += inst->size;
2867
2868 IF_DEBUG (stream->n_emit += inst->size);
2869
2870 inst->code2 = 0;
2871
 /* NOTE(review): XD3_CHOOSE_INSTRUCTION is defined outside this chunk; the code below
  * relies on it having filled in code1 and, when the held instruction and INST can be
  * paired, stream->iout->code2 -- confirm. */
2872 XD3_CHOOSE_INSTRUCTION (stream, stream->iout, inst);
2873
2874 if (stream->iout != NULL)
2875 {
2876 if (stream->iout->code2 != 0)
2877 {
 /* The held instruction and INST share one double opcode: emit it, release both and
  * leave nothing held. */
2878 if ((ret = xd3_emit_double (stream, stream->iout, inst, stream->iout->code2))) { return ret; }
2879
2880 xd3_iopt_free_nonadd (stream, stream->iout);
2881 xd3_iopt_free_nonadd (stream, inst);
2882 stream->iout = NULL;
2883 return 0;
2884 }
2885 else
2886 {
 /* No pairing: emit the held instruction alone, then hold INST below. */
2887 if ((ret = xd3_emit_single (stream, stream->iout, stream->iout->code1))) { return ret; }
2888
2889 xd3_iopt_free_nonadd (stream, stream->iout);
2890 }
2891 }
2892
 /* INST becomes the held instruction; its opcode is emitted by a later call. */
2893 stream->iout = inst;
2894
2895 return 0;
2896}
2897
2898/* This possibly encodes an add instruction, iadd, which must remain on the stack until
2899 * the following call to xd3_iopt_finish_encoding. */
2900static int
2901xd3_iopt_add (xd3_stream *stream, usize_t pos, xd3_rinst *iadd)
2902{
2903 int ret;
2904 usize_t off = stream->unencoded_offset;
2905
2906 if (pos > off)
2907 {
2908 iadd->type = XD3_ADD;
2909 iadd->pos = off;
2910 iadd->size = pos - off;
2911
2912 if ((ret = xd3_iopt_finish_encoding (stream, iadd))) { return ret; }
2913 }
2914
2915 return 0;
2916}
2917
2918/* This function calls xd3_iopt_finish_encoding to finish encoding an instruction, and it
2919 * may also produce an add instruction for an unmatched region. */
2920static int
2921xd3_iopt_add_encoding (xd3_stream *stream, xd3_rinst *inst)
2922{
2923 int ret;
2924 xd3_rinst iadd;
2925
2926 if ((ret = xd3_iopt_add (stream, inst->pos, & iadd))) { return ret; }
2927
2928 if ((ret = xd3_iopt_finish_encoding (stream, inst))) { return ret; }
2929
2930 return 0;
2931}
2932
2933/* Generates a final add instruction to encode the remaining input. */
2934static int
2935xd3_iopt_add_finalize (xd3_stream *stream)
2936{
2937 int ret;
2938 xd3_rinst iadd;
2939
2940 if ((ret = xd3_iopt_add (stream, stream->avail_in, & iadd))) { return ret; }
2941
2942 if (stream->iout)
2943 {
2944 if ((ret = xd3_emit_single (stream, stream->iout, stream->iout->code1))) { return ret; }
2945
2946 xd3_iopt_free_nonadd (stream, stream->iout);
2947 stream->iout = NULL;
2948 }
2949
2950 return 0;
2951}
2952
2953/* Compact the instruction buffer by choosing the best non-overlapping instructions when
2954 * lazy string-matching. There are no ADDs in the iopt buffer because those are
2955 * synthesized in xd3_iopt_add_encoding and during xd3_iopt_add_finalize.
 *
 * The function works in two passes over stream->iopt.used.  The first pass walks
 * adjacent pairs (r1, r2) and resolves every overlap by dropping one instruction or by
 * shortening one or both.  The second pass pops instructions from the front and encodes
 * them with xd3_iopt_add_encoding.
 *
 * FORCE non-zero flushes until the used list is empty; FORCE zero stops after more than
 * half of iopt_size instructions have been flushed, or when fewer than three remain.
 *
 * Returns 0 on success or the first non-zero result of xd3_iopt_add_encoding. */
2956static int
2957xd3_iopt_flush_instructions (xd3_stream *stream, int force)
2958{
2959 xd3_rinst *r1 = xd3_rlist_front (& stream->iopt.used);
2960 xd3_rinst *r2;
2961 xd3_rinst *r3;
2962 usize_t r1end;
2963 usize_t r2end;
2964 usize_t r2off;
2965 usize_t r2moff;
2966 usize_t gap;
2967 usize_t flushed;
2968 int ret;
2969
2970 XD3_ASSERT (xd3_iopt_check (stream));
2971
2972 /* Note: once tried to skip this step if it's possible to assert there are no
2973 * overlapping instructions. Doesn't work because xd3_iopt_erase leaves overlapping
2974 * instructions. */
2975 while (! xd3_rlist_end (& stream->iopt.used, r1) &&
2976 ! xd3_rlist_end (& stream->iopt.used, r2 = xd3_rlist_next (r1)))
2977 {
2978 r1end = r1->pos + r1->size;
2979
2980 /* If the instructions do not overlap, continue. */
2981 if (r1end <= r2->pos)
2982 {
2983 r1 = r2;
2984 continue;
2985 }
2986
2987 r2end = r2->pos + r2->size;
2988
2989 /* The min_match adjustments prevent this. */
2990 XD3_ASSERT (r2end > (r1end + LEAST_MATCH_INCR));
2991
2992 /* If r3 is available... */
2993 if (! xd3_rlist_end (& stream->iopt.used, r3 = xd3_rlist_next (r2)))
2994 {
2995 /* If r3 starts before r1 finishes or just about, r2 is irrelevant */
2996 if (r3->pos <= r1end + 1)
2997 {
2998 xd3_iopt_free (stream, r2);
2999 continue;
3000 }
3001 }
3002 else if (! force)
3003 {
3004 /* Unless force, end the loop when r3 is not available. */
3005 break;
3006 }
3007
 /* r2off:  how far r2 starts after r1.
  * r2moff: how far r2 extends past the end of r1.
  * gap:    total span covered by r1 and r2 together. */
3008 r2off = r2->pos - r1->pos;
3009 r2moff = r2end - r1end;
3010 gap = r2end - r1->pos;
3011
3012 /* If the two matches overlap almost entirely, choose the better match and discard
3013 * the other. This heuristic is BLACK MAGIC. Have something better? */
3014 if (gap < 2*MIN_MATCH || r2moff <= 2 || r2off <= 2)
3015 {
3016 /* Only one match should be used, choose the longer one. */
3017 if (r1->size < r2->size)
3018 {
3019 xd3_iopt_free (stream, r1);
3020 r1 = r2;
3021 }
3022 else
3023 {
3024 /* We are guaranteed that r1 does not overlap now, so advance past r2 */
3025 r1 = xd3_iopt_free (stream, r2);
3026 }
3027 continue;
3028 }
3029 else
3030 {
3031 /* Shorten one of the instructions -- could be optimized based on the address
3032 * cache. */
3033 usize_t average;
3034 usize_t newsize;
3035 usize_t adjust1;
3036
3037 XD3_ASSERT (r1end > r2->pos && r2end > r1->pos);
3038
3039 /* Try to balance the length of both instructions, but avoid making both longer
3040 * than MAX_MATCH_SPLIT . */
3041 average = (gap) / 2;
3042 newsize = min (MAX_MATCH_SPLIT, gap - average);
3043
3044 /* Should be possible to simplify this code. */
 /* In every branch adjust1 ends up as the number of bytes to trim from the front of
  * r2; the trim itself is applied after the if-else. */
3045 if (newsize > r1->size)
3046 {
3047 /* shorten r2 */
3048 adjust1 = r1end - r2->pos;
3049 }
3050 else if (newsize > r2->size)
3051 {
3052 /* shorten r1 */
3053 adjust1 = r1end - r2->pos;
3054
3055 XD3_ASSERT (r1->size > adjust1);
3056
3057 r1->size -= adjust1;
3058
3059 /* don't shorten r2 */
3060 adjust1 = 0;
3061 }
3062 else
3063 {
3064 /* shorten r1 */
3065 adjust1 = r1->size - newsize;
3066
 /* Do not cut r1 back so far that a hole opens before r2 starts. */
3067 if (r2->pos > r1end - adjust1)
3068 {
3069 adjust1 -= r2->pos - (r1end - adjust1);
3070 }
3071
3072 XD3_ASSERT (r1->size > adjust1);
3073
3074 r1->size -= adjust1;
3075
3076 /* shorten r2 */
3077 XD3_ASSERT (r1->pos + r1->size >= r2->pos);
3078
3079 adjust1 = r1->pos + r1->size - r2->pos;
3080 }
3081
3082 /* Fallthrough above if-else, shorten r2 */
3083 XD3_ASSERT (r2->size > adjust1);
3084
 /* Trimming the front of r2 moves its target position and its copy address together. */
3085 r2->size -= adjust1;
3086 r2->pos += adjust1;
3087 r2->addr += adjust1;
3088
3089 XD3_ASSERT (r1->size >= MIN_MATCH);
3090 XD3_ASSERT (r2->size >= MIN_MATCH);
3091
3092 r1 = r2;
3093 }
3094 }
3095
3096 XD3_ASSERT (xd3_iopt_check (stream));
3097
3098 /* If forcing, pick instructions until the list is empty, otherwise this empties 50% of
3099 * the queue. */
3100 for (flushed = 0; ! xd3_rlist_empty (& stream->iopt.used); )
3101 {
3102 xd3_rinst *renc = xd3_rlist_pop_front (& stream->iopt.used);
3103 if ((ret = xd3_iopt_add_encoding (stream, renc)))
3104 {
3105 return ret;
3106 }
3107
3108 if (! force)
3109 {
3110 if (++flushed > stream->iopt_size / 2)
3111 {
3112 break;
3113 }
3114
3115 /* If there are only two instructions remaining, break, because they were
3116 * not optimized. This means there were more than 50% eliminated by the
3117 * loop above. */
3118 r1 = xd3_rlist_front (& stream->iopt.used);
3119 if (xd3_rlist_end(& stream->iopt.used, r1) ||
3120 xd3_rlist_end(& stream->iopt.used, r2 = xd3_rlist_next (r1)) ||
3121 xd3_rlist_end(& stream->iopt.used, r3 = xd3_rlist_next (r2)))
3122 {
3123 break;
3124 }
3125 }
3126 }
3127
3128 XD3_ASSERT (xd3_iopt_check (stream));
3129
3130 XD3_ASSERT (!force || xd3_rlist_length (& stream->iopt.used) == 0);
3131
3132 return 0;
3133}
3134
3135static int
3136xd3_iopt_get_slot (xd3_stream *stream, xd3_rinst** iptr)
3137{
3138 xd3_rinst *i;
3139 int ret;
3140
3141 if (xd3_rlist_empty (& stream->iopt.free))
3142 {
3143 if ((ret = xd3_iopt_flush_instructions (stream, 0))) { return ret; }
3144
3145 XD3_ASSERT (! xd3_rlist_empty (& stream->iopt.free));
3146 }
3147
3148 i = xd3_rlist_pop_back (& stream->iopt.free);
3149
3150 xd3_rlist_push_back (& stream->iopt.used, i);
3151
3152 (*iptr) = i;
3153
3154 return 0;
3155}
3156
3157/* A copy is about to be emitted that extends backwards to POS, therefore it may
3158 * completely cover some existing instructions in the buffer. If an instruction is
3159 * completely covered by this new match, erase it. If the new instruction is covered by
3160 * the previous one, return 1 to skip it. */
3161static void
3162xd3_iopt_erase (xd3_stream *stream, usize_t pos, usize_t size)
3163{
3164 while (! xd3_rlist_empty (& stream->iopt.used))
3165 {
3166 xd3_rinst *r = xd3_rlist_back (& stream->iopt.used);
3167
3168 /* Verify that greedy is working. The previous instruction should end before the
3169 * new one begins. */
3170 XD3_ASSERT ((stream->flags & XD3_BEGREEDY) == 0 || (r->pos + r->size <= pos));
3171 /* Verify that min_match is working. The previous instruction should end before the
3172 * new one ends. */
3173 XD3_ASSERT ((stream->flags & XD3_BEGREEDY) != 0 || (r->pos + r->size < pos + size));
3174
3175 /* See if the last instruction starts before the new instruction. If so, there is
3176 * nothing to erase. */
3177 if (r->pos < pos)
3178 {
3179 return;
3180 }
3181
3182 /* Otherwise, the new instruction covers the old one, delete it and repeat. */
3183 xd3_rlist_remove (r);
3184 xd3_rlist_push_back (& stream->iopt.free, r);
3185 }
3186}
3187
3188/* This function tells the last matched input position. */
3189static usize_t
3190xd3_iopt_last_matched (xd3_stream *stream)
3191{
3192 xd3_rinst *r;
3193
3194 if (xd3_rlist_empty (& stream->iopt.used))
3195 {
3196 return 0;
3197 }
3198
3199 r = xd3_rlist_back (& stream->iopt.used);
3200
3201 return r->pos + r->size;
3202}
3203
3204/******************************************************************************************
3205 Emit routines
3206 ******************************************************************************************/
3207
3208static int
3209xd3_emit_single (xd3_stream *stream, xd3_rinst *single, uint code)
3210{
3211 int has_size = stream->code_table[code].size1 == 0;
3212 int ret;
3213
3214 IF_DEBUG1 (P(RINT "[emit1] %u %s (%u) code %u\n",
3215 single->pos,
3216 xd3_rtype_to_string (single->type, 0),
3217 single->size,
3218 code));
3219
3220 if ((ret = xd3_emit_byte (stream, & INST_TAIL (stream), code))) { return ret; }
3221
3222 if (has_size)
3223 {
3224 if ((ret = xd3_emit_size (stream, & INST_TAIL (stream), single->size))) { return ret; }
3225
3226 IF_DEBUG (xd3_count_size (stream, single->size));
3227 }
3228
3229 IF_DEBUG (xd3_count_inst (stream, code));
3230
3231 return 0;
3232}
3233
3234static int
3235xd3_emit_double (xd3_stream *stream, xd3_rinst *first, xd3_rinst *second, uint code)
3236{
3237 int ret;
3238
3239 /* All double instructions use fixed sizes, so all we need to do is output the
3240 * instruction code, no sizes. */
3241 XD3_ASSERT (stream->code_table[code].size1 != 0 &&
3242 stream->code_table[code].size2 != 0);
3243
3244 if ((ret = xd3_emit_byte (stream, & INST_TAIL (stream), code))) { return ret; }
3245
3246 IF_DEBUG1 (P(RINT "[emit2]: %u %s (%u) %s (%u) code %u\n",
3247 first->pos,
3248 xd3_rtype_to_string (first->type, 0),
3249 first->size,
3250 xd3_rtype_to_string (second->type, 0),
3251 second->size,
3252 code));
3253
3254 IF_DEBUG (xd3_count_inst (stream, code));
3255
3256 return 0;
3257}
3258
3259/* This enters a potential run instruction into the iopt buffer. The position argument is
3260 * relative to the target window. */
3261static INLINE int
3262xd3_emit_run (xd3_stream *stream, usize_t pos, usize_t size, uint8_t run_c)
3263{
3264 xd3_rinst* ri;
3265 int ret;
3266
3267 XD3_ASSERT (pos + size <= stream->avail_in);
3268
3269 if ((ret = xd3_iopt_get_slot (stream, & ri))) { return ret; }
3270
3271 ri->type = XD3_RUN;
3272 ri->xtra = run_c;
3273 ri->pos = pos;
3274 ri->size = size;
3275
3276 return 0;
3277}
3278
3279/* This enters a potential copy instruction into the iopt buffer. The position argument
3280 * is relative to the target window.. */
3281static INLINE int
3282xd3_found_match (xd3_stream *stream, usize_t pos, usize_t size, xoff_t addr, int is_source)
3283{
3284 xd3_rinst* ri;
3285 int ret;
3286
3287 XD3_ASSERT (pos + size <= stream->avail_in);
3288
3289 if ((ret = xd3_iopt_get_slot (stream, & ri))) { return ret; }
3290
3291 ri->type = XD3_CPY;
3292 ri->xtra = is_source;
3293 ri->pos = pos;
3294 ri->size = size;
3295 ri->addr = addr;
3296
3297 return 0;
3298}
3299
/* Writes the header for the current window into the header section.
 *
 * For the first window (current_window == 0) the file header is written first: the
 * three magic bytes, the version byte, the header indicator and, depending on the
 * indicator bits, the secondary compressor id, the code table, and the application
 * header.  For every window the function then optionally runs secondary compression
 * on the data/inst/addr sections, and writes the window indicator, the source window
 * length and base (when a source was used), the encoding length, target length, delta
 * indicator, the three section lengths, and finally the Adler-32 checksum when
 * XD3_ADLER32 is set.
 *
 * Returns 0 on success or the first non-zero result of a helper. */
3300static int
3301xd3_emit_hdr (xd3_stream *stream)
3302{
3303 int ret;
3304 int use_secondary = stream->sec_type != NULL;
3305 int use_adler32 = stream->flags & XD3_ADLER32;
3306 int vcd_source = xd3_encoder_used_source (stream);
3307 uint win_ind = 0;
3308 uint del_ind = 0;
3309 usize_t enc_len;
3310 usize_t tgt_len;
3311 usize_t data_len;
3312 usize_t inst_len;
3313 usize_t addr_len;
3314
3315 XD3_ASSERT (stream->n_emit == stream->avail_in);
3316
 /* The file header is only written ahead of the first window. */
3317 if (stream->current_window == 0)
3318 {
3319 uint hdr_ind = 0;
3320 int use_appheader = stream->enc_appheader != NULL;
3321 int use_gencodetbl = GENERIC_ENCODE_TABLES && (stream->code_table_desc != & __rfc3284_code_table_desc);
3322
3323 if (use_secondary) { hdr_ind |= VCD_SECONDARY; }
3324 if (use_gencodetbl) { hdr_ind |= VCD_CODETABLE; }
3325 if (use_appheader) { hdr_ind |= VCD_APPHEADER; }
3326
3327 if ((ret = xd3_emit_byte (stream, & HDR_TAIL (stream), VCDIFF_MAGIC1)) != 0 ||
3328 (ret = xd3_emit_byte (stream, & HDR_TAIL (stream), VCDIFF_MAGIC2)) != 0 ||
3329 (ret = xd3_emit_byte (stream, & HDR_TAIL (stream), VCDIFF_MAGIC3)) != 0 ||
3330 (ret = xd3_emit_byte (stream, & HDR_TAIL (stream), VCDIFF_VERSION)) != 0 ||
3331 (ret = xd3_emit_byte (stream, & HDR_TAIL (stream), hdr_ind)) != 0)
3332 {
3333 return ret;
3334 }
3335
3336 /* Secondary compressor ID */
3337#if SECONDARY_ANY
3338 if (use_secondary && (ret = xd3_emit_byte (stream, & HDR_TAIL (stream), stream->sec_type->id))) { return ret; }
3339#endif
3340
3341 /* Compressed code table */
3342 if (use_gencodetbl)
3343 {
3344 usize_t code_table_size;
3345 const uint8_t *code_table_data;
3346
3347 if ((ret = stream->comp_table_func (stream, & code_table_data, & code_table_size))) { return ret; }
3348
 /* The emitted length is code_table_size + 2 because the near_modes and same_modes
  * bytes are written ahead of the table data. */
3349 if ((ret = xd3_emit_size (stream, & HDR_TAIL (stream), code_table_size + 2)) ||
3350 (ret = xd3_emit_byte (stream, & HDR_TAIL (stream), stream->code_table_desc->near_modes)) ||
3351 (ret = xd3_emit_byte (stream, & HDR_TAIL (stream), stream->code_table_desc->same_modes)) ||
3352 (ret = xd3_emit_bytes (stream, & HDR_TAIL (stream), code_table_data, code_table_size))) { return ret; }
3353 }
3354
3355 /* Application header */
3356 if (use_appheader)
3357 {
3358 if ((ret = xd3_emit_size (stream, & HDR_TAIL (stream), stream->enc_appheadsz)) ||
3359 (ret = xd3_emit_bytes (stream, & HDR_TAIL (stream), stream->enc_appheader, stream->enc_appheadsz)))
3360 {
3361 return ret;
3362 }
3363 }
3364 }
3365
3366 /* try to compress this window */
3367#if SECONDARY_ANY
3368 if (use_secondary)
3369 {
3370 int data_sec = 0;
3371 int inst_sec = 0;
3372 int addr_sec = 0;
3373
 /* ENCODE_SECONDARY_SECTION evaluates to non-zero only when the section is not
  * excluded by its XD3_SEC_NO* flag and xd3_encode_secondary returned an error (left
  * in ret).  The last argument receives the per-section flag tested below. */
3374# define ENCODE_SECONDARY_SECTION(UPPER,LOWER) \
3375 ((stream->flags & XD3_SEC_NO ## UPPER) == 0 && \
3376 (ret = xd3_encode_secondary (stream, & UPPER ## _HEAD (stream), & UPPER ## _TAIL (stream), \
3377 & xd3_sec_ ## LOWER (stream), \
3378 & stream->sec_ ## LOWER, & LOWER ## _sec)))
3379
3380 if (ENCODE_SECONDARY_SECTION (DATA, data) ||
3381 ENCODE_SECONDARY_SECTION (INST, inst) ||
3382 ENCODE_SECONDARY_SECTION (ADDR, addr))
3383 {
3384 return ret;
3385 }
3386
3387 del_ind |= (data_sec ? VCD_DATACOMP : 0);
3388 del_ind |= (inst_sec ? VCD_INSTCOMP : 0);
3389 del_ind |= (addr_sec ? VCD_ADDRCOMP : 0);
3390 }
3391#endif
3392
3393 /* if (vcd_target) { win_ind |= VCD_TARGET; } */
3394 if (vcd_source) { win_ind |= VCD_SOURCE; }
3395 if (use_adler32) { win_ind |= VCD_ADLER32; }
3396
3397 /* window indicator */
3398 if ((ret = xd3_emit_byte (stream, & HDR_TAIL (stream), win_ind))) { return ret; }
3399
3400 /* source window */
3401 if (vcd_source)
3402 {
3403 /* or (vcd_target) { ... } */
3404 if ((ret = xd3_emit_size (stream, & HDR_TAIL (stream), stream->src->srclen)) ||
3405 (ret = xd3_emit_size (stream, & HDR_TAIL (stream), stream->src->srcbase))) { return ret; }
3406 }
3407
 /* The section lengths are measured after any secondary compression above. */
3408 tgt_len = stream->avail_in;
3409 data_len = xd3_sizeof_output (DATA_HEAD (stream));
3410 inst_len = xd3_sizeof_output (INST_HEAD (stream));
3411 addr_len = xd3_sizeof_output (ADDR_HEAD (stream));
3412
3413 /* The enc_len field is redundant... doh! */
 /* enc_len counts what is written after it: one byte for the delta indicator, the four
  * encoded size fields, the three sections, and four checksum bytes when Adler-32 is
  * in use. */
3414 enc_len = (1 + (xd3_sizeof_size (tgt_len) +
3415 xd3_sizeof_size (data_len) +
3416 xd3_sizeof_size (inst_len) +
3417 xd3_sizeof_size (addr_len)) +
3418 data_len +
3419 inst_len +
3420 addr_len +
3421 (use_adler32 ? 4 : 0));
3422
3423 if ((ret = xd3_emit_size (stream, & HDR_TAIL (stream), enc_len)) ||
3424 (ret = xd3_emit_size (stream, & HDR_TAIL (stream), tgt_len)) ||
3425 (ret = xd3_emit_byte (stream, & HDR_TAIL (stream), del_ind)) ||
3426 (ret = xd3_emit_size (stream, & HDR_TAIL (stream), data_len)) ||
3427 (ret = xd3_emit_size (stream, & HDR_TAIL (stream), inst_len)) ||
3428 (ret = xd3_emit_size (stream, & HDR_TAIL (stream), addr_len)))
3429 {
3430 return ret;
3431 }
3432
3433 if (use_adler32)
3434 {
3435 uint8_t send[4];
3436 uint32_t a32 = adler32 (1L, stream->next_in, stream->avail_in);
3437
 /* The checksum of the window's input is sent most-significant byte first. */
3438 send[0] = (a32 >> 24);
3439 send[1] = (a32 >> 16);
3440 send[2] = (a32 >> 8);
3441 send[3] = (a32 & 0xff);
3442
3443 if ((ret = xd3_emit_bytes (stream, & HDR_TAIL (stream), send, 4))) { return ret; }
3444 }
3445
3446 return 0;
3447}
3448
3449/******************************************************************************************
3450 Encode routines
3451 ******************************************************************************************/
3452
3453static int
3454xd3_encode_buffer_leftover (xd3_stream *stream)
3455{
3456 usize_t take;
3457 usize_t room;
3458
3459 /* Allocate the buffer. */
3460 if (stream->buf_in == NULL && (stream->buf_in = xd3_alloc (stream, stream->winsize, 1)) == NULL)
3461 {
3462 return ENOMEM;
3463 }
3464
3465 /* Take leftover input first. */
3466 if (stream->buf_leftover != NULL)
3467 {
3468 XD3_ASSERT (stream->buf_avail == 0);
3469 XD3_ASSERT (stream->buf_leftavail < stream->winsize);
3470
3471 IF_DEBUG1 (P(RINT "[leftover] previous %u avail %u\n", stream->buf_leftavail, stream->avail_in));
3472
3473 memcpy (stream->buf_in, stream->buf_leftover, stream->buf_leftavail);
3474
3475 stream->buf_leftover = NULL;
3476 stream->buf_avail = stream->buf_leftavail;
3477 }
3478
3479 /* Copy into the buffer. */
3480 room = stream->winsize - stream->buf_avail;
3481 take = min (room, stream->avail_in);
3482
3483 memcpy (stream->buf_in + stream->buf_avail, stream->next_in, take);
3484
3485 stream->buf_avail += take;
3486
3487 if (take < stream->avail_in)
3488 {
3489 /* Buffer is full */
3490 stream->buf_leftover = stream->next_in + take;
3491 stream->buf_leftavail = stream->avail_in - take;
3492
3493 IF_DEBUG1 (P(RINT "[leftover] take %u remaining %u\n", take, stream->buf_leftavail));
3494 }
3495 else if ((stream->buf_avail < stream->winsize) && !(stream->flags & XD3_FLUSH))
3496 {
3497 /* Buffer has space */
3498 IF_DEBUG1 (P(RINT "[leftover] %u emptied\n", take));
3499 return XD3_INPUT;
3500 }
3501
3502 /* Use the buffer: */
3503 stream->next_in = stream->buf_in;
3504 stream->avail_in = stream->buf_avail;
3505 stream->buf_avail = 0;
3506
3507 return 0;
3508}
3509
3510/* This function allocates all memory initially used by the encoder. */
3511static int
3512xd3_encode_init (xd3_stream *stream)
3513{
3514 int i;
3515 int large_comp = (stream->src != NULL);
3516 int small_comp = ! (stream->flags & XD3_NOCOMPRESS);
3517 /*int small_prev = (stream->small_chain > 1);*/
3518 int space_fact = (large_comp + small_comp);
3519 int memsize = stream->memsize;
3520
3521 /* Memory allocations for checksum tables are delayed until xd3_string_match_init in the
3522 * first call to string_match--that way identical or short inputs require no table
3523 * allocation. */
3524 if (large_comp)
3525 {
3526 xd3_size_hashtable (stream, memsize / space_fact, & stream->large_hash);
3527 }
3528
3529 if (small_comp)
3530 {
3531 xd3_size_hashtable (stream, memsize / space_fact, & stream->small_hash);
3532 }
3533
3534 for (i = 0; i < ENC_SECTS; i += 1)
3535 {
3536 if ((stream->enc_heads[i] = stream->enc_tails[i] =
3537 xd3_alloc_output (stream, NULL)) == NULL)
3538 {
3539 goto fail;
3540 }
3541 }
3542
3543 /* iopt buffer */
3544 xd3_rlist_init (& stream->iopt.used);
3545 xd3_rlist_init (& stream->iopt.free);
3546
3547 if ((stream->iopt.buffer = xd3_alloc (stream, sizeof (xd3_rinst), stream->iopt_size)) == NULL)
3548 {
3549 goto fail;
3550 }
3551
3552 for (i = 0; i < stream->iopt_size; i += 1)
3553 {
3554 xd3_rlist_push_back (& stream->iopt.free, & stream->iopt.buffer[i]);
3555 }
3556
3557 XD3_ASSERT (xd3_rlist_length (& stream->iopt.free) == stream->iopt_size);
3558 XD3_ASSERT (xd3_rlist_length (& stream->iopt.used) == 0);
3559
3560 /* address cache, code table */
3561 stream->acache.s_near = stream->code_table_desc->near_modes;
3562 stream->acache.s_same = stream->code_table_desc->same_modes;
3563 stream->code_table = stream->code_table_func ();
3564
3565 return xd3_alloc_cache (stream);
3566
3567 fail:
3568
3569 return ENOMEM;
3570}
3571
3572#if XD3_DEBUG
3573static int
3574xd3_check_sprevlist (xd3_stream *stream)
3575{
3576 int i;
3577 for (i = 0; i < stream->sprevsz; i += 1)
3578 {
3579 xd3_slist *l = & stream->small_prev[i];
3580
3581 XD3_ASSERT (l->prev->next == l);
3582 XD3_ASSERT (l->next->prev == l);
3583 }
3584 return 1;
3585}
3586#endif
3587
3588/* Called after the ENC_POSTOUT state, this puts the output buffers back into separate
3589 * lists and re-initializes some variables. (The output lists were spliced together
3590 * during the ENC_FLUSH state.) */
3591static void
3592xd3_encode_reset (xd3_stream *stream)
3593{
3594 int i;
3595 xd3_output *olist;
3596
3597 XD3_ASSERT (stream->small_prev == NULL || xd3_check_sprevlist (stream));
3598
3599 IF_DEBUG (stream->n_emit = 0);
3600 stream->avail_in = 0;
3601 stream->small_reset = 1;
3602
3603 if (stream->src != NULL)
3604 {
3605 stream->src->srcbase = 0;
3606 stream->src->srclen = 0;
3607 stream->srcwin_decided = 0;
3608 stream->match_minaddr = 0;
3609 stream->match_maxaddr = 0;
3610 stream->taroff = 0;
3611 }
3612
3613 /* Reset output chains. */
3614 olist = stream->enc_heads[0];
3615
3616 for (i = 0; i < ENC_SECTS; i += 1)
3617 {
3618 XD3_ASSERT (olist != NULL);
3619
3620 stream->enc_heads[i] = olist;
3621 stream->enc_tails[i] = olist;
3622 olist = olist->next_page;
3623
3624 stream->enc_heads[i]->next = 0;
3625 stream->enc_heads[i]->next_page = NULL;
3626
3627 stream->enc_tails[i]->next_page = NULL;
3628 stream->enc_tails[i] = stream->enc_heads[i];
3629 }
3630
3631 xd3_freelist_output (stream, olist);
3632}
3633
/* The main encoding routine.
 *
 * This is a re-entrant state machine driven by stream->enc_state.  Each call resumes at
 * the saved state and runs until it has something to report to the caller.  Values
 * returned directly by this function:
 *
 *   XD3_INPUT      no input has been supplied yet, or the window is complete and more
 *                  input is wanted
 *   XD3_WINSTART   a new input window has been set up
 *   XD3_OUTPUT     next_out/avail_out describe one buffer of encoded output
 *   XD3_WINFINISH  all output buffers of the current window have been returned
 *   EINVAL         the stream is in use as a decoder, output was not consumed, or the
 *                  state is invalid (stream->msg is set)
 *
 * Non-zero results of the helper routines called below are returned unchanged.
 *
 * Several case labels deliberately fall through into the next state, and the enc_flush
 * and enc_output labels are goto targets from later states. */
int
xd3_encode_input (xd3_stream *stream)
{
  int ret, i;

  /* A stream with decoder state cannot be used for encoding. */
  if (stream->dec_state != 0)
    {
      stream->msg = "encoder/decoder transition";
      return EINVAL;
    }

  switch (stream->enc_state)
    {
    case ENC_INIT:
      /* Only reached on first time through: memory setup. */
      if ((ret = xd3_encode_init (stream))) { return ret; }

      stream->enc_state = ENC_INPUT;

      /* Fall through. */

    case ENC_INPUT:

      /* If there is no input yet, just return.  This checks for next_in == NULL, not
       * avail_in == 0 since zero bytes is a valid input.  There is an assertion in
       * xd3_avail_input() that next_in != NULL for this reason.  By returning right away
       * we avoid creating an input buffer before the caller has supplied its first data.
       * It is possible for xd3_avail_input to be called both before and after the first
       * call to xd3_encode_input(). */
      if (stream->next_in == NULL)
        {
          return XD3_INPUT;
        }

    enc_flush:
      /* See if we should buffer the input: either if there is already a leftover buffer,
       * or if the input is short of winsize without flush.  The label at this point is
       * reached by a goto below, when there is leftover input after postout. */
      if ((stream->buf_leftover != NULL) ||
          (stream->avail_in < stream->winsize && ! (stream->flags & XD3_FLUSH)))
        {
          if ((ret = xd3_encode_buffer_leftover (stream))) { return ret; }
        }

      /* Initialize the address cache before each window. */
      xd3_init_cache (& stream->acache);

      /* NOTE(review): pos_in and min_match are not declared in this function; presumably
       * they are macros or names defined elsewhere in the file -- confirm. */
      pos_in = 0;
      min_match = MIN_MATCH;
      stream->unencoded_offset = 0;

      stream->enc_state = ENC_SEARCH;

      IF_DEBUG1 (P(RINT "[input window:%"Q"u] input bytes %u offset %"Q"u\n",
                   stream->current_window, stream->avail_in, stream->total_in));

      return XD3_WINSTART;

    case ENC_SEARCH:

      /* Reentrant matching. */
      if (stream->src != NULL)
        {
          switch (stream->match_state)
            {
            case MATCH_TARGET:
              /* Try matching forward at the start of the target.  This is entered the
               * first time through, to check for a perfect match, and whenever there is a
               * source match that extends to the end of the previous window.  The
               * match_srcpos field is initially zero and later set during
               * xd3_source_extend_match. */
              if (stream->avail_in > 0) {
                /* This call can't fail because the source window is unrestricted. */
                ret = xd3_source_match_setup (stream, stream->match_srcpos);
                XD3_ASSERT (ret == 0);
                stream->match_state = MATCH_FORWARD;
              } else {
                stream->match_state = MATCH_SEARCHING;
              }
              XD3_ASSERT (stream->match_fwd == 0);

              /* Fall through. */

            case MATCH_FORWARD:
            case MATCH_BACKWARD:
              if (stream->avail_in != 0)
                {
                  /* A non-zero result is passed to the caller; the match state is kept so
                   * the next call resumes here. */
                  if ((ret = xd3_source_extend_match (stream)) != 0)
                    {
                      return ret;
                    }

                  stream->input_position += stream->match_fwd;
                }

              /* Fall through. */

            case MATCH_SEARCHING:
              /* Continue string matching.  (It's possible that the initial match
               * continued through the entire input, in which case we're still in
               * MATCH_FORWARD and should remain so for the next input window.) */
              break;
            }
        }

      /* String matching... */
      if (stream->avail_in != 0 &&
          (ret = stream->string_match (stream)))
        {
          return ret;
        }

      /* Flush the instruction buffer, then possibly add one more instruction, then emit
       * the header. */
      stream->enc_state = ENC_FLUSH;
      if ((ret = xd3_iopt_flush_instructions (stream, 1)) ||
          (ret = xd3_iopt_add_finalize (stream)) ||
          (ret = xd3_emit_hdr (stream)))
        {
          return ret;
        }

      /* Begin output. */
      stream->enc_current = HDR_HEAD (stream);

      /* Chain all the outputs together.  After doing this, it looks as if there is only
       * one section.  The other enc_heads are set to NULL to avoid freeing them more than
       * once. */
      for (i = 1; i < ENC_SECTS; i += 1)
        {
          stream->enc_tails[i-1]->next_page = stream->enc_heads[i];
          stream->enc_heads[i] = NULL;
        }

    enc_output:

      /* Expose the current output page to the caller and account for its bytes. */
      stream->enc_state  = ENC_POSTOUT;
      stream->next_out   = stream->enc_current->base;
      stream->avail_out  = stream->enc_current->next;
      stream->total_out += (xoff_t) stream->avail_out;

      /* If there is any output in this buffer, return it, otherwise fall through to
       * handle the next buffer or finish the window after all buffers have been
       * output. */
      if (stream->avail_out > 0)
        {
          /* This is the only place xd3_encode returns XD3_OUTPUT */
          return XD3_OUTPUT;
        }

      /* Fall through. */

    case ENC_POSTOUT:

      /* avail_out must be zero again before the next page can be offered. */
      if (stream->avail_out != 0)
        {
          stream->msg = "missed call to consume output";
          return EINVAL;
        }

      /* Continue outputting one buffer at a time, until the next is NULL. */
      if ((stream->enc_current = stream->enc_current->next_page) != NULL)
        {
          goto enc_output;
        }

      stream->total_in += (xoff_t) stream->avail_in;
      stream->enc_state = ENC_POSTWIN;

      return XD3_WINFINISH;

    case ENC_POSTWIN:

      /* Separate the output chains again and clear the per-window state. */
      xd3_encode_reset (stream);

      stream->current_window += 1;
      stream->enc_state = ENC_INPUT;

      /* If there is leftover input to flush, repeat. */
      if ((stream->buf_leftover != NULL) && (stream->flags & XD3_FLUSH))
        {
          goto enc_flush;
        }

      /* Ready for more input. */
      return XD3_INPUT;

    default:
      stream->msg = "invalid state";
      return EINVAL;
    }
}
3819#endif /* XD3_ENCODER */
3820
3821/******************************************************************************************
3822 Client convenience functions
3823 ******************************************************************************************/
3824
3825/* This function invokes either encode or decode to and from in-memory arrays. The output array
3826 * must be large enough to hold the output or else ENOSPC is returned. */
3827static int
3828xd3_process_completely (xd3_stream *stream,
3829 int (*func) (xd3_stream *),
3830 int close_stream,
3831 const uint8_t *input,
3832 usize_t input_size,
3833 uint8_t *output,
3834 usize_t *output_size,
3835 usize_t avail_size)
3836{
3837 (*output_size) = 0;
3838
3839 stream->flags |= XD3_FLUSH;
3840
3841 xd3_avail_input (stream, input, input_size);
3842
3843 for (;;)
3844 {
3845 int ret;
3846 switch((ret = func (stream)))
3847 {
3848 case XD3_OUTPUT: { /* memcpy below */ break; }
3849 case XD3_INPUT: { /* this means EOF */ goto done; }
3850 case XD3_GOTHEADER: { /* ignore */ continue; }
3851 case XD3_WINSTART: { /* ignore */ continue; }
3852 case XD3_WINFINISH: { /* ignore */ continue; }
3853 case XD3_GETSRCBLK:
3854 {
3855 stream->msg = "stream requires source input";
3856 return EINVAL;
3857 }
3858 case 0: /* there is no plain "success" return for xd3_encode/decode */
3859 XD3_ASSERT (ret != 0);
3860 default:
3861 return ret;
3862 }
3863
3864 if (*output_size + stream->avail_out > avail_size)
3865 {
3866 stream->msg = "insufficient output space";
3867 return ENOSPC;
3868 }
3869
3870 memcpy (output + *output_size, stream->next_out, stream->avail_out);
3871
3872 *output_size += stream->avail_out;
3873
3874 xd3_consume_output (stream);
3875 }
3876 done:
3877 return (close_stream == 0) ? 0 : xd3_close_stream (stream);
3878}
3879
3880int
3881xd3_decode_completely (xd3_stream *stream,
3882 const uint8_t *input,
3883 usize_t input_size,
3884 uint8_t *output,
3885 usize_t *output_size,
3886 usize_t avail_size)
3887{
3888 return xd3_process_completely (stream, & xd3_decode_input, 1,
3889 input, input_size,
3890 output, output_size, avail_size);
3891}
3892
3893#if XD3_ENCODER
3894int
3895xd3_encode_completely (xd3_stream *stream,
3896 const uint8_t *input,
3897 usize_t input_size,
3898 uint8_t *output,
3899 usize_t *output_size,
3900 usize_t avail_size)
3901{
3902 return xd3_process_completely (stream, & xd3_encode_input, 1,
3903 input, input_size,
3904 output, output_size, avail_size);
3905}
3906#endif
3907
3908/******************************************************************************************
3909 DECODE stuff
3910 ******************************************************************************************/
3911
3912/* Return true if the caller must provide a source. Theoretically, this has to be checked
3913 * after every window. It could be that the first window requires no source, but the
3914 * second window does. In practice? */
3915int xd3_decoder_needs_source (xd3_stream *stream)
3916{
3917 return stream->dec_win_ind & VCD_SOURCE;
3918}
3919
3920/* Initialize the decoder for a new window. The dec_tgtlen value is preserved across
3921 * successive window decodings, and the update to dec_winstart is delayed until a new
3922 * window actually starts. This is to avoid throwing an error due to overflow until the
3923 * last possible moment. This makes it possible to encode exactly 4GB through a 32-bit
3924 * encoder. */
3925static int
3926xd3_decode_init_window (xd3_stream *stream)
3927{
3928 stream->dec_cpylen = 0;
3929 stream->dec_cpyoff = 0;
3930 stream->dec_cksumbytes = 0;
3931
3932 xd3_init_cache (& stream->acache);
3933
3934 return 0;
3935}
3936
3937/* Allocates buffer space for the target window and possibly the VCD_TARGET copy-window.
3938 * Also sets the base of the two copy segments. */
3939static int
3940xd3_decode_setup_buffers (xd3_stream *stream)
3941{
3942 /* If VCD_TARGET is set then the previous buffer may be reused. */
3943 if (stream->dec_win_ind & VCD_TARGET)
3944 {
3945 /* But this implementation only supports copying from the last target window. If the
3946 * offset is outside that range, it can't be done. */
3947 if (stream->dec_cpyoff < stream->dec_laststart)
3948 {
3949 stream->msg = "unsupported VCD_TARGET offset";
3950 return EINVAL;
3951 }
3952
3953 /* See if the two windows are the same. This indicates the first time VCD_TARGET is
3954 * used. This causes a second buffer to be allocated, after that the two are
3955 * swapped in the DEC_FINISH case. */
3956 if (stream->dec_lastwin == stream->next_out)
3957 {
3958 stream->next_out = NULL;
3959 stream->space_out = 0;
3960 }
3961
3962 stream->dec_cpyaddrbase = stream->dec_lastwin + (usize_t) (stream->dec_cpyoff - stream->dec_laststart);
3963 }
3964
3965 /* See if the current output window is large enough. */
3966 if (stream->space_out < stream->dec_tgtlen)
3967 {
3968 xd3_free (stream, stream->dec_buffer);
3969
3970 stream->space_out = xd3_round_blksize (stream->dec_tgtlen, XD3_ALLOCSIZE);
3971
3972 if ((stream->dec_buffer = xd3_alloc (stream, stream->space_out, 1)) == NULL)
3973 {
3974 return ENOMEM;
3975 }
3976
3977 stream->next_out = stream->dec_buffer;
3978 }
3979
3980 /* dec_tgtaddrbase refers to an invalid base address, but it is always used with a
3981 * sufficiently large instruction offset (i.e., beyond the copy window). This condition
3982 * is enforced by xd3_decode_output_halfinst. */
3983 stream->dec_tgtaddrbase = stream->next_out - stream->dec_cpylen;
3984
3985 return 0;
3986}
3987
3988static int
3989xd3_decode_allocate (xd3_stream *stream,
3990 usize_t size,
3991 uint8_t **copied1,
3992 usize_t *alloc1,
3993 uint8_t **copied2,
3994 usize_t *alloc2)
3995{
3996 if (*copied1 != NULL && *alloc1 < size)
3997 {
3998 xd3_free (stream, *copied1);
3999 *copied1 = NULL;
4000 }
4001
4002 if (*copied1 == NULL)
4003 {
4004#if SECONDARY_ANY
4005 /* Borrow from the secondary compressor's allocation. */
4006 if (copied2 != NULL && *copied2 != NULL && *alloc2 < size)
4007 {
4008 *copied1 = *copied2;
4009 *alloc1 = *alloc2;
4010 *copied2 = NULL;
4011 *alloc2 = 0;
4012 }
4013 else
4014#endif
4015 {
4016 *alloc1 = xd3_round_blksize (size, XD3_ALLOCSIZE);
4017
4018 if ((*copied1 = xd3_alloc (stream, *alloc1, 1)) == NULL)
4019 {
4020 return ENOMEM;
4021 }
4022 }
4023 }
4024
4025 return 0;
4026}
4027
4028static int
4029xd3_decode_section (xd3_stream *stream,
4030 xd3_desect *section,
4031 xd3_decode_state nstate,
4032 int copy)
4033{
4034 XD3_ASSERT (section->pos <= section->size);
4035 XD3_ASSERT (stream->dec_state != nstate);
4036
4037 if (section->pos < section->size)
4038 {
4039 usize_t sect_take;
4040
4041 if (stream->avail_in == 0)
4042 {
4043 return XD3_INPUT;
4044 }
4045
4046 if ((copy == 0) && (section->pos == 0))
4047 {
4048 /* No allocation/copy needed */
4049 section->buf = stream->next_in;
4050 sect_take = section->size;
4051 }
4052 else
4053 {
4054 usize_t sect_need = section->size - section->pos;
4055
4056 /* Allocate and copy */
4057 sect_take = min (sect_need, stream->avail_in);
4058
4059 if (section->pos == 0)
4060 {
4061 int ret;
4062
4063 if ((ret = xd3_decode_allocate (stream,
4064 section->size,
4065 & section->copied1,
4066 & section->alloc1,
4067 & section->copied2,
4068 & section->alloc2))) { return ret; }
4069
4070 section->buf = section->copied1;
4071 }
4072
4073 memcpy (section->copied1 + section->pos,
4074 stream->next_in,
4075 sect_take);
4076 }
4077
4078 section->pos += sect_take;
4079
4080 stream->dec_winbytes += sect_take;
4081
4082 DECODE_INPUT (sect_take);
4083 }
4084
4085 if (section->pos < section->size)
4086 {
4087 stream->msg = "further input required";
4088 return XD3_INPUT;
4089 }
4090
4091 XD3_ASSERT (section->pos == section->size);
4092
4093 stream->dec_state = nstate;
4094 section->buf_max = section->buf + section->size;
4095 section->pos = 0;
4096 return 0;
4097}
4098
4099/* Decode the size and address for half of an instruction (i.e., a single opcode). This
4100 * updates the stream->dec_position, which are bytes already output prior to processing
4101 * this instruction. Perform bounds checking for sizes and copy addresses, which uses the
4102 * dec_position (which is why these checks are done here). */
4103static int
4104xd3_decode_parse_halfinst (xd3_stream *stream, xd3_hinst *inst)
4105{
4106 int ret;
4107
4108 /* If the size from the instruction table is zero then read a size value. */
4109 if ((inst->size == 0) &&
4110 (ret = xd3_read_size (stream,
4111 & stream->inst_sect.buf,
4112 stream->inst_sect.buf_max,
4113
4114 & inst->size)))
4115 {
4116 return EINVAL;
4117 }
4118
4119 /* For copy instructions, read address. */
4120 if (inst->type >= XD3_CPY)
4121 {
4122 IF_DEBUG1 ({
4123 static int cnt = 0;
4124 P(RINT "DECODE:%u: COPY at %"Q"u (winoffset %u) size %u winaddr %u\n",
4125 cnt++,
4126 stream->total_out + (stream->dec_position - stream->dec_cpylen),
4127 (stream->dec_position - stream->dec_cpylen),
4128 inst->size,
4129 inst->addr);
4130 });
4131
4132 if ((ret = xd3_decode_address (stream,
4133 stream->dec_position,
4134 inst->type - XD3_CPY,
4135 & stream->addr_sect.buf,
4136 stream->addr_sect.buf_max,
4137 & inst->addr)))
4138 {
4139 return ret;
4140 }
4141
4142 /* Cannot copy an address before it is filled-in. */
4143 if (inst->addr >= stream->dec_position)
4144 {
4145 stream->msg = "address too large";
4146 return EINVAL;
4147 }
4148
4149 /* Check: a VCD_TARGET or VCD_SOURCE copy cannot exceed the remaining buffer space
4150 * in its own segment. */
4151 if (inst->addr < stream->dec_cpylen && inst->addr + inst->size > stream->dec_cpylen)
4152 {
4153 stream->msg = "size too large";
4154 return EINVAL;
4155 }
4156 }
4157 else
4158 {
4159 IF_DEBUG1 ({
4160 if (inst->type == XD3_ADD)
4161 {
4162 static int cnt;
4163 P(RINT "DECODE:%d: ADD at %"Q"u (winoffset %u) size %u\n",
4164 cnt++,
4165 stream->total_out + stream->dec_position - stream->dec_cpylen,
4166 stream->dec_position - stream->dec_cpylen,
4167 inst->size);
4168 }
4169 else
4170 {
4171 static int cnt;
4172 XD3_ASSERT (inst->type == XD3_RUN);
4173 P(RINT "DECODE:%d: RUN at %"Q"u (winoffset %u) size %u\n",
4174 cnt++,
4175 stream->total_out + stream->dec_position - stream->dec_cpylen,
4176 stream->dec_position - stream->dec_cpylen,
4177 inst->size);
4178 }
4179 });
4180 }
4181
4182 /* Check: The instruction will not overflow the output buffer. */
4183 if (stream->dec_position + inst->size > stream->dec_maxpos)
4184 {
4185 stream->msg = "size too large";
4186 return EINVAL;
4187 }
4188
4189 stream->dec_position += inst->size;
4190 return 0;
4191}
4192
4193/* Decode a single opcode and then decode the two half-instructions. */
4194static int
4195xd3_decode_instruction (xd3_stream *stream)
4196{
4197 int ret;
4198 const xd3_dinst *inst;
4199
4200 if (stream->inst_sect.buf == stream->inst_sect.buf_max)
4201 {
4202 stream->msg = "instruction underflow";
4203 return EINVAL;
4204 }
4205
4206 inst = &stream->code_table[*stream->inst_sect.buf++];
4207
4208 stream->dec_current1.type = inst->type1;
4209 stream->dec_current2.type = inst->type2;
4210 stream->dec_current1.size = inst->size1;
4211 stream->dec_current2.size = inst->size2;
4212
4213 /* For each instruction with a real operation, decode the corresponding size and
4214 * addresses if necessary. Assume a code-table may have NOOP in either position,
4215 * although this is unlikely. */
4216 if (inst->type1 != XD3_NOOP && (ret = xd3_decode_parse_halfinst (stream, & stream->dec_current1)))
4217 {
4218 return ret;
4219 }
4220 if (inst->type2 != XD3_NOOP && (ret = xd3_decode_parse_halfinst (stream, & stream->dec_current2)))
4221 {
4222 return ret;
4223 }
4224 return 0;
4225}
4226
4227/* Output the result of a single half-instruction. OPT: This the decoder hotspot. */
4228static int
4229xd3_decode_output_halfinst (xd3_stream *stream, xd3_hinst *inst)
4230{
4231 /* To make this reentrant, set take = min (inst->size, available space)... */
4232 usize_t take = inst->size;
4233
4234 XD3_ASSERT (inst->type != XD3_NOOP);
4235
4236 switch (inst->type)
4237 {
4238 case XD3_RUN:
4239 {
4240 /* Only require a single data byte. */
4241 if (stream->data_sect.buf == stream->data_sect.buf_max)
4242 {
4243 stream->msg = "data underflow";
4244 return EINVAL;
4245 }
4246
4247 /* TUNE: Probably want to eliminate memset/memcpy here */
4248 memset (stream->next_out + stream->avail_out,
4249 stream->data_sect.buf[0],
4250 take);
4251
4252 stream->data_sect.buf += 1;
4253 stream->avail_out += take;
4254 inst->type = XD3_NOOP;
4255 break;
4256 }
4257 case XD3_ADD:
4258 {
4259 /* Require at least TAKE data bytes. */
4260 if (stream->data_sect.buf + take > stream->data_sect.buf_max)
4261 {
4262 stream->msg = "data underflow";
4263 return EINVAL;
4264 }
4265
4266 memcpy (stream->next_out + stream->avail_out,
4267 stream->data_sect.buf,
4268 take);
4269
4270 stream->data_sect.buf += take;
4271 stream->avail_out += take;
4272 inst->type = XD3_NOOP;
4273 break;
4274 }
4275 default:
4276 {
4277 usize_t i;
4278 const uint8_t *src;
4279 uint8_t *dst;
4280
4281 /* See if it copies from the VCD_TARGET/VCD_SOURCE window or the target window.
4282 * Out-of-bounds checks for the addresses and sizes are performed in
4283 * xd3_decode_parse_halfinst. */
4284 if (inst->addr < stream->dec_cpylen)
4285 {
4286 if (stream->dec_win_ind & VCD_TARGET)
4287 {
4288 /* For VCD_TARGET we know the entire range is in-memory, as established by
4289 * decode_setup_buffers. */
4290 src = stream->dec_cpyaddrbase + inst->addr;
4291 inst->type = XD3_NOOP;
4292 inst->size = 0;
4293 }
4294 else
4295 {
4296 /* In this case we have to read a source block, which could return control
4297 * to the caller. We need to know the first block number needed for this
4298 * copy. */
4299 xd3_source *source;
4300 xoff_t block;
4301 usize_t blkoff;
4302 usize_t blksize;
4303 int ret;
4304
4305 more:
4306
4307 source = stream->src;
4308 block = source->cpyoff_blocks;
4309 blkoff = source->cpyoff_blkoff + inst->addr;
4310 blksize = source->blksize;
4311
4312 while (blkoff >= blksize)
4313 {
4314 block += 1;
4315 blkoff -= blksize;
4316 }
4317
4318 if ((ret = xd3_getblk (stream, block)))
4319 {
4320 /* could be a XD3_GETSRCBLK failure. */
4321 return ret;
4322 }
4323
4324 src = source->curblk + blkoff;
4325
4326 /* This block either contains enough data or the source file is
4327 * short. */
4328 if ((source->onblk != blksize) && (blkoff + take > source->onblk))
4329 {
4330 stream->msg = "source file too short";
4331 return EINVAL;
4332
4333 }
4334
4335 XD3_ASSERT (blkoff != blksize);
4336
4337 if (blkoff + take <= blksize)
4338 {
4339 inst->type = XD3_NOOP;
4340 inst->size = 0;
4341 }
4342 else
4343 {
4344 /* This block doesn't contain all the data, modify the instruction, do
4345 * not set to XD3_NOOP. */
4346 take = blksize - blkoff;
4347 inst->size -= take;
4348 inst->addr += take;
4349 }
4350 }
4351 }
4352 else
4353 {
4354 /* For a target-window copy, we know the entire range is in-memory. The
4355 * dec_tgtaddrbase is negatively offset by dec_cpylen because the addresses
4356 * start beyond that point. */
4357 src = stream->dec_tgtaddrbase + inst->addr;
4358 inst->type = XD3_NOOP;
4359 inst->size = 0;
4360 }
4361
4362 dst = stream->next_out + stream->avail_out;
4363
4364 stream->avail_out += take;
4365
4366 /* Can't just memcpy here due to possible overlap. */
4367 for (i = take; i != 0; i -= 1)
4368 {
4369 *dst++ = *src++;
4370 }
4371
4372 take = inst->size;
4373
4374 /* If there is more to copy, call getblk again. */
4375 if (inst->type != XD3_NOOP)
4376 {
4377 XD3_ASSERT (take > 0);
4378 goto more;
4379 }
4380 else
4381 {
4382 XD3_ASSERT (take == 0);
4383 }
4384 }
4385 }
4386
4387 return 0;
4388}
4389
4390static int
4391xd3_decode_finish_window (xd3_stream *stream)
4392{
4393 stream->dec_winbytes = 0;
4394 stream->dec_state = DEC_FINISH;
4395
4396 stream->data_sect.pos = 0;
4397 stream->inst_sect.pos = 0;
4398 stream->addr_sect.pos = 0;
4399
4400 return XD3_OUTPUT;
4401}
4402
4403static int
4404xd3_decode_sections (xd3_stream *stream)
4405{
4406 usize_t need, more, take;
4407 int copy, ret;
4408
4409 if ((stream->flags & XD3_JUST_HDR) != 0)
4410 {
4411 /* Nothing left to do. */
4412 return xd3_decode_finish_window (stream);
4413 }
4414
4415 /* To avoid copying, need this much data available */
4416 need = (stream->inst_sect.size +
4417 stream->addr_sect.size +
4418 stream->data_sect.size);
4419
4420 /* The window may be entirely processed. */
4421 XD3_ASSERT (stream->dec_winbytes <= need);
4422
4423 /* Compute how much more input is needed. */
4424 more = (need - stream->dec_winbytes);
4425
4426 /* How much to consume. */
4427 take = min (more, stream->avail_in);
4428
4429 /* See if the input is completely available, to avoid copy. */
4430 copy = (take != more);
4431
4432 /* If the window is skipped... */
4433 if ((stream->flags & XD3_SKIP_WINDOW) != 0)
4434 {
4435 /* Skip the available input. */
4436 DECODE_INPUT (take);
4437
4438 stream->dec_winbytes += take;
4439
4440 if (copy)
4441 {
4442 stream->msg = "further input required";
4443 return XD3_INPUT;
4444 }
4445
4446 return xd3_decode_finish_window (stream);
4447 }
4448
4449 /* Process all but the DATA section. */
4450 switch (stream->dec_state)
4451 {
4452 default:
4453 stream->msg = "internal error";
4454 return EINVAL;
4455
4456 case DEC_DATA:
4457 if ((ret = xd3_decode_section (stream, & stream->data_sect, DEC_INST, copy))) { return ret; }
4458 case DEC_INST:
4459 if ((ret = xd3_decode_section (stream, & stream->inst_sect, DEC_ADDR, copy))) { return ret; }
4460 case DEC_ADDR:
4461 if ((ret = xd3_decode_section (stream, & stream->addr_sect, DEC_EMIT, copy))) { return ret; }
4462 }
4463
4464 XD3_ASSERT (stream->dec_winbytes == need);
4465
4466#if SECONDARY_ANY
4467#define DECODE_SECONDARY_SECTION(UPPER,LOWER) \
4468 ((stream->dec_del_ind & VCD_ ## UPPER ## COMP) && \
4469 (ret = xd3_decode_secondary (stream, & stream-> LOWER ## _sect, \
4470 & xd3_sec_ ## LOWER (stream))))
4471
4472 if (DECODE_SECONDARY_SECTION (DATA, data) ||
4473 DECODE_SECONDARY_SECTION (INST, inst) ||
4474 DECODE_SECONDARY_SECTION (ADDR, addr))
4475 {
4476 return ret;
4477 }
4478#endif
4479
4480 if (stream->flags & XD3_SKIP_EMIT)
4481 {
4482 return xd3_decode_finish_window (stream);
4483 }
4484
4485 /* OPT: A possible optimization is to avoid allocating memory in decode_setup_buffers
4486 * and to avoid a large memcpy when the window consists of a single VCD_SOURCE copy
4487 * instruction. The only potential problem is if the following window is a VCD_TARGET,
4488 * then you need to remember... */
4489 if ((ret = xd3_decode_setup_buffers (stream))) { return ret; }
4490
4491 return 0;
4492}
4493
4494static int
4495xd3_decode_emit (xd3_stream *stream)
4496{
4497 int ret;
4498
4499 /* Produce output: originally structured to allow reentrant code that fills as much of
4500 * the output buffer as possible, but VCDIFF semantics allows to copy from anywhere from
4501 * the target window, so instead allocate a sufficiently sized buffer after the target
4502 * window length is decoded.
4503 *
4504 * This code still needs to be reentrant to allow XD3_GETSRCBLK to return control. This
4505 * is handled by setting the stream->dec_currentN instruction types to XD3_NOOP after
4506 * they have been processed. */
4507 XD3_ASSERT (! (stream->flags & XD3_SKIP_EMIT));
4508 XD3_ASSERT (stream->avail_out == 0);
4509 XD3_ASSERT (stream->dec_tgtlen <= stream->space_out);
4510
4511 while (stream->inst_sect.buf != stream->inst_sect.buf_max)
4512 {
4513 /* Decode next instruction pair. */
4514 if ((stream->dec_current1.type == XD3_NOOP) &&
4515 (stream->dec_current2.type == XD3_NOOP) &&
4516 (ret = xd3_decode_instruction (stream))) { return ret; }
4517
4518 /* Output for each instruction. */
4519 if ((stream->dec_current1.type != XD3_NOOP) &&
4520 (ret = xd3_decode_output_halfinst (stream, & stream->dec_current1))) { return ret; }
4521
4522 if ((stream->dec_current2.type != XD3_NOOP) &&
4523 (ret = xd3_decode_output_halfinst (stream, & stream->dec_current2))) { return ret; }
4524 }
4525
4526 if (stream->avail_out != stream->dec_tgtlen)
4527 {
4528 IF_DEBUG1 (P(RINT "AVAIL_OUT(%d) != DEC_TGTLEN(%d)\n", stream->avail_out, stream->dec_tgtlen));
4529 stream->msg = "wrong window length";
4530 return EINVAL;
4531 }
4532
4533 if (stream->data_sect.buf != stream->data_sect.buf_max)
4534 {
4535 stream->msg = "extra data section";
4536 return EINVAL;
4537 }
4538
4539 if (stream->addr_sect.buf != stream->addr_sect.buf_max)
4540 {
4541 stream->msg = "extra address section";
4542 return EINVAL;
4543 }
4544
4545 /* OPT: Should cksum computation be combined with the above loop? */
4546 if ((stream->dec_win_ind & VCD_ADLER32) != 0 &&
4547 (stream->flags & XD3_ADLER32_NOVER) == 0)
4548 {
4549 uint32_t a32 = adler32 (1L, stream->next_out, stream->avail_out);
4550
4551 if (a32 != stream->dec_adler32)
4552 {
4553 stream->msg = "target window checksum mismatch";
4554 return EINVAL;
4555 }
4556 }
4557
4558 /* Finished with a window. */
4559 return xd3_decode_finish_window (stream);
4560}
4561
4562int
4563xd3_decode_input (xd3_stream *stream)
4564{
4565 int ret;
4566
4567 if (stream->enc_state != 0)
4568 {
4569 stream->msg = "encoder/decoder transition";
4570 return EINVAL;
4571 }
4572
4573#define BYTE_CASE(expr,x,nstate) \
4574 do { \
4575 if ( (expr) && \
4576 ((ret = xd3_decode_byte (stream, & (x))) != 0) ) { return ret; } \
4577 stream->dec_state = (nstate); \
4578 } while (0)
4579
4580#define OFFSET_CASE(expr,x,nstate) \
4581 do { \
4582 if ( (expr) && \
4583 ((ret = xd3_decode_offset (stream, & (x))) != 0) ) { return ret; } \
4584 stream->dec_state = (nstate); \
4585 } while (0)
4586
4587#define SIZE_CASE(expr,x,nstate) \
4588 do { \
4589 if ( (expr) && \
4590 ((ret = xd3_decode_size (stream, & (x))) != 0) ) { return ret; } \
4591 stream->dec_state = (nstate); \
4592 } while (0)
4593
4594#define SRCORTGT(x) (((x) & VCD_SRCORTGT) == VCD_SOURCE || \
4595 ((x) & VCD_SRCORTGT) == VCD_TARGET)
4596
4597 switch (stream->dec_state)
4598 {
4599 case DEC_VCHEAD:
4600 {
4601 if ((ret = xd3_decode_bytes (stream, stream->dec_magic, & stream->dec_magicbytes, 4))) { return ret; }
4602
4603 if (stream->dec_magic[0] != VCDIFF_MAGIC1 ||
4604 stream->dec_magic[1] != VCDIFF_MAGIC2 ||
4605 stream->dec_magic[2] != VCDIFF_MAGIC3)
4606 {
4607 stream->msg = "not a VCDIFF input";
4608 return EINVAL;
4609 }
4610
4611 if (stream->dec_magic[3] != 0)
4612 {
4613 stream->msg = "VCDIFF input version > 0 is not supported";
4614 return EINVAL;
4615 }
4616
4617 stream->dec_state = DEC_HDRIND;
4618 }
4619 case DEC_HDRIND:
4620 {
4621 if ((ret = xd3_decode_byte (stream, & stream->dec_hdr_ind))) { return ret; }
4622
4623 if ((stream->dec_hdr_ind & VCD_INVHDR) != 0)
4624 {
4625 stream->msg = "unrecognized header indicator bits set";
4626 return EINVAL;
4627 }
4628
4629 stream->dec_state = DEC_SECONDID;
4630 }
4631
4632 case DEC_SECONDID:
4633 /* Secondary compressor ID: only if VCD_SECONDARY is set */
4634 if ((stream->dec_hdr_ind & VCD_SECONDARY) != 0)
4635 {
4636 BYTE_CASE (1, stream->dec_secondid, DEC_TABLEN);
4637
4638 switch (stream->dec_secondid)
4639 {
4640 case VCD_FGK_ID:
4641 FGK_CASE (stream);
4642 case VCD_DJW_ID:
4643 DJW_CASE (stream);
4644 default:
4645 stream->msg = "unknown secondary compressor ID";
4646 return EINVAL;
4647 }
4648 }
4649
4650 case DEC_TABLEN:
4651 /* Length of code table data: only if VCD_CODETABLE is set */
4652 SIZE_CASE ((stream->dec_hdr_ind & VCD_CODETABLE) != 0, stream->dec_codetblsz, DEC_NEAR);
4653
4654 /* The codetblsz counts the two NEAR/SAME bytes */
4655 if ((stream->dec_hdr_ind & VCD_CODETABLE) != 0) {
4656 if (stream->dec_codetblsz <= 2) {
4657 stream->msg = "invalid code table size";
4658 return ENOMEM;
4659 }
4660 stream->dec_codetblsz -= 2;
4661 }
4662 case DEC_NEAR:
4663 /* Near modes: only if VCD_CODETABLE is set */
4664 BYTE_CASE((stream->dec_hdr_ind & VCD_CODETABLE) != 0, stream->acache.s_near, DEC_SAME);
4665 case DEC_SAME:
4666 /* Same modes: only if VCD_CODETABLE is set */
4667 BYTE_CASE((stream->dec_hdr_ind & VCD_CODETABLE) != 0, stream->acache.s_same, DEC_TABDAT);
4668 case DEC_TABDAT:
4669 /* Compressed code table data */
4670
4671 if ((stream->dec_hdr_ind & VCD_CODETABLE) != 0)
4672 {
4673 /* Get the code table data. */
4674 if ((stream->dec_codetbl == NULL) &&
4675 (stream->dec_codetbl = xd3_alloc (stream, stream->dec_codetblsz, 1)) == NULL) { return ENOMEM; }
4676
4677 if ((ret = xd3_decode_bytes (stream, stream->dec_codetbl, & stream->dec_codetblbytes, stream->dec_codetblsz)))
4678 {
4679 return ret;
4680 }
4681
4682 if ((ret = xd3_apply_table_encoding (stream, stream->dec_codetbl, stream->dec_codetblbytes)))
4683 {
4684 return ret;
4685 }
4686 }
4687 else
4688 {
4689 /* Use the default table. */
4690 stream->acache.s_near = __rfc3284_code_table_desc.near_modes;
4691 stream->acache.s_same = __rfc3284_code_table_desc.same_modes;
4692 stream->code_table = xd3_rfc3284_code_table ();
4693 }
4694
4695 if ((ret = xd3_alloc_cache (stream))) { return ret; }
4696
4697 stream->dec_state = DEC_APPLEN;
4698
4699 case DEC_APPLEN:
4700 /* Length of application data */
4701 SIZE_CASE((stream->dec_hdr_ind & VCD_APPHEADER) != 0, stream->dec_appheadsz, DEC_APPDAT);
4702
4703 case DEC_APPDAT:
4704 /* Application data */
4705 if (stream->dec_hdr_ind & VCD_APPHEADER)
4706 {
4707 /* Note: we add an additional byte for padding, to allow 0-termination. */
4708 if ((stream->dec_appheader == NULL) &&
4709 (stream->dec_appheader = xd3_alloc (stream, stream->dec_appheadsz+1, 1)) == NULL) { return ENOMEM; }
4710
4711 stream->dec_appheader[stream->dec_appheadsz] = 0;
4712
4713 if ((ret = xd3_decode_bytes (stream, stream->dec_appheader, & stream->dec_appheadbytes, stream->dec_appheadsz)))
4714 {
4715 return ret;
4716 }
4717 }
4718
4719 stream->dec_hdrsize = stream->total_in;
4720 stream->dec_state = DEC_WININD;
4721
4722 case DEC_WININD:
4723 {
4724 /* Start of a window: the window indicator */
4725
4726 if ((ret = xd3_decode_byte (stream, & stream->dec_win_ind))) { return ret; }
4727
4728 stream->current_window = stream->dec_window_count;
4729
4730 if (XOFF_T_OVERFLOW (stream->dec_winstart, stream->dec_tgtlen))
4731 {
4732 stream->msg = "decoder file offset overflow";
4733 return EINVAL;
4734 }
4735
4736 stream->dec_winstart += stream->dec_tgtlen;
4737
4738 if ((stream->dec_win_ind & VCD_INVWIN) != 0)
4739 {
4740 stream->msg = "unrecognized window indicator bits set";
4741 return EINVAL;
4742 }
4743
4744 if ((ret = xd3_decode_init_window (stream))) { return ret; }
4745
4746 stream->dec_state = DEC_CPYLEN;
4747
4748 IF_DEBUG1 (P(RINT "--------- TARGET WINDOW %"Q"u ------------------\n", stream->current_window));
4749 }
4750
4751 case DEC_CPYLEN:
4752 /* Copy window length: only if VCD_SOURCE or VCD_TARGET is set */
4753 SIZE_CASE(SRCORTGT (stream->dec_win_ind), stream->dec_cpylen, DEC_CPYOFF);
4754
4755 /* Set the initial, logical decoder position (HERE address) in dec_position. This
4756 * is set to just after the source/copy window, as we are just about to output the
4757 * first byte of target window. */
4758 stream->dec_position = stream->dec_cpylen;
4759
4760 case DEC_CPYOFF:
4761 /* Copy window offset: only if VCD_SOURCE or VCD_TARGET is set */
4762 OFFSET_CASE(SRCORTGT (stream->dec_win_ind), stream->dec_cpyoff, DEC_ENCLEN);
4763
4764 /* Copy offset and copy length may not overflow. */
4765 if (XOFF_T_OVERFLOW (stream->dec_cpyoff, stream->dec_cpylen))
4766 {
4767 stream->msg = "decoder copy window overflows a file offset";
4768 return EINVAL;
4769 }
4770
4771 /* Check copy window bounds: VCD_TARGET window may not exceed current position. */
4772 if ((stream->dec_win_ind & VCD_TARGET) &&
4773 (stream->dec_cpyoff + (xoff_t) stream->dec_cpylen > stream->dec_winstart))
4774 {
4775 stream->msg = "VCD_TARGET window out of bounds";
4776 return EINVAL;
4777 }
4778
4779 case DEC_ENCLEN:
4780 /* Length of the delta encoding */
4781 SIZE_CASE(1, stream->dec_enclen, DEC_TGTLEN);
4782 case DEC_TGTLEN:
4783 /* Length of target window */
4784 SIZE_CASE(1, stream->dec_tgtlen, DEC_DELIND);
4785
4786 /* Set the maximum decoder position, beyond which we should not decode any data.
4787 * This is the maximum value for dec_position. This may not exceed the size of a
4788 * usize_t. */
4789 if (USIZE_T_OVERFLOW (stream->dec_cpylen, stream->dec_tgtlen))
4790 {
4791 stream->msg = "decoder target window overflows a usize_t";
4792 return EINVAL;
4793 }
4794
4795 /* Check for malicious files. */
4796 if (stream->dec_tgtlen > XD3_HARDMAXWINSIZE)
4797 {
4798 stream->msg = "hard window size exceeded";
4799 return EINVAL;
4800 }
4801
4802 stream->dec_maxpos = stream->dec_cpylen + stream->dec_tgtlen;
4803
4804 case DEC_DELIND:
4805 /* Delta indicator */
4806 BYTE_CASE(1, stream->dec_del_ind, DEC_DATALEN);
4807
4808 if ((stream->dec_del_ind & VCD_INVDEL) != 0)
4809 {
4810 stream->msg = "unrecognized delta indicator bits set";
4811 return EINVAL;
4812 }
4813
4814 /* Delta indicator is only used with secondary compression. */
4815 if ((stream->dec_del_ind != 0) && (stream->sec_type == NULL))
4816 {
4817 stream->msg = "invalid delta indicator bits set";
4818 return EINVAL;
4819 }
4820
4821 /* Section lengths */
4822 case DEC_DATALEN:
4823 SIZE_CASE(1, stream->data_sect.size, DEC_INSTLEN);
4824 case DEC_INSTLEN:
4825 SIZE_CASE(1, stream->inst_sect.size, DEC_ADDRLEN);
4826 case DEC_ADDRLEN:
4827 SIZE_CASE(1, stream->addr_sect.size, DEC_CKSUM);
4828
4829 case DEC_CKSUM:
4830 /* Window checksum. */
4831 if ((stream->dec_win_ind & VCD_ADLER32) != 0)
4832 {
4833 int i;
4834
4835 if ((ret = xd3_decode_bytes (stream, stream->dec_cksum, & stream->dec_cksumbytes, 4))) { return ret; }
4836
4837 for (i = 0; i < 4; i += 1)
4838 {
4839 stream->dec_adler32 = (stream->dec_adler32 << 8) | stream->dec_cksum[i];
4840 }
4841 }
4842
4843 stream->dec_state = DEC_DATA;
4844
4845 /* Check dec_enclen for redundency, otherwise it is not really used. */
4846 {
4847 usize_t enclen_check = (1 + (xd3_sizeof_size (stream->dec_tgtlen) +
4848 xd3_sizeof_size (stream->data_sect.size) +
4849 xd3_sizeof_size (stream->inst_sect.size) +
4850 xd3_sizeof_size (stream->addr_sect.size)) +
4851 stream->data_sect.size +
4852 stream->inst_sect.size +
4853 stream->addr_sect.size +
4854 ((stream->dec_win_ind & VCD_ADLER32) ? 4 : 0));
4855
4856 if (stream->dec_enclen != enclen_check)
4857 {
4858 stream->msg = "incorrect encoding length (redundent)";
4859 return EINVAL;
4860 }
4861 }
4862
4863 /* Returning here gives the application a chance to inspect the header, skip the
4864 * window, etc. */
4865 if (stream->current_window == 0) { return XD3_GOTHEADER; }
4866 else { return XD3_WINSTART; }
4867
4868 case DEC_DATA:
4869 case DEC_INST:
4870 case DEC_ADDR:
4871 /* Next read the three sections. */
4872 if ((ret = xd3_decode_sections (stream))) { return ret; }
4873
4874 case DEC_EMIT:
4875
4876 /* To speed VCD_SOURCE block-address calculations, the source cpyoff_blocks and
4877 * cpyoff_blkoff are pre-computed. */
4878 if (stream->dec_win_ind & VCD_SOURCE)
4879 {
4880 xd3_source *src = stream->src;
4881
4882 if (src == NULL)
4883 {
4884 stream->msg = "source input required";
4885 return EINVAL;
4886 }
4887
4888 src->cpyoff_blocks = stream->dec_cpyoff / src->blksize;
4889 src->cpyoff_blkoff = stream->dec_cpyoff % src->blksize;
4890 }
4891
4892 /* xd3_decode_emit returns XD3_OUTPUT on every success. */
4893 if ((ret = xd3_decode_emit (stream)) == XD3_OUTPUT)
4894 {
4895 stream->total_out += (xoff_t) stream->avail_out;
4896 }
4897
4898 return ret;
4899
4900 case DEC_FINISH:
4901 {
4902 if (stream->dec_win_ind & VCD_TARGET)
4903 {
4904 if (stream->dec_lastwin == NULL)
4905 {
4906 stream->dec_lastwin = stream->next_out;
4907 stream->dec_lastspace = stream->space_out;
4908 }
4909 else
4910 {
4911 xd3_swap_uint8p (& stream->dec_lastwin, & stream->next_out);
4912 xd3_swap_usize_t (& stream->dec_lastspace, & stream->space_out);
4913 }
4914 }
4915
4916 stream->dec_lastlen = stream->dec_tgtlen;
4917 stream->dec_laststart = stream->dec_winstart;
4918 stream->dec_window_count += 1;
4919
4920 /* Note: the updates to dec_winstart & current_window are deferred until after the
4921 * next DEC_WININD byte is read. */
4922 stream->dec_state = DEC_WININD;
4923 return XD3_WINFINISH;
4924 }
4925
4926 default:
4927 stream->msg = "invalid state";
4928 return EINVAL;
4929 }
4930}
4931
4932/******************************************************************************************
4933 String matching helpers
4934 ******************************************************************************************/
4935
4936#if XD3_ENCODER
4937/* Do the initial xd3_string_match() checksum table setup. Allocations are delayed until
4938 * first use to avoid allocation sometimes (e.g., perfect matches, zero-length inputs). */
4939static int
4940xd3_string_match_init (xd3_stream *stream)
4941{
4942 const int DO_SMALL = ! (stream->flags & XD3_NOCOMPRESS);
4943 const int DO_LARGE = (stream->src != NULL);
4944
4945 if (DO_SMALL)
4946 {
4947 /* Subsequent calls can return immediately after checking reset. */
4948 if (stream->small_table != NULL)
4949 {
4950 /* The target hash table is reinitialized once per window. */
4951 if (stream->small_reset)
4952 {
4953 stream->small_reset = 0;
4954 memset (stream->small_table, 0, sizeof (usize_t) * stream->small_hash.size);
4955 }
4956
4957 return 0;
4958 }
4959
4960 if ((stream->small_table = xd3_alloc0 (stream, stream->small_hash.size, sizeof (usize_t))) == NULL)
4961 {
4962 return ENOMEM;
4963 }
4964
4965 /* If there is a previous table needed. */
4966 if (stream->small_chain > 1)
4967 {
4968 xd3_slist *p, *m;
4969
4970 if ((stream->small_prev = xd3_alloc (stream, stream->sprevsz, sizeof (xd3_slist))) == NULL)
4971 {
4972 return ENOMEM;
4973 }
4974
4975 /* Initialize circular lists. */
4976 for (p = stream->small_prev, m = stream->small_prev + stream->sprevsz; p != m; p += 1)
4977 {
4978 p->next = p;
4979 p->prev = p;
4980 }
4981 }
4982 }
4983
4984 if (DO_LARGE && stream->large_table == NULL)
4985 {
4986 if ((stream->large_table = xd3_alloc0 (stream, stream->large_hash.size, sizeof (usize_t))) == NULL)
4987 {
4988 return ENOMEM;
4989 }
4990 }
4991
4992 return 0;
4993}
4994
4995/* Called at every entrance to the string-match loop and each time
4996 * stream->input_position the value returned as *next_move_point.
4997 * This function computes more source checksums to advance the window. */
4998static int
4999xd3_srcwin_move_point (xd3_stream *stream, usize_t *next_move_point)
5000{
5001 // The input offset at which the source should ideally be scanned
5002 xoff_t logical_input_cksum_pos = stream->total_in + pos_in + stream->srcwin_size;
5003
5004 if (stream->srcwin_cksum_pos >= stream->src->size)
5005 {
5006 *next_move_point = USIZE_T_MAX;
5007 return 0;
5008 }
5009
5010 if (stream->srcwin_cksum_pos > logical_input_cksum_pos)
5011 {
5012 *next_move_point = stream->srcwin_cksum_pos - logical_input_cksum_pos;
5013 return 0;
5014 }
5015
5016 IF_DEBUG1 (P(RINT "[move_p1] size=%d T=%"Q"d S=%"Q"d\n", stream->srcwin_size,
5017 stream->total_in + pos_in, stream->srcwin_cksum_pos));
5018
5019 *next_move_point = pos_in + stream->srcwin_size;
5020
5021 if (stream->srcwin_cksum_pos == 0)
5022 {
5023 // Two windows to start with
5024 logical_input_cksum_pos += stream->srcwin_size;
5025 }
5026 else
5027 {
5028 // Otherwise double and add
5029 stream->srcwin_size = min(stream->srcwin_maxsz, stream->srcwin_size * 2);
5030 logical_input_cksum_pos += stream->srcwin_size;
5031 }
5032
5033 while (stream->srcwin_cksum_pos < logical_input_cksum_pos &&
5034 stream->srcwin_cksum_pos < stream->src->size)
5035 {
5036 xoff_t blkno = stream->srcwin_cksum_pos / stream->src->blksize;
5037 usize_t blkoff = stream->srcwin_cksum_pos % stream->src->blksize;
5038 usize_t onblk = xd3_bytes_on_srcblk (stream->src, blkno);
5039 int ret;
5040
5041 if (blkoff + stream->large_look >= onblk)
5042 {
5043 /* Next block */
5044 stream->srcwin_cksum_pos = (blkno * stream->src->blksize) + onblk;
5045 continue;
5046 }
5047
5048 if ((ret = xd3_getblk (stream, blkno)))
5049 {
5050 return ret;
5051 }
5052
5053 usize_t diff = logical_input_cksum_pos - stream->srcwin_cksum_pos;
5054
5055 onblk = min(onblk, diff + blkoff + stream->large_look);
5056
5057 while (blkoff + stream->large_look <= onblk)
5058 {
5059 uint32_t cksum = xd3_lcksum (stream->src->curblk + blkoff, stream->large_look);
5060 usize_t hval = xd3_checksum_hash (& stream->large_hash, cksum);
5061
5062 stream->large_table[hval] = stream->srcwin_cksum_pos + HASH_CKOFFSET;
5063
5064 blkoff += stream->large_step;
5065 stream->srcwin_cksum_pos += stream->large_step;
5066 IF_DEBUG (stream->large_ckcnt += 1);
5067 }
5068 }
5069
5070 IF_DEBUG1 (P(RINT "[move_p2] size=%d T=%"Q"d S=%"Q"d next_move=%d\n", stream->srcwin_size,
5071 stream->total_in + pos_in, stream->srcwin_cksum_pos, *next_move_point));
5072
5073 return 0;
5074}
5075
5076/* This function sets up the stream->src fields srcbase, srclen. The call is delayed
5077 * until these values are needed to encode a copy address. At this point the decision has
5078 * to be made. */
5079static int
5080xd3_srcwin_setup (xd3_stream *stream)
5081{
5082 xd3_source *src = stream->src;
5083 xoff_t length;
5084
5085 IF_DEBUG1 (P(RINT "[srcwin setup:%"Q"u] iopt buffer %s\n",
5086 stream->current_window,
5087 stream->enc_state < ENC_FLUSH ? "overflow" : "fit"));
5088
5089 /* Check the undecided state. */
5090 XD3_ASSERT (src->srclen == 0 && src->srcbase == 0);
5091
5092 /* Avoid repeating this call. */
5093 stream->srcwin_decided = 1;
5094
5095 /* If the stream is flushing, then the iopt buffer was able to contain the complete
5096 * encoding. If no copies were issued no source window is actually needed. This
5097 * prevents the VCDIFF header from including source base/len. xd3_emit_hdr checks
5098 * for srclen == 0. */
5099 if (stream->enc_state == ENC_FLUSH && stream->match_maxaddr == 0)
5100 {
5101 goto done;
5102 }
5103
5104 /* Check for overflow, srclen is usize_t - this can't happen unless XD3_DEFAULT_SRCBACK
5105 * and related parameters are extreme - should use smaller windows. */
5106 length = stream->match_maxaddr - stream->match_minaddr;
5107
5108 if (length > (xoff_t) USIZE_T_MAX)
5109 {
5110 stream->msg = "source window length overflow (not 64bit)";
5111 return EINVAL;
5112 }
5113
5114 /* If ENC_FLUSH, then we know the exact source window to use because no more copies can
5115 * be issued. */
5116 if (stream->enc_state == ENC_FLUSH)
5117 {
5118 src->srcbase = stream->match_minaddr;
5119 src->srclen = (usize_t) length;
5120 XD3_ASSERT (src->srclen);
5121 goto done;
5122 }
5123
5124 /* Otherwise, we have to make a guess. More copies may still be issued, but we have to
5125 * decide the source window base and length now. */
5126 src->srcbase = stream->match_minaddr;
5127 src->srclen = max ((usize_t) length, stream->avail_in + (stream->avail_in >> 2));
5128 if (src->size < src->srcbase + (xoff_t) src->srclen)
5129 {
5130 /* Could reduce srcbase, as well. */
5131 src->srclen = src->size - src->srcbase;
5132 }
5133
5134 XD3_ASSERT (src->srclen);
5135 done:
5136 IF_DEBUG1 (P(RINT "[srcwin setup:%"Q"u] base %"Q"u size %u\n",
5137 stream->current_window,
5138 src->srcbase,
5139 src->srclen));
5140 /* Set the taroff. This convenience variable is used even when stream->src == NULL. */
5141 stream->taroff = src->srclen;
5142 return 0;
5143}
5144
5145/* Sets the bounding region for a newly discovered source match, prior to calling
5146 * xd3_source_extend_match(). This sets the match_maxfwd, match_maxback variables. Note:
5147 * srcpos is an absolute position (xoff_t) but the match_maxfwd, match_maxback variables
5148 * are usize_t. Returns 0 if the setup succeeds, or 1 if the source position lies outside
5149 * an already-decided srcbase/srclen window. */
5150static int
5151xd3_source_match_setup (xd3_stream *stream, xoff_t srcpos)
5152{
5153 xd3_source *src = stream->src;
5154 usize_t greedy_or_not;
5155 xoff_t farthest_src;
5156
5157 stream->match_maxback = 0;
5158 stream->match_maxfwd = 0;
5159 stream->match_back = 0;
5160 stream->match_fwd = 0;
5161
5162 farthest_src = max(stream->srcwin_cksum_pos, srcpos);
5163
5164 XD3_ASSERT (stream->srcwin_maxsz > src->blksize);
5165
5166 /* This prevents the encoder from seeking back more than srcwin_maxsz. Using
5167 * srcwin_maxsz is incorrect. TODO: Possibly an new option here, how far back to
5168 * seek? */
5169 if (max_in == 0 ||
5170 farthest_src - srcpos > stream->srcwin_maxsz - src->blksize)
5171 {
5172 goto bad; // TODO! Note: this prevents catching the TODO/bug below
5173 }
5174
5175 /* TODO: check for boundary crossing */
5176
5177 /* Going backwards, the 1.5-pass algorithm allows some already-matched input may be
5178 * covered by a longer source match. The greedy algorithm does not allow this. */
5179 if (stream->flags & XD3_BEGREEDY)
5180 {
5181 /* The greedy algorithm allows backward matching to the last matched position. */
5182 greedy_or_not = xd3_iopt_last_matched (stream);
5183 }
5184 else
5185 {
5186 /* The 1.5-pass algorithm allows backward matching to go back as far as the
5187 * unencoded offset, which is updated as instructions pass out of the iopt buffer.
5188 * If this (default) is chosen, it means xd3_iopt_erase may be called to eliminate
5189 * instructions when a covering source match is found. */
5190 greedy_or_not = stream->unencoded_offset;
5191 }
5192
5193 /* Backward target match limit. */
5194 XD3_ASSERT (pos_in >= greedy_or_not);
5195 stream->match_maxback = pos_in - greedy_or_not;
5196
5197 /* Forward target match limit. */
5198 XD3_ASSERT (max_in > pos_in);
5199 stream->match_maxfwd = max_in - pos_in;
5200
5201 /* Now we take the source position into account. It depends whether the srclen/srcbase
5202 * have been decided yet. */
5203 if (stream->srcwin_decided == 0)
5204 {
5205 /* Unrestricted case: the match can cover the entire source, 0--src->size. We
5206 * compare the usize_t match_maxfwd/match_maxback against the xoff_t src->size/srcpos values
5207 * and take the min. */
5208 xoff_t srcavail;
5209
5210 if (srcpos < (xoff_t) stream->match_maxback)
5211 {
5212 stream->match_maxback = srcpos;
5213 }
5214
5215 srcavail = src->size - srcpos;
5216 if (srcavail < (xoff_t) stream->match_maxfwd)
5217 {
5218 stream->match_maxfwd = srcavail;
5219 }
5220
5221 goto good;
5222 }
5223
5224 /* Decided some source window. */
5225 XD3_ASSERT (src->srclen > 0);
5226
5227 /* Restricted case: fail if the srcpos lies outside the source window */
5228 if ((srcpos < src->srcbase) || (srcpos > (src->srcbase + (xoff_t) src->srclen)))
5229 {
5230 goto bad;
5231 }
5232 else
5233 {
5234 usize_t srcavail;
5235
5236 srcavail = (usize_t) (srcpos - src->srcbase);
5237 if (srcavail < stream->match_maxback)
5238 {
5239 stream->match_maxback = srcavail;
5240 }
5241
5242 srcavail = (usize_t) (src->srcbase + (xoff_t) src->srclen - srcpos);
5243 if (srcavail < stream->match_maxfwd) {
5244 stream->match_maxfwd = srcavail;
5245 }
5246
5247 goto good;
5248 }
5249
5250 good:
5251 stream->match_state = MATCH_BACKWARD;
5252 stream->match_srcpos = srcpos;
5253 return 0;
5254
5255 bad:
5256 stream->match_state = MATCH_SEARCHING;
5257 return 1;
5258}
5259
5260/* This function expands the source match backward and forward. It is reentrant, since
5261 * xd3_getblk may return XD3_GETSRCBLK, so most variables are kept in xd3_stream. There
5262 * are two callers of this function, the string_matching routine when a checksum match is
5263 * discovered, and xd3_encode_input whenever a continuing (or initial) match is suspected.
5264 * The two callers do different things with the input_position, thus this function leaves
5265 * that variable untouched. If a match is taken the resulting stream->match_fwd is left
5266 * non-zero. */
5267static int
5268xd3_source_extend_match (xd3_stream *stream)
5269{
5270 int ret;
5271 xd3_source *src = stream->src;
5272 xoff_t matchoff; /* matchoff is the current right/left-boundary of the source match being tested. */
5273 usize_t streamoff; /* streamoff is the current right/left-boundary of the input match being tested. */
5274 xoff_t tryblk; /* tryblk, tryoff are the block, offset position of matchoff */
5275 usize_t tryoff;
5276 usize_t tryrem; /* tryrem is the number of matchable bytes on the source block */
5277
5278 XD3_ASSERT (src != NULL);
5279
5280 /* Does it make sense to compute backward match AFTER forward match? */
5281 if (stream->match_state == MATCH_BACKWARD)
5282 {
5283 /* Note: this code is practically duplicated below, substituting
5284 * match_fwd/match_back and direction. Consolidate? */
5285 matchoff = stream->match_srcpos - stream->match_back;
5286 streamoff = pos_in - stream->match_back;
5287 tryblk = matchoff / src->blksize;
5288 tryoff = matchoff % src->blksize;
5289
5290 /* this loops backward over source blocks */
5291 while (stream->match_back < stream->match_maxback)
5292 {
5293 /* see if we're backing across a source block boundary */
5294 if (tryoff == 0)
5295 {
5296 tryoff = src->blksize;
5297 tryblk -= 1;
5298 }
5299
5300 if ((ret = xd3_getblk (stream, tryblk)))
5301 {
5302 /* could be a XD3_GETSRCBLK failure. */
5303 return ret;
5304 }
5305
5306 /* OPT: This code can be optimized. */
5307 for (tryrem = min (tryoff, stream->match_maxback - stream->match_back);
5308 tryrem != 0;
5309 tryrem -= 1, stream->match_back += 1)
5310 {
5311 if (src->curblk[tryoff-1] != stream->next_in[streamoff-1])
5312 {
5313 goto doneback;
5314 }
5315
5316 tryoff -= 1;
5317 streamoff -= 1;
5318 }
5319 }
5320
5321 doneback:
5322 stream->match_state = MATCH_FORWARD;
5323 }
5324
5325 XD3_ASSERT (stream->match_state == MATCH_FORWARD);
5326
5327 matchoff = stream->match_srcpos + stream->match_fwd;
5328 streamoff = pos_in + stream->match_fwd;
5329 tryblk = matchoff / src->blksize;
5330 tryoff = matchoff % src->blksize;
5331
5332 /* Note: practically the same code as backwards case above: same comments */
5333 while (stream->match_fwd < stream->match_maxfwd)
5334 {
5335 if ((ret = xd3_getblk (stream, tryblk)))
5336 {
5337 return ret;
5338 }
5339
5340 /* There's a good speedup for doing word comparions: see zlib. */
5341 for (tryrem = min(stream->match_maxfwd - stream->match_fwd,
5342 src->blksize - tryoff);
5343 tryrem != 0;
5344 tryrem -= 1, stream->match_fwd += 1)
5345 {
5346 if (src->curblk[tryoff] != stream->next_in[streamoff])
5347 {
5348 goto donefwd;
5349 }
5350
5351 tryoff += 1;
5352 streamoff += 1;
5353 }
5354
5355 if (tryoff == src->blksize)
5356 {
5357 tryoff = 0;
5358 tryblk += 1;
5359 }
5360 }
5361
5362 donefwd:
5363 stream->match_state = MATCH_SEARCHING;
5364
5365 /* Now decide whether to take the match. There are several ways to answer this
5366 * question and this is likely the best answer. There is currently an assertion
5367 * in xd3_iopt_erase that checks whether min_match works. This variable maintains
5368 * that every match exceeds the end of the previous match. However, it is
5369 * possible that match_back allows us to find a match that goes a long way back
5370 * but not enough forward. We could try an alternate approach, which might help
5371 * or it might just be extra complexity: eliminate the next match_fwd >= min_match
5372 * test and call xd3_iopt_erase right away. Erase instructions as far as it goes
5373 * back, then either remember what was deleted and re-insert it, or count on the
5374 * string-matching algorithm to find that match again. I think it is more
5375 * worthwhile to implement large_hash duplicates. */
5376 if (stream->match_fwd < min_match)
5377 {
5378 stream->match_fwd = 0;
5379 }
5380 else
5381 {
5382 usize_t total = stream->match_fwd + stream->match_back;
5383 xoff_t match_end;
5384
5385 /* Correct the variables to remove match_back from the equation. */
5386 stream->input_position -= stream->match_back;
5387 stream->match_srcpos -= stream->match_back;
5388 stream->match_fwd += stream->match_back;
5389 match_end = stream->match_srcpos + stream->match_fwd;
5390
5391 /* At this point we may have to erase any iopt-buffer instructions that are
5392 * fully covered by a backward-extending copy. */
5393 if (stream->match_back > 0)
5394 {
5395 xd3_iopt_erase (stream, pos_in, total);
5396 }
5397
5398 stream->match_back = 0;
5399
5400 /* Update ranges. The first source match occurs with both values set to 0. */
5401 if (stream->match_maxaddr == 0 ||
5402 stream->match_srcpos < stream->match_minaddr)
5403 {
5404 stream->match_minaddr = stream->match_srcpos;
5405 }
5406
5407 if (match_end > stream->match_maxaddr)
5408 {
5409 stream->match_maxaddr = match_end;
5410 }
5411
5412 IF_DEBUG1 ({
5413 static int x = 0;
5414 P(RINT "[source match:%d] <inp %"Q"u %"Q"u> <src %"Q"u %"Q"u> (%s) [ %u bytes ]\n",
5415 x++,
5416 stream->total_in + pos_in,
5417 stream->total_in + pos_in + stream->match_fwd,
5418 stream->match_srcpos,
5419 stream->match_srcpos + stream->match_fwd,
5420 (stream->total_in + stream->input_position == stream->match_srcpos) ? "same" : "diff",
5421 stream->match_fwd);
5422 });
5423
5424 if ((ret = xd3_found_match (stream,
5425 /* decoder position */ pos_in,
5426 /* length */ stream->match_fwd,
5427 /* address */ stream->match_srcpos,
5428 /* is_source */ 1)))
5429 {
5430 return ret;
5431 }
5432
5433 // TODO: ideally, we would update srcwin_cksum_pos to avoid computing checksums in
5434 // the middle of an already-discovered long match.
5435
5436 /* If the match ends with the available input: */
5437 if (pos_in + stream->match_fwd == max_in)
5438 {
5439 /* Setup continuing match for the next window. */
5440 stream->match_state = MATCH_TARGET;
5441 stream->match_srcpos += stream->match_fwd;
5442 }
5443 }
5444
5445 return 0;
5446}
5447
5448/* Update the small hash. Values in the small_table are offset by HASH_CKOFFSET (1) to
5449 * distinguish empty buckets the zero offset. This maintains the previous linked lists.
5450 * If owrite is true then this entry is replacing the existing record, otherwise it is
5451 * merely being called to promote the existing record in the hash bucket (for the same
5452 * address cache). */
5453static void
5454xd3_scksum_insert (xd3_stream *stream, usize_t inx, usize_t scksum, usize_t pos)
5455{
5456 /* If we are maintaining previous links. */
5457 if (stream->small_prev)
5458 {
5459 usize_t last_pos = stream->small_table[inx];
5460 xd3_slist *pos_list = & stream->small_prev[pos & stream->sprevmask];
5461 xd3_slist *prev = pos_list->prev;
5462 xd3_slist *next = pos_list->next;
5463
5464 /* Assert link structure, update pos, cksum */
5465 XD3_ASSERT (prev->next == pos_list);
5466 XD3_ASSERT (next->prev == pos_list);
5467 pos_list->pos = pos;
5468 pos_list->scksum = scksum;
5469
5470 /* Subtract HASH_CKOFFSET and test for a previous offset. */
5471 if (last_pos-- != 0)
5472 {
5473 xd3_slist *last_list = & stream->small_prev[last_pos & stream->sprevmask];
5474 xd3_slist *last_next;
5475
5476 /* Verify existing entry. */
5477 SMALL_HASH_DEBUG1 (stream, stream->next_in + last_pos);
5478 SMALL_HASH_DEBUG2 (stream, stream->next_in + pos);
5479
5480 /* The two positions (mod sprevsz) may have the same checksum, making the old
5481 * and new entries the same. That is why the removal step is not before the
5482 * above if-stmt. */
5483 if (last_list != pos_list)
5484 {
5485 /* Remove current position from any list it may belong to. */
5486 next->prev = prev;
5487 prev->next = next;
5488
5489 /* The ordinary case, add current position to last_list. */
5490 last_next = last_list->next;
5491
5492 pos_list->next = last_next;
5493 pos_list->prev = last_list;
5494
5495 last_next->prev = pos_list;
5496 last_list->next = pos_list;
5497 }
5498 }
5499 else
5500 {
5501 /* Remove current position from any list it may belong to. */
5502 next->prev = prev;
5503 prev->next = next;
5504
5505 /* Re-initialize current position. */
5506 pos_list->next = pos_list;
5507 pos_list->prev = pos_list;
5508 }
5509 }
5510
5511 /* Enter the new position into the hash bucket. */
5512 stream->small_table[inx] = pos + HASH_CKOFFSET;
5513}
5514
5515#if XD3_DEBUG
5516static int
5517xd3_check_smatch (const uint8_t *ref0, const uint8_t *inp0,
5518 const uint8_t *inp_max, usize_t cmp_len)
5519{
5520 int i;
5521
5522 for (i = 0; i < cmp_len; i += 1)
5523 {
5524 XD3_ASSERT (ref0[i] == inp0[i]);
5525 }
5526
5527 if (inp0 + cmp_len < inp_max)
5528 {
5529 XD3_ASSERT (inp0[i] != ref0[i]);
5530 }
5531
5532 return 1;
5533}
5534#endif /* XD3_DEBUG */
5535
5536/* When the hash table indicates a possible small string match, it calls this routine to
5537 * find the best match. The first matching position is taken from the small_table,
5538 * HASH_CKOFFSET is subtracted to get the actual position. After checking that match, if
5539 * previous linked lists are in use (because stream->small_chain > 1), previous matches
5540 * are tested searching for the longest match. If (min_match > MIN_MATCH) then a lazy
5541 * match is in effect.
5542 *
5543 * OPT: This is by far the most expensive function. The slowdown is in part due to the data
5544 * structure it maintains, which is relatively more expensive than it needs to be (in
5545 * comparison to zlib) in order to support the PROMOTE decision, which is to prefer the
5546 * most recently used matching address of a certain string to aid the VCDIFF same cache.
5547 *
5548 * Weak reasoning? it's time to modularize this routine...? Let's say the PROMOTE
5549 * feature supported by this slow data structure contributes around 2% improvement in
5550 * compressed size, is there a better code table that doesn't use the SAME address cache,
5551 * for which the speedup-discount could produce a better encoding?
 *
 * Arguments and result, as used by the code below:
 *   base          small_table entry for the candidate; HASH_CKOFFSET is subtracted here.
 *   scksum        small checksum of the current input; chain entries whose stored scksum
 *                 differs are skipped.
 *   match_offset  out: written with the candidate position each time a longer match is
 *                 found.  It is left untouched when the function returns 0.
 * Returns the longest match length found (0 when no candidate was compared).
 *
 * NOTE(review): min_match, pos_in and max_in are not declared in this function; they are
 * presumably macros or file-scope state set up by the caller -- confirm.
5552 */
5553static /*inline*/ usize_t
5554xd3_smatch (xd3_stream *stream, usize_t base, usize_t scksum, usize_t *match_offset)
5555{
5556 usize_t cmp_len;
5557 usize_t match_length = 0;
 /* Chain budget: the lazy-matching limit (small_lchain) applies whenever min_match has
  * been raised above MIN_MATCH. */
5558 usize_t chain = (min_match == MIN_MATCH ?
5559 stream->small_chain :
5560 stream->small_lchain);
5561 xd3_slist *current = NULL;
5562 xd3_slist *first = NULL;
5563 const uint8_t *inp_max = stream->next_in + max_in;
5564 const uint8_t *inp;
5565 const uint8_t *ref;
5566
5567 SMALL_HASH_STATS (usize_t search_cnt = 0);
5568 SMALL_HASH_DEBUG1 (stream, stream->next_in + pos_in);
5569 SMALL_HASH_STATS (stream->sh_searches += 1);
5570
5571 XD3_ASSERT (min_match + pos_in <= max_in);
5572
5573 base -= HASH_CKOFFSET;
5574
5575 /* Initialize the chain. */
5576 if (stream->small_prev != NULL)
5577 {
5578 first = current = & stream->small_prev[base & stream->sprevmask];
5579
5580 /* Check if current->pos is correct. */
5581 if (current->pos != base) { goto done; }
5582 }
5583
5584 again:
5585
5586 SMALL_HASH_STATS (search_cnt += 1);
5587
5588 /* For small matches, we can always go to the end-of-input because the matching position
5589 * must be less than the input position. */
5590 XD3_ASSERT (base < pos_in);
5591
5592 ref = stream->next_in + base;
5593 inp = stream->next_in + pos_in;
5594
5595 SMALL_HASH_DEBUG2 (stream, ref);
5596
5597 /* Expand potential match forward. */
5598 while (inp < inp_max && *inp == *ref)
5599 {
5600 ++inp;
5601 ++ref;
5602 }
5603
5604 cmp_len = inp - (stream->next_in + pos_in);
5605
5606 /* Verify correctness */
5607 XD3_ASSERT (xd3_check_smatch (stream->next_in + base, stream->next_in + pos_in,
5608 inp_max, cmp_len));
5609
5610 /* Update longest match */
5611 if (cmp_len > match_length)
5612 {
5613 ( match_length) = cmp_len;
5614 (*match_offset) = base;
5615
5616 /* Stop if we match the entire input or discover a long_enough match. */
5617 if (inp == inp_max || cmp_len >= stream->long_enough)
5618 {
5619 goto done;
5620 }
5621 }
5622
5623 /* If we have not reached the chain limit, see if there is another previous position. */
5624 if (current)
5625 {
5626 while (--chain != 0)
5627 {
5628 /* Calculate the next base offset. */
5629 current = current->prev;
5630 base = current->pos;
5631
5632 /* Stop if the next position was the first. Stop if the position is wrong
5633 * (because the lists are not re-initialized across input windows). Skip if the
5634 * scksum is wrong. */
 /* NOTE(review): despite the wording above, a failed test here does not leave the
  * loop; the while simply moves on to the next chain entry until the chain budget
  * is used up. */
5635 if (current != first && base < pos_in)
5636 {
5637 if (current->scksum != scksum)
5638 {
5639 continue;
5640 }
5641 goto again;
5642 }
5643 }
5644 }
5645
5646 done:
5647 SMALL_HASH_STATS (stream->sh_compares += search_cnt);
5648 return match_length;
5649}
5650
5651#if XD3_DEBUG
5652static void
5653xd3_verify_small_state (xd3_stream *stream,
5654 const uint8_t *inp,
5655 uint32_t x_cksum)
5656{
5657 uint32_t cksum = xd3_scksum (inp, stream->small_look);
5658
5659 XD3_ASSERT (cksum == x_cksum);
5660}
5661
5662static void
5663xd3_verify_large_state (xd3_stream *stream,
5664 const uint8_t *inp,
5665 uint32_t x_cksum)
5666{
5667 uint32_t cksum = xd3_lcksum (inp, stream->large_look);
5668
5669 XD3_ASSERT (cksum == x_cksum);
5670}
5671
5672static void
5673xd3_verify_run_state (xd3_stream *stream,
5674 const uint8_t *inp,
5675 int x_run_l,
5676 uint8_t x_run_c)
5677{
5678 int slook = stream->small_look;
5679 uint8_t run_c;
5680 int run_l = xd3_comprun (inp, slook, &run_c);
5681
5682 XD3_ASSERT (run_l == 0 || run_c == x_run_c);
5683 XD3_ASSERT (x_run_l > slook || run_l == x_run_l);
5684}
5685#endif /* XD3_DEBUG */
5686#endif /* XD3_ENCODER */
5687
5688/******************************************************************************************
5689 TEMPLATE pass
5690 ******************************************************************************************/
5691
5692#endif /* __XDELTA3_C_INLINE_PASS__ */
5693#ifdef __XDELTA3_C_TEMPLATE_PASS__
5694
5695#if XD3_ENCODER
5696
5697/******************************************************************************************
5698 Templates
5699 ******************************************************************************************/
5700
5701/* Template macros: less than 30 lines work. The template parameters appear as, e.g.,
5702 * SLOOK, MIN_MATCH, TRYLAZY, etc.
 *
 * XD3_TEMPLATE(x) pastes the expansion of the TEMPLATE macro onto x; the extra
 * XD3_TEMPLATE2/XD3_TEMPLATE3 levels make sure TEMPLATE is expanded before the ##
 * operator is applied.  XD3_STRINGIFY(x) likewise expands x first and then turns the
 * result into a string literal. */
5703#define XD3_TEMPLATE(x) XD3_TEMPLATE2(x,TEMPLATE)
5704#define XD3_TEMPLATE2(x,n) XD3_TEMPLATE3(x,n)
5705#define XD3_TEMPLATE3(x,n) x ## n
5706#define XD3_STRINGIFY(x) XD3_STRINGIFY2(x)
5707#define XD3_STRINGIFY2(x) #x
5708
/* Forward declaration of this template instance's matcher, needed by the table below. */
5709static int XD3_TEMPLATE(xd3_string_match_) (xd3_stream *stream);
5710
/* Descriptor for this template instance: its name (the stringified TEMPLATE), its match
 * function, and ten configuration values.  When SOFTCFG == 1 the ten values are all
 * zero; otherwise they are the compile-time template parameters, in the order listed. */
5711static const xd3_smatcher XD3_TEMPLATE(__smatcher_) =
5712{
5713 XD3_STRINGIFY(TEMPLATE),
5714 XD3_TEMPLATE(xd3_string_match_),
5715#if SOFTCFG == 1
5716 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
5717#else
5718 LLOOK, LSTEP, SLOOK, SCHAIN, SLCHAIN, SSMATCH, TRYLAZY, MAXLAZY,
5719 LONGENOUGH, PROMOTE
5720#endif
5721};
5722
/* The string matcher for this template instance.  Starting at pos_in it walks the input
 * one byte at a time and, at each position, tries three kinds of match in order: a RUN,
 * a large match (only when stream->src is set), and a small match (unless XD3_NOCOMPRESS
 * is set in stream->flags).
 *
 * Returns 0 when there is no more input to scan, or the non-zero value returned by one
 * of the helpers it calls (xd3_string_match_init, xd3_srcwin_move_point, xd3_emit_run,
 * xd3_source_extend_match, xd3_found_match).
 *
 * NOTE(review): pos_in, max_in and min_match are not declared here; they are presumably
 * macros or file-scope state shared with xd3_smatch -- confirm. */
5723static int
5724XD3_TEMPLATE(xd3_string_match_) (xd3_stream *stream)
5725{
5726 /* TODO config: These next three variables should be statically compiled in various
5727 * scan_cfg configurations? */
5728 const int DO_SMALL = ! (stream->flags & XD3_NOCOMPRESS);
5729 const int DO_LARGE = (stream->src != NULL);
5730 const int DO_RUN = (1);
5731
5732 const uint8_t *inp;
5733 uint32_t scksum = 0;
5734 uint32_t lcksum = 0;
5735 usize_t sinx;
5736 usize_t linx;
5737 uint8_t run_c;
5738 int run_l;
5739 int ret;
5740 usize_t match_length;
5741 usize_t match_offset; // Note: "may be unused" warnings are bogus
5742 usize_t next_move_point;
5743
5744 /* If there will be no compression due to settings or short input, skip it entirely. */
5745 if (! (DO_SMALL || DO_LARGE || DO_RUN) || pos_in + SLOOK > max_in) { goto loopnomore; }
5746
5747 if ((ret = xd3_string_match_init (stream))) { return ret; }
5748
5749 /* The restartloop label is reached when the incremental loop state needs to be
5750 * reset. */
5751 restartloop:
5752
5753 /* If there is not enough input remaining for any kind of match, skip it. */
5754 if (pos_in + SLOOK > max_in) { goto loopnomore; }
5755
5756 IF_DEBUG1 ({
5757 static int x = 0;
5758 P(RINT "[string match:%d] pos_in %d; \n",
5759 x++, pos_in);
5760 });
5761
5762 /* Now reset the incremental loop state: */
5763
5764 /* The min_match variable is updated to avoid matching the same lazy match over and over
5765 * again. For example, if you find a (small) match of length 9 at one position, you
5766 * will likely find a match of length 8 at the next position. */
5767 min_match = MIN_MATCH;
5768
5769 /* The current input byte. */
5770 inp = stream->next_in + pos_in;
5771
5772 /* Small match state. */
5773 if (DO_SMALL)
5774 {
5775 scksum = xd3_scksum (inp, SLOOK);
5776 }
5777
5778 /* Run state. */
5779 if (DO_RUN)
5780 {
5781 run_l = xd3_comprun (inp, SLOOK, & run_c);
5782 }
5783
5784 /* Large match state. We continue the loop even after not enough bytes for LLOOK
5785 * remain, so always check pos_in in DO_LARGE code. */
5786 if (DO_LARGE && (pos_in + LLOOK <= max_in))
5787 {
5788 /* Source window: next_move_point is the point that pos_in must reach before
5789 * computing more source checksum. */
5790 if ((ret = xd3_srcwin_move_point (stream, & next_move_point)))
5791 {
5792 return ret;
5793 }
5794
5795 lcksum = xd3_lcksum (inp, LLOOK);
5796 }
5797
5798 /* TRYLAZYLEN: True if a certain length match should be followed by lazy search. This
5799 * checks that LEN is shorter than MAXLAZY and that there is enough leftover data to
5800 * consider lazy matching. "Enough" is set to 2 since the next match will start at the
5801 * next offset, it must match two extra characters. */
5802#define TRYLAZYLEN(LEN,POS,MAX) ((TRYLAZY && (LEN) < MAXLAZY) && ((POS) + (LEN) <= (MAX) - 2))
5803
5804 /* HANDLELAZY: This statement is called each time an instruction is emitted (three
5805 * cases). If the instruction is large enough, the loop is restarted, otherwise lazy
5806 * matching may ensue. */
5807#define HANDLELAZY(mlen) \
5808 if (TRYLAZYLEN ((mlen), pos_in, max_in)) \
5809 { min_match = (mlen) + LEAST_MATCH_INCR; goto updateone; } \
5810 else \
5811 { pos_in += (mlen); goto restartloop; }
5812
5813 /* Now loop over one input byte at a time until a match is found... */
5814 for (;; inp += 1, pos_in += 1)
5815 {
5816 /* Now we try three kinds of string match in order of expense:
5817 * run, large match, small match. */
5818
5819 /* Expand the start of a RUN. The test for (run_l == SLOOK) avoids repeating this
5820 * check when we pass through a run area performing lazy matching. The run is only
5821 * expanded once when the min_match is first reached. If lazy matching is
5822 * performed, the run_l variable will remain inconsistent until the first
5823 * non-running input character is reached, at which time the run_l may then again
5824 * grow to SLOOK. */
5825 if (DO_RUN && run_l == SLOOK)
5826 {
5827 usize_t max_len = max_in - pos_in;
5828
5829 IF_DEBUG (xd3_verify_run_state (stream, inp, run_l, run_c));
5830
5831 while (run_l < max_len && inp[run_l] == run_c) { run_l += 1; }
5832
5833 /* Output a RUN instruction. */
5834 if (run_l >= min_match && run_l >= MIN_RUN)
5835 {
5836 if ((ret = xd3_emit_run (stream, pos_in, run_l, run_c))) { return ret; }
5837
5838 HANDLELAZY (run_l);
5839 }
5840 }
5841
5842 /* If there is enough input remaining. */
5843 if (DO_LARGE && (pos_in + LLOOK <= max_in))
5844 {
5845 if ((pos_in >= next_move_point) &&
5846 (ret = xd3_srcwin_move_point (stream, & next_move_point)))
5847 {
5848 return ret;
5849 }
5850
5851 linx = xd3_checksum_hash (& stream->large_hash, lcksum);
5852
5853 IF_DEBUG (xd3_verify_large_state (stream, inp, lcksum));
5854
5855 /* Note: To handle large checksum duplicates, this code should be rearranged to
5856 * resemble the small_match case more. But how much of the code can be truly
5857 * shared? The main difference is the need for xd3_source_extend_match to work
5858 * outside of xd3_string_match, in the case where inputs are identical. */
5859 if (unlikely (stream->large_table[linx] != 0))
5860 {
5861 /* the match_setup will fail if the source window has been decided and the
5862 * match lies outside it. You could consider forcing a window at this point
5863 * to permit a new source window. */
5864 if (xd3_source_match_setup (stream, stream->large_table[linx] - HASH_CKOFFSET) == 0)
5865 {
5866 if ((ret = xd3_source_extend_match (stream))) { return ret; }
5867
5868 /* Update stream position. match_fwd is zero if no match. */
5869 if (stream->match_fwd > 0)
5870 {
5871 HANDLELAZY (stream->match_fwd);
5872 }
5873 }
5874 }
5875 }
5876
5877 /* Small matches. */
5878 if (DO_SMALL)
5879 {
5880 sinx = xd3_checksum_hash (& stream->small_hash, scksum);
5881
5882 /* Verify incremental state in debugging mode. */
5883 IF_DEBUG (xd3_verify_small_state (stream, inp, scksum));
5884
5885 /* Search for the longest match */
5886 if (unlikely (stream->small_table[sinx] != 0))
5887 {
5888 match_length = xd3_smatch (stream,
5889 stream->small_table[sinx],
5890 scksum,
5891 & match_offset);
5892 }
5893 else
5894 {
5895 match_length = 0;
5896 }
5897
5898 /* Insert a hash for this string. */
5899 xd3_scksum_insert (stream, sinx, scksum, pos_in);
5900
5901 /* Promote the previous match address to head of the hash bucket. This is
5902 * intended to improve the same cache hit rate. */
5903 if (match_length != 0 && PROMOTE)
5904 {
5905 xd3_scksum_insert (stream, sinx, scksum, match_offset);
5906 }
5907
5908 /* Maybe output a COPY instruction */
5909 if (unlikely (match_length >= min_match))
5910 {
5911 IF_DEBUG1 ({
5912 static int x = 0;
5913 P(RINT "[target match:%d] <inp %u %u> <cpy %u %u> (-%d) [ %u bytes ]\n",
5914 x++,
5915 pos_in,
5916 pos_in + match_length,
5917 match_offset,
5918 match_offset + match_length,
5919 pos_in - match_offset,
5920 match_length);
5921 });
5922
5923 if ((ret = xd3_found_match (stream,
5924 /* decoder position */ pos_in,
5925 /* length */ match_length,
5926 /* address */ match_offset,
5927 /* is_source */ 0))) { return ret; }
5928
5929 /* SSMATCH option: search small matches: continue the incremental checksum
5930 * through the matched material. Only if not lazy matching. */
5931 if (SSMATCH && !TRYLAZYLEN (match_length, pos_in, max_in))
5932 {
5933 usize_t avail = max_in - SLOOK - pos_in;
5934 usize_t ml_m1 = match_length - 1;
5935 usize_t right;
5936 int aincr;
5937
5938 IF_DEBUG (usize_t nposi = pos_in + match_length);
5939
5940 /* Avail is the last offset we can compute an incremental cksum. If the
5941 * match length exceeds that offset then we are finished performing
5942 * incremental updates after this step. */
5943 if (ml_m1 < avail)
5944 {
5945 right = ml_m1;
5946 aincr = 1;
5947 }
5948 else
5949 {
5950 right = avail;
5951 aincr = 0;
5952 }
5953
5954 /* Compute incremental checksums within the match. */
5955 while (right > 0)
5956 {
5957 SMALL_CKSUM_UPDATE (scksum, inp, SLOOK);
5958 if (DO_LARGE && (pos_in + LLOOK < max_in)) {
5959 LARGE_CKSUM_UPDATE (lcksum, inp, LLOOK);
5960 }
5961
5962 inp += 1;
5963 pos_in += 1;
5964 right -= 1;
5965 sinx = xd3_checksum_hash (& stream->small_hash, scksum);
5966
5967 IF_DEBUG (xd3_verify_small_state (stream, inp, scksum));
5968
5969 xd3_scksum_insert (stream, sinx, scksum, pos_in);
5970 }
5971
5972 if (aincr)
5973 {
5974 /* Keep searching... */
5975 if (DO_RUN) { run_l = xd3_comprun (inp+1, SLOOK-1, & run_c); }
5976 XD3_ASSERT (nposi == pos_in + 1);
5977 XD3_ASSERT (pos_in + SLOOK < max_in);
5978 min_match = MIN_MATCH;
5979 goto updatesure;
5980 }
5981 else
5982 {
5983 /* Not enough input for another match. */
5984 XD3_ASSERT (pos_in + SLOOK >= max_in);
5985 goto loopnomore;
5986 }
5987 }
5988
5989 /* Else case: copy instruction, but no SSMATCH. */
5990 HANDLELAZY (match_length);
5991 }
5992 }
5993
5994 /* The logic above prevents excess work during lazy matching by increasing min_match
5995 * to avoid smaller matches. Each time we advance pos_in by one, the minimum match
5996 * shortens as well. */
5997 if (min_match > MIN_MATCH)
5998 {
5999 min_match -= 1;
6000 }
6001
 /* Entry point used by HANDLELAZY when lazy matching continues: it jumps here directly,
  * so the min_match decrement just above is skipped on that path. */
6002 updateone:
6003
6004 /* See if there are no more incremental cksums to compute. */
6005 if (pos_in + SLOOK == max_in)
6006 {
6007 goto loopnomore;
6008 }
6009
 /* Entry point used by the SSMATCH path, which has already asserted that
  * pos_in + SLOOK < max_in and therefore skips the end-of-input test above. */
6010 updatesure:
6011
6012 /* Compute next RUN, CKSUM */
6013 if (DO_RUN) { NEXTRUN (inp[SLOOK]); }
6014 if (DO_SMALL) { SMALL_CKSUM_UPDATE (scksum, inp, SLOOK); }
6015 if (DO_LARGE && (pos_in + LLOOK < max_in)) { LARGE_CKSUM_UPDATE (lcksum, inp, LLOOK); }
6016 }
6017
6018 loopnomore:
6019 return 0;
6020}
6021#endif /* XD3_ENCODER */
6022#endif /* __XDELTA3_C_TEMPLATE_PASS__ */
diff --git a/xdelta3/xdelta3.h b/xdelta3/xdelta3.h
new file mode 100755
index 0000000..a35c9b0
--- /dev/null
+++ b/xdelta3/xdelta3.h
@@ -0,0 +1,1029 @@
1/* xdelta 3 - delta compression tools and library
2 * Copyright (C) 2001, 2003, 2004, 2005, 2006. Joshua P. MacDonald
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 */
18
19/* Welcome to Xdelta. If you want to know more about Xdelta, start by reading xdelta3.c.
20 * If you are ready to use the API, continue reading here. There are two interfaces --
21 * xd3_encode_input and xd3_decode_input -- plus a dozen or so related calls. This
22 * interface is styled after Zlib. */
23
24#ifndef _XDELTA3_H_
25#define _XDELTA3_H_
26
27#include <stdlib.h>
28#include <string.h>
29#include <sys/types.h>
30
31/**********************************************************************/
32
33/* Default configured value of stream->winsize. If the program supplies
34 * xd3_encode_input() with data smaller than winsize the stream will
35 * automatically buffer the input, otherwise the input buffer is used directly.
36 */
37#ifndef XD3_DEFAULT_WINSIZE
38#define XD3_DEFAULT_WINSIZE (1U << 18)
39#endif
40
41/* The source block size.
42 */
43#ifndef XD3_DEFAULT_SRCBLKSZ
44#define XD3_DEFAULT_SRCBLKSZ (1U << 18)
45#endif
46
47/* The source window starts with only a few checksums, then doubles up to
48 * XD3_DEFAULT_MAX_CKSUM_ADVANCE. */
49#ifndef XD3_DEFAULT_START_CKSUM_ADVANCE
50#define XD3_DEFAULT_START_CKSUM_ADVANCE 1024
51#endif
52
53/* TODO: There is no command-line flag to set this value. */
54#ifndef XD3_DEFAULT_MAX_CKSUM_ADVANCE
55#define XD3_DEFAULT_MAX_CKSUM_ADVANCE (1U << 23)
56#endif
57
58/* Default total size of the source window used in xdelta3-main.h */
59#ifndef XD3_DEFAULT_SRCWINSZ
60#define XD3_DEFAULT_SRCWINSZ (1U << 23)
61#endif
62
63/* Default configured value of stream->memsize. This dictates how much memory Xdelta will
64 * use for string-matching data structures. */
65#ifndef XD3_DEFAULT_MEMSIZE
66#define XD3_DEFAULT_MEMSIZE (1U << 18)
67#endif
68
69/* When Xdelta requests a memory allocation for certain buffers, it rounds up to units of
70 * at least this size. The code assumes (and asserts) that this is a power-of-two. */
71#ifndef XD3_ALLOCSIZE
72#define XD3_ALLOCSIZE (1U<<13)
73#endif
74
75/* The XD3_HARDMAXWINSIZE parameter is a safety mechanism to protect decoders against
76 * malicious files. The decoder will never decode a window larger than this. If the file
77 * specifies VCD_TARGET the decoder may require two buffers of this size. Rationale for
78 * choosing 22 bits as a maximum: in the worst case, any VCDIFF address without a copy
79 * window then needs 3 bytes to encode (7 bits per byte; the HERE and SAME modes put every
80 * address within half the window away). NOTE: the default below is 23 bits (1U<<23). */
81#ifndef XD3_HARDMAXWINSIZE
82#define XD3_HARDMAXWINSIZE (1U<<23)
83#endif
84
85/* The XD3_NODECOMPRESSSIZE parameter tells the xdelta main routine not to try to
86 * externally-decompress source inputs that are too large. Since these files must be
87 * seekable, they are decompressed to a temporary file location and the user may not wish
88 * for this. */
89#ifndef XD3_NODECOMPRESSSIZE
90#define XD3_NODECOMPRESSSIZE (1U<<24)
91#endif
92
93/* The IOPT_SIZE value sets the size of a buffer used to batch overlapping copy
94 * instructions before they are optimized by picking the best non-overlapping ranges. The
95 * larger this buffer, the longer a forced xd3_srcwin_setup() decision is held off. */
96#ifndef XD3_DEFAULT_IOPT_SIZE
97#define XD3_DEFAULT_IOPT_SIZE 128
98#endif
99
100/* The maximum distance backward to search for small matches */
101#ifndef XD3_DEFAULT_SPREVSZ
102#define XD3_DEFAULT_SPREVSZ (1U << 16)
103#endif
104
105/* Sizes and addresses within VCDIFF windows are represented as usize_t
106 *
107 * For source-file offsets and total file sizes, total input and output counts, the xoff_t
108 * type is used. The decoder and encoder generally check for overflow of the xoff_t size,
109 * and this is tested at the 32bit boundary [xdelta3-test.h].
110 */
/* Basic integer types.
 *
 * Non-Windows builds alias the fixed-width names onto the u_intN_t types (presumably
 * supplied by <sys/types.h>, which this header includes).  Windows builds spell them
 * out with built-in types, define INLINE to nothing and add the `uint' shorthand.
 *
 * Fix: the Windows definition of usize_t was missing its terminating semicolon, which
 * made the following typedef a syntax error on that platform. */
#ifndef _WIN32
typedef unsigned int usize_t;
typedef u_int8_t uint8_t;
typedef u_int16_t uint16_t;
typedef u_int32_t uint32_t;
typedef u_int64_t uint64_t;
#else
#include <windows.h>
#define INLINE
typedef unsigned int uint;
typedef unsigned int usize_t;
typedef unsigned char uint8_t;
typedef unsigned short uint16_t;
typedef unsigned long uint32_t;
typedef ULONGLONG uint64_t;
#endif
127
128#define SIZEOF_USIZE_T 4
129
130#ifndef XD3_USE_LARGEFILE64
131#define XD3_USE_LARGEFILE64 1
132#endif
133
134#if XD3_USE_LARGEFILE64
135#define __USE_FILE_OFFSET64 1 /* GLIBC: for 64bit fileops, ... ? */
136typedef uint64_t xoff_t;
137#define SIZEOF_XOFF_T 8
138#else
139typedef uint32_t xoff_t;
140#define SIZEOF_XOFF_T 4
141#endif
142
143#define USE_UINT32 (SIZEOF_USIZE_T == 4 || SIZEOF_XOFF_T == 4 || REGRESSION_TEST)
144#define USE_UINT64 (SIZEOF_USIZE_T == 8 || SIZEOF_XOFF_T == 8 || REGRESSION_TEST)
145
146/**********************************************************************/
147
148#ifndef INLINE
149#define INLINE inline
150#endif
151
152/* Whether to build the encoder, otherwise only build the decoder. */
153#ifndef XD3_ENCODER
154#define XD3_ENCODER 1
155#endif
156
157/* The code returned when main() fails, also defined in system includes. */
158#ifndef EXIT_FAILURE
159#define EXIT_FAILURE 1
160#endif
161
162/* REGRESSION TEST enables the "xdelta3 test" command, which runs a series of self-tests. */
163#ifndef REGRESSION_TEST
164#define REGRESSION_TEST 0
165#endif
166
167/* XD3_DEBUG=1 enables assertions and various statistics. Levels > 1 enable some
168 * additional output only useful during development and debugging. */
169#ifndef XD3_DEBUG
170#define XD3_DEBUG 0
171#endif
172
173#ifndef PYTHON_MODULE
174#define PYTHON_MODULE 0
175#endif
176
177/* There are three string matching functions supplied: one fast, one slow (default), and
178 * one soft-configurable. To disable any of these, use the following definitions. */
179#ifndef XD3_BUILD_SLOW
180#define XD3_BUILD_SLOW 1
181#endif
182#ifndef XD3_BUILD_FAST
183#define XD3_BUILD_FAST 1
184#endif
185#ifndef XD3_BUILD_SOFT
186#define XD3_BUILD_SOFT 1
187#endif
188
189#if XD3_DEBUG
190#include <stdio.h>
191#endif
192
193/* XPRINT. Debug output and VCDIFF_TOOLS functions report to stderr. I have used an
194 * irregular style to abbreviate [fprintf(stderr, "] as [P(RINT "]. */
195#define P fprintf
196#define RINT stderr,
197
198typedef struct _xd3_stream xd3_stream;
199typedef struct _xd3_source xd3_source;
200typedef struct _xd3_hash_cfg xd3_hash_cfg;
201typedef struct _xd3_smatcher xd3_smatcher;
202typedef struct _xd3_rinst xd3_rinst;
203typedef struct _xd3_dinst xd3_dinst;
204typedef struct _xd3_hinst xd3_hinst;
205typedef struct _xd3_rpage xd3_rpage;
206typedef struct _xd3_addr_cache xd3_addr_cache;
207typedef struct _xd3_output xd3_output;
208typedef struct _xd3_desect xd3_desect;
209typedef struct _xd3_iopt_buf xd3_iopt_buf;
210typedef struct _xd3_rlist xd3_rlist;
211typedef struct _xd3_sec_type xd3_sec_type;
212typedef struct _xd3_sec_cfg xd3_sec_cfg;
213typedef struct _xd3_sec_stream xd3_sec_stream;
214typedef struct _xd3_config xd3_config;
215typedef struct _xd3_code_table_desc xd3_code_table_desc;
216typedef struct _xd3_code_table_sizes xd3_code_table_sizes;
217typedef struct _xd3_slist xd3_slist;
218
219/* The stream configuration has three callback functions, all of which may be supplied
220 * with NULL values. If config->getblk is provided as NULL, the stream returns
221 * XD3_GETSRCBLK. */
222
223typedef void* (xd3_alloc_func) (void *opaque,
224 usize_t items,
225 usize_t size);
226typedef void (xd3_free_func) (void *opaque,
227 void *address);
228
229typedef int (xd3_getblk_func) (xd3_stream *stream,
230 xd3_source *source,
231 xoff_t blkno);
232
233/* These are internal functions to delay construction of encoding tables and support
234 * alternate code tables. See the comments & code enabled by GENERIC_ENCODE_TABLES. */
235
236typedef const xd3_dinst* (xd3_code_table_func) (void);
237typedef int (xd3_comp_table_func) (xd3_stream *stream, const uint8_t **data, usize_t *size);
238
239
240/* Some junk. */
241
242#ifndef XD3_ASSERT
243#if XD3_DEBUG
244#define XD3_ASSERT(x) \
245 do { if (! (x)) { P(RINT "%s:%d: XD3 assertion failed: %s\n", __FILE__, __LINE__, #x); \
246 abort (); } } while (0)
247#else
248#define XD3_ASSERT(x) (void)0
249#endif
250#endif
251
252#ifdef __GNUC__
253/* As seen on linux-kernel. */
254#ifndef max
255#define max(x,y) ({ \
256 const typeof(x) _x = (x); \
257 const typeof(y) _y = (y); \
258 (void) (&_x == &_y); \
259 _x > _y ? _x : _y; })
260#endif
261
262#ifndef min
263#define min(x,y) ({ \
264 const typeof(x) _x = (x); \
265 const typeof(y) _y = (y); \
266 (void) (&_x == &_y); \
267 _x < _y ? _x : _y; })
268#endif
269#else
270#ifndef max
271#define max(x,y) ((x) < (y) ? (y) : (x))
272#endif
273#ifndef min
274#define min(x,y) ((x) < (y) ? (x) : (y))
275#endif
276#endif
277
278/******************************************************************************************
279 PUBLIC ENUMS
280 ******************************************************************************************/
281
282/* These are the six ordinary status codes returned by the xd3_encode_input() and
283 * xd3_decode_input() state machines. */
284typedef enum {
285
286 /* An application must be prepared to handle these six return values from either
287 * xd3_encode_input or xd3_decode_input, except in the case of no-source compression, in
288 * which case XD3_GETSRCBLK is never returned. More detailed comments for these are
289 * given in xd3_encode_input and xd3_decode_input comments, below. */
290 XD3_INPUT = -17703, /* need input */
291 XD3_OUTPUT = -17704, /* have output */
292 XD3_GETSRCBLK = -17705, /* need a block of source input (with no xd3_getblk function),
293 * a chance to do non-blocking read. */
294 XD3_GOTHEADER = -17706, /* (decode-only) after the initial VCDIFF & first window header */
295 XD3_WINSTART = -17707, /* notification: returned before a window is processed, giving a
296 * chance to XD3_SKIP_WINDOW or not XD3_SKIP_EMIT that window. */
297 XD3_WINFINISH = -17708, /* notification: returned after encode/decode & output for a window */
298
299} xd3_rvalues;
300
301/* special values in config->flags */
/* Each enumerator is a distinct single bit (bits 1 through 14; bit 0 is not assigned
 * here), except XD3_SEC_TYPE and XD3_SEC_OTHER, which are masks OR-ing related bits. */
302typedef enum
303{
304 XD3_JUST_HDR = (1 << 1), /* used by VCDIFF tools, see xdelta3-main.h. */
305 XD3_SKIP_WINDOW = (1 << 2), /* used by VCDIFF tools, see xdelta3-main.h. */
306 XD3_SKIP_EMIT = (1 << 3), /* used by VCDIFF tools, see xdelta3-main.h. */
307 XD3_FLUSH = (1 << 4), /* flush the stream buffer to prepare for xd3_stream_close(). */
308
309 XD3_SEC_DJW = (1 << 5), /* use DJW static huffman */
310 XD3_SEC_FGK = (1 << 6), /* use FGK adaptive huffman */
311 XD3_SEC_TYPE = (XD3_SEC_DJW | XD3_SEC_FGK),
312
313 XD3_SEC_NODATA = (1 << 7), /* disable secondary compression of the data section. */
314 XD3_SEC_NOINST = (1 << 8), /* disable secondary compression of the inst section. */
315 XD3_SEC_NOADDR = (1 << 9), /* disable secondary compression of the addr section (which is most random). */
316
317 XD3_SEC_OTHER = (XD3_SEC_NODATA | XD3_SEC_NOINST | XD3_SEC_NOADDR),
318
319 XD3_ADLER32 = (1 << 10), /* enable checksum computation in the encoder. */
320 XD3_ADLER32_NOVER = (1 << 11), /* disable checksum verification in the decoder. */
321
322 XD3_ALT_CODE_TABLE = (1 << 12), /* for testing the alternate code table encoding. */
323
324 XD3_NOCOMPRESS = (1 << 13), /* disable ordinary data compression feature,
325 * only search the source, not the target. */
326 XD3_BEGREEDY = (1 << 14), /* disable the "1.5-pass algorithm", instead use
327 * greedy matching. Greedy is off by default. */
328} xd3_flags;
329
330/* The values of this enumeration are set in xd3_config using the smatch_cfg variable. It
331 * can be set to slow, fast, soft, or default. The fast and slow setting uses preset,
332 * hardcoded parameters and the soft setting is accompanied by user-supplied parameters.
333 * If the user supplies 'default' the code selects one of the available string matchers.
334 * Due to compile-time settings (see XD3_SLOW_SMATCHER, XD3_FAST_SMATCHER,
335 * XD3_SOFT_SMATCHER variables), not all options may be available.
 *
 * NOTE(review): this header defines the build switches as XD3_BUILD_SLOW, XD3_BUILD_FAST
 * and XD3_BUILD_SOFT; the XD3_*_SMATCHER names above are presumably an older spelling of
 * the same switches -- confirm. */
336typedef enum
337{
338 XD3_SMATCH_DEFAULT = 0,
339 XD3_SMATCH_SLOW = 1,
340 XD3_SMATCH_FAST = 2,
341 XD3_SMATCH_SOFT = 3,
342} xd3_smatch_cfg;
343
344/******************************************************************************************
345 PRIVATE ENUMS
346 ******************************************************************************************/
347
348/* stream->match_state is part of the xd3_encode_input state machine for source matching:
349 *
350 * 1. the XD3_GETSRCBLK block-read mechanism means reentrant matching
351 * 2. this state spans encoder windows: a match and end-of-window will continue in the next
352 * 3. the initial target byte and source byte are a presumed match, to avoid some computation
353 * in case the inputs are identical.
 *
 * MATCH_TARGET is 0, so it is the state of a zero-initialized stream.
354 */
355typedef enum {
356
357 MATCH_TARGET = 0, /* in this state, attempt to match the start of the target with the
358 * previously set source address (initially 0). */
359 MATCH_BACKWARD = 1, /* currently expanding a match backward in the source/target. */
360 MATCH_FORWARD = 2, /* currently expanding a match forward in the source/target. */
361 MATCH_SEARCHING = 3, /* currently searching for a match. */
362
363} xd3_match_state;
364
365/* The xd3_encode_input state machine steps through these states in the following order.
366 * The matcher is reentrant and returns XD3_INPUT whenever it requires more data. After
367 * receiving XD3_INPUT, if the application reads EOF it should call xd3_stream_close().
 *
 * ENC_INIT is 0, so it is the state of a zero-initialized stream.
368 */
369typedef enum {
370
371 ENC_INIT = 0, /* xd3_encode_input has never been called. */
372 ENC_INPUT = 1, /* waiting for xd3_avail_input () to be called. */
373 ENC_SEARCH = 2, /* currently searching for matches. */
374 ENC_FLUSH = 3, /* currently emitting output. */
375 ENC_POSTOUT = 4, /* after an output section. */
376 ENC_POSTWIN = 5, /* after all output sections. */
377 ENC_ABORTED = 6, /* abort. */
378} xd3_encode_state;
379
380/* The xd3_decode_input state machine steps through these states in the following order.
381 * The matcher is reentrant and returns XD3_INPUT whenever it requires more data. After
382 * receiving XD3_INPUT, if the application reads EOF it should call xd3_stream_close().
383 *
384 * 0-8: the VCDIFF header
385 * 9-18: the VCDIFF window header
386 * 19-21: the three primary sections: data (which I think should have gone last), inst, addr
387 * 22: producing output: returns XD3_OUTPUT, possibly XD3_GETSRCBLK,
388 * 23: return XD3_WINFINISH, set state=9 to decode more input
 * 24: the stream was aborted (DEC_ABORTED)
389 */
390typedef enum {
391
392 DEC_VCHEAD = 0, /* VCDIFF header */
393 DEC_HDRIND = 1, /* header indicator */
394
395 DEC_SECONDID = 2, /* secondary compressor ID */
396
397 DEC_TABLEN = 3, /* code table length */
398 DEC_NEAR = 4, /* code table near */
399 DEC_SAME = 5, /* code table same */
400 DEC_TABDAT = 6, /* code table data */
401
402 DEC_APPLEN = 7, /* application data length */
403 DEC_APPDAT = 8, /* application data */
404
405 DEC_WININD = 9, /* window indicator */
406
407 DEC_CPYLEN = 10, /* copy window length */
408 DEC_CPYOFF = 11, /* copy window offset */
409
410 DEC_ENCLEN = 12, /* length of delta encoding */
411 DEC_TGTLEN = 13, /* length of target window */
412 DEC_DELIND = 14, /* delta indicator */
413
414 DEC_DATALEN = 15, /* length of ADD+RUN data */
415 DEC_INSTLEN = 16, /* length of instruction data */
416 DEC_ADDRLEN = 17, /* length of address data */
417
418 DEC_CKSUM = 18, /* window checksum */
419
420 DEC_DATA = 19, /* data section */
421 DEC_INST = 20, /* instruction section */
422 DEC_ADDR = 21, /* address section */
423
424 DEC_EMIT = 22, /* producing data */
425
426 DEC_FINISH = 23, /* window finished */
427
428 DEC_ABORTED = 24, /* xd3_abort_stream */
429} xd3_decode_state;
430
/* An application never sees these internal codes.
 *
 * Internal codes must not share a value with any public xd3_rvalues code, otherwise a
 * caller comparing a return value cannot tell them apart.  XD3_NOSECOND used to be
 * -17708, which is the value of XD3_WINFINISH; it now takes the next value below the
 * public range.
 * NOTE(review): -17709 is assumed to be unused elsewhere in the project -- confirm. */
typedef enum {
  XD3_NOSECOND = -17709, /* when secondary compression finds no improvement. */
} xd3_pvalues;
435
436/******************************************************************************************
437 internal types
438 ******************************************************************************************/
439
440/* instruction lists used in the IOPT buffer */
/* A bare pair of list links.  It is embedded in xd3_rinst as the `link' member and is
 * also the type of the `used' and `free' list heads in xd3_iopt_buf. */
441struct _xd3_rlist
442{
443 xd3_rlist *next;
444 xd3_rlist *prev;
445};
446
447/* the raw encoding of an instruction used in the IOPT buffer */
448struct _xd3_rinst
449{
450 uint8_t type;
451 uint8_t xtra;
452 uint8_t code1;
453 uint8_t code2;
454 usize_t pos; /* presumably the target-window position of the instruction -- confirm */
455 usize_t size; /* presumably the instruction length in bytes -- confirm */
456 xoff_t addr; /* xoff_t, unlike pos/size, so it can hold a source-file offset */
457 xd3_rlist link; /* list links; see the used/free lists in xd3_iopt_buf */
458};
459
460/* the code-table form of an single- or double-instruction */
/* Two (type, size) pairs, one per half of the entry.
 * NOTE(review): how an unused second half is marked is not visible here -- confirm
 * against the code-table construction code. */
461struct _xd3_dinst
462{
463 uint8_t type1;
464 uint8_t size1;
465 uint8_t type2;
466 uint8_t size2;
467};
468
469/* the decoded form of a single (half) instruction. */
/* Unlike xd3_dinst, size is a full usize_t here rather than a uint8_t, and an address
 * field is carried alongside it. */
470struct _xd3_hinst
471{
472 uint8_t type;
473 usize_t size;
474 usize_t addr;
475};
476
477/* used by the encoder to buffer output in sections. list of blocks. */
478struct _xd3_output
479{
480 uint8_t *base; /* presumably the start of this block's buffer -- confirm */
481 usize_t next; /* presumably the next free offset within base -- confirm */
482 usize_t avail; /* presumably the capacity of base -- confirm */
483 xd3_output *next_page; /* link to the following block in the list */
484};
485
486/* the VCDIFF address cache, see the RFC */
487struct _xd3_addr_cache
488{
489 uint s_near;
490 uint s_same;
491 usize_t next_slot; /* the circular index for near */
492 usize_t *near_array; /* array of size s_near */
493 usize_t *same_array; /* array of size s_same*256 */
494};
495
496/* the IOPT buffer has a used list of (ordered) instructions, possibly overlapping in
497 * target addresses, awaiting a flush */
498struct _xd3_iopt_buf
499{
500 xd3_rlist used;
501 xd3_rlist free;
502 xd3_rinst *buffer;
503};
504
505/* This is the record of a pre-compiled configuration, a subset of xd3_config. Keep them
506 * in sync! The user never sees this structure. Note: update XD3_SOFTCFG_VARCNT when
507 * changing. */
508struct _xd3_smatcher
509{
510 const char *name;
511 int (*string_match) (xd3_stream *stream);
512 uint large_look;
513 uint large_step;
514 uint small_look;
515 uint small_chain;
516 uint small_lchain;
517 uint ssmatch;
518 uint try_lazy;
519 uint max_lazy;
520 uint long_enough;
521 uint promote;
522};
523
524/* hash table size & power-of-two hash function. */
525struct _xd3_hash_cfg
526{
527 usize_t size;
528 usize_t shift;
529 usize_t mask;
530};
531
532/* a hash-chain link in the small match table, embedded with position and checksum */
533struct _xd3_slist
534{
535 xd3_slist *next;
536 xd3_slist *prev;
537 usize_t pos;
538 usize_t scksum;
539};
540
541/* a decoder section (data, inst, or addr). there is an optimization to avoid copying
542 * these sections if all the input is available, related to the copied field below.
543 * secondation compression uses the copied2 field. */
544struct _xd3_desect
545{
546 const uint8_t *buf;
547 const uint8_t *buf_max;
548 usize_t size;
549 usize_t pos;
550 uint8_t *copied1;
551 usize_t alloc1;
552 uint8_t *copied2;
553 usize_t alloc2;
554};
555
556/******************************************************************************************
557 public types
558 ******************************************************************************************/
559
/* Configuration of the secondary compressor for one section. */
struct _xd3_sec_cfg
{
  /* Which section this applies to (set automatically). */
  int data_type;
  /* Number of DJW Huffman groups. */
  int ngroups;
  /* Sector size. */
  int sector_size;
  /* If true, the efficiency check is ignored [avoids XD3_NOSECOND]. */
  int inefficient;
};
568
569/* This is the user-visible stream configuration. */
570struct _xd3_config
571{
572 usize_t memsize; /* How much memory Xdelta may allocate */
573 usize_t winsize; /* The encoder window size. */
574 usize_t sprevsz; /* How far back small string matching goes */
575 usize_t iopt_size; /* entries in the instruction-optimizing buffer */
576
577 usize_t srcwin_size; /* Initial size of the source-window lookahead */
578 usize_t srcwin_maxsz; /* srcwin_size grows by a factor of 2 when no matches are found */
579
580 xd3_getblk_func *getblk; /* The three callbacks. */
581 xd3_alloc_func *alloc;
582 xd3_free_func *freef;
583 void *opaque; /* Not used. */
584 int flags; /* stream->flags are initialized from xd3_config &
585 * never modified by the library. Use xd3_set_flags
586 * to modify flags settings mid-stream. */
587
588 xd3_sec_cfg sec_data; /* Secondary compressor config: data */
589 xd3_sec_cfg sec_inst; /* Secondary compressor config: inst */
590 xd3_sec_cfg sec_addr; /* Secondary compressor config: addr */
591
592 xd3_smatch_cfg smatch_cfg; /* See enum: use fields below for soft config */
593 uint large_look; /* large string lookahead (i.e., hashed chars) */
594 uint large_step; /* large string interval */
595 uint small_look; /* small string lookahead (i.e., hashed chars) */
596 uint small_chain; /* small string number of previous matches to try */
597 uint small_lchain; /* small string number of previous matches to try, when a lazy match */
598 uint ssmatch; /* boolean: insert checksums for matched strings */
599 uint try_lazy; /* boolean: whether lazy instruction optimization is attempted */
600 uint max_lazy; /* size of smallest match that will disable lazy matching */
601 uint long_enough; /* size of smallest match long enough to discontinue string matching. */
602 uint promote; /* whether to promote matches in the hash chain */
603};
604
605/* The primary source file object. You create one of these objects and initialize the first
606 * four fields. This library maintains the next 5 fields. The configured getblk implementation is
607 * responsible for setting the final 3 fields when called (and/or when XD3_GETSRCBLK is returned).
608 */
609struct _xd3_source
610{
611 /* you set */
612 xoff_t size; /* size of this source */
613 usize_t blksize; /* block size */
614 const char *name; /* its name, for debug/print purposes */
615 void *ioh; /* opaque handle */
616
617 /* xd3 sets */
618 usize_t srclen; /* length of this source window */
619 xoff_t srcbase; /* offset of this source window in the source itself */
620 xoff_t blocks; /* the total number of blocks in this source */
621 usize_t cpyoff_blocks; /* offset of copy window in blocks */
622 usize_t cpyoff_blkoff; /* offset of copy window in blocks, remainder */
623 xoff_t getblkno; /* request block number: xd3 sets current getblk request */
624
625 /* getblk sets */
626 xoff_t curblkno; /* current block number: client sets after getblk request */
627 usize_t onblk; /* number of bytes on current block: client sets, xd3 verifies */
628 const uint8_t *curblk; /* current block array: client sets after getblk request */
629};
630
631/* The primary xd3_stream object, used for encoding and decoding. You may access only two
632 * fields: avail_out, next_out. Use the methods above to operate on xd3_stream. */
633struct _xd3_stream
634{
635 /* input state */
636 const uint8_t *next_in; /* next input byte */
637 usize_t avail_in; /* number of bytes available at next_in */
638 xoff_t total_in; /* how many bytes in */
639
640 /* output state */
641 uint8_t *next_out; /* next output byte */
642 usize_t avail_out; /* number of bytes available at next_out */
643 usize_t space_out; /* total out space */
644 xoff_t current_window; /* number of windows encoded/decoded */
645 xoff_t total_out; /* how many bytes out */
646
647 /* to indicate an error, xd3 sets */
648 const char *msg; /* last error message, NULL if no error */
649
650 /* source configuration */
651 xd3_source *src; /* source array */
652
653 /* encoder memory configuration */
654 usize_t winsize; /* suggested window size */
655 usize_t memsize; /* memory size parameter */
656 usize_t sprevsz; /* small string, previous window size (power of 2) */
657 usize_t sprevmask; /* small string, previous window size mask */
658 uint iopt_size;
659
660 /* general configuration */
661 xd3_getblk_func *getblk; /* set nxtblk, nxtblkno to scanblkno */
662 xd3_alloc_func *alloc; /* malloc function */
663 xd3_free_func *free; /* free function */
664 void* opaque; /* private data object passed to alloc, free, and getblk */
665 int flags; /* various options */
666 int aborted;
667
668 /* secondary compressor configuration */
669 xd3_sec_cfg sec_data; /* Secondary compressor config: data */
670 xd3_sec_cfg sec_inst; /* Secondary compressor config: inst */
671 xd3_sec_cfg sec_addr; /* Secondary compressor config: addr */
672
673 /* fields common to xd3_stream_config, xd3_smatcher */
674 uint large_look;
675 uint large_step;
676 uint small_look;
677 uint small_chain;
678 uint small_lchain;
679 uint ssmatch;
680 uint try_lazy;
681 uint max_lazy;
682 uint long_enough;
683 uint promote;
684 uint srcwin_size;
685 uint srcwin_maxsz;
686 int (*string_match) (xd3_stream *stream);
687
688 usize_t *large_table; /* table of large checksums */
689 xd3_hash_cfg large_hash; /* large hash config */
690
691 usize_t *small_table; /* table of small checksums */
692 xd3_slist *small_prev; /* table of previous offsets, circular linked list (no sentinel) */
693 int small_reset; /* true if small table should be reset */
694
695 xd3_hash_cfg small_hash; /* small hash config */
696
697 xd3_addr_cache acache; /* the vcdiff address cache */
698
699 xd3_encode_state enc_state; /* state of the encoder */
700
701 usize_t taroff; /* base offset of the target input */
702 usize_t input_position; /* current input position */
703 usize_t min_match; /* current minimum match length, avoids redundent matches */
704 usize_t unencoded_offset; /* current input, first unencoded offset. this value is <= the first
705 * instruction's position in the iopt buffer, if there is at least one
706 * match in the buffer. */
707
708 // SRCWIN
709 // these variables plus srcwin_size, srcwin_maxsz above (set by config)
710 int srcwin_decided; /* boolean: true if the srclen,srcbase have been decided. */
711 xoff_t srcwin_cksum_pos; /* Source checksum position */
712
713 // MATCH
714 xd3_match_state match_state; /* encoder match state */
715 xoff_t match_srcpos; /* current match source position relative to srcbase */
716 xoff_t match_minaddr; /* smallest matching address to set window params
717 * (reset each window xd3_encode_reset) */
718 xoff_t match_maxaddr; /* largest matching address to set window params
719 * (reset each window xd3_encode_reset) */
720 usize_t match_back; /* match extends back so far */
721 usize_t match_maxback; /* match extends back maximum */
722 usize_t match_fwd; /* match extends forward so far */
723 usize_t match_maxfwd; /* match extends forward maximum */
724
725 uint8_t *buf_in; /* for saving buffered input */
726 usize_t buf_avail; /* amount of saved input */
727 const uint8_t *buf_leftover; /* leftover content of next_in (i.e., user's buffer) */
728 usize_t buf_leftavail; /* amount of leftover content */
729
730 xd3_output *enc_current; /* current output buffer */
731 xd3_output *enc_free; /* free output buffers */
732 xd3_output *enc_heads[4]; /* array of encoded outputs: head of chain */
733 xd3_output *enc_tails[4]; /* array of encoded outputs: tail of chain */
734
735 xd3_iopt_buf iopt; /* instruction optimizing buffer */
736 xd3_rinst *iout; /* next single instruction */
737
738 const uint8_t *enc_appheader; /* application header to encode */
739 usize_t enc_appheadsz; /* application header size */
740
741 /* decoder stuff */
742 xd3_decode_state dec_state; /* current DEC_XXX value */
743 uint dec_hdr_ind; /* VCDIFF header indicator */
744 uint dec_win_ind; /* VCDIFF window indicator */
745 uint dec_del_ind; /* VCDIFF delta indicator */
746
747 uint8_t dec_magic[4]; /* First four bytes */
748 usize_t dec_magicbytes; /* Magic position. */
749
750 uint dec_secondid; /* Optional secondary compressor ID. */
751
752 usize_t dec_codetblsz; /* Optional code table: length. */
753 uint8_t *dec_codetbl; /* Optional code table: storage. */
754 usize_t dec_codetblbytes; /* Optional code table: position. */
755
756 usize_t dec_appheadsz; /* Optional application header: size. */
757 uint8_t *dec_appheader; /* Optional application header: storage */
758 usize_t dec_appheadbytes; /* Optional application header: position. */
759
760 usize_t dec_cksumbytes; /* Optional checksum: position. */
761 uint8_t dec_cksum[4]; /* Optional checksum: storage. */
762 uint32_t dec_adler32; /* Optional checksum: value. */
763
764 usize_t dec_cpylen; /* length of copy window (VCD_SOURCE or VCD_TARGET) */
765 xoff_t dec_cpyoff; /* offset of copy window (VCD_SOURCE or VCD_TARGET) */
766 usize_t dec_enclen; /* length of delta encoding */
767 usize_t dec_tgtlen; /* length of target window */
768
769#if USE_UINT64
770 uint64_t dec_64part; /* part of a decoded uint64_t */
771#endif
772#if USE_UINT32
773 uint32_t dec_32part; /* part of a decoded uint32_t */
774#endif
775
776 xoff_t dec_winstart; /* offset of the start of current target window */
777 xoff_t dec_window_count; /* == current_window + 1 in DEC_FINISH */
778 usize_t dec_winbytes; /* bytes of the three sections so far consumed */
779 usize_t dec_hdrsize; /* VCDIFF + app header size */
780
781 const uint8_t *dec_tgtaddrbase; /* Base of decoded target addresses (addr >= dec_cpylen). */
782 const uint8_t *dec_cpyaddrbase; /* Base of decoded copy addresses (addr < dec_cpylen). */
783
784 usize_t dec_position; /* current decoder position counting the cpylen offset */
785 usize_t dec_maxpos; /* maximum decoder position counting the cpylen offset */
786 xd3_hinst dec_current1; /* current instruction */
787 xd3_hinst dec_current2; /* current instruction */
788
789 uint8_t *dec_buffer; /* Decode buffer */
790 uint8_t *dec_lastwin; /* In case of VCD_TARGET, the last target window. */
791 usize_t dec_lastlen; /* length of the last target window */
792 xoff_t dec_laststart; /* offset of the start of last target window */
793 usize_t dec_lastspace; /* allocated space of last target window, for reuse */
794
795 xd3_desect inst_sect; /* staging area for decoding window sections */
796 xd3_desect addr_sect;
797 xd3_desect data_sect;
798
799 xd3_code_table_func *code_table_func;
800 xd3_comp_table_func *comp_table_func;
801 const xd3_dinst *code_table;
802 const xd3_code_table_desc *code_table_desc;
803 xd3_dinst *code_table_alloc;
804
805 /* secondary compression */
806 const xd3_sec_type *sec_type;
807 xd3_sec_stream *sec_stream_d;
808 xd3_sec_stream *sec_stream_i;
809 xd3_sec_stream *sec_stream_a;
810
811#if XD3_DEBUG
812 /* statistics */
813 usize_t n_cpy;
814 usize_t n_add;
815 usize_t n_run;
816
817 usize_t n_ibytes;
818 usize_t n_sbytes;
819 usize_t n_dbytes;
820
821 usize_t l_cpy;
822 usize_t l_add;
823 usize_t l_run;
824
825 usize_t sh_searches;
826 usize_t sh_compares;
827
828 usize_t *i_freqs;
829 usize_t *i_modes;
830 usize_t *i_sizes;
831
832 usize_t large_ckcnt;
833
834 /* memory usage */
835 usize_t alloc_cnt;
836 usize_t free_cnt;
837
838 xoff_t n_emit;
839#endif
840};
841
842/******************************************************************************************
843 PUBLIC FUNCTIONS
844 ******************************************************************************************/
845
846/* The two I/O disciplines, encode and decode, have similar stream semantics. It is
847 * recommended that applications use the same code for compression and decompression -
848 * because there are only a few differences in handling encoding/decoding.
849 *
850 * See also the xd3_avail_input() and xd3_consume_output() routines, inlined below.
851 *
852 * XD3_INPUT: the process requires more input: call xd3_avail_input() then repeat
853 * XD3_OUTPUT: the process has more output: read stream->next_out, stream->avail_out,
854 * then call xd3_consume_output(), then repeat
855 * XD3_GOTHEADER: (decoder-only) notification returned following the VCDIFF header and
856 * first window header. the decoder may use the header to configure itself.
857 * XD3_WINSTART: a general notification returned once for each window except the 0-th
858 * window, which is implied by XD3_GOTHEADER. It is recommended to
859 * use a switch-stmt such as:
860 * ...
861 * again:
862 * switch ((ret = xd3_decode_input (stream))) {
863 * case XD3_GOTHEADER: {
864 * assert(stream->current_window == 0);
865 * stuff;
866 * }
867 * // fallthrough
868 * case XD3_WINSTART: {
869 * something(stream->current_window);
870 * goto again;
871 * }
872 * ...
873 * XD3_WINFINISH: a general notification, following the complete input & output of a
874 * window. at this point, stream->total_in and stream->total_out are
875 * consistent for either encoding or decoding.
876 * XD3_GETSRCBLK: If the xd3_getblk() callback is NULL, this value is returned to
877 * initiate a non-blocking source read.
878 *
879 * For simple usage, see the xd3_process_completely() function, which underlies
880 * xd3_encode_completely() and xd3_decode_completely() [xdelta3.c]. For real application
881 * usage, including the application header, the see command-line utility [xdelta3-main.h].
882 *
883 * main_input() implements the command-line encode and decode as well as the optional
884 * VCDIFF_TOOLS printhdr, printhdrs, and printdelta with a single loop [xdelta3-main.h].
885 */
886int xd3_decode_input (xd3_stream *stream);
887int xd3_encode_input (xd3_stream *stream);
888
889/* The xd3_config structure is used to initialize a stream - all data is copied into
890 * stream so config may be a temporary variable. See the [documentation] or comments on
891 * the xd3_config structure. */
892int xd3_config_stream (xd3_stream *stream,
893 xd3_config *config);
894
895/* Since Xdelta3 doesn't open any files, xd3_close_stream is just an error check that the
896 * stream is in a proper state to be closed: this means the encoder is flushed and the
897 * decoder is at a window boundary. The application is responsible for freeing any of the
898 * resources it supplied. */
899int xd3_close_stream (xd3_stream *stream);
900
901/* This unconditionally closes/frees the stream, future close() will succeed.*/
902void xd3_abort_stream (xd3_stream *stream);
903
904/* xd3_free_stream frees all memory allocated for the stream. The application is
905 * responsible for freeing any of the resources it supplied. */
906void xd3_free_stream (xd3_stream *stream);
907
908/* This function informs the encoder or decoder that source matching (i.e.,
909 * delta-compression) is possible. For encoding, this should be called before the first
910 * xd3_encode_input. A NULL source is ignored. For decoding, this should be called
911 * before the first window is decoded, but the appheader may be read first
912 * (XD3_GOTHEADER). At this point, consult xd3_decoder_needs_source(), inlined below, to
913 * determine if a source is expected by the decoder. */
914int xd3_set_source (xd3_stream *stream,
915 xd3_source *source);
916
917/* This function invokes xd3_encode_input using whole-file, in-memory inputs. The output
918 * array must be large enough to hold the output or else ENOSPC is returned. */
919int xd3_encode_completely (xd3_stream *stream,
920 const uint8_t *input,
921 usize_t input_size,
922 uint8_t *output,
923 usize_t *output_size,
924 usize_t avail_output);
925
926/* This function invokes xd3_decode_input using whole-file, in-memory inputs. The output
927 * array must be large enough to hold the output or else ENOSPC is returned. */
928int xd3_decode_completely (xd3_stream *stream,
929 const uint8_t *input,
930 usize_t input_size,
931 uint8_t *output,
932 usize_t *output_size,
933 usize_t avail_size);
934
935/* This should be called before the first call to xd3_encode_input() to include
936 * application-specific data in the VCDIFF header. */
937void xd3_set_appheader (xd3_stream *stream,
938 const uint8_t *data,
939 usize_t size);
940
941/* xd3_get_appheader may be called in the decoder after XD3_GOTHEADER. For convenience,
942 * the decoder always adds a single byte padding to the end of the application header,
943 * which is set to zero in case the application header is a string. */
944int xd3_get_appheader (xd3_stream *stream,
945 uint8_t **data,
946 usize_t *size);
947
948/* After receiving XD3_GOTHEADER, the decoder should check this function which returns 1
949 * if the decoder will require source data. */
950int xd3_decoder_needs_source (xd3_stream *stream);
951
952/* Includes the above rvalues */
953const char* xd3_strerror (int ret);
954
955/* For convenience, zero & initialize the xd3_config structure with specified flags. */
956static inline
957void xd3_init_config (xd3_config *config,
958 int flags)
959{
960 memset (config, 0, sizeof (*config));
961 config->flags = flags;
962}
963
964/* This supplies some input to the stream. */
965static inline
966void xd3_avail_input (xd3_stream *stream,
967 const uint8_t *idata,
968 usize_t isize)
969{
970 /* Even if isize is zero, the code expects a non-NULL idata. Why? It uses this value
971 * to determine whether xd3_avail_input has ever been called. If xd3_encode_input is
972 * called before xd3_avail_input it will return XD3_INPUT right away without allocating
973 * a stream->winsize buffer. This is to avoid an unwanted allocation. */
974 XD3_ASSERT (idata != NULL);
975
976 /* TODO: Should check for a call to xd3_avail_input in the wrong state. */
977 stream->next_in = idata;
978 stream->avail_in = isize;
979}
980
981/* This acknowledges receipt of output data, must be called after any XD3_OUTPUT
982 * return. */
983static inline
984void xd3_consume_output (xd3_stream *stream)
985{
986 /* TODO: Is it correct to set avail_in = 0 here, then check == 0 in avail_in? */
987 stream->avail_out = 0;
988}
989
990/* These are set for each XD3_WINFINISH return. */
991static inline
992int xd3_encoder_used_source (xd3_stream *stream) { return stream->src != NULL && stream->src->srclen > 0; }
993static inline
994xoff_t xd3_encoder_srcbase (xd3_stream *stream) { return stream->src->srcbase; }
995static inline
996usize_t xd3_encoder_srclen (xd3_stream *stream) { return stream->src->srclen; }
997
998/* Checks for legal flag changes. */
999static inline
1000void xd3_set_flags (xd3_stream *stream, int flags)
1001{
1002 /* The bitwise difference should contain only XD3_FLUSH or XD3_SKIP_WINDOW */
1003 XD3_ASSERT(((flags ^ stream->flags) & ~(XD3_FLUSH | XD3_SKIP_WINDOW)) == 0);
1004 stream->flags = flags;
1005}
1006
1007/* Gives some extra information about the latest library error, if any is known. */
1008static inline
1009const char* xd3_errstring (xd3_stream *stream)
1010{
1011 return stream->msg ? stream->msg : "";
1012}
1013
1014/* This function tells the number of bytes expected to be set in source->onblk after a
1015 * getblk request. This is for convenience of handling a partial last block. */
1016static inline
1017usize_t xd3_bytes_on_srcblk (xd3_source *source, xoff_t blkno)
1018{
1019 XD3_ASSERT (blkno < source->blocks);
1020
1021 if (blkno != source->blocks - 1)
1022 {
1023 return source->blksize;
1024 }
1025
1026 return ((source->size - 1) % source->blksize) + 1;
1027}
1028
1029#endif /* _XDELTA3_H_ */
diff --git a/xdelta3/xdelta3.prj b/xdelta3/xdelta3.prj
new file mode 100755
index 0000000..df1a445
--- /dev/null
+++ b/xdelta3/xdelta3.prj
@@ -0,0 +1,133 @@
1;; -*- Prcs -*-
2(Created-By-Prcs-Version 1 3 3)
3(Project-Description "")
4(Project-Version xdelta3 0 5)
5(Parent-Version xdelta3 0 4)
6(Version-Log "write a bit of documentation, work to fix/clean the regression test, fixed one actual bug in xd3_stream_close()")
7(New-Version-Log "")
8(Checkin-Time "Sun, 30 May 2004 14:42:47 -0700")
9(Checkin-Login jmacd)
10(Populate-Ignore ())
11(Project-Keywords
12 (WWWLeftNavBar "<table cellpadding=\"20px\" width=700> <tr> <td class=\"leftbdr\" valign=top height=600 width=100> <div class=\"leftbody\"> <h1>Xdelta</h1> <a href=\"xdelta3.html\">overview</a><br> <a href=\"xdelta3-cmdline.html\">command&nbsp;line</a><br> <a href=\"xdelta3-api-guide.html\">api&nbsp;guide</a><br> <br><a href=\"http://xdelta.org\">xdelta.org</a></h2> </div> </td> <td valign=top width=500>")
13 )
14(Files
15
16;; Files added by populate at Sun, 20 Jul 2003 04:22:04 +0400,
17;; to version 0.0(w), by jmacd:
18
19 (Makefile (xdelta3/0_Makefile 1.3 644))
20 (xdelta3.h (xdelta3/1_xdelta3.h 1.2 644))
21 (xdelta3-test.h (xdelta3/2_xdelta3-te 1.2 644))
22 (xdelta3-second.h (xdelta3/3_xdelta3-se 1.1 644))
23 (xdelta3-python.h (xdelta3/4_xdelta3-py 1.1 644))
24 (xdelta3-main.h (xdelta3/5_xdelta3-ma 1.3 644))
25 (xdelta3-list.h (xdelta3/6_xdelta3-li 1.1 644))
26 (xdelta3-fgk.h (xdelta3/7_xdelta3-fg 1.1 644))
27 (xdelta3-djw.h (xdelta3/8_xdelta3-dj 1.1 644))
28 (xdelta3-cfgs.h (xdelta3/9_xdelta3-cf 1.1 644))
29 (xdelta3-regtest.py (xdelta3/10_xdelta3-re 1.3 755))
30 (setup.py (xdelta3/11_setup.py 1.1 644))
31 (analyze_pfx.py (xdelta3/12_analyze_pf 1.1 644))
32 (analyze_gp.py (xdelta3/13_analyze_gp 1.1 644))
33 (analyze_clen.py (xdelta3/14_analyze_cl 1.1 644))
34 (rcs_junk.cc (xdelta3/15_rcs_junk.c 1.1 644))
35 (xdelta3.c (xdelta3/16_xdelta3.c 1.3 644))
36 (testh.c (xdelta3/17_testh.c 1.1 644))
37 (show.c (xdelta3/18_show.c 1.1 644))
38 (linkxd3lib.c (xdelta3/19_linkxd3lib 1.1 644))
39 (badcopy.c (xdelta3/20_badcopy.c 1.1 644))
40
41;; Files added by populate at Sun, 20 Jul 2003 04:22:08 +0400,
42;; to version 0.0(w), by jmacd:
43
44 (save.regtest.bug9/foo,v (xdelta3/21_foo,vx 1.1 444) :no-keywords)
45 (save.regtest.bug9/foo2,v (xdelta3/22_foo2,vx 1.1 444) :no-keywords)
46 (save.regtest.bug8/core (xdelta3/23_core 1.1 600) :no-keywords)
47 (save.regtest.bug8/output.x.right (xdelta3/24_output.x.r 1.1 644) :no-keywords)
48 (save.regtest.bug8/output.x (xdelta3/25_output.x 1.1 644) :no-keywords)
49 (save.regtest.bug8/input.1.xz (xdelta3/26_input.1.xz 1.1 644) :no-keywords)
50 (save.regtest.bug8/input.0.xz (xdelta3/27_input.0.xz 1.1 644) :no-keywords)
51 (save.regtest.bug8/input.0 (xdelta3/28_input.0 1.1 644))
52 (save.regtest.bug8/input.1 (xdelta3/29_input.1 1.1 644))
53 (save.regtest.bug7/core (xdelta3/30_core 1.1 600) :no-keywords)
54 (save.regtest.bug7/recon.x (xdelta3/31_recon.x 1.1 644) :no-keywords)
55 (save.regtest.bug7/output.x (xdelta3/32_output.x 1.1 644) :no-keywords)
56 (save.regtest.bug7/input.1 (xdelta3/33_input.1 1.1 644) :no-keywords)
57 (save.regtest.bug7/input.0 (xdelta3/34_input.0 1.1 644) :no-keywords)
58 (save.regtest.bug7/output (xdelta3/35_output 1.1 644) :no-keywords)
59 (save.regtest.bug7/recon (xdelta3/36_recon 1.1 644) :no-keywords)
60 (save.regtest.bug6/recon (xdelta3/37_recon 1.1 644))
61 (save.regtest.bug6/input.21 (xdelta3/38_input.21 1.1 644))
62 (save.regtest.bug6/input.20 (xdelta3/39_input.20 1.1 644))
63 (save.regtest.bug6/input.0 (xdelta3/40_input.0 1.1 644))
64 (save.regtest.bug6/output (xdelta3/41_output 1.1 644) :no-keywords)
65 (save.regtest.bug5/input.1 (xdelta3/42_input.1 1.1 644) :no-keywords)
66 (save.regtest.bug5/input.0 (xdelta3/43_input.0 1.1 644) :no-keywords)
67 (save.regtest.bug4/input.1 (xdelta3/44_input.1 1.1 644) :no-keywords)
68 (save.regtest.bug4/input.0 (xdelta3/45_input.0 1.1 644) :no-keywords)
69 (save.regtest.bug3/input.1 (xdelta3/46_input.1 1.1 644))
70 (save.regtest.bug3/input.0 (xdelta3/47_input.0 1.1 644))
71 (save.regtest.bug2/input.1 (xdelta3/48_input.1 1.1 644) :no-keywords)
72 (save.regtest.bug2/input.0 (xdelta3/49_input.0 1.1 644) :no-keywords)
73 (save.regtest.bug12/xd3regtest.27181/input.1 (xdelta3/50_input.1 1.1 644) :no-keywords)
74 (save.regtest.bug12/xd3regtest.27181/input.0 (xdelta3/51_input.0 1.1 644) :no-keywords)
75 (save.regtest.bug12/xd3regtest.27181/output (xdelta3/b/0_output 1.1 644) :no-keywords)
76 (save.regtest.bug12/output.x (xdelta3/b/1_output.x 1.1 644) :no-keywords)
77 (save.regtest.bug12/input.1 (xdelta3/b/2_input.1 1.1 644))
78 (save.regtest.bug12/input.0 (xdelta3/b/3_input.0 1.1 644))
79 (save.regtest.bug12/output (xdelta3/b/4_output 1.1 644) :no-keywords)
80 (save.regtest.bug11/recon.x (xdelta3/b/5_recon.x 1.1 644))
81 (save.regtest.bug11/output.x (xdelta3/b/6_output.x 1.1 644) :no-keywords)
82 (save.regtest.bug11/input.1 (xdelta3/b/7_input.1 1.1 644))
83 (save.regtest.bug11/input.0 (xdelta3/b/8_input.0 1.1 644))
84 (save.regtest.bug11/output (xdelta3/b/9_output 1.1 644) :no-keywords)
85 (save.regtest.bug11/recon (xdelta3/b/10_recon 1.1 644))
86 (save.regtest.bug10/recon.x (xdelta3/b/11_recon.x 1.1 644) :no-keywords)
87 (save.regtest.bug10/output.x (xdelta3/b/12_output.x 1.1 644) :no-keywords)
88 (save.regtest.bug10/input.1 (xdelta3/b/13_input.1 1.1 644) :no-keywords)
89 (save.regtest.bug10/input.0 (xdelta3/b/14_input.0 1.1 644) :no-keywords)
90 (save.regtest.bug10/output (xdelta3/b/15_output 1.1 644) :no-keywords)
91 (save.regtest.bug1/input.4 (xdelta3/b/16_input.4 1.1 644))
92 (save.regtest.bug1/input.5 (xdelta3/b/17_input.5 1.1 644))
93
94;; Files added by populate at Sun, 20 Jul 2003 04:22:28 +0400,
95;; to version 0.0(w), by jmacd:
96
97 (priorities.txt (xdelta3/b/18_priorities 1.1 644))
98
99;; Files added by populate at Sun, 20 Jul 2003 04:22:40 +0400,
100;; to version 0.0(w), by jmacd:
101
102 (vcdiff.ps (xdelta3/b/19_vcdiff.ps 1.1 644))
103 (draft-vcdiff-huffman.txt (xdelta3/b/20_draft-vcdi 1.1 600))
104
105;; Files added by populate at Sun, 20 Jul 2003 04:22:59 +0400,
106;; to version 0.0(w), by jmacd:
107
108 (dead.code (xdelta3/b/21_dead.code 1.2 644))
109
110;; Files added by populate at Sun, 20 Jul 2003 04:23:05 +0400,
111;; to version 0.0(w), by jmacd:
112
113 (draft-korn-vcdiff.txt (xdelta3/b/22_draft-korn 1.1 600))
114
115;; Files added by populate at Sun, 20 Jul 2003 08:16:41 +0400,
116;; to version 0.1(w), by jmacd:
117
118 (www/xdelta3-api-guide.html (xdelta3/b/23_Xdelta3-ap 1.4 644))
119 (www/xdelta3.html (xdelta3/b/24_Xdelta3.ht 1.4 644))
120
121;; Files added by populate at Sun, 20 Jul 2003 22:35:48 +0400,
122;; to version 0.2(w), by jmacd:
123
124 (www/xdelta3-cmdline.html (xdelta3/b/25_xdelta3-cm 1.2 644))
125 (www/xdelta3.css (xdelta3/b/26_xdelta3.cs 1.3 644))
126
127;; Files added by populate at Wed, 21 Jul 2004 15:39:04 -0700,
128;; to version 0.5(w), by jmacd:
129
130 (gpl.txt ())
131)
132(Merge-Parents)
133(New-Merge-Parents)