diff options
author | dotdotisdead <dotdotisdead@a3eca27d-f21b-0410-9b4a-6511e771f64e> | 2006-08-27 18:39:38 +0000 |
---|---|---|
committer | dotdotisdead <dotdotisdead@a3eca27d-f21b-0410-9b4a-6511e771f64e> | 2006-08-27 18:39:38 +0000 |
commit | a6f202275ec093b9f8948d77b9783f0820f930d8 (patch) | |
tree | 95974d74bd0a6577e80ee2eb917cd852ce6ba011 /xdelta3 |
Source snapshot... broken.
Diffstat (limited to 'xdelta3')
30 files changed, 21160 insertions, 0 deletions
diff --git a/xdelta3/.gdb_history b/xdelta3/.gdb_history new file mode 100755 index 0000000..72410e8 --- /dev/null +++ b/xdelta3/.gdb_history | |||
@@ -0,0 +1,256 @@ | |||
1 | run test | ||
2 | i[ | ||
3 | up | ||
4 | print tpos | ||
5 | print recon_size | ||
6 | break xdelta3-test.h:2323 | ||
7 | run | ||
8 | s | ||
9 | s | ||
10 | n | ||
11 | print input_size | ||
12 | up | ||
13 | up | ||
14 | print delta | ||
15 | print delta_size | ||
16 | run -vv -f -s ~/Desktop/hello.c ~/Desktop/world.c hw | ||
17 | up | ||
18 | down | ||
19 | break xdelta3-main.h:2252 | ||
20 | run | ||
21 | s | ||
22 | n | ||
23 | c | ||
24 | c | ||
25 | c | ||
26 | run -vv -f -s ~/Desktop/hello.c ~/Desktop/world.c hw | ||
27 | run -vv -f -s testcase/6/source testcase/6/target | ||
28 | break xdelta3.c:5792 | ||
29 | run | ||
30 | up | ||
31 | updown | ||
32 | break xdelta3.c:3837 | ||
33 | run | ||
34 | s | ||
35 | n | ||
36 | print matchoff | ||
37 | print streamoff | ||
38 | print tryblk | ||
39 | print tryoff | ||
40 | n | ||
41 | n | ||
42 | n | ||
43 | print stream->match_maxfwd | ||
44 | print stream->match_fwd | ||
45 | print str->cublk | ||
46 | print str->curblk | ||
47 | print src->curblk | ||
48 | print tryoff | ||
49 | n | ||
50 | n | ||
51 | print tryoff | ||
52 | n | ||
53 | print tryoff | ||
54 | print src->curblk[21] | ||
55 | print stream->next_in[21] | ||
56 | print src->curblk | ||
57 | print stream->next_in | ||
58 | break xdelta3.c:5726 | ||
59 | c | ||
60 | n | ||
61 | print stream->match_fwd | ||
62 | n | ||
63 | n | ||
64 | n | ||
65 | n | ||
66 | n | ||
67 | n | ||
68 | s | ||
69 | n | ||
70 | n | ||
71 | n | ||
72 | n | ||
73 | n | ||
74 | n | ||
75 | n | ||
76 | step 1 | ||
77 | step 1 | ||
78 | run -s testcase/3/source.doc testcase/3/target.doc | ||
79 | break xdelta3.c:2697 | ||
80 | run | ||
81 | p blkno | ||
82 | p source->blocks | ||
83 | up | ||
84 | run -s testcase/3/source.doc testcase/3/target.doc out | ||
85 | run -s testcase/3/source.doc testcase/3/target.doc > /dev/null | ||
86 | break xdelta3.c:5095 | ||
87 | run | ||
88 | p logical_input_cksum_pos | ||
89 | p stream->input_pos | ||
90 | p stream->input_position | ||
91 | p stream->srcwin_size | ||
92 | p stream->total_in | ||
93 | n | ||
94 | p logical_input_cksum_pos | ||
95 | p stream-srcwin_cksum_pos | ||
96 | p stream->srcwin_cksum_pos | ||
97 | n | ||
98 | n | ||
99 | n | ||
100 | p stream->srcwin_size | ||
101 | n | ||
102 | n | ||
103 | p blkno | ||
104 | p blkoff | ||
105 | p onblk | ||
106 | n | ||
107 | n | ||
108 | n | ||
109 | break xdelta3.c:5114 | ||
110 | c | ||
111 | n | ||
112 | k | ||
113 | y | ||
114 | step 1 | ||
115 | d | ||
116 | break xdelta3.c:5103 | ||
117 | break xdelta3.c:5097 | ||
118 | run | ||
119 | n | ||
120 | p logical_input_cksum_pos | ||
121 | p stream->srcwin_size | ||
122 | c | ||
123 | n | ||
124 | p stream->srcwin_cksum_pos | ||
125 | run | ||
126 | n | ||
127 | c | ||
128 | fin | ||
129 | up | ||
130 | down | ||
131 | c | ||
132 | up | ||
133 | break xdelta3.c:5131 | ||
134 | c | ||
135 | n | ||
136 | p diff | ||
137 | p onblk | ||
138 | n | ||
139 | p onblk | ||
140 | p blkoff | ||
141 | p blkoff | ||
142 | p stream->large_look | ||
143 | k | ||
144 | y | ||
145 | c | ||
146 | k | ||
147 | break xdelta3.c:5103 | ||
148 | run -s testcase/3/source.doc testcase/3/target.doc | ||
149 | n | ||
150 | o ibbkj | ||
151 | p onblk | ||
152 | n | ||
153 | p blkoff | ||
154 | p onblk | ||
155 | n | ||
156 | p blkoff | ||
157 | break xdelta3.c:5119 | ||
158 | c | ||
159 | n | ||
160 | p stream->input_position | ||
161 | p stream->srcwin_cksum_pos | ||
162 | p stream->stream->srcwin_size | ||
163 | p stream->srcwin_size | ||
164 | p logical_input_cksum_pos | ||
165 | p *next_move_point | ||
166 | c | ||
167 | n | ||
168 | p stream->input_position | ||
169 | p logical_input_cksum_pos | ||
170 | p logical_input_cksum_pos | ||
171 | p stream->srcwin_cksum_pos | ||
172 | d | ||
173 | c | ||
174 | run -s testcase/3/source.doc testcase/3/target.doc -o /tmp/foo12 | ||
175 | run -s testcase/3/source.doc testcase/3/target.doc > /dev/null | ||
176 | run -vv -s testcase/3/source.doc testcase/3/target.doc > /dev/null | ||
177 | k | ||
178 | y | ||
179 | run -vv -s testcase/3/source.doc testcase/3/target.doc | ||
180 | run -vv -s testcase/3/source.doc testcase/3/target.doc | ||
181 | run -vv -s testcase/3/source.doc testcase/3/target.doc > /dev/null | ||
182 | up | ||
183 | n | ||
184 | fin | ||
185 | n | ||
186 | break xdelta3.c:5119 | ||
187 | c | ||
188 | c | ||
189 | p stream->srcwin_cksum_pos | ||
190 | p logical_input_cksum_pos | ||
191 | p stream->total_iun | ||
192 | p stream->total_i | ||
193 | p stream->total_in | ||
194 | p stream->srcwin_cksum_pos | ||
195 | k | ||
196 | run -vv -s testcase/3/source.doc testcase/3/target.doc /tmp/fdsfd | ||
197 | kill | ||
198 | run -vv -s testcase/3/source.doc testcase/3/target.doc > /dev/null | ||
199 | run -vv -s testcase/3/source.doc testcase/3/target.doc > /dev/null | ||
200 | y | ||
201 | run | ||
202 | run -vv -s testcase/3/source.doc testcase/3/target.doc > /dev/null | ||
203 | run | ||
204 | run -vv -s testcase/3/source.doc testcase/3/target.doc > /dev/null | ||
205 | n | ||
206 | fin | ||
207 | n | ||
208 | p blkoff | ||
209 | p stream->large_look | ||
210 | p onblk | ||
211 | break xdelta3.c:5122 | ||
212 | c | ||
213 | n | ||
214 | p blkno | ||
215 | p stream->srcwin_cksum_pos | ||
216 | up | ||
217 | down | ||
218 | p blkno | ||
219 | p blkno * (1 <<18) | ||
220 | p stream->srcwin_cksum_pos | ||
221 | c | ||
222 | p blkno * (1 <<18) | ||
223 | p blkno | ||
224 | c | ||
225 | n | ||
226 | n | ||
227 | p onblk | ||
228 | n | ||
229 | p onblk | ||
230 | p diff | ||
231 | p stream->srcwin_cksum_pos | ||
232 | n | ||
233 | c | ||
234 | n | ||
235 | p stream->srcwin_cksum_pos | ||
236 | p blkno | ||
237 | c | ||
238 | n | ||
239 | p blkno | ||
240 | c | ||
241 | n | ||
242 | c | ||
243 | n | ||
244 | p stream->srcwin_cksum_pos | ||
245 | p logical_input_cksum_pos | ||
246 | n | ||
247 | run -vv -s testcase/3/source.doc testcase/3/target.doc | ||
248 | run -vv -s testcase/3/source.doc testcase/3/target.doc > /dev/null | ||
249 | up | ||
250 | break xdelta3.c:5123 | ||
251 | c | ||
252 | break xdelta3.c:5097 | ||
253 | c | ||
254 | p stream->srcwin_cksum_pos | ||
255 | p stream->logical_input_pos | ||
256 | p logical_input_cksum_pos | ||
diff --git a/xdelta3/.xdelta3.prcs_aux b/xdelta3/.xdelta3.prcs_aux new file mode 100755 index 0000000..b30689c --- /dev/null +++ b/xdelta3/.xdelta3.prcs_aux | |||
@@ -0,0 +1,84 @@ | |||
1 | ;; This file is automatically generated, editing may cause PRCS to do | ||
2 | ;; REALLY bad things. | ||
3 | (Created-By-Prcs-Version 1 3 3) | ||
4 | (www/xdelta3.css 938 1085949140 b/26_xdelta3.cs 1.3) | ||
5 | (analyze_pfx.py 1422 1022037044 12_analyze_pf 1.1) | ||
6 | (badcopy.c 2622 1047759845 20_badcopy.c 1.1) | ||
7 | (analyze_clen.py 1342 1021753567 14_analyze_cl 1.1) | ||
8 | (save.regtest.bug1/input.4 19022 1055471779 b/16_input.4 1.1) | ||
9 | (save.regtest.bug11/input.0 4 1055554284 b/8_input.0 1.1) | ||
10 | (save.regtest.bug8/input.0 203756 1055518432 28_input.0 1.1) | ||
11 | (save.regtest.bug1/input.5 21597 1055471779 b/17_input.5 1.1) | ||
12 | (save.regtest.bug6/input.20 1235 1055474005 39_input.20 1.1) | ||
13 | (save.regtest.bug11/input.1 10 1055554284 b/7_input.1 1.1) | ||
14 | (xdelta3-list.h 9892 1052598762 6_xdelta3-li 1.1) | ||
15 | (save.regtest.bug8/input.1 203756 1055518493 29_input.1 1.1) | ||
16 | (save.regtest.bug7/recon 51200 1055515262 36_recon 1.1) | ||
17 | (save.regtest.bug6/input.21 952 1055474005 38_input.21 1.1) | ||
18 | (save.regtest.bug6/recon 952 1055480638 37_recon 1.1) | ||
19 | (save.regtest.bug11/recon.x 10 1055554520 b/5_recon.x 1.1) | ||
20 | (vcdiff.ps 131548 1014968851 b/19_vcdiff.ps 1.1) | ||
21 | (www/Xdelta3.html 3200 1058668417 b/24_Xdelta3.ht 1.1) | ||
22 | (priorities.txt 339 1057496665 b/18_priorities 1.1) | ||
23 | (draft-korn-vcdiff.txt 60706 1018424758 b/22_draft-korn 1.1) | ||
24 | (dead.code 72096 1085893991 b/21_dead.code 1.2) | ||
25 | (linkxd3lib.c 1113 1056324075 19_linkxd3lib 1.1) | ||
26 | (www/xdelta3.html 4708 1085952599 b/24_Xdelta3.ht 1.4) | ||
27 | (xdelta3-second.h 8228 1057405215 3_xdelta3-se 1.1) | ||
28 | (testh.c 21 1042671351 17_testh.c 1.1) | ||
29 | (save.regtest.bug6/input.0 920 1055474005 40_input.0 1.1) | ||
30 | (save.regtest.bug12/output.x 705 1055556257 b/1_output.x 1.1) | ||
31 | (save.regtest.bug8/core 1159168 1055529025 23_core 1.1) | ||
32 | (save.regtest.bug9/foo,v 123233 1055532021 21_foo,vx 1.1) | ||
33 | (xdelta3.c 201721 1085893369 16_xdelta3.c 1.3) | ||
34 | (xdelta3-cfgs.h 2701 1057695639 9_xdelta3-cf 1.1) | ||
35 | (save.regtest.bug10/input.0 53274 1055532189 b/14_input.0 1.1) | ||
36 | (xdelta3-regtest.py 17976 1085947234 10_xdelta3-re 1.3) | ||
37 | (save.regtest.bug10/input.1 74663 1055532189 b/13_input.1 1.1) | ||
38 | (save.regtest.bug4/input.0 7571 1055461840 45_input.0 1.1) | ||
39 | (save.regtest.bug12/xd3regtest.27181/output 2336 1055566927 b/0_output 1.1) | ||
40 | (save.regtest.bug4/input.1 11312 1055461840 44_input.1 1.1) | ||
41 | (save.regtest.bug7/core 1146880 1055522004 30_core 1.1) | ||
42 | (xdelta3-main.h 79350 1085950532 5_xdelta3-ma 1.3) | ||
43 | (xdelta3.h 41796 1084138546 1_xdelta3.h 1.2) | ||
44 | (rcs_junk.cc 36315 1055086755 15_rcs_junk.c 1.1) | ||
45 | (www/xdelta3-cmdline.html 5234 1085953288 b/25_xdelta3-cm 1.2) | ||
46 | (save.regtest.bug7/input.0 7571 1055515262 34_input.0 1.1) | ||
47 | (save.regtest.bug7/output 14276 1055515262 35_output 1.1) | ||
48 | (save.regtest.bug2/input.0 2296 1055471815 49_input.0 1.1) | ||
49 | (save.regtest.bug7/input.1 11312 1055515262 33_input.1 1.1) | ||
50 | (www/xdelta3-api-guide.html 7553 1085953324 b/23_Xdelta3-ap 1.4) | ||
51 | (save.regtest.bug8/input.0.xz 70595 1055518500 27_input.0.xz 1.1) | ||
52 | (save.regtest.bug2/input.1 2521 1055471815 48_input.1 1.1) | ||
53 | (save.regtest.bug9/foo2,v 123233 1055532069 22_foo2,vx 1.1) | ||
54 | (xdelta3-python.h 1466 1055671733 4_xdelta3-py 1.1) | ||
55 | (save.regtest.bug10/output 48388 1055532189 b/15_output 1.1) | ||
56 | (save.regtest.bug10/output.x 48408 1055533319 b/12_output.x 1.1) | ||
57 | (save.regtest.bug7/recon.x 11312 1055517553 31_recon.x 1.1) | ||
58 | (save.regtest.bug11/recon 10 1055554284 b/10_recon 1.1) | ||
59 | (save.regtest.bug8/input.1.xz 89734 1055518503 26_input.1.xz 1.1) | ||
60 | (xdelta3-test.h 71959 1084138350 2_xdelta3-te 1.2) | ||
61 | (save.regtest.bug10/recon.x 74655 1055533328 b/11_recon.x 1.1) | ||
62 | (save.regtest.bug12/input.0 280 1055555649 b/3_input.0 1.1) | ||
63 | (save.regtest.bug12/input.1 1155 1055555649 b/2_input.1 1.1) | ||
64 | (draft-vcdiff-huffman.txt 2935 1021721074 b/20_draft-vcdi 1.1) | ||
65 | (save.regtest.bug8/output.x.right 48221 1055520912 24_output.x.r 1.1) | ||
66 | (save.regtest.bug5/input.0 7571 1055471668 43_input.0 1.1) | ||
67 | (save.regtest.bug11/output.x 45 1055554519 b/6_output.x 1.1) | ||
68 | (save.regtest.bug12/xd3regtest.27181/input.0 2521 1055566927 51_input.0 1.1) | ||
69 | (save.regtest.bug5/input.1 11312 1055471668 42_input.1 1.1) | ||
70 | (save.regtest.bug12/xd3regtest.27181/input.1 2296 1055566927 50_input.1 1.1) | ||
71 | (setup.py 626 1055562104 11_setup.py 1.1) | ||
72 | (Makefile 3840 1085893399 0_Makefile 1.3) | ||
73 | (save.regtest.bug3/input.0 732 1055471934 47_input.0 1.1) | ||
74 | (save.regtest.bug6/output 69 1055480559 41_output 1.1) | ||
75 | (xdelta3-fgk.h 21496 1057610026 7_xdelta3-fg 1.1) | ||
76 | (save.regtest.bug3/input.1 271 1055471934 46_input.1 1.1) | ||
77 | (www/Xdelta3-api.html 6128 1058674572 b/23_Xdelta3-ap 1.1) | ||
78 | (save.regtest.bug12/output 39 1055555649 b/4_output 1.1) | ||
79 | (analyze_gp.py 7442 1022750342 13_analyze_gp 1.1) | ||
80 | (save.regtest.bug11/output 45 1055554284 b/9_output 1.1) | ||
81 | (save.regtest.bug7/output.x 14296 1055517870 32_output.x 1.1) | ||
82 | (save.regtest.bug8/output.x 48225 1055530557 25_output.x 1.1) | ||
83 | (show.c 647 1043318861 18_show.c 1.1) | ||
84 | (xdelta3-djw.h 51152 1057610015 8_xdelta3-dj 1.1) | ||
diff --git a/xdelta3/COPYING b/xdelta3/COPYING new file mode 100755 index 0000000..5b6e7c6 --- /dev/null +++ b/xdelta3/COPYING | |||
@@ -0,0 +1,340 @@ | |||
1 | GNU GENERAL PUBLIC LICENSE | ||
2 | Version 2, June 1991 | ||
3 | |||
4 | Copyright (C) 1989, 1991 Free Software Foundation, Inc. | ||
5 | 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
6 | Everyone is permitted to copy and distribute verbatim copies | ||
7 | of this license document, but changing it is not allowed. | ||
8 | |||
9 | Preamble | ||
10 | |||
11 | The licenses for most software are designed to take away your | ||
12 | freedom to share and change it. By contrast, the GNU General Public | ||
13 | License is intended to guarantee your freedom to share and change free | ||
14 | software--to make sure the software is free for all its users. This | ||
15 | General Public License applies to most of the Free Software | ||
16 | Foundation's software and to any other program whose authors commit to | ||
17 | using it. (Some other Free Software Foundation software is covered by | ||
18 | the GNU Library General Public License instead.) You can apply it to | ||
19 | your programs, too. | ||
20 | |||
21 | When we speak of free software, we are referring to freedom, not | ||
22 | price. Our General Public Licenses are designed to make sure that you | ||
23 | have the freedom to distribute copies of free software (and charge for | ||
24 | this service if you wish), that you receive source code or can get it | ||
25 | if you want it, that you can change the software or use pieces of it | ||
26 | in new free programs; and that you know you can do these things. | ||
27 | |||
28 | To protect your rights, we need to make restrictions that forbid | ||
29 | anyone to deny you these rights or to ask you to surrender the rights. | ||
30 | These restrictions translate to certain responsibilities for you if you | ||
31 | distribute copies of the software, or if you modify it. | ||
32 | |||
33 | For example, if you distribute copies of such a program, whether | ||
34 | gratis or for a fee, you must give the recipients all the rights that | ||
35 | you have. You must make sure that they, too, receive or can get the | ||
36 | source code. And you must show them these terms so they know their | ||
37 | rights. | ||
38 | |||
39 | We protect your rights with two steps: (1) copyright the software, and | ||
40 | (2) offer you this license which gives you legal permission to copy, | ||
41 | distribute and/or modify the software. | ||
42 | |||
43 | Also, for each author's protection and ours, we want to make certain | ||
44 | that everyone understands that there is no warranty for this free | ||
45 | software. If the software is modified by someone else and passed on, we | ||
46 | want its recipients to know that what they have is not the original, so | ||
47 | that any problems introduced by others will not reflect on the original | ||
48 | authors' reputations. | ||
49 | |||
50 | Finally, any free program is threatened constantly by software | ||
51 | patents. We wish to avoid the danger that redistributors of a free | ||
52 | program will individually obtain patent licenses, in effect making the | ||
53 | program proprietary. To prevent this, we have made it clear that any | ||
54 | patent must be licensed for everyone's free use or not licensed at all. | ||
55 | |||
56 | The precise terms and conditions for copying, distribution and | ||
57 | modification follow. | ||
58 | |||
59 | GNU GENERAL PUBLIC LICENSE | ||
60 | TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION | ||
61 | |||
62 | 0. This License applies to any program or other work which contains | ||
63 | a notice placed by the copyright holder saying it may be distributed | ||
64 | under the terms of this General Public License. The "Program", below, | ||
65 | refers to any such program or work, and a "work based on the Program" | ||
66 | means either the Program or any derivative work under copyright law: | ||
67 | that is to say, a work containing the Program or a portion of it, | ||
68 | either verbatim or with modifications and/or translated into another | ||
69 | language. (Hereinafter, translation is included without limitation in | ||
70 | the term "modification".) Each licensee is addressed as "you". | ||
71 | |||
72 | Activities other than copying, distribution and modification are not | ||
73 | covered by this License; they are outside its scope. The act of | ||
74 | running the Program is not restricted, and the output from the Program | ||
75 | is covered only if its contents constitute a work based on the | ||
76 | Program (independent of having been made by running the Program). | ||
77 | Whether that is true depends on what the Program does. | ||
78 | |||
79 | 1. You may copy and distribute verbatim copies of the Program's | ||
80 | source code as you receive it, in any medium, provided that you | ||
81 | conspicuously and appropriately publish on each copy an appropriate | ||
82 | copyright notice and disclaimer of warranty; keep intact all the | ||
83 | notices that refer to this License and to the absence of any warranty; | ||
84 | and give any other recipients of the Program a copy of this License | ||
85 | along with the Program. | ||
86 | |||
87 | You may charge a fee for the physical act of transferring a copy, and | ||
88 | you may at your option offer warranty protection in exchange for a fee. | ||
89 | |||
90 | 2. You may modify your copy or copies of the Program or any portion | ||
91 | of it, thus forming a work based on the Program, and copy and | ||
92 | distribute such modifications or work under the terms of Section 1 | ||
93 | above, provided that you also meet all of these conditions: | ||
94 | |||
95 | a) You must cause the modified files to carry prominent notices | ||
96 | stating that you changed the files and the date of any change. | ||
97 | |||
98 | b) You must cause any work that you distribute or publish, that in | ||
99 | whole or in part contains or is derived from the Program or any | ||
100 | part thereof, to be licensed as a whole at no charge to all third | ||
101 | parties under the terms of this License. | ||
102 | |||
103 | c) If the modified program normally reads commands interactively | ||
104 | when run, you must cause it, when started running for such | ||
105 | interactive use in the most ordinary way, to print or display an | ||
106 | announcement including an appropriate copyright notice and a | ||
107 | notice that there is no warranty (or else, saying that you provide | ||
108 | a warranty) and that users may redistribute the program under | ||
109 | these conditions, and telling the user how to view a copy of this | ||
110 | License. (Exception: if the Program itself is interactive but | ||
111 | does not normally print such an announcement, your work based on | ||
112 | the Program is not required to print an announcement.) | ||
113 | |||
114 | These requirements apply to the modified work as a whole. If | ||
115 | identifiable sections of that work are not derived from the Program, | ||
116 | and can be reasonably considered independent and separate works in | ||
117 | themselves, then this License, and its terms, do not apply to those | ||
118 | sections when you distribute them as separate works. But when you | ||
119 | distribute the same sections as part of a whole which is a work based | ||
120 | on the Program, the distribution of the whole must be on the terms of | ||
121 | this License, whose permissions for other licensees extend to the | ||
122 | entire whole, and thus to each and every part regardless of who wrote it. | ||
123 | |||
124 | Thus, it is not the intent of this section to claim rights or contest | ||
125 | your rights to work written entirely by you; rather, the intent is to | ||
126 | exercise the right to control the distribution of derivative or | ||
127 | collective works based on the Program. | ||
128 | |||
129 | In addition, mere aggregation of another work not based on the Program | ||
130 | with the Program (or with a work based on the Program) on a volume of | ||
131 | a storage or distribution medium does not bring the other work under | ||
132 | the scope of this License. | ||
133 | |||
134 | 3. You may copy and distribute the Program (or a work based on it, | ||
135 | under Section 2) in object code or executable form under the terms of | ||
136 | Sections 1 and 2 above provided that you also do one of the following: | ||
137 | |||
138 | a) Accompany it with the complete corresponding machine-readable | ||
139 | source code, which must be distributed under the terms of Sections | ||
140 | 1 and 2 above on a medium customarily used for software interchange; or, | ||
141 | |||
142 | b) Accompany it with a written offer, valid for at least three | ||
143 | years, to give any third party, for a charge no more than your | ||
144 | cost of physically performing source distribution, a complete | ||
145 | machine-readable copy of the corresponding source code, to be | ||
146 | distributed under the terms of Sections 1 and 2 above on a medium | ||
147 | customarily used for software interchange; or, | ||
148 | |||
149 | c) Accompany it with the information you received as to the offer | ||
150 | to distribute corresponding source code. (This alternative is | ||
151 | allowed only for noncommercial distribution and only if you | ||
152 | received the program in object code or executable form with such | ||
153 | an offer, in accord with Subsection b above.) | ||
154 | |||
155 | The source code for a work means the preferred form of the work for | ||
156 | making modifications to it. For an executable work, complete source | ||
157 | code means all the source code for all modules it contains, plus any | ||
158 | associated interface definition files, plus the scripts used to | ||
159 | control compilation and installation of the executable. However, as a | ||
160 | special exception, the source code distributed need not include | ||
161 | anything that is normally distributed (in either source or binary | ||
162 | form) with the major components (compiler, kernel, and so on) of the | ||
163 | operating system on which the executable runs, unless that component | ||
164 | itself accompanies the executable. | ||
165 | |||
166 | If distribution of executable or object code is made by offering | ||
167 | access to copy from a designated place, then offering equivalent | ||
168 | access to copy the source code from the same place counts as | ||
169 | distribution of the source code, even though third parties are not | ||
170 | compelled to copy the source along with the object code. | ||
171 | |||
172 | 4. You may not copy, modify, sublicense, or distribute the Program | ||
173 | except as expressly provided under this License. Any attempt | ||
174 | otherwise to copy, modify, sublicense or distribute the Program is | ||
175 | void, and will automatically terminate your rights under this License. | ||
176 | However, parties who have received copies, or rights, from you under | ||
177 | this License will not have their licenses terminated so long as such | ||
178 | parties remain in full compliance. | ||
179 | |||
180 | 5. You are not required to accept this License, since you have not | ||
181 | signed it. However, nothing else grants you permission to modify or | ||
182 | distribute the Program or its derivative works. These actions are | ||
183 | prohibited by law if you do not accept this License. Therefore, by | ||
184 | modifying or distributing the Program (or any work based on the | ||
185 | Program), you indicate your acceptance of this License to do so, and | ||
186 | all its terms and conditions for copying, distributing or modifying | ||
187 | the Program or works based on it. | ||
188 | |||
189 | 6. Each time you redistribute the Program (or any work based on the | ||
190 | Program), the recipient automatically receives a license from the | ||
191 | original licensor to copy, distribute or modify the Program subject to | ||
192 | these terms and conditions. You may not impose any further | ||
193 | restrictions on the recipients' exercise of the rights granted herein. | ||
194 | You are not responsible for enforcing compliance by third parties to | ||
195 | this License. | ||
196 | |||
197 | 7. If, as a consequence of a court judgment or allegation of patent | ||
198 | infringement or for any other reason (not limited to patent issues), | ||
199 | conditions are imposed on you (whether by court order, agreement or | ||
200 | otherwise) that contradict the conditions of this License, they do not | ||
201 | excuse you from the conditions of this License. If you cannot | ||
202 | distribute so as to satisfy simultaneously your obligations under this | ||
203 | License and any other pertinent obligations, then as a consequence you | ||
204 | may not distribute the Program at all. For example, if a patent | ||
205 | license would not permit royalty-free redistribution of the Program by | ||
206 | all those who receive copies directly or indirectly through you, then | ||
207 | the only way you could satisfy both it and this License would be to | ||
208 | refrain entirely from distribution of the Program. | ||
209 | |||
210 | If any portion of this section is held invalid or unenforceable under | ||
211 | any particular circumstance, the balance of the section is intended to | ||
212 | apply and the section as a whole is intended to apply in other | ||
213 | circumstances. | ||
214 | |||
215 | It is not the purpose of this section to induce you to infringe any | ||
216 | patents or other property right claims or to contest validity of any | ||
217 | such claims; this section has the sole purpose of protecting the | ||
218 | integrity of the free software distribution system, which is | ||
219 | implemented by public license practices. Many people have made | ||
220 | generous contributions to the wide range of software distributed | ||
221 | through that system in reliance on consistent application of that | ||
222 | system; it is up to the author/donor to decide if he or she is willing | ||
223 | to distribute software through any other system and a licensee cannot | ||
224 | impose that choice. | ||
225 | |||
226 | This section is intended to make thoroughly clear what is believed to | ||
227 | be a consequence of the rest of this License. | ||
228 | |||
229 | 8. If the distribution and/or use of the Program is restricted in | ||
230 | certain countries either by patents or by copyrighted interfaces, the | ||
231 | original copyright holder who places the Program under this License | ||
232 | may add an explicit geographical distribution limitation excluding | ||
233 | those countries, so that distribution is permitted only in or among | ||
234 | countries not thus excluded. In such case, this License incorporates | ||
235 | the limitation as if written in the body of this License. | ||
236 | |||
237 | 9. The Free Software Foundation may publish revised and/or new versions | ||
238 | of the General Public License from time to time. Such new versions will | ||
239 | be similar in spirit to the present version, but may differ in detail to | ||
240 | address new problems or concerns. | ||
241 | |||
242 | Each version is given a distinguishing version number. If the Program | ||
243 | specifies a version number of this License which applies to it and "any | ||
244 | later version", you have the option of following the terms and conditions | ||
245 | either of that version or of any later version published by the Free | ||
246 | Software Foundation. If the Program does not specify a version number of | ||
247 | this License, you may choose any version ever published by the Free Software | ||
248 | Foundation. | ||
249 | |||
250 | 10. If you wish to incorporate parts of the Program into other free | ||
251 | programs whose distribution conditions are different, write to the author | ||
252 | to ask for permission. For software which is copyrighted by the Free | ||
253 | Software Foundation, write to the Free Software Foundation; we sometimes | ||
254 | make exceptions for this. Our decision will be guided by the two goals | ||
255 | of preserving the free status of all derivatives of our free software and | ||
256 | of promoting the sharing and reuse of software generally. | ||
257 | |||
258 | NO WARRANTY | ||
259 | |||
260 | 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY | ||
261 | FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN | ||
262 | OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES | ||
263 | PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED | ||
264 | OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF | ||
265 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS | ||
266 | TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE | ||
267 | PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, | ||
268 | REPAIR OR CORRECTION. | ||
269 | |||
270 | 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING | ||
271 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR | ||
272 | REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, | ||
273 | INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING | ||
274 | OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED | ||
275 | TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY | ||
276 | YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER | ||
277 | PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE | ||
278 | POSSIBILITY OF SUCH DAMAGES. | ||
279 | |||
280 | END OF TERMS AND CONDITIONS | ||
281 | |||
282 | How to Apply These Terms to Your New Programs | ||
283 | |||
284 | If you develop a new program, and you want it to be of the greatest | ||
285 | possible use to the public, the best way to achieve this is to make it | ||
286 | free software which everyone can redistribute and change under these terms. | ||
287 | |||
288 | To do so, attach the following notices to the program. It is safest | ||
289 | to attach them to the start of each source file to most effectively | ||
290 | convey the exclusion of warranty; and each file should have at least | ||
291 | the "copyright" line and a pointer to where the full notice is found. | ||
292 | |||
293 | <one line to give the program's name and a brief idea of what it does.> | ||
294 | Copyright (C) <year> <name of author> | ||
295 | |||
296 | This program is free software; you can redistribute it and/or modify | ||
297 | it under the terms of the GNU General Public License as published by | ||
298 | the Free Software Foundation; either version 2 of the License, or | ||
299 | (at your option) any later version. | ||
300 | |||
301 | This program is distributed in the hope that it will be useful, | ||
302 | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
303 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
304 | GNU General Public License for more details. | ||
305 | |||
306 | You should have received a copy of the GNU General Public License | ||
307 | along with this program; if not, write to the Free Software | ||
308 | Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
309 | |||
310 | |||
311 | Also add information on how to contact you by electronic and paper mail. | ||
312 | |||
313 | If the program is interactive, make it output a short notice like this | ||
314 | when it starts in an interactive mode: | ||
315 | |||
316 | Gnomovision version 69, Copyright (C) year name of author | ||
317 | Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. | ||
318 | This is free software, and you are welcome to redistribute it | ||
319 | under certain conditions; type `show c' for details. | ||
320 | |||
321 | The hypothetical commands `show w' and `show c' should show the appropriate | ||
322 | parts of the General Public License. Of course, the commands you use may | ||
323 | be called something other than `show w' and `show c'; they could even be | ||
324 | mouse-clicks or menu items--whatever suits your program. | ||
325 | |||
326 | You should also get your employer (if you work as a programmer) or your | ||
327 | school, if any, to sign a "copyright disclaimer" for the program, if | ||
328 | necessary. Here is a sample; alter the names: | ||
329 | |||
330 | Yoyodyne, Inc., hereby disclaims all copyright interest in the program | ||
331 | `Gnomovision' (which makes passes at compilers) written by James Hacker. | ||
332 | |||
333 | <signature of Ty Coon>, 1 April 1989 | ||
334 | Ty Coon, President of Vice | ||
335 | |||
336 | This General Public License does not permit incorporating your program into | ||
337 | proprietary programs. If your program is a subroutine library, you may | ||
338 | consider it more useful to permit linking proprietary applications with the | ||
339 | library. If this is what you want to do, use the GNU Library General | ||
340 | Public License instead of this License. | ||
diff --git a/xdelta3/ChangeLog b/xdelta3/ChangeLog new file mode 100755 index 0000000..d7bd042 --- /dev/null +++ b/xdelta3/ChangeLog | |||
@@ -0,0 +1,6 @@ | |||
1 | 2006-07-02 Joshua MacDonald <jmacd@google.com> | ||
2 | |||
3 | * xdelta3.c (xd3_iopt_flush_instructions): Fixed a bug in flush_instructions | ||
4 | clears more than half of the instructions, thus encodes the last two, which may | ||
5 | still overlap. | ||
6 | |||
diff --git a/xdelta3/Makefile b/xdelta3/Makefile new file mode 100755 index 0000000..0ef4722 --- /dev/null +++ b/xdelta3/Makefile | |||
@@ -0,0 +1,111 @@ | |||
1 | ## | ||
2 | PYTHON = python | ||
3 | PYTGT = build/temp.linux-i686-2.3/xdelta3.so | ||
4 | |||
5 | TARGETS = xdelta3 xdelta3-64 xdelta3-everything \ | ||
6 | xdelta3-Opg xdelta3-64-O xdelta3-Op xdelta3-O \ | ||
7 | xdelta3-decoder xdelta3-decoder-nomain.o \ | ||
8 | $(PYTGT) \ | ||
9 | xdelta3-nosec.o xdelta3-all.o xdelta3-fgk.o xdelta3-djw.o \ | ||
10 | xdelta3-noext xdelta3-tools xdelta3-tune \ | ||
11 | xdelta3-notools | ||
12 | |||
13 | SOURCES = xdelta3.c xdelta3.h xdelta3-fgk.h xdelta3-djw.h xdelta3-list.h xdelta3-test.h \ | ||
14 | xdelta3-main.h xdelta3-cfgs.h xdelta3-second.h xdelta3-python.h | ||
15 | |||
16 | PYFILES = xdelta3-regtest.py setup.py | ||
17 | |||
18 | EXTRA = Makefile COPYING linkxd3lib.c badcopy.c www RELEASE.NOTES | ||
19 | |||
20 | REL = 0f | ||
21 | RELDIR = xdelta3$(REL) | ||
22 | |||
23 | all: $(TARGETS) | ||
24 | |||
25 | tar: | ||
26 | tar -czf /tmp/$(RELDIR)-tmp.tar.gz $(SOURCES) $(PYFILES) $(EXTRA) | ||
27 | rm -rf /tmp/$(RELDIR) | ||
28 | mkdir /tmp/$(RELDIR) | ||
29 | (cd /tmp/$(RELDIR) && tar -xzf ../$(RELDIR)-tmp.tar.gz) | ||
30 | tar -czf ./$(RELDIR).tar.gz -C /tmp $(RELDIR) | ||
31 | +tar -tzf ./$(RELDIR).tar.gz | ||
32 | rm -rf /tmp/$(RELDIR) | ||
33 | |||
34 | clean: | ||
35 | rm -f $(TARGETS) xdtest.* core *.flc | ||
36 | |||
37 | $(PYTGT): $(SOURCES) | ||
38 | $(PYTHON) setup.py install --compile --force | ||
39 | |||
40 | xdelta3: $(SOURCES) | ||
41 | $(CC) -g -Wall -Wshadow xdelta3.c -o xdelta3 -DXD3_MAIN=1 -DGENERIC_ENCODE_TABLES=1 \ | ||
42 | -DXD3_USE_LARGEFILE64=1 -DREGRESSION_TEST=1 -DXD3_DEBUG=2 -DSECONDARY_DJW=1 -lm | ||
43 | |||
44 | xdelta3-decoder: $(SOURCES) | ||
45 | $(CC) -O2 -Wall -Wshadow xdelta3.c \ | ||
46 | -DXD3_ENCODER=0 -DXD3_MAIN=1 -DSECONDARY_FGK=0 -DSECONDARY_DJW=0 \ | ||
47 | -DXD3_POSIX=0 -DEXTERNAL_COMPRESSION=0 -DVCDIFF_TOOLS=0 \ | ||
48 | -o xdelta3-decoder | ||
49 | strip xdelta3-decoder | ||
50 | |||
51 | xdelta3-decoder-nomain.o: $(SOURCES) linkxd3lib.c | ||
52 | $(CC) -O2 -Wall -Wshadow xdelta3.c linkxd3lib.c \ | ||
53 | -DXD3_ENCODER=0 -DSECONDARY_FGK=0 -DSECONDARY_DJW=0 \ | ||
54 | -o xdelta3-decoder-nomain.o | ||
55 | strip xdelta3-decoder-nomain.o | ||
56 | |||
57 | xdelta3-O: $(SOURCES) | ||
58 | $(CC) -g -O2 -Wall -Wshadow xdelta3.c -o xdelta3-O -DXD3_MAIN=1 -DSECONDARY_DJW=1 -DREGRESSION_TEST=1 -lm | ||
59 | |||
60 | xdelta3-O++: $(SOURCES) | ||
61 | $(CXX) -g -O2 -Wall -Wshadow xdelta3.c -o xdelta3-O++ -DXD3_MAIN=1 -DSECONDARY_DJW=1 -DREGRESSION_TEST=1 -lm | ||
62 | |||
63 | xdelta3-Op: $(SOURCES) | ||
64 | $(CC) -g -O2 -Wall -Wshadow xdelta3.c -o xdelta3-Op -DXD3_POSIX=1 -DXD3_MAIN=1 -DREGRESSION_TEST=1 -lm | ||
65 | |||
66 | xdelta3-64: $(SOURCES) | ||
67 | $(CC) -g -Wall -Wshadow xdelta3.c -o xdelta3-64 -DXD3_POSIX=1 -DXD3_MAIN=1 -DREGRESSION_TEST=1 \ | ||
68 | -DXD3_DEBUG=0 -DXD3_USE_LARGEFILE64=1 -lm | ||
69 | |||
70 | xdelta3-64-O: $(SOURCES) | ||
71 | $(CC) -O2 -Wall -Wshadow xdelta3.c -o xdelta3-64-O -DXD3_POSIX=1 -DXD3_MAIN=1 \ | ||
72 | -DXD3_USE_LARGEFILE64=1 -lm | ||
73 | |||
74 | xdelta3-everything: $(SOURCES) | ||
75 | $(CC) -g -Wall -Wshadow xdelta3.c -o xdelta3-everything \ | ||
76 | -DXD3_MAIN=1 -DVCDIFF_TOOLS=1 -DREGRESSION_TEST=1 \ | ||
77 | -DSECONDARY_FGK=1 -DSECONDARY_DJW=1 \ | ||
78 | -DGENERIC_ENCODE_TABLES=1 \ | ||
79 | -DGENERIC_ENCODE_TABLES_COMPUTE=1 \ | ||
80 | -DXD3_POSIX=1 \ | ||
81 | -DEXTERNAL_COMPRESSION=1 \ | ||
82 | -DXD3_DEBUG=1 -lm | ||
83 | |||
84 | xdelta3-tune: $(SOURCES) | ||
85 | $(CC) -O2 -Wall -Wshadow xdelta3.c -o xdelta3-tune -DXD3_MAIN=1 \ | ||
86 | -DSECONDARY_FGK=1 -DSECONDARY_DJW=1 -DTUNE_HUFFMAN=1 | ||
87 | |||
88 | xdelta3-Opg: $(SOURCES) | ||
89 | $(CC) -pg -g -O3 -Wall -Wshadow xdelta3.c -o xdelta3-Opg -DXD3_MAIN=1 \ | ||
90 | -DSECONDARY_DJW=1 -DXD3_POSIX=1 -DXD3_USE_LARGEFILE64=1 | ||
91 | |||
92 | xdelta3-nosec.o: $(SOURCES) | ||
93 | $(CC) -O2 -Wall -Wshadow -c xdelta3.c -DSECONDARY_FGK=0 -DSECONDARY_DJW=0 -o xdelta3-nosec.o | ||
94 | |||
95 | xdelta3-all.o: $(SOURCES) | ||
96 | $(CC) -O2 -Wall -Wshadow -c xdelta3.c -DSECONDARY_FGK=1 -DSECONDARY_DJW=1 -o xdelta3-all.o | ||
97 | |||
98 | xdelta3-fgk.o: $(SOURCES) | ||
99 | $(CC) -O2 -Wall -Wshadow -c xdelta3.c -DSECONDARY_FGK=1 -DSECONDARY_DJW=0 -o xdelta3-fgk.o | ||
100 | |||
101 | xdelta3-djw.o: $(SOURCES) | ||
102 | $(CC) -O2 -Wall -Wshadow -c xdelta3.c -DSECONDARY_FGK=0 -DSECONDARY_DJW=1 -o xdelta3-djw.o | ||
103 | |||
104 | xdelta3-noext: $(SOURCES) | ||
105 | $(CC) -O2 -Wall -Wshadow xdelta3.c -DXD3_MAIN=1 -DEXTERNAL_COMPRESSION=0 -o xdelta3-noext | ||
106 | |||
107 | xdelta3-tools: $(SOURCES) | ||
108 | $(CC) -O2 -Wall -Wshadow xdelta3.c -DXD3_MAIN=1 -o xdelta3-tools | ||
109 | |||
110 | xdelta3-notools: $(SOURCES) | ||
111 | $(CC) -O2 -Wall -Wshadow xdelta3.c -DXD3_MAIN=1 -DVCDIFF_TOOLS=0 -o xdelta3-notools | ||
diff --git a/xdelta3/RELEASE.NOTES b/xdelta3/RELEASE.NOTES new file mode 100755 index 0000000..a4af327 --- /dev/null +++ b/xdelta3/RELEASE.NOTES | |||
@@ -0,0 +1,3 @@ | |||
1 | 2006-05-13 Joshua MacDonald <joshua.macdonald@gmail.com> | ||
2 | |||
3 | * xdelta 3.0e: Performance and bug fixes. | ||
diff --git a/xdelta3/badcopy.c b/xdelta3/badcopy.c new file mode 100755 index 0000000..c42e2b5 --- /dev/null +++ b/xdelta3/badcopy.c | |||
@@ -0,0 +1,111 @@ | |||
1 | #include <stdio.h> | ||
2 | #include <stdlib.h> | ||
3 | #include <math.h> | ||
4 | |||
5 | #define BUFSZ (1 << 22) | ||
6 | |||
7 | typedef unsigned int usize_t; | ||
8 | |||
9 | double error_prob = 0.0001; | ||
10 | usize_t mean_change = 100; | ||
11 | usize_t total_change = 0; | ||
12 | usize_t total_size = 0; | ||
13 | usize_t max_change = 0; | ||
14 | usize_t num_change = 0; | ||
15 | |||
16 | int last_end = 0; | ||
17 | |||
18 | static int | ||
19 | edist (usize_t mean, usize_t max) | ||
20 | { | ||
21 | double mean_d = mean; | ||
22 | double erand = log (1.0 / drand48 ()); | ||
23 | usize_t x = (usize_t) (mean_d * erand + 0.5); | ||
24 | |||
25 | return (x < max) ? (x > 0 ? x : 1) : max; | ||
26 | } | ||
27 | |||
28 | void modify (char *buf, usize_t size) | ||
29 | { | ||
30 | usize_t bufpos = 0, j; | ||
31 | |||
32 | last_end = 0; | ||
33 | |||
34 | for (;; /* bufpos and j are incremented in the inner loop */) | ||
35 | { | ||
36 | /* The size of the next modification. */ | ||
37 | usize_t next_size = edist (mean_change, 1 << 31); | ||
38 | /* The expected interval of such a change. */ | ||
39 | double expect_interval = ((double) next_size * (1.0 - error_prob)) / error_prob; | ||
40 | /* The number of bytes until the next modification. */ | ||
41 | usize_t next_mod = edist (expect_interval, 1 << 31); | ||
42 | |||
43 | if (next_size + next_mod + bufpos > size) { break; } | ||
44 | |||
45 | if (max_change < next_size) { max_change = next_size; } | ||
46 | |||
47 | bufpos += next_mod; | ||
48 | |||
49 | fprintf (stderr, "COPY: %u-%u (%u)\n", total_size + last_end, total_size + bufpos, bufpos - last_end); | ||
50 | |||
51 | fprintf (stderr, "ADD: %u-%u (%u) is change %u\n", total_size + bufpos , total_size + bufpos + next_size, next_size, num_change); | ||
52 | |||
53 | total_change += next_size; | ||
54 | num_change += 1; | ||
55 | |||
56 | for (j = 0; j < next_size; j += 1, bufpos += 1) | ||
57 | { | ||
58 | buf[bufpos] = lrand48 () >> 3; | ||
59 | } | ||
60 | |||
61 | last_end = bufpos; | ||
62 | } | ||
63 | |||
64 | fprintf (stderr, "COPY: %u-%u (%u)\n", total_size + last_end, total_size + size, size - last_end); | ||
65 | |||
66 | total_size += size; | ||
67 | } | ||
68 | |||
69 | int main(int argc, char **argv) | ||
70 | { | ||
71 | char buf[BUFSZ]; | ||
72 | int c, ret; | ||
73 | |||
74 | if (argc > 3) | ||
75 | { | ||
76 | fprintf (stderr, "usage: badcopy [byte_error_prob [mean_error_size]]\n"); | ||
77 | return 1; | ||
78 | } | ||
79 | |||
80 | if (argc > 2) { mean_change = atoi (argv[2]); } | ||
81 | if (argc > 1) { error_prob = atof (argv[1]); } | ||
82 | |||
83 | if (error_prob < 0.0 || error_prob > 1.0) | ||
84 | { | ||
85 | fprintf (stderr, "warning: error probability out of range\n"); | ||
86 | return 1; | ||
87 | } | ||
88 | |||
89 | do | ||
90 | { | ||
91 | c = fread (buf, 1, BUFSZ, stdin); | ||
92 | |||
93 | if (c == 0) { break; } | ||
94 | |||
95 | modify (buf, c); | ||
96 | |||
97 | ret = fwrite (buf, 1, c, stdout); | ||
98 | } | ||
99 | while (c == BUFSZ); | ||
100 | |||
101 | if ((ret = fclose (stdout))) | ||
102 | { | ||
103 | perror ("fclose"); | ||
104 | return 1; | ||
105 | } | ||
106 | |||
107 | fprintf (stderr, "add_prob %f; %u adds; total_change %u of %u bytes; add percentage %f; max add size %u\n", | ||
108 | error_prob, num_change, total_change, total_size, (double) total_change / (double) total_size, max_change); | ||
109 | |||
110 | return 0; | ||
111 | } | ||
diff --git a/xdelta3/draft-korn-vcdiff.txt b/xdelta3/draft-korn-vcdiff.txt new file mode 100755 index 0000000..1487deb --- /dev/null +++ b/xdelta3/draft-korn-vcdiff.txt | |||
@@ -0,0 +1,1322 @@ | |||
1 | David G. Korn, AT&T Labs | ||
2 | Joshua P. MacDonald, UC Berkeley | ||
3 | Jeffrey C. Mogul, Compaq WRL | ||
4 | Internet-Draft Kiem-Phong Vo, AT&T Labs | ||
5 | Expires: 09 November 2002 09 November 2001 | ||
6 | |||
7 | |||
8 | The VCDIFF Generic Differencing and Compression Data Format | ||
9 | |||
10 | draft-korn-vcdiff-06.txt | ||
11 | |||
12 | |||
13 | |||
14 | Status of this Memo | ||
15 | |||
16 | This document is an Internet-Draft and is in full conformance | ||
17 | with all provisions of Section 10 of RFC2026. | ||
18 | |||
19 | Internet-Drafts are working documents of the Internet Engineering | ||
20 | Task Force (IETF), its areas, and its working groups. Note that | ||
21 | other groups may also distribute working documents as | ||
22 | Internet-Drafts. | ||
23 | |||
24 | Internet-Drafts are draft documents valid for a maximum of six | ||
25 | months and may be updated, replaced, or obsoleted by other | ||
26 | documents at any time. It is inappropriate to use Internet- | ||
27 | Drafts as reference material or to cite them other than as | ||
28 | "work in progress." | ||
29 | |||
30 | The list of current Internet-Drafts can be accessed at | ||
31 | http://www.ietf.org/ietf/1id-abstracts.txt | ||
32 | |||
33 | The list of Internet-Draft Shadow Directories can be accessed at | ||
34 | http://www.ietf.org/shadow.html. | ||
35 | |||
36 | |||
37 | Abstract | ||
38 | |||
39 | This memo describes a general, efficient and portable data format | ||
40 | suitable for encoding compressed and/or differencing data so that | ||
41 | they can be easily transported among computers. | ||
42 | |||
43 | |||
44 | Table of Contents: | ||
45 | |||
46 | 1. EXECUTIVE SUMMARY ............................................ 2 | ||
47 | 2. CONVENTIONS .................................................. 3 | ||
48 | 3. DELTA INSTRUCTIONS ........................................... 4 | ||
49 | 4. DELTA FILE ORGANIZATION ...................................... 5 | ||
50 | 5. DELTA INSTRUCTION ENCODING ................................... 9 | ||
51 | 6. DECODING A TARGET WINDOW ..................................... 14 | ||
52 | 7. APPLICATION-DEFINED CODE TABLES .............................. 16 | ||
53 | 8. PERFORMANCE .................................................. 16 | ||
54 | 9. FURTHER ISSUES ............................................... 17 | ||
55 | 10. SUMMARY ...................................................... 18 | ||
56 | 11. ACKNOWLEDGEMENTS ............................................. 18 | ||
57 | 12. SECURITY CONSIDERATIONS ...................................... 18 | ||
58 | 13. SOURCE CODE AVAILABILITY ..................................... 18 | ||
59 | 14. INTELLECTUAL PROPERTY RIGHTS ................................. 18 | ||
60 | 15. IANA CONSIDERATIONS .......................................... 19 | ||
61 | 16. REFERENCES ................................................... 19 | ||
62 | 17. AUTHOR'S ADDRESS ............................................. 20 | ||
63 | |||
64 | |||
65 | 1. EXECUTIVE SUMMARY | ||
66 | |||
67 | Compression and differencing techniques can greatly improve storage | ||
68 | and transmission of files and file versions. Since files are often | ||
69 | transported across machines with distinct architectures and performance | ||
70 | characteristics, such data should be encoded in a form that is portable | ||
71 | and can be decoded with little or no knowledge of the encoders. | ||
72 | This document describes Vcdiff, a compact portable encoding format | ||
73 | designed for these purposes. | ||
74 | |||
75 | Data differencing is the process of computing a compact and invertible | ||
76 | encoding of a "target file" given a "source file". Data compression | ||
77 | is similar but without the use of source data. The UNIX utilities diff, | ||
78 | compress, and gzip are well-known examples of data differencing and | ||
79 | compression tools. For data differencing, the computed encoding is | ||
80 | called a "delta file", and, for data compression, it is called | ||
81 | a "compressed file". Delta and compressed files are good for storage | ||
82 | and transmission as they are often smaller than the originals. | ||
83 | |||
84 | Data differencing and data compression are traditionally treated | ||
85 | as distinct types of data processing. However, as shown in the Vdelta | ||
86 | technique by Korn and Vo [1], compression can be thought of as a special | ||
87 | case of differencing in which the source data is empty. The basic idea | ||
88 | is to unify the string parsing scheme used in the Lempel-Ziv'77 style | ||
89 | compressors [2], and the block-move technique of Tichy [3]. Loosely | ||
90 | speaking, this works as follows: | ||
91 | |||
92 | a. Concatenate source and target data. | ||
93 | b. Parse the data from left to right as in LZ'77 but | ||
94 | make sure that a parsed segment starts the target data. | ||
95 | c. Start to output when reaching target data. | ||
96 | |||
97 | Parsing is based on string matching algorithms such as suffix trees [4] | ||
98 | or hashing with different time and space performance characteristics. | ||
99 | Vdelta uses a fast string matching algorithm that requires less memory | ||
100 | than other techniques [5,6]. However, even with this algorithm, the | ||
101 | memory requirement can still be prohibitive for large files. A common | ||
102 | way to deal with memory limitation is to partition an input file into | ||
103 | chunks called "windows" and process them separately. Here, except for | ||
104 | unpublished work by Vo, little has been done on designing effective | ||
105 | windowing schemes. Current techniques, including Vdelta, simply use | ||
106 | source and target windows with corresponding addresses across source | ||
107 | and target files. | ||
108 | |||
109 | String matching and windowing algorithms have large influence on the | ||
110 | compression rate of delta and compressed files. However, it is desirable | ||
111 | to have a portable encoding format that is independent of such algorithms. | ||
112 | This enables construction of client-server applications in which a server | ||
113 | may serve clients with unknown computing characteristics. Unfortunately, | ||
114 | all current differencing and compressing tools, including Vdelta, fall | ||
115 | short in this respect. Their storage formats are closely intertwined | ||
116 | with the implemented string matching and/or windowing algorithms. | ||
117 | |||
118 | The encoding format Vcdiff proposed here addresses the above issues. | ||
119 | Vcdiff achieves the below characteristics: | ||
120 | |||
121 | Output compactness: | ||
122 | The basic encoding format compactly represents compressed or delta | ||
123 | files. Applications can further extend the basic encoding format | ||
124 | with "secondary encoders" to achieve more compression. | ||
125 | |||
126 | Data portability: | ||
127 | The basic encoding format is free from machine byte order and | ||
128 | word size issues. This allows data to be encoded on one machine | ||
129 | and decoded on a different machine with different architecture. | ||
130 | |||
131 | Algorithm genericity: | ||
132 | The decoding algorithm is independent from string matching and | ||
133 | windowing algorithms. This allows competition among implementations | ||
134 | of the encoder while keeping the same decoder. | ||
135 | |||
136 | Decoding efficiency: | ||
137 | Except for secondary encoder issues, the decoding algorithm runs | ||
138 | in time proportional to the size of the target file and uses space | ||
139 | proportional to the maximal window size. Vcdiff differs from more | ||
140 | conventional compressors in that it uses only byte-aligned | ||
141 | data, thus avoiding bit-level operations, which improves | ||
142 | decoding speed at the slight cost of compression efficiency. | ||
143 | |||
144 | The Vcdiff data format and the algorithms for decoding data shall be | ||
145 | described next. Since Vcdiff treats compression as a special case of | ||
146 | differencing, we shall use the term "delta file" to indicate the | ||
147 | compressed output for both cases. | ||
148 | |||
149 | |||
150 | 2. CONVENTIONS | ||
151 | |||
152 | The basic data unit is a byte. For portability, Vcdiff shall limit | ||
153 | a byte to its lower eight bits even on machines with larger bytes. | ||
154 | The bits in a byte are ordered from right to left so that the least | ||
155 | significant bit (LSB) has value 1, and the most significant bit (MSB), | ||
156 | has value 128. | ||
157 | |||
158 | For purposes of exposition in this document, we adopt the convention | ||
159 | that the LSB is numbered 0, and the MSB is numbered 7. Bit numbers | ||
160 | never appear in the encoded format itself. | ||
161 | |||
162 | Vcdiff encodes unsigned integer values using a portable variable-sized | ||
163 | format (originally introduced in the Sfio library [7]). This encoding | ||
164 | treats an integer as a number in base 128. Then, each digit in this | ||
165 | representation is encoded in the lower seven bits of a byte. Except for | ||
166 | the least significant byte, other bytes have their most significant bit | ||
167 | turned on to indicate that there are still more digits in the encoding. | ||
168 | The two key properties of this integer encoding that are beneficial | ||
169 | to a data compression format are: | ||
170 | |||
171 | a. The encoding is portable among systems using 8-bit bytes, and | ||
172 | b. Small values are encoded compactly. | ||
173 | |||
174 | For example, consider the value 123456789 which can be represented with | ||
175 | four 7-bit digits whose values are 58, 111, 26, 21 in order from most | ||
176 | to least significant. Below is the 8-bit byte encoding of these digits. | ||
177 | Note that the MSBs of 58, 111 and 26 are on. | ||
178 | |||
179 | +-------------------------------------------+ | ||
180 | | 10111010 | 11101111 | 10011010 | 00010101 | | ||
181 | +-------------------------------------------+ | ||
182 | MSB+58 MSB+111 MSB+26 0+21 | ||
183 | |||
184 | |||
185 | Henceforth, the terms "byte" and "integer" will refer to a byte and an | ||
186 | unsigned integer as described. | ||
187 | |||
188 | |||
189 | From time to time, algorithms are exhibited to clarify the descriptions | ||
190 | of parts of the Vcdiff format. On such occasions, the C language will be | ||
191 | used to make precise the algorithms. The C code shown in this | ||
192 | document is meant for clarification only, and is not part of the | ||
193 | actual specification of the Vcdiff format. | ||
194 | |||
195 | In this specification, the key words "MUST", "MUST NOT", | ||
196 | "SHOULD", "SHOULD NOT", and "MAY" are to be interpreted as | ||
197 | described in RFC2119 [12]. | ||
198 | |||
199 | |||
200 | 3. DELTA INSTRUCTIONS | ||
201 | |||
202 | A large target file is partitioned into non-overlapping sections | ||
203 | called "target windows". These target windows are processed separately | ||
204 | and sequentially based on their order in the target file. | ||
205 | |||
206 | A target window T of length t may be compared against some source data | ||
207 | segment S of length s. By construction, this source data segment S | ||
208 | comes either from the source file, if one is used, or from a part of | ||
209 | the target file earlier than T. In this way, during decoding, S is | ||
210 | completely known when T is being decoded. | ||
211 | |||
212 | The choices of T, t, S and s are made by some window selection algorithm | ||
213 | which can greatly affect the size of the encoding. However, as seen later, | ||
214 | these choices are encoded so that no knowledge of the window selection | ||
215 | algorithm is needed during decoding. | ||
216 | |||
217 | Assume that S[j] represents the jth byte in S, and T[k] represents | ||
218 | the kth byte in T. Then, for the delta instructions, we treat the data | ||
219 | windows S and T as substrings of a superstring U formed by concatenating | ||
220 | them like this: | ||
221 | |||
222 | S[0]S[1]...S[s-1]T[0]T[1]...T[t-1] | ||
223 | |||
224 | The "address" of a byte in S or T is referred to by its location in U. | ||
225 | For example, the address of T[k] is s+k. | ||
226 | |||
227 | The instructions to encode and direct the reconstruction of a target | ||
228 | window are called delta instructions. There are three types: | ||
229 | |||
230 | ADD: This instruction has two arguments, a size x and a sequence of | ||
231 | x bytes to be copied. | ||
232 | COPY: This instruction has two arguments, a size x and an address p | ||
233 | in the string U. The arguments specify the substring of U that | ||
234 | must be copied. We shall assert that such a substring must be | ||
235 | entirely contained in either S or T. | ||
236 | RUN: This instruction has two arguments, a size x and a byte b that | ||
237 | will be repeated x times. | ||
238 | |||
239 | Below are example source and target windows and the delta instructions | ||
240 | that encode the target window in terms of the source window. | ||
241 | |||
242 | a b c d e f g h i j k l m n o p | ||
243 | a b c d w x y z e f g h e f g h e f g h e f g h z z z z | ||
244 | |||
245 | COPY 4, 0 | ||
246 | ADD 4, w x y z | ||
247 | COPY 4, 4 | ||
248 | COPY 12, 24 | ||
249 | RUN 4, z | ||
250 | |||
251 | |||
252 | Thus, the first letter 'a' in the target window is at location 16 | ||
253 | in the superstring. Note that the fourth instruction, "COPY 12, 24", | ||
254 | copies data from T itself since address 24 is position 8 in T. | ||
255 | This instruction also shows that it is fine to overlap the data to be | ||
256 | copied with the data being copied from as long as the latter starts | ||
257 | earlier. This enables efficient encoding of periodic sequences, | ||
258 | i.e., sequences with regularly repeated subsequences. The RUN instruction | ||
259 | is a compact way to encode a sequence repeating the same byte even though | ||
260 | such a sequence can be thought of as a periodic sequence with period 1. | ||
261 | |||
262 | To reconstruct the target window, one simply processes one delta | ||
263 | instruction at a time and copies the data either from the source window | ||
264 | or from the target window being reconstructed, based on the type of the | ||
265 | instruction and the associated address, if any. | ||
266 | |||
267 | |||
268 | 4. DELTA FILE ORGANIZATION | ||
269 | |||
270 | A Vcdiff delta file starts with a Header section followed by a sequence | ||
271 | of Window sections. The Header section includes magic bytes to identify | ||
272 | the file type, and information concerning data processing beyond the | ||
273 | basic encoding format. The Window sections encode the target windows. | ||
274 | |||
275 | Below is the overall organization of a delta file. The indented items | ||
276 | refine the ones immediately above them. An item in square brackets may | ||
277 | or may not be present in the file depending on the information encoded | ||
278 | in the Indicator byte above it. | ||
279 | |||
280 | Header | ||
281 | Header1 - byte | ||
282 | Header2 - byte | ||
283 | Header3 - byte | ||
284 | Header4 - byte | ||
285 | Hdr_Indicator - byte | ||
286 | [Secondary compressor ID] - byte | ||
287 | |||
288 | [@@@ Why is compressor ID not an integer? ] | ||
289 | [@@@ If we aren't defining any secondary compressors yet, then it seems | ||
290 | that defining the [Secondary compressor ID] and the corresponding | ||
291 | VCD_DECOMPRESS Hdr_Indicator bit in this draft has no real value. An | ||
292 | implementation of this specification won't be able to decode a VCDIFF | ||
293 | encoded with this option if it doesn't know about any secondary | ||
294 | compressors. It seems that you should specify the bits related to | ||
295 | secondary compressors once you have defined the first secondary | ||
296 | compressor. I can imagine a secondary-compressor might want to supply | ||
297 | extra information, such as a dictionary of some kind, in which case | ||
298 | this speculative treatment wouldn't go far enough.] | ||
299 | |||
300 | [Length of code table data] - integer | ||
301 | [Code table data] | ||
302 | Size of near cache - byte | ||
303 | Size of same cache - byte | ||
304 | Compressed code table data | ||
305 | Window1 | ||
306 | Win_Indicator - byte | ||
307 | [Source segment size] - integer | ||
308 | [Source segment position] - integer | ||
309 | The delta encoding of the target window | ||
310 | Length of the delta encoding - integer | ||
311 | The delta encoding | ||
312 | Size of the target window - integer | ||
313 | Delta_Indicator - byte | ||
314 | Length of data for ADDs and RUNs - integer | ||
315 | Length of instructions and sizes - integer | ||
316 | Length of addresses for COPYs - integer | ||
317 | Data section for ADDs and RUNs - array of bytes | ||
318 | Instructions and sizes section - array of bytes | ||
319 | Addresses section for COPYs - array of bytes | ||
320 | Window2 | ||
321 | ... | ||
322 | |||
323 | |||
324 | |||
325 | 4.1 The Header Section | ||
326 | |||
327 | Each delta file starts with a header section organized as below. | ||
328 | Note the convention that square-brackets enclose optional items. | ||
329 | |||
330 | Header1 - byte = 0xD6 | ||
331 | Header2 - byte = 0xC3 | ||
332 | Header3 - byte = 0xC4 | ||
333 | |||
334 | [@@@ The draft gave 0xE6, 0xD3, 0xD4 for these bytes, but ASCII 'V' (0x56), | ||
335 | 'C' (0x43) and 'D' (0x44) with their most significant bits turned on are | ||
336 | 0xD6, 0xC3 and 0xC4, so the values above and in the paragraph below have | ||
337 | been corrected.] | ||
338 | |||
339 | |||
340 | Header4 - byte | ||
341 | Hdr_Indicator - byte | ||
342 | [Secondary compressor ID] - byte | ||
343 | [Length of code table data] - integer | ||
344 | [Code table data] | ||
345 | |||
346 | The first three Header bytes are the ASCII characters 'V', 'C' and 'D' | ||
347 | with their most significant bits turned on (in hexadecimal, the values | ||
348 | are 0xD6, 0xC3, and 0xC4). The fourth Header byte is currently set to | ||
349 | zero. In the future, it might be used to indicate the version of Vcdiff. | ||
350 | |||
351 | The Hdr_Indicator byte shows if there are any initialization data | ||
352 | required to aid in the reconstruction of data in the Window sections. | ||
353 | This byte MAY have non-zero values for either, both, or neither of | ||
354 | the two bits VCD_DECOMPRESS and VCD_CODETABLE below: | ||
355 | |||
356 | 7 6 5 4 3 2 1 0 | ||
357 | +-+-+-+-+-+-+-+-+ | ||
358 | | | | | | | | | | | ||
359 | +-+-+-+-+-+-+-+-+ | ||
360 | ^ ^ | ||
361 | | | | ||
362 | | +-- VCD_DECOMPRESS | ||
363 | +---- VCD_CODETABLE | ||
364 | |||
365 | If bit 0 (VCD_DECOMPRESS) is non-zero, this indicates that a secondary | ||
366 | compressor may have been used to further compress certain parts of the | ||
367 | delta encoding data as described in Sections 4.3 and 6. In that case, | ||
368 | the ID of the secondary compressor is given next. If this bit is zero, | ||
369 | the compressor ID byte is not included. | ||
370 | |||
371 | [@@@ If we aren't defining any secondary compressors yet, then it seems | ||
372 | this bit has no real value yet..] | ||
373 | |||
374 | If bit 1 (VCD_CODETABLE) is non-zero, this indicates that an | ||
375 | application-defined code table is to be used for decoding the delta | ||
376 | instructions. This table itself is compressed. The length of the data | ||
377 | comprising this compressed code table and the data follow next. Section 7 | ||
378 | discusses application-defined code tables. If this bit is zero, the code | ||
379 | table data length and the code table data are not included. | ||
380 | |||
381 | If both bits are set, then the compressor ID byte is included | ||
382 | before the code table data length and the code table data. | ||
383 | |||
384 | |||
385 | 4.2 The Format of a Window Section | ||
386 | |||
387 | Each Window section is organized as follows: | ||
388 | |||
389 | Win_Indicator - byte | ||
390 | [Source segment length] - integer | ||
391 | [Source segment position] - integer | ||
392 | The delta encoding of the target window | ||
393 | |||
394 | |||
395 | Below are the details of the various items: | ||
396 | |||
397 | [@@@ Here, I want to replace the Win_Indicator with a source-count, | ||
398 | followed by source-count length/position pairs?] | ||
399 | |||
400 | Win_Indicator: | ||
401 | This byte is a set of bits, as shown: | ||
402 | |||
403 | 7 6 5 4 3 2 1 0 | ||
404 | +-+-+-+-+-+-+-+-+ | ||
405 | | | | | | | | | | | ||
406 | +-+-+-+-+-+-+-+-+ | ||
407 | ^ ^ | ||
408 | | | | ||
409 | | +-- VCD_SOURCE | ||
410 | +---- VCD_TARGET | ||
411 | |||
412 | |||
413 | If bit 0 (VCD_SOURCE) is non-zero, this indicates that a segment | ||
414 | of data from the "source" file was used as the corresponding | ||
415 | source window of data to encode the target window. The decoder | ||
416 | will use this same source data segment to decode the target window. | ||
417 | |||
418 | If bit 1 (VCD_TARGET) is non-zero, this indicates that a segment | ||
419 | of data from the "target" file was used as the corresponding | ||
420 | source window of data to encode the target window. As above, this | ||
421 | same source data segment is used to decode the target window. | ||
422 | |||
423 | The Win_Indicator byte MUST NOT have more than one of the bits | ||
424 | set (non-zero). It MAY have none of these bits set. | ||
425 | |||
426 | If one of these bits is set, the byte is followed by two | ||
427 | integers to indicate respectively the length and position of | ||
428 | the source data segment in the relevant file. If the | ||
429 | indicator byte is zero, the target window was compressed | ||
430 | by itself without comparing against another data segment, | ||
431 | and these two integers are not included. | ||
432 | |||
433 | The delta encoding of the target window: | ||
434 | This contains the delta encoding of the target window either | ||
435 | in terms of the source data segment (i.e., VCD_SOURCE | ||
436 | or VCD_TARGET was set) or by itself if no source window | ||
437 | is specified. This data format is discussed next. | ||
438 | |||
439 | |||
440 | 4.3 The Delta Encoding of a Target Window | ||
441 | |||
442 | The delta encoding of a target window is organized as follows: | ||
443 | |||
444 | Length of the delta encoding - integer | ||
445 | The delta encoding | ||
446 | Length of the target window - integer | ||
447 | Delta_Indicator - byte | ||
448 | Length of data for ADDs and RUNs - integer | ||
449 | Length of instructions section - integer | ||
450 | Length of addresses for COPYs - integer | ||
451 | Data section for ADDs and RUNs - array of bytes | ||
452 | Instructions and sizes section - array of bytes | ||
453 | Addresses section for COPYs - array of bytes | ||
454 | |||
455 | |||
456 | Length of the delta encoding: | ||
457 | This integer gives the total number of remaining bytes that | ||
458 | comprise data of the delta encoding for this target window. | ||
459 | |||
460 | The delta encoding: | ||
461 | This contains the data representing the delta encoding which | ||
462 | is described next. | ||
463 | |||
464 | Length of the target window: | ||
465 | This integer indicates the actual size of the target window | ||
466 | after decompression. A decoder can use this value to allocate | ||
467 | memory to store the uncompressed data. | ||
468 | |||
469 | Delta_Indicator: | ||
470 | This byte is a set of bits, as shown: | ||
471 | |||
472 | 7 6 5 4 3 2 1 0 | ||
473 | +-+-+-+-+-+-+-+-+ | ||
474 | | | | | | | | | | | ||
475 | +-+-+-+-+-+-+-+-+ | ||
476 | ^ ^ ^ | ||
477 | | | | | ||
478 | | | +-- VCD_DATACOMP | ||
479 | | +---- VCD_INSTCOMP | ||
480 | +------ VCD_ADDRCOMP | ||
481 | |||
482 | VCD_DATACOMP: bit value 1. | ||
483 | VCD_INSTCOMP: bit value 2. | ||
484 | VCD_ADDRCOMP: bit value 4. | ||
485 | |||
486 | As discussed, the delta encoding consists of COPY, ADD and RUN | ||
487 | instructions. The ADD and RUN instructions have accompanying | ||
488 | unmatched data (that is, data that does not specifically match | ||
489 | any data in the source window or in some earlier part of the | ||
490 | target window) and the COPY instructions have addresses of where | ||
491 | the matches occur. OPTIONALLY, these types of data MAY be further | ||
492 | compressed using a secondary compressor. Thus, Vcdiff separates | ||
493 | the encoding of the delta instructions into three parts: | ||
494 | |||
495 | a. The unmatched data in the ADD and RUN instructions, | ||
496 | b. The delta instructions and accompanying sizes, and | ||
497 | c. The addresses of the COPY instructions. | ||
498 | |||
499 | If the bit VCD_DECOMPRESS (Section 4.1) was on, each of these | ||
500 | sections may have been compressed using the specified secondary | ||
501 | compressor. The bit positions 0 (VCD_DATACOMP), 1 (VCD_INSTCOMP), | ||
502 | and 2 (VCD_ADDRCOMP) respectively indicate, if non-zero, that | ||
503 | the corresponding parts are compressed. Then, these parts MUST | ||
504 | be decompressed before decoding the delta instructions. | ||
505 | |||
506 | Length of data for ADDs and RUNs: | ||
507 | This is the length (in bytes) of the section of data storing | ||
508 | the unmatched data accompanying the ADD and RUN instructions. | ||
509 | |||
510 | Length of instructions section: | ||
511 | This is the length (in bytes) of the delta instructions and | ||
512 | accompanying sizes. | ||
513 | |||
514 | Length of addresses for COPYs: | ||
515 | This is the length (in bytes) of the section storing | ||
516 | the addresses of the COPY instructions. | ||
517 | |||
518 | Data section for ADDs and RUNs: | ||
519 | This sequence of bytes encodes the unmatched data for the ADD | ||
520 | and RUN instructions. | ||
521 | |||
522 | Instructions and sizes section: | ||
523 | This sequence of bytes encodes the instructions and their sizes. | ||
524 | |||
525 | Addresses section for COPYs: | ||
526 | This sequence of bytes encodes the addresses of the COPY | ||
527 | instructions. | ||
528 | |||
529 | |||
530 | 5. DELTA INSTRUCTION ENCODING | ||
531 | |||
532 | The delta instructions described in Section 3 represent the results of | ||
533 | string matching. For many data differencing applications in which the | ||
534 | changes between source and target data are small, any straightforward | ||
535 | representation of these instructions would be adequate. However, for | ||
536 | applications including data compression, it is important to encode | ||
537 | these instructions well to achieve good compression rates. From our | ||
538 | experience, the following observations can be made: | ||
539 | |||
540 | a. The addresses in COPY instructions are locations of matches and | ||
541 | often occur close by or even exactly equal to one another. This is | ||
542 | because data in local regions are often replicated with minor changes. | ||
543 | In turn, this means that coding a newly matched address against some | ||
544 | set of recently matched addresses can be beneficial. | ||
545 | |||
546 | b. The matches are often short in length and separated by small amounts | ||
547 | of unmatched data. That is, the lengths of COPY and ADD instructions | ||
548 | are often small. This is particularly true of binary data such as | ||
549 | executable files or structured data such as HTML or XML. In such cases, | ||
550 | compression can be improved by combining the encoding of the sizes | ||
551 | and the instruction types as well as combining the encoding of adjacent | ||
552 | delta instructions with sufficiently small data sizes. | ||
553 | |||
554 | The below subsections discuss how the Vcdiff data format provides | ||
555 | mechanisms enabling encoders to use the above observations to improve | ||
556 | compression rates. | ||
557 | |||
558 | |||
559 | 5.1 Address Encoding Modes of COPY Instructions | ||
560 | |||
561 | As mentioned earlier, addresses of COPY instructions often occur close | ||
562 | to one another or are exactly equal. To take advantage of this phenomenon | ||
563 | and encode addresses of COPY instructions more efficiently, the Vcdiff | ||
564 | data format supports the use of two different types of address caches. | ||
565 | Both the encoder and decoder maintain these caches, so that the decoder's | ||
566 | caches remain synchronized with the encoder's caches. | ||
567 | |||
568 | a. A "near" cache is an array with "s_near" slots, each containing an | ||
569 | address used for encoding addresses nearby to previously encoded | ||
570 | addresses (in the positive direction only). The near cache also | ||
571 | maintains a "next_slot" index to the near cache. New entries to the | ||
572 | near cache are always inserted in the next_slot index, which maintains | ||
573 | a circular buffer of the s_near most recent addresses. | ||
574 | |||
575 | b. A "same" cache is an array with s_same*256 slots, each | ||
576 | containing an address. The same cache maintains a hash table of recent | ||
577 | addresses used for repeated encoding of the exact same address. | ||
578 | |||
579 | |||
580 | By default, the parameters s_near and s_same are respectively set to 4 | ||
581 | and 3. An encoder MAY modify these values, but then it MUST encode the | ||
582 | new values in the encoding itself, as discussed in Section 7, so that | ||
583 | the decoder can properly set up its own caches. | ||
584 | |||
585 | At the start of processing a target window, an implementation | ||
586 | (encoder or decoder) initializes all of the slots in both caches | ||
587 | to zero. The next_slot pointer of the near cache is set | ||
588 | to point to slot zero. | ||
589 | |||
590 | Each time a COPY instruction is processed by the encoder or | ||
591 | decoder, the implementation's caches are updated as follows, where | ||
592 | "addr" is the address in the COPY instruction. | ||
593 | |||
594 | a. The slot in the near cache referenced by the next_slot | ||
595 | index is set to addr. The next_slot index is then incremented | ||
596 | modulo s_near. | ||
597 | |||
598 | b. The slot in the same cache whose index is addr%(s_same*256) | ||
599 | is set to addr. [We use the C notations of % for modulo and | ||
600 | * for multiplication.] | ||
601 | |||
602 | |||
603 | 5.2 Example code for maintaining caches | ||
604 | |||
605 | To make clear the above description, below are example cache data | ||
606 | structures and algorithms to initialize and update them: | ||
607 | |||
608 | typedef struct _cache_s | ||
609 | { | ||
610 | int* near; /* array of size s_near */ | ||
611 | int s_near; | ||
612 | int next_slot; /* the circular index for near */ | ||
613 | int* same; /* array of size s_same*256 */ | ||
614 | int s_same; | ||
615 | } Cache_t; | ||
616 | |||
617 | cache_init(Cache_t* ka) | ||
618 | { | ||
619 | int i; | ||
620 | |||
621 | ka->next_slot = 0; | ||
622 | for(i = 0; i < ka->s_near; ++i) | ||
623 | ka->near[i] = 0; | ||
624 | |||
625 | for(i = 0; i < ka->s_same*256; ++i) | ||
626 | ka->same[i] = 0; | ||
627 | } | ||
628 | |||
629 | cache_update(Cache_t* ka, int addr) | ||
630 | { | ||
631 | if(ka->s_near > 0) | ||
632 | { ka->near[ka->next_slot] = addr; | ||
633 | ka->next_slot = (ka->next_slot + 1) % ka->s_near; | ||
634 | } | ||
635 | |||
636 | if(ka->s_same > 0) | ||
637 | ka->same[addr % (ka->s_same*256)] = addr; | ||
638 | } | ||
639 | |||
640 | |||
641 | 5.3 Encoding of COPY instruction addresses | ||
642 | |||
643 | The address of a COPY instruction is encoded using different modes | ||
644 | depending on the type of cached address used, if any. | ||
645 | |||
646 | Let "addr" be the address of a COPY instruction to be decoded and "here" | ||
647 | be the current location in the target data (i.e., the start of the data | ||
648 | about to be encoded or decoded). Let near[j] be the jth element in | ||
649 | the near cache, and same[k] be the kth element in the same cache. | ||
650 | Below are the possible address modes: | ||
651 | |||
652 | VCD_SELF: This mode has value 0. The address was encoded by itself | ||
653 | as an integer. | ||
654 | |||
655 | VCD_HERE: This mode has value 1. The address was encoded as | ||
656 | the integer value "here - addr". | ||
657 | |||
658 | Near modes: The "near modes" are in the range [2,s_near+1]. Let m | ||
659 | be the mode of the address encoding. The address was encoded | ||
660 | as the integer value "addr - near[m-2]". | ||
661 | |||
662 | Same modes: The "same modes" are in the range | ||
663 | [s_near+2,s_near+s_same+1]. Let m be the mode of the encoding. | ||
664 | The address was encoded as a single byte b such that | ||
665 | "addr == same[(m - (s_near+2))*256 + b]". | ||
666 | |||
667 | |||
668 | 5.3 Example code for encoding and decoding of COPY instruction addresses | ||
669 | |||
670 | We show example algorithms below to demonstrate use of address modes more | ||
671 | clearly. The encoder has freedom to choose address modes; the sample | ||
672 | addr_encode() algorithm merely shows one way of picking the address | ||
673 | mode. The decoding algorithm addr_decode() will uniquely decode addresses | ||
674 | regardless of the encoder's algorithm choice. | ||
675 | |||
676 | Note that the address caches are updated immediately after an address is | ||
677 | encoded or decoded. In this way, the decoder is always synchronized with | ||
678 | the encoder. | ||
679 | |||
680 | int addr_encode(Cache_t* ka, int addr, int here, int* mode) | ||
681 | { | ||
682 | int i, d, bestd, bestm; | ||
683 | |||
684 | /* Attempt to find the address mode that yields the | ||
685 | * smallest integer value for "d", the encoded address | ||
686 | * value, thereby minimizing the encoded size of the | ||
687 | * address. */ | ||
688 | |||
689 | bestd = addr; bestm = VCD_SELF; /* VCD_SELF == 0 */ | ||
690 | |||
691 | if((d = here-addr) < bestd) | ||
692 | { bestd = d; bestm = VCD_HERE; } /* VCD_HERE == 1 */ | ||
693 | |||
694 | for(i = 0; i < ka->s_near; ++i) | ||
695 | if((d = addr - ka->near[i]) >= 0 && d < bestd) | ||
696 | { bestd = d; bestm = i+2; } | ||
697 | |||
698 | if(ka->s_same > 0 && ka->same[d = addr%(ka->s_same*256)] == addr) | ||
699 | { bestd = d%256; bestm = ka->s_near + 2 + d/256; } | ||
700 | |||
701 | cache_update(ka,addr); | ||
702 | |||
703 | *mode = bestm; /* this returns the address encoding mode */ | ||
704 | return bestd; /* this returns the encoded address */ | ||
705 | } | ||
706 | |||
707 | Note that the addr_encode() algorithm chooses the best address mode using a | ||
708 | local optimization, but that may not lead to the best encoding efficiency | ||
709 | because different modes lead to different instruction encodings, as described below. | ||
710 | |||
711 | The functions addrint() and addrbyte() used in addr_decode() obtain from | ||
712 | the "Addresses section for COPYs" (Section 4.3) an integer or a byte, | ||
713 | respectively. These utilities will not be described here. We simply | ||
714 | recall that an integer is represented as a compact variable-sized string | ||
715 | of bytes as described in Section 2 (i.e., base 128). | ||
716 | |||
717 | int addr_decode(Cache_t* ka, int here, int mode) | ||
718 | { int addr, m; | ||
719 | |||
720 | if(mode == VCD_SELF) | ||
721 | addr = addrint(); | ||
722 | else if(mode == VCD_HERE) | ||
723 | addr = here - addrint(); | ||
724 | else if((m = mode - 2) >= 0 && m < ka->s_near) /* near cache */ | ||
725 | addr = ka->near[m] + addrint(); | ||
726 | else /* same cache */ | ||
727 | { m = mode - (2 + ka->s_near); | ||
728 | addr = ka->same[m*256 + addrbyte()]; | ||
729 | } | ||
730 | |||
731 | cache_update(ka, addr); | ||
732 | |||
733 | return addr; | ||
734 | } | ||
735 | |||
736 | |||
737 | 5.4 Instruction Codes | ||
738 | |||
739 | As noted, the data sizes associated with delta instructions are often | ||
740 | small. Thus, compression efficiency can be improved by combining the sizes | ||
741 | and instruction types in a single encoding, as well as by combining certain | ||
742 | pairs of adjacent delta instructions. Effective choices of when to perform | ||
743 | such combinations depend on many factors including the data being processed | ||
744 | and the string matching algorithm in use. For example, if many COPY | ||
745 | instructions have the same data sizes, it may be worthwhile to encode these | ||
746 | instructions more compactly than others. | ||
747 | |||
748 | The Vcdiff data format is designed so that a decoder does not need to be | ||
749 | aware of the choices made in encoding algorithms. This is achieved with the | ||
750 | notion of an "instruction code table" containing 256 entries. Each entry | ||
751 | defines either a single delta instruction or a pair of instructions that | ||
752 | have been combined. Note that the code table itself only exists in main | ||
753 | memory, not in the delta file (unless using an application-defined code | ||
754 | table, described in Section 7). The encoded data simply includes the index | ||
755 | of each instruction and, since there are only 256 indices, each index | ||
756 | can be represented as a single byte. | ||
757 | |||
758 | Each instruction code entry contains six fields, each of which | ||
759 | is a single byte with unsigned value: | ||
760 | |||
761 | +-----------------------------------------------+ | ||
762 | | inst1 | size1 | mode1 | inst2 | size2 | mode2 | | ||
763 | +-----------------------------------------------+ | ||
764 | |||
765 | @@@ could be more compact | ||
766 | |||
767 | Each triple (inst,size,mode) defines a delta instruction. The meanings | ||
768 | of these fields are as follows: | ||
769 | |||
770 | inst: An "inst" field can have one of the four values: NOOP (0), ADD (1), | ||
771 | RUN (2) or COPY (3) to indicate the instruction types. NOOP means | ||
772 | that no instruction is specified. In this case, both the corresponding | ||
773 | size and mode fields will be zero. | ||
774 | |||
775 | size: A "size" field is zero or positive. A value zero means that the | ||
776 | size associated with the instruction is encoded separately as | ||
777 | an integer in the "Instructions and sizes section" (Section 6). | ||
778 | A positive value for "size" defines the actual data size. | ||
779 | Note that since the size is restricted to a byte, the maximum | ||
780 | value for any instruction with size implicitly defined in the code | ||
781 | table is 255. | ||
782 | |||
783 | mode: A "mode" field is significant only when the associated delta | ||
784 | instruction is a COPY. It defines the mode used to encode the | ||
785 | associated addresses. For other instructions, this is always zero. | ||
786 | |||
787 | |||
788 | 5.5 The Code Table | ||
789 | |||
790 | Following the discussions on address modes and instruction code tables, | ||
791 | we define a "Code Table" to have the data below: | ||
792 | |||
793 | s_near: the size of the near cache, | ||
794 | s_same: the size of the same cache, | ||
795 | i_code: the 256-entry instruction code table. | ||
796 | |||
797 | Vcdiff itself defines a "default code table" in which s_near is 4 | ||
798 | and s_same is 3. Thus, there are 9 address modes for a COPY instruction. | ||
799 | The first two are VCD_SELF (0) and VCD_HERE (1). Modes 2, 3, 4 and 5 | ||
800 | are for addresses coded against the near cache. And, modes 6, 7 and 8 | ||
801 | are for addresses coded against the same cache. | ||
802 | |||
803 | The default instruction code table is depicted below, in a compact | ||
804 | representation that we use only for descriptive purposes. See section 7 | ||
805 | for the specification of how an instruction code table is represented | ||
806 | in the Vcdiff encoding format. In the depiction, a zero value for | ||
807 | size indicates that the size is separately coded. The mode of non-COPY | ||
808 | instructions is represented as 0 even though it is not used. | ||
809 | |||
810 | |||
811 | TYPE SIZE MODE TYPE SIZE MODE INDEX | ||
812 | --------------------------------------------------------------- | ||
813 | 1. RUN 0 0 NOOP 0 0 0 | ||
814 | 2. ADD 0, [1,17] 0 NOOP 0 0 [1,18] | ||
815 | 3. COPY 0, [4,18] 0 NOOP 0 0 [19,34] | ||
816 | 4. COPY 0, [4,18] 1 NOOP 0 0 [35,50] | ||
817 | 5. COPY 0, [4,18] 2 NOOP 0 0 [51,66] | ||
818 | 6. COPY 0, [4,18] 3 NOOP 0 0 [67,82] | ||
819 | 7. COPY 0, [4,18] 4 NOOP 0 0 [83,98] | ||
820 | 8. COPY 0, [4,18] 5 NOOP 0 0 [99,114] | ||
821 | 9. COPY 0, [4,18] 6 NOOP 0 0 [115,130] | ||
822 | 10. COPY 0, [4,18] 7 NOOP 0 0 [131,146] | ||
823 | 11. COPY 0, [4,18] 8 NOOP 0 0 [147,162] | ||
824 | 12. ADD [1,4] 0 COPY [4,6] 0 [163,174] | ||
825 | 13. ADD [1,4] 0 COPY [4,6] 1 [175,186] | ||
826 | 14. ADD [1,4] 0 COPY [4,6] 2 [187,198] | ||
827 | 15. ADD [1,4] 0 COPY [4,6] 3 [199,210] | ||
828 | 16. ADD [1,4] 0 COPY [4,6] 4 [211,222] | ||
829 | 17. ADD [1,4] 0 COPY [4,6] 5 [223,234] | ||
830 | 18. ADD [1,4] 0 COPY 4 6 [235,238] | ||
831 | 19. ADD [1,4] 0 COPY 4 7 [239,242] | ||
832 | 20. ADD [1,4] 0 COPY 4 8 [243,246] | ||
833 | 21. COPY 4 [0,8] ADD 1 0 [247,255] | ||
834 | --------------------------------------------------------------- | ||
835 | |||
836 | In the above depiction, each numbered line represents one or more | ||
837 | entries in the actual instruction code table (recall that an entry in | ||
838 | the instruction code table may represent up to two combined delta | ||
839 | instructions.) The last column ("INDEX") shows which index value or | ||
840 | range of index values of the entries covered by that line. The notation | ||
841 | [i,j] means values from i through j, inclusive. The first 6 columns of | ||
842 | a line in the depiction describe the pairs of instructions used for | ||
843 | the corresponding index value(s). | ||
844 | |||
845 | If a line in the depiction includes a column entry using the [i,j] | ||
846 | notation, this means that the line is instantiated for each value | ||
847 | in the range from i to j, inclusive. The notation "0, [i,j]" means | ||
848 | that the line is instantiated for the value 0 and for each value | ||
849 | in the range from i to j, inclusive. | ||
850 | |||
851 | If a line in the depiction includes more than one entry using the [i,j] | ||
852 | notation, implying a "nested loop" to convert the line to a range of | ||
853 | table entries, the first such [i,j] range specifies the outer loop, | ||
854 | and the second specifies the inner loop. | ||
855 | |||
856 | The below examples should make clear the above description: | ||
857 | |||
858 | Line 1 shows the single RUN instruction with index 0. As the size field | ||
859 | is 0, this RUN instruction always has its actual size encoded separately. | ||
860 | |||
861 | Line 2 shows the 18 single ADD instructions. The ADD instruction with | ||
862 | size field 0 (i.e., the actual size is coded separately) has index 1. | ||
863 | ADD instructions with sizes from 1 to 17 use code indices 2 to 18 and | ||
864 | their sizes are as given (so they will not be separately encoded.) | ||
865 | |||
866 | Following the single ADD instructions are the single COPY instructions | ||
867 | ordered by their address encoding modes. For example, line 11 shows the | ||
868 | COPY instructions with mode 8, i.e., the last of the same cache. | ||
869 | In this case, the COPY instruction with size field 0 has index 147. | ||
870 | Again, the actual size of this instruction will be coded separately. | ||
871 | |||
872 | Lines 12 to 21 show the pairs of instructions that are combined together. | ||
873 | For example, line 12 depicts the 12 entries in which an ADD instruction | ||
874 | is combined with an immediately following COPY instruction. The entries | ||
875 | with indices 163, 164, 165 represent the pairs in which the ADD | ||
876 | instructions all have size 1 while the COPY instructions have mode | ||
877 | 0 (VCD_SELF) and sizes 4, 5 and 6 respectively. | ||
878 | |||
879 | The last line, line 21, shows the nine instruction pairs where the first | ||
880 | instruction is a COPY and the second is an ADD. In this case, all COPY | ||
881 | instructions have size 4 with mode ranging from 0 to 8 and all the ADD | ||
882 | instructions have size 1. Thus, the entry with largest index 255 | ||
883 | combines a COPY instruction of size 4 and mode 8 with an ADD instruction | ||
884 | of size 1. | ||
885 | |||
886 | The choice of the minimum size 4 for COPY instructions in the default code | ||
887 | table was made from experiments that showed that excluding small matches | ||
888 | (less than 4 bytes long) improved the compression rates. | ||
889 | |||
890 | |||
891 | 6. DECODING A TARGET WINDOW | ||
892 | |||
893 | Section 4.3 discusses that the delta instructions and associated data | ||
894 | are encoded in three arrays of bytes: | ||
895 | |||
896 | Data section for ADDs and RUNs, | ||
897 | Instructions and sizes section, and | ||
898 | Addresses section for COPYs. | ||
899 | |||
900 | |||
901 | Further, these data sections may have been compressed by some | ||
902 | secondary compressor. Assume that any such compressed data has been | ||
903 | decompressed so that we now have three arrays: | ||
904 | |||
905 | inst: bytes coding the instructions and sizes. | ||
906 | data: unmatched data associated with ADDs and RUNs. | ||
907 | addr: bytes coding the addresses of COPYs. | ||
908 | |||
909 | These arrays are organized as follows: | ||
910 | |||
911 | inst: | ||
912 | a sequence of (index, [size1], [size2]) tuples, where "index" | ||
913 | is an index into the instruction code table, and size1 and size2 | ||
914 | are integers that MAY or MAY NOT be included in the tuple as | ||
915 | follows. The entry with the given "index" in the instruction | ||
916 | code table potentially defines two delta instructions. If the | ||
917 | first delta instruction is not a VCD_NOOP and its size is zero, | ||
918 | then size1 MUST be present. Otherwise, size1 MUST be omitted and | ||
919 | the size of the instruction (if it is not VCD_NOOP) is as defined | ||
920 | in the table. The presence or absence of size2 is defined | ||
921 | similarly with respect to the second delta instruction. | ||
922 | |||
923 | data: | ||
924 | a sequence of data values, encoded as bytes. | ||
925 | |||
926 | addr: | ||
927 | a sequence of address values. Addresses are normally encoded as | ||
928 | integers as described in Section 2 (i.e., base 128). | ||
929 | Since the same cache emits addresses in the range [0,255], | ||
930 | however, same cache addresses are always encoded as a | ||
931 | single byte. | ||
932 | |||
933 | To summarize, each tuple in the "inst" array includes an index to some | ||
934 | entry in the instruction code table that determines: | ||
935 | |||
936 | a. Whether one or two instructions were encoded and their types. | ||
937 | |||
938 | b. If the instructions have their sizes encoded separately, these | ||
939 | sizes will follow, in order, in the tuple. | ||
940 | |||
941 | c. If the instructions have accompanying data, i.e., ADDs or RUNs, | ||
942 | their data will be in the array "data". | ||
943 | |||
944 | d. Similarly, if the instructions are COPYs, the coded addresses are | ||
945 | found in the array "addr". | ||
946 | |||
947 | The decoding procedure simply processes the arrays by reading one code | ||
948 | index at a time, looking up the corresponding instruction code entry, | ||
949 | then consuming the respective sizes, data and addresses following the | ||
950 | directions in this entry. In other words, the decoder maintains an implicit | ||
951 | next-element pointer for each array; "consuming" an instruction tuple, | ||
952 | data, or address value implies incrementing the associated pointer. | ||
953 | |||
954 | For example, if during the processing of the target window, the next | ||
955 | unconsumed tuple in the inst array has index value 19, then the first | ||
956 | instruction is a COPY, whose size is found as the immediately following | ||
957 | integer in the inst array. Since the mode of this COPY instruction is | ||
958 | VCD_SELF, the corresponding address is found by consuming the next | ||
959 | integer in the addr array. The data array is left intact. As the second | ||
960 | instruction for code index 19 is a NOOP, this tuple is finished. | ||
961 | |||
962 | |||
963 | 7. APPLICATION-DEFINED CODE TABLES | ||
964 | |||
965 | Although the default code table used in Vcdiff is good for general | ||
966 | purpose encoders, there are times when other code tables may perform | ||
967 | better. For example, to code a file with many identical segments of data, | ||
968 | it may be advantageous to have a COPY instruction with the specific size | ||
969 | of these data segments so that the instruction can be encoded in a single | ||
970 | byte. Such a special code table MUST then be encoded in the delta file | ||
971 | so that the decoder can reconstruct it before decoding the data. | ||
972 | |||
973 | Vcdiff allows an application-defined code table to be specified | ||
974 | in a delta file with the following data: | ||
975 | |||
976 | Size of near cache - byte | ||
977 | Size of same cache - byte | ||
978 | Compressed code table data | ||
979 | |||
980 | The "compressed code table data" encodes the delta between the default | ||
981 | code table (source) and the new code table (target) in the same manner as | ||
982 | described in Section 4.3 for encoding a target window in terms of a | ||
983 | source window. This delta is computed using the following steps: | ||
984 | |||
985 | a. Convert the new instruction code table into a string, "code", of | ||
986 | 1536 bytes using the below steps in order: | ||
987 | |||
988 | i. Add in order the 256 bytes representing the types of the first | ||
989 | instructions in the instruction pairs. | ||
990 | ii. Add in order the 256 bytes representing the types of the second | ||
991 | instructions in the instruction pairs. | ||
992 | iii. Add in order the 256 bytes representing the sizes of the first | ||
993 | instructions in the instruction pairs. | ||
994 | iv. Add in order the 256 bytes representing the sizes of the second | ||
995 | instructions in the instruction pairs. | ||
996 | v. Add in order the 256 bytes representing the modes of the first | ||
997 | instructions in the instruction pairs. | ||
998 | vi. Add in order the 256 bytes representing the modes of the second | ||
999 | instructions in the instruction pairs. | ||
1000 | |||
1001 | b. Similarly, convert the default instruction code table into | ||
1002 | a string "dflt". | ||
1003 | |||
1004 | c. Treat the string "code" as a target window and "dflt" as the | ||
1005 | corresponding source data and apply an encoding algorithm to | ||
1006 | compute the delta encoding of "code" in terms of "dflt". | ||
1007 | This computation MUST use the default code table for encoding | ||
1008 | the delta instructions. | ||
1009 | |||
1010 | The decoder can then reverse the above steps to decode the compressed | ||
1011 | table data using the method of Section 6, employing the default code | ||
1012 | table, to generate the new code table. Note that the decoder does not | ||
1013 | need to know anything about the details of the encoding algorithm used | ||
1014 | in step (c). The decoder is still able to decode the new code table | ||
1015 | because the Vcdiff format is independent from the choice of encoding | ||
1016 | algorithm, and because the encoder in step (c) uses the known, default | ||
1017 | code table. | ||
1018 | |||
1019 | |||
1020 | 8. PERFORMANCE | ||
1021 | |||
1022 | The encoding format is compact. For compression only, using the LZ-77 | ||
1023 | string parsing strategy and without any secondary compressors, the typical | ||
1024 | compression rate is better than Unix compress and close to gzip. For | ||
1025 | differencing, the data format is better than all known methods in | ||
1026 | terms of its stated goal, which is primarily decoding speed and | ||
1027 | encoding efficiency. | ||
1028 | |||
1029 | We compare the performance of compress, gzip and Vcdiff using the | ||
1030 | archives of three versions of the Gnu C compiler, gcc-2.95.1.tar, | ||
1031 | gcc-2.95.2.tar and gcc-2.95.3.tar. The experiments were done on an | ||
1032 | SGI-MIPS3, 400MHZ. Gzip was used at its default compression level. | ||
1033 | Vcdiff timings were done using the Vcodex/Vcdiff software (Section 13). | ||
1034 | As string and window matching typically dominates the computation during | ||
1035 | compression, the Vcdiff compression times were directly due to the | ||
1036 | algorithms used in the Vcodex/Vcdiff software. However, the decompression | ||
1037 | times should be generic and representative of any good implementation | ||
1038 | of the Vcdiff data format. Timing was done by running each program | ||
1039 | three times and taking the average of the total cpu+system times. | ||
1040 | |||
1041 | Below are the different Vcdiff runs: | ||
1042 | |||
1043 | Vcdiff: vcdiff is used as compressor only. | ||
1044 | |||
1045 | Vcdiff-d: vcdiff is used as a differencer only. That is, it only | ||
1046 | compares target data against source data. Since the files | ||
1047 | involved are large, they are broken into windows. In this | ||
1048 | case, each target window starting at some file offset in | ||
1049 | the target file is compared against a source window with | ||
1050 | the same file offset (in the source file). The source | ||
1051 | window is also slightly larger than the target window | ||
1052 | to increase matching opportunities. The -d option also gives | ||
1053 | a hint to the string matching algorithm of Vcdiff that | ||
1054 | the two files are very similar with long stretches of matches. | ||
1055 | The algorithm takes advantage of this to minimize its | ||
1056 | processing of source data and save time. | ||
1057 | |||
1058 | Vcdiff-dc: This is similar to Vcdiff-d but vcdiff can also compare | ||
1059 | target data against target data as applicable. Thus, vcdiff | ||
1060 | both computes differences and compresses data. The windowing | ||
1061 | algorithm is the same as above. However, the above hint is | ||
1062 | rescinded in this case. | ||
1063 | |||
1064 | Vcdiff-dcs: This is similar to Vcdiff-dc but the windowing algorithm | ||
1065 | uses a content-based heuristic to select source data segments | ||
1066 | that are more likely to match with a given target window. | ||
1067 | Thus, the source data segment selected for a target window | ||
1068 | often will not be aligned with the file offsets of this | ||
1069 | target window. | ||
1070 | |||
1071 | |||
1072 | gcc-2.95.1 gcc-2.95.2 compression decompression | ||
1073 | raw size 55746560 55797760 | ||
1074 | compress - 19939390 13.85s 7.09s | ||
1075 | gzip - 12973443 42.99s 5.35s | ||
1076 | Vcdiff - 15358786 20.04s 4.65s | ||
1077 | Vcdiff-d - 100971 10.93s 1.92s | ||
1078 | Vcdiff-dc - 97246 20.03s 1.84s | ||
1079 | Vcdiff-dcs - 256445 44.81s 1.84s | ||
1080 | |||
1081 | TABLE 1. Compressing gcc-2.95.2.tar given gcc-2.95.1 | ||
1082 | |||
1083 | |||
1084 | TABLE 1 shows the raw sizes of gcc-2.95.1.tar and gcc-2.95.2.tar and the | ||
1085 | sizes of the compressed results. As a pure compressor, the compression | ||
1086 | rate for Vcdiff is worse than gzip and better than compress. The last | ||
1087 | three rows show that when two file versions are very similar, differencing | ||
1088 | can have dramatically good compression rates. Vcdiff-d and Vcdiff-dc use | ||
1089 | the same simple window selection method but Vcdiff-dc also does compression | ||
1090 | so its output is slightly smaller. Vcdiff-dcs uses a heuristic based on | ||
1091 | data content to search for source data that likely will match a given target | ||
1092 | window. Although it does a good job, the heuristic did not always find the | ||
1093 | best matches which are given by the simple algorithm of Vcdiff-d. As a | ||
1094 | result, the output size is slightly larger. Note also that there is a large | ||
1095 | cost in computing matching windows this way. Finally, the compression time | ||
1096 | of Vcdiff-d is nearly half of that of Vcdiff-dc. It is tempting to conclude | ||
1097 | that the compression feature causes the additional time in Vcdiff-dc | ||
1098 | relative to Vcdiff-d. However, this is not the case. The hint given to | ||
1099 | the Vcdiff string matching algorithm that the two files are likely to | ||
1100 | have very long stretches of matches helps the algorithm to minimize | ||
1101 | processing of the "source data", thus saving half the time. However, as we | ||
1102 | shall see below, when this hint is wrong the result is an even longer time. | ||
1103 | |||
1104 | |||
1105 | gcc-2.95.2 gcc-2.95.3 compression decompression | ||
1106 | raw size 55797760 55787520 | ||
1107 | compress - 19939453 13.54s 7.00s | ||
1108 | gzip - 12998097 42.63s 5.62s | ||
1109 | Vcdiff - 15371737 20.09s 4.74s | ||
1110 | Vcdiff-d - 26383849 71.41s 6.41s | ||
1111 | Vcdiff-dc - 14461203 42.48s 4.82s | ||
1112 | Vcdiff-dcs - 1248543 61.18s 1.99s | ||
1113 | |||
1114 | TABLE 2. Compressing gcc-2.95.3.tar given gcc-2.95.2 | ||
1115 | |||
1116 | |||
1117 | TABLE 2 shows the raw sizes of gcc-2.95.2.tar and gcc-2.95.3.tar and | ||
1118 | the sizes of the compressed results. In this case, the tar file of | ||
1119 | gcc-2.95.3 is rearranged in a way that makes the straightforward method | ||
1120 | of matching file offsets for source and target windows fail. As a | ||
1121 | result, Vcdiff-d performs rather dismally both in time and output size. | ||
1122 | The large time for Vcdiff-d is directly due to the fact that the string | ||
1123 | matching algorithm has to work much harder to find matches when the hint | ||
1124 | that two files have long matching stretches fails to hold. On the other | ||
1125 | hand, Vcdiff-dc does both differencing and compression resulting in good | ||
1126 | output size. Finally, the window searching heuristic used in Vcdiff-dcs is | ||
1127 | effective in finding the right matching source windows for target windows | ||
1128 | resulting in a small output size. This shows why the data format needs to | ||
1129 | have a way to specify matching windows to gain performance. Finally, | ||
1130 | we note that the decoding times are always good regardless of how | ||
1131 | the string matching or window searching algorithms perform. | ||
1132 | |||
1133 | |||
1134 | 9. FURTHER ISSUES | ||
1135 | |||
1136 | This document does not address a few issues: | ||
1137 | |||
1138 | Secondary compressors: | ||
1139 | As discussed in Section 4.3, certain sections in the delta encoding | ||
1140 | of a window may be further compressed by a secondary compressor. | ||
1141 | In our experience, the basic Vcdiff format is adequate for most | ||
1142 | purposes so that secondary compressors are seldom needed. In | ||
1143 | particular, for normal use of data differencing where the files to | ||
1144 | be compared have long stretches of matches, much of the gain in | ||
1145 | compression rate is already achieved by normal string matching. | ||
1146 | Thus, the use of secondary compressors is seldom needed in this case. | ||
1147 | However, for applications beyond differencing of such nearly identical | ||
1148 | files, secondary compressors may be needed to achieve maximal | ||
1149 | compressed results. | ||
1150 | |||
1151 | Therefore, we recommend leaving the Vcdiff data format defined | ||
1152 | as in this document so that the use of secondary compressors | ||
1153 | can be implemented when they become needed in the future. | ||
1154 | The formats of the compressed data via such compressors or any | ||
1155 | compressors that may be defined in the future are left open to | ||
1156 | their implementations. These could include Huffman encoding, | ||
1157 | arithmetic encoding, and splay tree encoding [8,9]. | ||
1158 | |||
1159 | Large file system vs. small file system: | ||
1160 | As discussed in Section 4, a target window in a large file may be | ||
1161 | compared against some source window in another file or in the same | ||
1162 | file (from some earlier part). In that case, the file offset of the | ||
1163 | source window is specified as a variable-sized integer in the delta | ||
1164 | encoding. There is a possibility that the encoding was computed on | ||
1165 | a system supporting much larger files than in a system where | ||
1166 | the data may be decoded (e.g., 64-bit file systems vs. 32-bit file | ||
1167 | systems). In that case, some target data may not be recoverable. | ||
1168 | This problem could afflict any compression format, and ought | ||
1169 | to be resolved with a generic negotiation mechanism in the | ||
1170 | appropriate protocol(s). | ||
1171 | |||
1172 | |||
1173 | 10. SUMMARY | ||
1174 | |||
1175 | We have described Vcdiff, a general and portable encoding format for | ||
1176 | compression and differencing. The format is good in that it allows | ||
1177 | implementing a decoder without knowledge of the encoders. Further, | ||
1178 | ignoring the use of secondary compressors not defined within the format, | ||
1179 | the decoding algorithm runs in linear time and requires working space | ||
1180 | proportional to window sizes. | ||
1181 | |||
1182 | |||
1183 | |||
1184 | 11. ACKNOWLEDGEMENTS | ||
1185 | |||
1186 | Thanks are due to Balachander Krishnamurthy, Jeff Mogul and Arthur Van Hoff | ||
1187 | who provided much encouragement to publicize Vcdiff. In particular, Jeff | ||
1188 | helped clarify the description of the data format presented here. | ||
1189 | |||
1190 | |||
1191 | |||
1192 | 12. SECURITY CONSIDERATIONS | ||
1193 | |||
1194 | Vcdiff only provides a format to encode compressed and differenced data. | ||
1195 | It does not address any issues concerning how such data are, in fact, | ||
1196 | stored in a given file system or the run-time memory of a computer system. | ||
1197 | Therefore, we do not anticipate any security issues with respect to Vcdiff. | ||
1198 | |||
1199 | |||
1200 | |||
1201 | 13. SOURCE CODE AVAILABILITY | ||
1202 | |||
1203 | Vcdiff is implemented as a data transforming method in Phong Vo's | ||
1204 | Vcodex library. AT&T Corp. has made the source code for Vcodex available | ||
1205 | for anyone to use to transmit data via HTTP/1.1 Delta Encoding [10,11]. | ||
1206 | The source code and accompanying license are accessible at the URL below: | ||
1207 | |||
1208 | http://www.research.att.com/sw/tools | ||
1209 | |||
1210 | |||
1211 | 14. INTELLECTUAL PROPERTY RIGHTS | ||
1212 | |||
1213 | The IETF has been notified of intellectual property rights claimed in | ||
1214 | regard to some or all of the specification contained in this | ||
1215 | document. For more information consult the online list of claimed | ||
1216 | rights, at <http://www.ietf.org/ipr.html>. | ||
1217 | |||
1218 | The IETF takes no position regarding the validity or scope of any | ||
1219 | intellectual property or other rights that might be claimed to | ||
1220 | pertain to the implementation or use of the technology described in | ||
1221 | this document or the extent to which any license under such rights | ||
1222 | might or might not be available; neither does it represent that it | ||
1223 | has made any effort to identify any such rights. Information on the | ||
1224 | IETF's procedures with respect to rights in standards-track and | ||
1225 | standards-related documentation can be found in BCP-11. Copies of | ||
1226 | claims of rights made available for publication and any assurances of | ||
1227 | licenses to be made available, or the result of an attempt made to | ||
1228 | obtain a general license or permission for the use of such | ||
1229 | proprietary rights by implementors or users of this specification can | ||
1230 | be obtained from the IETF Secretariat. | ||
1231 | |||
1232 | |||
1233 | |||
1234 | 15. IANA CONSIDERATIONS | ||
1235 | |||
1236 | The Internet Assigned Numbers Authority (IANA) administers the number | ||
1237 | space for Secondary Compressor ID values. Values and their meaning | ||
1238 | must be documented in an RFC or other peer-reviewed, permanent, and | ||
1239 | readily available reference, in sufficient detail so that | ||
1240 | interoperability between independent implementations is possible. | ||
1241 | Subject to these constraints, name assignments are First Come, First | ||
1242 | Served - see RFC2434 [13]. Legal ID values are in the range 1..255. | ||
1243 | |||
1244 | This document does not define any values in this number space. | ||
1245 | |||
1246 | |||
1247 | 16. REFERENCES | ||
1248 | |||
1249 | [1] D.G. Korn and K.P. Vo, Vdelta: Differencing and Compression, | ||
1250 | Practical Reusable Unix Software, Editor B. Krishnamurthy, | ||
1251 | John Wiley & Sons, Inc., 1995. | ||
1252 | |||
1253 | [2] J. Ziv and A. Lempel, A Universal Algorithm for Sequential Data | ||
1254 | Compression, IEEE Trans. on Information Theory, 23(3):337-343, 1977. | ||
1255 | |||
1256 | [3] W. Tichy, The String-to-String Correction Problem with Block Moves, | ||
1257 | ACM Transactions on Computer Systems, 2(4):309-321, November 1984. | ||
1258 | |||
1259 | [4] E.M. McCreight, A Space-Economical Suffix Tree Construction | ||
1260 | Algorithm, Journal of the ACM, 23:262-272, 1976. | ||
1261 | |||
1262 | [5] J.J. Hunt, K.P. Vo, W. Tichy, An Empirical Study of Delta Algorithms, | ||
1263 | IEEE Software Configuration and Maintenance Workshop, 1996. | ||
1264 | |||
1265 | [6] J.J. Hunt, K.P. Vo, W. Tichy, Delta Algorithms: An Empirical Analysis, | ||
1266 | ACM Trans. on Software Engineering and Methodology, 7:192-214, 1998. | ||
1267 | |||
1268 | [7] D.G. Korn, K.P. Vo, Sfio: A buffered I/O Library, | ||
1269 | Proc. of the Summer '91 Usenix Conference, 1991. | ||
1270 | |||
1271 | [8] D. W. Jones, Application of Splay Trees to Data Compression, | ||
1272 | CACM, 31(8):996-1007. | ||
1273 | |||
1274 | [9] M. Nelson, J. Gailly, The Data Compression Book, ISBN 1-55851-434-1, | ||
1275 | M&T Books, New York, NY, 1995. | ||
1276 | |||
1277 | [10] J.C. Mogul, F. Douglis, A. Feldmann, and B. Krishnamurthy, | ||
1278 | Potential benefits of delta encoding and data compression for HTTP, | ||
1279 | SIGCOMM '97, Cannes, France, 1997. | ||
1280 | |||
1281 | [11] J.C. Mogul, B. Krishnamurthy, F. Douglis, A. Feldmann, | ||
1282 | Y. Goland, and A. Van Hoff, Delta Encoding in HTTP, | ||
1283 | IETF, draft-mogul-http-delta-10, 2001. | ||
1284 | |||
1285 | [12] S. Bradner, Key words for use in RFCs to Indicate Requirement Levels, | ||
1286 | RFC 2119, March 1997. | ||
1287 | |||
1288 | [13] T. Narten, H. Alvestrand, Guidelines for Writing an IANA | ||
1289 | Considerations Section in RFCs, RFC2434, October 1998. | ||
1290 | |||
1291 | |||
1292 | |||
1293 | 17. AUTHOR'S ADDRESS | ||
1294 | |||
1295 | Kiem-Phong Vo (main contact) | ||
1296 | AT&T Labs, Room D223 | ||
1297 | 180 Park Avenue | ||
1298 | Florham Park, NJ 07932 | ||
1299 | Email: kpv@research.att.com | ||
1300 | Phone: 1 973 360 8630 | ||
1301 | |||
1302 | David G. Korn | ||
1303 | AT&T Labs, Room D237 | ||
1304 | 180 Park Avenue | ||
1305 | Florham Park, NJ 07932 | ||
1306 | Email: dgk@research.att.com | ||
1307 | Phone: 1 973 360 8602 | ||
1308 | |||
1309 | Jeffrey C. Mogul | ||
1310 | Western Research Laboratory | ||
1311 | Compaq Computer Corporation | ||
1312 | 250 University Avenue | ||
1313 | Palo Alto, California, 94305, U.S.A. | ||
1314 | Email: JeffMogul@acm.org | ||
1315 | Phone: 1 650 617 3304 (email preferred) | ||
1316 | |||
1317 | Joshua P. MacDonald | ||
1318 | Computer Science Division | ||
1319 | University of California, Berkeley | ||
1320 | 345 Soda Hall | ||
1321 | Berkeley, CA 94720 | ||
1322 | Email: jmacd@cs.berkeley.edu | ||
diff --git a/xdelta3/junk.py b/xdelta3/junk.py new file mode 100755 index 0000000..384951e --- /dev/null +++ b/xdelta3/junk.py | |||
@@ -0,0 +1,11 @@ | |||
1 | #!/usr/bin/python | ||
2 | |||
3 | bytes = '' | ||
4 | |||
5 | for x in range(0, 250): | ||
6 | bytes = bytes + ('%c%c%c%c=' % (x, x+1, x+2, x+3)) | ||
7 | |||
8 | for x in range(0, 250): | ||
9 | bytes = bytes + ('%c' % x) | ||
10 | |||
11 | print bytes | ||
diff --git a/xdelta3/linkxd3lib.c b/xdelta3/linkxd3lib.c new file mode 100755 index 0000000..d605fa6 --- /dev/null +++ b/xdelta3/linkxd3lib.c | |||
@@ -0,0 +1,47 @@ | |||
1 | #include "xdelta3.h" | ||
2 | |||
3 | extern int VVV; | ||
4 | |||
5 | int VVV; | ||
6 | |||
7 | void use(int r) /* Stores r in the global VVV; presumably a sink so the calls made in main() are not discarded -- confirm. */ | ||
8 | { | ||
9 | VVV = r; | ||
10 | } | ||
11 | |||
12 | int main() { /* References each xd3_* entry point named below, passing int results through use(); always returns 0. */ | ||
13 | xd3_config config; | ||
14 | xd3_stream stream; | ||
15 | xd3_source source; | ||
16 | |||
17 | xd3_init_config (& config, 0); | ||
18 | use (xd3_config_stream (&stream, &config)); | ||
19 | use (xd3_close_stream (&stream)); | ||
20 | xd3_abort_stream (&stream); | ||
21 | xd3_free_stream (&stream); | ||
22 | |||
23 | xd3_avail_input (& stream, NULL, 0); | ||
24 | xd3_consume_output (& stream); | ||
25 | |||
26 | use (xd3_bytes_on_srcblk (& source, 0)); | ||
27 | use (xd3_set_source (& stream, & source)); | ||
28 | xd3_set_flags (& stream, 0); | ||
29 | |||
30 | use (xd3_decode_completely (& stream, NULL, 0, NULL, NULL, 0)); | ||
31 | use (xd3_decode_input (&stream)); | ||
32 | use (xd3_decoder_needs_source (& stream)); | ||
33 | use (xd3_get_appheader (& stream, NULL, NULL)); | ||
34 | |||
35 | use (xd3_errstring (& stream) != NULL); /* was an (int) cast of the returned pointer, which truncates where pointers are wider than int */ | ||
36 | use (xd3_strerror (0) != NULL); /* same: compare against NULL instead of narrowing the pointer */ | ||
37 | |||
38 | #if XD3_ENCODER | ||
39 | use (xd3_encode_input (&stream)); | ||
40 | use (xd3_encode_completely (& stream, NULL, 0, NULL, NULL, 0)); | ||
41 | use (xd3_set_appheader (& stream)); | ||
42 | use (xd3_encoder_used_source (& stream)); | ||
43 | use (xd3_encoder_srcbase (& stream)); | ||
44 | use (xd3_encoder_srclen (& stream)); | ||
45 | #endif | ||
46 | return 0; | ||
47 | } | ||
diff --git a/xdelta3/rcs_junk.cc b/xdelta3/rcs_junk.cc new file mode 100755 index 0000000..ac49644 --- /dev/null +++ b/xdelta3/rcs_junk.cc | |||
@@ -0,0 +1,1861 @@ | |||
1 | typedef struct _RcsWalker RcsWalker; | ||
2 | typedef struct _RcsFile RcsFile; | ||
3 | typedef struct _RcsVersion RcsVersion; | ||
4 | typedef struct _RcsStats RcsStats; | ||
5 | typedef struct _IntStat IntStat; | ||
6 | typedef struct _DblStat DblStat; | ||
7 | typedef struct _BinCounter BinCounter; | ||
8 | typedef struct _ConfigOption ConfigOption; | ||
9 | |||
10 | struct _RcsWalker { /* Callback table and options passed to rcswalk() and the rcswalk_* helpers. */ | ||
11 | void* (* initialize) (void); | ||
12 | int (* finalize) (RcsStats* stats, void* data); | ||
13 | int (* onefile) (RcsFile* rcs, RcsStats* stats, void* data); | ||
14 | int (* dateorder) (RcsFile* rcs, RcsVersion* v, void* data); /* called by rcswalk_dateorder once per version, in versions_date order */ | ||
15 | int (* delta_orig) (RcsFile* rcs, RcsVersion* from, RcsVersion *to, void* data); /* called by rcswalk_delta_orig for each parent/child pair */ | ||
16 | int (* delta_date) (RcsFile* rcs, RcsVersion* from, RcsVersion *to, void* data); /* called by rcswalk_delta_date for each pair adjacent in date order */ | ||
17 | int min_versions; | ||
18 | int max_versions; | ||
19 | gboolean write_files; /* when set, rcswalk_checkout also writes the contents to a temp file */ | ||
20 | }; | ||
21 | |||
22 | struct _RcsVersion { /* One revision of an RcsFile. */ | ||
23 | RcsFile *rcs; /* owning file; assigned in rcswalk_build_graph */ | ||
24 | time_t date; /* may be raised by rcswalk_traverse_graph so it is later than the parent's */ | ||
25 | int dateseq; /* index of this version in rcs->versions_date after sorting */ | ||
26 | int chain_length; /* -1 until computed in rcswalk_build_graph; 0 for the head version */ | ||
27 | char *vname; /* dotted revision name such as "1.2"; passed to co via -p */ | ||
28 | off_t size; /* number of bytes read by rcswalk_checkout */ | ||
29 | int cc; /* number of children, set by rcswalk_traverse_graph */ | ||
30 | guint8* segment; /* checked-out contents; NULL while not checked out */ | ||
31 | char *filename; /* file assigned by rcswalk_checkout when walker->write_files is set */ | ||
32 | RcsVersion *parent; /* set by rcswalk_find_parent */ | ||
33 | GSList *children; /* versions whose parent is this one; built by rcswalk_find_parent */ | ||
34 | guint on_trunk : 1; /* set when vname contains exactly one '.' */ | ||
35 | }; | ||
36 | |||
37 | struct _RcsFile { /* One RCS file and the revision graph built from its rlog output. */ | ||
38 | char *filename; /* passed to the co and rlog commands */ | ||
39 | char *copyname; | ||
40 | char *headname; /* revision name looked up to find head_version */ | ||
41 | |||
42 | int version_count; /* parsed from the "total revisions: " line in rcswalk_load */ | ||
43 | int forward_count; /* parent/child pairs not both on the trunk (rcswalk_traverse_graph) */ | ||
44 | int reverse_count; /* parent/child pairs both on the trunk (rcswalk_traverse_graph) */ | ||
45 | int branch_count; /* sum of (children - 1) over versions with more than one child */ | ||
46 | |||
47 | RcsVersion *versions; /* array of version_count entries */ | ||
48 | RcsVersion **versions_date; /* pointers into versions, sorted by ascending date */ | ||
49 | |||
50 | RcsVersion *head_version; /* result of looking up headname; NULL if not found */ | ||
51 | RcsVersion *root_version; /* set by rcswalk_find_parent for a version with no parent */ | ||
52 | |||
53 | off_t total_size; | ||
54 | |||
55 | guint atflag : 1; | ||
56 | }; | ||
57 | |||
58 | struct _RcsStats { /* Statistics collectors handed to the walker callbacks and to rcswalk_report. */ | ||
59 | BinCounter *avg_version_size; /* rcswalk_dateorder adds each version's size, binned by date index */ | ||
60 | IntStat* version_stat; | ||
61 | IntStat* forward_stat; | ||
62 | IntStat* reverse_stat; | ||
63 | IntStat* branch_stat; | ||
64 | IntStat* unencoded_stat; | ||
65 | IntStat* literal_stat; | ||
66 | }; | ||
67 | |||
68 | struct _IntStat { /* Integer statistic; presumably aggregates of the values given to stat_int_add_item -- implementation not visible here. */ | ||
69 | const char* name; | ||
70 | int count; | ||
71 | long long sum; | ||
72 | long long min; | ||
73 | long long max; | ||
74 | |||
75 | GArray *values; | ||
76 | }; | ||
77 | |||
78 | struct _DblStat { /* Floating-point counterpart of IntStat; presumably fed by stat_dbl_add_item -- implementation not visible here. */ | ||
79 | const char* name; | ||
80 | int count; | ||
81 | double sum; | ||
82 | double min; | ||
83 | double max; | ||
84 | |||
85 | GArray *values; | ||
86 | }; | ||
87 | |||
88 | struct _BinCounter { /* Named set of bins; stat_bincount_add_item takes a bin index and a double value. */ | ||
89 | const char *name; | ||
90 | GPtrArray *bins; /* element type not visible here */ | ||
91 | }; | ||
92 | |||
93 | enum _ConfigArgument { /* Type of the `arg` field of ConfigOption. */ | ||
94 | CO_Required, | ||
95 | CO_Optional, | ||
96 | CO_None | ||
97 | }; | ||
98 | |||
99 | typedef enum _ConfigArgument ConfigArgument; | ||
100 | |||
101 | enum _ConfigOptionType { /* Type of the `type` field of ConfigOption. */ | ||
102 | CD_Bool, | ||
103 | CD_Int32, | ||
104 | CD_Double, | ||
105 | CD_String | ||
106 | }; | ||
107 | |||
108 | typedef enum _ConfigOptionType ConfigOptionType; | ||
109 | |||
110 | enum _ConfigStyle { /* Type of the `style` field of ConfigOption. */ | ||
111 | CS_Ignore, | ||
112 | CS_UseAsFile, | ||
113 | CS_Use | ||
114 | }; | ||
115 | |||
116 | typedef enum _ConfigStyle ConfigStyle; | ||
117 | |||
118 | struct _ConfigOption { /* One configuration option, as registered through config_register(). */ | ||
119 | const char *name; | ||
120 | const char *abbrev; | ||
121 | ConfigStyle style; | ||
122 | ConfigArgument arg; | ||
123 | ConfigOptionType type; | ||
124 | void *value; /* address of the variable tied to the option; the option tables in this file pass a const char* variable for CD_String */ | ||
125 | gboolean found; /* omitted from the option-table initializers in this file, so it starts as zero */ | ||
126 | }; | ||
127 | |||
128 | /* RCS inspection stuff | ||
129 | */ | ||
130 | |||
131 | void rcswalk_init (void); | ||
132 | int rcswalk (RcsWalker *walker, const char* copy_base); | ||
133 | void rcswalk_report (RcsStats* stats); | ||
134 | |||
135 | IntStat* stat_int_new (const char* name); | ||
136 | void stat_int_add_item (IntStat* stat, long long v); | ||
137 | void stat_int_report (IntStat* stat); | ||
138 | |||
139 | DblStat* stat_dbl_new (const char* name); | ||
140 | void stat_dbl_add_item (DblStat* stat, double v); | ||
141 | void stat_dbl_report (DblStat* stat); | ||
142 | |||
143 | BinCounter* stat_bincount_new (const char* name); | ||
144 | void stat_bincount_add_item (BinCounter* bc, int bin, double val); | ||
145 | void stat_bincount_report (BinCounter* bc); | ||
146 | |||
147 | /* Experiment configuration stuff | ||
148 | */ | ||
149 | |||
150 | void config_register (ConfigOption *opts, int nopts); | ||
151 | int config_parse (const char* config_file); | ||
152 | int config_done (void); | ||
153 | void config_help (void); | ||
154 | void config_set_string (const char* var, const char* val); | ||
155 | int config_clear_dir (const char* dir); | ||
156 | int config_create_dir (const char* dir); | ||
157 | FILE* config_output (const char* fmt, ...); | ||
158 | |||
159 | #ifdef __cplusplus | ||
160 | } | ||
161 | #endif | ||
162 | |||
163 | #endif | ||
164 | #include "rcswalk.h" | ||
165 | #include "edsio.h" | ||
166 | #include <stdio.h> | ||
167 | #include <stdlib.h> | ||
168 | #include <string.h> | ||
169 | #include <sys/types.h> | ||
170 | #include <sys/stat.h> | ||
171 | #include <sys/wait.h> | ||
172 | #include <fcntl.h> | ||
173 | #include <errno.h> | ||
174 | #include <dirent.h> | ||
175 | #include <unistd.h> | ||
176 | #include <math.h> | ||
177 | |||
178 | #undef BUFSIZE | ||
179 | #define BUFSIZE (1<<14) | ||
180 | |||
181 | char *tmp_file_1; /* first of two shared file names handed out by rcswalk_checkout */ | ||
182 | gboolean tmp_file_1_free = TRUE; /* TRUE while no version holds tmp_file_1 */ | ||
183 | char *tmp_file_2; /* second shared file name */ | ||
184 | gboolean tmp_file_2_free = TRUE; /* TRUE while no version holds tmp_file_2 */ | ||
185 | |||
186 | int skip_count; | ||
187 | int small_count; | ||
188 | int large_count; | ||
189 | int process_count; | ||
190 | |||
191 | extern time_t str2time (char const *, time_t, long); | ||
192 | |||
193 | static guint8 readbuf[BUFSIZE]; /* scratch buffer for fread in rcswalk_checkout */ | ||
194 | |||
195 | static const char* rcswalk_input_dir = NULL; | ||
196 | static const char* config_output_base = NULL; | ||
197 | static const char* config_output_dir = NULL; | ||
198 | static const char* rcswalk_experiment = NULL; | ||
199 | |||
200 | static ConfigOption rcswalk_options[] = { /* fields in order: name, abbrev, style, arg, type, value */ | ||
201 | { "rcswalk_experiment", "ex", CS_Use, CO_Required, CD_String, & rcswalk_experiment }, | ||
202 | { "rcs_input_dir", "id", CS_UseAsFile, CO_Required, CD_String, & rcswalk_input_dir } | ||
203 | }; | ||
204 | |||
205 | static ConfigOption config_options[] = { /* same field order as rcswalk_options */ | ||
206 | { "config_output_base", "ob", CS_Ignore, CO_Required, CD_String, & config_output_base } | ||
207 | }; | ||
208 | |||
209 | |||
210 | void /* Frees v's checked-out contents, releases or frees its file name, and clears both pointers. */ | ||
211 | rcswalk_free_segment (RcsVersion *v) | ||
212 | { | ||
213 | if (v->segment) | ||
214 | g_free (v->segment); | ||
215 | |||
216 | if (v->filename == tmp_file_1) /* pointer comparison: a shared temp name is marked available, not freed */ | ||
217 | tmp_file_1_free = TRUE; | ||
218 | else if (v->filename == tmp_file_2) | ||
219 | tmp_file_2_free = TRUE; | ||
220 | else if (v->filename) /* any other name is treated as owned by the version */ | ||
221 | g_free (v->filename); | ||
222 | |||
223 | v->segment = NULL; | ||
224 | v->filename = NULL; | ||
225 | } | ||
226 | |||
227 | int /* Reads revision v->vname of rcs->filename from a "co -p" pipe into v->segment (length in v->size) and, when walker->write_files is set, also writes it to a free temp file recorded in v->filename. Returns 0 on success or an errno-style code. */ | ||
228 | rcswalk_checkout (RcsFile* rcs, RcsWalker* walker, RcsVersion *v) | ||
229 | { | ||
230 | FILE* out; | ||
231 | char cmdbuf[1024]; | ||
232 | int nread; | ||
233 | int alloc = BUFSIZE; | ||
234 | int pos = 0; | ||
235 | int err; /* errno captured right after a failure, before logging or cleanup can overwrite it */ | ||
236 | nread = snprintf (cmdbuf, sizeof (cmdbuf), "co -ko -p%s %s 2>/dev/null\n", v->vname, rcs->filename); | ||
237 | if (nread < 0 || nread >= (int) sizeof (cmdbuf)) /* refuse a truncated command instead of overrunning cmdbuf */ | ||
238 | return ENAMETOOLONG; | ||
239 | g_assert (! v->segment); | ||
240 | v->segment = g_malloc (alloc); | ||
241 | |||
242 | if (! (out = popen (cmdbuf, "r"))) | ||
243 | { | ||
244 | err = errno ? errno : EIO; /* never report success on a failure path */ | ||
245 | g_warning ("popen failed: %s: %s", cmdbuf, g_strerror (err)); | ||
246 | return err; | ||
247 | } | ||
248 | for (;;) | ||
249 | { | ||
250 | nread = fread (readbuf, 1, BUFSIZE, out); | ||
251 | if (ferror (out)) /* fread never returns a negative count; read errors show up only through ferror */ | ||
252 | { | ||
253 | err = errno ? errno : EIO; | ||
254 | g_warning ("fread failed: %s", g_strerror (err)); | ||
255 | pclose (out); /* do not leak the pipe */ | ||
256 | return err; | ||
257 | } | ||
258 | |||
259 | if (nread == 0) | ||
260 | break; | ||
261 | if (pos + nread > alloc) /* one doubling is enough: nread <= BUFSIZE <= alloc */ | ||
262 | { | ||
263 | alloc *= 2; | ||
264 | v->segment = g_realloc (v->segment, alloc); | ||
265 | } | ||
266 | |||
267 | memcpy (v->segment + pos, readbuf, nread); | ||
268 | |||
269 | pos += nread; | ||
270 | } | ||
271 | if (pclose (out) < 0) /* only a pclose failure is checked; the exit status of co is not inspected */ | ||
272 | { | ||
273 | err = errno ? errno : EIO; | ||
274 | g_warning ("pclose failed"); | ||
275 | return err; | ||
276 | } | ||
277 | |||
278 | v->size = pos; | ||
279 | |||
280 | if (walker->write_files) | ||
281 | { | ||
282 | char* file = NULL; | ||
283 | |||
284 | if (! file && tmp_file_1_free) | ||
285 | { | ||
286 | file = tmp_file_1; | ||
287 | tmp_file_1_free = FALSE; | ||
288 | } | ||
289 | |||
290 | if (! file && tmp_file_2_free) | ||
291 | { | ||
292 | file = tmp_file_2; | ||
293 | tmp_file_2_free = FALSE; | ||
294 | } | ||
295 | |||
296 | g_assert (file); /* at most two versions may hold temp files at once */ | ||
297 | |||
298 | v->filename = file; | ||
299 | if (! (out = fopen (file, "w"))) | ||
300 | { | ||
301 | err = errno ? errno : EIO; | ||
302 | g_warning ("fopen failed: %s\n", file); | ||
303 | return err; | ||
304 | } | ||
305 | if (v->size > 0 && fwrite (v->segment, v->size, 1, out) != 1) /* a zero-length version writes nothing and is not an error */ | ||
306 | { | ||
307 | err = errno ? errno : EIO; | ||
308 | g_warning ("fwrite failed: %s\n", file); | ||
309 | fclose (out); /* do not leak the stream */ | ||
310 | return err; | ||
311 | } | ||
312 | if (fclose (out) < 0) | ||
313 | { | ||
314 | err = errno ? errno : EIO; | ||
315 | g_warning ("fclose failed: %s\n", file); | ||
316 | return err; | ||
317 | } | ||
318 | } | ||
319 | return 0; | ||
320 | } | ||
321 | |||
322 | int /* Calls walker->delta_date (rcs, vf, vt, data) for each adjacent pair in rcs->versions_date, with vt = entry i and vf = entry i+1. Returns 0, or the first non-zero result of a checkout or callback. */ | ||
323 | rcswalk_delta_date (RcsFile* rcs, RcsWalker* walker, void* data) | ||
324 | { | ||
325 | int i; | ||
326 | int ret; | ||
327 | RcsVersion *vf = NULL; | ||
328 | RcsVersion *vt = NULL; | ||
329 | |||
330 | for (i = 0; i < (rcs->version_count-1); i += 1) | ||
331 | { | ||
332 | vf = rcs->versions_date[i+1]; | ||
333 | vt = rcs->versions_date[i]; | ||
334 | |||
335 | if (! vt->segment && (ret = rcswalk_checkout (rcs, walker, vt))) { /* after the first pass vt is still loaded: it was the previous pass's vf */ | ||
336 | return ret; | ||
337 | } | ||
338 | |||
339 | if ((ret = rcswalk_checkout (rcs, walker, vf))) { | ||
340 | return ret; | ||
341 | } | ||
342 | |||
343 | if ((ret = walker->delta_date (rcs, vf, vt, data))) { | ||
344 | return ret; | ||
345 | } | ||
346 | |||
347 | rcswalk_free_segment (vt); /* vf stays loaded and becomes vt on the next pass */ | ||
348 | } | ||
349 | |||
350 | if (vf) rcswalk_free_segment (vf); /* vf and vt are NULL only when the loop never ran */ | ||
351 | if (vt) rcswalk_free_segment (vt); | ||
352 | |||
353 | return 0; | ||
354 | } | ||
355 | |||
356 | int /* Recursive walk over version's children: calls walker->delta_orig once per parent/child pair and adds 1 to *count for each. Returns 0, or the first non-zero result of a checkout, callback or recursive call. */ | ||
357 | rcswalk_delta_orig (RcsFile* rcs, RcsWalker* walker, RcsVersion* version, int *count, void* data) | ||
358 | { | ||
359 | int ret; | ||
360 | GSList *c; | ||
361 | RcsVersion *child; | ||
362 | |||
363 | for (c = version->children; c; c = c->next) | ||
364 | { | ||
365 | gboolean reverse; | ||
366 | |||
367 | child = c->data; | ||
368 | |||
369 | if (! version->segment) /* released at the end of the previous pass, or not yet loaded */ | ||
370 | { | ||
371 | if ((ret = rcswalk_checkout (rcs, walker, version))) { | ||
372 | return ret; | ||
373 | } | ||
374 | } | ||
375 | |||
376 | if ((ret = rcswalk_checkout (rcs, walker, child))) { | ||
377 | return ret; | ||
378 | } | ||
379 | |||
380 | reverse = version->on_trunk && child->on_trunk; | ||
381 | |||
382 | (* count) += 1; | ||
383 | |||
384 | if ((ret = walker->delta_orig (rcs, reverse ? child : version, reverse ? version : child, data))) { /* trunk pairs are passed as (child, version), all others as (version, child) */ | ||
385 | return ret; | ||
386 | } | ||
387 | |||
388 | rcswalk_free_segment (version); /* released before recursing; the child stays loaded for its own pass */ | ||
389 | |||
390 | if ((ret = rcswalk_delta_orig (rcs, walker, child, count, data))) { | ||
391 | return ret; | ||
392 | } | ||
393 | } | ||
394 | |||
395 | rcswalk_free_segment (version); | ||
396 | return 0; | ||
397 | } | ||
398 | |||
399 | int /* For each entry of rcs->versions_date in order: checks it out, records its size in stats->avg_version_size under bin i, calls walker->dateorder, then releases it. Returns 0 or the first non-zero result. */ | ||
400 | rcswalk_dateorder (RcsFile* rcs, RcsWalker *walker, RcsStats *stats, void* data) | ||
401 | { | ||
402 | int i, ret; | ||
403 | |||
404 | for (i = 0; i < rcs->version_count; i += 1) | ||
405 | { | ||
406 | RcsVersion *v = rcs->versions_date[i]; | ||
407 | |||
408 | if ((ret = rcswalk_checkout (rcs, walker, v))) { | ||
409 | return ret; | ||
410 | } | ||
411 | |||
412 | stat_bincount_add_item (stats->avg_version_size, i, v->size); | ||
413 | |||
414 | if ((ret = walker->dateorder (rcs, v, data))) { /* on a non-zero result v is returned still checked out */ | ||
415 | return ret; | ||
416 | } | ||
417 | |||
418 | rcswalk_free_segment (v); | ||
419 | } | ||
420 | |||
421 | return 0; | ||
422 | } | ||
423 | |||
424 | gboolean /* If *line_p begins with str, advances *line_p past it and returns TRUE; otherwise leaves *line_p unchanged and returns FALSE. */ | ||
425 | rcswalk_match (char** line_p, char* str) | ||
426 | { | ||
427 | int len = strlen (str); | ||
428 | |||
429 | if (strncmp (*line_p, str, len) == 0) | ||
430 | { | ||
431 | (*line_p) += len; | ||
432 | return TRUE; | ||
433 | } | ||
434 | |||
435 | return FALSE; | ||
436 | } | ||
437 | |||
438 | void /* Derives v's parent from its dotted revision name using hash (name -> RcsVersion), then links v into the parent's children list; if no parent exists, records v as rcs->root_version instead. Also sets v->on_trunk when the name has exactly one '.'. */ | ||
439 | rcswalk_find_parent (RcsFile *rcs, GHashTable* hash, RcsVersion *v) | ||
440 | { | ||
441 | char *lastdot; | ||
442 | char mbuf[1024]; | ||
443 | int lastn; | ||
444 | RcsVersion *p; | ||
445 | |||
446 | strcpy (mbuf, v->vname); /* working copy; edited in place below -- assumes vname fits in mbuf */ | ||
447 | |||
448 | if (! (lastdot = strchr (mbuf, '.'))) /* a name without any '.' aborts the program */ | ||
449 | abort (); | ||
450 | |||
451 | if (! (lastdot = strchr (lastdot+1, '.'))) | ||
452 | v->on_trunk = TRUE; | ||
453 | |||
454 | lastdot = strrchr (mbuf, '.'); | ||
455 | lastn = atoi (lastdot + 1); | ||
456 | |||
457 | do /* each pass forms one candidate name in mbuf; repeats until the lookup at the bottom succeeds */ | ||
458 | { | ||
459 | if (lastn == 1) /* nothing earlier at this level: drop the last component */ | ||
460 | { | ||
461 | (*lastdot) = 0; | ||
462 | |||
463 | if (strcmp (mbuf, "1") == 0) | ||
464 | { | ||
465 | /* Assuming the first version is always "1.1". | ||
466 | */ | ||
467 | rcs->root_version = v; | ||
468 | return; | ||
469 | } | ||
470 | else if (! (lastdot = strrchr (mbuf, '.'))) | ||
471 | { | ||
472 | int i = 1; | ||
473 | int br = atoi (mbuf) - 1; | ||
474 | RcsVersion *p2 = NULL; | ||
475 | |||
476 | /* Now we have something like "2.1" and need to | ||
477 | * search for the highest "1.x" version. | ||
478 | */ | ||
479 | |||
480 | do | ||
481 | { | ||
482 | sprintf (mbuf, "%d.%d", br, i++); | ||
483 | p = p2; /* p trails one step behind: the last name that was found */ | ||
484 | } | ||
485 | while ((p2 = g_hash_table_lookup (hash, mbuf))); | ||
486 | |||
487 | if (p == NULL) /* not even "<br>.1" exists */ | ||
488 | { | ||
489 | rcs->root_version = v; | ||
490 | return; | ||
491 | } | ||
492 | |||
493 | break; /* leaves the outer loop with p as the parent */ | ||
494 | } | ||
495 | else | ||
496 | { | ||
497 | /* 1.2.3.1 => 1.2 */ | ||
498 | (*lastdot) = 0; | ||
499 | lastdot = strrchr (mbuf, '.'); | ||
500 | lastn = atoi (lastdot + 1); | ||
501 | } | ||
502 | } | ||
503 | else | ||
504 | { | ||
505 | lastn -= 1; /* try the previous number at the same level */ | ||
506 | sprintf (lastdot, ".%d", lastn); | ||
507 | } | ||
508 | } | ||
509 | while (! (p = g_hash_table_lookup (hash, mbuf))); | ||
510 | |||
511 | g_assert (p); | ||
512 | |||
513 | v->parent = p; | ||
514 | |||
515 | p->children = g_slist_prepend (p->children, v); | ||
516 | } | ||
517 | |||
518 | int /* Recursive pass over version and its descendants: records the child count in cc, accumulates branch/forward/reverse counts on rcs, keeps dates later than the parent's, and sets chain_length on versions from which head_version is reachable. Returns chain_length + 1 for those, otherwise -1. */ | ||
519 | rcswalk_traverse_graph (RcsFile* rcs, RcsVersion* version, RcsVersion *parent) | ||
520 | { | ||
521 | GSList *c; | ||
522 | int distance = -1; | ||
523 | |||
524 | version->cc = g_slist_length (version->children); | ||
525 | |||
526 | if (version->cc > 1) | ||
527 | rcs->branch_count += (version->cc - 1); | ||
528 | |||
529 | if (parent) | ||
530 | { | ||
531 | /* Insure that there is proper date ordering. */ | ||
532 | if (version->date <= parent->date) | ||
533 | version->date = parent->date + 1; | ||
534 | |||
535 | if (parent->on_trunk && version->on_trunk) | ||
536 | rcs->reverse_count += 1; | ||
537 | else | ||
538 | rcs->forward_count += 1; | ||
539 | } | ||
540 | |||
541 | for (c = version->children; c; c = c->next) | ||
542 | { | ||
543 | int c_dist = rcswalk_traverse_graph (rcs, c->data, version); | ||
544 | |||
545 | distance = MAX (distance, c_dist); /* stays -1 unless some child leads to the head */ | ||
546 | } | ||
547 | |||
548 | if (version == rcs->head_version) /* the head is distance 0, whatever its children returned */ | ||
549 | distance = 0; | ||
550 | |||
551 | if (distance >= 0) | ||
552 | { | ||
553 | version->chain_length = distance; | ||
554 | |||
555 | return distance + 1; | ||
556 | } | ||
557 | |||
558 | return -1; | ||
559 | } | ||
560 | |||
561 | void /* Recursive pass that gives every version whose chain_length is still negative the value parent->chain_length + 1; the root (parent == NULL) must already have a non-negative value. */ | ||
562 | rcswalk_compute_chain_length (RcsFile* rcs, RcsVersion* version, RcsVersion *parent) | ||
563 | { | ||
564 | GSList *c; | ||
565 | |||
566 | if (! parent) | ||
567 | { | ||
568 | g_assert (version->chain_length >= 0); | ||
569 | } | ||
570 | else if (version->chain_length < 0) /* values already set by rcswalk_traverse_graph are kept */ | ||
571 | { | ||
572 | version->chain_length = parent->chain_length + 1; | ||
573 | } | ||
574 | |||
575 | for (c = version->children; c; c = c->next) | ||
576 | { | ||
577 | rcswalk_compute_chain_length (rcs, c->data, version); | ||
578 | } | ||
579 | } | ||
580 | |||
581 | int /* qsort comparator over RcsVersion* elements: orders by ascending date and returns -1, 0 or 1. */ | ||
582 | rcswalk_date_compare (const void* a, const void* b) | ||
583 | { | ||
584 | RcsVersion **ra = (void*) a; | ||
585 | RcsVersion **rb = (void*) b; | ||
586 | |||
587 | return ((*ra)->date > (*rb)->date) - ((*ra)->date < (*rb)->date); /* three-way compare: narrowing a time_t difference to int can overflow or flip sign */ | ||
588 | } | ||
589 | |||
590 | int /* Builds the revision graph for rcs: links every version to its parent, resolves head_version, computes chain lengths, and fills versions_date sorted by date with each version's dateseq. Always returns 0. */ | ||
591 | rcswalk_build_graph (RcsFile* rcs) | ||
592 | { | ||
593 | GHashTable* hash = g_hash_table_new (g_str_hash, g_str_equal); /* maps vname to its RcsVersion; keys and values point into rcs->versions */ | ||
594 | int i; | ||
595 | |||
596 | for (i = 0; i < rcs->version_count; i += 1) | ||
597 | g_hash_table_insert (hash, rcs->versions[i].vname, rcs->versions + i); | ||
598 | |||
599 | for (i = 0; i < rcs->version_count; i += 1) | ||
600 | { | ||
601 | RcsVersion *v = rcs->versions + i; | ||
602 | |||
603 | v->chain_length = -1; /* marks the value as not yet computed */ | ||
604 | v->rcs = rcs; | ||
605 | |||
606 | rcswalk_find_parent (rcs, hash, v); | ||
607 | } | ||
608 | |||
609 | rcs->head_version = g_hash_table_lookup (hash, rcs->headname); /* NULL when headname is not among the versions */ | ||
610 | |||
611 | rcswalk_traverse_graph (rcs, rcs->root_version, NULL); /* assumes rcswalk_find_parent set root_version */ | ||
612 | |||
613 | rcswalk_compute_chain_length (rcs, rcs->root_version, NULL); | ||
614 | |||
615 | for (i = 0; i < rcs->version_count; i += 1) | ||
616 | rcs->versions_date[i] = rcs->versions + i; | ||
617 | |||
618 | qsort (rcs->versions_date, rcs->version_count, sizeof (RcsVersion*), & rcswalk_date_compare); /* runs after the traversal, which may have adjusted dates */ | ||
619 | |||
620 | for (i = 0; i < rcs->version_count; i += 1) | ||
621 | { | ||
622 | RcsVersion *v = rcs->versions_date[i]; | ||
623 | |||
624 | v->dateseq = i; | ||
625 | } | ||
626 | |||
627 | g_hash_table_destroy (hash); | ||
628 | |||
629 | return 0; | ||
630 | } | ||
631 | |||
632 | #define HEAD_STATE 0 | ||
633 | #define BAR_STATE 1 | ||
634 | #define REV_STATE 2 | ||
635 | #define DATE_STATE 3 | ||
636 | |||
637 | int | ||
638 | rcswalk_load (RcsFile *rcs, gboolean *skip) | ||
639 | { | ||
640 | FILE* rlog; | ||
641 | char cmdbuf[1024]; | ||
642 | char oneline[1024], *oneline_p; | ||
643 | char rbuf[1024]; | ||
644 | int version_i = 0, ret; | ||
645 | int read_state = HEAD_STATE; | ||
646 | |||
647 | sprintf (cmdbuf, "rlog %s", rcs->filename); | ||
648 | |||
649 | if (! (rlog = popen (cmdbuf, "r"))) | ||
650 | { | ||
651 | g_warning ("popen failed: %s", cmdbuf); | ||
652 | return errno; | ||
653 | } | ||
654 | |||
655 | rcs->headname = NULL; | ||
656 | |||
657 | while (fgets (oneline, 1024, rlog)) | ||
658 | { | ||
659 | oneline_p = oneline; | ||
660 | |||
661 | if (read_state == HEAD_STATE && rcswalk_match (& oneline_p, "total revisions: ")) | ||
662 | { | ||
663 | if (sscanf (oneline_p, "%d", & rcs->version_count) != 1) | ||
664 | goto badscan; | ||
665 | |||
666 | rcs->versions = g_new0 (RcsVersion, rcs->version_count); | ||
667 | rcs->versions_date = g_new (RcsVersion*, rcs->version_count); | ||
668 | read_state = BAR_STATE; | ||
669 | } | ||
670 | else if (read_state == HEAD_STATE && rcswalk_match (& oneline_p, "head: ")) | ||
671 | { | ||
672 | if (sscanf (oneline_p, "%s", rbuf) != 1) | ||
673 | goto badscan; | ||
674 | |||
675 | rcs->headname = g_strdup (rbuf); | ||
676 | read_state = HEAD_STATE; /* no change */ | ||
677 | } | ||
678 | else if (read_state == BAR_STATE && rcswalk_match (& oneline_p, "----------------------------")) | ||
679 | { | ||
680 | read_state = REV_STATE; | ||
681 | } | ||
682 | else if (read_state == REV_STATE && rcswalk_match (& oneline_p, "revision ")) | ||
683 | { | ||
684 | if (version_i >= rcs->version_count) | ||
685 | { | ||
686 | /* jkh likes to insert the rlog of one RCS file into the log | ||
687 | * message of another, and this can confuse things. Why, oh why, | ||
688 | * doesn't rlog have an option to not print the log? | ||
689 | */ | ||
690 | fprintf (stderr, "rcswalk: too many versions: skipping file %s\n", rcs->filename); | ||
691 | *skip = TRUE; | ||
692 | skip_count += 1; | ||
693 | pclose (rlog); | ||
694 | return 0; | ||
695 | } | ||
696 | |||
697 | if (sscanf (oneline_p, "%s", rbuf) != 1) | ||
698 | goto badscan; | ||
699 | |||
700 | rcs->versions[version_i].vname = g_strdup (rbuf); | ||
701 | read_state = DATE_STATE; | ||
702 | |||
703 | g_assert (rcs->versions[version_i].vname); | ||
704 | } | ||
705 | else if (read_state == DATE_STATE && rcswalk_match (& oneline_p, "date: ")) | ||
706 | { | ||
707 | char* semi = strchr (oneline_p, ';'); | ||
708 | |||
709 | if (! semi) | ||
710 | goto badscan; | ||
711 | |||
712 | strncpy (rbuf, oneline_p, semi - oneline_p); | ||
713 | |||
714 | rbuf[semi - oneline_p] = 0; | ||
715 | |||
716 | rcs->versions[version_i].date = str2time (rbuf, 0, 0); | ||
717 | |||
718 | version_i += 1; | ||
719 | read_state = BAR_STATE; | ||
720 | } | ||
721 | } | ||
722 | |||
723 | if (! rcs->headname) | ||
724 | { | ||
725 | fprintf (stderr, "rcswalk: no head version: skipping file %s\n", rcs->filename); | ||
726 | *skip = TRUE; | ||
727 | skip_count += 1; | ||
728 | pclose (rlog); | ||
729 | return 0; | ||
730 | } | ||
731 | |||
732 | if (pclose (rlog) < 0) | ||
733 | { | ||
734 | g_warning ("pclose failed: %s", cmdbuf); | ||
735 | return errno; | ||
736 | } | ||
737 | |||
738 | if ((ret = rcswalk_build_graph (rcs))) { | ||
739 | return ret; | ||
740 | } | ||
741 | |||
742 | return 0; | ||
743 | |||
744 | badscan: | ||
745 | |||
746 | pclose (rlog); | ||
747 | |||
748 | g_warning ("rlog syntax error"); | ||
749 | return -1; | ||
750 | } | ||
751 | |||
752 | void | ||
753 | rcswalk_free (RcsFile* rcs) | ||
754 | { | ||
755 | int i; | ||
756 | |||
757 | for (i = 0; i < rcs->version_count; i += 1) | ||
758 | { | ||
759 | g_free (rcs->versions[i].vname); | ||
760 | g_slist_free (rcs->versions[i].children); | ||
761 | } | ||
762 | |||
763 | g_free (rcs->filename); | ||
764 | g_free (rcs->headname); | ||
765 | g_free (rcs->versions); | ||
766 | g_free (rcs->versions_date); | ||
767 | g_free (rcs); | ||
768 | } | ||
769 | |||
770 | int | ||
771 | rcswalk_one (char* rcsfile, char* copyfile, RcsWalker* walker, RcsStats* stats, void* data) | ||
772 | { | ||
773 | RcsFile* rcs; | ||
774 | int i, ret; | ||
775 | long long maxsize = 0; | ||
776 | gboolean skip = FALSE; | ||
777 | |||
778 | rcs = g_new0 (RcsFile, 1); | ||
779 | |||
780 | rcs->filename = g_strdup (rcsfile); | ||
781 | rcs->copyname = copyfile; | ||
782 | |||
783 | if ((ret = rcswalk_load (rcs, & skip))) { | ||
784 | return ret; | ||
785 | } | ||
786 | |||
787 | if (walker->min_versions > rcs->version_count) | ||
788 | { | ||
789 | small_count += 1; | ||
790 | skip = TRUE; | ||
791 | } | ||
792 | |||
793 | if (walker->max_versions < rcs->version_count) | ||
794 | { | ||
795 | large_count += 1; | ||
796 | skip = TRUE; | ||
797 | } | ||
798 | |||
799 | if (! skip) | ||
800 | { | ||
801 | process_count += 1; | ||
802 | |||
803 | if (walker->dateorder && (ret = rcswalk_dateorder (rcs, walker, stats, data))) { | ||
804 | return ret; | ||
805 | } | ||
806 | |||
807 | if (walker->delta_orig) | ||
808 | { | ||
809 | int count = 0; | ||
810 | |||
811 | if ((ret = rcswalk_delta_orig (rcs, walker, rcs->root_version, & count, data))) { | ||
812 | return ret; | ||
813 | } | ||
814 | |||
815 | g_assert (count == (rcs->version_count - 1)); | ||
816 | } | ||
817 | |||
818 | if (walker->delta_date && (ret = rcswalk_delta_date (rcs, walker, data))) { | ||
819 | return ret; | ||
820 | } | ||
821 | |||
822 | for (i = 0; i < rcs->version_count; i += 1) | ||
823 | { | ||
824 | rcs->total_size += rcs->versions[i].size; | ||
825 | maxsize = MAX (rcs->versions[i].size, maxsize); | ||
826 | } | ||
827 | |||
828 | stat_int_add_item (stats->version_stat, rcs->version_count); | ||
829 | stat_int_add_item (stats->forward_stat, rcs->forward_count); | ||
830 | stat_int_add_item (stats->reverse_stat, rcs->reverse_count); | ||
831 | stat_int_add_item (stats->branch_stat, rcs->branch_count); | ||
832 | stat_int_add_item (stats->unencoded_stat, rcs->total_size); | ||
833 | stat_int_add_item (stats->literal_stat, maxsize); | ||
834 | |||
835 | if (walker->onefile && (ret = walker->onefile (rcs, stats, data))) { | ||
836 | return ret; | ||
837 | } | ||
838 | } | ||
839 | |||
840 | rcswalk_free (rcs); | ||
841 | |||
842 | return 0; | ||
843 | } | ||
844 | |||
845 | int | ||
846 | rcswalk_dir (const char* dir, RcsWalker* walker, RcsStats* stats, void* data, const char* copy_dir) | ||
847 | { | ||
848 | int ret; | ||
849 | DIR* thisdir; | ||
850 | struct dirent* ent; | ||
851 | |||
852 | if (copy_dir && (ret = config_create_dir (copy_dir))) { | ||
853 | return ret; | ||
854 | } | ||
855 | |||
856 | if (! (thisdir = opendir (dir))) | ||
857 | { | ||
858 | g_warning ("opendir failed: %s", dir); | ||
859 | return errno; | ||
860 | } | ||
861 | |||
862 | while ((ent = readdir (thisdir))) | ||
863 | { | ||
864 | char* name = ent->d_name; | ||
865 | int len; | ||
866 | struct stat buf; | ||
867 | char* fullname; | ||
868 | char* copyname = NULL; | ||
869 | |||
870 | if (strcmp (name, ".") == 0) | ||
871 | continue; | ||
872 | |||
873 | if (strcmp (name, "..") == 0) | ||
874 | continue; | ||
875 | |||
876 | len = strlen (name); | ||
877 | |||
878 | fullname = g_strdup_printf ("%s/%s", dir, name); | ||
879 | |||
880 | if (copy_dir) | ||
881 | copyname = g_strdup_printf ("%s/%s", copy_dir, name); | ||
882 | |||
883 | if (len > 2 && strcmp (name + len - 2, ",v") == 0) | ||
884 | { | ||
885 | if ((ret = rcswalk_one (fullname, copyname, walker, stats, data))) { | ||
886 | goto abort; | ||
887 | } | ||
888 | } | ||
889 | else | ||
890 | { | ||
891 | if (stat (fullname, & buf) < 0) | ||
892 | { | ||
893 | g_warning ("stat failed: %s\n", fullname); | ||
894 | goto abort; | ||
895 | } | ||
896 | |||
897 | if (S_ISDIR (buf.st_mode)) | ||
898 | { | ||
899 | if ((ret = rcswalk_dir (fullname, walker, stats, data, copyname))) { | ||
900 | goto abort; | ||
901 | } | ||
902 | } | ||
903 | } | ||
904 | |||
905 | g_free (fullname); | ||
906 | |||
907 | if (copyname) | ||
908 | g_free (copyname); | ||
909 | } | ||
910 | |||
911 | if (closedir (thisdir) < 0) | ||
912 | { | ||
913 | g_warning ("closedir failed: %s", dir); | ||
914 | return errno; | ||
915 | } | ||
916 | |||
917 | return 0; | ||
918 | |||
919 | abort: | ||
920 | |||
921 | if (thisdir) | ||
922 | closedir (thisdir); | ||
923 | |||
924 | return -1; | ||
925 | } | ||
926 | |||
927 | void | ||
928 | rcswalk_init (void) | ||
929 | { | ||
930 | config_register (rcswalk_options, ARRAY_SIZE (rcswalk_options)); | ||
931 | } | ||
932 | |||
933 | int | ||
934 | rcswalk (RcsWalker *walker, const char* copy_base) | ||
935 | { | ||
936 | void* data = NULL; | ||
937 | RcsStats stats; | ||
938 | int ret; | ||
939 | |||
940 | skip_count = 0; | ||
941 | small_count = 0; | ||
942 | process_count = 0; | ||
943 | large_count = 0; | ||
944 | |||
945 | memset (& stats, 0, sizeof (stats)); | ||
946 | |||
947 | stats.avg_version_size = stat_bincount_new ("AvgVersionSize"); /* @@@ leak */ | ||
948 | stats.version_stat = stat_int_new ("Version"); /* @@@ leak */ | ||
949 | stats.forward_stat = stat_int_new ("Forward"); /* @@@ leak */ | ||
950 | stats.reverse_stat = stat_int_new ("Reverse"); /* @@@ leak */ | ||
951 | stats.branch_stat = stat_int_new ("Branch"); /* @@@ leak */ | ||
952 | stats.unencoded_stat = stat_int_new ("Unencoded"); /* @@@ leak */ | ||
953 | stats.literal_stat = stat_int_new ("Literal"); /* @@@ leak */ | ||
954 | |||
955 | tmp_file_1 = g_strdup_printf ("%s/rcs1.%d", g_get_tmp_dir (), (int) getpid ()); | ||
956 | tmp_file_2 = g_strdup_printf ("%s/rcs2.%d", g_get_tmp_dir (), (int) getpid ()); | ||
957 | |||
958 | if (walker->initialize) | ||
959 | data = walker->initialize (); | ||
960 | |||
961 | if ((ret = rcswalk_dir (rcswalk_input_dir, walker, & stats, data, copy_base))) { | ||
962 | return ret; | ||
963 | } | ||
964 | |||
965 | if (walker->finalize) | ||
966 | { | ||
967 | if ((ret = walker->finalize (& stats, data))) { | ||
968 | return ret; | ||
969 | } | ||
970 | } | ||
971 | |||
972 | unlink (tmp_file_1); | ||
973 | unlink (tmp_file_2); | ||
974 | |||
975 | fprintf (stderr, "rcswalk: processed %d files: too small %d; too large: %d; damaged: %d\n", process_count, small_count, large_count, skip_count); | ||
976 | |||
977 | return 0; | ||
978 | } | ||
979 | |||
980 | /* Statistics | ||
981 | */ | ||
982 | |||
983 | void | ||
984 | rcswalk_report (RcsStats* set) | ||
985 | { | ||
986 | stat_bincount_report (set->avg_version_size); | ||
987 | stat_int_report (set->version_stat); | ||
988 | stat_int_report (set->forward_stat); | ||
989 | stat_int_report (set->reverse_stat); | ||
990 | stat_int_report (set->branch_stat); | ||
991 | stat_int_report (set->unencoded_stat); | ||
992 | stat_int_report (set->literal_stat); | ||
993 | } | ||
994 | |||
995 | /* Int stat | ||
996 | */ | ||
997 | IntStat* | ||
998 | stat_int_new (const char* name) | ||
999 | { | ||
1000 | IntStat* s = g_new0 (IntStat, 1); | ||
1001 | |||
1002 | s->name = name; | ||
1003 | s->values = g_array_new (FALSE, FALSE, sizeof (long long)); | ||
1004 | |||
1005 | return s; | ||
1006 | } | ||
1007 | |||
1008 | void | ||
1009 | stat_int_add_item (IntStat* stat, long long v) | ||
1010 | { | ||
1011 | if (! stat->count) | ||
1012 | stat->min = v; | ||
1013 | stat->count += 1; | ||
1014 | stat->min = MIN (v, stat->min); | ||
1015 | stat->max = MAX (v, stat->max); | ||
1016 | stat->sum += v; | ||
1017 | |||
1018 | g_array_append_val (stat->values, v); | ||
1019 | } | ||
1020 | |||
1021 | double | ||
1022 | stat_int_stddev (IntStat *stat) | ||
1023 | { | ||
1024 | double f = 0; | ||
1025 | double m = (double) stat->sum / (double) stat->count; | ||
1026 | double v; | ||
1027 | int i; | ||
1028 | |||
1029 | for (i = 0; i < stat->count; i += 1) | ||
1030 | { | ||
1031 | long long x = g_array_index (stat->values, long long, i); | ||
1032 | |||
1033 | f += (m - (double) x) * (m - (double) x); | ||
1034 | } | ||
1035 | |||
1036 | v = f / (double) stat->count; | ||
1037 | |||
1038 | return sqrt (v); | ||
1039 | } | ||
1040 | |||
/* qsort() comparator for long long values, ascending.
 *
 * Returns -1, 0 or 1.  The values are compared rather than
 * subtracted: a long long difference narrowed to int loses its high
 * bits, so e.g. (1LL << 32) and 0 would compare as equal.
 */
int
ll_comp (const void* a, const void* b)
{
  const long long lhs = * (const long long*) a;
  const long long rhs = * (const long long*) b;

  return (lhs > rhs) - (lhs < rhs);
}
1048 | |||
1049 | void | ||
1050 | stat_int_histogram (IntStat *stat) | ||
1051 | { | ||
1052 | int i, consec; | ||
1053 | long long cum = 0; | ||
1054 | |||
1055 | FILE* p_out; | ||
1056 | FILE* s_out; | ||
1057 | |||
1058 | if (! (p_out = config_output ("%s.pop.hist", stat->name))) | ||
1059 | abort (); | ||
1060 | |||
1061 | if (! (s_out = config_output ("%s.sum.hist", stat->name))) | ||
1062 | abort (); | ||
1063 | |||
1064 | qsort (stat->values->data, stat->count, sizeof (long long), ll_comp); | ||
1065 | |||
1066 | for (i = 0; i < stat->count; i += consec) | ||
1067 | { | ||
1068 | long long ix = g_array_index (stat->values, long long, i); | ||
1069 | |||
1070 | for (consec = 1; (i+consec) < stat->count; consec += 1) | ||
1071 | { | ||
1072 | long long jx = g_array_index (stat->values, long long, i+consec); | ||
1073 | |||
1074 | if (ix != jx) | ||
1075 | break; | ||
1076 | } | ||
1077 | |||
1078 | cum += consec * g_array_index (stat->values, long long, i); | ||
1079 | |||
1080 | fprintf (p_out, "%qd, %0.3f\n", g_array_index (stat->values, long long, i), (double) (i+consec) / (double) stat->count); | ||
1081 | fprintf (s_out, "%qd, %0.3f\n", g_array_index (stat->values, long long, i), (double) cum / (double) stat->sum); | ||
1082 | } | ||
1083 | |||
1084 | if (fclose (p_out) < 0 || fclose (s_out) < 0) | ||
1085 | { | ||
1086 | g_error ("fclose failed\n"); | ||
1087 | } | ||
1088 | } | ||
1089 | |||
1090 | void | ||
1091 | stat_int_report (IntStat* stat) | ||
1092 | { | ||
1093 | FILE* out; | ||
1094 | |||
1095 | if (! (out = config_output ("%s.stat", stat->name))) | ||
1096 | abort (); | ||
1097 | |||
1098 | fprintf (out, "Name: %s\n", stat->name); | ||
1099 | fprintf (out, "Count: %d\n", stat->count); | ||
1100 | fprintf (out, "Min: %qd\n", stat->min); | ||
1101 | fprintf (out, "Max: %qd\n", stat->max); | ||
1102 | fprintf (out, "Sum: %qd\n", stat->sum); | ||
1103 | fprintf (out, "Mean: %0.2f\n", (double) stat->sum / (double) stat->count); | ||
1104 | fprintf (out, "Stddev: %0.2f\n", stat_int_stddev (stat)); | ||
1105 | |||
1106 | if (fclose (out) < 0) | ||
1107 | g_error ("fclose failed"); | ||
1108 | |||
1109 | stat_int_histogram (stat); | ||
1110 | } | ||
1111 | |||
1112 | /* Dbl stat | ||
1113 | */ | ||
1114 | |||
1115 | DblStat* | ||
1116 | stat_dbl_new (const char* name) | ||
1117 | { | ||
1118 | DblStat* s = g_new0 (DblStat, 1); | ||
1119 | |||
1120 | s->name = name; | ||
1121 | s->values = g_array_new (FALSE, FALSE, sizeof (double)); | ||
1122 | |||
1123 | return s; | ||
1124 | } | ||
1125 | |||
1126 | void | ||
1127 | stat_dbl_add_item (DblStat* stat, double v) | ||
1128 | { | ||
1129 | if (! stat->count) | ||
1130 | stat->min = v; | ||
1131 | stat->count += 1; | ||
1132 | stat->min = MIN (v, stat->min); | ||
1133 | stat->max = MAX (v, stat->max); | ||
1134 | stat->sum += v; | ||
1135 | |||
1136 | g_array_append_val (stat->values, v); | ||
1137 | } | ||
1138 | |||
1139 | double | ||
1140 | stat_dbl_stddev (DblStat *stat) | ||
1141 | { | ||
1142 | double f = 0; | ||
1143 | double m = stat->sum / stat->count; | ||
1144 | double v; | ||
1145 | int i; | ||
1146 | |||
1147 | for (i = 0; i < stat->count; i += 1) | ||
1148 | { | ||
1149 | double x = g_array_index (stat->values, double, i); | ||
1150 | |||
1151 | f += (m - x) * (m - x); | ||
1152 | } | ||
1153 | |||
1154 | v = f / stat->count; | ||
1155 | |||
1156 | return sqrt (v); | ||
1157 | } | ||
1158 | |||
/* qsort() comparator for doubles, ascending: returns 1, -1 or 0
 * according to the sign of (*a - *b).
 */
int
dbl_comp (const void* a, const void* b)
{
  const double delta = * (const double*) a - * (const double*) b;

  return (delta > 0.0) ? 1 : ((delta < 0.0) ? -1 : 0);
}
1173 | |||
1174 | void | ||
1175 | stat_dbl_histogram (DblStat *stat) | ||
1176 | { | ||
1177 | int i, consec; | ||
1178 | double cum = 0.0; | ||
1179 | |||
1180 | FILE* p_out; | ||
1181 | FILE* s_out; | ||
1182 | |||
1183 | if (! (p_out = config_output ("%s.pop.hist", stat->name))) | ||
1184 | abort (); | ||
1185 | |||
1186 | if (! (s_out = config_output ("%s.sum.hist", stat->name))) | ||
1187 | abort (); | ||
1188 | |||
1189 | qsort (stat->values->data, stat->count, sizeof (double), dbl_comp); | ||
1190 | |||
1191 | for (i = 0; i < stat->count; i += consec) | ||
1192 | { | ||
1193 | double ix = g_array_index (stat->values, double, i); | ||
1194 | |||
1195 | for (consec = 1; (i+consec) < stat->count; consec += 1) | ||
1196 | { | ||
1197 | double jx = g_array_index (stat->values, double, i+consec); | ||
1198 | |||
1199 | if (ix != jx) | ||
1200 | break; | ||
1201 | } | ||
1202 | |||
1203 | cum += ((double) consec) * g_array_index (stat->values, double, i); | ||
1204 | |||
1205 | fprintf (p_out, "%0.6f, %0.3f\n", g_array_index (stat->values, double, i), (double) (i+consec) / (double) stat->count); | ||
1206 | fprintf (s_out, "%0.6f, %0.3f\n", g_array_index (stat->values, double, i), cum / stat->sum); | ||
1207 | } | ||
1208 | |||
1209 | if (fclose (p_out) < 0 || fclose (s_out) < 0) | ||
1210 | { | ||
1211 | g_error ("fclose failed\n"); | ||
1212 | } | ||
1213 | } | ||
1214 | |||
1215 | void | ||
1216 | stat_dbl_report (DblStat* stat) | ||
1217 | { | ||
1218 | FILE* out; | ||
1219 | |||
1220 | if (! (out = config_output ("%s.stat", stat->name))) | ||
1221 | abort (); | ||
1222 | |||
1223 | fprintf (out, "Name: %s\n", stat->name); | ||
1224 | fprintf (out, "Count: %d\n", stat->count); | ||
1225 | fprintf (out, "Min: %0.6f\n", stat->min); | ||
1226 | fprintf (out, "Max: %0.6f\n", stat->max); | ||
1227 | fprintf (out, "Sum: %0.6f\n", stat->sum); | ||
1228 | fprintf (out, "Mean: %0.6f\n", stat->sum / stat->count); | ||
1229 | fprintf (out, "Stddev: %0.6f\n", stat_dbl_stddev (stat)); | ||
1230 | |||
1231 | if (fclose (out) < 0) | ||
1232 | g_error ("fclose failed"); | ||
1233 | |||
1234 | stat_dbl_histogram (stat); | ||
1235 | } | ||
1236 | |||
1237 | /* Bincount | ||
1238 | */ | ||
1239 | BinCounter* | ||
1240 | stat_bincount_new (const char* name) | ||
1241 | { | ||
1242 | BinCounter* bc = g_new0 (BinCounter, 1); | ||
1243 | |||
1244 | bc->name = name; | ||
1245 | bc->bins = g_ptr_array_new (); | ||
1246 | |||
1247 | return bc; | ||
1248 | } | ||
1249 | |||
1250 | void | ||
1251 | stat_bincount_add_item (BinCounter* bc, int bin, double val) | ||
1252 | { | ||
1253 | GArray* one; | ||
1254 | int last; | ||
1255 | |||
1256 | if (bin >= bc->bins->len) | ||
1257 | { | ||
1258 | g_ptr_array_set_size (bc->bins, bin+1); | ||
1259 | } | ||
1260 | |||
1261 | if (! (one = bc->bins->pdata[bin])) | ||
1262 | { | ||
1263 | one = bc->bins->pdata[bin] = g_array_new (FALSE, TRUE, sizeof (double)); | ||
1264 | } | ||
1265 | |||
1266 | g_assert (one); | ||
1267 | |||
1268 | last = one->len; | ||
1269 | |||
1270 | g_array_set_size (one, last + 1); | ||
1271 | |||
1272 | g_array_index (one, double, last) = val; | ||
1273 | } | ||
1274 | |||
1275 | void | ||
1276 | stat_bincount_report (BinCounter* bc) | ||
1277 | { | ||
1278 | FILE *avg_out; | ||
1279 | FILE *raw_out; | ||
1280 | int i; | ||
1281 | |||
1282 | if (! (avg_out = config_output ("%s.avg", bc->name))) | ||
1283 | abort (); | ||
1284 | |||
1285 | if (! (raw_out = config_output ("%s.raw", bc->name))) | ||
1286 | abort (); | ||
1287 | |||
1288 | for (i = 0; i < bc->bins->len; i += 1) | ||
1289 | { | ||
1290 | GArray* one = bc->bins->pdata[i]; | ||
1291 | |||
1292 | double sum = 0.0; | ||
1293 | int j; | ||
1294 | |||
1295 | for (j = 0; j < one->len; j += 1) | ||
1296 | { | ||
1297 | double d = g_array_index (one, double, j); | ||
1298 | |||
1299 | sum += d; | ||
1300 | |||
1301 | fprintf (raw_out, "%e ", d); | ||
1302 | } | ||
1303 | |||
1304 | fprintf (raw_out, "\n"); | ||
1305 | fprintf (avg_out, "%e %d\n", sum / one->len, one->len); | ||
1306 | } | ||
1307 | |||
1308 | if (fclose (avg_out) < 0) | ||
1309 | g_error ("fclose failed"); | ||
1310 | |||
1311 | if (fclose (raw_out) < 0) | ||
1312 | g_error ("fclose failed"); | ||
1313 | } | ||
1314 | |||
1315 | /* Config stuff | ||
1316 | */ | ||
1317 | |||
/* Make sure DIRNAME exists as a directory, creating it with mode 0777
 * when nothing exists at that path.
 *
 * Returns 0 on success, the errno from mkdir when creation fails, and
 * ENOTDIR when the path exists but is not a directory.  (That case
 * used to return whatever errno happened to hold after a successful
 * stat -- typically 0, i.e. success.)
 */
int
config_create_dir (const char* dirname)
{
  struct stat buf;

  if (stat (dirname, & buf) < 0)
    {
      if (mkdir (dirname, 0777) < 0)
        {
          int err = errno;  /* save before fprintf can clobber it */

          fprintf (stderr, "mkdir failed: %s\n", dirname);
          return err;
        }

      return 0;
    }

  if (! S_ISDIR (buf.st_mode))
    {
      fprintf (stderr, "not a directory: %s\n", dirname);
      return ENOTDIR;
    }

  return 0;
}
1342 | |||
1343 | int | ||
1344 | config_clear_dir (const char* dir) | ||
1345 | { | ||
1346 | char buf[1024]; | ||
1347 | |||
1348 | if (dir) | ||
1349 | { | ||
1350 | sprintf (buf, "rm -rf %s", dir); | ||
1351 | |||
1352 | system (buf); | ||
1353 | } | ||
1354 | |||
1355 | return 0; | ||
1356 | } | ||
1357 | |||
1358 | static ConfigOption all_options[64]; | ||
1359 | static int option_count; | ||
1360 | |||
1361 | void | ||
1362 | config_init () | ||
1363 | { | ||
1364 | static gboolean once = FALSE; | ||
1365 | if (! once) | ||
1366 | { | ||
1367 | once = TRUE; | ||
1368 | config_register (config_options, ARRAY_SIZE (config_options)); | ||
1369 | } | ||
1370 | } | ||
1371 | |||
1372 | void | ||
1373 | config_register (ConfigOption *opts, int nopts) | ||
1374 | { | ||
1375 | int i; | ||
1376 | |||
1377 | config_init (); | ||
1378 | |||
1379 | for (i = 0; i < nopts; i += 1) | ||
1380 | { | ||
1381 | all_options[option_count++] = opts[i]; | ||
1382 | } | ||
1383 | } | ||
1384 | |||
1385 | void | ||
1386 | config_set_string (const char* var, const char* val) | ||
1387 | { | ||
1388 | int i; | ||
1389 | |||
1390 | for (i = 0; i < option_count; i += 1) | ||
1391 | { | ||
1392 | ConfigOption *opt = all_options + i; | ||
1393 | |||
1394 | if (strcmp (opt->name, var) == 0) | ||
1395 | { | ||
1396 | (* (const char**) opt->value) = val; | ||
1397 | opt->found = TRUE; | ||
1398 | return; | ||
1399 | } | ||
1400 | } | ||
1401 | } | ||
1402 | |||
1403 | int | ||
1404 | config_parse (const char* config_file) | ||
1405 | { | ||
1406 | FILE *in; | ||
1407 | char oname[1024], value[1024]; | ||
1408 | int i; | ||
1409 | |||
1410 | if (! (in = fopen (config_file, "r"))) | ||
1411 | { | ||
1412 | fprintf (stderr, "fopen failed: %s\n", config_file); | ||
1413 | return errno; | ||
1414 | } | ||
1415 | |||
1416 | for (;;) | ||
1417 | { | ||
1418 | ConfigOption *opt = NULL; | ||
1419 | |||
1420 | if (fscanf (in, "%s", oname) != 1) | ||
1421 | break; | ||
1422 | |||
1423 | for (i = 0; i < option_count; i += 1) | ||
1424 | { | ||
1425 | if (strcmp (oname, all_options[i].name) == 0) | ||
1426 | { | ||
1427 | opt = all_options + i; | ||
1428 | break; | ||
1429 | } | ||
1430 | } | ||
1431 | |||
1432 | if (opt && opt->arg == CO_None) | ||
1433 | { | ||
1434 | (* (gboolean*) opt->value) = TRUE; | ||
1435 | opt->found = TRUE; | ||
1436 | continue; | ||
1437 | } | ||
1438 | |||
1439 | if (fscanf (in, "%s", value) != 1) | ||
1440 | { | ||
1441 | fprintf (stderr, "no value for option: %s; file: %s\n", oname, config_file); | ||
1442 | goto abort; | ||
1443 | } | ||
1444 | |||
1445 | if (! opt) | ||
1446 | { | ||
1447 | /*fprintf (stderr, "unrecognized option: %s\n", oname);*/ | ||
1448 | continue; | ||
1449 | } | ||
1450 | |||
1451 | switch (opt->type) | ||
1452 | { | ||
1453 | case CD_Bool: | ||
1454 | |||
1455 | if (strcasecmp (value, "yes") == 0 || | ||
1456 | strcasecmp (value, "true") == 0 || | ||
1457 | strcmp (value, "1") == 0 || | ||
1458 | strcasecmp (value, "on") == 0) | ||
1459 | { | ||
1460 | ((gboolean*) opt->value) = TRUE; | ||
1461 | } | ||
1462 | else | ||
1463 | { | ||
1464 | ((gboolean*) opt->value) = FALSE; | ||
1465 | } | ||
1466 | |||
1467 | break; | ||
1468 | case CD_Int32: | ||
1469 | |||
1470 | if (sscanf (value, "%d", (gint32*) opt->value) != 1) | ||
1471 | { | ||
1472 | fprintf (stderr, "parse error for option: %s; file: %s\n", oname, config_file); | ||
1473 | goto abort; | ||
1474 | } | ||
1475 | |||
1476 | break; | ||
1477 | case CD_Double: | ||
1478 | |||
1479 | if (sscanf (value, "%lf", (double*) opt->value) != 1) | ||
1480 | { | ||
1481 | fprintf (stderr, "parse error for option: %s; file: %s\n", oname, config_file); | ||
1482 | goto abort; | ||
1483 | } | ||
1484 | |||
1485 | break; | ||
1486 | case CD_String: | ||
1487 | |||
1488 | (* (const char**) opt->value) = g_strdup (value); | ||
1489 | |||
1490 | break; | ||
1491 | } | ||
1492 | |||
1493 | opt->found = TRUE; | ||
1494 | } | ||
1495 | |||
1496 | fclose (in); | ||
1497 | |||
1498 | return 0; | ||
1499 | |||
1500 | abort: | ||
1501 | |||
1502 | fclose (in); | ||
1503 | |||
1504 | return -1; | ||
1505 | } | ||
1506 | |||
1507 | int | ||
1508 | config_compute_output_dir () | ||
1509 | { | ||
1510 | char tmp[1024]; | ||
1511 | char buf[1024]; | ||
1512 | int i; | ||
1513 | gboolean last = FALSE; | ||
1514 | |||
1515 | buf[0] = 0; | ||
1516 | |||
1517 | for (i = 0; i < option_count; i += 1) | ||
1518 | { | ||
1519 | ConfigOption *opt = all_options + i; | ||
1520 | |||
1521 | if (opt->style == CS_Ignore) | ||
1522 | continue; | ||
1523 | |||
1524 | if (! opt->found) | ||
1525 | continue; | ||
1526 | |||
1527 | if (last) | ||
1528 | strcat (buf, ","); | ||
1529 | |||
1530 | last = TRUE; | ||
1531 | |||
1532 | strcat (buf, opt->abbrev); | ||
1533 | strcat (buf, "="); | ||
1534 | |||
1535 | switch (opt->type) | ||
1536 | { | ||
1537 | case CD_Bool: | ||
1538 | |||
1539 | if (* (gboolean*) opt->value) | ||
1540 | strcat (buf, "true"); | ||
1541 | else | ||
1542 | strcat (buf, "false"); | ||
1543 | |||
1544 | break; | ||
1545 | case CD_Int32: | ||
1546 | |||
1547 | sprintf (tmp, "%d", (* (gint32*) opt->value)); | ||
1548 | strcat (buf, tmp); | ||
1549 | |||
1550 | break; | ||
1551 | case CD_Double: | ||
1552 | |||
1553 | sprintf (tmp, "%0.2f", (* (double*) opt->value)); | ||
1554 | strcat (buf, tmp); | ||
1555 | |||
1556 | break; | ||
1557 | case CD_String: | ||
1558 | |||
1559 | if (opt->style == CS_UseAsFile) | ||
1560 | { | ||
1561 | const char* str = (* (const char**) opt->value); | ||
1562 | const char* ls = strrchr (str, '/'); | ||
1563 | |||
1564 | strcat (buf, ls ? (ls + 1) : str); | ||
1565 | } | ||
1566 | else | ||
1567 | { | ||
1568 | strcat (buf, (* (const char**) opt->value)); | ||
1569 | } | ||
1570 | |||
1571 | break; | ||
1572 | } | ||
1573 | } | ||
1574 | |||
1575 | config_output_dir = g_strdup_printf ("%s/%s", config_output_base, buf); | ||
1576 | |||
1577 | return 0; | ||
1578 | } | ||
1579 | |||
1580 | int | ||
1581 | config_done (void) | ||
1582 | { | ||
1583 | int i, ret; | ||
1584 | FILE *out; | ||
1585 | |||
1586 | for (i = 0; i < option_count; i += 1) | ||
1587 | { | ||
1588 | ConfigOption *opt = all_options + i; | ||
1589 | |||
1590 | if (! opt->found && opt->arg == CO_Required) | ||
1591 | { | ||
1592 | fprintf (stderr, "required option not found: %s\n", all_options[i].name); | ||
1593 | return -1; | ||
1594 | } | ||
1595 | } | ||
1596 | |||
1597 | if ((ret = config_compute_output_dir ())) { | ||
1598 | return ret; | ||
1599 | } | ||
1600 | |||
1601 | if ((ret = config_clear_dir (config_output_dir))) { | ||
1602 | return ret; | ||
1603 | } | ||
1604 | |||
1605 | if ((ret = config_create_dir (config_output_dir))) { | ||
1606 | return ret; | ||
1607 | } | ||
1608 | |||
1609 | if (! (out = config_output ("Options"))) | ||
1610 | abort (); | ||
1611 | |||
1612 | for (i = 0; i < option_count; i += 1) | ||
1613 | { | ||
1614 | ConfigOption *opt = all_options + i; | ||
1615 | |||
1616 | fprintf (out, "option: %s; value: ", all_options[i].name); | ||
1617 | |||
1618 | switch (opt->type) | ||
1619 | { | ||
1620 | case CD_Bool: | ||
1621 | |||
1622 | fprintf (out, "%s", (* (gboolean*) opt->value) ? "TRUE" : "FALSE"); | ||
1623 | |||
1624 | break; | ||
1625 | case CD_Int32: | ||
1626 | |||
1627 | fprintf (out, "%d", (* (gint32*) opt->value)); | ||
1628 | |||
1629 | break; | ||
1630 | case CD_Double: | ||
1631 | |||
1632 | fprintf (out, "%0.2f", (* (double*) opt->value)); | ||
1633 | |||
1634 | break; | ||
1635 | case CD_String: | ||
1636 | |||
1637 | fprintf (out, "%s", (* (const char**) opt->value)); | ||
1638 | |||
1639 | break; | ||
1640 | } | ||
1641 | |||
1642 | fprintf (out, "\n"); | ||
1643 | } | ||
1644 | |||
1645 | if (fclose (out)) | ||
1646 | { | ||
1647 | fprintf (stderr, "fclose failed\n"); | ||
1648 | return errno; | ||
1649 | } | ||
1650 | |||
1651 | return 0; | ||
1652 | } | ||
1653 | |||
1654 | const char* | ||
1655 | config_help_arg (ConfigOption *opt) | ||
1656 | { | ||
1657 | switch (opt->arg) | ||
1658 | { | ||
1659 | case CO_Required: | ||
1660 | return "required"; | ||
1661 | case CO_Optional: | ||
1662 | return "optional"; | ||
1663 | case CO_None: | ||
1664 | return "no value"; | ||
1665 | } | ||
1666 | |||
1667 | return "unknown"; | ||
1668 | } | ||
1669 | |||
1670 | const char* | ||
1671 | config_help_type (ConfigOption *opt) | ||
1672 | { | ||
1673 | switch (opt->arg) | ||
1674 | { | ||
1675 | case CO_None: | ||
1676 | return "boolean"; | ||
1677 | default: | ||
1678 | break; | ||
1679 | } | ||
1680 | |||
1681 | switch (opt->type) | ||
1682 | { | ||
1683 | case CD_Bool: | ||
1684 | return "boolean"; | ||
1685 | case CD_Int32: | ||
1686 | return "int"; | ||
1687 | case CD_Double: | ||
1688 | return "double"; | ||
1689 | case CD_String: | ||
1690 | return "string"; | ||
1691 | } | ||
1692 | |||
1693 | return "unknown"; | ||
1694 | } | ||
1695 | |||
1696 | void | ||
1697 | config_help (void) | ||
1698 | { | ||
1699 | int i; | ||
1700 | |||
1701 | fprintf (stderr, "Expecting the following options in one or more config files on the command line:\n"); | ||
1702 | |||
1703 | for (i = 0; i < option_count; i += 1) | ||
1704 | { | ||
1705 | ConfigOption *opt = all_options + i; | ||
1706 | |||
1707 | fprintf (stderr, "%s: %s %s\n", | ||
1708 | opt->name, | ||
1709 | config_help_arg (opt), | ||
1710 | config_help_type (opt)); | ||
1711 | } | ||
1712 | } | ||
1713 | |||
1714 | FILE* | ||
1715 | config_output (const char* format, ...) | ||
1716 | { | ||
1717 | gchar *buffer; | ||
1718 | gchar *file; | ||
1719 | va_list args; | ||
1720 | FILE *f; | ||
1721 | |||
1722 | va_start (args, format); | ||
1723 | buffer = g_strdup_vprintf (format, args); | ||
1724 | va_end (args); | ||
1725 | |||
1726 | file = g_strdup_printf ("%s/%s", config_output_dir, buffer); | ||
1727 | |||
1728 | if (! (f = fopen (file, "w"))) | ||
1729 | g_error ("fopen failed: %s\n", buffer); | ||
1730 | |||
1731 | g_free (file); | ||
1732 | |||
1733 | g_free (buffer); | ||
1734 | |||
1735 | return f; | ||
1736 | } | ||
1737 | |||
1738 | |||
1739 | #include <edsio.h> | ||
1740 | #include <edsiostdio.h> | ||
1741 | #include <ctype.h> | ||
1742 | #include "xdfs.h" | ||
1743 | |||
1744 | /* Warning: very cheesy! | ||
1745 | */ | ||
1746 | |||
1747 | #ifdef DEBUG_EXTRACT | ||
1748 | FileHandle *fh2 = handle_read_file (filename); | ||
1749 | |||
1750 | guint8* debug_buf = g_malloc (buflen); | ||
1751 | |||
1752 | if (! handle_read (fh2, debug_buf, buflen)) | ||
1753 | g_error ("read failed"); | ||
1754 | #endif | ||
1755 | |||
1756 | gboolean | ||
1757 | rcs_count (const char* filename, guint *encoded_size) | ||
1758 | { | ||
1759 | char *readbuf0, *readbuf; | ||
1760 | gboolean in_string = FALSE; | ||
1761 | gboolean in_text = FALSE; | ||
1762 | guint string_start = 0; | ||
1763 | guint string_end = 0; | ||
1764 | guint current_pos = 0; | ||
1765 | /*char *current_delta = NULL;*/ | ||
1766 | FileHandle *fh = handle_read_file (filename); | ||
1767 | guint buflen = handle_length (fh); | ||
1768 | |||
1769 | (* encoded_size) = 0; | ||
1770 | |||
1771 | readbuf0 = g_new (guint8, buflen); | ||
1772 | |||
1773 | for (;;) | ||
1774 | { | ||
1775 | int c = handle_gets (fh, readbuf0, buflen); | ||
1776 | |||
1777 | readbuf = readbuf0; | ||
1778 | |||
1779 | if (c < 0) | ||
1780 | break; | ||
1781 | |||
1782 | if (strncmp (readbuf, "text", 4) == 0) | ||
1783 | in_text = TRUE; | ||
1784 | |||
1785 | if (! in_string && readbuf[0] == '@') | ||
1786 | { | ||
1787 | string_start = current_pos + 1; | ||
1788 | in_string = TRUE; | ||
1789 | readbuf += 1; | ||
1790 | } | ||
1791 | |||
1792 | current_pos += c; | ||
1793 | |||
1794 | if (in_string) | ||
1795 | { | ||
1796 | while ((readbuf = strchr (readbuf, '@'))) | ||
1797 | { | ||
1798 | if (readbuf[1] == '@') | ||
1799 | { | ||
1800 | string_start += 1; /* @@@ bogus, just counting. */ | ||
1801 | readbuf += 2; | ||
1802 | continue; | ||
1803 | } | ||
1804 | |||
1805 | in_string = FALSE; | ||
1806 | break; | ||
1807 | } | ||
1808 | |||
1809 | string_end = current_pos - 2; | ||
1810 | |||
1811 | if (in_text && ! in_string) | ||
1812 | { | ||
1813 | in_text = FALSE; | ||
1814 | |||
1815 | /*g_free (current_delta); | ||
1816 | current_delta = NULL;*/ | ||
1817 | |||
1818 | (* encoded_size) += (string_end - string_start); | ||
1819 | } | ||
1820 | |||
1821 | continue; | ||
1822 | } | ||
1823 | |||
1824 | if (isdigit (readbuf[0])) | ||
1825 | { | ||
1826 | #if 0 | ||
1827 | (* strchr (readbuf, '\n')) = 0; | ||
1828 | if (current_delta) | ||
1829 | g_free (current_delta); | ||
1830 | current_delta = g_strdup (readbuf); | ||
1831 | #endif | ||
1832 | } | ||
1833 | } | ||
1834 | |||
1835 | handle_close (fh); | ||
1836 | |||
1837 | g_free (readbuf0); | ||
1838 | |||
1839 | #if 0 | ||
1840 | if (current_delta) | ||
1841 | g_free (current_delta); | ||
1842 | #endif | ||
1843 | |||
1844 | return TRUE; | ||
1845 | } | ||
1846 | |||
1847 | #if 0 | ||
1848 | int | ||
1849 | main (int argc, char** argv) | ||
1850 | { | ||
1851 | guint size; | ||
1852 | |||
1853 | if (argc != 2) | ||
1854 | g_error ("usage: %s RCS_file\n", argv[0]); | ||
1855 | |||
1856 | if (! rcs_count (argv[1], &size)) | ||
1857 | g_error ("rcs_parse failed"); | ||
1858 | |||
1859 | return 0; | ||
1860 | } | ||
1861 | #endif | ||
diff --git a/xdelta3/setup.py b/xdelta3/setup.py new file mode 100755 index 0000000..9d717e8 --- /dev/null +++ b/xdelta3/setup.py | |||
@@ -0,0 +1,33 @@ | |||
1 | # xdelta 3 - delta compression tools and library | ||
2 | # Copyright (C) 2004 and onward. Joshua P. MacDonald | ||
3 | # | ||
4 | # This program is free software; you can redistribute it and/or modify | ||
5 | # it under the terms of the GNU General Public License as published by | ||
6 | # the Free Software Foundation; either version 2 of the License, or | ||
7 | # (at your option) any later version. | ||
8 | # | ||
9 | # This program is distributed in the hope that it will be useful, | ||
10 | # but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | # GNU General Public License for more details. | ||
13 | # | ||
14 | # You should have received a copy of the GNU General Public License | ||
15 | # along with this program; if not, write to the Free Software | ||
16 | # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
17 | # | ||
18 | # | ||
19 | from distutils.core import setup, Extension | ||
20 | |||
21 | xdelta3_ext = Extension('xdelta3', | ||
22 | ['xdelta3.c'], | ||
23 | define_macros = [('PYTHON_MODULE',1), | ||
24 | ('XD3_POSIX',1), | ||
25 | ('REGRESSION_TEST',1), | ||
26 | ('XD3_DEBUG',1), | ||
27 | ('EXTCOMP',1), | ||
28 | ('VCDIFF_TOOLS',1), | ||
29 | ('XD3_USE_LARGEFILE64',1)]) | ||
30 | |||
31 | setup(name='xdelta3', | ||
32 | version='pre', | ||
33 | ext_modules=[xdelta3_ext]) | ||
diff --git a/xdelta3/show.c b/xdelta3/show.c new file mode 100755 index 0000000..f53f2ca --- /dev/null +++ b/xdelta3/show.c | |||
@@ -0,0 +1,41 @@ | |||
1 | #include <stdio.h> | ||
2 | #include <stdlib.h> | ||
3 | |||
4 | #define BUFSZ (1 << 22) | ||
5 | |||
/* Parse a non-negative decimal count from STR into *OUT.
 * Returns 0 on success, -1 if STR is empty, has trailing garbage, or is
 * negative. */
static int
show_parse_count (const char *str, long *out)
{
  char *end;
  long value = strtol (str, &end, 10);

  if (end == str || *end != '\0' || value < 0)
    {
      return -1;
    }

  *out = value;
  return 0;
}

/* show: hex-dump a byte range of standard input.
 *
 * usage: show offset bytes
 *
 * Skips the first OFFSET bytes of stdin, then prints the following BYTES
 * bytes to stderr as two-digit lowercase hex, terminated by a newline.
 *
 * Returns 0 on success, 1 on a usage error or when stdin ends before
 * OFFSET + BYTES bytes have been read. */
int main(int argc, char **argv)
{
  int c;
  long offset;
  long bytes;

  /* Reject non-numeric and negative arguments up front: the loops below
   * count down to zero and would never terminate on a negative value. */
  if (argc != 3 ||
      show_parse_count (argv[1], &offset) != 0 ||
      show_parse_count (argv[2], &bytes) != 0)
    {
      fprintf (stderr, "usage: show offset bytes\n");
      return 1;
    }

  /* Skip the leading bytes; stop at once if the input is too short
   * instead of repeating the message for every remaining count. */
  for (; offset != 0; offset -= 1)
    {
      if ((c = fgetc (stdin)) == EOF)
	{
	  fprintf (stderr, "EOF before offset\n");
	  return 1;
	}
    }

  for (; bytes != 0; bytes -= 1)
    {
      if ((c = fgetc (stdin)) == EOF)
	{
	  /* The leading newline terminates any partial hex line.  Return
	   * here so EOF itself is never formatted as a data byte. */
	  fprintf (stderr, "\nEOF before offset + bytes\n");
	  return 1;
	}

      fprintf (stderr, "%02x", c);
    }

  fprintf (stderr, "\n");
  return 0;
}
diff --git a/xdelta3/testh.c b/xdelta3/testh.c new file mode 100755 index 0000000..1be01df --- /dev/null +++ b/xdelta3/testh.c | |||
@@ -0,0 +1 @@ | |||
#include "xdelta3.h" | |||
diff --git a/xdelta3/www/xdelta3-api-guide.html b/xdelta3/www/xdelta3-api-guide.html new file mode 100755 index 0000000..b3513ea --- /dev/null +++ b/xdelta3/www/xdelta3-api-guide.html | |||
@@ -0,0 +1,212 @@ | |||
1 | <!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"> | ||
2 | <html> | ||
3 | <head> | ||
4 | <title>Xdelta3 API guide (BETA)</title> | ||
5 | <meta http-equiv="content-type" content="text/html; charset=ISO-8859-1"> | ||
6 | <link rel="stylesheet" type="text/css" href="xdelta3.css"/> | ||
7 | </head> | ||
8 | <body> | ||
9 | |||
10 | <!-- $Format: "$WWWLeftNavBar$" $ --> | ||
11 | <table cellpadding="20px" width=700> <tr> <td class="leftbdr" valign=top height=600 width=100> <div class="leftbody"> <h1>Xdelta</h1> <a href="xdelta3.html">overview</a><br> <a href="xdelta3-cmdline.html">command line</a><br> <a href="xdelta3-api-guide.html">api guide</a><br> <br><a href="http://xdelta.org">xdelta.org</a></h2> </div> </td> <td valign=top width=500> | ||
12 | |||
13 | |||
14 | <!-- Copyright (C) 2003 and onward. Joshua P. MacDonald --> | ||
15 | |||
16 | <h1>api guide</h1> | ||
17 | |||
18 | <p>This guide intends to give you a quick start to the Xdelta3 programming | ||
19 | interface. This is not a complete reference; the comments inside the source file | ||
20 | <code>xdelta3.h</code> and the command-line application, | ||
21 | <code>xdelta3-main.h</code> offer more complete information.</p> | ||
22 | |||
23 | <p>Have you read the <a href="xdelta3-cmdline.html">command-line interface</a>?</p> | ||
24 | |||
25 | <h1>stream interface</h1> | ||
26 | |||
27 | <p> | ||
28 | To begin with, there are three external structures, only two of which are | ||
29 | discussed here. The <code>xd3_stream</code> struct plays the main role, one | ||
30 | of these contains the state necessary to encode or decode one stream of data. | ||
31 | An <code>xd3_source</code> struct maintains state about the (optional) source | ||
32 | file, against which differences are computed. The third structure, | ||
33 | <code>xd3_config</code> deals with configuring various encoder parameters.</p> | ||
34 | |||
35 | <p> | ||
36 | At a glance, the interface resembles Zlib. The program puts data in, which | ||
37 | the xd3_stream consumes. After computing over the data, the xd3_stream in | ||
38 | turn generates output for the application to consume, or it requests more | ||
39 | input. The xd3_stream also issues requests to the application to read a block | ||
40 | of source data. The request to read a source block may be handled in one of | ||
41 | two ways, according to application preference. If a <code>xd3_getblk</code> | ||
42 | callback function is provided, the application handler will be called from | ||
43 | within the library, suspending computation until the request completes. If no | ||
44 | callback function is provided the library returns a special code | ||
45 | (XD3_GETSRCBLK), allowing the application to issue the request and resume | ||
46 | computation whenever it likes. In both cases, the xd3_source struct contains | ||
47 | the requested block number and a place to store the result.</p> | ||
48 | |||
49 | <h1>setup</h1> | ||
50 | <p>The code to declare and initialize the xd3_stream:</p> | ||
51 | <div class="example"> | ||
52 | <pre> | ||
53 | int ret; | ||
54 | xd3_stream stream; | ||
55 | xd3_config config; | ||
56 | |||
57 | xd3_init_config (&config, 0 /* flags */); | ||
58 | config.winsize = 32768; | ||
59 | ret = xd3_config_stream (&stream, &config); | ||
60 | |||
61 | if (ret != 0) { /* error */ } | ||
62 | </pre> | ||
63 | </div> | ||
64 | |||
65 | <p> | ||
66 | <code>xd3_init_config()</code> initializes the <code>xd3_config</code> struct | ||
67 | with default values. Many settings remain undocumented in the beta release. | ||
68 | The most relevant setting, <code>xd3_config.winsize</code>, sets the encoder | ||
69 | window size. The encoder allocates a buffer of this size if the program | ||
70 | supplies input in smaller units (unless the <code>XD3_FLUSH</code> flag is | ||
71 | set). <code>xd3_config_stream()</code> initializes the <code>xd3_stream</code> | ||
72 | object with the supplied configuration. | ||
73 | </p> | ||
74 | |||
75 | <h1>setting the source</h1> | ||
76 | <p> | ||
77 | The stream is ready for input at this point, though for encoding the source | ||
78 | data must be supplied now. To declare and initialize the xd3_source:</p> | ||
79 | |||
80 | <div class="example"> | ||
81 | <pre> | ||
82 | xd3_source source; | ||
83 | void *IO_handle = ...; | ||
84 | |||
85 | source.name = "..."; | ||
86 | source.size = file_size; | ||
87 | source.ioh= IO_handle; | ||
88 | source.blksize= 32768; | ||
89 | source.curblkno = (xoff_t) -1; | ||
90 | source.curblk = NULL; | ||
91 | |||
92 | ret = xd3_set_source (&stream, &source); | ||
93 | |||
94 | if (ret != 0) { /* error */ } | ||
95 | </pre> | ||
96 | </div> | ||
97 | |||
98 | <p> | ||
99 | The decoder sets source data in the same manner, but it may delay this step | ||
100 | until the application header has been received (<code>XD3_GOTHEADER</code>). | ||
101 | The application can also check whether source data is required for decoding | ||
102 | with the <code>xd3_decoder_needs_source()</code>.</p> | ||
103 | |||
104 | <p> | ||
105 | <code>xd3_source.blksize</code> determines the block size used for requesting | ||
106 | source blocks. If the first source block (or the entire source) is already in | ||
107 | memory, set <code>curblkno</code> to 0 and <code>curblk</code> to that block | ||
108 | of data.</p> | ||
109 | |||
110 | <h1>input/output loop</h1> | ||
111 | |||
112 | <p>The stream is now ready for input, which the application provides by | ||
113 | calling <code>xd3_avail_input()</code>. The application initiates | ||
114 | encoding or decoding at this point by calling one of two functions:</p> | ||
115 | |||
116 | <div class="example"> | ||
117 | <pre> | ||
118 | int xd3_encode_input (xd3_stream *stream) | ||
119 | int xd3_decode_input (xd3_stream *stream) | ||
120 | </pre> | ||
121 | </div> | ||
122 | |||
123 | <p>Unless there is an error, these routines return one of six result | ||
124 | codes which the application must handle. In many cases, all or most | ||
125 | of the handler code is shared between encoding and decoding. The | ||
126 | codes are:</p> | ||
127 | |||
128 | <ul> | ||
129 | <li> <code>XD3_INPUT</code>: The stream is ready for (or requires) more input. The | ||
130 | application should call xd3_avail_input when (if) more data is | ||
131 | available. | ||
132 | |||
133 | <li> <code>XD3_OUTPUT</code>: The stream has pending output. The application | ||
134 | should write or otherwise consume the block of data found in the | ||
135 | xd3_stream fields <code>next_out</code> and <code>avail_out</code>, | ||
136 | then call <code>xd3_consume_output</code>. | ||
137 | |||
138 | <li> <code>XD3_GETSRCBLK</code>: The stream is requesting a source block be read, | ||
139 | as described above. This is only ever returned if the xd3_getblk | ||
140 | callback was not provided. | ||
141 | |||
142 | <li> <code>XD3_GOTHEADER</code>: This decoder-specific code indicates that the | ||
143 | first VCDIFF window header has been received. This gives the | ||
144 | application a chance to inspect the application header before | ||
145 | decoding the first window. | ||
146 | |||
147 | <li> <code>XD3_WINSTART</code>: This is returned by both encoder and decoder prior to | ||
148 | processing a window. For encoding, this code is returned once there is enough | ||
149 | available input. For decoding, this is returned following each window header | ||
150 | (except the first, when XD3_GOTHEADER is returned instead). | ||
151 | |||
152 | <li> <code>XD3_WINFINISH</code>: This is returned when the output from a single | ||
153 | window has been fully consumed. | ||
154 | </ul> | ||
155 | |||
156 | <p>An application could be structured something like this:</p> | ||
157 | |||
158 | <div class="example"> | ||
159 | <pre> | ||
160 | do { | ||
161 | read (&indata, &insize); | ||
162 | if (reached_EOF) { | ||
163 | xd3_set_flags (&stream, XD3_FLUSH); | ||
164 | } | ||
165 | xd3_avail_input (&stream, indata, insize); | ||
166 | process: | ||
167 | ret = xd3_xxcode_input (&stream); | ||
168 | switch (ret) { | ||
169 | case XD3_INPUT: | ||
170 | continue; | ||
171 | case XD3_OUTPUT: | ||
172 | /* write data */ | ||
173 | goto process; | ||
174 | case XD3_GETSRCBLK: | ||
175 | /* set source block */ | ||
176 | goto process; | ||
177 | case XD3_GOTHEADER: | ||
178 | case XD3_WINSTART: | ||
179 | case XD3_WINFINISH: | ||
180 | /* no action necessary */ | ||
181 | goto process; | ||
182 | default: | ||
183 | /* error */ | ||
184 | } | ||
185 | } while (! reached_EOF); | ||
186 | </pre> | ||
187 | </div> | ||
188 | |||
189 | <p> | ||
190 | All that remains is to close the stream and free its resources. The | ||
191 | <code>xd3_close_stream()</code> checks several error conditions but otherwise | ||
192 | involves no input or output. The <code>xd3_free_stream()</code> routine frees | ||
193 | all memory allocated by the stream.</p> | ||
194 | |||
195 | <h1>misc</h1> | ||
196 | |||
197 | <p> | ||
198 | There are two convenience functions for encoding to and decoding from | ||
199 | in-memory buffers. See the <code>xd3_encode_completely</code> and | ||
200 | <code>xd3_decode_completely</code> interfaces.</p> | ||
201 | |||
202 | <p> | ||
203 | There are two routines to get and set the application header. When | ||
204 | encoding, the application header must be set before the first | ||
205 | <code>XD3_WINSTART</code>. When decoding, the application header is available | ||
206 | after the first <code>XD3_GOTHEADER</code>.</p> | ||
207 | |||
208 | </td> | ||
209 | </tr> | ||
210 | </table> | ||
211 | </body> | ||
212 | </html> | ||
diff --git a/xdelta3/www/xdelta3-cmdline.html b/xdelta3/www/xdelta3-cmdline.html new file mode 100755 index 0000000..cabb547 --- /dev/null +++ b/xdelta3/www/xdelta3-cmdline.html | |||
@@ -0,0 +1,166 @@ | |||
1 | <!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"> | ||
2 | <html> | ||
3 | <head> | ||
4 | <title>Xdelta3 command-line guide (BETA)</title> | ||
5 | <meta http-equiv="content-type" content="text/html; charset=ISO-8859-1"> | ||
6 | <link rel="stylesheet" type="text/css" href="xdelta3.css"/> | ||
7 | </head> | ||
8 | <body> | ||
9 | |||
10 | <!-- $Format: "$WWWLeftNavBar$" $ --> | ||
11 | <table cellpadding="20px" width=700> <tr> <td class="leftbdr" valign=top height=600 width=100> <div class="leftbody"> <h1>Xdelta</h1> <a href="xdelta3.html">overview</a><br> <a href="xdelta3-cmdline.html">command line</a><br> <a href="xdelta3-api-guide.html">api guide</a><br> <br><a href="http://xdelta.org">xdelta.org</a></h2> </div> </td> <td valign=top width=500> | ||
12 | |||
13 | |||
14 | <!-- Copyright (C) 2003 and onward. Joshua P. MacDonald --> | ||
15 | |||
16 | <h1>command-line guide</h1> | ||
17 | |||
18 | <code>xdelta3</code> can be run with syntax familiar but not similar to gzip; | ||
19 | it requires you to specify the output file in most cases, rather than applying | ||
20 | any default filename extensions. These are cases that resemble gzip:<p> | ||
21 | |||
22 | <div class="example"> | ||
23 | <pre> | ||
24 | xdelta3 -c file_to_compress > delta_file | ||
25 | xdelta3 -dc delta_file > file_uncompressed | ||
26 | </pre> | ||
27 | </div> | ||
28 | <p> | ||
29 | |||
30 | The <code>-c</code> option says to write to the standard output. The | ||
31 | <code>-d</code> option says to decode. The default action is to encode (also | ||
32 | specified by <code>-e</code>). <code>xdelta3</code> also supports long | ||
33 | command names, these two commands are equivalent to the ones above:<p> | ||
34 | |||
35 | <div class="example"> | ||
36 | <pre> | ||
37 | xdelta3 encode file_to_compress > delta_file | ||
38 | xdelta3 decode delta_file > file_uncompressed | ||
39 | </pre> | ||
40 | </div> | ||
41 | <p> | ||
42 | |||
43 | <code>xdelta3</code> has the notion of a default filename for decoding. If | ||
44 | you specified a file name during the encode step, it is used as the default | ||
45 | for decoding. The <code>-s</code> option specifies a <em>source file</em> for | ||
46 | delta-compression.<p> | ||
47 | |||
48 | <div class="example"> | ||
49 | <pre> | ||
50 | xdelta3 -s source_file target_file delta_file | ||
51 | xdelta3 -d delta_file | ||
52 | </pre> | ||
53 | </div> | ||
54 | <p> | ||
55 | |||
56 | The second line above fills in "source_file" and "target_file" as the input | ||
57 | and output filenames. Without the <code>-f</code> option, | ||
58 | <code>xdelta3</code> will not overwrite an existing file. When there are no | ||
59 | default filenames (e.g., in decode), standard input and standard output are | ||
60 | used. In the example below, the default source filename is applied in | ||
61 | decoding. | ||
62 | <p> | ||
63 | |||
64 | <div class="example"> | ||
65 | <pre> | ||
66 | cat target_file | xdelta3 -s source_file > delta_file | ||
67 | xdelta3 -d < delta_file > target_file.1 | ||
68 | </pre> | ||
69 | </div> | ||
70 | <p> | ||
71 | |||
72 | <code>xdelta3</code> recognizes externally compressed inputs, so the following | ||
73 | command produces the expected results:<p> | ||
74 | |||
75 | <div class="example"> | ||
76 | <pre> | ||
77 | xdelta3 -s beta2.tar.gz beta3.tar.gz beta3.tar.gz.xd | ||
78 | xdelta3 -ds beta2.tar.gz beta3.tar.gz.xd beta3.tar.gz.1 | ||
79 | </pre> | ||
80 | </div> | ||
81 | <p> | ||
82 | |||
83 | You can avoid the intermediate file and use <code>xdelta3</code> together | ||
84 | with a <code>tar</code>-pipeline. | ||
85 | |||
86 | <div class="example"> | ||
87 | <pre> | ||
88 | tar -cf - beta3 | xdelta3 -s beta2.tar > beta3.tar.xd | ||
89 | xdelta3 -d beta3.tar.xd | tar -xf - | ||
90 | </pre> | ||
91 | </div> | ||
92 | <p> | ||
93 | |||
94 | <code>xdelta</code> can print various information about a compressed file with | ||
95 | the "printhdr" command. The "printhdrs" command prints information about each | ||
96 | <em>window</em> of the encoding. The "printdelta" command prints the actual | ||
97 | encoding for each window, in human-readable format.<p> | ||
98 | |||
99 | <div class="example"> | ||
100 | <pre> | ||
101 | # xdelta3 printdelta delta_file | ||
102 | VCDIFF version: 0 | ||
103 | VCDIFF header size: 5 | ||
104 | VCDIFF header indicator: none | ||
105 | VCDIFF secondary compressor: none | ||
106 | VCDIFF window number: 0 | ||
107 | VCDIFF window indicator: VCD_SOURCE VCD_ADLER32 | ||
108 | VCDIFF adler32 checksum: 48BFADB6 | ||
109 | VCDIFF copy window length: 2813 | ||
110 | VCDIFF copy window offset: 0 | ||
111 | VCDIFF delta encoding length: 93 | ||
112 | VCDIFF target window length: 2903 | ||
113 | VCDIFF data section length: 72 | ||
114 | VCDIFF inst section length: 8 | ||
115 | VCDIFF addr section length: 3 | ||
116 | Offset Code Type1 Size1 @Addr1 + Type2 Size2 @Addr2 | ||
117 | 000000 019 CPY_0 1535 @0 | ||
118 | 001535 001 ADD 72 | ||
119 | 001607 019 CPY_0 1296 @1517 | ||
120 | </pre> | ||
121 | </div> | ||
122 | <br> | ||
123 | <p> | ||
124 | |||
125 | |||
126 | <h1>xdelta3 -h</h1> | ||
127 | |||
128 | <pre> | ||
129 | usage: xdelta3 [command/options] [input [output]] | ||
130 | commands are: | ||
131 | encode encodes the input | ||
132 | decode decodes the input | ||
133 | config prints xdelta3 configuration | ||
134 | test run the builtin tests | ||
135 | special commands for VCDIFF inputs: | ||
136 | printhdr print information about the first window | ||
137 | printhdrs print information about all windows | ||
138 | printdelta print information about the entire delta | ||
139 | options are: | ||
140 | -c use stdout instead of default | ||
141 | -d same as decode command | ||
142 | -e same as encode command | ||
143 | -f force overwrite | ||
144 | -n disable checksum (encode/decode) | ||
145 | -D disable external decompression (encode/decode) | ||
146 | -R disable external recompression (decode) | ||
147 | -N disable small string-matching compression | ||
148 | -S [djw|fgk] disable/enable secondary compression | ||
149 | -A [apphead] disable/provide application header | ||
150 | -s source source file to copy from (if any) | ||
151 | -B blksize source file block size | ||
152 | -W winsize input window buffer size | ||
153 | -v be verbose (max 2) | ||
154 | -q be quiet | ||
155 | -h show help | ||
156 | -V show version | ||
157 | -P repeat count (for profiling) | ||
158 | </pre> | ||
159 | <p> | ||
160 | |||
161 | </td> | ||
162 | </tr> | ||
163 | </table> | ||
164 | |||
165 | </body> | ||
166 | </html> | ||
diff --git a/xdelta3/www/xdelta3.css b/xdelta3/www/xdelta3.css new file mode 100755 index 0000000..269b1c9 --- /dev/null +++ b/xdelta3/www/xdelta3.css | |||
@@ -0,0 +1,69 @@ | |||
1 | body { | ||
2 | margin-top: 15px; | ||
3 | margin-left: 15px; | ||
4 | background-color:#b0b0b0; | ||
5 | color:#204080; | ||
6 | font-family: serif; | ||
7 | word-spacing: 0.5pt; | ||
8 | text-indent: 0pt; | ||
9 | } | ||
10 | |||
11 | A:visited { | ||
12 | color: #204080; | ||
13 | } | ||
14 | A:link { | ||
15 | color: #102040; | ||
16 | } | ||
17 | h1 { | ||
18 | color: #103060; | ||
19 | font-size: 150%; | ||
20 | } | ||
21 | |||
22 | h2 { | ||
23 | color: #103060; | ||
24 | font-size: 80%; | ||
25 | } | ||
26 | |||
27 | code, pre { | ||
28 | font-family: monospace; | ||
29 | } | ||
30 | |||
31 | pre { | ||
32 | color: #102040; | ||
33 | } | ||
34 | |||
35 | code { | ||
36 | color:#0060c0; | ||
37 | } | ||
38 | |||
39 | .example { | ||
40 | margin-right: 20px; | ||
41 | margin-left: 20px; | ||
42 | |||
43 | padding-left: 20px; | ||
44 | padding-right: 20px; | ||
45 | padding-top: 0px; | ||
46 | padding-bottom: 0px; | ||
47 | |||
48 | background-color: #808080; | ||
49 | border-style: solid; | ||
50 | border-width: 1px; | ||
51 | border-color: #000000; | ||
52 | } | ||
53 | |||
54 | .leftbdr { | ||
55 | font-family: sans-serif; | ||
56 | color: #103060; | ||
57 | background-color: #606060; | ||
58 | border-style: solid; | ||
59 | border-width: 1px; | ||
60 | border-color: #000000; | ||
61 | } | ||
62 | .leftbody A:visited { | ||
63 | color: #102040; | ||
64 | text-decoration: none; | ||
65 | } | ||
66 | .leftbody A:link { | ||
67 | color: #102040; | ||
68 | text-decoration: none; | ||
69 | } | ||
diff --git a/xdelta3/www/xdelta3.html b/xdelta3/www/xdelta3.html new file mode 100755 index 0000000..3bddfd9 --- /dev/null +++ b/xdelta3/www/xdelta3.html | |||
@@ -0,0 +1,89 @@ | |||
1 | <!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"> | ||
2 | <html> | ||
3 | <head> | ||
4 | <title>Xdelta3 delta compression library (BETA)</title> | ||
5 | <meta http-equiv="content-type" content="text/html; charset=ISO-8859-1"> | ||
6 | <link rel="stylesheet" type="text/css" href="xdelta3.css"/> | ||
7 | </head> | ||
8 | <body> | ||
9 | |||
10 | <!-- $Format: "$WWWLeftNavBar$" $ --> | ||
11 | <table cellpadding="20px" width=700> <tr> <td class="leftbdr" valign=top height=600 width=100> <div class="leftbody"> <h1>Xdelta</h1> <a href="xdelta3.html">overview</a><br> <a href="xdelta3-cmdline.html">command line</a><br> <a href="xdelta3-api-guide.html">api guide</a><br> <br><a href="http://xdelta.org">xdelta.org</a></h2> </div> </td> <td valign=top width=500> | ||
12 | |||
13 | <!-- Copyright (C) 2003 and onward. Joshua P. MacDonald --> | ||
14 | |||
15 | <h1>version three?</h1> | ||
16 | |||
17 | Xdelta3 is the third and latest release of Xdelta, which is a set of tools and | ||
18 | APIs for reading and writing compressed <em>deltas</em>. Deltas encode the | ||
19 | differences between two versions of a document. This release features a | ||
20 | completely new compression engine, several algorithmic improvements, a fully | ||
21 | programmable interface modelled after zlib, in addition to a command-line | ||
22 | utility, use of the RFC3284 (VCDIFF) encoding, a python extension, and now | ||
23 | 64-bit support.<p> | ||
24 | |||
25 | Xdelta3 is <em>tiny</em>. A minimal, fully functional VCDIFF decoder library | ||
26 | pipes in at 16KB. The command-line utility complete with encoder/decoder | ||
27 | tools, external compression support, and the <code>djw</code> secondary | ||
28 | compression routines, is just under 60KB, slightly larger than a | ||
29 | <code>gzip</code> executable.<p> | ||
30 | |||
31 | Xdelta3 has few dependencies because it's capable of stand-alone file | ||
32 | compression (i.e., what zlib and gzip do). The stand-alone compression of | ||
33 | Xdelta3/VCDIFF is 10-20% worse than <code>gzip</code>, you may view this as | ||
34 | paying for the convenience-cost of having a single encoding, tool, and api | ||
35 | designed to do both <em>data-compression</em> and <em>differencing</em> at | ||
36 | once.<p> | ||
37 | |||
38 | The Xdelta3 command-line tool, <code>xdelta3</code>, supports several | ||
39 | convenience routines. Delta compression works when the two inputs are | ||
40 | similar, but often we would like to compute the difference between two | ||
41 | compressed documents. <code>xdelta3</code> has (optional) support to | ||
42 | recognize externally compressed inputs and process them correctly. This | ||
43 | support is facilitated, in part, using the VCDIFF <em>application header</em> | ||
44 | field to store <code>xdelta3</code> meta-data, which includes the original | ||
45 | file names (if any) and codes to indicate whether the inputs were externally | ||
46 | compressed. Applications may provide their own application header.<p> | ||
47 | |||
48 | <h1>what are version one and version two?</h1> | ||
49 | |||
50 | Many shortcomings in the Xdelta1.x release are fixed in its replacement, | ||
51 | Xdelta3. Xdelta1 used both a simplistic compression algorithm and a | ||
52 | simplistic encoding. For example, Xdelta1 compresses the entire document at | ||
53 | once and thus uses memory proportional to the input size.<p> | ||
54 | |||
55 | The Xdelta1 compression engine made no attempt to find matching strings | ||
56 | smaller than say 16 or 32 bytes, and the encoding does not attempt to | ||
57 | efficiently encode the <code>COPY</code> and <code>ADD</code> instructions | ||
58 | which constitute a delta. For documents with highly similar data, however, | ||
59 | these techniques degrade performance by a relatively insignificant amount. | ||
60 | (Xdelta1.x compresses the delta with Zlib to improve matters, but this | ||
61 | dependency stinks.)<p> | ||
62 | |||
63 | Despite leaving much to be desired, Xdelta1 showed that you can do well | ||
64 | without great complexity; as it turns out, the particulars of the compression | ||
65 | engine are relatively insignificant compared to the difficulty of | ||
66 | programming an application that uses delta-compression. Better solve that | ||
67 | first.<p> | ||
68 | |||
69 | What we want are <em>systems</em> that manage compressed storage and network | ||
70 | communication. The second major release, Xdelta2, addresses these issues. | ||
71 | Xdelta2 features a storage interface -- part database and part file system -- | ||
72 | which allows indexing and labeling compressed documents. The feature set is | ||
73 | similar to RCS. The Xdelta2 interface supports efficient algorithms for | ||
74 | <em>extracting</em> deltas between any pair of versions in storage. The | ||
75 | extraction technique also does not rely on hierarchy or centralizing the | ||
76 | namespace, making the techniques ideal for peer-to-peer communication and | ||
77 | proxy architectures. I am grateful to Mihut Ionescu for implementing the | ||
78 | Xproxy HTTP delta-compressing proxy system based on this interface and | ||
79 | studying the benefits of delta-compression in that context. Xdelta2 stressed | ||
80 | the Xdelta1 compression engine beyond its limits; so Xdelta3 is designed as | ||
81 | the ideal replacement. The Xdelta2 techniques are yet to be ported to the new | ||
82 | implementation.<p> | ||
83 | |||
84 | </td> | ||
85 | </tr> | ||
86 | </table> | ||
87 | |||
88 | </body> | ||
89 | </html> | ||
diff --git a/xdelta3/xdelta3-cfgs.h b/xdelta3/xdelta3-cfgs.h new file mode 100755 index 0000000..329f3e9 --- /dev/null +++ b/xdelta3/xdelta3-cfgs.h | |||
@@ -0,0 +1,118 @@ | |||
1 | /* xdelta 3 - delta compression tools and library | ||
2 | * Copyright (C) 2001 and onward. Joshua P. MacDonald | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify | ||
5 | * it under the terms of the GNU General Public License as published by | ||
6 | * the Free Software Foundation; either version 2 of the License, or | ||
7 | * (at your option) any later version. | ||
8 | * | ||
9 | * This program is distributed in the hope that it will be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write to the Free Software | ||
16 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
17 | */ | ||
18 | |||
19 | /****************************************************************************************** | ||
20 | SOFT string matcher | ||
21 | ******************************************************************************************/ | ||
22 | |||
23 | #if XD3_BUILD_SOFT | ||
24 | |||
25 | #define TEMPLATE soft | ||
26 | #define LLOOK stream->large_look | ||
27 | #define LSTEP stream->large_step | ||
28 | #define SLOOK stream->small_look | ||
29 | #define SCHAIN stream->small_chain | ||
30 | #define SLCHAIN stream->small_lchain | ||
31 | #define SSMATCH stream->ssmatch | ||
32 | #define TRYLAZY stream->try_lazy | ||
33 | #define MAXLAZY stream->max_lazy | ||
34 | #define LONGENOUGH stream->long_enough | ||
35 | #define PROMOTE stream->promote | ||
36 | |||
37 | #define SOFTCFG 1 | ||
38 | #include "xdelta3.c" | ||
39 | #undef SOFTCFG | ||
40 | |||
41 | #undef TEMPLATE | ||
42 | #undef LLOOK | ||
43 | #undef SLOOK | ||
44 | #undef LSTEP | ||
45 | #undef SCHAIN | ||
46 | #undef SLCHAIN | ||
47 | #undef SSMATCH | ||
48 | #undef TRYLAZY | ||
49 | #undef MAXLAZY | ||
50 | #undef LONGENOUGH | ||
51 | #undef PROMOTE | ||
52 | #endif | ||
53 | |||
54 | #define SOFTCFG 0 | ||
55 | |||
56 | /****************************************************************************************** | ||
57 | FAST string matcher | ||
58 | ******************************************************************************************/ | ||
59 | #if XD3_BUILD_FAST | ||
60 | #define TEMPLATE fast | ||
61 | #define LLOOK 32 | ||
62 | #define LSTEP 32 | ||
63 | #define SLOOK 4 | ||
64 | |||
65 | #define SCHAIN 2 // For testcase/3, this produces miserable performance | ||
66 | #define SLCHAIN 2 // with these values != 1 and large input window size | ||
67 | |||
68 | #define SSMATCH 1 | ||
69 | #define TRYLAZY 0 | ||
70 | #define MAXLAZY 0 | ||
71 | #define LONGENOUGH 64 | ||
72 | #define PROMOTE 0 | ||
73 | |||
74 | #include "xdelta3.c" | ||
75 | |||
76 | #undef TEMPLATE | ||
77 | #undef LLOOK | ||
78 | #undef SLOOK | ||
79 | #undef LSTEP | ||
80 | #undef SCHAIN | ||
81 | #undef SLCHAIN | ||
82 | #undef SSMATCH | ||
83 | #undef TRYLAZY | ||
84 | #undef MAXLAZY | ||
85 | #undef LONGENOUGH | ||
86 | #undef PROMOTE | ||
87 | #endif | ||
88 | |||
89 | /****************************************************************************************** | ||
90 | SLOW string matcher | ||
91 | ******************************************************************************************/ | ||
92 | #if XD3_BUILD_SLOW | ||
93 | #define TEMPLATE slow | ||
94 | #define LLOOK 64 | ||
95 | #define LSTEP 64 // TODO | ||
96 | #define SLOOK 4 | ||
97 | #define SCHAIN 128 | ||
98 | #define SLCHAIN 16 | ||
99 | #define SSMATCH 0 | ||
100 | #define TRYLAZY 1 | ||
101 | #define MAXLAZY 8 | ||
102 | #define LONGENOUGH 128 | ||
103 | #define PROMOTE 0 | ||
104 | |||
105 | #include "xdelta3.c" | ||
106 | |||
107 | #undef TEMPLATE | ||
108 | #undef LLOOK | ||
109 | #undef SLOOK | ||
110 | #undef LSTEP | ||
111 | #undef SCHAIN | ||
112 | #undef SLCHAIN | ||
113 | #undef SSMATCH | ||
114 | #undef TRYLAZY | ||
115 | #undef MAXLAZY | ||
116 | #undef LONGENOUGH | ||
117 | #undef PROMOTE | ||
118 | #endif | ||
diff --git a/xdelta3/xdelta3-djw.h b/xdelta3/xdelta3-djw.h new file mode 100755 index 0000000..90f58e2 --- /dev/null +++ b/xdelta3/xdelta3-djw.h | |||
@@ -0,0 +1,1917 @@ | |||
1 | /* xdelta 3 - delta compression tools and library | ||
2 | * Copyright (C) 2002 and onward. Joshua P. MacDonald | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify | ||
5 | * it under the terms of the GNU General Public License as published by | ||
6 | * the Free Software Foundation; either version 2 of the License, or | ||
7 | * (at your option) any later version. | ||
8 | * | ||
9 | * This program is distributed in the hope that it will be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write to the Free Software | ||
16 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
17 | */ | ||
18 | |||
19 | #ifndef _XDELTA3_DJW_H_ | ||
20 | #define _XDELTA3_DJW_H_ | ||
21 | |||
22 | /* The following people deserve much credit for the algorithms and techniques contained in | ||
23 | * this file: | ||
24 | |||
25 | Julian Seward | ||
26 | Bzip2 sources, implementation of the multi-table Huffman technique. | ||
27 | |||
28 | Jean-loup Gailly and Mark Adler and L. Peter Deutsch | ||
29 | Zlib source code, RFC 1951 | ||
30 | |||
31 | Daniel S. Hirschberg and Debra A. Lelewer | ||
32 | "Efficient Decoding of Prefix Codes" | ||
33 | Communications of the ACM, April 1990 33(4). | ||
34 | |||
35 | David J. Wheeler | ||
36 | Program bred3.c, bexp3 and accompanying documents bred3.ps, huff.ps. | ||
37 | This contains the idea behind the multi-table Huffman and 1-2 coding techniques. | ||
38 | ftp://ftp.cl.cam.ac.uk/users/djw3/ | ||
39 | |||
40 | */ | ||
41 | |||
42 | /* OPT: during the multi-table iteration, pick the worst-overall performing table and | ||
43 | * replace it with exactly the frequencies of the worst-overall performing sector or | ||
44 | * N-worst performing sectors. */ | ||
45 | |||
46 | /* REF: See xdfs-0.222 and xdfs-0.226 for some old experiments with the Bzip prefix coding | ||
47 | * strategy. xdfs-0.256 contains the last of the other-format tests, including RFC1950 | ||
48 | * and the RFC1950+MTF tests. */ | ||
49 | |||
50 | #define DJW_MAX_CODELEN 32 /* Maximum length of an alphabet code. */ | ||
51 | |||
52 | #define DJW_TOTAL_CODES (DJW_MAX_CODELEN+2) /* [RUN_0, RUN_1, 1-DJW_MAX_CODELEN] */ | ||
53 | |||
54 | #define RUN_0 0 /* Symbols used in MTF+1/2 coding. */ | ||
55 | #define RUN_1 1 | ||
56 | |||
57 | #define DJW_BASIC_CODES 5 /* Number of code lengths always encoded (djw_encode_basic array) */ | ||
58 | #define DJW_RUN_CODES 2 /* Number of run codes */ | ||
59 | #define DJW_EXTRA_12OFFSET 7 /* Offset of extra codes */ | ||
60 | #define DJW_EXTRA_CODES 15 /* Number of optionally encoded code lengths (djw_encode_extra array) */ | ||
61 | #define DJW_EXTRA_CODE_BITS 4 /* Number of bits to code [0-DJW_EXTRA_CODES] */ | ||
62 | |||
63 | #define DJW_MAX_GROUPS 8 /* Max number of group coding tables */ | ||
64 | #define DJW_GROUP_BITS 3 /* Number of bits to code [1-DJW_MAX_GROUPS] */ | ||
65 | |||
66 | #define DJW_SECTORSZ_MULT 5 /* Multiplier for encoded sectorsz */ | ||
67 | #define DJW_SECTORSZ_BITS 5 /* Number of bits to code group size */ | ||
68 | #define DJW_SECTORSZ_MAX ((1 << DJW_SECTORSZ_BITS) * DJW_SECTORSZ_MULT) | ||
69 | |||
70 | #define DJW_MAX_ITER 6 /* Maximum number of iterations to find group tables. */ | ||
71 | #define DJW_MIN_IMPROVEMENT 20 /* Minimum number of bits an iteration must reduce coding by. */ | ||
72 | |||
73 | #define DJW_MAX_CLCLEN 15 /* Maximum code length of a prefix code length */ | ||
74 | #define DJW_CLCLEN_BITS 4 /* Number of bits to code [0-DJW_MAX_CLCLEN] */ | ||
75 | |||
76 | #define DJW_MAX_GBCLEN 7 /* Maximum code length of a group selector */ | ||
77 | #define DJW_GBCLEN_BITS 3 /* Number of bits to code [0-DJW_MAX_GBCLEN] | ||
78 | * @!@ Actually, should never have zero code lengths here, or | ||
79 | * else a group went unused. Write a test for this: if a group | ||
80 | * goes unused, eliminate it? */ | ||
81 | |||
82 | #define EFFICIENCY_BITS 16 /* It has to save at least this many bits... */ | ||
83 | |||
84 | typedef struct _djw_stream djw_stream; | ||
85 | typedef struct _djw_heapen djw_heapen; | ||
86 | typedef struct _djw_prefix djw_prefix; | ||
87 | typedef uint32_t djw_weight; | ||
88 | |||
89 | /* To enable Huffman tuning code... */ | ||
/* Tuning is off unless the build defines TUNE_HUFFMAN itself. */
#ifndef TUNE_HUFFMAN
#define TUNE_HUFFMAN 0
#endif

#if TUNE_HUFFMAN == 0
/* Normal build: the encoder body is compiled directly under the name
 * xd3_encode_huff.  IF_TUNE() discards its argument, IF_NTUNE() keeps it. */
#define xd3_real_encode_huff xd3_encode_huff
#define IF_TUNE(x)
#define IF_NTUNE(x) x
#else
/* Tuning build: IF_TUNE() keeps its argument, IF_NTUNE() discards it, and the
 * encoder records its statistics in the file-scope variables below. */
static uint xd3_bitsof_output (xd3_output *output, bit_state *bstate);
#define IF_TUNE(x) x
#define IF_NTUNE(x)
/* Copy of the prefix-symbol frequencies, stored by djw_encode_prefix. */
static djw_weight tune_freq[DJW_TOTAL_CODES];
/* Code-length tables; the encoder uses these in place of its local tables. */
static uint8_t tune_clen[DJW_MAX_GROUPS][ALPHABET_SIZE];
/* Bit counts recorded by xd3_real_encode_huff. */
static usize_t tune_prefix_bits;
static usize_t tune_select_bits;
static usize_t tune_encode_bits;
#endif
/* One node of the prefix tree built by djw_build_prefix.  Nodes live in an
 * array and refer to each other by index; index 0 is a sentinel. */
struct _djw_heapen
{
  uint32_t depth;  /* 0 for a symbol, 1 + the larger child depth for an
                    * internal node; breaks frequency ties in heap_less. */
  uint32_t freq;   /* Weight: the symbol's frequency, or the sum of the two
                    * children for an internal node. */
  uint32_t parent; /* Index of the parent node, 0 while there is none. */
};
114 | |||
/* Input and output buffers for the move-to-front + 1/2 run coding performed
 * by djw_compute_mtf_1_2. */
struct _djw_prefix
{
  usize_t scount;  /* Number of valid entries in symbol[]. */
  uint8_t *symbol; /* Symbols to be coded. */
  usize_t mcount;  /* Number of entries written to mtfsym[]. */
  uint8_t *mtfsym; /* Coded output: RUN_0/RUN_1 for runs of the front symbol,
                    * otherwise the MTF position offset by RUN_1. */
  uint8_t *repcnt; /* NOTE(review): not referenced in the visible part of
                    * this file; purpose unconfirmed. */
};
123 | |||
/* State object for the DJW secondary compressor, created by djw_alloc and
 * released by djw_destroy.  It currently carries no data; the single member
 * is a placeholder. */
struct _djw_stream
{
  int unused;
};
128 | |||
129 | /* Each Huffman table consists of 256 "code length" (CLEN) codes, which are themselves | ||
130 | * Huffman coded after eliminating repeats and move-to-front coding. The prefix consists | ||
131 | * of all the CLEN codes in djw_encode_basic plus a 4-bit value stating how many of the | ||
132 | * djw_encode_extra codes are actually coded (the rest are presumed zero, or unused CLEN | ||
133 | * codes). | ||
134 | * | ||
135 | * The values of these two arrays were arrived at by studying the distribution of min | ||
136 | * and max clen over a collection of DATA, INST, and ADDR inputs. The goal is to specify | ||
137 | * the order of djw_extra_codes that is most likely to minimize the number of extra codes | ||
138 | * that must be encoded. | ||
139 | * | ||
140 | * Results: 158896 sections were counted by compressing files (window size 512K) listed | ||
141 | * with: `find / -type f ( -user jmacd -o -perm +444 )` | ||
142 | * | ||
143 | * The distribution of CLEN codes for each efficient invocation of the secondary | ||
144 | * compressor (taking the best number of groups/sector size) was recorded. Then we look at | ||
145 | * the distribution of min and max clen values, counting the number of times the value | ||
146 | * C_low is less than the min and C_high is greater than the max. Values >= C_high and <= | ||
147 | * C_low will not have their lengths coded. The results are sorted and the least likely | ||
148 | * 15 are placed into the djw_encode_extra[] array in order. These values are used as | ||
149 | * the initial MTF ordering. | ||
150 | |||
151 | clow[1] = 155119 | ||
152 | clow[2] = 140325 | ||
153 | clow[3] = 84072 | ||
154 | --- | ||
155 | clow[4] = 7225 | ||
156 | clow[5] = 1093 | ||
157 | clow[6] = 215 | ||
158 | --- | ||
159 | chigh[4] = 1 | ||
160 | chigh[5] = 30 | ||
161 | chigh[6] = 218 | ||
162 | chigh[7] = 2060 | ||
163 | chigh[8] = 13271 | ||
164 | --- | ||
165 | chigh[9] = 39463 | ||
166 | chigh[10] = 77360 | ||
167 | chigh[11] = 118298 | ||
168 | chigh[12] = 141360 | ||
169 | chigh[13] = 154086 | ||
170 | chigh[14] = 157967 | ||
171 | chigh[15] = 158603 | ||
172 | chigh[16] = 158864 | ||
173 | chigh[17] = 158893 | ||
174 | chigh[18] = 158895 | ||
175 | chigh[19] = 158896 | ||
176 | chigh[20] = 158896 | ||
177 | |||
178 | */ | ||
179 | |||
/* Code lengths placed after the basic ones in the initial move-to-front
 * table built by djw_init_clen_mtf_1_2, in the order shown. */
static const uint8_t djw_encode_12extra[DJW_EXTRA_CODES] =
{
  9, 10, 3, 11, 2, 12, 13, 1, 14, 15, 16, 17, 18, 19, 20
};

/* Code lengths placed directly after the leading 0 entry of the initial
 * move-to-front table built by djw_init_clen_mtf_1_2. */
static const uint8_t djw_encode_12basic[DJW_BASIC_CODES] =
{
  4, 5, 6, 7, 8,
};
189 | |||
190 | /*********************************************************************/ | ||
191 | /* DECLS */ | ||
192 | /*********************************************************************/ | ||
193 | |||
194 | static djw_stream* djw_alloc (xd3_stream *stream /*, int alphabet_size */); | ||
195 | static void djw_init (djw_stream *h); | ||
196 | static void djw_destroy (xd3_stream *stream, | ||
197 | djw_stream *h); | ||
198 | |||
199 | #if XD3_ENCODER | ||
200 | static int xd3_encode_huff (xd3_stream *stream, | ||
201 | djw_stream *sec_stream, | ||
202 | xd3_output *input, | ||
203 | xd3_output *output, | ||
204 | xd3_sec_cfg *cfg); | ||
205 | #endif | ||
206 | |||
207 | static int xd3_decode_huff (xd3_stream *stream, | ||
208 | djw_stream *sec_stream, | ||
209 | const uint8_t **input, | ||
210 | const uint8_t *const input_end, | ||
211 | uint8_t **output, | ||
212 | const uint8_t *const output_end); | ||
213 | |||
214 | /*********************************************************************/ | ||
215 | /* HUFFMAN */ | ||
216 | /*********************************************************************/ | ||
217 | |||
218 | static djw_stream* | ||
219 | djw_alloc (xd3_stream *stream) | ||
220 | { | ||
221 | return xd3_alloc (stream, sizeof (djw_stream), 1); | ||
222 | } | ||
223 | |||
224 | static void | ||
225 | djw_init (djw_stream *h) | ||
226 | { | ||
227 | /* Fields are initialized prior to use. */ | ||
228 | } | ||
229 | |||
230 | static void | ||
231 | djw_destroy (xd3_stream *stream, | ||
232 | djw_stream *h) | ||
233 | { | ||
234 | xd3_free (stream, h); | ||
235 | } | ||
236 | |||
237 | |||
238 | /*********************************************************************/ | ||
239 | /* HEAP */ | ||
240 | /*********************************************************************/ | ||
241 | |||
242 | static INLINE int | ||
243 | heap_less (const djw_heapen *a, const djw_heapen *b) | ||
244 | { | ||
245 | return a->freq < b->freq || | ||
246 | (a->freq == b->freq && | ||
247 | a->depth < b->depth); | ||
248 | } | ||
249 | |||
250 | static INLINE void | ||
251 | heap_insert (uint *heap, const djw_heapen *ents, uint p, const uint e) | ||
252 | { | ||
253 | /* Insert ents[e] into next slot heap[p] */ | ||
254 | uint pp = p/2; /* P's parent */ | ||
255 | |||
256 | while (heap_less (& ents[e], & ents[heap[pp]])) | ||
257 | { | ||
258 | heap[p] = heap[pp]; | ||
259 | p = pp; | ||
260 | pp = p/2; | ||
261 | } | ||
262 | |||
263 | heap[p] = e; | ||
264 | } | ||
265 | |||
266 | static INLINE djw_heapen* | ||
267 | heap_extract (uint *heap, const djw_heapen *ents, uint heap_last) | ||
268 | { | ||
269 | uint smallest = heap[1]; | ||
270 | uint p, pc, t; | ||
271 | |||
272 | /* Caller decrements heap_last, so heap_last+1 is the replacement elt. */ | ||
273 | heap[1] = heap[heap_last+1]; | ||
274 | |||
275 | /* Re-heapify */ | ||
276 | for (p = 1; ; p = pc) | ||
277 | { | ||
278 | pc = p*2; | ||
279 | |||
280 | /* Reached bottom of heap */ | ||
281 | if (pc > heap_last) { break; } | ||
282 | |||
283 | /* See if second child is smaller. */ | ||
284 | if (pc < heap_last && heap_less (& ents[heap[pc+1]], & ents[heap[pc]])) { pc += 1; } | ||
285 | |||
286 | /* If pc is not smaller than p, heap property re-established. */ | ||
287 | if (! heap_less (& ents[heap[pc]], & ents[heap[p]])) { break; } | ||
288 | |||
289 | t = heap[pc]; | ||
290 | heap[pc] = heap[p]; | ||
291 | heap[p] = t; | ||
292 | } | ||
293 | |||
294 | return (djw_heapen*) & ents[smallest]; | ||
295 | } | ||
296 | |||
#if XD3_DEBUG
/* Debug-only consistency check: asserts that no heap slot in [1, heap_last]
 * holds an entry that sorts before the entry in its parent slot. */
static void
heap_check (uint *heap, djw_heapen *ents, uint heap_last)
{
  uint slot = 1;

  while (slot <= heap_last)
    {
      const djw_heapen *node  = & ents[heap[slot]];
      const djw_heapen *above = & ents[heap[slot / 2]];

      XD3_ASSERT (! heap_less (node, above));
      slot += 1;
    }
}
#endif
309 | |||
310 | /*********************************************************************/ | ||
311 | /* MTF, 1/2 */ | ||
312 | /*********************************************************************/ | ||
313 | |||
314 | static INLINE usize_t | ||
315 | djw_update_mtf (uint8_t *mtf, usize_t mtf_i) | ||
316 | { | ||
317 | int k; | ||
318 | usize_t sym = mtf[mtf_i]; | ||
319 | |||
320 | for (k = mtf_i; k != 0; k -= 1) { mtf[k] = mtf[k-1]; } | ||
321 | |||
322 | mtf[0] = sym; | ||
323 | return sym; | ||
324 | } | ||
325 | |||
326 | static INLINE void | ||
327 | djw_update_1_2 (int *mtf_run, usize_t *mtf_i, uint8_t *mtfsym, djw_weight *freq) | ||
328 | { | ||
329 | int code; | ||
330 | |||
331 | do | ||
332 | { | ||
333 | /* Offset by 1, since any number of RUN_ symbols implies run>0... */ | ||
334 | *mtf_run -= 1; | ||
335 | |||
336 | code = (*mtf_run & 1) ? RUN_1 : RUN_0; | ||
337 | |||
338 | mtfsym[(*mtf_i)++] = code; | ||
339 | freq[code] += 1; | ||
340 | *mtf_run >>= 1; | ||
341 | } | ||
342 | while (*mtf_run >= 1); | ||
343 | |||
344 | *mtf_run = 0; | ||
345 | } | ||
346 | |||
347 | static void | ||
348 | djw_init_clen_mtf_1_2 (uint8_t *clmtf) | ||
349 | { | ||
350 | int i, cl_i = 0; | ||
351 | |||
352 | clmtf[cl_i++] = 0; | ||
353 | for (i = 0; i < DJW_BASIC_CODES; i += 1) { clmtf[cl_i++] = djw_encode_12basic[i]; } | ||
354 | for (i = 0; i < DJW_EXTRA_CODES; i += 1) { clmtf[cl_i++] = djw_encode_12extra[i]; } | ||
355 | } | ||
356 | |||
357 | /*********************************************************************/ | ||
358 | /* PREFIX CODES */ | ||
359 | /*********************************************************************/ | ||
360 | #if XD3_ENCODER | ||
361 | static usize_t | ||
362 | djw_build_prefix (const djw_weight *freq, uint8_t *clen, int asize, int maxlen) | ||
363 | { | ||
364 | /* Heap with 0th entry unused, prefix tree with up to ALPHABET_SIZE-1 internal nodes, | ||
365 | * never more than ALPHABET_SIZE entries actually in the heap (minimum weight subtrees | ||
366 | * during prefix construction). First ALPHABET_SIZE entries are the actual symbols, | ||
367 | * next ALPHABET_SIZE-1 are internal nodes. */ | ||
368 | djw_heapen ents[ALPHABET_SIZE * 2]; | ||
369 | uint heap[ALPHABET_SIZE + 1]; | ||
370 | |||
371 | uint heap_last; /* Index of the last _valid_ heap entry. */ | ||
372 | uint ents_size; /* Number of entries, including 0th fake entry */ | ||
373 | int overflow; /* Number of code lengths that overflow */ | ||
374 | uint32_t total_bits; | ||
375 | int i; | ||
376 | |||
377 | IF_DEBUG (uint32_t first_bits = 0); | ||
378 | |||
379 | /* Insert real symbol frequences. */ | ||
380 | for (i = 0; i < asize; i += 1) | ||
381 | { | ||
382 | ents[i+1].freq = freq[i]; | ||
383 | } | ||
384 | |||
385 | again: | ||
386 | |||
387 | /* The loop is re-entered each time an overflow occurs. Re-initialize... */ | ||
388 | heap_last = 0; | ||
389 | ents_size = 1; | ||
390 | overflow = 0; | ||
391 | total_bits = 0; | ||
392 | |||
393 | /* 0th entry terminates the while loop in heap_insert (its the parent of the smallest | ||
394 | * element, always less-than) */ | ||
395 | heap[0] = 0; | ||
396 | ents[0].depth = 0; | ||
397 | ents[0].freq = 0; | ||
398 | |||
399 | /* Initial heap. */ | ||
400 | for (i = 0; i < asize; i += 1, ents_size += 1) | ||
401 | { | ||
402 | ents[ents_size].depth = 0; | ||
403 | ents[ents_size].parent = 0; | ||
404 | |||
405 | if (ents[ents_size].freq != 0) | ||
406 | { | ||
407 | heap_insert (heap, ents, ++heap_last, ents_size); | ||
408 | } | ||
409 | } | ||
410 | |||
411 | IF_DEBUG (heap_check (heap, ents, heap_last)); | ||
412 | |||
413 | /* Must be at least one symbol, or else we can't get here. */ | ||
414 | XD3_ASSERT (heap_last != 0); | ||
415 | |||
416 | /* If there is only one symbol, fake a second to prevent zero-length codes. */ | ||
417 | if (unlikely (heap_last == 1)) | ||
418 | { | ||
419 | /* Pick either the first or last symbol. */ | ||
420 | int s = freq[0] ? asize-1 : 0; | ||
421 | ents[s+1].freq = 1; | ||
422 | goto again; | ||
423 | } | ||
424 | |||
425 | /* Build prefix tree. */ | ||
426 | while (heap_last > 1) | ||
427 | { | ||
428 | djw_heapen *h1 = heap_extract (heap, ents, --heap_last); | ||
429 | djw_heapen *h2 = heap_extract (heap, ents, --heap_last); | ||
430 | |||
431 | ents[ents_size].freq = h1->freq + h2->freq; | ||
432 | ents[ents_size].depth = 1 + max (h1->depth, h2->depth); | ||
433 | ents[ents_size].parent = 0; | ||
434 | |||
435 | h1->parent = h2->parent = ents_size; | ||
436 | |||
437 | heap_insert (heap, ents, ++heap_last, ents_size++); | ||
438 | |||
439 | IF_DEBUG (heap_check (heap, ents, heap_last)); | ||
440 | } | ||
441 | |||
442 | /* Now compute prefix code lengths, counting parents. */ | ||
443 | for (i = 1; i < asize+1; i += 1) | ||
444 | { | ||
445 | int b = 0; | ||
446 | |||
447 | if (ents[i].freq != 0) | ||
448 | { | ||
449 | int p = i; | ||
450 | |||
451 | while ((p = ents[p].parent) != 0) { b += 1; } | ||
452 | |||
453 | if (b > maxlen) { overflow = 1; } | ||
454 | |||
455 | total_bits += b * freq[i-1]; | ||
456 | } | ||
457 | |||
458 | /* clen is 0-origin, unlike ents. */ | ||
459 | clen[i-1] = b; | ||
460 | } | ||
461 | |||
462 | IF_DEBUG (if (first_bits == 0) first_bits = total_bits); | ||
463 | |||
464 | if (! overflow) | ||
465 | { | ||
466 | IF_DEBUG (if (first_bits != total_bits) | ||
467 | { | ||
468 | P(RINT "code length overflow changed %d bits\n", total_bits - first_bits); | ||
469 | }); | ||
470 | return total_bits; | ||
471 | } | ||
472 | |||
473 | /* OPT: There is a non-looping way to fix overflow shown in zlib, but this is easier | ||
474 | * (for now), as done in bzip2. */ | ||
475 | for (i = 1; i < asize+1; i += 1) | ||
476 | { | ||
477 | ents[i].freq = ents[i].freq / 2 + 1; | ||
478 | } | ||
479 | |||
480 | goto again; | ||
481 | } | ||
482 | |||
483 | static void | ||
484 | djw_build_codes (uint *codes, const uint8_t *clen, int asize DEBUG_ARG (int abs_max)) | ||
485 | { | ||
486 | int i, l; | ||
487 | int min_clen = DJW_MAX_CODELEN; | ||
488 | int max_clen = 0; | ||
489 | uint code = 0; | ||
490 | |||
491 | for (i = 0; i < asize; i += 1) | ||
492 | { | ||
493 | if (clen[i] > 0 && clen[i] < min_clen) | ||
494 | { | ||
495 | min_clen = clen[i]; | ||
496 | } | ||
497 | |||
498 | max_clen = max (max_clen, (int) clen[i]); | ||
499 | } | ||
500 | |||
501 | XD3_ASSERT (max_clen <= abs_max); | ||
502 | |||
503 | for (l = min_clen; l <= max_clen; l += 1) | ||
504 | { | ||
505 | for (i = 0; i < asize; i += 1) | ||
506 | { | ||
507 | if (clen[i] == l) { codes[i] = code++; } | ||
508 | } | ||
509 | |||
510 | code <<= 1; | ||
511 | } | ||
512 | } | ||
513 | |||
514 | /*********************************************************************/ | ||
515 | /* MOVE-TO-FRONT */ | ||
516 | /*********************************************************************/ | ||
517 | static void | ||
518 | djw_compute_mtf_1_2 (djw_prefix *prefix, | ||
519 | uint8_t *mtf, | ||
520 | djw_weight *freq_out, /* freak out! */ | ||
521 | usize_t nsym) | ||
522 | { | ||
523 | int i, j, k; | ||
524 | usize_t sym; | ||
525 | usize_t size = prefix->scount; | ||
526 | usize_t mtf_i = 0; | ||
527 | int mtf_run = 0; | ||
528 | |||
529 | memset (freq_out, 0, sizeof (freq_out[0]) * (nsym+1)); | ||
530 | |||
531 | for (i = 0; i < size; ) | ||
532 | { | ||
533 | /* OPT: Bzip optimizes this algorithm a little by effectively checking j==0 before | ||
534 | * the MTF update. */ | ||
535 | sym = prefix->symbol[i++]; | ||
536 | |||
537 | for (j = 0; mtf[j] != sym; j += 1) { } | ||
538 | |||
539 | XD3_ASSERT (j < nsym); | ||
540 | |||
541 | for (k = j; k >= 1; k -= 1) { mtf[k] = mtf[k-1]; } | ||
542 | |||
543 | mtf[0] = sym; | ||
544 | |||
545 | if (j == 0) | ||
546 | { | ||
547 | mtf_run += 1; | ||
548 | continue; | ||
549 | } | ||
550 | |||
551 | if (mtf_run > 0) | ||
552 | { | ||
553 | djw_update_1_2 (& mtf_run, & mtf_i, prefix->mtfsym, freq_out); | ||
554 | } | ||
555 | |||
556 | /* Non-zero symbols are offset by RUN_1 */ | ||
557 | prefix->mtfsym[mtf_i++] = j+RUN_1; | ||
558 | freq_out[j+RUN_1] += 1; | ||
559 | } | ||
560 | |||
561 | if (mtf_run > 0) | ||
562 | { | ||
563 | djw_update_1_2 (& mtf_run, & mtf_i, prefix->mtfsym, freq_out); | ||
564 | } | ||
565 | |||
566 | prefix->mcount = mtf_i; | ||
567 | } | ||
568 | |||
569 | static usize_t | ||
570 | djw_count_freqs (djw_weight *freq, xd3_output *input) | ||
571 | { | ||
572 | xd3_output *in; | ||
573 | usize_t size = 0; | ||
574 | |||
575 | memset (freq, 0, sizeof (freq[0]) * ALPHABET_SIZE); | ||
576 | |||
577 | /* Freqency counting. OPT: can be accomplished beforehand. */ | ||
578 | for (in = input; in; in = in->next_page) | ||
579 | { | ||
580 | const uint8_t *p = in->base; | ||
581 | const uint8_t *p_max = p + in->next; | ||
582 | |||
583 | size += in->next; | ||
584 | |||
585 | do { freq[*p++] += 1; } while (p < p_max); | ||
586 | } | ||
587 | |||
588 | IF_DEBUG1 ({int i; | ||
589 | P(RINT "freqs: "); | ||
590 | for (i = 0; i < ALPHABET_SIZE; i += 1) { P(RINT "%u ", freq[i]); } | ||
591 | P(RINT "\n");}); | ||
592 | |||
593 | return size; | ||
594 | } | ||
595 | |||
596 | static void | ||
597 | djw_compute_multi_prefix (int groups, | ||
598 | uint8_t clen[DJW_MAX_GROUPS][ALPHABET_SIZE], | ||
599 | djw_prefix *prefix) | ||
600 | { | ||
601 | int gp, i; | ||
602 | |||
603 | prefix->scount = ALPHABET_SIZE; | ||
604 | memcpy (prefix->symbol, clen[0], ALPHABET_SIZE); | ||
605 | |||
606 | for (gp = 1; gp < groups; gp += 1) | ||
607 | { | ||
608 | for (i = 0; i < ALPHABET_SIZE; i += 1) | ||
609 | { | ||
610 | if (clen[gp][i] == 0) | ||
611 | { | ||
612 | continue; | ||
613 | } | ||
614 | |||
615 | prefix->symbol[prefix->scount++] = clen[gp][i]; | ||
616 | } | ||
617 | } | ||
618 | } | ||
619 | |||
620 | static void | ||
621 | djw_compute_prefix_1_2 (djw_prefix *prefix, djw_weight *freq) | ||
622 | { | ||
623 | uint8_t clmtf[DJW_MAX_CODELEN+1]; | ||
624 | |||
625 | djw_init_clen_mtf_1_2 (clmtf); | ||
626 | |||
627 | djw_compute_mtf_1_2 (prefix, clmtf, freq, DJW_MAX_CODELEN+1); | ||
628 | } | ||
629 | |||
630 | static int | ||
631 | djw_encode_prefix (xd3_stream *stream, | ||
632 | xd3_output **output, | ||
633 | bit_state *bstate, | ||
634 | djw_prefix *prefix) | ||
635 | { | ||
636 | int ret, i; | ||
637 | uint num_to_encode; | ||
638 | djw_weight clfreq[DJW_TOTAL_CODES]; | ||
639 | uint8_t clclen[DJW_TOTAL_CODES]; | ||
640 | uint clcode[DJW_TOTAL_CODES]; | ||
641 | |||
642 | IF_TUNE (memset (clfreq, 0, sizeof (clfreq))); | ||
643 | |||
644 | /* Move-to-front encode prefix symbols, count frequencies */ | ||
645 | djw_compute_prefix_1_2 (prefix, clfreq); | ||
646 | |||
647 | /* Compute codes */ | ||
648 | djw_build_prefix (clfreq, clclen, DJW_TOTAL_CODES, DJW_MAX_CLCLEN); | ||
649 | djw_build_codes (clcode, clclen, DJW_TOTAL_CODES DEBUG_ARG (DJW_MAX_CLCLEN)); | ||
650 | |||
651 | /* Compute number of extra codes beyond basic ones for this template. */ | ||
652 | num_to_encode = DJW_TOTAL_CODES; | ||
653 | while (num_to_encode > DJW_EXTRA_12OFFSET && clclen[num_to_encode-1] == 0) { num_to_encode -= 1; } | ||
654 | XD3_ASSERT (num_to_encode - DJW_EXTRA_12OFFSET < (1 << DJW_EXTRA_CODE_BITS)); | ||
655 | |||
656 | /* Encode: # of extra codes */ | ||
657 | if ((ret = xd3_encode_bits (stream, output, bstate, DJW_EXTRA_CODE_BITS, | ||
658 | num_to_encode - DJW_EXTRA_12OFFSET))) { return ret; } | ||
659 | |||
660 | /* Encode: MTF code lengths */ | ||
661 | for (i = 0; i < num_to_encode; i += 1) | ||
662 | { | ||
663 | if ((ret = xd3_encode_bits (stream, output, bstate, DJW_CLCLEN_BITS, clclen[i]))) { return ret; } | ||
664 | } | ||
665 | |||
666 | /* Encode: CLEN code lengths */ | ||
667 | for (i = 0; i < prefix->mcount; i += 1) | ||
668 | { | ||
669 | usize_t mtf_sym = prefix->mtfsym[i]; | ||
670 | usize_t bits = clclen[mtf_sym]; | ||
671 | usize_t code = clcode[mtf_sym]; | ||
672 | |||
673 | if ((ret = xd3_encode_bits (stream, output, bstate, bits, code))) { return ret; } | ||
674 | } | ||
675 | |||
676 | IF_TUNE (memcpy (tune_freq, clfreq, sizeof (clfreq))); | ||
677 | |||
678 | return 0; | ||
679 | } | ||
680 | |||
681 | static void | ||
682 | djw_compute_selector_1_2 (djw_prefix *prefix, | ||
683 | usize_t groups, | ||
684 | djw_weight *gbest_freq) | ||
685 | { | ||
686 | uint8_t grmtf[DJW_MAX_GROUPS]; | ||
687 | usize_t i; | ||
688 | |||
689 | for (i = 0; i < groups; i += 1) { grmtf[i] = i; } | ||
690 | |||
691 | djw_compute_mtf_1_2 (prefix, grmtf, gbest_freq, groups); | ||
692 | } | ||
693 | |||
694 | static int | ||
695 | xd3_encode_howmany_groups (xd3_stream *stream, | ||
696 | xd3_sec_cfg *cfg, | ||
697 | usize_t input_size, | ||
698 | usize_t *ret_groups, | ||
699 | usize_t *ret_sector_size) | ||
700 | { | ||
701 | usize_t cfg_groups = 0; | ||
702 | usize_t cfg_sector_size = 0; | ||
703 | usize_t sugg_groups = 0; | ||
704 | usize_t sugg_sector_size = 0; | ||
705 | |||
706 | if (cfg->ngroups != 0) | ||
707 | { | ||
708 | if (cfg->ngroups < 0 || cfg->ngroups > DJW_MAX_GROUPS) | ||
709 | { | ||
710 | stream->msg = "invalid secondary encoder group number"; | ||
711 | return EINVAL; | ||
712 | } | ||
713 | |||
714 | cfg_groups = cfg->ngroups; | ||
715 | } | ||
716 | |||
717 | if (cfg->sector_size != 0) | ||
718 | { | ||
719 | if (cfg->sector_size < DJW_SECTORSZ_MULT || cfg->sector_size > DJW_SECTORSZ_MAX || (cfg->sector_size % DJW_SECTORSZ_MULT) != 0) | ||
720 | { | ||
721 | stream->msg = "invalid secondary encoder sector size"; | ||
722 | return EINVAL; | ||
723 | } | ||
724 | |||
725 | cfg_sector_size = cfg->sector_size; | ||
726 | } | ||
727 | |||
728 | if (cfg_groups == 0 || cfg_sector_size == 0) | ||
729 | { | ||
730 | /* These values were found empirically using xdelta3-tune around version | ||
731 | * xdfs-0.256. */ | ||
732 | switch (cfg->data_type) | ||
733 | { | ||
734 | case DATA_SECTION: | ||
735 | if (input_size < 1000) { sugg_groups = 1; sugg_sector_size = 0; } | ||
736 | else if (input_size < 4000) { sugg_groups = 2; sugg_sector_size = 10; } | ||
737 | else if (input_size < 7000) { sugg_groups = 3; sugg_sector_size = 10; } | ||
738 | else if (input_size < 10000) { sugg_groups = 4; sugg_sector_size = 10; } | ||
739 | else if (input_size < 25000) { sugg_groups = 5; sugg_sector_size = 10; } | ||
740 | else if (input_size < 50000) { sugg_groups = 7; sugg_sector_size = 20; } | ||
741 | else if (input_size < 100000) { sugg_groups = 8; sugg_sector_size = 30; } | ||
742 | else { sugg_groups = 8; sugg_sector_size = 70; } | ||
743 | break; | ||
744 | case INST_SECTION: | ||
745 | if (input_size < 7000) { sugg_groups = 1; sugg_sector_size = 0; } | ||
746 | else if (input_size < 10000) { sugg_groups = 2; sugg_sector_size = 50; } | ||
747 | else if (input_size < 25000) { sugg_groups = 3; sugg_sector_size = 50; } | ||
748 | else if (input_size < 50000) { sugg_groups = 6; sugg_sector_size = 40; } | ||
749 | else if (input_size < 100000) { sugg_groups = 8; sugg_sector_size = 40; } | ||
750 | else { sugg_groups = 8; sugg_sector_size = 40; } | ||
751 | break; | ||
752 | case ADDR_SECTION: | ||
753 | if (input_size < 9000) { sugg_groups = 1; sugg_sector_size = 0; } | ||
754 | else if (input_size < 25000) { sugg_groups = 2; sugg_sector_size = 130; } | ||
755 | else if (input_size < 50000) { sugg_groups = 3; sugg_sector_size = 130; } | ||
756 | else if (input_size < 100000) { sugg_groups = 5; sugg_sector_size = 130; } | ||
757 | else { sugg_groups = 7; sugg_sector_size = 130; } | ||
758 | break; | ||
759 | } | ||
760 | |||
761 | if (cfg_groups == 0) | ||
762 | { | ||
763 | cfg_groups = sugg_groups; | ||
764 | } | ||
765 | |||
766 | if (cfg_sector_size == 0) | ||
767 | { | ||
768 | cfg_sector_size = sugg_sector_size; | ||
769 | } | ||
770 | } | ||
771 | |||
772 | if (cfg_groups != 1 && cfg_sector_size == 0) | ||
773 | { | ||
774 | switch (cfg->data_type) | ||
775 | { | ||
776 | case DATA_SECTION: | ||
777 | cfg_sector_size = 20; | ||
778 | break; | ||
779 | case INST_SECTION: | ||
780 | cfg_sector_size = 50; | ||
781 | break; | ||
782 | case ADDR_SECTION: | ||
783 | cfg_sector_size = 130; | ||
784 | break; | ||
785 | } | ||
786 | } | ||
787 | |||
788 | (*ret_groups) = cfg_groups; | ||
789 | (*ret_sector_size) = cfg_sector_size; | ||
790 | |||
791 | XD3_ASSERT (cfg_groups > 0 && cfg_groups <= DJW_MAX_GROUPS); | ||
792 | XD3_ASSERT (cfg_groups == 1 || (cfg_sector_size >= DJW_SECTORSZ_MULT && cfg_sector_size <= DJW_SECTORSZ_MAX)); | ||
793 | |||
794 | return 0; | ||
795 | } | ||
796 | |||
797 | static int | ||
798 | xd3_real_encode_huff (xd3_stream *stream, | ||
799 | djw_stream *h, | ||
800 | xd3_output *input, | ||
801 | xd3_output *output, | ||
802 | xd3_sec_cfg *cfg) | ||
803 | { | ||
804 | int ret; | ||
805 | usize_t groups, sector_size; | ||
806 | bit_state bstate = BIT_STATE_ENCODE_INIT; | ||
807 | xd3_output *in; | ||
808 | int encode_bits; | ||
809 | usize_t input_bits; | ||
810 | usize_t input_bytes; | ||
811 | usize_t initial_offset = output->next; | ||
812 | djw_weight real_freq[ALPHABET_SIZE]; | ||
813 | uint8_t *gbest = NULL; /* Dynamic allocations: could put these in djw_stream. */ | ||
814 | uint8_t *gbest_mtf = NULL; | ||
815 | |||
816 | input_bytes = djw_count_freqs (real_freq, input); | ||
817 | input_bits = input_bytes * 8; | ||
818 | |||
819 | XD3_ASSERT (input_bytes > 0); | ||
820 | |||
821 | if ((ret = xd3_encode_howmany_groups (stream, cfg, input_bytes, & groups, & sector_size))) | ||
822 | { | ||
823 | return ret; | ||
824 | } | ||
825 | |||
826 | if (0) | ||
827 | { | ||
828 | regroup: | ||
829 | /* Sometimes we dynamically decide there are too many groups. Arrive here. */ | ||
830 | output->next = initial_offset; | ||
831 | xd3_bit_state_encode_init (& bstate); | ||
832 | } | ||
833 | |||
834 | /* Encode: # of groups (3 bits) */ | ||
835 | if ((ret = xd3_encode_bits (stream, & output, & bstate, DJW_GROUP_BITS, groups-1))) { goto failure; } | ||
836 | |||
837 | if (groups == 1) | ||
838 | { | ||
839 | /* Single Huffman group. */ | ||
840 | uint code[ALPHABET_SIZE]; /* Codes */ | ||
841 | IF_TUNE (uint8_t *clen = tune_clen[0];) | ||
842 | IF_NTUNE (uint8_t clen[ALPHABET_SIZE];) | ||
843 | uint8_t prefix_mtfsym[ALPHABET_SIZE]; | ||
844 | djw_prefix prefix; | ||
845 | |||
846 | encode_bits = | ||
847 | djw_build_prefix (real_freq, clen, ALPHABET_SIZE, DJW_MAX_CODELEN); | ||
848 | djw_build_codes (code, clen, ALPHABET_SIZE DEBUG_ARG (DJW_MAX_CODELEN)); | ||
849 | |||
850 | if (encode_bits + EFFICIENCY_BITS >= input_bits && ! cfg->inefficient) { goto nosecond; } | ||
851 | |||
852 | /* Encode: prefix */ | ||
853 | prefix.mtfsym = prefix_mtfsym; | ||
854 | prefix.symbol = clen; | ||
855 | prefix.scount = ALPHABET_SIZE; | ||
856 | |||
857 | if ((ret = djw_encode_prefix (stream, & output, & bstate, & prefix))) { goto failure; } | ||
858 | |||
859 | if (encode_bits + (8 * output->next) + EFFICIENCY_BITS >= input_bits && ! cfg->inefficient) { goto nosecond; } | ||
860 | |||
861 | IF_TUNE (tune_prefix_bits = xd3_bitsof_output (output, & bstate)); | ||
862 | IF_TUNE (tune_select_bits = 0); | ||
863 | IF_TUNE (tune_encode_bits = encode_bits); | ||
864 | |||
865 | /* Encode: data */ | ||
866 | for (in = input; in; in = in->next_page) | ||
867 | { | ||
868 | const uint8_t *p = in->base; | ||
869 | const uint8_t *p_max = p + in->next; | ||
870 | |||
871 | do | ||
872 | { | ||
873 | usize_t sym = *p++; | ||
874 | usize_t bits = clen[sym]; | ||
875 | |||
876 | IF_DEBUG (encode_bits -= bits); | ||
877 | |||
878 | if ((ret = xd3_encode_bits (stream, & output, & bstate, bits, code[sym]))) { goto failure; } | ||
879 | } | ||
880 | while (p < p_max); | ||
881 | } | ||
882 | |||
883 | XD3_ASSERT (encode_bits == 0); | ||
884 | } | ||
885 | else | ||
886 | { | ||
887 | /* DJW Huffman */ | ||
888 | djw_weight evolve_freq[DJW_MAX_GROUPS][ALPHABET_SIZE]; | ||
889 | #if TUNE_HUFFMAN == 0 | ||
890 | uint8_t evolve_clen[DJW_MAX_GROUPS][ALPHABET_SIZE]; | ||
891 | #else | ||
892 | #define evolve_clen tune_clen | ||
893 | #endif | ||
894 | djw_weight left = input_bytes; | ||
895 | int gp; | ||
896 | int niter = 0; | ||
897 | usize_t select_bits; | ||
898 | usize_t sym1 = 0, sym2 = 0, s; | ||
899 | usize_t gcost[DJW_MAX_GROUPS]; | ||
900 | uint gbest_code[DJW_MAX_GROUPS+1]; | ||
901 | uint8_t gbest_clen[DJW_MAX_GROUPS+1]; | ||
902 | usize_t gbest_max = 1 + (input_bytes - 1) / sector_size; | ||
903 | int best_bits = 0; | ||
904 | usize_t gbest_no; | ||
905 | usize_t gpcnt; | ||
906 | const uint8_t *p; | ||
907 | IF_DEBUG1 (usize_t gcount[DJW_MAX_GROUPS]); | ||
908 | |||
909 | /* Encode: sector size (5 bits) */ | ||
910 | if ((ret = xd3_encode_bits (stream, & output, & bstate, | ||
911 | DJW_SECTORSZ_BITS, (sector_size/DJW_SECTORSZ_MULT)-1))) { goto failure; } | ||
912 | |||
913 | /* Dynamic allocation. */ | ||
914 | if (gbest == NULL) { gbest = xd3_alloc (stream, gbest_max, 1); } | ||
915 | if (gbest_mtf == NULL) { gbest_mtf = xd3_alloc (stream, gbest_max, 1); } | ||
916 | |||
917 | /* OPT: Some of the inner loops can be optimized, as shown in bzip2 */ | ||
918 | |||
919 | /* Generate initial code length tables. */ | ||
920 | for (gp = 0; gp < groups; gp += 1) | ||
921 | { | ||
922 | djw_weight sum = 0; | ||
923 | djw_weight goal = left / (groups - gp); | ||
924 | |||
925 | IF_DEBUG1 (usize_t nz = 0); | ||
926 | |||
927 | /* Due to the single-code granularity of this distribution, it may be that we | ||
928 | * can't generate a distribution for each group. In that case subtract one | ||
929 | * group and try again. If (inefficient), we're testing group behavior, so | ||
930 | * don't mess things up. */ | ||
931 | if (goal == 0 && !cfg->inefficient) | ||
932 | { | ||
933 | IF_DEBUG1 (P(RINT "too many groups (%u), dropping one\n", groups)); | ||
934 | groups -= 1; | ||
935 | goto regroup; | ||
936 | } | ||
937 | |||
938 | /* Sum == goal is possible when (cfg->inefficient)... */ | ||
939 | while (sum < goal) | ||
940 | { | ||
941 | XD3_ASSERT (sym2 < ALPHABET_SIZE); | ||
942 | IF_DEBUG1 (nz += real_freq[sym2] != 0); | ||
943 | sum += real_freq[sym2++]; | ||
944 | } | ||
945 | |||
946 | IF_DEBUG1(P(RINT "group %u has symbols %u..%u (%u non-zero) (%u/%u = %.3f)\n", | ||
947 | gp, sym1, sym2, nz, sum, input_bytes, sum / (double)input_bytes);); | ||
948 | |||
949 | for (s = 0; s < ALPHABET_SIZE; s += 1) | ||
950 | { | ||
951 | evolve_clen[gp][s] = (s >= sym1 && s <= sym2) ? 1 : 16; | ||
952 | } | ||
953 | |||
954 | left -= sum; | ||
955 | sym1 = sym2+1; | ||
956 | } | ||
957 | |||
958 | repeat: | ||
959 | |||
960 | niter += 1; | ||
961 | gbest_no = 0; | ||
962 | memset (evolve_freq, 0, sizeof (evolve_freq[0]) * groups); | ||
963 | IF_DEBUG1 (memset (gcount, 0, sizeof (gcount[0]) * groups)); | ||
964 | |||
965 | /* For each input page (loop is irregular to allow non-pow2-size group size). */ | ||
966 | in = input; | ||
967 | p = in->base; | ||
968 | |||
969 | /* For each group-size sector. */ | ||
970 | do | ||
971 | { | ||
972 | const uint8_t *p0 = p; | ||
973 | xd3_output *in0 = in; | ||
974 | usize_t best = 0; | ||
975 | usize_t winner = 0; | ||
976 | |||
977 | /* Select best group for each sector, update evolve_freq. */ | ||
978 | memset (gcost, 0, sizeof (gcost[0]) * groups); | ||
979 | |||
980 | /* For each byte in sector. */ | ||
981 | for (gpcnt = 0; gpcnt < sector_size; gpcnt += 1) | ||
982 | { | ||
983 | /* For each group. */ | ||
984 | for (gp = 0; gp < groups; gp += 1) | ||
985 | { | ||
986 | gcost[gp] += evolve_clen[gp][*p]; | ||
987 | } | ||
988 | |||
989 | /* Check end-of-input-page. */ | ||
990 | # define GP_PAGE() \ | ||
991 | if (++p - in->base == in->next) \ | ||
992 | { \ | ||
993 | in = in->next_page; \ | ||
994 | if (in == NULL) { break; } \ | ||
995 | p = in->base; \ | ||
996 | } | ||
997 | |||
998 | GP_PAGE (); | ||
999 | } | ||
1000 | |||
1001 | /* Find min cost group for this sector */ | ||
1002 | best = -1U; | ||
1003 | for (gp = 0; gp < groups; gp += 1) | ||
1004 | { | ||
1005 | if (gcost[gp] < best) { best = gcost[gp]; winner = gp; } | ||
1006 | } | ||
1007 | |||
1008 | gbest[gbest_no++] = winner; | ||
1009 | IF_DEBUG1 (gcount[winner] += 1); | ||
1010 | |||
1011 | p = p0; | ||
1012 | in = in0; | ||
1013 | |||
1014 | /* Update group frequencies. */ | ||
1015 | for (gpcnt = 0; gpcnt < sector_size; gpcnt += 1) | ||
1016 | { | ||
1017 | evolve_freq[winner][*p] += 1; | ||
1018 | |||
1019 | GP_PAGE (); | ||
1020 | } | ||
1021 | } | ||
1022 | while (in != NULL); | ||
1023 | |||
1024 | XD3_ASSERT (gbest_no == gbest_max); | ||
1025 | |||
1026 | /* Recompute code lengths. */ | ||
1027 | encode_bits = 0; | ||
1028 | for (gp = 0; gp < groups; gp += 1) | ||
1029 | { | ||
1030 | int i; | ||
1031 | uint8_t evolve_zero[ALPHABET_SIZE]; | ||
1032 | int any_zeros = 0; | ||
1033 | |||
1034 | memset (evolve_zero, 0, sizeof (evolve_zero)); | ||
1035 | |||
1036 | /* Cannot allow a zero clen when the real frequency is non-zero. Note: this | ||
1037 | * means we are going to encode a fairly long code for these unused entries. An | ||
1038 | * improvement would be to implement a NOTUSED code for when these are actually | ||
1039 | * zero, but this requires another data structure (evolve_zero) since we don't | ||
1040 | * know when evolve_freq[i] == 0... Briefly tested, looked worse. */ | ||
1041 | for (i = 0; i < ALPHABET_SIZE; i += 1) | ||
1042 | { | ||
1043 | if (evolve_freq[gp][i] == 0 && real_freq[i] != 0) | ||
1044 | { | ||
1045 | evolve_freq[gp][i] = 1; | ||
1046 | evolve_zero[i] = 1; | ||
1047 | any_zeros = 1; | ||
1048 | } | ||
1049 | } | ||
1050 | |||
1051 | encode_bits += djw_build_prefix (evolve_freq[gp], evolve_clen[gp], ALPHABET_SIZE, DJW_MAX_CODELEN); | ||
1052 | |||
1053 | /* The above faking of frequencies does not matter for the last iteration, but | ||
1054 | * we don't know when that is yet. However, it also breaks the encode_bits | ||
1055 | * computation. Necessary for accuracy, and for the (encode_bits==0) assert | ||
1056 | * after all bits are output. */ | ||
1057 | if (any_zeros) | ||
1058 | { | ||
1059 | IF_DEBUG1 (usize_t save_total = encode_bits); | ||
1060 | |||
1061 | for (i = 0; i < ALPHABET_SIZE; i += 1) | ||
1062 | { | ||
1063 | if (evolve_zero[i]) { encode_bits -= evolve_clen[gp][i]; } | ||
1064 | } | ||
1065 | |||
1066 | IF_DEBUG1 (P(RINT "evolve_zero reduced %u bits in group %u\n", save_total - encode_bits, gp)); | ||
1067 | } | ||
1068 | } | ||
1069 | |||
1070 | IF_DEBUG1( | ||
1071 | P(RINT "pass %u total bits: %u group uses: ", niter, encode_bits); | ||
1072 | for (gp = 0; gp < groups; gp += 1) { P(RINT "%u ", gcount[gp]); } | ||
1073 | P(RINT "\n");); | ||
1074 | |||
1075 | /* End iteration. (The following assertion proved invalid.) */ | ||
1076 | /*XD3_ASSERT (niter == 1 || best_bits >= encode_bits);*/ | ||
1077 | |||
1078 | IF_DEBUG1 (if (niter > 1 && best_bits < encode_bits) { | ||
1079 | P(RINT "iteration lost %u bits\n", encode_bits - best_bits); }); | ||
1080 | |||
1081 | if (niter == 1 || (niter < DJW_MAX_ITER && (best_bits - encode_bits) >= DJW_MIN_IMPROVEMENT)) | ||
1082 | { | ||
1083 | best_bits = encode_bits; | ||
1084 | goto repeat; | ||
1085 | } | ||
1086 | |||
1087 | /* Efficiency check. */ | ||
1088 | if (encode_bits + EFFICIENCY_BITS >= input_bits && ! cfg->inefficient) { goto nosecond; } | ||
1089 | |||
1090 | IF_DEBUG1 (P(RINT "djw compression: %u -> %0.3f\n", input_bytes, encode_bits / 8.0)); | ||
1091 | |||
1092 | /* Encode: prefix */ | ||
1093 | { | ||
1094 | uint8_t prefix_symbol[DJW_MAX_GROUPS * ALPHABET_SIZE]; | ||
1095 | uint8_t prefix_mtfsym[DJW_MAX_GROUPS * ALPHABET_SIZE]; | ||
1096 | uint8_t prefix_repcnt[DJW_MAX_GROUPS * ALPHABET_SIZE]; | ||
1097 | djw_prefix prefix; | ||
1098 | |||
1099 | prefix.symbol = prefix_symbol; | ||
1100 | prefix.mtfsym = prefix_mtfsym; | ||
1101 | prefix.repcnt = prefix_repcnt; | ||
1102 | |||
1103 | djw_compute_multi_prefix (groups, evolve_clen, & prefix); | ||
1104 | if ((ret = djw_encode_prefix (stream, & output, & bstate, & prefix))) { goto failure; } | ||
1105 | } | ||
1106 | |||
1107 | /* Encode: selector frequencies */ | ||
1108 | { | ||
1109 | djw_weight gbest_freq[DJW_MAX_GROUPS+1]; | ||
1110 | djw_prefix gbest_prefix; | ||
1111 | usize_t i; | ||
1112 | |||
1113 | gbest_prefix.scount = gbest_no; | ||
1114 | gbest_prefix.symbol = gbest; | ||
1115 | gbest_prefix.mtfsym = gbest_mtf; | ||
1116 | |||
1117 | djw_compute_selector_1_2 (& gbest_prefix, groups, gbest_freq); | ||
1118 | |||
1119 | select_bits = | ||
1120 | djw_build_prefix (gbest_freq, gbest_clen, groups+1, DJW_MAX_GBCLEN); | ||
1121 | djw_build_codes (gbest_code, gbest_clen, groups+1 DEBUG_ARG (DJW_MAX_GBCLEN)); | ||
1122 | |||
1123 | IF_TUNE (tune_prefix_bits = xd3_bitsof_output (output, & bstate)); | ||
1124 | IF_TUNE (tune_select_bits = select_bits); | ||
1125 | IF_TUNE (tune_encode_bits = encode_bits); | ||
1126 | |||
1127 | for (i = 0; i < groups+1; i += 1) | ||
1128 | { | ||
1129 | if ((ret = xd3_encode_bits (stream, & output, & bstate, DJW_GBCLEN_BITS, gbest_clen[i]))) { goto failure; } | ||
1130 | } | ||
1131 | |||
1132 | for (i = 0; i < gbest_prefix.mcount; i += 1) | ||
1133 | { | ||
1134 | usize_t gp_mtf = gbest_mtf[i]; | ||
1135 | usize_t gp_sel_bits = gbest_clen[gp_mtf]; | ||
1136 | usize_t gp_sel_code = gbest_code[gp_mtf]; | ||
1137 | |||
1138 | XD3_ASSERT (gp_mtf < groups+1); | ||
1139 | |||
1140 | if ((ret = xd3_encode_bits (stream, & output, & bstate, gp_sel_bits, gp_sel_code))) { goto failure; } | ||
1141 | |||
1142 | IF_DEBUG (select_bits -= gp_sel_bits); | ||
1143 | } | ||
1144 | |||
1145 | XD3_ASSERT (select_bits == 0); | ||
1146 | } | ||
1147 | |||
1148 | /* Efficiency check. */ | ||
1149 | if (encode_bits + select_bits + (8 * output->next) + EFFICIENCY_BITS >= input_bits && ! cfg->inefficient) { goto nosecond; } | ||
1150 | |||
1151 | /* Encode: data */ | ||
1152 | { | ||
1153 | uint evolve_code[DJW_MAX_GROUPS][ALPHABET_SIZE]; | ||
1154 | usize_t sector = 0; | ||
1155 | |||
1156 | /* Build code tables for each group. */ | ||
1157 | for (gp = 0; gp < groups; gp += 1) | ||
1158 | { | ||
1159 | djw_build_codes (evolve_code[gp], evolve_clen[gp], ALPHABET_SIZE DEBUG_ARG (DJW_MAX_CODELEN)); | ||
1160 | } | ||
1161 | |||
1162 | /* Now loop over the input. */ | ||
1163 | in = input; | ||
1164 | p = in->base; | ||
1165 | |||
1166 | do | ||
1167 | { | ||
1168 | /* For each sector. */ | ||
1169 | usize_t gp_best = gbest[sector]; | ||
1170 | uint *gp_codes = evolve_code[gp_best]; | ||
1171 | uint8_t *gp_clens = evolve_clen[gp_best]; | ||
1172 | |||
1173 | XD3_ASSERT (sector < gbest_no); | ||
1174 | |||
1175 | sector += 1; | ||
1176 | |||
1177 | /* Encode the sector data. */ | ||
1178 | for (gpcnt = 0; gpcnt < sector_size; gpcnt += 1) | ||
1179 | { | ||
1180 | usize_t sym = *p; | ||
1181 | usize_t bits = gp_clens[sym]; | ||
1182 | usize_t code = gp_codes[sym]; | ||
1183 | |||
1184 | IF_DEBUG (encode_bits -= bits); | ||
1185 | |||
1186 | if ((ret = xd3_encode_bits (stream, & output, & bstate, bits, code))) { goto failure; } | ||
1187 | |||
1188 | GP_PAGE (); | ||
1189 | } | ||
1190 | } | ||
1191 | while (in != NULL); | ||
1192 | |||
1193 | XD3_ASSERT (select_bits == 0); | ||
1194 | XD3_ASSERT (encode_bits == 0); | ||
1195 | |||
1196 | #undef evolve_clen | ||
1197 | } | ||
1198 | } | ||
1199 | |||
1200 | ret = xd3_flush_bits (stream, & output, & bstate); | ||
1201 | |||
1202 | if (0) | ||
1203 | { | ||
1204 | nosecond: | ||
1205 | stream->msg = "secondary compression was inefficient"; | ||
1206 | ret = XD3_NOSECOND; | ||
1207 | } | ||
1208 | |||
1209 | failure: | ||
1210 | |||
1211 | xd3_free (stream, gbest); | ||
1212 | xd3_free (stream, gbest_mtf); | ||
1213 | return ret; | ||
1214 | } | ||
1215 | #endif /* XD3_ENCODER */ | ||
1216 | |||
1217 | /*********************************************************************/ | ||
1218 | /* DECODE */ | ||
1219 | /*********************************************************************/ | ||
1220 | |||
1221 | static void | ||
1222 | djw_build_decoder (xd3_stream *stream, | ||
1223 | usize_t asize, | ||
1224 | usize_t abs_max, | ||
1225 | const uint8_t *clen, | ||
1226 | uint8_t *inorder, | ||
1227 | uint *base, | ||
1228 | uint *limit, | ||
1229 | uint *min_clenp, | ||
1230 | uint *max_clenp) | ||
1231 | { | ||
1232 | int i, l; | ||
1233 | const uint8_t *ci; | ||
1234 | uint nr_clen [DJW_MAX_CODELEN+1]; | ||
1235 | uint tmp_base[DJW_MAX_CODELEN+1]; | ||
1236 | int min_clen; | ||
1237 | int max_clen; | ||
1238 | |||
1239 | /* Assumption: the two temporary arrays are large enough to hold abs_max. */ | ||
1240 | XD3_ASSERT (abs_max <= DJW_MAX_CODELEN); | ||
1241 | |||
1242 | /* This looks something like the start of zlib's inftrees.c */ | ||
1243 | memset (nr_clen, 0, sizeof (nr_clen[0]) * (abs_max+1)); | ||
1244 | |||
1245 | /* Count number of each code length */ | ||
1246 | i = asize; | ||
1247 | ci = clen; | ||
1248 | do | ||
1249 | { | ||
1250 | /* Caller _must_ check that values are in-range. Most of the time | ||
1251 | * the caller decodes a specific number of bits, which imply the max value, and the | ||
1252 | * other time the caller decodes a huffman value, which must be in-range. Therefore, | ||
1253 | * its an assertion and this function cannot otherwise fail. */ | ||
1254 | XD3_ASSERT (*ci <= abs_max); | ||
1255 | |||
1256 | nr_clen[*ci++]++; | ||
1257 | } | ||
1258 | while (--i != 0); | ||
1259 | |||
1260 | /* Compute min, max. */ | ||
1261 | for (i = 1; i <= abs_max; i += 1) { if (nr_clen[i]) { break; } } | ||
1262 | min_clen = i; | ||
1263 | for (i = abs_max; i != 0; i -= 1) { if (nr_clen[i]) { break; } } | ||
1264 | max_clen = i; | ||
1265 | |||
1266 | /* Fill the BASE, LIMIT table. */ | ||
1267 | tmp_base[min_clen] = 0; | ||
1268 | base[min_clen] = 0; | ||
1269 | limit[min_clen] = nr_clen[min_clen] - 1; | ||
1270 | for (i = min_clen + 1; i <= max_clen; i += 1) | ||
1271 | { | ||
1272 | uint last_limit = ((limit[i-1] + 1) << 1); | ||
1273 | tmp_base[i] = tmp_base[i-1] + nr_clen[i-1]; | ||
1274 | limit[i] = last_limit + nr_clen[i] - 1; | ||
1275 | base[i] = last_limit - tmp_base[i]; | ||
1276 | } | ||
1277 | |||
1278 | /* Fill the inorder array, canonically ordered codes. */ | ||
1279 | ci = clen; | ||
1280 | for (i = 0; i < asize; i += 1) | ||
1281 | { | ||
1282 | if ((l = *ci++) != 0) | ||
1283 | { | ||
1284 | inorder[tmp_base[l]++] = i; | ||
1285 | } | ||
1286 | } | ||
1287 | |||
1288 | *min_clenp = min_clen; | ||
1289 | *max_clenp = max_clen; | ||
1290 | } | ||
1291 | |||
1292 | static INLINE int | ||
1293 | djw_decode_symbol (xd3_stream *stream, | ||
1294 | bit_state *bstate, | ||
1295 | const uint8_t **input, | ||
1296 | const uint8_t *input_end, | ||
1297 | const uint8_t *inorder, | ||
1298 | const uint *base, | ||
1299 | const uint *limit, | ||
1300 | uint min_clen, | ||
1301 | uint max_clen, | ||
1302 | usize_t *sym, | ||
1303 | usize_t max_sym) | ||
1304 | { | ||
1305 | usize_t code = 0; | ||
1306 | usize_t bits = 0; | ||
1307 | |||
1308 | /* OPT: Supposedly a small lookup table improves speed here... */ | ||
1309 | |||
1310 | /* Code outline is similar to xd3_decode_bits... */ | ||
1311 | if (bstate->cur_mask == 0x100) { goto next_byte; } | ||
1312 | |||
1313 | for (;;) | ||
1314 | { | ||
1315 | do | ||
1316 | { | ||
1317 | if (bits == max_clen) { goto corrupt; } | ||
1318 | |||
1319 | bits += 1; | ||
1320 | code = (code << 1); | ||
1321 | |||
1322 | if (bstate->cur_byte & bstate->cur_mask) { code |= 1; } | ||
1323 | |||
1324 | IF_DEBUG1 (P(RINT "%u", (bstate->cur_byte & bstate->cur_mask) && 1)); | ||
1325 | |||
1326 | bstate->cur_mask <<= 1; | ||
1327 | |||
1328 | if (bits >= min_clen && code <= limit[bits]) { goto done; } | ||
1329 | } | ||
1330 | while (bstate->cur_mask != 0x100); | ||
1331 | |||
1332 | next_byte: | ||
1333 | |||
1334 | if (*input == input_end) | ||
1335 | { | ||
1336 | stream->msg = "secondary decoder end of input"; | ||
1337 | return EINVAL; | ||
1338 | } | ||
1339 | |||
1340 | bstate->cur_byte = *(*input)++; | ||
1341 | bstate->cur_mask = 1; | ||
1342 | } | ||
1343 | |||
1344 | done: | ||
1345 | |||
1346 | if (base[bits] <= code) | ||
1347 | { | ||
1348 | usize_t offset = code - base[bits]; | ||
1349 | |||
1350 | if (offset <= max_sym) | ||
1351 | { | ||
1352 | IF_DEBUG1 (P(RINT " (%u) ", bits)); | ||
1353 | *sym = inorder[offset]; | ||
1354 | return 0; | ||
1355 | } | ||
1356 | } | ||
1357 | |||
1358 | corrupt: | ||
1359 | stream->msg = "secondary decoder invalid code"; | ||
1360 | return EINVAL; | ||
1361 | } | ||
1362 | |||
1363 | static int | ||
1364 | djw_decode_clclen (xd3_stream *stream, | ||
1365 | bit_state *bstate, | ||
1366 | const uint8_t **input, | ||
1367 | const uint8_t *input_end, | ||
1368 | uint8_t *cl_inorder, | ||
1369 | uint *cl_base, | ||
1370 | uint *cl_limit, | ||
1371 | uint *cl_minlen, | ||
1372 | uint *cl_maxlen, | ||
1373 | uint8_t *cl_mtf) | ||
1374 | { | ||
1375 | int ret; | ||
1376 | uint8_t cl_clen[DJW_TOTAL_CODES]; | ||
1377 | usize_t num_codes, value; | ||
1378 | int i; | ||
1379 | |||
1380 | /* How many extra code lengths to encode. */ | ||
1381 | if ((ret = xd3_decode_bits (stream, bstate, input, input_end, DJW_EXTRA_CODE_BITS, & num_codes))) { return ret; } | ||
1382 | |||
1383 | num_codes += DJW_EXTRA_12OFFSET; | ||
1384 | |||
1385 | /* Read num_codes. */ | ||
1386 | for (i = 0; i < num_codes; i += 1) | ||
1387 | { | ||
1388 | if ((ret = xd3_decode_bits (stream, bstate, input, input_end, DJW_CLCLEN_BITS, & value))) { return ret; } | ||
1389 | |||
1390 | cl_clen[i] = value; | ||
1391 | } | ||
1392 | |||
1393 | /* Set the rest to zero. */ | ||
1394 | for (; i < DJW_TOTAL_CODES; i += 1) { cl_clen[i] = 0; } | ||
1395 | |||
1396 | /* No need to check for in-range clen values, because: */ | ||
1397 | XD3_ASSERT (1 << DJW_CLCLEN_BITS == DJW_MAX_CLCLEN + 1); | ||
1398 | |||
1399 | /* Build the code-length decoder. */ | ||
1400 | djw_build_decoder (stream, DJW_TOTAL_CODES, DJW_MAX_CLCLEN, | ||
1401 | cl_clen, cl_inorder, cl_base, cl_limit, cl_minlen, cl_maxlen); | ||
1402 | |||
1403 | /* Initialize the MTF state. */ | ||
1404 | djw_init_clen_mtf_1_2 (cl_mtf); | ||
1405 | |||
1406 | return 0; | ||
1407 | } | ||
1408 | |||
1409 | static INLINE int | ||
1410 | djw_decode_1_2 (xd3_stream *stream, | ||
1411 | bit_state *bstate, | ||
1412 | const uint8_t **input, | ||
1413 | const uint8_t *input_end, | ||
1414 | const uint8_t *inorder, | ||
1415 | const uint *base, | ||
1416 | const uint *limit, | ||
1417 | const uint *minlen, | ||
1418 | const uint *maxlen, | ||
1419 | uint8_t *mtfvals, | ||
1420 | usize_t elts, | ||
1421 | usize_t skip_offset, | ||
1422 | uint8_t *values) | ||
1423 | { | ||
1424 | usize_t n = 0, rep = 0, mtf = 0, s = 0; | ||
1425 | int ret; | ||
1426 | |||
1427 | while (n < elts) | ||
1428 | { | ||
1429 | /* Special case inside generic code: CLEN only: If not the first group, we already | ||
1430 | * know the zero frequencies. */ | ||
1431 | if (skip_offset != 0 && n >= skip_offset && values[n-skip_offset] == 0) | ||
1432 | { | ||
1433 | values[n++] = 0; | ||
1434 | continue; | ||
1435 | } | ||
1436 | |||
1437 | /* Repeat last symbol. */ | ||
1438 | if (rep != 0) | ||
1439 | { | ||
1440 | values[n++] = mtfvals[0]; | ||
1441 | rep -= 1; | ||
1442 | continue; | ||
1443 | } | ||
1444 | |||
1445 | /* Symbol following last repeat code. */ | ||
1446 | if (mtf != 0) | ||
1447 | { | ||
1448 | usize_t sym = djw_update_mtf (mtfvals, mtf); | ||
1449 | values[n++] = sym; | ||
1450 | mtf = 0; | ||
1451 | continue; | ||
1452 | } | ||
1453 | |||
1454 | /* Decode next symbol/repeat code. */ | ||
1455 | if ((ret = djw_decode_symbol (stream, bstate, input, input_end, | ||
1456 | inorder, base, limit, *minlen, *maxlen, | ||
1457 | & mtf, DJW_TOTAL_CODES))) { return ret; } | ||
1458 | |||
1459 | if (mtf <= RUN_1) | ||
1460 | { | ||
1461 | /* Repetition. */ | ||
1462 | rep = ((mtf + 1) << s); | ||
1463 | mtf = 0; | ||
1464 | s += 1; | ||
1465 | } | ||
1466 | else | ||
1467 | { | ||
1468 | /* Remove the RUN_1 MTF offset. */ | ||
1469 | mtf -= 1; | ||
1470 | s = 0; | ||
1471 | } | ||
1472 | } | ||
1473 | |||
1474 | /* If (rep != 0) there were too many codes received. */ | ||
1475 | if (rep != 0) | ||
1476 | { | ||
1477 | stream->msg = "secondary decoder invalid repeat code"; | ||
1478 | return EINVAL; | ||
1479 | } | ||
1480 | |||
1481 | return 0; | ||
1482 | } | ||
1483 | |||
1484 | static INLINE int | ||
1485 | djw_decode_prefix (xd3_stream *stream, | ||
1486 | bit_state *bstate, | ||
1487 | const uint8_t **input, | ||
1488 | const uint8_t *input_end, | ||
1489 | const uint8_t *cl_inorder, | ||
1490 | const uint *cl_base, | ||
1491 | const uint *cl_limit, | ||
1492 | const uint *cl_minlen, | ||
1493 | const uint *cl_maxlen, | ||
1494 | uint8_t *cl_mtf, | ||
1495 | usize_t groups, | ||
1496 | uint8_t *clen) | ||
1497 | { | ||
1498 | return djw_decode_1_2 (stream, bstate, input, input_end, | ||
1499 | cl_inorder, cl_base, cl_limit, cl_minlen, cl_maxlen, cl_mtf, | ||
1500 | ALPHABET_SIZE * groups, ALPHABET_SIZE, clen); | ||
1501 | } | ||
1502 | |||
1503 | static int | ||
1504 | xd3_decode_huff (xd3_stream *stream, | ||
1505 | djw_stream *h, | ||
1506 | const uint8_t **input_pos, | ||
1507 | const uint8_t *const input_end, | ||
1508 | uint8_t **output_pos, | ||
1509 | const uint8_t *const output_end) | ||
1510 | { | ||
1511 | const uint8_t *input = *input_pos; | ||
1512 | uint8_t *output = *output_pos; | ||
1513 | bit_state bstate = BIT_STATE_DECODE_INIT; | ||
1514 | uint8_t *sel_group = NULL; | ||
1515 | usize_t groups, gp; | ||
1516 | usize_t output_bytes = (output_end - output); | ||
1517 | usize_t sector_size; | ||
1518 | usize_t sectors; | ||
1519 | int ret; | ||
1520 | |||
1521 | /* Invalid input. */ | ||
1522 | if (output_bytes == 0) | ||
1523 | { | ||
1524 | stream->msg = "secondary decoder invalid input"; | ||
1525 | return EINVAL; | ||
1526 | } | ||
1527 | |||
1528 | /* Decode: number of groups */ | ||
1529 | if ((ret = xd3_decode_bits (stream, & bstate, & input, input_end, DJW_GROUP_BITS, & groups))) { goto fail; } | ||
1530 | |||
1531 | groups += 1; | ||
1532 | |||
1533 | if (groups > 1) | ||
1534 | { | ||
1535 | /* Decode: group size */ | ||
1536 | if ((ret = xd3_decode_bits (stream, & bstate, & input, input_end, DJW_SECTORSZ_BITS, & sector_size))) { goto fail; } | ||
1537 | |||
1538 | sector_size = (sector_size + 1) * DJW_SECTORSZ_MULT; | ||
1539 | } | ||
1540 | else | ||
1541 | { | ||
1542 | /* Default for groups == 1 */ | ||
1543 | sector_size = output_bytes; | ||
1544 | } | ||
1545 | |||
1546 | sectors = 1 + (output_bytes - 1) / sector_size; | ||
1547 | |||
1548 | /* @!@ In the case of groups==1, lots of extra stack space gets used here. Could | ||
1549 | * dynamically allocate this memory, which would help with excess parameter passing, | ||
1550 | * too. Passing too many parameters in this file, simplify it! */ | ||
1551 | |||
1552 | /* Outer scope: per-group symbol decoder tables. */ | ||
1553 | { | ||
1554 | uint8_t inorder[DJW_MAX_GROUPS][ALPHABET_SIZE]; | ||
1555 | uint base [DJW_MAX_GROUPS][DJW_MAX_CODELEN+1]; | ||
1556 | uint limit [DJW_MAX_GROUPS][DJW_MAX_CODELEN+1]; | ||
1557 | uint minlen [DJW_MAX_GROUPS]; | ||
1558 | uint maxlen [DJW_MAX_GROUPS]; | ||
1559 | |||
1560 | /* Nested scope: code length decoder tables. */ | ||
1561 | { | ||
1562 | uint8_t clen [DJW_MAX_GROUPS][ALPHABET_SIZE]; | ||
1563 | uint8_t cl_inorder[DJW_TOTAL_CODES]; | ||
1564 | uint cl_base [DJW_MAX_CLCLEN+1]; | ||
1565 | uint cl_limit [DJW_MAX_CLCLEN+1]; | ||
1566 | uint8_t cl_mtf [DJW_TOTAL_CODES]; | ||
1567 | uint cl_minlen; | ||
1568 | uint cl_maxlen; | ||
1569 | |||
1570 | /* Compute the code length decoder. */ | ||
1571 | if ((ret = djw_decode_clclen (stream, & bstate, & input, input_end, | ||
1572 | cl_inorder, cl_base, cl_limit, & cl_minlen, | ||
1573 | & cl_maxlen, cl_mtf))) { goto fail; } | ||
1574 | |||
1575 | /* Now decode each group decoder. */ | ||
1576 | if ((ret = djw_decode_prefix (stream, & bstate, & input, input_end, | ||
1577 | cl_inorder, cl_base, cl_limit, | ||
1578 | & cl_minlen, & cl_maxlen, cl_mtf, | ||
1579 | groups, clen[0]))) { goto fail; } | ||
1580 | |||
1581 | /* Prepare the actual decoding tables. */ | ||
1582 | for (gp = 0; gp < groups; gp += 1) | ||
1583 | { | ||
1584 | djw_build_decoder (stream, ALPHABET_SIZE, DJW_MAX_CODELEN, | ||
1585 | clen[gp], inorder[gp], base[gp], limit[gp], | ||
1586 | & minlen[gp], & maxlen[gp]); | ||
1587 | } | ||
1588 | } | ||
1589 | |||
1590 | /* Decode: selector clens. */ | ||
1591 | { | ||
1592 | uint8_t sel_inorder[DJW_MAX_GROUPS+1]; | ||
1593 | uint sel_base [DJW_MAX_GBCLEN+1]; | ||
1594 | uint sel_limit [DJW_MAX_GBCLEN+1]; | ||
1595 | uint8_t sel_mtf [DJW_MAX_GROUPS+1]; | ||
1596 | uint sel_minlen; | ||
1597 | uint sel_maxlen; | ||
1598 | |||
1599 | /* Setup group selection. */ | ||
1600 | if (groups > 1) | ||
1601 | { | ||
1602 | uint8_t sel_clen[DJW_MAX_GROUPS+1]; | ||
1603 | |||
1604 | for (gp = 0; gp < groups+1; gp += 1) | ||
1605 | { | ||
1606 | usize_t value; | ||
1607 | |||
1608 | if ((ret = xd3_decode_bits (stream, & bstate, & input, input_end, DJW_GBCLEN_BITS, & value))) { goto fail; } | ||
1609 | |||
1610 | sel_clen[gp] = value; | ||
1611 | sel_mtf[gp] = gp; | ||
1612 | } | ||
1613 | |||
1614 | if ((sel_group = xd3_alloc (stream, sectors, 1)) == NULL) { ret = ENOMEM; goto fail; } | ||
1615 | |||
1616 | djw_build_decoder (stream, groups+1, DJW_MAX_GBCLEN, sel_clen, | ||
1617 | sel_inorder, sel_base, sel_limit, & sel_minlen, & sel_maxlen); | ||
1618 | |||
1619 | if ((ret = djw_decode_1_2 (stream, & bstate, & input, input_end, | ||
1620 | sel_inorder, sel_base, sel_limit, & sel_minlen, & sel_maxlen, sel_mtf, | ||
1621 | sectors, 0, sel_group))) { goto fail; } | ||
1622 | } | ||
1623 | |||
1624 | /* Now decode each sector. */ | ||
1625 | { | ||
1626 | uint8_t *gp_inorder = inorder[0]; /* Initialize for (groups==1) case. */ | ||
1627 | uint *gp_base = base[0]; | ||
1628 | uint *gp_limit = limit[0]; | ||
1629 | uint gp_minlen = minlen[0]; | ||
1630 | uint gp_maxlen = maxlen[0]; | ||
1631 | usize_t c; | ||
1632 | |||
1633 | for (c = 0; c < sectors; c += 1) | ||
1634 | { | ||
1635 | usize_t n; | ||
1636 | |||
1637 | if (groups >= 2) | ||
1638 | { | ||
1639 | gp = sel_group[c]; | ||
1640 | |||
1641 | XD3_ASSERT (gp < groups); | ||
1642 | |||
1643 | gp_inorder = inorder[gp]; | ||
1644 | gp_base = base[gp]; | ||
1645 | gp_limit = limit[gp]; | ||
1646 | gp_minlen = minlen[gp]; | ||
1647 | gp_maxlen = maxlen[gp]; | ||
1648 | } | ||
1649 | |||
1650 | XD3_ASSERT (output_end - output > 0); | ||
1651 | |||
1652 | /* Decode next sector. */ | ||
1653 | n = min (sector_size, (usize_t) (output_end - output)); | ||
1654 | |||
1655 | do | ||
1656 | { | ||
1657 | usize_t sym; | ||
1658 | |||
1659 | if ((ret = djw_decode_symbol (stream, & bstate, & input, input_end, | ||
1660 | gp_inorder, gp_base, gp_limit, gp_minlen, gp_maxlen, | ||
1661 | & sym, ALPHABET_SIZE))) { goto fail; } | ||
1662 | |||
1663 | *output++ = sym; | ||
1664 | } | ||
1665 | while (--n); | ||
1666 | } | ||
1667 | } | ||
1668 | } | ||
1669 | } | ||
1670 | |||
1671 | IF_REGRESSION (if ((ret = xd3_test_clean_bits (stream, & bstate))) { goto fail; }); | ||
1672 | XD3_ASSERT (ret == 0); | ||
1673 | |||
1674 | fail: | ||
1675 | xd3_free (stream, sel_group); | ||
1676 | |||
1677 | (*input_pos) = input; | ||
1678 | (*output_pos) = output; | ||
1679 | return ret; | ||
1680 | } | ||
1681 | |||
1682 | /*********************************************************************/ | ||
1683 | /* TUNING */ | ||
1684 | /*********************************************************************/ | ||
1685 | |||
1686 | #if TUNE_HUFFMAN && XD3_ENCODER | ||
1687 | #include <stdio.h> | ||
1688 | #include "xdelta3-fgk.h" | ||
1689 | |||
1690 | static uint | ||
1691 | xd3_bitsof_output (xd3_output *output, bit_state *bstate) | ||
1692 | { | ||
1693 | uint x = 0; | ||
1694 | uint m = bstate->cur_mask; | ||
1695 | |||
1696 | while (m != 1) | ||
1697 | { | ||
1698 | x += 1; | ||
1699 | m >>= 1; | ||
1700 | } | ||
1701 | |||
1702 | return x + 8 * xd3_sizeof_output (output); | ||
1703 | } | ||
1704 | |||
1705 | static const char* xd3_sect_type (xd3_section_type type) | ||
1706 | { | ||
1707 | switch (type) | ||
1708 | { | ||
1709 | case DATA_SECTION: return "DATA"; | ||
1710 | case INST_SECTION: return "INST"; | ||
1711 | case ADDR_SECTION: return "ADDR"; | ||
1712 | } | ||
1713 | abort (); | ||
1714 | } | ||
1715 | |||
1716 | static int | ||
1717 | xd3_encode_huff (xd3_stream *stream, | ||
1718 | djw_stream *h, | ||
1719 | xd3_output *input, | ||
1720 | xd3_output *unused_output, | ||
1721 | xd3_sec_cfg *cfg) | ||
1722 | { | ||
1723 | int ret = 0; | ||
1724 | int input_size = xd3_sizeof_output (input); | ||
1725 | static int hdr = 0; | ||
1726 | const char *sect_type = xd3_sect_type (cfg->data_type); | ||
1727 | xd3_output *output; | ||
1728 | usize_t output_size; | ||
1729 | |||
1730 | if (hdr == 0) { hdr = 1; P(RINT "____ SECT INSZ SECTORSZ GPNO OUTSZ PREFIX SELECT ENCODE\n"); } | ||
1731 | |||
1732 | P(RINT "SECTION %s %u\n", sect_type, input_size); | ||
1733 | |||
1734 | { | ||
1735 | int gp, i; | ||
1736 | int best_size = 99999999; | ||
1737 | usize_t best_prefix = 0, best_select = 0, best_encode = 0, best_sector_size = 0; | ||
1738 | int best_gpno = -1; | ||
1739 | const char *t12 = "12"; | ||
1740 | usize_t clen_count[DJW_MAX_CODELEN+1]; | ||
1741 | djw_weight best_freq[DJW_TOTAL_CODES]; | ||
1742 | |||
1743 | for (cfg->ngroups = 1; cfg->ngroups <= /*1*/ DJW_MAX_GROUPS; cfg->ngroups += 1) | ||
1744 | { | ||
1745 | for (cfg->sector_size = 10; cfg->sector_size <= DJW_SECTORSZ_MAX; cfg->sector_size += 10) | ||
1746 | { | ||
1747 | output = xd3_alloc_output (stream, NULL); | ||
1748 | |||
1749 | if ((ret = xd3_real_encode_huff (stream, h, input, output, cfg))) { goto fail; } | ||
1750 | |||
1751 | output_size = xd3_sizeof_output (output); | ||
1752 | |||
1753 | if (output_size < best_size) | ||
1754 | { | ||
1755 | best_size = output_size; | ||
1756 | best_gpno = cfg->ngroups; | ||
1757 | best_prefix = tune_prefix_bits; | ||
1758 | best_select = tune_select_bits; | ||
1759 | best_encode = tune_encode_bits; | ||
1760 | best_sector_size = cfg->sector_size; | ||
1761 | memset (clen_count, 0, sizeof (clen_count)); | ||
1762 | |||
1763 | for (gp = 0; gp < cfg->ngroups; gp += 1) | ||
1764 | { | ||
1765 | for (i = 0; i < ALPHABET_SIZE; i += 1) | ||
1766 | { | ||
1767 | clen_count[tune_clen[gp][i]] += 1; | ||
1768 | } | ||
1769 | } | ||
1770 | |||
1771 | memcpy (best_freq, tune_freq, sizeof (tune_freq)); | ||
1772 | |||
1773 | XD3_ASSERT (sizeof (tune_freq) == sizeof (mtf_freq)); | ||
1774 | } | ||
1775 | |||
1776 | if (1) | ||
1777 | { | ||
1778 | P(RINT "COMP%s %u %u %u %u %u %u\n", | ||
1779 | t12, cfg->ngroups, cfg->sector_size, | ||
1780 | output_size, tune_prefix_bits, tune_select_bits, tune_encode_bits); | ||
1781 | } | ||
1782 | else | ||
1783 | { | ||
1784 | fail: | ||
1785 | P(RINT "COMP%s %u %u %u %u %u %u\n", | ||
1786 | t12, cfg->ngroups, cfg->sector_size, | ||
1787 | input_size, 0, 0, 0); | ||
1788 | } | ||
1789 | |||
1790 | xd3_free_output (stream, output); | ||
1791 | |||
1792 | XD3_ASSERT (ret == 0 || ret == XD3_NOSECOND); | ||
1793 | |||
1794 | if (cfg->ngroups == 1) { break; } | ||
1795 | } | ||
1796 | } | ||
1797 | |||
1798 | if (best_gpno > 0) | ||
1799 | { | ||
1800 | P(RINT "BEST%s %u %u %u %u %u %u\n", | ||
1801 | t12, best_gpno, best_sector_size, | ||
1802 | best_size, best_prefix, best_select, best_encode); | ||
1803 | |||
1804 | #if 0 | ||
1805 | P(RINT "CLEN%s ", t12); | ||
1806 | for (i = 1; i <= DJW_MAX_CODELEN; i += 1) | ||
1807 | { | ||
1808 | P(RINT "%u ", clen_count[i]); | ||
1809 | } | ||
1810 | P(RINT "\n"); | ||
1811 | |||
1812 | P(RINT "FREQ%s ", t12); | ||
1813 | for (i = 0; i < DJW_TOTAL_CODES; i += 1) | ||
1814 | { | ||
1815 | P(RINT "%u ", tune_freq[i]); | ||
1816 | } | ||
1817 | P(RINT "\n"); | ||
1818 | #endif | ||
1819 | } | ||
1820 | } | ||
1821 | |||
1822 | /* Compare to split single-table windows. */ | ||
1823 | { | ||
1824 | int parts, i; | ||
1825 | |||
1826 | cfg->ngroups = 1; | ||
1827 | |||
1828 | for (parts = 2; parts <= DJW_MAX_GROUPS; parts += 1) | ||
1829 | { | ||
1830 | usize_t part_size = input_size / parts; | ||
1831 | xd3_output *inp = input, *partin, *partin_head; | ||
1832 | usize_t off = 0; | ||
1833 | usize_t part_total = 0; | ||
1834 | |||
1835 | if (part_size < 1000) { break; } | ||
1836 | |||
1837 | for (i = 0; i < parts; i += 1) | ||
1838 | { | ||
1839 | usize_t inc; | ||
1840 | |||
1841 | partin = partin_head = xd3_alloc_output (stream, NULL); | ||
1842 | output = xd3_alloc_output (stream, NULL); | ||
1843 | |||
1844 | for (inc = 0; ((i < parts-1) && inc < part_size) || | ||
1845 | ((i == parts-1) && inp != NULL); ) | ||
1846 | { | ||
1847 | usize_t take; | ||
1848 | |||
1849 | if (i < parts-1) | ||
1850 | { | ||
1851 | take = min (part_size - inc, inp->next - off); | ||
1852 | } | ||
1853 | else | ||
1854 | { | ||
1855 | take = inp->next - off; | ||
1856 | } | ||
1857 | |||
1858 | ret = xd3_emit_bytes (stream, & partin, inp->base + off, take); | ||
1859 | |||
1860 | off += take; | ||
1861 | inc += take; | ||
1862 | |||
1863 | if (off == inp->next) | ||
1864 | { | ||
1865 | inp = inp->next_page; | ||
1866 | off = 0; | ||
1867 | } | ||
1868 | } | ||
1869 | |||
1870 | ret = xd3_real_encode_huff (stream, h, partin_head, output, cfg); | ||
1871 | |||
1872 | part_total += xd3_sizeof_output (output); | ||
1873 | |||
1874 | xd3_free_output (stream, partin_head); | ||
1875 | xd3_free_output (stream, output); | ||
1876 | |||
1877 | XD3_ASSERT (ret == 0 || ret == XD3_NOSECOND); | ||
1878 | |||
1879 | if (ret == XD3_NOSECOND) | ||
1880 | { | ||
1881 | break; | ||
1882 | } | ||
1883 | } | ||
1884 | |||
1885 | if (ret != XD3_NOSECOND) | ||
1886 | { | ||
1887 | P(RINT "PART %u %u\n", parts, part_total); | ||
1888 | } | ||
1889 | } | ||
1890 | } | ||
1891 | |||
1892 | /* Compare to FGK */ | ||
1893 | { | ||
1894 | fgk_stream *fgk = fgk_alloc (stream); | ||
1895 | |||
1896 | fgk_init (fgk); | ||
1897 | |||
1898 | output = xd3_alloc_output (stream, NULL); | ||
1899 | |||
1900 | ret = xd3_encode_fgk (stream, fgk, input, output, NULL); | ||
1901 | |||
1902 | output_size = xd3_sizeof_output (output); | ||
1903 | xd3_free_output (stream, output); | ||
1904 | fgk_destroy (stream, fgk); | ||
1905 | |||
1906 | XD3_ASSERT (ret == 0); | ||
1907 | |||
1908 | P(RINT "FGK %u\n", output_size); | ||
1909 | } | ||
1910 | |||
1911 | P(RINT "END_SECTION %s %u\n", sect_type, input_size); | ||
1912 | |||
1913 | return 0; | ||
1914 | } | ||
1915 | #endif | ||
1916 | |||
1917 | #endif | ||
diff --git a/xdelta3/xdelta3-fgk.h b/xdelta3/xdelta3-fgk.h new file mode 100755 index 0000000..a19d65c --- /dev/null +++ b/xdelta3/xdelta3-fgk.h | |||
@@ -0,0 +1,851 @@ | |||
1 | /* xdelta 3 - delta compression tools and library | ||
2 | * Copyright (C) 2002 and onward. Joshua P. MacDonald | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify | ||
5 | * it under the terms of the GNU General Public License as published by | ||
6 | * the Free Software Foundation; either version 2 of the License, or | ||
7 | * (at your option) any later version. | ||
8 | * | ||
9 | * This program is distributed in the hope that it will be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write to the Free Software | ||
16 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
17 | */ | ||
18 | |||
19 | /* For demonstration purposes only. | ||
20 | */ | ||
21 | |||
22 | #ifndef _XDELTA3_FGK_h_ | ||
23 | #define _XDELTA3_FGK_h_ | ||
24 | |||
25 | /* An implementation of the FGK algorithm described by D.E. Knuth in "Dynamic Huffman | ||
26 | * Coding" in Journal of Algorithms 6. */ | ||
27 | |||
28 | /* A 32bit counter (fgk_weight) is used as the frequency counter for nodes in the huffman | ||
29 | * tree. @!@ Need to test for overflow and/or reset stats. */ | ||
30 | |||
/* Opaque handles for the three FGK structures defined in this header. */
typedef struct _fgk_stream fgk_stream;
typedef struct _fgk_node   fgk_node;
typedef struct _fgk_block  fgk_block;

/* A single code bit; the coder stores one bit per array element. */
typedef unsigned int fgk_bit;

/* Frequency counter carried by every node of the huffman tree. */
typedef uint32_t fgk_weight;

/* One slot of the block table.  A slot that is in use records the leader node
 * of a weight block; a slot that is unused links to the next unused slot. */
struct _fgk_block {
  union {
    fgk_node  *un_leader;   /* slot in use: leader of the block */
    fgk_block *un_freeptr;  /* slot unused: next entry of the free list */
  } un;
};

/* Shorthand for the two views of a block slot. */
#define block_leader  un.un_leader
#define block_freeptr un.un_freeptr

/* The code can also support fixed huffman encoding/decoding; when this is
 * non-zero the tree is updated after every coded element. */
#define IS_ADAPTIVE 1
49 | |||
50 | /* weight is a count of the number of times this element has been seen in the current | ||
51 | * encoding/decoding. parent, right_child, and left_child are pointers defining the tree | ||
52 | * structure. right and left point to neighbors in an ordered sequence of | ||
53 | * weights. The left child of a node is always guaranteed to have weight not greater than | ||
54 | * its sibling. fgk_blockLeader points to the element with the same weight as itself which is | ||
55 | * closest to the next increasing weight block. */ | ||
56 | struct _fgk_node | ||
57 | { | ||
58 | fgk_weight weight; | ||
59 | fgk_node *parent; | ||
60 | fgk_node *left_child; | ||
61 | fgk_node *right_child; | ||
62 | fgk_node *left; | ||
63 | fgk_node *right; | ||
64 | fgk_block *my_block; | ||
65 | }; | ||
66 | |||
67 | /* alphabet_size is the a count of the number of possible leaves in the huffman tree. The | ||
68 | * number of total nodes counting internal nodes is ((2 * alphabet_size) - 1). | ||
69 | * zero_freq_count is the number of elements remaining which have zero frequency. | ||
70 | * zero_freq_exp and zero_freq_rem satisfy the equation zero_freq_count = 2^zero_freq_exp + | ||
71 | * zero_freq_rem. root_node is the root of the tree, which is initialized to a node with | ||
72 | * zero frequency and contains the 0th such element. free_node contains a pointer to the | ||
73 | * next available fgk_node space. alphabet contains all the elements and is indexed by N. | ||
74 | * remaining_zeros points to the head of the list of zeros. */ | ||
75 | struct _fgk_stream | ||
76 | { | ||
77 | int alphabet_size; | ||
78 | int zero_freq_count; | ||
79 | int zero_freq_exp; | ||
80 | int zero_freq_rem; | ||
81 | int coded_depth; | ||
82 | |||
83 | int total_nodes; | ||
84 | int total_blocks; | ||
85 | |||
86 | fgk_bit *coded_bits; | ||
87 | |||
88 | fgk_block *block_array; | ||
89 | fgk_block *free_block; | ||
90 | |||
91 | fgk_node *decode_ptr; | ||
92 | fgk_node *remaining_zeros; | ||
93 | fgk_node *alphabet; | ||
94 | fgk_node *root_node; | ||
95 | fgk_node *free_node; | ||
96 | }; | ||
97 | |||
98 | /*********************************************************************/ | ||
99 | /* Encoder */ | ||
100 | /*********************************************************************/ | ||
101 | |||
102 | static fgk_stream* fgk_alloc (xd3_stream *stream /*, int alphabet_size */); | ||
103 | static void fgk_init (fgk_stream *h); | ||
104 | static int fgk_encode_data (fgk_stream *h, | ||
105 | int n); | ||
106 | static INLINE fgk_bit fgk_get_encoded_bit (fgk_stream *h); | ||
107 | |||
108 | static int xd3_encode_fgk (xd3_stream *stream, | ||
109 | fgk_stream *sec_stream, | ||
110 | xd3_output *input, | ||
111 | xd3_output *output, | ||
112 | xd3_sec_cfg *cfg); | ||
113 | |||
114 | /*********************************************************************/ | ||
115 | /* Decoder */ | ||
116 | /*********************************************************************/ | ||
117 | |||
118 | static INLINE int fgk_decode_bit (fgk_stream *h, | ||
119 | fgk_bit b); | ||
120 | static int fgk_decode_data (fgk_stream *h); | ||
121 | static void fgk_destroy (xd3_stream *stream, | ||
122 | fgk_stream *h); | ||
123 | |||
124 | static int xd3_decode_fgk (xd3_stream *stream, | ||
125 | fgk_stream *sec_stream, | ||
126 | const uint8_t **input, | ||
127 | const uint8_t *const input_end, | ||
128 | uint8_t **output, | ||
129 | const uint8_t *const output_end); | ||
130 | |||
131 | /*********************************************************************/ | ||
132 | /* Private */ | ||
133 | /*********************************************************************/ | ||
134 | |||
135 | static unsigned int fgk_find_nth_zero (fgk_stream *h, int n); | ||
136 | static int fgk_nth_zero (fgk_stream *h, int n); | ||
137 | static void fgk_update_tree (fgk_stream *h, int n); | ||
138 | static fgk_node* fgk_increase_zero_weight (fgk_stream *h, int n); | ||
139 | static void fgk_eliminate_zero (fgk_stream* h, fgk_node *node); | ||
140 | static void fgk_move_right (fgk_stream *h, fgk_node *node); | ||
141 | static void fgk_promote (fgk_stream *h, fgk_node *node); | ||
142 | static void fgk_init_node (fgk_node *node, int i, int size); | ||
143 | static fgk_block* fgk_make_block (fgk_stream *h, fgk_node *l); | ||
144 | static void fgk_free_block (fgk_stream *h, fgk_block *b); | ||
145 | static void fgk_factor_remaining (fgk_stream *h); | ||
146 | static INLINE void fgk_swap_ptrs (fgk_node **one, fgk_node **two); | ||
147 | |||
148 | /*********************************************************************/ | ||
149 | /* Basic Routines */ | ||
150 | /*********************************************************************/ | ||
151 | |||
152 | /* returns an initialized huffman encoder for an alphabet with the | ||
153 | * given size. returns NULL if enough memory cannot be allocated */ | ||
154 | static fgk_stream* fgk_alloc (xd3_stream *stream /*, int alphabet_size0 */) | ||
155 | { | ||
156 | int alphabet_size0 = ALPHABET_SIZE; | ||
157 | fgk_stream *h; | ||
158 | |||
159 | if ((h = (fgk_stream*) xd3_alloc (stream, 1, sizeof (fgk_stream))) == NULL) | ||
160 | { | ||
161 | return NULL; | ||
162 | } | ||
163 | |||
164 | h->total_nodes = (2 * alphabet_size0) - 1; | ||
165 | h->total_blocks = (2 * h->total_nodes); | ||
166 | h->alphabet = (fgk_node*) xd3_alloc (stream, h->total_nodes, sizeof (fgk_node)); | ||
167 | h->block_array = (fgk_block*) xd3_alloc (stream, h->total_blocks, sizeof (fgk_block)); | ||
168 | h->coded_bits = (fgk_bit*) xd3_alloc (stream, alphabet_size0, sizeof (fgk_bit)); | ||
169 | |||
170 | if (h->coded_bits == NULL || | ||
171 | h->alphabet == NULL || | ||
172 | h->block_array == NULL) | ||
173 | { | ||
174 | fgk_destroy (stream, h); | ||
175 | return NULL; | ||
176 | } | ||
177 | |||
178 | h->alphabet_size = alphabet_size0; | ||
179 | |||
180 | return h; | ||
181 | } | ||
182 | |||
183 | static void fgk_init (fgk_stream *h) | ||
184 | { | ||
185 | int i; | ||
186 | |||
187 | h->root_node = h->alphabet; | ||
188 | h->decode_ptr = h->root_node; | ||
189 | h->free_node = h->alphabet + h->alphabet_size; | ||
190 | h->remaining_zeros = h->alphabet; | ||
191 | h->coded_depth = 0; | ||
192 | h->zero_freq_count = h->alphabet_size + 2; | ||
193 | |||
194 | /* after two calls to factor_remaining, zero_freq_count == alphabet_size */ | ||
195 | fgk_factor_remaining(h); /* set ZFE and ZFR */ | ||
196 | fgk_factor_remaining(h); /* set ZFDB according to prev state */ | ||
197 | |||
198 | IF_DEBUG (memset (h->alphabet, 0, sizeof (h->alphabet[0]) * h->total_nodes)); | ||
199 | |||
200 | for (i = 0; i < h->total_blocks-1; i += 1) | ||
201 | { | ||
202 | h->block_array[i].block_freeptr = &h->block_array[i + 1]; | ||
203 | } | ||
204 | |||
205 | h->block_array[h->total_blocks - 1].block_freeptr = NULL; | ||
206 | h->free_block = h->block_array; | ||
207 | |||
208 | /* Zero frequency nodes are inserted in the first alphabet_size | ||
209 | * positions, with Value, weight, and a pointer to the next zero | ||
210 | * frequency node. */ | ||
211 | for (i = h->alphabet_size - 1; i >= 0; i -= 1) | ||
212 | { | ||
213 | fgk_init_node (h->alphabet + i, i, h->alphabet_size); | ||
214 | } | ||
215 | } | ||
216 | |||
217 | static void fgk_swap_ptrs(fgk_node **one, fgk_node **two) | ||
218 | { | ||
219 | fgk_node *tmp = *one; | ||
220 | *one = *two; | ||
221 | *two = tmp; | ||
222 | } | ||
223 | |||
224 | /* Takes huffman transmitter h and n, the nth elt in the alphabet, and | ||
225 | * returns the number of required to encode n. */ | ||
226 | static int fgk_encode_data (fgk_stream* h, int n) | ||
227 | { | ||
228 | fgk_node *target_ptr = h->alphabet + n; | ||
229 | |||
230 | XD3_ASSERT (n < h->alphabet_size); | ||
231 | |||
232 | h->coded_depth = 0; | ||
233 | |||
234 | /* First encode the binary representation of the nth remaining | ||
235 | * zero frequency element in reverse such that bit, which will be | ||
236 | * encoded from h->coded_depth down to 0 will arrive in increasing | ||
237 | * order following the tree path. If there is only one left, it | ||
238 | * is not neccesary to encode these bits. */ | ||
239 | if (IS_ADAPTIVE && target_ptr->weight == 0) | ||
240 | { | ||
241 | unsigned int where, shift; | ||
242 | int bits; | ||
243 | |||
244 | where = fgk_find_nth_zero(h, n); | ||
245 | shift = 1; | ||
246 | |||
247 | if (h->zero_freq_rem == 0) | ||
248 | { | ||
249 | bits = h->zero_freq_exp; | ||
250 | } | ||
251 | else | ||
252 | { | ||
253 | bits = h->zero_freq_exp + 1; | ||
254 | } | ||
255 | |||
256 | while (bits > 0) | ||
257 | { | ||
258 | h->coded_bits[h->coded_depth++] = (shift & where) && 1; | ||
259 | |||
260 | bits -= 1; | ||
261 | shift <<= 1; | ||
262 | }; | ||
263 | |||
264 | target_ptr = h->remaining_zeros; | ||
265 | } | ||
266 | |||
267 | /* The path from root to node is filled into coded_bits in reverse so | ||
268 | * that it is encoded in the right order */ | ||
269 | while (target_ptr != h->root_node) | ||
270 | { | ||
271 | h->coded_bits[h->coded_depth++] = (target_ptr->parent->right_child == target_ptr); | ||
272 | |||
273 | target_ptr = target_ptr->parent; | ||
274 | } | ||
275 | |||
276 | if (IS_ADAPTIVE) | ||
277 | { | ||
278 | fgk_update_tree(h, n); | ||
279 | } | ||
280 | |||
281 | return h->coded_depth; | ||
282 | } | ||
283 | |||
284 | /* Should be called as many times as fgk_encode_data returns. | ||
285 | */ | ||
286 | static INLINE fgk_bit fgk_get_encoded_bit (fgk_stream *h) | ||
287 | { | ||
288 | XD3_ASSERT (h->coded_depth > 0); | ||
289 | |||
290 | return h->coded_bits[--h->coded_depth]; | ||
291 | } | ||
292 | |||
293 | /* This procedure updates the tree after alphabet[n] has been encoded | ||
294 | * or decoded. | ||
295 | */ | ||
296 | static void fgk_update_tree (fgk_stream *h, int n) | ||
297 | { | ||
298 | fgk_node *incr_node; | ||
299 | |||
300 | if (h->alphabet[n].weight == 0) | ||
301 | { | ||
302 | incr_node = fgk_increase_zero_weight (h, n); | ||
303 | } | ||
304 | else | ||
305 | { | ||
306 | incr_node = h->alphabet + n; | ||
307 | } | ||
308 | |||
309 | while (incr_node != h->root_node) | ||
310 | { | ||
311 | fgk_move_right (h, incr_node); | ||
312 | fgk_promote (h, incr_node); | ||
313 | incr_node->weight += 1; /* incr the parent */ | ||
314 | incr_node = incr_node->parent; /* repeat */ | ||
315 | } | ||
316 | |||
317 | h->root_node->weight += 1; | ||
318 | } | ||
319 | |||
320 | static void fgk_move_right (fgk_stream *h, fgk_node *move_fwd) | ||
321 | { | ||
322 | fgk_node **fwd_par_ptr, **back_par_ptr; | ||
323 | fgk_node *move_back, *tmp; | ||
324 | |||
325 | move_back = move_fwd->my_block->block_leader; | ||
326 | |||
327 | if (move_fwd == move_back || | ||
328 | move_fwd->parent == move_back || | ||
329 | move_fwd->weight == 0) | ||
330 | { | ||
331 | return; | ||
332 | } | ||
333 | |||
334 | move_back->right->left = move_fwd; | ||
335 | |||
336 | if (move_fwd->left) | ||
337 | { | ||
338 | move_fwd->left->right = move_back; | ||
339 | } | ||
340 | |||
341 | tmp = move_fwd->right; | ||
342 | move_fwd->right = move_back->right; | ||
343 | |||
344 | if (tmp == move_back) | ||
345 | { | ||
346 | move_back->right = move_fwd; | ||
347 | } | ||
348 | else | ||
349 | { | ||
350 | tmp->left = move_back; | ||
351 | move_back->right = tmp; | ||
352 | } | ||
353 | |||
354 | tmp = move_back->left; | ||
355 | move_back->left = move_fwd->left; | ||
356 | |||
357 | if (tmp == move_fwd) | ||
358 | { | ||
359 | move_fwd->left = move_back; | ||
360 | } | ||
361 | else | ||
362 | { | ||
363 | tmp->right = move_fwd; | ||
364 | move_fwd->left = tmp; | ||
365 | } | ||
366 | |||
367 | if (move_fwd->parent->right_child == move_fwd) | ||
368 | { | ||
369 | fwd_par_ptr = &move_fwd->parent->right_child; | ||
370 | } | ||
371 | else | ||
372 | { | ||
373 | fwd_par_ptr = &move_fwd->parent->left_child; | ||
374 | } | ||
375 | |||
376 | if (move_back->parent->right_child == move_back) | ||
377 | { | ||
378 | back_par_ptr = &move_back->parent->right_child; | ||
379 | } | ||
380 | else | ||
381 | { | ||
382 | back_par_ptr = &move_back->parent->left_child; | ||
383 | } | ||
384 | |||
385 | fgk_swap_ptrs (&move_fwd->parent, &move_back->parent); | ||
386 | fgk_swap_ptrs (fwd_par_ptr, back_par_ptr); | ||
387 | |||
388 | move_fwd->my_block->block_leader = move_fwd; | ||
389 | } | ||
390 | |||
391 | /* Shifts node, the leader of its block, into the next block. */ | ||
392 | static void fgk_promote (fgk_stream *h, fgk_node *node) | ||
393 | { | ||
394 | fgk_node *my_left, *my_right; | ||
395 | fgk_block *cur_block; | ||
396 | |||
397 | my_right = node->right; | ||
398 | my_left = node->left; | ||
399 | cur_block = node->my_block; | ||
400 | |||
401 | if (node->weight == 0) | ||
402 | { | ||
403 | return; | ||
404 | } | ||
405 | |||
406 | /* if left is right child, parent of remaining zeros case (?), means parent | ||
407 | * has same weight as right child. */ | ||
408 | if (my_left == node->right_child && | ||
409 | node->left_child && | ||
410 | node->left_child->weight == 0) | ||
411 | { | ||
412 | XD3_ASSERT (node->left_child == h->remaining_zeros); | ||
413 | XD3_ASSERT (node->right_child->weight == (node->weight+1)); /* child weight was already incremented */ | ||
414 | |||
415 | if (node->weight == (my_right->weight - 1) && my_right != h->root_node) | ||
416 | { | ||
417 | fgk_free_block (h, cur_block); | ||
418 | node->my_block = my_right->my_block; | ||
419 | my_left->my_block = my_right->my_block; | ||
420 | } | ||
421 | |||
422 | return; | ||
423 | } | ||
424 | |||
425 | if (my_left == h->remaining_zeros) | ||
426 | { | ||
427 | return; | ||
428 | } | ||
429 | |||
430 | /* true if not the leftmost node */ | ||
431 | if (my_left->my_block == cur_block) | ||
432 | { | ||
433 | my_left->my_block->block_leader = my_left; | ||
434 | } | ||
435 | else | ||
436 | { | ||
437 | fgk_free_block (h, cur_block); | ||
438 | } | ||
439 | |||
440 | /* node->parent != my_right */ | ||
441 | if ((node->weight == (my_right->weight - 1)) && (my_right != h->root_node)) | ||
442 | { | ||
443 | node->my_block = my_right->my_block; | ||
444 | } | ||
445 | else | ||
446 | { | ||
447 | node->my_block = fgk_make_block (h, node); | ||
448 | } | ||
449 | } | ||
450 | |||
451 | /* When an element is seen the first time this is called to remove it from the list of | ||
452 | * zero weight elements and introduce a new internal node to the tree. */ | ||
453 | static fgk_node* fgk_increase_zero_weight (fgk_stream *h, int n) | ||
454 | { | ||
455 | fgk_node *this_zero, *new_internal, *zero_ptr; | ||
456 | |||
457 | this_zero = h->alphabet + n; | ||
458 | |||
459 | if (h->zero_freq_count == 1) | ||
460 | { | ||
461 | /* this is the last one */ | ||
462 | this_zero->right_child = NULL; | ||
463 | |||
464 | if (this_zero->right->weight == 1) | ||
465 | { | ||
466 | this_zero->my_block = this_zero->right->my_block; | ||
467 | } | ||
468 | else | ||
469 | { | ||
470 | this_zero->my_block = fgk_make_block (h, this_zero); | ||
471 | } | ||
472 | |||
473 | h->remaining_zeros = NULL; | ||
474 | |||
475 | return this_zero; | ||
476 | } | ||
477 | |||
478 | zero_ptr = h->remaining_zeros; | ||
479 | |||
480 | new_internal = h->free_node++; | ||
481 | |||
482 | new_internal->parent = zero_ptr->parent; | ||
483 | new_internal->right = zero_ptr->right; | ||
484 | new_internal->weight = 0; | ||
485 | new_internal->right_child = this_zero; | ||
486 | new_internal->left = this_zero; | ||
487 | |||
488 | if (h->remaining_zeros == h->root_node) | ||
489 | { | ||
490 | /* This is the first element to be coded */ | ||
491 | h->root_node = new_internal; | ||
492 | this_zero->my_block = fgk_make_block (h, this_zero); | ||
493 | new_internal->my_block = fgk_make_block (h, new_internal); | ||
494 | } | ||
495 | else | ||
496 | { | ||
497 | new_internal->right->left = new_internal; | ||
498 | |||
499 | if (zero_ptr->parent->right_child == zero_ptr) | ||
500 | { | ||
501 | zero_ptr->parent->right_child = new_internal; | ||
502 | } | ||
503 | else | ||
504 | { | ||
505 | zero_ptr->parent->left_child = new_internal; | ||
506 | } | ||
507 | |||
508 | if (new_internal->right->weight == 1) | ||
509 | { | ||
510 | new_internal->my_block = new_internal->right->my_block; | ||
511 | } | ||
512 | else | ||
513 | { | ||
514 | new_internal->my_block = fgk_make_block (h, new_internal); | ||
515 | } | ||
516 | |||
517 | this_zero->my_block = new_internal->my_block; | ||
518 | } | ||
519 | |||
520 | fgk_eliminate_zero (h, this_zero); | ||
521 | |||
522 | new_internal->left_child = h->remaining_zeros; | ||
523 | |||
524 | this_zero->right = new_internal; | ||
525 | this_zero->left = h->remaining_zeros; | ||
526 | this_zero->parent = new_internal; | ||
527 | this_zero->left_child = NULL; | ||
528 | this_zero->right_child = NULL; | ||
529 | |||
530 | h->remaining_zeros->parent = new_internal; | ||
531 | h->remaining_zeros->right = this_zero; | ||
532 | |||
533 | return this_zero; | ||
534 | } | ||
535 | |||
536 | /* When a zero frequency element is encoded, it is followed by the binary representation | ||
537 | * of the index into the remaining elements. Sets a cache to the element before it so | ||
538 | * that it can be removed without calling this procedure again. */ | ||
539 | static unsigned int fgk_find_nth_zero (fgk_stream* h, int n) | ||
540 | { | ||
541 | fgk_node *target_ptr = h->alphabet + n; | ||
542 | fgk_node *head_ptr = h->remaining_zeros; | ||
543 | unsigned int idx = 0; | ||
544 | |||
545 | while (target_ptr != head_ptr) | ||
546 | { | ||
547 | head_ptr = head_ptr->right_child; | ||
548 | idx += 1; | ||
549 | } | ||
550 | |||
551 | return idx; | ||
552 | } | ||
553 | |||
554 | /* Splices node out of the list of zeros. */ | ||
555 | static void fgk_eliminate_zero (fgk_stream* h, fgk_node *node) | ||
556 | { | ||
557 | if (h->zero_freq_count == 1) | ||
558 | { | ||
559 | return; | ||
560 | } | ||
561 | |||
562 | fgk_factor_remaining(h); | ||
563 | |||
564 | if (node->left_child == NULL) | ||
565 | { | ||
566 | h->remaining_zeros = h->remaining_zeros->right_child; | ||
567 | h->remaining_zeros->left_child = NULL; | ||
568 | } | ||
569 | else if (node->right_child == NULL) | ||
570 | { | ||
571 | node->left_child->right_child = NULL; | ||
572 | } | ||
573 | else | ||
574 | { | ||
575 | node->right_child->left_child = node->left_child; | ||
576 | node->left_child->right_child = node->right_child; | ||
577 | } | ||
578 | } | ||
579 | |||
580 | static void fgk_init_node (fgk_node *node, int i, int size) | ||
581 | { | ||
582 | if (i < size - 1) | ||
583 | { | ||
584 | node->right_child = node + 1; | ||
585 | } | ||
586 | else | ||
587 | { | ||
588 | node->right_child = NULL; | ||
589 | } | ||
590 | |||
591 | if (i >= 1) | ||
592 | { | ||
593 | node->left_child = node - 1; | ||
594 | } | ||
595 | else | ||
596 | { | ||
597 | node->left_child = NULL; | ||
598 | } | ||
599 | |||
600 | node->weight = 0; | ||
601 | node->parent = NULL; | ||
602 | node->right = NULL; | ||
603 | node->left = NULL; | ||
604 | node->my_block = NULL; | ||
605 | } | ||
606 | |||
607 | /* The data structure used is an array of blocks, which are unions of free pointers and | ||
608 | * huffnode pointers. free blocks are a linked list of free blocks, the front of which is | ||
609 | * h->free_block. The used blocks are pointers to the head of each block. */ | ||
610 | static fgk_block* fgk_make_block (fgk_stream *h, fgk_node* lead) | ||
611 | { | ||
612 | fgk_block *ret = h->free_block; | ||
613 | |||
614 | XD3_ASSERT (h->free_block != NULL); | ||
615 | |||
616 | h->free_block = h->free_block->block_freeptr; | ||
617 | |||
618 | ret->block_leader = lead; | ||
619 | |||
620 | return ret; | ||
621 | } | ||
622 | |||
623 | /* Restores the block to the front of the free list. */ | ||
624 | static void fgk_free_block (fgk_stream *h, fgk_block *b) | ||
625 | { | ||
626 | b->block_freeptr = h->free_block; | ||
627 | h->free_block = b; | ||
628 | } | ||
629 | |||
630 | /* sets zero_freq_count, zero_freq_rem, and zero_freq_exp to satsity the equation given | ||
631 | * above. */ | ||
632 | static void fgk_factor_remaining (fgk_stream *h) | ||
633 | { | ||
634 | unsigned int i; | ||
635 | |||
636 | i = (--h->zero_freq_count); | ||
637 | h->zero_freq_exp = 0; | ||
638 | |||
639 | while (i > 1) | ||
640 | { | ||
641 | h->zero_freq_exp += 1; | ||
642 | i >>= 1; | ||
643 | } | ||
644 | |||
645 | i = 1 << h->zero_freq_exp; | ||
646 | |||
647 | h->zero_freq_rem = h->zero_freq_count - i; | ||
648 | } | ||
649 | |||
650 | /* receives a bit at a time and returns true when a complete code has | ||
651 | * been received. | ||
652 | */ | ||
653 | static int INLINE fgk_decode_bit (fgk_stream* h, fgk_bit b) | ||
654 | { | ||
655 | XD3_ASSERT (b == 1 || b == 0); | ||
656 | |||
657 | if (IS_ADAPTIVE && h->decode_ptr->weight == 0) | ||
658 | { | ||
659 | int bitsreq; | ||
660 | |||
661 | if (h->zero_freq_rem == 0) | ||
662 | { | ||
663 | bitsreq = h->zero_freq_exp; | ||
664 | } | ||
665 | else | ||
666 | { | ||
667 | bitsreq = h->zero_freq_exp + 1; | ||
668 | } | ||
669 | |||
670 | h->coded_bits[h->coded_depth] = b; | ||
671 | h->coded_depth += 1; | ||
672 | |||
673 | return h->coded_depth >= bitsreq; | ||
674 | } | ||
675 | else | ||
676 | { | ||
677 | if (b) | ||
678 | { | ||
679 | h->decode_ptr = h->decode_ptr->right_child; | ||
680 | } | ||
681 | else | ||
682 | { | ||
683 | h->decode_ptr = h->decode_ptr->left_child; | ||
684 | } | ||
685 | |||
686 | if (h->decode_ptr->left_child == NULL) | ||
687 | { | ||
688 | /* If the weight is non-zero, finished. */ | ||
689 | if (h->decode_ptr->weight != 0) | ||
690 | { | ||
691 | return 1; | ||
692 | } | ||
693 | |||
694 | /* zero_freq_count is dropping to 0, finished. */ | ||
695 | return h->zero_freq_count == 1; | ||
696 | } | ||
697 | else | ||
698 | { | ||
699 | return 0; | ||
700 | } | ||
701 | } | ||
702 | } | ||
703 | |||
704 | static int fgk_nth_zero (fgk_stream* h, int n) | ||
705 | { | ||
706 | fgk_node *ret = h->remaining_zeros; | ||
707 | |||
708 | /* ERROR: if during this loop (ret->right_child == NULL) then the encoder's zero count | ||
709 | * is too high. Could return an error code now, but is probably unnecessary overhead, | ||
710 | * since the caller should check integrity anyway. */ | ||
711 | for (; n != 0 && ret->right_child != NULL; n -= 1) | ||
712 | { | ||
713 | ret = ret->right_child; | ||
714 | } | ||
715 | |||
716 | return ret - h->alphabet; | ||
717 | } | ||
718 | |||
719 | /* once fgk_decode_bit returns 1, this retrieves an index into the | ||
720 | * alphabet otherwise this returns 0, indicating more bits are | ||
721 | * required. | ||
722 | */ | ||
723 | static int fgk_decode_data (fgk_stream* h) | ||
724 | { | ||
725 | unsigned int elt = h->decode_ptr - h->alphabet; | ||
726 | |||
727 | if (IS_ADAPTIVE && h->decode_ptr->weight == 0) { | ||
728 | int i; | ||
729 | unsigned int n = 0; | ||
730 | |||
731 | for (i = 0; i < h->coded_depth - 1; i += 1) | ||
732 | { | ||
733 | n |= h->coded_bits[i]; | ||
734 | n <<= 1; | ||
735 | } | ||
736 | |||
737 | n |= h->coded_bits[i]; | ||
738 | elt = fgk_nth_zero(h, n); | ||
739 | } | ||
740 | |||
741 | h->coded_depth = 0; | ||
742 | |||
743 | if (IS_ADAPTIVE) | ||
744 | { | ||
745 | fgk_update_tree(h, elt); | ||
746 | } | ||
747 | |||
748 | h->decode_ptr = h->root_node; | ||
749 | |||
750 | return elt; | ||
751 | } | ||
752 | |||
753 | static void fgk_destroy (xd3_stream *stream, | ||
754 | fgk_stream *h) | ||
755 | { | ||
756 | if (h != NULL) | ||
757 | { | ||
758 | IF_DEBUG1({ | ||
759 | int i; | ||
760 | for (i = 0; i < ALPHABET_SIZE; i += 1) | ||
761 | { | ||
762 | XP(OF, "freq[%u] = %u\n", i, h->alphabet[i].weight); | ||
763 | } | ||
764 | }); | ||
765 | |||
766 | xd3_free (stream, h->alphabet); | ||
767 | xd3_free (stream, h->coded_bits); | ||
768 | xd3_free (stream, h->block_array); | ||
769 | xd3_free (stream, h); | ||
770 | } | ||
771 | } | ||
772 | |||
773 | /*********************************************************************/ | ||
774 | /* Xdelta */ | ||
775 | /*********************************************************************/ | ||
776 | |||
777 | static int | ||
778 | xd3_encode_fgk (xd3_stream *stream, fgk_stream *sec_stream, xd3_output *input, xd3_output *output, xd3_sec_cfg *cfg) | ||
779 | { | ||
780 | bit_state bstate = BIT_STATE_ENCODE_INIT; | ||
781 | xd3_output *cur_page; | ||
782 | int ret; | ||
783 | |||
784 | /* OPT: quit compression early if it looks bad */ | ||
785 | for (cur_page = input; cur_page; cur_page = cur_page->next_page) | ||
786 | { | ||
787 | const uint8_t *inp = cur_page->base; | ||
788 | const uint8_t *inp_max = inp + cur_page->next; | ||
789 | |||
790 | while (inp < inp_max) | ||
791 | { | ||
792 | usize_t bits = fgk_encode_data (sec_stream, *inp++); | ||
793 | |||
794 | while (bits--) | ||
795 | { | ||
796 | if ((ret = xd3_encode_bit (stream, & output, & bstate, fgk_get_encoded_bit (sec_stream)))) { return ret; } | ||
797 | } | ||
798 | } | ||
799 | } | ||
800 | |||
801 | return xd3_flush_bits (stream, & output, & bstate); | ||
802 | } | ||
803 | |||
804 | static int | ||
805 | xd3_decode_fgk (xd3_stream *stream, | ||
806 | fgk_stream *sec_stream, | ||
807 | const uint8_t **input_pos, | ||
808 | const uint8_t *const input_max, | ||
809 | uint8_t **output_pos, | ||
810 | const uint8_t *const output_max) | ||
811 | { | ||
812 | bit_state bstate; | ||
813 | uint8_t *output = *output_pos; | ||
814 | const uint8_t *input = *input_pos; | ||
815 | |||
816 | for (;;) | ||
817 | { | ||
818 | if (input == input_max) | ||
819 | { | ||
820 | stream->msg = "secondary decoder end of input"; | ||
821 | return EINVAL; | ||
822 | } | ||
823 | |||
824 | bstate.cur_byte = *input++; | ||
825 | |||
826 | for (bstate.cur_mask = 1; bstate.cur_mask != 0x100; bstate.cur_mask <<= 1) | ||
827 | { | ||
828 | int done = fgk_decode_bit (sec_stream, (bstate.cur_byte & bstate.cur_mask) && 1); | ||
829 | |||
830 | if (! done) { continue; } | ||
831 | |||
832 | *output++ = fgk_decode_data (sec_stream); | ||
833 | |||
834 | if (unlikely (output == output_max)) | ||
835 | { | ||
836 | /* During regression testing: */ | ||
837 | IF_REGRESSION ({ | ||
838 | int ret; | ||
839 | bstate.cur_mask <<= 1; | ||
840 | if ((ret = xd3_test_clean_bits (stream, & bstate))) { return ret; } | ||
841 | }); | ||
842 | |||
843 | (*output_pos) = output; | ||
844 | (*input_pos) = input; | ||
845 | return 0; | ||
846 | } | ||
847 | } | ||
848 | } | ||
849 | } | ||
850 | |||
851 | #endif /* _XDELTA3_FGK_h_ */ | ||
diff --git a/xdelta3/xdelta3-list.h b/xdelta3/xdelta3-list.h new file mode 100755 index 0000000..64a2582 --- /dev/null +++ b/xdelta3/xdelta3-list.h | |||
@@ -0,0 +1,130 @@ | |||
1 | /* xdelta 3 - delta compression tools and library | ||
2 | * Copyright (C) 2002 and onward. Joshua P. MacDonald | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify | ||
5 | * it under the terms of the GNU General Public License as published by | ||
6 | * the Free Software Foundation; either version 2 of the License, or | ||
7 | * (at your option) any later version. | ||
8 | * | ||
9 | * This program is distributed in the hope that it will be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write to the Free Software | ||
16 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
17 | */ | ||
18 | |||
19 | #ifndef __XDELTA3_LIST__ | ||
20 | #define __XDELTA3_LIST__ | ||
21 | |||
/* XD3_MAKELIST(LTYPE,ETYPE,LNAME) generates a set of static inline functions
 * named LTYPE_xxx that implement a circular doubly-linked list.  LTYPE is the
 * link type (it must have `next' and `prev' members pointing to LTYPE), ETYPE
 * is the element type, and LNAME is the name of the LTYPE member embedded in
 * ETYPE.  A list is represented by a stand-alone LTYPE head that points to
 * itself when the list is empty.
 *
 *   _entry      element that contains a given link
 *   _init       make a head an empty list
 *   _add        insert a link between two adjacent links
 *   _push_back  append an element
 *   _del        unlink whatever lies between two links
 *   _remove     unlink an element, returns the element that followed it
 *   _pop_back   unlink and return the last element
 *   _pop_front  unlink and return the first element
 *   _empty      true if the list has no elements
 *   _front      first element
 *   _back       last element
 *   _end        true if an element is really the list head
 *   _next       element following a given element
 *   _length     number of elements (walks the list)
 */
#define XD3_MAKELIST(LTYPE,ETYPE,LNAME)                                 \
                                                                        \
static inline ETYPE*                                                    \
LTYPE ## _entry (LTYPE* lnk)                                            \
{                                                                       \
  return (ETYPE*) ((char*) lnk - (unsigned long) &((ETYPE*) 0)->LNAME); \
}                                                                       \
                                                                        \
static inline void                                                      \
LTYPE ## _init (LTYPE *lst)                                             \
{                                                                       \
  lst->next = lst;                                                      \
  lst->prev = lst;                                                      \
}                                                                       \
                                                                        \
static inline void                                                      \
LTYPE ## _add (LTYPE *before, LTYPE *after, LTYPE *fresh)               \
{                                                                       \
  after->prev  = fresh;                                                 \
  before->next = fresh;                                                 \
  fresh->next  = after;                                                 \
  fresh->prev  = before;                                                \
}                                                                       \
                                                                        \
static inline void                                                      \
LTYPE ## _push_back (LTYPE *lst, ETYPE *ent)                            \
{                                                                       \
  LTYPE ## _add (lst->prev, lst, & ent->LNAME);                         \
}                                                                       \
                                                                        \
static inline void                                                      \
LTYPE ## _del (LTYPE *after,                                            \
               LTYPE *before)                                           \
{                                                                       \
  after->prev  = before;                                                \
  before->next = after;                                                 \
}                                                                       \
                                                                        \
static inline ETYPE*                                                    \
LTYPE ## _remove (ETYPE *ent)                                           \
{                                                                       \
  LTYPE *follow = ent->LNAME.next;                                      \
  LTYPE ## _del (follow, ent->LNAME.prev);                              \
  return LTYPE ## _entry (follow);                                      \
}                                                                       \
                                                                        \
static inline ETYPE*                                                    \
LTYPE ## _pop_back (LTYPE *lst)                                         \
{                                                                       \
  LTYPE *last = lst->prev;                                              \
  LTYPE ## _del (last->next, last->prev);                               \
  return LTYPE ## _entry (last);                                        \
}                                                                       \
                                                                        \
static inline ETYPE*                                                    \
LTYPE ## _pop_front (LTYPE *lst)                                        \
{                                                                       \
  LTYPE *first = lst->next;                                             \
  LTYPE ## _del (first->next, first->prev);                             \
  return LTYPE ## _entry (first);                                       \
}                                                                       \
                                                                        \
static inline int                                                       \
LTYPE ## _empty (LTYPE *lst)                                            \
{                                                                       \
  return lst->next == lst;                                              \
}                                                                       \
                                                                        \
static inline ETYPE*                                                    \
LTYPE ## _front (LTYPE *lst)                                            \
{                                                                       \
  return LTYPE ## _entry (lst->next);                                   \
}                                                                       \
                                                                        \
static inline ETYPE*                                                    \
LTYPE ## _back (LTYPE *lst)                                             \
{                                                                       \
  return LTYPE ## _entry (lst->prev);                                   \
}                                                                       \
                                                                        \
static inline int                                                       \
LTYPE ## _end (LTYPE *lst, ETYPE *ent)                                  \
{                                                                       \
  return & ent->LNAME == lst;                                           \
}                                                                       \
                                                                        \
static inline ETYPE*                                                    \
LTYPE ## _next (ETYPE *ent)                                             \
{                                                                       \
  return LTYPE ## _entry (ent->LNAME.next);                             \
}                                                                       \
                                                                        \
static inline int                                                       \
LTYPE ## _length (LTYPE *lst)                                           \
{                                                                       \
  LTYPE *cur = lst->next;                                               \
  int cnt = 0;                                                          \
                                                                        \
  while (cur != lst)                                                    \
    {                                                                   \
      cnt += 1;                                                         \
      cur = cur->next;                                                  \
    }                                                                   \
                                                                        \
  return cnt;                                                           \
}                                                                       \
                                                                        \
typedef int unused_ ## LTYPE
129 | |||
130 | #endif | ||
diff --git a/xdelta3/xdelta3-main.h b/xdelta3/xdelta3-main.h new file mode 100755 index 0000000..29469c3 --- /dev/null +++ b/xdelta3/xdelta3-main.h | |||
@@ -0,0 +1,2923 @@ | |||
1 | /* xdelta 3 - delta compression tools and library | ||
2 | * Copyright (C) 2001 and onward. Joshua P. MacDonald | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify | ||
5 | * it under the terms of the GNU General Public License as published by | ||
6 | * the Free Software Foundation; either version 2 of the License, or | ||
7 | * (at your option) any later version. | ||
8 | * | ||
9 | * This program is distributed in the hope that it will be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write to the Free Software | ||
16 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
17 | */ | ||
18 | |||
19 | /* This is all the extra stuff you need for convenience to users in a command line | ||
20 | * application. It contains these major components: | ||
21 | * | ||
22 | * 1. VCDIFF tools | ||
23 | * 2. external compression support (this is POSIX-specific). | ||
24 | * 3. a general read/write loop that handles all of the Xdelta decode/encode/VCDIFF-print | ||
25 | * functions | ||
26 | * 4. command-line interpreter | ||
27 | * 5. an Xdelta application header which stores default filename, external compression settings | ||
28 | * 6. output/error printing | ||
29 | * 7. basic file support and OS interface | ||
30 | */ | ||
31 | |||
32 | /* Definite TODO list: | ||
33 | * 1. do exact gzip-like filename, stdout handling. make a .xz extension, refuse | ||
34 | * to encode to stdout without -cf, etc. | ||
35 | * 2. Allow the user to add a comment string to the app header without disturbing the default | ||
36 | * behavior. | ||
37 | * 3. Define zero-length window behavior | ||
38 | * 4. Separate getopt() code from main and make flags modular, implement help. | ||
39 | * 5. Catch up on related research! | ||
40 | */ | ||
41 | |||
42 | /* Nice idea TODO list: | ||
43 | * | ||
44 | * 1. Should probably have a write buffer (option)? | ||
45 | * 2. Add a reporting function for displaying progress, warning messages out of the library. | ||
46 | * 3. Add WIN32 support in addition to XD3_POSIX/XD3_STDIO. Should almost compile under windows | ||
47 | * with XD3_STDIO, but not quite (e.g., stat()?). | ||
48 | * 4. Update-in-place, partial-encoding per the latest RFC: see "Wishful TODO" comments below | ||
49 | */ | ||
50 | |||
51 | /* On error handling and printing: | ||
52 | * | ||
53 | * The xdelta library sets stream->msg to indicate what condition caused an internal | ||
54 | * failure, but many failures originate here and are printed here. The return convention | ||
55 | * is 0 for success, as throughout Xdelta code, but special attention is required here for | ||
56 | * the operating system calls with different error handling. See the main_file_* routines. | ||
57 | * All errors in this file have a message printed at the time of occurrence. Since some of | ||
58 | * these calls occur within calls to the library, the error may end up being printed again | ||
59 | * with a more general error message. | ||
60 | */ | ||
61 | |||
62 | /******************************************************************************************/ | ||
63 | |||
64 | #ifndef XD3_POSIX | ||
65 | #define XD3_POSIX 0 | ||
66 | #endif | ||
67 | #ifndef XD3_STDIO | ||
68 | #define XD3_STDIO 0 | ||
69 | #endif | ||
70 | #ifndef XD3_WIN32 | ||
71 | #define XD3_WIN32 0 | ||
72 | #endif | ||
73 | |||
/* Diagnostic output helpers.  XPR names the print function and NT supplies its leading
 * arguments, so a call is written XPR(NT "format\n", args...).  With the active
 * definitions this expands to fprintf (stderr, "xdelta3: " "format\n", args...): the
 * message goes to stderr with an "xdelta3: " prefix. */
#if 0 && XD3_DEBUG
/* Disabled variant (note the "0 &&") that also prints the process id.  It expands to
 * two statements, so it would not be safe as the body of an unbraced if/else. */
#define XPR fprintf (stderr, "xdelta3[%u]: ", getpid()); fprintf
#define NT stderr,
#else
#define XPR fprintf
#define NT stderr, "xdelta3: "
#endif

/* VC(OUT ...) is the analogous pair for VCDIFF-tool output.  It expands to
 * fprintf (vcout, ...), so a FILE* named vcout must be in scope at the call site. */
#define VC fprintf
#define OUT vcout,
85 | |||
86 | /* If none are set, default to posix. */ | ||
87 | #if (XD3_POSIX + XD3_STDIO + XD3_WIN32) == 0 | ||
88 | #undef XD3_POSIX | ||
89 | #define XD3_POSIX 1 | ||
90 | #endif | ||
91 | |||
92 | /* Handle externally-compressed inputs. */ | ||
93 | #ifndef EXTERNAL_COMPRESSION | ||
94 | #define EXTERNAL_COMPRESSION 1 | ||
95 | #endif | ||
96 | |||
97 | #define PRINTHDR_SPECIAL -4378291 | ||
98 | |||
99 | #define PIPE_BUFSIZE (usize_t)(1 << 12) | ||
100 | #define MIN_BUFSIZE (usize_t)(1 << 12) | ||
101 | |||
102 | /* The number of soft-config variables. Update as field count changes! */ | ||
103 | #define XD3_SOFTCFG_VARCNT 10 | ||
104 | |||
105 | /* this is used as in XPR(NT XD3_LIB_ERRMSG (stream, ret)) to print an error message | ||
106 | * from the library. */ | ||
107 | #define XD3_LIB_ERRMSG(stream, ret) "%s: %s\n", xd3_errstring (stream), xd3_strerror (ret) | ||
108 | |||
109 | #include <stdio.h> /* fprintf */ | ||
110 | #include <unistd.h> /* lots */ | ||
111 | |||
112 | #if XD3_POSIX | ||
113 | #include <unistd.h> /* close, read, write... */ | ||
114 | #include <sys/types.h> | ||
115 | #include <fcntl.h> | ||
116 | #endif | ||
117 | |||
118 | #include <sys/time.h> /* gettimeofday() */ | ||
119 | #include <sys/stat.h> /* stat() and fstat() */ | ||
120 | |||
121 | /****************************************************************************************** | ||
122 | ENUMS and TYPES | ||
123 | ******************************************************************************************/ | ||
124 | |||
/* These flags (mainly pertaining to main_read() operations) are set in the
 * main_file->flags variable.  All are related to external decompression support.
 *
 * RD_FIRST causes the external decompression check when the input is first read.
 *
 * RD_NONEXTERNAL disables external decompression for reading a compressed input, in the
 * case of Xdelta inputs.  Note: Xdelta is supported as an external compression type,
 * which is the reason for this flag.  An example to justify this is: to create a
 * delta between two files that are VCDIFF-compressed.  Two external Xdelta decoders are
 * run to supply decompressed source and target inputs to the Xdelta encoder. */
typedef enum
{
  RD_FIRST       = (1 << 0),
  RD_NONEXTERNAL = (1 << 1),
} xd3_read_flags;
140 | |||
/* main_file->mode values.  The XOPEN_* macros in this file compare the mode against
 * XO_READ to pick the open flags, the stdio mode string and the operation name used in
 * error messages. */
typedef enum
{
  XO_READ  = 0,
  XO_WRITE = 1,
} main_file_modes;
147 | |||
/* Main commands.  For example, CMD_PRINTHDR is the "xdelta printhdr" command.
 * CMD_ENCODE only exists when the library is built with XD3_ENCODER, so the numeric
 * values of the commands after it depend on that setting. */
typedef enum
{
  CMD_NONE = 0,
  CMD_PRINTHDR,
  CMD_PRINTHDRS,
  CMD_PRINTDELTA,
#if XD3_ENCODER
  CMD_ENCODE,
#endif
  CMD_DECODE,
  CMD_TEST,
  CMD_CONFIG,
} xd3_cmd;

/* CMD_DEFAULT is encode when the encoder is compiled in, otherwise decode.
 * IS_ENCODE(cmd) is a constant 0 in decoder-only builds, where CMD_ENCODE is not
 * defined. */
#if XD3_ENCODER
#define CMD_DEFAULT CMD_ENCODE
#define IS_ENCODE(cmd) (cmd == CMD_ENCODE)
#else
#define CMD_DEFAULT CMD_DECODE
#define IS_ENCODE(cmd) (0)
#endif
170 | |||
typedef struct _main_file        main_file;
typedef struct _main_extcomp     main_extcomp;
typedef struct _main_blklru      main_blklru;
typedef struct _main_blklru_list main_blklru_list;

/* The main_file object supports abstract system calls like open, close, read, write, seek,
 * stat.  The program uses these to represent both seekable files and non-seekable files.
 * Source files must be seekable, but the target input and any output file do not require
 * seekability.
 *
 * The handle member is a FILE* under XD3_STDIO and a file descriptor under XD3_POSIX;
 * no handle member is declared when neither is set.
 */
struct _main_file
{
#if XD3_STDIO
  FILE               *file;       /* NULL means "not open" (see main_file_isopen). */
#elif XD3_POSIX
  int                 file;       /* -1 means "not open" (see main_file_isopen). */
#endif

  int                 mode;       /* XO_READ and XO_WRITE */
  const char         *filename;   /* File name or /dev/stdin, /dev/stdout, /dev/stderr. */
  const char         *realname;   /* The name passed to main_file_open(), recorded on a
                                   * successful open. */
  const main_extcomp *compressor; /* External compression struct. */
  int                 flags;      /* RD_FIRST or RD_NONEXTERNAL */
  xoff_t              nread;      /* for input position; advanced by main_file_read() */
  xoff_t              nwrite;     /* for output position; advanced by main_file_write() */
};
197 | |||
/* Various strings and magic values used to detect and call external compression.  See
 * the extcomp_types[] table for examples; its initializers are positional, so the
 * field order here must not change. */
struct _main_extcomp
{
  const char    *recomp_cmdname;  /* Command used to recompress, e.g. "gzip". */
  const char    *recomp_options;  /* Its option string, e.g. "-cf". */

  const char    *decomp_cmdname;  /* Command used to decompress, e.g. "gzip". */
  const char    *decomp_options;  /* Its option string, e.g. "-dcf". */

  const char    *ident;           /* Short identifier, e.g. "G" for gzip. */
  const char    *magic;           /* Leading magic bytes of the compressed format. */
  int            magic_size;      /* Number of bytes in magic. */
  int            flags;           /* 0 or RD_NONEXTERNAL in the table. */
};
213 | |||
/* This file implements a small LRU of source blocks.  For encoding purposes,
 * we prevent paging in blocks we've already scanned in the source (return
 * XD3_NOTAVAIL). */

/* Doubly-linked list node; also used as the list head type. */
struct _main_blklru_list
{
  main_blklru_list  *next;
  main_blklru_list  *prev;
};

/* One LRU entry. */
struct _main_blklru
{
  uint8_t          *blk;    /* Block data buffer. */
  xoff_t            blkno;  /* Block number held in blk. */
  main_blklru_list  link;   /* Embedded list node, named in XD3_MAKELIST below. */
};

/* ... represented as a list (no cache index).  Instantiates the list helper functions
 * for main_blklru entries linked through their "link" member. */
XD3_MAKELIST(main_blklru_list,main_blklru,link);
232 | |||
/* Program options: various command line flags and options.  The initializers below are
 * the defaults; NOTE(review): they are presumably overwritten by the command-line
 * parser, which is outside this chunk. */
static int         option_stdout             = 0;
static int         option_force              = 0;
static int         option_verbose            = 0; /* > 2 traces each malloc, read and write in this file */
static int         option_quiet              = 0; /* main_print_func() forces this to 1 around main_get_appheader() */
static int         option_level              = 6;
static int         option_use_appheader      = 1;
static uint8_t*    option_appheader          = NULL;
static int         option_use_secondary      = /* until-standardized, leave this off */ 0;
static char*       option_secondary          = NULL;
static int         option_use_checksum       = 1;
static int         option_use_altcodetable   = 0;
static char*       option_smatch_config      = NULL;
static int         option_no_compress        = 0;
static int         option_no_output          = 0; /* go through the motions, but do not open or write output */
static const char *option_source_filename    = NULL;

static usize_t     option_winsize            = XD3_DEFAULT_WINSIZE;
static usize_t     option_srcwinsz           = XD3_DEFAULT_SRCWINSZ;
252 | |||
253 | /* Wishful TODO: Support should probably be for partial deltas & update-in-place deltas, | ||
254 | * following the latest draft RFC specs partial deltas [the changes have moderate | ||
255 | * complexity]. The following flags implement primitive controls to skip sections | ||
256 | * of the input & output, mainly for debugging purposes. */ | ||
257 | |||
258 | /* DECODE-ONLY: Skips processing windows up to first_window and past last_window using the | ||
259 | * XD3_SKIP_WINDOW flag, but main_ still reads/parses every window. TODO: make it | ||
260 | * meaningful for encode, etc... */ | ||
261 | /*static xoff_t option_first_window = 0;*/ | ||
262 | /*static xoff_t option_last_window = XOFF_T_MAX;*/ | ||
263 | |||
264 | /* ENCODE-ONLY: Seeks to first_offset, EOF at last_offset, done entirely in this main_ | ||
265 | * routines, so the library actually sees a shortened input. TODO: implement this for | ||
266 | * decode, implement proper partial deltas, works with external compression?, works with | ||
267 | * non-seekable inputs?, change ranges, etc... */ | ||
268 | /*static xoff_t option_first_offset = 0;*/ | ||
269 | /*static xoff_t option_last_offset = XOFF_T_MAX;*/ | ||
270 | |||
271 | /* This controls the number of times main repeats itself, only for profiling. */ | ||
272 | static int option_profile_cnt = 0; | ||
273 | |||
274 | /* These variables are suppressed to avoid their use w/o support. main() warns | ||
275 | * appropriately. */ | ||
276 | #if EXTERNAL_COMPRESSION | ||
277 | static int option_decompress_inputs = 1; | ||
278 | static int option_recompress_outputs = 1; | ||
279 | #endif | ||
280 | |||
281 | /* This is for comparing "printdelta" output without attention to | ||
282 | * copy-instruction modes, useful for reverse engineering. */ | ||
283 | #if VCDIFF_TOOLS | ||
284 | static int option_print_cpymode = 1; | ||
285 | #endif | ||
286 | |||
/* Static variables */

/* Debug-only balance counter: incremented by main_malloc(), decremented by
 * main_free(). */
IF_DEBUG(static int main_mallocs = 0;)

static char*           program_name = NULL;
static uint8_t*        appheader_used = NULL;
static uint8_t*        main_bdata = NULL;

/* The LRU: obviously this is shared by all callers. */
static int               lru_size = 0;
static main_blklru      *lru = NULL; /* array of lru_size elts */
static main_blklru_list  lru_list;
static main_blklru_list  lru_free;
static int               do_not_lru = 0;  /* set to avoid lru, instead discard oldest */

/* LRU statistics counters. */
static int lru_hits   = 0;
static int lru_misses = 0;
static int lru_filled = 0;

/* Hacks for VCDIFF tools */
static int allow_fake_source = 0;
307 | |||
/* This array of compressor types is compiled even if EXTERNAL_COMPRESSION is false just so
 * the program knows the mapping of IDENT->NAME.
 *
 * Columns, in struct _main_extcomp order: recompress command, recompress options,
 * decompress command, decompress options, ident, magic bytes, magic length, flags. */
static main_extcomp extcomp_types[] =
{
  /* The entry for xdelta must be first because the program_name is set here. */
  { "xdelta3",  "-cfq", "xdelta3",    "-dcfq", "X", "\xd6\xc3\xc4", 3, RD_NONEXTERNAL },
  { "bzip2",    "-cf",  "bzip2",      "-dcf",  "B", "BZh",          3, 0 },
  { "gzip",     "-cf",  "gzip",       "-dcf",  "G", "\037\213",     2, 0 },
  { "compress", "-cf",  "uncompress", "-cf",   "Z", "\037\235",     2, 0 },
};
318 | |||
319 | static void main_get_appheader (xd3_stream *stream, main_file *output, main_file *sfile); | ||
320 | |||
321 | static int main_help (void); | ||
322 | |||
/* Prints the version line through the P/RINT output macros (defined outside this
 * part of the file) and returns EXIT_SUCCESS. */
static int
main_version (void)
{
  P(RINT "VERSION=3_PRERFC_0\n");
  return EXIT_SUCCESS;
}
329 | |||
/* Prints the version line followed by one NAME=value line for each build-time switch
 * and each default size constant, then returns EXIT_SUCCESS.  Every value is printed
 * with %d. */
static int
main_config (void)
{
  main_version ();
  /* Compile-time */
  P(RINT "VCDIFF_TOOLS=%d\n", VCDIFF_TOOLS);
  P(RINT "REGRESSION_TEST=%d\n", REGRESSION_TEST);
  P(RINT "SECONDARY_FGK=%d\n", SECONDARY_FGK);
  P(RINT "SECONDARY_DJW=%d\n", SECONDARY_DJW);
  P(RINT "GENERIC_ENCODE_TABLES=%d\n", GENERIC_ENCODE_TABLES);
  P(RINT "GENERIC_ENCODE_TABLES_COMPUTE=%d\n", GENERIC_ENCODE_TABLES_COMPUTE);
  P(RINT "EXTERNAL_COMPRESSION=%d\n", EXTERNAL_COMPRESSION);
  P(RINT "XD3_POSIX=%d\n", XD3_POSIX);
  P(RINT "XD3_DEBUG=%d\n", XD3_DEBUG);
  P(RINT "XD3_USE_LARGEFILE64=%d\n", XD3_USE_LARGEFILE64);
  P(RINT "XD3_ENCODER=%d\n", XD3_ENCODER);
  /* Runtime sizes */
  P(RINT "XD3_DEFAULT_WINSIZE=%d\n", XD3_DEFAULT_WINSIZE);
  P(RINT "XD3_DEFAULT_SRCBLKSZ=%d\n", XD3_DEFAULT_SRCBLKSZ);
  P(RINT "XD3_DEFAULT_SRCWINSZ=%d\n", XD3_DEFAULT_SRCWINSZ);
  P(RINT "XD3_DEFAULT_MEMSIZE=%d\n", XD3_DEFAULT_MEMSIZE);
  P(RINT "XD3_ALLOCSIZE=%d\n", XD3_ALLOCSIZE);
  P(RINT "XD3_HARDMAXWINSIZE=%d\n", XD3_HARDMAXWINSIZE);
  P(RINT "XD3_NODECOMPRESSSIZE=%d\n", XD3_NODECOMPRESSSIZE);
  P(RINT "XD3_DEFAULT_IOPT_SIZE=%d\n", XD3_DEFAULT_IOPT_SIZE);
  P(RINT "XD3_DEFAULT_SPREVSZ=%d\n", XD3_DEFAULT_SPREVSZ);

  return EXIT_SUCCESS;
}
359 | |||
360 | static void* | ||
361 | main_malloc1 (usize_t size) | ||
362 | { | ||
363 | void* r = malloc (size); | ||
364 | if (r == NULL) { XPR(NT "malloc: %s\n", xd3_strerror (ENOMEM)); } | ||
365 | else if (option_verbose > 2) { XPR(NT "malloc: %u\n", size); } | ||
366 | return r; | ||
367 | } | ||
368 | |||
369 | static void* | ||
370 | main_malloc (usize_t size) | ||
371 | { | ||
372 | void *r = main_malloc1 (size); | ||
373 | if (r) { IF_DEBUG (main_mallocs += 1); } | ||
374 | return r; | ||
375 | } | ||
376 | |||
377 | static void* | ||
378 | main_alloc (void *opaque, | ||
379 | usize_t items, | ||
380 | usize_t size) | ||
381 | { | ||
382 | return main_malloc1 (items * size); | ||
383 | } | ||
384 | |||
385 | static void | ||
386 | main_free (void **ptr) | ||
387 | { | ||
388 | if (*ptr) | ||
389 | { | ||
390 | IF_DEBUG (main_mallocs -= 1); | ||
391 | free (*ptr); | ||
392 | (*ptr) = NULL; | ||
393 | } | ||
394 | } | ||
395 | |||
/* Free callback matching main_alloc(): releases PTR with free().  OPAQUE is ignored and
 * the main_mallocs counter is not touched. */
static void
main_free1 (void *opaque, void *ptr)
{
  (void) opaque;
  free (ptr);
}
401 | |||
402 | /* This ensures that (ret = errno) always indicates failure, in case errno was | ||
403 | * accidentally not set. If this prints there's a bug somewhere. */ | ||
404 | static int | ||
405 | get_errno (void) | ||
406 | { | ||
407 | if (errno == 0) | ||
408 | { | ||
409 | XPR(NT "you found a bug: expected errno != 0\n"); | ||
410 | errno = EINVAL; | ||
411 | } | ||
412 | return errno; | ||
413 | } | ||
414 | |||
415 | static long | ||
416 | get_millisecs_now (void) | ||
417 | { | ||
418 | struct timeval tv; | ||
419 | |||
420 | gettimeofday (& tv, NULL); | ||
421 | |||
422 | return (tv.tv_sec) * 1000L + (tv.tv_usec) / 1000; | ||
423 | } | ||
424 | |||
425 | /* Always >= 1 millisec, right? */ | ||
426 | static long | ||
427 | get_millisecs_since (void) | ||
428 | { | ||
429 | double millis; | ||
430 | struct timeval tv; | ||
431 | /* static holds the first timeval */ | ||
432 | static struct timeval init; | ||
433 | |||
434 | gettimeofday (& tv, NULL); | ||
435 | |||
436 | millis = (tv.tv_sec - init.tv_sec) * 1e6; | ||
437 | millis += (tv.tv_usec - init.tv_usec); | ||
438 | millis /= 1000; | ||
439 | |||
440 | init = tv; | ||
441 | |||
442 | return max ((long)millis, 1L); | ||
443 | } | ||
444 | |||
445 | static char* | ||
446 | main_format_bcnt (xoff_t r, char *buf) | ||
447 | { | ||
448 | static const char* fmts[] = { "B", "KB", "MB", "GB" }; | ||
449 | int i; | ||
450 | |||
451 | for (i = 0; i < SIZEOF_ARRAY(fmts); i += 1) | ||
452 | { | ||
453 | if (r < 10 * 1e3 || i == -1 + SIZEOF_ARRAY(fmts)) | ||
454 | { | ||
455 | sprintf (buf, "%"Q"u %s", r, fmts[i]); | ||
456 | break; | ||
457 | } | ||
458 | r /= 1000; | ||
459 | } | ||
460 | return buf; | ||
461 | } | ||
462 | |||
463 | static char* | ||
464 | main_format_rate (xoff_t bytes, long millis, char *buf) | ||
465 | { | ||
466 | xoff_t r = 1.0 * bytes / (1.0 * millis / 1000.0); | ||
467 | static char lbuf[32]; | ||
468 | |||
469 | main_format_bcnt (r, lbuf); | ||
470 | sprintf (buf, "%s/sec", lbuf); | ||
471 | return buf; | ||
472 | } | ||
473 | |||
/* Formats a duration of MILLIS milliseconds into BUF and returns BUF:
 *   below 1000      -> "<n> ms"
 *   below 10000     -> "<s.d> sec" (one decimal place)
 *   otherwise       -> "<s> sec"   (whole seconds)
 * MILLIS is a signed long, so it is printed with %ld; the previous %lu did not match
 * the argument type.  BUF must be large enough for the result. */
static char*
main_format_millis (long millis, char *buf)
{
  if      (millis < 1000)  { sprintf (buf, "%ld ms", millis); }
  else if (millis < 10000) { sprintf (buf, "%.1f sec", millis / 1000.0); }
  else                     { sprintf (buf, "%ld sec", millis / 1000L); }
  return buf;
}
482 | |||
483 | /* A safe version of strtol for xoff_t. */ | ||
484 | static int | ||
485 | main_strtoxoff (const char* s, xoff_t *xo, char which) | ||
486 | { | ||
487 | char *e; | ||
488 | xoff_t x; | ||
489 | |||
490 | XD3_ASSERT(s && *s != 0); | ||
491 | |||
492 | { | ||
493 | /* Should check LONG_MIN, LONG_MAX, LLONG_MIN, LLONG_MAX? */ | ||
494 | #if SIZEOF_XOFF_T == 4 | ||
495 | long xx = strtol (s, &e, 0); | ||
496 | #else | ||
497 | long long xx = strtoll (s, &e, 0); | ||
498 | #endif | ||
499 | |||
500 | if (xx < 0) | ||
501 | { | ||
502 | XPR(NT "-%c: negative integer: %s\n", which, s); | ||
503 | return EXIT_FAILURE; | ||
504 | } | ||
505 | |||
506 | x = xx; | ||
507 | } | ||
508 | |||
509 | if (*e != 0) | ||
510 | { | ||
511 | XPR(NT "-%c: invalid integer: %s\n", which, s); | ||
512 | return EXIT_FAILURE; | ||
513 | } | ||
514 | |||
515 | (*xo) = x; | ||
516 | return 0; | ||
517 | } | ||
518 | |||
519 | static int | ||
520 | main_atou (const char* arg, usize_t *xo, usize_t low, char which) | ||
521 | { | ||
522 | xoff_t x; | ||
523 | int ret; | ||
524 | |||
525 | if ((ret = main_strtoxoff (arg, & x, which))) { return ret; } | ||
526 | |||
527 | if (x > USIZE_T_MAX || x < low) | ||
528 | { | ||
529 | XPR(NT "-%c: minimum value: %u", which, low); | ||
530 | return EXIT_FAILURE; | ||
531 | } | ||
532 | (*xo) = x; | ||
533 | return 0; | ||
534 | } | ||
535 | |||
536 | /****************************************************************************************** | ||
537 | FILE BASICS | ||
538 | ******************************************************************************************/ | ||
539 | |||
/* With all the variation in file system-call semantics, arguments, return values and
 * error-handling for the POSIX and STDIO file APIs, the insides of these functions make
 * me sick, which is why these wrappers exist. */

/* The XOPEN_* macros choose a value from the mode of a main_file.  They refer to a
 * variable literally named "xfile", so they only work where such a pointer is in
 * scope. */
#define XOPEN_OPNAME (xfile->mode == XO_READ ? "read" : "write")
#define XOPEN_STDIO  (xfile->mode == XO_READ ? "rb" : "wb")
#define XOPEN_POSIX  (xfile->mode == XO_READ ? O_RDONLY : O_WRONLY | O_CREAT | O_TRUNC)
#define XOPEN_MODE   (xfile->mode == XO_READ ? 0 : 0666)

/* Prints "file <op> failed: <read|write>: <name>: <error text>".  Uses XOPEN_OPNAME and
 * therefore also requires "xfile" to be in scope. */
#define XF_ERROR(op, name, ret) XPR(NT "file %s failed: %s: %s: %s\n", (op), XOPEN_OPNAME, (name), xd3_strerror (ret))

/* XFNO(f) yields the file descriptor of a main_file.  XSTD*_XF(f) point a main_file at
 * one of the standard streams and set its filename to the matching /dev name. */
#if XD3_STDIO
#define XFNO(f) fileno(f->file)
#define XSTDOUT_XF(f) { (f)->file = stdout; (f)->filename = "/dev/stdout"; }
#define XSTDERR_XF(f) { (f)->file = stderr; (f)->filename = "/dev/stderr"; }
#define XSTDIN_XF(f)  { (f)->file = stdin;  (f)->filename = "/dev/stdin"; }

#elif XD3_POSIX
#define XFNO(f) f->file
#define XSTDOUT_XF(f) { (f)->file = STDOUT_FILENO; (f)->filename = "/dev/stdout"; }
#define XSTDERR_XF(f) { (f)->file = STDERR_FILENO; (f)->filename = "/dev/stderr"; }
#define XSTDIN_XF(f)  { (f)->file = STDIN_FILENO;  (f)->filename = "/dev/stdin"; }
#endif
563 | |||
/* Resets XFILE to the "not open" state: every field is zeroed, and under XD3_POSIX the
 * descriptor is then set to -1, the value main_file_isopen() tests for. */
static void
main_file_init (main_file *xfile)
{
  memset (xfile, 0, sizeof (*xfile));

#if XD3_POSIX
  /* A zeroed descriptor would be 0, which is a valid fd, so mark it closed explicitly. */
  xfile->file = -1;
#endif
}
573 | |||
/* Returns non-zero when XFILE currently holds an open handle: a non-NULL FILE* under
 * XD3_STDIO, or a descriptor other than -1 under XD3_POSIX.
 * NOTE(review): when neither XD3_STDIO nor XD3_POSIX is set the function has no return
 * statement. */
static int
main_file_isopen (main_file *xfile)
{
#if XD3_STDIO
  return xfile->file != NULL;

#elif XD3_POSIX
  return xfile->file != -1;
#endif
}
584 | |||
585 | static int | ||
586 | main_file_close (main_file *xfile) | ||
587 | { | ||
588 | int ret = 0; | ||
589 | |||
590 | if (! main_file_isopen (xfile)) | ||
591 | { | ||
592 | return 0; | ||
593 | } | ||
594 | |||
595 | #if XD3_STDIO | ||
596 | ret = fclose (xfile->file); | ||
597 | xfile->file = NULL; | ||
598 | |||
599 | #elif XD3_POSIX | ||
600 | ret = close (xfile->file); | ||
601 | xfile->file = -1; | ||
602 | #endif | ||
603 | |||
604 | if (ret != 0) { XF_ERROR ("close", xfile->filename, ret = get_errno ()); } | ||
605 | return ret; | ||
606 | } | ||
607 | |||
608 | static int | ||
609 | main_file_open (main_file *xfile, const char* name, int mode) | ||
610 | { | ||
611 | int ret = 0; | ||
612 | |||
613 | xfile->mode = mode; | ||
614 | |||
615 | XD3_ASSERT (! main_file_isopen (xfile)); | ||
616 | |||
617 | #if XD3_STDIO | ||
618 | xfile->file = fopen (name, XOPEN_STDIO); | ||
619 | |||
620 | ret = (xfile->file == NULL) ? get_errno () : 0; | ||
621 | |||
622 | #elif XD3_POSIX | ||
623 | if ((ret = open (name, XOPEN_POSIX, XOPEN_MODE)) < 0) | ||
624 | { | ||
625 | ret = get_errno (); | ||
626 | } | ||
627 | else | ||
628 | { | ||
629 | xfile->file = ret; | ||
630 | ret = 0; | ||
631 | } | ||
632 | #endif | ||
633 | if (ret) { XF_ERROR ("open", name, ret); } | ||
634 | else { xfile->realname = name; xfile->nread = 0; } | ||
635 | return ret; | ||
636 | } | ||
637 | |||
638 | static int | ||
639 | main_file_stat (main_file *xfile, xoff_t *size, int err_ifnoseek) | ||
640 | { | ||
641 | int ret; | ||
642 | struct stat sbuf; | ||
643 | |||
644 | XD3_ASSERT (main_file_isopen (xfile)); | ||
645 | |||
646 | if (fstat (XFNO (xfile), & sbuf) < 0) | ||
647 | { | ||
648 | ret = get_errno (); | ||
649 | if (err_ifnoseek) { XF_ERROR ("stat", xfile->filename, ret); } | ||
650 | return ret; | ||
651 | } | ||
652 | |||
653 | if (! S_ISREG (sbuf.st_mode)) | ||
654 | { | ||
655 | if (err_ifnoseek) { XPR(NT "source file must be seekable: %s\n", xfile->filename); } | ||
656 | return ESPIPE; | ||
657 | } | ||
658 | |||
659 | (*size) = sbuf.st_size; | ||
660 | return 0; | ||
661 | } | ||
662 | |||
663 | static int | ||
664 | main_file_exists (main_file *xfile) | ||
665 | { | ||
666 | struct stat sbuf; | ||
667 | return stat (xfile->filename, & sbuf) == 0 && S_ISREG (sbuf.st_mode); | ||
668 | } | ||
669 | |||
#if (XD3_POSIX || EXTERNAL_COMPRESSION)
/* POSIX-generic code takes a function pointer to read() or write().  This calls the
 * function repeatedly until SIZE bytes have been processed or, for reads, until the
 * function reports end-of-input by returning 0.
 *
 * NREAD selects the behavior: for reads it receives the number of bytes actually read
 * (possibly fewer than SIZE at end-of-input); for writes NULL is passed and the loop
 * continues until all SIZE bytes have been written.
 *
 * Returns 0 on success or an errno value on failure.  Calls interrupted with EINTR or
 * EAGAIN are retried.  They must not fall through to the end-of-input test, because a
 * read would then stop early and look like EOF to the caller. */
typedef int (xd3_posix_func) (int fd, uint8_t *buf, usize_t size);

static int
xd3_posix_io (int fd, uint8_t *buf, usize_t size, xd3_posix_func *func, usize_t *nread)
{
  int ret;
  usize_t nproc = 0;

  while (nproc < size)
    {
      int result = (*func) (fd, buf + nproc, size - nproc);

      if (result < 0)
        {
          ret = get_errno ();
          if (ret != EAGAIN && ret != EINTR)
            {
              return ret;
            }
          /* Transient condition: nothing was transferred, try again. */
          continue;
        }

      /* A read returning 0 means end-of-input. */
      if (nread != NULL && result == 0) { break; }

      nproc += result;
    }
  if (nread != NULL) { (*nread) = nproc; }
  return 0;
}
#endif
705 | |||
706 | /* POSIX is unbuffered, while STDIO is buffered. main_file_read() should always be called | ||
707 | * on blocks. */ | ||
708 | static int | ||
709 | main_file_read (main_file *ifile, | ||
710 | uint8_t *buf, | ||
711 | usize_t size, | ||
712 | usize_t *nread, | ||
713 | const char *msg) | ||
714 | { | ||
715 | int ret = 0; | ||
716 | |||
717 | #if XD3_STDIO | ||
718 | usize_t result; | ||
719 | |||
720 | result = fread (buf, 1, size, ifile->file); | ||
721 | |||
722 | if (result < size && ferror (ifile->file)) | ||
723 | { | ||
724 | ret = get_errno (); | ||
725 | } | ||
726 | else | ||
727 | { | ||
728 | *nread = result; | ||
729 | } | ||
730 | |||
731 | #elif XD3_POSIX | ||
732 | ret = xd3_posix_io (ifile->file, buf, size, (xd3_posix_func*) &read, nread); | ||
733 | #endif | ||
734 | |||
735 | if (ret) | ||
736 | { | ||
737 | XPR(NT "%s: %s: %s\n", msg, ifile->filename, xd3_strerror (ret)); | ||
738 | } | ||
739 | else | ||
740 | { | ||
741 | if (option_verbose > 2) { XPR(NT "main read: %s: %u\n", ifile->filename, (*nread)); } | ||
742 | ifile->nread += (*nread); | ||
743 | } | ||
744 | |||
745 | return ret; | ||
746 | } | ||
747 | |||
748 | static int | ||
749 | main_file_write (main_file *ofile, uint8_t *buf, usize_t size, const char *msg) | ||
750 | { | ||
751 | int ret = 0; | ||
752 | |||
753 | #if XD3_STDIO | ||
754 | usize_t result; | ||
755 | |||
756 | result = fwrite (buf, 1, size, ofile->file); | ||
757 | |||
758 | if (result != size) { ret = get_errno (); } | ||
759 | |||
760 | #elif XD3_POSIX | ||
761 | ret = xd3_posix_io (ofile->file, buf, size, (xd3_posix_func*) &write, NULL); | ||
762 | #endif | ||
763 | |||
764 | if (ret) | ||
765 | { | ||
766 | XPR(NT "%s: %s: %s\n", msg, ofile->filename, xd3_strerror (ret)); | ||
767 | } | ||
768 | else | ||
769 | { | ||
770 | if (option_verbose > 2) { XPR(NT "main write: %s: %u\n", ofile->filename, size); } | ||
771 | ofile->nwrite += size; | ||
772 | } | ||
773 | |||
774 | return ret; | ||
775 | } | ||
776 | |||
/* Moves the file position of XFILE to the absolute offset POS.  Returns 0 on success;
 * on failure prints a message and returns the errno value.
 *
 * NOTE(review): fseek() takes a long offset, so under XD3_STDIO a POS wider than long
 * would be truncated.  The non-STDIO branch is a plain #else, unlike the
 * "#elif XD3_POSIX" used by the other wrappers in this file. */
static int
main_file_seek (main_file *xfile, xoff_t pos)
{
  int ret = 0;

#if XD3_STDIO
  if (fseek (xfile->file, pos, SEEK_SET) != 0) { ret = get_errno (); }
#else
  /* lseek() returns the resulting offset, which must equal the one requested. */
  if (lseek (xfile->file, pos, SEEK_SET) != pos) { ret = get_errno (); }
#endif

  if (ret)
    {
      XPR(NT "seek failed: %s: %s\n", xfile->filename, xd3_strerror (ret));
    }

  return ret;
}
795 | |||
796 | /****************************************************************************************** | ||
797 | VCDIFF TOOLS | ||
798 | ******************************************************************************************/ | ||
799 | |||
800 | #if VCDIFF_TOOLS | ||
801 | /* This function prints a single VCDIFF window, mainly for debugging purposes. */ | ||
802 | static int | ||
803 | main_print_window (xd3_stream* stream, FILE *vcout) | ||
804 | { | ||
805 | int ret; | ||
806 | usize_t size = 0; | ||
807 | |||
808 | VC(OUT " Offset Code Type1 Size1 @Addr1 + Type2 Size2 @Addr2\n"); | ||
809 | |||
810 | while (stream->inst_sect.buf < stream->inst_sect.buf_max) | ||
811 | { | ||
812 | uint code = stream->inst_sect.buf[0]; | ||
813 | |||
814 | if ((ret = xd3_decode_instruction (stream))) { return ret; } | ||
815 | |||
816 | VC(OUT " %06"Q"u %03u %s %3u", stream->dec_winstart + size, code, | ||
817 | xd3_rtype_to_string (stream->dec_current1.type, option_print_cpymode), | ||
818 | stream->dec_current1.size); | ||
819 | |||
820 | if (stream->dec_current1.type != XD3_NOOP) | ||
821 | { | ||
822 | size += stream->dec_current1.size; | ||
823 | if (stream->dec_current1.type >= XD3_CPY) | ||
824 | { | ||
825 | VC(OUT " @%-6u", stream->dec_current1.addr); | ||
826 | } | ||
827 | else | ||
828 | { | ||
829 | VC(OUT " "); | ||
830 | } | ||
831 | } | ||
832 | |||
833 | if (stream->dec_current2.type != XD3_NOOP) | ||
834 | { | ||
835 | size += stream->dec_current2.size; | ||
836 | VC(OUT " %s %3u", | ||
837 | xd3_rtype_to_string (stream->dec_current2.type, option_print_cpymode), | ||
838 | stream->dec_current2.size); | ||
839 | |||
840 | if (stream->dec_current2.type >= XD3_CPY) | ||
841 | { | ||
842 | VC(OUT " @%-6u", stream->dec_current2.addr); | ||
843 | } | ||
844 | } | ||
845 | |||
846 | VC(OUT "\n"); | ||
847 | } | ||
848 | |||
849 | if (stream->dec_tgtlen != size && (stream->flags & XD3_SKIP_WINDOW) == 0) | ||
850 | { | ||
851 | XPR(NT "target window size inconsistency"); | ||
852 | return EINVAL; | ||
853 | } | ||
854 | |||
855 | if (stream->dec_position != stream->dec_maxpos) | ||
856 | { | ||
857 | XPR(NT "target window position inconsistency"); | ||
858 | return EINVAL; | ||
859 | } | ||
860 | |||
861 | if (stream->addr_sect.buf != stream->addr_sect.buf_max) | ||
862 | { | ||
863 | XPR(NT "address section inconsistency"); | ||
864 | return EINVAL; | ||
865 | } | ||
866 | |||
867 | IF_DEBUG (VC(OUT "SIZE=%u TGTLEN=%u\n", size, stream->dec_tgtlen)); | ||
868 | |||
869 | return 0; | ||
870 | } | ||
871 | |||
872 | static void | ||
873 | main_print_vcdiff_file (main_file *file, const char *type, FILE *vcout) | ||
874 | { | ||
875 | if (file->filename) { VC(OUT "XDELTA filename (%s): %s\n", type, file->filename); } | ||
876 | if (file->compressor) { VC(OUT "XDELTA ext comp (%s): %s\n", type, file->compressor->recomp_cmdname); } | ||
877 | } | ||
878 | |||
879 | /* This function prints a VCDIFF input, mainly for debugging purposes. */ | ||
880 | static int | ||
881 | main_print_func (xd3_stream* stream, main_file *xfile) | ||
882 | { | ||
883 | int ret; | ||
884 | FILE *vcout; | ||
885 | #if XD3_POSIX | ||
886 | if (! (vcout = fdopen (dup(xfile->file), "w"))) | ||
887 | { | ||
888 | ret = get_errno (); | ||
889 | XPR(NT "fdopen: %s: %s\n", xfile->filename, xd3_strerror (ret)); | ||
890 | return ret; | ||
891 | } | ||
892 | #elif XD3_STDIO | ||
893 | vcout = xfile->file; | ||
894 | #endif | ||
895 | XD3_ASSERT (vcout); | ||
896 | if (stream->dec_winstart == 0) | ||
897 | { | ||
898 | VC(OUT "VCDIFF version: 0\n"); | ||
899 | |||
900 | VC(OUT "VCDIFF header size: %d\n", stream->dec_hdrsize); | ||
901 | VC(OUT "VCDIFF header indicator: "); | ||
902 | if ((stream->dec_hdr_ind & VCD_SECONDARY) != 0) VC(OUT "VCD_SECONDARY "); | ||
903 | if ((stream->dec_hdr_ind & VCD_CODETABLE) != 0) VC(OUT "VCD_CODETABLE "); | ||
904 | if ((stream->dec_hdr_ind & VCD_APPHEADER) != 0) VC(OUT "VCD_APPHEADER "); | ||
905 | if (stream->dec_hdr_ind == 0) VC(OUT "none"); | ||
906 | VC(OUT "\n"); | ||
907 | |||
908 | IF_SEC(VC(OUT "VCDIFF secondary compressor: %s\n", stream->sec_type ? stream->sec_type->name : "none")); | ||
909 | IF_NSEC(VC(OUT "VCDIFF secondary compressor: unsupported\n")); | ||
910 | |||
911 | if (stream->dec_hdr_ind & VCD_APPHEADER) | ||
912 | { | ||
913 | uint8_t *apphead; | ||
914 | usize_t appheadsz; | ||
915 | ret = xd3_get_appheader (stream, & apphead, & appheadsz); | ||
916 | |||
917 | if (ret == 0 && appheadsz > 0) | ||
918 | { | ||
919 | int sq = option_quiet; | ||
920 | main_file o, s; | ||
921 | XD3_ASSERT (apphead != NULL); | ||
922 | VC(OUT "VCDIFF application header: "); | ||
923 | fwrite (apphead, 1, appheadsz, vcout); | ||
924 | VC(OUT "\n"); | ||
925 | |||
926 | main_file_init (& o); | ||
927 | main_file_init (& s); | ||
928 | option_quiet = 1; | ||
929 | main_get_appheader (stream, & o, & s); | ||
930 | option_quiet = sq; | ||
931 | main_print_vcdiff_file (& o, "output", vcout); | ||
932 | main_print_vcdiff_file (& s, "source", vcout); | ||
933 | } | ||
934 | } | ||
935 | } | ||
936 | else | ||
937 | { | ||
938 | VC(OUT "\n"); | ||
939 | } | ||
940 | |||
941 | VC(OUT "VCDIFF window number: %"Q"u\n", stream->current_window); | ||
942 | VC(OUT "VCDIFF window indicator: "); | ||
943 | if ((stream->dec_win_ind & VCD_SOURCE) != 0) VC(OUT "VCD_SOURCE "); | ||
944 | if ((stream->dec_win_ind & VCD_TARGET) != 0) VC(OUT "VCD_TARGET "); | ||
945 | if ((stream->dec_win_ind & VCD_ADLER32) != 0) VC(OUT "VCD_ADLER32 "); | ||
946 | if (stream->dec_win_ind == 0) VC(OUT "none"); | ||
947 | VC(OUT "\n"); | ||
948 | |||
949 | if ((stream->dec_win_ind & VCD_ADLER32) != 0) | ||
950 | { | ||
951 | VC(OUT "VCDIFF adler32 checksum: %08X\n", stream->dec_adler32); | ||
952 | } | ||
953 | |||
954 | if (stream->dec_del_ind != 0) | ||
955 | { | ||
956 | VC(OUT "VCDIFF delta indicator: "); | ||
957 | if ((stream->dec_del_ind & VCD_DATACOMP) != 0) VC(OUT "VCD_DATACOMP "); | ||
958 | if ((stream->dec_del_ind & VCD_INSTCOMP) != 0) VC(OUT "VCD_INSTCOMP "); | ||
959 | if ((stream->dec_del_ind & VCD_ADDRCOMP) != 0) VC(OUT "VCD_ADDRCOMP "); | ||
960 | if (stream->dec_del_ind == 0) VC(OUT "none"); | ||
961 | VC(OUT "\n"); | ||
962 | } | ||
963 | |||
964 | if (stream->dec_winstart != 0) | ||
965 | { | ||
966 | VC(OUT "VCDIFF window at offset: %"Q"u\n", stream->dec_winstart); | ||
967 | } | ||
968 | |||
969 | if (SRCORTGT (stream->dec_win_ind)) | ||
970 | { | ||
971 | VC(OUT "VCDIFF copy window length: %u\n", stream->dec_cpylen); | ||
972 | VC(OUT "VCDIFF copy window offset: %"Q"u\n", stream->dec_cpyoff); | ||
973 | } | ||
974 | |||
975 | VC(OUT "VCDIFF delta encoding length: %u\n", stream->dec_enclen); | ||
976 | VC(OUT "VCDIFF target window length: %u\n", stream->dec_tgtlen); | ||
977 | |||
978 | VC(OUT "VCDIFF data section length: %u\n", stream->data_sect.size); | ||
979 | VC(OUT "VCDIFF inst section length: %u\n", stream->inst_sect.size); | ||
980 | VC(OUT "VCDIFF addr section length: %u\n", stream->addr_sect.size); | ||
981 | |||
982 | ret = 0; | ||
983 | if ((stream->flags & XD3_JUST_HDR) != 0) | ||
984 | { | ||
985 | /* Print a header -- finished! */ | ||
986 | ret = PRINTHDR_SPECIAL; | ||
987 | } | ||
988 | else if ((stream->flags & XD3_SKIP_WINDOW) == 0) | ||
989 | { | ||
990 | ret = main_print_window (stream, vcout); | ||
991 | } | ||
992 | |||
993 | fclose (vcout); | ||
994 | return ret; | ||
995 | } | ||
996 | #endif /* VCDIFF_TOOLS */ | ||
997 | |||
998 | /****************************************************************************************** | ||
999 | Input decompression, output recompression | ||
1000 | ******************************************************************************************/ | ||
1001 | |||
1002 | #if EXTERNAL_COMPRESSION | ||
1003 | /* This is tricky POSIX-specific code with lots of fork(), pipe(), dup(), waitpid(), and | ||
1004 | * exec() business. Most of this code originated in PRCS1, which did automatic | ||
1005 | * package-file decompression. It works with both XD3_POSIX and XD3_STDIO file | ||
1006 | * disciplines. | ||
1007 | * | ||
1008 | * To automatically detect compressed inputs requires a child process to reconstruct the | ||
1009 | * input stream, which was advanced in order to detect compression, because it may not be | ||
1010 | * seekable. In other words, the main program reads part of the input stream, and if it | ||
1011 | * detects a compressed input it then forks a pipe copier process, which copies the | ||
1012 | * first-read block out of the main-program's memory, then streams the remaining | ||
1013 | * compressed input into the input-decompression pipe. | ||
1014 | */ | ||
1015 | |||
1016 | #include <unistd.h> | ||
1017 | #include <sys/stat.h> | ||
1018 | #include <sys/wait.h> | ||
1019 | |||
1020 | /* Remember which pipe FD is which. */ | ||
1021 | #define PIPE_READ_FD 0 | ||
1022 | #define PIPE_WRITE_FD 1 | ||
1023 | |||
1024 | static pid_t ext_subprocs[2]; | ||
1025 | static const char* ext_tmpfile = NULL; | ||
1026 | |||
1027 | /* Like write(), but makes repeated calls to empty the buffer. */ | ||
1028 | static int | ||
1029 | main_pipe_write (int outfd, const uint8_t *exist_buf, usize_t remain) | ||
1030 | { | ||
1031 | int ret; | ||
1032 | |||
1033 | if ((ret = xd3_posix_io (outfd, (uint8_t*) exist_buf, remain, (xd3_posix_func*) &write, NULL))) | ||
1034 | { | ||
1035 | XPR(NT "pipe write failed: %s", xd3_strerror (ret)); | ||
1036 | return ret; | ||
1037 | } | ||
1038 | |||
1039 | return 0; | ||
1040 | } | ||
1041 | |||
1042 | /* A simple error-reporting waitpid interface. */ | ||
1043 | static int | ||
1044 | main_waitpid_check(pid_t pid) | ||
1045 | { | ||
1046 | int status; | ||
1047 | int ret = 0; | ||
1048 | |||
1049 | if (waitpid (pid, & status, 0) < 0) | ||
1050 | { | ||
1051 | ret = get_errno (); | ||
1052 | XPR(NT "compression subprocess: wait: %s\n", xd3_strerror (ret)); | ||
1053 | } | ||
1054 | else if (! WIFEXITED (status)) | ||
1055 | { | ||
1056 | ret = ECHILD; | ||
1057 | XPR(NT "compression subprocess: signal %d\n", | ||
1058 | WIFSIGNALED (status) ? WTERMSIG (status) : WSTOPSIG (status)); | ||
1059 | } | ||
1060 | else if (WEXITSTATUS (status) != 0) | ||
1061 | { | ||
1062 | ret = ECHILD; | ||
1063 | XPR(NT "compression subprocess: exit %d\n", WEXITSTATUS (status)); | ||
1064 | } | ||
1065 | |||
1066 | return ret; | ||
1067 | } | ||
1068 | |||
1069 | /* Wait for any existing child processes to check for abnormal exit. */ | ||
1070 | static int | ||
1071 | main_external_compression_finish (void) | ||
1072 | { | ||
1073 | int i; | ||
1074 | int ret; | ||
1075 | |||
1076 | for (i = 0; i < 2; i += 1) | ||
1077 | { | ||
1078 | if (! ext_subprocs[i]) { continue; } | ||
1079 | |||
1080 | if ((ret = main_waitpid_check (ext_subprocs[i]))) | ||
1081 | { | ||
1082 | return ret; | ||
1083 | } | ||
1084 | } | ||
1085 | |||
1086 | return 0; | ||
1087 | } | ||
1088 | |||
1089 | /* This runs as a forked process of main_input_decompress_setup() to copy input to the | ||
1090 | * decompression process. First, the available input is copied out of the existing | ||
1091 | * buffer, then the buffer is reused to continue reading from the compressed input | ||
1092 | * file. */ | ||
1093 | static int | ||
1094 | main_pipe_copier (uint8_t *pipe_buf, | ||
1095 | usize_t pipe_bufsize, | ||
1096 | usize_t nread, | ||
1097 | main_file *ifile, | ||
1098 | int outfd) | ||
1099 | { | ||
1100 | int ret; | ||
1101 | |||
1102 | for (;;) | ||
1103 | { | ||
1104 | if (nread > 0 && (ret = main_pipe_write (outfd, pipe_buf, nread))) | ||
1105 | { | ||
1106 | return ret; | ||
1107 | } | ||
1108 | |||
1109 | if (nread < pipe_bufsize) | ||
1110 | { | ||
1111 | break; | ||
1112 | } | ||
1113 | |||
1114 | if ((ret = main_file_read (ifile, pipe_buf, pipe_bufsize, & nread, "pipe read failed")) < 0) | ||
1115 | { | ||
1116 | return ret; | ||
1117 | } | ||
1118 | } | ||
1119 | |||
1120 | return 0; | ||
1121 | } | ||
1122 | |||
1123 | /* This function is called after we have read some amount of data from the input file and | ||
1124 | * detected a compressed input. Here we start a decompression subprocess by forking | ||
1125 | * twice. The first process runs the decompression command, the second process copies | ||
1126 | * data to the input of the first. */ | ||
1127 | static int | ||
1128 | main_input_decompress_setup (const main_extcomp *decomp, | ||
1129 | main_file *ifile, | ||
1130 | uint8_t *input_buf, | ||
1131 | usize_t input_bufsize, | ||
1132 | uint8_t *pipe_buf, | ||
1133 | usize_t pipe_bufsize, | ||
1134 | usize_t pipe_avail, | ||
1135 | usize_t *nread) | ||
1136 | { | ||
1137 | int outpipefd[2], inpipefd[2]; /* The two pipes: input and output file descriptors. */ | ||
1138 | int input_fd = -1; /* The resulting input_fd (output of decompression). */ | ||
1139 | pid_t decomp_id, copier_id; /* The two subprocs. */ | ||
1140 | int ret; | ||
1141 | |||
1142 | outpipefd[0] = outpipefd[1] = -1; | ||
1143 | inpipefd[0] = inpipefd[1] = -1; | ||
1144 | |||
1145 | if (pipe (outpipefd) || pipe (inpipefd)) | ||
1146 | { | ||
1147 | XPR(NT "pipe failed: %s\n", xd3_strerror (ret = get_errno ())); | ||
1148 | goto pipe_cleanup; | ||
1149 | } | ||
1150 | |||
1151 | if ((decomp_id = fork ()) < 0) | ||
1152 | { | ||
1153 | XPR(NT "fork failed: %s\n", xd3_strerror (ret = get_errno ())); | ||
1154 | goto pipe_cleanup; | ||
1155 | } | ||
1156 | |||
1157 | /* The first child runs the decompression process: */ | ||
1158 | if (decomp_id == 0) | ||
1159 | { | ||
1160 | /* Setup pipes: write to the outpipe, read from the inpipe. */ | ||
1161 | if (dup2 (outpipefd[PIPE_WRITE_FD], STDOUT_FILENO) < 0 || | ||
1162 | dup2 (inpipefd[PIPE_READ_FD], STDIN_FILENO) < 0 || | ||
1163 | close (outpipefd[PIPE_READ_FD]) || | ||
1164 | close (outpipefd[PIPE_WRITE_FD]) || | ||
1165 | close (inpipefd[PIPE_READ_FD]) || | ||
1166 | close (inpipefd[PIPE_WRITE_FD]) || | ||
1167 | execlp (decomp->decomp_cmdname, decomp->decomp_cmdname, decomp->decomp_options, NULL)) | ||
1168 | { | ||
1169 | XPR(NT "child process %s failed to execute: %s\n", decomp->decomp_cmdname, xd3_strerror (get_errno ())); | ||
1170 | } | ||
1171 | |||
1172 | _exit (127); | ||
1173 | } | ||
1174 | |||
1175 | ext_subprocs[0] = decomp_id; | ||
1176 | |||
1177 | if ((copier_id = fork ()) < 0) | ||
1178 | { | ||
1179 | XPR(NT "fork failed: %s\n", xd3_strerror (ret = get_errno ())); | ||
1180 | goto pipe_cleanup; | ||
1181 | } | ||
1182 | |||
1183 | /* The second child runs the copier process: */ | ||
1184 | if (copier_id == 0) | ||
1185 | { | ||
1186 | int exitval = 0; | ||
1187 | |||
1188 | if (close (inpipefd[PIPE_READ_FD]) || | ||
1189 | main_pipe_copier (pipe_buf, pipe_bufsize, pipe_avail, ifile, inpipefd[PIPE_WRITE_FD]) || | ||
1190 | close (inpipefd[PIPE_WRITE_FD])) | ||
1191 | { | ||
1192 | XPR(NT "child copier process failed: %s\n", xd3_strerror (get_errno ())); | ||
1193 | exitval = 1; | ||
1194 | } | ||
1195 | |||
1196 | _exit (exitval); | ||
1197 | } | ||
1198 | |||
1199 | ext_subprocs[1] = copier_id; | ||
1200 | |||
1201 | /* The parent closes both pipes after duplicating the output of compression. */ | ||
1202 | input_fd = dup (outpipefd[PIPE_READ_FD]); | ||
1203 | |||
1204 | if (input_fd < 0 || | ||
1205 | main_file_close (ifile) || | ||
1206 | close (outpipefd[PIPE_READ_FD]) || | ||
1207 | close (outpipefd[PIPE_WRITE_FD]) || | ||
1208 | close (inpipefd[PIPE_READ_FD]) || | ||
1209 | close (inpipefd[PIPE_WRITE_FD])) | ||
1210 | { | ||
1211 | XPR(NT "dup/close failed: %s\n", xd3_strerror (ret = get_errno ())); | ||
1212 | goto pipe_cleanup; | ||
1213 | } | ||
1214 | |||
1215 | #if XD3_STDIO | ||
1216 | /* Note: fdopen() acquires the fd, closes it when finished. */ | ||
1217 | if ((ifile->file = fdopen (input_fd, "r")) == NULL) | ||
1218 | { | ||
1219 | XPR(NT "fdopen failed: %s\n", xd3_strerror (ret = get_errno ())); | ||
1220 | goto pipe_cleanup; | ||
1221 | } | ||
1222 | |||
1223 | #elif XD3_POSIX | ||
1224 | ifile->file = input_fd; | ||
1225 | #endif | ||
1226 | |||
1227 | ifile->compressor = decomp; | ||
1228 | |||
1229 | /* Now the input file is decompressed. */ | ||
1230 | return main_file_read (ifile, input_buf, input_bufsize, nread, "input decompression failed"); | ||
1231 | |||
1232 | pipe_cleanup: | ||
1233 | close (input_fd); | ||
1234 | close (outpipefd[PIPE_READ_FD]); | ||
1235 | close (outpipefd[PIPE_WRITE_FD]); | ||
1236 | close (inpipefd[PIPE_READ_FD]); | ||
1237 | close (inpipefd[PIPE_WRITE_FD]); | ||
1238 | return ret; | ||
1239 | } | ||
1240 | |||
1241 | |||
1242 | /* This routine is called when the first buffer of input data is read by the main program | ||
1243 | * (unless input decompression is disabled by command-line option). If it recognizes the | ||
1244 | * magic number of a known input type it invokes decompression. | ||
1245 | * | ||
1246 | * Skips decompression if the decompression type or the file type is RD_NONEXTERNAL. | ||
1247 | * | ||
1248 | * Behaves exactly like main_file_read, otherwise. | ||
1249 | * | ||
1250 | * This function uses a separate buffer to read the first small block of input. If a | ||
1251 | * compressed input is detected, the separate buffer is passed to the pipe copier. This | ||
1252 | * avoids using the same size buffer in both cases. */ | ||
1253 | static int | ||
1254 | main_decompress_input_check (main_file *ifile, | ||
1255 | uint8_t *input_buf, | ||
1256 | usize_t input_size, | ||
1257 | usize_t *nread) | ||
1258 | { | ||
1259 | int i; | ||
1260 | int ret; | ||
1261 | uint8_t check_buf[PIPE_BUFSIZE]; | ||
1262 | usize_t check_nread; | ||
1263 | |||
1264 | if ((ret = main_file_read (ifile, check_buf, min (input_size, PIPE_BUFSIZE), & check_nread, "input read failed"))) | ||
1265 | { | ||
1266 | return ret; | ||
1267 | } | ||
1268 | |||
1269 | for (i = 0; i < SIZEOF_ARRAY (extcomp_types); i += 1) | ||
1270 | { | ||
1271 | const main_extcomp *decomp = & extcomp_types[i]; | ||
1272 | |||
1273 | if ((check_nread > decomp->magic_size) && | ||
1274 | /* The following expr skips decompression if we are trying to read a VCDIFF | ||
1275 | * input and that is the magic number. */ | ||
1276 | !((decomp->flags & RD_NONEXTERNAL) && (ifile->flags & RD_NONEXTERNAL)) && | ||
1277 | memcmp (check_buf, decomp->magic, decomp->magic_size) == 0) | ||
1278 | { | ||
1279 | if (! option_quiet) | ||
1280 | { | ||
1281 | XPR(NT "%s | %s %s\n", | ||
1282 | ifile->filename, | ||
1283 | decomp->decomp_cmdname, | ||
1284 | decomp->decomp_options); | ||
1285 | } | ||
1286 | |||
1287 | return main_input_decompress_setup (decomp, ifile, | ||
1288 | input_buf, input_size, | ||
1289 | check_buf, PIPE_BUFSIZE, | ||
1290 | check_nread, nread); | ||
1291 | } | ||
1292 | } | ||
1293 | |||
1294 | /* Now read the rest of the input block. */ | ||
1295 | (*nread) = 0; | ||
1296 | |||
1297 | if (check_nread == PIPE_BUFSIZE) | ||
1298 | { | ||
1299 | ret = main_file_read (ifile, input_buf + PIPE_BUFSIZE, input_size - PIPE_BUFSIZE, nread, "input read failed"); | ||
1300 | } | ||
1301 | |||
1302 | memcpy (input_buf, check_buf, check_nread); | ||
1303 | |||
1304 | (*nread) += check_nread; | ||
1305 | |||
1306 | return 0; | ||
1307 | } | ||
1308 | |||
1309 | /* This is called when the source file needs to be decompressed. We fork/exec a | ||
1310 | * decompression command with the proper input and output to a temporary file. */ | ||
1311 | static int | ||
1312 | main_decompress_source (main_file *sfile, xd3_source *source) | ||
1313 | { | ||
1314 | const main_extcomp *decomp = sfile->compressor; | ||
1315 | pid_t decomp_id; /* One subproc. */ | ||
1316 | int input_fd = -1; | ||
1317 | int output_fd = -1; | ||
1318 | int ret; | ||
1319 | char *tmpname = NULL; | ||
1320 | char *tmpdir = getenv ("TMPDIR"); | ||
1321 | static const char tmpl[] = "/xd3src.XXXXXX"; | ||
1322 | |||
1323 | /* Make a template for mkstmp() */ | ||
1324 | if (tmpdir == NULL) { tmpdir = "/tmp"; } | ||
1325 | if ((tmpname = main_malloc (strlen (tmpdir) + sizeof (tmpl) + 1)) == NULL) { return ENOMEM; } | ||
1326 | sprintf (tmpname, "%s%s", tmpdir, tmpl); | ||
1327 | |||
1328 | XD3_ASSERT (ext_tmpfile == NULL); | ||
1329 | ext_tmpfile = tmpname; | ||
1330 | |||
1331 | /* Open the output FD. */ | ||
1332 | if ((output_fd = mkstemp (tmpname)) < 0) | ||
1333 | { | ||
1334 | XPR(NT "mkstemp failed: %s: %s", tmpname, xd3_strerror (ret = get_errno ())); | ||
1335 | goto cleanup; | ||
1336 | } | ||
1337 | |||
1338 | /* Copy the input FD, reset file position. */ | ||
1339 | XD3_ASSERT (main_file_isopen (sfile)); | ||
1340 | #if XD3_STDIO | ||
1341 | if ((input_fd = dup (fileno (sfile->file))) < 0) | ||
1342 | { | ||
1343 | XPR(NT "dup failed: %s", xd3_strerror (ret = get_errno ())); | ||
1344 | goto cleanup; | ||
1345 | } | ||
1346 | main_file_close (sfile); | ||
1347 | sfile->file = NULL; | ||
1348 | #elif XD3_POSIX | ||
1349 | input_fd = sfile->file; | ||
1350 | sfile->file = -1; | ||
1351 | #endif | ||
1352 | |||
1353 | if ((ret = lseek (input_fd, SEEK_SET, 0)) != 0) | ||
1354 | { | ||
1355 | XPR(NT "lseek failed: : %s", xd3_strerror (ret = get_errno ())); | ||
1356 | goto cleanup; | ||
1357 | } | ||
1358 | |||
1359 | if ((decomp_id = fork ()) < 0) | ||
1360 | { | ||
1361 | XPR(NT "fork failed: %s", xd3_strerror (ret = get_errno ())); | ||
1362 | goto cleanup; | ||
1363 | } | ||
1364 | |||
1365 | /* The child runs the decompression process: */ | ||
1366 | if (decomp_id == 0) | ||
1367 | { | ||
1368 | /* Setup pipes: write to the output file, read from the pipe. */ | ||
1369 | if (dup2 (input_fd, STDIN_FILENO) < 0 || | ||
1370 | dup2 (output_fd, STDOUT_FILENO) < 0 || | ||
1371 | execlp (decomp->decomp_cmdname, decomp->decomp_cmdname, decomp->decomp_options, NULL)) | ||
1372 | { | ||
1373 | XPR(NT "child process %s failed to execute: %s\n", | ||
1374 | decomp->decomp_cmdname, xd3_strerror (get_errno ())); | ||
1375 | } | ||
1376 | |||
1377 | _exit (127); | ||
1378 | } | ||
1379 | |||
1380 | close (input_fd); | ||
1381 | close (output_fd); | ||
1382 | input_fd = -1; | ||
1383 | output_fd = -1; | ||
1384 | |||
1385 | /* Then wait for completion. */ | ||
1386 | if ((ret = main_waitpid_check (decomp_id))) | ||
1387 | { | ||
1388 | goto cleanup; | ||
1389 | } | ||
1390 | |||
1391 | /* Open/stat the decompressed source file. */ | ||
1392 | if ((ret = main_file_open (sfile, tmpname, XO_READ))) { goto cleanup; } | ||
1393 | if ((ret = main_file_stat (sfile, & source->size, 1))) { goto cleanup; } | ||
1394 | return 0; | ||
1395 | |||
1396 | cleanup: | ||
1397 | close (input_fd); | ||
1398 | close (output_fd); | ||
1399 | if (tmpname) { free (tmpname); } | ||
1400 | ext_tmpfile = NULL; | ||
1401 | return ret; | ||
1402 | } | ||
1403 | |||
1404 | /* Initiate re-compression of the output stream. This is easier than input decompression | ||
1405 | * because we know beforehand that the stream will be compressed, whereas the input has | ||
1406 | * already been read when we decide it should be decompressed. Thus, it only requires one | ||
1407 | * subprocess and one pipe. */ | ||
1408 | static int | ||
1409 | main_recompress_output (main_file *ofile) | ||
1410 | { | ||
1411 | pid_t recomp_id; /* One subproc. */ | ||
1412 | int pipefd[2]; /* One pipe. */ | ||
1413 | int output_fd = -1; | ||
1414 | int ret; | ||
1415 | const main_extcomp *recomp = ofile->compressor; | ||
1416 | |||
1417 | pipefd[0] = pipefd[1] = -1; | ||
1418 | |||
1419 | if (pipe (pipefd)) | ||
1420 | { | ||
1421 | XPR(NT "pipe failed: %s\n", xd3_strerror (ret = get_errno ())); | ||
1422 | goto pipe_cleanup; | ||
1423 | } | ||
1424 | |||
1425 | if ((recomp_id = fork ()) < 0) | ||
1426 | { | ||
1427 | XPR(NT "fork failed: %s\n", xd3_strerror (ret = get_errno ())); | ||
1428 | goto pipe_cleanup; | ||
1429 | } | ||
1430 | |||
1431 | /* The child runs the recompression process: */ | ||
1432 | if (recomp_id == 0) | ||
1433 | { | ||
1434 | /* Setup pipes: write to the output file, read from the pipe. */ | ||
1435 | if (dup2 (XFNO (ofile), STDOUT_FILENO) < 0 || | ||
1436 | dup2 (pipefd[PIPE_READ_FD], STDIN_FILENO) < 0 || | ||
1437 | close (pipefd[PIPE_READ_FD]) || | ||
1438 | close (pipefd[PIPE_WRITE_FD]) || | ||
1439 | execlp (recomp->recomp_cmdname, recomp->recomp_cmdname, recomp->recomp_options, NULL)) | ||
1440 | { | ||
1441 | XPR(NT "child process %s failed to execute: %s\n", recomp->recomp_cmdname, xd3_strerror (get_errno ())); | ||
1442 | } | ||
1443 | |||
1444 | _exit (127); | ||
1445 | } | ||
1446 | |||
1447 | ext_subprocs[0] = recomp_id; | ||
1448 | |||
1449 | /* The parent closes both pipes after duplicating the output-fd for writing to the | ||
1450 | * compression pipe. */ | ||
1451 | output_fd = dup (pipefd[PIPE_WRITE_FD]); | ||
1452 | |||
1453 | if (output_fd < 0 || | ||
1454 | main_file_close (ofile) || | ||
1455 | close (pipefd[PIPE_READ_FD]) || | ||
1456 | close (pipefd[PIPE_WRITE_FD])) | ||
1457 | { | ||
1458 | XPR(NT "close failed: %s\n", xd3_strerror (ret = get_errno ())); | ||
1459 | goto pipe_cleanup; | ||
1460 | } | ||
1461 | |||
1462 | #if XD3_STDIO | ||
1463 | /* Note: fdopen() acquires the fd, closes it when finished. */ | ||
1464 | if ((ofile->file = fdopen (output_fd, "w")) == NULL) | ||
1465 | { | ||
1466 | XPR(NT "fdopen failed: %s\n", xd3_strerror (ret = get_errno ())); | ||
1467 | goto pipe_cleanup; | ||
1468 | } | ||
1469 | |||
1470 | #elif XD3_POSIX | ||
1471 | ofile->file = output_fd; | ||
1472 | #endif | ||
1473 | |||
1474 | /* Now the output file will be compressed. */ | ||
1475 | return 0; | ||
1476 | |||
1477 | pipe_cleanup: | ||
1478 | close (output_fd); | ||
1479 | close (pipefd[PIPE_READ_FD]); | ||
1480 | close (pipefd[PIPE_WRITE_FD]); | ||
1481 | return ret; | ||
1482 | } | ||
1483 | #endif /* EXTERNAL_COMPRESSION */ | ||
1484 | |||
1485 | /* Identify the compressor that was used based on its ident string, which is passed in the | ||
1486 | * application header. */ | ||
1487 | static const main_extcomp* | ||
1488 | main_ident_compressor (const char *ident) | ||
1489 | { | ||
1490 | int i; | ||
1491 | |||
1492 | for (i = 0; i < SIZEOF_ARRAY (extcomp_types); i += 1) | ||
1493 | { | ||
1494 | if (strcmp (extcomp_types[i].ident, ident) == 0) | ||
1495 | { | ||
1496 | return & extcomp_types[i]; | ||
1497 | } | ||
1498 | } | ||
1499 | |||
1500 | return NULL; | ||
1501 | } | ||
1502 | |||
1503 | /* Return the main_extcomp record to use for this identifier, if possible. */ | ||
1504 | static const main_extcomp* | ||
1505 | main_get_compressor (const char *ident) | ||
1506 | { | ||
1507 | const main_extcomp *ext = main_ident_compressor (ident); | ||
1508 | |||
1509 | if (ext == NULL) | ||
1510 | { | ||
1511 | if (! option_quiet) | ||
1512 | { | ||
1513 | XPR(NT "warning: cannot recompress output: " | ||
1514 | "unrecognized external compression ID: %s\n", ident); | ||
1515 | } | ||
1516 | return NULL; | ||
1517 | } | ||
1518 | else if (! EXTERNAL_COMPRESSION) | ||
1519 | { | ||
1520 | if (! option_quiet) | ||
1521 | { | ||
1522 | XPR(NT "warning: external support not compiled: " | ||
1523 | "original input was compressed: %s\n", ext->recomp_cmdname); | ||
1524 | } | ||
1525 | return NULL; | ||
1526 | } | ||
1527 | else | ||
1528 | { | ||
1529 | return ext; | ||
1530 | } | ||
1531 | } | ||
1532 | |||
1533 | /****************************************************************************************** | ||
1534 | APPLICATION HEADER | ||
1535 | ******************************************************************************************/ | ||
1536 | |||
1537 | #if XD3_ENCODER | ||
/* Reduce a filename to the form stored in the application header: "" for NULL, "-" for
 * the three standard /dev streams, otherwise whatever follows the last '/' (or the
 * whole string when it contains no '/').  The result is a string literal or points
 * into X; nothing is allocated. */
static const char*
main_apphead_string (const char* x)
{
  static const char *const std_streams[] =
    {
      "/dev/stdin", "/dev/stdout", "/dev/stderr"
    };
  const char *last_slash;
  size_t k;

  if (x == NULL)
    {
      return "";
    }

  for (k = 0; k < sizeof (std_streams) / sizeof (std_streams[0]); k += 1)
    {
      if (strcmp (x, std_streams[k]) == 0)
	{
	  return "-";
	}
    }

  last_slash = strrchr (x, '/');

  if (last_slash != NULL)
    {
      return last_slash + 1;
    }

  return x;
}
1551 | |||
1552 | static int | ||
1553 | main_set_appheader (xd3_stream *stream, main_file *input, main_file *sfile) | ||
1554 | { | ||
1555 | /* The user may disable the application header. Once the appheader is set, this | ||
1556 | * disables setting it again. */ | ||
1557 | if (appheader_used || ! option_use_appheader) { return 0; } | ||
1558 | |||
1559 | /* The user may specify the application header, otherwise format the default header. */ | ||
1560 | if (option_appheader) | ||
1561 | { | ||
1562 | appheader_used = option_appheader; | ||
1563 | } | ||
1564 | else | ||
1565 | { | ||
1566 | const char *iname; | ||
1567 | const char *icomp; | ||
1568 | const char *sname; | ||
1569 | const char *scomp; | ||
1570 | int len; | ||
1571 | |||
1572 | iname = main_apphead_string (input->filename); | ||
1573 | icomp = (input->compressor == NULL) ? "" : input->compressor->ident; | ||
1574 | len = strlen (iname) + strlen (icomp) + 2; | ||
1575 | |||
1576 | if (sfile->filename != NULL) | ||
1577 | { | ||
1578 | sname = main_apphead_string (sfile->filename); | ||
1579 | scomp = (sfile->compressor == NULL) ? "" : sfile->compressor->ident; | ||
1580 | len += strlen (sname) + strlen (scomp) + 2; | ||
1581 | } | ||
1582 | else | ||
1583 | { | ||
1584 | sname = scomp = ""; | ||
1585 | } | ||
1586 | |||
1587 | if ((appheader_used = main_malloc (len)) == NULL) | ||
1588 | { | ||
1589 | return ENOMEM; | ||
1590 | } | ||
1591 | |||
1592 | if (sfile->filename == NULL) | ||
1593 | { | ||
1594 | sprintf ((char*)appheader_used, "%s/%s", iname, icomp); | ||
1595 | } | ||
1596 | else | ||
1597 | { | ||
1598 | sprintf ((char*)appheader_used, "%s/%s/%s/%s", iname, icomp, sname, scomp); | ||
1599 | } | ||
1600 | } | ||
1601 | |||
1602 | xd3_set_appheader (stream, appheader_used, strlen ((char*)appheader_used)); | ||
1603 | |||
1604 | return 0; | ||
1605 | } | ||
1606 | #endif | ||
1607 | |||
1608 | static void | ||
1609 | main_get_appheader_params (main_file *file, char **parsed, int output, const char *type) | ||
1610 | { | ||
1611 | /* Set the filename if it was not specified. If output, option_stdout (-c) overrides. */ | ||
1612 | if (file->filename == NULL && ! (output && option_stdout) && strcmp (parsed[0], "-") != 0) | ||
1613 | { | ||
1614 | file->filename = parsed[0]; | ||
1615 | |||
1616 | if (! option_quiet) | ||
1617 | { | ||
1618 | XPR(NT "using default %s filename: %s\n", type, file->filename); | ||
1619 | } | ||
1620 | } | ||
1621 | |||
1622 | /* Set the compressor, initiate de/recompression later. */ | ||
1623 | if (file->compressor == NULL && *parsed[1] != 0) | ||
1624 | { | ||
1625 | file->compressor = main_get_compressor (parsed[1]); | ||
1626 | } | ||
1627 | } | ||
1628 | |||
1629 | static void | ||
1630 | main_get_appheader (xd3_stream *stream, main_file *output, main_file *sfile) | ||
1631 | { | ||
1632 | uint8_t *apphead; | ||
1633 | usize_t appheadsz; | ||
1634 | int ret; | ||
1635 | |||
1636 | /* The user may disable the application header. Once the appheader is set, this | ||
1637 | * disables setting it again. */ | ||
1638 | if (! option_use_appheader) { return; } | ||
1639 | |||
1640 | ret = xd3_get_appheader (stream, & apphead, & appheadsz); | ||
1641 | |||
1642 | /* Ignore failure, it only means we haven't received a header yet. */ | ||
1643 | if (ret != 0) { return; } | ||
1644 | |||
1645 | if (appheadsz > 0) | ||
1646 | { | ||
1647 | char *start = (char*)apphead; | ||
1648 | char *slash; | ||
1649 | int place = 0; | ||
1650 | char *parsed[4]; | ||
1651 | |||
1652 | memset (parsed, 0, sizeof (parsed)); | ||
1653 | |||
1654 | while ((slash = strchr (start, '/')) != NULL) | ||
1655 | { | ||
1656 | *slash = 0; | ||
1657 | parsed[place++] = start; | ||
1658 | start = slash + 1; | ||
1659 | } | ||
1660 | |||
1661 | parsed[place++] = start; | ||
1662 | |||
1663 | /* First take the output parameters. */ | ||
1664 | if (place == 2 || place == 4) | ||
1665 | { | ||
1666 | main_get_appheader_params (output, parsed, 1, "output"); | ||
1667 | } | ||
1668 | |||
1669 | /* Then take the source parameters. */ | ||
1670 | if (place == 4) | ||
1671 | { | ||
1672 | main_get_appheader_params (sfile, parsed+2, 0, "source"); | ||
1673 | } | ||
1674 | } | ||
1675 | |||
1676 | option_use_appheader = 0; | ||
1677 | return; | ||
1678 | } | ||
1679 | |||
1680 | /****************************************************************************************** | ||
1681 | Main I/O routines | ||
1682 | ******************************************************************************************/ | ||
1683 | |||
1684 | /* This function acts like the above except it may also try to recognize a compressed | ||
1685 | * input when the first buffer of data is read. The EXTERNAL_COMPRESSION code is called | ||
1686 | * to search for magic numbers. */ | ||
1687 | static int | ||
1688 | main_read_primary_input (main_file *ifile, | ||
1689 | uint8_t *buf, | ||
1690 | usize_t size, | ||
1691 | usize_t *nread) | ||
1692 | { | ||
1693 | #if EXTERNAL_COMPRESSION | ||
1694 | if (option_decompress_inputs && ifile->flags & RD_FIRST) | ||
1695 | { | ||
1696 | ifile->flags &= ~RD_FIRST; | ||
1697 | |||
1698 | return main_decompress_input_check (ifile, buf, size, nread); | ||
1699 | } | ||
1700 | #endif | ||
1701 | |||
1702 | return main_file_read (ifile, buf, size, nread, "input read failed"); | ||
1703 | } | ||
1704 | |||
1705 | /* This function simply writes the stream output buffer, if there is any. This is used | ||
1706 | * for both encode and decode commands. (The VCDIFF tools use main_print_func()). */ | ||
1707 | static int | ||
1708 | main_write_output (xd3_stream* stream, main_file *ofile) | ||
1709 | { | ||
1710 | int ret; | ||
1711 | |||
1712 | if (stream->avail_out > 0 && (ret = main_file_write (ofile, stream->next_out, stream->avail_out, "write failed"))) | ||
1713 | { | ||
1714 | return ret; | ||
1715 | } | ||
1716 | |||
1717 | return 0; | ||
1718 | } | ||
1719 | |||
1720 | /* Open the main output file, sets a default file name, initiate recompression. This | ||
1721 | * function is expected to fprint any error messages. */ | ||
1722 | static int | ||
1723 | main_open_output (xd3_stream *stream, main_file *ofile) | ||
1724 | { | ||
1725 | int ret; | ||
1726 | |||
1727 | if (ofile->filename == NULL) | ||
1728 | { | ||
1729 | XSTDOUT_XF (ofile); | ||
1730 | |||
1731 | if (option_verbose > 1) { XPR(NT "using standard output: %s\n", ofile->filename); } | ||
1732 | } | ||
1733 | else | ||
1734 | { | ||
1735 | /* Stat the file to check for overwrite. */ | ||
1736 | if (option_force == 0 && main_file_exists (ofile)) | ||
1737 | { | ||
1738 | XPR(NT "to overwrite output file specify -f: %s\n", ofile->filename); | ||
1739 | return EEXIST; | ||
1740 | } | ||
1741 | |||
1742 | if ((ret = main_file_open (ofile, ofile->filename, XO_WRITE))) | ||
1743 | { | ||
1744 | return ret; | ||
1745 | } | ||
1746 | |||
1747 | if (option_verbose > 1) { XPR(NT "open output: %s\n", ofile->filename); } | ||
1748 | } | ||
1749 | |||
1750 | #if EXTERNAL_COMPRESSION | ||
1751 | /* Do output recompression. */ | ||
1752 | if (ofile->compressor != NULL && option_recompress_outputs == 1) | ||
1753 | { | ||
1754 | if (! option_quiet) | ||
1755 | { | ||
1756 | XPR(NT "%s %s | %s\n", | ||
1757 | ofile->compressor->recomp_cmdname, | ||
1758 | ofile->compressor->recomp_options, | ||
1759 | ofile->filename); | ||
1760 | } | ||
1761 | |||
1762 | if ((ret = main_recompress_output (ofile))) | ||
1763 | { | ||
1764 | return ret; | ||
1765 | } | ||
1766 | } | ||
1767 | #endif | ||
1768 | |||
1769 | return 0; | ||
1770 | } | ||
1771 | |||
1772 | /* This is called at different times for encoding and decoding. The encoder calls it | ||
1773 | * immediately, the decoder delays until the application header is received. */ | ||
1774 | static int | ||
1775 | main_set_source (xd3_stream *stream, int cmd, main_file *sfile, xd3_source *source) | ||
1776 | { | ||
1777 | int ret, i; | ||
1778 | |||
1779 | /* Open it, check for seekability, set required xd3_source fields. */ | ||
1780 | if (allow_fake_source) | ||
1781 | { | ||
1782 | sfile->mode = XO_READ; | ||
1783 | sfile->realname = sfile->filename; | ||
1784 | sfile->nread = 0; | ||
1785 | source->size = UINT64_MAX; | ||
1786 | } | ||
1787 | else if ((ret = main_file_open (sfile, sfile->filename, XO_READ)) || | ||
1788 | (ret = main_file_stat (sfile, & source->size, 1))) | ||
1789 | { | ||
1790 | return ret; | ||
1791 | } | ||
1792 | |||
1793 | source->name = sfile->filename; | ||
1794 | source->ioh = sfile; | ||
1795 | source->curblkno = (xoff_t) -1; | ||
1796 | source->curblk = NULL; | ||
1797 | |||
1798 | /* Source block LRU init. */ | ||
1799 | main_blklru_list_init (& lru_list); | ||
1800 | main_blklru_list_init (& lru_free); | ||
1801 | |||
1802 | option_srcwinsz = min(source->size, (xoff_t) option_srcwinsz); | ||
1803 | |||
1804 | if (option_verbose > 1) { XPR(NT "source window size: %u\n", option_srcwinsz); } | ||
1805 | if (option_verbose > 1) { XPR(NT "source block size: %u\n", source->blksize); } | ||
1806 | |||
1807 | lru_size = (option_srcwinsz / source->blksize) + 1; | ||
1808 | |||
1809 | XD3_ASSERT(lru_size <= 128); /* TODO: fix performance here */ | ||
1810 | |||
1811 | if ((lru = main_malloc (sizeof (main_blklru) * lru_size)) == NULL) | ||
1812 | { | ||
1813 | return ENOMEM; | ||
1814 | } | ||
1815 | |||
1816 | for (i = 0; i < lru_size; i += 1) | ||
1817 | { | ||
1818 | lru[i].blkno = (xoff_t) -1; | ||
1819 | |||
1820 | if ((lru[i].blk = main_malloc (source->blksize)) == NULL) | ||
1821 | { | ||
1822 | return ENOMEM; | ||
1823 | } | ||
1824 | |||
1825 | main_blklru_list_push_back (& lru_free, & lru[i]); | ||
1826 | } | ||
1827 | |||
1828 | #if EXTERNAL_COMPRESSION | ||
1829 | if (option_decompress_inputs) | ||
1830 | { | ||
1831 | if (IS_ENCODE (cmd)) | ||
1832 | { | ||
1833 | usize_t nread; | ||
1834 | |||
1835 | source->curblk = lru[0].blk; | ||
1836 | |||
1837 | /* If encoding, read the first block now to check for decompression. */ | ||
1838 | if ((ret = main_file_read (sfile, (uint8_t*) source->curblk, source->blksize, & nread, "source read failed"))) | ||
1839 | { | ||
1840 | return ret; | ||
1841 | } | ||
1842 | |||
1843 | /* Check known magic numbers. */ | ||
1844 | for (i = 0; i < SIZEOF_ARRAY (extcomp_types); i += 1) | ||
1845 | { | ||
1846 | const main_extcomp *decomp = & extcomp_types[i]; | ||
1847 | |||
1848 | if ((nread > decomp->magic_size) && memcmp (source->curblk, decomp->magic, decomp->magic_size) == 0) | ||
1849 | { | ||
1850 | sfile->compressor = decomp; | ||
1851 | break; | ||
1852 | } | ||
1853 | } | ||
1854 | |||
1855 | /* If no decompression, the current buffer is now a valid source->curblock. */ | ||
1856 | if (sfile->compressor == NULL) | ||
1857 | { | ||
1858 | main_blklru_list_remove (& lru[0]); | ||
1859 | main_blklru_list_push_back (& lru_list, & lru[0]); | ||
1860 | |||
1861 | lru[0].blkno = 0; | ||
1862 | source->curblkno = 0; | ||
1863 | source->onblk = nread; | ||
1864 | |||
1865 | if (option_verbose > 1) | ||
1866 | { | ||
1867 | XPR(NT "source block 0 read (not compressed)\n"); | ||
1868 | } | ||
1869 | } | ||
1870 | } | ||
1871 | |||
1872 | /* In either the encoder or decoder, start decompression. */ | ||
1873 | if (sfile->compressor) | ||
1874 | { | ||
1875 | xoff_t osize = source->size; | ||
1876 | |||
1877 | if (osize > XD3_NODECOMPRESSSIZE) | ||
1878 | { | ||
1879 | XPR(NT "source file too large for external decompression: %s: %"Q"u\n", | ||
1880 | sfile->filename, osize); | ||
1881 | return EFBIG; | ||
1882 | } | ||
1883 | |||
1884 | if ((ret = main_decompress_source (sfile, source))) | ||
1885 | { | ||
1886 | return ret; | ||
1887 | } | ||
1888 | |||
1889 | if (! option_quiet) | ||
1890 | { | ||
1891 | char s1[32], s2[32]; | ||
1892 | XPR(NT "%s | %s %s => %s %.1f%% [ %s , %s ]\n", | ||
1893 | sfile->filename, | ||
1894 | sfile->compressor->decomp_cmdname, | ||
1895 | sfile->compressor->decomp_options, | ||
1896 | sfile->realname, | ||
1897 | 100.0 * source->size / osize, | ||
1898 | main_format_bcnt (osize, s1), | ||
1899 | main_format_bcnt (source->size, s2)); | ||
1900 | } | ||
1901 | } | ||
1902 | } | ||
1903 | #endif | ||
1904 | |||
1905 | if (option_verbose > 1) { XPR(NT "source file: %s: %"Q"u bytes\n", sfile->realname, source->size); } | ||
1906 | |||
1907 | if ((ret = xd3_set_source (stream, source))) | ||
1908 | { | ||
1909 | XPR(NT XD3_LIB_ERRMSG (stream, ret)); | ||
1910 | return EXIT_FAILURE; | ||
1911 | } | ||
1912 | |||
1913 | return 0; | ||
1914 | } | ||
1915 | |||
1916 | /****************************************************************************************** | ||
1917 | Source routines | ||
1918 | ******************************************************************************************/ | ||
1919 | |||
1920 | /* This is the callback for reading a block of source. This function is blocking and it | ||
1921 | * implements a small LRU. | ||
1922 | * | ||
1923 | * Note that it is possible for main_input() to handle getblk requests in a non-blocking | ||
1924 | * manner. If the callback is NULL then the caller of xd3_*_input() must handle the | ||
1925 | * XD3_GETSRCBLK return value and fill the source in the same way. See xd3_getblk for | ||
1926 | * details. To see an example of non-blocking getblk, see xdelta-test.h. */ | ||
1927 | static int | ||
1928 | main_getblk_func (xd3_stream *stream, | ||
1929 | xd3_source *source, | ||
1930 | xoff_t blkno) | ||
1931 | { | ||
1932 | xoff_t pos = blkno * source->blksize; | ||
1933 | main_file *sfile = (main_file*) source->ioh; | ||
1934 | main_blklru *blru = NULL; | ||
1935 | usize_t onblk = xd3_bytes_on_srcblk (source, blkno); | ||
1936 | usize_t nread; | ||
1937 | int ret; | ||
1938 | int i; | ||
1939 | |||
1940 | if (allow_fake_source) | ||
1941 | { | ||
1942 | source->curblkno = blkno; | ||
1943 | source->onblk = onblk; | ||
1944 | source->curblk = lru[0].blk; | ||
1945 | return 0; | ||
1946 | } | ||
1947 | |||
1948 | if (do_not_lru) | ||
1949 | { | ||
1950 | /* Direct lookup assumes sequential scan w/o skipping blocks. */ | ||
1951 | int idx = blkno % lru_size; | ||
1952 | if (lru[idx].blkno == blkno) | ||
1953 | { | ||
1954 | source->curblkno = blkno; | ||
1955 | source->onblk = onblk; | ||
1956 | source->curblk = lru[idx].blk; | ||
1957 | lru_hits += 1; | ||
1958 | return 0; | ||
1959 | } | ||
1960 | XD3_ASSERT (lru[idx].blkno == -1LL || | ||
1961 | lru[idx].blkno == blkno - lru_size); | ||
1962 | } | ||
1963 | else | ||
1964 | { | ||
1965 | /* Sequential search through LRU. */ | ||
1966 | for (i = 0; i < lru_size; i += 1) | ||
1967 | { | ||
1968 | if (lru[i].blkno == blkno) | ||
1969 | { | ||
1970 | main_blklru_list_remove (& lru[i]); | ||
1971 | main_blklru_list_push_back (& lru_list, & lru[i]); | ||
1972 | |||
1973 | source->curblkno = blkno; | ||
1974 | source->onblk = onblk; | ||
1975 | source->curblk = lru[i].blk; | ||
1976 | lru_hits += 1; | ||
1977 | return 0; | ||
1978 | } | ||
1979 | } | ||
1980 | } | ||
1981 | |||
1982 | if (! main_blklru_list_empty (& lru_free)) | ||
1983 | { | ||
1984 | blru = main_blklru_list_pop_front (& lru_free); | ||
1985 | } | ||
1986 | else if (! main_blklru_list_empty (& lru_list)) | ||
1987 | { | ||
1988 | if (do_not_lru) { | ||
1989 | blru = & lru[blkno % lru_size]; | ||
1990 | main_blklru_list_remove(blru); | ||
1991 | } else { | ||
1992 | blru = main_blklru_list_pop_front (& lru_list); | ||
1993 | } | ||
1994 | lru_misses += 1; | ||
1995 | } | ||
1996 | |||
1997 | lru_filled += 1; | ||
1998 | |||
1999 | if ((ret = main_file_seek (sfile, pos))) | ||
2000 | { | ||
2001 | return ret; | ||
2002 | } | ||
2003 | |||
2004 | if ((ret = main_file_read (sfile, (uint8_t*) blru->blk, source->blksize, | ||
2005 | & nread, "source read failed"))) | ||
2006 | { | ||
2007 | return ret; | ||
2008 | } | ||
2009 | |||
2010 | if (nread != onblk) | ||
2011 | { | ||
2012 | XPR(NT "source file size change: %s\n", sfile->filename); | ||
2013 | return EINVAL; | ||
2014 | } | ||
2015 | |||
2016 | main_blklru_list_push_back (& lru_list, blru); | ||
2017 | |||
2018 | if (option_verbose > 1) | ||
2019 | { | ||
2020 | if (blru->blkno != -1LL) | ||
2021 | { | ||
2022 | XPR(NT "source block %"Q"u ejects %"Q"u (lru_hits=%u, lru_misses=%u, lru_filled=%u)\n", | ||
2023 | blkno, blru->blkno, lru_hits, lru_misses, lru_filled); | ||
2024 | } | ||
2025 | else | ||
2026 | { | ||
2027 | XPR(NT "source block %"Q"u read (lru_hits=%u, lru_misses=%u, lru_filled=%u)\n", | ||
2028 | blkno, lru_hits, lru_misses, lru_filled); | ||
2029 | } | ||
2030 | } | ||
2031 | |||
2032 | blru->blkno = blkno; | ||
2033 | source->curblk = blru->blk; | ||
2034 | source->curblkno = blkno; | ||
2035 | source->onblk = onblk; | ||
2036 | |||
2037 | return 0; | ||
2038 | } | ||
2039 | |||
2040 | /****************************************************************************************** | ||
2041 | Main routines | ||
2042 | ******************************************************************************************/ | ||
2043 | |||
2044 | /* This is a generic input function. It calls the xd3_encode_input or xd3_decode_input | ||
2045 | * functions and makes calls to the various input handling routines above, which | ||
2046 | * coordinate external decompression. | ||
2047 | * | ||
2048 | * TODO config: Still need options for the at least: smatch config, memsize, sprevsz, | ||
2049 | * XD3_SEC_* flags, greedy/1.5 | ||
2050 | */ | ||
2051 | static int | ||
2052 | main_input (xd3_cmd cmd, | ||
2053 | main_file *ifile, | ||
2054 | main_file *ofile, | ||
2055 | main_file *sfile) | ||
2056 | { | ||
2057 | int ret; | ||
2058 | xd3_stream stream; | ||
2059 | usize_t nread; | ||
2060 | int stream_flags = 0; | ||
2061 | xd3_config config; | ||
2062 | xd3_source source; | ||
2063 | xoff_t last_total_in = 0; | ||
2064 | xoff_t last_total_out = 0; | ||
2065 | long start_time; | ||
2066 | |||
2067 | int (*input_func) (xd3_stream*); | ||
2068 | int (*output_func) (xd3_stream*, main_file *); | ||
2069 | |||
2070 | memset (& source, 0, sizeof (source)); | ||
2071 | memset (& config, 0, sizeof (config)); | ||
2072 | |||
2073 | config.alloc = main_alloc; | ||
2074 | config.freef = main_free1; | ||
2075 | config.sec_data.ngroups = 1; | ||
2076 | config.sec_addr.ngroups = 1; | ||
2077 | config.sec_inst.ngroups = 1; | ||
2078 | |||
2079 | /* main_input setup. */ | ||
2080 | switch ((int) cmd) | ||
2081 | { | ||
2082 | #if VCDIFF_TOOLS | ||
2083 | if (1) { case CMD_PRINTHDR: stream_flags = XD3_JUST_HDR; } | ||
2084 | else if (1) { case CMD_PRINTHDRS: stream_flags = XD3_SKIP_WINDOW; } | ||
2085 | else { case CMD_PRINTDELTA: stream_flags = XD3_SKIP_EMIT; } | ||
2086 | ifile->flags |= RD_NONEXTERNAL; | ||
2087 | input_func = xd3_decode_input; | ||
2088 | output_func = main_print_func; | ||
2089 | stream_flags |= XD3_ADLER32_NOVER; | ||
2090 | break; | ||
2091 | #endif | ||
2092 | #if XD3_ENCODER | ||
2093 | case CMD_ENCODE: | ||
2094 | input_func = xd3_encode_input; | ||
2095 | output_func = main_write_output; | ||
2096 | |||
2097 | if (option_use_checksum) { stream_flags |= XD3_ADLER32; } | ||
2098 | if (option_use_secondary) | ||
2099 | { | ||
2100 | /* The default secondary compressor is DJW, if it's compiled, being used, etc. */ | ||
2101 | if (option_secondary == NULL) | ||
2102 | { | ||
2103 | if (SECONDARY_DJW) { stream_flags |= XD3_SEC_DJW; } | ||
2104 | } | ||
2105 | else | ||
2106 | { | ||
2107 | if (strcmp (option_secondary, "fgk") == 0 && SECONDARY_FGK) | ||
2108 | { | ||
2109 | stream_flags |= XD3_SEC_FGK; | ||
2110 | } | ||
2111 | else if (strcmp (option_secondary, "djw") == 0 && SECONDARY_DJW) | ||
2112 | { | ||
2113 | stream_flags |= XD3_SEC_DJW; | ||
2114 | } | ||
2115 | else | ||
2116 | { | ||
2117 | XPR(NT "unrecognized secondary compressor type: %s\n", option_secondary); | ||
2118 | return EXIT_FAILURE; | ||
2119 | } | ||
2120 | } | ||
2121 | } | ||
2122 | if (option_no_compress) { stream_flags |= XD3_NOCOMPRESS; } | ||
2123 | if (option_use_altcodetable) { stream_flags |= XD3_ALT_CODE_TABLE; } | ||
2124 | if (option_smatch_config) | ||
2125 | { | ||
2126 | char *s = option_smatch_config, *e; | ||
2127 | int values[XD3_SOFTCFG_VARCNT]; | ||
2128 | int got; | ||
2129 | |||
2130 | config.smatch_cfg = XD3_SMATCH_SOFT; | ||
2131 | |||
2132 | for (got = 0; got < XD3_SOFTCFG_VARCNT; got += 1, s = e + 1) | ||
2133 | { | ||
2134 | values[got] = strtol (s, &e, 10); | ||
2135 | |||
2136 | if ((values[got] < 0) || | ||
2137 | (e == s) || | ||
2138 | (got < XD3_SOFTCFG_VARCNT-1 && *e == 0) || | ||
2139 | (got == XD3_SOFTCFG_VARCNT-1 && *e != 0)) | ||
2140 | { | ||
2141 | XPR(NT "invalid string match specifier (-C)\n"); | ||
2142 | return EXIT_FAILURE; | ||
2143 | } | ||
2144 | } | ||
2145 | |||
2146 | config.large_look = values[0]; | ||
2147 | config.large_step = values[1]; | ||
2148 | config.small_look = values[2]; | ||
2149 | config.small_chain = values[3]; | ||
2150 | config.small_lchain = values[4]; | ||
2151 | config.ssmatch = values[5]; | ||
2152 | config.try_lazy = values[6]; | ||
2153 | config.max_lazy = values[7]; | ||
2154 | config.long_enough = values[8]; | ||
2155 | config.promote = values[9]; | ||
2156 | config.srcwin_size = values[10]; | ||
2157 | config.srcwin_maxsz = values[11]; | ||
2158 | } | ||
2159 | else if (option_level < 5) { config.smatch_cfg = XD3_SMATCH_FAST; } | ||
2160 | else { config.smatch_cfg = XD3_SMATCH_SLOW; } | ||
2161 | break; | ||
2162 | #endif | ||
2163 | case CMD_DECODE: | ||
2164 | if (option_use_checksum == 0) { stream_flags |= XD3_ADLER32_NOVER; } | ||
2165 | stream_flags = 0; | ||
2166 | ifile->flags |= RD_NONEXTERNAL; | ||
2167 | input_func = xd3_decode_input; | ||
2168 | output_func = main_write_output; | ||
2169 | break; | ||
2170 | default: | ||
2171 | XPR(NT "internal error\n"); | ||
2172 | return EXIT_FAILURE; | ||
2173 | } | ||
2174 | |||
2175 | start_time = get_millisecs_now (); | ||
2176 | |||
2177 | /* allocate an input buffer. min(file_size, option_winsize) */ | ||
2178 | { | ||
2179 | xoff_t input_size = 0; | ||
2180 | config.winsize = option_winsize; | ||
2181 | if (main_file_stat (ifile, & input_size, 0) == 0) | ||
2182 | { | ||
2183 | config.winsize = min (input_size, (xoff_t) option_winsize); | ||
2184 | } | ||
2185 | config.winsize = xd3_round_blksize (config.winsize, MIN_BUFSIZE); | ||
2186 | config.winsize = max (config.winsize, MIN_BUFSIZE); | ||
2187 | } | ||
2188 | { | ||
2189 | /* Source blocksize is not user-settable, only option_srcwinsz is, | ||
2190 | * which determines the number of blocks. */ | ||
2191 | source.blksize = XD3_DEFAULT_SRCBLKSZ; | ||
2192 | option_srcwinsz = xd3_round_blksize(option_srcwinsz, MIN_BUFSIZE); | ||
2193 | option_srcwinsz = max(option_srcwinsz, MIN_BUFSIZE); | ||
2194 | config.srcwin_maxsz = option_srcwinsz; | ||
2195 | } | ||
2196 | |||
2197 | if (option_verbose > 1) { XPR(NT "input buffer size: %u\n", config.winsize); } | ||
2198 | |||
2199 | if ((main_bdata = main_malloc (config.winsize)) == NULL) | ||
2200 | { | ||
2201 | return EXIT_FAILURE; | ||
2202 | } | ||
2203 | |||
2204 | config.getblk = main_getblk_func; | ||
2205 | config.flags = stream_flags; | ||
2206 | |||
2207 | if ((ret = xd3_config_stream (& stream, & config))) | ||
2208 | { | ||
2209 | XPR(NT XD3_LIB_ERRMSG (& stream, ret)); | ||
2210 | return EXIT_FAILURE; | ||
2211 | } | ||
2212 | |||
2213 | if (IS_ENCODE (cmd)) | ||
2214 | { | ||
2215 | /* When encoding, open the source file, possibly decompress it. The decoder delays | ||
2216 | * this step until XD3_GOTHEADER. */ | ||
2217 | if (sfile->filename != NULL && (ret = main_set_source (& stream, cmd, sfile, & source))) | ||
2218 | { | ||
2219 | return EXIT_FAILURE; | ||
2220 | } | ||
2221 | } | ||
2222 | |||
2223 | /*XD3_ASSERT (option_first_offset <= option_last_offset);*/ | ||
2224 | /*XD3_ASSERT (option_first_window <= option_last_window);*/ | ||
2225 | |||
2226 | /*if (option_first_offset != 0 && (ret = main_file_seek (ifile, option_first_offset))) | ||
2227 | { | ||
2228 | return EXIT_FAILURE; | ||
2229 | }*/ | ||
2230 | |||
2231 | /* This times each window. */ | ||
2232 | get_millisecs_since (); | ||
2233 | |||
2234 | /* Main input loop. */ | ||
2235 | do | ||
2236 | { | ||
2237 | xoff_t input_offset; | ||
2238 | xoff_t input_remain; | ||
2239 | usize_t try_read; | ||
2240 | |||
2241 | input_offset = ifile->nread; | ||
2242 | /*XD3_ASSERT (input_offset <= option_last_offset);*/ | ||
2243 | |||
2244 | input_remain = /*option_last_offset*/ XOFF_T_MAX - input_offset; | ||
2245 | |||
2246 | try_read = (usize_t) min ((xoff_t) config.winsize, input_remain); | ||
2247 | |||
2248 | if ((ret = main_read_primary_input (ifile, main_bdata, try_read, & nread))) | ||
2249 | { | ||
2250 | return EXIT_FAILURE; | ||
2251 | } | ||
2252 | |||
2253 | /* If we've reached EOF tell the stream to flush. */ | ||
2254 | if (nread < try_read) | ||
2255 | { | ||
2256 | stream_flags |= XD3_FLUSH; | ||
2257 | xd3_set_flags (& stream, stream_flags); | ||
2258 | } | ||
2259 | |||
2260 | #if XD3_ENCODER | ||
2261 | /* After the first main_read_primary_input completes, we know all the information | ||
2262 | * needed to encode the application header. */ | ||
2263 | if (cmd == CMD_ENCODE && (ret = main_set_appheader (& stream, ifile, sfile))) | ||
2264 | { | ||
2265 | return EXIT_FAILURE; | ||
2266 | } | ||
2267 | #endif | ||
2268 | xd3_avail_input (& stream, main_bdata, nread); | ||
2269 | |||
2270 | /* If we read zero bytes after encoding at least one window... */ | ||
2271 | if (nread == 0 && stream.current_window > 0) { | ||
2272 | break; | ||
2273 | } | ||
2274 | |||
2275 | again: | ||
2276 | ret = input_func (& stream); | ||
2277 | /*if (option_verbose > 1) { XPR(NT XD3_LIB_ERRMSG (& stream, ret)); }*/ | ||
2278 | |||
2279 | switch (ret) | ||
2280 | { | ||
2281 | case XD3_INPUT: | ||
2282 | continue; | ||
2283 | |||
2284 | case XD3_GOTHEADER: | ||
2285 | { | ||
2286 | XD3_ASSERT (stream.current_window == 0); | ||
2287 | |||
2288 | /* Need to process the appheader as soon as possible. It may contain a | ||
2289 | * suggested default filename/decompression routine for the ofile, and it may | ||
2290 | * contain default/decompression routine for the sources. */ | ||
2291 | if (cmd == CMD_DECODE) | ||
2292 | { | ||
2293 | int have_src = sfile->filename != NULL; | ||
2294 | int need_src = xd3_decoder_needs_source (& stream); | ||
2295 | int recv_src; | ||
2296 | |||
2297 | /* May need to set the sfile->filename if none was given. */ | ||
2298 | main_get_appheader (& stream, ofile, sfile); | ||
2299 | |||
2300 | recv_src = sfile->filename != NULL; | ||
2301 | |||
2302 | /* Check if the user expected a source to be required although it was not. */ | ||
2303 | if (have_src && ! need_src && ! option_quiet) | ||
2304 | { | ||
2305 | XPR(NT "warning: output window %"Q"u does not copy source\n", stream.current_window); | ||
2306 | } | ||
2307 | |||
2308 | /* Check if we have no source name and need one. */ | ||
2309 | /* TODO: this doesn't fire due to cpyblocks_ calculation check */ | ||
2310 | if (need_src && ! recv_src) | ||
2311 | { | ||
2312 | XPR(NT "input requires a source file, use -s\n"); | ||
2313 | return EXIT_FAILURE; | ||
2314 | } | ||
2315 | |||
2316 | /* Now open the source file. */ | ||
2317 | if (need_src && (ret = main_set_source (& stream, cmd, sfile, & source))) | ||
2318 | { | ||
2319 | return EXIT_FAILURE; | ||
2320 | } | ||
2321 | } | ||
2322 | else if (cmd == CMD_PRINTHDR || | ||
2323 | cmd == CMD_PRINTHDRS || | ||
2324 | cmd == CMD_PRINTDELTA) | ||
2325 | { | ||
2326 | if (xd3_decoder_needs_source (& stream) && sfile->filename == NULL) | ||
2327 | { | ||
2328 | allow_fake_source = 1; | ||
2329 | sfile->filename = "<placeholder>"; | ||
2330 | main_set_source (& stream, cmd, sfile, & source); | ||
2331 | } | ||
2332 | } | ||
2333 | } | ||
2334 | /* FALLTHROUGH */ | ||
2335 | case XD3_WINSTART: | ||
2336 | { | ||
2337 | /* Set or unset XD3_SKIP_WINDOW. */ | ||
2338 | /*if (stream.current_window < option_first_window || stream.current_window > option_last_window) | ||
2339 | { stream_flags |= XD3_SKIP_WINDOW; } | ||
2340 | else | ||
2341 | { stream_flags &= ~XD3_SKIP_WINDOW; }*/ | ||
2342 | |||
2343 | xd3_set_flags (& stream, stream_flags); | ||
2344 | goto again; | ||
2345 | } | ||
2346 | |||
2347 | case XD3_OUTPUT: | ||
2348 | { | ||
2349 | if (option_no_output == 0/* && | ||
2350 | stream.current_window >= option_first_window && | ||
2351 | stream.current_window <= option_last_window*/) | ||
2352 | { | ||
2353 | /* Defer opening the output file until the stream produces its first | ||
2354 | * output for both encoder and decoder, this way we delay long enough for | ||
2355 | * the decoder to receive the application header. (Or longer if there are | ||
2356 | * skipped windows, but I can't think of any reason not to delay open.) */ | ||
2357 | |||
2358 | if (! main_file_isopen (ofile) && (ret = main_open_output (& stream, ofile)) != 0) | ||
2359 | { | ||
2360 | return EXIT_FAILURE; | ||
2361 | } | ||
2362 | if ((ret = output_func (& stream, ofile)) && (ret != PRINTHDR_SPECIAL)) | ||
2363 | { | ||
2364 | return EXIT_FAILURE; | ||
2365 | } | ||
2366 | if (ret == PRINTHDR_SPECIAL) | ||
2367 | { | ||
2368 | xd3_abort_stream (& stream); | ||
2369 | ret = EXIT_SUCCESS; | ||
2370 | goto done; | ||
2371 | } | ||
2372 | ret = 0; | ||
2373 | } | ||
2374 | |||
2375 | xd3_consume_output (& stream); | ||
2376 | goto again; | ||
2377 | } | ||
2378 | |||
2379 | case XD3_WINFINISH: | ||
2380 | { | ||
2381 | if (IS_ENCODE (cmd) || cmd == CMD_DECODE) | ||
2382 | { | ||
2383 | int used_source = xd3_encoder_used_source (& stream); | ||
2384 | |||
2385 | if (! option_quiet && IS_ENCODE (cmd) && main_file_isopen (sfile) && ! used_source) | ||
2386 | { | ||
2387 | XPR(NT "warning: input position %"Q"u no source copies\n", | ||
2388 | stream.current_window * source.blksize); | ||
2389 | } | ||
2390 | |||
2391 | if (option_verbose) | ||
2392 | { | ||
2393 | char rrateavg[32], wrateavg[32], tm[32]; | ||
2394 | char rdb[32], wdb[32], sb[32]; | ||
2395 | char trdb[32], twdb[32], tsb[32]; | ||
2396 | char srcbuf[48], tsrcbuf[48]; | ||
2397 | long millis = get_millisecs_since (); | ||
2398 | usize_t this_read = stream.total_in - last_total_in; | ||
2399 | usize_t this_write = stream.total_out - last_total_out; | ||
2400 | last_total_in = stream.total_in; | ||
2401 | last_total_out = stream.total_out; | ||
2402 | |||
2403 | tsrcbuf[0] = srcbuf[0] = 0; | ||
2404 | if (used_source) | ||
2405 | { | ||
2406 | sprintf (srcbuf, ": src %s", main_format_bcnt (xd3_encoder_srclen (& stream), sb)); | ||
2407 | sprintf (tsrcbuf, ": src %s", main_format_bcnt (stream.srcwin_cksum_pos, tsb)); | ||
2408 | } | ||
2409 | /*if (stream.current_window >= option_first_window && | ||
2410 | stream.current_window <= option_last_window)*/ | ||
2411 | { | ||
2412 | XPR(NT "%"Q"u: in %s (%s): out %s (%s)%s: total in %s: out %s%s: %s\n", | ||
2413 | stream.current_window, | ||
2414 | main_format_bcnt (this_read, rdb), | ||
2415 | main_format_rate (this_read, millis, rrateavg), | ||
2416 | main_format_bcnt (this_write, wdb), | ||
2417 | main_format_rate (this_write, millis, wrateavg), | ||
2418 | srcbuf, | ||
2419 | main_format_bcnt (stream.total_in, trdb), | ||
2420 | main_format_bcnt (stream.total_out, twdb), | ||
2421 | tsrcbuf, | ||
2422 | main_format_millis (millis, tm)); | ||
2423 | } | ||
2424 | } | ||
2425 | } | ||
2426 | goto again; | ||
2427 | } | ||
2428 | |||
2429 | default: | ||
2430 | /* input_func() error */ | ||
2431 | XPR(NT XD3_LIB_ERRMSG (& stream, ret)); | ||
2432 | return EXIT_FAILURE; | ||
2433 | } | ||
2434 | } | ||
2435 | while (nread == config.winsize); | ||
2436 | done: | ||
2437 | /* Close the inputs. (ifile must be open, sfile may be open) */ | ||
2438 | main_file_close (ifile); | ||
2439 | main_file_close (sfile); | ||
2440 | |||
2441 | /* If output file is not open yet because of delayed-open, it means we never encountered | ||
2442 | * a window in the delta, but it could have had a VCDIFF header? TODO: solve this | ||
2443 | * elsewhere. For now, it prints "nothing to output" below, but the check doesn't | ||
2444 | * happen in case of option_no_output. */ | ||
2445 | if (! option_no_output) | ||
2446 | { | ||
2447 | if (! main_file_isopen (ofile)) | ||
2448 | { | ||
2449 | XPR(NT "nothing to output: %s\n", ifile->filename); | ||
2450 | return EXIT_FAILURE; | ||
2451 | } | ||
2452 | |||
2453 | /* Have to close the output before calling main_external_compression_finish, or else it hangs. */ | ||
2454 | if (main_file_close (ofile) != 0) | ||
2455 | { | ||
2456 | return EXIT_FAILURE; | ||
2457 | } | ||
2458 | } | ||
2459 | |||
2460 | if ((ret = xd3_close_stream (& stream))) | ||
2461 | { | ||
2462 | XPR(NT XD3_LIB_ERRMSG (& stream, ret)); | ||
2463 | return EXIT_FAILURE; | ||
2464 | } | ||
2465 | |||
2466 | #if EXTERNAL_COMPRESSION | ||
2467 | if ((ret = main_external_compression_finish ())) { return EXIT_FAILURE; } | ||
2468 | #endif | ||
2469 | |||
2470 | xd3_free_stream (& stream); | ||
2471 | |||
2472 | if (option_verbose) | ||
2473 | { | ||
2474 | char tm[32]; | ||
2475 | long end_time = get_millisecs_now (); | ||
2476 | XPR(NT "command finished in %s\n", main_format_millis (end_time - start_time, tm)); | ||
2477 | } | ||
2478 | if (option_verbose > 1) | ||
2479 | { | ||
2480 | XPR(NT "input bytes: %"Q"u\n", ifile->nread); | ||
2481 | XPR(NT "output bytes: %"Q"u\n", ofile->nwrite); | ||
2482 | } | ||
2483 | |||
2484 | return EXIT_SUCCESS; | ||
2485 | } | ||
2486 | |||
2487 | /* free memory before exit, reset single-use variables. */ | ||
2488 | static void | ||
2489 | main_cleanup (void) | ||
2490 | { | ||
2491 | int i; | ||
2492 | |||
2493 | if (option_appheader) { appheader_used = NULL; } | ||
2494 | |||
2495 | main_free ((void**) & appheader_used); | ||
2496 | main_free ((void**) & main_bdata); | ||
2497 | |||
2498 | #if EXTERNAL_COMPRESSION | ||
2499 | main_free ((void**) & ext_tmpfile); | ||
2500 | #endif | ||
2501 | |||
2502 | for (i = 0; lru && i < lru_size; i += 1) | ||
2503 | { | ||
2504 | main_free ((void**) & lru[i].blk); | ||
2505 | } | ||
2506 | |||
2507 | main_free ((void**) & lru); | ||
2508 | |||
2509 | lru_hits = 0; | ||
2510 | lru_misses = 0; | ||
2511 | lru_filled = 0; | ||
2512 | |||
2513 | XD3_ASSERT (main_mallocs == 0); | ||
2514 | } | ||
2515 | |||
2516 | int | ||
2517 | #if PYTHON_MODULE | ||
2518 | xd3_main_cmdline (int argc, char **argv) | ||
2519 | #else | ||
2520 | main (int argc, char **argv) | ||
2521 | #endif | ||
2522 | { | ||
2523 | xd3_cmd cmd; | ||
2524 | main_file ifile; | ||
2525 | main_file ofile; | ||
2526 | main_file sfile; | ||
2527 | static char *flags = "0123456789cdefhnqvDJNRTVs:B:C:E:F:L:O:P:W:A::S::"; | ||
2528 | int my_optind; | ||
2529 | char *my_optarg; | ||
2530 | char *my_optstr; | ||
2531 | char *sfilename; | ||
2532 | int orig_argc = argc; | ||
2533 | char **orig_argv = argv; | ||
2534 | int ret; | ||
2535 | |||
2536 | go: /* Go. */ | ||
2537 | cmd = CMD_NONE; | ||
2538 | sfilename = NULL; | ||
2539 | my_optind = 1; | ||
2540 | argv = orig_argv; | ||
2541 | argc = orig_argc; | ||
2542 | program_name = argv[0]; | ||
2543 | extcomp_types[0].recomp_cmdname = program_name; | ||
2544 | extcomp_types[0].decomp_cmdname = program_name; | ||
2545 | takearg: | ||
2546 | my_optarg = NULL; | ||
2547 | my_optstr = argv[my_optind]; | ||
2548 | /* This doesn't use getopt() because it makes trouble for -P & python which reenter | ||
2549 | * main() and thus care about freeing all memory. I never had much trust for getopt | ||
2550 | * anyway, it's too opaque. This implements a fairly standard non-long-option getopt | ||
2551 | * with support for named operations (e.g., "xdelta3 [encode|decode|printhdr...] < in > | ||
2552 | * out"). I'll probably add long options at some point. See TODO. */ | ||
2553 | if (my_optstr) | ||
2554 | { | ||
2555 | if (*my_optstr == '-') { my_optstr += 1; } | ||
2556 | else if (cmd == CMD_NONE) { goto nonflag; } | ||
2557 | else { my_optstr = NULL; } | ||
2558 | } | ||
2559 | while (my_optstr) | ||
2560 | { | ||
2561 | char *s; | ||
2562 | my_optarg = NULL; | ||
2563 | if ((ret = *my_optstr++) == 0) { my_optind += 1; goto takearg; } | ||
2564 | |||
2565 | /* Option handling: first check for one ':' following the option in flags, then | ||
2566 | * check for two. The syntax allows: | ||
2567 | * | ||
2568 | * 1. -Afoo defines optarg="foo" | ||
2569 | * 2. -A foo defines optarg="foo" | ||
2570 | * 3. -A "" defines optarg="" (allows optional empty-string) | ||
2571 | * 4. -A [EOA or -moreargs] error (mandatory case) | ||
2572 | * 5. -A [EOA -moreargs] defines optarg=NULL (optional case) | ||
2573 | * 6. -A=foo defines optarg="foo" | ||
2574 | * 7. -A= defines optarg="" (mandatory case) | ||
2575 | * 8. -A= defines optarg=NULL (optional case) | ||
2576 | * | ||
2577 | * See tests in test_command_line_arguments(). | ||
2578 | */ | ||
2579 | s = strchr (flags, ret); | ||
2580 | if (s && s[1] && s[1] == ':') | ||
2581 | { | ||
2582 | int eqcase = 0; | ||
2583 | int option = s[2] && s[2] == ':'; | ||
2584 | |||
2585 | /* Case 1, set optarg to the remaining characters. */ | ||
2586 | my_optarg = my_optstr; | ||
2587 | my_optstr = ""; | ||
2588 | |||
2589 | /* Case 2-5 */ | ||
2590 | if (*my_optarg == 0) | ||
2591 | { | ||
2592 | /* Condition 4-5 */ | ||
2593 | int have_arg = my_optind < (argc - 1) && *argv[my_optind+1] != '-'; | ||
2594 | |||
2595 | if (! have_arg) | ||
2596 | { | ||
2597 | if (! option) | ||
2598 | { | ||
2599 | /* Case 4 */ | ||
2600 | XPR(NT "-%c: requires an argument\n", ret); | ||
2601 | ret = EXIT_FAILURE; | ||
2602 | goto cleanup; | ||
2603 | } | ||
2604 | /* Case 5. */ | ||
2605 | my_optarg = NULL; | ||
2606 | } | ||
2607 | else | ||
2608 | { | ||
2609 | /* Case 2-3. */ | ||
2610 | my_optarg = argv[++my_optind]; | ||
2611 | } | ||
2612 | } | ||
2613 | /* Case 6-8. */ | ||
2614 | else if (*my_optarg == '=') | ||
2615 | { | ||
2616 | /* Remove the = in all cases. */ | ||
2617 | my_optarg += 1; | ||
2618 | eqcase = 1; | ||
2619 | |||
2620 | if (option && *my_optarg == 0) | ||
2621 | { | ||
2622 | /* Case 8. */ | ||
2623 | my_optarg = NULL; | ||
2624 | } | ||
2625 | } | ||
2626 | } | ||
2627 | |||
2628 | switch (ret) | ||
2629 | { | ||
2630 | /* case: if no '-' was found, maybe check for a command name. */ | ||
2631 | nonflag: | ||
2632 | if (strcmp (my_optstr, "decode") == 0) { cmd = CMD_DECODE; } | ||
2633 | else if (strcmp (my_optstr, "encode") == 0) | ||
2634 | { | ||
2635 | #if XD3_ENCODER | ||
2636 | cmd = CMD_ENCODE; | ||
2637 | #else | ||
2638 | XPR(NT "encoder support not compiled\n"); | ||
2639 | return EXIT_FAILURE; | ||
2640 | #endif | ||
2641 | } | ||
2642 | else if (strcmp (my_optstr, "config") == 0) { cmd = CMD_CONFIG; } | ||
2643 | #if REGRESSION_TEST | ||
2644 | else if (strcmp (my_optstr, "test") == 0) { cmd = CMD_TEST; } | ||
2645 | #endif | ||
2646 | #if VCDIFF_TOOLS | ||
2647 | else if (strcmp (my_optstr, "printhdr") == 0) { cmd = CMD_PRINTHDR; } | ||
2648 | else if (strcmp (my_optstr, "printhdrs") == 0) { cmd = CMD_PRINTHDRS; } | ||
2649 | else if (strcmp (my_optstr, "printdelta") == 0) { cmd = CMD_PRINTDELTA; } | ||
2650 | #endif | ||
2651 | |||
2652 | /* If no option was found and still no command, let the default command be | ||
2653 | * encode. The remaining args are treated as filenames. */ | ||
2654 | if (cmd == CMD_NONE) | ||
2655 | { | ||
2656 | cmd = CMD_DEFAULT; | ||
2657 | my_optstr = NULL; | ||
2658 | break; | ||
2659 | } | ||
2660 | else | ||
2661 | { | ||
2662 | /* But if we find a command name, continue the getopt loop. */ | ||
2663 | my_optind += 1; | ||
2664 | goto takearg; | ||
2665 | } | ||
2666 | |||
2667 | /* gzip-like options */ | ||
2668 | case '0': case '1': case '2': case '3': case '4': | ||
2669 | case '5': case '6': case '7': case '8': case '9': | ||
2670 | option_level = ret - '0'; | ||
2671 | break; | ||
2672 | case 'f': option_force = 1; break; | ||
2673 | case 'v': option_verbose += 1; option_quiet = 0; break; | ||
2674 | case 'q': option_quiet = 1; option_verbose = 0; break; | ||
2675 | case 'c': option_stdout = 1; break; | ||
2676 | case 'd': | ||
2677 | if (cmd == CMD_NONE) { cmd = CMD_DECODE; } | ||
2678 | else { ret = main_help (); goto exit; } | ||
2679 | break; | ||
2680 | case 'e': | ||
2681 | #if XD3_ENCODER | ||
2682 | if (cmd == CMD_NONE) { cmd = CMD_ENCODE; } | ||
2683 | else { ret = main_help (); goto exit; } | ||
2684 | break; | ||
2685 | #else | ||
2686 | XPR(NT "encoder support not compiled\n"); | ||
2687 | return EXIT_FAILURE; | ||
2688 | #endif | ||
2689 | |||
2690 | //case 'F': if ((ret = main_strtoxoff (my_optarg, & option_first_window, 'F'))) { goto exit; } break; | ||
2691 | //case 'L': if ((ret = main_strtoxoff (my_optarg, & option_last_window, 'L'))) { goto exit; } break; | ||
2692 | //case 'O': if ((ret = main_strtoxoff (my_optarg, & option_first_offset, 'O'))) { goto exit; } break; | ||
2693 | //case 'E': if ((ret = main_strtoxoff (my_optarg, & option_last_offset, 'E'))) { goto exit; } break; | ||
2694 | |||
2695 | case 'P': | ||
2696 | /* only set profile count once, since... */ | ||
2697 | if (option_profile_cnt == 0) | ||
2698 | { | ||
2699 | if ((ret = main_atou(my_optarg, (usize_t*) & option_profile_cnt, 0, 'P'))) { goto exit; } | ||
2700 | |||
2701 | if (option_profile_cnt <= 0) | ||
2702 | { | ||
2703 | ret = EXIT_SUCCESS; | ||
2704 | goto exit; | ||
2705 | } | ||
2706 | } | ||
2707 | break; | ||
2708 | |||
2709 | case 'n': option_use_checksum = 0; break; | ||
2710 | case 'N': option_no_compress = 1; break; | ||
2711 | case 'T': option_use_altcodetable = 1; break; | ||
2712 | case 'C': option_smatch_config = my_optarg; break; | ||
2713 | case 'J': option_no_output = 1; break; | ||
2714 | case 'S': if (my_optarg == NULL) { option_use_secondary = 0; } | ||
2715 | else { option_use_secondary = 1; option_secondary = my_optarg; } break; | ||
2716 | case 'A': if (my_optarg == NULL) { option_use_appheader = 0; } | ||
2717 | else { option_appheader = (uint8_t*) my_optarg; } break; | ||
2718 | case 'B': if ((ret = main_atou (my_optarg, & option_srcwinsz, MIN_BUFSIZE, 'B'))) { goto exit; } break; | ||
2719 | case 'W': if ((ret = main_atou (my_optarg, & option_winsize, MIN_BUFSIZE, 'W'))) { goto exit; } break; | ||
2720 | case 'D': | ||
2721 | #if EXTERNAL_COMPRESSION == 0 | ||
2722 | if (! option_quiet) | ||
2723 | { | ||
2724 | XPR(NT "warning: -D option ignored, " | ||
2725 | "external compression support was not compiled\n"); | ||
2726 | } | ||
2727 | #else | ||
2728 | option_decompress_inputs = 0; | ||
2729 | #endif | ||
2730 | break; | ||
2731 | case 'R': | ||
2732 | #if EXTERNAL_COMPRESSION == 0 | ||
2733 | if (! option_quiet) | ||
2734 | { | ||
2735 | XPR(NT "warning: -R option ignored, " | ||
2736 | "external compression support was not compiled\n"); | ||
2737 | } | ||
2738 | #else | ||
2739 | option_recompress_outputs = 0; | ||
2740 | #endif | ||
2741 | break; | ||
2742 | case 's': | ||
2743 | if (sfilename != NULL) | ||
2744 | { | ||
2745 | XPR(NT "specify only one source file\n"); | ||
2746 | goto cleanup; | ||
2747 | } | ||
2748 | |||
2749 | sfilename = my_optarg; | ||
2750 | break; | ||
2751 | |||
2752 | case 'V': | ||
2753 | ret = main_version (); goto exit; | ||
2754 | default: | ||
2755 | ret = main_help (); goto exit; | ||
2756 | } | ||
2757 | } | ||
2758 | |||
2759 | option_source_filename = sfilename; | ||
2760 | |||
2761 | /* In case there were no arguments, set the default command. */ | ||
2762 | if (cmd == CMD_NONE) { cmd = CMD_DEFAULT; } | ||
2763 | |||
2764 | argc -= my_optind; | ||
2765 | argv += my_optind; | ||
2766 | |||
2767 | /* There may be up to two more arguments. */ | ||
2768 | if (argc > 2) | ||
2769 | { | ||
2770 | XPR(NT "too many filenames: %s ...\n", argv[2]); | ||
2771 | ret = EXIT_FAILURE; | ||
2772 | goto cleanup; | ||
2773 | } | ||
2774 | |||
2775 | if (option_verbose > 1) | ||
2776 | { | ||
2777 | int l = 1; | ||
2778 | int i; | ||
2779 | char buf[1024]; | ||
2780 | for (i = 0; i < orig_argc; i += 1) | ||
2781 | { | ||
2782 | l += strlen (orig_argv[i]) + 1; | ||
2783 | } | ||
2784 | buf[0] = 0; | ||
2785 | for (i = 0; i < orig_argc; i += 1) | ||
2786 | { | ||
2787 | strcat (buf, orig_argv[i]); | ||
2788 | strcat (buf, " "); | ||
2789 | } | ||
2790 | XPR(NT "command line: %s\n", buf); | ||
2791 | } | ||
2792 | |||
2793 | main_file_init (& ifile); | ||
2794 | main_file_init (& ofile); | ||
2795 | main_file_init (& sfile); | ||
2796 | |||
2797 | ifile.flags = RD_FIRST; | ||
2798 | sfile.flags = RD_FIRST; | ||
2799 | sfile.filename = option_source_filename; | ||
2800 | |||
2801 | /* The infile takes the next argument, if there is one. But if not, infile is set to | ||
2802 | * stdin. */ | ||
2803 | if (argc > 0) | ||
2804 | { | ||
2805 | ifile.filename = argv[0]; | ||
2806 | |||
2807 | if ((ret = main_file_open (& ifile, ifile.filename, XO_READ))) | ||
2808 | { | ||
2809 | goto cleanup; | ||
2810 | } | ||
2811 | } | ||
2812 | else | ||
2813 | { | ||
2814 | XSTDIN_XF (& ifile); | ||
2815 | } | ||
2816 | |||
2817 | /* The ofile takes the following argument, if there is one. But if not, it is left NULL | ||
2818 | * until the application header is processed. It will be set in main_open_output. */ | ||
2819 | if (argc > 1) | ||
2820 | { | ||
2821 | /* Check for conflicting arguments. */ | ||
2822 | if (option_stdout && ! option_quiet) | ||
2823 | { | ||
2824 | XPR(NT "warning: -c option overrides output filename: %s\n", argv[1]); | ||
2825 | } | ||
2826 | |||
2827 | if (! option_stdout) { ofile.filename = argv[1]; } | ||
2828 | } | ||
2829 | |||
2830 | switch (cmd) | ||
2831 | { | ||
2832 | case CMD_PRINTHDR: | ||
2833 | case CMD_PRINTHDRS: | ||
2834 | case CMD_PRINTDELTA: | ||
2835 | #if XD3_ENCODER | ||
2836 | case CMD_ENCODE: | ||
2837 | if (cmd == CMD_ENCODE) | ||
2838 | { | ||
2839 | do_not_lru = 1; | ||
2840 | } | ||
2841 | #endif | ||
2842 | case CMD_DECODE: | ||
2843 | ret = main_input (cmd, & ifile, & ofile, & sfile); | ||
2844 | break; | ||
2845 | |||
2846 | #if REGRESSION_TEST | ||
2847 | case CMD_TEST: | ||
2848 | ret = xd3_selftest (); | ||
2849 | break; | ||
2850 | #endif | ||
2851 | |||
2852 | case CMD_CONFIG: | ||
2853 | ret = main_config (); | ||
2854 | break; | ||
2855 | |||
2856 | default: | ||
2857 | ret = main_help (); | ||
2858 | break; | ||
2859 | } | ||
2860 | |||
2861 | #if EXTERNAL_COMPRESSION | ||
2862 | if (ext_tmpfile != NULL) { unlink (ext_tmpfile); } | ||
2863 | #endif | ||
2864 | |||
2865 | if (0) | ||
2866 | { | ||
2867 | cleanup: | ||
2868 | ret = EXIT_FAILURE; | ||
2869 | exit: | ||
2870 | (void)0; | ||
2871 | } | ||
2872 | |||
2873 | main_cleanup (); | ||
2874 | |||
2875 | if (--option_profile_cnt > 0 && ret == EXIT_SUCCESS) { goto go; } | ||
2876 | |||
2877 | return ret; | ||
2878 | } | ||
2879 | |||
/* Print the version banner (via main_version) followed by the usage summary:
 * the commands first, then the options.  Always returns EXIT_FAILURE; the visible
 * callers assign the result to `ret' and jump to the exit label.
 * NOTE(review): the in-body list of options that are "not shown" looks stale -- -P is
 * printed at the end of this function; confirm the rest against the option parser. */
2880 | static int | ||
2881 | main_help (void) | ||
2882 | { | ||
2883 | /* Not all options are shown, yet: 0-9, l J T C P F L O E | ||
2884 | * Remember to update www/xdelta3-cmdline.html | ||
2885 | */ | ||
2886 | |||
2887 | main_version (); | ||
2888 | P(RINT "usage: xdelta3 [command/options] [input [output]]\n"); | ||
2889 | P(RINT "commands are:\n"); | ||
2890 | P(RINT " encode encodes the input%s\n", XD3_ENCODER ? "" : " [Not compiled]"); | ||
2891 | P(RINT " decode decodes the input\n"); | ||
2892 | P(RINT " config prints xdelta3 configuration\n"); | ||
2893 | #if REGRESSION_TEST | ||
2894 | P(RINT " test run the builtin tests\n"); | ||
2895 | #endif | ||
2896 | #if VCDIFF_TOOLS | ||
2897 | P(RINT "special commands for VCDIFF inputs:\n"); | ||
2898 | P(RINT " printhdr print information about the first window\n"); | ||
2899 | P(RINT " printhdrs print information about all windows\n"); | ||
2900 | P(RINT " printdelta print information about the entire delta\n"); | ||
2901 | #endif | ||
2902 | P(RINT "options are:\n"); | ||
2903 | P(RINT " -c use stdout instead of default\n"); | ||
2904 | P(RINT " -d same as decode command\n"); | ||
2905 | P(RINT " -e same as encode command\n"); | ||
2906 | P(RINT " -f force overwrite\n"); | ||
2907 | P(RINT " -n disable checksum (encode/decode)\n"); | ||
2908 | P(RINT " -D disable external decompression (encode/decode)\n"); | ||
2909 | P(RINT " -R disable external recompression (decode)\n"); | ||
2910 | P(RINT " -N disable small string-matching compression\n"); | ||
2911 | P(RINT " -S [djw|fgk] disable/enable secondary compression\n"); | ||
2912 | P(RINT " -A [apphead] disable/provide application header\n"); | ||
2913 | P(RINT " -s source source file to copy from (if any)\n"); | ||
2914 | P(RINT " -B blksize source file block size\n"); | ||
2915 | P(RINT " -W winsize input window buffer size\n"); | ||
2916 | P(RINT " -v be verbose (max 2)\n"); | ||
2917 | P(RINT " -q be quiet\n"); | ||
2918 | P(RINT " -h show help\n"); | ||
2919 | P(RINT " -V show version\n"); | ||
2920 | P(RINT " -P repeat count (for profiling)\n"); | ||
2921 | |||
2922 | return EXIT_FAILURE; | ||
2923 | } | ||
diff --git a/xdelta3/xdelta3-python.h b/xdelta3/xdelta3-python.h new file mode 100755 index 0000000..cfd6095 --- /dev/null +++ b/xdelta3/xdelta3-python.h | |||
@@ -0,0 +1,86 @@ | |||
1 | /* xdelta 3 - delta compression tools and library | ||
2 | * Copyright (C) 2003 and onward. Joshua P. MacDonald | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify | ||
5 | * it under the terms of the GNU General Public License as published by | ||
6 | * the Free Software Foundation; either version 2 of the License, or | ||
7 | * (at your option) any later version. | ||
8 | * | ||
9 | * This program is distributed in the hope that it will be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write to the Free Software | ||
16 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
17 | */ | ||
18 | |||
19 | #include "Python.h" | ||
20 | |||
21 | static PyObject *pyxd3_error; | ||
22 | |||
23 | /* spam: xdelta3.main([string,list,...]) */ | ||
24 | PyObject *xdelta3_main_cmdline (PyObject *self, PyObject *args) | ||
25 | { | ||
26 | int ret, i, nargs; | ||
27 | char **argv = NULL; | ||
28 | int argc = 0; | ||
29 | PyObject *result = NULL; | ||
30 | PyObject *o; | ||
31 | |||
32 | if (! PyArg_ParseTuple (args, "O", &o) | ||
33 | || ! PyList_Check (o)) | ||
34 | { | ||
35 | goto cleanup; | ||
36 | } | ||
37 | |||
38 | argc = PyList_Size (o); | ||
39 | nargs = argc + 2; | ||
40 | |||
41 | if (! (argv = malloc (sizeof(argv[0]) * nargs))) | ||
42 | { | ||
43 | PyErr_NoMemory (); | ||
44 | goto cleanup; | ||
45 | } | ||
46 | memset (argv, 0, sizeof(argv[0]) * nargs); | ||
47 | |||
48 | for (i = 1; i < nargs-1; i += 1) | ||
49 | { | ||
50 | char *ps; | ||
51 | PyObject *s; | ||
52 | if ((s = PyList_GetItem (o, i-1)) == NULL) { goto cleanup; } | ||
53 | ps = PyString_AsString (s); | ||
54 | argv[i] = ps; | ||
55 | } | ||
56 | |||
57 | ret = xd3_main_cmdline (argc+1, argv); | ||
58 | |||
59 | if (ret == 0) | ||
60 | { | ||
61 | result = Py_BuildValue ("i", ret); | ||
62 | } | ||
63 | else | ||
64 | { | ||
65 | PyErr_SetString (pyxd3_error, "failed :("); | ||
66 | } | ||
67 | cleanup: | ||
68 | if (argv) | ||
69 | { | ||
70 | free (argv); | ||
71 | } | ||
72 | return result; | ||
73 | } | ||
74 | static PyMethodDef xdelta3_methods[] = { | ||
75 | { "main", xdelta3_main_cmdline, METH_VARARGS, "xdelta3 main()" }, | ||
76 | { NULL, NULL } | ||
77 | }; | ||
78 | |||
79 | DL_EXPORT(void) initxdelta3 (void) | ||
80 | { | ||
81 | PyObject *m, *d; | ||
82 | m = Py_InitModule ("xdelta3", xdelta3_methods); | ||
83 | d = PyModule_GetDict (m); | ||
84 | pyxd3_error = PyErr_NewException ("xdelta3.error", NULL, NULL); | ||
85 | PyDict_SetItemString (d, "error", pyxd3_error); | ||
86 | } | ||
diff --git a/xdelta3/xdelta3-regtest.py b/xdelta3/xdelta3-regtest.py new file mode 100755 index 0000000..f3313a4 --- /dev/null +++ b/xdelta3/xdelta3-regtest.py | |||
@@ -0,0 +1,596 @@ | |||
1 | #!/usr/bin/python2.3 | ||
2 | # xdelta 3 - delta compression tools and library | ||
3 | # Copyright (C) 2003 and onward. Joshua P. MacDonald | ||
4 | # | ||
5 | # This program is free software; you can redistribute it and/or modify | ||
6 | # it under the terms of the GNU General Public License as published by | ||
7 | # the Free Software Foundation; either version 2 of the License, or | ||
8 | # (at your option) any later version. | ||
9 | # | ||
10 | # This program is distributed in the hope that it will be useful, | ||
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
13 | # GNU General Public License for more details. | ||
14 | # | ||
15 | # You should have received a copy of the GNU General Public License | ||
16 | # along with this program; if not, write to the Free Software | ||
17 | # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
18 | |||
19 | # Under construction. | ||
20 | |||
21 | # TODO: This is really part test, part performance evaluation suite, and | ||
22 | # really incomplete. | ||
23 | |||
24 | import os, sys, math, re, time, types, array | ||
25 | import xdelta3 | ||
26 | |||
# Number of histogram rows built by Bucks.
27 | HIST_SIZE = 10 # the number of buckets | ||
# RcsFile.PairsByDate skips a version pair when either file is smaller than this.
28 | MIN_SIZE = 0 | ||
29 | |||
# TimeRun compares the wall-clock time of one measurement against this value
# when deciding whether to raise the repetition count.
30 | TIME_TOO_SHORT = 0.050 | ||
31 | |||
# Defaults for the TimeRun constructor arguments.
32 | MIN_REPS = 1 | ||
33 | MAX_REPS = 1 | ||
34 | SKIP_TRIALS = 1 | ||
35 | MIN_TRIALS = 3 | ||
36 | MAX_TRIALS = 15 | ||
37 | |||
# NOTE: these three assignments override the values set just above, so the
# effective defaults are 0 / 1 / 1.
38 | SKIP_TRIALS = 0 | ||
39 | MIN_TRIALS = 1 | ||
40 | MAX_TRIALS = 1 | ||
41 | |||
# TimeRun stops once 100 * stddev / mean of the elapsed time drops below this.
42 | MIN_STDDEV_PCT = 1.5 # stop | ||
# Upper bound passed to Decimals() by RunSpeed.
43 | MAX_RUN = 1000 * 1000 * 10 | ||
44 | |||
# Executable used by the fork path in RunXdelta3.
45 | XD3CMD = './xdelta3-64' | ||
46 | #XD3CMD = './xdelta3' | ||
47 | |||
# Values for the `kind` argument of RunXdelta3 / Xdelta3Run1.
48 | # kind: | ||
49 | PYEXT = 1 | ||
50 | FORK = 0 | ||
51 | |||
52 | # | ||
53 | # | ||
# Directory that Test() crawls for ",v" files.
54 | RCSDIR = '/Volumes/LACIE120RAID/orbit_linux/home/jmacd/PRCS/prcs/b' | ||
55 | |||
# Per-process scratch directory; created and removed by the __main__ block.
56 | TMPDIR = '/tmp/xd3regtest.%d' % os.getpid() | ||
57 | |||
# Scratch files inside TMPDIR.
58 | RUNFILE = os.path.join(TMPDIR, 'run') | ||
59 | HFILE = os.path.join(TMPDIR, 'hdr') | ||
60 | DFILE = os.path.join(TMPDIR, 'output') | ||
61 | RFILE = os.path.join(TMPDIR, 'recon') | ||
62 | |||
# States of the rlog-output parser in RcsFile (see RcsFile.Match / Sum1Rlog).
63 | HEAD_STATE = 0 | ||
64 | BAR_STATE = 1 | ||
65 | REV_STATE = 2 | ||
66 | DATE_STATE = 3 | ||
67 | |||
68 | # rcs output | ||
69 | RE_TOTREV = re.compile('total revisions: (\\d+)') | ||
70 | RE_BAR = re.compile('----------------------------') | ||
71 | RE_REV = re.compile('revision (.+)') | ||
72 | RE_DATE = re.compile('date: ([^;]+);.*') | ||
73 | # xdelta output | ||
74 | RE_HDRSZ = re.compile('VCDIFF header size: +(\\d+)') | ||
75 | RE_EXTCOMP = re.compile('XDELTA ext comp.*') | ||
76 | |||
77 | # | ||
78 | # exceptions | ||
class SkipRcsException:
    """Raised to make the caller skip the current RCS file.

    `reason` is a short human-readable explanation kept on the instance.
    """

    def __init__(self, reason):
        self.reason = reason
class NotEnoughVersions:
    """Raised by RcsFile.PairsByDate when a file has fewer than two revisions.

    Carries no state and is constructed without arguments.
    """
85 | class CommandError: | ||
86 | def __init__(self,cmd,str): | ||
87 | if type(cmd) is types.TupleType or \ | ||
88 | type(cmd) is types.ListType: | ||
89 | cmd = reduce(lambda x,y: '%s %s' % (x,y),cmd) | ||
90 | print 'command was: ',cmd | ||
91 | print 'command failed: ',str | ||
92 | print 'have fun debugging' | ||
93 | # | ||
94 | # one version | ||
class RcsVersion:
    """One revision of an RCS file.

    `vstr` is the revision string given to the constructor.  The `date`
    attribute is not set here; RcsFile.Date assigns it later, and both
    ordering and Print() require it.
    """

    def __init__(self, vstr):
        self.vstr = vstr

    def __cmp__(self, other):
        # Revisions order by their date strings.
        mine, theirs = self.date, other.date
        return cmp(mine, theirs)

    def Print(self):
        print('%s %s' % (self.vstr, self.date))
102 | # | ||
103 | # one rcsfile | ||
class RcsFile:
    """One RCS ",v" file.

    Holds the revision list parsed from `rlog` output and provides helpers to
    check revisions out into TMPDIR and to time a runnable over each pair of
    consecutive revisions.
    """

    def __init__(self, fname):
        self.fname = fname
        self.versions = []
        self.state = HEAD_STATE

    def SetTotRev(self,s):
        """Record the revision count announced in the rlog header."""
        self.totrev = int(s)

    def Rev(self,s):
        """Start a new revision `s`; raises SkipRcsException when the log
        contains more revision entries than the header announced."""
        self.rev = RcsVersion(s)
        if len(self.versions) >= self.totrev:
            raise SkipRcsException('too many versions (in log messages)')
        self.versions.append(self.rev)

    def Date(self,s):
        """Attach the date string to the revision most recently started."""
        self.rev.date = s

    def Match(self, line, state, rx, gp, newstate, f):
        """Try one parser transition.

        When the parser is in `state` and `rx` matches `line`, call `f` (if
        any) with group `gp` of the match, move to `newstate` and return 1.
        Otherwise return None and leave the state alone.
        """
        if state != self.state:
            return None
        m = rx.match(line)
        if not m:
            return None
        if f:
            f(m.group(gp))
        self.state = newstate
        return 1

    def Sum1Rlog(self):
        """Run rlog on the file and feed its output through the parser.

        Raises CommandError when rlog exits with a non-zero status.
        """
        # (state, pattern, next state, handler); the first transition that
        # applies to a line wins.
        transitions = ((HEAD_STATE, RE_TOTREV, BAR_STATE, self.SetTotRev),
                       (BAR_STATE, RE_BAR, REV_STATE, None),
                       (REV_STATE, RE_REV, DATE_STATE, self.Rev),
                       (DATE_STATE, RE_DATE, BAR_STATE, self.Date))
        cmd = 'rlog '+self.fname
        f = os.popen(cmd, "r")
        try:
            l = f.readline()
            while l:
                for (state, rx, newstate, handler) in transitions:
                    if self.Match(l, state, rx, 1, newstate, handler):
                        break
                l = f.readline()
        finally:
            # Always close the pipe, also when a handler raised.
            status = f.close()
        if status is not None:
            # close() yields the exit status (an int) on failure; it cannot
            # be raised directly.
            raise CommandError(cmd, 'exited with status %d' % status)

    def Sum1(self):
        """Record the file size, parse the rlog output and sort the
        revisions; raises SkipRcsException on a revision-count mismatch."""
        st = os.stat(self.fname)
        self.rcssize = st.st_size
        self.Sum1Rlog()
        if self.totrev != len(self.versions):
            raise SkipRcsException('wrong version count')
        self.versions.sort()

    def Checkout(self,n):
        """Check revision `n` out with `co -p` into Verf(n) and store the
        number of bytes written in the revision's `vsize`."""
        v = self.versions[n]
        cmd = 'co -ko -p%s %s' % (v.vstr, self.fname)
        out = open(self.Verf(n), "w")
        try:
            (inf, stream, err) = os.popen3(cmd, "r")
            inf.close()
            # read() without a size returns everything up to end-of-file.
            data = stream.read()
            out.write(data)
            v.vsize = len(data)
            estr = err.read()
            # NOTE(review): the popen3 streams may never report the child's
            # exit status through close(); confirm before relying on this.
            failed = stream.close()
            err.close()
        finally:
            out.close()
        if failed:
            raise CommandError(cmd, 'checkout failed: %s\n%s\n%s' % (v.vstr, self.fname, estr))

    def Vdate(self,n):
        return self.versions[n].date

    def Vstr(self,n):
        return self.versions[n].vstr

    def Verf(self,n):
        """Path of the scratch file holding revision `n`."""
        return os.path.join(TMPDIR, 'input.%d' % n)

    def PairsByDate(self,runnable):
        """Time `runnable` on every consecutive revision pair (v, v+1).

        Returns the list of TimeRun results.  Raises NotEnoughVersions when
        the file has fewer than two revisions.
        """
        if self.totrev < 2:
            raise NotEnoughVersions()
        self.Checkout(0)
        ntrials = []
        for v in range(0,self.totrev-1):
            if v > 0:
                # Revision v-1 is no longer needed once pair (v, v+1) starts;
                # this also removes input.0.
                os.remove(self.Verf(v-1))
            self.Checkout(v+1)
            if os.stat(self.Verf(v)).st_size < MIN_SIZE or \
               os.stat(self.Verf(v+1)).st_size < MIN_SIZE:
                continue

            result = TimeRun(runnable.Runner(self.Verf(v),
                                             self.Vstr(v),
                                             self.Verf(v+1),
                                             self.Vstr(v+1)))
            print('testing %s %s: ideal %.3f%%: time %.7f: in %u/%u trials' % \
                  (os.path.basename(self.fname),
                   self.Vstr(v+1),
                   result.r1.ideal,
                   result.time.mean,
                   result.trials,
                   result.reps))
            ntrials.append(result)

        # Only the last two checkouts are still on disk here.
        os.remove(self.Verf(self.totrev-1))
        os.remove(self.Verf(self.totrev-2))
        return ntrials
226 | # | ||
227 | # This class recursively scans a directory for rcsfiles | ||
class RcsFinder:
    """Walks a directory tree collecting RCS ",v" files.

    After Crawl(): `subdirs` holds every directory found below the root,
    `rcsfiles` one RcsFile per ",v" file and `others` every remaining path.
    Summarize() moves unusable files from `rcsfiles` to `skipped`.
    """

    def __init__(self):
        self.subdirs = []
        self.rcsfiles = []
        self.others = []
        self.skipped = []

    def Scan1(self,dir):
        """Classify the entries of one directory and return its
        subdirectories (as full paths)."""
        found_dirs = []
        found_rcs = []
        found_other = []
        for name in os.listdir(dir):
            path = os.path.join(dir, name)
            if os.path.isdir(path):
                found_dirs.append(path)
            elif name.endswith(",v"):
                found_rcs.append(RcsFile(path))
            else:
                found_other.append(path)
        self.subdirs = self.subdirs + found_dirs
        self.rcsfiles = self.rcsfiles + found_rcs
        self.others = self.others + found_other
        return found_dirs

    def Crawl(self, dir):
        """Scan `dir` and everything below it, breadth first."""
        pending = [dir]
        while pending:
            head = pending[0]
            pending = pending[1:] + self.Scan1(head)

    def Summarize(self):
        """Parse every RCS file; keep those with at least two revisions and
        move the rest (or any that raise SkipRcsException) to `skipped`."""
        kept = []
        for rf in self.rcsfiles:
            try:
                rf.Sum1()
                usable = rf.totrev >= 2
            except SkipRcsException:
                usable = 0
            if usable:
                kept.append(rf)
            else:
                self.skipped.append(rf)
        self.rcsfiles = kept

    def PairsByDate(self,runnable):
        """Run RcsFile.PairsByDate for every file and return the list of
        per-file result lists; files that raise are dropped from `rcsfiles`."""
        allvtrials = []
        survivors = []
        for rf in self.rcsfiles:
            print('testing %s on %s with %d versions' % (runnable.type, rf.fname, rf.totrev))
            try:
                allvtrials.append(rf.PairsByDate(runnable))
            except SkipRcsException:
                print('file %s has compressed versions: skipping' % (rf.fname))
            except NotEnoughVersions:
                print('testing %s on %s: not enough versions' % (runnable.type, rf.fname))
            else:
                survivors.append(rf)
        self.rcsfiles = survivors
        return allvtrials
288 | # | ||
289 | # | ||
class Bucks:
    """Histogram of HIST_SIZE equal-width rows over [low, high).

    Each row is a list [start, middle, end, count].
    """

    def __init__(self,low,high):
        self.low = low
        self.high = high
        self.spread = high - low

        def edge(i, frac):
            # Position `frac` of the way through row i.
            return low+(self.spread * (i+frac) / float(HIST_SIZE))

        self.bucks = [[edge(i, 0.0), edge(i, 0.5), edge(i, 1.0), 0]
                      for i in range(0,HIST_SIZE)]

    def Add(self, x):
        """Count one sample; `x` must lie in [low, high)."""
        assert(x>=self.low)
        assert(x<self.high)
        row = self.bucks[int((x-self.low)/float(self.spread)*HIST_SIZE)]
        row[3] = row[3] + 1

    def Print(self, f):
        """Write one "start middle end count" line per row to `f`."""
        for row in self.bucks:
            # gnuplot -persist "plot %s using 2:4
            f.write("%.1f %.1f %.1f %d\n" % (row[0],row[1],row[2],row[3]))
310 | # | ||
311 | # | ||
class TimeRun:
    """Runs `runnable.Run(trial, reps)` repeatedly and collects timings.

    All the work happens in the constructor.  Afterwards:
      r1       -- the result returned by the first Run() call
      reps     -- the repetition count finally used
      trials   -- the number of measured trials (skipped ones excluded)
      measured -- list of (clock, wall) per-repetition times
      time     -- StatList over the wall-clock times
      clock    -- StatList over the time.clock() times
    """

    def __init__(self,runnable,set_reps=1,reps=MIN_REPS,max_reps=MAX_REPS,
                 skip_trials=SKIP_TRIALS,min_trials=MIN_TRIALS,max_trials=MAX_TRIALS,
                 min_stddev_pct=MIN_STDDEV_PCT):

        min_trials = min(min_trials,max_trials)
        self.trials = 0
        self.measured = []
        self.r1 = None
        self.reps = reps
        while 1:
            # Remove the outputs of the previous round.  Each file gets its
            # own try so that a missing DFILE does not leave RFILE behind.
            for stale in (DFILE, RFILE):
                try:
                    os.remove(stale)
                except OSError:
                    pass

            start_time = time.time()
            start_clock = time.clock()

            result = runnable.Run(self.trials, self.reps)

            if self.r1 is None:
                self.r1 = result

            total_clock = (time.clock() - start_clock)
            total_time = (time.time() - start_time)

            # Per-repetition times, clamped away from zero.
            elap_time = max((total_time) / self.reps, 0.000001)
            elap_clock = max((total_clock) / self.reps, 0.000001)

            #print 'trial: %d' % self.trials
            if set_reps and runnable.canrep and total_time < TIME_TOO_SHORT and self.reps < max_reps:
                # The run was too short to time: scale the repetition count
                # up and retry.  total_time may be 0 when the run finishes
                # within the clock resolution, hence the clamp.
                scaled = int(self.reps * TIME_TOO_SHORT / max(total_time, 0.000001))
                self.reps = min(max(self.reps+1, scaled), max_reps)
                #print 'continue: need more reps: %d' % self.reps
                continue

            self.trials = self.trials + 1

            # skip some of the first trials
            if self.trials > skip_trials:
                self.measured.append((elap_clock,elap_time))
                #print 'measurement total: %.1f ms' % (total_time * 1000.0)

            # at least so many
            if self.trials < (skip_trials + min_trials):
                #print 'continue: need more trials: %d' % self.trials
                continue

            # compute %variance
            done = 0
            if skip_trials + min_trials <= 2:
                # Too few trials were requested for a spread estimate; the
                # samples are duplicated so StatList gets more than one.
                done = 1
                self.measured = self.measured + self.measured

            self.time = StatList([x[1] for x in self.measured], 'elap time')
            sp = float(self.time.s) / float(self.time.mean)

            # what if MAX_TRIALS is exceeded?
            too_many = (self.trials-skip_trials) >= max_trials
            good = (100.0 * sp) < min_stddev_pct
            if done or too_many or good:
                self.trials = self.trials - skip_trials
                self.clock = StatList([x[0] for x in self.measured], 'elap clock')
                return
381 | # | ||
382 | # | ||
383 | # | ||
def SumList(l):
    """Return the left-to-right sum of the elements of `l` (non-empty)."""
    def add(total, item):
        return total+item
    return reduce(add, l)
386 | # | ||
387 | # returns (total, mean, stddev, q2 (median), | ||
388 | # (q3-q1)/2 ("semi-interquartile range"), max-min (spread)) | ||
class StatList:
    """Summary statistics over a list of more than one number.

    The list is sorted in place.  Attributes: cnt, l, total, mean, s (sample
    standard deviation), q0..q4 (q4 is the maximum plus one), siqr
    ((q3-q1)/2), spread (q4-q0), hf (histogram file name) and str (a one-line
    summary).  With `hist` set, a histogram is written to "./<desc>.hist".
    """

    def __init__(self,l,desc,hist=0):
        n = len(l)
        assert(n > 1)
        l.sort()
        self.cnt = n
        self.l = l
        self.total = SumList(l)
        self.mean = self.total / float(self.cnt)
        squares = [(x-self.mean) * (x - self.mean) for x in l]
        self.s = math.sqrt(SumList(squares) / float(self.cnt-1))
        self.q0 = l[0]
        self.q1 = l[int(self.cnt/4.0+0.5)]
        self.q2 = l[int(self.cnt/2.0+0.5)]
        self.q3 = l[min(self.cnt-1,int((3.0*self.cnt)/4.0+0.5))]
        # One past the maximum, so that every sample is below the top edge.
        self.q4 = l[self.cnt-1]+1
        self.hf = "./%s.hist" % desc
        self.siqr = (self.q3-self.q1)/2.0
        self.spread = (self.q4-self.q0)
        self.str = '%s %d; mean %d; sdev %d; q2 %d; .5(q3-q1) %.1f; spread %d' % \
                   (desc, self.total, self.mean, self.s, self.q2, self.siqr, self.spread)
        if not hist:
            return
        f = open(self.hf, "w")
        self.bucks = Bucks(self.q0,self.q4)
        for sample in l:
            self.bucks.Add(sample)
        self.bucks.Print(f)
        f.close()
416 | |||
def RunCommand(args):
    """Run `args` (program first) and wait for it; raises CommandError
    unless the spawn result is 0."""
    #print "run command", args
    status = os.spawnvp(os.P_WAIT, args[0], args)
    if status == 0:
        return
    raise CommandError(args, 'exited %d' % status)
422 | |||
def RunCommandIO(args,infn,outfn):
    """Run `args` with stdin read from `infn` and stdout written to `outfn`.

    `outfn` is created or truncated.  Raises CommandError when the child
    does not exit normally or exits with a non-zero status.
    """
    #print "run command io", args
    p = os.fork()
    if p == 0:
        # Child.  Whatever goes wrong here, the child must never return into
        # the caller's code: that would run the rest of the script twice.
        try:
            os.dup2(os.open(infn,os.O_RDONLY),0)
            os.dup2(os.open(outfn,os.O_CREAT|os.O_TRUNC|os.O_WRONLY),1)
            os.execvp(args[0], args)
        except:
            sys.stderr.write('RunCommandIO: cannot run %s: %s\n' %
                             (args[0], sys.exc_info()[1]))
        os._exit(127)

    s = os.waitpid(p,0)
    # The exit status is only meaningful for a normal exit.
    if not os.WIFEXITED(s[1]):
        raise CommandError(args, 'terminated abnormally (wait status %d)' % s[1])
    o = os.WEXITSTATUS(s[1])
    if o != 0:
        raise CommandError(args, 'exited %d' % o)
435 | |||
def RunXdelta3(args,kind=FORK):
    """Run xdelta3 with the argument list `args`.

    The fork path (RunCommand on XD3CMD) is switched off, so `kind` has no
    effect at present: the in-process xdelta3.main is always used.  Any
    exception it raises is turned into CommandError.
    """
    use_fork = 0 # kind == FORK:
    if use_fork:
        RunCommand([XD3CMD] + args)
        return
    try:
        xdelta3.main(args)
    except Exception:
        raise CommandError(args, "xdelta3.main exception")
444 | |||
class GzipInfo:
    """Sizes in bytes of a target file (`tgtsize`) and of its compressed
    output (`dsize`)."""

    def __init__(self,target,delta):
        target_stat = os.stat(target)
        self.tgtsize = target_stat.st_size
        delta_stat = os.stat(delta)
        self.dsize = delta_stat.st_size
449 | |||
class Xdelta3Info:
    """Facts about one delta file, gathered with `xdelta3 printhdr`.

    Attributes: tgtsize and dsize (file sizes in bytes), ideal (delta size as
    a percentage of the target size, 0.0 for an empty target), hdrsize (the
    number parsed from the "VCDIFF header size" line) and extcomp (1 when an
    "XDELTA ext comp" line is present).  Raises CommandError when no
    non-zero header size is reported.
    """

    def __init__(self,target,delta):
        cmd = ['printhdr',
               '-f',
               delta,
               HFILE]
        RunXdelta3(cmd)
        self.extcomp = 0
        self.hdrsize = 0
        self.tgtsize = os.stat(target).st_size
        self.dsize = os.stat(delta).st_size
        if self.tgtsize > 0:
            self.ideal = 100.0 * self.dsize / self.tgtsize
        else:
            self.ideal = 0.0
        o = open(HFILE, "r")
        try:
            l = o.readline()
            while l:
                #print l.strip()
                m = RE_HDRSZ.match(l)
                if m:
                    self.hdrsize = int(m.group(1))
                m = RE_EXTCOMP.match(l)
                if m:
                    #print 'EXTCOMP', m.group(0)
                    self.extcomp = 1
                l = o.readline()
        finally:
            # Close the header dump on every path, including errors.
            o.close()
        if self.hdrsize == 0:
            raise CommandError(cmd, 'no hdrsize')
479 | |||
class Xdelta3Pair:
    """Runnable that encodes `new` against `old`, then decodes and compares.

    Runner() selects the file pair; Run() performs one trial.
    """

    def __init__(self):
        self.type = 'xdelta3'
        self.decode_args = '-dqf'
        self.encode_args = '-eqf'
        self.presrc = '-s'
        self.canrep = 1

    def Runner(self,old,oldv,new,newv):
        """Remember the file pair and its revision strings; returns self."""
        self.old, self.oldv = old, oldv
        self.new, self.newv = new, newv
        return self

    def Run(self,trial,reps):
        """Encode `reps` times.  On trial 0 also inspect the delta, decode
        it, compare the result with `new` and return the Xdelta3Info; later
        trials return None."""
        encode = ['-P', '%d' % reps, self.encode_args,
                  self.presrc, self.old, self.new, DFILE]
        RunXdelta3(encode)
        if trial > 0:
            return None
        self.dinfo = Xdelta3Info(self.new,DFILE)
        if self.dinfo.extcomp:
            raise SkipRcsException('ext comp')
        decode = [self.decode_args, self.presrc, self.old, DFILE, RFILE]
        RunXdelta3(decode)
        RunCommand(('cmp', self.new, RFILE))
        return self.dinfo
517 | |||
def Test():
    """Crawl RCSDIR, print summary statistics for the RCS files found and
    run the xdelta3 encode/decode check over all consecutive revision pairs.
    Exits with status 1 when the crawl finds no RCS file."""
    rcsf = RcsFinder()
    rcsf.Crawl(RCSDIR)
    if len(rcsf.rcsfiles) == 0:
        sys.exit(1)
    rcsf.Summarize()
    counts = (len(rcsf.rcsfiles),
              len(rcsf.subdirs),
              len(rcsf.others),
              len(rcsf.skipped))
    print("rcsfiles: rcsfiles %d; subdirs %d; others %d; skipped %d" % counts)
    size_stats = StatList([x.rcssize for x in rcsf.rcsfiles], "rcssize", 1)
    print(size_stats.str)
    rev_stats = StatList([x.totrev for x in rcsf.rcsfiles], "totrev", 1)
    print(rev_stats.str)
    rcsf.PairsByDate(Xdelta3Pair())
531 | |||
def Decimals(max):
    """Return [0, 1..9, 10, 20..90, 100, ...]: 0, then 1-9 times each power
    of ten, ending with the first power of ten that is >= `max`."""
    values = [0]
    step = 1
    while 1:
        values.extend(range(step, step * 10, step))
        following = step * 10
        if following >= max:
            values.append(following)
            return values
        step = following
543 | |||
class Xdelta3Run1:
    """Runnable that encodes a single file (no source) into DFILE.

    A non-zero `reps` given to the constructor replaces the repetition count
    passed to Run(), which must then be 1.
    """

    def __init__(self,file,kind,reps=0):
        self.file = file
        self.kind = kind
        self.reps = reps
        self.canrep = 1

    def Run(self,trial,reps):
        """Encode the file; returns an Xdelta3Info on trial 0, else None."""
        if self.reps:
            assert(reps == 1)
            reps = self.reps
        command = ['-P', '%d' % reps, '-efq', self.file, DFILE]
        RunXdelta3(command,kind=self.kind)
        if trial > 0:
            return None
        return Xdelta3Info(self.file,DFILE)
558 | |||
class GzipRun1:
    """Runnable that compresses a single file into DFILE with `gzip -cf`.
    It cannot repeat internally (canrep is 0), so Run() requires reps == 1."""

    def __init__(self,file):
        self.file = file
        self.canrep = 0

    def Run(self,trial,reps):
        """Compress the file; returns a GzipInfo on trial 0, else None."""
        assert(reps == 1)
        RunCommandIO(['gzip', '-cf'], self.file, DFILE)
        if trial <= 0:
            return GzipInfo(self.file,DFILE)
        return None
569 | |||
def SetFileSize(F,L):
    """Create file `F` if necessary and set its length to exactly `L` bytes."""
    handle = os.open(F, os.O_CREAT | os.O_WRONLY)
    os.ftruncate(handle,L)
    actual = os.fstat(handle).st_size
    assert(actual == L)
    os.close(handle)
575 | |||
def ReportSpeed(L,tr,desc):
    """Print one result line for a TimeRun `tr` over an input of length `L`:
    output size, mean time in milliseconds and (L + output size) per second."""
    dsize = tr.r1.dsize
    mean = tr.time.mean
    print('%s 0-run length %u: dsize %u: time %.3f ms: encode %.0f B/sec: in %ux%u trials' % \
          (desc, L, dsize, mean * 1000.0, ((L+dsize) / mean), tr.trials, tr.reps))
579 | |||
def RunSpeed():
    """For every length in Decimals(MAX_RUN): resize RUNFILE to that length,
    then time and report xdelta3 followed by gzip on it."""
    for L in Decimals(MAX_RUN):
        SetFileSize(RUNFILE, L)
        ReportSpeed(L, TimeRun(Xdelta3Run1(RUNFILE,kind=PYEXT)), 'xdelta3')
        ReportSpeed(L, TimeRun(GzipRun1(RUNFILE)), 'gzip ')
587 | |||
# Script entry point: create the scratch directory, run the RCS regression
# pass and then the speed pass.
588 | if __name__ == "__main__": | ||
589 | try: | ||
590 | os.mkdir(TMPDIR) | ||
591 | Test() | ||
592 | RunSpeed() | ||
# CommandError already printed its report when it was constructed.  TMPDIR is
# not removed on this path.
593 | except CommandError: | ||
594 | pass | ||
# Only a run without a CommandError removes the scratch directory.
595 | else: | ||
596 | RunCommand(['rm', '-rf', TMPDIR]) | ||
diff --git a/xdelta3/xdelta3-second.h b/xdelta3/xdelta3-second.h new file mode 100755 index 0000000..89287f0 --- /dev/null +++ b/xdelta3/xdelta3-second.h | |||
@@ -0,0 +1,363 @@ | |||
1 | /* xdelta 3 - delta compression tools and library | ||
2 | * Copyright (C) 2002 and onward. Joshua P. MacDonald | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify | ||
5 | * it under the terms of the GNU General Public License as published by | ||
6 | * the Free Software Foundation; either version 2 of the License, or | ||
7 | * (at your option) any later version. | ||
8 | * | ||
9 | * This program is distributed in the hope that it will be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write to the Free Software | ||
16 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
17 | */ | ||
18 | |||
19 | #ifndef _XDELTA3_SECOND_H_ | ||
20 | #define _XDELTA3_SECOND_H_ | ||
21 | |||
22 | /****************************************************************************************** | ||
23 | Secondary compression | ||
24 | ******************************************************************************************/ | ||
25 | |||
/* Accessors for the three secondary-stream slots kept on the xd3_stream S.
 * NOTE(review): the _d / _i / _a suffixes presumably stand for the data,
 * instruction and address sections -- confirm against the xd3_stream
 * definition. */
#define xd3_sec_data(s) ((s)->sec_stream_d)
#define xd3_sec_inst(s) ((s)->sec_stream_i)
#define xd3_sec_addr(s) ((s)->sec_stream_a)
29 | |||
/* Method table for one secondary compressor.  The generic code in this
 * header reaches an implementation only through these function pointers
 * (via stream->sec_type). */
struct _xd3_sec_type
{
  int id;
  const char *name;
  xd3_secondary_flags flags;

  /* xd3_sec_stream is opaque to the generic code */

  /* Allocates a new secondary stream; a NULL result is reported to callers
   * as ENOMEM. */
  xd3_sec_stream* (*alloc) (xd3_stream *stream);
  void (*destroy) (xd3_stream *stream,
		   xd3_sec_stream *sec);
  /* Called once, right after a successful alloc. */
  void (*init) (xd3_sec_stream *sec);
  /* Decodes from *INPUT (bounded by INPUT_END) into *OUTPUT (bounded by
   * OUTPUT_END), advancing both cursors.  Returns 0 on success. */
  int (*decode) (xd3_stream *stream,
		 xd3_sec_stream *sec_stream,
		 const uint8_t **input,
		 const uint8_t *input_end,
		 uint8_t **output,
		 const uint8_t *output_end);
#if XD3_ENCODER
  /* Encodes the page list INPUT into OUTPUT.  May return XD3_NOSECOND,
   * which xd3_encode_secondary treats as "leave the section unencoded". */
  int (*encode) (xd3_stream *stream,
		 xd3_sec_stream *sec_stream,
		 xd3_output *input,
		 xd3_output *output,
		 xd3_sec_cfg *cfg);
#endif
};
55 | |||
/* Static initializers for a bit_state, in { cur_byte, cur_mask } order.  The
 * decoder starts with mask 0x100 so that the first read fetches a byte. */
#define BIT_STATE_ENCODE_INIT { 0, 1 }
#define BIT_STATE_DECODE_INIT { 0, 0x100 }

/* Cursor for reading or writing a byte stream one bit at a time, least
 * significant bit of each byte first. */
typedef struct _bit_state bit_state;
struct _bit_state
{
  /* Encoder: bits accumulated so far.  Decoder: the byte being consumed. */
  usize_t cur_byte;
  /* Single-bit mask selecting the next bit.  The decoder treats 0x100 as
   * "current byte exhausted"; the encoder emits the byte once the bit under
   * mask 0x80 has been written. */
  usize_t cur_mask;
};
65 | |||
66 | static INLINE void xd3_bit_state_encode_init (bit_state *bits) | ||
67 | { | ||
68 | bits->cur_byte = 0; | ||
69 | bits->cur_mask = 1; | ||
70 | } | ||
71 | |||
72 | static INLINE int xd3_decode_bits (xd3_stream *stream, | ||
73 | bit_state *bits, | ||
74 | const uint8_t **input, | ||
75 | const uint8_t *input_max, | ||
76 | usize_t nbits, | ||
77 | usize_t *valuep) | ||
78 | { | ||
79 | usize_t value = 0; | ||
80 | usize_t vmask = 1 << nbits; | ||
81 | |||
82 | if (bits->cur_mask == 0x100) { goto next_byte; } | ||
83 | |||
84 | for (;;) | ||
85 | { | ||
86 | do | ||
87 | { | ||
88 | vmask >>= 1; | ||
89 | |||
90 | if (bits->cur_byte & bits->cur_mask) | ||
91 | { | ||
92 | value |= vmask; | ||
93 | } | ||
94 | |||
95 | IF_DEBUG1 (P(RINT "[dbits] %u", (bits->cur_byte & bits->cur_mask) && 1)); | ||
96 | |||
97 | bits->cur_mask <<= 1; | ||
98 | |||
99 | if (vmask == 1) { goto done; } | ||
100 | } | ||
101 | while (bits->cur_mask != 0x100); | ||
102 | |||
103 | next_byte: | ||
104 | |||
105 | if (*input == input_max) | ||
106 | { | ||
107 | stream->msg = "secondary decoder end of input"; | ||
108 | return EINVAL; | ||
109 | } | ||
110 | |||
111 | bits->cur_byte = *(*input)++; | ||
112 | bits->cur_mask = 1; | ||
113 | } | ||
114 | |||
115 | done: | ||
116 | |||
117 | (*valuep) = value; | ||
118 | return 0; | ||
119 | } | ||
120 | |||
121 | static INLINE int xd3_decode_bit (xd3_stream *stream, | ||
122 | bit_state *bits, | ||
123 | const uint8_t **input, | ||
124 | const uint8_t *input_max, | ||
125 | usize_t *valuep) | ||
126 | { | ||
127 | if (bits->cur_mask == 0x100) | ||
128 | { | ||
129 | if (*input == input_max) | ||
130 | { | ||
131 | stream->msg = "secondary decoder end of input"; | ||
132 | return EINVAL; | ||
133 | } | ||
134 | |||
135 | bits->cur_byte = *(*input)++; | ||
136 | bits->cur_mask = 1; | ||
137 | } | ||
138 | |||
139 | *valuep = (bits->cur_byte & bits->cur_mask) && 1; | ||
140 | |||
141 | IF_DEBUG1 (P(RINT "[dbit] %u", (bits->cur_byte & bits->cur_mask) && 1)); | ||
142 | |||
143 | bits->cur_mask <<= 1; | ||
144 | |||
145 | return 0; | ||
146 | } | ||
147 | |||
148 | #if REGRESSION_TEST | ||
149 | /* There may be extra bits at the end of secondary decompression, this macro checks for | ||
150 | * non-zero bits. This is overly strict, but helps pass the single-bit-error regression | ||
151 | * test. */ | ||
152 | static int | ||
153 | xd3_test_clean_bits (xd3_stream *stream, bit_state *bits) | ||
154 | { | ||
155 | for (; bits->cur_mask != 0x100; bits->cur_mask <<= 1) | ||
156 | { | ||
157 | if (bits->cur_byte & bits->cur_mask) | ||
158 | { | ||
159 | stream->msg = "secondary decoder garbage"; | ||
160 | return EINVAL; | ||
161 | } | ||
162 | } | ||
163 | |||
164 | return 0; | ||
165 | } | ||
166 | #endif | ||
167 | |||
168 | static xd3_sec_stream* | ||
169 | xd3_get_secondary (xd3_stream *stream, xd3_sec_stream **sec_streamp) | ||
170 | { | ||
171 | xd3_sec_stream *sec_stream; | ||
172 | |||
173 | if ((sec_stream = *sec_streamp) == NULL) | ||
174 | { | ||
175 | if ((*sec_streamp = stream->sec_type->alloc (stream)) == NULL) | ||
176 | { | ||
177 | return NULL; | ||
178 | } | ||
179 | |||
180 | sec_stream = *sec_streamp; | ||
181 | |||
182 | /* If cuumulative stats, init once. */ | ||
183 | stream->sec_type->init (sec_stream); | ||
184 | } | ||
185 | |||
186 | return sec_stream; | ||
187 | } | ||
188 | |||
189 | static int | ||
190 | xd3_decode_secondary (xd3_stream *stream, | ||
191 | xd3_desect *sect, | ||
192 | xd3_sec_stream **sec_streamp) | ||
193 | { | ||
194 | xd3_sec_stream *sec_stream; | ||
195 | uint32_t dec_size; | ||
196 | uint8_t *out_used; | ||
197 | int ret; | ||
198 | |||
199 | if ((sec_stream = xd3_get_secondary (stream, sec_streamp)) == NULL) { return ENOMEM; } | ||
200 | |||
201 | /* Decode the size, allocate the buffer. */ | ||
202 | if ((ret = xd3_read_size (stream, & sect->buf, sect->buf_max, & dec_size)) || | ||
203 | (ret = xd3_decode_allocate (stream, dec_size, & sect->copied2, & sect->alloc2, NULL, NULL))) | ||
204 | { | ||
205 | return ret; | ||
206 | } | ||
207 | |||
208 | out_used = sect->copied2; | ||
209 | |||
210 | if ((ret = stream->sec_type->decode (stream, sec_stream, | ||
211 | & sect->buf, sect->buf_max, | ||
212 | & out_used, out_used + dec_size))) { return ret; } | ||
213 | |||
214 | if (sect->buf != sect->buf_max) | ||
215 | { | ||
216 | stream->msg = "secondary decoder finished with unused input"; | ||
217 | return EINVAL; | ||
218 | } | ||
219 | |||
220 | if (out_used != sect->copied2 + dec_size) | ||
221 | { | ||
222 | stream->msg = "secondary decoder short output"; | ||
223 | return EINVAL; | ||
224 | } | ||
225 | |||
226 | sect->buf = sect->copied2; | ||
227 | sect->buf_max = sect->copied2 + dec_size; | ||
228 | |||
229 | return 0; | ||
230 | } | ||
231 | |||
232 | #if XD3_ENCODER | ||
233 | /* OPT: Should these be inline? */ | ||
234 | static INLINE int xd3_encode_bit (xd3_stream *stream, | ||
235 | xd3_output **output, | ||
236 | bit_state *bits, | ||
237 | int bit) | ||
238 | { | ||
239 | int ret; | ||
240 | |||
241 | if (bit) | ||
242 | { | ||
243 | bits->cur_byte |= bits->cur_mask; | ||
244 | } | ||
245 | |||
246 | IF_DEBUG1 (P(RINT "[ebit] %u", bit && 1)); | ||
247 | |||
248 | /* OPT: Might help to buffer more than 8 bits at once. */ | ||
249 | if (bits->cur_mask == 0x80) | ||
250 | { | ||
251 | if ((ret = xd3_emit_byte (stream, output, bits->cur_byte)) != 0) { return ret; } | ||
252 | |||
253 | bits->cur_mask = 1; | ||
254 | bits->cur_byte = 0; | ||
255 | } | ||
256 | else | ||
257 | { | ||
258 | bits->cur_mask <<= 1; | ||
259 | } | ||
260 | |||
261 | return 0; | ||
262 | } | ||
263 | |||
264 | static INLINE int xd3_flush_bits (xd3_stream *stream, | ||
265 | xd3_output **output, | ||
266 | bit_state *bits) | ||
267 | { | ||
268 | return (bits->cur_mask == 1) ? 0 : xd3_emit_byte (stream, output, bits->cur_byte); | ||
269 | } | ||
270 | |||
271 | static INLINE int xd3_encode_bits (xd3_stream *stream, | ||
272 | xd3_output **output, | ||
273 | bit_state *bits, | ||
274 | usize_t nbits, | ||
275 | usize_t value) | ||
276 | { | ||
277 | int ret; | ||
278 | usize_t mask = 1 << nbits; | ||
279 | |||
280 | XD3_ASSERT (nbits > 0); | ||
281 | XD3_ASSERT (nbits < sizeof (usize_t) * 8); | ||
282 | XD3_ASSERT (value < mask); | ||
283 | |||
284 | do | ||
285 | { | ||
286 | mask >>= 1; | ||
287 | |||
288 | if ((ret = xd3_encode_bit (stream, output, bits, value & mask))) { return ret; } | ||
289 | } | ||
290 | while (mask != 1); | ||
291 | |||
292 | return 0; | ||
293 | } | ||
294 | |||
295 | static int | ||
296 | xd3_encode_secondary (xd3_stream *stream, | ||
297 | xd3_output **head, | ||
298 | xd3_output **tail, | ||
299 | xd3_sec_stream **sec_streamp, | ||
300 | xd3_sec_cfg *cfg, | ||
301 | int *did_it) | ||
302 | { | ||
303 | xd3_sec_stream *sec_stream; | ||
304 | xd3_output *tmp_head; | ||
305 | xd3_output *tmp_tail; | ||
306 | |||
307 | usize_t comp_size; | ||
308 | usize_t orig_size; | ||
309 | |||
310 | int ret; | ||
311 | |||
312 | orig_size = xd3_sizeof_output (*head); | ||
313 | |||
314 | if (orig_size < SECONDARY_MIN_INPUT) { return 0; } | ||
315 | |||
316 | if ((sec_stream = xd3_get_secondary (stream, sec_streamp)) == NULL) { return ENOMEM; } | ||
317 | |||
318 | tmp_head = xd3_alloc_output (stream, NULL); | ||
319 | |||
320 | /* Encode the size, encode the data. @@ Encoding the size makes it simpler, but is a | ||
321 | * little gross. Should not need the entire section in contiguous memory, but it is | ||
322 | * much easier this way. */ | ||
323 | if ((ret = xd3_emit_size (stream, & tmp_head, orig_size)) || | ||
324 | (ret = stream->sec_type->encode (stream, sec_stream, *head, tmp_head, cfg))) { goto getout; } | ||
325 | |||
326 | /* If the secondary compressor determines its no good, it returns XD3_NOSECOND. */ | ||
327 | |||
328 | /* Setup tmp_tail, comp_size */ | ||
329 | tmp_tail = tmp_head; | ||
330 | comp_size = tmp_head->next; | ||
331 | |||
332 | while (tmp_tail->next_page != NULL) | ||
333 | { | ||
334 | tmp_tail = tmp_tail->next_page; | ||
335 | comp_size += tmp_tail->next; | ||
336 | } | ||
337 | |||
338 | XD3_ASSERT (comp_size == xd3_sizeof_output (tmp_head)); | ||
339 | XD3_ASSERT (tmp_tail != NULL); | ||
340 | |||
341 | if (comp_size < (orig_size - SECONDARY_MIN_SAVINGS)) | ||
342 | { | ||
343 | IF_DEBUG1(P(RINT "secondary saved %u bytes: %u -> %u (%0.2f%%)\n", | ||
344 | orig_size - comp_size, orig_size, comp_size, | ||
345 | (double) comp_size / (double) orig_size)); | ||
346 | |||
347 | xd3_free_output (stream, *head); | ||
348 | |||
349 | *head = tmp_head; | ||
350 | *tail = tmp_tail; | ||
351 | *did_it = 1; | ||
352 | } | ||
353 | else | ||
354 | { | ||
355 | getout: | ||
356 | if (ret == XD3_NOSECOND) { ret = 0; } | ||
357 | xd3_free_output (stream, tmp_head); | ||
358 | } | ||
359 | |||
360 | return ret; | ||
361 | } | ||
362 | #endif /* XD3_ENCODER */ | ||
363 | #endif /* _XDELTA3_SECOND_H_ */ | ||
diff --git a/xdelta3/xdelta3-test.h b/xdelta3/xdelta3-test.h new file mode 100755 index 0000000..198d440 --- /dev/null +++ b/xdelta3/xdelta3-test.h | |||
@@ -0,0 +1,2229 @@ | |||
1 | /* xdelta 3 - delta compression tools and library | ||
2 | * Copyright (C) 2001, 2003, 2004, 2005, 2006. Joshua P. MacDonald | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify | ||
5 | * it under the terms of the GNU General Public License as published by | ||
6 | * the Free Software Foundation; either version 2 of the License, or | ||
7 | * (at your option) any later version. | ||
8 | * | ||
9 | * This program is distributed in the hope that it will be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write to the Free Software | ||
16 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
17 | */ | ||
18 | |||
19 | #include <math.h> | ||
20 | #include <sys/wait.h> | ||
21 | |||
/* True when the last message on the variable named `stream' in the calling
 * scope is exactly the string X. */
#define MSG_IS(x) (stream->msg != NULL && strcmp ((x), stream->msg) == 0)

static const usize_t TWO_MEGS_AND_DELTA = (2 << 20) + (1 << 10);
/* Number of addresses encoded and decoded by test_address_cache. */
static const usize_t ADDR_CACHE_ROUNDS = 10000;

/* test_make_inputs draws sizes in [TEST_FILE_MEAN, 2 * TEST_FILE_MEAN). */
static const usize_t TEST_FILE_MEAN = 16384;
/* Mean and cap of the run lengths drawn in test_make_inputs. */
static const double TEST_ADD_MEAN = 16;
static const double TEST_ADD_MAX = 256;
/* Fraction of the generated bytes that may be fresh random "add" data. */
static const double TEST_ADD_RATIO = 0.1;
static const double TEST_EPSILON = 0.5;

/* Scratch file names; filled in by test_setup, removed by test_cleanup. */
static char TEST_TARGET_FILE[32];
static char TEST_SOURCE_FILE[32];
static char TEST_DELTA_FILE[32];
static char TEST_RECON_FILE[32];
static char TEST_RECON2_FILE[32];
static char TEST_COPY_FILE[32];

/* Size of the command-line and file-comparison buffers used by the helpers. */
static int TESTBUFSIZE = 1024 * 16;
41 | |||
42 | static int test_exponential_dist (usize_t mean, usize_t max); | ||
43 | |||
44 | /* TODO | ||
45 | * | ||
46 | * 1. Test state changes: that config is called before open, open called before | ||
47 | * encode/decode..., close incomplete works, invalid options, consume_output always | ||
48 | * called, no mixing of encode/decode, etc. | ||
49 | * | ||
50 | * 2. Test window selection, window alignment, 1.5 pass alg vs. greedy | ||
51 | */ | ||
52 | |||
53 | /****************************************************************************************** | ||
54 | TEST HELPERS | ||
55 | ******************************************************************************************/ | ||
56 | |||
57 | static void DOT (void) { P(RINT "."); } | ||
58 | static int do_cmd (xd3_stream *stream, const char *buf) | ||
59 | { | ||
60 | int ret; | ||
61 | if ((ret = system (buf)) != 0) | ||
62 | { | ||
63 | if (WIFEXITED (ret)) | ||
64 | { | ||
65 | stream->msg = "command exited non-zero"; | ||
66 | } | ||
67 | else | ||
68 | { | ||
69 | stream->msg = "abnormal command termination"; | ||
70 | } | ||
71 | return EINVAL; | ||
72 | } | ||
73 | DOT (); | ||
74 | return 0; | ||
75 | } | ||
76 | static int do_fail (xd3_stream *stream, const char *buf) | ||
77 | { | ||
78 | int ret; | ||
79 | ret = system (buf); | ||
80 | if (! WIFEXITED (ret) || WEXITSTATUS (ret) != 1) | ||
81 | { | ||
82 | stream->msg = "command should have not succeeded"; | ||
83 | P(RINT "command was %s", buf); | ||
84 | return EINVAL; | ||
85 | } | ||
86 | DOT (); | ||
87 | return 0; | ||
88 | } | ||
89 | |||
90 | static int | ||
91 | test_exponential_dist (usize_t mean, usize_t max) | ||
92 | { | ||
93 | double mean_d = mean; | ||
94 | double erand = log (1.0 / drand48 ()); | ||
95 | usize_t x = (usize_t) (mean_d * erand + 0.5); | ||
96 | |||
97 | return min (x, max); | ||
98 | } | ||
99 | |||
100 | /* Test that the exponential distribution actually produces its mean. */ | ||
101 | static int | ||
102 | test_random_numbers (xd3_stream *stream, int ignore) | ||
103 | { | ||
104 | int i; | ||
105 | usize_t sum = 0; | ||
106 | usize_t mean = 50; | ||
107 | usize_t n_rounds = 10000; | ||
108 | double average, error; | ||
109 | double allowed_error = 1.0; | ||
110 | |||
111 | for (i = 0; i < n_rounds; i += 1) | ||
112 | { | ||
113 | sum += test_exponential_dist (mean, USIZE_T_MAX); | ||
114 | } | ||
115 | |||
116 | average = (double) sum / (double) n_rounds; | ||
117 | error = average - (double) mean; | ||
118 | |||
119 | if (error < allowed_error && error > -allowed_error) | ||
120 | { | ||
121 | /*P(RINT "error is %f\n", error);*/ | ||
122 | return 0; | ||
123 | } | ||
124 | |||
125 | stream->msg = "random distribution looks broken"; | ||
126 | return EINVAL; | ||
127 | } | ||
128 | |||
129 | static int | ||
130 | test_setup (void) | ||
131 | { | ||
132 | static int x = 0; | ||
133 | x++; | ||
134 | //P(RINT "test setup: %d", x); | ||
135 | sprintf (TEST_TARGET_FILE, "/tmp/xdtest.target.%d", x); | ||
136 | sprintf (TEST_SOURCE_FILE, "/tmp/xdtest.source.%d", x); | ||
137 | sprintf (TEST_DELTA_FILE, "/tmp/xdtest.delta.%d", x); | ||
138 | sprintf (TEST_RECON_FILE, "/tmp/xdtest.recon.%d", x); | ||
139 | sprintf (TEST_RECON2_FILE, "/tmp/xdtest.recon2.%d", x); | ||
140 | sprintf (TEST_COPY_FILE, "/tmp/xdtest.copy.%d", x); | ||
141 | return 0; | ||
142 | } | ||
143 | |||
/* Best-effort removal of FILE.
 *
 * Tries unlink() first; a missing file counts as success.  If unlink() fails
 * for any other reason, falls back to a single "rm -f FILE" and then returns.
 * The fallback is not retried, so a file that cannot be removed (for example
 * a directory, or a path without write permission) can no longer hang the
 * test run in an endless loop.  The command buffer is sized from the path,
 * so long names cannot overflow it. */
static void
test_unlink (char* file)
{
  char  *cmd;
  size_t cmd_size;

  if (unlink (file) == 0 || errno == ENOENT)
    {
      return;
    }

  /* sizeof includes the terminating NUL of the "rm -f " prefix. */
  cmd_size = strlen (file) + sizeof ("rm -f ");

  if ((cmd = malloc (cmd_size)) == NULL)
    {
      return;
    }

  snprintf (cmd, cmd_size, "rm -f %s", file);

  if (system (cmd) != 0)
    {
      /* Nothing more can be done here; cleanup is best-effort. */
    }

  free (cmd);
}
158 | |||
159 | static void | ||
160 | test_cleanup (void) | ||
161 | { | ||
162 | static int x = 0; | ||
163 | x++; | ||
164 | //P(RINT "test cleanup: %d", x); | ||
165 | test_unlink (TEST_TARGET_FILE); | ||
166 | test_unlink (TEST_SOURCE_FILE); | ||
167 | test_unlink (TEST_DELTA_FILE); | ||
168 | test_unlink (TEST_RECON_FILE); | ||
169 | test_unlink (TEST_RECON2_FILE); | ||
170 | test_unlink (TEST_COPY_FILE); | ||
171 | } | ||
172 | |||
173 | static int | ||
174 | test_make_inputs (xd3_stream *stream, xoff_t *ss_out, xoff_t *ts_out) | ||
175 | { | ||
176 | usize_t ts = (lrand48 () % TEST_FILE_MEAN) + TEST_FILE_MEAN; | ||
177 | usize_t ss = (lrand48 () % TEST_FILE_MEAN) + TEST_FILE_MEAN; | ||
178 | uint8_t *buf = malloc (ts + ss), *sbuf = buf /*, *tbuf = buf + ss*/; | ||
179 | usize_t sadd = 0, sadd_max = ss * TEST_ADD_RATIO; | ||
180 | FILE *tf /*, *sf*/; | ||
181 | usize_t i, j; | ||
182 | int ret; | ||
183 | |||
184 | if (buf == NULL) { return ENOMEM; } | ||
185 | |||
186 | if ((tf = fopen (TEST_TARGET_FILE, "w")) == NULL) | ||
187 | { | ||
188 | stream->msg = "write failed"; | ||
189 | ret = get_errno (); | ||
190 | goto failure; | ||
191 | } | ||
192 | |||
193 | /* Then modify the data to produce copies, everything not copied is an add. The | ||
194 | * following logic produces the TEST_ADD_RATIO. The variable SADD contains the number | ||
195 | * of adds so far, which should not exceed SADD_MAX. */ | ||
196 | for (i = 0; i < ss; ) | ||
197 | { | ||
198 | usize_t left = ss - i; | ||
199 | usize_t next = test_exponential_dist (TEST_ADD_MEAN, TEST_ADD_MAX); | ||
200 | usize_t add_left = sadd_max - sadd; | ||
201 | double add_prob = (left == 0) ? 0 : (add_left / left); | ||
202 | |||
203 | next = min (left, next); | ||
204 | |||
205 | if (i > 0 && (next > add_left || drand48 () >= add_prob)) | ||
206 | { | ||
207 | /* Copy */ | ||
208 | usize_t offset = lrand48 () % i; | ||
209 | |||
210 | for (j = 0; j < next; j += 1) | ||
211 | { | ||
212 | sbuf[i++] = sbuf[offset + j]; | ||
213 | } | ||
214 | } | ||
215 | else | ||
216 | { | ||
217 | /* Add */ | ||
218 | for (j = 0; j < next; j += 1) | ||
219 | { | ||
220 | sbuf[i++] = lrand48 (); | ||
221 | } | ||
222 | } | ||
223 | } | ||
224 | |||
225 | if ((fwrite (sbuf, 1, ss, tf) != ss)) | ||
226 | { | ||
227 | stream->msg = "write failed"; | ||
228 | ret = get_errno (); | ||
229 | goto failure; | ||
230 | } | ||
231 | |||
232 | if ((ret = fclose (tf)) /* || (ret = fclose (sf))*/) | ||
233 | { | ||
234 | stream->msg = "close failed"; | ||
235 | ret = get_errno (); | ||
236 | goto failure; | ||
237 | } | ||
238 | |||
239 | if (ts_out) { (*ts_out) = ts; } | ||
240 | if (ss_out) { (*ss_out) = ss; } | ||
241 | |||
242 | failure: | ||
243 | free (buf); | ||
244 | return ret; | ||
245 | } | ||
246 | |||
247 | static int | ||
248 | compare_files (xd3_stream *stream, const char* tgt, const char *rec) | ||
249 | { | ||
250 | FILE *orig, *recons; | ||
251 | uint8_t obuf[TESTBUFSIZE], rbuf[TESTBUFSIZE]; | ||
252 | int offset = 0; | ||
253 | int i; | ||
254 | int oc, rc; | ||
255 | |||
256 | if ((orig = fopen (tgt, "r")) == NULL || | ||
257 | (recons = fopen (rec, "r")) == NULL) | ||
258 | { | ||
259 | stream->msg = "read failed"; | ||
260 | return get_errno (); | ||
261 | } | ||
262 | |||
263 | for (;;) | ||
264 | { | ||
265 | oc = fread (obuf, 1, TESTBUFSIZE, orig); | ||
266 | rc = fread (rbuf, 1, TESTBUFSIZE, recons); | ||
267 | |||
268 | if (oc < 0 || rc < 0) | ||
269 | { | ||
270 | stream->msg = "read failed"; | ||
271 | return get_errno (); | ||
272 | } | ||
273 | |||
274 | if (oc != rc) | ||
275 | { | ||
276 | stream->msg = "compare files: different length"; | ||
277 | return EINVAL; | ||
278 | } | ||
279 | |||
280 | if (oc == 0) | ||
281 | { | ||
282 | break; | ||
283 | } | ||
284 | |||
285 | for (i = 0; i < oc; i += 1) | ||
286 | { | ||
287 | if (obuf[i] != rbuf[i]) | ||
288 | { | ||
289 | stream->msg = "compare files: different values"; | ||
290 | return EINVAL; | ||
291 | } | ||
292 | } | ||
293 | |||
294 | offset += oc; | ||
295 | } | ||
296 | |||
297 | fclose (orig); | ||
298 | fclose (recons); | ||
299 | return 0; | ||
300 | } | ||
301 | |||
302 | static int | ||
303 | test_save_copy (const char *origname) | ||
304 | { | ||
305 | char buf[TESTBUFSIZE]; | ||
306 | int ret; | ||
307 | |||
308 | sprintf (buf, "cp -f %s %s", origname, TEST_COPY_FILE); | ||
309 | |||
310 | if ((ret = system (buf)) != 0) | ||
311 | { | ||
312 | return EINVAL; | ||
313 | } | ||
314 | |||
315 | return 0; | ||
316 | } | ||
317 | |||
318 | static int | ||
319 | test_file_size (const char* file, xoff_t *size) | ||
320 | { | ||
321 | struct stat sbuf; | ||
322 | int ret; | ||
323 | |||
324 | if (stat (file, & sbuf) < 0) | ||
325 | { | ||
326 | ret = get_errno (); | ||
327 | P(RINT "xdelta3: stat failed: %s: %s\n", file, strerror (ret)); | ||
328 | return ret; | ||
329 | } | ||
330 | |||
331 | if (! S_ISREG (sbuf.st_mode)) | ||
332 | { | ||
333 | ret = EINVAL; | ||
334 | P(RINT "xdelta3: not a regular file: %s: %s\n", file, strerror (ret)); | ||
335 | return ret; | ||
336 | } | ||
337 | |||
338 | (*size) = sbuf.st_size; | ||
339 | return 0; | ||
340 | } | ||
341 | |||
342 | /****************************************************************************************** | ||
343 | READ OFFSET | ||
344 | ******************************************************************************************/ | ||
345 | |||
346 | /* Common test for read_integer errors: encodes a 64-bit value and then attempts to read | ||
347 | * as a 32-bit value. If TRUNC is non-zero, attempts to get errors by shortening the | ||
348 | * input, otherwise it should overflow. Expects EINVAL and MSG. */ | ||
349 | static int | ||
350 | test_read_integer_error (xd3_stream *stream, int trunto, const char *msg) | ||
351 | { | ||
352 | uint64_t eval = (uint64_t) UINT32_MAX + 1ULL; | ||
353 | uint32_t rval; | ||
354 | xd3_output *buf = NULL; | ||
355 | const uint8_t *max; | ||
356 | const uint8_t *inp; | ||
357 | int ret; | ||
358 | |||
359 | buf = xd3_alloc_output (stream, buf); | ||
360 | |||
361 | if ((ret = xd3_emit_uint64_t (stream, & buf, eval))) | ||
362 | { | ||
363 | goto fail; | ||
364 | } | ||
365 | |||
366 | again: | ||
367 | |||
368 | inp = buf->base; | ||
369 | max = buf->base + buf->next - trunto; | ||
370 | |||
371 | if ((ret = xd3_read_uint32_t (stream, & inp, max, & rval)) != EINVAL || !MSG_IS (msg)) | ||
372 | { | ||
373 | ret = EINVAL; | ||
374 | } | ||
375 | else if (trunto && trunto < buf->next) | ||
376 | { | ||
377 | trunto += 1; | ||
378 | goto again; | ||
379 | } | ||
380 | else | ||
381 | { | ||
382 | ret = 0; | ||
383 | } | ||
384 | |||
385 | fail: | ||
386 | xd3_free_output (stream, buf); | ||
387 | return ret; | ||
388 | } | ||
389 | |||
390 | /* Test integer overflow using the above routine. */ | ||
391 | static int | ||
392 | test_decode_integer_overflow (xd3_stream *stream, int unused) | ||
393 | { | ||
394 | return test_read_integer_error (stream, 0, "overflow in read_intger"); | ||
395 | } | ||
396 | |||
397 | /* Test integer EOI using the above routine. */ | ||
398 | static int | ||
399 | test_decode_integer_end_of_input (xd3_stream *stream, int unused) | ||
400 | { | ||
401 | return test_read_integer_error (stream, 1, "end-of-input in read_integer"); | ||
402 | } | ||
403 | |||
/* Function body shared by the integer round-trip tests.  Checks that
 * emit_integer/decode_integer/sizeof_integer/read_integer agree on correct
 * inputs.
 *
 * TYPE is the integer type (also pasted into the xd3_emit_/xd3_read_/
 * xd3_sizeof_/xd3_decode_ helper names), ONE is the constant 1 of that type
 * and MAX its largest value.  The values tested are (ONE << i) - ONE,
 * ONE << i and (ONE << i) + ONE for i = 0, 7, 14, ..., plus MAX - ONE and
 * MAX.
 *
 * First loop: each value is emitted alone into RBUF (reset every round) and
 * appended to DBUF; the emitted length must equal xd3_sizeof_TYPE and
 * xd3_read_TYPE must return the value.  Second loop: DBUF is installed as the
 * stream input and xd3_decode_TYPE must return every value in order, leaving
 * no input unread.
 *
 * The expansion expects a variable named `stream' in scope and ends in a
 * `return ret' without a semicolon; the caller supplies it.  RET is 0 on
 * success and EINVAL or a helper's error code on failure. */
#define TEST_ENCODE_DECODE_INTEGER(TYPE,ONE,MAX) \
  xd3_output *rbuf = NULL; \
  xd3_output *dbuf = NULL; \
  TYPE values[64]; \
  int nvalues = 0; \
  int i, ret = 0; \
 \
  for (i = 0; i < (sizeof (TYPE) * 8); i += 7) \
    { \
      values[nvalues++] = (ONE << i) - ONE; \
      values[nvalues++] = (ONE << i); \
      values[nvalues++] = (ONE << i) + ONE; \
    } \
 \
  values[nvalues++] = MAX-ONE; \
  values[nvalues++] = MAX; \
 \
  rbuf = xd3_alloc_output (stream, rbuf); \
  dbuf = xd3_alloc_output (stream, dbuf); \
 \
  for (i = 0; i < nvalues; i += 1) \
    { \
      const uint8_t *max; \
      const uint8_t *inp; \
      TYPE val; \
 \
      DOT (); \
      rbuf->next = 0; \
 \
      if ((ret = xd3_emit_ ## TYPE (stream, & rbuf, values[i])) || \
	  (ret = xd3_emit_ ## TYPE (stream, & dbuf, values[i]))) \
	{ \
	  goto fail; \
	} \
 \
      inp = rbuf->base; \
      max = rbuf->base + rbuf->next; \
 \
      if (rbuf->next != xd3_sizeof_ ## TYPE (values[i])) \
	{ \
	  ret = EINVAL; \
	  goto fail; \
	} \
 \
      if ((ret = xd3_read_ ## TYPE (stream, & inp, max, & val))) \
	{ \
	  goto fail; \
	} \
 \
      if (val != values[i]) \
	{ \
	  ret = EINVAL; \
	  goto fail; \
	} \
 \
      DOT (); \
    } \
 \
  stream->next_in = dbuf->base; \
  stream->avail_in = dbuf->next; \
 \
  for (i = 0; i < nvalues; i += 1) \
    { \
      TYPE val; \
 \
      if ((ret = xd3_decode_ ## TYPE (stream, & val))) \
	{ \
	  goto fail; \
	} \
 \
      if (val != values[i]) \
	{ \
	  ret = EINVAL; \
	  goto fail; \
	} \
    } \
 \
  if (stream->avail_in != 0) \
    { \
      ret = EINVAL; \
      goto fail; \
    } \
 \
 fail: \
  xd3_free_output (stream, rbuf); \
  xd3_free_output (stream, dbuf); \
 \
  return ret
494 | |||
/* Round-trip test for the 32-bit integer coding helpers; the whole body is
 * the TEST_ENCODE_DECODE_INTEGER expansion.  The second argument is unused.
 * Returns 0 on success or an error code. */
static int
test_encode_decode_uint32_t (xd3_stream *stream, int unused)
{
  TEST_ENCODE_DECODE_INTEGER(uint32_t,1U,UINT32_MAX);
}
500 | |||
/* Round-trip test for the 64-bit integer coding helpers; the whole body is
 * the TEST_ENCODE_DECODE_INTEGER expansion.  The second argument is unused.
 * Returns 0 on success or an error code. */
static int
test_encode_decode_uint64_t (xd3_stream *stream, int unused)
{
  TEST_ENCODE_DECODE_INTEGER(uint64_t,1ULL,UINT64_MAX);
}
506 | |||
507 | static int | ||
508 | test_usize_t_overflow (xd3_stream *stream, int unused) | ||
509 | { | ||
510 | if (USIZE_T_OVERFLOW (0, 0)) { goto fail; } | ||
511 | if (USIZE_T_OVERFLOW (USIZE_T_MAX, 0)) { goto fail; } | ||
512 | if (USIZE_T_OVERFLOW (0, USIZE_T_MAX)) { goto fail; } | ||
513 | if (USIZE_T_OVERFLOW (USIZE_T_MAX / 2, 0)) { goto fail; } | ||
514 | if (USIZE_T_OVERFLOW (USIZE_T_MAX / 2, USIZE_T_MAX / 2)) { goto fail; } | ||
515 | if (USIZE_T_OVERFLOW (USIZE_T_MAX / 2, USIZE_T_MAX / 2 + 1)) { goto fail; } | ||
516 | |||
517 | if (! USIZE_T_OVERFLOW (USIZE_T_MAX, 1)) { goto fail; } | ||
518 | if (! USIZE_T_OVERFLOW (1, USIZE_T_MAX)) { goto fail; } | ||
519 | if (! USIZE_T_OVERFLOW (USIZE_T_MAX / 2 + 1, USIZE_T_MAX / 2 + 1)) { goto fail; } | ||
520 | |||
521 | return 0; | ||
522 | |||
523 | fail: | ||
524 | stream->msg = "incorrect overflow computation"; | ||
525 | return EINVAL; | ||
526 | } | ||
527 | |||
528 | /****************************************************************************************** | ||
529 | Address cache | ||
530 | ******************************************************************************************/ | ||
531 | |||
532 | static int | ||
533 | test_address_cache (xd3_stream *stream, int unused) | ||
534 | { | ||
535 | int ret, i; | ||
536 | usize_t offset; | ||
537 | usize_t *addrs; | ||
538 | uint8_t *big_buf, *buf_max; | ||
539 | const uint8_t *buf; | ||
540 | xd3_output *outp; | ||
541 | uint8_t *modes; | ||
542 | int mode_counts[16]; | ||
543 | |||
544 | stream->acache.s_near = stream->code_table_desc->near_modes; | ||
545 | stream->acache.s_same = stream->code_table_desc->same_modes; | ||
546 | |||
547 | if ((ret = xd3_encode_init (stream))) { return ret; } | ||
548 | |||
549 | addrs = xd3_alloc (stream, sizeof (usize_t), ADDR_CACHE_ROUNDS); | ||
550 | modes = xd3_alloc (stream, sizeof (uint8_t), ADDR_CACHE_ROUNDS); | ||
551 | |||
552 | memset (mode_counts, 0, sizeof (mode_counts)); | ||
553 | memset (modes, 0, ADDR_CACHE_ROUNDS); | ||
554 | |||
555 | addrs[0] = 0; | ||
556 | |||
557 | srand48 (0x9f73f7fc); | ||
558 | |||
559 | /* First pass: encode addresses */ | ||
560 | xd3_init_cache (& stream->acache); | ||
561 | |||
562 | for (offset = 1; offset < ADDR_CACHE_ROUNDS; offset += 1) | ||
563 | { | ||
564 | double p; | ||
565 | usize_t addr; | ||
566 | usize_t prev_i; | ||
567 | usize_t nearby; | ||
568 | |||
569 | p = drand48 (); | ||
570 | prev_i = lrand48 () % offset; | ||
571 | nearby = (lrand48 () % 256) % offset, 1; | ||
572 | nearby = max (1U, nearby); | ||
573 | |||
574 | if (p < 0.1) { addr = addrs[offset-nearby]; } | ||
575 | else if (p < 0.4) { addr = min (addrs[prev_i] + nearby, offset-1); } | ||
576 | else { addr = prev_i; } | ||
577 | |||
578 | if ((ret = xd3_encode_address (stream, addr, offset, & modes[offset]))) { return ret; } | ||
579 | |||
580 | addrs[offset] = addr; | ||
581 | mode_counts[modes[offset]] += 1; | ||
582 | } | ||
583 | |||
584 | /* Copy addresses into a contiguous buffer. */ | ||
585 | big_buf = xd3_alloc (stream, xd3_sizeof_output (ADDR_HEAD (stream)), 1); | ||
586 | |||
587 | for (offset = 0, outp = ADDR_HEAD (stream); outp != NULL; offset += outp->next, outp = outp->next_page) | ||
588 | { | ||
589 | memcpy (big_buf + offset, outp->base, outp->next); | ||
590 | } | ||
591 | |||
592 | buf_max = big_buf + offset; | ||
593 | buf = big_buf; | ||
594 | |||
595 | /* Second pass: decode addresses */ | ||
596 | xd3_init_cache (& stream->acache); | ||
597 | |||
598 | for (offset = 1; offset < ADDR_CACHE_ROUNDS; offset += 1) | ||
599 | { | ||
600 | usize_t addr; | ||
601 | |||
602 | if ((ret = xd3_decode_address (stream, offset, modes[offset], & buf, buf_max, & addr))) { return ret; } | ||
603 | |||
604 | if (addr != addrs[offset]) | ||
605 | { | ||
606 | stream->msg = "incorrect decoded address"; | ||
607 | return EINVAL; | ||
608 | } | ||
609 | } | ||
610 | |||
611 | /* Check that every byte, mode was used. */ | ||
612 | if (buf != buf_max) | ||
613 | { | ||
614 | stream->msg = "address bytes not used"; | ||
615 | return EINVAL; | ||
616 | } | ||
617 | |||
618 | for (i = 0; i < (2 + stream->acache.s_same + stream->acache.s_near); i += 1) | ||
619 | { | ||
620 | if (mode_counts[i] == 0) | ||
621 | { | ||
622 | stream->msg = "address mode not used"; | ||
623 | return EINVAL; | ||
624 | } | ||
625 | } | ||
626 | |||
627 | xd3_free (stream, modes); | ||
628 | xd3_free (stream, addrs); | ||
629 | xd3_free (stream, big_buf); | ||
630 | |||
631 | return 0; | ||
632 | } | ||
633 | |||
634 | /****************************************************************************************** | ||
635 | Encode and decode with single bit error | ||
636 | ******************************************************************************************/ | ||
637 | |||
/* Sample input shared by the encode/decode tests below.  It is laid out as sixteen rows
 * of 16 bytes each; the final row is 15 characters and takes its 16th byte from the
 * string's terminating NUL, so sizeof (test_text) is exactly 256.  Every row must keep
 * its full width (the "about" row is padded with spaces) or that size no longer holds.
 *
 * It compresses from 256 to around 185 bytes.
 * Avoids matching addresses that are a single-bit difference.
 * Avoids matching address 0. */
static const uint8_t test_text[] =
"this is a story\n"
"abouttttttttttt\n"
"- his is a stor\n"
"- about nothing "
" all. boutique -"
"his story is a -"
"about           "
"what happens all"
" the time what -"
"am I ttttttt the"
" person said, so"
" what, per son -"
" gory story is -"
" about nothing -"
"tttttt to test -"
"his sto nothing";

/* Application header attached to the encoded test data.  The tests pass
 * sizeof (test_apphead), so the terminating NUL is part of the header. */
static const uint8_t test_apphead[] = "header test";
660 | |||
661 | static int | ||
662 | test_compress_text (xd3_stream *stream, | ||
663 | uint8_t *encoded, | ||
664 | usize_t *encoded_size) | ||
665 | { | ||
666 | int ret; | ||
667 | xd3_config cfg; | ||
668 | int flags = stream->flags; | ||
669 | |||
670 | stream->flags |= XD3_FLUSH; | ||
671 | |||
672 | (*encoded_size) = 0; | ||
673 | |||
674 | xd3_set_appheader (stream, test_apphead, sizeof (test_apphead)); | ||
675 | |||
676 | if ((ret = xd3_encode_completely (stream, test_text, sizeof (test_text), | ||
677 | encoded, encoded_size, 4*sizeof (test_text)))) { goto fail; } | ||
678 | |||
679 | if ((ret = xd3_close_stream (stream))) { goto fail; } | ||
680 | |||
681 | fail: | ||
682 | xd3_free_stream (stream); | ||
683 | xd3_init_config (& cfg, flags); | ||
684 | xd3_config_stream (stream, & cfg); | ||
685 | return ret; | ||
686 | } | ||
687 | |||
688 | static int | ||
689 | test_decompress_text (xd3_stream *stream, uint8_t *enc, usize_t enc_size, usize_t test_desize) | ||
690 | { | ||
691 | xd3_config cfg; | ||
692 | char decoded[sizeof (test_text)]; | ||
693 | uint8_t *apphead; | ||
694 | usize_t apphead_size; | ||
695 | usize_t decoded_size; | ||
696 | const char *msg; | ||
697 | int ret; | ||
698 | usize_t pos = 0; | ||
699 | int flags = stream->flags; | ||
700 | usize_t take; | ||
701 | |||
702 | input: | ||
703 | /* Test decoding test_desize input bytes at a time */ | ||
704 | take = min (enc_size - pos, test_desize); | ||
705 | XD3_ASSERT (take > 0); | ||
706 | |||
707 | xd3_avail_input (stream, enc + pos, take); | ||
708 | again: | ||
709 | ret = xd3_decode_input (stream); | ||
710 | |||
711 | pos += take; | ||
712 | take = 0; | ||
713 | |||
714 | switch (ret) | ||
715 | { | ||
716 | case XD3_OUTPUT: | ||
717 | break; | ||
718 | case XD3_WINSTART: | ||
719 | case XD3_GOTHEADER: | ||
720 | goto again; | ||
721 | case XD3_INPUT: | ||
722 | if (pos < enc_size) { goto input; } | ||
723 | /* else fallthrough */ | ||
724 | case XD3_WINFINISH: | ||
725 | default: | ||
726 | goto fail; | ||
727 | } | ||
728 | |||
729 | XD3_ASSERT (ret == XD3_OUTPUT); | ||
730 | XD3_ASSERT (pos == enc_size); | ||
731 | |||
732 | if (stream->avail_out != sizeof (test_text)) | ||
733 | { | ||
734 | stream->msg = "incorrect output size"; | ||
735 | ret = EINVAL; | ||
736 | goto fail; | ||
737 | } | ||
738 | |||
739 | decoded_size = stream->avail_out; | ||
740 | memcpy (decoded, stream->next_out, stream->avail_out); | ||
741 | |||
742 | xd3_consume_output (stream); | ||
743 | |||
744 | if ((ret = xd3_get_appheader (stream, & apphead, & apphead_size))) { goto fail; } | ||
745 | |||
746 | if (apphead_size != sizeof (test_apphead) || memcmp (apphead, test_apphead, sizeof (test_apphead)) != 0) | ||
747 | { | ||
748 | stream->msg = "incorrect appheader"; | ||
749 | ret = EINVAL; | ||
750 | goto fail; | ||
751 | } | ||
752 | |||
753 | if ((ret = xd3_decode_input (stream)) != XD3_WINFINISH || | ||
754 | (ret = xd3_close_stream (stream)) != 0) | ||
755 | { | ||
756 | goto fail; | ||
757 | } | ||
758 | |||
759 | if (decoded_size != sizeof (test_text) || memcmp (decoded, test_text, sizeof (test_text)) != 0) | ||
760 | { | ||
761 | stream->msg = "incorrect output text"; | ||
762 | ret = EIO; | ||
763 | } | ||
764 | |||
765 | fail: | ||
766 | msg = stream->msg; | ||
767 | xd3_free_stream (stream); | ||
768 | xd3_init_config (& cfg, flags); | ||
769 | xd3_config_stream (stream, & cfg); | ||
770 | stream->msg = msg; | ||
771 | |||
772 | return ret; | ||
773 | } | ||
774 | |||
775 | static int | ||
776 | test_decompress_single_bit_error (xd3_stream *stream, int expected_non_failures) | ||
777 | { | ||
778 | int ret; | ||
779 | int i; | ||
780 | uint8_t encoded[4*sizeof (test_text)]; /* make room for alt code table */ | ||
781 | usize_t encoded_size; | ||
782 | int non_failures = 0; | ||
783 | int cksum = (stream->flags & XD3_ADLER32) != 0; | ||
784 | |||
785 | #if 1 | ||
786 | #define TEST_FAILURES() | ||
787 | #else | ||
788 | /* For checking non-failure cases by hand, enable this macro and run xdelta printdelta | ||
789 | * with print_cpymode enabled. Every non-failure should change a copy address mode, | ||
790 | * which doesn't cause a failure because the address cache starts out with all zeros. | ||
791 | |||
792 | ./xdelta3 test | ||
793 | for i in test_text.xz.*; do ./xdelta3 printdelta $i > $i.out; diff $i.out test_text.xz.0.out; done | ||
794 | |||
795 | */ | ||
796 | system ("rm -rf test_text.*"); | ||
797 | { | ||
798 | char buf[64]; | ||
799 | FILE *f; | ||
800 | sprintf (buf, "test_text"); | ||
801 | f = fopen (buf, "w"); | ||
802 | fwrite (test_text,1,sizeof (test_text),f); | ||
803 | fclose (f); | ||
804 | } | ||
805 | #define TEST_FAILURES() \ | ||
806 | do { \ | ||
807 | char buf[64]; \ | ||
808 | FILE *f; \ | ||
809 | sprintf (buf, "test_text.xz.%d", non_failures); \ | ||
810 | f = fopen (buf, "w"); \ | ||
811 | fwrite (encoded,1,encoded_size,f); \ | ||
812 | fclose (f); \ | ||
813 | } while (0) | ||
814 | #endif | ||
815 | |||
816 | stream->sec_data.inefficient = 1; | ||
817 | stream->sec_inst.inefficient = 1; | ||
818 | stream->sec_addr.inefficient = 1; | ||
819 | |||
820 | /* Encode text, test correct input */ | ||
821 | if ((ret = test_compress_text (stream, encoded, & encoded_size))) | ||
822 | { | ||
823 | /*stream->msg = "without error: encode failure";*/ | ||
824 | return ret; | ||
825 | } | ||
826 | if ((ret = test_decompress_text (stream, encoded, encoded_size, sizeof (test_text) / 4))) | ||
827 | { | ||
828 | /*stream->msg = "without error: decode failure";*/ | ||
829 | return ret; | ||
830 | } | ||
831 | |||
832 | TEST_FAILURES(); | ||
833 | |||
834 | for (i = 0; i < encoded_size*8; i += 1) | ||
835 | { | ||
836 | /* Single bit error. */ | ||
837 | encoded[i/8] ^= 1 << (i%8); | ||
838 | |||
839 | if ((ret = test_decompress_text (stream, encoded, encoded_size, sizeof (test_text))) == 0) | ||
840 | { | ||
841 | non_failures += 1; | ||
842 | /*P(RINT "%u[%u] non-failure %u\n", i/8, i%8, non_failures);*/ | ||
843 | TEST_FAILURES(); | ||
844 | } | ||
845 | else | ||
846 | { | ||
847 | /*P(RINT "%u[%u] failure: %s\n", i/8, i%8, stream->msg);*/ | ||
848 | } | ||
849 | |||
850 | /* decompress_text returns EIO when the final memcmp() fails, but that | ||
851 | * should never happen with checksumming on. */ | ||
852 | if (cksum && ret == EIO) | ||
853 | { | ||
854 | /*P(RINT "%u[%u] cksum mismatch\n", i/8, i%8);*/ | ||
855 | stream->msg = "checksum mismatch"; | ||
856 | return EINVAL; | ||
857 | } | ||
858 | |||
859 | /* Undo single bit error. */ | ||
860 | encoded[i/8] ^= 1 << (i%8); | ||
861 | } | ||
862 | |||
863 | /* Test correct input again */ | ||
864 | if ((ret = test_decompress_text (stream, encoded, encoded_size, 1))) | ||
865 | { | ||
866 | /*stream->msg = "without error: decode failure";*/ | ||
867 | return ret; | ||
868 | } | ||
869 | |||
870 | /* Check expected non-failures */ | ||
871 | if (non_failures != expected_non_failures) | ||
872 | { | ||
873 | P(RINT "non-failures %u; expected %u", non_failures, expected_non_failures); | ||
874 | stream->msg = "incorrect"; | ||
875 | return EINVAL; | ||
876 | } | ||
877 | |||
878 | DOT (); | ||
879 | |||
880 | return 0; | ||
881 | } | ||
882 | |||
883 | /****************************************************************************************** | ||
884 | Secondary compression tests | ||
885 | ******************************************************************************************/ | ||
886 | |||
887 | #if SECONDARY_ANY | ||
888 | typedef int (*sec_dist_func) (xd3_stream *stream, xd3_output *data); | ||
889 | |||
890 | static int sec_dist_func1 (xd3_stream *stream, xd3_output *data); | ||
891 | static int sec_dist_func2 (xd3_stream *stream, xd3_output *data); | ||
892 | static int sec_dist_func3 (xd3_stream *stream, xd3_output *data); | ||
893 | static int sec_dist_func4 (xd3_stream *stream, xd3_output *data); | ||
894 | static int sec_dist_func5 (xd3_stream *stream, xd3_output *data); | ||
895 | static int sec_dist_func6 (xd3_stream *stream, xd3_output *data); | ||
896 | static int sec_dist_func7 (xd3_stream *stream, xd3_output *data); | ||
897 | static int sec_dist_func8 (xd3_stream *stream, xd3_output *data); | ||
898 | static int sec_dist_func9 (xd3_stream *stream, xd3_output *data); | ||
899 | |||
900 | static sec_dist_func sec_dists[] = | ||
901 | { | ||
902 | sec_dist_func1, | ||
903 | sec_dist_func2, | ||
904 | sec_dist_func3, | ||
905 | sec_dist_func4, | ||
906 | sec_dist_func5, | ||
907 | sec_dist_func6, | ||
908 | sec_dist_func7, | ||
909 | sec_dist_func8, | ||
910 | sec_dist_func9, | ||
911 | }; | ||
912 | |||
913 | /* Test ditsribution: 100 bytes of the same character (13). */ | ||
914 | static int | ||
915 | sec_dist_func1 (xd3_stream *stream, xd3_output *data) | ||
916 | { | ||
917 | int i, ret; | ||
918 | for (i = 0; i < 100; i += 1) | ||
919 | { | ||
920 | if ((ret = xd3_emit_byte (stream, & data, 13))) { return ret; } | ||
921 | } | ||
922 | return 0; | ||
923 | } | ||
924 | |||
925 | /* Test ditsribution: uniform covering half the alphabet. */ | ||
926 | static int | ||
927 | sec_dist_func2 (xd3_stream *stream, xd3_output *data) | ||
928 | { | ||
929 | int i, ret; | ||
930 | for (i = 0; i < ALPHABET_SIZE; i += 1) | ||
931 | { | ||
932 | if ((ret = xd3_emit_byte (stream, & data, i%(ALPHABET_SIZE/2)))) { return ret; } | ||
933 | } | ||
934 | return 0; | ||
935 | } | ||
936 | |||
937 | /* Test ditsribution: uniform covering the entire alphabet. */ | ||
938 | static int | ||
939 | sec_dist_func3 (xd3_stream *stream, xd3_output *data) | ||
940 | { | ||
941 | int i, ret; | ||
942 | for (i = 0; i < ALPHABET_SIZE; i += 1) | ||
943 | { | ||
944 | if ((ret = xd3_emit_byte (stream, & data, i%ALPHABET_SIZE))) { return ret; } | ||
945 | } | ||
946 | return 0; | ||
947 | } | ||
948 | |||
949 | /* Test distribution: An exponential distribution covering half the alphabet */ | ||
950 | static int | ||
951 | sec_dist_func4 (xd3_stream *stream, xd3_output *data) | ||
952 | { | ||
953 | int i, ret, x; | ||
954 | for (i = 0; i < ALPHABET_SIZE*20; i += 1) | ||
955 | { | ||
956 | x = test_exponential_dist (10, ALPHABET_SIZE/2); | ||
957 | if ((ret = xd3_emit_byte (stream, & data, x))) { return ret; } | ||
958 | } | ||
959 | return 0; | ||
960 | } | ||
961 | |||
962 | /* Test distribution: An exponential distribution covering the entire alphabet */ | ||
963 | static int | ||
964 | sec_dist_func5 (xd3_stream *stream, xd3_output *data) | ||
965 | { | ||
966 | int i, ret, x; | ||
967 | for (i = 0; i < ALPHABET_SIZE*20; i += 1) | ||
968 | { | ||
969 | x = test_exponential_dist (10, ALPHABET_SIZE-1); | ||
970 | if ((ret = xd3_emit_byte (stream, & data, x))) { return ret; } | ||
971 | } | ||
972 | return 0; | ||
973 | } | ||
974 | |||
975 | /* Test distribution: An uniform random distribution covering half the alphabet */ | ||
976 | static int | ||
977 | sec_dist_func6 (xd3_stream *stream, xd3_output *data) | ||
978 | { | ||
979 | int i, ret, x; | ||
980 | for (i = 0; i < ALPHABET_SIZE*20; i += 1) | ||
981 | { | ||
982 | x = lrand48 () % (ALPHABET_SIZE/2); | ||
983 | if ((ret = xd3_emit_byte (stream, & data, x))) { return ret; } | ||
984 | } | ||
985 | return 0; | ||
986 | } | ||
987 | |||
988 | /* Test distribution: An uniform random distribution covering the entire alphabet */ | ||
989 | static int | ||
990 | sec_dist_func7 (xd3_stream *stream, xd3_output *data) | ||
991 | { | ||
992 | int i, ret, x; | ||
993 | for (i = 0; i < ALPHABET_SIZE*20; i += 1) | ||
994 | { | ||
995 | x = lrand48 () % ALPHABET_SIZE; | ||
996 | if ((ret = xd3_emit_byte (stream, & data, x))) { return ret; } | ||
997 | } | ||
998 | return 0; | ||
999 | } | ||
1000 | |||
1001 | /* Test distribution: A small number of frequent characters, difficult to divide into many | ||
1002 | * groups */ | ||
1003 | static int | ||
1004 | sec_dist_func8 (xd3_stream *stream, xd3_output *data) | ||
1005 | { | ||
1006 | int i, ret; | ||
1007 | for (i = 0; i < ALPHABET_SIZE*5; i += 1) | ||
1008 | { | ||
1009 | if ((ret = xd3_emit_byte (stream, & data, 0))) { return ret; } | ||
1010 | if ((ret = xd3_emit_byte (stream, & data, 64))) { return ret; } | ||
1011 | if ((ret = xd3_emit_byte (stream, & data, 128))) { return ret; } | ||
1012 | if ((ret = xd3_emit_byte (stream, & data, 255))) { return ret; } | ||
1013 | } | ||
1014 | return 0; | ||
1015 | } | ||
1016 | |||
1017 | /* Test distribution: One that causes many FGK block promotions (found a bug) */ | ||
1018 | static int | ||
1019 | sec_dist_func9 (xd3_stream *stream, xd3_output *data) | ||
1020 | { | ||
1021 | int i, ret; | ||
1022 | |||
1023 | int ramp = 0; | ||
1024 | int rcount = 0; | ||
1025 | int prom = 0; | ||
1026 | int pcount = 0; | ||
1027 | |||
1028 | /* 200 was long enough to trigger it--only when stricter checking that counted all | ||
1029 | * blocks was turned on, but it seems I deleted this code. (missing fgk_free_block on | ||
1030 | * line 398). */ | ||
1031 | for (i = 0; i < ALPHABET_SIZE*200; i += 1) | ||
1032 | { | ||
1033 | repeat: | ||
1034 | if (ramp < ALPHABET_SIZE) | ||
1035 | { | ||
1036 | /* Initially Nth symbol has (N+1) frequency */ | ||
1037 | if (rcount <= ramp) | ||
1038 | { | ||
1039 | rcount += 1; | ||
1040 | if ((ret = xd3_emit_byte (stream, & data, ramp))) { return ret; } | ||
1041 | continue; | ||
1042 | } | ||
1043 | |||
1044 | ramp += 1; | ||
1045 | rcount = 0; | ||
1046 | goto repeat; | ||
1047 | } | ||
1048 | |||
1049 | /* Thereafter, promote least freq to max freq */ | ||
1050 | if (pcount == ALPHABET_SIZE) | ||
1051 | { | ||
1052 | pcount = 0; | ||
1053 | prom = (prom + 1) % ALPHABET_SIZE; | ||
1054 | } | ||
1055 | |||
1056 | pcount += 1; | ||
1057 | if ((ret = xd3_emit_byte (stream, & data, prom))) { return ret; } | ||
1058 | } | ||
1059 | |||
1060 | return 0; | ||
1061 | } | ||
1062 | |||
1063 | static int | ||
1064 | test_secondary_decode (xd3_stream *stream, | ||
1065 | const xd3_sec_type *sec, | ||
1066 | usize_t input_size, | ||
1067 | usize_t compress_size, | ||
1068 | const uint8_t *dec_input, | ||
1069 | const uint8_t *dec_correct, | ||
1070 | uint8_t *dec_output) | ||
1071 | { | ||
1072 | int ret; | ||
1073 | xd3_sec_stream *dec_stream; | ||
1074 | const uint8_t *dec_input_used, *dec_input_end; | ||
1075 | uint8_t *dec_output_used, *dec_output_end; | ||
1076 | |||
1077 | if ((dec_stream = sec->alloc (stream)) == NULL) { return ENOMEM; } | ||
1078 | |||
1079 | sec->init (dec_stream); | ||
1080 | |||
1081 | dec_input_used = dec_input; | ||
1082 | dec_input_end = dec_input + compress_size; | ||
1083 | |||
1084 | dec_output_used = dec_output; | ||
1085 | dec_output_end = dec_output + input_size; | ||
1086 | |||
1087 | if ((ret = sec->decode (stream, dec_stream, | ||
1088 | & dec_input_used, dec_input_end, | ||
1089 | & dec_output_used, dec_output_end))) | ||
1090 | { | ||
1091 | goto fail; | ||
1092 | } | ||
1093 | |||
1094 | if (dec_input_used != dec_input_end) | ||
1095 | { | ||
1096 | stream->msg = "unused input"; | ||
1097 | ret = EINVAL; | ||
1098 | goto fail; | ||
1099 | } | ||
1100 | |||
1101 | if (dec_output_used != dec_output_end) | ||
1102 | { | ||
1103 | stream->msg = "unfinished output"; | ||
1104 | ret = EINVAL; | ||
1105 | goto fail; | ||
1106 | } | ||
1107 | |||
1108 | if (memcmp (dec_output, dec_correct, input_size) != 0) | ||
1109 | { | ||
1110 | stream->msg = "incorrect output"; | ||
1111 | ret = EINVAL; | ||
1112 | goto fail; | ||
1113 | } | ||
1114 | |||
1115 | fail: | ||
1116 | sec->destroy (stream, dec_stream); | ||
1117 | return ret; | ||
1118 | } | ||
1119 | |||
1120 | static int | ||
1121 | test_secondary (xd3_stream *stream, const xd3_sec_type *sec, int groups) | ||
1122 | { | ||
1123 | int test_i, ret; | ||
1124 | xd3_output *in_head, *out_head, *p; | ||
1125 | usize_t p_off, input_size, compress_size; | ||
1126 | uint8_t *dec_input = NULL, *dec_output = NULL, *dec_correct = NULL; | ||
1127 | xd3_sec_stream *enc_stream; | ||
1128 | xd3_sec_cfg cfg; | ||
1129 | |||
1130 | memset (& cfg, 0, sizeof (cfg)); | ||
1131 | |||
1132 | cfg.inefficient = 1; | ||
1133 | |||
1134 | for (cfg.ngroups = 1; cfg.ngroups <= groups; cfg.ngroups += 1) | ||
1135 | { | ||
1136 | P(RINT "\n..."); | ||
1137 | for (test_i = 0; test_i < SIZEOF_ARRAY (sec_dists); test_i += 1) | ||
1138 | { | ||
1139 | srand48 (0x84687674); | ||
1140 | |||
1141 | in_head = xd3_alloc_output (stream, NULL); | ||
1142 | out_head = xd3_alloc_output (stream, NULL); | ||
1143 | enc_stream = sec->alloc (stream); | ||
1144 | dec_input = NULL; | ||
1145 | dec_output = NULL; | ||
1146 | dec_correct = NULL; | ||
1147 | |||
1148 | if (in_head == NULL || out_head == NULL || enc_stream == NULL) { goto nomem; } | ||
1149 | |||
1150 | if ((ret = sec_dists[test_i] (stream, in_head))) { goto fail; } | ||
1151 | |||
1152 | sec->init (enc_stream); | ||
1153 | |||
1154 | /* Encode data */ | ||
1155 | if ((ret = sec->encode (stream, enc_stream, in_head, out_head, & cfg))) | ||
1156 | { | ||
1157 | P(RINT "test %u: encode: %s", test_i, stream->msg); | ||
1158 | goto fail; | ||
1159 | } | ||
1160 | |||
1161 | /* Calculate sizes, allocate contiguous arrays for decoding */ | ||
1162 | input_size = xd3_sizeof_output (in_head); | ||
1163 | compress_size = xd3_sizeof_output (out_head); | ||
1164 | |||
1165 | P(RINT "%.3f", 8.0 * (double) compress_size / (double) input_size); | ||
1166 | |||
1167 | if ((dec_input = xd3_alloc (stream, compress_size, 1)) == NULL || | ||
1168 | (dec_output = xd3_alloc (stream, input_size, 1)) == NULL || | ||
1169 | (dec_correct = xd3_alloc (stream, input_size, 1)) == NULL) { goto nomem; } | ||
1170 | |||
1171 | /* Fill the compressed data array */ | ||
1172 | for (p_off = 0, p = out_head; p != NULL; p_off += p->next, p = p->next_page) | ||
1173 | { | ||
1174 | memcpy (dec_input + p_off, p->base, p->next); | ||
1175 | } | ||
1176 | |||
1177 | XD3_ASSERT (p_off == compress_size); | ||
1178 | |||
1179 | /* Fill the input data array */ | ||
1180 | for (p_off = 0, p = in_head; p != NULL; p_off += p->next, p = p->next_page) | ||
1181 | { | ||
1182 | memcpy (dec_correct + p_off, p->base, p->next); | ||
1183 | } | ||
1184 | |||
1185 | XD3_ASSERT (p_off == input_size); | ||
1186 | |||
1187 | if ((ret = test_secondary_decode (stream, sec, input_size, compress_size, dec_input, dec_correct, dec_output))) | ||
1188 | { | ||
1189 | P(RINT "test %u: decode: %s", test_i, stream->msg); | ||
1190 | goto fail; | ||
1191 | } | ||
1192 | |||
1193 | /* Single-bit error test, only cover the first 10 bytes. Some non-failures are | ||
1194 | * expected in the Huffman case: Changing the clclen array, for example, may not | ||
1195 | * harm the decoding. Really looking for faults here. */ | ||
1196 | { | ||
1197 | int i; | ||
1198 | int bytes = min (compress_size, 10U); | ||
1199 | for (i = 0; i < bytes * 8; i += 1) | ||
1200 | { | ||
1201 | dec_input[i/8] ^= 1 << (i%8); | ||
1202 | |||
1203 | if ((ret = test_secondary_decode (stream, sec, input_size, compress_size, dec_input, dec_correct, dec_output)) == 0) | ||
1204 | { | ||
1205 | /*P(RINT "test %u: decode single-bit [%u/%u] error non-failure", test_i, i/8, i%8);*/ | ||
1206 | } | ||
1207 | |||
1208 | dec_input[i/8] ^= 1 << (i%8); | ||
1209 | |||
1210 | if ((i % (2*bytes)) == (2*bytes)-1) | ||
1211 | { | ||
1212 | DOT (); | ||
1213 | } | ||
1214 | } | ||
1215 | ret = 0; | ||
1216 | } | ||
1217 | |||
1218 | if (0) { nomem: ret = ENOMEM; } | ||
1219 | |||
1220 | fail: | ||
1221 | sec->destroy (stream, enc_stream); | ||
1222 | xd3_free_output (stream, in_head); | ||
1223 | xd3_free_output (stream, out_head); | ||
1224 | xd3_free (stream, dec_input); | ||
1225 | xd3_free (stream, dec_output); | ||
1226 | xd3_free (stream, dec_correct); | ||
1227 | |||
1228 | if (ret != 0) { return ret; } | ||
1229 | } | ||
1230 | } | ||
1231 | |||
1232 | return 0; | ||
1233 | } | ||
1234 | |||
1235 | IF_FGK (static int test_secondary_fgk (xd3_stream *stream, int gp) { return test_secondary (stream, & fgk_sec_type, gp); }) | ||
1236 | IF_DJW (static int test_secondary_huff (xd3_stream *stream, int gp) { return test_secondary (stream, & djw_sec_type, gp); }) | ||
1237 | #endif | ||
1238 | |||
1239 | /****************************************************************************************** | ||
1240 | TEST INSTRUCTION TABLE | ||
1241 | ******************************************************************************************/ | ||
1242 | |||
1243 | /* Test that xd3_choose_instruction() does the right thing for its code table. */ | ||
1244 | static int | ||
1245 | test_choose_instruction (xd3_stream *stream, int ignore) | ||
1246 | { | ||
1247 | int i; | ||
1248 | |||
1249 | stream->code_table = (*stream->code_table_func) (); | ||
1250 | |||
1251 | for (i = 0; i < 256; i += 1) | ||
1252 | { | ||
1253 | const xd3_dinst *d = stream->code_table + i; | ||
1254 | xd3_rinst prev, inst; | ||
1255 | |||
1256 | XD3_ASSERT (d->type1 > 0); | ||
1257 | |||
1258 | memset (& prev, 0, sizeof (prev)); | ||
1259 | memset (& inst, 0, sizeof (inst)); | ||
1260 | |||
1261 | if (d->type2 == 0) | ||
1262 | { | ||
1263 | inst.type = d->type1; | ||
1264 | |||
1265 | if ((inst.size = d->size1) == 0) | ||
1266 | { | ||
1267 | inst.size = TESTBUFSIZE; | ||
1268 | } | ||
1269 | |||
1270 | XD3_CHOOSE_INSTRUCTION (stream, NULL, & inst); | ||
1271 | |||
1272 | if (inst.code2 != 0 || inst.code1 != i) | ||
1273 | { | ||
1274 | stream->msg = "wrong single instruction"; | ||
1275 | return EINVAL; | ||
1276 | } | ||
1277 | } | ||
1278 | else | ||
1279 | { | ||
1280 | prev.type = d->type1; | ||
1281 | prev.size = d->size1; | ||
1282 | inst.type = d->type2; | ||
1283 | inst.size = d->size2; | ||
1284 | |||
1285 | XD3_CHOOSE_INSTRUCTION (stream, & prev, & inst); | ||
1286 | |||
1287 | if (prev.code2 != i) | ||
1288 | { | ||
1289 | stream->msg = "wrong double instruction"; | ||
1290 | return EINVAL; | ||
1291 | } | ||
1292 | } | ||
1293 | } | ||
1294 | |||
1295 | return 0; | ||
1296 | } | ||
1297 | |||
1298 | /****************************************************************************************** | ||
1299 | TEST INSTRUCTION TABLE CODING | ||
1300 | ******************************************************************************************/ | ||
1301 | |||
1302 | #if GENERIC_ENCODE_TABLES | ||
1303 | /* Test that encoding and decoding a code table works */ | ||
1304 | static int | ||
1305 | test_encode_code_table (xd3_stream *stream, int ignore) | ||
1306 | { | ||
1307 | int ret; | ||
1308 | const uint8_t *comp_data; | ||
1309 | usize_t comp_size; | ||
1310 | |||
1311 | if ((ret = xd3_compute_alternate_table_encoding (stream, & comp_data, & comp_size))) | ||
1312 | { | ||
1313 | return ret; | ||
1314 | } | ||
1315 | |||
1316 | stream->acache.s_near = __alternate_code_table_desc.near_modes; | ||
1317 | stream->acache.s_same = __alternate_code_table_desc.same_modes; | ||
1318 | |||
1319 | if ((ret = xd3_apply_table_encoding (stream, comp_data, comp_size))) | ||
1320 | { | ||
1321 | return ret; | ||
1322 | } | ||
1323 | |||
1324 | if (memcmp (stream->code_table, xd3_alternate_code_table (), sizeof (xd3_dinst) * 256) != 0) | ||
1325 | { | ||
1326 | stream->msg = "wrong code table reconstruction"; | ||
1327 | return EINVAL; | ||
1328 | } | ||
1329 | |||
1330 | return 0; | ||
1331 | } | ||
1332 | #endif | ||
1333 | |||
1334 | /****************************************************************************************** | ||
1335 | 64BIT STREAMING | ||
1336 | ******************************************************************************************/ | ||
1337 | |||
1338 | /* This test encodes and decodes a series of 1 megabyte windows, each containing a long | ||
1339 | * run of zeros along with a single xoff_t size record to indicate the sequence. */ | ||
1340 | static int | ||
1341 | test_streaming (xd3_stream *in_stream, uint8_t *encbuf, uint8_t *decbuf, uint8_t *delbuf, usize_t megs) | ||
1342 | { | ||
1343 | xd3_stream estream, dstream; | ||
1344 | int ret; | ||
1345 | usize_t i, delsize, decsize; | ||
1346 | |||
1347 | if ((ret = xd3_config_stream (& estream, NULL)) || | ||
1348 | (ret = xd3_config_stream (& dstream, NULL))) | ||
1349 | { | ||
1350 | goto fail; | ||
1351 | } | ||
1352 | |||
1353 | for (i = 0; i < megs; i += 1) | ||
1354 | { | ||
1355 | ((usize_t*) encbuf)[0] = i; | ||
1356 | |||
1357 | if ((i % 200) == 199) { DOT (); } | ||
1358 | |||
1359 | if ((ret = xd3_process_completely (& estream, xd3_encode_input, 0, | ||
1360 | encbuf, 1 << 20, | ||
1361 | delbuf, & delsize, 1 << 10))) | ||
1362 | { | ||
1363 | in_stream->msg = estream.msg; | ||
1364 | goto fail; | ||
1365 | } | ||
1366 | |||
1367 | if ((ret = xd3_process_completely (& dstream, xd3_decode_input, 0, | ||
1368 | delbuf, delsize, | ||
1369 | decbuf, & decsize, 1 << 20))) | ||
1370 | { | ||
1371 | in_stream->msg = dstream.msg; | ||
1372 | goto fail; | ||
1373 | } | ||
1374 | |||
1375 | if (decsize != 1 << 20 || | ||
1376 | memcmp (encbuf, decbuf, 1 << 20) != 0) | ||
1377 | { | ||
1378 | in_stream->msg = "wrong result"; | ||
1379 | ret = EINVAL; | ||
1380 | goto fail; | ||
1381 | } | ||
1382 | } | ||
1383 | |||
1384 | if ((ret = xd3_close_stream (& estream)) || | ||
1385 | (ret = xd3_close_stream (& dstream))) | ||
1386 | { | ||
1387 | goto fail; | ||
1388 | } | ||
1389 | |||
1390 | fail: | ||
1391 | xd3_free_stream (& estream); | ||
1392 | xd3_free_stream (& dstream); | ||
1393 | return ret; | ||
1394 | } | ||
1395 | |||
1396 | /* Run tests of data streaming of over and around 4GB of data. */ | ||
1397 | static int | ||
1398 | test_compressed_stream_overflow (xd3_stream *stream, int ignore) | ||
1399 | { | ||
1400 | int ret; | ||
1401 | uint8_t *buf; | ||
1402 | |||
1403 | if ((buf = malloc (TWO_MEGS_AND_DELTA)) == NULL) { return ENOMEM; } | ||
1404 | |||
1405 | memset (buf, 0, TWO_MEGS_AND_DELTA); | ||
1406 | |||
1407 | /* Test overflow of a 32-bit file offset. */ | ||
1408 | if (SIZEOF_XOFF_T == 4) | ||
1409 | { | ||
1410 | ret = test_streaming (stream, buf, buf + (1 << 20), buf + (2 << 20), (1 << 12) + 1); | ||
1411 | |||
1412 | if (ret == EINVAL && MSG_IS ("decoder file offset overflow")) | ||
1413 | { | ||
1414 | ret = 0; | ||
1415 | } | ||
1416 | else | ||
1417 | { | ||
1418 | stream->msg = "expected overflow condition"; | ||
1419 | ret = EINVAL; | ||
1420 | goto fail; | ||
1421 | } | ||
1422 | } | ||
1423 | |||
1424 | /* Test transfer of exactly 32bits worth of data. */ | ||
1425 | if ((ret = test_streaming (stream, buf, buf + (1 << 20), buf + (2 << 20), 1 << 12))) { goto fail; } | ||
1426 | |||
1427 | fail: | ||
1428 | free (buf); | ||
1429 | return ret; | ||
1430 | } | ||
1431 | |||
1432 | /****************************************************************************************** | ||
1433 | COMMAND LINE | ||
1434 | ******************************************************************************************/ | ||
1435 | |||
1436 | /* For each pair of command templates in the array below, test that encoding and decoding | ||
1437 | * commands work. Also check for the expected size delta, which should be approximately | ||
1438 | * TEST_ADD_RATIO times the file size created by test_make_inputs. Due to differences in | ||
1439 | * the application header, it is suppressed (-A) so that all delta files are the same. */ | ||
1440 | static int | ||
1441 | test_command_line_arguments (xd3_stream *stream, int ignore) | ||
1442 | { | ||
1443 | int i, ret; | ||
1444 | |||
1445 | static const char* cmdpairs[] = | ||
1446 | { | ||
1447 | /* standard input, output */ | ||
1448 | "%s -A < %s > %s", "%s -d < %s > %s", | ||
1449 | "%s -A -e < %s > %s", "%s -d < %s > %s", | ||
1450 | "%s -A= encode < %s > %s", "%s decode < %s > %s", | ||
1451 | "%s -A -q encode < %s > %s", "%s -qdq < %s > %s", | ||
1452 | |||
1453 | /* file input, standard output */ | ||
1454 | "%s -A= %s > %s", "%s -d %s > %s", | ||
1455 | "%s -A -e %s > %s", "%s -d %s > %s", | ||
1456 | "%s encode -A= %s > %s", "%s decode %s > %s", | ||
1457 | |||
1458 | /* file input, output */ | ||
1459 | "%s -A= %s %s", "%s -d %s %s", | ||
1460 | "%s -A -e %s %s", "%s -d %s %s", | ||
1461 | "%s -A= encode %s %s", "%s decode %s %s", | ||
1462 | |||
1463 | /* option placement */ | ||
1464 | "%s -A -f %s %s", "%s -f -d %s %s", | ||
1465 | "%s -e -A= %s %s", "%s -d -f %s %s", | ||
1466 | "%s -f encode -A= %s %s", "%s -f decode -f %s %s", | ||
1467 | }; | ||
1468 | |||
1469 | char ecmd[128], dcmd[128]; | ||
1470 | int pairs = SIZEOF_ARRAY (cmdpairs) / 2; | ||
1471 | xoff_t tsize; | ||
1472 | xoff_t dsize; | ||
1473 | double ratio; | ||
1474 | |||
1475 | srand48 (0x89162337); | ||
1476 | |||
1477 | for (i = 0; i < pairs; i += 1) | ||
1478 | { | ||
1479 | sprintf (ecmd, cmdpairs[2*i], program_name, TEST_TARGET_FILE, TEST_DELTA_FILE); | ||
1480 | sprintf (dcmd, cmdpairs[2*i+1], program_name, TEST_DELTA_FILE, TEST_RECON_FILE); | ||
1481 | |||
1482 | test_setup (); | ||
1483 | if ((ret = test_make_inputs (stream, NULL, & tsize))) { return ret; } | ||
1484 | |||
1485 | /* Encode and decode. */ | ||
1486 | if ((ret = system (ecmd)) != 0) | ||
1487 | { | ||
1488 | P(RINT "xdelta3: command was: %s\n", ecmd); | ||
1489 | stream->msg = "encode cmd failed"; | ||
1490 | return EINVAL; | ||
1491 | } | ||
1492 | |||
1493 | if ((ret = system (dcmd)) != 0) | ||
1494 | { | ||
1495 | stream->msg = "decode cmd failed"; | ||
1496 | return EINVAL; | ||
1497 | } | ||
1498 | |||
1499 | /* Compare the target file. */ | ||
1500 | if ((ret = compare_files (stream, TEST_TARGET_FILE, TEST_RECON_FILE))) | ||
1501 | { | ||
1502 | return ret; | ||
1503 | } | ||
1504 | |||
1505 | if (i == 0) | ||
1506 | { | ||
1507 | /* The first time through, check the compression ratio and save a copy of the | ||
1508 | * delta. */ | ||
1509 | if ((ret = test_save_copy (TEST_DELTA_FILE))) | ||
1510 | { | ||
1511 | stream->msg = "copy failed"; | ||
1512 | return ret; | ||
1513 | } | ||
1514 | |||
1515 | if ((ret = test_file_size (TEST_DELTA_FILE, & dsize))) | ||
1516 | { | ||
1517 | return ret; | ||
1518 | } | ||
1519 | |||
1520 | ratio = (double) dsize / (double) tsize; | ||
1521 | |||
1522 | /* Check that it is not too small, not too large. */ | ||
1523 | if (ratio >= TEST_ADD_RATIO + TEST_EPSILON) | ||
1524 | { | ||
1525 | P(RINT "xdelta3: test encode with size ratio %.3f, expected < %.3f\n", | ||
1526 | ratio, TEST_ADD_RATIO + TEST_EPSILON); | ||
1527 | stream->msg = "strange encoding"; | ||
1528 | return EINVAL; | ||
1529 | } | ||
1530 | |||
1531 | if (ratio <= TEST_ADD_RATIO - TEST_EPSILON) | ||
1532 | { | ||
1533 | P(RINT "xdelta3: test encode with size ratio %.3f, expected > %.3f\n", | ||
1534 | ratio, TEST_ADD_RATIO - TEST_EPSILON); | ||
1535 | stream->msg = "strange encoding"; | ||
1536 | return EINVAL; | ||
1537 | } | ||
1538 | |||
1539 | /* Also check that compare_files works. The delta and original should not be | ||
1540 | * identical. */ | ||
1541 | if ((ret = compare_files (stream, TEST_DELTA_FILE, TEST_TARGET_FILE)) == 0) | ||
1542 | { | ||
1543 | stream->msg = "broken compare_files"; | ||
1544 | return EINVAL; | ||
1545 | } | ||
1546 | } | ||
1547 | else | ||
1548 | { | ||
1549 | /* In subsequent passes, verify that the copy and delta are the same. */ | ||
1550 | if ((ret = compare_files (stream, TEST_COPY_FILE, TEST_DELTA_FILE))) | ||
1551 | { | ||
1552 | return ret; | ||
1553 | } | ||
1554 | } | ||
1555 | |||
1556 | test_cleanup (); | ||
1557 | DOT (); | ||
1558 | } | ||
1559 | |||
1560 | return 0; | ||
1561 | } | ||
1562 | |||
1563 | /****************************************************************************************** | ||
1564 | EXTERNAL I/O DECOMPRESSION/RECOMPRESSION | ||
1565 | ******************************************************************************************/ | ||
1566 | |||
1567 | #if EXTERNAL_COMPRESSION | ||
1568 | /* This performs one step of the test_externally_compressed_io function described below. | ||
1569 | * It builds a pipe containing both Xdelta and external compression/decompression that | ||
1570 | * should not modify the data passing through. */ | ||
1571 | static int | ||
1572 | test_compressed_pipe (xd3_stream *stream, main_extcomp *ext, char* buf, | ||
1573 | const char* comp_options, const char* decomp_options, | ||
1574 | int do_ext_recomp, const char* msg) | ||
1575 | { | ||
1576 | int ret; | ||
1577 | char decomp_buf[TESTBUFSIZE]; | ||
1578 | |||
1579 | if (do_ext_recomp) | ||
1580 | { | ||
1581 | sprintf (decomp_buf, " | %s %s", ext->decomp_cmdname, ext->decomp_options); | ||
1582 | } | ||
1583 | else | ||
1584 | { | ||
1585 | decomp_buf[0] = 0; | ||
1586 | } | ||
1587 | |||
1588 | sprintf (buf, "%s %s < %s | %s %s | %s %s%s > %s", | ||
1589 | ext->recomp_cmdname, ext->recomp_options, | ||
1590 | TEST_TARGET_FILE, | ||
1591 | program_name, comp_options, | ||
1592 | program_name, decomp_options, | ||
1593 | decomp_buf, | ||
1594 | TEST_RECON_FILE); | ||
1595 | |||
1596 | if ((ret = system (buf)) != 0) | ||
1597 | { | ||
1598 | stream->msg = msg; | ||
1599 | return EINVAL; | ||
1600 | } | ||
1601 | |||
1602 | if ((ret = compare_files (stream, TEST_TARGET_FILE, TEST_RECON_FILE))) | ||
1603 | { | ||
1604 | return EINVAL; | ||
1605 | } | ||
1606 | |||
1607 | DOT (); | ||
1608 | return 0; | ||
1609 | } | ||
1610 | |||
1611 | /* We want to test that a pipe such as: | ||
1612 | * | ||
1613 | * --> | gzip -cf | xdelta3 -cf | xdelta3 -dcf | gzip -dcf | --> | ||
1614 | * | ||
1615 | * is transparent, i.e., does not modify the stream of data. However, we also want to | ||
1616 | * verify that at the center the data is properly compressed, i.e., that we do not just | ||
1617 | * have a re-compressed gzip format, that we have an VCDIFF format. We do this in two | ||
1618 | * steps. First test the above pipe, then test with suppressed output recompression | ||
1619 | * (-D). The result should be the original input: | ||
1620 | * | ||
1621 | * --> | gzip -cf | xdelta3 -cf | xdelta3 -Ddcf | --> | ||
1622 | * | ||
1623 | * Finally we want to test that -D also disables input decompression: | ||
1624 | * | ||
1625 | * --> | gzip -cf | xdelta3 -Dcf | xdelta3 -Ddcf | gzip -dcf | --> | ||
1626 | */ | ||
1627 | static int | ||
1628 | test_externally_compressed_io (xd3_stream *stream, int ignore) | ||
1629 | { | ||
1630 | int i, ret; | ||
1631 | char buf[TESTBUFSIZE]; | ||
1632 | |||
1633 | srand48 (0x91723913); | ||
1634 | |||
1635 | if ((ret = test_make_inputs (stream, NULL, NULL))) { return ret; } | ||
1636 | |||
1637 | for (i = 0; i < SIZEOF_ARRAY (extcomp_types); i += 1) | ||
1638 | { | ||
1639 | main_extcomp *ext = & extcomp_types[i]; | ||
1640 | |||
1641 | /* Test for the existence of the external command first, if not skip. */ | ||
1642 | sprintf (buf, "%s %s < /dev/null > /dev/null", ext->recomp_cmdname, ext->recomp_options); | ||
1643 | |||
1644 | if ((ret = system (buf)) != 0) | ||
1645 | { | ||
1646 | P(RINT "%s=0", ext->recomp_cmdname); | ||
1647 | continue; | ||
1648 | } | ||
1649 | |||
1650 | if ((ret = test_compressed_pipe (stream, ext, buf, "-cfq", "-dcfq", 1, | ||
1651 | "compression failed: identity pipe")) || | ||
1652 | (ret = test_compressed_pipe (stream, ext, buf, "-cfq", "-Rdcfq", 0, | ||
1653 | "compression failed: without recompression")) || | ||
1654 | (ret = test_compressed_pipe (stream, ext, buf, "-Dcfq", "-Rdcfq", 1, | ||
1655 | "compression failed: without decompression"))) | ||
1656 | { | ||
1657 | return ret; | ||
1658 | } | ||
1659 | } | ||
1660 | |||
1661 | return 0; | ||
1662 | } | ||
1663 | |||
1664 | /* This tests the proper functioning of external decompression for source files. The | ||
1665 | * source and target files are identical and compressed by gzip. Decoding such a delta | ||
1666 | * with recompression disbaled (-R) should produce the original, uncompressed | ||
1667 | * source/target file. Then it checks with output recompression enabled--in this case the | ||
1668 | * output should be a compressed copy of the original source/target file. Then it checks | ||
1669 | * that encoding with decompression disabled works--the compressed files are identical and | ||
1670 | * decoding them should always produce a compressed output, regardless of -R since the | ||
1671 | * encoded delta file had decompression disabled.. | ||
1672 | */ | ||
1673 | static int | ||
1674 | test_source_decompression (xd3_stream *stream, int ignore) | ||
1675 | { | ||
1676 | int ret; | ||
1677 | char buf[TESTBUFSIZE]; | ||
1678 | const main_extcomp *ext; | ||
1679 | |||
1680 | srand48 (0x9ff56acb); | ||
1681 | |||
1682 | test_setup (); | ||
1683 | if ((ret = test_make_inputs (stream, NULL, NULL))) { return ret; } | ||
1684 | |||
1685 | /* Use gzip. */ | ||
1686 | if ((ext = main_get_compressor ("G")) == NULL) { P(RINT "skipped"); return 0; } | ||
1687 | |||
1688 | /* Save an uncompressed copy. */ | ||
1689 | if ((ret = test_save_copy (TEST_TARGET_FILE))) { return ret; } | ||
1690 | |||
1691 | /* Compress the target. */ | ||
1692 | sprintf (buf, "%s %s < %s > %s", ext->recomp_cmdname, ext->recomp_options, TEST_TARGET_FILE, TEST_SOURCE_FILE); | ||
1693 | if ((ret = do_cmd (stream, buf))) { return ret; } | ||
1694 | |||
1695 | /* Copy back to the source. */ | ||
1696 | sprintf (buf, "cp -f %s %s", TEST_SOURCE_FILE, TEST_TARGET_FILE); | ||
1697 | if ((ret = do_cmd (stream, buf))) { return ret; } | ||
1698 | |||
1699 | /* Now the two identical files are compressed. Delta-encode the target, with decompression. */ | ||
1700 | sprintf (buf, "%s -eq -s%s %s %s", program_name, TEST_SOURCE_FILE, TEST_TARGET_FILE, TEST_DELTA_FILE); | ||
1701 | if ((ret = do_cmd (stream, buf))) { return ret; } | ||
1702 | |||
1703 | /* Decode the delta file with recompression disabled, should get an uncompressed file | ||
1704 | * out. */ | ||
1705 | sprintf (buf, "%s -dq -R -s%s %s %s", program_name, TEST_SOURCE_FILE, TEST_DELTA_FILE, TEST_RECON_FILE); | ||
1706 | if ((ret = do_cmd (stream, buf))) { return ret; } | ||
1707 | if ((ret = compare_files (stream, TEST_COPY_FILE, TEST_RECON_FILE))) { return ret; } | ||
1708 | |||
1709 | /* Decode the delta file with recompression, should get a compressed file out. But we | ||
1710 | * can't compare compressed files directly. */ | ||
1711 | sprintf (buf, "%s -dqf -s%s %s %s", program_name, TEST_SOURCE_FILE, TEST_DELTA_FILE, TEST_RECON_FILE); | ||
1712 | if ((ret = do_cmd (stream, buf))) { return ret; } | ||
1713 | sprintf (buf, "%s %s < %s > %s", ext->decomp_cmdname, ext->decomp_options, TEST_RECON_FILE, TEST_RECON2_FILE); | ||
1714 | if ((ret = do_cmd (stream, buf))) { return ret; } | ||
1715 | if ((ret = compare_files (stream, TEST_COPY_FILE, TEST_RECON2_FILE))) { return ret; } | ||
1716 | |||
1717 | /* Encode with decompression disabled */ | ||
1718 | sprintf (buf, "%s -feqD -s%s %s %s", program_name, TEST_SOURCE_FILE, TEST_TARGET_FILE, TEST_DELTA_FILE); | ||
1719 | if ((ret = do_cmd (stream, buf))) { return ret; } | ||
1720 | |||
1721 | /* Decode the delta file with recompression enabled, it doesn't matter, should get the | ||
1722 | * compressed file out. */ | ||
1723 | sprintf (buf, "%s -fdq -s%s %s %s", program_name, TEST_SOURCE_FILE, TEST_DELTA_FILE, TEST_RECON_FILE); | ||
1724 | if ((ret = do_cmd (stream, buf))) { return ret; } | ||
1725 | if ((ret = compare_files (stream, TEST_TARGET_FILE, TEST_RECON_FILE))) { return ret; } | ||
1726 | |||
1727 | /* Try again with recompression disabled, it doesn't make a difference. */ | ||
1728 | sprintf (buf, "%s -fqRd -s%s %s %s", program_name, TEST_SOURCE_FILE, TEST_DELTA_FILE, TEST_RECON_FILE); | ||
1729 | if ((ret = do_cmd (stream, buf))) { return ret; } | ||
1730 | if ((ret = compare_files (stream, TEST_TARGET_FILE, TEST_RECON_FILE))) { return ret; } | ||
1731 | test_cleanup(); | ||
1732 | return 0; | ||
1733 | } | ||
1734 | #endif | ||
1735 | |||
1736 | /****************************************************************************************** | ||
1737 | FORCE, STDOUT | ||
1738 | ******************************************************************************************/ | ||
1739 | |||
1740 | /* This tests that output will not overwrite an existing file unless -f was specified. | ||
1741 | * The test is for encoding (the same code handles it for decoding). */ | ||
1742 | static int | ||
1743 | test_force_behavior (xd3_stream *stream, int ignore) | ||
1744 | { | ||
1745 | int ret; | ||
1746 | char buf[128]; | ||
1747 | |||
1748 | /* Create empty target file */ | ||
1749 | test_setup (); | ||
1750 | sprintf (buf, "cp /dev/null %s", TEST_TARGET_FILE); | ||
1751 | if ((ret = do_cmd (stream, buf))) { return ret; } | ||
1752 | |||
1753 | /* Encode to delta file */ | ||
1754 | sprintf (buf, "%s -e %s %s", program_name, TEST_TARGET_FILE, TEST_DELTA_FILE); | ||
1755 | if ((ret = do_cmd (stream, buf))) { return ret; } | ||
1756 | |||
1757 | /* Encode again, should fail. */ | ||
1758 | sprintf (buf, "%s -e %s %s 2> /dev/null", program_name, TEST_TARGET_FILE, TEST_DELTA_FILE); | ||
1759 | if ((ret = do_fail (stream, buf))) { return ret; } | ||
1760 | |||
1761 | /* Force it, should succeed. */ | ||
1762 | sprintf (buf, "%s -ef %s %s", program_name, TEST_TARGET_FILE, TEST_DELTA_FILE); | ||
1763 | if ((ret = do_cmd (stream, buf))) { return ret; } | ||
1764 | test_cleanup(); | ||
1765 | return 0; | ||
1766 | } | ||
1767 | |||
1768 | /* This checks the proper operation of the -c flag. When specified the default output | ||
1769 | * becomes stdout, otherwise the input must be provided (encode) or it may be defaulted | ||
1770 | * (decode w/ app header). */ | ||
1771 | static int | ||
1772 | test_stdout_behavior (xd3_stream *stream, int ignore) | ||
1773 | { | ||
1774 | int ret; | ||
1775 | char buf[128]; | ||
1776 | |||
1777 | test_setup(); | ||
1778 | sprintf (buf, "cp /dev/null %s", TEST_TARGET_FILE); | ||
1779 | if ((ret = do_cmd (stream, buf))) { return ret; } | ||
1780 | |||
1781 | /* Without -c, encode writes to delta file */ | ||
1782 | sprintf (buf, "%s -e %s %s", program_name, TEST_TARGET_FILE, TEST_DELTA_FILE); | ||
1783 | if ((ret = do_cmd (stream, buf))) { return ret; } | ||
1784 | |||
1785 | /* With -c, encode writes to stdout */ | ||
1786 | sprintf (buf, "%s -e -c %s > %s", program_name, TEST_TARGET_FILE, TEST_DELTA_FILE); | ||
1787 | if ((ret = do_cmd (stream, buf))) { return ret; } | ||
1788 | |||
1789 | /* Without -c, decode writes to target file name, but it fails because the file exists. */ | ||
1790 | sprintf (buf, "%s -d %s 2> /dev/null", program_name, TEST_DELTA_FILE); | ||
1791 | if ((ret = do_fail (stream, buf))) { return ret; } | ||
1792 | |||
1793 | /* With -c, decode writes to stdout */ | ||
1794 | sprintf (buf, "%s -d -c %s > /dev/null", program_name, TEST_DELTA_FILE); | ||
1795 | if ((ret = do_cmd (stream, buf))) { return ret; } | ||
1796 | test_cleanup(); | ||
1797 | |||
1798 | return 0; | ||
1799 | } | ||
1800 | |||
1801 | /* This tests that the no-output flag (-J) works. */ | ||
1802 | static int | ||
1803 | test_no_output (xd3_stream *stream, int ignore) | ||
1804 | { | ||
1805 | int ret; | ||
1806 | char buf[TESTBUFSIZE]; | ||
1807 | |||
1808 | test_setup (); | ||
1809 | if ((ret = test_make_inputs (stream, NULL, NULL))) { return ret; } | ||
1810 | |||
1811 | /* Try no_output encode w/out unwritable output file */ | ||
1812 | sprintf (buf, "%s -e %s /dont_run_xdelta3_test_as_root 2> /dev/null", program_name, TEST_TARGET_FILE); | ||
1813 | if ((ret = do_fail (stream, buf))) { return ret; } | ||
1814 | sprintf (buf, "%s -J -e %s /dont_run_xdelta3_test_as_root", program_name, TEST_TARGET_FILE); | ||
1815 | if ((ret = do_cmd (stream, buf))) { return ret; } | ||
1816 | |||
1817 | /* Now really write the delta to test decode no-output */ | ||
1818 | sprintf (buf, "%s -e %s %s", program_name, TEST_TARGET_FILE, TEST_DELTA_FILE); | ||
1819 | if ((ret = do_cmd (stream, buf))) { return ret; } | ||
1820 | |||
1821 | sprintf (buf, "%s -d %s /dont_run_xdelta3_test_as_root 2> /dev/null", program_name, TEST_DELTA_FILE); | ||
1822 | if ((ret = do_fail (stream, buf))) { return ret; } | ||
1823 | sprintf (buf, "%s -J -d %s /dont_run_xdelta3_test_as_root", program_name, TEST_DELTA_FILE); | ||
1824 | if ((ret = do_cmd (stream, buf))) { return ret; } | ||
1825 | test_cleanup (); | ||
1826 | return 0; | ||
1827 | } | ||
1828 | |||
1829 | /****************************************************************************************** | ||
1830 | Source identical optimization | ||
1831 | ******************************************************************************************/ | ||
1832 | |||
1833 | /* Computing a delta should be fastest when the two inputs are identical, this checks it. | ||
1834 | * The library is called to compute a delta between a 10000 byte file, 1000 byte winsize, | ||
1835 | * 500 byte source blocksize. The same buffer is used for both source and target. */ | ||
1836 | static int | ||
1837 | test_identical_behavior (xd3_stream *stream, int ignore) | ||
1838 | { | ||
1839 | #define IDB_TGTSZ 10000 | ||
1840 | #define IDB_BLKSZ 500 | ||
1841 | #define IDB_WINSZ 1000 | ||
1842 | #define IDB_DELSZ 1000 | ||
1843 | #define IDB_WINCNT (IDB_TGTSZ / IDB_WINSZ) | ||
1844 | |||
1845 | int ret, i; | ||
1846 | uint8_t buf[IDB_TGTSZ]; | ||
1847 | uint8_t del[IDB_DELSZ]; | ||
1848 | uint8_t rec[IDB_TGTSZ]; | ||
1849 | xd3_source source; | ||
1850 | int encwin = 0; | ||
1851 | usize_t delpos = 0, recsize; | ||
1852 | xd3_config config; | ||
1853 | |||
1854 | for (i = 0; i < IDB_TGTSZ; i += 1) { buf[i] = lrand48 (); } | ||
1855 | |||
1856 | stream->winsize = IDB_WINSZ; | ||
1857 | |||
1858 | source.size = IDB_TGTSZ; | ||
1859 | source.blksize = IDB_BLKSZ; | ||
1860 | source.name = ""; | ||
1861 | source.curblk = NULL; | ||
1862 | source.curblkno = -1; | ||
1863 | |||
1864 | if ((ret = xd3_set_source (stream, & source))) { goto fail; } | ||
1865 | |||
1866 | /* Compute an delta between identical source and targets. */ | ||
1867 | for (;;) | ||
1868 | { | ||
1869 | ret = xd3_encode_input (stream); | ||
1870 | |||
1871 | if (ret == XD3_INPUT) | ||
1872 | { | ||
1873 | if (encwin == IDB_WINCNT-1) { break; } | ||
1874 | xd3_avail_input (stream, buf + (IDB_WINSZ * encwin), IDB_WINSZ); | ||
1875 | encwin += 1; | ||
1876 | continue; | ||
1877 | } | ||
1878 | |||
1879 | if (ret == XD3_GETSRCBLK) | ||
1880 | { | ||
1881 | source.curblkno = source.getblkno; | ||
1882 | source.onblk = IDB_BLKSZ; | ||
1883 | source.curblk = buf + source.getblkno * IDB_BLKSZ; | ||
1884 | continue; | ||
1885 | } | ||
1886 | |||
1887 | if (ret == XD3_WINSTART) { continue; } | ||
1888 | if (ret == XD3_WINFINISH) { continue; } | ||
1889 | |||
1890 | if (ret != XD3_OUTPUT) { goto fail; } | ||
1891 | |||
1892 | XD3_ASSERT (delpos + stream->avail_out <= IDB_DELSZ); | ||
1893 | |||
1894 | memcpy (del + delpos, stream->next_out, stream->avail_out); | ||
1895 | |||
1896 | delpos += stream->avail_out; | ||
1897 | |||
1898 | xd3_consume_output (stream); | ||
1899 | } | ||
1900 | |||
1901 | /* Reset. */ | ||
1902 | source.blksize = IDB_TGTSZ; | ||
1903 | source.onblk = IDB_TGTSZ; | ||
1904 | source.curblk = buf; | ||
1905 | source.curblkno = 0; | ||
1906 | |||
1907 | if ((ret = xd3_close_stream (stream))) { goto fail; } | ||
1908 | xd3_free_stream (stream); | ||
1909 | xd3_init_config (& config, 0); | ||
1910 | if ((ret = xd3_config_stream (stream, & config))) { goto fail; } | ||
1911 | if ((ret = xd3_set_source (stream, & source))) { goto fail; } | ||
1912 | |||
1913 | /* Decode. */ | ||
1914 | if ((ret = xd3_decode_completely (stream, del, delpos, rec, & recsize, IDB_TGTSZ))) { goto fail; } | ||
1915 | |||
1916 | /* Check result size and data. */ | ||
1917 | if (recsize != IDB_TGTSZ) { stream->msg = "wrong size reconstruction"; goto fail; } | ||
1918 | if (memcmp (rec, buf, IDB_TGTSZ) != 0) { stream->msg = "wrong data reconstruction"; goto fail; } | ||
1919 | |||
1920 | /* Check that there was one copy per window. */ | ||
1921 | IF_DEBUG (if (stream->n_cpy != IDB_WINCNT || | ||
1922 | stream->n_add != 0 || | ||
1923 | stream->n_run != 0) { stream->msg = "wrong copy count"; goto fail; }); | ||
1924 | |||
1925 | /* Check that no checksums were computed because the initial match was presumed. */ | ||
1926 | IF_DEBUG (if (stream->large_ckcnt != 0) { stream->msg = "wrong checksum behavior"; goto fail; }); | ||
1927 | |||
1928 | ret = 0; | ||
1929 | fail: | ||
1930 | return ret; | ||
1931 | } | ||
1932 | |||
1933 | /****************************************************************************************** | ||
1934 | String matching test | ||
1935 | ******************************************************************************************/ | ||
1936 | |||
/* Types for the string matching test, which checks particular matching behaviors by
 * running the soft string matcher on specific inputs. */

/* Matcher options a test case may request; the values are bits and may be combined. */
typedef enum
{
  SM_NONE    = 0x0,
  SM_SSMATCH = 0x1,
  SM_LAZY    = 0x2,
  SM_PROMOTE = 0x4,
} string_match_flags;

/* One test case: the input text, the string_match_flags to apply, and the expected
 * textual rendering of the resulting instructions. */
typedef struct _string_match_test
{
  const char *input;
  int         flags;
  const char *result;
} string_match_test;
1955 | |||
1956 | static const string_match_test match_tests[] = | ||
1957 | { | ||
1958 | /* nothing */ | ||
1959 | { "1234567890", SM_NONE, "" }, | ||
1960 | |||
1961 | /* basic run, copy */ | ||
1962 | { "11111111112323232323", SM_NONE, "R0/10 C12/8@10" }, | ||
1963 | |||
1964 | /* no run smaller than MIN_RUN=8 */ | ||
1965 | { "1111111", SM_NONE, "C1/6@0" }, | ||
1966 | { "11111111", SM_NONE, "R0/8" }, | ||
1967 | |||
1968 | /* simple promotion: the third copy address depends on promotion */ | ||
1969 | { "ABCDEF_ABCDEF^ABCDEF", SM_NONE, "C7/6@0 C14/6@7" }, | ||
1970 | { "ABCDEF_ABCDEF^ABCDEF", SM_PROMOTE, "C7/6@0 C14/6@0" }, | ||
1971 | |||
1972 | /* simple lazy: there is a better copy starting with "23 X" than "123 " */ | ||
1973 | { "123 23 XYZ 123 XYZ", SM_NONE, "C11/4@0" }, | ||
1974 | { "123 23 XYZ 123 XYZ", SM_LAZY, "C11/4@0 C12/6@4" }, | ||
1975 | |||
1976 | /* trylazy: no lazy matches unless there are at least two characters beyond the first | ||
1977 | * match */ | ||
1978 | { "2123_121212", SM_LAZY, "C7/4@5" }, | ||
1979 | { "2123_1212123", SM_LAZY, "C7/4@5" }, | ||
1980 | { "2123_1212123_", SM_LAZY, "C7/4@5 C8/5@0" }, | ||
1981 | |||
1982 | /* trylazy: no lazy matches if the copy is >= MAXLAZY=10 */ | ||
1983 | { "2123_121212123_", SM_LAZY, "C7/6@5 C10/5@0" }, | ||
1984 | { "2123_12121212123_", SM_LAZY, "C7/8@5 C12/5@0" }, | ||
1985 | { "2123_1212121212123_", SM_LAZY, "C7/10@5" }, | ||
1986 | |||
1987 | /* lazy run: check a run overlapped by a longer copy */ | ||
1988 | { "11111112 111111112 1", SM_LAZY, "C1/6@0 R9/8 C10/10@0" }, | ||
1989 | |||
1990 | /* lazy match: match_length,run_l >= min_match tests, shouldn't get any copies within | ||
1991 | * the run, no run within the copy */ | ||
1992 | { "^________^________ ", SM_LAZY, "R1/8 C9/9@0" }, | ||
1993 | |||
1994 | /* chain depth: it only goes back 10. this checks that the 10th match hits and the 11th | ||
1995 | * misses. */ | ||
1996 | { "1234 1234_1234-1234=1234+1234[1234]1234{1234}1234<1234 ", SM_NONE, | ||
1997 | "C5/4@0 C10/4@5 C15/4@10 C20/4@15 C25/4@20 C30/4@25 C35/4@30 C40/4@35 C45/4@40 C50/5@0" }, | ||
1998 | { "1234 1234_1234-1234=1234+1234[1234]1234{1234}1234<1234>1234 ", SM_NONE, | ||
1999 | "C5/4@0 C10/4@5 C15/4@10 C20/4@15 C25/4@20 C30/4@25 C35/4@30 C40/4@35 C45/4@40 C50/4@45 C55/4@50" }, | ||
2000 | |||
2001 | /* ssmatch test */ | ||
2002 | { "ABCDE___ABCDE*** BCDE***", SM_NONE, "C8/5@0 C17/4@1" }, | ||
2003 | { "ABCDE___ABCDE*** BCDE***", SM_SSMATCH, "C8/5@0 C17/7@9" }, | ||
2004 | }; | ||
2005 | |||
2006 | static int | ||
2007 | test_string_matching (xd3_stream *stream, int ignore) | ||
2008 | { | ||
2009 | int i, ret; | ||
2010 | xd3_config config; | ||
2011 | char rbuf[TESTBUFSIZE]; | ||
2012 | |||
2013 | for (i = 0; i < SIZEOF_ARRAY (match_tests); i += 1) | ||
2014 | { | ||
2015 | const string_match_test *test = & match_tests[i]; | ||
2016 | char *rptr = rbuf; | ||
2017 | usize_t len = strlen (test->input); | ||
2018 | |||
2019 | xd3_free_stream (stream); | ||
2020 | xd3_init_config (& config, 0); | ||
2021 | |||
2022 | config.smatch_cfg = XD3_SMATCH_SOFT; | ||
2023 | config.large_look = 4; | ||
2024 | config.large_step = 4; | ||
2025 | config.small_look = 4; | ||
2026 | config.small_chain = 10; | ||
2027 | config.small_lchain = 10; | ||
2028 | config.max_lazy = 10; | ||
2029 | config.long_enough = 10; | ||
2030 | config.ssmatch = (test->flags & SM_SSMATCH) && 1; | ||
2031 | config.try_lazy = (test->flags & SM_LAZY) && 1; | ||
2032 | config.promote = (test->flags & SM_PROMOTE) && 1; | ||
2033 | |||
2034 | if ((ret = xd3_config_stream (stream, & config))) { return ret; } | ||
2035 | if ((ret = xd3_encode_init (stream))) { return ret; } | ||
2036 | |||
2037 | xd3_avail_input (stream, (uint8_t*)test->input, len); | ||
2038 | |||
2039 | if ((ret = stream->string_match (stream))) { return ret; } | ||
2040 | |||
2041 | *rptr = 0; | ||
2042 | while (! xd3_rlist_empty (& stream->iopt.used)) | ||
2043 | { | ||
2044 | xd3_rinst *inst = xd3_rlist_pop_front (& stream->iopt.used); | ||
2045 | |||
2046 | switch (inst->type) | ||
2047 | { | ||
2048 | case XD3_RUN: *rptr++ = 'R'; break; | ||
2049 | case XD3_CPY: *rptr++ = 'C'; break; | ||
2050 | default: XD3_ASSERT (0); | ||
2051 | } | ||
2052 | |||
2053 | sprintf (rptr, "%d/%d", inst->pos, inst->size); | ||
2054 | rptr += strlen (rptr); | ||
2055 | |||
2056 | if (inst->type == XD3_CPY) | ||
2057 | { | ||
2058 | *rptr++ = '@'; | ||
2059 | sprintf (rptr, "%"Q"d", inst->addr); | ||
2060 | rptr += strlen (rptr); | ||
2061 | } | ||
2062 | |||
2063 | *rptr++ = ' '; | ||
2064 | |||
2065 | xd3_rlist_push_back (& stream->iopt.free, inst); | ||
2066 | } | ||
2067 | |||
2068 | if (rptr != rbuf) | ||
2069 | { | ||
2070 | rptr -= 1; *rptr = 0; | ||
2071 | } | ||
2072 | |||
2073 | if (strcmp (rbuf, test->result) != 0) | ||
2074 | { | ||
2075 | P(RINT "test %u: expected %s: got %s", i, test->result, rbuf); | ||
2076 | stream->msg = "wrong result"; | ||
2077 | return EINVAL; | ||
2078 | } | ||
2079 | } | ||
2080 | |||
2081 | return 0; | ||
2082 | } | ||
2083 | |||
2084 | /****************************************************************************************** | ||
2085 | Source window advance, update | ||
2086 | ******************************************************************************************/ | ||
2087 | |||
2088 | /* | ||
2089 | * This is a test for many overlapping instructions. It must be a lazy | ||
2090 | * matcher. | ||
2091 | */ | ||
2092 | static int | ||
2093 | test_iopt_flush_instructions (xd3_stream *stream, int ignore) | ||
2094 | { | ||
2095 | int ret, i, tpos = 0; | ||
2096 | usize_t delta_size, recon_size; | ||
2097 | xd3_config config; | ||
2098 | uint8_t target[TESTBUFSIZE]; | ||
2099 | uint8_t delta[TESTBUFSIZE]; | ||
2100 | uint8_t recon[TESTBUFSIZE]; | ||
2101 | |||
2102 | xd3_free_stream (stream); | ||
2103 | xd3_init_config (& config, 0); | ||
2104 | |||
2105 | config.smatch_cfg = XD3_SMATCH_SOFT; | ||
2106 | config.large_look = 16; | ||
2107 | config.large_step = 16; | ||
2108 | config.small_look = 4; | ||
2109 | config.small_chain = 128; | ||
2110 | config.small_lchain = 16; | ||
2111 | config.ssmatch = 0; | ||
2112 | config.try_lazy = 1; | ||
2113 | config.max_lazy = 8; | ||
2114 | config.long_enough = 128; | ||
2115 | config.promote = 0; | ||
2116 | |||
2117 | if ((ret = xd3_config_stream (stream, & config))) { return ret; } | ||
2118 | |||
2119 | for (i = 1; i < 250; i++) | ||
2120 | { | ||
2121 | target[tpos++] = i; | ||
2122 | target[tpos++] = i+1; | ||
2123 | target[tpos++] = i+2; | ||
2124 | target[tpos++] = i+3; | ||
2125 | target[tpos++] = 0; | ||
2126 | } | ||
2127 | for (i = 1; i < 253; i++) | ||
2128 | { | ||
2129 | target[tpos++] = i; | ||
2130 | } | ||
2131 | |||
2132 | if ((ret = xd3_encode_completely (stream, target, tpos, | ||
2133 | delta, & delta_size, sizeof (delta)))) | ||
2134 | { | ||
2135 | return ret; | ||
2136 | } | ||
2137 | |||
2138 | xd3_free_stream(stream); | ||
2139 | if ((ret = xd3_config_stream (stream, & config))) { return ret; } | ||
2140 | |||
2141 | if ((ret = xd3_decode_completely (stream, delta, delta_size, | ||
2142 | recon, & recon_size, sizeof (recon)))) | ||
2143 | { | ||
2144 | return ret; | ||
2145 | } | ||
2146 | |||
2147 | XD3_ASSERT(tpos == recon_size); | ||
2148 | XD3_ASSERT(memcmp(target, recon, recon_size) == 0); | ||
2149 | |||
2150 | return 0; | ||
2151 | } | ||
2152 | |||
2153 | /****************************************************************************************** | ||
2154 | TEST MAIN | ||
2155 | ******************************************************************************************/ | ||
2156 | |||
2157 | static int | ||
2158 | xd3_selftest (void) | ||
2159 | { | ||
2160 | #define DO_TEST(fn,flags,arg) \ | ||
2161 | do { \ | ||
2162 | xd3_stream stream; \ | ||
2163 | xd3_config config; \ | ||
2164 | xd3_init_config (& config, flags); \ | ||
2165 | P(RINT "xdelta3: testing " #fn "%s...", \ | ||
2166 | flags ? (" (" #flags ")") : ""); \ | ||
2167 | if ((ret = xd3_config_stream (& stream, & config) == 0) && \ | ||
2168 | (ret = test_ ## fn (& stream, arg)) == 0) { \ | ||
2169 | P(RINT " success\n"); \ | ||
2170 | } else { \ | ||
2171 | P(RINT " failed: %s: %s\n", xd3_errstring (& stream), \ | ||
2172 | xd3_strerror (ret)); } \ | ||
2173 | xd3_free_stream (& stream); \ | ||
2174 | if (ret != 0) { goto failure; } \ | ||
2175 | } while (0) | ||
2176 | |||
2177 | int ret; | ||
2178 | |||
2179 | DO_TEST (random_numbers, 0, 0); | ||
2180 | DO_TEST (decode_integer_end_of_input, 0, 0); | ||
2181 | DO_TEST (decode_integer_overflow, 0, 0); | ||
2182 | DO_TEST (encode_decode_uint32_t, 0, 0); | ||
2183 | DO_TEST (encode_decode_uint64_t, 0, 0); | ||
2184 | DO_TEST (usize_t_overflow, 0, 0); | ||
2185 | |||
2186 | DO_TEST (address_cache, 0, 0); | ||
2187 | IF_GENCODETBL (DO_TEST (address_cache, XD3_ALT_CODE_TABLE, 0)); | ||
2188 | |||
2189 | DO_TEST (string_matching, 0, 0); | ||
2190 | |||
2191 | DO_TEST (choose_instruction, 0, 0); | ||
2192 | IF_GENCODETBL (DO_TEST (choose_instruction, XD3_ALT_CODE_TABLE, 0)); | ||
2193 | IF_GENCODETBL (DO_TEST (encode_code_table, 0, 0)); | ||
2194 | |||
2195 | DO_TEST (identical_behavior, 0, 0); | ||
2196 | DO_TEST (iopt_flush_instructions, 0, 0); | ||
2197 | |||
2198 | IF_DJW (DO_TEST (secondary_huff, 0, DJW_MAX_GROUPS)); | ||
2199 | IF_FGK (DO_TEST (secondary_fgk, 0, 1)); | ||
2200 | |||
2201 | DO_TEST (decompress_single_bit_error, 0, 3); | ||
2202 | DO_TEST (decompress_single_bit_error, XD3_ADLER32, 3); | ||
2203 | |||
2204 | IF_FGK (DO_TEST (decompress_single_bit_error, XD3_SEC_FGK, 3)); | ||
2205 | IF_DJW (DO_TEST (decompress_single_bit_error, XD3_SEC_DJW, 8)); | ||
2206 | |||
2207 | /* There are many expected non-failures for ALT_CODE_TABLE because not all of the | ||
2208 | * instruction codes are used. */ | ||
2209 | IF_GENCODETBL (DO_TEST (decompress_single_bit_error, XD3_ALT_CODE_TABLE, 224)); | ||
2210 | |||
2211 | DO_TEST (compressed_stream_overflow, 0, 0); | ||
2212 | |||
2213 | /* The following tests have random failures on my OSX box. | ||
2214 | */ | ||
2215 | DO_TEST (force_behavior, 0, 0); | ||
2216 | DO_TEST (stdout_behavior, 0, 0); | ||
2217 | DO_TEST (no_output, 0, 0); | ||
2218 | DO_TEST (command_line_arguments, 0, 0); | ||
2219 | |||
2220 | #if EXTERNAL_COMPRESSION | ||
2221 | DO_TEST (source_decompression, 0, 0); | ||
2222 | DO_TEST (externally_compressed_io, 0, 0); | ||
2223 | #endif | ||
2224 | |||
2225 | failure: | ||
2226 | test_cleanup (); | ||
2227 | return ret == 0 ? EXIT_SUCCESS : EXIT_FAILURE; | ||
2228 | #undef DO_TEST | ||
2229 | } | ||
diff --git a/xdelta3/xdelta3.c b/xdelta3/xdelta3.c new file mode 100755 index 0000000..fb9a09f --- /dev/null +++ b/xdelta3/xdelta3.c | |||
@@ -0,0 +1,6022 @@ | |||
1 | /* xdelta 3 - delta compression tools and library | ||
2 | * Copyright (C) 2001, 2003, 2004, 2005, 2006. Joshua P. MacDonald | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify | ||
5 | * it under the terms of the GNU General Public License as published by | ||
6 | * the Free Software Foundation; either version 2 of the License, or | ||
7 | * (at your option) any later version. | ||
8 | * | ||
9 | * This program is distributed in the hope that it will be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write to the Free Software | ||
16 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
17 | |||
18 | ------------------------------------------------------------------- | ||
19 | |||
20 | Xdelta 3 | ||
21 | |||
22 | The goal of this library is to implement both the (stand-alone) | ||
23 | data-compression and delta-compression aspects of VCDIFF encoding, and | ||
24 | to support a programming interface that works like Zlib | ||
25 | (http://www.gzip.org/zlib.html). See RFC3284: The VCDIFF Generic | ||
26 | Differencing and Compression Data Format. | ||
27 | |||
28 | VCDIFF is a unified encoding that combines data-compression and | ||
29 | delta-encoding ("differencing"). | ||
30 | |||
31 | VCDIFF has a detailed byte-code instruction set with many features. | ||
32 | The instruction format supports an immediate size operand for small | ||
33 | COPYs and ADDs (e.g., under 18 bytes). There are also instruction | ||
34 | "modes", which are used to compress COPY addresses by using two | ||
35 | address caches. An instruction mode refers to slots in the NEAR | ||
36 | and SAME caches for recent addresses. NEAR remembers the | ||
37 | previous 4 (by default) COPY addresses, and SAME catches | ||
38 | frequent re-uses of the same address using a 3-way (by default) | ||
39 | 256-entry associative cache of [ADDR mod 256], the encoded byte. | ||
40 | A hit in the NEAR/SAME cache requires 0/1 ADDR bytes. | ||
41 | |||
42 | VCDIFF has a default instruction table, but alternate | ||
43 | instruction tables may themselves be delta-compressed and | ||
44 | included in the encoding header. This allows even more freedom. | ||
45 | There are 9 instruction modes in the default code table, 4 near, 3 | ||
46 | same, VCD_SELF (absolute encoding) and VCD_HERE (relative to the | ||
47 | current position). | ||
48 | |||
49 | ---------------------------------------------------------------------- | ||
50 | |||
51 | Algorithms | ||
52 | |||
53 | Aside from the details of encoding and decoding, there are a bunch | ||
54 | of algorithms needed. | ||
55 | |||
56 | 1. STRING-MATCH. A two-level fingerprinting approach is used. A | ||
57 | single loop computes the two checksums -- small and large -- at | ||
58 | successive offsets in the TARGET file. The large checksum is more | ||
59 | accurate and is used to discover SOURCE matches, which are | ||
60 | potentially very long. The small checksum is used to discover | ||
61 | copies within the TARGET. Small matching, which is more expensive, | ||
62 | usually dominates the large STRING-MATCH costs in this code - the | ||
63 | more exhaustive the search, the better the results. Either of the | ||
64 | two string-matching mechanisms may be disabled. Currently, large | ||
65 | checksums are only performed in the source file, if present, and | ||
66 | small checksums are performed only in the left-over target input. | ||
67 | However, small matches are possible in the source file too, with a | ||
68 | range of possibilities. [I've seen a paper on this subject, but | ||
69 | I lost it.] | ||
70 | |||
71 | 2. INSTRUCTION SELECTION. The IOPT buffer here represents a queue | ||
72 | used to store overlapping copy instructions. There are two possible | ||
73 | optimizations that go beyond a greedy search. Both of these fall | ||
74 | into the category of "non-greedy matching" optimizations. | ||
75 | |||
76 | The first optimization stems from backward SOURCE-COPY matching. | ||
77 | When a new SOURCE-COPY instruction covers a previous instruction in | ||
78 | the target completely, it is erased from the queue. Randal Burns | ||
79 | originally analyzed these algorithms and did a lot of related work | ||
80 | (\cite the 1.5-pass algorithm). | ||
81 | |||
82 | The second optimization comes by the encoding of common very-small | ||
83 | COPY and ADD instructions, for which there are special DOUBLE-code | ||
84 | instructions, which code two instructions in a single byte. | ||
85 | |||
86 | The cost of bad instruction-selection overhead is relatively high | ||
87 | for data-compression, relative to delta-compression, so this second | ||
88 | optimization is fairly important. With "lazy" matching (the name | ||
89 | used in Zlib for a similar optimization), the string-match | ||
90 | algorithm searches after a match for potential overlapping copy | ||
91 | instructions. In Xdelta and by default, VCDIFF, the minimum match | ||
92 | size is 4 bytes, whereas Zlib searches with a 3-byte minimum. This | ||
93 | feature, combined with double instructions, provides a nice | ||
94 | challenge. Search in this file for "black magic", a heuristic. | ||
95 | |||
96 | 3. STREAM ALIGNMENT. Stream alignment is needed to compress large | ||
97 | inputs in constant space. TODO: redocument | ||
98 | |||
99 | 4. WINDOW SELECTION. When the IOPT buffer flushes, in the first call | ||
100 | to xd3_iopt_finish_encoding containing any kind of copy instruction, | ||
101 | the parameters of the source window must be decided: the offset into | ||
102 | the source and the length of the window. Since the IOPT buffer is | ||
103 | finite, the program may be forced to fix these values before knowing | ||
104 | the best offset/length. XD3_DEFAULT_SRCBACK limits the length, but a | ||
105 | smaller length is preferred because all target copies are addressed | ||
106 | after source copies in the VCDIFF address space. Picking too large a | ||
107 | source window means larger address encoding. | ||
108 | |||
109 | If the IOPT buffer is filling easily, perhaps the target window is | ||
110 | too large. In any case, a decision is made (though an alternative is | ||
111 | to emit the sub-window right away, to reduce the winsize | ||
112 | automatically - not implemented, another alternative is to grow the | ||
113 | IOPT buffer, it is after all bounded in size by winsize.) | ||
114 | |||
115 | The algorithm is in xd3_srcwin_setup. | ||
116 | |||
117 | 5. SECONDARY COMPRESSION. VCDIFF supports a secondary encoding to | ||
118 | be applied to the individual sections of the data format, which are | ||
119 | ADDRess, INSTruction, and DATA. Several secondary compressor | ||
120 | variations are implemented here, although none is standardized yet. | ||
121 | |||
122 | One is an adaptive huffman algorithm -- the FGK algorithm (Faller, | ||
123 | Gallager, and Knuth, 1985). This compressor is extremely slow. | ||
124 | |||
125 | The other is a simple static Huffman routine, which is the base | ||
126 | case of a semi-adaptive scheme published by D.J. Wheeler and first | ||
127 | widely used in bzip2 (by Julian Seward). This is a very | ||
128 | interesting algorithm, originally published in nearly cryptic form | ||
129 | by D.J. Wheeler. !!!NOTE!!! Because these are not standardized, the | ||
130 | -S option (no secondary compression) remains on by default. | ||
131 | ftp://ftp.cl.cam.ac.uk/users/djw3/bred3.{c,ps} | ||
132 | -------------------------------------------------------------------- | ||
133 | |||
134 | Other Features | ||
135 | |||
136 | 1. USER CONVENIENCE | ||
137 | |||
138 | For user convenience, it is essential to recognize Gzip-compressed | ||
139 | files and automatically Gzip-decompress them prior to | ||
140 | delta-compression (or else no delta-compression will be achieved | ||
141 | unless the user manually decompresses the inputs). The compressed | ||
142 | representation competes with Xdelta, and this must be hidden from the | ||
143 | command-line user interface. The Xdelta-1.x encoding was simple, not | ||
144 | compressed itself, so Xdelta-1.x uses Zlib internally to compress the | ||
145 | representation. | ||
146 | |||
147 | This implementation supports external compression, which implements | ||
148 | the necessary fork() and pipe() mechanics. There is a tricky step | ||
149 | involved to support automatic detection of a compressed input in a | ||
150 | non-seekable input. First you read a bit of the input to detect | ||
151 | magic headers. When a compressed format is recognized, exec() the | ||
152 | external compression program and create a second child process to | ||
153 | copy the original input stream. [Footnote: There is a difficulty | ||
154 | related to using Gzip externally. It is not possible to decompress | ||
155 | and recompress a Gzip file transparently. If FILE.GZ had a | ||
156 | cryptographic signature, then, after: (1) Gzip-decompression, (2) | ||
157 | Xdelta-encoding, (3) Gzip-compression the signature could be | ||
158 | broken. The only way to solve this problem is to guess at Gzip's | ||
159 | compression level or control it by other means. I recommend that | ||
160 | specific implementations of any compression scheme store | ||
161 | information needed to exactly re-compress the input, that way | ||
162 | external compression is transparent - however, this won't happen | ||
163 | here until it has stabilized.] | ||
164 | |||
165 | 2. APPLICATION-HEADER | ||
166 | |||
167 | This feature was introduced in RFC3284. It allows any application | ||
168 | to include a header within the VCDIFF file format. This allows | ||
169 | general inter-application data exchange with support for | ||
170 | application-specific extensions to communicate metadata. | ||
171 | |||
172 | 3. VCDIFF CHECKSUM | ||
173 | |||
174 | An optional checksum value is included with each window, which can | ||
175 | be used to validate the final result. This verifies the correct source | ||
176 | file was used for decompression as well as the obvious advantage: | ||
177 | checking the implementation (and underlying) correctness. | ||
178 | |||
179 | 4. LIGHT WEIGHT | ||
180 | |||
181 | The code makes efforts to avoid copying data more than necessary. | ||
182 | The code delays many initialization tasks until the first use, it | ||
183 | optimizes for identical (perfectly matching) inputs. It does not | ||
184 | compute any checksums until the first lookup misses. Memory usage | ||
185 | is reduced. String-matching is templatized (by slightly gross use | ||
186 | of CPP) to hard-code alternative compile-time defaults. The code | ||
187 | has few outside dependencies. | ||
188 | ---------------------------------------------------------------------- | ||
189 | |||
190 | The default rfc3284 instruction table: | ||
191 | (see RFC for the explanation) | ||
192 | |||
193 | TYPE SIZE MODE TYPE SIZE MODE INDEX | ||
194 | -------------------------------------------------------------------- | ||
195 | 1. Run 0 0 Noop 0 0 0 | ||
196 | 2. Add 0, [1,17] 0 Noop 0 0 [1,18] | ||
197 | 3. Copy 0, [4,18] 0 Noop 0 0 [19,34] | ||
198 | 4. Copy 0, [4,18] 1 Noop 0 0 [35,50] | ||
199 | 5. Copy 0, [4,18] 2 Noop 0 0 [51,66] | ||
200 | 6. Copy 0, [4,18] 3 Noop 0 0 [67,82] | ||
201 | 7. Copy 0, [4,18] 4 Noop 0 0 [83,98] | ||
202 | 8. Copy 0, [4,18] 5 Noop 0 0 [99,114] | ||
203 | 9. Copy 0, [4,18] 6 Noop 0 0 [115,130] | ||
204 | 10. Copy 0, [4,18] 7 Noop 0 0 [131,146] | ||
205 | 11. Copy 0, [4,18] 8 Noop 0 0 [147,162] | ||
206 | 12. Add [1,4] 0 Copy [4,6] 0 [163,174] | ||
207 | 13. Add [1,4] 0 Copy [4,6] 1 [175,186] | ||
208 | 14. Add [1,4] 0 Copy [4,6] 2 [187,198] | ||
209 | 15. Add [1,4] 0 Copy [4,6] 3 [199,210] | ||
210 | 16. Add [1,4] 0 Copy [4,6] 4 [211,222] | ||
211 | 17. Add [1,4] 0 Copy [4,6] 5 [223,234] | ||
212 | 18. Add [1,4] 0 Copy 4 6 [235,238] | ||
213 | 19. Add [1,4] 0 Copy 4 7 [239,242] | ||
214 | 20. Add [1,4] 0 Copy 4 8 [243,246] | ||
215 | 21. Copy 4 [0,8] Add 1 0 [247,255] | ||
216 | -------------------------------------------------------------------- | ||
217 | |||
218 | Reading the source: Overview | ||
219 | |||
220 | This file includes itself in several passes to macro-expand certain | ||
221 | sections with variable forms. Just read ahead, there's only a | ||
222 | little confusion. I know this sounds ugly, but hard-coding some of | ||
223 | the string-matching parameters results in a 10-15% increase in | ||
224 | string-match performance. The only time this hurts is when you have | ||
225 | unbalanced #if/endifs. | ||
226 | |||
227 | A single compilation unit tames the Makefile. In short, this is to | ||
228 | allow the above-described hack without an exploding Makefile. The | ||
229 | single compilation unit includes the core library features, | ||
230 | configurable string-match templates, optional main() command-line | ||
231 | tool, misc optional features, and a regression test. Features are | ||
232 | controlled with CPP #defines, see Makefile.am. | ||
233 | |||
234 | The initial __XDELTA3_C_HEADER_PASS__ starts first, the INLINE and | ||
235 | TEMPLATE sections follow. Easy stuff first, hard stuff last. | ||
236 | |||
237 | Optional features include: | ||
238 | |||
239 | xdelta3-main.h The command-line interface, external compression | ||
240 | support, POSIX-specific, info & VCDIFF-debug tools. | ||
241 | xdelta3-second.h The common secondary compression routines. | ||
242 | xdelta3-djw.h The semi-adaptive huffman secondary encoder. | ||
243 | xdelta3-fgk.h The adaptive huffman secondary encoder. | ||
244 | xdelta3-test.h The unit test covers major algorithms, | ||
245 | encoding and decoding. There are single-bit | ||
246 | error decoding tests. There are 32/64-bit file size | ||
247 | boundary tests. There are command-line tests. | ||
248 | There are compression tests. There are external | ||
249 | compression tests. There are string-matching tests. | ||
250 | There should be more tests... | ||
251 | |||
252 | Additional headers include: | ||
253 | |||
254 | xdelta3.h The public header file. | ||
255 | xdelta3-cfgs.h The default settings for default, built-in | ||
256 | encoders. These are hard-coded at | ||
257 | compile-time. There is also a single | ||
258 | soft-coded string matcher for experimenting | ||
259 | with arbitrary values. | ||
260 | xdelta3-list.h A cyclic list template | ||
261 | |||
262 | Misc little debug utilities: | ||
263 | |||
264 | badcopy.c Randomly modifies an input file based on two | ||
265 | parameters: (1) the probability that a byte in | ||
266 | the file is replaced with a pseudo-random value, | ||
267 | and (2) the mean change size. Changes are | ||
268 | generated using an exponential distribution | ||
269 | which approximates the expected error_prob | ||
270 | distribution. | ||
271 | show.c Prints an offset/length segment from a file. | ||
272 | testh.c Checks that xdelta3.h can be #included | ||
273 | -------------------------------------------------------------------- | ||
274 | |||
275 | This file itself is unusually large. I hope to defend this layout | ||
276 | with lots of comments. Everything in this file is related to | ||
277 | encoding and decoding. I like it all together - the template stuff | ||
278 | is just a hack. */ | ||
279 | |||
280 | #ifndef __XDELTA3_C_HEADER_PASS__ | ||
281 | #define __XDELTA3_C_HEADER_PASS__ | ||
282 | |||
283 | #include <errno.h> | ||
284 | #include <string.h> | ||
285 | |||
286 | #include "xdelta3.h" | ||
287 | |||
288 | /****************************************************************************************** | ||
289 | STATIC CONFIGURATION | ||
290 | ******************************************************************************************/ | ||
291 | |||
/* Build-time feature switches.  Every switch can be supplied by the build
 * (e.g. on the compiler command line); a switch that is left unset receives
 * the default given here. */

#if !defined (XD3_MAIN)
#define XD3_MAIN 0                              /* the main application */
#endif

#if !defined (VCDIFF_TOOLS)
#define VCDIFF_TOOLS XD3_MAIN                   /* follows XD3_MAIN unless set explicitly */
#endif

/* One from the algorithm preservation department: adaptive Huffman routines. */
#if !defined (SECONDARY_FGK)
#define SECONDARY_FGK 0
#endif

/* Semi-adaptive/static Huffman for the eventual standardization, off by
 * default until such time. */
#if !defined (SECONDARY_DJW)
#define SECONDARY_DJW 0
#endif

/* These three are the RFC-spec'd app-specific code features.  This is tested
 * but not recommended unless there's a real application. */
#if !defined (GENERIC_ENCODE_TABLES)
#define GENERIC_ENCODE_TABLES 0
#endif
#if !defined (GENERIC_ENCODE_TABLES_COMPUTE)
#define GENERIC_ENCODE_TABLES_COMPUTE 0
#endif
#if !defined (GENERIC_ENCODE_TABLES_COMPUTE_PRINT)
#define GENERIC_ENCODE_TABLES_COMPUTE_PRINT 0
#endif

/* Q is a string-literal fragment: "q" when XD3_USE_LARGEFILE64 is nonzero,
 * nothing otherwise.  (How does everyone else do this?) */
#if !XD3_USE_LARGEFILE64
#define Q
#else
#define Q "q"
#endif

/* IF_ENCODER(x) keeps x only when XD3_ENCODER is nonzero. */
#if !XD3_ENCODER
#define IF_ENCODER(x)
#else
#define IF_ENCODER(x) x
#endif
329 | |||
330 | /******************************************************************************************/ | ||
331 | |||
/* Indicator bits.  Three groups share one enum: the header indicator, the
 * window indicator and the delta indicator.  In each group the low three
 * bits are assigned and the VCD_INV* value is the complement of those bits.
 * NOTE(review): the ~7U values do not fit in an int, which ISO C (before
 * C23) requires of enumerators; compilers accept it as an extension. */
typedef enum {

  /* header indicator bits */
  VCD_SECONDARY = 0x1,   /* uses secondary compressor */
  VCD_CODETABLE = 0x2,   /* supplies code table data */
  VCD_APPHEADER = 0x4,   /* supplies application data */
  VCD_INVHDR    = ~7U,

  /* window indicator bits */
  VCD_SOURCE    = 0x1,   /* copy window in source file */
  VCD_TARGET    = 0x2,   /* copy window in target file */
  VCD_ADLER32   = 0x4,   /* has adler32 checksum */
  VCD_INVWIN    = ~7U,

  VCD_SRCORTGT  = VCD_SOURCE | VCD_TARGET,

  /* delta indicator bits */
  VCD_DATACOMP  = 0x1,
  VCD_INSTCOMP  = 0x2,
  VCD_ADDRCOMP  = 0x4,
  VCD_INVDEL    = ~0x7U

} xd3_indicator;

/* Secondary compressor identifiers.
 * !!!Note: these are not a standard IANA-allocated ID!!! */
typedef enum {
  VCD_DJW_ID = 1,
  VCD_FGK_ID = 16
} xd3_secondary_ids;

/* Secondary compressor flags. */
typedef enum {
  SEC_NOFLAGS     = 0,
  SEC_COUNT_FREQS = 0x1   /* OPT: Not implemented: Could eliminate first pass of Huffman... */
} xd3_secondary_flags;

/* These indicate which section to the secondary compressor.  The header
 * section is not compressed, therefore not listed here. */
typedef enum {
  DATA_SECTION,
  INST_SECTION,
  ADDR_SECTION
} xd3_section_type;

/* Instruction types.  XD3_CPY rtypes are represented as
 * (XD3_CPY + copy-mode value). */
typedef enum
{
  XD3_NOOP = 0,
  XD3_ADD  = 1,
  XD3_RUN  = 2,
  XD3_CPY  = 3
} xd3_rtype;
379 | |||
380 | /******************************************************************************************/ | ||
381 | |||
382 | #include "xdelta3-list.h" | ||
383 | |||
384 | XD3_MAKELIST(xd3_rlist, xd3_rinst, link); | ||
385 | |||
386 | /******************************************************************************************/ | ||
387 | |||
/* Branch-prediction hints.
 *
 * likely(x) and unlikely(x) evaluate x exactly once and yield 1 when x is
 * nonzero / non-null and 0 otherwise; they are meant for use as conditions.
 * With GCC 3 or later the result is additionally passed through
 * __builtin_expect.
 *
 * The argument is normalized with `!!' before it reaches __builtin_expect:
 * the builtin takes a long, so handing it a pointer or a wider integer
 * directly would force a conversion.
 *
 * Each macro has its own guard, so an environment that already supplies
 * only one of the two names still ends up with both. */
#ifndef unlikely
#if defined(__GNUC__) && __GNUC__ >= 3
#define unlikely(x) __builtin_expect(!!(x),0)
#else
#define unlikely(x) (!!(x))
#endif
#endif

#ifndef likely
#if defined(__GNUC__) && __GNUC__ >= 3
#define likely(x) __builtin_expect(!!(x),1)
#else
#define likely(x) (!!(x))
#endif
#endif
397 | |||
/* Secondary compression thresholds. */
#define SECONDARY_MIN_SAVINGS 2   /* Secondary compression has to save at least this many bytes. */
#define SECONDARY_MIN_INPUT   10  /* Secondary compression needs at least this many bytes. */

/* The first four bytes of a VCDIFF file, in order. */
#define VCDIFF_MAGIC1  0xd6
#define VCDIFF_MAGIC2  0xc3
#define VCDIFF_MAGIC3  0xc4
#define VCDIFF_VERSION 0x00

/* The first two address modes. */
#define VCD_SELF 0
#define VCD_HERE 1

/* Buffer sizes for code tables. */
#define CODE_TABLE_STRING_SIZE (6 * 256)  /* Should fit a code table string. */
#define CODE_TABLE_VCDIFF_SIZE (6 * 256)  /* Should fit a compressed code table string */

/* True if any secondary compressor is used. */
#define SECONDARY_ANY (SECONDARY_DJW || SECONDARY_FGK)

/* Used in test code--size of the secondary compressor alphabet. */
#define ALPHABET_SIZE 256

/* Old hashing experiments */
#define HASH_PRIME        0
#define HASH_PERMUTE      1
#define ARITH_SMALL_CKSUM 1

/* Table entries distinguish "no-entry" from offset 0 using this offset. */
#define HASH_CKOFFSET 1U

/* Match-optimization stuff. */
#define MIN_SMALL_LOOK   2U
#define MIN_LARGE_LOOK   2U
#define MIN_MATCH_OFFSET 1U
#define MAX_MATCH_SPLIT  18U  /* VCDIFF code table: 18 is the default limit for direct-coded ADD sizes */

/* The least number of bytes an overlapping match must beat the preceding
 * match by.  This is a bias for the lazy match optimization.  A non-zero
 * value means that an adjacent match has to be better by more than the step
 * between them.  0. */
#define LEAST_MATCH_INCR 0

#define MIN_MATCH 4U  /* VCDIFF code table: MIN_MATCH=4 */
#define MIN_ADD   1U  /* 1 */

/* The shortest run, if it is shorter than this an immediate add/copy will be
 * just as good.  ADD1/COPY6 = 1I+1D+1A bytes, RUN18 = 1I+1D+1A. */
#define MIN_RUN 8U

/* Maximum number of modes used for compression--does not limit decompression. */
#define MAX_MODES 9

/* Number of separate output sections. */
#define ENC_SECTS 4
442 | |||
/* Accessors for the per-section output chains of stream `s'.  Index 0 is the
 * header section, 1 the data section, 2 the instruction section and 3 the
 * address section.  Each expands to an lvalue.
 *
 * The argument is now actually used: the previous definitions ignored `s'
 * and always referred to a variable named `stream'. */
#define HDR_TAIL(s)  ((s)->enc_tails[0])
#define DATA_TAIL(s) ((s)->enc_tails[1])
#define INST_TAIL(s) ((s)->enc_tails[2])
#define ADDR_TAIL(s) ((s)->enc_tails[3])

#define HDR_HEAD(s)  ((s)->enc_heads[0])
#define DATA_HEAD(s) ((s)->enc_heads[1])
#define INST_HEAD(s) ((s)->enc_heads[2])
#define ADDR_HEAD(s) ((s)->enc_heads[3])

/* Number of elements in the array x.  x must be a true array, not a pointer. */
#define SIZEOF_ARRAY(x) (sizeof(x) / sizeof((x)[0]))

/* Two plus the sizes of the SAME and NEAR address caches of x. */
#define TOTAL_MODES(x) (2+(x)->acache.s_same+(x)->acache.s_near)
456 | |||
/* Template instances.  IF_BUILD_<NAME>(x) keeps x only when the matching
 * XD3_BUILD_<NAME> switch is nonzero. */
#if !XD3_BUILD_SLOW
#define IF_BUILD_SLOW(x)
#else
#define IF_BUILD_SLOW(x) x
#endif

#if !XD3_BUILD_FAST
#define IF_BUILD_FAST(x)
#else
#define IF_BUILD_FAST(x) x
#endif

#if !XD3_BUILD_SOFT
#define IF_BUILD_SOFT(x)
#else
#define IF_BUILD_SOFT(x) x
#endif

/* Declare one string-matcher descriptor per enabled template; no
 * initializer is given here. */
IF_BUILD_SLOW(static const xd3_smatcher __smatcher_slow;)
IF_BUILD_FAST(static const xd3_smatcher __smatcher_fast;)
IF_BUILD_SOFT(static const xd3_smatcher __smatcher_soft;)
477 | |||
/* Small-hash debugging aids; all three expand to nothing unless XD3_DEBUG
 * is nonzero.
 *
 *   SMALL_HASH_DEBUG1  declares debug_hval, the small-checksum hash of the
 *                      input at `inp'.
 *   SMALL_HASH_DEBUG2  asserts that recomputing that hash for `inp' yields
 *                      debug_hval.
 *   SMALL_HASH_STATS   keeps its argument. */
#if !XD3_DEBUG
#define SMALL_HASH_DEBUG1(s,inp)
#define SMALL_HASH_DEBUG2(s,inp)
#define SMALL_HASH_STATS(x)
#else
#define SMALL_HASH_DEBUG1(s,inp) \
  usize_t debug_hval = xd3_checksum_hash (& (s)->small_hash, \
                                          xd3_scksum ((inp), (s)->small_look))
#define SMALL_HASH_DEBUG2(s,inp) \
  XD3_ASSERT (debug_hval == xd3_checksum_hash (& (s)->small_hash, \
                                               xd3_scksum ((inp), (s)->small_look)))
#define SMALL_HASH_STATS(x) x
#endif /* XD3_DEBUG */
491 | |||
/* Copies one named member from *src to *dst. */
#define XD3_COPY_CONFIG_FIELD_(dst,src,field) ((dst)->field = (src)->field)

/* Config fields: three structures contain these variables, so this is
 * non-typed.  Copies every string-matching parameter from *src to *dst;
 * both arguments are evaluated once per field. */
#define XD3_COPY_CONFIG_FIELDS(dst,src) \
  do { \
    XD3_COPY_CONFIG_FIELD_ (dst, src, large_look); \
    XD3_COPY_CONFIG_FIELD_ (dst, src, large_step); \
    XD3_COPY_CONFIG_FIELD_ (dst, src, small_look); \
    XD3_COPY_CONFIG_FIELD_ (dst, src, small_chain); \
    XD3_COPY_CONFIG_FIELD_ (dst, src, small_lchain); \
    XD3_COPY_CONFIG_FIELD_ (dst, src, ssmatch); \
    XD3_COPY_CONFIG_FIELD_ (dst, src, try_lazy); \
    XD3_COPY_CONFIG_FIELD_ (dst, src, max_lazy); \
    XD3_COPY_CONFIG_FIELD_ (dst, src, long_enough); \
    XD3_COPY_CONFIG_FIELD_ (dst, src, promote); \
  } while (0)
506 | |||
/* Update the run-length state.  Relies on two variables in the caller's
 * scope: run_c, the byte of the current run, and run_l, its length so far.
 * A byte equal to run_c lengthens the run; any other byte starts a new run
 * of length 1.  Note that `c' is evaluated twice when a new run starts. */
#define NEXTRUN(c) \
  do { \
    if (run_c != (c)) \
      { \
        run_c = (c); \
        run_l = 1; \
      } \
    else \
      { \
        run_l += 1; \
      } \
  } while (0)
509 | |||
/* Update the checksum state.  Rolls `cksum' forward by one position: the
 * byte at base[0] leaves the window and the byte at base[look] enters it.
 * The checksum is kept as two 16-bit halves; the low half is adjusted by
 * the difference of the two permuted bytes, and the high half loses `look'
 * times the outgoing byte and gains the new low half.  `cksum' must be an
 * lvalue. */
#define LARGE_CKSUM_UPDATE(cksum,base,look) \
  do { \
    uint32_t outgoing = PERMUTE((base)[0]); \
    uint32_t incoming = PERMUTE((base)[(look)]); \
    uint32_t lo16 = (((cksum) & 0xffff) - outgoing + incoming) & 0xffff; \
    uint32_t hi16 = (((cksum) >> 16) - (outgoing * (look)) + lo16) & 0xffff; \
    (cksum) = (hi16 << 16) | lo16; \
  } while (0)
519 | |||
/* Multiply and add hash function.
 *
 * With ARITH_SMALL_CKSUM set, the new checksum is the unsigned long stored
 * one byte past `base', multiplied by 71143; the previous value of `cksum'
 * and the `look' argument are not used.  Otherwise the large-checksum
 * update is used for small checksums too.
 *
 * `base' is parenthesized so that an expression argument (for example a
 * conditional) is offset as a whole.
 *
 * NOTE(review): the load goes through a cast of a byte pointer to
 * unsigned long*, which is generally misaligned and reads
 * sizeof(unsigned long) bytes whatever `look' is -- confirm this is
 * acceptable on all supported targets. */
#if ARITH_SMALL_CKSUM
#define SMALL_CKSUM_UPDATE(cksum,base,look) (cksum) = ((*(unsigned long*)((base)+1)) * 71143)
#else
#define SMALL_CKSUM_UPDATE LARGE_CKSUM_UPDATE
#endif
526 | |||
/* Consume N bytes of input, only used by the decoder.  Operates on the
 * variable named `stream' in the caller's scope: the running input total
 * grows by n, the available count shrinks by n, and the input pointer
 * moves forward by n.  `n' is evaluated three times, in that order. */
#define DECODE_INPUT(n) \
  do { \
    stream->total_in = stream->total_in + (xoff_t) (n); \
    stream->avail_in = stream->avail_in - (n); \
    stream->next_in = stream->next_in + (n); \
  } while (0)
534 | |||
/* This CPP-conditional stuff can be cleaned up...
 *
 *   IF_DEBUG(x)       x when XD3_DEBUG is nonzero, else nothing
 *   DEBUG_ARG(x)      ", x" when XD3_DEBUG is nonzero, else nothing
 *   IF_DEBUG1(x)      x when XD3_DEBUG is greater than 1, else nothing
 *   IF_REGRESSION(x)  x when REGRESSION_TEST is nonzero, else nothing */
#if !XD3_DEBUG
#define IF_DEBUG(x)
#define DEBUG_ARG(x)
#else
#define IF_DEBUG(x) x
#define DEBUG_ARG(x) , x
#endif

#if XD3_DEBUG <= 1
#define IF_DEBUG1(x)
#else
#define IF_DEBUG1(x) x
#endif

#if !REGRESSION_TEST
#define IF_REGRESSION(x)
#else
#define IF_REGRESSION(x) x
#endif
553 | |||
554 | /******************************************************************************************/ | ||
555 | |||
556 | #if XD3_ENCODER | ||
557 | static void* xd3_alloc0 (xd3_stream *stream, | ||
558 | usize_t elts, | ||
559 | usize_t size); | ||
560 | |||
561 | |||
562 | static xd3_output* xd3_alloc_output (xd3_stream *stream, | ||
563 | xd3_output *old_output); | ||
564 | |||
565 | |||
566 | |||
567 | static void xd3_free_output (xd3_stream *stream, | ||
568 | xd3_output *output); | ||
569 | |||
570 | static int xd3_emit_byte (xd3_stream *stream, | ||
571 | xd3_output **outputp, | ||
572 | uint8_t code); | ||
573 | |||
574 | static int xd3_emit_bytes (xd3_stream *stream, | ||
575 | xd3_output **outputp, | ||
576 | const uint8_t *base, | ||
577 | usize_t size); | ||
578 | |||
579 | static int xd3_emit_double (xd3_stream *stream, xd3_rinst *first, xd3_rinst *second, uint code); | ||
580 | static int xd3_emit_single (xd3_stream *stream, xd3_rinst *single, uint code); | ||
581 | |||
582 | static usize_t xd3_sizeof_output (xd3_output *output); | ||
583 | |||
584 | static int xd3_source_match_setup (xd3_stream *stream, xoff_t srcpos); | ||
585 | static int xd3_source_extend_match (xd3_stream *stream); | ||
586 | static int xd3_srcwin_setup (xd3_stream *stream); | ||
587 | static int xd3_srcwin_move_point (xd3_stream *stream, usize_t *next_move_point); | ||
588 | static usize_t xd3_iopt_last_matched (xd3_stream *stream); | ||
589 | static int xd3_emit_uint32_t (xd3_stream *stream, xd3_output **output, uint32_t num); | ||
590 | |||
591 | #endif /* XD3_ENCODER */ | ||
592 | |||
593 | static int xd3_decode_allocate (xd3_stream *stream, usize_t size, | ||
594 | uint8_t **copied1, usize_t *alloc1, | ||
595 | uint8_t **copied2, usize_t *alloc2); | ||
596 | |||
597 | static void xd3_compute_code_table_string (const xd3_dinst *code_table, uint8_t *str); | ||
598 | static void* xd3_alloc (xd3_stream *stream, usize_t elts, usize_t size); | ||
599 | static void xd3_free (xd3_stream *stream, void *ptr); | ||
600 | |||
601 | static int xd3_read_uint32_t (xd3_stream *stream, const uint8_t **inpp, | ||
602 | const uint8_t *max, uint32_t *valp); | ||
603 | |||
604 | #if REGRESSION_TEST | ||
605 | static int xd3_selftest (void); | ||
606 | #endif | ||
607 | |||
608 | /******************************************************************************************/ | ||
609 | |||
/* Masks with the top seven bits of a 32-bit / 64-bit value set.
 * NOTE(review): presumably checked before another 7-bit group is shifted in
 * while reading a variable-length integer -- confirm at the use sites. */
#define UINT32_OFLOW_MASK 0xfe000000U
#define UINT64_OFLOW_MASK 0xfe00000000000000ULL

/* Largest 32-bit and 64-bit unsigned values.  <stdint.h> defines macros with
 * these names too, so each is only defined here when it is still missing;
 * defining them unconditionally redefines a standard macro. */
#ifndef UINT32_MAX
#define UINT32_MAX 4294967295U
#endif
#ifndef UINT64_MAX
#define UINT64_MAX 18446744073709551615ULL
#endif

/* Select the 32-bit or 64-bit integer coding routines for usize_t,
 * according to SIZEOF_USIZE_T. */
#if SIZEOF_USIZE_T == 4
#define USIZE_T_MAX UINT32_MAX
#define xd3_decode_size xd3_decode_uint32_t
#define xd3_emit_size xd3_emit_uint32_t
#define xd3_sizeof_size xd3_sizeof_uint32_t
#define xd3_read_size xd3_read_uint32_t
#elif SIZEOF_USIZE_T == 8
#define USIZE_T_MAX UINT64_MAX
#define xd3_decode_size xd3_decode_uint64_t
#define xd3_emit_size xd3_emit_uint64_t
#define xd3_sizeof_size xd3_sizeof_uint64_t
#define xd3_read_size xd3_read_uint64_t
#endif

/* Likewise for xoff_t, according to SIZEOF_XOFF_T.  Only the decode alias
 * is active; the other three are commented out. */
#if SIZEOF_XOFF_T == 4
#define XOFF_T_MAX UINT32_MAX
#define xd3_decode_offset xd3_decode_uint32_t
//#define xd3_emit_offset xd3_emit_uint32_t
//#define xd3_sizeof_offset xd3_sizeof_uint32_t
//#define xd3_read_offset xd3_read_uint32_t
#elif SIZEOF_XOFF_T == 8
#define XOFF_T_MAX UINT64_MAX
#define xd3_decode_offset xd3_decode_uint64_t
//#define xd3_emit_offset xd3_emit_uint64_t
//#define xd3_sizeof_offset xd3_sizeof_uint64_t
//#define xd3_read_offset xd3_read_uint64_t
#endif

/* True when a + b would exceed the maximum of the respective type. */
#define USIZE_T_OVERFLOW(a,b) ((USIZE_T_MAX - (usize_t) (a)) < (usize_t) (b))
#define XOFF_T_OVERFLOW(a,b) ((XOFF_T_MAX - (xoff_t) (a)) < (xoff_t) (b))
646 | |||
647 | const char* xd3_strerror (int ret) | ||
648 | { | ||
649 | switch (ret) | ||
650 | { | ||
651 | case XD3_INPUT: return "XD3_INPUT"; | ||
652 | case XD3_OUTPUT: return "XD3_OUTPUT"; | ||
653 | case XD3_GETSRCBLK: return "XD3_GETSRCBLK"; | ||
654 | case XD3_GOTHEADER: return "XD3_GOTHEADER"; | ||
655 | case XD3_WINSTART: return "XD3_WINSTART"; | ||
656 | case XD3_WINFINISH: return "XD3_WINFINISH"; | ||
657 | } | ||
658 | return strerror (ret); | ||
659 | } | ||
660 | |||
661 | /******************************************************************************************/ | ||
662 | |||
/* IF_SEC(x) keeps x only when some secondary compressor is compiled in;
 * IF_NSEC(x) keeps x only when none is.  The common secondary-compression
 * routines are included in the former case. */
#if SECONDARY_ANY /* yuck */
#define IF_SEC(x) x
#define IF_NSEC(x)
#include "xdelta3-second.h"
#else
#define IF_SEC(x)
#define IF_NSEC(x) x
#endif /* SECONDARY_ANY */
671 | |||
/* FGK adaptive Huffman secondary compressor.
 *
 * When SECONDARY_FGK is set, fgk_sec_type describes the compressor: its ID,
 * its name, its flags, then the alloc, destroy, init and decode entry
 * points and, in encoder builds only, the encode entry point.
 * NOTE(review): the entry points are cast to unprototyped function-pointer
 * types; confirm they match the member types of xd3_sec_type.
 *
 * IF_FGK(x) keeps x only when the compressor is available.
 *
 * FGK_CASE(s) is a pair of statements: it either selects this compressor on
 * `s' and breaks, or sets an error message on `s' and returns EINVAL.  It
 * therefore has to be used where `break' or `return' of an int is valid.
 * The argument is parenthesized so that any pointer expression can be
 * passed. */
#if SECONDARY_FGK
#include "xdelta3-fgk.h"

static const xd3_sec_type fgk_sec_type =
{
  VCD_FGK_ID,
  "FGK Adaptive Huffman",
  SEC_NOFLAGS,
  (xd3_sec_stream* (*)())  fgk_alloc,
  (void (*)())             fgk_destroy,
  (void (*)())             fgk_init,
  (int (*)())              xd3_decode_fgk,
  IF_ENCODER((int (*)())   xd3_encode_fgk)
};

#define IF_FGK(x) x
#define FGK_CASE(s) \
  (s)->sec_type = & fgk_sec_type; \
  break;
#else
#define IF_FGK(x)
#define FGK_CASE(s) \
  (s)->msg = "unavailable secondary compressor: FGK Adaptive Huffman"; \
  return EINVAL;
#endif
697 | |||
698 | #if SECONDARY_DJW /* DJW secondary compressor compiled in */ | ||
699 | #include "xdelta3-djw.h" | ||
700 | |||
701 | static const xd3_sec_type djw_sec_type = | ||
702 | { | ||
703 | VCD_DJW_ID, | ||
704 | "Static Huffman", | ||
705 | SEC_COUNT_FREQS, | ||
706 | (xd3_sec_stream* (*)()) djw_alloc, /* the DJW entry points are stored through casts to unprototyped function-pointer types */ | ||
707 | (void (*)()) djw_destroy, | ||
708 | (void (*)()) djw_init, | ||
709 | (int (*)()) xd3_decode_huff, | ||
710 | IF_ENCODER((int (*)()) xd3_encode_huff) | ||
711 | }; | ||
712 | |||
713 | #define IF_DJW(x) x | ||
714 | #define DJW_CASE(s) \ | ||
715 | s->sec_type = & djw_sec_type; \ | ||
716 | break; | ||
717 | #else /* DJW not compiled in: IF_DJW(x) drops x, and DJW_CASE(s) sets s->msg and returns EINVAL */ | ||
718 | #define IF_DJW(x) | ||
719 | #define DJW_CASE(s) \ | ||
720 | s->msg = "unavailable secondary compressor: DJW Static Huffman"; \ | ||
721 | return EINVAL; | ||
722 | #endif /* SECONDARY_DJW; NOTE(review): DJW_CASE ends in break/return, so it is presumably meant to be expanded as the body of a switch case - confirm at the use site */ | ||
723 | |||
724 | /******************************************************************************************/ | ||
725 | |||
726 | /* Abbreviate frequently referenced fields while the inline and template passes are compiled; the abbreviations are #undef'd again below. */ | ||
727 | #define max_in stream->avail_in | ||
728 | #define pos_in stream->input_position | ||
729 | #define min_match stream->min_match | ||
730 | |||
731 | /* Process the inline pass: xdelta3.c is included with __XDELTA3_C_INLINE_PASS__ defined. */ | ||
732 | #define __XDELTA3_C_INLINE_PASS__ | ||
733 | #include "xdelta3.c" | ||
734 | #undef __XDELTA3_C_INLINE_PASS__ | ||
735 | |||
736 | /* Process template passes - this includes xdelta3.c several times. */ | ||
737 | #define __XDELTA3_C_TEMPLATE_PASS__ | ||
738 | #include "xdelta3-cfgs.h" | ||
739 | #undef __XDELTA3_C_TEMPLATE_PASS__ | ||
740 | |||
741 | #undef max_in | ||
742 | #undef pos_in | ||
743 | #undef min_match | ||
744 | |||
745 | #if XD3_MAIN || PYTHON_MODULE | ||
746 | #include "xdelta3-main.h" | ||
747 | #endif | ||
748 | |||
749 | #if REGRESSION_TEST | ||
750 | #include "xdelta3-test.h" | ||
751 | #endif | ||
752 | |||
753 | #if PYTHON_MODULE | ||
754 | #include "xdelta3-python.h" | ||
755 | #endif | ||
756 | |||
757 | #endif /* __XDELTA3_C_HEADER_PASS__ */ | ||
758 | #ifdef __XDELTA3_C_INLINE_PASS__ | ||
759 | |||
760 | /****************************************************************************************** | ||
761 | Instruction tables | ||
762 | ******************************************************************************************/ | ||
763 | |||
764 | /* The following code implements a parametrized description of the | ||
765 | * code table given above for a few reasons. Supporting compression | ||
766 | * with variable tables is not necessary for implementing the | ||
767 | * standard, so an implementation is only required to know the default | ||
768 | * code table to begin decompression. (If the encoder uses an | ||
769 | * alternate table, the table is included in compressed form inside | ||
770 | * the VCDIFF file.) | ||
771 | * | ||
772 | * Before adding variable-table support there were two functions which | ||
773 | * were hard-coded to the default table above. | ||
774 | * xd3_compute_default_table() would create the default table by | ||
775 | * filling a 256-elt array of xd3_dinst values. The corresponding | ||
776 | * function, xd3_choose_instruction(), would choose an instruction | ||
777 | * based on the hard-coded parameters of the default code table. | ||
778 | * | ||
779 | * Notes: The parametrized code table description here only generates | ||
780 | * tables of a certain regularity similar to the default table by | ||
781 | * allowing to vary the distribution of single- and | ||
782 | * double-instructions and change the number of near and same copy | ||
783 | * modes. More exotic tables are only possible by extending this | ||
784 | * code, but a detailed experiment would need to be carried out first, | ||
785 | * probably using separate code. I would like to experiment with a | ||
786 | * double-copy instruction, for example. | ||
787 | * | ||
788 | * For performance reasons, both the parametrized and non-parametrized | ||
789 | * versions of xd3_choose_instruction remain. The parametrized | ||
790 | * version is only needed for testing multi-table decoding support. | ||
791 | * If ever multi-table encoding is required, this can be optimized by | ||
792 | * compiling static functions for each table. | ||
793 | */ | ||
794 | |||
795 | /* The XD3_CHOOSE_INSTRUCTION macro calls xd3_choose_instruction with the | ||
796 | * stream's code_table_desc when GENERIC_ENCODE_TABLES is in use, and without it otherwise. The | ||
797 | * IF_GENCODETBL macro keeps its argument only when GENERIC_ENCODE_TABLES is in use. */ | ||
798 | #if GENERIC_ENCODE_TABLES | ||
799 | #define XD3_CHOOSE_INSTRUCTION(stream,prev,inst) xd3_choose_instruction (stream->code_table_desc, prev, inst) | ||
800 | #define IF_GENCODETBL(x) x | ||
801 | #else | ||
802 | #define XD3_CHOOSE_INSTRUCTION(stream,prev,inst) xd3_choose_instruction (prev, inst) | ||
803 | #define IF_GENCODETBL(x) | ||
804 | #endif | ||
805 | |||
806 | /* This structure maintains information needed by | ||
807 | * xd3_choose_instruction to compute the code for a double instruction | ||
808 | * by first indexing an array of code_table_sizes by copy mode, then | ||
809 | * using (offset + (multiplier * X)) */ | ||
810 | struct _xd3_code_table_sizes { | ||
811 | uint8_t cpy_max; /* largest copy size that still has a double instruction in this mode */ | ||
812 | uint8_t offset; /* code of the first double instruction for this mode */ | ||
813 | uint8_t mult; /* multiplier applied to (size of the first instruction - its minimum size) */ | ||
814 | }; | ||
815 | |||
816 | /* This contains a complete description of a code table, as consumed by xd3_build_code_table and the generic xd3_choose_instruction. */ | ||
817 | struct _xd3_code_table_desc | ||
818 | { | ||
819 | /* Assumes a single RUN instruction */ | ||
820 | /* Assumes that MIN_MATCH is 4 */ | ||
821 | |||
822 | uint8_t add_sizes; /* Number of immediate-size single adds (default 17) */ | ||
823 | uint8_t near_modes; /* Number of near copy modes (default 4) */ | ||
824 | uint8_t same_modes; /* Number of same copy modes (default 3) */ | ||
825 | uint8_t cpy_sizes; /* Number of immediate-size single copies (default 15) */ | ||
826 | |||
827 | uint8_t addcopy_add_max; /* Maximum add size for an add-copy double instruction, all modes (default 4) */ | ||
828 | uint8_t addcopy_near_cpy_max; /* Maximum cpy size for an add-copy double instruction, up through VCD_NEAR modes (default 6) */ | ||
829 | uint8_t addcopy_same_cpy_max; /* Maximum cpy size for an add-copy double instruction, VCD_SAME modes (default 4) */ | ||
830 | |||
831 | uint8_t copyadd_add_max; /* Maximum add size for a copy-add double instruction, all modes (default 1) */ | ||
832 | uint8_t copyadd_near_cpy_max; /* Maximum cpy size for a copy-add double instruction, up through VCD_NEAR modes (default 4) */ | ||
833 | uint8_t copyadd_same_cpy_max; /* Maximum cpy size for a copy-add double instruction, VCD_SAME modes (default 4) */ | ||
834 | |||
835 | xd3_code_table_sizes addcopy_max_sizes[MAX_MODES]; /* add-copy code layout, indexed by copy mode */ | ||
836 | xd3_code_table_sizes copyadd_max_sizes[MAX_MODES]; /* copy-add code layout, indexed by copy mode */ | ||
837 | }; | ||
838 | |||
839 | /* Description of the rfc3284 code table. The initializers follow the field order of xd3_code_table_desc; each {cpy_max, offset, mult} triple is one copy mode. */ | ||
840 | static const xd3_code_table_desc __rfc3284_code_table_desc = { | ||
841 | 17, /* add sizes */ | ||
842 | 4, /* near modes */ | ||
843 | 3, /* same modes */ | ||
844 | 15, /* copy sizes */ | ||
845 | |||
846 | 4, /* add-copy max add */ | ||
847 | 6, /* add-copy max cpy, near */ | ||
848 | 4, /* add-copy max cpy, same */ | ||
849 | |||
850 | 1, /* copy-add max add */ | ||
851 | 4, /* copy-add max cpy, near */ | ||
852 | 4, /* copy-add max cpy, same */ | ||
853 | |||
854 | /* addcopy: codes 163..246 */ | ||
855 | { {6,163,3},{6,175,3},{6,187,3},{6,199,3},{6,211,3},{6,223,3},{4,235,1},{4,239,1},{4,243,1} }, | ||
856 | /* copyadd: codes 247..255 */ | ||
857 | { {4,247,1},{4,248,1},{4,249,1},{4,250,1},{4,251,1},{4,252,1},{4,253,1},{4,254,1},{4,255,1} }, | ||
858 | }; | ||
859 | |||
860 | #if GENERIC_ENCODE_TABLES | ||
861 | /* An alternate code table for testing (5 near, 0 same), i.e. copy modes 0 through 6: | ||
862 | * | ||
863 | * TYPE SIZE MODE TYPE SIZE MODE INDEX | ||
864 | * --------------------------------------------------------------- | ||
865 | * 1. Run 0 0 Noop 0 0 0 | ||
866 | * 2. Add 0, [1,23] 0 Noop 0 0 [1,24] | ||
867 | * 3. Copy 0, [4,20] 0 Noop 0 0 [25,42] | ||
868 | * 4. Copy 0, [4,20] 1 Noop 0 0 [43,60] | ||
869 | * 5. Copy 0, [4,20] 2 Noop 0 0 [61,78] | ||
870 | * 6. Copy 0, [4,20] 3 Noop 0 0 [79,96] | ||
871 | * 7. Copy 0, [4,20] 4 Noop 0 0 [97,114] | ||
872 | * 8. Copy 0, [4,20] 5 Noop 0 0 [115,132] | ||
873 | * 9. Copy 0, [4,20] 6 Noop 0 0 [133,150] | ||
874 | * 10. Add [1,4] 0 Copy [4,6] 0 [151,162] | ||
875 | * 11. Add [1,4] 0 Copy [4,6] 1 [163,174] | ||
876 | * 12. Add [1,4] 0 Copy [4,6] 2 [175,186] | ||
877 | * 13. Add [1,4] 0 Copy [4,6] 3 [187,198] | ||
878 | * 14. Add [1,4] 0 Copy [4,6] 4 [199,210] | ||
879 | * 15. Add [1,4] 0 Copy [4,6] 5 [211,222] | ||
880 | * 16. Add [1,4] 0 Copy [4,6] 6 [223,234] | ||
881 | * 17. Copy 4 [0,6] Add [1,3] 0 [235,255] | ||
882 | * --------------------------------------------------------------- */ | ||
883 | static const xd3_code_table_desc __alternate_code_table_desc = { | ||
884 | 23, /* add sizes */ | ||
885 | 5, /* near modes */ | ||
886 | 0, /* same modes */ | ||
887 | 17, /* copy sizes */ | ||
888 | |||
889 | 4, /* add-copy max add */ | ||
890 | 6, /* add-copy max cpy, near */ | ||
891 | 0, /* add-copy max cpy, same */ | ||
892 | |||
893 | 3, /* copy-add max add */ | ||
894 | 4, /* copy-add max cpy, near */ | ||
895 | 0, /* copy-add max cpy, same */ | ||
896 | |||
897 | /* addcopy: the last two entries are all-zero because this table has only 7 copy modes */ | ||
898 | { {6,151,3},{6,163,3},{6,175,3},{6,187,3},{6,199,3},{6,211,3},{6,223,3},{0,0,0},{0,0,0} }, | ||
899 | /* copyadd: the last two entries are all-zero because this table has only 7 copy modes */ | ||
900 | { {4,235,1},{4,238,1},{4,241,1},{4,244,1},{4,247,1},{4,250,1},{4,253,1},{0,0,0},{0,0,0} }, | ||
901 | }; | ||
902 | #endif | ||
903 | |||
904 | /* Computes code table entries of TBL using the specified description. Exactly 256 entries are written (asserted at the end). Fields not assigned here are left as they were; the callers in this file pass static arrays. */ | ||
905 | static void | ||
906 | xd3_build_code_table (const xd3_code_table_desc *desc, xd3_dinst *tbl) | ||
907 | { | ||
908 | int size1, size2, mode; | ||
909 | int cpy_modes = 2 + desc->near_modes + desc->same_modes; | ||
910 | xd3_dinst *d = tbl; | ||
911 | |||
912 | (d++)->type1 = XD3_RUN; /* entry 0: the single RUN instruction */ | ||
913 | (d++)->type1 = XD3_ADD; /* entry 1: ADD with no size assigned here */ | ||
914 | |||
915 | for (size1 = 1; size1 <= desc->add_sizes; size1 += 1, d += 1) /* immediate-size ADDs */ | ||
916 | { | ||
917 | d->type1 = XD3_ADD; | ||
918 | d->size1 = size1; | ||
919 | } | ||
920 | |||
921 | for (mode = 0; mode < cpy_modes; mode += 1) /* single copies, grouped by mode */ | ||
922 | { | ||
923 | (d++)->type1 = XD3_CPY + mode; /* copy with no size assigned here */ | ||
924 | |||
925 | for (size1 = MIN_MATCH; size1 < MIN_MATCH + desc->cpy_sizes; size1 += 1, d += 1) /* immediate-size copies */ | ||
926 | { | ||
927 | d->type1 = XD3_CPY + mode; | ||
928 | d->size1 = size1; | ||
929 | } | ||
930 | } | ||
931 | |||
932 | for (mode = 0; mode < cpy_modes; mode += 1) /* ADD-COPY double instructions */ | ||
933 | { | ||
934 | for (size1 = 1; size1 <= desc->addcopy_add_max; size1 += 1) | ||
935 | { | ||
936 | int max = (mode < 2 + desc->near_modes) ? desc->addcopy_near_cpy_max : desc->addcopy_same_cpy_max; /* modes below 2 + near_modes use the near limit, the rest the same limit */ | ||
937 | |||
938 | for (size2 = MIN_MATCH; size2 <= max; size2 += 1, d += 1) | ||
939 | { | ||
940 | d->type1 = XD3_ADD; | ||
941 | d->size1 = size1; | ||
942 | d->type2 = XD3_CPY + mode; | ||
943 | d->size2 = size2; | ||
944 | } | ||
945 | } | ||
946 | } | ||
947 | |||
948 | for (mode = 0; mode < cpy_modes; mode += 1) /* COPY-ADD double instructions */ | ||
949 | { | ||
950 | int max = (mode < 2 + desc->near_modes) ? desc->copyadd_near_cpy_max : desc->copyadd_same_cpy_max; | ||
951 | |||
952 | for (size1 = MIN_MATCH; size1 <= max; size1 += 1) | ||
953 | { | ||
954 | for (size2 = 1; size2 <= desc->copyadd_add_max; size2 += 1, d += 1) | ||
955 | { | ||
956 | d->type1 = XD3_CPY + mode; | ||
957 | d->size1 = size1; | ||
958 | d->type2 = XD3_ADD; | ||
959 | d->size2 = size2; | ||
960 | } | ||
961 | } | ||
962 | } | ||
963 | |||
964 | XD3_ASSERT (d - tbl == 256); /* the description must account for exactly 256 entries */ | ||
965 | } | ||
966 | |||
967 | /* This function returns the static default code table, building it from __rfc3284_code_table_desc on the first call. */ | ||
968 | static const xd3_dinst* | ||
969 | xd3_rfc3284_code_table (void) | ||
970 | { | ||
971 | static xd3_dinst __rfc3284_code_table[256]; | ||
972 | |||
973 | if (__rfc3284_code_table[0].type1 != XD3_RUN) /* entry 0 is set to XD3_RUN by xd3_build_code_table, so this doubles as the "already built" flag */ | ||
974 | { | ||
975 | xd3_build_code_table (& __rfc3284_code_table_desc, __rfc3284_code_table); | ||
976 | } | ||
977 | |||
978 | return __rfc3284_code_table; | ||
979 | } | ||
980 | |||
981 | #if XD3_ENCODER | ||
982 | #if GENERIC_ENCODE_TABLES | ||
983 | /* This function returns the static alternate code table, building it from __alternate_code_table_desc on the first call. */ | ||
984 | static const xd3_dinst* | ||
985 | xd3_alternate_code_table (void) | ||
986 | { | ||
987 | static xd3_dinst __alternate_code_table[256]; | ||
988 | |||
989 | if (__alternate_code_table[0].type1 != XD3_RUN) /* entry 0 is set to XD3_RUN by xd3_build_code_table, so this doubles as the "already built" flag */ | ||
990 | { | ||
991 | xd3_build_code_table (& __alternate_code_table_desc, __alternate_code_table); | ||
992 | } | ||
993 | |||
994 | return __alternate_code_table; | ||
995 | } | ||
996 | |||
997 | /* This function computes the ideal second instruction INST based on preceding instruction | ||
998 | * PREV (which may be NULL). It always sets INST->code1; if a double instruction can be issued for this pair | ||
999 | * it additionally sets PREV->code2. DESC describes the code table in use. */ | ||
1000 | static void | ||
1001 | xd3_choose_instruction (const xd3_code_table_desc *desc, xd3_rinst *prev, xd3_rinst *inst) | ||
1002 | { | ||
1003 | switch (inst->type) | ||
1004 | { | ||
1005 | case XD3_RUN: | ||
1006 | /* The 0th instruction is RUN */ | ||
1007 | inst->code1 = 0; | ||
1008 | break; | ||
1009 | |||
1010 | case XD3_ADD: | ||
1011 | |||
1012 | if (inst->size > desc->add_sizes) | ||
1013 | { | ||
1014 | /* The first instruction is non-immediate ADD */ | ||
1015 | inst->code1 = 1; | ||
1016 | } | ||
1017 | else | ||
1018 | { | ||
1019 | /* The following ADD_SIZES instructions are immediate ADDs */ | ||
1020 | inst->code1 = 1 + inst->size; | ||
1021 | |||
1022 | /* Now check for a possible COPY-ADD double instruction */ | ||
1023 | if (prev != NULL) | ||
1024 | { | ||
1025 | int prev_mode = prev->type - XD3_CPY; | ||
1026 | |||
1027 | /* If previous is a copy. Note: as long as the previous is not a RUN | ||
1028 | * instruction, it should be a copy because it cannot be an add. This check | ||
1029 | * is more clear. */ | ||
1030 | if (prev_mode >= 0 && inst->size <= desc->copyadd_add_max) | ||
1031 | { | ||
1032 | const xd3_code_table_sizes *sizes = & desc->copyadd_max_sizes[prev_mode]; | ||
1033 | |||
1034 | /* This check and the inst->size <= check above are == comparisons in the default table. */ | ||
1035 | if (prev->size <= sizes->cpy_max) | ||
1036 | { | ||
1037 | /* The second and third exprs are 0 in the default table. */ | ||
1038 | prev->code2 = sizes->offset + (sizes->mult * (prev->size - MIN_MATCH)) + (inst->size - MIN_ADD); | ||
1039 | } | ||
1040 | } | ||
1041 | } | ||
1042 | } | ||
1043 | break; | ||
1044 | |||
1045 | default: | ||
1046 | { | ||
1047 | int mode = inst->type - XD3_CPY; | ||
1048 | |||
1049 | /* The large copy instruction is offset by the run, large add, and immediate adds, | ||
1050 | * then multiplied by the number of immediate copies plus one (the large copy) | ||
1051 | * (i.e., if there are 15 immediate copy instructions then there are 16 copy | ||
1052 | * instructions per mode). */ | ||
1053 | inst->code1 = 2 + desc->add_sizes + (1 + desc->cpy_sizes) * mode; | ||
1054 | |||
1055 | /* Now if the copy is short enough for an immediate instruction. */ | ||
1056 | if (inst->size < MIN_MATCH + desc->cpy_sizes) | ||
1057 | { | ||
1058 | inst->code1 += inst->size + 1 - MIN_MATCH; | ||
1059 | |||
1060 | /* Now check for a possible ADD-COPY double instruction. */ | ||
1061 | if ( (prev != NULL) && | ||
1062 | (prev->type == XD3_ADD) && | ||
1063 | (prev->size <= desc->addcopy_add_max) ) | ||
1064 | { | ||
1065 | const xd3_code_table_sizes *sizes = & desc->addcopy_max_sizes[mode]; | ||
1066 | |||
1067 | if (inst->size <= sizes->cpy_max) | ||
1068 | { | ||
1069 | prev->code2 = sizes->offset + (sizes->mult * (prev->size - MIN_ADD)) + (inst->size - MIN_MATCH); | ||
1070 | } | ||
1071 | } | ||
1072 | } | ||
1073 | } | ||
1074 | } | ||
1075 | } | ||
1076 | #else /* GENERIC_ENCODE_TABLES */ | ||
1077 | |||
1078 | /* This version of xd3_choose_instruction is hard-coded for the default table: the constants below are the values listed in __rfc3284_code_table_desc. It always sets INST->code1 and, when PREV and INST form a double instruction, PREV->code2. */ | ||
1079 | static void | ||
1080 | xd3_choose_instruction (/* const xd3_code_table_desc *desc,*/ xd3_rinst *prev, xd3_rinst *inst) | ||
1081 | { | ||
1082 | switch (inst->type) | ||
1083 | { | ||
1084 | case XD3_RUN: | ||
1085 | inst->code1 = 0; | ||
1086 | break; | ||
1087 | |||
1088 | case XD3_ADD: | ||
1089 | inst->code1 = 1; /* code 1: ADD without an immediate size */ | ||
1090 | |||
1091 | if (inst->size <= 17) /* 17 immediate add sizes */ | ||
1092 | { | ||
1093 | inst->code1 += inst->size; | ||
1094 | |||
1095 | if ( (inst->size == 1) && /* COPY-ADD exists only for add size 1 ... */ | ||
1096 | (prev != NULL) && | ||
1097 | (prev->size == 4) && /* ... after a copy of size 4 */ | ||
1098 | (prev->type >= XD3_CPY) ) | ||
1099 | { | ||
1100 | prev->code2 = 247 + (prev->type - XD3_CPY); /* copy-add codes start at 247, one per copy mode */ | ||
1101 | } | ||
1102 | } | ||
1103 | |||
1104 | break; | ||
1105 | |||
1106 | default: | ||
1107 | { | ||
1108 | int mode = inst->type - XD3_CPY; | ||
1109 | |||
1110 | XD3_ASSERT (inst->type >= XD3_CPY && inst->type < 12); /* NOTE(review): 12 is presumably XD3_CPY plus the 9 default copy modes - confirm */ | ||
1111 | |||
1112 | inst->code1 = 19 + 16 * mode; /* 19 = RUN + large ADD + 17 immediate ADDs; 16 = 1 large + 15 immediate copies per mode */ | ||
1113 | |||
1114 | if (inst->size <= 18) /* immediate copy sizes are 4..18 */ | ||
1115 | { | ||
1116 | inst->code1 += inst->size - 3; /* the generic version's size + 1 - MIN_MATCH, with MIN_MATCH taken as 4 */ | ||
1117 | |||
1118 | if ( (prev != NULL) && | ||
1119 | (prev->type == XD3_ADD) && | ||
1120 | (prev->size <= 4) ) /* add-copy max add is 4 */ | ||
1121 | { | ||
1122 | if ( (inst->size <= 6) && | ||
1123 | (mode <= 5) ) /* modes 0..5: add-copy max cpy is 6, 12 codes per mode starting at 163 */ | ||
1124 | { | ||
1125 | prev->code2 = 163 + (mode * 12) + (3 * (prev->size - 1)) + (inst->size - 4); | ||
1126 | |||
1127 | XD3_ASSERT (prev->code2 <= 234); | ||
1128 | } | ||
1129 | else if ( (inst->size == 4) && | ||
1130 | (mode >= 6) ) /* modes 6 and up: add-copy max cpy is 4, 4 codes per mode starting at 235 */ | ||
1131 | { | ||
1132 | prev->code2 = 235 + ((mode - 6) * 4) + (prev->size - 1); | ||
1133 | |||
1134 | XD3_ASSERT (prev->code2 <= 246); | ||
1135 | } | ||
1136 | } | ||
1137 | } | ||
1138 | |||
1139 | XD3_ASSERT (inst->code1 <= 162); /* single-instruction codes end at 162; doubles start at 163 */ | ||
1140 | } | ||
1141 | break; | ||
1142 | } | ||
1143 | } | ||
1144 | #endif /* GENERIC_ENCODE_TABLES */ | ||
1145 | |||
1146 | /****************************************************************************************** | ||
1147 | Instruction table encoder/decoder | ||
1148 | ******************************************************************************************/ | ||
1149 | |||
1150 | #if GENERIC_ENCODE_TABLES | ||
1151 | #if GENERIC_ENCODE_TABLES_COMPUTE == 0 | ||
1152 | |||
1153 | /* In this case, we hard-code the result of xd3_compute_code_table_encoding for each alternate | ||
1154 | * code table, presuming that saves time/space. This has been 131 bytes, but secondary | ||
1155 | * compression was turned off (presumably why the table below is 178 bytes). */ | ||
1156 | static const uint8_t __alternate_code_table_compressed[178] = | ||
1157 | {0xd6,0xc3,0xc4,0x00,0x00,0x01,0x8a,0x6f,0x40,0x81,0x27,0x8c,0x00,0x00,0x4a,0x4a,0x0d,0x02,0x01,0x03, | ||
1158 | 0x01,0x03,0x00,0x01,0x00,0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e, | ||
1159 | 0x0f,0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x00,0x01,0x01,0x01,0x02,0x02,0x02,0x03,0x03,0x03,0x04, | ||
1160 | 0x04,0x04,0x04,0x00,0x04,0x05,0x06,0x01,0x02,0x03,0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x05,0x05,0x05, | ||
1161 | 0x06,0x06,0x06,0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x00,0x02,0x00,0x18,0x13,0x63,0x00,0x1b,0x00,0x54, | ||
1162 | 0x00,0x15,0x23,0x6f,0x00,0x28,0x13,0x54,0x00,0x15,0x01,0x1a,0x31,0x23,0x6c,0x0d,0x23,0x48,0x00,0x15, | ||
1163 | 0x93,0x6f,0x00,0x28,0x04,0x23,0x51,0x04,0x32,0x00,0x2b,0x00,0x12,0x00,0x12,0x00,0x12,0x00,0x12,0x00, | ||
1164 | 0x12,0x00,0x12,0x53,0x57,0x9c,0x07,0x43,0x6f,0x00,0x34,0x00,0x0c,0x00,0x0c,0x00,0x0c,0x00,0x0c,0x00, | ||
1165 | 0x0c,0x00,0x0c,0x00,0x15,0x00,0x82,0x6f,0x00,0x15,0x12,0x0c,0x00,0x03,0x03,0x00,0x06,0x00,}; | ||
1166 | |||
1167 | static int /* always 0 in this hard-coded variant */ | ||
1168 | xd3_compute_alternate_table_encoding (xd3_stream *stream, const uint8_t **data, usize_t *size) /* STREAM is not used here */ | ||
1169 | { | ||
1170 | (*data) = __alternate_code_table_compressed; /* hand back the precomputed static encoding ... */ | ||
1171 | (*size) = sizeof (__alternate_code_table_compressed); /* ... and its length in bytes */ | ||
1172 | return 0; | ||
1173 | } | ||
1174 | |||
1175 | #else | ||
1176 | |||
1177 | /* The alternate code table encoding will be computed and stored here on first use, together with its length. */ | ||
1178 | static uint8_t __alternate_code_table_compressed[CODE_TABLE_VCDIFF_SIZE]; | ||
1179 | static usize_t __alternate_code_table_compressed_size; | ||
1180 | |||
1181 | /* This function generates a delta describing the code table for encoding within a VCDIFF | ||
1182 | * file. This function is NOT thread safe because it is only intended that this function | ||
1183 | * is used to generate statically-compiled strings. The string form of CODE_TABLE is encoded against the string form of the rfc3284 table as source; the result goes to COMP_STRING and its length to *COMP_STRING_SIZE. Returns 0 or the first non-zero error code. */ | ||
1184 | int xd3_compute_code_table_encoding (xd3_stream *in_stream, const xd3_dinst *code_table, | ||
1185 | uint8_t *comp_string, usize_t *comp_string_size) | ||
1186 | { | ||
1187 | uint8_t dflt_string[CODE_TABLE_STRING_SIZE]; | ||
1188 | uint8_t code_string[CODE_TABLE_STRING_SIZE]; | ||
1189 | xd3_stream stream; | ||
1190 | xd3_source source; | ||
1191 | xd3_config config; | ||
1192 | int ret; | ||
1193 | |||
1194 | memset (& source, 0, sizeof (source)); | ||
1195 | |||
1196 | xd3_compute_code_table_string (xd3_rfc3284_code_table (), dflt_string); | ||
1197 | xd3_compute_code_table_string (code_table, code_string); | ||
1198 | |||
1199 | /* Use DJW secondary compression if it is on by default. This saves about 20 bytes. */ | ||
1200 | xd3_init_config (& config, XD3_FLUSH | (SECONDARY_DJW ? XD3_SEC_DJW : 0)); | ||
1201 | |||
1202 | /* Be exhaustive. */ | ||
1203 | config.sprevsz = 1<<11; | ||
1204 | config.memsize = CODE_TABLE_STRING_SIZE * 10; | ||
1205 | |||
1206 | config.large_look = 4; | ||
1207 | config.large_step = 1; | ||
1208 | config.small_look = 4; | ||
1209 | config.small_chain = CODE_TABLE_STRING_SIZE; | ||
1210 | config.small_lchain = CODE_TABLE_STRING_SIZE; | ||
1211 | config.ssmatch = 1; | ||
1212 | config.try_lazy = 1; | ||
1213 | config.max_lazy = CODE_TABLE_STRING_SIZE; | ||
1214 | config.long_enough = CODE_TABLE_STRING_SIZE; | ||
1215 | config.promote = 1; | ||
1216 | config.srcwin_size = CODE_TABLE_STRING_SIZE; | ||
1217 | config.srcwin_maxsz = CODE_TABLE_STRING_SIZE; | ||
1218 | |||
1219 | if ((ret = xd3_config_stream (& stream, & config))) { goto fail; } | ||
1220 | |||
1221 | source.size = CODE_TABLE_STRING_SIZE; /* the whole default table string is presented as a single source block */ | ||
1222 | source.blksize = CODE_TABLE_STRING_SIZE; | ||
1223 | source.onblk = CODE_TABLE_STRING_SIZE; | ||
1224 | source.name = ""; | ||
1225 | source.curblk = dflt_string; | ||
1226 | source.curblkno = 0; | ||
1227 | |||
1228 | if ((ret = xd3_set_source (& stream, & source))) { goto fail; } | ||
1229 | |||
1230 | if ((ret = xd3_encode_completely (& stream, code_string, CODE_TABLE_STRING_SIZE, | ||
1231 | comp_string, comp_string_size, CODE_TABLE_VCDIFF_SIZE))) { goto fail; } | ||
1232 | |||
1233 | fail: /* also reached on success: the local stream is always released and its msg copied to IN_STREAM */ | ||
1234 | |||
1235 | in_stream->msg = stream.msg; | ||
1236 | xd3_free_stream (& stream); | ||
1237 | return ret; | ||
1238 | } | ||
1239 | |||
1240 | /* Compute a delta between alternate and rfc3284 tables. As soon as another alternate | ||
1241 | * table is added, this code should become generic. For now there is only one alternate | ||
1242 | * table for testing. The encoding is computed on the first call and cached in the static buffer; *DATA and *SIZE are set to that buffer and its length. */ | ||
1243 | static int | ||
1244 | xd3_compute_alternate_table_encoding (xd3_stream *stream, const uint8_t **data, usize_t *size) | ||
1245 | { | ||
1246 | int ret; | ||
1247 | |||
1248 | if (__alternate_code_table_compressed[0] == 0) /* a zero first byte is treated as "not computed yet" */ | ||
1249 | { | ||
1250 | if ((ret = xd3_compute_code_table_encoding (stream, xd3_alternate_code_table (), | ||
1251 | __alternate_code_table_compressed, | ||
1252 | & __alternate_code_table_compressed_size))) | ||
1253 | { | ||
1254 | return ret; | ||
1255 | } | ||
1256 | |||
1257 | /* During development of a new code table, enable this variable to print the new | ||
1258 | * static contents and determine its size. At run time the table will be filled in | ||
1259 | * appropriately, but at least it should have the proper size beforehand. */ | ||
1260 | #if GENERIC_ENCODE_TABLES_COMPUTE_PRINT | ||
1261 | { | ||
1262 | int i; | ||
1263 | |||
1264 | P(RINT, "\nstatic const usize_t __alternate_code_table_compressed_size = %u;\n", | ||
1265 | __alternate_code_table_compressed_size); | ||
1266 | |||
1267 | P(RINT, "static const uint8_t __alternate_code_table_compressed[%u] =\n{", | ||
1268 | __alternate_code_table_compressed_size); | ||
1269 | |||
1270 | for (i = 0; i < __alternate_code_table_compressed_size; i += 1) | ||
1271 | { | ||
1272 | P(RINT, "0x%02x,", __alternate_code_table_compressed[i]); | ||
1273 | if ((i % 20) == 19) { P(RINT, "\n"); } /* 20 bytes per printed row */ | ||
1274 | } | ||
1275 | |||
1276 | P(RINT, "};\n"); | ||
1277 | } | ||
1278 | #endif | ||
1279 | } | ||
1280 | |||
1281 | (*data) = __alternate_code_table_compressed; | ||
1282 | (*size) = __alternate_code_table_compressed_size; | ||
1283 | |||
1284 | return 0; | ||
1285 | } | ||
1286 | #endif /* GENERIC_ENCODE_TABLES_COMPUTE != 0 */ | ||
1287 | #endif /* GENERIC_ENCODE_TABLES */ | ||
1288 | |||
1289 | #endif /* XD3_ENCODER */ | ||
1290 | |||
1291 | /* This function generates the 1536-byte string specified in sections 5.4 and 7 of | ||
1292 | * rfc3284, which is used to represent a code table within a VCDIFF file. STR receives six consecutive 256-byte sections: type1, type2, size1, size2, mode1, mode2. */ | ||
1293 | void xd3_compute_code_table_string (const xd3_dinst *code_table, uint8_t *str) | ||
1294 | { | ||
1295 | int i, s; | ||
1296 | |||
1297 | XD3_ASSERT (CODE_TABLE_STRING_SIZE == 6 * 256); | ||
1298 | |||
1299 | for (s = 0; s < 6; s += 1) /* s selects the section, i the code table entry */ | ||
1300 | { | ||
1301 | for (i = 0; i < 256; i += 1) | ||
1302 | { | ||
1303 | switch (s) | ||
1304 | { | ||
1305 | case 0: *str++ = (code_table[i].type1 >= XD3_CPY ? XD3_CPY : code_table[i].type1); break; /* every copy type is written as plain XD3_CPY */ | ||
1306 | case 1: *str++ = (code_table[i].type2 >= XD3_CPY ? XD3_CPY : code_table[i].type2); break; | ||
1307 | case 2: *str++ = (code_table[i].size1); break; | ||
1308 | case 3: *str++ = (code_table[i].size2); break; | ||
1309 | case 4: *str++ = (code_table[i].type1 >= XD3_CPY ? code_table[i].type1 - XD3_CPY : 0); break; /* the copy mode is written separately; 0 for non-copies */ | ||
1310 | case 5: *str++ = (code_table[i].type2 >= XD3_CPY ? code_table[i].type2 - XD3_CPY : 0); break; | ||
1311 | } | ||
1312 | } | ||
1313 | } | ||
1314 | } | ||
1315 | |||
1316 | /* This function translates the code table string into the internal representation. The | ||
1317 | * stream's near and same-modes should already be set. Returns 0 and sets stream->code_table on success, ENOMEM if the table cannot be allocated, or EINVAL (with stream->msg set) if the string fails validation. */ | ||
1318 | static int | ||
1319 | xd3_apply_table_string (xd3_stream *stream, const uint8_t *code_string) | ||
1320 | { | ||
1321 | int i, s; | ||
1322 | int modes = TOTAL_MODES (stream); | ||
1323 | xd3_dinst *code_table; | ||
1324 | |||
1325 | if ((code_table = stream->code_table_alloc = xd3_alloc (stream, sizeof (xd3_dinst), 256)) == NULL) | ||
1326 | { | ||
1327 | return ENOMEM; | ||
1328 | } | ||
1329 | |||
1330 | for (s = 0; s < 6; s += 1) /* six 256-byte sections, in the order written by xd3_compute_code_table_string */ | ||
1331 | { | ||
1332 | for (i = 0; i < 256; i += 1) | ||
1333 | { | ||
1334 | switch (s) | ||
1335 | { | ||
1336 | case 0: /* type1 */ | ||
1337 | if (*code_string > XD3_CPY) | ||
1338 | { | ||
1339 | stream->msg = "invalid code-table opcode"; | ||
1340 | return EINVAL; | ||
1341 | } | ||
1342 | code_table[i].type1 = *code_string++; | ||
1343 | break; | ||
1344 | case 1: /* type2 */ | ||
1345 | if (*code_string > XD3_CPY) | ||
1346 | { | ||
1347 | stream->msg = "invalid code-table opcode"; | ||
1348 | return EINVAL; | ||
1349 | } | ||
1350 | code_table[i].type2 = *code_string++; | ||
1351 | break; | ||
1352 | case 2: /* size1: a NOOP may not carry a size */ | ||
1353 | if (*code_string != 0 && code_table[i].type1 == XD3_NOOP) | ||
1354 | { | ||
1355 | stream->msg = "invalid code-table size"; | ||
1356 | return EINVAL; | ||
1357 | } | ||
1358 | code_table[i].size1 = *code_string++; | ||
1359 | break; | ||
1360 | case 3: /* size2: a NOOP may not carry a size */ | ||
1361 | if (*code_string != 0 && code_table[i].type2 == XD3_NOOP) | ||
1362 | { | ||
1363 | stream->msg = "invalid code-table size"; | ||
1364 | return EINVAL; | ||
1365 | } | ||
1366 | code_table[i].size2 = *code_string++; | ||
1367 | break; | ||
1368 | case 4: /* mode1: must be below TOTAL_MODES and non-zero only for copies */ | ||
1369 | if (*code_string >= modes) | ||
1370 | { | ||
1371 | stream->msg = "invalid code-table mode"; | ||
1372 | return EINVAL; | ||
1373 | } | ||
1374 | if (*code_string != 0 && code_table[i].type1 != XD3_CPY) | ||
1375 | { | ||
1376 | stream->msg = "invalid code-table mode"; | ||
1377 | return EINVAL; | ||
1378 | } | ||
1379 | code_table[i].type1 += *code_string++; /* fold the mode into the type, giving XD3_CPY + mode */ | ||
1380 | break; | ||
1381 | case 5: /* mode2: must be below TOTAL_MODES and non-zero only for copies */ | ||
1382 | if (*code_string >= modes) | ||
1383 | { | ||
1384 | stream->msg = "invalid code-table mode"; | ||
1385 | return EINVAL; | ||
1386 | } | ||
1387 | if (*code_string != 0 && code_table[i].type2 != XD3_CPY) | ||
1388 | { | ||
1389 | stream->msg = "invalid code-table mode"; | ||
1390 | return EINVAL; | ||
1391 | } | ||
1392 | code_table[i].type2 += *code_string++; /* fold the mode into the type, giving XD3_CPY + mode */ | ||
1393 | break; | ||
1394 | } | ||
1395 | } | ||
1396 | } | ||
1397 | |||
1398 | stream->code_table = code_table; /* only published after every section validated; on EINVAL the table is reachable only via code_table_alloc */ | ||
1399 | return 0; | ||
1400 | } | ||
1401 | |||
1402 | /* This function applies a code table delta and returns an actual code table. */ | ||
1403 | static int | ||
1404 | xd3_apply_table_encoding (xd3_stream *in_stream, const uint8_t *data, usize_t size) | ||
1405 | { | ||
1406 | uint8_t dflt_string[CODE_TABLE_STRING_SIZE]; | ||
1407 | uint8_t code_string[CODE_TABLE_STRING_SIZE]; | ||
1408 | usize_t code_size; | ||
1409 | xd3_stream stream; | ||
1410 | xd3_source source; | ||
1411 | int ret; | ||
1412 | |||
1413 | /* The default code table string can be cached if alternate code tables ever become | ||
1414 | * popular. */ | ||
1415 | xd3_compute_code_table_string (xd3_rfc3284_code_table (), dflt_string); | ||
1416 | |||
1417 | source.size = CODE_TABLE_STRING_SIZE; | ||
1418 | source.blksize = CODE_TABLE_STRING_SIZE; | ||
1419 | source.onblk = CODE_TABLE_STRING_SIZE; | ||
1420 | source.name = "rfc3284 code table"; | ||
1421 | source.curblk = dflt_string; | ||
1422 | source.curblkno = 0; | ||
1423 | |||
1424 | if ((ret = xd3_config_stream (& stream, NULL)) || | ||
1425 | (ret = xd3_set_source (& stream, & source)) || | ||
1426 | (ret = xd3_decode_completely (& stream, data, size, code_string, & code_size, sizeof (code_string)))) | ||
1427 | { | ||
1428 | in_stream->msg = stream.msg; | ||
1429 | goto fail; | ||
1430 | } | ||
1431 | |||
1432 | if (code_size != sizeof (code_string)) | ||
1433 | { | ||
1434 | stream.msg = "corrupt code-table encoding"; | ||
1435 | ret = EINVAL; | ||
1436 | goto fail; | ||
1437 | } | ||
1438 | |||
1439 | if ((ret = xd3_apply_table_string (in_stream, code_string))) { goto fail; } | ||
1440 | |||
1441 | fail: | ||
1442 | |||
1443 | xd3_free_stream (& stream); | ||
1444 | return ret; | ||
1445 | } | ||
1446 | |||
1447 | /****************************************************************************************** | ||
1448 | Permute stuff | ||
1449 | ******************************************************************************************/ | ||
1450 | |||
1451 | #if HASH_PERMUTE == 0 | ||
1452 | #define PERMUTE(x) (x) | ||
1453 | #else | ||
1454 | #define PERMUTE(x) (__single_hash[(uint)(x)]) /* looks (x) up in __single_hash; the argument is parenthesized so the cast applies to the whole expression */ | ||
1455 | |||
1456 | static const uint16_t __single_hash[256] = | ||
1457 | { | ||
1458 | /* Random numbers generated using SLIB's pseudo-random number generator. This hashes | ||
1459 | * the input alphabet: PERMUTE(x) indexes this table, one 16-bit value per index 0..255. */ | ||
1460 | 0xbcd1, 0xbb65, 0x42c2, 0xdffe, 0x9666, 0x431b, 0x8504, 0xeb46, | ||
1461 | 0x6379, 0xd460, 0xcf14, 0x53cf, 0xdb51, 0xdb08, 0x12c8, 0xf602, | ||
1462 | 0xe766, 0x2394, 0x250d, 0xdcbb, 0xa678, 0x02af, 0xa5c6, 0x7ea6, | ||
1463 | 0xb645, 0xcb4d, 0xc44b, 0xe5dc, 0x9fe6, 0x5b5c, 0x35f5, 0x701a, | ||
1464 | 0x220f, 0x6c38, 0x1a56, 0x4ca3, 0xffc6, 0xb152, 0x8d61, 0x7a58, | ||
1465 | 0x9025, 0x8b3d, 0xbf0f, 0x95a3, 0xe5f4, 0xc127, 0x3bed, 0x320b, | ||
1466 | 0xb7f3, 0x6054, 0x333c, 0xd383, 0x8154, 0x5242, 0x4e0d, 0x0a94, | ||
1467 | 0x7028, 0x8689, 0x3a22, 0x0980, 0x1847, 0xb0f1, 0x9b5c, 0x4176, | ||
1468 | 0xb858, 0xd542, 0x1f6c, 0x2497, 0x6a5a, 0x9fa9, 0x8c5a, 0x7743, | ||
1469 | 0xa8a9, 0x9a02, 0x4918, 0x438c, 0xc388, 0x9e2b, 0x4cad, 0x01b6, | ||
1470 | 0xab19, 0xf777, 0x365f, 0x1eb2, 0x091e, 0x7bf8, 0x7a8e, 0x5227, | ||
1471 | 0xeab1, 0x2074, 0x4523, 0xe781, 0x01a3, 0x163d, 0x3b2e, 0x287d, | ||
1472 | 0x5e7f, 0xa063, 0xb134, 0x8fae, 0x5e8e, 0xb7b7, 0x4548, 0x1f5a, | ||
1473 | 0xfa56, 0x7a24, 0x900f, 0x42dc, 0xcc69, 0x02a0, 0x0b22, 0xdb31, | ||
1474 | 0x71fe, 0x0c7d, 0x1732, 0x1159, 0xcb09, 0xe1d2, 0x1351, 0x52e9, | ||
1475 | 0xf536, 0x5a4f, 0xc316, 0x6bf9, 0x8994, 0xb774, 0x5f3e, 0xf6d6, | ||
1476 | 0x3a61, 0xf82c, 0xcc22, 0x9d06, 0x299c, 0x09e5, 0x1eec, 0x514f, | ||
1477 | 0x8d53, 0xa650, 0x5c6e, 0xc577, 0x7958, 0x71ac, 0x8916, 0x9b4f, | ||
1478 | 0x2c09, 0x5211, 0xf6d8, 0xcaaa, 0xf7ef, 0x287f, 0x7a94, 0xab49, | ||
1479 | 0xfa2c, 0x7222, 0xe457, 0xd71a, 0x00c3, 0x1a76, 0xe98c, 0xc037, | ||
1480 | 0x8208, 0x5c2d, 0xdfda, 0xe5f5, 0x0b45, 0x15ce, 0x8a7e, 0xfcad, | ||
1481 | 0xaa2d, 0x4b5c, 0xd42e, 0xb251, 0x907e, 0x9a47, 0xc9a6, 0xd93f, | ||
1482 | 0x085e, 0x35ce, 0xa153, 0x7e7b, 0x9f0b, 0x25aa, 0x5d9f, 0xc04d, | ||
1483 | 0x8a0e, 0x2875, 0x4a1c, 0x295f, 0x1393, 0xf760, 0x9178, 0x0f5b, | ||
1484 | 0xfa7d, 0x83b4, 0x2082, 0x721d, 0x6462, 0x0368, 0x67e2, 0x8624, | ||
1485 | 0x194d, 0x22f6, 0x78fb, 0x6791, 0xb238, 0xb332, 0x7276, 0xf272, | ||
1486 | 0x47ec, 0x4504, 0xa961, 0x9fc8, 0x3fdc, 0xb413, 0x007a, 0x0806, | ||
1487 | 0x7458, 0x95c6, 0xccaa, 0x18d6, 0xe2ae, 0x1b06, 0xf3f6, 0x5050, | ||
1488 | 0xc8e8, 0xf4ac, 0xc04c, 0xf41c, 0x992f, 0xae44, 0x5f1b, 0x1113, | ||
1489 | 0x1738, 0xd9a8, 0x19ea, 0x2d33, 0x9698, 0x2fe9, 0x323f, 0xcde2, | ||
1490 | 0x6d71, 0xe37d, 0xb697, 0x2c4f, 0x4373, 0x9102, 0x075d, 0x8e25, | ||
1491 | 0x1672, 0xec28, 0x6acb, 0x86cc, 0x186e, 0x9414, 0xd674, 0xd1a5 | ||
1492 | }; | ||
1493 | #endif | ||
1494 | |||
1495 | /****************************************************************************************** | ||
1496 | Ctable stuff | ||
1497 | ******************************************************************************************/ | ||
1498 | |||
1499 | #if HASH_PRIME | ||
/* Ascending table of candidate hash-table sizes used when HASH_PRIME is set.
 * xd3_size_hashtable() scans it to choose the bucket count. */
static const usize_t __primes[] =
{
  11, 19, 37, 73, 109,
  163, 251, 367, 557, 823,
  1237, 1861, 2777, 4177, 6247,
  9371, 14057, 21089, 31627, 47431,
  71143, 106721, 160073, 240101, 360163,
  540217, 810343, 1215497, 1823231, 2734867,
  4102283, 6153409, 9230113, 13845163, 20767711,
  31151543, 46727321, 70090921, 105136301, 157704401,
  236556601, 354834919, 532252367, 798378509, 1197567719,
  1796351503
};

/* Number of entries in __primes. */
static const usize_t __nprimes = SIZEOF_ARRAY (__primes);
1515 | #endif | ||
1516 | |||
1517 | static INLINE uint32_t | ||
1518 | xd3_checksum_hash (const xd3_hash_cfg *cfg, const uint32_t cksum) | ||
1519 | { | ||
1520 | #if HASH_PRIME | ||
1521 | /* If the table is prime compute the modulus. */ | ||
1522 | return (cksum % cfg->size); | ||
1523 | #else | ||
1524 | /* If the table is power-of-two compute the mask.*/ | ||
1525 | return (cksum ^ (cksum >> cfg->shift)) & cfg->mask; | ||
1526 | #endif | ||
1527 | } | ||
1528 | |||
1529 | /****************************************************************************************** | ||
1530 | Create the hash table. | ||
1531 | ******************************************************************************************/ | ||
1532 | |||
1533 | static INLINE void | ||
1534 | xd3_swap_uint8p (uint8_t** p1, uint8_t** p2) | ||
1535 | { | ||
1536 | uint8_t *t = (*p1); | ||
1537 | (*p1) = (*p2); | ||
1538 | (*p2) = t; | ||
1539 | } | ||
1540 | |||
1541 | static INLINE void | ||
1542 | xd3_swap_usize_t (usize_t* p1, usize_t* p2) | ||
1543 | { | ||
1544 | usize_t t = (*p1); | ||
1545 | (*p1) = (*p2); | ||
1546 | (*p2) = t; | ||
1547 | } | ||
1548 | |||
1549 | /* It's not constant time, but it computes the log. */ | ||
1550 | static int | ||
1551 | xd3_check_pow2 (usize_t value, usize_t *logof) | ||
1552 | { | ||
1553 | usize_t x = 1; | ||
1554 | usize_t nolog; | ||
1555 | if (logof == NULL) { | ||
1556 | logof = &nolog; | ||
1557 | } | ||
1558 | |||
1559 | *logof = 0; | ||
1560 | |||
1561 | for (; x != 0; x <<= 1, *logof += 1) | ||
1562 | { | ||
1563 | if (x == value) | ||
1564 | { | ||
1565 | return 0; | ||
1566 | } | ||
1567 | } | ||
1568 | |||
1569 | return EINVAL; | ||
1570 | } | ||
1571 | |||
1572 | static usize_t | ||
1573 | xd3_round_blksize (usize_t sz, usize_t blksz) | ||
1574 | { | ||
1575 | usize_t mod = sz & (blksz-1); | ||
1576 | |||
1577 | XD3_ASSERT (xd3_check_pow2 (blksz, NULL) == 0); | ||
1578 | |||
1579 | return mod ? (sz + (blksz - mod)) : sz; | ||
1580 | } | ||
1581 | |||
1582 | #if XD3_ENCODER | ||
1583 | #if !HASH_PRIME | ||
1584 | static usize_t | ||
1585 | xd3_size_log2 (usize_t slots) | ||
1586 | { | ||
1587 | int bits = 28; /* This should not be an unreasonable limit. */ | ||
1588 | int i; | ||
1589 | |||
1590 | for (i = 3; i <= bits; i += 1) | ||
1591 | { | ||
1592 | if (slots < (1 << i)) | ||
1593 | { | ||
1594 | bits = i-1; | ||
1595 | break; | ||
1596 | } | ||
1597 | } | ||
1598 | |||
1599 | return bits; | ||
1600 | } | ||
1601 | #endif | ||
1602 | |||
1603 | static void | ||
1604 | xd3_size_hashtable (xd3_stream *stream, | ||
1605 | usize_t space, | ||
1606 | xd3_hash_cfg *cfg) | ||
1607 | { | ||
1608 | usize_t slots = space / sizeof (usize_t); | ||
1609 | |||
1610 | /* initialize ctable: the number of hash buckets is computed from the table of primes or | ||
1611 | * the nearest power-of-two, in both cases rounding down in favor of using less | ||
1612 | * memory. */ | ||
1613 | |||
1614 | #if HASH_PRIME | ||
1615 | usize_t i; | ||
1616 | |||
1617 | cfg->size = __primes[__nprimes-1]; | ||
1618 | |||
1619 | for (i = 1; i < __nprimes; i += 1) | ||
1620 | { | ||
1621 | if (slots < __primes[i]) | ||
1622 | { | ||
1623 | cfg->size = __primes[i-1]; | ||
1624 | break; | ||
1625 | } | ||
1626 | } | ||
1627 | #else | ||
1628 | int bits = xd3_size_log2 (slots); | ||
1629 | |||
1630 | cfg->size = (1 << bits); | ||
1631 | cfg->mask = (cfg->size - 1); | ||
1632 | cfg->shift = min (32 - bits, 16); | ||
1633 | #endif | ||
1634 | } | ||
1635 | #endif | ||
1636 | |||
1637 | /****************************************************************************************** | ||
1638 | Cksum function | ||
1639 | ******************************************************************************************/ | ||
1640 | |||
1641 | /* OPT: It turns out that the compiler can't unroll the loop as well as you can by hand. */ | ||
1642 | static INLINE uint32_t | ||
1643 | xd3_lcksum (const uint8_t *seg, const int ln) | ||
1644 | { | ||
1645 | int i = 0; | ||
1646 | uint32_t low = 0; | ||
1647 | uint32_t high = 0; | ||
1648 | |||
1649 | for (; i < ln; i += 1) | ||
1650 | { | ||
1651 | low += PERMUTE(*seg++); | ||
1652 | high += low; | ||
1653 | } | ||
1654 | |||
1655 | return ((high & 0xffff) << 16) | (low & 0xffff); | ||
1656 | } | ||
1657 | |||
#if ARITH_SMALL_CKSUM
/* Small checksum over the LN bytes at SEG, computed by SMALL_CKSUM_UPDATE.
 * NOTE(review): `c' is declared without an initializer; presumably the macro
 * assigns it -- the macro is defined elsewhere, so confirm. */
static INLINE usize_t
xd3_scksum (const uint8_t *seg, const int ln)
{
  usize_t c;
  /* The -1 is because UPDATE operates on seg[1..ln] */
  SMALL_CKSUM_UPDATE (c,(seg-1),ln);
  return c;
}
#else
/* Without ARITH_SMALL_CKSUM the small checksum is simply the large one. */
#define xd3_scksum(seg,ln) xd3_lcksum(seg,ln)
#endif
1670 | |||
1671 | /****************************************************************************************** | ||
1672 | Adler32 stream function: code copied from Zlib, defined in RFC1950 | ||
1673 | ******************************************************************************************/ | ||
1674 | |||
#define A32_BASE 65521L /* Largest prime smaller than 2^16 */
#define A32_NMAX 5552 /* NMAX is the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1 */

/* Unrolled accumulation steps.  Each one adds bytes of `buf' starting at
 * index `i' into the caller's `s1' and `s2' accumulators.  They are wrapped
 * in do/while(0) with parenthesized arguments so that each expands to exactly
 * one statement and is safe under an unbraced if/else. */
#define A32_DO1(buf,i)  do { s1 += (buf)[(i)]; s2 += s1; } while (0)
#define A32_DO2(buf,i)  do { A32_DO1(buf,(i)); A32_DO1(buf,(i)+1); } while (0)
#define A32_DO4(buf,i)  do { A32_DO2(buf,(i)); A32_DO2(buf,(i)+2); } while (0)
#define A32_DO8(buf,i)  do { A32_DO4(buf,(i)); A32_DO4(buf,(i)+4); } while (0)
#define A32_DO16(buf)   do { A32_DO8(buf,0); A32_DO8(buf,8); } while (0)
1683 | |||
1684 | static unsigned long adler32 (unsigned long adler, const uint8_t *buf, usize_t len) | ||
1685 | { | ||
1686 | unsigned long s1 = adler & 0xffff; | ||
1687 | unsigned long s2 = (adler >> 16) & 0xffff; | ||
1688 | int k; | ||
1689 | |||
1690 | while (len > 0) | ||
1691 | { | ||
1692 | k = (len < A32_NMAX) ? len : A32_NMAX; | ||
1693 | len -= k; | ||
1694 | |||
1695 | while (k >= 16) | ||
1696 | { | ||
1697 | A32_DO16(buf); | ||
1698 | buf += 16; | ||
1699 | k -= 16; | ||
1700 | } | ||
1701 | |||
1702 | if (k != 0) | ||
1703 | { | ||
1704 | do | ||
1705 | { | ||
1706 | s1 += *buf++; | ||
1707 | s2 += s1; | ||
1708 | } | ||
1709 | while (--k); | ||
1710 | } | ||
1711 | |||
1712 | s1 %= A32_BASE; | ||
1713 | s2 %= A32_BASE; | ||
1714 | } | ||
1715 | |||
1716 | return (s2 << 16) | s1; | ||
1717 | } | ||
1718 | |||
1719 | /****************************************************************************************** | ||
1720 | Run-length function | ||
1721 | ******************************************************************************************/ | ||
1722 | |||
1723 | static INLINE int | ||
1724 | xd3_comprun (const uint8_t *seg, int slook, uint8_t *run_cp) | ||
1725 | { | ||
1726 | int i; | ||
1727 | int run_l = 0; | ||
1728 | uint8_t run_c = 0; | ||
1729 | |||
1730 | for (i = 0; i < slook; i += 1) | ||
1731 | { | ||
1732 | NEXTRUN(seg[i]); | ||
1733 | } | ||
1734 | |||
1735 | (*run_cp) = run_c; | ||
1736 | |||
1737 | return run_l; | ||
1738 | } | ||
1739 | |||
1740 | /****************************************************************************************** | ||
1741 | Basic encoder/decoder functions | ||
1742 | ******************************************************************************************/ | ||
1743 | |||
1744 | static int | ||
1745 | xd3_decode_byte (xd3_stream *stream, uint *val) | ||
1746 | { | ||
1747 | if (stream->avail_in == 0) | ||
1748 | { | ||
1749 | stream->msg = "further input required"; | ||
1750 | return XD3_INPUT; | ||
1751 | } | ||
1752 | |||
1753 | (*val) = stream->next_in[0]; | ||
1754 | |||
1755 | DECODE_INPUT (1); | ||
1756 | return 0; | ||
1757 | } | ||
1758 | |||
1759 | static int | ||
1760 | xd3_decode_bytes (xd3_stream *stream, uint8_t *buf, usize_t *pos, usize_t size) | ||
1761 | { | ||
1762 | usize_t want; | ||
1763 | usize_t take; | ||
1764 | |||
1765 | /* Note: The case where (*pos == size) happens when a zero-length appheader or code | ||
1766 | * table is transmitted, but there is nothing in the standard against that. */ | ||
1767 | |||
1768 | while (*pos < size) | ||
1769 | { | ||
1770 | if (stream->avail_in == 0) | ||
1771 | { | ||
1772 | stream->msg = "further input required"; | ||
1773 | return XD3_INPUT; | ||
1774 | } | ||
1775 | |||
1776 | want = size - *pos; | ||
1777 | take = min (want, stream->avail_in); | ||
1778 | |||
1779 | memcpy (buf + *pos, stream->next_in, take); | ||
1780 | |||
1781 | DECODE_INPUT (take); | ||
1782 | (*pos) += take; | ||
1783 | } | ||
1784 | |||
1785 | return 0; | ||
1786 | } | ||
1787 | |||
1788 | #if XD3_ENCODER | ||
1789 | static int | ||
1790 | xd3_emit_byte (xd3_stream *stream, | ||
1791 | xd3_output **outputp, | ||
1792 | uint8_t code) | ||
1793 | { | ||
1794 | xd3_output *output = (*outputp); | ||
1795 | |||
1796 | if (output->next == output->avail) | ||
1797 | { | ||
1798 | xd3_output *aoutput; | ||
1799 | |||
1800 | if ((aoutput = xd3_alloc_output (stream, output)) == NULL) | ||
1801 | { | ||
1802 | return ENOMEM; | ||
1803 | } | ||
1804 | |||
1805 | output = (*outputp) = aoutput; | ||
1806 | } | ||
1807 | |||
1808 | output->base[output->next++] = code; | ||
1809 | |||
1810 | return 0; | ||
1811 | } | ||
1812 | |||
1813 | static int | ||
1814 | xd3_emit_bytes (xd3_stream *stream, | ||
1815 | xd3_output **outputp, | ||
1816 | const uint8_t *base, | ||
1817 | usize_t size) | ||
1818 | { | ||
1819 | xd3_output *output = (*outputp); | ||
1820 | |||
1821 | do | ||
1822 | { | ||
1823 | usize_t take; | ||
1824 | |||
1825 | if (output->next == output->avail) | ||
1826 | { | ||
1827 | xd3_output *aoutput; | ||
1828 | |||
1829 | if ((aoutput = xd3_alloc_output (stream, output)) == NULL) | ||
1830 | { | ||
1831 | return ENOMEM; | ||
1832 | } | ||
1833 | |||
1834 | output = (*outputp) = aoutput; | ||
1835 | } | ||
1836 | |||
1837 | take = min (output->avail - output->next, size); | ||
1838 | |||
1839 | memcpy (output->base + output->next, base, take); | ||
1840 | |||
1841 | output->next += take; | ||
1842 | size -= take; | ||
1843 | base += take; | ||
1844 | } | ||
1845 | while (size > 0); | ||
1846 | |||
1847 | return 0; | ||
1848 | } | ||
1849 | #endif /* XD3_ENCODER */ | ||
1850 | |||
1851 | /****************************************************************************************** | ||
1852 | Integer encoder/decoder functions | ||
1853 | ******************************************************************************************/ | ||
1854 | |||
/* Body of the resumable variable-length integer decoders.  It expands inside
 * a function that has `stream' and `val' in scope.
 *
 * Each input byte contributes its low 7 bits, most significant group first;
 * a set high bit means another byte follows.  PART is the accumulator.  It is
 * cleared only when a value completes, so a call that returns XD3_INPUT
 * leaves the partial value in PART for the next call to continue.  OFLOW is
 * tested against PART before each shift: if any of those bits are set the
 * decode fails with EINVAL. */
#define DECODE_INTEGER_TYPE(PART,OFLOW) \
  while (stream->avail_in != 0) \
    { \
      uint next = stream->next_in[0]; \
      \
      DECODE_INPUT(1); \
      \
      if (PART & OFLOW) \
        { \
          stream->msg = "overflow in decode_integer"; \
          return EINVAL; \
        } \
      \
      PART = (PART << 7) | (next & 127); \
      \
      if ((next & 128) == 0) \
        { \
          (*val) = PART; \
          PART = 0; \
          return 0; \
        } \
    } \
  \
  stream->msg = "further input required"; \
  return XD3_INPUT
1880 | |||
/* Body of the in-memory variable-length integer readers.  It expands inside
 * a function that has `stream', `inpp', `max' and `valp' in scope.
 *
 * Bytes are read from *inpp up to (not including) `max'.  Each byte
 * contributes its low 7 bits, most significant group first, and a set high
 * bit means another byte follows.  On success the value is stored in *valp,
 * *inpp is advanced past the encoding and 0 is returned.  Running out of
 * input, or finding any OFLOW bit set before a shift, fails with EINVAL and
 * leaves *inpp and *valp untouched. */
#define READ_INTEGER_TYPE(TYPE, OFLOW) \
  TYPE val = 0; \
  const uint8_t *inp = (*inpp); \
  uint next; \
  \
  do \
    { \
      if (inp == max) \
        { \
          stream->msg = "end-of-input in read_integer"; \
          return EINVAL; \
        } \
      \
      if (val & (OFLOW)) \
        { \
          stream->msg = "overflow in read_integer"; \
          return EINVAL; \
        } \
      \
      next = (*inp++); \
      val = (val << 7) | (next & 127); \
    } \
  while (next & 128); \
  \
  (*valp) = val; \
  (*inpp) = inp; \
  \
  return 0
1909 | |||
/* Body of the variable-length integer emitters.  It expands inside a
 * function that has `stream', `output' and an unsigned `num' in scope.  The
 * value is written 7 bits per byte, most significant group first, with the
 * high bit set on every byte except the last. */
#define EMIT_INTEGER_TYPE() \
  /* A 64-bit value at 7 bits per byte needs at most 10 bytes (64/7 = 9.1). */ \
  uint8_t buf[10]; \
  usize_t bufi = 10; \
  \
  /* Fill buf from the end, turning on the MSB of every byte. */ \
  do \
    { \
      /* Guard against running off the front of buf. */ \
      XD3_ASSERT (bufi > 0); \
      buf[--bufi] = (num & 127) | 128; \
      num >>= 7; \
    } \
  while (num != 0); \
  \
  /* Turn off MSB of the last byte. */ \
  buf[9] &= 127; \
  \
  return xd3_emit_bytes (stream, output, buf + bufi, 10 - bufi)
1931 | |||
/* Helpers for the xd3_sizeof_* functions: return x from the enclosing
 * function when its `num' fits in x groups of 7 bits.  The expansion already
 * ends in a semicolon. */
#define IF_SIZEOF32(x) if (num < (1U << (7 * (x)))) return (x);
#define IF_SIZEOF64(x) if (num < (1ULL << (7 * (x)))) return (x);
1934 | |||
1935 | #if USE_UINT32 | ||
1936 | static uint | ||
1937 | xd3_sizeof_uint32_t (uint32_t num) | ||
1938 | { | ||
1939 | IF_SIZEOF32(1); | ||
1940 | IF_SIZEOF32(2); | ||
1941 | IF_SIZEOF32(3); | ||
1942 | IF_SIZEOF32(4); | ||
1943 | |||
1944 | return 5; | ||
1945 | } | ||
1946 | |||
1947 | static int | ||
1948 | xd3_decode_uint32_t (xd3_stream *stream, uint32_t *val) | ||
1949 | { DECODE_INTEGER_TYPE (stream->dec_32part, UINT32_OFLOW_MASK); } | ||
1950 | static int | ||
1951 | xd3_read_uint32_t (xd3_stream *stream, const uint8_t **inpp, const uint8_t *max, uint32_t *valp) | ||
1952 | { READ_INTEGER_TYPE (uint32_t, UINT32_OFLOW_MASK); } | ||
1953 | #if XD3_ENCODER | ||
1954 | static int | ||
1955 | xd3_emit_uint32_t (xd3_stream *stream, xd3_output **output, uint32_t num) | ||
1956 | { EMIT_INTEGER_TYPE (); } | ||
1957 | #endif | ||
1958 | #endif | ||
1959 | |||
1960 | #if USE_UINT64 | ||
1961 | /* We only ever decode offsets, but the other three are part of the regression test | ||
1962 | * anyway. */ | ||
1963 | static int | ||
1964 | xd3_decode_uint64_t (xd3_stream *stream, uint64_t *val) | ||
1965 | { DECODE_INTEGER_TYPE (stream->dec_64part, UINT64_OFLOW_MASK); } | ||
1966 | #if REGRESSION_TEST | ||
1967 | #if XD3_ENCODER | ||
1968 | static int | ||
1969 | xd3_emit_uint64_t (xd3_stream *stream, xd3_output **output, uint64_t num) | ||
1970 | { EMIT_INTEGER_TYPE (); } | ||
1971 | #endif | ||
1972 | static int | ||
1973 | xd3_read_uint64_t (xd3_stream *stream, const uint8_t **inpp, const uint8_t *max, uint64_t *valp) | ||
1974 | { READ_INTEGER_TYPE (uint64_t, UINT64_OFLOW_MASK); } | ||
1975 | |||
1976 | static uint | ||
1977 | xd3_sizeof_uint64_t (uint64_t num) | ||
1978 | { | ||
1979 | IF_SIZEOF64(1); | ||
1980 | IF_SIZEOF64(2); | ||
1981 | IF_SIZEOF64(3); | ||
1982 | IF_SIZEOF64(4); | ||
1983 | IF_SIZEOF64(5); | ||
1984 | IF_SIZEOF64(6); | ||
1985 | IF_SIZEOF64(7); | ||
1986 | IF_SIZEOF64(8); | ||
1987 | IF_SIZEOF64(9); | ||
1988 | |||
1989 | return 10; | ||
1990 | } | ||
1991 | #endif | ||
1992 | #endif | ||
1993 | |||
1994 | /****************************************************************************************** | ||
1995 | Debug instruction statistics | ||
1996 | ******************************************************************************************/ | ||
1997 | |||
1998 | #if XD3_DEBUG | ||
1999 | static void | ||
2000 | xd3_count_inst (xd3_stream *stream, uint code) | ||
2001 | { | ||
2002 | IF_DEBUG1 ({ | ||
2003 | if (stream->i_freqs == NULL && | ||
2004 | (stream->i_freqs = xd3_alloc0 (stream, sizeof (stream->i_freqs[0]), 256)) == NULL) { abort (); } | ||
2005 | |||
2006 | stream->i_freqs[code] += 1; | ||
2007 | }); | ||
2008 | stream->n_ibytes += 1; | ||
2009 | } | ||
2010 | |||
2011 | static void | ||
2012 | xd3_count_mode (xd3_stream *stream, uint mode) | ||
2013 | { | ||
2014 | IF_DEBUG1 ({ | ||
2015 | if (stream->i_modes == NULL && | ||
2016 | (stream->i_modes = xd3_alloc0 (stream, sizeof (stream->i_modes[0]), TOTAL_MODES (stream))) == NULL) { abort (); } | ||
2017 | stream->i_modes[mode] += 1; | ||
2018 | }); | ||
2019 | } | ||
2020 | |||
2021 | static void | ||
2022 | xd3_count_size (xd3_stream *stream, usize_t size) | ||
2023 | { | ||
2024 | IF_DEBUG1({ | ||
2025 | if (stream->i_sizes == NULL && | ||
2026 | (stream->i_sizes = xd3_alloc0 (stream, sizeof (stream->i_sizes[0]), 64)) == NULL) { abort (); } | ||
2027 | |||
2028 | if (size < 64) { stream->i_sizes[size] += 1; } | ||
2029 | }); | ||
2030 | stream->n_sbytes += xd3_sizeof_size (size); | ||
2031 | } | ||
2032 | #endif | ||
2033 | |||
2034 | /****************************************************************************************** | ||
2035 | Address cache stuff | ||
2036 | ******************************************************************************************/ | ||
2037 | |||
2038 | static int | ||
2039 | xd3_alloc_cache (xd3_stream *stream) | ||
2040 | { | ||
2041 | if (((stream->acache.s_near > 0) && | ||
2042 | (stream->acache.near_array = xd3_alloc (stream, stream->acache.s_near, sizeof (usize_t))) == NULL) || | ||
2043 | ((stream->acache.s_same > 0) && | ||
2044 | (stream->acache.same_array = xd3_alloc (stream, stream->acache.s_same * 256, sizeof (usize_t))) == NULL)) | ||
2045 | { | ||
2046 | return ENOMEM; | ||
2047 | } | ||
2048 | |||
2049 | return 0; | ||
2050 | } | ||
2051 | |||
2052 | static void | ||
2053 | xd3_init_cache (xd3_addr_cache* acache) | ||
2054 | { | ||
2055 | if (acache->s_near > 0) | ||
2056 | { | ||
2057 | memset (acache->near_array, 0, acache->s_near * sizeof (usize_t)); | ||
2058 | acache->next_slot = 0; | ||
2059 | } | ||
2060 | |||
2061 | if (acache->s_same > 0) | ||
2062 | { | ||
2063 | memset (acache->same_array, 0, acache->s_same * 256 * sizeof (usize_t)); | ||
2064 | } | ||
2065 | } | ||
2066 | |||
2067 | static void | ||
2068 | xd3_update_cache (xd3_addr_cache* acache, usize_t addr) | ||
2069 | { | ||
2070 | if (acache->s_near > 0) | ||
2071 | { | ||
2072 | acache->near_array[acache->next_slot] = addr; | ||
2073 | acache->next_slot = (acache->next_slot + 1) % acache->s_near; | ||
2074 | } | ||
2075 | |||
2076 | if (acache->s_same > 0) | ||
2077 | { | ||
2078 | acache->same_array[addr % (acache->s_same*256)] = addr; | ||
2079 | } | ||
2080 | } | ||
2081 | |||
2082 | #if XD3_ENCODER | ||
2083 | /* OPT: this gets called a lot, can it be optimized? */ | ||
2084 | static int | ||
2085 | xd3_encode_address (xd3_stream *stream, usize_t addr, usize_t here, uint8_t* mode) | ||
2086 | { | ||
2087 | usize_t d, bestd; | ||
2088 | int i, bestm, ret; | ||
2089 | xd3_addr_cache* acache = & stream->acache; | ||
2090 | |||
2091 | #define SMALLEST_INT(x) do { if (((x) & ~127) == 0) { goto good; } } while (0) | ||
2092 | |||
2093 | /* Attempt to find the address mode that yields the smallest integer value for "d", the | ||
2094 | * encoded address value, thereby minimizing the encoded size of the address. */ | ||
2095 | bestd = addr; | ||
2096 | bestm = VCD_SELF; | ||
2097 | |||
2098 | XD3_ASSERT (addr < here); | ||
2099 | |||
2100 | SMALLEST_INT (bestd); | ||
2101 | |||
2102 | if ((d = here-addr) < bestd) | ||
2103 | { | ||
2104 | bestd = d; | ||
2105 | bestm = VCD_HERE; | ||
2106 | |||
2107 | SMALLEST_INT (bestd); | ||
2108 | } | ||
2109 | |||
2110 | for (i = 0; i < acache->s_near; i += 1) | ||
2111 | { | ||
2112 | d = addr - acache->near_array[i]; | ||
2113 | |||
2114 | if (d >= 0 && d < bestd) | ||
2115 | { | ||
2116 | bestd = d; | ||
2117 | bestm = i+2; /* 2 counts the VCD_SELF, VCD_HERE modes */ | ||
2118 | |||
2119 | SMALLEST_INT (bestd); | ||
2120 | } | ||
2121 | } | ||
2122 | |||
2123 | if (acache->s_same > 0 && acache->same_array[d = addr%(acache->s_same*256)] == addr) | ||
2124 | { | ||
2125 | bestd = d%256; | ||
2126 | bestm = acache->s_near + 2 + d/256; /* 2 + s_near offsets past the VCD_NEAR modes */ | ||
2127 | |||
2128 | if ((ret = xd3_emit_byte (stream, & ADDR_TAIL (stream), bestd))) { return ret; } | ||
2129 | } | ||
2130 | else | ||
2131 | { | ||
2132 | good: | ||
2133 | |||
2134 | if ((ret = xd3_emit_size (stream, & ADDR_TAIL (stream), bestd))) { return ret; } | ||
2135 | } | ||
2136 | |||
2137 | xd3_update_cache (acache, addr); | ||
2138 | |||
2139 | IF_DEBUG (xd3_count_mode (stream, bestm)); | ||
2140 | |||
2141 | (*mode) += bestm; | ||
2142 | |||
2143 | return 0; | ||
2144 | } | ||
2145 | #endif | ||
2146 | |||
2147 | static int | ||
2148 | xd3_decode_address (xd3_stream *stream, usize_t here, uint mode, const uint8_t **inpp, const uint8_t *max, uint32_t *valp) | ||
2149 | { | ||
2150 | int ret; | ||
2151 | uint same_start = 2 + stream->acache.s_near; | ||
2152 | |||
2153 | if (mode < same_start) | ||
2154 | { | ||
2155 | if ((ret = xd3_read_size (stream, inpp, max, valp))) { return ret; } | ||
2156 | |||
2157 | switch (mode) | ||
2158 | { | ||
2159 | case VCD_SELF: | ||
2160 | break; | ||
2161 | case VCD_HERE: | ||
2162 | (*valp) = here - (*valp); | ||
2163 | break; | ||
2164 | default: | ||
2165 | (*valp) += stream->acache.near_array[mode - 2]; | ||
2166 | break; | ||
2167 | } | ||
2168 | } | ||
2169 | else | ||
2170 | { | ||
2171 | if (*inpp == max) | ||
2172 | { | ||
2173 | stream->msg = "address underflow"; | ||
2174 | return EINVAL; | ||
2175 | } | ||
2176 | |||
2177 | mode -= same_start; | ||
2178 | |||
2179 | (*valp) = stream->acache.same_array[mode*256 + (**inpp)]; | ||
2180 | |||
2181 | (*inpp) += 1; | ||
2182 | } | ||
2183 | |||
2184 | xd3_update_cache (& stream->acache, *valp); | ||
2185 | |||
2186 | return 0; | ||
2187 | } | ||
2188 | |||
2189 | /****************************************************************************************** | ||
2190 | Alloc/free | ||
2191 | ******************************************************************************************/ | ||
2192 | |||
2193 | static void* | ||
2194 | __xd3_alloc_func (void* opaque, usize_t items, usize_t size) | ||
2195 | { | ||
2196 | return malloc (items * size); | ||
2197 | } | ||
2198 | |||
/* Default deallocator: release ADDRESS with free().  OPAQUE is unused. */
static void
__xd3_free_func (void* opaque, void* address)
{
  (void) opaque;

  free (address);
}
2204 | |||
2205 | static void* | ||
2206 | xd3_alloc (xd3_stream *stream, | ||
2207 | usize_t elts, | ||
2208 | usize_t size) | ||
2209 | { | ||
2210 | void *a = stream->alloc (stream->opaque, elts, size); | ||
2211 | |||
2212 | if (a != NULL) | ||
2213 | { | ||
2214 | IF_DEBUG (stream->alloc_cnt += 1); | ||
2215 | } | ||
2216 | else | ||
2217 | { | ||
2218 | stream->msg = "out of memory"; | ||
2219 | } | ||
2220 | |||
2221 | return a; | ||
2222 | } | ||
2223 | |||
2224 | static void | ||
2225 | xd3_free (xd3_stream *stream, | ||
2226 | void *ptr) | ||
2227 | { | ||
2228 | if (ptr != NULL) | ||
2229 | { | ||
2230 | IF_DEBUG (stream->free_cnt += 1); | ||
2231 | XD3_ASSERT (stream->free_cnt <= stream->alloc_cnt); | ||
2232 | stream->free (stream->opaque, ptr); | ||
2233 | } | ||
2234 | } | ||
2235 | |||
2236 | #if XD3_ENCODER | ||
2237 | static void* | ||
2238 | xd3_alloc0 (xd3_stream *stream, | ||
2239 | usize_t elts, | ||
2240 | usize_t size) | ||
2241 | { | ||
2242 | void *a = xd3_alloc (stream, elts, size); | ||
2243 | |||
2244 | if (a != NULL) | ||
2245 | { | ||
2246 | memset (a, 0, elts * size); | ||
2247 | } | ||
2248 | |||
2249 | return a; | ||
2250 | } | ||
2251 | |||
2252 | static xd3_output* | ||
2253 | xd3_alloc_output (xd3_stream *stream, | ||
2254 | xd3_output *old_output) | ||
2255 | { | ||
2256 | xd3_output *output; | ||
2257 | uint8_t *base; | ||
2258 | |||
2259 | if (stream->enc_free != NULL) | ||
2260 | { | ||
2261 | output = stream->enc_free; | ||
2262 | stream->enc_free = output->next_page; | ||
2263 | } | ||
2264 | else | ||
2265 | { | ||
2266 | if ((output = xd3_alloc (stream, 1, sizeof (xd3_output))) == NULL) | ||
2267 | { | ||
2268 | return NULL; | ||
2269 | } | ||
2270 | |||
2271 | if ((base = xd3_alloc (stream, XD3_ALLOCSIZE, sizeof (uint8_t))) == NULL) | ||
2272 | { | ||
2273 | xd3_free (stream, output); | ||
2274 | return NULL; | ||
2275 | } | ||
2276 | |||
2277 | output->base = base; | ||
2278 | output->avail = XD3_ALLOCSIZE; | ||
2279 | } | ||
2280 | |||
2281 | output->next = 0; | ||
2282 | |||
2283 | if (old_output) | ||
2284 | { | ||
2285 | old_output->next_page = output; | ||
2286 | } | ||
2287 | |||
2288 | output->next_page = NULL; | ||
2289 | |||
2290 | return output; | ||
2291 | } | ||
2292 | |||
2293 | static usize_t | ||
2294 | xd3_sizeof_output (xd3_output *output) | ||
2295 | { | ||
2296 | usize_t s = 0; | ||
2297 | |||
2298 | for (; output; output = output->next_page) | ||
2299 | { | ||
2300 | s += output->next; | ||
2301 | } | ||
2302 | |||
2303 | return s; | ||
2304 | } | ||
2305 | |||
2306 | static void | ||
2307 | xd3_freelist_output (xd3_stream *stream, | ||
2308 | xd3_output *output) | ||
2309 | { | ||
2310 | xd3_output *tmp; | ||
2311 | |||
2312 | while (output) | ||
2313 | { | ||
2314 | tmp = output; | ||
2315 | output = output->next_page; | ||
2316 | |||
2317 | tmp->next = 0; | ||
2318 | tmp->next_page = stream->enc_free; | ||
2319 | stream->enc_free = tmp; | ||
2320 | } | ||
2321 | } | ||
2322 | |||
2323 | static void | ||
2324 | xd3_free_output (xd3_stream *stream, | ||
2325 | xd3_output *output) | ||
2326 | { | ||
2327 | xd3_output *next; | ||
2328 | |||
2329 | again: | ||
2330 | if (output == NULL) | ||
2331 | { | ||
2332 | return; | ||
2333 | } | ||
2334 | |||
2335 | next = output->next_page; | ||
2336 | |||
2337 | xd3_free (stream, output->base); | ||
2338 | xd3_free (stream, output); | ||
2339 | |||
2340 | output = next; | ||
2341 | goto again; | ||
2342 | } | ||
2343 | #endif /* XD3_ENCODER */ | ||
2344 | |||
/* Release everything owned by STREAM and zero the structure.  Every buffer
 * goes through xd3_free, which ignores NULL pointers, so a partially
 * initialized stream can be passed. */
void
xd3_free_stream (xd3_stream *stream)
{

  /* Hash tables and the instruction-optimization buffer. */
  xd3_free (stream, stream->large_table);
  xd3_free (stream, stream->small_table);
  xd3_free (stream, stream->small_prev);
  xd3_free (stream, stream->iopt.buffer);

#if XD3_ENCODER
  {
    /* Encoder output pages: each section's chain, then the free list. */
    int i;
    for (i = 0; i < ENC_SECTS; i += 1)
      {
        xd3_free_output (stream, stream->enc_heads[i]);
      }
    xd3_free_output (stream, stream->enc_free);
  }
#endif

  /* Address cache arrays. */
  xd3_free (stream, stream->acache.near_array);
  xd3_free (stream, stream->acache.same_array);

  /* Section copies. */
  xd3_free (stream, stream->inst_sect.copied1);
  xd3_free (stream, stream->addr_sect.copied1);
  xd3_free (stream, stream->data_sect.copied1);

  /* Decoder buffers. */
  xd3_free (stream, stream->dec_buffer);
  xd3_free (stream, (uint8_t*) stream->dec_lastwin);

  xd3_free (stream, stream->buf_in);
  xd3_free (stream, stream->dec_appheader);
  xd3_free (stream, stream->dec_codetbl);
  xd3_free (stream, stream->code_table_alloc);

#if SECONDARY_ANY
  /* Secondary-compression copies and per-section compressor state. */
  xd3_free (stream, stream->inst_sect.copied2);
  xd3_free (stream, stream->addr_sect.copied2);
  xd3_free (stream, stream->data_sect.copied2);

  if (stream->sec_type != NULL)
    {
      stream->sec_type->destroy (stream, stream->sec_stream_d);
      stream->sec_type->destroy (stream, stream->sec_stream_i);
      stream->sec_type->destroy (stream, stream->sec_stream_a);
    }
#endif

  /* Debug statistics tables. */
  IF_DEBUG (xd3_free (stream, stream->i_freqs));
  IF_DEBUG (xd3_free (stream, stream->i_modes));
  IF_DEBUG (xd3_free (stream, stream->i_sizes));

  /* Every allocation counted by xd3_alloc must have been released by now. */
  XD3_ASSERT (stream->alloc_cnt == stream->free_cnt);

  memset (stream, 0, sizeof (xd3_stream));
}
2401 | |||
2402 | #if (XD3_DEBUG || VCDIFF_TOOLS) | ||
2403 | static const char* | ||
2404 | xd3_rtype_to_string (xd3_rtype type, int print_mode) | ||
2405 | { | ||
2406 | switch (type) | ||
2407 | { | ||
2408 | case XD3_NOOP: | ||
2409 | return "NOOP "; | ||
2410 | case XD3_RUN: | ||
2411 | return "RUN "; | ||
2412 | case XD3_ADD: | ||
2413 | return "ADD "; | ||
2414 | default: break; | ||
2415 | } | ||
2416 | if (! print_mode) | ||
2417 | { | ||
2418 | return "CPY "; | ||
2419 | } | ||
2420 | switch (type) | ||
2421 | { | ||
2422 | case XD3_CPY + 0: return "CPY_0"; | ||
2423 | case XD3_CPY + 1: return "CPY_1"; | ||
2424 | case XD3_CPY + 2: return "CPY_2"; | ||
2425 | case XD3_CPY + 3: return "CPY_3"; | ||
2426 | case XD3_CPY + 4: return "CPY_4"; | ||
2427 | case XD3_CPY + 5: return "CPY_5"; | ||
2428 | case XD3_CPY + 6: return "CPY_6"; | ||
2429 | case XD3_CPY + 7: return "CPY_7"; | ||
2430 | case XD3_CPY + 8: return "CPY_8"; | ||
2431 | case XD3_CPY + 9: return "CPY_9"; | ||
2432 | default: return "CPY>9"; | ||
2433 | } | ||
2434 | } | ||
2435 | #endif | ||
2436 | |||
2437 | /****************************************************************************************** | ||
2438 | Stream configuration | ||
2439 | ******************************************************************************************/ | ||
2440 | |||
2441 | int | ||
2442 | xd3_config_stream(xd3_stream *stream, | ||
2443 | xd3_config *config) | ||
2444 | { | ||
2445 | int ret; | ||
2446 | xd3_config defcfg; | ||
2447 | const xd3_smatcher* smatcher; | ||
2448 | |||
2449 | if (config == NULL) | ||
2450 | { | ||
2451 | config = & defcfg; | ||
2452 | memset (config, 0, sizeof (*config)); | ||
2453 | } | ||
2454 | |||
2455 | /* Initial setup: no error checks yet */ | ||
2456 | memset (stream, 0, sizeof (*stream)); | ||
2457 | |||
2458 | stream->memsize = config->memsize ? config->memsize : XD3_DEFAULT_MEMSIZE; | ||
2459 | stream->winsize = config->winsize ? config->winsize : XD3_DEFAULT_WINSIZE; | ||
2460 | stream->sprevsz = config->sprevsz ? config->sprevsz : XD3_DEFAULT_SPREVSZ; | ||
2461 | stream->srcwin_size = config->srcwin_size ? config->srcwin_size : XD3_DEFAULT_START_CKSUM_ADVANCE; | ||
2462 | stream->srcwin_maxsz = config->srcwin_maxsz ? config->srcwin_maxsz : XD3_DEFAULT_MAX_CKSUM_ADVANCE; | ||
2463 | stream->iopt_size = config->iopt_size ? config->iopt_size : XD3_DEFAULT_IOPT_SIZE; | ||
2464 | stream->getblk = config->getblk; | ||
2465 | stream->alloc = config->alloc ? config->alloc : __xd3_alloc_func; | ||
2466 | stream->free = config->freef ? config->freef : __xd3_free_func; | ||
2467 | stream->opaque = config->opaque; | ||
2468 | stream->flags = config->flags; | ||
2469 | |||
2470 | XD3_ASSERT (stream->winsize > 0); | ||
2471 | |||
2472 | /* Secondary setup. */ | ||
2473 | stream->sec_data = config->sec_data; | ||
2474 | stream->sec_inst = config->sec_inst; | ||
2475 | stream->sec_addr = config->sec_addr; | ||
2476 | |||
2477 | stream->sec_data.data_type = DATA_SECTION; | ||
2478 | stream->sec_inst.data_type = INST_SECTION; | ||
2479 | stream->sec_addr.data_type = ADDR_SECTION; | ||
2480 | |||
2481 | /* Check static sizes. */ | ||
2482 | if (sizeof (usize_t) != SIZEOF_USIZE_T || | ||
2483 | sizeof (xoff_t) != SIZEOF_XOFF_T || | ||
2484 | (ret = xd3_check_pow2(XD3_ALLOCSIZE, NULL))) | ||
2485 | { | ||
2486 | stream->msg = "incorrect compilation: wrong integer sizes"; | ||
2487 | return EINVAL; | ||
2488 | } | ||
2489 | |||
2490 | /* Check/set secondary compressor. */ | ||
2491 | switch (stream->flags & XD3_SEC_TYPE) | ||
2492 | { | ||
2493 | case 0: | ||
2494 | if (stream->flags & XD3_SEC_OTHER) | ||
2495 | { | ||
2496 | stream->msg = "XD3_SEC flags require a secondary compressor type"; | ||
2497 | return EINVAL; | ||
2498 | } | ||
2499 | break; | ||
2500 | case XD3_SEC_FGK: | ||
2501 | FGK_CASE (stream); | ||
2502 | case XD3_SEC_DJW: | ||
2503 | DJW_CASE (stream); | ||
2504 | default: | ||
2505 | stream->msg = "too many secondary compressor types set"; | ||
2506 | return EINVAL; | ||
2507 | } | ||
2508 | |||
2509 | /* Check/set encoder code table. */ | ||
2510 | switch (stream->flags & XD3_ALT_CODE_TABLE) { | ||
2511 | case 0: | ||
2512 | stream->code_table_desc = & __rfc3284_code_table_desc; | ||
2513 | stream->code_table_func = xd3_rfc3284_code_table; | ||
2514 | break; | ||
2515 | #if GENERIC_ENCODE_TABLES | ||
2516 | case XD3_ALT_CODE_TABLE: | ||
2517 | stream->code_table_desc = & __alternate_code_table_desc; | ||
2518 | stream->code_table_func = xd3_alternate_code_table; | ||
2519 | stream->comp_table_func = xd3_compute_alternate_table_encoding; | ||
2520 | break; | ||
2521 | #endif | ||
2522 | default: | ||
2523 | stream->msg = "alternate code table support was not compiled"; | ||
2524 | return EINVAL; | ||
2525 | } | ||
2526 | |||
2527 | /* Check sprevsz */ | ||
2528 | if (config->small_chain == 1) | ||
2529 | { | ||
2530 | stream->sprevsz = 0; | ||
2531 | } | ||
2532 | else | ||
2533 | { | ||
2534 | if ((ret = xd3_check_pow2 (stream->sprevsz, NULL))) | ||
2535 | { | ||
2536 | stream->msg = "sprevsz is required to be a power of two"; | ||
2537 | return EINVAL; | ||
2538 | } | ||
2539 | |||
2540 | stream->sprevmask = stream->sprevsz - 1; | ||
2541 | } | ||
2542 | |||
2543 | /* Default scanner settings. */ | ||
2544 | switch (config->smatch_cfg) | ||
2545 | { | ||
2546 | IF_BUILD_SOFT(case XD3_SMATCH_SOFT: | ||
2547 | smatcher = & __smatcher_soft; break; | ||
2548 | |||
2549 | if (config->large_look < MIN_MATCH || | ||
2550 | config->large_step < 1 || | ||
2551 | config->small_look < MIN_MATCH || | ||
2552 | config->small_chain < 1 || | ||
2553 | config->large_look < config->small_look || | ||
2554 | config->small_chain < config->small_lchain || | ||
2555 | (config->small_lchain == 0 && config->try_lazy) || | ||
2556 | config->srcwin_size < stream->large_look || | ||
2557 | config->srcwin_maxsz < stream->srcwin_size) | ||
2558 | { | ||
2559 | stream->msg = "invalid soft string-match config"; | ||
2560 | return EINVAL; | ||
2561 | } | ||
2562 | break;) | ||
2563 | |||
2564 | IF_BUILD_SLOW(case XD3_SMATCH_DEFAULT:) | ||
2565 | IF_BUILD_SLOW(case XD3_SMATCH_SLOW: smatcher = & __smatcher_slow; break;) | ||
2566 | IF_BUILD_FAST(case XD3_SMATCH_FAST: smatcher = & __smatcher_fast; break;) | ||
2567 | default: | ||
2568 | stream->msg = "invalid string match config type"; | ||
2569 | return EINVAL; | ||
2570 | } | ||
2571 | |||
2572 | stream->string_match = smatcher->string_match; | ||
2573 | XD3_ASSERT(stream->string_match); | ||
2574 | |||
2575 | XD3_COPY_CONFIG_FIELDS (stream, smatcher); | ||
2576 | |||
2577 | /* If it is a soft config, the smatcher fields didn't set anything, copy from config | ||
2578 | * instead. */ | ||
2579 | if (stream->large_look == 0) | ||
2580 | { | ||
2581 | XD3_COPY_CONFIG_FIELDS (stream, config); | ||
2582 | } | ||
2583 | |||
2584 | IF_DEBUG1 (P(RINT "[stream cfg] llook %u lstep %u slook %u\n", | ||
2585 | stream->large_look, stream->large_step, stream->small_look)); | ||
2586 | return 0; | ||
2587 | } | ||
2588 | |||
2589 | /****************************************************************************************** | ||
2590 | Getblk interface | ||
2591 | ******************************************************************************************/ | ||
2592 | |||
2593 | /* This function interfaces with the client getblk function, checks its results, etc. */ | ||
2594 | static int | ||
2595 | xd3_getblk (xd3_stream *stream/*, xd3_source *source*/, xoff_t blkno) | ||
2596 | { | ||
2597 | int ret; | ||
2598 | xd3_source *source = stream->src; | ||
2599 | |||
2600 | if (blkno >= source->blocks) | ||
2601 | { | ||
2602 | stream->msg = "source file too short"; | ||
2603 | return EINVAL; | ||
2604 | } | ||
2605 | |||
2606 | if (blkno != source->curblkno || source->curblk == NULL) | ||
2607 | { | ||
2608 | XD3_ASSERT (source->curblk != NULL || blkno != source->curblkno); | ||
2609 | |||
2610 | source->getblkno = blkno; | ||
2611 | |||
2612 | if (stream->getblk == NULL) | ||
2613 | { | ||
2614 | stream->msg = "getblk source input"; | ||
2615 | return XD3_GETSRCBLK; | ||
2616 | } | ||
2617 | else if ((ret = stream->getblk (stream, source, blkno)) != 0) | ||
2618 | { | ||
2619 | stream->msg = "getblk failed"; | ||
2620 | return ret; | ||
2621 | } | ||
2622 | |||
2623 | XD3_ASSERT (source->curblk != NULL); | ||
2624 | } | ||
2625 | |||
2626 | if (source->onblk != xd3_bytes_on_srcblk (source, blkno)) | ||
2627 | { | ||
2628 | stream->msg = "getblk returned short block"; | ||
2629 | return EINVAL; | ||
2630 | } | ||
2631 | |||
2632 | return 0; | ||
2633 | } | ||
2634 | |||
2635 | /****************************************************************************************** | ||
2636 | Stream open/close | ||
2637 | ******************************************************************************************/ | ||
2638 | |||
2639 | int | ||
2640 | xd3_set_source (xd3_stream *stream, | ||
2641 | xd3_source *src) | ||
2642 | { | ||
2643 | xoff_t blk_num; | ||
2644 | xoff_t tail_size; | ||
2645 | |||
2646 | IF_DEBUG1 (P(RINT "[set source] size %"Q"u\n", src->size)); | ||
2647 | |||
2648 | if (src == NULL || src->size < stream->large_look) { return 0; } | ||
2649 | |||
2650 | stream->src = src; | ||
2651 | blk_num = src->size / src->blksize; | ||
2652 | tail_size = src->size % src->blksize; | ||
2653 | src->blocks = blk_num + (tail_size > 0); | ||
2654 | src->srclen = 0; | ||
2655 | src->srcbase = 0; | ||
2656 | |||
2657 | return 0; | ||
2658 | } | ||
2659 | |||
2660 | void | ||
2661 | xd3_abort_stream (xd3_stream *stream) | ||
2662 | { | ||
2663 | stream->dec_state = DEC_ABORTED; | ||
2664 | stream->enc_state = ENC_ABORTED; | ||
2665 | } | ||
2666 | |||
2667 | int | ||
2668 | xd3_close_stream (xd3_stream *stream) | ||
2669 | { | ||
2670 | if (stream->enc_state != 0 && stream->enc_state != ENC_ABORTED) | ||
2671 | { | ||
2672 | /* If encoding, should be ready for more input but not actually have any. */ | ||
2673 | if (stream->enc_state != ENC_INPUT || stream->avail_in != 0) | ||
2674 | { | ||
2675 | stream->msg = "encoding is incomplete"; | ||
2676 | return EINVAL; | ||
2677 | } | ||
2678 | } | ||
2679 | else | ||
2680 | { | ||
2681 | switch (stream->dec_state) | ||
2682 | { | ||
2683 | case DEC_VCHEAD: | ||
2684 | case DEC_WININD: | ||
2685 | /* TODO: Address the zero-byte ambiguity. Does the encoder emit a window or | ||
2686 | * not? If so, then catch an error here. If not, need another routine to say | ||
2687 | * decode_at_least_one_if_empty. */ | ||
2688 | case DEC_ABORTED: | ||
2689 | break; | ||
2690 | default: | ||
2691 | /* If decoding, should be ready for the next window. */ | ||
2692 | stream->msg = "EOF in decode"; | ||
2693 | return EINVAL; | ||
2694 | } | ||
2695 | } | ||
2696 | |||
2697 | return 0; | ||
2698 | } | ||
2699 | |||
2700 | /****************************************************************************************** | ||
2701 | Application header | ||
2702 | ******************************************************************************************/ | ||
2703 | |||
2704 | int | ||
2705 | xd3_get_appheader (xd3_stream *stream, | ||
2706 | uint8_t **data, | ||
2707 | usize_t *size) | ||
2708 | { | ||
2709 | if (stream->dec_state < DEC_WININD) | ||
2710 | { | ||
2711 | stream->msg = "application header not available"; | ||
2712 | return EINVAL; | ||
2713 | } | ||
2714 | |||
2715 | (*data) = stream->dec_appheader; | ||
2716 | (*size) = stream->dec_appheadsz; | ||
2717 | return 0; | ||
2718 | } | ||
2719 | |||
2720 | #if XD3_ENCODER | ||
2721 | void | ||
2722 | xd3_set_appheader (xd3_stream *stream, | ||
2723 | const uint8_t *data, | ||
2724 | usize_t size) | ||
2725 | { | ||
2726 | stream->enc_appheader = data; | ||
2727 | stream->enc_appheadsz = size; | ||
2728 | } | ||
2729 | |||
2730 | /****************************************************************************************** | ||
2731 | Encoder stuff | ||
2732 | ******************************************************************************************/ | ||
2733 | |||
#if XD3_DEBUG
/* Debug consistency check for the instruction buffer: every slot must be on the used
 * list, on the free list, or be the single pending instruction stream->iout.  Returns
 * non-zero when those counts add up to stream->iopt_size. */
static int
xd3_iopt_check (xd3_stream *stream)
{
  int used_cnt = xd3_rlist_length (& stream->iopt.used);
  int free_cnt = xd3_rlist_length (& stream->iopt.free);
  int pending  = (stream->iout != NULL);

  return (used_cnt + free_cnt + pending) == stream->iopt_size;
}
#endif
2744 | |||
2745 | static xd3_rinst* | ||
2746 | xd3_iopt_free (xd3_stream *stream, xd3_rinst *i) | ||
2747 | { | ||
2748 | xd3_rinst *n = xd3_rlist_remove (i); | ||
2749 | xd3_rlist_push_back (& stream->iopt.free, i); | ||
2750 | return n; | ||
2751 | } | ||
2752 | |||
2753 | static void | ||
2754 | xd3_iopt_free_nonadd (xd3_stream *stream, xd3_rinst *i) | ||
2755 | { | ||
2756 | if (i->type != XD3_ADD) | ||
2757 | { | ||
2758 | xd3_rlist_push_back (& stream->iopt.free, i); | ||
2759 | } | ||
2760 | } | ||
2761 | |||
2762 | /* When an instruction is ready to flush from the iopt buffer, this function is called to | ||
2763 | * produce an encoding. It writes the instruction plus size, address, and data to the | ||
2764 | * various encoding sections. */ | ||
/* Parameters: STREAM is the encoder state; INST is the instruction to encode, whose
 * pos must equal stream->unencoded_offset (asserted below).
 *
 * Returns 0 on success, otherwise the non-zero value returned by the failing helper
 * (xd3_srcwin_setup, xd3_encode_address, xd3_emit_byte, xd3_emit_bytes,
 * xd3_emit_double or xd3_emit_single).
 *
 * Instruction codes are emitted one step behind: INST is normally parked in
 * stream->iout and only written when the next instruction arrives, so that the two can
 * be merged into one double-instruction code when iout->code2 is non-zero. */
2765 | static int | ||
2766 | xd3_iopt_finish_encoding (xd3_stream *stream, xd3_rinst *inst) | ||
2767 | { | ||
2768 | int ret; | ||
2769 | |||
2770 | /* Check for input overflow. */ | ||
2771 | XD3_ASSERT (inst->pos + inst->size <= stream->avail_in); | ||
2772 | |||
/* Step 1: write the per-type payload (address for copies, data bytes for run/add). */
2773 | switch (inst->type) | ||
2774 | { | ||
2775 | case XD3_CPY: | ||
2776 | { | ||
2777 | /* the address may have an offset if there is a source window. */ | ||
2778 | usize_t addr; | ||
2779 | xd3_source *src = stream->src; | ||
2780 | |||
2781 | if (src != NULL) | ||
2782 | { | ||
2783 | /* If there is a source copy, the source must have its source window decided | ||
2784 | * before we can encode. This can be bad -- we have to make this decision | ||
2785 | * even if no source matches have been found. */ | ||
2786 | if (stream->srcwin_decided == 0) | ||
2787 | { | ||
2788 | if ((ret = xd3_srcwin_setup (stream))) { return ret; } | ||
2789 | } | ||
2790 | |||
2791 | /* xtra field indicates the copy is from the source */ | ||
2792 | if (inst->xtra) | ||
2793 | { | ||
/* Source copy: the address is made relative to the start of the source window. */
2794 | XD3_ASSERT (inst->addr >= src->srcbase); | ||
2795 | XD3_ASSERT (inst->addr + inst->size <= src->srcbase + src->srclen); | ||
2796 | addr = (inst->addr - src->srcbase); | ||
2797 | } | ||
2798 | else | ||
2799 | { | ||
2800 | /* with source window: target copy address is offset by taroff. */ | ||
2801 | addr = stream->taroff + (usize_t) inst->addr; | ||
2802 | } | ||
2803 | } | ||
2804 | else | ||
2805 | { | ||
2806 | addr = (usize_t) inst->addr; | ||
2807 | } | ||
2808 | |||
2809 | XD3_ASSERT (inst->size >= MIN_MATCH); | ||
2810 | |||
/* NOTE(review): xd3_encode_address receives a pointer to inst->type and so may
 * rewrite it -- presumably to XD3_CPY plus the chosen address mode; confirm. */
2811 | /* the "here" position is always offset by taroff */ | ||
2812 | if ((ret = xd3_encode_address (stream, addr, inst->pos + stream->taroff, & inst->type))) | ||
2813 | { | ||
2814 | return ret; | ||
2815 | } | ||
2816 | |||
2817 | IF_DEBUG (stream->n_cpy += 1); | ||
2818 | IF_DEBUG (stream->l_cpy += inst->size); | ||
2819 | |||
2820 | IF_DEBUG1 ({ | ||
2821 | static int cnt; | ||
2822 | P(RINT "[iopt copy:%d] pos %"Q"u-%"Q"u addr %"Q"u-%"Q"u size %u\n", | ||
2823 | cnt++, | ||
2824 | stream->total_in + inst->pos, | ||
2825 | stream->total_in + inst->pos + inst->size, | ||
2826 | inst->addr, inst->addr + inst->size, inst->size); | ||
2827 | }); | ||
2828 | break; | ||
2829 | } | ||
2830 | case XD3_RUN: | ||
2831 | { | ||
2832 | XD3_ASSERT (inst->size >= MIN_MATCH); | ||
2833 | |||
/* A run contributes a single data byte: the repeated value held in inst->xtra. */
2834 | if ((ret = xd3_emit_byte (stream, & DATA_TAIL (stream), inst->xtra))) { return ret; } | ||
2835 | |||
2836 | IF_DEBUG (stream->n_run += 1); | ||
2837 | IF_DEBUG (stream->l_run += inst->size); | ||
2838 | IF_DEBUG (stream->n_dbytes += 1); | ||
2839 | |||
2840 | IF_DEBUG1 ({ | ||
2841 | static int cnt; | ||
2842 | P(RINT "[iopt run:%d] pos %"Q"u size %u\n", cnt++, stream->total_in + inst->pos, inst->size); | ||
2843 | }); | ||
2844 | break; | ||
2845 | } | ||
2846 | case XD3_ADD: | ||
2847 | { | ||
/* An add copies its inst->size literal bytes from the input buffer into the data section. */
2848 | if ((ret = xd3_emit_bytes (stream, & DATA_TAIL (stream), | ||
2849 | stream->next_in + inst->pos, inst->size))) { return ret; } | ||
2850 | |||
2851 | IF_DEBUG (stream->n_add += 1); | ||
2852 | IF_DEBUG (stream->l_add += inst->size); | ||
2853 | IF_DEBUG (stream->n_dbytes += inst->size); | ||
2854 | |||
2855 | IF_DEBUG1 ({ | ||
2856 | static int cnt; | ||
2857 | P(RINT "[iopt add:%d] pos %"Q"u size %u\n", cnt++, stream->total_in + inst->pos, inst->size); | ||
2858 | }); | ||
2859 | |||
2860 | break; | ||
2861 | } | ||
2862 | } | ||
2863 | |||
/* Step 2: account for the input bytes this instruction covers. */
2864 | /* This is the only place stream->unencoded_offset is incremented. */ | ||
2865 | XD3_ASSERT (stream->unencoded_offset == inst->pos); | ||
2866 | stream->unencoded_offset += inst->size; | ||
2867 | |||
2868 | IF_DEBUG (stream->n_emit += inst->size); | ||
2869 | |||
2870 | inst->code2 = 0; | ||
2871 | |||
/* Step 3: choose instruction codes.  XD3_CHOOSE_INSTRUCTION is a macro defined
 * elsewhere; NOTE(review): presumably it sets inst->code1 and, when the pending
 * stream->iout can be paired with INST, iout->code2 -- confirm against its definition. */
2872 | XD3_CHOOSE_INSTRUCTION (stream, stream->iout, inst); | ||
2873 | |||
2874 | if (stream->iout != NULL) | ||
2875 | { | ||
2876 | if (stream->iout->code2 != 0) | ||
2877 | { | ||
/* The pending instruction and INST share one double code: emit it, release both
 * (non-ADD only), and leave nothing pending. */
2878 | if ((ret = xd3_emit_double (stream, stream->iout, inst, stream->iout->code2))) { return ret; } | ||
2879 | |||
2880 | xd3_iopt_free_nonadd (stream, stream->iout); | ||
2881 | xd3_iopt_free_nonadd (stream, inst); | ||
2882 | stream->iout = NULL; | ||
2883 | return 0; | ||
2884 | } | ||
2885 | else | ||
2886 | { | ||
/* No pairing: flush the pending instruction on its own; INST becomes pending below. */
2887 | if ((ret = xd3_emit_single (stream, stream->iout, stream->iout->code1))) { return ret; } | ||
2888 | |||
2889 | xd3_iopt_free_nonadd (stream, stream->iout); | ||
2890 | } | ||
2891 | } | ||
2892 | |||
2893 | stream->iout = inst; | ||
2894 | |||
2895 | return 0; | ||
2896 | } | ||
2897 | |||
2898 | /* This possibly encodes an add instruction, iadd, which must remain on the stack until | ||
2899 | * the following call to xd3_iopt_finish_encoding. */ | ||
2900 | static int | ||
2901 | xd3_iopt_add (xd3_stream *stream, usize_t pos, xd3_rinst *iadd) | ||
2902 | { | ||
2903 | int ret; | ||
2904 | usize_t off = stream->unencoded_offset; | ||
2905 | |||
2906 | if (pos > off) | ||
2907 | { | ||
2908 | iadd->type = XD3_ADD; | ||
2909 | iadd->pos = off; | ||
2910 | iadd->size = pos - off; | ||
2911 | |||
2912 | if ((ret = xd3_iopt_finish_encoding (stream, iadd))) { return ret; } | ||
2913 | } | ||
2914 | |||
2915 | return 0; | ||
2916 | } | ||
2917 | |||
2918 | /* This function calls xd3_iopt_finish_encoding to finish encoding an instruction, and it | ||
2919 | * may also produce an add instruction for an unmatched region. */ | ||
2920 | static int | ||
2921 | xd3_iopt_add_encoding (xd3_stream *stream, xd3_rinst *inst) | ||
2922 | { | ||
2923 | int ret; | ||
2924 | xd3_rinst iadd; | ||
2925 | |||
2926 | if ((ret = xd3_iopt_add (stream, inst->pos, & iadd))) { return ret; } | ||
2927 | |||
2928 | if ((ret = xd3_iopt_finish_encoding (stream, inst))) { return ret; } | ||
2929 | |||
2930 | return 0; | ||
2931 | } | ||
2932 | |||
2933 | /* Generates a final add instruction to encode the remaining input. */ | ||
2934 | static int | ||
2935 | xd3_iopt_add_finalize (xd3_stream *stream) | ||
2936 | { | ||
2937 | int ret; | ||
2938 | xd3_rinst iadd; | ||
2939 | |||
2940 | if ((ret = xd3_iopt_add (stream, stream->avail_in, & iadd))) { return ret; } | ||
2941 | |||
2942 | if (stream->iout) | ||
2943 | { | ||
2944 | if ((ret = xd3_emit_single (stream, stream->iout, stream->iout->code1))) { return ret; } | ||
2945 | |||
2946 | xd3_iopt_free_nonadd (stream, stream->iout); | ||
2947 | stream->iout = NULL; | ||
2948 | } | ||
2949 | |||
2950 | return 0; | ||
2951 | } | ||
2952 | |||
2953 | /* Compact the instruction buffer by choosing the best non-overlapping instructions when | ||
2954 | * lazy string-matching. There are no ADDs in the iopt buffer because those are | ||
2955 | * synthesized in xd3_iopt_add_encoding and during xd3_iopt_add_finalize. */ | ||
/* Parameters: STREAM is the encoder state.  FORCE non-zero resolves overlaps even when
 * no third instruction is available for look-ahead and then encodes every instruction
 * on the used list; FORCE zero stops the overlap pass at the end of the list and
 * encodes only part of the list (see the second loop).
 *
 * Returns 0 on success, or the non-zero result of xd3_iopt_add_encoding.
 *
 * Works in two passes: the first walks neighbouring pairs (r1, r2) on the used list and
 * removes or trims overlapping instructions; the second pops instructions from the
 * front of the list and encodes them. */
2956 | static int | ||
2957 | xd3_iopt_flush_instructions (xd3_stream *stream, int force) | ||
2958 | { | ||
2959 | xd3_rinst *r1 = xd3_rlist_front (& stream->iopt.used); | ||
2960 | xd3_rinst *r2; | ||
2961 | xd3_rinst *r3; | ||
2962 | usize_t r1end; | ||
2963 | usize_t r2end; | ||
2964 | usize_t r2off; | ||
2965 | usize_t r2moff; | ||
2966 | usize_t gap; | ||
2967 | usize_t flushed; | ||
2968 | int ret; | ||
2969 | |||
2970 | XD3_ASSERT (xd3_iopt_check (stream)); | ||
2971 | |||
2972 | /* Note: once tried to skip this step if it's possible to assert there are no | ||
2973 | * overlapping instructions. Doesn't work because xd3_iopt_erase leaves overlapping | ||
2974 | * instructions. */ | ||
2975 | while (! xd3_rlist_end (& stream->iopt.used, r1) && | ||
2976 | ! xd3_rlist_end (& stream->iopt.used, r2 = xd3_rlist_next (r1))) | ||
2977 | { | ||
2978 | r1end = r1->pos + r1->size; | ||
2979 | |||
2980 | /* If the instructions do not overlap, continue. */ | ||
2981 | if (r1end <= r2->pos) | ||
2982 | { | ||
2983 | r1 = r2; | ||
2984 | continue; | ||
2985 | } | ||
2986 | |||
2987 | r2end = r2->pos + r2->size; | ||
2988 | |||
2989 | /* The min_match adjustments prevent this. */ | ||
2990 | XD3_ASSERT (r2end > (r1end + LEAST_MATCH_INCR)); | ||
2991 | |||
2992 | /* If r3 is available... */ | ||
2993 | if (! xd3_rlist_end (& stream->iopt.used, r3 = xd3_rlist_next (r2))) | ||
2994 | { | ||
2995 | /* If r3 starts before r1 finishes or just about, r2 is irrelevant */ | ||
2996 | if (r3->pos <= r1end + 1) | ||
2997 | { | ||
2998 | xd3_iopt_free (stream, r2); | ||
2999 | continue; | ||
3000 | } | ||
3001 | } | ||
3002 | else if (! force) | ||
3003 | { | ||
3004 | /* Unless force, end the loop when r3 is not available. */ | ||
3005 | break; | ||
3006 | } | ||
3007 | |||
/* r2off:  how far r2 starts after r1 starts.
 * r2moff: how far r2 extends past the end of r1.
 * gap:    total span covered by r1 and r2 together. */
3008 | r2off = r2->pos - r1->pos; | ||
3009 | r2moff = r2end - r1end; | ||
3010 | gap = r2end - r1->pos; | ||
3011 | |||
3012 | /* If the two matches overlap almost entirely, choose the better match and discard | ||
3013 | * the other. This heuristic is BLACK MAGIC. Have something better? */ | ||
3014 | if (gap < 2*MIN_MATCH || r2moff <= 2 || r2off <= 2) | ||
3015 | { | ||
3016 | /* Only one match should be used, choose the longer one. */ | ||
3017 | if (r1->size < r2->size) | ||
3018 | { | ||
3019 | xd3_iopt_free (stream, r1); | ||
3020 | r1 = r2; | ||
3021 | } | ||
3022 | else | ||
3023 | { | ||
3024 | /* We are guaranteed that r1 does not overlap now, so advance past r2 */ | ||
3025 | r1 = xd3_iopt_free (stream, r2); | ||
3026 | } | ||
3027 | continue; | ||
3028 | } | ||
3029 | else | ||
3030 | { | ||
3031 | /* Shorten one of the instructions -- could be optimized based on the address | ||
3032 | * cache. */ | ||
3033 | usize_t average; | ||
3034 | usize_t newsize; | ||
3035 | usize_t adjust1; | ||
3036 | |||
3037 | XD3_ASSERT (r1end > r2->pos && r2end > r1->pos); | ||
3038 | |||
3039 | /* Try to balance the length of both instructions, but avoid making both longer | ||
3040 | * than MAX_MATCH_SPLIT . */ | ||
3041 | average = (gap) / 2; | ||
3042 | newsize = min (MAX_MATCH_SPLIT, gap - average); | ||
3043 | |||
/* In every branch below, adjust1 ends up as the number of bytes to trim from the
 * front of r2; the common code after the if-else applies it. */
3044 | /* Should be possible to simplify this code. */ | ||
3045 | if (newsize > r1->size) | ||
3046 | { | ||
3047 | /* shorten r2 */ | ||
3048 | adjust1 = r1end - r2->pos; | ||
3049 | } | ||
3050 | else if (newsize > r2->size) | ||
3051 | { | ||
3052 | /* shorten r1 */ | ||
3053 | adjust1 = r1end - r2->pos; | ||
3054 | |||
3055 | XD3_ASSERT (r1->size > adjust1); | ||
3056 | |||
3057 | r1->size -= adjust1; | ||
3058 | |||
3059 | /* don't shorten r2 */ | ||
3060 | adjust1 = 0; | ||
3061 | } | ||
3062 | else | ||
3063 | { | ||
3064 | /* shorten r1 */ | ||
3065 | adjust1 = r1->size - newsize; | ||
3066 | |||
/* Do not trim r1 so far that it would end before r2 begins. */
3067 | if (r2->pos > r1end - adjust1) | ||
3068 | { | ||
3069 | adjust1 -= r2->pos - (r1end - adjust1); | ||
3070 | } | ||
3071 | |||
3072 | XD3_ASSERT (r1->size > adjust1); | ||
3073 | |||
3074 | r1->size -= adjust1; | ||
3075 | |||
3076 | /* shorten r2 */ | ||
3077 | XD3_ASSERT (r1->pos + r1->size >= r2->pos); | ||
3078 | |||
3079 | adjust1 = r1->pos + r1->size - r2->pos; | ||
3080 | } | ||
3081 | |||
3082 | /* Fallthrough above if-else, shorten r2 */ | ||
3083 | XD3_ASSERT (r2->size > adjust1); | ||
3084 | |||
/* Trimming the front of r2 moves its target position and its copy address together. */
3085 | r2->size -= adjust1; | ||
3086 | r2->pos += adjust1; | ||
3087 | r2->addr += adjust1; | ||
3088 | |||
3089 | XD3_ASSERT (r1->size >= MIN_MATCH); | ||
3090 | XD3_ASSERT (r2->size >= MIN_MATCH); | ||
3091 | |||
3092 | r1 = r2; | ||
3093 | } | ||
3094 | } | ||
3095 | |||
3096 | XD3_ASSERT (xd3_iopt_check (stream)); | ||
3097 | |||
3098 | /* If forcing, pick instructions until the list is empty, otherwise this empties 50% of | ||
3099 | * the queue. */ | ||
3100 | for (flushed = 0; ! xd3_rlist_empty (& stream->iopt.used); ) | ||
3101 | { | ||
3102 | xd3_rinst *renc = xd3_rlist_pop_front (& stream->iopt.used); | ||
3103 | if ((ret = xd3_iopt_add_encoding (stream, renc))) | ||
3104 | { | ||
3105 | return ret; | ||
3106 | } | ||
3107 | |||
3108 | if (! force) | ||
3109 | { | ||
3110 | if (++flushed > stream->iopt_size / 2) | ||
3111 | { | ||
3112 | break; | ||
3113 | } | ||
3114 | |||
3115 | /* If there are only two instructions remaining, break, because they were | ||
3116 | * not optimized. This means there were more than 50% eliminated by the | ||
3117 | * loop above. */ | ||
3118 | r1 = xd3_rlist_front (& stream->iopt.used); | ||
3119 | if (xd3_rlist_end(& stream->iopt.used, r1) || | ||
3120 | xd3_rlist_end(& stream->iopt.used, r2 = xd3_rlist_next (r1)) || | ||
3121 | xd3_rlist_end(& stream->iopt.used, r3 = xd3_rlist_next (r2))) | ||
3122 | { | ||
3123 | break; | ||
3124 | } | ||
3125 | } | ||
3126 | } | ||
3127 | |||
3128 | XD3_ASSERT (xd3_iopt_check (stream)); | ||
3129 | |||
3130 | XD3_ASSERT (!force || xd3_rlist_length (& stream->iopt.used) == 0); | ||
3131 | |||
3132 | return 0; | ||
3133 | } | ||
3134 | |||
3135 | static int | ||
3136 | xd3_iopt_get_slot (xd3_stream *stream, xd3_rinst** iptr) | ||
3137 | { | ||
3138 | xd3_rinst *i; | ||
3139 | int ret; | ||
3140 | |||
3141 | if (xd3_rlist_empty (& stream->iopt.free)) | ||
3142 | { | ||
3143 | if ((ret = xd3_iopt_flush_instructions (stream, 0))) { return ret; } | ||
3144 | |||
3145 | XD3_ASSERT (! xd3_rlist_empty (& stream->iopt.free)); | ||
3146 | } | ||
3147 | |||
3148 | i = xd3_rlist_pop_back (& stream->iopt.free); | ||
3149 | |||
3150 | xd3_rlist_push_back (& stream->iopt.used, i); | ||
3151 | |||
3152 | (*iptr) = i; | ||
3153 | |||
3154 | return 0; | ||
3155 | } | ||
3156 | |||
3157 | /* A copy is about to be emitted that extends backwards to POS, therefore it may | ||
3158 | * completely cover some existing instructions in the buffer. If an instruction is | ||
3159 | * completely covered by this new match, erase it. If the new instruction is covered by | ||
3160 | * the previous one, return 1 to skip it. */ | ||
3161 | static void | ||
3162 | xd3_iopt_erase (xd3_stream *stream, usize_t pos, usize_t size) | ||
3163 | { | ||
3164 | while (! xd3_rlist_empty (& stream->iopt.used)) | ||
3165 | { | ||
3166 | xd3_rinst *r = xd3_rlist_back (& stream->iopt.used); | ||
3167 | |||
3168 | /* Verify that greedy is working. The previous instruction should end before the | ||
3169 | * new one begins. */ | ||
3170 | XD3_ASSERT ((stream->flags & XD3_BEGREEDY) == 0 || (r->pos + r->size <= pos)); | ||
3171 | /* Verify that min_match is working. The previous instruction should end before the | ||
3172 | * new one ends. */ | ||
3173 | XD3_ASSERT ((stream->flags & XD3_BEGREEDY) != 0 || (r->pos + r->size < pos + size)); | ||
3174 | |||
3175 | /* See if the last instruction starts before the new instruction. If so, there is | ||
3176 | * nothing to erase. */ | ||
3177 | if (r->pos < pos) | ||
3178 | { | ||
3179 | return; | ||
3180 | } | ||
3181 | |||
3182 | /* Otherwise, the new instruction covers the old one, delete it and repeat. */ | ||
3183 | xd3_rlist_remove (r); | ||
3184 | xd3_rlist_push_back (& stream->iopt.free, r); | ||
3185 | } | ||
3186 | } | ||
3187 | |||
3188 | /* This function tells the last matched input position. */ | ||
3189 | static usize_t | ||
3190 | xd3_iopt_last_matched (xd3_stream *stream) | ||
3191 | { | ||
3192 | xd3_rinst *r; | ||
3193 | |||
3194 | if (xd3_rlist_empty (& stream->iopt.used)) | ||
3195 | { | ||
3196 | return 0; | ||
3197 | } | ||
3198 | |||
3199 | r = xd3_rlist_back (& stream->iopt.used); | ||
3200 | |||
3201 | return r->pos + r->size; | ||
3202 | } | ||
3203 | |||
3204 | /****************************************************************************************** | ||
3205 | Emit routines | ||
3206 | ******************************************************************************************/ | ||
3207 | |||
3208 | static int | ||
3209 | xd3_emit_single (xd3_stream *stream, xd3_rinst *single, uint code) | ||
3210 | { | ||
3211 | int has_size = stream->code_table[code].size1 == 0; | ||
3212 | int ret; | ||
3213 | |||
3214 | IF_DEBUG1 (P(RINT "[emit1] %u %s (%u) code %u\n", | ||
3215 | single->pos, | ||
3216 | xd3_rtype_to_string (single->type, 0), | ||
3217 | single->size, | ||
3218 | code)); | ||
3219 | |||
3220 | if ((ret = xd3_emit_byte (stream, & INST_TAIL (stream), code))) { return ret; } | ||
3221 | |||
3222 | if (has_size) | ||
3223 | { | ||
3224 | if ((ret = xd3_emit_size (stream, & INST_TAIL (stream), single->size))) { return ret; } | ||
3225 | |||
3226 | IF_DEBUG (xd3_count_size (stream, single->size)); | ||
3227 | } | ||
3228 | |||
3229 | IF_DEBUG (xd3_count_inst (stream, code)); | ||
3230 | |||
3231 | return 0; | ||
3232 | } | ||
3233 | |||
3234 | static int | ||
3235 | xd3_emit_double (xd3_stream *stream, xd3_rinst *first, xd3_rinst *second, uint code) | ||
3236 | { | ||
3237 | int ret; | ||
3238 | |||
3239 | /* All double instructions use fixed sizes, so all we need to do is output the | ||
3240 | * instruction code, no sizes. */ | ||
3241 | XD3_ASSERT (stream->code_table[code].size1 != 0 && | ||
3242 | stream->code_table[code].size2 != 0); | ||
3243 | |||
3244 | if ((ret = xd3_emit_byte (stream, & INST_TAIL (stream), code))) { return ret; } | ||
3245 | |||
3246 | IF_DEBUG1 (P(RINT "[emit2]: %u %s (%u) %s (%u) code %u\n", | ||
3247 | first->pos, | ||
3248 | xd3_rtype_to_string (first->type, 0), | ||
3249 | first->size, | ||
3250 | xd3_rtype_to_string (second->type, 0), | ||
3251 | second->size, | ||
3252 | code)); | ||
3253 | |||
3254 | IF_DEBUG (xd3_count_inst (stream, code)); | ||
3255 | |||
3256 | return 0; | ||
3257 | } | ||
3258 | |||
3259 | /* This enters a potential run instruction into the iopt buffer. The position argument is | ||
3260 | * relative to the target window. */ | ||
3261 | static INLINE int | ||
3262 | xd3_emit_run (xd3_stream *stream, usize_t pos, usize_t size, uint8_t run_c) | ||
3263 | { | ||
3264 | xd3_rinst* ri; | ||
3265 | int ret; | ||
3266 | |||
3267 | XD3_ASSERT (pos + size <= stream->avail_in); | ||
3268 | |||
3269 | if ((ret = xd3_iopt_get_slot (stream, & ri))) { return ret; } | ||
3270 | |||
3271 | ri->type = XD3_RUN; | ||
3272 | ri->xtra = run_c; | ||
3273 | ri->pos = pos; | ||
3274 | ri->size = size; | ||
3275 | |||
3276 | return 0; | ||
3277 | } | ||
3278 | |||
3279 | /* This enters a potential copy instruction into the iopt buffer. The position argument | ||
3280 | * is relative to the target window.. */ | ||
3281 | static INLINE int | ||
3282 | xd3_found_match (xd3_stream *stream, usize_t pos, usize_t size, xoff_t addr, int is_source) | ||
3283 | { | ||
3284 | xd3_rinst* ri; | ||
3285 | int ret; | ||
3286 | |||
3287 | XD3_ASSERT (pos + size <= stream->avail_in); | ||
3288 | |||
3289 | if ((ret = xd3_iopt_get_slot (stream, & ri))) { return ret; } | ||
3290 | |||
3291 | ri->type = XD3_CPY; | ||
3292 | ri->xtra = is_source; | ||
3293 | ri->pos = pos; | ||
3294 | ri->size = size; | ||
3295 | ri->addr = addr; | ||
3296 | |||
3297 | return 0; | ||
3298 | } | ||
3299 | |||
3300 | static int | ||
3301 | xd3_emit_hdr (xd3_stream *stream) | ||
3302 | { | ||
3303 | int ret; | ||
3304 | int use_secondary = stream->sec_type != NULL; | ||
3305 | int use_adler32 = stream->flags & XD3_ADLER32; | ||
3306 | int vcd_source = xd3_encoder_used_source (stream); | ||
3307 | uint win_ind = 0; | ||
3308 | uint del_ind = 0; | ||
3309 | usize_t enc_len; | ||
3310 | usize_t tgt_len; | ||
3311 | usize_t data_len; | ||
3312 | usize_t inst_len; | ||
3313 | usize_t addr_len; | ||
3314 | |||
3315 | XD3_ASSERT (stream->n_emit == stream->avail_in); | ||
3316 | |||
3317 | if (stream->current_window == 0) | ||
3318 | { | ||
3319 | uint hdr_ind = 0; | ||
3320 | int use_appheader = stream->enc_appheader != NULL; | ||
3321 | int use_gencodetbl = GENERIC_ENCODE_TABLES && (stream->code_table_desc != & __rfc3284_code_table_desc); | ||
3322 | |||
3323 | if (use_secondary) { hdr_ind |= VCD_SECONDARY; } | ||
3324 | if (use_gencodetbl) { hdr_ind |= VCD_CODETABLE; } | ||
3325 | if (use_appheader) { hdr_ind |= VCD_APPHEADER; } | ||
3326 | |||
3327 | if ((ret = xd3_emit_byte (stream, & HDR_TAIL (stream), VCDIFF_MAGIC1)) != 0 || | ||
3328 | (ret = xd3_emit_byte (stream, & HDR_TAIL (stream), VCDIFF_MAGIC2)) != 0 || | ||
3329 | (ret = xd3_emit_byte (stream, & HDR_TAIL (stream), VCDIFF_MAGIC3)) != 0 || | ||
3330 | (ret = xd3_emit_byte (stream, & HDR_TAIL (stream), VCDIFF_VERSION)) != 0 || | ||
3331 | (ret = xd3_emit_byte (stream, & HDR_TAIL (stream), hdr_ind)) != 0) | ||
3332 | { | ||
3333 | return ret; | ||
3334 | } | ||
3335 | |||
3336 | /* Secondary compressor ID */ | ||
3337 | #if SECONDARY_ANY | ||
3338 | if (use_secondary && (ret = xd3_emit_byte (stream, & HDR_TAIL (stream), stream->sec_type->id))) { return ret; } | ||
3339 | #endif | ||
3340 | |||
3341 | /* Compressed code table */ | ||
3342 | if (use_gencodetbl) | ||
3343 | { | ||
3344 | usize_t code_table_size; | ||
3345 | const uint8_t *code_table_data; | ||
3346 | |||
3347 | if ((ret = stream->comp_table_func (stream, & code_table_data, & code_table_size))) { return ret; } | ||
3348 | |||
3349 | if ((ret = xd3_emit_size (stream, & HDR_TAIL (stream), code_table_size + 2)) || | ||
3350 | (ret = xd3_emit_byte (stream, & HDR_TAIL (stream), stream->code_table_desc->near_modes)) || | ||
3351 | (ret = xd3_emit_byte (stream, & HDR_TAIL (stream), stream->code_table_desc->same_modes)) || | ||
3352 | (ret = xd3_emit_bytes (stream, & HDR_TAIL (stream), code_table_data, code_table_size))) { return ret; } | ||
3353 | } | ||
3354 | |||
3355 | /* Application header */ | ||
3356 | if (use_appheader) | ||
3357 | { | ||
3358 | if ((ret = xd3_emit_size (stream, & HDR_TAIL (stream), stream->enc_appheadsz)) || | ||
3359 | (ret = xd3_emit_bytes (stream, & HDR_TAIL (stream), stream->enc_appheader, stream->enc_appheadsz))) | ||
3360 | { | ||
3361 | return ret; | ||
3362 | } | ||
3363 | } | ||
3364 | } | ||
3365 | |||
3366 | /* try to compress this window */ | ||
3367 | #if SECONDARY_ANY | ||
3368 | if (use_secondary) | ||
3369 | { | ||
3370 | int data_sec = 0; | ||
3371 | int inst_sec = 0; | ||
3372 | int addr_sec = 0; | ||
3373 | |||
3374 | # define ENCODE_SECONDARY_SECTION(UPPER,LOWER) \ | ||
3375 | ((stream->flags & XD3_SEC_NO ## UPPER) == 0 && \ | ||
3376 | (ret = xd3_encode_secondary (stream, & UPPER ## _HEAD (stream), & UPPER ## _TAIL (stream), \ | ||
3377 | & xd3_sec_ ## LOWER (stream), \ | ||
3378 | & stream->sec_ ## LOWER, & LOWER ## _sec))) | ||
3379 | |||
3380 | if (ENCODE_SECONDARY_SECTION (DATA, data) || | ||
3381 | ENCODE_SECONDARY_SECTION (INST, inst) || | ||
3382 | ENCODE_SECONDARY_SECTION (ADDR, addr)) | ||
3383 | { | ||
3384 | return ret; | ||
3385 | } | ||
3386 | |||
3387 | del_ind |= (data_sec ? VCD_DATACOMP : 0); | ||
3388 | del_ind |= (inst_sec ? VCD_INSTCOMP : 0); | ||
3389 | del_ind |= (addr_sec ? VCD_ADDRCOMP : 0); | ||
3390 | } | ||
3391 | #endif | ||
3392 | |||
3393 | /* if (vcd_target) { win_ind |= VCD_TARGET; } */ | ||
3394 | if (vcd_source) { win_ind |= VCD_SOURCE; } | ||
3395 | if (use_adler32) { win_ind |= VCD_ADLER32; } | ||
3396 | |||
3397 | /* window indicator */ | ||
3398 | if ((ret = xd3_emit_byte (stream, & HDR_TAIL (stream), win_ind))) { return ret; } | ||
3399 | |||
3400 | /* source window */ | ||
3401 | if (vcd_source) | ||
3402 | { | ||
3403 | /* or (vcd_target) { ... } */ | ||
3404 | if ((ret = xd3_emit_size (stream, & HDR_TAIL (stream), stream->src->srclen)) || | ||
3405 | (ret = xd3_emit_size (stream, & HDR_TAIL (stream), stream->src->srcbase))) { return ret; } | ||
3406 | } | ||
3407 | |||
3408 | tgt_len = stream->avail_in; | ||
3409 | data_len = xd3_sizeof_output (DATA_HEAD (stream)); | ||
3410 | inst_len = xd3_sizeof_output (INST_HEAD (stream)); | ||
3411 | addr_len = xd3_sizeof_output (ADDR_HEAD (stream)); | ||
3412 | |||
3413 | /* The enc_len field is redundent... doh! */ | ||
3414 | enc_len = (1 + (xd3_sizeof_size (tgt_len) + | ||
3415 | xd3_sizeof_size (data_len) + | ||
3416 | xd3_sizeof_size (inst_len) + | ||
3417 | xd3_sizeof_size (addr_len)) + | ||
3418 | data_len + | ||
3419 | inst_len + | ||
3420 | addr_len + | ||
3421 | (use_adler32 ? 4 : 0)); | ||
3422 | |||
3423 | if ((ret = xd3_emit_size (stream, & HDR_TAIL (stream), enc_len)) || | ||
3424 | (ret = xd3_emit_size (stream, & HDR_TAIL (stream), tgt_len)) || | ||
3425 | (ret = xd3_emit_byte (stream, & HDR_TAIL (stream), del_ind)) || | ||
3426 | (ret = xd3_emit_size (stream, & HDR_TAIL (stream), data_len)) || | ||
3427 | (ret = xd3_emit_size (stream, & HDR_TAIL (stream), inst_len)) || | ||
3428 | (ret = xd3_emit_size (stream, & HDR_TAIL (stream), addr_len))) | ||
3429 | { | ||
3430 | return ret; | ||
3431 | } | ||
3432 | |||
3433 | if (use_adler32) | ||
3434 | { | ||
3435 | uint8_t send[4]; | ||
3436 | uint32_t a32 = adler32 (1L, stream->next_in, stream->avail_in); | ||
3437 | |||
3438 | send[0] = (a32 >> 24); | ||
3439 | send[1] = (a32 >> 16); | ||
3440 | send[2] = (a32 >> 8); | ||
3441 | send[3] = (a32 & 0xff); | ||
3442 | |||
3443 | if ((ret = xd3_emit_bytes (stream, & HDR_TAIL (stream), send, 4))) { return ret; } | ||
3444 | } | ||
3445 | |||
3446 | return 0; | ||
3447 | } | ||
3448 | |||
3449 | /****************************************************************************************** | ||
3450 | Encode routines | ||
3451 | ******************************************************************************************/ | ||
3452 | |||
3453 | static int | ||
3454 | xd3_encode_buffer_leftover (xd3_stream *stream) | ||
3455 | { | ||
3456 | usize_t take; | ||
3457 | usize_t room; | ||
3458 | |||
3459 | /* Allocate the buffer. */ | ||
3460 | if (stream->buf_in == NULL && (stream->buf_in = xd3_alloc (stream, stream->winsize, 1)) == NULL) | ||
3461 | { | ||
3462 | return ENOMEM; | ||
3463 | } | ||
3464 | |||
3465 | /* Take leftover input first. */ | ||
3466 | if (stream->buf_leftover != NULL) | ||
3467 | { | ||
3468 | XD3_ASSERT (stream->buf_avail == 0); | ||
3469 | XD3_ASSERT (stream->buf_leftavail < stream->winsize); | ||
3470 | |||
3471 | IF_DEBUG1 (P(RINT "[leftover] previous %u avail %u\n", stream->buf_leftavail, stream->avail_in)); | ||
3472 | |||
3473 | memcpy (stream->buf_in, stream->buf_leftover, stream->buf_leftavail); | ||
3474 | |||
3475 | stream->buf_leftover = NULL; | ||
3476 | stream->buf_avail = stream->buf_leftavail; | ||
3477 | } | ||
3478 | |||
3479 | /* Copy into the buffer. */ | ||
3480 | room = stream->winsize - stream->buf_avail; | ||
3481 | take = min (room, stream->avail_in); | ||
3482 | |||
3483 | memcpy (stream->buf_in + stream->buf_avail, stream->next_in, take); | ||
3484 | |||
3485 | stream->buf_avail += take; | ||
3486 | |||
3487 | if (take < stream->avail_in) | ||
3488 | { | ||
3489 | /* Buffer is full */ | ||
3490 | stream->buf_leftover = stream->next_in + take; | ||
3491 | stream->buf_leftavail = stream->avail_in - take; | ||
3492 | |||
3493 | IF_DEBUG1 (P(RINT "[leftover] take %u remaining %u\n", take, stream->buf_leftavail)); | ||
3494 | } | ||
3495 | else if ((stream->buf_avail < stream->winsize) && !(stream->flags & XD3_FLUSH)) | ||
3496 | { | ||
3497 | /* Buffer has space */ | ||
3498 | IF_DEBUG1 (P(RINT "[leftover] %u emptied\n", take)); | ||
3499 | return XD3_INPUT; | ||
3500 | } | ||
3501 | |||
3502 | /* Use the buffer: */ | ||
3503 | stream->next_in = stream->buf_in; | ||
3504 | stream->avail_in = stream->buf_avail; | ||
3505 | stream->buf_avail = 0; | ||
3506 | |||
3507 | return 0; | ||
3508 | } | ||
3509 | |||
3510 | /* This function allocates all memory initially used by the encoder. */ | ||
3511 | static int | ||
3512 | xd3_encode_init (xd3_stream *stream) | ||
3513 | { | ||
3514 | int i; | ||
3515 | int large_comp = (stream->src != NULL); | ||
3516 | int small_comp = ! (stream->flags & XD3_NOCOMPRESS); | ||
3517 | /*int small_prev = (stream->small_chain > 1);*/ | ||
3518 | int space_fact = (large_comp + small_comp); | ||
3519 | int memsize = stream->memsize; | ||
3520 | |||
3521 | /* Memory allocations for checksum tables are delayed until xd3_string_match_init in the | ||
3522 | * first call to string_match--that way identical or short inputs require no table | ||
3523 | * allocation. */ | ||
3524 | if (large_comp) | ||
3525 | { | ||
3526 | xd3_size_hashtable (stream, memsize / space_fact, & stream->large_hash); | ||
3527 | } | ||
3528 | |||
3529 | if (small_comp) | ||
3530 | { | ||
3531 | xd3_size_hashtable (stream, memsize / space_fact, & stream->small_hash); | ||
3532 | } | ||
3533 | |||
3534 | for (i = 0; i < ENC_SECTS; i += 1) | ||
3535 | { | ||
3536 | if ((stream->enc_heads[i] = stream->enc_tails[i] = | ||
3537 | xd3_alloc_output (stream, NULL)) == NULL) | ||
3538 | { | ||
3539 | goto fail; | ||
3540 | } | ||
3541 | } | ||
3542 | |||
3543 | /* iopt buffer */ | ||
3544 | xd3_rlist_init (& stream->iopt.used); | ||
3545 | xd3_rlist_init (& stream->iopt.free); | ||
3546 | |||
3547 | if ((stream->iopt.buffer = xd3_alloc (stream, sizeof (xd3_rinst), stream->iopt_size)) == NULL) | ||
3548 | { | ||
3549 | goto fail; | ||
3550 | } | ||
3551 | |||
3552 | for (i = 0; i < stream->iopt_size; i += 1) | ||
3553 | { | ||
3554 | xd3_rlist_push_back (& stream->iopt.free, & stream->iopt.buffer[i]); | ||
3555 | } | ||
3556 | |||
3557 | XD3_ASSERT (xd3_rlist_length (& stream->iopt.free) == stream->iopt_size); | ||
3558 | XD3_ASSERT (xd3_rlist_length (& stream->iopt.used) == 0); | ||
3559 | |||
3560 | /* address cache, code table */ | ||
3561 | stream->acache.s_near = stream->code_table_desc->near_modes; | ||
3562 | stream->acache.s_same = stream->code_table_desc->same_modes; | ||
3563 | stream->code_table = stream->code_table_func (); | ||
3564 | |||
3565 | return xd3_alloc_cache (stream); | ||
3566 | |||
3567 | fail: | ||
3568 | |||
3569 | return ENOMEM; | ||
3570 | } | ||
3571 | |||
#if XD3_DEBUG
/* Debug-only consistency check: asserts that every entry of stream->small_prev is
 * correctly linked to both of its neighbours.  Always returns 1 so that it can be used
 * inside an assertion expression. */
static int
xd3_check_sprevlist (xd3_stream *stream)
{
  int slot = 0;

  while (slot < stream->sprevsz)
    {
      xd3_slist *node = & stream->small_prev[slot];

      XD3_ASSERT (node->prev->next == node);
      XD3_ASSERT (node->next->prev == node);

      slot += 1;
    }

  return 1;
}
#endif
3587 | |||
3588 | /* Called after the ENC_POSTOUT state, this puts the output buffers back into separate | ||
3589 | * lists and re-initializes some variables. (The output lists were spliced together | ||
3590 | * during the ENC_FLUSH state.) */ | ||
3591 | static void | ||
3592 | xd3_encode_reset (xd3_stream *stream) | ||
3593 | { | ||
3594 | int i; | ||
3595 | xd3_output *olist; | ||
3596 | |||
3597 | XD3_ASSERT (stream->small_prev == NULL || xd3_check_sprevlist (stream)); | ||
3598 | |||
3599 | IF_DEBUG (stream->n_emit = 0); | ||
3600 | stream->avail_in = 0; | ||
3601 | stream->small_reset = 1; | ||
3602 | |||
3603 | if (stream->src != NULL) | ||
3604 | { | ||
3605 | stream->src->srcbase = 0; | ||
3606 | stream->src->srclen = 0; | ||
3607 | stream->srcwin_decided = 0; | ||
3608 | stream->match_minaddr = 0; | ||
3609 | stream->match_maxaddr = 0; | ||
3610 | stream->taroff = 0; | ||
3611 | } | ||
3612 | |||
3613 | /* Reset output chains. */ | ||
3614 | olist = stream->enc_heads[0]; | ||
3615 | |||
3616 | for (i = 0; i < ENC_SECTS; i += 1) | ||
3617 | { | ||
3618 | XD3_ASSERT (olist != NULL); | ||
3619 | |||
3620 | stream->enc_heads[i] = olist; | ||
3621 | stream->enc_tails[i] = olist; | ||
3622 | olist = olist->next_page; | ||
3623 | |||
3624 | stream->enc_heads[i]->next = 0; | ||
3625 | stream->enc_heads[i]->next_page = NULL; | ||
3626 | |||
3627 | stream->enc_tails[i]->next_page = NULL; | ||
3628 | stream->enc_tails[i] = stream->enc_heads[i]; | ||
3629 | } | ||
3630 | |||
3631 | xd3_freelist_output (stream, olist); | ||
3632 | } | ||
3633 | |||
3634 | /* The main encoding routine. */ | ||
3635 | int | ||
3636 | xd3_encode_input (xd3_stream *stream) | ||
3637 | { | ||
3638 | int ret, i; | ||
3639 | |||
3640 | if (stream->dec_state != 0) | ||
3641 | { | ||
3642 | stream->msg = "encoder/decoder transition"; | ||
3643 | return EINVAL; | ||
3644 | } | ||
3645 | |||
3646 | switch (stream->enc_state) | ||
3647 | { | ||
3648 | case ENC_INIT: | ||
3649 | /* Only reached on first time through: memory setup. */ | ||
3650 | if ((ret = xd3_encode_init (stream))) { return ret; } | ||
3651 | |||
3652 | stream->enc_state = ENC_INPUT; | ||
3653 | |||
3654 | case ENC_INPUT: | ||
3655 | |||
3656 | /* If there is no input yet, just return. This checks for next_in == NULL, not | ||
3657 | * avail_in == 0 since zero bytes is a valid input. There is an assertion in | ||
3658 | * xd3_avail_input() that next_in != NULL for this reason. By returning right away | ||
3659 | * we avoid creating an input buffer before the caller has supplied its first data. | ||
3660 | * It is possible for xd3_avail_input to be called both before and after the first | ||
3661 | * call to xd3_encode_input(). */ | ||
3662 | if (stream->next_in == NULL) | ||
3663 | { | ||
3664 | return XD3_INPUT; | ||
3665 | } | ||
3666 | |||
3667 | enc_flush: | ||
3668 | /* See if we should buffer the input: either if there is already a leftover buffer, | ||
3669 | * or if the input is short of winsize without flush. The label at this point is | ||
3670 | * reached by a goto below, when there is leftover input after postout. */ | ||
3671 | if ((stream->buf_leftover != NULL) || | ||
3672 | (stream->avail_in < stream->winsize && ! (stream->flags & XD3_FLUSH))) | ||
3673 | { | ||
3674 | if ((ret = xd3_encode_buffer_leftover (stream))) { return ret; } | ||
3675 | } | ||
3676 | |||
3677 | /* Initalize the address cache before each window. */ | ||
3678 | xd3_init_cache (& stream->acache); | ||
3679 | |||
3680 | pos_in = 0; | ||
3681 | min_match = MIN_MATCH; | ||
3682 | stream->unencoded_offset = 0; | ||
3683 | |||
3684 | stream->enc_state = ENC_SEARCH; | ||
3685 | |||
3686 | IF_DEBUG1 (P(RINT "[input window:%"Q"u] input bytes %u offset %"Q"u\n", | ||
3687 | stream->current_window, stream->avail_in, stream->total_in)); | ||
3688 | |||
3689 | return XD3_WINSTART; | ||
3690 | |||
3691 | case ENC_SEARCH: | ||
3692 | |||
3693 | /* Reentrant matching. */ | ||
3694 | if (stream->src != NULL) | ||
3695 | { | ||
3696 | switch (stream->match_state) | ||
3697 | { | ||
3698 | case MATCH_TARGET: | ||
3699 | /* Try matching forward at the start of the target. This is entered the | ||
3700 | * first time through, to check for a perfect match, and whenever there is a | ||
3701 | * source match that extends to the end of the previous window. The | ||
3702 | * match_srcpos field is initially zero and later set during | ||
3703 | * xd3_source_extend_match. */ | ||
3704 | if (stream->avail_in > 0) { | ||
3705 | /* This call can't fail because the source window is unrestricted. */ | ||
3706 | ret = xd3_source_match_setup (stream, stream->match_srcpos); | ||
3707 | XD3_ASSERT (ret == 0); | ||
3708 | stream->match_state = MATCH_FORWARD; | ||
3709 | } else { | ||
3710 | stream->match_state = MATCH_SEARCHING; | ||
3711 | } | ||
3712 | XD3_ASSERT (stream->match_fwd == 0); | ||
3713 | |||
3714 | case MATCH_FORWARD: | ||
3715 | case MATCH_BACKWARD: | ||
3716 | if (stream->avail_in != 0) | ||
3717 | { | ||
3718 | if ((ret = xd3_source_extend_match (stream)) != 0) | ||
3719 | { | ||
3720 | return ret; | ||
3721 | } | ||
3722 | |||
3723 | stream->input_position += stream->match_fwd; | ||
3724 | } | ||
3725 | |||
3726 | case MATCH_SEARCHING: | ||
3727 | /* Continue string matching. (It's possible that the initial match | ||
3728 | * continued through the entire input, in which case we're still in | ||
3729 | * MATCH_FORWARD and should remain so for the next input window.) */ | ||
3730 | break; | ||
3731 | } | ||
3732 | } | ||
3733 | |||
3734 | /* String matching... */ | ||
3735 | if (stream->avail_in != 0 && | ||
3736 | (ret = stream->string_match (stream))) | ||
3737 | { | ||
3738 | return ret; | ||
3739 | } | ||
3740 | |||
3741 | /* Flush the instrution buffer, then possibly add one more instruction, then emit | ||
3742 | * the header. */ | ||
3743 | stream->enc_state = ENC_FLUSH; | ||
3744 | if ((ret = xd3_iopt_flush_instructions (stream, 1)) || | ||
3745 | (ret = xd3_iopt_add_finalize (stream)) || | ||
3746 | (ret = xd3_emit_hdr (stream))) | ||
3747 | { | ||
3748 | return ret; | ||
3749 | } | ||
3750 | |||
3751 | /* Begin output. */ | ||
3752 | stream->enc_current = HDR_HEAD (stream); | ||
3753 | |||
3754 | /* Chain all the outputs together. After doing this, it looks as if there is only | ||
3755 | * one section. The other enc_heads are set to NULL to avoid freeing them more than | ||
3756 | * once. */ | ||
3757 | for (i = 1; i < ENC_SECTS; i += 1) | ||
3758 | { | ||
3759 | stream->enc_tails[i-1]->next_page = stream->enc_heads[i]; | ||
3760 | stream->enc_heads[i] = NULL; | ||
3761 | } | ||
3762 | |||
3763 | enc_output: | ||
3764 | |||
3765 | stream->enc_state = ENC_POSTOUT; | ||
3766 | stream->next_out = stream->enc_current->base; | ||
3767 | stream->avail_out = stream->enc_current->next; | ||
3768 | stream->total_out += (xoff_t) stream->avail_out; | ||
3769 | |||
3770 | /* If there is any output in this buffer, return it, otherwise fall through to | ||
3771 | * handle the next buffer or finish the window after all buffers have been | ||
3772 | * output. */ | ||
3773 | if (stream->avail_out > 0) | ||
3774 | { | ||
3775 | /* This is the only place xd3_encode returns XD3_OUTPUT */ | ||
3776 | return XD3_OUTPUT; | ||
3777 | } | ||
3778 | |||
3779 | case ENC_POSTOUT: | ||
3780 | |||
3781 | if (stream->avail_out != 0) | ||
3782 | { | ||
3783 | stream->msg = "missed call to consume output"; | ||
3784 | return EINVAL; | ||
3785 | } | ||
3786 | |||
3787 | /* Continue outputting one buffer at a time, until the next is NULL. */ | ||
3788 | if ((stream->enc_current = stream->enc_current->next_page) != NULL) | ||
3789 | { | ||
3790 | goto enc_output; | ||
3791 | } | ||
3792 | |||
3793 | stream->total_in += (xoff_t) stream->avail_in; | ||
3794 | stream->enc_state = ENC_POSTWIN; | ||
3795 | |||
3796 | return XD3_WINFINISH; | ||
3797 | |||
3798 | case ENC_POSTWIN: | ||
3799 | |||
3800 | xd3_encode_reset (stream); | ||
3801 | |||
3802 | stream->current_window += 1; | ||
3803 | stream->enc_state = ENC_INPUT; | ||
3804 | |||
3805 | /* If there is leftover input to flush, repeat. */ | ||
3806 | if ((stream->buf_leftover != NULL) && (stream->flags & XD3_FLUSH)) | ||
3807 | { | ||
3808 | goto enc_flush; | ||
3809 | } | ||
3810 | |||
3811 | /* Ready for more input. */ | ||
3812 | return XD3_INPUT; | ||
3813 | |||
3814 | default: | ||
3815 | stream->msg = "invalid state"; | ||
3816 | return EINVAL; | ||
3817 | } | ||
3818 | } | ||
3819 | #endif /* XD3_ENCODER */ | ||
3820 | |||
3821 | /****************************************************************************************** | ||
3822 | Client convenience functions | ||
3823 | ******************************************************************************************/ | ||
3824 | |||
3825 | /* This function invokes either encode or decode to and from in-memory arrays. The output array | ||
3826 | * must be large enough to hold the output or else ENOSPC is returned. */ | ||
3827 | static int | ||
3828 | xd3_process_completely (xd3_stream *stream, | ||
3829 | int (*func) (xd3_stream *), | ||
3830 | int close_stream, | ||
3831 | const uint8_t *input, | ||
3832 | usize_t input_size, | ||
3833 | uint8_t *output, | ||
3834 | usize_t *output_size, | ||
3835 | usize_t avail_size) | ||
3836 | { | ||
3837 | (*output_size) = 0; | ||
3838 | |||
3839 | stream->flags |= XD3_FLUSH; | ||
3840 | |||
3841 | xd3_avail_input (stream, input, input_size); | ||
3842 | |||
3843 | for (;;) | ||
3844 | { | ||
3845 | int ret; | ||
3846 | switch((ret = func (stream))) | ||
3847 | { | ||
3848 | case XD3_OUTPUT: { /* memcpy below */ break; } | ||
3849 | case XD3_INPUT: { /* this means EOF */ goto done; } | ||
3850 | case XD3_GOTHEADER: { /* ignore */ continue; } | ||
3851 | case XD3_WINSTART: { /* ignore */ continue; } | ||
3852 | case XD3_WINFINISH: { /* ignore */ continue; } | ||
3853 | case XD3_GETSRCBLK: | ||
3854 | { | ||
3855 | stream->msg = "stream requires source input"; | ||
3856 | return EINVAL; | ||
3857 | } | ||
3858 | case 0: /* there is no plain "success" return for xd3_encode/decode */ | ||
3859 | XD3_ASSERT (ret != 0); | ||
3860 | default: | ||
3861 | return ret; | ||
3862 | } | ||
3863 | |||
3864 | if (*output_size + stream->avail_out > avail_size) | ||
3865 | { | ||
3866 | stream->msg = "insufficient output space"; | ||
3867 | return ENOSPC; | ||
3868 | } | ||
3869 | |||
3870 | memcpy (output + *output_size, stream->next_out, stream->avail_out); | ||
3871 | |||
3872 | *output_size += stream->avail_out; | ||
3873 | |||
3874 | xd3_consume_output (stream); | ||
3875 | } | ||
3876 | done: | ||
3877 | return (close_stream == 0) ? 0 : xd3_close_stream (stream); | ||
3878 | } | ||
3879 | |||
3880 | int | ||
3881 | xd3_decode_completely (xd3_stream *stream, | ||
3882 | const uint8_t *input, | ||
3883 | usize_t input_size, | ||
3884 | uint8_t *output, | ||
3885 | usize_t *output_size, | ||
3886 | usize_t avail_size) | ||
3887 | { | ||
3888 | return xd3_process_completely (stream, & xd3_decode_input, 1, | ||
3889 | input, input_size, | ||
3890 | output, output_size, avail_size); | ||
3891 | } | ||
3892 | |||
#if XD3_ENCODER
/* Encode INPUT (input_size bytes) into OUTPUT, which has room for avail_size bytes.
 * The number of bytes produced is stored in *output_size.  The stream is closed on
 * success.  Returns whatever xd3_process_completely() returns. */
int
xd3_encode_completely (xd3_stream *stream,
		       const uint8_t *input,
		       usize_t input_size,
		       uint8_t *output,
		       usize_t *output_size,
		       usize_t avail_size)
{
  int (*step) (xd3_stream *) = & xd3_encode_input;
  const int close_when_done = 1;

  return xd3_process_completely (stream, step, close_when_done,
				 input, input_size,
				 output, output_size, avail_size);
}
#endif
3907 | |||
3908 | /****************************************************************************************** | ||
3909 | DECODE stuff | ||
3910 | ******************************************************************************************/ | ||
3911 | |||
3912 | /* Return true if the caller must provide a source. Theoretically, this has to be checked | ||
3913 | * after every window. It could be that the first window requires no source, but the | ||
3914 | * second window does. In practice? */ | ||
3915 | int xd3_decoder_needs_source (xd3_stream *stream) | ||
3916 | { | ||
3917 | return stream->dec_win_ind & VCD_SOURCE; | ||
3918 | } | ||
3919 | |||
3920 | /* Initialize the decoder for a new window. The dec_tgtlen value is preserved across | ||
3921 | * successive window decodings, and the update to dec_winstart is delayed until a new | ||
3922 | * window actually starts. This is to avoid throwing an error due to overflow until the | ||
3923 | * last possible moment. This makes it possible to encode exactly 4GB through a 32-bit | ||
3924 | * encoder. */ | ||
3925 | static int | ||
3926 | xd3_decode_init_window (xd3_stream *stream) | ||
3927 | { | ||
3928 | stream->dec_cpylen = 0; | ||
3929 | stream->dec_cpyoff = 0; | ||
3930 | stream->dec_cksumbytes = 0; | ||
3931 | |||
3932 | xd3_init_cache (& stream->acache); | ||
3933 | |||
3934 | return 0; | ||
3935 | } | ||
3936 | |||
3937 | /* Allocates buffer space for the target window and possibly the VCD_TARGET copy-window. | ||
3938 | * Also sets the base of the two copy segments. */ | ||
3939 | static int | ||
3940 | xd3_decode_setup_buffers (xd3_stream *stream) | ||
3941 | { | ||
3942 | /* If VCD_TARGET is set then the previous buffer may be reused. */ | ||
3943 | if (stream->dec_win_ind & VCD_TARGET) | ||
3944 | { | ||
3945 | /* But this implementation only supports copying from the last target window. If the | ||
3946 | * offset is outside that range, it can't be done. */ | ||
3947 | if (stream->dec_cpyoff < stream->dec_laststart) | ||
3948 | { | ||
3949 | stream->msg = "unsupported VCD_TARGET offset"; | ||
3950 | return EINVAL; | ||
3951 | } | ||
3952 | |||
3953 | /* See if the two windows are the same. This indicates the first time VCD_TARGET is | ||
3954 | * used. This causes a second buffer to be allocated, after that the two are | ||
3955 | * swapped in the DEC_FINISH case. */ | ||
3956 | if (stream->dec_lastwin == stream->next_out) | ||
3957 | { | ||
3958 | stream->next_out = NULL; | ||
3959 | stream->space_out = 0; | ||
3960 | } | ||
3961 | |||
3962 | stream->dec_cpyaddrbase = stream->dec_lastwin + (usize_t) (stream->dec_cpyoff - stream->dec_laststart); | ||
3963 | } | ||
3964 | |||
3965 | /* See if the current output window is large enough. */ | ||
3966 | if (stream->space_out < stream->dec_tgtlen) | ||
3967 | { | ||
3968 | xd3_free (stream, stream->dec_buffer); | ||
3969 | |||
3970 | stream->space_out = xd3_round_blksize (stream->dec_tgtlen, XD3_ALLOCSIZE); | ||
3971 | |||
3972 | if ((stream->dec_buffer = xd3_alloc (stream, stream->space_out, 1)) == NULL) | ||
3973 | { | ||
3974 | return ENOMEM; | ||
3975 | } | ||
3976 | |||
3977 | stream->next_out = stream->dec_buffer; | ||
3978 | } | ||
3979 | |||
3980 | /* dec_tgtaddrbase refers to an invalid base address, but it is always used with a | ||
3981 | * sufficiently large instruction offset (i.e., beyond the copy window). This condition | ||
3982 | * is enforced by xd3_decode_output_halfinst. */ | ||
3983 | stream->dec_tgtaddrbase = stream->next_out - stream->dec_cpylen; | ||
3984 | |||
3985 | return 0; | ||
3986 | } | ||
3987 | |||
3988 | static int | ||
3989 | xd3_decode_allocate (xd3_stream *stream, | ||
3990 | usize_t size, | ||
3991 | uint8_t **copied1, | ||
3992 | usize_t *alloc1, | ||
3993 | uint8_t **copied2, | ||
3994 | usize_t *alloc2) | ||
3995 | { | ||
3996 | if (*copied1 != NULL && *alloc1 < size) | ||
3997 | { | ||
3998 | xd3_free (stream, *copied1); | ||
3999 | *copied1 = NULL; | ||
4000 | } | ||
4001 | |||
4002 | if (*copied1 == NULL) | ||
4003 | { | ||
4004 | #if SECONDARY_ANY | ||
4005 | /* Borrow from the secondary compressor's allocation. */ | ||
4006 | if (copied2 != NULL && *copied2 != NULL && *alloc2 < size) | ||
4007 | { | ||
4008 | *copied1 = *copied2; | ||
4009 | *alloc1 = *alloc2; | ||
4010 | *copied2 = NULL; | ||
4011 | *alloc2 = 0; | ||
4012 | } | ||
4013 | else | ||
4014 | #endif | ||
4015 | { | ||
4016 | *alloc1 = xd3_round_blksize (size, XD3_ALLOCSIZE); | ||
4017 | |||
4018 | if ((*copied1 = xd3_alloc (stream, *alloc1, 1)) == NULL) | ||
4019 | { | ||
4020 | return ENOMEM; | ||
4021 | } | ||
4022 | } | ||
4023 | } | ||
4024 | |||
4025 | return 0; | ||
4026 | } | ||
4027 | |||
4028 | static int | ||
4029 | xd3_decode_section (xd3_stream *stream, | ||
4030 | xd3_desect *section, | ||
4031 | xd3_decode_state nstate, | ||
4032 | int copy) | ||
4033 | { | ||
4034 | XD3_ASSERT (section->pos <= section->size); | ||
4035 | XD3_ASSERT (stream->dec_state != nstate); | ||
4036 | |||
4037 | if (section->pos < section->size) | ||
4038 | { | ||
4039 | usize_t sect_take; | ||
4040 | |||
4041 | if (stream->avail_in == 0) | ||
4042 | { | ||
4043 | return XD3_INPUT; | ||
4044 | } | ||
4045 | |||
4046 | if ((copy == 0) && (section->pos == 0)) | ||
4047 | { | ||
4048 | /* No allocation/copy needed */ | ||
4049 | section->buf = stream->next_in; | ||
4050 | sect_take = section->size; | ||
4051 | } | ||
4052 | else | ||
4053 | { | ||
4054 | usize_t sect_need = section->size - section->pos; | ||
4055 | |||
4056 | /* Allocate and copy */ | ||
4057 | sect_take = min (sect_need, stream->avail_in); | ||
4058 | |||
4059 | if (section->pos == 0) | ||
4060 | { | ||
4061 | int ret; | ||
4062 | |||
4063 | if ((ret = xd3_decode_allocate (stream, | ||
4064 | section->size, | ||
4065 | & section->copied1, | ||
4066 | & section->alloc1, | ||
4067 | & section->copied2, | ||
4068 | & section->alloc2))) { return ret; } | ||
4069 | |||
4070 | section->buf = section->copied1; | ||
4071 | } | ||
4072 | |||
4073 | memcpy (section->copied1 + section->pos, | ||
4074 | stream->next_in, | ||
4075 | sect_take); | ||
4076 | } | ||
4077 | |||
4078 | section->pos += sect_take; | ||
4079 | |||
4080 | stream->dec_winbytes += sect_take; | ||
4081 | |||
4082 | DECODE_INPUT (sect_take); | ||
4083 | } | ||
4084 | |||
4085 | if (section->pos < section->size) | ||
4086 | { | ||
4087 | stream->msg = "further input required"; | ||
4088 | return XD3_INPUT; | ||
4089 | } | ||
4090 | |||
4091 | XD3_ASSERT (section->pos == section->size); | ||
4092 | |||
4093 | stream->dec_state = nstate; | ||
4094 | section->buf_max = section->buf + section->size; | ||
4095 | section->pos = 0; | ||
4096 | return 0; | ||
4097 | } | ||
4098 | |||
4099 | /* Decode the size and address for half of an instruction (i.e., a single opcode). This | ||
4100 | * updates the stream->dec_position, which are bytes already output prior to processing | ||
4101 | * this instruction. Perform bounds checking for sizes and copy addresses, which uses the | ||
4102 | * dec_position (which is why these checks are done here). */ | ||
4103 | static int | ||
4104 | xd3_decode_parse_halfinst (xd3_stream *stream, xd3_hinst *inst) | ||
4105 | { | ||
4106 | int ret; | ||
4107 | |||
4108 | /* If the size from the instruction table is zero then read a size value. */ | ||
4109 | if ((inst->size == 0) && | ||
4110 | (ret = xd3_read_size (stream, | ||
4111 | & stream->inst_sect.buf, | ||
4112 | stream->inst_sect.buf_max, | ||
4113 | |||
4114 | & inst->size))) | ||
4115 | { | ||
4116 | return EINVAL; | ||
4117 | } | ||
4118 | |||
4119 | /* For copy instructions, read address. */ | ||
4120 | if (inst->type >= XD3_CPY) | ||
4121 | { | ||
4122 | IF_DEBUG1 ({ | ||
4123 | static int cnt = 0; | ||
4124 | P(RINT "DECODE:%u: COPY at %"Q"u (winoffset %u) size %u winaddr %u\n", | ||
4125 | cnt++, | ||
4126 | stream->total_out + (stream->dec_position - stream->dec_cpylen), | ||
4127 | (stream->dec_position - stream->dec_cpylen), | ||
4128 | inst->size, | ||
4129 | inst->addr); | ||
4130 | }); | ||
4131 | |||
4132 | if ((ret = xd3_decode_address (stream, | ||
4133 | stream->dec_position, | ||
4134 | inst->type - XD3_CPY, | ||
4135 | & stream->addr_sect.buf, | ||
4136 | stream->addr_sect.buf_max, | ||
4137 | & inst->addr))) | ||
4138 | { | ||
4139 | return ret; | ||
4140 | } | ||
4141 | |||
4142 | /* Cannot copy an address before it is filled-in. */ | ||
4143 | if (inst->addr >= stream->dec_position) | ||
4144 | { | ||
4145 | stream->msg = "address too large"; | ||
4146 | return EINVAL; | ||
4147 | } | ||
4148 | |||
4149 | /* Check: a VCD_TARGET or VCD_SOURCE copy cannot exceed the remaining buffer space | ||
4150 | * in its own segment. */ | ||
4151 | if (inst->addr < stream->dec_cpylen && inst->addr + inst->size > stream->dec_cpylen) | ||
4152 | { | ||
4153 | stream->msg = "size too large"; | ||
4154 | return EINVAL; | ||
4155 | } | ||
4156 | } | ||
4157 | else | ||
4158 | { | ||
4159 | IF_DEBUG1 ({ | ||
4160 | if (inst->type == XD3_ADD) | ||
4161 | { | ||
4162 | static int cnt; | ||
4163 | P(RINT "DECODE:%d: ADD at %"Q"u (winoffset %u) size %u\n", | ||
4164 | cnt++, | ||
4165 | stream->total_out + stream->dec_position - stream->dec_cpylen, | ||
4166 | stream->dec_position - stream->dec_cpylen, | ||
4167 | inst->size); | ||
4168 | } | ||
4169 | else | ||
4170 | { | ||
4171 | static int cnt; | ||
4172 | XD3_ASSERT (inst->type == XD3_RUN); | ||
4173 | P(RINT "DECODE:%d: RUN at %"Q"u (winoffset %u) size %u\n", | ||
4174 | cnt++, | ||
4175 | stream->total_out + stream->dec_position - stream->dec_cpylen, | ||
4176 | stream->dec_position - stream->dec_cpylen, | ||
4177 | inst->size); | ||
4178 | } | ||
4179 | }); | ||
4180 | } | ||
4181 | |||
4182 | /* Check: The instruction will not overflow the output buffer. */ | ||
4183 | if (stream->dec_position + inst->size > stream->dec_maxpos) | ||
4184 | { | ||
4185 | stream->msg = "size too large"; | ||
4186 | return EINVAL; | ||
4187 | } | ||
4188 | |||
4189 | stream->dec_position += inst->size; | ||
4190 | return 0; | ||
4191 | } | ||
4192 | |||
4193 | /* Decode a single opcode and then decode the two half-instructions. */ | ||
4194 | static int | ||
4195 | xd3_decode_instruction (xd3_stream *stream) | ||
4196 | { | ||
4197 | int ret; | ||
4198 | const xd3_dinst *inst; | ||
4199 | |||
4200 | if (stream->inst_sect.buf == stream->inst_sect.buf_max) | ||
4201 | { | ||
4202 | stream->msg = "instruction underflow"; | ||
4203 | return EINVAL; | ||
4204 | } | ||
4205 | |||
4206 | inst = &stream->code_table[*stream->inst_sect.buf++]; | ||
4207 | |||
4208 | stream->dec_current1.type = inst->type1; | ||
4209 | stream->dec_current2.type = inst->type2; | ||
4210 | stream->dec_current1.size = inst->size1; | ||
4211 | stream->dec_current2.size = inst->size2; | ||
4212 | |||
4213 | /* For each instruction with a real operation, decode the corresponding size and | ||
4214 | * addresses if necessary. Assume a code-table may have NOOP in either position, | ||
4215 | * although this is unlikely. */ | ||
4216 | if (inst->type1 != XD3_NOOP && (ret = xd3_decode_parse_halfinst (stream, & stream->dec_current1))) | ||
4217 | { | ||
4218 | return ret; | ||
4219 | } | ||
4220 | if (inst->type2 != XD3_NOOP && (ret = xd3_decode_parse_halfinst (stream, & stream->dec_current2))) | ||
4221 | { | ||
4222 | return ret; | ||
4223 | } | ||
4224 | return 0; | ||
4225 | } | ||
4226 | |||
/* Output the result of a single half-instruction.  OPT: This is the decoder hotspot.
 *
 * Bytes are written at stream->next_out + stream->avail_out and avail_out is advanced by
 * the number of bytes produced.  On completion inst->type is set to XD3_NOOP, which is
 * how the caller knows the half-instruction is done.
 *
 *   XD3_RUN:  one byte is taken from the data section and repeated inst->size times.
 *   XD3_ADD:  inst->size bytes are copied from the data section.
 *   default:  (copy) bytes are copied from the copy window or from the target window,
 *             depending on whether inst->addr is below stream->dec_cpylen.
 *
 * Returns 0 on success, EINVAL (with stream->msg set) on data underflow or a short
 * source file, or the non-zero result of xd3_getblk.  When xd3_getblk fails part-way
 * through a multi-block copy, inst->size and inst->addr already describe the remainder,
 * and inst->type is still not XD3_NOOP. */
static int
xd3_decode_output_halfinst (xd3_stream *stream, xd3_hinst *inst)
{
  /* To make this reentrant, set take = min (inst->size, available space)... */
  usize_t take = inst->size;

  XD3_ASSERT (inst->type != XD3_NOOP);

  switch (inst->type)
    {
    case XD3_RUN:
      {
	/* Only require a single data byte. */
	if (stream->data_sect.buf == stream->data_sect.buf_max)
	  {
	    stream->msg = "data underflow";
	    return EINVAL;
	  }

	/* TUNE: Probably want to eliminate memset/memcpy here */
	memset (stream->next_out + stream->avail_out,
		stream->data_sect.buf[0],
		take);

	/* The run consumes exactly one data byte regardless of its length. */
	stream->data_sect.buf += 1;
	stream->avail_out += take;
	inst->type = XD3_NOOP;
	break;
      }
    case XD3_ADD:
      {
	/* Require at least TAKE data bytes. */
	if (stream->data_sect.buf + take > stream->data_sect.buf_max)
	  {
	    stream->msg = "data underflow";
	    return EINVAL;
	  }

	memcpy (stream->next_out + stream->avail_out,
		stream->data_sect.buf,
		take);

	stream->data_sect.buf += take;
	stream->avail_out += take;
	inst->type = XD3_NOOP;
	break;
      }
    default:
      {
	usize_t i;
	const uint8_t *src;
	uint8_t *dst;

	/* See if it copies from the VCD_TARGET/VCD_SOURCE window or the target window.
	 * Out-of-bounds checks for the addresses and sizes are performed in
	 * xd3_decode_parse_halfinst. */
	if (inst->addr < stream->dec_cpylen)
	  {
	    if (stream->dec_win_ind & VCD_TARGET)
	      {
		/* For VCD_TARGET we know the entire range is in-memory, as established by
		 * decode_setup_buffers. */
		src = stream->dec_cpyaddrbase + inst->addr;
		inst->type = XD3_NOOP;
		inst->size = 0;
	      }
	    else
	      {
		/* In this case we have to read a source block, which could return control
		 * to the caller.  We need to know the first block number needed for this
		 * copy. */
		xd3_source *source;
		xoff_t block;
		usize_t blkoff;
		usize_t blksize;
		int ret;

		/* Re-entered via "goto more" below once per additional source block; at
		 * that point inst->addr and inst->size describe what is left to copy. */
	      more:

		source  = stream->src;
		block   = source->cpyoff_blocks;
		blkoff  = source->cpyoff_blkoff + inst->addr;
		blksize = source->blksize;

		/* Normalize (block, blkoff) so that blkoff lies inside one block. */
		while (blkoff >= blksize)
		  {
		    block  += 1;
		    blkoff -= blksize;
		  }

		if ((ret = xd3_getblk (stream, block)))
		  {
		    /* could be a XD3_GETSRCBLK failure. */
		    return ret;
		  }

		src = source->curblk + blkoff;

		/* This block either contains enough data or the source file is
		 * short. */
		if ((source->onblk != blksize) && (blkoff + take > source->onblk))
		  {
		    stream->msg = "source file too short";
		    return EINVAL;

		  }

		XD3_ASSERT (blkoff != blksize);

		if (blkoff + take <= blksize)
		  {
		    /* The rest of the copy fits in this block: this is the last pass. */
		    inst->type = XD3_NOOP;
		    inst->size = 0;
		  }
		else
		  {
		    /* This block doesn't contain all the data, modify the instruction, do
		     * not set to XD3_NOOP. */
		    take = blksize - blkoff;
		    inst->size -= take;
		    inst->addr += take;
		  }
	      }
	  }
	else
	  {
	    /* For a target-window copy, we know the entire range is in-memory.  The
	     * dec_tgtaddrbase is negatively offset by dec_cpylen because the addresses
	     * start beyond that point. */
	    src = stream->dec_tgtaddrbase + inst->addr;
	    inst->type = XD3_NOOP;
	    inst->size = 0;
	  }

	dst = stream->next_out + stream->avail_out;

	stream->avail_out += take;

	/* Can't just memcpy here due to possible overlap. */
	for (i = take; i != 0; i -= 1)
	  {
	    *dst++ = *src++;
	  }

	/* Whatever remains of the instruction (zero once it has been marked XD3_NOOP). */
	take = inst->size;

	/* If there is more to copy, call getblk again. */
	if (inst->type != XD3_NOOP)
	  {
	    XD3_ASSERT (take > 0);
	    goto more;
	  }
	else
	  {
	    XD3_ASSERT (take == 0);
	  }
      }
    }

  return 0;
}
4389 | |||
4390 | static int | ||
4391 | xd3_decode_finish_window (xd3_stream *stream) | ||
4392 | { | ||
4393 | stream->dec_winbytes = 0; | ||
4394 | stream->dec_state = DEC_FINISH; | ||
4395 | |||
4396 | stream->data_sect.pos = 0; | ||
4397 | stream->inst_sect.pos = 0; | ||
4398 | stream->addr_sect.pos = 0; | ||
4399 | |||
4400 | return XD3_OUTPUT; | ||
4401 | } | ||
4402 | |||
/* Read the DATA, INST and ADDR sections of the current window, resuming at whichever
 * section stream->dec_state names, then run secondary decompression (when compiled in
 * and flagged in dec_del_ind) and set up the output buffers.
 *
 * Short-circuits:
 *   - XD3_JUST_HDR:    the window is finished immediately.
 *   - XD3_SKIP_WINDOW: the section bytes are consumed without being decoded; XD3_INPUT is
 *                      returned until all of them have been seen.
 *   - XD3_SKIP_EMIT:   sections are read but no output buffers are set up.
 *
 * Returns 0 when the sections are ready to be emitted, the result of
 * xd3_decode_finish_window for the short-circuit cases, EINVAL on an unexpected
 * dec_state, or any non-zero result from the helpers called here. */
static int
xd3_decode_sections (xd3_stream *stream)
{
  usize_t need, more, take;
  int copy, ret;

  if ((stream->flags & XD3_JUST_HDR) != 0)
    {
      /* Nothing left to do. */
      return xd3_decode_finish_window (stream);
    }

  /* To avoid copying, need this much data available */
  need = (stream->inst_sect.size +
	  stream->addr_sect.size +
	  stream->data_sect.size);

  /* The window may be entirely processed. */
  XD3_ASSERT (stream->dec_winbytes <= need);

  /* Compute how much more input is needed. */
  more = (need - stream->dec_winbytes);

  /* How much to consume. */
  take = min (more, stream->avail_in);

  /* See if the input is completely available, to avoid copy. */
  copy = (take != more);

  /* If the window is skipped... */
  if ((stream->flags & XD3_SKIP_WINDOW) != 0)
    {
      /* Skip the available input. */
      DECODE_INPUT (take);

      stream->dec_winbytes += take;

      /* copy != 0 here means the available input did not cover the whole window. */
      if (copy)
	{
	  stream->msg = "further input required";
	  return XD3_INPUT;
	}

      return xd3_decode_finish_window (stream);
    }

  /* Read each remaining section in order, starting at the one named by dec_state.  The
   * cases fall through intentionally.  NOTE(review): xd3_decode_section presumably
   * advances dec_state to its third argument on success -- confirm against its
   * definition. */
  switch (stream->dec_state)
    {
    default:
      stream->msg = "internal error";
      return EINVAL;

    case DEC_DATA:
      if ((ret = xd3_decode_section (stream, & stream->data_sect, DEC_INST, copy))) { return ret; }
    case DEC_INST:
      if ((ret = xd3_decode_section (stream, & stream->inst_sect, DEC_ADDR, copy))) { return ret; }
    case DEC_ADDR:
      if ((ret = xd3_decode_section (stream, & stream->addr_sect, DEC_EMIT, copy))) { return ret; }
    }

  XD3_ASSERT (stream->dec_winbytes == need);

#if SECONDARY_ANY
  /* Evaluates to non-zero (leaving the error in ret) when the section is flagged as
   * secondary-compressed in dec_del_ind and xd3_decode_secondary fails for it. */
#define DECODE_SECONDARY_SECTION(UPPER,LOWER) \
  ((stream->dec_del_ind & VCD_ ## UPPER ## COMP) && \
   (ret = xd3_decode_secondary (stream, & stream-> LOWER ## _sect, \
					& xd3_sec_ ## LOWER (stream))))

  if (DECODE_SECONDARY_SECTION (DATA, data) ||
      DECODE_SECONDARY_SECTION (INST, inst) ||
      DECODE_SECONDARY_SECTION (ADDR, addr))
    {
      return ret;
    }
#endif

  if (stream->flags & XD3_SKIP_EMIT)
    {
      return xd3_decode_finish_window (stream);
    }

  /* OPT: A possible optimization is to avoid allocating memory in decode_setup_buffers
   * and to avoid a large memcpy when the window consists of a single VCD_SOURCE copy
   * instruction.  The only potential problem is if the following window is a VCD_TARGET,
   * then you need to remember... */
  if ((ret = xd3_decode_setup_buffers (stream))) { return ret; }

  return 0;
}
4493 | |||
4494 | static int | ||
4495 | xd3_decode_emit (xd3_stream *stream) | ||
4496 | { | ||
4497 | int ret; | ||
4498 | |||
4499 | /* Produce output: originally structured to allow reentrant code that fills as much of | ||
4500 | * the output buffer as possible, but VCDIFF semantics allows to copy from anywhere from | ||
4501 | * the target window, so instead allocate a sufficiently sized buffer after the target | ||
4502 | * window length is decoded. | ||
4503 | * | ||
4504 | * This code still needs to be reentrant to allow XD3_GETSRCBLK to return control. This | ||
4505 | * is handled by setting the stream->dec_currentN instruction types to XD3_NOOP after | ||
4506 | * they have been processed. */ | ||
4507 | XD3_ASSERT (! (stream->flags & XD3_SKIP_EMIT)); | ||
4508 | XD3_ASSERT (stream->avail_out == 0); | ||
4509 | XD3_ASSERT (stream->dec_tgtlen <= stream->space_out); | ||
4510 | |||
4511 | while (stream->inst_sect.buf != stream->inst_sect.buf_max) | ||
4512 | { | ||
4513 | /* Decode next instruction pair. */ | ||
4514 | if ((stream->dec_current1.type == XD3_NOOP) && | ||
4515 | (stream->dec_current2.type == XD3_NOOP) && | ||
4516 | (ret = xd3_decode_instruction (stream))) { return ret; } | ||
4517 | |||
4518 | /* Output for each instruction. */ | ||
4519 | if ((stream->dec_current1.type != XD3_NOOP) && | ||
4520 | (ret = xd3_decode_output_halfinst (stream, & stream->dec_current1))) { return ret; } | ||
4521 | |||
4522 | if ((stream->dec_current2.type != XD3_NOOP) && | ||
4523 | (ret = xd3_decode_output_halfinst (stream, & stream->dec_current2))) { return ret; } | ||
4524 | } | ||
4525 | |||
4526 | if (stream->avail_out != stream->dec_tgtlen) | ||
4527 | { | ||
4528 | IF_DEBUG1 (P(RINT "AVAIL_OUT(%d) != DEC_TGTLEN(%d)\n", stream->avail_out, stream->dec_tgtlen)); | ||
4529 | stream->msg = "wrong window length"; | ||
4530 | return EINVAL; | ||
4531 | } | ||
4532 | |||
4533 | if (stream->data_sect.buf != stream->data_sect.buf_max) | ||
4534 | { | ||
4535 | stream->msg = "extra data section"; | ||
4536 | return EINVAL; | ||
4537 | } | ||
4538 | |||
4539 | if (stream->addr_sect.buf != stream->addr_sect.buf_max) | ||
4540 | { | ||
4541 | stream->msg = "extra address section"; | ||
4542 | return EINVAL; | ||
4543 | } | ||
4544 | |||
4545 | /* OPT: Should cksum computation be combined with the above loop? */ | ||
4546 | if ((stream->dec_win_ind & VCD_ADLER32) != 0 && | ||
4547 | (stream->flags & XD3_ADLER32_NOVER) == 0) | ||
4548 | { | ||
4549 | uint32_t a32 = adler32 (1L, stream->next_out, stream->avail_out); | ||
4550 | |||
4551 | if (a32 != stream->dec_adler32) | ||
4552 | { | ||
4553 | stream->msg = "target window checksum mismatch"; | ||
4554 | return EINVAL; | ||
4555 | } | ||
4556 | } | ||
4557 | |||
4558 | /* Finished with a window. */ | ||
4559 | return xd3_decode_finish_window (stream); | ||
4560 | } | ||
4561 | |||
4562 | int | ||
4563 | xd3_decode_input (xd3_stream *stream) | ||
4564 | { | ||
4565 | int ret; | ||
4566 | |||
4567 | if (stream->enc_state != 0) | ||
4568 | { | ||
4569 | stream->msg = "encoder/decoder transition"; | ||
4570 | return EINVAL; | ||
4571 | } | ||
4572 | |||
4573 | #define BYTE_CASE(expr,x,nstate) \ | ||
4574 | do { \ | ||
4575 | if ( (expr) && \ | ||
4576 | ((ret = xd3_decode_byte (stream, & (x))) != 0) ) { return ret; } \ | ||
4577 | stream->dec_state = (nstate); \ | ||
4578 | } while (0) | ||
4579 | |||
4580 | #define OFFSET_CASE(expr,x,nstate) \ | ||
4581 | do { \ | ||
4582 | if ( (expr) && \ | ||
4583 | ((ret = xd3_decode_offset (stream, & (x))) != 0) ) { return ret; } \ | ||
4584 | stream->dec_state = (nstate); \ | ||
4585 | } while (0) | ||
4586 | |||
4587 | #define SIZE_CASE(expr,x,nstate) \ | ||
4588 | do { \ | ||
4589 | if ( (expr) && \ | ||
4590 | ((ret = xd3_decode_size (stream, & (x))) != 0) ) { return ret; } \ | ||
4591 | stream->dec_state = (nstate); \ | ||
4592 | } while (0) | ||
4593 | |||
4594 | #define SRCORTGT(x) (((x) & VCD_SRCORTGT) == VCD_SOURCE || \ | ||
4595 | ((x) & VCD_SRCORTGT) == VCD_TARGET) | ||
4596 | |||
4597 | switch (stream->dec_state) | ||
4598 | { | ||
4599 | case DEC_VCHEAD: | ||
4600 | { | ||
4601 | if ((ret = xd3_decode_bytes (stream, stream->dec_magic, & stream->dec_magicbytes, 4))) { return ret; } | ||
4602 | |||
4603 | if (stream->dec_magic[0] != VCDIFF_MAGIC1 || | ||
4604 | stream->dec_magic[1] != VCDIFF_MAGIC2 || | ||
4605 | stream->dec_magic[2] != VCDIFF_MAGIC3) | ||
4606 | { | ||
4607 | stream->msg = "not a VCDIFF input"; | ||
4608 | return EINVAL; | ||
4609 | } | ||
4610 | |||
4611 | if (stream->dec_magic[3] != 0) | ||
4612 | { | ||
4613 | stream->msg = "VCDIFF input version > 0 is not supported"; | ||
4614 | return EINVAL; | ||
4615 | } | ||
4616 | |||
4617 | stream->dec_state = DEC_HDRIND; | ||
4618 | } | ||
4619 | case DEC_HDRIND: | ||
4620 | { | ||
4621 | if ((ret = xd3_decode_byte (stream, & stream->dec_hdr_ind))) { return ret; } | ||
4622 | |||
4623 | if ((stream->dec_hdr_ind & VCD_INVHDR) != 0) | ||
4624 | { | ||
4625 | stream->msg = "unrecognized header indicator bits set"; | ||
4626 | return EINVAL; | ||
4627 | } | ||
4628 | |||
4629 | stream->dec_state = DEC_SECONDID; | ||
4630 | } | ||
4631 | |||
4632 | case DEC_SECONDID: | ||
4633 | /* Secondary compressor ID: only if VCD_SECONDARY is set */ | ||
4634 | if ((stream->dec_hdr_ind & VCD_SECONDARY) != 0) | ||
4635 | { | ||
4636 | BYTE_CASE (1, stream->dec_secondid, DEC_TABLEN); | ||
4637 | |||
4638 | switch (stream->dec_secondid) | ||
4639 | { | ||
4640 | case VCD_FGK_ID: | ||
4641 | FGK_CASE (stream); | ||
4642 | case VCD_DJW_ID: | ||
4643 | DJW_CASE (stream); | ||
4644 | default: | ||
4645 | stream->msg = "unknown secondary compressor ID"; | ||
4646 | return EINVAL; | ||
4647 | } | ||
4648 | } | ||
4649 | |||
4650 | case DEC_TABLEN: | ||
4651 | /* Length of code table data: only if VCD_CODETABLE is set */ | ||
4652 | SIZE_CASE ((stream->dec_hdr_ind & VCD_CODETABLE) != 0, stream->dec_codetblsz, DEC_NEAR); | ||
4653 | |||
4654 | /* The codetblsz counts the two NEAR/SAME bytes */ | ||
4655 | if ((stream->dec_hdr_ind & VCD_CODETABLE) != 0) { | ||
4656 | if (stream->dec_codetblsz <= 2) { | ||
4657 | stream->msg = "invalid code table size"; | ||
4658 | return ENOMEM; | ||
4659 | } | ||
4660 | stream->dec_codetblsz -= 2; | ||
4661 | } | ||
4662 | case DEC_NEAR: | ||
4663 | /* Near modes: only if VCD_CODETABLE is set */ | ||
4664 | BYTE_CASE((stream->dec_hdr_ind & VCD_CODETABLE) != 0, stream->acache.s_near, DEC_SAME); | ||
4665 | case DEC_SAME: | ||
4666 | /* Same modes: only if VCD_CODETABLE is set */ | ||
4667 | BYTE_CASE((stream->dec_hdr_ind & VCD_CODETABLE) != 0, stream->acache.s_same, DEC_TABDAT); | ||
4668 | case DEC_TABDAT: | ||
4669 | /* Compressed code table data */ | ||
4670 | |||
4671 | if ((stream->dec_hdr_ind & VCD_CODETABLE) != 0) | ||
4672 | { | ||
4673 | /* Get the code table data. */ | ||
4674 | if ((stream->dec_codetbl == NULL) && | ||
4675 | (stream->dec_codetbl = xd3_alloc (stream, stream->dec_codetblsz, 1)) == NULL) { return ENOMEM; } | ||
4676 | |||
4677 | if ((ret = xd3_decode_bytes (stream, stream->dec_codetbl, & stream->dec_codetblbytes, stream->dec_codetblsz))) | ||
4678 | { | ||
4679 | return ret; | ||
4680 | } | ||
4681 | |||
4682 | if ((ret = xd3_apply_table_encoding (stream, stream->dec_codetbl, stream->dec_codetblbytes))) | ||
4683 | { | ||
4684 | return ret; | ||
4685 | } | ||
4686 | } | ||
4687 | else | ||
4688 | { | ||
4689 | /* Use the default table. */ | ||
4690 | stream->acache.s_near = __rfc3284_code_table_desc.near_modes; | ||
4691 | stream->acache.s_same = __rfc3284_code_table_desc.same_modes; | ||
4692 | stream->code_table = xd3_rfc3284_code_table (); | ||
4693 | } | ||
4694 | |||
4695 | if ((ret = xd3_alloc_cache (stream))) { return ret; } | ||
4696 | |||
4697 | stream->dec_state = DEC_APPLEN; | ||
4698 | |||
4699 | case DEC_APPLEN: | ||
4700 | /* Length of application data */ | ||
4701 | SIZE_CASE((stream->dec_hdr_ind & VCD_APPHEADER) != 0, stream->dec_appheadsz, DEC_APPDAT); | ||
4702 | |||
4703 | case DEC_APPDAT: | ||
4704 | /* Application data */ | ||
4705 | if (stream->dec_hdr_ind & VCD_APPHEADER) | ||
4706 | { | ||
4707 | /* Note: we add an additional byte for padding, to allow 0-termination. */ | ||
4708 | if ((stream->dec_appheader == NULL) && | ||
4709 | (stream->dec_appheader = xd3_alloc (stream, stream->dec_appheadsz+1, 1)) == NULL) { return ENOMEM; } | ||
4710 | |||
4711 | stream->dec_appheader[stream->dec_appheadsz] = 0; | ||
4712 | |||
4713 | if ((ret = xd3_decode_bytes (stream, stream->dec_appheader, & stream->dec_appheadbytes, stream->dec_appheadsz))) | ||
4714 | { | ||
4715 | return ret; | ||
4716 | } | ||
4717 | } | ||
4718 | |||
4719 | stream->dec_hdrsize = stream->total_in; | ||
4720 | stream->dec_state = DEC_WININD; | ||
4721 | |||
4722 | case DEC_WININD: | ||
4723 | { | ||
4724 | /* Start of a window: the window indicator */ | ||
4725 | |||
4726 | if ((ret = xd3_decode_byte (stream, & stream->dec_win_ind))) { return ret; } | ||
4727 | |||
4728 | stream->current_window = stream->dec_window_count; | ||
4729 | |||
4730 | if (XOFF_T_OVERFLOW (stream->dec_winstart, stream->dec_tgtlen)) | ||
4731 | { | ||
4732 | stream->msg = "decoder file offset overflow"; | ||
4733 | return EINVAL; | ||
4734 | } | ||
4735 | |||
4736 | stream->dec_winstart += stream->dec_tgtlen; | ||
4737 | |||
4738 | if ((stream->dec_win_ind & VCD_INVWIN) != 0) | ||
4739 | { | ||
4740 | stream->msg = "unrecognized window indicator bits set"; | ||
4741 | return EINVAL; | ||
4742 | } | ||
4743 | |||
4744 | if ((ret = xd3_decode_init_window (stream))) { return ret; } | ||
4745 | |||
4746 | stream->dec_state = DEC_CPYLEN; | ||
4747 | |||
4748 | IF_DEBUG1 (P(RINT "--------- TARGET WINDOW %"Q"u ------------------\n", stream->current_window)); | ||
4749 | } | ||
4750 | |||
4751 | case DEC_CPYLEN: | ||
4752 | /* Copy window length: only if VCD_SOURCE or VCD_TARGET is set */ | ||
4753 | SIZE_CASE(SRCORTGT (stream->dec_win_ind), stream->dec_cpylen, DEC_CPYOFF); | ||
4754 | |||
4755 | /* Set the initial, logical decoder position (HERE address) in dec_position. This | ||
4756 | * is set to just after the source/copy window, as we are just about to output the | ||
4757 | * first byte of target window. */ | ||
4758 | stream->dec_position = stream->dec_cpylen; | ||
4759 | |||
4760 | case DEC_CPYOFF: | ||
4761 | /* Copy window offset: only if VCD_SOURCE or VCD_TARGET is set */ | ||
4762 | OFFSET_CASE(SRCORTGT (stream->dec_win_ind), stream->dec_cpyoff, DEC_ENCLEN); | ||
4763 | |||
4764 | /* Copy offset and copy length may not overflow. */ | ||
4765 | if (XOFF_T_OVERFLOW (stream->dec_cpyoff, stream->dec_cpylen)) | ||
4766 | { | ||
4767 | stream->msg = "decoder copy window overflows a file offset"; | ||
4768 | return EINVAL; | ||
4769 | } | ||
4770 | |||
4771 | /* Check copy window bounds: VCD_TARGET window may not exceed current position. */ | ||
4772 | if ((stream->dec_win_ind & VCD_TARGET) && | ||
4773 | (stream->dec_cpyoff + (xoff_t) stream->dec_cpylen > stream->dec_winstart)) | ||
4774 | { | ||
4775 | stream->msg = "VCD_TARGET window out of bounds"; | ||
4776 | return EINVAL; | ||
4777 | } | ||
4778 | |||
4779 | case DEC_ENCLEN: | ||
4780 | /* Length of the delta encoding */ | ||
4781 | SIZE_CASE(1, stream->dec_enclen, DEC_TGTLEN); | ||
4782 | case DEC_TGTLEN: | ||
4783 | /* Length of target window */ | ||
4784 | SIZE_CASE(1, stream->dec_tgtlen, DEC_DELIND); | ||
4785 | |||
4786 | /* Set the maximum decoder position, beyond which we should not decode any data. | ||
4787 | * This is the maximum value for dec_position. This may not exceed the size of a | ||
4788 | * usize_t. */ | ||
4789 | if (USIZE_T_OVERFLOW (stream->dec_cpylen, stream->dec_tgtlen)) | ||
4790 | { | ||
4791 | stream->msg = "decoder target window overflows a usize_t"; | ||
4792 | return EINVAL; | ||
4793 | } | ||
4794 | |||
4795 | /* Check for malicious files. */ | ||
4796 | if (stream->dec_tgtlen > XD3_HARDMAXWINSIZE) | ||
4797 | { | ||
4798 | stream->msg = "hard window size exceeded"; | ||
4799 | return EINVAL; | ||
4800 | } | ||
4801 | |||
4802 | stream->dec_maxpos = stream->dec_cpylen + stream->dec_tgtlen; | ||
4803 | |||
4804 | case DEC_DELIND: | ||
4805 | /* Delta indicator */ | ||
4806 | BYTE_CASE(1, stream->dec_del_ind, DEC_DATALEN); | ||
4807 | |||
4808 | if ((stream->dec_del_ind & VCD_INVDEL) != 0) | ||
4809 | { | ||
4810 | stream->msg = "unrecognized delta indicator bits set"; | ||
4811 | return EINVAL; | ||
4812 | } | ||
4813 | |||
4814 | /* Delta indicator is only used with secondary compression. */ | ||
4815 | if ((stream->dec_del_ind != 0) && (stream->sec_type == NULL)) | ||
4816 | { | ||
4817 | stream->msg = "invalid delta indicator bits set"; | ||
4818 | return EINVAL; | ||
4819 | } | ||
4820 | |||
4821 | /* Section lengths */ | ||
4822 | case DEC_DATALEN: | ||
4823 | SIZE_CASE(1, stream->data_sect.size, DEC_INSTLEN); | ||
4824 | case DEC_INSTLEN: | ||
4825 | SIZE_CASE(1, stream->inst_sect.size, DEC_ADDRLEN); | ||
4826 | case DEC_ADDRLEN: | ||
4827 | SIZE_CASE(1, stream->addr_sect.size, DEC_CKSUM); | ||
4828 | |||
4829 | case DEC_CKSUM: | ||
4830 | /* Window checksum. */ | ||
4831 | if ((stream->dec_win_ind & VCD_ADLER32) != 0) | ||
4832 | { | ||
4833 | int i; | ||
4834 | |||
4835 | if ((ret = xd3_decode_bytes (stream, stream->dec_cksum, & stream->dec_cksumbytes, 4))) { return ret; } | ||
4836 | |||
4837 | for (i = 0; i < 4; i += 1) | ||
4838 | { | ||
4839 | stream->dec_adler32 = (stream->dec_adler32 << 8) | stream->dec_cksum[i]; | ||
4840 | } | ||
4841 | } | ||
4842 | |||
4843 | stream->dec_state = DEC_DATA; | ||
4844 | |||
4845 | /* Check dec_enclen for redundency, otherwise it is not really used. */ | ||
4846 | { | ||
4847 | usize_t enclen_check = (1 + (xd3_sizeof_size (stream->dec_tgtlen) + | ||
4848 | xd3_sizeof_size (stream->data_sect.size) + | ||
4849 | xd3_sizeof_size (stream->inst_sect.size) + | ||
4850 | xd3_sizeof_size (stream->addr_sect.size)) + | ||
4851 | stream->data_sect.size + | ||
4852 | stream->inst_sect.size + | ||
4853 | stream->addr_sect.size + | ||
4854 | ((stream->dec_win_ind & VCD_ADLER32) ? 4 : 0)); | ||
4855 | |||
4856 | if (stream->dec_enclen != enclen_check) | ||
4857 | { | ||
4858 | stream->msg = "incorrect encoding length (redundent)"; | ||
4859 | return EINVAL; | ||
4860 | } | ||
4861 | } | ||
4862 | |||
4863 | /* Returning here gives the application a chance to inspect the header, skip the | ||
4864 | * window, etc. */ | ||
4865 | if (stream->current_window == 0) { return XD3_GOTHEADER; } | ||
4866 | else { return XD3_WINSTART; } | ||
4867 | |||
4868 | case DEC_DATA: | ||
4869 | case DEC_INST: | ||
4870 | case DEC_ADDR: | ||
4871 | /* Next read the three sections. */ | ||
4872 | if ((ret = xd3_decode_sections (stream))) { return ret; } | ||
4873 | |||
4874 | case DEC_EMIT: | ||
4875 | |||
4876 | /* To speed VCD_SOURCE block-address calculations, the source cpyoff_blocks and | ||
4877 | * cpyoff_blkoff are pre-computed. */ | ||
4878 | if (stream->dec_win_ind & VCD_SOURCE) | ||
4879 | { | ||
4880 | xd3_source *src = stream->src; | ||
4881 | |||
4882 | if (src == NULL) | ||
4883 | { | ||
4884 | stream->msg = "source input required"; | ||
4885 | return EINVAL; | ||
4886 | } | ||
4887 | |||
4888 | src->cpyoff_blocks = stream->dec_cpyoff / src->blksize; | ||
4889 | src->cpyoff_blkoff = stream->dec_cpyoff % src->blksize; | ||
4890 | } | ||
4891 | |||
4892 | /* xd3_decode_emit returns XD3_OUTPUT on every success. */ | ||
4893 | if ((ret = xd3_decode_emit (stream)) == XD3_OUTPUT) | ||
4894 | { | ||
4895 | stream->total_out += (xoff_t) stream->avail_out; | ||
4896 | } | ||
4897 | |||
4898 | return ret; | ||
4899 | |||
4900 | case DEC_FINISH: | ||
4901 | { | ||
4902 | if (stream->dec_win_ind & VCD_TARGET) | ||
4903 | { | ||
4904 | if (stream->dec_lastwin == NULL) | ||
4905 | { | ||
4906 | stream->dec_lastwin = stream->next_out; | ||
4907 | stream->dec_lastspace = stream->space_out; | ||
4908 | } | ||
4909 | else | ||
4910 | { | ||
4911 | xd3_swap_uint8p (& stream->dec_lastwin, & stream->next_out); | ||
4912 | xd3_swap_usize_t (& stream->dec_lastspace, & stream->space_out); | ||
4913 | } | ||
4914 | } | ||
4915 | |||
4916 | stream->dec_lastlen = stream->dec_tgtlen; | ||
4917 | stream->dec_laststart = stream->dec_winstart; | ||
4918 | stream->dec_window_count += 1; | ||
4919 | |||
4920 | /* Note: the updates to dec_winstart & current_window are deferred until after the | ||
4921 | * next DEC_WININD byte is read. */ | ||
4922 | stream->dec_state = DEC_WININD; | ||
4923 | return XD3_WINFINISH; | ||
4924 | } | ||
4925 | |||
4926 | default: | ||
4927 | stream->msg = "invalid state"; | ||
4928 | return EINVAL; | ||
4929 | } | ||
4930 | } | ||
4931 | |||
4932 | /****************************************************************************************** | ||
4933 | String matching helpers | ||
4934 | ******************************************************************************************/ | ||
4935 | |||
4936 | #if XD3_ENCODER | ||
4937 | /* Do the initial xd3_string_match() checksum table setup. Allocations are delayed until | ||
4938 | * first use to avoid allocation sometimes (e.g., perfect matches, zero-length inputs). */ | ||
4939 | static int | ||
4940 | xd3_string_match_init (xd3_stream *stream) | ||
4941 | { | ||
4942 | const int DO_SMALL = ! (stream->flags & XD3_NOCOMPRESS); | ||
4943 | const int DO_LARGE = (stream->src != NULL); | ||
4944 | |||
4945 | if (DO_SMALL) | ||
4946 | { | ||
4947 | /* Subsequent calls can return immediately after checking reset. */ | ||
4948 | if (stream->small_table != NULL) | ||
4949 | { | ||
4950 | /* The target hash table is reinitialized once per window. */ | ||
4951 | if (stream->small_reset) | ||
4952 | { | ||
4953 | stream->small_reset = 0; | ||
4954 | memset (stream->small_table, 0, sizeof (usize_t) * stream->small_hash.size); | ||
4955 | } | ||
4956 | |||
4957 | return 0; | ||
4958 | } | ||
4959 | |||
4960 | if ((stream->small_table = xd3_alloc0 (stream, stream->small_hash.size, sizeof (usize_t))) == NULL) | ||
4961 | { | ||
4962 | return ENOMEM; | ||
4963 | } | ||
4964 | |||
4965 | /* If there is a previous table needed. */ | ||
4966 | if (stream->small_chain > 1) | ||
4967 | { | ||
4968 | xd3_slist *p, *m; | ||
4969 | |||
4970 | if ((stream->small_prev = xd3_alloc (stream, stream->sprevsz, sizeof (xd3_slist))) == NULL) | ||
4971 | { | ||
4972 | return ENOMEM; | ||
4973 | } | ||
4974 | |||
4975 | /* Initialize circular lists. */ | ||
4976 | for (p = stream->small_prev, m = stream->small_prev + stream->sprevsz; p != m; p += 1) | ||
4977 | { | ||
4978 | p->next = p; | ||
4979 | p->prev = p; | ||
4980 | } | ||
4981 | } | ||
4982 | } | ||
4983 | |||
4984 | if (DO_LARGE && stream->large_table == NULL) | ||
4985 | { | ||
4986 | if ((stream->large_table = xd3_alloc0 (stream, stream->large_hash.size, sizeof (usize_t))) == NULL) | ||
4987 | { | ||
4988 | return ENOMEM; | ||
4989 | } | ||
4990 | } | ||
4991 | |||
4992 | return 0; | ||
4993 | } | ||
4994 | |||
4995 | /* Called at every entrance to the string-match loop and each time | ||
4996 | * stream->input_position the value returned as *next_move_point. | ||
4997 | * This function computes more source checksums to advance the window. */ | ||
4998 | static int | ||
4999 | xd3_srcwin_move_point (xd3_stream *stream, usize_t *next_move_point) | ||
5000 | { | ||
5001 | // The input offset at which the source should ideally be scanned | ||
5002 | xoff_t logical_input_cksum_pos = stream->total_in + pos_in + stream->srcwin_size; | ||
5003 | |||
5004 | if (stream->srcwin_cksum_pos >= stream->src->size) | ||
5005 | { | ||
5006 | *next_move_point = USIZE_T_MAX; | ||
5007 | return 0; | ||
5008 | } | ||
5009 | |||
5010 | if (stream->srcwin_cksum_pos > logical_input_cksum_pos) | ||
5011 | { | ||
5012 | *next_move_point = stream->srcwin_cksum_pos - logical_input_cksum_pos; | ||
5013 | return 0; | ||
5014 | } | ||
5015 | |||
5016 | IF_DEBUG1 (P(RINT "[move_p1] size=%d T=%"Q"d S=%"Q"d\n", stream->srcwin_size, | ||
5017 | stream->total_in + pos_in, stream->srcwin_cksum_pos)); | ||
5018 | |||
5019 | *next_move_point = pos_in + stream->srcwin_size; | ||
5020 | |||
5021 | if (stream->srcwin_cksum_pos == 0) | ||
5022 | { | ||
5023 | // Two windows to start with | ||
5024 | logical_input_cksum_pos += stream->srcwin_size; | ||
5025 | } | ||
5026 | else | ||
5027 | { | ||
5028 | // Otherwise double and add | ||
5029 | stream->srcwin_size = min(stream->srcwin_maxsz, stream->srcwin_size * 2); | ||
5030 | logical_input_cksum_pos += stream->srcwin_size; | ||
5031 | } | ||
5032 | |||
5033 | while (stream->srcwin_cksum_pos < logical_input_cksum_pos && | ||
5034 | stream->srcwin_cksum_pos < stream->src->size) | ||
5035 | { | ||
5036 | xoff_t blkno = stream->srcwin_cksum_pos / stream->src->blksize; | ||
5037 | usize_t blkoff = stream->srcwin_cksum_pos % stream->src->blksize; | ||
5038 | usize_t onblk = xd3_bytes_on_srcblk (stream->src, blkno); | ||
5039 | int ret; | ||
5040 | |||
5041 | if (blkoff + stream->large_look >= onblk) | ||
5042 | { | ||
5043 | /* Next block */ | ||
5044 | stream->srcwin_cksum_pos = (blkno * stream->src->blksize) + onblk; | ||
5045 | continue; | ||
5046 | } | ||
5047 | |||
5048 | if ((ret = xd3_getblk (stream, blkno))) | ||
5049 | { | ||
5050 | return ret; | ||
5051 | } | ||
5052 | |||
5053 | usize_t diff = logical_input_cksum_pos - stream->srcwin_cksum_pos; | ||
5054 | |||
5055 | onblk = min(onblk, diff + blkoff + stream->large_look); | ||
5056 | |||
5057 | while (blkoff + stream->large_look <= onblk) | ||
5058 | { | ||
5059 | uint32_t cksum = xd3_lcksum (stream->src->curblk + blkoff, stream->large_look); | ||
5060 | usize_t hval = xd3_checksum_hash (& stream->large_hash, cksum); | ||
5061 | |||
5062 | stream->large_table[hval] = stream->srcwin_cksum_pos + HASH_CKOFFSET; | ||
5063 | |||
5064 | blkoff += stream->large_step; | ||
5065 | stream->srcwin_cksum_pos += stream->large_step; | ||
5066 | IF_DEBUG (stream->large_ckcnt += 1); | ||
5067 | } | ||
5068 | } | ||
5069 | |||
5070 | IF_DEBUG1 (P(RINT "[move_p2] size=%d T=%"Q"d S=%"Q"d next_move=%d\n", stream->srcwin_size, | ||
5071 | stream->total_in + pos_in, stream->srcwin_cksum_pos, *next_move_point)); | ||
5072 | |||
5073 | return 0; | ||
5074 | } | ||
5075 | |||
5076 | /* Decides the source window: fills in the stream->src fields srcbase, srclen, marks | ||
5077 | * stream->srcwin_decided and sets stream->taroff. The call is delayed until these values are | ||
5078 | * needed to encode a copy address. Returns 0, or EINVAL if the window length overflows usize_t. */ | ||
5079 | static int | ||
5080 | xd3_srcwin_setup (xd3_stream *stream) | ||
5081 | { | ||
5082 | xd3_source *src = stream->src; | ||
5083 | xoff_t length; | ||
5084 | |||
5085 | IF_DEBUG1 (P(RINT "[srcwin setup:%"Q"u] iopt buffer %s\n", | ||
5086 | stream->current_window, | ||
5087 | stream->enc_state < ENC_FLUSH ? "overflow" : "fit")); | ||
5088 | |||
5089 | /* Check the undecided state. */ | ||
5090 | XD3_ASSERT (src->srclen == 0 && src->srcbase == 0); | ||
5091 | |||
5092 | /* Avoid repeating this call. */ | ||
5093 | stream->srcwin_decided = 1; | ||
5094 | |||
5095 | /* If the stream is flushing, then the iopt buffer was able to contain the complete | ||
5096 | * encoding. If no copies were issued no source window is actually needed. This | ||
5097 | * prevents the VCDIFF header from including source base/len. xd3_emit_hdr checks | ||
5098 | * for srclen == 0. */ | ||
5099 | if (stream->enc_state == ENC_FLUSH && stream->match_maxaddr == 0) | ||
5100 | { | ||
5101 | goto done; | ||
5102 | } | ||
5103 | |||
5104 | /* Check for overflow, srclen is usize_t - this can't happen unless XD3_DEFAULT_SRCBACK | ||
5105 | * and related parameters are extreme - should use smaller windows. */ | ||
5106 | length = stream->match_maxaddr - stream->match_minaddr; | ||
5107 | |||
5108 | if (length > (xoff_t) USIZE_T_MAX) | ||
5109 | { | ||
5110 | stream->msg = "source window length overflow (not 64bit)"; | ||
5111 | return EINVAL; | ||
5112 | } | ||
5113 | |||
5114 | /* If ENC_FLUSH, then we know the exact source window to use because no more copies can | ||
5115 | * be issued. */ | ||
5116 | if (stream->enc_state == ENC_FLUSH) | ||
5117 | { | ||
5118 | src->srcbase = stream->match_minaddr; | ||
5119 | src->srclen = (usize_t) length; | ||
5120 | XD3_ASSERT (src->srclen); | ||
5121 | goto done; | ||
5122 | } | ||
5123 | |||
5124 | /* Otherwise, we have to make a guess. More copies may still be issued, but we have to decide | ||
5125 | * the window now: start at match_minaddr, take the larger of the matched range and 1.25 * avail_in. */ | ||
5126 | src->srcbase = stream->match_minaddr; | ||
5127 | src->srclen = max ((usize_t) length, stream->avail_in + (stream->avail_in >> 2)); | ||
5128 | if (src->size < src->srcbase + (xoff_t) src->srclen) | ||
5129 | { | ||
5130 | /* Clamp the window to the end of the source. Could reduce srcbase, as well. */ | ||
5131 | src->srclen = src->size - src->srcbase; | ||
5132 | } | ||
5133 | |||
5134 | XD3_ASSERT (src->srclen); | ||
5135 | done: | ||
5136 | IF_DEBUG1 (P(RINT "[srcwin setup:%"Q"u] base %"Q"u size %u\n", | ||
5137 | stream->current_window, | ||
5138 | src->srcbase, | ||
5139 | src->srclen)); | ||
5140 | /* Set the taroff to the window length. This convenience variable is used even when stream->src == NULL. */ | ||
5141 | stream->taroff = src->srclen; | ||
5142 | return 0; | ||
5143 | } | ||
5144 | |||
5145 | /* Sets the bounding region for a newly discovered source match, prior to calling | ||
5146 | * xd3_source_extend_match(). This sets the match_maxfwd, match_maxback variables. Note: | ||
5147 | * srcpos is an absolute position (xoff_t) but the match_maxfwd, match_maxback variables | ||
5148 | * are usize_t. Returns 0 if the setup succeeds, or 1 if the source position lies outside | ||
5149 | * an already-decided srcbase/srclen window. */ | ||
5150 | static int | ||
5151 | xd3_source_match_setup (xd3_stream *stream, xoff_t srcpos) | ||
5152 | { | ||
5153 | xd3_source *src = stream->src; | ||
5154 | usize_t greedy_or_not; | ||
5155 | xoff_t farthest_src; | ||
5156 | |||
5157 | stream->match_maxback = 0; | ||
5158 | stream->match_maxfwd = 0; | ||
5159 | stream->match_back = 0; | ||
5160 | stream->match_fwd = 0; | ||
5161 | |||
5162 | farthest_src = max(stream->srcwin_cksum_pos, srcpos); | ||
5163 | |||
5164 | XD3_ASSERT (stream->srcwin_maxsz > src->blksize); | ||
5165 | |||
5166 | /* This prevents the encoder from seeking back more than srcwin_maxsz. Using | ||
5167 | * srcwin_maxsz is incorrect. TODO: Possibly a new option here, how far back to | ||
5168 | * seek? */ | ||
5169 | if (max_in == 0 || | ||
5170 | farthest_src - srcpos > stream->srcwin_maxsz - src->blksize) | ||
5171 | { | ||
5172 | goto bad; // TODO! Note: this prevents catching the TODO/bug below | ||
5173 | } | ||
5174 | |||
5175 | /* TODO: check for boundary crossing */ | ||
5176 | |||
5177 | /* Going backwards, the 1.5-pass algorithm allows some already-matched input may be | ||
5178 | * covered by a longer source match. The greedy algorithm does not allow this. */ | ||
5179 | if (stream->flags & XD3_BEGREEDY) | ||
5180 | { | ||
5181 | /* The greedy algorithm allows backward matching to the last matched position. */ | ||
5182 | greedy_or_not = xd3_iopt_last_matched (stream); | ||
5183 | } | ||
5184 | else | ||
5185 | { | ||
5186 | /* The 1.5-pass algorithm allows backward matching to go back as far as the | ||
5187 | * unencoded offset, which is updated as instructions pass out of the iopt buffer. | ||
5188 | * If this (default) is chosen, it means xd3_iopt_erase may be called to eliminate | ||
5189 | * instructions when a covering source match is found. */ | ||
5190 | greedy_or_not = stream->unencoded_offset; | ||
5191 | } | ||
5192 | |||
5193 | /* Backward target match limit. */ | ||
5194 | XD3_ASSERT (pos_in >= greedy_or_not); | ||
5195 | stream->match_maxback = pos_in - greedy_or_not; | ||
5196 | |||
5197 | /* Forward target match limit. */ | ||
5198 | XD3_ASSERT (max_in > pos_in); | ||
5199 | stream->match_maxfwd = max_in - pos_in; | ||
5200 | |||
5201 | /* Now we take the source position into account. It depends whether the srclen/srcbase | ||
5202 | * have been decided yet. */ | ||
5203 | if (stream->srcwin_decided == 0) | ||
5204 | { | ||
5205 | /* Unrestricted case: the match can cover the entire source, 0--src->size. We | ||
5206 | * compare the usize_t match_maxfwd/match_maxback against the xoff_t src->size/srcpos values | ||
5207 | * and take the min. */ | ||
5208 | xoff_t srcavail; | ||
5209 | |||
5210 | if (srcpos < (xoff_t) stream->match_maxback) | ||
5211 | { | ||
5212 | stream->match_maxback = srcpos; | ||
5213 | } | ||
5214 | |||
5215 | srcavail = src->size - srcpos; | ||
5216 | if (srcavail < (xoff_t) stream->match_maxfwd) | ||
5217 | { | ||
5218 | stream->match_maxfwd = srcavail; | ||
5219 | } | ||
5220 | |||
5221 | goto good; | ||
5222 | } | ||
5223 | |||
5224 | /* Decided some source window. */ | ||
5225 | XD3_ASSERT (src->srclen > 0); | ||
5226 | |||
5227 | /* Restricted case: fail if the srcpos lies outside the source window. The upper bound is inclusive: srcpos == srcbase + srclen passes and leaves match_maxfwd == 0. */ | ||
5228 | if ((srcpos < src->srcbase) || (srcpos > (src->srcbase + (xoff_t) src->srclen))) | ||
5229 | { | ||
5230 | goto bad; | ||
5231 | } | ||
5232 | else | ||
5233 | { | ||
5234 | usize_t srcavail; | ||
5235 | |||
5236 | srcavail = (usize_t) (srcpos - src->srcbase); | ||
5237 | if (srcavail < stream->match_maxback) | ||
5238 | { | ||
5239 | stream->match_maxback = srcavail; | ||
5240 | } | ||
5241 | |||
5242 | srcavail = (usize_t) (src->srcbase + (xoff_t) src->srclen - srcpos); | ||
5243 | if (srcavail < stream->match_maxfwd) { | ||
5244 | stream->match_maxfwd = srcavail; | ||
5245 | } | ||
5246 | |||
5247 | goto good; | ||
5248 | } | ||
5249 | |||
5250 | good: | ||
5251 | stream->match_state = MATCH_BACKWARD; | ||
5252 | stream->match_srcpos = srcpos; | ||
5253 | return 0; | ||
5254 | |||
5255 | bad: | ||
5256 | stream->match_state = MATCH_SEARCHING; | ||
5257 | return 1; | ||
5258 | } | ||
5259 | |||
5260 | /* This function expands the source match backward and forward. It is reentrant, since | ||
5261 | * xd3_getblk may return XD3_GETSRCBLK, so most variables are kept in xd3_stream. There | ||
5262 | * are two callers of this function, the string_matching routine when a checksum match is | ||
5263 | * discovered, and xd3_encode_input whenever a continuing (or initial) match is suspected. | ||
5264 | * The two callers do different things with the input_position, thus this function leaves | ||
5265 | * that variable untouched. If a match is taken the resulting stream->match_fwd is left | ||
5266 | * non-zero. */ | ||
5267 | static int | ||
5268 | xd3_source_extend_match (xd3_stream *stream) | ||
5269 | { | ||
5270 | int ret; | ||
5271 | xd3_source *src = stream->src; | ||
5272 | xoff_t matchoff; /* matchoff is the current right/left-boundary of the source match being tested. */ | ||
5273 | usize_t streamoff; /* streamoff is the current right/left-boundary of the input match being tested. */ | ||
5274 | xoff_t tryblk; /* tryblk, tryoff are the block, offset position of matchoff */ | ||
5275 | usize_t tryoff; | ||
5276 | usize_t tryrem; /* tryrem is the number of matchable bytes on the source block */ | ||
5277 | |||
5278 | XD3_ASSERT (src != NULL); | ||
5279 | |||
5280 | /* Does it make sense to compute backward match AFTER forward match? */ | ||
5281 | if (stream->match_state == MATCH_BACKWARD) | ||
5282 | { | ||
5283 | /* Note: this code is practically duplicated below, substituting | ||
5284 | * match_fwd/match_back and direction. Consolidate? */ | ||
5285 | matchoff = stream->match_srcpos - stream->match_back; | ||
5286 | streamoff = pos_in - stream->match_back; | ||
5287 | tryblk = matchoff / src->blksize; | ||
5288 | tryoff = matchoff % src->blksize; | ||
5289 | |||
5290 | /* this loops backward over source blocks */ | ||
5291 | while (stream->match_back < stream->match_maxback) | ||
5292 | { | ||
5293 | /* see if we're backing across a source block boundary */ | ||
5294 | if (tryoff == 0) | ||
5295 | { | ||
5296 | tryoff = src->blksize; | ||
5297 | tryblk -= 1; | ||
5298 | } | ||
5299 | |||
5300 | if ((ret = xd3_getblk (stream, tryblk))) | ||
5301 | { | ||
5302 | /* could be a XD3_GETSRCBLK failure; progress so far is kept in stream->match_back. */ | ||
5303 | return ret; | ||
5304 | } | ||
5305 | |||
5306 | /* OPT: This code can be optimized. */ | ||
5307 | for (tryrem = min (tryoff, stream->match_maxback - stream->match_back); | ||
5308 | tryrem != 0; | ||
5309 | tryrem -= 1, stream->match_back += 1) | ||
5310 | { | ||
5311 | if (src->curblk[tryoff-1] != stream->next_in[streamoff-1]) | ||
5312 | { | ||
5313 | goto doneback; | ||
5314 | } | ||
5315 | |||
5316 | tryoff -= 1; | ||
5317 | streamoff -= 1; | ||
5318 | } | ||
5319 | } | ||
5320 | |||
5321 | doneback: | ||
5322 | stream->match_state = MATCH_FORWARD; | ||
5323 | } | ||
5324 | |||
5325 | XD3_ASSERT (stream->match_state == MATCH_FORWARD); | ||
5326 | |||
5327 | matchoff = stream->match_srcpos + stream->match_fwd; | ||
5328 | streamoff = pos_in + stream->match_fwd; | ||
5329 | tryblk = matchoff / src->blksize; | ||
5330 | tryoff = matchoff % src->blksize; | ||
5331 | |||
5332 | /* Note: practically the same code as backwards case above: same comments */ | ||
5333 | while (stream->match_fwd < stream->match_maxfwd) | ||
5334 | { | ||
5335 | if ((ret = xd3_getblk (stream, tryblk))) | ||
5336 | { | ||
5337 | return ret; | ||
5338 | } | ||
5339 | |||
5340 | /* There's a good speedup for doing word comparisons: see zlib. */ | ||
5341 | for (tryrem = min(stream->match_maxfwd - stream->match_fwd, | ||
5342 | src->blksize - tryoff); | ||
5343 | tryrem != 0; | ||
5344 | tryrem -= 1, stream->match_fwd += 1) | ||
5345 | { | ||
5346 | if (src->curblk[tryoff] != stream->next_in[streamoff]) | ||
5347 | { | ||
5348 | goto donefwd; | ||
5349 | } | ||
5350 | |||
5351 | tryoff += 1; | ||
5352 | streamoff += 1; | ||
5353 | } | ||
5354 | |||
5355 | if (tryoff == src->blksize) | ||
5356 | { | ||
5357 | tryoff = 0; | ||
5358 | tryblk += 1; | ||
5359 | } | ||
5360 | } | ||
5361 | |||
5362 | donefwd: | ||
5363 | stream->match_state = MATCH_SEARCHING; | ||
5364 | |||
5365 | /* Now decide whether to take the match. There are several ways to answer this | ||
5366 | * question and this is likely the best answer. There is currently an assertion | ||
5367 | * in xd3_iopt_erase that checks whether min_match works. This variable maintains | ||
5368 | * that every match exceeds the end of the previous match. However, it is | ||
5369 | * possible that match_back allows us to find a match that goes a long way back | ||
5370 | * but not enough forward. We could try an alternate approach, which might help | ||
5371 | * or it might just be extra complexity: eliminate the next match_fwd >= min_match | ||
5372 | * test and call xd3_iopt_erase right away. Erase instructions as far as it goes | ||
5373 | * back, then either remember what was deleted and re-insert it, or count on the | ||
5374 | * string-matching algorithm to find that match again. I think it is more | ||
5375 | * worthwhile to implement large_hash duplicates. */ | ||
5376 | if (stream->match_fwd < min_match) | ||
5377 | { | ||
5378 | stream->match_fwd = 0; | ||
5379 | } | ||
5380 | else | ||
5381 | { | ||
5382 | usize_t total = stream->match_fwd + stream->match_back; | ||
5383 | xoff_t match_end; | ||
5384 | |||
5385 | /* Correct the variables to remove match_back from the equation: the match now starts match_back bytes earlier and match_fwd is its full length. */ | ||
5386 | stream->input_position -= stream->match_back; | ||
5387 | stream->match_srcpos -= stream->match_back; | ||
5388 | stream->match_fwd += stream->match_back; | ||
5389 | match_end = stream->match_srcpos + stream->match_fwd; | ||
5390 | |||
5391 | /* At this point we may have to erase any iopt-buffer instructions that are | ||
5392 | * fully covered by a backward-extending copy. */ | ||
5393 | if (stream->match_back > 0) | ||
5394 | { | ||
5395 | xd3_iopt_erase (stream, pos_in, total); | ||
5396 | } | ||
5397 | |||
5398 | stream->match_back = 0; | ||
5399 | |||
5400 | /* Update ranges. The first source match occurs with both values set to 0. */ | ||
5401 | if (stream->match_maxaddr == 0 || | ||
5402 | stream->match_srcpos < stream->match_minaddr) | ||
5403 | { | ||
5404 | stream->match_minaddr = stream->match_srcpos; | ||
5405 | } | ||
5406 | |||
5407 | if (match_end > stream->match_maxaddr) | ||
5408 | { | ||
5409 | stream->match_maxaddr = match_end; | ||
5410 | } | ||
5411 | |||
5412 | IF_DEBUG1 ({ | ||
5413 | static int x = 0; | ||
5414 | P(RINT "[source match:%d] <inp %"Q"u %"Q"u> <src %"Q"u %"Q"u> (%s) [ %u bytes ]\n", | ||
5415 | x++, | ||
5416 | stream->total_in + pos_in, | ||
5417 | stream->total_in + pos_in + stream->match_fwd, | ||
5418 | stream->match_srcpos, | ||
5419 | stream->match_srcpos + stream->match_fwd, | ||
5420 | (stream->total_in + stream->input_position == stream->match_srcpos) ? "same" : "diff", | ||
5421 | stream->match_fwd); | ||
5422 | }); | ||
5423 | |||
5424 | if ((ret = xd3_found_match (stream, | ||
5425 | /* decoder position */ pos_in, | ||
5426 | /* length */ stream->match_fwd, | ||
5427 | /* address */ stream->match_srcpos, | ||
5428 | /* is_source */ 1))) | ||
5429 | { | ||
5430 | return ret; | ||
5431 | } | ||
5432 | |||
5433 | // TODO: ideally, we would update srcwin_cksum_pos to avoid computing checksums in | ||
5434 | // the middle of an already-discovered long match. | ||
5435 | |||
5436 | /* If the match ends with the available input: */ | ||
5437 | if (pos_in + stream->match_fwd == max_in) | ||
5438 | { | ||
5439 | /* Setup continuing match for the next window. */ | ||
5440 | stream->match_state = MATCH_TARGET; | ||
5441 | stream->match_srcpos += stream->match_fwd; | ||
5442 | } | ||
5443 | } | ||
5444 | |||
5445 | return 0; | ||
5446 | } | ||
5447 | |||
5448 | /* Update the small hash. Values in the small_table are offset by HASH_CKOFFSET (1) to | ||
5449 | * distinguish empty buckets from the zero offset. This maintains the previous linked lists. | ||
5450 | * The string matcher calls this both to enter the current position and, when promoting, to | ||
5451 | * re-enter an earlier matching position so that it heads the hash bucket (for the same | ||
5452 | * address cache). NOTE(review): an earlier comment described an "owrite" flag; no such parameter exists. */ | ||
5453 | static void | ||
5454 | xd3_scksum_insert (xd3_stream *stream, usize_t inx, usize_t scksum, usize_t pos) | ||
5455 | { | ||
5456 | /* If we are maintaining previous links. */ | ||
5457 | if (stream->small_prev) | ||
5458 | { | ||
5459 | usize_t last_pos = stream->small_table[inx]; | ||
5460 | xd3_slist *pos_list = & stream->small_prev[pos & stream->sprevmask]; | ||
5461 | xd3_slist *prev = pos_list->prev; | ||
5462 | xd3_slist *next = pos_list->next; | ||
5463 | |||
5464 | /* Assert link structure, update pos, cksum */ | ||
5465 | XD3_ASSERT (prev->next == pos_list); | ||
5466 | XD3_ASSERT (next->prev == pos_list); | ||
5467 | pos_list->pos = pos; | ||
5468 | pos_list->scksum = scksum; | ||
5469 | |||
5470 | /* Subtract HASH_CKOFFSET and test for a previous offset (0 means the bucket was empty). */ | ||
5471 | if (last_pos-- != 0) | ||
5472 | { | ||
5473 | xd3_slist *last_list = & stream->small_prev[last_pos & stream->sprevmask]; | ||
5474 | xd3_slist *last_next; | ||
5475 | |||
5476 | /* Verify existing entry. */ | ||
5477 | SMALL_HASH_DEBUG1 (stream, stream->next_in + last_pos); | ||
5478 | SMALL_HASH_DEBUG2 (stream, stream->next_in + pos); | ||
5479 | |||
5480 | /* The two positions (mod sprevsz) may have the same checksum, making the old | ||
5481 | * and new entries the same. That is why the removal step is not before the | ||
5482 | * above if-stmt. */ | ||
5483 | if (last_list != pos_list) | ||
5484 | { | ||
5485 | /* Remove current position from any list it may belong to. */ | ||
5486 | next->prev = prev; | ||
5487 | prev->next = next; | ||
5488 | |||
5489 | /* The ordinary case, add current position to last_list (linked in right after it). */ | ||
5490 | last_next = last_list->next; | ||
5491 | |||
5492 | pos_list->next = last_next; | ||
5493 | pos_list->prev = last_list; | ||
5494 | |||
5495 | last_next->prev = pos_list; | ||
5496 | last_list->next = pos_list; | ||
5497 | } | ||
5498 | } | ||
5499 | else | ||
5500 | { | ||
5501 | /* Remove current position from any list it may belong to. */ | ||
5502 | next->prev = prev; | ||
5503 | prev->next = next; | ||
5504 | |||
5505 | /* Re-initialize current position as a one-element circular list. */ | ||
5506 | pos_list->next = pos_list; | ||
5507 | pos_list->prev = pos_list; | ||
5508 | } | ||
5509 | } | ||
5510 | |||
5511 | /* Enter the new position into the hash bucket. */ | ||
5512 | stream->small_table[inx] = pos + HASH_CKOFFSET; | ||
5513 | } | ||
5514 | |||
5515 | #if XD3_DEBUG | ||
5516 | static int | ||
5517 | xd3_check_smatch (const uint8_t *ref0, const uint8_t *inp0, | ||
5518 | const uint8_t *inp_max, usize_t cmp_len) | ||
5519 | { | ||
5520 | usize_t i; /* same type as cmp_len, so the loop test is not a mixed signed/unsigned comparison */ | ||
5521 | /* Debug check of a small match: the first cmp_len bytes of ref0 and inp0 must agree. */ | ||
5522 | for (i = 0; i < cmp_len; i += 1) | ||
5523 | { | ||
5524 | XD3_ASSERT (ref0[i] == inp0[i]); | ||
5525 | } | ||
5526 | /* Unless the match ran up to inp_max, the byte just past it (i == cmp_len here) must differ. */ | ||
5527 | if (inp0 + cmp_len < inp_max) | ||
5528 | { | ||
5529 | XD3_ASSERT (inp0[i] != ref0[i]); | ||
5530 | } | ||
5531 | /* Always returns 1, so the call can itself be wrapped in XD3_ASSERT. */ | ||
5532 | return 1; | ||
5533 | } | ||
5534 | #endif /* XD3_DEBUG */ | ||
5535 | |||
5536 | /* When the hash table indicates a possible small string match, it calls this routine to | ||
5537 | * find the best match. BASE is the small_table value; HASH_CKOFFSET is subtracted to get the | ||
5538 | * actual position. After checking that match, if previous linked lists are in use | ||
5539 | * (stream->small_prev != NULL), earlier positions with the same SCKSUM are tested searching for | ||
5540 | * the longest match. If (min_match > MIN_MATCH) then a lazy match is in effect. Returns the | ||
5541 | * longest match length (0 if none); *match_offset is written only when a match is found. | ||
5542 | * | ||
5543 | * OPT: This is by far the most expensive function. The slowdown is in part due to the data | ||
5544 | * structure it maintains, which is relatively more expensive than it needs to be (in | ||
5545 | * comparison to zlib) in order to support the PROMOTE decision, which is to prefer the | ||
5546 | * most recently used matching address of a certain string to aid the VCDIFF same cache. | ||
5547 | * | ||
5548 | * Weak reasoning? it's time to modularize this routine...? Let's say the PROMOTE | ||
5549 | * feature supported by this slow data structure contributes around 2% improvement in | ||
5550 | * compressed size, is there a better code table that doesn't use the SAME address cache, | ||
5551 | * for which the speedup-discount could produce a better encoding? | ||
5552 | */ | ||
5553 | static /*inline*/ usize_t | ||
5554 | xd3_smatch (xd3_stream *stream, usize_t base, usize_t scksum, usize_t *match_offset) | ||
5555 | { | ||
5556 | usize_t cmp_len; | ||
5557 | usize_t match_length = 0; | ||
5558 | usize_t chain = (min_match == MIN_MATCH ? | ||
5559 | stream->small_chain : | ||
5560 | stream->small_lchain); | ||
5561 | xd3_slist *current = NULL; | ||
5562 | xd3_slist *first = NULL; | ||
5563 | const uint8_t *inp_max = stream->next_in + max_in; | ||
5564 | const uint8_t *inp; | ||
5565 | const uint8_t *ref; | ||
5566 | |||
5567 | SMALL_HASH_STATS (usize_t search_cnt = 0); | ||
5568 | SMALL_HASH_DEBUG1 (stream, stream->next_in + pos_in); | ||
5569 | SMALL_HASH_STATS (stream->sh_searches += 1); | ||
5570 | |||
5571 | XD3_ASSERT (min_match + pos_in <= max_in); | ||
5572 | |||
5573 | base -= HASH_CKOFFSET; | ||
5574 | |||
5575 | /* Initialize the chain. */ | ||
5576 | if (stream->small_prev != NULL) | ||
5577 | { | ||
5578 | first = current = & stream->small_prev[base & stream->sprevmask]; | ||
5579 | |||
5580 | /* Check if current->pos is correct. */ | ||
5581 | if (current->pos != base) { goto done; } | ||
5582 | } | ||
5583 | |||
5584 | again: | ||
5585 | |||
5586 | SMALL_HASH_STATS (search_cnt += 1); | ||
5587 | |||
5588 | /* For small matches, we can always go to the end-of-input because the matching position | ||
5589 | * must be less than the input position. */ | ||
5590 | XD3_ASSERT (base < pos_in); | ||
5591 | |||
5592 | ref = stream->next_in + base; | ||
5593 | inp = stream->next_in + pos_in; | ||
5594 | |||
5595 | SMALL_HASH_DEBUG2 (stream, ref); | ||
5596 | |||
5597 | /* Expand potential match forward. */ | ||
5598 | while (inp < inp_max && *inp == *ref) | ||
5599 | { | ||
5600 | ++inp; | ||
5601 | ++ref; | ||
5602 | } | ||
5603 | |||
5604 | cmp_len = inp - (stream->next_in + pos_in); | ||
5605 | |||
5606 | /* Verify correctness */ | ||
5607 | XD3_ASSERT (xd3_check_smatch (stream->next_in + base, stream->next_in + pos_in, | ||
5608 | inp_max, cmp_len)); | ||
5609 | |||
5610 | /* Update longest match */ | ||
5611 | if (cmp_len > match_length) | ||
5612 | { | ||
5613 | ( match_length) = cmp_len; | ||
5614 | (*match_offset) = base; | ||
5615 | |||
5616 | /* Stop if we match the entire input or discover a long_enough match. */ | ||
5617 | if (inp == inp_max || cmp_len >= stream->long_enough) | ||
5618 | { | ||
5619 | goto done; | ||
5620 | } | ||
5621 | } | ||
5622 | |||
5623 | /* If we have not reached the chain limit, see if there is another previous position. */ | ||
5624 | if (current) | ||
5625 | { | ||
5626 | while (--chain != 0) | ||
5627 | { | ||
5628 | /* Calculate the next base offset. */ | ||
5629 | current = current->prev; | ||
5630 | base = current->pos; | ||
5631 | |||
5632 | /* The list is circular: once it wraps around to the first entry every candidate has | ||
5633 | * been visited, so stop rather than walk (and re-compare) the same entries again. */ | ||
5634 | if (current == first) | ||
5635 | { | ||
5636 | break; | ||
5637 | } | ||
5638 | |||
5639 | /* Skip positions that are wrong (the lists are not re-initialized across input | ||
5640 | * windows) and entries whose scksum differs; otherwise test this candidate. */ | ||
5641 | if (base < pos_in && current->scksum == scksum) | ||
5642 | { goto again; } | ||
5643 | } | ||
5644 | } | ||
5645 | |||
5646 | done: | ||
5647 | SMALL_HASH_STATS (stream->sh_compares += search_cnt); | ||
5648 | return match_length; | ||
5649 | } | ||
5650 | |||
5651 | #if XD3_DEBUG | ||
5652 | static void | ||
5653 | xd3_verify_small_state (xd3_stream *stream, | ||
5654 | const uint8_t *inp, | ||
5655 | uint32_t x_cksum) | ||
5656 | { | ||
5657 | uint32_t cksum = xd3_scksum (inp, stream->small_look); | ||
5658 | /* Debug check: the small checksum recomputed at inp must equal the caller's value x_cksum. */ | ||
5659 | XD3_ASSERT (cksum == x_cksum); | ||
5660 | } | ||
5661 | |||
5662 | static void | ||
5663 | xd3_verify_large_state (xd3_stream *stream, | ||
5664 | const uint8_t *inp, | ||
5665 | uint32_t x_cksum) | ||
5666 | { | ||
5667 | uint32_t cksum = xd3_lcksum (inp, stream->large_look); | ||
5668 | /* Debug check: the large checksum recomputed at inp must equal the caller's value x_cksum. */ | ||
5669 | XD3_ASSERT (cksum == x_cksum); | ||
5670 | } | ||
5671 | |||
5672 | static void | ||
5673 | xd3_verify_run_state (xd3_stream *stream, | ||
5674 | const uint8_t *inp, | ||
5675 | int x_run_l, | ||
5676 | uint8_t x_run_c) | ||
5677 | { | ||
5678 | int slook = stream->small_look; | ||
5679 | uint8_t run_c; | ||
5680 | int run_l = xd3_comprun (inp, slook, &run_c); | ||
5681 | /* Debug check: recompute the run over slook bytes at inp; the run byte must agree whenever a run exists, and the length must agree unless the caller's run already exceeds slook. */ | ||
5682 | XD3_ASSERT (run_l == 0 || run_c == x_run_c); | ||
5683 | XD3_ASSERT (x_run_l > slook || run_l == x_run_l); | ||
5684 | } | ||
5685 | #endif /* XD3_DEBUG */ | ||
5686 | #endif /* XD3_ENCODER */ | ||
5687 | |||
5688 | /****************************************************************************************** | ||
5689 | TEMPLATE pass | ||
5690 | ******************************************************************************************/ | ||
5691 | |||
5692 | #endif /* __XDELTA3_C_INLINE_PASS__ */ | ||
5693 | #ifdef __XDELTA3_C_TEMPLATE_PASS__ | ||
5694 | |||
5695 | #if XD3_ENCODER | ||
5696 | |||
5697 | /****************************************************************************************** | ||
5698 | Templates | ||
5699 | ******************************************************************************************/ | ||
5700 | |||
5701 | /* Template macros (under 30 lines of work): XD3_TEMPLATE(x) pastes the expansion of TEMPLATE onto x, and | ||
5702 | * XD3_STRINGIFY(x) turns the expansion of x into a string. The template parameters appear as, e.g., SLOOK, MIN_MATCH, TRYLAZY, etc. */ | ||
5703 | #define XD3_TEMPLATE(x) XD3_TEMPLATE2(x,TEMPLATE) | ||
5704 | #define XD3_TEMPLATE2(x,n) XD3_TEMPLATE3(x,n) | ||
5705 | #define XD3_TEMPLATE3(x,n) x ## n | ||
5706 | #define XD3_STRINGIFY(x) XD3_STRINGIFY2(x) | ||
5707 | #define XD3_STRINGIFY2(x) #x | ||
5708 | |||
5709 | static int XD3_TEMPLATE(xd3_string_match_) (xd3_stream *stream); | ||
5710 | |||
5711 | static const xd3_smatcher XD3_TEMPLATE(__smatcher_) = | ||
5712 | { | ||
5713 | XD3_STRINGIFY(TEMPLATE), | ||
5714 | XD3_TEMPLATE(xd3_string_match_), | ||
5715 | #if SOFTCFG == 1 | ||
5716 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 | ||
5717 | #else | ||
5718 | LLOOK, LSTEP, SLOOK, SCHAIN, SLCHAIN, SSMATCH, TRYLAZY, MAXLAZY, | ||
5719 | LONGENOUGH, PROMOTE | ||
5720 | #endif | ||
5721 | }; | ||
5722 | |||
5723 | static int | ||
5724 | XD3_TEMPLATE(xd3_string_match_) (xd3_stream *stream) | ||
5725 | { | ||
5726 | /* TODO config: These next three variables should be statically compliled in various | ||
5727 | * scan_cfg configurations? */ | ||
5728 | const int DO_SMALL = ! (stream->flags & XD3_NOCOMPRESS); | ||
5729 | const int DO_LARGE = (stream->src != NULL); | ||
5730 | const int DO_RUN = (1); | ||
5731 | |||
5732 | const uint8_t *inp; | ||
5733 | uint32_t scksum = 0; | ||
5734 | uint32_t lcksum = 0; | ||
5735 | usize_t sinx; | ||
5736 | usize_t linx; | ||
5737 | uint8_t run_c; | ||
5738 | int run_l; | ||
5739 | int ret; | ||
5740 | usize_t match_length; | ||
5741 | usize_t match_offset; // Note: "may be unused" warnings are bogus | ||
5742 | usize_t next_move_point; | ||
5743 | |||
5744 | /* If there will be no compression due to settings or short input, skip it entirely. */ | ||
5745 | if (! (DO_SMALL || DO_LARGE || DO_RUN) || pos_in + SLOOK > max_in) { goto loopnomore; } | ||
5746 | |||
5747 | if ((ret = xd3_string_match_init (stream))) { return ret; } | ||
5748 | |||
5749 | /* The restartloop label is reached when the incremental loop state needs to be | ||
5750 | * reset. */ | ||
5751 | restartloop: | ||
5752 | |||
5753 | /* If there is not enough input remaining for any kind of match, skip it. */ | ||
5754 | if (pos_in + SLOOK > max_in) { goto loopnomore; } | ||
5755 | |||
5756 | IF_DEBUG1 ({ | ||
5757 | static int x = 0; | ||
5758 | P(RINT "[string match:%d] pos_in %d; \n", | ||
5759 | x++, pos_in); | ||
5760 | }); | ||
5761 | |||
5762 | /* Now reset the incremental loop state: */ | ||
5763 | |||
5764 | /* The min_match variable is updated to avoid matching the same lazy match over and over | ||
5765 | * again. For example, if you find a (small) match of length 9 at one position, you | ||
5766 | * will likely find a match of length 8 at the next position. */ | ||
5767 | min_match = MIN_MATCH; | ||
5768 | |||
5769 | /* The current input byte. */ | ||
5770 | inp = stream->next_in + pos_in; | ||
5771 | |||
5772 | /* Small match state. */ | ||
5773 | if (DO_SMALL) | ||
5774 | { | ||
5775 | scksum = xd3_scksum (inp, SLOOK); | ||
5776 | } | ||
5777 | |||
5778 | /* Run state. */ | ||
5779 | if (DO_RUN) | ||
5780 | { | ||
5781 | run_l = xd3_comprun (inp, SLOOK, & run_c); | ||
5782 | } | ||
5783 | |||
5784 | /* Large match state. We continue the loop even after not enough bytes for LLOOK | ||
5785 | * remain, so always check pos_in in DO_LARGE code. */ | ||
5786 | if (DO_LARGE && (pos_in + LLOOK <= max_in)) | ||
5787 | { | ||
5788 | /* Source window: next_move_point is the point that pos_in must reach before | ||
5789 | * computing more source checksum. */ | ||
5790 | if ((ret = xd3_srcwin_move_point (stream, & next_move_point))) | ||
5791 | { | ||
5792 | return ret; | ||
5793 | } | ||
5794 | |||
5795 | lcksum = xd3_lcksum (inp, LLOOK); | ||
5796 | } | ||
5797 | |||
5798 | /* TRYLAZYLEN: True if a certain length match should be followed by lazy search. This | ||
5799 | * checks that LEN is shorter than MAXLAZY and that there is enough leftover data to | ||
5800 | * consider lazy matching. "Enough" is set to 2 since the next match will start at the | ||
5801 | * next offset, it must match two extra characters. */ | ||
5802 | #define TRYLAZYLEN(LEN,POS,MAX) ((TRYLAZY && (LEN) < MAXLAZY) && ((POS) + (LEN) <= (MAX) - 2)) | ||
5803 | |||
5804 | /* HANDLELAZY: This statement is called each time an instruciton is emitted (three | ||
5805 | * cases). If the instruction is large enough, the loop is restarted, otherwise lazy | ||
5806 | * matching may ensue. */ | ||
5807 | #define HANDLELAZY(mlen) \ | ||
5808 | if (TRYLAZYLEN ((mlen), pos_in, max_in)) \ | ||
5809 | { min_match = (mlen) + LEAST_MATCH_INCR; goto updateone; } \ | ||
5810 | else \ | ||
5811 | { pos_in += (mlen); goto restartloop; } | ||
5812 | |||
5813 | /* Now loop over one input byte at a time until a match is found... */ | ||
5814 | for (;; inp += 1, pos_in += 1) | ||
5815 | { | ||
5816 | /* Now we try three kinds of string match in order of expense: | ||
5817 | * run, large match, small match. */ | ||
5818 | |||
5819 | /* Expand the start of a RUN. The test for (run_l == SLOOK) avoids repeating this | ||
5820 | * check when we pass through a run area performing lazy matching. The run is only | ||
5821 | * expanded once when the min_match is first reached. If lazy matching is | ||
5822 | * performed, the run_l variable will remain inconsistent until the first | ||
5823 | * non-running input character is reached, at which time the run_l may then again | ||
5824 | * grow to SLOOK. */ | ||
5825 | if (DO_RUN && run_l == SLOOK) | ||
5826 | { | ||
5827 | usize_t max_len = max_in - pos_in; | ||
5828 | |||
5829 | IF_DEBUG (xd3_verify_run_state (stream, inp, run_l, run_c)); | ||
5830 | |||
5831 | while (run_l < max_len && inp[run_l] == run_c) { run_l += 1; } | ||
5832 | |||
5833 | /* Output a RUN instruction. */ | ||
5834 | if (run_l >= min_match && run_l >= MIN_RUN) | ||
5835 | { | ||
5836 | if ((ret = xd3_emit_run (stream, pos_in, run_l, run_c))) { return ret; } | ||
5837 | |||
5838 | HANDLELAZY (run_l); | ||
5839 | } | ||
5840 | } | ||
5841 | |||
5842 | /* If there is enough input remaining. */ | ||
5843 | if (DO_LARGE && (pos_in + LLOOK <= max_in)) | ||
5844 | { | ||
5845 | if ((pos_in >= next_move_point) && | ||
5846 | (ret = xd3_srcwin_move_point (stream, & next_move_point))) | ||
5847 | { | ||
5848 | return ret; | ||
5849 | } | ||
5850 | |||
5851 | linx = xd3_checksum_hash (& stream->large_hash, lcksum); | ||
5852 | |||
5853 | IF_DEBUG (xd3_verify_large_state (stream, inp, lcksum)); | ||
5854 | |||
5855 | /* Note: To handle large checksum duplicates, this code should be rearranged to | ||
5856 | * resemble the small_match case more. But how much of the code can be truly | ||
5857 | * shared? The main difference is the need for xd3_source_extend_match to work | ||
5858 | * outside of xd3_string_match, in the case where inputs are identical. */ | ||
5859 | if (unlikely (stream->large_table[linx] != 0)) | ||
5860 | { | ||
5861 | /* the match_setup will fail if the source window has been decided and the | ||
5862 | * match lies outside it. You could consider forcing a window at this point | ||
5863 | * to permit a new source window. */ | ||
5864 | if (xd3_source_match_setup (stream, stream->large_table[linx] - HASH_CKOFFSET) == 0) | ||
5865 | { | ||
5866 | if ((ret = xd3_source_extend_match (stream))) { return ret; } | ||
5867 | |||
5868 | /* Update stream position. match_fwd is zero if no match. */ | ||
5869 | if (stream->match_fwd > 0) | ||
5870 | { | ||
5871 | HANDLELAZY (stream->match_fwd); | ||
5872 | } | ||
5873 | } | ||
5874 | } | ||
5875 | } | ||
5876 | |||
5877 | /* Small matches. */ | ||
5878 | if (DO_SMALL) | ||
5879 | { | ||
5880 | sinx = xd3_checksum_hash (& stream->small_hash, scksum); | ||
5881 | |||
5882 | /* Verify incremental state in debugging mode. */ | ||
5883 | IF_DEBUG (xd3_verify_small_state (stream, inp, scksum)); | ||
5884 | |||
5885 | /* Search for the longest match */ | ||
5886 | if (unlikely (stream->small_table[sinx] != 0)) | ||
5887 | { | ||
5888 | match_length = xd3_smatch (stream, | ||
5889 | stream->small_table[sinx], | ||
5890 | scksum, | ||
5891 | & match_offset); | ||
5892 | } | ||
5893 | else | ||
5894 | { | ||
5895 | match_length = 0; | ||
5896 | } | ||
5897 | |||
5898 | /* Insert a hash for this string. */ | ||
5899 | xd3_scksum_insert (stream, sinx, scksum, pos_in); | ||
5900 | |||
5901 | /* Promote the previous match address to head of the hash bucket. This is | ||
5902 | * intended to improve the same cache hit rate. */ | ||
5903 | if (match_length != 0 && PROMOTE) | ||
5904 | { | ||
5905 | xd3_scksum_insert (stream, sinx, scksum, match_offset); | ||
5906 | } | ||
5907 | |||
5908 | /* Maybe output a COPY instruction */ | ||
5909 | if (unlikely (match_length >= min_match)) | ||
5910 | { | ||
5911 | IF_DEBUG1 ({ | ||
5912 | static int x = 0; | ||
5913 | P(RINT "[target match:%d] <inp %u %u> <cpy %u %u> (-%d) [ %u bytes ]\n", | ||
5914 | x++, | ||
5915 | pos_in, | ||
5916 | pos_in + match_length, | ||
5917 | match_offset, | ||
5918 | match_offset + match_length, | ||
5919 | pos_in - match_offset, | ||
5920 | match_length); | ||
5921 | }); | ||
5922 | |||
5923 | if ((ret = xd3_found_match (stream, | ||
5924 | /* decoder position */ pos_in, | ||
5925 | /* length */ match_length, | ||
5926 | /* address */ match_offset, | ||
5927 | /* is_source */ 0))) { return ret; } | ||
5928 | |||
5929 | /* SSMATCH option: search small matches: continue the incremental checksum | ||
5930 | * through the matched material. Only if not lazy matching. */ | ||
5931 | if (SSMATCH && !TRYLAZYLEN (match_length, pos_in, max_in)) | ||
5932 | { | ||
5933 | usize_t avail = max_in - SLOOK - pos_in; | ||
5934 | usize_t ml_m1 = match_length - 1; | ||
5935 | usize_t right; | ||
5936 | int aincr; | ||
5937 | |||
5938 | IF_DEBUG (usize_t nposi = pos_in + match_length); | ||
5939 | |||
5940 | /* Avail is the last offset we can compute an incremental cksum. If the | ||
5941 | * match length exceeds that offset then we are finished performing | ||
5942 | * incremental updates after this step. */ | ||
5943 | if (ml_m1 < avail) | ||
5944 | { | ||
5945 | right = ml_m1; | ||
5946 | aincr = 1; | ||
5947 | } | ||
5948 | else | ||
5949 | { | ||
5950 | right = avail; | ||
5951 | aincr = 0; | ||
5952 | } | ||
5953 | |||
5954 | /* Compute incremental checksums within the match. */ | ||
5955 | while (right > 0) | ||
5956 | { | ||
5957 | SMALL_CKSUM_UPDATE (scksum, inp, SLOOK); | ||
5958 | if (DO_LARGE && (pos_in + LLOOK < max_in)) { | ||
5959 | LARGE_CKSUM_UPDATE (lcksum, inp, LLOOK); | ||
5960 | } | ||
5961 | |||
5962 | inp += 1; | ||
5963 | pos_in += 1; | ||
5964 | right -= 1; | ||
5965 | sinx = xd3_checksum_hash (& stream->small_hash, scksum); | ||
5966 | |||
5967 | IF_DEBUG (xd3_verify_small_state (stream, inp, scksum)); | ||
5968 | |||
5969 | xd3_scksum_insert (stream, sinx, scksum, pos_in); | ||
5970 | } | ||
5971 | |||
5972 | if (aincr) | ||
5973 | { | ||
5974 | /* Keep searching... */ | ||
5975 | if (DO_RUN) { run_l = xd3_comprun (inp+1, SLOOK-1, & run_c); } | ||
5976 | XD3_ASSERT (nposi == pos_in + 1); | ||
5977 | XD3_ASSERT (pos_in + SLOOK < max_in); | ||
5978 | min_match = MIN_MATCH; | ||
5979 | goto updatesure; | ||
5980 | } | ||
5981 | else | ||
5982 | { | ||
5983 | /* Not enough input for another match. */ | ||
5984 | XD3_ASSERT (pos_in + SLOOK >= max_in); | ||
5985 | goto loopnomore; | ||
5986 | } | ||
5987 | } | ||
5988 | |||
5989 | /* Else case: copy instruction, but no SSMATCH. */ | ||
5990 | HANDLELAZY (match_length); | ||
5991 | } | ||
5992 | } | ||
5993 | |||
5994 | /* The logic above prevents excess work during lazy matching by increasing min_match | ||
5995 | * to avoid smaller matches. Each time we advance pos_in by one, the minimum match | ||
5996 | * shortens as well. */ | ||
5997 | if (min_match > MIN_MATCH) | ||
5998 | { | ||
5999 | min_match -= 1; | ||
6000 | } | ||
6001 | |||
6002 | updateone: | ||
6003 | |||
6004 | /* See if there are no more incremental cksums to compute. */ | ||
6005 | if (pos_in + SLOOK == max_in) | ||
6006 | { | ||
6007 | goto loopnomore; | ||
6008 | } | ||
6009 | |||
6010 | updatesure: | ||
6011 | |||
6012 | /* Compute next RUN, CKSUM */ | ||
6013 | if (DO_RUN) { NEXTRUN (inp[SLOOK]); } | ||
6014 | if (DO_SMALL) { SMALL_CKSUM_UPDATE (scksum, inp, SLOOK); } | ||
6015 | if (DO_LARGE && (pos_in + LLOOK < max_in)) { LARGE_CKSUM_UPDATE (lcksum, inp, LLOOK); } | ||
6016 | } | ||
6017 | |||
6018 | loopnomore: | ||
6019 | return 0; | ||
6020 | } | ||
6021 | #endif /* XD3_ENCODER */ | ||
6022 | #endif /* __XDELTA3_C_TEMPLATE_PASS__ */ | ||
diff --git a/xdelta3/xdelta3.h b/xdelta3/xdelta3.h new file mode 100755 index 0000000..a35c9b0 --- /dev/null +++ b/xdelta3/xdelta3.h | |||
@@ -0,0 +1,1029 @@ | |||
1 | /* xdelta 3 - delta compression tools and library | ||
2 | * Copyright (C) 2001, 2003, 2004, 2005, 2006. Joshua P. MacDonald | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify | ||
5 | * it under the terms of the GNU General Public License as published by | ||
6 | * the Free Software Foundation; either version 2 of the License, or | ||
7 | * (at your option) any later version. | ||
8 | * | ||
9 | * This program is distributed in the hope that it will be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write to the Free Software | ||
16 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
17 | */ | ||
18 | |||
19 | /* Welcome to Xdelta. If you want to know more about Xdelta, start by reading xdelta3.c. | ||
20 | * If you are ready to use the API, continue reading here. There are two interfaces -- | ||
21 | * xd3_encode_input and xd3_decode_input -- plus a dozen or so related calls. This | ||
22 | * interface is styled after Zlib. */ | ||
23 | |||
24 | #ifndef _XDELTA3_H_ | ||
25 | #define _XDELTA3_H_ | ||
26 | |||
27 | #include <stdlib.h> | ||
28 | #include <string.h> | ||
29 | #include <sys/types.h> | ||
30 | |||
31 | /**********************************************************************/ | ||
32 | |||
33 | /* Default configured value of stream->winsize. If the program supplies | ||
34 | * xd3_encode_input() with data smaller than winsize the stream will | ||
35 | * automatically buffer the input, otherwise the input buffer is used directly. | ||
36 | */ | ||
37 | #ifndef XD3_DEFAULT_WINSIZE | ||
38 | #define XD3_DEFAULT_WINSIZE (1U << 18) | ||
39 | #endif | ||
40 | |||
41 | /* The source block size. | ||
42 | */ | ||
43 | #ifndef XD3_DEFAULT_SRCBLKSZ | ||
44 | #define XD3_DEFAULT_SRCBLKSZ (1U << 18) | ||
45 | #endif | ||
46 | |||
47 | /* The source window starts with only a few checksums, then doubles up to | ||
48 | * XD3_DEFAULT_MAX_CKSUM_ADVANCE. */ | ||
49 | #ifndef XD3_DEFAULT_START_CKSUM_ADVANCE | ||
50 | #define XD3_DEFAULT_START_CKSUM_ADVANCE 1024 | ||
51 | #endif | ||
52 | |||
53 | /* TODO: There is no command-line flag to set this value. */ | ||
54 | #ifndef XD3_DEFAULT_MAX_CKSUM_ADVANCE | ||
55 | #define XD3_DEFAULT_MAX_CKSUM_ADVANCE (1U << 23) | ||
56 | #endif | ||
57 | |||
58 | /* Default total size of the source window used in xdelta3-main.h */ | ||
59 | #ifndef XD3_DEFAULT_SRCWINSZ | ||
60 | #define XD3_DEFAULT_SRCWINSZ (1U << 23) | ||
61 | #endif | ||
62 | |||
63 | /* Default configured value of stream->memsize. This dictates how much memory Xdelta will | ||
64 | * use for string-matching data structures. */ | ||
65 | #ifndef XD3_DEFAULT_MEMSIZE | ||
66 | #define XD3_DEFAULT_MEMSIZE (1U << 18) | ||
67 | #endif | ||
68 | |||
69 | /* When Xdelta requests a memory allocation for certain buffers, it rounds up to units of | ||
70 | * at least this size. The code assumes (and asserts) that this is a power-of-two. */ | ||
71 | #ifndef XD3_ALLOCSIZE | ||
72 | #define XD3_ALLOCSIZE (1U<<13) | ||
73 | #endif | ||
74 | |||
75 | /* The XD3_HARDMAXWINSIZE parameter is a safety mechanism to protect decoders against | ||
76 | * malicious files. The decoder will never decode a window larger than this. If the file | ||
77 | * specifies VCD_TARGET the decoder may require two buffers of this size. Rationale for | ||
78 | * choosing 22-bits as a maximum: this means that in the worst case, any VCDIFF address | ||
79 | * without a copy window will require 3 bytes to encode (7 bits per byte, HERE and SAME | ||
80 | * modes making every address within half the window away). */ | ||
81 | #ifndef XD3_HARDMAXWINSIZE | ||
82 | #define XD3_HARDMAXWINSIZE (1U<<23) | ||
83 | #endif | ||
84 | |||
85 | /* The XD3_NODECOMPRESSSIZE parameter tells the xdelta main routine not to try to | ||
86 | * externally-decompress source inputs that are too large. Since these files must be | ||
87 | * seekable, they are decompressed to a temporary file location and the user may not wish | ||
88 | * for this. */ | ||
89 | #ifndef XD3_NODECOMPRESSSIZE | ||
90 | #define XD3_NODECOMPRESSSIZE (1U<<24) | ||
91 | #endif | ||
92 | |||
93 | /* The IOPT_SIZE value sets the size of a buffer used to batch overlapping copy | ||
94 | * instructions before they are optimized by picking the best non-overlapping ranges. The | ||
95 | * larger this buffer, the longer a forced xd3_srcwin_setup() decision is held off. */ | ||
96 | #ifndef XD3_DEFAULT_IOPT_SIZE | ||
97 | #define XD3_DEFAULT_IOPT_SIZE 128 | ||
98 | #endif | ||
99 | |||
100 | /* The maximum distance backward to search for small matches */ | ||
101 | #ifndef XD3_DEFAULT_SPREVSZ | ||
102 | #define XD3_DEFAULT_SPREVSZ (1U << 16) | ||
103 | #endif | ||
104 | |||
105 | /* Sizes and addresses within VCDIFF windows are represented as usize_t | ||
106 | * | ||
107 | * For source-file offsets and total file sizes, total input and output counts, the xoff_t | ||
108 | * type is used. The decoder and encoder generally check for overflow of the xoff_t size, | ||
109 | * and this is tested at the 32bit boundary [xdelta3-test.h]. | ||
110 | */ | ||
111 | #ifndef _WIN32 | ||
112 | typedef unsigned int usize_t; | ||
113 | typedef u_int8_t uint8_t; | ||
114 | typedef u_int16_t uint16_t; | ||
115 | typedef u_int32_t uint32_t; | ||
116 | typedef u_int64_t uint64_t; | ||
117 | #else /* _WIN32 */ | ||
118 | #include <windows.h> | ||
119 | #define INLINE /* defined empty here, so the #ifndef INLINE default further down is not applied */ | ||
120 | typedef unsigned int uint; | ||
121 | typedef unsigned int usize_t; /* must match SIZEOF_USIZE_T */ | ||
122 | typedef unsigned char uint8_t; | ||
123 | typedef unsigned short uint16_t; | ||
124 | typedef unsigned long uint32_t; | ||
125 | typedef ULONGLONG uint64_t; | ||
126 | #endif /* _WIN32 */ | ||
127 | |||
128 | #define SIZEOF_USIZE_T 4 | ||
129 | |||
130 | #ifndef XD3_USE_LARGEFILE64 | ||
131 | #define XD3_USE_LARGEFILE64 1 | ||
132 | #endif | ||
133 | |||
134 | #if XD3_USE_LARGEFILE64 | ||
135 | #define __USE_FILE_OFFSET64 1 /* GLIBC: for 64bit fileops, ... ? */ | ||
136 | typedef uint64_t xoff_t; | ||
137 | #define SIZEOF_XOFF_T 8 | ||
138 | #else | ||
139 | typedef uint32_t xoff_t; | ||
140 | #define SIZEOF_XOFF_T 4 | ||
141 | #endif | ||
142 | |||
143 | #define USE_UINT32 (SIZEOF_USIZE_T == 4 || SIZEOF_XOFF_T == 4 || REGRESSION_TEST) | ||
144 | #define USE_UINT64 (SIZEOF_USIZE_T == 8 || SIZEOF_XOFF_T == 8 || REGRESSION_TEST) | ||
145 | |||
146 | /**********************************************************************/ | ||
147 | |||
148 | #ifndef INLINE | ||
149 | #define INLINE inline | ||
150 | #endif | ||
151 | |||
152 | /* Whether to build the encoder, otherwise only build the decoder. */ | ||
153 | #ifndef XD3_ENCODER | ||
154 | #define XD3_ENCODER 1 | ||
155 | #endif | ||
156 | |||
157 | /* The code returned when main() fails, also defined in system includes. */ | ||
158 | #ifndef EXIT_FAILURE | ||
159 | #define EXIT_FAILURE 1 | ||
160 | #endif | ||
161 | |||
162 | /* REGRESSION TEST enables the "xdelta3 test" command, which runs a series of self-tests. */ | ||
163 | #ifndef REGRESSION_TEST | ||
164 | #define REGRESSION_TEST 0 | ||
165 | #endif | ||
166 | |||
167 | /* XD3_DEBUG=1 enables assertions and various statistics. Levels > 1 enable some | ||
168 | * additional output only useful during development and debugging. */ | ||
169 | #ifndef XD3_DEBUG | ||
170 | #define XD3_DEBUG 0 | ||
171 | #endif | ||
172 | |||
173 | #ifndef PYTHON_MODULE | ||
174 | #define PYTHON_MODULE 0 | ||
175 | #endif | ||
176 | |||
177 | /* There are three string matching functions supplied: one fast, one slow (default), and | ||
178 | * one soft-configurable. To disable any of these, use the following definitions. */ | ||
179 | #ifndef XD3_BUILD_SLOW | ||
180 | #define XD3_BUILD_SLOW 1 | ||
181 | #endif | ||
182 | #ifndef XD3_BUILD_FAST | ||
183 | #define XD3_BUILD_FAST 1 | ||
184 | #endif | ||
185 | #ifndef XD3_BUILD_SOFT | ||
186 | #define XD3_BUILD_SOFT 1 | ||
187 | #endif | ||
188 | |||
189 | #if XD3_DEBUG | ||
190 | #include <stdio.h> | ||
191 | #endif | ||
192 | |||
193 | /* XPRINT. Debug output and VCDIFF_TOOLS functions report to stderr. I have used an | ||
194 | * irregular style to abbreviate [fprintf(stderr, "] as [P(RINT "]. */ | ||
195 | #define P fprintf | ||
196 | #define RINT stderr, | ||
197 | |||
198 | typedef struct _xd3_stream xd3_stream; | ||
199 | typedef struct _xd3_source xd3_source; | ||
200 | typedef struct _xd3_hash_cfg xd3_hash_cfg; | ||
201 | typedef struct _xd3_smatcher xd3_smatcher; | ||
202 | typedef struct _xd3_rinst xd3_rinst; | ||
203 | typedef struct _xd3_dinst xd3_dinst; | ||
204 | typedef struct _xd3_hinst xd3_hinst; | ||
205 | typedef struct _xd3_rpage xd3_rpage; | ||
206 | typedef struct _xd3_addr_cache xd3_addr_cache; | ||
207 | typedef struct _xd3_output xd3_output; | ||
208 | typedef struct _xd3_desect xd3_desect; | ||
209 | typedef struct _xd3_iopt_buf xd3_iopt_buf; | ||
210 | typedef struct _xd3_rlist xd3_rlist; | ||
211 | typedef struct _xd3_sec_type xd3_sec_type; | ||
212 | typedef struct _xd3_sec_cfg xd3_sec_cfg; | ||
213 | typedef struct _xd3_sec_stream xd3_sec_stream; | ||
214 | typedef struct _xd3_config xd3_config; | ||
215 | typedef struct _xd3_code_table_desc xd3_code_table_desc; | ||
216 | typedef struct _xd3_code_table_sizes xd3_code_table_sizes; | ||
217 | typedef struct _xd3_slist xd3_slist; | ||
218 | |||
219 | /* The stream configuration has three callback functions, all of which may be supplied | ||
220 | * with NULL values. If config->getblk is provided as NULL, the stream returns | ||
221 | * XD3_GETSRCBLK. */ | ||
222 | |||
223 | typedef void* (xd3_alloc_func) (void *opaque, | ||
224 | usize_t items, | ||
225 | usize_t size); | ||
226 | typedef void (xd3_free_func) (void *opaque, | ||
227 | void *address); | ||
228 | |||
229 | typedef int (xd3_getblk_func) (xd3_stream *stream, | ||
230 | xd3_source *source, | ||
231 | xoff_t blkno); | ||
232 | |||
233 | /* These are internal functions to delay construction of encoding tables and support | ||
234 | * alternate code tables. See the comments & code enabled by GENERIC_ENCODE_TABLES. */ | ||
235 | |||
236 | typedef const xd3_dinst* (xd3_code_table_func) (void); | ||
237 | typedef int (xd3_comp_table_func) (xd3_stream *stream, const uint8_t **data, usize_t *size); | ||
238 | |||
239 | |||
240 | /* Some junk. */ | ||
241 | |||
242 | #ifndef XD3_ASSERT | ||
243 | #if XD3_DEBUG | ||
244 | #define XD3_ASSERT(x) \ | ||
245 | do { if (! (x)) { P(RINT "%s:%d: XD3 assertion failed: %s\n", __FILE__, __LINE__, #x); \ | ||
246 | abort (); } } while (0) | ||
247 | #else | ||
248 | #define XD3_ASSERT(x) (void)0 | ||
249 | #endif | ||
250 | #endif | ||
251 | |||
252 | #ifdef __GNUC__ | ||
253 | /* As seen on linux-kernel. */ | ||
254 | #ifndef max | ||
255 | #define max(x,y) ({ \ | ||
256 | const typeof(x) _x = (x); \ | ||
257 | const typeof(y) _y = (y); \ | ||
258 | (void) (&_x == &_y); \ | ||
259 | _x > _y ? _x : _y; }) | ||
260 | #endif | ||
261 | |||
262 | #ifndef min | ||
263 | #define min(x,y) ({ \ | ||
264 | const typeof(x) _x = (x); \ | ||
265 | const typeof(y) _y = (y); \ | ||
266 | (void) (&_x == &_y); \ | ||
267 | _x < _y ? _x : _y; }) | ||
268 | #endif | ||
269 | #else | ||
270 | #ifndef max | ||
271 | #define max(x,y) ((x) < (y) ? (y) : (x)) | ||
272 | #endif | ||
273 | #ifndef min | ||
274 | #define min(x,y) ((x) < (y) ? (x) : (y)) | ||
275 | #endif | ||
276 | #endif | ||
277 | |||
278 | /****************************************************************************************** | ||
279 | PUBLIC ENUMS | ||
280 | ******************************************************************************************/ | ||
281 | |||
282 | /* These are the six ordinary status codes returned by the xd3_encode_input() and | ||
283 | * xd3_decode_input() state machines. */ | ||
284 | typedef enum { | ||
285 | |||
286 | /* An application must be prepared to handle these return values from either | ||
287 | * xd3_encode_input or xd3_decode_input, except in the case of no-source compression, in | ||
288 | * which case XD3_GETSRCBLK is never returned. More detailed comments for these are | ||
289 | * given in xd3_encode_input and xd3_decode_input comments, below. */ | ||
290 | XD3_INPUT = -17703, /* need input */ | ||
291 | XD3_OUTPUT = -17704, /* have output */ | ||
292 | XD3_GETSRCBLK = -17705, /* need a block of source input (with no xd3_getblk function), | ||
293 | * a chance to do non-blocking read. */ | ||
294 | XD3_GOTHEADER = -17706, /* (decode-only) after the initial VCDIFF & first window header */ | ||
295 | XD3_WINSTART = -17707, /* notification: returned before a window is processed, giving a | ||
296 | * chance to XD3_SKIP_WINDOW or not XD3_SKIP_EMIT that window. */ | ||
297 | XD3_WINFINISH = -17708, /* notification: returned after encode/decode & output for a window */ | ||
298 | |||
299 | } xd3_rvalues; | ||
300 | |||
301 | /* special values in config->flags */ | ||
302 | typedef enum | ||
303 | { | ||
304 | XD3_JUST_HDR = (1 << 1), /* used by VCDIFF tools, see xdelta3-main.h. */ | ||
305 | XD3_SKIP_WINDOW = (1 << 2), /* used by VCDIFF tools, see xdelta3-main.h. */ | ||
306 | XD3_SKIP_EMIT = (1 << 3), /* used by VCDIFF tools, see xdelta3-main.h. */ | ||
307 | XD3_FLUSH = (1 << 4), /* flush the stream buffer to prepare for xd3_stream_close(). */ | ||
308 | |||
309 | XD3_SEC_DJW = (1 << 5), /* use DJW static huffman */ | ||
310 | XD3_SEC_FGK = (1 << 6), /* use FGK adaptive huffman */ | ||
311 | XD3_SEC_TYPE = (XD3_SEC_DJW | XD3_SEC_FGK), | ||
312 | |||
313 | XD3_SEC_NODATA = (1 << 7), /* disable secondary compression of the data section. */ | ||
314 | XD3_SEC_NOINST = (1 << 8), /* disable secondary compression of the inst section. */ | ||
315 | XD3_SEC_NOADDR = (1 << 9), /* disable secondary compression of the addr section (which is most random). */ | ||
316 | |||
317 | XD3_SEC_OTHER = (XD3_SEC_NODATA | XD3_SEC_NOINST | XD3_SEC_NOADDR), | ||
318 | |||
319 | XD3_ADLER32 = (1 << 10), /* enable checksum computation in the encoder. */ | ||
320 | XD3_ADLER32_NOVER = (1 << 11), /* disable checksum verification in the decoder. */ | ||
321 | |||
322 | XD3_ALT_CODE_TABLE = (1 << 12), /* for testing the alternate code table encoding. */ | ||
323 | |||
324 | XD3_NOCOMPRESS = (1 << 13), /* disable ordinary data compression feature, | ||
325 | * only search the source, not the target. */ | ||
326 | XD3_BEGREEDY = (1 << 14), /* disable the "1.5-pass algorithm", instead use | ||
327 | * greedy matching. Greedy is off by default. */ | ||
328 | } xd3_flags; | ||
329 | |||
330 | /* The values of this enumeration are set in xd3_config using the smatch_cfg variable. It | ||
331 | * can be set to slow, fast, soft, or default. The fast and slow setting uses preset, | ||
332 | * hardcoded parameters and the soft setting is accompanied by user-supplied parameters. | ||
333 | * If the user supplies 'default' the code selects one of the available string matchers. | ||
334 | * Due to compile-time settings (see XD3_SLOW_SMATCHER, XD3_FAST_SMATCHER, | ||
335 | * XD3_SOFT_SMATCHER variables), not all options may be available. */ | ||
336 | typedef enum | ||
337 | { | ||
338 | XD3_SMATCH_DEFAULT = 0, | ||
339 | XD3_SMATCH_SLOW = 1, | ||
340 | XD3_SMATCH_FAST = 2, | ||
341 | XD3_SMATCH_SOFT = 3, | ||
342 | } xd3_smatch_cfg; | ||
343 | |||
344 | /****************************************************************************************** | ||
345 | PRIVATE ENUMS | ||
346 | ******************************************************************************************/ | ||
347 | |||
348 | /* stream->match_state is part of the xd3_encode_input state machine for source matching: | ||
349 | * | ||
350 | * 1. the XD3_GETSRCBLK block-read mechanism means reentrant matching | ||
351 | * 2. this state spans encoder windows: a match and end-of-window will continue in the next | ||
352 | * 3. the initial target byte and source byte are a presumed match, to avoid some computation | ||
353 | * in case the inputs are identical. | ||
354 | */ | ||
355 | typedef enum { | ||
356 | |||
357 | MATCH_TARGET = 0, /* in this state, attempt to match the start of the target with the | ||
358 | * previously set source address (initially 0). */ | ||
359 | MATCH_BACKWARD = 1, /* currently expanding a match backward in the source/target. */ | ||
360 | MATCH_FORWARD = 2, /* currently expanding a match forward in the source/target. */ | ||
361 | MATCH_SEARCHING = 3, /* currently searching for a match. */ | ||
362 | |||
363 | } xd3_match_state; | ||
364 | |||
365 | /* The xd3_encode_input state machine steps through these states in the following order. | ||
366 | * The matcher is reentrant and returns XD3_INPUT whenever it requires more data. After | ||
367 | * receiving XD3_INPUT, if the application reads EOF it should call xd3_stream_close(). | ||
368 | */ | ||
369 | typedef enum { | ||
370 | |||
371 | ENC_INIT = 0, /* xd3_encode_input has never been called. */ | ||
372 | ENC_INPUT = 1, /* waiting for xd3_avail_input () to be called. */ | ||
373 | ENC_SEARCH = 2, /* currently searching for matches. */ | ||
374 | ENC_FLUSH = 3, /* currently emitting output. */ | ||
375 | ENC_POSTOUT = 4, /* after an output section. */ | ||
376 | ENC_POSTWIN = 5, /* after all output sections. */ | ||
377 | ENC_ABORTED = 6, /* abort. */ | ||
378 | } xd3_encode_state; | ||
379 | |||
380 | /* The xd3_decode_input state machine steps through these states in the following order. | ||
381 | * The matcher is reentrant and returns XD3_INPUT whenever it requires more data. After | ||
382 | * receiving XD3_INPUT, if the application reads EOF it should call xd3_stream_close(). | ||
383 | * | ||
384 | * 0-8: the VCDIFF header | ||
385 | * 9-18: the VCDIFF window header | ||
386 | * 19-21: the three primary sections: data (which I think should have gone last), inst, addr | ||
387 | * 22: producing output: returns XD3_OUTPUT, possibly XD3_GETSRCBLK, | ||
388 | * 23: return XD3_WINFINISH, set state=9 to decode more input | ||
389 | */ | ||
390 | typedef enum { | ||
391 | |||
392 | DEC_VCHEAD = 0, /* VCDIFF header */ | ||
393 | DEC_HDRIND = 1, /* header indicator */ | ||
394 | |||
395 | DEC_SECONDID = 2, /* secondary compressor ID */ | ||
396 | |||
397 | DEC_TABLEN = 3, /* code table length */ | ||
398 | DEC_NEAR = 4, /* code table near */ | ||
399 | DEC_SAME = 5, /* code table same */ | ||
400 | DEC_TABDAT = 6, /* code table data */ | ||
401 | |||
402 | DEC_APPLEN = 7, /* application data length */ | ||
403 | DEC_APPDAT = 8, /* application data */ | ||
404 | |||
405 | DEC_WININD = 9, /* window indicator */ | ||
406 | |||
407 | DEC_CPYLEN = 10, /* copy window length */ | ||
408 | DEC_CPYOFF = 11, /* copy window offset */ | ||
409 | |||
410 | DEC_ENCLEN = 12, /* length of delta encoding */ | ||
411 | DEC_TGTLEN = 13, /* length of target window */ | ||
412 | DEC_DELIND = 14, /* delta indicator */ | ||
413 | |||
414 | DEC_DATALEN = 15, /* length of ADD+RUN data */ | ||
415 | DEC_INSTLEN = 16, /* length of instruction data */ | ||
416 | DEC_ADDRLEN = 17, /* length of address data */ | ||
417 | |||
418 | DEC_CKSUM = 18, /* window checksum */ | ||
419 | |||
420 | DEC_DATA = 19, /* data section */ | ||
421 | DEC_INST = 20, /* instruction section */ | ||
422 | DEC_ADDR = 21, /* address section */ | ||
423 | |||
424 | DEC_EMIT = 22, /* producing data */ | ||
425 | |||
426 | DEC_FINISH = 23, /* window finished */ | ||
427 | |||
428 | DEC_ABORTED = 24, /* xd3_abort_stream */ | ||
429 | } xd3_decode_state; | ||
430 | |||
431 | /* An application never sees these internal codes. They are numbered after the last public xd3_rvalues code so that no internal code shares a value with a public one. */ | ||
432 | typedef enum { | ||
433 | XD3_NOSECOND = -17709, /* when secondary compression finds no improvement. */ | ||
434 | } xd3_pvalues; | ||
435 | |||
436 | /****************************************************************************************** | ||
437 | internal types | ||
438 | ******************************************************************************************/ | ||
439 | |||
440 | /* instruction lists used in the IOPT buffer */ | ||
441 | struct _xd3_rlist | ||
442 | { | ||
443 | xd3_rlist *next; | ||
444 | xd3_rlist *prev; | ||
445 | }; | ||
446 | |||
447 | /* the raw encoding of an instruction used in the IOPT buffer */ | ||
448 | struct _xd3_rinst | ||
449 | { | ||
450 | uint8_t type; | ||
451 | uint8_t xtra; | ||
452 | uint8_t code1; | ||
453 | uint8_t code2; | ||
454 | usize_t pos; | ||
455 | usize_t size; | ||
456 | xoff_t addr; | ||
457 | xd3_rlist link; | ||
458 | }; | ||
459 | |||
460 | /* the code-table form of a single- or double-instruction */ | ||
461 | struct _xd3_dinst | ||
462 | { | ||
463 | uint8_t type1; | ||
464 | uint8_t size1; | ||
465 | uint8_t type2; | ||
466 | uint8_t size2; | ||
467 | }; | ||
468 | |||
469 | /* the decoded form of a single (half) instruction. */ | ||
470 | struct _xd3_hinst | ||
471 | { | ||
472 | uint8_t type; | ||
473 | usize_t size; | ||
474 | usize_t addr; | ||
475 | }; | ||
476 | |||
477 | /* used by the encoder to buffer output in sections. list of blocks. */ | ||
478 | struct _xd3_output | ||
479 | { | ||
480 | uint8_t *base; | ||
481 | usize_t next; | ||
482 | usize_t avail; | ||
483 | xd3_output *next_page; | ||
484 | }; | ||
485 | |||
486 | /* the VCDIFF address cache, see the RFC */ | ||
487 | struct _xd3_addr_cache | ||
488 | { | ||
489 | uint s_near; | ||
490 | uint s_same; | ||
491 | usize_t next_slot; /* the circular index for near */ | ||
492 | usize_t *near_array; /* array of size s_near */ | ||
493 | usize_t *same_array; /* array of size s_same*256 */ | ||
494 | }; | ||
495 | |||
496 | /* the IOPT buffer has a used list of (ordered) instructions, possibly overlapping in | ||
497 | * target addresses, awaiting a flush */ | ||
498 | struct _xd3_iopt_buf | ||
499 | { | ||
500 | xd3_rlist used; | ||
501 | xd3_rlist free; | ||
502 | xd3_rinst *buffer; | ||
503 | }; | ||
504 | |||
505 | /* This is the record of a pre-compiled configuration, a subset of xd3_config. Keep them | ||
506 | * in sync! The user never sees this structure. Note: update XD3_SOFTCFG_VARCNT when | ||
507 | * changing. */ | ||
508 | struct _xd3_smatcher | ||
509 | { | ||
510 | const char *name; | ||
511 | int (*string_match) (xd3_stream *stream); | ||
512 | uint large_look; | ||
513 | uint large_step; | ||
514 | uint small_look; | ||
515 | uint small_chain; | ||
516 | uint small_lchain; | ||
517 | uint ssmatch; | ||
518 | uint try_lazy; | ||
519 | uint max_lazy; | ||
520 | uint long_enough; | ||
521 | uint promote; | ||
522 | }; | ||
523 | |||
524 | /* hash table size & power-of-two hash function. */ | ||
525 | struct _xd3_hash_cfg | ||
526 | { | ||
527 | usize_t size; | ||
528 | usize_t shift; | ||
529 | usize_t mask; | ||
530 | }; | ||
531 | |||
532 | /* a hash-chain link in the small match table, embedded with position and checksum */ | ||
533 | struct _xd3_slist | ||
534 | { | ||
535 | xd3_slist *next; | ||
536 | xd3_slist *prev; | ||
537 | usize_t pos; | ||
538 | usize_t scksum; | ||
539 | }; | ||
540 | |||
541 | /* a decoder section (data, inst, or addr). there is an optimization to avoid copying | ||
542 | * these sections if all the input is available, related to the copied field below. | ||
543 | * secondary compression uses the copied2 field. */ | ||
544 | struct _xd3_desect | ||
545 | { | ||
546 | const uint8_t *buf; | ||
547 | const uint8_t *buf_max; | ||
548 | usize_t size; | ||
549 | usize_t pos; | ||
550 | uint8_t *copied1; | ||
551 | usize_t alloc1; | ||
552 | uint8_t *copied2; | ||
553 | usize_t alloc2; | ||
554 | }; | ||
555 | |||
556 | /****************************************************************************************** | ||
557 | public types | ||
558 | ******************************************************************************************/ | ||
559 | |||
560 | /* Settings for the secondary compressor. */ | ||
561 | struct _xd3_sec_cfg | ||
562 | { | ||
563 | int data_type; /* Which section. (set automatically) */ | ||
564 | int ngroups; /* Number of DJW Huffman groups. */ | ||
565 | int sector_size; /* Sector size. */ | ||
566 | int inefficient; /* If true, ignore efficiency check [avoid XD3_NOSECOND]. */ | ||
567 | }; | ||
568 | |||
569 | /* This is the user-visible stream configuration. */ | ||
570 | struct _xd3_config | ||
571 | { | ||
572 | usize_t memsize; /* How much memory Xdelta may allocate */ | ||
573 | usize_t winsize; /* The encoder window size. */ | ||
574 | usize_t sprevsz; /* How far back small string matching goes */ | ||
575 | usize_t iopt_size; /* entries in the instruction-optimizing buffer */ | ||
576 | |||
577 | usize_t srcwin_size; /* Initial size of the source-window lookahead */ | ||
578 | usize_t srcwin_maxsz; /* srcwin_size grows by a factor of 2 when no matches are found */ | ||
579 | |||
580 | xd3_getblk_func *getblk; /* The three callbacks. */ | ||
581 | xd3_alloc_func *alloc; | ||
582 | xd3_free_func *freef; | ||
583 | void *opaque; /* Not used. */ | ||
584 | int flags; /* stream->flags are initialized from xd3_config & | ||
585 | * never modified by the library. Use xd3_set_flags | ||
586 | * to modify flags settings mid-stream. */ | ||
587 | |||
588 | xd3_sec_cfg sec_data; /* Secondary compressor config: data */ | ||
589 | xd3_sec_cfg sec_inst; /* Secondary compressor config: inst */ | ||
590 | xd3_sec_cfg sec_addr; /* Secondary compressor config: addr */ | ||
591 | |||
592 | xd3_smatch_cfg smatch_cfg; /* See enum: use fields below for soft config */ | ||
593 | uint large_look; /* large string lookahead (i.e., hashed chars) */ | ||
594 | uint large_step; /* large string interval */ | ||
595 | uint small_look; /* small string lookahead (i.e., hashed chars) */ | ||
596 | uint small_chain; /* small string number of previous matches to try */ | ||
597 | uint small_lchain; /* small string number of previous matches to try, when a lazy match */ | ||
598 | uint ssmatch; /* boolean: insert checksums for matched strings */ | ||
599 | uint try_lazy; /* boolean: whether lazy instruction optimization is attempted */ | ||
600 | uint max_lazy; /* size of smallest match that will disable lazy matching */ | ||
601 | uint long_enough; /* size of smallest match long enough to discontinue string matching. */ | ||
602 | uint promote; /* whether to promote matches in the hash chain */ | ||
603 | }; | ||
604 | |||
605 | /* The primary source file object. You create one of these objects and initialize the first | ||
606 | * four fields. This library maintains the next six fields. The configured getblk implementation is | ||
607 | * responsible for setting the final 3 fields when called (and/or when XD3_GETSRCBLK is returned). | ||
608 | */ | ||
609 | struct _xd3_source | ||
610 | { | ||
611 | /* you set */ | ||
612 | xoff_t size; /* total size of this source; together with blksize it determines the length of the last block (see xd3_bytes_on_srcblk) */ | ||
613 | usize_t blksize; /* block size: every block except possibly the last holds this many bytes */ | ||
614 | const char *name; /* its name, for debug/print purposes */ | ||
615 | void *ioh; /* opaque handle */ | ||
616 | |||
617 | /* xd3 sets */ | ||
618 | usize_t srclen; /* length of this source window (reported by xd3_encoder_srclen) */ | ||
619 | xoff_t srcbase; /* offset of this source window in the source itself (reported by xd3_encoder_srcbase) */ | ||
620 | xoff_t blocks; /* the total number of blocks in this source */ | ||
621 | usize_t cpyoff_blocks; /* offset of copy window in blocks */ | ||
622 | usize_t cpyoff_blkoff; /* offset of copy window in blocks, remainder */ | ||
623 | xoff_t getblkno; /* request block number: xd3 sets current getblk request */ | ||
624 | |||
625 | /* getblk sets */ | ||
626 | xoff_t curblkno; /* current block number: client sets after getblk request */ | ||
627 | usize_t onblk; /* number of bytes on current block: client sets, xd3 verifies; xd3_bytes_on_srcblk gives the expected value */ | ||
628 | const uint8_t *curblk; /* current block array: client sets after getblk request */ | ||
629 | }; | ||
630 | |||
631 | /* The primary xd3_stream object, used for encoding and decoding. You may access only two | ||
632 | * fields: avail_out, next_out. Use the methods above to operate on xd3_stream. */ | ||
633 | struct _xd3_stream | ||
634 | { | ||
635 | /* input state */ | ||
636 | const uint8_t *next_in; /* next input byte (set by xd3_avail_input) */ | ||
637 | usize_t avail_in; /* number of bytes available at next_in (set by xd3_avail_input) */ | ||
638 | xoff_t total_in; /* how many bytes in */ | ||
639 | |||
640 | /* output state */ | ||
641 | uint8_t *next_out; /* next output byte */ | ||
642 | usize_t avail_out; /* number of bytes available at next_out (zeroed by xd3_consume_output) */ | ||
643 | usize_t space_out; /* total out space */ | ||
644 | xoff_t current_window; /* number of windows encoded/decoded */ | ||
645 | xoff_t total_out; /* how many bytes out */ | ||
646 | |||
647 | /* to indicate an error, xd3 sets */ | ||
648 | const char *msg; /* last error message, NULL if no error (xd3_errstring maps NULL to an empty string) */ | ||
649 | |||
650 | /* source configuration */ | ||
651 | xd3_source *src; /* source array; may be NULL (xd3_encoder_used_source tests for that) */ | ||
652 | |||
653 | /* encoder memory configuration */ | ||
654 | usize_t winsize; /* suggested window size */ | ||
655 | usize_t memsize; /* memory size parameter */ | ||
656 | usize_t sprevsz; /* small string, previous window size (power of 2) */ | ||
657 | usize_t sprevmask; /* small string, previous window size mask */ | ||
658 | uint iopt_size; /* NOTE(review): presumably the size of the iopt buffer declared below - confirm */ | ||
659 | |||
660 | /* general configuration */ | ||
661 | xd3_getblk_func *getblk; /* set nxtblk, nxtblkno to scanblkno. NOTE(review): these names do not appear in xd3_source, whose fields are curblk, curblkno and getblkno - this comment looks stale, confirm */ | ||
662 | xd3_alloc_func *alloc; /* malloc function */ | ||
663 | xd3_free_func *free; /* free function */ | ||
664 | void* opaque; /* private data object passed to alloc, free, and getblk */ | ||
665 | int flags; /* various options; use xd3_set_flags to change them mid-stream */ | ||
666 | int aborted; /* NOTE(review): presumably set by xd3_abort_stream - confirm */ | ||
667 | |||
668 | /* secondary compressor configuration */ | ||
669 | xd3_sec_cfg sec_data; /* Secondary compressor config: data */ | ||
670 | xd3_sec_cfg sec_inst; /* Secondary compressor config: inst */ | ||
671 | xd3_sec_cfg sec_addr; /* Secondary compressor config: addr */ | ||
672 | |||
673 | /* fields common to xd3_stream_config, xd3_smatcher */ | ||
674 | uint large_look; /* large string lookahead (i.e., hashed chars) */ | ||
675 | uint large_step; /* large string interval */ | ||
676 | uint small_look; /* small string lookahead (i.e., hashed chars) */ | ||
677 | uint small_chain; /* small string number of previous matches to try */ | ||
678 | uint small_lchain; /* small string number of previous matches to try, when a lazy match */ | ||
679 | uint ssmatch; /* boolean: insert checksums for matched strings */ | ||
680 | uint try_lazy; /* boolean: whether lazy instruction optimization is attempted */ | ||
681 | uint max_lazy; /* size of smallest match that will disable lazy matching */ | ||
682 | uint long_enough; /* size of smallest match long enough to discontinue string matching */ | ||
683 | uint promote; /* whether to promote matches in the hash chain */ | ||
684 | uint srcwin_size; /* initial size of the source-window lookahead */ | ||
685 | uint srcwin_maxsz; /* srcwin_size grows by a factor of 2 when no matches are found */ | ||
686 | int (*string_match) (xd3_stream *stream); /* NOTE(review): presumably the string-matching routine selected from the configuration - confirm */ | ||
687 | |||
688 | usize_t *large_table; /* table of large checksums */ | ||
689 | xd3_hash_cfg large_hash; /* large hash config */ | ||
690 | |||
691 | usize_t *small_table; /* table of small checksums */ | ||
692 | xd3_slist *small_prev; /* table of previous offsets, circular linked list (no sentinel) */ | ||
693 | int small_reset; /* true if small table should be reset */ | ||
694 | |||
695 | xd3_hash_cfg small_hash; /* small hash config */ | ||
696 | |||
697 | xd3_addr_cache acache; /* the vcdiff address cache */ | ||
698 | |||
699 | xd3_encode_state enc_state; /* state of the encoder */ | ||
700 | |||
701 | usize_t taroff; /* base offset of the target input */ | ||
702 | usize_t input_position; /* current input position */ | ||
703 | usize_t min_match; /* current minimum match length, avoids redundant matches */ | ||
704 | usize_t unencoded_offset; /* current input, first unencoded offset. this value is <= the first | ||
705 | * instruction's position in the iopt buffer, if there is at least one | ||
706 | * match in the buffer. */ | ||
707 | |||
708 | // SRCWIN | ||
709 | // these variables plus srcwin_size, srcwin_maxsz above (set by config) | ||
710 | int srcwin_decided; /* boolean: true if the srclen,srcbase have been decided. */ | ||
711 | xoff_t srcwin_cksum_pos; /* Source checksum position */ | ||
712 | |||
713 | // MATCH | ||
714 | xd3_match_state match_state; /* encoder match state */ | ||
715 | xoff_t match_srcpos; /* current match source position relative to srcbase */ | ||
716 | xoff_t match_minaddr; /* smallest matching address to set window params | ||
717 | * (reset each window xd3_encode_reset) */ | ||
718 | xoff_t match_maxaddr; /* largest matching address to set window params | ||
719 | * (reset each window xd3_encode_reset) */ | ||
720 | usize_t match_back; /* match extends back so far */ | ||
721 | usize_t match_maxback; /* match extends back maximum */ | ||
722 | usize_t match_fwd; /* match extends forward so far */ | ||
723 | usize_t match_maxfwd; /* match extends forward maximum */ | ||
724 | |||
725 | uint8_t *buf_in; /* for saving buffered input */ | ||
726 | usize_t buf_avail; /* amount of saved input */ | ||
727 | const uint8_t *buf_leftover; /* leftover content of next_in (i.e., user's buffer) */ | ||
728 | usize_t buf_leftavail; /* amount of leftover content */ | ||
729 | |||
730 | xd3_output *enc_current; /* current output buffer */ | ||
731 | xd3_output *enc_free; /* free output buffers */ | ||
732 | xd3_output *enc_heads[4]; /* array of encoded outputs: head of chain */ | ||
733 | xd3_output *enc_tails[4]; /* array of encoded outputs: tail of chain */ | ||
734 | |||
735 | xd3_iopt_buf iopt; /* instruction optimizing buffer */ | ||
736 | xd3_rinst *iout; /* next single instruction */ | ||
737 | |||
738 | const uint8_t *enc_appheader; /* application header to encode */ | ||
739 | usize_t enc_appheadsz; /* application header size */ | ||
740 | |||
741 | /* decoder stuff */ | ||
742 | xd3_decode_state dec_state; /* current DEC_XXX value */ | ||
743 | uint dec_hdr_ind; /* VCDIFF header indicator */ | ||
744 | uint dec_win_ind; /* VCDIFF window indicator */ | ||
745 | uint dec_del_ind; /* VCDIFF delta indicator */ | ||
746 | |||
747 | uint8_t dec_magic[4]; /* First four bytes */ | ||
748 | usize_t dec_magicbytes; /* Magic position. */ | ||
749 | |||
750 | uint dec_secondid; /* Optional secondary compressor ID. */ | ||
751 | |||
752 | usize_t dec_codetblsz; /* Optional code table: length. */ | ||
753 | uint8_t *dec_codetbl; /* Optional code table: storage. */ | ||
754 | usize_t dec_codetblbytes; /* Optional code table: position. */ | ||
755 | |||
756 | usize_t dec_appheadsz; /* Optional application header: size. */ | ||
757 | uint8_t *dec_appheader; /* Optional application header: storage */ | ||
758 | usize_t dec_appheadbytes; /* Optional application header: position. */ | ||
759 | |||
760 | usize_t dec_cksumbytes; /* Optional checksum: position. */ | ||
761 | uint8_t dec_cksum[4]; /* Optional checksum: storage. */ | ||
762 | uint32_t dec_adler32; /* Optional checksum: value. */ | ||
763 | |||
764 | usize_t dec_cpylen; /* length of copy window (VCD_SOURCE or VCD_TARGET) */ | ||
765 | xoff_t dec_cpyoff; /* offset of copy window (VCD_SOURCE or VCD_TARGET) */ | ||
766 | usize_t dec_enclen; /* length of delta encoding */ | ||
767 | usize_t dec_tgtlen; /* length of target window */ | ||
768 | |||
769 | #if USE_UINT64 | ||
770 | uint64_t dec_64part; /* part of a decoded uint64_t */ | ||
771 | #endif | ||
772 | #if USE_UINT32 | ||
773 | uint32_t dec_32part; /* part of a decoded uint32_t */ | ||
774 | #endif | ||
775 | |||
776 | xoff_t dec_winstart; /* offset of the start of current target window */ | ||
777 | xoff_t dec_window_count; /* == current_window + 1 in DEC_FINISH */ | ||
778 | usize_t dec_winbytes; /* bytes of the three sections so far consumed */ | ||
779 | usize_t dec_hdrsize; /* VCDIFF + app header size */ | ||
780 | |||
781 | const uint8_t *dec_tgtaddrbase; /* Base of decoded target addresses (addr >= dec_cpylen). */ | ||
782 | const uint8_t *dec_cpyaddrbase; /* Base of decoded copy addresses (addr < dec_cpylen). */ | ||
783 | |||
784 | usize_t dec_position; /* current decoder position counting the cpylen offset */ | ||
785 | usize_t dec_maxpos; /* maximum decoder position counting the cpylen offset */ | ||
786 | xd3_hinst dec_current1; /* current instruction */ | ||
787 | xd3_hinst dec_current2; /* current instruction */ | ||
788 | |||
789 | uint8_t *dec_buffer; /* Decode buffer */ | ||
790 | uint8_t *dec_lastwin; /* In case of VCD_TARGET, the last target window. */ | ||
791 | usize_t dec_lastlen; /* length of the last target window */ | ||
792 | xoff_t dec_laststart; /* offset of the start of last target window */ | ||
793 | usize_t dec_lastspace; /* allocated space of last target window, for reuse */ | ||
794 | |||
795 | xd3_desect inst_sect; /* staging area for decoding window sections */ | ||
796 | xd3_desect addr_sect; /* NOTE(review): presumably the address-section counterpart of inst_sect - confirm */ | ||
797 | xd3_desect data_sect; /* NOTE(review): presumably the data-section counterpart of inst_sect - confirm */ | ||
798 | |||
799 | xd3_code_table_func *code_table_func; | ||
800 | xd3_comp_table_func *comp_table_func; | ||
801 | const xd3_dinst *code_table; | ||
802 | const xd3_code_table_desc *code_table_desc; | ||
803 | xd3_dinst *code_table_alloc; | ||
804 | |||
805 | /* secondary compression */ | ||
806 | const xd3_sec_type *sec_type; | ||
807 | xd3_sec_stream *sec_stream_d; /* NOTE(review): the _d/_i/_a suffixes presumably mirror sec_data/sec_inst/sec_addr above - confirm */ | ||
808 | xd3_sec_stream *sec_stream_i; | ||
809 | xd3_sec_stream *sec_stream_a; | ||
810 | |||
811 | #if XD3_DEBUG | ||
812 | /* statistics */ | ||
813 | usize_t n_cpy; | ||
814 | usize_t n_add; | ||
815 | usize_t n_run; | ||
816 | |||
817 | usize_t n_ibytes; | ||
818 | usize_t n_sbytes; | ||
819 | usize_t n_dbytes; | ||
820 | |||
821 | usize_t l_cpy; | ||
822 | usize_t l_add; | ||
823 | usize_t l_run; | ||
824 | |||
825 | usize_t sh_searches; | ||
826 | usize_t sh_compares; | ||
827 | |||
828 | usize_t *i_freqs; | ||
829 | usize_t *i_modes; | ||
830 | usize_t *i_sizes; | ||
831 | |||
832 | usize_t large_ckcnt; | ||
833 | |||
834 | /* memory usage */ | ||
835 | usize_t alloc_cnt; | ||
836 | usize_t free_cnt; | ||
837 | |||
838 | xoff_t n_emit; | ||
839 | #endif | ||
840 | }; | ||
841 | |||
842 | /****************************************************************************************** | ||
843 | PUBLIC FUNCTIONS | ||
844 | ******************************************************************************************/ | ||
845 | |||
846 | /* The two I/O disciplines, encode and decode, have similar stream semantics. It is | ||
847 | * recommended that applications use the same code for compression and decompression - | ||
848 | * because there are only a few differences in handling encoding/decoding. | ||
849 | * | ||
850 | * See also the xd3_avail_input() and xd3_consume_output() routines, inlined below. | ||
851 | * | ||
852 | * XD3_INPUT: the process requires more input: call xd3_avail_input() then repeat | ||
853 | * XD3_OUTPUT: the process has more output: read stream->next_out, stream->avail_out, | ||
854 | * then call xd3_consume_output(), then repeat | ||
855 | * XD3_GOTHEADER: (decoder-only) notification returned following the VCDIFF header and | ||
856 | * first window header. the decoder may use the header to configure itself. | ||
857 | * XD3_WINSTART: a general notification returned once for each window except the 0-th | ||
858 | * window, which is implied by XD3_GOTHEADER. It is recommended to | ||
859 | * use a switch-stmt such as: | ||
860 | * ... | ||
861 | * again: | ||
862 | * switch ((ret = xd3_decode_input (stream))) { | ||
863 | * case XD3_GOTHEADER: { | ||
864 | * assert(stream->current_window == 0); | ||
865 | * stuff; | ||
866 | * } | ||
867 | * // fallthrough | ||
868 | * case XD3_WINSTART: { | ||
869 | * something(stream->current_window); | ||
870 | * goto again; | ||
871 | * } | ||
872 | * ... | ||
873 | * XD3_WINFINISH: a general notification, following the complete input & output of a | ||
874 | * window. at this point, stream->total_in and stream->total_out are | ||
875 | * consistent for either encoding or decoding. | ||
876 | * XD3_GETSRCBLK: If the xd3_getblk() callback is NULL, this value is returned to | ||
877 | * initiate a non-blocking source read. | ||
878 | * | ||
879 | * For simple usage, see the xd3_process_completely() function, which underlies | ||
880 | * xd3_encode_completely() and xd3_decode_completely() [xdelta3.c]. For real application | ||
881 | * usage, including the application header, see the command-line utility [xdelta3-main.h]. | ||
882 | * | ||
883 | * main_input() implements the command-line encode and decode as well as the optional | ||
884 | * VCDIFF_TOOLS printhdr, printhdrs, and printdelta with a single loop [xdelta3-main.h]. | ||
885 | */ | ||
886 | int xd3_decode_input (xd3_stream *stream); | ||
887 | int xd3_encode_input (xd3_stream *stream); | ||
888 | |||
889 | /* The xd3_config structure is used to initialize a stream - all data is copied into | ||
890 | * stream so config may be a temporary variable. See the [documentation] or comments on | ||
891 | * the xd3_config structure. */ | ||
892 | int xd3_config_stream (xd3_stream *stream, | ||
893 | xd3_config *config); | ||
894 | |||
895 | /* Since Xdelta3 doesn't open any files, xd3_close_stream is just an error check that the | ||
896 | * stream is in a proper state to be closed: this means the encoder is flushed and the | ||
897 | * decoder is at a window boundary. The application is responsible for freeing any of the | ||
898 | * resources it supplied. */ | ||
899 | int xd3_close_stream (xd3_stream *stream); | ||
900 | |||
901 | /* This unconditionally closes/frees the stream, future close() will succeed.*/ | ||
902 | void xd3_abort_stream (xd3_stream *stream); | ||
903 | |||
904 | /* xd3_free_stream frees all memory allocated for the stream. The application is | ||
905 | * responsible for freeing any of the resources it supplied. */ | ||
906 | void xd3_free_stream (xd3_stream *stream); | ||
907 | |||
908 | /* This function informs the encoder or decoder that source matching (i.e., | ||
909 | * delta-compression) is possible. For encoding, this should be called before the first | ||
910 | * xd3_encode_input. A NULL source is ignored. For decoding, this should be called | ||
911 | * before the first window is decoded, but the appheader may be read first | ||
912 | * (XD3_GOTHEADER). At this point, consult xd3_decoder_needs_source(), inlined below, to | ||
913 | * determine if a source is expected by the decoder. */ | ||
914 | int xd3_set_source (xd3_stream *stream, | ||
915 | xd3_source *source); | ||
916 | |||
917 | /* This function invokes xd3_encode_input using whole-file, in-memory inputs. The output | ||
918 | * array must be large enough to hold the output or else ENOSPC is returned. */ | ||
919 | int xd3_encode_completely (xd3_stream *stream, | ||
920 | const uint8_t *input, | ||
921 | usize_t input_size, | ||
922 | uint8_t *output, | ||
923 | usize_t *output_size, | ||
924 | usize_t avail_output); | ||
925 | |||
926 | /* This function invokes xd3_decode_input using whole-file, in-memory inputs. The output | ||
927 | * array must be large enough to hold the output or else ENOSPC is returned. */ | ||
928 | int xd3_decode_completely (xd3_stream *stream, | ||
929 | const uint8_t *input, | ||
930 | usize_t input_size, | ||
931 | uint8_t *output, | ||
932 | usize_t *output_size, | ||
933 | usize_t avail_size); | ||
934 | |||
935 | /* This should be called before the first call to xd3_encode_input() to include | ||
936 | * application-specific data in the VCDIFF header. */ | ||
937 | void xd3_set_appheader (xd3_stream *stream, | ||
938 | const uint8_t *data, | ||
939 | usize_t size); | ||
940 | |||
941 | /* xd3_get_appheader may be called in the decoder after XD3_GOTHEADER. For convenience, | ||
942 | * the decoder always adds a single byte padding to the end of the application header, | ||
943 | * which is set to zero in case the application header is a string. */ | ||
944 | int xd3_get_appheader (xd3_stream *stream, | ||
945 | uint8_t **data, | ||
946 | usize_t *size); | ||
947 | |||
948 | /* After receiving XD3_GOTHEADER, the decoder should check this function which returns 1 | ||
949 | * if the decoder will require source data. */ | ||
950 | int xd3_decoder_needs_source (xd3_stream *stream); | ||
951 | |||
952 | /* Includes the above rvalues */ | ||
953 | const char* xd3_strerror (int ret); | ||
954 | |||
955 | /* For convenience, zero & initialize the xd3_config structure with specified flags. The whole structure is cleared with memset first, so every field other than flags (including the getblk/alloc/freef callbacks) is left zeroed. */ | ||
956 | static inline | ||
957 | void xd3_init_config (xd3_config *config, | ||
958 | int flags) | ||
959 | { | ||
960 | memset (config, 0, sizeof (*config)); | ||
961 | config->flags = flags; | ||
962 | } | ||
963 | |||
964 | /* This supplies some input to the stream by recording idata and isize as stream->next_in and stream->avail_in. Only the pointer is stored; this function copies no bytes. */ | ||
965 | static inline | ||
966 | void xd3_avail_input (xd3_stream *stream, | ||
967 | const uint8_t *idata, | ||
968 | usize_t isize) | ||
969 | { | ||
970 | /* Even if isize is zero, the code expects a non-NULL idata. Why? It uses this value | ||
971 | * to determine whether xd3_avail_input has ever been called. If xd3_encode_input is | ||
972 | * called before xd3_avail_input it will return XD3_INPUT right away without allocating | ||
973 | * a stream->winsize buffer. This is to avoid an unwanted allocation. */ | ||
974 | XD3_ASSERT (idata != NULL); | ||
975 | |||
976 | /* TODO: Should check for a call to xd3_avail_input in the wrong state. */ | ||
977 | stream->next_in = idata; | ||
978 | stream->avail_in = isize; | ||
979 | } | ||
980 | |||
981 | /* This acknowledges receipt of output data, must be called after any XD3_OUTPUT | ||
982 | * return. It does so by resetting stream->avail_out to zero; next_out is left untouched. */ | ||
983 | static inline | ||
984 | void xd3_consume_output (xd3_stream *stream) | ||
985 | { | ||
986 | /* TODO: Is it correct to set avail_in = 0 here, then check == 0 in avail_in? NOTE(review): this function writes avail_out, not avail_in, so the TODO presumably means avail_out - confirm. */ | ||
987 | stream->avail_out = 0; | ||
988 | } | ||
989 | |||
990 | /* These are set for each XD3_WINFINISH return. */ | ||
991 | static inline | ||
992 | int xd3_encoder_used_source (xd3_stream *stream) { return stream->src != NULL && stream->src->srclen > 0; } /* nonzero only when a source is attached and its window length (srclen) is nonzero */ | ||
993 | static inline | ||
994 | xoff_t xd3_encoder_srcbase (xd3_stream *stream) { return stream->src != NULL ? stream->src->srcbase : 0; } /* offset of the source window (src->srcbase); returns 0 when no source is attached instead of dereferencing NULL, matching the NULL test in xd3_encoder_used_source */ | ||
995 | static inline | ||
996 | usize_t xd3_encoder_srclen (xd3_stream *stream) { return stream->src != NULL ? stream->src->srclen : 0; } /* length of the source window (src->srclen); returns 0 when no source is attached instead of dereferencing NULL, matching the NULL test in xd3_encoder_used_source */ | ||
997 | |||
998 | /* Checks for legal flag changes, then replaces stream->flags with flags. NOTE(review): the check is an XD3_ASSERT, so whether it is enforced depends on how XD3_ASSERT is defined (not visible here). */ | ||
999 | static inline | ||
1000 | void xd3_set_flags (xd3_stream *stream, int flags) | ||
1001 | { | ||
1002 | /* The bitwise difference should contain only XD3_FLUSH or XD3_SKIP_WINDOW */ | ||
1003 | XD3_ASSERT(((flags ^ stream->flags) & ~(XD3_FLUSH | XD3_SKIP_WINDOW)) == 0); | ||
1004 | stream->flags = flags; | ||
1005 | } | ||
1006 | |||
1007 | /* Gives some extra information about the latest library error, if any is known. Returns stream->msg, or an empty string when msg is NULL, so the result is never NULL. */ | ||
1008 | static inline | ||
1009 | const char* xd3_errstring (xd3_stream *stream) | ||
1010 | { | ||
1011 | return stream->msg ? stream->msg : ""; | ||
1012 | } | ||
1013 | |||
1014 | /* This function tells the number of bytes expected to be set in source->onblk after a | ||
1015 | * getblk request. This is for convenience of handling a partial last block. blkno must be less than source->blocks (asserted). */ | ||
1016 | static inline | ||
1017 | usize_t xd3_bytes_on_srcblk (xd3_source *source, xoff_t blkno) | ||
1018 | { | ||
1019 | XD3_ASSERT (blkno < source->blocks); | ||
1020 | |||
1021 | if (blkno != source->blocks - 1) | ||
1022 | { | ||
1023 | return source->blksize; /* every block before the last one is full */ | ||
1024 | } | ||
1025 | |||
1026 | return ((source->size - 1) % source->blksize) + 1; /* last block: subtracting 1 before the modulus makes a size that is an exact multiple of blksize yield blksize rather than 0 */ | ||
1027 | } | ||
1028 | |||
1029 | #endif /* _XDELTA3_H_ */ | ||
diff --git a/xdelta3/xdelta3.prj b/xdelta3/xdelta3.prj new file mode 100755 index 0000000..df1a445 --- /dev/null +++ b/xdelta3/xdelta3.prj | |||
@@ -0,0 +1,133 @@ | |||
1 | ;; -*- Prcs -*- | ||
2 | (Created-By-Prcs-Version 1 3 3) | ||
3 | (Project-Description "") | ||
4 | (Project-Version xdelta3 0 5) | ||
5 | (Parent-Version xdelta3 0 4) | ||
6 | (Version-Log "write a bit of documentation, work to fix/clean the regression test, fixed one actual bug in xd3_stream_close()") | ||
7 | (New-Version-Log "") | ||
8 | (Checkin-Time "Sun, 30 May 2004 14:42:47 -0700") | ||
9 | (Checkin-Login jmacd) | ||
10 | (Populate-Ignore ()) | ||
11 | (Project-Keywords | ||
12 | (WWWLeftNavBar "<table cellpadding=\"20px\" width=700> <tr> <td class=\"leftbdr\" valign=top height=600 width=100> <div class=\"leftbody\"> <h1>Xdelta</h1> <a href=\"xdelta3.html\">overview</a><br> <a href=\"xdelta3-cmdline.html\">command line</a><br> <a href=\"xdelta3-api-guide.html\">api guide</a><br> <br><a href=\"http://xdelta.org\">xdelta.org</a></h2> </div> </td> <td valign=top width=500>") | ||
13 | ) | ||
14 | (Files | ||
15 | |||
16 | ;; Files added by populate at Sun, 20 Jul 2003 04:22:04 +0400, | ||
17 | ;; to version 0.0(w), by jmacd: | ||
18 | |||
19 | (Makefile (xdelta3/0_Makefile 1.3 644)) | ||
20 | (xdelta3.h (xdelta3/1_xdelta3.h 1.2 644)) | ||
21 | (xdelta3-test.h (xdelta3/2_xdelta3-te 1.2 644)) | ||
22 | (xdelta3-second.h (xdelta3/3_xdelta3-se 1.1 644)) | ||
23 | (xdelta3-python.h (xdelta3/4_xdelta3-py 1.1 644)) | ||
24 | (xdelta3-main.h (xdelta3/5_xdelta3-ma 1.3 644)) | ||
25 | (xdelta3-list.h (xdelta3/6_xdelta3-li 1.1 644)) | ||
26 | (xdelta3-fgk.h (xdelta3/7_xdelta3-fg 1.1 644)) | ||
27 | (xdelta3-djw.h (xdelta3/8_xdelta3-dj 1.1 644)) | ||
28 | (xdelta3-cfgs.h (xdelta3/9_xdelta3-cf 1.1 644)) | ||
29 | (xdelta3-regtest.py (xdelta3/10_xdelta3-re 1.3 755)) | ||
30 | (setup.py (xdelta3/11_setup.py 1.1 644)) | ||
31 | (analyze_pfx.py (xdelta3/12_analyze_pf 1.1 644)) | ||
32 | (analyze_gp.py (xdelta3/13_analyze_gp 1.1 644)) | ||
33 | (analyze_clen.py (xdelta3/14_analyze_cl 1.1 644)) | ||
34 | (rcs_junk.cc (xdelta3/15_rcs_junk.c 1.1 644)) | ||
35 | (xdelta3.c (xdelta3/16_xdelta3.c 1.3 644)) | ||
36 | (testh.c (xdelta3/17_testh.c 1.1 644)) | ||
37 | (show.c (xdelta3/18_show.c 1.1 644)) | ||
38 | (linkxd3lib.c (xdelta3/19_linkxd3lib 1.1 644)) | ||
39 | (badcopy.c (xdelta3/20_badcopy.c 1.1 644)) | ||
40 | |||
41 | ;; Files added by populate at Sun, 20 Jul 2003 04:22:08 +0400, | ||
42 | ;; to version 0.0(w), by jmacd: | ||
43 | |||
44 | (save.regtest.bug9/foo,v (xdelta3/21_foo,vx 1.1 444) :no-keywords) | ||
45 | (save.regtest.bug9/foo2,v (xdelta3/22_foo2,vx 1.1 444) :no-keywords) | ||
46 | (save.regtest.bug8/core (xdelta3/23_core 1.1 600) :no-keywords) | ||
47 | (save.regtest.bug8/output.x.right (xdelta3/24_output.x.r 1.1 644) :no-keywords) | ||
48 | (save.regtest.bug8/output.x (xdelta3/25_output.x 1.1 644) :no-keywords) | ||
49 | (save.regtest.bug8/input.1.xz (xdelta3/26_input.1.xz 1.1 644) :no-keywords) | ||
50 | (save.regtest.bug8/input.0.xz (xdelta3/27_input.0.xz 1.1 644) :no-keywords) | ||
51 | (save.regtest.bug8/input.0 (xdelta3/28_input.0 1.1 644)) | ||
52 | (save.regtest.bug8/input.1 (xdelta3/29_input.1 1.1 644)) | ||
53 | (save.regtest.bug7/core (xdelta3/30_core 1.1 600) :no-keywords) | ||
54 | (save.regtest.bug7/recon.x (xdelta3/31_recon.x 1.1 644) :no-keywords) | ||
55 | (save.regtest.bug7/output.x (xdelta3/32_output.x 1.1 644) :no-keywords) | ||
56 | (save.regtest.bug7/input.1 (xdelta3/33_input.1 1.1 644) :no-keywords) | ||
57 | (save.regtest.bug7/input.0 (xdelta3/34_input.0 1.1 644) :no-keywords) | ||
58 | (save.regtest.bug7/output (xdelta3/35_output 1.1 644) :no-keywords) | ||
59 | (save.regtest.bug7/recon (xdelta3/36_recon 1.1 644) :no-keywords) | ||
60 | (save.regtest.bug6/recon (xdelta3/37_recon 1.1 644)) | ||
61 | (save.regtest.bug6/input.21 (xdelta3/38_input.21 1.1 644)) | ||
62 | (save.regtest.bug6/input.20 (xdelta3/39_input.20 1.1 644)) | ||
63 | (save.regtest.bug6/input.0 (xdelta3/40_input.0 1.1 644)) | ||
64 | (save.regtest.bug6/output (xdelta3/41_output 1.1 644) :no-keywords) | ||
65 | (save.regtest.bug5/input.1 (xdelta3/42_input.1 1.1 644) :no-keywords) | ||
66 | (save.regtest.bug5/input.0 (xdelta3/43_input.0 1.1 644) :no-keywords) | ||
67 | (save.regtest.bug4/input.1 (xdelta3/44_input.1 1.1 644) :no-keywords) | ||
68 | (save.regtest.bug4/input.0 (xdelta3/45_input.0 1.1 644) :no-keywords) | ||
69 | (save.regtest.bug3/input.1 (xdelta3/46_input.1 1.1 644)) | ||
70 | (save.regtest.bug3/input.0 (xdelta3/47_input.0 1.1 644)) | ||
71 | (save.regtest.bug2/input.1 (xdelta3/48_input.1 1.1 644) :no-keywords) | ||
72 | (save.regtest.bug2/input.0 (xdelta3/49_input.0 1.1 644) :no-keywords) | ||
73 | (save.regtest.bug12/xd3regtest.27181/input.1 (xdelta3/50_input.1 1.1 644) :no-keywords) | ||
74 | (save.regtest.bug12/xd3regtest.27181/input.0 (xdelta3/51_input.0 1.1 644) :no-keywords) | ||
75 | (save.regtest.bug12/xd3regtest.27181/output (xdelta3/b/0_output 1.1 644) :no-keywords) | ||
76 | (save.regtest.bug12/output.x (xdelta3/b/1_output.x 1.1 644) :no-keywords) | ||
77 | (save.regtest.bug12/input.1 (xdelta3/b/2_input.1 1.1 644)) | ||
78 | (save.regtest.bug12/input.0 (xdelta3/b/3_input.0 1.1 644)) | ||
79 | (save.regtest.bug12/output (xdelta3/b/4_output 1.1 644) :no-keywords) | ||
80 | (save.regtest.bug11/recon.x (xdelta3/b/5_recon.x 1.1 644)) | ||
81 | (save.regtest.bug11/output.x (xdelta3/b/6_output.x 1.1 644) :no-keywords) | ||
82 | (save.regtest.bug11/input.1 (xdelta3/b/7_input.1 1.1 644)) | ||
83 | (save.regtest.bug11/input.0 (xdelta3/b/8_input.0 1.1 644)) | ||
84 | (save.regtest.bug11/output (xdelta3/b/9_output 1.1 644) :no-keywords) | ||
85 | (save.regtest.bug11/recon (xdelta3/b/10_recon 1.1 644)) | ||
86 | (save.regtest.bug10/recon.x (xdelta3/b/11_recon.x 1.1 644) :no-keywords) | ||
87 | (save.regtest.bug10/output.x (xdelta3/b/12_output.x 1.1 644) :no-keywords) | ||
88 | (save.regtest.bug10/input.1 (xdelta3/b/13_input.1 1.1 644) :no-keywords) | ||
89 | (save.regtest.bug10/input.0 (xdelta3/b/14_input.0 1.1 644) :no-keywords) | ||
90 | (save.regtest.bug10/output (xdelta3/b/15_output 1.1 644) :no-keywords) | ||
91 | (save.regtest.bug1/input.4 (xdelta3/b/16_input.4 1.1 644)) | ||
92 | (save.regtest.bug1/input.5 (xdelta3/b/17_input.5 1.1 644)) | ||
93 | |||
94 | ;; Files added by populate at Sun, 20 Jul 2003 04:22:28 +0400, | ||
95 | ;; to version 0.0(w), by jmacd: | ||
96 | |||
97 | (priorities.txt (xdelta3/b/18_priorities 1.1 644)) | ||
98 | |||
99 | ;; Files added by populate at Sun, 20 Jul 2003 04:22:40 +0400, | ||
100 | ;; to version 0.0(w), by jmacd: | ||
101 | |||
102 | (vcdiff.ps (xdelta3/b/19_vcdiff.ps 1.1 644)) | ||
103 | (draft-vcdiff-huffman.txt (xdelta3/b/20_draft-vcdi 1.1 600)) | ||
104 | |||
105 | ;; Files added by populate at Sun, 20 Jul 2003 04:22:59 +0400, | ||
106 | ;; to version 0.0(w), by jmacd: | ||
107 | |||
108 | (dead.code (xdelta3/b/21_dead.code 1.2 644)) | ||
109 | |||
110 | ;; Files added by populate at Sun, 20 Jul 2003 04:23:05 +0400, | ||
111 | ;; to version 0.0(w), by jmacd: | ||
112 | |||
113 | (draft-korn-vcdiff.txt (xdelta3/b/22_draft-korn 1.1 600)) | ||
114 | |||
115 | ;; Files added by populate at Sun, 20 Jul 2003 08:16:41 +0400, | ||
116 | ;; to version 0.1(w), by jmacd: | ||
117 | |||
118 | (www/xdelta3-api-guide.html (xdelta3/b/23_Xdelta3-ap 1.4 644)) | ||
119 | (www/xdelta3.html (xdelta3/b/24_Xdelta3.ht 1.4 644)) | ||
120 | |||
121 | ;; Files added by populate at Sun, 20 Jul 2003 22:35:48 +0400, | ||
122 | ;; to version 0.2(w), by jmacd: | ||
123 | |||
124 | (www/xdelta3-cmdline.html (xdelta3/b/25_xdelta3-cm 1.2 644)) | ||
125 | (www/xdelta3.css (xdelta3/b/26_xdelta3.cs 1.3 644)) | ||
126 | |||
127 | ;; Files added by populate at Wed, 21 Jul 2004 15:39:04 -0700, | ||
128 | ;; to version 0.5(w), by jmacd: | ||
129 | |||
130 | (gpl.txt ()) | ||
131 | ) | ||
132 | (Merge-Parents) | ||
133 | (New-Merge-Parents) | ||